Compare commits
237 Commits
2016.07.24
...
2016.08.17
Author | SHA1 | Date | |
---|---|---|---|
|
b3d7dce429 | ||
|
a44694ab4e | ||
|
ab19b46b88 | ||
|
8804f10e6b | ||
|
6be17c0870 | ||
|
8652770bd2 | ||
|
2a1321a272 | ||
|
9c0fa60bf3 | ||
|
502d87c546 | ||
|
b35b0d73d8 | ||
|
6e7e4a6edf | ||
|
53fef319f1 | ||
|
2cabee2a7d | ||
|
11f502fac1 | ||
|
98affc1a48 | ||
|
70a2829fee | ||
|
837e56c8ee | ||
|
b5ddee8c77 | ||
|
fb64adcbd3 | ||
|
4f640f2890 | ||
|
254e64a20a | ||
|
818ac213eb | ||
|
cbef4d5c9f | ||
|
bf90c46790 | ||
|
69eb4d699f | ||
|
6d8ec8c3b7 | ||
|
760845ce99 | ||
|
5c2d087221 | ||
|
b6c4e36728 | ||
|
1a57b8c18c | ||
|
24eb13b1c6 | ||
|
525e0316c0 | ||
|
7e60ce9cf7 | ||
|
e811bcf8f8 | ||
|
6103f59095 | ||
|
9fa5789279 | ||
|
d2ac04674d | ||
|
1fd6e30988 | ||
|
884cdb6cd9 | ||
|
9771b1f901 | ||
|
2118fdd1a9 | ||
|
320d597c21 | ||
|
aaf44a2f47 | ||
|
fafabc0712 | ||
|
409760a932 | ||
|
097eba019d | ||
|
73a85620ee | ||
|
a560f28c98 | ||
|
5ec5461e1a | ||
|
542130a5d9 | ||
|
82997dad57 | ||
|
647a7bf5e8 | ||
|
77afa008dd | ||
|
db535435b3 | ||
|
c2a453b461 | ||
|
cd29eaab95 | ||
|
52aa7e7476 | ||
|
e97c55ee6a | ||
|
acfccacad5 | ||
|
5f2c2b7936 | ||
|
cb55908e51 | ||
|
e581224843 | ||
|
f50365e91c | ||
|
c366f8d30a | ||
|
6a26c5f9d5 | ||
|
bd6fb007de | ||
|
b69b2ff736 | ||
|
794e5dcd7e | ||
|
f0d3669437 | ||
|
98e698f1ff | ||
|
3cddb8d6a7 | ||
|
990d533ee4 | ||
|
b0081562d2 | ||
|
fff37cfd4f | ||
|
a3be69b7f0 | ||
|
0fd1b1624c | ||
|
367976d49f | ||
|
0aef0771f8 | ||
|
0c070681c5 | ||
|
30b25d382d | ||
|
e5f878c205 | ||
|
e2e84aed7e | ||
|
b1927f4e8a | ||
|
3b9323d96e | ||
|
7f832413d6 | ||
|
7f2ed47595 | ||
|
c3fa77bdef | ||
|
57ce8a6d08 | ||
|
69d8eeeec5 | ||
|
81c13222c6 | ||
|
b1ce2ba197 | ||
|
5c8411e968 | ||
|
cc9c8ce5df | ||
|
20ef4123b9 | ||
|
4e62d26aa2 | ||
|
b657816684 | ||
|
9778b3e7ee | ||
|
25dd58ca6a | ||
|
5e42f8a0ad | ||
|
1ad6b891b2 | ||
|
7aa589a5e1 | ||
|
065bc35489 | ||
|
3a380766d1 | ||
|
affaea0688 | ||
|
77426a087b | ||
|
8991844ea2 | ||
|
082395d0a0 | ||
|
e8ed7354e6 | ||
|
1e7f602e2a | ||
|
522f6c066d | ||
|
321b5e082a | ||
|
3711fa1eb2 | ||
|
395c74615c | ||
|
3dc240e8c6 | ||
|
a41a6c5094 | ||
|
d71207121d | ||
|
b1c6f21c74 | ||
|
412abb8760 | ||
|
f17d5f6d14 | ||
|
6bb801cfaf | ||
|
de02d1f4e9 | ||
|
e1f93a0a76 | ||
|
d21a661bb4 | ||
|
b2bd968f4b | ||
|
4a01befb34 | ||
|
845dfcdc40 | ||
|
d92cb46305 | ||
|
a8795327ca | ||
|
d34995a9e3 | ||
|
958849275f | ||
|
998f094452 | ||
|
aaa42cf0cf | ||
|
9fb64c04cd | ||
|
f9622868e7 | ||
|
37768f9242 | ||
|
a1aadd09a4 | ||
|
b47a75017b | ||
|
e37b54b140 | ||
|
c1decda58c | ||
|
d3f8e038fe | ||
|
ad152e2d95 | ||
|
b0af12154e | ||
|
d16b3c6677 | ||
|
c57244cdb1 | ||
|
a7e5f27412 | ||
|
089a40955c | ||
|
d73ebac100 | ||
|
e563c0d73b | ||
|
491c42e690 | ||
|
7f2339c617 | ||
|
8122e79fef | ||
|
fe3ad1d456 | ||
|
038a5e1a65 | ||
|
84bc23b41b | ||
|
46933a15d6 | ||
|
3859ebeee6 | ||
|
d50aca41f8 | ||
|
0ca057b965 | ||
|
5ca968d0a6 | ||
|
f0d31c624e | ||
|
08c655906c | ||
|
5a993e1692 | ||
|
a7d2953073 | ||
|
fdd0b8f8e0 | ||
|
f65dc41b72 | ||
|
962250f7ea | ||
|
7dc2a74e0a | ||
|
b02b960c6b | ||
|
4f427c4be8 | ||
|
8a00ea567b | ||
|
8895be01fc | ||
|
52e7fcfeb7 | ||
|
2396062c74 | ||
|
14704aeff6 | ||
|
3c2c3af059 | ||
|
1891ea2d76 | ||
|
1094074c04 | ||
|
217d5ae013 | ||
|
8b40854529 | ||
|
6bb0fbf9fb | ||
|
8d3b226b83 | ||
|
42b7a5afe0 | ||
|
899d2bea63 | ||
|
9cb0e65d7e | ||
|
b070564efb | ||
|
ce28252c48 | ||
|
3aa9a73554 | ||
|
6a9b3b61ea | ||
|
45408eb075 | ||
|
eafc66855d | ||
|
e03d3e6453 | ||
|
a70e45f80a | ||
|
697655a7c0 | ||
|
e382b953f0 | ||
|
116e7e0d04 | ||
|
cf03e34ad3 | ||
|
2903137292 | ||
|
9361f2169c | ||
|
35aa6c538f | ||
|
fa9f1d16b8 | ||
|
485fedf6fd | ||
|
da0baba5c8 | ||
|
bb9f3bfedf | ||
|
dbc0b39b91 | ||
|
481c5c5137 | ||
|
0cacae2807 | ||
|
d9d56deadf | ||
|
74ba450a81 | ||
|
db19df6ca0 | ||
|
fbdf8d15d1 | ||
|
94aae01548 | ||
|
39eef54cf0 | ||
|
05c8268c81 | ||
|
289a16b4f3 | ||
|
7935926baa | ||
|
dcbb07c35a | ||
|
40090e8d51 | ||
|
3e050d51d4 | ||
|
ced70c8640 | ||
|
9a700deea4 | ||
|
dc35ba0eba | ||
|
88bd486b9a | ||
|
7f8b92e3cf | ||
|
35f6e0ff36 | ||
|
326fa4e6e5 | ||
|
c74299a72c | ||
|
10a1bb3a78 | ||
|
4d3e543c73 | ||
|
05d1e7aaa9 | ||
|
a3aa814b77 | ||
|
5c32a77cad | ||
|
14a28e705b | ||
|
cc99d4f826 | ||
|
712c7530ff | ||
|
0a147785e8 | ||
|
59eaf69e33 | ||
|
e8be2943a7 |
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@@ -6,8 +6,8 @@
|
||||
|
||||
---
|
||||
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.07.24*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.07.24**
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.08.17*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.08.17**
|
||||
|
||||
### Before submitting an *issue* make sure you have:
|
||||
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2016.07.24
|
||||
[debug] youtube-dl version 2016.08.17
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
2
AUTHORS
2
AUTHORS
@@ -179,3 +179,5 @@ Jakub Adam Wieczorek
|
||||
Aleksandar Topuzović
|
||||
Nehal Patel
|
||||
Rob van Bekkum
|
||||
Petr Zvoníček
|
||||
Pratyush Singh
|
||||
|
@@ -46,7 +46,7 @@ Make sure that someone has not already opened the issue you're trying to open. S
|
||||
|
||||
### Why are existing options not enough?
|
||||
|
||||
Before requesting a new feature, please have a quick peek at [the list of supported options](https://github.com/rg3/youtube-dl/blob/master/README.md#synopsis). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem.
|
||||
Before requesting a new feature, please have a quick peek at [the list of supported options](https://github.com/rg3/youtube-dl/blob/master/README.md#options). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem.
|
||||
|
||||
### Is there enough context in your bug report?
|
||||
|
||||
|
423
ChangeLog
Normal file
423
ChangeLog
Normal file
@@ -0,0 +1,423 @@
|
||||
version 2016.08.17
|
||||
|
||||
Core
|
||||
+ Add _get_netrc_login_info
|
||||
|
||||
Extractors
|
||||
* [mofosex] Extract all formats (#10335)
|
||||
+ [generic] Add support for vbox7 embeds
|
||||
+ [vbox7] Add support for embed URLs
|
||||
+ [viafree] Add extractor (#10358)
|
||||
+ [mtg] Add support for viafree URLs (#10358)
|
||||
* [theplatform] Extract all subtitles per language
|
||||
+ [xvideos] Fix HLS extraction (#10356)
|
||||
+ [amcnetworks] Add extractor
|
||||
+ [bbc:playlist] Add support for pagination (#10349)
|
||||
+ [fxnetworks] Add extractor (#9462)
|
||||
* [cbslocal] Fix extraction for SendtoNews-based videos
|
||||
* [sendtonews] Fix extraction
|
||||
* [jwplatform] Extract video id from JWPlayer data
|
||||
- [zippcast] Remove extractor (#10332)
|
||||
+ [viceland] Add extractor (#8799)
|
||||
+ [adobepass] Add base extractor for Adobe Pass Authentication
|
||||
* [life:embed] Improve extraction
|
||||
* [vgtv] Detect geo restricted videos (#10348)
|
||||
+ [uplynk] Add extractor
|
||||
* [xiami] Fix extraction (#10342)
|
||||
|
||||
|
||||
version 2016.08.13
|
||||
|
||||
Core
|
||||
* Show progress for curl external downloader
|
||||
* Forward more options to curl external downloader
|
||||
|
||||
Extractors
|
||||
* [pbs] Fix description extraction
|
||||
* [franceculture] Fix extraction (#10324)
|
||||
* [pornotube] Fix extraction (#10322)
|
||||
* [4tube] Fix metadata extraction (#10321)
|
||||
* [imgur] Fix width and height extraction (#10325)
|
||||
* [expotv] Improve extraction
|
||||
+ [vbox7] Fix extraction (#10309)
|
||||
- [tapely] Remove extractor (#10323)
|
||||
* [muenchentv] Fix extraction (#10313)
|
||||
+ [24video] Add support for .me and .xxx TLDs
|
||||
* [24video] Fix comment count extraction
|
||||
* [sunporno] Add support for embed URLs
|
||||
* [sunporno] Fix metadata extraction (#10316)
|
||||
+ [hgtv] Add extractor for hgtv.ca (#3999)
|
||||
- [pbs] Remove request to unavailable API
|
||||
+ [pbs] Add support for high quality HTTP formats
|
||||
+ [crunchyroll] Add support for HLS formats (#10301)
|
||||
|
||||
|
||||
version 2016.08.12
|
||||
|
||||
Core
|
||||
* Subtitles are now written as is. Newline conversions are disabled. (#10268)
|
||||
+ Recognize more formats in unified_timestamp
|
||||
|
||||
Extractors
|
||||
- [goldenmoustache] Remove extractor (#10298)
|
||||
* [drtuber] Improve title extraction
|
||||
* [drtuber] Make dislike count optional (#10297)
|
||||
* [chirbit] Fix extraction (#10296)
|
||||
* [francetvinfo] Relax URL regular expression
|
||||
* [rtlnl] Relax URL regular expression (#10282)
|
||||
* [formula1] Relax URL regular expression (#10283)
|
||||
* [wat] Improve extraction (#10281)
|
||||
* [ctsnews] Fix extraction
|
||||
|
||||
|
||||
version 2016.08.10
|
||||
|
||||
Core
|
||||
* Make --metadata-from-title non fatal when title does not match the pattern
|
||||
* Introduce options for randomized sleep before each download
|
||||
--min-sleep-interval and --max-sleep-interval (#9930)
|
||||
* Respect default in _search_json_ld
|
||||
|
||||
Extractors
|
||||
+ [uol] Add extractor for uol.com.br (#4263)
|
||||
* [rbmaradio] Fix extraction and extract all formats (#10242)
|
||||
+ [sonyliv] Add extractor for sonyliv.com (#10258)
|
||||
* [aparat] Fix extraction
|
||||
* [cwtv] Extract HTTP formats
|
||||
+ [rozhlas] Add extractor for prehravac.rozhlas.cz (#10253)
|
||||
* [kuwo:singer] Fix extraction
|
||||
|
||||
|
||||
version 2016.08.07
|
||||
|
||||
Core
|
||||
+ Add support for TV Parental Guidelines ratings in parse_age_limit
|
||||
+ Add decode_png (#9706)
|
||||
+ Add support for partOfTVSeries in JSON-LD
|
||||
* Lower master M3U8 manifest preference for better format sorting
|
||||
|
||||
Extractors
|
||||
+ [discoverygo] Add extractor (#10245)
|
||||
* [flipagram] Make JSON-LD extraction non fatal
|
||||
* [generic] Make JSON-LD extraction non fatal
|
||||
+ [bbc] Add support for morph embeds (#10239)
|
||||
* [tnaflixnetworkbase] Improve title extraction
|
||||
* [tnaflix] Fix metadata extraction (#10249)
|
||||
* [fox] Fix theplatform release URL query
|
||||
* [openload] Fix extraction (#9706)
|
||||
* [bbc] Skip duplicate manifest URLs
|
||||
* [bbc] Improve format code
|
||||
+ [bbc] Add support for DASH and F4M
|
||||
* [bbc] Improve format sorting and listing
|
||||
* [bbc] Improve playlist extraction
|
||||
+ [pokemon] Add extractor (#10093)
|
||||
+ [condenast] Add fallback scenario for video info extraction
|
||||
|
||||
|
||||
version 2016.08.06
|
||||
|
||||
Core
|
||||
* Add support for JSON-LD root list entries (#10203)
|
||||
* Improve unified_timestamp
|
||||
* Lower preference of RTSP formats in generic sorting
|
||||
+ Add support for multiple properties in _og_search_property
|
||||
* Improve password hiding from verbose output
|
||||
|
||||
Extractors
|
||||
+ [adultswim] Add support for trailers (#10235)
|
||||
* [archiveorg] Improve extraction (#10219)
|
||||
+ [jwplatform] Add support for playlists
|
||||
+ [jwplatform] Add support for relative URLs
|
||||
* [jwplatform] Improve audio detection
|
||||
+ [tvplay] Capture and output native error message
|
||||
+ [tvplay] Extract series metadata
|
||||
+ [tvplay] Add support for subtitles (#10194)
|
||||
* [tvp] Improve extraction (#7799)
|
||||
* [cbslocal] Fix timestamp parsing (#10213)
|
||||
+ [naver] Add support for subtitles (#8096)
|
||||
* [naver] Improve extraction
|
||||
* [condenast] Improve extraction
|
||||
* [engadget] Relax URL regular expression
|
||||
* [5min] Fix extraction
|
||||
+ [nationalgeographic] Add support for Episode Guide
|
||||
+ [kaltura] Add support for subtitles
|
||||
* [kaltura] Optimize network requests
|
||||
+ [vodplatform] Add extractor for vod-platform.net
|
||||
- [gamekings] Remove extractor
|
||||
* [limelight] Extract HTTP formats
|
||||
* [ntvru] Fix extraction
|
||||
+ [comedycentral] Re-add :tds and :thedailyshow shortnames
|
||||
|
||||
|
||||
version 2016.08.01
|
||||
|
||||
Fixed/improved extractors
|
||||
- [yandexmusic:track] Adapt to changes in track location JSON (#10193)
|
||||
- [bloomberg] Support another form of player (#10187)
|
||||
- [limelight] Skip DRM protected videos
|
||||
- [safari] Relax regular expressions for URL matching (#10202)
|
||||
- [cwtv] Add support for cwtvpr.com (#10196)
|
||||
|
||||
|
||||
version 2016.07.30
|
||||
|
||||
Fixed/improved extractors
|
||||
- [twitch:clips] Sort formats
|
||||
- [tv2] Use m3u8_native
|
||||
- [tv2:article] Fix video detection (#10188)
|
||||
- rtve (#10076)
|
||||
- [dailymotion:playlist] Optimize download archive processing (#10180)
|
||||
|
||||
|
||||
version 2016.07.28
|
||||
|
||||
Fixed/improved extractors
|
||||
- shared (#10170)
|
||||
- soundcloud (#10179)
|
||||
- twitch (#9767)
|
||||
|
||||
|
||||
version 2016.07.26.2
|
||||
|
||||
Fixed/improved extractors
|
||||
- smotri
|
||||
- camdemy
|
||||
- mtv
|
||||
- comedycentral
|
||||
- cmt
|
||||
- cbc
|
||||
- mgtv
|
||||
- orf
|
||||
|
||||
|
||||
version 2016.07.24
|
||||
|
||||
New extractors
|
||||
- arkena (#8682)
|
||||
- lcp (#8682)
|
||||
|
||||
Fixed/improved extractors
|
||||
- facebook (#10151)
|
||||
- dailymail
|
||||
- telegraaf
|
||||
- dcn
|
||||
- onet
|
||||
- tvp
|
||||
|
||||
Miscellaneous
|
||||
- Support $Time$ in DASH manifests
|
||||
|
||||
|
||||
version 2016.07.22
|
||||
|
||||
New extractors
|
||||
- odatv (#9285)
|
||||
|
||||
Fixed/improved extractors
|
||||
- bbc
|
||||
- youjizz (#10131)
|
||||
- youtube (#10140)
|
||||
- pornhub (#10138)
|
||||
- eporner (#10139)
|
||||
|
||||
|
||||
version 2016.07.17
|
||||
|
||||
New extractors
|
||||
- nintendo (#9986)
|
||||
- streamable (#9122)
|
||||
|
||||
Fixed/improved extractors
|
||||
- ard (#10095)
|
||||
- mtv
|
||||
- comedycentral (#10101)
|
||||
- viki (#10098)
|
||||
- spike (#10106)
|
||||
|
||||
Miscellaneous
|
||||
- Improved twitter player detection (#10090)
|
||||
|
||||
|
||||
version 2016.07.16
|
||||
|
||||
New extractors
|
||||
- ninenow (#5181)
|
||||
|
||||
Fixed/improved extractors
|
||||
- rtve (#10076)
|
||||
- brightcove
|
||||
- 3qsdn
|
||||
- syfy (#9087, #3820, #2388)
|
||||
- youtube (#10083)
|
||||
|
||||
Miscellaneous
|
||||
- Fix subtitle embedding for video-only and audio-only files (#10081)
|
||||
|
||||
|
||||
version 2016.07.13
|
||||
|
||||
New extractors
|
||||
- rudo
|
||||
|
||||
Fixed/improved extractors
|
||||
- biobiochiletv
|
||||
- tvplay
|
||||
- dbtv
|
||||
- brightcove
|
||||
- tmz
|
||||
- youtube (#10059)
|
||||
- shahid (#10062)
|
||||
- vk
|
||||
- ellentv (#10067)
|
||||
|
||||
|
||||
version 2016.07.11
|
||||
|
||||
New Extractors
|
||||
- roosterteeth (#9864)
|
||||
|
||||
Fixed/improved extractors
|
||||
- miomio (#9605)
|
||||
- vuclip
|
||||
- youtube
|
||||
- vidzi (#10058)
|
||||
|
||||
|
||||
version 2016.07.09.2
|
||||
|
||||
Fixed/improved extractors
|
||||
- vimeo (#1638)
|
||||
- facebook (#10048)
|
||||
- lynda (#10047)
|
||||
- animeondemand
|
||||
|
||||
Fixed/improved features
|
||||
- Embedding subtitles no longer throws an error with problematic inputs (#9063)
|
||||
|
||||
|
||||
version 2016.07.09.1
|
||||
|
||||
Fixed/improved extractors
|
||||
- youtube
|
||||
- ard
|
||||
- srmediatek (#9373)
|
||||
|
||||
|
||||
version 2016.07.09
|
||||
|
||||
New extractors
|
||||
- Flipagram (#9898)
|
||||
|
||||
Fixed/improved extractors
|
||||
- telecinco
|
||||
- toutv
|
||||
- radiocanada
|
||||
- tweakers (#9516)
|
||||
- lynda
|
||||
- nick (#7542)
|
||||
- polskieradio (#10028)
|
||||
- le
|
||||
- facebook (#9851)
|
||||
- mgtv
|
||||
- animeondemand (#10031)
|
||||
|
||||
Fixed/improved features
|
||||
- `--postprocessor-args` and `--downloader-args` now accepts non-ASCII inputs
|
||||
on non-Windows systems
|
||||
|
||||
|
||||
version 2016.07.07
|
||||
|
||||
New extractors
|
||||
- kamcord (#10001)
|
||||
|
||||
Fixed/improved extractors
|
||||
- spiegel (#10018)
|
||||
- metacafe (#8539, #3253)
|
||||
- onet (#9950)
|
||||
- francetv (#9955)
|
||||
- brightcove (#9965)
|
||||
- daum (#9972)
|
||||
|
||||
|
||||
version 2016.07.06
|
||||
|
||||
Fixed/improved extractors
|
||||
- youtube (#10007, #10009)
|
||||
- xuite
|
||||
- stitcher
|
||||
- spiegel
|
||||
- slideshare
|
||||
- sandia
|
||||
- rtvnh
|
||||
- prosiebensat1
|
||||
- onionstudios
|
||||
|
||||
|
||||
version 2016.07.05
|
||||
|
||||
Fixed/improved extractors
|
||||
- brightcove
|
||||
- yahoo (#9995)
|
||||
- pornhub (#9997)
|
||||
- iqiyi
|
||||
- kaltura (#5557)
|
||||
- la7
|
||||
- Changed features
|
||||
- Rename --cn-verfication-proxy to --geo-verification-proxy
|
||||
Miscellaneous
|
||||
- Add script for displaying downloads statistics
|
||||
|
||||
|
||||
version 2016.07.03.1
|
||||
|
||||
Fixed/improved extractors
|
||||
- theplatform
|
||||
- aenetworks
|
||||
- nationalgeographic
|
||||
- hrti (#9482)
|
||||
- facebook (#5701)
|
||||
- buzzfeed (#5701)
|
||||
- rai (#8617, #9157, #9232, #8552, #8551)
|
||||
- nationalgeographic (#9991)
|
||||
- iqiyi
|
||||
|
||||
|
||||
version 2016.07.03
|
||||
|
||||
New extractors
|
||||
- hrti (#9482)
|
||||
|
||||
Fixed/improved extractors
|
||||
- vk (#9981)
|
||||
- facebook (#9938)
|
||||
- xtube (#9953, #9961)
|
||||
|
||||
|
||||
version 2016.07.02
|
||||
|
||||
New extractors
|
||||
- fusion (#9958)
|
||||
|
||||
Fixed/improved extractors
|
||||
- twitch (#9975)
|
||||
- vine (#9970)
|
||||
- periscope (#9967)
|
||||
- pornhub (#8696)
|
||||
|
||||
|
||||
version 2016.07.01
|
||||
|
||||
New extractors
|
||||
- 9c9media
|
||||
- ctvnews (#2156)
|
||||
- ctv (#4077)
|
||||
|
||||
Fixed/Improved extractors
|
||||
- rds
|
||||
- meta (#8789)
|
||||
- pornhub (#9964)
|
||||
- sixplay (#2183)
|
||||
|
||||
New features
|
||||
- Accept quoted strings across multiple lines (#9940)
|
4
Makefile
4
Makefile
@@ -94,7 +94,7 @@ _EXTRACTOR_FILES != find youtube_dl/extractor -iname '*.py' -and -not -iname 'la
|
||||
youtube_dl/extractor/lazy_extractors.py: devscripts/make_lazy_extractors.py devscripts/lazy_load_template.py $(_EXTRACTOR_FILES)
|
||||
$(PYTHON) devscripts/make_lazy_extractors.py $@
|
||||
|
||||
youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish
|
||||
youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish ChangeLog
|
||||
@tar -czf youtube-dl.tar.gz --transform "s|^|youtube-dl/|" --owner 0 --group 0 \
|
||||
--exclude '*.DS_Store' \
|
||||
--exclude '*.kate-swp' \
|
||||
@@ -107,7 +107,7 @@ youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-
|
||||
--exclude 'docs/_build' \
|
||||
-- \
|
||||
bin devscripts test youtube_dl docs \
|
||||
LICENSE README.md README.txt \
|
||||
ChangeLog LICENSE README.md README.txt \
|
||||
Makefile MANIFEST.in youtube-dl.1 youtube-dl.bash-completion \
|
||||
youtube-dl.zsh youtube-dl.fish setup.py \
|
||||
youtube-dl
|
||||
|
12
README.md
12
README.md
@@ -330,7 +330,15 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
bidirectional text support. Requires bidiv
|
||||
or fribidi executable in PATH
|
||||
--sleep-interval SECONDS Number of seconds to sleep before each
|
||||
download.
|
||||
download when used alone or a lower bound
|
||||
of a range for randomized sleep before each
|
||||
download (minimum possible number of
|
||||
seconds to sleep) when used along with
|
||||
--max-sleep-interval.
|
||||
--max-sleep-interval SECONDS Upper bound of a range for randomized sleep
|
||||
before each download (maximum possible
|
||||
number of seconds to sleep). Must only be
|
||||
used along with --min-sleep-interval.
|
||||
|
||||
## Video Format Options:
|
||||
-f, --format FORMAT Video format code, see the "FORMAT
|
||||
@@ -1196,7 +1204,7 @@ Make sure that someone has not already opened the issue you're trying to open. S
|
||||
|
||||
### Why are existing options not enough?
|
||||
|
||||
Before requesting a new feature, please have a quick peek at [the list of supported options](https://github.com/rg3/youtube-dl/blob/master/README.md#synopsis). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem.
|
||||
Before requesting a new feature, please have a quick peek at [the list of supported options](https://github.com/rg3/youtube-dl/blob/master/README.md#options). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem.
|
||||
|
||||
### Is there enough context in your bug report?
|
||||
|
||||
|
@@ -54,17 +54,21 @@ def filter_options(readme):
|
||||
|
||||
if in_options:
|
||||
if line.lstrip().startswith('-'):
|
||||
option, description = re.split(r'\s{2,}', line.lstrip())
|
||||
split_option = option.split(' ')
|
||||
split = re.split(r'\s{2,}', line.lstrip())
|
||||
# Description string may start with `-` as well. If there is
|
||||
# only one piece then it's a description bit not an option.
|
||||
if len(split) > 1:
|
||||
option, description = split
|
||||
split_option = option.split(' ')
|
||||
|
||||
if not split_option[-1].startswith('-'): # metavar
|
||||
option = ' '.join(split_option[:-1] + ['*%s*' % split_option[-1]])
|
||||
if not split_option[-1].startswith('-'): # metavar
|
||||
option = ' '.join(split_option[:-1] + ['*%s*' % split_option[-1]])
|
||||
|
||||
# Pandoc's definition_lists. See http://pandoc.org/README.html
|
||||
# for more information.
|
||||
ret += '\n%s\n: %s\n' % (option, description)
|
||||
else:
|
||||
ret += line.lstrip() + '\n'
|
||||
# Pandoc's definition_lists. See http://pandoc.org/README.html
|
||||
# for more information.
|
||||
ret += '\n%s\n: %s\n' % (option, description)
|
||||
continue
|
||||
ret += line.lstrip() + '\n'
|
||||
else:
|
||||
ret += line + '\n'
|
||||
|
||||
|
@@ -71,9 +71,12 @@ fi
|
||||
/bin/echo -e "\n### Changing version in version.py..."
|
||||
sed -i "s/__version__ = '.*'/__version__ = '$version'/" youtube_dl/version.py
|
||||
|
||||
/bin/echo -e "\n### Changing version in ChangeLog..."
|
||||
sed -i "s/<unreleased>/$version/" ChangeLog
|
||||
|
||||
/bin/echo -e "\n### Committing documentation, templates and youtube_dl/version.py..."
|
||||
make README.md CONTRIBUTING.md .github/ISSUE_TEMPLATE.md supportedsites
|
||||
git add README.md CONTRIBUTING.md .github/ISSUE_TEMPLATE.md docs/supportedsites.md youtube_dl/version.py
|
||||
git add README.md CONTRIBUTING.md .github/ISSUE_TEMPLATE.md docs/supportedsites.md youtube_dl/version.py ChangeLog
|
||||
git commit $gpg_sign_commits -m "release $version"
|
||||
|
||||
/bin/echo -e "\n### Now tagging, signing and pushing..."
|
||||
|
@@ -1,6 +1,7 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import itertools
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
@@ -21,21 +22,26 @@ def format_size(bytes):
|
||||
|
||||
total_bytes = 0
|
||||
|
||||
releases = json.loads(compat_urllib_request.urlopen(
|
||||
'https://api.github.com/repos/rg3/youtube-dl/releases').read().decode('utf-8'))
|
||||
for page in itertools.count(1):
|
||||
releases = json.loads(compat_urllib_request.urlopen(
|
||||
'https://api.github.com/repos/rg3/youtube-dl/releases?page=%s' % page
|
||||
).read().decode('utf-8'))
|
||||
|
||||
for release in releases:
|
||||
compat_print(release['name'])
|
||||
for asset in release['assets']:
|
||||
asset_name = asset['name']
|
||||
total_bytes += asset['download_count'] * asset['size']
|
||||
if all(not re.match(p, asset_name) for p in (
|
||||
r'^youtube-dl$',
|
||||
r'^youtube-dl-\d{4}\.\d{2}\.\d{2}(?:\.\d+)?\.tar\.gz$',
|
||||
r'^youtube-dl\.exe$')):
|
||||
continue
|
||||
compat_print(
|
||||
' %s size: %s downloads: %d'
|
||||
% (asset_name, format_size(asset['size']), asset['download_count']))
|
||||
if not releases:
|
||||
break
|
||||
|
||||
for release in releases:
|
||||
compat_print(release['name'])
|
||||
for asset in release['assets']:
|
||||
asset_name = asset['name']
|
||||
total_bytes += asset['download_count'] * asset['size']
|
||||
if all(not re.match(p, asset_name) for p in (
|
||||
r'^youtube-dl$',
|
||||
r'^youtube-dl-\d{4}\.\d{2}\.\d{2}(?:\.\d+)?\.tar\.gz$',
|
||||
r'^youtube-dl\.exe$')):
|
||||
continue
|
||||
compat_print(
|
||||
' %s size: %s downloads: %d'
|
||||
% (asset_name, format_size(asset['size']), asset['download_count']))
|
||||
|
||||
compat_print('total downloads traffic: %s' % format_size(total_bytes))
|
||||
|
@@ -35,6 +35,7 @@
|
||||
- **AlJazeera**
|
||||
- **Allocine**
|
||||
- **AlphaPorno**
|
||||
- **AMCNetworks**
|
||||
- **AnimeOnDemand**
|
||||
- **anitube.se**
|
||||
- **AnySex**
|
||||
@@ -142,7 +143,7 @@
|
||||
- **CollegeRama**
|
||||
- **ComCarCoff**
|
||||
- **ComedyCentral**
|
||||
- **ComedyCentralShows**: The Daily Show / The Colbert Report
|
||||
- **ComedyCentralShortname**
|
||||
- **ComedyCentralTV**
|
||||
- **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED
|
||||
- **Coub**
|
||||
@@ -182,6 +183,7 @@
|
||||
- **DigitallySpeaking**
|
||||
- **Digiteka**
|
||||
- **Discovery**
|
||||
- **DiscoveryGo**
|
||||
- **Dotsub**
|
||||
- **DouyuTV**: 斗鱼
|
||||
- **DPlay**
|
||||
@@ -237,7 +239,6 @@
|
||||
- **FoxSports**
|
||||
- **france2.fr:generation-quoi**
|
||||
- **FranceCulture**
|
||||
- **FranceCultureEmission**
|
||||
- **FranceInter**
|
||||
- **francetv**: France 2, 3, 4, 5 and Ô
|
||||
- **francetvinfo.fr**
|
||||
@@ -247,8 +248,8 @@
|
||||
- **Funimation**
|
||||
- **FunnyOrDie**
|
||||
- **Fusion**
|
||||
- **FXNetworks**
|
||||
- **GameInformer**
|
||||
- **Gamekings**
|
||||
- **GameOne**
|
||||
- **gameone:playlist**
|
||||
- **Gamersyde**
|
||||
@@ -265,7 +266,6 @@
|
||||
- **GloboArticle**
|
||||
- **GodTube**
|
||||
- **GodTV**
|
||||
- **GoldenMoustache**
|
||||
- **Golem**
|
||||
- **GoogleDrive**
|
||||
- **Goshgay**
|
||||
@@ -278,6 +278,7 @@
|
||||
- **HellPorno**
|
||||
- **Helsinki**: helsinki.fi
|
||||
- **HentaiStigma**
|
||||
- **HGTV**
|
||||
- **HistoricFilms**
|
||||
- **history:topic**: History.com Topic
|
||||
- **hitbox**
|
||||
@@ -399,9 +400,9 @@
|
||||
- **Moviezine**
|
||||
- **MPORA**
|
||||
- **MSN**
|
||||
- **mtg**: MTG services
|
||||
- **MTV**
|
||||
- **mtv.de**
|
||||
- **mtviggy.com**
|
||||
- **mtvservices:embedded**
|
||||
- **MuenchenTV**: münchen.tv
|
||||
- **MusicPlayOn**
|
||||
@@ -417,7 +418,8 @@
|
||||
- **MyVidster**
|
||||
- **n-tv.de**
|
||||
- **natgeo**
|
||||
- **natgeo:channel**
|
||||
- **natgeo:episodeguide**
|
||||
- **natgeo:video**
|
||||
- **Naver**
|
||||
- **NBA**
|
||||
- **NBC**
|
||||
@@ -441,7 +443,6 @@
|
||||
- **Newstube**
|
||||
- **NextMedia**: 蘋果日報
|
||||
- **NextMediaActionNews**: 蘋果日報 - 動新聞
|
||||
- **nextmovie.com**
|
||||
- **nfb**: National Film Board of Canada
|
||||
- **nfl.com**
|
||||
- **nhl.com**
|
||||
@@ -520,6 +521,7 @@
|
||||
- **plus.google**: Google Plus
|
||||
- **pluzz.francetv.fr**
|
||||
- **podomatic**
|
||||
- **Pokemon**
|
||||
- **PolskieRadio**
|
||||
- **PornHd**
|
||||
- **PornHub**: PornHub and Thumbzilla
|
||||
@@ -564,6 +566,7 @@
|
||||
- **RoosterTeeth**
|
||||
- **RottenTomatoes**
|
||||
- **Roxwel**
|
||||
- **Rozhlas**
|
||||
- **RTBF**
|
||||
- **rte**: Raidió Teilifís Éireann TV
|
||||
- **rte:radio**: Raidió Teilifís Éireann radio
|
||||
@@ -621,6 +624,7 @@
|
||||
- **smotri:user**: Smotri.com user videos
|
||||
- **Snotr**
|
||||
- **Sohu**
|
||||
- **SonyLIV**
|
||||
- **soundcloud**
|
||||
- **soundcloud:playlist**
|
||||
- **soundcloud:search**: Soundcloud search
|
||||
@@ -663,7 +667,6 @@
|
||||
- **SztvHu**
|
||||
- **Tagesschau**
|
||||
- **tagesschau:player**
|
||||
- **Tapely**
|
||||
- **Tass**
|
||||
- **TDSLifeway**
|
||||
- **teachertube**: teachertube.com videos
|
||||
@@ -699,6 +702,7 @@
|
||||
- **TNAFlix**
|
||||
- **TNAFlixNetworkEmbed**
|
||||
- **toggle**
|
||||
- **Tosh**: Tosh.0
|
||||
- **tou.tv**
|
||||
- **Toypics**: Toypics user profile
|
||||
- **ToypicsUser**: Toypics user profile
|
||||
@@ -728,8 +732,8 @@
|
||||
- **tvigle**: Интернет-телевидение Tvigle.ru
|
||||
- **tvland.com**
|
||||
- **tvp**: Telewizja Polska
|
||||
- **tvp:embed**: Telewizja Polska
|
||||
- **tvp:series**
|
||||
- **TVPlay**: TV3Play and related services
|
||||
- **Tweakers**
|
||||
- **twitch:chapter**
|
||||
- **twitch:clips**
|
||||
@@ -745,6 +749,9 @@
|
||||
- **udemy:course**
|
||||
- **UDNEmbed**: 聯合影音
|
||||
- **Unistra**
|
||||
- **uol.com.br**
|
||||
- **uplynk**
|
||||
- **uplynk:preplay**
|
||||
- **Urort**: NRK P3 Urørt
|
||||
- **URPlay**
|
||||
- **USAToday**
|
||||
@@ -762,7 +769,9 @@
|
||||
- **VevoPlaylist**
|
||||
- **VGTV**: VGTV, BTTV, FTV, Aftenposten and Aftonbladet
|
||||
- **vh1.com**
|
||||
- **Viafree**
|
||||
- **Vice**
|
||||
- **Viceland**
|
||||
- **ViceShow**
|
||||
- **Vidbit**
|
||||
- **Viddler**
|
||||
@@ -807,6 +816,7 @@
|
||||
- **vk:wallpost**
|
||||
- **vlive**
|
||||
- **Vodlocker**
|
||||
- **VODPlatform**
|
||||
- **VoiceRepublic**
|
||||
- **VoxMedia**
|
||||
- **Vporn**
|
||||
@@ -883,4 +893,3 @@
|
||||
- **ZDFChannel**
|
||||
- **zingmp3:album**: mp3.zing.vn albums
|
||||
- **zingmp3:song**: mp3.zing.vn songs
|
||||
- **ZippCast**
|
||||
|
@@ -48,6 +48,9 @@ class TestInfoExtractor(unittest.TestCase):
|
||||
self.assertEqual(ie._og_search_property('foobar', html), 'Foo')
|
||||
self.assertEqual(ie._og_search_property('test1', html), 'foo > < bar')
|
||||
self.assertEqual(ie._og_search_property('test2', html), 'foo >//< bar')
|
||||
self.assertEqual(ie._og_search_property(('test0', 'test1'), html), 'foo > < bar')
|
||||
self.assertRaises(RegexNotFoundError, ie._og_search_property, 'test0', html, None, fatal=True)
|
||||
self.assertRaises(RegexNotFoundError, ie._og_search_property, ('test0', 'test00'), html, None, fatal=True)
|
||||
|
||||
def test_html_search_meta(self):
|
||||
ie = self.ie
|
||||
|
@@ -101,8 +101,6 @@ class TestAllURLsMatching(unittest.TestCase):
|
||||
self.assertMatch(':ytsubs', ['youtube:subscriptions'])
|
||||
self.assertMatch(':ytsubscriptions', ['youtube:subscriptions'])
|
||||
self.assertMatch(':ythistory', ['youtube:history'])
|
||||
self.assertMatch(':thedailyshow', ['ComedyCentralShows'])
|
||||
self.assertMatch(':tds', ['ComedyCentralShows'])
|
||||
|
||||
def test_vimeo_matching(self):
|
||||
self.assertMatch('https://vimeo.com/channels/tributes', ['vimeo:channel'])
|
||||
|
@@ -42,6 +42,7 @@ from youtube_dl.utils import (
|
||||
ohdave_rsa_encrypt,
|
||||
OnDemandPagedList,
|
||||
orderedSet,
|
||||
parse_age_limit,
|
||||
parse_duration,
|
||||
parse_filesize,
|
||||
parse_count,
|
||||
@@ -308,6 +309,7 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(unified_timestamp('25-09-2014'), 1411603200)
|
||||
self.assertEqual(unified_timestamp('27.02.2016 17:30'), 1456594200)
|
||||
self.assertEqual(unified_timestamp('UNKNOWN DATE FORMAT'), None)
|
||||
self.assertEqual(unified_timestamp('May 16, 2016 11:15 PM'), 1463440500)
|
||||
|
||||
def test_determine_ext(self):
|
||||
self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4')
|
||||
@@ -431,6 +433,20 @@ class TestUtil(unittest.TestCase):
|
||||
url_basename('http://media.w3.org/2010/05/sintel/trailer.mp4'),
|
||||
'trailer.mp4')
|
||||
|
||||
def test_parse_age_limit(self):
|
||||
self.assertEqual(parse_age_limit(None), None)
|
||||
self.assertEqual(parse_age_limit(False), None)
|
||||
self.assertEqual(parse_age_limit('invalid'), None)
|
||||
self.assertEqual(parse_age_limit(0), 0)
|
||||
self.assertEqual(parse_age_limit(18), 18)
|
||||
self.assertEqual(parse_age_limit(21), 21)
|
||||
self.assertEqual(parse_age_limit(22), None)
|
||||
self.assertEqual(parse_age_limit('18'), 18)
|
||||
self.assertEqual(parse_age_limit('18+'), 18)
|
||||
self.assertEqual(parse_age_limit('PG-13'), 13)
|
||||
self.assertEqual(parse_age_limit('TV-14'), 14)
|
||||
self.assertEqual(parse_age_limit('TV-MA'), 17)
|
||||
|
||||
def test_parse_duration(self):
|
||||
self.assertEqual(parse_duration(None), None)
|
||||
self.assertEqual(parse_duration(False), None)
|
||||
@@ -952,6 +968,7 @@ The first line
|
||||
self.assertEqual(cli_option({'proxy': '127.0.0.1:3128'}, '--proxy', 'proxy'), ['--proxy', '127.0.0.1:3128'])
|
||||
self.assertEqual(cli_option({'proxy': None}, '--proxy', 'proxy'), [])
|
||||
self.assertEqual(cli_option({}, '--proxy', 'proxy'), [])
|
||||
self.assertEqual(cli_option({'retries': 10}, '--retries', 'retries'), ['--retries', '10'])
|
||||
|
||||
def test_cli_valueless_option(self):
|
||||
self.assertEqual(cli_valueless_option(
|
||||
|
70
test/test_verbose_output.py
Normal file
70
test/test_verbose_output.py
Normal file
@@ -0,0 +1,70 @@
|
||||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import unittest
|
||||
|
||||
import sys
|
||||
import os
|
||||
import subprocess
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
|
||||
|
||||
class TestVerboseOutput(unittest.TestCase):
|
||||
def test_private_info_arg(self):
|
||||
outp = subprocess.Popen(
|
||||
[
|
||||
sys.executable, 'youtube_dl/__main__.py', '-v',
|
||||
'--username', 'johnsmith@gmail.com',
|
||||
'--password', 'secret',
|
||||
], cwd=rootDir, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||
sout, serr = outp.communicate()
|
||||
self.assertTrue(b'--username' in serr)
|
||||
self.assertTrue(b'johnsmith' not in serr)
|
||||
self.assertTrue(b'--password' in serr)
|
||||
self.assertTrue(b'secret' not in serr)
|
||||
|
||||
def test_private_info_shortarg(self):
|
||||
outp = subprocess.Popen(
|
||||
[
|
||||
sys.executable, 'youtube_dl/__main__.py', '-v',
|
||||
'-u', 'johnsmith@gmail.com',
|
||||
'-p', 'secret',
|
||||
], cwd=rootDir, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||
sout, serr = outp.communicate()
|
||||
self.assertTrue(b'-u' in serr)
|
||||
self.assertTrue(b'johnsmith' not in serr)
|
||||
self.assertTrue(b'-p' in serr)
|
||||
self.assertTrue(b'secret' not in serr)
|
||||
|
||||
def test_private_info_eq(self):
|
||||
outp = subprocess.Popen(
|
||||
[
|
||||
sys.executable, 'youtube_dl/__main__.py', '-v',
|
||||
'--username=johnsmith@gmail.com',
|
||||
'--password=secret',
|
||||
], cwd=rootDir, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||
sout, serr = outp.communicate()
|
||||
self.assertTrue(b'--username' in serr)
|
||||
self.assertTrue(b'johnsmith' not in serr)
|
||||
self.assertTrue(b'--password' in serr)
|
||||
self.assertTrue(b'secret' not in serr)
|
||||
|
||||
def test_private_info_shortarg_eq(self):
|
||||
outp = subprocess.Popen(
|
||||
[
|
||||
sys.executable, 'youtube_dl/__main__.py', '-v',
|
||||
'-u=johnsmith@gmail.com',
|
||||
'-p=secret',
|
||||
], cwd=rootDir, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||
sout, serr = outp.communicate()
|
||||
self.assertTrue(b'-u' in serr)
|
||||
self.assertTrue(b'johnsmith' not in serr)
|
||||
self.assertTrue(b'-p' in serr)
|
||||
self.assertTrue(b'secret' not in serr)
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
@@ -249,7 +249,16 @@ class YoutubeDL(object):
|
||||
source_address: (Experimental) Client-side IP address to bind to.
|
||||
call_home: Boolean, true iff we are allowed to contact the
|
||||
youtube-dl servers for debugging.
|
||||
sleep_interval: Number of seconds to sleep before each download.
|
||||
sleep_interval: Number of seconds to sleep before each download when
|
||||
used alone or a lower bound of a range for randomized
|
||||
sleep before each download (minimum possible number
|
||||
of seconds to sleep) when used along with
|
||||
max_sleep_interval.
|
||||
max_sleep_interval:Upper bound of a range for randomized sleep before each
|
||||
download (maximum possible number of seconds to sleep).
|
||||
Must only be used along with sleep_interval.
|
||||
Actual sleep time will be a random float from range
|
||||
[sleep_interval; max_sleep_interval].
|
||||
listformats: Print an overview of available video formats and exit.
|
||||
list_thumbnails: Print a table of all thumbnails and exit.
|
||||
match_filter: A function that gets called with the info_dict of
|
||||
@@ -1594,7 +1603,9 @@ class YoutubeDL(object):
|
||||
self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
|
||||
else:
|
||||
self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
|
||||
with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
|
||||
# Use newline='' to prevent conversion of newline characters
|
||||
# See https://github.com/rg3/youtube-dl/issues/10268
|
||||
with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
|
||||
subfile.write(sub_data)
|
||||
except (OSError, IOError):
|
||||
self.report_error('Cannot write subtitles file ' + sub_filename)
|
||||
|
@@ -145,6 +145,16 @@ def _real_main(argv=None):
|
||||
if numeric_limit is None:
|
||||
parser.error('invalid max_filesize specified')
|
||||
opts.max_filesize = numeric_limit
|
||||
if opts.sleep_interval is not None:
|
||||
if opts.sleep_interval < 0:
|
||||
parser.error('sleep interval must be positive or 0')
|
||||
if opts.max_sleep_interval is not None:
|
||||
if opts.max_sleep_interval < 0:
|
||||
parser.error('max sleep interval must be positive or 0')
|
||||
if opts.max_sleep_interval < opts.sleep_interval:
|
||||
parser.error('max sleep interval must be greater than or equal to min sleep interval')
|
||||
else:
|
||||
opts.max_sleep_interval = opts.sleep_interval
|
||||
|
||||
def parse_retries(retries):
|
||||
if retries in ('inf', 'infinite'):
|
||||
@@ -370,6 +380,7 @@ def _real_main(argv=None):
|
||||
'source_address': opts.source_address,
|
||||
'call_home': opts.call_home,
|
||||
'sleep_interval': opts.sleep_interval,
|
||||
'max_sleep_interval': opts.max_sleep_interval,
|
||||
'external_downloader': opts.external_downloader,
|
||||
'list_thumbnails': opts.list_thumbnails,
|
||||
'playlist_items': opts.playlist_items,
|
||||
|
@@ -4,6 +4,7 @@ import os
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
import random
|
||||
|
||||
from ..compat import compat_os_name
|
||||
from ..utils import (
|
||||
@@ -342,8 +343,11 @@ class FileDownloader(object):
|
||||
})
|
||||
return True
|
||||
|
||||
sleep_interval = self.params.get('sleep_interval')
|
||||
if sleep_interval:
|
||||
min_sleep_interval = self.params.get('sleep_interval')
|
||||
if min_sleep_interval:
|
||||
max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval)
|
||||
print(min_sleep_interval, max_sleep_interval)
|
||||
sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval)
|
||||
self.to_screen('[download] Sleeping %s seconds...' % sleep_interval)
|
||||
time.sleep(sleep_interval)
|
||||
|
||||
|
@@ -96,6 +96,12 @@ class CurlFD(ExternalFD):
|
||||
cmd = [self.exe, '--location', '-o', tmpfilename]
|
||||
for key, val in info_dict['http_headers'].items():
|
||||
cmd += ['--header', '%s: %s' % (key, val)]
|
||||
cmd += self._bool_option('--continue-at', 'continuedl', '-', '0')
|
||||
cmd += self._valueless_option('--silent', 'noprogress')
|
||||
cmd += self._valueless_option('--verbose', 'verbose')
|
||||
cmd += self._option('--limit-rate', 'ratelimit')
|
||||
cmd += self._option('--retry', 'retries')
|
||||
cmd += self._option('--max-filesize', 'max_filesize')
|
||||
cmd += self._option('--interface', 'source_address')
|
||||
cmd += self._option('--proxy', 'proxy')
|
||||
cmd += self._valueless_option('--insecure', 'nocheckcertificate')
|
||||
@@ -103,6 +109,16 @@ class CurlFD(ExternalFD):
|
||||
cmd += ['--', info_dict['url']]
|
||||
return cmd
|
||||
|
||||
def _call_downloader(self, tmpfilename, info_dict):
|
||||
cmd = [encodeArgument(a) for a in self._make_cmd(tmpfilename, info_dict)]
|
||||
|
||||
self._debug_cmd(cmd)
|
||||
|
||||
# curl writes the progress to stderr so don't capture it.
|
||||
p = subprocess.Popen(cmd)
|
||||
p.communicate()
|
||||
return p.returncode
|
||||
|
||||
|
||||
class AxelFD(ExternalFD):
|
||||
AVAILABLE_OPT = '-V'
|
||||
|
@@ -20,6 +20,7 @@ from ..utils import (
|
||||
encodeFilename,
|
||||
sanitize_open,
|
||||
parse_m3u8_attributes,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
@@ -82,6 +83,7 @@ class HlsFD(FragmentFD):
|
||||
|
||||
self._prepare_and_start_frag_download(ctx)
|
||||
|
||||
extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url')
|
||||
i = 0
|
||||
media_sequence = 0
|
||||
decrypt_info = {'METHOD': 'NONE'}
|
||||
@@ -95,6 +97,8 @@ class HlsFD(FragmentFD):
|
||||
if re.match(r'^https?://', line)
|
||||
else compat_urlparse.urljoin(man_url, line))
|
||||
frag_filename = '%s-Frag%d' % (ctx['tmpfilename'], i)
|
||||
if extra_param_to_segment_url:
|
||||
frag_url = update_url_query(frag_url, extra_param_to_segment_url)
|
||||
success = ctx['dl'].download(frag_filename, {'url': frag_url})
|
||||
if not success:
|
||||
return False
|
||||
@@ -120,6 +124,8 @@ class HlsFD(FragmentFD):
|
||||
if not re.match(r'^https?://', decrypt_info['URI']):
|
||||
decrypt_info['URI'] = compat_urlparse.urljoin(
|
||||
man_url, decrypt_info['URI'])
|
||||
if extra_param_to_segment_url:
|
||||
decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_param_to_segment_url)
|
||||
decrypt_info['KEY'] = self.ydl.urlopen(decrypt_info['URI']).read()
|
||||
elif line.startswith('#EXT-X-MEDIA-SEQUENCE'):
|
||||
media_sequence = int(line[22:])
|
||||
|
134
youtube_dl/extractor/adobepass.py
Normal file
134
youtube_dl/extractor/adobepass.py
Normal file
@@ -0,0 +1,134 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import time
|
||||
import xml.etree.ElementTree as etree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
unescapeHTML,
|
||||
urlencode_postdata,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
class AdobePassIE(InfoExtractor):
|
||||
_SERVICE_PROVIDER_TEMPLATE = 'https://sp.auth.adobe.com/adobe-services/%s'
|
||||
_USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0'
|
||||
|
||||
@staticmethod
|
||||
def _get_mvpd_resource(provider_id, title, guid, rating):
|
||||
channel = etree.Element('channel')
|
||||
channel_title = etree.SubElement(channel, 'title')
|
||||
channel_title.text = provider_id
|
||||
item = etree.SubElement(channel, 'item')
|
||||
resource_title = etree.SubElement(item, 'title')
|
||||
resource_title.text = title
|
||||
resource_guid = etree.SubElement(item, 'guid')
|
||||
resource_guid.text = guid
|
||||
resource_rating = etree.SubElement(item, 'media:rating')
|
||||
resource_rating.attrib = {'scheme': 'urn:v-chip'}
|
||||
resource_rating.text = rating
|
||||
return '<rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/">' + etree.tostring(channel).decode() + '</rss>'
|
||||
|
||||
def _extract_mvpd_auth(self, url, video_id, requestor_id, resource):
|
||||
def xml_text(xml_str, tag):
|
||||
return self._search_regex(
|
||||
'<%s>(.+?)</%s>' % (tag, tag), xml_str, tag)
|
||||
|
||||
mvpd_headers = {
|
||||
'ap_42': 'anonymous',
|
||||
'ap_11': 'Linux i686',
|
||||
'ap_z': self._USER_AGENT,
|
||||
'User-Agent': self._USER_AGENT,
|
||||
}
|
||||
|
||||
guid = xml_text(resource, 'guid')
|
||||
requestor_info = self._downloader.cache.load('mvpd', requestor_id) or {}
|
||||
authn_token = requestor_info.get('authn_token')
|
||||
if authn_token:
|
||||
token_expires = unified_timestamp(re.sub(r'[_ ]GMT', '', xml_text(authn_token, 'simpleTokenExpires')))
|
||||
if token_expires and token_expires <= int(time.time()):
|
||||
authn_token = None
|
||||
requestor_info = {}
|
||||
if not authn_token:
|
||||
# TODO add support for other TV Providers
|
||||
mso_id = 'DTV'
|
||||
username, password = self._get_netrc_login_info(mso_id)
|
||||
if not username or not password:
|
||||
return ''
|
||||
|
||||
def post_form(form_page, note, data={}):
|
||||
post_url = self._html_search_regex(r'<form[^>]+action=(["\'])(?P<url>.+?)\1', form_page, 'post url', group='url')
|
||||
return self._download_webpage(
|
||||
post_url, video_id, note, data=urlencode_postdata(data or self._hidden_inputs(form_page)), headers={
|
||||
'Content-Type': 'application/x-www-form-urlencoded',
|
||||
})
|
||||
|
||||
provider_redirect_page = self._download_webpage(
|
||||
self._SERVICE_PROVIDER_TEMPLATE % 'authenticate/saml', video_id,
|
||||
'Downloading Provider Redirect Page', query={
|
||||
'noflash': 'true',
|
||||
'mso_id': mso_id,
|
||||
'requestor_id': requestor_id,
|
||||
'no_iframe': 'false',
|
||||
'domain_name': 'adobe.com',
|
||||
'redirect_url': url,
|
||||
})
|
||||
provider_login_page = post_form(
|
||||
provider_redirect_page, 'Downloading Provider Login Page')
|
||||
mvpd_confirm_page = post_form(provider_login_page, 'Logging in', {
|
||||
'username': username,
|
||||
'password': password,
|
||||
})
|
||||
post_form(mvpd_confirm_page, 'Confirming Login')
|
||||
|
||||
session = self._download_webpage(
|
||||
self._SERVICE_PROVIDER_TEMPLATE % 'session', video_id,
|
||||
'Retrieving Session', data=urlencode_postdata({
|
||||
'_method': 'GET',
|
||||
'requestor_id': requestor_id,
|
||||
}), headers=mvpd_headers)
|
||||
if '<pendingLogout' in session:
|
||||
self._downloader.cache.store('mvpd', requestor_id, {})
|
||||
return self._extract_mvpd_auth(url, video_id, requestor_id, resource)
|
||||
authn_token = unescapeHTML(xml_text(session, 'authnToken'))
|
||||
requestor_info['authn_token'] = authn_token
|
||||
self._downloader.cache.store('mvpd', requestor_id, requestor_info)
|
||||
|
||||
authz_token = requestor_info.get(guid)
|
||||
if not authz_token:
|
||||
authorize = self._download_webpage(
|
||||
self._SERVICE_PROVIDER_TEMPLATE % 'authorize', video_id,
|
||||
'Retrieving Authorization Token', data=urlencode_postdata({
|
||||
'resource_id': resource,
|
||||
'requestor_id': requestor_id,
|
||||
'authentication_token': authn_token,
|
||||
'mso_id': xml_text(authn_token, 'simpleTokenMsoID'),
|
||||
'userMeta': '1',
|
||||
}), headers=mvpd_headers)
|
||||
if '<pendingLogout' in authorize:
|
||||
self._downloader.cache.store('mvpd', requestor_id, {})
|
||||
return self._extract_mvpd_auth(url, video_id, requestor_id, resource)
|
||||
authz_token = unescapeHTML(xml_text(authorize, 'authzToken'))
|
||||
requestor_info[guid] = authz_token
|
||||
self._downloader.cache.store('mvpd', requestor_id, requestor_info)
|
||||
|
||||
mvpd_headers.update({
|
||||
'ap_19': xml_text(authn_token, 'simpleSamlNameID'),
|
||||
'ap_23': xml_text(authn_token, 'simpleSamlSessionIndex'),
|
||||
})
|
||||
|
||||
short_authorize = self._download_webpage(
|
||||
self._SERVICE_PROVIDER_TEMPLATE % 'shortAuthorize',
|
||||
video_id, 'Retrieving Media Token', data=urlencode_postdata({
|
||||
'authz_token': authz_token,
|
||||
'requestor_id': requestor_id,
|
||||
'session_guid': xml_text(authn_token, 'simpleTokenAuthenticationGuid'),
|
||||
'hashed_guid': 'false',
|
||||
}), headers=mvpd_headers)
|
||||
if '<pendingLogout' in short_authorize:
|
||||
self._downloader.cache.store('mvpd', requestor_id, {})
|
||||
return self._extract_mvpd_auth(url, video_id, requestor_id, resource)
|
||||
return short_authorize
|
@@ -83,6 +83,20 @@ class AdultSwimIE(InfoExtractor):
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
# heroMetadata.trailer
|
||||
'url': 'http://www.adultswim.com/videos/decker/inside-decker-a-new-hero/',
|
||||
'info_dict': {
|
||||
'id': 'I0LQFQkaSUaFp8PnAWHhoQ',
|
||||
'ext': 'mp4',
|
||||
'title': 'Decker - Inside Decker: A New Hero',
|
||||
'description': 'md5:c916df071d425d62d70c86d4399d3ee0',
|
||||
'duration': 249.008,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
@@ -133,20 +147,26 @@ class AdultSwimIE(InfoExtractor):
|
||||
if video_info is None:
|
||||
if bootstrapped_data.get('slugged_video', {}).get('slug') == episode_path:
|
||||
video_info = bootstrapped_data['slugged_video']
|
||||
else:
|
||||
raise ExtractorError('Unable to find video info')
|
||||
if not video_info:
|
||||
video_info = bootstrapped_data.get('heroMetadata', {}).get('trailer').get('video')
|
||||
if not video_info:
|
||||
raise ExtractorError('Unable to find video info')
|
||||
|
||||
show = bootstrapped_data['show']
|
||||
show_title = show['title']
|
||||
stream = video_info.get('stream')
|
||||
clips = [stream] if stream else video_info.get('clips')
|
||||
if not clips:
|
||||
if stream and stream.get('videoPlaybackID'):
|
||||
segment_ids = [stream['videoPlaybackID']]
|
||||
elif video_info.get('clips'):
|
||||
segment_ids = [clip['videoPlaybackID'] for clip in video_info['clips']]
|
||||
elif video_info.get('videoPlaybackID'):
|
||||
segment_ids = [video_info['videoPlaybackID']]
|
||||
else:
|
||||
raise ExtractorError(
|
||||
'This video is only available via cable service provider subscription that'
|
||||
' is not currently supported. You may want to use --cookies.'
|
||||
if video_info.get('auth') is True else 'Unable to find stream or clips',
|
||||
expected=True)
|
||||
segment_ids = [clip['videoPlaybackID'] for clip in clips]
|
||||
|
||||
episode_id = video_info['id']
|
||||
episode_title = video_info['title']
|
||||
|
@@ -109,7 +109,10 @@ class AENetworksIE(AENetworksBaseIE):
|
||||
info = self._parse_theplatform_metadata(theplatform_metadata)
|
||||
if theplatform_metadata.get('AETN$isBehindWall'):
|
||||
requestor_id = self._DOMAIN_TO_REQUESTOR_ID[domain]
|
||||
resource = '<rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/"><channel><title>%s</title><item><title>%s</title><guid>%s</guid><media:rating scheme="urn:v-chip">%s</media:rating></item></channel></rss>' % (requestor_id, theplatform_metadata['title'], theplatform_metadata['AETN$PPL_pplProgramId'], theplatform_metadata['ratings'][0]['rating'])
|
||||
resource = self._get_mvpd_resource(
|
||||
requestor_id, theplatform_metadata['title'],
|
||||
theplatform_metadata.get('AETN$PPL_pplProgramId') or theplatform_metadata.get('AETN$PPL_pplProgramId_OLD'),
|
||||
theplatform_metadata['ratings'][0]['rating'])
|
||||
query['auth'] = self._extract_mvpd_auth(
|
||||
url, video_id, requestor_id, resource)
|
||||
info.update(self._search_json_ld(webpage, video_id, fatal=False))
|
||||
|
91
youtube_dl/extractor/amcnetworks.py
Normal file
91
youtube_dl/extractor/amcnetworks.py
Normal file
@@ -0,0 +1,91 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .theplatform import ThePlatformIE
|
||||
from ..utils import (
|
||||
update_url_query,
|
||||
parse_age_limit,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class AMCNetworksIE(ThePlatformIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:amc|bbcamerica|ifc|wetv)\.com/(?:movies/|shows/[^/]+/(?:full-episodes/)?season-\d+/episode-\d+(?:-(?:[^/]+/)?|/))(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ifc.com/shows/maron/season-04/episode-01/step-1',
|
||||
'md5': '',
|
||||
'info_dict': {
|
||||
'id': 's3MX01Nl4vPH',
|
||||
'ext': 'mp4',
|
||||
'title': 'Maron - Season 4 - Step 1',
|
||||
'description': 'In denial about his current situation, Marc is reluctantly convinced by his friends to enter rehab. Starring Marc Maron and Constance Zimmer.',
|
||||
'age_limit': 17,
|
||||
'upload_date': '20160505',
|
||||
'timestamp': 1462468831,
|
||||
'uploader': 'AMCN',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.bbcamerica.com/shows/the-hunt/full-episodes/season-1/episode-01-the-hardest-challenge',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.amc.com/shows/preacher/full-episodes/season-01/episode-00/pilot',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.wetv.com/shows/million-dollar-matchmaker/season-01/episode-06-the-dumped-dj-and-shallow-hal',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.ifc.com/movies/chaos',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
query = {
|
||||
'mbr': 'true',
|
||||
'manifest': 'm3u',
|
||||
}
|
||||
media_url = self._search_regex(r'window\.platformLinkURL\s*=\s*[\'"]([^\'"]+)', webpage, 'media url')
|
||||
theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
|
||||
r'https?://link.theplatform.com/s/([^?]+)', media_url, 'theplatform_path'), display_id)
|
||||
info = self._parse_theplatform_metadata(theplatform_metadata)
|
||||
video_id = theplatform_metadata['pid']
|
||||
title = theplatform_metadata['title']
|
||||
rating = theplatform_metadata['ratings'][0]['rating']
|
||||
auth_required = self._search_regex(r'window\.authRequired\s*=\s*(true|false);', webpage, 'auth required')
|
||||
if auth_required == 'true':
|
||||
requestor_id = self._search_regex(r'window\.requestor_id\s*=\s*[\'"]([^\'"]+)', webpage, 'requestor id')
|
||||
resource = self._get_mvpd_resource(requestor_id, title, video_id, rating)
|
||||
query['auth'] = self._extract_mvpd_auth(url, video_id, requestor_id, resource)
|
||||
media_url = update_url_query(media_url, query)
|
||||
formats, subtitles = self._extract_theplatform_smil(media_url, video_id)
|
||||
self._sort_formats(formats)
|
||||
info.update({
|
||||
'id': video_id,
|
||||
'subtitles': subtitles,
|
||||
'formats': formats,
|
||||
'age_limit': parse_age_limit(parse_age_limit(rating)),
|
||||
})
|
||||
ns_keys = theplatform_metadata.get('$xmlns', {}).keys()
|
||||
if ns_keys:
|
||||
ns = list(ns_keys)[0]
|
||||
series = theplatform_metadata.get(ns + '$show')
|
||||
season_number = int_or_none(theplatform_metadata.get(ns + '$season'))
|
||||
episode = theplatform_metadata.get(ns + '$episodeTitle')
|
||||
episode_number = int_or_none(theplatform_metadata.get(ns + '$episode'))
|
||||
if season_number:
|
||||
title = 'Season %d - %s' % (season_number, title)
|
||||
if series:
|
||||
title = '%s - %s' % (series, title)
|
||||
info.update({
|
||||
'title': title,
|
||||
'series': series,
|
||||
'season_number': season_number,
|
||||
'episode': episode,
|
||||
'episode_number': episode_number,
|
||||
})
|
||||
return info
|
@@ -123,6 +123,10 @@ class AolFeaturesIE(InfoExtractor):
|
||||
'title': 'What To Watch - February 17, 2016',
|
||||
},
|
||||
'add_ie': ['FiveMin'],
|
||||
'params': {
|
||||
# encrypted m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -1,8 +1,6 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
@@ -15,7 +13,7 @@ class AparatIE(InfoExtractor):
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.aparat.com/v/wP8On',
|
||||
'md5': '6714e0af7e0d875c5a39c4dc4ab46ad1',
|
||||
'md5': '131aca2e14fe7c4dcb3c4877ba300c89',
|
||||
'info_dict': {
|
||||
'id': 'wP8On',
|
||||
'ext': 'mp4',
|
||||
@@ -31,13 +29,13 @@ class AparatIE(InfoExtractor):
|
||||
# Note: There is an easier-to-parse configuration at
|
||||
# http://www.aparat.com/video/video/config/videohash/%video_id
|
||||
# but the URL in there does not work
|
||||
embed_url = ('http://www.aparat.com/video/video/embed/videohash/' +
|
||||
video_id + '/vt/frame')
|
||||
embed_url = 'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id
|
||||
webpage = self._download_webpage(embed_url, video_id)
|
||||
|
||||
video_urls = [video_url.replace('\\/', '/') for video_url in re.findall(
|
||||
r'(?:fileList\[[0-9]+\]\s*=|"file"\s*:)\s*"([^"]+)"', webpage)]
|
||||
for i, video_url in enumerate(video_urls):
|
||||
file_list = self._parse_json(self._search_regex(
|
||||
r'fileList\s*=\s*JSON\.parse\(\'([^\']+)\'\)', webpage, 'file list'), video_id)
|
||||
for i, item in enumerate(file_list[0]):
|
||||
video_url = item['file']
|
||||
req = HEADRequest(video_url)
|
||||
res = self._request_webpage(
|
||||
req, video_id, note='Testing video URL %d' % i, errnote=False)
|
||||
|
@@ -1,67 +1,65 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import unified_strdate
|
||||
from .jwplatform import JWPlatformBaseIE
|
||||
from ..utils import (
|
||||
unified_strdate,
|
||||
clean_html,
|
||||
)
|
||||
|
||||
|
||||
class ArchiveOrgIE(InfoExtractor):
|
||||
class ArchiveOrgIE(JWPlatformBaseIE):
|
||||
IE_NAME = 'archive.org'
|
||||
IE_DESC = 'archive.org videos'
|
||||
_VALID_URL = r'https?://(?:www\.)?archive\.org/details/(?P<id>[^?/]+)(?:[?].*)?$'
|
||||
_VALID_URL = r'https?://(?:www\.)?archive\.org/(?:details|embed)/(?P<id>[^/?#]+)(?:[?].*)?$'
|
||||
_TESTS = [{
|
||||
'url': 'http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect',
|
||||
'md5': '8af1d4cf447933ed3c7f4871162602db',
|
||||
'info_dict': {
|
||||
'id': 'XD300-23_68HighlightsAResearchCntAugHumanIntellect',
|
||||
'ext': 'ogv',
|
||||
'ext': 'ogg',
|
||||
'title': '1968 Demo - FJCC Conference Presentation Reel #1',
|
||||
'description': 'md5:1780b464abaca9991d8968c877bb53ed',
|
||||
'description': 'md5:da45c349df039f1cc8075268eb1b5c25',
|
||||
'upload_date': '19681210',
|
||||
'uploader': 'SRI International'
|
||||
}
|
||||
}, {
|
||||
'url': 'https://archive.org/details/Cops1922',
|
||||
'md5': '18f2a19e6d89af8425671da1cf3d4e04',
|
||||
'md5': 'bc73c8ab3838b5a8fc6c6651fa7b58ba',
|
||||
'info_dict': {
|
||||
'id': 'Cops1922',
|
||||
'ext': 'ogv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Buster Keaton\'s "Cops" (1922)',
|
||||
'description': 'md5:70f72ee70882f713d4578725461ffcc3',
|
||||
'description': 'md5:b4544662605877edd99df22f9620d858',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://archive.org/embed/XD300-23_68HighlightsAResearchCntAugHumanIntellect',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(
|
||||
'http://archive.org/embed/' + video_id, video_id)
|
||||
jwplayer_playlist = self._parse_json(self._search_regex(
|
||||
r"(?s)Play\('[^']+'\s*,\s*(\[.+\])\s*,\s*{.*?}\);",
|
||||
webpage, 'jwplayer playlist'), video_id)
|
||||
info = self._parse_jwplayer_data(
|
||||
{'playlist': jwplayer_playlist}, video_id, base_url=url)
|
||||
|
||||
json_url = url + ('&' if '?' in url else '?') + 'output=json'
|
||||
data = self._download_json(json_url, video_id)
|
||||
def get_optional(metadata, field):
|
||||
return metadata.get(field, [None])[0]
|
||||
|
||||
def get_optional(data_dict, field):
|
||||
return data_dict['metadata'].get(field, [None])[0]
|
||||
|
||||
title = get_optional(data, 'title')
|
||||
description = get_optional(data, 'description')
|
||||
uploader = get_optional(data, 'creator')
|
||||
upload_date = unified_strdate(get_optional(data, 'date'))
|
||||
|
||||
formats = [
|
||||
{
|
||||
'format': fdata['format'],
|
||||
'url': 'http://' + data['server'] + data['dir'] + fn,
|
||||
'file_size': int(fdata['size']),
|
||||
}
|
||||
for fn, fdata in data['files'].items()
|
||||
if 'Video' in fdata['format']]
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'_type': 'video',
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'description': description,
|
||||
'uploader': uploader,
|
||||
'upload_date': upload_date,
|
||||
'thumbnail': data.get('misc', {}).get('image'),
|
||||
}
|
||||
metadata = self._download_json(
|
||||
'http://archive.org/details/' + video_id, video_id, query={
|
||||
'output': 'json',
|
||||
})['metadata']
|
||||
info.update({
|
||||
'title': get_optional(metadata, 'title') or info.get('title'),
|
||||
'description': clean_html(get_optional(metadata, 'description')),
|
||||
})
|
||||
if info.get('_type') != 'playlist':
|
||||
info.update({
|
||||
'uploader': get_optional(metadata, 'creator'),
|
||||
'upload_date': unified_strdate(get_optional(metadata, 'date')),
|
||||
})
|
||||
return info
|
||||
|
@@ -73,6 +73,7 @@ class ARDMediathekIE(InfoExtractor):
|
||||
'description': 'md5:c0c1c8048514deaed2a73b3a60eecacb',
|
||||
'duration': 3287,
|
||||
},
|
||||
'skip': 'Video is no longer available',
|
||||
}]
|
||||
|
||||
def _extract_media_info(self, media_info_url, webpage, video_id):
|
||||
|
@@ -2,19 +2,23 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import itertools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
dict_get,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
)
|
||||
from ..compat import (
|
||||
compat_etree_fromstring,
|
||||
compat_HTTPError,
|
||||
compat_urlparse,
|
||||
)
|
||||
|
||||
|
||||
@@ -229,51 +233,6 @@ class BBCCoUkIE(InfoExtractor):
|
||||
asx = self._download_xml(connection.get('href'), programme_id, 'Downloading ASX playlist')
|
||||
return [ref.get('href') for ref in asx.findall('./Entry/ref')]
|
||||
|
||||
def _extract_connection(self, connection, programme_id):
|
||||
formats = []
|
||||
kind = connection.get('kind')
|
||||
protocol = connection.get('protocol')
|
||||
supplier = connection.get('supplier')
|
||||
if protocol == 'http':
|
||||
href = connection.get('href')
|
||||
transfer_format = connection.get('transferFormat')
|
||||
# ASX playlist
|
||||
if supplier == 'asx':
|
||||
for i, ref in enumerate(self._extract_asx_playlist(connection, programme_id)):
|
||||
formats.append({
|
||||
'url': ref,
|
||||
'format_id': 'ref%s_%s' % (i, supplier),
|
||||
})
|
||||
# Skip DASH until supported
|
||||
elif transfer_format == 'dash':
|
||||
pass
|
||||
elif transfer_format == 'hls':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
href, programme_id, ext='mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id=supplier, fatal=False))
|
||||
# Direct link
|
||||
else:
|
||||
formats.append({
|
||||
'url': href,
|
||||
'format_id': supplier or kind or protocol,
|
||||
})
|
||||
elif protocol == 'rtmp':
|
||||
application = connection.get('application', 'ondemand')
|
||||
auth_string = connection.get('authString')
|
||||
identifier = connection.get('identifier')
|
||||
server = connection.get('server')
|
||||
formats.append({
|
||||
'url': '%s://%s/%s?%s' % (protocol, server, application, auth_string),
|
||||
'play_path': identifier,
|
||||
'app': '%s?%s' % (application, auth_string),
|
||||
'page_url': 'http://www.bbc.co.uk',
|
||||
'player_url': 'http://www.bbc.co.uk/emp/releases/iplayer/revisions/617463_618125_4/617463_618125_4_emp.swf',
|
||||
'rtmp_live': False,
|
||||
'ext': 'flv',
|
||||
'format_id': supplier,
|
||||
})
|
||||
return formats
|
||||
|
||||
def _extract_items(self, playlist):
|
||||
return playlist.findall('./{%s}item' % self._EMP_PLAYLIST_NS)
|
||||
|
||||
@@ -294,46 +253,6 @@ class BBCCoUkIE(InfoExtractor):
|
||||
def _extract_connections(self, media):
|
||||
return self._findall_ns(media, './{%s}connection')
|
||||
|
||||
def _extract_video(self, media, programme_id):
|
||||
formats = []
|
||||
vbr = int_or_none(media.get('bitrate'))
|
||||
vcodec = media.get('encoding')
|
||||
service = media.get('service')
|
||||
width = int_or_none(media.get('width'))
|
||||
height = int_or_none(media.get('height'))
|
||||
file_size = int_or_none(media.get('media_file_size'))
|
||||
for connection in self._extract_connections(media):
|
||||
conn_formats = self._extract_connection(connection, programme_id)
|
||||
for format in conn_formats:
|
||||
format.update({
|
||||
'width': width,
|
||||
'height': height,
|
||||
'vbr': vbr,
|
||||
'vcodec': vcodec,
|
||||
'filesize': file_size,
|
||||
})
|
||||
if service:
|
||||
format['format_id'] = '%s_%s' % (service, format['format_id'])
|
||||
formats.extend(conn_formats)
|
||||
return formats
|
||||
|
||||
def _extract_audio(self, media, programme_id):
|
||||
formats = []
|
||||
abr = int_or_none(media.get('bitrate'))
|
||||
acodec = media.get('encoding')
|
||||
service = media.get('service')
|
||||
for connection in self._extract_connections(media):
|
||||
conn_formats = self._extract_connection(connection, programme_id)
|
||||
for format in conn_formats:
|
||||
format.update({
|
||||
'format_id': '%s_%s' % (service, format['format_id']),
|
||||
'abr': abr,
|
||||
'acodec': acodec,
|
||||
'vcodec': 'none',
|
||||
})
|
||||
formats.extend(conn_formats)
|
||||
return formats
|
||||
|
||||
def _get_subtitles(self, media, programme_id):
|
||||
subtitles = {}
|
||||
for connection in self._extract_connections(media):
|
||||
@@ -379,13 +298,87 @@ class BBCCoUkIE(InfoExtractor):
|
||||
def _process_media_selector(self, media_selection, programme_id):
|
||||
formats = []
|
||||
subtitles = None
|
||||
urls = []
|
||||
|
||||
for media in self._extract_medias(media_selection):
|
||||
kind = media.get('kind')
|
||||
if kind == 'audio':
|
||||
formats.extend(self._extract_audio(media, programme_id))
|
||||
elif kind == 'video':
|
||||
formats.extend(self._extract_video(media, programme_id))
|
||||
if kind in ('video', 'audio'):
|
||||
bitrate = int_or_none(media.get('bitrate'))
|
||||
encoding = media.get('encoding')
|
||||
service = media.get('service')
|
||||
width = int_or_none(media.get('width'))
|
||||
height = int_or_none(media.get('height'))
|
||||
file_size = int_or_none(media.get('media_file_size'))
|
||||
for connection in self._extract_connections(media):
|
||||
href = connection.get('href')
|
||||
if href in urls:
|
||||
continue
|
||||
if href:
|
||||
urls.append(href)
|
||||
conn_kind = connection.get('kind')
|
||||
protocol = connection.get('protocol')
|
||||
supplier = connection.get('supplier')
|
||||
transfer_format = connection.get('transferFormat')
|
||||
format_id = supplier or conn_kind or protocol
|
||||
if service:
|
||||
format_id = '%s_%s' % (service, format_id)
|
||||
# ASX playlist
|
||||
if supplier == 'asx':
|
||||
for i, ref in enumerate(self._extract_asx_playlist(connection, programme_id)):
|
||||
formats.append({
|
||||
'url': ref,
|
||||
'format_id': 'ref%s_%s' % (i, format_id),
|
||||
})
|
||||
elif transfer_format == 'dash':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
href, programme_id, mpd_id=format_id, fatal=False))
|
||||
elif transfer_format == 'hls':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
href, programme_id, ext='mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id=format_id, fatal=False))
|
||||
elif transfer_format == 'hds':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
href, programme_id, f4m_id=format_id, fatal=False))
|
||||
else:
|
||||
if not service and not supplier and bitrate:
|
||||
format_id += '-%d' % bitrate
|
||||
fmt = {
|
||||
'format_id': format_id,
|
||||
'filesize': file_size,
|
||||
}
|
||||
if kind == 'video':
|
||||
fmt.update({
|
||||
'width': width,
|
||||
'height': height,
|
||||
'vbr': bitrate,
|
||||
'vcodec': encoding,
|
||||
})
|
||||
else:
|
||||
fmt.update({
|
||||
'abr': bitrate,
|
||||
'acodec': encoding,
|
||||
'vcodec': 'none',
|
||||
})
|
||||
if protocol == 'http':
|
||||
# Direct link
|
||||
fmt.update({
|
||||
'url': href,
|
||||
})
|
||||
elif protocol == 'rtmp':
|
||||
application = connection.get('application', 'ondemand')
|
||||
auth_string = connection.get('authString')
|
||||
identifier = connection.get('identifier')
|
||||
server = connection.get('server')
|
||||
fmt.update({
|
||||
'url': '%s://%s/%s?%s' % (protocol, server, application, auth_string),
|
||||
'play_path': identifier,
|
||||
'app': '%s?%s' % (application, auth_string),
|
||||
'page_url': 'http://www.bbc.co.uk',
|
||||
'player_url': 'http://www.bbc.co.uk/emp/releases/iplayer/revisions/617463_618125_4/617463_618125_4_emp.swf',
|
||||
'rtmp_live': False,
|
||||
'ext': 'flv',
|
||||
})
|
||||
formats.append(fmt)
|
||||
elif kind == 'captions':
|
||||
subtitles = self.extract_subtitles(media, programme_id)
|
||||
return formats, subtitles
|
||||
@@ -589,7 +582,7 @@ class BBCIE(BBCCoUkIE):
|
||||
'info_dict': {
|
||||
'id': '150615_telabyad_kentin_cogu',
|
||||
'ext': 'mp4',
|
||||
'title': "Tel Abyad'da IŞİD bayrağı indirildi YPG bayrağı çekildi",
|
||||
'title': "YPG: Tel Abyad'ın tamamı kontrolümüzde",
|
||||
'description': 'md5:33a4805a855c9baf7115fcbde57e7025',
|
||||
'timestamp': 1434397334,
|
||||
'upload_date': '20150615',
|
||||
@@ -654,6 +647,23 @@ class BBCIE(BBCCoUkIE):
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
# single video embedded with Morph
|
||||
'url': 'http://www.bbc.co.uk/sport/live/olympics/36895975',
|
||||
'info_dict': {
|
||||
'id': 'p041vhd0',
|
||||
'ext': 'mp4',
|
||||
'title': "Nigeria v Japan - Men's First Round",
|
||||
'description': 'Live coverage of the first round from Group B at the Amazonia Arena.',
|
||||
'duration': 7980,
|
||||
'uploader': 'BBC Sport',
|
||||
'uploader_id': 'bbc_sport',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Georestricted to UK',
|
||||
}, {
|
||||
# single video with playlist.sxml URL in playlist param
|
||||
'url': 'http://www.bbc.com/sport/0/football/33653409',
|
||||
@@ -751,7 +761,7 @@ class BBCIE(BBCCoUkIE):
|
||||
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
|
||||
json_ld_info = self._search_json_ld(webpage, playlist_id, default=None)
|
||||
json_ld_info = self._search_json_ld(webpage, playlist_id, default={})
|
||||
timestamp = json_ld_info.get('timestamp')
|
||||
|
||||
playlist_title = json_ld_info.get('title')
|
||||
@@ -820,13 +830,19 @@ class BBCIE(BBCCoUkIE):
|
||||
# http://www.bbc.com/turkce/multimedya/2015/10/151010_vid_ankara_patlama_ani)
|
||||
playlist = data_playable.get('otherSettings', {}).get('playlist', {})
|
||||
if playlist:
|
||||
for key in ('progressiveDownload', 'streaming'):
|
||||
entry = None
|
||||
for key in ('streaming', 'progressiveDownload'):
|
||||
playlist_url = playlist.get('%sUrl' % key)
|
||||
if not playlist_url:
|
||||
continue
|
||||
try:
|
||||
entries.append(self._extract_from_playlist_sxml(
|
||||
playlist_url, playlist_id, timestamp))
|
||||
info = self._extract_from_playlist_sxml(
|
||||
playlist_url, playlist_id, timestamp)
|
||||
if not entry:
|
||||
entry = info
|
||||
else:
|
||||
entry['title'] = info['title']
|
||||
entry['formats'].extend(info['formats'])
|
||||
except Exception as e:
|
||||
# Some playlist URL may fail with 500, at the same time
|
||||
# the other one may work fine (e.g.
|
||||
@@ -834,6 +850,9 @@ class BBCIE(BBCCoUkIE):
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 500:
|
||||
continue
|
||||
raise
|
||||
if entry:
|
||||
self._sort_formats(entry['formats'])
|
||||
entries.append(entry)
|
||||
|
||||
if entries:
|
||||
return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
|
||||
@@ -866,6 +885,50 @@ class BBCIE(BBCCoUkIE):
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
# Morph based embed (e.g. http://www.bbc.co.uk/sport/live/olympics/36895975)
|
||||
# There are several setPayload calls may be present but the video
|
||||
# seems to be always related to the first one
|
||||
morph_payload = self._parse_json(
|
||||
self._search_regex(
|
||||
r'Morph\.setPayload\([^,]+,\s*({.+?})\);',
|
||||
webpage, 'morph payload', default='{}'),
|
||||
playlist_id, fatal=False)
|
||||
if morph_payload:
|
||||
components = try_get(morph_payload, lambda x: x['body']['components'], list) or []
|
||||
for component in components:
|
||||
if not isinstance(component, dict):
|
||||
continue
|
||||
lead_media = try_get(component, lambda x: x['props']['leadMedia'], dict)
|
||||
if not lead_media:
|
||||
continue
|
||||
identifiers = lead_media.get('identifiers')
|
||||
if not identifiers or not isinstance(identifiers, dict):
|
||||
continue
|
||||
programme_id = identifiers.get('vpid') or identifiers.get('playablePid')
|
||||
if not programme_id:
|
||||
continue
|
||||
title = lead_media.get('title') or self._og_search_title(webpage)
|
||||
formats, subtitles = self._download_media_selector(programme_id)
|
||||
self._sort_formats(formats)
|
||||
description = lead_media.get('summary')
|
||||
uploader = lead_media.get('masterBrand')
|
||||
uploader_id = lead_media.get('mid')
|
||||
duration = None
|
||||
duration_d = lead_media.get('duration')
|
||||
if isinstance(duration_d, dict):
|
||||
duration = parse_duration(dict_get(
|
||||
duration_d, ('rawDuration', 'formattedDuration', 'spokenDuration')))
|
||||
return {
|
||||
'id': programme_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
def extract_all(pattern):
|
||||
return list(filter(None, map(
|
||||
lambda s: self._parse_json(s, playlist_id, fatal=False),
|
||||
@@ -883,7 +946,7 @@ class BBCIE(BBCCoUkIE):
|
||||
r'setPlaylist\("(%s)"\)' % EMBED_URL, webpage))
|
||||
if entries:
|
||||
return self.playlist_result(
|
||||
[self.url_result(entry, 'BBCCoUk') for entry in entries],
|
||||
[self.url_result(entry_, 'BBCCoUk') for entry_ in entries],
|
||||
playlist_id, playlist_title, playlist_description)
|
||||
|
||||
# Multiple video article (e.g. http://www.bbc.com/news/world-europe-32668511)
|
||||
@@ -995,19 +1058,35 @@ class BBCCoUkArticleIE(InfoExtractor):
|
||||
|
||||
|
||||
class BBCCoUkPlaylistBaseIE(InfoExtractor):
|
||||
def _entries(self, webpage, url, playlist_id):
|
||||
single_page = 'page' in compat_urlparse.parse_qs(
|
||||
compat_urlparse.urlparse(url).query)
|
||||
for page_num in itertools.count(2):
|
||||
for video_id in re.findall(
|
||||
self._VIDEO_ID_TEMPLATE % BBCCoUkIE._ID_REGEX, webpage):
|
||||
yield self.url_result(
|
||||
self._URL_TEMPLATE % video_id, BBCCoUkIE.ie_key())
|
||||
if single_page:
|
||||
return
|
||||
next_page = self._search_regex(
|
||||
r'<li[^>]+class=(["\'])pagination_+next\1[^>]*><a[^>]+href=(["\'])(?P<url>(?:(?!\2).)+)\2',
|
||||
webpage, 'next page url', default=None, group='url')
|
||||
if not next_page:
|
||||
break
|
||||
webpage = self._download_webpage(
|
||||
compat_urlparse.urljoin(url, next_page), playlist_id,
|
||||
'Downloading page %d' % page_num, page_num)
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
|
||||
entries = [
|
||||
self.url_result(self._URL_TEMPLATE % video_id, BBCCoUkIE.ie_key())
|
||||
for video_id in re.findall(
|
||||
self._VIDEO_ID_TEMPLATE % BBCCoUkIE._ID_REGEX, webpage)]
|
||||
|
||||
title, description = self._extract_title_and_description(webpage)
|
||||
|
||||
return self.playlist_result(entries, playlist_id, title, description)
|
||||
return self.playlist_result(
|
||||
self._entries(webpage, url, playlist_id),
|
||||
playlist_id, title, description)
|
||||
|
||||
|
||||
class BBCCoUkIPlayerPlaylistIE(BBCCoUkPlaylistBaseIE):
|
||||
@@ -1056,6 +1135,24 @@ class BBCCoUkPlaylistIE(BBCCoUkPlaylistBaseIE):
|
||||
'description': 'French thriller serial about a missing teenager.',
|
||||
},
|
||||
'playlist_mincount': 7,
|
||||
}, {
|
||||
# multipage playlist, explicit page
|
||||
'url': 'http://www.bbc.co.uk/programmes/b00mfl7n/clips?page=1',
|
||||
'info_dict': {
|
||||
'id': 'b00mfl7n',
|
||||
'title': 'Frozen Planet - Clips - BBC One',
|
||||
'description': 'md5:65dcbf591ae628dafe32aa6c4a4a0d8c',
|
||||
},
|
||||
'playlist_mincount': 24,
|
||||
}, {
|
||||
# multipage playlist, all pages
|
||||
'url': 'http://www.bbc.co.uk/programmes/b00mfl7n/clips',
|
||||
'info_dict': {
|
||||
'id': 'b00mfl7n',
|
||||
'title': 'Frozen Planet - Clips - BBC One',
|
||||
'description': 'md5:65dcbf591ae628dafe32aa6c4a4a0d8c',
|
||||
},
|
||||
'playlist_mincount': 142,
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/broadcasts/2016/06',
|
||||
'only_matching': True,
|
||||
|
@@ -11,22 +11,13 @@ from ..compat import compat_urllib_parse_unquote
|
||||
class BigflixIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?bigflix\.com/.+/(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.bigflix.com/Hindi-movies/Action-movies/Singham-Returns/16537',
|
||||
'md5': 'ec76aa9b1129e2e5b301a474e54fab74',
|
||||
'info_dict': {
|
||||
'id': '16537',
|
||||
'ext': 'mp4',
|
||||
'title': 'Singham Returns',
|
||||
'description': 'md5:3d2ba5815f14911d5cc6a501ae0cf65d',
|
||||
}
|
||||
}, {
|
||||
# 2 formats
|
||||
'url': 'http://www.bigflix.com/Tamil-movies/Drama-movies/Madarasapatinam/16070',
|
||||
'info_dict': {
|
||||
'id': '16070',
|
||||
'ext': 'mp4',
|
||||
'title': 'Madarasapatinam',
|
||||
'description': 'md5:63b9b8ed79189c6f0418c26d9a3452ca',
|
||||
'description': 'md5:9f0470b26a4ba8e824c823b5d95c2f6b',
|
||||
'formats': 'mincount:2',
|
||||
},
|
||||
'params': {
|
||||
|
@@ -25,13 +25,13 @@ class BiliBiliIE(InfoExtractor):
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.bilibili.tv/video/av1074402/',
|
||||
'md5': '5f7d29e1a2872f3df0cf76b1f87d3788',
|
||||
'md5': '9fa226fe2b8a9a4d5a69b4c6a183417e',
|
||||
'info_dict': {
|
||||
'id': '1554319',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': '【金坷垃】金泡沫',
|
||||
'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
|
||||
'duration': 308.067,
|
||||
'duration': 308.315,
|
||||
'timestamp': 1398012660,
|
||||
'upload_date': '20140420',
|
||||
'thumbnail': 're:^https?://.+\.jpg',
|
||||
@@ -41,73 +41,33 @@ class BiliBiliIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://www.bilibili.com/video/av1041170/',
|
||||
'info_dict': {
|
||||
'id': '1041170',
|
||||
'id': '1507019',
|
||||
'ext': 'mp4',
|
||||
'title': '【BD1080P】刀语【诸神&异域】',
|
||||
'description': '这是个神奇的故事~每个人不留弹幕不给走哦~切利哦!~',
|
||||
'timestamp': 1396530060,
|
||||
'upload_date': '20140403',
|
||||
'uploader': '枫叶逝去',
|
||||
'uploader_id': '520116',
|
||||
},
|
||||
'playlist_count': 9,
|
||||
}, {
|
||||
'url': 'http://www.bilibili.com/video/av4808130/',
|
||||
'info_dict': {
|
||||
'id': '4808130',
|
||||
'id': '7802182',
|
||||
'ext': 'mp4',
|
||||
'title': '【长篇】哆啦A梦443【钉铛】',
|
||||
'description': '(2016.05.27)来组合客人的脸吧&amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;illust_id=56912929',
|
||||
'timestamp': 1464564180,
|
||||
'upload_date': '20160529',
|
||||
'uploader': '喜欢拉面',
|
||||
'uploader_id': '151066',
|
||||
},
|
||||
'playlist': [{
|
||||
'md5': '55cdadedf3254caaa0d5d27cf20a8f9c',
|
||||
'info_dict': {
|
||||
'id': '4808130_part1',
|
||||
'ext': 'flv',
|
||||
'title': '【长篇】哆啦A梦443【钉铛】',
|
||||
'description': '(2016.05.27)来组合客人的脸吧&amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;illust_id=56912929',
|
||||
'timestamp': 1464564180,
|
||||
'upload_date': '20160529',
|
||||
'uploader': '喜欢拉面',
|
||||
'uploader_id': '151066',
|
||||
},
|
||||
}, {
|
||||
'md5': '926f9f67d0c482091872fbd8eca7ea3d',
|
||||
'info_dict': {
|
||||
'id': '4808130_part2',
|
||||
'ext': 'flv',
|
||||
'title': '【长篇】哆啦A梦443【钉铛】',
|
||||
'description': '(2016.05.27)来组合客人的脸吧&amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;illust_id=56912929',
|
||||
'timestamp': 1464564180,
|
||||
'upload_date': '20160529',
|
||||
'uploader': '喜欢拉面',
|
||||
'uploader_id': '151066',
|
||||
},
|
||||
}, {
|
||||
'md5': '4b7b225b968402d7c32348c646f1fd83',
|
||||
'info_dict': {
|
||||
'id': '4808130_part3',
|
||||
'ext': 'flv',
|
||||
'title': '【长篇】哆啦A梦443【钉铛】',
|
||||
'description': '(2016.05.27)来组合客人的脸吧&amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;illust_id=56912929',
|
||||
'timestamp': 1464564180,
|
||||
'upload_date': '20160529',
|
||||
'uploader': '喜欢拉面',
|
||||
'uploader_id': '151066',
|
||||
},
|
||||
}, {
|
||||
'md5': '7b795e214166501e9141139eea236e91',
|
||||
'info_dict': {
|
||||
'id': '4808130_part4',
|
||||
'ext': 'flv',
|
||||
'title': '【长篇】哆啦A梦443【钉铛】',
|
||||
'description': '(2016.05.27)来组合客人的脸吧&amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;illust_id=56912929',
|
||||
'timestamp': 1464564180,
|
||||
'upload_date': '20160529',
|
||||
'uploader': '喜欢拉面',
|
||||
'uploader_id': '151066',
|
||||
},
|
||||
}],
|
||||
}, {
|
||||
# Missing upload time
|
||||
'url': 'http://www.bilibili.com/video/av1867637/',
|
||||
'info_dict': {
|
||||
'id': '2880301',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': '【HDTV】【喜剧】岳父岳母真难当 (2014)【法国票房冠军】',
|
||||
'description': '一个信奉天主教的法国旧式传统资产阶级家庭中有四个女儿。三个女儿却分别找了阿拉伯、犹太、中国丈夫,老夫老妻唯独期盼剩下未嫁的小女儿能找一个信奉天主教的法国白人,结果没想到小女儿找了一位非裔黑人……【这次应该不会跳帧了】',
|
||||
'uploader': '黑夜为猫',
|
||||
|
@@ -24,7 +24,8 @@ class BIQLEIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Ребенок в шоке от автоматической мойки',
|
||||
'uploader': 'Dmitry Kotov',
|
||||
}
|
||||
},
|
||||
'skip': ' This video was marked as adult. Embedding adult videos on external sites is prohibited.',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -1,3 +1,4 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
@@ -20,6 +21,18 @@ class BloombergIE(InfoExtractor):
|
||||
'params': {
|
||||
'format': 'best[format_id^=hds]',
|
||||
},
|
||||
}, {
|
||||
# video ID in BPlayer(...)
|
||||
'url': 'http://www.bloomberg.com/features/2016-hello-world-new-zealand/',
|
||||
'info_dict': {
|
||||
'id': '938c7e72-3f25-4ddb-8b85-a9be731baa74',
|
||||
'ext': 'flv',
|
||||
'title': 'Meet the Real-Life Tech Wizards of Middle Earth',
|
||||
'description': 'Hello World, Episode 1: New Zealand’s freaky AI babies, robot exoskeletons, and a virtual you.',
|
||||
},
|
||||
'params': {
|
||||
'format': 'best[format_id^=hds]',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.bloomberg.com/news/articles/2015-11-12/five-strange-things-that-have-been-happening-in-financial-markets',
|
||||
'only_matching': True,
|
||||
@@ -33,7 +46,11 @@ class BloombergIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, name)
|
||||
video_id = self._search_regex(
|
||||
r'["\']bmmrId["\']\s*:\s*(["\'])(?P<url>.+?)\1',
|
||||
webpage, 'id', group='url')
|
||||
webpage, 'id', group='url', default=None)
|
||||
if not video_id:
|
||||
bplayer_data = self._parse_json(self._search_regex(
|
||||
r'BPlayer\(null,\s*({[^;]+})\);', webpage, 'id'), name)
|
||||
video_id = bplayer_data['id']
|
||||
title = re.sub(': Video$', '', self._og_search_title(webpage))
|
||||
|
||||
embed_info = self._download_json(
|
||||
|
@@ -1,7 +1,6 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import datetime
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -10,8 +9,10 @@ from ..compat import (
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
parse_iso8601,
|
||||
clean_html,
|
||||
parse_duration,
|
||||
str_to_int,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
@@ -26,14 +27,14 @@ class CamdemyIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Ch1-1 Introduction, Signals (02-23-2012)',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'description': '',
|
||||
'creator': 'ss11spring',
|
||||
'duration': 1591,
|
||||
'upload_date': '20130114',
|
||||
'timestamp': 1358154556,
|
||||
'view_count': int,
|
||||
}
|
||||
}, {
|
||||
# With non-empty description
|
||||
# webpage returns "No permission or not login"
|
||||
'url': 'http://www.camdemy.com/media/13885',
|
||||
'md5': '4576a3bb2581f86c61044822adbd1249',
|
||||
'info_dict': {
|
||||
@@ -41,64 +42,71 @@ class CamdemyIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'EverCam + Camdemy QuickStart',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'description': 'md5:050b62f71ed62928f8a35f1a41e186c9',
|
||||
'description': 'md5:2a9f989c2b153a2342acee579c6e7db6',
|
||||
'creator': 'evercam',
|
||||
'upload_date': '20140620',
|
||||
'timestamp': 1403271569,
|
||||
'duration': 318,
|
||||
}
|
||||
}, {
|
||||
# External source
|
||||
# External source (YouTube)
|
||||
'url': 'http://www.camdemy.com/media/14842',
|
||||
'md5': '50e1c3c3aa233d3d7b7daa2fa10b1cf7',
|
||||
'info_dict': {
|
||||
'id': '2vsYQzNIsJo',
|
||||
'ext': 'mp4',
|
||||
'title': 'Excel 2013 Tutorial - How to add Password Protection',
|
||||
'description': 'Excel 2013 Tutorial for Beginners - How to add Password Protection',
|
||||
'upload_date': '20130211',
|
||||
'uploader': 'Hun Kim',
|
||||
'description': 'Excel 2013 Tutorial for Beginners - How to add Password Protection',
|
||||
'uploader_id': 'hunkimtutorials',
|
||||
'title': 'Excel 2013 Tutorial - How to add Password Protection',
|
||||
}
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
page = self._download_webpage(url, video_id)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
src_from = self._html_search_regex(
|
||||
r"<div class='srcFrom'>Source: <a title='([^']+)'", page,
|
||||
'external source', default=None)
|
||||
r"class=['\"]srcFrom['\"][^>]*>Sources?(?:\s+from)?\s*:\s*<a[^>]+(?:href|title)=(['\"])(?P<url>(?:(?!\1).)+)\1",
|
||||
webpage, 'external source', default=None, group='url')
|
||||
if src_from:
|
||||
return self.url_result(src_from)
|
||||
|
||||
oembed_obj = self._download_json(
|
||||
'http://www.camdemy.com/oembed/?format=json&url=' + url, video_id)
|
||||
|
||||
title = oembed_obj['title']
|
||||
thumb_url = oembed_obj['thumbnail_url']
|
||||
video_folder = compat_urlparse.urljoin(thumb_url, 'video/')
|
||||
file_list_doc = self._download_xml(
|
||||
compat_urlparse.urljoin(video_folder, 'fileList.xml'),
|
||||
video_id, 'Filelist XML')
|
||||
video_id, 'Downloading filelist XML')
|
||||
file_name = file_list_doc.find('./video/item/fileName').text
|
||||
video_url = compat_urlparse.urljoin(video_folder, file_name)
|
||||
|
||||
timestamp = parse_iso8601(self._html_search_regex(
|
||||
r"<div class='title'>Posted\s*:</div>\s*<div class='value'>([^<>]+)<",
|
||||
page, 'creation time', fatal=False),
|
||||
delimiter=' ', timezone=datetime.timedelta(hours=8))
|
||||
view_count = str_to_int(self._html_search_regex(
|
||||
r"<div class='title'>Views\s*:</div>\s*<div class='value'>([^<>]+)<",
|
||||
page, 'view count', fatal=False))
|
||||
# Some URLs return "No permission or not login" in a webpage despite being
|
||||
# freely available via oembed JSON URL (e.g. http://www.camdemy.com/media/13885)
|
||||
upload_date = unified_strdate(self._search_regex(
|
||||
r'>published on ([^<]+)<', webpage,
|
||||
'upload date', default=None))
|
||||
view_count = str_to_int(self._search_regex(
|
||||
r'role=["\']viewCnt["\'][^>]*>([\d,.]+) views',
|
||||
webpage, 'view count', default=None))
|
||||
description = self._html_search_meta(
|
||||
'description', webpage, default=None) or clean_html(
|
||||
oembed_obj.get('description'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': oembed_obj['title'],
|
||||
'title': title,
|
||||
'thumbnail': thumb_url,
|
||||
'description': self._html_search_meta('description', page),
|
||||
'creator': oembed_obj['author_name'],
|
||||
'duration': oembed_obj['duration'],
|
||||
'timestamp': timestamp,
|
||||
'description': description,
|
||||
'creator': oembed_obj.get('author_name'),
|
||||
'duration': parse_duration(oembed_obj.get('duration')),
|
||||
'upload_date': upload_date,
|
||||
'view_count': view_count,
|
||||
}
|
||||
|
||||
|
@@ -4,9 +4,11 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
js_to_json,
|
||||
smuggle_url,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
@@ -27,7 +29,20 @@ class CBCIE(InfoExtractor):
|
||||
},
|
||||
'skip': 'Geo-restricted to Canada',
|
||||
}, {
|
||||
# with clipId
|
||||
# with clipId, feed available via tpfeed.cbc.ca and feed.theplatform.com
|
||||
'url': 'http://www.cbc.ca/22minutes/videos/22-minutes-update/22-minutes-update-episode-4',
|
||||
'md5': '162adfa070274b144f4fdc3c3b8207db',
|
||||
'info_dict': {
|
||||
'id': '2414435309',
|
||||
'ext': 'mp4',
|
||||
'title': '22 Minutes Update: What Not To Wear Quebec',
|
||||
'description': "This week's latest Canadian top political story is What Not To Wear Quebec.",
|
||||
'upload_date': '20131025',
|
||||
'uploader': 'CBCC-NEW',
|
||||
'timestamp': 1382717907,
|
||||
},
|
||||
}, {
|
||||
# with clipId, feed only available via tpfeed.cbc.ca
|
||||
'url': 'http://www.cbc.ca/archives/entry/1978-robin-williams-freestyles-on-90-minutes-live',
|
||||
'md5': '0274a90b51a9b4971fe005c63f592f12',
|
||||
'info_dict': {
|
||||
@@ -83,9 +98,15 @@ class CBCIE(InfoExtractor):
|
||||
media_id = player_info.get('mediaId')
|
||||
if not media_id:
|
||||
clip_id = player_info['clipId']
|
||||
media_id = self._download_json(
|
||||
'http://feed.theplatform.com/f/h9dtGB/punlNGjMlc1F?fields=id&byContent=byReleases%3DbyId%253D' + clip_id,
|
||||
clip_id)['entries'][0]['id'].split('/')[-1]
|
||||
feed = self._download_json(
|
||||
'http://tpfeed.cbc.ca/f/ExhSPC/vms_5akSXx4Ng_Zn?byCustomValue={:mpsReleases}{%s}' % clip_id,
|
||||
clip_id, fatal=False)
|
||||
if feed:
|
||||
media_id = try_get(feed, lambda x: x['entries'][0]['guid'], compat_str)
|
||||
if not media_id:
|
||||
media_id = self._download_json(
|
||||
'http://feed.theplatform.com/f/h9dtGB/punlNGjMlc1F?fields=id&byContent=byReleases%3DbyId%253D' + clip_id,
|
||||
clip_id)['entries'][0]['id'].split('/')[-1]
|
||||
return self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id)
|
||||
else:
|
||||
entries = [self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id) for media_id in re.findall(r'<iframe[^>]+src="[^"]+?mediaId=(\d+)"', webpage)]
|
||||
|
@@ -1,12 +1,10 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import calendar
|
||||
import datetime
|
||||
|
||||
from .anvato import AnvatoIE
|
||||
from .sendtonews import SendtoNewsIE
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import unified_timestamp
|
||||
|
||||
|
||||
class CBSLocalIE(AnvatoIE):
|
||||
@@ -43,13 +41,8 @@ class CBSLocalIE(AnvatoIE):
|
||||
'url': 'http://cleveland.cbslocal.com/2016/05/16/indians-score-season-high-15-runs-in-blowout-win-over-reds-rapid-reaction/',
|
||||
'info_dict': {
|
||||
'id': 'GxfCe0Zo7D-175909-5588',
|
||||
'ext': 'mp4',
|
||||
'title': 'Recap: CLE 15, CIN 6',
|
||||
'description': '5/16/16: Indians\' bats explode for 15 runs in a win',
|
||||
'upload_date': '20160516',
|
||||
'timestamp': 1463433840,
|
||||
'duration': 49,
|
||||
},
|
||||
'playlist_count': 9,
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
@@ -62,19 +55,15 @@ class CBSLocalIE(AnvatoIE):
|
||||
|
||||
sendtonews_url = SendtoNewsIE._extract_url(webpage)
|
||||
if sendtonews_url:
|
||||
info_dict = {
|
||||
'_type': 'url_transparent',
|
||||
'url': compat_urlparse.urljoin(url, sendtonews_url),
|
||||
}
|
||||
else:
|
||||
info_dict = self._extract_anvato_videos(webpage, display_id)
|
||||
return self.url_result(
|
||||
compat_urlparse.urljoin(url, sendtonews_url),
|
||||
ie=SendtoNewsIE.ie_key())
|
||||
|
||||
info_dict = self._extract_anvato_videos(webpage, display_id)
|
||||
|
||||
time_str = self._html_search_regex(
|
||||
r'class="entry-date">([^<]+)<', webpage, 'released date', fatal=False)
|
||||
timestamp = None
|
||||
if time_str:
|
||||
timestamp = calendar.timegm(datetime.datetime.strptime(
|
||||
time_str, '%b %d, %Y %I:%M %p').timetuple())
|
||||
timestamp = unified_timestamp(time_str)
|
||||
|
||||
info_dict.update({
|
||||
'display_id': display_id,
|
||||
|
@@ -70,7 +70,8 @@ class CBSNewsLiveVideoIE(InfoExtractor):
|
||||
IE_DESC = 'CBS News Live Videos'
|
||||
_VALID_URL = r'https?://(?:www\.)?cbsnews\.com/live/video/(?P<id>[\da-z_-]+)'
|
||||
|
||||
_TESTS = [{
|
||||
# Live videos get deleted soon. See http://www.cbsnews.com/live/ for the latest examples
|
||||
_TEST = {
|
||||
'url': 'http://www.cbsnews.com/live/video/clinton-sanders-prepare-to-face-off-in-nh/',
|
||||
'info_dict': {
|
||||
'id': 'clinton-sanders-prepare-to-face-off-in-nh',
|
||||
@@ -78,15 +79,8 @@ class CBSNewsLiveVideoIE(InfoExtractor):
|
||||
'title': 'Clinton, Sanders Prepare To Face Off In NH',
|
||||
'duration': 334,
|
||||
},
|
||||
'skip': 'Video gone, redirected to http://www.cbsnews.com/live/',
|
||||
}, {
|
||||
'url': 'http://www.cbsnews.com/live/video/video-shows-intense-paragliding-accident/',
|
||||
'info_dict': {
|
||||
'id': 'video-shows-intense-paragliding-accident',
|
||||
'ext': 'flv',
|
||||
'title': 'Video Shows Intense Paragliding Accident',
|
||||
},
|
||||
}]
|
||||
'skip': 'Video gone',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
@@ -17,7 +17,8 @@ class ChaturbateIE(InfoExtractor):
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
'skip': 'Room is offline',
|
||||
}, {
|
||||
'url': 'https://en.chaturbate.com/siswet19/',
|
||||
'only_matching': True,
|
||||
|
@@ -1,30 +1,33 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
parse_duration,
|
||||
int_or_none,
|
||||
)
|
||||
from ..utils import parse_duration
|
||||
|
||||
|
||||
class ChirbitIE(InfoExtractor):
|
||||
IE_NAME = 'chirbit'
|
||||
_VALID_URL = r'https?://(?:www\.)?chirb\.it/(?:(?:wp|pl)/|fb_chirbit_player\.swf\?key=)?(?P<id>[\da-zA-Z]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://chirb.it/PrIPv5',
|
||||
'md5': '9847b0dad6ac3e074568bf2cfb197de8',
|
||||
'url': 'http://chirb.it/be2abG',
|
||||
'info_dict': {
|
||||
'id': 'PrIPv5',
|
||||
'id': 'be2abG',
|
||||
'ext': 'mp3',
|
||||
'title': 'Фасадстрой',
|
||||
'duration': 52,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'title': 'md5:f542ea253f5255240be4da375c6a5d7e',
|
||||
'description': 'md5:f24a4e22a71763e32da5fed59e47c770',
|
||||
'duration': 306,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://chirb.it/fb_chirbit_player.swf?key=PrIPv5',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://chirb.it/wp/MN58c2',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -33,27 +36,30 @@ class ChirbitIE(InfoExtractor):
|
||||
webpage = self._download_webpage(
|
||||
'http://chirb.it/%s' % audio_id, audio_id)
|
||||
|
||||
audio_url = self._search_regex(
|
||||
r'"setFile"\s*,\s*"([^"]+)"', webpage, 'audio url')
|
||||
data_fd = self._search_regex(
|
||||
r'data-fd=(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||
webpage, 'data fd', group='url')
|
||||
|
||||
# Reverse engineered from https://chirb.it/js/chirbit.player.js (look
|
||||
# for soundURL)
|
||||
audio_url = base64.b64decode(
|
||||
data_fd[::-1].encode('ascii')).decode('utf-8')
|
||||
|
||||
title = self._search_regex(
|
||||
r'itemprop="name">([^<]+)', webpage, 'title')
|
||||
duration = parse_duration(self._html_search_meta(
|
||||
'duration', webpage, 'duration', fatal=False))
|
||||
view_count = int_or_none(self._search_regex(
|
||||
r'itemprop="playCount"\s*>(\d+)', webpage,
|
||||
'listen count', fatal=False))
|
||||
comment_count = int_or_none(self._search_regex(
|
||||
r'>(\d+) Comments?:', webpage,
|
||||
'comment count', fatal=False))
|
||||
r'class=["\']chirbit-title["\'][^>]*>([^<]+)', webpage, 'title')
|
||||
description = self._search_regex(
|
||||
r'<h3>Description</h3>\s*<pre[^>]*>([^<]+)</pre>',
|
||||
webpage, 'description', default=None)
|
||||
duration = parse_duration(self._search_regex(
|
||||
r'class=["\']c-length["\'][^>]*>([^<]+)',
|
||||
webpage, 'duration', fatal=False))
|
||||
|
||||
return {
|
||||
'id': audio_id,
|
||||
'url': audio_url,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
'comment_count': comment_count,
|
||||
}
|
||||
|
||||
|
||||
|
@@ -1,5 +1,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .mtv import MTVIE
|
||||
from ..utils import ExtractorError
|
||||
|
||||
|
||||
class CMTIE(MTVIE):
|
||||
@@ -16,7 +18,27 @@ class CMTIE(MTVIE):
|
||||
'title': 'Garth Brooks - "The Call (featuring Trisha Yearwood)"',
|
||||
'description': 'Blame It All On My Roots',
|
||||
},
|
||||
'skip': 'Video not available',
|
||||
}, {
|
||||
'url': 'http://www.cmt.com/videos/misc/1504699/still-the-king-ep-109-in-3-minutes.jhtml#id=1739908',
|
||||
'md5': 'e61a801ca4a183a466c08bd98dccbb1c',
|
||||
'info_dict': {
|
||||
'id': '1504699',
|
||||
'ext': 'mp4',
|
||||
'title': 'Still The King Ep. 109 in 3 Minutes',
|
||||
'description': 'Relive or catch up with Still The King by watching this recap of season 1, episode 9. New episodes Sundays 9/8c.',
|
||||
'timestamp': 1469421000.0,
|
||||
'upload_date': '20160725',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.cmt.com/shows/party-down-south/party-down-south-ep-407-gone-girl/1738172/playlist/#id=1738172',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def _transform_rtmp_url(cls, rtmp_video_url):
|
||||
if 'error_not_available.swf' in rtmp_video_url:
|
||||
raise ExtractorError(
|
||||
'%s said: video is not available' % cls.IE_NAME, expected=True)
|
||||
|
||||
return super(CMTIE, cls)._transform_rtmp_url(rtmp_video_url)
|
||||
|
@@ -1,17 +1,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .mtv import MTVServicesInfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_parse_urlencode,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
unified_strdate,
|
||||
)
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class ComedyCentralIE(MTVServicesInfoExtractor):
|
||||
@@ -26,8 +16,10 @@ class ComedyCentralIE(MTVServicesInfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'cef0cbb3-e776-4bc9-b62e-8016deccb354',
|
||||
'ext': 'mp4',
|
||||
'title': 'CC:Stand-Up|Greg Fitzsimmons: Life on Stage|Uncensored - Too Good of a Mother',
|
||||
'title': 'CC:Stand-Up|August 18, 2013|1|0101|Uncensored - Too Good of a Mother',
|
||||
'description': 'After a certain point, breastfeeding becomes c**kblocking.',
|
||||
'timestamp': 1376798400,
|
||||
'upload_date': '20130818',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/interviews/6yx39d/exclusive-rand-paul-extended-interview',
|
||||
@@ -35,244 +27,43 @@ class ComedyCentralIE(MTVServicesInfoExtractor):
|
||||
}]
|
||||
|
||||
|
||||
class ComedyCentralShowsIE(MTVServicesInfoExtractor):
|
||||
IE_DESC = 'The Daily Show / The Colbert Report'
|
||||
# urls can be abbreviations like :thedailyshow
|
||||
# urls for episodes like:
|
||||
# or urls for clips like: http://www.thedailyshow.com/watch/mon-december-10-2012/any-given-gun-day
|
||||
# or: http://www.colbertnation.com/the-colbert-report-videos/421667/november-29-2012/moon-shattering-news
|
||||
# or: http://www.colbertnation.com/the-colbert-report-collections/422008/festival-of-lights/79524
|
||||
_VALID_URL = r'''(?x)^(:(?P<shortname>tds|thedailyshow)
|
||||
|https?://(:www\.)?
|
||||
(?P<showname>thedailyshow|thecolbertreport|tosh)\.(?:cc\.)?com/
|
||||
((?:full-)?episodes/(?:[0-9a-z]{6}/)?(?P<episode>.*)|
|
||||
(?P<clip>
|
||||
(?:(?:guests/[^/]+|videos|video-(?:clips|playlists)|special-editions|news-team/[^/]+)/[^/]+/(?P<videotitle>[^/?#]+))
|
||||
|(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
|
||||
|(watch/(?P<date>[^/]*)/(?P<tdstitle>.*))
|
||||
)|
|
||||
(?P<interview>
|
||||
extended-interviews/(?P<interID>[0-9a-z]+)/
|
||||
(?:playlist_tds_extended_)?(?P<interview_title>[^/?#]*?)
|
||||
(?:/[^/?#]?|[?#]|$))))
|
||||
'''
|
||||
class ToshIE(MTVServicesInfoExtractor):
|
||||
IE_DESC = 'Tosh.0'
|
||||
_VALID_URL = r'^https?://tosh\.cc\.com/video-(?:clips|collections)/[^/]+/(?P<videotitle>[^/?#]+)'
|
||||
_FEED_URL = 'http://tosh.cc.com/feeds/mrss'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://thedailyshow.cc.com/watch/thu-december-13-2012/kristen-stewart',
|
||||
'md5': '4e2f5cb088a83cd8cdb7756132f9739d',
|
||||
'info_dict': {
|
||||
'id': 'ab9ab3e7-5a98-4dbe-8b21-551dc0523d55',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20121213',
|
||||
'description': 'Kristen Stewart learns to let loose in "On the Road."',
|
||||
'uploader': 'thedailyshow',
|
||||
'title': 'thedailyshow kristen-stewart part 1',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://thedailyshow.cc.com/extended-interviews/b6364d/sarah-chayes-extended-interview',
|
||||
'info_dict': {
|
||||
'id': 'sarah-chayes-extended-interview',
|
||||
'description': 'Carnegie Endowment Senior Associate Sarah Chayes discusses how corrupt institutions function throughout the world in her book "Thieves of State: Why Corruption Threatens Global Security."',
|
||||
'title': 'thedailyshow Sarah Chayes Extended Interview',
|
||||
},
|
||||
'playlist': [
|
||||
{
|
||||
'info_dict': {
|
||||
'id': '0baad492-cbec-4ec1-9e50-ad91c291127f',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20150129',
|
||||
'description': 'Carnegie Endowment Senior Associate Sarah Chayes discusses how corrupt institutions function throughout the world in her book "Thieves of State: Why Corruption Threatens Global Security."',
|
||||
'uploader': 'thedailyshow',
|
||||
'title': 'thedailyshow sarah-chayes-extended-interview part 1',
|
||||
},
|
||||
},
|
||||
{
|
||||
'info_dict': {
|
||||
'id': '1e4fb91b-8ce7-4277-bd7c-98c9f1bbd283',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20150129',
|
||||
'description': 'Carnegie Endowment Senior Associate Sarah Chayes discusses how corrupt institutions function throughout the world in her book "Thieves of State: Why Corruption Threatens Global Security."',
|
||||
'uploader': 'thedailyshow',
|
||||
'title': 'thedailyshow sarah-chayes-extended-interview part 2',
|
||||
},
|
||||
},
|
||||
],
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://thedailyshow.cc.com/extended-interviews/xm3fnq/andrew-napolitano-extended-interview',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://thecolbertreport.cc.com/videos/29w6fx/-realhumanpraise-for-fox-news',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://thecolbertreport.cc.com/videos/gh6urb/neil-degrasse-tyson-pt--1?xrs=eml_col_031114',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://thedailyshow.cc.com/guests/michael-lewis/3efna8/exclusive---michael-lewis-extended-interview-pt--3',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://thedailyshow.cc.com/episodes/sy7yv0/april-8--2014---denis-leary',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://thecolbertreport.cc.com/episodes/8ase07/april-8--2014---jane-goodall',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://thedailyshow.cc.com/video-playlists/npde3s/the-daily-show-19088-highlights',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://thedailyshow.cc.com/video-playlists/t6d9sg/the-daily-show-20038-highlights/be3cwo',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://thedailyshow.cc.com/special-editions/2l8fdb/special-edition---a-look-back-at-food',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://thedailyshow.cc.com/news-team/michael-che/7wnfel/we-need-to-talk-about-israel',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://tosh.cc.com/video-clips/68g93d/twitter-users-share-summer-plans',
|
||||
'info_dict': {
|
||||
'description': 'Tosh asked fans to share their summer plans.',
|
||||
'title': 'Twitter Users Share Summer Plans',
|
||||
},
|
||||
'playlist': [{
|
||||
'md5': 'f269e88114c1805bb6d7653fecea9e06',
|
||||
'info_dict': {
|
||||
'id': '90498ec2-ed00-11e0-aca6-0026b9414f30',
|
||||
'ext': 'mp4',
|
||||
'title': 'Tosh.0|June 9, 2077|2|211|Twitter Users Share Summer Plans',
|
||||
'description': 'Tosh asked fans to share their summer plans.',
|
||||
'thumbnail': 're:^https?://.*\.jpg',
|
||||
# It's really reported to be published on year 2077
|
||||
'upload_date': '20770610',
|
||||
'timestamp': 3390510600,
|
||||
'subtitles': {
|
||||
'en': 'mincount:3',
|
||||
},
|
||||
},
|
||||
}]
|
||||
}, {
|
||||
'url': 'http://tosh.cc.com/video-collections/x2iz7k/just-plain-foul/m5q4fp',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_available_formats = ['3500', '2200', '1700', '1200', '750', '400']
|
||||
|
||||
_video_extensions = {
|
||||
'3500': 'mp4',
|
||||
'2200': 'mp4',
|
||||
'1700': 'mp4',
|
||||
'1200': 'mp4',
|
||||
'750': 'mp4',
|
||||
'400': 'mp4',
|
||||
}
|
||||
_video_dimensions = {
|
||||
'3500': (1280, 720),
|
||||
'2200': (960, 540),
|
||||
'1700': (768, 432),
|
||||
'1200': (640, 360),
|
||||
'750': (512, 288),
|
||||
'400': (384, 216),
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
||||
if mobj.group('shortname'):
|
||||
return self.url_result('http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes')
|
||||
|
||||
if mobj.group('clip'):
|
||||
if mobj.group('videotitle'):
|
||||
epTitle = mobj.group('videotitle')
|
||||
elif mobj.group('showname') == 'thedailyshow':
|
||||
epTitle = mobj.group('tdstitle')
|
||||
else:
|
||||
epTitle = mobj.group('cntitle')
|
||||
dlNewest = False
|
||||
elif mobj.group('interview'):
|
||||
epTitle = mobj.group('interview_title')
|
||||
dlNewest = False
|
||||
else:
|
||||
dlNewest = not mobj.group('episode')
|
||||
if dlNewest:
|
||||
epTitle = mobj.group('showname')
|
||||
else:
|
||||
epTitle = mobj.group('episode')
|
||||
show_name = mobj.group('showname')
|
||||
|
||||
webpage, htmlHandle = self._download_webpage_handle(url, epTitle)
|
||||
if dlNewest:
|
||||
url = htmlHandle.geturl()
|
||||
mobj = re.match(self._VALID_URL, url, re.VERBOSE)
|
||||
if mobj is None:
|
||||
raise ExtractorError('Invalid redirected URL: ' + url)
|
||||
if mobj.group('episode') == '':
|
||||
raise ExtractorError('Redirected URL is still not specific: ' + url)
|
||||
epTitle = (mobj.group('episode') or mobj.group('videotitle')).rpartition('/')[-1]
|
||||
|
||||
mMovieParams = re.findall('(?:<param name="movie" value="|var url = ")(http://media.mtvnservices.com/([^"]*(?:episode|video).*?:.*?))"', webpage)
|
||||
if len(mMovieParams) == 0:
|
||||
# The Colbert Report embeds the information in a without
|
||||
# a URL prefix; so extract the alternate reference
|
||||
# and then add the URL prefix manually.
|
||||
|
||||
altMovieParams = re.findall('data-mgid="([^"]*(?:episode|video|playlist).*?:.*?)"', webpage)
|
||||
if len(altMovieParams) == 0:
|
||||
raise ExtractorError('unable to find Flash URL in webpage ' + url)
|
||||
else:
|
||||
mMovieParams = [('http://media.mtvnservices.com/' + altMovieParams[0], altMovieParams[0])]
|
||||
|
||||
uri = mMovieParams[0][1]
|
||||
# Correct cc.com in uri
|
||||
uri = re.sub(r'(episode:[^.]+)(\.cc)?\.com', r'\1.com', uri)
|
||||
|
||||
index_url = 'http://%s.cc.com/feeds/mrss?%s' % (show_name, compat_urllib_parse_urlencode({'uri': uri}))
|
||||
idoc = self._download_xml(
|
||||
index_url, epTitle,
|
||||
'Downloading show index', 'Unable to download episode index')
|
||||
|
||||
title = idoc.find('./channel/title').text
|
||||
description = idoc.find('./channel/description').text
|
||||
|
||||
entries = []
|
||||
item_els = idoc.findall('.//item')
|
||||
for part_num, itemEl in enumerate(item_els):
|
||||
upload_date = unified_strdate(itemEl.findall('./pubDate')[0].text)
|
||||
thumbnail = itemEl.find('.//{http://search.yahoo.com/mrss/}thumbnail').attrib.get('url')
|
||||
|
||||
content = itemEl.find('.//{http://search.yahoo.com/mrss/}content')
|
||||
duration = float_or_none(content.attrib.get('duration'))
|
||||
mediagen_url = content.attrib['url']
|
||||
guid = itemEl.find('./guid').text.rpartition(':')[-1]
|
||||
|
||||
cdoc = self._download_xml(
|
||||
mediagen_url, epTitle,
|
||||
'Downloading configuration for segment %d / %d' % (part_num + 1, len(item_els)))
|
||||
|
||||
turls = []
|
||||
for rendition in cdoc.findall('.//rendition'):
|
||||
finfo = (rendition.attrib['bitrate'], rendition.findall('./src')[0].text)
|
||||
turls.append(finfo)
|
||||
|
||||
formats = []
|
||||
for format, rtmp_video_url in turls:
|
||||
w, h = self._video_dimensions.get(format, (None, None))
|
||||
formats.append({
|
||||
'format_id': 'vhttp-%s' % format,
|
||||
'url': self._transform_rtmp_url(rtmp_video_url),
|
||||
'ext': self._video_extensions.get(format, 'mp4'),
|
||||
'height': h,
|
||||
'width': w,
|
||||
})
|
||||
formats.append({
|
||||
'format_id': 'rtmp-%s' % format,
|
||||
'url': rtmp_video_url.replace('viacomccstrm', 'viacommtvstrm'),
|
||||
'ext': self._video_extensions.get(format, 'mp4'),
|
||||
'height': h,
|
||||
'width': w,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = self._extract_subtitles(cdoc, guid)
|
||||
|
||||
virtual_id = show_name + ' ' + epTitle + ' part ' + compat_str(part_num + 1)
|
||||
entries.append({
|
||||
'id': guid,
|
||||
'title': virtual_id,
|
||||
'formats': formats,
|
||||
'uploader': show_name,
|
||||
'upload_date': upload_date,
|
||||
'duration': duration,
|
||||
'thumbnail': thumbnail,
|
||||
'description': description,
|
||||
'subtitles': subtitles,
|
||||
})
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': epTitle,
|
||||
'entries': entries,
|
||||
'title': show_name + ' ' + title,
|
||||
'description': description,
|
||||
}
|
||||
@classmethod
|
||||
def _transform_rtmp_url(cls, rtmp_video_url):
|
||||
new_urls = super(ToshIE, cls)._transform_rtmp_url(rtmp_video_url)
|
||||
new_urls['rtmp'] = rtmp_video_url.replace('viacomccstrm', 'viacommtvstrm')
|
||||
return new_urls
|
||||
|
||||
|
||||
class ComedyCentralTVIE(MTVServicesInfoExtractor):
|
||||
@@ -306,3 +97,22 @@ class ComedyCentralTVIE(MTVServicesInfoExtractor):
|
||||
webpage, 'mrss url', group='url')
|
||||
|
||||
return self._get_videos_info_from_url(mrss_url, video_id)
|
||||
|
||||
|
||||
class ComedyCentralShortnameIE(InfoExtractor):
|
||||
_VALID_URL = r'^:(?P<id>tds|thedailyshow)$'
|
||||
_TESTS = [{
|
||||
'url': ':tds',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': ':thedailyshow',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
shortcut_map = {
|
||||
'tds': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes',
|
||||
'thedailyshow': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes',
|
||||
}
|
||||
return self.url_result(shortcut_map[video_id])
|
||||
|
@@ -662,6 +662,24 @@ class InfoExtractor(object):
|
||||
else:
|
||||
return res
|
||||
|
||||
def _get_netrc_login_info(self, netrc_machine=None):
|
||||
username = None
|
||||
password = None
|
||||
netrc_machine = netrc_machine or self._NETRC_MACHINE
|
||||
|
||||
if self._downloader.params.get('usenetrc', False):
|
||||
try:
|
||||
info = netrc.netrc().authenticators(netrc_machine)
|
||||
if info is not None:
|
||||
username = info[0]
|
||||
password = info[2]
|
||||
else:
|
||||
raise netrc.NetrcParseError('No authenticators for %s' % netrc_machine)
|
||||
except (IOError, netrc.NetrcParseError) as err:
|
||||
self._downloader.report_warning('parsing .netrc: %s' % error_to_compat_str(err))
|
||||
|
||||
return (username, password)
|
||||
|
||||
def _get_login_info(self):
|
||||
"""
|
||||
Get the login info as (username, password)
|
||||
@@ -679,16 +697,8 @@ class InfoExtractor(object):
|
||||
if downloader_params.get('username') is not None:
|
||||
username = downloader_params['username']
|
||||
password = downloader_params['password']
|
||||
elif downloader_params.get('usenetrc', False):
|
||||
try:
|
||||
info = netrc.netrc().authenticators(self._NETRC_MACHINE)
|
||||
if info is not None:
|
||||
username = info[0]
|
||||
password = info[2]
|
||||
else:
|
||||
raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
|
||||
except (IOError, netrc.NetrcParseError) as err:
|
||||
self._downloader.report_warning('parsing .netrc: %s' % error_to_compat_str(err))
|
||||
else:
|
||||
username, password = self._get_netrc_login_info()
|
||||
|
||||
return (username, password)
|
||||
|
||||
@@ -727,9 +737,14 @@ class InfoExtractor(object):
|
||||
[^>]+?content=(["\'])(?P<content>.*?)\2''' % re.escape(prop)
|
||||
|
||||
def _og_search_property(self, prop, html, name=None, **kargs):
|
||||
if not isinstance(prop, (list, tuple)):
|
||||
prop = [prop]
|
||||
if name is None:
|
||||
name = 'OpenGraph %s' % prop
|
||||
escaped = self._search_regex(self._og_regexes(prop), html, name, flags=re.DOTALL, **kargs)
|
||||
name = 'OpenGraph %s' % prop[0]
|
||||
og_regexes = []
|
||||
for p in prop:
|
||||
og_regexes.extend(self._og_regexes(p))
|
||||
escaped = self._search_regex(og_regexes, html, name, flags=re.DOTALL, **kargs)
|
||||
if escaped is None:
|
||||
return None
|
||||
return unescapeHTML(escaped)
|
||||
@@ -811,11 +826,14 @@ class InfoExtractor(object):
|
||||
json_ld = self._search_regex(
|
||||
r'(?s)<script[^>]+type=(["\'])application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>',
|
||||
html, 'JSON-LD', group='json_ld', **kwargs)
|
||||
default = kwargs.get('default', NO_DEFAULT)
|
||||
if not json_ld:
|
||||
return {}
|
||||
return self._json_ld(
|
||||
json_ld, video_id, fatal=kwargs.get('fatal', True),
|
||||
expected_type=expected_type)
|
||||
return default if default is not NO_DEFAULT else {}
|
||||
# JSON-LD may be malformed and thus `fatal` should be respected.
|
||||
# At the same time `default` may be passed that assumes `fatal=False`
|
||||
# for _search_regex. Let's simulate the same behavior here as well.
|
||||
fatal = kwargs.get('fatal', True) if default == NO_DEFAULT else False
|
||||
return self._json_ld(json_ld, video_id, fatal=fatal, expected_type=expected_type)
|
||||
|
||||
def _json_ld(self, json_ld, video_id, fatal=True, expected_type=None):
|
||||
if isinstance(json_ld, compat_str):
|
||||
@@ -823,41 +841,47 @@ class InfoExtractor(object):
|
||||
if not json_ld:
|
||||
return {}
|
||||
info = {}
|
||||
if json_ld.get('@context') == 'http://schema.org':
|
||||
item_type = json_ld.get('@type')
|
||||
if expected_type is not None and expected_type != item_type:
|
||||
return info
|
||||
if item_type == 'TVEpisode':
|
||||
info.update({
|
||||
'episode': unescapeHTML(json_ld.get('name')),
|
||||
'episode_number': int_or_none(json_ld.get('episodeNumber')),
|
||||
'description': unescapeHTML(json_ld.get('description')),
|
||||
})
|
||||
part_of_season = json_ld.get('partOfSeason')
|
||||
if isinstance(part_of_season, dict) and part_of_season.get('@type') == 'TVSeason':
|
||||
info['season_number'] = int_or_none(part_of_season.get('seasonNumber'))
|
||||
part_of_series = json_ld.get('partOfSeries')
|
||||
if isinstance(part_of_series, dict) and part_of_series.get('@type') == 'TVSeries':
|
||||
info['series'] = unescapeHTML(part_of_series.get('name'))
|
||||
elif item_type == 'Article':
|
||||
info.update({
|
||||
'timestamp': parse_iso8601(json_ld.get('datePublished')),
|
||||
'title': unescapeHTML(json_ld.get('headline')),
|
||||
'description': unescapeHTML(json_ld.get('articleBody')),
|
||||
})
|
||||
elif item_type == 'VideoObject':
|
||||
info.update({
|
||||
'url': json_ld.get('contentUrl'),
|
||||
'title': unescapeHTML(json_ld.get('name')),
|
||||
'description': unescapeHTML(json_ld.get('description')),
|
||||
'thumbnail': json_ld.get('thumbnailUrl'),
|
||||
'duration': parse_duration(json_ld.get('duration')),
|
||||
'timestamp': unified_timestamp(json_ld.get('uploadDate')),
|
||||
'filesize': float_or_none(json_ld.get('contentSize')),
|
||||
'tbr': int_or_none(json_ld.get('bitrate')),
|
||||
'width': int_or_none(json_ld.get('width')),
|
||||
'height': int_or_none(json_ld.get('height')),
|
||||
})
|
||||
if not isinstance(json_ld, (list, tuple, dict)):
|
||||
return info
|
||||
if isinstance(json_ld, dict):
|
||||
json_ld = [json_ld]
|
||||
for e in json_ld:
|
||||
if e.get('@context') == 'http://schema.org':
|
||||
item_type = e.get('@type')
|
||||
if expected_type is not None and expected_type != item_type:
|
||||
return info
|
||||
if item_type == 'TVEpisode':
|
||||
info.update({
|
||||
'episode': unescapeHTML(e.get('name')),
|
||||
'episode_number': int_or_none(e.get('episodeNumber')),
|
||||
'description': unescapeHTML(e.get('description')),
|
||||
})
|
||||
part_of_season = e.get('partOfSeason')
|
||||
if isinstance(part_of_season, dict) and part_of_season.get('@type') == 'TVSeason':
|
||||
info['season_number'] = int_or_none(part_of_season.get('seasonNumber'))
|
||||
part_of_series = e.get('partOfSeries') or e.get('partOfTVSeries')
|
||||
if isinstance(part_of_series, dict) and part_of_series.get('@type') == 'TVSeries':
|
||||
info['series'] = unescapeHTML(part_of_series.get('name'))
|
||||
elif item_type == 'Article':
|
||||
info.update({
|
||||
'timestamp': parse_iso8601(e.get('datePublished')),
|
||||
'title': unescapeHTML(e.get('headline')),
|
||||
'description': unescapeHTML(e.get('articleBody')),
|
||||
})
|
||||
elif item_type == 'VideoObject':
|
||||
info.update({
|
||||
'url': e.get('contentUrl'),
|
||||
'title': unescapeHTML(e.get('name')),
|
||||
'description': unescapeHTML(e.get('description')),
|
||||
'thumbnail': e.get('thumbnailUrl'),
|
||||
'duration': parse_duration(e.get('duration')),
|
||||
'timestamp': unified_timestamp(e.get('uploadDate')),
|
||||
'filesize': float_or_none(e.get('contentSize')),
|
||||
'tbr': int_or_none(e.get('bitrate')),
|
||||
'width': int_or_none(e.get('width')),
|
||||
'height': int_or_none(e.get('height')),
|
||||
})
|
||||
break
|
||||
return dict((k, v) for k, v in info.items() if v is not None)
|
||||
|
||||
@staticmethod
|
||||
@@ -911,7 +935,8 @@ class InfoExtractor(object):
|
||||
if f.get('ext') in ['f4f', 'f4m']: # Not yet supported
|
||||
preference -= 0.5
|
||||
|
||||
proto_preference = 0 if determine_protocol(f) in ['http', 'https'] else -0.1
|
||||
protocol = f.get('protocol') or determine_protocol(f)
|
||||
proto_preference = 0 if protocol in ['http', 'https'] else (-0.5 if protocol == 'rtsp' else -0.1)
|
||||
|
||||
if f.get('vcodec') == 'none': # audio only
|
||||
preference -= 50
|
||||
@@ -1128,7 +1153,7 @@ class InfoExtractor(object):
|
||||
'url': m3u8_url,
|
||||
'ext': ext,
|
||||
'protocol': 'm3u8',
|
||||
'preference': preference - 1 if preference else -1,
|
||||
'preference': preference - 100 if preference else -100,
|
||||
'resolution': 'multiple',
|
||||
'format_note': 'Quality selection URL',
|
||||
}
|
||||
@@ -1786,7 +1811,7 @@ class InfoExtractor(object):
|
||||
|
||||
any_restricted = False
|
||||
for tc in self.get_testcases(include_onlymatching=False):
|
||||
if 'playlist' in tc:
|
||||
if tc.get('playlist', []):
|
||||
tc = tc['playlist'][0]
|
||||
is_restricted = age_restricted(
|
||||
tc.get('info_dict', {}).get('age_limit'), age_limit)
|
||||
|
@@ -5,13 +5,17 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse_urlencode,
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
orderedSet,
|
||||
remove_end,
|
||||
extract_attributes,
|
||||
mimetype2ext,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
@@ -58,6 +62,9 @@ class CondeNastIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': '3D Printed Speakers Lit With LED',
|
||||
'description': 'Check out these beautiful 3D printed LED speakers. You can\'t actually buy them, but LumiGeek is working on a board that will let you make you\'re own.',
|
||||
'uploader': 'wired',
|
||||
'upload_date': '20130314',
|
||||
'timestamp': 1363219200,
|
||||
}
|
||||
}, {
|
||||
# JS embed
|
||||
@@ -67,70 +74,93 @@ class CondeNastIE(InfoExtractor):
|
||||
'id': '55f9cf8b61646d1acf00000c',
|
||||
'ext': 'mp4',
|
||||
'title': '3D printed TSA Travel Sentry keys really do open TSA locks',
|
||||
'uploader': 'arstechnica',
|
||||
'upload_date': '20150916',
|
||||
'timestamp': 1442434955,
|
||||
}
|
||||
}]
|
||||
|
||||
def _extract_series(self, url, webpage):
|
||||
title = self._html_search_regex(r'<div class="cne-series-info">.*?<h1>(.+?)</h1>',
|
||||
webpage, 'series title', flags=re.DOTALL)
|
||||
title = self._html_search_regex(
|
||||
r'(?s)<div class="cne-series-info">.*?<h1>(.+?)</h1>',
|
||||
webpage, 'series title')
|
||||
url_object = compat_urllib_parse_urlparse(url)
|
||||
base_url = '%s://%s' % (url_object.scheme, url_object.netloc)
|
||||
m_paths = re.finditer(r'<p class="cne-thumb-title">.*?<a href="(/watch/.+?)["\?]',
|
||||
webpage, flags=re.DOTALL)
|
||||
m_paths = re.finditer(
|
||||
r'(?s)<p class="cne-thumb-title">.*?<a href="(/watch/.+?)["\?]', webpage)
|
||||
paths = orderedSet(m.group(1) for m in m_paths)
|
||||
build_url = lambda path: compat_urlparse.urljoin(base_url, path)
|
||||
entries = [self.url_result(build_url(path), 'CondeNast') for path in paths]
|
||||
return self.playlist_result(entries, playlist_title=title)
|
||||
|
||||
def _extract_video(self, webpage, url_type):
|
||||
if url_type != 'embed':
|
||||
description = self._html_search_regex(
|
||||
[
|
||||
r'<div class="cne-video-description">(.+?)</div>',
|
||||
r'<div class="video-post-content">(.+?)</div>',
|
||||
],
|
||||
webpage, 'description', fatal=False, flags=re.DOTALL)
|
||||
query = {}
|
||||
params = self._search_regex(
|
||||
r'(?s)var params = {(.+?)}[;,]', webpage, 'player params', default=None)
|
||||
if params:
|
||||
query.update({
|
||||
'videoId': self._search_regex(r'videoId: [\'"](.+?)[\'"]', params, 'video id'),
|
||||
'playerId': self._search_regex(r'playerId: [\'"](.+?)[\'"]', params, 'player id'),
|
||||
'target': self._search_regex(r'target: [\'"](.+?)[\'"]', params, 'target'),
|
||||
})
|
||||
else:
|
||||
description = None
|
||||
params = self._search_regex(r'var params = {(.+?)}[;,]', webpage,
|
||||
'player params', flags=re.DOTALL)
|
||||
video_id = self._search_regex(r'videoId: [\'"](.+?)[\'"]', params, 'video id')
|
||||
player_id = self._search_regex(r'playerId: [\'"](.+?)[\'"]', params, 'player id')
|
||||
target = self._search_regex(r'target: [\'"](.+?)[\'"]', params, 'target')
|
||||
data = compat_urllib_parse_urlencode({'videoId': video_id,
|
||||
'playerId': player_id,
|
||||
'target': target,
|
||||
})
|
||||
base_info_url = self._search_regex(r'url = [\'"](.+?)[\'"][,;]',
|
||||
webpage, 'base info url',
|
||||
default='http://player.cnevids.com/player/loader.js?')
|
||||
info_url = base_info_url + data
|
||||
info_page = self._download_webpage(info_url, video_id,
|
||||
'Downloading video info')
|
||||
video_info = self._search_regex(r'var\s+video\s*=\s*({.+?});', info_page, 'video info')
|
||||
video_info = self._parse_json(video_info, video_id)
|
||||
params = extract_attributes(self._search_regex(
|
||||
r'(<[^>]+data-js="video-player"[^>]+>)',
|
||||
webpage, 'player params element'))
|
||||
query.update({
|
||||
'videoId': params['data-video'],
|
||||
'playerId': params['data-player'],
|
||||
'target': params['id'],
|
||||
})
|
||||
video_id = query['videoId']
|
||||
video_info = None
|
||||
info_page = self._download_webpage(
|
||||
'http://player.cnevids.com/player/video.js',
|
||||
video_id, 'Downloading video info', query=query, fatal=False)
|
||||
if info_page:
|
||||
video_info = self._parse_json(self._search_regex(
|
||||
r'loadCallback\(({.+})\)', info_page, 'video info'), video_id)['video']
|
||||
else:
|
||||
info_page = self._download_webpage(
|
||||
'http://player.cnevids.com/player/loader.js',
|
||||
video_id, 'Downloading loader info', query=query)
|
||||
video_info = self._parse_json(self._search_regex(
|
||||
r'var\s+video\s*=\s*({.+?});', info_page, 'video info'), video_id)
|
||||
title = video_info['title']
|
||||
|
||||
formats = [{
|
||||
'format_id': '%s-%s' % (fdata['type'].split('/')[-1], fdata['quality']),
|
||||
'url': fdata['src'],
|
||||
'ext': fdata['type'].split('/')[-1],
|
||||
'quality': 1 if fdata['quality'] == 'high' else 0,
|
||||
} for fdata in video_info['sources'][0]]
|
||||
formats = []
|
||||
for fdata in video_info.get('sources', [{}])[0]:
|
||||
src = fdata.get('src')
|
||||
if not src:
|
||||
continue
|
||||
ext = mimetype2ext(fdata.get('type')) or determine_ext(src)
|
||||
quality = fdata.get('quality')
|
||||
formats.append({
|
||||
'format_id': ext + ('-%s' % quality if quality else ''),
|
||||
'url': src,
|
||||
'ext': ext,
|
||||
'quality': 1 if quality == 'high' else 0,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
info = self._search_json_ld(
|
||||
webpage, video_id, fatal=False) if url_type != 'embed' else {}
|
||||
info.update({
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'title': video_info['title'],
|
||||
'thumbnail': video_info['poster_frame'],
|
||||
'description': description,
|
||||
}
|
||||
'title': title,
|
||||
'thumbnail': video_info.get('poster_frame'),
|
||||
'uploader': video_info.get('brand'),
|
||||
'duration': int_or_none(video_info.get('duration')),
|
||||
'tags': video_info.get('tags'),
|
||||
'series': video_info.get('series_title'),
|
||||
'season': video_info.get('season_title'),
|
||||
'timestamp': parse_iso8601(video_info.get('premiere_date')),
|
||||
})
|
||||
return info
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
site = mobj.group('site')
|
||||
url_type = mobj.group('type')
|
||||
item_id = mobj.group('id')
|
||||
site, url_type, item_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
# Convert JS embed to regular embed
|
||||
if url_type == 'embedjs':
|
||||
|
@@ -114,6 +114,21 @@ class CrunchyrollIE(CrunchyrollBaseIE):
|
||||
# rtmp
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.crunchyroll.com/rezero-starting-life-in-another-world-/episode-5-the-morning-of-our-promise-is-still-distant-702409',
|
||||
'info_dict': {
|
||||
'id': '702409',
|
||||
'ext': 'mp4',
|
||||
'title': 'Re:ZERO -Starting Life in Another World- Episode 5 – The Morning of Our Promise Is Still Distant',
|
||||
'description': 'md5:97664de1ab24bbf77a9c01918cb7dca9',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'uploader': 'TV TOKYO',
|
||||
'upload_date': '20160508',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.crunchyroll.fr/girl-friend-beta/episode-11-goodbye-la-mode-661697',
|
||||
'only_matching': True,
|
||||
@@ -336,9 +351,18 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
if video_encode_id in video_encode_ids:
|
||||
continue
|
||||
video_encode_ids.append(video_encode_id)
|
||||
|
||||
video_file = xpath_text(stream_info, './file')
|
||||
if not video_file:
|
||||
continue
|
||||
if video_file.startswith('http'):
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
video_file, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
continue
|
||||
|
||||
video_url = xpath_text(stream_info, './host')
|
||||
video_play_path = xpath_text(stream_info, './file')
|
||||
if not video_url or not video_play_path:
|
||||
if not video_url:
|
||||
continue
|
||||
metadata = stream_info.find('./metadata')
|
||||
format_info = {
|
||||
@@ -353,7 +377,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
parsed_video_url = compat_urlparse.urlparse(video_url)
|
||||
direct_video_url = compat_urlparse.urlunparse(parsed_video_url._replace(
|
||||
netloc='v.lvlt.crcdn.net',
|
||||
path='%s/%s' % (remove_end(parsed_video_url.path, '/'), video_play_path.split(':')[-1])))
|
||||
path='%s/%s' % (remove_end(parsed_video_url.path, '/'), video_file.split(':')[-1])))
|
||||
if self._is_valid_url(direct_video_url, video_id, video_format):
|
||||
format_info.update({
|
||||
'url': direct_video_url,
|
||||
@@ -363,7 +387,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
|
||||
format_info.update({
|
||||
'url': video_url,
|
||||
'play_path': video_play_path,
|
||||
'play_path': video_file,
|
||||
'ext': 'flv',
|
||||
})
|
||||
formats.append(format_info)
|
||||
|
@@ -1,13 +1,12 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import parse_iso8601, ExtractorError
|
||||
from ..utils import unified_timestamp
|
||||
|
||||
|
||||
class CtsNewsIE(InfoExtractor):
|
||||
IE_DESC = '華視新聞'
|
||||
# https connection failed (Connection reset)
|
||||
_VALID_URL = r'https?://news\.cts\.com\.tw/[a-z]+/[a-z]+/\d+/(?P<id>\d+)\.html'
|
||||
_TESTS = [{
|
||||
'url': 'http://news.cts.com.tw/cts/international/201501/201501291578109.html',
|
||||
@@ -16,7 +15,7 @@ class CtsNewsIE(InfoExtractor):
|
||||
'id': '201501291578109',
|
||||
'ext': 'mp4',
|
||||
'title': '以色列.真主黨交火 3人死亡',
|
||||
'description': 'md5:95e9b295c898b7ff294f09d450178d7d',
|
||||
'description': '以色列和黎巴嫩真主黨,爆發五年最嚴重衝突,雙方砲轟交火,兩名以軍死亡,還有一名西班牙籍的聯合國維和人...',
|
||||
'timestamp': 1422528540,
|
||||
'upload_date': '20150129',
|
||||
}
|
||||
@@ -28,7 +27,7 @@ class CtsNewsIE(InfoExtractor):
|
||||
'id': '201309031304098',
|
||||
'ext': 'mp4',
|
||||
'title': '韓國31歲童顏男 貌如十多歲小孩',
|
||||
'description': 'md5:f183feeba3752b683827aab71adad584',
|
||||
'description': '越有年紀的人,越希望看起來年輕一點,而南韓卻有一位31歲的男子,看起來像是11、12歲的小孩,身...',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'timestamp': 1378205880,
|
||||
'upload_date': '20130903',
|
||||
@@ -36,8 +35,7 @@ class CtsNewsIE(InfoExtractor):
|
||||
}, {
|
||||
# With Youtube embedded video
|
||||
'url': 'http://news.cts.com.tw/cts/money/201501/201501291578003.html',
|
||||
'md5': '1d842c771dc94c8c3bca5af2cc1db9c5',
|
||||
'add_ie': ['Youtube'],
|
||||
'md5': 'e4726b2ccd70ba2c319865e28f0a91d1',
|
||||
'info_dict': {
|
||||
'id': 'OVbfO7d0_hQ',
|
||||
'ext': 'mp4',
|
||||
@@ -47,42 +45,37 @@ class CtsNewsIE(InfoExtractor):
|
||||
'upload_date': '20150128',
|
||||
'uploader_id': 'TBSCTS',
|
||||
'uploader': '中華電視公司',
|
||||
}
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
news_id = self._match_id(url)
|
||||
page = self._download_webpage(url, news_id)
|
||||
|
||||
if self._search_regex(r'(CTSPlayer2)', page, 'CTSPlayer2 identifier', default=None):
|
||||
feed_url = self._html_search_regex(
|
||||
r'(http://news\.cts\.com\.tw/action/mp4feed\.php\?news_id=\d+)',
|
||||
page, 'feed url')
|
||||
video_url = self._download_webpage(
|
||||
feed_url, news_id, note='Fetching feed')
|
||||
news_id = self._hidden_inputs(page).get('get_id')
|
||||
|
||||
if news_id:
|
||||
mp4_feed = self._download_json(
|
||||
'http://news.cts.com.tw/action/test_mp4feed.php',
|
||||
news_id, note='Fetching feed', query={'news_id': news_id})
|
||||
video_url = mp4_feed['source_url']
|
||||
else:
|
||||
self.to_screen('Not CTSPlayer video, trying Youtube...')
|
||||
youtube_url = self._search_regex(
|
||||
r'src="(//www\.youtube\.com/embed/[^"]+)"', page, 'youtube url',
|
||||
default=None)
|
||||
if not youtube_url:
|
||||
raise ExtractorError('The news includes no videos!', expected=True)
|
||||
r'src="(//www\.youtube\.com/embed/[^"]+)"', page, 'youtube url')
|
||||
|
||||
return {
|
||||
'_type': 'url',
|
||||
'url': youtube_url,
|
||||
'ie_key': 'Youtube',
|
||||
}
|
||||
return self.url_result(youtube_url, ie='Youtube')
|
||||
|
||||
description = self._html_search_meta('description', page)
|
||||
title = self._html_search_meta('title', page)
|
||||
title = self._html_search_meta('title', page, fatal=True)
|
||||
thumbnail = self._html_search_meta('image', page)
|
||||
|
||||
datetime_str = self._html_search_regex(
|
||||
r'(\d{4}/\d{2}/\d{2} \d{2}:\d{2})', page, 'date and time')
|
||||
# Transform into ISO 8601 format with timezone info
|
||||
datetime_str = datetime_str.replace('/', '-') + ':00+0800'
|
||||
timestamp = parse_iso8601(datetime_str, delimiter=' ')
|
||||
r'(\d{4}/\d{2}/\d{2} \d{2}:\d{2})', page, 'date and time', fatal=False)
|
||||
timestamp = None
|
||||
if datetime_str:
|
||||
timestamp = unified_timestamp(datetime_str) - 8 * 3600
|
||||
|
||||
return {
|
||||
'id': news_id,
|
||||
|
@@ -9,7 +9,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class CWTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?cw(?:tv|seed)\.com/(?:shows/)?(?:[^/]+/){2}\?.*\bplay=(?P<id>[a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{12})'
|
||||
_VALID_URL = r'https?://(?:www\.)?cw(?:tv(?:pr)?|seed)\.com/(?:shows/)?(?:[^/]+/)+[^?]*\?.*\b(?:play|watch)=(?P<id>[a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{12})'
|
||||
_TESTS = [{
|
||||
'url': 'http://cwtv.com/shows/arrow/legends-of-yesterday/?play=6b15e985-9345-4f60-baf8-56e96be57c63',
|
||||
'info_dict': {
|
||||
@@ -28,7 +28,8 @@ class CWTVIE(InfoExtractor):
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
'skip': 'redirect to http://cwtv.com/shows/arrow/',
|
||||
}, {
|
||||
'url': 'http://www.cwseed.com/shows/whose-line-is-it-anyway/jeff-davis-4/?play=24282b12-ead2-42f2-95ad-26770c2c6088',
|
||||
'info_dict': {
|
||||
@@ -44,22 +45,43 @@ class CWTVIE(InfoExtractor):
|
||||
'upload_date': '20151006',
|
||||
'timestamp': 1444107300,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://cwtv.com/thecw/chroniclesofcisco/?play=8adebe35-f447-465f-ab52-e863506ff6d6',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://cwtvpr.com/the-cw/video?watch=9eee3f60-ef4e-440b-b3b2-49428ac9c54e',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://cwtv.com/shows/arrow/legends-of-yesterday/?watch=6b15e985-9345-4f60-baf8-56e96be57c63',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
video_data = self._download_json(
|
||||
'http://metaframe.digitalsmiths.tv/v2/CWtv/assets/%s/partner/132?format=json' % video_id, video_id)
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
video_data['videos']['variantplaylist']['uri'], video_id, 'mp4')
|
||||
video_data = None
|
||||
formats = []
|
||||
for partner in (154, 213):
|
||||
vdata = self._download_json(
|
||||
'http://metaframe.digitalsmiths.tv/v2/CWtv/assets/%s/partner/%d?format=json' % (video_id, partner), video_id, fatal=False)
|
||||
if not vdata:
|
||||
continue
|
||||
video_data = vdata
|
||||
for quality, quality_data in vdata.get('videos', {}).items():
|
||||
quality_url = quality_data.get('uri')
|
||||
if not quality_url:
|
||||
continue
|
||||
if quality == 'variantplaylist':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
quality_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
tbr = int_or_none(quality_data.get('bitrate'))
|
||||
format_id = 'http' + ('-%d' % tbr if tbr else '')
|
||||
if self._is_valid_url(quality_url, video_id, format_id):
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': quality_url,
|
||||
'tbr': tbr,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnails = [{
|
||||
|
@@ -331,7 +331,9 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
|
||||
|
||||
for video_id in re.findall(r'data-xid="(.+?)"', webpage):
|
||||
if video_id not in video_ids:
|
||||
yield self.url_result('http://www.dailymotion.com/video/%s' % video_id, 'Dailymotion')
|
||||
yield self.url_result(
|
||||
'http://www.dailymotion.com/video/%s' % video_id,
|
||||
DailymotionIE.ie_key(), video_id)
|
||||
video_ids.add(video_id)
|
||||
|
||||
if re.search(self._MORE_PAGES_INDICATOR, webpage) is None:
|
||||
|
98
youtube_dl/extractor/discoverygo.py
Normal file
98
youtube_dl/extractor/discoverygo.py
Normal file
@@ -0,0 +1,98 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
||||
class DiscoveryGoIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?discoverygo\.com/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
_TEST = {
|
||||
'url': 'https://www.discoverygo.com/love-at-first-kiss/kiss-first-ask-questions-later/',
|
||||
'info_dict': {
|
||||
'id': '57a33c536b66d1cd0345eeb1',
|
||||
'ext': 'mp4',
|
||||
'title': 'Kiss First, Ask Questions Later!',
|
||||
'description': 'md5:fe923ba34050eae468bffae10831cb22',
|
||||
'duration': 2579,
|
||||
'series': 'Love at First Kiss',
|
||||
'season_number': 1,
|
||||
'episode_number': 1,
|
||||
'age_limit': 14,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
container = extract_attributes(
|
||||
self._search_regex(
|
||||
r'(<div[^>]+class=["\']video-player-container[^>]+>)',
|
||||
webpage, 'video container'))
|
||||
|
||||
video = self._parse_json(
|
||||
unescapeHTML(container.get('data-video') or container.get('data-json')),
|
||||
display_id)
|
||||
|
||||
title = video['name']
|
||||
|
||||
stream = video['stream']
|
||||
STREAM_URL_SUFFIX = 'streamUrl'
|
||||
formats = []
|
||||
for stream_kind in ('', 'hds'):
|
||||
suffix = STREAM_URL_SUFFIX.capitalize() if stream_kind else STREAM_URL_SUFFIX
|
||||
stream_url = stream.get('%s%s' % (stream_kind, suffix))
|
||||
if not stream_url:
|
||||
continue
|
||||
if stream_kind == '':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
stream_url, display_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
elif stream_kind == 'hds':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
stream_url, display_id, f4m_id=stream_kind, fatal=False))
|
||||
self._sort_formats(formats)
|
||||
|
||||
video_id = video.get('id') or display_id
|
||||
description = video.get('description', {}).get('detailed')
|
||||
duration = int_or_none(video.get('duration'))
|
||||
|
||||
series = video.get('show', {}).get('name')
|
||||
season_number = int_or_none(video.get('season', {}).get('number'))
|
||||
episode_number = int_or_none(video.get('episodeNumber'))
|
||||
|
||||
tags = video.get('tags')
|
||||
age_limit = parse_age_limit(video.get('parental', {}).get('rating'))
|
||||
|
||||
subtitles = {}
|
||||
captions = stream.get('captions')
|
||||
if isinstance(captions, list):
|
||||
for caption in captions:
|
||||
subtitle_url = caption.get('fileUrl')
|
||||
if (not subtitle_url or not isinstance(subtitle_url, compat_str) or
|
||||
not subtitle_url.startswith('http')):
|
||||
continue
|
||||
lang = caption.get('fileLang', 'en')
|
||||
subtitles.setdefault(lang, []).append({'url': subtitle_url})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'series': series,
|
||||
'season_number': season_number,
|
||||
'episode_number': episode_number,
|
||||
'tags': tags,
|
||||
'age_limit': age_limit,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
@@ -3,7 +3,10 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import str_to_int
|
||||
from ..utils import (
|
||||
NO_DEFAULT,
|
||||
str_to_int,
|
||||
)
|
||||
|
||||
|
||||
class DrTuberIE(InfoExtractor):
|
||||
@@ -17,7 +20,6 @@ class DrTuberIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'hot perky blonde naked golf',
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'comment_count': int,
|
||||
'categories': ['Babe', 'Blonde', 'Erotic', 'Outdoor', 'Softcore', 'Solo'],
|
||||
'thumbnail': 're:https?://.*\.jpg$',
|
||||
@@ -36,25 +38,29 @@ class DrTuberIE(InfoExtractor):
|
||||
r'<source src="([^"]+)"', webpage, 'video URL')
|
||||
|
||||
title = self._html_search_regex(
|
||||
[r'<p[^>]+class="title_substrate">([^<]+)</p>', r'<title>([^<]+) - \d+'],
|
||||
(r'class="title_watch"[^>]*><p>([^<]+)<',
|
||||
r'<p[^>]+class="title_substrate">([^<]+)</p>',
|
||||
r'<title>([^<]+) - \d+'),
|
||||
webpage, 'title')
|
||||
|
||||
thumbnail = self._html_search_regex(
|
||||
r'poster="([^"]+)"',
|
||||
webpage, 'thumbnail', fatal=False)
|
||||
|
||||
def extract_count(id_, name):
|
||||
def extract_count(id_, name, default=NO_DEFAULT):
|
||||
return str_to_int(self._html_search_regex(
|
||||
r'<span[^>]+(?:class|id)="%s"[^>]*>([\d,\.]+)</span>' % id_,
|
||||
webpage, '%s count' % name, fatal=False))
|
||||
webpage, '%s count' % name, default=default, fatal=False))
|
||||
|
||||
like_count = extract_count('rate_likes', 'like')
|
||||
dislike_count = extract_count('rate_dislikes', 'dislike')
|
||||
dislike_count = extract_count('rate_dislikes', 'dislike', default=None)
|
||||
comment_count = extract_count('comments_count', 'comment')
|
||||
|
||||
cats_str = self._search_regex(
|
||||
r'<div[^>]+class="categories_list">(.+?)</div>', webpage, 'categories', fatal=False)
|
||||
categories = [] if not cats_str else re.findall(r'<a title="([^"]+)"', cats_str)
|
||||
r'<div[^>]+class="categories_list">(.+?)</div>',
|
||||
webpage, 'categories', fatal=False)
|
||||
categories = [] if not cats_str else re.findall(
|
||||
r'<a title="([^"]+)"', cats_str)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@@ -4,9 +4,10 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class EngadgetIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www.engadget.com/video/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://www.engadget.com/video/(?P<id>[^/?#]+)'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
# video with 5min ID
|
||||
'url': 'http://www.engadget.com/video/518153925/',
|
||||
'md5': 'c6820d4828a5064447a4d9fc73f312c9',
|
||||
'info_dict': {
|
||||
@@ -15,8 +16,12 @@ class EngadgetIE(InfoExtractor):
|
||||
'title': 'Samsung Galaxy Tab Pro 8.4 Review',
|
||||
},
|
||||
'add_ie': ['FiveMin'],
|
||||
}
|
||||
}, {
|
||||
# video with vidible ID
|
||||
'url': 'https://www.engadget.com/video/57a28462134aa15a39f0421a/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
return self.url_result('5min:%s' % video_id)
|
||||
return self.url_result('aol-video:%s' % video_id)
|
||||
|
@@ -1,7 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
@@ -12,23 +10,22 @@ from ..utils import (
|
||||
class ExpoTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.expotv\.com/videos/[^?#]*/(?P<id>[0-9]+)($|[?#])'
|
||||
_TEST = {
|
||||
'url': 'http://www.expotv.com/videos/reviews/1/24/LinneCardscom/17561',
|
||||
'md5': '2985e6d7a392b2f7a05e0ca350fe41d0',
|
||||
'url': 'http://www.expotv.com/videos/reviews/3/40/NYX-Butter-lipstick/667916',
|
||||
'md5': 'fe1d728c3a813ff78f595bc8b7a707a8',
|
||||
'info_dict': {
|
||||
'id': '17561',
|
||||
'id': '667916',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20060212',
|
||||
'title': 'My Favorite Online Scrapbook Store',
|
||||
'view_count': int,
|
||||
'description': 'You\'ll find most everything you need at this virtual store front.',
|
||||
'uploader': 'Anna T.',
|
||||
'title': 'NYX Butter Lipstick Little Susie',
|
||||
'description': 'Goes on like butter, but looks better!',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'uploader': 'Stephanie S.',
|
||||
'upload_date': '20150520',
|
||||
'view_count': int,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
player_key = self._search_regex(
|
||||
@@ -66,7 +63,7 @@ class ExpoTVIE(InfoExtractor):
|
||||
fatal=False)
|
||||
upload_date = unified_strdate(self._search_regex(
|
||||
r'<h5>Reviewed on ([0-9/.]+)</h5>', webpage, 'upload date',
|
||||
fatal=False))
|
||||
fatal=False), day_first=False)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@@ -29,6 +29,7 @@ from .aftonbladet import AftonbladetIE
|
||||
from .airmozilla import AirMozillaIE
|
||||
from .aljazeera import AlJazeeraIE
|
||||
from .alphaporno import AlphaPornoIE
|
||||
from .amcnetworks import AMCNetworksIE
|
||||
from .animeondemand import AnimeOnDemandIE
|
||||
from .anitube import AnitubeIE
|
||||
from .anysex import AnySexIE
|
||||
@@ -159,8 +160,9 @@ from .coub import CoubIE
|
||||
from .collegerama import CollegeRamaIE
|
||||
from .comedycentral import (
|
||||
ComedyCentralIE,
|
||||
ComedyCentralShowsIE,
|
||||
ComedyCentralShortnameIE,
|
||||
ComedyCentralTVIE,
|
||||
ToshIE,
|
||||
)
|
||||
from .comcarcoff import ComCarCoffIE
|
||||
from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
|
||||
@@ -220,6 +222,7 @@ from .dvtv import DVTVIE
|
||||
from .dumpert import DumpertIE
|
||||
from .defense import DefenseGouvFrIE
|
||||
from .discovery import DiscoveryIE
|
||||
from .discoverygo import DiscoveryGoIE
|
||||
from .dispeak import DigitallySpeakingIE
|
||||
from .dropbox import DropboxIE
|
||||
from .dw import (
|
||||
@@ -270,10 +273,7 @@ from .fox import FOXIE
|
||||
from .foxgay import FoxgayIE
|
||||
from .foxnews import FoxNewsIE
|
||||
from .foxsports import FoxSportsIE
|
||||
from .franceculture import (
|
||||
FranceCultureIE,
|
||||
FranceCultureEmissionIE,
|
||||
)
|
||||
from .franceculture import FranceCultureIE
|
||||
from .franceinter import FranceInterIE
|
||||
from .francetv import (
|
||||
PluzzIE,
|
||||
@@ -288,8 +288,8 @@ from .freevideo import FreeVideoIE
|
||||
from .funimation import FunimationIE
|
||||
from .funnyordie import FunnyOrDieIE
|
||||
from .fusion import FusionIE
|
||||
from .fxnetworks import FXNetworksIE
|
||||
from .gameinformer import GameInformerIE
|
||||
from .gamekings import GamekingsIE
|
||||
from .gameone import (
|
||||
GameOneIE,
|
||||
GameOnePlaylistIE,
|
||||
@@ -310,7 +310,6 @@ from .globo import (
|
||||
)
|
||||
from .godtube import GodTubeIE
|
||||
from .godtv import GodTVIE
|
||||
from .goldenmoustache import GoldenMoustacheIE
|
||||
from .golem import GolemIE
|
||||
from .googledrive import GoogleDriveIE
|
||||
from .googleplus import GooglePlusIE
|
||||
@@ -325,6 +324,7 @@ from .heise import HeiseIE
|
||||
from .hellporno import HellPornoIE
|
||||
from .helsinki import HelsinkiIE
|
||||
from .hentaistigma import HentaiStigmaIE
|
||||
from .hgtv import HGTVIE
|
||||
from .historicfilms import HistoricFilmsIE
|
||||
from .hitbox import HitboxIE, HitboxLiveIE
|
||||
from .hornbunny import HornBunnyIE
|
||||
@@ -480,7 +480,6 @@ from .msn import MSNIE
|
||||
from .mtv import (
|
||||
MTVIE,
|
||||
MTVServicesEmbeddedIE,
|
||||
MTVIggyIE,
|
||||
MTVDEIE,
|
||||
)
|
||||
from .muenchentv import MuenchenTVIE
|
||||
@@ -492,8 +491,9 @@ from .myvi import MyviIE
|
||||
from .myvideo import MyVideoIE
|
||||
from .myvidster import MyVidsterIE
|
||||
from .nationalgeographic import (
|
||||
NationalGeographicVideoIE,
|
||||
NationalGeographicIE,
|
||||
NationalGeographicChannelIE,
|
||||
NationalGeographicEpisodeGuideIE,
|
||||
)
|
||||
from .naver import NaverIE
|
||||
from .nba import NBAIE
|
||||
@@ -530,7 +530,6 @@ from .nextmedia import (
|
||||
NextMediaActionNewsIE,
|
||||
AppleDailyIE,
|
||||
)
|
||||
from .nextmovie import NextMovieIE
|
||||
from .nfb import NFBIE
|
||||
from .nfl import NFLIE
|
||||
from .nhl import (
|
||||
@@ -637,6 +636,7 @@ from .pluralsight import (
|
||||
PluralsightCourseIE,
|
||||
)
|
||||
from .podomatic import PodomaticIE
|
||||
from .pokemon import PokemonIE
|
||||
from .polskieradio import PolskieRadioIE
|
||||
from .porn91 import Porn91IE
|
||||
from .pornhd import PornHdIE
|
||||
@@ -695,6 +695,7 @@ from .rockstargames import RockstarGamesIE
|
||||
from .roosterteeth import RoosterTeethIE
|
||||
from .rottentomatoes import RottenTomatoesIE
|
||||
from .roxwel import RoxwelIE
|
||||
from .rozhlas import RozhlasIE
|
||||
from .rtbf import RTBFIE
|
||||
from .rte import RteIE, RteRadioIE
|
||||
from .rtlnl import RtlNlIE
|
||||
@@ -754,6 +755,7 @@ from .smotri import (
|
||||
)
|
||||
from .snotr import SnotrIE
|
||||
from .sohu import SohuIE
|
||||
from .sonyliv import SonyLIVIE
|
||||
from .soundcloud import (
|
||||
SoundcloudIE,
|
||||
SoundcloudSetIE,
|
||||
@@ -809,7 +811,6 @@ from .tagesschau import (
|
||||
TagesschauPlayerIE,
|
||||
TagesschauIE,
|
||||
)
|
||||
from .tapely import TapelyIE
|
||||
from .tass import TassIE
|
||||
from .tdslifeway import TDSLifewayIE
|
||||
from .teachertube import (
|
||||
@@ -893,10 +894,14 @@ from .tvc import (
|
||||
from .tvigle import TvigleIE
|
||||
from .tvland import TVLandIE
|
||||
from .tvp import (
|
||||
TVPEmbedIE,
|
||||
TVPIE,
|
||||
TVPSeriesIE,
|
||||
)
|
||||
from .tvplay import TVPlayIE
|
||||
from .tvplay import (
|
||||
TVPlayIE,
|
||||
ViafreeIE,
|
||||
)
|
||||
from .tweakers import TweakersIE
|
||||
from .twentyfourvideo import TwentyFourVideoIE
|
||||
from .twentymin import TwentyMinutenIE
|
||||
@@ -925,6 +930,11 @@ from .udemy import (
|
||||
from .udn import UDNEmbedIE
|
||||
from .digiteka import DigitekaIE
|
||||
from .unistra import UnistraIE
|
||||
from .uol import UOLIE
|
||||
from .uplynk import (
|
||||
UplynkIE,
|
||||
UplynkPreplayIE,
|
||||
)
|
||||
from .urort import UrortIE
|
||||
from .urplay import URPlayIE
|
||||
from .usatoday import USATodayIE
|
||||
@@ -953,6 +963,7 @@ from .vice import (
|
||||
ViceIE,
|
||||
ViceShowIE,
|
||||
)
|
||||
from .viceland import VicelandIE
|
||||
from .vidbit import VidbitIE
|
||||
from .viddler import ViddlerIE
|
||||
from .videodetective import VideoDetectiveIE
|
||||
@@ -1006,6 +1017,7 @@ from .vk import (
|
||||
)
|
||||
from .vlive import VLiveIE
|
||||
from .vodlocker import VodlockerIE
|
||||
from .vodplatform import VODPlatformIE
|
||||
from .voicerepublic import VoiceRepublicIE
|
||||
from .voxmedia import VoxMediaIE
|
||||
from .vporn import VpornIE
|
||||
@@ -1102,4 +1114,3 @@ from .zingmp3 import (
|
||||
ZingMp3SongIE,
|
||||
ZingMp3AlbumIE,
|
||||
)
|
||||
from .zippcast import ZippCastIE
|
||||
|
@@ -1,22 +1,17 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
sanitized_Request,
|
||||
str_to_int,
|
||||
)
|
||||
from ..utils import str_to_int
|
||||
from .keezmovies import KeezMoviesIE
|
||||
|
||||
|
||||
class ExtremeTubeIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?extremetube\.com/(?:[^/]+/)?video/(?P<id>[^/#?&]+)'
|
||||
class ExtremeTubeIE(KeezMoviesIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?extremetube\.com/(?:[^/]+/)?video/(?:(?P<display_id>[^/]+)-)(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.extremetube.com/video/music-video-14-british-euro-brit-european-cumshots-swallow-652431',
|
||||
'md5': '344d0c6d50e2f16b06e49ca011d8ac69',
|
||||
'md5': '1fb9228f5e3332ec8c057d6ac36f33e0',
|
||||
'info_dict': {
|
||||
'id': 'music-video-14-british-euro-brit-european-cumshots-swallow-652431',
|
||||
'id': '652431',
|
||||
'display_id': 'music-video-14-british-euro-brit-european-cumshots-swallow',
|
||||
'ext': 'mp4',
|
||||
'title': 'Music Video 14 british euro brit european cumshots swallow',
|
||||
'uploader': 'unknown',
|
||||
@@ -35,58 +30,22 @@ class ExtremeTubeIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage, info = self._extract_info(url)
|
||||
|
||||
req = sanitized_Request(url)
|
||||
req.add_header('Cookie', 'age_verified=1')
|
||||
webpage = self._download_webpage(req, video_id)
|
||||
if not info['title']:
|
||||
info['title'] = self._search_regex(
|
||||
r'<h1[^>]+title="([^"]+)"[^>]*>', webpage, 'title')
|
||||
|
||||
video_title = self._html_search_regex(
|
||||
r'<h1 [^>]*?title="([^"]+)"[^>]*>', webpage, 'title')
|
||||
uploader = self._html_search_regex(
|
||||
r'Uploaded by:\s*</strong>\s*(.+?)\s*</div>',
|
||||
webpage, 'uploader', fatal=False)
|
||||
view_count = str_to_int(self._html_search_regex(
|
||||
view_count = str_to_int(self._search_regex(
|
||||
r'Views:\s*</strong>\s*<span>([\d,\.]+)</span>',
|
||||
webpage, 'view count', fatal=False))
|
||||
|
||||
flash_vars = self._parse_json(
|
||||
self._search_regex(
|
||||
r'var\s+flashvars\s*=\s*({.+?});', webpage, 'flash vars'),
|
||||
video_id)
|
||||
|
||||
formats = []
|
||||
for quality_key, video_url in flash_vars.items():
|
||||
height = int_or_none(self._search_regex(
|
||||
r'quality_(\d+)[pP]$', quality_key, 'height', default=None))
|
||||
if not height:
|
||||
continue
|
||||
f = {
|
||||
'url': video_url,
|
||||
}
|
||||
mobj = re.search(
|
||||
r'/(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+', video_url)
|
||||
if mobj:
|
||||
height = int(mobj.group('height'))
|
||||
bitrate = int(mobj.group('bitrate'))
|
||||
f.update({
|
||||
'format_id': '%dp-%dk' % (height, bitrate),
|
||||
'height': height,
|
||||
'tbr': bitrate,
|
||||
})
|
||||
else:
|
||||
f.update({
|
||||
'format_id': '%dp' % height,
|
||||
'height': height,
|
||||
})
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
'formats': formats,
|
||||
info.update({
|
||||
'uploader': uploader,
|
||||
'view_count': view_count,
|
||||
'age_limit': 18,
|
||||
}
|
||||
})
|
||||
|
||||
return info
|
||||
|
@@ -1,24 +1,11 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urllib_parse_urlencode,
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
parse_duration,
|
||||
replace_extension,
|
||||
)
|
||||
|
||||
|
||||
class FiveMinIE(InfoExtractor):
|
||||
IE_NAME = '5min'
|
||||
_VALID_URL = r'(?:5min:(?P<id>\d+)(?::(?P<sid>\d+))?|https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js\?(?P<query>.*))'
|
||||
_VALID_URL = r'(?:5min:|https?://(?:[^/]*?5min\.com/|delivery\.vidible\.tv/aol)(?:(?:Scripts/PlayerSeed\.js|playerseed/?)?\?.*?playList=)?)(?P<id>\d+)'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
@@ -29,8 +16,16 @@ class FiveMinIE(InfoExtractor):
|
||||
'id': '518013791',
|
||||
'ext': 'mp4',
|
||||
'title': 'iPad Mini with Retina Display Review',
|
||||
'description': 'iPad mini with Retina Display review',
|
||||
'duration': 177,
|
||||
'uploader': 'engadget',
|
||||
'upload_date': '20131115',
|
||||
'timestamp': 1384515288,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
{
|
||||
# From http://on.aol.com/video/how-to-make-a-next-level-fruit-salad-518086247
|
||||
@@ -44,108 +39,16 @@ class FiveMinIE(InfoExtractor):
|
||||
},
|
||||
'skip': 'no longer available',
|
||||
},
|
||||
{
|
||||
'url': 'http://embed.5min.com/518726732/',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'http://delivery.vidible.tv/aol?playList=518013791',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
_ERRORS = {
|
||||
'ErrorVideoNotExist': 'We\'re sorry, but the video you are trying to watch does not exist.',
|
||||
'ErrorVideoNoLongerAvailable': 'We\'re sorry, but the video you are trying to watch is no longer available.',
|
||||
'ErrorVideoRejected': 'We\'re sorry, but the video you are trying to watch has been removed.',
|
||||
'ErrorVideoUserNotGeo': 'We\'re sorry, but the video you are trying to watch cannot be viewed from your current location.',
|
||||
'ErrorVideoLibraryRestriction': 'We\'re sorry, but the video you are trying to watch is currently unavailable for viewing at this domain.',
|
||||
'ErrorExposurePermission': 'We\'re sorry, but the video you are trying to watch is currently unavailable for viewing at this domain.',
|
||||
}
|
||||
_QUALITIES = {
|
||||
1: {
|
||||
'width': 640,
|
||||
'height': 360,
|
||||
},
|
||||
2: {
|
||||
'width': 854,
|
||||
'height': 480,
|
||||
},
|
||||
4: {
|
||||
'width': 1280,
|
||||
'height': 720,
|
||||
},
|
||||
8: {
|
||||
'width': 1920,
|
||||
'height': 1080,
|
||||
},
|
||||
16: {
|
||||
'width': 640,
|
||||
'height': 360,
|
||||
},
|
||||
32: {
|
||||
'width': 854,
|
||||
'height': 480,
|
||||
},
|
||||
64: {
|
||||
'width': 1280,
|
||||
'height': 720,
|
||||
},
|
||||
128: {
|
||||
'width': 640,
|
||||
'height': 360,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
sid = mobj.group('sid')
|
||||
|
||||
if mobj.group('query'):
|
||||
qs = compat_parse_qs(mobj.group('query'))
|
||||
if not qs.get('playList'):
|
||||
raise ExtractorError('Invalid URL', expected=True)
|
||||
video_id = qs['playList'][0]
|
||||
if qs.get('sid'):
|
||||
sid = qs['sid'][0]
|
||||
|
||||
embed_url = 'https://embed.5min.com/playerseed/?playList=%s' % video_id
|
||||
if not sid:
|
||||
embed_page = self._download_webpage(embed_url, video_id,
|
||||
'Downloading embed page')
|
||||
sid = self._search_regex(r'sid=(\d+)', embed_page, 'sid')
|
||||
|
||||
response = self._download_json(
|
||||
'https://syn.5min.com/handlers/SenseHandler.ashx?' +
|
||||
compat_urllib_parse_urlencode({
|
||||
'func': 'GetResults',
|
||||
'playlist': video_id,
|
||||
'sid': sid,
|
||||
'isPlayerSeed': 'true',
|
||||
'url': embed_url,
|
||||
}),
|
||||
video_id)
|
||||
if not response['success']:
|
||||
raise ExtractorError(
|
||||
'%s said: %s' % (
|
||||
self.IE_NAME,
|
||||
self._ERRORS.get(response['errorMessage'], response['errorMessage'])),
|
||||
expected=True)
|
||||
info = response['binding'][0]
|
||||
|
||||
formats = []
|
||||
parsed_video_url = compat_urllib_parse_urlparse(compat_parse_qs(
|
||||
compat_urllib_parse_urlparse(info['EmbededURL']).query)['videoUrl'][0])
|
||||
for rendition in info['Renditions']:
|
||||
if rendition['RenditionType'] == 'aac' or rendition['RenditionType'] == 'm3u8':
|
||||
continue
|
||||
else:
|
||||
rendition_url = compat_urlparse.urlunparse(parsed_video_url._replace(path=replace_extension(parsed_video_url.path.replace('//', '/%s/' % rendition['ID']), rendition['RenditionType'])))
|
||||
quality = self._QUALITIES.get(rendition['ID'], {})
|
||||
formats.append({
|
||||
'format_id': '%s-%d' % (rendition['RenditionType'], rendition['ID']),
|
||||
'url': rendition_url,
|
||||
'width': quality.get('width'),
|
||||
'height': quality.get('height'),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': info['Title'],
|
||||
'thumbnail': info.get('ThumbURL'),
|
||||
'duration': parse_duration(info.get('Duration')),
|
||||
'formats': formats,
|
||||
}
|
||||
video_id = self._match_id(url)
|
||||
return self.url_result('aol-video:%s' % video_id)
|
||||
|
@@ -48,7 +48,7 @@ class FlipagramIE(InfoExtractor):
|
||||
flipagram = video_data['flipagram']
|
||||
video = flipagram['video']
|
||||
|
||||
json_ld = self._search_json_ld(webpage, video_id, default=False)
|
||||
json_ld = self._search_json_ld(webpage, video_id, default={})
|
||||
title = json_ld.get('title') or flipagram['captionText']
|
||||
description = json_ld.get('description') or flipagram.get('captionText')
|
||||
|
||||
|
@@ -5,8 +5,8 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class Formula1IE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?formula1\.com/content/fom-website/en/video/\d{4}/\d{1,2}/(?P<id>.+?)\.html'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:www\.)?formula1\.com/(?:content/fom-website/)?en/video/\d{4}/\d{1,2}/(?P<id>.+?)\.html'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.formula1.com/content/fom-website/en/video/2016/5/Race_highlights_-_Spain_2016.html',
|
||||
'md5': '8c79e54be72078b26b89e0e111c0502b',
|
||||
'info_dict': {
|
||||
@@ -15,7 +15,10 @@ class Formula1IE(InfoExtractor):
|
||||
'title': 'Race highlights - Spain 2016',
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.formula1.com/en/video/2016/5/Race_highlights_-_Spain_2016.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
@@ -43,14 +43,14 @@ class FourTubeIE(InfoExtractor):
|
||||
'uploadDate', webpage))
|
||||
thumbnail = self._html_search_meta('thumbnailUrl', webpage)
|
||||
uploader_id = self._html_search_regex(
|
||||
r'<a class="img-avatar" href="[^"]+/channels/([^/"]+)" title="Go to [^"]+ page">',
|
||||
r'<a class="item-to-subscribe" href="[^"]+/channels/([^/"]+)" title="Go to [^"]+ page">',
|
||||
webpage, 'uploader id', fatal=False)
|
||||
uploader = self._html_search_regex(
|
||||
r'<a class="img-avatar" href="[^"]+/channels/[^/"]+" title="Go to ([^"]+) page">',
|
||||
r'<a class="item-to-subscribe" href="[^"]+/channels/[^/"]+" title="Go to ([^"]+) page">',
|
||||
webpage, 'uploader', fatal=False)
|
||||
|
||||
categories_html = self._search_regex(
|
||||
r'(?s)><i class="icon icon-tag"></i>\s*Categories / Tags\s*.*?<ul class="list">(.*?)</ul>',
|
||||
r'(?s)><i class="icon icon-tag"></i>\s*Categories / Tags\s*.*?<ul class="[^"]*?list[^"]*?">(.*?)</ul>',
|
||||
webpage, 'categories', fatal=False)
|
||||
categories = None
|
||||
if categories_html:
|
||||
@@ -59,10 +59,10 @@ class FourTubeIE(InfoExtractor):
|
||||
r'(?s)<li><a.*?>(.*?)</a>', categories_html)]
|
||||
|
||||
view_count = str_to_int(self._search_regex(
|
||||
r'<meta itemprop="interactionCount" content="UserPlays:([0-9,]+)">',
|
||||
r'<meta[^>]+itemprop="interactionCount"[^>]+content="UserPlays:([0-9,]+)">',
|
||||
webpage, 'view count', fatal=False))
|
||||
like_count = str_to_int(self._search_regex(
|
||||
r'<meta itemprop="interactionCount" content="UserLikes:([0-9,]+)">',
|
||||
r'<meta[^>]+itemprop="interactionCount"[^>]+content="UserLikes:([0-9,]+)">',
|
||||
webpage, 'like count', fatal=False))
|
||||
duration = parse_duration(self._html_search_meta('duration', webpage))
|
||||
|
||||
|
@@ -2,7 +2,10 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import smuggle_url
|
||||
from ..utils import (
|
||||
smuggle_url,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
class FOXIE(InfoExtractor):
|
||||
@@ -29,11 +32,12 @@ class FOXIE(InfoExtractor):
|
||||
|
||||
release_url = self._parse_json(self._search_regex(
|
||||
r'"fox_pdk_player"\s*:\s*({[^}]+?})', webpage, 'fox_pdk_player'),
|
||||
video_id)['release_url'] + '&switch=http'
|
||||
video_id)['release_url']
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': 'ThePlatform',
|
||||
'url': smuggle_url(release_url, {'force_smil_url': True}),
|
||||
'url': smuggle_url(update_url_query(
|
||||
release_url, {'switch': 'http'}), {'force_smil_url': True}),
|
||||
'id': video_id,
|
||||
}
|
||||
|
@@ -2,104 +2,56 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
ExtractorError,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class FranceCultureIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?franceculture\.fr/player/reecouter\?play=(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?franceculture\.fr/emissions/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.franceculture.fr/player/reecouter?play=4795174',
|
||||
'url': 'http://www.franceculture.fr/emissions/carnet-nomade/rendez-vous-au-pays-des-geeks',
|
||||
'info_dict': {
|
||||
'id': '4795174',
|
||||
'id': 'rendez-vous-au-pays-des-geeks',
|
||||
'display_id': 'rendez-vous-au-pays-des-geeks',
|
||||
'ext': 'mp3',
|
||||
'title': 'Rendez-vous au pays des geeks',
|
||||
'alt_title': 'Carnet nomade | 13-14',
|
||||
'vcodec': 'none',
|
||||
'thumbnail': 're:^https?://.*\\.jpg$',
|
||||
'upload_date': '20140301',
|
||||
'thumbnail': r're:^http://static\.franceculture\.fr/.*/images/player/Carnet-nomade\.jpg$',
|
||||
'description': 'startswith:Avec :Jean-Baptiste Péretié pour son documentaire sur Arte "La revanche',
|
||||
'timestamp': 1393700400,
|
||||
'vcodec': 'none',
|
||||
}
|
||||
}
|
||||
|
||||
def _extract_from_player(self, url, video_id):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
video_path = self._search_regex(
|
||||
r'<a id="player".*?href="([^"]+)"', webpage, 'video path')
|
||||
video_url = compat_urlparse.urljoin(url, video_path)
|
||||
timestamp = int_or_none(self._search_regex(
|
||||
r'<a id="player".*?data-date="([0-9]+)"',
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
video_url = self._search_regex(
|
||||
r'(?s)<div[^>]+class="[^"]*?title-zone-diffusion[^"]*?"[^>]*>.*?<a[^>]+href="([^"]+)"',
|
||||
webpage, 'video path')
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
|
||||
upload_date = unified_strdate(self._search_regex(
|
||||
'(?s)<div[^>]+class="date"[^>]*>.*?<span[^>]+class="inner"[^>]*>([^<]+)<',
|
||||
webpage, 'upload date', fatal=False))
|
||||
thumbnail = self._search_regex(
|
||||
r'<a id="player".*?>\s+<img src="([^"]+)"',
|
||||
r'(?s)<figure[^>]+itemtype="https://schema.org/ImageObject"[^>]*>.*?<img[^>]+data-pagespeed-(?:lazy|high-res)-src="([^"]+)"',
|
||||
webpage, 'thumbnail', fatal=False)
|
||||
|
||||
display_id = self._search_regex(
|
||||
r'<span class="path-diffusion">emission-(.*?)</span>', webpage, 'display_id')
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<span class="title-diffusion">(.*?)</span>', webpage, 'title')
|
||||
alt_title = self._html_search_regex(
|
||||
r'<span class="title">(.*?)</span>',
|
||||
webpage, 'alt_title', fatal=False)
|
||||
description = self._html_search_regex(
|
||||
r'<span class="description">(.*?)</span>',
|
||||
webpage, 'description', fatal=False)
|
||||
|
||||
uploader = self._html_search_regex(
|
||||
r'(?s)<div id="emission".*?<span class="author">(.*?)</span>',
|
||||
webpage, 'uploader', default=None)
|
||||
vcodec = 'none' if determine_ext(video_url.lower()) == 'mp3' else None
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'id': display_id,
|
||||
'display_id': display_id,
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'vcodec': vcodec,
|
||||
'uploader': uploader,
|
||||
'timestamp': timestamp,
|
||||
'title': title,
|
||||
'alt_title': alt_title,
|
||||
'thumbnail': thumbnail,
|
||||
'description': description,
|
||||
'display_id': display_id,
|
||||
'upload_date': upload_date,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
return self._extract_from_player(url, video_id)
|
||||
|
||||
|
||||
class FranceCultureEmissionIE(FranceCultureIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?franceculture\.fr/emission-(?P<id>[^?#]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.franceculture.fr/emission-les-carnets-de-la-creation-jean-gabriel-periot-cineaste-2015-10-13',
|
||||
'info_dict': {
|
||||
'title': 'Jean-Gabriel Périot, cinéaste',
|
||||
'alt_title': 'Les Carnets de la création',
|
||||
'id': '5093239',
|
||||
'display_id': 'les-carnets-de-la-creation-jean-gabriel-periot-cineaste-2015-10-13',
|
||||
'ext': 'mp3',
|
||||
'timestamp': 1444762500,
|
||||
'upload_date': '20151013',
|
||||
'description': 'startswith:Aujourd\'hui dans "Les carnets de la création", le cinéaste',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_path = self._html_search_regex(
|
||||
r'<a class="rf-player-open".*?href="([^"]+)"', webpage, 'video path', 'no_path_player')
|
||||
if video_path == 'no_path_player':
|
||||
raise ExtractorError('no player : no sound in this page.', expected=True)
|
||||
new_id = self._search_regex('play=(?P<id>[0-9]+)', video_path, 'new_id', group='id')
|
||||
video_url = compat_urlparse.urljoin(url, video_path)
|
||||
return self._extract_from_player(video_url, new_id)
|
||||
|
@@ -131,7 +131,7 @@ class PluzzIE(FranceTVBaseInfoExtractor):
|
||||
|
||||
class FranceTvInfoIE(FranceTVBaseInfoExtractor):
|
||||
IE_NAME = 'francetvinfo.fr'
|
||||
_VALID_URL = r'https?://(?:www|mobile|france3-regions)\.francetvinfo\.fr/.*/(?P<title>.+)\.html'
|
||||
_VALID_URL = r'https?://(?:www|mobile|france3-regions)\.francetvinfo\.fr/(?:[^/]+/)*(?P<title>[^/?#&.]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html',
|
||||
@@ -206,6 +206,9 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor):
|
||||
'uploader_id': 'x2q2ez',
|
||||
},
|
||||
'add_ie': ['Dailymotion'],
|
||||
}, {
|
||||
'url': 'http://france3-regions.francetvinfo.fr/limousin/emissions/jt-1213-limousin',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
70
youtube_dl/extractor/fxnetworks.py
Normal file
70
youtube_dl/extractor/fxnetworks.py
Normal file
@@ -0,0 +1,70 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .adobepass import AdobePassIE
|
||||
from ..utils import (
|
||||
update_url_query,
|
||||
extract_attributes,
|
||||
parse_age_limit,
|
||||
smuggle_url,
|
||||
)
|
||||
|
||||
|
||||
class FXNetworksIE(AdobePassIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:fxnetworks|simpsonsworld)\.com/video/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.fxnetworks.com/video/719841347694',
|
||||
'md5': '1447d4722e42ebca19e5232ab93abb22',
|
||||
'info_dict': {
|
||||
'id': '719841347694',
|
||||
'ext': 'mp4',
|
||||
'title': 'Vanpage',
|
||||
'description': 'F*ck settling down. You\'re the Worst returns for an all new season August 31st on FXX.',
|
||||
'age_limit': 14,
|
||||
'uploader': 'NEWA-FNG-FX',
|
||||
'upload_date': '20160706',
|
||||
'timestamp': 1467844741,
|
||||
},
|
||||
'add_ie': ['ThePlatform'],
|
||||
}, {
|
||||
'url': 'http://www.simpsonsworld.com/video/716094019682',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
if 'The content you are trying to access is not available in your region.' in webpage:
|
||||
self.raise_geo_restricted()
|
||||
video_data = extract_attributes(self._search_regex(
|
||||
r'(<a.+?rel="http://link\.theplatform\.com/s/.+?</a>)', webpage, 'video data'))
|
||||
player_type = self._search_regex(r'playerType\s*=\s*[\'"]([^\'"]+)', webpage, 'player type', default=None)
|
||||
release_url = video_data['rel']
|
||||
title = video_data['data-title']
|
||||
rating = video_data.get('data-rating')
|
||||
query = {
|
||||
'mbr': 'true',
|
||||
}
|
||||
if player_type == 'movies':
|
||||
query.update({
|
||||
'manifest': 'm3u',
|
||||
})
|
||||
else:
|
||||
query.update({
|
||||
'switch': 'http',
|
||||
})
|
||||
if video_data.get('data-req-auth') == '1':
|
||||
resource = self._get_mvpd_resource(
|
||||
video_data['data-channel'], title,
|
||||
video_data.get('data-guid'), rating)
|
||||
query['auth'] = self._extract_mvpd_auth(url, video_id, 'fx', resource)
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'url': smuggle_url(update_url_query(release_url, query), {'force_smil_url': True}),
|
||||
'thumbnail': video_data.get('data-large-thumb'),
|
||||
'age_limit': parse_age_limit(rating),
|
||||
'ie_key': 'ThePlatform',
|
||||
}
|
@@ -1,76 +0,0 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
xpath_text,
|
||||
xpath_with_ns,
|
||||
)
|
||||
from .youtube import YoutubeIE
|
||||
|
||||
|
||||
class GamekingsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.gamekings\.nl/(?:videos|nieuws)/(?P<id>[^/]+)'
|
||||
_TESTS = [{
|
||||
# YouTube embed video
|
||||
'url': 'http://www.gamekings.nl/videos/phoenix-wright-ace-attorney-dual-destinies-review/',
|
||||
'md5': '5208d3a17adeaef829a7861887cb9029',
|
||||
'info_dict': {
|
||||
'id': 'HkSQKetlGOU',
|
||||
'ext': 'mp4',
|
||||
'title': 'Phoenix Wright: Ace Attorney - Dual Destinies Review',
|
||||
'description': 'md5:db88c0e7f47e9ea50df3271b9dc72e1d',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'uploader_id': 'UCJugRGo4STYMeFr5RoOShtQ',
|
||||
'uploader': 'Gamekings Vault',
|
||||
'upload_date': '20151123',
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
}, {
|
||||
# vimeo video
|
||||
'url': 'http://www.gamekings.nl/videos/the-legend-of-zelda-majoras-mask/',
|
||||
'md5': '12bf04dfd238e70058046937657ea68d',
|
||||
'info_dict': {
|
||||
'id': 'the-legend-of-zelda-majoras-mask',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Legend of Zelda: Majora’s Mask',
|
||||
'description': 'md5:9917825fe0e9f4057601fe1e38860de3',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.gamekings.nl/nieuws/gamekings-extra-shelly-en-david-bereiden-zich-voor-op-de-livestream/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
playlist_id = self._search_regex(
|
||||
r'gogoVideo\([^,]+,\s*"([^"]+)', webpage, 'playlist id')
|
||||
|
||||
# Check if a YouTube embed is used
|
||||
if YoutubeIE.suitable(playlist_id):
|
||||
return self.url_result(playlist_id, ie='Youtube')
|
||||
|
||||
playlist = self._download_xml(
|
||||
'http://www.gamekings.tv/wp-content/themes/gk2010/rss_playlist.php?id=%s' % playlist_id,
|
||||
video_id)
|
||||
|
||||
NS_MAP = {
|
||||
'jwplayer': 'http://rss.jwpcdn.com/'
|
||||
}
|
||||
|
||||
item = playlist.find('./channel/item')
|
||||
|
||||
thumbnail = xpath_text(item, xpath_with_ns('./jwplayer:image', NS_MAP), 'thumbnail')
|
||||
video_url = item.find(xpath_with_ns('./jwplayer:source', NS_MAP)).get('file')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': self._og_search_title(webpage),
|
||||
'description': self._og_search_description(webpage),
|
||||
'thumbnail': thumbnail,
|
||||
}
|
@@ -71,6 +71,8 @@ from .vessel import VesselIE
|
||||
from .kaltura import KalturaIE
|
||||
from .eagleplatform import EaglePlatformIE
|
||||
from .facebook import FacebookIE
|
||||
from .soundcloud import SoundcloudIE
|
||||
from .vbox7 import Vbox7IE
|
||||
|
||||
|
||||
class GenericIE(InfoExtractor):
|
||||
@@ -474,7 +476,7 @@ class GenericIE(InfoExtractor):
|
||||
'url': 'http://www.vestifinance.ru/articles/25753',
|
||||
'info_dict': {
|
||||
'id': '25753',
|
||||
'title': 'Вести Экономика ― Прямые трансляции с Форума-выставки "Госзаказ-2013"',
|
||||
'title': 'Прямые трансляции с Форума-выставки "Госзаказ-2013"',
|
||||
},
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
@@ -641,6 +643,8 @@ class GenericIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Key and Peele|October 10, 2012|2|203|Liam Neesons - Uncensored',
|
||||
'description': 'Two valets share their love for movie star Liam Neesons.',
|
||||
'timestamp': 1349922600,
|
||||
'upload_date': '20121011',
|
||||
},
|
||||
},
|
||||
# YouTube embed via <data-embed-url="">
|
||||
@@ -782,6 +786,15 @@ class GenericIE(InfoExtractor):
|
||||
'upload_date': '20141029',
|
||||
}
|
||||
},
|
||||
# Soundcloud multiple embeds
|
||||
{
|
||||
'url': 'http://www.guitarplayer.com/lessons/1014/legato-workout-one-hour-to-more-fluid-performance---tab/52809',
|
||||
'info_dict': {
|
||||
'id': '52809',
|
||||
'title': 'Guitar Essentials: Legato Workout—One-Hour to Fluid Performance | TAB + AUDIO',
|
||||
},
|
||||
'playlist_mincount': 7,
|
||||
},
|
||||
# Livestream embed
|
||||
{
|
||||
'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
|
||||
@@ -857,6 +870,7 @@ class GenericIE(InfoExtractor):
|
||||
'description': 'md5:601cb790edd05908957dae8aaa866465',
|
||||
'upload_date': '20150220',
|
||||
},
|
||||
'skip': 'All The Daily Show URLs now redirect to http://www.cc.com/shows/',
|
||||
},
|
||||
# jwplayer YouTube
|
||||
{
|
||||
@@ -1360,6 +1374,18 @@ class GenericIE(InfoExtractor):
|
||||
},
|
||||
'add_ie': [ArkenaIE.ie_key()],
|
||||
},
|
||||
{
|
||||
'url': 'http://nova.bg/news/view/2016/08/16/156543/%D0%BD%D0%B0-%D0%BA%D0%BE%D1%81%D1%8A%D0%BC-%D0%BE%D1%82-%D0%B2%D0%B7%D1%80%D0%B8%D0%B2-%D0%BE%D1%82%D1%86%D0%B5%D0%BF%D0%B8%D1%85%D0%B0-%D1%86%D1%8F%D0%BB-%D0%BA%D0%B2%D0%B0%D1%80%D1%82%D0%B0%D0%BB-%D0%B7%D0%B0%D1%80%D0%B0%D0%B4%D0%B8-%D0%B8%D0%B7%D1%82%D0%B8%D1%87%D0%B0%D0%BD%D0%B5-%D0%BD%D0%B0-%D0%B3%D0%B0%D0%B7-%D0%B2-%D0%BF%D0%BB%D0%BE%D0%B2%D0%B4%D0%B8%D0%B2/',
|
||||
'info_dict': {
|
||||
'id': '1c7141f46c',
|
||||
'ext': 'mp4',
|
||||
'title': 'НА КОСЪМ ОТ ВЗРИВ: Изтичане на газ на бензиностанция в Пловдив',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': [Vbox7IE.ie_key()],
|
||||
},
|
||||
# {
|
||||
# # TODO: find another test
|
||||
# # http://schema.org/VideoObject
|
||||
@@ -1996,12 +2022,9 @@ class GenericIE(InfoExtractor):
|
||||
return self.url_result(myvi_url)
|
||||
|
||||
# Look for embedded soundcloud player
|
||||
mobj = re.search(
|
||||
r'<iframe\s+(?:[a-zA-Z0-9_-]+="[^"]+"\s+)*src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"',
|
||||
webpage)
|
||||
if mobj is not None:
|
||||
url = unescapeHTML(mobj.group('url'))
|
||||
return self.url_result(url)
|
||||
soundcloud_urls = SoundcloudIE._extract_urls(webpage)
|
||||
if soundcloud_urls:
|
||||
return _playlist_from_matches(soundcloud_urls, getter=unescapeHTML, ie=SoundcloudIE.ie_key())
|
||||
|
||||
# Look for embedded mtvservices player
|
||||
mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
|
||||
@@ -2197,6 +2220,14 @@ class GenericIE(InfoExtractor):
|
||||
return self.url_result(
|
||||
self._proto_relative_url(unescapeHTML(mobj.group(1))), 'Vine')
|
||||
|
||||
# Look for VODPlatform embeds
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)?vod-platform\.net/embed/[^/?#]+)',
|
||||
webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(
|
||||
self._proto_relative_url(unescapeHTML(mobj.group(1))), 'VODPlatform')
|
||||
|
||||
# Look for Instagram embeds
|
||||
instagram_embed_url = InstagramIE._extract_embed_url(webpage)
|
||||
if instagram_embed_url is not None:
|
||||
@@ -2221,10 +2252,15 @@ class GenericIE(InfoExtractor):
|
||||
'uploader': video_uploader,
|
||||
}
|
||||
|
||||
# Look for VBOX7 embeds
|
||||
vbox7_url = Vbox7IE._extract_url(webpage)
|
||||
if vbox7_url:
|
||||
return self.url_result(vbox7_url, Vbox7IE.ie_key())
|
||||
|
||||
# Looking for http://schema.org/VideoObject
|
||||
json_ld = self._search_json_ld(
|
||||
webpage, video_id, default=None, expected_type='VideoObject')
|
||||
if json_ld and json_ld.get('url'):
|
||||
webpage, video_id, default={}, expected_type='VideoObject')
|
||||
if json_ld.get('url'):
|
||||
info_dict.update({
|
||||
'title': video_title or info_dict['title'],
|
||||
'description': video_description,
|
||||
|
@@ -1,48 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class GoldenMoustacheIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?goldenmoustache\.com/(?P<display_id>[\w-]+)-(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.goldenmoustache.com/suricate-le-poker-3700/',
|
||||
'md5': '0f904432fa07da5054d6c8beb5efb51a',
|
||||
'info_dict': {
|
||||
'id': '3700',
|
||||
'ext': 'mp4',
|
||||
'title': 'Suricate - Le Poker',
|
||||
'description': 'md5:3d1f242f44f8c8cb0a106f1fd08e5dc9',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.goldenmoustache.com/le-lab-tout-effacer-mc-fly-et-carlito-55249/',
|
||||
'md5': '27f0c50fb4dd5f01dc9082fc67cd5700',
|
||||
'info_dict': {
|
||||
'id': '55249',
|
||||
'ext': 'mp4',
|
||||
'title': 'Le LAB - Tout Effacer (Mc Fly et Carlito)',
|
||||
'description': 'md5:9b7fbf11023fb2250bd4b185e3de3b2a',
|
||||
'thumbnail': 're:^https?://.*\.(?:png|jpg)$',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_url = self._html_search_regex(
|
||||
r'data-src-type="mp4" data-src="([^"]+)"', webpage, 'video URL')
|
||||
title = self._html_search_regex(
|
||||
r'<title>(.*?)(?: - Golden Moustache)?</title>', webpage, 'title')
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
description = self._og_search_description(webpage)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'ext': 'mp4',
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
48
youtube_dl/extractor/hgtv.py
Normal file
48
youtube_dl/extractor/hgtv.py
Normal file
@@ -0,0 +1,48 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
smuggle_url,
|
||||
)
|
||||
|
||||
|
||||
class HGTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?hgtv\.ca/[^/]+/video/(?P<id>[^/]+)/video.html'
|
||||
_TEST = {
|
||||
'url': 'http://www.hgtv.ca/homefree/video/overnight-success/video.html?v=738081859718&p=1&s=da#video',
|
||||
'md5': '',
|
||||
'info_dict': {
|
||||
'id': 'aFH__I_5FBOX',
|
||||
'ext': 'mp4',
|
||||
'title': 'Overnight Success',
|
||||
'description': 'After weeks of hard work, high stakes, breakdowns and pep talks, the final 2 contestants compete to win the ultimate dream.',
|
||||
'uploader': 'SHWM-NEW',
|
||||
'timestamp': 1470320034,
|
||||
'upload_date': '20160804',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
embed_vars = self._parse_json(self._search_regex(
|
||||
r'(?s)embed_vars\s*=\s*({.*?});',
|
||||
webpage, 'embed vars'), display_id, js_to_json)
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': smuggle_url(
|
||||
'http://link.theplatform.com/s/dtjsEC/%s?mbr=true&manifest=m3u' % embed_vars['pid'], {
|
||||
'force_smil_url': True
|
||||
}),
|
||||
'series': embed_vars.get('show'),
|
||||
'season_number': int_or_none(embed_vars.get('season')),
|
||||
'episode_number': int_or_none(embed_vars.get('episode')),
|
||||
'ie_key': 'ThePlatform',
|
||||
}
|
@@ -50,12 +50,10 @@ class ImgurIE(InfoExtractor):
|
||||
webpage = self._download_webpage(
|
||||
compat_urlparse.urljoin(url, video_id), video_id)
|
||||
|
||||
width = int_or_none(self._search_regex(
|
||||
r'<param name="width" value="([0-9]+)"',
|
||||
webpage, 'width', fatal=False))
|
||||
height = int_or_none(self._search_regex(
|
||||
r'<param name="height" value="([0-9]+)"',
|
||||
webpage, 'height', fatal=False))
|
||||
width = int_or_none(self._og_search_property(
|
||||
'video:width', webpage, default=None))
|
||||
height = int_or_none(self._og_search_property(
|
||||
'video:height', webpage, default=None))
|
||||
|
||||
video_elements = self._search_regex(
|
||||
r'(?s)<div class="video-elements">(.*?)</div>',
|
||||
|
@@ -36,7 +36,6 @@ class InstagramIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'BA-pQFBG8HZ',
|
||||
'ext': 'mp4',
|
||||
'uploader_id': 'britneyspears',
|
||||
'title': 'Video by britneyspears',
|
||||
'thumbnail': 're:^https?://.*\.jpg',
|
||||
'timestamp': 1453760977,
|
||||
|
@@ -4,10 +4,12 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
)
|
||||
|
||||
|
||||
@@ -28,74 +30,86 @@ class JWPlatformBaseIE(InfoExtractor):
|
||||
return self._parse_jwplayer_data(
|
||||
jwplayer_data, video_id, *args, **kwargs)
|
||||
|
||||
def _parse_jwplayer_data(self, jwplayer_data, video_id, require_title=True, m3u8_id=None, rtmp_params=None):
|
||||
def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True, m3u8_id=None, rtmp_params=None, base_url=None):
|
||||
# JWPlayer backward compatibility: flattened playlists
|
||||
# https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/api/config.js#L81-L96
|
||||
if 'playlist' not in jwplayer_data:
|
||||
jwplayer_data = {'playlist': [jwplayer_data]}
|
||||
|
||||
video_data = jwplayer_data['playlist'][0]
|
||||
entries = []
|
||||
for video_data in jwplayer_data['playlist']:
|
||||
# JWPlayer backward compatibility: flattened sources
|
||||
# https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/playlist/item.js#L29-L35
|
||||
if 'sources' not in video_data:
|
||||
video_data['sources'] = [video_data]
|
||||
|
||||
# JWPlayer backward compatibility: flattened sources
|
||||
# https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/playlist/item.js#L29-L35
|
||||
if 'sources' not in video_data:
|
||||
video_data['sources'] = [video_data]
|
||||
this_video_id = video_id or video_data['mediaid']
|
||||
|
||||
formats = []
|
||||
for source in video_data['sources']:
|
||||
source_url = self._proto_relative_url(source['file'])
|
||||
source_type = source.get('type') or ''
|
||||
if source_type in ('application/vnd.apple.mpegurl', 'hls') or determine_ext(source_url) == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
source_url, video_id, 'mp4', 'm3u8_native', m3u8_id=m3u8_id, fatal=False))
|
||||
elif source_type.startswith('audio'):
|
||||
formats.append({
|
||||
'url': source_url,
|
||||
'vcodec': 'none',
|
||||
})
|
||||
else:
|
||||
a_format = {
|
||||
'url': source_url,
|
||||
'width': int_or_none(source.get('width')),
|
||||
'height': int_or_none(source.get('height')),
|
||||
}
|
||||
if source_url.startswith('rtmp'):
|
||||
a_format['ext'] = 'flv',
|
||||
|
||||
# See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as
|
||||
# of jwplayer.flash.swf
|
||||
rtmp_url_parts = re.split(
|
||||
r'((?:mp4|mp3|flv):)', source_url, 1)
|
||||
if len(rtmp_url_parts) == 3:
|
||||
rtmp_url, prefix, play_path = rtmp_url_parts
|
||||
a_format.update({
|
||||
'url': rtmp_url,
|
||||
'play_path': prefix + play_path,
|
||||
})
|
||||
if rtmp_params:
|
||||
a_format.update(rtmp_params)
|
||||
formats.append(a_format)
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
tracks = video_data.get('tracks')
|
||||
if tracks and isinstance(tracks, list):
|
||||
for track in tracks:
|
||||
if track.get('file') and track.get('kind') == 'captions':
|
||||
subtitles.setdefault(track.get('label') or 'en', []).append({
|
||||
'url': self._proto_relative_url(track['file'])
|
||||
formats = []
|
||||
for source in video_data['sources']:
|
||||
source_url = self._proto_relative_url(source['file'])
|
||||
if base_url:
|
||||
source_url = compat_urlparse.urljoin(base_url, source_url)
|
||||
source_type = source.get('type') or ''
|
||||
ext = mimetype2ext(source_type) or determine_ext(source_url)
|
||||
if source_type == 'hls' or ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
source_url, this_video_id, 'mp4', 'm3u8_native', m3u8_id=m3u8_id, fatal=False))
|
||||
# https://github.com/jwplayer/jwplayer/blob/master/src/js/providers/default.js#L67
|
||||
elif source_type.startswith('audio') or ext in ('oga', 'aac', 'mp3', 'mpeg', 'vorbis'):
|
||||
formats.append({
|
||||
'url': source_url,
|
||||
'vcodec': 'none',
|
||||
'ext': ext,
|
||||
})
|
||||
else:
|
||||
a_format = {
|
||||
'url': source_url,
|
||||
'width': int_or_none(source.get('width')),
|
||||
'height': int_or_none(source.get('height')),
|
||||
'ext': ext,
|
||||
}
|
||||
if source_url.startswith('rtmp'):
|
||||
a_format['ext'] = 'flv'
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_data['title'] if require_title else video_data.get('title'),
|
||||
'description': video_data.get('description'),
|
||||
'thumbnail': self._proto_relative_url(video_data.get('image')),
|
||||
'timestamp': int_or_none(video_data.get('pubdate')),
|
||||
'duration': float_or_none(jwplayer_data.get('duration')),
|
||||
'subtitles': subtitles,
|
||||
'formats': formats,
|
||||
}
|
||||
# See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as
|
||||
# of jwplayer.flash.swf
|
||||
rtmp_url_parts = re.split(
|
||||
r'((?:mp4|mp3|flv):)', source_url, 1)
|
||||
if len(rtmp_url_parts) == 3:
|
||||
rtmp_url, prefix, play_path = rtmp_url_parts
|
||||
a_format.update({
|
||||
'url': rtmp_url,
|
||||
'play_path': prefix + play_path,
|
||||
})
|
||||
if rtmp_params:
|
||||
a_format.update(rtmp_params)
|
||||
formats.append(a_format)
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
tracks = video_data.get('tracks')
|
||||
if tracks and isinstance(tracks, list):
|
||||
for track in tracks:
|
||||
if track.get('file') and track.get('kind') == 'captions':
|
||||
subtitles.setdefault(track.get('label') or 'en', []).append({
|
||||
'url': self._proto_relative_url(track['file'])
|
||||
})
|
||||
|
||||
entries.append({
|
||||
'id': this_video_id,
|
||||
'title': video_data['title'] if require_title else video_data.get('title'),
|
||||
'description': video_data.get('description'),
|
||||
'thumbnail': self._proto_relative_url(video_data.get('image')),
|
||||
'timestamp': int_or_none(video_data.get('pubdate')),
|
||||
'duration': float_or_none(jwplayer_data.get('duration')),
|
||||
'subtitles': subtitles,
|
||||
'formats': formats,
|
||||
})
|
||||
if len(entries) == 1:
|
||||
return entries[0]
|
||||
else:
|
||||
return self.playlist_result(entries)
|
||||
|
||||
|
||||
class JWPlatformIE(JWPlatformBaseIE):
|
||||
|
@@ -62,6 +62,11 @@ class KalturaIE(InfoExtractor):
|
||||
{
|
||||
'url': 'https://cdnapisec.kaltura.com/html5/html5lib/v2.30.2/mwEmbedFrame.php/p/1337/uiconf_id/20540612/entry_id/1_sf5ovm7u?wid=_243342',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
# video with subtitles
|
||||
'url': 'kaltura:111032:1_cw786r8q',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
|
||||
@@ -130,7 +135,6 @@ class KalturaIE(InfoExtractor):
|
||||
video_id, actions, service_url, note='Downloading Kaltura signature')['ks']
|
||||
|
||||
def _get_video_info(self, video_id, partner_id, service_url=None):
|
||||
signature = self._get_kaltura_signature(video_id, partner_id, service_url)
|
||||
actions = [
|
||||
{
|
||||
'action': 'null',
|
||||
@@ -138,18 +142,30 @@ class KalturaIE(InfoExtractor):
|
||||
'clientTag': 'kdp:v3.8.5',
|
||||
'format': 1, # JSON, 2 = XML, 3 = PHP
|
||||
'service': 'multirequest',
|
||||
'ks': signature,
|
||||
},
|
||||
{
|
||||
'expiry': 86400,
|
||||
'service': 'session',
|
||||
'action': 'startWidgetSession',
|
||||
'widgetId': '_%s' % partner_id,
|
||||
},
|
||||
{
|
||||
'action': 'get',
|
||||
'entryId': video_id,
|
||||
'service': 'baseentry',
|
||||
'version': '-1',
|
||||
'ks': '{1:result:ks}',
|
||||
},
|
||||
{
|
||||
'action': 'getbyentryid',
|
||||
'entryId': video_id,
|
||||
'service': 'flavorAsset',
|
||||
'ks': '{1:result:ks}',
|
||||
},
|
||||
{
|
||||
'action': 'list',
|
||||
'filter:entryIdEqual': video_id,
|
||||
'service': 'caption_captionasset',
|
||||
'ks': '{1:result:ks}',
|
||||
},
|
||||
]
|
||||
return self._kaltura_api_call(
|
||||
@@ -161,8 +177,9 @@ class KalturaIE(InfoExtractor):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
partner_id, entry_id = mobj.group('partner_id', 'id')
|
||||
ks = None
|
||||
captions = None
|
||||
if partner_id and entry_id:
|
||||
info, flavor_assets = self._get_video_info(entry_id, partner_id, smuggled_data.get('service_url'))
|
||||
_, info, flavor_assets, captions = self._get_video_info(entry_id, partner_id, smuggled_data.get('service_url'))
|
||||
else:
|
||||
path, query = mobj.group('path', 'query')
|
||||
if not path and not query:
|
||||
@@ -181,7 +198,7 @@ class KalturaIE(InfoExtractor):
|
||||
raise ExtractorError('Invalid URL', expected=True)
|
||||
if 'entry_id' in params:
|
||||
entry_id = params['entry_id'][0]
|
||||
info, flavor_assets = self._get_video_info(entry_id, partner_id)
|
||||
_, info, flavor_assets, captions = self._get_video_info(entry_id, partner_id)
|
||||
elif 'uiconf_id' in params and 'flashvars[referenceId]' in params:
|
||||
reference_id = params['flashvars[referenceId]'][0]
|
||||
webpage = self._download_webpage(url, reference_id)
|
||||
@@ -217,7 +234,7 @@ class KalturaIE(InfoExtractor):
|
||||
formats = []
|
||||
for f in flavor_assets:
|
||||
# Continue if asset is not ready
|
||||
if f['status'] != 2:
|
||||
if f.get('status') != 2:
|
||||
continue
|
||||
video_url = sign_url(
|
||||
'%s/flavorId/%s' % (data_url, f['id']))
|
||||
@@ -240,13 +257,24 @@ class KalturaIE(InfoExtractor):
|
||||
m3u8_url, entry_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
|
||||
self._check_formats(formats, entry_id)
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
if captions:
|
||||
for caption in captions.get('objects', []):
|
||||
# Continue if caption is not ready
|
||||
if f.get('status') != 2:
|
||||
continue
|
||||
subtitles.setdefault(caption.get('languageCode') or caption.get('language'), []).append({
|
||||
'url': '%s/api_v3/service/caption_captionasset/action/serve/captionAssetId/%s' % (self._SERVICE_URL, caption['id']),
|
||||
'ext': caption.get('fileExt'),
|
||||
})
|
||||
|
||||
return {
|
||||
'id': entry_id,
|
||||
'title': info['name'],
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'description': clean_html(info.get('description')),
|
||||
'thumbnail': info.get('thumbnailUrl'),
|
||||
'duration': info.get('duration'),
|
||||
|
@@ -3,64 +3,124 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..aes import aes_decrypt_text
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_parse_unquote,
|
||||
)
|
||||
from ..utils import (
|
||||
sanitized_Request,
|
||||
url_basename,
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
str_to_int,
|
||||
strip_or_none,
|
||||
)
|
||||
|
||||
|
||||
class KeezMoviesIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?keezmovies\.com/video/.+?(?P<id>[0-9]+)(?:[/?&]|$)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:www\.)?keezmovies\.com/video/(?:(?P<display_id>[^/]+)-)?(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.keezmovies.com/video/petite-asian-lady-mai-playing-in-bathtub-1214711',
|
||||
'md5': '1c1e75d22ffa53320f45eeb07bc4cdc0',
|
||||
'info_dict': {
|
||||
'id': '1214711',
|
||||
'display_id': 'petite-asian-lady-mai-playing-in-bathtub',
|
||||
'ext': 'mp4',
|
||||
'title': 'Petite Asian Lady Mai Playing In Bathtub',
|
||||
'age_limit': 18,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'view_count': int,
|
||||
'age_limit': 18,
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.keezmovies.com/video/1214711',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
def _extract_info(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id') or video_id
|
||||
|
||||
req = sanitized_Request(url)
|
||||
req.add_header('Cookie', 'age_verified=1')
|
||||
webpage = self._download_webpage(req, video_id)
|
||||
|
||||
# embedded video
|
||||
mobj = re.search(r'href="([^"]+)"></iframe>', webpage)
|
||||
if mobj:
|
||||
embedded_url = mobj.group(1)
|
||||
return self.url_result(embedded_url)
|
||||
|
||||
video_title = self._html_search_regex(
|
||||
r'<h1 [^>]*>([^<]+)', webpage, 'title')
|
||||
flashvars = self._parse_json(self._search_regex(
|
||||
r'var\s+flashvars\s*=\s*([^;]+);', webpage, 'flashvars'), video_id)
|
||||
webpage = self._download_webpage(
|
||||
url, display_id, headers={'Cookie': 'age_verified=1'})
|
||||
|
||||
formats = []
|
||||
for height in (180, 240, 480):
|
||||
if flashvars.get('quality_%dp' % height):
|
||||
video_url = flashvars['quality_%dp' % height]
|
||||
a_format = {
|
||||
'url': video_url,
|
||||
'height': height,
|
||||
'format_id': '%dp' % height,
|
||||
}
|
||||
filename_parts = url_basename(video_url).split('_')
|
||||
if len(filename_parts) >= 2 and re.match(r'\d+[Kk]', filename_parts[1]):
|
||||
a_format['tbr'] = int(filename_parts[1][:-1])
|
||||
formats.append(a_format)
|
||||
format_urls = set()
|
||||
|
||||
age_limit = self._rta_search(webpage)
|
||||
title = None
|
||||
thumbnail = None
|
||||
duration = None
|
||||
encrypted = False
|
||||
|
||||
return {
|
||||
def extract_format(format_url, height=None):
|
||||
if not isinstance(format_url, compat_str) or not format_url.startswith('http'):
|
||||
return
|
||||
if format_url in format_urls:
|
||||
return
|
||||
format_urls.add(format_url)
|
||||
tbr = int_or_none(self._search_regex(
|
||||
r'[/_](\d+)[kK][/_]', format_url, 'tbr', default=None))
|
||||
if not height:
|
||||
height = int_or_none(self._search_regex(
|
||||
r'[/_](\d+)[pP][/_]', format_url, 'height', default=None))
|
||||
if encrypted:
|
||||
format_url = aes_decrypt_text(
|
||||
video_url, title, 32).decode('utf-8')
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'format_id': '%dp' % height if height else None,
|
||||
'height': height,
|
||||
'tbr': tbr,
|
||||
})
|
||||
|
||||
flashvars = self._parse_json(
|
||||
self._search_regex(
|
||||
r'flashvars\s*=\s*({.+?});', webpage,
|
||||
'flashvars', default='{}'),
|
||||
display_id, fatal=False)
|
||||
|
||||
if flashvars:
|
||||
title = flashvars.get('video_title')
|
||||
thumbnail = flashvars.get('image_url')
|
||||
duration = int_or_none(flashvars.get('video_duration'))
|
||||
encrypted = flashvars.get('encrypted') is True
|
||||
for key, value in flashvars.items():
|
||||
mobj = re.search(r'quality_(\d+)[pP]', key)
|
||||
if mobj:
|
||||
extract_format(value, int(mobj.group(1)))
|
||||
video_url = flashvars.get('video_url')
|
||||
if video_url and determine_ext(video_url, None):
|
||||
extract_format(video_url)
|
||||
|
||||
video_url = self._html_search_regex(
|
||||
r'flashvars\.video_url\s*=\s*(["\'])(?P<url>http.+?)\1',
|
||||
webpage, 'video url', default=None, group='url')
|
||||
if video_url:
|
||||
extract_format(compat_urllib_parse_unquote(video_url))
|
||||
|
||||
if not formats:
|
||||
if 'title="This video is no longer available"' in webpage:
|
||||
raise ExtractorError(
|
||||
'Video %s is no longer available' % video_id, expected=True)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
if not title:
|
||||
title = self._html_search_regex(
|
||||
r'<h1[^>]*>([^<]+)', webpage, 'title')
|
||||
|
||||
return webpage, {
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
'display_id': display_id,
|
||||
'title': strip_or_none(title),
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'age_limit': 18,
|
||||
'formats': formats,
|
||||
'age_limit': age_limit,
|
||||
'thumbnail': flashvars.get('image_url')
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
webpage, info = self._extract_info(url)
|
||||
info['view_count'] = str_to_int(self._search_regex(
|
||||
r'<b>([\d,.]+)</b> Views?', webpage, 'view count', fatal=False))
|
||||
return info
|
||||
|
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
get_element_by_id,
|
||||
clean_html,
|
||||
@@ -242,8 +243,9 @@ class KuwoSingerIE(InfoExtractor):
|
||||
query={'artistId': artist_id, 'pn': page_num, 'rn': self.PAGE_SIZE})
|
||||
|
||||
return [
|
||||
self.url_result(song_url, 'Kuwo') for song_url in re.findall(
|
||||
r'<div[^>]+class="name"><a[^>]+href="(http://www\.kuwo\.cn/yinyue/\d+)',
|
||||
self.url_result(compat_urlparse.urljoin(url, song_url), 'Kuwo')
|
||||
for song_url in re.findall(
|
||||
r'<div[^>]+class="name"><a[^>]+href="(/yinyue/\d+)',
|
||||
webpage)
|
||||
]
|
||||
|
||||
|
@@ -4,7 +4,10 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
@@ -96,7 +99,7 @@ class LifeNewsIE(InfoExtractor):
|
||||
r'<video[^>]+><source[^>]+src=["\'](.+?)["\']', webpage)
|
||||
|
||||
iframe_links = re.findall(
|
||||
r'<iframe[^>]+src=["\']((?:https?:)?//embed\.life\.ru/embed/.+?)["\']',
|
||||
r'<iframe[^>]+src=["\']((?:https?:)?//embed\.life\.ru/(?:embed|video)/.+?)["\']',
|
||||
webpage)
|
||||
|
||||
if not video_urls and not iframe_links:
|
||||
@@ -164,9 +167,9 @@ class LifeNewsIE(InfoExtractor):
|
||||
|
||||
class LifeEmbedIE(InfoExtractor):
|
||||
IE_NAME = 'life:embed'
|
||||
_VALID_URL = r'https?://embed\.life\.ru/embed/(?P<id>[\da-f]{32})'
|
||||
_VALID_URL = r'https?://embed\.life\.ru/(?:embed|video)/(?P<id>[\da-f]{32})'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://embed.life.ru/embed/e50c2dec2867350528e2574c899b8291',
|
||||
'md5': 'b889715c9e49cb1981281d0e5458fbbe',
|
||||
'info_dict': {
|
||||
@@ -175,30 +178,57 @@ class LifeEmbedIE(InfoExtractor):
|
||||
'title': 'e50c2dec2867350528e2574c899b8291',
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
}
|
||||
}
|
||||
}, {
|
||||
# with 1080p
|
||||
'url': 'https://embed.life.ru/video/e50c2dec2867350528e2574c899b8291',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
thumbnail = None
|
||||
formats = []
|
||||
for video_url in re.findall(r'"file"\s*:\s*"([^"]+)', webpage):
|
||||
video_url = compat_urlparse.urljoin(url, video_url)
|
||||
ext = determine_ext(video_url)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
video_url, video_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='m3u8'))
|
||||
else:
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'format_id': ext,
|
||||
'preference': 1,
|
||||
})
|
||||
|
||||
def extract_m3u8(manifest_url):
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
manifest_url, video_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='m3u8'))
|
||||
|
||||
def extract_original(original_url):
|
||||
formats.append({
|
||||
'url': original_url,
|
||||
'format_id': determine_ext(original_url, None),
|
||||
'preference': 1,
|
||||
})
|
||||
|
||||
playlist = self._parse_json(
|
||||
self._search_regex(
|
||||
r'options\s*=\s*({.+?});', webpage, 'options', default='{}'),
|
||||
video_id).get('playlist', {})
|
||||
if playlist:
|
||||
master = playlist.get('master')
|
||||
if isinstance(master, compat_str) and determine_ext(master) == 'm3u8':
|
||||
extract_m3u8(compat_urlparse.urljoin(url, master))
|
||||
original = playlist.get('original')
|
||||
if isinstance(original, compat_str):
|
||||
extract_original(original)
|
||||
thumbnail = playlist.get('image')
|
||||
|
||||
# Old rendition fallback
|
||||
if not formats:
|
||||
for video_url in re.findall(r'"file"\s*:\s*"([^"]+)', webpage):
|
||||
video_url = compat_urlparse.urljoin(url, video_url)
|
||||
if determine_ext(video_url) == 'm3u8':
|
||||
extract_m3u8(video_url)
|
||||
else:
|
||||
extract_original(video_url)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnail = self._search_regex(
|
||||
thumbnail = thumbnail or self._search_regex(
|
||||
r'"image"\s*:\s*"([^"]+)', webpage, 'thumbnail', default=None)
|
||||
|
||||
return {
|
||||
|
@@ -37,11 +37,12 @@ class LimelightBaseIE(InfoExtractor):
|
||||
|
||||
for stream in streams:
|
||||
stream_url = stream.get('url')
|
||||
if not stream_url:
|
||||
if not stream_url or stream.get('drmProtected'):
|
||||
continue
|
||||
if '.f4m' in stream_url:
|
||||
ext = determine_ext(stream_url)
|
||||
if ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
stream_url, video_id, fatal=False))
|
||||
stream_url, video_id, f4m_id='hds', fatal=False))
|
||||
else:
|
||||
fmt = {
|
||||
'url': stream_url,
|
||||
@@ -50,13 +51,19 @@ class LimelightBaseIE(InfoExtractor):
|
||||
'fps': float_or_none(stream.get('videoFrameRate')),
|
||||
'width': int_or_none(stream.get('videoWidthInPixels')),
|
||||
'height': int_or_none(stream.get('videoHeightInPixels')),
|
||||
'ext': determine_ext(stream_url)
|
||||
'ext': ext,
|
||||
}
|
||||
rtmp = re.search(r'^(?P<url>rtmpe?://[^/]+/(?P<app>.+))/(?P<playpath>mp4:.+)$', stream_url)
|
||||
rtmp = re.search(r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+))/(?P<playpath>mp4:.+)$', stream_url)
|
||||
if rtmp:
|
||||
format_id = 'rtmp'
|
||||
if stream.get('videoBitRate'):
|
||||
format_id += '-%d' % int_or_none(stream['videoBitRate'])
|
||||
http_fmt = fmt.copy()
|
||||
http_fmt.update({
|
||||
'url': 'http://%s/%s' % (rtmp.group('host').replace('csl.', 'cpl.'), rtmp.group('playpath')[4:]),
|
||||
'format_id': format_id.replace('rtmp', 'http'),
|
||||
})
|
||||
formats.append(http_fmt)
|
||||
fmt.update({
|
||||
'url': rtmp.group('url'),
|
||||
'play_path': rtmp.group('playpath'),
|
||||
@@ -68,18 +75,23 @@ class LimelightBaseIE(InfoExtractor):
|
||||
|
||||
for mobile_url in mobile_urls:
|
||||
media_url = mobile_url.get('mobileUrl')
|
||||
if not media_url:
|
||||
continue
|
||||
format_id = mobile_url.get('targetMediaPlatform')
|
||||
if determine_ext(media_url) == 'm3u8':
|
||||
if not media_url or format_id == 'Widevine':
|
||||
continue
|
||||
ext = determine_ext(media_url)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
media_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id=format_id, fatal=False))
|
||||
elif ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
stream_url, video_id, f4m_id=format_id, fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'url': media_url,
|
||||
'format_id': format_id,
|
||||
'preference': -1,
|
||||
'ext': ext,
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
@@ -145,7 +157,7 @@ class LimelightMediaIE(LimelightBaseIE):
|
||||
'url': 'http://link.videoplatform.limelight.com/media/?mediaId=3ffd040b522b4485b6d84effc750cd86',
|
||||
'info_dict': {
|
||||
'id': '3ffd040b522b4485b6d84effc750cd86',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'HaP and the HB Prince Trailer',
|
||||
'description': 'md5:8005b944181778e313d95c1237ddb640',
|
||||
'thumbnail': 're:^https?://.*\.jpeg$',
|
||||
@@ -154,27 +166,23 @@ class LimelightMediaIE(LimelightBaseIE):
|
||||
'upload_date': '20090604',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# video with subtitles
|
||||
'url': 'limelight:media:a3e00274d4564ec4a9b29b9466432335',
|
||||
'md5': '2fa3bad9ac321e23860ca23bc2c69e3d',
|
||||
'info_dict': {
|
||||
'id': 'a3e00274d4564ec4a9b29b9466432335',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': '3Play Media Overview Video',
|
||||
'description': '',
|
||||
'thumbnail': 're:^https?://.*\.jpeg$',
|
||||
'duration': 78.101,
|
||||
'timestamp': 1338929955,
|
||||
'upload_date': '20120605',
|
||||
'subtitles': 'mincount:9',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://assets.delvenetworks.com/player/loader.swf?mediaId=8018a574f08d416e95ceaccae4ba0452',
|
||||
'only_matching': True,
|
||||
|
@@ -9,7 +9,7 @@ class MGTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.mgtv\.com/v/(?:[^/]+/)*(?P<id>\d+)\.html'
|
||||
IE_DESC = '芒果TV'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.mgtv.com/v/1/290525/f/3116640.html',
|
||||
'md5': '1bdadcf760a0b90946ca68ee9a2db41a',
|
||||
'info_dict': {
|
||||
@@ -20,7 +20,11 @@ class MGTVIE(InfoExtractor):
|
||||
'duration': 7461,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
# no tbr extracted from stream_url
|
||||
'url': 'http://www.mgtv.com/v/1/1/f/3324755.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
@@ -41,7 +45,8 @@ class MGTVIE(InfoExtractor):
|
||||
def extract_format(stream_url, format_id, idx, query={}):
|
||||
format_info = self._download_json(
|
||||
stream_url, video_id,
|
||||
note='Download video info for format %s' % format_id or '#%d' % idx, query=query)
|
||||
note='Download video info for format %s' % (format_id or '#%d' % idx),
|
||||
query=query)
|
||||
return {
|
||||
'format_id': format_id,
|
||||
'url': format_info['info'],
|
||||
|
@@ -1,53 +1,56 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import os
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_parse_urlparse,
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
str_to_int,
|
||||
unified_strdate,
|
||||
)
|
||||
from ..utils import sanitized_Request
|
||||
from .keezmovies import KeezMoviesIE
|
||||
|
||||
|
||||
class MofosexIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<url>mofosex\.com/videos/(?P<id>[0-9]+)/.*?\.html)'
|
||||
_TEST = {
|
||||
'url': 'http://www.mofosex.com/videos/5018/japanese-teen-music-video.html',
|
||||
'md5': '1b2eb47ac33cc75d4a80e3026b613c5a',
|
||||
class MofosexIE(KeezMoviesIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?mofosex\.com/videos/(?P<id>\d+)/(?P<display_id>[^/?#&.]+)\.html'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.mofosex.com/videos/318131/amateur-teen-playing-and-masturbating-318131.html',
|
||||
'md5': '39a15853632b7b2e5679f92f69b78e91',
|
||||
'info_dict': {
|
||||
'id': '5018',
|
||||
'id': '318131',
|
||||
'display_id': 'amateur-teen-playing-and-masturbating-318131',
|
||||
'ext': 'mp4',
|
||||
'title': 'Japanese Teen Music Video',
|
||||
'title': 'amateur teen playing and masturbating',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'upload_date': '20121114',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'age_limit': 18,
|
||||
}
|
||||
}
|
||||
}, {
|
||||
# This video is no longer available
|
||||
'url': 'http://www.mofosex.com/videos/5018/japanese-teen-music-video.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
url = 'http://www.' + mobj.group('url')
|
||||
webpage, info = self._extract_info(url)
|
||||
|
||||
req = sanitized_Request(url)
|
||||
req.add_header('Cookie', 'age_verified=1')
|
||||
webpage = self._download_webpage(req, video_id)
|
||||
view_count = str_to_int(self._search_regex(
|
||||
r'VIEWS:</span>\s*([\d,.]+)', webpage, 'view count', fatal=False))
|
||||
like_count = int_or_none(self._search_regex(
|
||||
r'id=["\']amountLikes["\'][^>]*>(\d+)', webpage,
|
||||
'like count', fatal=False))
|
||||
dislike_count = int_or_none(self._search_regex(
|
||||
r'id=["\']amountDislikes["\'][^>]*>(\d+)', webpage,
|
||||
'like count', fatal=False))
|
||||
upload_date = unified_strdate(self._html_search_regex(
|
||||
r'Added:</span>([^<]+)', webpage, 'upload date', fatal=False))
|
||||
|
||||
video_title = self._html_search_regex(r'<h1>(.+?)<', webpage, 'title')
|
||||
video_url = compat_urllib_parse_unquote(self._html_search_regex(r'flashvars.video_url = \'([^\']+)', webpage, 'video_url'))
|
||||
path = compat_urllib_parse_urlparse(video_url).path
|
||||
extension = os.path.splitext(path)[1][1:]
|
||||
format = path.split('/')[5].split('_')[:2]
|
||||
format = '-'.join(format)
|
||||
info.update({
|
||||
'view_count': view_count,
|
||||
'like_count': like_count,
|
||||
'dislike_count': dislike_count,
|
||||
'upload_date': upload_date,
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
})
|
||||
|
||||
age_limit = self._rta_search(webpage)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
'url': video_url,
|
||||
'ext': extension,
|
||||
'format': format,
|
||||
'format_id': format,
|
||||
'age_limit': age_limit,
|
||||
}
|
||||
return info
|
||||
|
@@ -16,6 +16,7 @@ from ..utils import (
|
||||
HEADRequest,
|
||||
sanitized_Request,
|
||||
strip_or_none,
|
||||
timeconvert,
|
||||
unescapeHTML,
|
||||
url_basename,
|
||||
RegexNotFoundError,
|
||||
@@ -36,13 +37,13 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||
return uri.split(':')[-1]
|
||||
|
||||
# This was originally implemented for ComedyCentral, but it also works here
|
||||
@staticmethod
|
||||
def _transform_rtmp_url(rtmp_video_url):
|
||||
@classmethod
|
||||
def _transform_rtmp_url(cls, rtmp_video_url):
|
||||
m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp\..+?/.*)$', rtmp_video_url)
|
||||
if not m:
|
||||
return rtmp_video_url
|
||||
return {'rtmp': rtmp_video_url}
|
||||
base = 'http://viacommtvstrmfs.fplive.net/'
|
||||
return base + m.group('finalid')
|
||||
return {'http': base + m.group('finalid')}
|
||||
|
||||
def _get_feed_url(self, uri):
|
||||
return self._FEED_URL
|
||||
@@ -86,14 +87,14 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||
rtmp_video_url = rendition.find('./src').text
|
||||
if rtmp_video_url.endswith('siteunavail.png'):
|
||||
continue
|
||||
new_url = self._transform_rtmp_url(rtmp_video_url)
|
||||
formats.append({
|
||||
new_urls = self._transform_rtmp_url(rtmp_video_url)
|
||||
formats.extend([{
|
||||
'ext': 'flv' if new_url.startswith('rtmp') else ext,
|
||||
'url': new_url,
|
||||
'format_id': rendition.get('bitrate'),
|
||||
'format_id': '-'.join(filter(None, [kind, rendition.get('bitrate')])),
|
||||
'width': int(rendition.get('width')),
|
||||
'height': int(rendition.get('height')),
|
||||
})
|
||||
} for kind, new_url in new_urls.items()])
|
||||
except (KeyError, TypeError):
|
||||
raise ExtractorError('Invalid rendition field.')
|
||||
self._sort_formats(formats)
|
||||
@@ -136,6 +137,8 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||
|
||||
description = strip_or_none(xpath_text(itemdoc, 'description'))
|
||||
|
||||
timestamp = timeconvert(xpath_text(itemdoc, 'pubDate'))
|
||||
|
||||
title_el = None
|
||||
if title_el is None:
|
||||
title_el = find_xpath_attr(
|
||||
@@ -168,6 +171,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||
'thumbnail': self._get_thumbnail_url(uri, itemdoc),
|
||||
'description': description,
|
||||
'duration': float_or_none(content_el.attrib.get('duration')),
|
||||
'timestamp': timestamp,
|
||||
}
|
||||
|
||||
def _get_feed_query(self, uri):
|
||||
@@ -186,8 +190,13 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||
idoc = self._download_xml(
|
||||
url, video_id,
|
||||
'Downloading info', transform_source=fix_xml_ampersands)
|
||||
|
||||
title = xpath_text(idoc, './channel/title')
|
||||
description = xpath_text(idoc, './channel/description')
|
||||
|
||||
return self.playlist_result(
|
||||
[self._get_video_info(item) for item in idoc.findall('.//item')])
|
||||
[self._get_video_info(item) for item in idoc.findall('.//item')],
|
||||
playlist_title=title, playlist_description=description)
|
||||
|
||||
def _extract_mgid(self, webpage):
|
||||
try:
|
||||
@@ -233,6 +242,8 @@ class MTVServicesEmbeddedIE(MTVServicesInfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Peter Dinklage Sums Up \'Game Of Thrones\' In 45 Seconds',
|
||||
'description': '"Sexy sexy sexy, stabby stabby stabby, beautiful language," says Peter Dinklage as he tries summarizing "Game of Thrones" in under a minute.',
|
||||
'timestamp': 1400126400,
|
||||
'upload_date': '20140515',
|
||||
},
|
||||
}
|
||||
|
||||
@@ -275,6 +286,8 @@ class MTVIE(MTVServicesInfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Taylor Swift - "Ours (VH1 Storytellers)"',
|
||||
'description': 'Album: Taylor Swift performs "Ours" for VH1 Storytellers at Harvey Mudd College.',
|
||||
'timestamp': 1352610000,
|
||||
'upload_date': '20121111',
|
||||
},
|
||||
},
|
||||
]
|
||||
@@ -301,20 +314,6 @@ class MTVIE(MTVServicesInfoExtractor):
|
||||
return self._get_videos_info(uri)
|
||||
|
||||
|
||||
class MTVIggyIE(MTVServicesInfoExtractor):
|
||||
IE_NAME = 'mtviggy.com'
|
||||
_VALID_URL = r'https?://www\.mtviggy\.com/videos/.+'
|
||||
_TEST = {
|
||||
'url': 'http://www.mtviggy.com/videos/arcade-fire-behind-the-scenes-at-the-biggest-music-experiment-yet/',
|
||||
'info_dict': {
|
||||
'id': '984696',
|
||||
'ext': 'mp4',
|
||||
'title': 'Arcade Fire: Behind the Scenes at the Biggest Music Experiment Yet',
|
||||
}
|
||||
}
|
||||
_FEED_URL = 'http://all.mtvworldverticals.com/feed-xml/'
|
||||
|
||||
|
||||
class MTVDEIE(MTVServicesInfoExtractor):
|
||||
IE_NAME = 'mtv.de'
|
||||
_VALID_URL = r'https?://(?:www\.)?mtv\.de/(?:artists|shows|news)/(?:[^/]+/)*(?P<id>\d+)-[^/#?]+/*(?:[#?].*)?$'
|
||||
@@ -322,7 +321,7 @@ class MTVDEIE(MTVServicesInfoExtractor):
|
||||
'url': 'http://www.mtv.de/artists/10571-cro/videos/61131-traum',
|
||||
'info_dict': {
|
||||
'id': 'music_video-a50bc5f0b3aa4b3190aa',
|
||||
'ext': 'mp4',
|
||||
'ext': 'flv',
|
||||
'title': 'MusicVideo_cro-traum',
|
||||
'description': 'Cro - Traum',
|
||||
},
|
||||
@@ -330,20 +329,21 @@ class MTVDEIE(MTVServicesInfoExtractor):
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Blocked at Travis CI',
|
||||
}, {
|
||||
# mediagen URL without query (e.g. http://videos.mtvnn.com/mediagen/e865da714c166d18d6f80893195fcb97)
|
||||
'url': 'http://www.mtv.de/shows/933-teen-mom-2/staffeln/5353/folgen/63565-enthullungen',
|
||||
'info_dict': {
|
||||
'id': 'local_playlist-f5ae778b9832cc837189',
|
||||
'ext': 'mp4',
|
||||
'ext': 'flv',
|
||||
'title': 'Episode_teen-mom-2_shows_season-5_episode-1_full-episode_part1',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Blocked at Travis CI',
|
||||
}, {
|
||||
# single video in pagePlaylist with different id
|
||||
'url': 'http://www.mtv.de/news/77491-mtv-movies-spotlight-pixels-teil-3',
|
||||
'info_dict': {
|
||||
'id': 'local_playlist-4e760566473c4c8c5344',
|
||||
@@ -355,6 +355,7 @@ class MTVDEIE(MTVServicesInfoExtractor):
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Das Video kann zur Zeit nicht abgespielt werden.',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -367,11 +368,14 @@ class MTVDEIE(MTVServicesInfoExtractor):
|
||||
r'window\.pagePlaylist\s*=\s*(\[.+?\]);\n', webpage, 'page playlist'),
|
||||
video_id)
|
||||
|
||||
def _mrss_url(item):
|
||||
return item['mrss'] + item.get('mrssvars', '')
|
||||
|
||||
# news pages contain single video in playlist with different id
|
||||
if len(playlist) == 1:
|
||||
return self._get_videos_info_from_url(playlist[0]['mrss'], video_id)
|
||||
return self._get_videos_info_from_url(_mrss_url(playlist[0]), video_id)
|
||||
|
||||
for item in playlist:
|
||||
item_id = item.get('id')
|
||||
if item_id and compat_str(item_id) == video_id:
|
||||
return self._get_videos_info_from_url(item['mrss'], video_id)
|
||||
return self._get_videos_info_from_url(_mrss_url(item), video_id)
|
||||
|
@@ -36,7 +36,7 @@ class MuenchenTVIE(InfoExtractor):
|
||||
title = self._live_title(self._og_search_title(webpage))
|
||||
|
||||
data_js = self._search_regex(
|
||||
r'(?s)\nplaylist:\s*(\[.*?}\]),related:',
|
||||
r'(?s)\nplaylist:\s*(\[.*?}\]),',
|
||||
webpage, 'playlist configuration')
|
||||
data_json = js_to_json(data_js)
|
||||
data = json.loads(data_json)[0]
|
||||
|
@@ -1,16 +1,19 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .theplatform import ThePlatformIE
|
||||
from .adobepass import AdobePassIE
|
||||
from ..utils import (
|
||||
smuggle_url,
|
||||
url_basename,
|
||||
update_url_query,
|
||||
get_element_by_class,
|
||||
)
|
||||
|
||||
|
||||
class NationalGeographicIE(InfoExtractor):
|
||||
IE_NAME = 'natgeo'
|
||||
class NationalGeographicVideoIE(InfoExtractor):
|
||||
IE_NAME = 'natgeo:video'
|
||||
_VALID_URL = r'https?://video\.nationalgeographic\.com/.*?'
|
||||
|
||||
_TESTS = [
|
||||
@@ -62,16 +65,16 @@ class NationalGeographicIE(InfoExtractor):
|
||||
}
|
||||
|
||||
|
||||
class NationalGeographicChannelIE(ThePlatformIE):
|
||||
IE_NAME = 'natgeo:channel'
|
||||
_VALID_URL = r'https?://channel\.nationalgeographic\.com/(?:wild/)?[^/]+/videos/(?P<id>[^/?]+)'
|
||||
class NationalGeographicIE(AdobePassIE):
|
||||
IE_NAME = 'natgeo'
|
||||
_VALID_URL = r'https?://channel\.nationalgeographic\.com/(?:wild/)?[^/]+/(?:videos|episodes)/(?P<id>[^/?]+)'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://channel.nationalgeographic.com/the-story-of-god-with-morgan-freeman/videos/uncovering-a-universal-knowledge/',
|
||||
'md5': '518c9aa655686cf81493af5cc21e2a04',
|
||||
'info_dict': {
|
||||
'id': 'nB5vIAfmyllm',
|
||||
'id': 'vKInpacll2pC',
|
||||
'ext': 'mp4',
|
||||
'title': 'Uncovering a Universal Knowledge',
|
||||
'description': 'md5:1a89148475bf931b3661fcd6ddb2ae3a',
|
||||
@@ -85,7 +88,7 @@ class NationalGeographicChannelIE(ThePlatformIE):
|
||||
'url': 'http://channel.nationalgeographic.com/wild/destination-wild/videos/the-stunning-red-bird-of-paradise/',
|
||||
'md5': 'c4912f656b4cbe58f3e000c489360989',
|
||||
'info_dict': {
|
||||
'id': '3TmMv9OvGwIR',
|
||||
'id': 'Pok5lWCkiEFA',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Stunning Red Bird of Paradise',
|
||||
'description': 'md5:7bc8cd1da29686be4d17ad1230f0140c',
|
||||
@@ -95,6 +98,10 @@ class NationalGeographicChannelIE(ThePlatformIE):
|
||||
},
|
||||
'add_ie': ['ThePlatform'],
|
||||
},
|
||||
{
|
||||
'url': 'http://channel.nationalgeographic.com/the-story-of-god-with-morgan-freeman/episodes/the-power-of-miracles/',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -112,7 +119,7 @@ class NationalGeographicChannelIE(ThePlatformIE):
|
||||
auth_resource_id = self._search_regex(
|
||||
r"video_auth_resourceId\s*=\s*'([^']+)'",
|
||||
webpage, 'auth resource id')
|
||||
query['auth'] = self._extract_mvpd_auth(url, display_id, 'natgeo', auth_resource_id) or ''
|
||||
query['auth'] = self._extract_mvpd_auth(url, display_id, 'natgeo', auth_resource_id)
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
@@ -122,3 +129,40 @@ class NationalGeographicChannelIE(ThePlatformIE):
|
||||
{'force_smil_url': True}),
|
||||
'display_id': display_id,
|
||||
}
|
||||
|
||||
|
||||
class NationalGeographicEpisodeGuideIE(InfoExtractor):
|
||||
IE_NAME = 'natgeo:episodeguide'
|
||||
_VALID_URL = r'https?://channel\.nationalgeographic\.com/(?:wild/)?(?P<id>[^/]+)/episode-guide'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://channel.nationalgeographic.com/the-story-of-god-with-morgan-freeman/episode-guide/',
|
||||
'info_dict': {
|
||||
'id': 'the-story-of-god-with-morgan-freeman-season-1',
|
||||
'title': 'The Story of God with Morgan Freeman - Season 1',
|
||||
},
|
||||
'playlist_mincount': 6,
|
||||
},
|
||||
{
|
||||
'url': 'http://channel.nationalgeographic.com/underworld-inc/episode-guide/?s=2',
|
||||
'info_dict': {
|
||||
'id': 'underworld-inc-season-2',
|
||||
'title': 'Underworld, Inc. - Season 2',
|
||||
},
|
||||
'playlist_mincount': 7,
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
show = get_element_by_class('show', webpage)
|
||||
selected_season = self._search_regex(
|
||||
r'<div[^>]+class="select-seasons[^"]*".*?<a[^>]*>(.*?)</a>',
|
||||
webpage, 'selected season')
|
||||
entries = [
|
||||
self.url_result(self._proto_relative_url(entry_url), 'NationalGeographic')
|
||||
for entry_url in re.findall('(?s)<div[^>]+class="col-inner"[^>]*?>.*?<a[^>]+href="([^"]+)"', webpage)]
|
||||
return self.playlist_result(
|
||||
entries, '%s-%s' % (display_id, selected_season.lower().replace(' ', '-')),
|
||||
'%s - %s' % (show, selected_season))
|
||||
|
@@ -4,12 +4,10 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse_urlencode,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
@@ -51,48 +49,74 @@ class NaverIE(InfoExtractor):
|
||||
if error:
|
||||
raise ExtractorError(error, expected=True)
|
||||
raise ExtractorError('couldn\'t extract vid and key')
|
||||
vid = m_id.group(1)
|
||||
key = m_id.group(2)
|
||||
query = compat_urllib_parse_urlencode({'vid': vid, 'inKey': key, })
|
||||
query_urls = compat_urllib_parse_urlencode({
|
||||
'masterVid': vid,
|
||||
'protocol': 'p2p',
|
||||
'inKey': key,
|
||||
})
|
||||
info = self._download_xml(
|
||||
'http://serviceapi.rmcnmv.naver.com/flash/videoInfo.nhn?' + query,
|
||||
video_id, 'Downloading video info')
|
||||
urls = self._download_xml(
|
||||
'http://serviceapi.rmcnmv.naver.com/flash/playableEncodingOption.nhn?' + query_urls,
|
||||
video_id, 'Downloading video formats info')
|
||||
|
||||
video_data = self._download_json(
|
||||
'http://play.rmcnmv.naver.com/vod/play/v2.0/' + m_id.group(1),
|
||||
video_id, query={
|
||||
'key': m_id.group(2),
|
||||
})
|
||||
meta = video_data['meta']
|
||||
title = meta['subject']
|
||||
formats = []
|
||||
for format_el in urls.findall('EncodingOptions/EncodingOption'):
|
||||
domain = format_el.find('Domain').text
|
||||
uri = format_el.find('uri').text
|
||||
f = {
|
||||
'url': compat_urlparse.urljoin(domain, uri),
|
||||
'ext': 'mp4',
|
||||
'width': int(format_el.find('width').text),
|
||||
'height': int(format_el.find('height').text),
|
||||
}
|
||||
if domain.startswith('rtmp'):
|
||||
# urlparse does not support custom schemes
|
||||
# https://bugs.python.org/issue18828
|
||||
f.update({
|
||||
'url': domain + uri,
|
||||
'ext': 'flv',
|
||||
'rtmp_protocol': '1', # rtmpt
|
||||
|
||||
def extract_formats(streams, stream_type, query={}):
|
||||
for stream in streams:
|
||||
stream_url = stream.get('source')
|
||||
if not stream_url:
|
||||
continue
|
||||
stream_url = update_url_query(stream_url, query)
|
||||
encoding_option = stream.get('encodingOption', {})
|
||||
bitrate = stream.get('bitrate', {})
|
||||
formats.append({
|
||||
'format_id': '%s_%s' % (stream.get('type') or stream_type, encoding_option.get('id') or encoding_option.get('name')),
|
||||
'url': stream_url,
|
||||
'width': int_or_none(encoding_option.get('width')),
|
||||
'height': int_or_none(encoding_option.get('height')),
|
||||
'vbr': int_or_none(bitrate.get('video')),
|
||||
'abr': int_or_none(bitrate.get('audio')),
|
||||
'filesize': int_or_none(stream.get('size')),
|
||||
'protocol': 'm3u8_native' if stream_type == 'HLS' else None,
|
||||
})
|
||||
formats.append(f)
|
||||
|
||||
extract_formats(video_data.get('videos', {}).get('list', []), 'H264')
|
||||
for stream_set in video_data.get('streams', []):
|
||||
query = {}
|
||||
for param in stream_set.get('keys', []):
|
||||
query[param['name']] = param['value']
|
||||
stream_type = stream_set.get('type')
|
||||
videos = stream_set.get('videos')
|
||||
if videos:
|
||||
extract_formats(videos, stream_type, query)
|
||||
elif stream_type == 'HLS':
|
||||
stream_url = stream_set.get('source')
|
||||
if not stream_url:
|
||||
continue
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
update_url_query(stream_url, query), video_id,
|
||||
'mp4', 'm3u8_native', m3u8_id=stream_type, fatal=False))
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
for caption in video_data.get('captions', {}).get('list', []):
|
||||
caption_url = caption.get('source')
|
||||
if not caption_url:
|
||||
continue
|
||||
subtitles.setdefault(caption.get('language') or caption.get('locale'), []).append({
|
||||
'url': caption_url,
|
||||
})
|
||||
|
||||
upload_date = self._search_regex(
|
||||
r'<span[^>]+class="date".*?(\d{4}\.\d{2}\.\d{2})',
|
||||
webpage, 'upload date', fatal=False)
|
||||
if upload_date:
|
||||
upload_date = upload_date.replace('.', '')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': info.find('Subject').text,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'description': self._og_search_description(webpage),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'upload_date': info.find('WriteDate').text.replace('.', ''),
|
||||
'view_count': int(info.find('PlayCount').text),
|
||||
'thumbnail': meta.get('cover', {}).get('source') or self._og_search_thumbnail(webpage),
|
||||
'view_count': int_or_none(meta.get('count')),
|
||||
'upload_date': upload_date,
|
||||
}
|
||||
|
@@ -1,30 +0,0 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .mtv import MTVServicesInfoExtractor
|
||||
from ..compat import compat_urllib_parse_urlencode
|
||||
|
||||
|
||||
class NextMovieIE(MTVServicesInfoExtractor):
|
||||
IE_NAME = 'nextmovie.com'
|
||||
_VALID_URL = r'https?://(?:www\.)?nextmovie\.com/shows/[^/]+/\d{4}-\d{2}-\d{2}/(?P<id>[^/?#]+)'
|
||||
_FEED_URL = 'http://lite.dextr.mtvi.com/service1/dispatch.htm'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.nextmovie.com/shows/exclusives/2013-03-10/mgid:uma:videolist:nextmovie.com:1715019/',
|
||||
'md5': '09a9199f2f11f10107d04fcb153218aa',
|
||||
'info_dict': {
|
||||
'id': '961726',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Muppets\' Gravity',
|
||||
},
|
||||
}]
|
||||
|
||||
def _get_feed_query(self, uri):
|
||||
return compat_urllib_parse_urlencode({
|
||||
'feed': '1505',
|
||||
'mgid': uri,
|
||||
})
|
||||
|
||||
def _real_extract(self, url):
|
||||
mgid = self._match_id(url)
|
||||
return self._get_videos_info(mgid)
|
@@ -7,6 +7,7 @@ from ..utils import update_url_query
|
||||
|
||||
|
||||
class NickIE(MTVServicesInfoExtractor):
|
||||
# None of videos on the website are still alive?
|
||||
IE_NAME = 'nick.com'
|
||||
_VALID_URL = r'https?://(?:www\.)?nick(?:jr)?\.com/(?:videos/clip|[^/]+/videos)/(?P<id>[^/?#.]+)'
|
||||
_FEED_URL = 'http://udat.mtvnservices.com/service1/dispatch.htm'
|
||||
|
@@ -11,70 +11,64 @@ from ..utils import (
|
||||
|
||||
class NTVRuIE(InfoExtractor):
|
||||
IE_NAME = 'ntv.ru'
|
||||
_VALID_URL = r'https?://(?:www\.)?ntv\.ru/(?P<id>.+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?ntv\.ru/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.ntv.ru/novosti/863142/',
|
||||
'md5': 'ba7ea172a91cb83eb734cad18c10e723',
|
||||
'info_dict': {
|
||||
'id': '746000',
|
||||
'ext': 'mp4',
|
||||
'title': 'Командующий Черноморским флотом провел переговоры в штабе ВМС Украины',
|
||||
'description': 'Командующий Черноморским флотом провел переговоры в штабе ВМС Украины',
|
||||
'thumbnail': 're:^http://.*\.jpg',
|
||||
'duration': 136,
|
||||
},
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ntv.ru/novosti/863142/',
|
||||
'md5': 'ba7ea172a91cb83eb734cad18c10e723',
|
||||
'info_dict': {
|
||||
'id': '746000',
|
||||
'ext': 'mp4',
|
||||
'title': 'Командующий Черноморским флотом провел переговоры в штабе ВМС Украины',
|
||||
'description': 'Командующий Черноморским флотом провел переговоры в штабе ВМС Украины',
|
||||
'thumbnail': 're:^http://.*\.jpg',
|
||||
'duration': 136,
|
||||
},
|
||||
{
|
||||
'url': 'http://www.ntv.ru/video/novosti/750370/',
|
||||
'md5': 'adecff79691b4d71e25220a191477124',
|
||||
'info_dict': {
|
||||
'id': '750370',
|
||||
'ext': 'mp4',
|
||||
'title': 'Родные пассажиров пропавшего Boeing не верят в трагический исход',
|
||||
'description': 'Родные пассажиров пропавшего Boeing не верят в трагический исход',
|
||||
'thumbnail': 're:^http://.*\.jpg',
|
||||
'duration': 172,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.ntv.ru/video/novosti/750370/',
|
||||
'md5': 'adecff79691b4d71e25220a191477124',
|
||||
'info_dict': {
|
||||
'id': '750370',
|
||||
'ext': 'mp4',
|
||||
'title': 'Родные пассажиров пропавшего Boeing не верят в трагический исход',
|
||||
'description': 'Родные пассажиров пропавшего Boeing не верят в трагический исход',
|
||||
'thumbnail': 're:^http://.*\.jpg',
|
||||
'duration': 172,
|
||||
},
|
||||
{
|
||||
'url': 'http://www.ntv.ru/peredacha/segodnya/m23700/o232416',
|
||||
'md5': '82dbd49b38e3af1d00df16acbeab260c',
|
||||
'info_dict': {
|
||||
'id': '747480',
|
||||
'ext': 'mp4',
|
||||
'title': '«Сегодня». 21 марта 2014 года. 16:00',
|
||||
'description': '«Сегодня». 21 марта 2014 года. 16:00',
|
||||
'thumbnail': 're:^http://.*\.jpg',
|
||||
'duration': 1496,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.ntv.ru/peredacha/segodnya/m23700/o232416',
|
||||
'md5': '82dbd49b38e3af1d00df16acbeab260c',
|
||||
'info_dict': {
|
||||
'id': '747480',
|
||||
'ext': 'mp4',
|
||||
'title': '«Сегодня». 21 марта 2014 года. 16:00',
|
||||
'description': '«Сегодня». 21 марта 2014 года. 16:00',
|
||||
'thumbnail': 're:^http://.*\.jpg',
|
||||
'duration': 1496,
|
||||
},
|
||||
{
|
||||
'url': 'http://www.ntv.ru/kino/Koma_film',
|
||||
'md5': 'f825770930937aa7e5aca0dc0d29319a',
|
||||
'info_dict': {
|
||||
'id': '1007609',
|
||||
'ext': 'mp4',
|
||||
'title': 'Остросюжетный фильм «Кома»',
|
||||
'description': 'Остросюжетный фильм «Кома»',
|
||||
'thumbnail': 're:^http://.*\.jpg',
|
||||
'duration': 5592,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.ntv.ru/kino/Koma_film',
|
||||
'md5': 'f825770930937aa7e5aca0dc0d29319a',
|
||||
'info_dict': {
|
||||
'id': '1007609',
|
||||
'ext': 'mp4',
|
||||
'title': 'Остросюжетный фильм «Кома»',
|
||||
'description': 'Остросюжетный фильм «Кома»',
|
||||
'thumbnail': 're:^http://.*\.jpg',
|
||||
'duration': 5592,
|
||||
},
|
||||
{
|
||||
'url': 'http://www.ntv.ru/serial/Delo_vrachey/m31760/o233916/',
|
||||
'md5': '9320cd0e23f3ea59c330dc744e06ff3b',
|
||||
'info_dict': {
|
||||
'id': '751482',
|
||||
'ext': 'mp4',
|
||||
'title': '«Дело врачей»: «Деревце жизни»',
|
||||
'description': '«Дело врачей»: «Деревце жизни»',
|
||||
'thumbnail': 're:^http://.*\.jpg',
|
||||
'duration': 2590,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.ntv.ru/serial/Delo_vrachey/m31760/o233916/',
|
||||
'md5': '9320cd0e23f3ea59c330dc744e06ff3b',
|
||||
'info_dict': {
|
||||
'id': '751482',
|
||||
'ext': 'mp4',
|
||||
'title': '«Дело врачей»: «Деревце жизни»',
|
||||
'description': '«Дело врачей»: «Деревце жизни»',
|
||||
'thumbnail': 're:^http://.*\.jpg',
|
||||
'duration': 2590,
|
||||
},
|
||||
]
|
||||
}]
|
||||
|
||||
_VIDEO_ID_REGEXES = [
|
||||
r'<meta property="og:url" content="http://www\.ntv\.ru/video/(\d+)',
|
||||
@@ -87,11 +81,21 @@ class NTVRuIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_id = self._html_search_regex(self._VIDEO_ID_REGEXES, webpage, 'video id')
|
||||
video_url = self._og_search_property(
|
||||
('video', 'video:iframe'), webpage, default=None)
|
||||
if video_url:
|
||||
video_id = self._search_regex(
|
||||
r'https?://(?:www\.)?ntv\.ru/video/(?:embed/)?(\d+)',
|
||||
video_url, 'video id', default=None)
|
||||
|
||||
if not video_id:
|
||||
video_id = self._html_search_regex(
|
||||
self._VIDEO_ID_REGEXES, webpage, 'video id')
|
||||
|
||||
player = self._download_xml(
|
||||
'http://www.ntv.ru/vi%s/' % video_id,
|
||||
video_id, 'Downloading video XML')
|
||||
|
||||
title = clean_html(xpath_text(player, './data/title', 'title', fatal=True))
|
||||
description = clean_html(xpath_text(player, './data/description', 'description'))
|
||||
|
||||
|
@@ -1,15 +1,14 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
from __future__ import unicode_literals, division
|
||||
|
||||
import re
|
||||
import math
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_chr
|
||||
from ..utils import (
|
||||
decode_png,
|
||||
determine_ext,
|
||||
encode_base_n,
|
||||
ExtractorError,
|
||||
mimetype2ext,
|
||||
)
|
||||
|
||||
|
||||
@@ -41,60 +40,6 @@ class OpenloadIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def openload_level2_debase(m):
|
||||
radix, num = int(m.group(1)) + 27, int(m.group(2))
|
||||
return '"' + encode_base_n(num, radix) + '"'
|
||||
|
||||
@classmethod
|
||||
def openload_level2(cls, txt):
|
||||
# The function name is ǃ \u01c3
|
||||
# Using escaped unicode literals does not work in Python 3.2
|
||||
return re.sub(r'ǃ\((\d+),(\d+)\)', cls.openload_level2_debase, txt, re.UNICODE).replace('"+"', '')
|
||||
|
||||
# Openload uses a variant of aadecode
|
||||
# openload_decode and related functions are originally written by
|
||||
# vitas@matfyz.cz and released with public domain
|
||||
# See https://github.com/rg3/youtube-dl/issues/8489
|
||||
@classmethod
|
||||
def openload_decode(cls, txt):
|
||||
symbol_table = [
|
||||
('_', '(゚Д゚) [゚Θ゚]'),
|
||||
('a', '(゚Д゚) [゚ω゚ノ]'),
|
||||
('b', '(゚Д゚) [゚Θ゚ノ]'),
|
||||
('c', '(゚Д゚) [\'c\']'),
|
||||
('d', '(゚Д゚) [゚ー゚ノ]'),
|
||||
('e', '(゚Д゚) [゚Д゚ノ]'),
|
||||
('f', '(゚Д゚) [1]'),
|
||||
|
||||
('o', '(゚Д゚) [\'o\']'),
|
||||
('u', '(o゚ー゚o)'),
|
||||
('c', '(゚Д゚) [\'c\']'),
|
||||
|
||||
('7', '((゚ー゚) + (o^_^o))'),
|
||||
('6', '((o^_^o) +(o^_^o) +(c^_^o))'),
|
||||
('5', '((゚ー゚) + (゚Θ゚))'),
|
||||
('4', '(-~3)'),
|
||||
('3', '(-~-~1)'),
|
||||
('2', '(-~1)'),
|
||||
('1', '(-~0)'),
|
||||
('0', '((c^_^o)-(c^_^o))'),
|
||||
]
|
||||
delim = '(゚Д゚)[゚ε゚]+'
|
||||
ret = ''
|
||||
for aachar in txt.split(delim):
|
||||
for val, pat in symbol_table:
|
||||
aachar = aachar.replace(pat, val)
|
||||
aachar = aachar.replace('+ ', '')
|
||||
m = re.match(r'^\d+', aachar)
|
||||
if m:
|
||||
ret += compat_chr(int(m.group(0), 8))
|
||||
else:
|
||||
m = re.match(r'^u([\da-f]+)', aachar)
|
||||
if m:
|
||||
ret += compat_chr(int(m.group(1), 16))
|
||||
return cls.openload_level2(ret)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
@@ -102,29 +47,77 @@ class OpenloadIE(InfoExtractor):
|
||||
if 'File not found' in webpage:
|
||||
raise ExtractorError('File not found', expected=True)
|
||||
|
||||
code = self._search_regex(
|
||||
r'</video>\s*</div>\s*<script[^>]+>[^>]+</script>\s*<script[^>]+>([^<]+)</script>',
|
||||
webpage, 'JS code')
|
||||
# The following extraction logic is proposed by @Belderak and @gdkchan
|
||||
# and declared to be used freely in youtube-dl
|
||||
# See https://github.com/rg3/youtube-dl/issues/9706
|
||||
|
||||
decoded = self.openload_decode(code)
|
||||
numbers_js = self._download_webpage(
|
||||
'https://openload.co/assets/js/obfuscator/n.js', video_id,
|
||||
note='Downloading signature numbers')
|
||||
signums = self._search_regex(
|
||||
r'window\.signatureNumbers\s*=\s*[\'"](?P<data>[a-z]+)[\'"]',
|
||||
numbers_js, 'signature numbers', group='data')
|
||||
|
||||
video_url = self._search_regex(
|
||||
r'return\s+"(https?://[^"]+)"', decoded, 'video URL')
|
||||
linkimg_uri = self._search_regex(
|
||||
r'<img[^>]+id="linkimg"[^>]+src="([^"]+)"', webpage, 'link image')
|
||||
linkimg = self._request_webpage(
|
||||
linkimg_uri, video_id, note=False).read()
|
||||
|
||||
width, height, pixels = decode_png(linkimg)
|
||||
|
||||
output = ''
|
||||
for y in range(height):
|
||||
for x in range(width):
|
||||
r, g, b = pixels[y][3 * x:3 * x + 3]
|
||||
if r == 0 and g == 0 and b == 0:
|
||||
break
|
||||
else:
|
||||
output += compat_chr(r)
|
||||
output += compat_chr(g)
|
||||
output += compat_chr(b)
|
||||
|
||||
img_str_length = len(output) // 200
|
||||
img_str = [[0 for x in range(img_str_length)] for y in range(10)]
|
||||
|
||||
sig_str_length = len(signums) // 260
|
||||
sig_str = [[0 for x in range(sig_str_length)] for y in range(10)]
|
||||
|
||||
for i in range(10):
|
||||
for j in range(img_str_length):
|
||||
begin = i * img_str_length * 20 + j * 20
|
||||
img_str[i][j] = output[begin:begin + 20]
|
||||
for j in range(sig_str_length):
|
||||
begin = i * sig_str_length * 26 + j * 26
|
||||
sig_str[i][j] = signums[begin:begin + 26]
|
||||
|
||||
parts = []
|
||||
# TODO: find better names for str_, chr_ and sum_
|
||||
str_ = ''
|
||||
for i in [2, 3, 5, 7]:
|
||||
str_ = ''
|
||||
sum_ = float(99)
|
||||
for j in range(len(sig_str[i])):
|
||||
for chr_idx in range(len(img_str[i][j])):
|
||||
if sum_ > float(122):
|
||||
sum_ = float(98)
|
||||
chr_ = compat_chr(int(math.floor(sum_)))
|
||||
if sig_str[i][j][chr_idx] == chr_ and j >= len(str_):
|
||||
sum_ += float(2.5)
|
||||
str_ += img_str[i][j][chr_idx]
|
||||
parts.append(str_.replace(',', ''))
|
||||
|
||||
video_url = 'https://openload.co/stream/%s~%s~%s~%s' % (parts[3], parts[1], parts[2], parts[0])
|
||||
|
||||
title = self._og_search_title(webpage, default=None) or self._search_regex(
|
||||
r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage,
|
||||
'title', default=None) or self._html_search_meta(
|
||||
'description', webpage, 'title', fatal=True)
|
||||
|
||||
ext = mimetype2ext(self._search_regex(
|
||||
r'window\.vt\s*=\s*(["\'])(?P<mimetype>.+?)\1', decoded,
|
||||
'mimetype', default=None, group='mimetype')) or determine_ext(
|
||||
video_url, 'mp4')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'ext': ext,
|
||||
'thumbnail': self._og_search_thumbnail(webpage, default=None),
|
||||
'url': video_url,
|
||||
# Seems all videos have extensions in their titles
|
||||
'ext': determine_ext(title),
|
||||
}
|
||||
|
@@ -137,13 +137,16 @@ class ORFTVthekIE(InfoExtractor):
|
||||
class ORFOE1IE(InfoExtractor):
|
||||
IE_NAME = 'orf:oe1'
|
||||
IE_DESC = 'Radio Österreich 1'
|
||||
_VALID_URL = r'https?://oe1\.orf\.at/(?:programm/|konsole.*?#\?track_id=)(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://oe1\.orf\.at/(?:programm/|konsole\?.*?\btrack_id=)(?P<id>[0-9]+)'
|
||||
|
||||
# Audios on ORF radio are only available for 7 days, so we can't add tests.
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://oe1.orf.at/konsole?show=on_demand#?track_id=394211',
|
||||
'only_matching': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://oe1.orf.at/konsole?show=ondemand&track_id=443608&load_day=/programm/konsole/tag/20160726',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
show_id = self._match_id(url)
|
||||
|
@@ -4,13 +4,13 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
strip_jsonp,
|
||||
strip_or_none,
|
||||
unified_strdate,
|
||||
US_RATINGS,
|
||||
)
|
||||
@@ -201,7 +201,7 @@ class PBSIE(InfoExtractor):
|
||||
'id': '2365006249',
|
||||
'ext': 'mp4',
|
||||
'title': 'Constitution USA with Peter Sagal - A More Perfect Union',
|
||||
'description': 'md5:36f341ae62e251b8f5bd2b754b95a071',
|
||||
'description': 'md5:31b664af3c65fd07fa460d306b837d00',
|
||||
'duration': 3190,
|
||||
},
|
||||
},
|
||||
@@ -212,7 +212,7 @@ class PBSIE(InfoExtractor):
|
||||
'id': '2365297690',
|
||||
'ext': 'mp4',
|
||||
'title': 'FRONTLINE - Losing Iraq',
|
||||
'description': 'md5:4d3eaa01f94e61b3e73704735f1196d9',
|
||||
'description': 'md5:5979a4d069b157f622d02bff62fbe654',
|
||||
'duration': 5050,
|
||||
},
|
||||
},
|
||||
@@ -223,7 +223,7 @@ class PBSIE(InfoExtractor):
|
||||
'id': '2201174722',
|
||||
'ext': 'mp4',
|
||||
'title': 'PBS NewsHour - Cyber Schools Gain Popularity, but Quality Questions Persist',
|
||||
'description': 'md5:95a19f568689d09a166dff9edada3301',
|
||||
'description': 'md5:86ab9a3d04458b876147b355788b8781',
|
||||
'duration': 801,
|
||||
},
|
||||
},
|
||||
@@ -268,7 +268,7 @@ class PBSIE(InfoExtractor):
|
||||
'display_id': 'player',
|
||||
'ext': 'mp4',
|
||||
'title': 'American Experience - Death and the Civil War, Chapter 1',
|
||||
'description': 'md5:1b80a74e0380ed2a4fb335026de1600d',
|
||||
'description': 'md5:67fa89a9402e2ee7d08f53b920674c18',
|
||||
'duration': 682,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
@@ -294,13 +294,13 @@ class PBSIE(InfoExtractor):
|
||||
# "<iframe style='position: absolute;<br />\ntop: 0; left: 0;' ...", see
|
||||
# https://github.com/rg3/youtube-dl/issues/7059)
|
||||
'url': 'http://www.pbs.org/food/features/a-chefs-life-season-3-episode-5-prickly-business/',
|
||||
'md5': '84ced42850d78f1d4650297356e95e6f',
|
||||
'md5': '59b0ef5009f9ac8a319cc5efebcd865e',
|
||||
'info_dict': {
|
||||
'id': '2365546844',
|
||||
'display_id': 'a-chefs-life-season-3-episode-5-prickly-business',
|
||||
'ext': 'mp4',
|
||||
'title': "A Chef's Life - Season 3, Ep. 5: Prickly Business",
|
||||
'description': 'md5:54033c6baa1f9623607c6e2ed245888b',
|
||||
'description': 'md5:c0ff7475a4b70261c7e58f493c2792a5',
|
||||
'duration': 1480,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
@@ -313,7 +313,7 @@ class PBSIE(InfoExtractor):
|
||||
'display_id': 'the-atomic-artists',
|
||||
'ext': 'mp4',
|
||||
'title': 'FRONTLINE - The Atomic Artists',
|
||||
'description': 'md5:1a2481e86b32b2e12ec1905dd473e2c1',
|
||||
'description': 'md5:f677e4520cfacb4a5ce1471e31b57800',
|
||||
'duration': 723,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
@@ -324,7 +324,7 @@ class PBSIE(InfoExtractor):
|
||||
{
|
||||
# Serves hd only via wigget/partnerplayer page
|
||||
'url': 'http://www.pbs.org/video/2365641075/',
|
||||
'md5': 'acfd4c400b48149a44861cb16dd305cf',
|
||||
'md5': 'fdf907851eab57211dd589cf12006666',
|
||||
'info_dict': {
|
||||
'id': '2365641075',
|
||||
'ext': 'mp4',
|
||||
@@ -353,11 +353,16 @@ class PBSIE(InfoExtractor):
|
||||
def _extract_webpage(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
||||
description = None
|
||||
|
||||
presumptive_id = mobj.group('presumptive_id')
|
||||
display_id = presumptive_id
|
||||
if presumptive_id:
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
description = strip_or_none(self._og_search_description(
|
||||
webpage, default=None) or self._html_search_meta(
|
||||
'description', webpage, default=None))
|
||||
upload_date = unified_strdate(self._search_regex(
|
||||
r'<input type="hidden" id="air_date_[0-9]+" value="([^"]+)"',
|
||||
webpage, 'upload date', default=None))
|
||||
@@ -370,7 +375,7 @@ class PBSIE(InfoExtractor):
|
||||
for p in MULTI_PART_REGEXES:
|
||||
tabbed_videos = re.findall(p, webpage)
|
||||
if tabbed_videos:
|
||||
return tabbed_videos, presumptive_id, upload_date
|
||||
return tabbed_videos, presumptive_id, upload_date, description
|
||||
|
||||
MEDIA_ID_REGEXES = [
|
||||
r"div\s*:\s*'videoembed'\s*,\s*mediaid\s*:\s*'(\d+)'", # frontline video embed
|
||||
@@ -382,7 +387,7 @@ class PBSIE(InfoExtractor):
|
||||
media_id = self._search_regex(
|
||||
MEDIA_ID_REGEXES, webpage, 'media ID', fatal=False, default=None)
|
||||
if media_id:
|
||||
return media_id, presumptive_id, upload_date
|
||||
return media_id, presumptive_id, upload_date, description
|
||||
|
||||
# Fronline video embedded via flp
|
||||
video_id = self._search_regex(
|
||||
@@ -399,7 +404,7 @@ class PBSIE(InfoExtractor):
|
||||
'http://www.pbs.org/wgbh/pages/frontline/.json/getdir/getdir%d.json' % prg_id,
|
||||
presumptive_id, 'Downloading getdir JSON',
|
||||
transform_source=strip_jsonp)
|
||||
return getdir['mid'], presumptive_id, upload_date
|
||||
return getdir['mid'], presumptive_id, upload_date, description
|
||||
|
||||
for iframe in re.findall(r'(?s)<iframe(.+?)></iframe>', webpage):
|
||||
url = self._search_regex(
|
||||
@@ -423,10 +428,10 @@ class PBSIE(InfoExtractor):
|
||||
video_id = mobj.group('id')
|
||||
display_id = video_id
|
||||
|
||||
return video_id, display_id, None
|
||||
return video_id, display_id, None, description
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id, display_id, upload_date = self._extract_webpage(url)
|
||||
video_id, display_id, upload_date, description = self._extract_webpage(url)
|
||||
|
||||
if isinstance(video_id, list):
|
||||
entries = [self.url_result(
|
||||
@@ -448,17 +453,6 @@ class PBSIE(InfoExtractor):
|
||||
redirects.append(redirect)
|
||||
redirect_urls.add(redirect_url)
|
||||
|
||||
try:
|
||||
video_info = self._download_json(
|
||||
'http://player.pbs.org/videoInfo/%s?format=json&type=partner' % video_id,
|
||||
display_id, 'Downloading video info JSON')
|
||||
extract_redirect_urls(video_info)
|
||||
info = video_info
|
||||
except ExtractorError as e:
|
||||
# videoInfo API may not work for some videos
|
||||
if not isinstance(e.cause, compat_HTTPError) or e.cause.code != 404:
|
||||
raise
|
||||
|
||||
# Player pages may also serve different qualities
|
||||
for page in ('widget/partnerplayer', 'portalplayer'):
|
||||
player = self._download_webpage(
|
||||
@@ -511,15 +505,19 @@ class PBSIE(InfoExtractor):
|
||||
formats))
|
||||
if http_url:
|
||||
for m3u8_format in m3u8_formats:
|
||||
bitrate = self._search_regex(r'(\d+k)', m3u8_format['url'], 'bitrate', default=None)
|
||||
# extract only the formats that we know that they will be available as http format.
|
||||
# https://projects.pbs.org/confluence/display/coveapi/COVE+Video+Specifications
|
||||
if not bitrate or bitrate not in ('400k', '800k', '1200k', '2500k'):
|
||||
bitrate = self._search_regex(r'(\d+)k', m3u8_format['url'], 'bitrate', default=None)
|
||||
# Lower qualities (150k and 192k) are not available as HTTP formats (see [1]),
|
||||
# we won't try extracting them.
|
||||
# Since summer 2016 higher quality formats (4500k and 6500k) are also available
|
||||
# albeit they are not documented in [2].
|
||||
# 1. https://github.com/rg3/youtube-dl/commit/cbc032c8b70a038a69259378c92b4ba97b42d491#commitcomment-17313656
|
||||
# 2. https://projects.pbs.org/confluence/display/coveapi/COVE+Video+Specifications
|
||||
if not bitrate or int(bitrate) < 400:
|
||||
continue
|
||||
f_url = re.sub(r'\d+k|baseline', bitrate, http_url)
|
||||
f_url = re.sub(r'\d+k|baseline', bitrate + 'k', http_url)
|
||||
# This may produce invalid links sometimes (e.g.
|
||||
# http://www.pbs.org/wgbh/frontline/film/suicide-plan)
|
||||
if not self._is_valid_url(f_url, display_id, 'http-%s video' % bitrate):
|
||||
if not self._is_valid_url(f_url, display_id, 'http-%sk video' % bitrate):
|
||||
continue
|
||||
f = m3u8_format.copy()
|
||||
f.update({
|
||||
@@ -562,11 +560,14 @@ class PBSIE(InfoExtractor):
|
||||
if alt_title:
|
||||
info['title'] = alt_title + ' - ' + re.sub(r'^' + alt_title + '[\s\-:]+', '', info['title'])
|
||||
|
||||
description = info.get('description') or info.get(
|
||||
'program', {}).get('description') or description
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': info['title'],
|
||||
'description': info.get('description') or info.get('program', {}).get('description'),
|
||||
'description': description,
|
||||
'thumbnail': info.get('image_url'),
|
||||
'duration': int_or_none(info.get('duration')),
|
||||
'age_limit': age_limit,
|
||||
|
58
youtube_dl/extractor/pokemon.py
Normal file
58
youtube_dl/extractor/pokemon.py
Normal file
@@ -0,0 +1,58 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class PokemonIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?pokemon\.com/[a-z]{2}(?:.*?play=(?P<id>[a-z0-9]{32})|/[^/]+/\d+_\d+-(?P<display_id>[^/?#]+))'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.pokemon.com/us/pokemon-episodes/19_01-from-a-to-z/?play=true',
|
||||
'md5': '9fb209ae3a569aac25de0f5afc4ee08f',
|
||||
'info_dict': {
|
||||
'id': 'd0436c00c3ce4071ac6cee8130ac54a1',
|
||||
'ext': 'mp4',
|
||||
'title': 'From A to Z!',
|
||||
'description': 'Bonnie makes a new friend, Ash runs into an old friend, and a terrifying premonition begins to unfold!',
|
||||
'timestamp': 1460478136,
|
||||
'upload_date': '20160412',
|
||||
},
|
||||
'add_id': ['LimelightMedia']
|
||||
}, {
|
||||
'url': 'http://www.pokemon.com/uk/pokemon-episodes/?play=2e8b5c761f1d4a9286165d7748c1ece2',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.pokemon.com/fr/episodes-pokemon/18_09-un-hiver-inattendu/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.pokemon.com/de/pokemon-folgen/01_20-bye-bye-smettbo/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id, display_id = re.match(self._VALID_URL, url).groups()
|
||||
webpage = self._download_webpage(url, video_id or display_id)
|
||||
video_data = extract_attributes(self._search_regex(
|
||||
r'(<[^>]+data-video-id="%s"[^>]*>)' % (video_id if video_id else '[a-z0-9]{32}'),
|
||||
webpage, 'video data element'))
|
||||
video_id = video_data['data-video-id']
|
||||
title = video_data['data-video-title']
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': video_id,
|
||||
'url': 'limelight:media:%s' % video_id,
|
||||
'title': title,
|
||||
'description': video_data.get('data-video-summary'),
|
||||
'thumbnail': video_data.get('data-video-poster'),
|
||||
'series': 'Pokémon',
|
||||
'season_number': int_or_none(video_data.get('data-video-season')),
|
||||
'episode': title,
|
||||
'episode_number': int_or_none(video_data.get('data-video-episode')),
|
||||
'ie_key': 'LimelightMedia',
|
||||
}
|
@@ -3,10 +3,7 @@ from __future__ import unicode_literals
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
sanitized_Request,
|
||||
)
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class PornotubeIE(InfoExtractor):
|
||||
@@ -31,59 +28,55 @@ class PornotubeIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
# Fetch origin token
|
||||
js_config = self._download_webpage(
|
||||
'http://www.pornotube.com/assets/src/app/config.js', video_id,
|
||||
note='Download JS config')
|
||||
originAuthenticationSpaceKey = self._search_regex(
|
||||
r"constant\('originAuthenticationSpaceKey',\s*'([^']+)'",
|
||||
js_config, 'originAuthenticationSpaceKey')
|
||||
token = self._download_json(
|
||||
'https://api.aebn.net/auth/v2/origins/authenticate',
|
||||
video_id, note='Downloading token',
|
||||
data=json.dumps({'credentials': 'Clip Application'}).encode('utf-8'),
|
||||
headers={
|
||||
'Content-Type': 'application/json',
|
||||
'Origin': 'http://www.pornotube.com',
|
||||
})['tokenKey']
|
||||
|
||||
# Fetch actual token
|
||||
token_req_data = {
|
||||
'authenticationSpaceKey': originAuthenticationSpaceKey,
|
||||
'credentials': 'Clip Application',
|
||||
}
|
||||
token_req = sanitized_Request(
|
||||
'https://api.aebn.net/auth/v1/token/primal',
|
||||
data=json.dumps(token_req_data).encode('utf-8'))
|
||||
token_req.add_header('Content-Type', 'application/json')
|
||||
token_req.add_header('Origin', 'http://www.pornotube.com')
|
||||
token_answer = self._download_json(
|
||||
token_req, video_id, note='Requesting primal token')
|
||||
token = token_answer['tokenKey']
|
||||
video_url = self._download_json(
|
||||
'https://api.aebn.net/delivery/v1/clips/%s/MP4' % video_id,
|
||||
video_id, note='Downloading delivery information',
|
||||
headers={'Authorization': token})['mediaUrl']
|
||||
|
||||
# Get video URL
|
||||
delivery_req = sanitized_Request(
|
||||
'https://api.aebn.net/delivery/v1/clips/%s/MP4' % video_id)
|
||||
delivery_req.add_header('Authorization', token)
|
||||
delivery_info = self._download_json(
|
||||
delivery_req, video_id, note='Downloading delivery information')
|
||||
video_url = delivery_info['mediaUrl']
|
||||
FIELDS = (
|
||||
'title', 'description', 'startSecond', 'endSecond', 'publishDate',
|
||||
'studios{name}', 'categories{name}', 'movieId', 'primaryImageNumber'
|
||||
)
|
||||
|
||||
# Get additional info (title etc.)
|
||||
info_req = sanitized_Request(
|
||||
'https://api.aebn.net/content/v1/clips/%s?expand='
|
||||
'title,description,primaryImageNumber,startSecond,endSecond,'
|
||||
'movie.title,movie.MovieId,movie.boxCoverFront,movie.stars,'
|
||||
'movie.studios,stars.name,studios.name,categories.name,'
|
||||
'clipActive,movieActive,publishDate,orientations' % video_id)
|
||||
info_req.add_header('Authorization', token)
|
||||
info = self._download_json(
|
||||
info_req, video_id, note='Downloading metadata')
|
||||
'https://api.aebn.net/content/v2/clips/%s?fields=%s'
|
||||
% (video_id, ','.join(FIELDS)), video_id,
|
||||
note='Downloading metadata',
|
||||
headers={'Authorization': token})
|
||||
|
||||
if isinstance(info, list):
|
||||
info = info[0]
|
||||
|
||||
title = info['title']
|
||||
|
||||
timestamp = int_or_none(info.get('publishDate'), scale=1000)
|
||||
uploader = info.get('studios', [{}])[0].get('name')
|
||||
movie_id = info['movie']['movieId']
|
||||
thumbnail = 'http://pic.aebn.net/dis/t/%s/%s_%08d.jpg' % (
|
||||
movie_id, movie_id, info['primaryImageNumber'])
|
||||
categories = [c['name'] for c in info.get('categories')]
|
||||
movie_id = info.get('movieId')
|
||||
primary_image_number = info.get('primaryImageNumber')
|
||||
thumbnail = None
|
||||
if movie_id and primary_image_number:
|
||||
thumbnail = 'http://pic.aebn.net/dis/t/%s/%s_%08d.jpg' % (
|
||||
movie_id, movie_id, primary_image_number)
|
||||
start = int_or_none(info.get('startSecond'))
|
||||
end = int_or_none(info.get('endSecond'))
|
||||
duration = end - start if start and end else None
|
||||
categories = [c['name'] for c in info.get('categories', []) if c.get('name')]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': info['title'],
|
||||
'title': title,
|
||||
'description': info.get('description'),
|
||||
'duration': duration,
|
||||
'timestamp': timestamp,
|
||||
'uploader': uploader,
|
||||
'thumbnail': thumbnail,
|
||||
|
@@ -1,55 +1,71 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
int_or_none,
|
||||
unified_timestamp,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
class RBMARadioIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?rbmaradio\.com/shows/(?P<videoID>[^/]+)$'
|
||||
_VALID_URL = r'https?://(?:www\.)?rbmaradio\.com/shows/(?P<show_id>[^/]+)/episodes/(?P<id>[^/?#&]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.rbmaradio.com/shows/ford-lopatin-live-at-primavera-sound-2011',
|
||||
'url': 'https://www.rbmaradio.com/shows/main-stage/episodes/ford-lopatin-live-at-primavera-sound-2011',
|
||||
'md5': '6bc6f9bcb18994b4c983bc3bf4384d95',
|
||||
'info_dict': {
|
||||
'id': 'ford-lopatin-live-at-primavera-sound-2011',
|
||||
'ext': 'mp3',
|
||||
'uploader_id': 'ford-lopatin',
|
||||
'location': 'Spain',
|
||||
'description': 'Joel Ford and Daniel ’Oneohtrix Point Never’ Lopatin fly their midified pop extravaganza to Spain. Live at Primavera Sound 2011.',
|
||||
'uploader': 'Ford & Lopatin',
|
||||
'title': 'Live at Primavera Sound 2011',
|
||||
'title': 'Main Stage - Ford & Lopatin',
|
||||
'description': 'md5:4f340fb48426423530af5a9d87bd7b91',
|
||||
'thumbnail': 're:^https?://.*\.jpg',
|
||||
'duration': 2452,
|
||||
'timestamp': 1307103164,
|
||||
'upload_date': '20110603',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
m = re.match(self._VALID_URL, url)
|
||||
video_id = m.group('videoID')
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
show_id = mobj.group('show_id')
|
||||
episode_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
webpage = self._download_webpage(url, episode_id)
|
||||
|
||||
json_data = self._search_regex(r'window\.gon.*?gon\.show=(.+?);$',
|
||||
webpage, 'json data', flags=re.MULTILINE)
|
||||
episode = self._parse_json(
|
||||
self._search_regex(
|
||||
r'__INITIAL_STATE__\s*=\s*({.+?})\s*</script>',
|
||||
webpage, 'json data'),
|
||||
episode_id)['episodes'][show_id][episode_id]
|
||||
|
||||
try:
|
||||
data = json.loads(json_data)
|
||||
except ValueError as e:
|
||||
raise ExtractorError('Invalid JSON: ' + str(e))
|
||||
title = episode['title']
|
||||
|
||||
video_url = data['akamai_url'] + '&cbr=256'
|
||||
show_title = episode.get('showTitle')
|
||||
if show_title:
|
||||
title = '%s - %s' % (show_title, title)
|
||||
|
||||
formats = [{
|
||||
'url': update_url_query(episode['audioURL'], query={'cbr': abr}),
|
||||
'format_id': compat_str(abr),
|
||||
'abr': abr,
|
||||
'vcodec': 'none',
|
||||
} for abr in (96, 128, 256)]
|
||||
|
||||
description = clean_html(episode.get('longTeaser'))
|
||||
thumbnail = self._proto_relative_url(episode.get('imageURL', {}).get('landscape'))
|
||||
duration = int_or_none(episode.get('duration'))
|
||||
timestamp = unified_timestamp(episode.get('publishedAt'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': data['title'],
|
||||
'description': data.get('teaser_text'),
|
||||
'location': data.get('country_of_origin'),
|
||||
'uploader': data.get('host', {}).get('name'),
|
||||
'uploader_id': data.get('host', {}).get('slug'),
|
||||
'thumbnail': data.get('image', {}).get('large_url_2x'),
|
||||
'duration': data.get('duration'),
|
||||
'id': episode_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'timestamp': timestamp,
|
||||
'formats': formats,
|
||||
}
|
||||
|
50
youtube_dl/extractor/rozhlas.py
Normal file
50
youtube_dl/extractor/rozhlas.py
Normal file
@@ -0,0 +1,50 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
remove_start,
|
||||
)
|
||||
|
||||
|
||||
class RozhlasIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?prehravac\.rozhlas\.cz/audio/(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://prehravac.rozhlas.cz/audio/3421320',
|
||||
'md5': '504c902dbc9e9a1fd50326eccf02a7e2',
|
||||
'info_dict': {
|
||||
'id': '3421320',
|
||||
'ext': 'mp3',
|
||||
'title': 'Echo Pavla Klusáka (30.06.2015 21:00)',
|
||||
'description': 'Osmdesátiny Terryho Rileyho jsou skvělou příležitostí proletět se elektronickými i akustickými díly zakladatatele minimalismu, který je aktivní už přes padesát let'
|
||||
}
|
||||
}, {
|
||||
'url': 'http://prehravac.rozhlas.cz/audio/3421320/embed',
|
||||
'skip_download': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
audio_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(
|
||||
'http://prehravac.rozhlas.cz/audio/%s' % audio_id, audio_id)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<h3>(.+?)</h3>\s*<p[^>]*>.*?</p>\s*<div[^>]+id=["\']player-track',
|
||||
webpage, 'title', default=None) or remove_start(
|
||||
self._og_search_title(webpage), 'Radio Wave - ')
|
||||
description = self._html_search_regex(
|
||||
r'<p[^>]+title=(["\'])(?P<url>(?:(?!\1).)+)\1[^>]*>.*?</p>\s*<div[^>]+id=["\']player-track',
|
||||
webpage, 'description', fatal=False, group='url')
|
||||
duration = int_or_none(self._search_regex(
|
||||
r'data-duration=["\'](\d+)', webpage, 'duration', default=None))
|
||||
|
||||
return {
|
||||
'id': audio_id,
|
||||
'url': 'http://media.rozhlas.cz/_audio/%s.mp3' % audio_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'vcodec': 'none',
|
||||
}
|
@@ -14,7 +14,7 @@ class RtlNlIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?:www\.)?
|
||||
(?:
|
||||
rtlxl\.nl/\#!/[^/]+/|
|
||||
rtlxl\.nl/[^\#]*\#!/[^/]+/|
|
||||
rtl\.nl/system/videoplayer/(?:[^/]+/)+(?:video_)?embed\.html\b.+?\buuid=
|
||||
)
|
||||
(?P<id>[0-9a-f-]+)'''
|
||||
@@ -67,6 +67,9 @@ class RtlNlIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://www.rtl.nl/system/videoplayer/derden/embed.html#!/uuid=bb0353b0-d6a4-1dad-90e9-18fe75b8d1f0',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://rtlxl.nl/?_ga=1.204735956.572365465.1466978370#!/rtl-nieuws-132237/3c487912-023b-49ac-903e-2c5d79f8410f',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -113,6 +113,8 @@ class RTVEALaCartaIE(InfoExtractor):
|
||||
png = self._download_webpage(png_request, video_id, 'Downloading url information')
|
||||
video_url = _decrypt_url(png)
|
||||
if not video_url.endswith('.f4m'):
|
||||
if '?' not in video_url:
|
||||
video_url = video_url.replace('resources/', 'auth/resources/')
|
||||
video_url = video_url.replace('.net.rtve', '.multimedia.cdn.rtve')
|
||||
|
||||
subtitles = None
|
||||
|
@@ -75,7 +75,7 @@ class SafariBaseIE(InfoExtractor):
|
||||
class SafariIE(SafariBaseIE):
|
||||
IE_NAME = 'safari'
|
||||
IE_DESC = 'safaribooksonline.com online video'
|
||||
_VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/library/view/[^/]+/(?P<course_id>[^/]+)/(?P<part>part\d+)\.html'
|
||||
_VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/library/view/[^/]+/(?P<course_id>[^/]+)/(?P<part>[^/?#&]+)\.html'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/part00.html',
|
||||
@@ -92,6 +92,9 @@ class SafariIE(SafariBaseIE):
|
||||
# non-digits in course id
|
||||
'url': 'https://www.safaribooksonline.com/library/view/create-a-nodejs/100000006A0210/part00.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.safaribooksonline.com/library/view/learning-path-red/9780134664057/RHCE_Introduction.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -132,12 +135,15 @@ class SafariIE(SafariBaseIE):
|
||||
|
||||
class SafariApiIE(SafariBaseIE):
|
||||
IE_NAME = 'safari:api'
|
||||
_VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/api/v1/book/(?P<course_id>[^/]+)/chapter(?:-content)?/(?P<part>part\d+)\.html'
|
||||
_VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/api/v1/book/(?P<course_id>[^/]+)/chapter(?:-content)?/(?P<part>[^/?#&]+)\.html'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html',
|
||||
'only_matching': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.safaribooksonline.com/api/v1/book/9780134664057/chapter/RHCE_Introduction.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
@@ -4,33 +4,43 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .jwplatform import JWPlatformBaseIE
|
||||
from ..compat import compat_parse_qs
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
parse_duration,
|
||||
float_or_none,
|
||||
parse_iso8601,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
class SendtoNewsIE(JWPlatformBaseIE):
|
||||
_VALID_URL = r'https?://embed\.sendtonews\.com/player/embed\.php\?(?P<query>[^#]+)'
|
||||
_VALID_URL = r'https?://embed\.sendtonews\.com/player2/embedplayer\.php\?.*\bSC=(?P<id>[0-9A-Za-z-]+)'
|
||||
|
||||
_TEST = {
|
||||
# From http://cleveland.cbslocal.com/2016/05/16/indians-score-season-high-15-runs-in-blowout-win-over-reds-rapid-reaction/
|
||||
'url': 'http://embed.sendtonews.com/player/embed.php?SK=GxfCe0Zo7D&MK=175909&PK=5588&autoplay=on&sound=yes',
|
||||
'url': 'http://embed.sendtonews.com/player2/embedplayer.php?SC=GxfCe0Zo7D-175909-5588&type=single&autoplay=on&sound=YES',
|
||||
'info_dict': {
|
||||
'id': 'GxfCe0Zo7D-175909-5588',
|
||||
'ext': 'mp4',
|
||||
'title': 'Recap: CLE 15, CIN 6',
|
||||
'description': '5/16/16: Indians\' bats explode for 15 runs in a win',
|
||||
'duration': 49,
|
||||
'id': 'GxfCe0Zo7D-175909-5588'
|
||||
},
|
||||
'playlist_count': 9,
|
||||
# test the first video only to prevent lengthy tests
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'id': '198180',
|
||||
'ext': 'mp4',
|
||||
'title': 'Recap: CLE 5, LAA 4',
|
||||
'description': '8/14/16: Naquin, Almonte lead Indians in 5-4 win',
|
||||
'duration': 57.343,
|
||||
'thumbnail': 're:https?://.*\.jpg$',
|
||||
'upload_date': '20160815',
|
||||
'timestamp': 1471221961,
|
||||
},
|
||||
}],
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
_URL_TEMPLATE = '//embed.sendtonews.com/player/embed.php?SK=%s&MK=%s&PK=%s'
|
||||
_URL_TEMPLATE = '//embed.sendtonews.com/player2/embedplayer.php?SC=%s'
|
||||
|
||||
@classmethod
|
||||
def _extract_url(cls, webpage):
|
||||
@@ -39,48 +49,41 @@ class SendtoNewsIE(JWPlatformBaseIE):
|
||||
.*\bSC=(?P<SC>[0-9a-zA-Z-]+).*
|
||||
\1>''', webpage)
|
||||
if mobj:
|
||||
sk, mk, pk = mobj.group('SC').split('-')
|
||||
return cls._URL_TEMPLATE % (sk, mk, pk)
|
||||
sc = mobj.group('SC')
|
||||
return cls._URL_TEMPLATE % sc
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
params = compat_parse_qs(mobj.group('query'))
|
||||
playlist_id = self._match_id(url)
|
||||
|
||||
if 'SK' not in params or 'MK' not in params or 'PK' not in params:
|
||||
raise ExtractorError('Invalid URL', expected=True)
|
||||
data_url = update_url_query(
|
||||
url.replace('embedplayer.php', 'data_read.php'),
|
||||
{'cmd': 'loadInitial'})
|
||||
playlist_data = self._download_json(data_url, playlist_id)
|
||||
|
||||
video_id = '-'.join([params['SK'][0], params['MK'][0], params['PK'][0]])
|
||||
entries = []
|
||||
for video in playlist_data['playlistData'][0]:
|
||||
info_dict = self._parse_jwplayer_data(
|
||||
video['jwconfiguration'],
|
||||
require_title=False, rtmp_params={'no_resume': True})
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
thumbnails = []
|
||||
if video.get('thumbnailUrl'):
|
||||
thumbnails.append({
|
||||
'id': 'normal',
|
||||
'url': video['thumbnailUrl'],
|
||||
})
|
||||
if video.get('smThumbnailUrl'):
|
||||
thumbnails.append({
|
||||
'id': 'small',
|
||||
'url': video['smThumbnailUrl'],
|
||||
})
|
||||
info_dict.update({
|
||||
'title': video['S_headLine'],
|
||||
'description': video.get('S_fullStory'),
|
||||
'thumbnails': thumbnails,
|
||||
'duration': float_or_none(video.get('SM_length')),
|
||||
'timestamp': parse_iso8601(video.get('S_sysDate'), delimiter=' '),
|
||||
})
|
||||
entries.append(info_dict)
|
||||
|
||||
jwplayer_data_str = self._search_regex(
|
||||
r'jwplayer\("[^"]+"\)\.setup\((.+?)\);', webpage, 'JWPlayer data')
|
||||
js_vars = {
|
||||
'w': 1024,
|
||||
'h': 768,
|
||||
'modeVar': 'html5',
|
||||
}
|
||||
for name, val in js_vars.items():
|
||||
js_val = '%d' % val if isinstance(val, int) else '"%s"' % val
|
||||
jwplayer_data_str = jwplayer_data_str.replace(':%s,' % name, ':%s,' % js_val)
|
||||
|
||||
info_dict = self._parse_jwplayer_data(
|
||||
self._parse_json(jwplayer_data_str, video_id),
|
||||
video_id, require_title=False, rtmp_params={'no_resume': True})
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<div[^>]+class="embedTitle">([^<]+)</div>', webpage, 'title')
|
||||
description = self._html_search_regex(
|
||||
r'<div[^>]+class="embedSubTitle">([^<]+)</div>', webpage,
|
||||
'description', fatal=False)
|
||||
duration = parse_duration(self._html_search_regex(
|
||||
r'<div[^>]+class="embedDetails">([0-9:]+)', webpage,
|
||||
'duration', fatal=False))
|
||||
|
||||
info_dict.update({
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
})
|
||||
|
||||
return info_dict
|
||||
return self.playlist_result(entries, playlist_id)
|
||||
|
@@ -6,7 +6,6 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
sanitized_Request,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
@@ -37,28 +36,33 @@ class SharedIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
webpage, urlh = self._download_webpage_handle(url, video_id)
|
||||
|
||||
if '>File does not exist<' in webpage:
|
||||
raise ExtractorError(
|
||||
'Video %s does not exist' % video_id, expected=True)
|
||||
|
||||
download_form = self._hidden_inputs(webpage)
|
||||
request = sanitized_Request(
|
||||
url, urlencode_postdata(download_form))
|
||||
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
|
||||
video_page = self._download_webpage(
|
||||
request, video_id, 'Downloading video page')
|
||||
urlh.geturl(), video_id, 'Downloading video page',
|
||||
data=urlencode_postdata(download_form),
|
||||
headers={
|
||||
'Content-Type': 'application/x-www-form-urlencoded',
|
||||
'Referer': urlh.geturl(),
|
||||
})
|
||||
|
||||
video_url = self._html_search_regex(
|
||||
r'data-url="([^"]+)"', video_page, 'video URL')
|
||||
r'data-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||
video_page, 'video URL', group='url')
|
||||
title = base64.b64decode(self._html_search_meta(
|
||||
'full:title', webpage, 'title').encode('utf-8')).decode('utf-8')
|
||||
filesize = int_or_none(self._html_search_meta(
|
||||
'full:size', webpage, 'file size', fatal=False))
|
||||
thumbnail = self._html_search_regex(
|
||||
r'data-poster="([^"]+)"', video_page, 'thumbnail', default=None)
|
||||
r'data-poster=(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||
video_page, 'thumbnail', default=None, group='url')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@@ -13,20 +13,21 @@ from ..utils import (
|
||||
sanitized_Request,
|
||||
unified_strdate,
|
||||
urlencode_postdata,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
|
||||
class SmotriIE(InfoExtractor):
|
||||
IE_DESC = 'Smotri.com'
|
||||
IE_NAME = 'smotri'
|
||||
_VALID_URL = r'^https?://(?:www\.)?(?:smotri\.com/video/view/\?id=|pics\.smotri\.com/(?:player|scrubber_custom8)\.swf\?file=)(?P<id>v(?P<realvideoid>[0-9]+)[a-z0-9]{4})'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:smotri\.com/video/view/\?id=|pics\.smotri\.com/(?:player|scrubber_custom8)\.swf\?file=)(?P<id>v(?P<realvideoid>[0-9]+)[a-z0-9]{4})'
|
||||
_NETRC_MACHINE = 'smotri'
|
||||
|
||||
_TESTS = [
|
||||
# real video id 2610366
|
||||
{
|
||||
'url': 'http://smotri.com/video/view/?id=v261036632ab',
|
||||
'md5': '2a7b08249e6f5636557579c368040eb9',
|
||||
'md5': '02c0dfab2102984e9c5bb585cc7cc321',
|
||||
'info_dict': {
|
||||
'id': 'v261036632ab',
|
||||
'ext': 'mp4',
|
||||
@@ -174,11 +175,11 @@ class SmotriIE(InfoExtractor):
|
||||
if video_password:
|
||||
video_form['pass'] = hashlib.md5(video_password.encode('utf-8')).hexdigest()
|
||||
|
||||
request = sanitized_Request(
|
||||
'http://smotri.com/video/view/url/bot/', urlencode_postdata(video_form))
|
||||
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
|
||||
video = self._download_json(request, video_id, 'Downloading video JSON')
|
||||
video = self._download_json(
|
||||
'http://smotri.com/video/view/url/bot/',
|
||||
video_id, 'Downloading video JSON',
|
||||
data=urlencode_postdata(video_form),
|
||||
headers={'Content-Type': 'application/x-www-form-urlencoded'})
|
||||
|
||||
video_url = video.get('_vidURL') or video.get('_vidURL_mp4')
|
||||
|
||||
@@ -196,11 +197,11 @@ class SmotriIE(InfoExtractor):
|
||||
raise ExtractorError(msg, expected=True)
|
||||
|
||||
title = video['title']
|
||||
thumbnail = video['_imgURL']
|
||||
upload_date = unified_strdate(video['added'])
|
||||
uploader = video['userNick']
|
||||
uploader_id = video['userLogin']
|
||||
duration = int_or_none(video['duration'])
|
||||
thumbnail = video.get('_imgURL')
|
||||
upload_date = unified_strdate(video.get('added'))
|
||||
uploader = video.get('userNick')
|
||||
uploader_id = video.get('userLogin')
|
||||
duration = int_or_none(video.get('duration'))
|
||||
|
||||
# Video JSON does not provide enough meta data
|
||||
# We will extract some from the video web page instead
|
||||
@@ -209,7 +210,7 @@ class SmotriIE(InfoExtractor):
|
||||
|
||||
# Warning if video is unavailable
|
||||
warning = self._html_search_regex(
|
||||
r'<div class="videoUnModer">(.*?)</div>', webpage,
|
||||
r'<div[^>]+class="videoUnModer"[^>]*>(.+?)</div>', webpage,
|
||||
'warning message', default=None)
|
||||
if warning is not None:
|
||||
self._downloader.report_warning(
|
||||
@@ -217,20 +218,22 @@ class SmotriIE(InfoExtractor):
|
||||
(video_id, warning))
|
||||
|
||||
# Adult content
|
||||
if re.search('EroConfirmText">', webpage) is not None:
|
||||
if 'EroConfirmText">' in webpage:
|
||||
self.report_age_confirmation()
|
||||
confirm_string = self._html_search_regex(
|
||||
r'<a href="/video/view/\?id=%s&confirm=([^"]+)" title="[^"]+">' % video_id,
|
||||
r'<a[^>]+href="/video/view/\?id=%s&confirm=([^"]+)"' % video_id,
|
||||
webpage, 'confirm string')
|
||||
confirm_url = webpage_url + '&confirm=%s' % confirm_string
|
||||
webpage = self._download_webpage(confirm_url, video_id, 'Downloading video page (age confirmed)')
|
||||
webpage = self._download_webpage(
|
||||
confirm_url, video_id,
|
||||
'Downloading video page (age confirmed)')
|
||||
adult_content = True
|
||||
else:
|
||||
adult_content = False
|
||||
|
||||
view_count = self._html_search_regex(
|
||||
'Общее количество просмотров.*?<span class="Number">(\\d+)</span>',
|
||||
webpage, 'view count', fatal=False, flags=re.MULTILINE | re.DOTALL)
|
||||
r'(?s)Общее количество просмотров.*?<span class="Number">(\d+)</span>',
|
||||
webpage, 'view count', fatal=False)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
@@ -249,37 +252,33 @@ class SmotriIE(InfoExtractor):
|
||||
class SmotriCommunityIE(InfoExtractor):
|
||||
IE_DESC = 'Smotri.com community videos'
|
||||
IE_NAME = 'smotri:community'
|
||||
_VALID_URL = r'^https?://(?:www\.)?smotri\.com/community/video/(?P<communityid>[0-9A-Za-z_\'-]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?smotri\.com/community/video/(?P<id>[0-9A-Za-z_\'-]+)'
|
||||
_TEST = {
|
||||
'url': 'http://smotri.com/community/video/kommuna',
|
||||
'info_dict': {
|
||||
'id': 'kommuna',
|
||||
'title': 'КПРФ',
|
||||
},
|
||||
'playlist_mincount': 4,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
community_id = mobj.group('communityid')
|
||||
community_id = self._match_id(url)
|
||||
|
||||
url = 'http://smotri.com/export/rss/video/by/community/-/%s/video.xml' % community_id
|
||||
rss = self._download_xml(url, community_id, 'Downloading community RSS')
|
||||
rss = self._download_xml(
|
||||
'http://smotri.com/export/rss/video/by/community/-/%s/video.xml' % community_id,
|
||||
community_id, 'Downloading community RSS')
|
||||
|
||||
entries = [self.url_result(video_url.text, 'Smotri')
|
||||
for video_url in rss.findall('./channel/item/link')]
|
||||
entries = [
|
||||
self.url_result(video_url.text, SmotriIE.ie_key())
|
||||
for video_url in rss.findall('./channel/item/link')]
|
||||
|
||||
description_text = rss.find('./channel/description').text
|
||||
community_title = self._html_search_regex(
|
||||
'^Видео сообщества "([^"]+)"$', description_text, 'community title')
|
||||
|
||||
return self.playlist_result(entries, community_id, community_title)
|
||||
return self.playlist_result(entries, community_id)
|
||||
|
||||
|
||||
class SmotriUserIE(InfoExtractor):
|
||||
IE_DESC = 'Smotri.com user videos'
|
||||
IE_NAME = 'smotri:user'
|
||||
_VALID_URL = r'^https?://(?:www\.)?smotri\.com/user/(?P<userid>[0-9A-Za-z_\'-]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?smotri\.com/user/(?P<id>[0-9A-Za-z_\'-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://smotri.com/user/inspector',
|
||||
'info_dict': {
|
||||
@@ -290,19 +289,19 @@ class SmotriUserIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
user_id = mobj.group('userid')
|
||||
user_id = self._match_id(url)
|
||||
|
||||
url = 'http://smotri.com/export/rss/user/video/-/%s/video.xml' % user_id
|
||||
rss = self._download_xml(url, user_id, 'Downloading user RSS')
|
||||
rss = self._download_xml(
|
||||
'http://smotri.com/export/rss/user/video/-/%s/video.xml' % user_id,
|
||||
user_id, 'Downloading user RSS')
|
||||
|
||||
entries = [self.url_result(video_url.text, 'Smotri')
|
||||
for video_url in rss.findall('./channel/item/link')]
|
||||
|
||||
description_text = rss.find('./channel/description').text
|
||||
user_nickname = self._html_search_regex(
|
||||
'^Видео режиссера (.*)$', description_text,
|
||||
'user nickname')
|
||||
description_text = xpath_text(rss, './channel/description') or ''
|
||||
user_nickname = self._search_regex(
|
||||
'^Видео режиссера (.+)$', description_text,
|
||||
'user nickname', fatal=False)
|
||||
|
||||
return self.playlist_result(entries, user_id, user_nickname)
|
||||
|
||||
@@ -310,11 +309,11 @@ class SmotriUserIE(InfoExtractor):
|
||||
class SmotriBroadcastIE(InfoExtractor):
|
||||
IE_DESC = 'Smotri.com broadcasts'
|
||||
IE_NAME = 'smotri:broadcast'
|
||||
_VALID_URL = r'^https?://(?:www\.)?(?P<url>smotri\.com/live/(?P<broadcastid>[^/]+))/?.*'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<url>smotri\.com/live/(?P<id>[^/]+))/?.*'
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
broadcast_id = mobj.group('broadcastid')
|
||||
broadcast_id = mobj.group('id')
|
||||
|
||||
broadcast_url = 'http://' + mobj.group('url')
|
||||
broadcast_page = self._download_webpage(broadcast_url, broadcast_id, 'Downloading broadcast page')
|
||||
@@ -328,7 +327,8 @@ class SmotriBroadcastIE(InfoExtractor):
|
||||
|
||||
(username, password) = self._get_login_info()
|
||||
if username is None:
|
||||
self.raise_login_required('Erotic broadcasts allowed only for registered users')
|
||||
self.raise_login_required(
|
||||
'Erotic broadcasts allowed only for registered users')
|
||||
|
||||
login_form = {
|
||||
'login-hint53': '1',
|
||||
@@ -343,8 +343,9 @@ class SmotriBroadcastIE(InfoExtractor):
|
||||
broadcast_page = self._download_webpage(
|
||||
request, broadcast_id, 'Logging in and confirming age')
|
||||
|
||||
if re.search('>Неверный логин или пароль<', broadcast_page) is not None:
|
||||
raise ExtractorError('Unable to log in: bad username or password', expected=True)
|
||||
if '>Неверный логин или пароль<' in broadcast_page:
|
||||
raise ExtractorError(
|
||||
'Unable to log in: bad username or password', expected=True)
|
||||
|
||||
adult_content = True
|
||||
else:
|
||||
@@ -383,11 +384,11 @@ class SmotriBroadcastIE(InfoExtractor):
|
||||
|
||||
broadcast_playpath = broadcast_json['_streamName']
|
||||
broadcast_app = '%s/%s' % (mobj.group('app'), broadcast_json['_vidURL'])
|
||||
broadcast_thumbnail = broadcast_json['_imgURL']
|
||||
broadcast_thumbnail = broadcast_json.get('_imgURL')
|
||||
broadcast_title = self._live_title(broadcast_json['title'])
|
||||
broadcast_description = broadcast_json['description']
|
||||
broadcaster_nick = broadcast_json['nick']
|
||||
broadcaster_login = broadcast_json['login']
|
||||
broadcast_description = broadcast_json.get('description')
|
||||
broadcaster_nick = broadcast_json.get('nick')
|
||||
broadcaster_login = broadcast_json.get('login')
|
||||
rtmp_conn = 'S:%s' % uuid.uuid4().hex
|
||||
except KeyError:
|
||||
if protected_broadcast:
|
||||
|
@@ -14,10 +14,10 @@ from ..utils import ExtractorError
|
||||
class SohuIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?P<mytv>my\.)?tv\.sohu\.com/.+?/(?(mytv)|n)(?P<id>\d+)\.shtml.*?'
|
||||
|
||||
# Sohu videos give different MD5 sums on Travis CI and my machine
|
||||
_TESTS = [{
|
||||
'note': 'This video is available only in Mainland China',
|
||||
'url': 'http://tv.sohu.com/20130724/n382479172.shtml#super',
|
||||
'md5': '29175c8cadd8b5cc4055001e85d6b372',
|
||||
'info_dict': {
|
||||
'id': '382479172',
|
||||
'ext': 'mp4',
|
||||
@@ -26,7 +26,6 @@ class SohuIE(InfoExtractor):
|
||||
'skip': 'On available in China',
|
||||
}, {
|
||||
'url': 'http://tv.sohu.com/20150305/n409385080.shtml',
|
||||
'md5': '699060e75cf58858dd47fb9c03c42cfb',
|
||||
'info_dict': {
|
||||
'id': '409385080',
|
||||
'ext': 'mp4',
|
||||
@@ -34,7 +33,6 @@ class SohuIE(InfoExtractor):
|
||||
}
|
||||
}, {
|
||||
'url': 'http://my.tv.sohu.com/us/232799889/78693464.shtml',
|
||||
'md5': '9bf34be48f2f4dadcb226c74127e203c',
|
||||
'info_dict': {
|
||||
'id': '78693464',
|
||||
'ext': 'mp4',
|
||||
@@ -48,7 +46,6 @@ class SohuIE(InfoExtractor):
|
||||
'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
|
||||
},
|
||||
'playlist': [{
|
||||
'md5': 'bdbfb8f39924725e6589c146bc1883ad',
|
||||
'info_dict': {
|
||||
'id': '78910339_part1',
|
||||
'ext': 'mp4',
|
||||
@@ -56,7 +53,6 @@ class SohuIE(InfoExtractor):
|
||||
'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
|
||||
}
|
||||
}, {
|
||||
'md5': '3e1f46aaeb95354fd10e7fca9fc1804e',
|
||||
'info_dict': {
|
||||
'id': '78910339_part2',
|
||||
'ext': 'mp4',
|
||||
@@ -64,7 +60,6 @@ class SohuIE(InfoExtractor):
|
||||
'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
|
||||
}
|
||||
}, {
|
||||
'md5': '8407e634175fdac706766481b9443450',
|
||||
'info_dict': {
|
||||
'id': '78910339_part3',
|
||||
'ext': 'mp4',
|
||||
|
34
youtube_dl/extractor/sonyliv.py
Normal file
34
youtube_dl/extractor/sonyliv.py
Normal file
@@ -0,0 +1,34 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class SonyLIVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?sonyliv\.com/details/[^/]+/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': "http://www.sonyliv.com/details/episodes/5024612095001/Ep.-1---Achaari-Cheese-Toast---Bachelor's-Delight",
|
||||
'info_dict': {
|
||||
'title': "Ep. 1 - Achaari Cheese Toast - Bachelor's Delight",
|
||||
'id': '5024612095001',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20160707',
|
||||
'description': 'md5:7f28509a148d5be9d0782b4d5106410d',
|
||||
'uploader_id': '4338955589001',
|
||||
'timestamp': 1467870968,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['BrightcoveNew'],
|
||||
}, {
|
||||
'url': 'http://www.sonyliv.com/details/full%20movie/4951168986001/Sei-Raat-(Bangla)',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/4338955589001/default_default/index.html?videoId=%s'
|
||||
|
||||
def _real_extract(self, url):
|
||||
brightcove_id = self._match_id(url)
|
||||
return self.url_result(
|
||||
self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id)
|
@@ -119,6 +119,12 @@ class SoundcloudIE(InfoExtractor):
|
||||
_CLIENT_ID = '02gUJC0hH2ct1EGOcYXQIzRFU91c72Ea'
|
||||
_IPHONE_CLIENT_ID = '376f225bf427445fc4bfb6b99b72e0bf'
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
return [m.group('url') for m in re.finditer(
|
||||
r'<iframe[^>]+src=(["\'])(?P<url>(?:https?://)?(?:w\.)?soundcloud\.com/player.+?)\1',
|
||||
webpage)]
|
||||
|
||||
def report_resolve(self, video_id):
|
||||
"""Report information extraction."""
|
||||
self.to_screen('%s: Resolving id' % video_id)
|
||||
|
@@ -17,6 +17,8 @@ class SouthParkIE(MTVServicesInfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'South Park|Bat Daded',
|
||||
'description': 'Randy disqualifies South Park by getting into a fight with Bat Dad.',
|
||||
'timestamp': 1112760000,
|
||||
'upload_date': '20050406',
|
||||
},
|
||||
}]
|
||||
|
||||
@@ -28,6 +30,10 @@ class SouthParkEsIE(SouthParkIE):
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://southpark.cc.com/episodios-en-espanol/s01e01-cartman-consigue-una-sonda-anal#source=351c1323-0b96-402d-a8b9-40d01b2e9bde&position=1&sort=!airdate',
|
||||
'info_dict': {
|
||||
'title': 'Cartman Consigue Una Sonda Anal',
|
||||
'description': 'Cartman Consigue Una Sonda Anal',
|
||||
},
|
||||
'playlist_count': 4,
|
||||
}]
|
||||
|
||||
@@ -42,17 +48,27 @@ class SouthParkDeIE(SouthParkIE):
|
||||
'info_dict': {
|
||||
'id': '85487c96-b3b9-4e39-9127-ad88583d9bf2',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Government Won\'t Respect My Privacy',
|
||||
'title': 'South Park|The Government Won\'t Respect My Privacy',
|
||||
'description': 'Cartman explains the benefits of "Shitter" to Stan, Kyle and Craig.',
|
||||
'timestamp': 1380160800,
|
||||
'upload_date': '20130926',
|
||||
},
|
||||
}, {
|
||||
# non-ASCII characters in initial URL
|
||||
'url': 'http://www.southpark.de/alle-episoden/s18e09-hashtag-aufwärmen',
|
||||
'playlist_count': 4,
|
||||
'info_dict': {
|
||||
'title': 'Hashtag „Aufwärmen“',
|
||||
'description': 'Kyle will mit seinem kleinen Bruder Ike Videospiele spielen. Als der nicht mehr mit ihm spielen will, hat Kyle Angst, dass er die Kids von heute nicht mehr versteht.',
|
||||
},
|
||||
'playlist_count': 3,
|
||||
}, {
|
||||
# non-ASCII characters in redirect URL
|
||||
'url': 'http://www.southpark.de/alle-episoden/s18e09',
|
||||
'playlist_count': 4,
|
||||
'info_dict': {
|
||||
'title': 'Hashtag „Aufwärmen“',
|
||||
'description': 'Kyle will mit seinem kleinen Bruder Ike Videospiele spielen. Als der nicht mehr mit ihm spielen will, hat Kyle Angst, dass er die Kids von heute nicht mehr versteht.',
|
||||
},
|
||||
'playlist_count': 3,
|
||||
}]
|
||||
|
||||
|
||||
@@ -63,7 +79,11 @@ class SouthParkNlIE(SouthParkIE):
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.southpark.nl/full-episodes/s18e06-freemium-isnt-free',
|
||||
'playlist_count': 4,
|
||||
'info_dict': {
|
||||
'title': 'Freemium Isn\'t Free',
|
||||
'description': 'Stan is addicted to the new Terrance and Phillip mobile game.',
|
||||
},
|
||||
'playlist_mincount': 3,
|
||||
}]
|
||||
|
||||
|
||||
@@ -74,5 +94,9 @@ class SouthParkDkIE(SouthParkIE):
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.southparkstudios.dk/full-episodes/s18e07-grounded-vindaloop',
|
||||
'playlist_count': 4,
|
||||
'info_dict': {
|
||||
'title': 'Grounded Vindaloop',
|
||||
'description': 'Butters is convinced he\'s living in a virtual reality.',
|
||||
},
|
||||
'playlist_mincount': 3,
|
||||
}]
|
||||
|
@@ -11,8 +11,10 @@ class SpikeIE(MTVServicesInfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'b9c8221a-4e50-479a-b86d-3333323e38ba',
|
||||
'ext': 'mp4',
|
||||
'title': 'Auction Hunters|Can Allen Ride A Hundred Year-Old Motorcycle?',
|
||||
'title': 'Auction Hunters|December 27, 2013|4|414|Can Allen Ride A Hundred Year-Old Motorcycle?',
|
||||
'description': 'md5:fbed7e82ed5fad493615b3094a9499cb',
|
||||
'timestamp': 1388120400,
|
||||
'upload_date': '20131227',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.spike.com/video-clips/lhtu8m/',
|
||||
|
@@ -12,25 +12,29 @@ from ..utils import (
|
||||
|
||||
|
||||
class SunPornoIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?sunporno\.com/videos/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:(?:www\.)?sunporno\.com/videos|embeds\.sunporno\.com/embed)/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.sunporno.com/videos/807778/',
|
||||
'md5': '6457d3c165fd6de062b99ef6c2ff4c86',
|
||||
'md5': '507887e29033502f29dba69affeebfc9',
|
||||
'info_dict': {
|
||||
'id': '807778',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:0a400058e8105d39e35c35e7c5184164',
|
||||
'description': 'md5:a31241990e1bd3a64e72ae99afb325fb',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'duration': 302,
|
||||
'age_limit': 18,
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'http://embeds.sunporno.com/embed/807778',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
webpage = self._download_webpage(
|
||||
'http://www.sunporno.com/videos/%s' % video_id, video_id)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<title>([^<]+)</title>', webpage, 'title')
|
||||
@@ -40,7 +44,8 @@ class SunPornoIE(InfoExtractor):
|
||||
r'poster="([^"]+)"', webpage, 'thumbnail', fatal=False)
|
||||
|
||||
duration = parse_duration(self._search_regex(
|
||||
r'itemprop="duration">\s*(\d+:\d+)\s*<',
|
||||
(r'itemprop="duration"[^>]*>\s*(\d+:\d+)\s*<',
|
||||
r'>Duration:\s*<span[^>]+>\s*(\d+:\d+)\s*<'),
|
||||
webpage, 'duration', fatal=False))
|
||||
|
||||
view_count = int_or_none(self._html_search_regex(
|
||||
@@ -48,7 +53,7 @@ class SunPornoIE(InfoExtractor):
|
||||
webpage, 'view count', fatal=False))
|
||||
comment_count = int_or_none(self._html_search_regex(
|
||||
r'(\d+)</b> Comments?',
|
||||
webpage, 'comment count', fatal=False))
|
||||
webpage, 'comment count', fatal=False, default=None))
|
||||
|
||||
formats = []
|
||||
quality = qualities(['mp4', 'flv'])
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user