Compare commits
310 Commits
2017.05.09
...
2017.07.23
Author | SHA1 | Date | |
---|---|---|---|
|
0db492c02a | ||
|
425f41319a | ||
|
71dde5eecf | ||
|
935d6c20c0 | ||
|
e0f1fb0a27 | ||
|
0017d9ad6d | ||
|
327c8364f1 | ||
|
359aa2fdd1 | ||
|
f76c02c87b | ||
|
7d9a1db111 | ||
|
0396806f67 | ||
|
dc6520aa3d | ||
|
c653326a14 | ||
|
3fcf346ac1 | ||
|
fa63cf6c23 | ||
|
85f5a74b6c | ||
|
d20b1c6725 | ||
|
bb176df3bb | ||
|
83d00044c1 | ||
|
7abed4e06c | ||
|
13eb526f11 | ||
|
00d06e3cfc | ||
|
749ca5eced | ||
|
3f59b0154a | ||
|
089b97cfee | ||
|
decf86044d | ||
|
94b817edeb | ||
|
cea931a9e5 | ||
|
ef78563e9c | ||
|
961ea474b6 | ||
|
ea3f20494f | ||
|
c7604d79e9 | ||
|
4e826cd9ae | ||
|
2583c0b54e | ||
|
7d02dcfaa2 | ||
|
00dbdfc1f7 | ||
|
f354d84807 | ||
|
15da37c7dc | ||
|
9a0942ad55 | ||
|
f2bb33a986 | ||
|
3615bfe1b4 | ||
|
e8f20ffa03 | ||
|
9be31e771c | ||
|
7f176ac477 | ||
|
2edfd745df | ||
|
708f6f511e | ||
|
bb13949197 | ||
|
c3c94ca4a4 | ||
|
e3cd1fcdd1 | ||
|
b71c18b434 | ||
|
7bf539edcc | ||
|
65c416dda8 | ||
|
207acd8465 | ||
|
71a1db8919 | ||
|
6e925598d6 | ||
|
73cf76a93f | ||
|
256a746d21 | ||
|
58179eb7d9 | ||
|
485cb37576 | ||
|
ed84454d35 | ||
|
a02682fd13 | ||
|
0d2f0b0357 | ||
|
c319d1c483 | ||
|
d2b9f362fa | ||
|
4328ddf82b | ||
|
250b042c7e | ||
|
665e945246 | ||
|
5af2fd7fa0 | ||
|
15237fcd51 | ||
|
7a57730907 | ||
|
8b347a389e | ||
|
a49804816c | ||
|
eadd313321 | ||
|
d852c6bc59 | ||
|
00e5c36315 | ||
|
8a04ade86b | ||
|
ab328411d5 | ||
|
ddeff4be3f | ||
|
60d4401c5e | ||
|
dee2ff1d81 | ||
|
6554708252 | ||
|
0a2e1b2e30 | ||
|
babbc04d45 | ||
|
609ff8ca19 | ||
|
b6c9fe4162 | ||
|
4d9ba27bba | ||
|
50ae3f646e | ||
|
99a7e76240 | ||
|
a3a6d01a96 | ||
|
02d61a65e2 | ||
|
9b35297be1 | ||
|
4917478803 | ||
|
54faac2235 | ||
|
c69701c6ab | ||
|
d4f8ce6e91 | ||
|
b311b0ead2 | ||
|
72d256c434 | ||
|
b2ed954fc6 | ||
|
a919ca0ad6 | ||
|
88d6b7c2bd | ||
|
fd1c5fba6b | ||
|
0646e34c7d | ||
|
bf2dc9cc6e | ||
|
f1c051009b | ||
|
33ffb645a6 | ||
|
35544690e4 | ||
|
136503e302 | ||
|
4a87de72df | ||
|
a7ce8f16c4 | ||
|
a5aea53fc8 | ||
|
0c7a631b61 | ||
|
fd9ee4de8c | ||
|
5744cf6c03 | ||
|
9c48b5a193 | ||
|
449c665776 | ||
|
23aec3d623 | ||
|
27449ad894 | ||
|
bd65f18153 | ||
|
73af5cc817 | ||
|
b5f523ed62 | ||
|
4f4dd8d797 | ||
|
4cb18ab1b9 | ||
|
ac7409eec5 | ||
|
170719414d | ||
|
38dad4737f | ||
|
ddbb4c5c3e | ||
|
fa3ea7223a | ||
|
0f4a5a73e7 | ||
|
18166bb8e8 | ||
|
d4893e764b | ||
|
97b6e30113 | ||
|
9be9ec5980 | ||
|
048b55804d | ||
|
6ce79d7ac0 | ||
|
1641ca402d | ||
|
85cbcede5b | ||
|
a1de83e5f0 | ||
|
fee00b3884 | ||
|
2d2132ac6e | ||
|
cc2ffe5afe | ||
|
560050669b | ||
|
eaa006d1bd | ||
|
a6f29820c6 | ||
|
1433734c35 | ||
|
aefce8e6dc | ||
|
8b6ac49ecc | ||
|
b08e235f09 | ||
|
be80986ed9 | ||
|
473e87064b | ||
|
4f90d2aeac | ||
|
b230fefc3c | ||
|
96a2daa1ee | ||
|
0ea6efbb7a | ||
|
6a9cb29509 | ||
|
ca27037171 | ||
|
0bf4b71b75 | ||
|
5215f45327 | ||
|
0a268c6e11 | ||
|
7dd5415cd0 | ||
|
b5dc33daa9 | ||
|
97fa1f8dc4 | ||
|
b081f53b08 | ||
|
cb1e6d8985 | ||
|
9932ac5c58 | ||
|
bf87c36c93 | ||
|
b4a3d461e4 | ||
|
72b409559c | ||
|
534863e057 | ||
|
16bc958287 | ||
|
624bd0104c | ||
|
28a4d6cce8 | ||
|
2ae2ffda5e | ||
|
70e7967202 | ||
|
6e999fbc12 | ||
|
7409af9eb3 | ||
|
4e3637034c | ||
|
1afd0b0da7 | ||
|
7515830422 | ||
|
f5521ea209 | ||
|
34646967ba | ||
|
e4d2e76d8e | ||
|
87f5646937 | ||
|
cc69a3de1b | ||
|
15aeeb1188 | ||
|
1693bebe4d | ||
|
4244a13a1d | ||
|
931adf8cc1 | ||
|
c996943418 | ||
|
76e6378358 | ||
|
a355b57f58 | ||
|
1508da30c2 | ||
|
eb703e5380 | ||
|
0a3924e746 | ||
|
e1db730d86 | ||
|
537191826f | ||
|
130880ba48 | ||
|
f8ba3fda4d | ||
|
e1b90cc3db | ||
|
43e6579558 | ||
|
6d923aab35 | ||
|
62bafabc09 | ||
|
9edcdac90c | ||
|
cd138d8bd4 | ||
|
cd750b731c | ||
|
4bede0d8f5 | ||
|
f129c3f349 | ||
|
39d4c1be4d | ||
|
f7a747ce59 | ||
|
4489d41816 | ||
|
87b5184a0d | ||
|
c56ad5c975 | ||
|
6b7ce85cdc | ||
|
d10d0e3cf8 | ||
|
941ea38ef5 | ||
|
99bea8d298 | ||
|
a49eccdfa7 | ||
|
a846173d93 | ||
|
78e210dea5 | ||
|
8555204274 | ||
|
164fcbfeb7 | ||
|
bc22df29c4 | ||
|
7e688d2f6a | ||
|
5a6d1da442 | ||
|
703751add4 | ||
|
4050be78e5 | ||
|
4d9fc40100 | ||
|
765522345f | ||
|
6bceb36b99 | ||
|
1e0d65f0bd | ||
|
03327bc9a6 | ||
|
b407d8533d | ||
|
20e2c9de04 | ||
|
d16c0121b9 | ||
|
7f4c3a7439 | ||
|
28dbde9cc3 | ||
|
cc304ce588 | ||
|
98a0618941 | ||
|
fd545fc6d1 | ||
|
97067db2ae | ||
|
c130f0a37b | ||
|
d3d4ba7f24 | ||
|
5552c9eb0f | ||
|
59ed87cbd9 | ||
|
b7f8749304 | ||
|
5192ee17e7 | ||
|
e834f04400 | ||
|
884d09f330 | ||
|
9e35298f97 | ||
|
0551f1b07b | ||
|
de53511201 | ||
|
2570e85167 | ||
|
9dc5ab041f | ||
|
01f3c8e290 | ||
|
06c1b3ce07 | ||
|
0b75e42dfb | ||
|
a609e61a90 | ||
|
afdb387cd8 | ||
|
dc4e4f90a2 | ||
|
fdc20f87a6 | ||
|
35a2d221a3 | ||
|
daa4e9ff90 | ||
|
2ca29f1aaf | ||
|
77d682da9d | ||
|
8fffac6927 | ||
|
5f6fbcea08 | ||
|
00cb0faca8 | ||
|
bfdf6fcc66 | ||
|
bcaa1dd060 | ||
|
0e2d626ddd | ||
|
9221d5d7a8 | ||
|
9d63e57d1f | ||
|
3bc1eea0d8 | ||
|
7769f83701 | ||
|
650bd94716 | ||
|
36b226d48f | ||
|
f2e2f0c777 | ||
|
6f76679804 | ||
|
7073015a23 | ||
|
89fd03079b | ||
|
1c45b7a8a9 | ||
|
60f5c9fb19 | ||
|
c360e641e9 | ||
|
6f3c632c24 | ||
|
09b866e171 | ||
|
166d12b00c | ||
|
2b8e6a68f8 | ||
|
d105a7edc6 | ||
|
5d29af3d15 | ||
|
ca04de463d | ||
|
946826eec7 | ||
|
76d5a36391 | ||
|
56f9c77f0e | ||
|
0de136341a | ||
|
1339ecb2f8 | ||
|
efe9316703 | ||
|
851a01aed6 | ||
|
b845766597 | ||
|
fa26734e07 | ||
|
12f01118b0 | ||
|
7fc60f4ee9 | ||
|
58bb440283 | ||
|
7ad4362357 | ||
|
6c52477f59 | ||
|
116283ff64 | ||
|
7274f3d0e9 | ||
|
3166b1f0ac | ||
|
39ee263819 | ||
|
96820c1c6b | ||
|
e095109da1 | ||
|
d68afc5bc9 |
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@@ -6,8 +6,8 @@
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.05.09*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated versions will be rejected.
|
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.07.23*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated versions will be rejected.
|
||||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.05.09**
|
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.07.23**
|
||||||
|
|
||||||
### Before submitting an *issue* make sure you have:
|
### Before submitting an *issue* make sure you have:
|
||||||
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||||
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
|
|||||||
[debug] User config: []
|
[debug] User config: []
|
||||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||||
[debug] youtube-dl version 2017.05.09
|
[debug] youtube-dl version 2017.07.23
|
||||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||||
[debug] Proxy map: {}
|
[debug] Proxy map: {}
|
||||||
|
11
AUTHORS
11
AUTHORS
@@ -212,3 +212,14 @@ Xiao Di Guan
|
|||||||
Thomas Winant
|
Thomas Winant
|
||||||
Daniel Twardowski
|
Daniel Twardowski
|
||||||
Jeremie Jarosh
|
Jeremie Jarosh
|
||||||
|
Gerard Rovira
|
||||||
|
Marvin Ewald
|
||||||
|
Frédéric Bournival
|
||||||
|
Timendum
|
||||||
|
gritstub
|
||||||
|
Adam Voss
|
||||||
|
Mike Fährmann
|
||||||
|
Jan Kundrát
|
||||||
|
Giuseppe Fabiano
|
||||||
|
Örn Guðjónsson
|
||||||
|
Parmjit Virk
|
||||||
|
310
ChangeLog
310
ChangeLog
@@ -1,3 +1,313 @@
|
|||||||
|
version 2017.07.23
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [YoutubeDL] Improve default format specification (#13704)
|
||||||
|
* [YoutubeDL] Do not override id, extractor and extractor_key for
|
||||||
|
url_transparent entities
|
||||||
|
* [extractor/common] Fix playlist_from_matches
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [itv] Fix production id extraction (#13671, #13703)
|
||||||
|
* [vidio] Make duration non fatal and fix typo
|
||||||
|
* [mtv] Skip missing video parts (#13690)
|
||||||
|
* [sportbox:embed] Fix extraction
|
||||||
|
+ [npo] Add support for npo3.nl URLs (#13695)
|
||||||
|
* [dramafever] Remove video id from title (#13699)
|
||||||
|
+ [egghead:lesson] Add support for lessons (#6635)
|
||||||
|
* [funnyordie] Extract more metadata (#13677)
|
||||||
|
* [youku:show] Fix playlist extraction (#13248)
|
||||||
|
+ [dispeak] Recognize sevt subdomain (#13276)
|
||||||
|
* [adn] Improve error reporting (#13663)
|
||||||
|
* [crunchyroll] Relax series and season regex (#13659)
|
||||||
|
+ [spiegel:article] Add support for nexx iframe embeds (#13029)
|
||||||
|
+ [nexx:embed] Add support for iframe embeds
|
||||||
|
* [nexx] Improve JS embed extraction
|
||||||
|
+ [pearvideo] Add support for pearvideo.com (#13031)
|
||||||
|
|
||||||
|
|
||||||
|
version 2017.07.15
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [YoutubeDL] Don't expand environment variables in meta fields (#13637)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [spiegeltv] Delegate extraction to nexx extractor (#13159)
|
||||||
|
+ [nexx] Add support for nexx.cloud (#10807, #13465)
|
||||||
|
* [generic] Fix rutube embeds extraction (#13641)
|
||||||
|
* [karrierevideos] Fix title extraction (#13641)
|
||||||
|
* [youtube] Don't capture YouTube Red ad for creator meta field (#13621)
|
||||||
|
* [slideshare] Fix extraction (#13617)
|
||||||
|
+ [5tv] Add another video URL pattern (#13354, #13606)
|
||||||
|
* [drtv] Make HLS and HDS extraction non fatal
|
||||||
|
* [ted] Fix subtitles extraction (#13628, #13629)
|
||||||
|
* [vine] Make sure the title won't be empty
|
||||||
|
+ [twitter] Support HLS streams in vmap URLs
|
||||||
|
+ [periscope] Support pscp.tv URLs in embedded frames
|
||||||
|
* [twitter] Extract mp4 urls via mobile API (#12726)
|
||||||
|
* [niconico] Fix authentication error handling (#12486)
|
||||||
|
* [giantbomb] Extract m3u8 formats (#13626)
|
||||||
|
+ [vlive:playlist] Add support for playlists (#13613)
|
||||||
|
|
||||||
|
|
||||||
|
version 2017.07.09
|
||||||
|
|
||||||
|
Core
|
||||||
|
+ [extractor/common] Add support for AMP tags in _parse_html5_media_entries
|
||||||
|
+ [utils] Support attributes with no values in get_elements_by_attribute
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [dailymail] Add support for embeds
|
||||||
|
+ [joj] Add support for joj.sk (#13268)
|
||||||
|
* [abc.net.au:iview] Extract more formats (#13492, #13489)
|
||||||
|
* [egghead:course] Fix extraction (#6635, #13370)
|
||||||
|
+ [cjsw] Add support for cjsw.com (#13525)
|
||||||
|
+ [eagleplatform] Add support for referrer protected videos (#13557)
|
||||||
|
+ [eagleplatform] Add support for another embed pattern (#13557)
|
||||||
|
* [veoh] Extend URL regular expression (#13601)
|
||||||
|
* [npo:live] Fix live stream id extraction (#13568, #13605)
|
||||||
|
* [googledrive] Fix height extraction (#13603)
|
||||||
|
+ [dailymotion] Add support for new layout (#13580)
|
||||||
|
- [yam] Remove extractor
|
||||||
|
* [xhamster] Extract all formats and fix duration extraction (#13593)
|
||||||
|
+ [xhamster] Add support for new URL schema (#13593)
|
||||||
|
* [espn] Extend URL regular expression (#13244, #13549)
|
||||||
|
* [kaltura] Fix typo in subtitles extraction (#13569)
|
||||||
|
* [vier] Adapt extraction to redesign (#13575)
|
||||||
|
|
||||||
|
|
||||||
|
version 2017.07.02
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [extractor/common] Improve _json_ld
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [thisoldhouse] Add more fallbacks for video id
|
||||||
|
* [thisoldhouse] Fix video id extraction (#13540, #13541)
|
||||||
|
* [xfileshare] Extend format regular expression (#13536)
|
||||||
|
* [ted] Fix extraction (#13535)
|
||||||
|
+ [tastytrade] Add support for tastytrade.com (#13521)
|
||||||
|
* [dplayit] Relax video id regular expression (#13524)
|
||||||
|
+ [generic] Extract more generic metadata (#13527)
|
||||||
|
+ [bbccouk] Capture and output error message (#13501, #13518)
|
||||||
|
* [cbsnews] Relax video info regular expression (#13284, #13503)
|
||||||
|
+ [facebook] Add support for plugin video embeds and multiple embeds (#13493)
|
||||||
|
* [soundcloud] Switch to https for API requests (#13502)
|
||||||
|
* [pandatv] Switch to https for API and download URLs
|
||||||
|
+ [pandatv] Add support for https URLs (#13491)
|
||||||
|
+ [niconico] Support sp subdomain (#13494)
|
||||||
|
|
||||||
|
|
||||||
|
version 2017.06.25
|
||||||
|
|
||||||
|
Core
|
||||||
|
+ [adobepass] Add support for DIRECTV NOW (mso ATTOTT) (#13472)
|
||||||
|
* [YoutubeDL] Skip malformed formats for better extraction robustness
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [wsj] Add support for barrons.com (#13470)
|
||||||
|
+ [ign] Add another video id pattern (#13328)
|
||||||
|
+ [raiplay:live] Add support for live streams (#13414)
|
||||||
|
+ [redbulltv] Add support for live videos and segments (#13486)
|
||||||
|
+ [onetpl] Add support for videos embedded via pulsembed (#13482)
|
||||||
|
* [ooyala] Make more robust
|
||||||
|
* [ooyala] Skip empty format URLs (#13471, #13476)
|
||||||
|
* [hgtv.com:show] Fix typo
|
||||||
|
|
||||||
|
|
||||||
|
version 2017.06.23
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [adobepass] Fix extraction on older python 2.6
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [youtube] Adapt to new automatic captions rendition (#13467)
|
||||||
|
* [hgtv.com:show] Relax video config regular expression (#13279, #13461)
|
||||||
|
* [drtuber] Fix formats extraction (#12058)
|
||||||
|
* [youporn] Fix upload date extraction
|
||||||
|
* [youporn] Improve formats extraction
|
||||||
|
* [youporn] Fix title extraction (#13456)
|
||||||
|
* [googledrive] Fix formats sorting (#13443)
|
||||||
|
* [watchindianporn] Fix extraction (#13411, #13415)
|
||||||
|
+ [vimeo] Add fallback mp4 extension for original format
|
||||||
|
+ [ruv] Add support for ruv.is (#13396)
|
||||||
|
* [viu] Fix extraction on older python 2.6
|
||||||
|
* [pandora.tv] Fix upload_date extraction (#12846)
|
||||||
|
+ [asiancrush] Add support for asiancrush.com (#13420)
|
||||||
|
|
||||||
|
|
||||||
|
version 2017.06.18
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [downloader/common] Use utils.shell_quote for debug command line
|
||||||
|
* [utils] Use compat_shlex_quote in shell_quote
|
||||||
|
* [postprocessor/execafterdownload] Encode command line (#13407)
|
||||||
|
* [compat] Fix compat_shlex_quote on Windows (#5889, #10254)
|
||||||
|
* [postprocessor/metadatafromtitle] Fix missing optional meta fields processing
|
||||||
|
in --metadata-from-title (#13408)
|
||||||
|
* [extractor/common] Fix json dumping with --geo-bypass
|
||||||
|
+ [extractor/common] Improve jwplayer subtitles extraction
|
||||||
|
+ [extractor/common] Improve jwplayer formats extraction (#13379)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [polskieradio] Fix extraction (#13392)
|
||||||
|
+ [xfileshare] Add support for fastvideo.me (#13385)
|
||||||
|
* [bilibili] Fix extraction of videos with double quotes in titles (#13387)
|
||||||
|
* [4tube] Fix extraction (#13381, #13382)
|
||||||
|
+ [disney] Add support for disneychannel.de (#13383)
|
||||||
|
* [npo] Improve URL regular expression (#13376)
|
||||||
|
+ [corus] Add support for showcase.ca
|
||||||
|
+ [corus] Add support for history.ca (#13359)
|
||||||
|
|
||||||
|
|
||||||
|
version 2017.06.12
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [utils] Handle compat_HTMLParseError in extract_attributes (#13349)
|
||||||
|
+ [compat] Introduce compat_HTMLParseError
|
||||||
|
* [utils] Improve unified_timestamp
|
||||||
|
* [extractor/generic] Ensure format id is unicode string
|
||||||
|
* [extractor/common] Return unicode string from _match_id
|
||||||
|
+ [YoutubeDL] Sanitize more fields (#13313)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [xfileshare] Add support for rapidvideo.tv (#13348)
|
||||||
|
* [xfileshare] Modernize and pass Referer
|
||||||
|
+ [rutv] Add support for testplayer.vgtrk.com (#13347)
|
||||||
|
+ [newgrounds] Extract more metadata (#13232)
|
||||||
|
+ [newgrounds:playlist] Add support for playlists (#10611)
|
||||||
|
* [newgrounds] Improve formats and uploader extraction (#13346)
|
||||||
|
* [msn] Fix formats extraction
|
||||||
|
* [turbo] Ensure format id is string
|
||||||
|
* [sexu] Ensure height is int
|
||||||
|
* [jove] Ensure comment count is int
|
||||||
|
* [golem] Ensure format id is string
|
||||||
|
* [gfycat] Ensure filesize is int
|
||||||
|
* [foxgay] Ensure height is int
|
||||||
|
* [flickr] Ensure format id is string
|
||||||
|
* [sohu] Fix numeric fields
|
||||||
|
* [safari] Improve authentication detection (#13319)
|
||||||
|
* [liveleak] Ensure height is int (#13313)
|
||||||
|
* [streamango] Make title optional (#13292)
|
||||||
|
* [rtlnl] Improve URL regular expression (#13295)
|
||||||
|
* [tvplayer] Fix extraction (#13291)
|
||||||
|
|
||||||
|
|
||||||
|
version 2017.06.05
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [YoutubeDL] Don't emit ANSI escape codes on Windows (#13270)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [bandcamp:weekly] Add support for bandcamp weekly (#12758)
|
||||||
|
* [pornhub:playlist] Fix extraction (#13281)
|
||||||
|
- [godtv] Remove extractor (#13175)
|
||||||
|
* [safari] Fix typo (#13252)
|
||||||
|
* [youtube] Improve chapters extraction (#13247)
|
||||||
|
* [1tv] Lower preference for HTTP formats (#13246)
|
||||||
|
* [francetv] Relax URL regular expression
|
||||||
|
* [drbonanza] Fix extraction (#13231)
|
||||||
|
* [packtpub] Fix authentication (#13240)
|
||||||
|
|
||||||
|
|
||||||
|
version 2017.05.29
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [youtube] Fix DASH MPD extraction for videos with non-encrypted format URLs
|
||||||
|
(#13211)
|
||||||
|
* [xhamster] Fix uploader and like/dislike count extraction (#13216)
|
||||||
|
+ [xhamster] Extract categories (#11728)
|
||||||
|
+ [abcnews] Add support for embed URLs (#12851)
|
||||||
|
* [gaskrank] Fix extraction (#12493)
|
||||||
|
* [medialaan] Fix videos with missing videoUrl (#12774)
|
||||||
|
* [dvtv] Fix playlist support
|
||||||
|
+ [dvtv] Add support for DASH and HLS formats (#3063)
|
||||||
|
+ [beam:vod] Add support for beam.pro/mixer.com VODs (#13032)
|
||||||
|
* [cbsinteractive] Relax URL regular expression (#13213)
|
||||||
|
* [adn] Fix formats extraction
|
||||||
|
+ [youku] Extract more metadata (#10433)
|
||||||
|
* [cbsnews] Fix extraction (#13205)
|
||||||
|
|
||||||
|
|
||||||
|
version 2017.05.26
|
||||||
|
|
||||||
|
Core
|
||||||
|
+ [utils] strip_jsonp() can recognize more patterns
|
||||||
|
* [postprocessor/ffmpeg] Fix metadata filename handling on Python 2 (#13182)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [youtube] DASH MPDs with cipher signatures are recognized now (#11381)
|
||||||
|
+ [bbc] Add support for authentication
|
||||||
|
* [tudou] Merge into youku extractor (#12214)
|
||||||
|
* [youku:show] Fix extraction
|
||||||
|
* [youku] Fix extraction (#13191)
|
||||||
|
* [udemy] Fix extraction for outputs' format entries without URL (#13192)
|
||||||
|
* [vimeo] Fix formats' sorting (#13189)
|
||||||
|
* [cbsnews] Fix extraction for 60 Minutes videos (#12861)
|
||||||
|
|
||||||
|
|
||||||
|
version 2017.05.23
|
||||||
|
|
||||||
|
Core
|
||||||
|
+ [downloader/external] Pass -loglevel to ffmpeg downloader (#13183)
|
||||||
|
+ [adobepass] Add support for Bright House Networks (#13149)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [streamcz] Add support for subtitles (#13174)
|
||||||
|
* [youtube] Fix DASH manifest signature decryption (#8944, #13156)
|
||||||
|
* [toggle] Relax URL regular expression (#13172)
|
||||||
|
* [toypics] Fix extraction (#13077)
|
||||||
|
* [njpwworld] Fix extraction (#13162, #13169)
|
||||||
|
+ [hitbox] Add support for smashcast.tv (#13154)
|
||||||
|
* [mitele] Update app key regular expression (#13158)
|
||||||
|
|
||||||
|
|
||||||
|
version 2017.05.18.1
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [jsinterp] Fix typo and cleanup regular expressions (#13134)
|
||||||
|
|
||||||
|
|
||||||
|
version 2017.05.18
|
||||||
|
|
||||||
|
Core
|
||||||
|
+ [jsinterp] Add support for quoted names and indexers (#13123, #13124, #13125,
|
||||||
|
#13126, #13128, #13129, #13130, #13131, #13132)
|
||||||
|
+ [extractor/common] Add support for schemeless URLs in _extract_wowza_formats
|
||||||
|
(#13088, #13092)
|
||||||
|
+ [utils] Recognize more audio codecs (#13081)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [vier] Extract more metadata (#12539)
|
||||||
|
* [vier] Improve extraction (#12801)
|
||||||
|
+ Add support for authentication
|
||||||
|
* Bypass authentication when no credentials provided
|
||||||
|
* Improve extraction robustness
|
||||||
|
* [dailymail] Fix sources extraction (#13057)
|
||||||
|
* [dailymotion] Extend URL regular expression (#13079)
|
||||||
|
|
||||||
|
|
||||||
|
version 2017.05.14
|
||||||
|
|
||||||
|
Core
|
||||||
|
+ [extractor/common] Respect Width and Height attributes in ISM manifests
|
||||||
|
+ [postprocessor/metadatafromtitle] Add regular expression syntax support for
|
||||||
|
--metadata-from-title (#13065)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [mediaset] Add support for video.mediaset.it (#12708, #12964)
|
||||||
|
* [orf:radio] Fix extraction (#11643, #12926)
|
||||||
|
* [aljazeera] Extend URL regular expression (#13053)
|
||||||
|
* [imdb] Relax URL regular expression (#13056)
|
||||||
|
+ [francetv] Add support for mobile.france.tv (#13068)
|
||||||
|
+ [upskill] Add support for upskillcourses.com (#13043)
|
||||||
|
* [thescene] Fix extraction (#13061)
|
||||||
|
* [condenast] Improve embed support
|
||||||
|
* [liveleak] Fix extraction (#12053)
|
||||||
|
+ [douyu] Support Douyu shows (#12228)
|
||||||
|
* [myspace] Improve URL regular expression (#13040)
|
||||||
|
* [adultswim] Use desktop platform in assets URL (#13041)
|
||||||
|
|
||||||
|
|
||||||
version 2017.05.09
|
version 2017.05.09
|
||||||
|
|
||||||
Core
|
Core
|
||||||
|
2
Makefile
2
Makefile
@@ -101,7 +101,7 @@ youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-
|
|||||||
--exclude '*.pyc' \
|
--exclude '*.pyc' \
|
||||||
--exclude '*.pyo' \
|
--exclude '*.pyo' \
|
||||||
--exclude '*~' \
|
--exclude '*~' \
|
||||||
--exclude '__pycache' \
|
--exclude '__pycache__' \
|
||||||
--exclude '.git' \
|
--exclude '.git' \
|
||||||
--exclude 'testdata' \
|
--exclude 'testdata' \
|
||||||
--exclude 'docs/_build' \
|
--exclude 'docs/_build' \
|
||||||
|
53
README.md
53
README.md
@@ -145,18 +145,18 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
|||||||
--max-views COUNT Do not download any videos with more than
|
--max-views COUNT Do not download any videos with more than
|
||||||
COUNT views
|
COUNT views
|
||||||
--match-filter FILTER Generic video filter. Specify any key (see
|
--match-filter FILTER Generic video filter. Specify any key (see
|
||||||
help for -o for a list of available keys)
|
the "OUTPUT TEMPLATE" for a list of
|
||||||
to match if the key is present, !key to
|
available keys) to match if the key is
|
||||||
check if the key is not present, key >
|
present, !key to check if the key is not
|
||||||
NUMBER (like "comment_count > 12", also
|
present, key > NUMBER (like "comment_count
|
||||||
works with >=, <, <=, !=, =) to compare
|
> 12", also works with >=, <, <=, !=, =) to
|
||||||
against a number, key = 'LITERAL' (like
|
compare against a number, key = 'LITERAL'
|
||||||
"uploader = 'Mike Smith'", also works with
|
(like "uploader = 'Mike Smith'", also works
|
||||||
!=) to match against a string literal and &
|
with !=) to match against a string literal
|
||||||
to require multiple matches. Values which
|
and & to require multiple matches. Values
|
||||||
are not known are excluded unless you put a
|
which are not known are excluded unless you
|
||||||
question mark (?) after the operator. For
|
put a question mark (?) after the operator.
|
||||||
example, to only match videos that have
|
For example, to only match videos that have
|
||||||
been liked more than 100 times and disliked
|
been liked more than 100 times and disliked
|
||||||
less than 50 times (or the dislike
|
less than 50 times (or the dislike
|
||||||
functionality is not available at the given
|
functionality is not available at the given
|
||||||
@@ -277,8 +277,8 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
|||||||
--get-filename Simulate, quiet but print output filename
|
--get-filename Simulate, quiet but print output filename
|
||||||
--get-format Simulate, quiet but print output format
|
--get-format Simulate, quiet but print output format
|
||||||
-j, --dump-json Simulate, quiet but print JSON information.
|
-j, --dump-json Simulate, quiet but print JSON information.
|
||||||
See --output for a description of available
|
See the "OUTPUT TEMPLATE" for a description
|
||||||
keys.
|
of available keys.
|
||||||
-J, --dump-single-json Simulate, quiet but print JSON information
|
-J, --dump-single-json Simulate, quiet but print JSON information
|
||||||
for each command-line argument. If the URL
|
for each command-line argument. If the URL
|
||||||
refers to a playlist, dump the whole
|
refers to a playlist, dump the whole
|
||||||
@@ -400,12 +400,14 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
|||||||
--add-metadata Write metadata to the video file
|
--add-metadata Write metadata to the video file
|
||||||
--metadata-from-title FORMAT Parse additional metadata like song title /
|
--metadata-from-title FORMAT Parse additional metadata like song title /
|
||||||
artist from the video title. The format
|
artist from the video title. The format
|
||||||
syntax is the same as --output, the parsed
|
syntax is the same as --output. Regular
|
||||||
parameters replace existing values.
|
expression with named capture groups may
|
||||||
Additional templates: %(album)s,
|
also be used. The parsed parameters replace
|
||||||
%(artist)s. Example: --metadata-from-title
|
existing values. Example: --metadata-from-
|
||||||
"%(artist)s - %(title)s" matches a title
|
title "%(artist)s - %(title)s" matches a
|
||||||
like "Coldplay - Paradise"
|
title like "Coldplay - Paradise". Example
|
||||||
|
(regex): --metadata-from-title
|
||||||
|
"(?P<artist>.+?) - (?P<title>.+)"
|
||||||
--xattrs Write metadata to the video file's xattrs
|
--xattrs Write metadata to the video file's xattrs
|
||||||
(using dublin core and xdg standards)
|
(using dublin core and xdg standards)
|
||||||
--fixup POLICY Automatically correct known faults of the
|
--fixup POLICY Automatically correct known faults of the
|
||||||
@@ -472,7 +474,10 @@ machine twitch login my_twitch_account_name password my_twitch_password
|
|||||||
```
|
```
|
||||||
To activate authentication with the `.netrc` file you should pass `--netrc` to youtube-dl or place it in the [configuration file](#configuration).
|
To activate authentication with the `.netrc` file you should pass `--netrc` to youtube-dl or place it in the [configuration file](#configuration).
|
||||||
|
|
||||||
On Windows you may also need to set up the `%HOME%` environment variable manually.
|
On Windows you may also need to set up the `%HOME%` environment variable manually. For example:
|
||||||
|
```
|
||||||
|
set HOME=%USERPROFILE%
|
||||||
|
```
|
||||||
|
|
||||||
# OUTPUT TEMPLATE
|
# OUTPUT TEMPLATE
|
||||||
|
|
||||||
@@ -530,13 +535,14 @@ The basic usage is not to set any template arguments when downloading a single f
|
|||||||
- `playlist_id` (string): Playlist identifier
|
- `playlist_id` (string): Playlist identifier
|
||||||
- `playlist_title` (string): Playlist title
|
- `playlist_title` (string): Playlist title
|
||||||
|
|
||||||
|
|
||||||
Available for the video that belongs to some logical chapter or section:
|
Available for the video that belongs to some logical chapter or section:
|
||||||
|
|
||||||
- `chapter` (string): Name or title of the chapter the video belongs to
|
- `chapter` (string): Name or title of the chapter the video belongs to
|
||||||
- `chapter_number` (numeric): Number of the chapter the video belongs to
|
- `chapter_number` (numeric): Number of the chapter the video belongs to
|
||||||
- `chapter_id` (string): Id of the chapter the video belongs to
|
- `chapter_id` (string): Id of the chapter the video belongs to
|
||||||
|
|
||||||
Available for the video that is an episode of some series or programme:
|
Available for the video that is an episode of some series or programme:
|
||||||
|
|
||||||
- `series` (string): Title of the series or programme the video episode belongs to
|
- `series` (string): Title of the series or programme the video episode belongs to
|
||||||
- `season` (string): Title of the season the video episode belongs to
|
- `season` (string): Title of the season the video episode belongs to
|
||||||
- `season_number` (numeric): Number of the season the video episode belongs to
|
- `season_number` (numeric): Number of the season the video episode belongs to
|
||||||
@@ -546,6 +552,7 @@ Available for the video that is an episode of some series or programme:
|
|||||||
- `episode_id` (string): Id of the video episode
|
- `episode_id` (string): Id of the video episode
|
||||||
|
|
||||||
Available for the media that is a track or a part of a music album:
|
Available for the media that is a track or a part of a music album:
|
||||||
|
|
||||||
- `track` (string): Title of the track
|
- `track` (string): Title of the track
|
||||||
- `track_number` (numeric): Number of the track within an album or a disc
|
- `track_number` (numeric): Number of the track within an album or a disc
|
||||||
- `track_id` (string): Id of the track
|
- `track_id` (string): Id of the track
|
||||||
@@ -647,7 +654,7 @@ Also filtering work for comparisons `=` (equals), `!=` (not equals), `^=` (begin
|
|||||||
- `acodec`: Name of the audio codec in use
|
- `acodec`: Name of the audio codec in use
|
||||||
- `vcodec`: Name of the video codec in use
|
- `vcodec`: Name of the video codec in use
|
||||||
- `container`: Name of the container format
|
- `container`: Name of the container format
|
||||||
- `protocol`: The protocol that will be used for the actual download, lower-case (`http`, `https`, `rtsp`, `rtmp`, `rtmpe`, `mms`, `f4m`, `ism`, `m3u8`, or `m3u8_native`)
|
- `protocol`: The protocol that will be used for the actual download, lower-case (`http`, `https`, `rtsp`, `rtmp`, `rtmpe`, `mms`, `f4m`, `ism`, `http_dash_segments`, `m3u8`, or `m3u8_native`)
|
||||||
- `format_id`: A short description of the format
|
- `format_id`: A short description of the format
|
||||||
|
|
||||||
Note that none of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by particular extractor, i.e. the metadata offered by the video hoster.
|
Note that none of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by particular extractor, i.e. the metadata offered by the video hoster.
|
||||||
|
@@ -8,7 +8,7 @@ import re
|
|||||||
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||||
README_FILE = os.path.join(ROOT_DIR, 'README.md')
|
README_FILE = os.path.join(ROOT_DIR, 'README.md')
|
||||||
|
|
||||||
PREFIX = '''%YOUTUBE-DL(1)
|
PREFIX = r'''%YOUTUBE-DL(1)
|
||||||
|
|
||||||
# NAME
|
# NAME
|
||||||
|
|
||||||
|
@@ -42,7 +42,7 @@
|
|||||||
- **Allocine**
|
- **Allocine**
|
||||||
- **AlphaPorno**
|
- **AlphaPorno**
|
||||||
- **AMCNetworks**
|
- **AMCNetworks**
|
||||||
- **anderetijden**: npo.nl and ntr.nl
|
- **anderetijden**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
|
||||||
- **AnimeOnDemand**
|
- **AnimeOnDemand**
|
||||||
- **anitube.se**
|
- **anitube.se**
|
||||||
- **Anvato**
|
- **Anvato**
|
||||||
@@ -67,6 +67,8 @@
|
|||||||
- **arte.tv:info**
|
- **arte.tv:info**
|
||||||
- **arte.tv:magazine**
|
- **arte.tv:magazine**
|
||||||
- **arte.tv:playlist**
|
- **arte.tv:playlist**
|
||||||
|
- **AsianCrush**
|
||||||
|
- **AsianCrushPlaylist**
|
||||||
- **AtresPlayer**
|
- **AtresPlayer**
|
||||||
- **ATTTechChannel**
|
- **ATTTechChannel**
|
||||||
- **ATVAt**
|
- **ATVAt**
|
||||||
@@ -87,13 +89,13 @@
|
|||||||
- **bambuser:channel**
|
- **bambuser:channel**
|
||||||
- **Bandcamp**
|
- **Bandcamp**
|
||||||
- **Bandcamp:album**
|
- **Bandcamp:album**
|
||||||
|
- **Bandcamp:weekly**
|
||||||
- **bangumi.bilibili.com**: BiliBili番剧
|
- **bangumi.bilibili.com**: BiliBili番剧
|
||||||
- **bbc**: BBC
|
- **bbc**: BBC
|
||||||
- **bbc.co.uk**: BBC iPlayer
|
- **bbc.co.uk**: BBC iPlayer
|
||||||
- **bbc.co.uk:article**: BBC articles
|
- **bbc.co.uk:article**: BBC articles
|
||||||
- **bbc.co.uk:iplayer:playlist**
|
- **bbc.co.uk:iplayer:playlist**
|
||||||
- **bbc.co.uk:playlist**
|
- **bbc.co.uk:playlist**
|
||||||
- **Beam:live**
|
|
||||||
- **Beatport**
|
- **Beatport**
|
||||||
- **Beeg**
|
- **Beeg**
|
||||||
- **BehindKink**
|
- **BehindKink**
|
||||||
@@ -152,6 +154,7 @@
|
|||||||
- **chirbit**
|
- **chirbit**
|
||||||
- **chirbit:profile**
|
- **chirbit:profile**
|
||||||
- **Cinchcast**
|
- **Cinchcast**
|
||||||
|
- **CJSW**
|
||||||
- **Clipfish**
|
- **Clipfish**
|
||||||
- **cliphunter**
|
- **cliphunter**
|
||||||
- **ClipRs**
|
- **ClipRs**
|
||||||
@@ -216,6 +219,7 @@
|
|||||||
- **DiscoveryVR**
|
- **DiscoveryVR**
|
||||||
- **Disney**
|
- **Disney**
|
||||||
- **Dotsub**
|
- **Dotsub**
|
||||||
|
- **DouyuShow**
|
||||||
- **DouyuTV**: 斗鱼
|
- **DouyuTV**: 斗鱼
|
||||||
- **DPlay**
|
- **DPlay**
|
||||||
- **DPlayIt**
|
- **DPlayIt**
|
||||||
@@ -234,6 +238,7 @@
|
|||||||
- **EbaumsWorld**
|
- **EbaumsWorld**
|
||||||
- **EchoMsk**
|
- **EchoMsk**
|
||||||
- **egghead:course**: egghead.io course
|
- **egghead:course**: egghead.io course
|
||||||
|
- **egghead:lesson**: egghead.io lesson
|
||||||
- **eHow**
|
- **eHow**
|
||||||
- **Einthusan**
|
- **Einthusan**
|
||||||
- **eitb.tv**
|
- **eitb.tv**
|
||||||
@@ -310,7 +315,6 @@
|
|||||||
- **Go**
|
- **Go**
|
||||||
- **Go90**
|
- **Go90**
|
||||||
- **GodTube**
|
- **GodTube**
|
||||||
- **GodTV**
|
|
||||||
- **Golem**
|
- **Golem**
|
||||||
- **GoogleDrive**
|
- **GoogleDrive**
|
||||||
- **Goshgay**
|
- **Goshgay**
|
||||||
@@ -367,6 +371,7 @@
|
|||||||
- **Jamendo**
|
- **Jamendo**
|
||||||
- **JamendoAlbum**
|
- **JamendoAlbum**
|
||||||
- **JeuxVideo**
|
- **JeuxVideo**
|
||||||
|
- **Joj**
|
||||||
- **Jove**
|
- **Jove**
|
||||||
- **jpopsuki.tv**
|
- **jpopsuki.tv**
|
||||||
- **JWPlatform**
|
- **JWPlatform**
|
||||||
@@ -433,6 +438,7 @@
|
|||||||
- **MDR**: MDR.DE and KiKA
|
- **MDR**: MDR.DE and KiKA
|
||||||
- **media.ccc.de**
|
- **media.ccc.de**
|
||||||
- **Medialaan**
|
- **Medialaan**
|
||||||
|
- **Mediaset**
|
||||||
- **Medici**
|
- **Medici**
|
||||||
- **Meipai**: 美拍
|
- **Meipai**: 美拍
|
||||||
- **MelonVOD**
|
- **MelonVOD**
|
||||||
@@ -451,6 +457,8 @@
|
|||||||
- **mixcloud:playlist**
|
- **mixcloud:playlist**
|
||||||
- **mixcloud:stream**
|
- **mixcloud:stream**
|
||||||
- **mixcloud:user**
|
- **mixcloud:user**
|
||||||
|
- **Mixer:live**
|
||||||
|
- **Mixer:vod**
|
||||||
- **MLB**
|
- **MLB**
|
||||||
- **Mnet**
|
- **Mnet**
|
||||||
- **MoeVideo**: LetitBit video services: moevideo.net, playreplay.net and videochart.net
|
- **MoeVideo**: LetitBit video services: moevideo.net, playreplay.net and videochart.net
|
||||||
@@ -509,10 +517,13 @@
|
|||||||
- **netease:song**: 网易云音乐
|
- **netease:song**: 网易云音乐
|
||||||
- **Netzkino**
|
- **Netzkino**
|
||||||
- **Newgrounds**
|
- **Newgrounds**
|
||||||
|
- **NewgroundsPlaylist**
|
||||||
- **Newstube**
|
- **Newstube**
|
||||||
- **NextMedia**: 蘋果日報
|
- **NextMedia**: 蘋果日報
|
||||||
- **NextMediaActionNews**: 蘋果日報 - 動新聞
|
- **NextMediaActionNews**: 蘋果日報 - 動新聞
|
||||||
- **NextTV**: 壹電視
|
- **NextTV**: 壹電視
|
||||||
|
- **Nexx**
|
||||||
|
- **NexxEmbed**
|
||||||
- **nfb**: National Film Board of Canada
|
- **nfb**: National Film Board of Canada
|
||||||
- **nfl.com**
|
- **nfl.com**
|
||||||
- **NhkVod**
|
- **NhkVod**
|
||||||
@@ -543,7 +554,7 @@
|
|||||||
- **NowTVList**
|
- **NowTVList**
|
||||||
- **nowvideo**: NowVideo
|
- **nowvideo**: NowVideo
|
||||||
- **Noz**
|
- **Noz**
|
||||||
- **npo**: npo.nl and ntr.nl
|
- **npo**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
|
||||||
- **npo.nl:live**
|
- **npo.nl:live**
|
||||||
- **npo.nl:radio**
|
- **npo.nl:radio**
|
||||||
- **npo.nl:radio:fragment**
|
- **npo.nl:radio:fragment**
|
||||||
@@ -587,6 +598,7 @@
|
|||||||
- **Patreon**
|
- **Patreon**
|
||||||
- **pbs**: Public Broadcasting Service (PBS) and member stations: PBS: Public Broadcasting Service, APT - Alabama Public Television (WBIQ), GPB/Georgia Public Broadcasting (WGTV), Mississippi Public Broadcasting (WMPN), Nashville Public Television (WNPT), WFSU-TV (WFSU), WSRE (WSRE), WTCI (WTCI), WPBA/Channel 30 (WPBA), Alaska Public Media (KAKM), Arizona PBS (KAET), KNME-TV/Channel 5 (KNME), Vegas PBS (KLVX), AETN/ARKANSAS ETV NETWORK (KETS), KET (WKLE), WKNO/Channel 10 (WKNO), LPB/LOUISIANA PUBLIC BROADCASTING (WLPB), OETA (KETA), Ozarks Public Television (KOZK), WSIU Public Broadcasting (WSIU), KEET TV (KEET), KIXE/Channel 9 (KIXE), KPBS San Diego (KPBS), KQED (KQED), KVIE Public Television (KVIE), PBS SoCal/KOCE (KOCE), ValleyPBS (KVPT), CONNECTICUT PUBLIC TELEVISION (WEDH), KNPB Channel 5 (KNPB), SOPTV (KSYS), Rocky Mountain PBS (KRMA), KENW-TV3 (KENW), KUED Channel 7 (KUED), Wyoming PBS (KCWC), Colorado Public Television / KBDI 12 (KBDI), KBYU-TV (KBYU), Thirteen/WNET New York (WNET), WGBH/Channel 2 (WGBH), WGBY (WGBY), NJTV Public Media NJ (WNJT), WLIW21 (WLIW), mpt/Maryland Public Television (WMPB), WETA Television and Radio (WETA), WHYY (WHYY), PBS 39 (WLVT), WVPT - Your Source for PBS and More! 
(WVPT), Howard University Television (WHUT), WEDU PBS (WEDU), WGCU Public Media (WGCU), WPBT2 (WPBT), WUCF TV (WUCF), WUFT/Channel 5 (WUFT), WXEL/Channel 42 (WXEL), WLRN/Channel 17 (WLRN), WUSF Public Broadcasting (WUSF), ETV (WRLK), UNC-TV (WUNC), PBS Hawaii - Oceanic Cable Channel 10 (KHET), Idaho Public Television (KAID), KSPS (KSPS), OPB (KOPB), KWSU/Channel 10 & KTNW/Channel 31 (KWSU), WILL-TV (WILL), Network Knowledge - WSEC/Springfield (WSEC), WTTW11 (WTTW), Iowa Public Television/IPTV (KDIN), Nine Network (KETC), PBS39 Fort Wayne (WFWA), WFYI Indianapolis (WFYI), Milwaukee Public Television (WMVS), WNIN (WNIN), WNIT Public Television (WNIT), WPT (WPNE), WVUT/Channel 22 (WVUT), WEIU/Channel 51 (WEIU), WQPT-TV (WQPT), WYCC PBS Chicago (WYCC), WIPB-TV (WIPB), WTIU (WTIU), CET (WCET), ThinkTVNetwork (WPTD), WBGU-TV (WBGU), WGVU TV (WGVU), NET1 (KUON), Pioneer Public Television (KWCM), SDPB Television (KUSD), TPT (KTCA), KSMQ (KSMQ), KPTS/Channel 8 (KPTS), KTWU/Channel 11 (KTWU), East Tennessee PBS (WSJK), WCTE-TV (WCTE), WLJT, Channel 11 (WLJT), WOSU TV (WOSU), WOUB/WOUC (WOUB), WVPB (WVPB), WKYU-PBS (WKYU), KERA 13 (KERA), MPBN (WCBB), Mountain Lake PBS (WCFE), NHPTV (WENH), Vermont PBS (WETK), witf (WITF), WQED Multimedia (WQED), WMHT Educational Telecommunications (WMHT), Q-TV (WDCQ), WTVS Detroit Public TV (WTVS), CMU Public Television (WCMU), WKAR-TV (WKAR), WNMU-TV Public TV 13 (WNMU), WDSE - WRPT (WDSE), WGTE TV (WGTE), Lakeland Public Television (KAWE), KMOS-TV - Channels 6.1, 6.2 and 6.3 (KMOS), MontanaPBS (KUSM), KRWG/Channel 22 (KRWG), KACV (KACV), KCOS/Channel 13 (KCOS), WCNY/Channel 24 (WCNY), WNED (WNED), WPBS (WPBS), WSKG Public TV (WSKG), WXXI (WXXI), WPSU (WPSU), WVIA Public Media Studios (WVIA), WTVI (WTVI), Western Reserve PBS (WNEO), WVIZ/PBS ideastream (WVIZ), KCTS 9 (KCTS), Basin PBS (KPBT), KUHT / Channel 8 (KUHT), KLRN (KLRN), KLRU (KLRU), WTJX Channel 12 (WTJX), WCVE PBS (WCVE), KBTC Public Television (KBTC)
|
- **pbs**: Public Broadcasting Service (PBS) and member stations: PBS: Public Broadcasting Service, APT - Alabama Public Television (WBIQ), GPB/Georgia Public Broadcasting (WGTV), Mississippi Public Broadcasting (WMPN), Nashville Public Television (WNPT), WFSU-TV (WFSU), WSRE (WSRE), WTCI (WTCI), WPBA/Channel 30 (WPBA), Alaska Public Media (KAKM), Arizona PBS (KAET), KNME-TV/Channel 5 (KNME), Vegas PBS (KLVX), AETN/ARKANSAS ETV NETWORK (KETS), KET (WKLE), WKNO/Channel 10 (WKNO), LPB/LOUISIANA PUBLIC BROADCASTING (WLPB), OETA (KETA), Ozarks Public Television (KOZK), WSIU Public Broadcasting (WSIU), KEET TV (KEET), KIXE/Channel 9 (KIXE), KPBS San Diego (KPBS), KQED (KQED), KVIE Public Television (KVIE), PBS SoCal/KOCE (KOCE), ValleyPBS (KVPT), CONNECTICUT PUBLIC TELEVISION (WEDH), KNPB Channel 5 (KNPB), SOPTV (KSYS), Rocky Mountain PBS (KRMA), KENW-TV3 (KENW), KUED Channel 7 (KUED), Wyoming PBS (KCWC), Colorado Public Television / KBDI 12 (KBDI), KBYU-TV (KBYU), Thirteen/WNET New York (WNET), WGBH/Channel 2 (WGBH), WGBY (WGBY), NJTV Public Media NJ (WNJT), WLIW21 (WLIW), mpt/Maryland Public Television (WMPB), WETA Television and Radio (WETA), WHYY (WHYY), PBS 39 (WLVT), WVPT - Your Source for PBS and More! 
(WVPT), Howard University Television (WHUT), WEDU PBS (WEDU), WGCU Public Media (WGCU), WPBT2 (WPBT), WUCF TV (WUCF), WUFT/Channel 5 (WUFT), WXEL/Channel 42 (WXEL), WLRN/Channel 17 (WLRN), WUSF Public Broadcasting (WUSF), ETV (WRLK), UNC-TV (WUNC), PBS Hawaii - Oceanic Cable Channel 10 (KHET), Idaho Public Television (KAID), KSPS (KSPS), OPB (KOPB), KWSU/Channel 10 & KTNW/Channel 31 (KWSU), WILL-TV (WILL), Network Knowledge - WSEC/Springfield (WSEC), WTTW11 (WTTW), Iowa Public Television/IPTV (KDIN), Nine Network (KETC), PBS39 Fort Wayne (WFWA), WFYI Indianapolis (WFYI), Milwaukee Public Television (WMVS), WNIN (WNIN), WNIT Public Television (WNIT), WPT (WPNE), WVUT/Channel 22 (WVUT), WEIU/Channel 51 (WEIU), WQPT-TV (WQPT), WYCC PBS Chicago (WYCC), WIPB-TV (WIPB), WTIU (WTIU), CET (WCET), ThinkTVNetwork (WPTD), WBGU-TV (WBGU), WGVU TV (WGVU), NET1 (KUON), Pioneer Public Television (KWCM), SDPB Television (KUSD), TPT (KTCA), KSMQ (KSMQ), KPTS/Channel 8 (KPTS), KTWU/Channel 11 (KTWU), East Tennessee PBS (WSJK), WCTE-TV (WCTE), WLJT, Channel 11 (WLJT), WOSU TV (WOSU), WOUB/WOUC (WOUB), WVPB (WVPB), WKYU-PBS (WKYU), KERA 13 (KERA), MPBN (WCBB), Mountain Lake PBS (WCFE), NHPTV (WENH), Vermont PBS (WETK), witf (WITF), WQED Multimedia (WQED), WMHT Educational Telecommunications (WMHT), Q-TV (WDCQ), WTVS Detroit Public TV (WTVS), CMU Public Television (WCMU), WKAR-TV (WKAR), WNMU-TV Public TV 13 (WNMU), WDSE - WRPT (WDSE), WGTE TV (WGTE), Lakeland Public Television (KAWE), KMOS-TV - Channels 6.1, 6.2 and 6.3 (KMOS), MontanaPBS (KUSM), KRWG/Channel 22 (KRWG), KACV (KACV), KCOS/Channel 13 (KCOS), WCNY/Channel 24 (WCNY), WNED (WNED), WPBS (WPBS), WSKG Public TV (WSKG), WXXI (WXXI), WPSU (WPSU), WVIA Public Media Studios (WVIA), WTVI (WTVI), Western Reserve PBS (WNEO), WVIZ/PBS ideastream (WVIZ), KCTS 9 (KCTS), Basin PBS (KPBT), KUHT / Channel 8 (KUHT), KLRN (KLRN), KLRU (KLRU), WTJX Channel 12 (WTJX), WCVE PBS (WCVE), KBTC Public Television (KBTC)
|
||||||
- **pcmag**
|
- **pcmag**
|
||||||
|
- **PearVideo**
|
||||||
- **People**
|
- **People**
|
||||||
- **periscope**: Periscope
|
- **periscope**: Periscope
|
||||||
- **periscope:user**: Periscope user videos
|
- **periscope:user**: Periscope user videos
|
||||||
@@ -638,6 +650,7 @@
|
|||||||
- **RadioJavan**
|
- **RadioJavan**
|
||||||
- **Rai**
|
- **Rai**
|
||||||
- **RaiPlay**
|
- **RaiPlay**
|
||||||
|
- **RaiPlayLive**
|
||||||
- **RBMARadio**
|
- **RBMARadio**
|
||||||
- **RDS**: RDS.ca
|
- **RDS**: RDS.ca
|
||||||
- **RedBullTV**
|
- **RedBullTV**
|
||||||
@@ -682,6 +695,7 @@
|
|||||||
- **rutube:person**: Rutube person videos
|
- **rutube:person**: Rutube person videos
|
||||||
- **RUTV**: RUTV.RU
|
- **RUTV**: RUTV.RU
|
||||||
- **Ruutu**
|
- **Ruutu**
|
||||||
|
- **Ruv**
|
||||||
- **safari**: safaribooksonline.com online video
|
- **safari**: safaribooksonline.com online video
|
||||||
- **safari:api**
|
- **safari:api**
|
||||||
- **safari:course**: safaribooksonline.com online courses
|
- **safari:course**: safaribooksonline.com online courses
|
||||||
@@ -760,7 +774,8 @@
|
|||||||
- **Tagesschau**
|
- **Tagesschau**
|
||||||
- **tagesschau:player**
|
- **tagesschau:player**
|
||||||
- **Tass**
|
- **Tass**
|
||||||
- **TBS**
|
- **TastyTrade**
|
||||||
|
- **TBS** (Currently broken)
|
||||||
- **TDSLifeway**
|
- **TDSLifeway**
|
||||||
- **teachertube**: teachertube.com videos
|
- **teachertube**: teachertube.com videos
|
||||||
- **teachertube:user:collection**: teachertube.com user and collection videos
|
- **teachertube:user:collection**: teachertube.com user and collection videos
|
||||||
@@ -801,16 +816,13 @@
|
|||||||
- **ToonGoggles**
|
- **ToonGoggles**
|
||||||
- **Tosh**: Tosh.0
|
- **Tosh**: Tosh.0
|
||||||
- **tou.tv**
|
- **tou.tv**
|
||||||
- **Toypics**: Toypics user profile
|
- **Toypics**: Toypics video
|
||||||
- **ToypicsUser**: Toypics user profile
|
- **ToypicsUser**: Toypics user profile
|
||||||
- **TrailerAddict** (Currently broken)
|
- **TrailerAddict** (Currently broken)
|
||||||
- **Trilulilu**
|
- **Trilulilu**
|
||||||
- **TruTV**
|
- **TruTV**
|
||||||
- **Tube8**
|
- **Tube8**
|
||||||
- **TubiTv**
|
- **TubiTv**
|
||||||
- **tudou**
|
|
||||||
- **tudou:album**
|
|
||||||
- **tudou:playlist**
|
|
||||||
- **Tumblr**
|
- **Tumblr**
|
||||||
- **tunein:clip**
|
- **tunein:clip**
|
||||||
- **tunein:program**
|
- **tunein:program**
|
||||||
@@ -861,6 +873,8 @@
|
|||||||
- **uol.com.br**
|
- **uol.com.br**
|
||||||
- **uplynk**
|
- **uplynk**
|
||||||
- **uplynk:preplay**
|
- **uplynk:preplay**
|
||||||
|
- **Upskill**
|
||||||
|
- **UpskillCourse**
|
||||||
- **Urort**: NRK P3 Urørt
|
- **Urort**: NRK P3 Urørt
|
||||||
- **URPlay**
|
- **URPlay**
|
||||||
- **USANetwork**
|
- **USANetwork**
|
||||||
@@ -932,13 +946,14 @@
|
|||||||
- **vk:wallpost**
|
- **vk:wallpost**
|
||||||
- **vlive**
|
- **vlive**
|
||||||
- **vlive:channel**
|
- **vlive:channel**
|
||||||
|
- **vlive:playlist**
|
||||||
- **Vodlocker**
|
- **Vodlocker**
|
||||||
- **VODPl**
|
- **VODPl**
|
||||||
- **VODPlatform**
|
- **VODPlatform**
|
||||||
- **VoiceRepublic**
|
- **VoiceRepublic**
|
||||||
- **VoxMedia**
|
- **VoxMedia**
|
||||||
- **Vporn**
|
- **Vporn**
|
||||||
- **vpro**: npo.nl and ntr.nl
|
- **vpro**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
|
||||||
- **Vrak**
|
- **Vrak**
|
||||||
- **VRT**: deredactie.be, sporza.be, cobra.be and cobra.canvas.be
|
- **VRT**: deredactie.be, sporza.be, cobra.be and cobra.canvas.be
|
||||||
- **vrv**
|
- **vrv**
|
||||||
@@ -964,7 +979,7 @@
|
|||||||
- **wholecloud**: WholeCloud
|
- **wholecloud**: WholeCloud
|
||||||
- **Wimp**
|
- **Wimp**
|
||||||
- **Wistia**
|
- **Wistia**
|
||||||
- **wnl**: npo.nl and ntr.nl
|
- **wnl**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
|
||||||
- **WorldStarHipHop**
|
- **WorldStarHipHop**
|
||||||
- **wrzuta.pl**
|
- **wrzuta.pl**
|
||||||
- **wrzuta.pl:playlist**
|
- **wrzuta.pl:playlist**
|
||||||
@@ -972,7 +987,7 @@
|
|||||||
- **WSJArticle**
|
- **WSJArticle**
|
||||||
- **XBef**
|
- **XBef**
|
||||||
- **XboxClips**
|
- **XboxClips**
|
||||||
- **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE, Vid ABC, VidBom, vidlo
|
- **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE, Vid ABC, VidBom, vidlo, RapidVideo.TV, FastVideo.me
|
||||||
- **XHamster**
|
- **XHamster**
|
||||||
- **XHamsterEmbed**
|
- **XHamsterEmbed**
|
||||||
- **xiami:album**: 虾米音乐 - 专辑
|
- **xiami:album**: 虾米音乐 - 专辑
|
||||||
@@ -988,7 +1003,6 @@
|
|||||||
- **XVideos**
|
- **XVideos**
|
||||||
- **XXXYMovies**
|
- **XXXYMovies**
|
||||||
- **Yahoo**: Yahoo screen and movies
|
- **Yahoo**: Yahoo screen and movies
|
||||||
- **Yam**: 蕃薯藤yam天空部落
|
|
||||||
- **yandexmusic:album**: Яндекс.Музыка - Альбом
|
- **yandexmusic:album**: Яндекс.Музыка - Альбом
|
||||||
- **yandexmusic:playlist**: Яндекс.Музыка - Плейлист
|
- **yandexmusic:playlist**: Яндекс.Музыка - Плейлист
|
||||||
- **yandexmusic:track**: Яндекс.Музыка - Трек
|
- **yandexmusic:track**: Яндекс.Музыка - Трек
|
||||||
|
@@ -41,6 +41,7 @@ def _make_result(formats, **kwargs):
|
|||||||
'id': 'testid',
|
'id': 'testid',
|
||||||
'title': 'testttitle',
|
'title': 'testttitle',
|
||||||
'extractor': 'testex',
|
'extractor': 'testex',
|
||||||
|
'extractor_key': 'TestEx',
|
||||||
}
|
}
|
||||||
res.update(**kwargs)
|
res.update(**kwargs)
|
||||||
return res
|
return res
|
||||||
@@ -448,6 +449,17 @@ class TestFormatSelection(unittest.TestCase):
|
|||||||
pass
|
pass
|
||||||
self.assertEqual(ydl.downloaded_info_dicts, [])
|
self.assertEqual(ydl.downloaded_info_dicts, [])
|
||||||
|
|
||||||
|
def test_default_format_spec(self):
|
||||||
|
ydl = YDL({'simulate': True})
|
||||||
|
self.assertEqual(ydl._default_format_spec({}), 'bestvideo+bestaudio/best')
|
||||||
|
|
||||||
|
ydl = YDL({'outtmpl': '-'})
|
||||||
|
self.assertEqual(ydl._default_format_spec({}), 'best')
|
||||||
|
|
||||||
|
ydl = YDL({})
|
||||||
|
self.assertEqual(ydl._default_format_spec({}, download=False), 'bestvideo+bestaudio/best')
|
||||||
|
self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best')
|
||||||
|
|
||||||
|
|
||||||
class TestYoutubeDL(unittest.TestCase):
|
class TestYoutubeDL(unittest.TestCase):
|
||||||
def test_subtitles(self):
|
def test_subtitles(self):
|
||||||
@@ -527,6 +539,8 @@ class TestYoutubeDL(unittest.TestCase):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'width': None,
|
'width': None,
|
||||||
'height': 1080,
|
'height': 1080,
|
||||||
|
'title1': '$PATH',
|
||||||
|
'title2': '%PATH%',
|
||||||
}
|
}
|
||||||
|
|
||||||
def fname(templ):
|
def fname(templ):
|
||||||
@@ -545,10 +559,14 @@ class TestYoutubeDL(unittest.TestCase):
|
|||||||
self.assertEqual(fname('%(height)0 6d.%(ext)s'), ' 01080.mp4')
|
self.assertEqual(fname('%(height)0 6d.%(ext)s'), ' 01080.mp4')
|
||||||
self.assertEqual(fname('%(height)0 6d.%(ext)s'), ' 01080.mp4')
|
self.assertEqual(fname('%(height)0 6d.%(ext)s'), ' 01080.mp4')
|
||||||
self.assertEqual(fname('%(height) 0 6d.%(ext)s'), ' 01080.mp4')
|
self.assertEqual(fname('%(height) 0 6d.%(ext)s'), ' 01080.mp4')
|
||||||
|
self.assertEqual(fname('%%'), '%')
|
||||||
|
self.assertEqual(fname('%%%%'), '%%')
|
||||||
self.assertEqual(fname('%%(height)06d.%(ext)s'), '%(height)06d.mp4')
|
self.assertEqual(fname('%%(height)06d.%(ext)s'), '%(height)06d.mp4')
|
||||||
self.assertEqual(fname('%(width)06d.%(ext)s'), 'NA.mp4')
|
self.assertEqual(fname('%(width)06d.%(ext)s'), 'NA.mp4')
|
||||||
self.assertEqual(fname('%(width)06d.%%(ext)s'), 'NA.%(ext)s')
|
self.assertEqual(fname('%(width)06d.%%(ext)s'), 'NA.%(ext)s')
|
||||||
self.assertEqual(fname('%%(width)06d.%(ext)s'), '%(width)06d.mp4')
|
self.assertEqual(fname('%%(width)06d.%(ext)s'), '%(width)06d.mp4')
|
||||||
|
self.assertEqual(fname('Hello %(title1)s'), 'Hello $PATH')
|
||||||
|
self.assertEqual(fname('Hello %(title2)s'), 'Hello %PATH%')
|
||||||
|
|
||||||
def test_format_note(self):
|
def test_format_note(self):
|
||||||
ydl = YoutubeDL()
|
ydl = YoutubeDL()
|
||||||
@@ -755,7 +773,8 @@ class TestYoutubeDL(unittest.TestCase):
|
|||||||
'_type': 'url_transparent',
|
'_type': 'url_transparent',
|
||||||
'url': 'foo2:',
|
'url': 'foo2:',
|
||||||
'ie_key': 'Foo2',
|
'ie_key': 'Foo2',
|
||||||
'title': 'foo1 title'
|
'title': 'foo1 title',
|
||||||
|
'id': 'foo1_id',
|
||||||
}
|
}
|
||||||
|
|
||||||
class Foo2IE(InfoExtractor):
|
class Foo2IE(InfoExtractor):
|
||||||
@@ -781,6 +800,9 @@ class TestYoutubeDL(unittest.TestCase):
|
|||||||
downloaded = ydl.downloaded_info_dicts[0]
|
downloaded = ydl.downloaded_info_dicts[0]
|
||||||
self.assertEqual(downloaded['url'], TEST_URL)
|
self.assertEqual(downloaded['url'], TEST_URL)
|
||||||
self.assertEqual(downloaded['title'], 'foo1 title')
|
self.assertEqual(downloaded['title'], 'foo1 title')
|
||||||
|
self.assertEqual(downloaded['id'], 'testid')
|
||||||
|
self.assertEqual(downloaded['extractor'], 'testex')
|
||||||
|
self.assertEqual(downloaded['extractor_key'], 'TestEx')
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
@@ -98,6 +98,7 @@ from youtube_dl.compat import (
|
|||||||
compat_chr,
|
compat_chr,
|
||||||
compat_etree_fromstring,
|
compat_etree_fromstring,
|
||||||
compat_getenv,
|
compat_getenv,
|
||||||
|
compat_os_name,
|
||||||
compat_setenv,
|
compat_setenv,
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
compat_parse_qs,
|
compat_parse_qs,
|
||||||
@@ -340,6 +341,7 @@ class TestUtil(unittest.TestCase):
|
|||||||
self.assertEqual(unified_timestamp('May 16, 2016 11:15 PM'), 1463440500)
|
self.assertEqual(unified_timestamp('May 16, 2016 11:15 PM'), 1463440500)
|
||||||
self.assertEqual(unified_timestamp('Feb 7, 2016 at 6:35 pm'), 1454870100)
|
self.assertEqual(unified_timestamp('Feb 7, 2016 at 6:35 pm'), 1454870100)
|
||||||
self.assertEqual(unified_timestamp('2017-03-30T17:52:41Q'), 1490896361)
|
self.assertEqual(unified_timestamp('2017-03-30T17:52:41Q'), 1490896361)
|
||||||
|
self.assertEqual(unified_timestamp('Sep 11, 2013 | 5:49 AM'), 1378878540)
|
||||||
|
|
||||||
def test_determine_ext(self):
|
def test_determine_ext(self):
|
||||||
self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4')
|
self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4')
|
||||||
@@ -447,7 +449,9 @@ class TestUtil(unittest.TestCase):
|
|||||||
|
|
||||||
def test_shell_quote(self):
|
def test_shell_quote(self):
|
||||||
args = ['ffmpeg', '-i', encodeFilename('ñ€ß\'.mp4')]
|
args = ['ffmpeg', '-i', encodeFilename('ñ€ß\'.mp4')]
|
||||||
self.assertEqual(shell_quote(args), """ffmpeg -i 'ñ€ß'"'"'.mp4'""")
|
self.assertEqual(
|
||||||
|
shell_quote(args),
|
||||||
|
"""ffmpeg -i 'ñ€ß'"'"'.mp4'""" if compat_os_name != 'nt' else '''ffmpeg -i "ñ€ß'.mp4"''')
|
||||||
|
|
||||||
def test_str_to_int(self):
|
def test_str_to_int(self):
|
||||||
self.assertEqual(str_to_int('123,456'), 123456)
|
self.assertEqual(str_to_int('123,456'), 123456)
|
||||||
@@ -678,6 +682,14 @@ class TestUtil(unittest.TestCase):
|
|||||||
d = json.loads(stripped)
|
d = json.loads(stripped)
|
||||||
self.assertEqual(d, {'status': 'success'})
|
self.assertEqual(d, {'status': 'success'})
|
||||||
|
|
||||||
|
stripped = strip_jsonp('window.cb && window.cb({"status": "success"});')
|
||||||
|
d = json.loads(stripped)
|
||||||
|
self.assertEqual(d, {'status': 'success'})
|
||||||
|
|
||||||
|
stripped = strip_jsonp('window.cb && cb({"status": "success"});')
|
||||||
|
d = json.loads(stripped)
|
||||||
|
self.assertEqual(d, {'status': 'success'})
|
||||||
|
|
||||||
def test_uppercase_escape(self):
|
def test_uppercase_escape(self):
|
||||||
self.assertEqual(uppercase_escape('aä'), 'aä')
|
self.assertEqual(uppercase_escape('aä'), 'aä')
|
||||||
self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐')
|
self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐')
|
||||||
@@ -907,6 +919,8 @@ class TestUtil(unittest.TestCase):
|
|||||||
supports_outside_bmp = False
|
supports_outside_bmp = False
|
||||||
if supports_outside_bmp:
|
if supports_outside_bmp:
|
||||||
self.assertEqual(extract_attributes('<e x="Smile 😀!">'), {'x': 'Smile \U0001f600!'})
|
self.assertEqual(extract_attributes('<e x="Smile 😀!">'), {'x': 'Smile \U0001f600!'})
|
||||||
|
# Malformed HTML should not break attributes extraction on older Python
|
||||||
|
self.assertEqual(extract_attributes('<mal"formed/>'), {})
|
||||||
|
|
||||||
def test_clean_html(self):
|
def test_clean_html(self):
|
||||||
self.assertEqual(clean_html('a:\nb'), 'a: b')
|
self.assertEqual(clean_html('a:\nb'), 'a: b')
|
||||||
@@ -921,7 +935,7 @@ class TestUtil(unittest.TestCase):
|
|||||||
def test_args_to_str(self):
|
def test_args_to_str(self):
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
args_to_str(['foo', 'ba/r', '-baz', '2 be', '']),
|
args_to_str(['foo', 'ba/r', '-baz', '2 be', '']),
|
||||||
'foo ba/r -baz \'2 be\' \'\''
|
'foo ba/r -baz \'2 be\' \'\'' if compat_os_name != 'nt' else 'foo ba/r -baz "2 be" ""'
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_parse_filesize(self):
|
def test_parse_filesize(self):
|
||||||
@@ -1217,6 +1231,12 @@ part 3</font></u>
|
|||||||
self.assertEqual(get_element_by_attribute('class', 'foo', html), None)
|
self.assertEqual(get_element_by_attribute('class', 'foo', html), None)
|
||||||
self.assertEqual(get_element_by_attribute('class', 'no-such-foo', html), None)
|
self.assertEqual(get_element_by_attribute('class', 'no-such-foo', html), None)
|
||||||
|
|
||||||
|
html = '''
|
||||||
|
<div itemprop="author" itemscope>foo</div>
|
||||||
|
'''
|
||||||
|
|
||||||
|
self.assertEqual(get_element_by_attribute('itemprop', 'author', html), 'foo')
|
||||||
|
|
||||||
def test_get_elements_by_class(self):
|
def test_get_elements_by_class(self):
|
||||||
html = '''
|
html = '''
|
||||||
<span class="foo bar">nice</span><span class="foo bar">also nice</span>
|
<span class="foo bar">nice</span><span class="foo bar">also nice</span>
|
||||||
|
@@ -254,6 +254,13 @@ class TestYoutubeChapters(unittest.TestCase):
|
|||||||
'title': '3 - Из серпов луны...[Iz serpov luny]',
|
'title': '3 - Из серпов луны...[Iz serpov luny]',
|
||||||
}]
|
}]
|
||||||
),
|
),
|
||||||
|
(
|
||||||
|
# https://www.youtube.com/watch?v=xZW70zEasOk
|
||||||
|
# time point exceeds the video duration
|
||||||
|
'''● LCS Spring finals: Saturday and Sunday from <a href="#" onclick="yt.www.watch.player.seekTo(13*60+30);return false;">13:30</a> outside the venue! <br />● PAX East: Fri, Sat & Sun - more info in tomorrows video on the main channel!''',
|
||||||
|
283,
|
||||||
|
[]
|
||||||
|
),
|
||||||
]
|
]
|
||||||
|
|
||||||
def test_youtube_chapters(self):
|
def test_youtube_chapters(self):
|
||||||
|
@@ -26,6 +26,8 @@ import tokenize
|
|||||||
import traceback
|
import traceback
|
||||||
import random
|
import random
|
||||||
|
|
||||||
|
from string import ascii_letters
|
||||||
|
|
||||||
from .compat import (
|
from .compat import (
|
||||||
compat_basestring,
|
compat_basestring,
|
||||||
compat_cookiejar,
|
compat_cookiejar,
|
||||||
@@ -58,6 +60,7 @@ from .utils import (
|
|||||||
format_bytes,
|
format_bytes,
|
||||||
formatSeconds,
|
formatSeconds,
|
||||||
GeoRestrictedError,
|
GeoRestrictedError,
|
||||||
|
int_or_none,
|
||||||
ISO3166Utils,
|
ISO3166Utils,
|
||||||
locked_file,
|
locked_file,
|
||||||
make_HTTPS_handler,
|
make_HTTPS_handler,
|
||||||
@@ -302,6 +305,17 @@ class YoutubeDL(object):
|
|||||||
postprocessor.
|
postprocessor.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
_NUMERIC_FIELDS = set((
|
||||||
|
'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
|
||||||
|
'timestamp', 'upload_year', 'upload_month', 'upload_day',
|
||||||
|
'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
|
||||||
|
'average_rating', 'comment_count', 'age_limit',
|
||||||
|
'start_time', 'end_time',
|
||||||
|
'chapter_number', 'season_number', 'episode_number',
|
||||||
|
'track_number', 'disc_number', 'release_year',
|
||||||
|
'playlist_index',
|
||||||
|
))
|
||||||
|
|
||||||
params = None
|
params = None
|
||||||
_ies = []
|
_ies = []
|
||||||
_pps = []
|
_pps = []
|
||||||
@@ -498,24 +512,25 @@ class YoutubeDL(object):
|
|||||||
def to_console_title(self, message):
|
def to_console_title(self, message):
|
||||||
if not self.params.get('consoletitle', False):
|
if not self.params.get('consoletitle', False):
|
||||||
return
|
return
|
||||||
if compat_os_name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
|
if compat_os_name == 'nt':
|
||||||
# c_wchar_p() might not be necessary if `message` is
|
if ctypes.windll.kernel32.GetConsoleWindow():
|
||||||
# already of type unicode()
|
# c_wchar_p() might not be necessary if `message` is
|
||||||
ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
|
# already of type unicode()
|
||||||
|
ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
|
||||||
elif 'TERM' in os.environ:
|
elif 'TERM' in os.environ:
|
||||||
self._write_string('\033]0;%s\007' % message, self._screen_file)
|
self._write_string('\033]0;%s\007' % message, self._screen_file)
|
||||||
|
|
||||||
def save_console_title(self):
|
def save_console_title(self):
|
||||||
if not self.params.get('consoletitle', False):
|
if not self.params.get('consoletitle', False):
|
||||||
return
|
return
|
||||||
if 'TERM' in os.environ:
|
if compat_os_name != 'nt' and 'TERM' in os.environ:
|
||||||
# Save the title on stack
|
# Save the title on stack
|
||||||
self._write_string('\033[22;0t', self._screen_file)
|
self._write_string('\033[22;0t', self._screen_file)
|
||||||
|
|
||||||
def restore_console_title(self):
|
def restore_console_title(self):
|
||||||
if not self.params.get('consoletitle', False):
|
if not self.params.get('consoletitle', False):
|
||||||
return
|
return
|
||||||
if 'TERM' in os.environ:
|
if compat_os_name != 'nt' and 'TERM' in os.environ:
|
||||||
# Restore the title from stack
|
# Restore the title from stack
|
||||||
self._write_string('\033[23;0t', self._screen_file)
|
self._write_string('\033[23;0t', self._screen_file)
|
||||||
|
|
||||||
@@ -638,22 +653,11 @@ class YoutubeDL(object):
|
|||||||
r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
|
r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
|
||||||
outtmpl)
|
outtmpl)
|
||||||
|
|
||||||
NUMERIC_FIELDS = set((
|
|
||||||
'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
|
|
||||||
'timestamp', 'upload_year', 'upload_month', 'upload_day',
|
|
||||||
'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
|
|
||||||
'average_rating', 'comment_count', 'age_limit',
|
|
||||||
'start_time', 'end_time',
|
|
||||||
'chapter_number', 'season_number', 'episode_number',
|
|
||||||
'track_number', 'disc_number', 'release_year',
|
|
||||||
'playlist_index',
|
|
||||||
))
|
|
||||||
|
|
||||||
# Missing numeric fields used together with integer presentation types
|
# Missing numeric fields used together with integer presentation types
|
||||||
# in format specification will break the argument substitution since
|
# in format specification will break the argument substitution since
|
||||||
# string 'NA' is returned for missing fields. We will patch output
|
# string 'NA' is returned for missing fields. We will patch output
|
||||||
# template for missing fields to meet string presentation type.
|
# template for missing fields to meet string presentation type.
|
||||||
for numeric_field in NUMERIC_FIELDS:
|
for numeric_field in self._NUMERIC_FIELDS:
|
||||||
if numeric_field not in template_dict:
|
if numeric_field not in template_dict:
|
||||||
# As of [1] format syntax is:
|
# As of [1] format syntax is:
|
||||||
# %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
|
# %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
|
||||||
@@ -672,7 +676,19 @@ class YoutubeDL(object):
|
|||||||
FORMAT_RE.format(numeric_field),
|
FORMAT_RE.format(numeric_field),
|
||||||
r'%({0})s'.format(numeric_field), outtmpl)
|
r'%({0})s'.format(numeric_field), outtmpl)
|
||||||
|
|
||||||
filename = expand_path(outtmpl % template_dict)
|
# expand_path translates '%%' into '%' and '$$' into '$'
|
||||||
|
# respectively, which is not what we want since we need to keep
|
||||||
|
# '%%' intact for template dict substitution step. Working around
|
||||||
|
# with boundary-alike separator hack.
|
||||||
|
sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
|
||||||
|
outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
|
||||||
|
|
||||||
|
# outtmpl should be expand_path'ed before template dict substitution
|
||||||
|
# because meta fields may contain env variables we don't want to
|
||||||
|
# be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
|
||||||
|
# title "Hello $PATH", we don't want `$PATH` to be expanded.
|
||||||
|
filename = expand_path(outtmpl).replace(sep, '') % template_dict
|
||||||
|
|
||||||
# Temporary fix for #4787
|
# Temporary fix for #4787
|
||||||
# 'Treat' all problem characters by passing filename through preferredencoding
|
# 'Treat' all problem characters by passing filename through preferredencoding
|
||||||
# to workaround encoding issues with subprocess on python2 @ Windows
|
# to workaround encoding issues with subprocess on python2 @ Windows
|
||||||
@@ -844,7 +860,7 @@ class YoutubeDL(object):
|
|||||||
|
|
||||||
force_properties = dict(
|
force_properties = dict(
|
||||||
(k, v) for k, v in ie_result.items() if v is not None)
|
(k, v) for k, v in ie_result.items() if v is not None)
|
||||||
for f in ('_type', 'url', 'ie_key'):
|
for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
|
||||||
if f in force_properties:
|
if f in force_properties:
|
||||||
del force_properties[f]
|
del force_properties[f]
|
||||||
new_result = info.copy()
|
new_result = info.copy()
|
||||||
@@ -1048,6 +1064,25 @@ class YoutubeDL(object):
|
|||||||
return op(actual_value, comparison_value)
|
return op(actual_value, comparison_value)
|
||||||
return _filter
|
return _filter
|
||||||
|
|
||||||
|
def _default_format_spec(self, info_dict, download=True):
|
||||||
|
req_format_list = []
|
||||||
|
|
||||||
|
def can_have_partial_formats():
|
||||||
|
if self.params.get('simulate', False):
|
||||||
|
return True
|
||||||
|
if not download:
|
||||||
|
return True
|
||||||
|
if self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-':
|
||||||
|
return False
|
||||||
|
if info_dict.get('is_live'):
|
||||||
|
return False
|
||||||
|
merger = FFmpegMergerPP(self)
|
||||||
|
return merger.available and merger.can_merge()
|
||||||
|
if can_have_partial_formats():
|
||||||
|
req_format_list.append('bestvideo+bestaudio')
|
||||||
|
req_format_list.append('best')
|
||||||
|
return '/'.join(req_format_list)
|
||||||
|
|
||||||
def build_format_selector(self, format_spec):
|
def build_format_selector(self, format_spec):
|
||||||
def syntax_error(note, start):
|
def syntax_error(note, start):
|
||||||
message = (
|
message = (
|
||||||
@@ -1344,9 +1379,28 @@ class YoutubeDL(object):
|
|||||||
if 'title' not in info_dict:
|
if 'title' not in info_dict:
|
||||||
raise ExtractorError('Missing "title" field in extractor result')
|
raise ExtractorError('Missing "title" field in extractor result')
|
||||||
|
|
||||||
if not isinstance(info_dict['id'], compat_str):
|
def report_force_conversion(field, field_not, conversion):
|
||||||
self.report_warning('"id" field is not a string - forcing string conversion')
|
self.report_warning(
|
||||||
info_dict['id'] = compat_str(info_dict['id'])
|
'"%s" field is not %s - forcing %s conversion, there is an error in extractor'
|
||||||
|
% (field, field_not, conversion))
|
||||||
|
|
||||||
|
def sanitize_string_field(info, string_field):
|
||||||
|
field = info.get(string_field)
|
||||||
|
if field is None or isinstance(field, compat_str):
|
||||||
|
return
|
||||||
|
report_force_conversion(string_field, 'a string', 'string')
|
||||||
|
info[string_field] = compat_str(field)
|
||||||
|
|
||||||
|
def sanitize_numeric_fields(info):
|
||||||
|
for numeric_field in self._NUMERIC_FIELDS:
|
||||||
|
field = info.get(numeric_field)
|
||||||
|
if field is None or isinstance(field, compat_numeric_types):
|
||||||
|
continue
|
||||||
|
report_force_conversion(numeric_field, 'numeric', 'int')
|
||||||
|
info[numeric_field] = int_or_none(field)
|
||||||
|
|
||||||
|
sanitize_string_field(info_dict, 'id')
|
||||||
|
sanitize_numeric_fields(info_dict)
|
||||||
|
|
||||||
if 'playlist' not in info_dict:
|
if 'playlist' not in info_dict:
|
||||||
# It isn't part of a playlist
|
# It isn't part of a playlist
|
||||||
@@ -1427,15 +1481,25 @@ class YoutubeDL(object):
|
|||||||
if not formats:
|
if not formats:
|
||||||
raise ExtractorError('No video formats found!')
|
raise ExtractorError('No video formats found!')
|
||||||
|
|
||||||
|
def is_wellformed(f):
|
||||||
|
url = f.get('url')
|
||||||
|
valid_url = url and isinstance(url, compat_str)
|
||||||
|
if not valid_url:
|
||||||
|
self.report_warning(
|
||||||
|
'"url" field is missing or empty - skipping format, '
|
||||||
|
'there is an error in extractor')
|
||||||
|
return valid_url
|
||||||
|
|
||||||
|
# Filter out malformed formats for better extraction robustness
|
||||||
|
formats = list(filter(is_wellformed, formats))
|
||||||
|
|
||||||
formats_dict = {}
|
formats_dict = {}
|
||||||
|
|
||||||
# We check that all the formats have the format and format_id fields
|
# We check that all the formats have the format and format_id fields
|
||||||
for i, format in enumerate(formats):
|
for i, format in enumerate(formats):
|
||||||
if 'url' not in format:
|
sanitize_string_field(format, 'format_id')
|
||||||
raise ExtractorError('Missing "url" key in result (index %d)' % i)
|
sanitize_numeric_fields(format)
|
||||||
|
|
||||||
format['url'] = sanitize_url(format['url'])
|
format['url'] = sanitize_url(format['url'])
|
||||||
|
|
||||||
if format.get('format_id') is None:
|
if format.get('format_id') is None:
|
||||||
format['format_id'] = compat_str(i)
|
format['format_id'] = compat_str(i)
|
||||||
else:
|
else:
|
||||||
@@ -1489,14 +1553,10 @@ class YoutubeDL(object):
|
|||||||
|
|
||||||
req_format = self.params.get('format')
|
req_format = self.params.get('format')
|
||||||
if req_format is None:
|
if req_format is None:
|
||||||
req_format_list = []
|
req_format = self._default_format_spec(info_dict, download=download)
|
||||||
if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
|
if self.params.get('verbose'):
|
||||||
not info_dict.get('is_live')):
|
self.to_stdout('[debug] Default format spec: %s' % req_format)
|
||||||
merger = FFmpegMergerPP(self)
|
|
||||||
if merger.available and merger.can_merge():
|
|
||||||
req_format_list.append('bestvideo+bestaudio')
|
|
||||||
req_format_list.append('best')
|
|
||||||
req_format = '/'.join(req_format_list)
|
|
||||||
format_selector = self.build_format_selector(req_format)
|
format_selector = self.build_format_selector(req_format)
|
||||||
|
|
||||||
# While in format selection we may need to have an access to the original
|
# While in format selection we may need to have an access to the original
|
||||||
@@ -1859,7 +1919,7 @@ class YoutubeDL(object):
|
|||||||
info_dict.get('protocol') == 'm3u8' and
|
info_dict.get('protocol') == 'm3u8' and
|
||||||
self.params.get('hls_prefer_native')):
|
self.params.get('hls_prefer_native')):
|
||||||
if fixup_policy == 'warn':
|
if fixup_policy == 'warn':
|
||||||
self.report_warning('%s: malformated aac bitstream.' % (
|
self.report_warning('%s: malformed AAC bitstream detected.' % (
|
||||||
info_dict['id']))
|
info_dict['id']))
|
||||||
elif fixup_policy == 'detect_or_warn':
|
elif fixup_policy == 'detect_or_warn':
|
||||||
fixup_pp = FFmpegFixupM3u8PP(self)
|
fixup_pp = FFmpegFixupM3u8PP(self)
|
||||||
@@ -1868,7 +1928,7 @@ class YoutubeDL(object):
|
|||||||
info_dict['__postprocessors'].append(fixup_pp)
|
info_dict['__postprocessors'].append(fixup_pp)
|
||||||
else:
|
else:
|
||||||
self.report_warning(
|
self.report_warning(
|
||||||
'%s: malformated aac bitstream. %s'
|
'%s: malformed AAC bitstream detected. %s'
|
||||||
% (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
|
% (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
|
||||||
else:
|
else:
|
||||||
assert fixup_policy in ('ignore', 'never')
|
assert fixup_policy in ('ignore', 'never')
|
||||||
|
@@ -2322,6 +2322,19 @@ try:
|
|||||||
except ImportError: # Python 2
|
except ImportError: # Python 2
|
||||||
from HTMLParser import HTMLParser as compat_HTMLParser
|
from HTMLParser import HTMLParser as compat_HTMLParser
|
||||||
|
|
||||||
|
try: # Python 2
|
||||||
|
from HTMLParser import HTMLParseError as compat_HTMLParseError
|
||||||
|
except ImportError: # Python <3.4
|
||||||
|
try:
|
||||||
|
from html.parser import HTMLParseError as compat_HTMLParseError
|
||||||
|
except ImportError: # Python >3.4
|
||||||
|
|
||||||
|
# HTMLParseError has been deprecated in Python 3.3 and removed in
|
||||||
|
# Python 3.5. Introducing dummy exception for Python >3.5 for compatible
|
||||||
|
# and uniform cross-version exception handling
|
||||||
|
class compat_HTMLParseError(Exception):
|
||||||
|
pass
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from subprocess import DEVNULL
|
from subprocess import DEVNULL
|
||||||
compat_subprocess_get_DEVNULL = lambda: DEVNULL
|
compat_subprocess_get_DEVNULL = lambda: DEVNULL
|
||||||
@@ -2604,14 +2617,22 @@ except ImportError: # Python 2
|
|||||||
parsed_result[name] = [value]
|
parsed_result[name] = [value]
|
||||||
return parsed_result
|
return parsed_result
|
||||||
|
|
||||||
try:
|
|
||||||
from shlex import quote as compat_shlex_quote
|
compat_os_name = os._name if os.name == 'java' else os.name
|
||||||
except ImportError: # Python < 3.3
|
|
||||||
|
|
||||||
|
if compat_os_name == 'nt':
|
||||||
def compat_shlex_quote(s):
|
def compat_shlex_quote(s):
|
||||||
if re.match(r'^[-_\w./]+$', s):
|
return s if re.match(r'^[-_\w./]+$', s) else '"%s"' % s.replace('"', '\\"')
|
||||||
return s
|
else:
|
||||||
else:
|
try:
|
||||||
return "'" + s.replace("'", "'\"'\"'") + "'"
|
from shlex import quote as compat_shlex_quote
|
||||||
|
except ImportError: # Python < 3.3
|
||||||
|
def compat_shlex_quote(s):
|
||||||
|
if re.match(r'^[-_\w./]+$', s):
|
||||||
|
return s
|
||||||
|
else:
|
||||||
|
return "'" + s.replace("'", "'\"'\"'") + "'"
|
||||||
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@@ -2636,9 +2657,6 @@ def compat_ord(c):
|
|||||||
return ord(c)
|
return ord(c)
|
||||||
|
|
||||||
|
|
||||||
compat_os_name = os._name if os.name == 'java' else os.name
|
|
||||||
|
|
||||||
|
|
||||||
if sys.version_info >= (3, 0):
|
if sys.version_info >= (3, 0):
|
||||||
compat_getenv = os.getenv
|
compat_getenv = os.getenv
|
||||||
compat_expanduser = os.path.expanduser
|
compat_expanduser = os.path.expanduser
|
||||||
@@ -2882,6 +2900,7 @@ else:
|
|||||||
|
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
|
'compat_HTMLParseError',
|
||||||
'compat_HTMLParser',
|
'compat_HTMLParser',
|
||||||
'compat_HTTPError',
|
'compat_HTTPError',
|
||||||
'compat_basestring',
|
'compat_basestring',
|
||||||
|
@@ -8,10 +8,11 @@ import random
|
|||||||
|
|
||||||
from ..compat import compat_os_name
|
from ..compat import compat_os_name
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
decodeArgument,
|
||||||
encodeFilename,
|
encodeFilename,
|
||||||
error_to_compat_str,
|
error_to_compat_str,
|
||||||
decodeArgument,
|
|
||||||
format_bytes,
|
format_bytes,
|
||||||
|
shell_quote,
|
||||||
timeconvert,
|
timeconvert,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -381,10 +382,5 @@ class FileDownloader(object):
|
|||||||
if exe is None:
|
if exe is None:
|
||||||
exe = os.path.basename(str_args[0])
|
exe = os.path.basename(str_args[0])
|
||||||
|
|
||||||
try:
|
|
||||||
import pipes
|
|
||||||
shell_quote = lambda args: ' '.join(map(pipes.quote, str_args))
|
|
||||||
except ImportError:
|
|
||||||
shell_quote = repr
|
|
||||||
self.to_screen('[debug] %s command line: %s' % (
|
self.to_screen('[debug] %s command line: %s' % (
|
||||||
exe, shell_quote(str_args)))
|
exe, shell_quote(str_args)))
|
||||||
|
@@ -212,6 +212,11 @@ class FFmpegFD(ExternalFD):
|
|||||||
|
|
||||||
args = [ffpp.executable, '-y']
|
args = [ffpp.executable, '-y']
|
||||||
|
|
||||||
|
for log_level in ('quiet', 'verbose'):
|
||||||
|
if self.params.get(log_level, False):
|
||||||
|
args += ['-loglevel', log_level]
|
||||||
|
break
|
||||||
|
|
||||||
seekable = info_dict.get('_seekable')
|
seekable = info_dict.get('_seekable')
|
||||||
if seekable is not None:
|
if seekable is not None:
|
||||||
# setting -seekable prevents ffmpeg from guessing if the server
|
# setting -seekable prevents ffmpeg from guessing if the server
|
||||||
|
@@ -98,7 +98,7 @@ def write_piff_header(stream, params):
|
|||||||
|
|
||||||
if is_audio:
|
if is_audio:
|
||||||
smhd_payload = s88.pack(0) # balance
|
smhd_payload = s88.pack(0) # balance
|
||||||
smhd_payload = u16.pack(0) # reserved
|
smhd_payload += u16.pack(0) # reserved
|
||||||
media_header_box = full_box(b'smhd', 0, 0, smhd_payload) # Sound Media Header
|
media_header_box = full_box(b'smhd', 0, 0, smhd_payload) # Sound Media Header
|
||||||
else:
|
else:
|
||||||
vmhd_payload = u16.pack(0) # graphics mode
|
vmhd_payload = u16.pack(0) # graphics mode
|
||||||
@@ -126,7 +126,6 @@ def write_piff_header(stream, params):
|
|||||||
if fourcc == 'AACL':
|
if fourcc == 'AACL':
|
||||||
sample_entry_box = box(b'mp4a', sample_entry_payload)
|
sample_entry_box = box(b'mp4a', sample_entry_payload)
|
||||||
else:
|
else:
|
||||||
sample_entry_payload = sample_entry_payload
|
|
||||||
sample_entry_payload += u16.pack(0) # pre defined
|
sample_entry_payload += u16.pack(0) # pre defined
|
||||||
sample_entry_payload += u16.pack(0) # reserved
|
sample_entry_payload += u16.pack(0) # reserved
|
||||||
sample_entry_payload += u32.pack(0) * 3 # pre defined
|
sample_entry_payload += u32.pack(0) * 3 # pre defined
|
||||||
|
@@ -3,11 +3,13 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
js_to_json,
|
js_to_json,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
|
try_get,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -124,7 +126,20 @@ class ABCIViewIE(InfoExtractor):
|
|||||||
title = video_params.get('title') or video_params['seriesTitle']
|
title = video_params.get('title') or video_params['seriesTitle']
|
||||||
stream = next(s for s in video_params['playlist'] if s.get('type') == 'program')
|
stream = next(s for s in video_params['playlist'] if s.get('type') == 'program')
|
||||||
|
|
||||||
formats = self._extract_akamai_formats(stream['hds-unmetered'], video_id)
|
format_urls = [
|
||||||
|
try_get(stream, lambda x: x['hds-unmetered'], compat_str)]
|
||||||
|
|
||||||
|
# May have higher quality video
|
||||||
|
sd_url = try_get(
|
||||||
|
stream, lambda x: x['streams']['hds']['sd'], compat_str)
|
||||||
|
if sd_url:
|
||||||
|
format_urls.append(sd_url.replace('metered', 'um'))
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for format_url in format_urls:
|
||||||
|
if format_url:
|
||||||
|
formats.extend(
|
||||||
|
self._extract_akamai_formats(format_url, video_id))
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
|
@@ -12,7 +12,15 @@ from ..compat import compat_urlparse
|
|||||||
|
|
||||||
class AbcNewsVideoIE(AMPIE):
|
class AbcNewsVideoIE(AMPIE):
|
||||||
IE_NAME = 'abcnews:video'
|
IE_NAME = 'abcnews:video'
|
||||||
_VALID_URL = r'https?://abcnews\.go\.com/[^/]+/video/(?P<display_id>[0-9a-z-]+)-(?P<id>\d+)'
|
_VALID_URL = r'''(?x)
|
||||||
|
https?://
|
||||||
|
abcnews\.go\.com/
|
||||||
|
(?:
|
||||||
|
[^/]+/video/(?P<display_id>[0-9a-z-]+)-|
|
||||||
|
video/embed\?.*?\bid=
|
||||||
|
)
|
||||||
|
(?P<id>\d+)
|
||||||
|
'''
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://abcnews.go.com/ThisWeek/video/week-exclusive-irans-foreign-minister-zarif-20411932',
|
'url': 'http://abcnews.go.com/ThisWeek/video/week-exclusive-irans-foreign-minister-zarif-20411932',
|
||||||
@@ -29,6 +37,9 @@ class AbcNewsVideoIE(AMPIE):
|
|||||||
# m3u8 download
|
# m3u8 download
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'http://abcnews.go.com/video/embed?id=46979033',
|
||||||
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://abcnews.go.com/2020/video/2020-husband-stands-teacher-jail-student-affairs-26119478',
|
'url': 'http://abcnews.go.com/2020/video/2020-husband-stands-teacher-jail-student-affairs-26119478',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
@@ -22,7 +22,7 @@ class ABCOTVSIE(InfoExtractor):
|
|||||||
'display_id': 'east-bay-museum-celebrates-vintage-synthesizers',
|
'display_id': 'east-bay-museum-celebrates-vintage-synthesizers',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'East Bay museum celebrates vintage synthesizers',
|
'title': 'East Bay museum celebrates vintage synthesizers',
|
||||||
'description': 'md5:a4f10fb2f2a02565c1749d4adbab4b10',
|
'description': 'md5:24ed2bd527096ec2a5c67b9d5a9005f3',
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
'timestamp': 1421123075,
|
'timestamp': 1421123075,
|
||||||
'upload_date': '20150113',
|
'upload_date': '20150113',
|
||||||
|
@@ -15,6 +15,7 @@ from ..utils import (
|
|||||||
intlist_to_bytes,
|
intlist_to_bytes,
|
||||||
srt_subtitles_timecode,
|
srt_subtitles_timecode,
|
||||||
strip_or_none,
|
strip_or_none,
|
||||||
|
urljoin,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -31,25 +32,28 @@ class ADNIE(InfoExtractor):
|
|||||||
'description': 'md5:2f7b5aa76edbc1a7a92cedcda8a528d5',
|
'description': 'md5:2f7b5aa76edbc1a7a92cedcda8a528d5',
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
_BASE_URL = 'http://animedigitalnetwork.fr'
|
||||||
|
|
||||||
def _get_subtitles(self, sub_path, video_id):
|
def _get_subtitles(self, sub_path, video_id):
|
||||||
if not sub_path:
|
if not sub_path:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
enc_subtitles = self._download_webpage(
|
enc_subtitles = self._download_webpage(
|
||||||
'http://animedigitalnetwork.fr/' + sub_path,
|
urljoin(self._BASE_URL, sub_path),
|
||||||
video_id, fatal=False)
|
video_id, fatal=False, headers={
|
||||||
|
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:53.0) Gecko/20100101 Firefox/53.0',
|
||||||
|
})
|
||||||
if not enc_subtitles:
|
if not enc_subtitles:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
|
# http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
|
||||||
dec_subtitles = intlist_to_bytes(aes_cbc_decrypt(
|
dec_subtitles = intlist_to_bytes(aes_cbc_decrypt(
|
||||||
bytes_to_intlist(base64.b64decode(enc_subtitles[24:])),
|
bytes_to_intlist(base64.b64decode(enc_subtitles[24:])),
|
||||||
bytes_to_intlist(b'\nd\xaf\xd2J\xd0\xfc\xe1\xfc\xdf\xb61\xe8\xe1\xf0\xcc'),
|
bytes_to_intlist(b'\x1b\xe0\x29\x61\x38\x94\x24\x00\x12\xbd\xc5\x80\xac\xce\xbe\xb0'),
|
||||||
bytes_to_intlist(base64.b64decode(enc_subtitles[:24]))
|
bytes_to_intlist(base64.b64decode(enc_subtitles[:24]))
|
||||||
))
|
))
|
||||||
subtitles_json = self._parse_json(
|
subtitles_json = self._parse_json(
|
||||||
dec_subtitles[:-compat_ord(dec_subtitles[-1])],
|
dec_subtitles[:-compat_ord(dec_subtitles[-1])].decode(),
|
||||||
None, fatal=False)
|
None, fatal=False)
|
||||||
if not subtitles_json:
|
if not subtitles_json:
|
||||||
return None
|
return None
|
||||||
@@ -103,9 +107,18 @@ class ADNIE(InfoExtractor):
|
|||||||
metas = options.get('metas') or {}
|
metas = options.get('metas') or {}
|
||||||
title = metas.get('title') or video_info['title']
|
title = metas.get('title') or video_info['title']
|
||||||
links = player_config.get('links') or {}
|
links = player_config.get('links') or {}
|
||||||
|
error = None
|
||||||
|
if not links:
|
||||||
|
links_url = player_config['linksurl']
|
||||||
|
links_data = self._download_json(urljoin(
|
||||||
|
self._BASE_URL, links_url), video_id)
|
||||||
|
links = links_data.get('links') or {}
|
||||||
|
error = links_data.get('error')
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for format_id, qualities in links.items():
|
for format_id, qualities in links.items():
|
||||||
|
if not isinstance(qualities, dict):
|
||||||
|
continue
|
||||||
for load_balancer_url in qualities.values():
|
for load_balancer_url in qualities.values():
|
||||||
load_balancer_data = self._download_json(
|
load_balancer_data = self._download_json(
|
||||||
load_balancer_url, video_id, fatal=False) or {}
|
load_balancer_url, video_id, fatal=False) or {}
|
||||||
@@ -119,7 +132,8 @@ class ADNIE(InfoExtractor):
|
|||||||
for f in m3u8_formats:
|
for f in m3u8_formats:
|
||||||
f['language'] = 'fr'
|
f['language'] = 'fr'
|
||||||
formats.extend(m3u8_formats)
|
formats.extend(m3u8_formats)
|
||||||
error = options.get('error')
|
if not error:
|
||||||
|
error = options.get('error')
|
||||||
if not formats and error:
|
if not formats and error:
|
||||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
|
raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
@@ -6,12 +6,16 @@ import time
|
|||||||
import xml.etree.ElementTree as etree
|
import xml.etree.ElementTree as etree
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_urlparse
|
from ..compat import (
|
||||||
|
compat_kwargs,
|
||||||
|
compat_urlparse,
|
||||||
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
NO_DEFAULT,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -21,6 +25,11 @@ MSO_INFO = {
|
|||||||
'username_field': 'username',
|
'username_field': 'username',
|
||||||
'password_field': 'password',
|
'password_field': 'password',
|
||||||
},
|
},
|
||||||
|
'ATTOTT': {
|
||||||
|
'name': 'DIRECTV NOW',
|
||||||
|
'username_field': 'email',
|
||||||
|
'password_field': 'loginpassword',
|
||||||
|
},
|
||||||
'Rogers': {
|
'Rogers': {
|
||||||
'name': 'Rogers',
|
'name': 'Rogers',
|
||||||
'username_field': 'UserName',
|
'username_field': 'UserName',
|
||||||
@@ -36,6 +45,11 @@ MSO_INFO = {
|
|||||||
'username_field': 'Ecom_User_ID',
|
'username_field': 'Ecom_User_ID',
|
||||||
'password_field': 'Ecom_Password',
|
'password_field': 'Ecom_Password',
|
||||||
},
|
},
|
||||||
|
'Brighthouse': {
|
||||||
|
'name': 'Bright House Networks | Spectrum',
|
||||||
|
'username_field': 'j_username',
|
||||||
|
'password_field': 'j_password',
|
||||||
|
},
|
||||||
'Charter_Direct': {
|
'Charter_Direct': {
|
||||||
'name': 'Charter Spectrum',
|
'name': 'Charter Spectrum',
|
||||||
'username_field': 'IDToken1',
|
'username_field': 'IDToken1',
|
||||||
@@ -1308,11 +1322,14 @@ class AdobePassIE(InfoExtractor):
|
|||||||
_USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0'
|
_USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0'
|
||||||
_MVPD_CACHE = 'ap-mvpd'
|
_MVPD_CACHE = 'ap-mvpd'
|
||||||
|
|
||||||
|
_DOWNLOADING_LOGIN_PAGE = 'Downloading Provider Login Page'
|
||||||
|
|
||||||
def _download_webpage_handle(self, *args, **kwargs):
|
def _download_webpage_handle(self, *args, **kwargs):
|
||||||
headers = kwargs.get('headers', {})
|
headers = kwargs.get('headers', {})
|
||||||
headers.update(self.geo_verification_headers())
|
headers.update(self.geo_verification_headers())
|
||||||
kwargs['headers'] = headers
|
kwargs['headers'] = headers
|
||||||
return super(AdobePassIE, self)._download_webpage_handle(*args, **kwargs)
|
return super(AdobePassIE, self)._download_webpage_handle(
|
||||||
|
*args, **compat_kwargs(kwargs))
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _get_mvpd_resource(provider_id, title, guid, rating):
|
def _get_mvpd_resource(provider_id, title, guid, rating):
|
||||||
@@ -1356,6 +1373,21 @@ class AdobePassIE(InfoExtractor):
|
|||||||
'Use --ap-mso to specify Adobe Pass Multiple-system operator Identifier '
|
'Use --ap-mso to specify Adobe Pass Multiple-system operator Identifier '
|
||||||
'and --ap-username and --ap-password or --netrc to provide account credentials.', expected=True)
|
'and --ap-username and --ap-password or --netrc to provide account credentials.', expected=True)
|
||||||
|
|
||||||
|
def extract_redirect_url(html, url=None, fatal=False):
|
||||||
|
# TODO: eliminate code duplication with generic extractor and move
|
||||||
|
# redirection code into _download_webpage_handle
|
||||||
|
REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
|
||||||
|
redirect_url = self._search_regex(
|
||||||
|
r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
|
||||||
|
r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
|
||||||
|
html, 'meta refresh redirect',
|
||||||
|
default=NO_DEFAULT if fatal else None, fatal=fatal)
|
||||||
|
if not redirect_url:
|
||||||
|
return None
|
||||||
|
if url:
|
||||||
|
redirect_url = compat_urlparse.urljoin(url, unescapeHTML(redirect_url))
|
||||||
|
return redirect_url
|
||||||
|
|
||||||
mvpd_headers = {
|
mvpd_headers = {
|
||||||
'ap_42': 'anonymous',
|
'ap_42': 'anonymous',
|
||||||
'ap_11': 'Linux i686',
|
'ap_11': 'Linux i686',
|
||||||
@@ -1405,16 +1437,15 @@ class AdobePassIE(InfoExtractor):
|
|||||||
if '<form name="signin"' in provider_redirect_page:
|
if '<form name="signin"' in provider_redirect_page:
|
||||||
provider_login_page_res = provider_redirect_page_res
|
provider_login_page_res = provider_redirect_page_res
|
||||||
elif 'http-equiv="refresh"' in provider_redirect_page:
|
elif 'http-equiv="refresh"' in provider_redirect_page:
|
||||||
oauth_redirect_url = self._html_search_regex(
|
oauth_redirect_url = extract_redirect_url(
|
||||||
r'content="0;\s*url=([^\'"]+)',
|
provider_redirect_page, fatal=True)
|
||||||
provider_redirect_page, 'meta refresh redirect')
|
|
||||||
provider_login_page_res = self._download_webpage_handle(
|
provider_login_page_res = self._download_webpage_handle(
|
||||||
oauth_redirect_url, video_id,
|
oauth_redirect_url, video_id,
|
||||||
'Downloading Provider Login Page')
|
self._DOWNLOADING_LOGIN_PAGE)
|
||||||
else:
|
else:
|
||||||
provider_login_page_res = post_form(
|
provider_login_page_res = post_form(
|
||||||
provider_redirect_page_res,
|
provider_redirect_page_res,
|
||||||
'Downloading Provider Login Page')
|
self._DOWNLOADING_LOGIN_PAGE)
|
||||||
|
|
||||||
mvpd_confirm_page_res = post_form(
|
mvpd_confirm_page_res = post_form(
|
||||||
provider_login_page_res, 'Logging in', {
|
provider_login_page_res, 'Logging in', {
|
||||||
@@ -1461,8 +1492,17 @@ class AdobePassIE(InfoExtractor):
|
|||||||
'Content-Type': 'application/x-www-form-urlencoded'
|
'Content-Type': 'application/x-www-form-urlencoded'
|
||||||
})
|
})
|
||||||
else:
|
else:
|
||||||
|
# Some providers (e.g. DIRECTV NOW) have another meta refresh
|
||||||
|
# based redirect that should be followed.
|
||||||
|
provider_redirect_page, urlh = provider_redirect_page_res
|
||||||
|
provider_refresh_redirect_url = extract_redirect_url(
|
||||||
|
provider_redirect_page, url=urlh.geturl())
|
||||||
|
if provider_refresh_redirect_url:
|
||||||
|
provider_redirect_page_res = self._download_webpage_handle(
|
||||||
|
provider_refresh_redirect_url, video_id,
|
||||||
|
'Downloading Provider Redirect Page (meta refresh)')
|
||||||
provider_login_page_res = post_form(
|
provider_login_page_res = post_form(
|
||||||
provider_redirect_page_res, 'Downloading Provider Login Page')
|
provider_redirect_page_res, self._DOWNLOADING_LOGIN_PAGE)
|
||||||
mvpd_confirm_page_res = post_form(provider_login_page_res, 'Logging in', {
|
mvpd_confirm_page_res = post_form(provider_login_page_res, 'Logging in', {
|
||||||
mso_info.get('username_field', 'username'): username,
|
mso_info.get('username_field', 'username'): username,
|
||||||
mso_info.get('password_field', 'password'): password,
|
mso_info.get('password_field', 'password'): password,
|
||||||
|
@@ -126,7 +126,7 @@ class AdultSwimIE(TurnerBaseIE):
|
|||||||
video_id = video_data['id']
|
video_id = video_data['id']
|
||||||
|
|
||||||
info = self._extract_cvp_info(
|
info = self._extract_cvp_info(
|
||||||
'http://www.adultswim.com/videos/api/v0/assets?id=' + video_id,
|
'http://www.adultswim.com/videos/api/v0/assets?platform=desktop&id=' + video_id,
|
||||||
video_id, {
|
video_id, {
|
||||||
'secure': {
|
'secure': {
|
||||||
'media_src': 'http://androidhls-secure.cdn.turner.com/adultswim/big',
|
'media_src': 'http://androidhls-secure.cdn.turner.com/adultswim/big',
|
||||||
|
@@ -4,9 +4,9 @@ from .common import InfoExtractor
|
|||||||
|
|
||||||
|
|
||||||
class AlJazeeraIE(InfoExtractor):
|
class AlJazeeraIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?aljazeera\.com/programmes/.*?/(?P<id>[^/]+)\.html'
|
_VALID_URL = r'https?://(?:www\.)?aljazeera\.com/(?:programmes|video)/.*?/(?P<id>[^/]+)\.html'
|
||||||
|
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.aljazeera.com/programmes/the-slum/2014/08/deliverance-201482883754237240.html',
|
'url': 'http://www.aljazeera.com/programmes/the-slum/2014/08/deliverance-201482883754237240.html',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '3792260579001',
|
'id': '3792260579001',
|
||||||
@@ -19,7 +19,10 @@ class AlJazeeraIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
'add_ie': ['BrightcoveNew'],
|
'add_ie': ['BrightcoveNew'],
|
||||||
'skip': 'Not accessible from Travis CI server',
|
'skip': 'Not accessible from Travis CI server',
|
||||||
}
|
}, {
|
||||||
|
'url': 'http://www.aljazeera.com/video/news/2017/05/sierra-leone-709-carat-diamond-auctioned-170511100111930.html',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/665003303001/default_default/index.html?videoId=%s'
|
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/665003303001/default_default/index.html?videoId=%s'
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
93
youtube_dl/extractor/asiancrush.py
Normal file
93
youtube_dl/extractor/asiancrush.py
Normal file
@@ -0,0 +1,93 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from .kaltura import KalturaIE
|
||||||
|
from ..utils import (
|
||||||
|
extract_attributes,
|
||||||
|
remove_end,
|
||||||
|
urlencode_postdata,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class AsianCrushIE(InfoExtractor):
    """Extractor for single asiancrush.com video pages.

    The numeric post id taken from the URL is sent to the site's
    ``admin-ajax.php`` endpoint, which answers with the Kaltura partner and
    entry ids; the actual extraction is then handed over to ``KalturaIE``.
    """

    _VALID_URL = r'https?://(?:www\.)?asiancrush\.com/video/(?:[^/]+/)?0+(?P<id>\d+)v\b'
    _TESTS = [{
        'url': 'https://www.asiancrush.com/video/012869v/women-who-flirt/',
        'md5': 'c3b740e48d0ba002a42c0b72857beae6',
        'info_dict': {
            'id': '1_y4tmjm5r',
            'ext': 'mp4',
            'title': 'Women Who Flirt',
            'description': 'md5:3db14e9186197857e7063522cb89a805',
            'timestamp': 1496936429,
            'upload_date': '20170608',
            'uploader_id': 'craig@crifkin.com',
        },
    }, {
        'url': 'https://www.asiancrush.com/video/she-was-pretty/011886v-pretty-episode-3/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a ``url_result`` pointing at the Kaltura entry behind *url*."""
        post_id = self._match_id(url)

        # Form-encoded request body identifying the post whose Kaltura
        # variables we want.
        payload = urlencode_postdata({
            'postid': post_id,
            'action': 'get_channel_kaltura_vars',
        })
        kaltura_vars = self._download_json(
            'https://www.asiancrush.com/wp-admin/admin-ajax.php', post_id,
            data=payload)

        # Both ids are mandatory; a missing key raises KeyError.
        entry_id = kaltura_vars['entry_id']
        kaltura_url = 'kaltura:%s:%s' % (kaltura_vars['partner_id'], entry_id)

        return self.url_result(
            kaltura_url, ie=KalturaIE.ie_key(), video_id=entry_id,
            video_title=kaltura_vars.get('vid_label'))
|
||||||
|
|
||||||
|
|
||||||
|
class AsianCrushPlaylistIE(InfoExtractor):
    """Extractor for asiancrush.com series pages.

    Collects every anchor on the series page that links to a URL accepted by
    ``AsianCrushIE`` and carries ``class="clearfix"``, and returns them as a
    playlist.
    """

    _VALID_URL = r'https?://(?:www\.)?asiancrush\.com/series/0+(?P<id>\d+)s\b'
    _TEST = {
        'url': 'https://www.asiancrush.com/series/012481s/scholar-walks-night/',
        'info_dict': {
            'id': '12481',
            'title': 'Scholar Who Walks the Night',
            'description': 'md5:7addd7c5132a09fd4741152d96cce886',
        },
        'playlist_count': 20,
    }

    def _real_extract(self, url):
        """Return a playlist result with one entry per episode link."""
        playlist_id = self._match_id(url)

        webpage = self._download_webpage(url, playlist_id)

        entries = []

        # Match whole <a ...> tags whose href is an AsianCrush video URL;
        # group 1 is the quote character so the closing quote must match it.
        for mobj in re.finditer(
                r'<a[^>]+href=(["\'])(?P<url>%s.*?)\1[^>]*>' % AsianCrushIE._VALID_URL,
                webpage):
            attrs = extract_attributes(mobj.group(0))
            # Only anchors whose class attribute is exactly "clearfix" are
            # taken as episode links.
            if attrs.get('class') == 'clearfix':
                entries.append(self.url_result(
                    mobj.group('url'), ie=AsianCrushIE.ie_key()))

        # Title sources in order of preference: the page heading, og:title,
        # twitter:title and finally the <title> tag.
        # The heading pattern uses `[^>]*` before `id=` so that the <h1> is
        # still found when other attributes precede the id attribute (the
        # previous `[^>]` matched exactly one character there).
        title = remove_end(
            self._html_search_regex(
                r'(?s)<h1\b[^>]*\bid=["\']movieTitle[^>]+>(.+?)</h1>', webpage,
                'title', default=None) or self._og_search_title(
                webpage, default=None) or self._html_search_meta(
                'twitter:title', webpage, 'title',
                default=None) or self._search_regex(
                r'<title>([^<]+)</title>', webpage, 'title', fatal=False),
            ' | AsianCrush')

        description = self._og_search_description(
            webpage, default=None) or self._html_search_meta(
            'twitter:description', webpage, 'description', fatal=False)

        return self.playlist_result(entries, playlist_id, title, description)
|
@@ -43,7 +43,7 @@ class AudioBoomIE(InfoExtractor):
|
|||||||
|
|
||||||
def from_clip(field):
|
def from_clip(field):
|
||||||
if clip:
|
if clip:
|
||||||
clip.get(field)
|
return clip.get(field)
|
||||||
|
|
||||||
audio_url = from_clip('clipURLPriorToLoading') or self._og_search_property(
|
audio_url = from_clip('clipURLPriorToLoading') or self._og_search_property(
|
||||||
'audio', webpage, 'audio url')
|
'audio', webpage, 'audio url')
|
||||||
|
@@ -14,14 +14,16 @@ from ..utils import (
|
|||||||
ExtractorError,
|
ExtractorError,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
KNOWN_EXTENSIONS,
|
||||||
parse_filesize,
|
parse_filesize,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
update_url_query,
|
update_url_query,
|
||||||
|
unified_strdate,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class BandcampIE(InfoExtractor):
|
class BandcampIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://.*?\.bandcamp\.com/track/(?P<title>.*)'
|
_VALID_URL = r'https?://.*?\.bandcamp\.com/track/(?P<title>[^/?#&]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
|
'url': 'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
|
||||||
'md5': 'c557841d5e50261777a6585648adf439',
|
'md5': 'c557841d5e50261777a6585648adf439',
|
||||||
@@ -155,7 +157,7 @@ class BandcampIE(InfoExtractor):
|
|||||||
|
|
||||||
class BandcampAlbumIE(InfoExtractor):
|
class BandcampAlbumIE(InfoExtractor):
|
||||||
IE_NAME = 'Bandcamp:album'
|
IE_NAME = 'Bandcamp:album'
|
||||||
_VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<album_id>[^?#]+)|/?(?:$|[?#]))'
|
_VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<album_id>[^/?#&]+))?'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
|
'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
|
||||||
@@ -222,6 +224,12 @@ class BandcampAlbumIE(InfoExtractor):
|
|||||||
'playlist_count': 2,
|
'playlist_count': 2,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    URLs claimed by ``BandcampWeeklyIE`` or ``BandcampIE`` are rejected
    here; everything else is decided by the inherited ``suitable``.
    """
    if BandcampWeeklyIE.suitable(url) or BandcampIE.suitable(url):
        return False
    return super(BandcampAlbumIE, cls).suitable(url)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
uploader_id = mobj.group('subdomain')
|
uploader_id = mobj.group('subdomain')
|
||||||
@@ -250,3 +258,92 @@ class BandcampAlbumIE(InfoExtractor):
|
|||||||
'title': title,
|
'title': title,
|
||||||
'entries': entries,
|
'entries': entries,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class BandcampWeeklyIE(InfoExtractor):
    """Extractor for Bandcamp Weekly shows (``bandcamp.com/?show=<id>``).

    Show metadata and audio stream URLs are read from the JSON stored in the
    page's ``data-blob`` attribute.
    """

    IE_NAME = 'Bandcamp:weekly'
    _VALID_URL = r'https?://(?:www\.)?bandcamp\.com/?\?(?:.*?&)?show=(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://bandcamp.com/?show=224',
        'md5': 'b00df799c733cf7e0c567ed187dea0fd',
        'info_dict': {
            'id': '224',
            'ext': 'opus',
            'title': 'BC Weekly April 4th 2017 - Magic Moments',
            'description': 'md5:5d48150916e8e02d030623a48512c874',
            'duration': 5829.77,
            'release_date': '20170404',
            'series': 'Bandcamp Weekly',
            'episode': 'Magic Moments',
            'episode_number': 208,
            'episode_id': '224',
        }
    }, {
        'url': 'https://bandcamp.com/?blah/blah@&show=228',
        'only_matching': True
    }]

    def _real_extract(self, url):
        """Build the info dict for the show referenced by *url*."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The attribute value is HTML-escaped JSON, hence the unescapeHTML
        # transform before parsing.
        blob_source = self._search_regex(
            r'data-blob=(["\'])(?P<blob>{.+?})\1', webpage,
            'blob', group='blob')
        blob = self._parse_json(
            blob_source, video_id, transform_source=unescapeHTML)

        show = blob['bcw_show']

        # Prefer the show id reported by the page itself: any invalid show id
        # in the URL redirects to `bandcamp.com`, which happens to expose the
        # latest Bandcamp Weekly episode.
        show_id = int_or_none(show.get('show_id')) or int_or_none(video_id)

        formats = []
        for format_id, format_url in show['audio_stream'].items():
            if not isinstance(format_url, compat_str):
                continue
            # First known extension contained in the format id, if any.
            ext = next(
                (candidate for candidate in KNOWN_EXTENSIONS
                 if candidate in format_id),
                None)
            formats.append({
                'format_id': format_id,
                'url': format_url,
                'ext': ext,
                'vcodec': 'none',
            })
        self._sort_formats(formats)

        title = show.get('audio_title') or 'Bandcamp Weekly'
        subtitle = show.get('subtitle')
        if subtitle:
            title += ' - %s' % subtitle

        # The episode number comes from the first entry of the show sequence
        # whose id equals the show id; it stays None when nothing matches.
        seq = blob.get('bcw_seq')
        episode_number = None
        if seq and isinstance(seq, list):
            episode_number = next(
                (int_or_none(item.get('episode_number'))
                 for item in seq
                 if isinstance(item, dict)
                 and int_or_none(item.get('id')) == show_id),
                None)

        return {
            'id': video_id,
            'title': title,
            'description': show.get('desc') or show.get('short_desc'),
            'duration': float_or_none(show.get('audio_duration')),
            'is_live': False,
            'release_date': unified_strdate(show.get('published_date')),
            'series': 'Bandcamp Weekly',
            'episode': subtitle,
            'episode_number': episode_number,
            'episode_id': compat_str(video_id),
            'formats': formats
        }
|
||||||
|
@@ -6,14 +6,18 @@ import itertools
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
clean_html,
|
||||||
dict_get,
|
dict_get,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
|
get_element_by_class,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
try_get,
|
try_get,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
|
urlencode_postdata,
|
||||||
|
urljoin,
|
||||||
)
|
)
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_etree_fromstring,
|
compat_etree_fromstring,
|
||||||
@@ -32,12 +36,15 @@ class BBCCoUkIE(InfoExtractor):
|
|||||||
(?:
|
(?:
|
||||||
programmes/(?!articles/)|
|
programmes/(?!articles/)|
|
||||||
iplayer(?:/[^/]+)?/(?:episode/|playlist/)|
|
iplayer(?:/[^/]+)?/(?:episode/|playlist/)|
|
||||||
music/clips[/#]|
|
music/(?:clips|audiovideo/popular)[/#]|
|
||||||
radio/player/
|
radio/player/
|
||||||
)
|
)
|
||||||
(?P<id>%s)(?!/(?:episodes|broadcasts|clips))
|
(?P<id>%s)(?!/(?:episodes|broadcasts|clips))
|
||||||
''' % _ID_REGEX
|
''' % _ID_REGEX
|
||||||
|
|
||||||
|
_LOGIN_URL = 'https://account.bbc.com/signin'
|
||||||
|
_NETRC_MACHINE = 'bbc'
|
||||||
|
|
||||||
_MEDIASELECTOR_URLS = [
|
_MEDIASELECTOR_URLS = [
|
||||||
# Provides HQ HLS streams with even better quality that pc mediaset but fails
|
# Provides HQ HLS streams with even better quality that pc mediaset but fails
|
||||||
# with geolocation in some cases when it's even not geo restricted at all (e.g.
|
# with geolocation in some cases when it's even not geo restricted at all (e.g.
|
||||||
@@ -222,11 +229,46 @@ class BBCCoUkIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://www.bbc.co.uk/radio/player/p03cchwf',
|
'url': 'http://www.bbc.co.uk/radio/player/p03cchwf',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}
|
}, {
|
||||||
]
|
'url': 'https://www.bbc.co.uk/music/audiovideo/popular#p055bc55',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
_USP_RE = r'/([^/]+?)\.ism(?:\.hlsv2\.ism)?/[^/]+\.m3u8'
|
_USP_RE = r'/([^/]+?)\.ism(?:\.hlsv2\.ism)?/[^/]+\.m3u8'
|
||||||
|
|
||||||
|
def _login(self):
    """Sign in with the configured credentials, if there are any.

    Does nothing when no username is available. Otherwise the sign-in page
    is fetched, its hidden form inputs are submitted together with the
    credentials, and an ``ExtractorError`` is raised when the final URL of
    the response still contains the sign-in URL.
    """
    username, password = self._get_login_info()
    if username is None:
        return

    signin_page = self._download_webpage(
        self._LOGIN_URL, None, 'Downloading signin page')

    form_data = self._hidden_inputs(signin_page)
    form_data.update({
        'username': username,
        'password': password,
    })

    # The form's action attribute, resolved against the sign-in URL; the
    # sign-in URL itself is used when no action attribute is found.
    form_action = self._search_regex(
        r'<form[^>]+action=(["\'])(?P<url>.+?)\1', signin_page,
        'post url', default=self._LOGIN_URL, group='url')
    post_url = urljoin(self._LOGIN_URL, form_action)

    response, urlh = self._download_webpage_handle(
        post_url, None, 'Logging in', data=urlencode_postdata(form_data),
        headers={'Referer': self._LOGIN_URL})

    if self._LOGIN_URL not in urlh.geturl():
        return

    # Still on the sign-in page: report the page's own message when present.
    error = clean_html(get_element_by_class('form-message', response))
    if error:
        raise ExtractorError(
            'Unable to login: %s' % error, expected=True)
    raise ExtractorError('Unable to log in')
|
||||||
|
|
||||||
|
def _real_initialize(self):
    """Initialization hook: perform the login step."""
    self._login()
|
||||||
|
|
||||||
class MediaSelectionError(Exception):
|
class MediaSelectionError(Exception):
|
||||||
def __init__(self, id):
|
def __init__(self, id):
|
||||||
self.id = id
|
self.id = id
|
||||||
@@ -483,6 +525,12 @@ class BBCCoUkIE(InfoExtractor):
|
|||||||
|
|
||||||
webpage = self._download_webpage(url, group_id, 'Downloading video page')
|
webpage = self._download_webpage(url, group_id, 'Downloading video page')
|
||||||
|
|
||||||
|
error = self._search_regex(
|
||||||
|
r'<div\b[^>]+\bclass=["\']smp__message delta["\'][^>]*>([^<]+)<',
|
||||||
|
webpage, 'error', default=None)
|
||||||
|
if error:
|
||||||
|
raise ExtractorError(error, expected=True)
|
||||||
|
|
||||||
programme_id = None
|
programme_id = None
|
||||||
duration = None
|
duration = None
|
||||||
|
|
||||||
|
@@ -6,18 +6,33 @@ from ..utils import (
|
|||||||
ExtractorError,
|
ExtractorError,
|
||||||
clean_html,
|
clean_html,
|
||||||
compat_str,
|
compat_str,
|
||||||
|
float_or_none,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
try_get,
|
try_get,
|
||||||
|
urljoin,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class BeamProLiveIE(InfoExtractor):
|
class BeamProBaseIE(InfoExtractor):
|
||||||
IE_NAME = 'Beam:live'
|
_API_BASE = 'https://mixer.com/api/v1'
|
||||||
_VALID_URL = r'https?://(?:\w+\.)?beam\.pro/(?P<id>[^/?#&]+)'
|
|
||||||
_RATINGS = {'family': 0, 'teen': 13, '18+': 18}
|
_RATINGS = {'family': 0, 'teen': 13, '18+': 18}
|
||||||
|
|
||||||
|
def _extract_channel_info(self, chan):
|
||||||
|
user_id = chan.get('userId') or try_get(chan, lambda x: x['user']['id'])
|
||||||
|
return {
|
||||||
|
'uploader': chan.get('token') or try_get(
|
||||||
|
chan, lambda x: x['user']['username'], compat_str),
|
||||||
|
'uploader_id': compat_str(user_id) if user_id else None,
|
||||||
|
'age_limit': self._RATINGS.get(chan.get('audience')),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class BeamProLiveIE(BeamProBaseIE):
|
||||||
|
IE_NAME = 'Mixer:live'
|
||||||
|
_VALID_URL = r'https?://(?:\w+\.)?(?:beam\.pro|mixer\.com)/(?P<id>[^/?#&]+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.beam.pro/niterhayven',
|
'url': 'http://mixer.com/niterhayven',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '261562',
|
'id': '261562',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@@ -38,11 +53,17 @@ class BeamProLiveIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
_MANIFEST_URL_TEMPLATE = '%s/channels/%%s/manifest.%%s' % BeamProBaseIE._API_BASE
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def suitable(cls, url):
|
||||||
|
return False if BeamProVodIE.suitable(url) else super(BeamProLiveIE, cls).suitable(url)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
channel_name = self._match_id(url)
|
channel_name = self._match_id(url)
|
||||||
|
|
||||||
chan = self._download_json(
|
chan = self._download_json(
|
||||||
'https://beam.pro/api/v1/channels/%s' % channel_name, channel_name)
|
'%s/channels/%s' % (self._API_BASE, channel_name), channel_name)
|
||||||
|
|
||||||
if chan.get('online') is False:
|
if chan.get('online') is False:
|
||||||
raise ExtractorError(
|
raise ExtractorError(
|
||||||
@@ -50,24 +71,118 @@ class BeamProLiveIE(InfoExtractor):
|
|||||||
|
|
||||||
channel_id = chan['id']
|
channel_id = chan['id']
|
||||||
|
|
||||||
|
def manifest_url(kind):
|
||||||
|
return self._MANIFEST_URL_TEMPLATE % (channel_id, kind)
|
||||||
|
|
||||||
formats = self._extract_m3u8_formats(
|
formats = self._extract_m3u8_formats(
|
||||||
'https://beam.pro/api/v1/channels/%s/manifest.m3u8' % channel_id,
|
manifest_url('m3u8'), channel_name, ext='mp4', m3u8_id='hls',
|
||||||
channel_name, ext='mp4', m3u8_id='hls', fatal=False)
|
fatal=False)
|
||||||
|
formats.extend(self._extract_smil_formats(
|
||||||
|
manifest_url('smil'), channel_name, fatal=False))
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
user_id = chan.get('userId') or try_get(chan, lambda x: x['user']['id'])
|
info = {
|
||||||
|
|
||||||
return {
|
|
||||||
'id': compat_str(chan.get('id') or channel_name),
|
'id': compat_str(chan.get('id') or channel_name),
|
||||||
'title': self._live_title(chan.get('name') or channel_name),
|
'title': self._live_title(chan.get('name') or channel_name),
|
||||||
'description': clean_html(chan.get('description')),
|
'description': clean_html(chan.get('description')),
|
||||||
'thumbnail': try_get(chan, lambda x: x['thumbnail']['url'], compat_str),
|
'thumbnail': try_get(
|
||||||
|
chan, lambda x: x['thumbnail']['url'], compat_str),
|
||||||
'timestamp': parse_iso8601(chan.get('updatedAt')),
|
'timestamp': parse_iso8601(chan.get('updatedAt')),
|
||||||
'uploader': chan.get('token') or try_get(
|
|
||||||
chan, lambda x: x['user']['username'], compat_str),
|
|
||||||
'uploader_id': compat_str(user_id) if user_id else None,
|
|
||||||
'age_limit': self._RATINGS.get(chan.get('audience')),
|
|
||||||
'is_live': True,
|
'is_live': True,
|
||||||
'view_count': int_or_none(chan.get('viewersTotal')),
|
'view_count': int_or_none(chan.get('viewersTotal')),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
info.update(self._extract_channel_info(chan))
|
||||||
|
|
||||||
|
return info
|
||||||
|
|
||||||
|
|
||||||
|
class BeamProVodIE(BeamProBaseIE):
    """Extractor for recordings addressed by ``?vod=<id>`` on beam.pro / mixer.com URLs.

    Recording metadata is fetched from ``<_API_BASE>/recordings/<id>``; each
    entry of its ``vods`` list contributes either a format (``hls``/``raw``)
    or the thumbnail.
    """

    IE_NAME = 'Mixer:vod'
    _VALID_URL = r'https?://(?:\w+\.)?(?:beam\.pro|mixer\.com)/[^/?#&]+\?.*?\bvod=(?P<id>\d+)'
    _TEST = {
        'url': 'https://mixer.com/willow8714?vod=2259830',
        'md5': 'b2431e6e8347dc92ebafb565d368b76b',
        'info_dict': {
            'id': '2259830',
            'ext': 'mp4',
            'title': 'willow8714\'s Channel',
            'duration': 6828.15,
            'thumbnail': r're:https://.*source\.png$',
            'timestamp': 1494046474,
            'upload_date': '20170506',
            'uploader': 'willow8714',
            'uploader_id': '6085379',
            'age_limit': 13,
            'view_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }

    @staticmethod
    def _extract_format(vod, vod_type):
        """Build the format list for one ``vods`` entry.

        :param vod: dict describing the entry; ``baseUrl`` and the optional
            ``data`` dict are read from it.
        :param vod_type: ``'hls'`` or ``'raw'``.
        :returns: a one-element list with the format dict, or an empty list
            when the entry has no ``baseUrl``.
        :raises ValueError: if *vod_type* is neither ``'hls'`` nor ``'raw'``.
        """
        if not vod.get('baseUrl'):
            return []

        if vod_type == 'hls':
            filename, protocol = 'manifest.m3u8', 'm3u8_native'
        elif vod_type == 'raw':
            filename, protocol = 'source.mp4', 'https'
        else:
            # Raise explicitly instead of `assert False`: asserts are removed
            # under `python -O`, which would let execution continue with
            # `filename`/`protocol` unbound.
            raise ValueError('Unsupported VOD type: %r' % (vod_type,))

        data = vod.get('data') if isinstance(vod.get('data'), dict) else {}

        # The height is appended to the format id only when it is a string.
        format_id = [vod_type]
        if isinstance(data.get('Height'), compat_str):
            format_id.append('%sp' % data['Height'])

        return [{
            'url': urljoin(vod['baseUrl'], filename),
            'format_id': '-'.join(format_id),
            'ext': 'mp4',
            'protocol': protocol,
            'width': int_or_none(data.get('Width')),
            'height': int_or_none(data.get('Height')),
            'fps': int_or_none(data.get('Fps')),
            # NOTE(review): the second argument is presumably a scale divisor
            # (bit/s -> kbit/s) - confirm against utils.int_or_none.
            'tbr': int_or_none(data.get('Bitrate'), 1000),
        }]

    def _real_extract(self, url):
        """Return the info dict for the recording referenced by *url*.

        Raises an expected ``ExtractorError`` when the recording's state is
        anything other than ``AVAILABLE``.
        """
        vod_id = self._match_id(url)

        vod_info = self._download_json(
            '%s/recordings/%s' % (self._API_BASE, vod_id), vod_id)

        state = vod_info.get('state')
        if state != 'AVAILABLE':
            raise ExtractorError(
                'VOD %s is not available (state: %s)' % (vod_id, state),
                expected=True)

        formats = []
        thumbnail_url = None

        for vod in vod_info['vods']:
            vod_type = vod.get('format')
            if vod_type in ('hls', 'raw'):
                formats.extend(self._extract_format(vod, vod_type))
            elif vod_type == 'thumbnail':
                thumbnail_url = urljoin(vod.get('baseUrl'), 'source.png')

        self._sort_formats(formats)

        info = {
            'id': vod_id,
            'title': vod_info.get('name') or vod_id,
            'duration': float_or_none(vod_info.get('duration')),
            'thumbnail': thumbnail_url,
            'timestamp': parse_iso8601(vod_info.get('createdAt')),
            'view_count': int_or_none(vod_info.get('viewsTotal')),
            'formats': formats,
        }
        # Uploader and age-limit fields come from the shared base-class helper.
        info.update(self._extract_channel_info(vod_info.get('channel') or {}))

        return info
|
||||||
|
@@ -54,6 +54,22 @@ class BiliBiliIE(InfoExtractor):
|
|||||||
'description': '如果你是神明,并且能够让妄想成为现实。那你会进行怎么样的妄想?是淫靡的世界?独裁社会?毁灭性的制裁?还是……2015年,涩谷。从6年前发生的大灾害“涩谷地震”之后复兴了的这个街区里新设立的私立高中...',
|
'description': '如果你是神明,并且能够让妄想成为现实。那你会进行怎么样的妄想?是淫靡的世界?独裁社会?毁灭性的制裁?还是……2015年,涩谷。从6年前发生的大灾害“涩谷地震”之后复兴了的这个街区里新设立的私立高中...',
|
||||||
},
|
},
|
||||||
'skip': 'Geo-restricted to China',
|
'skip': 'Geo-restricted to China',
|
||||||
|
}, {
|
||||||
|
# Title with double quotes
|
||||||
|
'url': 'http://www.bilibili.com/video/av8903802/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '8903802',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '阿滴英文|英文歌分享#6 "Closer',
|
||||||
|
'description': '滴妹今天唱Closer給你聽! 有史以来,被推最多次也是最久的歌曲,其实歌词跟我原本想像差蛮多的,不过还是好听! 微博@阿滴英文',
|
||||||
|
'uploader': '阿滴英文',
|
||||||
|
'uploader_id': '65880958',
|
||||||
|
'timestamp': 1488382620,
|
||||||
|
'upload_date': '20170301',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True, # Test metadata only
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
_APP_KEY = '84956560bc028eb7'
|
_APP_KEY = '84956560bc028eb7'
|
||||||
@@ -135,7 +151,7 @@ class BiliBiliIE(InfoExtractor):
|
|||||||
'formats': formats,
|
'formats': formats,
|
||||||
})
|
})
|
||||||
|
|
||||||
title = self._html_search_regex('<h1[^>]+title="([^"]+)">', webpage, 'title')
|
title = self._html_search_regex('<h1[^>]*>([^<]+)</h1>', webpage, 'title')
|
||||||
description = self._html_search_meta('description', webpage)
|
description = self._html_search_meta('description', webpage)
|
||||||
timestamp = unified_timestamp(self._html_search_regex(
|
timestamp = unified_timestamp(self._html_search_regex(
|
||||||
r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', default=None))
|
r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', default=None))
|
||||||
|
@@ -84,9 +84,10 @@ class BuzzFeedIE(InfoExtractor):
|
|||||||
continue
|
continue
|
||||||
entries.append(self.url_result(video['url']))
|
entries.append(self.url_result(video['url']))
|
||||||
|
|
||||||
facebook_url = FacebookIE._extract_url(webpage)
|
facebook_urls = FacebookIE._extract_urls(webpage)
|
||||||
if facebook_url:
|
entries.extend([
|
||||||
entries.append(self.url_result(facebook_url))
|
self.url_result(facebook_url)
|
||||||
|
for facebook_url in facebook_urls])
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'_type': 'playlist',
|
'_type': 'playlist',
|
||||||
|
@@ -49,13 +49,13 @@ class CBSIE(CBSBaseIE):
|
|||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _extract_video_info(self, content_id):
|
def _extract_video_info(self, content_id, site='cbs', mpx_acc=2198311517):
|
||||||
items_data = self._download_xml(
|
items_data = self._download_xml(
|
||||||
'http://can.cbs.com/thunder/player/videoPlayerService.php',
|
'http://can.cbs.com/thunder/player/videoPlayerService.php',
|
||||||
content_id, query={'partner': 'cbs', 'contentId': content_id})
|
content_id, query={'partner': site, 'contentId': content_id})
|
||||||
video_data = xpath_element(items_data, './/item')
|
video_data = xpath_element(items_data, './/item')
|
||||||
title = xpath_text(video_data, 'videoTitle', 'title', True)
|
title = xpath_text(video_data, 'videoTitle', 'title', True)
|
||||||
tp_path = 'dJ5BDC/media/guid/2198311517/%s' % content_id
|
tp_path = 'dJ5BDC/media/guid/%d/%s' % (mpx_acc, content_id)
|
||||||
tp_release_url = 'http://link.theplatform.com/s/' + tp_path
|
tp_release_url = 'http://link.theplatform.com/s/' + tp_path
|
||||||
|
|
||||||
asset_types = []
|
asset_types = []
|
||||||
|
@@ -3,17 +3,18 @@ from __future__ import unicode_literals
|
|||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .theplatform import ThePlatformIE
|
from .cbs import CBSIE
|
||||||
from ..utils import int_or_none
|
from ..utils import int_or_none
|
||||||
|
|
||||||
|
|
||||||
class CBSInteractiveIE(ThePlatformIE):
|
class CBSInteractiveIE(CBSIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?(?P<site>cnet|zdnet)\.com/(?:videos|video/share)/(?P<id>[^/?]+)'
|
_VALID_URL = r'https?://(?:www\.)?(?P<site>cnet|zdnet)\.com/(?:videos|video(?:/share)?)/(?P<id>[^/?]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.cnet.com/videos/hands-on-with-microsofts-windows-8-1-update/',
|
'url': 'http://www.cnet.com/videos/hands-on-with-microsofts-windows-8-1-update/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '56f4ea68-bd21-4852-b08c-4de5b8354c60',
|
'id': 'R49SYt__yAfmlXR85z4f7gNmCBDcN_00',
|
||||||
'ext': 'flv',
|
'display_id': 'hands-on-with-microsofts-windows-8-1-update',
|
||||||
|
'ext': 'mp4',
|
||||||
'title': 'Hands-on with Microsoft Windows 8.1 Update',
|
'title': 'Hands-on with Microsoft Windows 8.1 Update',
|
||||||
'description': 'The new update to the Windows 8 OS brings improved performance for mouse and keyboard users.',
|
'description': 'The new update to the Windows 8 OS brings improved performance for mouse and keyboard users.',
|
||||||
'uploader_id': '6085384d-619e-11e3-b231-14feb5ca9861',
|
'uploader_id': '6085384d-619e-11e3-b231-14feb5ca9861',
|
||||||
@@ -22,13 +23,19 @@ class CBSInteractiveIE(ThePlatformIE):
|
|||||||
'timestamp': 1396479627,
|
'timestamp': 1396479627,
|
||||||
'upload_date': '20140402',
|
'upload_date': '20140402',
|
||||||
},
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.cnet.com/videos/whiny-pothole-tweets-at-local-government-when-hit-by-cars-tomorrow-daily-187/',
|
'url': 'http://www.cnet.com/videos/whiny-pothole-tweets-at-local-government-when-hit-by-cars-tomorrow-daily-187/',
|
||||||
|
'md5': 'f11d27b2fa18597fbf92444d2a9ed386',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '56527b93-d25d-44e3-b738-f989ce2e49ba',
|
'id': 'kjOJd_OoVJqbg_ZD8MZCOk8Wekb9QccK',
|
||||||
'ext': 'flv',
|
'display_id': 'whiny-pothole-tweets-at-local-government-when-hit-by-cars-tomorrow-daily-187',
|
||||||
|
'ext': 'mp4',
|
||||||
'title': 'Whiny potholes tweet at local government when hit by cars (Tomorrow Daily 187)',
|
'title': 'Whiny potholes tweet at local government when hit by cars (Tomorrow Daily 187)',
|
||||||
'description': 'Khail and Ashley wonder what other civic woes can be solved by self-tweeting objects, investigate a new kind of VR camera and watch an origami robot self-assemble, walk, climb, dig and dissolve. #TDPothole',
|
'description': 'md5:d2b9a95a5ffe978ae6fbd4cf944d618f',
|
||||||
'uploader_id': 'b163284d-6b73-44fc-b3e6-3da66c392d40',
|
'uploader_id': 'b163284d-6b73-44fc-b3e6-3da66c392d40',
|
||||||
'uploader': 'Ashley Esqueda',
|
'uploader': 'Ashley Esqueda',
|
||||||
'duration': 1482,
|
'duration': 1482,
|
||||||
@@ -38,23 +45,28 @@ class CBSInteractiveIE(ThePlatformIE):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://www.zdnet.com/video/share/video-keeping-android-smartphones-and-tablets-secure/',
|
'url': 'http://www.zdnet.com/video/share/video-keeping-android-smartphones-and-tablets-secure/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'bc1af9f0-a2b5-4e54-880d-0d95525781c0',
|
'id': 'k0r4T_ehht4xW_hAOqiVQPuBDPZ8SRjt',
|
||||||
|
'display_id': 'video-keeping-android-smartphones-and-tablets-secure',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Video: Keeping Android smartphones and tablets secure',
|
'title': 'Video: Keeping Android smartphones and tablets secure',
|
||||||
'description': 'Here\'s the best way to keep Android devices secure, and what you do when they\'ve come to the end of their lives.',
|
'description': 'Here\'s the best way to keep Android devices secure, and what you do when they\'ve come to the end of their lives.',
|
||||||
'uploader_id': 'f2d97ea2-8175-11e2-9d12-0018fe8a00b0',
|
'uploader_id': 'f2d97ea2-8175-11e2-9d12-0018fe8a00b0',
|
||||||
'uploader': 'Adrian Kingsley-Hughes',
|
'uploader': 'Adrian Kingsley-Hughes',
|
||||||
'timestamp': 1448961720,
|
'duration': 731,
|
||||||
'upload_date': '20151201',
|
'timestamp': 1449129925,
|
||||||
|
'upload_date': '20151203',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
# m3u8 download
|
# m3u8 download
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
}
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.zdnet.com/video/huawei-matebook-x-video/',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
TP_RELEASE_URL_TEMPLATE = 'http://link.theplatform.com/s/kYEXFC/%s?mbr=true'
|
|
||||||
MPX_ACCOUNTS = {
|
MPX_ACCOUNTS = {
|
||||||
'cnet': 2288573011,
|
'cnet': 2198311517,
|
||||||
'zdnet': 2387448114,
|
'zdnet': 2387448114,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -68,7 +80,8 @@ class CBSInteractiveIE(ThePlatformIE):
|
|||||||
data = self._parse_json(data_json, display_id)
|
data = self._parse_json(data_json, display_id)
|
||||||
vdata = data.get('video') or data['videos'][0]
|
vdata = data.get('video') or data['videos'][0]
|
||||||
|
|
||||||
video_id = vdata['id']
|
video_id = vdata['mpxRefId']
|
||||||
|
|
||||||
title = vdata['title']
|
title = vdata['title']
|
||||||
author = vdata.get('author')
|
author = vdata.get('author')
|
||||||
if author:
|
if author:
|
||||||
@@ -78,20 +91,7 @@ class CBSInteractiveIE(ThePlatformIE):
|
|||||||
uploader = None
|
uploader = None
|
||||||
uploader_id = None
|
uploader_id = None
|
||||||
|
|
||||||
media_guid_path = 'media/guid/%d/%s' % (self.MPX_ACCOUNTS[site], vdata['mpxRefId'])
|
info = self._extract_video_info(video_id, site, self.MPX_ACCOUNTS[site])
|
||||||
formats, subtitles = [], {}
|
|
||||||
for (fkey, vid) in vdata['files'].items():
|
|
||||||
if fkey == 'hls_phone' and 'hls_tablet' in vdata['files']:
|
|
||||||
continue
|
|
||||||
release_url = self.TP_RELEASE_URL_TEMPLATE % vid
|
|
||||||
if fkey == 'hds':
|
|
||||||
release_url += '&manifest=f4m'
|
|
||||||
tp_formats, tp_subtitles = self._extract_theplatform_smil(release_url, video_id, 'Downloading %s SMIL data' % fkey)
|
|
||||||
formats.extend(tp_formats)
|
|
||||||
subtitles = self._merge_subtitles(subtitles, tp_subtitles)
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
info = self._extract_theplatform_metadata('kYEXFC/%s' % media_guid_path, video_id)
|
|
||||||
info.update({
|
info.update({
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
@@ -99,7 +99,5 @@ class CBSInteractiveIE(ThePlatformIE):
|
|||||||
'duration': int_or_none(vdata.get('duration')),
|
'duration': int_or_none(vdata.get('duration')),
|
||||||
'uploader': uploader,
|
'uploader': uploader,
|
||||||
'uploader_id': uploader_id,
|
'uploader_id': uploader_id,
|
||||||
'subtitles': subtitles,
|
|
||||||
'formats': formats,
|
|
||||||
})
|
})
|
||||||
return info
|
return info
|
||||||
|
@@ -15,19 +15,23 @@ class CBSNewsIE(CBSIE):
|
|||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'http://www.cbsnews.com/news/tesla-and-spacex-elon-musks-industrial-empire/',
|
# 60 minutes
|
||||||
|
'url': 'http://www.cbsnews.com/news/artificial-intelligence-positioned-to-be-a-game-changer/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'tesla-and-spacex-elon-musks-industrial-empire',
|
'id': '_B6Ga3VJrI4iQNKsir_cdFo9Re_YJHE_',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'title': 'Tesla and SpaceX: Elon Musk\'s industrial empire',
|
'title': 'Artificial Intelligence',
|
||||||
'thumbnail': 'http://beta.img.cbsnews.com/i/2014/03/30/60147937-2f53-4565-ad64-1bdd6eb64679/60-0330-pelley-640x360.jpg',
|
'description': 'md5:8818145f9974431e0fb58a1b8d69613c',
|
||||||
'duration': 791,
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
'duration': 1606,
|
||||||
|
'uploader': 'CBSI-NEW',
|
||||||
|
'timestamp': 1498431900,
|
||||||
|
'upload_date': '20170625',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
# rtmp download
|
# m3u8 download
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
'skip': 'Subscribers only',
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://www.cbsnews.com/videos/fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack/',
|
'url': 'http://www.cbsnews.com/videos/fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack/',
|
||||||
@@ -52,6 +56,22 @@ class CBSNewsIE(CBSIE):
|
|||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
# 48 hours
|
||||||
|
'url': 'http://www.cbsnews.com/news/maria-ridulph-murder-will-the-nations-oldest-cold-case-to-go-to-trial-ever-get-solved/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'QpM5BJjBVEAUFi7ydR9LusS69DPLqPJ1',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Cold as Ice',
|
||||||
|
'description': 'Can a childhood memory of a friend\'s murder solve a 1957 cold case? "48 Hours" correspondent Erin Moriarty has the latest.',
|
||||||
|
'upload_date': '20170604',
|
||||||
|
'timestamp': 1496538000,
|
||||||
|
'uploader': 'CBSI-NEW',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@@ -60,12 +80,18 @@ class CBSNewsIE(CBSIE):
|
|||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
video_info = self._parse_json(self._html_search_regex(
|
video_info = self._parse_json(self._html_search_regex(
|
||||||
r'(?:<ul class="media-list items" id="media-related-items"><li data-video-info|<div id="cbsNewsVideoPlayer" data-video-player-options)=\'({.+?})\'',
|
r'(?:<ul class="media-list items" id="media-related-items"[^>]*><li data-video-info|<div id="cbsNewsVideoPlayer" data-video-player-options)=\'({.+?})\'',
|
||||||
webpage, 'video JSON info'), video_id)
|
webpage, 'video JSON info', default='{}'), video_id, fatal=False)
|
||||||
|
|
||||||
item = video_info['item'] if 'item' in video_info else video_info
|
if video_info:
|
||||||
guid = item['mpxRefId']
|
item = video_info['item'] if 'item' in video_info else video_info
|
||||||
return self._extract_video_info(guid)
|
else:
|
||||||
|
state = self._parse_json(self._search_regex(
|
||||||
|
r'data-cbsvideoui-options=(["\'])(?P<json>{.+?})\1', webpage,
|
||||||
|
'playlist JSON info', group='json'), video_id)['state']
|
||||||
|
item = state['playlist'][state['pid']]
|
||||||
|
|
||||||
|
return self._extract_video_info(item['mpxRefId'], 'cbsnews')
|
||||||
|
|
||||||
|
|
||||||
class CBSNewsLiveVideoIE(InfoExtractor):
|
class CBSNewsLiveVideoIE(InfoExtractor):
|
||||||
|
72
youtube_dl/extractor/cjsw.py
Normal file
72
youtube_dl/extractor/cjsw.py
Normal file
@@ -0,0 +1,72 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
determine_ext,
|
||||||
|
unescapeHTML,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class CJSWIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?cjsw\.com/program/(?P<program>[^/]+)/episode/(?P<id>\d+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'http://cjsw.com/program/freshly-squeezed/episode/20170620',
|
||||||
|
'md5': 'cee14d40f1e9433632c56e3d14977120',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '91d9f016-a2e7-46c5-8dcb-7cbcd7437c41',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'title': 'Freshly Squeezed – Episode June 20, 2017',
|
||||||
|
'description': 'md5:c967d63366c3898a80d0c7b0ff337202',
|
||||||
|
'series': 'Freshly Squeezed',
|
||||||
|
'episode_id': '20170620',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# no description
|
||||||
|
'url': 'http://cjsw.com/program/road-pops/episode/20170707/',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
program, episode_id = mobj.group('program', 'id')
|
||||||
|
audio_id = '%s/%s' % (program, episode_id)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, episode_id)
|
||||||
|
|
||||||
|
title = unescapeHTML(self._search_regex(
|
||||||
|
(r'<h1[^>]+class=["\']episode-header__title["\'][^>]*>(?P<title>[^<]+)',
|
||||||
|
r'data-audio-title=(["\'])(?P<title>(?:(?!\1).)+)\1'),
|
||||||
|
webpage, 'title', group='title'))
|
||||||
|
|
||||||
|
audio_url = self._search_regex(
|
||||||
|
r'<button[^>]+data-audio-src=(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||||
|
webpage, 'audio url', group='url')
|
||||||
|
|
||||||
|
audio_id = self._search_regex(
|
||||||
|
r'/([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})\.mp3',
|
||||||
|
audio_url, 'audio id', default=audio_id)
|
||||||
|
|
||||||
|
formats = [{
|
||||||
|
'url': audio_url,
|
||||||
|
'ext': determine_ext(audio_url, 'mp3'),
|
||||||
|
'vcodec': 'none',
|
||||||
|
}]
|
||||||
|
|
||||||
|
description = self._html_search_regex(
|
||||||
|
r'<p>(?P<description>.+?)</p>', webpage, 'description',
|
||||||
|
default=None)
|
||||||
|
series = self._search_regex(
|
||||||
|
r'data-showname=(["\'])(?P<name>(?:(?!\1).)+)\1', webpage,
|
||||||
|
'series', default=program, group='name')
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': audio_id,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'formats': formats,
|
||||||
|
'series': series,
|
||||||
|
'episode_id': episode_id,
|
||||||
|
}
|
@@ -376,7 +376,7 @@ class InfoExtractor(object):
|
|||||||
cls._VALID_URL_RE = re.compile(cls._VALID_URL)
|
cls._VALID_URL_RE = re.compile(cls._VALID_URL)
|
||||||
m = cls._VALID_URL_RE.match(url)
|
m = cls._VALID_URL_RE.match(url)
|
||||||
assert m
|
assert m
|
||||||
return m.group('id')
|
return compat_str(m.group('id'))
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def working(cls):
|
def working(cls):
|
||||||
@@ -420,7 +420,7 @@ class InfoExtractor(object):
|
|||||||
if country_code:
|
if country_code:
|
||||||
self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code)
|
self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code)
|
||||||
if self._downloader.params.get('verbose', False):
|
if self._downloader.params.get('verbose', False):
|
||||||
self._downloader.to_stdout(
|
self._downloader.to_screen(
|
||||||
'[debug] Using fake IP %s (%s) as X-Forwarded-For.'
|
'[debug] Using fake IP %s (%s) as X-Forwarded-For.'
|
||||||
% (self._x_forwarded_for_ip, country_code.upper()))
|
% (self._x_forwarded_for_ip, country_code.upper()))
|
||||||
|
|
||||||
@@ -730,12 +730,12 @@ class InfoExtractor(object):
|
|||||||
video_info['title'] = video_title
|
video_info['title'] = video_title
|
||||||
return video_info
|
return video_info
|
||||||
|
|
||||||
def playlist_from_matches(self, matches, video_id, video_title, getter=None, ie=None):
|
def playlist_from_matches(self, matches, playlist_id=None, playlist_title=None, getter=None, ie=None):
|
||||||
urlrs = orderedSet(
|
urls = orderedSet(
|
||||||
self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
|
self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
|
||||||
for m in matches)
|
for m in matches)
|
||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
urlrs, playlist_id=video_id, playlist_title=video_title)
|
urls, playlist_id=playlist_id, playlist_title=playlist_title)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def playlist_result(entries, playlist_id=None, playlist_title=None, playlist_description=None):
|
def playlist_result(entries, playlist_id=None, playlist_title=None, playlist_description=None):
|
||||||
@@ -1002,17 +1002,17 @@ class InfoExtractor(object):
|
|||||||
item_type = e.get('@type')
|
item_type = e.get('@type')
|
||||||
if expected_type is not None and expected_type != item_type:
|
if expected_type is not None and expected_type != item_type:
|
||||||
return info
|
return info
|
||||||
if item_type == 'TVEpisode':
|
if item_type in ('TVEpisode', 'Episode'):
|
||||||
info.update({
|
info.update({
|
||||||
'episode': unescapeHTML(e.get('name')),
|
'episode': unescapeHTML(e.get('name')),
|
||||||
'episode_number': int_or_none(e.get('episodeNumber')),
|
'episode_number': int_or_none(e.get('episodeNumber')),
|
||||||
'description': unescapeHTML(e.get('description')),
|
'description': unescapeHTML(e.get('description')),
|
||||||
})
|
})
|
||||||
part_of_season = e.get('partOfSeason')
|
part_of_season = e.get('partOfSeason')
|
||||||
if isinstance(part_of_season, dict) and part_of_season.get('@type') == 'TVSeason':
|
if isinstance(part_of_season, dict) and part_of_season.get('@type') in ('TVSeason', 'Season', 'CreativeWorkSeason'):
|
||||||
info['season_number'] = int_or_none(part_of_season.get('seasonNumber'))
|
info['season_number'] = int_or_none(part_of_season.get('seasonNumber'))
|
||||||
part_of_series = e.get('partOfSeries') or e.get('partOfTVSeries')
|
part_of_series = e.get('partOfSeries') or e.get('partOfTVSeries')
|
||||||
if isinstance(part_of_series, dict) and part_of_series.get('@type') == 'TVSeries':
|
if isinstance(part_of_series, dict) and part_of_series.get('@type') in ('TVSeries', 'Series', 'CreativeWorkSeries'):
|
||||||
info['series'] = unescapeHTML(part_of_series.get('name'))
|
info['series'] = unescapeHTML(part_of_series.get('name'))
|
||||||
elif item_type == 'Article':
|
elif item_type == 'Article':
|
||||||
info.update({
|
info.update({
|
||||||
@@ -1022,10 +1022,10 @@ class InfoExtractor(object):
|
|||||||
})
|
})
|
||||||
elif item_type == 'VideoObject':
|
elif item_type == 'VideoObject':
|
||||||
extract_video_object(e)
|
extract_video_object(e)
|
||||||
elif item_type == 'WebPage':
|
continue
|
||||||
video = e.get('video')
|
video = e.get('video')
|
||||||
if isinstance(video, dict) and video.get('@type') == 'VideoObject':
|
if isinstance(video, dict) and video.get('@type') == 'VideoObject':
|
||||||
extract_video_object(video)
|
extract_video_object(video)
|
||||||
break
|
break
|
||||||
return dict((k, v) for k, v in info.items() if v is not None)
|
return dict((k, v) for k, v in info.items() if v is not None)
|
||||||
|
|
||||||
@@ -2001,6 +2001,12 @@ class InfoExtractor(object):
|
|||||||
compat_etree_fromstring(ism.encode('utf-8')), urlh.geturl(), ism_id)
|
compat_etree_fromstring(ism.encode('utf-8')), urlh.geturl(), ism_id)
|
||||||
|
|
||||||
def _parse_ism_formats(self, ism_doc, ism_url, ism_id=None):
|
def _parse_ism_formats(self, ism_doc, ism_url, ism_id=None):
|
||||||
|
"""
|
||||||
|
Parse formats from ISM manifest.
|
||||||
|
References:
|
||||||
|
1. [MS-SSTR]: Smooth Streaming Protocol,
|
||||||
|
https://msdn.microsoft.com/en-us/library/ff469518.aspx
|
||||||
|
"""
|
||||||
if ism_doc.get('IsLive') == 'TRUE' or ism_doc.find('Protection') is not None:
|
if ism_doc.get('IsLive') == 'TRUE' or ism_doc.find('Protection') is not None:
|
||||||
return []
|
return []
|
||||||
|
|
||||||
@@ -2022,8 +2028,11 @@ class InfoExtractor(object):
|
|||||||
self.report_warning('%s is not a supported codec' % fourcc)
|
self.report_warning('%s is not a supported codec' % fourcc)
|
||||||
continue
|
continue
|
||||||
tbr = int(track.attrib['Bitrate']) // 1000
|
tbr = int(track.attrib['Bitrate']) // 1000
|
||||||
width = int_or_none(track.get('MaxWidth'))
|
# [1] does not mention Width and Height attributes. However,
|
||||||
height = int_or_none(track.get('MaxHeight'))
|
# they're often present while MaxWidth and MaxHeight are
|
||||||
|
# missing, so should be used as fallbacks
|
||||||
|
width = int_or_none(track.get('MaxWidth') or track.get('Width'))
|
||||||
|
height = int_or_none(track.get('MaxHeight') or track.get('Height'))
|
||||||
sampling_rate = int_or_none(track.get('SamplingRate'))
|
sampling_rate = int_or_none(track.get('SamplingRate'))
|
||||||
|
|
||||||
track_url_pattern = re.sub(r'{[Bb]itrate}', track.attrib['Bitrate'], url_pattern)
|
track_url_pattern = re.sub(r'{[Bb]itrate}', track.attrib['Bitrate'], url_pattern)
|
||||||
@@ -2123,15 +2132,18 @@ class InfoExtractor(object):
|
|||||||
return is_plain_url, formats
|
return is_plain_url, formats
|
||||||
|
|
||||||
entries = []
|
entries = []
|
||||||
|
# amp-video and amp-audio are very similar to their HTML5 counterparts
|
||||||
|
# so we will include them right here (see
|
||||||
|
# https://www.ampproject.org/docs/reference/components/amp-video)
|
||||||
media_tags = [(media_tag, media_type, '')
|
media_tags = [(media_tag, media_type, '')
|
||||||
for media_tag, media_type
|
for media_tag, media_type
|
||||||
in re.findall(r'(?s)(<(video|audio)[^>]*/>)', webpage)]
|
in re.findall(r'(?s)(<(?:amp-)?(video|audio)[^>]*/>)', webpage)]
|
||||||
media_tags.extend(re.findall(
|
media_tags.extend(re.findall(
|
||||||
# We only allow video|audio followed by a whitespace or '>'.
|
# We only allow video|audio followed by a whitespace or '>'.
|
||||||
# Allowing more characters may end up in significant slow down (see
|
# Allowing more characters may end up in significant slow down (see
|
||||||
# https://github.com/rg3/youtube-dl/issues/11979, example URL:
|
# https://github.com/rg3/youtube-dl/issues/11979, example URL:
|
||||||
# http://www.porntrex.com/maps/videositemap.xml).
|
# http://www.porntrex.com/maps/videositemap.xml).
|
||||||
r'(?s)(<(?P<tag>video|audio)(?:\s+[^>]*)?>)(.*?)</(?P=tag)>', webpage))
|
r'(?s)(<(?P<tag>(?:amp-)?(?:video|audio))(?:\s+[^>]*)?>)(.*?)</(?P=tag)>', webpage))
|
||||||
for media_tag, media_type, media_content in media_tags:
|
for media_tag, media_type, media_content in media_tags:
|
||||||
media_info = {
|
media_info = {
|
||||||
'formats': [],
|
'formats': [],
|
||||||
@@ -2196,8 +2208,9 @@ class InfoExtractor(object):
|
|||||||
|
|
||||||
def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native', skip_protocols=[]):
|
def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native', skip_protocols=[]):
|
||||||
url = re.sub(r'/(?:manifest|playlist|jwplayer)\.(?:m3u8|f4m|mpd|smil)', '', url)
|
url = re.sub(r'/(?:manifest|playlist|jwplayer)\.(?:m3u8|f4m|mpd|smil)', '', url)
|
||||||
url_base = self._search_regex(r'(?:https?|rtmp|rtsp)(://[^?]+)', url, 'format url')
|
url_base = self._search_regex(
|
||||||
http_base_url = 'http' + url_base
|
r'(?:(?:https?|rtmp|rtsp):)?(//[^?]+)', url, 'format url')
|
||||||
|
http_base_url = '%s:%s' % ('http', url_base)
|
||||||
formats = []
|
formats = []
|
||||||
if 'm3u8' not in skip_protocols:
|
if 'm3u8' not in skip_protocols:
|
||||||
formats.extend(self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
@@ -2231,7 +2244,7 @@ class InfoExtractor(object):
|
|||||||
for protocol in ('rtmp', 'rtsp'):
|
for protocol in ('rtmp', 'rtsp'):
|
||||||
if protocol not in skip_protocols:
|
if protocol not in skip_protocols:
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': protocol + url_base,
|
'url': '%s:%s' % (protocol, url_base),
|
||||||
'format_id': protocol,
|
'format_id': protocol,
|
||||||
'protocol': protocol,
|
'protocol': protocol,
|
||||||
})
|
})
|
||||||
@@ -2289,6 +2302,8 @@ class InfoExtractor(object):
|
|||||||
tracks = video_data.get('tracks')
|
tracks = video_data.get('tracks')
|
||||||
if tracks and isinstance(tracks, list):
|
if tracks and isinstance(tracks, list):
|
||||||
for track in tracks:
|
for track in tracks:
|
||||||
|
if not isinstance(track, dict):
|
||||||
|
continue
|
||||||
if track.get('kind') != 'captions':
|
if track.get('kind') != 'captions':
|
||||||
continue
|
continue
|
||||||
track_url = urljoin(base_url, track.get('file'))
|
track_url = urljoin(base_url, track.get('file'))
|
||||||
@@ -2318,6 +2333,8 @@ class InfoExtractor(object):
|
|||||||
urls = []
|
urls = []
|
||||||
formats = []
|
formats = []
|
||||||
for source in jwplayer_sources_data:
|
for source in jwplayer_sources_data:
|
||||||
|
if not isinstance(source, dict):
|
||||||
|
continue
|
||||||
source_url = self._proto_relative_url(source.get('file'))
|
source_url = self._proto_relative_url(source.get('file'))
|
||||||
if not source_url:
|
if not source_url:
|
||||||
continue
|
continue
|
||||||
|
@@ -16,7 +16,6 @@ from ..utils import (
|
|||||||
mimetype2ext,
|
mimetype2ext,
|
||||||
orderedSet,
|
orderedSet,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
remove_end,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -50,10 +49,17 @@ class CondeNastIE(InfoExtractor):
|
|||||||
'wmagazine': 'W Magazine',
|
'wmagazine': 'W Magazine',
|
||||||
}
|
}
|
||||||
|
|
||||||
_VALID_URL = r'https?://(?:video|www|player)\.(?P<site>%s)\.com/(?P<type>watch|series|video|embed(?:js)?)/(?P<id>[^/?#]+)' % '|'.join(_SITES.keys())
|
_VALID_URL = r'''(?x)https?://(?:video|www|player(?:-backend)?)\.(?:%s)\.com/
|
||||||
|
(?:
|
||||||
|
(?:
|
||||||
|
embed(?:js)?|
|
||||||
|
(?:script|inline)/video
|
||||||
|
)/(?P<id>[0-9a-f]{24})(?:/(?P<player_id>[0-9a-f]{24}))?(?:.+?\btarget=(?P<target>[^&]+))?|
|
||||||
|
(?P<type>watch|series|video)/(?P<display_id>[^/?#]+)
|
||||||
|
)''' % '|'.join(_SITES.keys())
|
||||||
IE_DESC = 'Condé Nast media group: %s' % ', '.join(sorted(_SITES.values()))
|
IE_DESC = 'Condé Nast media group: %s' % ', '.join(sorted(_SITES.values()))
|
||||||
|
|
||||||
EMBED_URL = r'(?:https?:)?//player\.(?P<site>%s)\.com/(?P<type>embed(?:js)?)/.+?' % '|'.join(_SITES.keys())
|
EMBED_URL = r'(?:https?:)?//player(?:-backend)?\.(?:%s)\.com/(?:embed(?:js)?|(?:script|inline)/video)/.+?' % '|'.join(_SITES.keys())
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://video.wired.com/watch/3d-printed-speakers-lit-with-led',
|
'url': 'http://video.wired.com/watch/3d-printed-speakers-lit-with-led',
|
||||||
@@ -89,6 +95,12 @@ class CondeNastIE(InfoExtractor):
|
|||||||
'upload_date': '20150916',
|
'upload_date': '20150916',
|
||||||
'timestamp': 1442434955,
|
'timestamp': 1442434955,
|
||||||
}
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'https://player.cnevids.com/inline/video/59138decb57ac36b83000005.js?target=js-cne-player',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://player-backend.cnevids.com/script/video/59138decb57ac36b83000005.js',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _extract_series(self, url, webpage):
|
def _extract_series(self, url, webpage):
|
||||||
@@ -104,7 +116,7 @@ class CondeNastIE(InfoExtractor):
|
|||||||
entries = [self.url_result(build_url(path), 'CondeNast') for path in paths]
|
entries = [self.url_result(build_url(path), 'CondeNast') for path in paths]
|
||||||
return self.playlist_result(entries, playlist_title=title)
|
return self.playlist_result(entries, playlist_title=title)
|
||||||
|
|
||||||
def _extract_video(self, webpage, url_type):
|
def _extract_video_params(self, webpage):
|
||||||
query = {}
|
query = {}
|
||||||
params = self._search_regex(
|
params = self._search_regex(
|
||||||
r'(?s)var params = {(.+?)}[;,]', webpage, 'player params', default=None)
|
r'(?s)var params = {(.+?)}[;,]', webpage, 'player params', default=None)
|
||||||
@@ -123,17 +135,30 @@ class CondeNastIE(InfoExtractor):
|
|||||||
'playerId': params['data-player'],
|
'playerId': params['data-player'],
|
||||||
'target': params['id'],
|
'target': params['id'],
|
||||||
})
|
})
|
||||||
video_id = query['videoId']
|
return query
|
||||||
|
|
||||||
|
def _extract_video(self, params):
|
||||||
|
video_id = params['videoId']
|
||||||
|
|
||||||
video_info = None
|
video_info = None
|
||||||
info_page = self._download_json(
|
if params.get('playerId'):
|
||||||
'http://player.cnevids.com/player/video.js',
|
info_page = self._download_json(
|
||||||
video_id, 'Downloading video info', fatal=False, query=query)
|
'http://player.cnevids.com/player/video.js',
|
||||||
if info_page:
|
video_id, 'Downloading video info', fatal=False, query=params)
|
||||||
video_info = info_page.get('video')
|
if info_page:
|
||||||
if not video_info:
|
video_info = info_page.get('video')
|
||||||
|
if not video_info:
|
||||||
|
info_page = self._download_webpage(
|
||||||
|
'http://player.cnevids.com/player/loader.js',
|
||||||
|
video_id, 'Downloading loader info', query=params)
|
||||||
|
else:
|
||||||
info_page = self._download_webpage(
|
info_page = self._download_webpage(
|
||||||
'http://player.cnevids.com/player/loader.js',
|
'https://player.cnevids.com/inline/video/%s.js' % video_id,
|
||||||
video_id, 'Downloading loader info', query=query)
|
video_id, 'Downloading inline info', query={
|
||||||
|
'target': params.get('target', 'embedplayer')
|
||||||
|
})
|
||||||
|
|
||||||
|
if not video_info:
|
||||||
video_info = self._parse_json(
|
video_info = self._parse_json(
|
||||||
self._search_regex(
|
self._search_regex(
|
||||||
r'(?s)var\s+config\s*=\s*({.+?});', info_page, 'config'),
|
r'(?s)var\s+config\s*=\s*({.+?});', info_page, 'config'),
|
||||||
@@ -161,9 +186,7 @@ class CondeNastIE(InfoExtractor):
|
|||||||
})
|
})
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
info = self._search_json_ld(
|
return {
|
||||||
webpage, video_id, fatal=False) if url_type != 'embed' else {}
|
|
||||||
info.update({
|
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'title': title,
|
'title': title,
|
||||||
@@ -174,22 +197,26 @@ class CondeNastIE(InfoExtractor):
|
|||||||
'series': video_info.get('series_title'),
|
'series': video_info.get('series_title'),
|
||||||
'season': video_info.get('season_title'),
|
'season': video_info.get('season_title'),
|
||||||
'timestamp': parse_iso8601(video_info.get('premiere_date')),
|
'timestamp': parse_iso8601(video_info.get('premiere_date')),
|
||||||
})
|
'categories': video_info.get('categories'),
|
||||||
return info
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
site, url_type, item_id = re.match(self._VALID_URL, url).groups()
|
video_id, player_id, target, url_type, display_id = re.match(self._VALID_URL, url).groups()
|
||||||
|
|
||||||
# Convert JS embed to regular embed
|
if video_id:
|
||||||
if url_type == 'embedjs':
|
return self._extract_video({
|
||||||
parsed_url = compat_urlparse.urlparse(url)
|
'videoId': video_id,
|
||||||
url = compat_urlparse.urlunparse(parsed_url._replace(
|
'playerId': player_id,
|
||||||
path=remove_end(parsed_url.path, '.js').replace('/embedjs/', '/embed/')))
|
'target': target,
|
||||||
url_type = 'embed'
|
})
|
||||||
|
|
||||||
webpage = self._download_webpage(url, item_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
if url_type == 'series':
|
if url_type == 'series':
|
||||||
return self._extract_series(url, webpage)
|
return self._extract_series(url, webpage)
|
||||||
else:
|
else:
|
||||||
return self._extract_video(webpage, url_type)
|
params = self._extract_video_params(webpage)
|
||||||
|
info = self._search_json_ld(
|
||||||
|
webpage, display_id, fatal=False)
|
||||||
|
info.update(self._extract_video(params))
|
||||||
|
return info
|
||||||
|
@@ -8,7 +8,16 @@ from ..utils import int_or_none
|
|||||||
|
|
||||||
|
|
||||||
class CorusIE(ThePlatformFeedIE):
|
class CorusIE(ThePlatformFeedIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:globaltv|etcanada)\.com|(?:hgtv|foodnetwork|slice)\.ca)/(?:video/|(?:[^/]+/)+(?:videos/[a-z0-9-]+-|video\.html\?.*?\bv=))(?P<id>\d+)'
|
_VALID_URL = r'''(?x)
|
||||||
|
https?://
|
||||||
|
(?:www\.)?
|
||||||
|
(?P<domain>
|
||||||
|
(?:globaltv|etcanada)\.com|
|
||||||
|
(?:hgtv|foodnetwork|slice|history|showcase)\.ca
|
||||||
|
)
|
||||||
|
/(?:video/|(?:[^/]+/)+(?:videos/[a-z0-9-]+-|video\.html\?.*?\bv=))
|
||||||
|
(?P<id>\d+)
|
||||||
|
'''
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.hgtv.ca/shows/bryan-inc/videos/movie-night-popcorn-with-bryan-870923331648/',
|
'url': 'http://www.hgtv.ca/shows/bryan-inc/videos/movie-night-popcorn-with-bryan-870923331648/',
|
||||||
'md5': '05dcbca777bf1e58c2acbb57168ad3a6',
|
'md5': '05dcbca777bf1e58c2acbb57168ad3a6',
|
||||||
@@ -27,6 +36,12 @@ class CorusIE(ThePlatformFeedIE):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://etcanada.com/video/873675331955/meet-the-survivor-game-changers-castaways-part-2/',
|
'url': 'http://etcanada.com/video/873675331955/meet-the-survivor-game-changers-castaways-part-2/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.history.ca/the-world-without-canada/video/full-episodes/natural-resources/video.html?v=955054659646#video',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.showcase.ca/eyewitness/video/eyewitness++106/video.html?v=955070531919&p=1&s=da#video',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
_TP_FEEDS = {
|
_TP_FEEDS = {
|
||||||
@@ -50,6 +65,14 @@ class CorusIE(ThePlatformFeedIE):
|
|||||||
'feed_id': '5tUJLgV2YNJ5',
|
'feed_id': '5tUJLgV2YNJ5',
|
||||||
'account_id': 2414427935,
|
'account_id': 2414427935,
|
||||||
},
|
},
|
||||||
|
'history': {
|
||||||
|
'feed_id': 'tQFx_TyyEq4J',
|
||||||
|
'account_id': 2369613659,
|
||||||
|
},
|
||||||
|
'showcase': {
|
||||||
|
'feed_id': '9H6qyshBZU3E',
|
||||||
|
'account_id': 2414426607,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@@ -510,7 +510,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
|||||||
|
|
||||||
# webpage provides more accurate data than series_title from XML
|
# webpage provides more accurate data than series_title from XML
|
||||||
series = self._html_search_regex(
|
series = self._html_search_regex(
|
||||||
r'id=["\']showmedia_about_episode_num[^>]+>\s*<a[^>]+>([^<]+)',
|
r'(?s)<h\d[^>]+\bid=["\']showmedia_about_episode_num[^>]+>(.+?)</h\d',
|
||||||
webpage, 'series', fatal=False)
|
webpage, 'series', fatal=False)
|
||||||
season = xpath_text(metadata, 'series_title')
|
season = xpath_text(metadata, 'series_title')
|
||||||
|
|
||||||
@@ -518,7 +518,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
|||||||
episode_number = int_or_none(xpath_text(metadata, 'episode_number'))
|
episode_number = int_or_none(xpath_text(metadata, 'episode_number'))
|
||||||
|
|
||||||
season_number = int_or_none(self._search_regex(
|
season_number = int_or_none(self._search_regex(
|
||||||
r'(?s)<h4[^>]+id=["\']showmedia_about_episode_num[^>]+>.+?</h4>\s*<h4>\s*Season (\d+)',
|
r'(?s)<h\d[^>]+id=["\']showmedia_about_episode_num[^>]+>.+?</h\d>\s*<h4>\s*Season (\d+)',
|
||||||
webpage, 'season number', default=None))
|
webpage, 'season number', default=None))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
@@ -1,17 +1,21 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
determine_protocol,
|
determine_protocol,
|
||||||
|
try_get,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class DailyMailIE(InfoExtractor):
|
class DailyMailIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?dailymail\.co\.uk/video/[^/]+/video-(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://(?:www\.)?dailymail\.co\.uk/(?:video/[^/]+/video-|embed/video/)(?P<id>[0-9]+)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.dailymail.co.uk/video/tvshowbiz/video-1295863/The-Mountain-appears-sparkling-water-ad-Heavy-Bubbles.html',
|
'url': 'http://www.dailymail.co.uk/video/tvshowbiz/video-1295863/The-Mountain-appears-sparkling-water-ad-Heavy-Bubbles.html',
|
||||||
'md5': 'f6129624562251f628296c3a9ffde124',
|
'md5': 'f6129624562251f628296c3a9ffde124',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@@ -20,7 +24,16 @@ class DailyMailIE(InfoExtractor):
|
|||||||
'title': 'The Mountain appears in sparkling water ad for \'Heavy Bubbles\'',
|
'title': 'The Mountain appears in sparkling water ad for \'Heavy Bubbles\'',
|
||||||
'description': 'md5:a93d74b6da172dd5dc4d973e0b766a84',
|
'description': 'md5:a93d74b6da172dd5dc4d973e0b766a84',
|
||||||
}
|
}
|
||||||
}
|
}, {
|
||||||
|
'url': 'http://www.dailymail.co.uk/embed/video/1295863.html',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _extract_urls(webpage):
|
||||||
|
return re.findall(
|
||||||
|
r'<iframe\b[^>]+\bsrc=["\'](?P<url>(?:https?:)?//(?:www\.)?dailymail\.co\.uk/embed/video/\d+\.html)',
|
||||||
|
webpage)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
@@ -28,8 +41,14 @@ class DailyMailIE(InfoExtractor):
|
|||||||
video_data = self._parse_json(self._search_regex(
|
video_data = self._parse_json(self._search_regex(
|
||||||
r"data-opts='({.+?})'", webpage, 'video data'), video_id)
|
r"data-opts='({.+?})'", webpage, 'video data'), video_id)
|
||||||
title = unescapeHTML(video_data['title'])
|
title = unescapeHTML(video_data['title'])
|
||||||
video_sources = self._download_json(video_data.get(
|
|
||||||
'sources', {}).get('url') or 'http://www.dailymail.co.uk/api/player/%s/video-sources.json' % video_id, video_id)
|
sources_url = (try_get(
|
||||||
|
video_data,
|
||||||
|
(lambda x: x['plugins']['sources']['url'],
|
||||||
|
lambda x: x['sources']['url']), compat_str) or
|
||||||
|
'http://www.dailymail.co.uk/api/player/%s/video-sources.json' % video_id)
|
||||||
|
|
||||||
|
video_sources = self._download_json(sources_url, video_id)
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for rendition in video_sources['renditions']:
|
for rendition in video_sources['renditions']:
|
||||||
|
@@ -38,7 +38,7 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
|
|||||||
|
|
||||||
|
|
||||||
class DailymotionIE(DailymotionBaseInfoExtractor):
|
class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||||
_VALID_URL = r'(?i)(?:https?://)?(?:(www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(?:embed|swf|#)/)?video/(?P<id>[^/?_]+)'
|
_VALID_URL = r'(?i)https?://(?:(www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(?:(?:embed|swf|#)/)?video|swf)/(?P<id>[^/?_]+)'
|
||||||
IE_NAME = 'dailymotion'
|
IE_NAME = 'dailymotion'
|
||||||
|
|
||||||
_FORMATS = [
|
_FORMATS = [
|
||||||
@@ -49,87 +49,82 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
|||||||
('stream_h264_hd1080_url', 'hd180'),
|
('stream_h264_hd1080_url', 'hd180'),
|
||||||
]
|
]
|
||||||
|
|
||||||
_TESTS = [
|
_TESTS = [{
|
||||||
{
|
'url': 'http://www.dailymotion.com/video/x5kesuj_office-christmas-party-review-jason-bateman-olivia-munn-t-j-miller_news',
|
||||||
'url': 'http://www.dailymotion.com/video/x5kesuj_office-christmas-party-review-jason-bateman-olivia-munn-t-j-miller_news',
|
'md5': '074b95bdee76b9e3654137aee9c79dfe',
|
||||||
'md5': '074b95bdee76b9e3654137aee9c79dfe',
|
'info_dict': {
|
||||||
'info_dict': {
|
'id': 'x5kesuj',
|
||||||
'id': 'x5kesuj',
|
'ext': 'mp4',
|
||||||
'ext': 'mp4',
|
'title': 'Office Christmas Party Review – Jason Bateman, Olivia Munn, T.J. Miller',
|
||||||
'title': 'Office Christmas Party Review – Jason Bateman, Olivia Munn, T.J. Miller',
|
'description': 'Office Christmas Party Review - Jason Bateman, Olivia Munn, T.J. Miller',
|
||||||
'description': 'Office Christmas Party Review - Jason Bateman, Olivia Munn, T.J. Miller',
|
'thumbnail': r're:^https?:.*\.(?:jpg|png)$',
|
||||||
'thumbnail': r're:^https?:.*\.(?:jpg|png)$',
|
'duration': 187,
|
||||||
'duration': 187,
|
'timestamp': 1493651285,
|
||||||
'timestamp': 1493651285,
|
'upload_date': '20170501',
|
||||||
'upload_date': '20170501',
|
'uploader': 'Deadline',
|
||||||
'uploader': 'Deadline',
|
'uploader_id': 'x1xm8ri',
|
||||||
'uploader_id': 'x1xm8ri',
|
'age_limit': 0,
|
||||||
'age_limit': 0,
|
'view_count': int,
|
||||||
'view_count': int,
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
{
|
}, {
|
||||||
'url': 'https://www.dailymotion.com/video/x2iuewm_steam-machine-models-pricing-listed-on-steam-store-ign-news_videogames',
|
'url': 'https://www.dailymotion.com/video/x2iuewm_steam-machine-models-pricing-listed-on-steam-store-ign-news_videogames',
|
||||||
'md5': '2137c41a8e78554bb09225b8eb322406',
|
'md5': '2137c41a8e78554bb09225b8eb322406',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'x2iuewm',
|
'id': 'x2iuewm',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Steam Machine Models, Pricing Listed on Steam Store - IGN News',
|
'title': 'Steam Machine Models, Pricing Listed on Steam Store - IGN News',
|
||||||
'description': 'Several come bundled with the Steam Controller.',
|
'description': 'Several come bundled with the Steam Controller.',
|
||||||
'thumbnail': r're:^https?:.*\.(?:jpg|png)$',
|
'thumbnail': r're:^https?:.*\.(?:jpg|png)$',
|
||||||
'duration': 74,
|
'duration': 74,
|
||||||
'timestamp': 1425657362,
|
'timestamp': 1425657362,
|
||||||
'upload_date': '20150306',
|
'upload_date': '20150306',
|
||||||
'uploader': 'IGN',
|
'uploader': 'IGN',
|
||||||
'uploader_id': 'xijv66',
|
'uploader_id': 'xijv66',
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
},
|
|
||||||
'skip': 'video gone',
|
|
||||||
},
|
},
|
||||||
|
'skip': 'video gone',
|
||||||
|
}, {
|
||||||
# Vevo video
|
# Vevo video
|
||||||
{
|
'url': 'http://www.dailymotion.com/video/x149uew_katy-perry-roar-official_musi',
|
||||||
'url': 'http://www.dailymotion.com/video/x149uew_katy-perry-roar-official_musi',
|
'info_dict': {
|
||||||
'info_dict': {
|
'title': 'Roar (Official)',
|
||||||
'title': 'Roar (Official)',
|
'id': 'USUV71301934',
|
||||||
'id': 'USUV71301934',
|
'ext': 'mp4',
|
||||||
'ext': 'mp4',
|
'uploader': 'Katy Perry',
|
||||||
'uploader': 'Katy Perry',
|
'upload_date': '20130905',
|
||||||
'upload_date': '20130905',
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
'skip': 'VEVO is only available in some countries',
|
|
||||||
},
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
'skip': 'VEVO is only available in some countries',
|
||||||
|
}, {
|
||||||
# age-restricted video
|
# age-restricted video
|
||||||
{
|
'url': 'http://www.dailymotion.com/video/xyh2zz_leanna-decker-cyber-girl-of-the-year-desires-nude-playboy-plus_redband',
|
||||||
'url': 'http://www.dailymotion.com/video/xyh2zz_leanna-decker-cyber-girl-of-the-year-desires-nude-playboy-plus_redband',
|
'md5': '0d667a7b9cebecc3c89ee93099c4159d',
|
||||||
'md5': '0d667a7b9cebecc3c89ee93099c4159d',
|
'info_dict': {
|
||||||
'info_dict': {
|
'id': 'xyh2zz',
|
||||||
'id': 'xyh2zz',
|
'ext': 'mp4',
|
||||||
'ext': 'mp4',
|
'title': 'Leanna Decker - Cyber Girl Of The Year Desires Nude [Playboy Plus]',
|
||||||
'title': 'Leanna Decker - Cyber Girl Of The Year Desires Nude [Playboy Plus]',
|
'uploader': 'HotWaves1012',
|
||||||
'uploader': 'HotWaves1012',
|
'age_limit': 18,
|
||||||
'age_limit': 18,
|
|
||||||
},
|
|
||||||
'skip': 'video gone',
|
|
||||||
},
|
},
|
||||||
|
'skip': 'video gone',
|
||||||
|
}, {
|
||||||
# geo-restricted, player v5
|
# geo-restricted, player v5
|
||||||
{
|
'url': 'http://www.dailymotion.com/video/xhza0o',
|
||||||
'url': 'http://www.dailymotion.com/video/xhza0o',
|
'only_matching': True,
|
||||||
'only_matching': True,
|
}, {
|
||||||
},
|
|
||||||
# with subtitles
|
# with subtitles
|
||||||
{
|
'url': 'http://www.dailymotion.com/video/x20su5f_the-power-of-nightmares-1-the-rise-of-the-politics-of-fear-bbc-2004_news',
|
||||||
'url': 'http://www.dailymotion.com/video/x20su5f_the-power-of-nightmares-1-the-rise-of-the-politics-of-fear-bbc-2004_news',
|
'only_matching': True,
|
||||||
'only_matching': True,
|
}, {
|
||||||
},
|
'url': 'http://www.dailymotion.com/swf/video/x3n92nf',
|
||||||
{
|
'only_matching': True,
|
||||||
'url': 'http://www.dailymotion.com/swf/video/x3n92nf',
|
}, {
|
||||||
'only_matching': True,
|
'url': 'http://www.dailymotion.com/swf/x3ss1m_funny-magic-trick-barry-and-stuart_fun',
|
||||||
}
|
'only_matching': True,
|
||||||
]
|
}]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _extract_urls(webpage):
|
def _extract_urls(webpage):
|
||||||
@@ -152,7 +147,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
|||||||
view_count_str = self._search_regex(
|
view_count_str = self._search_regex(
|
||||||
(r'<meta[^>]+itemprop="interactionCount"[^>]+content="UserPlays:([\s\d,.]+)"',
|
(r'<meta[^>]+itemprop="interactionCount"[^>]+content="UserPlays:([\s\d,.]+)"',
|
||||||
r'video_views_count[^>]+>\s+([\s\d\,.]+)'),
|
r'video_views_count[^>]+>\s+([\s\d\,.]+)'),
|
||||||
webpage, 'view count', fatal=False)
|
webpage, 'view count', default=None)
|
||||||
if view_count_str:
|
if view_count_str:
|
||||||
view_count_str = re.sub(r'\s', '', view_count_str)
|
view_count_str = re.sub(r'\s', '', view_count_str)
|
||||||
view_count = str_to_int(view_count_str)
|
view_count = str_to_int(view_count_str)
|
||||||
@@ -164,7 +159,9 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
|||||||
[r'buildPlayer\(({.+?})\);\n', # See https://github.com/rg3/youtube-dl/issues/7826
|
[r'buildPlayer\(({.+?})\);\n', # See https://github.com/rg3/youtube-dl/issues/7826
|
||||||
r'playerV5\s*=\s*dmp\.create\([^,]+?,\s*({.+?})\);',
|
r'playerV5\s*=\s*dmp\.create\([^,]+?,\s*({.+?})\);',
|
||||||
r'buildPlayer\(({.+?})\);',
|
r'buildPlayer\(({.+?})\);',
|
||||||
r'var\s+config\s*=\s*({.+?});'],
|
r'var\s+config\s*=\s*({.+?});',
|
||||||
|
# New layout regex (see https://github.com/rg3/youtube-dl/issues/13580)
|
||||||
|
r'__PLAYER_CONFIG__\s*=\s*({.+?});'],
|
||||||
webpage, 'player v5', default=None)
|
webpage, 'player v5', default=None)
|
||||||
if player_v5:
|
if player_v5:
|
||||||
player = self._parse_json(player_v5, video_id)
|
player = self._parse_json(player_v5, video_id)
|
||||||
|
@@ -15,7 +15,7 @@ from ..utils import (
|
|||||||
|
|
||||||
class DisneyIE(InfoExtractor):
|
class DisneyIE(InfoExtractor):
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
https?://(?P<domain>(?:[^/]+\.)?(?:disney\.[a-z]{2,3}(?:\.[a-z]{2})?|disney(?:(?:me|latino)\.com|turkiye\.com\.tr)|(?:starwars|marvelkids)\.com))/(?:(?:embed/|(?:[^/]+/)+[\w-]+-)(?P<id>[a-z0-9]{24})|(?:[^/]+/)?(?P<display_id>[^/?#]+))'''
|
https?://(?P<domain>(?:[^/]+\.)?(?:disney\.[a-z]{2,3}(?:\.[a-z]{2})?|disney(?:(?:me|latino)\.com|turkiye\.com\.tr|channel\.de)|(?:starwars|marvelkids)\.com))/(?:(?:embed/|(?:[^/]+/)+[\w-]+-)(?P<id>[a-z0-9]{24})|(?:[^/]+/)?(?P<display_id>[^/?#]+))'''
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# Disney.EmbedVideo
|
# Disney.EmbedVideo
|
||||||
'url': 'http://video.disney.com/watch/moana-trailer-545ed1857afee5a0ec239977',
|
'url': 'http://video.disney.com/watch/moana-trailer-545ed1857afee5a0ec239977',
|
||||||
@@ -68,6 +68,9 @@ class DisneyIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://disneyjunior.en.disneyme.com/dj/watch-my-friends-tigger-and-pooh-promo',
|
'url': 'http://disneyjunior.en.disneyme.com/dj/watch-my-friends-tigger-and-pooh-promo',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://disneychannel.de/sehen/soy-luna-folge-118-5518518987ba27f3cc729268',
|
||||||
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://disneyjunior.disney.com/galactech-the-galactech-grab-galactech-an-admiral-rescue',
|
'url': 'http://disneyjunior.disney.com/galactech-the-galactech-grab-galactech-an-admiral-rescue',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
@@ -13,7 +13,7 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class DigitallySpeakingIE(InfoExtractor):
|
class DigitallySpeakingIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:evt\.dispeak|events\.digitallyspeaking)\.com/(?:[^/]+/)+xml/(?P<id>[^.]+)\.xml'
|
_VALID_URL = r'https?://(?:s?evt\.dispeak|events\.digitallyspeaking)\.com/(?:[^/]+/)+xml/(?P<id>[^.]+)\.xml'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# From http://gdcvault.com/play/1023460/Tenacious-Design-and-The-Interface
|
# From http://gdcvault.com/play/1023460/Tenacious-Design-and-The-Interface
|
||||||
@@ -28,6 +28,10 @@ class DigitallySpeakingIE(InfoExtractor):
|
|||||||
# From http://www.gdcvault.com/play/1014631/Classic-Game-Postmortem-PAC
|
# From http://www.gdcvault.com/play/1014631/Classic-Game-Postmortem-PAC
|
||||||
'url': 'http://events.digitallyspeaking.com/gdc/sf11/xml/12396_1299111843500GMPX.xml',
|
'url': 'http://events.digitallyspeaking.com/gdc/sf11/xml/12396_1299111843500GMPX.xml',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# From http://www.gdcvault.com/play/1013700/Advanced-Material
|
||||||
|
'url': 'http://sevt.dispeak.com/ubm/gdc/eur10/xml/11256_1282118587281VNIT.xml',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _parse_mp4(self, metadata):
|
def _parse_mp4(self, metadata):
|
||||||
|
@@ -3,11 +3,14 @@ from __future__ import unicode_literals
|
|||||||
|
|
||||||
import time
|
import time
|
||||||
import hashlib
|
import hashlib
|
||||||
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
|
unified_strdate,
|
||||||
|
urljoin,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -117,3 +120,82 @@ class DouyuTVIE(InfoExtractor):
|
|||||||
'uploader': uploader,
|
'uploader': uploader,
|
||||||
'is_live': True,
|
'is_live': True,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class DouyuShowIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://v(?:mobile)?\.douyu\.com/show/(?P<id>[0-9a-zA-Z]+)'
|
||||||
|
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://v.douyu.com/show/rjNBdvnVXNzvE2yw',
|
||||||
|
'md5': '0c2cfd068ee2afe657801269b2d86214',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'rjNBdvnVXNzvE2yw',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '陈一发儿:砒霜 我有个室友系列!04-01 22点场',
|
||||||
|
'duration': 7150.08,
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
'uploader': '陈一发儿',
|
||||||
|
'uploader_id': 'XrZwYelr5wbK',
|
||||||
|
'uploader_url': 'https://v.douyu.com/author/XrZwYelr5wbK',
|
||||||
|
'upload_date': '20170402',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://vmobile.douyu.com/show/rjNBdvnVXNzvE2yw',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
url = url.replace('vmobile.', 'v.')
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
room_info = self._parse_json(self._search_regex(
|
||||||
|
r'var\s+\$ROOM\s*=\s*({.+});', webpage, 'room info'), video_id)
|
||||||
|
|
||||||
|
video_info = None
|
||||||
|
|
||||||
|
for trial in range(5):
|
||||||
|
# Sometimes Douyu rejects our request. Let's try it more times
|
||||||
|
try:
|
||||||
|
video_info = self._download_json(
|
||||||
|
'https://vmobile.douyu.com/video/getInfo', video_id,
|
||||||
|
query={'vid': video_id},
|
||||||
|
headers={
|
||||||
|
'Referer': url,
|
||||||
|
'x-requested-with': 'XMLHttpRequest',
|
||||||
|
})
|
||||||
|
break
|
||||||
|
except ExtractorError:
|
||||||
|
self._sleep(1, video_id)
|
||||||
|
|
||||||
|
if not video_info:
|
||||||
|
raise ExtractorError('Can\'t fetch video info')
|
||||||
|
|
||||||
|
formats = self._extract_m3u8_formats(
|
||||||
|
video_info['data']['video_url'], video_id,
|
||||||
|
entry_protocol='m3u8_native', ext='mp4')
|
||||||
|
|
||||||
|
upload_date = unified_strdate(self._html_search_regex(
|
||||||
|
r'<em>上传时间:</em><span>([^<]+)</span>', webpage,
|
||||||
|
'upload date', fatal=False))
|
||||||
|
|
||||||
|
uploader = uploader_id = uploader_url = None
|
||||||
|
mobj = re.search(
|
||||||
|
r'(?m)<a[^>]+href="/author/([0-9a-zA-Z]+)".+?<strong[^>]+title="([^"]+)"',
|
||||||
|
webpage)
|
||||||
|
if mobj:
|
||||||
|
uploader_id, uploader = mobj.groups()
|
||||||
|
uploader_url = urljoin(url, '/author/' + uploader_id)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': room_info['name'],
|
||||||
|
'formats': formats,
|
||||||
|
'duration': room_info.get('duration'),
|
||||||
|
'thumbnail': room_info.get('pic'),
|
||||||
|
'upload_date': upload_date,
|
||||||
|
'uploader': uploader,
|
||||||
|
'uploader_id': uploader_id,
|
||||||
|
'uploader_url': uploader_url,
|
||||||
|
}
|
||||||
|
@@ -184,7 +184,7 @@ class DPlayItIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
info_url = self._search_regex(
|
info_url = self._search_regex(
|
||||||
r'url\s*:\s*["\']((?:https?:)?//[^/]+/playback/videoPlaybackInfo/\d+)',
|
r'url\s*[:=]\s*["\']((?:https?:)?//[^/]+/playback/videoPlaybackInfo/\d+)',
|
||||||
webpage, 'video id')
|
webpage, 'video id')
|
||||||
|
|
||||||
title = remove_end(self._og_search_title(webpage), ' | Dplay')
|
title = remove_end(self._og_search_title(webpage), ' | Dplay')
|
||||||
|
@@ -12,6 +12,7 @@ from ..utils import (
|
|||||||
ExtractorError,
|
ExtractorError,
|
||||||
clean_html,
|
clean_html,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
remove_end,
|
||||||
sanitized_Request,
|
sanitized_Request,
|
||||||
urlencode_postdata
|
urlencode_postdata
|
||||||
)
|
)
|
||||||
@@ -72,15 +73,15 @@ class DramaFeverIE(DramaFeverBaseIE):
|
|||||||
'url': 'http://www.dramafever.com/drama/4512/1/Cooking_with_Shin/',
|
'url': 'http://www.dramafever.com/drama/4512/1/Cooking_with_Shin/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '4512.1',
|
'id': '4512.1',
|
||||||
'ext': 'mp4',
|
'ext': 'flv',
|
||||||
'title': 'Cooking with Shin 4512.1',
|
'title': 'Cooking with Shin',
|
||||||
'description': 'md5:a8eec7942e1664a6896fcd5e1287bfd0',
|
'description': 'md5:a8eec7942e1664a6896fcd5e1287bfd0',
|
||||||
'episode': 'Episode 1',
|
'episode': 'Episode 1',
|
||||||
'episode_number': 1,
|
'episode_number': 1,
|
||||||
'thumbnail': r're:^https?://.*\.jpg',
|
'thumbnail': r're:^https?://.*\.jpg',
|
||||||
'timestamp': 1404336058,
|
'timestamp': 1404336058,
|
||||||
'upload_date': '20140702',
|
'upload_date': '20140702',
|
||||||
'duration': 343,
|
'duration': 344,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
# m3u8 download
|
# m3u8 download
|
||||||
@@ -90,15 +91,15 @@ class DramaFeverIE(DramaFeverBaseIE):
|
|||||||
'url': 'http://www.dramafever.com/drama/4826/4/Mnet_Asian_Music_Awards_2015/?ap=1',
|
'url': 'http://www.dramafever.com/drama/4826/4/Mnet_Asian_Music_Awards_2015/?ap=1',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '4826.4',
|
'id': '4826.4',
|
||||||
'ext': 'mp4',
|
'ext': 'flv',
|
||||||
'title': 'Mnet Asian Music Awards 2015 4826.4',
|
'title': 'Mnet Asian Music Awards 2015',
|
||||||
'description': 'md5:3ff2ee8fedaef86e076791c909cf2e91',
|
'description': 'md5:3ff2ee8fedaef86e076791c909cf2e91',
|
||||||
'episode': 'Mnet Asian Music Awards 2015 - Part 3',
|
'episode': 'Mnet Asian Music Awards 2015 - Part 3',
|
||||||
'episode_number': 4,
|
'episode_number': 4,
|
||||||
'thumbnail': r're:^https?://.*\.jpg',
|
'thumbnail': r're:^https?://.*\.jpg',
|
||||||
'timestamp': 1450213200,
|
'timestamp': 1450213200,
|
||||||
'upload_date': '20151215',
|
'upload_date': '20151215',
|
||||||
'duration': 5602,
|
'duration': 5359,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
# m3u8 download
|
# m3u8 download
|
||||||
@@ -122,6 +123,10 @@ class DramaFeverIE(DramaFeverBaseIE):
|
|||||||
countries=self._GEO_COUNTRIES)
|
countries=self._GEO_COUNTRIES)
|
||||||
raise
|
raise
|
||||||
|
|
||||||
|
# title is postfixed with video id for some reason, removing
|
||||||
|
if info.get('title'):
|
||||||
|
info['title'] = remove_end(info['title'], video_id).strip()
|
||||||
|
|
||||||
series_id, episode_number = video_id.split('.')
|
series_id, episode_number = video_id.split('.')
|
||||||
episode_info = self._download_json(
|
episode_info = self._download_json(
|
||||||
# We only need a single episode info, so restricting page size to one episode
|
# We only need a single episode info, so restricting page size to one episode
|
||||||
|
@@ -1,135 +1,59 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import json
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
int_or_none,
|
js_to_json,
|
||||||
parse_iso8601,
|
parse_duration,
|
||||||
|
unescapeHTML,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class DRBonanzaIE(InfoExtractor):
|
class DRBonanzaIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?dr\.dk/bonanza/(?:[^/]+/)+(?:[^/])+?(?:assetId=(?P<id>\d+))?(?:[#&]|$)'
|
_VALID_URL = r'https?://(?:www\.)?dr\.dk/bonanza/[^/]+/\d+/[^/]+/(?P<id>\d+)/(?P<display_id>[^/?#&]+)'
|
||||||
|
_TEST = {
|
||||||
_TESTS = [{
|
'url': 'http://www.dr.dk/bonanza/serie/154/matador/40312/matador---0824-komme-fremmede-',
|
||||||
'url': 'http://www.dr.dk/bonanza/serie/portraetter/Talkshowet.htm?assetId=65517',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '65517',
|
'id': '40312',
|
||||||
|
'display_id': 'matador---0824-komme-fremmede-',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Talkshowet - Leonard Cohen',
|
'title': 'MATADOR - 08:24. "Komme fremmede".',
|
||||||
'description': 'md5:8f34194fb30cd8c8a30ad8b27b70c0ca',
|
'description': 'md5:77b4c1ac4d4c1b9d610ab4395212ff84',
|
||||||
'thumbnail': r're:^https?://.*\.(?:gif|jpg)$',
|
'thumbnail': r're:^https?://.*\.(?:gif|jpg)$',
|
||||||
'timestamp': 1295537932,
|
'duration': 4613,
|
||||||
'upload_date': '20110120',
|
|
||||||
'duration': 3664,
|
|
||||||
},
|
},
|
||||||
'params': {
|
}
|
||||||
'skip_download': True, # requires rtmp
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
'url': 'http://www.dr.dk/bonanza/radio/serie/sport/fodbold.htm?assetId=59410',
|
|
||||||
'md5': '6dfe039417e76795fb783c52da3de11d',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '59410',
|
|
||||||
'ext': 'mp3',
|
|
||||||
'title': 'EM fodbold 1992 Danmark - Tyskland finale Transmission',
|
|
||||||
'description': 'md5:501e5a195749480552e214fbbed16c4e',
|
|
||||||
'thumbnail': r're:^https?://.*\.(?:gif|jpg)$',
|
|
||||||
'timestamp': 1223274900,
|
|
||||||
'upload_date': '20081006',
|
|
||||||
'duration': 7369,
|
|
||||||
},
|
|
||||||
}]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
url_id = self._match_id(url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
webpage = self._download_webpage(url, url_id)
|
video_id, display_id = mobj.group('id', 'display_id')
|
||||||
|
|
||||||
if url_id:
|
webpage = self._download_webpage(url, display_id)
|
||||||
info = json.loads(self._html_search_regex(r'({.*?%s.*})' % url_id, webpage, 'json'))
|
|
||||||
else:
|
|
||||||
# Just fetch the first video on that page
|
|
||||||
info = json.loads(self._html_search_regex(r'bonanzaFunctions.newPlaylist\(({.*})\)', webpage, 'json'))
|
|
||||||
|
|
||||||
asset_id = str(info['AssetId'])
|
info = self._parse_html5_media_entries(
|
||||||
title = info['Title'].rstrip(' \'\"-,.:;!?')
|
url, webpage, display_id, m3u8_id='hls',
|
||||||
duration = int_or_none(info.get('Duration'), scale=1000)
|
m3u8_entry_protocol='m3u8_native')[0]
|
||||||
# First published online. "FirstPublished" contains the date for original airing.
|
self._sort_formats(info['formats'])
|
||||||
timestamp = parse_iso8601(
|
|
||||||
re.sub(r'\.\d+$', '', info['Created']))
|
|
||||||
|
|
||||||
def parse_filename_info(url):
|
asset = self._parse_json(
|
||||||
match = re.search(r'/\d+_(?P<width>\d+)x(?P<height>\d+)x(?P<bitrate>\d+)K\.(?P<ext>\w+)$', url)
|
self._search_regex(
|
||||||
if match:
|
r'(?s)currentAsset\s*=\s*({.+?})\s*</script', webpage, 'asset'),
|
||||||
return {
|
display_id, transform_source=js_to_json)
|
||||||
'width': int(match.group('width')),
|
|
||||||
'height': int(match.group('height')),
|
|
||||||
'vbr': int(match.group('bitrate')),
|
|
||||||
'ext': match.group('ext')
|
|
||||||
}
|
|
||||||
match = re.search(r'/\d+_(?P<bitrate>\d+)K\.(?P<ext>\w+)$', url)
|
|
||||||
if match:
|
|
||||||
return {
|
|
||||||
'vbr': int(match.group('bitrate')),
|
|
||||||
'ext': match.group(2)
|
|
||||||
}
|
|
||||||
return {}
|
|
||||||
|
|
||||||
video_types = ['VideoHigh', 'VideoMid', 'VideoLow']
|
title = unescapeHTML(asset['AssetTitle']).strip()
|
||||||
preferencemap = {
|
|
||||||
'VideoHigh': -1,
|
|
||||||
'VideoMid': -2,
|
|
||||||
'VideoLow': -3,
|
|
||||||
'Audio': -4,
|
|
||||||
}
|
|
||||||
|
|
||||||
formats = []
|
def extract(field):
|
||||||
for file in info['Files']:
|
return self._search_regex(
|
||||||
if info['Type'] == 'Video':
|
r'<div[^>]+>\s*<p>%s:<p>\s*</div>\s*<div[^>]+>\s*<p>([^<]+)</p>' % field,
|
||||||
if file['Type'] in video_types:
|
webpage, field, default=None)
|
||||||
format = parse_filename_info(file['Location'])
|
|
||||||
format.update({
|
|
||||||
'url': file['Location'],
|
|
||||||
'format_id': file['Type'].replace('Video', ''),
|
|
||||||
'preference': preferencemap.get(file['Type'], -10),
|
|
||||||
})
|
|
||||||
if format['url'].startswith('rtmp'):
|
|
||||||
rtmp_url = format['url']
|
|
||||||
format['rtmp_live'] = True # --resume does not work
|
|
||||||
if '/bonanza/' in rtmp_url:
|
|
||||||
format['play_path'] = rtmp_url.split('/bonanza/')[1]
|
|
||||||
formats.append(format)
|
|
||||||
elif file['Type'] == 'Thumb':
|
|
||||||
thumbnail = file['Location']
|
|
||||||
elif info['Type'] == 'Audio':
|
|
||||||
if file['Type'] == 'Audio':
|
|
||||||
format = parse_filename_info(file['Location'])
|
|
||||||
format.update({
|
|
||||||
'url': file['Location'],
|
|
||||||
'format_id': file['Type'],
|
|
||||||
'vcodec': 'none',
|
|
||||||
})
|
|
||||||
formats.append(format)
|
|
||||||
elif file['Type'] == 'Thumb':
|
|
||||||
thumbnail = file['Location']
|
|
||||||
|
|
||||||
description = '%s\n%s\n%s\n' % (
|
info.update({
|
||||||
info['Description'], info['Actors'], info['Colophon'])
|
'id': asset.get('AssetId') or video_id,
|
||||||
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
display_id = re.sub(r'[^\w\d-]', '', re.sub(r' ', '-', title.lower())) + '-' + asset_id
|
|
||||||
display_id = re.sub(r'-+', '-', display_id)
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': asset_id,
|
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'formats': formats,
|
'description': extract('Programinfo'),
|
||||||
'description': description,
|
'duration': parse_duration(extract('Tid')),
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': asset.get('AssetImageUrl'),
|
||||||
'timestamp': timestamp,
|
})
|
||||||
'duration': duration,
|
return info
|
||||||
}
|
|
||||||
|
@@ -44,8 +44,23 @@ class DrTuberIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(
|
webpage = self._download_webpage(
|
||||||
'http://www.drtuber.com/video/%s' % video_id, display_id)
|
'http://www.drtuber.com/video/%s' % video_id, display_id)
|
||||||
|
|
||||||
video_url = self._html_search_regex(
|
video_data = self._download_json(
|
||||||
r'<source src="([^"]+)"', webpage, 'video URL')
|
'http://www.drtuber.com/player_config_json/', video_id, query={
|
||||||
|
'vid': video_id,
|
||||||
|
'embed': 0,
|
||||||
|
'aid': 0,
|
||||||
|
'domain_id': 0,
|
||||||
|
})
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for format_id, video_url in video_data['files'].items():
|
||||||
|
if video_url:
|
||||||
|
formats.append({
|
||||||
|
'format_id': format_id,
|
||||||
|
'quality': 2 if format_id == 'hq' else 1,
|
||||||
|
'url': video_url
|
||||||
|
})
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
title = self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
(r'class="title_watch"[^>]*><(?:p|h\d+)[^>]*>([^<]+)<',
|
(r'class="title_watch"[^>]*><(?:p|h\d+)[^>]*>([^<]+)<',
|
||||||
@@ -75,7 +90,7 @@ class DrTuberIE(InfoExtractor):
|
|||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
'url': video_url,
|
'formats': formats,
|
||||||
'title': title,
|
'title': title,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
'like_count': like_count,
|
'like_count': like_count,
|
||||||
|
@@ -118,7 +118,7 @@ class DRTVIE(InfoExtractor):
|
|||||||
if target == 'HDS':
|
if target == 'HDS':
|
||||||
f4m_formats = self._extract_f4m_formats(
|
f4m_formats = self._extract_f4m_formats(
|
||||||
uri + '?hdcore=3.3.0&plugin=aasp-3.3.0.99.43',
|
uri + '?hdcore=3.3.0&plugin=aasp-3.3.0.99.43',
|
||||||
video_id, preference, f4m_id=format_id)
|
video_id, preference, f4m_id=format_id, fatal=False)
|
||||||
if kind == 'AudioResource':
|
if kind == 'AudioResource':
|
||||||
for f in f4m_formats:
|
for f in f4m_formats:
|
||||||
f['vcodec'] = 'none'
|
f['vcodec'] = 'none'
|
||||||
@@ -126,7 +126,8 @@ class DRTVIE(InfoExtractor):
|
|||||||
elif target == 'HLS':
|
elif target == 'HLS':
|
||||||
formats.extend(self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
uri, video_id, 'mp4', entry_protocol='m3u8_native',
|
uri, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||||
preference=preference, m3u8_id=format_id))
|
preference=preference, m3u8_id=format_id,
|
||||||
|
fatal=False))
|
||||||
else:
|
else:
|
||||||
bitrate = link.get('Bitrate')
|
bitrate = link.get('Bitrate')
|
||||||
if bitrate:
|
if bitrate:
|
||||||
|
@@ -5,9 +5,12 @@ import re
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
js_to_json,
|
determine_ext,
|
||||||
unescapeHTML,
|
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
int_or_none,
|
||||||
|
js_to_json,
|
||||||
|
mimetype2ext,
|
||||||
|
unescapeHTML,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -24,14 +27,7 @@ class DVTVIE(InfoExtractor):
|
|||||||
'id': 'dc0768de855511e49e4b0025900fea04',
|
'id': 'dc0768de855511e49e4b0025900fea04',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Vondra o Českém století: Při pohledu na Havla mi bylo trapně',
|
'title': 'Vondra o Českém století: Při pohledu na Havla mi bylo trapně',
|
||||||
}
|
'duration': 1484,
|
||||||
}, {
|
|
||||||
'url': 'http://video.aktualne.cz/dvtv/stropnicky-policie-vrbetice-preventivne-nekontrolovala/r~82ed4322849211e4a10c0025900fea04/',
|
|
||||||
'md5': '6388f1941b48537dbd28791f712af8bf',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '72c02230849211e49f60002590604f2e',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Stropnický: Policie Vrbětice preventivně nekontrolovala',
|
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://video.aktualne.cz/dvtv/dvtv-16-12-2014-utok-talibanu-boj-o-kliniku-uprchlici/r~973eb3bc854e11e498be002590604f2e/',
|
'url': 'http://video.aktualne.cz/dvtv/dvtv-16-12-2014-utok-talibanu-boj-o-kliniku-uprchlici/r~973eb3bc854e11e498be002590604f2e/',
|
||||||
@@ -44,55 +40,100 @@ class DVTVIE(InfoExtractor):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'b0b40906854d11e4bdad0025900fea04',
|
'id': 'b0b40906854d11e4bdad0025900fea04',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Drtinová Veselovský TV 16. 12. 2014: Témata dne'
|
'title': 'Drtinová Veselovský TV 16. 12. 2014: Témata dne',
|
||||||
|
'description': 'md5:0916925dea8e30fe84222582280b47a0',
|
||||||
|
'timestamp': 1418760010,
|
||||||
|
'upload_date': '20141216',
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'md5': '5f7652a08b05009c1292317b449ffea2',
|
'md5': '5f7652a08b05009c1292317b449ffea2',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '420ad9ec854a11e4bdad0025900fea04',
|
'id': '420ad9ec854a11e4bdad0025900fea04',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Školní masakr možná změní boj s Talibanem, říká novinářka'
|
'title': 'Školní masakr možná změní boj s Talibanem, říká novinářka',
|
||||||
|
'description': 'md5:ff2f9f6de73c73d7cef4f756c1c1af42',
|
||||||
|
'timestamp': 1418760010,
|
||||||
|
'upload_date': '20141216',
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'md5': '498eb9dfa97169f409126c617e2a3d64',
|
'md5': '498eb9dfa97169f409126c617e2a3d64',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '95d35580846a11e4b6d20025900fea04',
|
'id': '95d35580846a11e4b6d20025900fea04',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Boj o kliniku: Veřejný zájem, nebo právo na majetek?'
|
'title': 'Boj o kliniku: Veřejný zájem, nebo právo na majetek?',
|
||||||
|
'description': 'md5:889fe610a70fee5511dc3326a089188e',
|
||||||
|
'timestamp': 1418760010,
|
||||||
|
'upload_date': '20141216',
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'md5': 'b8dc6b744844032dab6ba3781a7274b9',
|
'md5': 'b8dc6b744844032dab6ba3781a7274b9',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '6fe14d66853511e4833a0025900fea04',
|
'id': '6fe14d66853511e4833a0025900fea04',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Pánek: Odmítání syrských uprchlíků je ostudou české vlády'
|
'title': 'Pánek: Odmítání syrských uprchlíků je ostudou české vlády',
|
||||||
|
'description': 'md5:544f86de6d20c4815bea11bf2ac3004f',
|
||||||
|
'timestamp': 1418760010,
|
||||||
|
'upload_date': '20141216',
|
||||||
}
|
}
|
||||||
}],
|
}],
|
||||||
|
}, {
|
||||||
|
'url': 'https://video.aktualne.cz/dvtv/zeman-si-jen-leci-mindraky-sobotku-nenavidi-a-babis-se-mu-te/r~960cdb3a365a11e7a83b0025900fea04/',
|
||||||
|
'md5': 'f8efe9656017da948369aa099788c8ea',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '3c496fec365911e7a6500025900fea04',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Zeman si jen léčí mindráky, Sobotku nenávidí a Babiš se mu teď hodí, tvrdí Kmenta',
|
||||||
|
'duration': 1103,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://video.aktualne.cz/v-cechach-poprve-zazni-zelenkova-zrestaurovana-mse/r~45b4b00483ec11e4883b002590604f2e/',
|
'url': 'http://video.aktualne.cz/v-cechach-poprve-zazni-zelenkova-zrestaurovana-mse/r~45b4b00483ec11e4883b002590604f2e/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _parse_video_metadata(self, js, video_id):
|
def _parse_video_metadata(self, js, video_id):
|
||||||
metadata = self._parse_json(js, video_id, transform_source=js_to_json)
|
data = self._parse_json(js, video_id, transform_source=js_to_json)
|
||||||
|
|
||||||
|
title = unescapeHTML(data['title'])
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for video in metadata['sources']:
|
for video in data['sources']:
|
||||||
ext = video['type'][6:]
|
video_url = video.get('file')
|
||||||
formats.append({
|
if not video_url:
|
||||||
'url': video['file'],
|
continue
|
||||||
'ext': ext,
|
video_type = video.get('type')
|
||||||
'format_id': '%s-%s' % (ext, video['label']),
|
ext = determine_ext(video_url, mimetype2ext(video_type))
|
||||||
'height': int(video['label'].rstrip('p')),
|
if video_type == 'application/vnd.apple.mpegurl' or ext == 'm3u8':
|
||||||
'fps': 25,
|
formats.extend(self._extract_m3u8_formats(
|
||||||
})
|
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||||
|
m3u8_id='hls', fatal=False))
|
||||||
|
elif video_type == 'application/dash+xml' or ext == 'mpd':
|
||||||
|
formats.extend(self._extract_mpd_formats(
|
||||||
|
video_url, video_id, mpd_id='dash', fatal=False))
|
||||||
|
else:
|
||||||
|
label = video.get('label')
|
||||||
|
height = self._search_regex(
|
||||||
|
r'^(\d+)[pP]', label or '', 'height', default=None)
|
||||||
|
format_id = ['http']
|
||||||
|
for f in (ext, label):
|
||||||
|
if f:
|
||||||
|
format_id.append(f)
|
||||||
|
formats.append({
|
||||||
|
'url': video_url,
|
||||||
|
'format_id': '-'.join(format_id),
|
||||||
|
'height': int_or_none(height),
|
||||||
|
})
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': metadata['mediaid'],
|
'id': data.get('mediaid') or video_id,
|
||||||
'title': unescapeHTML(metadata['title']),
|
'title': title,
|
||||||
'thumbnail': self._proto_relative_url(metadata['image'], 'http:'),
|
'description': data.get('description'),
|
||||||
|
'thumbnail': data.get('image'),
|
||||||
|
'duration': int_or_none(data.get('duration')),
|
||||||
|
'timestamp': int_or_none(data.get('pubtime')),
|
||||||
'formats': formats
|
'formats': formats
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -103,7 +144,7 @@ class DVTVIE(InfoExtractor):
|
|||||||
|
|
||||||
# single video
|
# single video
|
||||||
item = self._search_regex(
|
item = self._search_regex(
|
||||||
r"(?s)embedData[0-9a-f]{32}\['asset'\]\s*=\s*(\{.+?\});",
|
r'(?s)embedData[0-9a-f]{32}\[["\']asset["\']\]\s*=\s*(\{.+?\});',
|
||||||
webpage, 'video', default=None, fatal=False)
|
webpage, 'video', default=None, fatal=False)
|
||||||
|
|
||||||
if item:
|
if item:
|
||||||
@@ -113,6 +154,8 @@ class DVTVIE(InfoExtractor):
|
|||||||
items = re.findall(
|
items = re.findall(
|
||||||
r"(?s)BBX\.context\.assets\['[0-9a-f]{32}'\]\.push\(({.+?})\);",
|
r"(?s)BBX\.context\.assets\['[0-9a-f]{32}'\]\.push\(({.+?})\);",
|
||||||
webpage)
|
webpage)
|
||||||
|
if not items:
|
||||||
|
items = re.findall(r'(?s)var\s+asset\s*=\s*({.+?});\n', webpage)
|
||||||
|
|
||||||
if items:
|
if items:
|
||||||
return {
|
return {
|
||||||
|
@@ -11,6 +11,7 @@ from ..compat import (
|
|||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
unsmuggle_url,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -50,6 +51,10 @@ class EaglePlatformIE(InfoExtractor):
|
|||||||
'view_count': int,
|
'view_count': int,
|
||||||
},
|
},
|
||||||
'skip': 'Georestricted',
|
'skip': 'Georestricted',
|
||||||
|
}, {
|
||||||
|
# referrer protected video (https://tvrain.ru/lite/teleshow/kak_vse_nachinalos/namin-418921/)
|
||||||
|
'url': 'eagleplatform:tvrainru.media.eagleplatform.com:582306',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@@ -60,16 +65,40 @@ class EaglePlatformIE(InfoExtractor):
|
|||||||
webpage)
|
webpage)
|
||||||
if mobj is not None:
|
if mobj is not None:
|
||||||
return mobj.group('url')
|
return mobj.group('url')
|
||||||
# Basic usage embedding (see http://dultonmedia.github.io/eplayer/)
|
PLAYER_JS_RE = r'''
|
||||||
|
<script[^>]+
|
||||||
|
src=(?P<qjs>["\'])(?:https?:)?//(?P<host>(?:(?!(?P=qjs)).)+\.media\.eagleplatform\.com)/player/player\.js(?P=qjs)
|
||||||
|
.+?
|
||||||
|
'''
|
||||||
|
# "Basic usage" embedding (see http://dultonmedia.github.io/eplayer/)
|
||||||
mobj = re.search(
|
mobj = re.search(
|
||||||
r'''(?xs)
|
r'''(?xs)
|
||||||
<script[^>]+
|
%s
|
||||||
src=(?P<q1>["\'])(?:https?:)?//(?P<host>.+?\.media\.eagleplatform\.com)/player/player\.js(?P=q1)
|
|
||||||
.+?
|
|
||||||
<div[^>]+
|
<div[^>]+
|
||||||
class=(?P<q2>["\'])eagleplayer(?P=q2)[^>]+
|
class=(?P<qclass>["\'])eagleplayer(?P=qclass)[^>]+
|
||||||
data-id=["\'](?P<id>\d+)
|
data-id=["\'](?P<id>\d+)
|
||||||
''', webpage)
|
''' % PLAYER_JS_RE, webpage)
|
||||||
|
if mobj is not None:
|
||||||
|
return 'eagleplatform:%(host)s:%(id)s' % mobj.groupdict()
|
||||||
|
# Generalization of "Javascript code usage", "Combined usage" and
|
||||||
|
# "Usage without attaching to DOM" embeddings (see
|
||||||
|
# http://dultonmedia.github.io/eplayer/)
|
||||||
|
mobj = re.search(
|
||||||
|
r'''(?xs)
|
||||||
|
%s
|
||||||
|
<script>
|
||||||
|
.+?
|
||||||
|
new\s+EaglePlayer\(
|
||||||
|
(?:[^,]+\s*,\s*)?
|
||||||
|
{
|
||||||
|
.+?
|
||||||
|
\bid\s*:\s*["\']?(?P<id>\d+)
|
||||||
|
.+?
|
||||||
|
}
|
||||||
|
\s*\)
|
||||||
|
.+?
|
||||||
|
</script>
|
||||||
|
''' % PLAYER_JS_RE, webpage)
|
||||||
if mobj is not None:
|
if mobj is not None:
|
||||||
return 'eagleplatform:%(host)s:%(id)s' % mobj.groupdict()
|
return 'eagleplatform:%(host)s:%(id)s' % mobj.groupdict()
|
||||||
|
|
||||||
@@ -79,9 +108,10 @@ class EaglePlatformIE(InfoExtractor):
|
|||||||
if status != 200:
|
if status != 200:
|
||||||
raise ExtractorError(' '.join(response['errors']), expected=True)
|
raise ExtractorError(' '.join(response['errors']), expected=True)
|
||||||
|
|
||||||
def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata', *args, **kwargs):
|
def _download_json(self, url_or_request, video_id, *args, **kwargs):
|
||||||
try:
|
try:
|
||||||
response = super(EaglePlatformIE, self)._download_json(url_or_request, video_id, note)
|
response = super(EaglePlatformIE, self)._download_json(
|
||||||
|
url_or_request, video_id, *args, **kwargs)
|
||||||
except ExtractorError as ee:
|
except ExtractorError as ee:
|
||||||
if isinstance(ee.cause, compat_HTTPError):
|
if isinstance(ee.cause, compat_HTTPError):
|
||||||
response = self._parse_json(ee.cause.read().decode('utf-8'), video_id)
|
response = self._parse_json(ee.cause.read().decode('utf-8'), video_id)
|
||||||
@@ -93,11 +123,24 @@ class EaglePlatformIE(InfoExtractor):
|
|||||||
return self._download_json(url_or_request, video_id, note)['data'][0]
|
return self._download_json(url_or_request, video_id, note)['data'][0]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
url, smuggled_data = unsmuggle_url(url, {})
|
||||||
|
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
host, video_id = mobj.group('custom_host') or mobj.group('host'), mobj.group('id')
|
host, video_id = mobj.group('custom_host') or mobj.group('host'), mobj.group('id')
|
||||||
|
|
||||||
|
headers = {}
|
||||||
|
query = {
|
||||||
|
'id': video_id,
|
||||||
|
}
|
||||||
|
|
||||||
|
referrer = smuggled_data.get('referrer')
|
||||||
|
if referrer:
|
||||||
|
headers['Referer'] = referrer
|
||||||
|
query['referrer'] = referrer
|
||||||
|
|
||||||
player_data = self._download_json(
|
player_data = self._download_json(
|
||||||
'http://%s/api/player_data?id=%s' % (host, video_id), video_id)
|
'http://%s/api/player_data' % host, video_id,
|
||||||
|
headers=headers, query=query)
|
||||||
|
|
||||||
media = player_data['data']['playlist']['viewports'][0]['medialist'][0]
|
media = player_data['data']['playlist']['viewports'][0]['medialist'][0]
|
||||||
|
|
||||||
|
@@ -1,15 +1,18 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
try_get,
|
||||||
|
unified_timestamp,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class EggheadCourseIE(InfoExtractor):
|
class EggheadCourseIE(InfoExtractor):
|
||||||
IE_DESC = 'egghead.io course'
|
IE_DESC = 'egghead.io course'
|
||||||
IE_NAME = 'egghead:course'
|
IE_NAME = 'egghead:course'
|
||||||
_VALID_URL = r'https://egghead\.io/courses/(?P<id>[a-zA-Z_0-9-]+)'
|
_VALID_URL = r'https://egghead\.io/courses/(?P<id>[^/?#&]+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'https://egghead.io/courses/professor-frisby-introduces-composable-functional-javascript',
|
'url': 'https://egghead.io/courses/professor-frisby-introduces-composable-functional-javascript',
|
||||||
'playlist_count': 29,
|
'playlist_count': 29,
|
||||||
@@ -22,18 +25,60 @@ class EggheadCourseIE(InfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
playlist_id = self._match_id(url)
|
playlist_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, playlist_id)
|
|
||||||
|
|
||||||
title = self._html_search_regex(r'<h1 class="title">([^<]+)</h1>', webpage, 'title')
|
course = self._download_json(
|
||||||
ul = self._search_regex(r'(?s)<ul class="series-lessons-list">(.*?)</ul>', webpage, 'session list')
|
'https://egghead.io/api/v1/series/%s' % playlist_id, playlist_id)
|
||||||
|
|
||||||
found = re.findall(r'(?s)<a class="[^"]*"\s*href="([^"]+)">\s*<li class="item', ul)
|
entries = [
|
||||||
entries = [self.url_result(m) for m in found]
|
self.url_result(
|
||||||
|
'wistia:%s' % lesson['wistia_id'], ie='Wistia',
|
||||||
|
video_id=lesson['wistia_id'], video_title=lesson.get('title'))
|
||||||
|
for lesson in course['lessons'] if lesson.get('wistia_id')]
|
||||||
|
|
||||||
|
return self.playlist_result(
|
||||||
|
entries, playlist_id, course.get('title'),
|
||||||
|
course.get('description'))
|
||||||
|
|
||||||
|
|
||||||
|
class EggheadLessonIE(InfoExtractor):
|
||||||
|
IE_DESC = 'egghead.io lesson'
|
||||||
|
IE_NAME = 'egghead:lesson'
|
||||||
|
_VALID_URL = r'https://egghead\.io/lessons/(?P<id>[^/?#&]+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://egghead.io/lessons/javascript-linear-data-flow-with-container-style-types-box',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'fv5yotjxcg',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Create linear data flow with container style types (Box)',
|
||||||
|
'description': 'md5:9aa2cdb6f9878ed4c39ec09e85a8150e',
|
||||||
|
'thumbnail': r're:^https?:.*\.jpg$',
|
||||||
|
'timestamp': 1481296768,
|
||||||
|
'upload_date': '20161209',
|
||||||
|
'duration': 304,
|
||||||
|
'view_count': 0,
|
||||||
|
'tags': ['javascript', 'free'],
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
lesson_id = self._match_id(url)
|
||||||
|
|
||||||
|
lesson = self._download_json(
|
||||||
|
'https://egghead.io/api/v1/lessons/%s' % lesson_id, lesson_id)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'_type': 'playlist',
|
'_type': 'url_transparent',
|
||||||
'id': playlist_id,
|
'ie_key': 'Wistia',
|
||||||
'title': title,
|
'url': 'wistia:%s' % lesson['wistia_id'],
|
||||||
'description': self._og_search_description(webpage),
|
'id': lesson['wistia_id'],
|
||||||
'entries': entries,
|
'title': lesson.get('title'),
|
||||||
|
'description': lesson.get('summary'),
|
||||||
|
'thumbnail': lesson.get('thumb_nail'),
|
||||||
|
'timestamp': unified_timestamp(lesson.get('published_at')),
|
||||||
|
'duration': int_or_none(lesson.get('duration')),
|
||||||
|
'view_count': int_or_none(lesson.get('plays_count')),
|
||||||
|
'tags': try_get(lesson, lambda x: x['tag_list'], list),
|
||||||
}
|
}
|
||||||
|
@@ -10,7 +10,25 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class ESPNIE(InfoExtractor):
|
class ESPNIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/video/clip(?:\?.*?\bid=|/_/id/)(?P<id>\d+)'
|
_VALID_URL = r'''(?x)
|
||||||
|
https?://
|
||||||
|
(?:
|
||||||
|
(?:(?:\w+\.)+)?espn\.go|
|
||||||
|
(?:www\.)?espn
|
||||||
|
)\.com/
|
||||||
|
(?:
|
||||||
|
(?:
|
||||||
|
video/clip|
|
||||||
|
watch/player
|
||||||
|
)
|
||||||
|
(?:
|
||||||
|
\?.*?\bid=|
|
||||||
|
/_/id/
|
||||||
|
)
|
||||||
|
)
|
||||||
|
(?P<id>\d+)
|
||||||
|
'''
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://espn.go.com/video/clip?id=10365079',
|
'url': 'http://espn.go.com/video/clip?id=10365079',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@@ -25,20 +43,34 @@ class ESPNIE(InfoExtractor):
|
|||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
# intl video, from http://www.espnfc.us/video/mls-highlights/150/video/2743663/must-see-moments-best-of-the-mls-season
|
'url': 'https://broadband.espn.go.com/video/clip?id=18910086',
|
||||||
'url': 'http://espn.go.com/video/clip?id=2743663',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2743663',
|
'id': '18910086',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Must-See Moments: Best of the MLS season',
|
'title': 'Kyrie spins around defender for two',
|
||||||
'description': 'md5:4c2d7232beaea572632bec41004f0aeb',
|
'description': 'md5:2b0f5bae9616d26fba8808350f0d2b9b',
|
||||||
'timestamp': 1449446454,
|
'timestamp': 1489539155,
|
||||||
'upload_date': '20151207',
|
'upload_date': '20170315',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
'expected_warnings': ['Unable to download f4m manifest'],
|
'expected_warnings': ['Unable to download f4m manifest'],
|
||||||
|
}, {
|
||||||
|
'url': 'http://nonredline.sports.espn.go.com/video/clip?id=19744672',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://cdn.espn.go.com/video/clip/_/id/19771774',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.espn.com/watch/player?id=19141491',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.espn.com/watch/player?bucketId=257&id=19505875',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.espn.com/watch/player/_/id/19141491',
|
||||||
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.espn.com/video/clip?id=10365079',
|
'url': 'http://www.espn.com/video/clip?id=10365079',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
@@ -71,6 +71,10 @@ from .arte import (
|
|||||||
TheOperaPlatformIE,
|
TheOperaPlatformIE,
|
||||||
ArteTVPlaylistIE,
|
ArteTVPlaylistIE,
|
||||||
)
|
)
|
||||||
|
from .asiancrush import (
|
||||||
|
AsianCrushIE,
|
||||||
|
AsianCrushPlaylistIE,
|
||||||
|
)
|
||||||
from .atresplayer import AtresPlayerIE
|
from .atresplayer import AtresPlayerIE
|
||||||
from .atttechchannel import ATTTechChannelIE
|
from .atttechchannel import ATTTechChannelIE
|
||||||
from .atvat import ATVAtIE
|
from .atvat import ATVAtIE
|
||||||
@@ -90,7 +94,7 @@ from .azmedien import (
|
|||||||
)
|
)
|
||||||
from .baidu import BaiduVideoIE
|
from .baidu import BaiduVideoIE
|
||||||
from .bambuser import BambuserIE, BambuserChannelIE
|
from .bambuser import BambuserIE, BambuserChannelIE
|
||||||
from .bandcamp import BandcampIE, BandcampAlbumIE
|
from .bandcamp import BandcampIE, BandcampAlbumIE, BandcampWeeklyIE
|
||||||
from .bbc import (
|
from .bbc import (
|
||||||
BBCCoUkIE,
|
BBCCoUkIE,
|
||||||
BBCCoUkArticleIE,
|
BBCCoUkArticleIE,
|
||||||
@@ -98,7 +102,10 @@ from .bbc import (
|
|||||||
BBCCoUkPlaylistIE,
|
BBCCoUkPlaylistIE,
|
||||||
BBCIE,
|
BBCIE,
|
||||||
)
|
)
|
||||||
from .beampro import BeamProLiveIE
|
from .beampro import (
|
||||||
|
BeamProLiveIE,
|
||||||
|
BeamProVodIE,
|
||||||
|
)
|
||||||
from .beeg import BeegIE
|
from .beeg import BeegIE
|
||||||
from .behindkink import BehindKinkIE
|
from .behindkink import BehindKinkIE
|
||||||
from .bellmedia import BellMediaIE
|
from .bellmedia import BellMediaIE
|
||||||
@@ -178,6 +185,7 @@ from .chirbit import (
|
|||||||
ChirbitProfileIE,
|
ChirbitProfileIE,
|
||||||
)
|
)
|
||||||
from .cinchcast import CinchcastIE
|
from .cinchcast import CinchcastIE
|
||||||
|
from .cjsw import CJSWIE
|
||||||
from .clipfish import ClipfishIE
|
from .clipfish import ClipfishIE
|
||||||
from .cliphunter import CliphunterIE
|
from .cliphunter import CliphunterIE
|
||||||
from .cliprs import ClipRsIE
|
from .cliprs import ClipRsIE
|
||||||
@@ -251,7 +259,10 @@ from .democracynow import DemocracynowIE
|
|||||||
from .dfb import DFBIE
|
from .dfb import DFBIE
|
||||||
from .dhm import DHMIE
|
from .dhm import DHMIE
|
||||||
from .dotsub import DotsubIE
|
from .dotsub import DotsubIE
|
||||||
from .douyutv import DouyuTVIE
|
from .douyutv import (
|
||||||
|
DouyuShowIE,
|
||||||
|
DouyuTVIE,
|
||||||
|
)
|
||||||
from .dplay import (
|
from .dplay import (
|
||||||
DPlayIE,
|
DPlayIE,
|
||||||
DPlayItIE,
|
DPlayItIE,
|
||||||
@@ -287,7 +298,10 @@ from .dw import (
|
|||||||
from .eagleplatform import EaglePlatformIE
|
from .eagleplatform import EaglePlatformIE
|
||||||
from .ebaumsworld import EbaumsWorldIE
|
from .ebaumsworld import EbaumsWorldIE
|
||||||
from .echomsk import EchoMskIE
|
from .echomsk import EchoMskIE
|
||||||
from .egghead import EggheadCourseIE
|
from .egghead import (
|
||||||
|
EggheadCourseIE,
|
||||||
|
EggheadLessonIE,
|
||||||
|
)
|
||||||
from .ehow import EHowIE
|
from .ehow import EHowIE
|
||||||
from .eighttracks import EightTracksIE
|
from .eighttracks import EightTracksIE
|
||||||
from .einthusan import EinthusanIE
|
from .einthusan import EinthusanIE
|
||||||
@@ -386,7 +400,6 @@ from .globo import (
|
|||||||
from .go import GoIE
|
from .go import GoIE
|
||||||
from .go90 import Go90IE
|
from .go90 import Go90IE
|
||||||
from .godtube import GodTubeIE
|
from .godtube import GodTubeIE
|
||||||
from .godtv import GodTVIE
|
|
||||||
from .golem import GolemIE
|
from .golem import GolemIE
|
||||||
from .googledrive import GoogleDriveIE
|
from .googledrive import GoogleDriveIE
|
||||||
from .googleplus import GooglePlusIE
|
from .googleplus import GooglePlusIE
|
||||||
@@ -460,6 +473,7 @@ from .jamendo import (
|
|||||||
)
|
)
|
||||||
from .jeuxvideo import JeuxVideoIE
|
from .jeuxvideo import JeuxVideoIE
|
||||||
from .jove import JoveIE
|
from .jove import JoveIE
|
||||||
|
from .joj import JojIE
|
||||||
from .jwplatform import JWPlatformIE
|
from .jwplatform import JWPlatformIE
|
||||||
from .jpopsukitv import JpopsukiIE
|
from .jpopsukitv import JpopsukiIE
|
||||||
from .kaltura import KalturaIE
|
from .kaltura import KalturaIE
|
||||||
@@ -542,6 +556,7 @@ from .mangomolo import (
|
|||||||
)
|
)
|
||||||
from .matchtv import MatchTVIE
|
from .matchtv import MatchTVIE
|
||||||
from .mdr import MDRIE
|
from .mdr import MDRIE
|
||||||
|
from .mediaset import MediasetIE
|
||||||
from .medici import MediciIE
|
from .medici import MediciIE
|
||||||
from .meipai import MeipaiIE
|
from .meipai import MeipaiIE
|
||||||
from .melonvod import MelonVODIE
|
from .melonvod import MelonVODIE
|
||||||
@@ -630,7 +645,10 @@ from .neteasemusic import (
|
|||||||
NetEaseMusicProgramIE,
|
NetEaseMusicProgramIE,
|
||||||
NetEaseMusicDjRadioIE,
|
NetEaseMusicDjRadioIE,
|
||||||
)
|
)
|
||||||
from .newgrounds import NewgroundsIE
|
from .newgrounds import (
|
||||||
|
NewgroundsIE,
|
||||||
|
NewgroundsPlaylistIE,
|
||||||
|
)
|
||||||
from .newstube import NewstubeIE
|
from .newstube import NewstubeIE
|
||||||
from .nextmedia import (
|
from .nextmedia import (
|
||||||
NextMediaIE,
|
NextMediaIE,
|
||||||
@@ -638,6 +656,10 @@ from .nextmedia import (
|
|||||||
AppleDailyIE,
|
AppleDailyIE,
|
||||||
NextTVIE,
|
NextTVIE,
|
||||||
)
|
)
|
||||||
|
from .nexx import (
|
||||||
|
NexxIE,
|
||||||
|
NexxEmbedIE,
|
||||||
|
)
|
||||||
from .nfb import NFBIE
|
from .nfb import NFBIE
|
||||||
from .nfl import NFLIE
|
from .nfl import NFLIE
|
||||||
from .nhk import NhkVodIE
|
from .nhk import NhkVodIE
|
||||||
@@ -733,8 +755,8 @@ from .openload import OpenloadIE
|
|||||||
from .ora import OraTVIE
|
from .ora import OraTVIE
|
||||||
from .orf import (
|
from .orf import (
|
||||||
ORFTVthekIE,
|
ORFTVthekIE,
|
||||||
ORFOE1IE,
|
|
||||||
ORFFM4IE,
|
ORFFM4IE,
|
||||||
|
ORFOE1IE,
|
||||||
ORFIPTVIE,
|
ORFIPTVIE,
|
||||||
)
|
)
|
||||||
from .packtpub import (
|
from .packtpub import (
|
||||||
@@ -746,6 +768,7 @@ from .pandoratv import PandoraTVIE
|
|||||||
from .parliamentliveuk import ParliamentLiveUKIE
|
from .parliamentliveuk import ParliamentLiveUKIE
|
||||||
from .patreon import PatreonIE
|
from .patreon import PatreonIE
|
||||||
from .pbs import PBSIE
|
from .pbs import PBSIE
|
||||||
|
from .pearvideo import PearVideoIE
|
||||||
from .people import PeopleIE
|
from .people import PeopleIE
|
||||||
from .periscope import (
|
from .periscope import (
|
||||||
PeriscopeIE,
|
PeriscopeIE,
|
||||||
@@ -811,6 +834,7 @@ from .radiobremen import RadioBremenIE
|
|||||||
from .radiofrance import RadioFranceIE
|
from .radiofrance import RadioFranceIE
|
||||||
from .rai import (
|
from .rai import (
|
||||||
RaiPlayIE,
|
RaiPlayIE,
|
||||||
|
RaiPlayLiveIE,
|
||||||
RaiIE,
|
RaiIE,
|
||||||
)
|
)
|
||||||
from .rbmaradio import RBMARadioIE
|
from .rbmaradio import RBMARadioIE
|
||||||
@@ -862,6 +886,7 @@ from .rutube import (
|
|||||||
)
|
)
|
||||||
from .rutv import RUTVIE
|
from .rutv import RUTVIE
|
||||||
from .ruutu import RuutuIE
|
from .ruutu import RuutuIE
|
||||||
|
from .ruv import RuvIE
|
||||||
from .sandia import SandiaIE
|
from .sandia import SandiaIE
|
||||||
from .safari import (
|
from .safari import (
|
||||||
SafariIE,
|
SafariIE,
|
||||||
@@ -958,6 +983,7 @@ from .tagesschau import (
|
|||||||
TagesschauIE,
|
TagesschauIE,
|
||||||
)
|
)
|
||||||
from .tass import TassIE
|
from .tass import TassIE
|
||||||
|
from .tastytrade import TastyTradeIE
|
||||||
from .tbs import TBSIE
|
from .tbs import TBSIE
|
||||||
from .tdslifeway import TDSLifewayIE
|
from .tdslifeway import TDSLifewayIE
|
||||||
from .teachertube import (
|
from .teachertube import (
|
||||||
@@ -1015,11 +1041,6 @@ from .trilulilu import TriluliluIE
|
|||||||
from .trutv import TruTVIE
|
from .trutv import TruTVIE
|
||||||
from .tube8 import Tube8IE
|
from .tube8 import Tube8IE
|
||||||
from .tubitv import TubiTvIE
|
from .tubitv import TubiTvIE
|
||||||
from .tudou import (
|
|
||||||
TudouIE,
|
|
||||||
TudouPlaylistIE,
|
|
||||||
TudouAlbumIE,
|
|
||||||
)
|
|
||||||
from .tumblr import TumblrIE
|
from .tumblr import TumblrIE
|
||||||
from .tunein import (
|
from .tunein import (
|
||||||
TuneInClipIE,
|
TuneInClipIE,
|
||||||
@@ -1099,6 +1120,10 @@ from .uplynk import (
|
|||||||
UplynkIE,
|
UplynkIE,
|
||||||
UplynkPreplayIE,
|
UplynkPreplayIE,
|
||||||
)
|
)
|
||||||
|
from .upskill import (
|
||||||
|
UpskillIE,
|
||||||
|
UpskillCourseIE,
|
||||||
|
)
|
||||||
from .urort import UrortIE
|
from .urort import UrortIE
|
||||||
from .urplay import URPlayIE
|
from .urplay import URPlayIE
|
||||||
from .usanetwork import USANetworkIE
|
from .usanetwork import USANetworkIE
|
||||||
@@ -1189,7 +1214,8 @@ from .vk import (
|
|||||||
)
|
)
|
||||||
from .vlive import (
|
from .vlive import (
|
||||||
VLiveIE,
|
VLiveIE,
|
||||||
VLiveChannelIE
|
VLiveChannelIE,
|
||||||
|
VLivePlaylistIE
|
||||||
)
|
)
|
||||||
from .vodlocker import VodlockerIE
|
from .vodlocker import VodlockerIE
|
||||||
from .vodpl import VODPlIE
|
from .vodpl import VODPlIE
|
||||||
@@ -1265,7 +1291,6 @@ from .yahoo import (
|
|||||||
YahooIE,
|
YahooIE,
|
||||||
YahooSearchIE,
|
YahooSearchIE,
|
||||||
)
|
)
|
||||||
from .yam import YamIE
|
|
||||||
from .yandexmusic import (
|
from .yandexmusic import (
|
||||||
YandexMusicTrackIE,
|
YandexMusicTrackIE,
|
||||||
YandexMusicAlbumIE,
|
YandexMusicAlbumIE,
|
||||||
|
@@ -203,19 +203,19 @@ class FacebookIE(InfoExtractor):
|
|||||||
}]
|
}]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _extract_url(webpage):
|
def _extract_urls(webpage):
|
||||||
mobj = re.search(
|
urls = []
|
||||||
r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
|
for mobj in re.finditer(
|
||||||
if mobj is not None:
|
r'<iframe[^>]+?src=(["\'])(?P<url>https?://www\.facebook\.com/(?:video/embed|plugins/video\.php).+?)\1',
|
||||||
return mobj.group('url')
|
webpage):
|
||||||
|
urls.append(mobj.group('url'))
|
||||||
# Facebook API embed
|
# Facebook API embed
|
||||||
# see https://developers.facebook.com/docs/plugins/embedded-video-player
|
# see https://developers.facebook.com/docs/plugins/embedded-video-player
|
||||||
mobj = re.search(r'''(?x)<div[^>]+
|
for mobj in re.finditer(r'''(?x)<div[^>]+
|
||||||
class=(?P<q1>[\'"])[^\'"]*\bfb-(?:video|post)\b[^\'"]*(?P=q1)[^>]+
|
class=(?P<q1>[\'"])[^\'"]*\bfb-(?:video|post)\b[^\'"]*(?P=q1)[^>]+
|
||||||
data-href=(?P<q2>[\'"])(?P<url>(?:https?:)?//(?:www\.)?facebook.com/.+?)(?P=q2)''', webpage)
|
data-href=(?P<q2>[\'"])(?P<url>(?:https?:)?//(?:www\.)?facebook.com/.+?)(?P=q2)''', webpage):
|
||||||
if mobj is not None:
|
urls.append(mobj.group('url'))
|
||||||
return mobj.group('url')
|
return urls
|
||||||
|
|
||||||
def _login(self):
|
def _login(self):
|
||||||
(useremail, password) = self._get_login_info()
|
(useremail, password) = self._get_login_info()
|
||||||
|
@@ -102,6 +102,8 @@ class FirstTVIE(InfoExtractor):
|
|||||||
'format_id': f.get('name'),
|
'format_id': f.get('name'),
|
||||||
'tbr': tbr,
|
'tbr': tbr,
|
||||||
'source_preference': quality(f.get('name')),
|
'source_preference': quality(f.get('name')),
|
||||||
|
# quality metadata of http formats may be incorrect
|
||||||
|
'preference': -1,
|
||||||
})
|
})
|
||||||
# m3u8 URL format is reverse engineered from [1] (search for
|
# m3u8 URL format is reverse engineered from [1] (search for
|
||||||
# master.m3u8). dashEdges (that is currently balancer-vod.1tv.ru)
|
# master.m3u8). dashEdges (that is currently balancer-vod.1tv.ru)
|
||||||
|
@@ -43,7 +43,7 @@ class FiveTVIE(InfoExtractor):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'glavnoe',
|
'id': 'glavnoe',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Итоги недели с 8 по 14 июня 2015 года',
|
'title': r're:^Итоги недели с \d+ по \d+ \w+ \d{4} года$',
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
@@ -70,7 +70,8 @@ class FiveTVIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
video_url = self._search_regex(
|
video_url = self._search_regex(
|
||||||
r'<a[^>]+?href="([^"]+)"[^>]+?class="videoplayer"',
|
[r'<div[^>]+?class="flowplayer[^>]+?data-href="([^"]+)"',
|
||||||
|
r'<a[^>]+?href="([^"]+)"[^>]+?class="videoplayer"'],
|
||||||
webpage, 'video url')
|
webpage, 'video url')
|
||||||
|
|
||||||
title = self._og_search_title(webpage, default=None) or self._search_regex(
|
title = self._og_search_title(webpage, default=None) or self._search_regex(
|
||||||
|
@@ -1,7 +1,10 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_urllib_parse_urlencode
|
from ..compat import (
|
||||||
|
compat_str,
|
||||||
|
compat_urllib_parse_urlencode,
|
||||||
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
@@ -81,7 +84,7 @@ class FlickrIE(InfoExtractor):
|
|||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for stream in streams['stream']:
|
for stream in streams['stream']:
|
||||||
stream_type = str(stream.get('type'))
|
stream_type = compat_str(stream.get('type'))
|
||||||
formats.append({
|
formats.append({
|
||||||
'format_id': stream_type,
|
'format_id': stream_type,
|
||||||
'url': stream['_content'],
|
'url': stream['_content'],
|
||||||
|
@@ -85,11 +85,11 @@ class FourTubeIE(InfoExtractor):
|
|||||||
media_id = params[0]
|
media_id = params[0]
|
||||||
sources = ['%s' % p for p in params[2]]
|
sources = ['%s' % p for p in params[2]]
|
||||||
|
|
||||||
token_url = 'http://tkn.4tube.com/{0}/desktop/{1}'.format(
|
token_url = 'https://tkn.kodicdn.com/{0}/desktop/{1}'.format(
|
||||||
media_id, '+'.join(sources))
|
media_id, '+'.join(sources))
|
||||||
headers = {
|
headers = {
|
||||||
b'Content-Type': b'application/x-www-form-urlencoded',
|
b'Content-Type': b'application/x-www-form-urlencoded',
|
||||||
b'Origin': b'http://www.4tube.com',
|
b'Origin': b'https://www.4tube.com',
|
||||||
}
|
}
|
||||||
token_req = sanitized_Request(token_url, b'{}', headers)
|
token_req = sanitized_Request(token_url, b'{}', headers)
|
||||||
tokens = self._download_json(token_req, video_id)
|
tokens = self._download_json(token_req, video_id)
|
||||||
|
@@ -5,6 +5,7 @@ import itertools
|
|||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
get_element_by_id,
|
get_element_by_id,
|
||||||
|
int_or_none,
|
||||||
remove_end,
|
remove_end,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -46,7 +47,7 @@ class FoxgayIE(InfoExtractor):
|
|||||||
|
|
||||||
formats = [{
|
formats = [{
|
||||||
'url': source,
|
'url': source,
|
||||||
'height': resolution,
|
'height': int_or_none(resolution),
|
||||||
} for source, resolution in zip(
|
} for source, resolution in zip(
|
||||||
video_data['sources'], video_data.get('resolutions', itertools.repeat(None)))]
|
video_data['sources'], video_data.get('resolutions', itertools.repeat(None)))]
|
||||||
|
|
||||||
|
@@ -112,7 +112,7 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
|
|||||||
|
|
||||||
|
|
||||||
class FranceTVIE(FranceTVBaseInfoExtractor):
|
class FranceTVIE(FranceTVBaseInfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?france\.tv/(?:[^/]+/)+(?P<id>[^/]+)\.html'
|
_VALID_URL = r'https?://(?:(?:www\.)?france\.tv|mobile\.france\.tv)/(?:[^/]+/)*(?P<id>[^/]+)\.html'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.france.tv/france-2/13h15-le-dimanche/140921-les-mysteres-de-jesus.html',
|
'url': 'https://www.france.tv/france-2/13h15-le-dimanche/140921-les-mysteres-de-jesus.html',
|
||||||
@@ -154,6 +154,12 @@ class FranceTVIE(FranceTVBaseInfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'https://www.france.tv/jeux-et-divertissements/divertissements/133965-le-web-contre-attaque.html',
|
'url': 'https://www.france.tv/jeux-et-divertissements/divertissements/133965-le-web-contre-attaque.html',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://mobile.france.tv/france-5/c-dans-l-air/137347-emission-du-vendredi-12-mai-2017.html',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.france.tv/142749-rouge-sang.html',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@@ -1,10 +1,14 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import json
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import ExtractorError
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
float_or_none,
|
||||||
|
int_or_none,
|
||||||
|
unified_timestamp,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class FunnyOrDieIE(InfoExtractor):
|
class FunnyOrDieIE(InfoExtractor):
|
||||||
@@ -18,6 +22,10 @@ class FunnyOrDieIE(InfoExtractor):
|
|||||||
'title': 'Heart-Shaped Box: Literal Video Version',
|
'title': 'Heart-Shaped Box: Literal Video Version',
|
||||||
'description': 'md5:ea09a01bc9a1c46d9ab696c01747c338',
|
'description': 'md5:ea09a01bc9a1c46d9ab696c01747c338',
|
||||||
'thumbnail': r're:^http:.*\.jpg$',
|
'thumbnail': r're:^http:.*\.jpg$',
|
||||||
|
'uploader': 'DASjr',
|
||||||
|
'timestamp': 1317904928,
|
||||||
|
'upload_date': '20111006',
|
||||||
|
'duration': 318.3,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.funnyordie.com/embed/e402820827',
|
'url': 'http://www.funnyordie.com/embed/e402820827',
|
||||||
@@ -27,6 +35,8 @@ class FunnyOrDieIE(InfoExtractor):
|
|||||||
'title': 'Please Use This Song (Jon Lajoie)',
|
'title': 'Please Use This Song (Jon Lajoie)',
|
||||||
'description': 'Please use this to sell something. www.jonlajoie.com',
|
'description': 'Please use this to sell something. www.jonlajoie.com',
|
||||||
'thumbnail': r're:^http:.*\.jpg$',
|
'thumbnail': r're:^http:.*\.jpg$',
|
||||||
|
'timestamp': 1398988800,
|
||||||
|
'upload_date': '20140502',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
@@ -100,15 +110,53 @@ class FunnyOrDieIE(InfoExtractor):
|
|||||||
'url': 'http://www.funnyordie.com%s' % src,
|
'url': 'http://www.funnyordie.com%s' % src,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
post_json = self._search_regex(
|
timestamp = unified_timestamp(self._html_search_meta(
|
||||||
r'fb_post\s*=\s*(\{.*?\});', webpage, 'post details')
|
'uploadDate', webpage, 'timestamp', default=None))
|
||||||
post = json.loads(post_json)
|
|
||||||
|
uploader = self._html_search_regex(
|
||||||
|
r'<h\d[^>]+\bclass=["\']channel-preview-name[^>]+>(.+?)</h',
|
||||||
|
webpage, 'uploader', default=None)
|
||||||
|
|
||||||
|
title, description, thumbnail, duration = [None] * 4
|
||||||
|
|
||||||
|
medium = self._parse_json(
|
||||||
|
self._search_regex(
|
||||||
|
r'jsonMedium\s*=\s*({.+?});', webpage, 'JSON medium',
|
||||||
|
default='{}'),
|
||||||
|
video_id, fatal=False)
|
||||||
|
if medium:
|
||||||
|
title = medium.get('title')
|
||||||
|
duration = float_or_none(medium.get('duration'))
|
||||||
|
if not timestamp:
|
||||||
|
timestamp = unified_timestamp(medium.get('publishDate'))
|
||||||
|
|
||||||
|
post = self._parse_json(
|
||||||
|
self._search_regex(
|
||||||
|
r'fb_post\s*=\s*(\{.*?\});', webpage, 'post details',
|
||||||
|
default='{}'),
|
||||||
|
video_id, fatal=False)
|
||||||
|
if post:
|
||||||
|
if not title:
|
||||||
|
title = post.get('name')
|
||||||
|
description = post.get('description')
|
||||||
|
thumbnail = post.get('picture')
|
||||||
|
|
||||||
|
if not title:
|
||||||
|
title = self._og_search_title(webpage)
|
||||||
|
if not description:
|
||||||
|
description = self._og_search_description(webpage)
|
||||||
|
if not duration:
|
||||||
|
duration = int_or_none(self._html_search_meta(
|
||||||
|
('video:duration', 'duration'), webpage, 'duration', default=False))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': post['name'],
|
'title': title,
|
||||||
'description': post.get('description'),
|
'description': description,
|
||||||
'thumbnail': post.get('picture'),
|
'thumbnail': thumbnail,
|
||||||
|
'uploader': uploader,
|
||||||
|
'timestamp': timestamp,
|
||||||
|
'duration': duration,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
}
|
}
|
||||||
|
@@ -6,62 +6,52 @@ from .common import InfoExtractor
|
|||||||
from ..utils import (
|
from ..utils import (
|
||||||
float_or_none,
|
float_or_none,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
js_to_json,
|
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class GaskrankIE(InfoExtractor):
|
class GaskrankIE(InfoExtractor):
|
||||||
"""InfoExtractor for gaskrank.tv"""
|
_VALID_URL = r'https?://(?:www\.)?gaskrank\.tv/tv/(?P<categories>[^/]+)/(?P<id>[^/]+)\.htm'
|
||||||
_VALID_URL = r'https?://(?:www\.)?gaskrank\.tv/tv/(?P<categories>[^/]+)/(?P<id>[^/]+)\.html?'
|
_TESTS = [{
|
||||||
_TESTS = [
|
'url': 'http://www.gaskrank.tv/tv/motorrad-fun/strike-einparken-durch-anfaenger-crash-mit-groesserem-flurschaden.htm',
|
||||||
{
|
'md5': '1ae88dbac97887d85ebd1157a95fc4f9',
|
||||||
'url': 'http://www.gaskrank.tv/tv/motorrad-fun/strike-einparken-durch-anfaenger-crash-mit-groesserem-flurschaden.htm',
|
'info_dict': {
|
||||||
'md5': '1ae88dbac97887d85ebd1157a95fc4f9',
|
'id': '201601/26955',
|
||||||
'info_dict': {
|
'ext': 'mp4',
|
||||||
'id': '201601/26955',
|
'title': 'Strike! Einparken können nur Männer - Flurschaden hält sich in Grenzen *lol*',
|
||||||
'ext': 'mp4',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
'title': 'Strike! Einparken können nur Männer - Flurschaden hält sich in Grenzen *lol*',
|
'categories': ['motorrad-fun'],
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'display_id': 'strike-einparken-durch-anfaenger-crash-mit-groesserem-flurschaden',
|
||||||
'categories': ['motorrad-fun'],
|
'uploader_id': 'Bikefun',
|
||||||
'display_id': 'strike-einparken-durch-anfaenger-crash-mit-groesserem-flurschaden',
|
'upload_date': '20170110',
|
||||||
'uploader_id': 'Bikefun',
|
'uploader_url': None,
|
||||||
'upload_date': '20170110',
|
|
||||||
'uploader_url': None,
|
|
||||||
}
|
|
||||||
},
|
|
||||||
{
|
|
||||||
'url': 'http://www.gaskrank.tv/tv/racing/isle-of-man-tt-2011-michael-du-15920.htm',
|
|
||||||
'md5': 'c33ee32c711bc6c8224bfcbe62b23095',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '201106/15920',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Isle of Man - Michael Dunlop vs Guy Martin - schwindelig kucken',
|
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
|
||||||
'categories': ['racing'],
|
|
||||||
'display_id': 'isle-of-man-tt-2011-michael-du-15920',
|
|
||||||
'uploader_id': 'IOM',
|
|
||||||
'upload_date': '20160506',
|
|
||||||
'uploader_url': 'www.iomtt.com',
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
]
|
}, {
|
||||||
|
'url': 'http://www.gaskrank.tv/tv/racing/isle-of-man-tt-2011-michael-du-15920.htm',
|
||||||
|
'md5': 'c33ee32c711bc6c8224bfcbe62b23095',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '201106/15920',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Isle of Man - Michael Dunlop vs Guy Martin - schwindelig kucken',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
'categories': ['racing'],
|
||||||
|
'display_id': 'isle-of-man-tt-2011-michael-du-15920',
|
||||||
|
'uploader_id': 'IOM',
|
||||||
|
'upload_date': '20170523',
|
||||||
|
'uploader_url': 'www.iomtt.com',
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
"""extract information from gaskrank.tv"""
|
|
||||||
def fix_json(code):
|
|
||||||
"""Removes trailing comma in json: {{},} --> {{}}"""
|
|
||||||
return re.sub(r',\s*}', r'}', js_to_json(code))
|
|
||||||
|
|
||||||
display_id = self._match_id(url)
|
display_id = self._match_id(url)
|
||||||
|
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
|
title = self._og_search_title(
|
||||||
|
webpage, default=None) or self._html_search_meta(
|
||||||
|
'title', webpage, fatal=True)
|
||||||
|
|
||||||
categories = [re.match(self._VALID_URL, url).group('categories')]
|
categories = [re.match(self._VALID_URL, url).group('categories')]
|
||||||
title = self._search_regex(
|
|
||||||
r'movieName\s*:\s*\'([^\']*)\'',
|
|
||||||
webpage, 'title')
|
|
||||||
thumbnail = self._search_regex(
|
|
||||||
r'poster\s*:\s*\'([^\']*)\'',
|
|
||||||
webpage, 'thumbnail', default=None)
|
|
||||||
|
|
||||||
mobj = re.search(
|
mobj = re.search(
|
||||||
r'Video von:\s*(?P<uploader_id>[^|]*?)\s*\|\s*vom:\s*(?P<upload_date>[0-9][0-9]\.[0-9][0-9]\.[0-9][0-9][0-9][0-9])',
|
r'Video von:\s*(?P<uploader_id>[^|]*?)\s*\|\s*vom:\s*(?P<upload_date>[0-9][0-9]\.[0-9][0-9]\.[0-9][0-9][0-9][0-9])',
|
||||||
@@ -89,29 +79,14 @@ class GaskrankIE(InfoExtractor):
|
|||||||
if average_rating:
|
if average_rating:
|
||||||
average_rating = float_or_none(average_rating.replace(',', '.'))
|
average_rating = float_or_none(average_rating.replace(',', '.'))
|
||||||
|
|
||||||
playlist = self._parse_json(
|
|
||||||
self._search_regex(
|
|
||||||
r'playlist\s*:\s*\[([^\]]*)\]',
|
|
||||||
webpage, 'playlist', default='{}'),
|
|
||||||
display_id, transform_source=fix_json, fatal=False)
|
|
||||||
|
|
||||||
video_id = self._search_regex(
|
video_id = self._search_regex(
|
||||||
r'https?://movies\.gaskrank\.tv/([^-]*?)(-[^\.]*)?\.mp4',
|
r'https?://movies\.gaskrank\.tv/([^-]*?)(-[^\.]*)?\.mp4',
|
||||||
playlist.get('0').get('src'), 'video id')
|
webpage, 'video id', default=display_id)
|
||||||
|
|
||||||
formats = []
|
entry = self._parse_html5_media_entries(url, webpage, video_id)[0]
|
||||||
for key in playlist:
|
entry.update({
|
||||||
formats.append({
|
|
||||||
'url': playlist[key]['src'],
|
|
||||||
'format_id': key,
|
|
||||||
'quality': playlist[key].get('quality')})
|
|
||||||
self._sort_formats(formats, field_preference=['format_id'])
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'formats': formats,
|
|
||||||
'thumbnail': thumbnail,
|
|
||||||
'categories': categories,
|
'categories': categories,
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
'uploader_id': uploader_id,
|
'uploader_id': uploader_id,
|
||||||
@@ -120,4 +95,7 @@ class GaskrankIE(InfoExtractor):
|
|||||||
'tags': tags,
|
'tags': tags,
|
||||||
'view_count': view_count,
|
'view_count': view_count,
|
||||||
'average_rating': average_rating,
|
'average_rating': average_rating,
|
||||||
}
|
})
|
||||||
|
self._sort_formats(entry['formats'])
|
||||||
|
|
||||||
|
return entry
|
||||||
|
@@ -10,6 +10,7 @@ from .common import InfoExtractor
|
|||||||
from .youtube import YoutubeIE
|
from .youtube import YoutubeIE
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_etree_fromstring,
|
compat_etree_fromstring,
|
||||||
|
compat_str,
|
||||||
compat_urllib_parse_unquote,
|
compat_urllib_parse_unquote,
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
compat_xml_parse_error,
|
compat_xml_parse_error,
|
||||||
@@ -35,6 +36,10 @@ from .brightcove import (
|
|||||||
BrightcoveLegacyIE,
|
BrightcoveLegacyIE,
|
||||||
BrightcoveNewIE,
|
BrightcoveNewIE,
|
||||||
)
|
)
|
||||||
|
from .nexx import (
|
||||||
|
NexxIE,
|
||||||
|
NexxEmbedIE,
|
||||||
|
)
|
||||||
from .nbc import NBCSportsVPlayerIE
|
from .nbc import NBCSportsVPlayerIE
|
||||||
from .ooyala import OoyalaIE
|
from .ooyala import OoyalaIE
|
||||||
from .rutv import RUTVIE
|
from .rutv import RUTVIE
|
||||||
@@ -56,6 +61,7 @@ from .dailymotion import (
|
|||||||
DailymotionIE,
|
DailymotionIE,
|
||||||
DailymotionCloudIE,
|
DailymotionCloudIE,
|
||||||
)
|
)
|
||||||
|
from .dailymail import DailyMailIE
|
||||||
from .onionstudios import OnionStudiosIE
|
from .onionstudios import OnionStudiosIE
|
||||||
from .viewlift import ViewLiftEmbedIE
|
from .viewlift import ViewLiftEmbedIE
|
||||||
from .mtv import MTVServicesEmbeddedIE
|
from .mtv import MTVServicesEmbeddedIE
|
||||||
@@ -88,6 +94,9 @@ from .rutube import RutubeIE
|
|||||||
from .limelight import LimelightBaseIE
|
from .limelight import LimelightBaseIE
|
||||||
from .anvato import AnvatoIE
|
from .anvato import AnvatoIE
|
||||||
from .washingtonpost import WashingtonPostIE
|
from .washingtonpost import WashingtonPostIE
|
||||||
|
from .wistia import WistiaIE
|
||||||
|
from .mediaset import MediasetIE
|
||||||
|
from .joj import JojIE
|
||||||
|
|
||||||
|
|
||||||
class GenericIE(InfoExtractor):
|
class GenericIE(InfoExtractor):
|
||||||
@@ -756,6 +765,20 @@ class GenericIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
'add_ie': ['Dailymotion'],
|
'add_ie': ['Dailymotion'],
|
||||||
},
|
},
|
||||||
|
# DailyMail embed
|
||||||
|
{
|
||||||
|
'url': 'http://www.bumm.sk/krimi/2017/07/05/biztonsagi-kamera-buktatta-le-az-agg-ferfit-utlegelo-apolot',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1495629',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Care worker punches elderly dementia patient in head 11 times',
|
||||||
|
'description': 'md5:3a743dee84e57e48ec68bf67113199a5',
|
||||||
|
},
|
||||||
|
'add_ie': ['DailyMail'],
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
},
|
||||||
# YouTube embed
|
# YouTube embed
|
||||||
{
|
{
|
||||||
'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
|
'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
|
||||||
@@ -1182,7 +1205,7 @@ class GenericIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
'add_ie': ['Kaltura'],
|
'add_ie': ['Kaltura'],
|
||||||
},
|
},
|
||||||
# Eagle.Platform embed (generic URL)
|
# EaglePlatform embed (generic URL)
|
||||||
{
|
{
|
||||||
'url': 'http://lenta.ru/news/2015/03/06/navalny/',
|
'url': 'http://lenta.ru/news/2015/03/06/navalny/',
|
||||||
# Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
|
# Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
|
||||||
@@ -1196,8 +1219,26 @@ class GenericIE(InfoExtractor):
|
|||||||
'view_count': int,
|
'view_count': int,
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
},
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
},
|
},
|
||||||
# ClipYou (Eagle.Platform) embed (custom URL)
|
# referrer protected EaglePlatform embed
|
||||||
|
{
|
||||||
|
'url': 'https://tvrain.ru/lite/teleshow/kak_vse_nachinalos/namin-418921/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '582306',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Стас Намин: «Мы нарушили девственность Кремля»',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
'duration': 3382,
|
||||||
|
'view_count': int,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
# ClipYou (EaglePlatform) embed (custom URL)
|
||||||
{
|
{
|
||||||
'url': 'http://muz-tv.ru/play/7129/',
|
'url': 'http://muz-tv.ru/play/7129/',
|
||||||
# Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
|
# Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
|
||||||
@@ -1209,6 +1250,9 @@ class GenericIE(InfoExtractor):
|
|||||||
'duration': 216,
|
'duration': 216,
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
},
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
},
|
},
|
||||||
# Pladform embed
|
# Pladform embed
|
||||||
{
|
{
|
||||||
@@ -1509,6 +1553,22 @@ class GenericIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
'add_ie': ['BrightcoveLegacy'],
|
'add_ie': ['BrightcoveLegacy'],
|
||||||
},
|
},
|
||||||
|
# Nexx embed
|
||||||
|
{
|
||||||
|
'url': 'https://www.funk.net/serien/5940e15073f6120001657956/items/593efbb173f6120001657503',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '247746',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': "Yesterday's Jam (OV)",
|
||||||
|
'description': 'md5:09bc0984723fed34e2581624a84e05f0',
|
||||||
|
'timestamp': 1492594816,
|
||||||
|
'upload_date': '20170419',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'format': 'bestvideo',
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
},
|
||||||
# Facebook <iframe> embed
|
# Facebook <iframe> embed
|
||||||
{
|
{
|
||||||
'url': 'https://www.hostblogger.de/blog/archives/6181-Auto-jagt-Betonmischer.html',
|
'url': 'https://www.hostblogger.de/blog/archives/6181-Auto-jagt-Betonmischer.html',
|
||||||
@@ -1519,6 +1579,21 @@ class GenericIE(InfoExtractor):
|
|||||||
'title': 'Facebook video #599637780109885',
|
'title': 'Facebook video #599637780109885',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
# Facebook <iframe> embed, plugin video
|
||||||
|
{
|
||||||
|
'url': 'http://5pillarsuk.com/2017/06/07/tariq-ramadan-disagrees-with-pr-exercise-by-imams-refusing-funeral-prayers-for-london-attackers/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1754168231264132',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'About the Imams and Religious leaders refusing to perform funeral prayers for...',
|
||||||
|
'uploader': 'Tariq Ramadan (official)',
|
||||||
|
'timestamp': 1496758379,
|
||||||
|
'upload_date': '20170606',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
},
|
||||||
# Facebook API embed
|
# Facebook API embed
|
||||||
{
|
{
|
||||||
'url': 'http://www.lothype.com/blue-stars-2016-preview-standstill-full-show/',
|
'url': 'http://www.lothype.com/blue-stars-2016-preview-standstill-full-show/',
|
||||||
@@ -1718,6 +1793,39 @@ class GenericIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
'add_ie': [WashingtonPostIE.ie_key()],
|
'add_ie': [WashingtonPostIE.ie_key()],
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
# Mediaset embed
|
||||||
|
'url': 'http://www.tgcom24.mediaset.it/politica/serracchiani-voglio-vivere-in-una-societa-aperta-reazioni-sproporzionate-_3071354-201702a.shtml',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '720642',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Serracchiani: "Voglio vivere in una società aperta, con tutela del patto di fiducia"',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
'add_ie': [MediasetIE.ie_key()],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
# JOJ.sk embeds
|
||||||
|
'url': 'https://www.noviny.sk/slovensko/238543-slovenskom-sa-prehnala-vlna-silnych-burok',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '238543-slovenskom-sa-prehnala-vlna-silnych-burok',
|
||||||
|
'title': 'Slovenskom sa prehnala vlna silných búrok',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 5,
|
||||||
|
'add_ie': [JojIE.ie_key()],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
# AMP embed (see https://www.ampproject.org/docs/reference/components/amp-video)
|
||||||
|
'url': 'https://tvrain.ru/amp/418921/',
|
||||||
|
'md5': 'cc00413936695987e8de148b67d14f1d',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '418921',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Стас Намин: «Мы нарушили девственность Кремля»',
|
||||||
|
},
|
||||||
|
},
|
||||||
# {
|
# {
|
||||||
# # TODO: find another test
|
# # TODO: find another test
|
||||||
# # http://schema.org/VideoObject
|
# # http://schema.org/VideoObject
|
||||||
@@ -1892,14 +2000,14 @@ class GenericIE(InfoExtractor):
|
|||||||
content_type = head_response.headers.get('Content-Type', '').lower()
|
content_type = head_response.headers.get('Content-Type', '').lower()
|
||||||
m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
|
m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
|
||||||
if m:
|
if m:
|
||||||
format_id = m.group('format_id')
|
format_id = compat_str(m.group('format_id'))
|
||||||
if format_id.endswith('mpegurl'):
|
if format_id.endswith('mpegurl'):
|
||||||
formats = self._extract_m3u8_formats(url, video_id, 'mp4')
|
formats = self._extract_m3u8_formats(url, video_id, 'mp4')
|
||||||
elif format_id == 'f4m':
|
elif format_id == 'f4m':
|
||||||
formats = self._extract_f4m_formats(url, video_id)
|
formats = self._extract_f4m_formats(url, video_id)
|
||||||
else:
|
else:
|
||||||
formats = [{
|
formats = [{
|
||||||
'format_id': m.group('format_id'),
|
'format_id': format_id,
|
||||||
'url': url,
|
'url': url,
|
||||||
'vcodec': 'none' if m.group('type') == 'audio' else None
|
'vcodec': 'none' if m.group('type') == 'audio' else None
|
||||||
}]
|
}]
|
||||||
@@ -2017,6 +2125,13 @@ class GenericIE(InfoExtractor):
|
|||||||
video_description = self._og_search_description(webpage, default=None)
|
video_description = self._og_search_description(webpage, default=None)
|
||||||
video_thumbnail = self._og_search_thumbnail(webpage, default=None)
|
video_thumbnail = self._og_search_thumbnail(webpage, default=None)
|
||||||
|
|
||||||
|
info_dict.update({
|
||||||
|
'title': video_title,
|
||||||
|
'description': video_description,
|
||||||
|
'thumbnail': video_thumbnail,
|
||||||
|
'age_limit': age_limit,
|
||||||
|
})
|
||||||
|
|
||||||
# Look for Brightcove Legacy Studio embeds
|
# Look for Brightcove Legacy Studio embeds
|
||||||
bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage)
|
bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage)
|
||||||
if bc_urls:
|
if bc_urls:
|
||||||
@@ -2038,6 +2153,16 @@ class GenericIE(InfoExtractor):
|
|||||||
if bc_urls:
|
if bc_urls:
|
||||||
return self.playlist_from_matches(bc_urls, video_id, video_title, ie='BrightcoveNew')
|
return self.playlist_from_matches(bc_urls, video_id, video_title, ie='BrightcoveNew')
|
||||||
|
|
||||||
|
# Look for Nexx embeds
|
||||||
|
nexx_urls = NexxIE._extract_urls(webpage)
|
||||||
|
if nexx_urls:
|
||||||
|
return self.playlist_from_matches(nexx_urls, video_id, video_title, ie=NexxIE.ie_key())
|
||||||
|
|
||||||
|
# Look for Nexx iFrame embeds
|
||||||
|
nexx_embed_urls = NexxEmbedIE._extract_urls(webpage)
|
||||||
|
if nexx_embed_urls:
|
||||||
|
return self.playlist_from_matches(nexx_embed_urls, video_id, video_title, ie=NexxEmbedIE.ie_key())
|
||||||
|
|
||||||
# Look for ThePlatform embeds
|
# Look for ThePlatform embeds
|
||||||
tp_urls = ThePlatformIE._extract_urls(webpage)
|
tp_urls = ThePlatformIE._extract_urls(webpage)
|
||||||
if tp_urls:
|
if tp_urls:
|
||||||
@@ -2110,58 +2235,27 @@ class GenericIE(InfoExtractor):
|
|||||||
return self.playlist_from_matches(
|
return self.playlist_from_matches(
|
||||||
playlists, video_id, video_title, lambda p: '//dailymotion.com/playlist/%s' % p)
|
playlists, video_id, video_title, lambda p: '//dailymotion.com/playlist/%s' % p)
|
||||||
|
|
||||||
|
# Look for DailyMail embeds
|
||||||
|
dailymail_urls = DailyMailIE._extract_urls(webpage)
|
||||||
|
if dailymail_urls:
|
||||||
|
return self.playlist_from_matches(
|
||||||
|
dailymail_urls, video_id, video_title, ie=DailyMailIE.ie_key())
|
||||||
|
|
||||||
# Look for embedded Wistia player
|
# Look for embedded Wistia player
|
||||||
match = re.search(
|
wistia_url = WistiaIE._extract_url(webpage)
|
||||||
r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
|
if wistia_url:
|
||||||
if match:
|
|
||||||
embed_url = self._proto_relative_url(
|
|
||||||
unescapeHTML(match.group('url')))
|
|
||||||
return {
|
return {
|
||||||
'_type': 'url_transparent',
|
'_type': 'url_transparent',
|
||||||
'url': embed_url,
|
'url': self._proto_relative_url(wistia_url),
|
||||||
'ie_key': 'Wistia',
|
'ie_key': WistiaIE.ie_key(),
|
||||||
'uploader': video_uploader,
|
'uploader': video_uploader,
|
||||||
}
|
}
|
||||||
|
|
||||||
match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
|
|
||||||
if match:
|
|
||||||
return {
|
|
||||||
'_type': 'url_transparent',
|
|
||||||
'url': 'wistia:%s' % match.group('id'),
|
|
||||||
'ie_key': 'Wistia',
|
|
||||||
'uploader': video_uploader,
|
|
||||||
}
|
|
||||||
|
|
||||||
match = re.search(
|
|
||||||
r'''(?sx)
|
|
||||||
<script[^>]+src=(["'])(?:https?:)?//fast\.wistia\.com/assets/external/E-v1\.js\1[^>]*>.*?
|
|
||||||
<div[^>]+class=(["']).*?\bwistia_async_(?P<id>[a-z0-9]+)\b.*?\2
|
|
||||||
''', webpage)
|
|
||||||
if match:
|
|
||||||
return self.url_result(self._proto_relative_url(
|
|
||||||
'wistia:%s' % match.group('id')), 'Wistia')
|
|
||||||
|
|
||||||
# Look for SVT player
|
# Look for SVT player
|
||||||
svt_url = SVTIE._extract_url(webpage)
|
svt_url = SVTIE._extract_url(webpage)
|
||||||
if svt_url:
|
if svt_url:
|
||||||
return self.url_result(svt_url, 'SVT')
|
return self.url_result(svt_url, 'SVT')
|
||||||
|
|
||||||
# Look for embedded condenast player
|
|
||||||
matches = re.findall(
|
|
||||||
r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+")',
|
|
||||||
webpage)
|
|
||||||
if matches:
|
|
||||||
return {
|
|
||||||
'_type': 'playlist',
|
|
||||||
'entries': [{
|
|
||||||
'_type': 'url',
|
|
||||||
'ie_key': 'CondeNast',
|
|
||||||
'url': ma,
|
|
||||||
} for ma in matches],
|
|
||||||
'title': video_title,
|
|
||||||
'id': video_id,
|
|
||||||
}
|
|
||||||
|
|
||||||
# Look for Bandcamp pages with custom domain
|
# Look for Bandcamp pages with custom domain
|
||||||
mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
|
mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
|
||||||
if mobj is not None:
|
if mobj is not None:
|
||||||
@@ -2243,9 +2337,9 @@ class GenericIE(InfoExtractor):
|
|||||||
return self.url_result(mobj.group('url'))
|
return self.url_result(mobj.group('url'))
|
||||||
|
|
||||||
# Look for embedded Facebook player
|
# Look for embedded Facebook player
|
||||||
facebook_url = FacebookIE._extract_url(webpage)
|
facebook_urls = FacebookIE._extract_urls(webpage)
|
||||||
if facebook_url is not None:
|
if facebook_urls:
|
||||||
return self.url_result(facebook_url, 'Facebook')
|
return self.playlist_from_matches(facebook_urls, video_id, video_title)
|
||||||
|
|
||||||
# Look for embedded VK player
|
# Look for embedded VK player
|
||||||
mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
|
mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
|
||||||
@@ -2442,12 +2536,12 @@ class GenericIE(InfoExtractor):
|
|||||||
if kaltura_url:
|
if kaltura_url:
|
||||||
return self.url_result(smuggle_url(kaltura_url, {'source_url': url}), KalturaIE.ie_key())
|
return self.url_result(smuggle_url(kaltura_url, {'source_url': url}), KalturaIE.ie_key())
|
||||||
|
|
||||||
# Look for Eagle.Platform embeds
|
# Look for EaglePlatform embeds
|
||||||
eagleplatform_url = EaglePlatformIE._extract_url(webpage)
|
eagleplatform_url = EaglePlatformIE._extract_url(webpage)
|
||||||
if eagleplatform_url:
|
if eagleplatform_url:
|
||||||
return self.url_result(eagleplatform_url, EaglePlatformIE.ie_key())
|
return self.url_result(smuggle_url(eagleplatform_url, {'referrer': url}), EaglePlatformIE.ie_key())
|
||||||
|
|
||||||
# Look for ClipYou (uses Eagle.Platform) embeds
|
# Look for ClipYou (uses EaglePlatform) embeds
|
||||||
mobj = re.search(
|
mobj = re.search(
|
||||||
r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
|
r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
|
||||||
if mobj is not None:
|
if mobj is not None:
|
||||||
@@ -2555,29 +2649,6 @@ class GenericIE(InfoExtractor):
|
|||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
limelight_urls, video_id, video_title, video_description)
|
limelight_urls, video_id, video_title, video_description)
|
||||||
|
|
||||||
mobj = re.search(r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})', webpage)
|
|
||||||
if mobj:
|
|
||||||
lm = {
|
|
||||||
'Media': 'media',
|
|
||||||
'Channel': 'channel',
|
|
||||||
'ChannelList': 'channel_list',
|
|
||||||
}
|
|
||||||
return self.url_result(smuggle_url('limelight:%s:%s' % (
|
|
||||||
lm[mobj.group(1)], mobj.group(2)), {'source_url': url}),
|
|
||||||
'Limelight%s' % mobj.group(1), mobj.group(2))
|
|
||||||
|
|
||||||
mobj = re.search(
|
|
||||||
r'''(?sx)
|
|
||||||
<object[^>]+class=(["\'])LimelightEmbeddedPlayerFlash\1[^>]*>.*?
|
|
||||||
<param[^>]+
|
|
||||||
name=(["\'])flashVars\2[^>]+
|
|
||||||
value=(["\'])(?:(?!\3).)*mediaId=(?P<id>[a-z0-9]{32})
|
|
||||||
''', webpage)
|
|
||||||
if mobj:
|
|
||||||
return self.url_result(smuggle_url(
|
|
||||||
'limelight:media:%s' % mobj.group('id'),
|
|
||||||
{'source_url': url}), 'LimelightMedia', mobj.group('id'))
|
|
||||||
|
|
||||||
# Look for Anvato embeds
|
# Look for Anvato embeds
|
||||||
anvato_urls = AnvatoIE._extract_urls(self, webpage, video_id)
|
anvato_urls = AnvatoIE._extract_urls(self, webpage, video_id)
|
||||||
if anvato_urls:
|
if anvato_urls:
|
||||||
@@ -2699,7 +2770,7 @@ class GenericIE(InfoExtractor):
|
|||||||
rutube_urls = RutubeIE._extract_urls(webpage)
|
rutube_urls = RutubeIE._extract_urls(webpage)
|
||||||
if rutube_urls:
|
if rutube_urls:
|
||||||
return self.playlist_from_matches(
|
return self.playlist_from_matches(
|
||||||
rutube_urls, ie=RutubeIE.ie_key())
|
rutube_urls, video_id, video_title, ie=RutubeIE.ie_key())
|
||||||
|
|
||||||
# Look for WashingtonPost embeds
|
# Look for WashingtonPost embeds
|
||||||
wapo_urls = WashingtonPostIE._extract_urls(webpage)
|
wapo_urls = WashingtonPostIE._extract_urls(webpage)
|
||||||
@@ -2707,18 +2778,38 @@ class GenericIE(InfoExtractor):
|
|||||||
return self.playlist_from_matches(
|
return self.playlist_from_matches(
|
||||||
wapo_urls, video_id, video_title, ie=WashingtonPostIE.ie_key())
|
wapo_urls, video_id, video_title, ie=WashingtonPostIE.ie_key())
|
||||||
|
|
||||||
|
# Look for Mediaset embeds
|
||||||
|
mediaset_urls = MediasetIE._extract_urls(webpage)
|
||||||
|
if mediaset_urls:
|
||||||
|
return self.playlist_from_matches(
|
||||||
|
mediaset_urls, video_id, video_title, ie=MediasetIE.ie_key())
|
||||||
|
|
||||||
|
# Look for JOJ.sk embeds
|
||||||
|
joj_urls = JojIE._extract_urls(webpage)
|
||||||
|
if joj_urls:
|
||||||
|
return self.playlist_from_matches(
|
||||||
|
joj_urls, video_id, video_title, ie=JojIE.ie_key())
|
||||||
|
|
||||||
|
def merge_dicts(dict1, dict2):
|
||||||
|
merged = {}
|
||||||
|
for k, v in dict1.items():
|
||||||
|
if v is not None:
|
||||||
|
merged[k] = v
|
||||||
|
for k, v in dict2.items():
|
||||||
|
if v is None:
|
||||||
|
continue
|
||||||
|
if (k not in merged or
|
||||||
|
(isinstance(v, compat_str) and v and
|
||||||
|
isinstance(merged[k], compat_str) and
|
||||||
|
not merged[k])):
|
||||||
|
merged[k] = v
|
||||||
|
return merged
|
||||||
|
|
||||||
# Looking for http://schema.org/VideoObject
|
# Looking for http://schema.org/VideoObject
|
||||||
json_ld = self._search_json_ld(
|
json_ld = self._search_json_ld(
|
||||||
webpage, video_id, default={}, expected_type='VideoObject')
|
webpage, video_id, default={}, expected_type='VideoObject')
|
||||||
if json_ld.get('url'):
|
if json_ld.get('url'):
|
||||||
info_dict.update({
|
return merge_dicts(json_ld, info_dict)
|
||||||
'title': video_title or info_dict['title'],
|
|
||||||
'description': video_description,
|
|
||||||
'thumbnail': video_thumbnail,
|
|
||||||
'age_limit': age_limit
|
|
||||||
})
|
|
||||||
info_dict.update(json_ld)
|
|
||||||
return info_dict
|
|
||||||
|
|
||||||
# Look for HTML5 media
|
# Look for HTML5 media
|
||||||
entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
|
entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
|
||||||
@@ -2736,9 +2827,7 @@ class GenericIE(InfoExtractor):
|
|||||||
if jwplayer_data:
|
if jwplayer_data:
|
||||||
info = self._parse_jwplayer_data(
|
info = self._parse_jwplayer_data(
|
||||||
jwplayer_data, video_id, require_title=False, base_url=url)
|
jwplayer_data, video_id, require_title=False, base_url=url)
|
||||||
if not info.get('title'):
|
return merge_dicts(info, info_dict)
|
||||||
info['title'] = video_title
|
|
||||||
return info
|
|
||||||
|
|
||||||
def check_video(vurl):
|
def check_video(vurl):
|
||||||
if YoutubeIE.suitable(vurl):
|
if YoutubeIE.suitable(vurl):
|
||||||
|
@@ -82,7 +82,7 @@ class GfycatIE(InfoExtractor):
|
|||||||
video_url = gfy.get('%sUrl' % format_id)
|
video_url = gfy.get('%sUrl' % format_id)
|
||||||
if not video_url:
|
if not video_url:
|
||||||
continue
|
continue
|
||||||
filesize = gfy.get('%sSize' % format_id)
|
filesize = int_or_none(gfy.get('%sSize' % format_id))
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'format_id': format_id,
|
'format_id': format_id,
|
||||||
|
@@ -5,9 +5,10 @@ import json
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
unescapeHTML,
|
determine_ext,
|
||||||
qualities,
|
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
qualities,
|
||||||
|
unescapeHTML,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -15,7 +16,7 @@ class GiantBombIE(InfoExtractor):
|
|||||||
_VALID_URL = r'https?://(?:www\.)?giantbomb\.com/videos/(?P<display_id>[^/]+)/(?P<id>\d+-\d+)'
|
_VALID_URL = r'https?://(?:www\.)?giantbomb\.com/videos/(?P<display_id>[^/]+)/(?P<id>\d+-\d+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.giantbomb.com/videos/quick-look-destiny-the-dark-below/2300-9782/',
|
'url': 'http://www.giantbomb.com/videos/quick-look-destiny-the-dark-below/2300-9782/',
|
||||||
'md5': '57badeface303ecf6b98b812de1b9018',
|
'md5': 'c8ea694254a59246a42831155dec57ac',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2300-9782',
|
'id': '2300-9782',
|
||||||
'display_id': 'quick-look-destiny-the-dark-below',
|
'display_id': 'quick-look-destiny-the-dark-below',
|
||||||
@@ -51,11 +52,16 @@ class GiantBombIE(InfoExtractor):
|
|||||||
for format_id, video_url in video['videoStreams'].items():
|
for format_id, video_url in video['videoStreams'].items():
|
||||||
if format_id == 'f4m_stream':
|
if format_id == 'f4m_stream':
|
||||||
continue
|
continue
|
||||||
if video_url.endswith('.f4m'):
|
ext = determine_ext(video_url)
|
||||||
|
if ext == 'f4m':
|
||||||
f4m_formats = self._extract_f4m_formats(video_url + '?hdcore=3.3.1', display_id)
|
f4m_formats = self._extract_f4m_formats(video_url + '?hdcore=3.3.1', display_id)
|
||||||
if f4m_formats:
|
if f4m_formats:
|
||||||
f4m_formats[0]['quality'] = quality(format_id)
|
f4m_formats[0]['quality'] = quality(format_id)
|
||||||
formats.extend(f4m_formats)
|
formats.extend(f4m_formats)
|
||||||
|
elif ext == 'm3u8':
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
video_url, display_id, ext='mp4', entry_protocol='m3u8_native',
|
||||||
|
m3u8_id='hls', fatal=False))
|
||||||
else:
|
else:
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
|
@@ -1,66 +0,0 @@
|
|||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from .ooyala import OoyalaIE
|
|
||||||
from ..utils import js_to_json
|
|
||||||
|
|
||||||
|
|
||||||
class GodTVIE(InfoExtractor):
|
|
||||||
_VALID_URL = r'https?://(?:www\.)?god\.tv(?:/[^/]+)*/(?P<id>[^/?#&]+)'
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'http://god.tv/jesus-image/video/jesus-conference-2016/randy-needham',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'lpd3g2MzE6D1g8zFAKz8AGpxWcpu6o_3',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Randy Needham',
|
|
||||||
'duration': 3615.08,
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
'skip_download': True,
|
|
||||||
}
|
|
||||||
}, {
|
|
||||||
'url': 'http://god.tv/playlist/bible-study',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'bible-study',
|
|
||||||
},
|
|
||||||
'playlist_mincount': 37,
|
|
||||||
}, {
|
|
||||||
'url': 'http://god.tv/node/15097',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': 'http://god.tv/live/africa',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': 'http://god.tv/liveevents',
|
|
||||||
'only_matching': True,
|
|
||||||
}]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
display_id = self._match_id(url)
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, display_id)
|
|
||||||
|
|
||||||
settings = self._parse_json(
|
|
||||||
self._search_regex(
|
|
||||||
r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
|
|
||||||
webpage, 'settings', default='{}'),
|
|
||||||
display_id, transform_source=js_to_json, fatal=False)
|
|
||||||
|
|
||||||
ooyala_id = None
|
|
||||||
|
|
||||||
if settings:
|
|
||||||
playlist = settings.get('playlist')
|
|
||||||
if playlist and isinstance(playlist, list):
|
|
||||||
entries = [
|
|
||||||
OoyalaIE._build_url_result(video['content_id'])
|
|
||||||
for video in playlist if video.get('content_id')]
|
|
||||||
if entries:
|
|
||||||
return self.playlist_result(entries, display_id)
|
|
||||||
ooyala_id = settings.get('ooyala', {}).get('content_id')
|
|
||||||
|
|
||||||
if not ooyala_id:
|
|
||||||
ooyala_id = self._search_regex(
|
|
||||||
r'["\']content_id["\']\s*:\s*(["\'])(?P<id>[\w-]+)\1',
|
|
||||||
webpage, 'ooyala id', group='id')
|
|
||||||
|
|
||||||
return OoyalaIE._build_url_result(ooyala_id)
|
|
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
|
compat_str,
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@@ -46,7 +47,7 @@ class GolemIE(InfoExtractor):
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
formats.append({
|
formats.append({
|
||||||
'format_id': e.tag,
|
'format_id': compat_str(e.tag),
|
||||||
'url': compat_urlparse.urljoin(self._PREFIX, url),
|
'url': compat_urlparse.urljoin(self._PREFIX, url),
|
||||||
'height': self._int(e.get('height'), 'height'),
|
'height': self._int(e.get('height'), 'height'),
|
||||||
'width': self._int(e.get('width'), 'width'),
|
'width': self._int(e.get('width'), 'width'),
|
||||||
|
@@ -69,19 +69,32 @@ class GoogleDriveIE(InfoExtractor):
|
|||||||
r'"fmt_stream_map"\s*,\s*"([^"]+)', webpage, 'fmt stream map').split(',')
|
r'"fmt_stream_map"\s*,\s*"([^"]+)', webpage, 'fmt stream map').split(',')
|
||||||
fmt_list = self._search_regex(r'"fmt_list"\s*,\s*"([^"]+)', webpage, 'fmt_list').split(',')
|
fmt_list = self._search_regex(r'"fmt_list"\s*,\s*"([^"]+)', webpage, 'fmt_list').split(',')
|
||||||
|
|
||||||
|
resolutions = {}
|
||||||
|
for fmt in fmt_list:
|
||||||
|
mobj = re.search(
|
||||||
|
r'^(?P<format_id>\d+)/(?P<width>\d+)[xX](?P<height>\d+)', fmt)
|
||||||
|
if mobj:
|
||||||
|
resolutions[mobj.group('format_id')] = (
|
||||||
|
int(mobj.group('width')), int(mobj.group('height')))
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for fmt, fmt_stream in zip(fmt_list, fmt_stream_map):
|
for fmt_stream in fmt_stream_map:
|
||||||
fmt_id, fmt_url = fmt_stream.split('|')
|
fmt_stream_split = fmt_stream.split('|')
|
||||||
resolution = fmt.split('/')[1]
|
if len(fmt_stream_split) < 2:
|
||||||
width, height = resolution.split('x')
|
continue
|
||||||
formats.append({
|
format_id, format_url = fmt_stream_split[:2]
|
||||||
'url': lowercase_escape(fmt_url),
|
f = {
|
||||||
'format_id': fmt_id,
|
'url': lowercase_escape(format_url),
|
||||||
'resolution': resolution,
|
'format_id': format_id,
|
||||||
'width': int_or_none(width),
|
'ext': self._FORMATS_EXT[format_id],
|
||||||
'height': int_or_none(height),
|
}
|
||||||
'ext': self._FORMATS_EXT[fmt_id],
|
resolution = resolutions.get(format_id)
|
||||||
})
|
if resolution:
|
||||||
|
f.update({
|
||||||
|
'width': resolution[0],
|
||||||
|
'height': resolution[1],
|
||||||
|
})
|
||||||
|
formats.append(f)
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
@@ -7,14 +7,19 @@ from .common import InfoExtractor
|
|||||||
class HGTVComShowIE(InfoExtractor):
|
class HGTVComShowIE(InfoExtractor):
|
||||||
IE_NAME = 'hgtv.com:show'
|
IE_NAME = 'hgtv.com:show'
|
||||||
_VALID_URL = r'https?://(?:www\.)?hgtv\.com/shows/[^/]+/(?P<id>[^/?#&]+)'
|
_VALID_URL = r'https?://(?:www\.)?hgtv\.com/shows/[^/]+/(?P<id>[^/?#&]+)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.hgtv.com/shows/flip-or-flop/flip-or-flop-full-episodes-videos',
|
# data-module="video"
|
||||||
|
'url': 'http://www.hgtv.com/shows/flip-or-flop/flip-or-flop-full-episodes-season-4-videos',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'flip-or-flop-full-episodes-videos',
|
'id': 'flip-or-flop-full-episodes-season-4-videos',
|
||||||
'title': 'Flip or Flop Full Episodes',
|
'title': 'Flip or Flop Full Episodes',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 15,
|
'playlist_mincount': 15,
|
||||||
}
|
}, {
|
||||||
|
# data-deferred-module="video"
|
||||||
|
'url': 'http://www.hgtv.com/shows/good-bones/episodes/an-old-victorian-house-gets-a-new-facelift',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id = self._match_id(url)
|
display_id = self._match_id(url)
|
||||||
@@ -23,7 +28,7 @@ class HGTVComShowIE(InfoExtractor):
|
|||||||
|
|
||||||
config = self._parse_json(
|
config = self._parse_json(
|
||||||
self._search_regex(
|
self._search_regex(
|
||||||
r'(?s)data-module=["\']video["\'][^>]*>.*?<script[^>]+type=["\']text/x-config["\'][^>]*>(.+?)</script',
|
r'(?s)data-(?:deferred-)?module=["\']video["\'][^>]*>.*?<script[^>]+type=["\']text/x-config["\'][^>]*>(.+?)</script',
|
||||||
webpage, 'video config'),
|
webpage, 'video config'),
|
||||||
display_id)['channels'][0]
|
display_id)['channels'][0]
|
||||||
|
|
||||||
|
@@ -16,8 +16,8 @@ from ..utils import (
|
|||||||
|
|
||||||
class HitboxIE(InfoExtractor):
|
class HitboxIE(InfoExtractor):
|
||||||
IE_NAME = 'hitbox'
|
IE_NAME = 'hitbox'
|
||||||
_VALID_URL = r'https?://(?:www\.)?hitbox\.tv/video/(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://(?:www\.)?(?:hitbox|smashcast)\.tv/(?:[^/]+/)*videos?/(?P<id>[0-9]+)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.hitbox.tv/video/203213',
|
'url': 'http://www.hitbox.tv/video/203213',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '203213',
|
'id': '203213',
|
||||||
@@ -38,13 +38,15 @@ class HitboxIE(InfoExtractor):
|
|||||||
# m3u8 download
|
# m3u8 download
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
}
|
}, {
|
||||||
|
'url': 'https://www.smashcast.tv/hitboxlive/videos/203213',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
def _extract_metadata(self, url, video_id):
|
def _extract_metadata(self, url, video_id):
|
||||||
thumb_base = 'https://edge.sf.hitbox.tv'
|
thumb_base = 'https://edge.sf.hitbox.tv'
|
||||||
metadata = self._download_json(
|
metadata = self._download_json(
|
||||||
'%s/%s' % (url, video_id), video_id,
|
'%s/%s' % (url, video_id), video_id, 'Downloading metadata JSON')
|
||||||
'Downloading metadata JSON')
|
|
||||||
|
|
||||||
date = 'media_live_since'
|
date = 'media_live_since'
|
||||||
media_type = 'livestream'
|
media_type = 'livestream'
|
||||||
@@ -63,14 +65,15 @@ class HitboxIE(InfoExtractor):
|
|||||||
views = int_or_none(video_meta.get('media_views'))
|
views = int_or_none(video_meta.get('media_views'))
|
||||||
timestamp = parse_iso8601(video_meta.get(date), ' ')
|
timestamp = parse_iso8601(video_meta.get(date), ' ')
|
||||||
categories = [video_meta.get('category_name')]
|
categories = [video_meta.get('category_name')]
|
||||||
thumbs = [
|
thumbs = [{
|
||||||
{'url': thumb_base + video_meta.get('media_thumbnail'),
|
'url': thumb_base + video_meta.get('media_thumbnail'),
|
||||||
'width': 320,
|
'width': 320,
|
||||||
'height': 180},
|
'height': 180
|
||||||
{'url': thumb_base + video_meta.get('media_thumbnail_large'),
|
}, {
|
||||||
'width': 768,
|
'url': thumb_base + video_meta.get('media_thumbnail_large'),
|
||||||
'height': 432},
|
'width': 768,
|
||||||
]
|
'height': 432
|
||||||
|
}]
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
@@ -90,7 +93,7 @@ class HitboxIE(InfoExtractor):
|
|||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
player_config = self._download_json(
|
player_config = self._download_json(
|
||||||
'https://www.hitbox.tv/api/player/config/video/%s' % video_id,
|
'https://www.smashcast.tv/api/player/config/video/%s' % video_id,
|
||||||
video_id, 'Downloading video JSON')
|
video_id, 'Downloading video JSON')
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
@@ -121,8 +124,7 @@ class HitboxIE(InfoExtractor):
|
|||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
metadata = self._extract_metadata(
|
metadata = self._extract_metadata(
|
||||||
'https://www.hitbox.tv/api/media/video',
|
'https://www.smashcast.tv/api/media/video', video_id)
|
||||||
video_id)
|
|
||||||
metadata['formats'] = formats
|
metadata['formats'] = formats
|
||||||
|
|
||||||
return metadata
|
return metadata
|
||||||
@@ -130,8 +132,8 @@ class HitboxIE(InfoExtractor):
|
|||||||
|
|
||||||
class HitboxLiveIE(HitboxIE):
|
class HitboxLiveIE(HitboxIE):
|
||||||
IE_NAME = 'hitbox:live'
|
IE_NAME = 'hitbox:live'
|
||||||
_VALID_URL = r'https?://(?:www\.)?hitbox\.tv/(?!video)(?P<id>.+)'
|
_VALID_URL = r'https?://(?:www\.)?(?:hitbox|smashcast)\.tv/(?P<id>[^/?#&]+)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.hitbox.tv/dimak',
|
'url': 'http://www.hitbox.tv/dimak',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'dimak',
|
'id': 'dimak',
|
||||||
@@ -146,13 +148,20 @@ class HitboxLiveIE(HitboxIE):
|
|||||||
# live
|
# live
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
}
|
}, {
|
||||||
|
'url': 'https://www.smashcast.tv/dimak',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def suitable(cls, url):
|
||||||
|
return False if HitboxIE.suitable(url) else super(HitboxLiveIE, cls).suitable(url)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
player_config = self._download_json(
|
player_config = self._download_json(
|
||||||
'https://www.hitbox.tv/api/player/config/live/%s' % video_id,
|
'https://www.smashcast.tv/api/player/config/live/%s' % video_id,
|
||||||
video_id)
|
video_id)
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
@@ -197,8 +206,7 @@ class HitboxLiveIE(HitboxIE):
|
|||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
metadata = self._extract_metadata(
|
metadata = self._extract_metadata(
|
||||||
'https://www.hitbox.tv/api/media/live',
|
'https://www.smashcast.tv/api/media/live', video_id)
|
||||||
video_id)
|
|
||||||
metadata['formats'] = formats
|
metadata['formats'] = formats
|
||||||
metadata['is_live'] = True
|
metadata['is_live'] = True
|
||||||
metadata['title'] = self._live_title(metadata.get('title'))
|
metadata['title'] = self._live_title(metadata.get('title'))
|
||||||
|
@@ -89,6 +89,11 @@ class IGNIE(InfoExtractor):
|
|||||||
'url': 'http://me.ign.com/ar/angry-birds-2/106533/video/lrd-ldyy-lwl-lfylm-angry-birds',
|
'url': 'http://me.ign.com/ar/angry-birds-2/106533/video/lrd-ldyy-lwl-lfylm-angry-birds',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
# videoId pattern
|
||||||
|
'url': 'http://www.ign.com/articles/2017/06/08/new-ducktales-short-donalds-birthday-doesnt-go-as-planned',
|
||||||
|
'only_matching': True,
|
||||||
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
def _find_video_id(self, webpage):
|
def _find_video_id(self, webpage):
|
||||||
@@ -98,6 +103,8 @@ class IGNIE(InfoExtractor):
|
|||||||
r'data-video-id="(.+?)"',
|
r'data-video-id="(.+?)"',
|
||||||
r'<object id="vid_(.+?)"',
|
r'<object id="vid_(.+?)"',
|
||||||
r'<meta name="og:image" content=".*/(.+?)-(.+?)/.+.jpg"',
|
r'<meta name="og:image" content=".*/(.+?)-(.+?)/.+.jpg"',
|
||||||
|
r'videoId"\s*:\s*"(.+?)"',
|
||||||
|
r'videoId["\']\s*:\s*["\']([^"\']+?)["\']',
|
||||||
]
|
]
|
||||||
return self._search_regex(res_id, webpage, 'video id', default=None)
|
return self._search_regex(res_id, webpage, 'video id', default=None)
|
||||||
|
|
||||||
|
@@ -13,7 +13,7 @@ from ..utils import (
|
|||||||
class ImdbIE(InfoExtractor):
|
class ImdbIE(InfoExtractor):
|
||||||
IE_NAME = 'imdb'
|
IE_NAME = 'imdb'
|
||||||
IE_DESC = 'Internet Movie Database trailers'
|
IE_DESC = 'Internet Movie Database trailers'
|
||||||
_VALID_URL = r'https?://(?:www|m)\.imdb\.com/(?:video/[^/]+/|title/tt\d+.*?#lb-|videoplayer/)vi(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:www|m)\.imdb\.com/(?:video|title).+?[/-]vi(?P<id>\d+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.imdb.com/video/imdb/vi2524815897',
|
'url': 'http://www.imdb.com/video/imdb/vi2524815897',
|
||||||
@@ -35,6 +35,9 @@ class ImdbIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://www.imdb.com/videoplayer/vi1562949145',
|
'url': 'http://www.imdb.com/videoplayer/vi1562949145',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.imdb.com/title/tt4218696/videoplayer/vi2608641561',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@@ -59,12 +59,18 @@ class ITVIE(InfoExtractor):
|
|||||||
def _add_sub_element(element, name):
|
def _add_sub_element(element, name):
|
||||||
return etree.SubElement(element, _add_ns(name))
|
return etree.SubElement(element, _add_ns(name))
|
||||||
|
|
||||||
|
production_id = (
|
||||||
|
params.get('data-video-autoplay-id') or
|
||||||
|
'%s#001' % (
|
||||||
|
params.get('data-video-episode-id') or
|
||||||
|
video_id.replace('a', '/')))
|
||||||
|
|
||||||
req_env = etree.Element(_add_ns('soapenv:Envelope'))
|
req_env = etree.Element(_add_ns('soapenv:Envelope'))
|
||||||
_add_sub_element(req_env, 'soapenv:Header')
|
_add_sub_element(req_env, 'soapenv:Header')
|
||||||
body = _add_sub_element(req_env, 'soapenv:Body')
|
body = _add_sub_element(req_env, 'soapenv:Body')
|
||||||
get_playlist = _add_sub_element(body, ('tem:GetPlaylist'))
|
get_playlist = _add_sub_element(body, ('tem:GetPlaylist'))
|
||||||
request = _add_sub_element(get_playlist, 'tem:request')
|
request = _add_sub_element(get_playlist, 'tem:request')
|
||||||
_add_sub_element(request, 'itv:ProductionId').text = params['data-video-id']
|
_add_sub_element(request, 'itv:ProductionId').text = production_id
|
||||||
_add_sub_element(request, 'itv:RequestGuid').text = compat_str(uuid.uuid4()).upper()
|
_add_sub_element(request, 'itv:RequestGuid').text = compat_str(uuid.uuid4()).upper()
|
||||||
vodcrid = _add_sub_element(request, 'itv:Vodcrid')
|
vodcrid = _add_sub_element(request, 'itv:Vodcrid')
|
||||||
_add_sub_element(vodcrid, 'com:Id')
|
_add_sub_element(vodcrid, 'com:Id')
|
||||||
|
100
youtube_dl/extractor/joj.py
Executable file
100
youtube_dl/extractor/joj.py
Executable file
@@ -0,0 +1,100 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
js_to_json,
|
||||||
|
try_get,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class JojIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'''(?x)
|
||||||
|
(?:
|
||||||
|
joj:|
|
||||||
|
https?://media\.joj\.sk/embed/
|
||||||
|
)
|
||||||
|
(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})
|
||||||
|
'''
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://media.joj.sk/embed/a388ec4c-6019-4a4a-9312-b1bee194e932',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'a388ec4c-6019-4a4a-9312-b1bee194e932',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'NOVÉ BÝVANIE',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
'duration': 3118,
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'joj:a388ec4c-6019-4a4a-9312-b1bee194e932',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _extract_urls(webpage):
|
||||||
|
return re.findall(
|
||||||
|
r'<iframe\b[^>]+\bsrc=["\'](?P<url>(?:https?:)?//media\.joj\.sk/embed/[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})',
|
||||||
|
webpage)
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(
|
||||||
|
'https://media.joj.sk/embed/%s' % video_id, video_id)
|
||||||
|
|
||||||
|
title = self._search_regex(
|
||||||
|
(r'videoTitle\s*:\s*(["\'])(?P<title>(?:(?!\1).)+)\1',
|
||||||
|
r'<title>(?P<title>[^<]+)'), webpage, 'title',
|
||||||
|
default=None, group='title') or self._og_search_title(webpage)
|
||||||
|
|
||||||
|
bitrates = self._parse_json(
|
||||||
|
self._search_regex(
|
||||||
|
r'(?s)bitrates\s*=\s*({.+?});', webpage, 'bitrates',
|
||||||
|
default='{}'),
|
||||||
|
video_id, transform_source=js_to_json, fatal=False)
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for format_url in try_get(bitrates, lambda x: x['mp4'], list) or []:
|
||||||
|
if isinstance(format_url, compat_str):
|
||||||
|
height = self._search_regex(
|
||||||
|
r'(\d+)[pP]\.', format_url, 'height', default=None)
|
||||||
|
formats.append({
|
||||||
|
'url': format_url,
|
||||||
|
'format_id': '%sp' % height if height else None,
|
||||||
|
'height': int(height),
|
||||||
|
})
|
||||||
|
if not formats:
|
||||||
|
playlist = self._download_xml(
|
||||||
|
'https://media.joj.sk/services/Video.php?clip=%s' % video_id,
|
||||||
|
video_id)
|
||||||
|
for file_el in playlist.findall('./files/file'):
|
||||||
|
path = file_el.get('path')
|
||||||
|
if not path:
|
||||||
|
continue
|
||||||
|
format_id = file_el.get('id') or file_el.get('label')
|
||||||
|
formats.append({
|
||||||
|
'url': 'http://n16.joj.sk/storage/%s' % path.replace(
|
||||||
|
'dat/', '', 1),
|
||||||
|
'format_id': format_id,
|
||||||
|
'height': int_or_none(self._search_regex(
|
||||||
|
r'(\d+)[pP]', format_id or path, 'height',
|
||||||
|
default=None)),
|
||||||
|
})
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
thumbnail = self._og_search_thumbnail(webpage)
|
||||||
|
|
||||||
|
duration = int_or_none(self._search_regex(
|
||||||
|
r'videoDuration\s*:\s*(\d+)', webpage, 'duration', fatal=False))
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'duration': duration,
|
||||||
|
'formats': formats,
|
||||||
|
}
|
@@ -65,9 +65,9 @@ class JoveIE(InfoExtractor):
|
|||||||
webpage, 'description', fatal=False)
|
webpage, 'description', fatal=False)
|
||||||
publish_date = unified_strdate(self._html_search_meta(
|
publish_date = unified_strdate(self._html_search_meta(
|
||||||
'citation_publication_date', webpage, 'publish date', fatal=False))
|
'citation_publication_date', webpage, 'publish date', fatal=False))
|
||||||
comment_count = self._html_search_regex(
|
comment_count = int(self._html_search_regex(
|
||||||
r'<meta name="num_comments" content="(\d+) Comments?"',
|
r'<meta name="num_comments" content="(\d+) Comments?"',
|
||||||
webpage, 'comment count', fatal=False)
|
webpage, 'comment count', fatal=False))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
@@ -324,7 +324,7 @@ class KalturaIE(InfoExtractor):
|
|||||||
if captions:
|
if captions:
|
||||||
for caption in captions.get('objects', []):
|
for caption in captions.get('objects', []):
|
||||||
# Continue if caption is not ready
|
# Continue if caption is not ready
|
||||||
if f.get('status') != 2:
|
if caption.get('status') != 2:
|
||||||
continue
|
continue
|
||||||
if not caption.get('id'):
|
if not caption.get('id'):
|
||||||
continue
|
continue
|
||||||
|
@@ -48,7 +48,7 @@ class KarriereVideosIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
title = (self._html_search_meta('title', webpage, default=None) or
|
title = (self._html_search_meta('title', webpage, default=None) or
|
||||||
self._search_regex(r'<h1 class="title">([^<]+)</h1>'))
|
self._search_regex(r'<h1 class="title">([^<]+)</h1>', webpage, 'video title'))
|
||||||
|
|
||||||
video_id = self._search_regex(
|
video_id = self._search_regex(
|
||||||
r'/config/video/(.+?)\.xml', webpage, 'video id')
|
r'/config/video/(.+?)\.xml', webpage, 'video id')
|
||||||
|
@@ -1,6 +1,5 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import json
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
@@ -11,10 +10,10 @@ class LiveLeakIE(InfoExtractor):
|
|||||||
_VALID_URL = r'https?://(?:\w+\.)?liveleak\.com/view\?(?:.*?)i=(?P<id>[\w_]+)(?:.*)'
|
_VALID_URL = r'https?://(?:\w+\.)?liveleak\.com/view\?(?:.*?)i=(?P<id>[\w_]+)(?:.*)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.liveleak.com/view?i=757_1364311680',
|
'url': 'http://www.liveleak.com/view?i=757_1364311680',
|
||||||
'md5': '50f79e05ba149149c1b4ea961223d5b3',
|
'md5': '0813c2430bea7a46bf13acf3406992f4',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '757_1364311680',
|
'id': '757_1364311680',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'description': 'extremely bad day for this guy..!',
|
'description': 'extremely bad day for this guy..!',
|
||||||
'uploader': 'ljfriel2',
|
'uploader': 'ljfriel2',
|
||||||
'title': 'Most unlucky car accident',
|
'title': 'Most unlucky car accident',
|
||||||
@@ -22,7 +21,7 @@ class LiveLeakIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.liveleak.com/view?i=f93_1390833151',
|
'url': 'http://www.liveleak.com/view?i=f93_1390833151',
|
||||||
'md5': 'b13a29626183c9d33944e6a04f41aafc',
|
'md5': 'd3f1367d14cc3c15bf24fbfbe04b9abf',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'f93_1390833151',
|
'id': 'f93_1390833151',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@@ -32,6 +31,7 @@ class LiveLeakIE(InfoExtractor):
|
|||||||
'thumbnail': r're:^https?://.*\.jpg$'
|
'thumbnail': r're:^https?://.*\.jpg$'
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
|
# Prochan embed
|
||||||
'url': 'http://www.liveleak.com/view?i=4f7_1392687779',
|
'url': 'http://www.liveleak.com/view?i=4f7_1392687779',
|
||||||
'md5': '42c6d97d54f1db107958760788c5f48f',
|
'md5': '42c6d97d54f1db107958760788c5f48f',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@@ -41,11 +41,13 @@ class LiveLeakIE(InfoExtractor):
|
|||||||
'uploader': 'CapObveus',
|
'uploader': 'CapObveus',
|
||||||
'title': 'Man is Fatally Struck by Reckless Car While Packing up a Moving Truck',
|
'title': 'Man is Fatally Struck by Reckless Car While Packing up a Moving Truck',
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
}
|
},
|
||||||
|
'skip': 'Video is dead',
|
||||||
}, {
|
}, {
|
||||||
# Covers https://github.com/rg3/youtube-dl/pull/5983
|
# Covers https://github.com/rg3/youtube-dl/pull/5983
|
||||||
|
# Multiple resolutions
|
||||||
'url': 'http://www.liveleak.com/view?i=801_1409392012',
|
'url': 'http://www.liveleak.com/view?i=801_1409392012',
|
||||||
'md5': '0b3bec2d888c20728ca2ad3642f0ef15',
|
'md5': 'c3a449dbaca5c0d1825caecd52a57d7b',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '801_1409392012',
|
'id': '801_1409392012',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@@ -93,57 +95,39 @@ class LiveLeakIE(InfoExtractor):
|
|||||||
webpage, 'age limit', default=None))
|
webpage, 'age limit', default=None))
|
||||||
video_thumbnail = self._og_search_thumbnail(webpage)
|
video_thumbnail = self._og_search_thumbnail(webpage)
|
||||||
|
|
||||||
sources_raw = self._search_regex(
|
entries = self._parse_html5_media_entries(url, webpage, video_id)
|
||||||
r'(?s)sources:\s*(\[.*?\]),', webpage, 'video URLs', default=None)
|
if not entries:
|
||||||
if sources_raw is None:
|
# Maybe an embed?
|
||||||
alt_source = self._search_regex(
|
embed_url = self._search_regex(
|
||||||
r'(file: ".*?"),', webpage, 'video URL', default=None)
|
r'<iframe[^>]+src="((?:https?:)?//(?:www\.)?(?:prochan|youtube)\.com/embed[^"]+)"',
|
||||||
if alt_source:
|
webpage, 'embed URL')
|
||||||
sources_raw = '[{ %s}]' % alt_source
|
return {
|
||||||
else:
|
'_type': 'url_transparent',
|
||||||
# Maybe an embed?
|
'url': embed_url,
|
||||||
embed_url = self._search_regex(
|
'id': video_id,
|
||||||
r'<iframe[^>]+src="(https?://(?:www\.)?(?:prochan|youtube)\.com/embed[^"]+)"',
|
'title': video_title,
|
||||||
webpage, 'embed URL')
|
'description': video_description,
|
||||||
return {
|
'uploader': video_uploader,
|
||||||
'_type': 'url_transparent',
|
'age_limit': age_limit,
|
||||||
'url': embed_url,
|
}
|
||||||
'id': video_id,
|
|
||||||
'title': video_title,
|
|
||||||
'description': video_description,
|
|
||||||
'uploader': video_uploader,
|
|
||||||
'age_limit': age_limit,
|
|
||||||
}
|
|
||||||
|
|
||||||
sources_json = re.sub(r'\s([a-z]+):\s', r'"\1": ', sources_raw)
|
info_dict = entries[0]
|
||||||
sources = json.loads(sources_json)
|
|
||||||
|
|
||||||
formats = [{
|
for a_format in info_dict['formats']:
|
||||||
'format_id': '%s' % i,
|
if not a_format.get('height'):
|
||||||
'format_note': s.get('label'),
|
a_format['height'] = int_or_none(self._search_regex(
|
||||||
'url': s['file'],
|
r'([0-9]+)p\.mp4', a_format['url'], 'height label',
|
||||||
} for i, s in enumerate(sources)]
|
default=None))
|
||||||
|
|
||||||
for i, s in enumerate(sources):
|
self._sort_formats(info_dict['formats'])
|
||||||
# Removing '.h264_*.mp4' gives the raw video, which is essentially
|
|
||||||
# the same video without the LiveLeak logo at the top (see
|
|
||||||
# https://github.com/rg3/youtube-dl/pull/4768)
|
|
||||||
orig_url = re.sub(r'\.h264_.+?\.mp4', '', s['file'])
|
|
||||||
if s['file'] != orig_url:
|
|
||||||
formats.append({
|
|
||||||
'format_id': 'original-%s' % i,
|
|
||||||
'format_note': s.get('label'),
|
|
||||||
'url': orig_url,
|
|
||||||
'preference': 1,
|
|
||||||
})
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
return {
|
info_dict.update({
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': video_title,
|
'title': video_title,
|
||||||
'description': video_description,
|
'description': video_description,
|
||||||
'uploader': video_uploader,
|
'uploader': video_uploader,
|
||||||
'formats': formats,
|
|
||||||
'age_limit': age_limit,
|
'age_limit': age_limit,
|
||||||
'thumbnail': video_thumbnail,
|
'thumbnail': video_thumbnail,
|
||||||
}
|
})
|
||||||
|
|
||||||
|
return info_dict
|
||||||
|
@@ -17,7 +17,7 @@ from ..utils import (
|
|||||||
class MedialaanIE(InfoExtractor):
|
class MedialaanIE(InfoExtractor):
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
https?://
|
https?://
|
||||||
(?:www\.)?
|
(?:www\.|nieuws\.)?
|
||||||
(?:
|
(?:
|
||||||
(?P<site_id>vtm|q2|vtmkzoom)\.be/
|
(?P<site_id>vtm|q2|vtmkzoom)\.be/
|
||||||
(?:
|
(?:
|
||||||
@@ -85,6 +85,22 @@ class MedialaanIE(InfoExtractor):
|
|||||||
# clip
|
# clip
|
||||||
'url': 'http://vtmkzoom.be/k3-dansstudio/een-nieuw-seizoen-van-k3-dansstudio',
|
'url': 'http://vtmkzoom.be/k3-dansstudio/een-nieuw-seizoen-van-k3-dansstudio',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# http/s redirect
|
||||||
|
'url': 'https://vtmkzoom.be/video?aid=45724',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '257136373657000',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'K3 Dansstudio Ushuaia afl.6',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
'skip': 'Requires account credentials',
|
||||||
|
}, {
|
||||||
|
# nieuws.vtm.be
|
||||||
|
'url': 'https://nieuws.vtm.be/stadion/stadion/genk-nog-moeilijk-programma',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_initialize(self):
|
def _real_initialize(self):
|
||||||
@@ -146,6 +162,8 @@ class MedialaanIE(InfoExtractor):
|
|||||||
video_id, transform_source=lambda s: '[%s]' % s, fatal=False)
|
video_id, transform_source=lambda s: '[%s]' % s, fatal=False)
|
||||||
if player:
|
if player:
|
||||||
video = player[-1]
|
video = player[-1]
|
||||||
|
if video['videoUrl'] in ('http', 'https'):
|
||||||
|
return self.url_result(video['url'], MedialaanIE.ie_key())
|
||||||
info = {
|
info = {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': video['videoUrl'],
|
'url': video['videoUrl'],
|
||||||
|
118
youtube_dl/extractor/mediaset.py
Normal file
118
youtube_dl/extractor/mediaset.py
Normal file
@@ -0,0 +1,118 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
|
from ..utils import (
|
||||||
|
determine_ext,
|
||||||
|
parse_duration,
|
||||||
|
try_get,
|
||||||
|
unified_strdate,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class MediasetIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'''(?x)
|
||||||
|
(?:
|
||||||
|
mediaset:|
|
||||||
|
https?://
|
||||||
|
(?:www\.)?video\.mediaset\.it/
|
||||||
|
(?:
|
||||||
|
(?:video|on-demand)/(?:[^/]+/)+[^/]+_|
|
||||||
|
player/playerIFrame(?:Twitter)?\.shtml\?.*?\bid=
|
||||||
|
)
|
||||||
|
)(?P<id>[0-9]+)
|
||||||
|
'''
|
||||||
|
_TESTS = [{
|
||||||
|
# full episode
|
||||||
|
'url': 'http://www.video.mediaset.it/video/hello_goodbye/full/quarta-puntata_661824.html',
|
||||||
|
'md5': '9b75534d42c44ecef7bf1ffeacb7f85d',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '661824',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Quarta puntata',
|
||||||
|
'description': 'md5:7183696d6df570e3412a5ef74b27c5e2',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
'duration': 1414,
|
||||||
|
'creator': 'mediaset',
|
||||||
|
'upload_date': '20161107',
|
||||||
|
'series': 'Hello Goodbye',
|
||||||
|
'categories': ['reality'],
|
||||||
|
},
|
||||||
|
'expected_warnings': ['is not a supported codec'],
|
||||||
|
}, {
|
||||||
|
# clip
|
||||||
|
'url': 'http://www.video.mediaset.it/video/gogglebox/clip/un-grande-classico-della-commedia-sexy_661680.html',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# iframe simple
|
||||||
|
'url': 'http://www.video.mediaset.it/player/playerIFrame.shtml?id=665924&autoplay=true',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# iframe twitter (from http://www.wittytv.it/se-prima-mi-fidavo-zero/)
|
||||||
|
'url': 'https://www.video.mediaset.it/player/playerIFrameTwitter.shtml?id=665104&playrelated=false&autoplay=false&related=true&hidesocial=true',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'mediaset:661824',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _extract_urls(webpage):
|
||||||
|
return [
|
||||||
|
mobj.group('url')
|
||||||
|
for mobj in re.finditer(
|
||||||
|
r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>https?://(?:www\.)?video\.mediaset\.it/player/playerIFrame(?:Twitter)?\.shtml\?.*?\bid=\d+.*?)\1',
|
||||||
|
webpage)]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
video_list = self._download_json(
|
||||||
|
'http://cdnsel01.mediaset.net/GetCdn.aspx',
|
||||||
|
video_id, 'Downloading video CDN JSON', query={
|
||||||
|
'streamid': video_id,
|
||||||
|
'format': 'json',
|
||||||
|
})['videoList']
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for format_url in video_list:
|
||||||
|
if '.ism' in format_url:
|
||||||
|
formats.extend(self._extract_ism_formats(
|
||||||
|
format_url, video_id, ism_id='mss', fatal=False))
|
||||||
|
else:
|
||||||
|
formats.append({
|
||||||
|
'url': format_url,
|
||||||
|
'format_id': determine_ext(format_url),
|
||||||
|
})
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
mediainfo = self._download_json(
|
||||||
|
'http://plr.video.mediaset.it/html/metainfo.sjson',
|
||||||
|
video_id, 'Downloading video info JSON', query={
|
||||||
|
'id': video_id,
|
||||||
|
})['video']
|
||||||
|
|
||||||
|
title = mediainfo['title']
|
||||||
|
|
||||||
|
creator = try_get(
|
||||||
|
mediainfo, lambda x: x['brand-info']['publisher'], compat_str)
|
||||||
|
category = try_get(
|
||||||
|
mediainfo, lambda x: x['brand-info']['category'], compat_str)
|
||||||
|
categories = [category] if category else None
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'description': mediainfo.get('short-description'),
|
||||||
|
'thumbnail': mediainfo.get('thumbnail'),
|
||||||
|
'duration': parse_duration(mediainfo.get('duration')),
|
||||||
|
'creator': creator,
|
||||||
|
'upload_date': unified_strdate(mediainfo.get('production-date')),
|
||||||
|
'webpage_url': mediainfo.get('url'),
|
||||||
|
'series': mediainfo.get('brand-value'),
|
||||||
|
'categories': categories,
|
||||||
|
'formats': formats,
|
||||||
|
}
|
@@ -136,11 +136,9 @@ class MiTeleIE(InfoExtractor):
|
|||||||
video_id, 'Downloading gigya script')
|
video_id, 'Downloading gigya script')
|
||||||
|
|
||||||
# Get a appKey/uuid for getting the session key
|
# Get a appKey/uuid for getting the session key
|
||||||
appKey_var = self._search_regex(
|
|
||||||
r'value\s*\(\s*["\']appGridApplicationKey["\']\s*,\s*([0-9a-f]+)',
|
|
||||||
gigya_sc, 'appKey variable')
|
|
||||||
appKey = self._search_regex(
|
appKey = self._search_regex(
|
||||||
r'var\s+%s\s*=\s*["\']([0-9a-f]+)' % appKey_var, gigya_sc, 'appKey')
|
r'constant\s*\(\s*["\']_appGridApplicationKey["\']\s*,\s*["\']([0-9a-f]+)',
|
||||||
|
gigya_sc, 'appKey')
|
||||||
|
|
||||||
session_json = self._download_json(
|
session_json = self._download_json(
|
||||||
'https://appgrid-api.cloud.accedo.tv/session',
|
'https://appgrid-api.cloud.accedo.tv/session',
|
||||||
|
@@ -68,10 +68,6 @@ class MSNIE(InfoExtractor):
|
|||||||
format_url = file_.get('url')
|
format_url = file_.get('url')
|
||||||
if not format_url:
|
if not format_url:
|
||||||
continue
|
continue
|
||||||
ext = determine_ext(format_url)
|
|
||||||
if ext == 'ism':
|
|
||||||
formats.extend(self._extract_ism_formats(
|
|
||||||
format_url + '/Manifest', display_id, 'mss', fatal=False))
|
|
||||||
if 'm3u8' in format_url:
|
if 'm3u8' in format_url:
|
||||||
# m3u8_native should not be used here until
|
# m3u8_native should not be used here until
|
||||||
# https://github.com/rg3/youtube-dl/issues/9913 is fixed
|
# https://github.com/rg3/youtube-dl/issues/9913 is fixed
|
||||||
@@ -79,6 +75,9 @@ class MSNIE(InfoExtractor):
|
|||||||
format_url, display_id, 'mp4',
|
format_url, display_id, 'mp4',
|
||||||
m3u8_id='hls', fatal=False)
|
m3u8_id='hls', fatal=False)
|
||||||
formats.extend(m3u8_formats)
|
formats.extend(m3u8_formats)
|
||||||
|
elif determine_ext(format_url) == 'ism':
|
||||||
|
formats.extend(self._extract_ism_formats(
|
||||||
|
format_url + '/Manifest', display_id, 'mss', fatal=False))
|
||||||
else:
|
else:
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': format_url,
|
'url': format_url,
|
||||||
|
@@ -83,7 +83,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
|||||||
hls_url = rendition.find('./src').text
|
hls_url = rendition.find('./src').text
|
||||||
formats.extend(self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
hls_url, video_id, ext='mp4', entry_protocol='m3u8_native',
|
hls_url, video_id, ext='mp4', entry_protocol='m3u8_native',
|
||||||
m3u8_id='hls'))
|
m3u8_id='hls', fatal=False))
|
||||||
else:
|
else:
|
||||||
# fms
|
# fms
|
||||||
try:
|
try:
|
||||||
@@ -106,7 +106,8 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
|||||||
}])
|
}])
|
||||||
except (KeyError, TypeError):
|
except (KeyError, TypeError):
|
||||||
raise ExtractorError('Invalid rendition field.')
|
raise ExtractorError('Invalid rendition field.')
|
||||||
self._sort_formats(formats)
|
if formats:
|
||||||
|
self._sort_formats(formats)
|
||||||
return formats
|
return formats
|
||||||
|
|
||||||
def _extract_subtitles(self, mdoc, mtvn_id):
|
def _extract_subtitles(self, mdoc, mtvn_id):
|
||||||
@@ -133,8 +134,11 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
|||||||
mediagen_url += 'acceptMethods='
|
mediagen_url += 'acceptMethods='
|
||||||
mediagen_url += 'hls' if use_hls else 'fms'
|
mediagen_url += 'hls' if use_hls else 'fms'
|
||||||
|
|
||||||
mediagen_doc = self._download_xml(mediagen_url, video_id,
|
mediagen_doc = self._download_xml(
|
||||||
'Downloading video urls')
|
mediagen_url, video_id, 'Downloading video urls', fatal=False)
|
||||||
|
|
||||||
|
if mediagen_doc is False:
|
||||||
|
return None
|
||||||
|
|
||||||
item = mediagen_doc.find('./video/item')
|
item = mediagen_doc.find('./video/item')
|
||||||
if item is not None and item.get('type') == 'text':
|
if item is not None and item.get('type') == 'text':
|
||||||
@@ -174,6 +178,13 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
|||||||
|
|
||||||
formats = self._extract_video_formats(mediagen_doc, mtvn_id, video_id)
|
formats = self._extract_video_formats(mediagen_doc, mtvn_id, video_id)
|
||||||
|
|
||||||
|
# Some parts of complete video may be missing (e.g. missing Act 3 in
|
||||||
|
# http://www.southpark.de/alle-episoden/s14e01-sexual-healing)
|
||||||
|
if not formats:
|
||||||
|
return None
|
||||||
|
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'title': title,
|
'title': title,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
@@ -205,9 +216,14 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
|||||||
title = xpath_text(idoc, './channel/title')
|
title = xpath_text(idoc, './channel/title')
|
||||||
description = xpath_text(idoc, './channel/description')
|
description = xpath_text(idoc, './channel/description')
|
||||||
|
|
||||||
|
entries = []
|
||||||
|
for item in idoc.findall('.//item'):
|
||||||
|
info = self._get_video_info(item, use_hls)
|
||||||
|
if info:
|
||||||
|
entries.append(info)
|
||||||
|
|
||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
[self._get_video_info(item, use_hls) for item in idoc.findall('.//item')],
|
entries, playlist_title=title, playlist_description=description)
|
||||||
playlist_title=title, playlist_description=description)
|
|
||||||
|
|
||||||
def _extract_triforce_mgid(self, webpage, data_zone=None, video_id=None):
|
def _extract_triforce_mgid(self, webpage, data_zone=None, video_id=None):
|
||||||
triforce_feed = self._parse_json(self._search_regex(
|
triforce_feed = self._parse_json(self._search_regex(
|
||||||
|
@@ -12,64 +12,62 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class MySpaceIE(InfoExtractor):
|
class MySpaceIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://myspace\.com/([^/]+)/(?P<mediatype>video/[^/]+/|music/song/.*?)(?P<id>\d+)'
|
_VALID_URL = r'''(?x)
|
||||||
|
https?://
|
||||||
|
myspace\.com/[^/]+/
|
||||||
|
(?P<mediatype>
|
||||||
|
video/[^/]+/(?P<video_id>\d+)|
|
||||||
|
music/song/[^/?#&]+-(?P<song_id>\d+)-\d+(?:[/?#&]|$)
|
||||||
|
)
|
||||||
|
'''
|
||||||
|
|
||||||
_TESTS = [
|
_TESTS = [{
|
||||||
{
|
'url': 'https://myspace.com/fiveminutestothestage/video/little-big-town/109594919',
|
||||||
'url': 'https://myspace.com/fiveminutestothestage/video/little-big-town/109594919',
|
'md5': '9c1483c106f4a695c47d2911feed50a7',
|
||||||
'md5': '9c1483c106f4a695c47d2911feed50a7',
|
'info_dict': {
|
||||||
'info_dict': {
|
'id': '109594919',
|
||||||
'id': '109594919',
|
'ext': 'mp4',
|
||||||
'ext': 'mp4',
|
'title': 'Little Big Town',
|
||||||
'title': 'Little Big Town',
|
'description': 'This country quartet was all smiles while playing a sold out show at the Pacific Amphitheatre in Orange County, California.',
|
||||||
'description': 'This country quartet was all smiles while playing a sold out show at the Pacific Amphitheatre in Orange County, California.',
|
'uploader': 'Five Minutes to the Stage',
|
||||||
'uploader': 'Five Minutes to the Stage',
|
'uploader_id': 'fiveminutestothestage',
|
||||||
'uploader_id': 'fiveminutestothestage',
|
'timestamp': 1414108751,
|
||||||
'timestamp': 1414108751,
|
'upload_date': '20141023',
|
||||||
'upload_date': '20141023',
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
# songs
|
# songs
|
||||||
{
|
'url': 'https://myspace.com/killsorrow/music/song/of-weakened-soul...-93388656-103880681',
|
||||||
'url': 'https://myspace.com/killsorrow/music/song/of-weakened-soul...-93388656-103880681',
|
'md5': '1d7ee4604a3da226dd69a123f748b262',
|
||||||
'md5': '1d7ee4604a3da226dd69a123f748b262',
|
'info_dict': {
|
||||||
'info_dict': {
|
'id': '93388656',
|
||||||
'id': '93388656',
|
'ext': 'm4a',
|
||||||
'ext': 'm4a',
|
'title': 'Of weakened soul...',
|
||||||
'title': 'Of weakened soul...',
|
'uploader': 'Killsorrow',
|
||||||
'uploader': 'Killsorrow',
|
'uploader_id': 'killsorrow',
|
||||||
'uploader_id': 'killsorrow',
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
'add_ie': ['Youtube'],
|
|
||||||
'url': 'https://myspace.com/threedaysgrace/music/song/animal-i-have-become-28400208-28218041',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'xqds0B_meys',
|
|
||||||
'ext': 'webm',
|
|
||||||
'title': 'Three Days Grace - Animal I Have Become',
|
|
||||||
'description': 'md5:8bd86b3693e72a077cf863a8530c54bb',
|
|
||||||
'uploader': 'ThreeDaysGraceVEVO',
|
|
||||||
'uploader_id': 'ThreeDaysGraceVEVO',
|
|
||||||
'upload_date': '20091002',
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
'add_ie': ['Youtube'],
|
|
||||||
'url': 'https://myspace.com/starset2/music/song/first-light-95799905-106964426',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'ypWvQgnJrSU',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Starset - First Light',
|
|
||||||
'description': 'md5:2d5db6c9d11d527683bcda818d332414',
|
|
||||||
'uploader': 'Yumi K',
|
|
||||||
'uploader_id': 'SorenPromotions',
|
|
||||||
'upload_date': '20140725',
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
]
|
}, {
|
||||||
|
'add_ie': ['Youtube'],
|
||||||
|
'url': 'https://myspace.com/threedaysgrace/music/song/animal-i-have-become-28400208-28218041',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'xqds0B_meys',
|
||||||
|
'ext': 'webm',
|
||||||
|
'title': 'Three Days Grace - Animal I Have Become',
|
||||||
|
'description': 'md5:8bd86b3693e72a077cf863a8530c54bb',
|
||||||
|
'uploader': 'ThreeDaysGraceVEVO',
|
||||||
|
'uploader_id': 'ThreeDaysGraceVEVO',
|
||||||
|
'upload_date': '20091002',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://myspace.com/starset2/music/song/first-light-95799905-106964426',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://myspace.com/thelargemouthbassband/music/song/02-pure-eyes.mp3-94422330-105113388',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('video_id') or mobj.group('song_id')
|
||||||
is_song = mobj.group('mediatype').startswith('music/song')
|
is_song = mobj.group('mediatype').startswith('music/song')
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
player_url = self._search_regex(
|
player_url = self._search_regex(
|
||||||
|
@@ -1,6 +1,15 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
extract_attributes,
|
||||||
|
int_or_none,
|
||||||
|
parse_duration,
|
||||||
|
parse_filesize,
|
||||||
|
unified_timestamp,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class NewgroundsIE(InfoExtractor):
|
class NewgroundsIE(InfoExtractor):
|
||||||
@@ -13,7 +22,10 @@ class NewgroundsIE(InfoExtractor):
|
|||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': 'B7 - BusMode',
|
'title': 'B7 - BusMode',
|
||||||
'uploader': 'Burn7',
|
'uploader': 'Burn7',
|
||||||
}
|
'timestamp': 1378878540,
|
||||||
|
'upload_date': '20130911',
|
||||||
|
'duration': 143,
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.newgrounds.com/portal/view/673111',
|
'url': 'https://www.newgrounds.com/portal/view/673111',
|
||||||
'md5': '3394735822aab2478c31b1004fe5e5bc',
|
'md5': '3394735822aab2478c31b1004fe5e5bc',
|
||||||
@@ -22,25 +34,133 @@ class NewgroundsIE(InfoExtractor):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Dancin',
|
'title': 'Dancin',
|
||||||
'uploader': 'Squirrelman82',
|
'uploader': 'Squirrelman82',
|
||||||
|
'timestamp': 1460256780,
|
||||||
|
'upload_date': '20160410',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# source format unavailable, additional mp4 formats
|
||||||
|
'url': 'http://www.newgrounds.com/portal/view/689400',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '689400',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'ZTV News Episode 8',
|
||||||
|
'uploader': 'BennettTheSage',
|
||||||
|
'timestamp': 1487965140,
|
||||||
|
'upload_date': '20170224',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
media_id = self._match_id(url)
|
media_id = self._match_id(url)
|
||||||
|
|
||||||
webpage = self._download_webpage(url, media_id)
|
webpage = self._download_webpage(url, media_id)
|
||||||
|
|
||||||
title = self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
r'<title>([^>]+)</title>', webpage, 'title')
|
r'<title>([^>]+)</title>', webpage, 'title')
|
||||||
|
|
||||||
uploader = self._html_search_regex(
|
media_url = self._parse_json(self._search_regex(
|
||||||
r'Author\s*<a[^>]+>([^<]+)', webpage, 'uploader', fatal=False)
|
r'"url"\s*:\s*("[^"]+"),', webpage, ''), media_id)
|
||||||
|
|
||||||
music_url = self._parse_json(self._search_regex(
|
formats = [{
|
||||||
r'"url":("[^"]+"),', webpage, ''), media_id)
|
'url': media_url,
|
||||||
|
'format_id': 'source',
|
||||||
|
'quality': 1,
|
||||||
|
}]
|
||||||
|
|
||||||
|
max_resolution = int_or_none(self._search_regex(
|
||||||
|
r'max_resolution["\']\s*:\s*(\d+)', webpage, 'max resolution',
|
||||||
|
default=None))
|
||||||
|
if max_resolution:
|
||||||
|
url_base = media_url.rpartition('.')[0]
|
||||||
|
for resolution in (360, 720, 1080):
|
||||||
|
if resolution > max_resolution:
|
||||||
|
break
|
||||||
|
formats.append({
|
||||||
|
'url': '%s.%dp.mp4' % (url_base, resolution),
|
||||||
|
'format_id': '%dp' % resolution,
|
||||||
|
'height': resolution,
|
||||||
|
})
|
||||||
|
|
||||||
|
self._check_formats(formats, media_id)
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
uploader = self._search_regex(
|
||||||
|
r'(?:Author|Writer)\s*<a[^>]+>([^<]+)', webpage, 'uploader',
|
||||||
|
fatal=False)
|
||||||
|
|
||||||
|
timestamp = unified_timestamp(self._search_regex(
|
||||||
|
r'<dt>Uploaded</dt>\s*<dd>([^<]+)', webpage, 'timestamp',
|
||||||
|
default=None))
|
||||||
|
duration = parse_duration(self._search_regex(
|
||||||
|
r'<dd>Song\s*</dd><dd>.+?</dd><dd>([^<]+)', webpage, 'duration',
|
||||||
|
default=None))
|
||||||
|
|
||||||
|
filesize_approx = parse_filesize(self._html_search_regex(
|
||||||
|
r'<dd>Song\s*</dd><dd>(.+?)</dd>', webpage, 'filesize',
|
||||||
|
default=None))
|
||||||
|
if len(formats) == 1:
|
||||||
|
formats[0]['filesize_approx'] = filesize_approx
|
||||||
|
|
||||||
|
if '<dd>Song' in webpage:
|
||||||
|
formats[0]['vcodec'] = 'none'
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': media_id,
|
'id': media_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'url': music_url,
|
|
||||||
'uploader': uploader,
|
'uploader': uploader,
|
||||||
|
'timestamp': timestamp,
|
||||||
|
'duration': duration,
|
||||||
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class NewgroundsPlaylistIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?newgrounds\.com/(?:collection|[^/]+/search/[^/]+)/(?P<id>[^/?#&]+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://www.newgrounds.com/collection/cats',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'cats',
|
||||||
|
'title': 'Cats',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 46,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.newgrounds.com/portal/search/author/ZONE-SAMA',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'ZONE-SAMA',
|
||||||
|
'title': 'Portal Search: ZONE-SAMA',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 47,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.newgrounds.com/audio/search/title/cats',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
playlist_id = self._match_id(url)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, playlist_id)
|
||||||
|
|
||||||
|
title = self._search_regex(
|
||||||
|
r'<title>([^>]+)</title>', webpage, 'title', default=None)
|
||||||
|
|
||||||
|
# cut left menu
|
||||||
|
webpage = self._search_regex(
|
||||||
|
r'(?s)<div[^>]+\bclass=["\']column wide(.+)',
|
||||||
|
webpage, 'wide column', default=webpage)
|
||||||
|
|
||||||
|
entries = []
|
||||||
|
for a, path, media_id in re.findall(
|
||||||
|
r'(<a[^>]+\bhref=["\']/?((?:portal/view|audio/listen)/(\d+))[^>]+>)',
|
||||||
|
webpage):
|
||||||
|
a_class = extract_attributes(a).get('class')
|
||||||
|
if a_class not in ('item-portalsubmission', 'item-audiosubmission'):
|
||||||
|
continue
|
||||||
|
entries.append(
|
||||||
|
self.url_result(
|
||||||
|
'https://www.newgrounds.com/%s' % path,
|
||||||
|
ie=NewgroundsIE.ie_key(), video_id=media_id))
|
||||||
|
|
||||||
|
return self.playlist_result(entries, playlist_id, title)
|
||||||
|
271
youtube_dl/extractor/nexx.py
Normal file
271
youtube_dl/extractor/nexx.py
Normal file
@@ -0,0 +1,271 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import hashlib
|
||||||
|
import random
|
||||||
|
import re
|
||||||
|
import time
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
int_or_none,
|
||||||
|
parse_duration,
|
||||||
|
try_get,
|
||||||
|
urlencode_postdata,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class NexxIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://api\.nexx(?:\.cloud|cdn\.com)/v3/(?P<domain_id>\d+)/videos/byid/(?P<id>\d+)'
|
||||||
|
_TESTS = [{
|
||||||
|
# movie
|
||||||
|
'url': 'https://api.nexx.cloud/v3/748/videos/byid/128907',
|
||||||
|
'md5': '16746bfc28c42049492385c989b26c4a',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '128907',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Stiftung Warentest',
|
||||||
|
'alt_title': 'Wie ein Test abläuft',
|
||||||
|
'description': 'md5:d1ddb1ef63de721132abd38639cc2fd2',
|
||||||
|
'release_year': 2013,
|
||||||
|
'creator': 'SPIEGEL TV',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
'duration': 2509,
|
||||||
|
'timestamp': 1384264416,
|
||||||
|
'upload_date': '20131112',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'format': 'bestvideo',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# episode
|
||||||
|
'url': 'https://api.nexx.cloud/v3/741/videos/byid/247858',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '247858',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Return of the Golden Child (OV)',
|
||||||
|
'description': 'md5:5d969537509a92b733de21bae249dc63',
|
||||||
|
'release_year': 2017,
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
'duration': 1397,
|
||||||
|
'timestamp': 1495033267,
|
||||||
|
'upload_date': '20170517',
|
||||||
|
'episode_number': 2,
|
||||||
|
'season_number': 2,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'format': 'bestvideo',
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://api.nexxcdn.com/v3/748/videos/byid/128907',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _extract_urls(webpage):
|
||||||
|
# Reference:
|
||||||
|
# 1. https://nx-s.akamaized.net/files/201510/44.pdf
|
||||||
|
|
||||||
|
entries = []
|
||||||
|
|
||||||
|
# JavaScript Integration
|
||||||
|
mobj = re.search(
|
||||||
|
r'<script\b[^>]+\bsrc=["\']https?://require\.nexx(?:\.cloud|cdn\.com)/(?P<id>\d+)',
|
||||||
|
webpage)
|
||||||
|
if mobj:
|
||||||
|
domain_id = mobj.group('id')
|
||||||
|
for video_id in re.findall(
|
||||||
|
r'(?is)onPLAYReady.+?_play\.init\s*\(.+?\s*,\s*["\']?(\d+)',
|
||||||
|
webpage):
|
||||||
|
entries.append(
|
||||||
|
'https://api.nexx.cloud/v3/%s/videos/byid/%s'
|
||||||
|
% (domain_id, video_id))
|
||||||
|
|
||||||
|
# TODO: support more embed formats
|
||||||
|
|
||||||
|
return entries
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _extract_url(webpage):
|
||||||
|
return NexxIE._extract_urls(webpage)[0]
|
||||||
|
|
||||||
|
def _handle_error(self, response):
|
||||||
|
status = int_or_none(try_get(
|
||||||
|
response, lambda x: x['metadata']['status']) or 200)
|
||||||
|
if 200 <= status < 300:
|
||||||
|
return
|
||||||
|
raise ExtractorError(
|
||||||
|
'%s said: %s' % (self.IE_NAME, response['metadata']['errorhint']),
|
||||||
|
expected=True)
|
||||||
|
|
||||||
|
def _call_api(self, domain_id, path, video_id, data=None, headers={}):
|
||||||
|
headers['Content-Type'] = 'application/x-www-form-urlencoded; charset=UTF-8'
|
||||||
|
result = self._download_json(
|
||||||
|
'https://api.nexx.cloud/v3/%s/%s' % (domain_id, path), video_id,
|
||||||
|
'Downloading %s JSON' % path, data=urlencode_postdata(data),
|
||||||
|
headers=headers)
|
||||||
|
self._handle_error(result)
|
||||||
|
return result['result']
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
domain_id, video_id = mobj.group('domain_id', 'id')
|
||||||
|
|
||||||
|
# Reverse engineered from JS code (see getDeviceID function)
|
||||||
|
device_id = '%d:%d:%d%d' % (
|
||||||
|
random.randint(1, 4), int(time.time()),
|
||||||
|
random.randint(1e4, 99999), random.randint(1, 9))
|
||||||
|
|
||||||
|
result = self._call_api(domain_id, 'session/init', video_id, data={
|
||||||
|
'nxp_devh': device_id,
|
||||||
|
'nxp_userh': '',
|
||||||
|
'precid': '0',
|
||||||
|
'playlicense': '0',
|
||||||
|
'screenx': '1920',
|
||||||
|
'screeny': '1080',
|
||||||
|
'playerversion': '6.0.00',
|
||||||
|
'gateway': 'html5',
|
||||||
|
'adGateway': '',
|
||||||
|
'explicitlanguage': 'en-US',
|
||||||
|
'addTextTemplates': '1',
|
||||||
|
'addDomainData': '1',
|
||||||
|
'addAdModel': '1',
|
||||||
|
}, headers={
|
||||||
|
'X-Request-Enable-Auth-Fallback': '1',
|
||||||
|
})
|
||||||
|
|
||||||
|
cid = result['general']['cid']
|
||||||
|
|
||||||
|
# As described in [1] X-Request-Token generation algorithm is
|
||||||
|
# as follows:
|
||||||
|
# md5( operation + domain_id + domain_secret )
|
||||||
|
# where domain_secret is a static value that will be given by nexx.tv
|
||||||
|
# as per [1]. Here is how this "secret" is generated (reversed
|
||||||
|
# from _play.api.init function, search for clienttoken). So it's
|
||||||
|
# actually not static and not that much of a secret.
|
||||||
|
# 1. https://nexxtvstorage.blob.core.windows.net/files/201610/27.pdf
|
||||||
|
secret = result['device']['clienttoken'][int(device_id[0]):]
|
||||||
|
secret = secret[0:len(secret) - int(device_id[-1])]
|
||||||
|
|
||||||
|
op = 'byid'
|
||||||
|
|
||||||
|
# Reversed from JS code for _play.api.call function (search for
|
||||||
|
# X-Request-Token)
|
||||||
|
request_token = hashlib.md5(
|
||||||
|
''.join((op, domain_id, secret)).encode('utf-8')).hexdigest()
|
||||||
|
|
||||||
|
video = self._call_api(
|
||||||
|
domain_id, 'videos/%s/%s' % (op, video_id), video_id, data={
|
||||||
|
'additionalfields': 'language,channel,actors,studio,licenseby,slug,subtitle,teaser,description',
|
||||||
|
'addInteractionOptions': '1',
|
||||||
|
'addStatusDetails': '1',
|
||||||
|
'addStreamDetails': '1',
|
||||||
|
'addCaptions': '1',
|
||||||
|
'addScenes': '1',
|
||||||
|
'addHotSpots': '1',
|
||||||
|
'addBumpers': '1',
|
||||||
|
'captionFormat': 'data',
|
||||||
|
}, headers={
|
||||||
|
'X-Request-CID': cid,
|
||||||
|
'X-Request-Token': request_token,
|
||||||
|
})
|
||||||
|
|
||||||
|
general = video['general']
|
||||||
|
title = general['title']
|
||||||
|
|
||||||
|
stream_data = video['streamdata']
|
||||||
|
language = general.get('language_raw') or ''
|
||||||
|
|
||||||
|
# TODO: reverse more cdns and formats
|
||||||
|
|
||||||
|
cdn = stream_data['cdnType']
|
||||||
|
assert cdn == 'azure'
|
||||||
|
|
||||||
|
azure_locator = stream_data['azureLocator']
|
||||||
|
|
||||||
|
AZURE_URL = 'http://nx-p%02d.akamaized.net/'
|
||||||
|
|
||||||
|
for secure in ('s', ''):
|
||||||
|
cdn_shield = stream_data.get('cdnShieldHTTP%s' % secure.upper())
|
||||||
|
if cdn_shield:
|
||||||
|
azure_base = 'http%s://%s' % (secure, cdn_shield)
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
azure_base = AZURE_URL % int(stream_data['azureAccount'].replace('nexxplayplus', ''))
|
||||||
|
|
||||||
|
is_ml = ',' in language
|
||||||
|
azure_m3u8_url = '%s%s/%s_src%s.ism/Manifest(format=m3u8-aapl)' % (
|
||||||
|
azure_base, azure_locator, video_id, ('_manifest' if is_ml else ''))
|
||||||
|
|
||||||
|
protection_token = try_get(
|
||||||
|
video, lambda x: x['protectiondata']['token'], compat_str)
|
||||||
|
if protection_token:
|
||||||
|
azure_m3u8_url += '?hdnts=%s' % protection_token
|
||||||
|
|
||||||
|
formats = self._extract_m3u8_formats(
|
||||||
|
azure_m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||||
|
m3u8_id='%s-hls' % cdn)
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'alt_title': general.get('subtitle'),
|
||||||
|
'description': general.get('description'),
|
||||||
|
'release_year': int_or_none(general.get('year')),
|
||||||
|
'creator': general.get('studio') or general.get('studio_adref'),
|
||||||
|
'thumbnail': try_get(
|
||||||
|
video, lambda x: x['imagedata']['thumb'], compat_str),
|
||||||
|
'duration': parse_duration(general.get('runtime')),
|
||||||
|
'timestamp': int_or_none(general.get('uploaded')),
|
||||||
|
'episode_number': int_or_none(try_get(
|
||||||
|
video, lambda x: x['episodedata']['episode'])),
|
||||||
|
'season_number': int_or_none(try_get(
|
||||||
|
video, lambda x: x['episodedata']['season'])),
|
||||||
|
'formats': formats,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class NexxEmbedIE(InfoExtractor):
    """Extractor for nexx embed player pages.

    Matches ``embed.nexx.cloud`` and ``embed.nexxcdn.com`` URLs, downloads
    the embed page and hands the URL found in it by
    ``NexxIE._extract_url`` over to ``NexxIE``.
    """

    _VALID_URL = r'https?://embed\.nexx(?:\.cloud|cdn\.com)/\d+/(?P<id>[^/?#&]+)'
    _TEST = {
        'url': 'http://embed.nexx.cloud/748/KC1614647Z27Y7T?autoplay=1',
        'md5': '16746bfc28c42049492385c989b26c4a',
        'info_dict': {
            'id': '161464',
            'ext': 'mp4',
            'title': 'Nervenkitzel Achterbahn',
            'alt_title': 'Karussellbauer in Deutschland',
            'description': 'md5:ffe7b1cc59a01f585e0569949aef73cc',
            'release_year': 2005,
            'creator': 'SPIEGEL TV',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 2761,
            'timestamp': 1394021479,
            'upload_date': '20140305',
        },
        'params': {
            'format': 'bestvideo',
            'skip_download': True,
        },
    }

    @staticmethod
    def _extract_urls(webpage):
        """Return the ``src`` of every nexx embed ``<iframe>`` in *webpage*.

        The result is a list (possibly empty) of URLs, in document order.
        Scheme-relative (``//embed.nexx...``) sources are returned as-is.
        """
        # Reference:
        # 1. https://nx-s.akamaized.net/files/201510/44.pdf

        # iFrame Embed Integration
        iframe_re = r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//embed\.nexx(?:\.cloud|cdn\.com)/\d+/(?:(?!\1).)+)\1'
        embed_urls = []
        for mobj in re.finditer(iframe_re, webpage):
            embed_urls.append(mobj.group('url'))
        return embed_urls

    def _real_extract(self, url):
        """Download the embed page and delegate extraction to NexxIE."""
        embed_id = self._match_id(url)

        webpage = self._download_webpage(url, embed_id)

        # The embed page itself carries no media info; NexxIE knows how to
        # locate the real video URL inside it.
        nexx_url = NexxIE._extract_url(webpage)
        return self.url_result(nexx_url, ie=NexxIE.ie_key())
|
@@ -1,23 +1,22 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
import json
|
import json
|
||||||
import datetime
|
import datetime
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
|
compat_parse_qs,
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
determine_ext,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
sanitized_Request,
|
|
||||||
xpath_text,
|
|
||||||
determine_ext,
|
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
|
xpath_text,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -83,9 +82,12 @@ class NiconicoIE(InfoExtractor):
|
|||||||
'uploader_id': '312',
|
'uploader_id': '312',
|
||||||
},
|
},
|
||||||
'skip': 'The viewing period of the video you were searching for has expired.',
|
'skip': 'The viewing period of the video you were searching for has expired.',
|
||||||
|
}, {
|
||||||
|
'url': 'http://sp.nicovideo.jp/watch/sm28964488?ss_pos=1&cp_in=wt_tg',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
_VALID_URL = r'https?://(?:www\.|secure\.)?nicovideo\.jp/watch/(?P<id>(?:[a-z]{2})?[0-9]+)'
|
_VALID_URL = r'https?://(?:www\.|secure\.|sp\.)?nicovideo\.jp/watch/(?P<id>(?:[a-z]{2})?[0-9]+)'
|
||||||
_NETRC_MACHINE = 'niconico'
|
_NETRC_MACHINE = 'niconico'
|
||||||
|
|
||||||
def _real_initialize(self):
|
def _real_initialize(self):
|
||||||
@@ -98,19 +100,24 @@ class NiconicoIE(InfoExtractor):
|
|||||||
return True
|
return True
|
||||||
|
|
||||||
# Log in
|
# Log in
|
||||||
|
login_ok = True
|
||||||
login_form_strs = {
|
login_form_strs = {
|
||||||
'mail': username,
|
'mail_tel': username,
|
||||||
'password': password,
|
'password': password,
|
||||||
}
|
}
|
||||||
login_data = urlencode_postdata(login_form_strs)
|
urlh = self._request_webpage(
|
||||||
request = sanitized_Request(
|
'https://account.nicovideo.jp/api/v1/login', None,
|
||||||
'https://secure.nicovideo.jp/secure/login', login_data)
|
note='Logging in', errnote='Unable to log in',
|
||||||
login_results = self._download_webpage(
|
data=urlencode_postdata(login_form_strs))
|
||||||
request, None, note='Logging in', errnote='Unable to log in')
|
if urlh is False:
|
||||||
if re.search(r'(?i)<h1 class="mb8p4">Log in error</h1>', login_results) is not None:
|
login_ok = False
|
||||||
|
else:
|
||||||
|
parts = compat_urlparse.urlparse(urlh.geturl())
|
||||||
|
if compat_parse_qs(parts.query).get('message', [None])[0] == 'cant_login':
|
||||||
|
login_ok = False
|
||||||
|
if not login_ok:
|
||||||
self._downloader.report_warning('unable to log in: bad username or password')
|
self._downloader.report_warning('unable to log in: bad username or password')
|
||||||
return False
|
return login_ok
|
||||||
return True
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
@@ -6,6 +6,7 @@ import re
|
|||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_urlparse
|
from ..compat import compat_urlparse
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
extract_attributes,
|
||||||
get_element_by_class,
|
get_element_by_class,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
)
|
)
|
||||||
@@ -56,17 +57,24 @@ class NJPWWorldIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for player_url, kind in re.findall(r'<a[^>]+href="(/player[^"]+)".+?<img[^>]+src="[^"]+qf_btn_([^".]+)', webpage):
|
for mobj in re.finditer(r'<a[^>]+\bhref=(["\'])/player.+?[^>]*>', webpage):
|
||||||
player_url = compat_urlparse.urljoin(url, player_url)
|
player = extract_attributes(mobj.group(0))
|
||||||
|
player_path = player.get('href')
|
||||||
|
if not player_path:
|
||||||
|
continue
|
||||||
|
kind = self._search_regex(
|
||||||
|
r'(low|high)$', player.get('class') or '', 'kind',
|
||||||
|
default='low')
|
||||||
|
player_url = compat_urlparse.urljoin(url, player_path)
|
||||||
player_page = self._download_webpage(
|
player_page = self._download_webpage(
|
||||||
player_url, video_id, note='Downloading player page')
|
player_url, video_id, note='Downloading player page')
|
||||||
|
|
||||||
entries = self._parse_html5_media_entries(
|
entries = self._parse_html5_media_entries(
|
||||||
player_url, player_page, video_id, m3u8_id='hls-%s' % kind,
|
player_url, player_page, video_id, m3u8_id='hls-%s' % kind,
|
||||||
m3u8_entry_protocol='m3u8_native',
|
m3u8_entry_protocol='m3u8_native')
|
||||||
preference=2 if 'hq' in kind else 1)
|
kind_formats = entries[0]['formats']
|
||||||
formats.extend(entries[0]['formats'])
|
for f in kind_formats:
|
||||||
|
f['quality'] = 2 if kind == 'high' else 1
|
||||||
|
formats.extend(kind_formats)
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
@@ -28,17 +28,17 @@ class NPOBaseIE(InfoExtractor):
|
|||||||
|
|
||||||
class NPOIE(NPOBaseIE):
|
class NPOIE(NPOBaseIE):
|
||||||
IE_NAME = 'npo'
|
IE_NAME = 'npo'
|
||||||
IE_DESC = 'npo.nl and ntr.nl'
|
IE_DESC = 'npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl'
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
(?:
|
(?:
|
||||||
npo:|
|
npo:|
|
||||||
https?://
|
https?://
|
||||||
(?:www\.)?
|
(?:www\.)?
|
||||||
(?:
|
(?:
|
||||||
npo\.nl/(?!live|radio)(?:[^/]+/){2}|
|
npo\.nl/(?!(?:live|radio)/)(?:[^/]+/){2}|
|
||||||
ntr\.nl/(?:[^/]+/){2,}|
|
ntr\.nl/(?:[^/]+/){2,}|
|
||||||
omroepwnl\.nl/video/fragment/[^/]+__|
|
omroepwnl\.nl/video/fragment/[^/]+__|
|
||||||
zapp\.nl/[^/]+/[^/]+/
|
(?:zapp|npo3)\.nl/(?:[^/]+/){2}
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
(?P<id>[^/?#]+)
|
(?P<id>[^/?#]+)
|
||||||
@@ -146,10 +146,16 @@ class NPOIE(NPOBaseIE):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://www.zapp.nl/beste-vrienden-quiz/extra-video-s/WO_NTR_1067990',
|
'url': 'http://www.zapp.nl/beste-vrienden-quiz/extra-video-s/WO_NTR_1067990',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.npo3.nl/3onderzoekt/16-09-2015/VPWON_1239870',
|
||||||
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
# live stream
|
# live stream
|
||||||
'url': 'npo:LI_NL1_4188102',
|
'url': 'npo:LI_NL1_4188102',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.npo.nl/radio-gaga/13-06-2017/BNN_101383373',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@@ -338,7 +344,7 @@ class NPOLiveIE(NPOBaseIE):
|
|||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
live_id = self._search_regex(
|
live_id = self._search_regex(
|
||||||
r'data-prid="([^"]+)"', webpage, 'live id')
|
[r'media-id="([^"]+)"', r'data-prid="([^"]+)"'], webpage, 'live id')
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'_type': 'url_transparent',
|
'_type': 'url_transparent',
|
||||||
|
@@ -11,6 +11,7 @@ from ..utils import (
|
|||||||
get_element_by_class,
|
get_element_by_class,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
js_to_json,
|
js_to_json,
|
||||||
|
NO_DEFAULT,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
remove_start,
|
remove_start,
|
||||||
strip_or_none,
|
strip_or_none,
|
||||||
@@ -198,6 +199,19 @@ class OnetPlIE(InfoExtractor):
|
|||||||
'upload_date': '20170214',
|
'upload_date': '20170214',
|
||||||
'timestamp': 1487078046,
|
'timestamp': 1487078046,
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
# embedded via pulsembed
|
||||||
|
'url': 'http://film.onet.pl/pensjonat-nad-rozlewiskiem-relacja-z-planu-serialu/y428n0',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '501235.965429946',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '"Pensjonat nad rozlewiskiem": relacja z planu serialu',
|
||||||
|
'upload_date': '20170622',
|
||||||
|
'timestamp': 1498159955,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://film.onet.pl/zwiastuny/ghost-in-the-shell-drugi-zwiastun-pl/5q6yl3',
|
'url': 'http://film.onet.pl/zwiastuny/ghost-in-the-shell-drugi-zwiastun-pl/5q6yl3',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@@ -212,13 +226,25 @@ class OnetPlIE(InfoExtractor):
|
|||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
def _search_mvp_id(self, webpage, default=NO_DEFAULT):
    """Search *webpage* for an MVP id in a ``data-mvp`` / ``data-params-mvp`` attribute.

    The id has the form ``<digits>.<digits>``. *default* is forwarded
    unchanged to ``_search_regex``.
    """
    mvp_id_re = r'data-(?:params-)?mvp=["\'](\d+\.\d+)'
    return self._search_regex(
        mvp_id_re, webpage, 'mvp id', default=default)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
mvp_id = self._search_regex(
|
mvp_id = self._search_mvp_id(webpage, default=None)
|
||||||
r'data-params-mvp=["\'](\d+\.\d+)', webpage, 'mvp id')
|
|
||||||
|
if not mvp_id:
|
||||||
|
pulsembed_url = self._search_regex(
|
||||||
|
r'data-src=(["\'])(?P<url>(?:https?:)?//pulsembed\.eu/.+?)\1',
|
||||||
|
webpage, 'pulsembed url', group='url')
|
||||||
|
webpage = self._download_webpage(
|
||||||
|
pulsembed_url, video_id, 'Downloading pulsembed webpage')
|
||||||
|
mvp_id = self._search_mvp_id(webpage)
|
||||||
|
|
||||||
return self.url_result(
|
return self.url_result(
|
||||||
'onetmvp:%s' % mvp_id, OnetMVPIE.ie_key(), video_id=mvp_id)
|
'onetmvp:%s' % mvp_id, OnetMVPIE.ie_key(), video_id=mvp_id)
|
||||||
|
@@ -3,12 +3,14 @@ import re
|
|||||||
import base64
|
import base64
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
int_or_none,
|
|
||||||
float_or_none,
|
|
||||||
ExtractorError,
|
|
||||||
unsmuggle_url,
|
|
||||||
determine_ext,
|
determine_ext,
|
||||||
|
ExtractorError,
|
||||||
|
float_or_none,
|
||||||
|
int_or_none,
|
||||||
|
try_get,
|
||||||
|
unsmuggle_url,
|
||||||
)
|
)
|
||||||
from ..compat import compat_urllib_parse_urlencode
|
from ..compat import compat_urllib_parse_urlencode
|
||||||
|
|
||||||
@@ -39,13 +41,15 @@ class OoyalaBaseIE(InfoExtractor):
|
|||||||
formats = []
|
formats = []
|
||||||
if cur_auth_data['authorized']:
|
if cur_auth_data['authorized']:
|
||||||
for stream in cur_auth_data['streams']:
|
for stream in cur_auth_data['streams']:
|
||||||
s_url = base64.b64decode(
|
url_data = try_get(stream, lambda x: x['url']['data'], compat_str)
|
||||||
stream['url']['data'].encode('ascii')).decode('utf-8')
|
if not url_data:
|
||||||
if s_url in urls:
|
continue
|
||||||
|
s_url = base64.b64decode(url_data.encode('ascii')).decode('utf-8')
|
||||||
|
if not s_url or s_url in urls:
|
||||||
continue
|
continue
|
||||||
urls.append(s_url)
|
urls.append(s_url)
|
||||||
ext = determine_ext(s_url, None)
|
ext = determine_ext(s_url, None)
|
||||||
delivery_type = stream['delivery_type']
|
delivery_type = stream.get('delivery_type')
|
||||||
if delivery_type == 'hls' or ext == 'm3u8':
|
if delivery_type == 'hls' or ext == 'm3u8':
|
||||||
formats.extend(self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
re.sub(r'/ip(?:ad|hone)/', '/all/', s_url), embed_code, 'mp4', 'm3u8_native',
|
re.sub(r'/ip(?:ad|hone)/', '/all/', s_url), embed_code, 'mp4', 'm3u8_native',
|
||||||
@@ -65,7 +69,7 @@ class OoyalaBaseIE(InfoExtractor):
|
|||||||
else:
|
else:
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': s_url,
|
'url': s_url,
|
||||||
'ext': ext or stream.get('delivery_type'),
|
'ext': ext or delivery_type,
|
||||||
'vcodec': stream.get('video_codec'),
|
'vcodec': stream.get('video_codec'),
|
||||||
'format_id': delivery_type,
|
'format_id': delivery_type,
|
||||||
'width': int_or_none(stream.get('width')),
|
'width': int_or_none(stream.get('width')),
|
||||||
@@ -136,6 +140,11 @@ class OoyalaIE(OoyalaBaseIE):
|
|||||||
'title': 'Divide Tool Path.mp4',
|
'title': 'Divide Tool Path.mp4',
|
||||||
'duration': 204.405,
|
'duration': 204.405,
|
||||||
}
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
# empty stream['url']['data']
|
||||||
|
'url': 'http://player.ooyala.com/player.js?embedCode=w2bnZtYjE6axZ_dw1Cd0hQtXd_ige2Is',
|
||||||
|
'only_matching': True,
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@@ -2,8 +2,6 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import calendar
|
|
||||||
import datetime
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_str
|
from ..compat import compat_str
|
||||||
@@ -144,77 +142,25 @@ class ORFTVthekIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class ORFOE1IE(InfoExtractor):
|
class ORFRadioIE(InfoExtractor):
|
||||||
IE_NAME = 'orf:oe1'
|
|
||||||
IE_DESC = 'Radio Österreich 1'
|
|
||||||
_VALID_URL = r'https?://oe1\.orf\.at/(?:programm/|konsole\?.*?\btrack_id=)(?P<id>[0-9]+)'
|
|
||||||
|
|
||||||
# Audios on ORF radio are only available for 7 days, so we can't add tests.
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'http://oe1.orf.at/konsole?show=on_demand#?track_id=394211',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': 'http://oe1.orf.at/konsole?show=ondemand&track_id=443608&load_day=/programm/konsole/tag/20160726',
|
|
||||||
'only_matching': True,
|
|
||||||
}]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
show_id = self._match_id(url)
|
|
||||||
data = self._download_json(
|
|
||||||
'http://oe1.orf.at/programm/%s/konsole' % show_id,
|
|
||||||
show_id
|
|
||||||
)
|
|
||||||
|
|
||||||
timestamp = datetime.datetime.strptime('%s %s' % (
|
|
||||||
data['item']['day_label'],
|
|
||||||
data['item']['time']
|
|
||||||
), '%d.%m.%Y %H:%M')
|
|
||||||
unix_timestamp = calendar.timegm(timestamp.utctimetuple())
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': show_id,
|
|
||||||
'title': data['item']['title'],
|
|
||||||
'url': data['item']['url_stream'],
|
|
||||||
'ext': 'mp3',
|
|
||||||
'description': data['item'].get('info'),
|
|
||||||
'timestamp': unix_timestamp
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class ORFFM4IE(InfoExtractor):
|
|
||||||
IE_NAME = 'orf:fm4'
|
|
||||||
IE_DESC = 'radio FM4'
|
|
||||||
_VALID_URL = r'https?://fm4\.orf\.at/(?:7tage/?#|player/)(?P<date>[0-9]+)/(?P<show>\w+)'
|
|
||||||
|
|
||||||
_TEST = {
|
|
||||||
'url': 'http://fm4.orf.at/player/20160110/IS/',
|
|
||||||
'md5': '01e736e8f1cef7e13246e880a59ad298',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '2016-01-10_2100_tl_54_7DaysSun13_11244',
|
|
||||||
'ext': 'mp3',
|
|
||||||
'title': 'Im Sumpf',
|
|
||||||
'description': 'md5:384c543f866c4e422a55f66a62d669cd',
|
|
||||||
'duration': 7173,
|
|
||||||
'timestamp': 1452456073,
|
|
||||||
'upload_date': '20160110',
|
|
||||||
},
|
|
||||||
'skip': 'Live streams on FM4 got deleted soon',
|
|
||||||
}
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
station = mobj.group('station')
|
||||||
show_date = mobj.group('date')
|
show_date = mobj.group('date')
|
||||||
show_id = mobj.group('show')
|
show_id = mobj.group('show')
|
||||||
|
|
||||||
|
if station == 'fm4':
|
||||||
|
show_id = '4%s' % show_id
|
||||||
|
|
||||||
data = self._download_json(
|
data = self._download_json(
|
||||||
'http://audioapi.orf.at/fm4/json/2.0/broadcasts/%s/4%s' % (show_date, show_id),
|
'http://audioapi.orf.at/%s/api/json/current/broadcast/%s/%s' % (station, show_id, show_date),
|
||||||
show_id
|
show_id
|
||||||
)
|
)
|
||||||
|
|
||||||
def extract_entry_dict(info, title, subtitle):
|
def extract_entry_dict(info, title, subtitle):
|
||||||
return {
|
return {
|
||||||
'id': info['loopStreamId'].replace('.mp3', ''),
|
'id': info['loopStreamId'].replace('.mp3', ''),
|
||||||
'url': 'http://loopstream01.apa.at/?channel=fm4&id=%s' % info['loopStreamId'],
|
'url': 'http://loopstream01.apa.at/?channel=%s&id=%s' % (station, info['loopStreamId']),
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': subtitle,
|
'description': subtitle,
|
||||||
'duration': (info['end'] - info['start']) / 1000,
|
'duration': (info['end'] - info['start']) / 1000,
|
||||||
@@ -233,6 +179,47 @@ class ORFFM4IE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class ORFFM4IE(ORFRadioIE):
    """ORF radio FM4 player pages (``fm4.orf.at/player/<date>/<show>``).

    Purely declarative: the URL pattern provides the ``station``, ``date``
    and ``show`` groups; extraction itself is inherited from ORFRadioIE.
    """

    IE_NAME = 'orf:fm4'
    IE_DESC = 'radio FM4'
    _VALID_URL = r'https?://(?P<station>fm4)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'

    _TEST = {
        'url': 'http://fm4.orf.at/player/20170107/CC',
        'md5': '2b0be47375432a7ef104453432a19212',
        'info_dict': {
            'id': '2017-01-07_2100_tl_54_7DaysSat18_31295',
            'ext': 'mp3',
            'title': 'Solid Steel Radioshow',
            'description': 'Die Mixshow von Coldcut und Ninja Tune.',
            'duration': 3599,
            'timestamp': 1483819257,
            'upload_date': '20170107',
        },
        # The test is skipped for the reason given in the message below.
        'skip': 'Shows from ORF radios are only available for 7 days.',
    }
|
||||||
|
|
||||||
|
|
||||||
|
class ORFOE1IE(ORFRadioIE):
    """ORF radio Oe1 player pages (``oe1.orf.at/player/<date>/<show>``).

    Purely declarative: the URL pattern provides the ``station``, ``date``
    and ``show`` groups; extraction itself is inherited from ORFRadioIE.
    """

    IE_NAME = 'orf:oe1'
    IE_DESC = 'Radio Österreich 1'
    _VALID_URL = r'https?://(?P<station>oe1)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'

    _TEST = {
        'url': 'http://oe1.orf.at/player/20170108/456544',
        'md5': '34d8a6e67ea888293741c86a099b745b',
        'info_dict': {
            'id': '2017-01-08_0759_tl_51_7DaysSun6_256141',
            'ext': 'mp3',
            'title': 'Morgenjournal',
            'duration': 609,
            'timestamp': 1483858796,
            'upload_date': '20170108',
        },
        # The test is skipped for the reason given in the message below.
        'skip': 'Shows from ORF radios are only available for 7 days.',
    }
|
||||||
|
|
||||||
|
|
||||||
class ORFIPTVIE(InfoExtractor):
|
class ORFIPTVIE(InfoExtractor):
|
||||||
IE_NAME = 'orf:iptv'
|
IE_NAME = 'orf:iptv'
|
||||||
IE_DESC = 'iptv.ORF.at'
|
IE_DESC = 'iptv.ORF.at'
|
||||||
|
@@ -1,5 +1,6 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import json
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
@@ -14,7 +15,6 @@ from ..utils import (
|
|||||||
strip_or_none,
|
strip_or_none,
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
urljoin,
|
urljoin,
|
||||||
urlencode_postdata,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -45,22 +45,15 @@ class PacktPubIE(PacktPubBaseIE):
|
|||||||
(username, password) = self._get_login_info()
|
(username, password) = self._get_login_info()
|
||||||
if username is None:
|
if username is None:
|
||||||
return
|
return
|
||||||
webpage = self._download_webpage(self._PACKT_BASE, None)
|
|
||||||
login_form = self._form_hidden_inputs(
|
|
||||||
'packt-user-login-form', webpage)
|
|
||||||
login_form.update({
|
|
||||||
'email': username,
|
|
||||||
'password': password,
|
|
||||||
})
|
|
||||||
self._download_webpage(
|
|
||||||
self._PACKT_BASE, None, 'Logging in as %s' % username,
|
|
||||||
data=urlencode_postdata(login_form))
|
|
||||||
try:
|
try:
|
||||||
self._TOKEN = self._download_json(
|
self._TOKEN = self._download_json(
|
||||||
'%s/users/tokens/sessions' % self._MAPT_REST, None,
|
self._MAPT_REST + '/users/tokens', None,
|
||||||
'Downloading Authorization Token')['data']['token']
|
'Downloading Authorization Token', data=json.dumps({
|
||||||
|
'email': username,
|
||||||
|
'password': password,
|
||||||
|
}).encode())['data']['access']
|
||||||
except ExtractorError as e:
|
except ExtractorError as e:
|
||||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 404):
|
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (400, 401, 404):
|
||||||
message = self._parse_json(e.cause.read().decode(), None)['message']
|
message = self._parse_json(e.cause.read().decode(), None)['message']
|
||||||
raise ExtractorError(message, expected=True)
|
raise ExtractorError(message, expected=True)
|
||||||
raise
|
raise
|
||||||
@@ -83,7 +76,7 @@ class PacktPubIE(PacktPubBaseIE):
|
|||||||
|
|
||||||
headers = {}
|
headers = {}
|
||||||
if self._TOKEN:
|
if self._TOKEN:
|
||||||
headers['Authorization'] = self._TOKEN
|
headers['Authorization'] = 'Bearer ' + self._TOKEN
|
||||||
video = self._download_json(
|
video = self._download_json(
|
||||||
'%s/users/me/products/%s/chapters/%s/sections/%s'
|
'%s/users/me/products/%s/chapters/%s/sections/%s'
|
||||||
% (self._MAPT_REST, course_id, chapter_id, video_id), video_id,
|
% (self._MAPT_REST, course_id, chapter_id, video_id), video_id,
|
||||||
|
@@ -10,13 +10,13 @@ from ..utils import (
|
|||||||
|
|
||||||
class PandaTVIE(InfoExtractor):
|
class PandaTVIE(InfoExtractor):
|
||||||
IE_DESC = '熊猫TV'
|
IE_DESC = '熊猫TV'
|
||||||
_VALID_URL = r'http://(?:www\.)?panda\.tv/(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://(?:www\.)?panda\.tv/(?P<id>[0-9]+)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.panda.tv/10091',
|
'url': 'http://www.panda.tv/66666',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '10091',
|
'id': '66666',
|
||||||
'title': 're:.+',
|
'title': 're:.+',
|
||||||
'uploader': '囚徒',
|
'uploader': '刘杀鸡',
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'is_live': True,
|
'is_live': True,
|
||||||
},
|
},
|
||||||
@@ -24,13 +24,16 @@ class PandaTVIE(InfoExtractor):
|
|||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
'skip': 'Live stream is offline',
|
'skip': 'Live stream is offline',
|
||||||
}
|
}, {
|
||||||
|
'url': 'https://www.panda.tv/66666',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
config = self._download_json(
|
config = self._download_json(
|
||||||
'http://www.panda.tv/api_room?roomid=%s' % video_id, video_id)
|
'https://www.panda.tv/api_room?roomid=%s' % video_id, video_id)
|
||||||
|
|
||||||
error_code = config.get('errno', 0)
|
error_code = config.get('errno', 0)
|
||||||
if error_code is not 0:
|
if error_code is not 0:
|
||||||
@@ -74,7 +77,7 @@ class PandaTVIE(InfoExtractor):
|
|||||||
continue
|
continue
|
||||||
for pref, (ext, pl) in enumerate((('m3u8', '-hls'), ('flv', ''))):
|
for pref, (ext, pl) in enumerate((('m3u8', '-hls'), ('flv', ''))):
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': 'http://pl%s%s.live.panda.tv/live_panda/%s%s%s.%s'
|
'url': 'https://pl%s%s.live.panda.tv/live_panda/%s%s%s.%s'
|
||||||
% (pl, plflag1, room_key, live_panda, suffix[quality], ext),
|
% (pl, plflag1, room_key, live_panda, suffix[quality], ext),
|
||||||
'format_id': '%s-%s' % (k, ext),
|
'format_id': '%s-%s' % (k, ext),
|
||||||
'quality': quality,
|
'quality': quality,
|
||||||
|
@@ -19,7 +19,7 @@ class PandoraTVIE(InfoExtractor):
|
|||||||
IE_NAME = 'pandora.tv'
|
IE_NAME = 'pandora.tv'
|
||||||
IE_DESC = '판도라TV'
|
IE_DESC = '판도라TV'
|
||||||
_VALID_URL = r'https?://(?:.+?\.)?channel\.pandora\.tv/channel/video\.ptv\?'
|
_VALID_URL = r'https?://(?:.+?\.)?channel\.pandora\.tv/channel/video\.ptv\?'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://jp.channel.pandora.tv/channel/video.ptv?c1=&prgid=53294230&ch_userid=mikakim&ref=main&lot=cate_01_2',
|
'url': 'http://jp.channel.pandora.tv/channel/video.ptv?c1=&prgid=53294230&ch_userid=mikakim&ref=main&lot=cate_01_2',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '53294230',
|
'id': '53294230',
|
||||||
@@ -34,7 +34,26 @@ class PandoraTVIE(InfoExtractor):
|
|||||||
'view_count': int,
|
'view_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
}
|
}
|
||||||
}
|
}, {
|
||||||
|
'url': 'http://channel.pandora.tv/channel/video.ptv?ch_userid=gogoucc&prgid=54721744',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '54721744',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': '[HD] JAPAN COUNTDOWN 170423',
|
||||||
|
'description': '[HD] JAPAN COUNTDOWN 170423',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
'duration': 1704.9,
|
||||||
|
'upload_date': '20170423',
|
||||||
|
'uploader': 'GOGO_UCC',
|
||||||
|
'uploader_id': 'gogoucc',
|
||||||
|
'view_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# Test metadata only
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
|
qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
|
||||||
@@ -86,7 +105,7 @@ class PandoraTVIE(InfoExtractor):
|
|||||||
'description': info.get('body'),
|
'description': info.get('body'),
|
||||||
'thumbnail': info.get('thumbnail') or info.get('poster'),
|
'thumbnail': info.get('thumbnail') or info.get('poster'),
|
||||||
'duration': float_or_none(info.get('runtime'), 1000) or parse_duration(info.get('time')),
|
'duration': float_or_none(info.get('runtime'), 1000) or parse_duration(info.get('time')),
|
||||||
'upload_date': info['fid'][:8] if isinstance(info.get('fid'), compat_str) else None,
|
'upload_date': info['fid'].split('/')[-1][:8] if isinstance(info.get('fid'), compat_str) else None,
|
||||||
'uploader': info.get('nickname'),
|
'uploader': info.get('nickname'),
|
||||||
'uploader_id': info.get('upload_userid'),
|
'uploader_id': info.get('upload_userid'),
|
||||||
'view_count': str_to_int(info.get('hit')),
|
'view_count': str_to_int(info.get('hit')),
|
||||||
|
63
youtube_dl/extractor/pearvideo.py
Normal file
63
youtube_dl/extractor/pearvideo.py
Normal file
@@ -0,0 +1,63 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
qualities,
|
||||||
|
unified_timestamp,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class PearVideoIE(InfoExtractor):
    """Extractor for pearvideo.com video pages (``/video_<id>``)."""

    _VALID_URL = r'https?://(?:www\.)?pearvideo\.com/video_(?P<id>\d+)'
    _TEST = {
        'url': 'http://www.pearvideo.com/video_1076290',
        'info_dict': {
            'id': '1076290',
            'ext': 'mp4',
            'title': '小浣熊在主人家玻璃上滚石头:没砸',
            'description': 'md5:01d576b747de71be0ee85eb7cac25f9d',
            'timestamp': 1494275280,
            'upload_date': '20170508',
        }
    }

    def _real_extract(self, url):
        """Scrape formats and metadata from the video page.

        Formats come from ``<name>Url = "<url>"`` assignments in the page;
        ``<name>`` becomes the format id and is ranked by the quality
        ladder below. The title is required; description and timestamp
        are optional.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        # Quality ladder, worst to best.
        quality = qualities(
            ('ldflv', 'ld', 'sdflv', 'sd', 'hdflv', 'hd', 'src'))

        formats = []
        for mobj in re.finditer(
                r'(?P<id>[a-zA-Z]+)Url\s*=\s*(["\'])(?P<url>(?:https?:)?//.+?)\2',
                webpage):
            format_id = mobj.group('id')
            formats.append({
                'url': mobj.group('url'),
                'format_id': format_id,
                'quality': quality(format_id),
            })
        self._sort_formats(formats)

        title_patterns = (
            r'<h1[^>]+\bclass=(["\'])video-tt\1[^>]*>(?P<value>[^<]+)',
            r'<[^>]+\bdata-title=(["\'])(?P<value>(?:(?!\1).)+)\1',
        )
        title = self._search_regex(
            title_patterns, webpage, 'title', group='value')

        description_patterns = (
            r'<div[^>]+\bclass=(["\'])summary\1[^>]*>(?P<value>[^<]+)',
            r'<[^>]+\bdata-summary=(["\'])(?P<value>(?:(?!\1).)+)\1',
        )
        description = self._search_regex(
            description_patterns, webpage, 'description', default=None,
            group='value')
        if not description:
            # Fall back to the page's Description meta tag.
            description = self._html_search_meta('Description', webpage)

        date_text = self._search_regex(
            r'<div[^>]+\bclass=["\']date["\'][^>]*>([^<]+)',
            webpage, 'timestamp', fatal=False)
        timestamp = unified_timestamp(date_text)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'timestamp': timestamp,
            'formats': formats,
        }
|
@@ -49,7 +49,7 @@ class PeriscopeIE(PeriscopeBaseIE):
|
|||||||
@staticmethod
|
@staticmethod
|
||||||
def _extract_url(webpage):
|
def _extract_url(webpage):
|
||||||
mobj = re.search(
|
mobj = re.search(
|
||||||
r'<iframe[^>]+src=([\'"])(?P<url>(?:https?:)?//(?:www\.)?periscope\.tv/(?:(?!\1).)+)\1', webpage)
|
r'<iframe[^>]+src=([\'"])(?P<url>(?:https?:)?//(?:www\.)?(?:periscope|pscp)\.tv/(?:(?!\1).)+)\1', webpage)
|
||||||
if mobj:
|
if mobj:
|
||||||
return mobj.group('url')
|
return mobj.group('url')
|
||||||
|
|
||||||
|
@@ -65,7 +65,7 @@ class PolskieRadioIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, playlist_id)
|
webpage = self._download_webpage(url, playlist_id)
|
||||||
|
|
||||||
content = self._search_regex(
|
content = self._search_regex(
|
||||||
r'(?s)<div[^>]+class="audio atarticle"[^>]*>(.+?)<script>',
|
r'(?s)<div[^>]+class="\s*this-article\s*"[^>]*>(.+?)<div[^>]+class="tags"[^>]*>',
|
||||||
webpage, 'content')
|
webpage, 'content')
|
||||||
|
|
||||||
timestamp = unified_timestamp(self._html_search_regex(
|
timestamp = unified_timestamp(self._html_search_regex(
|
||||||
|
@@ -252,11 +252,14 @@ class PornHubPlaylistBaseIE(InfoExtractor):
|
|||||||
|
|
||||||
playlist = self._parse_json(
|
playlist = self._parse_json(
|
||||||
self._search_regex(
|
self._search_regex(
|
||||||
r'playlistObject\s*=\s*({.+?});', webpage, 'playlist'),
|
r'(?:playlistObject|PLAYLIST_VIEW)\s*=\s*({.+?});', webpage,
|
||||||
playlist_id)
|
'playlist', default='{}'),
|
||||||
|
playlist_id, fatal=False)
|
||||||
|
title = playlist.get('title') or self._search_regex(
|
||||||
|
r'>Videos\s+in\s+(.+?)\s+[Pp]laylist<', webpage, 'title', fatal=False)
|
||||||
|
|
||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
entries, playlist_id, playlist.get('title'), playlist.get('description'))
|
entries, playlist_id, title, playlist.get('description'))
|
||||||
|
|
||||||
|
|
||||||
class PornHubPlaylistIE(PornHubPlaylistBaseIE):
|
class PornHubPlaylistIE(PornHubPlaylistBaseIE):
|
||||||
@@ -296,6 +299,7 @@ class PornHubUserVideosIE(PornHubPlaylistBaseIE):
|
|||||||
except ExtractorError as e:
|
except ExtractorError as e:
|
||||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
|
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
|
||||||
break
|
break
|
||||||
|
raise
|
||||||
page_entries = self._extract_entries(webpage)
|
page_entries = self._extract_entries(webpage)
|
||||||
if not page_entries:
|
if not page_entries:
|
||||||
break
|
break
|
||||||
|
@@ -191,11 +191,12 @@ class RaiPlayIE(RaiBaseIE):
|
|||||||
|
|
||||||
info = {
|
info = {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': self._live_title(title) if relinker_info.get(
|
||||||
|
'is_live') else title,
|
||||||
'alt_title': media.get('subtitle'),
|
'alt_title': media.get('subtitle'),
|
||||||
'description': media.get('description'),
|
'description': media.get('description'),
|
||||||
'uploader': media.get('channel'),
|
'uploader': strip_or_none(media.get('channel')),
|
||||||
'creator': media.get('editor'),
|
'creator': strip_or_none(media.get('editor')),
|
||||||
'duration': parse_duration(video.get('duration')),
|
'duration': parse_duration(video.get('duration')),
|
||||||
'timestamp': timestamp,
|
'timestamp': timestamp,
|
||||||
'thumbnails': thumbnails,
|
'thumbnails': thumbnails,
|
||||||
@@ -208,10 +209,46 @@ class RaiPlayIE(RaiBaseIE):
|
|||||||
}
|
}
|
||||||
|
|
||||||
info.update(relinker_info)
|
info.update(relinker_info)
|
||||||
|
|
||||||
return info
|
return info
|
||||||
|
|
||||||
|
|
||||||
|
class RaiPlayLiveIE(RaiBaseIE):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?raiplay\.it/dirette/(?P<id>[^/?#&]+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.raiplay.it/dirette/rainews24',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'd784ad40-e0ae-4a69-aa76-37519d238a9c',
|
||||||
|
'display_id': 'rainews24',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 're:^Diretta di Rai News 24 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||||
|
'description': 'md5:6eca31500550f9376819f174e5644754',
|
||||||
|
'uploader': 'Rai News 24',
|
||||||
|
'creator': 'Rai News 24',
|
||||||
|
'is_live': True,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
display_id = self._match_id(url)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
|
video_id = self._search_regex(
|
||||||
|
r'data-uniquename=["\']ContentItem-(%s)' % RaiBaseIE._UUID_RE,
|
||||||
|
webpage, 'content id')
|
||||||
|
|
||||||
|
return {
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'ie_key': RaiPlayIE.ie_key(),
|
||||||
|
'url': 'http://www.raiplay.it/dirette/ContentItem-%s.html' % video_id,
|
||||||
|
'id': video_id,
|
||||||
|
'display_id': display_id,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
class RaiIE(RaiBaseIE):
|
class RaiIE(RaiBaseIE):
|
||||||
_VALID_URL = r'https?://[^/]+\.(?:rai\.(?:it|tv)|rainews\.it)/dl/.+?-(?P<id>%s)(?:-.+?)?\.html' % RaiBaseIE._UUID_RE
|
_VALID_URL = r'https?://[^/]+\.(?:rai\.(?:it|tv)|rainews\.it)/dl/.+?-(?P<id>%s)(?:-.+?)?\.html' % RaiBaseIE._UUID_RE
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
@@ -13,7 +13,7 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class RedBullTVIE(InfoExtractor):
|
class RedBullTVIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?redbull\.tv/(?:video|film)/(?P<id>AP-\w+)'
|
_VALID_URL = r'https?://(?:www\.)?redbull\.tv/(?:video|film|live)/(?:AP-\w+/segment/)?(?P<id>AP-\w+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# film
|
# film
|
||||||
'url': 'https://www.redbull.tv/video/AP-1Q756YYX51W11/abc-of-wrc',
|
'url': 'https://www.redbull.tv/video/AP-1Q756YYX51W11/abc-of-wrc',
|
||||||
@@ -42,6 +42,22 @@ class RedBullTVIE(InfoExtractor):
|
|||||||
'season_number': 2,
|
'season_number': 2,
|
||||||
'episode_number': 4,
|
'episode_number': 4,
|
||||||
},
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# segment
|
||||||
|
'url': 'https://www.redbull.tv/live/AP-1R5DX49XS1W11/segment/AP-1QSAQJ6V52111/semi-finals',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'AP-1QSAQJ6V52111',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Semi Finals - Vans Park Series Pro Tour',
|
||||||
|
'description': 'md5:306a2783cdafa9e65e39aa62f514fd97',
|
||||||
|
'duration': 11791.991,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.redbull.tv/film/AP-1MSKKF5T92111/in-motion',
|
'url': 'https://www.redbull.tv/film/AP-1MSKKF5T92111/in-motion',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@@ -82,7 +98,8 @@ class RedBullTVIE(InfoExtractor):
|
|||||||
title = info['title'].strip()
|
title = info['title'].strip()
|
||||||
|
|
||||||
formats = self._extract_m3u8_formats(
|
formats = self._extract_m3u8_formats(
|
||||||
video['url'], video_id, 'mp4', 'm3u8_native')
|
video['url'], video_id, 'mp4', entry_protocol='m3u8_native',
|
||||||
|
m3u8_id='hls')
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user