Compare commits
352 Commits
2017.01.14
...
2017.02.24
Author | SHA1 | Date | |
---|---|---|---|
6b097cff27 | |||
f2f7961820 | |||
be5df5ee31 | |||
f2980fddeb | |||
0f57447de7 | |||
19f3821821 | |||
8e1409fd80 | |||
050f143c12 | |||
fafc2bf5a9 | |||
b3175982c3 | |||
89db639dfe | |||
d0d9ade486 | |||
28572a1a0b | |||
0f3d41b44d | |||
d5fd9a3be3 | |||
ada77fa544 | |||
9e03aa75c7 | |||
30eaa3a702 | |||
c59f703610 | |||
bc61c80c14 | |||
345b24538b | |||
63a29b6118 | |||
b5869560a4 | |||
527ef85fe9 | |||
58ad6995cd | |||
a86e416088 | |||
71e9577b94 | |||
0d427c8304 | |||
139d8ac106 | |||
abd29a2ced | |||
31615ac279 | |||
fc320a40d9 | |||
7345d6d465 | |||
86466a8b6f | |||
33dc173cdc | |||
3444844b04 | |||
8c6c88c7da | |||
159aaaa9d0 | |||
eea0716cae | |||
336a76551b | |||
dc0a869e5e | |||
e39b5d4ab8 | |||
e469ab2528 | |||
890d44b005 | |||
6926304472 | |||
3ccdde8cb7 | |||
da42ff0668 | |||
82f662182b | |||
2cc7fcd338 | |||
6d4c259765 | |||
c78dd35491 | |||
8ffb8e63fe | |||
983e9b7746 | |||
8936f68a0b | |||
c58b7ffef4 | |||
f1a78ee4ef | |||
de64e23c56 | |||
553f6dbac7 | |||
0aa10994f4 | |||
4248dad92b | |||
0a840f584c | |||
0016b84e16 | |||
18a0defab0 | |||
5d3fbf77d9 | |||
80b59020e0 | |||
71631862f4 | |||
89cc7fe770 | |||
04d906eae3 | |||
8ab8066cf0 | |||
01b1aa9ff4 | |||
ff4007891f | |||
28200e654b | |||
e633f21a96 | |||
d392005a79 | |||
773f291dcb | |||
bf5b9d859a | |||
049a0f4d6d | |||
ac33accd96 | |||
e84888b432 | |||
02d9b82a23 | |||
a2e3286676 | |||
f75caf059e | |||
bdabbc220c | |||
70bcc444a9 | |||
28e35f5070 | |||
cf3704c132 | |||
2c1f442c2b | |||
bad4ccdb5d | |||
db76c30c6e | |||
c2bde5d081 | |||
90fad0e74c | |||
d94badc755 | |||
fef51645d6 | |||
4cead6a614 | |||
a4a554a793 | |||
b898f0a173 | |||
2480b056c1 | |||
3aa25395aa | |||
eafaeb226a | |||
de4d378c0c | |||
099cfdb770 | |||
398dea3210 | |||
db13c16ef8 | |||
1bd05345ea | |||
3021cf83b7 | |||
04a741232f | |||
43a3d9edfc | |||
d31aa74fdb | |||
6092ccd058 | |||
22ce9ad2bd | |||
9a372f14b4 | |||
5cb2d36c82 | |||
fcca0d53a8 | |||
58a65ba852 | |||
cedf08ff54 | |||
50de3dbad3 | |||
085f169ffe | |||
f6d6ca1db3 | |||
6e5956e6ba | |||
50fd3c2c69 | |||
89c6691f9d | |||
454e5cdb17 | |||
1de9f78e71 | |||
9dad941853 | |||
1e2c3f61fc | |||
0dac7cbb09 | |||
f8514630db | |||
459818e280 | |||
6310acf512 | |||
8d38dafbbf | |||
f3915452de | |||
2f49bcd690 | |||
68c22c4c15 | |||
9b92a5917b | |||
3e2274c8b7 | |||
3d7e3aaa0e | |||
624c4b92ff | |||
2af12ad9d2 | |||
97eb9bd2ac | |||
71cdd75628 | |||
c7d6f614f3 | |||
08a00eef79 | |||
9dd5408c99 | |||
9510709575 | |||
5abcca9060 | |||
e01bfc19c3 | |||
4d32b63851 | |||
55d4de2283 | |||
61ee556aea | |||
ff24261ba0 | |||
fbc6dc525e | |||
9150d1eb69 | |||
b7f9843bec | |||
e64b0fca14 | |||
78ef214d2d | |||
be670b8e8f | |||
37084f6641 | |||
b04975733c | |||
c8b8fb0a99 | |||
8298018273 | |||
ae8d5a5c59 | |||
b9c9cb5f79 | |||
fdf9b959bc | |||
013877298d | |||
c87f95f991 | |||
f28aeff264 | |||
242a14a1f6 | |||
d5d904ff7d | |||
5620f840f6 | |||
b7a8c1bcfa | |||
7097bffba6 | |||
2aec7256ae | |||
815482d4eb | |||
9c14fe9681 | |||
e705755739 | |||
019f4c0371 | |||
2ab2c0d1f5 | |||
caf0f5f8b7 | |||
e4e50f60b1 | |||
6ef3e65a7b | |||
6fd138bed8 | |||
49bd8d5e2e | |||
3d2c2752c5 | |||
a713a86755 | |||
7bccd5fc8a | |||
3144eccf55 | |||
9db8f6c540 | |||
8e4041cf3f | |||
31487eb974 | |||
c2521c1ac6 | |||
643dc0fcfe | |||
36fce54816 | |||
2c15db829c | |||
f65dba7cdb | |||
605fd6392f | |||
f962790ee5 | |||
b7cc5f078e | |||
f7a10d8cd6 | |||
daac118bf4 | |||
8939f784d9 | |||
df0588a31f | |||
4ce3407d08 | |||
d7f9242e30 | |||
45024183ae | |||
33da98f493 | |||
4195096ea8 | |||
0bbcc8a10a | |||
b3ee552e4b | |||
a22b2fd19b | |||
c54c01f82d | |||
5a116e1302 | |||
a685751051 | |||
bd8f48c78b | |||
81aeafeb44 | |||
8bdc149441 | |||
020c5df52d | |||
da162c1135 | |||
5069594993 | |||
b996b88092 | |||
b83ef507b4 | |||
000f207944 | |||
fe5aa197b5 | |||
7882f1115e | |||
2b2d5d319b | |||
26c0f09935 | |||
c15cd29640 | |||
c38a67bcd5 | |||
363245ad94 | |||
7c5329e6f4 | |||
8fd65faece | |||
d7e215b42d | |||
3a528ffd89 | |||
3c90cc8b6f | |||
ae9a173b64 | |||
75822ca790 | |||
dadb836139 | |||
4719419951 | |||
c2d9c25f81 | |||
4d2fdb07c4 | |||
fe323a4800 | |||
f13da8af28 | |||
e228616c6e | |||
c58c2d63cb | |||
d04621daf4 | |||
76aaf1faae | |||
56fc078da8 | |||
0842b8241d | |||
59c307891a | |||
4d07b748c2 | |||
f5169501d2 | |||
186f4abe93 | |||
34cea6137e | |||
ffcfb7e3e0 | |||
c0af11abee | |||
1a241a2d02 | |||
acbb2374bc | |||
4edeac5bfa | |||
f592ff9868 | |||
24ee6b9721 | |||
a71b8d3b3b | |||
732fb3f8be | |||
008f247077 | |||
661cc229d2 | |||
b92d3c5343 | |||
ab6f6aee78 | |||
26e40542dd | |||
99a0baf370 | |||
d41ed6d243 | |||
815d2a36d8 | |||
e0b6e50ccd | |||
3a194cb4ec | |||
9b73471801 | |||
489ffc1182 | |||
0b23c222ba | |||
b51a4ebed4 | |||
9463637887 | |||
3cbecdd111 | |||
15846398ca | |||
c19ef77c31 | |||
b3277115a1 | |||
9bccdc7004 | |||
cf0cabbe50 | |||
556dbe7fe3 | |||
2417d41535 | |||
2c302cf66b | |||
c1fa3f4672 | |||
17f8deeb48 | |||
b8a03b6660 | |||
c60089c022 | |||
af59bddc4e | |||
23b35a634e | |||
74af9c700d | |||
d61aa5eb37 | |||
c3a65c3de0 | |||
ee4c091ce5 | |||
b494d6856c | |||
bc35ed3fb6 | |||
0c1c6f4b9f | |||
6d119c2a6b | |||
4201ba13e6 | |||
8bc0800d7c | |||
a089545e03 | |||
30dda24de3 | |||
9d5b29c881 | |||
6c031a35f3 | |||
271808b6b2 | |||
8d1fbe0cb2 | |||
a243abb80d | |||
42697bab3c | |||
94629e537f | |||
e84495cd8d | |||
7c20b7484c | |||
04a3d4d234 | |||
12afdc2ad6 | |||
f4ec8dce48 | |||
f3c21cb7a7 | |||
972efe60c3 | |||
4447fb2332 | |||
d77ac73790 | |||
1fe84be0f3 | |||
1076858f76 | |||
cccd70a275 | |||
eb3f008c9e | |||
f1e70fc2ff | |||
1560baacc6 | |||
460f61fac4 | |||
baa3e1845b | |||
aaf2b7c57a | |||
b687c85eab | |||
538b17a09c | |||
4e44598547 | |||
136078966b | |||
8a5f0a6357 | |||
c0bd51c090 | |||
c1c2fe2045 | |||
ddd53c392e | |||
79fc8496c6 | |||
0ce8c66fb0 | |||
906420cae3 | |||
16e2c8f771 | |||
dcae7b3fdc | |||
8e4988f1a2 | |||
a7acf868a5 | |||
6f0be93747 | |||
af62de104f | |||
cd55c6ccd7 | |||
621a2800ca | |||
b80e2ebc8d | |||
99d537a5e0 | |||
8854f3fe78 | |||
abe8cb763f | |||
a0758dfa1a |
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@ -6,8 +6,8 @@
|
||||
|
||||
---
|
||||
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.14*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with an outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.14**
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.24*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with an outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.24**
|
||||
|
||||
### Before submitting an *issue* make sure you have:
|
||||
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||
@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2017.01.14
|
||||
[debug] youtube-dl version 2017.02.24
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
@ -6,8 +6,12 @@ python:
|
||||
- "3.3"
|
||||
- "3.4"
|
||||
- "3.5"
|
||||
- "3.6"
|
||||
sudo: false
|
||||
script: nosetests test --verbose
|
||||
env:
|
||||
- YTDL_TEST_SET=core
|
||||
- YTDL_TEST_SET=download
|
||||
script: ./devscripts/run_tests.sh
|
||||
notifications:
|
||||
email:
|
||||
- filippo.valsorda@gmail.com
|
||||
|
11
AUTHORS
11
AUTHORS
@ -191,3 +191,14 @@ Rich Leeper
|
||||
Zhong Jianxin
|
||||
Thor77
|
||||
Mattias Wadman
|
||||
Arjan Verwer
|
||||
Costy Petrisor
|
||||
Logan B
|
||||
Alex Seiler
|
||||
Vijay Singh
|
||||
Paul Hartmann
|
||||
Stephen Chen
|
||||
Fabian Stahl
|
||||
Bagira
|
||||
Odd Stråbø
|
||||
Philip Herzog
|
||||
|
356
ChangeLog
356
ChangeLog
@ -1,3 +1,359 @@
|
||||
version 2017.02.24
|
||||
|
||||
Core
|
||||
* [options] Hide deprecated options from --help
|
||||
* [options] Deprecate --autonumber-size
|
||||
+ [YoutubeDL] Add support for string formatting operations in output template
|
||||
(#5185, #5748, #6841, #9929, #9966, #9978, #12189)
|
||||
|
||||
Extractors
|
||||
+ [lynda:course] Add webpage extraction fallback (#12238)
|
||||
* [go] Sign all uplynk URLs and use geo bypass only for free videos
|
||||
(#12087, #12210)
|
||||
+ [skylinewebcams] Add support for skylinewebcams.com (#12221)
|
||||
+ [instagram] Add support for multi video posts (#12226)
|
||||
+ [crunchyroll] Extract playlist entries ids
|
||||
* [mgtv] Fix extraction
|
||||
+ [sohu] Raise GeoRestrictedError
|
||||
+ [leeco] Raise GeoRestrictedError and use geo bypass mechanism
|
||||
|
||||
|
||||
version 2017.02.22
|
||||
|
||||
Extractors
|
||||
* [crunchyroll] Fix descriptions with double quotes (#12124)
|
||||
* [dailymotion] Make comment count optional (#12209)
|
||||
+ [vidzi] Add support for vidzi.cc (#12213)
|
||||
+ [24video] Add support for 24video.tube (#12217)
|
||||
+ [crackle] Use geo bypass mechanism
|
||||
+ [viewster] Use geo verification headers
|
||||
+ [tfo] Improve geo restriction detection and use geo bypass mechanism
|
||||
+ [telequebec] Use geo bypass mechanism
|
||||
+ [limelight] Extract PlaylistService errors and improve geo restriction
|
||||
detection
|
||||
|
||||
|
||||
version 2017.02.21
|
||||
|
||||
Core
|
||||
* [extractor/common] Allow calling _initialize_geo_bypass from extractors
|
||||
(#11970)
|
||||
+ [adobepass] Add support for Time Warner Cable (#12191)
|
||||
+ [travis] Run tests in parallel
|
||||
+ [downloader/ism] Honor HTTP headers when downloading fragments
|
||||
+ [downloader/dash] Honor HTTP headers when downloading fragments
|
||||
+ [utils] Add GeoUtils class for working with geo tools and GeoUtils.random_ipv4
|
||||
+ Add option --geo-bypass-country for explicit geo bypass on behalf of
|
||||
specified country
|
||||
+ Add options to control geo bypass mechanism --geo-bypass and --no-geo-bypass
|
||||
+ Add experimental geo restriction bypass mechanism based on faking
|
||||
X-Forwarded-For HTTP header
|
||||
+ [utils] Introduce GeoRestrictedError for geo restricted videos
|
||||
+ [utils] Introduce YoutubeDLError base class for all youtube-dl exceptions
|
||||
|
||||
Extractors
|
||||
+ [ninecninemedia] Use geo bypass mechanism
|
||||
* [spankbang] Make uploader optional (#12193)
|
||||
+ [iprima] Improve geo restriction detection and disable geo bypass
|
||||
* [iprima] Modernize
|
||||
* [commonmistakes] Disable UnicodeBOM extractor test for python 3.2
|
||||
+ [prosiebensat1] Throw ExtractionError on unsupported page type (#12180)
|
||||
* [nrk] Update _API_HOST and relax _VALID_URL
|
||||
+ [tv4] Bypass geo restriction and improve detection
|
||||
* [tv4] Switch to hls3 protocol (#12177)
|
||||
+ [viki] Improve geo restriction detection
|
||||
+ [vgtv] Improve geo restriction detection
|
||||
+ [srgssr] Improve geo restriction detection
|
||||
+ [vbox7] Improve geo restriction detection and use geo bypass mechanism
|
||||
+ [svt] Improve geo restriction detection and use geo bypass mechanism
|
||||
+ [pbs] Improve geo restriction detection and use geo bypass mechanism
|
||||
+ [ondemandkorea] Improve geo restriction detection and use geo bypass mechanism
|
||||
+ [nrk] Improve geo restriction detection and use geo bypass mechanism
|
||||
+ [itv] Improve geo restriction detection and use geo bypass mechanism
|
||||
+ [go] Improve geo restriction detection and use geo bypass mechanism
|
||||
+ [dramafever] Improve geo restriction detection and use geo bypass mechanism
|
||||
* [brightcove:legacy] Restrict videoPlayer value (#12040)
|
||||
+ [tvn24] Add support for tvn24.pl and tvn24bis.pl (#11679)
|
||||
+ [thisav] Add support for HTML5 media (#11771)
|
||||
* [metacafe] Bypass family filter (#10371)
|
||||
* [viceland] Improve info extraction
|
||||
|
||||
|
||||
version 2017.02.17
|
||||
|
||||
Extractors
|
||||
* [heise] Improve extraction (#9725)
|
||||
* [ellentv] Improve (#11653)
|
||||
* [openload] Fix extraction (#10408, #12002)
|
||||
+ [theplatform] Recognize URLs with whitespaces (#12044)
|
||||
* [einthusan] Relax URL regular expression (#12141, #12159)
|
||||
+ [generic] Support complex JWPlayer embedded videos (#12030)
|
||||
* [elpais] Improve extraction (#12139)
|
||||
|
||||
|
||||
version 2017.02.16
|
||||
|
||||
Core
|
||||
+ [utils] Add support for quoted string literals in --match-filter (#8050,
|
||||
#12142, #12144)
|
||||
|
||||
Extractors
|
||||
* [ceskatelevize] Lower priority for audio description sources (#12119)
|
||||
* [amcnetworks] Fix extraction (#12127)
|
||||
* [pinkbike] Fix uploader extraction (#12054)
|
||||
+ [onetpl] Add support for businessinsider.com.pl and plejada.pl
|
||||
+ [onetpl] Add support for onet.pl (#10507)
|
||||
+ [onetmvp] Add shortcut extractor
|
||||
+ [vodpl] Add support for vod.pl (#12122)
|
||||
+ [pornhub] Extract video URL from tv platform site (#12007, #12129)
|
||||
+ [ceskatelevize] Extract DASH formats (#12119, #12133)
|
||||
|
||||
|
||||
version 2017.02.14
|
||||
|
||||
Core
|
||||
* TypeError is fixed with Python 2.7.13 on Windows (#11540, #12085)
|
||||
|
||||
Extractors
|
||||
* [zdf] Fix extraction (#12117)
|
||||
* [xtube] Fix extraction for both kinds of video id (#12088)
|
||||
* [xtube] Improve title extraction (#12088)
|
||||
+ [lemonde] Fallback delegate extraction to generic extractor (#12115, #12116)
|
||||
* [bellmedia] Allow video id longer than 6 characters (#12114)
|
||||
+ [limelight] Add support for referer protected videos
|
||||
* [disney] Improve extraction (#4975, #11000, #11882, #11936)
|
||||
* [hotstar] Improve extraction (#12096)
|
||||
* [einthusan] Fix extraction (#11416)
|
||||
+ [aenetworks] Add support for lifetimemovieclub.com (#12097)
|
||||
* [youtube] Fix parsing codecs (#12091)
|
||||
|
||||
|
||||
version 2017.02.11
|
||||
|
||||
Core
|
||||
+ [utils] Introduce get_elements_by_class and get_elements_by_attribute
|
||||
utility functions
|
||||
+ [extractor/common] Skip m3u8 manifests protected with Adobe Flash Access
|
||||
|
||||
Extractors
|
||||
* [pluralsight:course] Fix extraction (#12075)
|
||||
+ [bbc] Extract m3u8 formats with 320k audio
|
||||
* [facebook] Relax video id matching (#11017, #12055, #12056)
|
||||
+ [corus] Add support for Corus Entertainment sites (#12060, #9164)
|
||||
+ [pluralsight] Detect blocked account error message (#12070)
|
||||
+ [bloomberg] Add another video id pattern (#12062)
|
||||
* [extractor/commonmistakes] Restrict URL regular expression (#12050)
|
||||
+ [tvplayer] Add support for tvplayer.com
|
||||
|
||||
|
||||
version 2017.02.10
|
||||
|
||||
Extractors
|
||||
* [xtube] Fix extraction (#12023)
|
||||
* [pornhub] Fix extraction (#12007, #12018)
|
||||
* [facebook] Improve JS data regular expression (#12042)
|
||||
* [kaltura] Improve embed partner id extraction (#12041)
|
||||
+ [sprout] Add support for sproutonline.com
|
||||
* [6play] Improve extraction
|
||||
+ [scrippsnetworks:watch] Add support for Scripps Networks sites (#10765)
|
||||
+ [go] Add support for Adobe Pass authentication (#11468, #10831)
|
||||
* [6play] Fix extraction (#12011)
|
||||
+ [nbc] Add support for Adobe Pass authentication (#12006)
|
||||
|
||||
|
||||
version 2017.02.07
|
||||
|
||||
Core
|
||||
* [extractor/common] Fix audio only with audio group in m3u8 (#11995)
|
||||
+ [downloader/fragment] Respect --no-part
|
||||
* [extractor/common] Speed-up HTML5 media entries extraction (#11979)
|
||||
|
||||
Extractors
|
||||
* [pornhub] Fix extraction (#11997)
|
||||
+ [canalplus] Add support for cstar.fr (#11990)
|
||||
+ [extractor/generic] Improve RTMP support (#11993)
|
||||
+ [gaskrank] Add support for gaskrank.tv (#11685)
|
||||
* [bandcamp] Fix extraction for incomplete albums (#11727)
|
||||
* [iwara] Fix extraction (#11781)
|
||||
* [googledrive] Fix extraction on Python 3.6
|
||||
+ [videopress] Add support for videopress.com
|
||||
+ [afreecatv] Extract RTMP formats
|
||||
|
||||
|
||||
version 2017.02.04.1
|
||||
|
||||
Extractors
|
||||
+ [twitch:stream] Add support for player.twitch.tv (#11971)
|
||||
* [radiocanada] Fix extraction for toutv rtmp formats
|
||||
|
||||
|
||||
version 2017.02.04
|
||||
|
||||
Core
|
||||
+ Add --playlist-random to shuffle playlists (#11889, #11901)
|
||||
* [utils] Improve comments processing in js_to_json (#11947)
|
||||
* [utils] Handle single-line comments in js_to_json
|
||||
* [downloader/external:ffmpeg] Minimize the use of aac_adtstoasc filter
|
||||
|
||||
Extractors
|
||||
+ [piksel] Add another app token pattern (#11969)
|
||||
+ [vk] Capture and output author blocked error message (#11965)
|
||||
+ [turner] Fix secure HLS formats downloading with ffmpeg (#11358, #11373,
|
||||
#11800)
|
||||
+ [drtv] Add support for live and radio sections (#1827, #3427)
|
||||
* [myspace] Fix extraction and extract HLS and HTTP formats
|
||||
+ [youtube] Add format info for itag 325 and 328
|
||||
* [vine] Fix extraction (#11955)
|
||||
- [sportbox] Remove extractor (#11954)
|
||||
+ [filmon] Add support for filmon.com (#11187)
|
||||
+ [infoq] Add audio only formats (#11565)
|
||||
* [douyutv] Improve room id regular expression (#11931)
|
||||
* [iprima] Fix extraction (#11920, #11896)
|
||||
* [youtube] Fix ytsearch when cookies are provided (#11924)
|
||||
* [go] Relax video id regular expression (#11937)
|
||||
* [facebook] Fix title extraction (#11941)
|
||||
+ [youtube:playlist] Recognize TL playlists (#11945)
|
||||
+ [bilibili] Support new Bangumi URLs (#11845)
|
||||
+ [cbc:watch] Extract audio codec for audio only formats (#11893)
|
||||
+ [elpais] Fix extraction for some URLs (#11765)
|
||||
|
||||
|
||||
version 2017.02.01
|
||||
|
||||
Extractors
|
||||
+ [facebook] Add another fallback extraction scenario (#11926)
|
||||
* [prosiebensat1] Fix extraction of descriptions (#11810, #11929)
|
||||
- [crunchyroll] Remove ScaledBorderAndShadow settings (#9028)
|
||||
+ [vimeo] Extract upload timestamp
|
||||
+ [vimeo] Extract license (#8726, #11880)
|
||||
+ [nrk:series] Add support for series (#11571, #11711)
|
||||
|
||||
|
||||
version 2017.01.31
|
||||
|
||||
Core
|
||||
+ [compat] Add compat_etree_register_namespace
|
||||
|
||||
Extractors
|
||||
* [youtube] Fix extraction for domainless player URLs (#11890, #11891, #11892,
|
||||
#11894, #11895, #11897, #11900, #11903, #11904, #11906, #11907, #11909,
|
||||
#11913, #11914, #11915, #11916, #11917, #11918, #11919)
|
||||
+ [vimeo] Extract both mixed and separated DASH formats
|
||||
+ [ruutu] Extract DASH formats
|
||||
* [itv] Fix extraction for python 2.6
|
||||
|
||||
|
||||
version 2017.01.29
|
||||
|
||||
Core
|
||||
* [extractor/common] Fix initialization template (#11605, #11825)
|
||||
+ [extractor/common] Document fragment_base_url and fragment's path fields
|
||||
* [extractor/common] Fix duration per DASH segment (#11868)
|
||||
+ Introduce --autonumber-start option for initial value of %(autonumber)s
|
||||
template (#727, #2702, #9362, #10457, #10529, #11862)
|
||||
|
||||
Extractors
|
||||
+ [azmedien:playlist] Add support for topic and themen playlists (#11817)
|
||||
* [npo] Fix subtitles extraction
|
||||
+ [itv] Extract subtitles
|
||||
+ [itv] Add support for itv.com (#9240)
|
||||
+ [mtv81] Add support for mtv81.com (#7619)
|
||||
+ [vlive] Add support for channels (#11826)
|
||||
+ [kaltura] Add fallback for fileExt
|
||||
+ [kaltura] Improve uploader_id extraction
|
||||
+ [konserthusetplay] Add support for rspoplay.se (#11828)
|
||||
|
||||
|
||||
version 2017.01.28
|
||||
|
||||
Core
|
||||
* [utils] Improve parse_duration
|
||||
|
||||
Extractors
|
||||
* [crunchyroll] Improve series and season metadata extraction (#11832)
|
||||
* [soundcloud] Improve formats extraction and extract audio bitrate
|
||||
+ [soundcloud] Extract HLS formats
|
||||
* [soundcloud] Fix track URL extraction (#11852)
|
||||
+ [twitch:vod] Expand URL regular expressions (#11846)
|
||||
* [aenetworks] Fix season episodes extraction (#11669)
|
||||
+ [tva] Add support for videos.tva.ca (#11842)
|
||||
* [jamendo] Improve and extract more metadata (#11836)
|
||||
+ [disney] Add support for Disney sites (#7409, #11801, #4975, #11000)
|
||||
* [vevo] Remove request to old API and catch API v2 errors
|
||||
+ [cmt,mtv,southpark] Add support for episode URLs (#11837)
|
||||
+ [youtube] Add fallback for duration extraction (#11841)
|
||||
|
||||
|
||||
version 2017.01.25
|
||||
|
||||
Extractors
|
||||
+ [openload] Fallback video extension to mp4
|
||||
+ [extractor/generic] Add support for Openload embeds (#11536, #11812)
|
||||
* [srgssr] Fix rts video extraction (#11831)
|
||||
+ [afreecatv:global] Add support for afreeca.tv (#11807)
|
||||
+ [crackle] Extract vtt subtitles
|
||||
+ [crackle] Extract multiple resolutions for thumbnails
|
||||
+ [crackle] Add support for mobile URLs
|
||||
+ [konserthusetplay] Extract subtitles (#11823)
|
||||
+ [konserthusetplay] Add support for HLS videos (#11823)
|
||||
* [vimeo:review] Fix config URL extraction (#11821)
|
||||
|
||||
|
||||
version 2017.01.24
|
||||
|
||||
Extractors
|
||||
* [pluralsight] Fix extraction (#11820)
|
||||
+ [nextmedia] Add support for NextTV (壹電視)
|
||||
* [24video] Fix extraction (#11811)
|
||||
* [youtube:playlist] Fix nonexistent and private playlist detection (#11604)
|
||||
+ [chirbit] Extract uploader (#11809)
|
||||
|
||||
|
||||
version 2017.01.22
|
||||
|
||||
Extractors
|
||||
+ [pornflip] Add support for pornflip.com (#11556, #11795)
|
||||
* [chaturbate] Fix extraction (#11797, #11802)
|
||||
+ [azmedien] Add support for AZ Medien sites (#11784, #11785)
|
||||
+ [nextmedia] Support redirected URLs
|
||||
+ [vimeo:channel] Extract videos' titles for playlist entries (#11796)
|
||||
+ [youtube] Extract episode metadata (#9695, #11774)
|
||||
+ [cspan] Support Ustream embedded videos (#11547)
|
||||
+ [1tv] Add support for HLS videos (#11786)
|
||||
* [uol] Fix extraction (#11770)
|
||||
* [mtv] Relax triforce feed regular expression (#11766)
|
||||
|
||||
|
||||
version 2017.01.18
|
||||
|
||||
Extractors
|
||||
* [bilibili] Fix extraction (#11077)
|
||||
+ [canalplus] Add fallback for video id (#11764)
|
||||
* [20min] Fix extraction (#11683, #11751)
|
||||
* [imdb] Extend URL regular expression (#11744)
|
||||
+ [naver] Add support for tv.naver.com links (#11743)
|
||||
|
||||
|
||||
version 2017.01.16
|
||||
|
||||
Core
|
||||
* [options] Apply custom config to final composite configuration (#11741)
|
||||
* [YoutubeDL] Improve protocol auto determining (#11720)
|
||||
|
||||
Extractors
|
||||
* [xiami] Relax URL regular expressions
|
||||
* [xiami] Improve track metadata extraction (#11699)
|
||||
+ [limelight] Check hand-made direct HTTP links
|
||||
+ [limelight] Add support for direct HTTP links at video.llnw.net (#11737)
|
||||
+ [brightcove] Recognize another player ID pattern (#11688)
|
||||
+ [niconico] Support login via cookies (#7968)
|
||||
* [yourupload] Fix extraction (#11601)
|
||||
+ [beam:live] Add support for beam.pro live streams (#10702, #11596)
|
||||
* [vevo] Improve geo restriction detection
|
||||
+ [dramafever] Add support for URLs with language code (#11714)
|
||||
* [cbc] Improve playlist support (#11704)
|
||||
|
||||
|
||||
version 2017.01.14
|
||||
|
||||
Core
|
||||
|
196
README.md
196
README.md
@ -88,8 +88,6 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
||||
--mark-watched Mark videos watched (YouTube only)
|
||||
--no-mark-watched Do not mark videos watched (YouTube only)
|
||||
--no-color Do not emit color codes in output
|
||||
--abort-on-unavailable-fragment Abort downloading when some fragment is not
|
||||
available
|
||||
|
||||
## Network Options:
|
||||
--proxy URL Use the specified HTTP/HTTPS/SOCKS proxy.
|
||||
@ -99,16 +97,23 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
||||
string (--proxy "") for direct connection
|
||||
--socket-timeout SECONDS Time to wait before giving up, in seconds
|
||||
--source-address IP Client-side IP address to bind to
|
||||
(experimental)
|
||||
-4, --force-ipv4 Make all connections via IPv4
|
||||
(experimental)
|
||||
-6, --force-ipv6 Make all connections via IPv6
|
||||
(experimental)
|
||||
|
||||
## Geo Restriction:
|
||||
--geo-verification-proxy URL Use this proxy to verify the IP address for
|
||||
some geo-restricted sites. The default
|
||||
proxy specified by --proxy (or none, if the
|
||||
option is not present) is used for the
|
||||
actual downloading. (experimental)
|
||||
actual downloading.
|
||||
--geo-bypass Bypass geographic restriction via faking
|
||||
X-Forwarded-For HTTP header (experimental)
|
||||
--no-geo-bypass Do not bypass geographic restriction via
|
||||
faking X-Forwarded-For HTTP header
|
||||
(experimental)
|
||||
--geo-bypass-country CODE Force bypass geographic restriction with
|
||||
explicitly provided two-letter ISO 3166-1
|
||||
country code (experimental)
|
||||
|
||||
## Video Selection:
|
||||
--playlist-start NUMBER Playlist video to start at (default is 1)
|
||||
@ -139,16 +144,18 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
||||
COUNT views
|
||||
--max-views COUNT Do not download any videos with more than
|
||||
COUNT views
|
||||
--match-filter FILTER Generic video filter (experimental).
|
||||
Specify any key (see help for -o for a list
|
||||
of available keys) to match if the key is
|
||||
present, !key to check if the key is not
|
||||
present,key > NUMBER (like "comment_count >
|
||||
12", also works with >=, <, <=, !=, =) to
|
||||
compare against a number, and & to require
|
||||
multiple matches. Values which are not
|
||||
known are excluded unless you put a
|
||||
question mark (?) after the operator.For
|
||||
--match-filter FILTER Generic video filter. Specify any key (see
|
||||
help for -o for a list of available keys)
|
||||
to match if the key is present, !key to
|
||||
check if the key is not present, key >
|
||||
NUMBER (like "comment_count > 12", also
|
||||
works with >=, <, <=, !=, =) to compare
|
||||
against a number, key = 'LITERAL' (like
|
||||
"uploader = 'Mike Smith'", also works with
|
||||
!=) to match against a string literal and &
|
||||
to require multiple matches. Values which
|
||||
are not known are excluded unless you put a
|
||||
question mark (?) after the operator. For
|
||||
example, to only match videos that have
|
||||
been liked more than 100 times and disliked
|
||||
less than 50 times (or the dislike
|
||||
@ -178,6 +185,8 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
||||
only)
|
||||
--skip-unavailable-fragments Skip unavailable fragments (DASH and
|
||||
hlsnative only)
|
||||
--abort-on-unavailable-fragment Abort downloading when some fragment is not
|
||||
available
|
||||
--buffer-size SIZE Size of download buffer (e.g. 1024 or 16K)
|
||||
(default is 1024)
|
||||
--no-resize-buffer Do not automatically adjust the buffer
|
||||
@ -185,6 +194,7 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
||||
automatically resized from an initial value
|
||||
of SIZE.
|
||||
--playlist-reverse Download playlist videos in reverse order
|
||||
--playlist-random Download playlist videos in random order
|
||||
--xattr-set-filesize Set file xattribute ytdl.filesize with
|
||||
expected file size (experimental)
|
||||
--hls-prefer-native Use the native HLS downloader instead of
|
||||
@ -207,19 +217,11 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
||||
--id Use only video ID in file name
|
||||
-o, --output TEMPLATE Output filename template, see the "OUTPUT
|
||||
TEMPLATE" for all the info
|
||||
--autonumber-size NUMBER Specify the number of digits in
|
||||
%(autonumber)s when it is present in output
|
||||
filename template or --auto-number option
|
||||
is given
|
||||
--autonumber-start NUMBER Specify the start value for %(autonumber)s
|
||||
(default is 1)
|
||||
--restrict-filenames Restrict filenames to only ASCII
|
||||
characters, and avoid "&" and spaces in
|
||||
filenames
|
||||
-A, --auto-number [deprecated; use -o
|
||||
"%(autonumber)s-%(title)s.%(ext)s" ] Number
|
||||
downloaded files starting from 00000
|
||||
-t, --title [deprecated] Use title in file name
|
||||
(default)
|
||||
-l, --literal [deprecated] Alias of --title
|
||||
-w, --no-overwrites Do not overwrite files
|
||||
-c, --continue Force resume of partially downloaded files.
|
||||
By default, youtube-dl will resume
|
||||
@ -374,7 +376,7 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
||||
avprobe)
|
||||
--audio-format FORMAT Specify audio format: "best", "aac",
|
||||
"vorbis", "mp3", "m4a", "opus", or "wav";
|
||||
"best" by default
|
||||
"best" by default; No effect without -x
|
||||
--audio-quality QUALITY Specify ffmpeg/avconv audio quality, insert
|
||||
a value between 0 (better) and 9 (worse)
|
||||
for VBR or a specific bitrate like 128K
|
||||
@ -474,87 +476,89 @@ The `-o` option allows users to indicate a template for the output file names.
|
||||
|
||||
**tl;dr:** [navigate me to examples](#output-template-examples).
|
||||
|
||||
The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "http://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences have the format `%(NAME)s`. To clarify, that is a percent symbol followed by a name in parentheses, followed by a lowercase S. Allowed names are:
|
||||
The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "http://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [Python string formatting operations](https://docs.python.org/2/library/stdtypes.html#string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by a formatting operation. Allowed names along with sequence type are:
|
||||
|
||||
- `id`: Video identifier
|
||||
- `title`: Video title
|
||||
- `url`: Video URL
|
||||
- `ext`: Video filename extension
|
||||
- `alt_title`: A secondary title of the video
|
||||
- `display_id`: An alternative identifier for the video
|
||||
- `uploader`: Full name of the video uploader
|
||||
- `license`: License name the video is licensed under
|
||||
- `creator`: The creator of the video
|
||||
- `release_date`: The date (YYYYMMDD) when the video was released
|
||||
- `timestamp`: UNIX timestamp of the moment the video became available
|
||||
- `upload_date`: Video upload date (YYYYMMDD)
|
||||
- `uploader_id`: Nickname or id of the video uploader
|
||||
- `location`: Physical location where the video was filmed
|
||||
- `duration`: Length of the video in seconds
|
||||
- `view_count`: How many users have watched the video on the platform
|
||||
- `like_count`: Number of positive ratings of the video
|
||||
- `dislike_count`: Number of negative ratings of the video
|
||||
- `repost_count`: Number of reposts of the video
|
||||
- `average_rating`: Average rating give by users, the scale used depends on the webpage
|
||||
- `comment_count`: Number of comments on the video
|
||||
- `age_limit`: Age restriction for the video (years)
|
||||
- `format`: A human-readable description of the format
|
||||
- `format_id`: Format code specified by `--format`
|
||||
- `format_note`: Additional info about the format
|
||||
- `width`: Width of the video
|
||||
- `height`: Height of the video
|
||||
- `resolution`: Textual description of width and height
|
||||
- `tbr`: Average bitrate of audio and video in KBit/s
|
||||
- `abr`: Average audio bitrate in KBit/s
|
||||
- `acodec`: Name of the audio codec in use
|
||||
- `asr`: Audio sampling rate in Hertz
|
||||
- `vbr`: Average video bitrate in KBit/s
|
||||
- `fps`: Frame rate
|
||||
- `vcodec`: Name of the video codec in use
|
||||
- `container`: Name of the container format
|
||||
- `filesize`: The number of bytes, if known in advance
|
||||
- `filesize_approx`: An estimate for the number of bytes
|
||||
- `protocol`: The protocol that will be used for the actual download
|
||||
- `extractor`: Name of the extractor
|
||||
- `extractor_key`: Key name of the extractor
|
||||
- `epoch`: Unix epoch when creating the file
|
||||
- `autonumber`: Five-digit number that will be increased with each download, starting at zero
|
||||
- `playlist`: Name or id of the playlist that contains the video
|
||||
- `playlist_index`: Index of the video in the playlist padded with leading zeros according to the total length of the playlist
|
||||
- `playlist_id`: Playlist identifier
|
||||
- `playlist_title`: Playlist title
|
||||
- `id` (string): Video identifier
|
||||
- `title` (string): Video title
|
||||
- `url` (string): Video URL
|
||||
- `ext` (string): Video filename extension
|
||||
- `alt_title` (string): A secondary title of the video
|
||||
- `display_id` (string): An alternative identifier for the video
|
||||
- `uploader` (string): Full name of the video uploader
|
||||
- `license` (string): License name the video is licensed under
|
||||
- `creator` (string): The creator of the video
|
||||
- `release_date` (string): The date (YYYYMMDD) when the video was released
|
||||
- `timestamp` (numeric): UNIX timestamp of the moment the video became available
|
||||
- `upload_date` (string): Video upload date (YYYYMMDD)
|
||||
- `uploader_id` (string): Nickname or id of the video uploader
|
||||
- `location` (string): Physical location where the video was filmed
|
||||
- `duration` (numeric): Length of the video in seconds
|
||||
- `view_count` (numeric): How many users have watched the video on the platform
|
||||
- `like_count` (numeric): Number of positive ratings of the video
|
||||
- `dislike_count` (numeric): Number of negative ratings of the video
|
||||
- `repost_count` (numeric): Number of reposts of the video
|
||||
- `average_rating` (numeric): Average rating given by users, the scale used depends on the webpage
|
||||
- `comment_count` (numeric): Number of comments on the video
|
||||
- `age_limit` (numeric): Age restriction for the video (years)
|
||||
- `format` (string): A human-readable description of the format
|
||||
- `format_id` (string): Format code specified by `--format`
|
||||
- `format_note` (string): Additional info about the format
|
||||
- `width` (numeric): Width of the video
|
||||
- `height` (numeric): Height of the video
|
||||
- `resolution` (string): Textual description of width and height
|
||||
- `tbr` (numeric): Average bitrate of audio and video in KBit/s
|
||||
- `abr` (numeric): Average audio bitrate in KBit/s
|
||||
- `acodec` (string): Name of the audio codec in use
|
||||
- `asr` (numeric): Audio sampling rate in Hertz
|
||||
- `vbr` (numeric): Average video bitrate in KBit/s
|
||||
- `fps` (numeric): Frame rate
|
||||
- `vcodec` (string): Name of the video codec in use
|
||||
- `container` (string): Name of the container format
|
||||
- `filesize` (numeric): The number of bytes, if known in advance
|
||||
- `filesize_approx` (numeric): An estimate for the number of bytes
|
||||
- `protocol` (string): The protocol that will be used for the actual download
|
||||
- `extractor` (string): Name of the extractor
|
||||
- `extractor_key` (string): Key name of the extractor
|
||||
- `epoch` (numeric): Unix epoch when creating the file
|
||||
- `autonumber` (numeric): Five-digit number that will be increased with each download, starting at zero
|
||||
- `playlist` (string): Name or id of the playlist that contains the video
|
||||
- `playlist_index` (numeric): Index of the video in the playlist padded with leading zeros according to the total length of the playlist
|
||||
- `playlist_id` (string): Playlist identifier
|
||||
- `playlist_title` (string): Playlist title
|
||||
|
||||
|
||||
Available for the video that belongs to some logical chapter or section:
|
||||
- `chapter`: Name or title of the chapter the video belongs to
|
||||
- `chapter_number`: Number of the chapter the video belongs to
|
||||
- `chapter_id`: Id of the chapter the video belongs to
|
||||
- `chapter` (string): Name or title of the chapter the video belongs to
|
||||
- `chapter_number` (numeric): Number of the chapter the video belongs to
|
||||
- `chapter_id` (string): Id of the chapter the video belongs to
|
||||
|
||||
Available for the video that is an episode of some series or programme:
|
||||
- `series`: Title of the series or programme the video episode belongs to
|
||||
- `season`: Title of the season the video episode belongs to
|
||||
- `season_number`: Number of the season the video episode belongs to
|
||||
- `season_id`: Id of the season the video episode belongs to
|
||||
- `episode`: Title of the video episode
|
||||
- `episode_number`: Number of the video episode within a season
|
||||
- `episode_id`: Id of the video episode
|
||||
- `series` (string): Title of the series or programme the video episode belongs to
|
||||
- `season` (string): Title of the season the video episode belongs to
|
||||
- `season_number` (numeric): Number of the season the video episode belongs to
|
||||
- `season_id` (string): Id of the season the video episode belongs to
|
||||
- `episode` (string): Title of the video episode
|
||||
- `episode_number` (numeric): Number of the video episode within a season
|
||||
- `episode_id` (string): Id of the video episode
|
||||
|
||||
Available for the media that is a track or a part of a music album:
|
||||
- `track`: Title of the track
|
||||
- `track_number`: Number of the track within an album or a disc
|
||||
- `track_id`: Id of the track
|
||||
- `artist`: Artist(s) of the track
|
||||
- `genre`: Genre(s) of the track
|
||||
- `album`: Title of the album the track belongs to
|
||||
- `album_type`: Type of the album
|
||||
- `album_artist`: List of all artists appeared on the album
|
||||
- `disc_number`: Number of the disc or other physical medium the track belongs to
|
||||
- `release_year`: Year (YYYY) when the album was released
|
||||
- `track` (string): Title of the track
|
||||
- `track_number` (numeric): Number of the track within an album or a disc
|
||||
- `track_id` (string): Id of the track
|
||||
- `artist` (string): Artist(s) of the track
|
||||
- `genre` (string): Genre(s) of the track
|
||||
- `album` (string): Title of the album the track belongs to
|
||||
- `album_type` (string): Type of the album
|
||||
- `album_artist` (string): List of all artists who appeared on the album
|
||||
- `disc_number` (numeric): Number of the disc or other physical medium the track belongs to
|
||||
- `release_year` (numeric): Year (YYYY) when the album was released
|
||||
|
||||
Each aforementioned sequence when referenced in an output template will be replaced by the actual value corresponding to the sequence name. Note that some of the sequences are not guaranteed to be present since they depend on the metadata obtained by a particular extractor. Such sequences will be replaced with `NA`.
|
||||
|
||||
For example for `-o %(title)s-%(id)s.%(ext)s` and an mp4 video with title `youtube-dl test video` and id `BaW_jenozKcj`, this will result in a `youtube-dl test video-BaW_jenozKcj.mp4` file created in the current directory.
|
||||
|
||||
For numeric sequences you can use numeric related formatting, for example, `%(view_count)05d` will result in a string with view count padded with zeros up to 5 characters, like in `00042`.
|
||||
|
||||
Output templates can also contain an arbitrary hierarchical path, e.g. `-o '%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s'` which will result in downloading each video in a directory corresponding to this path template. Any missing directory will be automatically created for you.
|
||||
|
||||
To use percent literals in an output template use `%%`. To output to stdout use `-o -`.
|
||||
@ -841,7 +845,7 @@ Use the `--cookies` option, for example `--cookies /path/to/cookies/file.txt`.
|
||||
|
||||
In order to extract cookies from browser use any conforming browser extension for exporting cookies. For example, [cookies.txt](https://chrome.google.com/webstore/detail/cookiestxt/njabckikapfpffapmjgojcnbfjonfjfg) (for Chrome) or [Export Cookies](https://addons.mozilla.org/en-US/firefox/addon/export-cookies/) (for Firefox).
|
||||
|
||||
Note that the cookies file must be in Mozilla/Netscape format and the first line of the cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. Make sure you have correct [newline format](https://en.wikipedia.org/wiki/Newline) in the cookies file and convert newlines if necessary to correspond with your OS, namely `CRLF` (`\r\n`) for Windows, `LF` (`\n`) for Linux and `CR` (`\r`) for Mac OS. `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of invalid newline format.
|
||||
Note that the cookies file must be in Mozilla/Netscape format and the first line of the cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. Make sure you have correct [newline format](https://en.wikipedia.org/wiki/Newline) in the cookies file and convert newlines if necessary to correspond with your OS, namely `CRLF` (`\r\n`) for Windows and `LF` (`\n`) for Unix and Unix-like systems (Linux, Mac OS, etc.). `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of invalid newline format.
|
||||
|
||||
Passing cookies to youtube-dl is a good way to work around login when a particular extractor does not implement it explicitly. Another use case is working around [CAPTCHA](https://en.wikipedia.org/wiki/CAPTCHA) some websites require you to solve in particular cases in order to get access (e.g. YouTube, CloudFlare).
|
||||
|
||||
|
@ -1,6 +1,7 @@
|
||||
from __future__ import unicode_literals, print_function
|
||||
|
||||
from inspect import getsource
|
||||
import io
|
||||
import os
|
||||
from os.path import dirname as dirn
|
||||
import sys
|
||||
@ -95,5 +96,5 @@ module_contents.append(
|
||||
|
||||
module_src = '\n'.join(module_contents) + '\n'
|
||||
|
||||
with open(lazy_extractors_filename, 'wt') as f:
|
||||
with io.open(lazy_extractors_filename, 'wt', encoding='utf-8') as f:
|
||||
f.write(module_src)
|
||||
|
21
devscripts/run_tests.sh
Executable file
21
devscripts/run_tests.sh
Executable file
@ -0,0 +1,21 @@
|
||||
#!/bin/bash
|
||||
|
||||
DOWNLOAD_TESTS="age_restriction|download|subtitles|write_annotations|iqiyi_sdk_interpreter|youtube_lists"
|
||||
|
||||
test_set=""
|
||||
multiprocess_args=""
|
||||
|
||||
case "$YTDL_TEST_SET" in
|
||||
core)
|
||||
test_set="-I test_($DOWNLOAD_TESTS)\.py"
|
||||
;;
|
||||
download)
|
||||
test_set="-I test_(?!$DOWNLOAD_TESTS).+\.py"
|
||||
multiprocess_args="--processes=4 --process-timeout=540"
|
||||
;;
|
||||
*)
|
||||
break
|
||||
;;
|
||||
esac
|
||||
|
||||
nosetests test --verbose $test_set $multiprocess_args
|
@ -11,6 +11,7 @@
|
||||
- **4tube**
|
||||
- **56.com**
|
||||
- **5min**
|
||||
- **6play**
|
||||
- **8tracks**
|
||||
- **91porn**
|
||||
- **9c9media**
|
||||
@ -33,7 +34,8 @@
|
||||
- **AdobeTVVideo**
|
||||
- **AdultSwim**
|
||||
- **aenetworks**: A+E Networks: A&E, Lifetime, History.com, FYI Network
|
||||
- **AfreecaTV**: afreecatv.com
|
||||
- **afreecatv**: afreecatv.com
|
||||
- **afreecatv:global**: afreecatv.com
|
||||
- **AirMozilla**
|
||||
- **AlJazeera**
|
||||
- **Allocine**
|
||||
@ -74,6 +76,8 @@
|
||||
- **awaan:live**
|
||||
- **awaan:season**
|
||||
- **awaan:video**
|
||||
- **AZMedien**: AZ Medien videos
|
||||
- **AZMedienPlaylist**: AZ Medien playlists
|
||||
- **Azubu**
|
||||
- **AzubuLive**
|
||||
- **BaiduVideo**: 百度视频
|
||||
@ -81,11 +85,13 @@
|
||||
- **bambuser:channel**
|
||||
- **Bandcamp**
|
||||
- **Bandcamp:album**
|
||||
- **bangumi.bilibili.com**: BiliBili番剧
|
||||
- **bbc**: BBC
|
||||
- **bbc.co.uk**: BBC iPlayer
|
||||
- **bbc.co.uk:article**: BBC articles
|
||||
- **bbc.co.uk:iplayer:playlist**
|
||||
- **bbc.co.uk:playlist**
|
||||
- **Beam:live**
|
||||
- **Beatport**
|
||||
- **Beeg**
|
||||
- **BehindKink**
|
||||
@ -163,6 +169,7 @@
|
||||
- **ComedyCentralShortname**
|
||||
- **ComedyCentralTV**
|
||||
- **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED
|
||||
- **Corus**
|
||||
- **Coub**
|
||||
- **Cracked**
|
||||
- **Crackle**
|
||||
@ -198,6 +205,7 @@
|
||||
- **Digiteka**
|
||||
- **Discovery**
|
||||
- **DiscoveryGo**
|
||||
- **Disney**
|
||||
- **Dotsub**
|
||||
- **DouyuTV**: 斗鱼
|
||||
- **DPlay**
|
||||
@ -206,7 +214,8 @@
|
||||
- **DRBonanza**
|
||||
- **Dropbox**
|
||||
- **DrTuber**
|
||||
- **DRTV**
|
||||
- **drtv**
|
||||
- **drtv:live**
|
||||
- **Dumpert**
|
||||
- **dvtv**: http://video.aktualne.cz/
|
||||
- **dw**
|
||||
@ -242,6 +251,8 @@
|
||||
- **fc2:embed**
|
||||
- **Fczenit**
|
||||
- **fernsehkritik.tv**
|
||||
- **filmon**
|
||||
- **filmon:channel**
|
||||
- **Firstpost**
|
||||
- **FiveTV**
|
||||
- **Flickr**
|
||||
@ -273,6 +284,7 @@
|
||||
- **Gamersyde**
|
||||
- **GameSpot**
|
||||
- **GameStar**
|
||||
- **Gaskrank**
|
||||
- **Gazeta**
|
||||
- **GDCVault**
|
||||
- **generic**: Generic downloader that works on some sites
|
||||
@ -298,7 +310,6 @@
|
||||
- **HellPorno**
|
||||
- **Helsinki**: helsinki.fi
|
||||
- **HentaiStigma**
|
||||
- **HGTV**
|
||||
- **hgtv.com:show**
|
||||
- **HistoricFilms**
|
||||
- **history:topic**: History.com Topic
|
||||
@ -332,6 +343,7 @@
|
||||
- **IPrima**
|
||||
- **iqiyi**: 爱奇艺
|
||||
- **Ir90Tv**
|
||||
- **ITV**
|
||||
- **ivi**: ivi.ru
|
||||
- **ivi:compilation**: ivi.ru compilations
|
||||
- **ivideon**: Ivideon TV
|
||||
@ -440,6 +452,7 @@
|
||||
- **mtg**: MTG services
|
||||
- **mtv**
|
||||
- **mtv.de**
|
||||
- **mtv81**
|
||||
- **mtv:video**
|
||||
- **mtvservices:embedded**
|
||||
- **MuenchenTV**: münchen.tv
|
||||
@ -482,6 +495,7 @@
|
||||
- **Newstube**
|
||||
- **NextMedia**: 蘋果日報
|
||||
- **NextMediaActionNews**: 蘋果日報 - 動新聞
|
||||
- **NextTV**: 壹電視
|
||||
- **nfb**: National Film Board of Canada
|
||||
- **nfl.com**
|
||||
- **NhkVod**
|
||||
@ -520,6 +534,7 @@
|
||||
- **NRKTV**: NRK TV and NRK Radio
|
||||
- **NRKTVDirekte**: NRK TV Direkte and NRK Radio Direkte
|
||||
- **NRKTVEpisodes**
|
||||
- **NRKTVSeries**
|
||||
- **ntv.ru**
|
||||
- **Nuvid**
|
||||
- **NYTimes**
|
||||
@ -531,8 +546,10 @@
|
||||
- **OktoberfestTV**
|
||||
- **on.aol.com**
|
||||
- **OnDemandKorea**
|
||||
- **onet.pl**
|
||||
- **onet.tv**
|
||||
- **onet.tv:channel**
|
||||
- **OnetMVP**
|
||||
- **OnionStudios**
|
||||
- **Ooyala**
|
||||
- **OoyalaExternal**
|
||||
@ -571,6 +588,7 @@
|
||||
- **PolskieRadio**
|
||||
- **PolskieRadioCategory**
|
||||
- **PornCom**
|
||||
- **PornFlip**
|
||||
- **PornHd**
|
||||
- **PornHub**: PornHub and Thumbzilla
|
||||
- **PornHubPlaylist**
|
||||
@ -652,6 +670,7 @@
|
||||
- **screen.yahoo:search**: Yahoo screen search
|
||||
- **Screencast**
|
||||
- **ScreencastOMatic**
|
||||
- **scrippsnetworks:watch**
|
||||
- **Seeker**
|
||||
- **SenateISVP**
|
||||
- **SendtoNews**
|
||||
@ -661,7 +680,7 @@
|
||||
- **Shared**: shared.sx
|
||||
- **ShowRoomLive**
|
||||
- **Sina**
|
||||
- **SixPlay**
|
||||
- **SkylineWebcams**
|
||||
- **skynewsarabia:article**
|
||||
- **skynewsarabia:video**
|
||||
- **SkySports**
|
||||
@ -693,10 +712,10 @@
|
||||
- **Spiegeltv**
|
||||
- **Spike**
|
||||
- **Sport5**
|
||||
- **SportBox**
|
||||
- **SportBoxEmbed**
|
||||
- **SportDeutschland**
|
||||
- **Sportschau**
|
||||
- **Sprout**
|
||||
- **sr:mediathek**: Saarländischer Rundfunk
|
||||
- **SRGSSR**
|
||||
- **SRGSSRPlay**: srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites
|
||||
@ -779,16 +798,19 @@
|
||||
- **TV2Article**
|
||||
- **TV3**
|
||||
- **TV4**: tv4.se and tv4play.se
|
||||
- **TVA**
|
||||
- **TVANouvelles**
|
||||
- **TVANouvellesArticle**
|
||||
- **TVC**
|
||||
- **TVCArticle**
|
||||
- **tvigle**: Интернет-телевидение Tvigle.ru
|
||||
- **tvland.com**
|
||||
- **TVN24**
|
||||
- **TVNoe**
|
||||
- **tvp**: Telewizja Polska
|
||||
- **tvp:embed**: Telewizja Polska
|
||||
- **tvp:series**
|
||||
- **TVPlayer**
|
||||
- **Tweakers**
|
||||
- **twitch:chapter**
|
||||
- **twitch:clips**
|
||||
@ -845,6 +867,7 @@
|
||||
- **videomore:season**
|
||||
- **videomore:video**
|
||||
- **VideoPremium**
|
||||
- **VideoPress**
|
||||
- **videoweed**: VideoWeed
|
||||
- **Vidio**
|
||||
- **vidme**
|
||||
@ -879,7 +902,9 @@
|
||||
- **vk:uservideos**: VK - User's Videos
|
||||
- **vk:wallpost**
|
||||
- **vlive**
|
||||
- **vlive:channel**
|
||||
- **Vodlocker**
|
||||
- **VODPl**
|
||||
- **VODPlatform**
|
||||
- **VoiceRepublic**
|
||||
- **VoxMedia**
|
||||
|
5
setup.py
5
setup.py
@ -107,8 +107,8 @@ setup(
|
||||
url='https://github.com/rg3/youtube-dl',
|
||||
author='Ricardo Garcia',
|
||||
author_email='ytdl@yt-dl.org',
|
||||
maintainer='Philipp Hagemeister',
|
||||
maintainer_email='phihag@phihag.de',
|
||||
maintainer='Sergey M.',
|
||||
maintainer_email='dstftw@gmail.com',
|
||||
packages=[
|
||||
'youtube_dl',
|
||||
'youtube_dl.extractor', 'youtube_dl.downloader',
|
||||
@ -130,6 +130,7 @@ setup(
|
||||
'Programming Language :: Python :: 3.3',
|
||||
'Programming Language :: Python :: 3.4',
|
||||
'Programming Language :: Python :: 3.5',
|
||||
'Programming Language :: Python :: 3.6',
|
||||
],
|
||||
|
||||
cmdclass={'build_lazy_extractors': build_lazy_extractors},
|
||||
|
@ -1,4 +1,5 @@
|
||||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
@ -525,6 +526,7 @@ class TestYoutubeDL(unittest.TestCase):
|
||||
'id': '1234',
|
||||
'ext': 'mp4',
|
||||
'width': None,
|
||||
'height': 1080,
|
||||
}
|
||||
|
||||
def fname(templ):
|
||||
@ -534,16 +536,29 @@ class TestYoutubeDL(unittest.TestCase):
|
||||
self.assertEqual(fname('%(id)s-%(width)s.%(ext)s'), '1234-NA.mp4')
|
||||
# Replace missing fields with 'NA'
|
||||
self.assertEqual(fname('%(uploader_date)s-%(id)s.%(ext)s'), 'NA-1234.mp4')
|
||||
self.assertEqual(fname('%(height)d.%(ext)s'), '1080.mp4')
|
||||
self.assertEqual(fname('%(height)6d.%(ext)s'), ' 1080.mp4')
|
||||
self.assertEqual(fname('%(height)-6d.%(ext)s'), '1080 .mp4')
|
||||
self.assertEqual(fname('%(height)06d.%(ext)s'), '001080.mp4')
|
||||
self.assertEqual(fname('%(height) 06d.%(ext)s'), ' 01080.mp4')
|
||||
self.assertEqual(fname('%(height) 06d.%(ext)s'), ' 01080.mp4')
|
||||
self.assertEqual(fname('%(height)0 6d.%(ext)s'), ' 01080.mp4')
|
||||
self.assertEqual(fname('%(height)0 6d.%(ext)s'), ' 01080.mp4')
|
||||
self.assertEqual(fname('%(height) 0 6d.%(ext)s'), ' 01080.mp4')
|
||||
self.assertEqual(fname('%%(height)06d.%(ext)s'), '%(height)06d.mp4')
|
||||
self.assertEqual(fname('%(width)06d.%(ext)s'), 'NA.mp4')
|
||||
self.assertEqual(fname('%(width)06d.%%(ext)s'), 'NA.%(ext)s')
|
||||
self.assertEqual(fname('%%(width)06d.%(ext)s'), '%(width)06d.mp4')
|
||||
|
||||
def test_format_note(self):
|
||||
ydl = YoutubeDL()
|
||||
self.assertEqual(ydl._format_note({}), '')
|
||||
assertRegexpMatches(self, ydl._format_note({
|
||||
'vbr': 10,
|
||||
}), '^\s*10k$')
|
||||
}), r'^\s*10k$')
|
||||
assertRegexpMatches(self, ydl._format_note({
|
||||
'fps': 30,
|
||||
}), '^30fps$')
|
||||
}), r'^30fps$')
|
||||
|
||||
def test_postprocessors(self):
|
||||
filename = 'post-processor-testfile.mp4'
|
||||
@ -606,6 +621,8 @@ class TestYoutubeDL(unittest.TestCase):
|
||||
'duration': 30,
|
||||
'filesize': 10 * 1024,
|
||||
'playlist_id': '42',
|
||||
'uploader': "變態妍字幕版 太妍 тест",
|
||||
'creator': "тест ' 123 ' тест--",
|
||||
}
|
||||
second = {
|
||||
'id': '2',
|
||||
@ -616,6 +633,7 @@ class TestYoutubeDL(unittest.TestCase):
|
||||
'description': 'foo',
|
||||
'filesize': 5 * 1024,
|
||||
'playlist_id': '43',
|
||||
'uploader': "тест 123",
|
||||
}
|
||||
videos = [first, second]
|
||||
|
||||
@ -656,6 +674,26 @@ class TestYoutubeDL(unittest.TestCase):
|
||||
res = get_videos(f)
|
||||
self.assertEqual(res, ['1'])
|
||||
|
||||
f = match_filter_func('uploader = "變態妍字幕版 太妍 тест"')
|
||||
res = get_videos(f)
|
||||
self.assertEqual(res, ['1'])
|
||||
|
||||
f = match_filter_func('uploader != "變態妍字幕版 太妍 тест"')
|
||||
res = get_videos(f)
|
||||
self.assertEqual(res, ['2'])
|
||||
|
||||
f = match_filter_func('creator = "тест \' 123 \' тест--"')
|
||||
res = get_videos(f)
|
||||
self.assertEqual(res, ['1'])
|
||||
|
||||
f = match_filter_func("creator = 'тест \\' 123 \\' тест--'")
|
||||
res = get_videos(f)
|
||||
self.assertEqual(res, ['1'])
|
||||
|
||||
f = match_filter_func(r"creator = 'тест \' 123 \' тест--' & duration > 30")
|
||||
res = get_videos(f)
|
||||
self.assertEqual(res, [])
|
||||
|
||||
def test_playlist_items_selection(self):
|
||||
entries = [{
|
||||
'id': compat_str(i),
|
||||
|
@ -65,6 +65,10 @@ defs = gettestcases()
|
||||
|
||||
|
||||
class TestDownload(unittest.TestCase):
|
||||
# Parallel testing in nosetests. See
|
||||
# http://nose.readthedocs.org/en/latest/doc_tests/test_multiprocess/multiprocess.html
|
||||
_multiprocess_shared_ = True
|
||||
|
||||
maxDiff = None
|
||||
|
||||
def setUp(self):
|
||||
@ -73,7 +77,7 @@ class TestDownload(unittest.TestCase):
|
||||
# Dynamically generate tests
|
||||
|
||||
|
||||
def generator(test_case):
|
||||
def generator(test_case, tname):
|
||||
|
||||
def test_template(self):
|
||||
ie = youtube_dl.extractor.get_info_extractor(test_case['name'])
|
||||
@ -102,6 +106,7 @@ def generator(test_case):
|
||||
return
|
||||
|
||||
params = get_params(test_case.get('params', {}))
|
||||
params['outtmpl'] = tname + '_' + params['outtmpl']
|
||||
if is_playlist and 'playlist' not in test_case:
|
||||
params.setdefault('extract_flat', 'in_playlist')
|
||||
params.setdefault('skip_download', True)
|
||||
@ -146,7 +151,7 @@ def generator(test_case):
|
||||
raise
|
||||
|
||||
if try_num == RETRIES:
|
||||
report_warning('Failed due to network errors, skipping...')
|
||||
report_warning('%s failed due to network errors, skipping...' % tname)
|
||||
return
|
||||
|
||||
print('Retrying: {0} failed tries\n\n##########\n\n'.format(try_num))
|
||||
@ -221,12 +226,12 @@ def generator(test_case):
|
||||
|
||||
# And add them to TestDownload
|
||||
for n, test_case in enumerate(defs):
|
||||
test_method = generator(test_case)
|
||||
tname = 'test_' + str(test_case['name'])
|
||||
i = 1
|
||||
while hasattr(TestDownload, tname):
|
||||
tname = 'test_%s_%d' % (test_case['name'], i)
|
||||
i += 1
|
||||
test_method = generator(test_case, tname)
|
||||
test_method.__name__ = str(tname)
|
||||
setattr(TestDownload, test_method.__name__, test_method)
|
||||
del test_method
|
||||
|
@ -34,6 +34,9 @@ from youtube_dl.utils import (
|
||||
find_xpath_attr,
|
||||
fix_xml_ampersands,
|
||||
get_element_by_class,
|
||||
get_element_by_attribute,
|
||||
get_elements_by_class,
|
||||
get_elements_by_attribute,
|
||||
InAdvancePagedList,
|
||||
intlist_to_bytes,
|
||||
is_html,
|
||||
@ -510,6 +513,7 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(parse_duration('1 hour 3 minutes'), 3780)
|
||||
self.assertEqual(parse_duration('87 Min.'), 5220)
|
||||
self.assertEqual(parse_duration('PT1H0.040S'), 3600.04)
|
||||
self.assertEqual(parse_duration('PT00H03M30SZ'), 210)
|
||||
|
||||
def test_fix_xml_ampersands(self):
|
||||
self.assertEqual(
|
||||
@ -784,12 +788,27 @@ class TestUtil(unittest.TestCase):
|
||||
on = js_to_json('["abc", "def",]')
|
||||
self.assertEqual(json.loads(on), ['abc', 'def'])
|
||||
|
||||
on = js_to_json('[/*comment\n*/"abc"/*comment\n*/,/*comment\n*/"def",/*comment\n*/]')
|
||||
self.assertEqual(json.loads(on), ['abc', 'def'])
|
||||
|
||||
on = js_to_json('[//comment\n"abc" //comment\n,//comment\n"def",//comment\n]')
|
||||
self.assertEqual(json.loads(on), ['abc', 'def'])
|
||||
|
||||
on = js_to_json('{"abc": "def",}')
|
||||
self.assertEqual(json.loads(on), {'abc': 'def'})
|
||||
|
||||
on = js_to_json('{/*comment\n*/"abc"/*comment\n*/:/*comment\n*/"def"/*comment\n*/,/*comment\n*/}')
|
||||
self.assertEqual(json.loads(on), {'abc': 'def'})
|
||||
|
||||
on = js_to_json('{ 0: /* " \n */ ",]" , }')
|
||||
self.assertEqual(json.loads(on), {'0': ',]'})
|
||||
|
||||
on = js_to_json('{ /*comment\n*/0/*comment\n*/: /* " \n */ ",]" , }')
|
||||
self.assertEqual(json.loads(on), {'0': ',]'})
|
||||
|
||||
on = js_to_json('{ 0: // comment\n1 }')
|
||||
self.assertEqual(json.loads(on), {'0': 1})
|
||||
|
||||
on = js_to_json(r'["<p>x<\/p>"]')
|
||||
self.assertEqual(json.loads(on), ['<p>x</p>'])
|
||||
|
||||
@ -799,15 +818,27 @@ class TestUtil(unittest.TestCase):
|
||||
on = js_to_json("['a\\\nb']")
|
||||
self.assertEqual(json.loads(on), ['ab'])
|
||||
|
||||
on = js_to_json("/*comment\n*/[/*comment\n*/'a\\\nb'/*comment\n*/]/*comment\n*/")
|
||||
self.assertEqual(json.loads(on), ['ab'])
|
||||
|
||||
on = js_to_json('{0xff:0xff}')
|
||||
self.assertEqual(json.loads(on), {'255': 255})
|
||||
|
||||
on = js_to_json('{/*comment\n*/0xff/*comment\n*/:/*comment\n*/0xff/*comment\n*/}')
|
||||
self.assertEqual(json.loads(on), {'255': 255})
|
||||
|
||||
on = js_to_json('{077:077}')
|
||||
self.assertEqual(json.loads(on), {'63': 63})
|
||||
|
||||
on = js_to_json('{/*comment\n*/077/*comment\n*/:/*comment\n*/077/*comment\n*/}')
|
||||
self.assertEqual(json.loads(on), {'63': 63})
|
||||
|
||||
on = js_to_json('{42:42}')
|
||||
self.assertEqual(json.loads(on), {'42': 42})
|
||||
|
||||
on = js_to_json('{/*comment\n*/42/*comment\n*/:/*comment\n*/42/*comment\n*/}')
|
||||
self.assertEqual(json.loads(on), {'42': 42})
|
||||
|
||||
def test_extract_attributes(self):
|
||||
self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'})
|
||||
self.assertEqual(extract_attributes("<e x='y'>"), {'x': 'y'})
|
||||
@ -1096,6 +1127,32 @@ The first line
|
||||
self.assertEqual(get_element_by_class('foo', html), 'nice')
|
||||
self.assertEqual(get_element_by_class('no-such-class', html), None)
|
||||
|
||||
def test_get_element_by_attribute(self):
|
||||
html = '''
|
||||
<span class="foo bar">nice</span>
|
||||
'''
|
||||
|
||||
self.assertEqual(get_element_by_attribute('class', 'foo bar', html), 'nice')
|
||||
self.assertEqual(get_element_by_attribute('class', 'foo', html), None)
|
||||
self.assertEqual(get_element_by_attribute('class', 'no-such-foo', html), None)
|
||||
|
||||
def test_get_elements_by_class(self):
|
||||
html = '''
|
||||
<span class="foo bar">nice</span><span class="foo bar">also nice</span>
|
||||
'''
|
||||
|
||||
self.assertEqual(get_elements_by_class('foo', html), ['nice', 'also nice'])
|
||||
self.assertEqual(get_elements_by_class('no-such-class', html), [])
|
||||
|
||||
def test_get_elements_by_attribute(self):
|
||||
html = '''
|
||||
<span class="foo bar">nice</span><span class="foo bar">also nice</span>
|
||||
'''
|
||||
|
||||
self.assertEqual(get_elements_by_attribute('class', 'foo bar', html), ['nice', 'also nice'])
|
||||
self.assertEqual(get_elements_by_attribute('class', 'foo', html), [])
|
||||
self.assertEqual(get_elements_by_attribute('class', 'no-such-foo', html), [])
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@ -24,6 +24,7 @@ import sys
|
||||
import time
|
||||
import tokenize
|
||||
import traceback
|
||||
import random
|
||||
|
||||
from .compat import (
|
||||
compat_basestring,
|
||||
@ -32,6 +33,7 @@ from .compat import (
|
||||
compat_get_terminal_size,
|
||||
compat_http_client,
|
||||
compat_kwargs,
|
||||
compat_numeric_types,
|
||||
compat_os_name,
|
||||
compat_str,
|
||||
compat_tokenize_tokenize,
|
||||
@ -55,6 +57,8 @@ from .utils import (
|
||||
ExtractorError,
|
||||
format_bytes,
|
||||
formatSeconds,
|
||||
GeoRestrictedError,
|
||||
ISO3166Utils,
|
||||
locked_file,
|
||||
make_HTTPS_handler,
|
||||
MaxDownloadsReached,
|
||||
@ -159,6 +163,7 @@ class YoutubeDL(object):
|
||||
playlistend: Playlist item to end at.
|
||||
playlist_items: Specific indices of playlist to download.
|
||||
playlistreverse: Download playlist items in reverse order.
|
||||
playlistrandom: Download playlist items in random order.
|
||||
matchtitle: Download only matching titles.
|
||||
rejecttitle: Reject downloads for matching titles.
|
||||
logger: Log messages to a logging.Logger instance.
|
||||
@ -270,6 +275,12 @@ class YoutubeDL(object):
|
||||
If it returns None, the video is downloaded.
|
||||
match_filter_func in utils.py is one example for this.
|
||||
no_color: Do not emit color codes in output.
|
||||
geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
|
||||
HTTP header (experimental)
|
||||
geo_bypass_country:
|
||||
Two-letter ISO 3166-2 country code that will be used for
|
||||
explicit geographic restriction bypassing via faking
|
||||
X-Forwarded-For HTTP header (experimental)
|
||||
|
||||
The following options determine which downloader is picked:
|
||||
external_downloader: Executable of the external downloader to call.
|
||||
@ -317,11 +328,21 @@ class YoutubeDL(object):
|
||||
self.params.update(params)
|
||||
self.cache = Cache(self)
|
||||
|
||||
if self.params.get('cn_verification_proxy') is not None:
|
||||
self.report_warning('--cn-verification-proxy is deprecated. Use --geo-verification-proxy instead.')
|
||||
def check_deprecated(param, option, suggestion):
|
||||
if self.params.get(param) is not None:
|
||||
self.report_warning(
|
||||
'%s is deprecated. Use %s instead.' % (option, suggestion))
|
||||
return True
|
||||
return False
|
||||
|
||||
if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
|
||||
if self.params.get('geo_verification_proxy') is None:
|
||||
self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
|
||||
|
||||
check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N in the number of digits')
|
||||
check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
|
||||
check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
|
||||
|
||||
if params.get('bidi_workaround', False):
|
||||
try:
|
||||
import pty
|
||||
@ -583,10 +604,7 @@ class YoutubeDL(object):
|
||||
autonumber_size = self.params.get('autonumber_size')
|
||||
if autonumber_size is None:
|
||||
autonumber_size = 5
|
||||
autonumber_templ = '%0' + str(autonumber_size) + 'd'
|
||||
template_dict['autonumber'] = autonumber_templ % self._num_downloads
|
||||
if template_dict.get('playlist_index') is not None:
|
||||
template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
|
||||
template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
|
||||
if template_dict.get('resolution') is None:
|
||||
if template_dict.get('width') and template_dict.get('height'):
|
||||
template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
|
||||
@ -599,12 +617,61 @@ class YoutubeDL(object):
|
||||
compat_str(v),
|
||||
restricted=self.params.get('restrictfilenames'),
|
||||
is_id=(k == 'id'))
|
||||
template_dict = dict((k, sanitize(k, v))
|
||||
template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
|
||||
for k, v in template_dict.items()
|
||||
if v is not None and not isinstance(v, (list, tuple, dict)))
|
||||
template_dict = collections.defaultdict(lambda: 'NA', template_dict)
|
||||
|
||||
outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
|
||||
|
||||
# For fields playlist_index and autonumber convert all occurrences
|
||||
# of %(field)s to %(field)0Nd for backward compatibility
|
||||
field_size_compat_map = {
|
||||
'playlist_index': len(str(template_dict['n_entries'])),
|
||||
'autonumber': autonumber_size,
|
||||
}
|
||||
FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
|
||||
mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl)
|
||||
if mobj:
|
||||
outtmpl = re.sub(
|
||||
FIELD_SIZE_COMPAT_RE,
|
||||
r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
|
||||
outtmpl)
|
||||
|
||||
NUMERIC_FIELDS = set((
|
||||
'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
|
||||
'upload_year', 'upload_month', 'upload_day',
|
||||
'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
|
||||
'average_rating', 'comment_count', 'age_limit',
|
||||
'start_time', 'end_time',
|
||||
'chapter_number', 'season_number', 'episode_number',
|
||||
'track_number', 'disc_number', 'release_year',
|
||||
'playlist_index',
|
||||
))
|
||||
|
||||
# Missing numeric fields used together with integer presentation types
|
||||
# in format specification will break the argument substitution since
|
||||
# string 'NA' is returned for missing fields. We will patch output
|
||||
# template for missing fields to meet string presentation type.
|
||||
for numeric_field in NUMERIC_FIELDS:
|
||||
if numeric_field not in template_dict:
|
||||
# As of [1] format syntax is:
|
||||
# %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
|
||||
# 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
|
||||
FORMAT_RE = r'''(?x)
|
||||
(?<!%)
|
||||
%
|
||||
\({0}\) # mapping key
|
||||
(?:[#0\-+ ]+)? # conversion flags (optional)
|
||||
(?:\d+)? # minimum field width (optional)
|
||||
(?:\.\d+)? # precision (optional)
|
||||
[hlL]? # length modifier (optional)
|
||||
[diouxXeEfFgGcrs%] # conversion type
|
||||
'''
|
||||
outtmpl = re.sub(
|
||||
FORMAT_RE.format(numeric_field),
|
||||
r'%({0})s'.format(numeric_field), outtmpl)
|
||||
|
||||
tmpl = compat_expanduser(outtmpl)
|
||||
filename = tmpl % template_dict
|
||||
# Temporary fix for #4787
|
||||
@ -705,6 +772,14 @@ class YoutubeDL(object):
|
||||
return self.process_ie_result(ie_result, download, extra_info)
|
||||
else:
|
||||
return ie_result
|
||||
except GeoRestrictedError as e:
|
||||
msg = e.msg
|
||||
if e.countries:
|
||||
msg += '\nThis video is available in %s.' % ', '.join(
|
||||
map(ISO3166Utils.short2full, e.countries))
|
||||
msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
|
||||
self.report_error(msg)
|
||||
break
|
||||
except ExtractorError as e: # An error we somewhat expected
|
||||
self.report_error(compat_str(e), e.format_traceback())
|
||||
break
|
||||
@ -842,8 +917,17 @@ class YoutubeDL(object):
|
||||
if self.params.get('playlistreverse', False):
|
||||
entries = entries[::-1]
|
||||
|
||||
if self.params.get('playlistrandom', False):
|
||||
random.shuffle(entries)
|
||||
|
||||
x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
|
||||
|
||||
for i, entry in enumerate(entries, 1):
|
||||
self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
|
||||
# This __x_forwarded_for_ip thing is a bit ugly but requires
|
||||
# minimal changes
|
||||
if x_forwarded_for:
|
||||
entry['__x_forwarded_for_ip'] = x_forwarded_for
|
||||
extra = {
|
||||
'n_entries': n_entries,
|
||||
'playlist': playlist,
|
||||
@ -1228,6 +1312,11 @@ class YoutubeDL(object):
|
||||
if cookies:
|
||||
res['Cookie'] = cookies
|
||||
|
||||
if 'X-Forwarded-For' not in res:
|
||||
x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
|
||||
if x_forwarded_for_ip:
|
||||
res['X-Forwarded-For'] = x_forwarded_for_ip
|
||||
|
||||
return res
|
||||
|
||||
def _calc_cookies(self, info_dict):
|
||||
@ -1363,13 +1452,16 @@ class YoutubeDL(object):
|
||||
format['ext'] = determine_ext(format['url']).lower()
|
||||
# Automatically determine protocol if missing (useful for format
|
||||
# selection purposes)
|
||||
if 'protocol' not in format:
|
||||
if format.get('protocol') is None:
|
||||
format['protocol'] = determine_protocol(format)
|
||||
# Add HTTP headers, so that external programs can use them from the
|
||||
# json output
|
||||
full_format_info = info_dict.copy()
|
||||
full_format_info.update(format)
|
||||
format['http_headers'] = self._calc_headers(full_format_info)
|
||||
# Remove private housekeeping stuff
|
||||
if '__x_forwarded_for_ip' in info_dict:
|
||||
del info_dict['__x_forwarded_for_ip']
|
||||
|
||||
# TODO Central sorting goes here
|
||||
|
||||
|
@ -133,6 +133,12 @@ def _real_main(argv=None):
|
||||
parser.error('TV Provider account username missing\n')
|
||||
if opts.outtmpl is not None and (opts.usetitle or opts.autonumber or opts.useid):
|
||||
parser.error('using output template conflicts with using title, video ID or auto number')
|
||||
if opts.autonumber_size is not None:
|
||||
if opts.autonumber_size <= 0:
|
||||
parser.error('auto number size must be positive')
|
||||
if opts.autonumber_start is not None:
|
||||
if opts.autonumber_start < 0:
|
||||
parser.error('auto number start must be positive or 0')
|
||||
if opts.usetitle and opts.useid:
|
||||
parser.error('using title conflicts with using video ID')
|
||||
if opts.username is not None and opts.password is None:
|
||||
@ -321,6 +327,7 @@ def _real_main(argv=None):
|
||||
'listformats': opts.listformats,
|
||||
'outtmpl': outtmpl,
|
||||
'autonumber_size': opts.autonumber_size,
|
||||
'autonumber_start': opts.autonumber_start,
|
||||
'restrictfilenames': opts.restrictfilenames,
|
||||
'ignoreerrors': opts.ignoreerrors,
|
||||
'force_generic_extractor': opts.force_generic_extractor,
|
||||
@ -337,6 +344,7 @@ def _real_main(argv=None):
|
||||
'playliststart': opts.playliststart,
|
||||
'playlistend': opts.playlistend,
|
||||
'playlistreverse': opts.playlist_reverse,
|
||||
'playlistrandom': opts.playlist_random,
|
||||
'noplaylist': opts.noplaylist,
|
||||
'logtostderr': opts.outtmpl == '-',
|
||||
'consoletitle': opts.consoletitle,
|
||||
@ -406,6 +414,11 @@ def _real_main(argv=None):
|
||||
'cn_verification_proxy': opts.cn_verification_proxy,
|
||||
'geo_verification_proxy': opts.geo_verification_proxy,
|
||||
'config_location': opts.config_location,
|
||||
'geo_bypass': opts.geo_bypass,
|
||||
'geo_bypass_country': opts.geo_bypass_country,
|
||||
# just for deprecation check
|
||||
'autonumber': opts.autonumber if opts.autonumber is True else None,
|
||||
'usetitle': opts.usetitle if opts.usetitle is True else None,
|
||||
}
|
||||
|
||||
with YoutubeDL(ydl_opts) as ydl:
|
||||
|
@ -2529,6 +2529,24 @@ else:
|
||||
el.text = el.text.decode('utf-8')
|
||||
return doc
|
||||
|
||||
# Prefer the standard library implementation when ElementTree provides it;
# otherwise fall back to a local implementation working on the same
# module-global ``etree._namespace_map`` registry.
if hasattr(etree, 'register_namespace'):
    compat_etree_register_namespace = etree.register_namespace
else:
    def compat_etree_register_namespace(prefix, uri):
        """Register a namespace prefix.

        The registry is global, and any existing mapping for either the
        given prefix or the namespace URI will be removed.

        *prefix* is the namespace prefix, *uri* is a namespace uri. Tags and
        attributes in this namespace will be serialized with prefix if
        possible.

        ValueError is raised if prefix is reserved or is invalid.
        """
        if re.match(r"ns\d+$", prefix):
            raise ValueError("Prefix format reserved for internal use")
        # Iterate over a snapshot since entries are deleted while scanning
        for k, v in list(etree._namespace_map.items()):
            if k == uri or v == prefix:
                del etree._namespace_map[k]
        etree._namespace_map[uri] = prefix
|
||||
|
||||
if sys.version_info < (2, 7):
|
||||
# Here comes the crazy part: In 2.6, if the xpath is a unicode,
|
||||
# .//node does not match if a node is a direct child of . !
|
||||
@ -2742,6 +2760,10 @@ else:
|
||||
compat_kwargs = lambda kwargs: kwargs
|
||||
|
||||
|
||||
# Tuple of builtin numeric types, suitable as the second argument of
# isinstance(). Python 2 additionally has the separate ``long`` type; the
# name is only evaluated on that branch.
if sys.version_info[0] < 3:
    compat_numeric_types = (int, float, long, complex)  # noqa: F821
else:
    compat_numeric_types = (int, float, complex)
|
||||
|
||||
|
||||
if sys.version_info < (2, 7):
|
||||
def compat_socket_create_connection(address, timeout, source_address=None):
|
||||
host, port = address
|
||||
@ -2865,6 +2887,7 @@ __all__ = [
|
||||
'compat_cookiejar',
|
||||
'compat_cookies',
|
||||
'compat_etree_fromstring',
|
||||
'compat_etree_register_namespace',
|
||||
'compat_expanduser',
|
||||
'compat_get_terminal_size',
|
||||
'compat_getenv',
|
||||
@ -2876,6 +2899,7 @@ __all__ = [
|
||||
'compat_input',
|
||||
'compat_itertools_count',
|
||||
'compat_kwargs',
|
||||
'compat_numeric_types',
|
||||
'compat_ord',
|
||||
'compat_os_name',
|
||||
'compat_parse_qs',
|
||||
|
@ -43,7 +43,10 @@ class DashSegmentsFD(FragmentFD):
|
||||
count = 0
|
||||
while count <= fragment_retries:
|
||||
try:
|
||||
success = ctx['dl'].download(target_filename, {'url': segment_url})
|
||||
success = ctx['dl'].download(target_filename, {
|
||||
'url': segment_url,
|
||||
'http_headers': info_dict.get('http_headers'),
|
||||
})
|
||||
if not success:
|
||||
return False
|
||||
down, target_sanitized = sanitize_open(target_filename, 'rb')
|
||||
|
@ -17,6 +17,7 @@ from ..utils import (
|
||||
encodeArgument,
|
||||
handle_youtubedl_headers,
|
||||
check_executable,
|
||||
is_outdated_version,
|
||||
)
|
||||
|
||||
|
||||
@ -198,6 +199,15 @@ class FFmpegFD(ExternalFD):
|
||||
|
||||
args = [ffpp.executable, '-y']
|
||||
|
||||
seekable = info_dict.get('_seekable')
|
||||
if seekable is not None:
|
||||
# setting -seekable prevents ffmpeg from guessing if the server
|
||||
# supports seeking(by adding the header `Range: bytes=0-`), which
|
||||
# can cause problems in some cases
|
||||
# https://github.com/rg3/youtube-dl/issues/11800#issuecomment-275037127
|
||||
# http://trac.ffmpeg.org/ticket/6125#comment:10
|
||||
args += ['-seekable', '1' if seekable else '0']
|
||||
|
||||
args += self._configuration_args()
|
||||
|
||||
# start_time = info_dict.get('start_time') or 0
|
||||
@ -264,7 +274,9 @@ class FFmpegFD(ExternalFD):
|
||||
if self.params.get('hls_use_mpegts', False) or tmpfilename == '-':
|
||||
args += ['-f', 'mpegts']
|
||||
else:
|
||||
args += ['-f', 'mp4', '-bsf:a', 'aac_adtstoasc']
|
||||
args += ['-f', 'mp4']
|
||||
if (ffpp.basename == 'ffmpeg' and is_outdated_version(ffpp._versions['ffmpeg'], '3.2', False)) and (not info_dict.get('acodec') or info_dict['acodec'].split('.')[0] in ('aac', 'mp4a')):
|
||||
args += ['-bsf:a', 'aac_adtstoasc']
|
||||
elif protocol == 'rtmp':
|
||||
args += ['-f', 'flv']
|
||||
else:
|
||||
|
@ -61,6 +61,7 @@ class FragmentFD(FileDownloader):
|
||||
'noprogress': True,
|
||||
'ratelimit': self.params.get('ratelimit'),
|
||||
'retries': self.params.get('retries', 0),
|
||||
'nopart': self.params.get('nopart', False),
|
||||
'test': self.params.get('test', False),
|
||||
}
|
||||
)
|
||||
|
@ -238,7 +238,10 @@ class IsmFD(FragmentFD):
|
||||
count = 0
|
||||
while count <= fragment_retries:
|
||||
try:
|
||||
success = ctx['dl'].download(target_filename, {'url': segment_url})
|
||||
success = ctx['dl'].download(target_filename, {
|
||||
'url': segment_url,
|
||||
'http_headers': info_dict.get('http_headers'),
|
||||
})
|
||||
if not success:
|
||||
return False
|
||||
down, target_sanitized = sanitize_open(target_filename, 'rb')
|
||||
|
@ -31,6 +31,11 @@ MSO_INFO = {
|
||||
'username_field': 'user',
|
||||
'password_field': 'passwd',
|
||||
},
|
||||
'TWC': {
|
||||
'name': 'Time Warner Cable | Spectrum',
|
||||
'username_field': 'Ecom_User_ID',
|
||||
'password_field': 'Ecom_Password',
|
||||
},
|
||||
'thr030': {
|
||||
'name': '3 Rivers Communications'
|
||||
},
|
||||
|
@ -23,7 +23,7 @@ class AENetworksBaseIE(ThePlatformIE):
|
||||
class AENetworksIE(AENetworksBaseIE):
|
||||
IE_NAME = 'aenetworks'
|
||||
IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:history|aetv|mylifetime)\.com|fyi\.tv)/(?:shows/(?P<show_path>[^/]+(?:/[^/]+){0,2})|movies/(?P<movie_display_id>[^/]+)/full-movie)'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:history|aetv|mylifetime|lifetimemovieclub)\.com|fyi\.tv)/(?:shows/(?P<show_path>[^/]+(?:/[^/]+){0,2})|movies/(?P<movie_display_id>[^/]+)(?:/full-movie)?)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1',
|
||||
'md5': 'a97a65f7e823ae10e9244bc5433d5fe6',
|
||||
@ -62,11 +62,15 @@ class AENetworksIE(AENetworksBaseIE):
|
||||
}, {
|
||||
'url': 'http://www.mylifetime.com/movies/center-stage-on-pointe/full-movie',
|
||||
'only_matching': True
|
||||
}, {
|
||||
'url': 'https://www.lifetimemovieclub.com/movies/a-killer-among-us',
|
||||
'only_matching': True
|
||||
}]
|
||||
_DOMAIN_TO_REQUESTOR_ID = {
|
||||
'history.com': 'HISTORY',
|
||||
'aetv.com': 'AETV',
|
||||
'mylifetime.com': 'LIFETIME',
|
||||
'lifetimemovieclub.com': 'LIFETIMEMOVIECLUB',
|
||||
'fyi.tv': 'FYI',
|
||||
}
|
||||
|
||||
@ -87,7 +91,7 @@ class AENetworksIE(AENetworksBaseIE):
|
||||
self._html_search_meta('aetn:SeriesTitle', webpage))
|
||||
elif url_parts_len == 2:
|
||||
entries = []
|
||||
for episode_item in re.findall(r'(?s)<div[^>]+class="[^"]*episode-item[^"]*"[^>]*>', webpage):
|
||||
for episode_item in re.findall(r'(?s)<[^>]+class="[^"]*(?:episode|program)-item[^"]*"[^>]*>', webpage):
|
||||
episode_attributes = extract_attributes(episode_item)
|
||||
episode_url = compat_urlparse.urljoin(
|
||||
url, episode_attributes['data-canonical'])
|
||||
|
@ -18,6 +18,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class AfreecaTVIE(InfoExtractor):
|
||||
IE_NAME = 'afreecatv'
|
||||
IE_DESC = 'afreecatv.com'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
@ -143,3 +144,107 @@ class AfreecaTVIE(InfoExtractor):
|
||||
expected=True)
|
||||
|
||||
return info
|
||||
|
||||
|
||||
class AfreecaTVGlobalIE(AfreecaTVIE):
    """Extractor for afreeca.tv channel URLs.

    A URL carrying a ``/v/<video_id>`` part is queried as a recorded video
    (possibly split into several files); a bare channel URL is queried as
    the channel's live stream.
    """
    IE_NAME = 'afreecatv:global'
    _VALID_URL = r'https?://(?:www\.)?afreeca\.tv/(?P<channel_id>\d+)(?:/v/(?P<video_id>\d+))?'
    _TESTS = [{
        'url': 'http://afreeca.tv/36853014/v/58301',
        'info_dict': {
            'id': '58301',
            'title': 'tryhard top100',
            'uploader_id': '36853014',
            'uploader': 'makgi Hearthstone Live!',
        },
        'playlist_count': 3,
    }]

    def _real_extract(self, url):
        """Return the info dict for the video or live stream behind *url*.

        Raises ExtractorError (marked as expected) when the API response
        does not report ``result == 1``.
        """
        channel_id, video_id = re.match(self._VALID_URL, url).groups()
        video_type = 'video' if video_id else 'live'
        query = {
            'pt': 'view',
            'bid': channel_id,
        }
        if video_id:
            query['vno'] = video_id
        video_data = self._download_json(
            'http://api.afreeca.tv/%s/view_%s.php' % (video_type, video_type),
            video_id or channel_id, query=query)['channel']

        if video_data.get('result') != 1:
            # The service reported the failure itself, so this is not a bug
            # in the extractor; use .get() so that a missing message cannot
            # mask the real error with a KeyError.
            raise ExtractorError(
                '%s said: %s' % (self.IE_NAME, video_data.get('remsg')),
                expected=True)

        title = video_data['title']

        # Metadata shared by the VOD and the live code paths
        info = {
            'thumbnail': video_data.get('thumb'),
            'view_count': int_or_none(video_data.get('vcnt')),
            'age_limit': int_or_none(video_data.get('grade')),
            'uploader_id': channel_id,
            'uploader': video_data.get('cname'),
        }

        if video_id:
            entries = []
            # ``or []`` also covers an explicit null in the JSON response
            for idx, f in enumerate(video_data.get('flist') or []):
                video_key = self.parse_video_key(f.get('key', ''))
                f_url = f.get('file')
                if not video_key or not f_url:
                    continue
                entries.append({
                    'id': '%s_%s' % (video_id, video_key.get('part', idx + 1)),
                    'title': title,
                    'upload_date': video_key.get('upload_date'),
                    'duration': int_or_none(f.get('length')),
                    'url': f_url,
                    'protocol': 'm3u8_native',
                    'ext': 'mp4',
                })

            info.update({
                'id': video_id,
                'title': title,
                'duration': int_or_none(video_data.get('length')),
            })
            if len(entries) > 1:
                info['_type'] = 'multi_video'
                info['entries'] = entries
            elif len(entries) == 1:
                # Single file: merge it with the common metadata, letting
                # the common metadata take precedence.
                single = entries[0].copy()
                single.update(info)
                info = single
        else:
            formats = []
            for s in video_data.get('strm') or []:
                s_url = s.get('purl')
                if not s_url:
                    continue
                stype = s.get('stype')
                if stype == 'HLS':
                    formats.extend(self._extract_m3u8_formats(
                        s_url, channel_id, 'mp4', m3u8_id=stype, fatal=False))
                elif stype == 'RTMP':
                    format_id = [stype]
                    label = s.get('label')
                    if label:
                        format_id.append(label)
                    formats.append({
                        'format_id': '-'.join(format_id),
                        'url': s_url,
                        'tbr': int_or_none(s.get('bps')),
                        'height': int_or_none(s.get('brt')),
                        'ext': 'flv',
                        'rtmp_live': True,
                    })
            self._sort_formats(formats)

            info.update({
                'id': channel_id,
                'title': self._live_title(title),
                'is_live': True,
                'formats': formats,
            })

        return info
|
||||
|
@ -53,20 +53,30 @@ class AMCNetworksIE(ThePlatformIE):
|
||||
'mbr': 'true',
|
||||
'manifest': 'm3u',
|
||||
}
|
||||
media_url = self._search_regex(r'window\.platformLinkURL\s*=\s*[\'"]([^\'"]+)', webpage, 'media url')
|
||||
media_url = self._search_regex(
|
||||
r'window\.platformLinkURL\s*=\s*[\'"]([^\'"]+)',
|
||||
webpage, 'media url')
|
||||
theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
|
||||
r'https?://link.theplatform.com/s/([^?]+)', media_url, 'theplatform_path'), display_id)
|
||||
r'link\.theplatform\.com/s/([^?]+)',
|
||||
media_url, 'theplatform_path'), display_id)
|
||||
info = self._parse_theplatform_metadata(theplatform_metadata)
|
||||
video_id = theplatform_metadata['pid']
|
||||
title = theplatform_metadata['title']
|
||||
rating = theplatform_metadata['ratings'][0]['rating']
|
||||
auth_required = self._search_regex(r'window\.authRequired\s*=\s*(true|false);', webpage, 'auth required')
|
||||
auth_required = self._search_regex(
|
||||
r'window\.authRequired\s*=\s*(true|false);',
|
||||
webpage, 'auth required')
|
||||
if auth_required == 'true':
|
||||
requestor_id = self._search_regex(r'window\.requestor_id\s*=\s*[\'"]([^\'"]+)', webpage, 'requestor id')
|
||||
resource = self._get_mvpd_resource(requestor_id, title, video_id, rating)
|
||||
query['auth'] = self._extract_mvpd_auth(url, video_id, requestor_id, resource)
|
||||
requestor_id = self._search_regex(
|
||||
r'window\.requestor_id\s*=\s*[\'"]([^\'"]+)',
|
||||
webpage, 'requestor id')
|
||||
resource = self._get_mvpd_resource(
|
||||
requestor_id, title, video_id, rating)
|
||||
query['auth'] = self._extract_mvpd_auth(
|
||||
url, video_id, requestor_id, resource)
|
||||
media_url = update_url_query(media_url, query)
|
||||
formats, subtitles = self._extract_theplatform_smil(media_url, video_id)
|
||||
formats, subtitles = self._extract_theplatform_smil(
|
||||
media_url, video_id)
|
||||
self._sort_formats(formats)
|
||||
info.update({
|
||||
'id': video_id,
|
||||
@ -78,9 +88,11 @@ class AMCNetworksIE(ThePlatformIE):
|
||||
if ns_keys:
|
||||
ns = list(ns_keys)[0]
|
||||
series = theplatform_metadata.get(ns + '$show')
|
||||
season_number = int_or_none(theplatform_metadata.get(ns + '$season'))
|
||||
season_number = int_or_none(
|
||||
theplatform_metadata.get(ns + '$season'))
|
||||
episode = theplatform_metadata.get(ns + '$episodeTitle')
|
||||
episode_number = int_or_none(theplatform_metadata.get(ns + '$episode'))
|
||||
episode_number = int_or_none(
|
||||
theplatform_metadata.get(ns + '$episode'))
|
||||
if season_number:
|
||||
title = 'Season %d - %s' % (season_number, title)
|
||||
if series:
|
||||
|
@ -1,13 +1,13 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .jwplatform import JWPlatformBaseIE
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
unified_strdate,
|
||||
clean_html,
|
||||
)
|
||||
|
||||
|
||||
class ArchiveOrgIE(JWPlatformBaseIE):
|
||||
class ArchiveOrgIE(InfoExtractor):
|
||||
IE_NAME = 'archive.org'
|
||||
IE_DESC = 'archive.org videos'
|
||||
_VALID_URL = r'https?://(?:www\.)?archive\.org/(?:details|embed)/(?P<id>[^/?#]+)(?:[?].*)?$'
|
||||
|
172
youtube_dl/extractor/azmedien.py
Normal file
172
youtube_dl/extractor/azmedien.py
Normal file
@ -0,0 +1,172 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .kaltura import KalturaIE
|
||||
from ..utils import (
|
||||
get_element_by_id,
|
||||
strip_or_none,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class AZMedienBaseIE(InfoExtractor):
    """Shared helper base for the AZ Medien extractors."""

    def _kaltura_video(self, partner_id, entry_id):
        """Return a url_result delegating *entry_id* of *partner_id* to KalturaIE."""
        return self.url_result(
            'kaltura:%s:%s' % (partner_id, entry_id), ie=KalturaIE.ie_key(),
            video_id=entry_id)
|
||||
|
||||
|
||||
class AZMedienIE(AZMedienBaseIE):
    """Extractor for single videos on telezueri.ch, telebaern.tv and telem1.ch.

    The video id is the trailing slug of the URL (path segment or fragment);
    the actual media is delegated to Kaltura.
    """
    IE_DESC = 'AZ Medien videos'
    _VALID_URL = r'''(?x)
                    https?://
                        (?:www\.)?
                        (?:
                            telezueri\.ch|
                            telebaern\.tv|
                            telem1\.ch
                        )/
                        [0-9]+-show-[^/\#]+
                        (?:
                            /[0-9]+-episode-[^/\#]+
                            (?:
                                /[0-9]+-segment-(?:[^/\#]+\#)?|
                                \#
                            )|
                            \#
                        )
                        (?P<id>[^\#]+)
                    '''

    _TESTS = [{
        # URL with 'segment'
        'url': 'http://www.telezueri.ch/62-show-zuerinews/13772-episode-sonntag-18-dezember-2016/32419-segment-massenabweisungen-beim-hiltl-club-wegen-pelzboom',
        'info_dict': {
            'id': '1_2444peh4',
            'ext': 'mov',
            'title': 'Massenabweisungen beim Hiltl Club wegen Pelzboom',
            'description': 'md5:9ea9dd1b159ad65b36ddcf7f0d7c76a8',
            'uploader_id': 'TeleZ?ri',
            'upload_date': '20161218',
            'timestamp': 1482084490,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # URL with 'segment' and fragment:
        'url': 'http://www.telebaern.tv/118-show-news/14240-episode-dienstag-17-januar-2017/33666-segment-achtung-gefahr#zu-wenig-pflegerinnen-und-pfleger',
        'only_matching': True
    }, {
        # URL with 'episode' and fragment:
        'url': 'http://www.telem1.ch/47-show-sonntalk/13986-episode-soldaten-fuer-grenzschutz-energiestrategie-obama-bilanz#soldaten-fuer-grenzschutz-energiestrategie-obama-bilanz',
        'only_matching': True
    }, {
        # URL with 'show' and fragment:
        'url': 'http://www.telezueri.ch/66-show-sonntalk#burka-plakate-trump-putin-china-besuch',
        'only_matching': True
    }]

    def _real_extract(self, url):
        """Resolve the Kaltura partner and entry ids from the page of *url*."""
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        # The partner id is taken from the URL of the embedded Kaltura script
        partner_id = self._search_regex(
            r'<script[^>]+src=["\'](?:https?:)?//(?:[^/]+\.)?kaltura\.com(?:/[^/]+)*/(?:p|partner_id)/([0-9]+)',
            webpage, 'kaltura partner id')
        # The entry id is the data-id of the anchor whose data-slug starts
        # with the slug taken from the URL
        entry_id = self._html_search_regex(
            r'<a[^>]+data-id=(["\'])(?P<id>(?:(?!\1).)+)\1[^>]+data-slug=["\']%s'
            % re.escape(video_id), webpage, 'kaltura entry id', group='id')

        return self._kaltura_video(partner_id, entry_id)
|
||||
|
||||
|
||||
class AZMedienPlaylistIE(AZMedienBaseIE):
    """Extractor for show, topic, themen and episode pages of AZ Medien sites.

    Entries are collected with three strategies tried in order; each later
    one is only used when the previous one yielded nothing.
    """
    IE_DESC = 'AZ Medien playlists'
    _VALID_URL = r'''(?x)
                    https?://
                        (?:www\.)?
                        (?:
                            telezueri\.ch|
                            telebaern\.tv|
                            telem1\.ch
                        )/
                        (?P<id>[0-9]+-
                            (?:
                                show|
                                topic|
                                themen
                            )-[^/\#]+
                            (?:
                                /[0-9]+-episode-[^/\#]+
                            )?
                        )$
                    '''

    _TESTS = [{
        # URL with 'episode'
        'url': 'http://www.telebaern.tv/118-show-news/13735-episode-donnerstag-15-dezember-2016',
        'info_dict': {
            'id': '118-show-news/13735-episode-donnerstag-15-dezember-2016',
            'title': 'News - Donnerstag, 15. Dezember 2016',
        },
        'playlist_count': 9,
    }, {
        # URL with 'themen'
        'url': 'http://www.telem1.ch/258-themen-tele-m1-classics',
        'info_dict': {
            'id': '258-themen-tele-m1-classics',
            'title': 'Tele M1 Classics',
        },
        'playlist_mincount': 15,
    }, {
        # URL with 'topic', contains nested playlists
        'url': 'http://www.telezueri.ch/219-topic-aera-trump-hat-offiziell-begonnen',
        'only_matching': True,
    }, {
        # URL with 'show' only
        'url': 'http://www.telezueri.ch/86-show-talktaeglich',
        'only_matching': True
    }]

    def _real_extract(self, url):
        """Return a playlist result built from the page behind *url*."""
        show_id = self._match_id(url)
        webpage = self._download_webpage(url, show_id)

        entries = []

        partner_id = self._search_regex(
            r'src=["\'](?:https?:)?//(?:[^/]+\.)kaltura\.com/(?:[^/]+/)*(?:p|partner_id)/(\d+)',
            webpage, 'kaltura partner id', default=None)

        # 1st strategy: direct Kaltura entries referenced via data-id
        if partner_id:
            entries = [
                self._kaltura_video(partner_id, m.group('id'))
                for m in re.finditer(
                    r'data-id=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage)]

        # 2nd strategy: absolute video page URLs stored in data-real
        if not entries:
            entries = [
                self.url_result(m.group('url'), ie=AZMedienIE.ie_key())
                for m in re.finditer(
                    r'<a[^>]+data-real=(["\'])(?P<url>http.+?)\1', webpage)]

        # 3rd strategy: site-relative links of named anchors
        if not entries:
            entries = [
                # May contain nested playlists (e.g. [1]) thus no explicit
                # ie_key
                # 1. http://www.telezueri.ch/219-topic-aera-trump-hat-offiziell-begonnen
                self.url_result(urljoin(url, m.group('url')))
                for m in re.finditer(
                    r'<a[^>]+name=[^>]+href=(["\'])(?P<url>/.+?)\1', webpage)]

        # Prefer the JS share title; fall back to the #video-title element
        title = self._search_regex(
            r'episodeShareTitle\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1',
            webpage, 'title',
            default=strip_or_none(get_element_by_id(
                'video-title', webpage)), group='title')

        return self.playlist_result(entries, show_id, title)
|
@ -209,6 +209,15 @@ class BandcampAlbumIE(InfoExtractor):
|
||||
'id': 'entropy-ep',
|
||||
},
|
||||
'playlist_mincount': 3,
|
||||
}, {
|
||||
# not all tracks have songs
|
||||
'url': 'https://insulters.bandcamp.com/album/we-are-the-plague',
|
||||
'info_dict': {
|
||||
'id': 'we-are-the-plague',
|
||||
'title': 'WE ARE THE PLAGUE',
|
||||
'uploader_id': 'insulters',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -217,12 +226,16 @@ class BandcampAlbumIE(InfoExtractor):
|
||||
album_id = mobj.group('album_id')
|
||||
playlist_id = album_id or uploader_id
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
tracks_paths = re.findall(r'<a href="(.*?)" itemprop="url">', webpage)
|
||||
if not tracks_paths:
|
||||
track_elements = re.findall(
|
||||
r'(?s)<div[^>]*>(.*?<a[^>]+href="([^"]+?)"[^>]+itemprop="url"[^>]*>.*?)</div>', webpage)
|
||||
if not track_elements:
|
||||
raise ExtractorError('The page doesn\'t contain any tracks')
|
||||
# Only tracks with duration info have songs
|
||||
entries = [
|
||||
self.url_result(compat_urlparse.urljoin(url, t_path), ie=BandcampIE.ie_key())
|
||||
for t_path in tracks_paths]
|
||||
for elem_content, t_path in track_elements
|
||||
if self._html_search_meta('duration', elem_content, default=None)]
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'album_title\s*:\s*"((?:\\.|[^"\\])+?)"',
|
||||
webpage, 'title', fatal=False)
|
||||
|
@ -225,6 +225,8 @@ class BBCCoUkIE(InfoExtractor):
|
||||
}
|
||||
]
|
||||
|
||||
_USP_RE = r'/([^/]+?)\.ism(?:\.hlsv2\.ism)?/[^/]+\.m3u8'
|
||||
|
||||
class MediaSelectionError(Exception):
|
||||
def __init__(self, id):
|
||||
self.id = id
|
||||
@ -336,6 +338,15 @@ class BBCCoUkIE(InfoExtractor):
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
href, programme_id, ext='mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id=format_id, fatal=False))
|
||||
if re.search(self._USP_RE, href):
|
||||
usp_formats = self._extract_m3u8_formats(
|
||||
re.sub(self._USP_RE, r'/\1.ism/\1.m3u8', href),
|
||||
programme_id, ext='mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id=format_id, fatal=False)
|
||||
for f in usp_formats:
|
||||
if f.get('height') and f['height'] > 720:
|
||||
continue
|
||||
formats.append(f)
|
||||
elif transfer_format == 'hds':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
href, programme_id, f4m_id=format_id, fatal=False))
|
||||
|
73
youtube_dl/extractor/beampro.py
Normal file
73
youtube_dl/extractor/beampro.py
Normal file
@ -0,0 +1,73 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
compat_str,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class BeamProLiveIE(InfoExtractor):
    """Extractor for live channels on beam.pro."""
    IE_NAME = 'Beam:live'
    _VALID_URL = r'https?://(?:\w+\.)?beam\.pro/(?P<id>[^/?#&]+)'
    # Maps the channel's 'audience' value to an age limit
    _RATINGS = {'family': 0, 'teen': 13, '18+': 18}
    _TEST = {
        'url': 'http://www.beam.pro/niterhayven',
        'info_dict': {
            'id': '261562',
            'ext': 'mp4',
            'title': 'Introducing The Witcher 3 // The Grind Starts Now!',
            'description': 'md5:0b161ac080f15fe05d18a07adb44a74d',
            'thumbnail': r're:https://.*\.jpg$',
            'timestamp': 1483477281,
            'upload_date': '20170103',
            'uploader': 'niterhayven',
            'uploader_id': '373396',
            'age_limit': 18,
            'is_live': True,
            'view_count': int,
        },
        'skip': 'niterhayven is offline',
        'params': {
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Return the live stream info dict for the channel named in *url*.

        Raises an expected ExtractorError when the channel metadata
        explicitly reports ``online`` as False.
        """
        channel_name = self._match_id(url)

        chan = self._download_json(
            'https://beam.pro/api/v1/channels/%s' % channel_name, channel_name)

        # Only an explicit False counts as offline; a missing key does not
        if chan.get('online') is False:
            raise ExtractorError(
                '{0} is offline'.format(channel_name), expected=True)

        channel_id = chan['id']

        formats = self._extract_m3u8_formats(
            'https://beam.pro/api/v1/channels/%s/manifest.m3u8' % channel_id,
            channel_name, ext='mp4', m3u8_id='hls', fatal=False)
        self._sort_formats(formats)

        user_id = chan.get('userId') or try_get(chan, lambda x: x['user']['id'])

        return {
            'id': compat_str(chan.get('id') or channel_name),
            'title': self._live_title(chan.get('name') or channel_name),
            'description': clean_html(chan.get('description')),
            'thumbnail': try_get(chan, lambda x: x['thumbnail']['url'], compat_str),
            'timestamp': parse_iso8601(chan.get('updatedAt')),
            'uploader': chan.get('token') or try_get(
                chan, lambda x: x['user']['username'], compat_str),
            'uploader_id': compat_str(user_id) if user_id else None,
            'age_limit': self._RATINGS.get(chan.get('audience')),
            'is_live': True,
            'view_count': int_or_none(chan.get('viewersTotal')),
            'formats': formats,
        }
|
@ -24,7 +24,7 @@ class BellMediaIE(InfoExtractor):
|
||||
space
|
||||
)\.ca|
|
||||
much\.com
|
||||
)/.*?(?:\bvid=|-vid|~|%7E|/(?:episode)?)(?P<id>[0-9]{6})'''
|
||||
)/.*?(?:\bvid=|-vid|~|%7E|/(?:episode)?)(?P<id>[0-9]{6,})'''
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ctv.ca/video/player?vid=706966',
|
||||
'md5': 'ff2ebbeae0aa2dcc32a830c3fd69b7b0',
|
||||
@ -55,6 +55,9 @@ class BellMediaIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://www.much.com/shows/the-almost-impossible-gameshow/928979/episode-6',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.ctv.ca/DCs-Legends-of-Tomorrow/Video/S2E11-Turncoat-vid1051430',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_DOMAINS = {
|
||||
'thecomedynetwork': 'comedy',
|
||||
|
@ -5,19 +5,27 @@ import hashlib
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_parse_qs
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
parse_iso8601,
|
||||
smuggle_url,
|
||||
strip_jsonp,
|
||||
unified_timestamp,
|
||||
unsmuggle_url,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class BiliBiliIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.|bangumi\.|)bilibili\.(?:tv|com)/(?:video/av|anime/v/)(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.|bangumi\.|)bilibili\.(?:tv|com)/(?:video/av|anime/(?P<anime_id>\d+)/play#)(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.bilibili.tv/video/av1074402/',
|
||||
'md5': '9fa226fe2b8a9a4d5a69b4c6a183417e',
|
||||
'info_dict': {
|
||||
@ -32,25 +40,61 @@ class BiliBiliIE(InfoExtractor):
|
||||
'uploader': '菊子桑',
|
||||
'uploader_id': '156160',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
# Tested in BiliBiliBangumiIE
|
||||
'url': 'http://bangumi.bilibili.com/anime/1869/play#40062',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://bangumi.bilibili.com/anime/5802/play#100643',
|
||||
'md5': '3f721ad1e75030cc06faf73587cfec57',
|
||||
'info_dict': {
|
||||
'id': '100643',
|
||||
'ext': 'mp4',
|
||||
'title': 'CHAOS;CHILD',
|
||||
'description': '如果你是神明,并且能够让妄想成为现实。那你会进行怎么样的妄想?是淫靡的世界?独裁社会?毁灭性的制裁?还是……2015年,涩谷。从6年前发生的大灾害“涩谷地震”之后复兴了的这个街区里新设立的私立高中...',
|
||||
},
|
||||
'skip': 'Geo-restricted to China',
|
||||
}]
|
||||
|
||||
_APP_KEY = '6f90a59ac58a4123'
|
||||
_BILIBILI_KEY = '0bfd84cc3940035173f35e6777508326'
|
||||
_APP_KEY = '84956560bc028eb7'
|
||||
_BILIBILI_KEY = '94aba54af9065f71de72f5508f1cd42e'
|
||||
|
||||
def _report_error(self, result):
    """Raise an ExtractorError describing a failed API response.

    result is the decoded response mapping. Its 'message' entry is
    preferred, then its 'code' entry; both are reported as expected
    errors. If neither is present a generic, non-expected error is raised.
    This method never returns normally.
    """
    if 'message' in result:
        message = '%s said: %s' % (self.IE_NAME, result['message'])
    elif 'code' in result:
        message = '%s returns error %d' % (self.IE_NAME, result['code'])
    else:
        raise ExtractorError('Can\'t extract Bangumi episode ID')
    raise ExtractorError(message, expected=True)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
url, smuggled_data = unsmuggle_url(url, {})
|
||||
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
anime_id = mobj.group('anime_id')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
if 'anime/v' not in url:
|
||||
if 'anime/' not in url:
|
||||
cid = compat_parse_qs(self._search_regex(
|
||||
[r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)',
|
||||
r'<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'],
|
||||
webpage, 'player parameters'))['cid'][0]
|
||||
else:
|
||||
if 'no_bangumi_tip' not in smuggled_data:
|
||||
self.to_screen('Downloading episode %s. To download all videos in anime %s, re-run youtube-dl with %s' % (
|
||||
video_id, anime_id, compat_urlparse.urljoin(url, '//bangumi.bilibili.com/anime/%s' % anime_id)))
|
||||
headers = {
|
||||
'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
|
||||
}
|
||||
headers.update(self.geo_verification_headers())
|
||||
|
||||
js = self._download_json(
|
||||
'http://bangumi.bilibili.com/web_api/get_source', video_id,
|
||||
data=urlencode_postdata({'episode_id': video_id}),
|
||||
headers={'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'})
|
||||
headers=headers)
|
||||
if 'result' not in js:
|
||||
self._report_error(js)
|
||||
cid = js['result']['cid']
|
||||
|
||||
payload = 'appkey=%s&cid=%s&otype=json&quality=2&type=mp4' % (self._APP_KEY, cid)
|
||||
@ -58,7 +102,11 @@ class BiliBiliIE(InfoExtractor):
|
||||
|
||||
video_info = self._download_json(
|
||||
'http://interface.bilibili.com/playurl?%s&sign=%s' % (payload, sign),
|
||||
video_id, note='Downloading video info page')
|
||||
video_id, note='Downloading video info page',
|
||||
headers=self.geo_verification_headers())
|
||||
|
||||
if 'durl' not in video_info:
|
||||
self._report_error(video_info)
|
||||
|
||||
entries = []
|
||||
|
||||
@ -85,7 +133,7 @@ class BiliBiliIE(InfoExtractor):
|
||||
title = self._html_search_regex('<h1[^>]+title="([^"]+)">', webpage, 'title')
|
||||
description = self._html_search_meta('description', webpage)
|
||||
timestamp = unified_timestamp(self._html_search_regex(
|
||||
r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', fatal=False))
|
||||
r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', default=None))
|
||||
thumbnail = self._html_search_meta(['og:image', 'thumbnailUrl'], webpage)
|
||||
|
||||
# TODO 'view_count' requires deobfuscating Javascript
|
||||
@ -99,7 +147,7 @@ class BiliBiliIE(InfoExtractor):
|
||||
}
|
||||
|
||||
uploader_mobj = re.search(
|
||||
r'<a[^>]+href="https?://space\.bilibili\.com/(?P<id>\d+)"[^>]+title="(?P<name>[^"]+)"',
|
||||
r'<a[^>]+href="(?:https?:)?//space\.bilibili\.com/(?P<id>\d+)"[^>]+title="(?P<name>[^"]+)"',
|
||||
webpage)
|
||||
if uploader_mobj:
|
||||
info.update({
|
||||
@ -123,3 +171,70 @@ class BiliBiliIE(InfoExtractor):
|
||||
'description': description,
|
||||
'entries': entries,
|
||||
}
|
||||
|
||||
|
||||
class BiliBiliBangumiIE(InfoExtractor):
    """Playlist extractor for a whole anime season on bangumi.bilibili.com.

    Individual episodes are delegated to BiliBiliIE.
    """

    _VALID_URL = r'https?://bangumi\.bilibili\.com/anime/(?P<id>\d+)'

    IE_NAME = 'bangumi.bilibili.com'
    IE_DESC = 'BiliBili番剧'

    _TESTS = [{
        'url': 'http://bangumi.bilibili.com/anime/1869',
        'info_dict': {
            'id': '1869',
            'title': '混沌武士',
            'description': 'md5:6a9622b911565794c11f25f81d6a97d2',
        },
        'playlist_count': 26,
    }, {
        'url': 'http://bangumi.bilibili.com/anime/1869',
        'info_dict': {
            'id': '1869',
            'title': '混沌武士',
            'description': 'md5:6a9622b911565794c11f25f81d6a97d2',
        },
        'playlist': [{
            'md5': '91da8621454dd58316851c27c68b0c13',
            'info_dict': {
                'id': '40062',
                'ext': 'mp4',
                'title': '混沌武士',
                'description': '故事发生在日本的江户时代。风是一个小酒馆的打工女。一日,酒馆里来了一群恶霸,虽然他们的举动令风十分不满,但是毕竟风只是一届女流,无法对他们采取什么行动,只能在心里嘟哝。这时,酒家里又进来了个“不良份子...',
                'timestamp': 1414538739,
                'upload_date': '20141028',
                'episode': '疾风怒涛 Tempestuous Temperaments',
                'episode_number': 1,
            },
        }],
        'params': {
            'playlist_items': '1',
        },
    }]

    @classmethod
    def suitable(cls, url):
        """Decline URLs that BiliBiliIE (single episode) already accepts."""
        if BiliBiliIE.suitable(url):
            return False
        return super(BiliBiliBangumiIE, cls).suitable(url)

    def _real_extract(self, url):
        """Return a playlist with one url_transparent entry per episode.

        Entries are ordered by episode number; entries without a usable
        episode number are placed first.
        """
        bangumi_id = self._match_id(url)

        # Sometimes this API returns a JSONP response
        season_info = self._download_json(
            'http://bangumi.bilibili.com/jsonp/seasoninfo/%s.ver' % bangumi_id,
            bangumi_id, transform_source=strip_jsonp)['result']

        entries = [{
            '_type': 'url_transparent',
            # The smuggled flag is checked by BiliBiliIE before it prints
            # its "re-run to download all videos" hint.
            'url': smuggle_url(episode['webplay_url'], {'no_bangumi_tip': 1}),
            'ie_key': BiliBiliIE.ie_key(),
            'timestamp': parse_iso8601(episode.get('update_time'), delimiter=' '),
            'episode': episode.get('index_title'),
            'episode_number': int_or_none(episode.get('index')),
        } for episode in season_info['episodes']]

        # episode_number can be None (e.g. when 'index' is missing).
        # Comparing None with int raises TypeError on Python 3, so sort on
        # a tuple that never compares None directly. None sorts first,
        # matching what Python 2's ordering produced.
        entries.sort(key=lambda entry: (
            entry['episode_number'] is not None,
            entry['episode_number'] or 0))

        return self.playlist_result(
            entries, bangumi_id,
            season_info.get('bangumi_title'), season_info.get('evaluate'))
|
||||
|
@ -33,6 +33,10 @@ class BloombergIE(InfoExtractor):
|
||||
'params': {
|
||||
'format': 'best[format_id^=hds]',
|
||||
},
|
||||
}, {
|
||||
# data-bmmrid=
|
||||
'url': 'https://www.bloomberg.com/politics/articles/2017-02-08/le-pen-aide-briefed-french-central-banker-on-plan-to-print-money',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.bloomberg.com/news/articles/2015-11-12/five-strange-things-that-have-been-happening-in-financial-markets',
|
||||
'only_matching': True,
|
||||
@ -45,9 +49,10 @@ class BloombergIE(InfoExtractor):
|
||||
name = self._match_id(url)
|
||||
webpage = self._download_webpage(url, name)
|
||||
video_id = self._search_regex(
|
||||
(r'["\']bmmrId["\']\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||
r'videoId\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1'),
|
||||
webpage, 'id', group='url', default=None)
|
||||
(r'["\']bmmrId["\']\s*:\s*(["\'])(?P<id>(?:(?!\1).)+)\1',
|
||||
r'videoId\s*:\s*(["\'])(?P<id>(?:(?!\1).)+)\1',
|
||||
r'data-bmmrid=(["\'])(?P<id>(?:(?!\1).)+)\1'),
|
||||
webpage, 'id', group='id', default=None)
|
||||
if not video_id:
|
||||
bplayer_data = self._parse_json(self._search_regex(
|
||||
r'BPlayer\(null,\s*({[^;]+})\);', webpage, 'id'), name)
|
||||
|
@ -179,7 +179,7 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
|
||||
params = {}
|
||||
|
||||
playerID = find_param('playerID')
|
||||
playerID = find_param('playerID') or find_param('playerId')
|
||||
if playerID is None:
|
||||
raise ExtractorError('Cannot find player ID')
|
||||
params['playerID'] = playerID
|
||||
@ -191,6 +191,10 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
# These fields hold the id of the video
|
||||
videoPlayer = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID') or find_param('@videoList')
|
||||
if videoPlayer is not None:
|
||||
if isinstance(videoPlayer, list):
|
||||
videoPlayer = videoPlayer[0]
|
||||
if not (videoPlayer.isdigit() or videoPlayer.startswith('ref:')):
|
||||
return None
|
||||
params['@videoPlayer'] = videoPlayer
|
||||
linkBase = find_param('linkBaseURL')
|
||||
if linkBase is not None:
|
||||
|
@ -27,6 +27,7 @@ class CanalplusIE(InfoExtractor):
|
||||
(?:www\.)?d8\.tv|
|
||||
(?:www\.)?c8\.fr|
|
||||
(?:www\.)?d17\.tv|
|
||||
(?:(?:football|www)\.)?cstar\.fr|
|
||||
(?:www\.)?itele\.fr
|
||||
)/(?:(?:[^/]+/)*(?P<display_id>[^/?#&]+))?(?:\?.*\bvid=(?P<vid>\d+))?|
|
||||
player\.canalplus\.fr/#/(?P<id>\d+)
|
||||
@ -40,6 +41,7 @@ class CanalplusIE(InfoExtractor):
|
||||
'd8': 'd8',
|
||||
'c8': 'd8',
|
||||
'd17': 'd17',
|
||||
'cstar': 'd17',
|
||||
'itele': 'itele',
|
||||
}
|
||||
|
||||
@ -86,6 +88,19 @@ class CanalplusIE(InfoExtractor):
|
||||
'description': 'Chaque matin du lundi au vendredi, Michaël Darmon reçoit un invité politique à 8h25.',
|
||||
'upload_date': '20161014',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://football.cstar.fr/cstar-minisite-foot/pid7566-feminines-videos.html?vid=1416769',
|
||||
'info_dict': {
|
||||
'id': '1416769',
|
||||
'display_id': 'pid7566-feminines-videos',
|
||||
'ext': 'mp4',
|
||||
'title': 'France - Albanie : les temps forts de la soirée - 20/09/2016',
|
||||
'description': 'md5:c3f30f2aaac294c1c969b3294de6904e',
|
||||
'upload_date': '20160921',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://m.canalplus.fr/?vid=1398231',
|
||||
'only_matching': True,
|
||||
@ -107,7 +122,7 @@ class CanalplusIE(InfoExtractor):
|
||||
[r'<canal:player[^>]+?videoId=(["\'])(?P<id>\d+)',
|
||||
r'id=["\']canal_video_player(?P<id>\d+)',
|
||||
r'data-video=["\'](?P<id>\d+)'],
|
||||
webpage, 'video id', group='id')
|
||||
webpage, 'video id', default=mobj.group('vid'), group='id')
|
||||
|
||||
info_url = self._VIDEO_INFO_TEMPLATE % (site_id, video_id)
|
||||
video_data = self._download_json(info_url, video_id, 'Downloading video JSON')
|
||||
|
@ -90,36 +90,49 @@ class CBCIE(InfoExtractor):
|
||||
},
|
||||
}],
|
||||
'skip': 'Geo-restricted to Canada',
|
||||
}, {
|
||||
# multiple CBC.APP.Caffeine.initInstance(...)
|
||||
'url': 'http://www.cbc.ca/news/canada/calgary/dog-indoor-exercise-winter-1.3928238',
|
||||
'info_dict': {
|
||||
'title': 'Keep Rover active during the deep freeze with doggie pushups and other fun indoor tasks',
|
||||
'id': 'dog-indoor-exercise-winter-1.3928238',
|
||||
},
|
||||
'playlist_mincount': 6,
|
||||
}]
|
||||
|
||||
@classmethod
def suitable(cls, url):
    """Decline URLs that CBCPlayerIE accepts; otherwise use the default check."""
    if CBCPlayerIE.suitable(url):
        return False
    return super(CBCIE, cls).suitable(url)
|
||||
|
||||
def _extract_player_init(self, player_init, display_id):
    """Turn one player-init JS object literal into a CBCPlayer url result.

    player_init is the JavaScript object text and display_id is used when
    parsing it. The media id is taken from 'mediaId' when present.
    Otherwise it is resolved from 'clipId' through the tpfeed.cbc.ca feed
    (non-fatal), falling back to the feed.theplatform.com feed.
    """
    player_info = self._parse_json(player_init, display_id, js_to_json)

    media_id = player_info.get('mediaId')
    if media_id:
        return self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id)

    clip_id = player_info['clipId']

    # First attempt: CBC's own feed. A failed download is tolerated.
    cbc_feed_url = (
        'http://tpfeed.cbc.ca/f/ExhSPC/vms_5akSXx4Ng_Zn?byCustomValue={:mpsReleases}{%s}' % clip_id)
    feed = self._download_json(cbc_feed_url, clip_id, fatal=False)
    if feed:
        media_id = try_get(feed, lambda x: x['entries'][0]['guid'], compat_str)

    # Second attempt: thePlatform feed. The id is the last path component.
    if not media_id:
        platform_feed_url = (
            'http://feed.theplatform.com/f/h9dtGB/punlNGjMlc1F?fields=id&byContent=byReleases%3DbyId%253D' + clip_id)
        entry_id = self._download_json(
            platform_feed_url, clip_id)['entries'][0]['id']
        media_id = entry_id.split('/')[-1]

    return self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id)
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
player_init = self._search_regex(
|
||||
r'CBC\.APP\.Caffeine\.initInstance\(({.+?})\);', webpage, 'player init',
|
||||
default=None)
|
||||
if player_init:
|
||||
player_info = self._parse_json(player_init, display_id, js_to_json)
|
||||
media_id = player_info.get('mediaId')
|
||||
if not media_id:
|
||||
clip_id = player_info['clipId']
|
||||
feed = self._download_json(
|
||||
'http://tpfeed.cbc.ca/f/ExhSPC/vms_5akSXx4Ng_Zn?byCustomValue={:mpsReleases}{%s}' % clip_id,
|
||||
clip_id, fatal=False)
|
||||
if feed:
|
||||
media_id = try_get(feed, lambda x: x['entries'][0]['guid'], compat_str)
|
||||
if not media_id:
|
||||
media_id = self._download_json(
|
||||
'http://feed.theplatform.com/f/h9dtGB/punlNGjMlc1F?fields=id&byContent=byReleases%3DbyId%253D' + clip_id,
|
||||
clip_id)['entries'][0]['id'].split('/')[-1]
|
||||
return self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id)
|
||||
else:
|
||||
entries = [self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id) for media_id in re.findall(r'<iframe[^>]+src="[^"]+?mediaId=(\d+)"', webpage)]
|
||||
return self.playlist_result(entries)
|
||||
entries = [
|
||||
self._extract_player_init(player_init, display_id)
|
||||
for player_init in re.findall(r'CBC\.APP\.Caffeine\.initInstance\(({.+?})\);', webpage)]
|
||||
entries.extend([
|
||||
self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id)
|
||||
for media_id in re.findall(r'<iframe[^>]+src="[^"]+?mediaId=(\d+)"', webpage)])
|
||||
return self.playlist_result(
|
||||
entries, display_id,
|
||||
self._og_search_title(webpage, fatal=False),
|
||||
self._og_search_description(webpage))
|
||||
|
||||
|
||||
class CBCPlayerIE(InfoExtractor):
|
||||
@ -283,6 +296,12 @@ class CBCWatchVideoIE(CBCWatchBaseIE):
|
||||
formats = self._extract_m3u8_formats(re.sub(r'/([^/]+)/[^/?]+\.m3u8', r'/\1/\1.m3u8', m3u8_url), video_id, 'mp4', fatal=False)
|
||||
if len(formats) < 2:
|
||||
formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
|
||||
for f in formats:
|
||||
format_id = f.get('format_id')
|
||||
if format_id.startswith('AAC'):
|
||||
f['acodec'] = 'aac'
|
||||
elif format_id.startswith('AC3'):
|
||||
f['acodec'] = 'ac-3'
|
||||
self._sort_formats(formats)
|
||||
|
||||
info = {
|
||||
|
@ -13,6 +13,7 @@ from ..utils import (
|
||||
float_or_none,
|
||||
sanitized_Request,
|
||||
urlencode_postdata,
|
||||
USER_AGENTS,
|
||||
)
|
||||
|
||||
|
||||
@ -21,10 +22,10 @@ class CeskaTelevizeIE(InfoExtractor):
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220',
|
||||
'info_dict': {
|
||||
'id': '61924494876951776',
|
||||
'id': '61924494877246241',
|
||||
'ext': 'mp4',
|
||||
'title': 'Hyde Park Civilizace',
|
||||
'description': 'md5:fe93f6eda372d150759d11644ebbfb4a',
|
||||
'title': 'Hyde Park Civilizace: Život v Grónsku',
|
||||
'description': 'md5:3fec8f6bb497be5cdb0c9e8781076626',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'duration': 3350,
|
||||
},
|
||||
@ -114,70 +115,100 @@ class CeskaTelevizeIE(InfoExtractor):
|
||||
'requestSource': 'iVysilani',
|
||||
}
|
||||
|
||||
req = sanitized_Request(
|
||||
'http://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist',
|
||||
data=urlencode_postdata(data))
|
||||
|
||||
req.add_header('Content-type', 'application/x-www-form-urlencoded')
|
||||
req.add_header('x-addr', '127.0.0.1')
|
||||
req.add_header('X-Requested-With', 'XMLHttpRequest')
|
||||
req.add_header('Referer', url)
|
||||
|
||||
playlistpage = self._download_json(req, playlist_id)
|
||||
|
||||
playlist_url = playlistpage['url']
|
||||
if playlist_url == 'error_region':
|
||||
raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)
|
||||
|
||||
req = sanitized_Request(compat_urllib_parse_unquote(playlist_url))
|
||||
req.add_header('Referer', url)
|
||||
|
||||
playlist_title = self._og_search_title(webpage, default=None)
|
||||
playlist_description = self._og_search_description(webpage, default=None)
|
||||
|
||||
playlist = self._download_json(req, playlist_id)['playlist']
|
||||
playlist_len = len(playlist)
|
||||
|
||||
entries = []
|
||||
for item in playlist:
|
||||
is_live = item.get('type') == 'LIVE'
|
||||
formats = []
|
||||
for format_id, stream_url in item['streamUrls'].items():
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
stream_url, playlist_id, 'mp4',
|
||||
entry_protocol='m3u8' if is_live else 'm3u8_native',
|
||||
fatal=False))
|
||||
self._sort_formats(formats)
|
||||
|
||||
item_id = item.get('id') or item['assetId']
|
||||
title = item['title']
|
||||
for user_agent in (None, USER_AGENTS['Safari']):
|
||||
req = sanitized_Request(
|
||||
'http://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist',
|
||||
data=urlencode_postdata(data))
|
||||
|
||||
duration = float_or_none(item.get('duration'))
|
||||
thumbnail = item.get('previewImageUrl')
|
||||
req.add_header('Content-type', 'application/x-www-form-urlencoded')
|
||||
req.add_header('x-addr', '127.0.0.1')
|
||||
req.add_header('X-Requested-With', 'XMLHttpRequest')
|
||||
if user_agent:
|
||||
req.add_header('User-Agent', user_agent)
|
||||
req.add_header('Referer', url)
|
||||
|
||||
subtitles = {}
|
||||
if item.get('type') == 'VOD':
|
||||
subs = item.get('subtitles')
|
||||
if subs:
|
||||
subtitles = self.extract_subtitles(episode_id, subs)
|
||||
playlistpage = self._download_json(req, playlist_id, fatal=False)
|
||||
|
||||
if playlist_len == 1:
|
||||
final_title = playlist_title or title
|
||||
if is_live:
|
||||
final_title = self._live_title(final_title)
|
||||
else:
|
||||
final_title = '%s (%s)' % (playlist_title, title)
|
||||
if not playlistpage:
|
||||
continue
|
||||
|
||||
entries.append({
|
||||
'id': item_id,
|
||||
'title': final_title,
|
||||
'description': playlist_description if playlist_len == 1 else None,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'is_live': is_live,
|
||||
})
|
||||
playlist_url = playlistpage['url']
|
||||
if playlist_url == 'error_region':
|
||||
raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)
|
||||
|
||||
req = sanitized_Request(compat_urllib_parse_unquote(playlist_url))
|
||||
req.add_header('Referer', url)
|
||||
|
||||
playlist_title = self._og_search_title(webpage, default=None)
|
||||
playlist_description = self._og_search_description(webpage, default=None)
|
||||
|
||||
playlist = self._download_json(req, playlist_id, fatal=False)
|
||||
if not playlist:
|
||||
continue
|
||||
|
||||
playlist = playlist.get('playlist')
|
||||
if not isinstance(playlist, list):
|
||||
continue
|
||||
|
||||
playlist_len = len(playlist)
|
||||
|
||||
for num, item in enumerate(playlist):
|
||||
is_live = item.get('type') == 'LIVE'
|
||||
formats = []
|
||||
for format_id, stream_url in item.get('streamUrls', {}).items():
|
||||
if 'playerType=flash' in stream_url:
|
||||
stream_formats = self._extract_m3u8_formats(
|
||||
stream_url, playlist_id, 'mp4',
|
||||
entry_protocol='m3u8' if is_live else 'm3u8_native',
|
||||
m3u8_id='hls-%s' % format_id, fatal=False)
|
||||
else:
|
||||
stream_formats = self._extract_mpd_formats(
|
||||
stream_url, playlist_id,
|
||||
mpd_id='dash-%s' % format_id, fatal=False)
|
||||
# See https://github.com/rg3/youtube-dl/issues/12119#issuecomment-280037031
|
||||
if format_id == 'audioDescription':
|
||||
for f in stream_formats:
|
||||
f['source_preference'] = -10
|
||||
formats.extend(stream_formats)
|
||||
|
||||
if user_agent and len(entries) == playlist_len:
|
||||
entries[num]['formats'].extend(formats)
|
||||
continue
|
||||
|
||||
item_id = item.get('id') or item['assetId']
|
||||
title = item['title']
|
||||
|
||||
duration = float_or_none(item.get('duration'))
|
||||
thumbnail = item.get('previewImageUrl')
|
||||
|
||||
subtitles = {}
|
||||
if item.get('type') == 'VOD':
|
||||
subs = item.get('subtitles')
|
||||
if subs:
|
||||
subtitles = self.extract_subtitles(episode_id, subs)
|
||||
|
||||
if playlist_len == 1:
|
||||
final_title = playlist_title or title
|
||||
if is_live:
|
||||
final_title = self._live_title(final_title)
|
||||
else:
|
||||
final_title = '%s (%s)' % (playlist_title, title)
|
||||
|
||||
entries.append({
|
||||
'id': item_id,
|
||||
'title': final_title,
|
||||
'description': playlist_description if playlist_len == 1 else None,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'is_live': is_live,
|
||||
})
|
||||
|
||||
for e in entries:
|
||||
self._sort_formats(e['formats'])
|
||||
|
||||
return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
|
||||
|
||||
|
@ -1,5 +1,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError
|
||||
|
||||
@ -31,30 +33,35 @@ class ChaturbateIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
m3u8_url = self._search_regex(
|
||||
r'src=(["\'])(?P<url>http.+?\.m3u8.*?)\1', webpage,
|
||||
'playlist', default=None, group='url')
|
||||
m3u8_formats = [(m.group('id').lower(), m.group('url')) for m in re.finditer(
|
||||
r'hlsSource(?P<id>.+?)\s*=\s*(?P<q>["\'])(?P<url>http.+?)(?P=q)', webpage)]
|
||||
|
||||
if not m3u8_url:
|
||||
if not m3u8_formats:
|
||||
error = self._search_regex(
|
||||
[r'<span[^>]+class=(["\'])desc_span\1[^>]*>(?P<error>[^<]+)</span>',
|
||||
r'<div[^>]+id=(["\'])defchat\1[^>]*>\s*<p><strong>(?P<error>[^<]+)<'],
|
||||
webpage, 'error', group='error', default=None)
|
||||
if not error:
|
||||
if any(p not in webpage for p in (
|
||||
if any(p in webpage for p in (
|
||||
self._ROOM_OFFLINE, 'offline_tipping', 'tip_offline')):
|
||||
error = self._ROOM_OFFLINE
|
||||
if error:
|
||||
raise ExtractorError(error, expected=True)
|
||||
raise ExtractorError('Unable to find stream URL')
|
||||
|
||||
formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')
|
||||
formats = []
|
||||
for m3u8_id, m3u8_url in m3u8_formats:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, ext='mp4',
|
||||
# ffmpeg skips segments for fast m3u8
|
||||
preference=-10 if m3u8_id == 'fast' else None,
|
||||
m3u8_id=m3u8_id, fatal=False, live=True))
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': self._live_title(video_id),
|
||||
'thumbnail': 'https://cdn-s.highwebmedia.com/uHK3McUtGCG3SMFcd4ZJsRv8/roomimage/%s.jpg' % video_id,
|
||||
'thumbnail': 'https://roomimg.stream.highwebmedia.com/ri/%s.jpg' % video_id,
|
||||
'age_limit': self._rta_search(webpage),
|
||||
'is_live': True,
|
||||
'formats': formats,
|
||||
|
@ -19,6 +19,7 @@ class ChirbitIE(InfoExtractor):
|
||||
'title': 'md5:f542ea253f5255240be4da375c6a5d7e',
|
||||
'description': 'md5:f24a4e22a71763e32da5fed59e47c770',
|
||||
'duration': 306,
|
||||
'uploader': 'Gerryaudio',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@ -54,6 +55,9 @@ class ChirbitIE(InfoExtractor):
|
||||
duration = parse_duration(self._search_regex(
|
||||
r'class=["\']c-length["\'][^>]*>([^<]+)',
|
||||
webpage, 'duration', fatal=False))
|
||||
uploader = self._search_regex(
|
||||
r'id=["\']chirbit-username["\'][^>]*>([^<]+)',
|
||||
webpage, 'uploader', fatal=False)
|
||||
|
||||
return {
|
||||
'id': audio_id,
|
||||
@ -61,6 +65,7 @@ class ChirbitIE(InfoExtractor):
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'uploader': uploader,
|
||||
}
|
||||
|
||||
|
||||
|
@ -5,7 +5,7 @@ from .mtv import MTVIE
|
||||
|
||||
class CMTIE(MTVIE):
|
||||
IE_NAME = 'cmt.com'
|
||||
_VALID_URL = r'https?://(?:www\.)?cmt\.com/(?:videos|shows|full-episodes|video-clips)/(?P<id>[^/]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?cmt\.com/(?:videos|shows|(?:full-)?episodes|video-clips)/(?P<id>[^/]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.cmt.com/videos/garth-brooks/989124/the-call-featuring-trisha-yearwood.jhtml#artist=30061',
|
||||
|
@ -6,6 +6,7 @@ import hashlib
|
||||
import json
|
||||
import netrc
|
||||
import os
|
||||
import random
|
||||
import re
|
||||
import socket
|
||||
import sys
|
||||
@ -39,7 +40,10 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
fix_xml_ampersands,
|
||||
float_or_none,
|
||||
GeoRestrictedError,
|
||||
GeoUtils,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
parse_iso8601,
|
||||
RegexNotFoundError,
|
||||
sanitize_filename,
|
||||
@ -121,9 +125,19 @@ class InfoExtractor(object):
|
||||
download, lower-case.
|
||||
"http", "https", "rtsp", "rtmp", "rtmpe",
|
||||
"m3u8", "m3u8_native" or "http_dash_segments".
|
||||
* fragments A list of fragments of the fragmented media,
|
||||
with the following entries:
|
||||
* "url" (mandatory) - fragment's URL
|
||||
* fragment_base_url
|
||||
Base URL for fragments. Each fragment's path
|
||||
value (if present) will be relative to
|
||||
this URL.
|
||||
* fragments A list of fragments of a fragmented media.
|
||||
Each fragment entry must contain either an url
|
||||
or a path. If an url is present it should be
|
||||
considered by a client. Otherwise both path and
|
||||
fragment_base_url must be present. Here is
|
||||
the list of all potential fields:
|
||||
* "url" - fragment's URL
|
||||
* "path" - fragment's path relative to
|
||||
fragment_base_url
|
||||
* "duration" (optional, int or float)
|
||||
* "filesize" (optional, int)
|
||||
* preference Order number of this format. If this field is
|
||||
@ -309,17 +323,34 @@ class InfoExtractor(object):
|
||||
_real_extract() methods and define a _VALID_URL regexp.
|
||||
Probably, they should also be added to the list of extractors.
|
||||
|
||||
_GEO_BYPASS attribute may be set to False in order to disable
|
||||
geo restriction bypass mechanisms for a particular extractor.
|
||||
Though it won't disable explicit geo restriction bypass based on
|
||||
country code provided with geo_bypass_country. (experimental)
|
||||
|
||||
_GEO_COUNTRIES attribute may contain a list of presumably geo unrestricted
|
||||
countries for this extractor. One of these countries will be used by
|
||||
geo restriction bypass mechanism right away in order to bypass
|
||||
geo restriction, of course, if the mechanism is not disabled. (experimental)
|
||||
|
||||
NB: both these geo attributes are experimental and may change in future
|
||||
or be completely removed.
|
||||
|
||||
Finally, the _WORKING attribute should be set to False for broken IEs
|
||||
in order to warn the users and skip the tests.
|
||||
"""
|
||||
|
||||
_ready = False
|
||||
_downloader = None
|
||||
_x_forwarded_for_ip = None
|
||||
_GEO_BYPASS = True
|
||||
_GEO_COUNTRIES = None
|
||||
_WORKING = True
|
||||
|
||||
def __init__(self, downloader=None):
|
||||
"""Constructor. Receives an optional downloader."""
|
||||
self._ready = False
|
||||
self._x_forwarded_for_ip = None
|
||||
self.set_downloader(downloader)
|
||||
|
||||
@classmethod
|
||||
@ -348,15 +379,59 @@ class InfoExtractor(object):
|
||||
|
||||
def initialize(self):
    """Prepare the instance for extraction (geo bypass, authentication, etc).

    The geo bypass setup is invoked on every call with _GEO_COUNTRIES.
    _real_initialize() runs only until it has completed once, after which
    _ready is set.
    """
    self._initialize_geo_bypass(self._GEO_COUNTRIES)
    if self._ready:
        return
    self._real_initialize()
    self._ready = True
|
||||
|
||||
def _initialize_geo_bypass(self, countries):
    """
    Set up the X-Forwarded-For based geo restriction bypass.

    Does nothing if a fake IP has already been chosen. Otherwise the
    country is the explicit 'geo_bypass_country' downloader parameter
    when given. Failing that, a random entry of countries is used,
    provided the extractor's _GEO_BYPASS and the 'geo_bypass' parameter
    both allow it. A random IPv4 address for that country is stored in
    _x_forwarded_for_ip and reported on stdout in verbose mode.

    initialize() calls this with _GEO_COUNTRIES. Extractors may also
    call it directly when the country list only becomes known during
    extraction.
    """
    if self._x_forwarded_for_ip:
        return

    params = self._downloader.params
    country_code = params.get('geo_bypass_country', None)

    # If there is no explicit country for geo bypass specified and
    # the extractor is known to be geo restricted let's fake IP
    # as X-Forwarded-For right away.
    if not country_code:
        may_pick_random = (
            self._GEO_BYPASS and
            params.get('geo_bypass', True) and
            countries)
        if may_pick_random:
            country_code = random.choice(countries)

    if not country_code:
        return

    self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code)
    if params.get('verbose', False):
        self._downloader.to_stdout(
            '[debug] Using fake IP %s (%s) as X-Forwarded-For.'
            % (self._x_forwarded_for_ip, country_code.upper()))
|
||||
|
||||
def extract(self, url):
|
||||
"""Extracts URL information and returns it in list of dicts."""
|
||||
try:
|
||||
self.initialize()
|
||||
return self._real_extract(url)
|
||||
for _ in range(2):
|
||||
try:
|
||||
self.initialize()
|
||||
ie_result = self._real_extract(url)
|
||||
if self._x_forwarded_for_ip:
|
||||
ie_result['__x_forwarded_for_ip'] = self._x_forwarded_for_ip
|
||||
return ie_result
|
||||
except GeoRestrictedError as e:
|
||||
if self.__maybe_fake_ip_and_retry(e.countries):
|
||||
continue
|
||||
raise
|
||||
except ExtractorError:
|
||||
raise
|
||||
except compat_http_client.IncompleteRead as e:
|
||||
@ -364,6 +439,21 @@ class InfoExtractor(object):
|
||||
except (KeyError, StopIteration) as e:
|
||||
raise ExtractorError('An extractor error has occurred.', cause=e)
|
||||
|
||||
def __maybe_fake_ip_and_retry(self, countries):
|
||||
if (not self._downloader.params.get('geo_bypass_country', None) and
|
||||
self._GEO_BYPASS and
|
||||
self._downloader.params.get('geo_bypass', True) and
|
||||
not self._x_forwarded_for_ip and
|
||||
countries):
|
||||
country_code = random.choice(countries)
|
||||
self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code)
|
||||
if self._x_forwarded_for_ip:
|
||||
self.report_warning(
|
||||
'Video is geo restricted. Retrying extraction with fake IP %s (%s) as X-Forwarded-For.'
|
||||
% (self._x_forwarded_for_ip, country_code.upper()))
|
||||
return True
|
||||
return False
|
||||
|
||||
def set_downloader(self, downloader):
|
||||
"""Sets the downloader for this IE."""
|
||||
self._downloader = downloader
|
||||
@ -423,6 +513,15 @@ class InfoExtractor(object):
|
||||
if isinstance(url_or_request, (compat_str, str)):
|
||||
url_or_request = url_or_request.partition('#')[0]
|
||||
|
||||
# Some sites check X-Forwarded-For HTTP header in order to figure out
|
||||
# the origin of the client behind proxy. This allows bypassing geo
|
||||
# restriction by faking this header's value to IP that belongs to some
|
||||
# geo unrestricted country. We will do so once we encounter any
|
||||
# geo restriction error.
|
||||
if self._x_forwarded_for_ip:
|
||||
if 'X-Forwarded-For' not in headers:
|
||||
headers['X-Forwarded-For'] = self._x_forwarded_for_ip
|
||||
|
||||
urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal, data=data, headers=headers, query=query)
|
||||
if urlh is False:
|
||||
assert not fatal
|
||||
@ -598,10 +697,8 @@ class InfoExtractor(object):
|
||||
expected=True)
|
||||
|
||||
@staticmethod
|
||||
def raise_geo_restricted(msg='This video is not available from your location due to geo restriction'):
|
||||
raise ExtractorError(
|
||||
'%s. You might want to use --proxy to workaround.' % msg,
|
||||
expected=True)
|
||||
def raise_geo_restricted(msg='This video is not available from your location due to geo restriction', countries=None):
|
||||
raise GeoRestrictedError(msg, countries=countries)
|
||||
|
||||
# Methods for following #608
|
||||
@staticmethod
|
||||
@ -1015,13 +1112,13 @@ class InfoExtractor(object):
|
||||
unique_formats.append(f)
|
||||
formats[:] = unique_formats
|
||||
|
||||
def _is_valid_url(self, url, video_id, item='video'):
|
||||
def _is_valid_url(self, url, video_id, item='video', headers={}):
|
||||
url = self._proto_relative_url(url, scheme='http:')
|
||||
# For now assume non HTTP(S) URLs always valid
|
||||
if not (url.startswith('http://') or url.startswith('https://')):
|
||||
return True
|
||||
try:
|
||||
self._request_webpage(url, video_id, 'Checking %s URL' % item)
|
||||
self._request_webpage(url, video_id, 'Checking %s URL' % item, headers=headers)
|
||||
return True
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_urllib_error.URLError):
|
||||
@ -1198,6 +1295,9 @@ class InfoExtractor(object):
|
||||
m3u8_doc, urlh = res
|
||||
m3u8_url = urlh.geturl()
|
||||
|
||||
if '#EXT-X-FAXS-CM:' in m3u8_doc: # Adobe Flash Access
|
||||
return []
|
||||
|
||||
formats = [self._m3u8_meta_format(m3u8_url, ext, preference, m3u8_id)]
|
||||
|
||||
format_url = lambda u: (
|
||||
@ -1305,8 +1405,8 @@ class InfoExtractor(object):
|
||||
'abr': abr,
|
||||
})
|
||||
f.update(parse_codecs(last_info.get('CODECS')))
|
||||
if audio_in_video_stream.get(last_info.get('AUDIO')) is False:
|
||||
# TODO: update acodec for for audio only formats with the same GROUP-ID
|
||||
if audio_in_video_stream.get(last_info.get('AUDIO')) is False and f['vcodec'] != 'none':
|
||||
# TODO: update acodec for audio only formats with the same GROUP-ID
|
||||
f['acodec'] = 'none'
|
||||
formats.append(f)
|
||||
last_info = {}
|
||||
@ -1627,12 +1727,12 @@ class InfoExtractor(object):
|
||||
segment_template = element.find(_add_ns('SegmentTemplate'))
|
||||
if segment_template is not None:
|
||||
extract_common(segment_template)
|
||||
media_template = segment_template.get('media')
|
||||
if media_template:
|
||||
ms_info['media_template'] = media_template
|
||||
media = segment_template.get('media')
|
||||
if media:
|
||||
ms_info['media'] = media
|
||||
initialization = segment_template.get('initialization')
|
||||
if initialization:
|
||||
ms_info['initialization_url'] = initialization
|
||||
ms_info['initialization'] = initialization
|
||||
else:
|
||||
extract_Initialization(segment_template)
|
||||
return ms_info
|
||||
@ -1676,6 +1776,7 @@ class InfoExtractor(object):
|
||||
lang = representation_attrib.get('lang')
|
||||
url_el = representation.find(_add_ns('BaseURL'))
|
||||
filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength') if url_el is not None else None)
|
||||
bandwidth = int_or_none(representation_attrib.get('bandwidth'))
|
||||
f = {
|
||||
'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id,
|
||||
'url': base_url,
|
||||
@ -1683,7 +1784,7 @@ class InfoExtractor(object):
|
||||
'ext': mimetype2ext(mime_type),
|
||||
'width': int_or_none(representation_attrib.get('width')),
|
||||
'height': int_or_none(representation_attrib.get('height')),
|
||||
'tbr': int_or_none(representation_attrib.get('bandwidth'), 1000),
|
||||
'tbr': int_or_none(bandwidth, 1000),
|
||||
'asr': int_or_none(representation_attrib.get('audioSamplingRate')),
|
||||
'fps': int_or_none(representation_attrib.get('frameRate')),
|
||||
'language': lang if lang not in ('mul', 'und', 'zxx', 'mis') else None,
|
||||
@ -1692,13 +1793,32 @@ class InfoExtractor(object):
|
||||
}
|
||||
f.update(parse_codecs(representation_attrib.get('codecs')))
|
||||
representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
|
||||
if 'segment_urls' not in representation_ms_info and 'media_template' in representation_ms_info:
|
||||
|
||||
media_template = representation_ms_info['media_template']
|
||||
media_template = media_template.replace('$RepresentationID$', representation_id)
|
||||
media_template = re.sub(r'\$(Number|Bandwidth|Time)\$', r'%(\1)d', media_template)
|
||||
media_template = re.sub(r'\$(Number|Bandwidth|Time)%([^$]+)\$', r'%(\1)\2', media_template)
|
||||
media_template.replace('$$', '$')
|
||||
def prepare_template(template_name, identifiers):
|
||||
t = representation_ms_info[template_name]
|
||||
t = t.replace('$RepresentationID$', representation_id)
|
||||
t = re.sub(r'\$(%s)\$' % '|'.join(identifiers), r'%(\1)d', t)
|
||||
t = re.sub(r'\$(%s)%%([^$]+)\$' % '|'.join(identifiers), r'%(\1)\2', t)
|
||||
t.replace('$$', '$')
|
||||
return t
|
||||
|
||||
# @initialization is a regular template like @media one
|
||||
# so it should be handled just the same way (see
|
||||
# https://github.com/rg3/youtube-dl/issues/11605)
|
||||
if 'initialization' in representation_ms_info:
|
||||
initialization_template = prepare_template(
|
||||
'initialization',
|
||||
# As per [1, 5.3.9.4.2, Table 15, page 54] $Number$ and
|
||||
# $Time$ shall not be included for @initialization thus
|
||||
# only $Bandwidth$ remains
|
||||
('Bandwidth', ))
|
||||
representation_ms_info['initialization_url'] = initialization_template % {
|
||||
'Bandwidth': bandwidth,
|
||||
}
|
||||
|
||||
if 'segment_urls' not in representation_ms_info and 'media' in representation_ms_info:
|
||||
|
||||
media_template = prepare_template('media', ('Number', 'Bandwidth', 'Time'))
|
||||
|
||||
# As per [1, 5.3.9.4.4, Table 16, page 55] $Number$ and $Time$
|
||||
# can't be used at the same time
|
||||
@ -1710,7 +1830,7 @@ class InfoExtractor(object):
|
||||
representation_ms_info['fragments'] = [{
|
||||
'url': media_template % {
|
||||
'Number': segment_number,
|
||||
'Bandwidth': int_or_none(representation_attrib.get('bandwidth')),
|
||||
'Bandwidth': bandwidth,
|
||||
},
|
||||
'duration': segment_duration,
|
||||
} for segment_number in range(
|
||||
@ -1728,7 +1848,7 @@ class InfoExtractor(object):
|
||||
def add_segment_url():
|
||||
segment_url = media_template % {
|
||||
'Time': segment_time,
|
||||
'Bandwidth': int_or_none(representation_attrib.get('bandwidth')),
|
||||
'Bandwidth': bandwidth,
|
||||
'Number': segment_number,
|
||||
}
|
||||
representation_ms_info['fragments'].append({
|
||||
@ -1751,14 +1871,16 @@ class InfoExtractor(object):
|
||||
# Example: https://www.youtube.com/watch?v=iXZV5uAYMJI
|
||||
# or any YouTube dashsegments video
|
||||
fragments = []
|
||||
s_num = 0
|
||||
for segment_url in representation_ms_info['segment_urls']:
|
||||
s = representation_ms_info['s'][s_num]
|
||||
segment_index = 0
|
||||
timescale = representation_ms_info['timescale']
|
||||
for s in representation_ms_info['s']:
|
||||
duration = float_or_none(s['d'], timescale)
|
||||
for r in range(s.get('r', 0) + 1):
|
||||
fragments.append({
|
||||
'url': segment_url,
|
||||
'duration': float_or_none(s['d'], representation_ms_info['timescale']),
|
||||
'url': representation_ms_info['segment_urls'][segment_index],
|
||||
'duration': duration,
|
||||
})
|
||||
segment_index += 1
|
||||
representation_ms_info['fragments'] = fragments
|
||||
# NB: MPD manifest may contain direct URLs to unfragmented media.
|
||||
# No fragments key is present in this case.
|
||||
@ -1768,7 +1890,7 @@ class InfoExtractor(object):
|
||||
'protocol': 'http_dash_segments',
|
||||
})
|
||||
if 'initialization_url' in representation_ms_info:
|
||||
initialization_url = representation_ms_info['initialization_url'].replace('$RepresentationID$', representation_id)
|
||||
initialization_url = representation_ms_info['initialization_url']
|
||||
if not f.get('url'):
|
||||
f['url'] = initialization_url
|
||||
f['fragments'].append({'url': initialization_url})
|
||||
@ -1927,7 +2049,12 @@ class InfoExtractor(object):
|
||||
media_tags = [(media_tag, media_type, '')
|
||||
for media_tag, media_type
|
||||
in re.findall(r'(?s)(<(video|audio)[^>]*/>)', webpage)]
|
||||
media_tags.extend(re.findall(r'(?s)(<(?P<tag>video|audio)[^>]*>)(.*?)</(?P=tag)>', webpage))
|
||||
media_tags.extend(re.findall(
|
||||
# We only allow video|audio followed by a whitespace or '>'.
|
||||
# Allowing more characters may result in a significant slowdown (see
|
||||
# https://github.com/rg3/youtube-dl/issues/11979, example URL:
|
||||
# http://www.porntrex.com/maps/videositemap.xml).
|
||||
r'(?s)(<(?P<tag>video|audio)(?:\s+[^>]*)?>)(.*?)</(?P=tag)>', webpage))
|
||||
for media_tag, media_type, media_content in media_tags:
|
||||
media_info = {
|
||||
'formats': [],
|
||||
@ -2033,6 +2160,123 @@ class InfoExtractor(object):
|
||||
})
|
||||
return formats
|
||||
|
||||
@staticmethod
|
||||
def _find_jwplayer_data(webpage):
|
||||
mobj = re.search(
|
||||
r'jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)\.setup\s*\((?P<options>[^)]+)\)',
|
||||
webpage)
|
||||
if mobj:
|
||||
return mobj.group('options')
|
||||
|
||||
def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs):
|
||||
jwplayer_data = self._parse_json(
|
||||
self._find_jwplayer_data(webpage), video_id,
|
||||
transform_source=js_to_json)
|
||||
return self._parse_jwplayer_data(
|
||||
jwplayer_data, video_id, *args, **kwargs)
|
||||
|
||||
def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True,
|
||||
m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None):
|
||||
# JWPlayer backward compatibility: flattened playlists
|
||||
# https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/api/config.js#L81-L96
|
||||
if 'playlist' not in jwplayer_data:
|
||||
jwplayer_data = {'playlist': [jwplayer_data]}
|
||||
|
||||
entries = []
|
||||
|
||||
# JWPlayer backward compatibility: single playlist item
|
||||
# https://github.com/jwplayer/jwplayer/blob/v7.7.0/src/js/playlist/playlist.js#L10
|
||||
if not isinstance(jwplayer_data['playlist'], list):
|
||||
jwplayer_data['playlist'] = [jwplayer_data['playlist']]
|
||||
|
||||
for video_data in jwplayer_data['playlist']:
|
||||
# JWPlayer backward compatibility: flattened sources
|
||||
# https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/playlist/item.js#L29-L35
|
||||
if 'sources' not in video_data:
|
||||
video_data['sources'] = [video_data]
|
||||
|
||||
this_video_id = video_id or video_data['mediaid']
|
||||
|
||||
formats = []
|
||||
for source in video_data['sources']:
|
||||
source_url = self._proto_relative_url(source['file'])
|
||||
if base_url:
|
||||
source_url = compat_urlparse.urljoin(base_url, source_url)
|
||||
source_type = source.get('type') or ''
|
||||
ext = mimetype2ext(source_type) or determine_ext(source_url)
|
||||
if source_type == 'hls' or ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
source_url, this_video_id, 'mp4', 'm3u8_native', m3u8_id=m3u8_id, fatal=False))
|
||||
elif ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
source_url, this_video_id, mpd_id=mpd_id, fatal=False))
|
||||
# https://github.com/jwplayer/jwplayer/blob/master/src/js/providers/default.js#L67
|
||||
elif source_type.startswith('audio') or ext in ('oga', 'aac', 'mp3', 'mpeg', 'vorbis'):
|
||||
formats.append({
|
||||
'url': source_url,
|
||||
'vcodec': 'none',
|
||||
'ext': ext,
|
||||
})
|
||||
else:
|
||||
height = int_or_none(source.get('height'))
|
||||
if height is None:
|
||||
# Often no height is provided but there is a label in
|
||||
# format like 1080p.
|
||||
height = int_or_none(self._search_regex(
|
||||
r'^(\d{3,})[pP]$', source.get('label') or '',
|
||||
'height', default=None))
|
||||
a_format = {
|
||||
'url': source_url,
|
||||
'width': int_or_none(source.get('width')),
|
||||
'height': height,
|
||||
'ext': ext,
|
||||
}
|
||||
if source_url.startswith('rtmp'):
|
||||
a_format['ext'] = 'flv'
|
||||
|
||||
# See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as
|
||||
# of jwplayer.flash.swf
|
||||
rtmp_url_parts = re.split(
|
||||
r'((?:mp4|mp3|flv):)', source_url, 1)
|
||||
if len(rtmp_url_parts) == 3:
|
||||
rtmp_url, prefix, play_path = rtmp_url_parts
|
||||
a_format.update({
|
||||
'url': rtmp_url,
|
||||
'play_path': prefix + play_path,
|
||||
})
|
||||
if rtmp_params:
|
||||
a_format.update(rtmp_params)
|
||||
formats.append(a_format)
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
tracks = video_data.get('tracks')
|
||||
if tracks and isinstance(tracks, list):
|
||||
for track in tracks:
|
||||
if track.get('kind') != 'captions':
|
||||
continue
|
||||
track_url = urljoin(base_url, track.get('file'))
|
||||
if not track_url:
|
||||
continue
|
||||
subtitles.setdefault(track.get('label') or 'en', []).append({
|
||||
'url': self._proto_relative_url(track_url)
|
||||
})
|
||||
|
||||
entries.append({
|
||||
'id': this_video_id,
|
||||
'title': video_data['title'] if require_title else video_data.get('title'),
|
||||
'description': video_data.get('description'),
|
||||
'thumbnail': self._proto_relative_url(video_data.get('image')),
|
||||
'timestamp': int_or_none(video_data.get('pubdate')),
|
||||
'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')),
|
||||
'subtitles': subtitles,
|
||||
'formats': formats,
|
||||
})
|
||||
if len(entries) == 1:
|
||||
return entries[0]
|
||||
else:
|
||||
return self.playlist_result(entries)
|
||||
|
||||
def _live_title(self, name):
|
||||
""" Generate the title for a live video """
|
||||
now = datetime.datetime.now()
|
||||
|
@ -1,5 +1,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import sys
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError
|
||||
|
||||
@ -7,7 +9,7 @@ from ..utils import ExtractorError
|
||||
class CommonMistakesIE(InfoExtractor):
|
||||
IE_DESC = False # Do not list
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:url|URL)
|
||||
(?:url|URL)$
|
||||
'''
|
||||
|
||||
_TESTS = [{
|
||||
@ -33,7 +35,9 @@ class UnicodeBOMIE(InfoExtractor):
|
||||
IE_DESC = False
|
||||
_VALID_URL = r'(?P<bom>\ufeff)(?P<id>.*)$'
|
||||
|
||||
_TESTS = [{
|
||||
# Disable test for python 3.2 since BOM is broken in re in this version
|
||||
# (see https://github.com/rg3/youtube-dl/issues/9751)
|
||||
_TESTS = [] if (3, 0) < sys.version_info <= (3, 3) else [{
|
||||
'url': '\ufeffhttp://www.youtube.com/watch?v=BaW_jenozKc',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
72
youtube_dl/extractor/corus.py
Normal file
72
youtube_dl/extractor/corus.py
Normal file
@ -0,0 +1,72 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .theplatform import ThePlatformFeedIE
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class CorusIE(ThePlatformFeedIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:globaltv|etcanada)\.com|(?:hgtv|foodnetwork|slice)\.ca)/(?:video/|(?:[^/]+/)+(?:videos/[a-z0-9-]+-|video\.html\?.*?\bv=))(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.hgtv.ca/shows/bryan-inc/videos/movie-night-popcorn-with-bryan-870923331648/',
|
||||
'md5': '05dcbca777bf1e58c2acbb57168ad3a6',
|
||||
'info_dict': {
|
||||
'id': '870923331648',
|
||||
'ext': 'mp4',
|
||||
'title': 'Movie Night Popcorn with Bryan',
|
||||
'description': 'Bryan whips up homemade popcorn, the old fashion way for Jojo and Lincoln.',
|
||||
'uploader': 'SHWM-NEW',
|
||||
'upload_date': '20170206',
|
||||
'timestamp': 1486392197,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.foodnetwork.ca/shows/chopped/video/episode/chocolate-obsession/video.html?v=872683587753',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://etcanada.com/video/873675331955/meet-the-survivor-game-changers-castaways-part-2/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_TP_FEEDS = {
|
||||
'globaltv': {
|
||||
'feed_id': 'ChQqrem0lNUp',
|
||||
'account_id': 2269680845,
|
||||
},
|
||||
'etcanada': {
|
||||
'feed_id': 'ChQqrem0lNUp',
|
||||
'account_id': 2269680845,
|
||||
},
|
||||
'hgtv': {
|
||||
'feed_id': 'L0BMHXi2no43',
|
||||
'account_id': 2414428465,
|
||||
},
|
||||
'foodnetwork': {
|
||||
'feed_id': 'ukK8o58zbRmJ',
|
||||
'account_id': 2414429569,
|
||||
},
|
||||
'slice': {
|
||||
'feed_id': '5tUJLgV2YNJ5',
|
||||
'account_id': 2414427935,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
domain, video_id = re.match(self._VALID_URL, url).groups()
|
||||
feed_info = self._TP_FEEDS[domain.split('.')[0]]
|
||||
return self._extract_feed_info('dtjsEC', feed_info['feed_id'], 'byId=' + video_id, video_id, lambda e: {
|
||||
'episode_number': int_or_none(e.get('pl1$episode')),
|
||||
'season_number': int_or_none(e.get('pl1$season')),
|
||||
'series': e.get('pl1$show'),
|
||||
}, {
|
||||
'HLS': {
|
||||
'manifest': 'm3u',
|
||||
},
|
||||
'DesktopHLS Default': {
|
||||
'manifest': 'm3u',
|
||||
},
|
||||
'MP4 MBR': {
|
||||
'manifest': 'm3u',
|
||||
},
|
||||
}, feed_info['account_id'])
|
@ -6,7 +6,8 @@ from ..utils import int_or_none
|
||||
|
||||
|
||||
class CrackleIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:crackle:|https?://(?:www\.)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)'
|
||||
_GEO_COUNTRIES = ['US']
|
||||
_VALID_URL = r'(?:crackle:|https?://(?:(?:www|m)\.)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.crackle.com/comedians-in-cars-getting-coffee/2498934',
|
||||
'info_dict': {
|
||||
@ -31,8 +32,32 @@ class CrackleIE(InfoExtractor):
|
||||
}
|
||||
}
|
||||
|
||||
_THUMBNAIL_RES = [
|
||||
(120, 90),
|
||||
(208, 156),
|
||||
(220, 124),
|
||||
(220, 220),
|
||||
(240, 180),
|
||||
(250, 141),
|
||||
(315, 236),
|
||||
(320, 180),
|
||||
(360, 203),
|
||||
(400, 300),
|
||||
(421, 316),
|
||||
(460, 330),
|
||||
(460, 460),
|
||||
(462, 260),
|
||||
(480, 270),
|
||||
(587, 330),
|
||||
(640, 480),
|
||||
(700, 330),
|
||||
(700, 394),
|
||||
(854, 480),
|
||||
(1024, 1024),
|
||||
(1920, 1080),
|
||||
]
|
||||
|
||||
# extracted from http://legacyweb-us.crackle.com/flash/ReferrerRedirect.ashx
|
||||
_THUMBNAIL_TEMPLATE = 'http://images-us-am.crackle.com/%stnl_1920x1080.jpg?ts=20140107233116?c=635333335057637614'
|
||||
_MEDIA_FILE_SLOTS = {
|
||||
'c544.flv': {
|
||||
'width': 544,
|
||||
@ -61,17 +86,25 @@ class CrackleIE(InfoExtractor):
|
||||
|
||||
item = self._download_xml(
|
||||
'http://legacyweb-us.crackle.com/app/revamp/vidwallcache.aspx?flags=-1&fm=%s' % video_id,
|
||||
video_id).find('i')
|
||||
video_id, headers=self.geo_verification_headers()).find('i')
|
||||
title = item.attrib['t']
|
||||
|
||||
subtitles = {}
|
||||
formats = self._extract_m3u8_formats(
|
||||
'http://content.uplynk.com/ext/%s/%s.m3u8' % (config_doc.attrib['strUplynkOwnerId'], video_id),
|
||||
video_id, 'mp4', m3u8_id='hls', fatal=None)
|
||||
thumbnail = None
|
||||
thumbnails = []
|
||||
path = item.attrib.get('p')
|
||||
if path:
|
||||
thumbnail = self._THUMBNAIL_TEMPLATE % path
|
||||
for width, height in self._THUMBNAIL_RES:
|
||||
res = '%dx%d' % (width, height)
|
||||
thumbnails.append({
|
||||
'id': res,
|
||||
'url': 'http://images-us-am.crackle.com/%stnl_%s.jpg' % (path, res),
|
||||
'width': width,
|
||||
'height': height,
|
||||
'resolution': res,
|
||||
})
|
||||
http_base_url = 'http://ahttp.crackle.com/' + path
|
||||
for mfs_path, mfs_info in self._MEDIA_FILE_SLOTS.items():
|
||||
formats.append({
|
||||
@ -86,10 +119,11 @@ class CrackleIE(InfoExtractor):
|
||||
if locale and v:
|
||||
if locale not in subtitles:
|
||||
subtitles[locale] = []
|
||||
subtitles[locale] = [{
|
||||
'url': '%s/%s%s_%s.xml' % (config_doc.attrib['strSubtitleServer'], path, locale, v),
|
||||
'ext': 'ttml',
|
||||
}]
|
||||
for url_ext, ext in (('vtt', 'vtt'), ('xml', 'tt')):
|
||||
subtitles.setdefault(locale, []).append({
|
||||
'url': '%s/%s%s_%s.%s' % (config_doc.attrib['strSubtitleServer'], path, locale, v, url_ext),
|
||||
'ext': ext,
|
||||
})
|
||||
self._sort_formats(formats, ('width', 'height', 'tbr', 'format_id'))
|
||||
|
||||
return {
|
||||
@ -100,7 +134,7 @@ class CrackleIE(InfoExtractor):
|
||||
'series': item.attrib.get('sn'),
|
||||
'season_number': int_or_none(item.attrib.get('se')),
|
||||
'episode_number': int_or_none(item.attrib.get('ep')),
|
||||
'thumbnail': thumbnail,
|
||||
'thumbnails': thumbnails,
|
||||
'subtitles': subtitles,
|
||||
'formats': formats,
|
||||
}
|
||||
|
@ -123,7 +123,7 @@ class CrunchyrollIE(CrunchyrollBaseIE):
|
||||
'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
|
||||
'info_dict': {
|
||||
'id': '645513',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Wanna be the Strongest in the World Episode 1 – An Idol-Wrestler is Born!',
|
||||
'description': 'md5:2d17137920c64f2f49981a7797d275ef',
|
||||
'thumbnail': 'http://img1.ak.crunchyroll.com/i/spire1-tmb/20c6b5e10f1a47b10516877d3c039cae1380951166_full.jpg',
|
||||
@ -166,6 +166,25 @@ class CrunchyrollIE(CrunchyrollBaseIE):
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.crunchyroll.com/konosuba-gods-blessing-on-this-wonderful-world/episode-1-give-me-deliverance-from-this-judicial-injustice-727589',
|
||||
'info_dict': {
|
||||
'id': '727589',
|
||||
'ext': 'mp4',
|
||||
'title': "KONOSUBA -God's blessing on this wonderful world! 2 Episode 1 – Give Me Deliverance from this Judicial Injustice!",
|
||||
'description': 'md5:cbcf05e528124b0f3a0a419fc805ea7d',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': 'Kadokawa Pictures Inc.',
|
||||
'upload_date': '20170118',
|
||||
'series': "KONOSUBA -God's blessing on this wonderful world!",
|
||||
'season_number': 2,
|
||||
'episode': 'Give Me Deliverance from this Judicial Injustice!',
|
||||
'episode_number': 1,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.crunchyroll.fr/girl-friend-beta/episode-11-goodbye-la-mode-661697',
|
||||
'only_matching': True,
|
||||
@ -173,6 +192,21 @@ class CrunchyrollIE(CrunchyrollBaseIE):
|
||||
# geo-restricted (US), 18+ maturity wall, non-premium available
|
||||
'url': 'http://www.crunchyroll.com/cosplay-complex-ova/episode-1-the-birth-of-the-cosplay-club-565617',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# A description with double quotes
|
||||
'url': 'http://www.crunchyroll.com/11eyes/episode-1-piros-jszaka-red-night-535080',
|
||||
'info_dict': {
|
||||
'id': '535080',
|
||||
'ext': 'mp4',
|
||||
'title': '11eyes Episode 1 – Piros éjszaka - Red Night',
|
||||
'description': 'Kakeru and Yuka are thrown into an alternate nightmarish world they call "Red Night".',
|
||||
'uploader': 'Marvelous AQL Inc.',
|
||||
'upload_date': '20091021',
|
||||
},
|
||||
'params': {
|
||||
# Just test metadata extraction
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
_FORMAT_IDS = {
|
||||
@ -236,8 +270,7 @@ class CrunchyrollIE(CrunchyrollBaseIE):
|
||||
output += 'WrapStyle: %s\n' % sub_root.attrib['wrap_style']
|
||||
output += 'PlayResX: %s\n' % sub_root.attrib['play_res_x']
|
||||
output += 'PlayResY: %s\n' % sub_root.attrib['play_res_y']
|
||||
output += """ScaledBorderAndShadow: no
|
||||
|
||||
output += """
|
||||
[V4+ Styles]
|
||||
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
|
||||
"""
|
||||
@ -344,9 +377,9 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
r'(?s)<h1[^>]*>((?:(?!<h1).)*?<span[^>]+itemprop=["\']title["\'][^>]*>(?:(?!<h1).)+?)</h1>',
|
||||
webpage, 'video_title')
|
||||
video_title = re.sub(r' {2,}', ' ', video_title)
|
||||
video_description = self._html_search_regex(
|
||||
r'<script[^>]*>\s*.+?\[media_id=%s\].+?"description"\s*:\s*"([^"]+)' % video_id,
|
||||
webpage, 'description', default=None)
|
||||
video_description = self._parse_json(self._html_search_regex(
|
||||
r'<script[^>]*>\s*.+?\[media_id=%s\].+?({.+?"description"\s*:.+?})\);' % video_id,
|
||||
webpage, 'description', default='{}'), video_id).get('description')
|
||||
if video_description:
|
||||
video_description = lowercase_escape(video_description.replace(r'\r\n', '\n'))
|
||||
video_upload_date = self._html_search_regex(
|
||||
@ -439,6 +472,18 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
|
||||
subtitles = self.extract_subtitles(video_id, webpage)
|
||||
|
||||
# the webpage provides more accurate data than series_title from XML
|
||||
series = self._html_search_regex(
|
||||
r'id=["\']showmedia_about_episode_num[^>]+>\s*<a[^>]+>([^<]+)',
|
||||
webpage, 'series', default=xpath_text(metadata, 'series_title'))
|
||||
|
||||
episode = xpath_text(metadata, 'episode_title')
|
||||
episode_number = int_or_none(xpath_text(metadata, 'episode_number'))
|
||||
|
||||
season_number = int_or_none(self._search_regex(
|
||||
r'(?s)<h4[^>]+id=["\']showmedia_about_episode_num[^>]+>.+?</h4>\s*<h4>\s*Season (\d+)',
|
||||
webpage, 'season number', default=None))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
@ -446,9 +491,10 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
'thumbnail': xpath_text(metadata, 'episode_image_url'),
|
||||
'uploader': video_uploader,
|
||||
'upload_date': video_upload_date,
|
||||
'series': xpath_text(metadata, 'series_title'),
|
||||
'episode': xpath_text(metadata, 'episode_title'),
|
||||
'episode_number': int_or_none(xpath_text(metadata, 'episode_number')),
|
||||
'series': series,
|
||||
'season_number': season_number,
|
||||
'episode': episode,
|
||||
'episode_number': episode_number,
|
||||
'subtitles': subtitles,
|
||||
'formats': formats,
|
||||
}
|
||||
@ -488,11 +534,11 @@ class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE):
|
||||
r'(?s)<h1[^>]*>\s*<span itemprop="name">(.*?)</span>',
|
||||
webpage, 'title')
|
||||
episode_paths = re.findall(
|
||||
r'(?s)<li id="showview_videos_media_[0-9]+"[^>]+>.*?<a href="([^"]+)"',
|
||||
r'(?s)<li id="showview_videos_media_(\d+)"[^>]+>.*?<a href="([^"]+)"',
|
||||
webpage)
|
||||
entries = [
|
||||
self.url_result('http://www.crunchyroll.com' + ep, 'Crunchyroll')
|
||||
for ep in episode_paths
|
||||
self.url_result('http://www.crunchyroll.com' + ep, 'Crunchyroll', ep_id)
|
||||
for ep_id, ep in episode_paths
|
||||
]
|
||||
entries.reverse()
|
||||
|
||||
|
@ -12,6 +12,7 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
)
|
||||
from .senateisvp import SenateISVPIE
|
||||
from .ustream import UstreamIE
|
||||
|
||||
|
||||
class CSpanIE(InfoExtractor):
|
||||
@ -22,14 +23,13 @@ class CSpanIE(InfoExtractor):
|
||||
'md5': '94b29a4f131ff03d23471dd6f60b6a1d',
|
||||
'info_dict': {
|
||||
'id': '315139',
|
||||
'ext': 'mp4',
|
||||
'title': 'Attorney General Eric Holder on Voting Rights Act Decision',
|
||||
'description': 'Attorney General Eric Holder speaks to reporters following the Supreme Court decision in [Shelby County v. Holder], in which the court ruled that the preclearance provisions of the Voting Rights Act could not be enforced.',
|
||||
},
|
||||
'playlist_mincount': 2,
|
||||
'skip': 'Regularly fails on travis, for unknown reasons',
|
||||
}, {
|
||||
'url': 'http://www.c-span.org/video/?c4486943/cspan-international-health-care-models',
|
||||
'md5': '8e5fbfabe6ad0f89f3012a7943c1287b',
|
||||
# md5 is unstable
|
||||
'info_dict': {
|
||||
'id': 'c4486943',
|
||||
'ext': 'mp4',
|
||||
@ -38,14 +38,11 @@ class CSpanIE(InfoExtractor):
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.c-span.org/video/?318608-1/gm-ignition-switch-recall',
|
||||
'md5': '2ae5051559169baadba13fc35345ae74',
|
||||
'info_dict': {
|
||||
'id': '342759',
|
||||
'ext': 'mp4',
|
||||
'title': 'General Motors Ignition Switch Recall',
|
||||
'duration': 14848,
|
||||
'description': 'md5:118081aedd24bf1d3b68b3803344e7f3'
|
||||
},
|
||||
'playlist_mincount': 6,
|
||||
}, {
|
||||
# Video from senate.gov
|
||||
'url': 'http://www.c-span.org/video/?104517-1/immigration-reforms-needed-protect-skilled-american-workers',
|
||||
@ -57,12 +54,30 @@ class CSpanIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True, # m3u8 downloads
|
||||
}
|
||||
}, {
|
||||
# Ustream embedded video
|
||||
'url': 'https://www.c-span.org/video/?114917-1/armed-services',
|
||||
'info_dict': {
|
||||
'id': '58428542',
|
||||
'ext': 'flv',
|
||||
'title': 'USHR07 Armed Services Committee',
|
||||
'description': 'hsas00-2118-20150204-1000et-07\n\n\nUSHR07 Armed Services Committee',
|
||||
'timestamp': 1423060374,
|
||||
'upload_date': '20150204',
|
||||
'uploader': 'HouseCommittee',
|
||||
'uploader_id': '12987475',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
video_type = None
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
ustream_url = UstreamIE._extract_url(webpage)
|
||||
if ustream_url:
|
||||
return self.url_result(ustream_url, UstreamIE.ie_key())
|
||||
|
||||
# We first look for clipid, because clipprog always appears before
|
||||
patterns = [r'id=\'clip(%s)\'\s*value=\'([0-9]+)\'' % t for t in ('id', 'prog')]
|
||||
results = list(filter(None, (re.search(p, webpage) for p in patterns)))
|
||||
|
@ -66,7 +66,6 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
'uploader_id': 'xijv66',
|
||||
'age_limit': 0,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
}
|
||||
},
|
||||
# Vevo video
|
||||
@ -140,7 +139,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
view_count = str_to_int(view_count_str)
|
||||
comment_count = int_or_none(self._search_regex(
|
||||
r'<meta[^>]+itemprop="interactionCount"[^>]+content="UserComments:(\d+)"',
|
||||
webpage, 'comment count', fatal=False))
|
||||
webpage, 'comment count', default=None))
|
||||
|
||||
player_v5 = self._search_regex(
|
||||
[r'buildPlayer\(({.+?})\);\n', # See https://github.com/rg3/youtube-dl/issues/7826
|
||||
|
159
youtube_dl/extractor/disney.py
Normal file
159
youtube_dl/extractor/disney.py
Normal file
@ -0,0 +1,159 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
unified_strdate,
|
||||
compat_str,
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class DisneyIE(InfoExtractor):
    """Extractor for video pages and embeds on the sites matched by _VALID_URL.

    Two page layouts are handled: pages whose URL carries a 24-character
    video id (served through the ``/embed/<id>`` page and its
    ``Disney.EmbedVideo`` object) and pages identified only by a display id
    (parsed from the ``Grill.burger`` object in the page itself).
    """

    _VALID_URL = r'''(?x)
        https?://(?P<domain>(?:[^/]+\.)?(?:disney\.[a-z]{2,3}(?:\.[a-z]{2})?|disney(?:(?:me|latino)\.com|turkiye\.com\.tr)|(?:starwars|marvelkids)\.com))/(?:(?:embed/|(?:[^/]+/)+[\w-]+-)(?P<id>[a-z0-9]{24})|(?:[^/]+/)?(?P<display_id>[^/?#]+))'''
    _TESTS = [{
        # Disney.EmbedVideo
        'url': 'http://video.disney.com/watch/moana-trailer-545ed1857afee5a0ec239977',
        'info_dict': {
            'id': '545ed1857afee5a0ec239977',
            'ext': 'mp4',
            'title': 'Moana - Trailer',
            'description': 'A fun adventure for the entire Family! Bring home Moana on Digital HD Feb 21 & Blu-ray March 7',
            'upload_date': '20170112',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        }
    }, {
        # Grill.burger
        'url': 'http://www.starwars.com/video/rogue-one-a-star-wars-story-intro-featurette',
        'info_dict': {
            'id': '5454e9f4e9804a552e3524c8',
            'ext': 'mp4',
            'title': '"Intro" Featurette: Rogue One: A Star Wars Story',
            'upload_date': '20170104',
            'description': 'Go behind-the-scenes of Rogue One: A Star Wars Story in this featurette with Director Gareth Edwards and the cast of the film.',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        }
    }, {
        'url': 'http://videos.disneylatino.com/ver/spider-man-de-regreso-a-casa-primer-adelanto-543a33a1850bdcfcca13bae2',
        'only_matching': True,
    }, {
        'url': 'http://video.en.disneyme.com/watch/future-worm/robo-carp-2001-544b66002aa7353cdd3f5114',
        'only_matching': True,
    }, {
        'url': 'http://video.disneyturkiye.com.tr/izle/7c-7-cuceler/kimin-sesi-zaten-5456f3d015f6b36c8afdd0e2',
        'only_matching': True,
    }, {
        'url': 'http://disneyjunior.disney.com/embed/546a4798ddba3d1612e4005d',
        'only_matching': True,
    }, {
        'url': 'http://www.starwars.com/embed/54690d1e6c42e5f09a0fb097',
        'only_matching': True,
    }, {
        'url': 'http://spiderman.marvelkids.com/embed/522900d2ced3c565e4cc0677',
        'only_matching': True,
    }, {
        'url': 'http://spiderman.marvelkids.com/videos/contest-of-champions-part-four-clip-1',
        'only_matching': True,
    }, {
        'url': 'http://disneyjunior.en.disneyme.com/dj/watch-my-friends-tigger-and-pooh-promo',
        'only_matching': True,
    }, {
        'url': 'http://disneyjunior.disney.com/galactech-the-galactech-grab-galactech-an-admiral-rescue',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict (or a Vevo url_result) for *url*.

        Raises ExtractorError when the Grill.burger stack holds no video
        entry, or when no format was found and the video is flagged expired.
        """
        domain, video_id, display_id = re.match(self._VALID_URL, url).groups()
        if not video_id:
            # No 24-character id in the URL: read the Grill.burger object
            # embedded in the page itself.
            webpage = self._download_webpage(url, display_id)
            # Join string literals that the page splits with `" + "` before
            # handing the text to the JSON parser.
            grill = re.sub(r'"\s*\+\s*"', '', self._search_regex(
                r'Grill\.burger\s*=\s*({.+})\s*:',
                webpage, 'grill data'))
            stack = self._parse_json(grill, display_id)['stack']
            # Use a default so a page without a video entry produces a proper
            # extractor error instead of a bare StopIteration.
            page_data = next(
                (s for s in stack if s.get('type') == 'video'), None)
            if page_data is None:
                raise ExtractorError('Unable to find video data')
            video_data = page_data['data'][0]
        else:
            webpage = self._download_webpage(
                'http://%s/embed/%s' % (domain, video_id), video_id)
            page_data = self._parse_json(self._search_regex(
                r'Disney\.EmbedVideo\s*=\s*({.+});',
                webpage, 'embed data'), video_id)
            video_data = page_data['video']

        # Videos that list a Vevo external are delegated to the Vevo extractor.
        for external in video_data.get('externals', []):
            if external.get('source') == 'vevo':
                return self.url_result('vevo:' + external['data_id'], 'Vevo')

        video_id = video_data['id']
        title = video_data['title']

        formats = []
        for flavor in video_data.get('flavors', []):
            flavor_format = flavor.get('format')
            flavor_url = flavor.get('url')
            # Skip flavors without an absolute http(s) URL and the
            # 'mp4_access' flavor.
            if not flavor_url or not re.match(r'https?://', flavor_url) or flavor_format == 'mp4_access':
                continue
            tbr = int_or_none(flavor.get('bitrate'))
            # Flavors reporting a bitrate of 99999 are expanded through the
            # m3u8 format extractor rather than added as a single format.
            if tbr == 99999:
                formats.extend(self._extract_m3u8_formats(
                    flavor_url, video_id, 'mp4',
                    m3u8_id=flavor_format, fatal=False))
                continue
            format_id = []
            if flavor_format:
                format_id.append(flavor_format)
            if tbr:
                format_id.append(compat_str(tbr))
            ext = determine_ext(flavor_url)
            if flavor_format == 'applehttp' or ext == 'm3u8':
                ext = 'mp4'
            width = int_or_none(flavor.get('width'))
            height = int_or_none(flavor.get('height'))
            formats.append({
                'format_id': '-'.join(format_id),
                'url': flavor_url,
                'width': width,
                'height': height,
                'tbr': tbr,
                'ext': ext,
                # Both dimensions reported as 0 marks the flavor as having
                # no video stream.
                'vcodec': 'none' if (width == 0 and height == 0) else None,
            })
        if not formats and video_data.get('expired'):
            raise ExtractorError(
                '%s said: %s' % (self.IE_NAME, page_data['translations']['video_expired']),
                expected=True)
        self._sort_formats(formats)

        subtitles = {}
        for caption in video_data.get('captions', []):
            caption_url = caption.get('url')
            caption_format = caption.get('format')
            # A caption entry may lack 'format'; treat that like an unknown
            # format instead of calling .startswith() on None.
            if not caption_url or not caption_format or caption_format.startswith('unknown'):
                continue
            subtitles.setdefault(caption.get('language', 'en'), []).append({
                'url': caption_url,
                'ext': {
                    'webvtt': 'vtt',
                }.get(caption_format, caption_format),
            })

        return {
            'id': video_id,
            'title': title,
            'description': video_data.get('description') or video_data.get('short_desc'),
            'thumbnail': video_data.get('thumb') or video_data.get('thumb_secure'),
            'duration': int_or_none(video_data.get('duration_sec')),
            'upload_date': unified_strdate(video_data.get('publish_date')),
            'formats': formats,
            'subtitles': subtitles,
        }
|
@ -18,7 +18,7 @@ from ..utils import (
|
||||
|
||||
class DouyuTVIE(InfoExtractor):
|
||||
IE_DESC = '斗鱼'
|
||||
_VALID_URL = r'https?://(?:www\.)?douyu(?:tv)?\.com/(?P<id>[A-Za-z0-9]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?douyu(?:tv)?\.com/(?:[^/]+/)*(?P<id>[A-Za-z0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.douyutv.com/iseven',
|
||||
'info_dict': {
|
||||
@ -68,6 +68,10 @@ class DouyuTVIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://www.douyu.com/xiaocang',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# \"room_id\"
|
||||
'url': 'http://www.douyu.com/t/lpl',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
# Decompile core.swf in webpage by ffdec "Search SWFs in memory". core.swf
|
||||
@ -82,7 +86,7 @@ class DouyuTVIE(InfoExtractor):
|
||||
else:
|
||||
page = self._download_webpage(url, video_id)
|
||||
room_id = self._html_search_regex(
|
||||
r'"room_id"\s*:\s*(\d+),', page, 'room id')
|
||||
r'"room_id\\?"\s*:\s*(\d+),', page, 'room id')
|
||||
|
||||
room = self._download_json(
|
||||
'http://m.douyu.com/html5/live?roomId=%s' % room_id, video_id,
|
||||
|
@ -20,6 +20,7 @@ from ..utils import (
|
||||
class DramaFeverBaseIE(AMPIE):
|
||||
_LOGIN_URL = 'https://www.dramafever.com/accounts/login/'
|
||||
_NETRC_MACHINE = 'dramafever'
|
||||
_GEO_COUNTRIES = ['US', 'CA']
|
||||
|
||||
_CONSUMER_SECRET = 'DA59dtVXYLxajktV'
|
||||
|
||||
@ -66,7 +67,7 @@ class DramaFeverBaseIE(AMPIE):
|
||||
|
||||
class DramaFeverIE(DramaFeverBaseIE):
|
||||
IE_NAME = 'dramafever'
|
||||
_VALID_URL = r'https?://(?:www\.)?dramafever\.com/drama/(?P<id>[0-9]+/[0-9]+)(?:/|$)'
|
||||
_VALID_URL = r'https?://(?:www\.)?dramafever\.com/(?:[^/]+/)?drama/(?P<id>[0-9]+/[0-9]+)(?:/|$)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.dramafever.com/drama/4512/1/Cooking_with_Shin/',
|
||||
'info_dict': {
|
||||
@ -103,6 +104,9 @@ class DramaFeverIE(DramaFeverBaseIE):
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.dramafever.com/zh-cn/drama/4972/15/Doctor_Romantic/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -113,8 +117,9 @@ class DramaFeverIE(DramaFeverBaseIE):
|
||||
'http://www.dramafever.com/amp/episode/feed.json?guid=%s' % video_id)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError):
|
||||
raise ExtractorError(
|
||||
'Currently unavailable in your country.', expected=True)
|
||||
self.raise_geo_restricted(
|
||||
msg='Currently unavailable in your country',
|
||||
countries=self._GEO_COUNTRIES)
|
||||
raise
|
||||
|
||||
series_id, episode_number = video_id.split('.')
|
||||
@ -148,7 +153,7 @@ class DramaFeverIE(DramaFeverBaseIE):
|
||||
|
||||
class DramaFeverSeriesIE(DramaFeverBaseIE):
|
||||
IE_NAME = 'dramafever:series'
|
||||
_VALID_URL = r'https?://(?:www\.)?dramafever\.com/drama/(?P<id>[0-9]+)(?:/(?:(?!\d+(?:/|$)).+)?)?$'
|
||||
_VALID_URL = r'https?://(?:www\.)?dramafever\.com/(?:[^/]+/)?drama/(?P<id>[0-9]+)(?:/(?:(?!\d+(?:/|$)).+)?)?$'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.dramafever.com/drama/4512/Cooking_with_Shin/',
|
||||
'info_dict': {
|
||||
|
@ -9,12 +9,13 @@ from ..utils import (
|
||||
mimetype2ext,
|
||||
parse_iso8601,
|
||||
remove_end,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
class DRTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv/se|nyheder)/(?:[^/]+/)*(?P<id>[\da-z-]+)(?:[/#?]|$)'
|
||||
|
||||
_VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv/se|nyheder|radio/ondemand)/(?:[^/]+/)*(?P<id>[\da-z-]+)(?:[/#?]|$)'
|
||||
IE_NAME = 'drtv'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.dr.dk/tv/se/boern/ultra/klassen-ultra/klassen-darlig-taber-10',
|
||||
'md5': '25e659cccc9a2ed956110a299fdf5983',
|
||||
@ -79,9 +80,10 @@ class DRTVIE(InfoExtractor):
|
||||
subtitles = {}
|
||||
|
||||
for asset in data['Assets']:
|
||||
if asset.get('Kind') == 'Image':
|
||||
kind = asset.get('Kind')
|
||||
if kind == 'Image':
|
||||
thumbnail = asset.get('Uri')
|
||||
elif asset.get('Kind') == 'VideoResource':
|
||||
elif kind in ('VideoResource', 'AudioResource'):
|
||||
duration = float_or_none(asset.get('DurationInMilliseconds'), 1000)
|
||||
restricted_to_denmark = asset.get('RestrictedToDenmark')
|
||||
spoken_subtitles = asset.get('Target') == 'SpokenSubtitles'
|
||||
@ -96,9 +98,13 @@ class DRTVIE(InfoExtractor):
|
||||
preference = -1
|
||||
format_id += '-spoken-subtitles'
|
||||
if target == 'HDS':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
f4m_formats = self._extract_f4m_formats(
|
||||
uri + '?hdcore=3.3.0&plugin=aasp-3.3.0.99.43',
|
||||
video_id, preference, f4m_id=format_id))
|
||||
video_id, preference, f4m_id=format_id)
|
||||
if kind == 'AudioResource':
|
||||
for f in f4m_formats:
|
||||
f['vcodec'] = 'none'
|
||||
formats.extend(f4m_formats)
|
||||
elif target == 'HLS':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
uri, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
@ -112,6 +118,7 @@ class DRTVIE(InfoExtractor):
|
||||
'format_id': format_id,
|
||||
'tbr': int_or_none(bitrate),
|
||||
'ext': link.get('FileFormat'),
|
||||
'vcodec': 'none' if kind == 'AudioResource' else None,
|
||||
})
|
||||
subtitles_list = asset.get('SubtitlesList')
|
||||
if isinstance(subtitles_list, list):
|
||||
@ -144,3 +151,58 @@ class DRTVIE(InfoExtractor):
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
|
||||
class DRTVLiveIE(InfoExtractor):
    """Extractor for live channel pages matched by _VALID_URL."""

    IE_NAME = 'drtv:live'
    _VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv|TV)/live/(?P<id>[\da-z-]+)'
    _TEST = {
        'url': 'https://www.dr.dk/tv/live/dr1',
        'info_dict': {
            'id': 'dr1',
            'ext': 'mp4',
            'title': 're:^DR1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Fetch the channel JSON and collect HLS/HDS formats per stream."""
        channel_id = self._match_id(url)
        channel = self._download_json(
            'https://www.dr.dk/mu-online/api/1.0/channel/' + channel_id,
            channel_id)
        live_title = self._live_title(channel['Title'])

        formats = []
        for srv in channel.get('StreamingServers', []):
            host = srv.get('Server')
            if not host:
                continue
            protocol = srv.get('LinkType')
            # Walk every stream of every quality level lazily.
            stream_paths = (
                entry.get('Stream')
                for level in srv.get('Qualities', [])
                for entry in level.get('Streams', []))
            for path in stream_paths:
                if not path:
                    continue
                base_url = '%s/%s' % (host, path)
                # The HLS variant of the URL carries an empty `b` parameter.
                hls_url = update_url_query(base_url, {'b': ''})
                if protocol == 'HLS':
                    hls_formats = self._extract_m3u8_formats(
                        hls_url, channel_id, 'mp4',
                        m3u8_id=protocol, fatal=False, live=True)
                    formats.extend(hls_formats)
                elif protocol == 'HDS':
                    # The HDS manifest URL is built from the bare URL plus
                    # an `hdcore` parameter.
                    hds_url = update_url_query(
                        base_url, {'hdcore': '3.7.0'})
                    hds_formats = self._extract_f4m_formats(
                        hds_url, channel_id, f4m_id=protocol, fatal=False)
                    formats.extend(hds_formats)
        self._sort_formats(formats)

        info = {
            'id': channel_id,
            'title': live_title,
            'thumbnail': channel.get('PrimaryImageUri'),
            'formats': formats,
            'is_live': True,
        }
        return info
|
||||
|
@ -1,67 +1,97 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..compat import (
|
||||
compat_urlparse,
|
||||
compat_str,
|
||||
)
|
||||
from ..utils import (
|
||||
remove_start,
|
||||
sanitized_Request,
|
||||
extract_attributes,
|
||||
ExtractorError,
|
||||
get_elements_by_class,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class EinthusanIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?einthusan\.com/movies/watch.php\?([^#]*?)id=(?P<id>[0-9]+)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.einthusan.com/movies/watch.php?id=2447',
|
||||
'md5': 'd71379996ff5b7f217eca034c34e3461',
|
||||
'info_dict': {
|
||||
'id': '2447',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ek Villain',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'description': 'md5:9d29fc91a7abadd4591fb862fa560d93',
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://www.einthusan.com/movies/watch.php?id=1671',
|
||||
'md5': 'b16a6fd3c67c06eb7c79c8a8615f4213',
|
||||
'info_dict': {
|
||||
'id': '1671',
|
||||
'ext': 'mp4',
|
||||
'title': 'Soodhu Kavvuum',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'description': 'md5:b40f2bf7320b4f9414f3780817b2af8c',
|
||||
}
|
||||
},
|
||||
]
|
||||
_VALID_URL = r'https?://einthusan\.tv/movie/watch/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://einthusan.tv/movie/watch/9097/',
|
||||
'md5': 'ff0f7f2065031b8a2cf13a933731c035',
|
||||
'info_dict': {
|
||||
'id': '9097',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ae Dil Hai Mushkil',
|
||||
'description': 'md5:33ef934c82a671a94652a9b4e54d931b',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://einthusan.tv/movie/watch/51MZ/?lang=hindi',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
# reversed from jsoncrypto.prototype.decrypt() in einthusan-PGMovieWatcher.js
|
||||
def _decrypt(self, encrypted_data, video_id):
    """Undo the site's character shuffling and parse the resulting JSON.

    The base64 text is rebuilt from the first ten characters, the final
    character, and the characters from index 12 up to (not including) the
    final one; it is then base64-decoded as UTF-8 and parsed as JSON.
    """
    head = encrypted_data[:10]
    moved_char = encrypted_data[-1]
    tail = encrypted_data[12:-1]
    b64_text = head + moved_char + tail
    decoded = base64.b64decode(b64_text.encode('ascii')).decode('utf-8')
    return self._parse_json(decoded, video_id)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
request = sanitized_Request(url)
|
||||
request.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 5.2; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0')
|
||||
webpage = self._download_webpage(request, video_id)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<h1><a[^>]+class=["\']movie-title["\'][^>]*>(.+?)</a></h1>',
|
||||
webpage, 'title')
|
||||
title = self._html_search_regex(r'<h3>([^<]+)</h3>', webpage, 'title')
|
||||
|
||||
video_id = self._search_regex(
|
||||
r'data-movieid=["\'](\d+)', webpage, 'video id', default=video_id)
|
||||
player_params = extract_attributes(self._search_regex(
|
||||
r'(<section[^>]+id="UIVideoPlayer"[^>]+>)', webpage, 'player parameters'))
|
||||
|
||||
m3u8_url = self._download_webpage(
|
||||
'http://cdn.einthusan.com/geturl/%s/hd/London,Washington,Toronto,Dallas,San,Sydney/'
|
||||
% video_id, video_id, headers={'Referer': url})
|
||||
formats = self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native')
|
||||
page_id = self._html_search_regex(
|
||||
'<html[^>]+data-pageid="([^"]+)"', webpage, 'page ID')
|
||||
video_data = self._download_json(
|
||||
'https://einthusan.tv/ajax/movie/watch/%s/' % video_id, video_id,
|
||||
data=urlencode_postdata({
|
||||
'xEvent': 'UIVideoPlayer.PingOutcome',
|
||||
'xJson': json.dumps({
|
||||
'EJOutcomes': player_params['data-ejpingables'],
|
||||
'NativeHLS': False
|
||||
}),
|
||||
'arcVersion': 3,
|
||||
'appVersion': 59,
|
||||
'gorilla.csrf.Token': page_id,
|
||||
}))['Data']
|
||||
|
||||
description = self._html_search_meta('description', webpage)
|
||||
if isinstance(video_data, compat_str) and video_data.startswith('/ratelimited/'):
|
||||
raise ExtractorError(
|
||||
'Download rate reached. Please try again later.', expected=True)
|
||||
|
||||
ej_links = self._decrypt(video_data['EJLinks'], video_id)
|
||||
|
||||
formats = []
|
||||
|
||||
m3u8_url = ej_links.get('HLSLink')
|
||||
if m3u8_url:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native'))
|
||||
|
||||
mp4_url = ej_links.get('MP4Link')
|
||||
if mp4_url:
|
||||
formats.append({
|
||||
'url': mp4_url,
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
description = get_elements_by_class('synopsis', webpage)[0]
|
||||
thumbnail = self._html_search_regex(
|
||||
r'''<a class="movie-cover-wrapper".*?><img src=["'](.*?)["'].*?/></a>''',
|
||||
webpage, "thumbnail url", fatal=False)
|
||||
r'''<img[^>]+src=(["'])(?P<url>(?!\1).+?/moviecovers/(?!\1).+?)\1''',
|
||||
webpage, 'thumbnail url', fatal=False, group='url')
|
||||
if thumbnail is not None:
|
||||
thumbnail = compat_urlparse.urljoin(url, remove_start(thumbnail, '..'))
|
||||
thumbnail = compat_urlparse.urljoin(url, thumbnail)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@ -1,13 +1,9 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
NO_DEFAULT,
|
||||
)
|
||||
from .kaltura import KalturaIE
|
||||
from ..utils import NO_DEFAULT
|
||||
|
||||
|
||||
class EllenTVIE(InfoExtractor):
|
||||
@ -65,7 +61,7 @@ class EllenTVIE(InfoExtractor):
|
||||
if partner_id and kaltura_id:
|
||||
break
|
||||
|
||||
return self.url_result('kaltura:%s:%s' % (partner_id, kaltura_id), 'Kaltura')
|
||||
return self.url_result('kaltura:%s:%s' % (partner_id, kaltura_id), KalturaIE.ie_key())
|
||||
|
||||
|
||||
class EllenTVClipsIE(InfoExtractor):
|
||||
@ -77,14 +73,14 @@ class EllenTVClipsIE(InfoExtractor):
|
||||
'id': 'meryl-streep-vanessa-hudgens',
|
||||
'title': 'Meryl Streep, Vanessa Hudgens',
|
||||
},
|
||||
'playlist_mincount': 7,
|
||||
'playlist_mincount': 5,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
playlist = self._extract_playlist(webpage)
|
||||
playlist = self._extract_playlist(webpage, playlist_id)
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
@ -93,16 +89,13 @@ class EllenTVClipsIE(InfoExtractor):
|
||||
'entries': self._extract_entries(playlist)
|
||||
}
|
||||
|
||||
def _extract_playlist(self, webpage):
|
||||
def _extract_playlist(self, webpage, playlist_id):
|
||||
json_string = self._search_regex(r'playerView.addClips\(\[\{(.*?)\}\]\);', webpage, 'json')
|
||||
try:
|
||||
return json.loads('[{' + json_string + '}]')
|
||||
except ValueError as ve:
|
||||
raise ExtractorError('Failed to download JSON', cause=ve)
|
||||
return self._parse_json('[{' + json_string + '}]', playlist_id)
|
||||
|
||||
def _extract_entries(self, playlist):
|
||||
return [
|
||||
self.url_result(
|
||||
'kaltura:%s:%s' % (item['kaltura_partner_id'], item['kaltura_entry_id']),
|
||||
'Kaltura')
|
||||
KalturaIE.ie_key(), video_id=item['kaltura_entry_id'])
|
||||
for item in playlist]
|
||||
|
@ -2,7 +2,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import unified_strdate
|
||||
from ..utils import strip_jsonp, unified_strdate
|
||||
|
||||
|
||||
class ElPaisIE(InfoExtractor):
|
||||
@ -29,6 +29,28 @@ class ElPaisIE(InfoExtractor):
|
||||
'description': 'Que sí, que las cápsulas son cómodas. Pero si le pides algo más a la vida, quizá deberías aprender a usar bien la cafetera italiana. No tienes más que ver este vídeo y seguir sus siete normas básicas.',
|
||||
'upload_date': '20160303',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://elpais.com/elpais/2017/01/26/ciencia/1485456786_417876.html',
|
||||
'md5': '9c79923a118a067e1a45789e1e0b0f9c',
|
||||
'info_dict': {
|
||||
'id': '1485456786_417876',
|
||||
'ext': 'mp4',
|
||||
'title': 'Hallado un barco de la antigua Roma que naufragó en Baleares hace 1.800 años',
|
||||
'description': 'La nave portaba cientos de ánforas y se hundió cerca de la isla de Cabrera por razones desconocidas',
|
||||
'upload_date': '20170127',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://epv.elpais.com/epv/2017/02/14/programa_la_voz_de_inaki/1487062137_075943.html',
|
||||
'info_dict': {
|
||||
'id': '1487062137_075943',
|
||||
'ext': 'mp4',
|
||||
'title': 'Disyuntivas',
|
||||
'description': 'md5:a0fb1485c4a6a8a917e6f93878e66218',
|
||||
'upload_date': '20170214',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -37,19 +59,27 @@ class ElPaisIE(InfoExtractor):
|
||||
|
||||
prefix = self._html_search_regex(
|
||||
r'var\s+url_cache\s*=\s*"([^"]+)";', webpage, 'URL prefix')
|
||||
video_suffix = self._search_regex(
|
||||
r"(?:URLMediaFile|urlVideo_\d+)\s*=\s*url_cache\s*\+\s*'([^']+)'", webpage, 'video URL')
|
||||
id_multimedia = self._search_regex(
|
||||
r"id_multimedia\s*=\s*'([^']+)'", webpage, 'ID multimedia', default=None)
|
||||
if id_multimedia:
|
||||
url_info = self._download_json(
|
||||
'http://elpais.com/vdpep/1/?pepid=' + id_multimedia, video_id, transform_source=strip_jsonp)
|
||||
video_suffix = url_info['mp4']
|
||||
else:
|
||||
video_suffix = self._search_regex(
|
||||
r"(?:URLMediaFile|urlVideo_\d+)\s*=\s*url_cache\s*\+\s*'([^']+)'", webpage, 'video URL')
|
||||
video_url = prefix + video_suffix
|
||||
thumbnail_suffix = self._search_regex(
|
||||
r"(?:URLMediaStill|urlFotogramaFijo_\d+)\s*=\s*url_cache\s*\+\s*'([^']+)'",
|
||||
webpage, 'thumbnail URL', fatal=False)
|
||||
webpage, 'thumbnail URL', default=None)
|
||||
thumbnail = (
|
||||
None if thumbnail_suffix is None
|
||||
else prefix + thumbnail_suffix)
|
||||
else prefix + thumbnail_suffix) or self._og_search_thumbnail(webpage)
|
||||
title = self._html_search_regex(
|
||||
(r"tituloVideo\s*=\s*'([^']+)'", webpage, 'title',
|
||||
r'<h2 class="entry-header entry-title.*?>(.*?)</h2>'),
|
||||
webpage, 'title')
|
||||
(r"tituloVideo\s*=\s*'([^']+)'",
|
||||
r'<h2 class="entry-header entry-title.*?>(.*?)</h2>',
|
||||
r'<h1[^>]+class="titulo"[^>]*>([^<]+)'),
|
||||
webpage, 'title', default=None) or self._og_search_title(webpage)
|
||||
upload_date = unified_strdate(self._search_regex(
|
||||
r'<p class="date-header date-int updated"\s+title="([^"]+)">',
|
||||
webpage, 'upload date', default=None) or self._html_search_meta(
|
||||
|
@ -30,7 +30,10 @@ from .aenetworks import (
|
||||
AENetworksIE,
|
||||
HistoryTopicIE,
|
||||
)
|
||||
from .afreecatv import AfreecaTVIE
|
||||
from .afreecatv import (
|
||||
AfreecaTVIE,
|
||||
AfreecaTVGlobalIE,
|
||||
)
|
||||
from .airmozilla import AirMozillaIE
|
||||
from .aljazeera import AlJazeeraIE
|
||||
from .alphaporno import AlphaPornoIE
|
||||
@ -77,6 +80,10 @@ from .awaan import (
|
||||
AWAANLiveIE,
|
||||
AWAANSeasonIE,
|
||||
)
|
||||
from .azmedien import (
|
||||
AZMedienIE,
|
||||
AZMedienPlaylistIE,
|
||||
)
|
||||
from .azubu import AzubuIE, AzubuLiveIE
|
||||
from .baidu import BaiduVideoIE
|
||||
from .bambuser import BambuserIE, BambuserChannelIE
|
||||
@ -88,6 +95,7 @@ from .bbc import (
|
||||
BBCCoUkPlaylistIE,
|
||||
BBCIE,
|
||||
)
|
||||
from .beampro import BeamProLiveIE
|
||||
from .beeg import BeegIE
|
||||
from .behindkink import BehindKinkIE
|
||||
from .bellmedia import BellMediaIE
|
||||
@ -95,7 +103,10 @@ from .beatport import BeatportIE
|
||||
from .bet import BetIE
|
||||
from .bigflix import BigflixIE
|
||||
from .bild import BildIE
|
||||
from .bilibili import BiliBiliIE
|
||||
from .bilibili import (
|
||||
BiliBiliIE,
|
||||
BiliBiliBangumiIE,
|
||||
)
|
||||
from .biobiochiletv import BioBioChileTVIE
|
||||
from .biqle import BIQLEIE
|
||||
from .bleacherreport import (
|
||||
@ -191,6 +202,7 @@ from .commonprotocols import (
|
||||
RtmpIE,
|
||||
)
|
||||
from .condenast import CondeNastIE
|
||||
from .corus import CorusIE
|
||||
from .cracked import CrackedIE
|
||||
from .crackle import CrackleIE
|
||||
from .criterion import CriterionIE
|
||||
@ -237,12 +249,16 @@ from .dramafever import (
|
||||
from .dreisat import DreiSatIE
|
||||
from .drbonanza import DRBonanzaIE
|
||||
from .drtuber import DrTuberIE
|
||||
from .drtv import DRTVIE
|
||||
from .drtv import (
|
||||
DRTVIE,
|
||||
DRTVLiveIE,
|
||||
)
|
||||
from .dvtv import DVTVIE
|
||||
from .dumpert import DumpertIE
|
||||
from .defense import DefenseGouvFrIE
|
||||
from .discovery import DiscoveryIE
|
||||
from .discoverygo import DiscoveryGoIE
|
||||
from .disney import DisneyIE
|
||||
from .dispeak import DigitallySpeakingIE
|
||||
from .dropbox import DropboxIE
|
||||
from .dw import (
|
||||
@ -287,6 +303,10 @@ from .fc2 import (
|
||||
FC2EmbedIE,
|
||||
)
|
||||
from .fczenit import FczenitIE
|
||||
from .filmon import (
|
||||
FilmOnIE,
|
||||
FilmOnChannelIE,
|
||||
)
|
||||
from .firstpost import FirstpostIE
|
||||
from .firsttv import FirstTVIE
|
||||
from .fivemin import FiveMinIE
|
||||
@ -330,6 +350,7 @@ from .gameone import (
|
||||
from .gamersyde import GamersydeIE
|
||||
from .gamespot import GameSpotIE
|
||||
from .gamestar import GameStarIE
|
||||
from .gaskrank import GaskrankIE
|
||||
from .gazeta import GazetaIE
|
||||
from .gdcvault import GDCVaultIE
|
||||
from .generic import GenericIE
|
||||
@ -361,10 +382,7 @@ from .heise import HeiseIE
|
||||
from .hellporno import HellPornoIE
|
||||
from .helsinki import HelsinkiIE
|
||||
from .hentaistigma import HentaiStigmaIE
|
||||
from .hgtv import (
|
||||
HGTVIE,
|
||||
HGTVComShowIE,
|
||||
)
|
||||
from .hgtv import HGTVComShowIE
|
||||
from .historicfilms import HistoricFilmsIE
|
||||
from .hitbox import HitboxIE, HitboxLiveIE
|
||||
from .hitrecord import HitRecordIE
|
||||
@ -406,6 +424,7 @@ from .internetvideoarchive import InternetVideoArchiveIE
|
||||
from .iprima import IPrimaIE
|
||||
from .iqiyi import IqiyiIE
|
||||
from .ir90tv import Ir90TvIE
|
||||
from .itv import ITVIE
|
||||
from .ivi import (
|
||||
IviIE,
|
||||
IviCompilationIE
|
||||
@ -544,6 +563,7 @@ from .mtv import (
|
||||
MTVVideoIE,
|
||||
MTVServicesEmbeddedIE,
|
||||
MTVDEIE,
|
||||
MTV81IE,
|
||||
)
|
||||
from .muenchentv import MuenchenTVIE
|
||||
from .musicplayon import MusicPlayOnIE
|
||||
@ -593,6 +613,7 @@ from .nextmedia import (
|
||||
NextMediaIE,
|
||||
NextMediaActionNewsIE,
|
||||
AppleDailyIE,
|
||||
NextTVIE,
|
||||
)
|
||||
from .nfb import NFBIE
|
||||
from .nfl import NFLIE
|
||||
@ -656,6 +677,7 @@ from .nrk import (
|
||||
NRKTVIE,
|
||||
NRKTVDirekteIE,
|
||||
NRKTVEpisodesIE,
|
||||
NRKTVSeriesIE,
|
||||
)
|
||||
from .ntvde import NTVDeIE
|
||||
from .ntvru import NTVRuIE
|
||||
@ -672,6 +694,8 @@ from .ondemandkorea import OnDemandKoreaIE
|
||||
from .onet import (
|
||||
OnetIE,
|
||||
OnetChannelIE,
|
||||
OnetMVPIE,
|
||||
OnetPlIE,
|
||||
)
|
||||
from .onionstudios import OnionStudiosIE
|
||||
from .ooyala import (
|
||||
@ -719,6 +743,7 @@ from .polskieradio import (
|
||||
)
|
||||
from .porn91 import Porn91IE
|
||||
from .porncom import PornComIE
|
||||
from .pornflip import PornFlipIE
|
||||
from .pornhd import PornHdIE
|
||||
from .pornhub import (
|
||||
PornHubIE,
|
||||
@ -813,6 +838,7 @@ from .sbs import SBSIE
|
||||
from .scivee import SciVeeIE
|
||||
from .screencast import ScreencastIE
|
||||
from .screencastomatic import ScreencastOMaticIE
|
||||
from .scrippsnetworks import ScrippsNetworksWatchIE
|
||||
from .seeker import SeekerIE
|
||||
from .senateisvp import SenateISVPIE
|
||||
from .sendtonews import SendtoNewsIE
|
||||
@ -826,6 +852,7 @@ from .shared import (
|
||||
from .showroomlive import ShowRoomLiveIE
|
||||
from .sina import SinaIE
|
||||
from .sixplay import SixPlayIE
|
||||
from .skylinewebcams import SkylineWebcamsIE
|
||||
from .skynewsarabia import (
|
||||
SkyNewsArabiaIE,
|
||||
SkyNewsArabiaArticleIE,
|
||||
@ -867,12 +894,10 @@ from .spiegeltv import SpiegeltvIE
|
||||
from .spike import SpikeIE
|
||||
from .stitcher import StitcherIE
|
||||
from .sport5 import Sport5IE
|
||||
from .sportbox import (
|
||||
SportBoxIE,
|
||||
SportBoxEmbedIE,
|
||||
)
|
||||
from .sportbox import SportBoxEmbedIE
|
||||
from .sportdeutschland import SportDeutschlandIE
|
||||
from .sportschau import SportschauIE
|
||||
from .sprout import SproutIE
|
||||
from .srgssr import (
|
||||
SRGSSRIE,
|
||||
SRGSSRPlayIE,
|
||||
@ -974,6 +999,7 @@ from .tv2 import (
|
||||
)
|
||||
from .tv3 import TV3IE
|
||||
from .tv4 import TV4IE
|
||||
from .tva import TVAIE
|
||||
from .tvanouvelles import (
|
||||
TVANouvellesIE,
|
||||
TVANouvellesArticleIE,
|
||||
@ -984,6 +1010,7 @@ from .tvc import (
|
||||
)
|
||||
from .tvigle import TvigleIE
|
||||
from .tvland import TVLandIE
|
||||
from .tvn24 import TVN24IE
|
||||
from .tvnoe import TVNoeIE
|
||||
from .tvp import (
|
||||
TVPEmbedIE,
|
||||
@ -994,6 +1021,7 @@ from .tvplay import (
|
||||
TVPlayIE,
|
||||
ViafreeIE,
|
||||
)
|
||||
from .tvplayer import TVPlayerIE
|
||||
from .tweakers import TweakersIE
|
||||
from .twentyfourvideo import TwentyFourVideoIE
|
||||
from .twentymin import TwentyMinutenIE
|
||||
@ -1073,6 +1101,7 @@ from .videomore import (
|
||||
VideomoreSeasonIE,
|
||||
)
|
||||
from .videopremium import VideoPremiumIE
|
||||
from .videopress import VideoPressIE
|
||||
from .vidio import VidioIE
|
||||
from .vidme import (
|
||||
VidmeIE,
|
||||
@ -1117,8 +1146,12 @@ from .vk import (
|
||||
VKUserVideosIE,
|
||||
VKWallPostIE,
|
||||
)
|
||||
from .vlive import VLiveIE
|
||||
from .vlive import (
|
||||
VLiveIE,
|
||||
VLiveChannelIE
|
||||
)
|
||||
from .vodlocker import VodlockerIE
|
||||
from .vodpl import VODPlIE
|
||||
from .vodplatform import VODPlatformIE
|
||||
from .voicerepublic import VoiceRepublicIE
|
||||
from .voxmedia import VoxMediaIE
|
||||
|
@ -1,3 +1,4 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
@ -12,14 +13,16 @@ from ..compat import (
|
||||
compat_urllib_parse_unquote_plus,
|
||||
)
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
error_to_compat_str,
|
||||
ExtractorError,
|
||||
get_element_by_id,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
limit_length,
|
||||
sanitized_Request,
|
||||
try_get,
|
||||
urlencode_postdata,
|
||||
get_element_by_id,
|
||||
clean_html,
|
||||
)
|
||||
|
||||
|
||||
@ -71,7 +74,7 @@ class FacebookIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '274175099429670',
|
||||
'ext': 'mp4',
|
||||
'title': 'Facebook video #274175099429670',
|
||||
'title': 'Asif Nawab Butt posted a video to his Timeline.',
|
||||
'uploader': 'Asif Nawab Butt',
|
||||
'upload_date': '20140506',
|
||||
'timestamp': 1399398998,
|
||||
@ -132,6 +135,46 @@ class FacebookIE(InfoExtractor):
|
||||
'upload_date': '20161030',
|
||||
'uploader': 'CNN',
|
||||
},
|
||||
}, {
|
||||
# bigPipe.onPageletArrive ... onPageletArrive pagelet_group_mall
|
||||
'url': 'https://www.facebook.com/yaroslav.korpan/videos/1417995061575415/',
|
||||
'info_dict': {
|
||||
'id': '1417995061575415',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:a7b86ca673f51800cd54687b7f4012fe',
|
||||
'timestamp': 1486648217,
|
||||
'upload_date': '20170209',
|
||||
'uploader': 'Yaroslav Korpan',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.facebook.com/LaGuiaDelVaron/posts/1072691702860471',
|
||||
'info_dict': {
|
||||
'id': '1072691702860471',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:ae2d22a93fbb12dad20dc393a869739d',
|
||||
'timestamp': 1477305000,
|
||||
'upload_date': '20161024',
|
||||
'uploader': 'La Guía Del Varón',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.facebook.com/groups/1024490957622648/permalink/1396382447100162/',
|
||||
'info_dict': {
|
||||
'id': '1396382447100162',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:e2d2700afdf84e121f5d0f999bad13a3',
|
||||
'timestamp': 1486035494,
|
||||
'upload_date': '20170202',
|
||||
'uploader': 'Elisabeth Ahtn',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.facebook.com/video.php?v=10204634152394104',
|
||||
'only_matching': True,
|
||||
@ -243,14 +286,30 @@ class FacebookIE(InfoExtractor):
|
||||
|
||||
video_data = None
|
||||
|
||||
def extract_video_data(instances):
|
||||
for item in instances:
|
||||
if item[1][0] == 'VideoConfig':
|
||||
video_item = item[2][0]
|
||||
if video_item.get('video_id'):
|
||||
return video_item['videoData']
|
||||
|
||||
server_js_data = self._parse_json(self._search_regex(
|
||||
r'handleServerJS\(({.+})(?:\);|,")', webpage, 'server js data', default='{}'), video_id)
|
||||
for item in server_js_data.get('instances', []):
|
||||
if item[1][0] == 'VideoConfig':
|
||||
video_item = item[2][0]
|
||||
if video_item.get('video_id') == video_id:
|
||||
video_data = video_item['videoData']
|
||||
break
|
||||
r'handleServerJS\(({.+})(?:\);|,")', webpage,
|
||||
'server js data', default='{}'), video_id, fatal=False)
|
||||
|
||||
if server_js_data:
|
||||
video_data = extract_video_data(server_js_data.get('instances', []))
|
||||
|
||||
if not video_data:
|
||||
server_js_data = self._parse_json(
|
||||
self._search_regex(
|
||||
r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+(?:stream_pagelet|pagelet_group_mall)',
|
||||
webpage, 'js data', default='{}'),
|
||||
video_id, transform_source=js_to_json, fatal=False)
|
||||
if server_js_data:
|
||||
video_data = extract_video_data(try_get(
|
||||
server_js_data, lambda x: x['jsmods']['instances'],
|
||||
list) or [])
|
||||
|
||||
if not video_data:
|
||||
if not fatal_if_no_video:
|
||||
@ -300,10 +359,16 @@ class FacebookIE(InfoExtractor):
|
||||
video_title = self._html_search_regex(
|
||||
r'(?s)<span class="fbPhotosPhotoCaption".*?id="fbPhotoPageCaption"><span class="hasCaption">(.*?)</span>',
|
||||
webpage, 'alternative title', default=None)
|
||||
video_title = limit_length(video_title, 80)
|
||||
if not video_title:
|
||||
video_title = self._html_search_meta(
|
||||
'description', webpage, 'title')
|
||||
if video_title:
|
||||
video_title = limit_length(video_title, 80)
|
||||
else:
|
||||
video_title = 'Facebook video #%s' % video_id
|
||||
uploader = clean_html(get_element_by_id('fbPhotoPageAuthorName', webpage))
|
||||
uploader = clean_html(get_element_by_id(
|
||||
'fbPhotoPageAuthorName', webpage)) or self._search_regex(
|
||||
r'ownerName\s*:\s*"([^"]+)"', webpage, 'uploader', fatal=False)
|
||||
timestamp = int_or_none(self._search_regex(
|
||||
r'<abbr[^>]+data-utime=["\'](\d+)', webpage,
|
||||
'timestamp', default=None))
|
||||
|
178
youtube_dl/extractor/filmon.py
Normal file
178
youtube_dl/extractor/filmon.py
Normal file
@ -0,0 +1,178 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_HTTPError,
|
||||
)
|
||||
from ..utils import (
|
||||
qualities,
|
||||
strip_or_none,
|
||||
int_or_none,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class FilmOnIE(InfoExtractor):
    """Extractor for filmon.com VOD pages and internal ``filmon:<id>`` URLs.

    An item whose ``type_id`` is 1 is returned as a playlist of its
    episodes; anything else is returned as a single video.
    """
    IE_NAME = 'filmon'
    _VALID_URL = r'(?:https?://(?:www\.)?filmon\.com/vod/view/|filmon:)(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.filmon.com/vod/view/24869-0-plan-9-from-outer-space',
        'info_dict': {
            'id': '24869',
            'ext': 'mp4',
            'title': 'Plan 9 From Outer Space',
            'description': 'Dead human, zombies and vampires',
        },
    }, {
        'url': 'https://www.filmon.com/vod/view/2825-1-popeye-series-1',
        'info_dict': {
            'id': '2825',
            'title': 'Popeye Series 1',
            'description': 'The original series of Popeye.',
        },
        'playlist_mincount': 8,
    }]

    def _real_extract(self, url):
        """Query the VOD API for the matched id and build the result.

        HTTP errors from the API are re-raised as an expected
        ExtractorError carrying the ``reason`` field of the error body;
        any other ExtractorError propagates unchanged.
        """
        video_id = self._match_id(url)

        api_url = 'https://www.filmon.com/api/vod/movie?id=%s' % video_id
        try:
            response = self._download_json(api_url, video_id)['response']
        except ExtractorError as e:
            if not isinstance(e.cause, compat_HTTPError):
                raise
            # The error body is JSON; surface the site's own explanation.
            errmsg = self._parse_json(e.cause.read().decode(), video_id)['reason']
            raise ExtractorError('%s said: %s' % (self.IE_NAME, errmsg), expected=True)

        title = response['title']
        description = strip_or_none(response.get('description'))

        if response.get('type_id') == 1:
            # Each episode is delegated back to this extractor through
            # the internal ``filmon:`` scheme.
            entries = []
            for episode_id in response.get('episodes', []):
                entries.append(self.url_result('filmon:' + episode_id))
            return self.playlist_result(entries, video_id, title, description)

        rank = qualities(('low', 'high'))
        formats = [{
            'format_id': format_id,
            'url': stream['url'],
            'ext': 'mp4',
            'quality': rank(stream.get('quality')),
            'protocol': 'm3u8_native',
        } for format_id, stream in response.get('streams', {}).items()
            if stream.get('url')]
        self._sort_formats(formats)

        # The poster itself is listed alongside its scaled variants.
        poster = response.get('poster', {})
        thumbs = poster.get('thumbs', {})
        thumbs['poster'] = poster
        thumbnails = [{
            'id': thumb_id,
            'url': thumb['url'],
            'width': int_or_none(thumb.get('width')),
            'height': int_or_none(thumb.get('height')),
        } for thumb_id, thumb in thumbs.items() if thumb.get('url')]

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'description': description,
            'thumbnails': thumbnails,
        }
|
||||
|
||||
|
||||
class FilmOnChannelIE(InfoExtractor):
    """Extractor for filmon.com ``/tv/<alias>`` and ``/channel/<alias>`` pages."""
    IE_NAME = 'filmon:channel'
    _VALID_URL = r'https?://(?:www\.)?filmon\.com/(?:tv|channel)/(?P<id>[a-z0-9-]+)'
    _TESTS = [{
        # VOD
        'url': 'http://www.filmon.com/tv/sports-haters',
        'info_dict': {
            'id': '4190',
            'ext': 'mp4',
            'title': 'Sports Haters',
            'description': 'md5:dabcb4c1d9cfc77085612f1a85f8275d',
        },
    }, {
        # LIVE
        'url': 'https://www.filmon.com/channel/filmon-sports',
        'only_matching': True,
    }, {
        'url': 'https://www.filmon.com/tv/2894',
        'only_matching': True,
    }]

    # (thumbnail name, width, height) for the static channel logo assets
    _THUMBNAIL_RES = [
        ('logo', 56, 28),
        ('big_logo', 106, 106),
        ('extra_big_logo', 300, 300),
    ]

    def _real_extract(self, url):
        """Fetch channel metadata from the v2 API and build the info dict.

        HTTP errors from the API are re-raised as an expected
        ExtractorError carrying the ``message`` field of the error body;
        any other ExtractorError propagates unchanged.
        """
        channel_id = self._match_id(url)

        try:
            channel_data = self._download_json(
                'http://www.filmon.com/api-v2/channel/' + channel_id, channel_id)['data']
        except ExtractorError as e:
            if not isinstance(e.cause, compat_HTTPError):
                raise
            errmsg = self._parse_json(e.cause.read().decode(), channel_id)['message']
            raise ExtractorError('%s said: %s' % (self.IE_NAME, errmsg), expected=True)

        # From here on the numeric id reported by the API replaces the URL alias.
        channel_id = compat_str(channel_data['id'])
        is_live = not (channel_data.get('is_vod') or channel_data.get('is_vox'))
        title = channel_data['title']

        rank = qualities(('low', 'high'))
        formats = []
        for stream in channel_data.get('streams', []):
            stream_url = stream.get('url')
            if not stream_url:
                continue
            if is_live:
                quality = stream.get('quality')
                formats.append({
                    'format_id': quality,
                    # this is an m3u8 stream, but we are deliberately not using _extract_m3u8_formats
                    # because it doesn't have bitrate variants anyway
                    'url': stream_url,
                    'ext': 'mp4',
                    'quality': rank(quality),
                })
            else:
                formats.extend(self._extract_wowza_formats(
                    stream_url, channel_id, skip_protocols=['dash', 'rtmp', 'rtsp']))
        self._sort_formats(formats)

        thumbnails = [{
            'id': name,
            'url': 'http://static.filmon.com/assets/channels/%s/%s.png' % (channel_id, name),
            'width': width,
            'height': height,
        } for name, width, height in self._THUMBNAIL_RES]

        return {
            'id': channel_id,
            'display_id': channel_data.get('alias'),
            'title': self._live_title(title) if is_live else title,
            'description': channel_data.get('description'),
            'thumbnails': thumbnails,
            'formats': formats,
            'is_live': is_live,
        }
|
@ -86,18 +86,43 @@ class FirstTVIE(InfoExtractor):
|
||||
title = item['title']
|
||||
quality = qualities(QUALITIES)
|
||||
formats = []
|
||||
path = None
|
||||
for f in item.get('mbr', []):
|
||||
src = f.get('src')
|
||||
if not src or not isinstance(src, compat_str):
|
||||
continue
|
||||
tbr = int_or_none(self._search_regex(
|
||||
r'_(\d{3,})\.mp4', src, 'tbr', default=None))
|
||||
if not path:
|
||||
path = self._search_regex(
|
||||
r'//[^/]+/(.+?)_\d+\.mp4', src,
|
||||
'm3u8 path', default=None)
|
||||
formats.append({
|
||||
'url': src,
|
||||
'format_id': f.get('name'),
|
||||
'tbr': tbr,
|
||||
'quality': quality(f.get('name')),
|
||||
'source_preference': quality(f.get('name')),
|
||||
})
|
||||
# m3u8 URL format is reverse engineered from [1] (search for
|
||||
# master.m3u8). dashEdges (that is currently balancer-vod.1tv.ru)
|
||||
# is taken from [2].
|
||||
# 1. http://static.1tv.ru/player/eump1tv-current/eump-1tv.all.min.js?rnd=9097422834:formatted
|
||||
# 2. http://static.1tv.ru/player/eump1tv-config/config-main.js?rnd=9097422834
|
||||
if not path and len(formats) == 1:
|
||||
path = self._search_regex(
|
||||
r'//[^/]+/(.+?$)', formats[0]['url'],
|
||||
'm3u8 path', default=None)
|
||||
if path:
|
||||
if len(formats) == 1:
|
||||
m3u8_path = ','
|
||||
else:
|
||||
tbrs = [compat_str(t) for t in sorted(f['tbr'] for f in formats)]
|
||||
m3u8_path = '_,%s,%s' % (','.join(tbrs), '.mp4')
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
'http://balancer-vod.1tv.ru/%s%s.urlset/master.m3u8'
|
||||
% (path, m3u8_path),
|
||||
display_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnail = item.get('poster') or self._og_search_thumbnail(webpage)
|
||||
|
@ -81,7 +81,7 @@ class FlipagramIE(InfoExtractor):
|
||||
'filesize': int_or_none(cover.get('size')),
|
||||
} for cover in flipagram.get('covers', []) if cover.get('url')]
|
||||
|
||||
# Note that this only retrieves comments that are initally loaded.
|
||||
# Note that this only retrieves comments that are initially loaded.
|
||||
# For videos with large amounts of comments, most won't be retrieved.
|
||||
comments = []
|
||||
for comment in video_data.get('comments', {}).get(video_id, {}).get('items', []):
|
||||
|
123
youtube_dl/extractor/gaskrank.py
Normal file
123
youtube_dl/extractor/gaskrank.py
Normal file
@ -0,0 +1,123 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class GaskrankIE(InfoExtractor):
    """InfoExtractor for gaskrank.tv"""
    _VALID_URL = r'https?://(?:www\.)?gaskrank\.tv/tv/(?P<categories>[^/]+)/(?P<id>[^/]+)\.html?'
    _TESTS = [
        {
            'url': 'http://www.gaskrank.tv/tv/motorrad-fun/strike-einparken-durch-anfaenger-crash-mit-groesserem-flurschaden.htm',
            'md5': '1ae88dbac97887d85ebd1157a95fc4f9',
            'info_dict': {
                'id': '201601/26955',
                'ext': 'mp4',
                'title': 'Strike! Einparken können nur Männer - Flurschaden hält sich in Grenzen *lol*',
                'thumbnail': r're:^https?://.*\.jpg$',
                'categories': ['motorrad-fun'],
                'display_id': 'strike-einparken-durch-anfaenger-crash-mit-groesserem-flurschaden',
                'uploader_id': 'Bikefun',
                'upload_date': '20170110',
                'uploader_url': None,
            }
        },
        {
            'url': 'http://www.gaskrank.tv/tv/racing/isle-of-man-tt-2011-michael-du-15920.htm',
            'md5': 'c33ee32c711bc6c8224bfcbe62b23095',
            'info_dict': {
                'id': '201106/15920',
                'ext': 'mp4',
                'title': 'Isle of Man - Michael Dunlop vs Guy Martin - schwindelig kucken',
                'thumbnail': r're:^https?://.*\.jpg$',
                'categories': ['racing'],
                'display_id': 'isle-of-man-tt-2011-michael-du-15920',
                'uploader_id': 'IOM',
                'upload_date': '20160506',
                'uploader_url': 'www.iomtt.com',
            }
        }
    ]

    def _real_extract(self, url):
        """Extract information from a gaskrank.tv video page.

        The title and the player playlist are read from inline player
        configuration in the page; uploader, date, tags, view count and
        rating are scraped from the surrounding markup and are all
        optional (they are None or empty when the markup is missing).
        """
        def fix_json(code):
            """Removes trailing comma in json: {{},} --> {{}}"""
            return re.sub(r',\s*}', r'}', js_to_json(code))

        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        categories = [re.match(self._VALID_URL, url).group('categories')]
        title = self._search_regex(
            r'movieName\s*:\s*\'([^\']*)\'',
            webpage, 'title')
        thumbnail = self._search_regex(
            r'poster\s*:\s*\'([^\']*)\'',
            webpage, 'thumbnail', default=None)

        # Bind both names up front: they are optional metadata and must
        # exist for the return dict even when the byline is absent.
        uploader_id = upload_date = None
        mobj = re.search(
            r'Video von:\s*(?P<uploader_id>[^|]*?)\s*\|\s*vom:\s*(?P<upload_date>[0-9][0-9]\.[0-9][0-9]\.[0-9][0-9][0-9][0-9])',
            webpage)
        if mobj is not None:
            uploader_id = mobj.group('uploader_id')
            upload_date = unified_strdate(mobj.group('upload_date'))

        uploader_url = self._search_regex(
            r'Homepage:\s*<[^>]*>(?P<uploader_url>[^<]*)',
            webpage, 'uploader_url', default=None)
        tags = re.findall(
            r'/tv/tags/[^/]+/"\s*>(?P<tag>[^<]*?)<',
            webpage)

        view_count = self._search_regex(
            r'class\s*=\s*"gkRight"(?:[^>]*>\s*<[^>]*)*icon-eye-open(?:[^>]*>\s*<[^>]*)*>\s*(?P<view_count>[0-9\.]*)',
            webpage, 'view_count', default=None)
        if view_count:
            # '.' is removed before the integer conversion.
            view_count = int_or_none(view_count.replace('.', ''))

        # The rating is optional, so a missing value must not abort extraction.
        average_rating = self._search_regex(
            r'itemprop\s*=\s*"ratingValue"[^>]*>\s*(?P<average_rating>[0-9,]+)',
            webpage, 'average_rating', default=None)
        if average_rating:
            # ',' is replaced by '.' before the float conversion.
            average_rating = float_or_none(average_rating.replace(',', '.'))

        # Parsing is non-fatal, so fall back to an empty mapping rather
        # than carrying None into the code below.
        playlist = self._parse_json(
            self._search_regex(
                r'playlist\s*:\s*\[([^\]]*)\]',
                webpage, 'playlist', default='{}'),
            display_id, transform_source=fix_json, fatal=False) or {}

        formats = []
        for key, item in playlist.items():
            # Skip malformed entries instead of failing on a missing 'src'.
            if not isinstance(item, dict) or not item.get('src'):
                continue
            formats.append({
                'url': item['src'],
                'format_id': key,
                'quality': item.get('quality')})

        # The id is taken from entry '0' when it is usable, otherwise from
        # the first usable entry.
        primary = playlist.get('0')
        primary_src = primary.get('src') if isinstance(primary, dict) else None
        if not primary_src and formats:
            primary_src = formats[0]['url']
        # The empty-string fallback lets the search fail through its own
        # error path instead of a TypeError on None.
        video_id = self._search_regex(
            r'https?://movies\.gaskrank\.tv/([^-]*?)(-[^\.]*)?\.mp4',
            primary_src or '', 'video id')

        self._sort_formats(formats, field_preference=['format_id'])

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'thumbnail': thumbnail,
            'categories': categories,
            'display_id': display_id,
            'uploader_id': uploader_id,
            'upload_date': upload_date,
            'uploader_url': uploader_url,
            'tags': tags,
            'view_count': view_count,
            'average_rating': average_rating,
        }
|
@ -20,6 +20,7 @@ from ..utils import (
|
||||
float_or_none,
|
||||
HEADRequest,
|
||||
is_html,
|
||||
js_to_json,
|
||||
orderedSet,
|
||||
sanitized_Request,
|
||||
smuggle_url,
|
||||
@ -29,6 +30,7 @@ from ..utils import (
|
||||
UnsupportedError,
|
||||
xpath_text,
|
||||
)
|
||||
from .commonprotocols import RtmpIE
|
||||
from .brightcove import (
|
||||
BrightcoveLegacyIE,
|
||||
BrightcoveNewIE,
|
||||
@ -78,6 +80,10 @@ from .vbox7 import Vbox7IE
|
||||
from .dbtv import DBTVIE
|
||||
from .piksel import PikselIE
|
||||
from .videa import VideaIE
|
||||
from .twentymin import TwentyMinutenIE
|
||||
from .ustream import UstreamIE
|
||||
from .openload import OpenloadIE
|
||||
from .videopress import VideoPressIE
|
||||
|
||||
|
||||
class GenericIE(InfoExtractor):
|
||||
@ -422,6 +428,26 @@ class GenericIE(InfoExtractor):
|
||||
'skip_download': True, # m3u8 download
|
||||
},
|
||||
},
|
||||
{
|
||||
# Brightcove with alternative playerID key
|
||||
'url': 'http://www.nature.com/nmeth/journal/v9/n7/fig_tab/nmeth.2062_SV1.html',
|
||||
'info_dict': {
|
||||
'id': 'nmeth.2062_SV1',
|
||||
'title': 'Simultaneous multiview imaging of the Drosophila syncytial blastoderm : Quantitative high-speed imaging of entire developing embryos with simultaneous multiview light-sheet microscopy : Nature Methods : Nature Research',
|
||||
},
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'id': '2228375078001',
|
||||
'ext': 'mp4',
|
||||
'title': 'nmeth.2062-sv1',
|
||||
'description': 'nmeth.2062-sv1',
|
||||
'timestamp': 1363357591,
|
||||
'upload_date': '20130315',
|
||||
'uploader': 'Nature Publishing Group',
|
||||
'uploader_id': '1964492299001',
|
||||
},
|
||||
}],
|
||||
},
|
||||
# ooyala video
|
||||
{
|
||||
'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
|
||||
@ -567,17 +593,6 @@ class GenericIE(InfoExtractor):
|
||||
'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
|
||||
}
|
||||
},
|
||||
# Embedded Ustream video
|
||||
{
|
||||
'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm',
|
||||
'md5': '27b99cdb639c9b12a79bca876a073417',
|
||||
'info_dict': {
|
||||
'id': '45734260',
|
||||
'ext': 'flv',
|
||||
'uploader': 'AU SPA: The NSA and Privacy',
|
||||
'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman'
|
||||
}
|
||||
},
|
||||
# nowvideo embed hidden behind percent encoding
|
||||
{
|
||||
'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
|
||||
@ -934,6 +949,29 @@ class GenericIE(InfoExtractor):
|
||||
'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
|
||||
},
|
||||
},
|
||||
# jwplayer rtmp
|
||||
{
|
||||
'url': 'http://www.suffolk.edu/sjc/',
|
||||
'info_dict': {
|
||||
'id': 'sjclive',
|
||||
'ext': 'flv',
|
||||
'title': 'Massachusetts Supreme Judicial Court Oral Arguments',
|
||||
'uploader': 'www.suffolk.edu',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
# Complex jwplayer
|
||||
{
|
||||
'url': 'http://www.indiedb.com/games/king-machine/videos',
|
||||
'info_dict': {
|
||||
'id': 'videos',
|
||||
'ext': 'mp4',
|
||||
'title': 'king machine trailer 1',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
},
|
||||
# rtl.nl embed
|
||||
{
|
||||
'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
|
||||
@ -964,19 +1002,6 @@ class GenericIE(InfoExtractor):
|
||||
'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014',
|
||||
},
|
||||
},
|
||||
# Kaltura embed protected with referrer
|
||||
{
|
||||
'url': 'http://www.disney.nl/disney-channel/filmpjes/achter-de-schermen#/videoId/violetta-achter-de-schermen-ruggero',
|
||||
'info_dict': {
|
||||
'id': '1_g4fbemnq',
|
||||
'ext': 'mp4',
|
||||
'title': 'Violetta - Achter De Schermen - Ruggero',
|
||||
'description': 'Achter de schermen met Ruggero',
|
||||
'timestamp': 1435133761,
|
||||
'upload_date': '20150624',
|
||||
'uploader_id': 'echojecka',
|
||||
},
|
||||
},
|
||||
# Kaltura embed with single quotes
|
||||
{
|
||||
'url': 'http://fod.infobase.com/p_ViewPlaylist.aspx?AssignmentID=NUN8ZY',
|
||||
@ -1448,6 +1473,40 @@ class GenericIE(InfoExtractor):
|
||||
},
|
||||
'playlist_mincount': 2,
|
||||
},
|
||||
{
|
||||
# 20 minuten embed
|
||||
'url': 'http://www.20min.ch/schweiz/news/story/So-kommen-Sie-bei-Eis-und-Schnee-sicher-an-27032552',
|
||||
'info_dict': {
|
||||
'id': '523629',
|
||||
'ext': 'mp4',
|
||||
'title': 'So kommen Sie bei Eis und Schnee sicher an',
|
||||
'description': 'md5:117c212f64b25e3d95747e5276863f7d',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': [TwentyMinutenIE.ie_key()],
|
||||
},
|
||||
{
|
||||
# VideoPress embed
|
||||
'url': 'https://en.support.wordpress.com/videopress/',
|
||||
'info_dict': {
|
||||
'id': 'OcobLTqC',
|
||||
'ext': 'm4v',
|
||||
'title': 'IMG_5786',
|
||||
'timestamp': 1435711927,
|
||||
'upload_date': '20150701',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': [VideoPressIE.ie_key()],
|
||||
},
|
||||
{
|
||||
# ThePlatform embedded with whitespaces in URLs
|
||||
'url': 'http://www.golfchannel.com/topics/shows/golftalkcentral.htm',
|
||||
'only_matching': True,
|
||||
},
|
||||
# {
|
||||
# # TODO: find another test
|
||||
# # http://schema.org/VideoObject
|
||||
@ -2077,10 +2136,9 @@ class GenericIE(InfoExtractor):
|
||||
return self.url_result(mobj.group('url'), 'TED')
|
||||
|
||||
# Look for embedded Ustream videos
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'), 'Ustream')
|
||||
ustream_url = UstreamIE._extract_url(webpage)
|
||||
if ustream_url:
|
||||
return self.url_result(ustream_url, UstreamIE.ie_key())
|
||||
|
||||
# Look for embedded arte.tv player
|
||||
mobj = re.search(
|
||||
@ -2295,8 +2353,9 @@ class GenericIE(InfoExtractor):
|
||||
'Channel': 'channel',
|
||||
'ChannelList': 'channel_list',
|
||||
}
|
||||
return self.url_result('limelight:%s:%s' % (
|
||||
lm[mobj.group(1)], mobj.group(2)), 'Limelight%s' % mobj.group(1), mobj.group(2))
|
||||
return self.url_result(smuggle_url('limelight:%s:%s' % (
|
||||
lm[mobj.group(1)], mobj.group(2)), {'source_url': url}),
|
||||
'Limelight%s' % mobj.group(1), mobj.group(2))
|
||||
|
||||
mobj = re.search(
|
||||
r'''(?sx)
|
||||
@ -2306,7 +2365,9 @@ class GenericIE(InfoExtractor):
|
||||
value=(["\'])(?:(?!\3).)*mediaId=(?P<id>[a-z0-9]{32})
|
||||
''', webpage)
|
||||
if mobj:
|
||||
return self.url_result('limelight:media:%s' % mobj.group('id'))
|
||||
return self.url_result(smuggle_url(
|
||||
'limelight:media:%s' % mobj.group('id'),
|
||||
{'source_url': url}), 'LimelightMedia', mobj.group('id'))
|
||||
|
||||
# Look for AdobeTVVideo embeds
|
||||
mobj = re.search(
|
||||
@ -2401,6 +2462,24 @@ class GenericIE(InfoExtractor):
|
||||
if videa_urls:
|
||||
return _playlist_from_matches(videa_urls, ie=VideaIE.ie_key())
|
||||
|
||||
# Look for 20 minuten embeds
|
||||
twentymin_urls = TwentyMinutenIE._extract_urls(webpage)
|
||||
if twentymin_urls:
|
||||
return _playlist_from_matches(
|
||||
twentymin_urls, ie=TwentyMinutenIE.ie_key())
|
||||
|
||||
# Look for Openload embeds
|
||||
openload_urls = OpenloadIE._extract_urls(webpage)
|
||||
if openload_urls:
|
||||
return _playlist_from_matches(
|
||||
openload_urls, ie=OpenloadIE.ie_key())
|
||||
|
||||
# Look for VideoPress embeds
|
||||
videopress_urls = VideoPressIE._extract_urls(webpage)
|
||||
if videopress_urls:
|
||||
return _playlist_from_matches(
|
||||
videopress_urls, ie=VideoPressIE.ie_key())
|
||||
|
||||
# Looking for http://schema.org/VideoObject
|
||||
json_ld = self._search_json_ld(
|
||||
webpage, video_id, default={}, expected_type='VideoObject')
|
||||
@ -2425,9 +2504,20 @@ class GenericIE(InfoExtractor):
|
||||
self._sort_formats(entry['formats'])
|
||||
return self.playlist_result(entries)
|
||||
|
||||
jwplayer_data_str = self._find_jwplayer_data(webpage)
|
||||
if jwplayer_data_str:
|
||||
try:
|
||||
jwplayer_data = self._parse_json(
|
||||
jwplayer_data_str, video_id, transform_source=js_to_json)
|
||||
return self._parse_jwplayer_data(jwplayer_data, video_id)
|
||||
except ExtractorError:
|
||||
pass
|
||||
|
||||
def check_video(vurl):
|
||||
if YoutubeIE.suitable(vurl):
|
||||
return True
|
||||
if RtmpIE.suitable(vurl):
|
||||
return True
|
||||
vpath = compat_urlparse.urlparse(vurl).path
|
||||
vext = determine_ext(vpath)
|
||||
return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml', 'js')
|
||||
@ -2535,6 +2625,15 @@ class GenericIE(InfoExtractor):
|
||||
'age_limit': age_limit,
|
||||
}
|
||||
|
||||
if RtmpIE.suitable(video_url):
|
||||
entry_info_dict.update({
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': RtmpIE.ie_key(),
|
||||
'url': video_url,
|
||||
})
|
||||
entries.append(entry_info_dict)
|
||||
continue
|
||||
|
||||
ext = determine_ext(video_url)
|
||||
if ext == 'smil':
|
||||
entry_info_dict['formats'] = self._extract_smil_formats(video_url, video_id)
|
||||
|
@ -3,7 +3,7 @@ from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .adobepass import AdobePassIE
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
determine_ext,
|
||||
@ -13,15 +13,30 @@ from ..utils import (
|
||||
)
|
||||
|
||||
|
||||
class GoIE(InfoExtractor):
|
||||
_BRANDS = {
|
||||
'abc': '001',
|
||||
'freeform': '002',
|
||||
'watchdisneychannel': '004',
|
||||
'watchdisneyjunior': '008',
|
||||
'watchdisneyxd': '009',
|
||||
class GoIE(AdobePassIE):
|
||||
_SITE_INFO = {
|
||||
'abc': {
|
||||
'brand': '001',
|
||||
'requestor_id': 'ABC',
|
||||
},
|
||||
'freeform': {
|
||||
'brand': '002',
|
||||
'requestor_id': 'ABCFamily',
|
||||
},
|
||||
'watchdisneychannel': {
|
||||
'brand': '004',
|
||||
'requestor_id': 'Disney',
|
||||
},
|
||||
'watchdisneyjunior': {
|
||||
'brand': '008',
|
||||
'requestor_id': 'DisneyJunior',
|
||||
},
|
||||
'watchdisneyxd': {
|
||||
'brand': '009',
|
||||
'requestor_id': 'DisneyXD',
|
||||
}
|
||||
}
|
||||
_VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:[^/]+/)*(?:vdka(?P<id>\w+)|season-\d+/\d+-(?P<display_id>[^/?#]+))' % '|'.join(_BRANDS.keys())
|
||||
_VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:[^/]+/)*(?:vdka(?P<id>\w+)|season-\d+/\d+-(?P<display_id>[^/?#]+))' % '|'.join(_SITE_INFO.keys())
|
||||
_TESTS = [{
|
||||
'url': 'http://abc.go.com/shows/castle/video/most-recent/vdka0_g86w5onx',
|
||||
'info_dict': {
|
||||
@ -43,8 +58,12 @@ class GoIE(InfoExtractor):
|
||||
sub_domain, video_id, display_id = re.match(self._VALID_URL, url).groups()
|
||||
if not video_id:
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
video_id = self._search_regex(r'data-video-id=["\']VDKA(\w+)', webpage, 'video id')
|
||||
brand = self._BRANDS[sub_domain]
|
||||
video_id = self._search_regex(
|
||||
# There may be inner quotes, e.g. data-video-id="'VDKA3609139'"
|
||||
# from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood
|
||||
r'data-video-id=["\']*VDKA(\w+)', webpage, 'video id')
|
||||
site_info = self._SITE_INFO[sub_domain]
|
||||
brand = site_info['brand']
|
||||
video_data = self._download_json(
|
||||
'http://api.contents.watchabc.go.com/vp2/ws/contents/3000/videos/%s/001/-1/-1/-1/%s/-1/-1.json' % (brand, video_id),
|
||||
video_id)['video'][0]
|
||||
@ -59,28 +78,60 @@ class GoIE(InfoExtractor):
|
||||
ext = determine_ext(asset_url)
|
||||
if ext == 'm3u8':
|
||||
video_type = video_data.get('type')
|
||||
if video_type == 'lf':
|
||||
entitlement = self._download_json(
|
||||
'https://api.entitlement.watchabc.go.com/vp2/ws-secure/entitlement/2020/authorize.json',
|
||||
video_id, data=urlencode_postdata({
|
||||
'video_id': video_data['id'],
|
||||
'video_type': video_type,
|
||||
'brand': brand,
|
||||
'device': '001',
|
||||
}))
|
||||
errors = entitlement.get('errors', {}).get('errors', [])
|
||||
if errors:
|
||||
error_message = ', '.join([error['message'] for error in errors])
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True)
|
||||
asset_url += '?' + entitlement['uplynkData']['sessionKey']
|
||||
data = {
|
||||
'video_id': video_data['id'],
|
||||
'video_type': video_type,
|
||||
'brand': brand,
|
||||
'device': '001',
|
||||
}
|
||||
if video_data.get('accesslevel') == '1':
|
||||
requestor_id = site_info['requestor_id']
|
||||
resource = self._get_mvpd_resource(
|
||||
requestor_id, title, video_id, None)
|
||||
auth = self._extract_mvpd_auth(
|
||||
url, video_id, requestor_id, resource)
|
||||
data.update({
|
||||
'token': auth,
|
||||
'token_type': 'ap',
|
||||
'adobe_requestor_id': requestor_id,
|
||||
})
|
||||
else:
|
||||
self._initialize_geo_bypass(['US'])
|
||||
entitlement = self._download_json(
|
||||
'https://api.entitlement.watchabc.go.com/vp2/ws-secure/entitlement/2020/authorize.json',
|
||||
video_id, data=urlencode_postdata(data), headers=self.geo_verification_headers())
|
||||
errors = entitlement.get('errors', {}).get('errors', [])
|
||||
if errors:
|
||||
for error in errors:
|
||||
if error.get('code') == 1002:
|
||||
self.raise_geo_restricted(
|
||||
error['message'], countries=['US'])
|
||||
error_message = ', '.join([error['message'] for error in errors])
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True)
|
||||
asset_url += '?' + entitlement['uplynkData']['sessionKey']
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
asset_url, video_id, 'mp4', m3u8_id=format_id or 'hls', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
f = {
|
||||
'format_id': format_id,
|
||||
'url': asset_url,
|
||||
'ext': ext,
|
||||
})
|
||||
}
|
||||
if re.search(r'(?:/mp4/source/|_source\.mp4)', asset_url):
|
||||
f.update({
|
||||
'format_id': ('%s-' % format_id if format_id else '') + 'SOURCE',
|
||||
'preference': 1,
|
||||
})
|
||||
else:
|
||||
mobj = re.search(r'/(\d+)x(\d+)/', asset_url)
|
||||
if mobj:
|
||||
height = int(mobj.group(2))
|
||||
f.update({
|
||||
'format_id': ('%s-' % format_id if format_id else '') + '%dP' % height,
|
||||
'width': int(mobj.group(1)),
|
||||
'height': height,
|
||||
})
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
|
@ -6,6 +6,7 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
lowercase_escape,
|
||||
)
|
||||
|
||||
|
||||
@ -13,12 +14,12 @@ class GoogleDriveIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/)|video\.google\.com/get_player\?.*?docid=)(?P<id>[a-zA-Z0-9_-]{28,})'
|
||||
_TESTS = [{
|
||||
'url': 'https://drive.google.com/file/d/0ByeS4oOUV-49Zzh4R1J6R09zazQ/edit?pli=1',
|
||||
'md5': '881f7700aec4f538571fa1e0eed4a7b6',
|
||||
'md5': 'd109872761f7e7ecf353fa108c0dbe1e',
|
||||
'info_dict': {
|
||||
'id': '0ByeS4oOUV-49Zzh4R1J6R09zazQ',
|
||||
'ext': 'mp4',
|
||||
'title': 'Big Buck Bunny.mp4',
|
||||
'duration': 46,
|
||||
'duration': 45,
|
||||
}
|
||||
}, {
|
||||
# video id is longer than 28 characters
|
||||
@ -55,7 +56,7 @@ class GoogleDriveIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(
|
||||
'http://docs.google.com/file/d/%s' % video_id, video_id, encoding='unicode_escape')
|
||||
'http://docs.google.com/file/d/%s' % video_id, video_id)
|
||||
|
||||
reason = self._search_regex(r'"reason"\s*,\s*"([^"]+)', webpage, 'reason', default=None)
|
||||
if reason:
|
||||
@ -74,7 +75,7 @@ class GoogleDriveIE(InfoExtractor):
|
||||
resolution = fmt.split('/')[1]
|
||||
width, height = resolution.split('x')
|
||||
formats.append({
|
||||
'url': fmt_url,
|
||||
'url': lowercase_escape(fmt_url),
|
||||
'format_id': fmt_id,
|
||||
'resolution': resolution,
|
||||
'width': int_or_none(width),
|
||||
|
@ -6,59 +6,58 @@ from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
|
||||
class HeiseIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?:www\.)?heise\.de/video/artikel/
|
||||
.+?(?P<id>[0-9]+)\.html(?:$|[?#])
|
||||
'''
|
||||
_TEST = {
|
||||
'url': (
|
||||
'http://www.heise.de/video/artikel/Podcast-c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2404147.html'
|
||||
),
|
||||
_VALID_URL = r'https?://(?:www\.)?heise\.de/(?:[^/]+/)+[^/]+-(?P<id>[0-9]+)\.html'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.heise.de/video/artikel/Podcast-c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2404147.html',
|
||||
'md5': 'ffed432483e922e88545ad9f2f15d30e',
|
||||
'info_dict': {
|
||||
'id': '2404147',
|
||||
'ext': 'mp4',
|
||||
'title': (
|
||||
"Podcast: c't uplink 3.3 – Owncloud / Tastaturen / Peilsender Smartphone"
|
||||
),
|
||||
'title': "Podcast: c't uplink 3.3 – Owncloud / Tastaturen / Peilsender Smartphone",
|
||||
'format_id': 'mp4_720p',
|
||||
'timestamp': 1411812600,
|
||||
'upload_date': '20140927',
|
||||
'description': 'In uplink-Episode 3.3 geht es darum, wie man sich von Cloud-Anbietern emanzipieren kann, worauf man beim Kauf einer Tastatur achten sollte und was Smartphones über uns verraten.',
|
||||
'thumbnail': r're:^https?://.*\.jpe?g$',
|
||||
'description': 'md5:c934cbfb326c669c2bcabcbe3d3fcd20',
|
||||
'thumbnail': r're:^https?://.*/gallery/$',
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.heise.de/ct/artikel/c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2403911.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.heise.de/newsticker/meldung/c-t-uplink-Owncloud-Tastaturen-Peilsender-Smartphone-2404251.html?wt_mc=rss.ho.beitrag.atom',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.heise.de/ct/ausgabe/2016-12-Spiele-3214137.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
container_id = self._search_regex(
|
||||
r'<div class="videoplayerjw".*?data-container="([0-9]+)"',
|
||||
r'<div class="videoplayerjw"[^>]+data-container="([0-9]+)"',
|
||||
webpage, 'container ID')
|
||||
sequenz_id = self._search_regex(
|
||||
r'<div class="videoplayerjw".*?data-sequenz="([0-9]+)"',
|
||||
r'<div class="videoplayerjw"[^>]+data-sequenz="([0-9]+)"',
|
||||
webpage, 'sequenz ID')
|
||||
data_url = 'http://www.heise.de/videout/feed?container=%s&sequenz=%s' % (container_id, sequenz_id)
|
||||
doc = self._download_xml(data_url, video_id)
|
||||
|
||||
info = {
|
||||
'id': video_id,
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'timestamp': parse_iso8601(
|
||||
self._html_search_meta('date', webpage)),
|
||||
'description': self._og_search_description(webpage),
|
||||
}
|
||||
title = self._html_search_meta('fulltitle', webpage, default=None)
|
||||
if not title or title == "c't":
|
||||
title = self._search_regex(
|
||||
r'<div[^>]+class="videoplayerjw"[^>]+data-title="([^"]+)"',
|
||||
webpage, 'title')
|
||||
|
||||
title = self._html_search_meta('fulltitle', webpage)
|
||||
if title:
|
||||
info['title'] = title
|
||||
else:
|
||||
info['title'] = self._og_search_title(webpage)
|
||||
doc = self._download_xml(
|
||||
'http://www.heise.de/videout/feed', video_id, query={
|
||||
'container': container_id,
|
||||
'sequenz': sequenz_id,
|
||||
})
|
||||
|
||||
formats = []
|
||||
for source_node in doc.findall('.//{http://rss.jwpcdn.com/}source'):
|
||||
@ -74,6 +73,18 @@ class HeiseIE(InfoExtractor):
|
||||
'height': height,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
info['formats'] = formats
|
||||
|
||||
return info
|
||||
description = self._og_search_description(
|
||||
webpage, default=None) or self._html_search_meta(
|
||||
'description', webpage)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': (xpath_text(doc, './/{http://rss.jwpcdn.com/}image') or
|
||||
self._og_search_thumbnail(webpage)),
|
||||
'timestamp': parse_iso8601(
|
||||
self._html_search_meta('date', webpage)),
|
||||
'formats': formats,
|
||||
}
|
||||
|
@ -2,50 +2,6 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
smuggle_url,
|
||||
)
|
||||
|
||||
|
||||
class HGTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?hgtv\.ca/[^/]+/video/(?P<id>[^/]+)/video.html'
|
||||
_TEST = {
|
||||
'url': 'http://www.hgtv.ca/homefree/video/overnight-success/video.html?v=738081859718&p=1&s=da#video',
|
||||
'md5': '',
|
||||
'info_dict': {
|
||||
'id': 'aFH__I_5FBOX',
|
||||
'ext': 'mp4',
|
||||
'title': 'Overnight Success',
|
||||
'description': 'After weeks of hard work, high stakes, breakdowns and pep talks, the final 2 contestants compete to win the ultimate dream.',
|
||||
'uploader': 'SHWM-NEW',
|
||||
'timestamp': 1470320034,
|
||||
'upload_date': '20160804',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
embed_vars = self._parse_json(self._search_regex(
|
||||
r'(?s)embed_vars\s*=\s*({.*?});',
|
||||
webpage, 'embed vars'), display_id, js_to_json)
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': smuggle_url(
|
||||
'http://link.theplatform.com/s/dtjsEC/%s?mbr=true&manifest=m3u' % embed_vars['pid'], {
|
||||
'force_smil_url': True
|
||||
}),
|
||||
'series': embed_vars.get('show'),
|
||||
'season_number': int_or_none(embed_vars.get('season')),
|
||||
'episode_number': int_or_none(embed_vars.get('episode')),
|
||||
'ie_key': 'ThePlatform',
|
||||
}
|
||||
|
||||
|
||||
class HGTVComShowIE(InfoExtractor):
|
||||
|
@ -34,11 +34,9 @@ class HotStarIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_GET_CONTENT_TEMPLATE = 'http://account.hotstar.com/AVS/besc?action=GetAggregatedContentDetails&channel=PCTV&contentId=%s'
|
||||
_GET_CDN_TEMPLATE = 'http://getcdn.hotstar.com/AVS/besc?action=GetCDN&asJson=Y&channel=%s&id=%s&type=%s'
|
||||
|
||||
def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata', fatal=True):
|
||||
json_data = super(HotStarIE, self)._download_json(url_or_request, video_id, note, fatal=fatal)
|
||||
def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata', fatal=True, query=None):
|
||||
json_data = super(HotStarIE, self)._download_json(
|
||||
url_or_request, video_id, note, fatal=fatal, query=query)
|
||||
if json_data['resultCode'] != 'OK':
|
||||
if fatal:
|
||||
raise ExtractorError(json_data['errorDescription'])
|
||||
@ -48,20 +46,37 @@ class HotStarIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
video_data = self._download_json(
|
||||
self._GET_CONTENT_TEMPLATE % video_id,
|
||||
video_id)['contentInfo'][0]
|
||||
'http://account.hotstar.com/AVS/besc', video_id, query={
|
||||
'action': 'GetAggregatedContentDetails',
|
||||
'channel': 'PCTV',
|
||||
'contentId': video_id,
|
||||
})['contentInfo'][0]
|
||||
title = video_data['episodeTitle']
|
||||
|
||||
if video_data.get('encrypted') == 'Y':
|
||||
raise ExtractorError('This video is DRM protected.', expected=True)
|
||||
|
||||
formats = []
|
||||
# PCTV for extracting f4m manifest
|
||||
for f in ('TABLET',):
|
||||
for f in ('JIO',):
|
||||
format_data = self._download_json(
|
||||
self._GET_CDN_TEMPLATE % (f, video_id, 'VOD'),
|
||||
video_id, 'Downloading %s JSON metadata' % f, fatal=False)
|
||||
'http://getcdn.hotstar.com/AVS/besc',
|
||||
video_id, 'Downloading %s JSON metadata' % f,
|
||||
fatal=False, query={
|
||||
'action': 'GetCDN',
|
||||
'asJson': 'Y',
|
||||
'channel': f,
|
||||
'id': video_id,
|
||||
'type': 'VOD',
|
||||
})
|
||||
if format_data:
|
||||
format_url = format_data['src']
|
||||
format_url = format_data.get('src')
|
||||
if not format_url:
|
||||
continue
|
||||
ext = determine_ext(format_url)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(format_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, video_id, 'mp4',
|
||||
m3u8_id='hls', fatal=False))
|
||||
elif ext == 'f4m':
|
||||
# produce broken files
|
||||
continue
|
||||
@ -75,9 +90,12 @@ class HotStarIE(InfoExtractor):
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_data['episodeTitle'],
|
||||
'title': title,
|
||||
'description': video_data.get('description'),
|
||||
'duration': int_or_none(video_data.get('duration')),
|
||||
'timestamp': int_or_none(video_data.get('broadcastDate')),
|
||||
'formats': formats,
|
||||
'episode': title,
|
||||
'episode_number': int_or_none(video_data.get('episodeNumber')),
|
||||
'series': video_data.get('contentTitle'),
|
||||
}
|
||||
|
@ -13,7 +13,7 @@ from ..utils import (
|
||||
class ImdbIE(InfoExtractor):
|
||||
IE_NAME = 'imdb'
|
||||
IE_DESC = 'Internet Movie Database trailers'
|
||||
_VALID_URL = r'https?://(?:www|m)\.imdb\.com/(?:video/[^/]+/|title/tt\d+.*?#lb-)vi(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www|m)\.imdb\.com/(?:video/[^/]+/|title/tt\d+.*?#lb-|videoplayer/)vi(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.imdb.com/video/imdb/vi2524815897',
|
||||
@ -32,6 +32,9 @@ class ImdbIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://www.imdb.com/title/tt1667889/#lb-vi2524815897',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.imdb.com/videoplayer/vi1562949145',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@ -4,7 +4,10 @@ from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
from ..compat import (
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import determine_ext
|
||||
from .bokecc import BokeCCBaseIE
|
||||
|
||||
@ -33,9 +36,21 @@ class InfoQIE(BokeCCBaseIE):
|
||||
'ext': 'flv',
|
||||
'description': 'md5:308d981fb28fa42f49f9568322c683ff',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.infoq.com/presentations/Simple-Made-Easy',
|
||||
'md5': '0e34642d4d9ef44bf86f66f6399672db',
|
||||
'info_dict': {
|
||||
'id': 'Simple-Made-Easy',
|
||||
'title': 'Simple Made Easy',
|
||||
'ext': 'mp3',
|
||||
'description': 'md5:3e0e213a8bbd074796ef89ea35ada25b',
|
||||
},
|
||||
'params': {
|
||||
'format': 'bestaudio',
|
||||
},
|
||||
}]
|
||||
|
||||
def _extract_rtmp_videos(self, webpage):
|
||||
def _extract_rtmp_video(self, webpage):
|
||||
# The server URL is hardcoded
|
||||
video_url = 'rtmpe://video.infoq.com/cfx/st/'
|
||||
|
||||
@ -47,28 +62,53 @@ class InfoQIE(BokeCCBaseIE):
|
||||
playpath = 'mp4:' + real_id
|
||||
|
||||
return [{
|
||||
'format_id': 'rtmp',
|
||||
'format_id': 'rtmp_video',
|
||||
'url': video_url,
|
||||
'ext': determine_ext(playpath),
|
||||
'play_path': playpath,
|
||||
}]
|
||||
|
||||
def _extract_http_videos(self, webpage):
|
||||
http_video_url = self._search_regex(r'P\.s\s*=\s*\'([^\']+)\'', webpage, 'video URL')
|
||||
|
||||
def _extract_cookies(self, webpage):
|
||||
policy = self._search_regex(r'InfoQConstants.scp\s*=\s*\'([^\']+)\'', webpage, 'policy')
|
||||
signature = self._search_regex(r'InfoQConstants.scs\s*=\s*\'([^\']+)\'', webpage, 'signature')
|
||||
key_pair_id = self._search_regex(r'InfoQConstants.sck\s*=\s*\'([^\']+)\'', webpage, 'key-pair-id')
|
||||
return 'CloudFront-Policy=%s; CloudFront-Signature=%s; CloudFront-Key-Pair-Id=%s' % (
|
||||
policy, signature, key_pair_id)
|
||||
|
||||
def _extract_http_video(self, webpage):
|
||||
http_video_url = self._search_regex(r'P\.s\s*=\s*\'([^\']+)\'', webpage, 'video URL')
|
||||
return [{
|
||||
'format_id': 'http',
|
||||
'format_id': 'http_video',
|
||||
'url': http_video_url,
|
||||
'http_headers': {
|
||||
'Cookie': 'CloudFront-Policy=%s; CloudFront-Signature=%s; CloudFront-Key-Pair-Id=%s' % (
|
||||
policy, signature, key_pair_id),
|
||||
'Cookie': self._extract_cookies(webpage)
|
||||
},
|
||||
}]
|
||||
|
||||
def _extract_http_audio(self, webpage, video_id):
|
||||
fields = self._hidden_inputs(webpage)
|
||||
http_audio_url = fields['filename']
|
||||
if http_audio_url is None:
|
||||
return []
|
||||
|
||||
cookies_header = {'Cookie': self._extract_cookies(webpage)}
|
||||
|
||||
# base URL is found in the Location header in the response returned by
|
||||
# GET https://www.infoq.com/mp3download.action?filename=... when logged in.
|
||||
http_audio_url = compat_urlparse.urljoin('http://res.infoq.com/downloads/mp3downloads/', http_audio_url)
|
||||
|
||||
# audio file seem to be missing some times even if there is a download link
|
||||
# so probe URL to make sure
|
||||
if not self._is_valid_url(http_audio_url, video_id, headers=cookies_header):
|
||||
return []
|
||||
|
||||
return [{
|
||||
'format_id': 'http_audio',
|
||||
'url': http_audio_url,
|
||||
'vcodec': 'none',
|
||||
'http_headers': cookies_header,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
@ -80,7 +120,10 @@ class InfoQIE(BokeCCBaseIE):
|
||||
# for China videos, HTTP video URL exists but always fails with 403
|
||||
formats = self._extract_bokecc_formats(webpage, video_id)
|
||||
else:
|
||||
formats = self._extract_rtmp_videos(webpage) + self._extract_http_videos(webpage)
|
||||
formats = (
|
||||
self._extract_rtmp_video(webpage) +
|
||||
self._extract_http_video(webpage) +
|
||||
self._extract_http_audio(webpage, video_id))
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
|
@ -3,6 +3,7 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
get_element_by_attribute,
|
||||
int_or_none,
|
||||
@ -50,6 +51,33 @@ class InstagramIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# multi video post
|
||||
'url': 'https://www.instagram.com/p/BQ0eAlwhDrw/',
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'id': 'BQ0dSaohpPW',
|
||||
'ext': 'mp4',
|
||||
'title': 'Video 1',
|
||||
},
|
||||
}, {
|
||||
'info_dict': {
|
||||
'id': 'BQ0dTpOhuHT',
|
||||
'ext': 'mp4',
|
||||
'title': 'Video 2',
|
||||
},
|
||||
}, {
|
||||
'info_dict': {
|
||||
'id': 'BQ0dT7RBFeF',
|
||||
'ext': 'mp4',
|
||||
'title': 'Video 3',
|
||||
},
|
||||
}],
|
||||
'info_dict': {
|
||||
'id': 'BQ0eAlwhDrw',
|
||||
'title': 'Post by instagram',
|
||||
'description': 'md5:0f9203fc6a2ce4d228da5754bcf54957',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://instagram.com/p/-Cmh1cukG2/',
|
||||
'only_matching': True,
|
||||
@ -113,6 +141,32 @@ class InstagramIE(InfoExtractor):
|
||||
'timestamp': int_or_none(comment.get('created_at')),
|
||||
} for comment in media.get(
|
||||
'comments', {}).get('nodes', []) if comment.get('text')]
|
||||
if not video_url:
|
||||
edges = try_get(
|
||||
media, lambda x: x['edge_sidecar_to_children']['edges'],
|
||||
list) or []
|
||||
if edges:
|
||||
entries = []
|
||||
for edge_num, edge in enumerate(edges, start=1):
|
||||
node = try_get(edge, lambda x: x['node'], dict)
|
||||
if not node:
|
||||
continue
|
||||
node_video_url = try_get(node, lambda x: x['video_url'], compat_str)
|
||||
if not node_video_url:
|
||||
continue
|
||||
entries.append({
|
||||
'id': node.get('shortcode') or node['id'],
|
||||
'title': 'Video %d' % edge_num,
|
||||
'url': node_video_url,
|
||||
'thumbnail': node.get('display_url'),
|
||||
'width': int_or_none(try_get(node, lambda x: x['dimensions']['width'])),
|
||||
'height': int_or_none(try_get(node, lambda x: x['dimensions']['height'])),
|
||||
'view_count': int_or_none(node.get('video_view_count')),
|
||||
})
|
||||
return self.playlist_result(
|
||||
entries, video_id,
|
||||
'Post by %s' % uploader_id if uploader_id else None,
|
||||
description)
|
||||
|
||||
if not video_url:
|
||||
video_url = self._og_search_video_url(webpage, secure=False)
|
||||
|
@ -8,12 +8,12 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
js_to_json,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
class IPrimaIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://play\.iprima\.cz/(?:.+/)?(?P<id>[^?#]+)'
|
||||
_GEO_BYPASS = False
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://play.iprima.cz/gondici-s-r-o-33',
|
||||
@ -29,6 +29,10 @@ class IPrimaIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://play.iprima.cz/particka/particka-92',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# geo restricted
|
||||
'url': 'http://play.iprima.cz/closer-nove-pripady/closer-nove-pripady-iv-1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -38,11 +42,13 @@ class IPrimaIE(InfoExtractor):
|
||||
|
||||
video_id = self._search_regex(r'data-product="([^"]+)">', webpage, 'real id')
|
||||
|
||||
req = sanitized_Request(
|
||||
'http://play.iprima.cz/prehravac/init?_infuse=1'
|
||||
'&_ts=%s&productId=%s' % (round(time.time()), video_id))
|
||||
req.add_header('Referer', url)
|
||||
playerpage = self._download_webpage(req, video_id, note='Downloading player')
|
||||
playerpage = self._download_webpage(
|
||||
'http://play.iprima.cz/prehravac/init',
|
||||
video_id, note='Downloading player', query={
|
||||
'_infuse': 1,
|
||||
'_ts': round(time.time()),
|
||||
'productId': video_id,
|
||||
}, headers={'Referer': url})
|
||||
|
||||
formats = []
|
||||
|
||||
@ -65,7 +71,7 @@ class IPrimaIE(InfoExtractor):
|
||||
|
||||
options = self._parse_json(
|
||||
self._search_regex(
|
||||
r'(?s)var\s+playerOptions\s*=\s*({.+?});',
|
||||
r'(?s)(?:TDIPlayerOptions|playerOptions)\s*=\s*({.+?});\s*\]\]',
|
||||
playerpage, 'player options', default='{}'),
|
||||
video_id, transform_source=js_to_json, fatal=False)
|
||||
if options:
|
||||
@ -82,7 +88,7 @@ class IPrimaIE(InfoExtractor):
|
||||
extract_formats(src)
|
||||
|
||||
if not formats and '>GEO_IP_NOT_ALLOWED<' in playerpage:
|
||||
self.raise_geo_restricted()
|
||||
self.raise_geo_restricted(countries=['CZ'])
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
|
@ -173,11 +173,12 @@ class IqiyiIE(InfoExtractor):
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.iqiyi.com/v_19rrhnnclk.html',
|
||||
'md5': '667171934041350c5de3f5015f7f1152',
|
||||
'md5': 'b7dc800a4004b1b57749d9abae0472da',
|
||||
'info_dict': {
|
||||
'id': 'e3f585b550a280af23c98b6cb2be19fb',
|
||||
'ext': 'mp4',
|
||||
'title': '名侦探柯南 国语版:第752集 迫近灰原秘密的黑影 下篇',
|
||||
# This can be either Simplified Chinese or Traditional Chinese
|
||||
'title': r're:^(?:名侦探柯南 国语版:第752集 迫近灰原秘密的黑影 下篇|名偵探柯南 國語版:第752集 迫近灰原秘密的黑影 下篇)$',
|
||||
},
|
||||
'skip': 'Geo-restricted to China',
|
||||
}, {
|
||||
|
201
youtube_dl/extractor/itv.py
Normal file
201
youtube_dl/extractor/itv.py
Normal file
@ -0,0 +1,201 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import uuid
|
||||
import xml.etree.ElementTree as etree
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_etree_register_namespace,
|
||||
)
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
xpath_with_ns,
|
||||
xpath_element,
|
||||
xpath_text,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
)
|
||||
|
||||
|
||||
class ITVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?itv\.com/hub/[^/]+/(?P<id>[0-9a-zA-Z]+)'
|
||||
_GEO_COUNTRIES = ['GB']
|
||||
_TEST = {
|
||||
'url': 'http://www.itv.com/hub/mr-bean-animated-series/2a2936a0053',
|
||||
'info_dict': {
|
||||
'id': '2a2936a0053',
|
||||
'ext': 'flv',
|
||||
'title': 'Home Movie',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
params = extract_attributes(self._search_regex(
|
||||
r'(?s)(<[^>]+id="video"[^>]*>)', webpage, 'params'))
|
||||
|
||||
ns_map = {
|
||||
'soapenv': 'http://schemas.xmlsoap.org/soap/envelope/',
|
||||
'tem': 'http://tempuri.org/',
|
||||
'itv': 'http://schemas.datacontract.org/2004/07/Itv.BB.Mercury.Common.Types',
|
||||
'com': 'http://schemas.itv.com/2009/05/Common',
|
||||
}
|
||||
for ns, full_ns in ns_map.items():
|
||||
compat_etree_register_namespace(ns, full_ns)
|
||||
|
||||
def _add_ns(name):
|
||||
return xpath_with_ns(name, ns_map)
|
||||
|
||||
def _add_sub_element(element, name):
|
||||
return etree.SubElement(element, _add_ns(name))
|
||||
|
||||
req_env = etree.Element(_add_ns('soapenv:Envelope'))
|
||||
_add_sub_element(req_env, 'soapenv:Header')
|
||||
body = _add_sub_element(req_env, 'soapenv:Body')
|
||||
get_playlist = _add_sub_element(body, ('tem:GetPlaylist'))
|
||||
request = _add_sub_element(get_playlist, 'tem:request')
|
||||
_add_sub_element(request, 'itv:ProductionId').text = params['data-video-id']
|
||||
_add_sub_element(request, 'itv:RequestGuid').text = compat_str(uuid.uuid4()).upper()
|
||||
vodcrid = _add_sub_element(request, 'itv:Vodcrid')
|
||||
_add_sub_element(vodcrid, 'com:Id')
|
||||
_add_sub_element(request, 'itv:Partition')
|
||||
user_info = _add_sub_element(get_playlist, 'tem:userInfo')
|
||||
_add_sub_element(user_info, 'itv:Broadcaster').text = 'Itv'
|
||||
_add_sub_element(user_info, 'itv:DM')
|
||||
_add_sub_element(user_info, 'itv:RevenueScienceValue')
|
||||
_add_sub_element(user_info, 'itv:SessionId')
|
||||
_add_sub_element(user_info, 'itv:SsoToken')
|
||||
_add_sub_element(user_info, 'itv:UserToken')
|
||||
site_info = _add_sub_element(get_playlist, 'tem:siteInfo')
|
||||
_add_sub_element(site_info, 'itv:AdvertisingRestriction').text = 'None'
|
||||
_add_sub_element(site_info, 'itv:AdvertisingSite').text = 'ITV'
|
||||
_add_sub_element(site_info, 'itv:AdvertisingType').text = 'Any'
|
||||
_add_sub_element(site_info, 'itv:Area').text = 'ITVPLAYER.VIDEO'
|
||||
_add_sub_element(site_info, 'itv:Category')
|
||||
_add_sub_element(site_info, 'itv:Platform').text = 'DotCom'
|
||||
_add_sub_element(site_info, 'itv:Site').text = 'ItvCom'
|
||||
device_info = _add_sub_element(get_playlist, 'tem:deviceInfo')
|
||||
_add_sub_element(device_info, 'itv:ScreenSize').text = 'Big'
|
||||
player_info = _add_sub_element(get_playlist, 'tem:playerInfo')
|
||||
_add_sub_element(player_info, 'itv:Version').text = '2'
|
||||
|
||||
headers = self.geo_verification_headers()
|
||||
headers.update({
|
||||
'Content-Type': 'text/xml; charset=utf-8',
|
||||
'SOAPAction': 'http://tempuri.org/PlaylistService/GetPlaylist',
|
||||
})
|
||||
resp_env = self._download_xml(
|
||||
params['data-playlist-url'], video_id,
|
||||
headers=headers, data=etree.tostring(req_env))
|
||||
playlist = xpath_element(resp_env, './/Playlist')
|
||||
if playlist is None:
|
||||
fault_code = xpath_text(resp_env, './/faultcode')
|
||||
fault_string = xpath_text(resp_env, './/faultstring')
|
||||
if fault_code == 'InvalidGeoRegion':
|
||||
self.raise_geo_restricted(
|
||||
msg=fault_string, countries=self._GEO_COUNTRIES)
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, fault_string))
|
||||
title = xpath_text(playlist, 'EpisodeTitle', fatal=True)
|
||||
video_element = xpath_element(playlist, 'VideoEntries/Video', fatal=True)
|
||||
media_files = xpath_element(video_element, 'MediaFiles', fatal=True)
|
||||
rtmp_url = media_files.attrib['base']
|
||||
|
||||
formats = []
|
||||
for media_file in media_files.findall('MediaFile'):
|
||||
play_path = xpath_text(media_file, 'URL')
|
||||
if not play_path:
|
||||
continue
|
||||
tbr = int_or_none(media_file.get('bitrate'), 1000)
|
||||
formats.append({
|
||||
'format_id': 'rtmp' + ('-%d' % tbr if tbr else ''),
|
||||
'url': rtmp_url,
|
||||
'play_path': play_path,
|
||||
'tbr': tbr,
|
||||
'ext': 'flv',
|
||||
})
|
||||
|
||||
ios_playlist_url = params.get('data-video-playlist')
|
||||
hmac = params.get('data-video-hmac')
|
||||
if ios_playlist_url and hmac:
|
||||
headers = self.geo_verification_headers()
|
||||
headers.update({
|
||||
'Accept': 'application/vnd.itv.vod.playlist.v2+json',
|
||||
'Content-Type': 'application/json',
|
||||
'hmac': hmac.upper(),
|
||||
})
|
||||
ios_playlist = self._download_json(
|
||||
ios_playlist_url, video_id, data=json.dumps({
|
||||
'user': {
|
||||
'itvUserId': '',
|
||||
'entitlements': [],
|
||||
'token': ''
|
||||
},
|
||||
'device': {
|
||||
'manufacturer': 'Apple',
|
||||
'model': 'iPad',
|
||||
'os': {
|
||||
'name': 'iPhone OS',
|
||||
'version': '9.3',
|
||||
'type': 'ios'
|
||||
}
|
||||
},
|
||||
'client': {
|
||||
'version': '4.1',
|
||||
'id': 'browser'
|
||||
},
|
||||
'variantAvailability': {
|
||||
'featureset': {
|
||||
'min': ['hls', 'aes'],
|
||||
'max': ['hls', 'aes']
|
||||
},
|
||||
'platformTag': 'mobile'
|
||||
}
|
||||
}).encode(), headers=headers, fatal=False)
|
||||
if ios_playlist:
|
||||
video_data = ios_playlist.get('Playlist', {}).get('Video', {})
|
||||
ios_base_url = video_data.get('Base')
|
||||
for media_file in video_data.get('MediaFiles', []):
|
||||
href = media_file.get('Href')
|
||||
if not href:
|
||||
continue
|
||||
if ios_base_url:
|
||||
href = ios_base_url + href
|
||||
ext = determine_ext(href)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(href, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'url': href,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
for caption_url in video_element.findall('ClosedCaptioningURIs/URL'):
|
||||
if not caption_url.text:
|
||||
continue
|
||||
ext = determine_ext(caption_url.text, 'ttml')
|
||||
subtitles.setdefault('en', []).append({
|
||||
'url': caption_url.text,
|
||||
'ext': 'ttml' if ext == 'xml' else ext,
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'episode_title': title,
|
||||
'episode_number': int_or_none(xpath_text(playlist, 'EpisodeNumber')),
|
||||
'series': xpath_text(playlist, 'ProgrammeTitle'),
|
||||
'duartion': parse_duration(xpath_text(playlist, 'Duration')),
|
||||
}
|
@ -3,14 +3,18 @@ from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_urlparse
|
||||
from ..utils import remove_end
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
remove_end,
|
||||
)
|
||||
|
||||
|
||||
class IwaraIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.|ecchi\.)?iwara\.tv/videos/(?P<id>[a-zA-Z0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://iwara.tv/videos/amVwUl1EHpAD9RD',
|
||||
'md5': '1d53866b2c514b23ed69e4352fdc9839',
|
||||
# md5 is unstable
|
||||
'info_dict': {
|
||||
'id': 'amVwUl1EHpAD9RD',
|
||||
'ext': 'mp4',
|
||||
@ -23,17 +27,17 @@ class IwaraIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '0B1LvuHnL-sRFNXB1WHNqbGw4SXc',
|
||||
'ext': 'mp4',
|
||||
'title': '[3D Hentai] Kyonyu Ã\x97 Genkai Ã\x97 Emaki Shinobi Girls.mp4',
|
||||
'title': '[3D Hentai] Kyonyu × Genkai × Emaki Shinobi Girls.mp4',
|
||||
'age_limit': 18,
|
||||
},
|
||||
'add_ie': ['GoogleDrive'],
|
||||
}, {
|
||||
'url': 'http://www.iwara.tv/videos/nawkaumd6ilezzgq',
|
||||
'md5': '1d85f1e5217d2791626cff5ec83bb189',
|
||||
# md5 is unstable
|
||||
'info_dict': {
|
||||
'id': '6liAP9s2Ojc',
|
||||
'ext': 'mp4',
|
||||
'age_limit': 0,
|
||||
'age_limit': 18,
|
||||
'title': '[MMD] Do It Again Ver.2 [1080p 60FPS] (Motion,Camera,Wav+DL)',
|
||||
'description': 'md5:590c12c0df1443d833fbebe05da8c47a',
|
||||
'upload_date': '20160910',
|
||||
@ -52,9 +56,9 @@ class IwaraIE(InfoExtractor):
|
||||
# ecchi is 'sexy' in Japanese
|
||||
age_limit = 18 if hostname.split('.')[0] == 'ecchi' else 0
|
||||
|
||||
entries = self._parse_html5_media_entries(url, webpage, video_id)
|
||||
video_data = self._download_json('http://www.iwara.tv/api/video/%s' % video_id, video_id)
|
||||
|
||||
if not entries:
|
||||
if not video_data:
|
||||
iframe_url = self._html_search_regex(
|
||||
r'<iframe[^>]+src=([\'"])(?P<url>[^\'"]+)\1',
|
||||
webpage, 'iframe URL', group='url')
|
||||
@ -67,11 +71,25 @@ class IwaraIE(InfoExtractor):
|
||||
title = remove_end(self._html_search_regex(
|
||||
r'<title>([^<]+)</title>', webpage, 'title'), ' | Iwara')
|
||||
|
||||
info_dict = entries[0]
|
||||
info_dict.update({
|
||||
formats = []
|
||||
for a_format in video_data:
|
||||
format_id = a_format.get('resolution')
|
||||
height = int_or_none(self._search_regex(
|
||||
r'(\d+)p', format_id, 'height', default=None))
|
||||
formats.append({
|
||||
'url': a_format['uri'],
|
||||
'format_id': format_id,
|
||||
'ext': mimetype2ext(a_format.get('mime')) or 'mp4',
|
||||
'height': height,
|
||||
'width': int_or_none(height / 9.0 * 16.0 if height else None),
|
||||
'quality': 1 if format_id == 'Source' else 0,
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'age_limit': age_limit,
|
||||
})
|
||||
|
||||
return info_dict
|
||||
'formats': formats,
|
||||
}
|
||||
|
@ -5,9 +5,27 @@ import re
|
||||
|
||||
from ..compat import compat_urlparse
|
||||
from .common import InfoExtractor
|
||||
from ..utils import parse_duration
|
||||
|
||||
|
||||
class JamendoIE(InfoExtractor):
|
||||
class JamendoBaseIE(InfoExtractor):
|
||||
def _extract_meta(self, webpage, fatal=True):
|
||||
title = self._og_search_title(
|
||||
webpage, default=None) or self._search_regex(
|
||||
r'<title>([^<]+)', webpage,
|
||||
'title', default=None)
|
||||
if title:
|
||||
title = self._search_regex(
|
||||
r'(.+?)\s*\|\s*Jamendo Music', title, 'title', default=None)
|
||||
if not title:
|
||||
title = self._html_search_meta(
|
||||
'name', webpage, 'title', fatal=fatal)
|
||||
mobj = re.search(r'(.+) - (.+)', title or '')
|
||||
artist, second = mobj.groups() if mobj else [None] * 2
|
||||
return title, artist, second
|
||||
|
||||
|
||||
class JamendoIE(JamendoBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?jamendo\.com/track/(?P<id>[0-9]+)/(?P<display_id>[^/?#&]+)'
|
||||
_TEST = {
|
||||
'url': 'https://www.jamendo.com/track/196219/stories-from-emona-i',
|
||||
@ -16,7 +34,10 @@ class JamendoIE(InfoExtractor):
|
||||
'id': '196219',
|
||||
'display_id': 'stories-from-emona-i',
|
||||
'ext': 'flac',
|
||||
'title': 'Stories from Emona I',
|
||||
'title': 'Maya Filipič - Stories from Emona I',
|
||||
'artist': 'Maya Filipič',
|
||||
'track': 'Stories from Emona I',
|
||||
'duration': 210,
|
||||
'thumbnail': r're:^https?://.*\.jpg'
|
||||
}
|
||||
}
|
||||
@ -28,7 +49,7 @@ class JamendoIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
title = self._html_search_meta('name', webpage, 'title')
|
||||
title, artist, track = self._extract_meta(webpage)
|
||||
|
||||
formats = [{
|
||||
'url': 'https://%s.jamendo.com/?trackid=%s&format=%s&from=app-97dab294'
|
||||
@ -46,37 +67,47 @@ class JamendoIE(InfoExtractor):
|
||||
|
||||
thumbnail = self._html_search_meta(
|
||||
'image', webpage, 'thumbnail', fatal=False)
|
||||
duration = parse_duration(self._search_regex(
|
||||
r'<span[^>]+itemprop=["\']duration["\'][^>]+content=["\'](.+?)["\']',
|
||||
webpage, 'duration', fatal=False))
|
||||
|
||||
return {
|
||||
'id': track_id,
|
||||
'display_id': display_id,
|
||||
'thumbnail': thumbnail,
|
||||
'title': title,
|
||||
'duration': duration,
|
||||
'artist': artist,
|
||||
'track': track,
|
||||
'formats': formats
|
||||
}
|
||||
|
||||
|
||||
class JamendoAlbumIE(InfoExtractor):
|
||||
class JamendoAlbumIE(JamendoBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?jamendo\.com/album/(?P<id>[0-9]+)/(?P<display_id>[\w-]+)'
|
||||
_TEST = {
|
||||
'url': 'https://www.jamendo.com/album/121486/duck-on-cover',
|
||||
'info_dict': {
|
||||
'id': '121486',
|
||||
'title': 'Duck On Cover'
|
||||
'title': 'Shearer - Duck On Cover'
|
||||
},
|
||||
'playlist': [{
|
||||
'md5': 'e1a2fcb42bda30dfac990212924149a8',
|
||||
'info_dict': {
|
||||
'id': '1032333',
|
||||
'ext': 'flac',
|
||||
'title': 'Warmachine'
|
||||
'title': 'Shearer - Warmachine',
|
||||
'artist': 'Shearer',
|
||||
'track': 'Warmachine',
|
||||
}
|
||||
}, {
|
||||
'md5': '1f358d7b2f98edfe90fd55dac0799d50',
|
||||
'info_dict': {
|
||||
'id': '1032330',
|
||||
'ext': 'flac',
|
||||
'title': 'Without Your Ghost'
|
||||
'title': 'Shearer - Without Your Ghost',
|
||||
'artist': 'Shearer',
|
||||
'track': 'Without Your Ghost',
|
||||
}
|
||||
}],
|
||||
'params': {
|
||||
@ -90,18 +121,18 @@ class JamendoAlbumIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, mobj.group('display_id'))
|
||||
|
||||
title = self._html_search_meta('name', webpage, 'title')
|
||||
title, artist, album = self._extract_meta(webpage, fatal=False)
|
||||
|
||||
entries = [
|
||||
self.url_result(
|
||||
compat_urlparse.urljoin(url, m.group('path')),
|
||||
ie=JamendoIE.ie_key(),
|
||||
video_id=self._search_regex(
|
||||
r'/track/(\d+)', m.group('path'),
|
||||
'track id', default=None))
|
||||
for m in re.finditer(
|
||||
r'<a[^>]+href=(["\'])(?P<path>(?:(?!\1).)+)\1[^>]+class=["\'][^>]*js-trackrow-albumpage-link',
|
||||
webpage)
|
||||
]
|
||||
entries = [{
|
||||
'_type': 'url_transparent',
|
||||
'url': compat_urlparse.urljoin(url, m.group('path')),
|
||||
'ie_key': JamendoIE.ie_key(),
|
||||
'id': self._search_regex(
|
||||
r'/track/(\d+)', m.group('path'), 'track id', default=None),
|
||||
'artist': artist,
|
||||
'album': album,
|
||||
} for m in re.finditer(
|
||||
r'<a[^>]+href=(["\'])(?P<path>(?:(?!\1).)+)\1[^>]+class=["\'][^>]*js-trackrow-albumpage-link',
|
||||
webpage)]
|
||||
|
||||
return self.playlist_result(entries, album_id, title)
|
||||
|
@ -4,139 +4,9 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
mimetype2ext,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class JWPlatformBaseIE(InfoExtractor):
|
||||
@staticmethod
|
||||
def _find_jwplayer_data(webpage):
|
||||
# TODO: Merge this with JWPlayer-related codes in generic.py
|
||||
|
||||
mobj = re.search(
|
||||
r'jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)\.setup\s*\((?P<options>[^)]+)\)',
|
||||
webpage)
|
||||
if mobj:
|
||||
return mobj.group('options')
|
||||
|
||||
def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs):
|
||||
jwplayer_data = self._parse_json(
|
||||
self._find_jwplayer_data(webpage), video_id,
|
||||
transform_source=js_to_json)
|
||||
return self._parse_jwplayer_data(
|
||||
jwplayer_data, video_id, *args, **kwargs)
|
||||
|
||||
def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True,
|
||||
m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None):
|
||||
# JWPlayer backward compatibility: flattened playlists
|
||||
# https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/api/config.js#L81-L96
|
||||
if 'playlist' not in jwplayer_data:
|
||||
jwplayer_data = {'playlist': [jwplayer_data]}
|
||||
|
||||
entries = []
|
||||
|
||||
# JWPlayer backward compatibility: single playlist item
|
||||
# https://github.com/jwplayer/jwplayer/blob/v7.7.0/src/js/playlist/playlist.js#L10
|
||||
if not isinstance(jwplayer_data['playlist'], list):
|
||||
jwplayer_data['playlist'] = [jwplayer_data['playlist']]
|
||||
|
||||
for video_data in jwplayer_data['playlist']:
|
||||
# JWPlayer backward compatibility: flattened sources
|
||||
# https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/playlist/item.js#L29-L35
|
||||
if 'sources' not in video_data:
|
||||
video_data['sources'] = [video_data]
|
||||
|
||||
this_video_id = video_id or video_data['mediaid']
|
||||
|
||||
formats = []
|
||||
for source in video_data['sources']:
|
||||
source_url = self._proto_relative_url(source['file'])
|
||||
if base_url:
|
||||
source_url = compat_urlparse.urljoin(base_url, source_url)
|
||||
source_type = source.get('type') or ''
|
||||
ext = mimetype2ext(source_type) or determine_ext(source_url)
|
||||
if source_type == 'hls' or ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
source_url, this_video_id, 'mp4', 'm3u8_native', m3u8_id=m3u8_id, fatal=False))
|
||||
elif ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
source_url, this_video_id, mpd_id=mpd_id, fatal=False))
|
||||
# https://github.com/jwplayer/jwplayer/blob/master/src/js/providers/default.js#L67
|
||||
elif source_type.startswith('audio') or ext in ('oga', 'aac', 'mp3', 'mpeg', 'vorbis'):
|
||||
formats.append({
|
||||
'url': source_url,
|
||||
'vcodec': 'none',
|
||||
'ext': ext,
|
||||
})
|
||||
else:
|
||||
height = int_or_none(source.get('height'))
|
||||
if height is None:
|
||||
# Often no height is provided but there is a label in
|
||||
# format like 1080p.
|
||||
height = int_or_none(self._search_regex(
|
||||
r'^(\d{3,})[pP]$', source.get('label') or '',
|
||||
'height', default=None))
|
||||
a_format = {
|
||||
'url': source_url,
|
||||
'width': int_or_none(source.get('width')),
|
||||
'height': height,
|
||||
'ext': ext,
|
||||
}
|
||||
if source_url.startswith('rtmp'):
|
||||
a_format['ext'] = 'flv'
|
||||
|
||||
# See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as
|
||||
# of jwplayer.flash.swf
|
||||
rtmp_url_parts = re.split(
|
||||
r'((?:mp4|mp3|flv):)', source_url, 1)
|
||||
if len(rtmp_url_parts) == 3:
|
||||
rtmp_url, prefix, play_path = rtmp_url_parts
|
||||
a_format.update({
|
||||
'url': rtmp_url,
|
||||
'play_path': prefix + play_path,
|
||||
})
|
||||
if rtmp_params:
|
||||
a_format.update(rtmp_params)
|
||||
formats.append(a_format)
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
tracks = video_data.get('tracks')
|
||||
if tracks and isinstance(tracks, list):
|
||||
for track in tracks:
|
||||
if track.get('kind') != 'captions':
|
||||
continue
|
||||
track_url = urljoin(base_url, track.get('file'))
|
||||
if not track_url:
|
||||
continue
|
||||
subtitles.setdefault(track.get('label') or 'en', []).append({
|
||||
'url': self._proto_relative_url(track_url)
|
||||
})
|
||||
|
||||
entries.append({
|
||||
'id': this_video_id,
|
||||
'title': video_data['title'] if require_title else video_data.get('title'),
|
||||
'description': video_data.get('description'),
|
||||
'thumbnail': self._proto_relative_url(video_data.get('image')),
|
||||
'timestamp': int_or_none(video_data.get('pubdate')),
|
||||
'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')),
|
||||
'subtitles': subtitles,
|
||||
'formats': formats,
|
||||
})
|
||||
if len(entries) == 1:
|
||||
return entries[0]
|
||||
else:
|
||||
return self.playlist_result(entries)
|
||||
|
||||
|
||||
class JWPlatformIE(JWPlatformBaseIE):
|
||||
class JWPlatformIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:https?://content\.jwplatform\.com/(?:feeds|players|jw6)/|jwplatform:)(?P<id>[a-zA-Z0-9]{8})'
|
||||
_TEST = {
|
||||
'url': 'http://content.jwplatform.com/players/nPripu9l-ALJ3XQCI.js',
|
||||
|
@ -23,11 +23,11 @@ class KalturaIE(InfoExtractor):
|
||||
(?:
|
||||
kaltura:(?P<partner_id>\d+):(?P<id>[0-9a-z_]+)|
|
||||
https?://
|
||||
(:?(?:www|cdnapi(?:sec)?)\.)?kaltura\.com/
|
||||
(:?(?:www|cdnapi(?:sec)?)\.)?kaltura\.com(?::\d+)?/
|
||||
(?:
|
||||
(?:
|
||||
# flash player
|
||||
index\.php/kwidget|
|
||||
index\.php/(?:kwidget|extwidget/preview)|
|
||||
# html5 player
|
||||
html5/html5lib/[^/]+/mwEmbedFrame\.php
|
||||
)
|
||||
@ -94,6 +94,14 @@ class KalturaIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'https://www.kaltura.com/index.php/extwidget/preview/partner_id/1770401/uiconf_id/37307382/entry_id/0_58u8kme7/embed/iframe?&flashvars[streamerType]=auto',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'https://www.kaltura.com:443/index.php/extwidget/preview/partner_id/1770401/uiconf_id/37307382/entry_id/0_58u8kme7/embed/iframe?&flashvars[streamerType]=auto',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
|
||||
@ -112,7 +120,7 @@ class KalturaIE(InfoExtractor):
|
||||
re.search(
|
||||
r'''(?xs)
|
||||
(?P<q1>["\'])
|
||||
(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/(?:(?!(?P=q1)).)*(?:p|partner_id)/(?P<partner_id>\d+)(?:(?!(?P=q1)).)*
|
||||
(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com(?::\d+)?/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)(?:(?!(?P=q1)).)*
|
||||
(?P=q1).*?
|
||||
(?:
|
||||
entry_?[Ii]d|
|
||||
@ -209,6 +217,8 @@ class KalturaIE(InfoExtractor):
|
||||
partner_id = params['wid'][0][1:]
|
||||
elif 'p' in params:
|
||||
partner_id = params['p'][0]
|
||||
elif 'partner_id' in params:
|
||||
partner_id = params['partner_id'][0]
|
||||
else:
|
||||
raise ExtractorError('Invalid URL', expected=True)
|
||||
if 'entry_id' in params:
|
||||
@ -266,9 +276,12 @@ class KalturaIE(InfoExtractor):
|
||||
# skip for now.
|
||||
if f.get('fileExt') == 'chun':
|
||||
continue
|
||||
if not f.get('fileExt') and f.get('containerFormat') == 'qt':
|
||||
if not f.get('fileExt'):
|
||||
# QT indicates QuickTime; some videos have broken fileExt
|
||||
f['fileExt'] = 'mov'
|
||||
if f.get('containerFormat') == 'qt':
|
||||
f['fileExt'] = 'mov'
|
||||
else:
|
||||
f['fileExt'] = 'mp4'
|
||||
video_url = sign_url(
|
||||
'%s/flavorId/%s' % (data_url, f['id']))
|
||||
# audio-only has no videoCodecId (e.g. kaltura:1926081:0_c03e1b5g
|
||||
@ -319,6 +332,6 @@ class KalturaIE(InfoExtractor):
|
||||
'thumbnail': info.get('thumbnailUrl'),
|
||||
'duration': info.get('duration'),
|
||||
'timestamp': info.get('createdAt'),
|
||||
'uploader_id': info.get('userId'),
|
||||
'uploader_id': info.get('userId') if info.get('userId') != 'None' else None,
|
||||
'view_count': info.get('plays'),
|
||||
}
|
||||
|
@ -2,29 +2,31 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class KonserthusetPlayIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?konserthusetplay\.se/\?.*\bm=(?P<id>[^&]+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:konserthusetplay|rspoplay)\.se/\?.*\bm=(?P<id>[^&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.konserthusetplay.se/?m=CKDDnlCY-dhWAAqiMERd-A',
|
||||
'md5': 'e3fd47bf44e864bd23c08e487abe1967',
|
||||
'info_dict': {
|
||||
'id': 'CKDDnlCY-dhWAAqiMERd-A',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Orkesterns instrument: Valthornen',
|
||||
'description': 'md5:f10e1f0030202020396a4d712d2fa827',
|
||||
'thumbnail': 're:^https?://.*$',
|
||||
'duration': 398.8,
|
||||
'duration': 398.76,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'http://rspoplay.se/?m=elWuEH34SMKvaO4wO_cHBw',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
@ -42,12 +44,18 @@ class KonserthusetPlayIE(InfoExtractor):
|
||||
player_config = media['playerconfig']
|
||||
playlist = player_config['playlist']
|
||||
|
||||
source = next(f for f in playlist if f.get('bitrates'))
|
||||
source = next(f for f in playlist if f.get('bitrates') or f.get('provider'))
|
||||
|
||||
FORMAT_ID_REGEX = r'_([^_]+)_h264m\.mp4'
|
||||
|
||||
formats = []
|
||||
|
||||
m3u8_url = source.get('url')
|
||||
if m3u8_url and determine_ext(m3u8_url) == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
|
||||
fallback_url = source.get('fallbackUrl')
|
||||
fallback_format_id = None
|
||||
if fallback_url:
|
||||
@ -97,6 +105,13 @@ class KonserthusetPlayIE(InfoExtractor):
|
||||
thumbnail = media.get('image')
|
||||
duration = float_or_none(media.get('duration'), 1000)
|
||||
|
||||
subtitles = {}
|
||||
captions = source.get('captionsAvailableLanguages')
|
||||
if isinstance(captions, dict):
|
||||
for lang, subtitle_url in captions.items():
|
||||
if lang != 'none' and isinstance(subtitle_url, compat_str):
|
||||
subtitles.setdefault(lang, []).append({'url': subtitle_url})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
@ -104,4 +119,5 @@ class KonserthusetPlayIE(InfoExtractor):
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
@ -30,7 +30,7 @@ from ..utils import (
|
||||
class LeIE(InfoExtractor):
|
||||
IE_DESC = '乐视网'
|
||||
_VALID_URL = r'https?://(?:www\.le\.com/ptv/vplay|(?:sports\.le|(?:www\.)?lesports)\.com/(?:match|video))/(?P<id>\d+)\.html'
|
||||
|
||||
_GEO_COUNTRIES = ['CN']
|
||||
_URL_TEMPLATE = 'http://www.le.com/ptv/vplay/%s.html'
|
||||
|
||||
_TESTS = [{
|
||||
@ -126,10 +126,9 @@ class LeIE(InfoExtractor):
|
||||
if playstatus['status'] == 0:
|
||||
flag = playstatus['flag']
|
||||
if flag == 1:
|
||||
msg = 'Country %s auth error' % playstatus['country']
|
||||
self.raise_geo_restricted()
|
||||
else:
|
||||
msg = 'Generic error. flag = %d' % flag
|
||||
raise ExtractorError(msg, expected=True)
|
||||
raise ExtractorError('Generic error. flag = %d' % flag, expected=True)
|
||||
|
||||
def _real_extract(self, url):
|
||||
media_id = self._match_id(url)
|
||||
|
@ -7,20 +7,40 @@ class LemondeIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:.+?\.)?lemonde\.fr/(?:[^/]+/)*(?P<id>[^/]+)\.html'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.lemonde.fr/police-justice/video/2016/01/19/comprendre-l-affaire-bygmalion-en-cinq-minutes_4849702_1653578.html',
|
||||
'md5': '01fb3c92de4c12c573343d63e163d302',
|
||||
'md5': 'da120c8722d8632eec6ced937536cc98',
|
||||
'info_dict': {
|
||||
'id': 'lqm3kl',
|
||||
'ext': 'mp4',
|
||||
'title': "Comprendre l'affaire Bygmalion en 5 minutes",
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'duration': 320,
|
||||
'duration': 309,
|
||||
'upload_date': '20160119',
|
||||
'timestamp': 1453194778,
|
||||
'uploader_id': '3pmkp',
|
||||
},
|
||||
}, {
|
||||
# standard iframe embed
|
||||
'url': 'http://www.lemonde.fr/les-decodeurs/article/2016/10/18/tout-comprendre-du-ceta-le-petit-cousin-du-traite-transatlantique_5015920_4355770.html',
|
||||
'info_dict': {
|
||||
'id': 'uzsxms',
|
||||
'ext': 'mp4',
|
||||
'title': "CETA : quelles suites pour l'accord commercial entre l'Europe et le Canada ?",
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'duration': 325,
|
||||
'upload_date': '20161021',
|
||||
'timestamp': 1477044540,
|
||||
'uploader_id': '3pmkp',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://redaction.actu.lemonde.fr/societe/video/2016/01/18/calais-debut-des-travaux-de-defrichement-dans-la-jungle_4849233_3224.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# YouTube embeds
|
||||
'url': 'http://www.lemonde.fr/pixels/article/2016/12/09/pourquoi-pewdiepie-superstar-de-youtube-a-menace-de-fermer-sa-chaine_5046649_4408996.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -30,5 +50,9 @@ class LemondeIE(InfoExtractor):
|
||||
|
||||
digiteka_url = self._proto_relative_url(self._search_regex(
|
||||
r'url\s*:\s*(["\'])(?P<url>(?:https?://)?//(?:www\.)?(?:digiteka\.net|ultimedia\.com)/deliver/.+?)\1',
|
||||
webpage, 'digiteka url', group='url'))
|
||||
return self.url_result(digiteka_url, 'Digiteka')
|
||||
webpage, 'digiteka url', group='url', default=None))
|
||||
|
||||
if digiteka_url:
|
||||
return self.url_result(digiteka_url, 'Digiteka')
|
||||
|
||||
return self.url_result(url, 'Generic')
|
||||
|
@ -4,10 +4,13 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_HTTPError
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
unsmuggle_url,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
@ -15,20 +18,31 @@ class LimelightBaseIE(InfoExtractor):
|
||||
_PLAYLIST_SERVICE_URL = 'http://production-ps.lvp.llnw.net/r/PlaylistService/%s/%s/%s'
|
||||
_API_URL = 'http://api.video.limelight.com/rest/organizations/%s/%s/%s/%s.json'
|
||||
|
||||
def _call_playlist_service(self, item_id, method, fatal=True):
|
||||
return self._download_json(
|
||||
self._PLAYLIST_SERVICE_URL % (self._PLAYLIST_SERVICE_PATH, item_id, method),
|
||||
item_id, 'Downloading PlaylistService %s JSON' % method, fatal=fatal)
|
||||
def _call_playlist_service(self, item_id, method, fatal=True, referer=None):
|
||||
headers = {}
|
||||
if referer:
|
||||
headers['Referer'] = referer
|
||||
try:
|
||||
return self._download_json(
|
||||
self._PLAYLIST_SERVICE_URL % (self._PLAYLIST_SERVICE_PATH, item_id, method),
|
||||
item_id, 'Downloading PlaylistService %s JSON' % method, fatal=fatal, headers=headers)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||
error = self._parse_json(e.cause.read().decode(), item_id)['detail']['contentAccessPermission']
|
||||
if error == 'CountryDisabled':
|
||||
self.raise_geo_restricted()
|
||||
raise ExtractorError(error, expected=True)
|
||||
raise
|
||||
|
||||
def _call_api(self, organization_id, item_id, method):
|
||||
return self._download_json(
|
||||
self._API_URL % (organization_id, self._API_PATH, item_id, method),
|
||||
item_id, 'Downloading API %s JSON' % method)
|
||||
|
||||
def _extract(self, item_id, pc_method, mobile_method, meta_method):
|
||||
pc = self._call_playlist_service(item_id, pc_method)
|
||||
def _extract(self, item_id, pc_method, mobile_method, meta_method, referer=None):
|
||||
pc = self._call_playlist_service(item_id, pc_method, referer=referer)
|
||||
metadata = self._call_api(pc['orgId'], item_id, meta_method)
|
||||
mobile = self._call_playlist_service(item_id, mobile_method, fatal=False)
|
||||
mobile = self._call_playlist_service(item_id, mobile_method, fatal=False, referer=referer)
|
||||
return pc, mobile, metadata
|
||||
|
||||
def _extract_info(self, streams, mobile_urls, properties):
|
||||
@ -59,14 +73,26 @@ class LimelightBaseIE(InfoExtractor):
|
||||
format_id = 'rtmp'
|
||||
if stream.get('videoBitRate'):
|
||||
format_id += '-%d' % int_or_none(stream['videoBitRate'])
|
||||
http_url = 'http://cpl.delvenetworks.com/' + rtmp.group('playpath')[4:]
|
||||
urls.append(http_url)
|
||||
http_fmt = fmt.copy()
|
||||
http_fmt.update({
|
||||
'url': http_url,
|
||||
'format_id': format_id.replace('rtmp', 'http'),
|
||||
})
|
||||
formats.append(http_fmt)
|
||||
http_format_id = format_id.replace('rtmp', 'http')
|
||||
|
||||
CDN_HOSTS = (
|
||||
('delvenetworks.com', 'cpl.delvenetworks.com'),
|
||||
('video.llnw.net', 's2.content.video.llnw.net'),
|
||||
)
|
||||
for cdn_host, http_host in CDN_HOSTS:
|
||||
if cdn_host not in rtmp.group('host').lower():
|
||||
continue
|
||||
http_url = 'http://%s/%s' % (http_host, rtmp.group('playpath')[4:])
|
||||
urls.append(http_url)
|
||||
if self._is_valid_url(http_url, video_id, http_format_id):
|
||||
http_fmt = fmt.copy()
|
||||
http_fmt.update({
|
||||
'url': http_url,
|
||||
'format_id': http_format_id,
|
||||
})
|
||||
formats.append(http_fmt)
|
||||
break
|
||||
|
||||
fmt.update({
|
||||
'url': rtmp.group('url'),
|
||||
'play_path': rtmp.group('playpath'),
|
||||
@ -195,10 +221,14 @@ class LimelightMediaIE(LimelightBaseIE):
|
||||
_API_PATH = 'media'
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, smuggled_data = unsmuggle_url(url, {})
|
||||
video_id = self._match_id(url)
|
||||
self._initialize_geo_bypass(smuggled_data.get('geo_countries'))
|
||||
|
||||
pc, mobile, metadata = self._extract(
|
||||
video_id, 'getPlaylistByMediaId', 'getMobilePlaylistByMediaId', 'properties')
|
||||
video_id, 'getPlaylistByMediaId',
|
||||
'getMobilePlaylistByMediaId', 'properties',
|
||||
smuggled_data.get('source_url'))
|
||||
|
||||
return self._extract_info(
|
||||
pc['playlistItems'][0].get('streams', []),
|
||||
@ -235,11 +265,13 @@ class LimelightChannelIE(LimelightBaseIE):
|
||||
_API_PATH = 'channels'
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, smuggled_data = unsmuggle_url(url, {})
|
||||
channel_id = self._match_id(url)
|
||||
|
||||
pc, mobile, medias = self._extract(
|
||||
channel_id, 'getPlaylistByChannelId',
|
||||
'getMobilePlaylistWithNItemsByChannelId?begin=0&count=-1', 'media')
|
||||
'getMobilePlaylistWithNItemsByChannelId?begin=0&count=-1',
|
||||
'media', smuggled_data.get('source_url'))
|
||||
|
||||
entries = [
|
||||
self._extract_info(
|
||||
|
@ -260,9 +260,24 @@ class LyndaCourseIE(LyndaBaseIE):
|
||||
course_path = mobj.group('coursepath')
|
||||
course_id = mobj.group('courseid')
|
||||
|
||||
item_template = 'https://www.lynda.com/%s/%%s-4.html' % course_path
|
||||
|
||||
course = self._download_json(
|
||||
'https://www.lynda.com/ajax/player?courseId=%s&type=course' % course_id,
|
||||
course_id, 'Downloading course JSON')
|
||||
course_id, 'Downloading course JSON', fatal=False)
|
||||
|
||||
if not course:
|
||||
webpage = self._download_webpage(url, course_id)
|
||||
entries = [
|
||||
self.url_result(
|
||||
item_template % video_id, ie=LyndaIE.ie_key(),
|
||||
video_id=video_id)
|
||||
for video_id in re.findall(
|
||||
r'data-video-id=["\'](\d+)', webpage)]
|
||||
return self.playlist_result(
|
||||
entries, course_id,
|
||||
self._og_search_title(webpage, fatal=False),
|
||||
self._og_search_description(webpage))
|
||||
|
||||
if course.get('Status') == 'NotFound':
|
||||
raise ExtractorError(
|
||||
@ -283,7 +298,7 @@ class LyndaCourseIE(LyndaBaseIE):
|
||||
if video_id:
|
||||
entries.append({
|
||||
'_type': 'url_transparent',
|
||||
'url': 'https://www.lynda.com/%s/%s-4.html' % (course_path, video_id),
|
||||
'url': item_template % video_id,
|
||||
'ie_key': LyndaIE.ie_key(),
|
||||
'chapter': chapter.get('Title'),
|
||||
'chapter_number': int_or_none(chapter.get('ChapterIndex')),
|
||||
|
@ -6,12 +6,12 @@ from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_parse_urlencode,
|
||||
)
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
urlencode_postdata,
|
||||
get_element_by_attribute,
|
||||
mimetype2ext,
|
||||
)
|
||||
@ -50,6 +50,21 @@ class MetacafeIE(InfoExtractor):
|
||||
},
|
||||
'skip': 'Page is temporarily unavailable.',
|
||||
},
|
||||
# metacafe video with family filter
|
||||
{
|
||||
'url': 'http://www.metacafe.com/watch/2155630/adult_art_by_david_hart_156/',
|
||||
'md5': 'b06082c5079bbdcde677a6291fbdf376',
|
||||
'info_dict': {
|
||||
'id': '2155630',
|
||||
'ext': 'mp4',
|
||||
'title': 'Adult Art By David Hart 156',
|
||||
'uploader': '63346',
|
||||
'description': 'md5:9afac8fc885252201ad14563694040fc',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# AnyClip video
|
||||
{
|
||||
'url': 'http://www.metacafe.com/watch/an-dVVXnuY7Jh77J/the_andromeda_strain_1971_stop_the_bomb_part_3/',
|
||||
@ -112,22 +127,6 @@ class MetacafeIE(InfoExtractor):
|
||||
def report_disclaimer(self):
|
||||
self.to_screen('Retrieving disclaimer')
|
||||
|
||||
def _confirm_age(self):
|
||||
# Retrieve disclaimer
|
||||
self.report_disclaimer()
|
||||
self._download_webpage(self._DISCLAIMER, None, False, 'Unable to retrieve disclaimer')
|
||||
|
||||
# Confirm age
|
||||
self.report_age_confirmation()
|
||||
self._download_webpage(
|
||||
self._FILTER_POST, None, False, 'Unable to confirm age',
|
||||
data=urlencode_postdata({
|
||||
'filters': '0',
|
||||
'submit': "Continue - I'm over 18",
|
||||
}), headers={
|
||||
'Content-Type': 'application/x-www-form-urlencoded',
|
||||
})
|
||||
|
||||
def _real_extract(self, url):
|
||||
# Extract id and simplified title from URL
|
||||
video_id, display_id = re.match(self._VALID_URL, url).groups()
|
||||
@ -143,13 +142,15 @@ class MetacafeIE(InfoExtractor):
|
||||
if prefix == 'cb':
|
||||
return self.url_result('theplatform:%s' % ext_id, 'ThePlatform')
|
||||
|
||||
# self._confirm_age()
|
||||
headers = {
|
||||
# Disable family filter
|
||||
'Cookie': 'user=%s; ' % compat_urllib_parse_urlencode({'ffilter': False})
|
||||
}
|
||||
|
||||
# AnyClip videos require the flashversion cookie so that we get the link
|
||||
# to the mp4 file
|
||||
headers = {}
|
||||
if video_id.startswith('an-'):
|
||||
headers['Cookie'] = 'flashVersion=0;'
|
||||
headers['Cookie'] += 'flashVersion=0; '
|
||||
|
||||
# Retrieve video webpage to extract further information
|
||||
webpage = self._download_webpage(url, video_id, headers=headers)
|
||||
|
@ -2,16 +2,17 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class MGTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?mgtv\.com/v/(?:[^/]+/)*(?P<id>\d+)\.html'
|
||||
_VALID_URL = r'https?://(?:www\.)?mgtv\.com/(v|b)/(?:[^/]+/)*(?P<id>\d+)\.html'
|
||||
IE_DESC = '芒果TV'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.mgtv.com/v/1/290525/f/3116640.html',
|
||||
'md5': '1bdadcf760a0b90946ca68ee9a2db41a',
|
||||
'md5': 'b1ffc0fc163152acf6beaa81832c9ee7',
|
||||
'info_dict': {
|
||||
'id': '3116640',
|
||||
'ext': 'mp4',
|
||||
@ -21,48 +22,45 @@ class MGTVIE(InfoExtractor):
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
}, {
|
||||
# no tbr extracted from stream_url
|
||||
'url': 'http://www.mgtv.com/v/1/1/f/3324755.html',
|
||||
'url': 'http://www.mgtv.com/b/301817/3826653.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
api_data = self._download_json(
|
||||
'http://v.api.mgtv.com/player/video', video_id,
|
||||
'http://pcweb.api.mgtv.com/player/video', video_id,
|
||||
query={'video_id': video_id},
|
||||
headers=self.geo_verification_headers())['data']
|
||||
info = api_data['info']
|
||||
title = info['title'].strip()
|
||||
stream_domain = api_data['stream_domain'][0]
|
||||
|
||||
formats = []
|
||||
for idx, stream in enumerate(api_data['stream']):
|
||||
stream_url = stream.get('url')
|
||||
if not stream_url:
|
||||
stream_path = stream.get('url')
|
||||
if not stream_path:
|
||||
continue
|
||||
format_data = self._download_json(
|
||||
stream_domain + stream_path, video_id,
|
||||
note='Download video info for format #%d' % idx)
|
||||
format_url = format_data.get('info')
|
||||
if not format_url:
|
||||
continue
|
||||
tbr = int_or_none(self._search_regex(
|
||||
r'(\d+)\.mp4', stream_url, 'tbr', default=None))
|
||||
|
||||
def extract_format(stream_url, format_id, idx, query={}):
|
||||
format_info = self._download_json(
|
||||
stream_url, video_id,
|
||||
note='Download video info for format %s' % (format_id or '#%d' % idx),
|
||||
query=query)
|
||||
return {
|
||||
'format_id': format_id,
|
||||
'url': format_info['info'],
|
||||
'ext': 'mp4',
|
||||
'tbr': tbr,
|
||||
}
|
||||
|
||||
formats.append(extract_format(
|
||||
stream_url, 'hls-%d' % tbr if tbr else None, idx * 2))
|
||||
formats.append(extract_format(stream_url.replace(
|
||||
'/playlist.m3u8', ''), 'http-%d' % tbr if tbr else None, idx * 2 + 1, {'pno': 1031}))
|
||||
r'_(\d+)_mp4/', format_url, 'tbr', default=None))
|
||||
formats.append({
|
||||
'format_id': compat_str(tbr or idx),
|
||||
'url': format_url,
|
||||
'ext': 'mp4',
|
||||
'tbr': tbr,
|
||||
'protocol': 'm3u8_native',
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': info['title'].strip(),
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'description': info.get('desc'),
|
||||
'duration': int_or_none(info.get('duration')),
|
||||
|
@ -211,7 +211,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||
|
||||
def _extract_triforce_mgid(self, webpage, data_zone=None, video_id=None):
|
||||
triforce_feed = self._parse_json(self._search_regex(
|
||||
r'triforceManifestFeed\s*=\s*(\{.+?\});\n', webpage,
|
||||
r'triforceManifestFeed\s*=\s*({.+?})\s*;\s*\n', webpage,
|
||||
'triforce feed', default='{}'), video_id, fatal=False)
|
||||
|
||||
data_zone = self._search_regex(
|
||||
@ -304,7 +304,7 @@ class MTVServicesEmbeddedIE(MTVServicesInfoExtractor):
|
||||
|
||||
class MTVIE(MTVServicesInfoExtractor):
|
||||
IE_NAME = 'mtv'
|
||||
_VALID_URL = r'https?://(?:www\.)?mtv\.com/(?:video-clips|full-episodes)/(?P<id>[^/?#.]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?mtv\.com/(?:video-clips|(?:full-)?episodes)/(?P<id>[^/?#.]+)'
|
||||
_FEED_URL = 'http://www.mtv.com/feeds/mrss/'
|
||||
|
||||
_TESTS = [{
|
||||
@ -321,9 +321,41 @@ class MTVIE(MTVServicesInfoExtractor):
|
||||
}, {
|
||||
'url': 'http://www.mtv.com/full-episodes/94tujl/unlocking-the-truth-gates-of-hell-season-1-ep-101',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.mtv.com/episodes/g8xu7q/teen-mom-2-breaking-the-wall-season-7-ep-713',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
|
||||
class MTV81IE(InfoExtractor):
|
||||
IE_NAME = 'mtv81'
|
||||
_VALID_URL = r'https?://(?:www\.)?mtv81\.com/videos/(?P<id>[^/?#.]+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.mtv81.com/videos/artist-to-watch/the-godfather-of-japanese-hip-hop-segment-1/',
|
||||
'md5': '1edbcdf1e7628e414a8c5dcebca3d32b',
|
||||
'info_dict': {
|
||||
'id': '5e14040d-18a4-47c4-a582-43ff602de88e',
|
||||
'ext': 'mp4',
|
||||
'title': 'Unlocking The Truth|July 18, 2016|1|101|Trailer',
|
||||
'description': '"Unlocking the Truth" premieres August 17th at 11/10c.',
|
||||
'timestamp': 1468846800,
|
||||
'upload_date': '20160718',
|
||||
},
|
||||
}
|
||||
|
||||
def _extract_mgid(self, webpage):
|
||||
return self._search_regex(
|
||||
r'getTheVideo\((["\'])(?P<id>mgid:.+?)\1', webpage,
|
||||
'mgid', group='id')
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
mgid = self._extract_mgid(webpage)
|
||||
return self.url_result('http://media.mtvnservices.com/embed/%s' % mgid)
|
||||
|
||||
|
||||
class MTVVideoIE(MTVServicesInfoExtractor):
|
||||
IE_NAME = 'mtv:video'
|
||||
_VALID_URL = r'''(?x)^https?://
|
||||
|
@ -17,9 +17,10 @@ class MySpaceIE(InfoExtractor):
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'https://myspace.com/fiveminutestothestage/video/little-big-town/109594919',
|
||||
'md5': '9c1483c106f4a695c47d2911feed50a7',
|
||||
'info_dict': {
|
||||
'id': '109594919',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Little Big Town',
|
||||
'description': 'This country quartet was all smiles while playing a sold out show at the Pacific Amphitheatre in Orange County, California.',
|
||||
'uploader': 'Five Minutes to the Stage',
|
||||
@ -27,37 +28,30 @@ class MySpaceIE(InfoExtractor):
|
||||
'timestamp': 1414108751,
|
||||
'upload_date': '20141023',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# songs
|
||||
{
|
||||
'url': 'https://myspace.com/killsorrow/music/song/of-weakened-soul...-93388656-103880681',
|
||||
'md5': '1d7ee4604a3da226dd69a123f748b262',
|
||||
'info_dict': {
|
||||
'id': '93388656',
|
||||
'ext': 'flv',
|
||||
'ext': 'm4a',
|
||||
'title': 'Of weakened soul...',
|
||||
'uploader': 'Killsorrow',
|
||||
'uploader_id': 'killsorrow',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'add_ie': ['Vevo'],
|
||||
'add_ie': ['Youtube'],
|
||||
'url': 'https://myspace.com/threedaysgrace/music/song/animal-i-have-become-28400208-28218041',
|
||||
'info_dict': {
|
||||
'id': 'USZM20600099',
|
||||
'ext': 'mp4',
|
||||
'title': 'Animal I Have Become',
|
||||
'uploader': 'Three Days Grace',
|
||||
'timestamp': int,
|
||||
'upload_date': '20060502',
|
||||
'id': 'xqds0B_meys',
|
||||
'ext': 'webm',
|
||||
'title': 'Three Days Grace - Animal I Have Become',
|
||||
'description': 'md5:8bd86b3693e72a077cf863a8530c54bb',
|
||||
'uploader': 'ThreeDaysGraceVEVO',
|
||||
'uploader_id': 'ThreeDaysGraceVEVO',
|
||||
'upload_date': '20091002',
|
||||
},
|
||||
'skip': 'VEVO is only available in some countries',
|
||||
}, {
|
||||
'add_ie': ['Youtube'],
|
||||
'url': 'https://myspace.com/starset2/music/song/first-light-95799905-106964426',
|
||||
@ -76,24 +70,46 @@ class MySpaceIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
is_song = mobj.group('mediatype').startswith('music/song')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
player_url = self._search_regex(
|
||||
r'playerSwf":"([^"?]*)', webpage, 'player URL')
|
||||
r'videoSwf":"([^"?]*)', webpage, 'player URL', fatal=False)
|
||||
|
||||
def rtmp_format_from_stream_url(stream_url, width=None, height=None):
|
||||
rtmp_url, play_path = stream_url.split(';', 1)
|
||||
return {
|
||||
'format_id': 'rtmp',
|
||||
'url': rtmp_url,
|
||||
'play_path': play_path,
|
||||
'player_url': player_url,
|
||||
'protocol': 'rtmp',
|
||||
'ext': 'flv',
|
||||
'width': width,
|
||||
'height': height,
|
||||
}
|
||||
def formats_from_stream_urls(stream_url, hls_stream_url, http_stream_url, width=None, height=None):
|
||||
formats = []
|
||||
vcodec = 'none' if is_song else None
|
||||
if hls_stream_url:
|
||||
formats.append({
|
||||
'format_id': 'hls',
|
||||
'url': hls_stream_url,
|
||||
'protocol': 'm3u8_native',
|
||||
'ext': 'm4a' if is_song else 'mp4',
|
||||
'vcodec': vcodec,
|
||||
})
|
||||
if stream_url and player_url:
|
||||
rtmp_url, play_path = stream_url.split(';', 1)
|
||||
formats.append({
|
||||
'format_id': 'rtmp',
|
||||
'url': rtmp_url,
|
||||
'play_path': play_path,
|
||||
'player_url': player_url,
|
||||
'protocol': 'rtmp',
|
||||
'ext': 'flv',
|
||||
'width': width,
|
||||
'height': height,
|
||||
'vcodec': vcodec,
|
||||
})
|
||||
if http_stream_url:
|
||||
formats.append({
|
||||
'format_id': 'http',
|
||||
'url': http_stream_url,
|
||||
'width': width,
|
||||
'height': height,
|
||||
'vcodec': vcodec,
|
||||
})
|
||||
return formats
|
||||
|
||||
if mobj.group('mediatype').startswith('music/song'):
|
||||
if is_song:
|
||||
# songs don't store any useful info in the 'context' variable
|
||||
song_data = self._search_regex(
|
||||
r'''<button.*data-song-id=(["\'])%s\1.*''' % video_id,
|
||||
@ -108,8 +124,10 @@ class MySpaceIE(InfoExtractor):
|
||||
return self._search_regex(
|
||||
r'''data-%s=([\'"])(?P<data>.*?)\1''' % name,
|
||||
song_data, name, default='', group='data')
|
||||
stream_url = search_data('stream-url')
|
||||
if not stream_url:
|
||||
formats = formats_from_stream_urls(
|
||||
search_data('stream-url'), search_data('hls-stream-url'),
|
||||
search_data('http-stream-url'))
|
||||
if not formats:
|
||||
vevo_id = search_data('vevo-id')
|
||||
youtube_id = search_data('youtube-id')
|
||||
if vevo_id:
|
||||
@ -121,6 +139,7 @@ class MySpaceIE(InfoExtractor):
|
||||
else:
|
||||
raise ExtractorError(
|
||||
'Found song but don\'t know how to download it')
|
||||
self._sort_formats(formats)
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': self._og_search_title(webpage),
|
||||
@ -128,27 +147,16 @@ class MySpaceIE(InfoExtractor):
|
||||
'uploader_id': search_data('artist-username'),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'duration': int_or_none(search_data('duration')),
|
||||
'formats': [rtmp_format_from_stream_url(stream_url)]
|
||||
'formats': formats,
|
||||
}
|
||||
else:
|
||||
video = self._parse_json(self._search_regex(
|
||||
r'context = ({.*?});', webpage, 'context'),
|
||||
video_id)['video']
|
||||
formats = []
|
||||
hls_stream_url = video.get('hlsStreamUrl')
|
||||
if hls_stream_url:
|
||||
formats.append({
|
||||
'format_id': 'hls',
|
||||
'url': hls_stream_url,
|
||||
'protocol': 'm3u8_native',
|
||||
'ext': 'mp4',
|
||||
})
|
||||
stream_url = video.get('streamUrl')
|
||||
if stream_url:
|
||||
formats.append(rtmp_format_from_stream_url(
|
||||
stream_url,
|
||||
int_or_none(video.get('width')),
|
||||
int_or_none(video.get('height'))))
|
||||
formats = formats_from_stream_urls(
|
||||
video.get('streamUrl'), video.get('hlsStreamUrl'),
|
||||
video.get('mp4StreamUrl'), int_or_none(video.get('width')),
|
||||
int_or_none(video.get('height')))
|
||||
self._sort_formats(formats)
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@ -12,10 +12,10 @@ from ..utils import (
|
||||
|
||||
|
||||
class NaverIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:m\.)?tvcast\.naver\.com/v/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:m\.)?tv(?:cast)?\.naver\.com/v/(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://tvcast.naver.com/v/81652',
|
||||
'url': 'http://tv.naver.com/v/81652',
|
||||
'info_dict': {
|
||||
'id': '81652',
|
||||
'ext': 'mp4',
|
||||
@ -24,7 +24,7 @@ class NaverIE(InfoExtractor):
|
||||
'upload_date': '20130903',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://tvcast.naver.com/v/395837',
|
||||
'url': 'http://tv.naver.com/v/395837',
|
||||
'md5': '638ed4c12012c458fefcddfd01f173cd',
|
||||
'info_dict': {
|
||||
'id': '395837',
|
||||
@ -34,6 +34,9 @@ class NaverIE(InfoExtractor):
|
||||
'upload_date': '20150519',
|
||||
},
|
||||
'skip': 'Georestricted',
|
||||
}, {
|
||||
'url': 'http://tvcast.naver.com/v/81652',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@ -4,23 +4,26 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .theplatform import ThePlatformIE
|
||||
from .adobepass import AdobePassIE
|
||||
from ..compat import compat_urllib_parse_urlparse
|
||||
from ..utils import (
|
||||
find_xpath_attr,
|
||||
lowercase_escape,
|
||||
smuggle_url,
|
||||
unescapeHTML,
|
||||
update_url_query,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class NBCIE(InfoExtractor):
|
||||
class NBCIE(AdobePassIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?nbc\.com/(?:[^/]+/)+(?P<id>n?\d+)'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.nbc.com/the-tonight-show/segments/112966',
|
||||
'url': 'http://www.nbc.com/the-tonight-show/video/jimmy-fallon-surprises-fans-at-ben-jerrys/2848237',
|
||||
'info_dict': {
|
||||
'id': '112966',
|
||||
'id': '2848237',
|
||||
'ext': 'mp4',
|
||||
'title': 'Jimmy Fallon Surprises Fans at Ben & Jerry\'s',
|
||||
'description': 'Jimmy gives out free scoops of his new "Tonight Dough" ice cream flavor by surprising customers at the Ben & Jerry\'s scoop shop.',
|
||||
@ -69,7 +72,7 @@ class NBCIE(InfoExtractor):
|
||||
# HLS streams requires the 'hdnea3' cookie
|
||||
'url': 'http://www.nbc.com/Kings/video/goliath/n1806',
|
||||
'info_dict': {
|
||||
'id': 'n1806',
|
||||
'id': '101528f5a9e8127b107e98c5e6ce4638',
|
||||
'ext': 'mp4',
|
||||
'title': 'Goliath',
|
||||
'description': 'When an unknown soldier saves the life of the King\'s son in battle, he\'s thrust into the limelight and politics of the kingdom.',
|
||||
@ -87,21 +90,57 @@ class NBCIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
theplatform_url = unescapeHTML(lowercase_escape(self._html_search_regex(
|
||||
[
|
||||
r'(?:class="video-player video-player-full" data-mpx-url|class="player" src)="(.*?)"',
|
||||
r'<iframe[^>]+src="((?:https?:)?//player\.theplatform\.com/[^"]+)"',
|
||||
r'"embedURL"\s*:\s*"([^"]+)"'
|
||||
],
|
||||
webpage, 'theplatform url').replace('_no_endcard', '').replace('\\/', '/')))
|
||||
if theplatform_url.startswith('//'):
|
||||
theplatform_url = 'http:' + theplatform_url
|
||||
return {
|
||||
info = {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': 'ThePlatform',
|
||||
'url': smuggle_url(theplatform_url, {'source_url': url}),
|
||||
'id': video_id,
|
||||
}
|
||||
video_data = None
|
||||
preload = self._search_regex(
|
||||
r'PRELOAD\s*=\s*({.+})', webpage, 'preload data', default=None)
|
||||
if preload:
|
||||
preload_data = self._parse_json(preload, video_id)
|
||||
path = compat_urllib_parse_urlparse(url).path.rstrip('/')
|
||||
entity_id = preload_data.get('xref', {}).get(path)
|
||||
video_data = preload_data.get('entities', {}).get(entity_id)
|
||||
if video_data:
|
||||
query = {
|
||||
'mbr': 'true',
|
||||
'manifest': 'm3u',
|
||||
}
|
||||
video_id = video_data['guid']
|
||||
title = video_data['title']
|
||||
if video_data.get('entitlement') == 'auth':
|
||||
resource = self._get_mvpd_resource(
|
||||
'nbcentertainment', title, video_id,
|
||||
video_data.get('vChipRating'))
|
||||
query['auth'] = self._extract_mvpd_auth(
|
||||
url, video_id, 'nbcentertainment', resource)
|
||||
theplatform_url = smuggle_url(update_url_query(
|
||||
'http://link.theplatform.com/s/NnzsPC/media/guid/2410887629/' + video_id,
|
||||
query), {'force_smil_url': True})
|
||||
info.update({
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'url': theplatform_url,
|
||||
'description': video_data.get('description'),
|
||||
'keywords': video_data.get('keywords'),
|
||||
'season_number': int_or_none(video_data.get('seasonNumber')),
|
||||
'episode_number': int_or_none(video_data.get('episodeNumber')),
|
||||
'series': video_data.get('showName'),
|
||||
})
|
||||
else:
|
||||
theplatform_url = unescapeHTML(lowercase_escape(self._html_search_regex(
|
||||
[
|
||||
r'(?:class="video-player video-player-full" data-mpx-url|class="player" src)="(.*?)"',
|
||||
r'<iframe[^>]+src="((?:https?:)?//player\.theplatform\.com/[^"]+)"',
|
||||
r'"embedURL"\s*:\s*"([^"]+)"'
|
||||
],
|
||||
webpage, 'theplatform url').replace('_no_endcard', '').replace('\\/', '/')))
|
||||
if theplatform_url.startswith('//'):
|
||||
theplatform_url = 'http:' + theplatform_url
|
||||
info['url'] = smuggle_url(theplatform_url, {'source_url': url})
|
||||
return info
|
||||
|
||||
|
||||
class NBCSportsVPlayerIE(InfoExtractor):
|
||||
|
@ -2,7 +2,15 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import parse_iso8601
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
get_element_by_class,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
remove_start,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
class NextMediaIE(InfoExtractor):
|
||||
@ -30,6 +38,12 @@ class NextMediaIE(InfoExtractor):
|
||||
return self._extract_from_nextmedia_page(news_id, url, page)
|
||||
|
||||
def _extract_from_nextmedia_page(self, news_id, url, page):
|
||||
redirection_url = self._search_regex(
|
||||
r'window\.location\.href\s*=\s*([\'"])(?P<url>(?!\1).+)\1',
|
||||
page, 'redirection URL', default=None, group='url')
|
||||
if redirection_url:
|
||||
return self.url_result(compat_urlparse.urljoin(url, redirection_url))
|
||||
|
||||
title = self._fetch_title(page)
|
||||
video_url = self._search_regex(self._URL_PATTERN, page, 'video url')
|
||||
|
||||
@ -93,7 +107,7 @@ class NextMediaActionNewsIE(NextMediaIE):
|
||||
|
||||
class AppleDailyIE(NextMediaIE):
|
||||
IE_DESC = '臺灣蘋果日報'
|
||||
_VALID_URL = r'https?://(www|ent)\.appledaily\.com\.tw/(?:animation|appledaily|enews|realtimenews|actionnews)/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)(/.*)?'
|
||||
_VALID_URL = r'https?://(www|ent)\.appledaily\.com\.tw/[^/]+/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)(/.*)?'
|
||||
_TESTS = [{
|
||||
'url': 'http://ent.appledaily.com.tw/enews/article/entertainment/20150128/36354694',
|
||||
'md5': 'a843ab23d150977cc55ef94f1e2c1e4d',
|
||||
@ -157,6 +171,10 @@ class AppleDailyIE(NextMediaIE):
|
||||
}, {
|
||||
'url': 'http://www.appledaily.com.tw/actionnews/appledaily/7/20161003/960588/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Redirected from http://ent.appledaily.com.tw/enews/article/entertainment/20150128/36354694
|
||||
'url': 'http://ent.appledaily.com.tw/section/article/headline/20150128/36354694',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_URL_PATTERN = r'\{url: \'(.+)\'\}'
|
||||
@ -173,3 +191,48 @@ class AppleDailyIE(NextMediaIE):
|
||||
|
||||
def _fetch_description(self, page):
|
||||
return self._html_search_meta('description', page, 'news description')
|
||||
|
||||
|
||||
class NextTVIE(InfoExtractor):
|
||||
IE_DESC = '壹電視'
|
||||
_VALID_URL = r'https?://(?:www\.)?nexttv\.com\.tw/(?:[^/]+/)+(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.nexttv.com.tw/news/realtime/politics/11779671',
|
||||
'info_dict': {
|
||||
'id': '11779671',
|
||||
'ext': 'mp4',
|
||||
'title': '「超收稅」近4千億! 藍議員籲發消費券',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'timestamp': 1484825400,
|
||||
'upload_date': '20170119',
|
||||
'view_count': int,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<h1[^>]*>([^<]+)</h1>', webpage, 'title')
|
||||
|
||||
data = self._hidden_inputs(webpage)
|
||||
|
||||
video_url = data['ntt-vod-src-detailview']
|
||||
|
||||
date_str = get_element_by_class('date', webpage)
|
||||
timestamp = unified_timestamp(date_str + '+0800') if date_str else None
|
||||
|
||||
view_count = int_or_none(remove_start(
|
||||
clean_html(get_element_by_class('click', webpage)), '點閱:'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'url': video_url,
|
||||
'thumbnail': data.get('ntt-vod-img-src'),
|
||||
'timestamp': timestamp,
|
||||
'view_count': view_count,
|
||||
}
|
||||
|
@ -7,7 +7,6 @@ import datetime
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse_urlencode,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
@ -40,6 +39,7 @@ class NiconicoIE(InfoExtractor):
|
||||
'description': '(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org',
|
||||
'duration': 33,
|
||||
},
|
||||
'skip': 'Requires an account',
|
||||
}, {
|
||||
# File downloaded with and without credentials are different, so omit
|
||||
# the md5 field
|
||||
@ -55,6 +55,7 @@ class NiconicoIE(InfoExtractor):
|
||||
'timestamp': 1304065916,
|
||||
'duration': 209,
|
||||
},
|
||||
'skip': 'Requires an account',
|
||||
}, {
|
||||
# 'video exists but is marked as "deleted"
|
||||
# md5 is unstable
|
||||
@ -65,9 +66,10 @@ class NiconicoIE(InfoExtractor):
|
||||
'description': 'deleted',
|
||||
'title': 'ドラえもんエターナル第3話「決戦第3新東京市」<前編>',
|
||||
'upload_date': '20071224',
|
||||
'timestamp': 1198527840, # timestamp field has different value if logged in
|
||||
'timestamp': int, # timestamp field has different value if logged in
|
||||
'duration': 304,
|
||||
},
|
||||
'skip': 'Requires an account',
|
||||
}, {
|
||||
'url': 'http://www.nicovideo.jp/watch/so22543406',
|
||||
'info_dict': {
|
||||
@ -79,13 +81,12 @@ class NiconicoIE(InfoExtractor):
|
||||
'upload_date': '20140104',
|
||||
'uploader': 'アニメロチャンネル',
|
||||
'uploader_id': '312',
|
||||
}
|
||||
},
|
||||
'skip': 'The viewing period of the video you were searching for has expired.',
|
||||
}]
|
||||
|
||||
_VALID_URL = r'https?://(?:www\.|secure\.)?nicovideo\.jp/watch/(?P<id>(?:[a-z]{2})?[0-9]+)'
|
||||
_NETRC_MACHINE = 'niconico'
|
||||
# Determine whether the downloader used authentication to download video
|
||||
_AUTHENTICATED = False
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
@ -109,8 +110,6 @@ class NiconicoIE(InfoExtractor):
|
||||
if re.search(r'(?i)<h1 class="mb8p4">Log in error</h1>', login_results) is not None:
|
||||
self._downloader.report_warning('unable to log in: bad username or password')
|
||||
return False
|
||||
# Successful login
|
||||
self._AUTHENTICATED = True
|
||||
return True
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -128,35 +127,19 @@ class NiconicoIE(InfoExtractor):
|
||||
'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id,
|
||||
note='Downloading video info page')
|
||||
|
||||
if self._AUTHENTICATED:
|
||||
# Get flv info
|
||||
flv_info_webpage = self._download_webpage(
|
||||
'http://flapi.nicovideo.jp/api/getflv/' + video_id + '?as3=1',
|
||||
video_id, 'Downloading flv info')
|
||||
else:
|
||||
# Get external player info
|
||||
ext_player_info = self._download_webpage(
|
||||
'http://ext.nicovideo.jp/thumb_watch/' + video_id, video_id)
|
||||
thumb_play_key = self._search_regex(
|
||||
r'\'thumbPlayKey\'\s*:\s*\'(.*?)\'', ext_player_info, 'thumbPlayKey')
|
||||
|
||||
# Get flv info
|
||||
flv_info_data = compat_urllib_parse_urlencode({
|
||||
'k': thumb_play_key,
|
||||
'v': video_id
|
||||
})
|
||||
flv_info_request = sanitized_Request(
|
||||
'http://ext.nicovideo.jp/thumb_watch', flv_info_data,
|
||||
{'Content-Type': 'application/x-www-form-urlencoded'})
|
||||
flv_info_webpage = self._download_webpage(
|
||||
flv_info_request, video_id,
|
||||
note='Downloading flv info', errnote='Unable to download flv info')
|
||||
# Get flv info
|
||||
flv_info_webpage = self._download_webpage(
|
||||
'http://flapi.nicovideo.jp/api/getflv/' + video_id + '?as3=1',
|
||||
video_id, 'Downloading flv info')
|
||||
|
||||
flv_info = compat_urlparse.parse_qs(flv_info_webpage)
|
||||
if 'url' not in flv_info:
|
||||
if 'deleted' in flv_info:
|
||||
raise ExtractorError('The video has been deleted.',
|
||||
expected=True)
|
||||
elif 'closed' in flv_info:
|
||||
raise ExtractorError('Niconico videos now require logging in',
|
||||
expected=True)
|
||||
else:
|
||||
raise ExtractorError('Unable to find video URL')
|
||||
|
||||
|
@ -19,6 +19,7 @@ class NineCNineMediaBaseIE(InfoExtractor):
|
||||
|
||||
class NineCNineMediaStackIE(NineCNineMediaBaseIE):
|
||||
IE_NAME = '9c9media:stack'
|
||||
_GEO_COUNTRIES = ['CA']
|
||||
_VALID_URL = r'9c9media:stack:(?P<destination_code>[^:]+):(?P<content_id>\d+):(?P<content_package>\d+):(?P<id>\d+)'
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@ -241,7 +241,7 @@ class NPOIE(NPOBaseIE):
|
||||
if metadata.get('tt888') == 'ja':
|
||||
subtitles['nl'] = [{
|
||||
'ext': 'vtt',
|
||||
'url': 'http://e.omroep.nl/tt888/%s' % video_id,
|
||||
'url': 'http://tt888.omroep.nl/tt888/%s' % video_id,
|
||||
}]
|
||||
|
||||
return {
|
||||
|
@ -1,7 +1,6 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import random
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@ -15,24 +14,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class NRKBaseIE(InfoExtractor):
|
||||
_faked_ip = None
|
||||
|
||||
def _download_webpage_handle(self, *args, **kwargs):
|
||||
# NRK checks X-Forwarded-For HTTP header in order to figure out the
|
||||
# origin of the client behind proxy. This allows to bypass geo
|
||||
# restriction by faking this header's value to some Norway IP.
|
||||
# We will do so once we encounter any geo restriction error.
|
||||
if self._faked_ip:
|
||||
# NB: str is intentional
|
||||
kwargs.setdefault(str('headers'), {})['X-Forwarded-For'] = self._faked_ip
|
||||
return super(NRKBaseIE, self)._download_webpage_handle(*args, **kwargs)
|
||||
|
||||
def _fake_ip(self):
|
||||
# Use fake IP from 37.191.128.0/17 in order to workaround geo
|
||||
# restriction
|
||||
def octet(lb=0, ub=255):
|
||||
return random.randint(lb, ub)
|
||||
self._faked_ip = '37.191.%d.%d' % (octet(128), octet())
|
||||
_GEO_COUNTRIES = ['NO']
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
@ -44,8 +26,6 @@ class NRKBaseIE(InfoExtractor):
|
||||
title = data.get('fullTitle') or data.get('mainTitle') or data['title']
|
||||
video_id = data.get('id') or video_id
|
||||
|
||||
http_headers = {'X-Forwarded-For': self._faked_ip} if self._faked_ip else {}
|
||||
|
||||
entries = []
|
||||
|
||||
conviva = data.get('convivaStatistics') or {}
|
||||
@ -90,7 +70,6 @@ class NRKBaseIE(InfoExtractor):
|
||||
'duration': duration,
|
||||
'subtitles': subtitles,
|
||||
'formats': formats,
|
||||
'http_headers': http_headers,
|
||||
})
|
||||
|
||||
if not entries:
|
||||
@ -107,19 +86,17 @@ class NRKBaseIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
if not entries:
|
||||
message_type = data.get('messageType', '')
|
||||
# Can be ProgramIsGeoBlocked or ChannelIsGeoBlocked*
|
||||
if 'IsGeoBlocked' in message_type and not self._faked_ip:
|
||||
self.report_warning(
|
||||
'Video is geo restricted, trying to fake IP')
|
||||
self._fake_ip()
|
||||
return self._real_extract(url)
|
||||
|
||||
MESSAGES = {
|
||||
'ProgramRightsAreNotReady': 'Du kan dessverre ikke se eller høre programmet',
|
||||
'ProgramRightsHasExpired': 'Programmet har gått ut',
|
||||
'ProgramIsGeoBlocked': 'NRK har ikke rettigheter til å vise dette programmet utenfor Norge',
|
||||
}
|
||||
message_type = data.get('messageType', '')
|
||||
# Can be ProgramIsGeoBlocked or ChannelIsGeoBlocked*
|
||||
if 'IsGeoBlocked' in message_type:
|
||||
self.raise_geo_restricted(
|
||||
msg=MESSAGES.get('ProgramIsGeoBlocked'),
|
||||
countries=self._GEO_COUNTRIES)
|
||||
raise ExtractorError(
|
||||
'%s said: %s' % (self.IE_NAME, MESSAGES.get(
|
||||
message_type, message_type)),
|
||||
@ -128,6 +105,22 @@ class NRKBaseIE(InfoExtractor):
|
||||
series = conviva.get('seriesName') or data.get('seriesTitle')
|
||||
episode = conviva.get('episodeName') or data.get('episodeNumberOrDate')
|
||||
|
||||
season_number = None
|
||||
episode_number = None
|
||||
if data.get('mediaElementType') == 'Episode':
|
||||
_season_episode = data.get('scoresStatistics', {}).get('springStreamStream') or \
|
||||
data.get('relativeOriginUrl', '')
|
||||
EPISODENUM_RE = [
|
||||
r'/s(?P<season>\d{,2})e(?P<episode>\d{,2})\.',
|
||||
r'/sesong-(?P<season>\d{,2})/episode-(?P<episode>\d{,2})',
|
||||
]
|
||||
season_number = int_or_none(self._search_regex(
|
||||
EPISODENUM_RE, _season_episode, 'season number',
|
||||
default=None, group='season'))
|
||||
episode_number = int_or_none(self._search_regex(
|
||||
EPISODENUM_RE, _season_episode, 'episode number',
|
||||
default=None, group='episode'))
|
||||
|
||||
thumbnails = None
|
||||
images = data.get('images')
|
||||
if images and isinstance(images, dict):
|
||||
@ -140,11 +133,15 @@ class NRKBaseIE(InfoExtractor):
|
||||
} for image in web_images if image.get('imageUrl')]
|
||||
|
||||
description = data.get('description')
|
||||
category = data.get('mediaAnalytics', {}).get('category')
|
||||
|
||||
common_info = {
|
||||
'description': description,
|
||||
'series': series,
|
||||
'episode': episode,
|
||||
'season_number': season_number,
|
||||
'episode_number': episode_number,
|
||||
'categories': [category] if category else None,
|
||||
'age_limit': parse_age_limit(data.get('legalAge')),
|
||||
'thumbnails': thumbnails,
|
||||
}
|
||||
@ -168,12 +165,12 @@ class NRKIE(NRKBaseIE):
|
||||
https?://
|
||||
(?:
|
||||
(?:www\.)?nrk\.no/video/PS\*|
|
||||
v8-psapi\.nrk\.no/mediaelement/
|
||||
v8[-.]psapi\.nrk\.no/mediaelement/
|
||||
)
|
||||
)
|
||||
(?P<id>[^/?#&]+)
|
||||
(?P<id>[^?#&]+)
|
||||
'''
|
||||
_API_HOST = 'v8.psapi.nrk.no'
|
||||
_API_HOST = 'v8-psapi.nrk.no'
|
||||
_TESTS = [{
|
||||
# video
|
||||
'url': 'http://www.nrk.no/video/PS*150533',
|
||||
@ -199,6 +196,9 @@ class NRKIE(NRKBaseIE):
|
||||
}, {
|
||||
'url': 'nrk:ecc1b952-96dc-4a98-81b9-5296dc7a98d9',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'nrk:clip/7707d5a3-ebe7-434a-87d5-a3ebe7a34a70',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://v8-psapi.nrk.no/mediaelement/ecc1b952-96dc-4a98-81b9-5296dc7a98d9',
|
||||
'only_matching': True,
|
||||
@ -227,54 +227,102 @@ class NRKTVIE(NRKBaseIE):
|
||||
'title': '20 spørsmål 23.05.2014',
|
||||
'description': 'md5:bdea103bc35494c143c6a9acdd84887a',
|
||||
'duration': 1741,
|
||||
'series': '20 spørsmål - TV',
|
||||
'episode': '23.05.2014',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://tv.nrk.no/program/mdfp15000514',
|
||||
'md5': '43d0be26663d380603a9cf0c24366531',
|
||||
'info_dict': {
|
||||
'id': 'MDFP15000514CA',
|
||||
'ext': 'mp4',
|
||||
'title': 'Grunnlovsjubiléet - Stor ståhei for ingenting 24.05.2014',
|
||||
'description': 'md5:89290c5ccde1b3a24bb8050ab67fe1db',
|
||||
'duration': 4605,
|
||||
'series': 'Kunnskapskanalen',
|
||||
'episode': '24.05.2014',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# single playlist video
|
||||
'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015#del=2',
|
||||
'md5': 'adbd1dbd813edaf532b0a253780719c2',
|
||||
'info_dict': {
|
||||
'id': 'MSPO40010515-part2',
|
||||
'ext': 'flv',
|
||||
'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)',
|
||||
'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
|
||||
},
|
||||
'skip': 'Only works from Norway',
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['Video is geo restricted'],
|
||||
'skip': 'particular part is not supported currently',
|
||||
}, {
|
||||
'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015',
|
||||
'playlist': [{
|
||||
'md5': '9480285eff92d64f06e02a5367970a7a',
|
||||
'info_dict': {
|
||||
'id': 'MSPO40010515-part1',
|
||||
'ext': 'flv',
|
||||
'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 1:2)',
|
||||
'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
|
||||
'id': 'MSPO40010515AH',
|
||||
'ext': 'mp4',
|
||||
'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015 (Part 1)',
|
||||
'description': 'md5:c03aba1e917561eface5214020551b7a',
|
||||
'duration': 772,
|
||||
'series': 'Tour de Ski',
|
||||
'episode': '06.01.2015',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'md5': 'adbd1dbd813edaf532b0a253780719c2',
|
||||
'info_dict': {
|
||||
'id': 'MSPO40010515-part2',
|
||||
'ext': 'flv',
|
||||
'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)',
|
||||
'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
|
||||
'id': 'MSPO40010515BH',
|
||||
'ext': 'mp4',
|
||||
'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015 (Part 2)',
|
||||
'description': 'md5:c03aba1e917561eface5214020551b7a',
|
||||
'duration': 6175,
|
||||
'series': 'Tour de Ski',
|
||||
'episode': '06.01.2015',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}],
|
||||
'info_dict': {
|
||||
'id': 'MSPO40010515',
|
||||
'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn',
|
||||
'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
|
||||
'duration': 6947.52,
|
||||
'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015',
|
||||
'description': 'md5:c03aba1e917561eface5214020551b7a',
|
||||
},
|
||||
'expected_warnings': ['Video is geo restricted'],
|
||||
}, {
|
||||
'url': 'https://tv.nrk.no/serie/anno/KMTE50001317/sesong-3/episode-13',
|
||||
'info_dict': {
|
||||
'id': 'KMTE50001317AA',
|
||||
'ext': 'mp4',
|
||||
'title': 'Anno 13:30',
|
||||
'description': 'md5:11d9613661a8dbe6f9bef54e3a4cbbfa',
|
||||
'duration': 2340,
|
||||
'series': 'Anno',
|
||||
'episode': '13:30',
|
||||
'season_number': 3,
|
||||
'episode_number': 13,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://tv.nrk.no/serie/nytt-paa-nytt/MUHH46000317/27-01-2017',
|
||||
'info_dict': {
|
||||
'id': 'MUHH46000317AA',
|
||||
'ext': 'mp4',
|
||||
'title': 'Nytt på Nytt 27.01.2017',
|
||||
'description': 'md5:5358d6388fba0ea6f0b6d11c48b9eb4b',
|
||||
'duration': 1796,
|
||||
'series': 'Nytt på nytt',
|
||||
'episode': '27.01.2017',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Only works from Norway',
|
||||
}, {
|
||||
'url': 'https://radio.nrk.no/serie/dagsnytt/NPUB21019315/12-07-2015#',
|
||||
'only_matching': True,
|
||||
@ -360,6 +408,64 @@ class NRKTVEpisodesIE(NRKPlaylistBaseIE):
|
||||
r'<h1>([^<]+)</h1>', webpage, 'title', fatal=False)
|
||||
|
||||
|
||||
class NRKTVSeriesIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/serie/(?P<id>[^/]+)'
|
||||
_ITEM_RE = r'(?:data-season=["\']|id=["\']season-)(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://tv.nrk.no/serie/groenn-glede',
|
||||
'info_dict': {
|
||||
'id': 'groenn-glede',
|
||||
'title': 'Grønn glede',
|
||||
'description': 'md5:7576e92ae7f65da6993cf90ee29e4608',
|
||||
},
|
||||
'playlist_mincount': 9,
|
||||
}, {
|
||||
'url': 'http://tv.nrksuper.no/serie/labyrint',
|
||||
'info_dict': {
|
||||
'id': 'labyrint',
|
||||
'title': 'Labyrint',
|
||||
'description': 'md5:58afd450974c89e27d5a19212eee7115',
|
||||
},
|
||||
'playlist_mincount': 3,
|
||||
}, {
|
||||
'url': 'https://tv.nrk.no/serie/broedrene-dal-og-spektralsteinene',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://tv.nrk.no/serie/saving-the-human-race',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://tv.nrk.no/serie/postmann-pat',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if NRKTVIE.suitable(url) else super(NRKTVSeriesIE, cls).suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
series_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, series_id)
|
||||
|
||||
entries = [
|
||||
self.url_result(
|
||||
'https://tv.nrk.no/program/Episodes/{series}/{season}'.format(
|
||||
series=series_id, season=season_id))
|
||||
for season_id in re.findall(self._ITEM_RE, webpage)
|
||||
]
|
||||
|
||||
title = self._html_search_meta(
|
||||
'seriestitle', webpage,
|
||||
'title', default=None) or self._og_search_title(
|
||||
webpage, fatal=False)
|
||||
|
||||
description = self._html_search_meta(
|
||||
'series_description', webpage,
|
||||
'description', default=None) or self._og_search_description(webpage)
|
||||
|
||||
return self.playlist_result(entries, series_id, title, description)
|
||||
|
||||
|
||||
class NRKSkoleIE(InfoExtractor):
|
||||
IE_DESC = 'NRK Skole'
|
||||
_VALID_URL = r'https?://(?:www\.)?nrk\.no/skole/?\?.*\bmediaId=(?P<id>\d+)'
|
||||
|
@ -1,15 +1,16 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .jwplatform import JWPlatformBaseIE
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
js_to_json,
|
||||
)
|
||||
|
||||
|
||||
class OnDemandKoreaIE(JWPlatformBaseIE):
|
||||
class OnDemandKoreaIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?ondemandkorea\.com/(?P<id>[^/]+)\.html'
|
||||
_GEO_COUNTRIES = ['US', 'CA']
|
||||
_TEST = {
|
||||
'url': 'http://www.ondemandkorea.com/ask-us-anything-e43.html',
|
||||
'info_dict': {
|
||||
@ -35,7 +36,8 @@ class OnDemandKoreaIE(JWPlatformBaseIE):
|
||||
|
||||
if 'msg_block_01.png' in webpage:
|
||||
self.raise_geo_restricted(
|
||||
'This content is not available in your region')
|
||||
msg='This content is not available in your region',
|
||||
countries=self._GEO_COUNTRIES)
|
||||
|
||||
if 'This video is only available to ODK PLUS members.' in webpage:
|
||||
raise ExtractorError(
|
||||
|
@ -23,7 +23,7 @@ class OnetBaseIE(InfoExtractor):
|
||||
return self._search_regex(
|
||||
r'id=(["\'])mvp:(?P<id>.+?)\1', webpage, 'mvp id', group='id')
|
||||
|
||||
def _extract_from_id(self, video_id, webpage):
|
||||
def _extract_from_id(self, video_id, webpage=None):
|
||||
response = self._download_json(
|
||||
'http://qi.ckm.onetapi.pl/', video_id,
|
||||
query={
|
||||
@ -74,8 +74,10 @@ class OnetBaseIE(InfoExtractor):
|
||||
|
||||
meta = video.get('meta', {})
|
||||
|
||||
title = self._og_search_title(webpage, default=None) or meta['title']
|
||||
description = self._og_search_description(webpage, default=None) or meta.get('description')
|
||||
title = (self._og_search_title(
|
||||
webpage, default=None) if webpage else None) or meta['title']
|
||||
description = (self._og_search_description(
|
||||
webpage, default=None) if webpage else None) or meta.get('description')
|
||||
duration = meta.get('length') or meta.get('lenght')
|
||||
timestamp = parse_iso8601(meta.get('addDate'), ' ')
|
||||
|
||||
@ -89,6 +91,18 @@ class OnetBaseIE(InfoExtractor):
|
||||
}
|
||||
|
||||
|
||||
class OnetMVPIE(OnetBaseIE):
    """Extractor for internal ``onetmvp:<id>`` pseudo-URLs."""

    _VALID_URL = r'onetmvp:(?P<id>\d+\.\d+)'

    _TEST = {
        'url': 'onetmvp:381027.1509591944',
        'only_matching': True,
    }

    def _real_extract(self, url):
        """Resolve the id embedded in *url* via ``_extract_from_id``."""
        mvp_id = self._match_id(url)
        return self._extract_from_id(mvp_id)
|
||||
|
||||
|
||||
class OnetIE(OnetBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?onet\.tv/[a-z]/[a-z]+/(?P<display_id>[0-9a-z-]+)/(?P<id>[0-9a-z]+)'
|
||||
IE_NAME = 'onet.tv'
|
||||
@ -167,3 +181,44 @@ class OnetChannelIE(OnetBaseIE):
|
||||
channel_title = strip_or_none(get_element_by_class('o_channelName', webpage))
|
||||
channel_description = strip_or_none(get_element_by_class('o_channelDesc', webpage))
|
||||
return self.playlist_result(entries, channel_id, channel_title, channel_description)
|
||||
|
||||
|
||||
class OnetPlIE(InfoExtractor):
    """Extractor for article pages on onet.pl and the related domains
    matched by ``_VALID_URL``.

    The page itself is only used to find the ``data-params-mvp`` id; the
    actual extraction is delegated to ``OnetMVPIE`` through an ``onetmvp:``
    URL.
    """

    IE_NAME = 'onet.pl'
    _VALID_URL = r'https?://(?:[^/]+\.)?(?:onet|businessinsider\.com|plejada)\.pl/(?:[^/]+/)+(?P<id>[0-9a-z]+)'

    _TESTS = [{
        'url': 'http://eurosport.onet.pl/zimowe/skoki-narciarskie/ziobro-wygral-kwalifikacje-w-pjongczangu/9ckrly',
        'md5': 'b94021eb56214c3969380388b6e73cb0',
        'info_dict': {
            'id': '1561707.1685479',
            'ext': 'mp4',
            'title': 'Ziobro wygrał kwalifikacje w Pjongczangu',
            'description': 'md5:61fb0740084d2d702ea96512a03585b4',
            'upload_date': '20170214',
            'timestamp': 1487078046,
        },
    }, {
        'url': 'http://film.onet.pl/zwiastuny/ghost-in-the-shell-drugi-zwiastun-pl/5q6yl3',
        'only_matching': True,
    }, {
        'url': 'http://moto.onet.pl/jak-wybierane-sa-miejsca-na-fotoradary/6rs04e',
        'only_matching': True,
    }, {
        'url': 'http://businessinsider.com.pl/wideo/scenariusz-na-koniec-swiata-wedlug-nasa/dwnqptk',
        'only_matching': True,
    }, {
        'url': 'http://plejada.pl/weronika-rosati-o-swoim-domniemanym-slubie/n2bq89',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Find the MVP id on the page and hand over to ``OnetMVPIE``."""
        page_id = self._match_id(url)

        page = self._download_webpage(url, page_id)

        mvp_id = self._search_regex(
            r'data-params-mvp=["\'](\d+\.\d+)', page, 'mvp id')

        # The result is keyed by the MVP id, not by the id taken from the URL.
        mvp_url = 'onetmvp:%s' % mvp_id
        return self.url_result(mvp_url, OnetMVPIE.ie_key(), video_id=mvp_id)
|
||||
|
@ -53,7 +53,7 @@ class OoyalaBaseIE(InfoExtractor):
|
||||
elif delivery_type == 'hds' or ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
s_url + '?hdcore=3.7.0', embed_code, f4m_id='hds', fatal=False))
|
||||
elif delivery_type == 'hds' or ext == 'mpd':
|
||||
elif delivery_type == 'dash' or ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
s_url, embed_code, mpd_id='dash', fatal=False))
|
||||
elif delivery_type == 'smooth':
|
||||
|
@ -1,6 +1,8 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_chr
|
||||
from ..utils import (
|
||||
@ -56,6 +58,12 @@ class OpenloadIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
def _extract_urls(webpage):
    """Return every openload embed URL used as an ``<iframe>`` src in *webpage*.

    The result is a list of strings in document order; the scheme is
    optional in the pattern, so scheme-less URLs are returned as found.
    """
    embed_re = r'<iframe[^>]+src=["\']((?:https?://)?(?:openload\.(?:co|io)|oload\.tv)/embed/[a-zA-Z0-9-_]+)'
    # The pattern has exactly one capturing group, so collecting group(1)
    # of each match yields the same list re.findall would.
    return [mobj.group(1) for mobj in re.finditer(embed_re, webpage)]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage('https://openload.co/embed/%s/' % video_id, video_id)
|
||||
@ -67,17 +75,17 @@ class OpenloadIE(InfoExtractor):
|
||||
'<span[^>]+id="[^"]+"[^>]*>([0-9]+)</span>',
|
||||
webpage, 'openload ID')
|
||||
|
||||
first_three_chars = int(float(ol_id[0:][:3]))
|
||||
fifth_char = int(float(ol_id[3:5]))
|
||||
urlcode = ''
|
||||
num = 5
|
||||
first_two_chars = int(float(ol_id[0:][:2]))
|
||||
urlcode = []
|
||||
num = 2
|
||||
|
||||
while num < len(ol_id):
|
||||
urlcode += compat_chr(int(float(ol_id[num:][:3])) +
|
||||
first_three_chars - fifth_char * int(float(ol_id[num + 3:][:2])))
|
||||
key = int(float(ol_id[num + 3:][:2]))
|
||||
urlcode.append((key, compat_chr(int(float(ol_id[num:][:3])) - first_two_chars)))
|
||||
num += 5
|
||||
|
||||
video_url = 'https://openload.co/stream/' + urlcode
|
||||
video_url = 'https://openload.co/stream/' + ''.join(
|
||||
[value for _, value in sorted(urlcode, key=lambda x: x[0])])
|
||||
|
||||
title = self._og_search_title(webpage, default=None) or self._search_regex(
|
||||
r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage,
|
||||
@ -93,7 +101,7 @@ class OpenloadIE(InfoExtractor):
|
||||
'thumbnail': self._og_search_thumbnail(webpage, default=None),
|
||||
'url': video_url,
|
||||
# Seems all videos have extensions in their titles
|
||||
'ext': determine_ext(title),
|
||||
'ext': determine_ext(title, 'mp4'),
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
return info_dict
|
||||
|
@ -193,6 +193,8 @@ class PBSIE(InfoExtractor):
|
||||
)
|
||||
''' % '|'.join(list(zip(*_STATIONS))[0])
|
||||
|
||||
_GEO_COUNTRIES = ['US']
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.pbs.org/tpt/constitution-usa-peter-sagal/watch/a-more-perfect-union/',
|
||||
@ -489,11 +491,13 @@ class PBSIE(InfoExtractor):
|
||||
headers=self.geo_verification_headers())
|
||||
|
||||
if redirect_info['status'] == 'error':
|
||||
message = self._ERRORS.get(
|
||||
redirect_info['http_code'], redirect_info['message'])
|
||||
if redirect_info['http_code'] == 403:
|
||||
self.raise_geo_restricted(
|
||||
msg=message, countries=self._GEO_COUNTRIES)
|
||||
raise ExtractorError(
|
||||
'%s said: %s' % (
|
||||
self.IE_NAME,
|
||||
self._ERRORS.get(redirect_info['http_code'], redirect_info['message'])),
|
||||
expected=True)
|
||||
'%s said: %s' % (self.IE_NAME, message), expected=True)
|
||||
|
||||
format_url = redirect_info.get('url')
|
||||
if not format_url:
|
||||
|
@ -16,18 +16,33 @@ from ..utils import (
|
||||
|
||||
class PikselIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://player\.piksel\.com/v/(?P<id>[a-z0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://player.piksel.com/v/nv60p12f',
|
||||
'md5': 'd9c17bbe9c3386344f9cfd32fad8d235',
|
||||
'info_dict': {
|
||||
'id': 'nv60p12f',
|
||||
'ext': 'mp4',
|
||||
'title': 'فن الحياة - الحلقة 1',
|
||||
'description': 'احدث برامج الداعية الاسلامي " مصطفي حسني " فى رمضان 2016علي النهار نور',
|
||||
'timestamp': 1465231790,
|
||||
'upload_date': '20160606',
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://player.piksel.com/v/nv60p12f',
|
||||
'md5': 'd9c17bbe9c3386344f9cfd32fad8d235',
|
||||
'info_dict': {
|
||||
'id': 'nv60p12f',
|
||||
'ext': 'mp4',
|
||||
'title': 'فن الحياة - الحلقة 1',
|
||||
'description': 'احدث برامج الداعية الاسلامي " مصطفي حسني " فى رمضان 2016علي النهار نور',
|
||||
'timestamp': 1465231790,
|
||||
'upload_date': '20160606',
|
||||
}
|
||||
},
|
||||
{
|
||||
# Original source: http://www.uscourts.gov/cameras-courts/state-washington-vs-donald-j-trump-et-al
|
||||
'url': 'https://player.piksel.com/v/v80kqp41',
|
||||
'md5': '753ddcd8cc8e4fa2dda4b7be0e77744d',
|
||||
'info_dict': {
|
||||
'id': 'v80kqp41',
|
||||
'ext': 'mp4',
|
||||
'title': 'WAW- State of Washington vs. Donald J. Trump, et al',
|
||||
'description': 'State of Washington vs. Donald J. Trump, et al, Case Number 17-CV-00141-JLR, TRO Hearing, Civil Rights Case, 02/3/2017, 1:00 PM (PST), Seattle Federal Courthouse, Seattle, WA, Judge James L. Robart presiding.',
|
||||
'timestamp': 1486171129,
|
||||
'upload_date': '20170204',
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
@staticmethod
|
||||
def _extract_url(webpage):
|
||||
@ -40,8 +55,10 @@ class PikselIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
app_token = self._search_regex(
|
||||
r'clientAPI\s*:\s*"([^"]+)"', webpage, 'app token')
|
||||
app_token = self._search_regex([
|
||||
r'clientAPI\s*:\s*"([^"]+)"',
|
||||
r'data-de-api-key\s*=\s*"([^"]+)"'
|
||||
], webpage, 'app token')
|
||||
response = self._download_json(
|
||||
'http://player.piksel.com/ws/ws_program/api/%s/mode/json/apiv/5' % app_token,
|
||||
video_id, query={
|
||||
|
@ -64,7 +64,8 @@ class PinkbikeIE(InfoExtractor):
|
||||
'video:duration', webpage, 'duration'))
|
||||
|
||||
uploader = self._search_regex(
|
||||
r'un:\s*"([^"]+)"', webpage, 'uploader', fatal=False)
|
||||
r'<a[^>]+\brel=["\']author[^>]+>([^<]+)', webpage,
|
||||
'uploader', fatal=False)
|
||||
upload_date = unified_strdate(self._search_regex(
|
||||
r'class="fullTime"[^>]+title="([^"]+)"',
|
||||
webpage, 'upload date', fatal=False))
|
||||
|
@ -18,6 +18,7 @@ from ..utils import (
|
||||
parse_duration,
|
||||
qualities,
|
||||
srt_subtitles_timecode,
|
||||
update_url_query,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
@ -92,6 +93,10 @@ class PluralsightIE(PluralsightBaseIE):
|
||||
raise ExtractorError('Unable to login: %s' % error, expected=True)
|
||||
|
||||
if all(p not in response for p in ('__INITIAL_STATE__', '"currentUser"')):
|
||||
BLOCKED = 'Your account has been blocked due to suspicious activity'
|
||||
if BLOCKED in response:
|
||||
raise ExtractorError(
|
||||
'Unable to login: %s' % BLOCKED, expected=True)
|
||||
raise ExtractorError('Unable to log in')
|
||||
|
||||
def _get_subtitles(self, author, clip_id, lang, name, duration, video_id):
|
||||
@ -157,13 +162,10 @@ class PluralsightIE(PluralsightBaseIE):
|
||||
|
||||
display_id = '%s-%s' % (name, clip_id)
|
||||
|
||||
parsed_url = compat_urlparse.urlparse(url)
|
||||
|
||||
payload_url = compat_urlparse.urlunparse(parsed_url._replace(
|
||||
netloc='app.pluralsight.com', path='player/api/v1/payload'))
|
||||
|
||||
course = self._download_json(
|
||||
payload_url, display_id, headers={'Referer': url})['payload']['course']
|
||||
'https://app.pluralsight.com/player/user/api/v1/player/payload',
|
||||
display_id, data=urlencode_postdata({'courseId': course_name}),
|
||||
headers={'Referer': url})
|
||||
|
||||
collection = course['modules']
|
||||
|
||||
@ -330,25 +332,44 @@ class PluralsightCourseIE(PluralsightBaseIE):
|
||||
# TODO: PSM cookie
|
||||
|
||||
course = self._download_json(
|
||||
'%s/data/course/%s' % (self._API_BASE, course_id),
|
||||
course_id, 'Downloading course JSON')
|
||||
'%s/player/functions/rpc' % self._API_BASE, course_id,
|
||||
'Downloading course JSON',
|
||||
data=json.dumps({
|
||||
'fn': 'bootstrapPlayer',
|
||||
'payload': {
|
||||
'courseId': course_id,
|
||||
}
|
||||
}).encode('utf-8'),
|
||||
headers={
|
||||
'Content-Type': 'application/json;charset=utf-8'
|
||||
})['payload']['course']
|
||||
|
||||
title = course['title']
|
||||
course_name = course['name']
|
||||
course_data = course['modules']
|
||||
description = course.get('description') or course.get('shortDescription')
|
||||
|
||||
course_data = self._download_json(
|
||||
'%s/data/course/content/%s' % (self._API_BASE, course_id),
|
||||
course_id, 'Downloading course data JSON')
|
||||
|
||||
entries = []
|
||||
for num, module in enumerate(course_data, 1):
|
||||
author = module.get('author')
|
||||
module_name = module.get('name')
|
||||
if not author or not module_name:
|
||||
continue
|
||||
for clip in module.get('clips', []):
|
||||
player_parameters = clip.get('playerParameters')
|
||||
if not player_parameters:
|
||||
clip_index = int_or_none(clip.get('index'))
|
||||
if clip_index is None:
|
||||
continue
|
||||
clip_url = update_url_query(
|
||||
'%s/player' % self._API_BASE, query={
|
||||
'mode': 'live',
|
||||
'course': course_name,
|
||||
'author': author,
|
||||
'name': module_name,
|
||||
'clip': clip_index,
|
||||
})
|
||||
entries.append({
|
||||
'_type': 'url_transparent',
|
||||
'url': '%s/training/player?%s' % (self._API_BASE, player_parameters),
|
||||
'url': clip_url,
|
||||
'ie_key': PluralsightIE.ie_key(),
|
||||
'chapter': module.get('title'),
|
||||
'chapter_number': num,
|
||||
|
92
youtube_dl/extractor/pornflip.py
Normal file
92
youtube_dl/extractor/pornflip.py
Normal file
@ -0,0 +1,92 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_str,
|
||||
)
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
class PornFlipIE(InfoExtractor):
    """Extractor for pornflip.com watch (``/v/``) and embed (``/embed/``) URLs.

    All media information is read from the ``flashvars`` attribute of the
    page's ``<embed>`` tag.
    """

    _VALID_URL = r'https?://(?:www\.)?pornflip\.com/(?:v|embed)/(?P<id>[0-9A-Za-z]{11})'
    _TESTS = [{
        'url': 'https://www.pornflip.com/v/wz7DfNhMmep',
        'md5': '98c46639849145ae1fd77af532a9278c',
        'info_dict': {
            'id': 'wz7DfNhMmep',
            'ext': 'mp4',
            'title': '2 Amateurs swallow make his dream cumshots true',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 112,
            'timestamp': 1481655502,
            'upload_date': '20161213',
            'uploader_id': '106786',
            'uploader': 'figifoto',
            'view_count': int,
            'age_limit': 18,
        }
    }, {
        'url': 'https://www.pornflip.com/embed/wz7DfNhMmep',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the video identified by *url*.

        The canonical ``/v/<id>`` page is always downloaded, regardless of
        whether *url* was a watch or an embed URL.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(
            'https://www.pornflip.com/v/%s' % video_id, video_id)

        raw_flashvars = self._search_regex(
            r'<embed[^>]+flashvars=(["\'])(?P<flashvars>(?:(?!\1).)+)\1',
            webpage, 'flashvars', group='flashvars')
        player_vars = compat_parse_qs(raw_flashvars)

        # The title is mandatory: a missing key raises here.
        title = player_vars['video_vars[title]'][0]

        def video_var(kind):
            # First value of video_vars[<kind>] if present and a string,
            # otherwise whatever try_get yields for a failed lookup.
            return try_get(
                player_vars, lambda x: x['video_vars[%s]' % kind][0], compat_str)

        formats = []
        for key, values in player_vars.items():
            if not values or not isinstance(values, list):
                continue
            media_url = values[0]
            if key == 'video_vars[hds_manifest]':
                # Despite the key name, the manifest is fed to the MPD
                # (DASH) extractor.
                formats.extend(self._extract_mpd_formats(
                    media_url, video_id, mpd_id='dash', fatal=False))
            else:
                # Direct HTTP formats carry their height in the key itself.
                height = self._search_regex(
                    r'video_vars\[video_urls\]\[(\d+)', key, 'height', default=None)
                if height:
                    formats.append({
                        'url': media_url,
                        'format_id': 'http-%s' % height,
                        'height': int_or_none(height),
                    })
        self._sort_formats(formats)

        uploader = self._html_search_regex(
            (r'<span[^>]+class="name"[^>]*>\s*<a[^>]+>\s*<strong>(?P<uploader>[^<]+)',
             r'<meta[^>]+content=(["\'])[^>]*\buploaded by (?P<uploader>.+?)\1'),
            webpage, 'uploader', fatal=False, group='uploader')

        thumbnail = video_var('big_thumb')
        duration = int_or_none(video_var('duration'))
        upload_date_meta = self._html_search_meta(
            'uploadDate', webpage, 'timestamp')
        timestamp = unified_timestamp(upload_date_meta)
        uploader_id = video_var('author_id')
        view_count = int_or_none(video_var('views'))

        return {
            'id': video_id,
            'formats': formats,
            'title': title,
            'thumbnail': thumbnail,
            'duration': duration,
            'timestamp': timestamp,
            'uploader_id': uploader_id,
            'uploader': uploader,
            'view_count': view_count,
            'age_limit': 18,
        }
|
@ -2,27 +2,27 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import itertools
|
||||
import os
|
||||
# import os
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_HTTPError,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_parse_unquote_plus,
|
||||
compat_urllib_parse_urlparse,
|
||||
# compat_urllib_parse_unquote,
|
||||
# compat_urllib_parse_unquote_plus,
|
||||
# compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
orderedSet,
|
||||
sanitized_Request,
|
||||
# sanitized_Request,
|
||||
str_to_int,
|
||||
)
|
||||
from ..aes import (
|
||||
aes_decrypt_text
|
||||
)
|
||||
# from ..aes import (
|
||||
# aes_decrypt_text
|
||||
# )
|
||||
|
||||
|
||||
class PornHubIE(InfoExtractor):
|
||||
@ -109,10 +109,14 @@ class PornHubIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
req = sanitized_Request(
|
||||
'http://www.pornhub.com/view_video.php?viewkey=%s' % video_id)
|
||||
req.add_header('Cookie', 'age_verified=1')
|
||||
webpage = self._download_webpage(req, video_id)
|
||||
def dl_webpage(platform):
|
||||
return self._download_webpage(
|
||||
'http://www.pornhub.com/view_video.php?viewkey=%s' % video_id,
|
||||
video_id, headers={
|
||||
'Cookie': 'age_verified=1; platform=%s' % platform,
|
||||
})
|
||||
|
||||
webpage = dl_webpage('pc')
|
||||
|
||||
error_msg = self._html_search_regex(
|
||||
r'(?s)<div[^>]+class=(["\'])(?:(?!\1).)*\b(?:removed|userMessageSection)\b(?:(?!\1).)*\1[^>]*>(?P<error>.+?)</div>',
|
||||
@ -123,10 +127,19 @@ class PornHubIE(InfoExtractor):
|
||||
'PornHub said: %s' % error_msg,
|
||||
expected=True, video_id=video_id)
|
||||
|
||||
tv_webpage = dl_webpage('tv')
|
||||
|
||||
video_url = self._search_regex(
|
||||
r'<video[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//.+?)\1', tv_webpage,
|
||||
'video url', group='url')
|
||||
|
||||
title = self._search_regex(
|
||||
r'<h1>([^>]+)</h1>', tv_webpage, 'title', default=None)
|
||||
|
||||
# video_title from flashvars contains whitespace instead of non-ASCII (see
|
||||
# http://www.pornhub.com/view_video.php?viewkey=1331683002), not relying
|
||||
# on that anymore.
|
||||
title = self._html_search_meta(
|
||||
title = title or self._html_search_meta(
|
||||
'twitter:title', webpage, default=None) or self._search_regex(
|
||||
(r'<h1[^>]+class=["\']title["\'][^>]*>(?P<title>[^<]+)',
|
||||
r'<div[^>]+data-video-title=(["\'])(?P<title>.+?)\1',
|
||||
@ -156,37 +169,6 @@ class PornHubIE(InfoExtractor):
|
||||
comment_count = self._extract_count(
|
||||
r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')
|
||||
|
||||
video_urls = list(map(compat_urllib_parse_unquote, re.findall(r"player_quality_[0-9]{3}p\s*=\s*'([^']+)'", webpage)))
|
||||
if webpage.find('"encrypted":true') != -1:
|
||||
password = compat_urllib_parse_unquote_plus(
|
||||
self._search_regex(r'"video_title":"([^"]+)', webpage, 'password'))
|
||||
video_urls = list(map(lambda s: aes_decrypt_text(s, password, 32).decode('utf-8'), video_urls))
|
||||
|
||||
formats = []
|
||||
for video_url in video_urls:
|
||||
path = compat_urllib_parse_urlparse(video_url).path
|
||||
extension = os.path.splitext(path)[1][1:]
|
||||
format = path.split('/')[5].split('_')[:2]
|
||||
format = '-'.join(format)
|
||||
|
||||
m = re.match(r'^(?P<height>[0-9]+)[pP]-(?P<tbr>[0-9]+)[kK]$', format)
|
||||
if m is None:
|
||||
height = None
|
||||
tbr = None
|
||||
else:
|
||||
height = int(m.group('height'))
|
||||
tbr = int(m.group('tbr'))
|
||||
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'ext': extension,
|
||||
'format': format,
|
||||
'format_id': format,
|
||||
'tbr': tbr,
|
||||
'height': height,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
page_params = self._parse_json(self._search_regex(
|
||||
r'page_params\.zoneDetails\[([\'"])[^\'"]+\1\]\s*=\s*(?P<data>{[^}]+})',
|
||||
webpage, 'page parameters', group='data', default='{}'),
|
||||
@ -198,6 +180,7 @@ class PornHubIE(InfoExtractor):
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'uploader': video_uploader,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
@ -206,7 +189,7 @@ class PornHubIE(InfoExtractor):
|
||||
'like_count': like_count,
|
||||
'dislike_count': dislike_count,
|
||||
'comment_count': comment_count,
|
||||
'formats': formats,
|
||||
# 'formats': formats,
|
||||
'age_limit': 18,
|
||||
'tags': tags,
|
||||
'categories': categories,
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user