Compare commits
239 Commits
2017.01.14
...
2017.02.14
Author | SHA1 | Date | |
---|---|---|---|
58a65ba852 | |||
cedf08ff54 | |||
50de3dbad3 | |||
085f169ffe | |||
f6d6ca1db3 | |||
6e5956e6ba | |||
50fd3c2c69 | |||
89c6691f9d | |||
454e5cdb17 | |||
1de9f78e71 | |||
9dad941853 | |||
1e2c3f61fc | |||
0dac7cbb09 | |||
f8514630db | |||
459818e280 | |||
6310acf512 | |||
8d38dafbbf | |||
f3915452de | |||
2f49bcd690 | |||
68c22c4c15 | |||
9b92a5917b | |||
3e2274c8b7 | |||
3d7e3aaa0e | |||
624c4b92ff | |||
2af12ad9d2 | |||
97eb9bd2ac | |||
71cdd75628 | |||
c7d6f614f3 | |||
08a00eef79 | |||
9dd5408c99 | |||
9510709575 | |||
5abcca9060 | |||
e01bfc19c3 | |||
4d32b63851 | |||
55d4de2283 | |||
61ee556aea | |||
ff24261ba0 | |||
fbc6dc525e | |||
9150d1eb69 | |||
b7f9843bec | |||
e64b0fca14 | |||
78ef214d2d | |||
be670b8e8f | |||
37084f6641 | |||
b04975733c | |||
c8b8fb0a99 | |||
8298018273 | |||
ae8d5a5c59 | |||
b9c9cb5f79 | |||
fdf9b959bc | |||
013877298d | |||
c87f95f991 | |||
f28aeff264 | |||
242a14a1f6 | |||
d5d904ff7d | |||
5620f840f6 | |||
b7a8c1bcfa | |||
7097bffba6 | |||
2aec7256ae | |||
815482d4eb | |||
9c14fe9681 | |||
e705755739 | |||
019f4c0371 | |||
2ab2c0d1f5 | |||
caf0f5f8b7 | |||
e4e50f60b1 | |||
6ef3e65a7b | |||
6fd138bed8 | |||
49bd8d5e2e | |||
3d2c2752c5 | |||
a713a86755 | |||
7bccd5fc8a | |||
3144eccf55 | |||
9db8f6c540 | |||
8e4041cf3f | |||
31487eb974 | |||
c2521c1ac6 | |||
643dc0fcfe | |||
36fce54816 | |||
2c15db829c | |||
f65dba7cdb | |||
605fd6392f | |||
f962790ee5 | |||
b7cc5f078e | |||
f7a10d8cd6 | |||
daac118bf4 | |||
8939f784d9 | |||
df0588a31f | |||
4ce3407d08 | |||
d7f9242e30 | |||
45024183ae | |||
33da98f493 | |||
4195096ea8 | |||
0bbcc8a10a | |||
b3ee552e4b | |||
a22b2fd19b | |||
c54c01f82d | |||
5a116e1302 | |||
a685751051 | |||
bd8f48c78b | |||
81aeafeb44 | |||
8bdc149441 | |||
020c5df52d | |||
da162c1135 | |||
5069594993 | |||
b996b88092 | |||
b83ef507b4 | |||
000f207944 | |||
fe5aa197b5 | |||
7882f1115e | |||
2b2d5d319b | |||
26c0f09935 | |||
c15cd29640 | |||
c38a67bcd5 | |||
363245ad94 | |||
7c5329e6f4 | |||
8fd65faece | |||
d7e215b42d | |||
3a528ffd89 | |||
3c90cc8b6f | |||
ae9a173b64 | |||
75822ca790 | |||
dadb836139 | |||
4719419951 | |||
c2d9c25f81 | |||
4d2fdb07c4 | |||
fe323a4800 | |||
f13da8af28 | |||
e228616c6e | |||
c58c2d63cb | |||
d04621daf4 | |||
76aaf1faae | |||
56fc078da8 | |||
0842b8241d | |||
59c307891a | |||
4d07b748c2 | |||
f5169501d2 | |||
186f4abe93 | |||
34cea6137e | |||
ffcfb7e3e0 | |||
c0af11abee | |||
1a241a2d02 | |||
acbb2374bc | |||
4edeac5bfa | |||
f592ff9868 | |||
24ee6b9721 | |||
a71b8d3b3b | |||
732fb3f8be | |||
008f247077 | |||
661cc229d2 | |||
b92d3c5343 | |||
ab6f6aee78 | |||
26e40542dd | |||
99a0baf370 | |||
d41ed6d243 | |||
815d2a36d8 | |||
e0b6e50ccd | |||
3a194cb4ec | |||
9b73471801 | |||
489ffc1182 | |||
0b23c222ba | |||
b51a4ebed4 | |||
9463637887 | |||
3cbecdd111 | |||
15846398ca | |||
c19ef77c31 | |||
b3277115a1 | |||
9bccdc7004 | |||
cf0cabbe50 | |||
556dbe7fe3 | |||
2417d41535 | |||
2c302cf66b | |||
c1fa3f4672 | |||
17f8deeb48 | |||
b8a03b6660 | |||
c60089c022 | |||
af59bddc4e | |||
23b35a634e | |||
74af9c700d | |||
d61aa5eb37 | |||
c3a65c3de0 | |||
ee4c091ce5 | |||
b494d6856c | |||
bc35ed3fb6 | |||
0c1c6f4b9f | |||
6d119c2a6b | |||
4201ba13e6 | |||
8bc0800d7c | |||
a089545e03 | |||
30dda24de3 | |||
9d5b29c881 | |||
6c031a35f3 | |||
271808b6b2 | |||
8d1fbe0cb2 | |||
a243abb80d | |||
42697bab3c | |||
94629e537f | |||
e84495cd8d | |||
7c20b7484c | |||
04a3d4d234 | |||
12afdc2ad6 | |||
f4ec8dce48 | |||
f3c21cb7a7 | |||
972efe60c3 | |||
4447fb2332 | |||
d77ac73790 | |||
1fe84be0f3 | |||
1076858f76 | |||
cccd70a275 | |||
eb3f008c9e | |||
f1e70fc2ff | |||
1560baacc6 | |||
460f61fac4 | |||
baa3e1845b | |||
aaf2b7c57a | |||
b687c85eab | |||
538b17a09c | |||
4e44598547 | |||
136078966b | |||
8a5f0a6357 | |||
c0bd51c090 | |||
c1c2fe2045 | |||
ddd53c392e | |||
79fc8496c6 | |||
0ce8c66fb0 | |||
906420cae3 | |||
16e2c8f771 | |||
dcae7b3fdc | |||
8e4988f1a2 | |||
a7acf868a5 | |||
6f0be93747 | |||
af62de104f | |||
cd55c6ccd7 | |||
621a2800ca | |||
b80e2ebc8d | |||
99d537a5e0 | |||
8854f3fe78 | |||
abe8cb763f | |||
a0758dfa1a |
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@ -6,8 +6,8 @@
|
||||
|
||||
---
|
||||
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.14*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.14**
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.14*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.14**
|
||||
|
||||
### Before submitting an *issue* make sure you have:
|
||||
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||
@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2017.01.14
|
||||
[debug] youtube-dl version 2017.02.14
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
@ -6,8 +6,14 @@ python:
|
||||
- "3.3"
|
||||
- "3.4"
|
||||
- "3.5"
|
||||
- "3.6"
|
||||
sudo: false
|
||||
script: nosetests test --verbose
|
||||
env:
|
||||
- YTDL_TEST_SET=core
|
||||
- YTDL_TEST_SET=download
|
||||
before_script:
|
||||
- chmod +x ./devscripts/run_tests.sh
|
||||
script: ./devscripts/run_tests.sh
|
||||
notifications:
|
||||
email:
|
||||
- filippo.valsorda@gmail.com
|
||||
|
11
AUTHORS
11
AUTHORS
@ -191,3 +191,14 @@ Rich Leeper
|
||||
Zhong Jianxin
|
||||
Thor77
|
||||
Mattias Wadman
|
||||
Arjan Verwer
|
||||
Costy Petrisor
|
||||
Logan B
|
||||
Alex Seiler
|
||||
Vijay Singh
|
||||
Paul Hartmann
|
||||
Stephen Chen
|
||||
Fabian Stahl
|
||||
Bagira
|
||||
Odd Stråbø
|
||||
Philip Herzog
|
||||
|
245
ChangeLog
245
ChangeLog
@ -1,3 +1,248 @@
|
||||
version 2017.02.14
|
||||
|
||||
Core
|
||||
* TypeError is fixed with Python 2.7.13 on Windows (#11540, #12085)
|
||||
|
||||
Extractor
|
||||
* [zdf] Fix extraction (#12117)
|
||||
* [xtube] Fix extraction for both kinds of video id (#12088)
|
||||
* [xtube] Improve title extraction (#12088)
|
||||
+ [lemonde] Fallback delegate extraction to generic extractor (#12115, #12116)
|
||||
* [bellmedia] Allow video id longer than 6 characters (#12114)
|
||||
+ [limelight] Add support for referer protected videos
|
||||
* [disney] Improve extraction (#4975, #11000, #11882, #11936)
|
||||
* [hotstar] Improve extraction (#12096)
|
||||
* [einthusan] Fix extraction (#11416)
|
||||
+ [aenetworks] Add support for lifetimemovieclub.com (#12097)
|
||||
* [youtube] Fix parsing codecs (#12091)
|
||||
|
||||
|
||||
version 2017.02.11
|
||||
|
||||
Core
|
||||
+ [utils] Introduce get_elements_by_class and get_elements_by_attribute
|
||||
utility functions
|
||||
+ [extractor/common] Skip m3u8 manifests protected with Adobe Flash Access
|
||||
|
||||
Extractor
|
||||
* [pluralsight:course] Fix extraction (#12075)
|
||||
+ [bbc] Extract m3u8 formats with 320k audio
|
||||
* [facebook] Relax video id matching (#11017, #12055, #12056)
|
||||
+ [corus] Add support for Corus Entertainment sites (#12060, #9164)
|
||||
+ [pluralsight] Detect blocked account error message (#12070)
|
||||
+ [bloomberg] Add another video id pattern (#12062)
|
||||
* [extractor/commonmistakes] Restrict URL regular expression (#12050)
|
||||
+ [tvplayer] Add support for tvplayer.com
|
||||
|
||||
|
||||
version 2017.02.10
|
||||
|
||||
Extractors
|
||||
* [xtube] Fix extraction (#12023)
|
||||
* [pornhub] Fix extraction (#12007, #12018)
|
||||
* [facebook] Improve JS data regular expression (#12042)
|
||||
* [kaltura] Improve embed partner id extraction (#12041)
|
||||
+ [sprout] Add support for sproutonline.com
|
||||
* [6play] Improve extraction
|
||||
+ [scrippsnetworks:watch] Add support for Scripps Networks sites (#10765)
|
||||
+ [go] Add support for Adobe Pass authentication (#11468, #10831)
|
||||
* [6play] Fix extraction (#12011)
|
||||
+ [nbc] Add support for Adobe Pass authentication (#12006)
|
||||
|
||||
|
||||
version 2017.02.07
|
||||
|
||||
Core
|
||||
* [extractor/common] Fix audio only with audio group in m3u8 (#11995)
|
||||
+ [downloader/fragment] Respect --no-part
|
||||
* [extractor/common] Speed-up HTML5 media entries extraction (#11979)
|
||||
|
||||
Extractors
|
||||
* [pornhub] Fix extraction (#11997)
|
||||
+ [canalplus] Add support for cstar.fr (#11990)
|
||||
+ [extractor/generic] Improve RTMP support (#11993)
|
||||
+ [gaskrank] Add support for gaskrank.tv (#11685)
|
||||
* [bandcamp] Fix extraction for incomplete albums (#11727)
|
||||
* [iwara] Fix extraction (#11781)
|
||||
* [googledrive] Fix extraction on Python 3.6
|
||||
+ [videopress] Add support for videopress.com
|
||||
+ [afreecatv] Extract RTMP formats
|
||||
|
||||
|
||||
version 2017.02.04.1
|
||||
|
||||
Extractors
|
||||
+ [twitch:stream] Add support for player.twitch.tv (#11971)
|
||||
* [radiocanada] Fix extraction for toutv rtmp formats
|
||||
|
||||
|
||||
version 2017.02.04
|
||||
|
||||
Core
|
||||
+ Add --playlist-random to shuffle playlists (#11889, #11901)
|
||||
* [utils] Improve comments processing in js_to_json (#11947)
|
||||
* [utils] Handle single-line comments in js_to_json
|
||||
* [downloader/external:ffmpeg] Minimize the use of aac_adtstoasc filter
|
||||
|
||||
Extractors
|
||||
+ [piksel] Add another app token pattern (#11969)
|
||||
+ [vk] Capture and output author blocked error message (#11965)
|
||||
+ [turner] Fix secure HLS formats downloading with ffmpeg (#11358, #11373,
|
||||
#11800)
|
||||
+ [drtv] Add support for live and radio sections (#1827, #3427)
|
||||
* [myspace] Fix extraction and extract HLS and HTTP formats
|
||||
+ [youtube] Add format info for itag 325 and 328
|
||||
* [vine] Fix extraction (#11955)
|
||||
- [sportbox] Remove extractor (#11954)
|
||||
+ [filmon] Add support for filmon.com (#11187)
|
||||
+ [infoq] Add audio only formats (#11565)
|
||||
* [douyutv] Improve room id regular expression (#11931)
|
||||
* [iprima] Fix extraction (#11920, #11896)
|
||||
* [youtube] Fix ytsearch when cookies are provided (#11924)
|
||||
* [go] Relax video id regular expression (#11937)
|
||||
* [facebook] Fix title extraction (#11941)
|
||||
+ [youtube:playlist] Recognize TL playlists (#11945)
|
||||
+ [bilibili] Support new Bangumi URLs (#11845)
|
||||
+ [cbc:watch] Extract audio codec for audio only formats (#11893)
|
||||
+ [elpais] Fix extraction for some URLs (#11765)
|
||||
|
||||
|
||||
version 2017.02.01
|
||||
|
||||
Extractors
|
||||
+ [facebook] Add another fallback extraction scenario (#11926)
|
||||
* [prosiebensat1] Fix extraction of descriptions (#11810, #11929)
|
||||
- [crunchyroll] Remove ScaledBorderAndShadow settings (#9028)
|
||||
+ [vimeo] Extract upload timestamp
|
||||
+ [vimeo] Extract license (#8726, #11880)
|
||||
+ [nrk:series] Add support for series (#11571, #11711)
|
||||
|
||||
|
||||
version 2017.01.31
|
||||
|
||||
Core
|
||||
+ [compat] Add compat_etree_register_namespace
|
||||
|
||||
Extractors
|
||||
* [youtube] Fix extraction for domainless player URLs (#11890, #11891, #11892,
|
||||
#11894, #11895, #11897, #11900, #11903, #11904, #11906, #11907, #11909,
|
||||
#11913, #11914, #11915, #11916, #11917, #11918, #11919)
|
||||
+ [vimeo] Extract both mixed and separated DASH formats
|
||||
+ [ruutu] Extract DASH formats
|
||||
* [itv] Fix extraction for python 2.6
|
||||
|
||||
|
||||
version 2017.01.29
|
||||
|
||||
Core
|
||||
* [extractor/common] Fix initialization template (#11605, #11825)
|
||||
+ [extractor/common] Document fragment_base_url and fragment's path fields
|
||||
* [extractor/common] Fix duration per DASH segment (#11868)
|
||||
+ Introduce --autonumber-start option for initial value of %(autonumber)s
|
||||
template (#727, #2702, #9362, #10457, #10529, #11862)
|
||||
|
||||
Extractors
|
||||
+ [azmedien:playlist] Add support for topic and themen playlists (#11817)
|
||||
* [npo] Fix subtitles extraction
|
||||
+ [itv] Extract subtitles
|
||||
+ [itv] Add support for itv.com (#9240)
|
||||
+ [mtv81] Add support for mtv81.com (#7619)
|
||||
+ [vlive] Add support for channels (#11826)
|
||||
+ [kaltura] Add fallback for fileExt
|
||||
+ [kaltura] Improve uploader_id extraction
|
||||
+ [konserthusetplay] Add support for rspoplay.se (#11828)
|
||||
|
||||
|
||||
version 2017.01.28
|
||||
|
||||
Core
|
||||
* [utils] Improve parse_duration
|
||||
|
||||
Extractors
|
||||
* [crunchyroll] Improve series and season metadata extraction (#11832)
|
||||
* [soundcloud] Improve formats extraction and extract audio bitrate
|
||||
+ [soundcloud] Extract HLS formats
|
||||
* [soundcloud] Fix track URL extraction (#11852)
|
||||
+ [twitch:vod] Expand URL regular expressions (#11846)
|
||||
* [aenetworks] Fix season episodes extraction (#11669)
|
||||
+ [tva] Add support for videos.tva.ca (#11842)
|
||||
* [jamendo] Improve and extract more metadata (#11836)
|
||||
+ [disney] Add support for Disney sites (#7409, #11801, #4975, #11000)
|
||||
* [vevo] Remove request to old API and catch API v2 errors
|
||||
+ [cmt,mtv,southpark] Add support for episode URLs (#11837)
|
||||
+ [youtube] Add fallback for duration extraction (#11841)
|
||||
|
||||
|
||||
version 2017.01.25
|
||||
|
||||
Extractors
|
||||
+ [openload] Fallback video extension to mp4
|
||||
+ [extractor/generic] Add support for Openload embeds (#11536, #11812)
|
||||
* [srgssr] Fix rts video extraction (#11831)
|
||||
+ [afreecatv:global] Add support for afreeca.tv (#11807)
|
||||
+ [crackle] Extract vtt subtitles
|
||||
+ [crackle] Extract multiple resolutions for thumbnails
|
||||
+ [crackle] Add support for mobile URLs
|
||||
+ [konserthusetplay] Extract subtitles (#11823)
|
||||
+ [konserthusetplay] Add support for HLS videos (#11823)
|
||||
* [vimeo:review] Fix config URL extraction (#11821)
|
||||
|
||||
|
||||
version 2017.01.24
|
||||
|
||||
Extractors
|
||||
* [pluralsight] Fix extraction (#11820)
|
||||
+ [nextmedia] Add support for NextTV (壹電視)
|
||||
* [24video] Fix extraction (#11811)
|
||||
* [youtube:playlist] Fix nonexistent and private playlist detection (#11604)
|
||||
+ [chirbit] Extract uploader (#11809)
|
||||
|
||||
|
||||
version 2017.01.22
|
||||
|
||||
Extractors
|
||||
+ [pornflip] Add support for pornflip.com (#11556, #11795)
|
||||
* [chaturbate] Fix extraction (#11797, #11802)
|
||||
+ [azmedien] Add support for AZ Medien sites (#11784, #11785)
|
||||
+ [nextmedia] Support redirected URLs
|
||||
+ [vimeo:channel] Extract videos' titles for playlist entries (#11796)
|
||||
+ [youtube] Extract episode metadata (#9695, #11774)
|
||||
+ [cspan] Support Ustream embedded videos (#11547)
|
||||
+ [1tv] Add support for HLS videos (#11786)
|
||||
* [uol] Fix extraction (#11770)
|
||||
* [mtv] Relax triforce feed regular expression (#11766)
|
||||
|
||||
|
||||
version 2017.01.18
|
||||
|
||||
Extractors
|
||||
* [bilibili] Fix extraction (#11077)
|
||||
+ [canalplus] Add fallback for video id (#11764)
|
||||
* [20min] Fix extraction (#11683, #11751)
|
||||
* [imdb] Extend URL regular expression (#11744)
|
||||
+ [naver] Add support for tv.naver.com links (#11743)
|
||||
|
||||
|
||||
version 2017.01.16
|
||||
|
||||
Core
|
||||
* [options] Apply custom config to final composite configuration (#11741)
|
||||
* [YoutubeDL] Improve protocol auto determining (#11720)
|
||||
|
||||
Extractors
|
||||
* [xiami] Relax URL regular expressions
|
||||
* [xiami] Improve track metadata extraction (#11699)
|
||||
+ [limelight] Check hand-make direct HTTP links
|
||||
+ [limelight] Add support for direct HTTP links at video.llnw.net (#11737)
|
||||
+ [brightcove] Recognize another player ID pattern (#11688)
|
||||
+ [niconico] Support login via cookies (#7968)
|
||||
* [yourupload] Fix extraction (#11601)
|
||||
+ [beam:live] Add support for beam.pro live streams (#10702, #11596)
|
||||
* [vevo] Improve geo restriction detection
|
||||
+ [dramafever] Add support for URLs with language code (#11714)
|
||||
* [cbc] Improve playlist support (#11704)
|
||||
|
||||
|
||||
version 2017.01.14
|
||||
|
||||
Core
|
||||
|
52
README.md
52
README.md
@ -88,8 +88,6 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
||||
--mark-watched Mark videos watched (YouTube only)
|
||||
--no-mark-watched Do not mark videos watched (YouTube only)
|
||||
--no-color Do not emit color codes in output
|
||||
--abort-on-unavailable-fragment Abort downloading when some fragment is not
|
||||
available
|
||||
|
||||
## Network Options:
|
||||
--proxy URL Use the specified HTTP/HTTPS/SOCKS proxy.
|
||||
@ -99,16 +97,13 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
||||
string (--proxy "") for direct connection
|
||||
--socket-timeout SECONDS Time to wait before giving up, in seconds
|
||||
--source-address IP Client-side IP address to bind to
|
||||
(experimental)
|
||||
-4, --force-ipv4 Make all connections via IPv4
|
||||
(experimental)
|
||||
-6, --force-ipv6 Make all connections via IPv6
|
||||
(experimental)
|
||||
--geo-verification-proxy URL Use this proxy to verify the IP address for
|
||||
some geo-restricted sites. The default
|
||||
proxy specified by --proxy (or none, if the
|
||||
options is not present) is used for the
|
||||
actual downloading. (experimental)
|
||||
actual downloading.
|
||||
|
||||
## Video Selection:
|
||||
--playlist-start NUMBER Playlist video to start at (default is 1)
|
||||
@ -139,23 +134,23 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
||||
COUNT views
|
||||
--max-views COUNT Do not download any videos with more than
|
||||
COUNT views
|
||||
--match-filter FILTER Generic video filter (experimental).
|
||||
Specify any key (see help for -o for a list
|
||||
of available keys) to match if the key is
|
||||
present, !key to check if the key is not
|
||||
present,key > NUMBER (like "comment_count >
|
||||
12", also works with >=, <, <=, !=, =) to
|
||||
compare against a number, and & to require
|
||||
multiple matches. Values which are not
|
||||
known are excluded unless you put a
|
||||
question mark (?) after the operator.For
|
||||
example, to only match videos that have
|
||||
been liked more than 100 times and disliked
|
||||
less than 50 times (or the dislike
|
||||
functionality is not available at the given
|
||||
service), but who also have a description,
|
||||
use --match-filter "like_count > 100 &
|
||||
dislike_count <? 50 & description" .
|
||||
--match-filter FILTER Generic video filter. Specify any key (see
|
||||
help for -o for a list of available keys)
|
||||
to match if the key is present, !key to
|
||||
check if the key is not present,key >
|
||||
NUMBER (like "comment_count > 12", also
|
||||
works with >=, <, <=, !=, =) to compare
|
||||
against a number, and & to require multiple
|
||||
matches. Values which are not known are
|
||||
excluded unless you put a question mark (?)
|
||||
after the operator.For example, to only
|
||||
match videos that have been liked more than
|
||||
100 times and disliked less than 50 times
|
||||
(or the dislike functionality is not
|
||||
available at the given service), but who
|
||||
also have a description, use --match-filter
|
||||
"like_count > 100 & dislike_count <? 50 &
|
||||
description" .
|
||||
--no-playlist Download only the video, if the URL refers
|
||||
to a video and a playlist.
|
||||
--yes-playlist Download the playlist, if the URL refers to
|
||||
@ -178,6 +173,8 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
||||
only)
|
||||
--skip-unavailable-fragments Skip unavailable fragments (DASH and
|
||||
hlsnative only)
|
||||
--abort-on-unavailable-fragment Abort downloading when some fragment is not
|
||||
available
|
||||
--buffer-size SIZE Size of download buffer (e.g. 1024 or 16K)
|
||||
(default is 1024)
|
||||
--no-resize-buffer Do not automatically adjust the buffer
|
||||
@ -185,6 +182,7 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
||||
automatically resized from an initial value
|
||||
of SIZE.
|
||||
--playlist-reverse Download playlist videos in reverse order
|
||||
--playlist-random Download playlist videos in random order
|
||||
--xattr-set-filesize Set file xattribute ytdl.filesize with
|
||||
expected file size (experimental)
|
||||
--hls-prefer-native Use the native HLS downloader instead of
|
||||
@ -210,7 +208,9 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
||||
--autonumber-size NUMBER Specify the number of digits in
|
||||
%(autonumber)s when it is present in output
|
||||
filename template or --auto-number option
|
||||
is given
|
||||
is given (default is 5)
|
||||
--autonumber-start NUMBER Specify the start value for %(autonumber)s
|
||||
(default is 1)
|
||||
--restrict-filenames Restrict filenames to only ASCII
|
||||
characters, and avoid "&" and spaces in
|
||||
filenames
|
||||
@ -374,7 +374,7 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
||||
avprobe)
|
||||
--audio-format FORMAT Specify audio format: "best", "aac",
|
||||
"vorbis", "mp3", "m4a", "opus", or "wav";
|
||||
"best" by default
|
||||
"best" by default; No effect without -x
|
||||
--audio-quality QUALITY Specify ffmpeg/avconv audio quality, insert
|
||||
a value between 0 (better) and 9 (worse)
|
||||
for VBR or a specific bitrate like 128K
|
||||
@ -841,7 +841,7 @@ Use the `--cookies` option, for example `--cookies /path/to/cookies/file.txt`.
|
||||
|
||||
In order to extract cookies from browser use any conforming browser extension for exporting cookies. For example, [cookies.txt](https://chrome.google.com/webstore/detail/cookiestxt/njabckikapfpffapmjgojcnbfjonfjfg) (for Chrome) or [Export Cookies](https://addons.mozilla.org/en-US/firefox/addon/export-cookies/) (for Firefox).
|
||||
|
||||
Note that the cookies file must be in Mozilla/Netscape format and the first line of the cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. Make sure you have correct [newline format](https://en.wikipedia.org/wiki/Newline) in the cookies file and convert newlines if necessary to correspond with your OS, namely `CRLF` (`\r\n`) for Windows, `LF` (`\n`) for Linux and `CR` (`\r`) for Mac OS. `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of invalid newline format.
|
||||
Note that the cookies file must be in Mozilla/Netscape format and the first line of the cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. Make sure you have correct [newline format](https://en.wikipedia.org/wiki/Newline) in the cookies file and convert newlines if necessary to correspond with your OS, namely `CRLF` (`\r\n`) for Windows and `LF` (`\n`) for Unix and Unix-like systems (Linux, Mac OS, etc.). `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of invalid newline format.
|
||||
|
||||
Passing cookies to youtube-dl is a good way to workaround login when a particular extractor does not implement it explicitly. Another use case is working around [CAPTCHA](https://en.wikipedia.org/wiki/CAPTCHA) some websites require you to solve in particular cases in order to get access (e.g. YouTube, CloudFlare).
|
||||
|
||||
|
19
devscripts/run_tests.sh
Normal file
19
devscripts/run_tests.sh
Normal file
@ -0,0 +1,19 @@
|
||||
#!/bin/bash
|
||||
|
||||
DOWNLOAD_TESTS="age_restriction|download|subtitles|write_annotations|iqiyi_sdk_interpreter"
|
||||
|
||||
test_set=""
|
||||
|
||||
case "$YTDL_TEST_SET" in
|
||||
core)
|
||||
test_set="-I test_($DOWNLOAD_TESTS)\.py"
|
||||
;;
|
||||
download)
|
||||
test_set="-I test_(?!$DOWNLOAD_TESTS).+\.py"
|
||||
;;
|
||||
*)
|
||||
break
|
||||
;;
|
||||
esac
|
||||
|
||||
nosetests test --verbose $test_set
|
@ -11,6 +11,7 @@
|
||||
- **4tube**
|
||||
- **56.com**
|
||||
- **5min**
|
||||
- **6play**
|
||||
- **8tracks**
|
||||
- **91porn**
|
||||
- **9c9media**
|
||||
@ -33,7 +34,8 @@
|
||||
- **AdobeTVVideo**
|
||||
- **AdultSwim**
|
||||
- **aenetworks**: A+E Networks: A&E, Lifetime, History.com, FYI Network
|
||||
- **AfreecaTV**: afreecatv.com
|
||||
- **afreecatv**: afreecatv.com
|
||||
- **afreecatv:global**: afreecatv.com
|
||||
- **AirMozilla**
|
||||
- **AlJazeera**
|
||||
- **Allocine**
|
||||
@ -74,6 +76,8 @@
|
||||
- **awaan:live**
|
||||
- **awaan:season**
|
||||
- **awaan:video**
|
||||
- **AZMedien**: AZ Medien videos
|
||||
- **AZMedienPlaylist**: AZ Medien playlists
|
||||
- **Azubu**
|
||||
- **AzubuLive**
|
||||
- **BaiduVideo**: 百度视频
|
||||
@ -81,11 +85,13 @@
|
||||
- **bambuser:channel**
|
||||
- **Bandcamp**
|
||||
- **Bandcamp:album**
|
||||
- **bangumi.bilibili.com**: BiliBili番剧
|
||||
- **bbc**: BBC
|
||||
- **bbc.co.uk**: BBC iPlayer
|
||||
- **bbc.co.uk:article**: BBC articles
|
||||
- **bbc.co.uk:iplayer:playlist**
|
||||
- **bbc.co.uk:playlist**
|
||||
- **Beam:live**
|
||||
- **Beatport**
|
||||
- **Beeg**
|
||||
- **BehindKink**
|
||||
@ -163,6 +169,7 @@
|
||||
- **ComedyCentralShortname**
|
||||
- **ComedyCentralTV**
|
||||
- **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED
|
||||
- **Corus**
|
||||
- **Coub**
|
||||
- **Cracked**
|
||||
- **Crackle**
|
||||
@ -198,6 +205,7 @@
|
||||
- **Digiteka**
|
||||
- **Discovery**
|
||||
- **DiscoveryGo**
|
||||
- **Disney**
|
||||
- **Dotsub**
|
||||
- **DouyuTV**: 斗鱼
|
||||
- **DPlay**
|
||||
@ -206,7 +214,8 @@
|
||||
- **DRBonanza**
|
||||
- **Dropbox**
|
||||
- **DrTuber**
|
||||
- **DRTV**
|
||||
- **drtv**
|
||||
- **drtv:live**
|
||||
- **Dumpert**
|
||||
- **dvtv**: http://video.aktualne.cz/
|
||||
- **dw**
|
||||
@ -242,6 +251,8 @@
|
||||
- **fc2:embed**
|
||||
- **Fczenit**
|
||||
- **fernsehkritik.tv**
|
||||
- **filmon**
|
||||
- **filmon:channel**
|
||||
- **Firstpost**
|
||||
- **FiveTV**
|
||||
- **Flickr**
|
||||
@ -273,6 +284,7 @@
|
||||
- **Gamersyde**
|
||||
- **GameSpot**
|
||||
- **GameStar**
|
||||
- **Gaskrank**
|
||||
- **Gazeta**
|
||||
- **GDCVault**
|
||||
- **generic**: Generic downloader that works on some sites
|
||||
@ -298,7 +310,6 @@
|
||||
- **HellPorno**
|
||||
- **Helsinki**: helsinki.fi
|
||||
- **HentaiStigma**
|
||||
- **HGTV**
|
||||
- **hgtv.com:show**
|
||||
- **HistoricFilms**
|
||||
- **history:topic**: History.com Topic
|
||||
@ -332,6 +343,7 @@
|
||||
- **IPrima**
|
||||
- **iqiyi**: 爱奇艺
|
||||
- **Ir90Tv**
|
||||
- **ITV**
|
||||
- **ivi**: ivi.ru
|
||||
- **ivi:compilation**: ivi.ru compilations
|
||||
- **ivideon**: Ivideon TV
|
||||
@ -440,6 +452,7 @@
|
||||
- **mtg**: MTG services
|
||||
- **mtv**
|
||||
- **mtv.de**
|
||||
- **mtv81**
|
||||
- **mtv:video**
|
||||
- **mtvservices:embedded**
|
||||
- **MuenchenTV**: münchen.tv
|
||||
@ -482,6 +495,7 @@
|
||||
- **Newstube**
|
||||
- **NextMedia**: 蘋果日報
|
||||
- **NextMediaActionNews**: 蘋果日報 - 動新聞
|
||||
- **NextTV**: 壹電視
|
||||
- **nfb**: National Film Board of Canada
|
||||
- **nfl.com**
|
||||
- **NhkVod**
|
||||
@ -520,6 +534,7 @@
|
||||
- **NRKTV**: NRK TV and NRK Radio
|
||||
- **NRKTVDirekte**: NRK TV Direkte and NRK Radio Direkte
|
||||
- **NRKTVEpisodes**
|
||||
- **NRKTVSeries**
|
||||
- **ntv.ru**
|
||||
- **Nuvid**
|
||||
- **NYTimes**
|
||||
@ -571,6 +586,7 @@
|
||||
- **PolskieRadio**
|
||||
- **PolskieRadioCategory**
|
||||
- **PornCom**
|
||||
- **PornFlip**
|
||||
- **PornHd**
|
||||
- **PornHub**: PornHub and Thumbzilla
|
||||
- **PornHubPlaylist**
|
||||
@ -652,6 +668,7 @@
|
||||
- **screen.yahoo:search**: Yahoo screen search
|
||||
- **Screencast**
|
||||
- **ScreencastOMatic**
|
||||
- **scrippsnetworks:watch**
|
||||
- **Seeker**
|
||||
- **SenateISVP**
|
||||
- **SendtoNews**
|
||||
@ -661,7 +678,6 @@
|
||||
- **Shared**: shared.sx
|
||||
- **ShowRoomLive**
|
||||
- **Sina**
|
||||
- **SixPlay**
|
||||
- **skynewsarabia:article**
|
||||
- **skynewsarabia:video**
|
||||
- **SkySports**
|
||||
@ -693,10 +709,10 @@
|
||||
- **Spiegeltv**
|
||||
- **Spike**
|
||||
- **Sport5**
|
||||
- **SportBox**
|
||||
- **SportBoxEmbed**
|
||||
- **SportDeutschland**
|
||||
- **Sportschau**
|
||||
- **Sprout**
|
||||
- **sr:mediathek**: Saarländischer Rundfunk
|
||||
- **SRGSSR**
|
||||
- **SRGSSRPlay**: srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites
|
||||
@ -779,6 +795,7 @@
|
||||
- **TV2Article**
|
||||
- **TV3**
|
||||
- **TV4**: tv4.se and tv4play.se
|
||||
- **TVA**
|
||||
- **TVANouvelles**
|
||||
- **TVANouvellesArticle**
|
||||
- **TVC**
|
||||
@ -789,6 +806,7 @@
|
||||
- **tvp**: Telewizja Polska
|
||||
- **tvp:embed**: Telewizja Polska
|
||||
- **tvp:series**
|
||||
- **TVPlayer**
|
||||
- **Tweakers**
|
||||
- **twitch:chapter**
|
||||
- **twitch:clips**
|
||||
@ -845,6 +863,7 @@
|
||||
- **videomore:season**
|
||||
- **videomore:video**
|
||||
- **VideoPremium**
|
||||
- **VideoPress**
|
||||
- **videoweed**: VideoWeed
|
||||
- **Vidio**
|
||||
- **vidme**
|
||||
@ -879,6 +898,7 @@
|
||||
- **vk:uservideos**: VK - User's Videos
|
||||
- **vk:wallpost**
|
||||
- **vlive**
|
||||
- **vlive:channel**
|
||||
- **Vodlocker**
|
||||
- **VODPlatform**
|
||||
- **VoiceRepublic**
|
||||
|
@ -34,6 +34,9 @@ from youtube_dl.utils import (
|
||||
find_xpath_attr,
|
||||
fix_xml_ampersands,
|
||||
get_element_by_class,
|
||||
get_element_by_attribute,
|
||||
get_elements_by_class,
|
||||
get_elements_by_attribute,
|
||||
InAdvancePagedList,
|
||||
intlist_to_bytes,
|
||||
is_html,
|
||||
@ -510,6 +513,7 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(parse_duration('1 hour 3 minutes'), 3780)
|
||||
self.assertEqual(parse_duration('87 Min.'), 5220)
|
||||
self.assertEqual(parse_duration('PT1H0.040S'), 3600.04)
|
||||
self.assertEqual(parse_duration('PT00H03M30SZ'), 210)
|
||||
|
||||
def test_fix_xml_ampersands(self):
|
||||
self.assertEqual(
|
||||
@ -784,12 +788,27 @@ class TestUtil(unittest.TestCase):
|
||||
on = js_to_json('["abc", "def",]')
|
||||
self.assertEqual(json.loads(on), ['abc', 'def'])
|
||||
|
||||
on = js_to_json('[/*comment\n*/"abc"/*comment\n*/,/*comment\n*/"def",/*comment\n*/]')
|
||||
self.assertEqual(json.loads(on), ['abc', 'def'])
|
||||
|
||||
on = js_to_json('[//comment\n"abc" //comment\n,//comment\n"def",//comment\n]')
|
||||
self.assertEqual(json.loads(on), ['abc', 'def'])
|
||||
|
||||
on = js_to_json('{"abc": "def",}')
|
||||
self.assertEqual(json.loads(on), {'abc': 'def'})
|
||||
|
||||
on = js_to_json('{/*comment\n*/"abc"/*comment\n*/:/*comment\n*/"def"/*comment\n*/,/*comment\n*/}')
|
||||
self.assertEqual(json.loads(on), {'abc': 'def'})
|
||||
|
||||
on = js_to_json('{ 0: /* " \n */ ",]" , }')
|
||||
self.assertEqual(json.loads(on), {'0': ',]'})
|
||||
|
||||
on = js_to_json('{ /*comment\n*/0/*comment\n*/: /* " \n */ ",]" , }')
|
||||
self.assertEqual(json.loads(on), {'0': ',]'})
|
||||
|
||||
on = js_to_json('{ 0: // comment\n1 }')
|
||||
self.assertEqual(json.loads(on), {'0': 1})
|
||||
|
||||
on = js_to_json(r'["<p>x<\/p>"]')
|
||||
self.assertEqual(json.loads(on), ['<p>x</p>'])
|
||||
|
||||
@ -799,15 +818,27 @@ class TestUtil(unittest.TestCase):
|
||||
on = js_to_json("['a\\\nb']")
|
||||
self.assertEqual(json.loads(on), ['ab'])
|
||||
|
||||
on = js_to_json("/*comment\n*/[/*comment\n*/'a\\\nb'/*comment\n*/]/*comment\n*/")
|
||||
self.assertEqual(json.loads(on), ['ab'])
|
||||
|
||||
on = js_to_json('{0xff:0xff}')
|
||||
self.assertEqual(json.loads(on), {'255': 255})
|
||||
|
||||
on = js_to_json('{/*comment\n*/0xff/*comment\n*/:/*comment\n*/0xff/*comment\n*/}')
|
||||
self.assertEqual(json.loads(on), {'255': 255})
|
||||
|
||||
on = js_to_json('{077:077}')
|
||||
self.assertEqual(json.loads(on), {'63': 63})
|
||||
|
||||
on = js_to_json('{/*comment\n*/077/*comment\n*/:/*comment\n*/077/*comment\n*/}')
|
||||
self.assertEqual(json.loads(on), {'63': 63})
|
||||
|
||||
on = js_to_json('{42:42}')
|
||||
self.assertEqual(json.loads(on), {'42': 42})
|
||||
|
||||
on = js_to_json('{/*comment\n*/42/*comment\n*/:/*comment\n*/42/*comment\n*/}')
|
||||
self.assertEqual(json.loads(on), {'42': 42})
|
||||
|
||||
def test_extract_attributes(self):
|
||||
self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'})
|
||||
self.assertEqual(extract_attributes("<e x='y'>"), {'x': 'y'})
|
||||
@ -1096,6 +1127,32 @@ The first line
|
||||
self.assertEqual(get_element_by_class('foo', html), 'nice')
|
||||
self.assertEqual(get_element_by_class('no-such-class', html), None)
|
||||
|
||||
def test_get_element_by_attribute(self):
|
||||
html = '''
|
||||
<span class="foo bar">nice</span>
|
||||
'''
|
||||
|
||||
self.assertEqual(get_element_by_attribute('class', 'foo bar', html), 'nice')
|
||||
self.assertEqual(get_element_by_attribute('class', 'foo', html), None)
|
||||
self.assertEqual(get_element_by_attribute('class', 'no-such-foo', html), None)
|
||||
|
||||
def test_get_elements_by_class(self):
|
||||
html = '''
|
||||
<span class="foo bar">nice</span><span class="foo bar">also nice</span>
|
||||
'''
|
||||
|
||||
self.assertEqual(get_elements_by_class('foo', html), ['nice', 'also nice'])
|
||||
self.assertEqual(get_elements_by_class('no-such-class', html), [])
|
||||
|
||||
def test_get_elements_by_attribute(self):
|
||||
html = '''
|
||||
<span class="foo bar">nice</span><span class="foo bar">also nice</span>
|
||||
'''
|
||||
|
||||
self.assertEqual(get_elements_by_attribute('class', 'foo bar', html), ['nice', 'also nice'])
|
||||
self.assertEqual(get_elements_by_attribute('class', 'foo', html), [])
|
||||
self.assertEqual(get_elements_by_attribute('class', 'no-such-foo', html), [])
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@ -24,6 +24,7 @@ import sys
|
||||
import time
|
||||
import tokenize
|
||||
import traceback
|
||||
import random
|
||||
|
||||
from .compat import (
|
||||
compat_basestring,
|
||||
@ -159,6 +160,7 @@ class YoutubeDL(object):
|
||||
playlistend: Playlist item to end at.
|
||||
playlist_items: Specific indices of playlist to download.
|
||||
playlistreverse: Download playlist items in reverse order.
|
||||
playlistrandom: Download playlist items in random order.
|
||||
matchtitle: Download only matching titles.
|
||||
rejecttitle: Reject downloads for matching titles.
|
||||
logger: Log messages to a logging.Logger instance.
|
||||
@ -584,7 +586,7 @@ class YoutubeDL(object):
|
||||
if autonumber_size is None:
|
||||
autonumber_size = 5
|
||||
autonumber_templ = '%0' + str(autonumber_size) + 'd'
|
||||
template_dict['autonumber'] = autonumber_templ % self._num_downloads
|
||||
template_dict['autonumber'] = autonumber_templ % (self.params.get('autonumber_start', 1) - 1 + self._num_downloads)
|
||||
if template_dict.get('playlist_index') is not None:
|
||||
template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
|
||||
if template_dict.get('resolution') is None:
|
||||
@ -842,6 +844,9 @@ class YoutubeDL(object):
|
||||
if self.params.get('playlistreverse', False):
|
||||
entries = entries[::-1]
|
||||
|
||||
if self.params.get('playlistrandom', False):
|
||||
random.shuffle(entries)
|
||||
|
||||
for i, entry in enumerate(entries, 1):
|
||||
self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
|
||||
extra = {
|
||||
@ -1363,7 +1368,7 @@ class YoutubeDL(object):
|
||||
format['ext'] = determine_ext(format['url']).lower()
|
||||
# Automatically determine protocol if missing (useful for format
|
||||
# selection purposes)
|
||||
if 'protocol' not in format:
|
||||
if format.get('protocol') is None:
|
||||
format['protocol'] = determine_protocol(format)
|
||||
# Add HTTP headers, so that external programs can use them from the
|
||||
# json output
|
||||
|
@ -133,6 +133,12 @@ def _real_main(argv=None):
|
||||
parser.error('TV Provider account username missing\n')
|
||||
if opts.outtmpl is not None and (opts.usetitle or opts.autonumber or opts.useid):
|
||||
parser.error('using output template conflicts with using title, video ID or auto number')
|
||||
if opts.autonumber_size is not None:
|
||||
if opts.autonumber_size <= 0:
|
||||
parser.error('auto number size must be positive')
|
||||
if opts.autonumber_start is not None:
|
||||
if opts.autonumber_start < 0:
|
||||
parser.error('auto number start must be positive or 0')
|
||||
if opts.usetitle and opts.useid:
|
||||
parser.error('using title conflicts with using video ID')
|
||||
if opts.username is not None and opts.password is None:
|
||||
@ -321,6 +327,7 @@ def _real_main(argv=None):
|
||||
'listformats': opts.listformats,
|
||||
'outtmpl': outtmpl,
|
||||
'autonumber_size': opts.autonumber_size,
|
||||
'autonumber_start': opts.autonumber_start,
|
||||
'restrictfilenames': opts.restrictfilenames,
|
||||
'ignoreerrors': opts.ignoreerrors,
|
||||
'force_generic_extractor': opts.force_generic_extractor,
|
||||
@ -337,6 +344,7 @@ def _real_main(argv=None):
|
||||
'playliststart': opts.playliststart,
|
||||
'playlistend': opts.playlistend,
|
||||
'playlistreverse': opts.playlist_reverse,
|
||||
'playlistrandom': opts.playlist_random,
|
||||
'noplaylist': opts.noplaylist,
|
||||
'logtostderr': opts.outtmpl == '-',
|
||||
'consoletitle': opts.consoletitle,
|
||||
|
@ -2529,6 +2529,24 @@ else:
|
||||
el.text = el.text.decode('utf-8')
|
||||
return doc
|
||||
|
||||
if hasattr(etree, 'register_namespace'):
|
||||
compat_etree_register_namespace = etree.register_namespace
|
||||
else:
|
||||
def compat_etree_register_namespace(prefix, uri):
|
||||
"""Register a namespace prefix.
|
||||
The registry is global, and any existing mapping for either the
|
||||
given prefix or the namespace URI will be removed.
|
||||
*prefix* is the namespace prefix, *uri* is a namespace uri. Tags and
|
||||
attributes in this namespace will be serialized with prefix if possible.
|
||||
ValueError is raised if prefix is reserved or is invalid.
|
||||
"""
|
||||
if re.match(r"ns\d+$", prefix):
|
||||
raise ValueError("Prefix format reserved for internal use")
|
||||
for k, v in list(etree._namespace_map.items()):
|
||||
if k == uri or v == prefix:
|
||||
del etree._namespace_map[k]
|
||||
etree._namespace_map[uri] = prefix
|
||||
|
||||
if sys.version_info < (2, 7):
|
||||
# Here comes the crazy part: In 2.6, if the xpath is a unicode,
|
||||
# .//node does not match if a node is a direct child of . !
|
||||
@ -2865,6 +2883,7 @@ __all__ = [
|
||||
'compat_cookiejar',
|
||||
'compat_cookies',
|
||||
'compat_etree_fromstring',
|
||||
'compat_etree_register_namespace',
|
||||
'compat_expanduser',
|
||||
'compat_get_terminal_size',
|
||||
'compat_getenv',
|
||||
|
@ -17,6 +17,7 @@ from ..utils import (
|
||||
encodeArgument,
|
||||
handle_youtubedl_headers,
|
||||
check_executable,
|
||||
is_outdated_version,
|
||||
)
|
||||
|
||||
|
||||
@ -198,6 +199,15 @@ class FFmpegFD(ExternalFD):
|
||||
|
||||
args = [ffpp.executable, '-y']
|
||||
|
||||
seekable = info_dict.get('_seekable')
|
||||
if seekable is not None:
|
||||
# setting -seekable prevents ffmpeg from guessing if the server
|
||||
# supports seeking(by adding the header `Range: bytes=0-`), which
|
||||
# can cause problems in some cases
|
||||
# https://github.com/rg3/youtube-dl/issues/11800#issuecomment-275037127
|
||||
# http://trac.ffmpeg.org/ticket/6125#comment:10
|
||||
args += ['-seekable', '1' if seekable else '0']
|
||||
|
||||
args += self._configuration_args()
|
||||
|
||||
# start_time = info_dict.get('start_time') or 0
|
||||
@ -264,7 +274,9 @@ class FFmpegFD(ExternalFD):
|
||||
if self.params.get('hls_use_mpegts', False) or tmpfilename == '-':
|
||||
args += ['-f', 'mpegts']
|
||||
else:
|
||||
args += ['-f', 'mp4', '-bsf:a', 'aac_adtstoasc']
|
||||
args += ['-f', 'mp4']
|
||||
if (ffpp.basename == 'ffmpeg' and is_outdated_version(ffpp._versions['ffmpeg'], '3.2', False)) and (not info_dict.get('acodec') or info_dict['acodec'].split('.')[0] in ('aac', 'mp4a')):
|
||||
args += ['-bsf:a', 'aac_adtstoasc']
|
||||
elif protocol == 'rtmp':
|
||||
args += ['-f', 'flv']
|
||||
else:
|
||||
|
@ -61,6 +61,7 @@ class FragmentFD(FileDownloader):
|
||||
'noprogress': True,
|
||||
'ratelimit': self.params.get('ratelimit'),
|
||||
'retries': self.params.get('retries', 0),
|
||||
'nopart': self.params.get('nopart', False),
|
||||
'test': self.params.get('test', False),
|
||||
}
|
||||
)
|
||||
|
@ -23,7 +23,7 @@ class AENetworksBaseIE(ThePlatformIE):
|
||||
class AENetworksIE(AENetworksBaseIE):
|
||||
IE_NAME = 'aenetworks'
|
||||
IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:history|aetv|mylifetime)\.com|fyi\.tv)/(?:shows/(?P<show_path>[^/]+(?:/[^/]+){0,2})|movies/(?P<movie_display_id>[^/]+)/full-movie)'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:history|aetv|mylifetime|lifetimemovieclub)\.com|fyi\.tv)/(?:shows/(?P<show_path>[^/]+(?:/[^/]+){0,2})|movies/(?P<movie_display_id>[^/]+)(?:/full-movie)?)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1',
|
||||
'md5': 'a97a65f7e823ae10e9244bc5433d5fe6',
|
||||
@ -62,11 +62,15 @@ class AENetworksIE(AENetworksBaseIE):
|
||||
}, {
|
||||
'url': 'http://www.mylifetime.com/movies/center-stage-on-pointe/full-movie',
|
||||
'only_matching': True
|
||||
}, {
|
||||
'url': 'https://www.lifetimemovieclub.com/movies/a-killer-among-us',
|
||||
'only_matching': True
|
||||
}]
|
||||
_DOMAIN_TO_REQUESTOR_ID = {
|
||||
'history.com': 'HISTORY',
|
||||
'aetv.com': 'AETV',
|
||||
'mylifetime.com': 'LIFETIME',
|
||||
'lifetimemovieclub.com': 'LIFETIMEMOVIECLUB',
|
||||
'fyi.tv': 'FYI',
|
||||
}
|
||||
|
||||
@ -87,7 +91,7 @@ class AENetworksIE(AENetworksBaseIE):
|
||||
self._html_search_meta('aetn:SeriesTitle', webpage))
|
||||
elif url_parts_len == 2:
|
||||
entries = []
|
||||
for episode_item in re.findall(r'(?s)<div[^>]+class="[^"]*episode-item[^"]*"[^>]*>', webpage):
|
||||
for episode_item in re.findall(r'(?s)<[^>]+class="[^"]*(?:episode|program)-item[^"]*"[^>]*>', webpage):
|
||||
episode_attributes = extract_attributes(episode_item)
|
||||
episode_url = compat_urlparse.urljoin(
|
||||
url, episode_attributes['data-canonical'])
|
||||
|
@ -18,6 +18,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class AfreecaTVIE(InfoExtractor):
|
||||
IE_NAME = 'afreecatv'
|
||||
IE_DESC = 'afreecatv.com'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
@ -143,3 +144,107 @@ class AfreecaTVIE(InfoExtractor):
|
||||
expected=True)
|
||||
|
||||
return info
|
||||
|
||||
|
||||
class AfreecaTVGlobalIE(AfreecaTVIE):
|
||||
IE_NAME = 'afreecatv:global'
|
||||
_VALID_URL = r'https?://(?:www\.)?afreeca\.tv/(?P<channel_id>\d+)(?:/v/(?P<video_id>\d+))?'
|
||||
_TESTS = [{
|
||||
'url': 'http://afreeca.tv/36853014/v/58301',
|
||||
'info_dict': {
|
||||
'id': '58301',
|
||||
'title': 'tryhard top100',
|
||||
'uploader_id': '36853014',
|
||||
'uploader': 'makgi Hearthstone Live!',
|
||||
},
|
||||
'playlist_count': 3,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel_id, video_id = re.match(self._VALID_URL, url).groups()
|
||||
video_type = 'video' if video_id else 'live'
|
||||
query = {
|
||||
'pt': 'view',
|
||||
'bid': channel_id,
|
||||
}
|
||||
if video_id:
|
||||
query['vno'] = video_id
|
||||
video_data = self._download_json(
|
||||
'http://api.afreeca.tv/%s/view_%s.php' % (video_type, video_type),
|
||||
video_id or channel_id, query=query)['channel']
|
||||
|
||||
if video_data.get('result') != 1:
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, video_data['remsg']))
|
||||
|
||||
title = video_data['title']
|
||||
|
||||
info = {
|
||||
'thumbnail': video_data.get('thumb'),
|
||||
'view_count': int_or_none(video_data.get('vcnt')),
|
||||
'age_limit': int_or_none(video_data.get('grade')),
|
||||
'uploader_id': channel_id,
|
||||
'uploader': video_data.get('cname'),
|
||||
}
|
||||
|
||||
if video_id:
|
||||
entries = []
|
||||
for i, f in enumerate(video_data.get('flist', [])):
|
||||
video_key = self.parse_video_key(f.get('key', ''))
|
||||
f_url = f.get('file')
|
||||
if not video_key or not f_url:
|
||||
continue
|
||||
entries.append({
|
||||
'id': '%s_%s' % (video_id, video_key.get('part', i + 1)),
|
||||
'title': title,
|
||||
'upload_date': video_key.get('upload_date'),
|
||||
'duration': int_or_none(f.get('length')),
|
||||
'url': f_url,
|
||||
'protocol': 'm3u8_native',
|
||||
'ext': 'mp4',
|
||||
})
|
||||
|
||||
info.update({
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'duration': int_or_none(video_data.get('length')),
|
||||
})
|
||||
if len(entries) > 1:
|
||||
info['_type'] = 'multi_video'
|
||||
info['entries'] = entries
|
||||
elif len(entries) == 1:
|
||||
i = entries[0].copy()
|
||||
i.update(info)
|
||||
info = i
|
||||
else:
|
||||
formats = []
|
||||
for s in video_data.get('strm', []):
|
||||
s_url = s.get('purl')
|
||||
if not s_url:
|
||||
continue
|
||||
stype = s.get('stype')
|
||||
if stype == 'HLS':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
s_url, channel_id, 'mp4', m3u8_id=stype, fatal=False))
|
||||
elif stype == 'RTMP':
|
||||
format_id = [stype]
|
||||
label = s.get('label')
|
||||
if label:
|
||||
format_id.append(label)
|
||||
formats.append({
|
||||
'format_id': '-'.join(format_id),
|
||||
'url': s_url,
|
||||
'tbr': int_or_none(s.get('bps')),
|
||||
'height': int_or_none(s.get('brt')),
|
||||
'ext': 'flv',
|
||||
'rtmp_live': True,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
info.update({
|
||||
'id': channel_id,
|
||||
'title': self._live_title(title),
|
||||
'is_live': True,
|
||||
'formats': formats,
|
||||
})
|
||||
|
||||
return info
|
||||
|
172
youtube_dl/extractor/azmedien.py
Normal file
172
youtube_dl/extractor/azmedien.py
Normal file
@ -0,0 +1,172 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .kaltura import KalturaIE
|
||||
from ..utils import (
|
||||
get_element_by_id,
|
||||
strip_or_none,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class AZMedienBaseIE(InfoExtractor):
|
||||
def _kaltura_video(self, partner_id, entry_id):
|
||||
return self.url_result(
|
||||
'kaltura:%s:%s' % (partner_id, entry_id), ie=KalturaIE.ie_key(),
|
||||
video_id=entry_id)
|
||||
|
||||
|
||||
class AZMedienIE(AZMedienBaseIE):
|
||||
IE_DESC = 'AZ Medien videos'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:www\.)?
|
||||
(?:
|
||||
telezueri\.ch|
|
||||
telebaern\.tv|
|
||||
telem1\.ch
|
||||
)/
|
||||
[0-9]+-show-[^/\#]+
|
||||
(?:
|
||||
/[0-9]+-episode-[^/\#]+
|
||||
(?:
|
||||
/[0-9]+-segment-(?:[^/\#]+\#)?|
|
||||
\#
|
||||
)|
|
||||
\#
|
||||
)
|
||||
(?P<id>[^\#]+)
|
||||
'''
|
||||
|
||||
_TESTS = [{
|
||||
# URL with 'segment'
|
||||
'url': 'http://www.telezueri.ch/62-show-zuerinews/13772-episode-sonntag-18-dezember-2016/32419-segment-massenabweisungen-beim-hiltl-club-wegen-pelzboom',
|
||||
'info_dict': {
|
||||
'id': '1_2444peh4',
|
||||
'ext': 'mov',
|
||||
'title': 'Massenabweisungen beim Hiltl Club wegen Pelzboom',
|
||||
'description': 'md5:9ea9dd1b159ad65b36ddcf7f0d7c76a8',
|
||||
'uploader_id': 'TeleZ?ri',
|
||||
'upload_date': '20161218',
|
||||
'timestamp': 1482084490,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# URL with 'segment' and fragment:
|
||||
'url': 'http://www.telebaern.tv/118-show-news/14240-episode-dienstag-17-januar-2017/33666-segment-achtung-gefahr#zu-wenig-pflegerinnen-und-pfleger',
|
||||
'only_matching': True
|
||||
}, {
|
||||
# URL with 'episode' and fragment:
|
||||
'url': 'http://www.telem1.ch/47-show-sonntalk/13986-episode-soldaten-fuer-grenzschutz-energiestrategie-obama-bilanz#soldaten-fuer-grenzschutz-energiestrategie-obama-bilanz',
|
||||
'only_matching': True
|
||||
}, {
|
||||
# URL with 'show' and fragment:
|
||||
'url': 'http://www.telezueri.ch/66-show-sonntalk#burka-plakate-trump-putin-china-besuch',
|
||||
'only_matching': True
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
partner_id = self._search_regex(
|
||||
r'<script[^>]+src=["\'](?:https?:)?//(?:[^/]+\.)?kaltura\.com(?:/[^/]+)*/(?:p|partner_id)/([0-9]+)',
|
||||
webpage, 'kaltura partner id')
|
||||
entry_id = self._html_search_regex(
|
||||
r'<a[^>]+data-id=(["\'])(?P<id>(?:(?!\1).)+)\1[^>]+data-slug=["\']%s'
|
||||
% re.escape(video_id), webpage, 'kaltura entry id', group='id')
|
||||
|
||||
return self._kaltura_video(partner_id, entry_id)
|
||||
|
||||
|
||||
class AZMedienPlaylistIE(AZMedienBaseIE):
|
||||
IE_DESC = 'AZ Medien playlists'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:www\.)?
|
||||
(?:
|
||||
telezueri\.ch|
|
||||
telebaern\.tv|
|
||||
telem1\.ch
|
||||
)/
|
||||
(?P<id>[0-9]+-
|
||||
(?:
|
||||
show|
|
||||
topic|
|
||||
themen
|
||||
)-[^/\#]+
|
||||
(?:
|
||||
/[0-9]+-episode-[^/\#]+
|
||||
)?
|
||||
)$
|
||||
'''
|
||||
|
||||
_TESTS = [{
|
||||
# URL with 'episode'
|
||||
'url': 'http://www.telebaern.tv/118-show-news/13735-episode-donnerstag-15-dezember-2016',
|
||||
'info_dict': {
|
||||
'id': '118-show-news/13735-episode-donnerstag-15-dezember-2016',
|
||||
'title': 'News - Donnerstag, 15. Dezember 2016',
|
||||
},
|
||||
'playlist_count': 9,
|
||||
}, {
|
||||
# URL with 'themen'
|
||||
'url': 'http://www.telem1.ch/258-themen-tele-m1-classics',
|
||||
'info_dict': {
|
||||
'id': '258-themen-tele-m1-classics',
|
||||
'title': 'Tele M1 Classics',
|
||||
},
|
||||
'playlist_mincount': 15,
|
||||
}, {
|
||||
# URL with 'topic', contains nested playlists
|
||||
'url': 'http://www.telezueri.ch/219-topic-aera-trump-hat-offiziell-begonnen',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# URL with 'show' only
|
||||
'url': 'http://www.telezueri.ch/86-show-talktaeglich',
|
||||
'only_matching': True
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
show_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, show_id)
|
||||
|
||||
entries = []
|
||||
|
||||
partner_id = self._search_regex(
|
||||
r'src=["\'](?:https?:)?//(?:[^/]+\.)kaltura\.com/(?:[^/]+/)*(?:p|partner_id)/(\d+)',
|
||||
webpage, 'kaltura partner id', default=None)
|
||||
|
||||
if partner_id:
|
||||
entries = [
|
||||
self._kaltura_video(partner_id, m.group('id'))
|
||||
for m in re.finditer(
|
||||
r'data-id=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage)]
|
||||
|
||||
if not entries:
|
||||
entries = [
|
||||
self.url_result(m.group('url'), ie=AZMedienIE.ie_key())
|
||||
for m in re.finditer(
|
||||
r'<a[^>]+data-real=(["\'])(?P<url>http.+?)\1', webpage)]
|
||||
|
||||
if not entries:
|
||||
entries = [
|
||||
# May contain nested playlists (e.g. [1]) thus no explicit
|
||||
# ie_key
|
||||
# 1. http://www.telezueri.ch/219-topic-aera-trump-hat-offiziell-begonnen)
|
||||
self.url_result(urljoin(url, m.group('url')))
|
||||
for m in re.finditer(
|
||||
r'<a[^>]+name=[^>]+href=(["\'])(?P<url>/.+?)\1', webpage)]
|
||||
|
||||
title = self._search_regex(
|
||||
r'episodeShareTitle\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1',
|
||||
webpage, 'title',
|
||||
default=strip_or_none(get_element_by_id(
|
||||
'video-title', webpage)), group='title')
|
||||
|
||||
return self.playlist_result(entries, show_id, title)
|
@ -209,6 +209,15 @@ class BandcampAlbumIE(InfoExtractor):
|
||||
'id': 'entropy-ep',
|
||||
},
|
||||
'playlist_mincount': 3,
|
||||
}, {
|
||||
# not all tracks have songs
|
||||
'url': 'https://insulters.bandcamp.com/album/we-are-the-plague',
|
||||
'info_dict': {
|
||||
'id': 'we-are-the-plague',
|
||||
'title': 'WE ARE THE PLAGUE',
|
||||
'uploader_id': 'insulters',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -217,12 +226,16 @@ class BandcampAlbumIE(InfoExtractor):
|
||||
album_id = mobj.group('album_id')
|
||||
playlist_id = album_id or uploader_id
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
tracks_paths = re.findall(r'<a href="(.*?)" itemprop="url">', webpage)
|
||||
if not tracks_paths:
|
||||
track_elements = re.findall(
|
||||
r'(?s)<div[^>]*>(.*?<a[^>]+href="([^"]+?)"[^>]+itemprop="url"[^>]*>.*?)</div>', webpage)
|
||||
if not track_elements:
|
||||
raise ExtractorError('The page doesn\'t contain any tracks')
|
||||
# Only tracks with duration info have songs
|
||||
entries = [
|
||||
self.url_result(compat_urlparse.urljoin(url, t_path), ie=BandcampIE.ie_key())
|
||||
for t_path in tracks_paths]
|
||||
for elem_content, t_path in track_elements
|
||||
if self._html_search_meta('duration', elem_content, default=None)]
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'album_title\s*:\s*"((?:\\.|[^"\\])+?)"',
|
||||
webpage, 'title', fatal=False)
|
||||
|
@ -225,6 +225,8 @@ class BBCCoUkIE(InfoExtractor):
|
||||
}
|
||||
]
|
||||
|
||||
_USP_RE = r'/([^/]+?)\.ism(?:\.hlsv2\.ism)?/[^/]+\.m3u8'
|
||||
|
||||
class MediaSelectionError(Exception):
|
||||
def __init__(self, id):
|
||||
self.id = id
|
||||
@ -336,6 +338,15 @@ class BBCCoUkIE(InfoExtractor):
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
href, programme_id, ext='mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id=format_id, fatal=False))
|
||||
if re.search(self._USP_RE, href):
|
||||
usp_formats = self._extract_m3u8_formats(
|
||||
re.sub(self._USP_RE, r'/\1.ism/\1.m3u8', href),
|
||||
programme_id, ext='mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id=format_id, fatal=False)
|
||||
for f in usp_formats:
|
||||
if f.get('height') and f['height'] > 720:
|
||||
continue
|
||||
formats.append(f)
|
||||
elif transfer_format == 'hds':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
href, programme_id, f4m_id=format_id, fatal=False))
|
||||
|
73
youtube_dl/extractor/beampro.py
Normal file
73
youtube_dl/extractor/beampro.py
Normal file
@ -0,0 +1,73 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
compat_str,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class BeamProLiveIE(InfoExtractor):
|
||||
IE_NAME = 'Beam:live'
|
||||
_VALID_URL = r'https?://(?:\w+\.)?beam\.pro/(?P<id>[^/?#&]+)'
|
||||
_RATINGS = {'family': 0, 'teen': 13, '18+': 18}
|
||||
_TEST = {
|
||||
'url': 'http://www.beam.pro/niterhayven',
|
||||
'info_dict': {
|
||||
'id': '261562',
|
||||
'ext': 'mp4',
|
||||
'title': 'Introducing The Witcher 3 // The Grind Starts Now!',
|
||||
'description': 'md5:0b161ac080f15fe05d18a07adb44a74d',
|
||||
'thumbnail': r're:https://.*\.jpg$',
|
||||
'timestamp': 1483477281,
|
||||
'upload_date': '20170103',
|
||||
'uploader': 'niterhayven',
|
||||
'uploader_id': '373396',
|
||||
'age_limit': 18,
|
||||
'is_live': True,
|
||||
'view_count': int,
|
||||
},
|
||||
'skip': 'niterhayven is offline',
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel_name = self._match_id(url)
|
||||
|
||||
chan = self._download_json(
|
||||
'https://beam.pro/api/v1/channels/%s' % channel_name, channel_name)
|
||||
|
||||
if chan.get('online') is False:
|
||||
raise ExtractorError(
|
||||
'{0} is offline'.format(channel_name), expected=True)
|
||||
|
||||
channel_id = chan['id']
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
'https://beam.pro/api/v1/channels/%s/manifest.m3u8' % channel_id,
|
||||
channel_name, ext='mp4', m3u8_id='hls', fatal=False)
|
||||
self._sort_formats(formats)
|
||||
|
||||
user_id = chan.get('userId') or try_get(chan, lambda x: x['user']['id'])
|
||||
|
||||
return {
|
||||
'id': compat_str(chan.get('id') or channel_name),
|
||||
'title': self._live_title(chan.get('name') or channel_name),
|
||||
'description': clean_html(chan.get('description')),
|
||||
'thumbnail': try_get(chan, lambda x: x['thumbnail']['url'], compat_str),
|
||||
'timestamp': parse_iso8601(chan.get('updatedAt')),
|
||||
'uploader': chan.get('token') or try_get(
|
||||
chan, lambda x: x['user']['username'], compat_str),
|
||||
'uploader_id': compat_str(user_id) if user_id else None,
|
||||
'age_limit': self._RATINGS.get(chan.get('audience')),
|
||||
'is_live': True,
|
||||
'view_count': int_or_none(chan.get('viewersTotal')),
|
||||
'formats': formats,
|
||||
}
|
@ -24,7 +24,7 @@ class BellMediaIE(InfoExtractor):
|
||||
space
|
||||
)\.ca|
|
||||
much\.com
|
||||
)/.*?(?:\bvid=|-vid|~|%7E|/(?:episode)?)(?P<id>[0-9]{6})'''
|
||||
)/.*?(?:\bvid=|-vid|~|%7E|/(?:episode)?)(?P<id>[0-9]{6,})'''
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ctv.ca/video/player?vid=706966',
|
||||
'md5': 'ff2ebbeae0aa2dcc32a830c3fd69b7b0',
|
||||
@ -55,6 +55,9 @@ class BellMediaIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://www.much.com/shows/the-almost-impossible-gameshow/928979/episode-6',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.ctv.ca/DCs-Legends-of-Tomorrow/Video/S2E11-Turncoat-vid1051430',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_DOMAINS = {
|
||||
'thecomedynetwork': 'comedy',
|
||||
|
@ -5,19 +5,27 @@ import hashlib
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_parse_qs
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
parse_iso8601,
|
||||
smuggle_url,
|
||||
strip_jsonp,
|
||||
unified_timestamp,
|
||||
unsmuggle_url,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class BiliBiliIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.|bangumi\.|)bilibili\.(?:tv|com)/(?:video/av|anime/v/)(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.|bangumi\.|)bilibili\.(?:tv|com)/(?:video/av|anime/(?P<anime_id>\d+)/play#)(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.bilibili.tv/video/av1074402/',
|
||||
'md5': '9fa226fe2b8a9a4d5a69b4c6a183417e',
|
||||
'info_dict': {
|
||||
@ -32,25 +40,61 @@ class BiliBiliIE(InfoExtractor):
|
||||
'uploader': '菊子桑',
|
||||
'uploader_id': '156160',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
# Tested in BiliBiliBangumiIE
|
||||
'url': 'http://bangumi.bilibili.com/anime/1869/play#40062',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://bangumi.bilibili.com/anime/5802/play#100643',
|
||||
'md5': '3f721ad1e75030cc06faf73587cfec57',
|
||||
'info_dict': {
|
||||
'id': '100643',
|
||||
'ext': 'mp4',
|
||||
'title': 'CHAOS;CHILD',
|
||||
'description': '如果你是神明,并且能够让妄想成为现实。那你会进行怎么样的妄想?是淫靡的世界?独裁社会?毁灭性的制裁?还是……2015年,涩谷。从6年前发生的大灾害“涩谷地震”之后复兴了的这个街区里新设立的私立高中...',
|
||||
},
|
||||
'skip': 'Geo-restricted to China',
|
||||
}]
|
||||
|
||||
_APP_KEY = '6f90a59ac58a4123'
|
||||
_BILIBILI_KEY = '0bfd84cc3940035173f35e6777508326'
|
||||
_APP_KEY = '84956560bc028eb7'
|
||||
_BILIBILI_KEY = '94aba54af9065f71de72f5508f1cd42e'
|
||||
|
||||
def _report_error(self, result):
|
||||
if 'message' in result:
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, result['message']), expected=True)
|
||||
elif 'code' in result:
|
||||
raise ExtractorError('%s returns error %d' % (self.IE_NAME, result['code']), expected=True)
|
||||
else:
|
||||
raise ExtractorError('Can\'t extract Bangumi episode ID')
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
url, smuggled_data = unsmuggle_url(url, {})
|
||||
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
anime_id = mobj.group('anime_id')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
if 'anime/v' not in url:
|
||||
if 'anime/' not in url:
|
||||
cid = compat_parse_qs(self._search_regex(
|
||||
[r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)',
|
||||
r'<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'],
|
||||
webpage, 'player parameters'))['cid'][0]
|
||||
else:
|
||||
if 'no_bangumi_tip' not in smuggled_data:
|
||||
self.to_screen('Downloading episode %s. To download all videos in anime %s, re-run youtube-dl with %s' % (
|
||||
video_id, anime_id, compat_urlparse.urljoin(url, '//bangumi.bilibili.com/anime/%s' % anime_id)))
|
||||
headers = {
|
||||
'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
|
||||
}
|
||||
headers.update(self.geo_verification_headers())
|
||||
|
||||
js = self._download_json(
|
||||
'http://bangumi.bilibili.com/web_api/get_source', video_id,
|
||||
data=urlencode_postdata({'episode_id': video_id}),
|
||||
headers={'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'})
|
||||
headers=headers)
|
||||
if 'result' not in js:
|
||||
self._report_error(js)
|
||||
cid = js['result']['cid']
|
||||
|
||||
payload = 'appkey=%s&cid=%s&otype=json&quality=2&type=mp4' % (self._APP_KEY, cid)
|
||||
@ -58,7 +102,11 @@ class BiliBiliIE(InfoExtractor):
|
||||
|
||||
video_info = self._download_json(
|
||||
'http://interface.bilibili.com/playurl?%s&sign=%s' % (payload, sign),
|
||||
video_id, note='Downloading video info page')
|
||||
video_id, note='Downloading video info page',
|
||||
headers=self.geo_verification_headers())
|
||||
|
||||
if 'durl' not in video_info:
|
||||
self._report_error(video_info)
|
||||
|
||||
entries = []
|
||||
|
||||
@ -85,7 +133,7 @@ class BiliBiliIE(InfoExtractor):
|
||||
title = self._html_search_regex('<h1[^>]+title="([^"]+)">', webpage, 'title')
|
||||
description = self._html_search_meta('description', webpage)
|
||||
timestamp = unified_timestamp(self._html_search_regex(
|
||||
r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', fatal=False))
|
||||
r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', default=None))
|
||||
thumbnail = self._html_search_meta(['og:image', 'thumbnailUrl'], webpage)
|
||||
|
||||
# TODO 'view_count' requires deobfuscating Javascript
|
||||
@ -99,7 +147,7 @@ class BiliBiliIE(InfoExtractor):
|
||||
}
|
||||
|
||||
uploader_mobj = re.search(
|
||||
r'<a[^>]+href="https?://space\.bilibili\.com/(?P<id>\d+)"[^>]+title="(?P<name>[^"]+)"',
|
||||
r'<a[^>]+href="(?:https?:)?//space\.bilibili\.com/(?P<id>\d+)"[^>]+title="(?P<name>[^"]+)"',
|
||||
webpage)
|
||||
if uploader_mobj:
|
||||
info.update({
|
||||
@ -123,3 +171,70 @@ class BiliBiliIE(InfoExtractor):
|
||||
'description': description,
|
||||
'entries': entries,
|
||||
}
|
||||
|
||||
|
||||
class BiliBiliBangumiIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://bangumi\.bilibili\.com/anime/(?P<id>\d+)'
|
||||
|
||||
IE_NAME = 'bangumi.bilibili.com'
|
||||
IE_DESC = 'BiliBili番剧'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://bangumi.bilibili.com/anime/1869',
|
||||
'info_dict': {
|
||||
'id': '1869',
|
||||
'title': '混沌武士',
|
||||
'description': 'md5:6a9622b911565794c11f25f81d6a97d2',
|
||||
},
|
||||
'playlist_count': 26,
|
||||
}, {
|
||||
'url': 'http://bangumi.bilibili.com/anime/1869',
|
||||
'info_dict': {
|
||||
'id': '1869',
|
||||
'title': '混沌武士',
|
||||
'description': 'md5:6a9622b911565794c11f25f81d6a97d2',
|
||||
},
|
||||
'playlist': [{
|
||||
'md5': '91da8621454dd58316851c27c68b0c13',
|
||||
'info_dict': {
|
||||
'id': '40062',
|
||||
'ext': 'mp4',
|
||||
'title': '混沌武士',
|
||||
'description': '故事发生在日本的江户时代。风是一个小酒馆的打工女。一日,酒馆里来了一群恶霸,虽然他们的举动令风十分不满,但是毕竟风只是一届女流,无法对他们采取什么行动,只能在心里嘟哝。这时,酒家里又进来了个“不良份子...',
|
||||
'timestamp': 1414538739,
|
||||
'upload_date': '20141028',
|
||||
'episode': '疾风怒涛 Tempestuous Temperaments',
|
||||
'episode_number': 1,
|
||||
},
|
||||
}],
|
||||
'params': {
|
||||
'playlist_items': '1',
|
||||
},
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if BiliBiliIE.suitable(url) else super(BiliBiliBangumiIE, cls).suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
bangumi_id = self._match_id(url)
|
||||
|
||||
# Sometimes this API returns a JSONP response
|
||||
season_info = self._download_json(
|
||||
'http://bangumi.bilibili.com/jsonp/seasoninfo/%s.ver' % bangumi_id,
|
||||
bangumi_id, transform_source=strip_jsonp)['result']
|
||||
|
||||
entries = [{
|
||||
'_type': 'url_transparent',
|
||||
'url': smuggle_url(episode['webplay_url'], {'no_bangumi_tip': 1}),
|
||||
'ie_key': BiliBiliIE.ie_key(),
|
||||
'timestamp': parse_iso8601(episode.get('update_time'), delimiter=' '),
|
||||
'episode': episode.get('index_title'),
|
||||
'episode_number': int_or_none(episode.get('index')),
|
||||
} for episode in season_info['episodes']]
|
||||
|
||||
entries = sorted(entries, key=lambda entry: entry.get('episode_number'))
|
||||
|
||||
return self.playlist_result(
|
||||
entries, bangumi_id,
|
||||
season_info.get('bangumi_title'), season_info.get('evaluate'))
|
||||
|
@ -33,6 +33,10 @@ class BloombergIE(InfoExtractor):
|
||||
'params': {
|
||||
'format': 'best[format_id^=hds]',
|
||||
},
|
||||
}, {
|
||||
# data-bmmrid=
|
||||
'url': 'https://www.bloomberg.com/politics/articles/2017-02-08/le-pen-aide-briefed-french-central-banker-on-plan-to-print-money',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.bloomberg.com/news/articles/2015-11-12/five-strange-things-that-have-been-happening-in-financial-markets',
|
||||
'only_matching': True,
|
||||
@ -45,9 +49,10 @@ class BloombergIE(InfoExtractor):
|
||||
name = self._match_id(url)
|
||||
webpage = self._download_webpage(url, name)
|
||||
video_id = self._search_regex(
|
||||
(r'["\']bmmrId["\']\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||
r'videoId\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1'),
|
||||
webpage, 'id', group='url', default=None)
|
||||
(r'["\']bmmrId["\']\s*:\s*(["\'])(?P<id>(?:(?!\1).)+)\1',
|
||||
r'videoId\s*:\s*(["\'])(?P<id>(?:(?!\1).)+)\1',
|
||||
r'data-bmmrid=(["\'])(?P<id>(?:(?!\1).)+)\1'),
|
||||
webpage, 'id', group='id', default=None)
|
||||
if not video_id:
|
||||
bplayer_data = self._parse_json(self._search_regex(
|
||||
r'BPlayer\(null,\s*({[^;]+})\);', webpage, 'id'), name)
|
||||
|
@ -179,7 +179,7 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
|
||||
params = {}
|
||||
|
||||
playerID = find_param('playerID')
|
||||
playerID = find_param('playerID') or find_param('playerId')
|
||||
if playerID is None:
|
||||
raise ExtractorError('Cannot find player ID')
|
||||
params['playerID'] = playerID
|
||||
|
@ -27,6 +27,7 @@ class CanalplusIE(InfoExtractor):
|
||||
(?:www\.)?d8\.tv|
|
||||
(?:www\.)?c8\.fr|
|
||||
(?:www\.)?d17\.tv|
|
||||
(?:(?:football|www)\.)?cstar\.fr|
|
||||
(?:www\.)?itele\.fr
|
||||
)/(?:(?:[^/]+/)*(?P<display_id>[^/?#&]+))?(?:\?.*\bvid=(?P<vid>\d+))?|
|
||||
player\.canalplus\.fr/#/(?P<id>\d+)
|
||||
@ -40,6 +41,7 @@ class CanalplusIE(InfoExtractor):
|
||||
'd8': 'd8',
|
||||
'c8': 'd8',
|
||||
'd17': 'd17',
|
||||
'cstar': 'd17',
|
||||
'itele': 'itele',
|
||||
}
|
||||
|
||||
@ -86,6 +88,19 @@ class CanalplusIE(InfoExtractor):
|
||||
'description': 'Chaque matin du lundi au vendredi, Michaël Darmon reçoit un invité politique à 8h25.',
|
||||
'upload_date': '20161014',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://football.cstar.fr/cstar-minisite-foot/pid7566-feminines-videos.html?vid=1416769',
|
||||
'info_dict': {
|
||||
'id': '1416769',
|
||||
'display_id': 'pid7566-feminines-videos',
|
||||
'ext': 'mp4',
|
||||
'title': 'France - Albanie : les temps forts de la soirée - 20/09/2016',
|
||||
'description': 'md5:c3f30f2aaac294c1c969b3294de6904e',
|
||||
'upload_date': '20160921',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://m.canalplus.fr/?vid=1398231',
|
||||
'only_matching': True,
|
||||
@ -107,7 +122,7 @@ class CanalplusIE(InfoExtractor):
|
||||
[r'<canal:player[^>]+?videoId=(["\'])(?P<id>\d+)',
|
||||
r'id=["\']canal_video_player(?P<id>\d+)',
|
||||
r'data-video=["\'](?P<id>\d+)'],
|
||||
webpage, 'video id', group='id')
|
||||
webpage, 'video id', default=mobj.group('vid'), group='id')
|
||||
|
||||
info_url = self._VIDEO_INFO_TEMPLATE % (site_id, video_id)
|
||||
video_data = self._download_json(info_url, video_id, 'Downloading video JSON')
|
||||
|
@ -90,36 +90,49 @@ class CBCIE(InfoExtractor):
|
||||
},
|
||||
}],
|
||||
'skip': 'Geo-restricted to Canada',
|
||||
}, {
|
||||
# multiple CBC.APP.Caffeine.initInstance(...)
|
||||
'url': 'http://www.cbc.ca/news/canada/calgary/dog-indoor-exercise-winter-1.3928238',
|
||||
'info_dict': {
|
||||
'title': 'Keep Rover active during the deep freeze with doggie pushups and other fun indoor tasks',
|
||||
'id': 'dog-indoor-exercise-winter-1.3928238',
|
||||
},
|
||||
'playlist_mincount': 6,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if CBCPlayerIE.suitable(url) else super(CBCIE, cls).suitable(url)
|
||||
|
||||
def _extract_player_init(self, player_init, display_id):
|
||||
player_info = self._parse_json(player_init, display_id, js_to_json)
|
||||
media_id = player_info.get('mediaId')
|
||||
if not media_id:
|
||||
clip_id = player_info['clipId']
|
||||
feed = self._download_json(
|
||||
'http://tpfeed.cbc.ca/f/ExhSPC/vms_5akSXx4Ng_Zn?byCustomValue={:mpsReleases}{%s}' % clip_id,
|
||||
clip_id, fatal=False)
|
||||
if feed:
|
||||
media_id = try_get(feed, lambda x: x['entries'][0]['guid'], compat_str)
|
||||
if not media_id:
|
||||
media_id = self._download_json(
|
||||
'http://feed.theplatform.com/f/h9dtGB/punlNGjMlc1F?fields=id&byContent=byReleases%3DbyId%253D' + clip_id,
|
||||
clip_id)['entries'][0]['id'].split('/')[-1]
|
||||
return self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id)
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
player_init = self._search_regex(
|
||||
r'CBC\.APP\.Caffeine\.initInstance\(({.+?})\);', webpage, 'player init',
|
||||
default=None)
|
||||
if player_init:
|
||||
player_info = self._parse_json(player_init, display_id, js_to_json)
|
||||
media_id = player_info.get('mediaId')
|
||||
if not media_id:
|
||||
clip_id = player_info['clipId']
|
||||
feed = self._download_json(
|
||||
'http://tpfeed.cbc.ca/f/ExhSPC/vms_5akSXx4Ng_Zn?byCustomValue={:mpsReleases}{%s}' % clip_id,
|
||||
clip_id, fatal=False)
|
||||
if feed:
|
||||
media_id = try_get(feed, lambda x: x['entries'][0]['guid'], compat_str)
|
||||
if not media_id:
|
||||
media_id = self._download_json(
|
||||
'http://feed.theplatform.com/f/h9dtGB/punlNGjMlc1F?fields=id&byContent=byReleases%3DbyId%253D' + clip_id,
|
||||
clip_id)['entries'][0]['id'].split('/')[-1]
|
||||
return self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id)
|
||||
else:
|
||||
entries = [self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id) for media_id in re.findall(r'<iframe[^>]+src="[^"]+?mediaId=(\d+)"', webpage)]
|
||||
return self.playlist_result(entries)
|
||||
entries = [
|
||||
self._extract_player_init(player_init, display_id)
|
||||
for player_init in re.findall(r'CBC\.APP\.Caffeine\.initInstance\(({.+?})\);', webpage)]
|
||||
entries.extend([
|
||||
self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id)
|
||||
for media_id in re.findall(r'<iframe[^>]+src="[^"]+?mediaId=(\d+)"', webpage)])
|
||||
return self.playlist_result(
|
||||
entries, display_id,
|
||||
self._og_search_title(webpage, fatal=False),
|
||||
self._og_search_description(webpage))
|
||||
|
||||
|
||||
class CBCPlayerIE(InfoExtractor):
|
||||
@ -283,6 +296,12 @@ class CBCWatchVideoIE(CBCWatchBaseIE):
|
||||
formats = self._extract_m3u8_formats(re.sub(r'/([^/]+)/[^/?]+\.m3u8', r'/\1/\1.m3u8', m3u8_url), video_id, 'mp4', fatal=False)
|
||||
if len(formats) < 2:
|
||||
formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
|
||||
for f in formats:
|
||||
format_id = f.get('format_id')
|
||||
if format_id.startswith('AAC'):
|
||||
f['acodec'] = 'aac'
|
||||
elif format_id.startswith('AC3'):
|
||||
f['acodec'] = 'ac-3'
|
||||
self._sort_formats(formats)
|
||||
|
||||
info = {
|
||||
|
@ -1,5 +1,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError
|
||||
|
||||
@ -31,30 +33,35 @@ class ChaturbateIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
m3u8_url = self._search_regex(
|
||||
r'src=(["\'])(?P<url>http.+?\.m3u8.*?)\1', webpage,
|
||||
'playlist', default=None, group='url')
|
||||
m3u8_formats = [(m.group('id').lower(), m.group('url')) for m in re.finditer(
|
||||
r'hlsSource(?P<id>.+?)\s*=\s*(?P<q>["\'])(?P<url>http.+?)(?P=q)', webpage)]
|
||||
|
||||
if not m3u8_url:
|
||||
if not m3u8_formats:
|
||||
error = self._search_regex(
|
||||
[r'<span[^>]+class=(["\'])desc_span\1[^>]*>(?P<error>[^<]+)</span>',
|
||||
r'<div[^>]+id=(["\'])defchat\1[^>]*>\s*<p><strong>(?P<error>[^<]+)<'],
|
||||
webpage, 'error', group='error', default=None)
|
||||
if not error:
|
||||
if any(p not in webpage for p in (
|
||||
if any(p in webpage for p in (
|
||||
self._ROOM_OFFLINE, 'offline_tipping', 'tip_offline')):
|
||||
error = self._ROOM_OFFLINE
|
||||
if error:
|
||||
raise ExtractorError(error, expected=True)
|
||||
raise ExtractorError('Unable to find stream URL')
|
||||
|
||||
formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')
|
||||
formats = []
|
||||
for m3u8_id, m3u8_url in m3u8_formats:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, ext='mp4',
|
||||
# ffmpeg skips segments for fast m3u8
|
||||
preference=-10 if m3u8_id == 'fast' else None,
|
||||
m3u8_id=m3u8_id, fatal=False, live=True))
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': self._live_title(video_id),
|
||||
'thumbnail': 'https://cdn-s.highwebmedia.com/uHK3McUtGCG3SMFcd4ZJsRv8/roomimage/%s.jpg' % video_id,
|
||||
'thumbnail': 'https://roomimg.stream.highwebmedia.com/ri/%s.jpg' % video_id,
|
||||
'age_limit': self._rta_search(webpage),
|
||||
'is_live': True,
|
||||
'formats': formats,
|
||||
|
@ -19,6 +19,7 @@ class ChirbitIE(InfoExtractor):
|
||||
'title': 'md5:f542ea253f5255240be4da375c6a5d7e',
|
||||
'description': 'md5:f24a4e22a71763e32da5fed59e47c770',
|
||||
'duration': 306,
|
||||
'uploader': 'Gerryaudio',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@ -54,6 +55,9 @@ class ChirbitIE(InfoExtractor):
|
||||
duration = parse_duration(self._search_regex(
|
||||
r'class=["\']c-length["\'][^>]*>([^<]+)',
|
||||
webpage, 'duration', fatal=False))
|
||||
uploader = self._search_regex(
|
||||
r'id=["\']chirbit-username["\'][^>]*>([^<]+)',
|
||||
webpage, 'uploader', fatal=False)
|
||||
|
||||
return {
|
||||
'id': audio_id,
|
||||
@ -61,6 +65,7 @@ class ChirbitIE(InfoExtractor):
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'uploader': uploader,
|
||||
}
|
||||
|
||||
|
||||
|
@ -5,7 +5,7 @@ from .mtv import MTVIE
|
||||
|
||||
class CMTIE(MTVIE):
|
||||
IE_NAME = 'cmt.com'
|
||||
_VALID_URL = r'https?://(?:www\.)?cmt\.com/(?:videos|shows|full-episodes|video-clips)/(?P<id>[^/]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?cmt\.com/(?:videos|shows|(?:full-)?episodes|video-clips)/(?P<id>[^/]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.cmt.com/videos/garth-brooks/989124/the-call-featuring-trisha-yearwood.jhtml#artist=30061',
|
||||
|
@ -121,9 +121,19 @@ class InfoExtractor(object):
|
||||
download, lower-case.
|
||||
"http", "https", "rtsp", "rtmp", "rtmpe",
|
||||
"m3u8", "m3u8_native" or "http_dash_segments".
|
||||
* fragments A list of fragments of the fragmented media,
|
||||
with the following entries:
|
||||
* "url" (mandatory) - fragment's URL
|
||||
* fragment_base_url
|
||||
Base URL for fragments. Each fragment's path
|
||||
value (if present) will be relative to
|
||||
this URL.
|
||||
* fragments A list of fragments of a fragmented media.
|
||||
Each fragment entry must contain either an url
|
||||
or a path. If an url is present it should be
|
||||
considered by a client. Otherwise both path and
|
||||
fragment_base_url must be present. Here is
|
||||
the list of all potential fields:
|
||||
* "url" - fragment's URL
|
||||
* "path" - fragment's path relative to
|
||||
fragment_base_url
|
||||
* "duration" (optional, int or float)
|
||||
* "filesize" (optional, int)
|
||||
* preference Order number of this format. If this field is
|
||||
@ -1015,13 +1025,13 @@ class InfoExtractor(object):
|
||||
unique_formats.append(f)
|
||||
formats[:] = unique_formats
|
||||
|
||||
def _is_valid_url(self, url, video_id, item='video'):
|
||||
def _is_valid_url(self, url, video_id, item='video', headers={}):
|
||||
url = self._proto_relative_url(url, scheme='http:')
|
||||
# For now assume non HTTP(S) URLs always valid
|
||||
if not (url.startswith('http://') or url.startswith('https://')):
|
||||
return True
|
||||
try:
|
||||
self._request_webpage(url, video_id, 'Checking %s URL' % item)
|
||||
self._request_webpage(url, video_id, 'Checking %s URL' % item, headers=headers)
|
||||
return True
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_urllib_error.URLError):
|
||||
@ -1198,6 +1208,9 @@ class InfoExtractor(object):
|
||||
m3u8_doc, urlh = res
|
||||
m3u8_url = urlh.geturl()
|
||||
|
||||
if '#EXT-X-FAXS-CM:' in m3u8_doc: # Adobe Flash Access
|
||||
return []
|
||||
|
||||
formats = [self._m3u8_meta_format(m3u8_url, ext, preference, m3u8_id)]
|
||||
|
||||
format_url = lambda u: (
|
||||
@ -1305,8 +1318,8 @@ class InfoExtractor(object):
|
||||
'abr': abr,
|
||||
})
|
||||
f.update(parse_codecs(last_info.get('CODECS')))
|
||||
if audio_in_video_stream.get(last_info.get('AUDIO')) is False:
|
||||
# TODO: update acodec for for audio only formats with the same GROUP-ID
|
||||
if audio_in_video_stream.get(last_info.get('AUDIO')) is False and f['vcodec'] != 'none':
|
||||
# TODO: update acodec for audio only formats with the same GROUP-ID
|
||||
f['acodec'] = 'none'
|
||||
formats.append(f)
|
||||
last_info = {}
|
||||
@ -1627,12 +1640,12 @@ class InfoExtractor(object):
|
||||
segment_template = element.find(_add_ns('SegmentTemplate'))
|
||||
if segment_template is not None:
|
||||
extract_common(segment_template)
|
||||
media_template = segment_template.get('media')
|
||||
if media_template:
|
||||
ms_info['media_template'] = media_template
|
||||
media = segment_template.get('media')
|
||||
if media:
|
||||
ms_info['media'] = media
|
||||
initialization = segment_template.get('initialization')
|
||||
if initialization:
|
||||
ms_info['initialization_url'] = initialization
|
||||
ms_info['initialization'] = initialization
|
||||
else:
|
||||
extract_Initialization(segment_template)
|
||||
return ms_info
|
||||
@ -1676,6 +1689,7 @@ class InfoExtractor(object):
|
||||
lang = representation_attrib.get('lang')
|
||||
url_el = representation.find(_add_ns('BaseURL'))
|
||||
filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength') if url_el is not None else None)
|
||||
bandwidth = int_or_none(representation_attrib.get('bandwidth'))
|
||||
f = {
|
||||
'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id,
|
||||
'url': base_url,
|
||||
@ -1683,7 +1697,7 @@ class InfoExtractor(object):
|
||||
'ext': mimetype2ext(mime_type),
|
||||
'width': int_or_none(representation_attrib.get('width')),
|
||||
'height': int_or_none(representation_attrib.get('height')),
|
||||
'tbr': int_or_none(representation_attrib.get('bandwidth'), 1000),
|
||||
'tbr': int_or_none(bandwidth, 1000),
|
||||
'asr': int_or_none(representation_attrib.get('audioSamplingRate')),
|
||||
'fps': int_or_none(representation_attrib.get('frameRate')),
|
||||
'language': lang if lang not in ('mul', 'und', 'zxx', 'mis') else None,
|
||||
@ -1692,13 +1706,32 @@ class InfoExtractor(object):
|
||||
}
|
||||
f.update(parse_codecs(representation_attrib.get('codecs')))
|
||||
representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
|
||||
if 'segment_urls' not in representation_ms_info and 'media_template' in representation_ms_info:
|
||||
|
||||
media_template = representation_ms_info['media_template']
|
||||
media_template = media_template.replace('$RepresentationID$', representation_id)
|
||||
media_template = re.sub(r'\$(Number|Bandwidth|Time)\$', r'%(\1)d', media_template)
|
||||
media_template = re.sub(r'\$(Number|Bandwidth|Time)%([^$]+)\$', r'%(\1)\2', media_template)
|
||||
media_template.replace('$$', '$')
|
||||
def prepare_template(template_name, identifiers):
|
||||
t = representation_ms_info[template_name]
|
||||
t = t.replace('$RepresentationID$', representation_id)
|
||||
t = re.sub(r'\$(%s)\$' % '|'.join(identifiers), r'%(\1)d', t)
|
||||
t = re.sub(r'\$(%s)%%([^$]+)\$' % '|'.join(identifiers), r'%(\1)\2', t)
|
||||
t.replace('$$', '$')
|
||||
return t
|
||||
|
||||
# @initialization is a regular template like @media one
|
||||
# so it should be handled just the same way (see
|
||||
# https://github.com/rg3/youtube-dl/issues/11605)
|
||||
if 'initialization' in representation_ms_info:
|
||||
initialization_template = prepare_template(
|
||||
'initialization',
|
||||
# As per [1, 5.3.9.4.2, Table 15, page 54] $Number$ and
|
||||
# $Time$ shall not be included for @initialization thus
|
||||
# only $Bandwidth$ remains
|
||||
('Bandwidth', ))
|
||||
representation_ms_info['initialization_url'] = initialization_template % {
|
||||
'Bandwidth': bandwidth,
|
||||
}
|
||||
|
||||
if 'segment_urls' not in representation_ms_info and 'media' in representation_ms_info:
|
||||
|
||||
media_template = prepare_template('media', ('Number', 'Bandwidth', 'Time'))
|
||||
|
||||
# As per [1, 5.3.9.4.4, Table 16, page 55] $Number$ and $Time$
|
||||
# can't be used at the same time
|
||||
@ -1710,7 +1743,7 @@ class InfoExtractor(object):
|
||||
representation_ms_info['fragments'] = [{
|
||||
'url': media_template % {
|
||||
'Number': segment_number,
|
||||
'Bandwidth': int_or_none(representation_attrib.get('bandwidth')),
|
||||
'Bandwidth': bandwidth,
|
||||
},
|
||||
'duration': segment_duration,
|
||||
} for segment_number in range(
|
||||
@ -1728,7 +1761,7 @@ class InfoExtractor(object):
|
||||
def add_segment_url():
|
||||
segment_url = media_template % {
|
||||
'Time': segment_time,
|
||||
'Bandwidth': int_or_none(representation_attrib.get('bandwidth')),
|
||||
'Bandwidth': bandwidth,
|
||||
'Number': segment_number,
|
||||
}
|
||||
representation_ms_info['fragments'].append({
|
||||
@ -1751,14 +1784,16 @@ class InfoExtractor(object):
|
||||
# Example: https://www.youtube.com/watch?v=iXZV5uAYMJI
|
||||
# or any YouTube dashsegments video
|
||||
fragments = []
|
||||
s_num = 0
|
||||
for segment_url in representation_ms_info['segment_urls']:
|
||||
s = representation_ms_info['s'][s_num]
|
||||
segment_index = 0
|
||||
timescale = representation_ms_info['timescale']
|
||||
for s in representation_ms_info['s']:
|
||||
duration = float_or_none(s['d'], timescale)
|
||||
for r in range(s.get('r', 0) + 1):
|
||||
fragments.append({
|
||||
'url': segment_url,
|
||||
'duration': float_or_none(s['d'], representation_ms_info['timescale']),
|
||||
'url': representation_ms_info['segment_urls'][segment_index],
|
||||
'duration': duration,
|
||||
})
|
||||
segment_index += 1
|
||||
representation_ms_info['fragments'] = fragments
|
||||
# NB: MPD manifest may contain direct URLs to unfragmented media.
|
||||
# No fragments key is present in this case.
|
||||
@ -1768,7 +1803,7 @@ class InfoExtractor(object):
|
||||
'protocol': 'http_dash_segments',
|
||||
})
|
||||
if 'initialization_url' in representation_ms_info:
|
||||
initialization_url = representation_ms_info['initialization_url'].replace('$RepresentationID$', representation_id)
|
||||
initialization_url = representation_ms_info['initialization_url']
|
||||
if not f.get('url'):
|
||||
f['url'] = initialization_url
|
||||
f['fragments'].append({'url': initialization_url})
|
||||
@ -1927,7 +1962,12 @@ class InfoExtractor(object):
|
||||
media_tags = [(media_tag, media_type, '')
|
||||
for media_tag, media_type
|
||||
in re.findall(r'(?s)(<(video|audio)[^>]*/>)', webpage)]
|
||||
media_tags.extend(re.findall(r'(?s)(<(?P<tag>video|audio)[^>]*>)(.*?)</(?P=tag)>', webpage))
|
||||
media_tags.extend(re.findall(
|
||||
# We only allow video|audio followed by a whitespace or '>'.
|
||||
# Allowing more characters may end up in significant slow down (see
|
||||
# https://github.com/rg3/youtube-dl/issues/11979, example URL:
|
||||
# http://www.porntrex.com/maps/videositemap.xml).
|
||||
r'(?s)(<(?P<tag>video|audio)(?:\s+[^>]*)?>)(.*?)</(?P=tag)>', webpage))
|
||||
for media_tag, media_type, media_content in media_tags:
|
||||
media_info = {
|
||||
'formats': [],
|
||||
|
@ -7,7 +7,7 @@ from ..utils import ExtractorError
|
||||
class CommonMistakesIE(InfoExtractor):
|
||||
IE_DESC = False # Do not list
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:url|URL)
|
||||
(?:url|URL)$
|
||||
'''
|
||||
|
||||
_TESTS = [{
|
||||
|
72
youtube_dl/extractor/corus.py
Normal file
72
youtube_dl/extractor/corus.py
Normal file
@ -0,0 +1,72 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .theplatform import ThePlatformFeedIE
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class CorusIE(ThePlatformFeedIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:globaltv|etcanada)\.com|(?:hgtv|foodnetwork|slice)\.ca)/(?:video/|(?:[^/]+/)+(?:videos/[a-z0-9-]+-|video\.html\?.*?\bv=))(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.hgtv.ca/shows/bryan-inc/videos/movie-night-popcorn-with-bryan-870923331648/',
|
||||
'md5': '05dcbca777bf1e58c2acbb57168ad3a6',
|
||||
'info_dict': {
|
||||
'id': '870923331648',
|
||||
'ext': 'mp4',
|
||||
'title': 'Movie Night Popcorn with Bryan',
|
||||
'description': 'Bryan whips up homemade popcorn, the old fashion way for Jojo and Lincoln.',
|
||||
'uploader': 'SHWM-NEW',
|
||||
'upload_date': '20170206',
|
||||
'timestamp': 1486392197,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.foodnetwork.ca/shows/chopped/video/episode/chocolate-obsession/video.html?v=872683587753',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://etcanada.com/video/873675331955/meet-the-survivor-game-changers-castaways-part-2/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_TP_FEEDS = {
|
||||
'globaltv': {
|
||||
'feed_id': 'ChQqrem0lNUp',
|
||||
'account_id': 2269680845,
|
||||
},
|
||||
'etcanada': {
|
||||
'feed_id': 'ChQqrem0lNUp',
|
||||
'account_id': 2269680845,
|
||||
},
|
||||
'hgtv': {
|
||||
'feed_id': 'L0BMHXi2no43',
|
||||
'account_id': 2414428465,
|
||||
},
|
||||
'foodnetwork': {
|
||||
'feed_id': 'ukK8o58zbRmJ',
|
||||
'account_id': 2414429569,
|
||||
},
|
||||
'slice': {
|
||||
'feed_id': '5tUJLgV2YNJ5',
|
||||
'account_id': 2414427935,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
domain, video_id = re.match(self._VALID_URL, url).groups()
|
||||
feed_info = self._TP_FEEDS[domain.split('.')[0]]
|
||||
return self._extract_feed_info('dtjsEC', feed_info['feed_id'], 'byId=' + video_id, video_id, lambda e: {
|
||||
'episode_number': int_or_none(e.get('pl1$episode')),
|
||||
'season_number': int_or_none(e.get('pl1$season')),
|
||||
'series': e.get('pl1$show'),
|
||||
}, {
|
||||
'HLS': {
|
||||
'manifest': 'm3u',
|
||||
},
|
||||
'DesktopHLS Default': {
|
||||
'manifest': 'm3u',
|
||||
},
|
||||
'MP4 MBR': {
|
||||
'manifest': 'm3u',
|
||||
},
|
||||
}, feed_info['account_id'])
|
@ -6,7 +6,7 @@ from ..utils import int_or_none
|
||||
|
||||
|
||||
class CrackleIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:crackle:|https?://(?:www\.)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)'
|
||||
_VALID_URL = r'(?:crackle:|https?://(?:(?:www|m)\.)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.crackle.com/comedians-in-cars-getting-coffee/2498934',
|
||||
'info_dict': {
|
||||
@ -31,8 +31,32 @@ class CrackleIE(InfoExtractor):
|
||||
}
|
||||
}
|
||||
|
||||
_THUMBNAIL_RES = [
|
||||
(120, 90),
|
||||
(208, 156),
|
||||
(220, 124),
|
||||
(220, 220),
|
||||
(240, 180),
|
||||
(250, 141),
|
||||
(315, 236),
|
||||
(320, 180),
|
||||
(360, 203),
|
||||
(400, 300),
|
||||
(421, 316),
|
||||
(460, 330),
|
||||
(460, 460),
|
||||
(462, 260),
|
||||
(480, 270),
|
||||
(587, 330),
|
||||
(640, 480),
|
||||
(700, 330),
|
||||
(700, 394),
|
||||
(854, 480),
|
||||
(1024, 1024),
|
||||
(1920, 1080),
|
||||
]
|
||||
|
||||
# extracted from http://legacyweb-us.crackle.com/flash/ReferrerRedirect.ashx
|
||||
_THUMBNAIL_TEMPLATE = 'http://images-us-am.crackle.com/%stnl_1920x1080.jpg?ts=20140107233116?c=635333335057637614'
|
||||
_MEDIA_FILE_SLOTS = {
|
||||
'c544.flv': {
|
||||
'width': 544,
|
||||
@ -61,17 +85,25 @@ class CrackleIE(InfoExtractor):
|
||||
|
||||
item = self._download_xml(
|
||||
'http://legacyweb-us.crackle.com/app/revamp/vidwallcache.aspx?flags=-1&fm=%s' % video_id,
|
||||
video_id).find('i')
|
||||
video_id, headers=self.geo_verification_headers()).find('i')
|
||||
title = item.attrib['t']
|
||||
|
||||
subtitles = {}
|
||||
formats = self._extract_m3u8_formats(
|
||||
'http://content.uplynk.com/ext/%s/%s.m3u8' % (config_doc.attrib['strUplynkOwnerId'], video_id),
|
||||
video_id, 'mp4', m3u8_id='hls', fatal=None)
|
||||
thumbnail = None
|
||||
thumbnails = []
|
||||
path = item.attrib.get('p')
|
||||
if path:
|
||||
thumbnail = self._THUMBNAIL_TEMPLATE % path
|
||||
for width, height in self._THUMBNAIL_RES:
|
||||
res = '%dx%d' % (width, height)
|
||||
thumbnails.append({
|
||||
'id': res,
|
||||
'url': 'http://images-us-am.crackle.com/%stnl_%s.jpg' % (path, res),
|
||||
'width': width,
|
||||
'height': height,
|
||||
'resolution': res,
|
||||
})
|
||||
http_base_url = 'http://ahttp.crackle.com/' + path
|
||||
for mfs_path, mfs_info in self._MEDIA_FILE_SLOTS.items():
|
||||
formats.append({
|
||||
@ -86,10 +118,11 @@ class CrackleIE(InfoExtractor):
|
||||
if locale and v:
|
||||
if locale not in subtitles:
|
||||
subtitles[locale] = []
|
||||
subtitles[locale] = [{
|
||||
'url': '%s/%s%s_%s.xml' % (config_doc.attrib['strSubtitleServer'], path, locale, v),
|
||||
'ext': 'ttml',
|
||||
}]
|
||||
for url_ext, ext in (('vtt', 'vtt'), ('xml', 'tt')):
|
||||
subtitles.setdefault(locale, []).append({
|
||||
'url': '%s/%s%s_%s.%s' % (config_doc.attrib['strSubtitleServer'], path, locale, v, url_ext),
|
||||
'ext': ext,
|
||||
})
|
||||
self._sort_formats(formats, ('width', 'height', 'tbr', 'format_id'))
|
||||
|
||||
return {
|
||||
@ -100,7 +133,7 @@ class CrackleIE(InfoExtractor):
|
||||
'series': item.attrib.get('sn'),
|
||||
'season_number': int_or_none(item.attrib.get('se')),
|
||||
'episode_number': int_or_none(item.attrib.get('ep')),
|
||||
'thumbnail': thumbnail,
|
||||
'thumbnails': thumbnails,
|
||||
'subtitles': subtitles,
|
||||
'formats': formats,
|
||||
}
|
||||
|
@ -166,6 +166,25 @@ class CrunchyrollIE(CrunchyrollBaseIE):
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.crunchyroll.com/konosuba-gods-blessing-on-this-wonderful-world/episode-1-give-me-deliverance-from-this-judicial-injustice-727589',
|
||||
'info_dict': {
|
||||
'id': '727589',
|
||||
'ext': 'mp4',
|
||||
'title': "KONOSUBA -God's blessing on this wonderful world! 2 Episode 1 – Give Me Deliverance from this Judicial Injustice!",
|
||||
'description': 'md5:cbcf05e528124b0f3a0a419fc805ea7d',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': 'Kadokawa Pictures Inc.',
|
||||
'upload_date': '20170118',
|
||||
'series': "KONOSUBA -God's blessing on this wonderful world!",
|
||||
'season_number': 2,
|
||||
'episode': 'Give Me Deliverance from this Judicial Injustice!',
|
||||
'episode_number': 1,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.crunchyroll.fr/girl-friend-beta/episode-11-goodbye-la-mode-661697',
|
||||
'only_matching': True,
|
||||
@ -236,8 +255,7 @@ class CrunchyrollIE(CrunchyrollBaseIE):
|
||||
output += 'WrapStyle: %s\n' % sub_root.attrib['wrap_style']
|
||||
output += 'PlayResX: %s\n' % sub_root.attrib['play_res_x']
|
||||
output += 'PlayResY: %s\n' % sub_root.attrib['play_res_y']
|
||||
output += """ScaledBorderAndShadow: no
|
||||
|
||||
output += """
|
||||
[V4+ Styles]
|
||||
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
|
||||
"""
|
||||
@ -439,6 +457,18 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
|
||||
subtitles = self.extract_subtitles(video_id, webpage)
|
||||
|
||||
# webpage provide more accurate data than series_title from XML
|
||||
series = self._html_search_regex(
|
||||
r'id=["\']showmedia_about_episode_num[^>]+>\s*<a[^>]+>([^<]+)',
|
||||
webpage, 'series', default=xpath_text(metadata, 'series_title'))
|
||||
|
||||
episode = xpath_text(metadata, 'episode_title')
|
||||
episode_number = int_or_none(xpath_text(metadata, 'episode_number'))
|
||||
|
||||
season_number = int_or_none(self._search_regex(
|
||||
r'(?s)<h4[^>]+id=["\']showmedia_about_episode_num[^>]+>.+?</h4>\s*<h4>\s*Season (\d+)',
|
||||
webpage, 'season number', default=None))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
@ -446,9 +476,10 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
'thumbnail': xpath_text(metadata, 'episode_image_url'),
|
||||
'uploader': video_uploader,
|
||||
'upload_date': video_upload_date,
|
||||
'series': xpath_text(metadata, 'series_title'),
|
||||
'episode': xpath_text(metadata, 'episode_title'),
|
||||
'episode_number': int_or_none(xpath_text(metadata, 'episode_number')),
|
||||
'series': series,
|
||||
'season_number': season_number,
|
||||
'episode': episode,
|
||||
'episode_number': episode_number,
|
||||
'subtitles': subtitles,
|
||||
'formats': formats,
|
||||
}
|
||||
|
@ -12,6 +12,7 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
)
|
||||
from .senateisvp import SenateISVPIE
|
||||
from .ustream import UstreamIE
|
||||
|
||||
|
||||
class CSpanIE(InfoExtractor):
|
||||
@ -22,14 +23,13 @@ class CSpanIE(InfoExtractor):
|
||||
'md5': '94b29a4f131ff03d23471dd6f60b6a1d',
|
||||
'info_dict': {
|
||||
'id': '315139',
|
||||
'ext': 'mp4',
|
||||
'title': 'Attorney General Eric Holder on Voting Rights Act Decision',
|
||||
'description': 'Attorney General Eric Holder speaks to reporters following the Supreme Court decision in [Shelby County v. Holder], in which the court ruled that the preclearance provisions of the Voting Rights Act could not be enforced.',
|
||||
},
|
||||
'playlist_mincount': 2,
|
||||
'skip': 'Regularly fails on travis, for unknown reasons',
|
||||
}, {
|
||||
'url': 'http://www.c-span.org/video/?c4486943/cspan-international-health-care-models',
|
||||
'md5': '8e5fbfabe6ad0f89f3012a7943c1287b',
|
||||
# md5 is unstable
|
||||
'info_dict': {
|
||||
'id': 'c4486943',
|
||||
'ext': 'mp4',
|
||||
@ -38,14 +38,11 @@ class CSpanIE(InfoExtractor):
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.c-span.org/video/?318608-1/gm-ignition-switch-recall',
|
||||
'md5': '2ae5051559169baadba13fc35345ae74',
|
||||
'info_dict': {
|
||||
'id': '342759',
|
||||
'ext': 'mp4',
|
||||
'title': 'General Motors Ignition Switch Recall',
|
||||
'duration': 14848,
|
||||
'description': 'md5:118081aedd24bf1d3b68b3803344e7f3'
|
||||
},
|
||||
'playlist_mincount': 6,
|
||||
}, {
|
||||
# Video from senate.gov
|
||||
'url': 'http://www.c-span.org/video/?104517-1/immigration-reforms-needed-protect-skilled-american-workers',
|
||||
@ -57,12 +54,30 @@ class CSpanIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True, # m3u8 downloads
|
||||
}
|
||||
}, {
|
||||
# Ustream embedded video
|
||||
'url': 'https://www.c-span.org/video/?114917-1/armed-services',
|
||||
'info_dict': {
|
||||
'id': '58428542',
|
||||
'ext': 'flv',
|
||||
'title': 'USHR07 Armed Services Committee',
|
||||
'description': 'hsas00-2118-20150204-1000et-07\n\n\nUSHR07 Armed Services Committee',
|
||||
'timestamp': 1423060374,
|
||||
'upload_date': '20150204',
|
||||
'uploader': 'HouseCommittee',
|
||||
'uploader_id': '12987475',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
video_type = None
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
ustream_url = UstreamIE._extract_url(webpage)
|
||||
if ustream_url:
|
||||
return self.url_result(ustream_url, UstreamIE.ie_key())
|
||||
|
||||
# We first look for clipid, because clipprog always appears before
|
||||
patterns = [r'id=\'clip(%s)\'\s*value=\'([0-9]+)\'' % t for t in ('id', 'prog')]
|
||||
results = list(filter(None, (re.search(p, webpage) for p in patterns)))
|
||||
|
159
youtube_dl/extractor/disney.py
Normal file
159
youtube_dl/extractor/disney.py
Normal file
@ -0,0 +1,159 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
unified_strdate,
|
||||
compat_str,
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class DisneyIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?P<domain>(?:[^/]+\.)?(?:disney\.[a-z]{2,3}(?:\.[a-z]{2})?|disney(?:(?:me|latino)\.com|turkiye\.com\.tr)|(?:starwars|marvelkids)\.com))/(?:(?:embed/|(?:[^/]+/)+[\w-]+-)(?P<id>[a-z0-9]{24})|(?:[^/]+/)?(?P<display_id>[^/?#]+))'''
|
||||
_TESTS = [{
|
||||
# Disney.EmbedVideo
|
||||
'url': 'http://video.disney.com/watch/moana-trailer-545ed1857afee5a0ec239977',
|
||||
'info_dict': {
|
||||
'id': '545ed1857afee5a0ec239977',
|
||||
'ext': 'mp4',
|
||||
'title': 'Moana - Trailer',
|
||||
'description': 'A fun adventure for the entire Family! Bring home Moana on Digital HD Feb 21 & Blu-ray March 7',
|
||||
'upload_date': '20170112',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
# Grill.burger
|
||||
'url': 'http://www.starwars.com/video/rogue-one-a-star-wars-story-intro-featurette',
|
||||
'info_dict': {
|
||||
'id': '5454e9f4e9804a552e3524c8',
|
||||
'ext': 'mp4',
|
||||
'title': '"Intro" Featurette: Rogue One: A Star Wars Story',
|
||||
'upload_date': '20170104',
|
||||
'description': 'Go behind-the-scenes of Rogue One: A Star Wars Story in this featurette with Director Gareth Edwards and the cast of the film.',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://videos.disneylatino.com/ver/spider-man-de-regreso-a-casa-primer-adelanto-543a33a1850bdcfcca13bae2',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://video.en.disneyme.com/watch/future-worm/robo-carp-2001-544b66002aa7353cdd3f5114',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://video.disneyturkiye.com.tr/izle/7c-7-cuceler/kimin-sesi-zaten-5456f3d015f6b36c8afdd0e2',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://disneyjunior.disney.com/embed/546a4798ddba3d1612e4005d',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.starwars.com/embed/54690d1e6c42e5f09a0fb097',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://spiderman.marvelkids.com/embed/522900d2ced3c565e4cc0677',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://spiderman.marvelkids.com/videos/contest-of-champions-part-four-clip-1',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://disneyjunior.en.disneyme.com/dj/watch-my-friends-tigger-and-pooh-promo',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://disneyjunior.disney.com/galactech-the-galactech-grab-galactech-an-admiral-rescue',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
domain, video_id, display_id = re.match(self._VALID_URL, url).groups()
|
||||
if not video_id:
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
grill = re.sub(r'"\s*\+\s*"', '', self._search_regex(
|
||||
r'Grill\.burger\s*=\s*({.+})\s*:',
|
||||
webpage, 'grill data'))
|
||||
page_data = next(s for s in self._parse_json(grill, display_id)['stack'] if s.get('type') == 'video')
|
||||
video_data = page_data['data'][0]
|
||||
else:
|
||||
webpage = self._download_webpage(
|
||||
'http://%s/embed/%s' % (domain, video_id), video_id)
|
||||
page_data = self._parse_json(self._search_regex(
|
||||
r'Disney\.EmbedVideo\s*=\s*({.+});',
|
||||
webpage, 'embed data'), video_id)
|
||||
video_data = page_data['video']
|
||||
|
||||
for external in video_data.get('externals', []):
|
||||
if external.get('source') == 'vevo':
|
||||
return self.url_result('vevo:' + external['data_id'], 'Vevo')
|
||||
|
||||
video_id = video_data['id']
|
||||
title = video_data['title']
|
||||
|
||||
formats = []
|
||||
for flavor in video_data.get('flavors', []):
|
||||
flavor_format = flavor.get('format')
|
||||
flavor_url = flavor.get('url')
|
||||
if not flavor_url or not re.match(r'https?://', flavor_url) or flavor_format == 'mp4_access':
|
||||
continue
|
||||
tbr = int_or_none(flavor.get('bitrate'))
|
||||
if tbr == 99999:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
flavor_url, video_id, 'mp4',
|
||||
m3u8_id=flavor_format, fatal=False))
|
||||
continue
|
||||
format_id = []
|
||||
if flavor_format:
|
||||
format_id.append(flavor_format)
|
||||
if tbr:
|
||||
format_id.append(compat_str(tbr))
|
||||
ext = determine_ext(flavor_url)
|
||||
if flavor_format == 'applehttp' or ext == 'm3u8':
|
||||
ext = 'mp4'
|
||||
width = int_or_none(flavor.get('width'))
|
||||
height = int_or_none(flavor.get('height'))
|
||||
formats.append({
|
||||
'format_id': '-'.join(format_id),
|
||||
'url': flavor_url,
|
||||
'width': width,
|
||||
'height': height,
|
||||
'tbr': tbr,
|
||||
'ext': ext,
|
||||
'vcodec': 'none' if (width == 0 and height == 0) else None,
|
||||
})
|
||||
if not formats and video_data.get('expired'):
|
||||
raise ExtractorError(
|
||||
'%s said: %s' % (self.IE_NAME, page_data['translations']['video_expired']),
|
||||
expected=True)
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
for caption in video_data.get('captions', []):
|
||||
caption_url = caption.get('url')
|
||||
caption_format = caption.get('format')
|
||||
if not caption_url or caption_format.startswith('unknown'):
|
||||
continue
|
||||
subtitles.setdefault(caption.get('language', 'en'), []).append({
|
||||
'url': caption_url,
|
||||
'ext': {
|
||||
'webvtt': 'vtt',
|
||||
}.get(caption_format, caption_format),
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': video_data.get('description') or video_data.get('short_desc'),
|
||||
'thumbnail': video_data.get('thumb') or video_data.get('thumb_secure'),
|
||||
'duration': int_or_none(video_data.get('duration_sec')),
|
||||
'upload_date': unified_strdate(video_data.get('publish_date')),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
@ -18,7 +18,7 @@ from ..utils import (
|
||||
|
||||
class DouyuTVIE(InfoExtractor):
|
||||
IE_DESC = '斗鱼'
|
||||
_VALID_URL = r'https?://(?:www\.)?douyu(?:tv)?\.com/(?P<id>[A-Za-z0-9]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?douyu(?:tv)?\.com/(?:[^/]+/)*(?P<id>[A-Za-z0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.douyutv.com/iseven',
|
||||
'info_dict': {
|
||||
@ -68,6 +68,10 @@ class DouyuTVIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://www.douyu.com/xiaocang',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# \"room_id\"
|
||||
'url': 'http://www.douyu.com/t/lpl',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
# Decompile core.swf in webpage by ffdec "Search SWFs in memory". core.swf
|
||||
@ -82,7 +86,7 @@ class DouyuTVIE(InfoExtractor):
|
||||
else:
|
||||
page = self._download_webpage(url, video_id)
|
||||
room_id = self._html_search_regex(
|
||||
r'"room_id"\s*:\s*(\d+),', page, 'room id')
|
||||
r'"room_id\\?"\s*:\s*(\d+),', page, 'room id')
|
||||
|
||||
room = self._download_json(
|
||||
'http://m.douyu.com/html5/live?roomId=%s' % room_id, video_id,
|
||||
|
@ -66,7 +66,7 @@ class DramaFeverBaseIE(AMPIE):
|
||||
|
||||
class DramaFeverIE(DramaFeverBaseIE):
|
||||
IE_NAME = 'dramafever'
|
||||
_VALID_URL = r'https?://(?:www\.)?dramafever\.com/drama/(?P<id>[0-9]+/[0-9]+)(?:/|$)'
|
||||
_VALID_URL = r'https?://(?:www\.)?dramafever\.com/(?:[^/]+/)?drama/(?P<id>[0-9]+/[0-9]+)(?:/|$)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.dramafever.com/drama/4512/1/Cooking_with_Shin/',
|
||||
'info_dict': {
|
||||
@ -103,6 +103,9 @@ class DramaFeverIE(DramaFeverBaseIE):
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.dramafever.com/zh-cn/drama/4972/15/Doctor_Romantic/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -148,7 +151,7 @@ class DramaFeverIE(DramaFeverBaseIE):
|
||||
|
||||
class DramaFeverSeriesIE(DramaFeverBaseIE):
|
||||
IE_NAME = 'dramafever:series'
|
||||
_VALID_URL = r'https?://(?:www\.)?dramafever\.com/drama/(?P<id>[0-9]+)(?:/(?:(?!\d+(?:/|$)).+)?)?$'
|
||||
_VALID_URL = r'https?://(?:www\.)?dramafever\.com/(?:[^/]+/)?drama/(?P<id>[0-9]+)(?:/(?:(?!\d+(?:/|$)).+)?)?$'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.dramafever.com/drama/4512/Cooking_with_Shin/',
|
||||
'info_dict': {
|
||||
|
@ -9,12 +9,13 @@ from ..utils import (
|
||||
mimetype2ext,
|
||||
parse_iso8601,
|
||||
remove_end,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
class DRTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv/se|nyheder)/(?:[^/]+/)*(?P<id>[\da-z-]+)(?:[/#?]|$)'
|
||||
|
||||
_VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv/se|nyheder|radio/ondemand)/(?:[^/]+/)*(?P<id>[\da-z-]+)(?:[/#?]|$)'
|
||||
IE_NAME = 'drtv'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.dr.dk/tv/se/boern/ultra/klassen-ultra/klassen-darlig-taber-10',
|
||||
'md5': '25e659cccc9a2ed956110a299fdf5983',
|
||||
@ -79,9 +80,10 @@ class DRTVIE(InfoExtractor):
|
||||
subtitles = {}
|
||||
|
||||
for asset in data['Assets']:
|
||||
if asset.get('Kind') == 'Image':
|
||||
kind = asset.get('Kind')
|
||||
if kind == 'Image':
|
||||
thumbnail = asset.get('Uri')
|
||||
elif asset.get('Kind') == 'VideoResource':
|
||||
elif kind in ('VideoResource', 'AudioResource'):
|
||||
duration = float_or_none(asset.get('DurationInMilliseconds'), 1000)
|
||||
restricted_to_denmark = asset.get('RestrictedToDenmark')
|
||||
spoken_subtitles = asset.get('Target') == 'SpokenSubtitles'
|
||||
@ -96,9 +98,13 @@ class DRTVIE(InfoExtractor):
|
||||
preference = -1
|
||||
format_id += '-spoken-subtitles'
|
||||
if target == 'HDS':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
f4m_formats = self._extract_f4m_formats(
|
||||
uri + '?hdcore=3.3.0&plugin=aasp-3.3.0.99.43',
|
||||
video_id, preference, f4m_id=format_id))
|
||||
video_id, preference, f4m_id=format_id)
|
||||
if kind == 'AudioResource':
|
||||
for f in f4m_formats:
|
||||
f['vcodec'] = 'none'
|
||||
formats.extend(f4m_formats)
|
||||
elif target == 'HLS':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
uri, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
@ -112,6 +118,7 @@ class DRTVIE(InfoExtractor):
|
||||
'format_id': format_id,
|
||||
'tbr': int_or_none(bitrate),
|
||||
'ext': link.get('FileFormat'),
|
||||
'vcodec': 'none' if kind == 'AudioResource' else None,
|
||||
})
|
||||
subtitles_list = asset.get('SubtitlesList')
|
||||
if isinstance(subtitles_list, list):
|
||||
@ -144,3 +151,58 @@ class DRTVIE(InfoExtractor):
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
|
||||
class DRTVLiveIE(InfoExtractor):
|
||||
IE_NAME = 'drtv:live'
|
||||
_VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv|TV)/live/(?P<id>[\da-z-]+)'
|
||||
_TEST = {
|
||||
'url': 'https://www.dr.dk/tv/live/dr1',
|
||||
'info_dict': {
|
||||
'id': 'dr1',
|
||||
'ext': 'mp4',
|
||||
'title': 're:^DR1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel_id = self._match_id(url)
|
||||
channel_data = self._download_json(
|
||||
'https://www.dr.dk/mu-online/api/1.0/channel/' + channel_id,
|
||||
channel_id)
|
||||
title = self._live_title(channel_data['Title'])
|
||||
|
||||
formats = []
|
||||
for streaming_server in channel_data.get('StreamingServers', []):
|
||||
server = streaming_server.get('Server')
|
||||
if not server:
|
||||
continue
|
||||
link_type = streaming_server.get('LinkType')
|
||||
for quality in streaming_server.get('Qualities', []):
|
||||
for stream in quality.get('Streams', []):
|
||||
stream_path = stream.get('Stream')
|
||||
if not stream_path:
|
||||
continue
|
||||
stream_url = update_url_query(
|
||||
'%s/%s' % (server, stream_path), {'b': ''})
|
||||
if link_type == 'HLS':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
stream_url, channel_id, 'mp4',
|
||||
m3u8_id=link_type, fatal=False, live=True))
|
||||
elif link_type == 'HDS':
|
||||
formats.extend(self._extract_f4m_formats(update_url_query(
|
||||
'%s/%s' % (server, stream_path), {'hdcore': '3.7.0'}),
|
||||
channel_id, f4m_id=link_type, fatal=False))
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': channel_id,
|
||||
'title': title,
|
||||
'thumbnail': channel_data.get('PrimaryImageUri'),
|
||||
'formats': formats,
|
||||
'is_live': True,
|
||||
}
|
||||
|
@ -1,67 +1,94 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..compat import (
|
||||
compat_urlparse,
|
||||
compat_str,
|
||||
)
|
||||
from ..utils import (
|
||||
remove_start,
|
||||
sanitized_Request,
|
||||
extract_attributes,
|
||||
ExtractorError,
|
||||
get_elements_by_class,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class EinthusanIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?einthusan\.com/movies/watch.php\?([^#]*?)id=(?P<id>[0-9]+)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.einthusan.com/movies/watch.php?id=2447',
|
||||
'md5': 'd71379996ff5b7f217eca034c34e3461',
|
||||
'info_dict': {
|
||||
'id': '2447',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ek Villain',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'description': 'md5:9d29fc91a7abadd4591fb862fa560d93',
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://www.einthusan.com/movies/watch.php?id=1671',
|
||||
'md5': 'b16a6fd3c67c06eb7c79c8a8615f4213',
|
||||
'info_dict': {
|
||||
'id': '1671',
|
||||
'ext': 'mp4',
|
||||
'title': 'Soodhu Kavvuum',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'description': 'md5:b40f2bf7320b4f9414f3780817b2af8c',
|
||||
}
|
||||
},
|
||||
]
|
||||
_VALID_URL = r'https?://einthusan\.tv/movie/watch/(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'https://einthusan.tv/movie/watch/9097/',
|
||||
'md5': 'ff0f7f2065031b8a2cf13a933731c035',
|
||||
'info_dict': {
|
||||
'id': '9097',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ae Dil Hai Mushkil',
|
||||
'description': 'md5:33ef934c82a671a94652a9b4e54d931b',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
}
|
||||
}
|
||||
|
||||
# reversed from jsoncrypto.prototype.decrypt() in einthusan-PGMovieWatcher.js
|
||||
def _decrypt(self, encrypted_data, video_id):
|
||||
return self._parse_json(base64.b64decode((
|
||||
encrypted_data[:10] + encrypted_data[-1] + encrypted_data[12:-1]
|
||||
).encode('ascii')).decode('utf-8'), video_id)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
request = sanitized_Request(url)
|
||||
request.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 5.2; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0')
|
||||
webpage = self._download_webpage(request, video_id)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<h1><a[^>]+class=["\']movie-title["\'][^>]*>(.+?)</a></h1>',
|
||||
webpage, 'title')
|
||||
title = self._html_search_regex(r'<h3>([^<]+)</h3>', webpage, 'title')
|
||||
|
||||
video_id = self._search_regex(
|
||||
r'data-movieid=["\'](\d+)', webpage, 'video id', default=video_id)
|
||||
player_params = extract_attributes(self._search_regex(
|
||||
r'(<section[^>]+id="UIVideoPlayer"[^>]+>)', webpage, 'player parameters'))
|
||||
|
||||
m3u8_url = self._download_webpage(
|
||||
'http://cdn.einthusan.com/geturl/%s/hd/London,Washington,Toronto,Dallas,San,Sydney/'
|
||||
% video_id, video_id, headers={'Referer': url})
|
||||
formats = self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native')
|
||||
page_id = self._html_search_regex(
|
||||
'<html[^>]+data-pageid="([^"]+)"', webpage, 'page ID')
|
||||
video_data = self._download_json(
|
||||
'https://einthusan.tv/ajax/movie/watch/%s/' % video_id, video_id,
|
||||
data=urlencode_postdata({
|
||||
'xEvent': 'UIVideoPlayer.PingOutcome',
|
||||
'xJson': json.dumps({
|
||||
'EJOutcomes': player_params['data-ejpingables'],
|
||||
'NativeHLS': False
|
||||
}),
|
||||
'arcVersion': 3,
|
||||
'appVersion': 59,
|
||||
'gorilla.csrf.Token': page_id,
|
||||
}))['Data']
|
||||
|
||||
description = self._html_search_meta('description', webpage)
|
||||
if isinstance(video_data, compat_str) and video_data.startswith('/ratelimited/'):
|
||||
raise ExtractorError(
|
||||
'Download rate reached. Please try again later.', expected=True)
|
||||
|
||||
ej_links = self._decrypt(video_data['EJLinks'], video_id)
|
||||
|
||||
formats = []
|
||||
|
||||
m3u8_url = ej_links.get('HLSLink')
|
||||
if m3u8_url:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native'))
|
||||
|
||||
mp4_url = ej_links.get('MP4Link')
|
||||
if mp4_url:
|
||||
formats.append({
|
||||
'url': mp4_url,
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
description = get_elements_by_class('synopsis', webpage)[0]
|
||||
thumbnail = self._html_search_regex(
|
||||
r'''<a class="movie-cover-wrapper".*?><img src=["'](.*?)["'].*?/></a>''',
|
||||
webpage, "thumbnail url", fatal=False)
|
||||
r'''<img[^>]+src=(["'])(?P<url>(?!\1).+?/moviecovers/(?!\1).+?)\1''',
|
||||
webpage, 'thumbnail url', fatal=False, group='url')
|
||||
if thumbnail is not None:
|
||||
thumbnail = compat_urlparse.urljoin(url, remove_start(thumbnail, '..'))
|
||||
thumbnail = compat_urlparse.urljoin(url, thumbnail)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@ -2,7 +2,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import unified_strdate
|
||||
from ..utils import strip_jsonp, unified_strdate
|
||||
|
||||
|
||||
class ElPaisIE(InfoExtractor):
|
||||
@ -29,6 +29,16 @@ class ElPaisIE(InfoExtractor):
|
||||
'description': 'Que sí, que las cápsulas son cómodas. Pero si le pides algo más a la vida, quizá deberías aprender a usar bien la cafetera italiana. No tienes más que ver este vídeo y seguir sus siete normas básicas.',
|
||||
'upload_date': '20160303',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://elpais.com/elpais/2017/01/26/ciencia/1485456786_417876.html',
|
||||
'md5': '9c79923a118a067e1a45789e1e0b0f9c',
|
||||
'info_dict': {
|
||||
'id': '1485456786_417876',
|
||||
'ext': 'mp4',
|
||||
'title': 'Hallado un barco de la antigua Roma que naufragó en Baleares hace 1.800 años',
|
||||
'description': 'La nave portaba cientos de ánforas y se hundió cerca de la isla de Cabrera por razones desconocidas',
|
||||
'upload_date': '20170127',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -37,8 +47,15 @@ class ElPaisIE(InfoExtractor):
|
||||
|
||||
prefix = self._html_search_regex(
|
||||
r'var\s+url_cache\s*=\s*"([^"]+)";', webpage, 'URL prefix')
|
||||
video_suffix = self._search_regex(
|
||||
r"(?:URLMediaFile|urlVideo_\d+)\s*=\s*url_cache\s*\+\s*'([^']+)'", webpage, 'video URL')
|
||||
id_multimedia = self._search_regex(
|
||||
r"id_multimedia\s*=\s*'([^']+)'", webpage, 'ID multimedia', default=None)
|
||||
if id_multimedia:
|
||||
url_info = self._download_json(
|
||||
'http://elpais.com/vdpep/1/?pepid=' + id_multimedia, video_id, transform_source=strip_jsonp)
|
||||
video_suffix = url_info['mp4']
|
||||
else:
|
||||
video_suffix = self._search_regex(
|
||||
r"(?:URLMediaFile|urlVideo_\d+)\s*=\s*url_cache\s*\+\s*'([^']+)'", webpage, 'video URL')
|
||||
video_url = prefix + video_suffix
|
||||
thumbnail_suffix = self._search_regex(
|
||||
r"(?:URLMediaStill|urlFotogramaFijo_\d+)\s*=\s*url_cache\s*\+\s*'([^']+)'",
|
||||
|
@ -30,7 +30,10 @@ from .aenetworks import (
|
||||
AENetworksIE,
|
||||
HistoryTopicIE,
|
||||
)
|
||||
from .afreecatv import AfreecaTVIE
|
||||
from .afreecatv import (
|
||||
AfreecaTVIE,
|
||||
AfreecaTVGlobalIE,
|
||||
)
|
||||
from .airmozilla import AirMozillaIE
|
||||
from .aljazeera import AlJazeeraIE
|
||||
from .alphaporno import AlphaPornoIE
|
||||
@ -77,6 +80,10 @@ from .awaan import (
|
||||
AWAANLiveIE,
|
||||
AWAANSeasonIE,
|
||||
)
|
||||
from .azmedien import (
|
||||
AZMedienIE,
|
||||
AZMedienPlaylistIE,
|
||||
)
|
||||
from .azubu import AzubuIE, AzubuLiveIE
|
||||
from .baidu import BaiduVideoIE
|
||||
from .bambuser import BambuserIE, BambuserChannelIE
|
||||
@ -88,6 +95,7 @@ from .bbc import (
|
||||
BBCCoUkPlaylistIE,
|
||||
BBCIE,
|
||||
)
|
||||
from .beampro import BeamProLiveIE
|
||||
from .beeg import BeegIE
|
||||
from .behindkink import BehindKinkIE
|
||||
from .bellmedia import BellMediaIE
|
||||
@ -95,7 +103,10 @@ from .beatport import BeatportIE
|
||||
from .bet import BetIE
|
||||
from .bigflix import BigflixIE
|
||||
from .bild import BildIE
|
||||
from .bilibili import BiliBiliIE
|
||||
from .bilibili import (
|
||||
BiliBiliIE,
|
||||
BiliBiliBangumiIE,
|
||||
)
|
||||
from .biobiochiletv import BioBioChileTVIE
|
||||
from .biqle import BIQLEIE
|
||||
from .bleacherreport import (
|
||||
@ -191,6 +202,7 @@ from .commonprotocols import (
|
||||
RtmpIE,
|
||||
)
|
||||
from .condenast import CondeNastIE
|
||||
from .corus import CorusIE
|
||||
from .cracked import CrackedIE
|
||||
from .crackle import CrackleIE
|
||||
from .criterion import CriterionIE
|
||||
@ -237,12 +249,16 @@ from .dramafever import (
|
||||
from .dreisat import DreiSatIE
|
||||
from .drbonanza import DRBonanzaIE
|
||||
from .drtuber import DrTuberIE
|
||||
from .drtv import DRTVIE
|
||||
from .drtv import (
|
||||
DRTVIE,
|
||||
DRTVLiveIE,
|
||||
)
|
||||
from .dvtv import DVTVIE
|
||||
from .dumpert import DumpertIE
|
||||
from .defense import DefenseGouvFrIE
|
||||
from .discovery import DiscoveryIE
|
||||
from .discoverygo import DiscoveryGoIE
|
||||
from .disney import DisneyIE
|
||||
from .dispeak import DigitallySpeakingIE
|
||||
from .dropbox import DropboxIE
|
||||
from .dw import (
|
||||
@ -287,6 +303,10 @@ from .fc2 import (
|
||||
FC2EmbedIE,
|
||||
)
|
||||
from .fczenit import FczenitIE
|
||||
from .filmon import (
|
||||
FilmOnIE,
|
||||
FilmOnChannelIE,
|
||||
)
|
||||
from .firstpost import FirstpostIE
|
||||
from .firsttv import FirstTVIE
|
||||
from .fivemin import FiveMinIE
|
||||
@ -330,6 +350,7 @@ from .gameone import (
|
||||
from .gamersyde import GamersydeIE
|
||||
from .gamespot import GameSpotIE
|
||||
from .gamestar import GameStarIE
|
||||
from .gaskrank import GaskrankIE
|
||||
from .gazeta import GazetaIE
|
||||
from .gdcvault import GDCVaultIE
|
||||
from .generic import GenericIE
|
||||
@ -361,10 +382,7 @@ from .heise import HeiseIE
|
||||
from .hellporno import HellPornoIE
|
||||
from .helsinki import HelsinkiIE
|
||||
from .hentaistigma import HentaiStigmaIE
|
||||
from .hgtv import (
|
||||
HGTVIE,
|
||||
HGTVComShowIE,
|
||||
)
|
||||
from .hgtv import HGTVComShowIE
|
||||
from .historicfilms import HistoricFilmsIE
|
||||
from .hitbox import HitboxIE, HitboxLiveIE
|
||||
from .hitrecord import HitRecordIE
|
||||
@ -406,6 +424,7 @@ from .internetvideoarchive import InternetVideoArchiveIE
|
||||
from .iprima import IPrimaIE
|
||||
from .iqiyi import IqiyiIE
|
||||
from .ir90tv import Ir90TvIE
|
||||
from .itv import ITVIE
|
||||
from .ivi import (
|
||||
IviIE,
|
||||
IviCompilationIE
|
||||
@ -544,6 +563,7 @@ from .mtv import (
|
||||
MTVVideoIE,
|
||||
MTVServicesEmbeddedIE,
|
||||
MTVDEIE,
|
||||
MTV81IE,
|
||||
)
|
||||
from .muenchentv import MuenchenTVIE
|
||||
from .musicplayon import MusicPlayOnIE
|
||||
@ -593,6 +613,7 @@ from .nextmedia import (
|
||||
NextMediaIE,
|
||||
NextMediaActionNewsIE,
|
||||
AppleDailyIE,
|
||||
NextTVIE,
|
||||
)
|
||||
from .nfb import NFBIE
|
||||
from .nfl import NFLIE
|
||||
@ -656,6 +677,7 @@ from .nrk import (
|
||||
NRKTVIE,
|
||||
NRKTVDirekteIE,
|
||||
NRKTVEpisodesIE,
|
||||
NRKTVSeriesIE,
|
||||
)
|
||||
from .ntvde import NTVDeIE
|
||||
from .ntvru import NTVRuIE
|
||||
@ -719,6 +741,7 @@ from .polskieradio import (
|
||||
)
|
||||
from .porn91 import Porn91IE
|
||||
from .porncom import PornComIE
|
||||
from .pornflip import PornFlipIE
|
||||
from .pornhd import PornHdIE
|
||||
from .pornhub import (
|
||||
PornHubIE,
|
||||
@ -813,6 +836,7 @@ from .sbs import SBSIE
|
||||
from .scivee import SciVeeIE
|
||||
from .screencast import ScreencastIE
|
||||
from .screencastomatic import ScreencastOMaticIE
|
||||
from .scrippsnetworks import ScrippsNetworksWatchIE
|
||||
from .seeker import SeekerIE
|
||||
from .senateisvp import SenateISVPIE
|
||||
from .sendtonews import SendtoNewsIE
|
||||
@ -867,12 +891,10 @@ from .spiegeltv import SpiegeltvIE
|
||||
from .spike import SpikeIE
|
||||
from .stitcher import StitcherIE
|
||||
from .sport5 import Sport5IE
|
||||
from .sportbox import (
|
||||
SportBoxIE,
|
||||
SportBoxEmbedIE,
|
||||
)
|
||||
from .sportbox import SportBoxEmbedIE
|
||||
from .sportdeutschland import SportDeutschlandIE
|
||||
from .sportschau import SportschauIE
|
||||
from .sprout import SproutIE
|
||||
from .srgssr import (
|
||||
SRGSSRIE,
|
||||
SRGSSRPlayIE,
|
||||
@ -974,6 +996,7 @@ from .tv2 import (
|
||||
)
|
||||
from .tv3 import TV3IE
|
||||
from .tv4 import TV4IE
|
||||
from .tva import TVAIE
|
||||
from .tvanouvelles import (
|
||||
TVANouvellesIE,
|
||||
TVANouvellesArticleIE,
|
||||
@ -994,6 +1017,7 @@ from .tvplay import (
|
||||
TVPlayIE,
|
||||
ViafreeIE,
|
||||
)
|
||||
from .tvplayer import TVPlayerIE
|
||||
from .tweakers import TweakersIE
|
||||
from .twentyfourvideo import TwentyFourVideoIE
|
||||
from .twentymin import TwentyMinutenIE
|
||||
@ -1073,6 +1097,7 @@ from .videomore import (
|
||||
VideomoreSeasonIE,
|
||||
)
|
||||
from .videopremium import VideoPremiumIE
|
||||
from .videopress import VideoPressIE
|
||||
from .vidio import VidioIE
|
||||
from .vidme import (
|
||||
VidmeIE,
|
||||
@ -1117,7 +1142,10 @@ from .vk import (
|
||||
VKUserVideosIE,
|
||||
VKWallPostIE,
|
||||
)
|
||||
from .vlive import VLiveIE
|
||||
from .vlive import (
|
||||
VLiveIE,
|
||||
VLiveChannelIE
|
||||
)
|
||||
from .vodlocker import VodlockerIE
|
||||
from .vodplatform import VODPlatformIE
|
||||
from .voicerepublic import VoiceRepublicIE
|
||||
|
@ -1,3 +1,4 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
@ -12,14 +13,16 @@ from ..compat import (
|
||||
compat_urllib_parse_unquote_plus,
|
||||
)
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
error_to_compat_str,
|
||||
ExtractorError,
|
||||
get_element_by_id,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
limit_length,
|
||||
sanitized_Request,
|
||||
try_get,
|
||||
urlencode_postdata,
|
||||
get_element_by_id,
|
||||
clean_html,
|
||||
)
|
||||
|
||||
|
||||
@ -71,7 +74,7 @@ class FacebookIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '274175099429670',
|
||||
'ext': 'mp4',
|
||||
'title': 'Facebook video #274175099429670',
|
||||
'title': 'Asif Nawab Butt posted a video to his Timeline.',
|
||||
'uploader': 'Asif Nawab Butt',
|
||||
'upload_date': '20140506',
|
||||
'timestamp': 1399398998,
|
||||
@ -132,6 +135,46 @@ class FacebookIE(InfoExtractor):
|
||||
'upload_date': '20161030',
|
||||
'uploader': 'CNN',
|
||||
},
|
||||
}, {
|
||||
# bigPipe.onPageletArrive ... onPageletArrive pagelet_group_mall
|
||||
'url': 'https://www.facebook.com/yaroslav.korpan/videos/1417995061575415/',
|
||||
'info_dict': {
|
||||
'id': '1417995061575415',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:a7b86ca673f51800cd54687b7f4012fe',
|
||||
'timestamp': 1486648217,
|
||||
'upload_date': '20170209',
|
||||
'uploader': 'Yaroslav Korpan',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.facebook.com/LaGuiaDelVaron/posts/1072691702860471',
|
||||
'info_dict': {
|
||||
'id': '1072691702860471',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:ae2d22a93fbb12dad20dc393a869739d',
|
||||
'timestamp': 1477305000,
|
||||
'upload_date': '20161024',
|
||||
'uploader': 'La Guía Del Varón',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.facebook.com/groups/1024490957622648/permalink/1396382447100162/',
|
||||
'info_dict': {
|
||||
'id': '1396382447100162',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:e2d2700afdf84e121f5d0f999bad13a3',
|
||||
'timestamp': 1486035494,
|
||||
'upload_date': '20170202',
|
||||
'uploader': 'Elisabeth Ahtn',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.facebook.com/video.php?v=10204634152394104',
|
||||
'only_matching': True,
|
||||
@ -243,14 +286,30 @@ class FacebookIE(InfoExtractor):
|
||||
|
||||
video_data = None
|
||||
|
||||
def extract_video_data(instances):
|
||||
for item in instances:
|
||||
if item[1][0] == 'VideoConfig':
|
||||
video_item = item[2][0]
|
||||
if video_item.get('video_id'):
|
||||
return video_item['videoData']
|
||||
|
||||
server_js_data = self._parse_json(self._search_regex(
|
||||
r'handleServerJS\(({.+})(?:\);|,")', webpage, 'server js data', default='{}'), video_id)
|
||||
for item in server_js_data.get('instances', []):
|
||||
if item[1][0] == 'VideoConfig':
|
||||
video_item = item[2][0]
|
||||
if video_item.get('video_id') == video_id:
|
||||
video_data = video_item['videoData']
|
||||
break
|
||||
r'handleServerJS\(({.+})(?:\);|,")', webpage,
|
||||
'server js data', default='{}'), video_id, fatal=False)
|
||||
|
||||
if server_js_data:
|
||||
video_data = extract_video_data(server_js_data.get('instances', []))
|
||||
|
||||
if not video_data:
|
||||
server_js_data = self._parse_json(
|
||||
self._search_regex(
|
||||
r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+(?:stream_pagelet|pagelet_group_mall)',
|
||||
webpage, 'js data', default='{}'),
|
||||
video_id, transform_source=js_to_json, fatal=False)
|
||||
if server_js_data:
|
||||
video_data = extract_video_data(try_get(
|
||||
server_js_data, lambda x: x['jsmods']['instances'],
|
||||
list) or [])
|
||||
|
||||
if not video_data:
|
||||
if not fatal_if_no_video:
|
||||
@ -300,10 +359,16 @@ class FacebookIE(InfoExtractor):
|
||||
video_title = self._html_search_regex(
|
||||
r'(?s)<span class="fbPhotosPhotoCaption".*?id="fbPhotoPageCaption"><span class="hasCaption">(.*?)</span>',
|
||||
webpage, 'alternative title', default=None)
|
||||
video_title = limit_length(video_title, 80)
|
||||
if not video_title:
|
||||
video_title = self._html_search_meta(
|
||||
'description', webpage, 'title')
|
||||
if video_title:
|
||||
video_title = limit_length(video_title, 80)
|
||||
else:
|
||||
video_title = 'Facebook video #%s' % video_id
|
||||
uploader = clean_html(get_element_by_id('fbPhotoPageAuthorName', webpage))
|
||||
uploader = clean_html(get_element_by_id(
|
||||
'fbPhotoPageAuthorName', webpage)) or self._search_regex(
|
||||
r'ownerName\s*:\s*"([^"]+)"', webpage, 'uploader', fatal=False)
|
||||
timestamp = int_or_none(self._search_regex(
|
||||
r'<abbr[^>]+data-utime=["\'](\d+)', webpage,
|
||||
'timestamp', default=None))
|
||||
|
178
youtube_dl/extractor/filmon.py
Normal file
178
youtube_dl/extractor/filmon.py
Normal file
@ -0,0 +1,178 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_HTTPError,
|
||||
)
|
||||
from ..utils import (
|
||||
qualities,
|
||||
strip_or_none,
|
||||
int_or_none,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class FilmOnIE(InfoExtractor):
|
||||
IE_NAME = 'filmon'
|
||||
_VALID_URL = r'(?:https?://(?:www\.)?filmon\.com/vod/view/|filmon:)(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.filmon.com/vod/view/24869-0-plan-9-from-outer-space',
|
||||
'info_dict': {
|
||||
'id': '24869',
|
||||
'ext': 'mp4',
|
||||
'title': 'Plan 9 From Outer Space',
|
||||
'description': 'Dead human, zombies and vampires',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.filmon.com/vod/view/2825-1-popeye-series-1',
|
||||
'info_dict': {
|
||||
'id': '2825',
|
||||
'title': 'Popeye Series 1',
|
||||
'description': 'The original series of Popeye.',
|
||||
},
|
||||
'playlist_mincount': 8,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
try:
|
||||
response = self._download_json(
|
||||
'https://www.filmon.com/api/vod/movie?id=%s' % video_id,
|
||||
video_id)['response']
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError):
|
||||
errmsg = self._parse_json(e.cause.read().decode(), video_id)['reason']
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, errmsg), expected=True)
|
||||
raise
|
||||
|
||||
title = response['title']
|
||||
description = strip_or_none(response.get('description'))
|
||||
|
||||
if response.get('type_id') == 1:
|
||||
entries = [self.url_result('filmon:' + episode_id) for episode_id in response.get('episodes', [])]
|
||||
return self.playlist_result(entries, video_id, title, description)
|
||||
|
||||
QUALITY = qualities(('low', 'high'))
|
||||
formats = []
|
||||
for format_id, stream in response.get('streams', {}).items():
|
||||
stream_url = stream.get('url')
|
||||
if not stream_url:
|
||||
continue
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': stream_url,
|
||||
'ext': 'mp4',
|
||||
'quality': QUALITY(stream.get('quality')),
|
||||
'protocol': 'm3u8_native',
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnails = []
|
||||
poster = response.get('poster', {})
|
||||
thumbs = poster.get('thumbs', {})
|
||||
thumbs['poster'] = poster
|
||||
for thumb_id, thumb in thumbs.items():
|
||||
thumb_url = thumb.get('url')
|
||||
if not thumb_url:
|
||||
continue
|
||||
thumbnails.append({
|
||||
'id': thumb_id,
|
||||
'url': thumb_url,
|
||||
'width': int_or_none(thumb.get('width')),
|
||||
'height': int_or_none(thumb.get('height')),
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'description': description,
|
||||
'thumbnails': thumbnails,
|
||||
}
|
||||
|
||||
|
||||
class FilmOnChannelIE(InfoExtractor):
|
||||
IE_NAME = 'filmon:channel'
|
||||
_VALID_URL = r'https?://(?:www\.)?filmon\.com/(?:tv|channel)/(?P<id>[a-z0-9-]+)'
|
||||
_TESTS = [{
|
||||
# VOD
|
||||
'url': 'http://www.filmon.com/tv/sports-haters',
|
||||
'info_dict': {
|
||||
'id': '4190',
|
||||
'ext': 'mp4',
|
||||
'title': 'Sports Haters',
|
||||
'description': 'md5:dabcb4c1d9cfc77085612f1a85f8275d',
|
||||
},
|
||||
}, {
|
||||
# LIVE
|
||||
'url': 'https://www.filmon.com/channel/filmon-sports',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.filmon.com/tv/2894',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_THUMBNAIL_RES = [
|
||||
('logo', 56, 28),
|
||||
('big_logo', 106, 106),
|
||||
('extra_big_logo', 300, 300),
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel_id = self._match_id(url)
|
||||
|
||||
try:
|
||||
channel_data = self._download_json(
|
||||
'http://www.filmon.com/api-v2/channel/' + channel_id, channel_id)['data']
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError):
|
||||
errmsg = self._parse_json(e.cause.read().decode(), channel_id)['message']
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, errmsg), expected=True)
|
||||
raise
|
||||
|
||||
channel_id = compat_str(channel_data['id'])
|
||||
is_live = not channel_data.get('is_vod') and not channel_data.get('is_vox')
|
||||
title = channel_data['title']
|
||||
|
||||
QUALITY = qualities(('low', 'high'))
|
||||
formats = []
|
||||
for stream in channel_data.get('streams', []):
|
||||
stream_url = stream.get('url')
|
||||
if not stream_url:
|
||||
continue
|
||||
if not is_live:
|
||||
formats.extend(self._extract_wowza_formats(
|
||||
stream_url, channel_id, skip_protocols=['dash', 'rtmp', 'rtsp']))
|
||||
continue
|
||||
quality = stream.get('quality')
|
||||
formats.append({
|
||||
'format_id': quality,
|
||||
# this is an m3u8 stream, but we are deliberately not using _extract_m3u8_formats
|
||||
# because it doesn't have bitrate variants anyway
|
||||
'url': stream_url,
|
||||
'ext': 'mp4',
|
||||
'quality': QUALITY(quality),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnails = []
|
||||
for name, width, height in self._THUMBNAIL_RES:
|
||||
thumbnails.append({
|
||||
'id': name,
|
||||
'url': 'http://static.filmon.com/assets/channels/%s/%s.png' % (channel_id, name),
|
||||
'width': width,
|
||||
'height': height,
|
||||
})
|
||||
|
||||
return {
|
||||
'id': channel_id,
|
||||
'display_id': channel_data.get('alias'),
|
||||
'title': self._live_title(title) if is_live else title,
|
||||
'description': channel_data.get('description'),
|
||||
'thumbnails': thumbnails,
|
||||
'formats': formats,
|
||||
'is_live': is_live,
|
||||
}
|
@ -86,18 +86,43 @@ class FirstTVIE(InfoExtractor):
|
||||
title = item['title']
|
||||
quality = qualities(QUALITIES)
|
||||
formats = []
|
||||
path = None
|
||||
for f in item.get('mbr', []):
|
||||
src = f.get('src')
|
||||
if not src or not isinstance(src, compat_str):
|
||||
continue
|
||||
tbr = int_or_none(self._search_regex(
|
||||
r'_(\d{3,})\.mp4', src, 'tbr', default=None))
|
||||
if not path:
|
||||
path = self._search_regex(
|
||||
r'//[^/]+/(.+?)_\d+\.mp4', src,
|
||||
'm3u8 path', default=None)
|
||||
formats.append({
|
||||
'url': src,
|
||||
'format_id': f.get('name'),
|
||||
'tbr': tbr,
|
||||
'quality': quality(f.get('name')),
|
||||
'source_preference': quality(f.get('name')),
|
||||
})
|
||||
# m3u8 URL format is reverse engineered from [1] (search for
|
||||
# master.m3u8). dashEdges (that is currently balancer-vod.1tv.ru)
|
||||
# is taken from [2].
|
||||
# 1. http://static.1tv.ru/player/eump1tv-current/eump-1tv.all.min.js?rnd=9097422834:formatted
|
||||
# 2. http://static.1tv.ru/player/eump1tv-config/config-main.js?rnd=9097422834
|
||||
if not path and len(formats) == 1:
|
||||
path = self._search_regex(
|
||||
r'//[^/]+/(.+?$)', formats[0]['url'],
|
||||
'm3u8 path', default=None)
|
||||
if path:
|
||||
if len(formats) == 1:
|
||||
m3u8_path = ','
|
||||
else:
|
||||
tbrs = [compat_str(t) for t in sorted(f['tbr'] for f in formats)]
|
||||
m3u8_path = '_,%s,%s' % (','.join(tbrs), '.mp4')
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
'http://balancer-vod.1tv.ru/%s%s.urlset/master.m3u8'
|
||||
% (path, m3u8_path),
|
||||
display_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnail = item.get('poster') or self._og_search_thumbnail(webpage)
|
||||
|
@ -81,7 +81,7 @@ class FlipagramIE(InfoExtractor):
|
||||
'filesize': int_or_none(cover.get('size')),
|
||||
} for cover in flipagram.get('covers', []) if cover.get('url')]
|
||||
|
||||
# Note that this only retrieves comments that are initally loaded.
|
||||
# Note that this only retrieves comments that are initially loaded.
|
||||
# For videos with large amounts of comments, most won't be retrieved.
|
||||
comments = []
|
||||
for comment in video_data.get('comments', {}).get(video_id, {}).get('items', []):
|
||||
|
123
youtube_dl/extractor/gaskrank.py
Normal file
123
youtube_dl/extractor/gaskrank.py
Normal file
@ -0,0 +1,123 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class GaskrankIE(InfoExtractor):
|
||||
"""InfoExtractor for gaskrank.tv"""
|
||||
_VALID_URL = r'https?://(?:www\.)?gaskrank\.tv/tv/(?P<categories>[^/]+)/(?P<id>[^/]+)\.html?'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.gaskrank.tv/tv/motorrad-fun/strike-einparken-durch-anfaenger-crash-mit-groesserem-flurschaden.htm',
|
||||
'md5': '1ae88dbac97887d85ebd1157a95fc4f9',
|
||||
'info_dict': {
|
||||
'id': '201601/26955',
|
||||
'ext': 'mp4',
|
||||
'title': 'Strike! Einparken können nur Männer - Flurschaden hält sich in Grenzen *lol*',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'categories': ['motorrad-fun'],
|
||||
'display_id': 'strike-einparken-durch-anfaenger-crash-mit-groesserem-flurschaden',
|
||||
'uploader_id': 'Bikefun',
|
||||
'upload_date': '20170110',
|
||||
'uploader_url': None,
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://www.gaskrank.tv/tv/racing/isle-of-man-tt-2011-michael-du-15920.htm',
|
||||
'md5': 'c33ee32c711bc6c8224bfcbe62b23095',
|
||||
'info_dict': {
|
||||
'id': '201106/15920',
|
||||
'ext': 'mp4',
|
||||
'title': 'Isle of Man - Michael Dunlop vs Guy Martin - schwindelig kucken',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'categories': ['racing'],
|
||||
'display_id': 'isle-of-man-tt-2011-michael-du-15920',
|
||||
'uploader_id': 'IOM',
|
||||
'upload_date': '20160506',
|
||||
'uploader_url': 'www.iomtt.com',
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
"""extract information from gaskrank.tv"""
|
||||
def fix_json(code):
|
||||
"""Removes trailing comma in json: {{},} --> {{}}"""
|
||||
return re.sub(r',\s*}', r'}', js_to_json(code))
|
||||
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
categories = [re.match(self._VALID_URL, url).group('categories')]
|
||||
title = self._search_regex(
|
||||
r'movieName\s*:\s*\'([^\']*)\'',
|
||||
webpage, 'title')
|
||||
thumbnail = self._search_regex(
|
||||
r'poster\s*:\s*\'([^\']*)\'',
|
||||
webpage, 'thumbnail', default=None)
|
||||
|
||||
mobj = re.search(
|
||||
r'Video von:\s*(?P<uploader_id>[^|]*?)\s*\|\s*vom:\s*(?P<upload_date>[0-9][0-9]\.[0-9][0-9]\.[0-9][0-9][0-9][0-9])',
|
||||
webpage)
|
||||
if mobj is not None:
|
||||
uploader_id = mobj.groupdict().get('uploader_id')
|
||||
upload_date = unified_strdate(mobj.groupdict().get('upload_date'))
|
||||
|
||||
uploader_url = self._search_regex(
|
||||
r'Homepage:\s*<[^>]*>(?P<uploader_url>[^<]*)',
|
||||
webpage, 'uploader_url', default=None)
|
||||
tags = re.findall(
|
||||
r'/tv/tags/[^/]+/"\s*>(?P<tag>[^<]*?)<',
|
||||
webpage)
|
||||
|
||||
view_count = self._search_regex(
|
||||
r'class\s*=\s*"gkRight"(?:[^>]*>\s*<[^>]*)*icon-eye-open(?:[^>]*>\s*<[^>]*)*>\s*(?P<view_count>[0-9\.]*)',
|
||||
webpage, 'view_count', default=None)
|
||||
if view_count:
|
||||
view_count = int_or_none(view_count.replace('.', ''))
|
||||
|
||||
average_rating = self._search_regex(
|
||||
r'itemprop\s*=\s*"ratingValue"[^>]*>\s*(?P<average_rating>[0-9,]+)',
|
||||
webpage, 'average_rating')
|
||||
if average_rating:
|
||||
average_rating = float_or_none(average_rating.replace(',', '.'))
|
||||
|
||||
playlist = self._parse_json(
|
||||
self._search_regex(
|
||||
r'playlist\s*:\s*\[([^\]]*)\]',
|
||||
webpage, 'playlist', default='{}'),
|
||||
display_id, transform_source=fix_json, fatal=False)
|
||||
|
||||
video_id = self._search_regex(
|
||||
r'https?://movies\.gaskrank\.tv/([^-]*?)(-[^\.]*)?\.mp4',
|
||||
playlist.get('0').get('src'), 'video id')
|
||||
|
||||
formats = []
|
||||
for key in playlist:
|
||||
formats.append({
|
||||
'url': playlist[key]['src'],
|
||||
'format_id': key,
|
||||
'quality': playlist[key].get('quality')})
|
||||
self._sort_formats(formats, field_preference=['format_id'])
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnail': thumbnail,
|
||||
'categories': categories,
|
||||
'display_id': display_id,
|
||||
'uploader_id': uploader_id,
|
||||
'upload_date': upload_date,
|
||||
'uploader_url': uploader_url,
|
||||
'tags': tags,
|
||||
'view_count': view_count,
|
||||
'average_rating': average_rating,
|
||||
}
|
@ -29,6 +29,7 @@ from ..utils import (
|
||||
UnsupportedError,
|
||||
xpath_text,
|
||||
)
|
||||
from .commonprotocols import RtmpIE
|
||||
from .brightcove import (
|
||||
BrightcoveLegacyIE,
|
||||
BrightcoveNewIE,
|
||||
@ -78,6 +79,10 @@ from .vbox7 import Vbox7IE
|
||||
from .dbtv import DBTVIE
|
||||
from .piksel import PikselIE
|
||||
from .videa import VideaIE
|
||||
from .twentymin import TwentyMinutenIE
|
||||
from .ustream import UstreamIE
|
||||
from .openload import OpenloadIE
|
||||
from .videopress import VideoPressIE
|
||||
|
||||
|
||||
class GenericIE(InfoExtractor):
|
||||
@ -422,6 +427,26 @@ class GenericIE(InfoExtractor):
|
||||
'skip_download': True, # m3u8 download
|
||||
},
|
||||
},
|
||||
{
|
||||
# Brightcove with alternative playerID key
|
||||
'url': 'http://www.nature.com/nmeth/journal/v9/n7/fig_tab/nmeth.2062_SV1.html',
|
||||
'info_dict': {
|
||||
'id': 'nmeth.2062_SV1',
|
||||
'title': 'Simultaneous multiview imaging of the Drosophila syncytial blastoderm : Quantitative high-speed imaging of entire developing embryos with simultaneous multiview light-sheet microscopy : Nature Methods : Nature Research',
|
||||
},
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'id': '2228375078001',
|
||||
'ext': 'mp4',
|
||||
'title': 'nmeth.2062-sv1',
|
||||
'description': 'nmeth.2062-sv1',
|
||||
'timestamp': 1363357591,
|
||||
'upload_date': '20130315',
|
||||
'uploader': 'Nature Publishing Group',
|
||||
'uploader_id': '1964492299001',
|
||||
},
|
||||
}],
|
||||
},
|
||||
# ooyala video
|
||||
{
|
||||
'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
|
||||
@ -567,17 +592,6 @@ class GenericIE(InfoExtractor):
|
||||
'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
|
||||
}
|
||||
},
|
||||
# Embedded Ustream video
|
||||
{
|
||||
'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm',
|
||||
'md5': '27b99cdb639c9b12a79bca876a073417',
|
||||
'info_dict': {
|
||||
'id': '45734260',
|
||||
'ext': 'flv',
|
||||
'uploader': 'AU SPA: The NSA and Privacy',
|
||||
'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman'
|
||||
}
|
||||
},
|
||||
# nowvideo embed hidden behind percent encoding
|
||||
{
|
||||
'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
|
||||
@ -934,6 +948,19 @@ class GenericIE(InfoExtractor):
|
||||
'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
|
||||
},
|
||||
},
|
||||
# jwplayer rtmp
|
||||
{
|
||||
'url': 'http://www.suffolk.edu/sjc/',
|
||||
'info_dict': {
|
||||
'id': 'sjclive',
|
||||
'ext': 'flv',
|
||||
'title': 'Massachusetts Supreme Judicial Court Oral Arguments',
|
||||
'uploader': 'www.suffolk.edu',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
# rtl.nl embed
|
||||
{
|
||||
'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
|
||||
@ -964,19 +991,6 @@ class GenericIE(InfoExtractor):
|
||||
'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014',
|
||||
},
|
||||
},
|
||||
# Kaltura embed protected with referrer
|
||||
{
|
||||
'url': 'http://www.disney.nl/disney-channel/filmpjes/achter-de-schermen#/videoId/violetta-achter-de-schermen-ruggero',
|
||||
'info_dict': {
|
||||
'id': '1_g4fbemnq',
|
||||
'ext': 'mp4',
|
||||
'title': 'Violetta - Achter De Schermen - Ruggero',
|
||||
'description': 'Achter de schermen met Ruggero',
|
||||
'timestamp': 1435133761,
|
||||
'upload_date': '20150624',
|
||||
'uploader_id': 'echojecka',
|
||||
},
|
||||
},
|
||||
# Kaltura embed with single quotes
|
||||
{
|
||||
'url': 'http://fod.infobase.com/p_ViewPlaylist.aspx?AssignmentID=NUN8ZY',
|
||||
@ -1448,6 +1462,35 @@ class GenericIE(InfoExtractor):
|
||||
},
|
||||
'playlist_mincount': 2,
|
||||
},
|
||||
{
|
||||
# 20 minuten embed
|
||||
'url': 'http://www.20min.ch/schweiz/news/story/So-kommen-Sie-bei-Eis-und-Schnee-sicher-an-27032552',
|
||||
'info_dict': {
|
||||
'id': '523629',
|
||||
'ext': 'mp4',
|
||||
'title': 'So kommen Sie bei Eis und Schnee sicher an',
|
||||
'description': 'md5:117c212f64b25e3d95747e5276863f7d',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': [TwentyMinutenIE.ie_key()],
|
||||
},
|
||||
{
|
||||
# VideoPress embed
|
||||
'url': 'https://en.support.wordpress.com/videopress/',
|
||||
'info_dict': {
|
||||
'id': 'OcobLTqC',
|
||||
'ext': 'm4v',
|
||||
'title': 'IMG_5786',
|
||||
'timestamp': 1435711927,
|
||||
'upload_date': '20150701',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': [VideoPressIE.ie_key()],
|
||||
}
|
||||
# {
|
||||
# # TODO: find another test
|
||||
# # http://schema.org/VideoObject
|
||||
@ -2077,10 +2120,9 @@ class GenericIE(InfoExtractor):
|
||||
return self.url_result(mobj.group('url'), 'TED')
|
||||
|
||||
# Look for embedded Ustream videos
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'), 'Ustream')
|
||||
ustream_url = UstreamIE._extract_url(webpage)
|
||||
if ustream_url:
|
||||
return self.url_result(ustream_url, UstreamIE.ie_key())
|
||||
|
||||
# Look for embedded arte.tv player
|
||||
mobj = re.search(
|
||||
@ -2295,8 +2337,9 @@ class GenericIE(InfoExtractor):
|
||||
'Channel': 'channel',
|
||||
'ChannelList': 'channel_list',
|
||||
}
|
||||
return self.url_result('limelight:%s:%s' % (
|
||||
lm[mobj.group(1)], mobj.group(2)), 'Limelight%s' % mobj.group(1), mobj.group(2))
|
||||
return self.url_result(smuggle_url('limelight:%s:%s' % (
|
||||
lm[mobj.group(1)], mobj.group(2)), {'source_url': url}),
|
||||
'Limelight%s' % mobj.group(1), mobj.group(2))
|
||||
|
||||
mobj = re.search(
|
||||
r'''(?sx)
|
||||
@ -2306,7 +2349,9 @@ class GenericIE(InfoExtractor):
|
||||
value=(["\'])(?:(?!\3).)*mediaId=(?P<id>[a-z0-9]{32})
|
||||
''', webpage)
|
||||
if mobj:
|
||||
return self.url_result('limelight:media:%s' % mobj.group('id'))
|
||||
return self.url_result(smuggle_url(
|
||||
'limelight:media:%s' % mobj.group('id'),
|
||||
{'source_url': url}), 'LimelightMedia', mobj.group('id'))
|
||||
|
||||
# Look for AdobeTVVideo embeds
|
||||
mobj = re.search(
|
||||
@ -2401,6 +2446,24 @@ class GenericIE(InfoExtractor):
|
||||
if videa_urls:
|
||||
return _playlist_from_matches(videa_urls, ie=VideaIE.ie_key())
|
||||
|
||||
# Look for 20 minuten embeds
|
||||
twentymin_urls = TwentyMinutenIE._extract_urls(webpage)
|
||||
if twentymin_urls:
|
||||
return _playlist_from_matches(
|
||||
twentymin_urls, ie=TwentyMinutenIE.ie_key())
|
||||
|
||||
# Look for Openload embeds
|
||||
openload_urls = OpenloadIE._extract_urls(webpage)
|
||||
if openload_urls:
|
||||
return _playlist_from_matches(
|
||||
openload_urls, ie=OpenloadIE.ie_key())
|
||||
|
||||
# Look for VideoPress embeds
|
||||
videopress_urls = VideoPressIE._extract_urls(webpage)
|
||||
if videopress_urls:
|
||||
return _playlist_from_matches(
|
||||
videopress_urls, ie=VideoPressIE.ie_key())
|
||||
|
||||
# Looking for http://schema.org/VideoObject
|
||||
json_ld = self._search_json_ld(
|
||||
webpage, video_id, default={}, expected_type='VideoObject')
|
||||
@ -2428,6 +2491,8 @@ class GenericIE(InfoExtractor):
|
||||
def check_video(vurl):
|
||||
if YoutubeIE.suitable(vurl):
|
||||
return True
|
||||
if RtmpIE.suitable(vurl):
|
||||
return True
|
||||
vpath = compat_urlparse.urlparse(vurl).path
|
||||
vext = determine_ext(vpath)
|
||||
return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml', 'js')
|
||||
@ -2535,6 +2600,15 @@ class GenericIE(InfoExtractor):
|
||||
'age_limit': age_limit,
|
||||
}
|
||||
|
||||
if RtmpIE.suitable(video_url):
|
||||
entry_info_dict.update({
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': RtmpIE.ie_key(),
|
||||
'url': video_url,
|
||||
})
|
||||
entries.append(entry_info_dict)
|
||||
continue
|
||||
|
||||
ext = determine_ext(video_url)
|
||||
if ext == 'smil':
|
||||
entry_info_dict['formats'] = self._extract_smil_formats(video_url, video_id)
|
||||
|
@ -3,7 +3,7 @@ from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .adobepass import AdobePassIE
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
determine_ext,
|
||||
@ -13,15 +13,30 @@ from ..utils import (
|
||||
)
|
||||
|
||||
|
||||
class GoIE(InfoExtractor):
|
||||
_BRANDS = {
|
||||
'abc': '001',
|
||||
'freeform': '002',
|
||||
'watchdisneychannel': '004',
|
||||
'watchdisneyjunior': '008',
|
||||
'watchdisneyxd': '009',
|
||||
class GoIE(AdobePassIE):
|
||||
_SITE_INFO = {
|
||||
'abc': {
|
||||
'brand': '001',
|
||||
'requestor_id': 'ABC',
|
||||
},
|
||||
'freeform': {
|
||||
'brand': '002',
|
||||
'requestor_id': 'ABCFamily',
|
||||
},
|
||||
'watchdisneychannel': {
|
||||
'brand': '004',
|
||||
'requestor_id': 'Disney',
|
||||
},
|
||||
'watchdisneyjunior': {
|
||||
'brand': '008',
|
||||
'requestor_id': 'DisneyJunior',
|
||||
},
|
||||
'watchdisneyxd': {
|
||||
'brand': '009',
|
||||
'requestor_id': 'DisneyXD',
|
||||
}
|
||||
}
|
||||
_VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:[^/]+/)*(?:vdka(?P<id>\w+)|season-\d+/\d+-(?P<display_id>[^/?#]+))' % '|'.join(_BRANDS.keys())
|
||||
_VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:[^/]+/)*(?:vdka(?P<id>\w+)|season-\d+/\d+-(?P<display_id>[^/?#]+))' % '|'.join(_SITE_INFO.keys())
|
||||
_TESTS = [{
|
||||
'url': 'http://abc.go.com/shows/castle/video/most-recent/vdka0_g86w5onx',
|
||||
'info_dict': {
|
||||
@ -43,8 +58,12 @@ class GoIE(InfoExtractor):
|
||||
sub_domain, video_id, display_id = re.match(self._VALID_URL, url).groups()
|
||||
if not video_id:
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
video_id = self._search_regex(r'data-video-id=["\']VDKA(\w+)', webpage, 'video id')
|
||||
brand = self._BRANDS[sub_domain]
|
||||
video_id = self._search_regex(
|
||||
# There may be inner quotes, e.g. data-video-id="'VDKA3609139'"
|
||||
# from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood
|
||||
r'data-video-id=["\']*VDKA(\w+)', webpage, 'video id')
|
||||
site_info = self._SITE_INFO[sub_domain]
|
||||
brand = site_info['brand']
|
||||
video_data = self._download_json(
|
||||
'http://api.contents.watchabc.go.com/vp2/ws/contents/3000/videos/%s/001/-1/-1/-1/%s/-1/-1.json' % (brand, video_id),
|
||||
video_id)['video'][0]
|
||||
@ -60,14 +79,26 @@ class GoIE(InfoExtractor):
|
||||
if ext == 'm3u8':
|
||||
video_type = video_data.get('type')
|
||||
if video_type == 'lf':
|
||||
data = {
|
||||
'video_id': video_data['id'],
|
||||
'video_type': video_type,
|
||||
'brand': brand,
|
||||
'device': '001',
|
||||
}
|
||||
if video_data.get('accesslevel') == '1':
|
||||
requestor_id = site_info['requestor_id']
|
||||
resource = self._get_mvpd_resource(
|
||||
requestor_id, title, video_id, None)
|
||||
auth = self._extract_mvpd_auth(
|
||||
url, video_id, requestor_id, resource)
|
||||
data.update({
|
||||
'token': auth,
|
||||
'token_type': 'ap',
|
||||
'adobe_requestor_id': requestor_id,
|
||||
})
|
||||
entitlement = self._download_json(
|
||||
'https://api.entitlement.watchabc.go.com/vp2/ws-secure/entitlement/2020/authorize.json',
|
||||
video_id, data=urlencode_postdata({
|
||||
'video_id': video_data['id'],
|
||||
'video_type': video_type,
|
||||
'brand': brand,
|
||||
'device': '001',
|
||||
}))
|
||||
video_id, data=urlencode_postdata(data), headers=self.geo_verification_headers())
|
||||
errors = entitlement.get('errors', {}).get('errors', [])
|
||||
if errors:
|
||||
error_message = ', '.join([error['message'] for error in errors])
|
||||
|
@ -6,6 +6,7 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
lowercase_escape,
|
||||
)
|
||||
|
||||
|
||||
@ -13,12 +14,12 @@ class GoogleDriveIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/)|video\.google\.com/get_player\?.*?docid=)(?P<id>[a-zA-Z0-9_-]{28,})'
|
||||
_TESTS = [{
|
||||
'url': 'https://drive.google.com/file/d/0ByeS4oOUV-49Zzh4R1J6R09zazQ/edit?pli=1',
|
||||
'md5': '881f7700aec4f538571fa1e0eed4a7b6',
|
||||
'md5': 'd109872761f7e7ecf353fa108c0dbe1e',
|
||||
'info_dict': {
|
||||
'id': '0ByeS4oOUV-49Zzh4R1J6R09zazQ',
|
||||
'ext': 'mp4',
|
||||
'title': 'Big Buck Bunny.mp4',
|
||||
'duration': 46,
|
||||
'duration': 45,
|
||||
}
|
||||
}, {
|
||||
# video id is longer than 28 characters
|
||||
@ -55,7 +56,7 @@ class GoogleDriveIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(
|
||||
'http://docs.google.com/file/d/%s' % video_id, video_id, encoding='unicode_escape')
|
||||
'http://docs.google.com/file/d/%s' % video_id, video_id)
|
||||
|
||||
reason = self._search_regex(r'"reason"\s*,\s*"([^"]+)', webpage, 'reason', default=None)
|
||||
if reason:
|
||||
@ -74,7 +75,7 @@ class GoogleDriveIE(InfoExtractor):
|
||||
resolution = fmt.split('/')[1]
|
||||
width, height = resolution.split('x')
|
||||
formats.append({
|
||||
'url': fmt_url,
|
||||
'url': lowercase_escape(fmt_url),
|
||||
'format_id': fmt_id,
|
||||
'resolution': resolution,
|
||||
'width': int_or_none(width),
|
||||
|
@ -2,50 +2,6 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
smuggle_url,
|
||||
)
|
||||
|
||||
|
||||
class HGTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?hgtv\.ca/[^/]+/video/(?P<id>[^/]+)/video.html'
|
||||
_TEST = {
|
||||
'url': 'http://www.hgtv.ca/homefree/video/overnight-success/video.html?v=738081859718&p=1&s=da#video',
|
||||
'md5': '',
|
||||
'info_dict': {
|
||||
'id': 'aFH__I_5FBOX',
|
||||
'ext': 'mp4',
|
||||
'title': 'Overnight Success',
|
||||
'description': 'After weeks of hard work, high stakes, breakdowns and pep talks, the final 2 contestants compete to win the ultimate dream.',
|
||||
'uploader': 'SHWM-NEW',
|
||||
'timestamp': 1470320034,
|
||||
'upload_date': '20160804',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
embed_vars = self._parse_json(self._search_regex(
|
||||
r'(?s)embed_vars\s*=\s*({.*?});',
|
||||
webpage, 'embed vars'), display_id, js_to_json)
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': smuggle_url(
|
||||
'http://link.theplatform.com/s/dtjsEC/%s?mbr=true&manifest=m3u' % embed_vars['pid'], {
|
||||
'force_smil_url': True
|
||||
}),
|
||||
'series': embed_vars.get('show'),
|
||||
'season_number': int_or_none(embed_vars.get('season')),
|
||||
'episode_number': int_or_none(embed_vars.get('episode')),
|
||||
'ie_key': 'ThePlatform',
|
||||
}
|
||||
|
||||
|
||||
class HGTVComShowIE(InfoExtractor):
|
||||
|
@ -34,11 +34,9 @@ class HotStarIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_GET_CONTENT_TEMPLATE = 'http://account.hotstar.com/AVS/besc?action=GetAggregatedContentDetails&channel=PCTV&contentId=%s'
|
||||
_GET_CDN_TEMPLATE = 'http://getcdn.hotstar.com/AVS/besc?action=GetCDN&asJson=Y&channel=%s&id=%s&type=%s'
|
||||
|
||||
def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata', fatal=True):
|
||||
json_data = super(HotStarIE, self)._download_json(url_or_request, video_id, note, fatal=fatal)
|
||||
def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata', fatal=True, query=None):
|
||||
json_data = super(HotStarIE, self)._download_json(
|
||||
url_or_request, video_id, note, fatal=fatal, query=query)
|
||||
if json_data['resultCode'] != 'OK':
|
||||
if fatal:
|
||||
raise ExtractorError(json_data['errorDescription'])
|
||||
@ -48,20 +46,37 @@ class HotStarIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
video_data = self._download_json(
|
||||
self._GET_CONTENT_TEMPLATE % video_id,
|
||||
video_id)['contentInfo'][0]
|
||||
'http://account.hotstar.com/AVS/besc', video_id, query={
|
||||
'action': 'GetAggregatedContentDetails',
|
||||
'channel': 'PCTV',
|
||||
'contentId': video_id,
|
||||
})['contentInfo'][0]
|
||||
title = video_data['episodeTitle']
|
||||
|
||||
if video_data.get('encrypted') == 'Y':
|
||||
raise ExtractorError('This video is DRM protected.', expected=True)
|
||||
|
||||
formats = []
|
||||
# PCTV for extracting f4m manifest
|
||||
for f in ('TABLET',):
|
||||
for f in ('JIO',):
|
||||
format_data = self._download_json(
|
||||
self._GET_CDN_TEMPLATE % (f, video_id, 'VOD'),
|
||||
video_id, 'Downloading %s JSON metadata' % f, fatal=False)
|
||||
'http://getcdn.hotstar.com/AVS/besc',
|
||||
video_id, 'Downloading %s JSON metadata' % f,
|
||||
fatal=False, query={
|
||||
'action': 'GetCDN',
|
||||
'asJson': 'Y',
|
||||
'channel': f,
|
||||
'id': video_id,
|
||||
'type': 'VOD',
|
||||
})
|
||||
if format_data:
|
||||
format_url = format_data['src']
|
||||
format_url = format_data.get('src')
|
||||
if not format_url:
|
||||
continue
|
||||
ext = determine_ext(format_url)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(format_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, video_id, 'mp4',
|
||||
m3u8_id='hls', fatal=False))
|
||||
elif ext == 'f4m':
|
||||
# produce broken files
|
||||
continue
|
||||
@ -75,9 +90,12 @@ class HotStarIE(InfoExtractor):
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_data['episodeTitle'],
|
||||
'title': title,
|
||||
'description': video_data.get('description'),
|
||||
'duration': int_or_none(video_data.get('duration')),
|
||||
'timestamp': int_or_none(video_data.get('broadcastDate')),
|
||||
'formats': formats,
|
||||
'episode': title,
|
||||
'episode_number': int_or_none(video_data.get('episodeNumber')),
|
||||
'series': video_data.get('contentTitle'),
|
||||
}
|
||||
|
@ -13,7 +13,7 @@ from ..utils import (
|
||||
class ImdbIE(InfoExtractor):
|
||||
IE_NAME = 'imdb'
|
||||
IE_DESC = 'Internet Movie Database trailers'
|
||||
_VALID_URL = r'https?://(?:www|m)\.imdb\.com/(?:video/[^/]+/|title/tt\d+.*?#lb-)vi(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www|m)\.imdb\.com/(?:video/[^/]+/|title/tt\d+.*?#lb-|videoplayer/)vi(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.imdb.com/video/imdb/vi2524815897',
|
||||
@ -32,6 +32,9 @@ class ImdbIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://www.imdb.com/title/tt1667889/#lb-vi2524815897',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.imdb.com/videoplayer/vi1562949145',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@ -4,7 +4,10 @@ from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
from ..compat import (
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import determine_ext
|
||||
from .bokecc import BokeCCBaseIE
|
||||
|
||||
@ -33,9 +36,21 @@ class InfoQIE(BokeCCBaseIE):
|
||||
'ext': 'flv',
|
||||
'description': 'md5:308d981fb28fa42f49f9568322c683ff',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.infoq.com/presentations/Simple-Made-Easy',
|
||||
'md5': '0e34642d4d9ef44bf86f66f6399672db',
|
||||
'info_dict': {
|
||||
'id': 'Simple-Made-Easy',
|
||||
'title': 'Simple Made Easy',
|
||||
'ext': 'mp3',
|
||||
'description': 'md5:3e0e213a8bbd074796ef89ea35ada25b',
|
||||
},
|
||||
'params': {
|
||||
'format': 'bestaudio',
|
||||
},
|
||||
}]
|
||||
|
||||
def _extract_rtmp_videos(self, webpage):
|
||||
def _extract_rtmp_video(self, webpage):
|
||||
# The server URL is hardcoded
|
||||
video_url = 'rtmpe://video.infoq.com/cfx/st/'
|
||||
|
||||
@ -47,28 +62,53 @@ class InfoQIE(BokeCCBaseIE):
|
||||
playpath = 'mp4:' + real_id
|
||||
|
||||
return [{
|
||||
'format_id': 'rtmp',
|
||||
'format_id': 'rtmp_video',
|
||||
'url': video_url,
|
||||
'ext': determine_ext(playpath),
|
||||
'play_path': playpath,
|
||||
}]
|
||||
|
||||
def _extract_http_videos(self, webpage):
|
||||
http_video_url = self._search_regex(r'P\.s\s*=\s*\'([^\']+)\'', webpage, 'video URL')
|
||||
|
||||
def _extract_cookies(self, webpage):
|
||||
policy = self._search_regex(r'InfoQConstants.scp\s*=\s*\'([^\']+)\'', webpage, 'policy')
|
||||
signature = self._search_regex(r'InfoQConstants.scs\s*=\s*\'([^\']+)\'', webpage, 'signature')
|
||||
key_pair_id = self._search_regex(r'InfoQConstants.sck\s*=\s*\'([^\']+)\'', webpage, 'key-pair-id')
|
||||
return 'CloudFront-Policy=%s; CloudFront-Signature=%s; CloudFront-Key-Pair-Id=%s' % (
|
||||
policy, signature, key_pair_id)
|
||||
|
||||
def _extract_http_video(self, webpage):
|
||||
http_video_url = self._search_regex(r'P\.s\s*=\s*\'([^\']+)\'', webpage, 'video URL')
|
||||
return [{
|
||||
'format_id': 'http',
|
||||
'format_id': 'http_video',
|
||||
'url': http_video_url,
|
||||
'http_headers': {
|
||||
'Cookie': 'CloudFront-Policy=%s; CloudFront-Signature=%s; CloudFront-Key-Pair-Id=%s' % (
|
||||
policy, signature, key_pair_id),
|
||||
'Cookie': self._extract_cookies(webpage)
|
||||
},
|
||||
}]
|
||||
|
||||
def _extract_http_audio(self, webpage, video_id):
|
||||
fields = self._hidden_inputs(webpage)
|
||||
http_audio_url = fields['filename']
|
||||
if http_audio_url is None:
|
||||
return []
|
||||
|
||||
cookies_header = {'Cookie': self._extract_cookies(webpage)}
|
||||
|
||||
# base URL is found in the Location header in the response returned by
|
||||
# GET https://www.infoq.com/mp3download.action?filename=... when logged in.
|
||||
http_audio_url = compat_urlparse.urljoin('http://res.infoq.com/downloads/mp3downloads/', http_audio_url)
|
||||
|
||||
# audio file seem to be missing some times even if there is a download link
|
||||
# so probe URL to make sure
|
||||
if not self._is_valid_url(http_audio_url, video_id, headers=cookies_header):
|
||||
return []
|
||||
|
||||
return [{
|
||||
'format_id': 'http_audio',
|
||||
'url': http_audio_url,
|
||||
'vcodec': 'none',
|
||||
'http_headers': cookies_header,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
@ -80,7 +120,10 @@ class InfoQIE(BokeCCBaseIE):
|
||||
# for China videos, HTTP video URL exists but always fails with 403
|
||||
formats = self._extract_bokecc_formats(webpage, video_id)
|
||||
else:
|
||||
formats = self._extract_rtmp_videos(webpage) + self._extract_http_videos(webpage)
|
||||
formats = (
|
||||
self._extract_rtmp_video(webpage) +
|
||||
self._extract_http_video(webpage) +
|
||||
self._extract_http_audio(webpage, video_id))
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
|
@ -65,7 +65,7 @@ class IPrimaIE(InfoExtractor):
|
||||
|
||||
options = self._parse_json(
|
||||
self._search_regex(
|
||||
r'(?s)var\s+playerOptions\s*=\s*({.+?});',
|
||||
r'(?s)(?:TDIPlayerOptions|playerOptions)\s*=\s*({.+?});\s*\]\]',
|
||||
playerpage, 'player options', default='{}'),
|
||||
video_id, transform_source=js_to_json, fatal=False)
|
||||
if options:
|
||||
|
@ -173,11 +173,12 @@ class IqiyiIE(InfoExtractor):
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.iqiyi.com/v_19rrhnnclk.html',
|
||||
'md5': '667171934041350c5de3f5015f7f1152',
|
||||
'md5': 'b7dc800a4004b1b57749d9abae0472da',
|
||||
'info_dict': {
|
||||
'id': 'e3f585b550a280af23c98b6cb2be19fb',
|
||||
'ext': 'mp4',
|
||||
'title': '名侦探柯南 国语版:第752集 迫近灰原秘密的黑影 下篇',
|
||||
# This can be either Simplified Chinese or Traditional Chinese
|
||||
'title': r're:^(?:名侦探柯南 国语版:第752集 迫近灰原秘密的黑影 下篇|名偵探柯南 國語版:第752集 迫近灰原秘密的黑影 下篇)$',
|
||||
},
|
||||
'skip': 'Geo-restricted to China',
|
||||
}, {
|
||||
|
196
youtube_dl/extractor/itv.py
Normal file
196
youtube_dl/extractor/itv.py
Normal file
@ -0,0 +1,196 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import uuid
|
||||
import xml.etree.ElementTree as etree
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_etree_register_namespace,
|
||||
)
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
xpath_with_ns,
|
||||
xpath_element,
|
||||
xpath_text,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
)
|
||||
|
||||
|
||||
class ITVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?itv\.com/hub/[^/]+/(?P<id>[0-9a-zA-Z]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.itv.com/hub/mr-bean-animated-series/2a2936a0053',
|
||||
'info_dict': {
|
||||
'id': '2a2936a0053',
|
||||
'ext': 'flv',
|
||||
'title': 'Home Movie',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
params = extract_attributes(self._search_regex(
|
||||
r'(?s)(<[^>]+id="video"[^>]*>)', webpage, 'params'))
|
||||
|
||||
ns_map = {
|
||||
'soapenv': 'http://schemas.xmlsoap.org/soap/envelope/',
|
||||
'tem': 'http://tempuri.org/',
|
||||
'itv': 'http://schemas.datacontract.org/2004/07/Itv.BB.Mercury.Common.Types',
|
||||
'com': 'http://schemas.itv.com/2009/05/Common',
|
||||
}
|
||||
for ns, full_ns in ns_map.items():
|
||||
compat_etree_register_namespace(ns, full_ns)
|
||||
|
||||
def _add_ns(name):
|
||||
return xpath_with_ns(name, ns_map)
|
||||
|
||||
def _add_sub_element(element, name):
|
||||
return etree.SubElement(element, _add_ns(name))
|
||||
|
||||
req_env = etree.Element(_add_ns('soapenv:Envelope'))
|
||||
_add_sub_element(req_env, 'soapenv:Header')
|
||||
body = _add_sub_element(req_env, 'soapenv:Body')
|
||||
get_playlist = _add_sub_element(body, ('tem:GetPlaylist'))
|
||||
request = _add_sub_element(get_playlist, 'tem:request')
|
||||
_add_sub_element(request, 'itv:ProductionId').text = params['data-video-id']
|
||||
_add_sub_element(request, 'itv:RequestGuid').text = compat_str(uuid.uuid4()).upper()
|
||||
vodcrid = _add_sub_element(request, 'itv:Vodcrid')
|
||||
_add_sub_element(vodcrid, 'com:Id')
|
||||
_add_sub_element(request, 'itv:Partition')
|
||||
user_info = _add_sub_element(get_playlist, 'tem:userInfo')
|
||||
_add_sub_element(user_info, 'itv:Broadcaster').text = 'Itv'
|
||||
_add_sub_element(user_info, 'itv:DM')
|
||||
_add_sub_element(user_info, 'itv:RevenueScienceValue')
|
||||
_add_sub_element(user_info, 'itv:SessionId')
|
||||
_add_sub_element(user_info, 'itv:SsoToken')
|
||||
_add_sub_element(user_info, 'itv:UserToken')
|
||||
site_info = _add_sub_element(get_playlist, 'tem:siteInfo')
|
||||
_add_sub_element(site_info, 'itv:AdvertisingRestriction').text = 'None'
|
||||
_add_sub_element(site_info, 'itv:AdvertisingSite').text = 'ITV'
|
||||
_add_sub_element(site_info, 'itv:AdvertisingType').text = 'Any'
|
||||
_add_sub_element(site_info, 'itv:Area').text = 'ITVPLAYER.VIDEO'
|
||||
_add_sub_element(site_info, 'itv:Category')
|
||||
_add_sub_element(site_info, 'itv:Platform').text = 'DotCom'
|
||||
_add_sub_element(site_info, 'itv:Site').text = 'ItvCom'
|
||||
device_info = _add_sub_element(get_playlist, 'tem:deviceInfo')
|
||||
_add_sub_element(device_info, 'itv:ScreenSize').text = 'Big'
|
||||
player_info = _add_sub_element(get_playlist, 'tem:playerInfo')
|
||||
_add_sub_element(player_info, 'itv:Version').text = '2'
|
||||
|
||||
headers = self.geo_verification_headers()
|
||||
headers.update({
|
||||
'Content-Type': 'text/xml; charset=utf-8',
|
||||
'SOAPAction': 'http://tempuri.org/PlaylistService/GetPlaylist',
|
||||
})
|
||||
resp_env = self._download_xml(
|
||||
params['data-playlist-url'], video_id,
|
||||
headers=headers, data=etree.tostring(req_env))
|
||||
playlist = xpath_element(resp_env, './/Playlist')
|
||||
if playlist is None:
|
||||
fault_string = xpath_text(resp_env, './/faultstring')
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, fault_string))
|
||||
title = xpath_text(playlist, 'EpisodeTitle', fatal=True)
|
||||
video_element = xpath_element(playlist, 'VideoEntries/Video', fatal=True)
|
||||
media_files = xpath_element(video_element, 'MediaFiles', fatal=True)
|
||||
rtmp_url = media_files.attrib['base']
|
||||
|
||||
formats = []
|
||||
for media_file in media_files.findall('MediaFile'):
|
||||
play_path = xpath_text(media_file, 'URL')
|
||||
if not play_path:
|
||||
continue
|
||||
tbr = int_or_none(media_file.get('bitrate'), 1000)
|
||||
formats.append({
|
||||
'format_id': 'rtmp' + ('-%d' % tbr if tbr else ''),
|
||||
'url': rtmp_url,
|
||||
'play_path': play_path,
|
||||
'tbr': tbr,
|
||||
'ext': 'flv',
|
||||
})
|
||||
|
||||
ios_playlist_url = params.get('data-video-playlist')
|
||||
hmac = params.get('data-video-hmac')
|
||||
if ios_playlist_url and hmac:
|
||||
headers = self.geo_verification_headers()
|
||||
headers.update({
|
||||
'Accept': 'application/vnd.itv.vod.playlist.v2+json',
|
||||
'Content-Type': 'application/json',
|
||||
'hmac': hmac.upper(),
|
||||
})
|
||||
ios_playlist = self._download_json(
|
||||
ios_playlist_url, video_id, data=json.dumps({
|
||||
'user': {
|
||||
'itvUserId': '',
|
||||
'entitlements': [],
|
||||
'token': ''
|
||||
},
|
||||
'device': {
|
||||
'manufacturer': 'Apple',
|
||||
'model': 'iPad',
|
||||
'os': {
|
||||
'name': 'iPhone OS',
|
||||
'version': '9.3',
|
||||
'type': 'ios'
|
||||
}
|
||||
},
|
||||
'client': {
|
||||
'version': '4.1',
|
||||
'id': 'browser'
|
||||
},
|
||||
'variantAvailability': {
|
||||
'featureset': {
|
||||
'min': ['hls', 'aes'],
|
||||
'max': ['hls', 'aes']
|
||||
},
|
||||
'platformTag': 'mobile'
|
||||
}
|
||||
}).encode(), headers=headers, fatal=False)
|
||||
if ios_playlist:
|
||||
video_data = ios_playlist.get('Playlist', {}).get('Video', {})
|
||||
ios_base_url = video_data.get('Base')
|
||||
for media_file in video_data.get('MediaFiles', []):
|
||||
href = media_file.get('Href')
|
||||
if not href:
|
||||
continue
|
||||
if ios_base_url:
|
||||
href = ios_base_url + href
|
||||
ext = determine_ext(href)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(href, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'url': href,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
for caption_url in video_element.findall('ClosedCaptioningURIs/URL'):
|
||||
if not caption_url.text:
|
||||
continue
|
||||
ext = determine_ext(caption_url.text, 'ttml')
|
||||
subtitles.setdefault('en', []).append({
|
||||
'url': caption_url.text,
|
||||
'ext': 'ttml' if ext == 'xml' else ext,
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'episode_title': title,
|
||||
'episode_number': int_or_none(xpath_text(playlist, 'EpisodeNumber')),
|
||||
'series': xpath_text(playlist, 'ProgrammeTitle'),
|
||||
'duartion': parse_duration(xpath_text(playlist, 'Duration')),
|
||||
}
|
@ -3,14 +3,18 @@ from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_urlparse
|
||||
from ..utils import remove_end
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
remove_end,
|
||||
)
|
||||
|
||||
|
||||
class IwaraIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.|ecchi\.)?iwara\.tv/videos/(?P<id>[a-zA-Z0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://iwara.tv/videos/amVwUl1EHpAD9RD',
|
||||
'md5': '1d53866b2c514b23ed69e4352fdc9839',
|
||||
# md5 is unstable
|
||||
'info_dict': {
|
||||
'id': 'amVwUl1EHpAD9RD',
|
||||
'ext': 'mp4',
|
||||
@ -23,17 +27,17 @@ class IwaraIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '0B1LvuHnL-sRFNXB1WHNqbGw4SXc',
|
||||
'ext': 'mp4',
|
||||
'title': '[3D Hentai] Kyonyu Ã\x97 Genkai Ã\x97 Emaki Shinobi Girls.mp4',
|
||||
'title': '[3D Hentai] Kyonyu × Genkai × Emaki Shinobi Girls.mp4',
|
||||
'age_limit': 18,
|
||||
},
|
||||
'add_ie': ['GoogleDrive'],
|
||||
}, {
|
||||
'url': 'http://www.iwara.tv/videos/nawkaumd6ilezzgq',
|
||||
'md5': '1d85f1e5217d2791626cff5ec83bb189',
|
||||
# md5 is unstable
|
||||
'info_dict': {
|
||||
'id': '6liAP9s2Ojc',
|
||||
'ext': 'mp4',
|
||||
'age_limit': 0,
|
||||
'age_limit': 18,
|
||||
'title': '[MMD] Do It Again Ver.2 [1080p 60FPS] (Motion,Camera,Wav+DL)',
|
||||
'description': 'md5:590c12c0df1443d833fbebe05da8c47a',
|
||||
'upload_date': '20160910',
|
||||
@ -52,9 +56,9 @@ class IwaraIE(InfoExtractor):
|
||||
# ecchi is 'sexy' in Japanese
|
||||
age_limit = 18 if hostname.split('.')[0] == 'ecchi' else 0
|
||||
|
||||
entries = self._parse_html5_media_entries(url, webpage, video_id)
|
||||
video_data = self._download_json('http://www.iwara.tv/api/video/%s' % video_id, video_id)
|
||||
|
||||
if not entries:
|
||||
if not video_data:
|
||||
iframe_url = self._html_search_regex(
|
||||
r'<iframe[^>]+src=([\'"])(?P<url>[^\'"]+)\1',
|
||||
webpage, 'iframe URL', group='url')
|
||||
@ -67,11 +71,25 @@ class IwaraIE(InfoExtractor):
|
||||
title = remove_end(self._html_search_regex(
|
||||
r'<title>([^<]+)</title>', webpage, 'title'), ' | Iwara')
|
||||
|
||||
info_dict = entries[0]
|
||||
info_dict.update({
|
||||
formats = []
|
||||
for a_format in video_data:
|
||||
format_id = a_format.get('resolution')
|
||||
height = int_or_none(self._search_regex(
|
||||
r'(\d+)p', format_id, 'height', default=None))
|
||||
formats.append({
|
||||
'url': a_format['uri'],
|
||||
'format_id': format_id,
|
||||
'ext': mimetype2ext(a_format.get('mime')) or 'mp4',
|
||||
'height': height,
|
||||
'width': int_or_none(height / 9.0 * 16.0 if height else None),
|
||||
'quality': 1 if format_id == 'Source' else 0,
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'age_limit': age_limit,
|
||||
})
|
||||
|
||||
return info_dict
|
||||
'formats': formats,
|
||||
}
|
||||
|
@ -5,9 +5,27 @@ import re
|
||||
|
||||
from ..compat import compat_urlparse
|
||||
from .common import InfoExtractor
|
||||
from ..utils import parse_duration
|
||||
|
||||
|
||||
class JamendoIE(InfoExtractor):
|
||||
class JamendoBaseIE(InfoExtractor):
|
||||
def _extract_meta(self, webpage, fatal=True):
|
||||
title = self._og_search_title(
|
||||
webpage, default=None) or self._search_regex(
|
||||
r'<title>([^<]+)', webpage,
|
||||
'title', default=None)
|
||||
if title:
|
||||
title = self._search_regex(
|
||||
r'(.+?)\s*\|\s*Jamendo Music', title, 'title', default=None)
|
||||
if not title:
|
||||
title = self._html_search_meta(
|
||||
'name', webpage, 'title', fatal=fatal)
|
||||
mobj = re.search(r'(.+) - (.+)', title or '')
|
||||
artist, second = mobj.groups() if mobj else [None] * 2
|
||||
return title, artist, second
|
||||
|
||||
|
||||
class JamendoIE(JamendoBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?jamendo\.com/track/(?P<id>[0-9]+)/(?P<display_id>[^/?#&]+)'
|
||||
_TEST = {
|
||||
'url': 'https://www.jamendo.com/track/196219/stories-from-emona-i',
|
||||
@ -16,7 +34,10 @@ class JamendoIE(InfoExtractor):
|
||||
'id': '196219',
|
||||
'display_id': 'stories-from-emona-i',
|
||||
'ext': 'flac',
|
||||
'title': 'Stories from Emona I',
|
||||
'title': 'Maya Filipič - Stories from Emona I',
|
||||
'artist': 'Maya Filipič',
|
||||
'track': 'Stories from Emona I',
|
||||
'duration': 210,
|
||||
'thumbnail': r're:^https?://.*\.jpg'
|
||||
}
|
||||
}
|
||||
@ -28,7 +49,7 @@ class JamendoIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
title = self._html_search_meta('name', webpage, 'title')
|
||||
title, artist, track = self._extract_meta(webpage)
|
||||
|
||||
formats = [{
|
||||
'url': 'https://%s.jamendo.com/?trackid=%s&format=%s&from=app-97dab294'
|
||||
@ -46,37 +67,47 @@ class JamendoIE(InfoExtractor):
|
||||
|
||||
thumbnail = self._html_search_meta(
|
||||
'image', webpage, 'thumbnail', fatal=False)
|
||||
duration = parse_duration(self._search_regex(
|
||||
r'<span[^>]+itemprop=["\']duration["\'][^>]+content=["\'](.+?)["\']',
|
||||
webpage, 'duration', fatal=False))
|
||||
|
||||
return {
|
||||
'id': track_id,
|
||||
'display_id': display_id,
|
||||
'thumbnail': thumbnail,
|
||||
'title': title,
|
||||
'duration': duration,
|
||||
'artist': artist,
|
||||
'track': track,
|
||||
'formats': formats
|
||||
}
|
||||
|
||||
|
||||
class JamendoAlbumIE(InfoExtractor):
|
||||
class JamendoAlbumIE(JamendoBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?jamendo\.com/album/(?P<id>[0-9]+)/(?P<display_id>[\w-]+)'
|
||||
_TEST = {
|
||||
'url': 'https://www.jamendo.com/album/121486/duck-on-cover',
|
||||
'info_dict': {
|
||||
'id': '121486',
|
||||
'title': 'Duck On Cover'
|
||||
'title': 'Shearer - Duck On Cover'
|
||||
},
|
||||
'playlist': [{
|
||||
'md5': 'e1a2fcb42bda30dfac990212924149a8',
|
||||
'info_dict': {
|
||||
'id': '1032333',
|
||||
'ext': 'flac',
|
||||
'title': 'Warmachine'
|
||||
'title': 'Shearer - Warmachine',
|
||||
'artist': 'Shearer',
|
||||
'track': 'Warmachine',
|
||||
}
|
||||
}, {
|
||||
'md5': '1f358d7b2f98edfe90fd55dac0799d50',
|
||||
'info_dict': {
|
||||
'id': '1032330',
|
||||
'ext': 'flac',
|
||||
'title': 'Without Your Ghost'
|
||||
'title': 'Shearer - Without Your Ghost',
|
||||
'artist': 'Shearer',
|
||||
'track': 'Without Your Ghost',
|
||||
}
|
||||
}],
|
||||
'params': {
|
||||
@ -90,18 +121,18 @@ class JamendoAlbumIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, mobj.group('display_id'))
|
||||
|
||||
title = self._html_search_meta('name', webpage, 'title')
|
||||
title, artist, album = self._extract_meta(webpage, fatal=False)
|
||||
|
||||
entries = [
|
||||
self.url_result(
|
||||
compat_urlparse.urljoin(url, m.group('path')),
|
||||
ie=JamendoIE.ie_key(),
|
||||
video_id=self._search_regex(
|
||||
r'/track/(\d+)', m.group('path'),
|
||||
'track id', default=None))
|
||||
for m in re.finditer(
|
||||
r'<a[^>]+href=(["\'])(?P<path>(?:(?!\1).)+)\1[^>]+class=["\'][^>]*js-trackrow-albumpage-link',
|
||||
webpage)
|
||||
]
|
||||
entries = [{
|
||||
'_type': 'url_transparent',
|
||||
'url': compat_urlparse.urljoin(url, m.group('path')),
|
||||
'ie_key': JamendoIE.ie_key(),
|
||||
'id': self._search_regex(
|
||||
r'/track/(\d+)', m.group('path'), 'track id', default=None),
|
||||
'artist': artist,
|
||||
'album': album,
|
||||
} for m in re.finditer(
|
||||
r'<a[^>]+href=(["\'])(?P<path>(?:(?!\1).)+)\1[^>]+class=["\'][^>]*js-trackrow-albumpage-link',
|
||||
webpage)]
|
||||
|
||||
return self.playlist_result(entries, album_id, title)
|
||||
|
@ -23,11 +23,11 @@ class KalturaIE(InfoExtractor):
|
||||
(?:
|
||||
kaltura:(?P<partner_id>\d+):(?P<id>[0-9a-z_]+)|
|
||||
https?://
|
||||
(:?(?:www|cdnapi(?:sec)?)\.)?kaltura\.com/
|
||||
(:?(?:www|cdnapi(?:sec)?)\.)?kaltura\.com(?::\d+)?/
|
||||
(?:
|
||||
(?:
|
||||
# flash player
|
||||
index\.php/kwidget|
|
||||
index\.php/(?:kwidget|extwidget/preview)|
|
||||
# html5 player
|
||||
html5/html5lib/[^/]+/mwEmbedFrame\.php
|
||||
)
|
||||
@ -94,6 +94,14 @@ class KalturaIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'https://www.kaltura.com/index.php/extwidget/preview/partner_id/1770401/uiconf_id/37307382/entry_id/0_58u8kme7/embed/iframe?&flashvars[streamerType]=auto',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'https://www.kaltura.com:443/index.php/extwidget/preview/partner_id/1770401/uiconf_id/37307382/entry_id/0_58u8kme7/embed/iframe?&flashvars[streamerType]=auto',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
|
||||
@ -112,7 +120,7 @@ class KalturaIE(InfoExtractor):
|
||||
re.search(
|
||||
r'''(?xs)
|
||||
(?P<q1>["\'])
|
||||
(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/(?:(?!(?P=q1)).)*(?:p|partner_id)/(?P<partner_id>\d+)(?:(?!(?P=q1)).)*
|
||||
(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com(?::\d+)?/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)(?:(?!(?P=q1)).)*
|
||||
(?P=q1).*?
|
||||
(?:
|
||||
entry_?[Ii]d|
|
||||
@ -209,6 +217,8 @@ class KalturaIE(InfoExtractor):
|
||||
partner_id = params['wid'][0][1:]
|
||||
elif 'p' in params:
|
||||
partner_id = params['p'][0]
|
||||
elif 'partner_id' in params:
|
||||
partner_id = params['partner_id'][0]
|
||||
else:
|
||||
raise ExtractorError('Invalid URL', expected=True)
|
||||
if 'entry_id' in params:
|
||||
@ -266,9 +276,12 @@ class KalturaIE(InfoExtractor):
|
||||
# skip for now.
|
||||
if f.get('fileExt') == 'chun':
|
||||
continue
|
||||
if not f.get('fileExt') and f.get('containerFormat') == 'qt':
|
||||
if not f.get('fileExt'):
|
||||
# QT indicates QuickTime; some videos have broken fileExt
|
||||
f['fileExt'] = 'mov'
|
||||
if f.get('containerFormat') == 'qt':
|
||||
f['fileExt'] = 'mov'
|
||||
else:
|
||||
f['fileExt'] = 'mp4'
|
||||
video_url = sign_url(
|
||||
'%s/flavorId/%s' % (data_url, f['id']))
|
||||
# audio-only has no videoCodecId (e.g. kaltura:1926081:0_c03e1b5g
|
||||
@ -319,6 +332,6 @@ class KalturaIE(InfoExtractor):
|
||||
'thumbnail': info.get('thumbnailUrl'),
|
||||
'duration': info.get('duration'),
|
||||
'timestamp': info.get('createdAt'),
|
||||
'uploader_id': info.get('userId'),
|
||||
'uploader_id': info.get('userId') if info.get('userId') != 'None' else None,
|
||||
'view_count': info.get('plays'),
|
||||
}
|
||||
|
@ -2,29 +2,31 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class KonserthusetPlayIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?konserthusetplay\.se/\?.*\bm=(?P<id>[^&]+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:konserthusetplay|rspoplay)\.se/\?.*\bm=(?P<id>[^&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.konserthusetplay.se/?m=CKDDnlCY-dhWAAqiMERd-A',
|
||||
'md5': 'e3fd47bf44e864bd23c08e487abe1967',
|
||||
'info_dict': {
|
||||
'id': 'CKDDnlCY-dhWAAqiMERd-A',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Orkesterns instrument: Valthornen',
|
||||
'description': 'md5:f10e1f0030202020396a4d712d2fa827',
|
||||
'thumbnail': 're:^https?://.*$',
|
||||
'duration': 398.8,
|
||||
'duration': 398.76,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'http://rspoplay.se/?m=elWuEH34SMKvaO4wO_cHBw',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
@ -42,12 +44,18 @@ class KonserthusetPlayIE(InfoExtractor):
|
||||
player_config = media['playerconfig']
|
||||
playlist = player_config['playlist']
|
||||
|
||||
source = next(f for f in playlist if f.get('bitrates'))
|
||||
source = next(f for f in playlist if f.get('bitrates') or f.get('provider'))
|
||||
|
||||
FORMAT_ID_REGEX = r'_([^_]+)_h264m\.mp4'
|
||||
|
||||
formats = []
|
||||
|
||||
m3u8_url = source.get('url')
|
||||
if m3u8_url and determine_ext(m3u8_url) == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
|
||||
fallback_url = source.get('fallbackUrl')
|
||||
fallback_format_id = None
|
||||
if fallback_url:
|
||||
@ -97,6 +105,13 @@ class KonserthusetPlayIE(InfoExtractor):
|
||||
thumbnail = media.get('image')
|
||||
duration = float_or_none(media.get('duration'), 1000)
|
||||
|
||||
subtitles = {}
|
||||
captions = source.get('captionsAvailableLanguages')
|
||||
if isinstance(captions, dict):
|
||||
for lang, subtitle_url in captions.items():
|
||||
if lang != 'none' and isinstance(subtitle_url, compat_str):
|
||||
subtitles.setdefault(lang, []).append({'url': subtitle_url})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
@ -104,4 +119,5 @@ class KonserthusetPlayIE(InfoExtractor):
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
@ -7,20 +7,40 @@ class LemondeIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:.+?\.)?lemonde\.fr/(?:[^/]+/)*(?P<id>[^/]+)\.html'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.lemonde.fr/police-justice/video/2016/01/19/comprendre-l-affaire-bygmalion-en-cinq-minutes_4849702_1653578.html',
|
||||
'md5': '01fb3c92de4c12c573343d63e163d302',
|
||||
'md5': 'da120c8722d8632eec6ced937536cc98',
|
||||
'info_dict': {
|
||||
'id': 'lqm3kl',
|
||||
'ext': 'mp4',
|
||||
'title': "Comprendre l'affaire Bygmalion en 5 minutes",
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'duration': 320,
|
||||
'duration': 309,
|
||||
'upload_date': '20160119',
|
||||
'timestamp': 1453194778,
|
||||
'uploader_id': '3pmkp',
|
||||
},
|
||||
}, {
|
||||
# standard iframe embed
|
||||
'url': 'http://www.lemonde.fr/les-decodeurs/article/2016/10/18/tout-comprendre-du-ceta-le-petit-cousin-du-traite-transatlantique_5015920_4355770.html',
|
||||
'info_dict': {
|
||||
'id': 'uzsxms',
|
||||
'ext': 'mp4',
|
||||
'title': "CETA : quelles suites pour l'accord commercial entre l'Europe et le Canada ?",
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'duration': 325,
|
||||
'upload_date': '20161021',
|
||||
'timestamp': 1477044540,
|
||||
'uploader_id': '3pmkp',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://redaction.actu.lemonde.fr/societe/video/2016/01/18/calais-debut-des-travaux-de-defrichement-dans-la-jungle_4849233_3224.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# YouTube embeds
|
||||
'url': 'http://www.lemonde.fr/pixels/article/2016/12/09/pourquoi-pewdiepie-superstar-de-youtube-a-menace-de-fermer-sa-chaine_5046649_4408996.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -30,5 +50,9 @@ class LemondeIE(InfoExtractor):
|
||||
|
||||
digiteka_url = self._proto_relative_url(self._search_regex(
|
||||
r'url\s*:\s*(["\'])(?P<url>(?:https?://)?//(?:www\.)?(?:digiteka\.net|ultimedia\.com)/deliver/.+?)\1',
|
||||
webpage, 'digiteka url', group='url'))
|
||||
return self.url_result(digiteka_url, 'Digiteka')
|
||||
webpage, 'digiteka url', group='url', default=None))
|
||||
|
||||
if digiteka_url:
|
||||
return self.url_result(digiteka_url, 'Digiteka')
|
||||
|
||||
return self.url_result(url, 'Generic')
|
||||
|
@ -8,6 +8,7 @@ from ..utils import (
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
unsmuggle_url,
|
||||
)
|
||||
|
||||
|
||||
@ -15,20 +16,23 @@ class LimelightBaseIE(InfoExtractor):
|
||||
_PLAYLIST_SERVICE_URL = 'http://production-ps.lvp.llnw.net/r/PlaylistService/%s/%s/%s'
|
||||
_API_URL = 'http://api.video.limelight.com/rest/organizations/%s/%s/%s/%s.json'
|
||||
|
||||
def _call_playlist_service(self, item_id, method, fatal=True):
|
||||
def _call_playlist_service(self, item_id, method, fatal=True, referer=None):
|
||||
headers = {}
|
||||
if referer:
|
||||
headers['Referer'] = referer
|
||||
return self._download_json(
|
||||
self._PLAYLIST_SERVICE_URL % (self._PLAYLIST_SERVICE_PATH, item_id, method),
|
||||
item_id, 'Downloading PlaylistService %s JSON' % method, fatal=fatal)
|
||||
item_id, 'Downloading PlaylistService %s JSON' % method, fatal=fatal, headers=headers)
|
||||
|
||||
def _call_api(self, organization_id, item_id, method):
|
||||
return self._download_json(
|
||||
self._API_URL % (organization_id, self._API_PATH, item_id, method),
|
||||
item_id, 'Downloading API %s JSON' % method)
|
||||
|
||||
def _extract(self, item_id, pc_method, mobile_method, meta_method):
|
||||
pc = self._call_playlist_service(item_id, pc_method)
|
||||
def _extract(self, item_id, pc_method, mobile_method, meta_method, referer=None):
|
||||
pc = self._call_playlist_service(item_id, pc_method, referer=referer)
|
||||
metadata = self._call_api(pc['orgId'], item_id, meta_method)
|
||||
mobile = self._call_playlist_service(item_id, mobile_method, fatal=False)
|
||||
mobile = self._call_playlist_service(item_id, mobile_method, fatal=False, referer=referer)
|
||||
return pc, mobile, metadata
|
||||
|
||||
def _extract_info(self, streams, mobile_urls, properties):
|
||||
@ -59,14 +63,26 @@ class LimelightBaseIE(InfoExtractor):
|
||||
format_id = 'rtmp'
|
||||
if stream.get('videoBitRate'):
|
||||
format_id += '-%d' % int_or_none(stream['videoBitRate'])
|
||||
http_url = 'http://cpl.delvenetworks.com/' + rtmp.group('playpath')[4:]
|
||||
urls.append(http_url)
|
||||
http_fmt = fmt.copy()
|
||||
http_fmt.update({
|
||||
'url': http_url,
|
||||
'format_id': format_id.replace('rtmp', 'http'),
|
||||
})
|
||||
formats.append(http_fmt)
|
||||
http_format_id = format_id.replace('rtmp', 'http')
|
||||
|
||||
CDN_HOSTS = (
|
||||
('delvenetworks.com', 'cpl.delvenetworks.com'),
|
||||
('video.llnw.net', 's2.content.video.llnw.net'),
|
||||
)
|
||||
for cdn_host, http_host in CDN_HOSTS:
|
||||
if cdn_host not in rtmp.group('host').lower():
|
||||
continue
|
||||
http_url = 'http://%s/%s' % (http_host, rtmp.group('playpath')[4:])
|
||||
urls.append(http_url)
|
||||
if self._is_valid_url(http_url, video_id, http_format_id):
|
||||
http_fmt = fmt.copy()
|
||||
http_fmt.update({
|
||||
'url': http_url,
|
||||
'format_id': http_format_id,
|
||||
})
|
||||
formats.append(http_fmt)
|
||||
break
|
||||
|
||||
fmt.update({
|
||||
'url': rtmp.group('url'),
|
||||
'play_path': rtmp.group('playpath'),
|
||||
@ -195,10 +211,13 @@ class LimelightMediaIE(LimelightBaseIE):
|
||||
_API_PATH = 'media'
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, smuggled_data = unsmuggle_url(url, {})
|
||||
video_id = self._match_id(url)
|
||||
|
||||
pc, mobile, metadata = self._extract(
|
||||
video_id, 'getPlaylistByMediaId', 'getMobilePlaylistByMediaId', 'properties')
|
||||
video_id, 'getPlaylistByMediaId',
|
||||
'getMobilePlaylistByMediaId', 'properties',
|
||||
smuggled_data.get('source_url'))
|
||||
|
||||
return self._extract_info(
|
||||
pc['playlistItems'][0].get('streams', []),
|
||||
@ -235,11 +254,13 @@ class LimelightChannelIE(LimelightBaseIE):
|
||||
_API_PATH = 'channels'
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, smuggled_data = unsmuggle_url(url, {})
|
||||
channel_id = self._match_id(url)
|
||||
|
||||
pc, mobile, medias = self._extract(
|
||||
channel_id, 'getPlaylistByChannelId',
|
||||
'getMobilePlaylistWithNItemsByChannelId?begin=0&count=-1', 'media')
|
||||
'getMobilePlaylistWithNItemsByChannelId?begin=0&count=-1',
|
||||
'media', smuggled_data.get('source_url'))
|
||||
|
||||
entries = [
|
||||
self._extract_info(
|
||||
|
@ -211,7 +211,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||
|
||||
def _extract_triforce_mgid(self, webpage, data_zone=None, video_id=None):
|
||||
triforce_feed = self._parse_json(self._search_regex(
|
||||
r'triforceManifestFeed\s*=\s*(\{.+?\});\n', webpage,
|
||||
r'triforceManifestFeed\s*=\s*({.+?})\s*;\s*\n', webpage,
|
||||
'triforce feed', default='{}'), video_id, fatal=False)
|
||||
|
||||
data_zone = self._search_regex(
|
||||
@ -304,7 +304,7 @@ class MTVServicesEmbeddedIE(MTVServicesInfoExtractor):
|
||||
|
||||
class MTVIE(MTVServicesInfoExtractor):
|
||||
IE_NAME = 'mtv'
|
||||
_VALID_URL = r'https?://(?:www\.)?mtv\.com/(?:video-clips|full-episodes)/(?P<id>[^/?#.]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?mtv\.com/(?:video-clips|(?:full-)?episodes)/(?P<id>[^/?#.]+)'
|
||||
_FEED_URL = 'http://www.mtv.com/feeds/mrss/'
|
||||
|
||||
_TESTS = [{
|
||||
@ -321,9 +321,41 @@ class MTVIE(MTVServicesInfoExtractor):
|
||||
}, {
|
||||
'url': 'http://www.mtv.com/full-episodes/94tujl/unlocking-the-truth-gates-of-hell-season-1-ep-101',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.mtv.com/episodes/g8xu7q/teen-mom-2-breaking-the-wall-season-7-ep-713',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
|
||||
class MTV81IE(InfoExtractor):
|
||||
IE_NAME = 'mtv81'
|
||||
_VALID_URL = r'https?://(?:www\.)?mtv81\.com/videos/(?P<id>[^/?#.]+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.mtv81.com/videos/artist-to-watch/the-godfather-of-japanese-hip-hop-segment-1/',
|
||||
'md5': '1edbcdf1e7628e414a8c5dcebca3d32b',
|
||||
'info_dict': {
|
||||
'id': '5e14040d-18a4-47c4-a582-43ff602de88e',
|
||||
'ext': 'mp4',
|
||||
'title': 'Unlocking The Truth|July 18, 2016|1|101|Trailer',
|
||||
'description': '"Unlocking the Truth" premieres August 17th at 11/10c.',
|
||||
'timestamp': 1468846800,
|
||||
'upload_date': '20160718',
|
||||
},
|
||||
}
|
||||
|
||||
def _extract_mgid(self, webpage):
|
||||
return self._search_regex(
|
||||
r'getTheVideo\((["\'])(?P<id>mgid:.+?)\1', webpage,
|
||||
'mgid', group='id')
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
mgid = self._extract_mgid(webpage)
|
||||
return self.url_result('http://media.mtvnservices.com/embed/%s' % mgid)
|
||||
|
||||
|
||||
class MTVVideoIE(MTVServicesInfoExtractor):
|
||||
IE_NAME = 'mtv:video'
|
||||
_VALID_URL = r'''(?x)^https?://
|
||||
|
@ -17,9 +17,10 @@ class MySpaceIE(InfoExtractor):
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'https://myspace.com/fiveminutestothestage/video/little-big-town/109594919',
|
||||
'md5': '9c1483c106f4a695c47d2911feed50a7',
|
||||
'info_dict': {
|
||||
'id': '109594919',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Little Big Town',
|
||||
'description': 'This country quartet was all smiles while playing a sold out show at the Pacific Amphitheatre in Orange County, California.',
|
||||
'uploader': 'Five Minutes to the Stage',
|
||||
@ -27,37 +28,30 @@ class MySpaceIE(InfoExtractor):
|
||||
'timestamp': 1414108751,
|
||||
'upload_date': '20141023',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# songs
|
||||
{
|
||||
'url': 'https://myspace.com/killsorrow/music/song/of-weakened-soul...-93388656-103880681',
|
||||
'md5': '1d7ee4604a3da226dd69a123f748b262',
|
||||
'info_dict': {
|
||||
'id': '93388656',
|
||||
'ext': 'flv',
|
||||
'ext': 'm4a',
|
||||
'title': 'Of weakened soul...',
|
||||
'uploader': 'Killsorrow',
|
||||
'uploader_id': 'killsorrow',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'add_ie': ['Vevo'],
|
||||
'add_ie': ['Youtube'],
|
||||
'url': 'https://myspace.com/threedaysgrace/music/song/animal-i-have-become-28400208-28218041',
|
||||
'info_dict': {
|
||||
'id': 'USZM20600099',
|
||||
'ext': 'mp4',
|
||||
'title': 'Animal I Have Become',
|
||||
'uploader': 'Three Days Grace',
|
||||
'timestamp': int,
|
||||
'upload_date': '20060502',
|
||||
'id': 'xqds0B_meys',
|
||||
'ext': 'webm',
|
||||
'title': 'Three Days Grace - Animal I Have Become',
|
||||
'description': 'md5:8bd86b3693e72a077cf863a8530c54bb',
|
||||
'uploader': 'ThreeDaysGraceVEVO',
|
||||
'uploader_id': 'ThreeDaysGraceVEVO',
|
||||
'upload_date': '20091002',
|
||||
},
|
||||
'skip': 'VEVO is only available in some countries',
|
||||
}, {
|
||||
'add_ie': ['Youtube'],
|
||||
'url': 'https://myspace.com/starset2/music/song/first-light-95799905-106964426',
|
||||
@ -76,24 +70,46 @@ class MySpaceIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
is_song = mobj.group('mediatype').startswith('music/song')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
player_url = self._search_regex(
|
||||
r'playerSwf":"([^"?]*)', webpage, 'player URL')
|
||||
r'videoSwf":"([^"?]*)', webpage, 'player URL', fatal=False)
|
||||
|
||||
def rtmp_format_from_stream_url(stream_url, width=None, height=None):
|
||||
rtmp_url, play_path = stream_url.split(';', 1)
|
||||
return {
|
||||
'format_id': 'rtmp',
|
||||
'url': rtmp_url,
|
||||
'play_path': play_path,
|
||||
'player_url': player_url,
|
||||
'protocol': 'rtmp',
|
||||
'ext': 'flv',
|
||||
'width': width,
|
||||
'height': height,
|
||||
}
|
||||
def formats_from_stream_urls(stream_url, hls_stream_url, http_stream_url, width=None, height=None):
|
||||
formats = []
|
||||
vcodec = 'none' if is_song else None
|
||||
if hls_stream_url:
|
||||
formats.append({
|
||||
'format_id': 'hls',
|
||||
'url': hls_stream_url,
|
||||
'protocol': 'm3u8_native',
|
||||
'ext': 'm4a' if is_song else 'mp4',
|
||||
'vcodec': vcodec,
|
||||
})
|
||||
if stream_url and player_url:
|
||||
rtmp_url, play_path = stream_url.split(';', 1)
|
||||
formats.append({
|
||||
'format_id': 'rtmp',
|
||||
'url': rtmp_url,
|
||||
'play_path': play_path,
|
||||
'player_url': player_url,
|
||||
'protocol': 'rtmp',
|
||||
'ext': 'flv',
|
||||
'width': width,
|
||||
'height': height,
|
||||
'vcodec': vcodec,
|
||||
})
|
||||
if http_stream_url:
|
||||
formats.append({
|
||||
'format_id': 'http',
|
||||
'url': http_stream_url,
|
||||
'width': width,
|
||||
'height': height,
|
||||
'vcodec': vcodec,
|
||||
})
|
||||
return formats
|
||||
|
||||
if mobj.group('mediatype').startswith('music/song'):
|
||||
if is_song:
|
||||
# songs don't store any useful info in the 'context' variable
|
||||
song_data = self._search_regex(
|
||||
r'''<button.*data-song-id=(["\'])%s\1.*''' % video_id,
|
||||
@ -108,8 +124,10 @@ class MySpaceIE(InfoExtractor):
|
||||
return self._search_regex(
|
||||
r'''data-%s=([\'"])(?P<data>.*?)\1''' % name,
|
||||
song_data, name, default='', group='data')
|
||||
stream_url = search_data('stream-url')
|
||||
if not stream_url:
|
||||
formats = formats_from_stream_urls(
|
||||
search_data('stream-url'), search_data('hls-stream-url'),
|
||||
search_data('http-stream-url'))
|
||||
if not formats:
|
||||
vevo_id = search_data('vevo-id')
|
||||
youtube_id = search_data('youtube-id')
|
||||
if vevo_id:
|
||||
@ -121,6 +139,7 @@ class MySpaceIE(InfoExtractor):
|
||||
else:
|
||||
raise ExtractorError(
|
||||
'Found song but don\'t know how to download it')
|
||||
self._sort_formats(formats)
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': self._og_search_title(webpage),
|
||||
@ -128,27 +147,16 @@ class MySpaceIE(InfoExtractor):
|
||||
'uploader_id': search_data('artist-username'),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'duration': int_or_none(search_data('duration')),
|
||||
'formats': [rtmp_format_from_stream_url(stream_url)]
|
||||
'formats': formats,
|
||||
}
|
||||
else:
|
||||
video = self._parse_json(self._search_regex(
|
||||
r'context = ({.*?});', webpage, 'context'),
|
||||
video_id)['video']
|
||||
formats = []
|
||||
hls_stream_url = video.get('hlsStreamUrl')
|
||||
if hls_stream_url:
|
||||
formats.append({
|
||||
'format_id': 'hls',
|
||||
'url': hls_stream_url,
|
||||
'protocol': 'm3u8_native',
|
||||
'ext': 'mp4',
|
||||
})
|
||||
stream_url = video.get('streamUrl')
|
||||
if stream_url:
|
||||
formats.append(rtmp_format_from_stream_url(
|
||||
stream_url,
|
||||
int_or_none(video.get('width')),
|
||||
int_or_none(video.get('height'))))
|
||||
formats = formats_from_stream_urls(
|
||||
video.get('streamUrl'), video.get('hlsStreamUrl'),
|
||||
video.get('mp4StreamUrl'), int_or_none(video.get('width')),
|
||||
int_or_none(video.get('height')))
|
||||
self._sort_formats(formats)
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@ -12,10 +12,10 @@ from ..utils import (
|
||||
|
||||
|
||||
class NaverIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:m\.)?tvcast\.naver\.com/v/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:m\.)?tv(?:cast)?\.naver\.com/v/(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://tvcast.naver.com/v/81652',
|
||||
'url': 'http://tv.naver.com/v/81652',
|
||||
'info_dict': {
|
||||
'id': '81652',
|
||||
'ext': 'mp4',
|
||||
@ -24,7 +24,7 @@ class NaverIE(InfoExtractor):
|
||||
'upload_date': '20130903',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://tvcast.naver.com/v/395837',
|
||||
'url': 'http://tv.naver.com/v/395837',
|
||||
'md5': '638ed4c12012c458fefcddfd01f173cd',
|
||||
'info_dict': {
|
||||
'id': '395837',
|
||||
@ -34,6 +34,9 @@ class NaverIE(InfoExtractor):
|
||||
'upload_date': '20150519',
|
||||
},
|
||||
'skip': 'Georestricted',
|
||||
}, {
|
||||
'url': 'http://tvcast.naver.com/v/81652',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@ -4,23 +4,26 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .theplatform import ThePlatformIE
|
||||
from .adobepass import AdobePassIE
|
||||
from ..compat import compat_urllib_parse_urlparse
|
||||
from ..utils import (
|
||||
find_xpath_attr,
|
||||
lowercase_escape,
|
||||
smuggle_url,
|
||||
unescapeHTML,
|
||||
update_url_query,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class NBCIE(InfoExtractor):
|
||||
class NBCIE(AdobePassIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?nbc\.com/(?:[^/]+/)+(?P<id>n?\d+)'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.nbc.com/the-tonight-show/segments/112966',
|
||||
'url': 'http://www.nbc.com/the-tonight-show/video/jimmy-fallon-surprises-fans-at-ben-jerrys/2848237',
|
||||
'info_dict': {
|
||||
'id': '112966',
|
||||
'id': '2848237',
|
||||
'ext': 'mp4',
|
||||
'title': 'Jimmy Fallon Surprises Fans at Ben & Jerry\'s',
|
||||
'description': 'Jimmy gives out free scoops of his new "Tonight Dough" ice cream flavor by surprising customers at the Ben & Jerry\'s scoop shop.',
|
||||
@ -69,7 +72,7 @@ class NBCIE(InfoExtractor):
|
||||
# HLS streams requires the 'hdnea3' cookie
|
||||
'url': 'http://www.nbc.com/Kings/video/goliath/n1806',
|
||||
'info_dict': {
|
||||
'id': 'n1806',
|
||||
'id': '101528f5a9e8127b107e98c5e6ce4638',
|
||||
'ext': 'mp4',
|
||||
'title': 'Goliath',
|
||||
'description': 'When an unknown soldier saves the life of the King\'s son in battle, he\'s thrust into the limelight and politics of the kingdom.',
|
||||
@ -87,21 +90,57 @@ class NBCIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
theplatform_url = unescapeHTML(lowercase_escape(self._html_search_regex(
|
||||
[
|
||||
r'(?:class="video-player video-player-full" data-mpx-url|class="player" src)="(.*?)"',
|
||||
r'<iframe[^>]+src="((?:https?:)?//player\.theplatform\.com/[^"]+)"',
|
||||
r'"embedURL"\s*:\s*"([^"]+)"'
|
||||
],
|
||||
webpage, 'theplatform url').replace('_no_endcard', '').replace('\\/', '/')))
|
||||
if theplatform_url.startswith('//'):
|
||||
theplatform_url = 'http:' + theplatform_url
|
||||
return {
|
||||
info = {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': 'ThePlatform',
|
||||
'url': smuggle_url(theplatform_url, {'source_url': url}),
|
||||
'id': video_id,
|
||||
}
|
||||
video_data = None
|
||||
preload = self._search_regex(
|
||||
r'PRELOAD\s*=\s*({.+})', webpage, 'preload data', default=None)
|
||||
if preload:
|
||||
preload_data = self._parse_json(preload, video_id)
|
||||
path = compat_urllib_parse_urlparse(url).path.rstrip('/')
|
||||
entity_id = preload_data.get('xref', {}).get(path)
|
||||
video_data = preload_data.get('entities', {}).get(entity_id)
|
||||
if video_data:
|
||||
query = {
|
||||
'mbr': 'true',
|
||||
'manifest': 'm3u',
|
||||
}
|
||||
video_id = video_data['guid']
|
||||
title = video_data['title']
|
||||
if video_data.get('entitlement') == 'auth':
|
||||
resource = self._get_mvpd_resource(
|
||||
'nbcentertainment', title, video_id,
|
||||
video_data.get('vChipRating'))
|
||||
query['auth'] = self._extract_mvpd_auth(
|
||||
url, video_id, 'nbcentertainment', resource)
|
||||
theplatform_url = smuggle_url(update_url_query(
|
||||
'http://link.theplatform.com/s/NnzsPC/media/guid/2410887629/' + video_id,
|
||||
query), {'force_smil_url': True})
|
||||
info.update({
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'url': theplatform_url,
|
||||
'description': video_data.get('description'),
|
||||
'keywords': video_data.get('keywords'),
|
||||
'season_number': int_or_none(video_data.get('seasonNumber')),
|
||||
'episode_number': int_or_none(video_data.get('episodeNumber')),
|
||||
'series': video_data.get('showName'),
|
||||
})
|
||||
else:
|
||||
theplatform_url = unescapeHTML(lowercase_escape(self._html_search_regex(
|
||||
[
|
||||
r'(?:class="video-player video-player-full" data-mpx-url|class="player" src)="(.*?)"',
|
||||
r'<iframe[^>]+src="((?:https?:)?//player\.theplatform\.com/[^"]+)"',
|
||||
r'"embedURL"\s*:\s*"([^"]+)"'
|
||||
],
|
||||
webpage, 'theplatform url').replace('_no_endcard', '').replace('\\/', '/')))
|
||||
if theplatform_url.startswith('//'):
|
||||
theplatform_url = 'http:' + theplatform_url
|
||||
info['url'] = smuggle_url(theplatform_url, {'source_url': url})
|
||||
return info
|
||||
|
||||
|
||||
class NBCSportsVPlayerIE(InfoExtractor):
|
||||
|
@ -2,7 +2,15 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import parse_iso8601
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
get_element_by_class,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
remove_start,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
class NextMediaIE(InfoExtractor):
|
||||
@ -30,6 +38,12 @@ class NextMediaIE(InfoExtractor):
|
||||
return self._extract_from_nextmedia_page(news_id, url, page)
|
||||
|
||||
def _extract_from_nextmedia_page(self, news_id, url, page):
|
||||
redirection_url = self._search_regex(
|
||||
r'window\.location\.href\s*=\s*([\'"])(?P<url>(?!\1).+)\1',
|
||||
page, 'redirection URL', default=None, group='url')
|
||||
if redirection_url:
|
||||
return self.url_result(compat_urlparse.urljoin(url, redirection_url))
|
||||
|
||||
title = self._fetch_title(page)
|
||||
video_url = self._search_regex(self._URL_PATTERN, page, 'video url')
|
||||
|
||||
@ -93,7 +107,7 @@ class NextMediaActionNewsIE(NextMediaIE):
|
||||
|
||||
class AppleDailyIE(NextMediaIE):
|
||||
IE_DESC = '臺灣蘋果日報'
|
||||
_VALID_URL = r'https?://(www|ent)\.appledaily\.com\.tw/(?:animation|appledaily|enews|realtimenews|actionnews)/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)(/.*)?'
|
||||
_VALID_URL = r'https?://(www|ent)\.appledaily\.com\.tw/[^/]+/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)(/.*)?'
|
||||
_TESTS = [{
|
||||
'url': 'http://ent.appledaily.com.tw/enews/article/entertainment/20150128/36354694',
|
||||
'md5': 'a843ab23d150977cc55ef94f1e2c1e4d',
|
||||
@ -157,6 +171,10 @@ class AppleDailyIE(NextMediaIE):
|
||||
}, {
|
||||
'url': 'http://www.appledaily.com.tw/actionnews/appledaily/7/20161003/960588/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Redirected from http://ent.appledaily.com.tw/enews/article/entertainment/20150128/36354694
|
||||
'url': 'http://ent.appledaily.com.tw/section/article/headline/20150128/36354694',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_URL_PATTERN = r'\{url: \'(.+)\'\}'
|
||||
@ -173,3 +191,48 @@ class AppleDailyIE(NextMediaIE):
|
||||
|
||||
def _fetch_description(self, page):
|
||||
return self._html_search_meta('description', page, 'news description')
|
||||
|
||||
|
||||
class NextTVIE(InfoExtractor):
|
||||
IE_DESC = '壹電視'
|
||||
_VALID_URL = r'https?://(?:www\.)?nexttv\.com\.tw/(?:[^/]+/)+(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.nexttv.com.tw/news/realtime/politics/11779671',
|
||||
'info_dict': {
|
||||
'id': '11779671',
|
||||
'ext': 'mp4',
|
||||
'title': '「超收稅」近4千億! 藍議員籲發消費券',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'timestamp': 1484825400,
|
||||
'upload_date': '20170119',
|
||||
'view_count': int,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<h1[^>]*>([^<]+)</h1>', webpage, 'title')
|
||||
|
||||
data = self._hidden_inputs(webpage)
|
||||
|
||||
video_url = data['ntt-vod-src-detailview']
|
||||
|
||||
date_str = get_element_by_class('date', webpage)
|
||||
timestamp = unified_timestamp(date_str + '+0800') if date_str else None
|
||||
|
||||
view_count = int_or_none(remove_start(
|
||||
clean_html(get_element_by_class('click', webpage)), '點閱:'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'url': video_url,
|
||||
'thumbnail': data.get('ntt-vod-img-src'),
|
||||
'timestamp': timestamp,
|
||||
'view_count': view_count,
|
||||
}
|
||||
|
@ -7,7 +7,6 @@ import datetime
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse_urlencode,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
@ -40,6 +39,7 @@ class NiconicoIE(InfoExtractor):
|
||||
'description': '(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org',
|
||||
'duration': 33,
|
||||
},
|
||||
'skip': 'Requires an account',
|
||||
}, {
|
||||
# File downloaded with and without credentials are different, so omit
|
||||
# the md5 field
|
||||
@ -55,6 +55,7 @@ class NiconicoIE(InfoExtractor):
|
||||
'timestamp': 1304065916,
|
||||
'duration': 209,
|
||||
},
|
||||
'skip': 'Requires an account',
|
||||
}, {
|
||||
# 'video exists but is marked as "deleted"
|
||||
# md5 is unstable
|
||||
@ -65,9 +66,10 @@ class NiconicoIE(InfoExtractor):
|
||||
'description': 'deleted',
|
||||
'title': 'ドラえもんエターナル第3話「決戦第3新東京市」<前編>',
|
||||
'upload_date': '20071224',
|
||||
'timestamp': 1198527840, # timestamp field has different value if logged in
|
||||
'timestamp': int, # timestamp field has different value if logged in
|
||||
'duration': 304,
|
||||
},
|
||||
'skip': 'Requires an account',
|
||||
}, {
|
||||
'url': 'http://www.nicovideo.jp/watch/so22543406',
|
||||
'info_dict': {
|
||||
@ -79,13 +81,12 @@ class NiconicoIE(InfoExtractor):
|
||||
'upload_date': '20140104',
|
||||
'uploader': 'アニメロチャンネル',
|
||||
'uploader_id': '312',
|
||||
}
|
||||
},
|
||||
'skip': 'The viewing period of the video you were searching for has expired.',
|
||||
}]
|
||||
|
||||
_VALID_URL = r'https?://(?:www\.|secure\.)?nicovideo\.jp/watch/(?P<id>(?:[a-z]{2})?[0-9]+)'
|
||||
_NETRC_MACHINE = 'niconico'
|
||||
# Determine whether the downloader used authentication to download video
|
||||
_AUTHENTICATED = False
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
@ -109,8 +110,6 @@ class NiconicoIE(InfoExtractor):
|
||||
if re.search(r'(?i)<h1 class="mb8p4">Log in error</h1>', login_results) is not None:
|
||||
self._downloader.report_warning('unable to log in: bad username or password')
|
||||
return False
|
||||
# Successful login
|
||||
self._AUTHENTICATED = True
|
||||
return True
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -128,35 +127,19 @@ class NiconicoIE(InfoExtractor):
|
||||
'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id,
|
||||
note='Downloading video info page')
|
||||
|
||||
if self._AUTHENTICATED:
|
||||
# Get flv info
|
||||
flv_info_webpage = self._download_webpage(
|
||||
'http://flapi.nicovideo.jp/api/getflv/' + video_id + '?as3=1',
|
||||
video_id, 'Downloading flv info')
|
||||
else:
|
||||
# Get external player info
|
||||
ext_player_info = self._download_webpage(
|
||||
'http://ext.nicovideo.jp/thumb_watch/' + video_id, video_id)
|
||||
thumb_play_key = self._search_regex(
|
||||
r'\'thumbPlayKey\'\s*:\s*\'(.*?)\'', ext_player_info, 'thumbPlayKey')
|
||||
|
||||
# Get flv info
|
||||
flv_info_data = compat_urllib_parse_urlencode({
|
||||
'k': thumb_play_key,
|
||||
'v': video_id
|
||||
})
|
||||
flv_info_request = sanitized_Request(
|
||||
'http://ext.nicovideo.jp/thumb_watch', flv_info_data,
|
||||
{'Content-Type': 'application/x-www-form-urlencoded'})
|
||||
flv_info_webpage = self._download_webpage(
|
||||
flv_info_request, video_id,
|
||||
note='Downloading flv info', errnote='Unable to download flv info')
|
||||
# Get flv info
|
||||
flv_info_webpage = self._download_webpage(
|
||||
'http://flapi.nicovideo.jp/api/getflv/' + video_id + '?as3=1',
|
||||
video_id, 'Downloading flv info')
|
||||
|
||||
flv_info = compat_urlparse.parse_qs(flv_info_webpage)
|
||||
if 'url' not in flv_info:
|
||||
if 'deleted' in flv_info:
|
||||
raise ExtractorError('The video has been deleted.',
|
||||
expected=True)
|
||||
elif 'closed' in flv_info:
|
||||
raise ExtractorError('Niconico videos now require logging in',
|
||||
expected=True)
|
||||
else:
|
||||
raise ExtractorError('Unable to find video URL')
|
||||
|
||||
|
@ -241,7 +241,7 @@ class NPOIE(NPOBaseIE):
|
||||
if metadata.get('tt888') == 'ja':
|
||||
subtitles['nl'] = [{
|
||||
'ext': 'vtt',
|
||||
'url': 'http://e.omroep.nl/tt888/%s' % video_id,
|
||||
'url': 'http://tt888.omroep.nl/tt888/%s' % video_id,
|
||||
}]
|
||||
|
||||
return {
|
||||
|
@ -128,6 +128,22 @@ class NRKBaseIE(InfoExtractor):
|
||||
series = conviva.get('seriesName') or data.get('seriesTitle')
|
||||
episode = conviva.get('episodeName') or data.get('episodeNumberOrDate')
|
||||
|
||||
season_number = None
|
||||
episode_number = None
|
||||
if data.get('mediaElementType') == 'Episode':
|
||||
_season_episode = data.get('scoresStatistics', {}).get('springStreamStream') or \
|
||||
data.get('relativeOriginUrl', '')
|
||||
EPISODENUM_RE = [
|
||||
r'/s(?P<season>\d{,2})e(?P<episode>\d{,2})\.',
|
||||
r'/sesong-(?P<season>\d{,2})/episode-(?P<episode>\d{,2})',
|
||||
]
|
||||
season_number = int_or_none(self._search_regex(
|
||||
EPISODENUM_RE, _season_episode, 'season number',
|
||||
default=None, group='season'))
|
||||
episode_number = int_or_none(self._search_regex(
|
||||
EPISODENUM_RE, _season_episode, 'episode number',
|
||||
default=None, group='episode'))
|
||||
|
||||
thumbnails = None
|
||||
images = data.get('images')
|
||||
if images and isinstance(images, dict):
|
||||
@ -140,11 +156,15 @@ class NRKBaseIE(InfoExtractor):
|
||||
} for image in web_images if image.get('imageUrl')]
|
||||
|
||||
description = data.get('description')
|
||||
category = data.get('mediaAnalytics', {}).get('category')
|
||||
|
||||
common_info = {
|
||||
'description': description,
|
||||
'series': series,
|
||||
'episode': episode,
|
||||
'season_number': season_number,
|
||||
'episode_number': episode_number,
|
||||
'categories': [category] if category else None,
|
||||
'age_limit': parse_age_limit(data.get('legalAge')),
|
||||
'thumbnails': thumbnails,
|
||||
}
|
||||
@ -227,54 +247,102 @@ class NRKTVIE(NRKBaseIE):
|
||||
'title': '20 spørsmål 23.05.2014',
|
||||
'description': 'md5:bdea103bc35494c143c6a9acdd84887a',
|
||||
'duration': 1741,
|
||||
'series': '20 spørsmål - TV',
|
||||
'episode': '23.05.2014',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://tv.nrk.no/program/mdfp15000514',
|
||||
'md5': '43d0be26663d380603a9cf0c24366531',
|
||||
'info_dict': {
|
||||
'id': 'MDFP15000514CA',
|
||||
'ext': 'mp4',
|
||||
'title': 'Grunnlovsjubiléet - Stor ståhei for ingenting 24.05.2014',
|
||||
'description': 'md5:89290c5ccde1b3a24bb8050ab67fe1db',
|
||||
'duration': 4605,
|
||||
'series': 'Kunnskapskanalen',
|
||||
'episode': '24.05.2014',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# single playlist video
|
||||
'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015#del=2',
|
||||
'md5': 'adbd1dbd813edaf532b0a253780719c2',
|
||||
'info_dict': {
|
||||
'id': 'MSPO40010515-part2',
|
||||
'ext': 'flv',
|
||||
'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)',
|
||||
'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
|
||||
},
|
||||
'skip': 'Only works from Norway',
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['Video is geo restricted'],
|
||||
'skip': 'particular part is not supported currently',
|
||||
}, {
|
||||
'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015',
|
||||
'playlist': [{
|
||||
'md5': '9480285eff92d64f06e02a5367970a7a',
|
||||
'info_dict': {
|
||||
'id': 'MSPO40010515-part1',
|
||||
'ext': 'flv',
|
||||
'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 1:2)',
|
||||
'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
|
||||
'id': 'MSPO40010515AH',
|
||||
'ext': 'mp4',
|
||||
'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015 (Part 1)',
|
||||
'description': 'md5:c03aba1e917561eface5214020551b7a',
|
||||
'duration': 772,
|
||||
'series': 'Tour de Ski',
|
||||
'episode': '06.01.2015',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'md5': 'adbd1dbd813edaf532b0a253780719c2',
|
||||
'info_dict': {
|
||||
'id': 'MSPO40010515-part2',
|
||||
'ext': 'flv',
|
||||
'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)',
|
||||
'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
|
||||
'id': 'MSPO40010515BH',
|
||||
'ext': 'mp4',
|
||||
'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015 (Part 2)',
|
||||
'description': 'md5:c03aba1e917561eface5214020551b7a',
|
||||
'duration': 6175,
|
||||
'series': 'Tour de Ski',
|
||||
'episode': '06.01.2015',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}],
|
||||
'info_dict': {
|
||||
'id': 'MSPO40010515',
|
||||
'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn',
|
||||
'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
|
||||
'duration': 6947.52,
|
||||
'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015',
|
||||
'description': 'md5:c03aba1e917561eface5214020551b7a',
|
||||
},
|
||||
'expected_warnings': ['Video is geo restricted'],
|
||||
}, {
|
||||
'url': 'https://tv.nrk.no/serie/anno/KMTE50001317/sesong-3/episode-13',
|
||||
'info_dict': {
|
||||
'id': 'KMTE50001317AA',
|
||||
'ext': 'mp4',
|
||||
'title': 'Anno 13:30',
|
||||
'description': 'md5:11d9613661a8dbe6f9bef54e3a4cbbfa',
|
||||
'duration': 2340,
|
||||
'series': 'Anno',
|
||||
'episode': '13:30',
|
||||
'season_number': 3,
|
||||
'episode_number': 13,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://tv.nrk.no/serie/nytt-paa-nytt/MUHH46000317/27-01-2017',
|
||||
'info_dict': {
|
||||
'id': 'MUHH46000317AA',
|
||||
'ext': 'mp4',
|
||||
'title': 'Nytt på Nytt 27.01.2017',
|
||||
'description': 'md5:5358d6388fba0ea6f0b6d11c48b9eb4b',
|
||||
'duration': 1796,
|
||||
'series': 'Nytt på nytt',
|
||||
'episode': '27.01.2017',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Only works from Norway',
|
||||
}, {
|
||||
'url': 'https://radio.nrk.no/serie/dagsnytt/NPUB21019315/12-07-2015#',
|
||||
'only_matching': True,
|
||||
@ -360,6 +428,64 @@ class NRKTVEpisodesIE(NRKPlaylistBaseIE):
|
||||
r'<h1>([^<]+)</h1>', webpage, 'title', fatal=False)
|
||||
|
||||
|
||||
class NRKTVSeriesIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/serie/(?P<id>[^/]+)'
|
||||
_ITEM_RE = r'(?:data-season=["\']|id=["\']season-)(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://tv.nrk.no/serie/groenn-glede',
|
||||
'info_dict': {
|
||||
'id': 'groenn-glede',
|
||||
'title': 'Grønn glede',
|
||||
'description': 'md5:7576e92ae7f65da6993cf90ee29e4608',
|
||||
},
|
||||
'playlist_mincount': 9,
|
||||
}, {
|
||||
'url': 'http://tv.nrksuper.no/serie/labyrint',
|
||||
'info_dict': {
|
||||
'id': 'labyrint',
|
||||
'title': 'Labyrint',
|
||||
'description': 'md5:58afd450974c89e27d5a19212eee7115',
|
||||
},
|
||||
'playlist_mincount': 3,
|
||||
}, {
|
||||
'url': 'https://tv.nrk.no/serie/broedrene-dal-og-spektralsteinene',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://tv.nrk.no/serie/saving-the-human-race',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://tv.nrk.no/serie/postmann-pat',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if NRKTVIE.suitable(url) else super(NRKTVSeriesIE, cls).suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
series_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, series_id)
|
||||
|
||||
entries = [
|
||||
self.url_result(
|
||||
'https://tv.nrk.no/program/Episodes/{series}/{season}'.format(
|
||||
series=series_id, season=season_id))
|
||||
for season_id in re.findall(self._ITEM_RE, webpage)
|
||||
]
|
||||
|
||||
title = self._html_search_meta(
|
||||
'seriestitle', webpage,
|
||||
'title', default=None) or self._og_search_title(
|
||||
webpage, fatal=False)
|
||||
|
||||
description = self._html_search_meta(
|
||||
'series_description', webpage,
|
||||
'description', default=None) or self._og_search_description(webpage)
|
||||
|
||||
return self.playlist_result(entries, series_id, title, description)
|
||||
|
||||
|
||||
class NRKSkoleIE(InfoExtractor):
|
||||
IE_DESC = 'NRK Skole'
|
||||
_VALID_URL = r'https?://(?:www\.)?nrk\.no/skole/?\?.*\bmediaId=(?P<id>\d+)'
|
||||
|
@ -53,7 +53,7 @@ class OoyalaBaseIE(InfoExtractor):
|
||||
elif delivery_type == 'hds' or ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
s_url + '?hdcore=3.7.0', embed_code, f4m_id='hds', fatal=False))
|
||||
elif delivery_type == 'hds' or ext == 'mpd':
|
||||
elif delivery_type == 'dash' or ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
s_url, embed_code, mpd_id='dash', fatal=False))
|
||||
elif delivery_type == 'smooth':
|
||||
|
@ -1,6 +1,8 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_chr
|
||||
from ..utils import (
|
||||
@ -56,6 +58,12 @@ class OpenloadIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
return re.findall(
|
||||
r'<iframe[^>]+src=["\']((?:https?://)?(?:openload\.(?:co|io)|oload\.tv)/embed/[a-zA-Z0-9-_]+)',
|
||||
webpage)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage('https://openload.co/embed/%s/' % video_id, video_id)
|
||||
@ -93,7 +101,7 @@ class OpenloadIE(InfoExtractor):
|
||||
'thumbnail': self._og_search_thumbnail(webpage, default=None),
|
||||
'url': video_url,
|
||||
# Seems all videos have extensions in their titles
|
||||
'ext': determine_ext(title),
|
||||
'ext': determine_ext(title, 'mp4'),
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
return info_dict
|
||||
|
@ -16,18 +16,33 @@ from ..utils import (
|
||||
|
||||
class PikselIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://player\.piksel\.com/v/(?P<id>[a-z0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://player.piksel.com/v/nv60p12f',
|
||||
'md5': 'd9c17bbe9c3386344f9cfd32fad8d235',
|
||||
'info_dict': {
|
||||
'id': 'nv60p12f',
|
||||
'ext': 'mp4',
|
||||
'title': 'فن الحياة - الحلقة 1',
|
||||
'description': 'احدث برامج الداعية الاسلامي " مصطفي حسني " فى رمضان 2016علي النهار نور',
|
||||
'timestamp': 1465231790,
|
||||
'upload_date': '20160606',
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://player.piksel.com/v/nv60p12f',
|
||||
'md5': 'd9c17bbe9c3386344f9cfd32fad8d235',
|
||||
'info_dict': {
|
||||
'id': 'nv60p12f',
|
||||
'ext': 'mp4',
|
||||
'title': 'فن الحياة - الحلقة 1',
|
||||
'description': 'احدث برامج الداعية الاسلامي " مصطفي حسني " فى رمضان 2016علي النهار نور',
|
||||
'timestamp': 1465231790,
|
||||
'upload_date': '20160606',
|
||||
}
|
||||
},
|
||||
{
|
||||
# Original source: http://www.uscourts.gov/cameras-courts/state-washington-vs-donald-j-trump-et-al
|
||||
'url': 'https://player.piksel.com/v/v80kqp41',
|
||||
'md5': '753ddcd8cc8e4fa2dda4b7be0e77744d',
|
||||
'info_dict': {
|
||||
'id': 'v80kqp41',
|
||||
'ext': 'mp4',
|
||||
'title': 'WAW- State of Washington vs. Donald J. Trump, et al',
|
||||
'description': 'State of Washington vs. Donald J. Trump, et al, Case Number 17-CV-00141-JLR, TRO Hearing, Civil Rights Case, 02/3/2017, 1:00 PM (PST), Seattle Federal Courthouse, Seattle, WA, Judge James L. Robart presiding.',
|
||||
'timestamp': 1486171129,
|
||||
'upload_date': '20170204',
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
@staticmethod
|
||||
def _extract_url(webpage):
|
||||
@ -40,8 +55,10 @@ class PikselIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
app_token = self._search_regex(
|
||||
r'clientAPI\s*:\s*"([^"]+)"', webpage, 'app token')
|
||||
app_token = self._search_regex([
|
||||
r'clientAPI\s*:\s*"([^"]+)"',
|
||||
r'data-de-api-key\s*=\s*"([^"]+)"'
|
||||
], webpage, 'app token')
|
||||
response = self._download_json(
|
||||
'http://player.piksel.com/ws/ws_program/api/%s/mode/json/apiv/5' % app_token,
|
||||
video_id, query={
|
||||
|
@ -18,6 +18,7 @@ from ..utils import (
|
||||
parse_duration,
|
||||
qualities,
|
||||
srt_subtitles_timecode,
|
||||
update_url_query,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
@ -92,6 +93,10 @@ class PluralsightIE(PluralsightBaseIE):
|
||||
raise ExtractorError('Unable to login: %s' % error, expected=True)
|
||||
|
||||
if all(p not in response for p in ('__INITIAL_STATE__', '"currentUser"')):
|
||||
BLOCKED = 'Your account has been blocked due to suspicious activity'
|
||||
if BLOCKED in response:
|
||||
raise ExtractorError(
|
||||
'Unable to login: %s' % BLOCKED, expected=True)
|
||||
raise ExtractorError('Unable to log in')
|
||||
|
||||
def _get_subtitles(self, author, clip_id, lang, name, duration, video_id):
|
||||
@ -157,13 +162,10 @@ class PluralsightIE(PluralsightBaseIE):
|
||||
|
||||
display_id = '%s-%s' % (name, clip_id)
|
||||
|
||||
parsed_url = compat_urlparse.urlparse(url)
|
||||
|
||||
payload_url = compat_urlparse.urlunparse(parsed_url._replace(
|
||||
netloc='app.pluralsight.com', path='player/api/v1/payload'))
|
||||
|
||||
course = self._download_json(
|
||||
payload_url, display_id, headers={'Referer': url})['payload']['course']
|
||||
'https://app.pluralsight.com/player/user/api/v1/player/payload',
|
||||
display_id, data=urlencode_postdata({'courseId': course_name}),
|
||||
headers={'Referer': url})
|
||||
|
||||
collection = course['modules']
|
||||
|
||||
@ -330,25 +332,44 @@ class PluralsightCourseIE(PluralsightBaseIE):
|
||||
# TODO: PSM cookie
|
||||
|
||||
course = self._download_json(
|
||||
'%s/data/course/%s' % (self._API_BASE, course_id),
|
||||
course_id, 'Downloading course JSON')
|
||||
'%s/player/functions/rpc' % self._API_BASE, course_id,
|
||||
'Downloading course JSON',
|
||||
data=json.dumps({
|
||||
'fn': 'bootstrapPlayer',
|
||||
'payload': {
|
||||
'courseId': course_id,
|
||||
}
|
||||
}).encode('utf-8'),
|
||||
headers={
|
||||
'Content-Type': 'application/json;charset=utf-8'
|
||||
})['payload']['course']
|
||||
|
||||
title = course['title']
|
||||
course_name = course['name']
|
||||
course_data = course['modules']
|
||||
description = course.get('description') or course.get('shortDescription')
|
||||
|
||||
course_data = self._download_json(
|
||||
'%s/data/course/content/%s' % (self._API_BASE, course_id),
|
||||
course_id, 'Downloading course data JSON')
|
||||
|
||||
entries = []
|
||||
for num, module in enumerate(course_data, 1):
|
||||
author = module.get('author')
|
||||
module_name = module.get('name')
|
||||
if not author or not module_name:
|
||||
continue
|
||||
for clip in module.get('clips', []):
|
||||
player_parameters = clip.get('playerParameters')
|
||||
if not player_parameters:
|
||||
clip_index = int_or_none(clip.get('index'))
|
||||
if clip_index is None:
|
||||
continue
|
||||
clip_url = update_url_query(
|
||||
'%s/player' % self._API_BASE, query={
|
||||
'mode': 'live',
|
||||
'course': course_name,
|
||||
'author': author,
|
||||
'name': module_name,
|
||||
'clip': clip_index,
|
||||
})
|
||||
entries.append({
|
||||
'_type': 'url_transparent',
|
||||
'url': '%s/training/player?%s' % (self._API_BASE, player_parameters),
|
||||
'url': clip_url,
|
||||
'ie_key': PluralsightIE.ie_key(),
|
||||
'chapter': module.get('title'),
|
||||
'chapter_number': num,
|
||||
|
92
youtube_dl/extractor/pornflip.py
Normal file
92
youtube_dl/extractor/pornflip.py
Normal file
@ -0,0 +1,92 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_str,
|
||||
)
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
class PornFlipIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?pornflip\.com/(?:v|embed)/(?P<id>[0-9A-Za-z]{11})'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.pornflip.com/v/wz7DfNhMmep',
|
||||
'md5': '98c46639849145ae1fd77af532a9278c',
|
||||
'info_dict': {
|
||||
'id': 'wz7DfNhMmep',
|
||||
'ext': 'mp4',
|
||||
'title': '2 Amateurs swallow make his dream cumshots true',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 112,
|
||||
'timestamp': 1481655502,
|
||||
'upload_date': '20161213',
|
||||
'uploader_id': '106786',
|
||||
'uploader': 'figifoto',
|
||||
'view_count': int,
|
||||
'age_limit': 18,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.pornflip.com/embed/wz7DfNhMmep',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(
|
||||
'https://www.pornflip.com/v/%s' % video_id, video_id)
|
||||
|
||||
flashvars = compat_parse_qs(self._search_regex(
|
||||
r'<embed[^>]+flashvars=(["\'])(?P<flashvars>(?:(?!\1).)+)\1',
|
||||
webpage, 'flashvars', group='flashvars'))
|
||||
|
||||
title = flashvars['video_vars[title]'][0]
|
||||
|
||||
def flashvar(kind):
|
||||
return try_get(
|
||||
flashvars, lambda x: x['video_vars[%s]' % kind][0], compat_str)
|
||||
|
||||
formats = []
|
||||
for key, value in flashvars.items():
|
||||
if not (value and isinstance(value, list)):
|
||||
continue
|
||||
format_url = value[0]
|
||||
if key == 'video_vars[hds_manifest]':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
format_url, video_id, mpd_id='dash', fatal=False))
|
||||
continue
|
||||
height = self._search_regex(
|
||||
r'video_vars\[video_urls\]\[(\d+)', key, 'height', default=None)
|
||||
if not height:
|
||||
continue
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'format_id': 'http-%s' % height,
|
||||
'height': int_or_none(height),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
uploader = self._html_search_regex(
|
||||
(r'<span[^>]+class="name"[^>]*>\s*<a[^>]+>\s*<strong>(?P<uploader>[^<]+)',
|
||||
r'<meta[^>]+content=(["\'])[^>]*\buploaded by (?P<uploader>.+?)\1'),
|
||||
webpage, 'uploader', fatal=False, group='uploader')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'title': title,
|
||||
'thumbnail': flashvar('big_thumb'),
|
||||
'duration': int_or_none(flashvar('duration')),
|
||||
'timestamp': unified_timestamp(self._html_search_meta(
|
||||
'uploadDate', webpage, 'timestamp')),
|
||||
'uploader_id': flashvar('author_id'),
|
||||
'uploader': uploader,
|
||||
'view_count': int_or_none(flashvar('views')),
|
||||
'age_limit': 18,
|
||||
}
|
@ -156,7 +156,18 @@ class PornHubIE(InfoExtractor):
|
||||
comment_count = self._extract_count(
|
||||
r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')
|
||||
|
||||
video_urls = list(map(compat_urllib_parse_unquote, re.findall(r"player_quality_[0-9]{3}p\s*=\s*'([^']+)'", webpage)))
|
||||
video_variables = {}
|
||||
for video_variablename, quote, video_variable in re.findall(
|
||||
r'(player_quality_[0-9]{3,4}p\w+)\s*=\s*(["\'])(.+?)\2;', webpage):
|
||||
video_variables[video_variablename] = video_variable
|
||||
|
||||
video_urls = []
|
||||
for encoded_video_url in re.findall(
|
||||
r'player_quality_[0-9]{3,4}p\s*=(.+?);', webpage):
|
||||
for varname, varval in video_variables.items():
|
||||
encoded_video_url = encoded_video_url.replace(varname, varval)
|
||||
video_urls.append(re.sub(r'[\s+]', '', encoded_video_url))
|
||||
|
||||
if webpage.find('"encrypted":true') != -1:
|
||||
password = compat_urllib_parse_unquote_plus(
|
||||
self._search_regex(r'"video_title":"([^"]+)', webpage, 'password'))
|
||||
|
@ -147,16 +147,12 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
|
||||
'url': 'http://www.prosieben.de/tv/circus-halligalli/videos/218-staffel-2-episode-18-jahresrueckblick-ganze-folge',
|
||||
'info_dict': {
|
||||
'id': '2104602',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Episode 18 - Staffel 2',
|
||||
'description': 'md5:8733c81b702ea472e069bc48bb658fc1',
|
||||
'upload_date': '20131231',
|
||||
'duration': 5845.04,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.prosieben.de/videokatalog/Gesellschaft/Leben/Trends/video-Lady-Umstyling-f%C3%BCr-Audrina-Rebekka-Audrina-Fergen-billig-aussehen-Battal-Modica-700544.html',
|
||||
@ -258,7 +254,7 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
|
||||
'url': 'http://www.the-voice-of-germany.de/video/31-andreas-kuemmert-rocket-man-clip',
|
||||
'info_dict': {
|
||||
'id': '2572814',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Andreas Kümmert: Rocket Man',
|
||||
'description': 'md5:6ddb02b0781c6adf778afea606652e38',
|
||||
'upload_date': '20131017',
|
||||
@ -272,7 +268,7 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
|
||||
'url': 'http://www.fem.com/wellness/videos/wellness-video-clip-kurztripps-zum-valentinstag.html',
|
||||
'info_dict': {
|
||||
'id': '2156342',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Kurztrips zum Valentinstag',
|
||||
'description': 'Romantischer Kurztrip zum Valentinstag? Nina Heinemann verrät, was sich hier wirklich lohnt.',
|
||||
'duration': 307.24,
|
||||
@ -289,12 +285,13 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
|
||||
'description': 'md5:63b8963e71f481782aeea877658dec84',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
'skip': 'This video is unavailable',
|
||||
},
|
||||
{
|
||||
'url': 'http://www.7tv.de/circus-halligalli/615-best-of-circus-halligalli-ganze-folge',
|
||||
'info_dict': {
|
||||
'id': '4187506',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Best of Circus HalliGalli',
|
||||
'description': 'md5:8849752efd90b9772c9db6fdf87fb9e9',
|
||||
'upload_date': '20151229',
|
||||
@ -375,7 +372,9 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
|
||||
title = self._html_search_regex(self._TITLE_REGEXES, webpage, 'title')
|
||||
info = self._extract_video_info(url, clip_id)
|
||||
description = self._html_search_regex(
|
||||
self._DESCRIPTION_REGEXES, webpage, 'description', fatal=False)
|
||||
self._DESCRIPTION_REGEXES, webpage, 'description', default=None)
|
||||
if description is None:
|
||||
description = self._og_search_description(webpage)
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
upload_date = unified_strdate(self._html_search_regex(
|
||||
self._UPLOAD_DATE_REGEXES, webpage, 'upload date', default=None))
|
||||
|
@ -54,9 +54,8 @@ class RadioCanadaIE(InfoExtractor):
|
||||
raise ExtractorError('This video is DRM protected.', expected=True)
|
||||
|
||||
device_types = ['ipad']
|
||||
if app_code != 'toutv':
|
||||
device_types.append('flash')
|
||||
if not smuggled_data:
|
||||
device_types.append('flash')
|
||||
device_types.append('android')
|
||||
|
||||
formats = []
|
||||
@ -103,7 +102,7 @@ class RadioCanadaIE(InfoExtractor):
|
||||
continue
|
||||
f_url = re.sub(r'\d+\.%s' % ext, '%d.%s' % (tbr, ext), v_url)
|
||||
protocol = determine_protocol({'url': f_url})
|
||||
formats.append({
|
||||
f = {
|
||||
'format_id': '%s-%d' % (protocol, tbr),
|
||||
'url': f_url,
|
||||
'ext': 'flv' if protocol == 'rtmp' else ext,
|
||||
@ -111,7 +110,14 @@ class RadioCanadaIE(InfoExtractor):
|
||||
'width': int_or_none(url_e.get('width')),
|
||||
'height': int_or_none(url_e.get('height')),
|
||||
'tbr': tbr,
|
||||
})
|
||||
}
|
||||
mobj = re.match(r'(?P<url>rtmp://[^/]+/[^/]+)/(?P<playpath>[^?]+)(?P<auth>\?.+)', f_url)
|
||||
if mobj:
|
||||
f.update({
|
||||
'url': mobj.group('url') + mobj.group('auth'),
|
||||
'play_path': mobj.group('playpath'),
|
||||
})
|
||||
formats.append(f)
|
||||
if protocol == 'rtsp':
|
||||
base_url = self._search_regex(
|
||||
r'rtsp://([^?]+)', f_url, 'base url', default=None)
|
||||
|
@ -81,6 +81,9 @@ class RuutuIE(InfoExtractor):
|
||||
elif ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
video_url, video_id, f4m_id='hds', fatal=False))
|
||||
elif ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
video_url, video_id, mpd_id='dash', fatal=False))
|
||||
else:
|
||||
proto = compat_urllib_parse_urlparse(video_url).scheme
|
||||
if not child.tag.startswith('HTTP') and proto != 'rtmp':
|
||||
|
60
youtube_dl/extractor/scrippsnetworks.py
Normal file
60
youtube_dl/extractor/scrippsnetworks.py
Normal file
@ -0,0 +1,60 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .adobepass import AdobePassIE
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
smuggle_url,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
class ScrippsNetworksWatchIE(AdobePassIE):
|
||||
IE_NAME = 'scrippsnetworks:watch'
|
||||
_VALID_URL = r'https?://watch\.(?:hgtv|foodnetwork|travelchannel|diynetwork|cookingchanneltv)\.com/player\.[A-Z0-9]+\.html#(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://watch.hgtv.com/player.HNT.html#0256538',
|
||||
'md5': '26545fd676d939954c6808274bdb905a',
|
||||
'info_dict': {
|
||||
'id': '0256538',
|
||||
'ext': 'mp4',
|
||||
'title': 'Seeking a Wow House',
|
||||
'description': 'Buyers retiring in Palm Springs, California, want a modern house with major wow factor. They\'re also looking for a pool and a large, open floorplan with tall windows looking out at the views.',
|
||||
'uploader': 'SCNI',
|
||||
'upload_date': '20170207',
|
||||
'timestamp': 1486450493,
|
||||
},
|
||||
'skip': 'requires TV provider authentication',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
channel = self._parse_json(self._search_regex(
|
||||
r'"channels"\s*:\s*(\[.+\])',
|
||||
webpage, 'channels'), video_id)[0]
|
||||
video_data = next(v for v in channel['videos'] if v.get('nlvid') == video_id)
|
||||
title = video_data['title']
|
||||
release_url = video_data['releaseUrl']
|
||||
if video_data.get('restricted'):
|
||||
requestor_id = self._search_regex(
|
||||
r'requestorId\s*=\s*"([^"]+)";', webpage, 'requestor id')
|
||||
resource = self._get_mvpd_resource(
|
||||
requestor_id, title, video_id,
|
||||
video_data.get('ratings', [{}])[0].get('rating'))
|
||||
auth = self._extract_mvpd_auth(
|
||||
url, video_id, requestor_id, resource)
|
||||
release_url = update_url_query(release_url, {'auth': auth})
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'url': smuggle_url(release_url, {'force_smil_url': True}),
|
||||
'description': video_data.get('description'),
|
||||
'thumbnail': video_data.get('thumbnailUrl'),
|
||||
'series': video_data.get('showTitle'),
|
||||
'season_number': int_or_none(video_data.get('season')),
|
||||
'episode_number': int_or_none(video_data.get('episodeNumber')),
|
||||
'ie_key': 'ThePlatform',
|
||||
}
|
@ -1,64 +1,101 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
qualities,
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
try_get,
|
||||
qualities,
|
||||
)
|
||||
|
||||
|
||||
class SixPlayIE(InfoExtractor):
|
||||
IE_NAME = '6play'
|
||||
_VALID_URL = r'(?:6play:|https?://(?:www\.)?6play\.fr/.+?-c_)(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.6play.fr/jamel-et-ses-amis-au-marrakech-du-rire-p_1316/jamel-et-ses-amis-au-marrakech-du-rire-2015-c_11495320',
|
||||
'url': 'http://www.6play.fr/le-meilleur-patissier-p_1807/le-meilleur-patissier-special-fetes-mercredi-a-21-00-sur-m6-c_11638450',
|
||||
'md5': '42310bffe4ba3982db112b9cd3467328',
|
||||
'info_dict': {
|
||||
'id': '11495320',
|
||||
'id': '11638450',
|
||||
'ext': 'mp4',
|
||||
'title': 'Jamel et ses amis au Marrakech du rire 2015',
|
||||
'description': 'md5:ba2149d5c321d5201b78070ee839d872',
|
||||
'title': 'Le Meilleur Pâtissier, spécial fêtes mercredi à 21:00 sur M6',
|
||||
'description': 'md5:308853f6a5f9e2d55a30fc0654de415f',
|
||||
'duration': 39,
|
||||
'series': 'Le meilleur pâtissier',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
clip_data = self._download_json(
|
||||
'https://player.m6web.fr/v2/video/config/6play-auth/FR/%s.json' % video_id,
|
||||
video_id)
|
||||
video_data = clip_data['videoInfo']
|
||||
|
||||
data = self._download_json(
|
||||
'https://pc.middleware.6play.fr/6play/v2/platforms/m6group_web/services/6play/videos/clip_%s' % video_id,
|
||||
video_id, query={
|
||||
'csa': 5,
|
||||
'with': 'clips',
|
||||
})
|
||||
|
||||
clip_data = data['clips'][0]
|
||||
title = clip_data['title']
|
||||
|
||||
urls = []
|
||||
quality_key = qualities(['lq', 'sd', 'hq', 'hd'])
|
||||
formats = []
|
||||
for source in clip_data['sources']:
|
||||
source_type, source_url = source.get('type'), source.get('src')
|
||||
if not source_url or source_type == 'hls/primetime':
|
||||
for asset in clip_data['assets']:
|
||||
asset_url = asset.get('full_physical_path')
|
||||
protocol = asset.get('protocol')
|
||||
if not asset_url or protocol == 'primetime' or asset_url in urls:
|
||||
continue
|
||||
ext = mimetype2ext(source_type) or determine_ext(source_url)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
source_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
source_url.replace('.m3u8', '.f4m'),
|
||||
video_id, f4m_id='hds', fatal=False))
|
||||
elif ext == 'mp4':
|
||||
quality = source.get('quality')
|
||||
urls.append(asset_url)
|
||||
container = asset.get('video_container')
|
||||
ext = determine_ext(asset_url)
|
||||
if container == 'm3u8' or ext == 'm3u8':
|
||||
if protocol == 'usp':
|
||||
asset_url = re.sub(r'/([^/]+)\.ism/[^/]*\.m3u8', r'/\1.ism/\1.m3u8', asset_url)
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
asset_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
asset_url.replace('.m3u8', '.f4m'),
|
||||
video_id, f4m_id='hds', fatal=False))
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
asset_url.replace('.m3u8', '.mpd'),
|
||||
video_id, mpd_id='dash', fatal=False))
|
||||
formats.extend(self._extract_ism_formats(
|
||||
re.sub(r'/[^/]+\.m3u8', '/Manifest', asset_url),
|
||||
video_id, ism_id='mss', fatal=False))
|
||||
else:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
asset_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
elif container == 'mp4' or ext == 'mp4':
|
||||
quality = asset.get('video_quality')
|
||||
formats.append({
|
||||
'url': source_url,
|
||||
'url': asset_url,
|
||||
'format_id': quality,
|
||||
'quality': quality_key(quality),
|
||||
'ext': ext,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
def get(getter):
|
||||
for src in (data, clip_data):
|
||||
v = try_get(src, getter, compat_str)
|
||||
if v:
|
||||
return v
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_data['title'].strip(),
|
||||
'description': video_data.get('description'),
|
||||
'duration': int_or_none(video_data.get('duration')),
|
||||
'series': video_data.get('titlePgm'),
|
||||
'title': title,
|
||||
'description': get(lambda x: x['description']),
|
||||
'duration': int_or_none(clip_data.get('duration')),
|
||||
'series': get(lambda x: x['program']['title']),
|
||||
'formats': formats,
|
||||
}
|
||||
|
@ -173,46 +173,54 @@ class SoundcloudIE(InfoExtractor):
|
||||
})
|
||||
|
||||
# We have to retrieve the url
|
||||
streams_url = ('http://api.soundcloud.com/i1/tracks/{0}/streams?'
|
||||
'client_id={1}&secret_token={2}'.format(track_id, self._IPHONE_CLIENT_ID, secret_token))
|
||||
format_dict = self._download_json(
|
||||
streams_url,
|
||||
track_id, 'Downloading track url')
|
||||
'http://api.soundcloud.com/i1/tracks/%s/streams' % track_id,
|
||||
track_id, 'Downloading track url', query={
|
||||
'client_id': self._CLIENT_ID,
|
||||
'secret_token': secret_token,
|
||||
})
|
||||
|
||||
for key, stream_url in format_dict.items():
|
||||
abr = int_or_none(self._search_regex(
|
||||
r'_(\d+)_url', key, 'audio bitrate', default=None))
|
||||
if key.startswith('http'):
|
||||
formats.append({
|
||||
stream_formats = [{
|
||||
'format_id': key,
|
||||
'ext': ext,
|
||||
'url': stream_url,
|
||||
'vcodec': 'none',
|
||||
})
|
||||
}]
|
||||
elif key.startswith('rtmp'):
|
||||
# The url doesn't have an rtmp app, we have to extract the playpath
|
||||
url, path = stream_url.split('mp3:', 1)
|
||||
formats.append({
|
||||
stream_formats = [{
|
||||
'format_id': key,
|
||||
'url': url,
|
||||
'play_path': 'mp3:' + path,
|
||||
'ext': 'flv',
|
||||
'vcodec': 'none',
|
||||
})
|
||||
}]
|
||||
elif key.startswith('hls'):
|
||||
stream_formats = self._extract_m3u8_formats(
|
||||
stream_url, track_id, 'mp3', entry_protocol='m3u8_native',
|
||||
m3u8_id=key, fatal=False)
|
||||
else:
|
||||
continue
|
||||
|
||||
if not formats:
|
||||
# We fallback to the stream_url in the original info, this
|
||||
# cannot be always used, sometimes it can give an HTTP 404 error
|
||||
formats.append({
|
||||
'format_id': 'fallback',
|
||||
'url': info['stream_url'] + '?client_id=' + self._CLIENT_ID,
|
||||
'ext': ext,
|
||||
'vcodec': 'none',
|
||||
})
|
||||
for f in stream_formats:
|
||||
f['abr'] = abr
|
||||
|
||||
for f in formats:
|
||||
if f['format_id'].startswith('http'):
|
||||
f['protocol'] = 'http'
|
||||
if f['format_id'].startswith('rtmp'):
|
||||
f['protocol'] = 'rtmp'
|
||||
formats.extend(stream_formats)
|
||||
|
||||
if not formats:
|
||||
# We fallback to the stream_url in the original info, this
|
||||
# cannot be always used, sometimes it can give an HTTP 404 error
|
||||
formats.append({
|
||||
'format_id': 'fallback',
|
||||
'url': info['stream_url'] + '?client_id=' + self._CLIENT_ID,
|
||||
'ext': ext,
|
||||
})
|
||||
|
||||
for f in formats:
|
||||
f['vcodec'] = 'none'
|
||||
|
||||
self._check_formats(formats, track_id)
|
||||
self._sort_formats(formats)
|
||||
|
@ -6,7 +6,7 @@ from .mtv import MTVServicesInfoExtractor
|
||||
|
||||
class SouthParkIE(MTVServicesInfoExtractor):
|
||||
IE_NAME = 'southpark.cc.com'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.cc\.com/(?:clips|full-episodes)/(?P<id>.+?)(\?|#|$))'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.cc\.com/(?:clips|(?:full-)?episodes)/(?P<id>.+?)(\?|#|$))'
|
||||
|
||||
_FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss'
|
||||
|
||||
@ -75,7 +75,7 @@ class SouthParkDeIE(SouthParkIE):
|
||||
|
||||
class SouthParkNlIE(SouthParkIE):
|
||||
IE_NAME = 'southpark.nl'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.nl/(?:clips|full-episodes)/(?P<id>.+?)(\?|#|$))'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.nl/(?:clips|(?:full-)?episodes)/(?P<id>.+?)(\?|#|$))'
|
||||
_FEED_URL = 'http://www.southpark.nl/feeds/video-player/mrss/'
|
||||
|
||||
_TESTS = [{
|
||||
|
@ -4,65 +4,7 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
js_to_json,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class SportBoxIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://news\.sportbox\.ru/(?:[^/]+/)+spbvideo_NI\d+_(?P<display_id>.+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://news.sportbox.ru/Vidy_sporta/Avtosport/Rossijskij/spbvideo_NI483529_Gonka-2-zaezd-Obyedinenniy-2000-klassi-Turing-i-S',
|
||||
'md5': 'ff56a598c2cf411a9a38a69709e97079',
|
||||
'info_dict': {
|
||||
'id': '80822',
|
||||
'ext': 'mp4',
|
||||
'title': 'Гонка 2 заезд ««Объединенный 2000»: классы Туринг и Супер-продакшн',
|
||||
'description': 'md5:3d72dc4a006ab6805d82f037fdc637ad',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'upload_date': '20140928',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://news.sportbox.ru/Vidy_sporta/billiard/spbvideo_NI486287_CHempionat-mira-po-dinamichnoy-piramide-4',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://news.sportbox.ru/video/no_ads/spbvideo_NI536574_V_Novorossijske_proshel_detskij_turnir_Pole_slavy_bojevoj?ci=211355',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
display_id = mobj.group('display_id')
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
player = self._search_regex(
|
||||
r'src="/?(vdl/player/[^"]+)"', webpage, 'player')
|
||||
|
||||
title = self._html_search_regex(
|
||||
[r'"nodetitle"\s*:\s*"([^"]+)"', r'class="node-header_{1,2}title">([^<]+)'],
|
||||
webpage, 'title')
|
||||
description = self._og_search_description(webpage) or self._html_search_meta(
|
||||
'description', webpage, 'description')
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
upload_date = unified_strdate(self._html_search_meta(
|
||||
'dateCreated', webpage, 'upload date'))
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': compat_urlparse.urljoin(url, '/%s' % player),
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'upload_date': upload_date,
|
||||
}
|
||||
from ..utils import js_to_json
|
||||
|
||||
|
||||
class SportBoxEmbedIE(InfoExtractor):
|
||||
|
52
youtube_dl/extractor/sprout.py
Normal file
52
youtube_dl/extractor/sprout.py
Normal file
@ -0,0 +1,52 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .adobepass import AdobePassIE
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
update_url_query,
|
||||
smuggle_url,
|
||||
)
|
||||
|
||||
|
||||
class SproutIE(AdobePassIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?sproutonline\.com/watch/(?P<id>[^/?#]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.sproutonline.com/watch/cowboy-adventure',
|
||||
'md5': '74bf14128578d1e040c3ebc82088f45f',
|
||||
'info_dict': {
|
||||
'id': '9dexnwtmh8_X',
|
||||
'ext': 'mp4',
|
||||
'title': 'A Cowboy Adventure',
|
||||
'description': 'Ruff-Ruff, Tweet and Dave get to be cowboys for the day at Six Cow Corral.',
|
||||
'timestamp': 1437758640,
|
||||
'upload_date': '20150724',
|
||||
'uploader': 'NBCU-SPROUT-NEW',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_component = self._search_regex(
|
||||
r'(?s)(<div[^>]+data-component="video"[^>]*?>)',
|
||||
webpage, 'video component', default=None)
|
||||
if video_component:
|
||||
options = self._parse_json(extract_attributes(
|
||||
video_component)['data-options'], video_id)
|
||||
theplatform_url = options['video']
|
||||
query = {
|
||||
'mbr': 'true',
|
||||
'manifest': 'm3u',
|
||||
}
|
||||
if options.get('protected'):
|
||||
query['auth'] = self._extract_mvpd_auth(url, options['pid'], 'sprout', 'sprout')
|
||||
theplatform_url = smuggle_url(update_url_query(
|
||||
theplatform_url, query), {'force_smil_url': True})
|
||||
else:
|
||||
iframe = self._search_regex(
|
||||
r'(<iframe[^>]+id="sproutVideoIframe"[^>]*?>)',
|
||||
webpage, 'iframe')
|
||||
theplatform_url = extract_attributes(iframe)['src']
|
||||
|
||||
return self.url_result(theplatform_url, 'ThePlatform')
|
@ -48,9 +48,6 @@ class SRGSSRIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
bu, media_type, media_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
if bu == 'rts':
|
||||
return self.url_result('rts:%s' % media_id, 'RTS')
|
||||
|
||||
media_data = self.get_media_data(bu, media_type, media_id)
|
||||
|
||||
metadata = media_data['AssetMetadatas']['AssetMetadata'][0]
|
||||
|
@ -306,9 +306,10 @@ class ThePlatformFeedIE(ThePlatformBaseIE):
|
||||
},
|
||||
}]
|
||||
|
||||
def _extract_feed_info(self, provider_id, feed_id, filter_query, video_id, custom_fields=None, asset_types_query={}):
|
||||
def _extract_feed_info(self, provider_id, feed_id, filter_query, video_id, custom_fields=None, asset_types_query={}, account_id=None):
|
||||
real_url = self._URL_TEMPLATE % (self.http_scheme(), provider_id, feed_id, filter_query)
|
||||
entry = self._download_json(real_url, video_id)['entries'][0]
|
||||
main_smil_url = 'http://link.theplatform.com/s/%s/media/guid/%d/%s' % (provider_id, account_id, entry['guid']) if account_id else None
|
||||
|
||||
formats = []
|
||||
subtitles = {}
|
||||
@ -333,7 +334,7 @@ class ThePlatformFeedIE(ThePlatformBaseIE):
|
||||
if asset_type in asset_types_query:
|
||||
query.update(asset_types_query[asset_type])
|
||||
cur_formats, cur_subtitles = self._extract_theplatform_smil(update_url_query(
|
||||
smil_url, query), video_id, 'Downloading SMIL data for %s' % asset_type)
|
||||
main_smil_url or smil_url, query), video_id, 'Downloading SMIL data for %s' % asset_type)
|
||||
formats.extend(cur_formats)
|
||||
subtitles = self._merge_subtitles(subtitles, cur_subtitles)
|
||||
|
||||
|
@ -100,9 +100,13 @@ class TurnerBaseIE(AdobePassIE):
|
||||
formats.extend(self._extract_smil_formats(
|
||||
video_url, video_id, fatal=False))
|
||||
elif ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
video_url, video_id, 'mp4',
|
||||
m3u8_id=format_id or 'hls', fatal=False))
|
||||
m3u8_id=format_id or 'hls', fatal=False)
|
||||
if '/secure/' in video_url and '?hdnea=' in video_url:
|
||||
for f in m3u8_formats:
|
||||
f['_seekable'] = False
|
||||
formats.extend(m3u8_formats)
|
||||
elif ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
update_url_query(video_url, {'hdcore': '3.7.0'}),
|
||||
|
54
youtube_dl/extractor/tva.py
Normal file
54
youtube_dl/extractor/tva.py
Normal file
@ -0,0 +1,54 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
smuggle_url,
|
||||
)
|
||||
|
||||
|
||||
class TVAIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://videos\.tva\.ca/episode/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://videos.tva.ca/episode/85538',
|
||||
'info_dict': {
|
||||
'id': '85538',
|
||||
'ext': 'mp4',
|
||||
'title': 'Épisode du 25 janvier 2017',
|
||||
'description': 'md5:e9e7fb5532ab37984d2dc87229cadf98',
|
||||
'upload_date': '20170126',
|
||||
'timestamp': 1485442329,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
video_data = self._download_json(
|
||||
"https://d18jmrhziuoi7p.cloudfront.net/isl/api/v1/dataservice/Items('%s')" % video_id,
|
||||
video_id, query={
|
||||
'$expand': 'Metadata,CustomId',
|
||||
'$select': 'Metadata,Id,Title,ShortDescription,LongDescription,CreatedDate,CustomId,AverageUserRating,Categories,ShowName',
|
||||
'$format': 'json',
|
||||
})
|
||||
metadata = video_data.get('Metadata', {})
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': video_id,
|
||||
'title': video_data['Title'],
|
||||
'url': smuggle_url('ooyala:' + video_data['CustomId'], {'supportedformats': 'm3u8,hds'}),
|
||||
'description': video_data.get('LongDescription') or video_data.get('ShortDescription'),
|
||||
'series': video_data.get('ShowName'),
|
||||
'episode': metadata.get('EpisodeTitle'),
|
||||
'episode_number': int_or_none(metadata.get('EpisodeNumber')),
|
||||
'categories': video_data.get('Categories'),
|
||||
'average_rating': video_data.get('AverageUserRating'),
|
||||
'timestamp': parse_iso8601(video_data.get('CreatedDate')),
|
||||
'ie_key': 'Ooyala',
|
||||
}
|
75
youtube_dl/extractor/tvplayer.py
Normal file
75
youtube_dl/extractor/tvplayer.py
Normal file
@ -0,0 +1,75 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_HTTPError
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
urlencode_postdata,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class TVPlayerIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?tvplayer\.com/watch/(?P<id>[^/?#]+)'
|
||||
_TEST = {
|
||||
'url': 'http://tvplayer.com/watch/bbcone',
|
||||
'info_dict': {
|
||||
'id': '89',
|
||||
'ext': 'mp4',
|
||||
'title': r're:^BBC One [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
current_channel = extract_attributes(self._search_regex(
|
||||
r'(<div[^>]+class="[^"]*current-channel[^"]*"[^>]*>)',
|
||||
webpage, 'channel element'))
|
||||
title = current_channel['data-name']
|
||||
|
||||
resource_id = self._search_regex(
|
||||
r'resourceId\s*=\s*"(\d+)"', webpage, 'resource id')
|
||||
platform = self._search_regex(
|
||||
r'platform\s*=\s*"([^"]+)"', webpage, 'platform')
|
||||
token = self._search_regex(
|
||||
r'token\s*=\s*"([^"]+)"', webpage, 'token', default='null')
|
||||
validate = self._search_regex(
|
||||
r'validate\s*=\s*"([^"]+)"', webpage, 'validate', default='null')
|
||||
|
||||
try:
|
||||
response = self._download_json(
|
||||
'http://api.tvplayer.com/api/v2/stream/live',
|
||||
resource_id, headers={
|
||||
'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
|
||||
}, data=urlencode_postdata({
|
||||
'service': 1,
|
||||
'platform': platform,
|
||||
'id': resource_id,
|
||||
'token': token,
|
||||
'validate': validate,
|
||||
}))['tvplayer']['response']
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError):
|
||||
response = self._parse_json(
|
||||
e.cause.read().decode(), resource_id)['tvplayer']['response']
|
||||
raise ExtractorError(
|
||||
'%s said: %s' % (self.IE_NAME, response['error']), expected=True)
|
||||
raise
|
||||
|
||||
formats = self._extract_m3u8_formats(response['stream'], resource_id, 'mp4')
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': resource_id,
|
||||
'display_id': display_id,
|
||||
'title': self._live_title(title),
|
||||
'formats': formats,
|
||||
'is_live': True,
|
||||
}
|
@ -12,7 +12,7 @@ from ..utils import (
|
||||
|
||||
class TwentyFourVideoIE(InfoExtractor):
|
||||
IE_NAME = '24video'
|
||||
_VALID_URL = r'https?://(?:www\.)?24video\.(?:net|me|xxx)/(?:video/(?:view|xml)/|player/new24_play\.swf\?id=)(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?24video\.(?:net|me|xxx|sex)/(?:video/(?:view|xml)/|player/new24_play\.swf\?id=)(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.24video.net/video/view/1044982',
|
||||
@ -43,7 +43,7 @@ class TwentyFourVideoIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(
|
||||
'http://www.24video.net/video/view/%s' % video_id, video_id)
|
||||
'http://www.24video.sex/video/view/%s' % video_id, video_id)
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
description = self._html_search_regex(
|
||||
@ -69,11 +69,11 @@ class TwentyFourVideoIE(InfoExtractor):
|
||||
|
||||
# Sets some cookies
|
||||
self._download_xml(
|
||||
r'http://www.24video.net/video/xml/%s?mode=init' % video_id,
|
||||
r'http://www.24video.sex/video/xml/%s?mode=init' % video_id,
|
||||
video_id, 'Downloading init XML')
|
||||
|
||||
video_xml = self._download_xml(
|
||||
'http://www.24video.net/video/xml/%s?mode=play' % video_id,
|
||||
'http://www.24video.sex/video/xml/%s?mode=play' % video_id,
|
||||
video_id, 'Downloading video XML')
|
||||
|
||||
video = xpath_element(video_xml, './/video', 'video', fatal=True)
|
||||
|
@ -4,91 +4,88 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import remove_end
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class TwentyMinutenIE(InfoExtractor):
|
||||
IE_NAME = '20min'
|
||||
_VALID_URL = r'https?://(?:www\.)?20min\.ch/(?:videotv/*\?.*\bvid=(?P<id>\d+)|(?:[^/]+/)*(?P<display_id>[^/#?]+))'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:www\.)?20min\.ch/
|
||||
(?:
|
||||
videotv/*\?.*?\bvid=|
|
||||
videoplayer/videoplayer\.html\?.*?\bvideoId@
|
||||
)
|
||||
(?P<id>\d+)
|
||||
'''
|
||||
_TESTS = [{
|
||||
# regular video
|
||||
'url': 'http://www.20min.ch/videotv/?vid=469148&cid=2',
|
||||
'md5': 'b52d6bc6ea6398e6a38f12cfd418149c',
|
||||
'md5': 'e7264320db31eed8c38364150c12496e',
|
||||
'info_dict': {
|
||||
'id': '469148',
|
||||
'ext': 'flv',
|
||||
'title': '85 000 Franken für 15 perfekte Minuten',
|
||||
'description': 'Was die Besucher vom Silvesterzauber erwarten können. (Video: Alice Grosjean/Murat Temel)',
|
||||
'thumbnail': 'http://thumbnails.20min-tv.ch/server063/469148/frame-72-469148.jpg'
|
||||
}
|
||||
}, {
|
||||
# news article with video
|
||||
'url': 'http://www.20min.ch/schweiz/news/story/-Wir-muessen-mutig-nach-vorne-schauen--22050469',
|
||||
'md5': 'cd4cbb99b94130cff423e967cd275e5e',
|
||||
'info_dict': {
|
||||
'id': '469408',
|
||||
'display_id': '-Wir-muessen-mutig-nach-vorne-schauen--22050469',
|
||||
'ext': 'flv',
|
||||
'title': '«Wir müssen mutig nach vorne schauen»',
|
||||
'description': 'Kein Land sei innovativer als die Schweiz, sagte Johann Schneider-Ammann in seiner Neujahrsansprache. Das Land müsse aber seine Hausaufgaben machen.',
|
||||
'thumbnail': 'http://www.20min.ch/images/content/2/2/0/22050469/10/teaserbreit.jpg'
|
||||
},
|
||||
'skip': '"This video is no longer available" is shown both on the web page and in the downloaded file.',
|
||||
}, {
|
||||
# YouTube embed
|
||||
'url': 'http://www.20min.ch/ro/sports/football/story/Il-marque-une-bicyclette-de-plus-de-30-metres--21115184',
|
||||
'md5': 'cec64d59aa01c0ed9dbba9cf639dd82f',
|
||||
'info_dict': {
|
||||
'id': 'ivM7A7SpDOs',
|
||||
'ext': 'mp4',
|
||||
'title': 'GOLAZO DE CHILENA DE JAVI GÓMEZ, FINALISTA AL BALÓN DE CLM 2016',
|
||||
'description': 'md5:903c92fbf2b2f66c09de514bc25e9f5a',
|
||||
'upload_date': '20160424',
|
||||
'uploader': 'RTVCM Castilla-La Mancha',
|
||||
'uploader_id': 'RTVCM',
|
||||
'title': '85 000 Franken für 15 perfekte Minuten',
|
||||
'thumbnail': r're:https?://.*\.jpg$',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.20min.ch/videoplayer/videoplayer.html?params=client@twentyDE|videoId@523629',
|
||||
'info_dict': {
|
||||
'id': '523629',
|
||||
'ext': 'mp4',
|
||||
'title': 'So kommen Sie bei Eis und Schnee sicher an',
|
||||
'description': 'md5:117c212f64b25e3d95747e5276863f7d',
|
||||
'thumbnail': r're:https?://.*\.jpg$',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
}, {
|
||||
'url': 'http://www.20min.ch/videotv/?cid=44&vid=468738',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.20min.ch/ro/sortir/cinema/story/Grandir-au-bahut--c-est-dur-18927411',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
return [m.group('url') for m in re.finditer(
|
||||
r'<iframe[^>]+src=(["\'])(?P<url>(?:https?://)?(?:www\.)?20min\.ch/videoplayer/videoplayer.html\?.*?\bvideoId@\d+.*?)\1',
|
||||
webpage)]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id') or video_id
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
video = self._download_json(
|
||||
'http://api.20min.ch/video/%s/show' % video_id,
|
||||
video_id)['content']
|
||||
|
||||
youtube_url = self._html_search_regex(
|
||||
r'<iframe[^>]+src="((?:https?:)?//www\.youtube\.com/embed/[^"]+)"',
|
||||
webpage, 'YouTube embed URL', default=None)
|
||||
if youtube_url is not None:
|
||||
return self.url_result(youtube_url, 'Youtube')
|
||||
title = video['title']
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<h1>.*?<span>(.+?)</span></h1>',
|
||||
webpage, 'title', default=None)
|
||||
if not title:
|
||||
title = remove_end(re.sub(
|
||||
r'^20 [Mm]inuten.*? -', '', self._og_search_title(webpage)), ' - News')
|
||||
formats = [{
|
||||
'format_id': format_id,
|
||||
'url': 'http://podcast.20min-tv.ch/podcast/20min/%s%s.mp4' % (video_id, p),
|
||||
'quality': quality,
|
||||
} for quality, (format_id, p) in enumerate([('sd', ''), ('hd', 'h')])]
|
||||
self._sort_formats(formats)
|
||||
|
||||
if not video_id:
|
||||
video_id = self._search_regex(
|
||||
r'"file\d?"\s*,\s*\"(\d+)', webpage, 'video id')
|
||||
description = video.get('lead')
|
||||
thumbnail = video.get('thumbnail')
|
||||
|
||||
description = self._html_search_meta(
|
||||
'description', webpage, 'description')
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
def extract_count(kind):
|
||||
return try_get(
|
||||
video,
|
||||
lambda x: int_or_none(x['communityobject']['thumbs_%s' % kind]))
|
||||
|
||||
like_count = extract_count('up')
|
||||
dislike_count = extract_count('down')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'url': 'http://speed.20min-tv.ch/%sm.flv' % video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'like_count': like_count,
|
||||
'dislike_count': dislike_count,
|
||||
'formats': formats,
|
||||
}
|
||||
|
@ -209,7 +209,7 @@ class TwitchVodIE(TwitchItemBaseIE):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:www\.)?twitch\.tv/[^/]+/v/|
|
||||
(?:www\.)?twitch\.tv/(?:[^/]+/v|videos)/|
|
||||
player\.twitch\.tv/\?.*?\bvideo=v
|
||||
)
|
||||
(?P<id>\d+)
|
||||
@ -259,6 +259,9 @@ class TwitchVodIE(TwitchItemBaseIE):
|
||||
}, {
|
||||
'url': 'http://player.twitch.tv/?t=5m10s&video=v6528877',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.twitch.tv/videos/6528877',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -444,7 +447,14 @@ class TwitchHighlightsIE(TwitchVideosBaseIE):
|
||||
|
||||
class TwitchStreamIE(TwitchBaseIE):
|
||||
IE_NAME = 'twitch:stream'
|
||||
_VALID_URL = r'%s/(?P<id>[^/#?]+)/?(?:\#.*)?$' % TwitchBaseIE._VALID_URL_BASE
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:www\.)?twitch\.tv/|
|
||||
player\.twitch\.tv/\?.*?\bchannel=
|
||||
)
|
||||
(?P<id>[^/#?]+)
|
||||
'''
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.twitch.tv/shroomztv',
|
||||
@ -468,8 +478,25 @@ class TwitchStreamIE(TwitchBaseIE):
|
||||
}, {
|
||||
'url': 'http://www.twitch.tv/miracle_doto#profile-0',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://player.twitch.tv/?channel=lotsofs',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return (False
|
||||
if any(ie.suitable(url) for ie in (
|
||||
TwitchVideoIE,
|
||||
TwitchChapterIE,
|
||||
TwitchVodIE,
|
||||
TwitchProfileIE,
|
||||
TwitchAllVideosIE,
|
||||
TwitchUploadsIE,
|
||||
TwitchPastBroadcastsIE,
|
||||
TwitchHighlightsIE))
|
||||
else super(TwitchStreamIE, cls).suitable(url))
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel_id = self._match_id(url)
|
||||
|
||||
|
@ -84,12 +84,27 @@ class UOLIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
if not video_id.isdigit():
|
||||
embed_page = self._download_webpage('https://jsuol.com.br/c/tv/uol/embed/?params=[embed,%s]' % video_id, video_id)
|
||||
video_id = self._search_regex(r'mediaId=(\d+)', embed_page, 'media id')
|
||||
media_id = None
|
||||
|
||||
if video_id.isdigit():
|
||||
media_id = video_id
|
||||
|
||||
if not media_id:
|
||||
embed_page = self._download_webpage(
|
||||
'https://jsuol.com.br/c/tv/uol/embed/?params=[embed,%s]' % video_id,
|
||||
video_id, 'Downloading embed page', fatal=False)
|
||||
if embed_page:
|
||||
media_id = self._search_regex(
|
||||
(r'uol\.com\.br/(\d+)', r'mediaId=(\d+)'),
|
||||
embed_page, 'media id', default=None)
|
||||
|
||||
if not media_id:
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
media_id = self._search_regex(r'mediaId=(\d+)', webpage, 'media id')
|
||||
|
||||
video_data = self._download_json(
|
||||
'http://mais.uol.com.br/apiuol/v3/player/getMedia/%s.json' % video_id,
|
||||
video_id)['item']
|
||||
'http://mais.uol.com.br/apiuol/v3/player/getMedia/%s.json' % media_id,
|
||||
media_id)['item']
|
||||
title = video_data['title']
|
||||
|
||||
query = {
|
||||
@ -118,7 +133,7 @@ class UOLIE(InfoExtractor):
|
||||
tags.append(tag_description)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'id': media_id,
|
||||
'title': title,
|
||||
'description': clean_html(video_data.get('desMedia')),
|
||||
'thumbnail': video_data.get('thumbnail'),
|
||||
|
@ -69,6 +69,13 @@ class UstreamIE(InfoExtractor):
|
||||
},
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_url(webpage):
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
|
||||
if mobj is not None:
|
||||
return mobj.group('url')
|
||||
|
||||
def _get_stream_info(self, url, video_id, app_id_ver, extra_note=None):
|
||||
def num_to_hex(n):
|
||||
return hex(n)[2:]
|
||||
|
@ -4,9 +4,9 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_etree_fromstring,
|
||||
compat_str,
|
||||
compat_urlparse,
|
||||
compat_HTTPError,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
@ -140,21 +140,6 @@ class VevoIE(VevoBaseIE):
|
||||
'url': 'http://www.vevo.com/watch/INS171400764',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_SMIL_BASE_URL = 'http://smil.lvl3.vevo.com'
|
||||
_SOURCE_TYPES = {
|
||||
0: 'youtube',
|
||||
1: 'brightcove',
|
||||
2: 'http',
|
||||
3: 'hls_ios',
|
||||
4: 'hls',
|
||||
5: 'smil', # http
|
||||
7: 'f4m_cc',
|
||||
8: 'f4m_ak',
|
||||
9: 'f4m_l3',
|
||||
10: 'ism',
|
||||
13: 'smil', # rtmp
|
||||
18: 'dash',
|
||||
}
|
||||
_VERSIONS = {
|
||||
0: 'youtube', # only in AuthenticateVideo videoVersions
|
||||
1: 'level3',
|
||||
@ -163,41 +148,6 @@ class VevoIE(VevoBaseIE):
|
||||
4: 'amazon',
|
||||
}
|
||||
|
||||
def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
|
||||
formats = []
|
||||
els = smil.findall('.//{http://www.w3.org/2001/SMIL20/Language}video')
|
||||
for el in els:
|
||||
src = el.attrib['src']
|
||||
m = re.match(r'''(?xi)
|
||||
(?P<ext>[a-z0-9]+):
|
||||
(?P<path>
|
||||
[/a-z0-9]+ # The directory and main part of the URL
|
||||
_(?P<tbr>[0-9]+)k
|
||||
_(?P<width>[0-9]+)x(?P<height>[0-9]+)
|
||||
_(?P<vcodec>[a-z0-9]+)
|
||||
_(?P<vbr>[0-9]+)
|
||||
_(?P<acodec>[a-z0-9]+)
|
||||
_(?P<abr>[0-9]+)
|
||||
\.[a-z0-9]+ # File extension
|
||||
)''', src)
|
||||
if not m:
|
||||
continue
|
||||
|
||||
format_url = self._SMIL_BASE_URL + m.group('path')
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'format_id': 'smil_' + m.group('tbr'),
|
||||
'vcodec': m.group('vcodec'),
|
||||
'acodec': m.group('acodec'),
|
||||
'tbr': int(m.group('tbr')),
|
||||
'vbr': int(m.group('vbr')),
|
||||
'abr': int(m.group('abr')),
|
||||
'ext': m.group('ext'),
|
||||
'width': int(m.group('width')),
|
||||
'height': int(m.group('height')),
|
||||
})
|
||||
return formats
|
||||
|
||||
def _initialize_api(self, video_id):
|
||||
req = sanitized_Request(
|
||||
'http://www.vevo.com/auth', data=b'')
|
||||
@ -206,7 +156,7 @@ class VevoIE(VevoBaseIE):
|
||||
note='Retrieving oauth token',
|
||||
errnote='Unable to retrieve oauth token')
|
||||
|
||||
if 'THIS PAGE IS CURRENTLY UNAVAILABLE IN YOUR REGION' in webpage:
|
||||
if re.search(r'(?i)THIS PAGE IS CURRENTLY UNAVAILABLE IN YOUR REGION', webpage):
|
||||
self.raise_geo_restricted(
|
||||
'%s said: This page is currently unavailable in your region' % self.IE_NAME)
|
||||
|
||||
@ -214,148 +164,91 @@ class VevoIE(VevoBaseIE):
|
||||
self._api_url_template = self.http_scheme() + '//apiv2.vevo.com/%s?token=' + auth_info['access_token']
|
||||
|
||||
def _call_api(self, path, *args, **kwargs):
|
||||
return self._download_json(self._api_url_template % path, *args, **kwargs)
|
||||
try:
|
||||
data = self._download_json(self._api_url_template % path, *args, **kwargs)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError):
|
||||
errors = self._parse_json(e.cause.read().decode(), None)['errors']
|
||||
error_message = ', '.join([error['message'] for error in errors])
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True)
|
||||
raise
|
||||
return data
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
json_url = 'http://api.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id
|
||||
response = self._download_json(
|
||||
json_url, video_id, 'Downloading video info',
|
||||
'Unable to download info', fatal=False) or {}
|
||||
video_info = response.get('video') or {}
|
||||
self._initialize_api(video_id)
|
||||
|
||||
video_info = self._call_api(
|
||||
'video/%s' % video_id, video_id, 'Downloading api video info',
|
||||
'Failed to download video info')
|
||||
|
||||
video_versions = self._call_api(
|
||||
'video/%s/streams' % video_id, video_id,
|
||||
'Downloading video versions info',
|
||||
'Failed to download video versions info',
|
||||
fatal=False)
|
||||
|
||||
# Some videos are only available via webpage (e.g.
|
||||
# https://github.com/rg3/youtube-dl/issues/9366)
|
||||
if not video_versions:
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_versions = self._extract_json(webpage, video_id, 'streams')[video_id][0]
|
||||
|
||||
uploader = None
|
||||
artist = None
|
||||
featured_artist = None
|
||||
uploader = None
|
||||
view_count = None
|
||||
artists = video_info.get('artists')
|
||||
for curr_artist in artists:
|
||||
if curr_artist.get('role') == 'Featured':
|
||||
featured_artist = curr_artist['name']
|
||||
else:
|
||||
artist = uploader = curr_artist['name']
|
||||
|
||||
formats = []
|
||||
for video_version in video_versions:
|
||||
version = self._VERSIONS.get(video_version['version'])
|
||||
version_url = video_version.get('url')
|
||||
if not version_url:
|
||||
continue
|
||||
|
||||
if not video_info:
|
||||
try:
|
||||
self._initialize_api(video_id)
|
||||
except ExtractorError:
|
||||
ytid = response.get('errorInfo', {}).get('ytid')
|
||||
if ytid:
|
||||
self.report_warning(
|
||||
'Video is geoblocked, trying with the YouTube video %s' % ytid)
|
||||
return self.url_result(ytid, 'Youtube', ytid)
|
||||
|
||||
raise
|
||||
|
||||
video_info = self._call_api(
|
||||
'video/%s' % video_id, video_id, 'Downloading api video info',
|
||||
'Failed to download video info')
|
||||
|
||||
video_versions = self._call_api(
|
||||
'video/%s/streams' % video_id, video_id,
|
||||
'Downloading video versions info',
|
||||
'Failed to download video versions info',
|
||||
fatal=False)
|
||||
|
||||
# Some videos are only available via webpage (e.g.
|
||||
# https://github.com/rg3/youtube-dl/issues/9366)
|
||||
if not video_versions:
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_versions = self._extract_json(webpage, video_id, 'streams')[video_id][0]
|
||||
|
||||
timestamp = parse_iso8601(video_info.get('releaseDate'))
|
||||
artists = video_info.get('artists')
|
||||
for curr_artist in artists:
|
||||
if curr_artist.get('role') == 'Featured':
|
||||
featured_artist = curr_artist['name']
|
||||
else:
|
||||
artist = uploader = curr_artist['name']
|
||||
view_count = int_or_none(video_info.get('views', {}).get('total'))
|
||||
|
||||
for video_version in video_versions:
|
||||
version = self._VERSIONS.get(video_version['version'])
|
||||
version_url = video_version.get('url')
|
||||
if not version_url:
|
||||
if '.ism' in version_url:
|
||||
continue
|
||||
elif '.mpd' in version_url:
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
version_url, video_id, mpd_id='dash-%s' % version,
|
||||
note='Downloading %s MPD information' % version,
|
||||
errnote='Failed to download %s MPD information' % version,
|
||||
fatal=False))
|
||||
elif '.m3u8' in version_url:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
version_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls-%s' % version,
|
||||
note='Downloading %s m3u8 information' % version,
|
||||
errnote='Failed to download %s m3u8 information' % version,
|
||||
fatal=False))
|
||||
else:
|
||||
m = re.search(r'''(?xi)
|
||||
_(?P<width>[0-9]+)x(?P<height>[0-9]+)
|
||||
_(?P<vcodec>[a-z0-9]+)
|
||||
_(?P<vbr>[0-9]+)
|
||||
_(?P<acodec>[a-z0-9]+)
|
||||
_(?P<abr>[0-9]+)
|
||||
\.(?P<ext>[a-z0-9]+)''', version_url)
|
||||
if not m:
|
||||
continue
|
||||
|
||||
if '.ism' in version_url:
|
||||
continue
|
||||
elif '.mpd' in version_url:
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
version_url, video_id, mpd_id='dash-%s' % version,
|
||||
note='Downloading %s MPD information' % version,
|
||||
errnote='Failed to download %s MPD information' % version,
|
||||
fatal=False))
|
||||
elif '.m3u8' in version_url:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
version_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls-%s' % version,
|
||||
note='Downloading %s m3u8 information' % version,
|
||||
errnote='Failed to download %s m3u8 information' % version,
|
||||
fatal=False))
|
||||
else:
|
||||
m = re.search(r'''(?xi)
|
||||
_(?P<width>[0-9]+)x(?P<height>[0-9]+)
|
||||
_(?P<vcodec>[a-z0-9]+)
|
||||
_(?P<vbr>[0-9]+)
|
||||
_(?P<acodec>[a-z0-9]+)
|
||||
_(?P<abr>[0-9]+)
|
||||
\.(?P<ext>[a-z0-9]+)''', version_url)
|
||||
if not m:
|
||||
continue
|
||||
|
||||
formats.append({
|
||||
'url': version_url,
|
||||
'format_id': 'http-%s-%s' % (version, video_version['quality']),
|
||||
'vcodec': m.group('vcodec'),
|
||||
'acodec': m.group('acodec'),
|
||||
'vbr': int(m.group('vbr')),
|
||||
'abr': int(m.group('abr')),
|
||||
'ext': m.group('ext'),
|
||||
'width': int(m.group('width')),
|
||||
'height': int(m.group('height')),
|
||||
})
|
||||
else:
|
||||
timestamp = int_or_none(self._search_regex(
|
||||
r'/Date\((\d+)\)/',
|
||||
video_info['releaseDate'], 'release date', fatal=False),
|
||||
scale=1000)
|
||||
artists = video_info.get('mainArtists')
|
||||
if artists:
|
||||
artist = uploader = artists[0]['artistName']
|
||||
|
||||
featured_artists = video_info.get('featuredArtists')
|
||||
if featured_artists:
|
||||
featured_artist = featured_artists[0]['artistName']
|
||||
|
||||
smil_parsed = False
|
||||
for video_version in video_info['videoVersions']:
|
||||
version = self._VERSIONS.get(video_version['version'])
|
||||
if version == 'youtube':
|
||||
continue
|
||||
else:
|
||||
source_type = self._SOURCE_TYPES.get(video_version['sourceType'])
|
||||
renditions = compat_etree_fromstring(video_version['data'])
|
||||
if source_type == 'http':
|
||||
for rend in renditions.findall('rendition'):
|
||||
attr = rend.attrib
|
||||
formats.append({
|
||||
'url': attr['url'],
|
||||
'format_id': 'http-%s-%s' % (version, attr['name']),
|
||||
'height': int_or_none(attr.get('frameheight')),
|
||||
'width': int_or_none(attr.get('frameWidth')),
|
||||
'tbr': int_or_none(attr.get('totalBitrate')),
|
||||
'vbr': int_or_none(attr.get('videoBitrate')),
|
||||
'abr': int_or_none(attr.get('audioBitrate')),
|
||||
'vcodec': attr.get('videoCodec'),
|
||||
'acodec': attr.get('audioCodec'),
|
||||
})
|
||||
elif source_type == 'hls':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
renditions.find('rendition').attrib['url'], video_id,
|
||||
'mp4', 'm3u8_native', m3u8_id='hls-%s' % version,
|
||||
note='Downloading %s m3u8 information' % version,
|
||||
errnote='Failed to download %s m3u8 information' % version,
|
||||
fatal=False))
|
||||
elif source_type == 'smil' and version == 'level3' and not smil_parsed:
|
||||
formats.extend(self._extract_smil_formats(
|
||||
renditions.find('rendition').attrib['url'], video_id, False))
|
||||
smil_parsed = True
|
||||
formats.append({
|
||||
'url': version_url,
|
||||
'format_id': 'http-%s-%s' % (version, video_version['quality']),
|
||||
'vcodec': m.group('vcodec'),
|
||||
'acodec': m.group('acodec'),
|
||||
'vbr': int(m.group('vbr')),
|
||||
'abr': int(m.group('abr')),
|
||||
'ext': m.group('ext'),
|
||||
'width': int(m.group('width')),
|
||||
'height': int(m.group('height')),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
track = video_info['title']
|
||||
@ -376,17 +269,15 @@ class VevoIE(VevoBaseIE):
|
||||
else:
|
||||
age_limit = None
|
||||
|
||||
duration = video_info.get('duration')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnail': video_info.get('imageUrl') or video_info.get('thumbnailUrl'),
|
||||
'timestamp': timestamp,
|
||||
'timestamp': parse_iso8601(video_info.get('releaseDate')),
|
||||
'uploader': uploader,
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
'duration': int_or_none(video_info.get('duration')),
|
||||
'view_count': int_or_none(video_info.get('views', {}).get('total')),
|
||||
'age_limit': age_limit,
|
||||
'track': track,
|
||||
'artist': uploader,
|
||||
|
99
youtube_dl/extractor/videopress.py
Normal file
99
youtube_dl/extractor/videopress.py
Normal file
@ -0,0 +1,99 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import random
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
parse_age_limit,
|
||||
qualities,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class VideoPressIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://videopress\.com/embed/(?P<id>[\da-zA-Z]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://videopress.com/embed/kUJmAcSf',
|
||||
'md5': '706956a6c875873d51010921310e4bc6',
|
||||
'info_dict': {
|
||||
'id': 'kUJmAcSf',
|
||||
'ext': 'mp4',
|
||||
'title': 'VideoPress Demo',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'duration': 634.6,
|
||||
'timestamp': 1434983935,
|
||||
'upload_date': '20150622',
|
||||
'age_limit': 0,
|
||||
},
|
||||
}, {
|
||||
# 17+, requires birth_* params
|
||||
'url': 'https://videopress.com/embed/iH3gstfZ',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
return re.findall(
|
||||
r'<iframe[^>]+src=["\']((?:https?://)?videopress\.com/embed/[\da-zA-Z]+)',
|
||||
webpage)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
video = self._download_json(
|
||||
'https://public-api.wordpress.com/rest/v1.1/videos/%s' % video_id,
|
||||
video_id, query={
|
||||
'birth_month': random.randint(1, 12),
|
||||
'birth_day': random.randint(1, 31),
|
||||
'birth_year': random.randint(1950, 1995),
|
||||
})
|
||||
|
||||
title = video['title']
|
||||
|
||||
def base_url(scheme):
|
||||
return try_get(
|
||||
video, lambda x: x['file_url_base'][scheme], compat_str)
|
||||
|
||||
base_url = base_url('https') or base_url('http')
|
||||
|
||||
QUALITIES = ('std', 'dvd', 'hd')
|
||||
quality = qualities(QUALITIES)
|
||||
|
||||
formats = []
|
||||
for format_id, f in video['files'].items():
|
||||
if not isinstance(f, dict):
|
||||
continue
|
||||
for ext, path in f.items():
|
||||
if ext in ('mp4', 'ogg'):
|
||||
formats.append({
|
||||
'url': urljoin(base_url, path),
|
||||
'format_id': '%s-%s' % (format_id, ext),
|
||||
'ext': determine_ext(path, ext),
|
||||
'quality': quality(format_id),
|
||||
})
|
||||
original_url = try_get(video, lambda x: x['original'], compat_str)
|
||||
if original_url:
|
||||
formats.append({
|
||||
'url': original_url,
|
||||
'format_id': 'original',
|
||||
'quality': len(QUALITIES),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': video.get('description'),
|
||||
'thumbnail': video.get('poster'),
|
||||
'duration': float_or_none(video.get('duration'), 1000),
|
||||
'timestamp': unified_timestamp(video.get('upload_date')),
|
||||
'age_limit': parse_age_limit(video.get('rating')),
|
||||
'formats': formats,
|
||||
}
|
@ -21,12 +21,12 @@ from ..utils import (
|
||||
sanitized_Request,
|
||||
smuggle_url,
|
||||
std_headers,
|
||||
unified_strdate,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
unsmuggle_url,
|
||||
urlencode_postdata,
|
||||
unescapeHTML,
|
||||
parse_filesize,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
@ -142,10 +142,19 @@ class VimeoBaseInfoExtractor(InfoExtractor):
|
||||
note='Downloading %s m3u8 information' % cdn_name,
|
||||
fatal=False))
|
||||
elif files_type == 'dash':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
manifest_url.replace('/master.json', '/master.mpd'), video_id, format_id,
|
||||
'Downloading %s MPD information' % cdn_name,
|
||||
fatal=False))
|
||||
mpd_pattern = r'/%s/(?:sep/)?video/' % video_id
|
||||
mpd_manifest_urls = []
|
||||
if re.search(mpd_pattern, manifest_url):
|
||||
for suffix, repl in (('', 'video'), ('_sep', 'sep/video')):
|
||||
mpd_manifest_urls.append((format_id + suffix, re.sub(
|
||||
mpd_pattern, '/%s/%s/' % (video_id, repl), manifest_url)))
|
||||
else:
|
||||
mpd_manifest_urls = [(format_id, manifest_url)]
|
||||
for f_id, m_url in mpd_manifest_urls:
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
m_url.replace('/master.json', '/master.mpd'), video_id, f_id,
|
||||
'Downloading %s MPD information' % cdn_name,
|
||||
fatal=False))
|
||||
|
||||
subtitles = {}
|
||||
text_tracks = config['request'].get('text_tracks')
|
||||
@ -204,11 +213,13 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': "youtube-dl test video - \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550",
|
||||
'description': 'md5:2d3305bad981a06ff79f027f19865021',
|
||||
'timestamp': 1355990239,
|
||||
'upload_date': '20121220',
|
||||
'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user7108434',
|
||||
'uploader_id': 'user7108434',
|
||||
'uploader': 'Filippo Valsorda',
|
||||
'duration': 10,
|
||||
'license': 'by-sa',
|
||||
},
|
||||
},
|
||||
{
|
||||
@ -249,6 +260,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'id': '68375962',
|
||||
'ext': 'mp4',
|
||||
'title': 'youtube-dl password protected test video',
|
||||
'timestamp': 1371200155,
|
||||
'upload_date': '20130614',
|
||||
'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user18948128',
|
||||
'uploader_id': 'user18948128',
|
||||
@ -271,7 +283,8 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'uploader_url': r're:https?://(?:www\.)?vimeo\.com/atencio',
|
||||
'uploader_id': 'atencio',
|
||||
'uploader': 'Peter Atencio',
|
||||
'upload_date': '20130927',
|
||||
'timestamp': 1380339469,
|
||||
'upload_date': '20130928',
|
||||
'duration': 187,
|
||||
},
|
||||
},
|
||||
@ -283,6 +296,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'The New Vimeo Player (You Know, For Videos)',
|
||||
'description': 'md5:2ec900bf97c3f389378a96aee11260ea',
|
||||
'timestamp': 1381846109,
|
||||
'upload_date': '20131015',
|
||||
'uploader_url': r're:https?://(?:www\.)?vimeo\.com/staff',
|
||||
'uploader_id': 'staff',
|
||||
@ -314,6 +328,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'uploader': 'The DMCI',
|
||||
'uploader_url': r're:https?://(?:www\.)?vimeo\.com/dmci',
|
||||
'uploader_id': 'dmci',
|
||||
'timestamp': 1324343742,
|
||||
'upload_date': '20111220',
|
||||
'description': 'md5:ae23671e82d05415868f7ad1aec21147',
|
||||
},
|
||||
@ -329,6 +344,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'uploader': 'Casey Donahue',
|
||||
'uploader_url': r're:https?://(?:www\.)?vimeo\.com/caseydonahue',
|
||||
'uploader_id': 'caseydonahue',
|
||||
'timestamp': 1250886430,
|
||||
'upload_date': '20090821',
|
||||
'description': 'md5:bdbf314014e58713e6e5b66eb252f4a6',
|
||||
},
|
||||
@ -338,7 +354,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'expected_warnings': ['Unable to download JSON metadata'],
|
||||
},
|
||||
{
|
||||
# redirects to ondemand extractor and should be passed throught it
|
||||
# redirects to ondemand extractor and should be passed through it
|
||||
# for successful extraction
|
||||
'url': 'https://vimeo.com/73445910',
|
||||
'info_dict': {
|
||||
@ -477,6 +493,9 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'%s said: %s' % (self.IE_NAME, seed_status['title']),
|
||||
expected=True)
|
||||
|
||||
cc_license = None
|
||||
timestamp = None
|
||||
|
||||
# Extract the config JSON
|
||||
try:
|
||||
try:
|
||||
@ -490,8 +509,12 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
vimeo_clip_page_config = self._search_regex(
|
||||
r'vimeo\.clip_page_config\s*=\s*({.+?});', webpage,
|
||||
'vimeo clip page config')
|
||||
config_url = self._parse_json(
|
||||
vimeo_clip_page_config, video_id)['player']['config_url']
|
||||
page_config = self._parse_json(vimeo_clip_page_config, video_id)
|
||||
config_url = page_config['player']['config_url']
|
||||
cc_license = page_config.get('cc_license')
|
||||
timestamp = try_get(
|
||||
page_config, lambda x: x['clip']['uploaded_on'],
|
||||
compat_str)
|
||||
config_json = self._download_webpage(config_url, video_id)
|
||||
config = json.loads(config_json)
|
||||
except RegexNotFoundError:
|
||||
@ -560,10 +583,10 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
self._downloader.report_warning('Cannot find video description')
|
||||
|
||||
# Extract upload date
|
||||
video_upload_date = None
|
||||
mobj = re.search(r'<time[^>]+datetime="([^"]+)"', webpage)
|
||||
if mobj is not None:
|
||||
video_upload_date = unified_strdate(mobj.group(1))
|
||||
if not timestamp:
|
||||
timestamp = self._search_regex(
|
||||
r'<time[^>]+datetime="([^"]+)"', webpage,
|
||||
'timestamp', default=None)
|
||||
|
||||
try:
|
||||
view_count = int(self._search_regex(r'UserPlays:(\d+)', webpage, 'view count'))
|
||||
@ -600,15 +623,22 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
info_dict = self._parse_config(config, video_id)
|
||||
formats.extend(info_dict['formats'])
|
||||
self._vimeo_sort_formats(formats)
|
||||
|
||||
if not cc_license:
|
||||
cc_license = self._search_regex(
|
||||
r'<link[^>]+rel=["\']license["\'][^>]+href=(["\'])(?P<license>(?:(?!\1).)+)\1',
|
||||
webpage, 'license', default=None, group='license')
|
||||
|
||||
info_dict.update({
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'upload_date': video_upload_date,
|
||||
'timestamp': unified_timestamp(timestamp),
|
||||
'description': video_description,
|
||||
'webpage_url': url,
|
||||
'view_count': view_count,
|
||||
'like_count': like_count,
|
||||
'comment_count': comment_count,
|
||||
'license': cc_license,
|
||||
})
|
||||
|
||||
return info_dict
|
||||
@ -730,12 +760,12 @@ class VimeoChannelIE(VimeoBaseInfoExtractor):
|
||||
# Try extracting href first since not all videos are available via
|
||||
# short https://vimeo.com/id URL (e.g. https://vimeo.com/channels/tributes/6213729)
|
||||
clips = re.findall(
|
||||
r'id="clip_(\d+)"[^>]*>\s*<a[^>]+href="(/(?:[^/]+/)*\1)', webpage)
|
||||
r'id="clip_(\d+)"[^>]*>\s*<a[^>]+href="(/(?:[^/]+/)*\1)(?:[^>]+\btitle="([^"]+)")?', webpage)
|
||||
if clips:
|
||||
for video_id, video_url in clips:
|
||||
for video_id, video_url, video_title in clips:
|
||||
yield self.url_result(
|
||||
compat_urlparse.urljoin(base_url, video_url),
|
||||
VimeoIE.ie_key(), video_id=video_id)
|
||||
VimeoIE.ie_key(), video_id=video_id, video_title=video_title)
|
||||
# More relaxed fallback
|
||||
else:
|
||||
for video_id in re.findall(r'id=["\']clip_(\d+)', webpage):
|
||||
@ -884,10 +914,14 @@ class VimeoReviewIE(VimeoBaseInfoExtractor):
|
||||
|
||||
def _get_config_url(self, webpage_url, video_id, video_password_verified=False):
|
||||
webpage = self._download_webpage(webpage_url, video_id)
|
||||
data = self._parse_json(self._search_regex(
|
||||
r'window\s*=\s*_extend\(window,\s*({.+?})\);', webpage, 'data',
|
||||
default=NO_DEFAULT if video_password_verified else '{}'), video_id)
|
||||
config_url = data.get('vimeo_esi', {}).get('config', {}).get('configUrl')
|
||||
config_url = self._html_search_regex(
|
||||
r'data-config-url=(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
|
||||
'config URL', default=None, group='url')
|
||||
if not config_url:
|
||||
data = self._parse_json(self._search_regex(
|
||||
r'window\s*=\s*_extend\(window,\s*({.+?})\);', webpage, 'data',
|
||||
default=NO_DEFAULT if video_password_verified else '{}'), video_id)
|
||||
config_url = data.get('vimeo_esi', {}).get('config', {}).get('configUrl')
|
||||
if config_url is None:
|
||||
self._verify_video_password(webpage_url, video_id, webpage)
|
||||
config_url = self._get_config_url(
|
||||
|
@ -6,8 +6,9 @@ import itertools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
unified_strdate,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
@ -20,50 +21,16 @@ class VineIE(InfoExtractor):
|
||||
'id': 'b9KOOWX7HUx',
|
||||
'ext': 'mp4',
|
||||
'title': 'Chicken.',
|
||||
'alt_title': 'Vine by Jack Dorsey',
|
||||
'alt_title': 'Vine by Jack',
|
||||
'timestamp': 1368997951,
|
||||
'upload_date': '20130519',
|
||||
'uploader': 'Jack Dorsey',
|
||||
'uploader': 'Jack',
|
||||
'uploader_id': '76',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'repost_count': int,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://vine.co/v/MYxVapFvz2z',
|
||||
'md5': '7b9a7cbc76734424ff942eb52c8f1065',
|
||||
'info_dict': {
|
||||
'id': 'MYxVapFvz2z',
|
||||
'ext': 'mp4',
|
||||
'title': 'Fuck Da Police #Mikebrown #justice #ferguson #prayforferguson #protesting #NMOS14',
|
||||
'alt_title': 'Vine by Mars Ruiz',
|
||||
'upload_date': '20140815',
|
||||
'uploader': 'Mars Ruiz',
|
||||
'uploader_id': '1102363502380728320',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'repost_count': int,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://vine.co/v/bxVjBbZlPUH',
|
||||
'md5': 'ea27decea3fa670625aac92771a96b73',
|
||||
'info_dict': {
|
||||
'id': 'bxVjBbZlPUH',
|
||||
'ext': 'mp4',
|
||||
'title': '#mw3 #ac130 #killcam #angelofdeath',
|
||||
'alt_title': 'Vine by Z3k3',
|
||||
'upload_date': '20130430',
|
||||
'uploader': 'Z3k3',
|
||||
'uploader_id': '936470460173008896',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'repost_count': int,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://vine.co/oembed/MYxVapFvz2z.json',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://vine.co/v/e192BnZnZ9V',
|
||||
'info_dict': {
|
||||
@ -71,6 +38,7 @@ class VineIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'ยิ้ม~ เขิน~ อาย~ น่าร้ากอ้ะ >//< @n_whitewo @orlameena #lovesicktheseries #lovesickseason2',
|
||||
'alt_title': 'Vine by Pimry_zaa',
|
||||
'timestamp': 1436057405,
|
||||
'upload_date': '20150705',
|
||||
'uploader': 'Pimry_zaa',
|
||||
'uploader_id': '1135760698325307392',
|
||||
@ -82,43 +50,60 @@ class VineIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://vine.co/v/MYxVapFvz2z',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://vine.co/v/bxVjBbZlPUH',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://vine.co/oembed/MYxVapFvz2z.json',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage('https://vine.co/v/' + video_id, video_id)
|
||||
|
||||
data = self._parse_json(
|
||||
self._search_regex(
|
||||
r'window\.POST_DATA\s*=\s*({.+?});\s*</script>',
|
||||
webpage, 'vine data'),
|
||||
video_id)
|
||||
data = self._download_json(
|
||||
'https://archive.vine.co/posts/%s.json' % video_id, video_id)
|
||||
|
||||
data = data[list(data.keys())[0]]
|
||||
|
||||
formats = [{
|
||||
'format_id': '%(format)s-%(rate)s' % f,
|
||||
'vcodec': f.get('format'),
|
||||
'quality': f.get('rate'),
|
||||
'url': f['videoUrl'],
|
||||
} for f in data['videoUrls'] if f.get('videoUrl')]
|
||||
def video_url(kind):
|
||||
for url_suffix in ('Url', 'URL'):
|
||||
format_url = data.get('video%s%s' % (kind, url_suffix))
|
||||
if format_url:
|
||||
return format_url
|
||||
|
||||
formats = []
|
||||
for quality, format_id in enumerate(('low', '', 'dash')):
|
||||
format_url = video_url(format_id.capitalize())
|
||||
if not format_url:
|
||||
continue
|
||||
# DASH link returns plain mp4
|
||||
if format_id == 'dash' and determine_ext(format_url) == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
format_url, video_id, mpd_id='dash', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'format_id': format_id or 'standard',
|
||||
'quality': quality,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
username = data.get('username')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': data.get('description') or self._og_search_title(webpage),
|
||||
'alt_title': 'Vine by %s' % username if username else self._og_search_description(webpage, default=None),
|
||||
'title': data.get('description'),
|
||||
'alt_title': 'Vine by %s' % username if username else None,
|
||||
'thumbnail': data.get('thumbnailUrl'),
|
||||
'upload_date': unified_strdate(data.get('created')),
|
||||
'timestamp': unified_timestamp(data.get('created')),
|
||||
'uploader': username,
|
||||
'uploader_id': data.get('userIdStr'),
|
||||
'view_count': int_or_none(data.get('loops', {}).get('count')),
|
||||
'like_count': int_or_none(data.get('likes', {}).get('count')),
|
||||
'comment_count': int_or_none(data.get('comments', {}).get('count')),
|
||||
'repost_count': int_or_none(data.get('reposts', {}).get('count')),
|
||||
'view_count': int_or_none(data.get('loops')),
|
||||
'like_count': int_or_none(data.get('likes')),
|
||||
'comment_count': int_or_none(data.get('comments')),
|
||||
'repost_count': int_or_none(data.get('reposts')),
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
@ -281,6 +281,11 @@ class VKIE(VKBaseIE):
|
||||
{
|
||||
'url': 'http://new.vk.com/video205387401_165548505',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
# This video is no longer available, because its author has been blocked.
|
||||
'url': 'https://vk.com/video-10639516_456240611',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
|
||||
@ -328,6 +333,12 @@ class VKIE(VKBaseIE):
|
||||
|
||||
r'<!>Access denied':
|
||||
'Access denied to video %s.',
|
||||
|
||||
r'<!>Видеозапись недоступна, так как её автор был заблокирован.':
|
||||
'Video %s is no longer available, because its author has been blocked.',
|
||||
|
||||
r'<!>This video is no longer available, because its author has been blocked.':
|
||||
'Video %s is no longer available, because its author has been blocked.',
|
||||
}
|
||||
|
||||
for error_re, error_msg in ERRORS.items():
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user