Compare commits
302 Commits
2017.04.17
...
2017.06.12
Author | SHA1 | Date | |
---|---|---|---|
|
cb1e6d8985 | ||
|
9932ac5c58 | ||
|
bf87c36c93 | ||
|
b4a3d461e4 | ||
|
72b409559c | ||
|
534863e057 | ||
|
16bc958287 | ||
|
624bd0104c | ||
|
28a4d6cce8 | ||
|
2ae2ffda5e | ||
|
70e7967202 | ||
|
6e999fbc12 | ||
|
7409af9eb3 | ||
|
4e3637034c | ||
|
1afd0b0da7 | ||
|
7515830422 | ||
|
f5521ea209 | ||
|
34646967ba | ||
|
e4d2e76d8e | ||
|
87f5646937 | ||
|
cc69a3de1b | ||
|
15aeeb1188 | ||
|
1693bebe4d | ||
|
4244a13a1d | ||
|
931adf8cc1 | ||
|
c996943418 | ||
|
76e6378358 | ||
|
a355b57f58 | ||
|
1508da30c2 | ||
|
eb703e5380 | ||
|
0a3924e746 | ||
|
e1db730d86 | ||
|
537191826f | ||
|
130880ba48 | ||
|
f8ba3fda4d | ||
|
e1b90cc3db | ||
|
43e6579558 | ||
|
6d923aab35 | ||
|
62bafabc09 | ||
|
9edcdac90c | ||
|
cd138d8bd4 | ||
|
cd750b731c | ||
|
4bede0d8f5 | ||
|
f129c3f349 | ||
|
39d4c1be4d | ||
|
f7a747ce59 | ||
|
4489d41816 | ||
|
87b5184a0d | ||
|
c56ad5c975 | ||
|
6b7ce85cdc | ||
|
d10d0e3cf8 | ||
|
941ea38ef5 | ||
|
99bea8d298 | ||
|
a49eccdfa7 | ||
|
a846173d93 | ||
|
78e210dea5 | ||
|
8555204274 | ||
|
164fcbfeb7 | ||
|
bc22df29c4 | ||
|
7e688d2f6a | ||
|
5a6d1da442 | ||
|
703751add4 | ||
|
4050be78e5 | ||
|
4d9fc40100 | ||
|
765522345f | ||
|
6bceb36b99 | ||
|
1e0d65f0bd | ||
|
03327bc9a6 | ||
|
b407d8533d | ||
|
20e2c9de04 | ||
|
d16c0121b9 | ||
|
7f4c3a7439 | ||
|
28dbde9cc3 | ||
|
cc304ce588 | ||
|
98a0618941 | ||
|
fd545fc6d1 | ||
|
97067db2ae | ||
|
c130f0a37b | ||
|
d3d4ba7f24 | ||
|
5552c9eb0f | ||
|
59ed87cbd9 | ||
|
b7f8749304 | ||
|
5192ee17e7 | ||
|
e834f04400 | ||
|
884d09f330 | ||
|
9e35298f97 | ||
|
0551f1b07b | ||
|
de53511201 | ||
|
2570e85167 | ||
|
9dc5ab041f | ||
|
01f3c8e290 | ||
|
06c1b3ce07 | ||
|
0b75e42dfb | ||
|
a609e61a90 | ||
|
afdb387cd8 | ||
|
dc4e4f90a2 | ||
|
fdc20f87a6 | ||
|
35a2d221a3 | ||
|
daa4e9ff90 | ||
|
2ca29f1aaf | ||
|
77d682da9d | ||
|
8fffac6927 | ||
|
5f6fbcea08 | ||
|
00cb0faca8 | ||
|
bfdf6fcc66 | ||
|
bcaa1dd060 | ||
|
0e2d626ddd | ||
|
9221d5d7a8 | ||
|
9d63e57d1f | ||
|
3bc1eea0d8 | ||
|
7769f83701 | ||
|
650bd94716 | ||
|
36b226d48f | ||
|
f2e2f0c777 | ||
|
6f76679804 | ||
|
7073015a23 | ||
|
89fd03079b | ||
|
1c45b7a8a9 | ||
|
60f5c9fb19 | ||
|
c360e641e9 | ||
|
6f3c632c24 | ||
|
09b866e171 | ||
|
166d12b00c | ||
|
2b8e6a68f8 | ||
|
d105a7edc6 | ||
|
5d29af3d15 | ||
|
ca04de463d | ||
|
946826eec7 | ||
|
76d5a36391 | ||
|
56f9c77f0e | ||
|
0de136341a | ||
|
1339ecb2f8 | ||
|
efe9316703 | ||
|
851a01aed6 | ||
|
b845766597 | ||
|
fa26734e07 | ||
|
12f01118b0 | ||
|
7fc60f4ee9 | ||
|
58bb440283 | ||
|
7ad4362357 | ||
|
6c52477f59 | ||
|
116283ff64 | ||
|
7274f3d0e9 | ||
|
3166b1f0ac | ||
|
39ee263819 | ||
|
a7ed6b341c | ||
|
cbd84b5817 | ||
|
6d1ded7502 | ||
|
5d0968f0af | ||
|
8d65880e24 | ||
|
b972fb037b | ||
|
5996d21aea | ||
|
afa0200bf0 | ||
|
e9137224b3 | ||
|
804181dda9 | ||
|
8fa17117df | ||
|
3b859145c2 | ||
|
04c09f1961 | ||
|
bf82b87323 | ||
|
b6eb74e340 | ||
|
3d40084b83 | ||
|
52294cdda7 | ||
|
2eeb588efe | ||
|
4ac0f573ef | ||
|
3892a9f4ab | ||
|
3995d37da5 | ||
|
e4a75d7932 | ||
|
e00eb564e9 | ||
|
10c87c151b | ||
|
228cd9bb90 | ||
|
566fbbaefd | ||
|
74c09c852a | ||
|
fd178b8748 | ||
|
a57a8e9918 | ||
|
1f9fefe7f5 | ||
|
8b4774dcac | ||
|
a99cc4ca16 | ||
|
9cafc3fd8b | ||
|
329e3dd5ad | ||
|
1d9e0a4f40 | ||
|
7ad53cb7ff | ||
|
b2ad479d17 | ||
|
4ac6dc3732 | ||
|
cc7bda4fff | ||
|
50ad078b7b | ||
|
4947f13cd0 | ||
|
7f09e523e8 | ||
|
4fe14732a2 | ||
|
ff6f9a6704 | ||
|
0c26548601 | ||
|
5401bea27f | ||
|
7a6d33a9a5 | ||
|
fa2a36d9bc | ||
|
55949fede6 | ||
|
7fc875195f | ||
|
c6fe5a7e12 | ||
|
ae21d2fd94 | ||
|
77481f1386 | ||
|
d86d169dd5 | ||
|
b9f9f361fa | ||
|
ab39a25c75 | ||
|
a146fa1c68 | ||
|
e0c1e9a98c | ||
|
086041e2f8 | ||
|
74da856544 | ||
|
9edf47df7b | ||
|
238cec17ae | ||
|
50534b7158 | ||
|
9cd4209724 | ||
|
33a81c2c6f | ||
|
deef31955b | ||
|
9dac2cec2d | ||
|
6ec371cd9e | ||
|
13081db1f5 | ||
|
b07ea5eaec | ||
|
5599253009 | ||
|
98ce1a3fd3 | ||
|
ba5c3caf88 | ||
|
b5c39537be | ||
|
1c7c76e4fb | ||
|
557194591a | ||
|
27e70a8f6c | ||
|
a4c81e4968 | ||
|
7986c3abcd | ||
|
a1ebfd4494 | ||
|
d19093bd50 | ||
|
24eb7c2578 | ||
|
e7db6759e4 | ||
|
b364c87c42 | ||
|
9222d94510 | ||
|
edd9221cd2 | ||
|
bc8a2ea071 | ||
|
7527923371 | ||
|
20783b8b50 | ||
|
bf2a5555c0 | ||
|
fb8e8b2d16 | ||
|
b62985a9a5 | ||
|
e31fed95b4 | ||
|
3fd0f70f6a | ||
|
33c62efc32 | ||
|
6b4ddd336c | ||
|
c12b4b80f8 | ||
|
064fafe932 | ||
|
ac1a5b9a12 | ||
|
a15777491a | ||
|
d8571dd6bf | ||
|
c0fa4245ce | ||
|
8814ae42bc | ||
|
0f63dc2402 | ||
|
dde97ea8da | ||
|
30bb6ce1a4 | ||
|
c89b49f743 | ||
|
6f4a888416 | ||
|
f5edd7ae51 | ||
|
96820c1c6b | ||
|
c95e2b5911 | ||
|
374560f018 | ||
|
ff99fe529e | ||
|
e095109da1 | ||
|
d68afc5bc9 | ||
|
76c1951036 | ||
|
e8bfe2a946 | ||
|
3dc8b61b7f | ||
|
a82f41841d | ||
|
30a4ab191a | ||
|
ac9c69ace7 | ||
|
85f6de25e4 | ||
|
538eee7b6a | ||
|
9f54ae2873 | ||
|
01cb57016f | ||
|
290f64dbaa | ||
|
adb4b03cd5 | ||
|
0eee52f34b | ||
|
d3f0687cf7 | ||
|
a4d6cf970c | ||
|
3019cb0c99 | ||
|
ddd258f922 | ||
|
07ad0cf34f | ||
|
9c99bef704 | ||
|
ffbc8386b9 | ||
|
4abdba643c | ||
|
3e0304fe6e | ||
|
fbf56be213 | ||
|
54f54fcca7 | ||
|
facfd79f9a | ||
|
3110bb937d | ||
|
cb2520802d | ||
|
f779958250 | ||
|
8abc7dca39 | ||
|
ea0c2f219c | ||
|
481ef51e23 | ||
|
5b995f713b | ||
|
75a2485407 | ||
|
58f6ab72ed | ||
|
2dc48df5bc | ||
|
18848d226a | ||
|
a32a9a7ef5 | ||
|
bae1404893 | ||
|
06d0ad9a4e | ||
|
f631b55791 | ||
|
bf1b87cd91 | ||
|
1c35b3da44 |
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@@ -6,8 +6,8 @@
|
||||
|
||||
---
|
||||
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.04.17*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.04.17**
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.06.12*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with an outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.06.12**
|
||||
|
||||
### Before submitting an *issue* make sure you have:
|
||||
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2017.04.17
|
||||
[debug] youtube-dl version 2017.06.12
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
2
.gitignore
vendored
2
.gitignore
vendored
@@ -35,8 +35,8 @@ updates_key.pem
|
||||
*.mkv
|
||||
*.swf
|
||||
*.part
|
||||
*.ytdl
|
||||
*.swp
|
||||
test/testdata
|
||||
test/local_parameters.json
|
||||
.tox
|
||||
youtube-dl.zsh
|
||||
|
8
AUTHORS
8
AUTHORS
@@ -212,3 +212,11 @@ Xiao Di Guan
|
||||
Thomas Winant
|
||||
Daniel Twardowski
|
||||
Jeremie Jarosh
|
||||
Gerard Rovira
|
||||
Marvin Ewald
|
||||
Frédéric Bournival
|
||||
Timendum
|
||||
gritstub
|
||||
Adam Voss
|
||||
Mike Fährmann
|
||||
Jan Kundrát
|
||||
|
293
ChangeLog
293
ChangeLog
@@ -1,3 +1,296 @@
|
||||
version 2017.06.12
|
||||
|
||||
Core
|
||||
* [utils] Handle compat_HTMLParseError in extract_attributes (#13349)
|
||||
+ [compat] Introduce compat_HTMLParseError
|
||||
* [utils] Improve unified_timestamp
|
||||
* [extractor/generic] Ensure format id is unicode string
|
||||
* [extractor/common] Return unicode string from _match_id
|
||||
+ [YoutubeDL] Sanitize more fields (#13313)
|
||||
|
||||
Extractors
|
||||
+ [xfileshare] Add support for rapidvideo.tv (#13348)
|
||||
* [xfileshare] Modernize and pass Referer
|
||||
+ [rutv] Add support for testplayer.vgtrk.com (#13347)
|
||||
+ [newgrounds] Extract more metadata (#13232)
|
||||
+ [newgrounds:playlist] Add support for playlists (#10611)
|
||||
* [newgrounds] Improve formats and uploader extraction (#13346)
|
||||
* [msn] Fix formats extraction
|
||||
* [turbo] Ensure format id is string
|
||||
* [sexu] Ensure height is int
|
||||
* [jove] Ensure comment count is int
|
||||
* [golem] Ensure format id is string
|
||||
* [gfycat] Ensure filesize is int
|
||||
* [foxgay] Ensure height is int
|
||||
* [flickr] Ensure format id is string
|
||||
* [sohu] Fix numeric fields
|
||||
* [safari] Improve authentication detection (#13319)
|
||||
* [liveleak] Ensure height is int (#13313)
|
||||
* [streamango] Make title optional (#13292)
|
||||
* [rtlnl] Improve URL regular expression (#13295)
|
||||
* [tvplayer] Fix extraction (#13291)
|
||||
|
||||
|
||||
version 2017.06.05
|
||||
|
||||
Core
|
||||
* [YoutubeDL] Don't emit ANSI escape codes on Windows (#13270)
|
||||
|
||||
Extractors
|
||||
+ [bandcamp:weekly] Add support for bandcamp weekly (#12758)
|
||||
* [pornhub:playlist] Fix extraction (#13281)
|
||||
- [godtv] Remove extractor (#13175)
|
||||
* [safari] Fix typo (#13252)
|
||||
* [youtube] Improve chapters extraction (#13247)
|
||||
* [1tv] Lower preference for HTTP formats (#13246)
|
||||
* [francetv] Relax URL regular expression
|
||||
* [drbonanza] Fix extraction (#13231)
|
||||
* [packtpub] Fix authentication (#13240)
|
||||
|
||||
|
||||
version 2017.05.29
|
||||
|
||||
Extractors
|
||||
* [youtube] Fix DASH MPD extraction for videos with non-encrypted format URLs
|
||||
(#13211)
|
||||
* [xhamster] Fix uploader and like/dislike count extraction (#13216)
|
||||
+ [xhamster] Extract categories (#11728)
|
||||
+ [abcnews] Add support for embed URLs (#12851)
|
||||
* [gaskrank] Fix extraction (#12493)
|
||||
* [medialaan] Fix videos with missing videoUrl (#12774)
|
||||
* [dvtv] Fix playlist support
|
||||
+ [dvtv] Add support for DASH and HLS formats (#3063)
|
||||
+ [beam:vod] Add support for beam.pro/mixer.com VODs (#13032)
|
||||
* [cbsinteractive] Relax URL regular expression (#13213)
|
||||
* [adn] Fix formats extraction
|
||||
+ [youku] Extract more metadata (#10433)
|
||||
* [cbsnews] Fix extraction (#13205)
|
||||
|
||||
|
||||
version 2017.05.26
|
||||
|
||||
Core
|
||||
+ [utils] strip_jsonp() can recognize more patterns
|
||||
* [postprocessor/ffmpeg] Fix metadata filename handling on Python 2 (#13182)
|
||||
|
||||
Extractors
|
||||
+ [youtube] DASH MPDs with cipher signatures are recognized now (#11381)
|
||||
+ [bbc] Add support for authentication
|
||||
* [tudou] Merge into youku extractor (#12214)
|
||||
* [youku:show] Fix extraction
|
||||
* [youku] Fix extraction (#13191)
|
||||
* [udemy] Fix extraction for outputs' format entries without URL (#13192)
|
||||
* [vimeo] Fix formats' sorting (#13189)
|
||||
* [cbsnews] Fix extraction for 60 Minutes videos (#12861)
|
||||
|
||||
|
||||
version 2017.05.23
|
||||
|
||||
Core
|
||||
+ [downloader/external] Pass -loglevel to ffmpeg downloader (#13183)
|
||||
+ [adobepass] Add support for Bright House Networks (#13149)
|
||||
|
||||
Extractors
|
||||
+ [streamcz] Add support for subtitles (#13174)
|
||||
* [youtube] Fix DASH manifest signature decryption (#8944, #13156)
|
||||
* [toggle] Relax URL regular expression (#13172)
|
||||
* [toypics] Fix extraction (#13077)
|
||||
* [njpwworld] Fix extraction (#13162, #13169)
|
||||
+ [hitbox] Add support for smashcast.tv (#13154)
|
||||
* [mitele] Update app key regular expression (#13158)
|
||||
|
||||
|
||||
version 2017.05.18.1
|
||||
|
||||
Core
|
||||
* [jsinterp] Fix typo and cleanup regular expressions (#13134)
|
||||
|
||||
|
||||
version 2017.05.18
|
||||
|
||||
Core
|
||||
+ [jsinterp] Add support for quoted names and indexers (#13123, #13124, #13125,
|
||||
#13126, #13128, #13129, #13130, #13131, #13132)
|
||||
+ [extractor/common] Add support for schemeless URLs in _extract_wowza_formats
|
||||
(#13088, #13092)
|
||||
+ [utils] Recognize more audio codecs (#13081)
|
||||
|
||||
Extractors
|
||||
+ [vier] Extract more metadata (#12539)
|
||||
* [vier] Improve extraction (#12801)
|
||||
+ Add support for authentication
|
||||
* Bypass authentication when no credentials provided
|
||||
* Improve extraction robustness
|
||||
* [dailymail] Fix sources extraction (#13057)
|
||||
* [dailymotion] Extend URL regular expression (#13079)
|
||||
|
||||
|
||||
version 2017.05.14
|
||||
|
||||
Core
|
||||
+ [extractor/common] Respect Width and Height attributes in ISM manifests
|
||||
+ [postprocessor/metadatafromtitle] Add regular expression syntax support for
|
||||
--metadata-from-title (#13065)
|
||||
|
||||
Extractors
|
||||
+ [mediaset] Add support for video.mediaset.it (#12708, #12964)
|
||||
* [orf:radio] Fix extraction (#11643, #12926)
|
||||
* [aljazeera] Extend URL regular expression (#13053)
|
||||
* [imdb] Relax URL regular expression (#13056)
|
||||
+ [francetv] Add support for mobile.france.tv (#13068)
|
||||
+ [upskill] Add support for upskillcourses.com (#13043)
|
||||
* [thescene] Fix extraction (#13061)
|
||||
* [condenast] Improve embed support
|
||||
* [liveleak] Fix extraction (#12053)
|
||||
+ [douyu] Support Douyu shows (#12228)
|
||||
* [myspace] Improve URL regular expression (#13040)
|
||||
* [adultswim] Use desktop platform in assets URL (#13041)
|
||||
|
||||
|
||||
version 2017.05.09
|
||||
|
||||
Core
|
||||
* [YoutubeDL] Force --restrict-filenames when no locale is set on all python
|
||||
versions (#13027)
|
||||
|
||||
Extractors
|
||||
* [francetv] Adapt to site redesign (#13034)
|
||||
+ [packtpub] Add support for authentication (#12622)
|
||||
* [drtv] Lower preference for SignLanguage formats (#13013, #13016)
|
||||
+ [cspan] Add support for brightcove live embeds (#13028)
|
||||
* [vrv] Extract DASH formats and subtitles
|
||||
* [funimation] Fix authentication (#13021)
|
||||
* [adultswim] Fix extraction (#8640, #10950, #11042, #12121)
|
||||
+ Add support for Adobe Pass authentication
|
||||
+ Add support for live streams
|
||||
+ Add support for show pages
|
||||
* [turner] Extract thumbnail, is_live and strip description
|
||||
+ [nonktube] Add support for nonktube.com (#8647, #13024)
|
||||
+ [nuevo] Pass headers to _extract_nuevo
|
||||
* [nbc] Improve extraction (#12364)
|
||||
|
||||
|
||||
version 2017.05.07
|
||||
|
||||
Common
|
||||
* [extractor/common] Fix typo in _extract_akamai_formats
|
||||
+ [postprocessor/ffmpeg] Embed chapters into media file with --add-metadata
|
||||
+ [extractor/common] Introduce chapters meta field
|
||||
|
||||
Extractors
|
||||
* [youtube] Fix authentication (#12820, #12927, #12973, #12992, #12993, #12995,
|
||||
#13003)
|
||||
* [bilibili] Fix video downloading (#13001)
|
||||
* [rmcdecouverte] Fix extraction (#12937)
|
||||
* [theplatform] Extract chapters
|
||||
* [bandcamp] Fix thumbnail extraction (#12980)
|
||||
* [pornhub] Extend URL regular expression (#12996)
|
||||
+ [youtube] Extract chapters
|
||||
+ [nrk] Extract chapters
|
||||
+ [vice] Add support for ooyala embeds in article pages
|
||||
+ [vice] Support vice articles (#12968)
|
||||
* [vice] Fix extraction for non en_us videos (#12967)
|
||||
* [gdcvault] Fix extraction for some videos (#12733)
|
||||
* [pbs] Improve multipart video support (#12981)
|
||||
* [laola1tv] Fix extraction (#12880)
|
||||
+ [cda] Support birthday verification (#12789)
|
||||
* [leeco] Fix extraction (#12974)
|
||||
+ [pbs] Extract chapters
|
||||
* [amp] Improve thumbnail and subtitles extraction
|
||||
* [foxsports] Fix extraction (#12945)
|
||||
- [coub] Remove comment count extraction (#12941)
|
||||
|
||||
|
||||
version 2017.05.01
|
||||
|
||||
Core
|
||||
+ [extractor/common] Extract view count from JSON-LD
|
||||
* [utils] Improve unified_timestamp
|
||||
+ [utils] Add video/mp2t to mimetype2ext
|
||||
* [downloader/external] Properly handle live stream downloading cancellation
|
||||
(#8932)
|
||||
+ [utils] Add support for unicode whitespace in clean_html on python 2 (#12906)
|
||||
|
||||
Extractors
|
||||
* [infoq] Make audio format extraction non fatal (#12938)
|
||||
* [brightcove] Allow whitespace around attribute names in embedded code
|
||||
+ [zaq1] Add support for zaq1.pl (#12693)
|
||||
+ [xvideos] Extract duration (#12828)
|
||||
* [vevo] Fix extraction (#12879)
|
||||
+ [noovo] Add support for noovo.ca (#12792)
|
||||
+ [washingtonpost] Add support for embeds (#12699)
|
||||
* [yandexmusic:playlist] Fix extraction for python 3 (#12888)
|
||||
* [anvato] Improve extraction (#12913)
|
||||
* Promote to regular shortcut based extractor
|
||||
* Add mcp to access key mapping table
|
||||
* Add support for embeds extraction
|
||||
* Add support for anvato embeds in generic extractor
|
||||
* [xtube] Fix extraction for older FLV videos (#12734)
|
||||
* [tvplayer] Fix extraction (#12908)
|
||||
|
||||
|
||||
version 2017.04.28
|
||||
|
||||
Core
|
||||
+ [adobepass] Use geo verification headers for all requests
|
||||
- [downloader/fragment] Remove assert for resume_len when no fragments
|
||||
downloaded
|
||||
+ [extractor/common] Add manifest_url for explicit group rendition formats
|
||||
* [extractor/common] Fix manifest_url for m3u8 formats
|
||||
- [extractor/common] Don't list master m3u8 playlists in format list (#12832)
|
||||
|
||||
Extractors
|
||||
* [aenetworks] Fix extraction for shows with single season
|
||||
+ [go] Add support for Disney, DisneyJunior and DisneyXD show pages
|
||||
* [youtube] Recognize new locale-based player URLs (#12885)
|
||||
+ [streamable] Add support for new embedded URL schema (#12844)
|
||||
* [arte:+7] Relax URL regular expression (#12837)
|
||||
|
||||
|
||||
version 2017.04.26
|
||||
|
||||
Core
|
||||
* Introduce --keep-fragments for keeping fragments of fragmented download
|
||||
on disk after download is finished
|
||||
* [YoutubeDL] Fix output template for missing timestamp (#12796)
|
||||
* [socks] Handle cases where credentials are required but missing
|
||||
* [extractor/common] Improve HLS extraction (#12211)
|
||||
* Extract m3u8 parsing to separate method
|
||||
* Improve rendition groups extraction
|
||||
* Build stream name according to stream GROUP-ID
|
||||
* Ignore reference to AUDIO group without URI when stream has no CODECS
|
||||
* Use float for scaled tbr in _parse_m3u8_formats
|
||||
* [utils] Add support for TTML styles in dfxp2srt
|
||||
* [downloader/hls] No need to download keys for fragments that have been
|
||||
already downloaded
|
||||
* [downloader/fragment] Improve fragment downloading
|
||||
* Resume immediately
|
||||
* Don't concatenate fragments and decrypt them on every resume
|
||||
* Optimize disk storage usage, don't store intermediate fragments on disk
|
||||
* Store bookkeeping download state file
|
||||
+ [extractor/common] Add support for multiple getters in try_get
|
||||
+ [extractor/common] Add support for video of WebPage context in _json_ld
|
||||
(#12778)
|
||||
+ [extractor/common] Relax JWPlayer regular expression and remove
|
||||
duplicate URLs (#12768)
|
||||
|
||||
Extractors
|
||||
* [iqiyi] Fix extraction of Yule videos
|
||||
* [vidio] Improve extraction and sort formats
|
||||
+ [brightcove] Match only video elements with data-video-id attribute
|
||||
* [iqiyi] Fix playlist detection (#12504)
|
||||
- [azubu] Remove extractor (#12813)
|
||||
* [porn91] Fix extraction (#12814)
|
||||
* [vidzi] Fix extraction (#12793)
|
||||
+ [amp] Extract error message (#12795)
|
||||
+ [xfileshare] Add support for gorillavid.com and daclips.com (#12776)
|
||||
* [instagram] Fix extraction (#12777)
|
||||
+ [generic] Support Brightcove videos in <iframe> (#12482)
|
||||
+ [brightcove] Support URLs with bcpid instead of playerID (#12482)
|
||||
* [brightcove] Fix _extract_url (#12782)
|
||||
+ [odnoklassniki] Extract HLS formats
|
||||
|
||||
|
||||
version 2017.04.17
|
||||
|
||||
Extractors
|
||||
|
2
Makefile
2
Makefile
@@ -1,7 +1,7 @@
|
||||
all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites
|
||||
|
||||
clean:
|
||||
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part* *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.3gp *.wav *.ape *.swf *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe
|
||||
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part* *.ytdl *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.3gp *.wav *.ape *.swf *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe
|
||||
find . -name "*.pyc" -delete
|
||||
find . -name "*.class" -delete
|
||||
|
||||
|
56
README.md
56
README.md
@@ -145,18 +145,18 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
||||
--max-views COUNT Do not download any videos with more than
|
||||
COUNT views
|
||||
--match-filter FILTER Generic video filter. Specify any key (see
|
||||
help for -o for a list of available keys)
|
||||
to match if the key is present, !key to
|
||||
check if the key is not present, key >
|
||||
NUMBER (like "comment_count > 12", also
|
||||
works with >=, <, <=, !=, =) to compare
|
||||
against a number, key = 'LITERAL' (like
|
||||
"uploader = 'Mike Smith'", also works with
|
||||
!=) to match against a string literal and &
|
||||
to require multiple matches. Values which
|
||||
are not known are excluded unless you put a
|
||||
question mark (?) after the operator. For
|
||||
example, to only match videos that have
|
||||
the "OUTPUT TEMPLATE" for a list of
|
||||
available keys) to match if the key is
|
||||
present, !key to check if the key is not
|
||||
present, key > NUMBER (like "comment_count
|
||||
> 12", also works with >=, <, <=, !=, =) to
|
||||
compare against a number, key = 'LITERAL'
|
||||
(like "uploader = 'Mike Smith'", also works
|
||||
with !=) to match against a string literal
|
||||
and & to require multiple matches. Values
|
||||
which are not known are excluded unless you
|
||||
put a question mark (?) after the operator.
|
||||
For example, to only match videos that have
|
||||
been liked more than 100 times and disliked
|
||||
less than 50 times (or the dislike
|
||||
functionality is not available at the given
|
||||
@@ -187,6 +187,9 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
||||
and ISM)
|
||||
--abort-on-unavailable-fragment Abort downloading when some fragment is not
|
||||
available
|
||||
--keep-fragments Keep downloaded fragments on disk after
|
||||
downloading is finished; fragments are
|
||||
erased by default
|
||||
--buffer-size SIZE Size of download buffer (e.g. 1024 or 16K)
|
||||
(default is 1024)
|
||||
--no-resize-buffer Do not automatically adjust the buffer
|
||||
@@ -274,8 +277,8 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
||||
--get-filename Simulate, quiet but print output filename
|
||||
--get-format Simulate, quiet but print output format
|
||||
-j, --dump-json Simulate, quiet but print JSON information.
|
||||
See --output for a description of available
|
||||
keys.
|
||||
See the "OUTPUT TEMPLATE" for a description
|
||||
of available keys.
|
||||
-J, --dump-single-json Simulate, quiet but print JSON information
|
||||
for each command-line argument. If the URL
|
||||
refers to a playlist, dump the whole
|
||||
@@ -397,12 +400,14 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
||||
--add-metadata Write metadata to the video file
|
||||
--metadata-from-title FORMAT Parse additional metadata like song title /
|
||||
artist from the video title. The format
|
||||
syntax is the same as --output, the parsed
|
||||
parameters replace existing values.
|
||||
Additional templates: %(album)s,
|
||||
%(artist)s. Example: --metadata-from-title
|
||||
"%(artist)s - %(title)s" matches a title
|
||||
like "Coldplay - Paradise"
|
||||
syntax is the same as --output. Regular
|
||||
expression with named capture groups may
|
||||
also be used. The parsed parameters replace
|
||||
existing values. Example: --metadata-from-
|
||||
title "%(artist)s - %(title)s" matches a
|
||||
title like "Coldplay - Paradise". Example
|
||||
(regex): --metadata-from-title
|
||||
"(?P<artist>.+?) - (?P<title>.+)"
|
||||
--xattrs Write metadata to the video file's xattrs
|
||||
(using dublin core and xdg standards)
|
||||
--fixup POLICY Automatically correct known faults of the
|
||||
@@ -469,7 +474,10 @@ machine twitch login my_twitch_account_name password my_twitch_password
|
||||
```
|
||||
To activate authentication with the `.netrc` file you should pass `--netrc` to youtube-dl or place it in the [configuration file](#configuration).
|
||||
|
||||
On Windows you may also need to setup the `%HOME%` environment variable manually.
|
||||
On Windows you may also need to set up the `%HOME%` environment variable manually. For example:
|
||||
```
|
||||
set HOME=%USERPROFILE%
|
||||
```
|
||||
|
||||
# OUTPUT TEMPLATE
|
||||
|
||||
@@ -527,13 +535,14 @@ The basic usage is not to set any template arguments when downloading a single f
|
||||
- `playlist_id` (string): Playlist identifier
|
||||
- `playlist_title` (string): Playlist title
|
||||
|
||||
|
||||
Available for the video that belongs to some logical chapter or section:
|
||||
|
||||
- `chapter` (string): Name or title of the chapter the video belongs to
|
||||
- `chapter_number` (numeric): Number of the chapter the video belongs to
|
||||
- `chapter_id` (string): Id of the chapter the video belongs to
|
||||
|
||||
Available for the video that is an episode of some series or programme:
|
||||
|
||||
- `series` (string): Title of the series or programme the video episode belongs to
|
||||
- `season` (string): Title of the season the video episode belongs to
|
||||
- `season_number` (numeric): Number of the season the video episode belongs to
|
||||
@@ -543,6 +552,7 @@ Available for the video that is an episode of some series or programme:
|
||||
- `episode_id` (string): Id of the video episode
|
||||
|
||||
Available for the media that is a track or a part of a music album:
|
||||
|
||||
- `track` (string): Title of the track
|
||||
- `track_number` (numeric): Number of the track within an album or a disc
|
||||
- `track_id` (string): Id of the track
|
||||
@@ -644,7 +654,7 @@ Also filtering work for comparisons `=` (equals), `!=` (not equals), `^=` (begin
|
||||
- `acodec`: Name of the audio codec in use
|
||||
- `vcodec`: Name of the video codec in use
|
||||
- `container`: Name of the container format
|
||||
- `protocol`: The protocol that will be used for the actual download, lower-case (`http`, `https`, `rtsp`, `rtmp`, `rtmpe`, `mms`, `f4m`, `ism`, `m3u8`, or `m3u8_native`)
|
||||
- `protocol`: The protocol that will be used for the actual download, lower-case (`http`, `https`, `rtsp`, `rtmp`, `rtmpe`, `mms`, `f4m`, `ism`, `http_dash_segments`, `m3u8`, or `m3u8_native`)
|
||||
- `format_id`: A short description of the format
|
||||
|
||||
Note that none of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by particular extractor, i.e. the metadata offered by the video hoster.
|
||||
|
@@ -45,6 +45,7 @@
|
||||
- **anderetijden**: npo.nl and ntr.nl
|
||||
- **AnimeOnDemand**
|
||||
- **anitube.se**
|
||||
- **Anvato**
|
||||
- **AnySex**
|
||||
- **Aparat**
|
||||
- **AppleConnect**
|
||||
@@ -81,20 +82,18 @@
|
||||
- **AZMedien**: AZ Medien videos
|
||||
- **AZMedienPlaylist**: AZ Medien playlists
|
||||
- **AZMedienShowPlaylist**: AZ Medien show playlists
|
||||
- **Azubu**
|
||||
- **AzubuLive**
|
||||
- **BaiduVideo**: 百度视频
|
||||
- **bambuser**
|
||||
- **bambuser:channel**
|
||||
- **Bandcamp**
|
||||
- **Bandcamp:album**
|
||||
- **Bandcamp:weekly**
|
||||
- **bangumi.bilibili.com**: BiliBili番剧
|
||||
- **bbc**: BBC
|
||||
- **bbc.co.uk**: BBC iPlayer
|
||||
- **bbc.co.uk:article**: BBC articles
|
||||
- **bbc.co.uk:iplayer:playlist**
|
||||
- **bbc.co.uk:playlist**
|
||||
- **Beam:live**
|
||||
- **Beatport**
|
||||
- **Beeg**
|
||||
- **BehindKink**
|
||||
@@ -217,6 +216,7 @@
|
||||
- **DiscoveryVR**
|
||||
- **Disney**
|
||||
- **Dotsub**
|
||||
- **DouyuShow**
|
||||
- **DouyuTV**: 斗鱼
|
||||
- **DPlay**
|
||||
- **DPlayIt**
|
||||
@@ -282,7 +282,8 @@
|
||||
- **france2.fr:generation-quoi**
|
||||
- **FranceCulture**
|
||||
- **FranceInter**
|
||||
- **francetv**: France 2, 3, 4, 5 and Ô
|
||||
- **FranceTV**
|
||||
- **FranceTVEmbed**
|
||||
- **francetvinfo.fr**
|
||||
- **Freesound**
|
||||
- **freespeech.org**
|
||||
@@ -310,7 +311,6 @@
|
||||
- **Go**
|
||||
- **Go90**
|
||||
- **GodTube**
|
||||
- **GodTV**
|
||||
- **Golem**
|
||||
- **GoogleDrive**
|
||||
- **Goshgay**
|
||||
@@ -433,6 +433,7 @@
|
||||
- **MDR**: MDR.DE and KiKA
|
||||
- **media.ccc.de**
|
||||
- **Medialaan**
|
||||
- **Mediaset**
|
||||
- **Medici**
|
||||
- **Meipai**: 美拍
|
||||
- **MelonVOD**
|
||||
@@ -451,6 +452,8 @@
|
||||
- **mixcloud:playlist**
|
||||
- **mixcloud:stream**
|
||||
- **mixcloud:user**
|
||||
- **Mixer:live**
|
||||
- **Mixer:vod**
|
||||
- **MLB**
|
||||
- **Mnet**
|
||||
- **MoeVideo**: LetitBit video services: moevideo.net, playreplay.net and videochart.net
|
||||
@@ -509,6 +512,7 @@
|
||||
- **netease:song**: 网易云音乐
|
||||
- **Netzkino**
|
||||
- **Newgrounds**
|
||||
- **NewgroundsPlaylist**
|
||||
- **Newstube**
|
||||
- **NextMedia**: 蘋果日報
|
||||
- **NextMediaActionNews**: 蘋果日報 - 動新聞
|
||||
@@ -531,6 +535,8 @@
|
||||
- **NJPWWorld**: 新日本プロレスワールド
|
||||
- **NobelPrize**
|
||||
- **Noco**
|
||||
- **NonkTube**
|
||||
- **Noovo**
|
||||
- **Normalboots**
|
||||
- **NosVideo**
|
||||
- **Nova**: TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz
|
||||
@@ -602,7 +608,6 @@
|
||||
- **pluralsight**
|
||||
- **pluralsight:course**
|
||||
- **plus.google**: Google Plus
|
||||
- **pluzz.francetv.fr**
|
||||
- **podomatic**
|
||||
- **Pokemon**
|
||||
- **PolskieRadio**
|
||||
@@ -800,16 +805,13 @@
|
||||
- **ToonGoggles**
|
||||
- **Tosh**: Tosh.0
|
||||
- **tou.tv**
|
||||
- **Toypics**: Toypics user profile
|
||||
- **Toypics**: Toypics video
|
||||
- **ToypicsUser**: Toypics user profile
|
||||
- **TrailerAddict** (Currently broken)
|
||||
- **Trilulilu**
|
||||
- **TruTV**
|
||||
- **Tube8**
|
||||
- **TubiTv**
|
||||
- **tudou**
|
||||
- **tudou:album**
|
||||
- **tudou:playlist**
|
||||
- **Tumblr**
|
||||
- **tunein:clip**
|
||||
- **tunein:program**
|
||||
@@ -860,6 +862,8 @@
|
||||
- **uol.com.br**
|
||||
- **uplynk**
|
||||
- **uplynk:preplay**
|
||||
- **Upskill**
|
||||
- **UpskillCourse**
|
||||
- **Urort**: NRK P3 Urørt
|
||||
- **URPlay**
|
||||
- **USANetwork**
|
||||
@@ -879,9 +883,10 @@
|
||||
- **VGTV**: VGTV, BTTV, FTV, Aftenposten and Aftonbladet
|
||||
- **vh1.com**
|
||||
- **Viafree**
|
||||
- **Vice**
|
||||
- **vice**
|
||||
- **vice:article**
|
||||
- **vice:show**
|
||||
- **Viceland**
|
||||
- **ViceShow**
|
||||
- **Vidbit**
|
||||
- **Viddler**
|
||||
- **Videa**
|
||||
@@ -970,7 +975,7 @@
|
||||
- **WSJArticle**
|
||||
- **XBef**
|
||||
- **XboxClips**
|
||||
- **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE, Vid ABC, VidBom, vidlo
|
||||
- **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE, Vid ABC, VidBom, vidlo, RapidVideo.TV
|
||||
- **XHamster**
|
||||
- **XHamsterEmbed**
|
||||
- **xiami:album**: 虾米音乐 - 专辑
|
||||
@@ -1015,6 +1020,7 @@
|
||||
- **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword)
|
||||
- **youtube:watchlater**: Youtube watch later list, ":ytwatchlater" for short (requires authentication)
|
||||
- **Zapiks**
|
||||
- **Zaq1**
|
||||
- **ZDF**
|
||||
- **ZDFChannel**
|
||||
- **zingmp3**: mp3.zing.vn
|
||||
|
@@ -3,12 +3,13 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
# Allow direct execution
|
||||
import io
|
||||
import os
|
||||
import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from test.helper import FakeYDL, expect_dict
|
||||
from test.helper import FakeYDL, expect_dict, expect_value
|
||||
from youtube_dl.extractor.common import InfoExtractor
|
||||
from youtube_dl.extractor import YoutubeIE, get_info_extractor
|
||||
from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError
|
||||
@@ -175,6 +176,318 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
|
||||
}]
|
||||
})
|
||||
|
||||
def test_parse_m3u8_formats(self):
|
||||
_TEST_CASES = [
|
||||
(
|
||||
# https://github.com/rg3/youtube-dl/issues/11507
|
||||
# http://pluzz.francetv.fr/videos/le_ministere.html
|
||||
'pluzz_francetv_11507',
|
||||
'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/master.m3u8?caption=2017%2F16%2F156589847-1492488987.m3u8%3Afra%3AFrancais&audiotrack=0%3Afra%3AFrancais',
|
||||
[{
|
||||
'url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_0_av.m3u8?null=0',
|
||||
'manifest_url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/master.m3u8?caption=2017%2F16%2F156589847-1492488987.m3u8%3Afra%3AFrancais&audiotrack=0%3Afra%3AFrancais',
|
||||
'ext': 'mp4',
|
||||
'format_id': '180',
|
||||
'protocol': 'm3u8',
|
||||
'acodec': 'mp4a.40.2',
|
||||
'vcodec': 'avc1.66.30',
|
||||
'tbr': 180,
|
||||
'width': 256,
|
||||
'height': 144,
|
||||
}, {
|
||||
'url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_1_av.m3u8?null=0',
|
||||
'manifest_url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/master.m3u8?caption=2017%2F16%2F156589847-1492488987.m3u8%3Afra%3AFrancais&audiotrack=0%3Afra%3AFrancais',
|
||||
'ext': 'mp4',
|
||||
'format_id': '303',
|
||||
'protocol': 'm3u8',
|
||||
'acodec': 'mp4a.40.2',
|
||||
'vcodec': 'avc1.66.30',
|
||||
'tbr': 303,
|
||||
'width': 320,
|
||||
'height': 180,
|
||||
}, {
|
||||
'url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_2_av.m3u8?null=0',
|
||||
'manifest_url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/master.m3u8?caption=2017%2F16%2F156589847-1492488987.m3u8%3Afra%3AFrancais&audiotrack=0%3Afra%3AFrancais',
|
||||
'ext': 'mp4',
|
||||
'format_id': '575',
|
||||
'protocol': 'm3u8',
|
||||
'acodec': 'mp4a.40.2',
|
||||
'vcodec': 'avc1.66.30',
|
||||
'tbr': 575,
|
||||
'width': 512,
|
||||
'height': 288,
|
||||
}, {
|
||||
'url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_3_av.m3u8?null=0',
|
||||
'manifest_url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/master.m3u8?caption=2017%2F16%2F156589847-1492488987.m3u8%3Afra%3AFrancais&audiotrack=0%3Afra%3AFrancais',
|
||||
'ext': 'mp4',
|
||||
'format_id': '831',
|
||||
'protocol': 'm3u8',
|
||||
'acodec': 'mp4a.40.2',
|
||||
'vcodec': 'avc1.77.30',
|
||||
'tbr': 831,
|
||||
'width': 704,
|
||||
'height': 396,
|
||||
}, {
|
||||
'url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_4_av.m3u8?null=0',
|
||||
'manifest_url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/master.m3u8?caption=2017%2F16%2F156589847-1492488987.m3u8%3Afra%3AFrancais&audiotrack=0%3Afra%3AFrancais',
|
||||
'ext': 'mp4',
|
||||
'protocol': 'm3u8',
|
||||
'format_id': '1467',
|
||||
'acodec': 'mp4a.40.2',
|
||||
'vcodec': 'avc1.77.30',
|
||||
'tbr': 1467,
|
||||
'width': 1024,
|
||||
'height': 576,
|
||||
}]
|
||||
),
|
||||
(
|
||||
# https://github.com/rg3/youtube-dl/issues/11995
|
||||
# http://teamcoco.com/video/clueless-gamer-super-bowl-for-honor
|
||||
'teamcoco_11995',
|
||||
'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8',
|
||||
[{
|
||||
'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-audio-160k_v4.m3u8',
|
||||
'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8',
|
||||
'ext': 'mp4',
|
||||
'format_id': 'audio-0-Default',
|
||||
'protocol': 'm3u8',
|
||||
'vcodec': 'none',
|
||||
}, {
|
||||
'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-audio-64k_v4.m3u8',
|
||||
'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8',
|
||||
'ext': 'mp4',
|
||||
'format_id': 'audio-1-Default',
|
||||
'protocol': 'm3u8',
|
||||
'vcodec': 'none',
|
||||
}, {
|
||||
'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-audio-64k_v4.m3u8',
|
||||
'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8',
|
||||
'ext': 'mp4',
|
||||
'format_id': '71',
|
||||
'protocol': 'm3u8',
|
||||
'acodec': 'mp4a.40.5',
|
||||
'vcodec': 'none',
|
||||
'tbr': 71,
|
||||
}, {
|
||||
'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-400k_v4.m3u8',
|
||||
'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8',
|
||||
'ext': 'mp4',
|
||||
'format_id': '413',
|
||||
'protocol': 'm3u8',
|
||||
'acodec': 'none',
|
||||
'vcodec': 'avc1.42001e',
|
||||
'tbr': 413,
|
||||
'width': 400,
|
||||
'height': 224,
|
||||
}, {
|
||||
'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-400k_v4.m3u8',
|
||||
'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8',
|
||||
'ext': 'mp4',
|
||||
'format_id': '522',
|
||||
'protocol': 'm3u8',
|
||||
'acodec': 'none',
|
||||
'vcodec': 'avc1.42001e',
|
||||
'tbr': 522,
|
||||
'width': 400,
|
||||
'height': 224,
|
||||
}, {
|
||||
'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-1m_v4.m3u8',
|
||||
'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8',
|
||||
'ext': 'mp4',
|
||||
'format_id': '1205',
|
||||
'protocol': 'm3u8',
|
||||
'acodec': 'none',
|
||||
'vcodec': 'avc1.4d001e',
|
||||
'tbr': 1205,
|
||||
'width': 640,
|
||||
'height': 360,
|
||||
}, {
|
||||
'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-2m_v4.m3u8',
|
||||
'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8',
|
||||
'ext': 'mp4',
|
||||
'format_id': '2374',
|
||||
'protocol': 'm3u8',
|
||||
'acodec': 'none',
|
||||
'vcodec': 'avc1.4d001f',
|
||||
'tbr': 2374,
|
||||
'width': 1024,
|
||||
'height': 576,
|
||||
}]
|
||||
),
|
||||
(
|
||||
# https://github.com/rg3/youtube-dl/issues/12211
|
||||
# http://video.toggle.sg/en/series/whoopie-s-world/ep3/478601
|
||||
'toggle_mobile_12211',
|
||||
'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8',
|
||||
[{
|
||||
'url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_sa2ntrdg/name/a.mp4/index.m3u8',
|
||||
'manifest_url': 'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8',
|
||||
'ext': 'mp4',
|
||||
'format_id': 'audio-English',
|
||||
'protocol': 'm3u8',
|
||||
'language': 'eng',
|
||||
'vcodec': 'none',
|
||||
}, {
|
||||
'url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_r7y0nitg/name/a.mp4/index.m3u8',
|
||||
'manifest_url': 'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8',
|
||||
'ext': 'mp4',
|
||||
'format_id': 'audio-Undefined',
|
||||
'protocol': 'm3u8',
|
||||
'language': 'und',
|
||||
'vcodec': 'none',
|
||||
}, {
|
||||
'url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_qlk9hlzr/name/a.mp4/index.m3u8',
|
||||
'manifest_url': 'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8',
|
||||
'ext': 'mp4',
|
||||
'format_id': '155',
|
||||
'protocol': 'm3u8',
|
||||
'tbr': 155.648,
|
||||
'width': 320,
|
||||
'height': 180,
|
||||
}, {
|
||||
'url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_oefackmi/name/a.mp4/index.m3u8',
|
||||
'manifest_url': 'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8',
|
||||
'ext': 'mp4',
|
||||
'format_id': '502',
|
||||
'protocol': 'm3u8',
|
||||
'tbr': 502.784,
|
||||
'width': 480,
|
||||
'height': 270,
|
||||
}, {
|
||||
'url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/12/pv/1/flavorId/0_vyg9pj7k/name/a.mp4/index.m3u8',
|
||||
'manifest_url': 'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8',
|
||||
'ext': 'mp4',
|
||||
'format_id': '827',
|
||||
'protocol': 'm3u8',
|
||||
'tbr': 827.392,
|
||||
'width': 640,
|
||||
'height': 360,
|
||||
}, {
|
||||
'url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/12/pv/1/flavorId/0_50n4psvx/name/a.mp4/index.m3u8',
|
||||
'manifest_url': 'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8',
|
||||
'ext': 'mp4',
|
||||
'format_id': '1396',
|
||||
'protocol': 'm3u8',
|
||||
'tbr': 1396.736,
|
||||
'width': 854,
|
||||
'height': 480,
|
||||
}]
|
||||
),
|
||||
(
|
||||
# http://www.twitch.tv/riotgames/v/6528877
|
||||
'twitch_vod',
|
||||
'https://usher.ttvnw.net/vod/6528877?allow_source=true&allow_audio_only=true&allow_spectre=true&player=twitchweb&nauth=%7B%22user_id%22%3Anull%2C%22vod_id%22%3A6528877%2C%22expires%22%3A1492887874%2C%22chansub%22%3A%7B%22restricted_bitrates%22%3A%5B%5D%7D%2C%22privileged%22%3Afalse%2C%22https_required%22%3Afalse%7D&nauthsig=3e29296a6824a0f48f9e731383f77a614fc79bee',
|
||||
[{
|
||||
'url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/audio_only/index-muted-HM49I092CC.m3u8',
|
||||
'manifest_url': 'https://usher.ttvnw.net/vod/6528877?allow_source=true&allow_audio_only=true&allow_spectre=true&player=twitchweb&nauth=%7B%22user_id%22%3Anull%2C%22vod_id%22%3A6528877%2C%22expires%22%3A1492887874%2C%22chansub%22%3A%7B%22restricted_bitrates%22%3A%5B%5D%7D%2C%22privileged%22%3Afalse%2C%22https_required%22%3Afalse%7D&nauthsig=3e29296a6824a0f48f9e731383f77a614fc79bee',
|
||||
'ext': 'mp4',
|
||||
'format_id': 'Audio Only',
|
||||
'protocol': 'm3u8',
|
||||
'acodec': 'mp4a.40.2',
|
||||
'vcodec': 'none',
|
||||
'tbr': 182.725,
|
||||
}, {
|
||||
'url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/mobile/index-muted-HM49I092CC.m3u8',
|
||||
'manifest_url': 'https://usher.ttvnw.net/vod/6528877?allow_source=true&allow_audio_only=true&allow_spectre=true&player=twitchweb&nauth=%7B%22user_id%22%3Anull%2C%22vod_id%22%3A6528877%2C%22expires%22%3A1492887874%2C%22chansub%22%3A%7B%22restricted_bitrates%22%3A%5B%5D%7D%2C%22privileged%22%3Afalse%2C%22https_required%22%3Afalse%7D&nauthsig=3e29296a6824a0f48f9e731383f77a614fc79bee',
|
||||
'ext': 'mp4',
|
||||
'format_id': 'Mobile',
|
||||
'protocol': 'm3u8',
|
||||
'acodec': 'mp4a.40.2',
|
||||
'vcodec': 'avc1.42C00D',
|
||||
'tbr': 280.474,
|
||||
'width': 400,
|
||||
'height': 226,
|
||||
}, {
|
||||
'url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/low/index-muted-HM49I092CC.m3u8',
|
||||
'manifest_url': 'https://usher.ttvnw.net/vod/6528877?allow_source=true&allow_audio_only=true&allow_spectre=true&player=twitchweb&nauth=%7B%22user_id%22%3Anull%2C%22vod_id%22%3A6528877%2C%22expires%22%3A1492887874%2C%22chansub%22%3A%7B%22restricted_bitrates%22%3A%5B%5D%7D%2C%22privileged%22%3Afalse%2C%22https_required%22%3Afalse%7D&nauthsig=3e29296a6824a0f48f9e731383f77a614fc79bee',
|
||||
'ext': 'mp4',
|
||||
'format_id': 'Low',
|
||||
'protocol': 'm3u8',
|
||||
'acodec': 'mp4a.40.2',
|
||||
'vcodec': 'avc1.42C01E',
|
||||
'tbr': 628.347,
|
||||
'width': 640,
|
||||
'height': 360,
|
||||
}, {
|
||||
'url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/medium/index-muted-HM49I092CC.m3u8',
|
||||
'manifest_url': 'https://usher.ttvnw.net/vod/6528877?allow_source=true&allow_audio_only=true&allow_spectre=true&player=twitchweb&nauth=%7B%22user_id%22%3Anull%2C%22vod_id%22%3A6528877%2C%22expires%22%3A1492887874%2C%22chansub%22%3A%7B%22restricted_bitrates%22%3A%5B%5D%7D%2C%22privileged%22%3Afalse%2C%22https_required%22%3Afalse%7D&nauthsig=3e29296a6824a0f48f9e731383f77a614fc79bee',
|
||||
'ext': 'mp4',
|
||||
'format_id': 'Medium',
|
||||
'protocol': 'm3u8',
|
||||
'acodec': 'mp4a.40.2',
|
||||
'vcodec': 'avc1.42C01E',
|
||||
'tbr': 893.387,
|
||||
'width': 852,
|
||||
'height': 480,
|
||||
}, {
|
||||
'url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/high/index-muted-HM49I092CC.m3u8',
|
||||
'manifest_url': 'https://usher.ttvnw.net/vod/6528877?allow_source=true&allow_audio_only=true&allow_spectre=true&player=twitchweb&nauth=%7B%22user_id%22%3Anull%2C%22vod_id%22%3A6528877%2C%22expires%22%3A1492887874%2C%22chansub%22%3A%7B%22restricted_bitrates%22%3A%5B%5D%7D%2C%22privileged%22%3Afalse%2C%22https_required%22%3Afalse%7D&nauthsig=3e29296a6824a0f48f9e731383f77a614fc79bee',
|
||||
'ext': 'mp4',
|
||||
'format_id': 'High',
|
||||
'protocol': 'm3u8',
|
||||
'acodec': 'mp4a.40.2',
|
||||
'vcodec': 'avc1.42C01F',
|
||||
'tbr': 1603.789,
|
||||
'width': 1280,
|
||||
'height': 720,
|
||||
}, {
|
||||
'url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/chunked/index-muted-HM49I092CC.m3u8',
|
||||
'manifest_url': 'https://usher.ttvnw.net/vod/6528877?allow_source=true&allow_audio_only=true&allow_spectre=true&player=twitchweb&nauth=%7B%22user_id%22%3Anull%2C%22vod_id%22%3A6528877%2C%22expires%22%3A1492887874%2C%22chansub%22%3A%7B%22restricted_bitrates%22%3A%5B%5D%7D%2C%22privileged%22%3Afalse%2C%22https_required%22%3Afalse%7D&nauthsig=3e29296a6824a0f48f9e731383f77a614fc79bee',
|
||||
'ext': 'mp4',
|
||||
'format_id': 'Source',
|
||||
'protocol': 'm3u8',
|
||||
'acodec': 'mp4a.40.2',
|
||||
'vcodec': 'avc1.100.31',
|
||||
'tbr': 3214.134,
|
||||
'width': 1280,
|
||||
'height': 720,
|
||||
}]
|
||||
),
|
||||
(
|
||||
# http://www.vidio.com/watch/165683-dj_ambred-booyah-live-2015
|
||||
# EXT-X-STREAM-INF tag with NAME attribute that is not defined
|
||||
# in HLS specification
|
||||
'vidio',
|
||||
'https://www.vidio.com/videos/165683/playlist.m3u8',
|
||||
[{
|
||||
'url': 'https://cdn1-a.production.vidio.static6.com/uploads/165683/dj_ambred-4383-b300.mp4.m3u8',
|
||||
'manifest_url': 'https://www.vidio.com/videos/165683/playlist.m3u8',
|
||||
'ext': 'mp4',
|
||||
'format_id': '270p 3G',
|
||||
'protocol': 'm3u8',
|
||||
'tbr': 300,
|
||||
'width': 480,
|
||||
'height': 270,
|
||||
}, {
|
||||
'url': 'https://cdn1-a.production.vidio.static6.com/uploads/165683/dj_ambred-4383-b600.mp4.m3u8',
|
||||
'manifest_url': 'https://www.vidio.com/videos/165683/playlist.m3u8',
|
||||
'ext': 'mp4',
|
||||
'format_id': '360p SD',
|
||||
'protocol': 'm3u8',
|
||||
'tbr': 600,
|
||||
'width': 640,
|
||||
'height': 360,
|
||||
}, {
|
||||
'url': 'https://cdn1-a.production.vidio.static6.com/uploads/165683/dj_ambred-4383-b1200.mp4.m3u8',
|
||||
'manifest_url': 'https://www.vidio.com/videos/165683/playlist.m3u8',
|
||||
'ext': 'mp4',
|
||||
'format_id': '720p HD',
|
||||
'protocol': 'm3u8',
|
||||
'tbr': 1200,
|
||||
'width': 1280,
|
||||
'height': 720,
|
||||
}]
|
||||
)
|
||||
]
|
||||
|
||||
for m3u8_file, m3u8_url, expected_formats in _TEST_CASES:
|
||||
with io.open('./test/testdata/m3u8/%s.m3u8' % m3u8_file,
|
||||
mode='r', encoding='utf-8') as f:
|
||||
formats = self.ie._parse_m3u8_formats(
|
||||
f.read(), m3u8_url, ext='mp4')
|
||||
self.ie._sort_formats(formats)
|
||||
expect_value(self, formats, expected_formats, None)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -225,7 +225,7 @@ def generator(test_case, tname):
|
||||
format_bytes(got_fsize)))
|
||||
if 'md5' in tc:
|
||||
md5_for_file = _file_md5(tc_filename)
|
||||
self.assertEqual(md5_for_file, tc['md5'])
|
||||
self.assertEqual(tc['md5'], md5_for_file)
|
||||
# Finally, check test cases' data again but this time against
|
||||
# extracted data from info JSON file written during processing
|
||||
info_json_fn = os.path.splitext(tc_filename)[0] + '.info.json'
|
||||
|
@@ -44,6 +44,7 @@ from youtube_dl.utils import (
|
||||
limit_length,
|
||||
mimetype2ext,
|
||||
month_by_name,
|
||||
multipart_encode,
|
||||
ohdave_rsa_encrypt,
|
||||
OnDemandPagedList,
|
||||
orderedSet,
|
||||
@@ -338,6 +339,8 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(unified_timestamp('UNKNOWN DATE FORMAT'), None)
|
||||
self.assertEqual(unified_timestamp('May 16, 2016 11:15 PM'), 1463440500)
|
||||
self.assertEqual(unified_timestamp('Feb 7, 2016 at 6:35 pm'), 1454870100)
|
||||
self.assertEqual(unified_timestamp('2017-03-30T17:52:41Q'), 1490896361)
|
||||
self.assertEqual(unified_timestamp('Sep 11, 2013 | 5:49 AM'), 1378878540)
|
||||
|
||||
def test_determine_ext(self):
|
||||
self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4')
|
||||
@@ -619,6 +622,16 @@ class TestUtil(unittest.TestCase):
|
||||
'http://example.com/path', {'test': '第二行тест'})),
|
||||
query_dict('http://example.com/path?test=%E7%AC%AC%E4%BA%8C%E8%A1%8C%D1%82%D0%B5%D1%81%D1%82'))
|
||||
|
||||
def test_multipart_encode(self):
|
||||
self.assertEqual(
|
||||
multipart_encode({b'field': b'value'}, boundary='AAAAAA')[0],
|
||||
b'--AAAAAA\r\nContent-Disposition: form-data; name="field"\r\n\r\nvalue\r\n--AAAAAA--\r\n')
|
||||
self.assertEqual(
|
||||
multipart_encode({'欄位'.encode('utf-8'): '值'.encode('utf-8')}, boundary='AAAAAA')[0],
|
||||
b'--AAAAAA\r\nContent-Disposition: form-data; name="\xe6\xac\x84\xe4\xbd\x8d"\r\n\r\n\xe5\x80\xbc\r\n--AAAAAA--\r\n')
|
||||
self.assertRaises(
|
||||
ValueError, multipart_encode, {b'field': b'value'}, boundary='value')
|
||||
|
||||
def test_dict_get(self):
|
||||
FALSE_VALUES = {
|
||||
'none': None,
|
||||
@@ -666,6 +679,14 @@ class TestUtil(unittest.TestCase):
|
||||
d = json.loads(stripped)
|
||||
self.assertEqual(d, {'status': 'success'})
|
||||
|
||||
stripped = strip_jsonp('window.cb && window.cb({"status": "success"});')
|
||||
d = json.loads(stripped)
|
||||
self.assertEqual(d, {'status': 'success'})
|
||||
|
||||
stripped = strip_jsonp('window.cb && cb({"status": "success"});')
|
||||
d = json.loads(stripped)
|
||||
self.assertEqual(d, {'status': 'success'})
|
||||
|
||||
def test_uppercase_escape(self):
|
||||
self.assertEqual(uppercase_escape('aä'), 'aä')
|
||||
self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐')
|
||||
@@ -895,10 +916,13 @@ class TestUtil(unittest.TestCase):
|
||||
supports_outside_bmp = False
|
||||
if supports_outside_bmp:
|
||||
self.assertEqual(extract_attributes('<e x="Smile 😀!">'), {'x': 'Smile \U0001f600!'})
|
||||
# Malformed HTML should not break attributes extraction on older Python
|
||||
self.assertEqual(extract_attributes('<mal"formed/>'), {})
|
||||
|
||||
def test_clean_html(self):
|
||||
self.assertEqual(clean_html('a:\nb'), 'a: b')
|
||||
self.assertEqual(clean_html('a:\n "b"'), 'a: "b"')
|
||||
self.assertEqual(clean_html('a<br>\xa0b'), 'a\nb')
|
||||
|
||||
def test_intlist_to_bytes(self):
|
||||
self.assertEqual(
|
||||
@@ -1069,6 +1093,47 @@ The first line
|
||||
'''
|
||||
self.assertEqual(dfxp2srt(dfxp_data_no_default_namespace), srt_data)
|
||||
|
||||
dfxp_data_with_style = '''<?xml version="1.0" encoding="utf-8"?>
|
||||
<tt xmlns="http://www.w3.org/2006/10/ttaf1" xmlns:ttp="http://www.w3.org/2006/10/ttaf1#parameter" ttp:timeBase="media" xmlns:tts="http://www.w3.org/2006/10/ttaf1#style" xml:lang="en" xmlns:ttm="http://www.w3.org/2006/10/ttaf1#metadata">
|
||||
<head>
|
||||
<styling>
|
||||
<style id="s2" style="s0" tts:color="cyan" tts:fontWeight="bold" />
|
||||
<style id="s1" style="s0" tts:color="yellow" tts:fontStyle="italic" />
|
||||
<style id="s3" style="s0" tts:color="lime" tts:textDecoration="underline" />
|
||||
<style id="s0" tts:backgroundColor="black" tts:fontStyle="normal" tts:fontSize="16" tts:fontFamily="sansSerif" tts:color="white" />
|
||||
</styling>
|
||||
</head>
|
||||
<body tts:textAlign="center" style="s0">
|
||||
<div>
|
||||
<p begin="00:00:02.08" id="p0" end="00:00:05.84">default style<span tts:color="red">custom style</span></p>
|
||||
<p style="s2" begin="00:00:02.08" id="p0" end="00:00:05.84"><span tts:color="lime">part 1<br /></span><span tts:color="cyan">part 2</span></p>
|
||||
<p style="s3" begin="00:00:05.84" id="p1" end="00:00:09.56">line 3<br />part 3</p>
|
||||
<p style="s1" tts:textDecoration="underline" begin="00:00:09.56" id="p2" end="00:00:12.36"><span style="s2" tts:color="lime">inner<br /> </span>style</p>
|
||||
</div>
|
||||
</body>
|
||||
</tt>'''
|
||||
srt_data = '''1
|
||||
00:00:02,080 --> 00:00:05,839
|
||||
<font color="white" face="sansSerif" size="16">default style<font color="red">custom style</font></font>
|
||||
|
||||
2
|
||||
00:00:02,080 --> 00:00:05,839
|
||||
<b><font color="cyan" face="sansSerif" size="16"><font color="lime">part 1
|
||||
</font>part 2</font></b>
|
||||
|
||||
3
|
||||
00:00:05,839 --> 00:00:09,560
|
||||
<u><font color="lime">line 3
|
||||
part 3</font></u>
|
||||
|
||||
4
|
||||
00:00:09,560 --> 00:00:12,359
|
||||
<i><u><font color="yellow"><font color="lime">inner
|
||||
</font>style</font></u></i>
|
||||
|
||||
'''
|
||||
self.assertEqual(dfxp2srt(dfxp_data_with_style), srt_data)
|
||||
|
||||
def test_cli_option(self):
|
||||
self.assertEqual(cli_option({'proxy': '127.0.0.1:3128'}, '--proxy', 'proxy'), ['--proxy', '127.0.0.1:3128'])
|
||||
self.assertEqual(cli_option({'proxy': None}, '--proxy', 'proxy'), [])
|
||||
|
275
test/test_youtube_chapters.py
Normal file
275
test/test_youtube_chapters.py
Normal file
@@ -0,0 +1,275 @@
|
||||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
# Allow direct execution
|
||||
import os
|
||||
import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from test.helper import expect_value
|
||||
from youtube_dl.extractor import YoutubeIE
|
||||
|
||||
|
||||
class TestYoutubeChapters(unittest.TestCase):
|
||||
|
||||
_TEST_CASES = [
|
||||
(
|
||||
# https://www.youtube.com/watch?v=A22oy8dFjqc
|
||||
# pattern: 00:00 - <title>
|
||||
'''This is the absolute ULTIMATE experience of Queen's set at LIVE AID, this is the best video mixed to the absolutely superior stereo radio broadcast. This vastly superior audio mix takes a huge dump on all of the official mixes. Best viewed in 1080p. ENJOY! ***MAKE SURE TO READ THE DESCRIPTION***<br /><a href="#" onclick="yt.www.watch.player.seekTo(00*60+36);return false;">00:36</a> - Bohemian Rhapsody<br /><a href="#" onclick="yt.www.watch.player.seekTo(02*60+42);return false;">02:42</a> - Radio Ga Ga<br /><a href="#" onclick="yt.www.watch.player.seekTo(06*60+53);return false;">06:53</a> - Ay Oh!<br /><a href="#" onclick="yt.www.watch.player.seekTo(07*60+34);return false;">07:34</a> - Hammer To Fall<br /><a href="#" onclick="yt.www.watch.player.seekTo(12*60+08);return false;">12:08</a> - Crazy Little Thing Called Love<br /><a href="#" onclick="yt.www.watch.player.seekTo(16*60+03);return false;">16:03</a> - We Will Rock You<br /><a href="#" onclick="yt.www.watch.player.seekTo(17*60+18);return false;">17:18</a> - We Are The Champions<br /><a href="#" onclick="yt.www.watch.player.seekTo(21*60+12);return false;">21:12</a> - Is This The World We Created...?<br /><br />Short song analysis:<br /><br />- "Bohemian Rhapsody": Although it's a short medley version, it's one of the best performances of the ballad section, with Freddie nailing the Bb4s with the correct studio phrasing (for the first time ever!).<br /><br />- "Radio Ga Ga": Although it's missing one chorus, this is one of - if not the best - the best versions ever, Freddie nails all the Bb4s and sounds very clean! Spike Edney's Roland Jupiter 8 also really shines through on this mix, compared to the DVD releases!<br /><br />- "Audience Improv": A great improv, Freddie sounds strong and confident. You gotta love when he sustains that A4 for 4 seconds!<br /><br />- "Hammer To Fall": Despite missing a verse and a chorus, it's a strong version (possibly the best ever). 
Freddie sings the song amazingly, and even ad-libs a C#5 and a C5! Also notice how heavy Brian's guitar sounds compared to the thin DVD mixes - it roars!<br /><br />- "Crazy Little Thing Called Love": A great version, the crowd loves the song, the jam is great as well! Only downside to this is the slight feedback issues.<br /><br />- "We Will Rock You": Although cut down to the 1st verse and chorus, Freddie sounds strong. He nails the A4, and the solo from Dr. May is brilliant!<br /><br />- "We Are the Champions": Perhaps the high-light of the performance - Freddie is very daring on this version, he sustains the pre-chorus Bb4s, nails the 1st C5, belts great A4s, but most importantly: He nails the chorus Bb4s, in all 3 choruses! This is the only time he has ever done so! It has to be said though, the last one sounds a bit rough, but that's a side effect of belting high notes for the past 18 minutes, with nodules AND laryngitis!<br /><br />- "Is This The World We Created... ?": Freddie and Brian perform a beautiful version of this, and it is one of the best versions ever. It's both sad and hilarious that a couple of BBC engineers are talking over the song, one of them being completely oblivious of the fact that he is interrupting the performance, on live television... Which was being televised to almost 2 billion homes.<br /><br /><br />All rights go to their respective owners!<br />-----Copyright Disclaimer Under Section 107 of the Copyright Act 1976, allowance is made for fair use for purposes such as criticism, comment, news reporting, teaching, scholarship, and research. Fair use is a use permitted by copyright statute that might otherwise be infringing. Non-profit, educational or personal use tips the balance in favor of fair use''',
|
||||
1477,
|
||||
[{
|
||||
'start_time': 36,
|
||||
'end_time': 162,
|
||||
'title': 'Bohemian Rhapsody',
|
||||
}, {
|
||||
'start_time': 162,
|
||||
'end_time': 413,
|
||||
'title': 'Radio Ga Ga',
|
||||
}, {
|
||||
'start_time': 413,
|
||||
'end_time': 454,
|
||||
'title': 'Ay Oh!',
|
||||
}, {
|
||||
'start_time': 454,
|
||||
'end_time': 728,
|
||||
'title': 'Hammer To Fall',
|
||||
}, {
|
||||
'start_time': 728,
|
||||
'end_time': 963,
|
||||
'title': 'Crazy Little Thing Called Love',
|
||||
}, {
|
||||
'start_time': 963,
|
||||
'end_time': 1038,
|
||||
'title': 'We Will Rock You',
|
||||
}, {
|
||||
'start_time': 1038,
|
||||
'end_time': 1272,
|
||||
'title': 'We Are The Champions',
|
||||
}, {
|
||||
'start_time': 1272,
|
||||
'end_time': 1477,
|
||||
'title': 'Is This The World We Created...?',
|
||||
}]
|
||||
),
|
||||
(
|
||||
# https://www.youtube.com/watch?v=ekYlRhALiRQ
|
||||
# pattern: <num>. <title> 0:00
|
||||
'1. Those Beaten Paths of Confusion <a href="#" onclick="yt.www.watch.player.seekTo(0*60+00);return false;">0:00</a><br />2. Beyond the Shadows of Emptiness & Nothingness <a href="#" onclick="yt.www.watch.player.seekTo(11*60+47);return false;">11:47</a><br />3. Poison Yourself...With Thought <a href="#" onclick="yt.www.watch.player.seekTo(26*60+30);return false;">26:30</a><br />4. The Agents of Transformation <a href="#" onclick="yt.www.watch.player.seekTo(35*60+57);return false;">35:57</a><br />5. Drowning in the Pain of Consciousness <a href="#" onclick="yt.www.watch.player.seekTo(44*60+32);return false;">44:32</a><br />6. Deny the Disease of Life <a href="#" onclick="yt.www.watch.player.seekTo(53*60+07);return false;">53:07</a><br /><br />More info/Buy: http://crepusculonegro.storenvy.com/products/257645-cn-03-arizmenda-within-the-vacuum-of-infinity<br /><br />No copyright is intended. The rights to this video are assumed by the owner and its affiliates.',
|
||||
4009,
|
||||
[{
|
||||
'start_time': 0,
|
||||
'end_time': 707,
|
||||
'title': '1. Those Beaten Paths of Confusion',
|
||||
}, {
|
||||
'start_time': 707,
|
||||
'end_time': 1590,
|
||||
'title': '2. Beyond the Shadows of Emptiness & Nothingness',
|
||||
}, {
|
||||
'start_time': 1590,
|
||||
'end_time': 2157,
|
||||
'title': '3. Poison Yourself...With Thought',
|
||||
}, {
|
||||
'start_time': 2157,
|
||||
'end_time': 2672,
|
||||
'title': '4. The Agents of Transformation',
|
||||
}, {
|
||||
'start_time': 2672,
|
||||
'end_time': 3187,
|
||||
'title': '5. Drowning in the Pain of Consciousness',
|
||||
}, {
|
||||
'start_time': 3187,
|
||||
'end_time': 4009,
|
||||
'title': '6. Deny the Disease of Life',
|
||||
}]
|
||||
),
|
||||
(
|
||||
# https://www.youtube.com/watch?v=WjL4pSzog9w
|
||||
# pattern: 00:00 <title>
|
||||
'<a href="https://arizmenda.bandcamp.com/merch/despairs-depths-descended-cd" class="yt-uix-servicelink " data-target-new-window="True" data-servicelink="CDAQ6TgYACITCNf1raqT2dMCFdRjGAod_o0CBSj4HQ" data-url="https://arizmenda.bandcamp.com/merch/despairs-depths-descended-cd" rel="nofollow noopener" target="_blank">https://arizmenda.bandcamp.com/merch/...</a><br /><br /><a href="#" onclick="yt.www.watch.player.seekTo(00*60+00);return false;">00:00</a> Christening Unborn Deformities <br /><a href="#" onclick="yt.www.watch.player.seekTo(07*60+08);return false;">07:08</a> Taste of Purity<br /><a href="#" onclick="yt.www.watch.player.seekTo(16*60+16);return false;">16:16</a> Sculpting Sins of a Universal Tongue<br /><a href="#" onclick="yt.www.watch.player.seekTo(24*60+45);return false;">24:45</a> Birth<br /><a href="#" onclick="yt.www.watch.player.seekTo(31*60+24);return false;">31:24</a> Neves<br /><a href="#" onclick="yt.www.watch.player.seekTo(37*60+55);return false;">37:55</a> Libations in Limbo',
|
||||
2705,
|
||||
[{
|
||||
'start_time': 0,
|
||||
'end_time': 428,
|
||||
'title': 'Christening Unborn Deformities',
|
||||
}, {
|
||||
'start_time': 428,
|
||||
'end_time': 976,
|
||||
'title': 'Taste of Purity',
|
||||
}, {
|
||||
'start_time': 976,
|
||||
'end_time': 1485,
|
||||
'title': 'Sculpting Sins of a Universal Tongue',
|
||||
}, {
|
||||
'start_time': 1485,
|
||||
'end_time': 1884,
|
||||
'title': 'Birth',
|
||||
}, {
|
||||
'start_time': 1884,
|
||||
'end_time': 2275,
|
||||
'title': 'Neves',
|
||||
}, {
|
||||
'start_time': 2275,
|
||||
'end_time': 2705,
|
||||
'title': 'Libations in Limbo',
|
||||
}]
|
||||
),
|
||||
(
|
||||
# https://www.youtube.com/watch?v=o3r1sn-t3is
|
||||
# pattern: <title> 00:00 <note>
|
||||
'Download this show in MP3: <a href="http://sh.st/njZKK" class="yt-uix-servicelink " data-url="http://sh.st/njZKK" data-target-new-window="True" data-servicelink="CDAQ6TgYACITCK3j8_6o2dMCFVDCGAoduVAKKij4HQ" rel="nofollow noopener" target="_blank">http://sh.st/njZKK</a><br /><br />Setlist:<br />I-E-A-I-A-I-O <a href="#" onclick="yt.www.watch.player.seekTo(00*60+45);return false;">00:45</a><br />Suite-Pee <a href="#" onclick="yt.www.watch.player.seekTo(4*60+26);return false;">4:26</a> (Incomplete)<br />Attack <a href="#" onclick="yt.www.watch.player.seekTo(5*60+31);return false;">5:31</a> (First live performance since 2011)<br />Prison Song <a href="#" onclick="yt.www.watch.player.seekTo(8*60+42);return false;">8:42</a><br />Know <a href="#" onclick="yt.www.watch.player.seekTo(12*60+32);return false;">12:32</a> (First live performance since 2011)<br />Aerials <a href="#" onclick="yt.www.watch.player.seekTo(15*60+32);return false;">15:32</a><br />Soldier Side - Intro <a href="#" onclick="yt.www.watch.player.seekTo(19*60+13);return false;">19:13</a><br />B.Y.O.B. 
<a href="#" onclick="yt.www.watch.player.seekTo(20*60+09);return false;">20:09</a><br />Soil <a href="#" onclick="yt.www.watch.player.seekTo(24*60+32);return false;">24:32</a><br />Darts <a href="#" onclick="yt.www.watch.player.seekTo(27*60+48);return false;">27:48</a><br />Radio/Video <a href="#" onclick="yt.www.watch.player.seekTo(30*60+38);return false;">30:38</a><br />Hypnotize <a href="#" onclick="yt.www.watch.player.seekTo(35*60+05);return false;">35:05</a><br />Temper <a href="#" onclick="yt.www.watch.player.seekTo(38*60+08);return false;">38:08</a> (First live performance since 1999)<br />CUBErt <a href="#" onclick="yt.www.watch.player.seekTo(41*60+00);return false;">41:00</a><br />Needles <a href="#" onclick="yt.www.watch.player.seekTo(42*60+57);return false;">42:57</a><br />Deer Dance <a href="#" onclick="yt.www.watch.player.seekTo(46*60+27);return false;">46:27</a><br />Bounce <a href="#" onclick="yt.www.watch.player.seekTo(49*60+38);return false;">49:38</a><br />Suggestions <a href="#" onclick="yt.www.watch.player.seekTo(51*60+25);return false;">51:25</a><br />Psycho <a href="#" onclick="yt.www.watch.player.seekTo(53*60+52);return false;">53:52</a><br />Chop Suey! <a href="#" onclick="yt.www.watch.player.seekTo(58*60+13);return false;">58:13</a><br />Lonely Day <a href="#" onclick="yt.www.watch.player.seekTo(1*3600+01*60+15);return false;">1:01:15</a><br />Question! 
<a href="#" onclick="yt.www.watch.player.seekTo(1*3600+04*60+14);return false;">1:04:14</a><br />Lost in Hollywood <a href="#" onclick="yt.www.watch.player.seekTo(1*3600+08*60+10);return false;">1:08:10</a><br />Vicinity of Obscenity <a href="#" onclick="yt.www.watch.player.seekTo(1*3600+13*60+40);return false;">1:13:40</a>(First live performance since 2012)<br />Forest <a href="#" onclick="yt.www.watch.player.seekTo(1*3600+16*60+17);return false;">1:16:17</a><br />Cigaro <a href="#" onclick="yt.www.watch.player.seekTo(1*3600+20*60+02);return false;">1:20:02</a><br />Toxicity <a href="#" onclick="yt.www.watch.player.seekTo(1*3600+23*60+57);return false;">1:23:57</a>(with Chino Moreno)<br />Sugar <a href="#" onclick="yt.www.watch.player.seekTo(1*3600+27*60+53);return false;">1:27:53</a>',
|
||||
5640,
|
||||
[{
|
||||
'start_time': 45,
|
||||
'end_time': 266,
|
||||
'title': 'I-E-A-I-A-I-O',
|
||||
}, {
|
||||
'start_time': 266,
|
||||
'end_time': 331,
|
||||
'title': 'Suite-Pee (Incomplete)',
|
||||
}, {
|
||||
'start_time': 331,
|
||||
'end_time': 522,
|
||||
'title': 'Attack (First live performance since 2011)',
|
||||
}, {
|
||||
'start_time': 522,
|
||||
'end_time': 752,
|
||||
'title': 'Prison Song',
|
||||
}, {
|
||||
'start_time': 752,
|
||||
'end_time': 932,
|
||||
'title': 'Know (First live performance since 2011)',
|
||||
}, {
|
||||
'start_time': 932,
|
||||
'end_time': 1153,
|
||||
'title': 'Aerials',
|
||||
}, {
|
||||
'start_time': 1153,
|
||||
'end_time': 1209,
|
||||
'title': 'Soldier Side - Intro',
|
||||
}, {
|
||||
'start_time': 1209,
|
||||
'end_time': 1472,
|
||||
'title': 'B.Y.O.B.',
|
||||
}, {
|
||||
'start_time': 1472,
|
||||
'end_time': 1668,
|
||||
'title': 'Soil',
|
||||
}, {
|
||||
'start_time': 1668,
|
||||
'end_time': 1838,
|
||||
'title': 'Darts',
|
||||
}, {
|
||||
'start_time': 1838,
|
||||
'end_time': 2105,
|
||||
'title': 'Radio/Video',
|
||||
}, {
|
||||
'start_time': 2105,
|
||||
'end_time': 2288,
|
||||
'title': 'Hypnotize',
|
||||
}, {
|
||||
'start_time': 2288,
|
||||
'end_time': 2460,
|
||||
'title': 'Temper (First live performance since 1999)',
|
||||
}, {
|
||||
'start_time': 2460,
|
||||
'end_time': 2577,
|
||||
'title': 'CUBErt',
|
||||
}, {
|
||||
'start_time': 2577,
|
||||
'end_time': 2787,
|
||||
'title': 'Needles',
|
||||
}, {
|
||||
'start_time': 2787,
|
||||
'end_time': 2978,
|
||||
'title': 'Deer Dance',
|
||||
}, {
|
||||
'start_time': 2978,
|
||||
'end_time': 3085,
|
||||
'title': 'Bounce',
|
||||
}, {
|
||||
'start_time': 3085,
|
||||
'end_time': 3232,
|
||||
'title': 'Suggestions',
|
||||
}, {
|
||||
'start_time': 3232,
|
||||
'end_time': 3493,
|
||||
'title': 'Psycho',
|
||||
}, {
|
||||
'start_time': 3493,
|
||||
'end_time': 3675,
|
||||
'title': 'Chop Suey!',
|
||||
}, {
|
||||
'start_time': 3675,
|
||||
'end_time': 3854,
|
||||
'title': 'Lonely Day',
|
||||
}, {
|
||||
'start_time': 3854,
|
||||
'end_time': 4090,
|
||||
'title': 'Question!',
|
||||
}, {
|
||||
'start_time': 4090,
|
||||
'end_time': 4420,
|
||||
'title': 'Lost in Hollywood',
|
||||
}, {
|
||||
'start_time': 4420,
|
||||
'end_time': 4577,
|
||||
'title': 'Vicinity of Obscenity (First live performance since 2012)',
|
||||
}, {
|
||||
'start_time': 4577,
|
||||
'end_time': 4802,
|
||||
'title': 'Forest',
|
||||
}, {
|
||||
'start_time': 4802,
|
||||
'end_time': 5037,
|
||||
'title': 'Cigaro',
|
||||
}, {
|
||||
'start_time': 5037,
|
||||
'end_time': 5273,
|
||||
'title': 'Toxicity (with Chino Moreno)',
|
||||
}, {
|
||||
'start_time': 5273,
|
||||
'end_time': 5640,
|
||||
'title': 'Sugar',
|
||||
}]
|
||||
),
|
||||
(
|
||||
# https://www.youtube.com/watch?v=PkYLQbsqCE8
|
||||
# pattern: <num> - <title> [<latinized title>] 0:00:00
|
||||
'''Затемно (Zatemno) is an Obscure Black Metal Band from Russia.<br /><br />"Во прах (Vo prakh)'' Into The Ashes", Debut mini-album released may 6, 2016, by Death Knell Productions<br />Released on 6 panel digipak CD, limited to 100 copies only<br />And digital format on Bandcamp<br /><br />Tracklist<br /><br />1 - Во прах [Vo prakh] <a href="#" onclick="yt.www.watch.player.seekTo(0*3600+00*60+00);return false;">0:00:00</a><br />2 - Искупление [Iskupleniye] <a href="#" onclick="yt.www.watch.player.seekTo(0*3600+08*60+10);return false;">0:08:10</a><br />3 - Из серпов луны...[Iz serpov luny] <a href="#" onclick="yt.www.watch.player.seekTo(0*3600+14*60+30);return false;">0:14:30</a><br /><br />Links:<br /><a href="https://deathknellprod.bandcamp.com/album/--2" class="yt-uix-servicelink " data-target-new-window="True" data-url="https://deathknellprod.bandcamp.com/album/--2" data-servicelink="CC8Q6TgYACITCNP234Kr2dMCFcNxGAodQqsIwSj4HQ" target="_blank" rel="nofollow noopener">https://deathknellprod.bandcamp.com/a...</a><br /><a href="https://www.facebook.com/DeathKnellProd/" class="yt-uix-servicelink " data-target-new-window="True" data-url="https://www.facebook.com/DeathKnellProd/" data-servicelink="CC8Q6TgYACITCNP234Kr2dMCFcNxGAodQqsIwSj4HQ" target="_blank" rel="nofollow noopener">https://www.facebook.com/DeathKnellProd/</a><br /><br /><br />I don't have any right about this artifact, my only intention is to spread the music of the band, all rights are reserved to the Затемно (Zatemno) and his producers, Death Knell Productions.<br /><br />------------------------------------------------------------------<br /><br />Subscribe for more videos like this.<br />My link: <a href="https://web.facebook.com/AttackOfTheDragons" class="yt-uix-servicelink " data-target-new-window="True" data-url="https://web.facebook.com/AttackOfTheDragons" data-servicelink="CC8Q6TgYACITCNP234Kr2dMCFcNxGAodQqsIwSj4HQ" target="_blank" rel="nofollow 
noopener">https://web.facebook.com/AttackOfTheD...</a>''',
|
||||
1138,
|
||||
[{
|
||||
'start_time': 0,
|
||||
'end_time': 490,
|
||||
'title': '1 - Во прах [Vo prakh]',
|
||||
}, {
|
||||
'start_time': 490,
|
||||
'end_time': 870,
|
||||
'title': '2 - Искупление [Iskupleniye]',
|
||||
}, {
|
||||
'start_time': 870,
|
||||
'end_time': 1138,
|
||||
'title': '3 - Из серпов луны...[Iz serpov luny]',
|
||||
}]
|
||||
),
|
||||
(
|
||||
# https://www.youtube.com/watch?v=xZW70zEasOk
|
||||
# time point more than duration
|
||||
'''● LCS Spring finals: Saturday and Sunday from <a href="#" onclick="yt.www.watch.player.seekTo(13*60+30);return false;">13:30</a> outside the venue! <br />● PAX East: Fri, Sat & Sun - more info in tomorrows video on the main channel!''',
|
||||
283,
|
||||
[]
|
||||
),
|
||||
]
|
||||
|
||||
def test_youtube_chapters(self):
    """Check chapter extraction against every entry of ``_TEST_CASES``.

    Each entry is a ``(description, duration, expected_chapters)`` tuple;
    the chapters returned by ``YoutubeIE._extract_chapters`` for the
    description and duration are compared with the expected list.
    """
    for case in self._TEST_CASES:
        description, duration, expected_chapters = case
        # A fresh extractor instance is created for every case.
        extractor = YoutubeIE()
        chapters = extractor._extract_chapters(description, duration)
        expect_value(self, chapters, expected_chapters, None)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
14
test/testdata/m3u8/pluzz_francetv_11507.m3u8
vendored
Normal file
14
test/testdata/m3u8/pluzz_francetv_11507.m3u8
vendored
Normal file
@@ -0,0 +1,14 @@
|
||||
#EXTM3U
|
||||
|
||||
#EXT-X-VERSION:5
|
||||
|
||||
#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="Francais",DEFAULT=NO,FORCED=NO,URI="http://replayftv-pmd.francetv.fr/subtitles/2017/16/156589847-1492488987.m3u8",LANGUAGE="fra"
|
||||
|
||||
#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="aac",LANGUAGE="fra",NAME="Francais",DEFAULT=YES, AUTOSELECT=YES
|
||||
#EXT-X-STREAM-INF:SUBTITLES="subs",AUDIO="aac",PROGRAM-ID=1,BANDWIDTH=180000,RESOLUTION=256x144,CODECS="avc1.66.30, mp4a.40.2"
|
||||
http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_0_av.m3u8?null=0
|
||||
#EXT-X-STREAM-INF:SUBTITLES="subs",AUDIO="aac",PROGRAM-ID=1,BANDWIDTH=303000,RESOLUTION=320x180,CODECS="avc1.66.30, mp4a.40.2"
|
||||
http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_1_av.m3u8?null=0
|
||||
#EXT-X-STREAM-INF:SUBTITLES="subs",AUDIO="aac",PROGRAM-ID=1,BANDWIDTH=575000,RESOLUTION=512x288,CODECS="avc1.66.30, mp4a.40.2"
|
||||
http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_2_av.m3u8?null=0
|
||||
#EXT-X-STREAM-INF:SUBTITLES="subs",AUDIO="aac",PROGRAM-ID=1,BANDWIDTH=831000,RESOLUTION=704x396,CODECS="avc1.77.30, mp4a.40.2"
|
16
test/testdata/m3u8/teamcoco_11995.m3u8
vendored
Normal file
16
test/testdata/m3u8/teamcoco_11995.m3u8
vendored
Normal file
@@ -0,0 +1,16 @@
|
||||
#EXTM3U
|
||||
#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="audio-0",NAME="Default",AUTOSELECT=YES,DEFAULT=YES,URI="hls/CONAN_020217_Highlight_show-audio-160k_v4.m3u8"
|
||||
#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="audio-1",NAME="Default",AUTOSELECT=YES,DEFAULT=YES,URI="hls/CONAN_020217_Highlight_show-audio-64k_v4.m3u8"
|
||||
#EXT-X-I-FRAME-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=37862000,CODECS="avc1.4d001f",URI="hls/CONAN_020217_Highlight_show-2m_iframe.m3u8"
|
||||
#EXT-X-I-FRAME-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=18750000,CODECS="avc1.4d001e",URI="hls/CONAN_020217_Highlight_show-1m_iframe.m3u8"
|
||||
#EXT-X-I-FRAME-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=6535000,CODECS="avc1.42001e",URI="hls/CONAN_020217_Highlight_show-400k_iframe.m3u8"
|
||||
#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=2374000,RESOLUTION=1024x576,CODECS="avc1.4d001f,mp4a.40.2",AUDIO="audio-0"
|
||||
hls/CONAN_020217_Highlight_show-2m_v4.m3u8
|
||||
#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=1205000,RESOLUTION=640x360,CODECS="avc1.4d001e,mp4a.40.2",AUDIO="audio-0"
|
||||
hls/CONAN_020217_Highlight_show-1m_v4.m3u8
|
||||
#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=522000,RESOLUTION=400x224,CODECS="avc1.42001e,mp4a.40.2",AUDIO="audio-0"
|
||||
hls/CONAN_020217_Highlight_show-400k_v4.m3u8
|
||||
#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=413000,RESOLUTION=400x224,CODECS="avc1.42001e,mp4a.40.5",AUDIO="audio-1"
|
||||
hls/CONAN_020217_Highlight_show-400k_v4.m3u8
|
||||
#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=71000,CODECS="mp4a.40.5",AUDIO="audio-1"
|
||||
hls/CONAN_020217_Highlight_show-audio-64k_v4.m3u8
|
13
test/testdata/m3u8/toggle_mobile_12211.m3u8
vendored
Normal file
13
test/testdata/m3u8/toggle_mobile_12211.m3u8
vendored
Normal file
@@ -0,0 +1,13 @@
|
||||
#EXTM3U
|
||||
#EXT-X-VERSION:4
|
||||
#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="audio",LANGUAGE="eng",NAME="English",URI="http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_sa2ntrdg/name/a.mp4/index.m3u8"
|
||||
#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="audio",LANGUAGE="und",NAME="Undefined",URI="http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_r7y0nitg/name/a.mp4/index.m3u8"
|
||||
|
||||
#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=155648,RESOLUTION=320x180,AUDIO="audio"
|
||||
http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_qlk9hlzr/name/a.mp4/index.m3u8
|
||||
#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=502784,RESOLUTION=480x270,AUDIO="audio"
|
||||
http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_oefackmi/name/a.mp4/index.m3u8
|
||||
#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=827392,RESOLUTION=640x360,AUDIO="audio"
|
||||
http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/12/pv/1/flavorId/0_vyg9pj7k/name/a.mp4/index.m3u8
|
||||
#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=1396736,RESOLUTION=854x480,AUDIO="audio"
|
||||
http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/12/pv/1/flavorId/0_50n4psvx/name/a.mp4/index.m3u8
|
20
test/testdata/m3u8/twitch_vod.m3u8
vendored
Normal file
20
test/testdata/m3u8/twitch_vod.m3u8
vendored
Normal file
@@ -0,0 +1,20 @@
|
||||
#EXTM3U
|
||||
#EXT-X-TWITCH-INFO:ORIGIN="s3",CLUSTER="edgecast_vod",REGION="EU",MANIFEST-CLUSTER="edgecast_vod",USER-IP="109.171.17.81"
|
||||
#EXT-X-MEDIA:TYPE=VIDEO,GROUP-ID="chunked",NAME="Source",AUTOSELECT=YES,DEFAULT=YES
|
||||
#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=3214134,CODECS="avc1.100.31,mp4a.40.2",RESOLUTION="1280x720",VIDEO="chunked"
|
||||
https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/chunked/index-muted-HM49I092CC.m3u8
|
||||
#EXT-X-MEDIA:TYPE=VIDEO,GROUP-ID="high",NAME="High",AUTOSELECT=YES,DEFAULT=YES
|
||||
#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=1603789,CODECS="avc1.42C01F,mp4a.40.2",RESOLUTION="1280x720",VIDEO="high"
|
||||
https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/high/index-muted-HM49I092CC.m3u8
|
||||
#EXT-X-MEDIA:TYPE=VIDEO,GROUP-ID="medium",NAME="Medium",AUTOSELECT=YES,DEFAULT=YES
|
||||
#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=893387,CODECS="avc1.42C01E,mp4a.40.2",RESOLUTION="852x480",VIDEO="medium"
|
||||
https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/medium/index-muted-HM49I092CC.m3u8
|
||||
#EXT-X-MEDIA:TYPE=VIDEO,GROUP-ID="low",NAME="Low",AUTOSELECT=YES,DEFAULT=YES
|
||||
#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=628347,CODECS="avc1.42C01E,mp4a.40.2",RESOLUTION="640x360",VIDEO="low"
|
||||
https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/low/index-muted-HM49I092CC.m3u8
|
||||
#EXT-X-MEDIA:TYPE=VIDEO,GROUP-ID="mobile",NAME="Mobile",AUTOSELECT=YES,DEFAULT=YES
|
||||
#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=280474,CODECS="avc1.42C00D,mp4a.40.2",RESOLUTION="400x226",VIDEO="mobile"
|
||||
https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/mobile/index-muted-HM49I092CC.m3u8
|
||||
#EXT-X-MEDIA:TYPE=VIDEO,GROUP-ID="audio_only",NAME="Audio Only",AUTOSELECT=NO,DEFAULT=NO
|
||||
#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=182725,CODECS="mp4a.40.2",VIDEO="audio_only"
|
||||
https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/audio_only/index-muted-HM49I092CC.m3u8
|
10
test/testdata/m3u8/vidio.m3u8
vendored
Normal file
10
test/testdata/m3u8/vidio.m3u8
vendored
Normal file
@@ -0,0 +1,10 @@
|
||||
#EXTM3U
|
||||
|
||||
#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=300000,RESOLUTION=480x270,NAME="270p 3G"
|
||||
https://cdn1-a.production.vidio.static6.com/uploads/165683/dj_ambred-4383-b300.mp4.m3u8
|
||||
|
||||
#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=600000,RESOLUTION=640x360,NAME="360p SD"
|
||||
https://cdn1-a.production.vidio.static6.com/uploads/165683/dj_ambred-4383-b600.mp4.m3u8
|
||||
|
||||
#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=1200000,RESOLUTION=1280x720,NAME="720p HD"
|
||||
https://cdn1-a.production.vidio.static6.com/uploads/165683/dj_ambred-4383-b1200.mp4.m3u8
|
@@ -58,6 +58,7 @@ from .utils import (
|
||||
format_bytes,
|
||||
formatSeconds,
|
||||
GeoRestrictedError,
|
||||
int_or_none,
|
||||
ISO3166Utils,
|
||||
locked_file,
|
||||
make_HTTPS_handler,
|
||||
@@ -302,6 +303,17 @@ class YoutubeDL(object):
|
||||
postprocessor.
|
||||
"""
|
||||
|
||||
# Names of info dict fields that are treated as numeric: the output template
# code and the numeric field sanitizer both iterate over this set.
_NUMERIC_FIELDS = set((
|
||||
'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
|
||||
'timestamp', 'upload_year', 'upload_month', 'upload_day',
|
||||
'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
|
||||
'average_rating', 'comment_count', 'age_limit',
|
||||
'start_time', 'end_time',
|
||||
'chapter_number', 'season_number', 'episode_number',
|
||||
'track_number', 'disc_number', 'release_year',
|
||||
'playlist_index',
|
||||
|
||||
))
|
||||
|
||||
params = None
|
||||
_ies = []
|
||||
_pps = []
|
||||
@@ -370,10 +382,10 @@ class YoutubeDL(object):
|
||||
else:
|
||||
raise
|
||||
|
||||
if (sys.version_info >= (3,) and sys.platform != 'win32' and
|
||||
if (sys.platform != 'win32' and
|
||||
sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
|
||||
not params.get('restrictfilenames', False)):
|
||||
# On Python 3, the Unicode filesystem API will throw errors (#1474)
|
||||
# Unicode filesystem API will throw errors (#1474, #13027)
|
||||
self.report_warning(
|
||||
'Assuming --restrict-filenames since file system encoding '
|
||||
'cannot encode all characters. '
|
||||
@@ -498,7 +510,8 @@ class YoutubeDL(object):
|
||||
def to_console_title(self, message):
|
||||
if not self.params.get('consoletitle', False):
|
||||
return
|
||||
if compat_os_name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
|
||||
if compat_os_name == 'nt':
|
||||
if ctypes.windll.kernel32.GetConsoleWindow():
|
||||
# c_wchar_p() might not be necessary if `message` is
|
||||
# already of type unicode()
|
||||
ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
|
||||
@@ -508,14 +521,14 @@ class YoutubeDL(object):
|
||||
def save_console_title(self):
|
||||
if not self.params.get('consoletitle', False):
|
||||
return
|
||||
if 'TERM' in os.environ:
|
||||
if compat_os_name != 'nt' and 'TERM' in os.environ:
|
||||
# Save the title on stack
|
||||
self._write_string('\033[22;0t', self._screen_file)
|
||||
|
||||
def restore_console_title(self):
|
||||
if not self.params.get('consoletitle', False):
|
||||
return
|
||||
if 'TERM' in os.environ:
|
||||
if compat_os_name != 'nt' and 'TERM' in os.environ:
|
||||
# Restore the title from stack
|
||||
self._write_string('\033[23;0t', self._screen_file)
|
||||
|
||||
@@ -638,22 +651,11 @@ class YoutubeDL(object):
|
||||
r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
|
||||
outtmpl)
|
||||
|
||||
NUMERIC_FIELDS = set((
|
||||
'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
|
||||
'upload_year', 'upload_month', 'upload_day',
|
||||
'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
|
||||
'average_rating', 'comment_count', 'age_limit',
|
||||
'start_time', 'end_time',
|
||||
'chapter_number', 'season_number', 'episode_number',
|
||||
'track_number', 'disc_number', 'release_year',
|
||||
'playlist_index',
|
||||
))
|
||||
|
||||
# Missing numeric fields used together with integer presentation types
|
||||
# in format specification will break the argument substitution since
|
||||
# string 'NA' is returned for missing fields. We will patch output
|
||||
# template for missing fields to meet string presentation type.
|
||||
for numeric_field in NUMERIC_FIELDS:
|
||||
for numeric_field in self._NUMERIC_FIELDS:
|
||||
if numeric_field not in template_dict:
|
||||
# As of [1] format syntax is:
|
||||
# %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
|
||||
@@ -1344,9 +1346,28 @@ class YoutubeDL(object):
|
||||
if 'title' not in info_dict:
|
||||
raise ExtractorError('Missing "title" field in extractor result')
|
||||
|
||||
if not isinstance(info_dict['id'], compat_str):
|
||||
self.report_warning('"id" field is not a string - forcing string conversion')
|
||||
info_dict['id'] = compat_str(info_dict['id'])
|
||||
def report_force_conversion(field, field_not, conversion):
    """Warn that `field` has an unexpected type and is being converted.

    `field_not` describes what the value was expected to be (for example
    'a string') and `conversion` names the conversion being forced.
    """
    message = (
        '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
        % (field, field_not, conversion))
    self.report_warning(message)
|
||||
|
||||
def sanitize_string_field(info, string_field):
    """Force info[string_field] to compat_str, warning when a conversion is needed.

    Missing or None values and values that already are compat_str are
    left untouched.
    """
    value = info.get(string_field)
    if value is not None and not isinstance(value, compat_str):
        report_force_conversion(string_field, 'a string', 'string')
        info[string_field] = compat_str(value)
|
||||
|
||||
def sanitize_numeric_fields(info):
    """Convert non-numeric values of the known numeric fields in `info`.

    Every field named in self._NUMERIC_FIELDS whose value is neither None
    nor an instance of compat_numeric_types is reported and replaced with
    the result of int_or_none().
    """
    for name in self._NUMERIC_FIELDS:
        value = info.get(name)
        if value is not None and not isinstance(value, compat_numeric_types):
            report_force_conversion(name, 'numeric', 'int')
            info[name] = int_or_none(value)
|
||||
|
||||
sanitize_string_field(info_dict, 'id')
|
||||
sanitize_numeric_fields(info_dict)
|
||||
|
||||
if 'playlist' not in info_dict:
|
||||
# It isn't part of a playlist
|
||||
@@ -1434,6 +1455,8 @@ class YoutubeDL(object):
|
||||
if 'url' not in format:
|
||||
raise ExtractorError('Missing "url" key in result (index %d)' % i)
|
||||
|
||||
sanitize_string_field(format, 'format_id')
|
||||
sanitize_numeric_fields(format)
|
||||
format['url'] = sanitize_url(format['url'])
|
||||
|
||||
if format.get('format_id') is None:
|
||||
|
@@ -343,6 +343,7 @@ def _real_main(argv=None):
|
||||
'retries': opts.retries,
|
||||
'fragment_retries': opts.fragment_retries,
|
||||
'skip_unavailable_fragments': opts.skip_unavailable_fragments,
|
||||
'keep_fragments': opts.keep_fragments,
|
||||
'buffersize': opts.buffersize,
|
||||
'noresizebuffer': opts.noresizebuffer,
|
||||
'continuedl': opts.continue_dl,
|
||||
|
@@ -2322,6 +2322,19 @@ try:
|
||||
except ImportError: # Python 2
|
||||
from HTMLParser import HTMLParser as compat_HTMLParser
|
||||
|
||||
try: # Python 2
|
||||
from HTMLParser import HTMLParseError as compat_HTMLParseError
|
||||
except ImportError: # Python <3.4
|
||||
try:
|
||||
from html.parser import HTMLParseError as compat_HTMLParseError
|
||||
except ImportError: # Python >3.4
|
||||
|
||||
# HTMLParseError has been deprecated in Python 3.3 and removed in
|
||||
# Python 3.5. Introducing dummy exception for Python >=3.5 for compatible
|
||||
# and uniform cross-version exception handling
|
||||
class compat_HTMLParseError(Exception):
|
||||
pass
|
||||
|
||||
try:
|
||||
from subprocess import DEVNULL
|
||||
compat_subprocess_get_DEVNULL = lambda: DEVNULL
|
||||
|
@@ -187,6 +187,9 @@ class FileDownloader(object):
|
||||
return filename[:-len('.part')]
|
||||
return filename
|
||||
|
||||
def ytdl_filename(self, filename):
|
||||
return filename + '.ytdl'
|
||||
|
||||
def try_rename(self, old_filename, new_filename):
|
||||
try:
|
||||
if old_filename == new_filename:
|
||||
@@ -327,6 +330,7 @@ class FileDownloader(object):
|
||||
os.path.exists(encodeFilename(filename))
|
||||
)
|
||||
|
||||
if not hasattr(filename, 'write'):
|
||||
continuedl_and_exists = (
|
||||
self.params.get('continuedl', True) and
|
||||
os.path.isfile(encodeFilename(filename)) and
|
||||
|
@@ -1,13 +1,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import os
|
||||
|
||||
from .fragment import FragmentFD
|
||||
from ..compat import compat_urllib_error
|
||||
from ..utils import (
|
||||
sanitize_open,
|
||||
encodeFilename,
|
||||
)
|
||||
|
||||
|
||||
class DashSegmentsFD(FragmentFD):
|
||||
@@ -28,31 +22,24 @@ class DashSegmentsFD(FragmentFD):
|
||||
|
||||
self._prepare_and_start_frag_download(ctx)
|
||||
|
||||
segments_filenames = []
|
||||
|
||||
fragment_retries = self.params.get('fragment_retries', 0)
|
||||
skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
|
||||
|
||||
def process_segment(segment, tmp_filename, num):
|
||||
segment_url = segment['url']
|
||||
segment_name = 'Frag%d' % num
|
||||
target_filename = '%s-%s' % (tmp_filename, segment_name)
|
||||
frag_index = 0
|
||||
for i, segment in enumerate(segments):
|
||||
frag_index += 1
|
||||
if frag_index <= ctx['fragment_index']:
|
||||
continue
|
||||
# In DASH, the first segment contains necessary headers to
|
||||
# generate a valid MP4 file, so always abort for the first segment
|
||||
fatal = num == 0 or not skip_unavailable_fragments
|
||||
fatal = i == 0 or not skip_unavailable_fragments
|
||||
count = 0
|
||||
while count <= fragment_retries:
|
||||
try:
|
||||
success = ctx['dl'].download(target_filename, {
|
||||
'url': segment_url,
|
||||
'http_headers': info_dict.get('http_headers'),
|
||||
})
|
||||
success, frag_content = self._download_fragment(ctx, segment['url'], info_dict)
|
||||
if not success:
|
||||
return False
|
||||
down, target_sanitized = sanitize_open(target_filename, 'rb')
|
||||
ctx['dest_stream'].write(down.read())
|
||||
down.close()
|
||||
segments_filenames.append(target_sanitized)
|
||||
self._append_fragment(ctx, frag_content)
|
||||
break
|
||||
except compat_urllib_error.HTTPError as err:
|
||||
# YouTube may often return 404 HTTP error for a fragment causing the
|
||||
@@ -63,22 +50,14 @@ class DashSegmentsFD(FragmentFD):
|
||||
# HTTP error.
|
||||
count += 1
|
||||
if count <= fragment_retries:
|
||||
self.report_retry_fragment(err, segment_name, count, fragment_retries)
|
||||
self.report_retry_fragment(err, frag_index, count, fragment_retries)
|
||||
if count > fragment_retries:
|
||||
if not fatal:
|
||||
self.report_skip_fragment(segment_name)
|
||||
return True
|
||||
self.report_skip_fragment(frag_index)
|
||||
continue
|
||||
self.report_error('giving up after %s fragment retries' % fragment_retries)
|
||||
return False
|
||||
return True
|
||||
|
||||
for i, segment in enumerate(segments):
|
||||
if not process_segment(segment, ctx['tmpfilename'], i):
|
||||
return False
|
||||
|
||||
self._finish_frag_download(ctx)
|
||||
|
||||
for segment_file in segments_filenames:
|
||||
os.remove(encodeFilename(segment_file))
|
||||
|
||||
return True
|
||||
|
@@ -29,7 +29,17 @@ class ExternalFD(FileDownloader):
|
||||
self.report_destination(filename)
|
||||
tmpfilename = self.temp_name(filename)
|
||||
|
||||
try:
|
||||
retval = self._call_downloader(tmpfilename, info_dict)
|
||||
except KeyboardInterrupt:
|
||||
if not info_dict.get('is_live'):
|
||||
raise
|
||||
# Live stream downloading cancellation should be considered as
|
||||
# correct and expected termination thus all postprocessing
|
||||
# should take place
|
||||
retval = 0
|
||||
self.to_screen('[%s] Interrupted by user' % self.get_basename())
|
||||
|
||||
if retval == 0:
|
||||
fsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
self.to_screen('\r[%s] Downloaded %s bytes' % (self.get_basename(), fsize))
|
||||
@@ -202,6 +212,11 @@ class FFmpegFD(ExternalFD):
|
||||
|
||||
args = [ffpp.executable, '-y']
|
||||
|
||||
for log_level in ('quiet', 'verbose'):
|
||||
if self.params.get(log_level, False):
|
||||
args += ['-loglevel', log_level]
|
||||
break
|
||||
|
||||
seekable = info_dict.get('_seekable')
|
||||
if seekable is not None:
|
||||
# setting -seekable prevents ffmpeg from guessing if the server
|
||||
|
@@ -3,7 +3,6 @@ from __future__ import division, unicode_literals
|
||||
import base64
|
||||
import io
|
||||
import itertools
|
||||
import os
|
||||
import time
|
||||
|
||||
from .fragment import FragmentFD
|
||||
@@ -16,9 +15,7 @@ from ..compat import (
|
||||
compat_struct_unpack,
|
||||
)
|
||||
from ..utils import (
|
||||
encodeFilename,
|
||||
fix_xml_ampersands,
|
||||
sanitize_open,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
@@ -366,6 +363,7 @@ class F4mFD(FragmentFD):
|
||||
|
||||
dest_stream = ctx['dest_stream']
|
||||
|
||||
if ctx['complete_frags_downloaded_bytes'] == 0:
|
||||
write_flv_header(dest_stream)
|
||||
if not live:
|
||||
write_metadata_tag(dest_stream, metadata)
|
||||
@@ -374,9 +372,12 @@ class F4mFD(FragmentFD):
|
||||
|
||||
self._start_frag_download(ctx)
|
||||
|
||||
frags_filenames = []
|
||||
frag_index = 0
|
||||
while fragments_list:
|
||||
seg_i, frag_i = fragments_list.pop(0)
|
||||
frag_index += 1
|
||||
if frag_index <= ctx['fragment_index']:
|
||||
continue
|
||||
name = 'Seg%d-Frag%d' % (seg_i, frag_i)
|
||||
query = []
|
||||
if base_url_parsed.query:
|
||||
@@ -386,17 +387,10 @@ class F4mFD(FragmentFD):
|
||||
if info_dict.get('extra_param_to_segment_url'):
|
||||
query.append(info_dict['extra_param_to_segment_url'])
|
||||
url_parsed = base_url_parsed._replace(path=base_url_parsed.path + name, query='&'.join(query))
|
||||
frag_filename = '%s-%s' % (ctx['tmpfilename'], name)
|
||||
try:
|
||||
success = ctx['dl'].download(frag_filename, {
|
||||
'url': url_parsed.geturl(),
|
||||
'http_headers': info_dict.get('http_headers'),
|
||||
})
|
||||
success, down_data = self._download_fragment(ctx, url_parsed.geturl(), info_dict)
|
||||
if not success:
|
||||
return False
|
||||
(down, frag_sanitized) = sanitize_open(frag_filename, 'rb')
|
||||
down_data = down.read()
|
||||
down.close()
|
||||
reader = FlvReader(down_data)
|
||||
while True:
|
||||
try:
|
||||
@@ -411,12 +405,8 @@ class F4mFD(FragmentFD):
|
||||
break
|
||||
raise
|
||||
if box_type == b'mdat':
|
||||
dest_stream.write(box_data)
|
||||
self._append_fragment(ctx, box_data)
|
||||
break
|
||||
if live:
|
||||
os.remove(encodeFilename(frag_sanitized))
|
||||
else:
|
||||
frags_filenames.append(frag_sanitized)
|
||||
except (compat_urllib_error.HTTPError, ) as err:
|
||||
if live and (err.code == 404 or err.code == 410):
|
||||
# We didn't keep up with the live window. Continue
|
||||
@@ -436,7 +426,4 @@ class F4mFD(FragmentFD):
|
||||
|
||||
self._finish_frag_download(ctx)
|
||||
|
||||
for frag_file in frags_filenames:
|
||||
os.remove(encodeFilename(frag_file))
|
||||
|
||||
return True
|
||||
|
@@ -2,6 +2,7 @@ from __future__ import division, unicode_literals
|
||||
|
||||
import os
|
||||
import time
|
||||
import json
|
||||
|
||||
from .common import FileDownloader
|
||||
from .http import HttpFD
|
||||
@@ -28,15 +29,37 @@ class FragmentFD(FileDownloader):
|
||||
and hlsnative only)
|
||||
skip_unavailable_fragments:
|
||||
Skip unavailable fragments (DASH and hlsnative only)
|
||||
keep_fragments: Keep downloaded fragments on disk after downloading is
|
||||
finished
|
||||
|
||||
For each incomplete fragment download youtube-dl keeps on disk a special
|
||||
bookkeeping file with download state and metadata (in future such files will
|
||||
be used for any incomplete download handled by youtube-dl). This file is
|
||||
used to properly handle resuming, check download file consistency and detect
|
||||
potential errors. The file has a .ytdl extension and represents a standard
|
||||
JSON file of the following format:
|
||||
|
||||
extractor:
|
||||
Dictionary of extractor related data. TBD.
|
||||
|
||||
downloader:
|
||||
Dictionary of downloader related data. May contain following data:
|
||||
current_fragment:
|
||||
Dictionary with current (being downloaded) fragment data:
|
||||
index: 0-based index of current fragment among all fragments
|
||||
fragment_count:
|
||||
Total count of fragments
|
||||
|
||||
This feature is experimental and file format may change in future.
|
||||
"""
|
||||
|
||||
def report_retry_fragment(self, err, fragment_name, count, retries):
|
||||
def report_retry_fragment(self, err, frag_index, count, retries):
|
||||
self.to_screen(
|
||||
'[download] Got server HTTP error: %s. Retrying fragment %s (attempt %d of %s)...'
|
||||
% (error_to_compat_str(err), fragment_name, count, self.format_retries(retries)))
|
||||
'[download] Got server HTTP error: %s. Retrying fragment %d (attempt %d of %s)...'
|
||||
% (error_to_compat_str(err), frag_index, count, self.format_retries(retries)))
|
||||
|
||||
def report_skip_fragment(self, fragment_name):
|
||||
self.to_screen('[download] Skipping fragment %s...' % fragment_name)
|
||||
def report_skip_fragment(self, frag_index):
|
||||
self.to_screen('[download] Skipping fragment %d...' % frag_index)
|
||||
|
||||
def _prepare_url(self, info_dict, url):
|
||||
headers = info_dict.get('http_headers')
|
||||
@@ -46,6 +69,51 @@ class FragmentFD(FileDownloader):
|
||||
self._prepare_frag_download(ctx)
|
||||
self._start_frag_download(ctx)
|
||||
|
||||
@staticmethod
|
||||
def __do_ytdl_file(ctx):
|
||||
return not ctx['live'] and not ctx['tmpfilename'] == '-'
|
||||
|
||||
def _read_ytdl_file(self, ctx):
|
||||
stream, _ = sanitize_open(self.ytdl_filename(ctx['filename']), 'r')
|
||||
ctx['fragment_index'] = json.loads(stream.read())['downloader']['current_fragment']['index']
|
||||
stream.close()
|
||||
|
||||
def _write_ytdl_file(self, ctx):
|
||||
frag_index_stream, _ = sanitize_open(self.ytdl_filename(ctx['filename']), 'w')
|
||||
downloader = {
|
||||
'current_fragment': {
|
||||
'index': ctx['fragment_index'],
|
||||
},
|
||||
}
|
||||
if ctx.get('fragment_count') is not None:
|
||||
downloader['fragment_count'] = ctx['fragment_count']
|
||||
frag_index_stream.write(json.dumps({'downloader': downloader}))
|
||||
frag_index_stream.close()
|
||||
|
||||
def _download_fragment(self, ctx, frag_url, info_dict, headers=None):
|
||||
fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], ctx['fragment_index'])
|
||||
success = ctx['dl'].download(fragment_filename, {
|
||||
'url': frag_url,
|
||||
'http_headers': headers or info_dict.get('http_headers'),
|
||||
})
|
||||
if not success:
|
||||
return False, None
|
||||
down, frag_sanitized = sanitize_open(fragment_filename, 'rb')
|
||||
ctx['fragment_filename_sanitized'] = frag_sanitized
|
||||
frag_content = down.read()
|
||||
down.close()
|
||||
return True, frag_content
|
||||
|
||||
def _append_fragment(self, ctx, frag_content):
|
||||
try:
|
||||
ctx['dest_stream'].write(frag_content)
|
||||
finally:
|
||||
if self.__do_ytdl_file(ctx):
|
||||
self._write_ytdl_file(ctx)
|
||||
if not self.params.get('keep_fragments', False):
|
||||
os.remove(ctx['fragment_filename_sanitized'])
|
||||
del ctx['fragment_filename_sanitized']
|
||||
|
||||
def _prepare_frag_download(self, ctx):
|
||||
if 'live' not in ctx:
|
||||
ctx['live'] = False
|
||||
@@ -66,11 +134,36 @@ class FragmentFD(FileDownloader):
|
||||
}
|
||||
)
|
||||
tmpfilename = self.temp_name(ctx['filename'])
|
||||
dest_stream, tmpfilename = sanitize_open(tmpfilename, 'wb')
|
||||
open_mode = 'wb'
|
||||
resume_len = 0
|
||||
|
||||
# Establish possible resume length
|
||||
if os.path.isfile(encodeFilename(tmpfilename)):
|
||||
open_mode = 'ab'
|
||||
resume_len = os.path.getsize(encodeFilename(tmpfilename))
|
||||
|
||||
# Should be initialized before ytdl file check
|
||||
ctx.update({
|
||||
'tmpfilename': tmpfilename,
|
||||
'fragment_index': 0,
|
||||
})
|
||||
|
||||
if self.__do_ytdl_file(ctx):
|
||||
if os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename']))):
|
||||
self._read_ytdl_file(ctx)
|
||||
else:
|
||||
self._write_ytdl_file(ctx)
|
||||
if ctx['fragment_index'] > 0:
|
||||
assert resume_len > 0
|
||||
|
||||
dest_stream, tmpfilename = sanitize_open(tmpfilename, open_mode)
|
||||
|
||||
ctx.update({
|
||||
'dl': dl,
|
||||
'dest_stream': dest_stream,
|
||||
'tmpfilename': tmpfilename,
|
||||
# Total complete fragments downloaded so far in bytes
|
||||
'complete_frags_downloaded_bytes': resume_len,
|
||||
})
|
||||
|
||||
def _start_frag_download(self, ctx):
|
||||
@@ -79,9 +172,9 @@ class FragmentFD(FileDownloader):
|
||||
# hook
|
||||
state = {
|
||||
'status': 'downloading',
|
||||
'downloaded_bytes': 0,
|
||||
'frag_index': 0,
|
||||
'frag_count': total_frags,
|
||||
'downloaded_bytes': ctx['complete_frags_downloaded_bytes'],
|
||||
'fragment_index': ctx['fragment_index'],
|
||||
'fragment_count': total_frags,
|
||||
'filename': ctx['filename'],
|
||||
'tmpfilename': ctx['tmpfilename'],
|
||||
}
|
||||
@@ -89,8 +182,6 @@ class FragmentFD(FileDownloader):
|
||||
start = time.time()
|
||||
ctx.update({
|
||||
'started': start,
|
||||
# Total complete fragments downloaded so far in bytes
|
||||
'complete_frags_downloaded_bytes': 0,
|
||||
# Amount of fragment's bytes downloaded by the time of the previous
|
||||
# frag progress hook invocation
|
||||
'prev_frag_downloaded_bytes': 0,
|
||||
@@ -106,11 +197,12 @@ class FragmentFD(FileDownloader):
|
||||
if not ctx['live']:
|
||||
estimated_size = (
|
||||
(ctx['complete_frags_downloaded_bytes'] + frag_total_bytes) /
|
||||
(state['frag_index'] + 1) * total_frags)
|
||||
(state['fragment_index'] + 1) * total_frags)
|
||||
state['total_bytes_estimate'] = estimated_size
|
||||
|
||||
if s['status'] == 'finished':
|
||||
state['frag_index'] += 1
|
||||
state['fragment_index'] += 1
|
||||
ctx['fragment_index'] = state['fragment_index']
|
||||
state['downloaded_bytes'] += frag_total_bytes - ctx['prev_frag_downloaded_bytes']
|
||||
ctx['complete_frags_downloaded_bytes'] = state['downloaded_bytes']
|
||||
ctx['prev_frag_downloaded_bytes'] = 0
|
||||
@@ -132,6 +224,10 @@ class FragmentFD(FileDownloader):
|
||||
|
||||
def _finish_frag_download(self, ctx):
|
||||
ctx['dest_stream'].close()
|
||||
if self.__do_ytdl_file(ctx):
|
||||
ytdl_filename = encodeFilename(self.ytdl_filename(ctx['filename']))
|
||||
if os.path.isfile(ytdl_filename):
|
||||
os.remove(ytdl_filename)
|
||||
elapsed = time.time() - ctx['started']
|
||||
self.try_rename(ctx['tmpfilename'], ctx['filename'])
|
||||
fsize = os.path.getsize(encodeFilename(ctx['filename']))
|
||||
|
@@ -1,6 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import os.path
|
||||
import re
|
||||
import binascii
|
||||
try:
|
||||
@@ -18,8 +17,6 @@ from ..compat import (
|
||||
compat_struct_pack,
|
||||
)
|
||||
from ..utils import (
|
||||
encodeFilename,
|
||||
sanitize_open,
|
||||
parse_m3u8_attributes,
|
||||
update_url_query,
|
||||
)
|
||||
@@ -103,17 +100,18 @@ class HlsFD(FragmentFD):
|
||||
media_sequence = 0
|
||||
decrypt_info = {'METHOD': 'NONE'}
|
||||
byte_range = {}
|
||||
frags_filenames = []
|
||||
frag_index = 0
|
||||
for line in s.splitlines():
|
||||
line = line.strip()
|
||||
if line:
|
||||
if not line.startswith('#'):
|
||||
frag_index += 1
|
||||
if frag_index <= ctx['fragment_index']:
|
||||
continue
|
||||
frag_url = (
|
||||
line
|
||||
if re.match(r'^https?://', line)
|
||||
else compat_urlparse.urljoin(man_url, line))
|
||||
frag_name = 'Frag%d' % i
|
||||
frag_filename = '%s-%s' % (ctx['tmpfilename'], frag_name)
|
||||
if extra_query:
|
||||
frag_url = update_url_query(frag_url, extra_query)
|
||||
count = 0
|
||||
@@ -122,15 +120,10 @@ class HlsFD(FragmentFD):
|
||||
headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'])
|
||||
while count <= fragment_retries:
|
||||
try:
|
||||
success = ctx['dl'].download(frag_filename, {
|
||||
'url': frag_url,
|
||||
'http_headers': headers,
|
||||
})
|
||||
success, frag_content = self._download_fragment(
|
||||
ctx, frag_url, info_dict, headers)
|
||||
if not success:
|
||||
return False
|
||||
down, frag_sanitized = sanitize_open(frag_filename, 'rb')
|
||||
frag_content = down.read()
|
||||
down.close()
|
||||
break
|
||||
except compat_urllib_error.HTTPError as err:
|
||||
# Unavailable (possibly temporary) fragments may be served.
|
||||
@@ -139,28 +132,29 @@ class HlsFD(FragmentFD):
|
||||
# https://github.com/rg3/youtube-dl/issues/10448).
|
||||
count += 1
|
||||
if count <= fragment_retries:
|
||||
self.report_retry_fragment(err, frag_name, count, fragment_retries)
|
||||
self.report_retry_fragment(err, frag_index, count, fragment_retries)
|
||||
if count > fragment_retries:
|
||||
if skip_unavailable_fragments:
|
||||
i += 1
|
||||
media_sequence += 1
|
||||
self.report_skip_fragment(frag_name)
|
||||
self.report_skip_fragment(frag_index)
|
||||
continue
|
||||
self.report_error(
|
||||
'giving up after %s fragment retries' % fragment_retries)
|
||||
return False
|
||||
if decrypt_info['METHOD'] == 'AES-128':
|
||||
iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence)
|
||||
decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen(decrypt_info['URI']).read()
|
||||
frag_content = AES.new(
|
||||
decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content)
|
||||
ctx['dest_stream'].write(frag_content)
|
||||
frags_filenames.append(frag_sanitized)
|
||||
self._append_fragment(ctx, frag_content)
|
||||
# We only download the first fragment during the test
|
||||
if test:
|
||||
break
|
||||
i += 1
|
||||
media_sequence += 1
|
||||
elif line.startswith('#EXT-X-KEY'):
|
||||
decrypt_url = decrypt_info.get('URI')
|
||||
decrypt_info = parse_m3u8_attributes(line[11:])
|
||||
if decrypt_info['METHOD'] == 'AES-128':
|
||||
if 'IV' in decrypt_info:
|
||||
@@ -170,7 +164,8 @@ class HlsFD(FragmentFD):
|
||||
man_url, decrypt_info['URI'])
|
||||
if extra_query:
|
||||
decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query)
|
||||
decrypt_info['KEY'] = self.ydl.urlopen(decrypt_info['URI']).read()
|
||||
if decrypt_url != decrypt_info['URI']:
|
||||
decrypt_info['KEY'] = None
|
||||
elif line.startswith('#EXT-X-MEDIA-SEQUENCE'):
|
||||
media_sequence = int(line[22:])
|
||||
elif line.startswith('#EXT-X-BYTERANGE'):
|
||||
@@ -183,7 +178,4 @@ class HlsFD(FragmentFD):
|
||||
|
||||
self._finish_frag_download(ctx)
|
||||
|
||||
for frag_file in frags_filenames:
|
||||
os.remove(encodeFilename(frag_file))
|
||||
|
||||
return True
|
||||
|
@@ -1,6 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import os
|
||||
import time
|
||||
import struct
|
||||
import binascii
|
||||
@@ -8,10 +7,6 @@ import io
|
||||
|
||||
from .fragment import FragmentFD
|
||||
from ..compat import compat_urllib_error
|
||||
from ..utils import (
|
||||
sanitize_open,
|
||||
encodeFilename,
|
||||
)
|
||||
|
||||
|
||||
u8 = struct.Struct(b'>B')
|
||||
@@ -225,50 +220,39 @@ class IsmFD(FragmentFD):
|
||||
|
||||
self._prepare_and_start_frag_download(ctx)
|
||||
|
||||
segments_filenames = []
|
||||
|
||||
fragment_retries = self.params.get('fragment_retries', 0)
|
||||
skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
|
||||
|
||||
track_written = False
|
||||
frag_index = 0
|
||||
for i, segment in enumerate(segments):
|
||||
segment_url = segment['url']
|
||||
segment_name = 'Frag%d' % i
|
||||
target_filename = '%s-%s' % (ctx['tmpfilename'], segment_name)
|
||||
frag_index += 1
|
||||
if frag_index <= ctx['fragment_index']:
|
||||
continue
|
||||
count = 0
|
||||
while count <= fragment_retries:
|
||||
try:
|
||||
success = ctx['dl'].download(target_filename, {
|
||||
'url': segment_url,
|
||||
'http_headers': info_dict.get('http_headers'),
|
||||
})
|
||||
success, frag_content = self._download_fragment(ctx, segment['url'], info_dict)
|
||||
if not success:
|
||||
return False
|
||||
down, target_sanitized = sanitize_open(target_filename, 'rb')
|
||||
down_data = down.read()
|
||||
if not track_written:
|
||||
tfhd_data = extract_box_data(down_data, [b'moof', b'traf', b'tfhd'])
|
||||
tfhd_data = extract_box_data(frag_content, [b'moof', b'traf', b'tfhd'])
|
||||
info_dict['_download_params']['track_id'] = u32.unpack(tfhd_data[4:8])[0]
|
||||
write_piff_header(ctx['dest_stream'], info_dict['_download_params'])
|
||||
track_written = True
|
||||
ctx['dest_stream'].write(down_data)
|
||||
down.close()
|
||||
segments_filenames.append(target_sanitized)
|
||||
self._append_fragment(ctx, frag_content)
|
||||
break
|
||||
except compat_urllib_error.HTTPError as err:
|
||||
count += 1
|
||||
if count <= fragment_retries:
|
||||
self.report_retry_fragment(err, segment_name, count, fragment_retries)
|
||||
self.report_retry_fragment(err, frag_index, count, fragment_retries)
|
||||
if count > fragment_retries:
|
||||
if skip_unavailable_fragments:
|
||||
self.report_skip_fragment(segment_name)
|
||||
self.report_skip_fragment(frag_index)
|
||||
continue
|
||||
self.report_error('giving up after %s fragment retries' % fragment_retries)
|
||||
return False
|
||||
|
||||
self._finish_frag_download(ctx)
|
||||
|
||||
for segment_file in segments_filenames:
|
||||
os.remove(encodeFilename(segment_file))
|
||||
|
||||
return True
|
||||
|
@@ -12,7 +12,15 @@ from ..compat import compat_urlparse
|
||||
|
||||
class AbcNewsVideoIE(AMPIE):
|
||||
IE_NAME = 'abcnews:video'
|
||||
_VALID_URL = r'https?://abcnews\.go\.com/[^/]+/video/(?P<display_id>[0-9a-z-]+)-(?P<id>\d+)'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
abcnews\.go\.com/
|
||||
(?:
|
||||
[^/]+/video/(?P<display_id>[0-9a-z-]+)-|
|
||||
video/embed\?.*?\bid=
|
||||
)
|
||||
(?P<id>\d+)
|
||||
'''
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://abcnews.go.com/ThisWeek/video/week-exclusive-irans-foreign-minister-zarif-20411932',
|
||||
@@ -29,6 +37,9 @@ class AbcNewsVideoIE(AMPIE):
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://abcnews.go.com/video/embed?id=46979033',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://abcnews.go.com/2020/video/2020-husband-stands-teacher-jail-student-affairs-26119478',
|
||||
'only_matching': True,
|
||||
|
@@ -15,6 +15,7 @@ from ..utils import (
|
||||
intlist_to_bytes,
|
||||
srt_subtitles_timecode,
|
||||
strip_or_none,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
@@ -31,25 +32,28 @@ class ADNIE(InfoExtractor):
|
||||
'description': 'md5:2f7b5aa76edbc1a7a92cedcda8a528d5',
|
||||
}
|
||||
}
|
||||
_BASE_URL = 'http://animedigitalnetwork.fr'
|
||||
|
||||
def _get_subtitles(self, sub_path, video_id):
|
||||
if not sub_path:
|
||||
return None
|
||||
|
||||
enc_subtitles = self._download_webpage(
|
||||
'http://animedigitalnetwork.fr/' + sub_path,
|
||||
video_id, fatal=False)
|
||||
urljoin(self._BASE_URL, sub_path),
|
||||
video_id, fatal=False, headers={
|
||||
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:53.0) Gecko/20100101 Firefox/53.0',
|
||||
})
|
||||
if not enc_subtitles:
|
||||
return None
|
||||
|
||||
# http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
|
||||
dec_subtitles = intlist_to_bytes(aes_cbc_decrypt(
|
||||
bytes_to_intlist(base64.b64decode(enc_subtitles[24:])),
|
||||
bytes_to_intlist(b'\nd\xaf\xd2J\xd0\xfc\xe1\xfc\xdf\xb61\xe8\xe1\xf0\xcc'),
|
||||
bytes_to_intlist(b'\x1b\xe0\x29\x61\x38\x94\x24\x00\x12\xbd\xc5\x80\xac\xce\xbe\xb0'),
|
||||
bytes_to_intlist(base64.b64decode(enc_subtitles[:24]))
|
||||
))
|
||||
subtitles_json = self._parse_json(
|
||||
dec_subtitles[:-compat_ord(dec_subtitles[-1])],
|
||||
dec_subtitles[:-compat_ord(dec_subtitles[-1])].decode(),
|
||||
None, fatal=False)
|
||||
if not subtitles_json:
|
||||
return None
|
||||
@@ -103,9 +107,16 @@ class ADNIE(InfoExtractor):
|
||||
metas = options.get('metas') or {}
|
||||
title = metas.get('title') or video_info['title']
|
||||
links = player_config.get('links') or {}
|
||||
if not links:
|
||||
links_url = player_config['linksurl']
|
||||
links_data = self._download_json(urljoin(
|
||||
self._BASE_URL, links_url), video_id)
|
||||
links = links_data.get('links') or {}
|
||||
|
||||
formats = []
|
||||
for format_id, qualities in links.items():
|
||||
if not isinstance(qualities, dict):
|
||||
continue
|
||||
for load_balancer_url in qualities.values():
|
||||
load_balancer_data = self._download_json(
|
||||
load_balancer_url, video_id, fatal=False) or {}
|
||||
|
@@ -36,6 +36,11 @@ MSO_INFO = {
|
||||
'username_field': 'Ecom_User_ID',
|
||||
'password_field': 'Ecom_Password',
|
||||
},
|
||||
'Brighthouse': {
|
||||
'name': 'Bright House Networks | Spectrum',
|
||||
'username_field': 'j_username',
|
||||
'password_field': 'j_password',
|
||||
},
|
||||
'Charter_Direct': {
|
||||
'name': 'Charter Spectrum',
|
||||
'username_field': 'IDToken1',
|
||||
@@ -1308,6 +1313,12 @@ class AdobePassIE(InfoExtractor):
|
||||
_USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0'
|
||||
_MVPD_CACHE = 'ap-mvpd'
|
||||
|
||||
def _download_webpage_handle(self, *args, **kwargs):
|
||||
headers = kwargs.get('headers', {})
|
||||
headers.update(self.geo_verification_headers())
|
||||
kwargs['headers'] = headers
|
||||
return super(AdobePassIE, self)._download_webpage_handle(*args, **kwargs)
|
||||
|
||||
@staticmethod
|
||||
def _get_mvpd_resource(provider_id, title, guid, rating):
|
||||
channel = etree.Element('channel')
|
||||
|
@@ -5,91 +5,52 @@ import re
|
||||
|
||||
from .turner import TurnerBaseIE
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
strip_or_none,
|
||||
)
|
||||
|
||||
|
||||
class AdultSwimIE(TurnerBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?adultswim\.com/videos/(?P<is_playlist>playlists/)?(?P<show_path>[^/]+)/(?P<episode_path>[^/?#]+)/?'
|
||||
_VALID_URL = r'https?://(?:www\.)?adultswim\.com/videos/(?P<show_path>[^/?#]+)(?:/(?P<episode_path>[^/?#]+))?'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://adultswim.com/videos/rick-and-morty/pilot',
|
||||
'playlist': [
|
||||
{
|
||||
'md5': '247572debc75c7652f253c8daa51a14d',
|
||||
'info_dict': {
|
||||
'id': 'rQxZvXQ4ROaSOqq-or2Mow-0',
|
||||
'ext': 'flv',
|
||||
'title': 'Rick and Morty - Pilot Part 1',
|
||||
'description': "Rick moves in with his daughter's family and establishes himself as a bad influence on his grandson, Morty. "
|
||||
},
|
||||
},
|
||||
{
|
||||
'md5': '77b0e037a4b20ec6b98671c4c379f48d',
|
||||
'info_dict': {
|
||||
'id': 'rQxZvXQ4ROaSOqq-or2Mow-3',
|
||||
'ext': 'flv',
|
||||
'title': 'Rick and Morty - Pilot Part 4',
|
||||
'description': "Rick moves in with his daughter's family and establishes himself as a bad influence on his grandson, Morty. "
|
||||
},
|
||||
},
|
||||
],
|
||||
'info_dict': {
|
||||
'id': 'rQxZvXQ4ROaSOqq-or2Mow',
|
||||
'title': 'Rick and Morty - Pilot',
|
||||
'description': "Rick moves in with his daughter's family and establishes himself as a bad influence on his grandson, Morty. "
|
||||
},
|
||||
'skip': 'This video is only available for registered users',
|
||||
}, {
|
||||
'url': 'http://www.adultswim.com/videos/playlists/american-parenting/putting-francine-out-of-business/',
|
||||
'playlist': [
|
||||
{
|
||||
'md5': '2eb5c06d0f9a1539da3718d897f13ec5',
|
||||
'info_dict': {
|
||||
'id': '-t8CamQlQ2aYZ49ItZCFog-0',
|
||||
'ext': 'flv',
|
||||
'title': 'American Dad - Putting Francine Out of Business',
|
||||
'description': 'Stan hatches a plan to get Francine out of the real estate business.Watch more American Dad on [adult swim].'
|
||||
},
|
||||
}
|
||||
],
|
||||
'info_dict': {
|
||||
'id': '-t8CamQlQ2aYZ49ItZCFog',
|
||||
'title': 'American Dad - Putting Francine Out of Business',
|
||||
'description': 'Stan hatches a plan to get Francine out of the real estate business.Watch more American Dad on [adult swim].'
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.adultswim.com/videos/tim-and-eric-awesome-show-great-job/dr-steve-brule-for-your-wine/',
|
||||
'playlist': [
|
||||
{
|
||||
'md5': '3e346a2ab0087d687a05e1e7f3b3e529',
|
||||
'info_dict': {
|
||||
'id': 'sY3cMUR_TbuE4YmdjzbIcQ-0',
|
||||
'ext': 'mp4',
|
||||
'title': 'Tim and Eric Awesome Show Great Job! - Dr. Steve Brule, For Your Wine',
|
||||
'description': 'Dr. Brule reports live from Wine Country with a special report on wines. \r\nWatch Tim and Eric Awesome Show Great Job! episode #20, "Embarrassed" on Adult Swim.\r\n\r\n',
|
||||
},
|
||||
}
|
||||
],
|
||||
'info_dict': {
|
||||
'id': 'sY3cMUR_TbuE4YmdjzbIcQ',
|
||||
'title': 'Tim and Eric Awesome Show Great Job! - Dr. Steve Brule, For Your Wine',
|
||||
'description': 'Dr. Brule reports live from Wine Country with a special report on wines. \r\nWatch Tim and Eric Awesome Show Great Job! episode #20, "Embarrassed" on Adult Swim.\r\n\r\n',
|
||||
'title': 'Rick and Morty - Pilot',
|
||||
'description': 'Rick moves in with his daughter\'s family and establishes himself as a bad influence on his grandson, Morty.',
|
||||
'timestamp': 1493267400,
|
||||
'upload_date': '20170427',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
'expected_warnings': ['Unable to download f4m manifest'],
|
||||
}, {
|
||||
'url': 'http://www.adultswim.com/videos/tim-and-eric-awesome-show-great-job/dr-steve-brule-for-your-wine/',
|
||||
'info_dict': {
|
||||
'id': 'sY3cMUR_TbuE4YmdjzbIcQ',
|
||||
'ext': 'mp4',
|
||||
'title': 'Tim and Eric Awesome Show Great Job! - Dr. Steve Brule, For Your Wine',
|
||||
'description': 'Dr. Brule reports live from Wine Country with a special report on wines. \nWatch Tim and Eric Awesome Show Great Job! episode #20, "Embarrassed" on Adult Swim.',
|
||||
'upload_date': '20080124',
|
||||
'timestamp': 1201150800,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# heroMetadata.trailer
|
||||
'url': 'http://www.adultswim.com/videos/decker/inside-decker-a-new-hero/',
|
||||
'info_dict': {
|
||||
'id': 'I0LQFQkaSUaFp8PnAWHhoQ',
|
||||
'ext': 'mp4',
|
||||
'title': 'Decker - Inside Decker: A New Hero',
|
||||
'description': 'md5:c916df071d425d62d70c86d4399d3ee0',
|
||||
'duration': 249.008,
|
||||
'description': 'The guys recap the conclusion of the season. They announce a new hero, take a peek into the Victorville Film Archive and welcome back the talented James Dean.',
|
||||
'timestamp': 1469480460,
|
||||
'upload_date': '20160725',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
@@ -97,136 +58,102 @@ class AdultSwimIE(TurnerBaseIE):
|
||||
},
|
||||
'expected_warnings': ['Unable to download f4m manifest'],
|
||||
}, {
|
||||
'url': 'http://www.adultswim.com/videos/toonami/friday-october-14th-2016/',
|
||||
'url': 'http://www.adultswim.com/videos/attack-on-titan',
|
||||
'info_dict': {
|
||||
'id': 'eYiLsKVgQ6qTC6agD67Sig',
|
||||
'title': 'Toonami - Friday, October 14th, 2016',
|
||||
'description': 'md5:99892c96ffc85e159a428de85c30acde',
|
||||
'id': 'b7A69dzfRzuaXIECdxW8XQ',
|
||||
'title': 'Attack on Titan',
|
||||
'description': 'md5:6c8e003ea0777b47013e894767f5e114',
|
||||
},
|
||||
'playlist': [{
|
||||
'md5': '',
|
||||
'playlist_mincount': 12,
|
||||
}, {
|
||||
'url': 'http://www.adultswim.com/videos/streams/williams-stream',
|
||||
'info_dict': {
|
||||
'id': 'eYiLsKVgQ6qTC6agD67Sig',
|
||||
'id': 'd8DEBj7QRfetLsRgFnGEyg',
|
||||
'ext': 'mp4',
|
||||
'title': 'Toonami - Friday, October 14th, 2016',
|
||||
'description': 'md5:99892c96ffc85e159a428de85c30acde',
|
||||
'title': r're:^Williams Stream \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
|
||||
'description': 'original programming',
|
||||
},
|
||||
}],
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['Unable to download f4m manifest'],
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def find_video_info(collection, slug):
|
||||
for video in collection.get('videos'):
|
||||
if video.get('slug') == slug:
|
||||
return video
|
||||
|
||||
@staticmethod
|
||||
def find_collection_by_linkURL(collections, linkURL):
|
||||
for collection in collections:
|
||||
if collection.get('linkURL') == linkURL:
|
||||
return collection
|
||||
|
||||
@staticmethod
|
||||
def find_collection_containing_video(collections, slug):
|
||||
for collection in collections:
|
||||
for video in collection.get('videos'):
|
||||
if video.get('slug') == slug:
|
||||
return collection, video
|
||||
return None, None
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
show_path = mobj.group('show_path')
|
||||
episode_path = mobj.group('episode_path')
|
||||
is_playlist = True if mobj.group('is_playlist') else False
|
||||
show_path, episode_path = re.match(self._VALID_URL, url).groups()
|
||||
display_id = episode_path or show_path
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
initial_data = self._parse_json(self._search_regex(
|
||||
r'AS_INITIAL_DATA(?:__)?\s*=\s*({.+?});',
|
||||
webpage, 'initial data'), display_id)
|
||||
|
||||
webpage = self._download_webpage(url, episode_path)
|
||||
is_stream = show_path == 'streams'
|
||||
if is_stream:
|
||||
if not episode_path:
|
||||
episode_path = 'live-stream'
|
||||
|
||||
# Extract the value of `bootstrappedData` from the Javascript in the page.
|
||||
bootstrapped_data = self._parse_json(self._search_regex(
|
||||
r'var bootstrappedData = ({.*});', webpage, 'bootstraped data'), episode_path)
|
||||
|
||||
# Downloading videos from a /videos/playlist/ URL needs to be handled differently.
|
||||
# NOTE: We are only downloading one video (the current one) not the playlist
|
||||
if is_playlist:
|
||||
collections = bootstrapped_data['playlists']['collections']
|
||||
collection = self.find_collection_by_linkURL(collections, show_path)
|
||||
video_info = self.find_video_info(collection, episode_path)
|
||||
|
||||
show_title = video_info['showTitle']
|
||||
segment_ids = [video_info['videoPlaybackID']]
|
||||
else:
|
||||
collections = bootstrapped_data['show']['collections']
|
||||
collection, video_info = self.find_collection_containing_video(collections, episode_path)
|
||||
# Video wasn't found in the collections, let's try `slugged_video`.
|
||||
if video_info is None:
|
||||
if bootstrapped_data.get('slugged_video', {}).get('slug') == episode_path:
|
||||
video_info = bootstrapped_data['slugged_video']
|
||||
if not video_info:
|
||||
video_info = bootstrapped_data.get(
|
||||
'heroMetadata', {}).get('trailer', {}).get('video')
|
||||
if not video_info:
|
||||
video_info = bootstrapped_data.get('onlineOriginals', [None])[0]
|
||||
if not video_info:
|
||||
raise ExtractorError('Unable to find video info')
|
||||
|
||||
show = bootstrapped_data['show']
|
||||
show_title = show['title']
|
||||
stream = video_info.get('stream')
|
||||
if stream and stream.get('videoPlaybackID'):
|
||||
segment_ids = [stream['videoPlaybackID']]
|
||||
elif video_info.get('clips'):
|
||||
segment_ids = [clip['videoPlaybackID'] for clip in video_info['clips']]
|
||||
elif video_info.get('videoPlaybackID'):
|
||||
segment_ids = [video_info['videoPlaybackID']]
|
||||
elif video_info.get('id'):
|
||||
segment_ids = [video_info['id']]
|
||||
else:
|
||||
if video_info.get('auth') is True:
|
||||
raise ExtractorError(
|
||||
'This video is only available via cable service provider subscription that'
|
||||
' is not currently supported. You may want to use --cookies.', expected=True)
|
||||
else:
|
||||
raise ExtractorError('Unable to find stream or clips')
|
||||
|
||||
episode_id = video_info['id']
|
||||
episode_title = video_info['title']
|
||||
episode_description = video_info.get('description')
|
||||
episode_duration = int_or_none(video_info.get('duration'))
|
||||
view_count = int_or_none(video_info.get('views'))
|
||||
video_data = next(stream for stream_path, stream in initial_data['streams'].items() if stream_path == episode_path)
|
||||
video_id = video_data.get('stream')
|
||||
|
||||
if not video_id:
|
||||
entries = []
|
||||
for part_num, segment_id in enumerate(segment_ids):
|
||||
segement_info = self._extract_cvp_info(
|
||||
'http://www.adultswim.com/videos/api/v0/assets?id=%s&platform=desktop' % segment_id,
|
||||
segment_id, {
|
||||
for episode in video_data.get('archiveEpisodes', []):
|
||||
episode_url = episode.get('url')
|
||||
if not episode_url:
|
||||
continue
|
||||
entries.append(self.url_result(
|
||||
episode_url, 'AdultSwim', episode.get('id')))
|
||||
return self.playlist_result(
|
||||
entries, video_data.get('id'), video_data.get('title'),
|
||||
strip_or_none(video_data.get('description')))
|
||||
else:
|
||||
show_data = initial_data['show']
|
||||
|
||||
if not episode_path:
|
||||
entries = []
|
||||
for video in show_data.get('videos', []):
|
||||
slug = video.get('slug')
|
||||
if not slug:
|
||||
continue
|
||||
entries.append(self.url_result(
|
||||
'http://adultswim.com/videos/%s/%s' % (show_path, slug),
|
||||
'AdultSwim', video.get('id')))
|
||||
return self.playlist_result(
|
||||
entries, show_data.get('id'), show_data.get('title'),
|
||||
strip_or_none(show_data.get('metadata', {}).get('description')))
|
||||
|
||||
video_data = show_data['sluggedVideo']
|
||||
video_id = video_data['id']
|
||||
|
||||
info = self._extract_cvp_info(
|
||||
'http://www.adultswim.com/videos/api/v0/assets?platform=desktop&id=' + video_id,
|
||||
video_id, {
|
||||
'secure': {
|
||||
'media_src': 'http://androidhls-secure.cdn.turner.com/adultswim/big',
|
||||
'tokenizer_src': 'http://www.adultswim.com/astv/mvpd/processors/services/token_ipadAdobe.do',
|
||||
},
|
||||
}, {
|
||||
'url': url,
|
||||
'site_name': 'AdultSwim',
|
||||
'auth_required': video_data.get('auth'),
|
||||
})
|
||||
segment_title = '%s - %s' % (show_title, episode_title)
|
||||
if len(segment_ids) > 1:
|
||||
segment_title += ' Part %d' % (part_num + 1)
|
||||
segement_info.update({
|
||||
'id': segment_id,
|
||||
'title': segment_title,
|
||||
'description': episode_description,
|
||||
})
|
||||
entries.append(segement_info)
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': episode_id,
|
||||
'display_id': episode_path,
|
||||
'entries': entries,
|
||||
'title': '%s - %s' % (show_title, episode_title),
|
||||
'description': episode_description,
|
||||
'duration': episode_duration,
|
||||
'view_count': view_count,
|
||||
}
|
||||
info.update({
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'description': info.get('description') or strip_or_none(video_data.get('description')),
|
||||
})
|
||||
if not is_stream:
|
||||
info.update({
|
||||
'duration': info.get('duration') or int_or_none(video_data.get('duration')),
|
||||
'timestamp': info.get('timestamp') or int_or_none(video_data.get('launch_date')),
|
||||
'season_number': info.get('season_number') or int_or_none(video_data.get('season_number')),
|
||||
'episode': info['title'],
|
||||
'episode_number': info.get('episode_number') or int_or_none(video_data.get('episode_number')),
|
||||
})
|
||||
|
||||
info['series'] = video_data.get('collection_title') or info.get('series')
|
||||
if info['series'] and info['series'] != info['title']:
|
||||
info['title'] = '%s - %s' % (info['series'], info['title'])
|
||||
|
||||
return info
|
||||
|
@@ -101,10 +101,14 @@ class AENetworksIE(AENetworksBaseIE):
|
||||
for season_url_path in re.findall(r'(?s)<li[^>]+data-href="(/shows/%s/season-\d+)"' % url_parts[0], webpage):
|
||||
entries.append(self.url_result(
|
||||
compat_urlparse.urljoin(url, season_url_path), 'AENetworks'))
|
||||
if entries:
|
||||
return self.playlist_result(
|
||||
entries, self._html_search_meta('aetn:SeriesId', webpage),
|
||||
self._html_search_meta('aetn:SeriesTitle', webpage))
|
||||
elif url_parts_len == 2:
|
||||
else:
|
||||
# single season
|
||||
url_parts_len = 2
|
||||
if url_parts_len == 2:
|
||||
entries = []
|
||||
for episode_item in re.findall(r'(?s)<[^>]+class="[^"]*(?:episode|program)-item[^"]*"[^>]*>', webpage):
|
||||
episode_attributes = extract_attributes(episode_item)
|
||||
@@ -112,7 +116,7 @@ class AENetworksIE(AENetworksBaseIE):
|
||||
url, episode_attributes['data-canonical'])
|
||||
entries.append(self.url_result(
|
||||
episode_url, 'AENetworks',
|
||||
episode_attributes['data-videoid']))
|
||||
episode_attributes.get('data-videoid') or episode_attributes.get('data-video-id')))
|
||||
return self.playlist_result(
|
||||
entries, self._html_search_meta('aetn:SeasonId', webpage))
|
||||
|
||||
|
@@ -207,11 +207,10 @@ class AfreecaTVIE(InfoExtractor):
|
||||
file_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls',
|
||||
note='Downloading part %d m3u8 information' % file_num)
|
||||
title = title if one else '%s (part %d)' % (title, file_num)
|
||||
file_info = common_entry.copy()
|
||||
file_info.update({
|
||||
'id': format_id,
|
||||
'title': title,
|
||||
'title': title if one else '%s (part %d)' % (title, file_num),
|
||||
'upload_date': upload_date,
|
||||
'duration': file_duration,
|
||||
'formats': formats,
|
||||
|
@@ -4,9 +4,9 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class AlJazeeraIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?aljazeera\.com/programmes/.*?/(?P<id>[^/]+)\.html'
|
||||
_VALID_URL = r'https?://(?:www\.)?aljazeera\.com/(?:programmes|video)/.*?/(?P<id>[^/]+)\.html'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.aljazeera.com/programmes/the-slum/2014/08/deliverance-201482883754237240.html',
|
||||
'info_dict': {
|
||||
'id': '3792260579001',
|
||||
@@ -19,7 +19,10 @@ class AlJazeeraIE(InfoExtractor):
|
||||
},
|
||||
'add_ie': ['BrightcoveNew'],
|
||||
'skip': 'Not accessible from Travis CI server',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.aljazeera.com/video/news/2017/05/sierra-leone-709-carat-diamond-auctioned-170511100111930.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/665003303001/default_default/index.html?videoId=%s'
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -7,15 +7,19 @@ from ..utils import (
|
||||
parse_iso8601,
|
||||
mimetype2ext,
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class AMPIE(InfoExtractor):
|
||||
# parse Akamai Adaptive Media Player feed
|
||||
def _extract_feed_info(self, url):
|
||||
item = self._download_json(
|
||||
feed = self._download_json(
|
||||
url, None, 'Downloading Akamai AMP feed',
|
||||
'Unable to download Akamai AMP feed')['channel']['item']
|
||||
'Unable to download Akamai AMP feed')
|
||||
item = feed.get('channel', {}).get('item')
|
||||
if not item:
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, feed['error']))
|
||||
|
||||
video_id = item['guid']
|
||||
|
||||
@@ -30,9 +34,12 @@ class AMPIE(InfoExtractor):
|
||||
if isinstance(media_thumbnail, dict):
|
||||
media_thumbnail = [media_thumbnail]
|
||||
for thumbnail_data in media_thumbnail:
|
||||
thumbnail = thumbnail_data['@attributes']
|
||||
thumbnail = thumbnail_data.get('@attributes', {})
|
||||
thumbnail_url = thumbnail.get('url')
|
||||
if not thumbnail_url:
|
||||
continue
|
||||
thumbnails.append({
|
||||
'url': self._proto_relative_url(thumbnail['url'], 'http:'),
|
||||
'url': self._proto_relative_url(thumbnail_url, 'http:'),
|
||||
'width': int_or_none(thumbnail.get('width')),
|
||||
'height': int_or_none(thumbnail.get('height')),
|
||||
})
|
||||
@@ -43,9 +50,14 @@ class AMPIE(InfoExtractor):
|
||||
if isinstance(media_subtitle, dict):
|
||||
media_subtitle = [media_subtitle]
|
||||
for subtitle_data in media_subtitle:
|
||||
subtitle = subtitle_data['@attributes']
|
||||
lang = subtitle.get('lang') or 'en'
|
||||
subtitles[lang] = [{'url': subtitle['href']}]
|
||||
subtitle = subtitle_data.get('@attributes', {})
|
||||
subtitle_href = subtitle.get('href')
|
||||
if not subtitle_href:
|
||||
continue
|
||||
subtitles.setdefault(subtitle.get('lang') or 'en', []).append({
|
||||
'url': subtitle_href,
|
||||
'ext': mimetype2ext(subtitle.get('type')) or determine_ext(subtitle_href),
|
||||
})
|
||||
|
||||
formats = []
|
||||
media_content = get_media_node('content')
|
||||
|
@@ -5,6 +5,7 @@ import base64
|
||||
import hashlib
|
||||
import json
|
||||
import random
|
||||
import re
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -16,6 +17,7 @@ from ..utils import (
|
||||
intlist_to_bytes,
|
||||
int_or_none,
|
||||
strip_jsonp,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
||||
@@ -26,6 +28,8 @@ def md5_text(s):
|
||||
|
||||
|
||||
class AnvatoIE(InfoExtractor):
|
||||
_VALID_URL = r'anvato:(?P<access_key_or_mcp>[^:]+):(?P<id>\d+)'
|
||||
|
||||
# Copied from anvplayer.min.js
|
||||
_ANVACK_TABLE = {
|
||||
'nbcu_nbcd_desktop_web_prod_93d8ead38ce2024f8f544b78306fbd15895ae5e6': 'NNemUkySjxLyPTKvZRiGntBIjEyK8uqicjMakIaQ',
|
||||
@@ -114,6 +118,22 @@ class AnvatoIE(InfoExtractor):
|
||||
'nbcu_nbcd_desktop_web_prod_93d8ead38ce2024f8f544b78306fbd15895ae5e6_secure': 'NNemUkySjxLyPTKvZRiGntBIjEyK8uqicjMakIaQ'
|
||||
}
|
||||
|
||||
_MCP_TO_ACCESS_KEY_TABLE = {
|
||||
'qa': 'anvato_mcpqa_demo_web_stage_18b55e00db5a13faa8d03ae6e41f6f5bcb15b922',
|
||||
'lin': 'anvato_mcp_lin_web_prod_4c36fbfd4d8d8ecae6488656e21ac6d1ac972749',
|
||||
'univison': 'anvato_mcp_univision_web_prod_37fe34850c99a3b5cdb71dab10a417dd5cdecafa',
|
||||
'uni': 'anvato_mcp_univision_web_prod_37fe34850c99a3b5cdb71dab10a417dd5cdecafa',
|
||||
'dev': 'anvato_mcp_fs2go_web_prod_c7b90a93e171469cdca00a931211a2f556370d0a',
|
||||
'sps': 'anvato_mcp_sps_web_prod_54bdc90dd6ba21710e9f7074338365bba28da336',
|
||||
'spsstg': 'anvato_mcp_sps_web_prod_54bdc90dd6ba21710e9f7074338365bba28da336',
|
||||
'anv': 'anvato_mcp_anv_web_prod_791407490f4c1ef2a4bcb21103e0cb1bcb3352b3',
|
||||
'gray': 'anvato_mcp_gray_web_prod_4c10f067c393ed8fc453d3930f8ab2b159973900',
|
||||
'hearst': 'anvato_mcp_hearst_web_prod_5356c3de0fc7c90a3727b4863ca7fec3a4524a99',
|
||||
'cbs': 'anvato_mcp_cbs_web_prod_02f26581ff80e5bda7aad28226a8d369037f2cbe',
|
||||
'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582'
|
||||
}
|
||||
|
||||
_ANVP_RE = r'<script[^>]+\bdata-anvp\s*=\s*(["\'])(?P<anvp>(?:(?!\1).)+)\1'
|
||||
_AUTH_KEY = b'\x31\xc2\x42\x84\x9e\x73\xa0\xce'
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
@@ -178,12 +198,7 @@ class AnvatoIE(InfoExtractor):
|
||||
}
|
||||
|
||||
if ext == 'm3u8' or media_format in ('m3u8', 'm3u8-variant'):
|
||||
# Not using _extract_m3u8_formats here as individual media
|
||||
# playlists are also included in published_urls.
|
||||
if tbr is None:
|
||||
formats.append(self._m3u8_meta_format(video_url, ext='mp4', m3u8_id='hls'))
|
||||
continue
|
||||
else:
|
||||
if tbr is not None:
|
||||
a_format.update({
|
||||
'format_id': '-'.join(filter(None, ['hls', compat_str(tbr)])),
|
||||
'ext': 'mp4',
|
||||
@@ -222,9 +237,42 @@ class AnvatoIE(InfoExtractor):
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(ie, webpage, video_id):
|
||||
entries = []
|
||||
for mobj in re.finditer(AnvatoIE._ANVP_RE, webpage):
|
||||
anvplayer_data = ie._parse_json(
|
||||
mobj.group('anvp'), video_id, transform_source=unescapeHTML,
|
||||
fatal=False)
|
||||
if not anvplayer_data:
|
||||
continue
|
||||
video = anvplayer_data.get('video')
|
||||
if not isinstance(video, compat_str) or not video.isdigit():
|
||||
continue
|
||||
access_key = anvplayer_data.get('accessKey')
|
||||
if not access_key:
|
||||
mcp = anvplayer_data.get('mcp')
|
||||
if mcp:
|
||||
access_key = AnvatoIE._MCP_TO_ACCESS_KEY_TABLE.get(
|
||||
mcp.lower())
|
||||
if not access_key:
|
||||
continue
|
||||
entries.append(ie.url_result(
|
||||
'anvato:%s:%s' % (access_key, video), ie=AnvatoIE.ie_key(),
|
||||
video_id=video))
|
||||
return entries
|
||||
|
||||
def _extract_anvato_videos(self, webpage, video_id):
|
||||
anvplayer_data = self._parse_json(self._html_search_regex(
|
||||
r'<script[^>]+data-anvp=\'([^\']+)\'', webpage,
|
||||
'Anvato player data'), video_id)
|
||||
anvplayer_data = self._parse_json(
|
||||
self._html_search_regex(
|
||||
self._ANVP_RE, webpage, 'Anvato player data', group='anvp'),
|
||||
video_id)
|
||||
return self._get_anvato_videos(
|
||||
anvplayer_data['accessKey'], anvplayer_data['video'])
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
access_key, video_id = mobj.group('access_key_or_mcp', 'id')
|
||||
if access_key not in self._ANVACK_TABLE:
|
||||
access_key = self._MCP_TO_ACCESS_KEY_TABLE[access_key]
|
||||
return self._get_anvato_videos(access_key, video_id)
|
||||
|
@@ -12,13 +12,13 @@ class AppleConnectIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://itunes\.apple\.com/\w{0,2}/?post/idsa\.(?P<id>[\w-]+)'
|
||||
_TEST = {
|
||||
'url': 'https://itunes.apple.com/us/post/idsa.4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
|
||||
'md5': '10d0f2799111df4cb1c924520ca78f98',
|
||||
'md5': 'e7c38568a01ea45402570e6029206723',
|
||||
'info_dict': {
|
||||
'id': '4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
|
||||
'ext': 'm4v',
|
||||
'title': 'Energy',
|
||||
'uploader': 'Drake',
|
||||
'thumbnail': 'http://is5.mzstatic.com/image/thumb/Video5/v4/78/61/c5/7861c5fa-ad6d-294b-1464-cf7605b911d6/source/1920x1080sr.jpg',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'upload_date': '20150710',
|
||||
'timestamp': 1436545535,
|
||||
},
|
||||
|
@@ -70,7 +70,8 @@ class AppleTrailersIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://trailers.apple.com/trailers/magnolia/blackthorn/',
|
||||
'info_dict': {
|
||||
'id': 'blackthorn',
|
||||
'id': '4489',
|
||||
'title': 'Blackthorn',
|
||||
},
|
||||
'playlist_mincount': 2,
|
||||
'expected_warnings': ['Unable to download JSON metadata'],
|
||||
@@ -261,7 +262,7 @@ class AppleTrailersSectionIE(InfoExtractor):
|
||||
'title': 'Most Popular',
|
||||
'id': 'mostpopular',
|
||||
},
|
||||
'playlist_mincount': 80,
|
||||
'playlist_mincount': 30,
|
||||
}, {
|
||||
'url': 'http://trailers.apple.com/#section=moviestudios',
|
||||
'info_dict': {
|
||||
|
@@ -24,12 +24,12 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
}
|
||||
}, {
|
||||
'url': 'https://archive.org/details/Cops1922',
|
||||
'md5': 'bc73c8ab3838b5a8fc6c6651fa7b58ba',
|
||||
'md5': '0869000b4ce265e8ca62738b336b268a',
|
||||
'info_dict': {
|
||||
'id': 'Cops1922',
|
||||
'ext': 'mp4',
|
||||
'title': 'Buster Keaton\'s "Cops" (1922)',
|
||||
'description': 'md5:b4544662605877edd99df22f9620d858',
|
||||
'description': 'md5:89e7c77bf5d965dd5c0372cfb49470f6',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://archive.org/embed/XD300-23_68HighlightsAResearchCntAugHumanIntellect',
|
||||
|
@@ -180,7 +180,7 @@ class ArteTVBaseIE(InfoExtractor):
|
||||
|
||||
class ArteTVPlus7IE(ArteTVBaseIE):
|
||||
IE_NAME = 'arte.tv:+7'
|
||||
_VALID_URL = r'https?://(?:(?:www|sites)\.)?arte\.tv/[^/]+/(?P<lang>fr|de|en|es)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'https?://(?:(?:www|sites)\.)?arte\.tv/(?:[^/]+/)?(?P<lang>fr|de|en|es)/(?:videos/)?(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.arte.tv/guide/de/sendungen/XEN/xenius/?vid=055918-015_PLUS7-D',
|
||||
@@ -188,6 +188,9 @@ class ArteTVPlus7IE(ArteTVBaseIE):
|
||||
}, {
|
||||
'url': 'http://sites.arte.tv/karambolage/de/video/karambolage-22',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.arte.tv/de/videos/048696-000-A/der-kluge-bauch-unser-zweites-gehirn',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
|
@@ -36,7 +36,7 @@ class AtresPlayerIE(InfoExtractor):
|
||||
},
|
||||
{
|
||||
'url': 'http://www.atresplayer.com/television/especial/videoencuentros/temporada-1/capitulo-112-david-bustamante_2014121600375.html',
|
||||
'md5': '0d0e918533bbd4b263f2de4d197d4aac',
|
||||
'md5': '6e52cbb513c405e403dbacb7aacf8747',
|
||||
'info_dict': {
|
||||
'id': 'capitulo-112-david-bustamante',
|
||||
'ext': 'flv',
|
||||
|
@@ -16,7 +16,7 @@ class AudioBoomIE(InfoExtractor):
|
||||
'title': '3/09/2016 Czaban Hour 3',
|
||||
'description': 'Guest: Nate Davis - NFL free agency, Guest: Stan Gans',
|
||||
'duration': 2245.72,
|
||||
'uploader': 'Steve Czaban',
|
||||
'uploader': 'SB Nation A.M.',
|
||||
'uploader_url': r're:https?://(?:www\.)?audioboom\.com/channel/steveczabanyahoosportsradio',
|
||||
}
|
||||
}, {
|
||||
|
@@ -1,140 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
class AzubuIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?azubu\.(?:tv|uol.com.br)/[^/]+#!/play/(?P<id>\d+)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.azubu.tv/GSL#!/play/15575/2014-hot6-cup-last-big-match-ro8-day-1',
|
||||
'md5': 'a88b42fcf844f29ad6035054bd9ecaf4',
|
||||
'info_dict': {
|
||||
'id': '15575',
|
||||
'ext': 'mp4',
|
||||
'title': '2014 HOT6 CUP LAST BIG MATCH Ro8 Day 1',
|
||||
'description': 'md5:d06bdea27b8cc4388a90ad35b5c66c01',
|
||||
'thumbnail': r're:^https?://.*\.jpe?g',
|
||||
'timestamp': 1417523507.334,
|
||||
'upload_date': '20141202',
|
||||
'duration': 9988.7,
|
||||
'uploader': 'GSL',
|
||||
'uploader_id': 414310,
|
||||
'view_count': int,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.azubu.tv/FnaticTV#!/play/9344/-fnatic-at-worlds-2014:-toyz---%22i-love-rekkles,-he-has-amazing-mechanics%22-',
|
||||
'md5': 'b72a871fe1d9f70bd7673769cdb3b925',
|
||||
'info_dict': {
|
||||
'id': '9344',
|
||||
'ext': 'mp4',
|
||||
'title': 'Fnatic at Worlds 2014: Toyz - "I love Rekkles, he has amazing mechanics"',
|
||||
'description': 'md5:4a649737b5f6c8b5c5be543e88dc62af',
|
||||
'thumbnail': r're:^https?://.*\.jpe?g',
|
||||
'timestamp': 1410530893.320,
|
||||
'upload_date': '20140912',
|
||||
'duration': 172.385,
|
||||
'uploader': 'FnaticTV',
|
||||
'uploader_id': 272749,
|
||||
'view_count': int,
|
||||
},
|
||||
'skip': 'Channel offline',
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
data = self._download_json(
|
||||
'http://www.azubu.tv/api/video/%s' % video_id, video_id)['data']
|
||||
|
||||
title = data['title'].strip()
|
||||
description = data.get('description')
|
||||
thumbnail = data.get('thumbnail')
|
||||
view_count = data.get('view_count')
|
||||
user = data.get('user', {})
|
||||
uploader = user.get('username')
|
||||
uploader_id = user.get('id')
|
||||
|
||||
stream_params = json.loads(data['stream_params'])
|
||||
|
||||
timestamp = float_or_none(stream_params.get('creationDate'), 1000)
|
||||
duration = float_or_none(stream_params.get('length'), 1000)
|
||||
|
||||
renditions = stream_params.get('renditions') or []
|
||||
video = stream_params.get('FLVFullLength') or stream_params.get('videoFullLength')
|
||||
if video:
|
||||
renditions.append(video)
|
||||
|
||||
if not renditions and not user.get('channel', {}).get('is_live', True):
|
||||
raise ExtractorError('%s said: channel is offline.' % self.IE_NAME, expected=True)
|
||||
|
||||
formats = [{
|
||||
'url': fmt['url'],
|
||||
'width': fmt['frameWidth'],
|
||||
'height': fmt['frameHeight'],
|
||||
'vbr': float_or_none(fmt['encodingRate'], 1000),
|
||||
'filesize': fmt['size'],
|
||||
'vcodec': fmt['videoCodec'],
|
||||
'container': fmt['videoContainer'],
|
||||
} for fmt in renditions if fmt['url']]
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'timestamp': timestamp,
|
||||
'duration': duration,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'view_count': view_count,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class AzubuLiveIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?azubu\.(?:tv|uol.com.br)/(?P<id>[^/]+)$'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.azubu.tv/MarsTVMDLen',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://azubu.uol.com.br/adolfz',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
user = self._match_id(url)
|
||||
|
||||
info = self._download_json(
|
||||
'http://api.azubu.tv/public/modules/last-video/{0}/info'.format(user),
|
||||
user)['data']
|
||||
if info['type'] != 'STREAM':
|
||||
raise ExtractorError('{0} is not streaming live'.format(user), expected=True)
|
||||
|
||||
req = sanitized_Request(
|
||||
'https://edge-elb.api.brightcove.com/playback/v1/accounts/3361910549001/videos/ref:' + info['reference_id'])
|
||||
req.add_header('Accept', 'application/json;pk=BCpkADawqM1gvI0oGWg8dxQHlgT8HkdE2LnAlWAZkOlznO39bSZX726u4JqnDsK3MDXcO01JxXK2tZtJbgQChxgaFzEVdHRjaDoxaOu8hHOO8NYhwdxw9BzvgkvLUlpbDNUuDoc4E4wxDToV')
|
||||
bc_info = self._download_json(req, user)
|
||||
m3u8_url = next(source['src'] for source in bc_info['sources'] if source['container'] == 'M2TS')
|
||||
formats = self._extract_m3u8_formats(m3u8_url, user, ext='mp4')
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': info['id'],
|
||||
'title': self._live_title(info['title']),
|
||||
'uploader_id': user,
|
||||
'formats': formats,
|
||||
'is_live': True,
|
||||
'thumbnail': bc_info['poster'],
|
||||
}
|
@@ -14,14 +14,16 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
KNOWN_EXTENSIONS,
|
||||
parse_filesize,
|
||||
unescapeHTML,
|
||||
update_url_query,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class BandcampIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://.*?\.bandcamp\.com/track/(?P<title>.*)'
|
||||
_VALID_URL = r'https?://.*?\.bandcamp\.com/track/(?P<title>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
|
||||
'md5': 'c557841d5e50261777a6585648adf439',
|
||||
@@ -34,12 +36,12 @@ class BandcampIE(InfoExtractor):
|
||||
'_skip': 'There is a limit of 200 free downloads / month for the test song'
|
||||
}, {
|
||||
'url': 'http://benprunty.bandcamp.com/track/lanius-battle',
|
||||
'md5': '73d0b3171568232574e45652f8720b5c',
|
||||
'md5': '0369ace6b939f0927e62c67a1a8d9fa7',
|
||||
'info_dict': {
|
||||
'id': '2650410135',
|
||||
'ext': 'mp3',
|
||||
'title': 'Lanius (Battle)',
|
||||
'uploader': 'Ben Prunty Music',
|
||||
'ext': 'aiff',
|
||||
'title': 'Ben Prunty - Lanius (Battle)',
|
||||
'uploader': 'Ben Prunty',
|
||||
},
|
||||
}]
|
||||
|
||||
@@ -47,6 +49,7 @@ class BandcampIE(InfoExtractor):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
title = mobj.group('title')
|
||||
webpage = self._download_webpage(url, title)
|
||||
thumbnail = self._html_search_meta('og:image', webpage, default=None)
|
||||
m_download = re.search(r'freeDownloadPage: "(.*?)"', webpage)
|
||||
if not m_download:
|
||||
m_trackinfo = re.search(r'trackinfo: (.+),\s*?\n', webpage)
|
||||
@@ -75,6 +78,7 @@ class BandcampIE(InfoExtractor):
|
||||
return {
|
||||
'id': track_id,
|
||||
'title': data['title'],
|
||||
'thumbnail': thumbnail,
|
||||
'formats': formats,
|
||||
'duration': float_or_none(data.get('duration')),
|
||||
}
|
||||
@@ -143,7 +147,7 @@ class BandcampIE(InfoExtractor):
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'thumbnail': info.get('thumb_url'),
|
||||
'thumbnail': info.get('thumb_url') or thumbnail,
|
||||
'uploader': info.get('artist'),
|
||||
'artist': artist,
|
||||
'track': track,
|
||||
@@ -153,7 +157,7 @@ class BandcampIE(InfoExtractor):
|
||||
|
||||
class BandcampAlbumIE(InfoExtractor):
|
||||
IE_NAME = 'Bandcamp:album'
|
||||
_VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<album_id>[^?#]+)|/?(?:$|[?#]))'
|
||||
_VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<album_id>[^/?#&]+))?'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
|
||||
@@ -220,6 +224,12 @@ class BandcampAlbumIE(InfoExtractor):
|
||||
'playlist_count': 2,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return (False
|
||||
if BandcampWeeklyIE.suitable(url) or BandcampIE.suitable(url)
|
||||
else super(BandcampAlbumIE, cls).suitable(url))
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
uploader_id = mobj.group('subdomain')
|
||||
@@ -248,3 +258,92 @@ class BandcampAlbumIE(InfoExtractor):
|
||||
'title': title,
|
||||
'entries': entries,
|
||||
}
|
||||
|
||||
|
||||
class BandcampWeeklyIE(InfoExtractor):
|
||||
IE_NAME = 'Bandcamp:weekly'
|
||||
_VALID_URL = r'https?://(?:www\.)?bandcamp\.com/?\?(?:.*?&)?show=(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://bandcamp.com/?show=224',
|
||||
'md5': 'b00df799c733cf7e0c567ed187dea0fd',
|
||||
'info_dict': {
|
||||
'id': '224',
|
||||
'ext': 'opus',
|
||||
'title': 'BC Weekly April 4th 2017 - Magic Moments',
|
||||
'description': 'md5:5d48150916e8e02d030623a48512c874',
|
||||
'duration': 5829.77,
|
||||
'release_date': '20170404',
|
||||
'series': 'Bandcamp Weekly',
|
||||
'episode': 'Magic Moments',
|
||||
'episode_number': 208,
|
||||
'episode_id': '224',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://bandcamp.com/?blah/blah@&show=228',
|
||||
'only_matching': True
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
blob = self._parse_json(
|
||||
self._search_regex(
|
||||
r'data-blob=(["\'])(?P<blob>{.+?})\1', webpage,
|
||||
'blob', group='blob'),
|
||||
video_id, transform_source=unescapeHTML)
|
||||
|
||||
show = blob['bcw_show']
|
||||
|
||||
# This is desired because any invalid show id redirects to `bandcamp.com`
|
||||
# which happens to expose the latest Bandcamp Weekly episode.
|
||||
show_id = int_or_none(show.get('show_id')) or int_or_none(video_id)
|
||||
|
||||
formats = []
|
||||
for format_id, format_url in show['audio_stream'].items():
|
||||
if not isinstance(format_url, compat_str):
|
||||
continue
|
||||
for known_ext in KNOWN_EXTENSIONS:
|
||||
if known_ext in format_id:
|
||||
ext = known_ext
|
||||
break
|
||||
else:
|
||||
ext = None
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': format_url,
|
||||
'ext': ext,
|
||||
'vcodec': 'none',
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = show.get('audio_title') or 'Bandcamp Weekly'
|
||||
subtitle = show.get('subtitle')
|
||||
if subtitle:
|
||||
title += ' - %s' % subtitle
|
||||
|
||||
episode_number = None
|
||||
seq = blob.get('bcw_seq')
|
||||
|
||||
if seq and isinstance(seq, list):
|
||||
try:
|
||||
episode_number = next(
|
||||
int_or_none(e.get('episode_number'))
|
||||
for e in seq
|
||||
if isinstance(e, dict) and int_or_none(e.get('id')) == show_id)
|
||||
except StopIteration:
|
||||
pass
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': show.get('desc') or show.get('short_desc'),
|
||||
'duration': float_or_none(show.get('audio_duration')),
|
||||
'is_live': False,
|
||||
'release_date': unified_strdate(show.get('published_date')),
|
||||
'series': 'Bandcamp Weekly',
|
||||
'episode': show.get('subtitle'),
|
||||
'episode_number': episode_number,
|
||||
'episode_id': compat_str(video_id),
|
||||
'formats': formats
|
||||
}
|
||||
|
@@ -6,14 +6,18 @@ import itertools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
dict_get,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
get_element_by_class,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
)
|
||||
from ..compat import (
|
||||
compat_etree_fromstring,
|
||||
@@ -38,6 +42,9 @@ class BBCCoUkIE(InfoExtractor):
|
||||
(?P<id>%s)(?!/(?:episodes|broadcasts|clips))
|
||||
''' % _ID_REGEX
|
||||
|
||||
_LOGIN_URL = 'https://account.bbc.com/signin'
|
||||
_NETRC_MACHINE = 'bbc'
|
||||
|
||||
_MEDIASELECTOR_URLS = [
|
||||
# Provides HQ HLS streams with even better quality than pc mediaset but fails
|
||||
# with geolocation in some cases when it's even not geo restricted at all (e.g.
|
||||
@@ -227,6 +234,39 @@ class BBCCoUkIE(InfoExtractor):
|
||||
|
||||
_USP_RE = r'/([^/]+?)\.ism(?:\.hlsv2\.ism)?/[^/]+\.m3u8'
|
||||
|
||||
def _login(self):
    """Sign in through the ``_LOGIN_URL`` form when credentials are available.

    Returns silently when no username is configured. Otherwise the sign-in
    page is fetched, its hidden inputs are combined with the credentials
    and the form is posted.

    Raises:
        ExtractorError: if the URL reached after posting still contains
            ``_LOGIN_URL``. The message carries the page's ``form-message``
            text when one is present.
    """
    username, password = self._get_login_info()
    if username is None:
        return

    signin_url = self._LOGIN_URL
    signin_page = self._download_webpage(
        signin_url, None, 'Downloading signin page')

    # Start from the hidden inputs of the page, then add the credentials.
    form_fields = self._hidden_inputs(signin_page)
    form_fields['username'] = username
    form_fields['password'] = password

    # The form action may be relative; fall back to the sign-in URL itself
    # when no action attribute can be found.
    form_action = self._search_regex(
        r'<form[^>]+action=(["\'])(?P<url>.+?)\1', signin_page,
        'post url', default=signin_url, group='url')
    post_url = urljoin(signin_url, form_action)

    response, urlh = self._download_webpage_handle(
        post_url, None, 'Logging in',
        data=urlencode_postdata(form_fields),
        headers={'Referer': signin_url})

    # Ending up anywhere other than the sign-in URL is treated as success.
    if signin_url not in urlh.geturl():
        return

    error = clean_html(get_element_by_class('form-message', response))
    if error:
        raise ExtractorError('Unable to login: %s' % error, expected=True)
    raise ExtractorError('Unable to log in')
|
||||
|
||||
def _real_initialize(self):
    """Initialization hook: attempt to sign in via ``_login``."""
    self._login()
|
||||
|
||||
class MediaSelectionError(Exception):
|
||||
def __init__(self, id):
|
||||
self.id = id
|
||||
|
@@ -6,18 +6,33 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
compat_str,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
try_get,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class BeamProLiveIE(InfoExtractor):
|
||||
IE_NAME = 'Beam:live'
|
||||
_VALID_URL = r'https?://(?:\w+\.)?beam\.pro/(?P<id>[^/?#&]+)'
|
||||
class BeamProBaseIE(InfoExtractor):
    """Common base holding the API root, the rating table and the channel
    metadata mapping."""

    _API_BASE = 'https://mixer.com/api/v1'
    # Maps a channel's 'audience' value to an age limit.
    _RATINGS = {'family': 0, 'teen': 13, '18+': 18}

    def _extract_channel_info(self, chan):
        """Return uploader, uploader id and age limit taken from *chan*.

        Top-level ``userId`` / ``token`` are preferred; the nested ``user``
        entry is consulted only when those are missing or falsy.
        """
        owner_id = chan.get('userId')
        if not owner_id:
            owner_id = try_get(chan, lambda c: c['user']['id'])

        owner_name = chan.get('token')
        if not owner_name:
            owner_name = try_get(
                chan, lambda c: c['user']['username'], compat_str)

        return {
            'uploader': owner_name,
            'uploader_id': compat_str(owner_id) if owner_id else None,
            # Unknown audiences yield None.
            'age_limit': self._RATINGS.get(chan.get('audience')),
        }
|
||||
|
||||
|
||||
class BeamProLiveIE(BeamProBaseIE):
|
||||
IE_NAME = 'Mixer:live'
|
||||
_VALID_URL = r'https?://(?:\w+\.)?(?:beam\.pro|mixer\.com)/(?P<id>[^/?#&]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.beam.pro/niterhayven',
|
||||
'url': 'http://mixer.com/niterhayven',
|
||||
'info_dict': {
|
||||
'id': '261562',
|
||||
'ext': 'mp4',
|
||||
@@ -38,11 +53,17 @@ class BeamProLiveIE(InfoExtractor):
|
||||
},
|
||||
}
|
||||
|
||||
_MANIFEST_URL_TEMPLATE = '%s/channels/%%s/manifest.%%s' % BeamProBaseIE._API_BASE
|
||||
|
||||
@classmethod
def suitable(cls, url):
    """Decline URLs that BeamProVodIE accepts; otherwise defer to the parent check."""
    if BeamProVodIE.suitable(url):
        return False
    return super(BeamProLiveIE, cls).suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel_name = self._match_id(url)
|
||||
|
||||
chan = self._download_json(
|
||||
'https://beam.pro/api/v1/channels/%s' % channel_name, channel_name)
|
||||
'%s/channels/%s' % (self._API_BASE, channel_name), channel_name)
|
||||
|
||||
if chan.get('online') is False:
|
||||
raise ExtractorError(
|
||||
@@ -50,24 +71,118 @@ class BeamProLiveIE(InfoExtractor):
|
||||
|
||||
channel_id = chan['id']
|
||||
|
||||
def manifest_url(kind):
|
||||
return self._MANIFEST_URL_TEMPLATE % (channel_id, kind)
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
'https://beam.pro/api/v1/channels/%s/manifest.m3u8' % channel_id,
|
||||
channel_name, ext='mp4', m3u8_id='hls', fatal=False)
|
||||
manifest_url('m3u8'), channel_name, ext='mp4', m3u8_id='hls',
|
||||
fatal=False)
|
||||
formats.extend(self._extract_smil_formats(
|
||||
manifest_url('smil'), channel_name, fatal=False))
|
||||
self._sort_formats(formats)
|
||||
|
||||
user_id = chan.get('userId') or try_get(chan, lambda x: x['user']['id'])
|
||||
|
||||
return {
|
||||
info = {
|
||||
'id': compat_str(chan.get('id') or channel_name),
|
||||
'title': self._live_title(chan.get('name') or channel_name),
|
||||
'description': clean_html(chan.get('description')),
|
||||
'thumbnail': try_get(chan, lambda x: x['thumbnail']['url'], compat_str),
|
||||
'thumbnail': try_get(
|
||||
chan, lambda x: x['thumbnail']['url'], compat_str),
|
||||
'timestamp': parse_iso8601(chan.get('updatedAt')),
|
||||
'uploader': chan.get('token') or try_get(
|
||||
chan, lambda x: x['user']['username'], compat_str),
|
||||
'uploader_id': compat_str(user_id) if user_id else None,
|
||||
'age_limit': self._RATINGS.get(chan.get('audience')),
|
||||
'is_live': True,
|
||||
'view_count': int_or_none(chan.get('viewersTotal')),
|
||||
'formats': formats,
|
||||
}
|
||||
info.update(self._extract_channel_info(chan))
|
||||
|
||||
return info
|
||||
|
||||
|
||||
class BeamProVodIE(BeamProBaseIE):
    """Extractor for recordings addressed by a ``vod=<id>`` query parameter."""

    IE_NAME = 'Mixer:vod'
    _VALID_URL = r'https?://(?:\w+\.)?(?:beam\.pro|mixer\.com)/[^/?#&]+\?.*?\bvod=(?P<id>\d+)'
    _TEST = {
        'url': 'https://mixer.com/willow8714?vod=2259830',
        'md5': 'b2431e6e8347dc92ebafb565d368b76b',
        'info_dict': {
            'id': '2259830',
            'ext': 'mp4',
            'title': 'willow8714\'s Channel',
            'duration': 6828.15,
            'thumbnail': r're:https://.*source\.png$',
            'timestamp': 1494046474,
            'upload_date': '20170506',
            'uploader': 'willow8714',
            'uploader_id': '6085379',
            'age_limit': 13,
            'view_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }

    @staticmethod
    def _extract_format(vod, vod_type):
        """Build the format list for one entry of the recording's ``vods``.

        Returns an empty list when the entry has no ``baseUrl``, otherwise
        a single-element list. *vod_type* must be ``'hls'`` or ``'raw'``.
        """
        if not vod.get('baseUrl'):
            return []

        if vod_type == 'hls':
            filename = 'manifest.m3u8'
            protocol = 'm3u8_native'
        elif vod_type == 'raw':
            filename = 'source.mp4'
            protocol = 'https'
        else:
            # Callers only pass the two types handled above.
            assert False

        # Tolerate a missing or malformed 'data' entry.
        data = vod.get('data')
        if not isinstance(data, dict):
            data = {}

        # Append the height to the format id only when it is a string.
        height = data.get('Height')
        format_id = vod_type
        if isinstance(height, compat_str):
            format_id = '%s-%sp' % (vod_type, height)

        fmt = {
            'url': urljoin(vod['baseUrl'], filename),
            'format_id': format_id,
            'ext': 'mp4',
            'protocol': protocol,
            'width': int_or_none(data.get('Width')),
            'height': int_or_none(height),
            'fps': int_or_none(data.get('Fps')),
            # NOTE(review): the 1000 is presumably a scale divisor
            # (bit/s -> kbit/s) - confirm against int_or_none.
            'tbr': int_or_none(data.get('Bitrate'), 1000),
        }
        return [fmt]

    def _real_extract(self, url):
        """Fetch the recording metadata and assemble the info dict.

        Raises:
            ExtractorError: when the recording's ``state`` is not
                ``'AVAILABLE'``.
        """
        vod_id = self._match_id(url)

        recording = self._download_json(
            '%s/recordings/%s' % (self._API_BASE, vod_id), vod_id)

        state = recording.get('state')
        if state != 'AVAILABLE':
            message = 'VOD %s is not available (state: %s)' % (vod_id, state)
            raise ExtractorError(message, expected=True)

        formats = []
        thumbnail_url = None

        # Each entry is either a playable rendition or the thumbnail.
        for entry in recording['vods']:
            kind = entry.get('format')
            if kind == 'thumbnail':
                thumbnail_url = urljoin(entry.get('baseUrl'), 'source.png')
            elif kind in ('hls', 'raw'):
                formats.extend(self._extract_format(entry, kind))

        self._sort_formats(formats)

        info = {
            'id': vod_id,
            'title': recording.get('name') or vod_id,
            'duration': float_or_none(recording.get('duration')),
            'thumbnail': thumbnail_url,
            'timestamp': parse_iso8601(recording.get('createdAt')),
            'view_count': int_or_none(recording.get('viewsTotal')),
            'formats': formats,
        }
        # Uploader details come from the embedded channel, if any.
        channel = recording.get('channel') or {}
        info.update(self._extract_channel_info(channel))

        return info
|
||||
|
@@ -16,7 +16,7 @@ class BeegIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?beeg\.com/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://beeg.com/5416503',
|
||||
'md5': '46c384def73b33dbc581262e5ee67cef',
|
||||
'md5': 'a1a1b1a8bc70a89e49ccfd113aed0820',
|
||||
'info_dict': {
|
||||
'id': '5416503',
|
||||
'ext': 'mp4',
|
||||
|
@@ -122,6 +122,11 @@ class BiliBiliIE(InfoExtractor):
|
||||
'preference': -2 if 'hd.mp4' in backup_url else -3,
|
||||
})
|
||||
|
||||
for a_format in formats:
|
||||
a_format.setdefault('http_headers', {}).update({
|
||||
'Referer': url,
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
entries.append({
|
||||
|
@@ -35,7 +35,7 @@ class BleacherReportIE(InfoExtractor):
|
||||
'title': 'Aussie Golfers Get Fright of Their Lives After Being Chased by Angry Kangaroo',
|
||||
'timestamp': 1446839961,
|
||||
'uploader': 'Sean Fay',
|
||||
'description': 'md5:825e94e0f3521df52fa83b2ed198fa20',
|
||||
'description': 'md5:b1601e2314c4d8eec23b6eafe086a757',
|
||||
'uploader_id': 6466954,
|
||||
'upload_date': '20151011',
|
||||
},
|
||||
@@ -90,17 +90,13 @@ class BleacherReportCMSIE(AMPIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/video_embed\?id=(?P<id>[0-9a-f-]{36})'
|
||||
_TESTS = [{
|
||||
'url': 'http://bleacherreport.com/video_embed?id=8fd44c2f-3dc5-4821-9118-2c825a98c0e1',
|
||||
'md5': '8c2c12e3af7805152675446c905d159b',
|
||||
'md5': '2e4b0a997f9228ffa31fada5c53d1ed1',
|
||||
'info_dict': {
|
||||
'id': '8fd44c2f-3dc5-4821-9118-2c825a98c0e1',
|
||||
'ext': 'mp4',
|
||||
'ext': 'flv',
|
||||
'title': 'Cena vs. Rollins Would Expose the Heavyweight Division',
|
||||
'description': 'md5:984afb4ade2f9c0db35f3267ed88b36e',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -77,7 +77,7 @@ class BRIE(InfoExtractor):
|
||||
'description': 'md5:bb659990e9e59905c3d41e369db1fbe3',
|
||||
'duration': 893,
|
||||
'uploader': 'Eva Maria Steimle',
|
||||
'upload_date': '20140117',
|
||||
'upload_date': '20170208',
|
||||
}
|
||||
},
|
||||
]
|
||||
|
@@ -5,6 +5,7 @@ import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .adobepass import AdobePassIE
|
||||
from ..compat import (
|
||||
compat_etree_fromstring,
|
||||
compat_parse_qs,
|
||||
@@ -131,6 +132,12 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
},
|
||||
'playlist_mincount': 10,
|
||||
},
|
||||
{
|
||||
# playerID inferred from bcpid
|
||||
# from http://www.un.org/chinese/News/story.asp?NewsID=27724
|
||||
'url': 'https://link.brightcove.com/services/player/bcpid1722935254001/?bctid=5360463607001&autoStart=false&secureConnections=true&width=650&height=350',
|
||||
'only_matching': True, # Tested in GenericIE
|
||||
}
|
||||
]
|
||||
FLV_VCODECS = {
|
||||
1: 'SORENSON',
|
||||
@@ -266,9 +273,13 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
if matches:
|
||||
return list(filter(None, [cls._build_brighcove_url(m) for m in matches]))
|
||||
|
||||
matches = re.findall(r'(customBC\.createVideo\(.+?\);)', webpage)
|
||||
if matches:
|
||||
return list(filter(None, [
|
||||
cls._build_brighcove_url_from_js(custom_bc)
|
||||
for custom_bc in re.findall(r'(customBC\.createVideo\(.+?\);)', webpage)]))
|
||||
for custom_bc in matches]))
|
||||
return [src for _, src in re.findall(
|
||||
r'<iframe[^>]+src=([\'"])((?:https?:)?//link\.brightcove\.com/services/player/(?!\1).+)\1', webpage)]
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, smuggled_data = unsmuggle_url(url, {})
|
||||
@@ -285,6 +296,10 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
if videoPlayer:
|
||||
# We set the original url as the default 'Referer' header
|
||||
referer = smuggled_data.get('Referer', url)
|
||||
if 'playerID' not in query:
|
||||
mobj = re.search(r'/bcpid(\d+)', url)
|
||||
if mobj is not None:
|
||||
query['playerID'] = [mobj.group(1)]
|
||||
return self._get_video_info(
|
||||
videoPlayer[0], query, referer=referer)
|
||||
elif 'playerKey' in query:
|
||||
@@ -434,7 +449,7 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
return info
|
||||
|
||||
|
||||
class BrightcoveNewIE(InfoExtractor):
|
||||
class BrightcoveNewIE(AdobePassIE):
|
||||
IE_NAME = 'brightcove:new'
|
||||
_VALID_URL = r'https?://players\.brightcove\.net/(?P<account_id>\d+)/(?P<player_id>[^/]+)_(?P<embed>[^/]+)/index\.html\?.*videoId=(?P<video_id>\d+|ref:[^&]+)'
|
||||
_TESTS = [{
|
||||
@@ -484,8 +499,8 @@ class BrightcoveNewIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_url(webpage):
|
||||
urls = BrightcoveNewIE._extract_urls(webpage)
|
||||
def _extract_url(ie, webpage):
|
||||
urls = BrightcoveNewIE._extract_urls(ie, webpage)
|
||||
return urls[0] if urls else None
|
||||
|
||||
@staticmethod
|
||||
@@ -508,7 +523,7 @@ class BrightcoveNewIE(InfoExtractor):
|
||||
# [2] looks like:
|
||||
for video, script_tag, account_id, player_id, embed in re.findall(
|
||||
r'''(?isx)
|
||||
(<video\s+[^>]+>)
|
||||
(<video\s+[^>]*\bdata-video-id\s*=\s*['"]?[^>]+>)
|
||||
(?:.*?
|
||||
(<script[^>]+
|
||||
src=["\'](?:https?:)?//players\.brightcove\.net/
|
||||
@@ -588,6 +603,20 @@ class BrightcoveNewIE(InfoExtractor):
|
||||
raise ExtractorError(message, expected=True)
|
||||
raise
|
||||
|
||||
errors = json_data.get('errors')
|
||||
if errors and errors[0].get('error_subcode') == 'TVE_AUTH':
|
||||
custom_fields = json_data['custom_fields']
|
||||
tve_token = self._extract_mvpd_auth(
|
||||
smuggled_data['source_url'], video_id,
|
||||
custom_fields['bcadobepassrequestorid'],
|
||||
custom_fields['bcadobepassresourceid'])
|
||||
json_data = self._download_json(
|
||||
api_url, video_id, headers={
|
||||
'Accept': 'application/json;pk=%s' % policy_key
|
||||
}, query={
|
||||
'tveToken': tve_token,
|
||||
})
|
||||
|
||||
title = json_data['name'].strip()
|
||||
|
||||
formats = []
|
||||
@@ -653,7 +682,6 @@ class BrightcoveNewIE(InfoExtractor):
|
||||
})
|
||||
formats.append(f)
|
||||
|
||||
errors = json_data.get('errors')
|
||||
if not formats and errors:
|
||||
error = errors[0]
|
||||
raise ExtractorError(
|
||||
@@ -670,7 +698,7 @@ class BrightcoveNewIE(InfoExtractor):
|
||||
|
||||
is_live = False
|
||||
duration = float_or_none(json_data.get('duration'), 1000)
|
||||
if duration and duration < 0:
|
||||
if duration is not None and duration <= 0:
|
||||
is_live = True
|
||||
|
||||
return {
|
||||
|
@@ -16,13 +16,10 @@ class Canalc2IE(InfoExtractor):
|
||||
'md5': '060158428b650f896c542dfbb3d6487f',
|
||||
'info_dict': {
|
||||
'id': '12163',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Terrasses du Numérique',
|
||||
'duration': 122,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # Requires rtmpdump
|
||||
}
|
||||
}, {
|
||||
'url': 'http://archives-canalc2.u-strasbg.fr/video.asp?idVideo=11427&voir=oui',
|
||||
'only_matching': True,
|
||||
|
@@ -96,6 +96,7 @@ class CBCIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'title': 'Keep Rover active during the deep freeze with doggie pushups and other fun indoor tasks',
|
||||
'id': 'dog-indoor-exercise-winter-1.3928238',
|
||||
'description': 'md5:c18552e41726ee95bd75210d1ca9194c',
|
||||
},
|
||||
'playlist_mincount': 6,
|
||||
}]
|
||||
@@ -165,12 +166,11 @@ class CBCPlayerIE(InfoExtractor):
|
||||
'uploader': 'CBCC-NEW',
|
||||
},
|
||||
}, {
|
||||
# available only when we add `formats=MPEG4,FLV,MP3` to theplatform url
|
||||
'url': 'http://www.cbc.ca/player/play/2164402062',
|
||||
'md5': '17a61eb813539abea40618d6323a7f82',
|
||||
'md5': '33fcd8f6719b9dd60a5e73adcb83b9f6',
|
||||
'info_dict': {
|
||||
'id': '2164402062',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Cancer survivor four times over',
|
||||
'description': 'Tim Mayer has beaten three different forms of cancer four times in five years.',
|
||||
'timestamp': 1320410746,
|
||||
|
@@ -49,13 +49,13 @@ class CBSIE(CBSBaseIE):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _extract_video_info(self, content_id):
|
||||
def _extract_video_info(self, content_id, site='cbs', mpx_acc=2198311517):
|
||||
items_data = self._download_xml(
|
||||
'http://can.cbs.com/thunder/player/videoPlayerService.php',
|
||||
content_id, query={'partner': 'cbs', 'contentId': content_id})
|
||||
content_id, query={'partner': site, 'contentId': content_id})
|
||||
video_data = xpath_element(items_data, './/item')
|
||||
title = xpath_text(video_data, 'videoTitle', 'title', True)
|
||||
tp_path = 'dJ5BDC/media/guid/2198311517/%s' % content_id
|
||||
tp_path = 'dJ5BDC/media/guid/%d/%s' % (mpx_acc, content_id)
|
||||
tp_release_url = 'http://link.theplatform.com/s/' + tp_path
|
||||
|
||||
asset_types = []
|
||||
|
@@ -3,17 +3,18 @@ from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .theplatform import ThePlatformIE
|
||||
from .cbs import CBSIE
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class CBSInteractiveIE(ThePlatformIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<site>cnet|zdnet)\.com/(?:videos|video/share)/(?P<id>[^/?]+)'
|
||||
class CBSInteractiveIE(CBSIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<site>cnet|zdnet)\.com/(?:videos|video(?:/share)?)/(?P<id>[^/?]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.cnet.com/videos/hands-on-with-microsofts-windows-8-1-update/',
|
||||
'info_dict': {
|
||||
'id': '56f4ea68-bd21-4852-b08c-4de5b8354c60',
|
||||
'ext': 'flv',
|
||||
'id': 'R49SYt__yAfmlXR85z4f7gNmCBDcN_00',
|
||||
'display_id': 'hands-on-with-microsofts-windows-8-1-update',
|
||||
'ext': 'mp4',
|
||||
'title': 'Hands-on with Microsoft Windows 8.1 Update',
|
||||
'description': 'The new update to the Windows 8 OS brings improved performance for mouse and keyboard users.',
|
||||
'uploader_id': '6085384d-619e-11e3-b231-14feb5ca9861',
|
||||
@@ -22,13 +23,19 @@ class CBSInteractiveIE(ThePlatformIE):
|
||||
'timestamp': 1396479627,
|
||||
'upload_date': '20140402',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.cnet.com/videos/whiny-pothole-tweets-at-local-government-when-hit-by-cars-tomorrow-daily-187/',
|
||||
'md5': 'f11d27b2fa18597fbf92444d2a9ed386',
|
||||
'info_dict': {
|
||||
'id': '56527b93-d25d-44e3-b738-f989ce2e49ba',
|
||||
'ext': 'flv',
|
||||
'id': 'kjOJd_OoVJqbg_ZD8MZCOk8Wekb9QccK',
|
||||
'display_id': 'whiny-pothole-tweets-at-local-government-when-hit-by-cars-tomorrow-daily-187',
|
||||
'ext': 'mp4',
|
||||
'title': 'Whiny potholes tweet at local government when hit by cars (Tomorrow Daily 187)',
|
||||
'description': 'Khail and Ashley wonder what other civic woes can be solved by self-tweeting objects, investigate a new kind of VR camera and watch an origami robot self-assemble, walk, climb, dig and dissolve. #TDPothole',
|
||||
'description': 'md5:d2b9a95a5ffe978ae6fbd4cf944d618f',
|
||||
'uploader_id': 'b163284d-6b73-44fc-b3e6-3da66c392d40',
|
||||
'uploader': 'Ashley Esqueda',
|
||||
'duration': 1482,
|
||||
@@ -38,23 +45,28 @@ class CBSInteractiveIE(ThePlatformIE):
|
||||
}, {
|
||||
'url': 'http://www.zdnet.com/video/share/video-keeping-android-smartphones-and-tablets-secure/',
|
||||
'info_dict': {
|
||||
'id': 'bc1af9f0-a2b5-4e54-880d-0d95525781c0',
|
||||
'id': 'k0r4T_ehht4xW_hAOqiVQPuBDPZ8SRjt',
|
||||
'display_id': 'video-keeping-android-smartphones-and-tablets-secure',
|
||||
'ext': 'mp4',
|
||||
'title': 'Video: Keeping Android smartphones and tablets secure',
|
||||
'description': 'Here\'s the best way to keep Android devices secure, and what you do when they\'ve come to the end of their lives.',
|
||||
'uploader_id': 'f2d97ea2-8175-11e2-9d12-0018fe8a00b0',
|
||||
'uploader': 'Adrian Kingsley-Hughes',
|
||||
'timestamp': 1448961720,
|
||||
'upload_date': '20151201',
|
||||
'duration': 731,
|
||||
'timestamp': 1449129925,
|
||||
'upload_date': '20151203',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.zdnet.com/video/huawei-matebook-x-video/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
TP_RELEASE_URL_TEMPLATE = 'http://link.theplatform.com/s/kYEXFC/%s?mbr=true'
|
||||
|
||||
MPX_ACCOUNTS = {
|
||||
'cnet': 2288573011,
|
||||
'cnet': 2198311517,
|
||||
'zdnet': 2387448114,
|
||||
}
|
||||
|
||||
@@ -68,7 +80,8 @@ class CBSInteractiveIE(ThePlatformIE):
|
||||
data = self._parse_json(data_json, display_id)
|
||||
vdata = data.get('video') or data['videos'][0]
|
||||
|
||||
video_id = vdata['id']
|
||||
video_id = vdata['mpxRefId']
|
||||
|
||||
title = vdata['title']
|
||||
author = vdata.get('author')
|
||||
if author:
|
||||
@@ -78,20 +91,7 @@ class CBSInteractiveIE(ThePlatformIE):
|
||||
uploader = None
|
||||
uploader_id = None
|
||||
|
||||
media_guid_path = 'media/guid/%d/%s' % (self.MPX_ACCOUNTS[site], vdata['mpxRefId'])
|
||||
formats, subtitles = [], {}
|
||||
for (fkey, vid) in vdata['files'].items():
|
||||
if fkey == 'hls_phone' and 'hls_tablet' in vdata['files']:
|
||||
continue
|
||||
release_url = self.TP_RELEASE_URL_TEMPLATE % vid
|
||||
if fkey == 'hds':
|
||||
release_url += '&manifest=f4m'
|
||||
tp_formats, tp_subtitles = self._extract_theplatform_smil(release_url, video_id, 'Downloading %s SMIL data' % fkey)
|
||||
formats.extend(tp_formats)
|
||||
subtitles = self._merge_subtitles(subtitles, tp_subtitles)
|
||||
self._sort_formats(formats)
|
||||
|
||||
info = self._extract_theplatform_metadata('kYEXFC/%s' % media_guid_path, video_id)
|
||||
info = self._extract_video_info(video_id, site, self.MPX_ACCOUNTS[site])
|
||||
info.update({
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
@@ -99,7 +99,5 @@ class CBSInteractiveIE(ThePlatformIE):
|
||||
'duration': int_or_none(vdata.get('duration')),
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'subtitles': subtitles,
|
||||
'formats': formats,
|
||||
})
|
||||
return info
|
||||
|
@@ -60,8 +60,8 @@ class CBSLocalIE(AnvatoIE):
|
||||
'title': 'A Very Blue Anniversary',
|
||||
'description': 'CBS2’s Cindy Hsu has more.',
|
||||
'thumbnail': 're:^https?://.*',
|
||||
'timestamp': 1479962220,
|
||||
'upload_date': '20161124',
|
||||
'timestamp': int,
|
||||
'upload_date': r're:^\d{8}$',
|
||||
'uploader': 'CBS',
|
||||
'subtitles': {
|
||||
'en': 'mincount:5',
|
||||
|
@@ -61,11 +61,17 @@ class CBSNewsIE(CBSIE):
|
||||
|
||||
video_info = self._parse_json(self._html_search_regex(
|
||||
r'(?:<ul class="media-list items" id="media-related-items"><li data-video-info|<div id="cbsNewsVideoPlayer" data-video-player-options)=\'({.+?})\'',
|
||||
webpage, 'video JSON info'), video_id)
|
||||
webpage, 'video JSON info', default='{}'), video_id, fatal=False)
|
||||
|
||||
if video_info:
|
||||
item = video_info['item'] if 'item' in video_info else video_info
|
||||
guid = item['mpxRefId']
|
||||
return self._extract_video_info(guid)
|
||||
else:
|
||||
state = self._parse_json(self._search_regex(
|
||||
r'data-cbsvideoui-options=(["\'])(?P<json>{.+?})\1', webpage,
|
||||
'playlist JSON info', group='json'), video_id)['state']
|
||||
item = state['playlist'][state['pid']]
|
||||
|
||||
return self._extract_video_info(item['mpxRefId'], 'cbsnews')
|
||||
|
||||
|
||||
class CBSNewsLiveVideoIE(InfoExtractor):
|
||||
|
@@ -9,7 +9,10 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
multipart_encode,
|
||||
parse_duration,
|
||||
random_birthday,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
@@ -27,7 +30,8 @@ class CDAIE(InfoExtractor):
|
||||
'description': 'md5:269ccd135d550da90d1662651fcb9772',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'average_rating': float,
|
||||
'duration': 39
|
||||
'duration': 39,
|
||||
'age_limit': 0,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.cda.pl/video/57413289',
|
||||
@@ -41,13 +45,41 @@ class CDAIE(InfoExtractor):
|
||||
'uploader': 'crash404',
|
||||
'view_count': int,
|
||||
'average_rating': float,
|
||||
'duration': 137
|
||||
'duration': 137,
|
||||
'age_limit': 0,
|
||||
}
|
||||
}, {
|
||||
# Age-restricted
|
||||
'url': 'http://www.cda.pl/video/1273454c4',
|
||||
'info_dict': {
|
||||
'id': '1273454c4',
|
||||
'ext': 'mp4',
|
||||
'title': 'Bronson (2008) napisy HD 1080p',
|
||||
'description': 'md5:1b6cb18508daf2dc4e0fa4db77fec24c',
|
||||
'height': 1080,
|
||||
'uploader': 'boniek61',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 5554,
|
||||
'age_limit': 18,
|
||||
'view_count': int,
|
||||
'average_rating': float,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://ebd.cda.pl/0x0/5749950c',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _download_age_confirm_page(self, url, video_id, *args, **kwargs):
    """Post a birthday form to ``/a/validatebirth`` and return the page.

    The form is built by ``random_birthday`` and extended with the return
    URL and the video id, then sent multipart-encoded with *url* as the
    referer. Extra positional and keyword arguments are forwarded to
    ``_download_webpage``.
    """
    birthday_form = random_birthday('rok', 'miesiac', 'dzien')
    birthday_form['return'] = url
    birthday_form['module'] = 'video'
    birthday_form['module_id'] = video_id

    payload, content_type = multipart_encode(birthday_form)
    confirm_url = urljoin(url, '/a/validatebirth')
    request_headers = {
        'Referer': url,
        'Content-Type': content_type,
    }
    return self._download_webpage(
        confirm_url, video_id, *args,
        data=payload, headers=request_headers, **kwargs)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
self._set_cookie('cda.pl', 'cda.player', 'html5')
|
||||
@@ -57,6 +89,13 @@ class CDAIE(InfoExtractor):
|
||||
if 'Ten film jest dostępny dla użytkowników premium' in webpage:
|
||||
raise ExtractorError('This video is only available for premium users.', expected=True)
|
||||
|
||||
need_confirm_age = False
|
||||
if self._html_search_regex(r'(<form[^>]+action="/a/validatebirth")',
|
||||
webpage, 'birthday validate form', default=None):
|
||||
webpage = self._download_age_confirm_page(
|
||||
url, video_id, note='Confirming age')
|
||||
need_confirm_age = True
|
||||
|
||||
formats = []
|
||||
|
||||
uploader = self._search_regex(r'''(?x)
|
||||
@@ -81,6 +120,7 @@ class CDAIE(InfoExtractor):
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'formats': formats,
|
||||
'duration': None,
|
||||
'age_limit': 18 if need_confirm_age else 0,
|
||||
}
|
||||
|
||||
def extract_format(page, version):
|
||||
@@ -121,7 +161,12 @@ class CDAIE(InfoExtractor):
|
||||
for href, resolution in re.findall(
|
||||
r'<a[^>]+data-quality="[^"]+"[^>]+href="([^"]+)"[^>]+class="quality-btn"[^>]*>([0-9]+p)',
|
||||
webpage):
|
||||
webpage = self._download_webpage(
|
||||
if need_confirm_age:
|
||||
handler = self._download_age_confirm_page
|
||||
else:
|
||||
handler = self._download_webpage
|
||||
|
||||
webpage = handler(
|
||||
self._BASE_URL + href, video_id,
|
||||
'Downloading %s version information' % resolution, fatal=False)
|
||||
if not webpage:
|
||||
@@ -129,6 +174,7 @@ class CDAIE(InfoExtractor):
|
||||
# invalid version is requested.
|
||||
self.report_warning('Unable to download %s version information' % resolution)
|
||||
continue
|
||||
|
||||
extract_format(webpage, resolution)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
@@ -12,7 +12,7 @@ class ClipfishIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?clipfish\.de/(?:[^/]+/)+video/(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.clipfish.de/special/ugly-americans/video/4343170/s01-e01-ugly-americans-date-in-der-hoelle/',
|
||||
'md5': '720563e467b86374c194bdead08d207d',
|
||||
'md5': 'b9a5dc46294154c1193e2d10e0c95693',
|
||||
'info_dict': {
|
||||
'id': '4343170',
|
||||
'ext': 'mp4',
|
||||
|
@@ -21,7 +21,7 @@ class CollegeRamaIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Een nieuwe wereld: waarden, bewustzijn en techniek van de mensheid 2.0.',
|
||||
'description': '',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'thumbnail': r're:^https?://.*\.jpg(?:\?.*?)?$',
|
||||
'duration': 7713.088,
|
||||
'timestamp': 1413309600,
|
||||
'upload_date': '20141014',
|
||||
@@ -35,6 +35,7 @@ class CollegeRamaIE(InfoExtractor):
|
||||
'ext': 'wmv',
|
||||
'title': '64ste Vakantiecursus: Afvalwater',
|
||||
'description': 'md5:7fd774865cc69d972f542b157c328305',
|
||||
'thumbnail': r're:^https?://.*\.jpg(?:\?.*?)?$',
|
||||
'duration': 10853,
|
||||
'timestamp': 1326446400,
|
||||
'upload_date': '20120113',
|
||||
|
@@ -245,6 +245,10 @@ class InfoExtractor(object):
|
||||
specified in the URL.
|
||||
end_time: Time in seconds where the reproduction should end, as
|
||||
specified in the URL.
|
||||
chapters: A list of dictionaries, with the following entries:
|
||||
* "start_time" - The start time of the chapter in seconds
|
||||
* "end_time" - The end time of the chapter in seconds
|
||||
* "title" (optional, string)
|
||||
|
||||
The following fields should only be used when the video belongs to some logical
|
||||
chapter or section:
|
||||
@@ -372,7 +376,7 @@ class InfoExtractor(object):
|
||||
cls._VALID_URL_RE = re.compile(cls._VALID_URL)
|
||||
m = cls._VALID_URL_RE.match(url)
|
||||
assert m
|
||||
return m.group('id')
|
||||
return compat_str(m.group('id'))
|
||||
|
||||
@classmethod
|
||||
def working(cls):
|
||||
@@ -976,6 +980,23 @@ class InfoExtractor(object):
|
||||
return info
|
||||
if isinstance(json_ld, dict):
|
||||
json_ld = [json_ld]
|
||||
|
||||
def extract_video_object(e):
|
||||
assert e['@type'] == 'VideoObject'
|
||||
info.update({
|
||||
'url': e.get('contentUrl'),
|
||||
'title': unescapeHTML(e.get('name')),
|
||||
'description': unescapeHTML(e.get('description')),
|
||||
'thumbnail': e.get('thumbnailUrl') or e.get('thumbnailURL'),
|
||||
'duration': parse_duration(e.get('duration')),
|
||||
'timestamp': unified_timestamp(e.get('uploadDate')),
|
||||
'filesize': float_or_none(e.get('contentSize')),
|
||||
'tbr': int_or_none(e.get('bitrate')),
|
||||
'width': int_or_none(e.get('width')),
|
||||
'height': int_or_none(e.get('height')),
|
||||
'view_count': int_or_none(e.get('interactionCount')),
|
||||
})
|
||||
|
||||
for e in json_ld:
|
||||
if e.get('@context') == 'http://schema.org':
|
||||
item_type = e.get('@type')
|
||||
@@ -1000,18 +1021,11 @@ class InfoExtractor(object):
|
||||
'description': unescapeHTML(e.get('articleBody')),
|
||||
})
|
||||
elif item_type == 'VideoObject':
|
||||
info.update({
|
||||
'url': e.get('contentUrl'),
|
||||
'title': unescapeHTML(e.get('name')),
|
||||
'description': unescapeHTML(e.get('description')),
|
||||
'thumbnail': e.get('thumbnailUrl') or e.get('thumbnailURL'),
|
||||
'duration': parse_duration(e.get('duration')),
|
||||
'timestamp': unified_timestamp(e.get('uploadDate')),
|
||||
'filesize': float_or_none(e.get('contentSize')),
|
||||
'tbr': int_or_none(e.get('bitrate')),
|
||||
'width': int_or_none(e.get('width')),
|
||||
'height': int_or_none(e.get('height')),
|
||||
})
|
||||
extract_video_object(e)
|
||||
elif item_type == 'WebPage':
|
||||
video = e.get('video')
|
||||
if isinstance(video, dict) and video.get('@type') == 'VideoObject':
|
||||
extract_video_object(video)
|
||||
break
|
||||
return dict((k, v) for k, v in info.items() if v is not None)
|
||||
|
||||
@@ -1303,40 +1317,50 @@ class InfoExtractor(object):
|
||||
entry_protocol='m3u8', preference=None,
|
||||
m3u8_id=None, note=None, errnote=None,
|
||||
fatal=True, live=False):
|
||||
|
||||
res = self._download_webpage_handle(
|
||||
m3u8_url, video_id,
|
||||
note=note or 'Downloading m3u8 information',
|
||||
errnote=errnote or 'Failed to download m3u8 information',
|
||||
fatal=fatal)
|
||||
|
||||
if res is False:
|
||||
return []
|
||||
|
||||
m3u8_doc, urlh = res
|
||||
m3u8_url = urlh.geturl()
|
||||
|
||||
return self._parse_m3u8_formats(
|
||||
m3u8_doc, m3u8_url, ext=ext, entry_protocol=entry_protocol,
|
||||
preference=preference, m3u8_id=m3u8_id, live=live)
|
||||
|
||||
def _parse_m3u8_formats(self, m3u8_doc, m3u8_url, ext=None,
|
||||
entry_protocol='m3u8', preference=None,
|
||||
m3u8_id=None, live=False):
|
||||
if '#EXT-X-FAXS-CM:' in m3u8_doc: # Adobe Flash Access
|
||||
return []
|
||||
|
||||
formats = [self._m3u8_meta_format(m3u8_url, ext, preference, m3u8_id)]
|
||||
formats = []
|
||||
|
||||
format_url = lambda u: (
|
||||
u
|
||||
if re.match(r'^https?://', u)
|
||||
else compat_urlparse.urljoin(m3u8_url, u))
|
||||
|
||||
# We should try extracting formats only from master playlists [1], i.e.
|
||||
# playlists that describe available qualities. On the other hand media
|
||||
# playlists [2] should be returned as is since they contain just the media
|
||||
# without qualities renditions.
|
||||
# References:
|
||||
# 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-21
|
||||
# 2. https://github.com/rg3/youtube-dl/issues/12211
|
||||
|
||||
# We should try extracting formats only from master playlists [1, 4.3.4],
|
||||
# i.e. playlists that describe available qualities. On the other hand
|
||||
# media playlists [1, 4.3.3] should be returned as is since they contain
|
||||
# just the media without qualities renditions.
|
||||
# Fortunately, master playlist can be easily distinguished from media
|
||||
# playlist based on particular tags availability. As of [1, 2] master
|
||||
# playlist tags MUST NOT appear in a media playist and vice versa.
|
||||
# As of [3] #EXT-X-TARGETDURATION tag is REQUIRED for every media playlist
|
||||
# and MUST NOT appear in master playlist thus we can clearly detect media
|
||||
# playlist with this criterion.
|
||||
# 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.4
|
||||
# 2. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3
|
||||
# 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.1
|
||||
# playlist based on particular tags availability. As of [1, 4.3.3, 4.3.4]
|
||||
# master playlist tags MUST NOT appear in a media playist and vice versa.
|
||||
# As of [1, 4.3.3.1] #EXT-X-TARGETDURATION tag is REQUIRED for every
|
||||
# media playlist and MUST NOT appear in master playlist thus we can
|
||||
# clearly detect media playlist with this criterion.
|
||||
|
||||
if '#EXT-X-TARGETDURATION' in m3u8_doc: # media playlist, return as is
|
||||
return [{
|
||||
'url': m3u8_url,
|
||||
@@ -1345,26 +1369,29 @@ class InfoExtractor(object):
|
||||
'protocol': entry_protocol,
|
||||
'preference': preference,
|
||||
}]
|
||||
audio_in_video_stream = {}
|
||||
last_info = {}
|
||||
last_media = {}
|
||||
for line in m3u8_doc.splitlines():
|
||||
if line.startswith('#EXT-X-STREAM-INF:'):
|
||||
last_info = parse_m3u8_attributes(line)
|
||||
elif line.startswith('#EXT-X-MEDIA:'):
|
||||
media = parse_m3u8_attributes(line)
|
||||
media_type = media.get('TYPE')
|
||||
if media_type in ('VIDEO', 'AUDIO'):
|
||||
group_id = media.get('GROUP-ID')
|
||||
|
||||
groups = {}
|
||||
last_stream_inf = {}
|
||||
|
||||
def extract_media(x_media_line):
|
||||
media = parse_m3u8_attributes(x_media_line)
|
||||
# As per [1, 4.3.4.1] TYPE, GROUP-ID and NAME are REQUIRED
|
||||
media_type, group_id, name = media.get('TYPE'), media.get('GROUP-ID'), media.get('NAME')
|
||||
if not (media_type and group_id and name):
|
||||
return
|
||||
groups.setdefault(group_id, []).append(media)
|
||||
if media_type not in ('VIDEO', 'AUDIO'):
|
||||
return
|
||||
media_url = media.get('URI')
|
||||
if media_url:
|
||||
format_id = []
|
||||
for v in (group_id, media.get('NAME')):
|
||||
for v in (group_id, name):
|
||||
if v:
|
||||
format_id.append(v)
|
||||
f = {
|
||||
'format_id': '-'.join(format_id),
|
||||
'url': format_url(media_url),
|
||||
'manifest_url': m3u8_url,
|
||||
'language': media.get('LANGUAGE'),
|
||||
'ext': ext,
|
||||
'protocol': entry_protocol,
|
||||
@@ -1372,25 +1399,42 @@ class InfoExtractor(object):
|
||||
}
|
||||
if media_type == 'AUDIO':
|
||||
f['vcodec'] = 'none'
|
||||
if group_id and not audio_in_video_stream.get(group_id):
|
||||
audio_in_video_stream[group_id] = False
|
||||
formats.append(f)
|
||||
else:
|
||||
# When there is no URI in EXT-X-MEDIA let this tag's
|
||||
# data be used by regular URI lines below
|
||||
last_media = media
|
||||
if media_type == 'AUDIO' and group_id:
|
||||
audio_in_video_stream[group_id] = True
|
||||
|
||||
def build_stream_name():
|
||||
# Despite specification does not mention NAME attribute for
|
||||
# EXT-X-STREAM-INF tag it still sometimes may be present (see [1]
|
||||
# or vidio test in TestInfoExtractor.test_parse_m3u8_formats)
|
||||
# 1. http://www.vidio.com/watch/165683-dj_ambred-booyah-live-2015
|
||||
stream_name = last_stream_inf.get('NAME')
|
||||
if stream_name:
|
||||
return stream_name
|
||||
# If there is no NAME in EXT-X-STREAM-INF it will be obtained
|
||||
# from corresponding rendition group
|
||||
stream_group_id = last_stream_inf.get('VIDEO')
|
||||
if not stream_group_id:
|
||||
return
|
||||
stream_group = groups.get(stream_group_id)
|
||||
if not stream_group:
|
||||
return stream_group_id
|
||||
rendition = stream_group[0]
|
||||
return rendition.get('NAME') or stream_group_id
|
||||
|
||||
for line in m3u8_doc.splitlines():
|
||||
if line.startswith('#EXT-X-STREAM-INF:'):
|
||||
last_stream_inf = parse_m3u8_attributes(line)
|
||||
elif line.startswith('#EXT-X-MEDIA:'):
|
||||
extract_media(line)
|
||||
elif line.startswith('#') or not line.strip():
|
||||
continue
|
||||
else:
|
||||
tbr = int_or_none(last_info.get('AVERAGE-BANDWIDTH') or last_info.get('BANDWIDTH'), scale=1000)
|
||||
tbr = float_or_none(
|
||||
last_stream_inf.get('AVERAGE-BANDWIDTH') or
|
||||
last_stream_inf.get('BANDWIDTH'), scale=1000)
|
||||
format_id = []
|
||||
if m3u8_id:
|
||||
format_id.append(m3u8_id)
|
||||
# Despite specification does not mention NAME attribute for
|
||||
# EXT-X-STREAM-INF it still sometimes may be present
|
||||
stream_name = last_info.get('NAME') or last_media.get('NAME')
|
||||
stream_name = build_stream_name()
|
||||
# Bandwidth of live streams may differ over time thus making
|
||||
# format_id unpredictable. So it's better to keep provided
|
||||
# format_id intact.
|
||||
@@ -1400,14 +1444,14 @@ class InfoExtractor(object):
|
||||
f = {
|
||||
'format_id': '-'.join(format_id),
|
||||
'url': manifest_url,
|
||||
'manifest_url': manifest_url,
|
||||
'manifest_url': m3u8_url,
|
||||
'tbr': tbr,
|
||||
'ext': ext,
|
||||
'fps': float_or_none(last_info.get('FRAME-RATE')),
|
||||
'fps': float_or_none(last_stream_inf.get('FRAME-RATE')),
|
||||
'protocol': entry_protocol,
|
||||
'preference': preference,
|
||||
}
|
||||
resolution = last_info.get('RESOLUTION')
|
||||
resolution = last_stream_inf.get('RESOLUTION')
|
||||
if resolution:
|
||||
mobj = re.search(r'(?P<width>\d+)[xX](?P<height>\d+)', resolution)
|
||||
if mobj:
|
||||
@@ -1423,13 +1467,26 @@ class InfoExtractor(object):
|
||||
'vbr': vbr,
|
||||
'abr': abr,
|
||||
})
|
||||
f.update(parse_codecs(last_info.get('CODECS')))
|
||||
if audio_in_video_stream.get(last_info.get('AUDIO')) is False and f['vcodec'] != 'none':
|
||||
# TODO: update acodec for audio only formats with the same GROUP-ID
|
||||
codecs = parse_codecs(last_stream_inf.get('CODECS'))
|
||||
f.update(codecs)
|
||||
audio_group_id = last_stream_inf.get('AUDIO')
|
||||
# As per [1, 4.3.4.1.1] any EXT-X-STREAM-INF tag which
|
||||
# references a rendition group MUST have a CODECS attribute.
|
||||
# However, this is not always respected, for example, [2]
|
||||
# contains EXT-X-STREAM-INF tag which references AUDIO
|
||||
# rendition group but does not have CODECS and despite
|
||||
# referencing audio group an audio group, it represents
|
||||
# a complete (with audio and video) format. So, for such cases
|
||||
# we will ignore references to rendition groups and treat them
|
||||
# as complete formats.
|
||||
if audio_group_id and codecs and f.get('vcodec') != 'none':
|
||||
audio_group = groups.get(audio_group_id)
|
||||
if audio_group and audio_group[0].get('URI'):
|
||||
# TODO: update acodec for audio only formats with
|
||||
# the same GROUP-ID
|
||||
f['acodec'] = 'none'
|
||||
formats.append(f)
|
||||
last_info = {}
|
||||
last_media = {}
|
||||
last_stream_inf = {}
|
||||
return formats
|
||||
|
||||
@staticmethod
|
||||
@@ -1803,7 +1860,7 @@ class InfoExtractor(object):
|
||||
'ext': mimetype2ext(mime_type),
|
||||
'width': int_or_none(representation_attrib.get('width')),
|
||||
'height': int_or_none(representation_attrib.get('height')),
|
||||
'tbr': int_or_none(bandwidth, 1000),
|
||||
'tbr': float_or_none(bandwidth, 1000),
|
||||
'asr': int_or_none(representation_attrib.get('audioSamplingRate')),
|
||||
'fps': int_or_none(representation_attrib.get('frameRate')),
|
||||
'language': lang if lang not in ('mul', 'und', 'zxx', 'mis') else None,
|
||||
@@ -1944,6 +2001,12 @@ class InfoExtractor(object):
|
||||
compat_etree_fromstring(ism.encode('utf-8')), urlh.geturl(), ism_id)
|
||||
|
||||
def _parse_ism_formats(self, ism_doc, ism_url, ism_id=None):
|
||||
"""
|
||||
Parse formats from ISM manifest.
|
||||
References:
|
||||
1. [MS-SSTR]: Smooth Streaming Protocol,
|
||||
https://msdn.microsoft.com/en-us/library/ff469518.aspx
|
||||
"""
|
||||
if ism_doc.get('IsLive') == 'TRUE' or ism_doc.find('Protection') is not None:
|
||||
return []
|
||||
|
||||
@@ -1965,8 +2028,11 @@ class InfoExtractor(object):
|
||||
self.report_warning('%s is not a supported codec' % fourcc)
|
||||
continue
|
||||
tbr = int(track.attrib['Bitrate']) // 1000
|
||||
width = int_or_none(track.get('MaxWidth'))
|
||||
height = int_or_none(track.get('MaxHeight'))
|
||||
# [1] does not mention Width and Height attributes. However,
|
||||
# they're often present while MaxWidth and MaxHeight are
|
||||
# missing, so should be used as fallbacks
|
||||
width = int_or_none(track.get('MaxWidth') or track.get('Width'))
|
||||
height = int_or_none(track.get('MaxHeight') or track.get('Height'))
|
||||
sampling_rate = int_or_none(track.get('SamplingRate'))
|
||||
|
||||
track_url_pattern = re.sub(r'{[Bb]itrate}', track.attrib['Bitrate'], url_pattern)
|
||||
@@ -2117,7 +2183,7 @@ class InfoExtractor(object):
|
||||
def _extract_akamai_formats(self, manifest_url, video_id, hosts={}):
|
||||
formats = []
|
||||
hdcore_sign = 'hdcore=3.7.0'
|
||||
f4m_url = re.sub(r'(https?://[^/+])/i/', r'\1/z/', manifest_url).replace('/master.m3u8', '/manifest.f4m')
|
||||
f4m_url = re.sub(r'(https?://[^/]+)/i/', r'\1/z/', manifest_url).replace('/master.m3u8', '/manifest.f4m')
|
||||
hds_host = hosts.get('hds')
|
||||
if hds_host:
|
||||
f4m_url = re.sub(r'(https?://)[^/]+', r'\1' + hds_host, f4m_url)
|
||||
@@ -2139,8 +2205,9 @@ class InfoExtractor(object):
|
||||
|
||||
def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native', skip_protocols=[]):
|
||||
url = re.sub(r'/(?:manifest|playlist|jwplayer)\.(?:m3u8|f4m|mpd|smil)', '', url)
|
||||
url_base = self._search_regex(r'(?:https?|rtmp|rtsp)(://[^?]+)', url, 'format url')
|
||||
http_base_url = 'http' + url_base
|
||||
url_base = self._search_regex(
|
||||
r'(?:(?:https?|rtmp|rtsp):)?(//[^?]+)', url, 'format url')
|
||||
http_base_url = '%s:%s' % ('http', url_base)
|
||||
formats = []
|
||||
if 'm3u8' not in skip_protocols:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
@@ -2174,7 +2241,7 @@ class InfoExtractor(object):
|
||||
for protocol in ('rtmp', 'rtsp'):
|
||||
if protocol not in skip_protocols:
|
||||
formats.append({
|
||||
'url': protocol + url_base,
|
||||
'url': '%s:%s' % (protocol, url_base),
|
||||
'format_id': protocol,
|
||||
'protocol': protocol,
|
||||
})
|
||||
@@ -2182,7 +2249,7 @@ class InfoExtractor(object):
|
||||
|
||||
def _find_jwplayer_data(self, webpage, video_id=None, transform_source=js_to_json):
|
||||
mobj = re.search(
|
||||
r'jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)\.setup\s*\((?P<options>[^)]+)\)',
|
||||
r'(?s)jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)(?!</script>).*?\.setup\s*\((?P<options>[^)]+)\)',
|
||||
webpage)
|
||||
if mobj:
|
||||
try:
|
||||
@@ -2258,11 +2325,17 @@ class InfoExtractor(object):
|
||||
|
||||
def _parse_jwplayer_formats(self, jwplayer_sources_data, video_id=None,
|
||||
m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None):
|
||||
urls = []
|
||||
formats = []
|
||||
for source in jwplayer_sources_data:
|
||||
source_url = self._proto_relative_url(source['file'])
|
||||
source_url = self._proto_relative_url(source.get('file'))
|
||||
if not source_url:
|
||||
continue
|
||||
if base_url:
|
||||
source_url = compat_urlparse.urljoin(base_url, source_url)
|
||||
if source_url in urls:
|
||||
continue
|
||||
urls.append(source_url)
|
||||
source_type = source.get('type') or ''
|
||||
ext = mimetype2ext(source_type) or determine_ext(source_url)
|
||||
if source_type == 'hls' or ext == 'm3u8':
|
||||
|
@@ -16,7 +16,6 @@ from ..utils import (
|
||||
mimetype2ext,
|
||||
orderedSet,
|
||||
parse_iso8601,
|
||||
remove_end,
|
||||
)
|
||||
|
||||
|
||||
@@ -50,10 +49,17 @@ class CondeNastIE(InfoExtractor):
|
||||
'wmagazine': 'W Magazine',
|
||||
}
|
||||
|
||||
_VALID_URL = r'https?://(?:video|www|player)\.(?P<site>%s)\.com/(?P<type>watch|series|video|embed(?:js)?)/(?P<id>[^/?#]+)' % '|'.join(_SITES.keys())
|
||||
_VALID_URL = r'''(?x)https?://(?:video|www|player(?:-backend)?)\.(?:%s)\.com/
|
||||
(?:
|
||||
(?:
|
||||
embed(?:js)?|
|
||||
(?:script|inline)/video
|
||||
)/(?P<id>[0-9a-f]{24})(?:/(?P<player_id>[0-9a-f]{24}))?(?:.+?\btarget=(?P<target>[^&]+))?|
|
||||
(?P<type>watch|series|video)/(?P<display_id>[^/?#]+)
|
||||
)''' % '|'.join(_SITES.keys())
|
||||
IE_DESC = 'Condé Nast media group: %s' % ', '.join(sorted(_SITES.values()))
|
||||
|
||||
EMBED_URL = r'(?:https?:)?//player\.(?P<site>%s)\.com/(?P<type>embed(?:js)?)/.+?' % '|'.join(_SITES.keys())
|
||||
EMBED_URL = r'(?:https?:)?//player(?:-backend)?\.(?:%s)\.com/(?:embed(?:js)?|(?:script|inline)/video)/.+?' % '|'.join(_SITES.keys())
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://video.wired.com/watch/3d-printed-speakers-lit-with-led',
|
||||
@@ -89,6 +95,12 @@ class CondeNastIE(InfoExtractor):
|
||||
'upload_date': '20150916',
|
||||
'timestamp': 1442434955,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://player.cnevids.com/inline/video/59138decb57ac36b83000005.js?target=js-cne-player',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://player-backend.cnevids.com/script/video/59138decb57ac36b83000005.js',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _extract_series(self, url, webpage):
|
||||
@@ -104,7 +116,7 @@ class CondeNastIE(InfoExtractor):
|
||||
entries = [self.url_result(build_url(path), 'CondeNast') for path in paths]
|
||||
return self.playlist_result(entries, playlist_title=title)
|
||||
|
||||
def _extract_video(self, webpage, url_type):
|
||||
def _extract_video_params(self, webpage):
|
||||
query = {}
|
||||
params = self._search_regex(
|
||||
r'(?s)var params = {(.+?)}[;,]', webpage, 'player params', default=None)
|
||||
@@ -123,17 +135,30 @@ class CondeNastIE(InfoExtractor):
|
||||
'playerId': params['data-player'],
|
||||
'target': params['id'],
|
||||
})
|
||||
video_id = query['videoId']
|
||||
return query
|
||||
|
||||
def _extract_video(self, params):
|
||||
video_id = params['videoId']
|
||||
|
||||
video_info = None
|
||||
if params.get('playerId'):
|
||||
info_page = self._download_json(
|
||||
'http://player.cnevids.com/player/video.js',
|
||||
video_id, 'Downloading video info', fatal=False, query=query)
|
||||
video_id, 'Downloading video info', fatal=False, query=params)
|
||||
if info_page:
|
||||
video_info = info_page.get('video')
|
||||
if not video_info:
|
||||
info_page = self._download_webpage(
|
||||
'http://player.cnevids.com/player/loader.js',
|
||||
video_id, 'Downloading loader info', query=query)
|
||||
video_id, 'Downloading loader info', query=params)
|
||||
else:
|
||||
info_page = self._download_webpage(
|
||||
'https://player.cnevids.com/inline/video/%s.js' % video_id,
|
||||
video_id, 'Downloading inline info', query={
|
||||
'target': params.get('target', 'embedplayer')
|
||||
})
|
||||
|
||||
if not video_info:
|
||||
video_info = self._parse_json(
|
||||
self._search_regex(
|
||||
r'(?s)var\s+config\s*=\s*({.+?});', info_page, 'config'),
|
||||
@@ -161,9 +186,7 @@ class CondeNastIE(InfoExtractor):
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
info = self._search_json_ld(
|
||||
webpage, video_id, fatal=False) if url_type != 'embed' else {}
|
||||
info.update({
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'title': title,
|
||||
@@ -174,22 +197,26 @@ class CondeNastIE(InfoExtractor):
|
||||
'series': video_info.get('series_title'),
|
||||
'season': video_info.get('season_title'),
|
||||
'timestamp': parse_iso8601(video_info.get('premiere_date')),
|
||||
})
|
||||
return info
|
||||
'categories': video_info.get('categories'),
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
site, url_type, item_id = re.match(self._VALID_URL, url).groups()
|
||||
video_id, player_id, target, url_type, display_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
# Convert JS embed to regular embed
|
||||
if url_type == 'embedjs':
|
||||
parsed_url = compat_urlparse.urlparse(url)
|
||||
url = compat_urlparse.urlunparse(parsed_url._replace(
|
||||
path=remove_end(parsed_url.path, '.js').replace('/embedjs/', '/embed/')))
|
||||
url_type = 'embed'
|
||||
if video_id:
|
||||
return self._extract_video({
|
||||
'videoId': video_id,
|
||||
'playerId': player_id,
|
||||
'target': target,
|
||||
})
|
||||
|
||||
webpage = self._download_webpage(url, item_id)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
if url_type == 'series':
|
||||
return self._extract_series(url, webpage)
|
||||
else:
|
||||
return self._extract_video(webpage, url_type)
|
||||
params = self._extract_video_params(webpage)
|
||||
info = self._search_json_ld(
|
||||
webpage, display_id, fatal=False)
|
||||
info.update(self._extract_video(params))
|
||||
return info
|
||||
|
@@ -24,12 +24,11 @@ class CoubIE(InfoExtractor):
|
||||
'duration': 4.6,
|
||||
'timestamp': 1428527772,
|
||||
'upload_date': '20150408',
|
||||
'uploader': 'Артём Лоскутников',
|
||||
'uploader': 'Artyom Loskutnikov',
|
||||
'uploader_id': 'artyom.loskutnikov',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
'comment_count': int,
|
||||
'age_limit': 0,
|
||||
},
|
||||
}, {
|
||||
@@ -118,7 +117,6 @@ class CoubIE(InfoExtractor):
|
||||
view_count = int_or_none(coub.get('views_count') or coub.get('views_increase_count'))
|
||||
like_count = int_or_none(coub.get('likes_count'))
|
||||
repost_count = int_or_none(coub.get('recoubs_count'))
|
||||
comment_count = int_or_none(coub.get('comments_count'))
|
||||
|
||||
age_restricted = coub.get('age_restricted', coub.get('age_restricted_by_admin'))
|
||||
if age_restricted is not None:
|
||||
@@ -137,7 +135,6 @@ class CoubIE(InfoExtractor):
|
||||
'view_count': view_count,
|
||||
'like_count': like_count,
|
||||
'repost_count': repost_count,
|
||||
'comment_count': comment_count,
|
||||
'age_limit': age_limit,
|
||||
'formats': formats,
|
||||
}
|
||||
|
@@ -21,9 +21,10 @@ class CrackleIE(InfoExtractor):
|
||||
'season_number': 8,
|
||||
'episode_number': 4,
|
||||
'subtitles': {
|
||||
'en-US': [{
|
||||
'ext': 'ttml',
|
||||
}]
|
||||
'en-US': [
|
||||
{'ext': 'vtt'},
|
||||
{'ext': 'tt'},
|
||||
]
|
||||
},
|
||||
},
|
||||
'params': {
|
||||
|
@@ -171,7 +171,7 @@ class CrunchyrollIE(CrunchyrollBaseIE):
|
||||
'info_dict': {
|
||||
'id': '727589',
|
||||
'ext': 'mp4',
|
||||
'title': "KONOSUBA -God's blessing on this wonderful world! 2 Episode 1 – Give Me Deliverance from this Judicial Injustice!",
|
||||
'title': "KONOSUBA -God's blessing on this wonderful world! 2 Episode 1 – Give Me Deliverance From This Judicial Injustice!",
|
||||
'description': 'md5:cbcf05e528124b0f3a0a419fc805ea7d',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': 'Kadokawa Pictures Inc.',
|
||||
@@ -179,7 +179,7 @@ class CrunchyrollIE(CrunchyrollBaseIE):
|
||||
'series': "KONOSUBA -God's blessing on this wonderful world!",
|
||||
'season': "KONOSUBA -God's blessing on this wonderful world! 2",
|
||||
'season_number': 2,
|
||||
'episode': 'Give Me Deliverance from this Judicial Injustice!',
|
||||
'episode': 'Give Me Deliverance From This Judicial Injustice!',
|
||||
'episode_number': 1,
|
||||
},
|
||||
'params': {
|
||||
|
@@ -10,6 +10,7 @@ from ..utils import (
|
||||
smuggle_url,
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
extract_attributes,
|
||||
)
|
||||
from .senateisvp import SenateISVPIE
|
||||
from .ustream import UstreamIE
|
||||
@@ -68,6 +69,7 @@ class CSpanIE(InfoExtractor):
|
||||
'uploader_id': '12987475',
|
||||
},
|
||||
}]
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s'
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
@@ -78,6 +80,19 @@ class CSpanIE(InfoExtractor):
|
||||
if ustream_url:
|
||||
return self.url_result(ustream_url, UstreamIE.ie_key())
|
||||
|
||||
if '&vod' not in url:
|
||||
bc = self._search_regex(
|
||||
r"(<[^>]+id='brightcove-player-embed'[^>]+>)",
|
||||
webpage, 'brightcove embed', default=None)
|
||||
if bc:
|
||||
bc_attr = extract_attributes(bc)
|
||||
bc_url = self.BRIGHTCOVE_URL_TEMPLATE % (
|
||||
bc_attr.get('data-bcaccountid', '3162030207001'),
|
||||
bc_attr.get('data-noprebcplayerid', 'SyGGpuJy3g'),
|
||||
bc_attr.get('data-newbcplayerid', 'default'),
|
||||
bc_attr['data-bcid'])
|
||||
return self.url_result(smuggle_url(bc_url, {'source_url': url}))
|
||||
|
||||
# We first look for clipid, because clipprog always appears before
|
||||
patterns = [r'id=\'clip(%s)\'\s*value=\'([0-9]+)\'' % t for t in ('id', 'prog')]
|
||||
results = list(filter(None, (re.search(p, webpage) for p in patterns)))
|
||||
|
@@ -2,9 +2,11 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
determine_protocol,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
@@ -28,8 +30,14 @@ class DailyMailIE(InfoExtractor):
|
||||
video_data = self._parse_json(self._search_regex(
|
||||
r"data-opts='({.+?})'", webpage, 'video data'), video_id)
|
||||
title = unescapeHTML(video_data['title'])
|
||||
video_sources = self._download_json(video_data.get(
|
||||
'sources', {}).get('url') or 'http://www.dailymail.co.uk/api/player/%s/video-sources.json' % video_id, video_id)
|
||||
|
||||
sources_url = (try_get(
|
||||
video_data,
|
||||
(lambda x: x['plugins']['sources']['url'],
|
||||
lambda x: x['sources']['url']), compat_str) or
|
||||
'http://www.dailymail.co.uk/api/player/%s/video-sources.json' % video_id)
|
||||
|
||||
video_sources = self._download_json(sources_url, video_id)
|
||||
|
||||
formats = []
|
||||
for rendition in video_sources['renditions']:
|
||||
|
@@ -38,7 +38,7 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
|
||||
|
||||
|
||||
class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
_VALID_URL = r'(?i)(?:https?://)?(?:(www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(?:embed|swf|#)/)?video/(?P<id>[^/?_]+)'
|
||||
_VALID_URL = r'(?i)https?://(?:(www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(?:(?:embed|swf|#)/)?video|swf)/(?P<id>[^/?_]+)'
|
||||
IE_NAME = 'dailymotion'
|
||||
|
||||
_FORMATS = [
|
||||
@@ -49,8 +49,24 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
('stream_h264_hd1080_url', 'hd180'),
|
||||
]
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
_TESTS = [{
|
||||
'url': 'http://www.dailymotion.com/video/x5kesuj_office-christmas-party-review-jason-bateman-olivia-munn-t-j-miller_news',
|
||||
'md5': '074b95bdee76b9e3654137aee9c79dfe',
|
||||
'info_dict': {
|
||||
'id': 'x5kesuj',
|
||||
'ext': 'mp4',
|
||||
'title': 'Office Christmas Party Review – Jason Bateman, Olivia Munn, T.J. Miller',
|
||||
'description': 'Office Christmas Party Review - Jason Bateman, Olivia Munn, T.J. Miller',
|
||||
'thumbnail': r're:^https?:.*\.(?:jpg|png)$',
|
||||
'duration': 187,
|
||||
'timestamp': 1493651285,
|
||||
'upload_date': '20170501',
|
||||
'uploader': 'Deadline',
|
||||
'uploader_id': 'x1xm8ri',
|
||||
'age_limit': 0,
|
||||
'view_count': int,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.dailymotion.com/video/x2iuewm_steam-machine-models-pricing-listed-on-steam-store-ign-news_videogames',
|
||||
'md5': '2137c41a8e78554bb09225b8eb322406',
|
||||
'info_dict': {
|
||||
@@ -66,10 +82,10 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
'uploader_id': 'xijv66',
|
||||
'age_limit': 0,
|
||||
'view_count': int,
|
||||
}
|
||||
},
|
||||
'skip': 'video gone',
|
||||
}, {
|
||||
# Vevo video
|
||||
{
|
||||
'url': 'http://www.dailymotion.com/video/x149uew_katy-perry-roar-official_musi',
|
||||
'info_dict': {
|
||||
'title': 'Roar (Official)',
|
||||
@@ -82,9 +98,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'VEVO is only available in some countries',
|
||||
},
|
||||
}, {
|
||||
# age-restricted video
|
||||
{
|
||||
'url': 'http://www.dailymotion.com/video/xyh2zz_leanna-decker-cyber-girl-of-the-year-desires-nude-playboy-plus_redband',
|
||||
'md5': '0d667a7b9cebecc3c89ee93099c4159d',
|
||||
'info_dict': {
|
||||
@@ -95,22 +110,21 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
'age_limit': 18,
|
||||
},
|
||||
'skip': 'video gone',
|
||||
},
|
||||
}, {
|
||||
# geo-restricted, player v5
|
||||
{
|
||||
'url': 'http://www.dailymotion.com/video/xhza0o',
|
||||
'only_matching': True,
|
||||
},
|
||||
}, {
|
||||
# with subtitles
|
||||
{
|
||||
'url': 'http://www.dailymotion.com/video/x20su5f_the-power-of-nightmares-1-the-rise-of-the-politics-of-fear-bbc-2004_news',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
}, {
|
||||
'url': 'http://www.dailymotion.com/swf/video/x3n92nf',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
}, {
|
||||
'url': 'http://www.dailymotion.com/swf/x3ss1m_funny-magic-trick-barry-and-stuart_fun',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
|
@@ -21,7 +21,8 @@ class DemocracynowIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '2015-0703-001',
|
||||
'ext': 'mp4',
|
||||
'title': 'Daily Show',
|
||||
'title': 'Daily Show for July 03, 2015',
|
||||
'description': 'md5:80eb927244d6749900de6072c7cc2c86',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.democracynow.org/2015/7/3/this_flag_comes_down_today_bree',
|
||||
|
@@ -35,7 +35,7 @@ class DotsubIE(InfoExtractor):
|
||||
'thumbnail': 're:^https?://dotsub.com/media/747bcf58-bd59-45b7-8c8c-ac312d084ee6/p',
|
||||
'duration': 290,
|
||||
'timestamp': 1476767794.2809999,
|
||||
'upload_date': '20160525',
|
||||
'upload_date': '20161018',
|
||||
'uploader': 'parthivi001',
|
||||
'uploader_id': 'user52596202',
|
||||
'view_count': int,
|
||||
|
@@ -3,11 +3,14 @@ from __future__ import unicode_literals
|
||||
|
||||
import time
|
||||
import hashlib
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
@@ -20,7 +23,7 @@ class DouyuTVIE(InfoExtractor):
|
||||
'id': '17732',
|
||||
'display_id': 'iseven',
|
||||
'ext': 'flv',
|
||||
'title': 're:^清晨醒脑!T-ARA根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'title': 're:^清晨醒脑!根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'description': r're:.*m7show@163\.com.*',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': '7师傅',
|
||||
@@ -51,7 +54,7 @@ class DouyuTVIE(InfoExtractor):
|
||||
'id': '17732',
|
||||
'display_id': '17732',
|
||||
'ext': 'flv',
|
||||
'title': 're:^清晨醒脑!T-ARA根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'title': 're:^清晨醒脑!根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'description': r're:.*m7show@163\.com.*',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': '7师傅',
|
||||
@@ -117,3 +120,82 @@ class DouyuTVIE(InfoExtractor):
|
||||
'uploader': uploader,
|
||||
'is_live': True,
|
||||
}
|
||||
|
||||
|
||||
class DouyuShowIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://v(?:mobile)?\.douyu\.com/show/(?P<id>[0-9a-zA-Z]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://v.douyu.com/show/rjNBdvnVXNzvE2yw',
|
||||
'md5': '0c2cfd068ee2afe657801269b2d86214',
|
||||
'info_dict': {
|
||||
'id': 'rjNBdvnVXNzvE2yw',
|
||||
'ext': 'mp4',
|
||||
'title': '陈一发儿:砒霜 我有个室友系列!04-01 22点场',
|
||||
'duration': 7150.08,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': '陈一发儿',
|
||||
'uploader_id': 'XrZwYelr5wbK',
|
||||
'uploader_url': 'https://v.douyu.com/author/XrZwYelr5wbK',
|
||||
'upload_date': '20170402',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://vmobile.douyu.com/show/rjNBdvnVXNzvE2yw',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
url = url.replace('vmobile.', 'v.')
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
room_info = self._parse_json(self._search_regex(
|
||||
r'var\s+\$ROOM\s*=\s*({.+});', webpage, 'room info'), video_id)
|
||||
|
||||
video_info = None
|
||||
|
||||
for trial in range(5):
|
||||
# Sometimes Douyu rejects our request. Let's try it more times
|
||||
try:
|
||||
video_info = self._download_json(
|
||||
'https://vmobile.douyu.com/video/getInfo', video_id,
|
||||
query={'vid': video_id},
|
||||
headers={
|
||||
'Referer': url,
|
||||
'x-requested-with': 'XMLHttpRequest',
|
||||
})
|
||||
break
|
||||
except ExtractorError:
|
||||
self._sleep(1, video_id)
|
||||
|
||||
if not video_info:
|
||||
raise ExtractorError('Can\'t fetch video info')
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
video_info['data']['video_url'], video_id,
|
||||
entry_protocol='m3u8_native', ext='mp4')
|
||||
|
||||
upload_date = unified_strdate(self._html_search_regex(
|
||||
r'<em>上传时间:</em><span>([^<]+)</span>', webpage,
|
||||
'upload date', fatal=False))
|
||||
|
||||
uploader = uploader_id = uploader_url = None
|
||||
mobj = re.search(
|
||||
r'(?m)<a[^>]+href="/author/([0-9a-zA-Z]+)".+?<strong[^>]+title="([^"]+)"',
|
||||
webpage)
|
||||
if mobj:
|
||||
uploader_id, uploader = mobj.groups()
|
||||
uploader_url = urljoin(url, '/author/' + uploader_id)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': room_info['name'],
|
||||
'formats': formats,
|
||||
'duration': room_info.get('duration'),
|
||||
'thumbnail': room_info.get('pic'),
|
||||
'upload_date': upload_date,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'uploader_url': uploader_url,
|
||||
}
|
||||
|
@@ -1,135 +1,59 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
js_to_json,
|
||||
parse_duration,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
||||
class DRBonanzaIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?dr\.dk/bonanza/(?:[^/]+/)+(?:[^/])+?(?:assetId=(?P<id>\d+))?(?:[#&]|$)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.dr.dk/bonanza/serie/portraetter/Talkshowet.htm?assetId=65517',
|
||||
_VALID_URL = r'https?://(?:www\.)?dr\.dk/bonanza/[^/]+/\d+/[^/]+/(?P<id>\d+)/(?P<display_id>[^/?#&]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.dr.dk/bonanza/serie/154/matador/40312/matador---0824-komme-fremmede-',
|
||||
'info_dict': {
|
||||
'id': '65517',
|
||||
'id': '40312',
|
||||
'display_id': 'matador---0824-komme-fremmede-',
|
||||
'ext': 'mp4',
|
||||
'title': 'Talkshowet - Leonard Cohen',
|
||||
'description': 'md5:8f34194fb30cd8c8a30ad8b27b70c0ca',
|
||||
'title': 'MATADOR - 08:24. "Komme fremmede".',
|
||||
'description': 'md5:77b4c1ac4d4c1b9d610ab4395212ff84',
|
||||
'thumbnail': r're:^https?://.*\.(?:gif|jpg)$',
|
||||
'timestamp': 1295537932,
|
||||
'upload_date': '20110120',
|
||||
'duration': 3664,
|
||||
'duration': 4613,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # requires rtmp
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.dr.dk/bonanza/radio/serie/sport/fodbold.htm?assetId=59410',
|
||||
'md5': '6dfe039417e76795fb783c52da3de11d',
|
||||
'info_dict': {
|
||||
'id': '59410',
|
||||
'ext': 'mp3',
|
||||
'title': 'EM fodbold 1992 Danmark - Tyskland finale Transmission',
|
||||
'description': 'md5:501e5a195749480552e214fbbed16c4e',
|
||||
'thumbnail': r're:^https?://.*\.(?:gif|jpg)$',
|
||||
'timestamp': 1223274900,
|
||||
'upload_date': '20081006',
|
||||
'duration': 7369,
|
||||
},
|
||||
}]
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
url_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, url_id)
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id, display_id = mobj.group('id', 'display_id')
|
||||
|
||||
if url_id:
|
||||
info = json.loads(self._html_search_regex(r'({.*?%s.*})' % url_id, webpage, 'json'))
|
||||
else:
|
||||
# Just fetch the first video on that page
|
||||
info = json.loads(self._html_search_regex(r'bonanzaFunctions.newPlaylist\(({.*})\)', webpage, 'json'))
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
asset_id = str(info['AssetId'])
|
||||
title = info['Title'].rstrip(' \'\"-,.:;!?')
|
||||
duration = int_or_none(info.get('Duration'), scale=1000)
|
||||
# First published online. "FirstPublished" contains the date for original airing.
|
||||
timestamp = parse_iso8601(
|
||||
re.sub(r'\.\d+$', '', info['Created']))
|
||||
info = self._parse_html5_media_entries(
|
||||
url, webpage, display_id, m3u8_id='hls',
|
||||
m3u8_entry_protocol='m3u8_native')[0]
|
||||
self._sort_formats(info['formats'])
|
||||
|
||||
def parse_filename_info(url):
|
||||
match = re.search(r'/\d+_(?P<width>\d+)x(?P<height>\d+)x(?P<bitrate>\d+)K\.(?P<ext>\w+)$', url)
|
||||
if match:
|
||||
return {
|
||||
'width': int(match.group('width')),
|
||||
'height': int(match.group('height')),
|
||||
'vbr': int(match.group('bitrate')),
|
||||
'ext': match.group('ext')
|
||||
}
|
||||
match = re.search(r'/\d+_(?P<bitrate>\d+)K\.(?P<ext>\w+)$', url)
|
||||
if match:
|
||||
return {
|
||||
'vbr': int(match.group('bitrate')),
|
||||
'ext': match.group(2)
|
||||
}
|
||||
return {}
|
||||
asset = self._parse_json(
|
||||
self._search_regex(
|
||||
r'(?s)currentAsset\s*=\s*({.+?})\s*</script', webpage, 'asset'),
|
||||
display_id, transform_source=js_to_json)
|
||||
|
||||
video_types = ['VideoHigh', 'VideoMid', 'VideoLow']
|
||||
preferencemap = {
|
||||
'VideoHigh': -1,
|
||||
'VideoMid': -2,
|
||||
'VideoLow': -3,
|
||||
'Audio': -4,
|
||||
}
|
||||
title = unescapeHTML(asset['AssetTitle']).strip()
|
||||
|
||||
formats = []
|
||||
for file in info['Files']:
|
||||
if info['Type'] == 'Video':
|
||||
if file['Type'] in video_types:
|
||||
format = parse_filename_info(file['Location'])
|
||||
format.update({
|
||||
'url': file['Location'],
|
||||
'format_id': file['Type'].replace('Video', ''),
|
||||
'preference': preferencemap.get(file['Type'], -10),
|
||||
})
|
||||
if format['url'].startswith('rtmp'):
|
||||
rtmp_url = format['url']
|
||||
format['rtmp_live'] = True # --resume does not work
|
||||
if '/bonanza/' in rtmp_url:
|
||||
format['play_path'] = rtmp_url.split('/bonanza/')[1]
|
||||
formats.append(format)
|
||||
elif file['Type'] == 'Thumb':
|
||||
thumbnail = file['Location']
|
||||
elif info['Type'] == 'Audio':
|
||||
if file['Type'] == 'Audio':
|
||||
format = parse_filename_info(file['Location'])
|
||||
format.update({
|
||||
'url': file['Location'],
|
||||
'format_id': file['Type'],
|
||||
'vcodec': 'none',
|
||||
})
|
||||
formats.append(format)
|
||||
elif file['Type'] == 'Thumb':
|
||||
thumbnail = file['Location']
|
||||
def extract(field):
|
||||
return self._search_regex(
|
||||
r'<div[^>]+>\s*<p>%s:<p>\s*</div>\s*<div[^>]+>\s*<p>([^<]+)</p>' % field,
|
||||
webpage, field, default=None)
|
||||
|
||||
description = '%s\n%s\n%s\n' % (
|
||||
info['Description'], info['Actors'], info['Colophon'])
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
display_id = re.sub(r'[^\w\d-]', '', re.sub(r' ', '-', title.lower())) + '-' + asset_id
|
||||
display_id = re.sub(r'-+', '-', display_id)
|
||||
|
||||
return {
|
||||
'id': asset_id,
|
||||
info.update({
|
||||
'id': asset.get('AssetId') or video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'timestamp': timestamp,
|
||||
'duration': duration,
|
||||
}
|
||||
'description': extract('Programinfo'),
|
||||
'duration': parse_duration(extract('Tid')),
|
||||
'thumbnail': asset.get('AssetImageUrl'),
|
||||
})
|
||||
return info
|
||||
|
@@ -20,7 +20,7 @@ class DRTVIE(InfoExtractor):
|
||||
IE_NAME = 'drtv'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.dr.dk/tv/se/boern/ultra/klassen-ultra/klassen-darlig-taber-10',
|
||||
'md5': '25e659cccc9a2ed956110a299fdf5983',
|
||||
'md5': '7ae17b4e18eb5d29212f424a7511c184',
|
||||
'info_dict': {
|
||||
'id': 'klassen-darlig-taber-10',
|
||||
'ext': 'mp4',
|
||||
@@ -30,21 +30,37 @@ class DRTVIE(InfoExtractor):
|
||||
'upload_date': '20160823',
|
||||
'duration': 606.84,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# embed
|
||||
'url': 'https://www.dr.dk/nyheder/indland/live-christianias-rydning-af-pusher-street-er-i-gang',
|
||||
'md5': '2c37175c718155930f939ef59952474a',
|
||||
'info_dict': {
|
||||
'id': 'christiania-pusher-street-ryddes-drdkrjpo',
|
||||
'ext': 'mp4',
|
||||
'title': 'LIVE Christianias rydning af Pusher Street er i gang',
|
||||
'description': '- Det er det fedeste, der er sket i 20 år, fortæller christianit til DR Nyheder.',
|
||||
'description': 'md5:2a71898b15057e9b97334f61d04e6eb5',
|
||||
'timestamp': 1472800279,
|
||||
'upload_date': '20160902',
|
||||
'duration': 131.4,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# with SignLanguage formats
|
||||
'url': 'https://www.dr.dk/tv/se/historien-om-danmark/-/historien-om-danmark-stenalder',
|
||||
'info_dict': {
|
||||
'id': 'historien-om-danmark-stenalder',
|
||||
'ext': 'mp4',
|
||||
'title': 'Historien om Danmark: Stenalder (1)',
|
||||
'description': 'md5:8c66dcbc1669bbc6f873879880f37f2a',
|
||||
'timestamp': 1490401996,
|
||||
'upload_date': '20170325',
|
||||
'duration': 3502.04,
|
||||
'formats': 'mincount:20',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -88,7 +104,7 @@ class DRTVIE(InfoExtractor):
|
||||
elif kind in ('VideoResource', 'AudioResource'):
|
||||
duration = float_or_none(asset.get('DurationInMilliseconds'), 1000)
|
||||
restricted_to_denmark = asset.get('RestrictedToDenmark')
|
||||
spoken_subtitles = asset.get('Target') == 'SpokenSubtitles'
|
||||
asset_target = asset.get('Target')
|
||||
for link in asset.get('Links', []):
|
||||
uri = link.get('Uri')
|
||||
if not uri:
|
||||
@@ -96,9 +112,9 @@ class DRTVIE(InfoExtractor):
|
||||
target = link.get('Target')
|
||||
format_id = target or ''
|
||||
preference = None
|
||||
if spoken_subtitles:
|
||||
if asset_target in ('SpokenSubtitles', 'SignLanguage'):
|
||||
preference = -1
|
||||
format_id += '-spoken-subtitles'
|
||||
format_id += '-%s' % asset_target
|
||||
if target == 'HDS':
|
||||
f4m_formats = self._extract_f4m_formats(
|
||||
uri + '?hdcore=3.3.0&plugin=aasp-3.3.0.99.43',
|
||||
|
@@ -5,9 +5,12 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
js_to_json,
|
||||
unescapeHTML,
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
mimetype2ext,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
||||
@@ -24,14 +27,7 @@ class DVTVIE(InfoExtractor):
|
||||
'id': 'dc0768de855511e49e4b0025900fea04',
|
||||
'ext': 'mp4',
|
||||
'title': 'Vondra o Českém století: Při pohledu na Havla mi bylo trapně',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://video.aktualne.cz/dvtv/stropnicky-policie-vrbetice-preventivne-nekontrolovala/r~82ed4322849211e4a10c0025900fea04/',
|
||||
'md5': '6388f1941b48537dbd28791f712af8bf',
|
||||
'info_dict': {
|
||||
'id': '72c02230849211e49f60002590604f2e',
|
||||
'ext': 'mp4',
|
||||
'title': 'Stropnický: Policie Vrbětice preventivně nekontrolovala',
|
||||
'duration': 1484,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://video.aktualne.cz/dvtv/dvtv-16-12-2014-utok-talibanu-boj-o-kliniku-uprchlici/r~973eb3bc854e11e498be002590604f2e/',
|
||||
@@ -44,55 +40,100 @@ class DVTVIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'b0b40906854d11e4bdad0025900fea04',
|
||||
'ext': 'mp4',
|
||||
'title': 'Drtinová Veselovský TV 16. 12. 2014: Témata dne'
|
||||
'title': 'Drtinová Veselovský TV 16. 12. 2014: Témata dne',
|
||||
'description': 'md5:0916925dea8e30fe84222582280b47a0',
|
||||
'timestamp': 1418760010,
|
||||
'upload_date': '20141216',
|
||||
}
|
||||
}, {
|
||||
'md5': '5f7652a08b05009c1292317b449ffea2',
|
||||
'info_dict': {
|
||||
'id': '420ad9ec854a11e4bdad0025900fea04',
|
||||
'ext': 'mp4',
|
||||
'title': 'Školní masakr možná změní boj s Talibanem, říká novinářka'
|
||||
'title': 'Školní masakr možná změní boj s Talibanem, říká novinářka',
|
||||
'description': 'md5:ff2f9f6de73c73d7cef4f756c1c1af42',
|
||||
'timestamp': 1418760010,
|
||||
'upload_date': '20141216',
|
||||
}
|
||||
}, {
|
||||
'md5': '498eb9dfa97169f409126c617e2a3d64',
|
||||
'info_dict': {
|
||||
'id': '95d35580846a11e4b6d20025900fea04',
|
||||
'ext': 'mp4',
|
||||
'title': 'Boj o kliniku: Veřejný zájem, nebo právo na majetek?'
|
||||
'title': 'Boj o kliniku: Veřejný zájem, nebo právo na majetek?',
|
||||
'description': 'md5:889fe610a70fee5511dc3326a089188e',
|
||||
'timestamp': 1418760010,
|
||||
'upload_date': '20141216',
|
||||
}
|
||||
}, {
|
||||
'md5': 'b8dc6b744844032dab6ba3781a7274b9',
|
||||
'info_dict': {
|
||||
'id': '6fe14d66853511e4833a0025900fea04',
|
||||
'ext': 'mp4',
|
||||
'title': 'Pánek: Odmítání syrských uprchlíků je ostudou české vlády'
|
||||
'title': 'Pánek: Odmítání syrských uprchlíků je ostudou české vlády',
|
||||
'description': 'md5:544f86de6d20c4815bea11bf2ac3004f',
|
||||
'timestamp': 1418760010,
|
||||
'upload_date': '20141216',
|
||||
}
|
||||
}],
|
||||
}, {
|
||||
'url': 'https://video.aktualne.cz/dvtv/zeman-si-jen-leci-mindraky-sobotku-nenavidi-a-babis-se-mu-te/r~960cdb3a365a11e7a83b0025900fea04/',
|
||||
'md5': 'f8efe9656017da948369aa099788c8ea',
|
||||
'info_dict': {
|
||||
'id': '3c496fec365911e7a6500025900fea04',
|
||||
'ext': 'mp4',
|
||||
'title': 'Zeman si jen léčí mindráky, Sobotku nenávidí a Babiš se mu teď hodí, tvrdí Kmenta',
|
||||
'duration': 1103,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://video.aktualne.cz/v-cechach-poprve-zazni-zelenkova-zrestaurovana-mse/r~45b4b00483ec11e4883b002590604f2e/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _parse_video_metadata(self, js, video_id):
|
||||
metadata = self._parse_json(js, video_id, transform_source=js_to_json)
|
||||
data = self._parse_json(js, video_id, transform_source=js_to_json)
|
||||
|
||||
title = unescapeHTML(data['title'])
|
||||
|
||||
formats = []
|
||||
for video in metadata['sources']:
|
||||
ext = video['type'][6:]
|
||||
for video in data['sources']:
|
||||
video_url = video.get('file')
|
||||
if not video_url:
|
||||
continue
|
||||
video_type = video.get('type')
|
||||
ext = determine_ext(video_url, mimetype2ext(video_type))
|
||||
if video_type == 'application/vnd.apple.mpegurl' or ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
elif video_type == 'application/dash+xml' or ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
video_url, video_id, mpd_id='dash', fatal=False))
|
||||
else:
|
||||
label = video.get('label')
|
||||
height = self._search_regex(
|
||||
r'^(\d+)[pP]', label or '', 'height', default=None)
|
||||
format_id = ['http']
|
||||
for f in (ext, label):
|
||||
if f:
|
||||
format_id.append(f)
|
||||
formats.append({
|
||||
'url': video['file'],
|
||||
'ext': ext,
|
||||
'format_id': '%s-%s' % (ext, video['label']),
|
||||
'height': int(video['label'].rstrip('p')),
|
||||
'fps': 25,
|
||||
'url': video_url,
|
||||
'format_id': '-'.join(format_id),
|
||||
'height': int_or_none(height),
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': metadata['mediaid'],
|
||||
'title': unescapeHTML(metadata['title']),
|
||||
'thumbnail': self._proto_relative_url(metadata['image'], 'http:'),
|
||||
'id': data.get('mediaid') or video_id,
|
||||
'title': title,
|
||||
'description': data.get('description'),
|
||||
'thumbnail': data.get('image'),
|
||||
'duration': int_or_none(data.get('duration')),
|
||||
'timestamp': int_or_none(data.get('pubtime')),
|
||||
'formats': formats
|
||||
}
|
||||
|
||||
@@ -103,7 +144,7 @@ class DVTVIE(InfoExtractor):
|
||||
|
||||
# single video
|
||||
item = self._search_regex(
|
||||
r"(?s)embedData[0-9a-f]{32}\['asset'\]\s*=\s*(\{.+?\});",
|
||||
r'(?s)embedData[0-9a-f]{32}\[["\']asset["\']\]\s*=\s*(\{.+?\});',
|
||||
webpage, 'video', default=None, fatal=False)
|
||||
|
||||
if item:
|
||||
@@ -113,6 +154,8 @@ class DVTVIE(InfoExtractor):
|
||||
items = re.findall(
|
||||
r"(?s)BBX\.context\.assets\['[0-9a-f]{32}'\]\.push\(({.+?})\);",
|
||||
webpage)
|
||||
if not items:
|
||||
items = re.findall(r'(?s)var\s+asset\s*=\s*({.+?});\n', webpage)
|
||||
|
||||
if items:
|
||||
return {
|
||||
|
@@ -41,6 +41,7 @@ from .alphaporno import AlphaPornoIE
|
||||
from .amcnetworks import AMCNetworksIE
|
||||
from .animeondemand import AnimeOnDemandIE
|
||||
from .anitube import AnitubeIE
|
||||
from .anvato import AnvatoIE
|
||||
from .anysex import AnySexIE
|
||||
from .aol import AolIE
|
||||
from .allocine import AllocineIE
|
||||
@@ -87,10 +88,9 @@ from .azmedien import (
|
||||
AZMedienPlaylistIE,
|
||||
AZMedienShowPlaylistIE,
|
||||
)
|
||||
from .azubu import AzubuIE, AzubuLiveIE
|
||||
from .baidu import BaiduVideoIE
|
||||
from .bambuser import BambuserIE, BambuserChannelIE
|
||||
from .bandcamp import BandcampIE, BandcampAlbumIE
|
||||
from .bandcamp import BandcampIE, BandcampAlbumIE, BandcampWeeklyIE
|
||||
from .bbc import (
|
||||
BBCCoUkIE,
|
||||
BBCCoUkArticleIE,
|
||||
@@ -98,7 +98,10 @@ from .bbc import (
|
||||
BBCCoUkPlaylistIE,
|
||||
BBCIE,
|
||||
)
|
||||
from .beampro import BeamProLiveIE
|
||||
from .beampro import (
|
||||
BeamProLiveIE,
|
||||
BeamProVodIE,
|
||||
)
|
||||
from .beeg import BeegIE
|
||||
from .behindkink import BehindKinkIE
|
||||
from .bellmedia import BellMediaIE
|
||||
@@ -251,7 +254,10 @@ from .democracynow import DemocracynowIE
|
||||
from .dfb import DFBIE
|
||||
from .dhm import DHMIE
|
||||
from .dotsub import DotsubIE
|
||||
from .douyutv import DouyuTVIE
|
||||
from .douyutv import (
|
||||
DouyuShowIE,
|
||||
DouyuTVIE,
|
||||
)
|
||||
from .dplay import (
|
||||
DPlayIE,
|
||||
DPlayItIE,
|
||||
@@ -350,9 +356,9 @@ from .foxsports import FoxSportsIE
|
||||
from .franceculture import FranceCultureIE
|
||||
from .franceinter import FranceInterIE
|
||||
from .francetv import (
|
||||
PluzzIE,
|
||||
FranceTvInfoIE,
|
||||
FranceTVIE,
|
||||
FranceTVEmbedIE,
|
||||
FranceTVInfoIE,
|
||||
GenerationQuoiIE,
|
||||
CultureboxIE,
|
||||
)
|
||||
@@ -386,7 +392,6 @@ from .globo import (
|
||||
from .go import GoIE
|
||||
from .go90 import Go90IE
|
||||
from .godtube import GodTubeIE
|
||||
from .godtv import GodTVIE
|
||||
from .golem import GolemIE
|
||||
from .googledrive import GoogleDriveIE
|
||||
from .googleplus import GooglePlusIE
|
||||
@@ -542,6 +547,7 @@ from .mangomolo import (
|
||||
)
|
||||
from .matchtv import MatchTVIE
|
||||
from .mdr import MDRIE
|
||||
from .mediaset import MediasetIE
|
||||
from .medici import MediciIE
|
||||
from .meipai import MeipaiIE
|
||||
from .melonvod import MelonVODIE
|
||||
@@ -630,7 +636,10 @@ from .neteasemusic import (
|
||||
NetEaseMusicProgramIE,
|
||||
NetEaseMusicDjRadioIE,
|
||||
)
|
||||
from .newgrounds import NewgroundsIE
|
||||
from .newgrounds import (
|
||||
NewgroundsIE,
|
||||
NewgroundsPlaylistIE,
|
||||
)
|
||||
from .newstube import NewstubeIE
|
||||
from .nextmedia import (
|
||||
NextMediaIE,
|
||||
@@ -663,6 +672,8 @@ from .nintendo import NintendoIE
|
||||
from .njpwworld import NJPWWorldIE
|
||||
from .nobelprize import NobelPrizeIE
|
||||
from .noco import NocoIE
|
||||
from .nonktube import NonkTubeIE
|
||||
from .noovo import NoovoIE
|
||||
from .normalboots import NormalbootsIE
|
||||
from .nosvideo import NosVideoIE
|
||||
from .nova import NovaIE
|
||||
@@ -731,8 +742,8 @@ from .openload import OpenloadIE
|
||||
from .ora import OraTVIE
|
||||
from .orf import (
|
||||
ORFTVthekIE,
|
||||
ORFOE1IE,
|
||||
ORFFM4IE,
|
||||
ORFOE1IE,
|
||||
ORFIPTVIE,
|
||||
)
|
||||
from .packtpub import (
|
||||
@@ -1013,11 +1024,6 @@ from .trilulilu import TriluliluIE
|
||||
from .trutv import TruTVIE
|
||||
from .tube8 import Tube8IE
|
||||
from .tubitv import TubiTvIE
|
||||
from .tudou import (
|
||||
TudouIE,
|
||||
TudouPlaylistIE,
|
||||
TudouAlbumIE,
|
||||
)
|
||||
from .tumblr import TumblrIE
|
||||
from .tunein import (
|
||||
TuneInClipIE,
|
||||
@@ -1097,6 +1103,10 @@ from .uplynk import (
|
||||
UplynkIE,
|
||||
UplynkPreplayIE,
|
||||
)
|
||||
from .upskill import (
|
||||
UpskillIE,
|
||||
UpskillCourseIE,
|
||||
)
|
||||
from .urort import UrortIE
|
||||
from .urplay import URPlayIE
|
||||
from .usanetwork import USANetworkIE
|
||||
@@ -1124,6 +1134,7 @@ from .vgtv import (
|
||||
from .vh1 import VH1IE
|
||||
from .vice import (
|
||||
ViceIE,
|
||||
ViceArticleIE,
|
||||
ViceShowIE,
|
||||
)
|
||||
from .viceland import VicelandIE
|
||||
@@ -1299,5 +1310,6 @@ from .youtube import (
|
||||
YoutubeWatchLaterIE,
|
||||
)
|
||||
from .zapiks import ZapiksIE
|
||||
from .zaq1 import Zaq1IE
|
||||
from .zdf import ZDFIE, ZDFChannelIE
|
||||
from .zingmp3 import ZingMp3IE
|
||||
|
@@ -102,6 +102,8 @@ class FirstTVIE(InfoExtractor):
|
||||
'format_id': f.get('name'),
|
||||
'tbr': tbr,
|
||||
'source_preference': quality(f.get('name')),
|
||||
# quality metadata of http formats may be incorrect
|
||||
'preference': -1,
|
||||
})
|
||||
# m3u8 URL format is reverse engineered from [1] (search for
|
||||
# master.m3u8). dashEdges (that is currently balancer-vod.1tv.ru)
|
||||
|
@@ -1,7 +1,10 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_urlencode
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_parse_urlencode,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
@@ -81,7 +84,7 @@ class FlickrIE(InfoExtractor):
|
||||
|
||||
formats = []
|
||||
for stream in streams['stream']:
|
||||
stream_type = str(stream.get('type'))
|
||||
stream_type = compat_str(stream.get('type'))
|
||||
formats.append({
|
||||
'format_id': stream_type,
|
||||
'url': stream['_content'],
|
||||
|
@@ -5,6 +5,7 @@ import itertools
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
get_element_by_id,
|
||||
int_or_none,
|
||||
remove_end,
|
||||
)
|
||||
|
||||
@@ -46,7 +47,7 @@ class FoxgayIE(InfoExtractor):
|
||||
|
||||
formats = [{
|
||||
'url': source,
|
||||
'height': resolution,
|
||||
'height': int_or_none(resolution),
|
||||
} for source, resolution in zip(
|
||||
video_data['sources'], video_data.get('resolutions', itertools.repeat(None)))]
|
||||
|
||||
|
@@ -11,10 +11,10 @@ class FoxSportsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?foxsports\.com/(?:[^/]+/)*(?P<id>[^/]+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.foxsports.com/video?vid=432609859715',
|
||||
'url': 'http://www.foxsports.com/tennessee/video/432609859715',
|
||||
'md5': 'b49050e955bebe32c301972e4012ac17',
|
||||
'info_dict': {
|
||||
'id': 'i0qKWsk3qJaM',
|
||||
'id': 'bwduI3X_TgUB',
|
||||
'ext': 'mp4',
|
||||
'title': 'Courtney Lee on going up 2-0 in series vs. Blazers',
|
||||
'description': 'Courtney Lee talks about Memphis being focused.',
|
||||
@@ -31,8 +31,9 @@ class FoxSportsIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
config = self._parse_json(
|
||||
self._search_regex(
|
||||
r"data-player-config='([^']+)'", webpage, 'data player config'),
|
||||
self._html_search_regex(
|
||||
r"""class="[^"]*(?:fs-player|platformPlayer-wrapper)[^"]*".+?data-player-config='([^']+)'""",
|
||||
webpage, 'data player config'),
|
||||
video_id)
|
||||
|
||||
return self.url_result(smuggle_url(update_url_query(
|
||||
|
@@ -21,11 +21,13 @@ from .dailymotion import (
|
||||
|
||||
|
||||
class FranceTVBaseInfoExtractor(InfoExtractor):
|
||||
def _extract_video(self, video_id, catalogue):
|
||||
def _extract_video(self, video_id, catalogue=None):
|
||||
info = self._download_json(
|
||||
'http://webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/?idDiffusion=%s&catalogue=%s'
|
||||
% (video_id, catalogue),
|
||||
video_id, 'Downloading video JSON')
|
||||
'https://sivideo.webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/',
|
||||
video_id, 'Downloading video JSON', query={
|
||||
'idDiffusion': video_id,
|
||||
'catalogue': catalogue or '',
|
||||
})
|
||||
|
||||
if info.get('status') == 'NOK':
|
||||
raise ExtractorError(
|
||||
@@ -109,27 +111,100 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
|
||||
}
|
||||
|
||||
|
||||
class PluzzIE(FranceTVBaseInfoExtractor):
|
||||
IE_NAME = 'pluzz.francetv.fr'
|
||||
_VALID_URL = r'https?://(?:m\.)?pluzz\.francetv\.fr/videos/(?P<id>.+?)\.html'
|
||||
class FranceTVIE(FranceTVBaseInfoExtractor):
|
||||
_VALID_URL = r'https?://(?:(?:www\.)?france\.tv|mobile\.france\.tv)/(?:[^/]+/)*(?P<id>[^/]+)\.html'
|
||||
|
||||
# Can't use tests, videos expire in 7 days
|
||||
_TESTS = [{
|
||||
'url': 'https://www.france.tv/france-2/13h15-le-dimanche/140921-les-mysteres-de-jesus.html',
|
||||
'info_dict': {
|
||||
'id': '157550144',
|
||||
'ext': 'mp4',
|
||||
'title': '13h15, le dimanche... - Les mystères de Jésus',
|
||||
'description': 'md5:75efe8d4c0a8205e5904498ffe1e1a42',
|
||||
'timestamp': 1494156300,
|
||||
'upload_date': '20170507',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 downloads
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# france3
|
||||
'url': 'https://www.france.tv/france-3/des-chiffres-et-des-lettres/139063-emission-du-mardi-9-mai-2017.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# france4
|
||||
'url': 'https://www.france.tv/france-4/hero-corp/saison-1/134151-apres-le-calme.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# france5
|
||||
'url': 'https://www.france.tv/france-5/c-a-dire/saison-10/137013-c-a-dire.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# franceo
|
||||
'url': 'https://www.france.tv/france-o/archipels/132249-mon-ancetre-l-esclave.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# france2 live
|
||||
'url': 'https://www.france.tv/france-2/direct.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.france.tv/documentaires/histoire/136517-argentine-les-500-bebes-voles-de-la-dictature.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.france.tv/jeux-et-divertissements/divertissements/133965-le-web-contre-attaque.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://mobile.france.tv/france-5/c-dans-l-air/137347-emission-du-vendredi-12-mai-2017.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.france.tv/142749-rouge-sang.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
video_id = self._html_search_meta(
|
||||
'id_video', webpage, 'video id', default=None)
|
||||
if not video_id:
|
||||
catalogue = None
|
||||
video_id = self._search_regex(
|
||||
r'data-diffusion=["\'](\d+)', webpage, 'video id')
|
||||
r'data-main-video=(["\'])(?P<id>(?:(?!\1).)+)\1',
|
||||
webpage, 'video id', default=None, group='id')
|
||||
|
||||
return self._extract_video(video_id, 'Pluzz')
|
||||
if not video_id:
|
||||
video_id, catalogue = self._html_search_regex(
|
||||
r'(?:href=|player\.setVideo\(\s*)"http://videos?\.francetv\.fr/video/([^@]+@[^"]+)"',
|
||||
webpage, 'video ID').split('@')
|
||||
return self._extract_video(video_id, catalogue)
|
||||
|
||||
|
||||
class FranceTvInfoIE(FranceTVBaseInfoExtractor):
|
||||
class FranceTVEmbedIE(FranceTVBaseInfoExtractor):
|
||||
_VALID_URL = r'https?://embed\.francetv\.fr/*\?.*?\bue=(?P<id>[^&]+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://embed.francetv.fr/?ue=7fd581a2ccf59d2fc5719c5c13cf6961',
|
||||
'info_dict': {
|
||||
'id': 'NI_983319',
|
||||
'ext': 'mp4',
|
||||
'title': 'Le Pen Reims',
|
||||
'upload_date': '20170505',
|
||||
'timestamp': 1493981780,
|
||||
'duration': 16,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
video = self._download_json(
|
||||
'http://api-embed.webservices.francetelevisions.fr/key/%s' % video_id,
|
||||
video_id)
|
||||
|
||||
return self._extract_video(video['video_id'], video.get('catalog'))
|
||||
|
||||
|
||||
class FranceTVInfoIE(FranceTVBaseInfoExtractor):
|
||||
IE_NAME = 'francetvinfo.fr'
|
||||
_VALID_URL = r'https?://(?:www|mobile|france3-regions)\.francetvinfo\.fr/(?:[^/]+/)*(?P<title>[^/?#&.]+)'
|
||||
|
||||
@@ -233,124 +308,6 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor):
|
||||
return self._extract_video(video_id, catalogue)
|
||||
|
||||
|
||||
class FranceTVIE(FranceTVBaseInfoExtractor):
|
||||
IE_NAME = 'francetv'
|
||||
IE_DESC = 'France 2, 3, 4, 5 and Ô'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:www\.)?france[2345o]\.fr/
|
||||
(?:
|
||||
emissions/[^/]+/(?:videos|diffusions)|
|
||||
emission/[^/]+|
|
||||
videos|
|
||||
jt
|
||||
)
|
||||
/|
|
||||
embed\.francetv\.fr/\?ue=
|
||||
)
|
||||
(?P<id>[^/?]+)
|
||||
'''
|
||||
|
||||
_TESTS = [
|
||||
# france2
|
||||
{
|
||||
'url': 'http://www.france2.fr/emissions/13h15-le-samedi-le-dimanche/videos/75540104',
|
||||
'md5': 'c03fc87cb85429ffd55df32b9fc05523',
|
||||
'info_dict': {
|
||||
'id': '109169362',
|
||||
'ext': 'flv',
|
||||
'title': '13h15, le dimanche...',
|
||||
'description': 'md5:9a0932bb465f22d377a449be9d1a0ff7',
|
||||
'upload_date': '20140914',
|
||||
'timestamp': 1410693600,
|
||||
},
|
||||
},
|
||||
# france3
|
||||
{
|
||||
'url': 'http://www.france3.fr/emissions/pieces-a-conviction/diffusions/13-11-2013_145575',
|
||||
'md5': '679bb8f8921f8623bd658fa2f8364da0',
|
||||
'info_dict': {
|
||||
'id': '000702326_CAPP_PicesconvictionExtrait313022013_120220131722_Au',
|
||||
'ext': 'mp4',
|
||||
'title': 'Le scandale du prix des médicaments',
|
||||
'description': 'md5:1384089fbee2f04fc6c9de025ee2e9ce',
|
||||
'upload_date': '20131113',
|
||||
'timestamp': 1384380000,
|
||||
},
|
||||
},
|
||||
# france4
|
||||
{
|
||||
'url': 'http://www.france4.fr/emissions/hero-corp/videos/rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4',
|
||||
'md5': 'a182bf8d2c43d88d46ec48fbdd260c1c',
|
||||
'info_dict': {
|
||||
'id': 'rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4',
|
||||
'ext': 'mp4',
|
||||
'title': 'Hero Corp Making of - Extrait 1',
|
||||
'description': 'md5:c87d54871b1790679aec1197e73d650a',
|
||||
'upload_date': '20131106',
|
||||
'timestamp': 1383766500,
|
||||
},
|
||||
},
|
||||
# france5
|
||||
{
|
||||
'url': 'http://www.france5.fr/emissions/c-a-dire/videos/quels_sont_les_enjeux_de_cette_rentree_politique__31-08-2015_908948?onglet=tous&page=1',
|
||||
'md5': 'f6c577df3806e26471b3d21631241fd0',
|
||||
'info_dict': {
|
||||
'id': '123327454',
|
||||
'ext': 'flv',
|
||||
'title': 'C à dire ?! - Quels sont les enjeux de cette rentrée politique ?',
|
||||
'description': 'md5:4a0d5cb5dce89d353522a84462bae5a4',
|
||||
'upload_date': '20150831',
|
||||
'timestamp': 1441035120,
|
||||
},
|
||||
},
|
||||
# franceo
|
||||
{
|
||||
'url': 'http://www.franceo.fr/jt/info-soir/18-07-2015',
|
||||
'md5': '47d5816d3b24351cdce512ad7ab31da8',
|
||||
'info_dict': {
|
||||
'id': '125377621',
|
||||
'ext': 'flv',
|
||||
'title': 'Infô soir',
|
||||
'description': 'md5:01b8c6915a3d93d8bbbd692651714309',
|
||||
'upload_date': '20150718',
|
||||
'timestamp': 1437241200,
|
||||
'duration': 414,
|
||||
},
|
||||
},
|
||||
{
|
||||
# francetv embed
|
||||
'url': 'http://embed.francetv.fr/?ue=8d7d3da1e3047c42ade5a5d7dfd3fc87',
|
||||
'info_dict': {
|
||||
'id': 'EV_30231',
|
||||
'ext': 'flv',
|
||||
'title': 'Alcaline, le concert avec Calogero',
|
||||
'description': 'md5:61f08036dcc8f47e9cfc33aed08ffaff',
|
||||
'upload_date': '20150226',
|
||||
'timestamp': 1424989860,
|
||||
'duration': 5400,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.france4.fr/emission/highlander/diffusion-du-17-07-2015-04h05',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'http://www.franceo.fr/videos/125377617',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_id, catalogue = self._html_search_regex(
|
||||
r'(?:href=|player\.setVideo\(\s*)"http://videos?\.francetv\.fr/video/([^@]+@[^"]+)"',
|
||||
webpage, 'video ID').split('@')
|
||||
return self._extract_video(video_id, catalogue)
|
||||
|
||||
|
||||
class GenerationQuoiIE(InfoExtractor):
|
||||
IE_NAME = 'france2.fr:generation-quoi'
|
||||
_VALID_URL = r'https?://generation-quoi\.france2\.fr/portrait/(?P<id>[^/?#]+)'
|
||||
|
@@ -2,15 +2,11 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_HTTPError,
|
||||
compat_urllib_parse_unquote_plus,
|
||||
)
|
||||
from ..compat import compat_HTTPError
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
sanitized_Request,
|
||||
ExtractorError,
|
||||
urlencode_postdata
|
||||
)
|
||||
@@ -20,6 +16,7 @@ class FunimationIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?funimation(?:\.com|now\.uk)/shows/[^/]+/(?P<id>[^/?#&]+)'
|
||||
|
||||
_NETRC_MACHINE = 'funimation'
|
||||
_TOKEN = None
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.funimation.com/shows/hacksign/role-play/',
|
||||
@@ -38,56 +35,38 @@ class FunimationIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.funimation.com/shows/attack-on-titan-junior-high/broadcast-dub-preview/',
|
||||
'info_dict': {
|
||||
'id': '9635',
|
||||
'id': '210051',
|
||||
'display_id': 'broadcast-dub-preview',
|
||||
'ext': 'mp4',
|
||||
'title': 'Attack on Titan: Junior High - Broadcast Dub Preview',
|
||||
'description': 'md5:f8ec49c0aff702a7832cd81b8a44f803',
|
||||
'thumbnail': r're:https?://.*\.(?:jpg|png)',
|
||||
},
|
||||
'skip': 'Access without user interaction is forbidden by CloudFlare',
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.funimationnow.uk/shows/puzzle-dragons-x/drop-impact/simulcast/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_LOGIN_URL = 'http://www.funimation.com/login'
|
||||
|
||||
def _extract_cloudflare_session_ua(self, url):
|
||||
ci_session_cookie = self._get_cookies(url).get('ci_session')
|
||||
if ci_session_cookie:
|
||||
ci_session = compat_urllib_parse_unquote_plus(ci_session_cookie.value)
|
||||
# ci_session is a string serialized by PHP function serialize()
|
||||
# This case is simple enough to use regular expressions only
|
||||
return self._search_regex(
|
||||
r'"user_agent";s:\d+:"([^"]+)"', ci_session, 'user agent',
|
||||
default=None)
|
||||
|
||||
def _login(self):
|
||||
(username, password) = self._get_login_info()
|
||||
if username is None:
|
||||
return
|
||||
data = urlencode_postdata({
|
||||
'email_field': username,
|
||||
'password_field': password,
|
||||
})
|
||||
user_agent = self._extract_cloudflare_session_ua(self._LOGIN_URL)
|
||||
if not user_agent:
|
||||
user_agent = 'Mozilla/5.0 (Windows NT 5.2; WOW64; rv:42.0) Gecko/20100101 Firefox/42.0'
|
||||
login_request = sanitized_Request(self._LOGIN_URL, data, headers={
|
||||
'User-Agent': user_agent,
|
||||
'Content-Type': 'application/x-www-form-urlencoded'
|
||||
})
|
||||
login_page = self._download_webpage(
|
||||
login_request, None, 'Logging in as %s' % username)
|
||||
if any(p in login_page for p in ('funimation.com/logout', '>Log Out<')):
|
||||
return
|
||||
error = self._html_search_regex(
|
||||
r'(?s)<div[^>]+id=["\']errorMessages["\'][^>]*>(.+?)</div>',
|
||||
login_page, 'error messages', default=None)
|
||||
if error:
|
||||
raise ExtractorError('Unable to login: %s' % error, expected=True)
|
||||
raise ExtractorError('Unable to log in')
|
||||
try:
|
||||
data = self._download_json(
|
||||
'https://prod-api-funimationnow.dadcdigital.com/api/auth/login/',
|
||||
None, 'Logging in as %s' % username, data=urlencode_postdata({
|
||||
'username': username,
|
||||
'password': password,
|
||||
}))
|
||||
self._TOKEN = data['token']
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
|
||||
error = self._parse_json(e.cause.read().decode(), None)['error']
|
||||
raise ExtractorError(error, expected=True)
|
||||
raise
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
@@ -125,9 +104,12 @@ class FunimationIE(InfoExtractor):
|
||||
description = self._html_search_meta(['description', 'og:description'], webpage, fatal=True)
|
||||
|
||||
try:
|
||||
headers = {}
|
||||
if self._TOKEN:
|
||||
headers['Authorization'] = 'Token %s' % self._TOKEN
|
||||
sources = self._download_json(
|
||||
'https://prod-api-funimationnow.dadcdigital.com/api/source/catalog/video/%s/signed/' % video_id,
|
||||
video_id)['items']
|
||||
video_id, headers=headers)['items']
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||
error = self._parse_json(e.cause.read(), video_id)['errors'][0]
|
||||
|
@@ -58,8 +58,7 @@ class FunnyOrDieIE(InfoExtractor):
|
||||
m3u8_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False)
|
||||
source_formats = list(filter(
|
||||
lambda f: f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
|
||||
m3u8_formats))
|
||||
lambda f: f.get('vcodec') != 'none', m3u8_formats))
|
||||
|
||||
bitrates = [int(bitrate) for bitrate in re.findall(r'[,/]v(\d+)(?=[,/])', m3u8_url)]
|
||||
bitrates.sort()
|
||||
|
@@ -78,8 +78,7 @@ class GameSpotIE(OnceIE):
|
||||
if m3u8_formats:
|
||||
self._sort_formats(m3u8_formats)
|
||||
m3u8_formats = list(filter(
|
||||
lambda f: f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
|
||||
m3u8_formats))
|
||||
lambda f: f.get('vcodec') != 'none', m3u8_formats))
|
||||
if len(qualities) == len(m3u8_formats):
|
||||
for q, m3u8_format in zip(qualities, m3u8_formats):
|
||||
f = m3u8_format.copy()
|
||||
|
@@ -6,16 +6,13 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class GaskrankIE(InfoExtractor):
|
||||
"""InfoExtractor for gaskrank.tv"""
|
||||
_VALID_URL = r'https?://(?:www\.)?gaskrank\.tv/tv/(?P<categories>[^/]+)/(?P<id>[^/]+)\.html?'
|
||||
_TESTS = [
|
||||
{
|
||||
_VALID_URL = r'https?://(?:www\.)?gaskrank\.tv/tv/(?P<categories>[^/]+)/(?P<id>[^/]+)\.htm'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.gaskrank.tv/tv/motorrad-fun/strike-einparken-durch-anfaenger-crash-mit-groesserem-flurschaden.htm',
|
||||
'md5': '1ae88dbac97887d85ebd1157a95fc4f9',
|
||||
'info_dict': {
|
||||
@@ -29,8 +26,7 @@ class GaskrankIE(InfoExtractor):
|
||||
'upload_date': '20170110',
|
||||
'uploader_url': None,
|
||||
}
|
||||
},
|
||||
{
|
||||
}, {
|
||||
'url': 'http://www.gaskrank.tv/tv/racing/isle-of-man-tt-2011-michael-du-15920.htm',
|
||||
'md5': 'c33ee32c711bc6c8224bfcbe62b23095',
|
||||
'info_dict': {
|
||||
@@ -41,27 +37,21 @@ class GaskrankIE(InfoExtractor):
|
||||
'categories': ['racing'],
|
||||
'display_id': 'isle-of-man-tt-2011-michael-du-15920',
|
||||
'uploader_id': 'IOM',
|
||||
'upload_date': '20160506',
|
||||
'upload_date': '20170523',
|
||||
'uploader_url': 'www.iomtt.com',
|
||||
}
|
||||
}
|
||||
]
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
"""extract information from gaskrank.tv"""
|
||||
def fix_json(code):
|
||||
"""Removes trailing comma in json: {{},} --> {{}}"""
|
||||
return re.sub(r',\s*}', r'}', js_to_json(code))
|
||||
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
title = self._og_search_title(
|
||||
webpage, default=None) or self._html_search_meta(
|
||||
'title', webpage, fatal=True)
|
||||
|
||||
categories = [re.match(self._VALID_URL, url).group('categories')]
|
||||
title = self._search_regex(
|
||||
r'movieName\s*:\s*\'([^\']*)\'',
|
||||
webpage, 'title')
|
||||
thumbnail = self._search_regex(
|
||||
r'poster\s*:\s*\'([^\']*)\'',
|
||||
webpage, 'thumbnail', default=None)
|
||||
|
||||
mobj = re.search(
|
||||
r'Video von:\s*(?P<uploader_id>[^|]*?)\s*\|\s*vom:\s*(?P<upload_date>[0-9][0-9]\.[0-9][0-9]\.[0-9][0-9][0-9][0-9])',
|
||||
@@ -89,29 +79,14 @@ class GaskrankIE(InfoExtractor):
|
||||
if average_rating:
|
||||
average_rating = float_or_none(average_rating.replace(',', '.'))
|
||||
|
||||
playlist = self._parse_json(
|
||||
self._search_regex(
|
||||
r'playlist\s*:\s*\[([^\]]*)\]',
|
||||
webpage, 'playlist', default='{}'),
|
||||
display_id, transform_source=fix_json, fatal=False)
|
||||
|
||||
video_id = self._search_regex(
|
||||
r'https?://movies\.gaskrank\.tv/([^-]*?)(-[^\.]*)?\.mp4',
|
||||
playlist.get('0').get('src'), 'video id')
|
||||
webpage, 'video id', default=display_id)
|
||||
|
||||
formats = []
|
||||
for key in playlist:
|
||||
formats.append({
|
||||
'url': playlist[key]['src'],
|
||||
'format_id': key,
|
||||
'quality': playlist[key].get('quality')})
|
||||
self._sort_formats(formats, field_preference=['format_id'])
|
||||
|
||||
return {
|
||||
entry = self._parse_html5_media_entries(url, webpage, video_id)[0]
|
||||
entry.update({
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnail': thumbnail,
|
||||
'categories': categories,
|
||||
'display_id': display_id,
|
||||
'uploader_id': uploader_id,
|
||||
@@ -120,4 +95,7 @@ class GaskrankIE(InfoExtractor):
|
||||
'tags': tags,
|
||||
'view_count': view_count,
|
||||
'average_rating': average_rating,
|
||||
}
|
||||
})
|
||||
self._sort_formats(entry['formats'])
|
||||
|
||||
return entry
|
||||
|
@@ -75,6 +75,19 @@ class GDCVaultIE(InfoExtractor):
|
||||
'format': 'jp', # The japanese audio
|
||||
}
|
||||
},
|
||||
{
|
||||
# gdc-player.html
|
||||
'url': 'http://www.gdcvault.com/play/1435/An-American-engine-in-Tokyo',
|
||||
'info_dict': {
|
||||
'id': '1435',
|
||||
'display_id': 'An-American-engine-in-Tokyo',
|
||||
'ext': 'flv',
|
||||
'title': 'An American Engine in Tokyo:/nThe collaboration of Epic Games and Square Enix/nFor THE LAST REMINANT',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # Requires rtmpdump
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def _login(self, webpage_url, display_id):
|
||||
@@ -128,7 +141,7 @@ class GDCVaultIE(InfoExtractor):
|
||||
'title': title,
|
||||
}
|
||||
|
||||
PLAYER_REGEX = r'<iframe src="(?P<xml_root>.+?)/player.*?\.html.*?".*?</iframe>'
|
||||
PLAYER_REGEX = r'<iframe src="(?P<xml_root>.+?)/(?:gdc-)?player.*?\.html.*?".*?</iframe>'
|
||||
|
||||
xml_root = self._html_search_regex(
|
||||
PLAYER_REGEX, start_page, 'xml root', default=None)
|
||||
|
@@ -10,6 +10,7 @@ from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
from ..compat import (
|
||||
compat_etree_fromstring,
|
||||
compat_str,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urlparse,
|
||||
compat_xml_parse_error,
|
||||
@@ -86,6 +87,10 @@ from .openload import OpenloadIE
|
||||
from .videopress import VideoPressIE
|
||||
from .rutube import RutubeIE
|
||||
from .limelight import LimelightBaseIE
|
||||
from .anvato import AnvatoIE
|
||||
from .washingtonpost import WashingtonPostIE
|
||||
from .wistia import WistiaIE
|
||||
from .mediaset import MediasetIE
|
||||
|
||||
|
||||
class GenericIE(InfoExtractor):
|
||||
@@ -430,6 +435,22 @@ class GenericIE(InfoExtractor):
|
||||
'skip_download': True, # m3u8 download
|
||||
},
|
||||
},
|
||||
{
|
||||
# Brightcove video in <iframe>
|
||||
'url': 'http://www.un.org/chinese/News/story.asp?NewsID=27724',
|
||||
'md5': '36d74ef5e37c8b4a2ce92880d208b968',
|
||||
'info_dict': {
|
||||
'id': '5360463607001',
|
||||
'ext': 'mp4',
|
||||
'title': '叙利亚失明儿童在废墟上演唱《心跳》 呼吁获得正常童年生活',
|
||||
'description': '联合国儿童基金会中东和北非区域大使、作曲家扎德·迪拉尼(Zade Dirani)在3月15日叙利亚冲突爆发7周年纪念日之际发布了为叙利亚谱写的歌曲《心跳》(HEARTBEAT),为受到六年冲突影响的叙利亚儿童发出强烈呐喊,呼吁世界做出共同努力,使叙利亚儿童重新获得享有正常童年生活的权利。',
|
||||
'uploader': 'United Nations',
|
||||
'uploader_id': '1362235914001',
|
||||
'timestamp': 1489593889,
|
||||
'upload_date': '20170315',
|
||||
},
|
||||
'add_ie': ['BrightcoveLegacy'],
|
||||
},
|
||||
{
|
||||
# Brightcove with alternative playerID key
|
||||
'url': 'http://www.nature.com/nmeth/journal/v9/n7/fig_tab/nmeth.2062_SV1.html',
|
||||
@@ -1411,6 +1432,22 @@ class GenericIE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
# Brightcove embed with whitespace around attribute names
|
||||
'url': 'http://www.stack.com/video/3167554373001/learn-to-hit-open-three-pointers-with-damian-lillard-s-baseline-drift-drill',
|
||||
'info_dict': {
|
||||
'id': '3167554373001',
|
||||
'ext': 'mp4',
|
||||
'title': "Learn to Hit Open Three-Pointers With Damian Lillard's Baseline Drift Drill",
|
||||
'description': 'md5:57bacb0e0f29349de4972bfda3191713',
|
||||
'uploader_id': '1079349493',
|
||||
'upload_date': '20140207',
|
||||
'timestamp': 1391810548,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# Another form of arte.tv embed
|
||||
{
|
||||
'url': 'http://www.tv-replay.fr/redirection/09-04-16/arte-reportage-arte-11508975.html',
|
||||
@@ -1661,6 +1698,42 @@ class GenericIE(InfoExtractor):
|
||||
},
|
||||
'playlist_mincount': 5,
|
||||
},
|
||||
{
|
||||
'url': 'http://kron4.com/2017/04/28/standoff-with-walnut-creek-murder-suspect-ends-with-arrest/',
|
||||
'info_dict': {
|
||||
'id': 'standoff-with-walnut-creek-murder-suspect-ends-with-arrest',
|
||||
'title': 'Standoff with Walnut Creek murder suspect ends',
|
||||
'description': 'md5:3ccc48a60fc9441eeccfc9c469ebf788',
|
||||
},
|
||||
'playlist_mincount': 4,
|
||||
},
|
||||
{
|
||||
# WashingtonPost embed
|
||||
'url': 'http://www.vanityfair.com/hollywood/2017/04/donald-trump-tv-pitches',
|
||||
'info_dict': {
|
||||
'id': '8caf6e88-d0ec-11e5-90d3-34c2c42653ac',
|
||||
'ext': 'mp4',
|
||||
'title': "No one has seen the drama series based on Trump's life \u2014 until now",
|
||||
'description': 'Donald Trump wanted a weekly TV drama based on his life. It never aired. But The Washington Post recently obtained a scene from the pilot script — and enlisted actors.',
|
||||
'timestamp': 1455216756,
|
||||
'uploader': 'The Washington Post',
|
||||
'upload_date': '20160211',
|
||||
},
|
||||
'add_ie': [WashingtonPostIE.ie_key()],
|
||||
},
|
||||
{
|
||||
# Mediaset embed
|
||||
'url': 'http://www.tgcom24.mediaset.it/politica/serracchiani-voglio-vivere-in-una-societa-aperta-reazioni-sproporzionate-_3071354-201702a.shtml',
|
||||
'info_dict': {
|
||||
'id': '720642',
|
||||
'ext': 'mp4',
|
||||
'title': 'Serracchiani: "Voglio vivere in una società aperta, con tutela del patto di fiducia"',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': [MediasetIE.ie_key()],
|
||||
},
|
||||
# {
|
||||
# # TODO: find another test
|
||||
# # http://schema.org/VideoObject
|
||||
@@ -1835,14 +1908,14 @@ class GenericIE(InfoExtractor):
|
||||
content_type = head_response.headers.get('Content-Type', '').lower()
|
||||
m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
|
||||
if m:
|
||||
format_id = m.group('format_id')
|
||||
format_id = compat_str(m.group('format_id'))
|
||||
if format_id.endswith('mpegurl'):
|
||||
formats = self._extract_m3u8_formats(url, video_id, 'mp4')
|
||||
elif format_id == 'f4m':
|
||||
formats = self._extract_f4m_formats(url, video_id)
|
||||
else:
|
||||
formats = [{
|
||||
'format_id': m.group('format_id'),
|
||||
'format_id': format_id,
|
||||
'url': url,
|
||||
'vcodec': 'none' if m.group('type') == 'audio' else None
|
||||
}]
|
||||
@@ -2054,57 +2127,20 @@ class GenericIE(InfoExtractor):
|
||||
playlists, video_id, video_title, lambda p: '//dailymotion.com/playlist/%s' % p)
|
||||
|
||||
# Look for embedded Wistia player
|
||||
match = re.search(
|
||||
r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
|
||||
if match:
|
||||
embed_url = self._proto_relative_url(
|
||||
unescapeHTML(match.group('url')))
|
||||
wistia_url = WistiaIE._extract_url(webpage)
|
||||
if wistia_url:
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': embed_url,
|
||||
'ie_key': 'Wistia',
|
||||
'url': self._proto_relative_url(wistia_url),
|
||||
'ie_key': WistiaIE.ie_key(),
|
||||
'uploader': video_uploader,
|
||||
}
|
||||
|
||||
match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
|
||||
if match:
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'wistia:%s' % match.group('id'),
|
||||
'ie_key': 'Wistia',
|
||||
'uploader': video_uploader,
|
||||
}
|
||||
|
||||
match = re.search(
|
||||
r'''(?sx)
|
||||
<script[^>]+src=(["'])(?:https?:)?//fast\.wistia\.com/assets/external/E-v1\.js\1[^>]*>.*?
|
||||
<div[^>]+class=(["']).*?\bwistia_async_(?P<id>[a-z0-9]+)\b.*?\2
|
||||
''', webpage)
|
||||
if match:
|
||||
return self.url_result(self._proto_relative_url(
|
||||
'wistia:%s' % match.group('id')), 'Wistia')
|
||||
|
||||
# Look for SVT player
|
||||
svt_url = SVTIE._extract_url(webpage)
|
||||
if svt_url:
|
||||
return self.url_result(svt_url, 'SVT')
|
||||
|
||||
# Look for embedded condenast player
|
||||
matches = re.findall(
|
||||
r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+")',
|
||||
webpage)
|
||||
if matches:
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'entries': [{
|
||||
'_type': 'url',
|
||||
'ie_key': 'CondeNast',
|
||||
'url': ma,
|
||||
} for ma in matches],
|
||||
'title': video_title,
|
||||
'id': video_id,
|
||||
}
|
||||
|
||||
# Look for Bandcamp pages with custom domain
|
||||
mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
|
||||
if mobj is not None:
|
||||
@@ -2498,28 +2534,11 @@ class GenericIE(InfoExtractor):
|
||||
return self.playlist_result(
|
||||
limelight_urls, video_id, video_title, video_description)
|
||||
|
||||
mobj = re.search(r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})', webpage)
|
||||
if mobj:
|
||||
lm = {
|
||||
'Media': 'media',
|
||||
'Channel': 'channel',
|
||||
'ChannelList': 'channel_list',
|
||||
}
|
||||
return self.url_result(smuggle_url('limelight:%s:%s' % (
|
||||
lm[mobj.group(1)], mobj.group(2)), {'source_url': url}),
|
||||
'Limelight%s' % mobj.group(1), mobj.group(2))
|
||||
|
||||
mobj = re.search(
|
||||
r'''(?sx)
|
||||
<object[^>]+class=(["\'])LimelightEmbeddedPlayerFlash\1[^>]*>.*?
|
||||
<param[^>]+
|
||||
name=(["\'])flashVars\2[^>]+
|
||||
value=(["\'])(?:(?!\3).)*mediaId=(?P<id>[a-z0-9]{32})
|
||||
''', webpage)
|
||||
if mobj:
|
||||
return self.url_result(smuggle_url(
|
||||
'limelight:media:%s' % mobj.group('id'),
|
||||
{'source_url': url}), 'LimelightMedia', mobj.group('id'))
|
||||
# Look for Anvato embeds
|
||||
anvato_urls = AnvatoIE._extract_urls(self, webpage, video_id)
|
||||
if anvato_urls:
|
||||
return self.playlist_result(
|
||||
anvato_urls, video_id, video_title, video_description)
|
||||
|
||||
# Look for AdobeTVVideo embeds
|
||||
mobj = re.search(
|
||||
@@ -2638,6 +2657,18 @@ class GenericIE(InfoExtractor):
|
||||
return self.playlist_from_matches(
|
||||
rutube_urls, ie=RutubeIE.ie_key())
|
||||
|
||||
# Look for WashingtonPost embeds
|
||||
wapo_urls = WashingtonPostIE._extract_urls(webpage)
|
||||
if wapo_urls:
|
||||
return self.playlist_from_matches(
|
||||
wapo_urls, video_id, video_title, ie=WashingtonPostIE.ie_key())
|
||||
|
||||
# Look for Mediaset embeds
|
||||
mediaset_urls = MediasetIE._extract_urls(webpage)
|
||||
if mediaset_urls:
|
||||
return self.playlist_from_matches(
|
||||
mediaset_urls, video_id, video_title, ie=MediasetIE.ie_key())
|
||||
|
||||
# Looking for http://schema.org/VideoObject
|
||||
json_ld = self._search_json_ld(
|
||||
webpage, video_id, default={}, expected_type='VideoObject')
|
||||
|
@@ -82,7 +82,7 @@ class GfycatIE(InfoExtractor):
|
||||
video_url = gfy.get('%sUrl' % format_id)
|
||||
if not video_url:
|
||||
continue
|
||||
filesize = gfy.get('%sSize' % format_id)
|
||||
filesize = int_or_none(gfy.get('%sSize' % format_id))
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'format_id': format_id,
|
||||
|
@@ -36,22 +36,26 @@ class GoIE(AdobePassIE):
|
||||
'requestor_id': 'DisneyXD',
|
||||
}
|
||||
}
|
||||
_VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:[^/]+/)*(?:vdka(?P<id>\w+)|(?:[^/]+/)*(?P<display_id>[^/?#]+))' % '|'.join(_SITE_INFO.keys())
|
||||
_VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:(?:[^/]+/)*(?P<id>vdka\w+)|(?:[^/]+/)*(?P<display_id>[^/?#]+))' % '|'.join(_SITE_INFO.keys())
|
||||
_TESTS = [{
|
||||
'url': 'http://abc.go.com/shows/castle/video/most-recent/vdka0_g86w5onx',
|
||||
'url': 'http://abc.go.com/shows/designated-survivor/video/most-recent/VDKA3807643',
|
||||
'info_dict': {
|
||||
'id': '0_g86w5onx',
|
||||
'id': 'VDKA3807643',
|
||||
'ext': 'mp4',
|
||||
'title': 'Sneak Peek: Language Arts',
|
||||
'description': 'md5:7dcdab3b2d17e5217c953256af964e9c',
|
||||
'title': 'The Traitor in the White House',
|
||||
'description': 'md5:05b009d2d145a1e85d25111bd37222e8',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://abc.go.com/shows/after-paradise/video/most-recent/vdka3335601',
|
||||
'only_matching': True,
|
||||
'url': 'http://watchdisneyxd.go.com/doraemon',
|
||||
'info_dict': {
|
||||
'title': 'Doraemon',
|
||||
'id': 'SH55574025',
|
||||
},
|
||||
'playlist_mincount': 51,
|
||||
}, {
|
||||
'url': 'http://abc.go.com/shows/the-catch/episode-guide/season-01/10-the-wedding',
|
||||
'only_matching': True,
|
||||
@@ -60,19 +64,36 @@ class GoIE(AdobePassIE):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _extract_videos(self, brand, video_id='-1', show_id='-1'):
|
||||
display_id = video_id if video_id != '-1' else show_id
|
||||
return self._download_json(
|
||||
'http://api.contents.watchabc.go.com/vp2/ws/contents/3000/videos/%s/001/-1/%s/-1/%s/-1/-1.json' % (brand, show_id, video_id),
|
||||
display_id)['video']
|
||||
|
||||
def _real_extract(self, url):
|
||||
sub_domain, video_id, display_id = re.match(self._VALID_URL, url).groups()
|
||||
site_info = self._SITE_INFO[sub_domain]
|
||||
brand = site_info['brand']
|
||||
if not video_id:
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
video_id = self._search_regex(
|
||||
# There may be inner quotes, e.g. data-video-id="'VDKA3609139'"
|
||||
# from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood
|
||||
r'data-video-id=["\']*VDKA(\w+)', webpage, 'video id')
|
||||
site_info = self._SITE_INFO[sub_domain]
|
||||
brand = site_info['brand']
|
||||
video_data = self._download_json(
|
||||
'http://api.contents.watchabc.go.com/vp2/ws/contents/3000/videos/%s/001/-1/-1/-1/%s/-1/-1.json' % (brand, video_id),
|
||||
video_id)['video'][0]
|
||||
r'data-video-id=["\']*(VDKA\w+)', webpage, 'video id', default=None)
|
||||
if not video_id:
|
||||
# show extraction works for Disney, DisneyJunior and DisneyXD
|
||||
# ABC and Freeform has different layout
|
||||
show_id = self._search_regex(r'data-show-id=["\']*(SH\d+)', webpage, 'show id')
|
||||
videos = self._extract_videos(brand, show_id=show_id)
|
||||
show_title = self._search_regex(r'data-show-title="([^"]+)"', webpage, 'show title', fatal=False)
|
||||
entries = []
|
||||
for video in videos:
|
||||
entries.append(self.url_result(
|
||||
video['url'], 'Go', video.get('id'), video.get('title')))
|
||||
entries.reverse()
|
||||
return self.playlist_result(entries, show_id, show_title)
|
||||
video_data = self._extract_videos(brand, video_id)[0]
|
||||
video_id = video_data['id']
|
||||
title = video_data['title']
|
||||
|
||||
formats = []
|
||||
@@ -105,7 +126,7 @@ class GoIE(AdobePassIE):
|
||||
self._initialize_geo_bypass(['US'])
|
||||
entitlement = self._download_json(
|
||||
'https://api.entitlement.watchabc.go.com/vp2/ws-secure/entitlement/2020/authorize.json',
|
||||
video_id, data=urlencode_postdata(data), headers=self.geo_verification_headers())
|
||||
video_id, data=urlencode_postdata(data))
|
||||
errors = entitlement.get('errors', {}).get('errors', [])
|
||||
if errors:
|
||||
for error in errors:
|
||||
|
@@ -1,66 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .ooyala import OoyalaIE
|
||||
from ..utils import js_to_json
|
||||
|
||||
|
||||
class GodTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?god\.tv(?:/[^/]+)*/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://god.tv/jesus-image/video/jesus-conference-2016/randy-needham',
|
||||
'info_dict': {
|
||||
'id': 'lpd3g2MzE6D1g8zFAKz8AGpxWcpu6o_3',
|
||||
'ext': 'mp4',
|
||||
'title': 'Randy Needham',
|
||||
'duration': 3615.08,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://god.tv/playlist/bible-study',
|
||||
'info_dict': {
|
||||
'id': 'bible-study',
|
||||
},
|
||||
'playlist_mincount': 37,
|
||||
}, {
|
||||
'url': 'http://god.tv/node/15097',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://god.tv/live/africa',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://god.tv/liveevents',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
settings = self._parse_json(
|
||||
self._search_regex(
|
||||
r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
|
||||
webpage, 'settings', default='{}'),
|
||||
display_id, transform_source=js_to_json, fatal=False)
|
||||
|
||||
ooyala_id = None
|
||||
|
||||
if settings:
|
||||
playlist = settings.get('playlist')
|
||||
if playlist and isinstance(playlist, list):
|
||||
entries = [
|
||||
OoyalaIE._build_url_result(video['content_id'])
|
||||
for video in playlist if video.get('content_id')]
|
||||
if entries:
|
||||
return self.playlist_result(entries, display_id)
|
||||
ooyala_id = settings.get('ooyala', {}).get('content_id')
|
||||
|
||||
if not ooyala_id:
|
||||
ooyala_id = self._search_regex(
|
||||
r'["\']content_id["\']\s*:\s*(["\'])(?P<id>[\w-]+)\1',
|
||||
webpage, 'ooyala id', group='id')
|
||||
|
||||
return OoyalaIE._build_url_result(ooyala_id)
|
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
@@ -46,7 +47,7 @@ class GolemIE(InfoExtractor):
|
||||
continue
|
||||
|
||||
formats.append({
|
||||
'format_id': e.tag,
|
||||
'format_id': compat_str(e.tag),
|
||||
'url': compat_urlparse.urljoin(self._PREFIX, url),
|
||||
'height': self._int(e.get('height'), 'height'),
|
||||
'width': self._int(e.get('width'), 'width'),
|
||||
|
@@ -16,8 +16,8 @@ from ..utils import (
|
||||
|
||||
class HitboxIE(InfoExtractor):
|
||||
IE_NAME = 'hitbox'
|
||||
_VALID_URL = r'https?://(?:www\.)?hitbox\.tv/video/(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:hitbox|smashcast)\.tv/(?:[^/]+/)*videos?/(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.hitbox.tv/video/203213',
|
||||
'info_dict': {
|
||||
'id': '203213',
|
||||
@@ -38,13 +38,15 @@ class HitboxIE(InfoExtractor):
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.smashcast.tv/hitboxlive/videos/203213',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _extract_metadata(self, url, video_id):
|
||||
thumb_base = 'https://edge.sf.hitbox.tv'
|
||||
metadata = self._download_json(
|
||||
'%s/%s' % (url, video_id), video_id,
|
||||
'Downloading metadata JSON')
|
||||
'%s/%s' % (url, video_id), video_id, 'Downloading metadata JSON')
|
||||
|
||||
date = 'media_live_since'
|
||||
media_type = 'livestream'
|
||||
@@ -63,14 +65,15 @@ class HitboxIE(InfoExtractor):
|
||||
views = int_or_none(video_meta.get('media_views'))
|
||||
timestamp = parse_iso8601(video_meta.get(date), ' ')
|
||||
categories = [video_meta.get('category_name')]
|
||||
thumbs = [
|
||||
{'url': thumb_base + video_meta.get('media_thumbnail'),
|
||||
thumbs = [{
|
||||
'url': thumb_base + video_meta.get('media_thumbnail'),
|
||||
'width': 320,
|
||||
'height': 180},
|
||||
{'url': thumb_base + video_meta.get('media_thumbnail_large'),
|
||||
'height': 180
|
||||
}, {
|
||||
'url': thumb_base + video_meta.get('media_thumbnail_large'),
|
||||
'width': 768,
|
||||
'height': 432},
|
||||
]
|
||||
'height': 432
|
||||
}]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
@@ -90,7 +93,7 @@ class HitboxIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
player_config = self._download_json(
|
||||
'https://www.hitbox.tv/api/player/config/video/%s' % video_id,
|
||||
'https://www.smashcast.tv/api/player/config/video/%s' % video_id,
|
||||
video_id, 'Downloading video JSON')
|
||||
|
||||
formats = []
|
||||
@@ -121,8 +124,7 @@ class HitboxIE(InfoExtractor):
|
||||
self._sort_formats(formats)
|
||||
|
||||
metadata = self._extract_metadata(
|
||||
'https://www.hitbox.tv/api/media/video',
|
||||
video_id)
|
||||
'https://www.smashcast.tv/api/media/video', video_id)
|
||||
metadata['formats'] = formats
|
||||
|
||||
return metadata
|
||||
@@ -130,8 +132,8 @@ class HitboxIE(InfoExtractor):
|
||||
|
||||
class HitboxLiveIE(HitboxIE):
|
||||
IE_NAME = 'hitbox:live'
|
||||
_VALID_URL = r'https?://(?:www\.)?hitbox\.tv/(?!video)(?P<id>.+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:hitbox|smashcast)\.tv/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.hitbox.tv/dimak',
|
||||
'info_dict': {
|
||||
'id': 'dimak',
|
||||
@@ -146,13 +148,20 @@ class HitboxLiveIE(HitboxIE):
|
||||
# live
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.smashcast.tv/dimak',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if HitboxIE.suitable(url) else super(HitboxLiveIE, cls).suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
player_config = self._download_json(
|
||||
'https://www.hitbox.tv/api/player/config/live/%s' % video_id,
|
||||
'https://www.smashcast.tv/api/player/config/live/%s' % video_id,
|
||||
video_id)
|
||||
|
||||
formats = []
|
||||
@@ -197,8 +206,7 @@ class HitboxLiveIE(HitboxIE):
|
||||
self._sort_formats(formats)
|
||||
|
||||
metadata = self._extract_metadata(
|
||||
'https://www.hitbox.tv/api/media/live',
|
||||
video_id)
|
||||
'https://www.smashcast.tv/api/media/live', video_id)
|
||||
metadata['formats'] = formats
|
||||
metadata['is_live'] = True
|
||||
metadata['title'] = self._live_title(metadata.get('title'))
|
||||
|
@@ -13,7 +13,7 @@ from ..utils import (
|
||||
class ImdbIE(InfoExtractor):
|
||||
IE_NAME = 'imdb'
|
||||
IE_DESC = 'Internet Movie Database trailers'
|
||||
_VALID_URL = r'https?://(?:www|m)\.imdb\.com/(?:video/[^/]+/|title/tt\d+.*?#lb-|videoplayer/)vi(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www|m)\.imdb\.com/(?:video|title).+?[/-]vi(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.imdb.com/video/imdb/vi2524815897',
|
||||
@@ -35,6 +35,9 @@ class ImdbIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://www.imdb.com/videoplayer/vi1562949145',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.imdb.com/title/tt4218696/videoplayer/vi2608641561',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -87,8 +87,8 @@ class InfoQIE(BokeCCBaseIE):
|
||||
|
||||
def _extract_http_audio(self, webpage, video_id):
|
||||
fields = self._hidden_inputs(webpage)
|
||||
http_audio_url = fields['filename']
|
||||
if http_audio_url is None:
|
||||
http_audio_url = fields.get('filename')
|
||||
if not http_audio_url:
|
||||
return []
|
||||
|
||||
cookies_header = {'Cookie': self._extract_cookies(webpage)}
|
||||
|
@@ -112,7 +112,8 @@ class InstagramIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
(video_url, description, thumbnail, timestamp, uploader,
|
||||
uploader_id, like_count, comment_count, height, width) = [None] * 10
|
||||
uploader_id, like_count, comment_count, comments, height,
|
||||
width) = [None] * 11
|
||||
|
||||
shared_data = self._parse_json(
|
||||
self._search_regex(
|
||||
@@ -121,7 +122,10 @@ class InstagramIE(InfoExtractor):
|
||||
video_id, fatal=False)
|
||||
if shared_data:
|
||||
media = try_get(
|
||||
shared_data, lambda x: x['entry_data']['PostPage'][0]['media'], dict)
|
||||
shared_data,
|
||||
(lambda x: x['entry_data']['PostPage'][0]['graphql']['shortcode_media'],
|
||||
lambda x: x['entry_data']['PostPage'][0]['media']),
|
||||
dict)
|
||||
if media:
|
||||
video_url = media.get('video_url')
|
||||
height = int_or_none(media.get('dimensions', {}).get('height'))
|
||||
|
@@ -189,7 +189,11 @@ class IqiyiIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://yule.iqiyi.com/pcb.html',
|
||||
'only_matching': True,
|
||||
'info_dict': {
|
||||
'id': '4a0af228fddb55ec96398a364248ed7f',
|
||||
'ext': 'mp4',
|
||||
'title': '第2017-04-21期 女艺人频遭极端粉丝骚扰',
|
||||
},
|
||||
}, {
|
||||
# VIP-only video. The first 2 parts (6 minutes) are available without login
|
||||
# MD5 sums omitted as values are different on Travis CI and my machine
|
||||
@@ -337,15 +341,18 @@ class IqiyiIE(InfoExtractor):
|
||||
url, 'temp_id', note='download video page')
|
||||
|
||||
# There's no simple way to determine whether an URL is a playlist or not
|
||||
# So detect it
|
||||
# Sometimes there are playlist links in individual videos, so treat it
|
||||
# as a single video first
|
||||
tvid = self._search_regex(
|
||||
r'data-(?:player|shareplattrigger)-tvid\s*=\s*[\'"](\d+)', webpage, 'tvid', default=None)
|
||||
if tvid is None:
|
||||
playlist_result = self._extract_playlist(webpage)
|
||||
if playlist_result:
|
||||
return playlist_result
|
||||
raise ExtractorError('Can\'t find any video')
|
||||
|
||||
tvid = self._search_regex(
|
||||
r'data-player-tvid\s*=\s*[\'"](\d+)', webpage, 'tvid')
|
||||
video_id = self._search_regex(
|
||||
r'data-player-videoid\s*=\s*[\'"]([a-f\d]+)', webpage, 'video_id')
|
||||
r'data-(?:player|shareplattrigger)-videoid\s*=\s*[\'"]([a-f\d]+)', webpage, 'video_id')
|
||||
|
||||
formats = []
|
||||
for _ in range(5):
|
||||
@@ -377,7 +384,8 @@ class IqiyiIE(InfoExtractor):
|
||||
|
||||
self._sort_formats(formats)
|
||||
title = (get_element_by_id('widget-videotitle', webpage) or
|
||||
clean_html(get_element_by_attribute('class', 'mod-play-tit', webpage)))
|
||||
clean_html(get_element_by_attribute('class', 'mod-play-tit', webpage)) or
|
||||
self._html_search_regex(r'<span[^>]+data-videochanged-title="word"[^>]*>([^<]+)</span>', webpage, 'title'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@@ -65,9 +65,9 @@ class JoveIE(InfoExtractor):
|
||||
webpage, 'description', fatal=False)
|
||||
publish_date = unified_strdate(self._html_search_meta(
|
||||
'citation_publication_date', webpage, 'publish date', fatal=False))
|
||||
comment_count = self._html_search_regex(
|
||||
comment_count = int(self._html_search_regex(
|
||||
r'<meta name="num_comments" content="(\d+) Comments?"',
|
||||
webpage, 'comment count', fatal=False)
|
||||
webpage, 'comment count', fatal=False))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@@ -1,6 +1,8 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
@@ -8,15 +10,15 @@ from ..utils import (
|
||||
urlencode_postdata,
|
||||
xpath_element,
|
||||
xpath_text,
|
||||
urljoin,
|
||||
update_url_query,
|
||||
js_to_json,
|
||||
)
|
||||
|
||||
|
||||
class Laola1TvEmbedIE(InfoExtractor):
|
||||
IE_NAME = 'laola1tv:embed'
|
||||
_VALID_URL = r'https?://(?:www\.)?laola1\.tv/titanplayer\.php\?.*?\bvideoid=(?P<id>\d+)'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
# flashvars.premium = "false";
|
||||
'url': 'https://www.laola1.tv/titanplayer.php?videoid=708065&type=V&lang=en&portal=int&customer=1024',
|
||||
'info_dict': {
|
||||
@@ -26,7 +28,30 @@ class Laola1TvEmbedIE(InfoExtractor):
|
||||
'uploader': 'ITTF - International Table Tennis Federation',
|
||||
'upload_date': '20161211',
|
||||
},
|
||||
}
|
||||
}]
|
||||
|
||||
def _extract_token_url(self, stream_access_url, video_id, data):
|
||||
return self._download_json(
|
||||
stream_access_url, video_id, headers={
|
||||
'Content-Type': 'application/json',
|
||||
}, data=json.dumps(data).encode())['data']['stream-access'][0]
|
||||
|
||||
def _extract_formats(self, token_url, video_id):
|
||||
token_doc = self._download_xml(
|
||||
token_url, video_id, 'Downloading token',
|
||||
headers=self.geo_verification_headers())
|
||||
|
||||
token_attrib = xpath_element(token_doc, './/token').attrib
|
||||
|
||||
if token_attrib['status'] != '0':
|
||||
raise ExtractorError(
|
||||
'Token error: %s' % token_attrib['comment'], expected=True)
|
||||
|
||||
formats = self._extract_akamai_formats(
|
||||
'%s?hdnea=%s' % (token_attrib['url'], token_attrib['auth']),
|
||||
video_id)
|
||||
self._sort_formats(formats)
|
||||
return formats
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
@@ -68,29 +93,16 @@ class Laola1TvEmbedIE(InfoExtractor):
|
||||
else:
|
||||
data_abo = urlencode_postdata(
|
||||
dict((i, v) for i, v in enumerate(_v('req_liga_abos').split(','))))
|
||||
token_url = self._download_json(
|
||||
'https://club.laola1.tv/sp/laola1/api/v3/user/session/premium/player/stream-access',
|
||||
video_id, query={
|
||||
stream_access_url = update_url_query(
|
||||
'https://club.laola1.tv/sp/laola1/api/v3/user/session/premium/player/stream-access', {
|
||||
'videoId': _v('id'),
|
||||
'target': self._search_regex(r'vs_target = (\d+);', webpage, 'vs target'),
|
||||
'label': _v('label'),
|
||||
'area': _v('area'),
|
||||
}, data=data_abo)['data']['stream-access'][0]
|
||||
})
|
||||
token_url = self._extract_token_url(stream_access_url, video_id, data_abo)
|
||||
|
||||
token_doc = self._download_xml(
|
||||
token_url, video_id, 'Downloading token',
|
||||
headers=self.geo_verification_headers())
|
||||
|
||||
token_attrib = xpath_element(token_doc, './/token').attrib
|
||||
|
||||
if token_attrib['status'] != '0':
|
||||
raise ExtractorError(
|
||||
'Token error: %s' % token_attrib['comment'], expected=True)
|
||||
|
||||
formats = self._extract_akamai_formats(
|
||||
'%s?hdnea=%s' % (token_attrib['url'], token_attrib['auth']),
|
||||
video_id)
|
||||
self._sort_formats(formats)
|
||||
formats = self._extract_formats(token_url, video_id)
|
||||
|
||||
categories_str = _v('meta_sports')
|
||||
categories = categories_str.split(',') if categories_str else []
|
||||
@@ -107,7 +119,7 @@ class Laola1TvEmbedIE(InfoExtractor):
|
||||
}
|
||||
|
||||
|
||||
class Laola1TvIE(InfoExtractor):
|
||||
class Laola1TvIE(Laola1TvEmbedIE):
|
||||
IE_NAME = 'laola1tv'
|
||||
_VALID_URL = r'https?://(?:www\.)?laola1\.tv/[a-z]+-[a-z]+/[^/]+/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
@@ -164,13 +176,42 @@ class Laola1TvIE(InfoExtractor):
|
||||
if 'Dieser Livestream ist bereits beendet.' in webpage:
|
||||
raise ExtractorError('This live stream has already finished.', expected=True)
|
||||
|
||||
iframe_url = urljoin(url, self._search_regex(
|
||||
r'<iframe[^>]*?id="videoplayer"[^>]*?src="([^"]+)"',
|
||||
webpage, 'iframe url'))
|
||||
conf = self._parse_json(self._search_regex(
|
||||
r'(?s)conf\s*=\s*({.+?});', webpage, 'conf'),
|
||||
display_id, js_to_json)
|
||||
|
||||
video_id = conf['videoid']
|
||||
|
||||
config = self._download_json(conf['configUrl'], video_id, query={
|
||||
'videoid': video_id,
|
||||
'partnerid': conf['partnerid'],
|
||||
'language': conf.get('language', ''),
|
||||
'portal': conf.get('portalid', ''),
|
||||
})
|
||||
error = config.get('error')
|
||||
if error:
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
|
||||
|
||||
video_data = config['video']
|
||||
title = video_data['title']
|
||||
is_live = video_data.get('isLivestream') and video_data.get('isLive')
|
||||
meta = video_data.get('metaInformation')
|
||||
sports = meta.get('sports')
|
||||
categories = sports.split(',') if sports else []
|
||||
|
||||
token_url = self._extract_token_url(
|
||||
video_data['streamAccess'], video_id,
|
||||
video_data['abo']['required'])
|
||||
|
||||
formats = self._extract_formats(token_url, video_id)
|
||||
|
||||
return {
|
||||
'_type': 'url',
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'url': iframe_url,
|
||||
'ie_key': 'Laola1TvEmbed',
|
||||
'title': self._live_title(title) if is_live else title,
|
||||
'description': video_data.get('description'),
|
||||
'thumbnail': video_data.get('image'),
|
||||
'categories': categories,
|
||||
'formats': formats,
|
||||
'is_live': is_live,
|
||||
}
|
||||
|
@@ -23,7 +23,6 @@ from ..utils import (
|
||||
str_or_none,
|
||||
url_basename,
|
||||
urshift,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
@@ -51,7 +50,7 @@ class LeIE(InfoExtractor):
|
||||
'id': '1415246',
|
||||
'ext': 'mp4',
|
||||
'title': '美人天下01',
|
||||
'description': 'md5:f88573d9d7225ada1359eaf0dbf8bcda',
|
||||
'description': 'md5:28942e650e82ed4fcc8e4de919ee854d',
|
||||
},
|
||||
'params': {
|
||||
'hls_prefer_native': True,
|
||||
@@ -69,7 +68,6 @@ class LeIE(InfoExtractor):
|
||||
'params': {
|
||||
'hls_prefer_native': True,
|
||||
},
|
||||
'skip': 'Only available in China',
|
||||
}, {
|
||||
'url': 'http://sports.le.com/video/25737697.html',
|
||||
'only_matching': True,
|
||||
@@ -81,7 +79,7 @@ class LeIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
# ror() and calc_time_key() are reversed from a embedded swf file in KLetvPlayer.swf
|
||||
# ror() and calc_time_key() are reversed from a embedded swf file in LetvPlayer.swf
|
||||
def ror(self, param1, param2):
|
||||
_loc3_ = 0
|
||||
while _loc3_ < param2:
|
||||
@@ -90,15 +88,8 @@ class LeIE(InfoExtractor):
|
||||
return param1
|
||||
|
||||
def calc_time_key(self, param1):
|
||||
_loc2_ = 773625421
|
||||
_loc3_ = self.ror(param1, _loc2_ % 13)
|
||||
_loc3_ = _loc3_ ^ _loc2_
|
||||
_loc3_ = self.ror(_loc3_, _loc2_ % 17)
|
||||
return _loc3_
|
||||
|
||||
# reversed from http://jstatic.letvcdn.com/sdk/player.js
|
||||
def get_mms_key(self, time):
|
||||
return self.ror(time, 8) ^ 185025305
|
||||
_loc2_ = 185025305
|
||||
return self.ror(param1, _loc2_ % 17) ^ _loc2_
|
||||
|
||||
# see M3U8Encryption class in KLetvPlayer.swf
|
||||
@staticmethod
|
||||
@@ -122,7 +113,7 @@ class LeIE(InfoExtractor):
|
||||
|
||||
def _check_errors(self, play_json):
|
||||
# Check for errors
|
||||
playstatus = play_json['playstatus']
|
||||
playstatus = play_json['msgs']['playstatus']
|
||||
if playstatus['status'] == 0:
|
||||
flag = playstatus['flag']
|
||||
if flag == 1:
|
||||
@@ -134,59 +125,32 @@ class LeIE(InfoExtractor):
|
||||
media_id = self._match_id(url)
|
||||
page = self._download_webpage(url, media_id)
|
||||
|
||||
play_json_h5 = self._download_json(
|
||||
'http://api.le.com/mms/out/video/playJsonH5',
|
||||
media_id, 'Downloading html5 playJson data', query={
|
||||
'id': media_id,
|
||||
'platid': 3,
|
||||
'splatid': 304,
|
||||
'format': 1,
|
||||
'tkey': self.get_mms_key(int(time.time())),
|
||||
'domain': 'www.le.com',
|
||||
'tss': 'no',
|
||||
},
|
||||
headers=self.geo_verification_headers())
|
||||
self._check_errors(play_json_h5)
|
||||
|
||||
play_json_flash = self._download_json(
|
||||
'http://api.le.com/mms/out/video/playJson',
|
||||
'http://player-pc.le.com/mms/out/video/playJson',
|
||||
media_id, 'Downloading flash playJson data', query={
|
||||
'id': media_id,
|
||||
'platid': 1,
|
||||
'splatid': 101,
|
||||
'format': 1,
|
||||
'source': 1000,
|
||||
'tkey': self.calc_time_key(int(time.time())),
|
||||
'domain': 'www.le.com',
|
||||
'region': 'cn',
|
||||
},
|
||||
headers=self.geo_verification_headers())
|
||||
self._check_errors(play_json_flash)
|
||||
|
||||
def get_h5_urls(media_url, format_id):
|
||||
location = self._download_json(
|
||||
media_url, media_id,
|
||||
'Download JSON metadata for format %s' % format_id, query={
|
||||
'format': 1,
|
||||
'expect': 3,
|
||||
'tss': 'no',
|
||||
})['location']
|
||||
|
||||
return {
|
||||
'http': update_url_query(location, {'tss': 'no'}),
|
||||
'hls': update_url_query(location, {'tss': 'ios'}),
|
||||
}
|
||||
|
||||
def get_flash_urls(media_url, format_id):
|
||||
media_url += '&' + compat_urllib_parse_urlencode({
|
||||
nodes_data = self._download_json(
|
||||
media_url, media_id,
|
||||
'Download JSON metadata for format %s' % format_id,
|
||||
query={
|
||||
'm3v': 1,
|
||||
'format': 1,
|
||||
'expect': 3,
|
||||
'rateid': format_id,
|
||||
'tss': 'ios',
|
||||
})
|
||||
|
||||
nodes_data = self._download_json(
|
||||
media_url, media_id,
|
||||
'Download JSON metadata for format %s' % format_id)
|
||||
|
||||
req = self._request_webpage(
|
||||
nodes_data['nodelist'][0]['location'], media_id,
|
||||
note='Downloading m3u8 information for format %s' % format_id)
|
||||
@@ -199,8 +163,7 @@ class LeIE(InfoExtractor):
|
||||
|
||||
extracted_formats = []
|
||||
formats = []
|
||||
for play_json, get_urls in ((play_json_h5, get_h5_urls), (play_json_flash, get_flash_urls)):
|
||||
playurl = play_json['playurl']
|
||||
playurl = play_json_flash['msgs']['playurl']
|
||||
play_domain = playurl['domain'][0]
|
||||
|
||||
for format_id, format_data in playurl.get('dispatch', []).items():
|
||||
@@ -209,7 +172,7 @@ class LeIE(InfoExtractor):
|
||||
extracted_formats.append(format_id)
|
||||
|
||||
media_url = play_domain + format_data[0]
|
||||
for protocol, format_url in get_urls(media_url, format_id).items():
|
||||
for protocol, format_url in get_flash_urls(media_url, format_id).items():
|
||||
f = {
|
||||
'url': format_url,
|
||||
'ext': determine_ext(format_data[1]),
|
||||
|
@@ -86,7 +86,7 @@ class LEGOIE(InfoExtractor):
|
||||
formats = self._extract_akamai_formats(
|
||||
'%si/s/public/%s_,%s,.mp4.csmil/master.m3u8' % (streaming_base, path, streaming_path), video_id)
|
||||
m3u8_formats = list(filter(
|
||||
lambda f: f.get('protocol') == 'm3u8_native' and f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
|
||||
lambda f: f.get('protocol') == 'm3u8_native' and f.get('vcodec') != 'none',
|
||||
formats))
|
||||
if len(m3u8_formats) == len(self._BITRATES):
|
||||
self._sort_formats(m3u8_formats)
|
||||
|
@@ -1,6 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -11,10 +10,10 @@ class LiveLeakIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:\w+\.)?liveleak\.com/view\?(?:.*?)i=(?P<id>[\w_]+)(?:.*)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.liveleak.com/view?i=757_1364311680',
|
||||
'md5': '50f79e05ba149149c1b4ea961223d5b3',
|
||||
'md5': '0813c2430bea7a46bf13acf3406992f4',
|
||||
'info_dict': {
|
||||
'id': '757_1364311680',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'description': 'extremely bad day for this guy..!',
|
||||
'uploader': 'ljfriel2',
|
||||
'title': 'Most unlucky car accident',
|
||||
@@ -22,7 +21,7 @@ class LiveLeakIE(InfoExtractor):
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.liveleak.com/view?i=f93_1390833151',
|
||||
'md5': 'b13a29626183c9d33944e6a04f41aafc',
|
||||
'md5': 'd3f1367d14cc3c15bf24fbfbe04b9abf',
|
||||
'info_dict': {
|
||||
'id': 'f93_1390833151',
|
||||
'ext': 'mp4',
|
||||
@@ -32,6 +31,7 @@ class LiveLeakIE(InfoExtractor):
|
||||
'thumbnail': r're:^https?://.*\.jpg$'
|
||||
}
|
||||
}, {
|
||||
# Prochan embed
|
||||
'url': 'http://www.liveleak.com/view?i=4f7_1392687779',
|
||||
'md5': '42c6d97d54f1db107958760788c5f48f',
|
||||
'info_dict': {
|
||||
@@ -41,11 +41,13 @@ class LiveLeakIE(InfoExtractor):
|
||||
'uploader': 'CapObveus',
|
||||
'title': 'Man is Fatally Struck by Reckless Car While Packing up a Moving Truck',
|
||||
'age_limit': 18,
|
||||
}
|
||||
},
|
||||
'skip': 'Video is dead',
|
||||
}, {
|
||||
# Covers https://github.com/rg3/youtube-dl/pull/5983
|
||||
# Multiple resolutions
|
||||
'url': 'http://www.liveleak.com/view?i=801_1409392012',
|
||||
'md5': '0b3bec2d888c20728ca2ad3642f0ef15',
|
||||
'md5': 'c3a449dbaca5c0d1825caecd52a57d7b',
|
||||
'info_dict': {
|
||||
'id': '801_1409392012',
|
||||
'ext': 'mp4',
|
||||
@@ -93,17 +95,11 @@ class LiveLeakIE(InfoExtractor):
|
||||
webpage, 'age limit', default=None))
|
||||
video_thumbnail = self._og_search_thumbnail(webpage)
|
||||
|
||||
sources_raw = self._search_regex(
|
||||
r'(?s)sources:\s*(\[.*?\]),', webpage, 'video URLs', default=None)
|
||||
if sources_raw is None:
|
||||
alt_source = self._search_regex(
|
||||
r'(file: ".*?"),', webpage, 'video URL', default=None)
|
||||
if alt_source:
|
||||
sources_raw = '[{ %s}]' % alt_source
|
||||
else:
|
||||
entries = self._parse_html5_media_entries(url, webpage, video_id)
|
||||
if not entries:
|
||||
# Maybe an embed?
|
||||
embed_url = self._search_regex(
|
||||
r'<iframe[^>]+src="(https?://(?:www\.)?(?:prochan|youtube)\.com/embed[^"]+)"',
|
||||
r'<iframe[^>]+src="((?:https?:)?//(?:www\.)?(?:prochan|youtube)\.com/embed[^"]+)"',
|
||||
webpage, 'embed URL')
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
@@ -115,35 +111,23 @@ class LiveLeakIE(InfoExtractor):
|
||||
'age_limit': age_limit,
|
||||
}
|
||||
|
||||
sources_json = re.sub(r'\s([a-z]+):\s', r'"\1": ', sources_raw)
|
||||
sources = json.loads(sources_json)
|
||||
info_dict = entries[0]
|
||||
|
||||
formats = [{
|
||||
'format_id': '%s' % i,
|
||||
'format_note': s.get('label'),
|
||||
'url': s['file'],
|
||||
} for i, s in enumerate(sources)]
|
||||
for a_format in info_dict['formats']:
|
||||
if not a_format.get('height'):
|
||||
a_format['height'] = int_or_none(self._search_regex(
|
||||
r'([0-9]+)p\.mp4', a_format['url'], 'height label',
|
||||
default=None))
|
||||
|
||||
for i, s in enumerate(sources):
|
||||
# Removing '.h264_*.mp4' gives the raw video, which is essentially
|
||||
# the same video without the LiveLeak logo at the top (see
|
||||
# https://github.com/rg3/youtube-dl/pull/4768)
|
||||
orig_url = re.sub(r'\.h264_.+?\.mp4', '', s['file'])
|
||||
if s['file'] != orig_url:
|
||||
formats.append({
|
||||
'format_id': 'original-%s' % i,
|
||||
'format_note': s.get('label'),
|
||||
'url': orig_url,
|
||||
'preference': 1,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
self._sort_formats(info_dict['formats'])
|
||||
|
||||
return {
|
||||
info_dict.update({
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
'description': video_description,
|
||||
'uploader': video_uploader,
|
||||
'formats': formats,
|
||||
'age_limit': age_limit,
|
||||
'thumbnail': video_thumbnail,
|
||||
}
|
||||
})
|
||||
|
||||
return info_dict
|
||||
|
@@ -17,7 +17,7 @@ from ..utils import (
|
||||
class MedialaanIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:www\.)?
|
||||
(?:www\.|nieuws\.)?
|
||||
(?:
|
||||
(?P<site_id>vtm|q2|vtmkzoom)\.be/
|
||||
(?:
|
||||
@@ -85,6 +85,22 @@ class MedialaanIE(InfoExtractor):
|
||||
# clip
|
||||
'url': 'http://vtmkzoom.be/k3-dansstudio/een-nieuw-seizoen-van-k3-dansstudio',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# http/s redirect
|
||||
'url': 'https://vtmkzoom.be/video?aid=45724',
|
||||
'info_dict': {
|
||||
'id': '257136373657000',
|
||||
'ext': 'mp4',
|
||||
'title': 'K3 Dansstudio Ushuaia afl.6',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Requires account credentials',
|
||||
}, {
|
||||
# nieuws.vtm.be
|
||||
'url': 'https://nieuws.vtm.be/stadion/stadion/genk-nog-moeilijk-programma',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_initialize(self):
|
||||
@@ -146,6 +162,8 @@ class MedialaanIE(InfoExtractor):
|
||||
video_id, transform_source=lambda s: '[%s]' % s, fatal=False)
|
||||
if player:
|
||||
video = player[-1]
|
||||
if video['videoUrl'] in ('http', 'https'):
|
||||
return self.url_result(video['url'], MedialaanIE.ie_key())
|
||||
info = {
|
||||
'id': video_id,
|
||||
'url': video['videoUrl'],
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user