Compare commits
152 Commits
2017.09.10
...
2017.10.15
Author | SHA1 | Date | |
---|---|---|---|
|
7e721e35da | ||
|
bd7e1406b3 | ||
|
74c42d9ec3 | ||
|
5efaf43c93 | ||
|
4827270526 | ||
|
ee093a0ea0 | ||
|
9bb2c7673e | ||
|
715534083d | ||
|
ee88c1cbc6 | ||
|
57eb45b111 | ||
|
b21ab85088 | ||
|
210a2720bc | ||
|
685e87b61f | ||
|
c9bd503e7d | ||
|
94a530c6cb | ||
|
e650659b94 | ||
|
2637fadc38 | ||
|
50d808f5c9 | ||
|
7a64c33aee | ||
|
b0def2c297 | ||
|
81ce479f4d | ||
|
414e709405 | ||
|
645ed3e7c9 | ||
|
c0bddd6d65 | ||
|
1baba7f4a8 | ||
|
344d1a6794 | ||
|
76581082f6 | ||
|
2f0eb0a68a | ||
|
7fee3377dc | ||
|
ff3f1a62f0 | ||
|
694b61545c | ||
|
af0f74288d | ||
|
9e38dbb19c | ||
|
782195a9d4 | ||
|
26bae2d965 | ||
|
5fe75f976f | ||
|
4fe4bda287 | ||
|
cdab1df912 | ||
|
dfc80bdd2e | ||
|
04af3aca04 | ||
|
d0f2d64114 | ||
|
01c742ecd0 | ||
|
9e71f88105 | ||
|
ae5af89079 | ||
|
197224b7a4 | ||
|
8992331621 | ||
|
b0dde6686c | ||
|
a22ccac1f0 | ||
|
8b561bfc9d | ||
|
8e751a185c | ||
|
3fc8f5b7c2 | ||
|
665f42d8c1 | ||
|
e952847541 | ||
|
b1a7bf44b9 | ||
|
2e2a8e97d5 | ||
|
ac93c09ab2 | ||
|
cd6fc19ed7 | ||
|
86a15ed64b | ||
|
7e85e8729f | ||
|
6be08ce602 | ||
|
cf5f6ed5be | ||
|
6b46285e85 | ||
|
6e736d86e7 | ||
|
c110944fa2 | ||
|
9524dca3ac | ||
|
3e4cedf9e8 | ||
|
bfd484ccff | ||
|
b7e14f06a4 | ||
|
d2ae7e24e5 | ||
|
544ffb7790 | ||
|
117589dfa2 | ||
|
839728f5bf | ||
|
fcdd37d053 | ||
|
1dd126180e | ||
|
4e599194d6 | ||
|
c5b7014a9c | ||
|
c8da40d834 | ||
|
b69ca0ccfc | ||
|
2c53bd51c6 | ||
|
3836b02ce8 | ||
|
fa3fdeb41f | ||
|
eb9a15be60 | ||
|
3600fd591d | ||
|
63d990d285 | ||
|
b14b2283a0 | ||
|
02d01e15f1 | ||
|
db96252831 | ||
|
8b389f7e3c | ||
|
9fc41bcb6b | ||
|
10cab6613f | ||
|
4d182955a2 | ||
|
011da618bd | ||
|
4c54b89e03 | ||
|
a87d7b4953 | ||
|
2f3933aa1e | ||
|
aab20aabfc | ||
|
16f54d0751 | ||
|
07d1344c85 | ||
|
47b5dfb047 | ||
|
e3440d824a | ||
|
136507b39a | ||
|
7f4921b38d | ||
|
f70ddd4aeb | ||
|
1c22d7a7f3 | ||
|
5c1452e8f1 | ||
|
4bb58fa118 | ||
|
13de91c9e9 | ||
|
9ce1ac4046 | ||
|
095774e591 | ||
|
2384f5a64e | ||
|
8c2895305d | ||
|
8c6919e433 | ||
|
f6ff52b473 | ||
|
12ea5c79fb | ||
|
3b65a6fbf3 | ||
|
dc76eef092 | ||
|
8a1a60d173 | ||
|
4d8c4b46d5 | ||
|
9c2a17f2ce | ||
|
4ed2d7b7d1 | ||
|
8251af63a1 | ||
|
790d379e4d | ||
|
3869028ffb | ||
|
68d43a61b5 | ||
|
a88d461dff | ||
|
a4245acef8 | ||
|
6be44a50ed | ||
|
b763e1d68c | ||
|
cbf85239bb | ||
|
159d304a9f | ||
|
86e55e317c | ||
|
c46680fb2a | ||
|
fad9fc537d | ||
|
0732a90579 | ||
|
319fc70676 | ||
|
e7c3e33456 | ||
|
757984af90 | ||
|
2f483758bc | ||
|
018cc61549 | ||
|
2709d9fa28 | ||
|
7dacceae75 | ||
|
43df248f10 | ||
|
f12a6e88b2 | ||
|
feee8d32e4 | ||
|
c89267d31a | ||
|
5ff1bc0cc1 | ||
|
7552f96352 | ||
|
98f9d87381 | ||
|
fcace2d1ad | ||
|
40e41780f1 | ||
|
da57ebaf84 | ||
|
47e0cef46e |
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@@ -6,8 +6,8 @@
|
||||
|
||||
---
|
||||
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.09.10*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.09.10**
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.10.15.1*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.10.15.1**
|
||||
|
||||
### Before submitting an *issue* make sure you have:
|
||||
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||
@@ -35,7 +35,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2017.09.10
|
||||
[debug] youtube-dl version 2017.10.15.1
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
1
.github/PULL_REQUEST_TEMPLATE.md
vendored
1
.github/PULL_REQUEST_TEMPLATE.md
vendored
@@ -9,6 +9,7 @@
|
||||
### Before submitting a *pull request* make sure you have:
|
||||
- [ ] At least skimmed through [adding new extractor tutorial](https://github.com/rg3/youtube-dl#adding-support-for-a-new-site) and [youtube-dl coding conventions](https://github.com/rg3/youtube-dl#youtube-dl-coding-conventions) sections
|
||||
- [ ] [Searched](https://github.com/rg3/youtube-dl/search?q=is%3Apr&type=Issues) the bugtracker for similar pull requests
|
||||
- [ ] Checked the code with [flake8](https://pypi.python.org/pypi/flake8)
|
||||
|
||||
### In order to be accepted and merged into youtube-dl each piece of code must be in public domain or released under [Unlicense](http://unlicense.org/). Check one of the following options:
|
||||
- [ ] I am the original author of this code and I am willing to release it under [Unlicense](http://unlicense.org/)
|
||||
|
1
.gitignore
vendored
1
.gitignore
vendored
@@ -22,6 +22,7 @@ cover/
|
||||
updates_key.pem
|
||||
*.egg-info
|
||||
*.srt
|
||||
*.ttml
|
||||
*.sbv
|
||||
*.vtt
|
||||
*.flv
|
||||
|
7
AUTHORS
7
AUTHORS
@@ -224,3 +224,10 @@ Giuseppe Fabiano
|
||||
Örn Guðjónsson
|
||||
Parmjit Virk
|
||||
Genki Sky
|
||||
Ľuboš Katrinec
|
||||
Corey Nicholson
|
||||
Ashutosh Chaudhary
|
||||
John Dong
|
||||
Tatsuyuki Ishi
|
||||
Daniel Weber
|
||||
Kay Bouché
|
||||
|
152
ChangeLog
152
ChangeLog
@@ -1,3 +1,155 @@
|
||||
version 2017.10.15.1
|
||||
|
||||
Core
|
||||
* [downloader/hls] Ignore anvato ad fragments (#14496)
|
||||
* [downloader/fragment] Output ad fragment count
|
||||
|
||||
Extractors
|
||||
* [scrippsnetworks:watch] Bypass geo restriction
|
||||
+ [anvato] Add ability to bypass geo restriction
|
||||
* [redditr] Fix extraction for URLs with query (#14495)
|
||||
|
||||
|
||||
version 2017.10.15
|
||||
|
||||
Core
|
||||
+ [common] Add support for jwplayer youtube embeds
|
||||
|
||||
Extractors
|
||||
* [scrippsnetworks:watch] Fix extraction (#14389)
|
||||
* [anvato] Process master m3u8 manifests
|
||||
* [youtube] Fix relative URLs in description
|
||||
* [spike] Bypass geo restriction
|
||||
+ [howstuffworks] Add support for more domains
|
||||
* [infoq] Fix http format downloading
|
||||
+ [rtlnl] Add support for another type of embeds
|
||||
+ [onionstudios] Add support for bulbs-video embeds
|
||||
* [udn] Fix extraction
|
||||
* [shahid] Fix extraction (#14448)
|
||||
* [kaltura] Ignore Widevine encrypted video (.wvm) (#14471)
|
||||
* [vh1] Fix extraction (#9613)
|
||||
|
||||
|
||||
version 2017.10.12
|
||||
|
||||
Core
|
||||
* [YoutubeDL] Improve _default_format_spec (#14461)
|
||||
|
||||
Extractors
|
||||
* [steam] Fix extraction (#14067)
|
||||
+ [funk] Add support for funk.net (#14464)
|
||||
+ [nexx] Add support for shortcuts and relax domain id extraction
|
||||
+ [voxmedia] Add support for recode.net (#14173)
|
||||
+ [once] Add support for vmap URLs
|
||||
+ [generic] Add support for channel9 embeds (#14469)
|
||||
* [tva] Fix extraction (#14328)
|
||||
+ [tubitv] Add support for new URL format (#14460)
|
||||
- [afreecatv:global] Remove extractor
|
||||
- [youtube:shared] Removed extractor (#14420)
|
||||
+ [slideslive] Add support for slideslive.com (#2680)
|
||||
+ [facebook] Support thumbnails (#14416)
|
||||
* [vvvvid] Fix episode number extraction (#14456)
|
||||
* [hrti:playlist] Relax URL regular expression
|
||||
* [wdr] Relax media link regular expression (#14447)
|
||||
* [hrti] Relax URL regular expression (#14443)
|
||||
* [fox] Delegate extraction to uplynk:preplay (#14147)
|
||||
+ [youtube] Add support for hooktube.com (#14437)
|
||||
|
||||
|
||||
version 2017.10.07
|
||||
|
||||
Core
|
||||
* [YoutubeDL] Ignore duplicates in --playlist-items
|
||||
* [YoutubeDL] Fix out of range --playlist-items for iterable playlists and
|
||||
reduce code duplication (#14425)
|
||||
+ [utils] Use cache in OnDemandPagedList by default
|
||||
* [postprocessor/ffmpeg] Convert to opus using libopus (#14381)
|
||||
|
||||
Extractors
|
||||
* [reddit] Sort formats (#14430)
|
||||
* [lnkgo] Relax URL regular expression (#14423)
|
||||
* [pornflip] Extend URL regular expression (#14405, #14406)
|
||||
+ [xtube] Add support for embed URLs (#14417)
|
||||
+ [xvideos] Add support for embed URLs and improve extraction (#14409)
|
||||
* [beeg] Fix extraction (#14403)
|
||||
* [tvn24] Relax URL regular expression (#14395)
|
||||
* [nbc] Fix extraction (#13651, #13715, #14137, #14198, #14312, #14314, #14378,
|
||||
#14392, #14414, #14419, #14431)
|
||||
+ [ketnet] Add support for videos without direct sources (#14377)
|
||||
* [canvas] Generalize mediazone.vrt.be extractor and rework canvas and een
|
||||
+ [afreecatv] Add support for adult videos (#14376)
|
||||
|
||||
|
||||
version 2017.10.01
|
||||
|
||||
Core
|
||||
* [YoutubeDL] Document youtube_include_dash_manifest
|
||||
|
||||
Extractors
|
||||
+ [tvp] Add support for new URL schema (#14368)
|
||||
+ [generic] Add support for single format Video.js embeds (#14371)
|
||||
* [yahoo] Bypass geo restriction for brightcove (#14210)
|
||||
* [yahoo] Use extracted brightcove account id (#14210)
|
||||
* [rtve:alacarta] Fix extraction (#14290)
|
||||
+ [yahoo] Add support for custom brigthcove embeds (#14210)
|
||||
+ [generic] Add support for Video.js embeds
|
||||
+ [gfycat] Add support for /gifs/detail URLs (#14322)
|
||||
* [generic] Fix infinite recursion for twitter:player URLs (#14339)
|
||||
* [xhamsterembed] Fix extraction (#14308)
|
||||
|
||||
|
||||
version 2017.09.24
|
||||
|
||||
Core
|
||||
+ [options] Accept lrc as a subtitle conversion target format (#14292)
|
||||
* [utils] Fix handling raw TTML subtitles (#14191)
|
||||
|
||||
Extractors
|
||||
* [24video] Fix timestamp extraction and make non fatal (#14295)
|
||||
+ [24video] Add support for 24video.adult (#14295)
|
||||
+ [kakao] Add support for tv.kakao.com (#12298, #14007)
|
||||
+ [twitter] Add support for URLs without user id (#14270)
|
||||
+ [americastestkitchen] Add support for americastestkitchen.com (#10764,
|
||||
#13996)
|
||||
* [generic] Fix support for multiple HTML5 videos on one page (#14080)
|
||||
* [mixcloud] Fix extraction (#14088, #14132)
|
||||
+ [lynda] Add support for educourse.ga (#14286)
|
||||
* [beeg] Fix extraction (#14275)
|
||||
* [nbcsports:vplayer] Correct theplatform URL (#13873)
|
||||
* [twitter] Fix duration extraction (#14141)
|
||||
* [tvplay] Bypass geo restriction
|
||||
+ [heise] Add support for YouTube embeds (#14109)
|
||||
+ [popcorntv] Add support for popcorntv.it (#5914, #14211)
|
||||
* [viki] Update app data (#14181)
|
||||
* [morningstar] Relax URL regular expression (#14222)
|
||||
* [openload] Fix extraction (#14225, #14257)
|
||||
* [noovo] Fix extraction (#14214)
|
||||
* [dailymotion:playlist] Relax URL regular expression (#14219)
|
||||
+ [twitch] Add support for go.twitch.tv URLs (#14215)
|
||||
* [vgtv] Relax URL regular expression (#14223)
|
||||
|
||||
|
||||
version 2017.09.15
|
||||
|
||||
Core
|
||||
* [downloader/fragment] Restart inconsistent incomplete fragment downloads
|
||||
(#13731)
|
||||
* [YoutubeDL] Download raw subtitles files (#12909, #14191)
|
||||
|
||||
Extractors
|
||||
* [condenast] Fix extraction (#14196, #14207)
|
||||
+ [orf] Add support for f4m stories
|
||||
* [tv4] Relax URL regular expression (#14206)
|
||||
* [animeondemand] Bypass geo restriction
|
||||
+ [animeondemand] Add support for flash videos (#9944)
|
||||
|
||||
|
||||
version 2017.09.11
|
||||
|
||||
Extractors
|
||||
* [rutube:playlist] Fix suitable (#14166)
|
||||
|
||||
|
||||
version 2017.09.10
|
||||
|
||||
Core
|
||||
|
@@ -427,7 +427,7 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
||||
syntax. Example: --exec 'adb push {}
|
||||
/sdcard/Music/ && rm {}'
|
||||
--convert-subs FORMAT Convert the subtitles to other format
|
||||
(currently supported: srt|ass|vtt)
|
||||
(currently supported: srt|ass|vtt|lrc)
|
||||
|
||||
# CONFIGURATION
|
||||
|
||||
@@ -1167,7 +1167,7 @@ with youtube_dl.YoutubeDL(ydl_opts) as ydl:
|
||||
ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
|
||||
```
|
||||
|
||||
Most likely, you'll want to use various options. For a list of options available, have a look at [`youtube_dl/YoutubeDL.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L129-L279). For a start, if you want to intercept youtube-dl's output, set a `logger` object.
|
||||
Most likely, you'll want to use various options. For a list of options available, have a look at [`youtube_dl/YoutubeDL.py`](https://github.com/rg3/youtube-dl/blob/3e4cedf9e8cd3157df2457df7274d0c842421945/youtube_dl/YoutubeDL.py#L137-L312). For a start, if you want to intercept youtube-dl's output, set a `logger` object.
|
||||
|
||||
Here's a more complete example of a program that outputs only errors (and a short message after the download is finished), and downloads/converts the video to an mp3 file:
|
||||
|
||||
|
@@ -14,7 +14,7 @@ import os
|
||||
import sys
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from test.helper import get_testcases
|
||||
from test.helper import gettestcases
|
||||
from youtube_dl.utils import compat_urllib_parse_urlparse
|
||||
from youtube_dl.utils import compat_urllib_request
|
||||
|
||||
@@ -24,7 +24,7 @@ if len(sys.argv) > 1:
|
||||
else:
|
||||
METHOD = 'EURISTIC'
|
||||
|
||||
for test in get_testcases():
|
||||
for test in gettestcases():
|
||||
if METHOD == 'EURISTIC':
|
||||
try:
|
||||
webpage = compat_urllib_request.urlopen(test['url'], timeout=10).read()
|
||||
|
@@ -36,13 +36,13 @@
|
||||
- **AdultSwim**
|
||||
- **aenetworks**: A+E Networks: A&E, Lifetime, History.com, FYI Network
|
||||
- **afreecatv**: afreecatv.com
|
||||
- **afreecatv:global**: afreecatv.com
|
||||
- **AirMozilla**
|
||||
- **AliExpressLive**
|
||||
- **AlJazeera**
|
||||
- **Allocine**
|
||||
- **AlphaPorno**
|
||||
- **AMCNetworks**
|
||||
- **AmericasTestKitchen**
|
||||
- **anderetijden**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
|
||||
- **AnimeOnDemand**
|
||||
- **anitube.se**
|
||||
@@ -129,7 +129,8 @@
|
||||
- **CamWithHer**
|
||||
- **canalc2.tv**
|
||||
- **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv
|
||||
- **Canvas**: canvas.be and een.be
|
||||
- **Canvas**
|
||||
- **CanvasEen**: canvas.be and een.be
|
||||
- **CarambaTV**
|
||||
- **CarambaTVPage**
|
||||
- **CartoonNetwork**
|
||||
@@ -294,6 +295,7 @@
|
||||
- **freespeech.org**
|
||||
- **FreshLive**
|
||||
- **Funimation**
|
||||
- **Funk**
|
||||
- **FunnyOrDie**
|
||||
- **Fusion**
|
||||
- **Fux**
|
||||
@@ -378,6 +380,7 @@
|
||||
- **Jove**
|
||||
- **jpopsuki.tv**
|
||||
- **JWPlatform**
|
||||
- **Kakao**
|
||||
- **Kaltura**
|
||||
- **Kamcord**
|
||||
- **KanalPlay**: Kanal 5/9/11 Play
|
||||
@@ -593,6 +596,7 @@
|
||||
- **Openload**
|
||||
- **OraTV**
|
||||
- **orf:fm4**: radio FM4
|
||||
- **orf:fm4:story**: fm4.orf.at stories
|
||||
- **orf:iptv**: iptv.ORF.at
|
||||
- **orf:oe1**: Radio Österreich 1
|
||||
- **orf:tvthek**: ORF TVthek
|
||||
@@ -626,6 +630,7 @@
|
||||
- **Pokemon**
|
||||
- **PolskieRadio**
|
||||
- **PolskieRadioCategory**
|
||||
- **PopcornTV**
|
||||
- **PornCom**
|
||||
- **PornerBros**
|
||||
- **PornFlip**
|
||||
@@ -733,6 +738,7 @@
|
||||
- **skynewsarabia:video**
|
||||
- **SkySports**
|
||||
- **Slideshare**
|
||||
- **SlidesLive**
|
||||
- **Slutload**
|
||||
- **smotri**: Smotri.com
|
||||
- **smotri:broadcast**: Smotri.com broadcasts
|
||||
@@ -964,6 +970,7 @@
|
||||
- **VoiceRepublic**
|
||||
- **Voot**
|
||||
- **VoxMedia**
|
||||
- **VoxMediaVolume**
|
||||
- **Vporn**
|
||||
- **vpro**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
|
||||
- **Vrak**
|
||||
@@ -1039,7 +1046,6 @@
|
||||
- **youtube:search**: YouTube.com searches
|
||||
- **youtube:search:date**: YouTube.com searches, newest videos first
|
||||
- **youtube:search_url**: YouTube.com search URLs
|
||||
- **youtube:shared**
|
||||
- **youtube:show**: YouTube.com (multi-season) shows
|
||||
- **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)
|
||||
- **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword)
|
||||
|
@@ -466,12 +466,18 @@ class TestFormatSelection(unittest.TestCase):
|
||||
ydl = YDL({'simulate': True})
|
||||
self.assertEqual(ydl._default_format_spec({}), 'bestvideo+bestaudio/best')
|
||||
|
||||
ydl = YDL({'is_live': True})
|
||||
self.assertEqual(ydl._default_format_spec({}), 'best/bestvideo+bestaudio')
|
||||
|
||||
ydl = YDL({'simulate': True, 'is_live': True})
|
||||
self.assertEqual(ydl._default_format_spec({}), 'bestvideo+bestaudio/best')
|
||||
|
||||
ydl = YDL({'outtmpl': '-'})
|
||||
self.assertEqual(ydl._default_format_spec({}), 'best')
|
||||
self.assertEqual(ydl._default_format_spec({}), 'best/bestvideo+bestaudio')
|
||||
|
||||
ydl = YDL({})
|
||||
self.assertEqual(ydl._default_format_spec({}, download=False), 'bestvideo+bestaudio/best')
|
||||
self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best')
|
||||
self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best/bestvideo+bestaudio')
|
||||
|
||||
|
||||
class TestYoutubeDL(unittest.TestCase):
|
||||
@@ -770,6 +776,12 @@ class TestYoutubeDL(unittest.TestCase):
|
||||
result = get_ids({'playlist_items': '10'})
|
||||
self.assertEqual(result, [])
|
||||
|
||||
result = get_ids({'playlist_items': '3-10'})
|
||||
self.assertEqual(result, [3, 4])
|
||||
|
||||
result = get_ids({'playlist_items': '2-4,3-4,3'})
|
||||
self.assertEqual(result, [2, 3, 4])
|
||||
|
||||
def test_urlopen_no_file_protocol(self):
|
||||
# see https://github.com/rg3/youtube-dl/issues/8227
|
||||
ydl = YDL()
|
||||
|
@@ -1064,7 +1064,7 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4')
|
||||
<p begin="3" dur="-1">Ignored, three</p>
|
||||
</div>
|
||||
</body>
|
||||
</tt>'''
|
||||
</tt>'''.encode('utf-8')
|
||||
srt_data = '''1
|
||||
00:00:00,000 --> 00:00:01,000
|
||||
The following line contains Chinese characters and special symbols
|
||||
@@ -1089,7 +1089,7 @@ Line
|
||||
<p begin="0" end="1">The first line</p>
|
||||
</div>
|
||||
</body>
|
||||
</tt>'''
|
||||
</tt>'''.encode('utf-8')
|
||||
srt_data = '''1
|
||||
00:00:00,000 --> 00:00:01,000
|
||||
The first line
|
||||
@@ -1115,7 +1115,7 @@ The first line
|
||||
<p style="s1" tts:textDecoration="underline" begin="00:00:09.56" id="p2" end="00:00:12.36"><span style="s2" tts:color="lime">inner<br /> </span>style</p>
|
||||
</div>
|
||||
</body>
|
||||
</tt>'''
|
||||
</tt>'''.encode('utf-8')
|
||||
srt_data = '''1
|
||||
00:00:02,080 --> 00:00:05,839
|
||||
<font color="white" face="sansSerif" size="16">default style<font color="red">custom style</font></font>
|
||||
@@ -1138,6 +1138,26 @@ part 3</font></u>
|
||||
'''
|
||||
self.assertEqual(dfxp2srt(dfxp_data_with_style), srt_data)
|
||||
|
||||
dfxp_data_non_utf8 = '''<?xml version="1.0" encoding="UTF-16"?>
|
||||
<tt xmlns="http://www.w3.org/ns/ttml" xml:lang="en" xmlns:tts="http://www.w3.org/ns/ttml#parameter">
|
||||
<body>
|
||||
<div xml:lang="en">
|
||||
<p begin="0" end="1">Line 1</p>
|
||||
<p begin="1" end="2">第二行</p>
|
||||
</div>
|
||||
</body>
|
||||
</tt>'''.encode('utf-16')
|
||||
srt_data = '''1
|
||||
00:00:00,000 --> 00:00:01,000
|
||||
Line 1
|
||||
|
||||
2
|
||||
00:00:01,000 --> 00:00:02,000
|
||||
第二行
|
||||
|
||||
'''
|
||||
self.assertEqual(dfxp2srt(dfxp_data_non_utf8), srt_data)
|
||||
|
||||
def test_cli_option(self):
|
||||
self.assertEqual(cli_option({'proxy': '127.0.0.1:3128'}, '--proxy', 'proxy'), ['--proxy', '127.0.0.1:3128'])
|
||||
self.assertEqual(cli_option({'proxy': None}, '--proxy', 'proxy'), [])
|
||||
|
@@ -65,6 +65,7 @@ from .utils import (
|
||||
locked_file,
|
||||
make_HTTPS_handler,
|
||||
MaxDownloadsReached,
|
||||
orderedSet,
|
||||
PagedList,
|
||||
parse_filesize,
|
||||
PerRequestProxyHandler,
|
||||
@@ -92,6 +93,7 @@ from .utils import (
|
||||
)
|
||||
from .cache import Cache
|
||||
from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
|
||||
from .extractor.openload import PhantomJSwrapper
|
||||
from .downloader import get_suitable_downloader
|
||||
from .downloader.rtmp import rtmpdump_version
|
||||
from .postprocessor import (
|
||||
@@ -303,6 +305,12 @@ class YoutubeDL(object):
|
||||
otherwise prefer avconv.
|
||||
postprocessor_args: A list of additional command-line arguments for the
|
||||
postprocessor.
|
||||
|
||||
The following options are used by the Youtube extractor:
|
||||
youtube_include_dash_manifest: If True (default), DASH manifests and related
|
||||
data will be downloaded and processed by extractor.
|
||||
You can reduce network I/O by disabling it if you don't
|
||||
care about DASH.
|
||||
"""
|
||||
|
||||
_NUMERIC_FIELDS = set((
|
||||
@@ -901,15 +909,25 @@ class YoutubeDL(object):
|
||||
yield int(item)
|
||||
else:
|
||||
yield int(string_segment)
|
||||
playlistitems = iter_playlistitems(playlistitems_str)
|
||||
playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
|
||||
|
||||
ie_entries = ie_result['entries']
|
||||
|
||||
def make_playlistitems_entries(list_ie_entries):
|
||||
num_entries = len(list_ie_entries)
|
||||
return [
|
||||
list_ie_entries[i - 1] for i in playlistitems
|
||||
if -num_entries <= i - 1 < num_entries]
|
||||
|
||||
def report_download(num_entries):
|
||||
self.to_screen(
|
||||
'[%s] playlist %s: Downloading %d videos' %
|
||||
(ie_result['extractor'], playlist, num_entries))
|
||||
|
||||
if isinstance(ie_entries, list):
|
||||
n_all_entries = len(ie_entries)
|
||||
if playlistitems:
|
||||
entries = [
|
||||
ie_entries[i - 1] for i in playlistitems
|
||||
if -n_all_entries <= i - 1 < n_all_entries]
|
||||
entries = make_playlistitems_entries(ie_entries)
|
||||
else:
|
||||
entries = ie_entries[playliststart:playlistend]
|
||||
n_entries = len(entries)
|
||||
@@ -927,20 +945,15 @@ class YoutubeDL(object):
|
||||
entries = ie_entries.getslice(
|
||||
playliststart, playlistend)
|
||||
n_entries = len(entries)
|
||||
self.to_screen(
|
||||
'[%s] playlist %s: Downloading %d videos' %
|
||||
(ie_result['extractor'], playlist, n_entries))
|
||||
report_download(n_entries)
|
||||
else: # iterable
|
||||
if playlistitems:
|
||||
entry_list = list(ie_entries)
|
||||
entries = [entry_list[i - 1] for i in playlistitems]
|
||||
entries = make_playlistitems_entries(list(ie_entries))
|
||||
else:
|
||||
entries = list(itertools.islice(
|
||||
ie_entries, playliststart, playlistend))
|
||||
n_entries = len(entries)
|
||||
self.to_screen(
|
||||
'[%s] playlist %s: Downloading %d videos' %
|
||||
(ie_result['extractor'], playlist, n_entries))
|
||||
report_download(n_entries)
|
||||
|
||||
if self.params.get('playlistreverse', False):
|
||||
entries = entries[::-1]
|
||||
@@ -1065,22 +1078,27 @@ class YoutubeDL(object):
|
||||
return _filter
|
||||
|
||||
def _default_format_spec(self, info_dict, download=True):
|
||||
req_format_list = []
|
||||
|
||||
def can_have_partial_formats():
|
||||
if self.params.get('simulate', False):
|
||||
return True
|
||||
if not download:
|
||||
return True
|
||||
if self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-':
|
||||
return False
|
||||
if info_dict.get('is_live'):
|
||||
return False
|
||||
def can_merge():
|
||||
merger = FFmpegMergerPP(self)
|
||||
return merger.available and merger.can_merge()
|
||||
if can_have_partial_formats():
|
||||
req_format_list.append('bestvideo+bestaudio')
|
||||
req_format_list.append('best')
|
||||
|
||||
def prefer_best():
|
||||
if self.params.get('simulate', False):
|
||||
return False
|
||||
if not download:
|
||||
return False
|
||||
if self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-':
|
||||
return True
|
||||
if info_dict.get('is_live'):
|
||||
return True
|
||||
if not can_merge():
|
||||
return True
|
||||
return False
|
||||
|
||||
req_format_list = ['bestvideo+bestaudio', 'best']
|
||||
if prefer_best():
|
||||
req_format_list.reverse()
|
||||
return '/'.join(req_format_list)
|
||||
|
||||
def build_format_selector(self, format_spec):
|
||||
@@ -1763,29 +1781,30 @@ class YoutubeDL(object):
|
||||
ie = self.get_info_extractor(info_dict['extractor_key'])
|
||||
for sub_lang, sub_info in subtitles.items():
|
||||
sub_format = sub_info['ext']
|
||||
if sub_info.get('data') is not None:
|
||||
sub_data = sub_info['data']
|
||||
sub_filename = subtitles_filename(filename, sub_lang, sub_format)
|
||||
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
|
||||
self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
|
||||
else:
|
||||
try:
|
||||
sub_data = ie._download_webpage(
|
||||
sub_info['url'], info_dict['id'], note=False)
|
||||
except ExtractorError as err:
|
||||
self.report_warning('Unable to download subtitle for "%s": %s' %
|
||||
(sub_lang, error_to_compat_str(err.cause)))
|
||||
continue
|
||||
try:
|
||||
sub_filename = subtitles_filename(filename, sub_lang, sub_format)
|
||||
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
|
||||
self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
|
||||
self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
|
||||
if sub_info.get('data') is not None:
|
||||
try:
|
||||
# Use newline='' to prevent conversion of newline characters
|
||||
# See https://github.com/rg3/youtube-dl/issues/10268
|
||||
with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
|
||||
subfile.write(sub_info['data'])
|
||||
except (OSError, IOError):
|
||||
self.report_error('Cannot write subtitles file ' + sub_filename)
|
||||
return
|
||||
else:
|
||||
self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
|
||||
# Use newline='' to prevent conversion of newline characters
|
||||
# See https://github.com/rg3/youtube-dl/issues/10268
|
||||
with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
|
||||
subfile.write(sub_data)
|
||||
except (OSError, IOError):
|
||||
self.report_error('Cannot write subtitles file ' + sub_filename)
|
||||
return
|
||||
try:
|
||||
sub_data = ie._request_webpage(
|
||||
sub_info['url'], info_dict['id'], note=False).read()
|
||||
with io.open(encodeFilename(sub_filename), 'wb') as subfile:
|
||||
subfile.write(sub_data)
|
||||
except (ExtractorError, IOError, OSError, ValueError) as err:
|
||||
self.report_warning('Unable to download subtitle for "%s": %s' %
|
||||
(sub_lang, error_to_compat_str(err)))
|
||||
continue
|
||||
|
||||
if self.params.get('writeinfojson', False):
|
||||
infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
|
||||
@@ -2216,6 +2235,7 @@ class YoutubeDL(object):
|
||||
|
||||
exe_versions = FFmpegPostProcessor.get_versions(self)
|
||||
exe_versions['rtmpdump'] = rtmpdump_version()
|
||||
exe_versions['phantomjs'] = PhantomJSwrapper._version()
|
||||
exe_str = ', '.join(
|
||||
'%s %s' % (exe, v)
|
||||
for exe, v in sorted(exe_versions.items())
|
||||
|
@@ -206,7 +206,7 @@ def _real_main(argv=None):
|
||||
if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg', 'mkv', 'avi']:
|
||||
parser.error('invalid video recode format specified')
|
||||
if opts.convertsubtitles is not None:
|
||||
if opts.convertsubtitles not in ['srt', 'vtt', 'ass']:
|
||||
if opts.convertsubtitles not in ['srt', 'vtt', 'ass', 'lrc']:
|
||||
parser.error('invalid subtitle format specified')
|
||||
|
||||
if opts.date is not None:
|
||||
|
@@ -6,6 +6,7 @@ import collections
|
||||
import email
|
||||
import getpass
|
||||
import io
|
||||
import itertools
|
||||
import optparse
|
||||
import os
|
||||
import re
|
||||
@@ -15,7 +16,6 @@ import socket
|
||||
import struct
|
||||
import subprocess
|
||||
import sys
|
||||
import itertools
|
||||
import xml.etree.ElementTree
|
||||
|
||||
|
||||
@@ -2898,6 +2898,13 @@ else:
|
||||
compat_struct_pack = struct.pack
|
||||
compat_struct_unpack = struct.unpack
|
||||
|
||||
try:
|
||||
from future_builtins import zip as compat_zip
|
||||
except ImportError: # not 2.6+ or is 3.x
|
||||
try:
|
||||
from itertools import izip as compat_zip # < 2.5 or 3.x
|
||||
except ImportError:
|
||||
compat_zip = zip
|
||||
|
||||
__all__ = [
|
||||
'compat_HTMLParseError',
|
||||
@@ -2948,5 +2955,6 @@ __all__ = [
|
||||
'compat_urlretrieve',
|
||||
'compat_xml_parse_error',
|
||||
'compat_xpath',
|
||||
'compat_zip',
|
||||
'workaround_optparse_bug9161',
|
||||
]
|
||||
|
@@ -117,9 +117,15 @@ class FragmentFD(FileDownloader):
|
||||
def _prepare_frag_download(self, ctx):
|
||||
if 'live' not in ctx:
|
||||
ctx['live'] = False
|
||||
if not ctx['live']:
|
||||
total_frags_str = '%d' % ctx['total_frags']
|
||||
ad_frags = ctx.get('ad_frags', 0)
|
||||
if ad_frags:
|
||||
total_frags_str += ' (not including %d ad)' % ad_frags
|
||||
else:
|
||||
total_frags_str = 'unknown (live)'
|
||||
self.to_screen(
|
||||
'[%s] Total fragments: %s'
|
||||
% (self.FD_NAME, ctx['total_frags'] if not ctx['live'] else 'unknown (live)'))
|
||||
'[%s] Total fragments: %s' % (self.FD_NAME, total_frags_str))
|
||||
self.report_destination(ctx['filename'])
|
||||
dl = HttpQuietDownloader(
|
||||
self.ydl,
|
||||
@@ -151,10 +157,15 @@ class FragmentFD(FileDownloader):
|
||||
if self.__do_ytdl_file(ctx):
|
||||
if os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename']))):
|
||||
self._read_ytdl_file(ctx)
|
||||
if ctx['fragment_index'] > 0 and resume_len == 0:
|
||||
self.report_error(
|
||||
'Inconsistent state of incomplete fragment download. '
|
||||
'Restarting from the beginning...')
|
||||
ctx['fragment_index'] = resume_len = 0
|
||||
self._write_ytdl_file(ctx)
|
||||
else:
|
||||
self._write_ytdl_file(ctx)
|
||||
if ctx['fragment_index'] > 0:
|
||||
assert resume_len > 0
|
||||
assert ctx['fragment_index'] == 0
|
||||
|
||||
dest_stream, tmpfilename = sanitize_open(tmpfilename, open_mode)
|
||||
|
||||
|
@@ -75,15 +75,29 @@ class HlsFD(FragmentFD):
|
||||
fd.add_progress_hook(ph)
|
||||
return fd.real_download(filename, info_dict)
|
||||
|
||||
total_frags = 0
|
||||
def anvato_ad(s):
|
||||
return s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s
|
||||
|
||||
media_frags = 0
|
||||
ad_frags = 0
|
||||
ad_frag_next = False
|
||||
for line in s.splitlines():
|
||||
line = line.strip()
|
||||
if line and not line.startswith('#'):
|
||||
total_frags += 1
|
||||
if not line:
|
||||
continue
|
||||
if line.startswith('#'):
|
||||
if anvato_ad(line):
|
||||
ad_frags += 1
|
||||
continue
|
||||
if ad_frag_next:
|
||||
ad_frag_next = False
|
||||
continue
|
||||
media_frags += 1
|
||||
|
||||
ctx = {
|
||||
'filename': filename,
|
||||
'total_frags': total_frags,
|
||||
'total_frags': media_frags,
|
||||
'ad_frags': ad_frags,
|
||||
}
|
||||
|
||||
self._prepare_and_start_frag_download(ctx)
|
||||
@@ -101,10 +115,14 @@ class HlsFD(FragmentFD):
|
||||
decrypt_info = {'METHOD': 'NONE'}
|
||||
byte_range = {}
|
||||
frag_index = 0
|
||||
ad_frag_next = False
|
||||
for line in s.splitlines():
|
||||
line = line.strip()
|
||||
if line:
|
||||
if not line.startswith('#'):
|
||||
if ad_frag_next:
|
||||
ad_frag_next = False
|
||||
continue
|
||||
frag_index += 1
|
||||
if frag_index <= ctx['fragment_index']:
|
||||
continue
|
||||
@@ -175,6 +193,8 @@ class HlsFD(FragmentFD):
|
||||
'start': sub_range_start,
|
||||
'end': sub_range_start + int(splitted_byte_range[0]),
|
||||
}
|
||||
elif anvato_ad(line):
|
||||
ad_frag_next = True
|
||||
|
||||
self._finish_frag_download(ctx)
|
||||
|
||||
|
@@ -131,7 +131,7 @@ class AENetworksIE(AENetworksBaseIE):
|
||||
r'data-media-url=(["\'])(?P<url>(?:(?!\1).)+?)\1'],
|
||||
webpage, 'video url', group='url')
|
||||
theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
|
||||
r'https?://link.theplatform.com/s/([^?]+)', media_url, 'theplatform_path'), video_id)
|
||||
r'https?://link\.theplatform\.com/s/([^?]+)', media_url, 'theplatform_path'), video_id)
|
||||
info = self._parse_theplatform_metadata(theplatform_metadata)
|
||||
if theplatform_metadata.get('AETN$isBehindWall'):
|
||||
requestor_id = self._DOMAIN_TO_REQUESTOR_ID[domain]
|
||||
|
@@ -138,6 +138,23 @@ class AfreecaTVIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# adult video
|
||||
'url': 'http://vod.afreecatv.com/PLAYER/STATION/26542731',
|
||||
'info_dict': {
|
||||
'id': '20171001_F1AE1711_196617479_1',
|
||||
'ext': 'mp4',
|
||||
'title': '[생]서아 초심 찾기 방송 (part 1)',
|
||||
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
|
||||
'uploader': 'BJ서아',
|
||||
'uploader_id': 'bjdyrksu',
|
||||
'upload_date': '20171001',
|
||||
'duration': 3600,
|
||||
'age_limit': 18,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652',
|
||||
'only_matching': True,
|
||||
@@ -160,7 +177,15 @@ class AfreecaTVIE(InfoExtractor):
|
||||
|
||||
video_xml = self._download_xml(
|
||||
'http://afbbs.afreecatv.com:8080/api/video/get_video_info.php',
|
||||
video_id, query={'nTitleNo': video_id})
|
||||
video_id, query={
|
||||
'nTitleNo': video_id,
|
||||
'partialView': 'SKIP_ADULT',
|
||||
})
|
||||
|
||||
flag = xpath_text(video_xml, './track/flag', 'flag', default=None)
|
||||
if flag and flag != 'SUCCEED':
|
||||
raise ExtractorError(
|
||||
'%s said: %s' % (self.IE_NAME, flag), expected=True)
|
||||
|
||||
video_element = video_xml.findall(compat_xpath('./track/video'))[1]
|
||||
if video_element is None or video_element.text is None:
|
||||
@@ -246,107 +271,3 @@ class AfreecaTVIE(InfoExtractor):
|
||||
})
|
||||
|
||||
return info
|
||||
|
||||
|
||||
class AfreecaTVGlobalIE(AfreecaTVIE):
|
||||
IE_NAME = 'afreecatv:global'
|
||||
_VALID_URL = r'https?://(?:www\.)?afreeca\.tv/(?P<channel_id>\d+)(?:/v/(?P<video_id>\d+))?'
|
||||
_TESTS = [{
|
||||
'url': 'http://afreeca.tv/36853014/v/58301',
|
||||
'info_dict': {
|
||||
'id': '58301',
|
||||
'title': 'tryhard top100',
|
||||
'uploader_id': '36853014',
|
||||
'uploader': 'makgi Hearthstone Live!',
|
||||
},
|
||||
'playlist_count': 3,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel_id, video_id = re.match(self._VALID_URL, url).groups()
|
||||
video_type = 'video' if video_id else 'live'
|
||||
query = {
|
||||
'pt': 'view',
|
||||
'bid': channel_id,
|
||||
}
|
||||
if video_id:
|
||||
query['vno'] = video_id
|
||||
video_data = self._download_json(
|
||||
'http://api.afreeca.tv/%s/view_%s.php' % (video_type, video_type),
|
||||
video_id or channel_id, query=query)['channel']
|
||||
|
||||
if video_data.get('result') != 1:
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, video_data['remsg']))
|
||||
|
||||
title = video_data['title']
|
||||
|
||||
info = {
|
||||
'thumbnail': video_data.get('thumb'),
|
||||
'view_count': int_or_none(video_data.get('vcnt')),
|
||||
'age_limit': int_or_none(video_data.get('grade')),
|
||||
'uploader_id': channel_id,
|
||||
'uploader': video_data.get('cname'),
|
||||
}
|
||||
|
||||
if video_id:
|
||||
entries = []
|
||||
for i, f in enumerate(video_data.get('flist', [])):
|
||||
video_key = self.parse_video_key(f.get('key', ''))
|
||||
f_url = f.get('file')
|
||||
if not video_key or not f_url:
|
||||
continue
|
||||
entries.append({
|
||||
'id': '%s_%s' % (video_id, video_key.get('part', i + 1)),
|
||||
'title': title,
|
||||
'upload_date': video_key.get('upload_date'),
|
||||
'duration': int_or_none(f.get('length')),
|
||||
'url': f_url,
|
||||
'protocol': 'm3u8_native',
|
||||
'ext': 'mp4',
|
||||
})
|
||||
|
||||
info.update({
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'duration': int_or_none(video_data.get('length')),
|
||||
})
|
||||
if len(entries) > 1:
|
||||
info['_type'] = 'multi_video'
|
||||
info['entries'] = entries
|
||||
elif len(entries) == 1:
|
||||
i = entries[0].copy()
|
||||
i.update(info)
|
||||
info = i
|
||||
else:
|
||||
formats = []
|
||||
for s in video_data.get('strm', []):
|
||||
s_url = s.get('purl')
|
||||
if not s_url:
|
||||
continue
|
||||
stype = s.get('stype')
|
||||
if stype == 'HLS':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
s_url, channel_id, 'mp4', m3u8_id=stype, fatal=False))
|
||||
elif stype == 'RTMP':
|
||||
format_id = [stype]
|
||||
label = s.get('label')
|
||||
if label:
|
||||
format_id.append(label)
|
||||
formats.append({
|
||||
'format_id': '-'.join(format_id),
|
||||
'url': s_url,
|
||||
'tbr': int_or_none(s.get('bps')),
|
||||
'height': int_or_none(s.get('brt')),
|
||||
'ext': 'flv',
|
||||
'rtmp_live': True,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
info.update({
|
||||
'id': channel_id,
|
||||
'title': self._live_title(title),
|
||||
'is_live': True,
|
||||
'formats': formats,
|
||||
})
|
||||
|
||||
return info
|
||||
|
85
youtube_dl/extractor/americastestkitchen.py
Executable file
85
youtube_dl/extractor/americastestkitchen.py
Executable file
@@ -0,0 +1,85 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
int_or_none,
|
||||
try_get,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class AmericasTestKitchenIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?americastestkitchen\.com/(?:episode|videos)/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.americastestkitchen.com/episode/548-summer-dinner-party',
|
||||
'md5': 'b861c3e365ac38ad319cfd509c30577f',
|
||||
'info_dict': {
|
||||
'id': '1_5g5zua6e',
|
||||
'title': 'Summer Dinner Party',
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:858d986e73a4826979b6a5d9f8f6a1ec',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'timestamp': 1497285541,
|
||||
'upload_date': '20170612',
|
||||
'uploader_id': 'roger.metcalf@americastestkitchen.com',
|
||||
'release_date': '20170617',
|
||||
'series': "America's Test Kitchen",
|
||||
'season_number': 17,
|
||||
'episode': 'Summer Dinner Party',
|
||||
'episode_number': 24,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.americastestkitchen.com/videos/3420-pan-seared-salmon',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
partner_id = self._search_regex(
|
||||
r'src=["\'](?:https?:)?//(?:[^/]+\.)kaltura\.com/(?:[^/]+/)*(?:p|partner_id)/(\d+)',
|
||||
webpage, 'kaltura partner id')
|
||||
|
||||
video_data = self._parse_json(
|
||||
self._search_regex(
|
||||
r'window\.__INITIAL_STATE__\s*=\s*({.+?})\s*;\s*</script>',
|
||||
webpage, 'initial context'),
|
||||
video_id)
|
||||
|
||||
ep_data = try_get(
|
||||
video_data,
|
||||
(lambda x: x['episodeDetail']['content']['data'],
|
||||
lambda x: x['videoDetail']['content']['data']), dict)
|
||||
ep_meta = ep_data.get('full_video', {})
|
||||
external_id = ep_data.get('external_id') or ep_meta['external_id']
|
||||
|
||||
title = ep_data.get('title') or ep_meta.get('title')
|
||||
description = clean_html(ep_meta.get('episode_description') or ep_data.get(
|
||||
'description') or ep_meta.get('description'))
|
||||
thumbnail = try_get(ep_meta, lambda x: x['photo']['image_url'])
|
||||
release_date = unified_strdate(ep_data.get('aired_at'))
|
||||
|
||||
season_number = int_or_none(ep_meta.get('season_number'))
|
||||
episode = ep_meta.get('title')
|
||||
episode_number = int_or_none(ep_meta.get('episode_number'))
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'kaltura:%s:%s' % (partner_id, external_id),
|
||||
'ie_key': 'Kaltura',
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'release_date': release_date,
|
||||
'series': "America's Test Kitchen",
|
||||
'season_number': season_number,
|
||||
'episode': episode,
|
||||
'episode_number': episode_number,
|
||||
}
|
@@ -3,16 +3,13 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urlparse,
|
||||
compat_str,
|
||||
)
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
extract_attributes,
|
||||
ExtractorError,
|
||||
sanitized_Request,
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
@@ -21,6 +18,8 @@ class AnimeOnDemandIE(InfoExtractor):
|
||||
_LOGIN_URL = 'https://www.anime-on-demand.de/users/sign_in'
|
||||
_APPLY_HTML5_URL = 'https://www.anime-on-demand.de/html5apply'
|
||||
_NETRC_MACHINE = 'animeondemand'
|
||||
# German-speaking countries of Europe
|
||||
_GEO_COUNTRIES = ['AT', 'CH', 'DE', 'LI', 'LU']
|
||||
_TESTS = [{
|
||||
# jap, OmU
|
||||
'url': 'https://www.anime-on-demand.de/anime/161',
|
||||
@@ -46,6 +45,10 @@ class AnimeOnDemandIE(InfoExtractor):
|
||||
# Full length film, non-series, ger/jap, Dub/OmU, account required
|
||||
'url': 'https://www.anime-on-demand.de/anime/185',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Flash videos
|
||||
'url': 'https://www.anime-on-demand.de/anime/12',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _login(self):
|
||||
@@ -72,14 +75,13 @@ class AnimeOnDemandIE(InfoExtractor):
|
||||
'post url', default=self._LOGIN_URL, group='url')
|
||||
|
||||
if not post_url.startswith('http'):
|
||||
post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)
|
||||
|
||||
request = sanitized_Request(
|
||||
post_url, urlencode_postdata(login_form))
|
||||
request.add_header('Referer', self._LOGIN_URL)
|
||||
post_url = urljoin(self._LOGIN_URL, post_url)
|
||||
|
||||
response = self._download_webpage(
|
||||
request, None, 'Logging in as %s' % username)
|
||||
post_url, None, 'Logging in as %s' % username,
|
||||
data=urlencode_postdata(login_form), headers={
|
||||
'Referer': self._LOGIN_URL,
|
||||
})
|
||||
|
||||
if all(p not in response for p in ('>Logout<', 'href="/users/sign_out"')):
|
||||
error = self._search_regex(
|
||||
@@ -120,10 +122,11 @@ class AnimeOnDemandIE(InfoExtractor):
|
||||
formats = []
|
||||
|
||||
for input_ in re.findall(
|
||||
r'<input[^>]+class=["\'].*?streamstarter_html5[^>]+>', html):
|
||||
r'<input[^>]+class=["\'].*?streamstarter[^>]+>', html):
|
||||
attributes = extract_attributes(input_)
|
||||
title = attributes.get('data-dialog-header')
|
||||
playlist_urls = []
|
||||
for playlist_key in ('data-playlist', 'data-otherplaylist'):
|
||||
for playlist_key in ('data-playlist', 'data-otherplaylist', 'data-stream'):
|
||||
playlist_url = attributes.get(playlist_key)
|
||||
if isinstance(playlist_url, compat_str) and re.match(
|
||||
r'/?[\da-zA-Z]+', playlist_url):
|
||||
@@ -147,19 +150,38 @@ class AnimeOnDemandIE(InfoExtractor):
|
||||
format_id_list.append(compat_str(num))
|
||||
format_id = '-'.join(format_id_list)
|
||||
format_note = ', '.join(filter(None, (kind, lang_note)))
|
||||
request = sanitized_Request(
|
||||
compat_urlparse.urljoin(url, playlist_url),
|
||||
item_id_list = []
|
||||
if format_id:
|
||||
item_id_list.append(format_id)
|
||||
item_id_list.append('videomaterial')
|
||||
playlist = self._download_json(
|
||||
urljoin(url, playlist_url), video_id,
|
||||
'Downloading %s JSON' % ' '.join(item_id_list),
|
||||
headers={
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
'X-CSRF-Token': csrf_token,
|
||||
'Referer': url,
|
||||
'Accept': 'application/json, text/javascript, */*; q=0.01',
|
||||
})
|
||||
playlist = self._download_json(
|
||||
request, video_id, 'Downloading %s playlist JSON' % format_id,
|
||||
fatal=False)
|
||||
}, fatal=False)
|
||||
if not playlist:
|
||||
continue
|
||||
stream_url = playlist.get('streamurl')
|
||||
if stream_url:
|
||||
rtmp = re.search(
|
||||
r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+/))(?P<playpath>mp[34]:.+)',
|
||||
stream_url)
|
||||
if rtmp:
|
||||
formats.append({
|
||||
'url': rtmp.group('url'),
|
||||
'app': rtmp.group('app'),
|
||||
'play_path': rtmp.group('playpath'),
|
||||
'page_url': url,
|
||||
'player_url': 'https://www.anime-on-demand.de/assets/jwplayer.flash-55abfb34080700304d49125ce9ffb4a6.swf',
|
||||
'rtmp_real_time': True,
|
||||
'format_id': 'rtmp',
|
||||
'ext': 'flv',
|
||||
})
|
||||
continue
|
||||
start_video = playlist.get('startvideo', 0)
|
||||
playlist = playlist.get('playlist')
|
||||
if not playlist or not isinstance(playlist, list):
|
||||
@@ -222,7 +244,7 @@ class AnimeOnDemandIE(InfoExtractor):
|
||||
f.update({
|
||||
'id': '%s-%s' % (f['id'], m.group('kind').lower()),
|
||||
'title': m.group('title'),
|
||||
'url': compat_urlparse.urljoin(url, m.group('href')),
|
||||
'url': urljoin(url, m.group('href')),
|
||||
})
|
||||
entries.append(f)
|
||||
|
||||
|
@@ -18,6 +18,7 @@ from ..utils import (
|
||||
int_or_none,
|
||||
strip_jsonp,
|
||||
unescapeHTML,
|
||||
unsmuggle_url,
|
||||
)
|
||||
|
||||
|
||||
@@ -197,12 +198,16 @@ class AnvatoIE(InfoExtractor):
|
||||
'tbr': tbr if tbr != 0 else None,
|
||||
}
|
||||
|
||||
if ext == 'm3u8' or media_format in ('m3u8', 'm3u8-variant'):
|
||||
if tbr is not None:
|
||||
a_format.update({
|
||||
'format_id': '-'.join(filter(None, ['hls', compat_str(tbr)])),
|
||||
'ext': 'mp4',
|
||||
})
|
||||
if media_format == 'm3u8' and tbr is not None:
|
||||
a_format.update({
|
||||
'format_id': '-'.join(filter(None, ['hls', compat_str(tbr)])),
|
||||
'ext': 'mp4',
|
||||
})
|
||||
elif media_format == 'm3u8-variant' or ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
continue
|
||||
elif ext == 'mp3' or media_format == 'mp3':
|
||||
a_format['vcodec'] = 'none'
|
||||
else:
|
||||
@@ -271,6 +276,9 @@ class AnvatoIE(InfoExtractor):
|
||||
anvplayer_data['accessKey'], anvplayer_data['video'])
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, smuggled_data = unsmuggle_url(url, {})
|
||||
self._initialize_geo_bypass(smuggled_data.get('geo_countries'))
|
||||
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
access_key, video_id = mobj.group('access_key_or_mcp', 'id')
|
||||
if access_key not in self._ANVACK_TABLE:
|
||||
|
@@ -117,7 +117,7 @@ class AppleTrailersIE(InfoExtractor):
|
||||
continue
|
||||
formats.append({
|
||||
'format_id': '%s-%s' % (version, size),
|
||||
'url': re.sub(r'_(\d+p.mov)', r'_h\1', src),
|
||||
'url': re.sub(r'_(\d+p\.mov)', r'_h\1', src),
|
||||
'width': int_or_none(size_data.get('width')),
|
||||
'height': int_or_none(size_data.get('height')),
|
||||
'language': version[:2],
|
||||
@@ -179,7 +179,7 @@ class AppleTrailersIE(InfoExtractor):
|
||||
formats = []
|
||||
for format in settings['metadata']['sizes']:
|
||||
# The src is a file pointing to the real video file
|
||||
format_url = re.sub(r'_(\d*p.mov)', r'_h\1', format['src'])
|
||||
format_url = re.sub(r'_(\d*p\.mov)', r'_h\1', format['src'])
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'format': format['type'],
|
||||
|
@@ -195,7 +195,7 @@ class ARDMediathekIE(InfoExtractor):
|
||||
|
||||
title = self._html_search_regex(
|
||||
[r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',
|
||||
r'<meta name="dcterms.title" content="(.*?)"/>',
|
||||
r'<meta name="dcterms\.title" content="(.*?)"/>',
|
||||
r'<h4 class="headline">(.*?)</h4>'],
|
||||
webpage, 'title')
|
||||
description = self._html_search_meta(
|
||||
|
@@ -386,7 +386,7 @@ class BBCCoUkIE(InfoExtractor):
|
||||
m3u8_id=format_id, fatal=False))
|
||||
if re.search(self._USP_RE, href):
|
||||
usp_formats = self._extract_m3u8_formats(
|
||||
re.sub(self._USP_RE, r'/\1.ism/\1.m3u8', href),
|
||||
re.sub(self._USP_RE, r'/\1\.ism/\1\.m3u8', href),
|
||||
programme_id, ext='mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id=format_id, fatal=False)
|
||||
for f in usp_formats:
|
||||
|
@@ -9,6 +9,7 @@ from ..compat import (
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
@@ -36,9 +37,11 @@ class BeegIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
cpl_url = self._search_regex(
|
||||
r'<script[^>]+src=(["\'])(?P<url>(?:https?:)?//static\.beeg\.com/cpl/\d+\.js.*?)\1',
|
||||
r'<script[^>]+src=(["\'])(?P<url>(?:/static|(?:https?:)?//static\.beeg\.com)/cpl/\d+\.js.*?)\1',
|
||||
webpage, 'cpl', default=None, group='url')
|
||||
|
||||
cpl_url = urljoin(url, cpl_url)
|
||||
|
||||
beeg_version, beeg_salt = [None] * 2
|
||||
|
||||
if cpl_url:
|
||||
@@ -54,12 +57,16 @@ class BeegIE(InfoExtractor):
|
||||
r'beeg_salt\s*=\s*(["\'])(?P<beeg_salt>.+?)\1', cpl, 'beeg salt',
|
||||
default=None, group='beeg_salt')
|
||||
|
||||
beeg_version = beeg_version or '2000'
|
||||
beeg_version = beeg_version or '2185'
|
||||
beeg_salt = beeg_salt or 'pmweAkq8lAYKdfWcFCUj0yoVgoPlinamH5UE1CB3H'
|
||||
|
||||
video = self._download_json(
|
||||
'https://api.beeg.com/api/v6/%s/video/%s' % (beeg_version, video_id),
|
||||
video_id)
|
||||
for api_path in ('', 'api.'):
|
||||
video = self._download_json(
|
||||
'https://%sbeeg.com/api/v6/%s/video/%s'
|
||||
% (api_path, beeg_version, video_id), video_id,
|
||||
fatal=api_path == 'api.')
|
||||
if video:
|
||||
break
|
||||
|
||||
def split(o, e):
|
||||
def cut(s, x):
|
||||
|
@@ -3,24 +3,104 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import float_or_none
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
strip_or_none,
|
||||
)
|
||||
|
||||
|
||||
class CanvasIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?P<site_id>canvas|een|ketnet)/assets/(?P<id>m[dz]-ast-[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://mediazone.vrt.be/api/v1/ketnet/assets/md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
|
||||
'md5': '90139b746a0a9bd7bb631283f6e2a64e',
|
||||
'info_dict': {
|
||||
'id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
|
||||
'display_id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
|
||||
'ext': 'flv',
|
||||
'title': 'Nachtwacht: De Greystook',
|
||||
'description': 'md5:1db3f5dc4c7109c821261e7512975be7',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 1468.03,
|
||||
},
|
||||
'expected_warnings': ['is not a supported codec', 'Unknown MIME type'],
|
||||
}, {
|
||||
'url': 'https://mediazone.vrt.be/api/v1/canvas/assets/mz-ast-5e5f90b6-2d72-4c40-82c2-e134f884e93e',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
site_id, video_id = mobj.group('site_id'), mobj.group('id')
|
||||
|
||||
data = self._download_json(
|
||||
'https://mediazone.vrt.be/api/v1/%s/assets/%s'
|
||||
% (site_id, video_id), video_id)
|
||||
|
||||
title = data['title']
|
||||
description = data.get('description')
|
||||
|
||||
formats = []
|
||||
for target in data['targetUrls']:
|
||||
format_url, format_type = target.get('url'), target.get('type')
|
||||
if not format_url or not format_type:
|
||||
continue
|
||||
if format_type == 'HLS':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id=format_type, fatal=False))
|
||||
elif format_type == 'HDS':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
format_url, video_id, f4m_id=format_type, fatal=False))
|
||||
elif format_type == 'MPEG_DASH':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
format_url, video_id, mpd_id=format_type, fatal=False))
|
||||
elif format_type == 'HSS':
|
||||
formats.extend(self._extract_ism_formats(
|
||||
format_url, video_id, ism_id='mss', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'format_id': format_type,
|
||||
'url': format_url,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
subtitle_urls = data.get('subtitleUrls')
|
||||
if isinstance(subtitle_urls, list):
|
||||
for subtitle in subtitle_urls:
|
||||
subtitle_url = subtitle.get('url')
|
||||
if subtitle_url and subtitle.get('type') == 'CLOSED':
|
||||
subtitles.setdefault('nl', []).append({'url': subtitle_url})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'formats': formats,
|
||||
'duration': float_or_none(data.get('duration'), 1000),
|
||||
'thumbnail': data.get('posterImageUrl'),
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
|
||||
class CanvasEenIE(InfoExtractor):
|
||||
IE_DESC = 'canvas.be and een.be'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<site_id>canvas|een)\.be/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.canvas.be/video/de-afspraak/najaar-2015/de-afspraak-veilt-voor-de-warmste-week',
|
||||
'md5': 'ea838375a547ac787d4064d8c7860a6c',
|
||||
'md5': 'ed66976748d12350b118455979cca293',
|
||||
'info_dict': {
|
||||
'id': 'mz-ast-5e5f90b6-2d72-4c40-82c2-e134f884e93e',
|
||||
'display_id': 'de-afspraak-veilt-voor-de-warmste-week',
|
||||
'ext': 'mp4',
|
||||
'ext': 'flv',
|
||||
'title': 'De afspraak veilt voor de Warmste Week',
|
||||
'description': 'md5:24cb860c320dc2be7358e0e5aa317ba6',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 49.02,
|
||||
}
|
||||
},
|
||||
'expected_warnings': ['is not a supported codec'],
|
||||
}, {
|
||||
# with subtitles
|
||||
'url': 'http://www.canvas.be/video/panorama/2016/pieter-0167',
|
||||
@@ -40,7 +120,8 @@ class CanvasIE(InfoExtractor):
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
'skip': 'Pagina niet gevonden',
|
||||
}, {
|
||||
'url': 'https://www.een.be/sorry-voor-alles/herbekijk-sorry-voor-alles',
|
||||
'info_dict': {
|
||||
@@ -54,7 +135,8 @@ class CanvasIE(InfoExtractor):
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
'skip': 'Episode no longer available',
|
||||
}, {
|
||||
'url': 'https://www.canvas.be/check-point/najaar-2016/de-politie-uw-vriend',
|
||||
'only_matching': True,
|
||||
@@ -66,55 +148,21 @@ class CanvasIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
title = (self._search_regex(
|
||||
title = strip_or_none(self._search_regex(
|
||||
r'<h1[^>]+class="video__body__header__title"[^>]*>(.+?)</h1>',
|
||||
webpage, 'title', default=None) or self._og_search_title(
|
||||
webpage)).strip()
|
||||
webpage, default=None))
|
||||
|
||||
video_id = self._html_search_regex(
|
||||
r'data-video=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video id', group='id')
|
||||
|
||||
data = self._download_json(
|
||||
'https://mediazone.vrt.be/api/v1/%s/assets/%s'
|
||||
% (site_id, video_id), display_id)
|
||||
|
||||
formats = []
|
||||
for target in data['targetUrls']:
|
||||
format_url, format_type = target.get('url'), target.get('type')
|
||||
if not format_url or not format_type:
|
||||
continue
|
||||
if format_type == 'HLS':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, display_id, entry_protocol='m3u8_native',
|
||||
ext='mp4', preference=0, fatal=False, m3u8_id=format_type))
|
||||
elif format_type == 'HDS':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
format_url, display_id, f4m_id=format_type, fatal=False))
|
||||
elif format_type == 'MPEG_DASH':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
format_url, display_id, mpd_id=format_type, fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'format_id': format_type,
|
||||
'url': format_url,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
subtitle_urls = data.get('subtitleUrls')
|
||||
if isinstance(subtitle_urls, list):
|
||||
for subtitle in subtitle_urls:
|
||||
subtitle_url = subtitle.get('url')
|
||||
if subtitle_url and subtitle.get('type') == 'CLOSED':
|
||||
subtitles.setdefault('nl', []).append({'url': subtitle_url})
|
||||
r'data-video=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video id',
|
||||
group='id')
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'https://mediazone.vrt.be/api/v1/%s/assets/%s' % (site_id, video_id),
|
||||
'ie_key': CanvasIE.ie_key(),
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': self._og_search_description(webpage),
|
||||
'formats': formats,
|
||||
'duration': float_or_none(data.get('duration'), 1000),
|
||||
'thumbnail': data.get('posterImageUrl'),
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
@@ -81,6 +81,12 @@ class Channel9IE(InfoExtractor):
|
||||
|
||||
_RSS_URL = 'http://channel9.msdn.com/%s/RSS'
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
return re.findall(
|
||||
r'<iframe[^>]+src=["\'](https?://channel9\.msdn\.com/(?:[^/]+/)+)player\b',
|
||||
webpage)
|
||||
|
||||
def _extract_list(self, video_id, rss_url=None):
|
||||
if not rss_url:
|
||||
rss_url = self._RSS_URL % video_id
|
||||
|
@@ -120,13 +120,16 @@ class ComedyCentralTVIE(MTVServicesInfoExtractor):
|
||||
|
||||
|
||||
class ComedyCentralShortnameIE(InfoExtractor):
|
||||
_VALID_URL = r'^:(?P<id>tds|thedailyshow)$'
|
||||
_VALID_URL = r'^:(?P<id>tds|thedailyshow|theopposition)$'
|
||||
_TESTS = [{
|
||||
'url': ':tds',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': ':thedailyshow',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': ':theopposition',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -134,5 +137,6 @@ class ComedyCentralShortnameIE(InfoExtractor):
|
||||
shortcut_map = {
|
||||
'tds': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes',
|
||||
'thedailyshow': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes',
|
||||
'theopposition': 'http://www.cc.com/shows/the-opposition-with-jordan-klepper/full-episodes',
|
||||
}
|
||||
return self.url_result(shortcut_map[video_id])
|
||||
|
@@ -1920,7 +1920,7 @@ class InfoExtractor(object):
|
||||
# can't be used at the same time
|
||||
if '%(Number' in media_template and 's' not in representation_ms_info:
|
||||
segment_duration = None
|
||||
if 'total_number' not in representation_ms_info and 'segment_duration':
|
||||
if 'total_number' not in representation_ms_info and 'segment_duration' in representation_ms_info:
|
||||
segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale'])
|
||||
representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))
|
||||
representation_ms_info['fragments'] = [{
|
||||
@@ -2322,7 +2322,6 @@ class InfoExtractor(object):
|
||||
formats = self._parse_jwplayer_formats(
|
||||
video_data['sources'], video_id=this_video_id, m3u8_id=m3u8_id,
|
||||
mpd_id=mpd_id, rtmp_params=rtmp_params, base_url=base_url)
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
tracks = video_data.get('tracks')
|
||||
@@ -2339,16 +2338,25 @@ class InfoExtractor(object):
|
||||
'url': self._proto_relative_url(track_url)
|
||||
})
|
||||
|
||||
entries.append({
|
||||
entry = {
|
||||
'id': this_video_id,
|
||||
'title': video_data['title'] if require_title else video_data.get('title'),
|
||||
'title': unescapeHTML(video_data['title'] if require_title else video_data.get('title')),
|
||||
'description': video_data.get('description'),
|
||||
'thumbnail': self._proto_relative_url(video_data.get('image')),
|
||||
'timestamp': int_or_none(video_data.get('pubdate')),
|
||||
'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')),
|
||||
'subtitles': subtitles,
|
||||
'formats': formats,
|
||||
})
|
||||
}
|
||||
# https://github.com/jwplayer/jwplayer/blob/master/src/js/utils/validator.js#L32
|
||||
if len(formats) == 1 and re.search(r'^(?:http|//).*(?:youtube\.com|youtu\.be)/.+', formats[0]['url']):
|
||||
entry.update({
|
||||
'_type': 'url_transparent',
|
||||
'url': formats[0]['url'],
|
||||
})
|
||||
else:
|
||||
self._sort_formats(formats)
|
||||
entry['formats'] = formats
|
||||
entries.append(entry)
|
||||
if len(entries) == 1:
|
||||
return entries[0]
|
||||
else:
|
||||
@@ -2449,10 +2457,12 @@ class InfoExtractor(object):
|
||||
self._downloader.report_warning(msg)
|
||||
return res
|
||||
|
||||
def _set_cookie(self, domain, name, value, expire_time=None):
|
||||
def _set_cookie(self, domain, name, value, expire_time=None, port=None,
|
||||
path='/', secure=False, discard=False, rest={}, **kwargs):
|
||||
cookie = compat_cookiejar.Cookie(
|
||||
0, name, value, None, None, domain, None,
|
||||
None, '/', True, False, expire_time, '', None, None, None)
|
||||
0, name, value, port, port is not None, domain, True,
|
||||
domain.startswith('.'), path, True, secure, expire_time,
|
||||
discard, None, None, rest)
|
||||
self._downloader.cookiejar.set_cookie(cookie)
|
||||
|
||||
def _get_cookies(self, url):
|
||||
|
@@ -116,16 +116,16 @@ class CondeNastIE(InfoExtractor):
|
||||
entries = [self.url_result(build_url(path), 'CondeNast') for path in paths]
|
||||
return self.playlist_result(entries, playlist_title=title)
|
||||
|
||||
def _extract_video_params(self, webpage):
|
||||
query = {}
|
||||
params = self._search_regex(
|
||||
r'(?s)var params = {(.+?)}[;,]', webpage, 'player params', default=None)
|
||||
if params:
|
||||
query.update({
|
||||
'videoId': self._search_regex(r'videoId: [\'"](.+?)[\'"]', params, 'video id'),
|
||||
'playerId': self._search_regex(r'playerId: [\'"](.+?)[\'"]', params, 'player id'),
|
||||
'target': self._search_regex(r'target: [\'"](.+?)[\'"]', params, 'target'),
|
||||
})
|
||||
def _extract_video_params(self, webpage, display_id):
|
||||
query = self._parse_json(
|
||||
self._search_regex(
|
||||
r'(?s)var\s+params\s*=\s*({.+?})[;,]', webpage, 'player params',
|
||||
default='{}'),
|
||||
display_id, transform_source=js_to_json, fatal=False)
|
||||
if query:
|
||||
query['videoId'] = self._search_regex(
|
||||
r'(?:data-video-id=|currentVideoId\s*=\s*)["\']([\da-f]+)',
|
||||
webpage, 'video id', default=None)
|
||||
else:
|
||||
params = extract_attributes(self._search_regex(
|
||||
r'(<[^>]+data-js="video-player"[^>]+>)',
|
||||
@@ -141,17 +141,27 @@ class CondeNastIE(InfoExtractor):
|
||||
video_id = params['videoId']
|
||||
|
||||
video_info = None
|
||||
if params.get('playerId'):
|
||||
info_page = self._download_json(
|
||||
'http://player.cnevids.com/player/video.js',
|
||||
video_id, 'Downloading video info', fatal=False, query=params)
|
||||
if info_page:
|
||||
video_info = info_page.get('video')
|
||||
if not video_info:
|
||||
info_page = self._download_webpage(
|
||||
'http://player.cnevids.com/player/loader.js',
|
||||
video_id, 'Downloading loader info', query=params)
|
||||
else:
|
||||
|
||||
# New API path
|
||||
query = params.copy()
|
||||
query['embedType'] = 'inline'
|
||||
info_page = self._download_json(
|
||||
'http://player.cnevids.com/embed-api.json', video_id,
|
||||
'Downloading embed info', fatal=False, query=query)
|
||||
|
||||
# Old fallbacks
|
||||
if not info_page:
|
||||
if params.get('playerId'):
|
||||
info_page = self._download_json(
|
||||
'http://player.cnevids.com/player/video.js', video_id,
|
||||
'Downloading video info', fatal=False, query=params)
|
||||
if info_page:
|
||||
video_info = info_page.get('video')
|
||||
if not video_info:
|
||||
info_page = self._download_webpage(
|
||||
'http://player.cnevids.com/player/loader.js',
|
||||
video_id, 'Downloading loader info', query=params)
|
||||
if not video_info:
|
||||
info_page = self._download_webpage(
|
||||
'https://player.cnevids.com/inline/video/%s.js' % video_id,
|
||||
video_id, 'Downloading inline info', query={
|
||||
@@ -215,7 +225,7 @@ class CondeNastIE(InfoExtractor):
|
||||
if url_type == 'series':
|
||||
return self._extract_series(url, webpage)
|
||||
else:
|
||||
params = self._extract_video_params(webpage)
|
||||
params = self._extract_video_params(webpage, display_id)
|
||||
info = self._search_json_ld(
|
||||
webpage, display_id, fatal=False)
|
||||
info.update(self._extract_video(params))
|
||||
|
@@ -235,7 +235,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
|
||||
# vevo embed
|
||||
vevo_id = self._search_regex(
|
||||
r'<link rel="video_src" href="[^"]*?vevo.com[^"]*?video=(?P<id>[\w]*)',
|
||||
r'<link rel="video_src" href="[^"]*?vevo\.com[^"]*?video=(?P<id>[\w]*)',
|
||||
webpage, 'vevo embed', default=None)
|
||||
if vevo_id:
|
||||
return self.url_result('vevo:%s' % vevo_id, 'Vevo')
|
||||
@@ -325,7 +325,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
|
||||
class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
|
||||
IE_NAME = 'dailymotion:playlist'
|
||||
_VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>.+?)/'
|
||||
_VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>[^/?#&]+)'
|
||||
_MORE_PAGES_INDICATOR = r'(?s)<div class="pages[^"]*">.*?<a\s+class="[^"]*?icon-arrow_right[^"]*?"'
|
||||
_PAGE_TEMPLATE = 'https://www.dailymotion.com/playlist/%s/%s'
|
||||
_TESTS = [{
|
||||
|
@@ -19,7 +19,7 @@ class DeezerPlaylistIE(InfoExtractor):
|
||||
'id': '176747451',
|
||||
'title': 'Best!',
|
||||
'uploader': 'Anonymous',
|
||||
'thumbnail': r're:^https?://cdn-images.deezer.com/images/cover/.*\.jpg$',
|
||||
'thumbnail': r're:^https?://cdn-images\.deezer\.com/images/cover/.*\.jpg$',
|
||||
},
|
||||
'playlist_count': 30,
|
||||
'skip': 'Only available in .de',
|
||||
|
@@ -31,14 +31,12 @@ from .aenetworks import (
|
||||
AENetworksIE,
|
||||
HistoryTopicIE,
|
||||
)
|
||||
from .afreecatv import (
|
||||
AfreecaTVIE,
|
||||
AfreecaTVGlobalIE,
|
||||
)
|
||||
from .afreecatv import AfreecaTVIE
|
||||
from .airmozilla import AirMozillaIE
|
||||
from .aljazeera import AlJazeeraIE
|
||||
from .alphaporno import AlphaPornoIE
|
||||
from .amcnetworks import AMCNetworksIE
|
||||
from .americastestkitchen import AmericasTestKitchenIE
|
||||
from .animeondemand import AnimeOnDemandIE
|
||||
from .anitube import AnitubeIE
|
||||
from .anvato import AnvatoIE
|
||||
@@ -149,7 +147,10 @@ from .camdemy import (
|
||||
from .camwithher import CamWithHerIE
|
||||
from .canalplus import CanalplusIE
|
||||
from .canalc2 import Canalc2IE
|
||||
from .canvas import CanvasIE
|
||||
from .canvas import (
|
||||
CanvasIE,
|
||||
CanvasEenIE,
|
||||
)
|
||||
from .carambatv import (
|
||||
CarambaTVIE,
|
||||
CarambaTVPageIE,
|
||||
@@ -380,6 +381,7 @@ from .freesound import FreesoundIE
|
||||
from .freespeech import FreespeechIE
|
||||
from .freshlive import FreshLiveIE
|
||||
from .funimation import FunimationIE
|
||||
from .funk import FunkIE
|
||||
from .funnyordie import FunnyOrDieIE
|
||||
from .fusion import FusionIE
|
||||
from .fxnetworks import FXNetworksIE
|
||||
@@ -482,6 +484,7 @@ from .jove import JoveIE
|
||||
from .joj import JojIE
|
||||
from .jwplatform import JWPlatformIE
|
||||
from .jpopsukitv import JpopsukiIE
|
||||
from .kakao import KakaoIE
|
||||
from .kaltura import KalturaIE
|
||||
from .kamcord import KamcordIE
|
||||
from .kanalplay import KanalPlayIE
|
||||
@@ -768,6 +771,7 @@ from .ora import OraTVIE
|
||||
from .orf import (
|
||||
ORFTVthekIE,
|
||||
ORFFM4IE,
|
||||
ORFFM4StoryIE,
|
||||
ORFOE1IE,
|
||||
ORFIPTVIE,
|
||||
)
|
||||
@@ -807,6 +811,7 @@ from .polskieradio import (
|
||||
PolskieRadioIE,
|
||||
PolskieRadioCategoryIE,
|
||||
)
|
||||
from .popcorntv import PopcornTVIE
|
||||
from .porn91 import Porn91IE
|
||||
from .porncom import PornComIE
|
||||
from .pornflip import PornFlipIE
|
||||
@@ -936,6 +941,7 @@ from .skynewsarabia import (
|
||||
)
|
||||
from .skysports import SkySportsIE
|
||||
from .slideshare import SlideshareIE
|
||||
from .slideslive import SlidesLiveIE
|
||||
from .slutload import SlutloadIE
|
||||
from .smotri import (
|
||||
SmotriIE,
|
||||
@@ -1239,7 +1245,10 @@ from .vodpl import VODPlIE
|
||||
from .vodplatform import VODPlatformIE
|
||||
from .voicerepublic import VoiceRepublicIE
|
||||
from .voot import VootIE
|
||||
from .voxmedia import VoxMediaIE
|
||||
from .voxmedia import (
|
||||
VoxMediaVolumeIE,
|
||||
VoxMediaIE,
|
||||
)
|
||||
from .vporn import VpornIE
|
||||
from .vrt import VRTIE
|
||||
from .vrak import VrakIE
|
||||
@@ -1338,7 +1347,6 @@ from .youtube import (
|
||||
YoutubeSearchDateIE,
|
||||
YoutubeSearchIE,
|
||||
YoutubeSearchURLIE,
|
||||
YoutubeSharedVideoIE,
|
||||
YoutubeShowIE,
|
||||
YoutubeSubscriptionsIE,
|
||||
YoutubeTruncatedIDIE,
|
||||
|
@@ -67,9 +67,9 @@ class FacebookIE(InfoExtractor):
|
||||
'uploader': 'Tennis on Facebook',
|
||||
'upload_date': '20140908',
|
||||
'timestamp': 1410199200,
|
||||
}
|
||||
},
|
||||
'skip': 'Requires logging in',
|
||||
}, {
|
||||
'note': 'Video without discernible title',
|
||||
'url': 'https://www.facebook.com/video.php?v=274175099429670',
|
||||
'info_dict': {
|
||||
'id': '274175099429670',
|
||||
@@ -78,6 +78,7 @@ class FacebookIE(InfoExtractor):
|
||||
'uploader': 'Asif Nawab Butt',
|
||||
'upload_date': '20140506',
|
||||
'timestamp': 1399398998,
|
||||
'thumbnail': r're:^https?://.*',
|
||||
},
|
||||
'expected_warnings': [
|
||||
'title'
|
||||
@@ -94,6 +95,7 @@ class FacebookIE(InfoExtractor):
|
||||
'upload_date': '20160110',
|
||||
'timestamp': 1452431627,
|
||||
},
|
||||
'skip': 'Requires logging in',
|
||||
}, {
|
||||
'url': 'https://www.facebook.com/maxlayn/posts/10153807558977570',
|
||||
'md5': '037b1fa7f3c2d02b7a0d7bc16031ecc6',
|
||||
@@ -121,7 +123,11 @@ class FacebookIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '10153664894881749',
|
||||
'ext': 'mp4',
|
||||
'title': 'Facebook video #10153664894881749',
|
||||
'title': 'Average time to confirm recent Supreme Court nominees: 67 days Longest it\'s t...',
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'timestamp': 1456259628,
|
||||
'upload_date': '20160223',
|
||||
'uploader': 'Barack Obama',
|
||||
},
|
||||
}, {
|
||||
# have 1080P, but only up to 720p in swf params
|
||||
@@ -130,10 +136,11 @@ class FacebookIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '10155529876156509',
|
||||
'ext': 'mp4',
|
||||
'title': 'Holocaust survivor becomes US citizen',
|
||||
'title': 'She survived the holocaust — and years later, she’s getting her citizenship s...',
|
||||
'timestamp': 1477818095,
|
||||
'upload_date': '20161030',
|
||||
'uploader': 'CNN',
|
||||
'thumbnail': r're:^https?://.*',
|
||||
},
|
||||
}, {
|
||||
# bigPipe.onPageletArrive ... onPageletArrive pagelet_group_mall
|
||||
@@ -158,6 +165,7 @@ class FacebookIE(InfoExtractor):
|
||||
'timestamp': 1477305000,
|
||||
'upload_date': '20161024',
|
||||
'uploader': 'La Guía Del Varón',
|
||||
'thumbnail': r're:^https?://.*',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -376,6 +384,7 @@ class FacebookIE(InfoExtractor):
|
||||
timestamp = int_or_none(self._search_regex(
|
||||
r'<abbr[^>]+data-utime=["\'](\d+)', webpage,
|
||||
'timestamp', default=None))
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
|
||||
info_dict = {
|
||||
'id': video_id,
|
||||
@@ -383,6 +392,7 @@ class FacebookIE(InfoExtractor):
|
||||
'formats': formats,
|
||||
'uploader': uploader,
|
||||
'timestamp': timestamp,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
|
||||
return webpage, info_dict
|
||||
|
@@ -2,7 +2,10 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .adobepass import AdobePassIE
|
||||
from .uplynk import UplynkPreplayIE
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
HEADRequest,
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
parse_duration,
|
||||
@@ -53,14 +56,7 @@ class FOXIE(AdobePassIE):
|
||||
})
|
||||
|
||||
title = video['name']
|
||||
|
||||
m3u8_url = self._download_json(
|
||||
video['videoRelease']['url'], video_id)['playURL']
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='hls')
|
||||
self._sort_formats(formats)
|
||||
release_url = video['videoRelease']['url']
|
||||
|
||||
description = video.get('description')
|
||||
duration = int_or_none(video.get('durationInSeconds')) or int_or_none(
|
||||
@@ -84,7 +80,7 @@ class FOXIE(AdobePassIE):
|
||||
# TODO: AP
|
||||
pass
|
||||
|
||||
return {
|
||||
info = {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
@@ -97,5 +93,22 @@ class FOXIE(AdobePassIE):
|
||||
'episode': episode,
|
||||
'episode_number': episode_number,
|
||||
'release_year': release_year,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
urlh = self._request_webpage(HEADRequest(release_url), video_id)
|
||||
video_url = compat_str(urlh.geturl())
|
||||
|
||||
if UplynkPreplayIE.suitable(video_url):
|
||||
info.update({
|
||||
'_type': 'url_transparent',
|
||||
'url': video_url,
|
||||
'ie_key': UplynkPreplayIE.ie_key(),
|
||||
})
|
||||
else:
|
||||
m3u8_url = self._download_json(release_url, video_id)['playURL']
|
||||
formats = self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='hls')
|
||||
self._sort_formats(formats)
|
||||
info['formats'] = formats
|
||||
return info
|
||||
|
@@ -27,7 +27,7 @@ class FreespeechIE(InfoExtractor):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
title = mobj.group('title')
|
||||
webpage = self._download_webpage(url, title)
|
||||
info_json = self._search_regex(r'jQuery.extend\(Drupal.settings, ({.*?})\);', webpage, 'info')
|
||||
info_json = self._search_regex(r'jQuery\.extend\(Drupal\.settings, ({.*?})\);', webpage, 'info')
|
||||
info = json.loads(info_json)
|
||||
|
||||
return {
|
||||
|
43
youtube_dl/extractor/funk.py
Normal file
43
youtube_dl/extractor/funk.py
Normal file
@@ -0,0 +1,43 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .nexx import NexxIE
|
||||
from ..utils import extract_attributes
|
||||
|
||||
|
||||
class FunkIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?funk\.net/(?:mix|channel)/(?:[^/]+/)*(?P<id>[^?/#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.funk.net/mix/59d65d935f8b160001828b5b/0/59d517e741dca10001252574/',
|
||||
'md5': '4d40974481fa3475f8bccfd20c5361f8',
|
||||
'info_dict': {
|
||||
'id': '716599',
|
||||
'ext': 'mp4',
|
||||
'title': 'Neue Rechte Welle',
|
||||
'description': 'md5:a30a53f740ffb6bfd535314c2cc5fb69',
|
||||
'timestamp': 1501337639,
|
||||
'upload_date': '20170729',
|
||||
},
|
||||
'params': {
|
||||
'format': 'bestvideo',
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.funk.net/channel/59d5149841dca100012511e3/0/59d52049999264000182e79d/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
domain_id = NexxIE._extract_domain_id(webpage) or '741'
|
||||
nexx_id = extract_attributes(self._search_regex(
|
||||
r'(<div[^>]id=["\']mediaplayer-funk[^>]+>)',
|
||||
webpage, 'media player'))['data-id']
|
||||
|
||||
return self.url_result(
|
||||
'nexx:%s:%s' % (domain_id, nexx_id), ie=NexxIE.ie_key(),
|
||||
video_id=nexx_id)
|
@@ -105,7 +105,7 @@ class GameSpotIE(OnceIE):
|
||||
onceux_url = self._parse_json(unescapeHTML(onceux_json), page_id).get('metadataUri')
|
||||
if onceux_url:
|
||||
formats.extend(self._extract_once_formats(re.sub(
|
||||
r'https?://[^/]+', 'http://once.unicornmedia.com', onceux_url).replace('ads/vmap/', '')))
|
||||
r'https?://[^/]+', 'http://once.unicornmedia.com', onceux_url)))
|
||||
|
||||
if not formats:
|
||||
for quality in ['sd', 'hd']:
|
||||
|
@@ -22,6 +22,8 @@ from ..utils import (
|
||||
HEADRequest,
|
||||
is_html,
|
||||
js_to_json,
|
||||
KNOWN_EXTENSIONS,
|
||||
mimetype2ext,
|
||||
orderedSet,
|
||||
sanitized_Request,
|
||||
smuggle_url,
|
||||
@@ -99,6 +101,7 @@ from .mediaset import MediasetIE
|
||||
from .joj import JojIE
|
||||
from .megaphone import MegaphoneIE
|
||||
from .vzaar import VzaarIE
|
||||
from .channel9 import Channel9IE
|
||||
|
||||
|
||||
class GenericIE(InfoExtractor):
|
||||
@@ -1088,7 +1091,7 @@ class GenericIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20150212',
|
||||
'uploader': 'The National Archives UK',
|
||||
'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
|
||||
'description': 'md5:8078af856dca76edc42910b61273dbbf',
|
||||
'uploader_id': 'NationalArchives08',
|
||||
'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
|
||||
},
|
||||
@@ -1104,7 +1107,8 @@ class GenericIE(InfoExtractor):
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
'skip': 'does not contain a video anymore',
|
||||
},
|
||||
# Complex jwplayer
|
||||
{
|
||||
@@ -1113,6 +1117,7 @@ class GenericIE(InfoExtractor):
|
||||
'id': 'videos',
|
||||
'ext': 'mp4',
|
||||
'title': 'king machine trailer 1',
|
||||
'description': 'Browse King Machine videos & audio for sweet media. Your eyes will thank you.',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
},
|
||||
@@ -1130,13 +1135,42 @@ class GenericIE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
{
|
||||
# Video.js embed, multiple formats
|
||||
'url': 'http://ortcam.com/solidworks-урок-6-настройка-чертежа_33f9b7351.html',
|
||||
'info_dict': {
|
||||
'id': 'yygqldloqIk',
|
||||
'ext': 'mp4',
|
||||
'title': 'SolidWorks. Урок 6 Настройка чертежа',
|
||||
'description': 'md5:baf95267792646afdbf030e4d06b2ab3',
|
||||
'upload_date': '20130314',
|
||||
'uploader': 'PROстое3D',
|
||||
'uploader_id': 'PROstoe3D',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
# Video.js embed, single format
|
||||
'url': 'https://www.vooplayer.com/v3/watch/watch.php?v=NzgwNTg=',
|
||||
'info_dict': {
|
||||
'id': 'watch',
|
||||
'ext': 'mp4',
|
||||
'title': 'Step 1 - Good Foundation',
|
||||
'description': 'md5:d1e7ff33a29fc3eb1673d6c270d344f4',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# rtl.nl embed
|
||||
{
|
||||
'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
|
||||
'playlist_mincount': 5,
|
||||
'info_dict': {
|
||||
'id': 'aanslagen-kopenhagen',
|
||||
'title': 'Aanslagen Kopenhagen | RTL Nieuws',
|
||||
'title': 'Aanslagen Kopenhagen',
|
||||
}
|
||||
},
|
||||
# Zapiks embed
|
||||
@@ -1268,6 +1302,7 @@ class GenericIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'This video is unavailable.',
|
||||
},
|
||||
# Pladform embed
|
||||
{
|
||||
@@ -1281,6 +1316,7 @@ class GenericIE(InfoExtractor):
|
||||
'duration': 694,
|
||||
'age_limit': 0,
|
||||
},
|
||||
'skip': 'HTTP Error 404: Not Found',
|
||||
},
|
||||
# Playwire embed
|
||||
{
|
||||
@@ -1301,6 +1337,14 @@ class GenericIE(InfoExtractor):
|
||||
'id': '518726732',
|
||||
'ext': 'mp4',
|
||||
'title': 'Facebook Creates "On This Day" | Crunch Report',
|
||||
'description': 'Amazon updates Fire TV line, Tesla\'s Model X spotted in the wild',
|
||||
'timestamp': 1427237531,
|
||||
'uploader': 'Crunch Report',
|
||||
'upload_date': '20150324',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# SVT embed
|
||||
@@ -1352,16 +1396,20 @@ class GenericIE(InfoExtractor):
|
||||
'upload_date': '20140107',
|
||||
'timestamp': 1389118457,
|
||||
},
|
||||
'skip': 'Invalid Page URL',
|
||||
},
|
||||
# NBC News embed
|
||||
{
|
||||
'url': 'http://www.vulture.com/2016/06/letterman-couldnt-care-less-about-late-night.html',
|
||||
'md5': '1aa589c675898ae6d37a17913cf68d66',
|
||||
'info_dict': {
|
||||
'id': '701714499682',
|
||||
'id': 'x_dtl_oa_LettermanliftPR_160608',
|
||||
'ext': 'mp4',
|
||||
'title': 'PREVIEW: On Assignment: David Letterman',
|
||||
'title': 'David Letterman: A Preview',
|
||||
'description': 'A preview of Tom Brokaw\'s interview with David Letterman as part of the On Assignment series powered by Dateline. Airs Sunday June 12 at 7/6c.',
|
||||
'upload_date': '20160609',
|
||||
'timestamp': 1465431544,
|
||||
'uploader': 'NBCU-NEWS',
|
||||
},
|
||||
},
|
||||
# UDN embed
|
||||
@@ -1378,6 +1426,7 @@ class GenericIE(InfoExtractor):
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['Failed to parse JSON Expecting value'],
|
||||
},
|
||||
# Ooyala embed
|
||||
{
|
||||
@@ -1385,7 +1434,7 @@ class GenericIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
|
||||
'ext': 'mp4',
|
||||
'description': 'VIDEO: INDEX/MATCH versus VLOOKUP.',
|
||||
'description': 'Index/Match versus VLOOKUP.',
|
||||
'title': 'This is what separates the Excel masters from the wannabes',
|
||||
'duration': 191.933,
|
||||
},
|
||||
@@ -1423,7 +1472,8 @@ class GenericIE(InfoExtractor):
|
||||
'upload_date': '20150622',
|
||||
'uploader': 'Public Sénat',
|
||||
'uploader_id': 'xa9gza',
|
||||
}
|
||||
},
|
||||
'skip': 'File not found.',
|
||||
},
|
||||
# OnionStudios embed
|
||||
{
|
||||
@@ -1581,22 +1631,6 @@ class GenericIE(InfoExtractor):
|
||||
},
|
||||
'add_ie': ['BrightcoveLegacy'],
|
||||
},
|
||||
# Nexx embed
|
||||
{
|
||||
'url': 'https://www.funk.net/serien/5940e15073f6120001657956/items/593efbb173f6120001657503',
|
||||
'info_dict': {
|
||||
'id': '247746',
|
||||
'ext': 'mp4',
|
||||
'title': "Yesterday's Jam (OV)",
|
||||
'description': 'md5:09bc0984723fed34e2581624a84e05f0',
|
||||
'timestamp': 1492594816,
|
||||
'upload_date': '20170419',
|
||||
},
|
||||
'params': {
|
||||
'format': 'bestvideo',
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# Facebook <iframe> embed
|
||||
{
|
||||
'url': 'https://www.hostblogger.de/blog/archives/6181-Auto-jagt-Betonmischer.html',
|
||||
@@ -1879,6 +1913,15 @@ class GenericIE(InfoExtractor):
|
||||
'title': 'Building A Business Online: Principal Chairs Q & A',
|
||||
},
|
||||
},
|
||||
{
|
||||
# multiple HTML5 videos on one page
|
||||
'url': 'https://www.paragon-software.com/home/rk-free/keyscenarios.html',
|
||||
'info_dict': {
|
||||
'id': 'keyscenarios',
|
||||
'title': 'Rescue Kit 14 Free Edition - Getting started',
|
||||
},
|
||||
'playlist_count': 4,
|
||||
}
|
||||
# {
|
||||
# # TODO: find another test
|
||||
# # http://schema.org/VideoObject
|
||||
@@ -2166,7 +2209,7 @@ class GenericIE(InfoExtractor):
|
||||
# And then there are the jokers who advertise that they use RTA,
|
||||
# but actually don't.
|
||||
AGE_LIMIT_MARKERS = [
|
||||
r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
|
||||
r'Proudly Labeled <a href="http://www\.rtalabel\.org/" title="Restricted to Adults">RTA</a>',
|
||||
]
|
||||
if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
|
||||
age_limit = 18
|
||||
@@ -2228,7 +2271,7 @@ class GenericIE(InfoExtractor):
|
||||
|
||||
# Look for embedded rtl.nl player
|
||||
matches = re.findall(
|
||||
r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
|
||||
r'<iframe[^>]+?src="((?:https?:)?//(?:(?:www|static)\.)?rtl\.nl/(?:system/videoplayer/[^"]+(?:video_)?)?embed[^"]+)"',
|
||||
webpage)
|
||||
if matches:
|
||||
return self.playlist_from_matches(matches, video_id, video_title, ie='RtlNl')
|
||||
@@ -2627,7 +2670,7 @@ class GenericIE(InfoExtractor):
|
||||
|
||||
# Look for UDN embeds
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage)
|
||||
r'<iframe[^>]+src="(?:https?:)?(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(
|
||||
compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
|
||||
@@ -2831,6 +2874,11 @@ class GenericIE(InfoExtractor):
|
||||
return self.playlist_from_matches(
|
||||
vzaar_urls, video_id, video_title, ie=VzaarIE.ie_key())
|
||||
|
||||
channel9_urls = Channel9IE._extract_urls(webpage)
|
||||
if channel9_urls:
|
||||
return self.playlist_from_matches(
|
||||
channel9_urls, video_id, video_title, ie=Channel9IE.ie_key())
|
||||
|
||||
def merge_dicts(dict1, dict2):
|
||||
merged = {}
|
||||
for k, v in dict1.items():
|
||||
@@ -2849,13 +2897,20 @@ class GenericIE(InfoExtractor):
|
||||
# Look for HTML5 media
|
||||
entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
|
||||
if entries:
|
||||
for entry in entries:
|
||||
entry.update({
|
||||
if len(entries) == 1:
|
||||
entries[0].update({
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
})
|
||||
else:
|
||||
for num, entry in enumerate(entries, start=1):
|
||||
entry.update({
|
||||
'id': '%s-%s' % (video_id, num),
|
||||
'title': '%s (%d)' % (video_title, num),
|
||||
})
|
||||
for entry in entries:
|
||||
self._sort_formats(entry['formats'])
|
||||
return self.playlist_result(entries)
|
||||
return self.playlist_result(entries, video_id, video_title)
|
||||
|
||||
jwplayer_data = self._find_jwplayer_data(
|
||||
webpage, video_id, transform_source=js_to_json)
|
||||
@@ -2864,6 +2919,46 @@ class GenericIE(InfoExtractor):
|
||||
jwplayer_data, video_id, require_title=False, base_url=url)
|
||||
return merge_dicts(info, info_dict)
|
||||
|
||||
# Video.js embed
|
||||
mobj = re.search(
|
||||
r'(?s)\bvideojs\s*\(.+?\.src\s*\(\s*((?:\[.+?\]|{.+?}))\s*\)\s*;',
|
||||
webpage)
|
||||
if mobj is not None:
|
||||
sources = self._parse_json(
|
||||
mobj.group(1), video_id, transform_source=js_to_json,
|
||||
fatal=False) or []
|
||||
if not isinstance(sources, list):
|
||||
sources = [sources]
|
||||
formats = []
|
||||
for source in sources:
|
||||
src = source.get('src')
|
||||
if not src or not isinstance(src, compat_str):
|
||||
continue
|
||||
src = compat_urlparse.urljoin(url, src)
|
||||
src_type = source.get('type')
|
||||
if isinstance(src_type, compat_str):
|
||||
src_type = src_type.lower()
|
||||
ext = determine_ext(src).lower()
|
||||
if src_type == 'video/youtube':
|
||||
return self.url_result(src, YoutubeIE.ie_key())
|
||||
if src_type == 'application/dash+xml' or ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
src, video_id, mpd_id='dash', fatal=False))
|
||||
elif src_type == 'application/x-mpegurl' or ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
src, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'url': src,
|
||||
'ext': (mimetype2ext(src_type) or
|
||||
ext if ext in KNOWN_EXTENSIONS else 'mp4'),
|
||||
})
|
||||
if formats:
|
||||
self._sort_formats(formats)
|
||||
info_dict['formats'] = formats
|
||||
return info_dict
|
||||
|
||||
# Looking for http://schema.org/VideoObject
|
||||
json_ld = self._search_json_ld(
|
||||
webpage, video_id, default={}, expected_type='VideoObject')
|
||||
@@ -2957,7 +3052,7 @@ class GenericIE(InfoExtractor):
|
||||
# be supported by youtube-dl thus this is checked the very last (see
|
||||
# https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser)
|
||||
embed_url = self._html_search_meta('twitter:player', webpage, default=None)
|
||||
if embed_url:
|
||||
if embed_url and embed_url != url:
|
||||
return self.url_result(embed_url)
|
||||
|
||||
if not found:
|
||||
|
@@ -11,7 +11,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class GfycatIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?gfycat\.com/(?:ifr/)?(?P<id>[^/?#]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?gfycat\.com/(?:ifr/|gifs/detail/)?(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://gfycat.com/DeadlyDecisiveGermanpinscher',
|
||||
'info_dict': {
|
||||
@@ -44,6 +44,9 @@ class GfycatIE(InfoExtractor):
|
||||
'categories': list,
|
||||
'age_limit': 0,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://gfycat.com/gifs/detail/UnconsciousLankyIvorygull',
|
||||
'only_matching': True
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -61,7 +61,7 @@ class GooglePlusIE(InfoExtractor):
|
||||
'width': int(width),
|
||||
'height': int(height),
|
||||
} for width, height, video_url in re.findall(
|
||||
r'\d+,(\d+),(\d+),"(https?://[^.]+\.googleusercontent.com.*?)"', webpage)]
|
||||
r'\d+,(\d+),(\d+),"(https?://[^.]+\.googleusercontent\.com.*?)"', webpage)]
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
|
@@ -2,6 +2,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
@@ -25,6 +26,22 @@ class HeiseIE(InfoExtractor):
|
||||
'description': 'md5:c934cbfb326c669c2bcabcbe3d3fcd20',
|
||||
'thumbnail': r're:^https?://.*/gallery/$',
|
||||
}
|
||||
}, {
|
||||
# YouTube embed
|
||||
'url': 'http://www.heise.de/newsticker/meldung/Netflix-In-20-Jahren-vom-Videoverleih-zum-TV-Revolutionaer-3814130.html',
|
||||
'md5': 'e403d2b43fea8e405e88e3f8623909f1',
|
||||
'info_dict': {
|
||||
'id': '6kmWbXleKW4',
|
||||
'ext': 'mp4',
|
||||
'title': 'NEU IM SEPTEMBER | Netflix',
|
||||
'description': 'md5:2131f3c7525e540d5fd841de938bd452',
|
||||
'upload_date': '20170830',
|
||||
'uploader': 'Netflix Deutschland, Österreich und Schweiz',
|
||||
'uploader_id': 'netflixdach',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.heise.de/ct/artikel/c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2403911.html',
|
||||
'only_matching': True,
|
||||
@@ -40,6 +57,16 @@ class HeiseIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._html_search_meta('fulltitle', webpage, default=None)
|
||||
if not title or title == "c't":
|
||||
title = self._search_regex(
|
||||
r'<div[^>]+class="videoplayerjw"[^>]+data-title="([^"]+)"',
|
||||
webpage, 'title')
|
||||
|
||||
yt_urls = YoutubeIE._extract_urls(webpage)
|
||||
if yt_urls:
|
||||
return self.playlist_from_matches(yt_urls, video_id, title, ie=YoutubeIE.ie_key())
|
||||
|
||||
container_id = self._search_regex(
|
||||
r'<div class="videoplayerjw"[^>]+data-container="([0-9]+)"',
|
||||
webpage, 'container ID')
|
||||
@@ -47,12 +74,6 @@ class HeiseIE(InfoExtractor):
|
||||
r'<div class="videoplayerjw"[^>]+data-sequenz="([0-9]+)"',
|
||||
webpage, 'sequenz ID')
|
||||
|
||||
title = self._html_search_meta('fulltitle', webpage, default=None)
|
||||
if not title or title == "c't":
|
||||
title = self._search_regex(
|
||||
r'<div[^>]+class="videoplayerjw"[^>]+data-title="([^"]+)"',
|
||||
webpage, 'title')
|
||||
|
||||
doc = self._download_xml(
|
||||
'http://www.heise.de/videout/feed', video_id, query={
|
||||
'container': container_id,
|
||||
|
@@ -11,45 +11,20 @@ from ..utils import (
|
||||
|
||||
|
||||
class HowStuffWorksIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://[\da-z-]+\.howstuffworks\.com/(?:[^/]+/)*(?:\d+-)?(?P<id>.+?)-video\.htm'
|
||||
_VALID_URL = r'https?://[\da-z-]+\.(?:howstuffworks|stuff(?:(?:youshould|theydontwantyouto)know|toblowyourmind|momnevertoldyou)|(?:brain|car)stuffshow|fwthinking|geniusstuff)\.com/(?:[^/]+/)*(?:\d+-)?(?P<id>.+?)-video\.htm'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm',
|
||||
'url': 'http://www.stufftoblowyourmind.com/videos/optical-illusions-video.htm',
|
||||
'md5': '76646a5acc0c92bf7cd66751ca5db94d',
|
||||
'info_dict': {
|
||||
'id': '450221',
|
||||
'ext': 'flv',
|
||||
'title': 'Cool Jobs - Iditarod Musher',
|
||||
'description': 'Cold sleds, freezing temps and warm dog breath... an Iditarod musher\'s dream. Kasey-Dee Gardner jumps on a sled to find out what the big deal is.',
|
||||
'display_id': 'cool-jobs-iditarod-musher',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 161,
|
||||
},
|
||||
'skip': 'Video broken',
|
||||
},
|
||||
{
|
||||
'url': 'http://adventure.howstuffworks.com/7199-survival-zone-food-and-water-in-the-savanna-video.htm',
|
||||
'info_dict': {
|
||||
'id': '453464',
|
||||
'id': '855410',
|
||||
'ext': 'mp4',
|
||||
'title': 'Survival Zone: Food and Water In the Savanna',
|
||||
'description': 'Learn how to find both food and water while trekking in the African savannah. In this video from the Discovery Channel.',
|
||||
'display_id': 'survival-zone-food-and-water-in-the-savanna',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'title': 'Your Trickster Brain: Optical Illusions -- Science on the Web',
|
||||
'description': 'md5:e374ff9561f6833ad076a8cc0a5ab2fb',
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://entertainment.howstuffworks.com/arts/2706-sword-swallowing-1-by-dan-meyer-video.htm',
|
||||
'info_dict': {
|
||||
'id': '440011',
|
||||
'ext': 'mp4',
|
||||
'title': 'Sword Swallowing #1 by Dan Meyer',
|
||||
'description': 'Video footage (1 of 3) used by permission of the owner Dan Meyer through Sword Swallowers Association International <www.swordswallow.org>',
|
||||
'display_id': 'sword-swallowing-1-by-dan-meyer',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://shows.howstuffworks.com/stuff-to-blow-your-mind/optical-illusions-video.htm',
|
||||
'url': 'http://shows.howstuffworks.com/more-shows/why-does-balloon-stick-to-hair-video.htm',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
|
@@ -104,7 +104,7 @@ class HRTiIE(HRTiBaseIE):
|
||||
(?:
|
||||
hrti:(?P<short_id>[0-9]+)|
|
||||
https?://
|
||||
hrti\.hrt\.hr/\#/video/show/(?P<id>[0-9]+)/(?P<display_id>[^/]+)?
|
||||
hrti\.hrt\.hr/(?:\#/)?video/show/(?P<id>[0-9]+)/(?P<display_id>[^/]+)?
|
||||
)
|
||||
'''
|
||||
_TESTS = [{
|
||||
@@ -129,6 +129,9 @@ class HRTiIE(HRTiBaseIE):
|
||||
}, {
|
||||
'url': 'hrti:2181385',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://hrti.hrt.hr/video/show/3873068/cuvar-dvorca-dramska-serija-14',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -170,7 +173,7 @@ class HRTiIE(HRTiBaseIE):
|
||||
|
||||
|
||||
class HRTiPlaylistIE(HRTiBaseIE):
|
||||
_VALID_URL = r'https?://hrti.hrt.hr/#/video/list/category/(?P<id>[0-9]+)/(?P<display_id>[^/]+)?'
|
||||
_VALID_URL = r'https?://hrti\.hrt\.hr/(?:#/)?video/list/category/(?P<id>[0-9]+)/(?P<display_id>[^/]+)?'
|
||||
_TESTS = [{
|
||||
'url': 'https://hrti.hrt.hr/#/video/list/category/212/ekumena',
|
||||
'info_dict': {
|
||||
@@ -182,6 +185,9 @@ class HRTiPlaylistIE(HRTiBaseIE):
|
||||
}, {
|
||||
'url': 'https://hrti.hrt.hr/#/video/list/category/212/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://hrti.hrt.hr/video/list/category/212/ekumena',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -203,7 +203,7 @@ class PCMagIE(IGNIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?pcmag\.com/(?P<type>videos|article2)(/.+)?/(?P<name_or_id>.+)'
|
||||
IE_NAME = 'pcmag'
|
||||
|
||||
_EMBED_RE = r'iframe.setAttribute\("src",\s*__util.objToUrlString\("http://widgets\.ign\.com/video/embed/content.html?[^"]*url=([^"]+)["&]'
|
||||
_EMBED_RE = r'iframe\.setAttribute\("src",\s*__util.objToUrlString\("http://widgets\.ign\.com/video/embed/content\.html?[^"]*url=([^"]+)["&]'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.pcmag.com/videos/2015/01/06/010615-whats-new-now-is-gogo-snooping-on-your-data',
|
||||
|
@@ -8,7 +8,10 @@ from ..compat import (
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import determine_ext
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
update_url_query,
|
||||
)
|
||||
from .bokecc import BokeCCBaseIE
|
||||
|
||||
|
||||
@@ -68,21 +71,22 @@ class InfoQIE(BokeCCBaseIE):
|
||||
'play_path': playpath,
|
||||
}]
|
||||
|
||||
def _extract_cookies(self, webpage):
|
||||
policy = self._search_regex(r'InfoQConstants.scp\s*=\s*\'([^\']+)\'', webpage, 'policy')
|
||||
signature = self._search_regex(r'InfoQConstants.scs\s*=\s*\'([^\']+)\'', webpage, 'signature')
|
||||
key_pair_id = self._search_regex(r'InfoQConstants.sck\s*=\s*\'([^\']+)\'', webpage, 'key-pair-id')
|
||||
return 'CloudFront-Policy=%s; CloudFront-Signature=%s; CloudFront-Key-Pair-Id=%s' % (
|
||||
policy, signature, key_pair_id)
|
||||
def _extract_cf_auth(self, webpage):
|
||||
policy = self._search_regex(r'InfoQConstants\.scp\s*=\s*\'([^\']+)\'', webpage, 'policy')
|
||||
signature = self._search_regex(r'InfoQConstants\.scs\s*=\s*\'([^\']+)\'', webpage, 'signature')
|
||||
key_pair_id = self._search_regex(r'InfoQConstants\.sck\s*=\s*\'([^\']+)\'', webpage, 'key-pair-id')
|
||||
return {
|
||||
'Policy': policy,
|
||||
'Signature': signature,
|
||||
'Key-Pair-Id': key_pair_id,
|
||||
}
|
||||
|
||||
def _extract_http_video(self, webpage):
|
||||
http_video_url = self._search_regex(r'P\.s\s*=\s*\'([^\']+)\'', webpage, 'video URL')
|
||||
http_video_url = update_url_query(http_video_url, self._extract_cf_auth(webpage))
|
||||
return [{
|
||||
'format_id': 'http_video',
|
||||
'url': http_video_url,
|
||||
'http_headers': {
|
||||
'Cookie': self._extract_cookies(webpage)
|
||||
},
|
||||
}]
|
||||
|
||||
def _extract_http_audio(self, webpage, video_id):
|
||||
@@ -91,22 +95,20 @@ class InfoQIE(BokeCCBaseIE):
|
||||
if not http_audio_url:
|
||||
return []
|
||||
|
||||
cookies_header = {'Cookie': self._extract_cookies(webpage)}
|
||||
|
||||
# base URL is found in the Location header in the response returned by
|
||||
# GET https://www.infoq.com/mp3download.action?filename=... when logged in.
|
||||
http_audio_url = compat_urlparse.urljoin('http://res.infoq.com/downloads/mp3downloads/', http_audio_url)
|
||||
http_audio_url = update_url_query(http_audio_url, self._extract_cf_auth(webpage))
|
||||
|
||||
# audio file seem to be missing some times even if there is a download link
|
||||
# so probe URL to make sure
|
||||
if not self._is_valid_url(http_audio_url, video_id, headers=cookies_header):
|
||||
if not self._is_valid_url(http_audio_url, video_id):
|
||||
return []
|
||||
|
||||
return [{
|
||||
'format_id': 'http_audio',
|
||||
'url': http_audio_url,
|
||||
'vcodec': 'none',
|
||||
'http_headers': cookies_header,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -30,7 +30,7 @@ class JeuxVideoIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, title)
|
||||
title = self._html_search_meta('name', webpage) or self._og_search_title(webpage)
|
||||
config_url = self._html_search_regex(
|
||||
r'data-src(?:set-video)?="(/contenu/medias/video.php.*?)"',
|
||||
r'data-src(?:set-video)?="(/contenu/medias/video\.php.*?)"',
|
||||
webpage, 'config URL')
|
||||
config_url = 'http://www.jeuxvideo.com' + config_url
|
||||
|
||||
|
149
youtube_dl/extractor/kakao.py
Normal file
149
youtube_dl/extractor/kakao.py
Normal file
@@ -0,0 +1,149 @@
|
||||
# coding: utf-8
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
unified_timestamp,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
class KakaoIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://tv\.kakao\.com/channel/(?P<channel>\d+)/cliplink/(?P<id>\d+)'
|
||||
_API_BASE = 'http://tv.kakao.com/api/v1/ft/cliplinks'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://tv.kakao.com/channel/2671005/cliplink/301965083',
|
||||
'md5': '702b2fbdeb51ad82f5c904e8c0766340',
|
||||
'info_dict': {
|
||||
'id': '301965083',
|
||||
'ext': 'mp4',
|
||||
'title': '乃木坂46 バナナマン 「3期生紹介コーナーが始動!顔高低差GPも!」 『乃木坂工事中』',
|
||||
'uploader_id': 2671005,
|
||||
'uploader': '그랑그랑이',
|
||||
'timestamp': 1488160199,
|
||||
'upload_date': '20170227',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://tv.kakao.com/channel/2653210/cliplink/300103180',
|
||||
'md5': 'a8917742069a4dd442516b86e7d66529',
|
||||
'info_dict': {
|
||||
'id': '300103180',
|
||||
'ext': 'mp4',
|
||||
'description': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)\r\n\r\n[쇼! 음악중심] 20160611, 507회',
|
||||
'title': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)',
|
||||
'uploader_id': 2653210,
|
||||
'uploader': '쇼 음악중심',
|
||||
'timestamp': 1485684628,
|
||||
'upload_date': '20170129',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
player_header = {
|
||||
'Referer': update_url_query(
|
||||
'http://tv.kakao.com/embed/player/cliplink/%s' % video_id, {
|
||||
'service': 'kakao_tv',
|
||||
'autoplay': '1',
|
||||
'profile': 'HIGH',
|
||||
'wmode': 'transparent',
|
||||
})
|
||||
}
|
||||
|
||||
QUERY_COMMON = {
|
||||
'player': 'monet_html5',
|
||||
'referer': url,
|
||||
'uuid': '',
|
||||
'service': 'kakao_tv',
|
||||
'section': '',
|
||||
'dteType': 'PC',
|
||||
}
|
||||
|
||||
query = QUERY_COMMON.copy()
|
||||
query['fields'] = 'clipLink,clip,channel,hasPlusFriend,-service,-tagList'
|
||||
impress = self._download_json(
|
||||
'%s/%s/impress' % (self._API_BASE, video_id),
|
||||
video_id, 'Downloading video info',
|
||||
query=query, headers=player_header)
|
||||
|
||||
clip_link = impress['clipLink']
|
||||
clip = clip_link['clip']
|
||||
|
||||
title = clip.get('title') or clip_link.get('displayTitle')
|
||||
|
||||
tid = impress.get('tid', '')
|
||||
|
||||
query = QUERY_COMMON.copy()
|
||||
query.update({
|
||||
'tid': tid,
|
||||
'profile': 'HIGH',
|
||||
})
|
||||
raw = self._download_json(
|
||||
'%s/%s/raw' % (self._API_BASE, video_id),
|
||||
video_id, 'Downloading video formats info',
|
||||
query=query, headers=player_header)
|
||||
|
||||
formats = []
|
||||
for fmt in raw.get('outputList', []):
|
||||
try:
|
||||
profile_name = fmt['profile']
|
||||
fmt_url_json = self._download_json(
|
||||
'%s/%s/raw/videolocation' % (self._API_BASE, video_id),
|
||||
video_id,
|
||||
'Downloading video URL for profile %s' % profile_name,
|
||||
query={
|
||||
'service': 'kakao_tv',
|
||||
'section': '',
|
||||
'tid': tid,
|
||||
'profile': profile_name
|
||||
}, headers=player_header, fatal=False)
|
||||
|
||||
if fmt_url_json is None:
|
||||
continue
|
||||
|
||||
fmt_url = fmt_url_json['url']
|
||||
formats.append({
|
||||
'url': fmt_url,
|
||||
'format_id': profile_name,
|
||||
'width': int_or_none(fmt.get('width')),
|
||||
'height': int_or_none(fmt.get('height')),
|
||||
'format_note': fmt.get('label'),
|
||||
'filesize': int_or_none(fmt.get('filesize'))
|
||||
})
|
||||
except KeyError:
|
||||
pass
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbs = []
|
||||
for thumb in clip.get('clipChapterThumbnailList', []):
|
||||
thumbs.append({
|
||||
'url': thumb.get('thumbnailUrl'),
|
||||
'id': compat_str(thumb.get('timeInSec')),
|
||||
'preference': -1 if thumb.get('isDefault') else 0
|
||||
})
|
||||
top_thumbnail = clip.get('thumbnailUrl')
|
||||
if top_thumbnail:
|
||||
thumbs.append({
|
||||
'url': top_thumbnail,
|
||||
'preference': 10,
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': clip.get('description'),
|
||||
'uploader': clip_link.get('channel', {}).get('name'),
|
||||
'uploader_id': clip_link.get('channelId'),
|
||||
'thumbnails': thumbs,
|
||||
'timestamp': unified_timestamp(clip_link.get('createTime')),
|
||||
'duration': int_or_none(clip.get('duration')),
|
||||
'view_count': int_or_none(clip.get('playCount')),
|
||||
'like_count': int_or_none(clip.get('likeCount')),
|
||||
'comment_count': int_or_none(clip.get('commentCount')),
|
||||
'formats': formats,
|
||||
}
|
@@ -287,6 +287,9 @@ class KalturaIE(InfoExtractor):
|
||||
# skip for now.
|
||||
if f.get('fileExt') == 'chun':
|
||||
continue
|
||||
# DRM-protected video, cannot be decrypted
|
||||
if f.get('fileExt') == 'wvm':
|
||||
continue
|
||||
if not f.get('fileExt'):
|
||||
# QT indicates QuickTime; some videos have broken fileExt
|
||||
if f.get('containerFormat') == 'qt':
|
||||
|
@@ -1,5 +1,6 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .canvas import CanvasIE
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
@@ -7,7 +8,7 @@ class KetnetIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?ketnet\.be/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.ketnet.be/kijken/zomerse-filmpjes',
|
||||
'md5': 'd907f7b1814ef0fa285c0475d9994ed7',
|
||||
'md5': '6bdeb65998930251bbd1c510750edba9',
|
||||
'info_dict': {
|
||||
'id': 'zomerse-filmpjes',
|
||||
'ext': 'mp4',
|
||||
@@ -15,6 +16,20 @@ class KetnetIE(InfoExtractor):
|
||||
'description': 'Gluur mee met Ghost Rockers op de filmset',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
}
|
||||
}, {
|
||||
# mzid in playerConfig instead of sources
|
||||
'url': 'https://www.ketnet.be/kijken/nachtwacht/de-greystook',
|
||||
'md5': '90139b746a0a9bd7bb631283f6e2a64e',
|
||||
'info_dict': {
|
||||
'id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
|
||||
'display_id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
|
||||
'ext': 'flv',
|
||||
'title': 'Nachtwacht: De Greystook',
|
||||
'description': 'md5:1db3f5dc4c7109c821261e7512975be7',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 1468.03,
|
||||
},
|
||||
'expected_warnings': ['is not a supported codec', 'Unknown MIME type'],
|
||||
}, {
|
||||
'url': 'https://www.ketnet.be/kijken/karrewiet/uitzending-8-september-2016',
|
||||
'only_matching': True,
|
||||
@@ -38,6 +53,12 @@ class KetnetIE(InfoExtractor):
|
||||
'player config'),
|
||||
video_id)
|
||||
|
||||
mzid = config.get('mzid')
|
||||
if mzid:
|
||||
return self.url_result(
|
||||
'https://mediazone.vrt.be/api/v1/ketnet/assets/%s' % mzid,
|
||||
CanvasIE.ie_key(), video_id=mzid)
|
||||
|
||||
title = config['title']
|
||||
|
||||
formats = []
|
||||
|
@@ -338,7 +338,7 @@ class LivestreamOriginalIE(InfoExtractor):
|
||||
info = {
|
||||
'title': self._og_search_title(webpage),
|
||||
'description': self._og_search_description(webpage),
|
||||
'thumbnail': self._search_regex(r'channelLogo.src\s*=\s*"([^"]+)"', webpage, 'thumbnail', None),
|
||||
'thumbnail': self._search_regex(r'channelLogo\.src\s*=\s*"([^"]+)"', webpage, 'thumbnail', None),
|
||||
}
|
||||
video_data = self._download_json(stream_url, content_id)
|
||||
is_live = video_data.get('isLive')
|
||||
|
@@ -11,7 +11,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class LnkGoIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?lnkgo\.alfa\.lt/visi-video/(?P<show>[^/]+)/ziurek-(?P<id>[A-Za-z0-9-]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?lnkgo\.(?:alfa\.)?lt/visi-video/(?P<show>[^/]+)/ziurek-(?P<id>[A-Za-z0-9-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://lnkgo.alfa.lt/visi-video/yra-kaip-yra/ziurek-yra-kaip-yra-162',
|
||||
'info_dict': {
|
||||
@@ -42,6 +42,9 @@ class LnkGoIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True, # HLS download
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.lnkgo.lt/visi-video/aktualai-pratesimas/ziurek-putka-trys-klausimai',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_AGE_LIMITS = {
|
||||
'N-7': 7,
|
||||
|
@@ -94,7 +94,7 @@ class LyndaBaseIE(InfoExtractor):
|
||||
class LyndaIE(LyndaBaseIE):
|
||||
IE_NAME = 'lynda'
|
||||
IE_DESC = 'lynda.com videos'
|
||||
_VALID_URL = r'https?://(?:www\.)?lynda\.com/(?:[^/]+/[^/]+/(?P<course_id>\d+)|player/embed)/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:lynda\.com|educourse\.ga)/(?:[^/]+/[^/]+/(?P<course_id>\d+)|player/embed)/(?P<id>\d+)'
|
||||
|
||||
_TIMECODE_REGEX = r'\[(?P<timecode>\d+:\d+:\d+[\.,]\d+)\]'
|
||||
|
||||
@@ -110,6 +110,9 @@ class LyndaIE(LyndaBaseIE):
|
||||
}, {
|
||||
'url': 'https://www.lynda.com/player/embed/133770?tr=foo=1;bar=g;fizz=rt&fs=0',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://educourse.ga/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _raise_unavailable(self, video_id):
|
||||
@@ -253,7 +256,7 @@ class LyndaCourseIE(LyndaBaseIE):
|
||||
|
||||
# Course link equals to welcome/introduction video link of same course
|
||||
# We will recognize it as course link
|
||||
_VALID_URL = r'https?://(?:www|m)\.lynda\.com/(?P<coursepath>[^/]+/[^/]+/(?P<courseid>\d+))-\d\.html'
|
||||
_VALID_URL = r'https?://(?:www|m)\.(?:lynda\.com|educourse\.ga)/(?P<coursepath>[^/]+/[^/]+/(?P<courseid>\d+))-\d\.html'
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
@@ -5,7 +5,7 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class MakerTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:(?:www\.)?maker\.tv/(?:[^/]+/)*video|makerplayer.com/embed/maker)/(?P<id>[a-zA-Z0-9]{12})'
|
||||
_VALID_URL = r'https?://(?:(?:www\.)?maker\.tv/(?:[^/]+/)*video|makerplayer\.com/embed/maker)/(?P<id>[a-zA-Z0-9]{12})'
|
||||
_TEST = {
|
||||
'url': 'http://www.maker.tv/video/Fh3QgymL9gsc',
|
||||
'md5': 'ca237a53a8eb20b6dc5bd60564d4ab3e',
|
||||
|
@@ -22,7 +22,7 @@ class MangomoloBaseIE(InfoExtractor):
|
||||
|
||||
format_url = self._html_search_regex(
|
||||
[
|
||||
r'file\s*:\s*"(https?://[^"]+?/playlist.m3u8)',
|
||||
r'file\s*:\s*"(https?://[^"]+?/playlist\.m3u8)',
|
||||
r'<a[^>]+href="(rtsp://[^"]+)"'
|
||||
], webpage, 'format url')
|
||||
formats = self._extract_wowza_formats(
|
||||
|
@@ -11,7 +11,7 @@ from ..utils import (
|
||||
|
||||
class MeipaiIE(InfoExtractor):
|
||||
IE_DESC = '美拍'
|
||||
_VALID_URL = r'https?://(?:www\.)?meipai.com/media/(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?meipai\.com/media/(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
# regular uploaded video
|
||||
'url': 'http://www.meipai.com/media/531697625',
|
||||
|
@@ -12,12 +12,16 @@ from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urlparse,
|
||||
compat_zip
|
||||
)
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
OnDemandPagedList,
|
||||
str_to_int,
|
||||
try_get,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
@@ -54,27 +58,12 @@ class MixcloudIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_keys = [
|
||||
'return { requestAnimationFrame: function(callback) { callback(); }, innerHeight: 500 };',
|
||||
'pleasedontdownloadourmusictheartistswontgetpaid',
|
||||
'window.addEventListener = window.addEventListener || function() {};',
|
||||
'(function() { return new Date().toLocaleDateString(); })()'
|
||||
]
|
||||
_current_key = None
|
||||
|
||||
# See https://www.mixcloud.com/media/js2/www_js_2.9e23256562c080482435196ca3975ab5.js
|
||||
def _decrypt_play_info(self, play_info, video_id):
|
||||
play_info = base64.b64decode(play_info.encode('ascii'))
|
||||
for num, key in enumerate(self._keys, start=1):
|
||||
try:
|
||||
return self._parse_json(
|
||||
''.join([
|
||||
compat_chr(compat_ord(ch) ^ compat_ord(key[idx % len(key)]))
|
||||
for idx, ch in enumerate(play_info)]),
|
||||
video_id)
|
||||
except ExtractorError:
|
||||
if num == len(self._keys):
|
||||
raise
|
||||
@staticmethod
|
||||
def _decrypt_xor_cipher(key, ciphertext):
|
||||
"""Encrypt/Decrypt XOR cipher. Both ways are possible because it's XOR."""
|
||||
return ''.join([
|
||||
compat_chr(compat_ord(ch) ^ compat_ord(k))
|
||||
for ch, k in compat_zip(ciphertext, itertools.cycle(key))])
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
@@ -84,54 +73,119 @@ class MixcloudIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, track_id)
|
||||
|
||||
if not self._current_key:
|
||||
js_url = self._search_regex(
|
||||
r'<script[^>]+\bsrc=["\"](https://(?:www\.)?mixcloud\.com/media/js2/www_js_4\.[^>]+\.js)',
|
||||
webpage, 'js url', default=None)
|
||||
if js_url:
|
||||
js = self._download_webpage(js_url, track_id, fatal=False)
|
||||
if js:
|
||||
KEY_RE_TEMPLATE = r'player\s*:\s*{.*?\b%s\s*:\s*(["\'])(?P<key>(?:(?!\1).)+)\1'
|
||||
for key_name in ('value', 'key_value', 'key_value.*?', '.*?value.*?'):
|
||||
key = self._search_regex(
|
||||
KEY_RE_TEMPLATE % key_name, js, 'key',
|
||||
default=None, group='key')
|
||||
if key and isinstance(key, compat_str):
|
||||
self._keys.insert(0, key)
|
||||
self._current_key = key
|
||||
# Legacy path
|
||||
encrypted_play_info = self._search_regex(
|
||||
r'm-play-info="([^"]+)"', webpage, 'play info', default=None)
|
||||
|
||||
if encrypted_play_info is not None:
|
||||
# Decode
|
||||
encrypted_play_info = base64.b64decode(encrypted_play_info)
|
||||
else:
|
||||
# New path
|
||||
full_info_json = self._parse_json(self._html_search_regex(
|
||||
r'<script id="relay-data" type="text/x-mixcloud">([^<]+)</script>',
|
||||
webpage, 'play info'), 'play info')
|
||||
for item in full_info_json:
|
||||
item_data = try_get(
|
||||
item, lambda x: x['cloudcast']['data']['cloudcastLookup'],
|
||||
dict)
|
||||
if try_get(item_data, lambda x: x['streamInfo']['url']):
|
||||
info_json = item_data
|
||||
break
|
||||
else:
|
||||
raise ExtractorError('Failed to extract matching stream info')
|
||||
|
||||
message = self._html_search_regex(
|
||||
r'(?s)<div[^>]+class="global-message cloudcast-disabled-notice-light"[^>]*>(.+?)<(?:a|/div)',
|
||||
webpage, 'error message', default=None)
|
||||
|
||||
encrypted_play_info = self._search_regex(
|
||||
r'm-play-info="([^"]+)"', webpage, 'play info')
|
||||
js_url = self._search_regex(
|
||||
r'<script[^>]+\bsrc=["\"](https://(?:www\.)?mixcloud\.com/media/(?:js2/www_js_4|js/www)\.[^>]+\.js)',
|
||||
webpage, 'js url')
|
||||
js = self._download_webpage(js_url, track_id, 'Downloading JS')
|
||||
# Known plaintext attack
|
||||
if encrypted_play_info:
|
||||
kps = ['{"stream_url":']
|
||||
kpa_target = encrypted_play_info
|
||||
else:
|
||||
kps = ['https://', 'http://']
|
||||
kpa_target = base64.b64decode(info_json['streamInfo']['url'])
|
||||
for kp in kps:
|
||||
partial_key = self._decrypt_xor_cipher(kpa_target, kp)
|
||||
for quote in ["'", '"']:
|
||||
key = self._search_regex(
|
||||
r'{0}({1}[^{0}]*){0}'.format(quote, re.escape(partial_key)),
|
||||
js, 'encryption key', default=None)
|
||||
if key is not None:
|
||||
break
|
||||
else:
|
||||
continue
|
||||
break
|
||||
else:
|
||||
raise ExtractorError('Failed to extract encryption key')
|
||||
|
||||
play_info = self._decrypt_play_info(encrypted_play_info, track_id)
|
||||
if encrypted_play_info is not None:
|
||||
play_info = self._parse_json(self._decrypt_xor_cipher(key, encrypted_play_info), 'play info')
|
||||
if message and 'stream_url' not in play_info:
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True)
|
||||
song_url = play_info['stream_url']
|
||||
formats = [{
|
||||
'format_id': 'normal',
|
||||
'url': song_url
|
||||
}]
|
||||
|
||||
if message and 'stream_url' not in play_info:
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True)
|
||||
title = self._html_search_regex(r'm-title="([^"]+)"', webpage, 'title')
|
||||
thumbnail = self._proto_relative_url(self._html_search_regex(
|
||||
r'm-thumbnail-url="([^"]+)"', webpage, 'thumbnail', fatal=False))
|
||||
uploader = self._html_search_regex(
|
||||
r'm-owner-name="([^"]+)"', webpage, 'uploader', fatal=False)
|
||||
uploader_id = self._search_regex(
|
||||
r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False)
|
||||
description = self._og_search_description(webpage)
|
||||
view_count = str_to_int(self._search_regex(
|
||||
[r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"',
|
||||
r'/listeners/?">([0-9,.]+)</a>',
|
||||
r'(?:m|data)-tooltip=["\']([\d,.]+) plays'],
|
||||
webpage, 'play count', default=None))
|
||||
|
||||
song_url = play_info['stream_url']
|
||||
else:
|
||||
title = info_json['name']
|
||||
thumbnail = urljoin(
|
||||
'https://thumbnailer.mixcloud.com/unsafe/600x600/',
|
||||
try_get(info_json, lambda x: x['picture']['urlRoot'], compat_str))
|
||||
uploader = try_get(info_json, lambda x: x['owner']['displayName'])
|
||||
uploader_id = try_get(info_json, lambda x: x['owner']['username'])
|
||||
description = try_get(info_json, lambda x: x['description'])
|
||||
view_count = int_or_none(try_get(info_json, lambda x: x['plays']))
|
||||
|
||||
title = self._html_search_regex(r'm-title="([^"]+)"', webpage, 'title')
|
||||
thumbnail = self._proto_relative_url(self._html_search_regex(
|
||||
r'm-thumbnail-url="([^"]+)"', webpage, 'thumbnail', fatal=False))
|
||||
uploader = self._html_search_regex(
|
||||
r'm-owner-name="([^"]+)"', webpage, 'uploader', fatal=False)
|
||||
uploader_id = self._search_regex(
|
||||
r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False)
|
||||
description = self._og_search_description(webpage)
|
||||
view_count = str_to_int(self._search_regex(
|
||||
[r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"',
|
||||
r'/listeners/?">([0-9,.]+)</a>',
|
||||
r'(?:m|data)-tooltip=["\']([\d,.]+) plays'],
|
||||
webpage, 'play count', default=None))
|
||||
stream_info = info_json['streamInfo']
|
||||
formats = []
|
||||
|
||||
for url_key in ('url', 'hlsUrl', 'dashUrl'):
|
||||
format_url = stream_info.get(url_key)
|
||||
if not format_url:
|
||||
continue
|
||||
decrypted = self._decrypt_xor_cipher(key, base64.b64decode(format_url))
|
||||
if not decrypted:
|
||||
continue
|
||||
if url_key == 'hlsUrl':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
decrypted, track_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
elif url_key == 'dashUrl':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
decrypted, track_id, mpd_id='dash', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'format_id': 'http',
|
||||
'url': decrypted,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': track_id,
|
||||
'title': title,
|
||||
'url': song_url,
|
||||
'formats': formats,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': uploader,
|
||||
@@ -237,7 +291,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE):
|
||||
functools.partial(
|
||||
self._tracks_page_func,
|
||||
'%s/%s' % (user_id, list_type), video_id, 'list of %s' % list_type),
|
||||
self._PAGE_SIZE, use_cache=True)
|
||||
self._PAGE_SIZE)
|
||||
|
||||
return self.playlist_result(
|
||||
entries, video_id, '%s (%s)' % (username, list_type), description)
|
||||
|
@@ -8,8 +8,8 @@ from .common import InfoExtractor
|
||||
|
||||
class MorningstarIE(InfoExtractor):
|
||||
IE_DESC = 'morningstar.com'
|
||||
_VALID_URL = r'https?://(?:www\.)?morningstar\.com/[cC]over/video[cC]enter\.aspx\?id=(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:(?:www|news)\.)morningstar\.com/[cC]over/video[cC]enter\.aspx\?id=(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.morningstar.com/cover/videocenter.aspx?id=615869',
|
||||
'md5': '6c0acface7a787aadc8391e4bbf7b0f5',
|
||||
'info_dict': {
|
||||
@@ -19,7 +19,10 @@ class MorningstarIE(InfoExtractor):
|
||||
'description': "Vanguard's Joel Dickson on managing higher tax rates for high-income earners and fund capital-gain distributions in 2013.",
|
||||
'thumbnail': r're:^https?://.*m(?:orning)?star\.com/.+thumb\.jpg$'
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'http://news.morningstar.com/cover/videocenter.aspx?id=825556',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
@@ -258,7 +258,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||
|
||||
if mgid is None or ':' not in mgid:
|
||||
mgid = self._search_regex(
|
||||
[r'data-mgid="(.*?)"', r'swfobject.embedSWF\(".*?(mgid:.*?)"'],
|
||||
[r'data-mgid="(.*?)"', r'swfobject\.embedSWF\(".*?(mgid:.*?)"'],
|
||||
webpage, 'mgid', default=None)
|
||||
|
||||
if not mgid:
|
||||
|
@@ -160,7 +160,7 @@ class MyVideoIE(InfoExtractor):
|
||||
else:
|
||||
video_playpath = ''
|
||||
|
||||
video_swfobj = self._search_regex(r'swfobject.embedSWF\(\'(.+?)\'', webpage, 'swfobj')
|
||||
video_swfobj = self._search_regex(r'swfobject\.embedSWF\(\'(.+?)\'', webpage, 'swfobj')
|
||||
video_swfobj = compat_urllib_parse_unquote(video_swfobj)
|
||||
|
||||
video_title = self._html_search_regex("<h1(?: class='globalHd')?>(.*?)</h1>",
|
||||
|
@@ -111,7 +111,7 @@ class NationalGeographicIE(ThePlatformIE, AdobePassIE):
|
||||
release_url = self._search_regex(
|
||||
r'video_auth_playlist_url\s*=\s*"([^"]+)"',
|
||||
webpage, 'release url')
|
||||
theplatform_path = self._search_regex(r'https?://link.theplatform.com/s/([^?]+)', release_url, 'theplatform path')
|
||||
theplatform_path = self._search_regex(r'https?://link\.theplatform\.com/s/([^?]+)', release_url, 'theplatform path')
|
||||
video_id = theplatform_path.split('/')[-1]
|
||||
query = {
|
||||
'mbr': 'true',
|
||||
|
@@ -43,7 +43,7 @@ class NaverIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
m_id = re.search(r'var rmcPlayer = new nhn.rmcnmv.RMCVideoPlayer\("(.+?)", "(.+?)"',
|
||||
m_id = re.search(r'var rmcPlayer = new nhn\.rmcnmv\.RMCVideoPlayer\("(.+?)", "(.+?)"',
|
||||
webpage)
|
||||
if m_id is None:
|
||||
error = self._html_search_regex(
|
||||
|
@@ -122,7 +122,7 @@ class NBAIE(TurnerBaseIE):
|
||||
playlist_title = self._og_search_title(webpage, fatal=False)
|
||||
entries = OnDemandPagedList(
|
||||
functools.partial(self._fetch_page, team, video_id),
|
||||
self._PAGE_SIZE, use_cache=True)
|
||||
self._PAGE_SIZE)
|
||||
|
||||
return self.playlist_result(entries, team, playlist_title)
|
||||
|
||||
|
@@ -15,7 +15,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class NBCIE(AdobePassIE):
|
||||
_VALID_URL = r'(?P<permalink>https?://(?:www\.)?nbc\.com/[^/]+/video/[^/]+/(?P<id>n?\d+))'
|
||||
_VALID_URL = r'https?(?P<permalink>://(?:www\.)?nbc\.com/[^/]+/video/[^/]+/(?P<id>n?\d+))'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
@@ -72,6 +72,7 @@ class NBCIE(AdobePassIE):
|
||||
|
||||
def _real_extract(self, url):
|
||||
permalink, video_id = re.match(self._VALID_URL, url).groups()
|
||||
permalink = 'http' + permalink
|
||||
video_data = self._download_json(
|
||||
'https://api.nbc.com/v3/videos', video_id, query={
|
||||
'filter[permalink]': permalink,
|
||||
@@ -109,10 +110,10 @@ class NBCSportsVPlayerIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://vplayer\.nbcsports\.com/(?:[^/]+/)+(?P<id>[0-9a-zA-Z_]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_share/select/9CsDKds0kvHI',
|
||||
'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_embed/select/9CsDKds0kvHI',
|
||||
'info_dict': {
|
||||
'id': '9CsDKds0kvHI',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d',
|
||||
'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson',
|
||||
'timestamp': 1426270238,
|
||||
@@ -120,7 +121,7 @@ class NBCSportsVPlayerIE(InfoExtractor):
|
||||
'uploader': 'NBCU-SPORTS',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://vplayer.nbcsports.com/p/BxmELC/nbc_embedshare/select/_hqLjQ95yx8Z',
|
||||
'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_embed/select/media/_hqLjQ95yx8Z',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@@ -134,7 +135,8 @@ class NBCSportsVPlayerIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
theplatform_url = self._og_search_video_url(webpage)
|
||||
theplatform_url = self._og_search_video_url(webpage).replace(
|
||||
'vplayer.nbcsports.com', 'player.theplatform.com')
|
||||
return self.url_result(theplatform_url, 'ThePlatform')
|
||||
|
||||
|
||||
|
@@ -18,7 +18,13 @@ from ..utils import (
|
||||
|
||||
|
||||
class NexxIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://api\.nexx(?:\.cloud|cdn\.com)/v3/(?P<domain_id>\d+)/videos/byid/(?P<id>\d+)'
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:
|
||||
https?://api\.nexx(?:\.cloud|cdn\.com)/v3/(?P<domain_id>\d+)/videos/byid/|
|
||||
nexx:(?P<domain_id_s>\d+):
|
||||
)
|
||||
(?P<id>\d+)
|
||||
'''
|
||||
_TESTS = [{
|
||||
# movie
|
||||
'url': 'https://api.nexx.cloud/v3/748/videos/byid/128907',
|
||||
@@ -62,8 +68,18 @@ class NexxIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://api.nexxcdn.com/v3/748/videos/byid/128907',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'nexx:748:128907',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_domain_id(webpage):
|
||||
mobj = re.search(
|
||||
r'<script\b[^>]+\bsrc=["\'](?:https?:)?//require\.nexx(?:\.cloud|cdn\.com)/(?P<id>\d+)',
|
||||
webpage)
|
||||
return mobj.group('id') if mobj else None
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
# Reference:
|
||||
@@ -72,11 +88,8 @@ class NexxIE(InfoExtractor):
|
||||
entries = []
|
||||
|
||||
# JavaScript Integration
|
||||
mobj = re.search(
|
||||
r'<script\b[^>]+\bsrc=["\']https?://require\.nexx(?:\.cloud|cdn\.com)/(?P<id>\d+)',
|
||||
webpage)
|
||||
if mobj:
|
||||
domain_id = mobj.group('id')
|
||||
domain_id = NexxIE._extract_domain_id(webpage)
|
||||
if domain_id:
|
||||
for video_id in re.findall(
|
||||
r'(?is)onPLAYReady.+?_play\.init\s*\(.+?\s*,\s*["\']?(\d+)',
|
||||
webpage):
|
||||
@@ -112,7 +125,8 @@ class NexxIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
domain_id, video_id = mobj.group('domain_id', 'id')
|
||||
domain_id = mobj.group('domain_id') or mobj.group('domain_id_s')
|
||||
video_id = mobj.group('id')
|
||||
|
||||
# Reverse engineered from JS code (see getDeviceID function)
|
||||
device_id = '%d:%d:%d%d' % (
|
||||
|
@@ -6,6 +6,7 @@ from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
smuggle_url,
|
||||
try_get,
|
||||
)
|
||||
@@ -24,8 +25,6 @@ class NoovoIE(InfoExtractor):
|
||||
'timestamp': 1491399228,
|
||||
'upload_date': '20170405',
|
||||
'uploader_id': '618566855001',
|
||||
'creator': 'vtele',
|
||||
'view_count': int,
|
||||
'series': 'RPM+',
|
||||
},
|
||||
'params': {
|
||||
@@ -37,13 +36,11 @@ class NoovoIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '5395865725001',
|
||||
'title': 'Épisode 13 : Les retrouvailles',
|
||||
'description': 'md5:336d5ebc5436534e61d16e63ddfca327',
|
||||
'description': 'md5:888c3330f0c1b4476c5bc99a1c040473',
|
||||
'ext': 'mp4',
|
||||
'timestamp': 1492019320,
|
||||
'upload_date': '20170412',
|
||||
'uploader_id': '618566855001',
|
||||
'creator': 'vtele',
|
||||
'view_count': int,
|
||||
'series': "L'amour est dans le pré",
|
||||
'season_number': 5,
|
||||
'episode': 'Épisode 13',
|
||||
@@ -58,40 +55,46 @@ class NoovoIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
data = self._download_json(
|
||||
'http://api.noovo.ca/api/v1/pages/single-episode/%s' % video_id,
|
||||
video_id)['data']
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
content = try_get(data, lambda x: x['contents'][0])
|
||||
bc_url = BrightcoveNewIE._extract_url(self, webpage)
|
||||
|
||||
brightcove_id = data.get('brightcoveId') or content['brightcoveId']
|
||||
data = self._parse_json(
|
||||
self._search_regex(
|
||||
r'(?s)dataLayer\.push\(\s*({.+?})\s*\);', webpage, 'data',
|
||||
default='{}'),
|
||||
video_id, transform_source=js_to_json, fatal=False)
|
||||
|
||||
title = try_get(
|
||||
data, lambda x: x['video']['nom'],
|
||||
compat_str) or self._html_search_meta(
|
||||
'dcterms.Title', webpage, 'title', fatal=True)
|
||||
|
||||
description = self._html_search_meta(
|
||||
('dcterms.Description', 'description'), webpage, 'description')
|
||||
|
||||
series = try_get(
|
||||
data, (
|
||||
lambda x: x['show']['title'],
|
||||
lambda x: x['season']['show']['title']),
|
||||
compat_str)
|
||||
data, lambda x: x['emission']['nom']) or self._search_regex(
|
||||
r'<div[^>]+class="banner-card__subtitle h4"[^>]*>([^<]+)',
|
||||
webpage, 'series', default=None)
|
||||
|
||||
episode = None
|
||||
og = data.get('og')
|
||||
if isinstance(og, dict) and og.get('type') == 'video.episode':
|
||||
episode = og.get('title')
|
||||
season_el = try_get(data, lambda x: x['emission']['saison'], dict) or {}
|
||||
season = try_get(season_el, lambda x: x['nom'], compat_str)
|
||||
season_number = int_or_none(try_get(season_el, lambda x: x['numero']))
|
||||
|
||||
video = content or data
|
||||
episode_el = try_get(season_el, lambda x: x['episode'], dict) or {}
|
||||
episode = try_get(episode_el, lambda x: x['nom'], compat_str)
|
||||
episode_number = int_or_none(try_get(episode_el, lambda x: x['numero']))
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': BrightcoveNewIE.ie_key(),
|
||||
'url': smuggle_url(
|
||||
self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
|
||||
{'geo_countries': ['CA']}),
|
||||
'id': brightcove_id,
|
||||
'title': video.get('title'),
|
||||
'creator': video.get('source'),
|
||||
'view_count': int_or_none(video.get('viewsCount')),
|
||||
'url': smuggle_url(bc_url, {'geo_countries': ['CA']}),
|
||||
'title': title,
|
||||
'description': description,
|
||||
'series': series,
|
||||
'season_number': int_or_none(try_get(
|
||||
data, lambda x: x['season']['seasonNumber'])),
|
||||
'season': season,
|
||||
'season_number': season_number,
|
||||
'episode': episode,
|
||||
'episode_number': int_or_none(data.get('episodeNumber')),
|
||||
'episode_number': episode_number,
|
||||
}
|
||||
|
@@ -469,7 +469,7 @@ class SchoolTVIE(NPODataMidEmbedIE):
|
||||
|
||||
class HetKlokhuisIE(NPODataMidEmbedIE):
|
||||
IE_NAME = 'hetklokhuis'
|
||||
_VALID_URL = r'https?://(?:www\.)?hetklokhuis.nl/[^/]+/\d+/(?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?hetklokhuis\.nl/[^/]+/\d+/(?P<id>[^/?#&]+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://hetklokhuis.nl/tv-uitzending/3471/Zwaartekrachtsgolven',
|
||||
|
@@ -7,7 +7,7 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class OnceIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://.+?\.unicornmedia\.com/now/[^/]+/[^/]+/(?P<domain_id>[^/]+)/(?P<application_id>[^/]+)/(?:[^/]+/)?(?P<media_item_id>[^/]+)/content\.(?:once|m3u8|mp4)'
|
||||
_VALID_URL = r'https?://.+?\.unicornmedia\.com/now/(?:ads/vmap/)?[^/]+/[^/]+/(?P<domain_id>[^/]+)/(?P<application_id>[^/]+)/(?:[^/]+/)?(?P<media_item_id>[^/]+)/content\.(?:once|m3u8|mp4)'
|
||||
ADAPTIVE_URL_TEMPLATE = 'http://once.unicornmedia.com/now/master/playlist/%s/%s/%s/content.m3u8'
|
||||
PROGRESSIVE_URL_TEMPLATE = 'http://once.unicornmedia.com/now/media/progressive/%s/%s/%s/%s/content.mp4'
|
||||
|
||||
|
@@ -13,11 +13,11 @@ from ..utils import (
|
||||
|
||||
|
||||
class OnionStudiosIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?onionstudios\.com/(?:videos/[^/]+-|embed\?.*\bid=)(?P<id>\d+)(?!-)'
|
||||
_VALID_URL = r'https?://(?:www\.)?onionstudios\.com/(?:video(?:s/[^/]+-|/)|embed\?.*\bid=)(?P<id>\d+)(?!-)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.onionstudios.com/videos/hannibal-charges-forward-stops-for-a-cocktail-2937',
|
||||
'md5': 'e49f947c105b8a78a675a0ee1bddedfe',
|
||||
'md5': '719d1f8c32094b8c33902c17bcae5e34',
|
||||
'info_dict': {
|
||||
'id': '2937',
|
||||
'ext': 'mp4',
|
||||
@@ -29,12 +29,15 @@ class OnionStudiosIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://www.onionstudios.com/embed?id=2855&autoplay=true',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.onionstudios.com/video/6139.json',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_url(webpage):
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?onionstudios\.com/embed.+?)\1', webpage)
|
||||
r'(?s)<(?:iframe|bulbs-video)[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?onionstudios\.com/(?:embed.+?|video/\d+\.json))\1', webpage)
|
||||
if mobj:
|
||||
return mobj.group('url')
|
||||
|
||||
|
@@ -1,14 +1,244 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import tempfile
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_chr
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
from ..compat import (
|
||||
compat_urlparse,
|
||||
compat_kwargs,
|
||||
)
|
||||
from ..utils import (
|
||||
check_executable,
|
||||
determine_ext,
|
||||
encodeArgument,
|
||||
ExtractorError,
|
||||
get_element_by_id,
|
||||
get_exe_version,
|
||||
is_outdated_version,
|
||||
std_headers,
|
||||
)
|
||||
|
||||
|
||||
def cookie_to_dict(cookie):
|
||||
cookie_dict = {
|
||||
'name': cookie.name,
|
||||
'value': cookie.value,
|
||||
}
|
||||
if cookie.port_specified:
|
||||
cookie_dict['port'] = cookie.port
|
||||
if cookie.domain_specified:
|
||||
cookie_dict['domain'] = cookie.domain
|
||||
if cookie.path_specified:
|
||||
cookie_dict['path'] = cookie.path
|
||||
if cookie.expires is not None:
|
||||
cookie_dict['expires'] = cookie.expires
|
||||
if cookie.secure is not None:
|
||||
cookie_dict['secure'] = cookie.secure
|
||||
if cookie.discard is not None:
|
||||
cookie_dict['discard'] = cookie.discard
|
||||
try:
|
||||
if (cookie.has_nonstandard_attr('httpOnly') or
|
||||
cookie.has_nonstandard_attr('httponly') or
|
||||
cookie.has_nonstandard_attr('HttpOnly')):
|
||||
cookie_dict['httponly'] = True
|
||||
except TypeError:
|
||||
pass
|
||||
return cookie_dict
|
||||
|
||||
|
||||
def cookie_jar_to_list(cookie_jar):
|
||||
return [cookie_to_dict(cookie) for cookie in cookie_jar]
|
||||
|
||||
|
||||
class PhantomJSwrapper(object):
|
||||
"""PhantomJS wrapper class
|
||||
|
||||
This class is experimental.
|
||||
"""
|
||||
|
||||
_TEMPLATE = r'''
|
||||
phantom.onError = function(msg, trace) {{
|
||||
var msgStack = ['PHANTOM ERROR: ' + msg];
|
||||
if(trace && trace.length) {{
|
||||
msgStack.push('TRACE:');
|
||||
trace.forEach(function(t) {{
|
||||
msgStack.push(' -> ' + (t.file || t.sourceURL) + ': ' + t.line
|
||||
+ (t.function ? ' (in function ' + t.function +')' : ''));
|
||||
}});
|
||||
}}
|
||||
console.error(msgStack.join('\n'));
|
||||
phantom.exit(1);
|
||||
}};
|
||||
var page = require('webpage').create();
|
||||
var fs = require('fs');
|
||||
var read = {{ mode: 'r', charset: 'utf-8' }};
|
||||
var write = {{ mode: 'w', charset: 'utf-8' }};
|
||||
JSON.parse(fs.read("{cookies}", read)).forEach(function(x) {{
|
||||
phantom.addCookie(x);
|
||||
}});
|
||||
page.settings.resourceTimeout = {timeout};
|
||||
page.settings.userAgent = "{ua}";
|
||||
page.onLoadStarted = function() {{
|
||||
page.evaluate(function() {{
|
||||
delete window._phantom;
|
||||
delete window.callPhantom;
|
||||
}});
|
||||
}};
|
||||
var saveAndExit = function() {{
|
||||
fs.write("{html}", page.content, write);
|
||||
fs.write("{cookies}", JSON.stringify(phantom.cookies), write);
|
||||
phantom.exit();
|
||||
}};
|
||||
page.onLoadFinished = function(status) {{
|
||||
if(page.url === "") {{
|
||||
page.setContent(fs.read("{html}", read), "{url}");
|
||||
}}
|
||||
else {{
|
||||
{jscode}
|
||||
}}
|
||||
}};
|
||||
page.open("");
|
||||
'''
|
||||
|
||||
_TMP_FILE_NAMES = ['script', 'html', 'cookies']
|
||||
|
||||
@staticmethod
|
||||
def _version():
|
||||
return get_exe_version('phantomjs', version_re=r'([0-9.]+)')
|
||||
|
||||
def __init__(self, extractor, required_version=None, timeout=10000):
|
||||
self.exe = check_executable('phantomjs', ['-v'])
|
||||
if not self.exe:
|
||||
raise ExtractorError('PhantomJS executable not found in PATH, '
|
||||
'download it from http://phantomjs.org',
|
||||
expected=True)
|
||||
|
||||
self.extractor = extractor
|
||||
|
||||
if required_version:
|
||||
version = self._version()
|
||||
if is_outdated_version(version, required_version):
|
||||
self.extractor._downloader.report_warning(
|
||||
'Your copy of PhantomJS is outdated, update it to version '
|
||||
'%s or newer if you encounter any errors.' % required_version)
|
||||
|
||||
self.options = {
|
||||
'timeout': timeout,
|
||||
}
|
||||
self._TMP_FILES = {}
|
||||
for name in self._TMP_FILE_NAMES:
|
||||
tmp = tempfile.NamedTemporaryFile(delete=False)
|
||||
tmp.close()
|
||||
self._TMP_FILES[name] = tmp
|
||||
|
||||
def __del__(self):
|
||||
for name in self._TMP_FILE_NAMES:
|
||||
try:
|
||||
os.remove(self._TMP_FILES[name].name)
|
||||
except:
|
||||
pass
|
||||
|
||||
def _save_cookies(self, url):
|
||||
cookies = cookie_jar_to_list(self.extractor._downloader.cookiejar)
|
||||
for cookie in cookies:
|
||||
if 'path' not in cookie:
|
||||
cookie['path'] = '/'
|
||||
if 'domain' not in cookie:
|
||||
cookie['domain'] = compat_urlparse.urlparse(url).netloc
|
||||
with open(self._TMP_FILES['cookies'].name, 'wb') as f:
|
||||
f.write(json.dumps(cookies).encode('utf-8'))
|
||||
|
||||
def _load_cookies(self):
|
||||
with open(self._TMP_FILES['cookies'].name, 'rb') as f:
|
||||
cookies = json.loads(f.read().decode('utf-8'))
|
||||
for cookie in cookies:
|
||||
if cookie['httponly'] is True:
|
||||
cookie['rest'] = {'httpOnly': None}
|
||||
if 'expiry' in cookie:
|
||||
cookie['expire_time'] = cookie['expiry']
|
||||
self.extractor._set_cookie(**compat_kwargs(cookie))
|
||||
|
||||
def get(self, url, html=None, video_id=None, note=None, note2='Executing JS on webpage', headers={}, jscode='saveAndExit();'):
|
||||
"""
|
||||
Downloads webpage (if needed) and executes JS
|
||||
|
||||
Params:
|
||||
url: website url
|
||||
html: optional, html code of website
|
||||
video_id: video id
|
||||
note: optional, displayed when downloading webpage
|
||||
note2: optional, displayed when executing JS
|
||||
headers: custom http headers
|
||||
jscode: code to be executed when page is loaded
|
||||
|
||||
Returns tuple with:
|
||||
* downloaded website (after JS execution)
|
||||
* anything you print with `console.log` (but not inside `page.execute`!)
|
||||
|
||||
In most cases you don't need to add any `jscode`.
|
||||
It is executed in `page.onLoadFinished`.
|
||||
`saveAndExit();` is mandatory, use it instead of `phantom.exit()`
|
||||
It is possible to wait for some element on the webpage, for example:
|
||||
var check = function() {
|
||||
var elementFound = page.evaluate(function() {
|
||||
return document.querySelector('#b.done') !== null;
|
||||
});
|
||||
if(elementFound)
|
||||
saveAndExit();
|
||||
else
|
||||
window.setTimeout(check, 500);
|
||||
}
|
||||
|
||||
page.evaluate(function(){
|
||||
document.querySelector('#a').click();
|
||||
});
|
||||
check();
|
||||
"""
|
||||
if 'saveAndExit();' not in jscode:
|
||||
raise ExtractorError('`saveAndExit();` not found in `jscode`')
|
||||
if not html:
|
||||
html = self.extractor._download_webpage(url, video_id, note=note, headers=headers)
|
||||
with open(self._TMP_FILES['html'].name, 'wb') as f:
|
||||
f.write(html.encode('utf-8'))
|
||||
|
||||
self._save_cookies(url)
|
||||
|
||||
replaces = self.options
|
||||
replaces['url'] = url
|
||||
user_agent = headers.get('User-Agent') or std_headers['User-Agent']
|
||||
replaces['ua'] = user_agent.replace('"', '\\"')
|
||||
replaces['jscode'] = jscode
|
||||
|
||||
for x in self._TMP_FILE_NAMES:
|
||||
replaces[x] = self._TMP_FILES[x].name.replace('\\', '\\\\').replace('"', '\\"')
|
||||
|
||||
with open(self._TMP_FILES['script'].name, 'wb') as f:
|
||||
f.write(self._TEMPLATE.format(**replaces).encode('utf-8'))
|
||||
|
||||
if video_id is None:
|
||||
self.extractor.to_screen('%s' % (note2,))
|
||||
else:
|
||||
self.extractor.to_screen('%s: %s' % (video_id, note2))
|
||||
|
||||
p = subprocess.Popen([
|
||||
self.exe, '--ssl-protocol=any',
|
||||
self._TMP_FILES['script'].name
|
||||
], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||
out, err = p.communicate()
|
||||
if p.returncode != 0:
|
||||
raise ExtractorError(
|
||||
'Executing JS failed\n:' + encodeArgument(err))
|
||||
with open(self._TMP_FILES['html'].name, 'rb') as f:
|
||||
html = f.read().decode('utf-8')
|
||||
|
||||
self._load_cookies()
|
||||
|
||||
return (html, encodeArgument(out))
|
||||
|
||||
|
||||
class OpenloadIE(InfoExtractor):
|
||||
@@ -58,6 +288,8 @@ class OpenloadIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
return re.findall(
|
||||
@@ -66,47 +298,22 @@ class OpenloadIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage('https://openload.co/embed/%s/' % video_id, video_id)
|
||||
url = 'https://openload.co/embed/%s/' % video_id
|
||||
headers = {
|
||||
'User-Agent': self._USER_AGENT,
|
||||
}
|
||||
|
||||
webpage = self._download_webpage(url, video_id, headers=headers)
|
||||
|
||||
if 'File not found' in webpage or 'deleted by the owner' in webpage:
|
||||
raise ExtractorError('File not found', expected=True)
|
||||
raise ExtractorError('File not found', expected=True, video_id=video_id)
|
||||
|
||||
ol_id = self._search_regex(
|
||||
'<span[^>]+id="[^"]+"[^>]*>([0-9A-Za-z]+)</span>',
|
||||
webpage, 'openload ID')
|
||||
phantom = PhantomJSwrapper(self, required_version='2.0')
|
||||
webpage, _ = phantom.get(url, html=webpage, video_id=video_id, headers=headers)
|
||||
|
||||
decoded = ''
|
||||
a = ol_id[0:24]
|
||||
b = []
|
||||
for i in range(0, len(a), 8):
|
||||
b.append(int(a[i:i + 8] or '0', 16))
|
||||
ol_id = ol_id[24:]
|
||||
j = 0
|
||||
k = 0
|
||||
while j < len(ol_id):
|
||||
c = 128
|
||||
d = 0
|
||||
e = 0
|
||||
f = 0
|
||||
_more = True
|
||||
while _more:
|
||||
if j + 1 >= len(ol_id):
|
||||
c = 143
|
||||
f = int(ol_id[j:j + 2] or '0', 16)
|
||||
j += 2
|
||||
d += (f & 127) << e
|
||||
e += 7
|
||||
_more = f >= c
|
||||
g = d ^ b[k % 3]
|
||||
for i in range(4):
|
||||
char_dec = (g >> 8 * i) & (c + 127)
|
||||
char = compat_chr(char_dec)
|
||||
if char != '#':
|
||||
decoded += char
|
||||
k += 1
|
||||
decoded_id = get_element_by_id('streamurl', webpage)
|
||||
|
||||
video_url = 'https://openload.co/stream/%s?mime=true'
|
||||
video_url = video_url % decoded
|
||||
video_url = 'https://openload.co/stream/%s?mime=true' % decoded_id
|
||||
|
||||
title = self._og_search_title(webpage, default=None) or self._search_regex(
|
||||
r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage,
|
||||
@@ -114,15 +321,17 @@ class OpenloadIE(InfoExtractor):
|
||||
'description', webpage, 'title', fatal=True)
|
||||
|
||||
entries = self._parse_html5_media_entries(url, webpage, video_id)
|
||||
subtitles = entries[0]['subtitles'] if entries else None
|
||||
entry = entries[0] if entries else {}
|
||||
subtitles = entry.get('subtitles')
|
||||
|
||||
info_dict = {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'thumbnail': self._og_search_thumbnail(webpage, default=None),
|
||||
'thumbnail': entry.get('thumbnail') or self._og_search_thumbnail(webpage, default=None),
|
||||
'url': video_url,
|
||||
# Seems all videos have extensions in their titles
|
||||
'ext': determine_ext(title, 'mp4'),
|
||||
'subtitles': subtitles,
|
||||
'http_headers': headers,
|
||||
}
|
||||
return info_dict
|
||||
|
@@ -6,14 +6,15 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
HEADRequest,
|
||||
unified_strdate,
|
||||
strip_jsonp,
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
HEADRequest,
|
||||
int_or_none,
|
||||
orderedSet,
|
||||
remove_end,
|
||||
strip_jsonp,
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
@@ -307,3 +308,108 @@ class ORFIPTVIE(InfoExtractor):
|
||||
'upload_date': upload_date,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class ORFFM4StoryIE(InfoExtractor):
|
||||
IE_NAME = 'orf:fm4:story'
|
||||
IE_DESC = 'fm4.orf.at stories'
|
||||
_VALID_URL = r'https?://fm4\.orf\.at/stories/(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://fm4.orf.at/stories/2865738/',
|
||||
'playlist': [{
|
||||
'md5': 'e1c2c706c45c7b34cf478bbf409907ca',
|
||||
'info_dict': {
|
||||
'id': '547792',
|
||||
'ext': 'flv',
|
||||
'title': 'Manu Delago und Inner Tongue live',
|
||||
'description': 'Manu Delago und Inner Tongue haben bei der FM4 Soundpark Session live alles gegeben. Hier gibt es Fotos und die gesamte Session als Video.',
|
||||
'duration': 1748.52,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'upload_date': '20170913',
|
||||
},
|
||||
}, {
|
||||
'md5': 'c6dd2179731f86f4f55a7b49899d515f',
|
||||
'info_dict': {
|
||||
'id': '547798',
|
||||
'ext': 'flv',
|
||||
'title': 'Manu Delago und Inner Tongue live (2)',
|
||||
'duration': 1504.08,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'upload_date': '20170913',
|
||||
'description': 'Manu Delago und Inner Tongue haben bei der FM4 Soundpark Session live alles gegeben. Hier gibt es Fotos und die gesamte Session als Video.',
|
||||
},
|
||||
}],
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
story_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, story_id)
|
||||
|
||||
entries = []
|
||||
all_ids = orderedSet(re.findall(r'data-video(?:id)?="(\d+)"', webpage))
|
||||
for idx, video_id in enumerate(all_ids):
|
||||
data = self._download_json(
|
||||
'http://bits.orf.at/filehandler/static-api/json/current/data.json?file=%s' % video_id,
|
||||
video_id)[0]
|
||||
|
||||
duration = float_or_none(data['duration'], 1000)
|
||||
|
||||
video = data['sources']['q8c']
|
||||
load_balancer_url = video['loadBalancerUrl']
|
||||
abr = int_or_none(video.get('audioBitrate'))
|
||||
vbr = int_or_none(video.get('bitrate'))
|
||||
fps = int_or_none(video.get('videoFps'))
|
||||
width = int_or_none(video.get('videoWidth'))
|
||||
height = int_or_none(video.get('videoHeight'))
|
||||
thumbnail = video.get('preview')
|
||||
|
||||
rendition = self._download_json(
|
||||
load_balancer_url, video_id, transform_source=strip_jsonp)
|
||||
|
||||
f = {
|
||||
'abr': abr,
|
||||
'vbr': vbr,
|
||||
'fps': fps,
|
||||
'width': width,
|
||||
'height': height,
|
||||
}
|
||||
|
||||
formats = []
|
||||
for format_id, format_url in rendition['redirect'].items():
|
||||
if format_id == 'rtmp':
|
||||
ff = f.copy()
|
||||
ff.update({
|
||||
'url': format_url,
|
||||
'format_id': format_id,
|
||||
})
|
||||
formats.append(ff)
|
||||
elif determine_ext(format_url) == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
format_url, video_id, f4m_id=format_id))
|
||||
elif determine_ext(format_url) == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, video_id, 'mp4', m3u8_id=format_id))
|
||||
else:
|
||||
continue
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = remove_end(self._og_search_title(webpage), ' - fm4.ORF.at')
|
||||
if idx >= 1:
|
||||
# Titles are duplicates, make them unique
|
||||
title += ' (' + str(idx + 1) + ')'
|
||||
description = self._og_search_description(webpage)
|
||||
upload_date = unified_strdate(self._html_search_meta(
|
||||
'dc.date', webpage, 'upload date'))
|
||||
|
||||
entries.append({
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'thumbnail': thumbnail,
|
||||
'upload_date': upload_date,
|
||||
'formats': formats,
|
||||
})
|
||||
|
||||
return self.playlist_result(entries)
|
||||
|
78
youtube_dl/extractor/popcorntv.py
Normal file
78
youtube_dl/extractor/popcorntv.py
Normal file
@@ -0,0 +1,78 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
int_or_none,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
class PopcornTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://[^/]+\.popcorntv\.it/guarda/(?P<display_id>[^/]+)/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://animemanga.popcorntv.it/guarda/food-wars-battaglie-culinarie-episodio-01/9183',
|
||||
'md5': '47d65a48d147caf692ab8562fe630b45',
|
||||
'info_dict': {
|
||||
'id': '9183',
|
||||
'display_id': 'food-wars-battaglie-culinarie-episodio-01',
|
||||
'ext': 'mp4',
|
||||
'title': 'Food Wars, Battaglie Culinarie | Episodio 01',
|
||||
'description': 'md5:b8bea378faae4651d3b34c6e112463d0',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'timestamp': 1497610857,
|
||||
'upload_date': '20170616',
|
||||
'duration': 1440,
|
||||
'view_count': int,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://cinema.popcorntv.it/guarda/smash-cut/10433',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
display_id, video_id = mobj.group('display_id', 'id')
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
m3u8_url = extract_attributes(
|
||||
self._search_regex(
|
||||
r'(<link[^>]+itemprop=["\'](?:content|embed)Url[^>]*>)',
|
||||
webpage, 'content'
|
||||
))['href']
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
m3u8_url, display_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls')
|
||||
|
||||
title = self._search_regex(
|
||||
r'<h1[^>]+itemprop=["\']name[^>]*>([^<]+)', webpage,
|
||||
'title', default=None) or self._og_search_title(webpage)
|
||||
|
||||
description = self._html_search_regex(
|
||||
r'(?s)<article[^>]+itemprop=["\']description[^>]*>(.+?)</article>',
|
||||
webpage, 'description', fatal=False)
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
timestamp = unified_timestamp(self._html_search_meta(
|
||||
'uploadDate', webpage, 'timestamp'))
|
||||
print(self._html_search_meta(
|
||||
'duration', webpage))
|
||||
duration = int_or_none(self._html_search_meta(
|
||||
'duration', webpage), invscale=60)
|
||||
view_count = int_or_none(self._html_search_meta(
|
||||
'interactionCount', webpage, 'view count'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'timestamp': timestamp,
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
'formats': formats,
|
||||
}
|
@@ -14,7 +14,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class PornFlipIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?pornflip\.com/(?:v|embed)/(?P<id>[0-9A-Za-z]{11})'
|
||||
_VALID_URL = r'https?://(?:www\.)?pornflip\.com/(?:v|embed)/(?P<id>[0-9A-Za-z-]{11})'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.pornflip.com/v/wz7DfNhMmep',
|
||||
'md5': '98c46639849145ae1fd77af532a9278c',
|
||||
@@ -34,6 +34,12 @@ class PornFlipIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.pornflip.com/embed/wz7DfNhMmep',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.pornflip.com/v/EkRD6-vS2-s',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.pornflip.com/embed/EkRD6-vS2-s',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -1,5 +1,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
@@ -35,6 +37,8 @@ class RedditIE(InfoExtractor):
|
||||
'https://v.redd.it/%s/DASHPlaylist.mpd' % video_id, video_id,
|
||||
mpd_id='dash', fatal=False))
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_id,
|
||||
@@ -43,7 +47,7 @@ class RedditIE(InfoExtractor):
|
||||
|
||||
|
||||
class RedditRIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?reddit\.com/r/[^/]+/comments/(?P<id>[^/]+)'
|
||||
_VALID_URL = r'(?P<url>https?://(?:www\.)?reddit\.com/r/[^/]+/comments/(?P<id>[^/?#&]+))'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/',
|
||||
'info_dict': {
|
||||
@@ -81,10 +85,13 @@ class RedditRIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
url, video_id = mobj.group('url', 'id')
|
||||
|
||||
video_id = self._match_id(url)
|
||||
|
||||
data = self._download_json(
|
||||
url + '.json', video_id)[0]['data']['children'][0]['data']
|
||||
url + '/.json', video_id)[0]['data']['children'][0]['data']
|
||||
|
||||
video_url = data['url']
|
||||
|
||||
|
@@ -12,10 +12,10 @@ class RtlNlIE(InfoExtractor):
|
||||
IE_NAME = 'rtl.nl'
|
||||
IE_DESC = 'rtl.nl and rtlxl.nl'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?:www\.)?
|
||||
https?://(?:(?:www|static)\.)?
|
||||
(?:
|
||||
rtlxl\.nl/[^\#]*\#!/[^/]+/|
|
||||
rtl\.nl/(?:system/videoplayer/(?:[^/]+/)+(?:video_)?embed\.html\b.+?\buuid=|video/)
|
||||
rtl\.nl/(?:(?:system/videoplayer/(?:[^/]+/)+(?:video_)?embed\.html|embed)\b.+?\buuid=|video/)
|
||||
)
|
||||
(?P<id>[0-9a-f-]+)'''
|
||||
|
||||
@@ -73,6 +73,9 @@ class RtlNlIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.rtl.nl/video/c603c9c2-601d-4b5e-8175-64f1e942dc7d/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://static.rtl.nl/embed/?uuid=1a2970fc-5c0b-43ff-9fdc-927e39e6d1bc&autoplay=false&publicatiepunt=rtlnieuwsnl',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -10,6 +10,7 @@ from ..compat import (
|
||||
compat_struct_unpack,
|
||||
)
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
remove_end,
|
||||
@@ -84,6 +85,18 @@ class RTVEALaCartaIE(InfoExtractor):
|
||||
'title': 'TODO',
|
||||
},
|
||||
'skip': 'The f4m manifest can\'t be used yet',
|
||||
}, {
|
||||
'url': 'http://www.rtve.es/alacarta/videos/servir-y-proteger/servir-proteger-capitulo-104/4236788/',
|
||||
'md5': 'e55e162379ad587e9640eda4f7353c0f',
|
||||
'info_dict': {
|
||||
'id': '4236788',
|
||||
'ext': 'mp4',
|
||||
'title': 'Servir y proteger - Capítulo 104 ',
|
||||
'duration': 3222.0,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # requires ffmpeg
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.rtve.es/m/alacarta/videos/cuentame-como-paso/cuentame-como-paso-t16-ultimo-minuto-nuestra-vida-capitulo-276/2969138/?media=tve',
|
||||
'only_matching': True,
|
||||
@@ -107,24 +120,41 @@ class RTVEALaCartaIE(InfoExtractor):
|
||||
video_id)['page']['items'][0]
|
||||
if info['state'] == 'DESPU':
|
||||
raise ExtractorError('The video is no longer available', expected=True)
|
||||
title = info['title']
|
||||
png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/%s/videos/%s.png' % (self._manager, video_id)
|
||||
png_request = sanitized_Request(png_url)
|
||||
png_request.add_header('Referer', url)
|
||||
png = self._download_webpage(png_request, video_id, 'Downloading url information')
|
||||
video_url = _decrypt_url(png)
|
||||
if not video_url.endswith('.f4m'):
|
||||
ext = determine_ext(video_url)
|
||||
|
||||
formats = []
|
||||
if not video_url.endswith('.f4m') and ext != 'm3u8':
|
||||
if '?' not in video_url:
|
||||
video_url = video_url.replace('resources/', 'auth/resources/')
|
||||
video_url = video_url.replace('.net.rtve', '.multimedia.cdn.rtve')
|
||||
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
video_url, video_id, ext='mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
elif ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
video_url, video_id, f4m_id='hds', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = None
|
||||
if info.get('sbtFile') is not None:
|
||||
subtitles = self.extract_subtitles(video_id, info['sbtFile'])
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': info['title'],
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnail': info.get('image'),
|
||||
'page_url': url,
|
||||
'subtitles': subtitles,
|
||||
|
@@ -25,7 +25,7 @@ class RUHDIE(InfoExtractor):
|
||||
video_url = self._html_search_regex(
|
||||
r'<param name="src" value="([^"]+)"', webpage, 'video url')
|
||||
title = self._html_search_regex(
|
||||
r'<title>([^<]+) RUHD.ru - Видео Высокого качества №1 в России!</title>',
|
||||
r'<title>([^<]+) RUHD\.ru - Видео Высокого качества №1 в России!</title>',
|
||||
webpage, 'title')
|
||||
description = self._html_search_regex(
|
||||
r'(?s)<div id="longdesc">(.+?)<span id="showlink">',
|
||||
|
@@ -265,8 +265,10 @@ class RutubePlaylistIE(RutubePlaylistBaseIE):
|
||||
|
||||
_PAGE_TEMPLATE = 'http://rutube.ru/api/playlist/%s/%s/?page=%s&format=json'
|
||||
|
||||
@staticmethod
|
||||
def suitable(url):
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
if not super(RutubePlaylistIE, cls).suitable(url):
|
||||
return False
|
||||
params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
|
||||
return params.get('pl_type', [None])[0] and int_or_none(params.get('pl_id', [None])[0])
|
||||
|
||||
|
@@ -1,60 +1,190 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .adobepass import AdobePassIE
|
||||
import datetime
|
||||
import json
|
||||
import hashlib
|
||||
import hmac
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .anvato import AnvatoIE
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
smuggle_url,
|
||||
update_url_query,
|
||||
urlencode_postdata,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
|
||||
class ScrippsNetworksWatchIE(AdobePassIE):
|
||||
class ScrippsNetworksWatchIE(InfoExtractor):
|
||||
IE_NAME = 'scrippsnetworks:watch'
|
||||
_VALID_URL = r'https?://watch\.(?:hgtv|foodnetwork|travelchannel|diynetwork|cookingchanneltv)\.com/player\.[A-Z0-9]+\.html#(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://watch.hgtv.com/player.HNT.html#0256538',
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
watch\.
|
||||
(?P<site>hgtv|foodnetwork|travelchannel|diynetwork|cookingchanneltv|geniuskitchen)\.com/
|
||||
(?:
|
||||
player\.[A-Z0-9]+\.html\#|
|
||||
show/(?:[^/]+/){2}|
|
||||
player/
|
||||
)
|
||||
(?P<id>\d+)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'http://watch.hgtv.com/show/HGTVE/Best-Ever-Treehouses/2241515/Best-Ever-Treehouses/',
|
||||
'md5': '26545fd676d939954c6808274bdb905a',
|
||||
'info_dict': {
|
||||
'id': '0256538',
|
||||
'id': '4173834',
|
||||
'ext': 'mp4',
|
||||
'title': 'Seeking a Wow House',
|
||||
'description': 'Buyers retiring in Palm Springs, California, want a modern house with major wow factor. They\'re also looking for a pool and a large, open floorplan with tall windows looking out at the views.',
|
||||
'uploader': 'SCNI',
|
||||
'upload_date': '20170207',
|
||||
'timestamp': 1486450493,
|
||||
'title': 'Best Ever Treehouses',
|
||||
'description': "We're searching for the most over the top treehouses.",
|
||||
'uploader': 'ANV',
|
||||
'upload_date': '20170922',
|
||||
'timestamp': 1506056400,
|
||||
},
|
||||
'skip': 'requires TV provider authentication',
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': [AnvatoIE.ie_key()],
|
||||
}, {
|
||||
'url': 'http://watch.diynetwork.com/show/DSAL/Salvage-Dawgs/2656646/Covington-Church/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://watch.diynetwork.com/player.HNT.html#2656646',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://watch.geniuskitchen.com/player/3787617/Ample-Hills-Ice-Cream-Bike/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_SNI_TABLE = {
|
||||
'hgtv': 'hgtv',
|
||||
'diynetwork': 'diy',
|
||||
'foodnetwork': 'food',
|
||||
'cookingchanneltv': 'cook',
|
||||
'travelchannel': 'trav',
|
||||
'geniuskitchen': 'genius',
|
||||
}
|
||||
_SNI_HOST = 'web.api.video.snidigital.com'
|
||||
|
||||
_AWS_REGION = 'us-east-1'
|
||||
_AWS_IDENTITY_ID_JSON = json.dumps({
|
||||
'IdentityId': '%s:7655847c-0ae7-4d9b-80d6-56c062927eb3' % _AWS_REGION
|
||||
})
|
||||
_AWS_USER_AGENT = 'aws-sdk-js/2.80.0 callback'
|
||||
_AWS_API_KEY = 'E7wSQmq0qK6xPrF13WmzKiHo4BQ7tip4pQcSXVl1'
|
||||
_AWS_SERVICE = 'execute-api'
|
||||
_AWS_REQUEST = 'aws4_request'
|
||||
_AWS_SIGNED_HEADERS = ';'.join([
|
||||
'host', 'x-amz-date', 'x-amz-security-token', 'x-api-key'])
|
||||
_AWS_CANONICAL_REQUEST_TEMPLATE = '''GET
|
||||
%(uri)s
|
||||
|
||||
host:%(host)s
|
||||
x-amz-date:%(date)s
|
||||
x-amz-security-token:%(token)s
|
||||
x-api-key:%(key)s
|
||||
|
||||
%(signed_headers)s
|
||||
%(payload_hash)s'''
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
channel = self._parse_json(self._search_regex(
|
||||
r'"channels"\s*:\s*(\[.+\])',
|
||||
webpage, 'channels'), video_id)[0]
|
||||
video_data = next(v for v in channel['videos'] if v.get('nlvid') == video_id)
|
||||
title = video_data['title']
|
||||
release_url = video_data['releaseUrl']
|
||||
if video_data.get('restricted'):
|
||||
requestor_id = self._search_regex(
|
||||
r'requestorId\s*=\s*"([^"]+)";', webpage, 'requestor id')
|
||||
resource = self._get_mvpd_resource(
|
||||
requestor_id, title, video_id,
|
||||
video_data.get('ratings', [{}])[0].get('rating'))
|
||||
auth = self._extract_mvpd_auth(
|
||||
url, video_id, requestor_id, resource)
|
||||
release_url = update_url_query(release_url, {'auth': auth})
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
site_id, video_id = mobj.group('site', 'id')
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'url': smuggle_url(release_url, {'force_smil_url': True}),
|
||||
'description': video_data.get('description'),
|
||||
'thumbnail': video_data.get('thumbnailUrl'),
|
||||
'series': video_data.get('showTitle'),
|
||||
'season_number': int_or_none(video_data.get('season')),
|
||||
'episode_number': int_or_none(video_data.get('episodeNumber')),
|
||||
'ie_key': 'ThePlatform',
|
||||
def aws_hash(s):
|
||||
return hashlib.sha256(s.encode('utf-8')).hexdigest()
|
||||
|
||||
token = self._download_json(
|
||||
'https://cognito-identity.us-east-1.amazonaws.com/', video_id,
|
||||
data=self._AWS_IDENTITY_ID_JSON.encode('utf-8'),
|
||||
headers={
|
||||
'Accept': '*/*',
|
||||
'Content-Type': 'application/x-amz-json-1.1',
|
||||
'Referer': url,
|
||||
'X-Amz-Content-Sha256': aws_hash(self._AWS_IDENTITY_ID_JSON),
|
||||
'X-Amz-Target': 'AWSCognitoIdentityService.GetOpenIdToken',
|
||||
'X-Amz-User-Agent': self._AWS_USER_AGENT,
|
||||
})['Token']
|
||||
|
||||
sts = self._download_xml(
|
||||
'https://sts.amazonaws.com/', video_id, data=urlencode_postdata({
|
||||
'Action': 'AssumeRoleWithWebIdentity',
|
||||
'RoleArn': 'arn:aws:iam::710330595350:role/Cognito_WebAPIUnauth_Role',
|
||||
'RoleSessionName': 'web-identity',
|
||||
'Version': '2011-06-15',
|
||||
'WebIdentityToken': token,
|
||||
}), headers={
|
||||
'Referer': url,
|
||||
'X-Amz-User-Agent': self._AWS_USER_AGENT,
|
||||
'Content-Type': 'application/x-www-form-urlencoded; charset=utf-8',
|
||||
})
|
||||
|
||||
def get(key):
|
||||
return xpath_text(
|
||||
sts, './/{https://sts.amazonaws.com/doc/2011-06-15/}%s' % key,
|
||||
fatal=True)
|
||||
|
||||
access_key_id = get('AccessKeyId')
|
||||
secret_access_key = get('SecretAccessKey')
|
||||
session_token = get('SessionToken')
|
||||
|
||||
# Task 1: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html
|
||||
uri = '/1/web/brands/%s/episodes/scrid/%s' % (self._SNI_TABLE[site_id], video_id)
|
||||
datetime_now = datetime.datetime.utcnow().strftime('%Y%m%dT%H%M%SZ')
|
||||
date = datetime_now[:8]
|
||||
canonical_string = self._AWS_CANONICAL_REQUEST_TEMPLATE % {
|
||||
'uri': uri,
|
||||
'host': self._SNI_HOST,
|
||||
'date': datetime_now,
|
||||
'token': session_token,
|
||||
'key': self._AWS_API_KEY,
|
||||
'signed_headers': self._AWS_SIGNED_HEADERS,
|
||||
'payload_hash': aws_hash(''),
|
||||
}
|
||||
|
||||
# Task 2: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-string-to-sign.html
|
||||
credential_string = '/'.join([date, self._AWS_REGION, self._AWS_SERVICE, self._AWS_REQUEST])
|
||||
string_to_sign = '\n'.join([
|
||||
'AWS4-HMAC-SHA256', datetime_now, credential_string,
|
||||
aws_hash(canonical_string)])
|
||||
|
||||
# Task 3: http://docs.aws.amazon.com/general/latest/gr/sigv4-calculate-signature.html
|
||||
def aws_hmac(key, msg):
|
||||
return hmac.new(key, msg.encode('utf-8'), hashlib.sha256)
|
||||
|
||||
def aws_hmac_digest(key, msg):
|
||||
return aws_hmac(key, msg).digest()
|
||||
|
||||
def aws_hmac_hexdigest(key, msg):
|
||||
return aws_hmac(key, msg).hexdigest()
|
||||
|
||||
k_secret = 'AWS4' + secret_access_key
|
||||
k_date = aws_hmac_digest(k_secret.encode('utf-8'), date)
|
||||
k_region = aws_hmac_digest(k_date, self._AWS_REGION)
|
||||
k_service = aws_hmac_digest(k_region, self._AWS_SERVICE)
|
||||
k_signing = aws_hmac_digest(k_service, self._AWS_REQUEST)
|
||||
|
||||
signature = aws_hmac_hexdigest(k_signing, string_to_sign)
|
||||
|
||||
auth_header = ', '.join([
|
||||
'AWS4-HMAC-SHA256 Credential=%s' % '/'.join(
|
||||
[access_key_id, date, self._AWS_REGION, self._AWS_SERVICE, self._AWS_REQUEST]),
|
||||
'SignedHeaders=%s' % self._AWS_SIGNED_HEADERS,
|
||||
'Signature=%s' % signature,
|
||||
])
|
||||
|
||||
mcp_id = self._download_json(
|
||||
'https://%s%s' % (self._SNI_HOST, uri), video_id, headers={
|
||||
'Accept': '*/*',
|
||||
'Referer': url,
|
||||
'Authorization': auth_header,
|
||||
'X-Amz-Date': datetime_now,
|
||||
'X-Amz-Security-Token': session_token,
|
||||
'X-Api-Key': self._AWS_API_KEY,
|
||||
})['results'][0]['mcpId']
|
||||
|
||||
return self.url_result(
|
||||
smuggle_url(
|
||||
'anvato:anvato_scripps_app_web_prod_0837996dbe373629133857ae9eb72e740424d80a:%s' % mcp_id,
|
||||
{'geo_countries': ['US']}),
|
||||
AnvatoIE.ie_key(), video_id=mcp_id)
|
||||
|
@@ -18,46 +18,32 @@ from ..utils import (
|
||||
|
||||
class ShahidIE(InfoExtractor):
|
||||
_NETRC_MACHINE = 'shahid'
|
||||
_VALID_URL = r'https?://shahid\.mbc\.net/ar/(?P<type>episode|movie)/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://shahid\.mbc\.net/ar/(?:serie|show|movie)s/[^/]+/(?P<type>episode|clip|movie)-(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://shahid.mbc.net/ar/episode/90574/%D8%A7%D9%84%D9%85%D9%84%D9%83-%D8%B9%D8%A8%D8%AF%D8%A7%D9%84%D9%84%D9%87-%D8%A7%D9%84%D8%A5%D9%86%D8%B3%D8%A7%D9%86-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D9%83%D9%84%D9%8A%D8%A8-3.html',
|
||||
'url': 'https://shahid.mbc.net/ar/shows/%D9%85%D8%AC%D9%84%D8%B3-%D8%A7%D9%84%D8%B4%D8%A8%D8%A7%D8%A8-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D9%83%D9%84%D9%8A%D8%A8-1/clip-275286',
|
||||
'info_dict': {
|
||||
'id': '90574',
|
||||
'id': '275286',
|
||||
'ext': 'mp4',
|
||||
'title': 'الملك عبدالله الإنسان الموسم 1 كليب 3',
|
||||
'description': 'الفيلم الوثائقي - الملك عبد الله الإنسان',
|
||||
'duration': 2972,
|
||||
'timestamp': 1422057420,
|
||||
'upload_date': '20150123',
|
||||
'title': 'مجلس الشباب الموسم 1 كليب 1',
|
||||
'timestamp': 1506988800,
|
||||
'upload_date': '20171003',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://shahid.mbc.net/ar/movie/151746/%D8%A7%D9%84%D9%82%D9%86%D8%A7%D8%B5%D8%A9.html',
|
||||
'url': 'https://shahid.mbc.net/ar/movies/%D8%A7%D9%84%D9%82%D9%86%D8%A7%D8%B5%D8%A9/movie-151746',
|
||||
'only_matching': True
|
||||
}, {
|
||||
# shahid plus subscriber only
|
||||
'url': 'https://shahid.mbc.net/ar/episode/90511/%D9%85%D8%B1%D8%A7%D9%8A%D8%A7-2011-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1.html',
|
||||
'url': 'https://shahid.mbc.net/ar/series/%D9%85%D8%B1%D8%A7%D9%8A%D8%A7-2011-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1/episode-90511',
|
||||
'only_matching': True
|
||||
}]
|
||||
|
||||
def _real_initialize(self):
|
||||
email, password = self._get_login_info()
|
||||
if email is None:
|
||||
return
|
||||
|
||||
def _api2_request(self, *args, **kwargs):
|
||||
try:
|
||||
user_data = self._download_json(
|
||||
'https://shahid.mbc.net/wd/service/users/login',
|
||||
None, 'Logging in', data=json.dumps({
|
||||
'email': email,
|
||||
'password': password,
|
||||
'basic': 'false',
|
||||
}).encode('utf-8'), headers={
|
||||
'Content-Type': 'application/json; charset=UTF-8',
|
||||
})['user']
|
||||
return self._download_json(*args, **kwargs)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError):
|
||||
fail_data = self._parse_json(
|
||||
@@ -69,6 +55,21 @@ class ShahidIE(InfoExtractor):
|
||||
raise ExtractorError(faults_message, expected=True)
|
||||
raise
|
||||
|
||||
def _real_initialize(self):
|
||||
email, password = self._get_login_info()
|
||||
if email is None:
|
||||
return
|
||||
|
||||
user_data = self._api2_request(
|
||||
'https://shahid.mbc.net/wd/service/users/login',
|
||||
None, 'Logging in', data=json.dumps({
|
||||
'email': email,
|
||||
'password': password,
|
||||
'basic': 'false',
|
||||
}).encode('utf-8'), headers={
|
||||
'Content-Type': 'application/json; charset=UTF-8',
|
||||
})['user']
|
||||
|
||||
self._download_webpage(
|
||||
'https://shahid.mbc.net/populateContext',
|
||||
None, 'Populate Context', data=urlencode_postdata({
|
||||
@@ -93,15 +94,17 @@ class ShahidIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
page_type, video_id = re.match(self._VALID_URL, url).groups()
|
||||
if page_type == 'clip':
|
||||
page_type = 'episode'
|
||||
|
||||
player = self._get_api_data(self._download_json(
|
||||
'https://shahid.mbc.net/arContent/getPlayerContent-param-.id-%s.type-player.html' % video_id,
|
||||
video_id, 'Downloading player JSON'))
|
||||
playout = self._api2_request(
|
||||
'https://api2.shahid.net/proxy/v2/playout/url/' + video_id,
|
||||
video_id, 'Downloading player JSON')['playout']
|
||||
|
||||
if player.get('drm'):
|
||||
if playout.get('drm'):
|
||||
raise ExtractorError('This video is DRM protected.', expected=True)
|
||||
|
||||
formats = self._extract_m3u8_formats(player['url'], video_id, 'mp4')
|
||||
formats = self._extract_m3u8_formats(playout['url'], video_id, 'mp4')
|
||||
self._sort_formats(formats)
|
||||
|
||||
video = self._get_api_data(self._download_json(
|
||||
|
34
youtube_dl/extractor/slideslive.py
Normal file
34
youtube_dl/extractor/slideslive.py
Normal file
@@ -0,0 +1,34 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError
|
||||
|
||||
|
||||
class SlidesLiveIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://slideslive\.com/(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://slideslive.com/38902413/gcc-ia16-backend',
|
||||
'md5': 'b29fcd6c6952d0c79c5079b0e7a07e6f',
|
||||
'info_dict': {
|
||||
'id': 'LMtgR8ba0b0',
|
||||
'ext': 'mp4',
|
||||
'title': '38902413: external video',
|
||||
'description': '3890241320170925-9-1yd6ech.mp4',
|
||||
'uploader': 'SlidesLive Administrator',
|
||||
'uploader_id': 'UC62SdArr41t_-_fX40QCLRw',
|
||||
'upload_date': '20170925',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
video_data = self._download_json(
|
||||
url, video_id, headers={'Accept': 'application/json'})
|
||||
service_name = video_data['video_service_name']
|
||||
if service_name == 'YOUTUBE':
|
||||
yt_video_id = video_data['video_service_id']
|
||||
return self.url_result(yt_video_id, 'Youtube', video_id=yt_video_id)
|
||||
else:
|
||||
raise ExtractorError(
|
||||
'Unsupported service name: {0}'.format(service_name), expected=True)
|
@@ -44,6 +44,7 @@ class SpikeIE(MTVServicesInfoExtractor):
|
||||
_FEED_URL = 'http://www.spike.com/feeds/mrss/'
|
||||
_MOBILE_TEMPLATE = 'http://m.spike.com/videos/video.rbml?id=%s'
|
||||
_CUSTOM_URL_REGEX = re.compile(r'spikenetworkapp://([^/]+/[-a-fA-F0-9]+)')
|
||||
_GEO_COUNTRIES = ['US']
|
||||
|
||||
def _extract_mgid(self, webpage):
|
||||
mgid = super(SpikeIE, self)._extract_mgid(webpage)
|
||||
|
@@ -66,7 +66,7 @@ class StanfordOpenClassroomIE(InfoExtractor):
|
||||
r'(?s)<description>([^<]+)</description>',
|
||||
coursepage, 'description', fatal=False)
|
||||
|
||||
links = orderedSet(re.findall(r'<a href="(VideoPage.php\?[^"]+)">', coursepage))
|
||||
links = orderedSet(re.findall(r'<a href="(VideoPage\.php\?[^"]+)">', coursepage))
|
||||
info['entries'] = [self.url_result(
|
||||
'http://openclassroom.stanford.edu/MainFolder/%s' % unescapeHTML(l)
|
||||
) for l in links]
|
||||
@@ -84,7 +84,7 @@ class StanfordOpenClassroomIE(InfoExtractor):
|
||||
rootpage = self._download_webpage(rootURL, info['id'],
|
||||
errnote='Unable to download course info page')
|
||||
|
||||
links = orderedSet(re.findall(r'<a href="(CoursePage.php\?[^"]+)">', rootpage))
|
||||
links = orderedSet(re.findall(r'<a href="(CoursePage\.php\?[^"]+)">', rootpage))
|
||||
info['entries'] = [self.url_result(
|
||||
'http://openclassroom.stanford.edu/MainFolder/%s' % unescapeHTML(l)
|
||||
) for l in links]
|
||||
|
@@ -4,8 +4,10 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
ExtractorError,
|
||||
unescapeHTML,
|
||||
get_element_by_class,
|
||||
js_to_json,
|
||||
)
|
||||
|
||||
|
||||
@@ -25,35 +27,39 @@ class SteamIE(InfoExtractor):
|
||||
'url': 'http://store.steampowered.com/video/105600/',
|
||||
'playlist': [
|
||||
{
|
||||
'md5': 'f870007cee7065d7c76b88f0a45ecc07',
|
||||
'md5': '6a294ee0c4b1f47f5bb76a65e31e3592',
|
||||
'info_dict': {
|
||||
'id': '81300',
|
||||
'ext': 'flv',
|
||||
'title': 'Terraria 1.1 Trailer',
|
||||
'id': '2040428',
|
||||
'ext': 'mp4',
|
||||
'title': 'Terraria 1.3 Trailer',
|
||||
'playlist_index': 1,
|
||||
}
|
||||
},
|
||||
{
|
||||
'md5': '61aaf31a5c5c3041afb58fb83cbb5751',
|
||||
'md5': '911672b20064ca3263fa89650ba5a7aa',
|
||||
'info_dict': {
|
||||
'id': '80859',
|
||||
'ext': 'flv',
|
||||
'title': 'Terraria Trailer',
|
||||
'id': '2029566',
|
||||
'ext': 'mp4',
|
||||
'title': 'Terraria 1.2 Trailer',
|
||||
'playlist_index': 2,
|
||||
}
|
||||
}
|
||||
],
|
||||
'info_dict': {
|
||||
'id': '105600',
|
||||
'title': 'Terraria',
|
||||
},
|
||||
'params': {
|
||||
'playlistend': 2,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://steamcommunity.com/sharedfiles/filedetails/?id=242472205',
|
||||
'info_dict': {
|
||||
'id': 'WB5DvDOOvAY',
|
||||
'id': 'X8kpJBlzD2E',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20140329',
|
||||
'title': 'FRONTIERS - Final Greenlight Trailer',
|
||||
'description': 'md5:dc96a773669d0ca1b36c13c1f30250d9',
|
||||
'upload_date': '20140617',
|
||||
'title': 'FRONTIERS - Trapping',
|
||||
'description': 'md5:bf6f7f773def614054089e5769c12a6e',
|
||||
'uploader': 'AAD Productions',
|
||||
'uploader_id': 'AtomicAgeDogGames',
|
||||
}
|
||||
@@ -76,48 +82,65 @@ class SteamIE(InfoExtractor):
|
||||
self.report_age_confirmation()
|
||||
webpage = self._download_webpage(videourl, playlist_id)
|
||||
|
||||
flash_vars = self._parse_json(self._search_regex(
|
||||
r'(?s)rgMovieFlashvars\s*=\s*({.+?});', webpage,
|
||||
'flash vars'), playlist_id, js_to_json)
|
||||
|
||||
playlist_title = None
|
||||
entries = []
|
||||
if fileID:
|
||||
playlist_title = self._html_search_regex(
|
||||
r'<div class="workshopItemTitle">(.+)</div>', webpage, 'title')
|
||||
mweb = re.finditer(r'''(?x)
|
||||
'movie_(?P<videoID>[0-9]+)':\s*\{\s*
|
||||
YOUTUBE_VIDEO_ID:\s*"(?P<youtube_id>[^"]+)",
|
||||
''', webpage)
|
||||
videos = [{
|
||||
'_type': 'url',
|
||||
'url': vid.group('youtube_id'),
|
||||
'ie_key': 'Youtube',
|
||||
} for vid in mweb]
|
||||
else:
|
||||
playlist_title = self._html_search_regex(
|
||||
r'<h2 class="pageheader">(.*?)</h2>', webpage, 'game title')
|
||||
|
||||
mweb = re.finditer(r'''(?x)
|
||||
'movie_(?P<videoID>[0-9]+)':\s*\{\s*
|
||||
FILENAME:\s*"(?P<videoURL>[\w:/\.\?=]+)"
|
||||
(,\s*MOVIE_NAME:\s*\"(?P<videoName>[\w:/\.\?=\+-]+)\")?\s*\},
|
||||
''', webpage)
|
||||
titles = re.finditer(
|
||||
r'<span class="title">(?P<videoName>.+?)</span>', webpage)
|
||||
thumbs = re.finditer(
|
||||
r'<img class="movie_thumb" src="(?P<thumbnail>.+?)">', webpage)
|
||||
videos = []
|
||||
|
||||
for vid, vtitle, thumb in zip(mweb, titles, thumbs):
|
||||
video_id = vid.group('videoID')
|
||||
title = vtitle.group('videoName')
|
||||
video_url = vid.group('videoURL')
|
||||
video_thumb = thumb.group('thumbnail')
|
||||
if not video_url:
|
||||
raise ExtractorError('Cannot find video url for %s' % video_id)
|
||||
videos.append({
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'ext': 'flv',
|
||||
'title': unescapeHTML(title),
|
||||
'thumbnail': video_thumb
|
||||
playlist_title = get_element_by_class('workshopItemTitle', webpage)
|
||||
for movie in flash_vars.values():
|
||||
if not movie:
|
||||
continue
|
||||
youtube_id = movie.get('YOUTUBE_VIDEO_ID')
|
||||
if not youtube_id:
|
||||
continue
|
||||
entries.append({
|
||||
'_type': 'url',
|
||||
'url': youtube_id,
|
||||
'ie_key': 'Youtube',
|
||||
})
|
||||
if not videos:
|
||||
else:
|
||||
playlist_title = get_element_by_class('apphub_AppName', webpage)
|
||||
for movie_id, movie in flash_vars.items():
|
||||
if not movie:
|
||||
continue
|
||||
video_id = self._search_regex(r'movie_(\d+)', movie_id, 'video id', fatal=False)
|
||||
title = movie.get('MOVIE_NAME')
|
||||
if not title or not video_id:
|
||||
continue
|
||||
entry = {
|
||||
'id': video_id,
|
||||
'title': title.replace('+', ' '),
|
||||
}
|
||||
formats = []
|
||||
flv_url = movie.get('FILENAME')
|
||||
if flv_url:
|
||||
formats.append({
|
||||
'format_id': 'flv',
|
||||
'url': flv_url,
|
||||
})
|
||||
highlight_element = self._search_regex(
|
||||
r'(<div[^>]+id="highlight_movie_%s"[^>]+>)' % video_id,
|
||||
webpage, 'highlight element', fatal=False)
|
||||
if highlight_element:
|
||||
highlight_attribs = extract_attributes(highlight_element)
|
||||
if highlight_attribs:
|
||||
entry['thumbnail'] = highlight_attribs.get('data-poster')
|
||||
for quality in ('', '-hd'):
|
||||
for ext in ('webm', 'mp4'):
|
||||
video_url = highlight_attribs.get('data-%s%s-source' % (ext, quality))
|
||||
if video_url:
|
||||
formats.append({
|
||||
'format_id': ext + quality,
|
||||
'url': video_url,
|
||||
})
|
||||
if not formats:
|
||||
continue
|
||||
entry['formats'] = formats
|
||||
entries.append(entry)
|
||||
if not entries:
|
||||
raise ExtractorError('Could not find any videos')
|
||||
|
||||
return self.playlist_result(videos, playlist_id, playlist_title)
|
||||
return self.playlist_result(entries, playlist_id, playlist_title)
|
||||
|
@@ -216,7 +216,7 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
|
||||
def hex_to_bytes(hex):
|
||||
return binascii.a2b_hex(hex.encode('ascii'))
|
||||
|
||||
relative_path = re.match(r'https?://link.theplatform.com/s/([^?]+)', url).group(1)
|
||||
relative_path = re.match(r'https?://link\.theplatform\.com/s/([^?]+)', url).group(1)
|
||||
clear_text = hex_to_bytes(flags + expiration_date + str_to_hex(relative_path))
|
||||
checksum = hmac.new(sig_key.encode('ascii'), clear_text, hashlib.sha1).hexdigest()
|
||||
sig = flags + expiration_date + checksum + str_to_hex(sig_secret)
|
||||
|
@@ -57,10 +57,10 @@ class ThisAVIE(InfoExtractor):
|
||||
info_dict = self._extract_jwplayer_data(
|
||||
webpage, video_id, require_title=False)
|
||||
uploader = self._html_search_regex(
|
||||
r': <a href="http://www.thisav.com/user/[0-9]+/(?:[^"]+)">([^<]+)</a>',
|
||||
r': <a href="http://www\.thisav\.com/user/[0-9]+/(?:[^"]+)">([^<]+)</a>',
|
||||
webpage, 'uploader name', fatal=False)
|
||||
uploader_id = self._html_search_regex(
|
||||
r': <a href="http://www.thisav.com/user/[0-9]+/([^"]+)">(?:[^<]+)</a>',
|
||||
r': <a href="http://www\.thisav\.com/user/[0-9]+/([^"]+)">(?:[^<]+)</a>',
|
||||
webpage, 'uploader id', fatal=False)
|
||||
|
||||
info_dict.update({
|
||||
|
@@ -13,11 +13,11 @@ from ..utils import (
|
||||
|
||||
|
||||
class TubiTvIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?tubitv\.com/video/(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?tubitv\.com/(?:video|movies|tv-shows)/(?P<id>[0-9]+)'
|
||||
_LOGIN_URL = 'http://tubitv.com/login'
|
||||
_NETRC_MACHINE = 'tubitv'
|
||||
_GEO_COUNTRIES = ['US']
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://tubitv.com/video/283829/the_comedian_at_the_friday',
|
||||
'md5': '43ac06be9326f41912dc64ccf7a80320',
|
||||
'info_dict': {
|
||||
@@ -27,7 +27,13 @@ class TubiTvIE(InfoExtractor):
|
||||
'description': 'A stand up comedian is forced to look at the decisions in his life while on a one week trip to the west coast.',
|
||||
'uploader_id': 'bc168bee0d18dd1cb3b86c68706ab434',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'http://tubitv.com/tv-shows/321886/s01_e01_on_nom_stories',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://tubitv.com/movies/383676/tracker',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _login(self):
|
||||
(username, password) = self._get_login_info()
|
||||
|
@@ -18,7 +18,7 @@ class TV4IE(InfoExtractor):
|
||||
tv4\.se/(?:[^/]+)/klipp/(?:.*)-|
|
||||
tv4play\.se/
|
||||
(?:
|
||||
(?:program|barn)/(?:[^\?]+)\?video_id=|
|
||||
(?:program|barn)/(?:[^/]+/|(?:[^\?]+)\?video_id=)|
|
||||
iframe/video/|
|
||||
film/|
|
||||
sport/|
|
||||
@@ -63,6 +63,10 @@ class TV4IE(InfoExtractor):
|
||||
'url': 'http://www.tv4play.se/barn/looney-tunes?video_id=3062412',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'http://www.tv4play.se/program/farang/3922081',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -3,52 +3,50 @@ from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
float_or_none,
|
||||
smuggle_url,
|
||||
)
|
||||
|
||||
|
||||
class TVAIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://videos\.tva\.ca/episode/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://videos\.tva\.ca/details/_(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://videos.tva.ca/episode/85538',
|
||||
'url': 'https://videos.tva.ca/details/_5596811470001',
|
||||
'info_dict': {
|
||||
'id': '85538',
|
||||
'id': '5596811470001',
|
||||
'ext': 'mp4',
|
||||
'title': 'Épisode du 25 janvier 2017',
|
||||
'description': 'md5:e9e7fb5532ab37984d2dc87229cadf98',
|
||||
'upload_date': '20170126',
|
||||
'timestamp': 1485442329,
|
||||
'title': 'Un extrait de l\'épisode du dimanche 8 octobre 2017 !',
|
||||
'uploader_id': '5481942443001',
|
||||
'upload_date': '20171003',
|
||||
'timestamp': 1507064617,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
}
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5481942443001/default_default/index.html?videoId=%s'
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
video_data = self._download_json(
|
||||
"https://d18jmrhziuoi7p.cloudfront.net/isl/api/v1/dataservice/Items('%s')" % video_id,
|
||||
video_id, query={
|
||||
'$expand': 'Metadata,CustomId',
|
||||
'$select': 'Metadata,Id,Title,ShortDescription,LongDescription,CreatedDate,CustomId,AverageUserRating,Categories,ShowName',
|
||||
'$format': 'json',
|
||||
'https://videos.tva.ca/proxy/item/_' + video_id, video_id, headers={
|
||||
'Accept': 'application/json',
|
||||
})
|
||||
metadata = video_data.get('Metadata', {})
|
||||
|
||||
def get_attribute(key):
|
||||
for attribute in video_data.get('attributes', []):
|
||||
if attribute.get('key') == key:
|
||||
return attribute.get('value')
|
||||
return None
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': video_id,
|
||||
'title': video_data['Title'],
|
||||
'url': smuggle_url('ooyala:' + video_data['CustomId'], {'supportedformats': 'm3u8,hds'}),
|
||||
'description': video_data.get('LongDescription') or video_data.get('ShortDescription'),
|
||||
'series': video_data.get('ShowName'),
|
||||
'episode': metadata.get('EpisodeTitle'),
|
||||
'episode_number': int_or_none(metadata.get('EpisodeNumber')),
|
||||
'categories': video_data.get('Categories'),
|
||||
'average_rating': video_data.get('AverageUserRating'),
|
||||
'timestamp': parse_iso8601(video_data.get('CreatedDate')),
|
||||
'ie_key': 'Ooyala',
|
||||
'title': get_attribute('title'),
|
||||
'url': smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % video_id, {'geo_countries': ['CA']}),
|
||||
'description': get_attribute('description'),
|
||||
'thumbnail': get_attribute('image-background') or get_attribute('image-landscape'),
|
||||
'duration': float_or_none(get_attribute('video-duration'), 1000),
|
||||
'ie_key': 'BrightcoveNew',
|
||||
}
|
||||
|
@@ -9,7 +9,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class TVN24IE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:(?:[^/]+)\.)?tvn24(?:bis)?\.pl/(?:[^/]+/)*(?P<id>[^/]+)\.html'
|
||||
_VALID_URL = r'https?://(?:(?:[^/]+)\.)?tvn24(?:bis)?\.pl/(?:[^/]+/)*(?P<id>[^/]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.tvn24.pl/wiadomosci-z-kraju,3/oredzie-artura-andrusa,702428.html',
|
||||
'md5': 'fbdec753d7bc29d96036808275f2130c',
|
||||
@@ -18,7 +18,7 @@ class TVN24IE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': '"Święta mają być wesołe, dlatego, ludziska, wszyscy pod jemiołę"',
|
||||
'description': 'Wyjątkowe orędzie Artura Andrusa, jednego z gości "Szkła kontaktowego".',
|
||||
'thumbnail': 're:http://.*[.]jpeg',
|
||||
'thumbnail': 're:https?://.*[.]jpeg',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://fakty.tvn24.pl/ogladaj-online,60/53-konferencja-bezpieczenstwa-w-monachium,716431.html',
|
||||
@@ -29,6 +29,9 @@ class TVN24IE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://tvn24bis.pl/poranek,146,m/gen-koziej-w-tvn24-bis-wracamy-do-czasow-zimnej-wojny,715660.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.tvn24.pl/magazyn-tvn24/angie-w-jednej-czwartej-polka-od-szarej-myszki-do-cesarzowej-europy,119,2158',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -15,16 +15,16 @@ from ..utils import (
|
||||
class TVPIE(InfoExtractor):
|
||||
IE_NAME = 'tvp'
|
||||
IE_DESC = 'Telewizja Polska'
|
||||
_VALID_URL = r'https?://[^/]+\.tvp\.(?:pl|info)/(?:(?!\d+/)[^/]+/)*(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://[^/]+\.tvp\.(?:pl|info)/(?:video/(?:[^,\s]*,)*|(?:(?!\d+/)[^/]+/)*)(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://vod.tvp.pl/194536/i-seria-odc-13',
|
||||
'url': 'https://vod.tvp.pl/video/czas-honoru,i-seria-odc-13,194536',
|
||||
'md5': '8aa518c15e5cc32dfe8db400dc921fbb',
|
||||
'info_dict': {
|
||||
'id': '194536',
|
||||
'ext': 'mp4',
|
||||
'title': 'Czas honoru, I seria – odc. 13',
|
||||
'description': 'md5:76649d2014f65c99477be17f23a4dead',
|
||||
'description': 'md5:381afa5bca72655fe94b05cfe82bf53d',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.tvp.pl/there-can-be-anything-so-i-shortened-it/17916176',
|
||||
@@ -37,12 +37,13 @@ class TVPIE(InfoExtractor):
|
||||
},
|
||||
}, {
|
||||
# page id is not the same as video id(#7799)
|
||||
'url': 'http://vod.tvp.pl/22704887/08122015-1500',
|
||||
'md5': 'cf6a4705dfd1489aef8deb168d6ba742',
|
||||
'url': 'https://wiadomosci.tvp.pl/33908820/28092017-1930',
|
||||
'md5': '84cd3c8aec4840046e5ab712416b73d0',
|
||||
'info_dict': {
|
||||
'id': '22680786',
|
||||
'id': '33908820',
|
||||
'ext': 'mp4',
|
||||
'title': 'Wiadomości, 08.12.2015, 15:00',
|
||||
'title': 'Wiadomości, 28.09.2017, 19:30',
|
||||
'description': 'Wydanie główne codziennego serwisu informacyjnego.'
|
||||
},
|
||||
}, {
|
||||
'url': 'http://vod.tvp.pl/seriale/obyczajowe/na-sygnale/sezon-2-27-/odc-39/17834272',
|
||||
|
@@ -15,7 +15,9 @@ from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
qualities,
|
||||
smuggle_url,
|
||||
try_get,
|
||||
unsmuggle_url,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
@@ -224,6 +226,9 @@ class TVPlayIE(InfoExtractor):
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, smuggled_data = unsmuggle_url(url, {})
|
||||
self._initialize_geo_bypass(smuggled_data.get('geo_countries'))
|
||||
|
||||
video_id = self._match_id(url)
|
||||
geo_country = self._search_regex(
|
||||
r'https?://[^/]+\.([a-z]{2})', url,
|
||||
@@ -426,4 +431,9 @@ class ViafreeIE(InfoExtractor):
|
||||
r'currentVideo["\']\s*:\s*.+?["\']id["\']\s*:\s*["\'](\d{6,})',
|
||||
webpage, 'video id')
|
||||
|
||||
return self.url_result('mtg:%s' % video_id, TVPlayIE.ie_key())
|
||||
return self.url_result(
|
||||
smuggle_url(
|
||||
'mtg:%s' % video_id,
|
||||
{'geo_countries': [
|
||||
compat_urlparse.urlparse(url).netloc.rsplit('.', 1)[-1]]}),
|
||||
ie=TVPlayIE.ie_key(), video_id=video_id)
|
||||
|
@@ -14,7 +14,7 @@ from ..utils import (
|
||||
|
||||
class TwentyFourVideoIE(InfoExtractor):
|
||||
IE_NAME = '24video'
|
||||
_VALID_URL = r'https?://(?P<host>(?:www\.)?24video\.(?:net|me|xxx|sex|tube))/(?:video/(?:view|xml)/|player/new24_play\.swf\?id=)(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?P<host>(?:www\.)?24video\.(?:net|me|xxx|sex|tube|adult))/(?:video/(?:view|xml)/|player/new24_play\.swf\?id=)(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.24video.net/video/view/1044982',
|
||||
@@ -60,8 +60,8 @@ class TwentyFourVideoIE(InfoExtractor):
|
||||
duration = int_or_none(self._og_search_property(
|
||||
'duration', webpage, 'duration', fatal=False))
|
||||
timestamp = parse_iso8601(self._search_regex(
|
||||
r'<time id="video-timeago" datetime="([^"]+)" itemprop="uploadDate">',
|
||||
webpage, 'upload date'))
|
||||
r'<time[^>]+\bdatetime="([^"]+)"[^>]+itemprop="uploadDate"',
|
||||
webpage, 'upload date', fatal=False))
|
||||
|
||||
uploader = self._html_search_regex(
|
||||
r'class="video-uploaded"[^>]*>\s*<a href="/jsecUser/movies/[^"]+"[^>]*>([^<]+)</a>',
|
||||
@@ -72,7 +72,7 @@ class TwentyFourVideoIE(InfoExtractor):
|
||||
webpage, 'view count', fatal=False))
|
||||
comment_count = int_or_none(self._html_search_regex(
|
||||
r'<a[^>]+href="#tab-comments"[^>]*>(\d+) комментари',
|
||||
webpage, 'comment count', fatal=False))
|
||||
webpage, 'comment count', default=None))
|
||||
|
||||
# Sets some cookies
|
||||
self._download_xml(
|
||||
|
@@ -28,7 +28,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class TwitchBaseIE(InfoExtractor):
|
||||
_VALID_URL_BASE = r'https?://(?:www\.)?twitch\.tv'
|
||||
_VALID_URL_BASE = r'https?://(?:(?:www|go)\.)?twitch\.tv'
|
||||
|
||||
_API_BASE = 'https://api.twitch.tv'
|
||||
_USHER_BASE = 'https://usher.ttvnw.net'
|
||||
@@ -217,7 +217,7 @@ class TwitchVodIE(TwitchItemBaseIE):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:www\.)?twitch\.tv/(?:[^/]+/v|videos)/|
|
||||
(?:(?:www|go)\.)?twitch\.tv/(?:[^/]+/v|videos)/|
|
||||
player\.twitch\.tv/\?.*?\bvideo=v
|
||||
)
|
||||
(?P<id>\d+)
|
||||
@@ -458,7 +458,7 @@ class TwitchStreamIE(TwitchBaseIE):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:www\.)?twitch\.tv/|
|
||||
(?:(?:www|go)\.)?twitch\.tv/|
|
||||
player\.twitch\.tv/\?.*?\bchannel=
|
||||
)
|
||||
(?P<id>[^/#?]+)
|
||||
@@ -489,6 +489,9 @@ class TwitchStreamIE(TwitchBaseIE):
|
||||
}, {
|
||||
'url': 'https://player.twitch.tv/?channel=lotsofs',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://go.twitch.tv/food',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
|
@@ -174,7 +174,7 @@ class TwitterCardIE(TwitterBaseIE):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
iframe_url = self._html_search_regex(
|
||||
r'<iframe[^>]+src="((?:https?:)?//(?:www.youtube.com/embed/[^"]+|(?:www\.)?vine\.co/v/\w+/card))"',
|
||||
r'<iframe[^>]+src="((?:https?:)?//(?:www\.youtube\.com/embed/[^"]+|(?:www\.)?vine\.co/v/\w+/card))"',
|
||||
webpage, 'video iframe', default=None)
|
||||
if iframe_url:
|
||||
return self.url_result(iframe_url)
|
||||
@@ -229,7 +229,7 @@ class TwitterCardIE(TwitterBaseIE):
|
||||
|
||||
title = self._search_regex(r'<title>([^<]+)</title>', webpage, 'title')
|
||||
thumbnail = config.get('posterImageUrl') or config.get('image_src')
|
||||
duration = float_or_none(config.get('duration')) or duration
|
||||
duration = float_or_none(config.get('duration'), scale=1000) or duration
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
@@ -242,8 +242,9 @@ class TwitterCardIE(TwitterBaseIE):
|
||||
|
||||
class TwitterIE(InfoExtractor):
|
||||
IE_NAME = 'twitter'
|
||||
_VALID_URL = r'https?://(?:www\.|m\.|mobile\.)?twitter\.com/(?P<user_id>[^/]+)/status/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.|m\.|mobile\.)?twitter\.com/(?:i/web|(?P<user_id>[^/]+))/status/(?P<id>\d+)'
|
||||
_TEMPLATE_URL = 'https://twitter.com/%s/status/%s'
|
||||
_TEMPLATE_STATUSES_URL = 'https://twitter.com/statuses/%s'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://twitter.com/freethenipple/status/643211948184596480',
|
||||
@@ -255,6 +256,7 @@ class TwitterIE(InfoExtractor):
|
||||
'description': 'FREE THE NIPPLE on Twitter: "FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ"',
|
||||
'uploader': 'FREE THE NIPPLE',
|
||||
'uploader_id': 'freethenipple',
|
||||
'duration': 12.922,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # requires ffmpeg
|
||||
@@ -305,11 +307,12 @@ class TwitterIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '700207533655363584',
|
||||
'ext': 'mp4',
|
||||
'title': 'Donte - BEAT PROD: @suhmeduh #Damndaniel',
|
||||
'description': 'Donte on Twitter: "BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ"',
|
||||
'title': 'あかさ - BEAT PROD: @suhmeduh #Damndaniel',
|
||||
'description': 'あかさ on Twitter: "BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ"',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'uploader': 'Donte',
|
||||
'uploader': 'あかさ',
|
||||
'uploader_id': 'jaydingeer',
|
||||
'duration': 30.0,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # requires ffmpeg
|
||||
@@ -320,9 +323,9 @@ class TwitterIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'MIOxnrUteUd',
|
||||
'ext': 'mp4',
|
||||
'title': 'FilmDrunk - Vine of the day',
|
||||
'description': 'FilmDrunk on Twitter: "Vine of the day https://t.co/xmTvRdqxWf"',
|
||||
'uploader': 'FilmDrunk',
|
||||
'title': 'Vince Mancini - Vine of the day',
|
||||
'description': 'Vince Mancini on Twitter: "Vine of the day https://t.co/xmTvRdqxWf"',
|
||||
'uploader': 'Vince Mancini',
|
||||
'uploader_id': 'Filmdrunk',
|
||||
'timestamp': 1402826626,
|
||||
'upload_date': '20140615',
|
||||
@@ -337,6 +340,7 @@ class TwitterIE(InfoExtractor):
|
||||
'description': 'Captain America on Twitter: "@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI"',
|
||||
'uploader_id': 'captainamerica',
|
||||
'uploader': 'Captain America',
|
||||
'duration': 3.17,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # requires ffmpeg
|
||||
@@ -364,10 +368,26 @@ class TwitterIE(InfoExtractor):
|
||||
'description': 'عالم الأخبار on Twitter: "كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN"',
|
||||
'uploader': 'عالم الأخبار',
|
||||
'uploader_id': 'news_al3alm',
|
||||
'duration': 277.4,
|
||||
},
|
||||
'params': {
|
||||
'format': 'best[format_id^=http-]',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://twitter.com/i/web/status/910031516746514432',
|
||||
'info_dict': {
|
||||
'id': '910031516746514432',
|
||||
'ext': 'mp4',
|
||||
'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'description': 'Préfet de Guadeloupe on Twitter: "[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo"',
|
||||
'uploader': 'Préfet de Guadeloupe',
|
||||
'uploader_id': 'Prefet971',
|
||||
'duration': 47.48,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # requires ffmpeg
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -376,11 +396,15 @@ class TwitterIE(InfoExtractor):
|
||||
twid = mobj.group('id')
|
||||
|
||||
webpage, urlh = self._download_webpage_handle(
|
||||
self._TEMPLATE_URL % (user_id, twid), twid)
|
||||
self._TEMPLATE_STATUSES_URL % twid, twid)
|
||||
|
||||
if 'twitter.com/account/suspended' in urlh.geturl():
|
||||
raise ExtractorError('Account suspended by Twitter.', expected=True)
|
||||
|
||||
if user_id is None:
|
||||
mobj = re.match(self._VALID_URL, urlh.geturl())
|
||||
user_id = mobj.group('user_id')
|
||||
|
||||
username = remove_end(self._og_search_title(webpage), ' on Twitter')
|
||||
|
||||
title = description = self._og_search_description(webpage).strip('').replace('\n', ' ').strip('“”')
|
||||
|
@@ -1,7 +1,6 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -29,6 +28,7 @@ class UDNEmbedIE(InfoExtractor):
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['Failed to parse JSON Expecting value'],
|
||||
}, {
|
||||
'url': 'https://video.udn.com/embed/news/300040',
|
||||
'only_matching': True,
|
||||
@@ -43,10 +43,21 @@ class UDNEmbedIE(InfoExtractor):
|
||||
|
||||
page = self._download_webpage(url, video_id)
|
||||
|
||||
options = json.loads(js_to_json(self._html_search_regex(
|
||||
r'var\s+options\s*=\s*([^;]+);', page, 'video urls dictionary')))
|
||||
|
||||
video_urls = options['video']
|
||||
options_str = self._html_search_regex(
|
||||
r'var\s+options\s*=\s*([^;]+);', page, 'options')
|
||||
trans_options_str = js_to_json(options_str)
|
||||
options = self._parse_json(trans_options_str, 'options', fatal=False) or {}
|
||||
if options:
|
||||
video_urls = options['video']
|
||||
title = options['title']
|
||||
poster = options.get('poster')
|
||||
else:
|
||||
video_urls = self._parse_json(self._html_search_regex(
|
||||
r'"video"\s*:\s*({.+?})\s*,', trans_options_str, 'video urls'), 'video urls')
|
||||
title = self._html_search_regex(
|
||||
r"title\s*:\s*'(.+?)'\s*,", options_str, 'title')
|
||||
poster = self._html_search_regex(
|
||||
r"poster\s*:\s*'(.+?)'\s*,", options_str, 'poster', default=None)
|
||||
|
||||
if video_urls.get('youtube'):
|
||||
return self.url_result(video_urls.get('youtube'), 'Youtube')
|
||||
@@ -68,7 +79,7 @@ class UDNEmbedIE(InfoExtractor):
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
video_url, video_id, f4m_id='hds'))
|
||||
else:
|
||||
mobj = re.search(r'_(?P<height>\d+)p_(?P<tbr>\d+).mp4', video_url)
|
||||
mobj = re.search(r'_(?P<height>\d+)p_(?P<tbr>\d+)\.mp4', video_url)
|
||||
a_format = {
|
||||
'url': video_url,
|
||||
# video_type may be 'mp4', which confuses YoutubeDL
|
||||
@@ -83,14 +94,9 @@ class UDNEmbedIE(InfoExtractor):
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnails = [{
|
||||
'url': img_url,
|
||||
'id': img_type,
|
||||
} for img_type, img_url in options.get('gallery', [{}])[0].items() if img_url]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'title': options['title'],
|
||||
'thumbnails': thumbnails,
|
||||
'title': title,
|
||||
'thumbnail': poster,
|
||||
}
|
||||
|
@@ -42,7 +42,7 @@ class VGTVIE(XstreamIE):
|
||||
)
|
||||
/?
|
||||
(?:
|
||||
\#!/(?:video|live)/|
|
||||
(?:\#!/)?(?:video|live)/|
|
||||
embed?.*id=|
|
||||
articles/
|
||||
)|
|
||||
@@ -146,7 +146,11 @@ class VGTVIE(XstreamIE):
|
||||
{
|
||||
'url': 'abtv:140026',
|
||||
'only_matching': True,
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://www.vgtv.no/video/84196/hevnen-er-soet-episode-10-abu',
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -1,131 +1,41 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .mtv import MTVIE
|
||||
|
||||
import re
|
||||
from ..utils import fix_xml_ampersands
|
||||
from .mtv import MTVServicesInfoExtractor
|
||||
|
||||
|
||||
class VH1IE(MTVIE):
|
||||
class VH1IE(MTVServicesInfoExtractor):
|
||||
IE_NAME = 'vh1.com'
|
||||
_FEED_URL = 'http://www.vh1.com/player/embed/AS3/fullepisode/rss/'
|
||||
_FEED_URL = 'http://www.vh1.com/feeds/mrss/'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.vh1.com/video/metal-evolution/full-episodes/progressive-metal/1678612/playlist.jhtml',
|
||||
'playlist': [
|
||||
{
|
||||
'md5': '7827a7505f59633983165bbd2c119b52',
|
||||
'info_dict': {
|
||||
'id': '731565',
|
||||
'ext': 'mp4',
|
||||
'title': 'Metal Evolution: Ep. 11 Act 1',
|
||||
'description': 'Many rock academics have proclaimed that the truly progressive musicianship of the last 20 years has been found right here in the world of heavy metal, rather than obvious locales such as jazz, fusion or progressive rock. It stands to reason then, that much of this jaw-dropping virtuosity occurs within what\'s known as progressive metal, a genre that takes root with the likes of Rush in the \'70s, Queensryche and Fates Warning in the \'80s, and Dream Theater in the \'90s. Since then, the genre has exploded with creativity, spawning mind-bending, genre-defying acts such as Tool, Mastodon, Coheed And Cambria, Porcupine Tree, Meshuggah, A Perfect Circle and Opeth. Episode 12 looks at the extreme musicianship of these bands, as well as their often extreme literary prowess and conceptual strength, the end result being a rich level of respect and attention such challenging acts have brought upon the world of heavy metal, from a critical community usually dismissive of the form.'
|
||||
}
|
||||
},
|
||||
{
|
||||
'md5': '34fb4b7321c546b54deda2102a61821f',
|
||||
'info_dict': {
|
||||
'id': '731567',
|
||||
'ext': 'mp4',
|
||||
'title': 'Metal Evolution: Ep. 11 Act 2',
|
||||
'description': 'Many rock academics have proclaimed that the truly progressive musicianship of the last 20 years has been found right here in the world of heavy metal, rather than obvious locales such as jazz, fusion or progressive rock. It stands to reason then, that much of this jaw-dropping virtuosity occurs within what\'s known as progressive metal, a genre that takes root with the likes of Rush in the \'70s, Queensryche and Fates Warning in the \'80s, and Dream Theater in the \'90s. Since then, the genre has exploded with creativity, spawning mind-bending, genre-defying acts such as Tool, Mastodon, Coheed And Cambria, Porcupine Tree, Meshuggah, A Perfect Circle and Opeth. Episode 11 looks at the extreme musicianship of these bands, as well as their often extreme literary prowess and conceptual strength, the end result being a rich level of respect and attention such challenging acts have brought upon the world of heavy metal, from a critical community usually dismissive of the form.'
|
||||
}
|
||||
},
|
||||
{
|
||||
'md5': '813f38dba4c1b8647196135ebbf7e048',
|
||||
'info_dict': {
|
||||
'id': '731568',
|
||||
'ext': 'mp4',
|
||||
'title': 'Metal Evolution: Ep. 11 Act 3',
|
||||
'description': 'Many rock academics have proclaimed that the truly progressive musicianship of the last 20 years has been found right here in the world of heavy metal, rather than obvious locales such as jazz, fusion or progressive rock. It stands to reason then, that much of this jaw-dropping virtuosity occurs within what\'s known as progressive metal, a genre that takes root with the likes of Rush in the \'70s, Queensryche and Fates Warning in the \'80s, and Dream Theater in the \'90s. Since then, the genre has exploded with creativity, spawning mind-bending, genre-defying acts such as Tool, Mastodon, Coheed And Cambria, Porcupine Tree, Meshuggah, A Perfect Circle and Opeth. Episode 11 looks at the extreme musicianship of these bands, as well as their often extreme literary prowess and conceptual strength, the end result being a rich level of respect and attention such challenging acts have brought upon the world of heavy metal, from a critical community usually dismissive of the form.'
|
||||
}
|
||||
},
|
||||
{
|
||||
'md5': '51adb72439dfaed11c799115d76e497f',
|
||||
'info_dict': {
|
||||
'id': '731569',
|
||||
'ext': 'mp4',
|
||||
'title': 'Metal Evolution: Ep. 11 Act 4',
|
||||
'description': 'Many rock academics have proclaimed that the truly progressive musicianship of the last 20 years has been found right here in the world of heavy metal, rather than obvious locales such as jazz, fusion or progressive rock. It stands to reason then, that much of this jaw-dropping virtuosity occurs within what\'s known as progressive metal, a genre that takes root with the likes of Rush in the \'70s, Queensryche and Fates Warning in the \'80s, and Dream Theater in the \'90s. Since then, the genre has exploded with creativity, spawning mind-bending, genre-defying acts such as Tool, Mastodon, Coheed And Cambria, Porcupine Tree, Meshuggah, A Perfect Circle and Opeth. Episode 11 looks at the extreme musicianship of these bands, as well as their often extreme literary prowess and conceptual strength, the end result being a rich level of respect and attention such challenging acts have brought upon the world of heavy metal, from a critical community usually dismissive of the form.'
|
||||
}
|
||||
},
|
||||
{
|
||||
'md5': '93d554aaf79320703b73a95288c76a6e',
|
||||
'info_dict': {
|
||||
'id': '731570',
|
||||
'ext': 'mp4',
|
||||
'title': 'Metal Evolution: Ep. 11 Act 5',
|
||||
'description': 'Many rock academics have proclaimed that the truly progressive musicianship of the last 20 years has been found right here in the world of heavy metal, rather than obvious locales such as jazz, fusion or progressive rock. It stands to reason then, that much of this jaw-dropping virtuosity occurs within what\'s known as progressive metal, a genre that takes root with the likes of Rush in the \'70s, Queensryche and Fates Warning in the \'80s, and Dream Theater in the \'90s. Since then, the genre has exploded with creativity, spawning mind-bending, genre-defying acts such as Tool, Mastodon, Coheed And Cambria, Porcupine Tree, Meshuggah, A Perfect Circle and Opeth. Episode 11 looks at the extreme musicianship of these bands, as well as their often extreme literary prowess and conceptual strength, the end result being a rich level of respect and attention such challenging acts have brought upon the world of heavy metal, from a critical community usually dismissive of the form.'
|
||||
}
|
||||
}
|
||||
],
|
||||
'skip': 'Blocked outside the US',
|
||||
'url': 'http://www.vh1.com/episodes/0umwpq/hip-hop-squares-kent-jones-vs-nick-young-season-1-ep-120',
|
||||
'info_dict': {
|
||||
'title': 'Kent Jones vs. Nick Young',
|
||||
'description': 'Come to Play. Stay to Party. With Mike Epps, TIP, O’Shea Jackson Jr., T-Pain, Tisha Campbell-Martin and more.',
|
||||
},
|
||||
'playlist_mincount': 4,
|
||||
}, {
|
||||
# Clip
|
||||
'url': 'http://www.vh1.com/video/misc/706675/metal-evolution-episode-1-pre-metal-show-clip.jhtml#id=1674118',
|
||||
'md5': '7d67cf6d9cdc6b4f3d3ac97a55403844',
|
||||
'url': 'http://www.vh1.com/video-clips/t74mif/scared-famous-scared-famous-extended-preview',
|
||||
'info_dict': {
|
||||
'id': '706675',
|
||||
'id': '0a50c2d2-a86b-4141-9565-911c7e2d0b92',
|
||||
'ext': 'mp4',
|
||||
'title': 'Metal Evolution: Episode 1 Pre-Metal Show Clip',
|
||||
'description': 'The greatest documentary ever made about Heavy Metal begins as our host Sam Dunn travels the globe to seek out the origins and influences that helped create Heavy Metal. Sam speaks to legends like Kirk Hammett, Alice Cooper, Slash, Bill Ward, Geezer Butler, Tom Morello, Ace Frehley, Lemmy Kilmister, Dave Davies, and many many more. This episode is the prologue for the 11 hour series, and Sam goes back to the very beginning to reveal how Heavy Metal was created.'
|
||||
'title': 'Scared Famous|October 9, 2017|1|NO-EPISODE#|Scared Famous + Extended Preview',
|
||||
'description': 'md5:eff5551a274c473a29463de40f7b09da',
|
||||
'upload_date': '20171009',
|
||||
'timestamp': 1507574700,
|
||||
},
|
||||
'skip': 'Blocked outside the US',
|
||||
}, {
|
||||
# Short link
|
||||
'url': 'http://www.vh1.com/video/play.jhtml?id=1678353',
|
||||
'md5': '853192b87ad978732b67dd8e549b266a',
|
||||
'info_dict': {
|
||||
'id': '730355',
|
||||
'ext': 'mp4',
|
||||
'title': 'Metal Evolution: Episode 11 Progressive Metal Sneak',
|
||||
'description': 'In Metal Evolution\'s finale sneak, Sam sits with Michael Giles of King Crimson and gets feedback from Metallica guitarist Kirk Hammett on why the group was influential.'
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Blocked outside the US',
|
||||
}, {
|
||||
'url': 'http://www.vh1.com/video/macklemore-ryan-lewis/900535/cant-hold-us-ft-ray-dalton.jhtml',
|
||||
'md5': 'b1bcb5b4380c9d7f544065589432dee7',
|
||||
'info_dict': {
|
||||
'id': '900535',
|
||||
'ext': 'mp4',
|
||||
'title': 'Macklemore & Ryan Lewis - "Can\'t Hold Us ft. Ray Dalton"',
|
||||
'description': 'The Heist'
|
||||
},
|
||||
'skip': 'Blocked outside the US',
|
||||
}]
|
||||
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://www\.vh1\.com/video/
|
||||
(?:
|
||||
.+?/full-episodes/.+?/(?P<playlist_id>[^/]+)/playlist\.jhtml
|
||||
|
|
||||
(?:
|
||||
play.jhtml\?id=|
|
||||
misc/.+?/.+?\.jhtml\#id=
|
||||
)
|
||||
(?P<video_id>[0-9]+)$
|
||||
|
|
||||
[^/]+/(?P<music_id>[0-9]+)/[^/]+?
|
||||
)
|
||||
'''
|
||||
_VALID_URL = r'https?://(?:www\.)?vh1\.com/(?:video-clips|episodes)/(?P<id>[^/?#.]+)'
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
if mobj.group('music_id'):
|
||||
id_field = 'vid'
|
||||
video_id = mobj.group('music_id')
|
||||
else:
|
||||
video_id = mobj.group('playlist_id') or mobj.group('video_id')
|
||||
id_field = 'id'
|
||||
doc_url = '%s?%s=%s' % (self._FEED_URL, id_field, video_id)
|
||||
|
||||
idoc = self._download_xml(
|
||||
doc_url, video_id,
|
||||
'Downloading info', transform_source=fix_xml_ampersands)
|
||||
|
||||
entries = []
|
||||
for item in idoc.findall('.//item'):
|
||||
info = self._get_video_info(item)
|
||||
if info:
|
||||
entries.append(info)
|
||||
|
||||
return self.playlist_result(entries, playlist_id=video_id)
|
||||
playlist_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
mgid = self._extract_triforce_mgid(webpage)
|
||||
videos_info = self._get_videos_info(mgid)
|
||||
return videos_info
|
||||
|
@@ -198,7 +198,7 @@ class ViceShowIE(InfoExtractor):
|
||||
|
||||
class ViceArticleIE(InfoExtractor):
|
||||
IE_NAME = 'vice:article'
|
||||
_VALID_URL = r'https://www.vice.com/[^/]+/article/(?P<id>[^?#]+)'
|
||||
_VALID_URL = r'https://www\.vice\.com/[^/]+/article/(?P<id>[^?#]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.vice.com/en_us/article/on-set-with-the-woman-making-mormon-porn-in-utah',
|
||||
|
@@ -26,7 +26,7 @@ class VideoPremiumIE(InfoExtractor):
|
||||
webpage_url = 'http://videopremium.tv/' + video_id
|
||||
webpage = self._download_webpage(webpage_url, video_id)
|
||||
|
||||
if re.match(r'^<html><head><script[^>]*>window.location\s*=', webpage):
|
||||
if re.match(r'^<html><head><script[^>]*>window\.location\s*=', webpage):
|
||||
# Download again, we need a cookie
|
||||
webpage = self._download_webpage(
|
||||
webpage_url, video_id,
|
||||
|
@@ -23,9 +23,9 @@ class VikiBaseIE(InfoExtractor):
|
||||
_API_QUERY_TEMPLATE = '/v4/%sapp=%s&t=%s&site=www.viki.com'
|
||||
_API_URL_TEMPLATE = 'http://api.viki.io%s&sig=%s'
|
||||
|
||||
_APP = '65535a'
|
||||
_APP = '100005a'
|
||||
_APP_VERSION = '2.2.5.1428709186'
|
||||
_APP_SECRET = '-$iJ}@p7!G@SyU/je1bEyWg}upLu-6V6-Lg9VD(]siH,r.,m-r|ulZ,U4LC/SeR)'
|
||||
_APP_SECRET = 'MM_d*yP@`&1@]@!AVrXf_o-HVEnoTnm$O-ti4[G~$JDI/Dc-&piU&z&5.;:}95=Iad'
|
||||
|
||||
_GEO_BYPASS = False
|
||||
_NETRC_MACHINE = 'viki'
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user