Compare commits

90 Commits

2018.07.21...2018.09.08

| Author | SHA1 | Date |
|---|---|---|
| | ad98d2eb74 | |
| | a41a506077 | |
| | 9a47fa35dd | |
| | 2d4fe594c6 | |
| | 09322cccdb | |
| | aa1d5eb905 | |
| | 93284ff2ea | |
| | 0a9a8118ce | |
| | 3d08f63dc5 | |
| | 27d8e089a2 | |
| | 7bbc1b189a | |
| | 0b87e88453 | |
| | 4d59db5b90 | |
| | 4627995882 | |
| | 7f2611cb5b | |
| | 54a5be4dba | |
| | ed6919e737 | |
| | 2b83da2463 | |
| | c1a37eb24a | |
| | 4991e16c2a | |
| | 14b7a24c19 | |
| | 73f3bdbeb4 | |
| | 9e21e6d96b | |
| | 8959018a5f | |
| | eebbce5656 | |
| | 56213aff1d | |
| | 409b9324da | |
| | 02df41354c | |
| | dd88fd65a5 | |
| | 287cf7e443 | |
| | dac6f7654a | |
| | e0b6e98871 | |
| | beff09505c | |
| | 135e6a1c10 | |
| | c707d2067d | |
| | 4c86163b60 | |
| | b662273989 | |
| | df4d817bc3 | |
| | db192b2932 | |
| | 52007de8ca | |
| | 28f96cf407 | |
| | eda86b4335 | |
| | bf1245d236 | |
| | 6f356cbbcf | |
| | 0a74b45191 | |
| | d6ef8b4dd4 | |
| | 60c0856223 | |
| | 57c68ec4c3 | |
| | 24e0cd709f | |
| | 4779420ce8 | |
| | de4c41b437 | |
| | b65e3b0636 | |
| | d37dc6e1c9 | |
| | a62460aa21 | |
| | d588d4a5a6 | |
| | 81cc22bab6 | |
| | 20f96f64bd | |
| | af322eb830 | |
| | cb1c3a3c07 | |
| | 48afc6ca3e | |
| | 644921b372 | |
| | 19b9de13c4 | |
| | 6f2d82a5a0 | |
| | 7ff129d3ea | |
| | 9d1b213845 | |
| | 5484828418 | |
| | 4eecef84f3 | |
| | b2286f8fb2 | |
| | 4938c8d573 | |
| | 1a88fc5a69 | |
| | 38e87f6c2a | |
| | ec240a4369 | |
| | cd3a3ff93b | |
| | 9a984265b9 | |
| | a098c99f0d | |
| | 8e37a7e4cc | |
| | 722f1a0f8f | |
| | 0c7b4f49eb | |
| | ad1bc71a8a | |
| | b5dec62ca6 | |
| | 631f93ee2d | |
| | d4e7065111 | |
| | 234a85858c | |
| | a789d1cc90 | |
| | 694079dff7 | |
| | d94fb1225e | |
| | 7930f91494 | |
| | a702056fbe | |
| | 8fd2a7be37 | |
| | 6de82b4476 | |
.github/ISSUE_TEMPLATE.md (6 changes, vendored)

@@ -6,8 +6,8 @@
---
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.07.21*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.07.21**
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.09.08*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.09.08**

### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections

@@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2018.07.21
[debug] youtube-dl version 2018.09.08
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {}
.gitignore (3 changes, vendored)

@@ -48,3 +48,6 @@ youtube-dl.zsh
tmp/
venv/

# VS Code related files
.vscode
AUTHORS (7 changes)

@@ -239,3 +239,10 @@ Martin Weinelt
Surya Oktafendri
TingPing
Alexandre Macabies
Bastian de Groot
Niklas Haas
András Veres-Szentkirályi
Enes Solak
Nathan Rossi
Thomas van der Berg
Luca Cherubin
ChangeLog (95 changes)

@@ -1,3 +1,98 @@
version 2018.09.08

Extractors
* [youtube] Fix extraction (#17457, #17464)
+ [pornhub:uservideos] Add support for new URLs (#17388)
* [iprima] Confirm adult check (#17437)
* [slideslive] Make check for video service name case-insensitive (#17429)
* [radiojavan] Fix extraction (#17151)
* [generic] Skip unsuccessful jwplayer extraction (#16735)


version 2018.09.01

Core
* [utils] Skip remote IP addresses not matching the source address' IP version
  when creating a connection (#13422, #17362)

Extractors
+ [ard] Add support for one.ard.de (#17397)
* [niconico] Fix extraction on python3 (#17393, #17407)
* [ard] Extract f4m formats
* [crunchyroll] Parse vilos media data (#17343)
+ [ard] Add support for Beta ARD Mediathek
+ [bandcamp] Extract more metadata (#13197)
* [internazionale] Fix extraction of non-available-abroad videos (#17386)


version 2018.08.28

Extractors
+ [youtube:playlist] Add support for music album playlists (OLAK5uy_ prefix)
  (#17361)
* [bitchute] Fix extraction by passing a custom User-Agent (#17360)
* [webofstories:playlist] Fix extraction (#16914)
+ [tvplayhome] Add support for new tvplay URLs (#17344)
+ [generic] Allow relative src for videojs embeds (#17324)
+ [xfileshare] Add support for vidto.se (#17317)
+ [vidzi] Add support for vidzi.nu (#17316)
+ [nova:embed] Add support for media.cms.nova.cz (#17282)


version 2018.08.22

Core
* [utils] Use pure browser header for User-Agent (#17236)

Extractors
+ [kinopoisk] Add support for kinopoisk.ru (#17283)
+ [yourporn] Add support for yourporn.sexy (#17298)
+ [go] Add support for disneynow.go.com (#16299, #17264)
+ [6play] Add support for play.rtl.hr (#17249)
* [anvato] Fall back to generic API key for access-key-to-API-key lookup
  (#16788, #17254)
* [lci] Fix extraction (#17274)
* [bbccouk] Extend id URL regular expression (#17270)
* [cwtv] Fix extraction (#17256)
* [nova] Fix extraction (#17241)
+ [generic] Add support for expressen embeds
* [raywenderlich] Adapt to site redesign (#17225)
+ [redbulltv] Add support for redbull.com tv URLs (#17218)
+ [bitchute] Add support for bitchute.com (#14052)
+ [clyp] Add support for token-protected media (#17184)
* [imdb] Fix extension extraction (#17167)


version 2018.08.04

Extractors
* [funk:channel] Improve byChannelAlias extraction (#17142)
* [twitch] Fix authentication (#17024, #17126)
* [twitch:vod] Improve URL regular expression (#17135)
* [watchbox] Fix extraction (#17107)
* [pbs] Fix extraction (#17109)
* [theplatform] Relax URL regular expression (#16181, #17097)
+ [viqeo] Add support for viqeo.tv (#17066)


version 2018.07.29

Extractors
* [crunchyroll:playlist] Restrict URL regular expression (#17069, #17076)
+ [pornhub] Add support for subtitles (#16924, #17088)
* [ceskatelevize] Use https for API call (#16997, #16999)
* [dailymotion:playlist] Fix extraction (#16894)
* [ted] Improve extraction
* [ted] Fix extraction for videos without nativeDownloads (#16756, #17085)
* [telecinco] Fix extraction (#17080)
* [mitele] Reduce number of requests
* [rai] Return non-HTTP relinker URL intact (#17055)
* [vk] Fix extraction for inline-only videos (#16923)
* [streamcloud] Fix extraction (#17054)
* [facebook] Fix tahoe player extraction with authentication (#16655)
+ [puhutv] Add support for puhutv.com (#12712, #16010, #16269)


version 2018.07.21

Core
@@ -870,7 +870,7 @@ Either prepend `https://www.youtube.com/watch?v=` or separate the ID from the op
Use the `--cookies` option, for example `--cookies /path/to/cookies/file.txt`.

In order to extract cookies from browser use any conforming browser extension for exporting cookies. For example, [cookies.txt](https://chrome.google.com/webstore/detail/cookiestxt/njabckikapfpffapmjgojcnbfjonfjfg) (for Chrome) or [Export Cookies](https://addons.mozilla.org/en-US/firefox/addon/export-cookies/) (for Firefox).
In order to extract cookies from browser use any conforming browser extension for exporting cookies. For example, [cookies.txt](https://chrome.google.com/webstore/detail/cookiestxt/njabckikapfpffapmjgojcnbfjonfjfg) (for Chrome) or [cookies.txt](https://addons.mozilla.org/en-US/firefox/addon/cookies-txt/) (for Firefox).

Note that the cookies file must be in Mozilla/Netscape format and the first line of the cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. Make sure you have correct [newline format](https://en.wikipedia.org/wiki/Newline) in the cookies file and convert newlines if necessary to correspond with your OS, namely `CRLF` (`\r\n`) for Windows and `LF` (`\n`) for Unix and Unix-like systems (Linux, macOS, etc.). `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of invalid newline format.
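As a quick illustration of the `--cookies` option described above, a minimal sketch of the equivalent embedded use from Python; the file path and video URL are placeholders, and `cookiefile` is the programmatic counterpart of the `--cookies` flag:

```python
from __future__ import unicode_literals

import youtube_dl

# Placeholder path; the file must be in Netscape/Mozilla cookies.txt format.
ydl_opts = {'cookiefile': '/path/to/cookies/file.txt'}

with youtube_dl.YoutubeDL(ydl_opts) as ydl:
    ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
```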
@@ -56,6 +56,7 @@
- **archive.org**: archive.org videos
- **ARD**
- **ARD:mediathek**
- **ARDBetaMediathek**
- **Arkena**
- **arte.tv**
- **arte.tv:+7**

@@ -108,6 +109,8 @@
- **BiliBili**
- **BioBioChileTV**
- **BIQLE**
- **BitChute**
- **BitChuteChannel**
- **BleacherReport**
- **BleacherReportCMS**
- **blinkx**

@@ -189,7 +192,7 @@
- **Crackle**
- **Criterion**
- **CrooksAndLiars**
- **Crunchyroll**
- **crunchyroll**
- **crunchyroll:playlist**
- **CSNNE**
- **CSpan**: C-SPAN

@@ -405,6 +408,7 @@
- **Ketnet**
- **KhanAcademy**
- **KickStarter**
- **KinoPoisk**
- **KonserthusetPlay**
- **kontrtube**: KontrTube.ru - Труба зовёт
- **KrasView**: Красвью

@@ -577,6 +581,7 @@
- **Normalboots**
- **NosVideo**
- **Nova**: TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz
- **NovaEmbed**
- **nowness**
- **nowness:playlist**
- **nowness:series**

@@ -672,6 +677,8 @@
- **PrimeShareTV**
- **PromptFile**
- **prosiebensat1**: ProSiebenSat.1 Digital
- **puhutv**
- **puhutv:serie**
- **Puls4**
- **Pyvideo**
- **qqmusic**: QQ音乐

@@ -694,6 +701,7 @@
- **RaiPlayLive**
- **RaiPlayPlaylist**
- **RayWenderlich**
- **RayWenderlichCourse**
- **RBMARadio**
- **RDS**: RDS.ca
- **RedBullTV**

@@ -910,6 +918,7 @@
- **tvp:embed**: Telewizja Polska
- **tvp:series**
- **TVPlayer**
- **TVPlayHome**
- **Tweakers**
- **twitch:chapter**
- **twitch:clips**

@@ -999,6 +1008,7 @@
- **Vimple**: Vimple - one-click video hosting
- **Vine**
- **vine:user**
- **Viqeo**
- **Viu**
- **viu:ott**
- **viu:playlist**

@@ -1090,6 +1100,7 @@
- **YouNowLive**
- **YouNowMoment**
- **YouPorn**
- **YourPorn**
- **YourUpload**
- **youtube**: YouTube.com
- **youtube:channel**: YouTube.com channels
@@ -134,9 +134,33 @@ class AnvatoIE(InfoExtractor):
        'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582'
    }

    _API_KEY = '3hwbSuqqT690uxjNYBktSQpa5ZrpYYR0Iofx7NcJHyA'

    _ANVP_RE = r'<script[^>]+\bdata-anvp\s*=\s*(["\'])(?P<anvp>(?:(?!\1).)+)\1'
    _AUTH_KEY = b'\x31\xc2\x42\x84\x9e\x73\xa0\xce'

    _TESTS = [{
        # from https://www.boston25news.com/news/watch-humpback-whale-breaches-right-next-to-fishing-boat-near-nh/817484874
        'url': 'anvato:8v9BEynrwx8EFLYpgfOWcG1qJqyXKlRM:4465496',
        'info_dict': {
            'id': '4465496',
            'ext': 'mp4',
            'title': 'VIDEO: Humpback whale breaches right next to NH boat',
            'description': 'VIDEO: Humpback whale breaches right next to NH boat. Footage courtesy: Zach Fahey.',
            'duration': 22,
            'timestamp': 1534855680,
            'upload_date': '20180821',
            'uploader': 'ANV',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # from https://sanfrancisco.cbslocal.com/2016/06/17/source-oakland-cop-on-leave-for-having-girlfriend-help-with-police-reports/
        'url': 'anvato:DVzl9QRzox3ZZsP9bNu5Li3X7obQOnqP:3417601',
        'only_matching': True,
    }]

    def __init__(self, *args, **kwargs):
        super(AnvatoIE, self).__init__(*args, **kwargs)
        self.__server_time = None

@@ -169,7 +193,8 @@ class AnvatoIE(InfoExtractor):
            'api': {
                'anvrid': anvrid,
                'anvstk': md5_text('%s|%s|%d|%s' % (
                    access_key, anvrid, server_time, self._ANVACK_TABLE[access_key])),
                    access_key, anvrid, server_time,
                    self._ANVACK_TABLE.get(access_key, self._API_KEY))),
                'anvts': server_time,
            },
        }

@@ -284,5 +309,6 @@ class AnvatoIE(InfoExtractor):
        mobj = re.match(self._VALID_URL, url)
        access_key, video_id = mobj.group('access_key_or_mcp', 'id')
        if access_key not in self._ANVACK_TABLE:
            access_key = self._MCP_TO_ACCESS_KEY_TABLE[access_key]
            access_key = self._MCP_TO_ACCESS_KEY_TABLE.get(
                access_key) or access_key
        return self._get_anvato_videos(access_key, video_id)
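The anvato hunk above replaces a direct `self._ANVACK_TABLE[access_key]` index with a `dict.get()` lookup that falls back to the generic `_API_KEY`, so unmapped access keys no longer raise `KeyError`. A minimal sketch of that pattern, with a placeholder table:

```python
ANVACK_TABLE = {'known_access_key': 'site_specific_key'}  # placeholder mapping
API_KEY = '3hwbSuqqT690uxjNYBktSQpa5ZrpYYR0Iofx7NcJHyA'  # generic fallback from the diff

def resolve_api_key(access_key):
    # dict.get() with a default never raises KeyError for unknown keys
    return ANVACK_TABLE.get(access_key, API_KEY)

print(resolve_api_key('known_access_key'))  # site-specific key
print(resolve_api_key('unmapped_key'))      # generic fallback key
```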
@@ -21,7 +21,7 @@ from ..compat import compat_etree_fromstring
|
||||
|
||||
class ARDMediathekIE(InfoExtractor):
|
||||
IE_NAME = 'ARD:mediathek'
|
||||
_VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.(?:daserste|rbb-online)\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'
|
||||
_VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.(?:daserste|rbb-online)\.de|one\.ard\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'
|
||||
|
||||
_TESTS = [{
|
||||
# available till 26.07.2022
|
||||
@@ -37,6 +37,9 @@ class ARDMediathekIE(InfoExtractor):
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://one.ard.de/tv/Mord-mit-Aussicht/Mord-mit-Aussicht-6-39-T%C3%B6dliche-Nach/ONE/Video?bcastId=46384294&documentId=55586872',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# audio
|
||||
'url': 'http://www.ardmediathek.de/tv/WDR-H%C3%B6rspiel-Speicher/Tod-eines-Fu%C3%9Fballers/WDR-3/Audio-Podcast?documentId=28488308&bcastId=23074086',
|
||||
@@ -282,3 +285,76 @@ class ARDIE(InfoExtractor):
|
||||
'upload_date': upload_date,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
|
||||
|
||||
class ARDBetaMediathekIE(InfoExtractor):
|
||||
_VALID_URL = r'https://beta\.ardmediathek\.de/[a-z]+/player/(?P<video_id>[a-zA-Z0-9]+)/(?P<display_id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://beta.ardmediathek.de/ard/player/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE/die-robuste-roswita',
|
||||
'md5': '2d02d996156ea3c397cfc5036b5d7f8f',
|
||||
'info_dict': {
|
||||
'display_id': 'die-robuste-roswita',
|
||||
'id': 'Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
|
||||
'title': 'Tatort: Die robuste Roswita',
|
||||
'description': r're:^Der Mord.*trüber ist als die Ilm.',
|
||||
'duration': 5316,
|
||||
'thumbnail': 'https://img.ardmediathek.de/standard/00/55/43/59/34/-1774185891/16x9/960?mandant=ard',
|
||||
'upload_date': '20180826',
|
||||
'ext': 'mp4',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('video_id')
|
||||
display_id = mobj.group('display_id')
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
data_json = self._search_regex(r'window\.__APOLLO_STATE__\s*=\s*(\{.*);\n', webpage, 'json')
|
||||
data = self._parse_json(data_json, display_id)
|
||||
|
||||
res = {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
}
|
||||
formats = []
|
||||
for widget in data.values():
|
||||
if widget.get('_geoblocked'):
|
||||
raise ExtractorError('This video is not available due to geoblocking', expected=True)
|
||||
|
||||
if '_duration' in widget:
|
||||
res['duration'] = widget['_duration']
|
||||
if 'clipTitle' in widget:
|
||||
res['title'] = widget['clipTitle']
|
||||
if '_previewImage' in widget:
|
||||
res['thumbnail'] = widget['_previewImage']
|
||||
if 'broadcastedOn' in widget:
|
||||
res['upload_date'] = unified_strdate(widget['broadcastedOn'])
|
||||
if 'synopsis' in widget:
|
||||
res['description'] = widget['synopsis']
|
||||
if '_subtitleUrl' in widget:
|
||||
res['subtitles'] = {'de': [{
|
||||
'ext': 'ttml',
|
||||
'url': widget['_subtitleUrl'],
|
||||
}]}
|
||||
if '_quality' in widget:
|
||||
format_url = widget['_stream']['json'][0]
|
||||
|
||||
if format_url.endswith('.f4m'):
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
format_url + '?hdcore=3.11.0',
|
||||
video_id, f4m_id='hds', fatal=False))
|
||||
elif format_url.endswith('m3u8'):
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'format_id': 'http-' + widget['_quality'],
|
||||
'url': format_url,
|
||||
'preference': 10, # Plain HTTP, that's nice
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
res['formats'] = formats
|
||||
|
||||
return res
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import random
|
||||
import re
|
||||
import time
|
||||
@@ -16,15 +15,18 @@ from ..utils import (
|
||||
int_or_none,
|
||||
KNOWN_EXTENSIONS,
|
||||
parse_filesize,
|
||||
str_or_none,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
update_url_query,
|
||||
unified_strdate,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class BandcampIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://.*?\.bandcamp\.com/track/(?P<title>[^/?#&]+)'
|
||||
_VALID_URL = r'https?://[^/]+\.bandcamp\.com/track/(?P<title>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
|
||||
'md5': 'c557841d5e50261777a6585648adf439',
|
||||
@@ -36,13 +38,44 @@ class BandcampIE(InfoExtractor):
|
||||
},
|
||||
'_skip': 'There is a limit of 200 free downloads / month for the test song'
|
||||
}, {
|
||||
# free download
|
||||
'url': 'http://benprunty.bandcamp.com/track/lanius-battle',
|
||||
'md5': '0369ace6b939f0927e62c67a1a8d9fa7',
|
||||
'md5': '853e35bf34aa1d6fe2615ae612564b36',
|
||||
'info_dict': {
|
||||
'id': '2650410135',
|
||||
'ext': 'aiff',
|
||||
'title': 'Ben Prunty - Lanius (Battle)',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': 'Ben Prunty',
|
||||
'timestamp': 1396508491,
|
||||
'upload_date': '20140403',
|
||||
'release_date': '20140403',
|
||||
'duration': 260.877,
|
||||
'track': 'Lanius (Battle)',
|
||||
'track_number': 1,
|
||||
'track_id': '2650410135',
|
||||
'artist': 'Ben Prunty',
|
||||
'album': 'FTL: Advanced Edition Soundtrack',
|
||||
},
|
||||
}, {
|
||||
# no free download, mp3 128
|
||||
'url': 'https://relapsealumni.bandcamp.com/track/hail-to-fire',
|
||||
'md5': 'fec12ff55e804bb7f7ebeb77a800c8b7',
|
||||
'info_dict': {
|
||||
'id': '2584466013',
|
||||
'ext': 'mp3',
|
||||
'title': 'Mastodon - Hail to Fire',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': 'Mastodon',
|
||||
'timestamp': 1322005399,
|
||||
'upload_date': '20111122',
|
||||
'release_date': '20040207',
|
||||
'duration': 120.79,
|
||||
'track': 'Hail to Fire',
|
||||
'track_number': 5,
|
||||
'track_id': '2584466013',
|
||||
'artist': 'Mastodon',
|
||||
'album': 'Call of the Mastodon',
|
||||
},
|
||||
}]
|
||||
|
||||
@@ -51,19 +84,23 @@ class BandcampIE(InfoExtractor):
|
||||
title = mobj.group('title')
|
||||
webpage = self._download_webpage(url, title)
|
||||
thumbnail = self._html_search_meta('og:image', webpage, default=None)
|
||||
m_download = re.search(r'freeDownloadPage: "(.*?)"', webpage)
|
||||
if not m_download:
|
||||
m_trackinfo = re.search(r'trackinfo: (.+),\s*?\n', webpage)
|
||||
if m_trackinfo:
|
||||
json_code = m_trackinfo.group(1)
|
||||
data = json.loads(json_code)[0]
|
||||
track_id = compat_str(data['id'])
|
||||
|
||||
if not data.get('file'):
|
||||
raise ExtractorError('Not streamable', video_id=track_id, expected=True)
|
||||
track_id = None
|
||||
track = None
|
||||
track_number = None
|
||||
duration = None
|
||||
|
||||
formats = []
|
||||
for format_id, format_url in data['file'].items():
|
||||
formats = []
|
||||
track_info = self._parse_json(
|
||||
self._search_regex(
|
||||
r'trackinfo\s*:\s*\[\s*({.+?})\s*\]\s*,\s*?\n',
|
||||
webpage, 'track info', default='{}'), title)
|
||||
if track_info:
|
||||
file_ = track_info.get('file')
|
||||
if isinstance(file_, dict):
|
||||
for format_id, format_url in file_.items():
|
||||
if not url_or_none(format_url):
|
||||
continue
|
||||
ext, abr_str = format_id.split('-', 1)
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
@@ -73,85 +110,110 @@ class BandcampIE(InfoExtractor):
|
||||
'acodec': ext,
|
||||
'abr': int_or_none(abr_str),
|
||||
})
|
||||
track = track_info.get('title')
|
||||
track_id = str_or_none(track_info.get('track_id') or track_info.get('id'))
|
||||
track_number = int_or_none(track_info.get('track_num'))
|
||||
duration = float_or_none(track_info.get('duration'))
|
||||
|
||||
self._sort_formats(formats)
|
||||
def extract(key):
|
||||
return self._search_regex(
|
||||
r'\b%s\s*["\']?\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1' % key,
|
||||
webpage, key, default=None, group='value')
|
||||
|
||||
return {
|
||||
'id': track_id,
|
||||
'title': data['title'],
|
||||
'thumbnail': thumbnail,
|
||||
'formats': formats,
|
||||
'duration': float_or_none(data.get('duration')),
|
||||
}
|
||||
else:
|
||||
raise ExtractorError('No free songs found')
|
||||
artist = extract('artist')
|
||||
album = extract('album_title')
|
||||
timestamp = unified_timestamp(
|
||||
extract('publish_date') or extract('album_publish_date'))
|
||||
release_date = unified_strdate(extract('album_release_date'))
|
||||
|
||||
download_link = m_download.group(1)
|
||||
video_id = self._search_regex(
|
||||
r'(?ms)var TralbumData = .*?[{,]\s*id: (?P<id>\d+),?$',
|
||||
webpage, 'video id')
|
||||
download_link = self._search_regex(
|
||||
r'freeDownloadPage\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
|
||||
'download link', default=None, group='url')
|
||||
if download_link:
|
||||
track_id = self._search_regex(
|
||||
r'(?ms)var TralbumData = .*?[{,]\s*id: (?P<id>\d+),?$',
|
||||
webpage, 'track id')
|
||||
|
||||
download_webpage = self._download_webpage(
|
||||
download_link, video_id, 'Downloading free downloads page')
|
||||
download_webpage = self._download_webpage(
|
||||
download_link, track_id, 'Downloading free downloads page')
|
||||
|
||||
blob = self._parse_json(
|
||||
self._search_regex(
|
||||
r'data-blob=(["\'])(?P<blob>{.+?})\1', download_webpage,
|
||||
'blob', group='blob'),
|
||||
video_id, transform_source=unescapeHTML)
|
||||
blob = self._parse_json(
|
||||
self._search_regex(
|
||||
r'data-blob=(["\'])(?P<blob>{.+?})\1', download_webpage,
|
||||
'blob', group='blob'),
|
||||
track_id, transform_source=unescapeHTML)
|
||||
|
||||
info = blob['digital_items'][0]
|
||||
info = try_get(
|
||||
blob, (lambda x: x['digital_items'][0],
|
||||
lambda x: x['download_items'][0]), dict)
|
||||
if info:
|
||||
downloads = info.get('downloads')
|
||||
if isinstance(downloads, dict):
|
||||
if not track:
|
||||
track = info.get('title')
|
||||
if not artist:
|
||||
artist = info.get('artist')
|
||||
if not thumbnail:
|
||||
thumbnail = info.get('thumb_url')
|
||||
|
||||
downloads = info['downloads']
|
||||
track = info['title']
|
||||
download_formats = {}
|
||||
download_formats_list = blob.get('download_formats')
|
||||
if isinstance(download_formats_list, list):
|
||||
for f in blob['download_formats']:
|
||||
name, ext = f.get('name'), f.get('file_extension')
|
||||
if all(isinstance(x, compat_str) for x in (name, ext)):
|
||||
download_formats[name] = ext.strip('.')
|
||||
|
||||
artist = info.get('artist')
|
||||
title = '%s - %s' % (artist, track) if artist else track
|
||||
for format_id, f in downloads.items():
|
||||
format_url = f.get('url')
|
||||
if not format_url:
|
||||
continue
|
||||
# Stat URL generation algorithm is reverse engineered from
|
||||
# download_*_bundle_*.js
|
||||
stat_url = update_url_query(
|
||||
format_url.replace('/download/', '/statdownload/'), {
|
||||
'.rand': int(time.time() * 1000 * random.random()),
|
||||
})
|
||||
format_id = f.get('encoding_name') or format_id
|
||||
stat = self._download_json(
|
||||
stat_url, track_id, 'Downloading %s JSON' % format_id,
|
||||
transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1],
|
||||
fatal=False)
|
||||
if not stat:
|
||||
continue
|
||||
retry_url = url_or_none(stat.get('retry_url'))
|
||||
if not retry_url:
|
||||
continue
|
||||
formats.append({
|
||||
'url': self._proto_relative_url(retry_url, 'http:'),
|
||||
'ext': download_formats.get(format_id),
|
||||
'format_id': format_id,
|
||||
'format_note': f.get('description'),
|
||||
'filesize': parse_filesize(f.get('size_mb')),
|
||||
'vcodec': 'none',
|
||||
})
|
||||
|
||||
download_formats = {}
|
||||
for f in blob['download_formats']:
|
||||
name, ext = f.get('name'), f.get('file_extension')
|
||||
if all(isinstance(x, compat_str) for x in (name, ext)):
|
||||
download_formats[name] = ext.strip('.')
|
||||
|
||||
formats = []
|
||||
for format_id, f in downloads.items():
|
||||
format_url = f.get('url')
|
||||
if not format_url:
|
||||
continue
|
||||
# Stat URL generation algorithm is reverse engineered from
|
||||
# download_*_bundle_*.js
|
||||
stat_url = update_url_query(
|
||||
format_url.replace('/download/', '/statdownload/'), {
|
||||
'.rand': int(time.time() * 1000 * random.random()),
|
||||
})
|
||||
format_id = f.get('encoding_name') or format_id
|
||||
stat = self._download_json(
|
||||
stat_url, video_id, 'Downloading %s JSON' % format_id,
|
||||
transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1],
|
||||
fatal=False)
|
||||
if not stat:
|
||||
continue
|
||||
retry_url = url_or_none(stat.get('retry_url'))
|
||||
if not retry_url:
|
||||
continue
|
||||
formats.append({
|
||||
'url': self._proto_relative_url(retry_url, 'http:'),
|
||||
'ext': download_formats.get(format_id),
|
||||
'format_id': format_id,
|
||||
'format_note': f.get('description'),
|
||||
'filesize': parse_filesize(f.get('size_mb')),
|
||||
'vcodec': 'none',
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = '%s - %s' % (artist, track) if artist else track
|
||||
|
||||
if not duration:
|
||||
duration = float_or_none(self._html_search_meta(
|
||||
'duration', webpage, default=None))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'id': track_id,
|
||||
'title': title,
|
||||
'thumbnail': info.get('thumb_url') or thumbnail,
|
||||
'uploader': info.get('artist'),
|
||||
'artist': artist,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': artist,
|
||||
'timestamp': timestamp,
|
||||
'release_date': release_date,
|
||||
'duration': duration,
|
||||
'track': track,
|
||||
'track_number': track_number,
|
||||
'track_id': track_id,
|
||||
'artist': artist,
|
||||
'album': album,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
@@ -29,7 +29,7 @@ from ..compat import (
class BBCCoUkIE(InfoExtractor):
    IE_NAME = 'bbc.co.uk'
    IE_DESC = 'BBC iPlayer'
    _ID_REGEX = r'[pbw][\da-z]{7}'
    _ID_REGEX = r'(?:[pbm][\da-z]{7}|w[\da-z]{7,14})'
    _VALID_URL = r'''(?x)
                    https?://
                        (?:www\.)?bbc\.co\.uk/

@@ -236,6 +236,12 @@ class BBCCoUkIE(InfoExtractor):
    }, {
        'url': 'http://www.bbc.co.uk/programmes/w3csv1y9',
        'only_matching': True,
    }, {
        'url': 'https://www.bbc.co.uk/programmes/m00005xn',
        'only_matching': True,
    }, {
        'url': 'https://www.bbc.co.uk/programmes/w172w4dww1jqt5s',
        'only_matching': True,
    }]

    _USP_RE = r'/([^/]+?)\.ism(?:\.hlsv2\.ism)?/[^/]+\.m3u8'
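As a quick check of the relaxed `_ID_REGEX` above, a small sketch (programme IDs taken from the test URLs added in the hunk) showing that the new pattern accepts `m`-prefixed IDs and the longer `w`-prefixed IDs:

```python
import re

ID_REGEX = r'(?:[pbm][\da-z]{7}|w[\da-z]{7,14})'

# IDs from the new test URLs: 7-char w/m IDs and a 14-char w ID
for programme_id in ('w3csv1y9', 'm00005xn', 'w172w4dww1jqt5s'):
    assert re.match(r'%s$' % ID_REGEX, programme_id), programme_id
```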
120
youtube_dl/extractor/bitchute.py
Normal file
120
youtube_dl/extractor/bitchute.py
Normal file
@@ -0,0 +1,120 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import itertools
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import urlencode_postdata
|
||||
|
||||
|
||||
class BitChuteIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?bitchute\.com/(?:video|embed|torrent/[^/]+)/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bitchute.com/video/szoMrox2JEI/',
|
||||
'md5': '66c4a70e6bfc40dcb6be3eb1d74939eb',
|
||||
'info_dict': {
|
||||
'id': 'szoMrox2JEI',
|
||||
'ext': 'mp4',
|
||||
'title': 'Fuck bitches get money',
|
||||
'description': 'md5:3f21f6fb5b1d17c3dee9cf6b5fe60b3a',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': 'Victoria X Rave',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.bitchute.com/embed/lbb5G1hjPhw/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.bitchute.com/torrent/Zee5BE49045h/szoMrox2JEI.webtorrent',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(
|
||||
'https://www.bitchute.com/video/%s' % video_id, video_id, headers={
|
||||
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.57 Safari/537.36',
|
||||
})
|
||||
|
||||
title = self._search_regex(
|
||||
(r'<[^>]+\bid=["\']video-title[^>]+>([^<]+)', r'<title>([^<]+)'),
|
||||
webpage, 'title', default=None) or self._html_search_meta(
|
||||
'description', webpage, 'title',
|
||||
default=None) or self._og_search_description(webpage)
|
||||
|
||||
formats = [
|
||||
{'url': mobj.group('url')}
|
||||
for mobj in re.finditer(
|
||||
r'addWebSeed\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage)]
|
||||
self._sort_formats(formats)
|
||||
|
||||
description = self._html_search_regex(
|
||||
r'(?s)<div\b[^>]+\bclass=["\']full hidden[^>]+>(.+?)</div>',
|
||||
webpage, 'description', fatal=False)
|
||||
thumbnail = self._og_search_thumbnail(
|
||||
webpage, default=None) or self._html_search_meta(
|
||||
'twitter:image:src', webpage, 'thumbnail')
|
||||
uploader = self._html_search_regex(
|
||||
r'(?s)<p\b[^>]+\bclass=["\']video-author[^>]+>(.+?)</p>', webpage,
|
||||
'uploader', fatal=False)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': uploader,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class BitChuteChannelIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?bitchute\.com/channel/(?P<id>[^/?#&]+)'
|
||||
_TEST = {
|
||||
'url': 'https://www.bitchute.com/channel/victoriaxrave/',
|
||||
'playlist_mincount': 185,
|
||||
'info_dict': {
|
||||
'id': 'victoriaxrave',
|
||||
},
|
||||
}
|
||||
|
||||
_TOKEN = 'zyG6tQcGPE5swyAEFLqKUwMuMMuF6IO2DZ6ZDQjGfsL0e4dcTLwqkTTul05Jdve7'
|
||||
|
||||
def _entries(self, channel_id):
|
||||
channel_url = 'https://www.bitchute.com/channel/%s/' % channel_id
|
||||
offset = 0
|
||||
for page_num in itertools.count(1):
|
||||
data = self._download_json(
|
||||
'%sextend/' % channel_url, channel_id,
|
||||
'Downloading channel page %d' % page_num,
|
||||
data=urlencode_postdata({
|
||||
'csrfmiddlewaretoken': self._TOKEN,
|
||||
'name': '',
|
||||
'offset': offset,
|
||||
}), headers={
|
||||
'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
|
||||
'Referer': channel_url,
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
'Cookie': 'csrftoken=%s' % self._TOKEN,
|
||||
})
|
||||
if data.get('success') is False:
|
||||
break
|
||||
html = data.get('html')
|
||||
if not html:
|
||||
break
|
||||
video_ids = re.findall(
|
||||
r'class=["\']channel-videos-image-container[^>]+>\s*<a\b[^>]+\bhref=["\']/video/([^"\'/]+)',
|
||||
html)
|
||||
if not video_ids:
|
||||
break
|
||||
offset += len(video_ids)
|
||||
for video_id in video_ids:
|
||||
yield self.url_result(
|
||||
'https://www.bitchute.com/video/%s' % video_id,
|
||||
ie=BitChuteIE.ie_key(), video_id=video_id)
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel_id = self._match_id(url)
|
||||
return self.playlist_result(
|
||||
self._entries(channel_id), playlist_id=channel_id)
|
||||
@@ -108,7 +108,7 @@ class CeskaTelevizeIE(InfoExtractor):

        for user_agent in (None, USER_AGENTS['Safari']):
            req = sanitized_Request(
                'http://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist',
                'https://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist',
                data=urlencode_postdata(data))

            req.add_header('Content-type', 'application/x-www-form-urlencoded')
@@ -1,15 +1,19 @@
from __future__ import unicode_literals

from .common import InfoExtractor
from ..compat import (
    compat_parse_qs,
    compat_urllib_parse_urlparse,
)
from ..utils import (
    float_or_none,
    parse_iso8601,
    unified_timestamp,
)


class ClypIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?clyp\.it/(?P<id>[a-z0-9]+)'
    _TEST = {
    _TESTS = [{
        'url': 'https://clyp.it/ojz2wfah',
        'md5': '1d4961036c41247ecfdcc439c0cddcbb',
        'info_dict': {

@@ -21,13 +25,34 @@ class ClypIE(InfoExtractor):
            'timestamp': 1443515251,
            'upload_date': '20150929',
        },
    }
    }, {
        'url': 'https://clyp.it/b04p1odi?token=b0078e077e15835845c528a44417719d',
        'info_dict': {
            'id': 'b04p1odi',
            'ext': 'mp3',
            'title': 'GJ! (Reward Edit)',
            'description': 'Metal Resistance (THE ONE edition)',
            'duration': 177.789,
            'timestamp': 1528241278,
            'upload_date': '20180605',
        },
        'params': {
            'skip_download': True,
        },
    }]

    def _real_extract(self, url):
        audio_id = self._match_id(url)

        qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        token = qs.get('token', [None])[0]

        query = {}
        if token:
            query['token'] = token

        metadata = self._download_json(
            'https://api.clyp.it/%s' % audio_id, audio_id)
            'https://api.clyp.it/%s' % audio_id, audio_id, query=query)

        formats = []
        for secure in ('', 'Secure'):

@@ -45,7 +70,7 @@ class ClypIE(InfoExtractor):
        title = metadata['Title']
        description = metadata.get('Description')
        duration = float_or_none(metadata.get('Duration'))
        timestamp = parse_iso8601(metadata.get('DateCreated'))
        timestamp = unified_timestamp(metadata.get('DateCreated'))

        return {
            'id': audio_id,
@@ -8,6 +8,7 @@ import zlib
|
||||
from hashlib import sha1
|
||||
from math import pow, sqrt, floor
|
||||
from .common import InfoExtractor
|
||||
from .vrv import VRVIE
|
||||
from ..compat import (
|
||||
compat_b64decode,
|
||||
compat_etree_fromstring,
|
||||
@@ -18,6 +19,8 @@ from ..compat import (
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
bytes_to_intlist,
|
||||
extract_attributes,
|
||||
float_or_none,
|
||||
intlist_to_bytes,
|
||||
int_or_none,
|
||||
lowercase_escape,
|
||||
@@ -26,7 +29,6 @@ from ..utils import (
|
||||
unified_strdate,
|
||||
urlencode_postdata,
|
||||
xpath_text,
|
||||
extract_attributes,
|
||||
)
|
||||
from ..aes import (
|
||||
aes_cbc_decrypt,
|
||||
@@ -139,7 +141,8 @@ class CrunchyrollBaseIE(InfoExtractor):
|
||||
parsed_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
|
||||
|
||||
|
||||
class CrunchyrollIE(CrunchyrollBaseIE):
|
||||
class CrunchyrollIE(CrunchyrollBaseIE, VRVIE):
|
||||
IE_NAME = 'crunchyroll'
|
||||
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|[^/]*/[^/?&]*?)(?P<video_id>[0-9]+))(?:[/?&]|$)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
|
||||
@@ -148,7 +151,7 @@ class CrunchyrollIE(CrunchyrollBaseIE):
|
||||
'ext': 'mp4',
|
||||
'title': 'Wanna be the Strongest in the World Episode 1 – An Idol-Wrestler is Born!',
|
||||
'description': 'md5:2d17137920c64f2f49981a7797d275ef',
|
||||
'thumbnail': 'http://img1.ak.crunchyroll.com/i/spire1-tmb/20c6b5e10f1a47b10516877d3c039cae1380951166_full.jpg',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': 'Yomiuri Telecasting Corporation (YTV)',
|
||||
'upload_date': '20131013',
|
||||
'url': 're:(?!.*&)',
|
||||
@@ -221,7 +224,7 @@ class CrunchyrollIE(CrunchyrollBaseIE):
|
||||
'info_dict': {
|
||||
'id': '535080',
|
||||
'ext': 'mp4',
|
||||
'title': '11eyes Episode 1 – Piros éjszaka - Red Night',
|
||||
'title': '11eyes Episode 1 – Red Night ~ Piros éjszaka',
|
||||
'description': 'Kakeru and Yuka are thrown into an alternate nightmarish world they call "Red Night".',
|
||||
'uploader': 'Marvelous AQL Inc.',
|
||||
'upload_date': '20091021',
|
||||
@@ -262,6 +265,9 @@ class CrunchyrollIE(CrunchyrollBaseIE):
|
||||
# Just test metadata extraction
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.crunchyroll.com/media-723735',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_FORMAT_IDS = {
|
||||
@@ -434,13 +440,18 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
if 'To view this, please log in to verify you are 18 or older.' in webpage:
|
||||
self.raise_login_required()
|
||||
|
||||
media = self._parse_json(self._search_regex(
|
||||
r'vilos\.config\.media\s*=\s*({.+?});',
|
||||
webpage, 'vilos media', default='{}'), video_id)
|
||||
media_metadata = media.get('metadata') or {}
|
||||
|
||||
video_title = self._html_search_regex(
|
||||
r'(?s)<h1[^>]*>((?:(?!<h1).)*?<span[^>]+itemprop=["\']title["\'][^>]*>(?:(?!<h1).)+?)</h1>',
|
||||
webpage, 'video_title')
|
||||
video_title = re.sub(r' {2,}', ' ', video_title)
|
||||
video_description = self._parse_json(self._html_search_regex(
|
||||
video_description = (self._parse_json(self._html_search_regex(
|
||||
r'<script[^>]*>\s*.+?\[media_id=%s\].+?({.+?"description"\s*:.+?})\);' % video_id,
|
||||
webpage, 'description', default='{}'), video_id).get('description')
|
||||
webpage, 'description', default='{}'), video_id) or media_metadata).get('description')
|
||||
if video_description:
|
||||
video_description = lowercase_escape(video_description.replace(r'\r\n', '\n'))
|
||||
video_upload_date = self._html_search_regex(
|
||||
@@ -453,91 +464,99 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
[r'<a[^>]+href="/publisher/[^"]+"[^>]*>([^<]+)</a>', r'<div>\s*Publisher:\s*<span>\s*(.+?)\s*</span>\s*</div>'],
|
||||
webpage, 'video_uploader', fatal=False)
|
||||
|
||||
available_fmts = []
|
||||
for a, fmt in re.findall(r'(<a[^>]+token=["\']showmedia\.([0-9]{3,4})p["\'][^>]+>)', webpage):
|
||||
attrs = extract_attributes(a)
|
||||
href = attrs.get('href')
|
||||
if href and '/freetrial' in href:
|
||||
continue
|
||||
available_fmts.append(fmt)
|
||||
if not available_fmts:
|
||||
for p in (r'token=["\']showmedia\.([0-9]{3,4})p"', r'showmedia\.([0-9]{3,4})p'):
|
||||
available_fmts = re.findall(p, webpage)
|
||||
if available_fmts:
|
||||
break
|
||||
video_encode_ids = []
|
||||
formats = []
|
||||
for fmt in available_fmts:
|
||||
stream_quality, stream_format = self._FORMAT_IDS[fmt]
|
||||
video_format = fmt + 'p'
|
||||
stream_infos = []
|
||||
streamdata = self._call_rpc_api(
|
||||
'VideoPlayer_GetStandardConfig', video_id,
|
||||
'Downloading media info for %s' % video_format, data={
|
||||
'media_id': video_id,
|
||||
'video_format': stream_format,
|
||||
'video_quality': stream_quality,
|
||||
'current_page': url,
|
||||
})
|
||||
if streamdata is not None:
|
||||
stream_info = streamdata.find('./{default}preload/stream_info')
|
||||
for stream in media.get('streams', []):
|
||||
formats.extend(self._extract_vrv_formats(
|
||||
stream.get('url'), video_id, stream.get('format'),
|
||||
stream.get('audio_lang'), stream.get('hardsub_lang')))
|
||||
if not formats:
|
||||
available_fmts = []
|
||||
for a, fmt in re.findall(r'(<a[^>]+token=["\']showmedia\.([0-9]{3,4})p["\'][^>]+>)', webpage):
|
||||
attrs = extract_attributes(a)
|
||||
href = attrs.get('href')
|
||||
if href and '/freetrial' in href:
|
||||
continue
|
||||
available_fmts.append(fmt)
|
||||
if not available_fmts:
|
||||
for p in (r'token=["\']showmedia\.([0-9]{3,4})p"', r'showmedia\.([0-9]{3,4})p'):
|
||||
available_fmts = re.findall(p, webpage)
|
||||
if available_fmts:
|
||||
break
|
||||
if not available_fmts:
|
||||
available_fmts = self._FORMAT_IDS.keys()
|
||||
video_encode_ids = []
|
||||
|
||||
for fmt in available_fmts:
|
||||
stream_quality, stream_format = self._FORMAT_IDS[fmt]
|
||||
video_format = fmt + 'p'
|
||||
stream_infos = []
|
||||
streamdata = self._call_rpc_api(
|
||||
'VideoPlayer_GetStandardConfig', video_id,
|
||||
'Downloading media info for %s' % video_format, data={
|
||||
'media_id': video_id,
|
||||
'video_format': stream_format,
|
||||
'video_quality': stream_quality,
|
||||
'current_page': url,
|
||||
})
|
||||
if streamdata is not None:
|
||||
stream_info = streamdata.find('./{default}preload/stream_info')
|
||||
if stream_info is not None:
|
||||
stream_infos.append(stream_info)
|
||||
stream_info = self._call_rpc_api(
|
||||
'VideoEncode_GetStreamInfo', video_id,
|
||||
'Downloading stream info for %s' % video_format, data={
|
||||
'media_id': video_id,
|
||||
'video_format': stream_format,
|
||||
'video_encode_quality': stream_quality,
|
||||
})
|
||||
if stream_info is not None:
|
||||
stream_infos.append(stream_info)
|
||||
stream_info = self._call_rpc_api(
|
||||
'VideoEncode_GetStreamInfo', video_id,
|
||||
'Downloading stream info for %s' % video_format, data={
|
||||
'media_id': video_id,
|
||||
'video_format': stream_format,
|
||||
'video_encode_quality': stream_quality,
|
||||
})
|
||||
if stream_info is not None:
|
||||
stream_infos.append(stream_info)
|
||||
for stream_info in stream_infos:
|
||||
video_encode_id = xpath_text(stream_info, './video_encode_id')
|
||||
if video_encode_id in video_encode_ids:
|
||||
continue
|
||||
video_encode_ids.append(video_encode_id)
|
||||
for stream_info in stream_infos:
|
||||
video_encode_id = xpath_text(stream_info, './video_encode_id')
|
||||
if video_encode_id in video_encode_ids:
|
||||
continue
|
||||
video_encode_ids.append(video_encode_id)
|
||||
|
||||
video_file = xpath_text(stream_info, './file')
|
||||
if not video_file:
|
||||
continue
|
||||
if video_file.startswith('http'):
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
video_file, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
continue
|
||||
|
||||
video_url = xpath_text(stream_info, './host')
|
||||
if not video_url:
|
||||
continue
|
||||
metadata = stream_info.find('./metadata')
|
||||
format_info = {
|
||||
'format': video_format,
|
||||
'height': int_or_none(xpath_text(metadata, './height')),
|
||||
'width': int_or_none(xpath_text(metadata, './width')),
|
||||
}
|
||||
|
||||
if '.fplive.net/' in video_url:
|
||||
video_url = re.sub(r'^rtmpe?://', 'http://', video_url.strip())
|
||||
parsed_video_url = compat_urlparse.urlparse(video_url)
|
||||
direct_video_url = compat_urlparse.urlunparse(parsed_video_url._replace(
|
||||
netloc='v.lvlt.crcdn.net',
|
||||
path='%s/%s' % (remove_end(parsed_video_url.path, '/'), video_file.split(':')[-1])))
|
||||
if self._is_valid_url(direct_video_url, video_id, video_format):
|
||||
format_info.update({
|
||||
'format_id': 'http-' + video_format,
|
||||
'url': direct_video_url,
|
||||
})
|
||||
formats.append(format_info)
|
||||
video_file = xpath_text(stream_info, './file')
|
||||
if not video_file:
|
||||
continue
|
||||
if video_file.startswith('http'):
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
video_file, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
continue
|
||||
|
||||
format_info.update({
|
||||
'format_id': 'rtmp-' + video_format,
|
||||
'url': video_url,
|
||||
'play_path': video_file,
|
||||
'ext': 'flv',
|
||||
})
|
||||
formats.append(format_info)
|
||||
video_url = xpath_text(stream_info, './host')
|
||||
if not video_url:
|
||||
continue
|
||||
metadata = stream_info.find('./metadata')
|
||||
format_info = {
|
||||
'format': video_format,
|
||||
'height': int_or_none(xpath_text(metadata, './height')),
|
||||
'width': int_or_none(xpath_text(metadata, './width')),
|
||||
}
|
||||
|
||||
if '.fplive.net/' in video_url:
|
||||
video_url = re.sub(r'^rtmpe?://', 'http://', video_url.strip())
|
||||
parsed_video_url = compat_urlparse.urlparse(video_url)
|
||||
direct_video_url = compat_urlparse.urlunparse(parsed_video_url._replace(
|
||||
netloc='v.lvlt.crcdn.net',
|
||||
path='%s/%s' % (remove_end(parsed_video_url.path, '/'), video_file.split(':')[-1])))
|
||||
if self._is_valid_url(direct_video_url, video_id, video_format):
|
||||
format_info.update({
|
||||
'format_id': 'http-' + video_format,
|
||||
'url': direct_video_url,
|
||||
})
|
||||
formats.append(format_info)
|
||||
continue
|
||||
|
||||
format_info.update({
|
||||
'format_id': 'rtmp-' + video_format,
|
||||
'url': video_url,
|
||||
'play_path': video_file,
|
||||
'ext': 'flv',
|
||||
})
|
||||
formats.append(format_info)
|
||||
self._sort_formats(formats, ('height', 'width', 'tbr', 'fps'))
|
||||
|
||||
metadata = self._call_rpc_api(
|
||||
@@ -546,7 +565,17 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
'media_id': video_id,
|
||||
})
|
||||
|
||||
subtitles = self.extract_subtitles(video_id, webpage)
|
||||
subtitles = {}
|
||||
for subtitle in media.get('subtitles', []):
|
||||
subtitle_url = subtitle.get('url')
|
||||
if not subtitle_url:
|
||||
continue
|
||||
subtitles.setdefault(subtitle.get('language', 'enUS'), []).append({
|
||||
'url': subtitle_url,
|
||||
'ext': subtitle.get('format', 'ass'),
|
||||
})
|
||||
if not subtitles:
|
||||
subtitles = self.extract_subtitles(video_id, webpage)
|
||||
|
||||
# webpage provide more accurate data than series_title from XML
|
||||
series = self._html_search_regex(
|
||||
@@ -554,8 +583,8 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
webpage, 'series', fatal=False)
|
||||
season = xpath_text(metadata, 'series_title')
|
||||
|
||||
episode = xpath_text(metadata, 'episode_title')
|
||||
episode_number = int_or_none(xpath_text(metadata, 'episode_number'))
|
||||
episode = xpath_text(metadata, 'episode_title') or media_metadata.get('title')
|
||||
episode_number = int_or_none(xpath_text(metadata, 'episode_number') or media_metadata.get('episode_number'))
|
||||
|
||||
season_number = int_or_none(self._search_regex(
|
||||
r'(?s)<h\d[^>]+id=["\']showmedia_about_episode_num[^>]+>.+?</h\d>\s*<h4>\s*Season (\d+)',
|
||||
@@ -565,7 +594,8 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
'description': video_description,
|
||||
'thumbnail': xpath_text(metadata, 'episode_image_url'),
|
||||
'duration': float_or_none(media_metadata.get('duration'), 1000),
|
||||
'thumbnail': xpath_text(metadata, 'episode_image_url') or media_metadata.get('thumbnail', {}).get('url'),
|
||||
'uploader': video_uploader,
|
||||
'upload_date': video_upload_date,
|
||||
'series': series,
|
||||
@@ -580,7 +610,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
|
||||
class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE):
|
||||
IE_NAME = 'crunchyroll:playlist'
|
||||
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?!(?:news|anime-news|library|forum|launchcalendar|lineup|store|comics|freetrial|login))(?P<id>[\w\-]+))/?(?:\?|$)'
|
||||
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?!(?:news|anime-news|library|forum|launchcalendar|lineup|store|comics|freetrial|login|media-\d+))(?P<id>[\w\-]+))/?(?:\?|$)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.crunchyroll.com/a-bridge-to-the-starry-skies-hoshizora-e-kakaru-hashi',
|
||||
|
||||
@@ -4,7 +4,10 @@ from __future__ import unicode_literals
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
parse_iso8601,
|
||||
smuggle_url,
|
||||
str_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -40,10 +43,15 @@ class CWTVIE(InfoExtractor):
|
||||
'duration': 1263,
|
||||
'series': 'Whose Line Is It Anyway?',
|
||||
'season_number': 11,
|
||||
'season': '11',
|
||||
'episode_number': 20,
|
||||
'upload_date': '20151006',
|
||||
'timestamp': 1444107300,
|
||||
'age_limit': 14,
|
||||
'uploader': 'CWTV',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://cwtv.com/thecw/chroniclesofcisco/?play=8adebe35-f447-465f-ab52-e863506ff6d6',
|
||||
@@ -58,60 +66,28 @@ class CWTVIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
video_data = None
|
||||
formats = []
|
||||
for partner in (154, 213):
|
||||
vdata = self._download_json(
|
||||
'http://metaframe.digitalsmiths.tv/v2/CWtv/assets/%s/partner/%d?format=json' % (video_id, partner), video_id, fatal=False)
|
||||
if not vdata:
|
||||
continue
|
||||
video_data = vdata
|
||||
for quality, quality_data in vdata.get('videos', {}).items():
|
||||
quality_url = quality_data.get('uri')
|
||||
if not quality_url:
|
||||
continue
|
||||
if quality == 'variantplaylist':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
quality_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
tbr = int_or_none(quality_data.get('bitrate'))
|
||||
format_id = 'http' + ('-%d' % tbr if tbr else '')
|
||||
if self._is_valid_url(quality_url, video_id, format_id):
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': quality_url,
|
||||
'tbr': tbr,
|
||||
})
|
||||
video_metadata = video_data['assetFields']
|
||||
ism_url = video_metadata.get('smoothStreamingUrl')
|
||||
if ism_url:
|
||||
formats.extend(self._extract_ism_formats(
|
||||
ism_url, video_id, ism_id='mss', fatal=False))
|
||||
self._sort_formats(formats)
|
||||
video_data = self._download_json(
|
||||
'http://images.cwtv.com/feed/mobileapp/video-meta/apiversion_8/guid_' + video_id,
|
||||
video_id)['video']
|
||||
title = video_data['title']
|
||||
mpx_url = video_data.get('mpx_url') or 'http://link.theplatform.com/s/cwtv/media/guid/2703454149/%s?formats=M3U' % video_id
|
||||
|
||||
thumbnails = [{
|
||||
'url': image['uri'],
|
||||
'width': image.get('width'),
|
||||
'height': image.get('height'),
|
||||
} for image_id, image in video_data['images'].items() if image.get('uri')] if video_data.get('images') else None
|
||||
|
||||
subtitles = {
|
||||
'en': [{
|
||||
'url': video_metadata['UnicornCcUrl'],
|
||||
}],
|
||||
} if video_metadata.get('UnicornCcUrl') else None
|
||||
season = str_or_none(video_data.get('season'))
|
||||
episode = str_or_none(video_data.get('episode'))
|
||||
if episode and season:
|
||||
episode = episode.lstrip(season)
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': video_id,
|
||||
'title': video_metadata['title'],
|
||||
'description': video_metadata.get('description'),
|
||||
'duration': int_or_none(video_metadata.get('duration')),
|
||||
'series': video_metadata.get('seriesName'),
|
||||
'season_number': int_or_none(video_metadata.get('seasonNumber')),
|
||||
'season': video_metadata.get('seasonName'),
|
||||
'episode_number': int_or_none(video_metadata.get('episodeNumber')),
|
||||
'timestamp': parse_iso8601(video_data.get('startTime')),
|
||||
'thumbnails': thumbnails,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'title': title,
|
||||
'url': smuggle_url(mpx_url, {'force_smil_url': True}),
|
||||
'description': video_data.get('description_long'),
|
||||
'duration': int_or_none(video_data.get('duration_secs')),
|
||||
'series': video_data.get('series_name'),
|
||||
'season_number': int_or_none(season),
|
||||
'episode_number': int_or_none(episode),
|
||||
'timestamp': parse_iso8601(video_data.get('start_time')),
|
||||
'age_limit': parse_age_limit(video_data.get('rating')),
|
||||
'ie_key': 'ThePlatform',
|
||||
}
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import functools
|
||||
import hashlib
|
||||
import itertools
|
||||
import json
|
||||
@@ -16,11 +17,13 @@ from ..utils import (
|
||||
error_to_compat_str,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
OnDemandPagedList,
|
||||
parse_iso8601,
|
||||
sanitized_Request,
|
||||
str_to_int,
|
||||
unescapeHTML,
|
||||
mimetype2ext,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
@@ -343,17 +346,93 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
|
||||
class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
|
||||
IE_NAME = 'dailymotion:playlist'
|
||||
_VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>[^/?#&]+)'
|
||||
_MORE_PAGES_INDICATOR = r'(?s)<div class="pages[^"]*">.*?<a\s+class="[^"]*?icon-arrow_right[^"]*?"'
|
||||
_PAGE_TEMPLATE = 'https://www.dailymotion.com/playlist/%s/%s'
|
||||
_VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>x[0-9a-z]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q',
|
||||
'info_dict': {
|
||||
'title': 'SPORT',
|
||||
'id': 'xv4bw_nqtv_sport',
|
||||
'id': 'xv4bw',
|
||||
},
|
||||
'playlist_mincount': 20,
|
||||
}]
|
||||
_PAGE_SIZE = 100
|
||||
|
||||
def _fetch_page(self, playlist_id, authorizaion, page):
|
||||
page += 1
|
||||
videos = self._download_json(
|
||||
'https://graphql.api.dailymotion.com',
|
||||
playlist_id, 'Downloading page %d' % page,
|
||||
data=json.dumps({
|
||||
'query': '''{
|
||||
collection(xid: "%s") {
|
||||
videos(first: %d, page: %d) {
|
||||
pageInfo {
|
||||
hasNextPage
|
||||
nextPage
|
||||
}
|
||||
edges {
|
||||
node {
|
||||
xid
|
||||
url
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}''' % (playlist_id, self._PAGE_SIZE, page)
|
||||
}).encode(), headers={
|
||||
'Authorization': authorizaion,
|
||||
'Origin': 'https://www.dailymotion.com',
|
||||
})['data']['collection']['videos']
|
||||
for edge in videos['edges']:
|
||||
node = edge['node']
|
||||
yield self.url_result(
|
||||
node['url'], DailymotionIE.ie_key(), node['xid'])
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
api = self._parse_json(self._search_regex(
|
||||
r'__PLAYER_CONFIG__\s*=\s*({.+?});',
|
||||
webpage, 'player config'), playlist_id)['context']['api']
|
||||
auth = self._download_json(
|
||||
api.get('auth_url', 'https://graphql.api.dailymotion.com/oauth/token'),
|
||||
playlist_id, data=urlencode_postdata({
|
||||
'client_id': api.get('client_id', 'f1a362d288c1b98099c7'),
|
||||
'client_secret': api.get('client_secret', 'eea605b96e01c796ff369935357eca920c5da4c5'),
|
||||
'grant_type': 'client_credentials',
|
||||
}))
|
||||
authorizaion = '%s %s' % (auth.get('token_type', 'Bearer'), auth['access_token'])
|
||||
entries = OnDemandPagedList(functools.partial(
|
||||
self._fetch_page, playlist_id, authorizaion), self._PAGE_SIZE)
|
||||
return self.playlist_result(
|
||||
entries, playlist_id,
|
||||
self._og_search_title(webpage))
|
||||
|
||||
|
||||
class DailymotionUserIE(DailymotionBaseInfoExtractor):
|
||||
IE_NAME = 'dailymotion:user'
|
||||
_VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist)/)(?:(?:old/)?user/)?(?P<user>[^/]+)'
|
||||
_MORE_PAGES_INDICATOR = r'(?s)<div class="pages[^"]*">.*?<a\s+class="[^"]*?icon-arrow_right[^"]*?"'
|
||||
_PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.dailymotion.com/user/nqtv',
|
||||
'info_dict': {
|
||||
'id': 'nqtv',
|
||||
'title': 'Rémi Gaillard',
|
||||
},
|
||||
'playlist_mincount': 100,
|
||||
}, {
|
||||
'url': 'http://www.dailymotion.com/user/UnderProject',
|
||||
'info_dict': {
|
||||
'id': 'UnderProject',
|
||||
'title': 'UnderProject',
|
||||
},
|
||||
'playlist_mincount': 1800,
|
||||
'expected_warnings': [
|
||||
'Stopped at duplicated page',
|
||||
],
|
||||
'skip': 'Takes too long time',
|
||||
}]
|
||||
|
||||
def _extract_entries(self, id):
|
||||
video_ids = set()
|
||||
@@ -379,43 +458,6 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
|
||||
if re.search(self._MORE_PAGES_INDICATOR, webpage) is None:
|
||||
break
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
playlist_id = mobj.group('id')
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': playlist_id,
|
||||
'title': self._og_search_title(webpage),
|
||||
'entries': self._extract_entries(playlist_id),
|
||||
}
|
||||
|
||||
|
||||
class DailymotionUserIE(DailymotionPlaylistIE):
|
||||
IE_NAME = 'dailymotion:user'
|
||||
_VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist)/)(?:(?:old/)?user/)?(?P<user>[^/]+)'
|
||||
_PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.dailymotion.com/user/nqtv',
|
||||
'info_dict': {
|
||||
'id': 'nqtv',
|
||||
'title': 'Rémi Gaillard',
|
||||
},
|
||||
'playlist_mincount': 100,
|
||||
}, {
|
||||
'url': 'http://www.dailymotion.com/user/UnderProject',
|
||||
'info_dict': {
|
||||
'id': 'UnderProject',
|
||||
'title': 'UnderProject',
|
||||
},
|
||||
'playlist_mincount': 1800,
|
||||
'expected_warnings': [
|
||||
'Stopped at duplicated page',
|
||||
],
|
||||
'skip': 'Takes too long time',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
user = mobj.group('user')
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
@@ -11,7 +13,13 @@ from ..utils import (
|
||||
|
||||
|
||||
class ExpressenIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?expressen\.se/tv/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:www\.)?expressen\.se/
|
||||
(?:(?:tvspelare/video|videoplayer/embed)/)?
|
||||
tv/(?:[^/]+/)*
|
||||
(?P<id>[^/?#&]+)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'https://www.expressen.se/tv/ledare/ledarsnack/ledarsnack-om-arbetslosheten-bland-kvinnor-i-speciellt-utsatta-omraden/',
|
||||
'md5': '2fbbe3ca14392a6b1b36941858d33a45',
|
||||
@@ -28,8 +36,21 @@ class ExpressenIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.expressen.se/tv/kultur/kulturdebatt-med-expressens-karin-olsson/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.expressen.se/tvspelare/video/tv/ditv/ekonomistudion/experterna-har-ar-fragorna-som-avgor-valet/?embed=true&external=true&autoplay=true&startVolume=0&partnerId=di',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.expressen.se/videoplayer/embed/tv/ditv/ekonomistudion/experterna-har-ar-fragorna-som-avgor-valet/?embed=true&external=true&autoplay=true&startVolume=0&partnerId=di',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
return [
|
||||
mobj.group('url') for mobj in re.finditer(
|
||||
r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?expressen\.se/(?:tvspelare/video|videoplayer/embed)/tv/.+?)\1',
|
||||
webpage)]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
|
||||
@@ -54,6 +54,7 @@ from .appletrailers import (
|
||||
from .archiveorg import ArchiveOrgIE
|
||||
from .arkena import ArkenaIE
|
||||
from .ard import (
|
||||
ARDBetaMediathekIE,
|
||||
ARDIE,
|
||||
ARDMediathekIE,
|
||||
)
|
||||
@@ -118,6 +119,10 @@ from .bilibili import (
|
||||
BiliBiliBangumiIE,
|
||||
)
|
||||
from .biobiochiletv import BioBioChileTVIE
|
||||
from .bitchute import (
|
||||
BitChuteIE,
|
||||
BitChuteChannelIE,
|
||||
)
|
||||
from .biqle import BIQLEIE
|
||||
from .bleacherreport import (
|
||||
BleacherReportIE,
|
||||
@@ -516,6 +521,7 @@ from .keezmovies import KeezMoviesIE
|
||||
from .ketnet import KetnetIE
|
||||
from .khanacademy import KhanAcademyIE
|
||||
from .kickstarter import KickStarterIE
|
||||
from .kinopoisk import KinoPoiskIE
|
||||
from .keek import KeekIE
|
||||
from .konserthusetplay import KonserthusetPlayIE
|
||||
from .kontrtube import KontrTubeIE
|
||||
@@ -736,7 +742,10 @@ from .nonktube import NonkTubeIE
|
||||
from .noovo import NoovoIE
|
||||
from .normalboots import NormalbootsIE
|
||||
from .nosvideo import NosVideoIE
|
||||
from .nova import NovaIE
|
||||
from .nova import (
|
||||
NovaEmbedIE,
|
||||
NovaIE,
|
||||
)
|
||||
from .novamov import (
|
||||
AuroraVidIE,
|
||||
CloudTimeIE,
|
||||
@@ -860,6 +869,10 @@ from .pornhub import (
|
||||
from .pornotube import PornotubeIE
|
||||
from .pornovoisines import PornoVoisinesIE
|
||||
from .pornoxo import PornoXOIE
|
||||
from .puhutv import (
|
||||
PuhuTVIE,
|
||||
PuhuTVSerieIE,
|
||||
)
|
||||
from .presstv import PressTVIE
|
||||
from .primesharetv import PrimeShareTVIE
|
||||
from .promptfile import PromptFileIE
|
||||
@@ -891,7 +904,10 @@ from .rai import (
|
||||
RaiPlayPlaylistIE,
|
||||
RaiIE,
|
||||
)
|
||||
from .raywenderlich import RayWenderlichIE
|
||||
from .raywenderlich import (
|
||||
RayWenderlichIE,
|
||||
RayWenderlichCourseIE,
|
||||
)
|
||||
from .rbmaradio import RBMARadioIE
|
||||
from .rds import RDSIE
|
||||
from .redbulltv import RedBullTVIE
|
||||
@@ -1166,6 +1182,7 @@ from .tvp import (
|
||||
from .tvplay import (
|
||||
TVPlayIE,
|
||||
ViafreeIE,
|
||||
TVPlayHomeIE,
|
||||
)
|
||||
from .tvplayer import TVPlayerIE
|
||||
from .tweakers import TweakersIE
|
||||
@@ -1287,6 +1304,7 @@ from .viki import (
|
||||
VikiIE,
|
||||
VikiChannelIE,
|
||||
)
|
||||
from .viqeo import ViqeoIE
|
||||
from .viu import (
|
||||
ViuIE,
|
||||
ViuPlaylistIE,
|
||||
@@ -1412,6 +1430,7 @@ from .younow import (
|
||||
YouNowMomentIE,
|
||||
)
|
||||
from .youporn import YouPornIE
|
||||
from .yourporn import YourPornIE
|
||||
from .yourupload import YourUploadIE
|
||||
from .youtube import (
|
||||
YoutubeIE,
|
||||
|
||||
@@ -355,7 +355,6 @@ class FacebookIE(InfoExtractor):
|
||||
tahoe_data = self._download_webpage(
|
||||
self._VIDEO_PAGE_TAHOE_TEMPLATE % video_id, video_id,
|
||||
data=urlencode_postdata({
|
||||
'__user': 0,
|
||||
'__a': 1,
|
||||
'__pc': self._search_regex(
|
||||
r'pkg_cohort["\']\s*:\s*["\'](.+?)["\']', webpage,
|
||||
@@ -363,6 +362,9 @@ class FacebookIE(InfoExtractor):
|
||||
'__rev': self._search_regex(
|
||||
r'client_revision["\']\s*:\s*(\d+),', webpage,
|
||||
'client revision', default='3944515'),
|
||||
'fb_dtsg': self._search_regex(
|
||||
r'"DTSGInitialData"\s*,\s*\[\]\s*,\s*{\s*"token"\s*:\s*"([^"]+)"',
|
||||
webpage, 'dtsg token', default=''),
|
||||
}),
|
||||
headers={
|
||||
'Content-Type': 'application/x-www-form-urlencoded',
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import itertools
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -125,17 +126,31 @@ class FunkChannelIE(FunkBaseIE):
|
||||
# Id-based channels are currently broken on their side: webplayer
|
||||
# tries to process them via byChannelAlias endpoint and fails
|
||||
# predictably.
|
||||
by_channel_alias = self._download_json(
|
||||
'https://www.funk.net/api/v3.1/webapp/videos/byChannelAlias/%s'
|
||||
% channel_id,
|
||||
'Downloading byChannelAlias JSON', headers=headers, query={
|
||||
'size': 100,
|
||||
}, fatal=False)
|
||||
if by_channel_alias:
|
||||
for page_num in itertools.count():
|
||||
by_channel_alias = self._download_json(
|
||||
'https://www.funk.net/api/v3.1/webapp/videos/byChannelAlias/%s'
|
||||
% channel_id,
|
||||
'Downloading byChannelAlias JSON page %d' % (page_num + 1),
|
||||
headers=headers, query={
|
||||
'filterFsk': 'false',
|
||||
'sort': 'creationDate,desc',
|
||||
'size': 100,
|
||||
'page': page_num,
|
||||
}, fatal=False)
|
||||
if not by_channel_alias:
|
||||
break
|
||||
video_list = try_get(
|
||||
by_channel_alias, lambda x: x['_embedded']['videoList'], list)
|
||||
if video_list:
|
||||
if not video_list:
|
||||
break
|
||||
try:
|
||||
video = next(r for r in video_list if r.get('alias') == alias)
|
||||
break
|
||||
except StopIteration:
|
||||
pass
|
||||
if not try_get(
|
||||
by_channel_alias, lambda x: x['_links']['next']):
|
||||
break
|
||||
|
||||
if not video:
|
||||
by_id_list = self._download_json(
|
||||
|
||||
@@ -32,7 +32,6 @@ from ..utils import (
|
||||
unified_strdate,
|
||||
unsmuggle_url,
|
||||
UnsupportedError,
|
||||
url_or_none,
|
||||
xpath_text,
|
||||
)
|
||||
from .commonprotocols import RtmpIE
|
||||
@@ -113,6 +112,8 @@ from .peertube import PeerTubeIE
|
||||
from .indavideo import IndavideoEmbedIE
|
||||
from .apa import APAIE
|
||||
from .foxnews import FoxNewsIE
|
||||
from .viqeo import ViqeoIE
|
||||
from .expressen import ExpressenIE
|
||||
|
||||
|
||||
class GenericIE(InfoExtractor):
|
||||
@@ -2060,6 +2061,30 @@ class GenericIE(InfoExtractor):
|
||||
},
|
||||
'skip': 'TODO: fix nested playlists processing in tests',
|
||||
},
|
||||
{
|
||||
# Viqeo embeds
|
||||
'url': 'https://viqeo.tv/',
|
||||
'info_dict': {
|
||||
'id': 'viqeo',
|
||||
'title': 'All-new video platform',
|
||||
},
|
||||
'playlist_count': 6,
|
||||
},
|
||||
{
|
||||
# videojs embed
|
||||
'url': 'https://video.sibnet.ru/shell.php?videoid=3422904',
|
||||
'info_dict': {
|
||||
'id': 'shell',
|
||||
'ext': 'mp4',
|
||||
'title': 'Доставщик пиццы спросил разрешения сыграть на фортепиано',
|
||||
'description': 'md5:89209cdc587dab1e4a090453dbaa2cb1',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['Failed to download MPD manifest'],
|
||||
},
|
||||
# {
|
||||
# # TODO: find another test
|
||||
# # http://schema.org/VideoObject
|
||||
@@ -3094,6 +3119,16 @@ class GenericIE(InfoExtractor):
|
||||
return self.playlist_from_matches(
|
||||
sharevideos_urls, video_id, video_title)
|
||||
|
||||
viqeo_urls = ViqeoIE._extract_urls(webpage)
|
||||
if viqeo_urls:
|
||||
return self.playlist_from_matches(
|
||||
viqeo_urls, video_id, video_title, ie=ViqeoIE.ie_key())
|
||||
|
||||
expressen_urls = ExpressenIE._extract_urls(webpage)
|
||||
if expressen_urls:
|
||||
return self.playlist_from_matches(
|
||||
expressen_urls, video_id, video_title, ie=ExpressenIE.ie_key())
|
||||
|
||||
# Look for HTML5 media
|
||||
entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
|
||||
if entries:
|
||||
@@ -3115,9 +3150,13 @@ class GenericIE(InfoExtractor):
|
||||
jwplayer_data = self._find_jwplayer_data(
|
||||
webpage, video_id, transform_source=js_to_json)
|
||||
if jwplayer_data:
|
||||
info = self._parse_jwplayer_data(
|
||||
jwplayer_data, video_id, require_title=False, base_url=url)
|
||||
return merge_dicts(info, info_dict)
|
||||
try:
|
||||
info = self._parse_jwplayer_data(
|
||||
jwplayer_data, video_id, require_title=False, base_url=url)
|
||||
return merge_dicts(info, info_dict)
|
||||
except ExtractorError:
|
||||
# See https://github.com/rg3/youtube-dl/pull/16735
|
||||
pass
|
||||
|
||||
# Video.js embed
|
||||
mobj = re.search(
|
||||
@@ -3131,8 +3170,8 @@ class GenericIE(InfoExtractor):
|
||||
sources = [sources]
|
||||
formats = []
|
||||
for source in sources:
|
||||
src = url_or_none(source.get('src'))
|
||||
if not src:
|
||||
src = source.get('src')
|
||||
if not src or not isinstance(src, compat_str):
|
||||
continue
|
||||
src = compat_urlparse.urljoin(url, src)
|
||||
src_type = source.get('type')
|
||||
|
||||
@@ -36,7 +36,8 @@ class GoIE(AdobePassIE):
|
||||
'requestor_id': 'DisneyXD',
|
||||
}
|
||||
}
|
||||
_VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:(?:[^/]+/)*(?P<id>vdka\w+)|(?:[^/]+/)*(?P<display_id>[^/?#]+))' % '|'.join(_SITE_INFO.keys())
|
||||
_VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:(?:[^/]+/)*(?P<id>vdka\w+)|(?:[^/]+/)*(?P<display_id>[^/?#]+))'\
|
||||
% '|'.join(list(_SITE_INFO.keys()) + ['disneynow'])
|
||||
_TESTS = [{
|
||||
'url': 'http://abc.go.com/shows/designated-survivor/video/most-recent/VDKA3807643',
|
||||
'info_dict': {
|
||||
@@ -62,6 +63,14 @@ class GoIE(AdobePassIE):
|
||||
}, {
|
||||
'url': 'http://abc.go.com/shows/world-news-tonight/episode-guide/2017-02/17-021717-intense-stand-off-between-man-with-rifle-and-police-in-oakland',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# brand 004
|
||||
'url': 'http://disneynow.go.com/shows/big-hero-6-the-series/season-01/episode-10-mr-sparkles-loses-his-sparkle/vdka4637915',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# brand 008
|
||||
'url': 'http://disneynow.go.com/shows/minnies-bow-toons/video/happy-campers/vdka4872013',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _extract_videos(self, brand, video_id='-1', show_id='-1'):
|
||||
@@ -72,14 +81,23 @@ class GoIE(AdobePassIE):
|
||||
|
||||
def _real_extract(self, url):
|
||||
sub_domain, video_id, display_id = re.match(self._VALID_URL, url).groups()
|
||||
site_info = self._SITE_INFO[sub_domain]
|
||||
brand = site_info['brand']
|
||||
if not video_id:
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
site_info = self._SITE_INFO.get(sub_domain, {})
|
||||
brand = site_info.get('brand')
|
||||
if not video_id or not site_info:
|
||||
webpage = self._download_webpage(url, display_id or video_id)
|
||||
video_id = self._search_regex(
|
||||
# There may be inner quotes, e.g. data-video-id="'VDKA3609139'"
|
||||
# from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood
|
||||
r'data-video-id=["\']*(VDKA\w+)', webpage, 'video id', default=None)
|
||||
r'data-video-id=["\']*(VDKA\w+)', webpage, 'video id',
|
||||
default=None)
|
||||
if not site_info:
|
||||
brand = self._search_regex(
|
||||
(r'data-brand=\s*["\']\s*(\d+)',
|
||||
r'data-page-brand=\s*["\']\s*(\d+)'), webpage, 'brand',
|
||||
default='004')
|
||||
site_info = next(
|
||||
si for _, si in self._SITE_INFO.items()
|
||||
if si.get('brand') == brand)
|
||||
if not video_id:
|
||||
# show extraction works for Disney, DisneyJunior and DisneyXD
|
||||
# ABC and Freeform has different layout
|
||||
|
||||
@@ -64,7 +64,8 @@ class ImdbIE(InfoExtractor):
|
||||
video_url = url_or_none(encoding.get('videoUrl'))
|
||||
if not video_url:
|
||||
continue
|
||||
ext = determine_ext(video_url, mimetype2ext(encoding.get('mimeType')))
|
||||
ext = mimetype2ext(encoding.get(
|
||||
'mimeType')) or determine_ext(video_url)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
|
||||
@@ -7,7 +7,7 @@ from ..utils import unified_timestamp
|
||||
|
||||
class InternazionaleIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?internazionale\.it/video/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'https://www.internazionale.it/video/2015/02/19/richard-linklater-racconta-una-scena-di-boyhood',
|
||||
'md5': '3e39d32b66882c1218e305acbf8348ca',
|
||||
'info_dict': {
|
||||
@@ -23,7 +23,23 @@ class InternazionaleIE(InfoExtractor):
|
||||
'params': {
|
||||
'format': 'bestvideo',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.internazionale.it/video/2018/08/29/telefono-stare-con-noi-stessi',
|
||||
'md5': '9db8663704cab73eb972d1cee0082c79',
|
||||
'info_dict': {
|
||||
'id': '761344',
|
||||
'display_id': 'telefono-stare-con-noi-stessi',
|
||||
'ext': 'mp4',
|
||||
'title': 'Usiamo il telefono per evitare di stare con noi stessi',
|
||||
'description': 'md5:75ccfb0d6bcefc6e7428c68b4aa1fe44',
|
||||
'timestamp': 1535528954,
|
||||
'upload_date': '20180829',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {
|
||||
'format': 'bestvideo',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
@@ -40,8 +56,13 @@ class InternazionaleIE(InfoExtractor):
|
||||
DATA_RE % 'job-id', webpage, 'video id', group='value')
|
||||
video_path = self._search_regex(
|
||||
DATA_RE % 'video-path', webpage, 'video path', group='value')
|
||||
        video_available_abroad = self._search_regex(
            DATA_RE % 'video-available_abroad', webpage,
            'video available abroad', default='1', group='value')
        video_available_abroad = video_available_abroad == '1'

video_base = 'https://video.internazionale.it/%s/%s.' % (video_path, video_id)
|
||||
video_base = 'https://video%s.internazionale.it/%s/%s.' % \
|
||||
('' if video_available_abroad else '-ita', video_path, video_id)
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
video_base + 'm3u8', display_id, 'mp4',
|
||||
|
||||
@@ -38,6 +38,8 @@ class IPrimaIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
self._set_cookie('play.iprima.cz', 'ott_adult_confirmed', '1')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_id = self._search_regex(r'data-product="([^"]+)">', webpage, 'real id')
|
||||
|
||||
70
youtube_dl/extractor/kinopoisk.py
Normal file
@@ -0,0 +1,70 @@
# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import (
    dict_get,
    int_or_none,
)


class KinoPoiskIE(InfoExtractor):
    _GEO_COUNTRIES = ['RU']
    _VALID_URL = r'https?://(?:www\.)?kinopoisk\.ru/film/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.kinopoisk.ru/film/81041/watch/',
        'md5': '4f71c80baea10dfa54a837a46111d326',
        'info_dict': {
            'id': '81041',
            'ext': 'mp4',
            'title': 'Алеша попович и тугарин змей',
            'description': 'md5:43787e673d68b805d0aa1df5a5aea701',
            'thumbnail': r're:^https?://.*',
            'duration': 4533,
            'age_limit': 12,
        },
        'params': {
            'format': 'bestvideo',
        },
    }, {
        'url': 'https://www.kinopoisk.ru/film/81041',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)

        webpage = self._download_webpage(
            'https://ott-widget.kinopoisk.ru/v1/kp/', video_id,
            query={'kpId': video_id})

        data = self._parse_json(
            self._search_regex(
                r'(?s)<script[^>]+\btype=["\']application/json[^>]+>(.+?)<',
                webpage, 'data'),
            video_id)['models']

        film = data['filmStatus']
        title = film.get('title') or film['originalTitle']

        formats = self._extract_m3u8_formats(
            data['playlistEntity']['uri'], video_id, 'mp4',
            entry_protocol='m3u8_native', m3u8_id='hls')
        self._sort_formats(formats)

        description = dict_get(
            film, ('descriptscription', 'description',
                   'shortDescriptscription', 'shortDescription'))
        thumbnail = film.get('coverUrl') or film.get('posterUrl')
        duration = int_or_none(film.get('duration'))
        age_limit = int_or_none(film.get('restrictionAge'))

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'age_limit': age_limit,
            'formats': formats,
        }

@@ -20,5 +20,7 @@ class LCIIE(InfoExtractor):
    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        wat_id = self._search_regex(r'data-watid=[\'"](\d+)', webpage, 'wat id')
        wat_id = self._search_regex(
            (r'data-watid=[\'"](\d+)', r'idwat["\']?\s*:\s*["\']?(\d+)'),
            webpage, 'wat id')
        return self.url_result('wat:' + wat_id, 'Wat', wat_id)

@@ -1,84 +1,14 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import uuid
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .ooyala import OoyalaIE
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
extract_attributes,
|
||||
determine_ext,
|
||||
smuggle_url,
|
||||
parse_duration,
|
||||
)
|
||||
|
||||
|
||||
class MiTeleBaseIE(InfoExtractor):
|
||||
def _get_player_info(self, url, webpage):
|
||||
player_data = extract_attributes(self._search_regex(
|
||||
r'(?s)(<ms-video-player.+?</ms-video-player>)',
|
||||
webpage, 'ms video player'))
|
||||
video_id = player_data['data-media-id']
|
||||
if player_data.get('data-cms-id') == 'ooyala':
|
||||
return self.url_result(
|
||||
'ooyala:%s' % video_id, ie=OoyalaIE.ie_key(), video_id=video_id)
|
||||
config_url = compat_urlparse.urljoin(url, player_data['data-config'])
|
||||
config = self._download_json(
|
||||
config_url, video_id, 'Downloading config JSON')
|
||||
mmc_url = config['services']['mmc']
|
||||
|
||||
duration = None
|
||||
formats = []
|
||||
for m_url in (mmc_url, mmc_url.replace('/flash.json', '/html5.json')):
|
||||
mmc = self._download_json(
|
||||
m_url, video_id, 'Downloading mmc JSON')
|
||||
if not duration:
|
||||
duration = int_or_none(mmc.get('duration'))
|
||||
for location in mmc['locations']:
|
||||
gat = self._proto_relative_url(location.get('gat'), 'http:')
|
||||
gcp = location.get('gcp')
|
||||
ogn = location.get('ogn')
|
||||
if None in (gat, gcp, ogn):
|
||||
continue
|
||||
token_data = {
|
||||
'gcp': gcp,
|
||||
'ogn': ogn,
|
||||
'sta': 0,
|
||||
}
|
||||
media = self._download_json(
|
||||
gat, video_id, data=json.dumps(token_data).encode('utf-8'),
|
||||
headers={
|
||||
'Content-Type': 'application/json;charset=utf-8',
|
||||
'Referer': url,
|
||||
})
|
||||
stream = media.get('stream') or media.get('file')
|
||||
if not stream:
|
||||
continue
|
||||
ext = determine_ext(stream)
|
||||
if ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
stream + '&hdcore=3.2.0&plugin=aasp-3.2.0.77.18',
|
||||
video_id, f4m_id='hds', fatal=False))
|
||||
elif ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
stream, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'thumbnail': player_data.get('data-poster') or config.get('poster', {}).get('imageUrl'),
|
||||
'duration': duration,
|
||||
}
|
||||
|
||||
|
||||
class MiTeleIE(InfoExtractor):
|
||||
IE_DESC = 'mitele.es'
|
||||
_VALID_URL = r'https?://(?:www\.)?mitele\.es/(?:[^/]+/)+(?P<id>[^/]+)/player'
|
||||
@@ -86,7 +16,7 @@ class MiTeleIE(InfoExtractor):
|
||||
_TESTS = [{
|
||||
'url': 'http://www.mitele.es/programas-tv/diario-de/57b0dfb9c715da65618b4afa/player',
|
||||
'info_dict': {
|
||||
'id': '57b0dfb9c715da65618b4afa',
|
||||
'id': 'FhYW1iNTE6J6H7NkQRIEzfne6t2quqPg',
|
||||
'ext': 'mp4',
|
||||
'title': 'Tor, la web invisible',
|
||||
'description': 'md5:3b6fce7eaa41b2d97358726378d9369f',
|
||||
@@ -104,7 +34,7 @@ class MiTeleIE(InfoExtractor):
|
||||
# no explicit title
|
||||
'url': 'http://www.mitele.es/programas-tv/cuarto-milenio/57b0de3dc915da14058b4876/player',
|
||||
'info_dict': {
|
||||
'id': '57b0de3dc915da14058b4876',
|
||||
'id': 'oyNG1iNTE6TAPP-JmCjbwfwJqqMMX3Vq',
|
||||
'ext': 'mp4',
|
||||
'title': 'Cuarto Milenio Temporada 6 Programa 226',
|
||||
'description': 'md5:5ff132013f0cd968ffbf1f5f3538a65f',
|
||||
@@ -128,40 +58,21 @@ class MiTeleIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
gigya_url = self._search_regex(
|
||||
r'<gigya-api>[^>]*</gigya-api>[^>]*<script\s+src="([^"]*)">[^>]*</script>',
|
||||
webpage, 'gigya', default=None)
|
||||
gigya_sc = self._download_webpage(
|
||||
compat_urlparse.urljoin('http://www.mitele.es/', gigya_url),
|
||||
video_id, 'Downloading gigya script')
|
||||
|
||||
# Get a appKey/uuid for getting the session key
|
||||
appKey = self._search_regex(
|
||||
r'constant\s*\(\s*["\']_appGridApplicationKey["\']\s*,\s*["\']([0-9a-f]+)',
|
||||
gigya_sc, 'appKey')
|
||||
|
||||
session_json = self._download_json(
|
||||
'https://appgrid-api.cloud.accedo.tv/session',
|
||||
video_id, 'Downloading session keys', query={
|
||||
'appKey': appKey,
|
||||
'uuid': compat_str(uuid.uuid4()),
|
||||
})
|
||||
|
||||
paths = self._download_json(
|
||||
'https://appgrid-api.cloud.accedo.tv/metadata/general_configuration,%20web_configuration',
|
||||
video_id, 'Downloading paths JSON',
|
||||
query={'sessionKey': compat_str(session_json['sessionKey'])})
|
||||
'https://www.mitele.es/amd/agp/web/metadata/general_configuration',
|
||||
video_id, 'Downloading paths JSON')
|
||||
|
||||
ooyala_s = paths['general_configuration']['api_configuration']['ooyala_search']
|
||||
base_url = ooyala_s.get('base_url', 'cdn-search-mediaset.carbyne.ps.ooyala.com')
|
||||
full_path = ooyala_s.get('full_path', '/search/v1/full/providers/')
|
||||
source = self._download_json(
|
||||
'http://%s%s%s/docs/%s' % (
|
||||
ooyala_s['base_url'], ooyala_s['full_path'],
|
||||
ooyala_s['provider_id'], video_id),
|
||||
'%s://%s%s%s/docs/%s' % (
|
||||
ooyala_s.get('protocol', 'https'), base_url, full_path,
|
||||
ooyala_s.get('provider_id', '104951'), video_id),
|
||||
video_id, 'Downloading data JSON', query={
|
||||
'include_titles': 'Series,Season',
|
||||
'product_name': 'test',
|
||||
'product_name': ooyala_s.get('product_name', 'test'),
|
||||
'format': 'full',
|
||||
})['hits']['hits'][0]['_source']
|
||||
|
||||
|
||||
@@ -252,7 +252,7 @@ class NiconicoIE(InfoExtractor):
|
||||
},
|
||||
'timing_constraint': 'unlimited'
|
||||
}
|
||||
}))
|
||||
}).encode())
|
||||
|
||||
resolution = video_quality.get('resolution', {})
|
||||
|
||||
|
||||
@@ -6,28 +6,90 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
qualities,
|
||||
unified_strdate,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class NovaEmbedIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://media\.cms\.nova\.cz/embed/(?P<id>[^/?#&]+)'
|
||||
_TEST = {
|
||||
'url': 'https://media.cms.nova.cz/embed/8o0n0r?autoplay=1',
|
||||
'md5': 'b3834f6de5401baabf31ed57456463f7',
|
||||
'info_dict': {
|
||||
'id': '8o0n0r',
|
||||
'ext': 'mp4',
|
||||
'title': '2180. díl',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'duration': 2578,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
bitrates = self._parse_json(
|
||||
self._search_regex(
|
||||
r'(?s)bitrates\s*=\s*({.+?})\s*;', webpage, 'formats'),
|
||||
video_id, transform_source=js_to_json)
|
||||
|
||||
QUALITIES = ('lq', 'mq', 'hq', 'hd')
|
||||
quality_key = qualities(QUALITIES)
|
||||
|
||||
formats = []
|
||||
for format_id, format_list in bitrates.items():
|
||||
if not isinstance(format_list, list):
|
||||
continue
|
||||
for format_url in format_list:
|
||||
format_url = url_or_none(format_url)
|
||||
if not format_url:
|
||||
continue
|
||||
f = {
|
||||
'url': format_url,
|
||||
}
|
||||
f_id = format_id
|
||||
for quality in QUALITIES:
|
||||
if '%s.mp4' % quality in format_url:
|
||||
f_id += '-%s' % quality
|
||||
f.update({
|
||||
'quality': quality_key(quality),
|
||||
'format_note': quality.upper(),
|
||||
})
|
||||
break
|
||||
f['format_id'] = f_id
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = self._og_search_title(
|
||||
webpage, default=None) or self._search_regex(
|
||||
(r'<value>(?P<title>[^<]+)',
|
||||
r'videoTitle\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), webpage,
|
||||
'title', group='value')
|
||||
thumbnail = self._og_search_thumbnail(
|
||||
webpage, default=None) or self._search_regex(
|
||||
r'poster\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
|
||||
'thumbnail', fatal=False, group='value')
|
||||
duration = int_or_none(self._search_regex(
|
||||
r'videoDuration\s*:\s*(\d+)', webpage, 'duration', fatal=False))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class NovaIE(InfoExtractor):
|
||||
IE_DESC = 'TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz'
|
||||
_VALID_URL = r'https?://(?:[^.]+\.)?(?P<site>tv(?:noviny)?|tn|novaplus|vymena|fanda|krasna|doma|prask)\.nova\.cz/(?:[^/]+/)+(?P<id>[^/]+?)(?:\.html|/|$)'
|
||||
_TESTS = [{
|
||||
'url': 'http://tvnoviny.nova.cz/clanek/novinky/co-na-sebe-sportaci-praskli-vime-jestli-pujde-hrdlicka-na-materskou.html?utm_source=tvnoviny&utm_medium=cpfooter&utm_campaign=novaplus',
|
||||
'info_dict': {
|
||||
'id': '1608920',
|
||||
'display_id': 'co-na-sebe-sportaci-praskli-vime-jestli-pujde-hrdlicka-na-materskou',
|
||||
'ext': 'flv',
|
||||
'title': 'Duel: Michal Hrdlička a Petr Suchoň',
|
||||
'description': 'md5:d0cc509858eee1b1374111c588c6f5d5',
|
||||
'thumbnail': r're:^https?://.*\.(?:jpg)',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://tn.nova.cz/clanek/tajemstvi-ukryte-v-podzemi-specialni-nemocnice-v-prazske-krci.html#player_13260',
|
||||
'md5': '1dd7b9d5ea27bc361f110cd855a19bd3',
|
||||
'info_dict': {
|
||||
@@ -38,33 +100,6 @@ class NovaIE(InfoExtractor):
|
||||
'description': 'md5:f0a42dd239c26f61c28f19e62d20ef53',
|
||||
'thumbnail': r're:^https?://.*\.(?:jpg)',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://novaplus.nova.cz/porad/policie-modrava/video/5591-policie-modrava-15-dil-blondynka-na-hrbitove',
|
||||
'info_dict': {
|
||||
'id': '1756825',
|
||||
'display_id': '5591-policie-modrava-15-dil-blondynka-na-hrbitove',
|
||||
'ext': 'flv',
|
||||
'title': 'Policie Modrava - 15. díl - Blondýnka na hřbitově',
|
||||
'description': 'md5:dc24e50be5908df83348e50d1431295e', # Make sure this description is clean of html tags
|
||||
'thumbnail': r're:^https?://.*\.(?:jpg)',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://novaplus.nova.cz/porad/televizni-noviny/video/5585-televizni-noviny-30-5-2015/',
|
||||
'info_dict': {
|
||||
'id': '1756858',
|
||||
'ext': 'flv',
|
||||
'title': 'Televizní noviny - 30. 5. 2015',
|
||||
'thumbnail': r're:^https?://.*\.(?:jpg)',
|
||||
'upload_date': '20150530',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://fanda.nova.cz/clanek/fun-and-games/krvavy-epos-zaklinac-3-divoky-hon-vychazi-vyhrajte-ho-pro-sebe.html',
|
||||
'info_dict': {
|
||||
@@ -79,6 +114,20 @@ class NovaIE(InfoExtractor):
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
# media.cms.nova.cz embed
|
||||
'url': 'https://novaplus.nova.cz/porad/ulice/epizoda/18760-2180-dil',
|
||||
'info_dict': {
|
||||
'id': '8o0n0r',
|
||||
'ext': 'mp4',
|
||||
'title': '2180. díl',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'duration': 2578,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': [NovaEmbedIE.ie_key()],
|
||||
}, {
|
||||
'url': 'http://sport.tn.nova.cz/clanek/sport/hokej/nhl/zivot-jde-dal-hodnotil-po-vyrazeni-z-playoff-jiri-sekac.html',
|
||||
'only_matching': True,
|
||||
@@ -103,6 +152,15 @@ class NovaIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
# novaplus
|
||||
embed_id = self._search_regex(
|
||||
r'<iframe[^>]+\bsrc=["\'](?:https?:)?//media\.cms\.nova\.cz/embed/([^/?#&]+)',
|
||||
webpage, 'embed url', default=None)
|
||||
if embed_id:
|
||||
return self.url_result(
|
||||
'https://media.cms.nova.cz/embed/%s' % embed_id,
|
||||
ie=NovaEmbedIE.ie_key(), video_id=embed_id)
|
||||
|
||||
video_id = self._search_regex(
|
||||
[r"(?:media|video_id)\s*:\s*'(\d+)'",
|
||||
r'media=(\d+)',
|
||||
@@ -111,8 +169,21 @@ class NovaIE(InfoExtractor):
|
||||
webpage, 'video id')
|
||||
|
||||
config_url = self._search_regex(
|
||||
r'src="(http://tn\.nova\.cz/bin/player/videojs/config\.php\?[^"]+)"',
|
||||
r'src="(https?://(?:tn|api)\.nova\.cz/bin/player/videojs/config\.php\?[^"]+)"',
|
||||
webpage, 'config url', default=None)
|
||||
config_params = {}
|
||||
|
||||
if not config_url:
|
||||
player = self._parse_json(
|
||||
self._search_regex(
|
||||
r'(?s)Player\s*\(.+?\s*,\s*({.+?\bmedia\b["\']?\s*:\s*["\']?\d+.+?})\s*\)', webpage,
|
||||
'player', default='{}'),
|
||||
video_id, transform_source=js_to_json, fatal=False)
|
||||
if player:
|
||||
config_url = url_or_none(player.get('configUrl'))
|
||||
params = player.get('configParams')
|
||||
if isinstance(params, dict):
|
||||
config_params = params
|
||||
|
||||
if not config_url:
|
||||
DEFAULT_SITE_ID = '23000'
|
||||
@@ -127,14 +198,20 @@ class NovaIE(InfoExtractor):
|
||||
}
|
||||
|
||||
site_id = self._search_regex(
|
||||
r'site=(\d+)', webpage, 'site id', default=None) or SITES.get(site, DEFAULT_SITE_ID)
|
||||
r'site=(\d+)', webpage, 'site id', default=None) or SITES.get(
|
||||
site, DEFAULT_SITE_ID)
|
||||
|
||||
config_url = ('http://tn.nova.cz/bin/player/videojs/config.php?site=%s&media=%s&jsVar=vjsconfig'
|
||||
% (site_id, video_id))
|
||||
config_url = 'https://api.nova.cz/bin/player/videojs/config.php'
|
||||
config_params = {
|
||||
'site': site_id,
|
||||
'media': video_id,
|
||||
'quality': 3,
|
||||
'version': 1,
|
||||
}
|
||||
|
||||
config = self._download_json(
|
||||
config_url, display_id,
|
||||
'Downloading config JSON',
|
||||
'Downloading config JSON', query=config_params,
|
||||
transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1])
|
||||
|
||||
mediafile = config['mediafile']
|
||||
|
||||
@@ -15,6 +15,7 @@ from ..utils import (
|
||||
strip_jsonp,
|
||||
strip_or_none,
|
||||
unified_strdate,
|
||||
url_or_none,
|
||||
US_RATINGS,
|
||||
)
|
||||
|
||||
@@ -557,6 +558,13 @@ class PBSIE(InfoExtractor):
|
||||
if redirect_url and redirect_url not in redirect_urls:
|
||||
redirects.append(redirect)
|
||||
redirect_urls.add(redirect_url)
|
||||
encodings = info.get('encodings')
|
||||
if isinstance(encodings, list):
|
||||
for encoding in encodings:
|
||||
encoding_url = url_or_none(encoding)
|
||||
if encoding_url and encoding_url not in redirect_urls:
|
||||
redirects.append({'url': encoding_url})
|
||||
redirect_urls.add(encoding_url)
|
||||
|
||||
chapters = []
|
||||
# Player pages may also serve different qualities
|
||||
|
||||
@@ -18,6 +18,7 @@ from ..utils import (
|
||||
orderedSet,
|
||||
remove_quotes,
|
||||
str_to_int,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -68,6 +69,31 @@ class PornHubIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# subtitles
|
||||
'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5af5fef7c2aa7',
|
||||
'info_dict': {
|
||||
'id': 'ph5af5fef7c2aa7',
|
||||
'ext': 'mp4',
|
||||
'title': 'BFFS - Cute Teen Girls Share Cock On the Floor',
|
||||
'uploader': 'BFFs',
|
||||
'duration': 622,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'comment_count': int,
|
||||
'age_limit': 18,
|
||||
'tags': list,
|
||||
'categories': list,
|
||||
'subtitles': {
|
||||
'en': [{
|
||||
"ext": 'srt'
|
||||
}]
|
||||
},
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d',
|
||||
'only_matching': True,
|
||||
@@ -139,12 +165,19 @@ class PornHubIE(InfoExtractor):
|
||||
|
||||
video_urls = []
|
||||
video_urls_set = set()
|
||||
subtitles = {}
|
||||
|
||||
flashvars = self._parse_json(
|
||||
self._search_regex(
|
||||
r'var\s+flashvars_\d+\s*=\s*({.+?});', webpage, 'flashvars', default='{}'),
|
||||
video_id)
|
||||
if flashvars:
|
||||
subtitle_url = url_or_none(flashvars.get('closedCaptionsFile'))
|
||||
if subtitle_url:
|
||||
subtitles.setdefault('en', []).append({
|
||||
'url': subtitle_url,
|
||||
'ext': 'srt',
|
||||
})
|
||||
thumbnail = flashvars.get('image_url')
|
||||
duration = int_or_none(flashvars.get('video_duration'))
|
||||
media_definitions = flashvars.get('mediaDefinitions')
|
||||
@@ -221,7 +254,7 @@ class PornHubIE(InfoExtractor):
|
||||
self._sort_formats(formats)
|
||||
|
||||
video_uploader = self._html_search_regex(
|
||||
r'(?s)From: .+?<(?:a\b[^>]+\bhref=["\']/(?:user|channel)s/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<',
|
||||
r'(?s)From: .+?<(?:a\b[^>]+\bhref=["\']/(?:(?:user|channel)s|model|pornstar)/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<',
|
||||
webpage, 'uploader', fatal=False)
|
||||
|
||||
view_count = self._extract_count(
|
||||
@@ -256,6 +289,7 @@ class PornHubIE(InfoExtractor):
|
||||
'age_limit': 18,
|
||||
'tags': tags,
|
||||
'categories': categories,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
|
||||
@@ -312,7 +346,7 @@ class PornHubPlaylistIE(PornHubPlaylistBaseIE):
|
||||
|
||||
|
||||
class PornHubUserVideosIE(PornHubPlaylistBaseIE):
|
||||
_VALID_URL = r'https?://(?:[^/]+\.)?pornhub\.com/(?:user|channel)s/(?P<id>[^/]+)/videos'
|
||||
_VALID_URL = r'https?://(?:[^/]+\.)?pornhub\.com/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.pornhub.com/users/zoe_ph/videos/public',
|
||||
'info_dict': {
|
||||
@@ -344,6 +378,12 @@ class PornHubUserVideosIE(PornHubPlaylistBaseIE):
|
||||
}, {
|
||||
'url': 'http://www.pornhub.com/users/zoe_ph/videos/public',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.pornhub.com/model/jayndrea/videos/upload',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
247
youtube_dl/extractor/puhutv.py
Normal file
@@ -0,0 +1,247 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_HTTPError,
|
||||
compat_str,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
parse_resolution,
|
||||
str_or_none,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class PuhuTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?puhutv\.com/(?P<id>[^/?#&]+)-izle'
|
||||
IE_NAME = 'puhutv'
|
||||
_TESTS = [{
|
||||
# film
|
||||
'url': 'https://puhutv.com/sut-kardesler-izle',
|
||||
'md5': 'fbd8f2d8e7681f8bcd51b592475a6ae7',
|
||||
'info_dict': {
|
||||
'id': '5085',
|
||||
'display_id': 'sut-kardesler',
|
||||
'ext': 'mp4',
|
||||
'title': 'Süt Kardeşler',
|
||||
'description': 'md5:405fd024df916ca16731114eb18e511a',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 4832.44,
|
||||
'creator': 'Arzu Film',
|
||||
'timestamp': 1469778212,
|
||||
'upload_date': '20160729',
|
||||
'release_year': 1976,
|
||||
'view_count': int,
|
||||
'tags': ['Aile', 'Komedi', 'Klasikler'],
|
||||
},
|
||||
}, {
|
||||
# episode, geo restricted, bypassable with --geo-verification-proxy
|
||||
'url': 'https://puhutv.com/jet-sosyete-1-bolum-izle',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# 4k, with subtitles
|
||||
'url': 'https://puhutv.com/dip-1-bolum-izle',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_SUBTITLE_LANGS = {
|
||||
'English': 'en',
|
||||
'Deutsch': 'de',
|
||||
'عربى': 'ar'
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
info = self._download_json(
|
||||
urljoin(url, '/api/slug/%s-izle' % display_id),
|
||||
display_id)['data']
|
||||
|
||||
video_id = compat_str(info['id'])
|
||||
title = info.get('name') or info['title']['name']
|
||||
if info.get('display_name'):
|
||||
title = '%s %s' % (title, info.get('display_name'))
|
||||
|
||||
try:
|
||||
videos = self._download_json(
|
||||
'https://puhutv.com/api/assets/%s/videos' % video_id,
|
||||
display_id, 'Downloading video JSON',
|
||||
headers=self.geo_verification_headers())
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||
self.raise_geo_restricted()
|
||||
raise
|
||||
|
||||
formats = []
|
||||
for video in videos['data']['videos']:
|
||||
media_url = url_or_none(video.get('url'))
|
||||
if not media_url:
|
||||
continue
|
||||
playlist = video.get('is_playlist')
|
||||
if video.get('stream_type') == 'hls' and playlist is True:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
media_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
continue
|
||||
quality = int_or_none(video.get('quality'))
|
||||
f = {
|
||||
'url': media_url,
|
||||
'ext': 'mp4',
|
||||
'height': quality
|
||||
}
|
||||
video_format = video.get('video_format')
|
||||
if video_format == 'hls' and playlist is False:
|
||||
format_id = 'hls'
|
||||
f['protocol'] = 'm3u8_native'
|
||||
elif video_format == 'mp4':
|
||||
format_id = 'http'
|
||||
|
||||
else:
|
||||
continue
|
||||
if quality:
|
||||
format_id += '-%sp' % quality
|
||||
f['format_id'] = format_id
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
description = try_get(
|
||||
info, lambda x: x['title']['description'],
|
||||
compat_str) or info.get('description')
|
||||
timestamp = unified_timestamp(info.get('created_at'))
|
||||
creator = try_get(
|
||||
info, lambda x: x['title']['producer']['name'], compat_str)
|
||||
|
||||
duration = float_or_none(
|
||||
try_get(info, lambda x: x['content']['duration_in_ms'], int),
|
||||
scale=1000)
|
||||
view_count = try_get(info, lambda x: x['content']['watch_count'], int)
|
||||
|
||||
images = try_get(
|
||||
info, lambda x: x['content']['images']['wide'], dict) or {}
|
||||
thumbnails = []
|
||||
for image_id, image_url in images.items():
|
||||
if not isinstance(image_url, compat_str):
|
||||
continue
|
||||
if not image_url.startswith(('http', '//')):
|
||||
image_url = 'https://%s' % image_url
|
||||
t = parse_resolution(image_id)
|
||||
t.update({
|
||||
'id': image_id,
|
||||
'url': image_url
|
||||
})
|
||||
thumbnails.append(t)
|
||||
|
||||
release_year = try_get(info, lambda x: x['title']['released_at'], int)
|
||||
|
||||
season_number = int_or_none(info.get('season_number'))
|
||||
season_id = str_or_none(info.get('season_id'))
|
||||
episode_number = int_or_none(info.get('episode_number'))
|
||||
|
||||
tags = []
|
||||
for genre in try_get(info, lambda x: x['title']['genres'], list) or []:
|
||||
if not isinstance(genre, dict):
|
||||
continue
|
||||
genre_name = genre.get('name')
|
||||
if genre_name and isinstance(genre_name, compat_str):
|
||||
tags.append(genre_name)
|
||||
|
||||
subtitles = {}
|
||||
for subtitle in try_get(
|
||||
info, lambda x: x['content']['subtitles'], list) or []:
|
||||
if not isinstance(subtitle, dict):
|
||||
continue
|
||||
lang = subtitle.get('language')
|
||||
sub_url = url_or_none(subtitle.get('url'))
|
||||
if not lang or not isinstance(lang, compat_str) or not sub_url:
|
||||
continue
|
||||
subtitles[self._SUBTITLE_LANGS.get(lang, lang)] = [{
|
||||
'url': sub_url
|
||||
}]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'season_id': season_id,
|
||||
'season_number': season_number,
|
||||
'episode_number': episode_number,
|
||||
'release_year': release_year,
|
||||
'timestamp': timestamp,
|
||||
'creator': creator,
|
||||
'view_count': view_count,
|
||||
'duration': duration,
|
||||
'tags': tags,
|
||||
'subtitles': subtitles,
|
||||
'thumbnails': thumbnails,
|
||||
'formats': formats
|
||||
}
|
||||
|
||||
|
||||
class PuhuTVSerieIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?puhutv\.com/(?P<id>[^/?#&]+)-detay'
|
||||
IE_NAME = 'puhutv:serie'
|
||||
_TESTS = [{
|
||||
'url': 'https://puhutv.com/deniz-yildizi-detay',
|
||||
'info_dict': {
|
||||
'title': 'Deniz Yıldızı',
|
||||
'id': 'deniz-yildizi',
|
||||
},
|
||||
'playlist_mincount': 205,
|
||||
}, {
|
||||
# a film detail page which is using same url with serie page
|
||||
'url': 'https://puhutv.com/kaybedenler-kulubu-detay',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _extract_entries(self, seasons):
|
||||
for season in seasons:
|
||||
season_id = season.get('id')
|
||||
if not season_id:
|
||||
continue
|
||||
page = 1
|
||||
has_more = True
|
||||
while has_more is True:
|
||||
season = self._download_json(
|
||||
'https://galadriel.puhutv.com/seasons/%s' % season_id,
|
||||
season_id, 'Downloading page %s' % page, query={
|
||||
'page': page,
|
||||
'per': 40,
|
||||
})
|
||||
episodes = season.get('episodes')
|
||||
if isinstance(episodes, list):
|
||||
for ep in episodes:
|
||||
slug_path = str_or_none(ep.get('slugPath'))
|
||||
if not slug_path:
|
||||
continue
|
||||
video_id = str_or_none(int_or_none(ep.get('id')))
|
||||
yield self.url_result(
|
||||
'https://puhutv.com/%s' % slug_path,
|
||||
ie=PuhuTVIE.ie_key(), video_id=video_id,
|
||||
video_title=ep.get('name') or ep.get('eventLabel'))
|
||||
page += 1
|
||||
has_more = season.get('hasMore')
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
|
||||
info = self._download_json(
|
||||
urljoin(url, '/api/slug/%s-detay' % playlist_id),
|
||||
playlist_id)['data']
|
||||
|
||||
seasons = info.get('seasons')
|
||||
if seasons:
|
||||
return self.playlist_result(
|
||||
self._extract_entries(seasons), playlist_id, info.get('name'))
|
||||
|
||||
# For films, these are using same url with series
|
||||
video_id = info.get('slug') or info['assets'][0]['slug']
|
||||
return self.url_result(
|
||||
'https://puhutv.com/%s-izle' % video_id,
|
||||
PuhuTVIE.ie_key(), video_id)
|
||||
@@ -4,8 +4,11 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
unified_strdate,
|
||||
parse_resolution,
|
||||
str_to_int,
|
||||
unified_strdate,
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
@@ -29,13 +32,26 @@ class RadioJavanIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
download_host = self._download_json(
|
||||
'https://www.radiojavan.com/videos/video_host', video_id,
|
||||
data=urlencode_postdata({'id': video_id}),
|
||||
headers={
|
||||
'Content-Type': 'application/x-www-form-urlencoded',
|
||||
'Referer': url,
|
||||
}).get('host', 'https://host1.rjmusicmedia.com')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
formats = [{
|
||||
'url': 'https://media.rdjavan.com/media/music_video/%s' % video_path,
|
||||
'format_id': '%sp' % height,
|
||||
'height': int(height),
|
||||
} for height, video_path in re.findall(r"RJ\.video(\d+)p\s*=\s*'/?([^']+)'", webpage)]
|
||||
formats = []
|
||||
for format_id, _, video_path in re.findall(
|
||||
r'RJ\.video(?P<format_id>\d+[pPkK])\s*=\s*(["\'])(?P<url>(?:(?!\2).)+)\2',
|
||||
webpage):
|
||||
f = parse_resolution(format_id)
|
||||
f.update({
|
||||
'url': urljoin(download_host, video_path),
|
||||
'format_id': format_id,
|
||||
})
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
|
||||
@@ -32,6 +32,9 @@ class RaiBaseIE(InfoExtractor):
|
||||
_GEO_BYPASS = False
|
||||
|
||||
def _extract_relinker_info(self, relinker_url, video_id):
|
||||
if not re.match(r'https?://', relinker_url):
|
||||
return {'formats': [{'url': relinker_url}]}
|
||||
|
||||
formats = []
|
||||
geoprotection = None
|
||||
is_live = None
|
||||
@@ -369,6 +372,10 @@ class RaiIE(RaiBaseIE):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# Direct MMS URL
|
||||
'url': 'http://www.rai.it/dl/RaiTV/programmi/media/ContentItem-b63a4089-ac28-48cf-bca5-9f5b5bc46df5.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _extract_from_content_id(self, content_id, url):
|
||||
|
||||
@@ -4,24 +4,37 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .vimeo import VimeoIE
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
ExtractorError,
|
||||
smuggle_url,
|
||||
unsmuggle_url,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
unified_timestamp,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class RayWenderlichIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://videos\.raywenderlich\.com/courses/(?P<course_id>[^/]+)/lessons/(?P<id>\d+)'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
videos\.raywenderlich\.com/courses|
|
||||
(?:www\.)?raywenderlich\.com
|
||||
)/
|
||||
(?P<course_id>[^/]+)/lessons/(?P<id>\d+)
|
||||
'''
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://videos.raywenderlich.com/courses/105-testing-in-ios/lessons/1',
|
||||
'url': 'https://www.raywenderlich.com/3530-testing-in-ios/lessons/1',
|
||||
'info_dict': {
|
||||
'id': '248377018',
|
||||
'ext': 'mp4',
|
||||
'title': 'Testing In iOS Episode 1: Introduction',
|
||||
'title': 'Introduction',
|
||||
'description': 'md5:804d031b3efa9fcb49777d512d74f722',
|
||||
'timestamp': 1513906277,
|
||||
'upload_date': '20171222',
|
||||
'duration': 133,
|
||||
'uploader': 'Ray Wenderlich',
|
||||
'uploader_id': 'user3304672',
|
||||
@@ -34,69 +47,133 @@ class RayWenderlichIE(InfoExtractor):
|
||||
'expected_warnings': ['HTTP Error 403: Forbidden'],
|
||||
}, {
|
||||
'url': 'https://videos.raywenderlich.com/courses/105-testing-in-ios/lessons/1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_video_id(data, lesson_id):
|
||||
if not data:
|
||||
return
|
||||
groups = try_get(data, lambda x: x['groups'], list) or []
|
||||
if not groups:
|
||||
return
|
||||
for group in groups:
|
||||
if not isinstance(group, dict):
|
||||
continue
|
||||
contents = try_get(data, lambda x: x['contents'], list) or []
|
||||
for content in contents:
|
||||
if not isinstance(content, dict):
|
||||
continue
|
||||
ordinal = int_or_none(content.get('ordinal'))
|
||||
if ordinal != lesson_id:
|
||||
continue
|
||||
video_id = content.get('identifier')
|
||||
if video_id:
|
||||
return compat_str(video_id)
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
course_id, lesson_id = mobj.group('course_id', 'id')
|
||||
display_id = '%s/%s' % (course_id, lesson_id)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
thumbnail = self._og_search_thumbnail(
|
||||
webpage, default=None) or self._html_search_meta(
|
||||
'twitter:image', webpage, 'thumbnail')
|
||||
|
||||
if '>Subscribe to unlock' in webpage:
|
||||
raise ExtractorError(
|
||||
'This content is only available for subscribers',
|
||||
expected=True)
|
||||
|
||||
info = {
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
|
||||
vimeo_id = self._search_regex(
|
||||
r'data-vimeo-id=["\'](\d+)', webpage, 'vimeo id', default=None)
|
||||
|
||||
if not vimeo_id:
|
||||
data = self._parse_json(
|
||||
self._search_regex(
|
||||
r'data-collection=(["\'])(?P<data>{.+?})\1', webpage,
|
||||
'data collection', default='{}', group='data'),
|
||||
display_id, transform_source=unescapeHTML, fatal=False)
|
||||
video_id = self._extract_video_id(
|
||||
data, lesson_id) or self._search_regex(
|
||||
r'/videos/(\d+)/', thumbnail, 'video id')
|
||||
headers = {
|
||||
'Referer': url,
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
}
|
||||
csrf_token = self._html_search_meta(
|
||||
'csrf-token', webpage, 'csrf token', default=None)
|
||||
if csrf_token:
|
||||
headers['X-CSRF-Token'] = csrf_token
|
||||
video = self._download_json(
|
||||
'https://videos.raywenderlich.com/api/v1/videos/%s.json'
|
||||
% video_id, display_id, headers=headers)['video']
|
||||
vimeo_id = video['clips'][0]['provider_id']
|
||||
info.update({
|
||||
'_type': 'url_transparent',
|
||||
'title': video.get('name'),
|
||||
'description': video.get('description') or video.get(
|
||||
'meta_description'),
|
||||
'duration': int_or_none(video.get('duration')),
|
||||
'timestamp': unified_timestamp(video.get('created_at')),
|
||||
})
|
||||
|
||||
return merge_dicts(info, self.url_result(
|
||||
VimeoIE._smuggle_referrer(
|
||||
'https://player.vimeo.com/video/%s' % vimeo_id, url),
|
||||
ie=VimeoIE.ie_key(), video_id=vimeo_id))
|
||||
|
||||
|
||||
class RayWenderlichCourseIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
videos\.raywenderlich\.com/courses|
|
||||
(?:www\.)?raywenderlich\.com
|
||||
)/
|
||||
(?P<id>[^/]+)
|
||||
'''
|
||||
|
||||
_TEST = {
|
||||
'url': 'https://www.raywenderlich.com/3530-testing-in-ios',
|
||||
'info_dict': {
|
||||
'title': 'Testing in iOS',
|
||||
'id': '105-testing-in-ios',
|
||||
'id': '3530-testing-in-ios',
|
||||
},
|
||||
'params': {
|
||||
'noplaylist': False,
|
||||
},
|
||||
'playlist_count': 29,
|
||||
}]
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if RayWenderlichIE.suitable(url) else super(
|
||||
RayWenderlichCourseIE, cls).suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, smuggled_data = unsmuggle_url(url, {})
|
||||
course_id = self._match_id(url)
|
||||
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
course_id, lesson_id = mobj.group('course_id', 'id')
|
||||
video_id = '%s/%s' % (course_id, lesson_id)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
no_playlist = self._downloader.params.get('noplaylist')
|
||||
if no_playlist or smuggled_data.get('force_video', False):
|
||||
if no_playlist:
|
||||
self.to_screen(
|
||||
'Downloading just video %s because of --no-playlist'
|
||||
% video_id)
|
||||
if '>Subscribe to unlock' in webpage:
|
||||
raise ExtractorError(
|
||||
'This content is only available for subscribers',
|
||||
expected=True)
|
||||
vimeo_id = self._search_regex(
|
||||
r'data-vimeo-id=["\'](\d+)', webpage, 'video id')
|
||||
return self.url_result(
|
||||
VimeoIE._smuggle_referrer(
|
||||
'https://player.vimeo.com/video/%s' % vimeo_id, url),
|
||||
ie=VimeoIE.ie_key(), video_id=vimeo_id)
|
||||
|
||||
self.to_screen(
|
||||
'Downloading playlist %s - add --no-playlist to just download video'
|
||||
% course_id)
|
||||
|
||||
lesson_ids = set((lesson_id, ))
|
||||
for lesson in re.findall(
|
||||
r'(<a[^>]+\bclass=["\']lesson-link[^>]+>)', webpage):
|
||||
attrs = extract_attributes(lesson)
|
||||
if not attrs:
|
||||
continue
|
||||
lesson_url = attrs.get('href')
|
||||
if not lesson_url:
|
||||
continue
|
||||
lesson_id = self._search_regex(
|
||||
r'/lessons/(\d+)', lesson_url, 'lesson id', default=None)
|
||||
if not lesson_id:
|
||||
continue
|
||||
lesson_ids.add(lesson_id)
|
||||
webpage = self._download_webpage(url, course_id)
|
||||
|
||||
entries = []
|
||||
for lesson_id in sorted(lesson_ids):
|
||||
lesson_urls = set()
|
||||
for lesson_url in re.findall(
|
||||
r'<a[^>]+\bhref=["\'](/%s/lessons/\d+)' % course_id, webpage):
|
||||
if lesson_url in lesson_urls:
|
||||
continue
|
||||
lesson_urls.add(lesson_url)
|
||||
entries.append(self.url_result(
|
||||
smuggle_url(urljoin(url, lesson_id), {'force_video': True}),
|
||||
ie=RayWenderlichIE.ie_key()))
|
||||
urljoin(url, lesson_url), ie=RayWenderlichIE.ie_key()))
|
||||
|
||||
title = self._search_regex(
|
||||
r'class=["\']course-title[^>]+>([^<]+)', webpage, 'course title',
|
||||
default=None)
|
||||
title = self._og_search_title(
|
||||
webpage, default=None) or self._html_search_meta(
|
||||
'twitter:title', webpage, 'title', default=None)
|
||||
|
||||
return self.playlist_result(entries, course_id, title)
|
||||
|
||||

@@ -10,7 +10,7 @@ from ..utils import (


class RedBullTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?redbull\.tv/video/(?P<id>AP-\w+)'
_VALID_URL = r'https?://(?:www\.)?redbull(?:\.tv|\.com/(?:[^/]+/)?tv)/video/(?P<id>AP-\w+)'
_TESTS = [{
# film
'url': 'https://www.redbull.tv/video/AP-1Q6XCDTAN1W11',
@@ -35,6 +35,9 @@ class RedBullTVIE(InfoExtractor):
'params': {
'skip_download': True,
},
}, {
'url': 'https://www.redbull.com/int-en/tv/video/AP-1UWHCAR9S1W11/rob-meets-sam-gaze?playlist=playlists::3f81040a-2f31-4832-8e2e-545b1d39d173',
'only_matching': True,
}]

def _real_extract(self, url):

@@ -19,7 +19,7 @@ from ..utils import (

class SixPlayIE(InfoExtractor):
IE_NAME = '6play'
_VALID_URL = r'(?:6play:|https?://(?:www\.)?(?P<domain>6play\.fr|rtlplay.be)/.+?-c_)(?P<id>[0-9]+)'
_VALID_URL = r'(?:6play:|https?://(?:www\.)?(?P<domain>6play\.fr|rtlplay\.be|play\.rtl\.hr)/.+?-c_)(?P<id>[0-9]+)'
_TESTS = [{
'url': 'https://www.6play.fr/minute-par-minute-p_9533/le-but-qui-a-marque-lhistoire-du-football-francais-c_12041051',
'md5': '31fcd112637baa0c2ab92c4fcd8baf27',
@@ -32,6 +32,9 @@ class SixPlayIE(InfoExtractor):
}, {
'url': 'https://www.rtlplay.be/rtl-info-13h-p_8551/les-titres-du-rtlinfo-13h-c_12045869',
'only_matching': True,
}, {
'url': 'https://play.rtl.hr/pj-masks-p_9455/epizoda-34-sezona-1-catboyevo-cudo-na-dva-kotaca-c_11984989',
'only_matching': True,
}]

def _real_extract(self, url):
@@ -39,6 +42,7 @@ class SixPlayIE(InfoExtractor):
service, consumer_name = {
'6play.fr': ('6play', 'm6web'),
'rtlplay.be': ('rtlbe_rtl_play', 'rtlbe'),
'play.rtl.hr': ('rtlhr_rtl_play', 'rtlhr'),
}.get(domain, ('6play', 'm6web'))

data = self._download_json(

@@ -8,6 +8,7 @@ from ..utils import ExtractorError
class SlidesLiveIE(InfoExtractor):
_VALID_URL = r'https?://slideslive\.com/(?P<id>[0-9]+)'
_TESTS = [{
# video_service_name = YOUTUBE
'url': 'https://slideslive.com/38902413/gcc-ia16-backend',
'md5': 'b29fcd6c6952d0c79c5079b0e7a07e6f',
'info_dict': {
@@ -19,14 +20,18 @@ class SlidesLiveIE(InfoExtractor):
'uploader_id': 'UC62SdArr41t_-_fX40QCLRw',
'upload_date': '20170925',
}
}, {
# video_service_name = youtube
'url': 'https://slideslive.com/38903721/magic-a-scientific-resurrection-of-an-esoteric-legend',
'only_matching': True,
}]

def _real_extract(self, url):
video_id = self._match_id(url)
video_data = self._download_json(
url, video_id, headers={'Accept': 'application/json'})
service_name = video_data['video_service_name']
if service_name == 'YOUTUBE':
service_name = video_data['video_service_name'].lower()
if service_name == 'youtube':
yt_video_id = video_data['video_service_id']
return self.url_result(yt_video_id, 'Youtube', video_id=yt_video_id)
else:

@@ -72,4 +72,7 @@ class StreamcloudIE(InfoExtractor):
'title': title,
'url': video_url,
'thumbnail': thumbnail,
'http_headers': {
'Referer': url,
},
}

@@ -7,8 +7,10 @@ from .common import InfoExtractor

from ..compat import compat_str
from ..utils import (
float_or_none,
int_or_none,
try_get,
url_or_none,
)


@@ -30,7 +32,7 @@ class TEDIE(InfoExtractor):
'''
_TESTS = [{
'url': 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html',
'md5': '0de43ac406aa3e4ea74b66c9c7789b13',
'md5': 'b0ce2b05ca215042124fbc9e3886493a',
'info_dict': {
'id': '102',
'ext': 'mp4',
@@ -42,24 +44,30 @@ class TEDIE(InfoExtractor):
'uploader': 'Dan Dennett',
'width': 853,
'duration': 1308,
}
}, {
'url': 'http://www.ted.com/watch/ted-institute/ted-bcg/vishal-sikka-the-beauty-and-power-of-algorithms',
'md5': 'b899ac15e345fb39534d913f7606082b',
'info_dict': {
'id': 'tSVI8ta_P4w',
'ext': 'mp4',
'title': 'Vishal Sikka: The beauty and power of algorithms',
'thumbnail': r're:^https?://.+\.jpg',
'description': 'md5:6261fdfe3e02f4f579cbbfc00aff73f4',
'upload_date': '20140122',
'uploader_id': 'TEDInstitute',
'uploader': 'TED Institute',
'view_count': int,
'comment_count': int,
'tags': list,
},
'params': {
'skip_download': True,
},
}, {
# missing HTTP bitrates
'url': 'https://www.ted.com/talks/vishal_sikka_the_beauty_and_power_of_algorithms',
'info_dict': {
'id': '6069',
'ext': 'mp4',
'title': 'The beauty and power of algorithms',
'thumbnail': r're:^https?://.+\.jpg',
'description': 'md5:734e352710fb00d840ab87ae31aaf688',
'uploader': 'Vishal Sikka',
},
'params': {
'skip_download': True,
},
'add_ie': ['Youtube'],
}, {
'url': 'http://www.ted.com/talks/gabby_giffords_and_mark_kelly_be_passionate_be_courageous_be_your_best',
'md5': '71b3ab2f4233012dce09d515c9c39ce2',
'md5': 'e6b9617c01a7970ceac8bb2c92c346c0',
'info_dict': {
'id': '1972',
'ext': 'mp4',
@@ -68,6 +76,9 @@ class TEDIE(InfoExtractor):
'description': 'md5:5174aed4d0f16021b704120360f72b92',
'duration': 1128,
},
'params': {
'skip_download': True,
},
}, {
'url': 'http://www.ted.com/playlists/who_are_the_hackers',
'info_dict': {
@@ -92,17 +103,17 @@ class TEDIE(InfoExtractor):
'skip_download': True,
},
}, {
# YouTube video
'url': 'http://www.ted.com/talks/jeffrey_kluger_the_sibling_bond',
'add_ie': ['Youtube'],
# no nativeDownloads
'url': 'https://www.ted.com/talks/tom_thum_the_orchestra_in_my_mouth',
'info_dict': {
'id': 'aFBIPO-P7LM',
'id': '1792',
'ext': 'mp4',
'title': 'The hidden power of siblings: Jeff Kluger at TEDxAsheville',
'description': 'md5:3d7a4f50d95ca5dd67104e2a20f43fe1',
'uploader': 'TEDx Talks',
'uploader_id': 'TEDxTalks',
'upload_date': '20111216',
'title': 'The orchestra in my mouth',
'description': 'md5:5d1d78650e2f8dfcbb8ebee2951ac29a',
'uploader': 'Tom Thum',
'view_count': int,
'comment_count': int,
'tags': list,
},
'params': {
'skip_download': True,
@@ -161,27 +172,16 @@ class TEDIE(InfoExtractor):

info = self._extract_info(webpage)

talk_info = try_get(
info, lambda x: x['__INITIAL_DATA__']['talks'][0],
dict) or info['talks'][0]
data = try_get(info, lambda x: x['__INITIAL_DATA__'], dict) or info
talk_info = data['talks'][0]

title = talk_info['title'].strip()

external = talk_info.get('external')
if external:
service = external['service']
self.to_screen('Found video from %s' % service)
ext_url = None
if service.lower() == 'youtube':
ext_url = external.get('code')
return {
'_type': 'url',
'url': ext_url or external['uri'],
}

native_downloads = try_get(
talk_info, lambda x: x['downloads']['nativeDownloads'],
dict) or talk_info['nativeDownloads']
talk_info,
(lambda x: x['downloads']['nativeDownloads'],
lambda x: x['nativeDownloads']),
dict) or {}

formats = [{
'url': format_url,
@@ -196,10 +196,24 @@ class TEDIE(InfoExtractor):

player_talk = talk_info['player_talks'][0]

external = player_talk.get('external')
if isinstance(external, dict):
service = external.get('service')
if isinstance(service, compat_str):
ext_url = None
if service.lower() == 'youtube':
ext_url = external.get('code')
return {
'_type': 'url',
'url': ext_url or external['uri'],
}

resources_ = player_talk.get('resources') or talk_info.get('resources')

http_url = None
for format_id, resources in resources_.items():
if not isinstance(resources, dict):
continue
if format_id == 'h264':
for resource in resources:
h264_url = resource.get('file')
@@ -228,8 +242,12 @@ class TEDIE(InfoExtractor):
'tbr': int_or_none(resource.get('bitrate')),
})
elif format_id == 'hls':
stream_url = url_or_none(resources.get('stream'))
if not stream_url:
continue
formats.extend(self._extract_m3u8_formats(
resources.get('stream'), video_name, 'mp4', m3u8_id=format_id, fatal=False))
stream_url, video_name, 'mp4', m3u8_id=format_id,
fatal=False))

m3u8_formats = list(filter(
lambda f: f.get('protocol') == 'm3u8' and f.get('vcodec') != 'none',
@@ -239,9 +257,13 @@ class TEDIE(InfoExtractor):
bitrate = self._search_regex(r'(\d+k)', m3u8_format['url'], 'bitrate', default=None)
if not bitrate:
continue
bitrate_url = re.sub(r'\d+k', bitrate, http_url)
if not self._is_valid_url(
bitrate_url, video_name, '%s bitrate' % bitrate):
continue
f = m3u8_format.copy()
f.update({
'url': re.sub(r'\d+k', bitrate, http_url),
'url': bitrate_url,
'format_id': m3u8_format['format_id'].replace('hls', 'http'),
'protocol': 'http',
})
@@ -267,7 +289,11 @@ class TEDIE(InfoExtractor):
'description': self._og_search_description(webpage),
'subtitles': self._get_subtitles(video_id, talk_info),
'formats': formats,
'duration': talk_info.get('duration'),
'duration': float_or_none(talk_info.get('duration')),
'view_count': int_or_none(data.get('viewed_count')),
'comment_count': int_or_none(
try_get(data, lambda x: x['comments']['count'])),
'tags': try_get(talk_info, lambda x: x['tags'], list),
}

def _get_subtitles(self, video_id, talk_info):

@@ -1,26 +1,43 @@
# coding: utf-8
from __future__ import unicode_literals

from .mitele import MiTeleBaseIE
import json
import re

from .common import InfoExtractor
from .ooyala import OoyalaIE
from ..utils import (
clean_html,
determine_ext,
int_or_none,
str_or_none,
urljoin,
)


class TelecincoIE(MiTeleBaseIE):
class TelecincoIE(InfoExtractor):
IE_DESC = 'telecinco.es, cuatro.com and mediaset.es'
_VALID_URL = r'https?://(?:www\.)?(?:telecinco\.es|cuatro\.com|mediaset\.es)/(?:[^/]+/)+(?P<id>.+?)\.html'

_TESTS = [{
'url': 'http://www.telecinco.es/robinfood/temporada-01/t01xp14/Bacalao-cocochas-pil-pil_0_1876350223.html',
'md5': '8d7b2d5f699ee2709d992a63d5cd1712',
'info_dict': {
'id': 'JEA5ijCnF6p5W08A1rNKn7',
'ext': 'mp4',
'id': '1876350223',
'title': 'Bacalao con kokotxas al pil-pil',
'description': 'md5:1382dacd32dd4592d478cbdca458e5bb',
'duration': 662,
},
'playlist': [{
'md5': 'adb28c37238b675dad0f042292f209a7',
'info_dict': {
'id': 'JEA5ijCnF6p5W08A1rNKn7',
'ext': 'mp4',
'title': 'Con Martín Berasategui, hacer un bacalao al pil-pil es fácil y divertido',
'duration': 662,
},
}]
}, {
'url': 'http://www.cuatro.com/deportes/futbol/barcelona/Leo_Messi-Champions-Roma_2_2052780128.html',
'md5': '284393e5387b3b947b77c613ef04749a',
'md5': '9468140ebc300fbb8b9d65dc6e5c4b43',
'info_dict': {
'id': 'jn24Od1zGLG4XUZcnUnZB6',
'ext': 'mp4',
@@ -30,7 +47,7 @@ class TelecincoIE(MiTeleBaseIE):
},
}, {
'url': 'http://www.mediaset.es/12meses/campanas/doylacara/conlatratanohaytrato/Ayudame-dar-cara-trata-trato_2_1986630220.html',
'md5': '749afab6ea5a136a8806855166ae46a2',
'md5': 'ae2dc6b7b50b2392076a51c0f70e01f6',
'info_dict': {
'id': 'aywerkD2Sv1vGNqq9b85Q2',
'ext': 'mp4',
@@ -50,17 +67,90 @@ class TelecincoIE(MiTeleBaseIE):
'only_matching': True,
}]

def _parse_content(self, content, url):
video_id = content['dataMediaId']
if content.get('dataCmsId') == 'ooyala':
return self.url_result(
'ooyala:%s' % video_id, OoyalaIE.ie_key(), video_id)
config_url = urljoin(url, content['dataConfig'])
config = self._download_json(
config_url, video_id, 'Downloading config JSON')
title = config['info']['title']

def mmc_url(mmc_type):
return re.sub(
r'/(?:flash|html5)\.json', '/%s.json' % mmc_type,
config['services']['mmc'])

duration = None
formats = []
for mmc_type in ('flash', 'html5'):
mmc = self._download_json(
mmc_url(mmc_type), video_id,
'Downloading %s mmc JSON' % mmc_type, fatal=False)
if not mmc:
continue
if not duration:
duration = int_or_none(mmc.get('duration'))
for location in mmc['locations']:
gat = self._proto_relative_url(location.get('gat'), 'http:')
gcp = location.get('gcp')
ogn = location.get('ogn')
if None in (gat, gcp, ogn):
continue
token_data = {
'gcp': gcp,
'ogn': ogn,
'sta': 0,
}
media = self._download_json(
gat, video_id, data=json.dumps(token_data).encode('utf-8'),
headers={
'Content-Type': 'application/json;charset=utf-8',
'Referer': url,
}, fatal=False) or {}
stream = media.get('stream') or media.get('file')
if not stream:
continue
ext = determine_ext(stream)
if ext == 'f4m':
formats.extend(self._extract_f4m_formats(
stream + '&hdcore=3.2.0&plugin=aasp-3.2.0.77.18',
video_id, f4m_id='hds', fatal=False))
elif ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
stream, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
self._sort_formats(formats)

return {
'id': video_id,
'title': title,
'formats': formats,
'thumbnail': content.get('dataPoster') or config.get('poster', {}).get('imageUrl'),
'duration': duration,
}

def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
title = self._html_search_meta(
['og:title', 'twitter:title'], webpage, 'title')
info = self._get_player_info(url, webpage)
article = self._parse_json(self._search_regex(
r'window\.\$REACTBASE_STATE\.article\s*=\s*({.+})',
webpage, 'article'), display_id)['article']
title = article.get('title')
description = clean_html(article.get('leadParagraph'))
if article.get('editorialType') != 'VID':
entries = []
for p in article.get('body', []):
content = p.get('content')
if p.get('type') != 'video' or not content:
continue
entries.append(self._parse_content(content, url))
return self.playlist_result(
entries, str_or_none(article.get('id')), title, description)
content = article['opening']['content']
info = self._parse_content(content, url)
info.update({
'display_id': display_id,
'title': title,
'description': self._html_search_meta(
['og:description', 'twitter:description'],
webpage, 'title', fatal=False),
'description': description,
})
return info

@@ -310,7 +310,7 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):

class ThePlatformFeedIE(ThePlatformBaseIE):
_URL_TEMPLATE = '%s//feed.theplatform.com/f/%s/%s?form=json&%s'
_VALID_URL = r'https?://feed\.theplatform\.com/f/(?P<provider_id>[^/]+)/(?P<feed_id>[^?/]+)\?(?:[^&]+&)*(?P<filter>by(?:Gui|I)d=(?P<id>[\w-]+))'
_VALID_URL = r'https?://feed\.theplatform\.com/f/(?P<provider_id>[^/]+)/(?P<feed_id>[^?/]+)\?(?:[^&]+&)*(?P<filter>by(?:Gui|I)d=(?P<id>[^&]+))'
_TESTS = [{
# From http://player.theplatform.com/p/7wvmTC/MSNBCEmbeddedOffSite?guid=n_hardball_5biden_140207
'url': 'http://feed.theplatform.com/f/7wvmTC/msnbc_video-p-test?form=json&pretty=true&range=-40&byGuid=n_hardball_5biden_140207',
@@ -327,6 +327,9 @@ class ThePlatformFeedIE(ThePlatformBaseIE):
'categories': ['MSNBC/Issues/Democrats', 'MSNBC/Issues/Elections/Election 2016'],
'uploader': 'NBCU-NEWS',
},
}, {
'url': 'http://feed.theplatform.com/f/2E2eJC/nnd_NBCNews?byGuid=nn_netcast_180306.Copy.01',
'only_matching': True,
}]

def _extract_feed_info(self, provider_id, feed_id, filter_query, video_id, custom_fields=None, asset_types_query={}, account_id=None):

@@ -32,12 +32,12 @@ class TVPlayIE(InfoExtractor):
https?://
(?:www\.)?
(?:
tvplay(?:\.skaties)?\.lv/parraides|
(?:tv3play|play\.tv3)\.lt/programos|
tvplay(?:\.skaties)?\.lv(?:/parraides)?|
(?:tv3play|play\.tv3)\.lt(?:/programos)?|
tv3play(?:\.tv3)?\.ee/sisu|
(?:tv(?:3|6|8|10)play|viafree)\.se/program|
(?:(?:tv3play|viasat4play|tv6play|viafree)\.no|(?:tv3play|viafree)\.dk)/programmer|
play\.novatv\.bg/programi
play\.nova(?:tv)?\.bg/programi
)
/(?:[^/]+/)+
)
@@ -203,10 +203,18 @@ class TVPlayIE(InfoExtractor):
'skip_download': True,
},
},
{
'url': 'https://play.nova.bg/programi/zdravei-bulgariya/764300?autostart=true',
'only_matching': True,
},
{
'url': 'http://tvplay.skaties.lv/parraides/vinas-melo-labak/418113?autostart=true',
'only_matching': True,
},
{
'url': 'https://tvplay.skaties.lv/vinas-melo-labak/418113/?autostart=true',
'only_matching': True,
},
{
# views is null
'url': 'http://tvplay.skaties.lv/parraides/tv3-zinas/760183',
@@ -288,6 +296,7 @@ class TVPlayIE(InfoExtractor):
'url': m.group('url'),
'app': m.group('app'),
'play_path': m.group('playpath'),
'preference': -1,
})
else:
fmt.update({
@@ -447,3 +456,102 @@ class ViafreeIE(InfoExtractor):
'skip_rtmp': True,
}),
ie=TVPlayIE.ie_key(), video_id=video_id)


class TVPlayHomeIE(InfoExtractor):
_VALID_URL = r'https?://tvplay\.(?:tv3\.lt|skaties\.lv|tv3\.ee)/[^/]+/[^/?#&]+-(?P<id>\d+)'
_TESTS = [{
'url': 'https://tvplay.tv3.lt/aferistai-n-7/aferistai-10047125/',
'info_dict': {
'id': '366367',
'ext': 'mp4',
'title': 'Aferistai',
'description': 'Aferistai. Kalėdinė pasaka.',
'series': 'Aferistai [N-7]',
'season': '1 sezonas',
'season_number': 1,
'duration': 464,
'timestamp': 1394209658,
'upload_date': '20140307',
'age_limit': 18,
},
'params': {
'skip_download': True,
},
'add_ie': [TVPlayIE.ie_key()],
}, {
'url': 'https://tvplay.skaties.lv/vinas-melo-labak/vinas-melo-labak-10280317/',
'only_matching': True,
}, {
'url': 'https://tvplay.tv3.ee/cool-d-ga-mehhikosse/cool-d-ga-mehhikosse-10044354/',
'only_matching': True,
}]

def _real_extract(self, url):
video_id = self._match_id(url)

webpage = self._download_webpage(url, video_id)

video_id = self._search_regex(
r'data-asset-id\s*=\s*["\'](\d{5,7})\b', webpage, 'video id',
default=None)

if video_id:
return self.url_result(
'mtg:%s' % video_id, ie=TVPlayIE.ie_key(), video_id=video_id)

m3u8_url = self._search_regex(
r'data-file\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
'm3u8 url', group='url')

formats = self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls')
self._sort_formats(formats)

title = self._search_regex(
r'data-title\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
'title', default=None, group='value') or self._html_search_meta(
'title', webpage, default=None) or self._og_search_title(
webpage)

description = self._html_search_meta(
'description', webpage,
default=None) or self._og_search_description(webpage)

thumbnail = self._search_regex(
r'data-image\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
'thumbnail', default=None, group='url') or self._html_search_meta(
'thumbnail', webpage, default=None) or self._og_search_thumbnail(
webpage)

duration = int_or_none(self._search_regex(
r'data-duration\s*=\s*["\'](\d+)', webpage, 'duration',
fatal=False))

season = self._search_regex(
(r'data-series-title\s*=\s*(["\'])[^/]+/(?P<value>(?:(?!\1).)+)\1',
r'\bseason\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), webpage,
'season', default=None, group='value')
season_number = int_or_none(self._search_regex(
r'(\d+)(?:[.\s]+sezona|\s+HOOAEG)', season or '', 'season number',
default=None))
episode = self._search_regex(
r'(["\'])(?P<value>(?:(?!\1).)+)\1', webpage, 'episode',
default=None, group='value')
episode_number = int_or_none(self._search_regex(
r'(?:S[eē]rija|Osa)\s+(\d+)', episode or '', 'episode number',
default=None))

return {
'id': video_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'duration': duration,
'season': season,
'season_number': season_number,
'episode': episode,
'episode_number': episode_number,
'formats': formats,
}
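
A quick way to sanity-check the new TVPlayHomeIE is to ask it whether it claims one of the URLs from its tests. This is only an illustrative sketch, assuming a local youtube-dl checkout on the import path; it is not part of the change itself:

# Illustrative sketch (assumes youtube-dl is importable); the URL is taken
# from the TVPlayHomeIE test case above.
from youtube_dl.extractor.tvplay import TVPlayHomeIE
print(TVPlayHomeIE.suitable('https://tvplay.tv3.lt/aferistai-n-7/aferistai-10047125/'))  # should print True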

@@ -4,10 +4,10 @@ from __future__ import unicode_literals
import itertools
import re
import random
import json

from .common import InfoExtractor
from ..compat import (
compat_HTTPError,
compat_kwargs,
compat_parse_qs,
compat_str,
@@ -26,7 +26,6 @@ from ..utils import (
try_get,
unified_timestamp,
update_url_query,
urlencode_postdata,
url_or_none,
urljoin,
)
@@ -37,8 +36,9 @@ class TwitchBaseIE(InfoExtractor):

_API_BASE = 'https://api.twitch.tv'
_USHER_BASE = 'https://usher.ttvnw.net'
_LOGIN_URL = 'https://www.twitch.tv/login'
_CLIENT_ID = 'jzkbprff40iqj646a697cyrvl0zt2m6'
_LOGIN_FORM_URL = 'https://www.twitch.tv/login'
_LOGIN_POST_URL = 'https://passport.twitch.tv/login'
_CLIENT_ID = 'kimne78kx3ncx6brgo4mv6wki5h1ko'
_NETRC_MACHINE = 'twitch'

def _handle_error(self, response):
@@ -77,22 +77,21 @@ class TwitchBaseIE(InfoExtractor):
page_url = urlh.geturl()
post_url = self._search_regex(
r'<form[^>]+action=(["\'])(?P<url>.+?)\1', page,
'post url', default=page_url, group='url')
'post url', default=self._LOGIN_POST_URL, group='url')
post_url = urljoin(page_url, post_url)

headers = {'Referer': page_url}
headers = {
'Referer': page_url,
'Origin': page_url,
'Content-Type': 'text/plain;charset=UTF-8',
}

try:
response = self._download_json(
post_url, None, note,
data=urlencode_postdata(form),
headers=headers)
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
response = self._parse_json(
e.cause.read().decode('utf-8'), None)
fail(response.get('message') or response['errors'][0])
raise
response = self._download_json(
post_url, None, note, data=json.dumps(form).encode(),
headers=headers, expected_status=400)
error = response.get('error_description') or response.get('error_code')
if error:
fail(error)

if 'Authenticated successfully' in response.get('message', ''):
return None, None
@@ -105,7 +104,7 @@ class TwitchBaseIE(InfoExtractor):
headers=headers)

login_page, handle = self._download_webpage_handle(
self._LOGIN_URL, None, 'Downloading login page')
self._LOGIN_FORM_URL, None, 'Downloading login page')

# Some TOR nodes and public proxies are blocked completely
if 'blacklist_message' in login_page:
@@ -115,6 +114,7 @@ class TwitchBaseIE(InfoExtractor):
login_page, handle, 'Logging in', {
'username': username,
'password': password,
'client_id': self._CLIENT_ID,
})

# Successful login
@@ -240,7 +240,7 @@ class TwitchVodIE(TwitchItemBaseIE):
_VALID_URL = r'''(?x)
https?://
(?:
(?:(?:www|go|m)\.)?twitch\.tv/(?:[^/]+/v|videos)/|
(?:(?:www|go|m)\.)?twitch\.tv/(?:[^/]+/v(?:ideo)?|videos)/|
player\.twitch\.tv/\?.*?\bvideo=v
)
(?P<id>\d+)
@@ -296,6 +296,9 @@ class TwitchVodIE(TwitchItemBaseIE):
}, {
'url': 'https://m.twitch.tv/beagsandjam/v/247478721',
'only_matching': True,
}, {
'url': 'https://www.twitch.tv/northernlion/video/291940395',
'only_matching': True,
}]

def _real_extract(self, url):
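
The widened TwitchVodIE pattern now also covers the /video/ URL form exercised by the new test. A minimal, illustrative check, assuming youtube-dl is importable, would be:

# Illustrative only; the URL comes from the new TwitchVodIE test case above.
from youtube_dl.extractor.twitch import TwitchVodIE
print(TwitchVodIE.suitable('https://www.twitch.tv/northernlion/video/291940395'))  # should print True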

@@ -13,7 +13,7 @@ from ..utils import (


class VidziIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?vidzi\.(?:tv|cc|si)/(?:embed-)?(?P<id>[0-9a-zA-Z]+)'
_VALID_URL = r'https?://(?:www\.)?vidzi\.(?:tv|cc|si|nu)/(?:embed-)?(?P<id>[0-9a-zA-Z]+)'
_TESTS = [{
'url': 'http://vidzi.tv/cghql9yq6emu.html',
'md5': '4f16c71ca0c8c8635ab6932b5f3f1660',
@@ -35,6 +35,9 @@ class VidziIE(InfoExtractor):
}, {
'url': 'https://vidzi.si/rph9gztxj1et.html',
'only_matching': True,
}, {
'url': 'http://vidzi.nu/cghql9yq6emu.html',
'only_matching': True,
}]

def _real_extract(self, url):

99 youtube_dl/extractor/viqeo.py Normal file
@@ -0,0 +1,99 @@
# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
int_or_none,
str_or_none,
url_or_none,
)


class ViqeoIE(InfoExtractor):
_VALID_URL = r'''(?x)
(?:
viqeo:|
https?://cdn\.viqeo\.tv/embed/*\?.*?\bvid=|
https?://api\.viqeo\.tv/v\d+/data/startup?.*?\bvideo(?:%5B%5D|\[\])=
)
(?P<id>[\da-f]+)
'''
_TESTS = [{
'url': 'https://cdn.viqeo.tv/embed/?vid=cde96f09d25f39bee837',
'md5': 'a169dd1a6426b350dca4296226f21e76',
'info_dict': {
'id': 'cde96f09d25f39bee837',
'ext': 'mp4',
'title': 'cde96f09d25f39bee837',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 76,
},
}, {
'url': 'viqeo:cde96f09d25f39bee837',
'only_matching': True,
}, {
'url': 'https://api.viqeo.tv/v1/data/startup?video%5B%5D=71bbec412ade45c3216c&profile=112',
'only_matching': True,
}]

@staticmethod
def _extract_urls(webpage):
return [
mobj.group('url')
for mobj in re.finditer(
r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//cdn\.viqeo\.tv/embed/*\?.*?\bvid=[\da-f]+.*?)\1',
webpage)]

def _real_extract(self, url):
video_id = self._match_id(url)

webpage = self._download_webpage(
'https://cdn.viqeo.tv/embed/?vid=%s' % video_id, video_id)

data = self._parse_json(
self._search_regex(
r'SLOT_DATA\s*=\s*({.+?})\s*;', webpage, 'slot data'),
video_id)

formats = []
thumbnails = []
for media_file in data['mediaFiles']:
if not isinstance(media_file, dict):
continue
media_url = url_or_none(media_file.get('url'))
if not media_url or not media_url.startswith(('http', '//')):
continue
media_type = str_or_none(media_file.get('type'))
if not media_type:
continue
media_kind = media_type.split('/')[0].lower()
f = {
'url': media_url,
'width': int_or_none(media_file.get('width')),
'height': int_or_none(media_file.get('height')),
}
format_id = str_or_none(media_file.get('quality'))
if media_kind == 'image':
f['id'] = format_id
thumbnails.append(f)
elif media_kind in ('video', 'audio'):
is_audio = media_kind == 'audio'
f.update({
'format_id': 'audio' if is_audio else format_id,
'fps': int_or_none(media_file.get('fps')),
'vcodec': 'none' if is_audio else None,
})
formats.append(f)
self._sort_formats(formats)

duration = int_or_none(data.get('duration'))

return {
'id': video_id,
'title': video_id,
'duration': duration,
'thumbnails': thumbnails,
'formats': formats,
}
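
Because ViqeoIE also ships a static _extract_urls helper, other extractors can hand embedded Viqeo players to it. A small usage sketch, assuming the file above is importable as youtube_dl.extractor.viqeo, with an embed URL borrowed from its own test:

# Usage sketch for the _extract_urls helper defined above.
from youtube_dl.extractor.viqeo import ViqeoIE
html = '<iframe src="https://cdn.viqeo.tv/embed/?vid=cde96f09d25f39bee837"></iframe>'
print(ViqeoIE._extract_urls(html))  # should list the embed URL found in the markup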

@@ -17,6 +17,7 @@ from ..utils import (
int_or_none,
orderedSet,
remove_start,
str_or_none,
str_to_int,
unescapeHTML,
unified_timestamp,
@@ -106,10 +107,10 @@ class VKIE(VKBaseIE):
'ext': 'mp4',
'title': 'ProtivoGunz - Хуёвая песня',
'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*',
'uploader_id': '-77521',
'duration': 195,
'timestamp': 1329060660,
'timestamp': 1329049880,
'upload_date': '20120212',
'view_count': int,
},
},
{
@@ -118,12 +119,12 @@ class VKIE(VKBaseIE):
'info_dict': {
'id': '165548505',
'ext': 'mp4',
'uploader': 'Tom Cruise',
'title': 'No name',
'uploader': 'Tom Cruise',
'uploader_id': '205387401',
'duration': 9,
'timestamp': 1374374880,
'upload_date': '20130721',
'view_count': int,
'timestamp': 1374364108,
'upload_date': '20130720',
}
},
{
@@ -207,10 +208,10 @@ class VKIE(VKBaseIE):
'id': 'V3K4mi0SYkc',
'ext': 'webm',
'title': "DSWD Awards 'Children's Joy Foundation, Inc.' Certificate of Registration and License to Operate",
'description': 'md5:d9903938abdc74c738af77f527ca0596',
'duration': 178,
'description': 'md5:bf9c26cfa4acdfb146362682edd3827a',
'duration': 179,
'upload_date': '20130116',
'uploader': "Children's Joy Foundation",
'uploader': "Children's Joy Foundation Inc.",
'uploader_id': 'thecjf',
'view_count': int,
},
@@ -222,6 +223,7 @@ class VKIE(VKBaseIE):
'id': 'k3lz2cmXyRuJQSjGHUv',
'ext': 'mp4',
'title': 'md5:d52606645c20b0ddbb21655adaa4f56f',
# TODO: fix test by fixing dailymotion description extraction
'description': 'md5:c651358f03c56f1150b555c26d90a0fd',
'uploader': 'AniLibria.Tv',
'upload_date': '20160914',
@@ -241,9 +243,12 @@ class VKIE(VKBaseIE):
'ext': 'mp4',
'title': 'S-Dance, репетиции к The way show',
'uploader': 'THE WAY SHOW | 17 апреля',
'timestamp': 1454870100,
'uploader_id': '-110305615',
'timestamp': 1454859345,
'upload_date': '20160207',
'view_count': int,
},
'params': {
'skip_download': True,
},
},
{
@@ -296,7 +301,7 @@ class VKIE(VKBaseIE):
video_id = mobj.group('videoid')

if video_id:
info_url = 'https://vk.com/al_video.php?act=show&al=1&module=video&video=%s' % video_id
info_url = 'https://vk.com/al_video.php?act=show_inline&al=1&video=' + video_id
# Some videos (removed?) can only be downloaded with list id specified
list_id = mobj.group('list_id')
if list_id:
@@ -346,6 +351,9 @@ class VKIE(VKBaseIE):

r'<!>This video is no longer available, because its author has been blocked.':
'Video %s is no longer available, because its author has been blocked.',

r'<!>This video is no longer available, because it has been deleted.':
'Video %s is no longer available, because it has been deleted.',
}

for error_re, error_msg in ERRORS.items():
@@ -394,7 +402,8 @@ class VKIE(VKBaseIE):
if not data:
data = self._parse_json(
self._search_regex(
r'<!json>\s*({.+?})\s*<!>', info_page, 'json', default='{}'),
[r'<!json>\s*({.+?})\s*<!>', r'<!json>\s*({.+})'],
info_page, 'json', default='{}'),
video_id)
if data:
data = data['player']['params'][0]
@@ -416,7 +425,7 @@ class VKIE(VKBaseIE):

timestamp = unified_timestamp(self._html_search_regex(
r'class=["\']mv_info_date[^>]+>([^<]+)(?:<|from)', info_page,
'upload date', fatal=False))
'upload date', default=None)) or int_or_none(data.get('date'))

view_count = str_to_int(self._search_regex(
r'class=["\']mv_views_count[^>]+>\s*([\d,.]+)',
@@ -454,9 +463,12 @@ class VKIE(VKBaseIE):
'title': title,
'thumbnail': data.get('jpg'),
'uploader': data.get('md_author'),
'uploader_id': str_or_none(data.get('author_id')),
'duration': data.get('duration'),
'timestamp': timestamp,
'view_count': view_count,
'like_count': int_or_none(data.get('liked')),
'dislike_count': int_or_none(data.get('nolikes')),
'is_live': is_live,
}


@@ -72,7 +72,7 @@ class VRVBaseIE(InfoExtractor):

class VRVIE(VRVBaseIE):
IE_NAME = 'vrv'
_VALID_URL = r'https?://(?:www\.)?vrv\.co/watch/(?P<id>[A-Z0-9]+)'
_TEST = {
_TESTS = [{
'url': 'https://vrv.co/watch/GR9PNZ396/Hidden-America-with-Jonah-Ray:BOSTON-WHERE-THE-PAST-IS-THE-PRESENT',
'info_dict': {
'id': 'GR9PNZ396',
@@ -85,7 +85,28 @@ class VRVIE(VRVBaseIE):
# m3u8 download
'skip_download': True,
},
}
}]

def _extract_vrv_formats(self, url, video_id, stream_format, audio_lang, hardsub_lang):
if not url or stream_format not in ('hls', 'dash'):
return []
stream_id = hardsub_lang or audio_lang
format_id = '%s-%s' % (stream_format, stream_id)
if stream_format == 'hls':
adaptive_formats = self._extract_m3u8_formats(
url, video_id, 'mp4', m3u8_id=format_id,
note='Downloading %s m3u8 information' % stream_id,
fatal=False)
elif stream_format == 'dash':
adaptive_formats = self._extract_mpd_formats(
url, video_id, mpd_id=format_id,
note='Downloading %s MPD information' % stream_id,
fatal=False)
if audio_lang:
for f in adaptive_formats:
if f.get('acodec') != 'none':
f['language'] = audio_lang
return adaptive_formats

def _real_extract(self, url):
video_id = self._match_id(url)
@@ -115,26 +136,9 @@ class VRVIE(VRVBaseIE):
for stream_type, streams in streams_json.get('streams', {}).items():
if stream_type in ('adaptive_hls', 'adaptive_dash'):
for stream in streams.values():
stream_url = stream.get('url')
if not stream_url:
continue
stream_id = stream.get('hardsub_locale') or audio_locale
format_id = '%s-%s' % (stream_type.split('_')[1], stream_id)
if stream_type == 'adaptive_hls':
adaptive_formats = self._extract_m3u8_formats(
stream_url, video_id, 'mp4', m3u8_id=format_id,
note='Downloading %s m3u8 information' % stream_id,
fatal=False)
else:
adaptive_formats = self._extract_mpd_formats(
stream_url, video_id, mpd_id=format_id,
note='Downloading %s MPD information' % stream_id,
fatal=False)
if audio_locale:
for f in adaptive_formats:
if f.get('acodec') != 'none':
f['language'] = audio_locale
formats.extend(adaptive_formats)
formats.extend(self._extract_vrv_formats(
stream.get('url'), video_id, stream_type.split('_')[1],
audio_locale, stream.get('hardsub_locale')))
self._sort_formats(formats)

subtitles = {}

@@ -10,6 +10,7 @@ from ..utils import (
js_to_json,
strip_or_none,
try_get,
unescapeHTML,
unified_timestamp,
)
@@ -67,12 +68,20 @@ class WatchBoxIE(InfoExtractor):

webpage = self._download_webpage(url, video_id)

source = (self._parse_json(
player_config = self._parse_json(
self._search_regex(
r'playerConf\s*=\s*({.+?})\s*;', webpage, 'player config',
default='{}'),
video_id, transform_source=js_to_json,
fatal=False) or {}).get('source') or {}
r'data-player-conf=(["\'])(?P<data>{.+?})\1', webpage,
'player config', default='{}', group='data'),
video_id, transform_source=unescapeHTML, fatal=False)

if not player_config:
player_config = self._parse_json(
self._search_regex(
r'playerConf\s*=\s*({.+?})\s*;', webpage, 'player config',
default='{}'),
video_id, transform_source=js_to_json, fatal=False) or {}

source = player_config.get('source') or {}

video_id = compat_str(source.get('videoId') or video_id)

@@ -4,7 +4,10 @@ from __future__ import unicode_literals
import re

from .common import InfoExtractor
from ..utils import int_or_none
from ..utils import (
int_or_none,
orderedSet,
)


class WebOfStoriesIE(InfoExtractor):
@@ -133,8 +136,10 @@ class WebOfStoriesPlaylistIE(InfoExtractor):
webpage = self._download_webpage(url, playlist_id)

entries = [
self.url_result('http://www.webofstories.com/play/%s' % video_number, 'WebOfStories')
for video_number in set(re.findall(r'href="/playAll/%s\?sId=(\d+)"' % playlist_id, webpage))
self.url_result(
'http://www.webofstories.com/play/%s' % video_id,
'WebOfStories', video_id=video_id)
for video_id in orderedSet(re.findall(r'\bid=["\']td_(\d+)', webpage))
]

title = self._search_regex(

@@ -23,7 +23,7 @@ class XFileShareIE(InfoExtractor):
(r'powerwatch\.pw', 'PowerWatch'),
(r'rapidvideo\.ws', 'Rapidvideo.ws'),
(r'thevideobee\.to', 'TheVideoBee'),
(r'vidto\.me', 'Vidto'),
(r'vidto\.(?:me|se)', 'Vidto'),
(r'streamin\.to', 'Streamin.To'),
(r'xvidstage\.com', 'XVIDSTAGE'),
(r'vidabc\.com', 'Vid ABC'),
@@ -115,7 +115,10 @@ class XFileShareIE(InfoExtractor):
'only_matching': True,
}, {
'url': 'http://www.fastvideo.me/k8604r8nk8sn/FAST_FURIOUS_8_-_Trailer_italiano_ufficiale.mp4.html',
'only_matching': True
'only_matching': True,
}, {
'url': 'http://vidto.se/1tx1pf6t12cg.html',
'only_matching': True,
}]

@staticmethod

41 youtube_dl/extractor/yourporn.py Normal file
@@ -0,0 +1,41 @@
from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import urljoin


class YourPornIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?yourporn\.sexy/post/(?P<id>[^/?#&.]+)'
_TEST = {
'url': 'https://yourporn.sexy/post/57ffcb2e1179b.html',
'md5': '6f8682b6464033d87acaa7a8ff0c092e',
'info_dict': {
'id': '57ffcb2e1179b',
'ext': 'mp4',
'title': 'md5:c9f43630bd968267672651ba905a7d35',
'thumbnail': r're:^https?://.*\.jpg$',
},
}

def _real_extract(self, url):
video_id = self._match_id(url)

webpage = self._download_webpage(url, video_id)

video_url = urljoin(url, self._parse_json(
self._search_regex(
r'data-vnfo=(["\'])(?P<data>{.+?})\1', webpage, 'data info',
group='data'),
video_id)[video_id])

title = (self._search_regex(
r'<[^>]+\bclass=["\']PostEditTA[^>]+>([^<]+)', webpage, 'title',
default=None) or self._og_search_description(webpage)).strip()
thumbnail = self._og_search_thumbnail(webpage)

return {
'id': video_id,
'url': video_url,
'title': title,
'thumbnail': thumbnail,
}

@@ -64,7 +64,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
# If True it will raise an error if no login info is provided
_LOGIN_REQUIRED = False

_PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL)[0-9A-Za-z-_]{10,}'
_PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)[0-9A-Za-z-_]{10,}'

def _set_language(self):
self._set_cookie(
@@ -1178,7 +1178,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _parse_sig_js(self, jscode):
funcname = self._search_regex(
(r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\('),
r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*c\s*&&\s*d\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\('),
jscode, 'Initial JS player signature function name', group='sig')

jsi = JSInterpreter(jscode)
@@ -2123,7 +2125,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
)
(
(?:PL|LL|EC|UU|FL|RD|UL|TL)?[0-9A-Za-z-_]{10,}
(?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)?[0-9A-Za-z-_]{10,}
# Top tracks, they can also include dots
|(?:MC)[\w\.]*
)
@@ -2261,6 +2263,10 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
}, {
'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
'only_matching': True,
}, {
# music album playlist
'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
'only_matching': True,
}]

def _real_initialize(self):
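
With the OLAK5uy_ prefix added to both _PLAYLIST_ID_RE and the playlist URL pattern, auto-generated album playlists are now accepted. A minimal check, assuming a local checkout and using the ID from the new test case, might look like:

# Minimal sketch; the playlist ID is the one from the test case above.
from youtube_dl.extractor.youtube import YoutubePlaylistIE
url = 'https://www.youtube.com/playlist?list=OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM'
print(YoutubePlaylistIE.suitable(url))  # should print True once the prefix is allowed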

@@ -49,7 +49,6 @@ from .compat import (
compat_os_name,
compat_parse_qs,
compat_shlex_quote,
compat_socket_create_connection,
compat_str,
compat_struct_pack,
compat_struct_unpack,
@@ -82,7 +81,7 @@ def register_socks_protocols():
compiled_regex_type = type(re.compile(''))

std_headers = {
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:59.0) Gecko/20100101 Firefox/59.0 (Chrome)',
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:59.0) Gecko/20100101 Firefox/59.0',
'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'Accept-Encoding': 'gzip, deflate',
@@ -882,13 +881,51 @@ def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
kwargs['strict'] = True
hc = http_class(*args, **compat_kwargs(kwargs))
source_address = ydl_handler._params.get('source_address')

if source_address is not None:
# This is to workaround _create_connection() from socket where it will try all
# address data from getaddrinfo() including IPv6. This filters the result from
# getaddrinfo() based on the source_address value.
# This is based on the cpython socket.create_connection() function.
# https://github.com/python/cpython/blob/master/Lib/socket.py#L691
def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
host, port = address
err = None
addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
ip_addrs = [addr for addr in addrs if addr[0] == af]
if addrs and not ip_addrs:
ip_version = 'v4' if af == socket.AF_INET else 'v6'
raise socket.error(
"No remote IP%s addresses available for connect, can't use '%s' as source address"
% (ip_version, source_address[0]))
for res in ip_addrs:
af, socktype, proto, canonname, sa = res
sock = None
try:
sock = socket.socket(af, socktype, proto)
if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
sock.settimeout(timeout)
sock.bind(source_address)
sock.connect(sa)
err = None # Explicitly break reference cycle
return sock
except socket.error as _:
err = _
if sock is not None:
sock.close()
if err is not None:
raise err
else:
raise socket.error('getaddrinfo returns an empty list')
if hasattr(hc, '_create_connection'):
hc._create_connection = _create_connection
sa = (source_address, 0)
if hasattr(hc, 'source_address'): # Python 2.7+
hc.source_address = sa
else: # Python 2.6
def _hc_connect(self, *args, **kwargs):
sock = compat_socket_create_connection(
sock = _create_connection(
(self.host, self.port), self.timeout, sa)
if is_https:
self.sock = ssl.wrap_socket(
@@ -3569,7 +3606,7 @@ class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
setattr(self, '%s_open' % type,
lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
meth(r, proxy, type))
return compat_urllib_request.ProxyHandler.__init__(self, proxies)
compat_urllib_request.ProxyHandler.__init__(self, proxies)

def proxy_open(self, req, proxy, type):
req_proxy = req.headers.get('Ytdl-request-proxy')

@@ -1,3 +1,3 @@
from __future__ import unicode_literals

__version__ = '2018.07.21'
__version__ = '2018.09.08'