Compare commits
29 Commits
2017.02.14
...
2017.02.17
Author | SHA1 | Date | |
---|---|---|---|
28e35f5070 | |||
cf3704c132 | |||
2c1f442c2b | |||
bad4ccdb5d | |||
db76c30c6e | |||
c2bde5d081 | |||
90fad0e74c | |||
d94badc755 | |||
fef51645d6 | |||
4cead6a614 | |||
a4a554a793 | |||
b898f0a173 | |||
2480b056c1 | |||
3aa25395aa | |||
eafaeb226a | |||
de4d378c0c | |||
099cfdb770 | |||
398dea3210 | |||
db13c16ef8 | |||
1bd05345ea | |||
3021cf83b7 | |||
04a741232f | |||
43a3d9edfc | |||
d31aa74fdb | |||
6092ccd058 | |||
22ce9ad2bd | |||
9a372f14b4 | |||
5cb2d36c82 | |||
fcca0d53a8 |
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@ -6,8 +6,8 @@
|
||||
|
||||
---
|
||||
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.14*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.14**
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.17*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.17**
|
||||
|
||||
### Before submitting an *issue* make sure you have:
|
||||
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||
@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2017.02.14
|
||||
[debug] youtube-dl version 2017.02.17
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
@ -11,8 +11,6 @@ sudo: false
|
||||
env:
|
||||
- YTDL_TEST_SET=core
|
||||
- YTDL_TEST_SET=download
|
||||
before_script:
|
||||
- chmod +x ./devscripts/run_tests.sh
|
||||
script: ./devscripts/run_tests.sh
|
||||
notifications:
|
||||
email:
|
||||
|
30
ChangeLog
30
ChangeLog
@ -1,3 +1,33 @@
|
||||
version 2017.02.17
|
||||
|
||||
Extractors
|
||||
* [heise] Improve extraction (#9725)
|
||||
* [ellentv] Improve (#11653)
|
||||
* [openload] Fix extraction (#10408, #12002)
|
||||
+ [theplatform] Recognize URLs with whitespaces (#12044)
|
||||
* [einthusan] Relax URL regular expression (#12141, #12159)
|
||||
+ [generic] Support complex JWPlayer embedded videos (#12030)
|
||||
* [elpais] Improve extraction (#12139)
|
||||
|
||||
|
||||
version 2017.02.16
|
||||
|
||||
Core
|
||||
+ [utils] Add support for quoted string literals in --match-filter (#8050,
|
||||
#12142, #12144)
|
||||
|
||||
Extractors
|
||||
* [ceskatelevize] Lower priority for audio description sources (#12119)
|
||||
* [amcnetworks] Fix extraction (#12127)
|
||||
* [pinkbike] Fix uploader extraction (#12054)
|
||||
+ [onetpl] Add support for businessinsider.com.pl and plejada.pl
|
||||
+ [onetpl] Add support for onet.pl (#10507)
|
||||
+ [onetmvp] Add shortcut extractor
|
||||
+ [vodpl] Add support for vod.pl (#12122)
|
||||
+ [pornhub] Extract video URL from tv platform site (#12007, #12129)
|
||||
+ [ceskatelevize] Extract DASH formats (#12119, #12133)
|
||||
|
||||
|
||||
version 2017.02.14
|
||||
|
||||
Core
|
||||
|
0
devscripts/run_tests.sh
Normal file → Executable file
0
devscripts/run_tests.sh
Normal file → Executable file
@ -546,8 +546,10 @@
|
||||
- **OktoberfestTV**
|
||||
- **on.aol.com**
|
||||
- **OnDemandKorea**
|
||||
- **onet.pl**
|
||||
- **onet.tv**
|
||||
- **onet.tv:channel**
|
||||
- **OnetMVP**
|
||||
- **OnionStudios**
|
||||
- **Ooyala**
|
||||
- **OoyalaExternal**
|
||||
@ -900,6 +902,7 @@
|
||||
- **vlive**
|
||||
- **vlive:channel**
|
||||
- **Vodlocker**
|
||||
- **VODPl**
|
||||
- **VODPlatform**
|
||||
- **VoiceRepublic**
|
||||
- **VoxMedia**
|
||||
|
@ -1,4 +1,5 @@
|
||||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
@ -540,10 +541,10 @@ class TestYoutubeDL(unittest.TestCase):
|
||||
self.assertEqual(ydl._format_note({}), '')
|
||||
assertRegexpMatches(self, ydl._format_note({
|
||||
'vbr': 10,
|
||||
}), '^\s*10k$')
|
||||
}), r'^\s*10k$')
|
||||
assertRegexpMatches(self, ydl._format_note({
|
||||
'fps': 30,
|
||||
}), '^30fps$')
|
||||
}), r'^30fps$')
|
||||
|
||||
def test_postprocessors(self):
|
||||
filename = 'post-processor-testfile.mp4'
|
||||
@ -606,6 +607,8 @@ class TestYoutubeDL(unittest.TestCase):
|
||||
'duration': 30,
|
||||
'filesize': 10 * 1024,
|
||||
'playlist_id': '42',
|
||||
'uploader': "變態妍字幕版 太妍 тест",
|
||||
'creator': "тест ' 123 ' тест--",
|
||||
}
|
||||
second = {
|
||||
'id': '2',
|
||||
@ -616,6 +619,7 @@ class TestYoutubeDL(unittest.TestCase):
|
||||
'description': 'foo',
|
||||
'filesize': 5 * 1024,
|
||||
'playlist_id': '43',
|
||||
'uploader': "тест 123",
|
||||
}
|
||||
videos = [first, second]
|
||||
|
||||
@ -656,6 +660,26 @@ class TestYoutubeDL(unittest.TestCase):
|
||||
res = get_videos(f)
|
||||
self.assertEqual(res, ['1'])
|
||||
|
||||
f = match_filter_func('uploader = "變態妍字幕版 太妍 тест"')
|
||||
res = get_videos(f)
|
||||
self.assertEqual(res, ['1'])
|
||||
|
||||
f = match_filter_func('uploader != "變態妍字幕版 太妍 тест"')
|
||||
res = get_videos(f)
|
||||
self.assertEqual(res, ['2'])
|
||||
|
||||
f = match_filter_func('creator = "тест \' 123 \' тест--"')
|
||||
res = get_videos(f)
|
||||
self.assertEqual(res, ['1'])
|
||||
|
||||
f = match_filter_func("creator = 'тест \\' 123 \\' тест--'")
|
||||
res = get_videos(f)
|
||||
self.assertEqual(res, ['1'])
|
||||
|
||||
f = match_filter_func(r"creator = 'тест \' 123 \' тест--' & duration > 30")
|
||||
res = get_videos(f)
|
||||
self.assertEqual(res, [])
|
||||
|
||||
def test_playlist_items_selection(self):
|
||||
entries = [{
|
||||
'id': compat_str(i),
|
||||
|
@ -53,20 +53,30 @@ class AMCNetworksIE(ThePlatformIE):
|
||||
'mbr': 'true',
|
||||
'manifest': 'm3u',
|
||||
}
|
||||
media_url = self._search_regex(r'window\.platformLinkURL\s*=\s*[\'"]([^\'"]+)', webpage, 'media url')
|
||||
media_url = self._search_regex(
|
||||
r'window\.platformLinkURL\s*=\s*[\'"]([^\'"]+)',
|
||||
webpage, 'media url')
|
||||
theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
|
||||
r'https?://link.theplatform.com/s/([^?]+)', media_url, 'theplatform_path'), display_id)
|
||||
r'link\.theplatform\.com/s/([^?]+)',
|
||||
media_url, 'theplatform_path'), display_id)
|
||||
info = self._parse_theplatform_metadata(theplatform_metadata)
|
||||
video_id = theplatform_metadata['pid']
|
||||
title = theplatform_metadata['title']
|
||||
rating = theplatform_metadata['ratings'][0]['rating']
|
||||
auth_required = self._search_regex(r'window\.authRequired\s*=\s*(true|false);', webpage, 'auth required')
|
||||
auth_required = self._search_regex(
|
||||
r'window\.authRequired\s*=\s*(true|false);',
|
||||
webpage, 'auth required')
|
||||
if auth_required == 'true':
|
||||
requestor_id = self._search_regex(r'window\.requestor_id\s*=\s*[\'"]([^\'"]+)', webpage, 'requestor id')
|
||||
resource = self._get_mvpd_resource(requestor_id, title, video_id, rating)
|
||||
query['auth'] = self._extract_mvpd_auth(url, video_id, requestor_id, resource)
|
||||
requestor_id = self._search_regex(
|
||||
r'window\.requestor_id\s*=\s*[\'"]([^\'"]+)',
|
||||
webpage, 'requestor id')
|
||||
resource = self._get_mvpd_resource(
|
||||
requestor_id, title, video_id, rating)
|
||||
query['auth'] = self._extract_mvpd_auth(
|
||||
url, video_id, requestor_id, resource)
|
||||
media_url = update_url_query(media_url, query)
|
||||
formats, subtitles = self._extract_theplatform_smil(media_url, video_id)
|
||||
formats, subtitles = self._extract_theplatform_smil(
|
||||
media_url, video_id)
|
||||
self._sort_formats(formats)
|
||||
info.update({
|
||||
'id': video_id,
|
||||
@ -78,9 +88,11 @@ class AMCNetworksIE(ThePlatformIE):
|
||||
if ns_keys:
|
||||
ns = list(ns_keys)[0]
|
||||
series = theplatform_metadata.get(ns + '$show')
|
||||
season_number = int_or_none(theplatform_metadata.get(ns + '$season'))
|
||||
season_number = int_or_none(
|
||||
theplatform_metadata.get(ns + '$season'))
|
||||
episode = theplatform_metadata.get(ns + '$episodeTitle')
|
||||
episode_number = int_or_none(theplatform_metadata.get(ns + '$episode'))
|
||||
episode_number = int_or_none(
|
||||
theplatform_metadata.get(ns + '$episode'))
|
||||
if season_number:
|
||||
title = 'Season %d - %s' % (season_number, title)
|
||||
if series:
|
||||
|
@ -1,13 +1,13 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .jwplatform import JWPlatformBaseIE
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
unified_strdate,
|
||||
clean_html,
|
||||
)
|
||||
|
||||
|
||||
class ArchiveOrgIE(JWPlatformBaseIE):
|
||||
class ArchiveOrgIE(InfoExtractor):
|
||||
IE_NAME = 'archive.org'
|
||||
IE_DESC = 'archive.org videos'
|
||||
_VALID_URL = r'https?://(?:www\.)?archive\.org/(?:details|embed)/(?P<id>[^/?#]+)(?:[?].*)?$'
|
||||
|
@ -13,6 +13,7 @@ from ..utils import (
|
||||
float_or_none,
|
||||
sanitized_Request,
|
||||
urlencode_postdata,
|
||||
USER_AGENTS,
|
||||
)
|
||||
|
||||
|
||||
@ -21,10 +22,10 @@ class CeskaTelevizeIE(InfoExtractor):
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220',
|
||||
'info_dict': {
|
||||
'id': '61924494876951776',
|
||||
'id': '61924494877246241',
|
||||
'ext': 'mp4',
|
||||
'title': 'Hyde Park Civilizace',
|
||||
'description': 'md5:fe93f6eda372d150759d11644ebbfb4a',
|
||||
'title': 'Hyde Park Civilizace: Život v Grónsku',
|
||||
'description': 'md5:3fec8f6bb497be5cdb0c9e8781076626',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'duration': 3350,
|
||||
},
|
||||
@ -114,6 +115,9 @@ class CeskaTelevizeIE(InfoExtractor):
|
||||
'requestSource': 'iVysilani',
|
||||
}
|
||||
|
||||
entries = []
|
||||
|
||||
for user_agent in (None, USER_AGENTS['Safari']):
|
||||
req = sanitized_Request(
|
||||
'http://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist',
|
||||
data=urlencode_postdata(data))
|
||||
@ -121,9 +125,14 @@ class CeskaTelevizeIE(InfoExtractor):
|
||||
req.add_header('Content-type', 'application/x-www-form-urlencoded')
|
||||
req.add_header('x-addr', '127.0.0.1')
|
||||
req.add_header('X-Requested-With', 'XMLHttpRequest')
|
||||
if user_agent:
|
||||
req.add_header('User-Agent', user_agent)
|
||||
req.add_header('Referer', url)
|
||||
|
||||
playlistpage = self._download_json(req, playlist_id)
|
||||
playlistpage = self._download_json(req, playlist_id, fatal=False)
|
||||
|
||||
if not playlistpage:
|
||||
continue
|
||||
|
||||
playlist_url = playlistpage['url']
|
||||
if playlist_url == 'error_region':
|
||||
@ -135,19 +144,38 @@ class CeskaTelevizeIE(InfoExtractor):
|
||||
playlist_title = self._og_search_title(webpage, default=None)
|
||||
playlist_description = self._og_search_description(webpage, default=None)
|
||||
|
||||
playlist = self._download_json(req, playlist_id)['playlist']
|
||||
playlist = self._download_json(req, playlist_id, fatal=False)
|
||||
if not playlist:
|
||||
continue
|
||||
|
||||
playlist = playlist.get('playlist')
|
||||
if not isinstance(playlist, list):
|
||||
continue
|
||||
|
||||
playlist_len = len(playlist)
|
||||
|
||||
entries = []
|
||||
for item in playlist:
|
||||
for num, item in enumerate(playlist):
|
||||
is_live = item.get('type') == 'LIVE'
|
||||
formats = []
|
||||
for format_id, stream_url in item['streamUrls'].items():
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
for format_id, stream_url in item.get('streamUrls', {}).items():
|
||||
if 'playerType=flash' in stream_url:
|
||||
stream_formats = self._extract_m3u8_formats(
|
||||
stream_url, playlist_id, 'mp4',
|
||||
entry_protocol='m3u8' if is_live else 'm3u8_native',
|
||||
fatal=False))
|
||||
self._sort_formats(formats)
|
||||
m3u8_id='hls-%s' % format_id, fatal=False)
|
||||
else:
|
||||
stream_formats = self._extract_mpd_formats(
|
||||
stream_url, playlist_id,
|
||||
mpd_id='dash-%s' % format_id, fatal=False)
|
||||
# See https://github.com/rg3/youtube-dl/issues/12119#issuecomment-280037031
|
||||
if format_id == 'audioDescription':
|
||||
for f in stream_formats:
|
||||
f['source_preference'] = -10
|
||||
formats.extend(stream_formats)
|
||||
|
||||
if user_agent and len(entries) == playlist_len:
|
||||
entries[num]['formats'].extend(formats)
|
||||
continue
|
||||
|
||||
item_id = item.get('id') or item['assetId']
|
||||
title = item['title']
|
||||
@ -179,6 +207,9 @@ class CeskaTelevizeIE(InfoExtractor):
|
||||
'is_live': is_live,
|
||||
})
|
||||
|
||||
for e in entries:
|
||||
self._sort_formats(e['formats'])
|
||||
|
||||
return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
|
||||
|
||||
def _get_subtitles(self, episode_id, subs):
|
||||
|
@ -40,6 +40,7 @@ from ..utils import (
|
||||
fix_xml_ampersands,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
parse_iso8601,
|
||||
RegexNotFoundError,
|
||||
sanitize_filename,
|
||||
@ -2073,6 +2074,123 @@ class InfoExtractor(object):
|
||||
})
|
||||
return formats
|
||||
|
||||
@staticmethod
|
||||
def _find_jwplayer_data(webpage):
|
||||
mobj = re.search(
|
||||
r'jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)\.setup\s*\((?P<options>[^)]+)\)',
|
||||
webpage)
|
||||
if mobj:
|
||||
return mobj.group('options')
|
||||
|
||||
def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs):
|
||||
jwplayer_data = self._parse_json(
|
||||
self._find_jwplayer_data(webpage), video_id,
|
||||
transform_source=js_to_json)
|
||||
return self._parse_jwplayer_data(
|
||||
jwplayer_data, video_id, *args, **kwargs)
|
||||
|
||||
def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True,
|
||||
m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None):
|
||||
# JWPlayer backward compatibility: flattened playlists
|
||||
# https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/api/config.js#L81-L96
|
||||
if 'playlist' not in jwplayer_data:
|
||||
jwplayer_data = {'playlist': [jwplayer_data]}
|
||||
|
||||
entries = []
|
||||
|
||||
# JWPlayer backward compatibility: single playlist item
|
||||
# https://github.com/jwplayer/jwplayer/blob/v7.7.0/src/js/playlist/playlist.js#L10
|
||||
if not isinstance(jwplayer_data['playlist'], list):
|
||||
jwplayer_data['playlist'] = [jwplayer_data['playlist']]
|
||||
|
||||
for video_data in jwplayer_data['playlist']:
|
||||
# JWPlayer backward compatibility: flattened sources
|
||||
# https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/playlist/item.js#L29-L35
|
||||
if 'sources' not in video_data:
|
||||
video_data['sources'] = [video_data]
|
||||
|
||||
this_video_id = video_id or video_data['mediaid']
|
||||
|
||||
formats = []
|
||||
for source in video_data['sources']:
|
||||
source_url = self._proto_relative_url(source['file'])
|
||||
if base_url:
|
||||
source_url = compat_urlparse.urljoin(base_url, source_url)
|
||||
source_type = source.get('type') or ''
|
||||
ext = mimetype2ext(source_type) or determine_ext(source_url)
|
||||
if source_type == 'hls' or ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
source_url, this_video_id, 'mp4', 'm3u8_native', m3u8_id=m3u8_id, fatal=False))
|
||||
elif ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
source_url, this_video_id, mpd_id=mpd_id, fatal=False))
|
||||
# https://github.com/jwplayer/jwplayer/blob/master/src/js/providers/default.js#L67
|
||||
elif source_type.startswith('audio') or ext in ('oga', 'aac', 'mp3', 'mpeg', 'vorbis'):
|
||||
formats.append({
|
||||
'url': source_url,
|
||||
'vcodec': 'none',
|
||||
'ext': ext,
|
||||
})
|
||||
else:
|
||||
height = int_or_none(source.get('height'))
|
||||
if height is None:
|
||||
# Often no height is provided but there is a label in
|
||||
# format like 1080p.
|
||||
height = int_or_none(self._search_regex(
|
||||
r'^(\d{3,})[pP]$', source.get('label') or '',
|
||||
'height', default=None))
|
||||
a_format = {
|
||||
'url': source_url,
|
||||
'width': int_or_none(source.get('width')),
|
||||
'height': height,
|
||||
'ext': ext,
|
||||
}
|
||||
if source_url.startswith('rtmp'):
|
||||
a_format['ext'] = 'flv'
|
||||
|
||||
# See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as
|
||||
# of jwplayer.flash.swf
|
||||
rtmp_url_parts = re.split(
|
||||
r'((?:mp4|mp3|flv):)', source_url, 1)
|
||||
if len(rtmp_url_parts) == 3:
|
||||
rtmp_url, prefix, play_path = rtmp_url_parts
|
||||
a_format.update({
|
||||
'url': rtmp_url,
|
||||
'play_path': prefix + play_path,
|
||||
})
|
||||
if rtmp_params:
|
||||
a_format.update(rtmp_params)
|
||||
formats.append(a_format)
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
tracks = video_data.get('tracks')
|
||||
if tracks and isinstance(tracks, list):
|
||||
for track in tracks:
|
||||
if track.get('kind') != 'captions':
|
||||
continue
|
||||
track_url = urljoin(base_url, track.get('file'))
|
||||
if not track_url:
|
||||
continue
|
||||
subtitles.setdefault(track.get('label') or 'en', []).append({
|
||||
'url': self._proto_relative_url(track_url)
|
||||
})
|
||||
|
||||
entries.append({
|
||||
'id': this_video_id,
|
||||
'title': video_data['title'] if require_title else video_data.get('title'),
|
||||
'description': video_data.get('description'),
|
||||
'thumbnail': self._proto_relative_url(video_data.get('image')),
|
||||
'timestamp': int_or_none(video_data.get('pubdate')),
|
||||
'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')),
|
||||
'subtitles': subtitles,
|
||||
'formats': formats,
|
||||
})
|
||||
if len(entries) == 1:
|
||||
return entries[0]
|
||||
else:
|
||||
return self.playlist_result(entries)
|
||||
|
||||
def _live_title(self, name):
|
||||
""" Generate the title for a live video """
|
||||
now = datetime.datetime.now()
|
||||
|
@ -18,8 +18,8 @@ from ..utils import (
|
||||
|
||||
|
||||
class EinthusanIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://einthusan\.tv/movie/watch/(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://einthusan\.tv/movie/watch/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://einthusan.tv/movie/watch/9097/',
|
||||
'md5': 'ff0f7f2065031b8a2cf13a933731c035',
|
||||
'info_dict': {
|
||||
@ -29,7 +29,10 @@ class EinthusanIE(InfoExtractor):
|
||||
'description': 'md5:33ef934c82a671a94652a9b4e54d931b',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'https://einthusan.tv/movie/watch/51MZ/?lang=hindi',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
# reversed from jsoncrypto.prototype.decrypt() in einthusan-PGMovieWatcher.js
|
||||
def _decrypt(self, encrypted_data, video_id):
|
||||
|
@ -1,13 +1,9 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
NO_DEFAULT,
|
||||
)
|
||||
from .kaltura import KalturaIE
|
||||
from ..utils import NO_DEFAULT
|
||||
|
||||
|
||||
class EllenTVIE(InfoExtractor):
|
||||
@ -65,7 +61,7 @@ class EllenTVIE(InfoExtractor):
|
||||
if partner_id and kaltura_id:
|
||||
break
|
||||
|
||||
return self.url_result('kaltura:%s:%s' % (partner_id, kaltura_id), 'Kaltura')
|
||||
return self.url_result('kaltura:%s:%s' % (partner_id, kaltura_id), KalturaIE.ie_key())
|
||||
|
||||
|
||||
class EllenTVClipsIE(InfoExtractor):
|
||||
@ -77,14 +73,14 @@ class EllenTVClipsIE(InfoExtractor):
|
||||
'id': 'meryl-streep-vanessa-hudgens',
|
||||
'title': 'Meryl Streep, Vanessa Hudgens',
|
||||
},
|
||||
'playlist_mincount': 7,
|
||||
'playlist_mincount': 5,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
playlist = self._extract_playlist(webpage)
|
||||
playlist = self._extract_playlist(webpage, playlist_id)
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
@ -93,16 +89,13 @@ class EllenTVClipsIE(InfoExtractor):
|
||||
'entries': self._extract_entries(playlist)
|
||||
}
|
||||
|
||||
def _extract_playlist(self, webpage):
|
||||
def _extract_playlist(self, webpage, playlist_id):
|
||||
json_string = self._search_regex(r'playerView.addClips\(\[\{(.*?)\}\]\);', webpage, 'json')
|
||||
try:
|
||||
return json.loads('[{' + json_string + '}]')
|
||||
except ValueError as ve:
|
||||
raise ExtractorError('Failed to download JSON', cause=ve)
|
||||
return self._parse_json('[{' + json_string + '}]', playlist_id)
|
||||
|
||||
def _extract_entries(self, playlist):
|
||||
return [
|
||||
self.url_result(
|
||||
'kaltura:%s:%s' % (item['kaltura_partner_id'], item['kaltura_entry_id']),
|
||||
'Kaltura')
|
||||
KalturaIE.ie_key(), video_id=item['kaltura_entry_id'])
|
||||
for item in playlist]
|
||||
|
@ -39,6 +39,18 @@ class ElPaisIE(InfoExtractor):
|
||||
'description': 'La nave portaba cientos de ánforas y se hundió cerca de la isla de Cabrera por razones desconocidas',
|
||||
'upload_date': '20170127',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://epv.elpais.com/epv/2017/02/14/programa_la_voz_de_inaki/1487062137_075943.html',
|
||||
'info_dict': {
|
||||
'id': '1487062137_075943',
|
||||
'ext': 'mp4',
|
||||
'title': 'Disyuntivas',
|
||||
'description': 'md5:a0fb1485c4a6a8a917e6f93878e66218',
|
||||
'upload_date': '20170214',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -59,14 +71,15 @@ class ElPaisIE(InfoExtractor):
|
||||
video_url = prefix + video_suffix
|
||||
thumbnail_suffix = self._search_regex(
|
||||
r"(?:URLMediaStill|urlFotogramaFijo_\d+)\s*=\s*url_cache\s*\+\s*'([^']+)'",
|
||||
webpage, 'thumbnail URL', fatal=False)
|
||||
webpage, 'thumbnail URL', default=None)
|
||||
thumbnail = (
|
||||
None if thumbnail_suffix is None
|
||||
else prefix + thumbnail_suffix)
|
||||
else prefix + thumbnail_suffix) or self._og_search_thumbnail(webpage)
|
||||
title = self._html_search_regex(
|
||||
(r"tituloVideo\s*=\s*'([^']+)'", webpage, 'title',
|
||||
r'<h2 class="entry-header entry-title.*?>(.*?)</h2>'),
|
||||
webpage, 'title')
|
||||
(r"tituloVideo\s*=\s*'([^']+)'",
|
||||
r'<h2 class="entry-header entry-title.*?>(.*?)</h2>',
|
||||
r'<h1[^>]+class="titulo"[^>]*>([^<]+)'),
|
||||
webpage, 'title', default=None) or self._og_search_title(webpage)
|
||||
upload_date = unified_strdate(self._search_regex(
|
||||
r'<p class="date-header date-int updated"\s+title="([^"]+)">',
|
||||
webpage, 'upload date', default=None) or self._html_search_meta(
|
||||
|
@ -694,6 +694,8 @@ from .ondemandkorea import OnDemandKoreaIE
|
||||
from .onet import (
|
||||
OnetIE,
|
||||
OnetChannelIE,
|
||||
OnetMVPIE,
|
||||
OnetPlIE,
|
||||
)
|
||||
from .onionstudios import OnionStudiosIE
|
||||
from .ooyala import (
|
||||
@ -1147,6 +1149,7 @@ from .vlive import (
|
||||
VLiveChannelIE
|
||||
)
|
||||
from .vodlocker import VodlockerIE
|
||||
from .vodpl import VODPlIE
|
||||
from .vodplatform import VODPlatformIE
|
||||
from .voicerepublic import VoiceRepublicIE
|
||||
from .voxmedia import VoxMediaIE
|
||||
|
@ -20,6 +20,7 @@ from ..utils import (
|
||||
float_or_none,
|
||||
HEADRequest,
|
||||
is_html,
|
||||
js_to_json,
|
||||
orderedSet,
|
||||
sanitized_Request,
|
||||
smuggle_url,
|
||||
@ -961,6 +962,16 @@ class GenericIE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
# Complex jwplayer
|
||||
{
|
||||
'url': 'http://www.indiedb.com/games/king-machine/videos',
|
||||
'info_dict': {
|
||||
'id': 'videos',
|
||||
'ext': 'mp4',
|
||||
'title': 'king machine trailer 1',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
},
|
||||
# rtl.nl embed
|
||||
{
|
||||
'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
|
||||
@ -1490,7 +1501,12 @@ class GenericIE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': [VideoPressIE.ie_key()],
|
||||
}
|
||||
},
|
||||
{
|
||||
# ThePlatform embedded with whitespaces in URLs
|
||||
'url': 'http://www.golfchannel.com/topics/shows/golftalkcentral.htm',
|
||||
'only_matching': True,
|
||||
},
|
||||
# {
|
||||
# # TODO: find another test
|
||||
# # http://schema.org/VideoObject
|
||||
@ -2488,6 +2504,15 @@ class GenericIE(InfoExtractor):
|
||||
self._sort_formats(entry['formats'])
|
||||
return self.playlist_result(entries)
|
||||
|
||||
jwplayer_data_str = self._find_jwplayer_data(webpage)
|
||||
if jwplayer_data_str:
|
||||
try:
|
||||
jwplayer_data = self._parse_json(
|
||||
jwplayer_data_str, video_id, transform_source=js_to_json)
|
||||
return self._parse_jwplayer_data(jwplayer_data, video_id)
|
||||
except ExtractorError:
|
||||
pass
|
||||
|
||||
def check_video(vurl):
|
||||
if YoutubeIE.suitable(vurl):
|
||||
return True
|
||||
|
@ -6,59 +6,58 @@ from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
|
||||
class HeiseIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?:www\.)?heise\.de/video/artikel/
|
||||
.+?(?P<id>[0-9]+)\.html(?:$|[?#])
|
||||
'''
|
||||
_TEST = {
|
||||
'url': (
|
||||
'http://www.heise.de/video/artikel/Podcast-c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2404147.html'
|
||||
),
|
||||
_VALID_URL = r'https?://(?:www\.)?heise\.de/(?:[^/]+/)+[^/]+-(?P<id>[0-9]+)\.html'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.heise.de/video/artikel/Podcast-c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2404147.html',
|
||||
'md5': 'ffed432483e922e88545ad9f2f15d30e',
|
||||
'info_dict': {
|
||||
'id': '2404147',
|
||||
'ext': 'mp4',
|
||||
'title': (
|
||||
"Podcast: c't uplink 3.3 – Owncloud / Tastaturen / Peilsender Smartphone"
|
||||
),
|
||||
'title': "Podcast: c't uplink 3.3 – Owncloud / Tastaturen / Peilsender Smartphone",
|
||||
'format_id': 'mp4_720p',
|
||||
'timestamp': 1411812600,
|
||||
'upload_date': '20140927',
|
||||
'description': 'In uplink-Episode 3.3 geht es darum, wie man sich von Cloud-Anbietern emanzipieren kann, worauf man beim Kauf einer Tastatur achten sollte und was Smartphones über uns verraten.',
|
||||
'thumbnail': r're:^https?://.*\.jpe?g$',
|
||||
}
|
||||
'description': 'md5:c934cbfb326c669c2bcabcbe3d3fcd20',
|
||||
'thumbnail': r're:^https?://.*/gallery/$',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.heise.de/ct/artikel/c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2403911.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.heise.de/newsticker/meldung/c-t-uplink-Owncloud-Tastaturen-Peilsender-Smartphone-2404251.html?wt_mc=rss.ho.beitrag.atom',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.heise.de/ct/ausgabe/2016-12-Spiele-3214137.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
container_id = self._search_regex(
|
||||
r'<div class="videoplayerjw".*?data-container="([0-9]+)"',
|
||||
r'<div class="videoplayerjw"[^>]+data-container="([0-9]+)"',
|
||||
webpage, 'container ID')
|
||||
sequenz_id = self._search_regex(
|
||||
r'<div class="videoplayerjw".*?data-sequenz="([0-9]+)"',
|
||||
r'<div class="videoplayerjw"[^>]+data-sequenz="([0-9]+)"',
|
||||
webpage, 'sequenz ID')
|
||||
data_url = 'http://www.heise.de/videout/feed?container=%s&sequenz=%s' % (container_id, sequenz_id)
|
||||
doc = self._download_xml(data_url, video_id)
|
||||
|
||||
info = {
|
||||
'id': video_id,
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'timestamp': parse_iso8601(
|
||||
self._html_search_meta('date', webpage)),
|
||||
'description': self._og_search_description(webpage),
|
||||
}
|
||||
title = self._html_search_meta('fulltitle', webpage, default=None)
|
||||
if not title or title == "c't":
|
||||
title = self._search_regex(
|
||||
r'<div[^>]+class="videoplayerjw"[^>]+data-title="([^"]+)"',
|
||||
webpage, 'title')
|
||||
|
||||
title = self._html_search_meta('fulltitle', webpage)
|
||||
if title:
|
||||
info['title'] = title
|
||||
else:
|
||||
info['title'] = self._og_search_title(webpage)
|
||||
doc = self._download_xml(
|
||||
'http://www.heise.de/videout/feed', video_id, query={
|
||||
'container': container_id,
|
||||
'sequenz': sequenz_id,
|
||||
})
|
||||
|
||||
formats = []
|
||||
for source_node in doc.findall('.//{http://rss.jwpcdn.com/}source'):
|
||||
@ -74,6 +73,18 @@ class HeiseIE(InfoExtractor):
|
||||
'height': height,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
info['formats'] = formats
|
||||
|
||||
return info
|
||||
description = self._og_search_description(
|
||||
webpage, default=None) or self._html_search_meta(
|
||||
'description', webpage)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': (xpath_text(doc, './/{http://rss.jwpcdn.com/}image') or
|
||||
self._og_search_thumbnail(webpage)),
|
||||
'timestamp': parse_iso8601(
|
||||
self._html_search_meta('date', webpage)),
|
||||
'formats': formats,
|
||||
}
|
||||
|
@ -4,139 +4,9 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
mimetype2ext,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class JWPlatformBaseIE(InfoExtractor):
|
||||
@staticmethod
|
||||
def _find_jwplayer_data(webpage):
|
||||
# TODO: Merge this with JWPlayer-related codes in generic.py
|
||||
|
||||
mobj = re.search(
|
||||
r'jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)\.setup\s*\((?P<options>[^)]+)\)',
|
||||
webpage)
|
||||
if mobj:
|
||||
return mobj.group('options')
|
||||
|
||||
def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs):
|
||||
jwplayer_data = self._parse_json(
|
||||
self._find_jwplayer_data(webpage), video_id,
|
||||
transform_source=js_to_json)
|
||||
return self._parse_jwplayer_data(
|
||||
jwplayer_data, video_id, *args, **kwargs)
|
||||
|
||||
def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True,
|
||||
m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None):
|
||||
# JWPlayer backward compatibility: flattened playlists
|
||||
# https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/api/config.js#L81-L96
|
||||
if 'playlist' not in jwplayer_data:
|
||||
jwplayer_data = {'playlist': [jwplayer_data]}
|
||||
|
||||
entries = []
|
||||
|
||||
# JWPlayer backward compatibility: single playlist item
|
||||
# https://github.com/jwplayer/jwplayer/blob/v7.7.0/src/js/playlist/playlist.js#L10
|
||||
if not isinstance(jwplayer_data['playlist'], list):
|
||||
jwplayer_data['playlist'] = [jwplayer_data['playlist']]
|
||||
|
||||
for video_data in jwplayer_data['playlist']:
|
||||
# JWPlayer backward compatibility: flattened sources
|
||||
# https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/playlist/item.js#L29-L35
|
||||
if 'sources' not in video_data:
|
||||
video_data['sources'] = [video_data]
|
||||
|
||||
this_video_id = video_id or video_data['mediaid']
|
||||
|
||||
formats = []
|
||||
for source in video_data['sources']:
|
||||
source_url = self._proto_relative_url(source['file'])
|
||||
if base_url:
|
||||
source_url = compat_urlparse.urljoin(base_url, source_url)
|
||||
source_type = source.get('type') or ''
|
||||
ext = mimetype2ext(source_type) or determine_ext(source_url)
|
||||
if source_type == 'hls' or ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
source_url, this_video_id, 'mp4', 'm3u8_native', m3u8_id=m3u8_id, fatal=False))
|
||||
elif ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
source_url, this_video_id, mpd_id=mpd_id, fatal=False))
|
||||
# https://github.com/jwplayer/jwplayer/blob/master/src/js/providers/default.js#L67
|
||||
elif source_type.startswith('audio') or ext in ('oga', 'aac', 'mp3', 'mpeg', 'vorbis'):
|
||||
formats.append({
|
||||
'url': source_url,
|
||||
'vcodec': 'none',
|
||||
'ext': ext,
|
||||
})
|
||||
else:
|
||||
height = int_or_none(source.get('height'))
|
||||
if height is None:
|
||||
# Often no height is provided but there is a label in
|
||||
# format like 1080p.
|
||||
height = int_or_none(self._search_regex(
|
||||
r'^(\d{3,})[pP]$', source.get('label') or '',
|
||||
'height', default=None))
|
||||
a_format = {
|
||||
'url': source_url,
|
||||
'width': int_or_none(source.get('width')),
|
||||
'height': height,
|
||||
'ext': ext,
|
||||
}
|
||||
if source_url.startswith('rtmp'):
|
||||
a_format['ext'] = 'flv'
|
||||
|
||||
# See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as
|
||||
# of jwplayer.flash.swf
|
||||
rtmp_url_parts = re.split(
|
||||
r'((?:mp4|mp3|flv):)', source_url, 1)
|
||||
if len(rtmp_url_parts) == 3:
|
||||
rtmp_url, prefix, play_path = rtmp_url_parts
|
||||
a_format.update({
|
||||
'url': rtmp_url,
|
||||
'play_path': prefix + play_path,
|
||||
})
|
||||
if rtmp_params:
|
||||
a_format.update(rtmp_params)
|
||||
formats.append(a_format)
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
tracks = video_data.get('tracks')
|
||||
if tracks and isinstance(tracks, list):
|
||||
for track in tracks:
|
||||
if track.get('kind') != 'captions':
|
||||
continue
|
||||
track_url = urljoin(base_url, track.get('file'))
|
||||
if not track_url:
|
||||
continue
|
||||
subtitles.setdefault(track.get('label') or 'en', []).append({
|
||||
'url': self._proto_relative_url(track_url)
|
||||
})
|
||||
|
||||
entries.append({
|
||||
'id': this_video_id,
|
||||
'title': video_data['title'] if require_title else video_data.get('title'),
|
||||
'description': video_data.get('description'),
|
||||
'thumbnail': self._proto_relative_url(video_data.get('image')),
|
||||
'timestamp': int_or_none(video_data.get('pubdate')),
|
||||
'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')),
|
||||
'subtitles': subtitles,
|
||||
'formats': formats,
|
||||
})
|
||||
if len(entries) == 1:
|
||||
return entries[0]
|
||||
else:
|
||||
return self.playlist_result(entries)
|
||||
|
||||
|
||||
class JWPlatformIE(JWPlatformBaseIE):
|
||||
class JWPlatformIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:https?://content\.jwplatform\.com/(?:feeds|players|jw6)/|jwplatform:)(?P<id>[a-zA-Z0-9]{8})'
|
||||
_TEST = {
|
||||
'url': 'http://content.jwplatform.com/players/nPripu9l-ALJ3XQCI.js',
|
||||
|
@ -1,14 +1,14 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .jwplatform import JWPlatformBaseIE
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
js_to_json,
|
||||
)
|
||||
|
||||
|
||||
class OnDemandKoreaIE(JWPlatformBaseIE):
|
||||
class OnDemandKoreaIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?ondemandkorea\.com/(?P<id>[^/]+)\.html'
|
||||
_TEST = {
|
||||
'url': 'http://www.ondemandkorea.com/ask-us-anything-e43.html',
|
||||
|
@ -23,7 +23,7 @@ class OnetBaseIE(InfoExtractor):
|
||||
return self._search_regex(
|
||||
r'id=(["\'])mvp:(?P<id>.+?)\1', webpage, 'mvp id', group='id')
|
||||
|
||||
def _extract_from_id(self, video_id, webpage):
|
||||
def _extract_from_id(self, video_id, webpage=None):
|
||||
response = self._download_json(
|
||||
'http://qi.ckm.onetapi.pl/', video_id,
|
||||
query={
|
||||
@ -74,8 +74,10 @@ class OnetBaseIE(InfoExtractor):
|
||||
|
||||
meta = video.get('meta', {})
|
||||
|
||||
title = self._og_search_title(webpage, default=None) or meta['title']
|
||||
description = self._og_search_description(webpage, default=None) or meta.get('description')
|
||||
title = (self._og_search_title(
|
||||
webpage, default=None) if webpage else None) or meta['title']
|
||||
description = (self._og_search_description(
|
||||
webpage, default=None) if webpage else None) or meta.get('description')
|
||||
duration = meta.get('length') or meta.get('lenght')
|
||||
timestamp = parse_iso8601(meta.get('addDate'), ' ')
|
||||
|
||||
@ -89,6 +91,18 @@ class OnetBaseIE(InfoExtractor):
|
||||
}
|
||||
|
||||
|
||||
class OnetMVPIE(OnetBaseIE):
|
||||
_VALID_URL = r'onetmvp:(?P<id>\d+\.\d+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'onetmvp:381027.1509591944',
|
||||
'only_matching': True,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
return self._extract_from_id(self._match_id(url))
|
||||
|
||||
|
||||
class OnetIE(OnetBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?onet\.tv/[a-z]/[a-z]+/(?P<display_id>[0-9a-z-]+)/(?P<id>[0-9a-z]+)'
|
||||
IE_NAME = 'onet.tv'
|
||||
@ -167,3 +181,44 @@ class OnetChannelIE(OnetBaseIE):
|
||||
channel_title = strip_or_none(get_element_by_class('o_channelName', webpage))
|
||||
channel_description = strip_or_none(get_element_by_class('o_channelDesc', webpage))
|
||||
return self.playlist_result(entries, channel_id, channel_title, channel_description)
|
||||
|
||||
|
||||
class OnetPlIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:[^/]+\.)?(?:onet|businessinsider\.com|plejada)\.pl/(?:[^/]+/)+(?P<id>[0-9a-z]+)'
|
||||
IE_NAME = 'onet.pl'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://eurosport.onet.pl/zimowe/skoki-narciarskie/ziobro-wygral-kwalifikacje-w-pjongczangu/9ckrly',
|
||||
'md5': 'b94021eb56214c3969380388b6e73cb0',
|
||||
'info_dict': {
|
||||
'id': '1561707.1685479',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ziobro wygrał kwalifikacje w Pjongczangu',
|
||||
'description': 'md5:61fb0740084d2d702ea96512a03585b4',
|
||||
'upload_date': '20170214',
|
||||
'timestamp': 1487078046,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://film.onet.pl/zwiastuny/ghost-in-the-shell-drugi-zwiastun-pl/5q6yl3',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://moto.onet.pl/jak-wybierane-sa-miejsca-na-fotoradary/6rs04e',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://businessinsider.com.pl/wideo/scenariusz-na-koniec-swiata-wedlug-nasa/dwnqptk',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://plejada.pl/weronika-rosati-o-swoim-domniemanym-slubie/n2bq89',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
mvp_id = self._search_regex(
|
||||
r'data-params-mvp=["\'](\d+\.\d+)', webpage, 'mvp id')
|
||||
|
||||
return self.url_result(
|
||||
'onetmvp:%s' % mvp_id, OnetMVPIE.ie_key(), video_id=mvp_id)
|
||||
|
@ -75,17 +75,17 @@ class OpenloadIE(InfoExtractor):
|
||||
'<span[^>]+id="[^"]+"[^>]*>([0-9]+)</span>',
|
||||
webpage, 'openload ID')
|
||||
|
||||
first_three_chars = int(float(ol_id[0:][:3]))
|
||||
fifth_char = int(float(ol_id[3:5]))
|
||||
urlcode = ''
|
||||
num = 5
|
||||
first_two_chars = int(float(ol_id[0:][:2]))
|
||||
urlcode = []
|
||||
num = 2
|
||||
|
||||
while num < len(ol_id):
|
||||
urlcode += compat_chr(int(float(ol_id[num:][:3])) +
|
||||
first_three_chars - fifth_char * int(float(ol_id[num + 3:][:2])))
|
||||
key = int(float(ol_id[num + 3:][:2]))
|
||||
urlcode.append((key, compat_chr(int(float(ol_id[num:][:3])) - first_two_chars)))
|
||||
num += 5
|
||||
|
||||
video_url = 'https://openload.co/stream/' + urlcode
|
||||
video_url = 'https://openload.co/stream/' + ''.join(
|
||||
[value for _, value in sorted(urlcode, key=lambda x: x[0])])
|
||||
|
||||
title = self._og_search_title(webpage, default=None) or self._search_regex(
|
||||
r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage,
|
||||
|
@ -64,7 +64,8 @@ class PinkbikeIE(InfoExtractor):
|
||||
'video:duration', webpage, 'duration'))
|
||||
|
||||
uploader = self._search_regex(
|
||||
r'un:\s*"([^"]+)"', webpage, 'uploader', fatal=False)
|
||||
r'<a[^>]+\brel=["\']author[^>]+>([^<]+)', webpage,
|
||||
'uploader', fatal=False)
|
||||
upload_date = unified_strdate(self._search_regex(
|
||||
r'class="fullTime"[^>]+title="([^"]+)"',
|
||||
webpage, 'upload date', fatal=False))
|
||||
|
@ -2,27 +2,27 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import itertools
|
||||
import os
|
||||
# import os
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_HTTPError,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_parse_unquote_plus,
|
||||
compat_urllib_parse_urlparse,
|
||||
# compat_urllib_parse_unquote,
|
||||
# compat_urllib_parse_unquote_plus,
|
||||
# compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
orderedSet,
|
||||
sanitized_Request,
|
||||
# sanitized_Request,
|
||||
str_to_int,
|
||||
)
|
||||
from ..aes import (
|
||||
aes_decrypt_text
|
||||
)
|
||||
# from ..aes import (
|
||||
# aes_decrypt_text
|
||||
# )
|
||||
|
||||
|
||||
class PornHubIE(InfoExtractor):
|
||||
@ -109,10 +109,14 @@ class PornHubIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
req = sanitized_Request(
|
||||
'http://www.pornhub.com/view_video.php?viewkey=%s' % video_id)
|
||||
req.add_header('Cookie', 'age_verified=1')
|
||||
webpage = self._download_webpage(req, video_id)
|
||||
def dl_webpage(platform):
|
||||
return self._download_webpage(
|
||||
'http://www.pornhub.com/view_video.php?viewkey=%s' % video_id,
|
||||
video_id, headers={
|
||||
'Cookie': 'age_verified=1; platform=%s' % platform,
|
||||
})
|
||||
|
||||
webpage = dl_webpage('pc')
|
||||
|
||||
error_msg = self._html_search_regex(
|
||||
r'(?s)<div[^>]+class=(["\'])(?:(?!\1).)*\b(?:removed|userMessageSection)\b(?:(?!\1).)*\1[^>]*>(?P<error>.+?)</div>',
|
||||
@ -123,10 +127,19 @@ class PornHubIE(InfoExtractor):
|
||||
'PornHub said: %s' % error_msg,
|
||||
expected=True, video_id=video_id)
|
||||
|
||||
tv_webpage = dl_webpage('tv')
|
||||
|
||||
video_url = self._search_regex(
|
||||
r'<video[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//.+?)\1', tv_webpage,
|
||||
'video url', group='url')
|
||||
|
||||
title = self._search_regex(
|
||||
r'<h1>([^>]+)</h1>', tv_webpage, 'title', default=None)
|
||||
|
||||
# video_title from flashvars contains whitespace instead of non-ASCII (see
|
||||
# http://www.pornhub.com/view_video.php?viewkey=1331683002), not relying
|
||||
# on that anymore.
|
||||
title = self._html_search_meta(
|
||||
title = title or self._html_search_meta(
|
||||
'twitter:title', webpage, default=None) or self._search_regex(
|
||||
(r'<h1[^>]+class=["\']title["\'][^>]*>(?P<title>[^<]+)',
|
||||
r'<div[^>]+data-video-title=(["\'])(?P<title>.+?)\1',
|
||||
@ -156,48 +169,6 @@ class PornHubIE(InfoExtractor):
|
||||
comment_count = self._extract_count(
|
||||
r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')
|
||||
|
||||
video_variables = {}
|
||||
for video_variablename, quote, video_variable in re.findall(
|
||||
r'(player_quality_[0-9]{3,4}p\w+)\s*=\s*(["\'])(.+?)\2;', webpage):
|
||||
video_variables[video_variablename] = video_variable
|
||||
|
||||
video_urls = []
|
||||
for encoded_video_url in re.findall(
|
||||
r'player_quality_[0-9]{3,4}p\s*=(.+?);', webpage):
|
||||
for varname, varval in video_variables.items():
|
||||
encoded_video_url = encoded_video_url.replace(varname, varval)
|
||||
video_urls.append(re.sub(r'[\s+]', '', encoded_video_url))
|
||||
|
||||
if webpage.find('"encrypted":true') != -1:
|
||||
password = compat_urllib_parse_unquote_plus(
|
||||
self._search_regex(r'"video_title":"([^"]+)', webpage, 'password'))
|
||||
video_urls = list(map(lambda s: aes_decrypt_text(s, password, 32).decode('utf-8'), video_urls))
|
||||
|
||||
formats = []
|
||||
for video_url in video_urls:
|
||||
path = compat_urllib_parse_urlparse(video_url).path
|
||||
extension = os.path.splitext(path)[1][1:]
|
||||
format = path.split('/')[5].split('_')[:2]
|
||||
format = '-'.join(format)
|
||||
|
||||
m = re.match(r'^(?P<height>[0-9]+)[pP]-(?P<tbr>[0-9]+)[kK]$', format)
|
||||
if m is None:
|
||||
height = None
|
||||
tbr = None
|
||||
else:
|
||||
height = int(m.group('height'))
|
||||
tbr = int(m.group('tbr'))
|
||||
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'ext': extension,
|
||||
'format': format,
|
||||
'format_id': format,
|
||||
'tbr': tbr,
|
||||
'height': height,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
page_params = self._parse_json(self._search_regex(
|
||||
r'page_params\.zoneDetails\[([\'"])[^\'"]+\1\]\s*=\s*(?P<data>{[^}]+})',
|
||||
webpage, 'page parameters', group='data', default='{}'),
|
||||
@ -209,6 +180,7 @@ class PornHubIE(InfoExtractor):
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'uploader': video_uploader,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
@ -217,7 +189,7 @@ class PornHubIE(InfoExtractor):
|
||||
'like_count': like_count,
|
||||
'dislike_count': dislike_count,
|
||||
'comment_count': comment_count,
|
||||
'formats': formats,
|
||||
# 'formats': formats,
|
||||
'age_limit': 18,
|
||||
'tags': tags,
|
||||
'categories': categories,
|
||||
|
@ -2,13 +2,13 @@ from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .jwplatform import JWPlatformBaseIE
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
str_to_int,
|
||||
)
|
||||
|
||||
|
||||
class PornoXOIE(JWPlatformBaseIE):
|
||||
class PornoXOIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?pornoxo\.com/videos/(?P<id>\d+)/(?P<display_id>[^/]+)\.html'
|
||||
_TEST = {
|
||||
'url': 'http://www.pornoxo.com/videos/7564/striptease-from-sexy-secretary.html',
|
||||
|
@ -2,11 +2,10 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .jwplatform import JWPlatformBaseIE
|
||||
from ..compat import compat_str
|
||||
|
||||
|
||||
class RENTVIE(JWPlatformBaseIE):
|
||||
class RENTVIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:rentv:|https?://(?:www\.)?ren\.tv/(?:player|video/epizod)/)(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://ren.tv/video/epizod/118577',
|
||||
|
@ -3,7 +3,7 @@ from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .jwplatform import JWPlatformBaseIE
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
js_to_json,
|
||||
get_element_by_class,
|
||||
@ -11,7 +11,7 @@ from ..utils import (
|
||||
)
|
||||
|
||||
|
||||
class RudoIE(JWPlatformBaseIE):
|
||||
class RudoIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://rudo\.video/vod/(?P<id>[0-9a-zA-Z]+)'
|
||||
|
||||
_TEST = {
|
||||
|
@ -1,11 +1,11 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .jwplatform import JWPlatformBaseIE
|
||||
from .common import InfoExtractor
|
||||
from ..utils import js_to_json
|
||||
|
||||
|
||||
class ScreencastOMaticIE(JWPlatformBaseIE):
|
||||
class ScreencastOMaticIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://screencast-o-matic\.com/watch/(?P<id>[0-9a-zA-Z]+)'
|
||||
_TEST = {
|
||||
'url': 'http://screencast-o-matic.com/watch/c2lD3BeOPl',
|
||||
|
@ -3,7 +3,7 @@ from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .jwplatform import JWPlatformBaseIE
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
parse_iso8601,
|
||||
@ -14,7 +14,7 @@ from ..utils import (
|
||||
)
|
||||
|
||||
|
||||
class SendtoNewsIE(JWPlatformBaseIE):
|
||||
class SendtoNewsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://embed\.sendtonews\.com/player2/embedplayer\.php\?.*\bSC=(?P<id>[0-9A-Za-z-]+)'
|
||||
|
||||
_TEST = {
|
||||
|
@ -179,10 +179,12 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
|
||||
if m:
|
||||
return [m.group('url')]
|
||||
|
||||
# Are whitesapces ignored in URLs?
|
||||
# https://github.com/rg3/youtube-dl/issues/12044
|
||||
matches = re.findall(
|
||||
r'<(?:iframe|script)[^>]+src=(["\'])((?:https?:)?//player\.theplatform\.com/p/.+?)\1', webpage)
|
||||
r'(?s)<(?:iframe|script)[^>]+src=(["\'])((?:https?:)?//player\.theplatform\.com/p/.+?)\1', webpage)
|
||||
if matches:
|
||||
return list(zip(*matches))[1]
|
||||
return [re.sub(r'\s', '', list(zip(*matches))[1][0])]
|
||||
|
||||
@staticmethod
|
||||
def _sign_url(url, sig_key, sig_secret, life=600, include_qs=False):
|
||||
|
@ -3,11 +3,11 @@ from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .jwplatform import JWPlatformBaseIE
|
||||
from .common import InfoExtractor
|
||||
from ..utils import remove_end
|
||||
|
||||
|
||||
class ThisAVIE(JWPlatformBaseIE):
|
||||
class ThisAVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?thisav\.com/video/(?P<id>[0-9]+)/.*'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.thisav.com/video/47734/%98%26sup1%3B%83%9E%83%82---just-fit.html',
|
||||
|
@ -1,7 +1,7 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .jwplatform import JWPlatformBaseIE
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
get_element_by_class,
|
||||
@ -9,7 +9,7 @@ from ..utils import (
|
||||
)
|
||||
|
||||
|
||||
class TVNoeIE(JWPlatformBaseIE):
|
||||
class TVNoeIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?tvnoe\.cz/video/(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.tvnoe.cz/video/10362',
|
||||
|
@ -3,7 +3,7 @@ from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .jwplatform import JWPlatformBaseIE
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
decode_packed_codes,
|
||||
js_to_json,
|
||||
@ -12,7 +12,7 @@ from ..utils import (
|
||||
)
|
||||
|
||||
|
||||
class VidziIE(JWPlatformBaseIE):
|
||||
class VidziIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?vidzi\.tv/(?:embed-)?(?P<id>[0-9a-zA-Z]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://vidzi.tv/cghql9yq6emu.html',
|
||||
|
32
youtube_dl/extractor/vodpl.py
Normal file
32
youtube_dl/extractor/vodpl.py
Normal file
@ -0,0 +1,32 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .onet import OnetBaseIE
|
||||
|
||||
|
||||
class VODPlIE(OnetBaseIE):
|
||||
_VALID_URL = r'https?://vod\.pl/(?:[^/]+/)+(?P<id>[0-9a-zA-Z]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://vod.pl/filmy/chlopaki-nie-placza/3ep3jns',
|
||||
'md5': 'a7dc3b2f7faa2421aefb0ecaabf7ec74',
|
||||
'info_dict': {
|
||||
'id': '3ep3jns',
|
||||
'ext': 'mp4',
|
||||
'title': 'Chłopaki nie płaczą',
|
||||
'description': 'md5:f5f03b84712e55f5ac9f0a3f94445224',
|
||||
'timestamp': 1463415154,
|
||||
'duration': 5765,
|
||||
'upload_date': '20160516',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://vod.pl/seriale/belfer-na-planie-praca-kamery-online/2c10heh',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
info_dict = self._extract_from_id(self._search_mvp_id(webpage), webpage)
|
||||
info_dict['id'] = video_id
|
||||
return info_dict
|
@ -1,10 +1,10 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
from .jwplatform import JWPlatformBaseIE
|
||||
|
||||
|
||||
class WimpIE(JWPlatformBaseIE):
|
||||
class WimpIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?wimp\.com/(?P<id>[^/]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.wimp.com/maru-is-exhausted/',
|
||||
|
@ -2383,6 +2383,7 @@ def _match_one(filter_part, dct):
|
||||
\s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
|
||||
(?:
|
||||
(?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
|
||||
(?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
|
||||
(?P<strval>(?![0-9.])[a-z0-9A-Z]*)
|
||||
)
|
||||
\s*$
|
||||
@ -2391,7 +2392,8 @@ def _match_one(filter_part, dct):
|
||||
if m:
|
||||
op = COMPARISON_OPERATORS[m.group('op')]
|
||||
actual_value = dct.get(m.group('key'))
|
||||
if (m.group('strval') is not None or
|
||||
if (m.group('quotedstrval') is not None or
|
||||
m.group('strval') is not None or
|
||||
# If the original field is a string and matching comparisonvalue is
|
||||
# a number we should respect the origin of the original field
|
||||
# and process comparison value as a string (see
|
||||
@ -2401,7 +2403,10 @@ def _match_one(filter_part, dct):
|
||||
if m.group('op') not in ('=', '!='):
|
||||
raise ValueError(
|
||||
'Operator %s does not support string values!' % m.group('op'))
|
||||
comparison_value = m.group('strval') or m.group('intval')
|
||||
comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
|
||||
quote = m.group('quote')
|
||||
if quote is not None:
|
||||
comparison_value = comparison_value.replace(r'\%s' % quote, quote)
|
||||
else:
|
||||
try:
|
||||
comparison_value = int(m.group('intval'))
|
||||
|
@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2017.02.14'
|
||||
__version__ = '2017.02.17'
|
||||
|
Reference in New Issue
Block a user