Compare commits
17 Commits
2017.02.14
...
2017.02.16
Author | SHA1 | Date | |
---|---|---|---|
2480b056c1 | |||
3aa25395aa | |||
eafaeb226a | |||
de4d378c0c | |||
099cfdb770 | |||
398dea3210 | |||
db13c16ef8 | |||
1bd05345ea | |||
3021cf83b7 | |||
04a741232f | |||
43a3d9edfc | |||
d31aa74fdb | |||
6092ccd058 | |||
22ce9ad2bd | |||
9a372f14b4 | |||
5cb2d36c82 | |||
fcca0d53a8 |
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@@ -6,8 +6,8 @@
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.14*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.16*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.14**
|
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.16**
|
||||||
|
|
||||||
### Before submitting an *issue* make sure you have:
|
### Before submitting an *issue* make sure you have:
|
||||||
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||||
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
|
|||||||
[debug] User config: []
|
[debug] User config: []
|
||||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||||
[debug] youtube-dl version 2017.02.14
|
[debug] youtube-dl version 2017.02.16
|
||||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||||
[debug] Proxy map: {}
|
[debug] Proxy map: {}
|
||||||
|
@@ -11,8 +11,6 @@ sudo: false
|
|||||||
env:
|
env:
|
||||||
- YTDL_TEST_SET=core
|
- YTDL_TEST_SET=core
|
||||||
- YTDL_TEST_SET=download
|
- YTDL_TEST_SET=download
|
||||||
before_script:
|
|
||||||
- chmod +x ./devscripts/run_tests.sh
|
|
||||||
script: ./devscripts/run_tests.sh
|
script: ./devscripts/run_tests.sh
|
||||||
notifications:
|
notifications:
|
||||||
email:
|
email:
|
||||||
|
18
ChangeLog
18
ChangeLog
@@ -1,3 +1,21 @@
|
|||||||
|
version 2017.02.16
|
||||||
|
|
||||||
|
Core
|
||||||
|
+ [utils] Add support for quoted string literals in --match-filter (#8050,
|
||||||
|
#12142, #12144)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [ceskatelevize] Lower priority for audio description sources (#12119)
|
||||||
|
* [amcnetworks] Fix extraction (#12127)
|
||||||
|
* [pinkbike] Fix uploader extraction (#12054)
|
||||||
|
+ [onetpl] Add support for businessinsider.com.pl and plejada.pl
|
||||||
|
+ [onetpl] Add support for onet.pl (#10507)
|
||||||
|
+ [onetmvp] Add shortcut extractor
|
||||||
|
+ [vodpl] Add support for vod.pl (#12122)
|
||||||
|
+ [pornhub] Extract video URL from tv platform site (#12007, #12129)
|
||||||
|
+ [ceskatelevize] Extract DASH formats (#12119, #12133)
|
||||||
|
|
||||||
|
|
||||||
version 2017.02.14
|
version 2017.02.14
|
||||||
|
|
||||||
Core
|
Core
|
||||||
|
0
devscripts/run_tests.sh
Normal file → Executable file
0
devscripts/run_tests.sh
Normal file → Executable file
@@ -546,8 +546,10 @@
|
|||||||
- **OktoberfestTV**
|
- **OktoberfestTV**
|
||||||
- **on.aol.com**
|
- **on.aol.com**
|
||||||
- **OnDemandKorea**
|
- **OnDemandKorea**
|
||||||
|
- **onet.pl**
|
||||||
- **onet.tv**
|
- **onet.tv**
|
||||||
- **onet.tv:channel**
|
- **onet.tv:channel**
|
||||||
|
- **OnetMVP**
|
||||||
- **OnionStudios**
|
- **OnionStudios**
|
||||||
- **Ooyala**
|
- **Ooyala**
|
||||||
- **OoyalaExternal**
|
- **OoyalaExternal**
|
||||||
@@ -900,6 +902,7 @@
|
|||||||
- **vlive**
|
- **vlive**
|
||||||
- **vlive:channel**
|
- **vlive:channel**
|
||||||
- **Vodlocker**
|
- **Vodlocker**
|
||||||
|
- **VODPl**
|
||||||
- **VODPlatform**
|
- **VODPlatform**
|
||||||
- **VoiceRepublic**
|
- **VoiceRepublic**
|
||||||
- **VoxMedia**
|
- **VoxMedia**
|
||||||
|
@@ -1,4 +1,5 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
# coding: utf-8
|
||||||
|
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
@@ -540,10 +541,10 @@ class TestYoutubeDL(unittest.TestCase):
|
|||||||
self.assertEqual(ydl._format_note({}), '')
|
self.assertEqual(ydl._format_note({}), '')
|
||||||
assertRegexpMatches(self, ydl._format_note({
|
assertRegexpMatches(self, ydl._format_note({
|
||||||
'vbr': 10,
|
'vbr': 10,
|
||||||
}), '^\s*10k$')
|
}), r'^\s*10k$')
|
||||||
assertRegexpMatches(self, ydl._format_note({
|
assertRegexpMatches(self, ydl._format_note({
|
||||||
'fps': 30,
|
'fps': 30,
|
||||||
}), '^30fps$')
|
}), r'^30fps$')
|
||||||
|
|
||||||
def test_postprocessors(self):
|
def test_postprocessors(self):
|
||||||
filename = 'post-processor-testfile.mp4'
|
filename = 'post-processor-testfile.mp4'
|
||||||
@@ -606,6 +607,8 @@ class TestYoutubeDL(unittest.TestCase):
|
|||||||
'duration': 30,
|
'duration': 30,
|
||||||
'filesize': 10 * 1024,
|
'filesize': 10 * 1024,
|
||||||
'playlist_id': '42',
|
'playlist_id': '42',
|
||||||
|
'uploader': "變態妍字幕版 太妍 тест",
|
||||||
|
'creator': "тест ' 123 ' тест--",
|
||||||
}
|
}
|
||||||
second = {
|
second = {
|
||||||
'id': '2',
|
'id': '2',
|
||||||
@@ -616,6 +619,7 @@ class TestYoutubeDL(unittest.TestCase):
|
|||||||
'description': 'foo',
|
'description': 'foo',
|
||||||
'filesize': 5 * 1024,
|
'filesize': 5 * 1024,
|
||||||
'playlist_id': '43',
|
'playlist_id': '43',
|
||||||
|
'uploader': "тест 123",
|
||||||
}
|
}
|
||||||
videos = [first, second]
|
videos = [first, second]
|
||||||
|
|
||||||
@@ -656,6 +660,26 @@ class TestYoutubeDL(unittest.TestCase):
|
|||||||
res = get_videos(f)
|
res = get_videos(f)
|
||||||
self.assertEqual(res, ['1'])
|
self.assertEqual(res, ['1'])
|
||||||
|
|
||||||
|
f = match_filter_func('uploader = "變態妍字幕版 太妍 тест"')
|
||||||
|
res = get_videos(f)
|
||||||
|
self.assertEqual(res, ['1'])
|
||||||
|
|
||||||
|
f = match_filter_func('uploader != "變態妍字幕版 太妍 тест"')
|
||||||
|
res = get_videos(f)
|
||||||
|
self.assertEqual(res, ['2'])
|
||||||
|
|
||||||
|
f = match_filter_func('creator = "тест \' 123 \' тест--"')
|
||||||
|
res = get_videos(f)
|
||||||
|
self.assertEqual(res, ['1'])
|
||||||
|
|
||||||
|
f = match_filter_func("creator = 'тест \\' 123 \\' тест--'")
|
||||||
|
res = get_videos(f)
|
||||||
|
self.assertEqual(res, ['1'])
|
||||||
|
|
||||||
|
f = match_filter_func(r"creator = 'тест \' 123 \' тест--' & duration > 30")
|
||||||
|
res = get_videos(f)
|
||||||
|
self.assertEqual(res, [])
|
||||||
|
|
||||||
def test_playlist_items_selection(self):
|
def test_playlist_items_selection(self):
|
||||||
entries = [{
|
entries = [{
|
||||||
'id': compat_str(i),
|
'id': compat_str(i),
|
||||||
|
@@ -53,20 +53,30 @@ class AMCNetworksIE(ThePlatformIE):
|
|||||||
'mbr': 'true',
|
'mbr': 'true',
|
||||||
'manifest': 'm3u',
|
'manifest': 'm3u',
|
||||||
}
|
}
|
||||||
media_url = self._search_regex(r'window\.platformLinkURL\s*=\s*[\'"]([^\'"]+)', webpage, 'media url')
|
media_url = self._search_regex(
|
||||||
|
r'window\.platformLinkURL\s*=\s*[\'"]([^\'"]+)',
|
||||||
|
webpage, 'media url')
|
||||||
theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
|
theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
|
||||||
r'https?://link.theplatform.com/s/([^?]+)', media_url, 'theplatform_path'), display_id)
|
r'link\.theplatform\.com/s/([^?]+)',
|
||||||
|
media_url, 'theplatform_path'), display_id)
|
||||||
info = self._parse_theplatform_metadata(theplatform_metadata)
|
info = self._parse_theplatform_metadata(theplatform_metadata)
|
||||||
video_id = theplatform_metadata['pid']
|
video_id = theplatform_metadata['pid']
|
||||||
title = theplatform_metadata['title']
|
title = theplatform_metadata['title']
|
||||||
rating = theplatform_metadata['ratings'][0]['rating']
|
rating = theplatform_metadata['ratings'][0]['rating']
|
||||||
auth_required = self._search_regex(r'window\.authRequired\s*=\s*(true|false);', webpage, 'auth required')
|
auth_required = self._search_regex(
|
||||||
|
r'window\.authRequired\s*=\s*(true|false);',
|
||||||
|
webpage, 'auth required')
|
||||||
if auth_required == 'true':
|
if auth_required == 'true':
|
||||||
requestor_id = self._search_regex(r'window\.requestor_id\s*=\s*[\'"]([^\'"]+)', webpage, 'requestor id')
|
requestor_id = self._search_regex(
|
||||||
resource = self._get_mvpd_resource(requestor_id, title, video_id, rating)
|
r'window\.requestor_id\s*=\s*[\'"]([^\'"]+)',
|
||||||
query['auth'] = self._extract_mvpd_auth(url, video_id, requestor_id, resource)
|
webpage, 'requestor id')
|
||||||
|
resource = self._get_mvpd_resource(
|
||||||
|
requestor_id, title, video_id, rating)
|
||||||
|
query['auth'] = self._extract_mvpd_auth(
|
||||||
|
url, video_id, requestor_id, resource)
|
||||||
media_url = update_url_query(media_url, query)
|
media_url = update_url_query(media_url, query)
|
||||||
formats, subtitles = self._extract_theplatform_smil(media_url, video_id)
|
formats, subtitles = self._extract_theplatform_smil(
|
||||||
|
media_url, video_id)
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
info.update({
|
info.update({
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
@@ -78,9 +88,11 @@ class AMCNetworksIE(ThePlatformIE):
|
|||||||
if ns_keys:
|
if ns_keys:
|
||||||
ns = list(ns_keys)[0]
|
ns = list(ns_keys)[0]
|
||||||
series = theplatform_metadata.get(ns + '$show')
|
series = theplatform_metadata.get(ns + '$show')
|
||||||
season_number = int_or_none(theplatform_metadata.get(ns + '$season'))
|
season_number = int_or_none(
|
||||||
|
theplatform_metadata.get(ns + '$season'))
|
||||||
episode = theplatform_metadata.get(ns + '$episodeTitle')
|
episode = theplatform_metadata.get(ns + '$episodeTitle')
|
||||||
episode_number = int_or_none(theplatform_metadata.get(ns + '$episode'))
|
episode_number = int_or_none(
|
||||||
|
theplatform_metadata.get(ns + '$episode'))
|
||||||
if season_number:
|
if season_number:
|
||||||
title = 'Season %d - %s' % (season_number, title)
|
title = 'Season %d - %s' % (season_number, title)
|
||||||
if series:
|
if series:
|
||||||
|
@@ -13,6 +13,7 @@ from ..utils import (
|
|||||||
float_or_none,
|
float_or_none,
|
||||||
sanitized_Request,
|
sanitized_Request,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
|
USER_AGENTS,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -21,10 +22,10 @@ class CeskaTelevizeIE(InfoExtractor):
|
|||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220',
|
'url': 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '61924494876951776',
|
'id': '61924494877246241',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Hyde Park Civilizace',
|
'title': 'Hyde Park Civilizace: Život v Grónsku',
|
||||||
'description': 'md5:fe93f6eda372d150759d11644ebbfb4a',
|
'description': 'md5:3fec8f6bb497be5cdb0c9e8781076626',
|
||||||
'thumbnail': r're:^https?://.*\.jpg',
|
'thumbnail': r're:^https?://.*\.jpg',
|
||||||
'duration': 3350,
|
'duration': 3350,
|
||||||
},
|
},
|
||||||
@@ -114,6 +115,9 @@ class CeskaTelevizeIE(InfoExtractor):
|
|||||||
'requestSource': 'iVysilani',
|
'requestSource': 'iVysilani',
|
||||||
}
|
}
|
||||||
|
|
||||||
|
entries = []
|
||||||
|
|
||||||
|
for user_agent in (None, USER_AGENTS['Safari']):
|
||||||
req = sanitized_Request(
|
req = sanitized_Request(
|
||||||
'http://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist',
|
'http://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist',
|
||||||
data=urlencode_postdata(data))
|
data=urlencode_postdata(data))
|
||||||
@@ -121,9 +125,14 @@ class CeskaTelevizeIE(InfoExtractor):
|
|||||||
req.add_header('Content-type', 'application/x-www-form-urlencoded')
|
req.add_header('Content-type', 'application/x-www-form-urlencoded')
|
||||||
req.add_header('x-addr', '127.0.0.1')
|
req.add_header('x-addr', '127.0.0.1')
|
||||||
req.add_header('X-Requested-With', 'XMLHttpRequest')
|
req.add_header('X-Requested-With', 'XMLHttpRequest')
|
||||||
|
if user_agent:
|
||||||
|
req.add_header('User-Agent', user_agent)
|
||||||
req.add_header('Referer', url)
|
req.add_header('Referer', url)
|
||||||
|
|
||||||
playlistpage = self._download_json(req, playlist_id)
|
playlistpage = self._download_json(req, playlist_id, fatal=False)
|
||||||
|
|
||||||
|
if not playlistpage:
|
||||||
|
continue
|
||||||
|
|
||||||
playlist_url = playlistpage['url']
|
playlist_url = playlistpage['url']
|
||||||
if playlist_url == 'error_region':
|
if playlist_url == 'error_region':
|
||||||
@@ -135,19 +144,38 @@ class CeskaTelevizeIE(InfoExtractor):
|
|||||||
playlist_title = self._og_search_title(webpage, default=None)
|
playlist_title = self._og_search_title(webpage, default=None)
|
||||||
playlist_description = self._og_search_description(webpage, default=None)
|
playlist_description = self._og_search_description(webpage, default=None)
|
||||||
|
|
||||||
playlist = self._download_json(req, playlist_id)['playlist']
|
playlist = self._download_json(req, playlist_id, fatal=False)
|
||||||
|
if not playlist:
|
||||||
|
continue
|
||||||
|
|
||||||
|
playlist = playlist.get('playlist')
|
||||||
|
if not isinstance(playlist, list):
|
||||||
|
continue
|
||||||
|
|
||||||
playlist_len = len(playlist)
|
playlist_len = len(playlist)
|
||||||
|
|
||||||
entries = []
|
for num, item in enumerate(playlist):
|
||||||
for item in playlist:
|
|
||||||
is_live = item.get('type') == 'LIVE'
|
is_live = item.get('type') == 'LIVE'
|
||||||
formats = []
|
formats = []
|
||||||
for format_id, stream_url in item['streamUrls'].items():
|
for format_id, stream_url in item.get('streamUrls', {}).items():
|
||||||
formats.extend(self._extract_m3u8_formats(
|
if 'playerType=flash' in stream_url:
|
||||||
|
stream_formats = self._extract_m3u8_formats(
|
||||||
stream_url, playlist_id, 'mp4',
|
stream_url, playlist_id, 'mp4',
|
||||||
entry_protocol='m3u8' if is_live else 'm3u8_native',
|
entry_protocol='m3u8' if is_live else 'm3u8_native',
|
||||||
fatal=False))
|
m3u8_id='hls-%s' % format_id, fatal=False)
|
||||||
self._sort_formats(formats)
|
else:
|
||||||
|
stream_formats = self._extract_mpd_formats(
|
||||||
|
stream_url, playlist_id,
|
||||||
|
mpd_id='dash-%s' % format_id, fatal=False)
|
||||||
|
# See https://github.com/rg3/youtube-dl/issues/12119#issuecomment-280037031
|
||||||
|
if format_id == 'audioDescription':
|
||||||
|
for f in stream_formats:
|
||||||
|
f['source_preference'] = -10
|
||||||
|
formats.extend(stream_formats)
|
||||||
|
|
||||||
|
if user_agent and len(entries) == playlist_len:
|
||||||
|
entries[num]['formats'].extend(formats)
|
||||||
|
continue
|
||||||
|
|
||||||
item_id = item.get('id') or item['assetId']
|
item_id = item.get('id') or item['assetId']
|
||||||
title = item['title']
|
title = item['title']
|
||||||
@@ -179,6 +207,9 @@ class CeskaTelevizeIE(InfoExtractor):
|
|||||||
'is_live': is_live,
|
'is_live': is_live,
|
||||||
})
|
})
|
||||||
|
|
||||||
|
for e in entries:
|
||||||
|
self._sort_formats(e['formats'])
|
||||||
|
|
||||||
return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
|
return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
|
||||||
|
|
||||||
def _get_subtitles(self, episode_id, subs):
|
def _get_subtitles(self, episode_id, subs):
|
||||||
|
@@ -694,6 +694,8 @@ from .ondemandkorea import OnDemandKoreaIE
|
|||||||
from .onet import (
|
from .onet import (
|
||||||
OnetIE,
|
OnetIE,
|
||||||
OnetChannelIE,
|
OnetChannelIE,
|
||||||
|
OnetMVPIE,
|
||||||
|
OnetPlIE,
|
||||||
)
|
)
|
||||||
from .onionstudios import OnionStudiosIE
|
from .onionstudios import OnionStudiosIE
|
||||||
from .ooyala import (
|
from .ooyala import (
|
||||||
@@ -1147,6 +1149,7 @@ from .vlive import (
|
|||||||
VLiveChannelIE
|
VLiveChannelIE
|
||||||
)
|
)
|
||||||
from .vodlocker import VodlockerIE
|
from .vodlocker import VodlockerIE
|
||||||
|
from .vodpl import VODPlIE
|
||||||
from .vodplatform import VODPlatformIE
|
from .vodplatform import VODPlatformIE
|
||||||
from .voicerepublic import VoiceRepublicIE
|
from .voicerepublic import VoiceRepublicIE
|
||||||
from .voxmedia import VoxMediaIE
|
from .voxmedia import VoxMediaIE
|
||||||
|
@@ -23,7 +23,7 @@ class OnetBaseIE(InfoExtractor):
|
|||||||
return self._search_regex(
|
return self._search_regex(
|
||||||
r'id=(["\'])mvp:(?P<id>.+?)\1', webpage, 'mvp id', group='id')
|
r'id=(["\'])mvp:(?P<id>.+?)\1', webpage, 'mvp id', group='id')
|
||||||
|
|
||||||
def _extract_from_id(self, video_id, webpage):
|
def _extract_from_id(self, video_id, webpage=None):
|
||||||
response = self._download_json(
|
response = self._download_json(
|
||||||
'http://qi.ckm.onetapi.pl/', video_id,
|
'http://qi.ckm.onetapi.pl/', video_id,
|
||||||
query={
|
query={
|
||||||
@@ -74,8 +74,10 @@ class OnetBaseIE(InfoExtractor):
|
|||||||
|
|
||||||
meta = video.get('meta', {})
|
meta = video.get('meta', {})
|
||||||
|
|
||||||
title = self._og_search_title(webpage, default=None) or meta['title']
|
title = (self._og_search_title(
|
||||||
description = self._og_search_description(webpage, default=None) or meta.get('description')
|
webpage, default=None) if webpage else None) or meta['title']
|
||||||
|
description = (self._og_search_description(
|
||||||
|
webpage, default=None) if webpage else None) or meta.get('description')
|
||||||
duration = meta.get('length') or meta.get('lenght')
|
duration = meta.get('length') or meta.get('lenght')
|
||||||
timestamp = parse_iso8601(meta.get('addDate'), ' ')
|
timestamp = parse_iso8601(meta.get('addDate'), ' ')
|
||||||
|
|
||||||
@@ -89,6 +91,18 @@ class OnetBaseIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class OnetMVPIE(OnetBaseIE):
|
||||||
|
_VALID_URL = r'onetmvp:(?P<id>\d+\.\d+)'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
'url': 'onetmvp:381027.1509591944',
|
||||||
|
'only_matching': True,
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
return self._extract_from_id(self._match_id(url))
|
||||||
|
|
||||||
|
|
||||||
class OnetIE(OnetBaseIE):
|
class OnetIE(OnetBaseIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?onet\.tv/[a-z]/[a-z]+/(?P<display_id>[0-9a-z-]+)/(?P<id>[0-9a-z]+)'
|
_VALID_URL = r'https?://(?:www\.)?onet\.tv/[a-z]/[a-z]+/(?P<display_id>[0-9a-z-]+)/(?P<id>[0-9a-z]+)'
|
||||||
IE_NAME = 'onet.tv'
|
IE_NAME = 'onet.tv'
|
||||||
@@ -167,3 +181,44 @@ class OnetChannelIE(OnetBaseIE):
|
|||||||
channel_title = strip_or_none(get_element_by_class('o_channelName', webpage))
|
channel_title = strip_or_none(get_element_by_class('o_channelName', webpage))
|
||||||
channel_description = strip_or_none(get_element_by_class('o_channelDesc', webpage))
|
channel_description = strip_or_none(get_element_by_class('o_channelDesc', webpage))
|
||||||
return self.playlist_result(entries, channel_id, channel_title, channel_description)
|
return self.playlist_result(entries, channel_id, channel_title, channel_description)
|
||||||
|
|
||||||
|
|
||||||
|
class OnetPlIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:[^/]+\.)?(?:onet|businessinsider\.com|plejada)\.pl/(?:[^/]+/)+(?P<id>[0-9a-z]+)'
|
||||||
|
IE_NAME = 'onet.pl'
|
||||||
|
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'http://eurosport.onet.pl/zimowe/skoki-narciarskie/ziobro-wygral-kwalifikacje-w-pjongczangu/9ckrly',
|
||||||
|
'md5': 'b94021eb56214c3969380388b6e73cb0',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1561707.1685479',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Ziobro wygrał kwalifikacje w Pjongczangu',
|
||||||
|
'description': 'md5:61fb0740084d2d702ea96512a03585b4',
|
||||||
|
'upload_date': '20170214',
|
||||||
|
'timestamp': 1487078046,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'http://film.onet.pl/zwiastuny/ghost-in-the-shell-drugi-zwiastun-pl/5q6yl3',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://moto.onet.pl/jak-wybierane-sa-miejsca-na-fotoradary/6rs04e',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://businessinsider.com.pl/wideo/scenariusz-na-koniec-swiata-wedlug-nasa/dwnqptk',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://plejada.pl/weronika-rosati-o-swoim-domniemanym-slubie/n2bq89',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
mvp_id = self._search_regex(
|
||||||
|
r'data-params-mvp=["\'](\d+\.\d+)', webpage, 'mvp id')
|
||||||
|
|
||||||
|
return self.url_result(
|
||||||
|
'onetmvp:%s' % mvp_id, OnetMVPIE.ie_key(), video_id=mvp_id)
|
||||||
|
@@ -64,7 +64,8 @@ class PinkbikeIE(InfoExtractor):
|
|||||||
'video:duration', webpage, 'duration'))
|
'video:duration', webpage, 'duration'))
|
||||||
|
|
||||||
uploader = self._search_regex(
|
uploader = self._search_regex(
|
||||||
r'un:\s*"([^"]+)"', webpage, 'uploader', fatal=False)
|
r'<a[^>]+\brel=["\']author[^>]+>([^<]+)', webpage,
|
||||||
|
'uploader', fatal=False)
|
||||||
upload_date = unified_strdate(self._search_regex(
|
upload_date = unified_strdate(self._search_regex(
|
||||||
r'class="fullTime"[^>]+title="([^"]+)"',
|
r'class="fullTime"[^>]+title="([^"]+)"',
|
||||||
webpage, 'upload date', fatal=False))
|
webpage, 'upload date', fatal=False))
|
||||||
|
@@ -2,27 +2,27 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import itertools
|
import itertools
|
||||||
import os
|
# import os
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_HTTPError,
|
compat_HTTPError,
|
||||||
compat_urllib_parse_unquote,
|
# compat_urllib_parse_unquote,
|
||||||
compat_urllib_parse_unquote_plus,
|
# compat_urllib_parse_unquote_plus,
|
||||||
compat_urllib_parse_urlparse,
|
# compat_urllib_parse_urlparse,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
js_to_json,
|
js_to_json,
|
||||||
orderedSet,
|
orderedSet,
|
||||||
sanitized_Request,
|
# sanitized_Request,
|
||||||
str_to_int,
|
str_to_int,
|
||||||
)
|
)
|
||||||
from ..aes import (
|
# from ..aes import (
|
||||||
aes_decrypt_text
|
# aes_decrypt_text
|
||||||
)
|
# )
|
||||||
|
|
||||||
|
|
||||||
class PornHubIE(InfoExtractor):
|
class PornHubIE(InfoExtractor):
|
||||||
@@ -109,10 +109,14 @@ class PornHubIE(InfoExtractor):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
req = sanitized_Request(
|
def dl_webpage(platform):
|
||||||
'http://www.pornhub.com/view_video.php?viewkey=%s' % video_id)
|
return self._download_webpage(
|
||||||
req.add_header('Cookie', 'age_verified=1')
|
'http://www.pornhub.com/view_video.php?viewkey=%s' % video_id,
|
||||||
webpage = self._download_webpage(req, video_id)
|
video_id, headers={
|
||||||
|
'Cookie': 'age_verified=1; platform=%s' % platform,
|
||||||
|
})
|
||||||
|
|
||||||
|
webpage = dl_webpage('pc')
|
||||||
|
|
||||||
error_msg = self._html_search_regex(
|
error_msg = self._html_search_regex(
|
||||||
r'(?s)<div[^>]+class=(["\'])(?:(?!\1).)*\b(?:removed|userMessageSection)\b(?:(?!\1).)*\1[^>]*>(?P<error>.+?)</div>',
|
r'(?s)<div[^>]+class=(["\'])(?:(?!\1).)*\b(?:removed|userMessageSection)\b(?:(?!\1).)*\1[^>]*>(?P<error>.+?)</div>',
|
||||||
@@ -123,10 +127,19 @@ class PornHubIE(InfoExtractor):
|
|||||||
'PornHub said: %s' % error_msg,
|
'PornHub said: %s' % error_msg,
|
||||||
expected=True, video_id=video_id)
|
expected=True, video_id=video_id)
|
||||||
|
|
||||||
|
tv_webpage = dl_webpage('tv')
|
||||||
|
|
||||||
|
video_url = self._search_regex(
|
||||||
|
r'<video[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//.+?)\1', tv_webpage,
|
||||||
|
'video url', group='url')
|
||||||
|
|
||||||
|
title = self._search_regex(
|
||||||
|
r'<h1>([^>]+)</h1>', tv_webpage, 'title', default=None)
|
||||||
|
|
||||||
# video_title from flashvars contains whitespace instead of non-ASCII (see
|
# video_title from flashvars contains whitespace instead of non-ASCII (see
|
||||||
# http://www.pornhub.com/view_video.php?viewkey=1331683002), not relying
|
# http://www.pornhub.com/view_video.php?viewkey=1331683002), not relying
|
||||||
# on that anymore.
|
# on that anymore.
|
||||||
title = self._html_search_meta(
|
title = title or self._html_search_meta(
|
||||||
'twitter:title', webpage, default=None) or self._search_regex(
|
'twitter:title', webpage, default=None) or self._search_regex(
|
||||||
(r'<h1[^>]+class=["\']title["\'][^>]*>(?P<title>[^<]+)',
|
(r'<h1[^>]+class=["\']title["\'][^>]*>(?P<title>[^<]+)',
|
||||||
r'<div[^>]+data-video-title=(["\'])(?P<title>.+?)\1',
|
r'<div[^>]+data-video-title=(["\'])(?P<title>.+?)\1',
|
||||||
@@ -156,6 +169,7 @@ class PornHubIE(InfoExtractor):
|
|||||||
comment_count = self._extract_count(
|
comment_count = self._extract_count(
|
||||||
r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')
|
r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')
|
||||||
|
|
||||||
|
"""
|
||||||
video_variables = {}
|
video_variables = {}
|
||||||
for video_variablename, quote, video_variable in re.findall(
|
for video_variablename, quote, video_variable in re.findall(
|
||||||
r'(player_quality_[0-9]{3,4}p\w+)\s*=\s*(["\'])(.+?)\2;', webpage):
|
r'(player_quality_[0-9]{3,4}p\w+)\s*=\s*(["\'])(.+?)\2;', webpage):
|
||||||
@@ -197,6 +211,7 @@ class PornHubIE(InfoExtractor):
|
|||||||
'height': height,
|
'height': height,
|
||||||
})
|
})
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
"""
|
||||||
|
|
||||||
page_params = self._parse_json(self._search_regex(
|
page_params = self._parse_json(self._search_regex(
|
||||||
r'page_params\.zoneDetails\[([\'"])[^\'"]+\1\]\s*=\s*(?P<data>{[^}]+})',
|
r'page_params\.zoneDetails\[([\'"])[^\'"]+\1\]\s*=\s*(?P<data>{[^}]+})',
|
||||||
@@ -209,6 +224,7 @@ class PornHubIE(InfoExtractor):
|
|||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
'url': video_url,
|
||||||
'uploader': video_uploader,
|
'uploader': video_uploader,
|
||||||
'title': title,
|
'title': title,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
@@ -217,7 +233,7 @@ class PornHubIE(InfoExtractor):
|
|||||||
'like_count': like_count,
|
'like_count': like_count,
|
||||||
'dislike_count': dislike_count,
|
'dislike_count': dislike_count,
|
||||||
'comment_count': comment_count,
|
'comment_count': comment_count,
|
||||||
'formats': formats,
|
# 'formats': formats,
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
'tags': tags,
|
'tags': tags,
|
||||||
'categories': categories,
|
'categories': categories,
|
||||||
|
32
youtube_dl/extractor/vodpl.py
Normal file
32
youtube_dl/extractor/vodpl.py
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .onet import OnetBaseIE
|
||||||
|
|
||||||
|
|
||||||
|
class VODPlIE(OnetBaseIE):
|
||||||
|
_VALID_URL = r'https?://vod\.pl/(?:[^/]+/)+(?P<id>[0-9a-zA-Z]+)'
|
||||||
|
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://vod.pl/filmy/chlopaki-nie-placza/3ep3jns',
|
||||||
|
'md5': 'a7dc3b2f7faa2421aefb0ecaabf7ec74',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '3ep3jns',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Chłopaki nie płaczą',
|
||||||
|
'description': 'md5:f5f03b84712e55f5ac9f0a3f94445224',
|
||||||
|
'timestamp': 1463415154,
|
||||||
|
'duration': 5765,
|
||||||
|
'upload_date': '20160516',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://vod.pl/seriale/belfer-na-planie-praca-kamery-online/2c10heh',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
info_dict = self._extract_from_id(self._search_mvp_id(webpage), webpage)
|
||||||
|
info_dict['id'] = video_id
|
||||||
|
return info_dict
|
@@ -2383,6 +2383,7 @@ def _match_one(filter_part, dct):
|
|||||||
\s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
|
\s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
|
||||||
(?:
|
(?:
|
||||||
(?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
|
(?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
|
||||||
|
(?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
|
||||||
(?P<strval>(?![0-9.])[a-z0-9A-Z]*)
|
(?P<strval>(?![0-9.])[a-z0-9A-Z]*)
|
||||||
)
|
)
|
||||||
\s*$
|
\s*$
|
||||||
@@ -2391,7 +2392,8 @@ def _match_one(filter_part, dct):
|
|||||||
if m:
|
if m:
|
||||||
op = COMPARISON_OPERATORS[m.group('op')]
|
op = COMPARISON_OPERATORS[m.group('op')]
|
||||||
actual_value = dct.get(m.group('key'))
|
actual_value = dct.get(m.group('key'))
|
||||||
if (m.group('strval') is not None or
|
if (m.group('quotedstrval') is not None or
|
||||||
|
m.group('strval') is not None or
|
||||||
# If the original field is a string and matching comparisonvalue is
|
# If the original field is a string and matching comparisonvalue is
|
||||||
# a number we should respect the origin of the original field
|
# a number we should respect the origin of the original field
|
||||||
# and process comparison value as a string (see
|
# and process comparison value as a string (see
|
||||||
@@ -2401,7 +2403,10 @@ def _match_one(filter_part, dct):
|
|||||||
if m.group('op') not in ('=', '!='):
|
if m.group('op') not in ('=', '!='):
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
'Operator %s does not support string values!' % m.group('op'))
|
'Operator %s does not support string values!' % m.group('op'))
|
||||||
comparison_value = m.group('strval') or m.group('intval')
|
comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
|
||||||
|
quote = m.group('quote')
|
||||||
|
if quote is not None:
|
||||||
|
comparison_value = comparison_value.replace(r'\%s' % quote, quote)
|
||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
comparison_value = int(m.group('intval'))
|
comparison_value = int(m.group('intval'))
|
||||||
|
@@ -1,3 +1,3 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
__version__ = '2017.02.14'
|
__version__ = '2017.02.16'
|
||||||
|
Reference in New Issue
Block a user