Compare commits

..

17 Commits

Author SHA1 Message Date
Sergey M․
689af4960e release 2018.07.04 2018-07-04 04:59:21 +07:00
Sergey M․
d5de0f21b9 [ChangeLog] Actualize
[ci skip]
2018-07-04 04:57:17 +07:00
Sergey M․
24d26ab380 [lynda] PEP 8 2018-07-04 04:49:03 +07:00
Sergey M․
836ef4840f [pluralsight] Switch to graphql (closes #16889, closes #16899) 2018-07-04 04:48:40 +07:00
Sergey M․
5621c3222e [lynda] Simplify login and improve error capturing (#16891) 2018-07-03 02:47:09 +07:00
Remita Amine
db5debf313 [go90] add support for embed urls(closes #16873) 2018-07-01 22:41:32 +01:00
Remita Amine
8cee692b8b [go90] detect geo restriction error and pass geo verification headers(closes #16874) 2018-07-01 22:41:32 +01:00
coreynicholson
973b6ceebb [vlive] Fix live streams extraction 2018-07-01 21:19:17 +07:00
Sergey M․
eca1f0d115 [extractor/common] Properly escape % in MPD templates (closes #16867) 2018-07-01 02:11:36 +07:00
Sergey M․
2160768a21 [npo] Fix typo (closes #16872) 2018-06-30 23:39:56 +07:00
Sergey M․
267d81962a [mediaset] Fix issues and extract all formats (closes #16568) 2018-06-30 02:19:02 +07:00
Timendum
9cf648c92b [mediaset] Add support for new videos 2018-06-30 02:17:51 +07:00
Sergey M․
5e8e2fa51f [extractor/common] Use source URL as Referer for HTML5 entries (closes #16849) 2018-06-29 01:25:05 +07:00
Sergey M․
d4a24f4091 Prefer ffmpeg over avconv by default (closes #8622) 2018-06-29 01:09:14 +07:00
Sergey M․
acbd0ff5df [dctptv] Restore extraction based on REST API (closes #16850) 2018-06-29 00:35:05 +07:00
Sergey M․
7b393f9cc5 [svt] Improve extraction and add support for pages (closes #16802) 2018-06-28 04:29:11 +07:00
Sergey M․
c3bcd206eb [porncom] Fix extraction (closes #16808) 2018-06-26 00:01:06 +07:00
20 changed files with 395 additions and 143 deletions

View File

@@ -6,8 +6,8 @@
---
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.06.25*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.06.25**
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.07.04*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.07.04**
### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2018.06.25
[debug] youtube-dl version 2018.07.04
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {}

View File

@@ -1,3 +1,24 @@
version 2018.07.04
Core
* [extractor/common] Properly escape % in MPD templates (#16867)
* [extractor/common] Use source URL as Referer for HTML5 entries (#16849)
* Prefer ffmpeg over avconv by default (#8622)
Extractors
* [pluralsight] Switch to graphql (#16889, #16895, #16896, #16899)
* [lynda] Simplify login and improve error capturing (#16891)
+ [go90] Add support for embed URLs (#16873)
* [go90] Detect geo restriction error and pass geo verification headers
(#16874)
* [vlive] Fix live streams extraction (#16871)
* [npo] Fix typo (#16872)
+ [mediaset] Add support for new videos and extract all formats (#16568)
* [dctptv] Restore extraction based on REST API (#16850)
* [svt] Improve extraction and add support for pages (#16802)
* [porncom] Fix extraction (#16808)
version 2018.06.25
Extractors

View File

@@ -427,9 +427,9 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
default; fix file if we can, warn
otherwise)
--prefer-avconv Prefer avconv over ffmpeg for running the
postprocessors (default)
--prefer-ffmpeg Prefer ffmpeg over avconv for running the
postprocessors
--prefer-ffmpeg Prefer ffmpeg over avconv for running the
postprocessors (default)
--ffmpeg-location PATH Location of the ffmpeg/avconv binary;
either the path to the binary or its
containing directory.

View File

@@ -813,6 +813,7 @@
- **StretchInternet**
- **SunPorno**
- **SVT**
- **SVTPage**
- **SVTPlay**: SVT Play and Öppet arkiv
- **SVTSeries**
- **SWRMediathek**

View File

@@ -305,8 +305,8 @@ class YoutubeDL(object):
http_chunk_size.
The following options are used by the post processors:
prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
otherwise prefer avconv.
prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
otherwise prefer ffmpeg.
postprocessor_args: A list of additional command-line arguments for the
postprocessor.

View File

@@ -2106,7 +2106,21 @@ class InfoExtractor(object):
representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
def prepare_template(template_name, identifiers):
t = representation_ms_info[template_name]
tmpl = representation_ms_info[template_name]
# First off, % characters outside $...$ templates
# must be escaped by doubling for proper processing
# by % operator string formatting used further (see
# https://github.com/rg3/youtube-dl/issues/16867).
t = ''
in_template = False
for c in tmpl:
t += c
if c == '$':
in_template = not in_template
elif c == '%' and not in_template:
t += c
# Next, $...$ templates are translated to their
# %(...) counterparts to be used with % operator
t = t.replace('$RepresentationID$', representation_id)
t = re.sub(r'\$(%s)\$' % '|'.join(identifiers), r'%(\1)d', t)
t = re.sub(r'\$(%s)%%([^$]+)\$' % '|'.join(identifiers), r'%(\1)\2', t)
@@ -2437,6 +2451,8 @@ class InfoExtractor(object):
media_info['subtitles'].setdefault(lang, []).append({
'url': absolute_url(src),
})
for f in media_info['formats']:
f.setdefault('http_headers', {})['Referer'] = base_url
if media_info['formats'] or media_info['subtitles']:
entries.append(media_info)
return entries

View File

@@ -5,13 +5,15 @@ from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
float_or_none,
unified_strdate,
int_or_none,
unified_timestamp,
)
class DctpTvIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?dctp\.tv/(?:#/)?filme/(?P<id>[^/?#&]+)'
_TEST = {
_TESTS = [{
# 4x3
'url': 'http://www.dctp.tv/filme/videoinstallation-fuer-eine-kaufhausfassade/',
'info_dict': {
'id': '95eaa4f33dad413aa17b4ee613cccc6c',
@@ -19,31 +21,49 @@ class DctpTvIE(InfoExtractor):
'ext': 'flv',
'title': 'Videoinstallation für eine Kaufhausfassade',
'description': 'Kurzfilm',
'upload_date': '20110407',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 71.24,
'timestamp': 1302172322,
'upload_date': '20110407',
},
'params': {
# rtmp download
'skip_download': True,
},
}
}, {
# 16x9
'url': 'http://www.dctp.tv/filme/sind-youtuber-die-besseren-lehrer/',
'only_matching': True,
}]
_BASE_URL = 'http://dctp-ivms2-restapi.s3.amazonaws.com'
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
version = self._download_json(
'%s/version.json' % self._BASE_URL, display_id,
'Downloading version JSON')
video_id = self._html_search_meta(
'DC.identifier', webpage, 'video id',
default=None) or self._search_regex(
r'id=["\']uuid[^>]+>([^<]+)<', webpage, 'video id')
restapi_base = '%s/%s/restapi' % (
self._BASE_URL, version['version_name'])
title = self._og_search_title(webpage)
info = self._download_json(
'%s/slugs/%s.json' % (restapi_base, display_id), display_id,
'Downloading video info JSON')
media = self._download_json(
'%s/media/%s.json' % (restapi_base, compat_str(info['object_id'])),
display_id, 'Downloading media JSON')
uuid = media['uuid']
title = media['title']
ratio = '16x9' if media.get('is_wide') else '4x3'
play_path = 'mp4:%s_dctp_0500_%s.m4v' % (uuid, ratio)
servers = self._download_json(
'http://www.dctp.tv/streaming_servers/', display_id,
note='Downloading server list', fatal=False)
note='Downloading server list JSON', fatal=False)
if servers:
endpoint = next(
@@ -60,27 +80,35 @@ class DctpTvIE(InfoExtractor):
formats = [{
'url': endpoint,
'app': app,
'play_path': 'mp4:%s_dctp_0500_4x3.m4v' % video_id,
'play_path': play_path,
'page_url': url,
'player_url': 'http://svm-prod-dctptv-static.s3.amazonaws.com/dctptv-relaunch2012-109.swf',
'player_url': 'http://svm-prod-dctptv-static.s3.amazonaws.com/dctptv-relaunch2012-110.swf',
'ext': 'flv',
}]
description = self._html_search_meta('DC.description', webpage)
upload_date = unified_strdate(
self._html_search_meta('DC.date.created', webpage))
thumbnail = self._og_search_thumbnail(webpage)
duration = float_or_none(self._search_regex(
r'id=["\']duration_in_ms[^+]>(\d+)', webpage, 'duration',
default=None), scale=1000)
thumbnails = []
images = media.get('images')
if isinstance(images, list):
for image in images:
if not isinstance(image, dict):
continue
image_url = image.get('url')
if not image_url or not isinstance(image_url, compat_str):
continue
thumbnails.append({
'url': image_url,
'width': int_or_none(image.get('width')),
'height': int_or_none(image.get('height')),
})
return {
'id': video_id,
'title': title,
'formats': formats,
'id': uuid,
'display_id': display_id,
'description': description,
'upload_date': upload_date,
'thumbnail': thumbnail,
'duration': duration,
'title': title,
'alt_title': media.get('subtitle'),
'description': media.get('description') or media.get('teaser'),
'timestamp': unified_timestamp(media.get('created')),
'duration': float_or_none(media.get('duration_in_ms'), scale=1000),
'thumbnails': thumbnails,
'formats': formats,
}

View File

@@ -1040,6 +1040,7 @@ from .stretchinternet import StretchInternetIE
from .sunporno import SunPornoIE
from .svt import (
SVTIE,
SVTPageIE,
SVTPlayIE,
SVTSeriesIE,
)

View File

@@ -1395,17 +1395,6 @@ class GenericIE(InfoExtractor):
'skip_download': True,
},
},
# SVT embed
{
'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
'info_dict': {
'id': '2900353',
'ext': 'flv',
'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)',
'duration': 27,
'age_limit': 0,
},
},
# Crooks and Liars embed
{
'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',

View File

@@ -4,6 +4,7 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_HTTPError
from ..utils import (
determine_ext,
ExtractorError,
@@ -14,8 +15,8 @@ from ..utils import (
class Go90IE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?go90\.com/videos/(?P<id>[0-9a-zA-Z]+)'
_TEST = {
_VALID_URL = r'https?://(?:www\.)?go90\.com/(?:videos|embed)/(?P<id>[0-9a-zA-Z]+)'
_TESTS = [{
'url': 'https://www.go90.com/videos/84BUqjLpf9D',
'md5': 'efa7670dbbbf21a7b07b360652b24a32',
'info_dict': {
@@ -27,15 +28,31 @@ class Go90IE(InfoExtractor):
'upload_date': '20170411',
'age_limit': 14,
}
}
}, {
'url': 'https://www.go90.com/embed/261MflWkD3N',
'only_matching': True,
}]
_GEO_BYPASS = False
def _real_extract(self, url):
video_id = self._match_id(url)
video_data = self._download_json(
'https://www.go90.com/api/view/items/' + video_id,
video_id, headers={
try:
headers = self.geo_verification_headers()
headers.update({
'Content-Type': 'application/json; charset=utf-8',
}, data=b'{"client":"web","device_type":"pc"}')
})
video_data = self._download_json(
'https://www.go90.com/api/view/items/' + video_id, video_id,
headers=headers, data=b'{"client":"web","device_type":"pc"}')
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
message = self._parse_json(e.cause.read().decode(), None)['error']['message']
if 'region unavailable' in message:
self.raise_geo_restricted(countries=['US'])
raise ExtractorError(message, expected=True)
raise
if video_data.get('requires_drm'):
raise ExtractorError('This video is DRM protected.', expected=True)
main_video_asset = video_data['main_video_asset']

View File

@@ -4,7 +4,6 @@ import re
from .common import InfoExtractor
from ..compat import (
compat_HTTPError,
compat_str,
compat_urlparse,
)
@@ -44,21 +43,15 @@ class LyndaBaseIE(InfoExtractor):
form_data = self._hidden_inputs(form_html)
form_data.update(extra_form_data)
try:
response = self._download_json(
action_url, None, note,
data=urlencode_postdata(form_data),
headers={
'Referer': referrer_url,
'X-Requested-With': 'XMLHttpRequest',
})
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 500:
response = self._parse_json(e.cause.read().decode('utf-8'), None)
self._check_error(response, ('email', 'password'))
raise
response = self._download_json(
action_url, None, note,
data=urlencode_postdata(form_data),
headers={
'Referer': referrer_url,
'X-Requested-With': 'XMLHttpRequest',
}, expected_status=(418, 500, ))
self._check_error(response, 'ErrorMessage')
self._check_error(response, ('email', 'password', 'ErrorMessage'))
return response, action_url

View File

@@ -42,6 +42,22 @@ class MediasetIE(InfoExtractor):
'categories': ['reality'],
},
'expected_warnings': ['is not a supported codec'],
}, {
'url': 'http://www.video.mediaset.it/video/matrix/full_chiambretti/puntata-del-25-maggio_846685.html',
'md5': '1276f966ac423d16ba255ce867de073e',
'info_dict': {
'id': '846685',
'ext': 'mp4',
'title': 'Puntata del 25 maggio',
'description': 'md5:ee2e456e3eb1dba5e814596655bb5296',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 6565,
'creator': 'mediaset',
'upload_date': '20180525',
'series': 'Matrix',
'categories': ['infotainment'],
},
'expected_warnings': ['HTTP Error 403: Forbidden'],
}, {
# clip
'url': 'http://www.video.mediaset.it/video/gogglebox/clip/un-grande-classico-della-commedia-sexy_661680.html',
@@ -70,16 +86,33 @@ class MediasetIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
video = self._download_json(
'https://www.video.mediaset.it/html/metainfo.sjson',
video_id, 'Downloading media info', query={
'id': video_id
})['video']
title = video['title']
media_id = video.get('guid') or video_id
video_list = self._download_json(
'http://cdnsel01.mediaset.net/GetCdn.aspx',
'http://cdnsel01.mediaset.net/GetCdn2018.aspx',
video_id, 'Downloading video CDN JSON', query={
'streamid': video_id,
'streamid': media_id,
'format': 'json',
})['videoList']
formats = []
for format_url in video_list:
if '.ism' in format_url:
ext = determine_ext(format_url)
if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
format_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False))
elif ext == 'mpd':
formats.extend(self._extract_mpd_formats(
format_url, video_id, mpd_id='dash', fatal=False))
elif ext == 'ism' or '.ism' in format_url:
formats.extend(self._extract_ism_formats(
format_url, video_id, ism_id='mss', fatal=False))
else:
@@ -89,30 +122,23 @@ class MediasetIE(InfoExtractor):
})
self._sort_formats(formats)
mediainfo = self._download_json(
'http://plr.video.mediaset.it/html/metainfo.sjson',
video_id, 'Downloading video info JSON', query={
'id': video_id,
})['video']
title = mediainfo['title']
creator = try_get(
mediainfo, lambda x: x['brand-info']['publisher'], compat_str)
video, lambda x: x['brand-info']['publisher'], compat_str)
category = try_get(
mediainfo, lambda x: x['brand-info']['category'], compat_str)
video, lambda x: x['brand-info']['category'], compat_str)
categories = [category] if category else None
return {
'id': video_id,
'title': title,
'description': mediainfo.get('short-description'),
'thumbnail': mediainfo.get('thumbnail'),
'duration': parse_duration(mediainfo.get('duration')),
'description': video.get('short-description'),
'thumbnail': video.get('thumbnail'),
'duration': parse_duration(video.get('duration')),
'creator': creator,
'upload_date': unified_strdate(mediainfo.get('production-date')),
'webpage_url': mediainfo.get('url'),
'series': mediainfo.get('brand-value'),
'upload_date': unified_strdate(video.get('production-date')),
'webpage_url': video.get('url'),
'series': video.get('brand-value'),
'season': video.get('season'),
'categories': categories,
'formats': formats,
}

View File

@@ -282,7 +282,7 @@ class NPOIE(NPOBaseIE):
video_url = stream_info.get('url')
if not video_url or video_url in urls:
continue
urls.add(item_url)
urls.add(video_url)
if determine_ext(video_url) == 'm3u8':
formats.extend(self._extract_m3u8_formats(
video_url, video_id, ext='mp4',

View File

@@ -27,6 +27,60 @@ from ..utils import (
class PluralsightBaseIE(InfoExtractor):
_API_BASE = 'https://app.pluralsight.com'
_GRAPHQL_EP = '%s/player/api/graphql' % _API_BASE
_GRAPHQL_HEADERS = {
'Content-Type': 'application/json;charset=UTF-8',
}
_GRAPHQL_COURSE_TMPL = '''
query BootstrapPlayer {
rpc {
bootstrapPlayer {
profile {
firstName
lastName
email
username
userHandle
authed
isAuthed
plan
}
course(courseId: "%s") {
name
title
courseHasCaptions
translationLanguages {
code
name
}
supportsWideScreenVideoFormats
timestamp
modules {
name
title
duration
formattedDuration
author
authorized
clips {
authorized
clipId
duration
formattedDuration
id
index
moduleIndex
moduleTitle
name
title
watched
}
}
}
}
}
}'''
def _download_course(self, course_id, url, display_id):
try:
return self._download_course_rpc(course_id, url, display_id)
@@ -39,20 +93,14 @@ class PluralsightBaseIE(InfoExtractor):
def _download_course_rpc(self, course_id, url, display_id):
response = self._download_json(
'%s/player/functions/rpc' % self._API_BASE, display_id,
'Downloading course JSON',
data=json.dumps({
'fn': 'bootstrapPlayer',
'payload': {
'courseId': course_id,
},
}).encode('utf-8'),
headers={
'Content-Type': 'application/json;charset=utf-8',
'Referer': url,
})
self._GRAPHQL_EP, display_id, data=json.dumps({
'query': self._GRAPHQL_COURSE_TMPL % course_id,
'variables': {}
}).encode('utf-8'), headers=self._GRAPHQL_HEADERS)
course = try_get(response, lambda x: x['payload']['course'], dict)
course = try_get(
response, lambda x: x['data']['rpc']['bootstrapPlayer']['course'],
dict)
if course:
return course
@@ -90,6 +138,28 @@ class PluralsightIE(PluralsightBaseIE):
'only_matching': True,
}]
GRAPHQL_VIEWCLIP_TMPL = '''
query viewClip {
viewClip(input: {
author: "%(author)s",
clipIndex: %(clipIndex)d,
courseName: "%(courseName)s",
includeCaptions: %(includeCaptions)s,
locale: "%(locale)s",
mediaType: "%(mediaType)s",
moduleName: "%(moduleName)s",
quality: "%(quality)s"
}) {
urls {
url
cdn
rank
source
},
status
}
}'''
def _real_initialize(self):
self._login()
@@ -277,7 +347,7 @@ class PluralsightIE(PluralsightBaseIE):
f = QUALITIES[quality].copy()
clip_post = {
'author': author,
'includeCaptions': False,
'includeCaptions': 'false',
'clipIndex': int(clip_idx),
'courseName': course_name,
'locale': 'en',
@@ -286,11 +356,23 @@ class PluralsightIE(PluralsightBaseIE):
'quality': '%dx%d' % (f['width'], f['height']),
}
format_id = '%s-%s' % (ext, quality)
viewclip = self._download_json(
'%s/video/clips/viewclip' % self._API_BASE, display_id,
'Downloading %s viewclip JSON' % format_id, fatal=False,
data=json.dumps(clip_post).encode('utf-8'),
headers={'Content-Type': 'application/json;charset=utf-8'})
try:
viewclip = self._download_json(
self._GRAPHQL_EP, display_id,
'Downloading %s viewclip graphql' % format_id,
data=json.dumps({
'query': self.GRAPHQL_VIEWCLIP_TMPL % clip_post,
'variables': {}
}).encode('utf-8'),
headers=self._GRAPHQL_HEADERS)['data']['viewClip']
except ExtractorError:
# Still works but most likely will go soon
viewclip = self._download_json(
'%s/video/clips/viewclip' % self._API_BASE, display_id,
'Downloading %s viewclip JSON' % format_id, fatal=False,
data=json.dumps(clip_post).encode('utf-8'),
headers={'Content-Type': 'application/json;charset=utf-8'})
# Pluralsight tracks multiple sequential calls to ViewClip API and starts
# to return 429 HTTP errors after some time (see

View File

@@ -43,7 +43,8 @@ class PornComIE(InfoExtractor):
config = self._parse_json(
self._search_regex(
r'=\s*({.+?})\s*,\s*[\da-zA-Z_]+\s*=',
(r'=\s*({.+?})\s*;\s*v1ar\b',
r'=\s*({.+?})\s*,\s*[\da-zA-Z_]+\s*='),
webpage, 'config', default='{}'),
display_id, transform_source=js_to_json, fatal=False)
@@ -69,7 +70,7 @@ class PornComIE(InfoExtractor):
'height': int(height),
'filesize_approx': parse_filesize(filesize),
} for format_url, height, filesize in re.findall(
r'<a[^>]+href="(/download/[^"]+)">MPEG4 (\d+)p<span[^>]*>(\d+\s+[a-zA-Z]+)<',
r'<a[^>]+href="(/download/[^"]+)">[^<]*?(\d+)p<span[^>]*>(\d+\s*[a-zA-Z]+)<',
webpage)]
thumbnail = None
duration = None

View File

@@ -12,6 +12,8 @@ from ..utils import (
determine_ext,
dict_get,
int_or_none,
orderedSet,
strip_or_none,
try_get,
urljoin,
compat_str,
@@ -137,7 +139,12 @@ class SVTPlayBaseIE(SVTBaseIE):
class SVTPlayIE(SVTPlayBaseIE):
IE_DESC = 'SVT Play and Öppet arkiv'
_VALID_URL = r'https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/(?:video|klipp|kanaler)/(?P<id>[^/?#&]+)'
_VALID_URL = r'''(?x)
(?:
svt:(?P<svt_id>[^/?#&]+)|
https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/(?:video|klipp|kanaler)/(?P<id>[^/?#&]+)
)
'''
_TESTS = [{
'url': 'http://www.svtplay.se/video/5996901/flygplan-till-haile-selassie/flygplan-till-haile-selassie-2',
'md5': '2b6704fe4a28801e1a098bbf3c5ac611',
@@ -164,10 +171,40 @@ class SVTPlayIE(SVTPlayBaseIE):
}, {
'url': 'https://www.svtplay.se/kanaler/svt1',
'only_matching': True,
}, {
'url': 'svt:1376446-003A',
'only_matching': True,
}, {
'url': 'svt:14278044',
'only_matching': True,
}]
def _adjust_title(self, info):
if info['is_live']:
info['title'] = self._live_title(info['title'])
def _extract_by_video_id(self, video_id, webpage=None):
    """Build an info dict for ``video_id`` from the SVT videoplayer API.

    The API JSON is downloaded with geo verification headers and handed to
    ``self._extract_video``. If the resulting dict has no truthy ``title``,
    one is chosen in this order:

    1. the value ``dict_get`` returns for the ``episode`` / ``series`` keys;
    2. the page's og:title with everything from the first ``|`` separator
       onwards removed (only when ``webpage`` is given);
    3. ``video_id`` itself.

    The title is finally passed through ``self._adjust_title`` before the
    dict is returned.
    """
    api_url = 'https://api.svt.se/videoplayer-api/video/%s' % video_id
    geo_headers = self.geo_verification_headers()
    video_data = self._download_json(api_url, video_id, headers=geo_headers)
    result = self._extract_video(video_data, video_id)
    if not result.get('title'):
        fallback = dict_get(result, ('episode', 'series'))
        if webpage and not fallback:
            # Drop the trailing " | <site name>" style suffix of og:title.
            fallback = re.sub(
                r'\s*\|\s*.+?$', '', self._og_search_title(webpage))
        result['title'] = fallback or video_id
    self._adjust_title(result)
    return result
def _real_extract(self, url):
video_id = self._match_id(url)
mobj = re.match(self._VALID_URL, url)
video_id, svt_id = mobj.group('id', 'svt_id')
if svt_id:
return self._extract_by_video_id(svt_id)
webpage = self._download_webpage(url, video_id)
@@ -179,10 +216,6 @@ class SVTPlayIE(SVTPlayBaseIE):
thumbnail = self._og_search_thumbnail(webpage)
def adjust_title(info):
if info['is_live']:
info['title'] = self._live_title(info['title'])
if data:
video_info = try_get(
data, lambda x: x['context']['dispatcher']['stores']['VideoTitlePageStore']['data']['video'],
@@ -193,24 +226,14 @@ class SVTPlayIE(SVTPlayBaseIE):
'title': data['context']['dispatcher']['stores']['MetaStore']['title'],
'thumbnail': thumbnail,
})
adjust_title(info_dict)
self._adjust_title(info_dict)
return info_dict
video_id = self._search_regex(
svt_id = self._search_regex(
r'<video[^>]+data-video-id=["\']([\da-zA-Z-]+)',
webpage, 'video id', default=None)
webpage, 'video id')
if video_id:
data = self._download_json(
'https://api.svt.se/videoplayer-api/video/%s' % video_id,
video_id, headers=self.geo_verification_headers())
info_dict = self._extract_video(data, video_id)
if not info_dict.get('title'):
info_dict['title'] = re.sub(
r'\s*\|\s*.+?$', '',
info_dict.get('episode') or self._og_search_title(webpage))
adjust_title(info_dict)
return info_dict
return self._extract_by_video_id(svt_id, webpage)
class SVTSeriesIE(SVTPlayBaseIE):
@@ -292,3 +315,57 @@ class SVTSeriesIE(SVTPlayBaseIE):
return self.playlist_result(
entries, series_id, title, metadata.get('description'))
class SVTPageIE(InfoExtractor):
    """Extractor for svt.se pages that embed one or more videos.

    Every distinct numeric ``data-video-id`` found in the page markup becomes
    a ``svt:<id>`` playlist entry that is handed over to ``SVTPlayIE``.
    """

    _VALID_URL = r'https?://(?:www\.)?svt\.se/(?:[^/]+/)*(?P<id>[^/?&#]+)'
    _TESTS = [{
        'url': 'https://www.svt.se/sport/oseedat/guide-sommartraningen-du-kan-gora-var-och-nar-du-vill',
        'info_dict': {
            'id': 'guide-sommartraningen-du-kan-gora-var-och-nar-du-vill',
            'title': 'GUIDE: Sommarträning du kan göra var och när du vill',
        },
        'playlist_count': 7,
    }, {
        'url': 'https://www.svt.se/nyheter/inrikes/ebba-busch-thor-kd-har-delvis-ratt-om-no-go-zoner',
        'info_dict': {
            'id': 'ebba-busch-thor-kd-har-delvis-ratt-om-no-go-zoner',
            'title': 'Ebba Busch Thor har bara delvis rätt om ”no-go-zoner”',
        },
        'playlist_count': 1,
    }, {
        # only programTitle
        'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
        'info_dict': {
            'id': '2900353',
            'ext': 'mp4',
            'title': 'Stjärnorna skojar till det - under SVT-intervjun',
            'duration': 27,
            'age_limit': 0,
        },
    }, {
        'url': 'https://www.svt.se/nyheter/lokalt/vast/svt-testar-tar-nagon-upp-skrapet-1',
        'only_matching': True,
    }, {
        'url': 'https://www.svt.se/vader/manadskronikor/maj2018',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Return False for URLs SVTIE accepts, else defer to the base check."""
        if SVTIE.suitable(url):
            return False
        return super(SVTPageIE, cls).suitable(url)

    def _real_extract(self, url):
        """Download the page and return a playlist of its embedded videos.

        The playlist id is the last path component matched by ``_VALID_URL``;
        the playlist title is the stripped og:title, or ``None`` if absent.
        """
        page_id = self._match_id(url)
        html = self._download_webpage(url, page_id)

        # orderedSet removes repeated ids while keeping first-seen order.
        embedded_ids = orderedSet(
            re.findall(r'data-video-id=["\'](\d+)', html))

        entries = []
        for svt_id in embedded_ids:
            entries.append(self.url_result(
                'svt:%s' % svt_id, ie=SVTPlayIE.ie_key(), video_id=svt_id))

        page_title = strip_or_none(
            self._og_search_title(html, default=None))

        return self.playlist_result(entries, page_id, page_title)

View File

@@ -57,7 +57,7 @@ class VLiveIE(InfoExtractor):
video_id = self._match_id(url)
webpage = self._download_webpage(
'http://www.vlive.tv/video/%s' % video_id, video_id)
'https://www.vlive.tv/video/%s' % video_id, video_id)
VIDEO_PARAMS_RE = r'\bvlive\.video\.init\(([^)]+)'
VIDEO_PARAMS_FIELD = 'video params'
@@ -108,11 +108,11 @@ class VLiveIE(InfoExtractor):
def _live(self, video_id, webpage):
init_page = self._download_webpage(
'http://www.vlive.tv/video/init/view',
'https://www.vlive.tv/video/init/view',
video_id, note='Downloading live webpage',
data=urlencode_postdata({'videoSeq': video_id}),
headers={
'Referer': 'http://www.vlive.tv/video/%s' % video_id,
'Referer': 'https://www.vlive.tv/video/%s' % video_id,
'Content-Type': 'application/x-www-form-urlencoded'
})

View File

@@ -841,11 +841,11 @@ def parseOpts(overrideArguments=None):
postproc.add_option(
'--prefer-avconv',
action='store_false', dest='prefer_ffmpeg',
help='Prefer avconv over ffmpeg for running the postprocessors (default)')
help='Prefer avconv over ffmpeg for running the postprocessors')
postproc.add_option(
'--prefer-ffmpeg',
action='store_true', dest='prefer_ffmpeg',
help='Prefer ffmpeg over avconv for running the postprocessors')
help='Prefer ffmpeg over avconv for running the postprocessors (default)')
postproc.add_option(
'--ffmpeg-location', '--avconv-location', metavar='PATH',
dest='ffmpeg_location',

View File

@@ -77,7 +77,7 @@ class FFmpegPostProcessor(PostProcessor):
def _determine_executables(self):
programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe']
prefer_ffmpeg = False
prefer_ffmpeg = True
self.basename = None
self.probe_basename = None
@@ -85,7 +85,7 @@ class FFmpegPostProcessor(PostProcessor):
self._paths = None
self._versions = None
if self._downloader:
prefer_ffmpeg = self._downloader.params.get('prefer_ffmpeg', False)
prefer_ffmpeg = self._downloader.params.get('prefer_ffmpeg', True)
location = self._downloader.params.get('ffmpeg_location')
if location is not None:
if not os.path.exists(location):
@@ -117,19 +117,19 @@ class FFmpegPostProcessor(PostProcessor):
(p, get_exe_version(p, args=['-version'])) for p in programs)
self._paths = dict((p, p) for p in programs)
if prefer_ffmpeg:
prefs = ('ffmpeg', 'avconv')
else:
if prefer_ffmpeg is False:
prefs = ('avconv', 'ffmpeg')
else:
prefs = ('ffmpeg', 'avconv')
for p in prefs:
if self._versions[p]:
self.basename = p
break
if prefer_ffmpeg:
prefs = ('ffprobe', 'avprobe')
else:
if prefer_ffmpeg is False:
prefs = ('avprobe', 'ffprobe')
else:
prefs = ('ffprobe', 'avprobe')
for p in prefs:
if self._versions[p]:
self.probe_basename = p

View File

@@ -1,3 +1,3 @@
from __future__ import unicode_literals
__version__ = '2018.06.25'
__version__ = '2018.07.04'