youtube-dl/youtube_dl/extractor/screenwavemedia.py

116 lines
4.1 KiB
Python
Raw Normal View History

2013-10-05 09:37:11 +02:00
# encoding: utf-8
2014-02-24 14:44:29 +01:00
from __future__ import unicode_literals
2013-10-05 09:37:11 +02:00
from .common import InfoExtractor
from ..utils import (
int_or_none,
2014-12-12 02:11:58 +01:00
unified_strdate,
js_to_json,
2013-10-05 09:37:11 +02:00
)
2014-12-07 11:52:03 +01:00
2014-12-12 02:11:58 +01:00
class ScreenwaveMediaIE(InfoExtractor):
    """Extractor for videos embedded through the Screenwave Media player.

    Handles ``player*.screenwavemedia.com/...php?...id=<video id>`` URLs.
    """

    # The id group stops at the first '&', '#', '"' or whitespace character so
    # that query parameters following ``id=`` are not swallowed into the video
    # id (which is later interpolated into the player request URL).
    _VALID_URL = r'https?://player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?[^"]*\bid=(?P<id>[^&#"\s]+)'

    _TESTS = [{
        'url': 'http://player.screenwavemedia.com/play/play.php?playerdiv=videoarea&companiondiv=squareAd&id=Cinemassacre-19911',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for a Screenwave Media player URL.

        Downloads the player page (for the title) and the player script (for
        the video server and the list of sources), then turns every source
        into one or more format entries.

        Returns a dict with the keys ``id``, ``title`` and ``formats``.
        """
        video_id = self._match_id(url)

        playerdata = self._download_webpage(
            'http://player.screenwavemedia.com/player.php?id=%s' % video_id,
            video_id, 'Downloading player webpage')

        # The title is embedded as a JS string with escaped slashes ("\/").
        vidtitle = self._search_regex(
            r'\'vidtitle\'\s*:\s*"([^"]+)"', playerdata, 'vidtitle').replace('\\/', '/')

        playerconfig = self._download_webpage(
            'http://player.screenwavemedia.com/player.js',
            video_id, 'Downloading playerconfig webpage')

        videoserver = self._search_regex(
            r"'videoserver'\s*:\s*'([^']+)", playerconfig, 'videoserver')

        # The sources array in player.js is assembled with JS string
        # concatenation; substitute the two JS expressions with their values
        # so the remaining literal can be converted to JSON and parsed.
        sources_js = self._search_regex(
            r"sources\s*:\s*(\[[^\]]+?\])", playerconfig, 'sources')
        sources_js = sources_js.replace(
            "' + thisObj.options.videoserver + '", videoserver)
        sources_js = sources_js.replace("' + playerVidId + '", video_id)
        sources = self._parse_json(js_to_json(sources_js), video_id)

        formats = []
        for source in sources:
            file_url = source.get('file')
            if not file_url:
                # Nothing downloadable in this entry; skip it instead of
                # aborting the whole extraction with a KeyError.
                continue

            source_type = source.get('type')
            if source_type == 'hls':
                formats.extend(self._extract_m3u8_formats(file_url, video_id))
                continue

            format_label = source.get('label')
            # Only search the label when there is one: a missing label would
            # otherwise be handed to the regex search as None.
            height = None
            if format_label:
                height = int_or_none(self._search_regex(
                    r'^(\d+)[pP]', format_label, 'height', default=None))

            formats.append({
                'url': file_url,
                'format': format_label,
                'ext': source_type,
                'height': height,
            })

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': vidtitle,
            'formats': formats,
        }
2014-12-12 02:11:58 +01:00
class TeamFourIE(InfoExtractor):
    """Extractor for teamfourstar.com video pages.

    The page embeds a Screenwave Media player; this extractor collects the
    page-level metadata and delegates the media itself to the embedded player
    URL through a ``url_transparent`` result.
    """

    _VALID_URL = r'https?://(?:www\.)?teamfourstar\.com/video/(?P<id>[a-z0-9\-]+)/?'
    _TEST = {
        'url': 'http://teamfourstar.com/video/a-moment-with-tfs-episode-4/',
        'info_dict': {
            'id': 'TeamFourStar-5292a02f20bfa',
            'ext': 'mp4',
            'upload_date': '20130401',
            'description': 'Check out this and more on our website: http://teamfourstar.com\nTFS Store: http://sharkrobot.com/team-four-star\nFollow on Twitter: http://twitter.com/teamfourstar\nLike on FB: http://facebook.com/teamfourstar',
            'title': 'A Moment With TFS Episode 4',
        }
    }

    def _real_extract(self, url):
        """Scrape the video page and return a ``url_transparent`` result.

        The title and the embedded player URL are required; date, description
        and thumbnail are best-effort.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # URL of the embedded Screenwave Media player (the src attribute).
        playerdata_url = self._search_regex(
            r'src="(http://player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?[^"]*\bid=.+?)"',
            webpage, 'player data URL')

        video_title = self._html_search_regex(
            r'<div class="heroheadingtitle">(?P<title>.+?)</div>',
            webpage, 'title')

        # The date lookup is non-fatal, so it may yield nothing; only convert
        # it when a value was actually found so that the converter is never
        # called with None.
        raw_date = self._html_search_regex(
            r'<div class="heroheadingdate">(?P<date>.+?)</div>',
            webpage, 'date', fatal=False)
        video_date = unified_strdate(raw_date) if raw_date else None

        video_description = self._html_search_regex(
            r'(?s)<div class="postcontent">(?P<description>.+?)</div>',
            webpage, 'description', fatal=False)

        video_thumbnail = self._og_search_thumbnail(webpage)

        return {
            '_type': 'url_transparent',
            'display_id': display_id,
            'title': video_title,
            'description': video_description,
            'upload_date': video_date,
            'thumbnail': video_thumbnail,
            'url': playerdata_url,
        }