Compare commits
46 Commits
2016.08.13
...
2016.08.17
Author | SHA1 | Date | |
---|---|---|---|
|
b3d7dce429 | ||
|
a44694ab4e | ||
|
ab19b46b88 | ||
|
8804f10e6b | ||
|
6be17c0870 | ||
|
8652770bd2 | ||
|
2a1321a272 | ||
|
9c0fa60bf3 | ||
|
502d87c546 | ||
|
b35b0d73d8 | ||
|
6e7e4a6edf | ||
|
53fef319f1 | ||
|
2cabee2a7d | ||
|
11f502fac1 | ||
|
98affc1a48 | ||
|
70a2829fee | ||
|
837e56c8ee | ||
|
b5ddee8c77 | ||
|
fb64adcbd3 | ||
|
4f640f2890 | ||
|
254e64a20a | ||
|
818ac213eb | ||
|
cbef4d5c9f | ||
|
bf90c46790 | ||
|
69eb4d699f | ||
|
6d8ec8c3b7 | ||
|
760845ce99 | ||
|
5c2d087221 | ||
|
b6c4e36728 | ||
|
1a57b8c18c | ||
|
24eb13b1c6 | ||
|
525e0316c0 | ||
|
7e60ce9cf7 | ||
|
e811bcf8f8 | ||
|
6103f59095 | ||
|
9fa5789279 | ||
|
d2ac04674d | ||
|
1fd6e30988 | ||
|
884cdb6cd9 | ||
|
9771b1f901 | ||
|
2118fdd1a9 | ||
|
320d597c21 | ||
|
aaf44a2f47 | ||
|
fafabc0712 | ||
|
409760a932 | ||
|
097eba019d |
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@@ -6,8 +6,8 @@
|
||||
|
||||
---
|
||||
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.08.13*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with an outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.08.13**
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.08.17*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with an outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.08.17**
|
||||
|
||||
### Before submitting an *issue* make sure you have:
|
||||
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2016.08.13
|
||||
[debug] youtube-dl version 2016.08.17
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
29
ChangeLog
29
ChangeLog
@@ -1,3 +1,31 @@
|
||||
version 2016.08.17
|
||||
|
||||
Core
|
||||
+ Add _get_netrc_login_info
|
||||
|
||||
Extractors
|
||||
* [mofosex] Extract all formats (#10335)
|
||||
+ [generic] Add support for vbox7 embeds
|
||||
+ [vbox7] Add support for embed URLs
|
||||
+ [viafree] Add extractor (#10358)
|
||||
+ [mtg] Add support for viafree URLs (#10358)
|
||||
* [theplatform] Extract all subtitles per language
|
||||
* [xvideos] Fix HLS extraction (#10356)
|
||||
+ [amcnetworks] Add extractor
|
||||
+ [bbc:playlist] Add support for pagination (#10349)
|
||||
+ [fxnetworks] Add extractor (#9462)
|
||||
* [cbslocal] Fix extraction for SendtoNews-based videos
|
||||
* [sendtonews] Fix extraction
|
||||
* [jwplatform] Extract video id from JWPlayer data
|
||||
- [zippcast] Remove extractor (#10332)
|
||||
+ [viceland] Add extractor (#8799)
|
||||
+ [adobepass] Add base extractor for Adobe Pass Authentication
|
||||
* [life:embed] Improve extraction
|
||||
* [vgtv] Detect geo restricted videos (#10348)
|
||||
+ [uplynk] Add extractor
|
||||
* [xiami] Fix extraction (#10342)
|
||||
|
||||
|
||||
version 2016.08.13
|
||||
|
||||
Core
|
||||
@@ -23,6 +51,7 @@ Extractors
|
||||
+ [pbs] Add support for high quality HTTP formats
|
||||
+ [crunchyroll] Add support for HLS formats (#10301)
|
||||
|
||||
|
||||
version 2016.08.12
|
||||
|
||||
Core
|
||||
|
@@ -35,6 +35,7 @@
|
||||
- **AlJazeera**
|
||||
- **Allocine**
|
||||
- **AlphaPorno**
|
||||
- **AMCNetworks**
|
||||
- **AnimeOnDemand**
|
||||
- **anitube.se**
|
||||
- **AnySex**
|
||||
@@ -247,6 +248,7 @@
|
||||
- **Funimation**
|
||||
- **FunnyOrDie**
|
||||
- **Fusion**
|
||||
- **FXNetworks**
|
||||
- **GameInformer**
|
||||
- **GameOne**
|
||||
- **gameone:playlist**
|
||||
@@ -398,6 +400,7 @@
|
||||
- **Moviezine**
|
||||
- **MPORA**
|
||||
- **MSN**
|
||||
- **mtg**: MTG services
|
||||
- **MTV**
|
||||
- **mtv.de**
|
||||
- **mtvservices:embedded**
|
||||
@@ -731,7 +734,6 @@
|
||||
- **tvp**: Telewizja Polska
|
||||
- **tvp:embed**: Telewizja Polska
|
||||
- **tvp:series**
|
||||
- **TVPlay**: TV3Play and related services
|
||||
- **Tweakers**
|
||||
- **twitch:chapter**
|
||||
- **twitch:clips**
|
||||
@@ -748,6 +750,8 @@
|
||||
- **UDNEmbed**: 聯合影音
|
||||
- **Unistra**
|
||||
- **uol.com.br**
|
||||
- **uplynk**
|
||||
- **uplynk:preplay**
|
||||
- **Urort**: NRK P3 Urørt
|
||||
- **URPlay**
|
||||
- **USAToday**
|
||||
@@ -765,7 +769,9 @@
|
||||
- **VevoPlaylist**
|
||||
- **VGTV**: VGTV, BTTV, FTV, Aftenposten and Aftonbladet
|
||||
- **vh1.com**
|
||||
- **Viafree**
|
||||
- **Vice**
|
||||
- **Viceland**
|
||||
- **ViceShow**
|
||||
- **Vidbit**
|
||||
- **Viddler**
|
||||
@@ -887,4 +893,3 @@
|
||||
- **ZDFChannel**
|
||||
- **zingmp3:album**: mp3.zing.vn albums
|
||||
- **zingmp3:song**: mp3.zing.vn songs
|
||||
- **ZippCast**
|
||||
|
@@ -20,6 +20,7 @@ from ..utils import (
|
||||
encodeFilename,
|
||||
sanitize_open,
|
||||
parse_m3u8_attributes,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
@@ -82,6 +83,7 @@ class HlsFD(FragmentFD):
|
||||
|
||||
self._prepare_and_start_frag_download(ctx)
|
||||
|
||||
extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url')
|
||||
i = 0
|
||||
media_sequence = 0
|
||||
decrypt_info = {'METHOD': 'NONE'}
|
||||
@@ -95,6 +97,8 @@ class HlsFD(FragmentFD):
|
||||
if re.match(r'^https?://', line)
|
||||
else compat_urlparse.urljoin(man_url, line))
|
||||
frag_filename = '%s-Frag%d' % (ctx['tmpfilename'], i)
|
||||
if extra_param_to_segment_url:
|
||||
frag_url = update_url_query(frag_url, extra_param_to_segment_url)
|
||||
success = ctx['dl'].download(frag_filename, {'url': frag_url})
|
||||
if not success:
|
||||
return False
|
||||
@@ -120,6 +124,8 @@ class HlsFD(FragmentFD):
|
||||
if not re.match(r'^https?://', decrypt_info['URI']):
|
||||
decrypt_info['URI'] = compat_urlparse.urljoin(
|
||||
man_url, decrypt_info['URI'])
|
||||
if extra_param_to_segment_url:
|
||||
decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_param_to_segment_url)
|
||||
decrypt_info['KEY'] = self.ydl.urlopen(decrypt_info['URI']).read()
|
||||
elif line.startswith('#EXT-X-MEDIA-SEQUENCE'):
|
||||
media_sequence = int(line[22:])
|
||||
|
134
youtube_dl/extractor/adobepass.py
Normal file
134
youtube_dl/extractor/adobepass.py
Normal file
@@ -0,0 +1,134 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import time
|
||||
import xml.etree.ElementTree as etree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
unescapeHTML,
|
||||
urlencode_postdata,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
class AdobePassIE(InfoExtractor):
|
||||
_SERVICE_PROVIDER_TEMPLATE = 'https://sp.auth.adobe.com/adobe-services/%s'
|
||||
_USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0'
|
||||
|
||||
@staticmethod
|
||||
def _get_mvpd_resource(provider_id, title, guid, rating):
|
||||
channel = etree.Element('channel')
|
||||
channel_title = etree.SubElement(channel, 'title')
|
||||
channel_title.text = provider_id
|
||||
item = etree.SubElement(channel, 'item')
|
||||
resource_title = etree.SubElement(item, 'title')
|
||||
resource_title.text = title
|
||||
resource_guid = etree.SubElement(item, 'guid')
|
||||
resource_guid.text = guid
|
||||
resource_rating = etree.SubElement(item, 'media:rating')
|
||||
resource_rating.attrib = {'scheme': 'urn:v-chip'}
|
||||
resource_rating.text = rating
|
||||
return '<rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/">' + etree.tostring(channel).decode() + '</rss>'
|
||||
|
||||
def _extract_mvpd_auth(self, url, video_id, requestor_id, resource):
|
||||
def xml_text(xml_str, tag):
|
||||
return self._search_regex(
|
||||
'<%s>(.+?)</%s>' % (tag, tag), xml_str, tag)
|
||||
|
||||
mvpd_headers = {
|
||||
'ap_42': 'anonymous',
|
||||
'ap_11': 'Linux i686',
|
||||
'ap_z': self._USER_AGENT,
|
||||
'User-Agent': self._USER_AGENT,
|
||||
}
|
||||
|
||||
guid = xml_text(resource, 'guid')
|
||||
requestor_info = self._downloader.cache.load('mvpd', requestor_id) or {}
|
||||
authn_token = requestor_info.get('authn_token')
|
||||
if authn_token:
|
||||
token_expires = unified_timestamp(re.sub(r'[_ ]GMT', '', xml_text(authn_token, 'simpleTokenExpires')))
|
||||
if token_expires and token_expires <= int(time.time()):
|
||||
authn_token = None
|
||||
requestor_info = {}
|
||||
if not authn_token:
|
||||
# TODO add support for other TV Providers
|
||||
mso_id = 'DTV'
|
||||
username, password = self._get_netrc_login_info(mso_id)
|
||||
if not username or not password:
|
||||
return ''
|
||||
|
||||
def post_form(form_page, note, data={}):
|
||||
post_url = self._html_search_regex(r'<form[^>]+action=(["\'])(?P<url>.+?)\1', form_page, 'post url', group='url')
|
||||
return self._download_webpage(
|
||||
post_url, video_id, note, data=urlencode_postdata(data or self._hidden_inputs(form_page)), headers={
|
||||
'Content-Type': 'application/x-www-form-urlencoded',
|
||||
})
|
||||
|
||||
provider_redirect_page = self._download_webpage(
|
||||
self._SERVICE_PROVIDER_TEMPLATE % 'authenticate/saml', video_id,
|
||||
'Downloading Provider Redirect Page', query={
|
||||
'noflash': 'true',
|
||||
'mso_id': mso_id,
|
||||
'requestor_id': requestor_id,
|
||||
'no_iframe': 'false',
|
||||
'domain_name': 'adobe.com',
|
||||
'redirect_url': url,
|
||||
})
|
||||
provider_login_page = post_form(
|
||||
provider_redirect_page, 'Downloading Provider Login Page')
|
||||
mvpd_confirm_page = post_form(provider_login_page, 'Logging in', {
|
||||
'username': username,
|
||||
'password': password,
|
||||
})
|
||||
post_form(mvpd_confirm_page, 'Confirming Login')
|
||||
|
||||
session = self._download_webpage(
|
||||
self._SERVICE_PROVIDER_TEMPLATE % 'session', video_id,
|
||||
'Retrieving Session', data=urlencode_postdata({
|
||||
'_method': 'GET',
|
||||
'requestor_id': requestor_id,
|
||||
}), headers=mvpd_headers)
|
||||
if '<pendingLogout' in session:
|
||||
self._downloader.cache.store('mvpd', requestor_id, {})
|
||||
return self._extract_mvpd_auth(url, video_id, requestor_id, resource)
|
||||
authn_token = unescapeHTML(xml_text(session, 'authnToken'))
|
||||
requestor_info['authn_token'] = authn_token
|
||||
self._downloader.cache.store('mvpd', requestor_id, requestor_info)
|
||||
|
||||
authz_token = requestor_info.get(guid)
|
||||
if not authz_token:
|
||||
authorize = self._download_webpage(
|
||||
self._SERVICE_PROVIDER_TEMPLATE % 'authorize', video_id,
|
||||
'Retrieving Authorization Token', data=urlencode_postdata({
|
||||
'resource_id': resource,
|
||||
'requestor_id': requestor_id,
|
||||
'authentication_token': authn_token,
|
||||
'mso_id': xml_text(authn_token, 'simpleTokenMsoID'),
|
||||
'userMeta': '1',
|
||||
}), headers=mvpd_headers)
|
||||
if '<pendingLogout' in authorize:
|
||||
self._downloader.cache.store('mvpd', requestor_id, {})
|
||||
return self._extract_mvpd_auth(url, video_id, requestor_id, resource)
|
||||
authz_token = unescapeHTML(xml_text(authorize, 'authzToken'))
|
||||
requestor_info[guid] = authz_token
|
||||
self._downloader.cache.store('mvpd', requestor_id, requestor_info)
|
||||
|
||||
mvpd_headers.update({
|
||||
'ap_19': xml_text(authn_token, 'simpleSamlNameID'),
|
||||
'ap_23': xml_text(authn_token, 'simpleSamlSessionIndex'),
|
||||
})
|
||||
|
||||
short_authorize = self._download_webpage(
|
||||
self._SERVICE_PROVIDER_TEMPLATE % 'shortAuthorize',
|
||||
video_id, 'Retrieving Media Token', data=urlencode_postdata({
|
||||
'authz_token': authz_token,
|
||||
'requestor_id': requestor_id,
|
||||
'session_guid': xml_text(authn_token, 'simpleTokenAuthenticationGuid'),
|
||||
'hashed_guid': 'false',
|
||||
}), headers=mvpd_headers)
|
||||
if '<pendingLogout' in short_authorize:
|
||||
self._downloader.cache.store('mvpd', requestor_id, {})
|
||||
return self._extract_mvpd_auth(url, video_id, requestor_id, resource)
|
||||
return short_authorize
|
@@ -109,7 +109,10 @@ class AENetworksIE(AENetworksBaseIE):
|
||||
info = self._parse_theplatform_metadata(theplatform_metadata)
|
||||
if theplatform_metadata.get('AETN$isBehindWall'):
|
||||
requestor_id = self._DOMAIN_TO_REQUESTOR_ID[domain]
|
||||
resource = '<rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/"><channel><title>%s</title><item><title>%s</title><guid>%s</guid><media:rating scheme="urn:v-chip">%s</media:rating></item></channel></rss>' % (requestor_id, theplatform_metadata['title'], theplatform_metadata['AETN$PPL_pplProgramId'], theplatform_metadata['ratings'][0]['rating'])
|
||||
resource = self._get_mvpd_resource(
|
||||
requestor_id, theplatform_metadata['title'],
|
||||
theplatform_metadata.get('AETN$PPL_pplProgramId') or theplatform_metadata.get('AETN$PPL_pplProgramId_OLD'),
|
||||
theplatform_metadata['ratings'][0]['rating'])
|
||||
query['auth'] = self._extract_mvpd_auth(
|
||||
url, video_id, requestor_id, resource)
|
||||
info.update(self._search_json_ld(webpage, video_id, fatal=False))
|
||||
|
91
youtube_dl/extractor/amcnetworks.py
Normal file
91
youtube_dl/extractor/amcnetworks.py
Normal file
@@ -0,0 +1,91 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .theplatform import ThePlatformIE
|
||||
from ..utils import (
|
||||
update_url_query,
|
||||
parse_age_limit,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class AMCNetworksIE(ThePlatformIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:amc|bbcamerica|ifc|wetv)\.com/(?:movies/|shows/[^/]+/(?:full-episodes/)?season-\d+/episode-\d+(?:-(?:[^/]+/)?|/))(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ifc.com/shows/maron/season-04/episode-01/step-1',
|
||||
'md5': '',
|
||||
'info_dict': {
|
||||
'id': 's3MX01Nl4vPH',
|
||||
'ext': 'mp4',
|
||||
'title': 'Maron - Season 4 - Step 1',
|
||||
'description': 'In denial about his current situation, Marc is reluctantly convinced by his friends to enter rehab. Starring Marc Maron and Constance Zimmer.',
|
||||
'age_limit': 17,
|
||||
'upload_date': '20160505',
|
||||
'timestamp': 1462468831,
|
||||
'uploader': 'AMCN',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.bbcamerica.com/shows/the-hunt/full-episodes/season-1/episode-01-the-hardest-challenge',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.amc.com/shows/preacher/full-episodes/season-01/episode-00/pilot',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.wetv.com/shows/million-dollar-matchmaker/season-01/episode-06-the-dumped-dj-and-shallow-hal',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.ifc.com/movies/chaos',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
query = {
|
||||
'mbr': 'true',
|
||||
'manifest': 'm3u',
|
||||
}
|
||||
media_url = self._search_regex(r'window\.platformLinkURL\s*=\s*[\'"]([^\'"]+)', webpage, 'media url')
|
||||
theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
|
||||
r'https?://link.theplatform.com/s/([^?]+)', media_url, 'theplatform_path'), display_id)
|
||||
info = self._parse_theplatform_metadata(theplatform_metadata)
|
||||
video_id = theplatform_metadata['pid']
|
||||
title = theplatform_metadata['title']
|
||||
rating = theplatform_metadata['ratings'][0]['rating']
|
||||
auth_required = self._search_regex(r'window\.authRequired\s*=\s*(true|false);', webpage, 'auth required')
|
||||
if auth_required == 'true':
|
||||
requestor_id = self._search_regex(r'window\.requestor_id\s*=\s*[\'"]([^\'"]+)', webpage, 'requestor id')
|
||||
resource = self._get_mvpd_resource(requestor_id, title, video_id, rating)
|
||||
query['auth'] = self._extract_mvpd_auth(url, video_id, requestor_id, resource)
|
||||
media_url = update_url_query(media_url, query)
|
||||
formats, subtitles = self._extract_theplatform_smil(media_url, video_id)
|
||||
self._sort_formats(formats)
|
||||
info.update({
|
||||
'id': video_id,
|
||||
'subtitles': subtitles,
|
||||
'formats': formats,
|
||||
'age_limit': parse_age_limit(parse_age_limit(rating)),
|
||||
})
|
||||
ns_keys = theplatform_metadata.get('$xmlns', {}).keys()
|
||||
if ns_keys:
|
||||
ns = list(ns_keys)[0]
|
||||
series = theplatform_metadata.get(ns + '$show')
|
||||
season_number = int_or_none(theplatform_metadata.get(ns + '$season'))
|
||||
episode = theplatform_metadata.get(ns + '$episodeTitle')
|
||||
episode_number = int_or_none(theplatform_metadata.get(ns + '$episode'))
|
||||
if season_number:
|
||||
title = 'Season %d - %s' % (season_number, title)
|
||||
if series:
|
||||
title = '%s - %s' % (series, title)
|
||||
info.update({
|
||||
'title': title,
|
||||
'series': series,
|
||||
'season_number': season_number,
|
||||
'episode': episode,
|
||||
'episode_number': episode_number,
|
||||
})
|
||||
return info
|
@@ -2,6 +2,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import itertools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
@@ -17,6 +18,7 @@ from ..utils import (
|
||||
from ..compat import (
|
||||
compat_etree_fromstring,
|
||||
compat_HTTPError,
|
||||
compat_urlparse,
|
||||
)
|
||||
|
||||
|
||||
@@ -1056,19 +1058,35 @@ class BBCCoUkArticleIE(InfoExtractor):
|
||||
|
||||
|
||||
class BBCCoUkPlaylistBaseIE(InfoExtractor):
|
||||
def _entries(self, webpage, url, playlist_id):
|
||||
single_page = 'page' in compat_urlparse.parse_qs(
|
||||
compat_urlparse.urlparse(url).query)
|
||||
for page_num in itertools.count(2):
|
||||
for video_id in re.findall(
|
||||
self._VIDEO_ID_TEMPLATE % BBCCoUkIE._ID_REGEX, webpage):
|
||||
yield self.url_result(
|
||||
self._URL_TEMPLATE % video_id, BBCCoUkIE.ie_key())
|
||||
if single_page:
|
||||
return
|
||||
next_page = self._search_regex(
|
||||
r'<li[^>]+class=(["\'])pagination_+next\1[^>]*><a[^>]+href=(["\'])(?P<url>(?:(?!\2).)+)\2',
|
||||
webpage, 'next page url', default=None, group='url')
|
||||
if not next_page:
|
||||
break
|
||||
webpage = self._download_webpage(
|
||||
compat_urlparse.urljoin(url, next_page), playlist_id,
|
||||
'Downloading page %d' % page_num, page_num)
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
|
||||
entries = [
|
||||
self.url_result(self._URL_TEMPLATE % video_id, BBCCoUkIE.ie_key())
|
||||
for video_id in re.findall(
|
||||
self._VIDEO_ID_TEMPLATE % BBCCoUkIE._ID_REGEX, webpage)]
|
||||
|
||||
title, description = self._extract_title_and_description(webpage)
|
||||
|
||||
return self.playlist_result(entries, playlist_id, title, description)
|
||||
return self.playlist_result(
|
||||
self._entries(webpage, url, playlist_id),
|
||||
playlist_id, title, description)
|
||||
|
||||
|
||||
class BBCCoUkIPlayerPlaylistIE(BBCCoUkPlaylistBaseIE):
|
||||
@@ -1117,6 +1135,24 @@ class BBCCoUkPlaylistIE(BBCCoUkPlaylistBaseIE):
|
||||
'description': 'French thriller serial about a missing teenager.',
|
||||
},
|
||||
'playlist_mincount': 7,
|
||||
}, {
|
||||
# multipage playlist, explicit page
|
||||
'url': 'http://www.bbc.co.uk/programmes/b00mfl7n/clips?page=1',
|
||||
'info_dict': {
|
||||
'id': 'b00mfl7n',
|
||||
'title': 'Frozen Planet - Clips - BBC One',
|
||||
'description': 'md5:65dcbf591ae628dafe32aa6c4a4a0d8c',
|
||||
},
|
||||
'playlist_mincount': 24,
|
||||
}, {
|
||||
# multipage playlist, all pages
|
||||
'url': 'http://www.bbc.co.uk/programmes/b00mfl7n/clips',
|
||||
'info_dict': {
|
||||
'id': 'b00mfl7n',
|
||||
'title': 'Frozen Planet - Clips - BBC One',
|
||||
'description': 'md5:65dcbf591ae628dafe32aa6c4a4a0d8c',
|
||||
},
|
||||
'playlist_mincount': 142,
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/broadcasts/2016/06',
|
||||
'only_matching': True,
|
||||
|
@@ -41,13 +41,8 @@ class CBSLocalIE(AnvatoIE):
|
||||
'url': 'http://cleveland.cbslocal.com/2016/05/16/indians-score-season-high-15-runs-in-blowout-win-over-reds-rapid-reaction/',
|
||||
'info_dict': {
|
||||
'id': 'GxfCe0Zo7D-175909-5588',
|
||||
'ext': 'mp4',
|
||||
'title': 'Recap: CLE 15, CIN 6',
|
||||
'description': '5/16/16: Indians\' bats explode for 15 runs in a win',
|
||||
'upload_date': '20160516',
|
||||
'timestamp': 1463433840,
|
||||
'duration': 49,
|
||||
},
|
||||
'playlist_count': 9,
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
@@ -60,12 +55,11 @@ class CBSLocalIE(AnvatoIE):
|
||||
|
||||
sendtonews_url = SendtoNewsIE._extract_url(webpage)
|
||||
if sendtonews_url:
|
||||
info_dict = {
|
||||
'_type': 'url_transparent',
|
||||
'url': compat_urlparse.urljoin(url, sendtonews_url),
|
||||
}
|
||||
else:
|
||||
info_dict = self._extract_anvato_videos(webpage, display_id)
|
||||
return self.url_result(
|
||||
compat_urlparse.urljoin(url, sendtonews_url),
|
||||
ie=SendtoNewsIE.ie_key())
|
||||
|
||||
info_dict = self._extract_anvato_videos(webpage, display_id)
|
||||
|
||||
time_str = self._html_search_regex(
|
||||
r'class="entry-date">([^<]+)<', webpage, 'released date', fatal=False)
|
||||
|
@@ -70,7 +70,8 @@ class CBSNewsLiveVideoIE(InfoExtractor):
|
||||
IE_DESC = 'CBS News Live Videos'
|
||||
_VALID_URL = r'https?://(?:www\.)?cbsnews\.com/live/video/(?P<id>[\da-z_-]+)'
|
||||
|
||||
_TESTS = [{
|
||||
# Live videos get deleted soon. See http://www.cbsnews.com/live/ for the latest examples
|
||||
_TEST = {
|
||||
'url': 'http://www.cbsnews.com/live/video/clinton-sanders-prepare-to-face-off-in-nh/',
|
||||
'info_dict': {
|
||||
'id': 'clinton-sanders-prepare-to-face-off-in-nh',
|
||||
@@ -78,15 +79,8 @@ class CBSNewsLiveVideoIE(InfoExtractor):
|
||||
'title': 'Clinton, Sanders Prepare To Face Off In NH',
|
||||
'duration': 334,
|
||||
},
|
||||
'skip': 'Video gone, redirected to http://www.cbsnews.com/live/',
|
||||
}, {
|
||||
'url': 'http://www.cbsnews.com/live/video/video-shows-intense-paragliding-accident/',
|
||||
'info_dict': {
|
||||
'id': 'video-shows-intense-paragliding-accident',
|
||||
'ext': 'flv',
|
||||
'title': 'Video Shows Intense Paragliding Accident',
|
||||
},
|
||||
}]
|
||||
'skip': 'Video gone',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
@@ -662,6 +662,24 @@ class InfoExtractor(object):
|
||||
else:
|
||||
return res
|
||||
|
||||
def _get_netrc_login_info(self, netrc_machine=None):
|
||||
username = None
|
||||
password = None
|
||||
netrc_machine = netrc_machine or self._NETRC_MACHINE
|
||||
|
||||
if self._downloader.params.get('usenetrc', False):
|
||||
try:
|
||||
info = netrc.netrc().authenticators(netrc_machine)
|
||||
if info is not None:
|
||||
username = info[0]
|
||||
password = info[2]
|
||||
else:
|
||||
raise netrc.NetrcParseError('No authenticators for %s' % netrc_machine)
|
||||
except (IOError, netrc.NetrcParseError) as err:
|
||||
self._downloader.report_warning('parsing .netrc: %s' % error_to_compat_str(err))
|
||||
|
||||
return (username, password)
|
||||
|
||||
def _get_login_info(self):
|
||||
"""
|
||||
Get the login info as (username, password)
|
||||
@@ -679,16 +697,8 @@ class InfoExtractor(object):
|
||||
if downloader_params.get('username') is not None:
|
||||
username = downloader_params['username']
|
||||
password = downloader_params['password']
|
||||
elif downloader_params.get('usenetrc', False):
|
||||
try:
|
||||
info = netrc.netrc().authenticators(self._NETRC_MACHINE)
|
||||
if info is not None:
|
||||
username = info[0]
|
||||
password = info[2]
|
||||
else:
|
||||
raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
|
||||
except (IOError, netrc.NetrcParseError) as err:
|
||||
self._downloader.report_warning('parsing .netrc: %s' % error_to_compat_str(err))
|
||||
else:
|
||||
username, password = self._get_netrc_login_info()
|
||||
|
||||
return (username, password)
|
||||
|
||||
|
@@ -29,6 +29,7 @@ from .aftonbladet import AftonbladetIE
|
||||
from .airmozilla import AirMozillaIE
|
||||
from .aljazeera import AlJazeeraIE
|
||||
from .alphaporno import AlphaPornoIE
|
||||
from .amcnetworks import AMCNetworksIE
|
||||
from .animeondemand import AnimeOnDemandIE
|
||||
from .anitube import AnitubeIE
|
||||
from .anysex import AnySexIE
|
||||
@@ -287,6 +288,7 @@ from .freevideo import FreeVideoIE
|
||||
from .funimation import FunimationIE
|
||||
from .funnyordie import FunnyOrDieIE
|
||||
from .fusion import FusionIE
|
||||
from .fxnetworks import FXNetworksIE
|
||||
from .gameinformer import GameInformerIE
|
||||
from .gameone import (
|
||||
GameOneIE,
|
||||
@@ -896,7 +898,10 @@ from .tvp import (
|
||||
TVPIE,
|
||||
TVPSeriesIE,
|
||||
)
|
||||
from .tvplay import TVPlayIE
|
||||
from .tvplay import (
|
||||
TVPlayIE,
|
||||
ViafreeIE,
|
||||
)
|
||||
from .tweakers import TweakersIE
|
||||
from .twentyfourvideo import TwentyFourVideoIE
|
||||
from .twentymin import TwentyMinutenIE
|
||||
@@ -926,6 +931,10 @@ from .udn import UDNEmbedIE
|
||||
from .digiteka import DigitekaIE
|
||||
from .unistra import UnistraIE
|
||||
from .uol import UOLIE
|
||||
from .uplynk import (
|
||||
UplynkIE,
|
||||
UplynkPreplayIE,
|
||||
)
|
||||
from .urort import UrortIE
|
||||
from .urplay import URPlayIE
|
||||
from .usatoday import USATodayIE
|
||||
@@ -954,6 +963,7 @@ from .vice import (
|
||||
ViceIE,
|
||||
ViceShowIE,
|
||||
)
|
||||
from .viceland import VicelandIE
|
||||
from .vidbit import VidbitIE
|
||||
from .viddler import ViddlerIE
|
||||
from .videodetective import VideoDetectiveIE
|
||||
@@ -1104,4 +1114,3 @@ from .zingmp3 import (
|
||||
ZingMp3SongIE,
|
||||
ZingMp3AlbumIE,
|
||||
)
|
||||
from .zippcast import ZippCastIE
|
||||
|
@@ -1,22 +1,17 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
sanitized_Request,
|
||||
str_to_int,
|
||||
)
|
||||
from ..utils import str_to_int
|
||||
from .keezmovies import KeezMoviesIE
|
||||
|
||||
|
||||
class ExtremeTubeIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?extremetube\.com/(?:[^/]+/)?video/(?P<id>[^/#?&]+)'
|
||||
class ExtremeTubeIE(KeezMoviesIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?extremetube\.com/(?:[^/]+/)?video/(?:(?P<display_id>[^/]+)-)(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.extremetube.com/video/music-video-14-british-euro-brit-european-cumshots-swallow-652431',
|
||||
'md5': '344d0c6d50e2f16b06e49ca011d8ac69',
|
||||
'md5': '1fb9228f5e3332ec8c057d6ac36f33e0',
|
||||
'info_dict': {
|
||||
'id': 'music-video-14-british-euro-brit-european-cumshots-swallow-652431',
|
||||
'id': '652431',
|
||||
'display_id': 'music-video-14-british-euro-brit-european-cumshots-swallow',
|
||||
'ext': 'mp4',
|
||||
'title': 'Music Video 14 british euro brit european cumshots swallow',
|
||||
'uploader': 'unknown',
|
||||
@@ -35,58 +30,22 @@ class ExtremeTubeIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage, info = self._extract_info(url)
|
||||
|
||||
req = sanitized_Request(url)
|
||||
req.add_header('Cookie', 'age_verified=1')
|
||||
webpage = self._download_webpage(req, video_id)
|
||||
if not info['title']:
|
||||
info['title'] = self._search_regex(
|
||||
r'<h1[^>]+title="([^"]+)"[^>]*>', webpage, 'title')
|
||||
|
||||
video_title = self._html_search_regex(
|
||||
r'<h1 [^>]*?title="([^"]+)"[^>]*>', webpage, 'title')
|
||||
uploader = self._html_search_regex(
|
||||
r'Uploaded by:\s*</strong>\s*(.+?)\s*</div>',
|
||||
webpage, 'uploader', fatal=False)
|
||||
view_count = str_to_int(self._html_search_regex(
|
||||
view_count = str_to_int(self._search_regex(
|
||||
r'Views:\s*</strong>\s*<span>([\d,\.]+)</span>',
|
||||
webpage, 'view count', fatal=False))
|
||||
|
||||
flash_vars = self._parse_json(
|
||||
self._search_regex(
|
||||
r'var\s+flashvars\s*=\s*({.+?});', webpage, 'flash vars'),
|
||||
video_id)
|
||||
|
||||
formats = []
|
||||
for quality_key, video_url in flash_vars.items():
|
||||
height = int_or_none(self._search_regex(
|
||||
r'quality_(\d+)[pP]$', quality_key, 'height', default=None))
|
||||
if not height:
|
||||
continue
|
||||
f = {
|
||||
'url': video_url,
|
||||
}
|
||||
mobj = re.search(
|
||||
r'/(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+', video_url)
|
||||
if mobj:
|
||||
height = int(mobj.group('height'))
|
||||
bitrate = int(mobj.group('bitrate'))
|
||||
f.update({
|
||||
'format_id': '%dp-%dk' % (height, bitrate),
|
||||
'height': height,
|
||||
'tbr': bitrate,
|
||||
})
|
||||
else:
|
||||
f.update({
|
||||
'format_id': '%dp' % height,
|
||||
'height': height,
|
||||
})
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
'formats': formats,
|
||||
info.update({
|
||||
'uploader': uploader,
|
||||
'view_count': view_count,
|
||||
'age_limit': 18,
|
||||
}
|
||||
})
|
||||
|
||||
return info
|
||||
|
70
youtube_dl/extractor/fxnetworks.py
Normal file
70
youtube_dl/extractor/fxnetworks.py
Normal file
@@ -0,0 +1,70 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .adobepass import AdobePassIE
|
||||
from ..utils import (
|
||||
update_url_query,
|
||||
extract_attributes,
|
||||
parse_age_limit,
|
||||
smuggle_url,
|
||||
)
|
||||
|
||||
|
||||
class FXNetworksIE(AdobePassIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:fxnetworks|simpsonsworld)\.com/video/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.fxnetworks.com/video/719841347694',
|
||||
'md5': '1447d4722e42ebca19e5232ab93abb22',
|
||||
'info_dict': {
|
||||
'id': '719841347694',
|
||||
'ext': 'mp4',
|
||||
'title': 'Vanpage',
|
||||
'description': 'F*ck settling down. You\'re the Worst returns for an all new season August 31st on FXX.',
|
||||
'age_limit': 14,
|
||||
'uploader': 'NEWA-FNG-FX',
|
||||
'upload_date': '20160706',
|
||||
'timestamp': 1467844741,
|
||||
},
|
||||
'add_ie': ['ThePlatform'],
|
||||
}, {
|
||||
'url': 'http://www.simpsonsworld.com/video/716094019682',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
if 'The content you are trying to access is not available in your region.' in webpage:
|
||||
self.raise_geo_restricted()
|
||||
video_data = extract_attributes(self._search_regex(
|
||||
r'(<a.+?rel="http://link\.theplatform\.com/s/.+?</a>)', webpage, 'video data'))
|
||||
player_type = self._search_regex(r'playerType\s*=\s*[\'"]([^\'"]+)', webpage, 'player type', default=None)
|
||||
release_url = video_data['rel']
|
||||
title = video_data['data-title']
|
||||
rating = video_data.get('data-rating')
|
||||
query = {
|
||||
'mbr': 'true',
|
||||
}
|
||||
if player_type == 'movies':
|
||||
query.update({
|
||||
'manifest': 'm3u',
|
||||
})
|
||||
else:
|
||||
query.update({
|
||||
'switch': 'http',
|
||||
})
|
||||
if video_data.get('data-req-auth') == '1':
|
||||
resource = self._get_mvpd_resource(
|
||||
video_data['data-channel'], title,
|
||||
video_data.get('data-guid'), rating)
|
||||
query['auth'] = self._extract_mvpd_auth(url, video_id, 'fx', resource)
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'url': smuggle_url(update_url_query(release_url, query), {'force_smil_url': True}),
|
||||
'thumbnail': video_data.get('data-large-thumb'),
|
||||
'age_limit': parse_age_limit(rating),
|
||||
'ie_key': 'ThePlatform',
|
||||
}
|
@@ -72,6 +72,7 @@ from .kaltura import KalturaIE
|
||||
from .eagleplatform import EaglePlatformIE
|
||||
from .facebook import FacebookIE
|
||||
from .soundcloud import SoundcloudIE
|
||||
from .vbox7 import Vbox7IE
|
||||
|
||||
|
||||
class GenericIE(InfoExtractor):
|
||||
@@ -1373,6 +1374,18 @@ class GenericIE(InfoExtractor):
|
||||
},
|
||||
'add_ie': [ArkenaIE.ie_key()],
|
||||
},
|
||||
{
|
||||
'url': 'http://nova.bg/news/view/2016/08/16/156543/%D0%BD%D0%B0-%D0%BA%D0%BE%D1%81%D1%8A%D0%BC-%D0%BE%D1%82-%D0%B2%D0%B7%D1%80%D0%B8%D0%B2-%D0%BE%D1%82%D1%86%D0%B5%D0%BF%D0%B8%D1%85%D0%B0-%D1%86%D1%8F%D0%BB-%D0%BA%D0%B2%D0%B0%D1%80%D1%82%D0%B0%D0%BB-%D0%B7%D0%B0%D1%80%D0%B0%D0%B4%D0%B8-%D0%B8%D0%B7%D1%82%D0%B8%D1%87%D0%B0%D0%BD%D0%B5-%D0%BD%D0%B0-%D0%B3%D0%B0%D0%B7-%D0%B2-%D0%BF%D0%BB%D0%BE%D0%B2%D0%B4%D0%B8%D0%B2/',
|
||||
'info_dict': {
|
||||
'id': '1c7141f46c',
|
||||
'ext': 'mp4',
|
||||
'title': 'НА КОСЪМ ОТ ВЗРИВ: Изтичане на газ на бензиностанция в Пловдив',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': [Vbox7IE.ie_key()],
|
||||
},
|
||||
# {
|
||||
# # TODO: find another test
|
||||
# # http://schema.org/VideoObject
|
||||
@@ -2239,6 +2252,11 @@ class GenericIE(InfoExtractor):
|
||||
'uploader': video_uploader,
|
||||
}
|
||||
|
||||
# Look for VBOX7 embeds
|
||||
vbox7_url = Vbox7IE._extract_url(webpage)
|
||||
if vbox7_url:
|
||||
return self.url_result(vbox7_url, Vbox7IE.ie_key())
|
||||
|
||||
# Looking for http://schema.org/VideoObject
|
||||
json_ld = self._search_json_ld(
|
||||
webpage, video_id, default={}, expected_type='VideoObject')
|
||||
|
@@ -30,7 +30,7 @@ class JWPlatformBaseIE(InfoExtractor):
|
||||
return self._parse_jwplayer_data(
|
||||
jwplayer_data, video_id, *args, **kwargs)
|
||||
|
||||
def _parse_jwplayer_data(self, jwplayer_data, video_id, require_title=True, m3u8_id=None, rtmp_params=None, base_url=None):
|
||||
def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True, m3u8_id=None, rtmp_params=None, base_url=None):
|
||||
# JWPlayer backward compatibility: flattened playlists
|
||||
# https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/api/config.js#L81-L96
|
||||
if 'playlist' not in jwplayer_data:
|
||||
@@ -43,6 +43,8 @@ class JWPlatformBaseIE(InfoExtractor):
|
||||
if 'sources' not in video_data:
|
||||
video_data['sources'] = [video_data]
|
||||
|
||||
this_video_id = video_id or video_data['mediaid']
|
||||
|
||||
formats = []
|
||||
for source in video_data['sources']:
|
||||
source_url = self._proto_relative_url(source['file'])
|
||||
@@ -52,7 +54,7 @@ class JWPlatformBaseIE(InfoExtractor):
|
||||
ext = mimetype2ext(source_type) or determine_ext(source_url)
|
||||
if source_type == 'hls' or ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
source_url, video_id, 'mp4', 'm3u8_native', m3u8_id=m3u8_id, fatal=False))
|
||||
source_url, this_video_id, 'mp4', 'm3u8_native', m3u8_id=m3u8_id, fatal=False))
|
||||
# https://github.com/jwplayer/jwplayer/blob/master/src/js/providers/default.js#L67
|
||||
elif source_type.startswith('audio') or ext in ('oga', 'aac', 'mp3', 'mpeg', 'vorbis'):
|
||||
formats.append({
|
||||
@@ -68,7 +70,7 @@ class JWPlatformBaseIE(InfoExtractor):
|
||||
'ext': ext,
|
||||
}
|
||||
if source_url.startswith('rtmp'):
|
||||
a_format['ext'] = 'flv',
|
||||
a_format['ext'] = 'flv'
|
||||
|
||||
# See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as
|
||||
# of jwplayer.flash.swf
|
||||
@@ -95,7 +97,7 @@ class JWPlatformBaseIE(InfoExtractor):
|
||||
})
|
||||
|
||||
entries.append({
|
||||
'id': video_id,
|
||||
'id': this_video_id,
|
||||
'title': video_data['title'] if require_title else video_data.get('title'),
|
||||
'description': video_data.get('description'),
|
||||
'thumbnail': self._proto_relative_url(video_data.get('image')),
|
||||
|
@@ -3,64 +3,124 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..aes import aes_decrypt_text
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_parse_unquote,
|
||||
)
|
||||
from ..utils import (
|
||||
sanitized_Request,
|
||||
url_basename,
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
str_to_int,
|
||||
strip_or_none,
|
||||
)
|
||||
|
||||
|
||||
class KeezMoviesIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?keezmovies\.com/video/.+?(?P<id>[0-9]+)(?:[/?&]|$)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:www\.)?keezmovies\.com/video/(?:(?P<display_id>[^/]+)-)?(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.keezmovies.com/video/petite-asian-lady-mai-playing-in-bathtub-1214711',
|
||||
'md5': '1c1e75d22ffa53320f45eeb07bc4cdc0',
|
||||
'info_dict': {
|
||||
'id': '1214711',
|
||||
'display_id': 'petite-asian-lady-mai-playing-in-bathtub',
|
||||
'ext': 'mp4',
|
||||
'title': 'Petite Asian Lady Mai Playing In Bathtub',
|
||||
'age_limit': 18,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'view_count': int,
|
||||
'age_limit': 18,
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.keezmovies.com/video/1214711',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
def _extract_info(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id') or video_id
|
||||
|
||||
req = sanitized_Request(url)
|
||||
req.add_header('Cookie', 'age_verified=1')
|
||||
webpage = self._download_webpage(req, video_id)
|
||||
|
||||
# embedded video
|
||||
mobj = re.search(r'href="([^"]+)"></iframe>', webpage)
|
||||
if mobj:
|
||||
embedded_url = mobj.group(1)
|
||||
return self.url_result(embedded_url)
|
||||
|
||||
video_title = self._html_search_regex(
|
||||
r'<h1 [^>]*>([^<]+)', webpage, 'title')
|
||||
flashvars = self._parse_json(self._search_regex(
|
||||
r'var\s+flashvars\s*=\s*([^;]+);', webpage, 'flashvars'), video_id)
|
||||
webpage = self._download_webpage(
|
||||
url, display_id, headers={'Cookie': 'age_verified=1'})
|
||||
|
||||
formats = []
|
||||
for height in (180, 240, 480):
|
||||
if flashvars.get('quality_%dp' % height):
|
||||
video_url = flashvars['quality_%dp' % height]
|
||||
a_format = {
|
||||
'url': video_url,
|
||||
'height': height,
|
||||
'format_id': '%dp' % height,
|
||||
}
|
||||
filename_parts = url_basename(video_url).split('_')
|
||||
if len(filename_parts) >= 2 and re.match(r'\d+[Kk]', filename_parts[1]):
|
||||
a_format['tbr'] = int(filename_parts[1][:-1])
|
||||
formats.append(a_format)
|
||||
format_urls = set()
|
||||
|
||||
age_limit = self._rta_search(webpage)
|
||||
title = None
|
||||
thumbnail = None
|
||||
duration = None
|
||||
encrypted = False
|
||||
|
||||
return {
|
||||
def extract_format(format_url, height=None):
    """Append one format entry for ``format_url`` to the shared ``formats`` list.

    Non-string values, values not starting with ``http`` and URLs that
    were already seen (tracked in ``format_urls``) are ignored. The
    bitrate, and the height when not supplied, are taken from
    ``_<n>k_`` / ``_<n>p_`` style path segments of the URL.
    """
    if not isinstance(format_url, compat_str) or not format_url.startswith('http'):
        return
    if format_url in format_urls:
        return
    format_urls.add(format_url)
    tbr = int_or_none(self._search_regex(
        r'[/_](\d+)[kK][/_]', format_url, 'tbr', default=None))
    if not height:
        height = int_or_none(self._search_regex(
            r'[/_](\d+)[pP][/_]', format_url, 'height', default=None))
    if encrypted:
        # Decrypt the URL handed to this call. The previous code decrypted
        # the enclosing scope's ``video_url``, which is unassigned while the
        # quality_* keys are being processed and is otherwise a different URL.
        format_url = aes_decrypt_text(
            format_url, title, 32).decode('utf-8')
    formats.append({
        'url': format_url,
        'format_id': '%dp' % height if height else None,
        'height': height,
        'tbr': tbr,
    })
|
||||
|
||||
flashvars = self._parse_json(
|
||||
self._search_regex(
|
||||
r'flashvars\s*=\s*({.+?});', webpage,
|
||||
'flashvars', default='{}'),
|
||||
display_id, fatal=False)
|
||||
|
||||
if flashvars:
|
||||
title = flashvars.get('video_title')
|
||||
thumbnail = flashvars.get('image_url')
|
||||
duration = int_or_none(flashvars.get('video_duration'))
|
||||
encrypted = flashvars.get('encrypted') is True
|
||||
for key, value in flashvars.items():
|
||||
mobj = re.search(r'quality_(\d+)[pP]', key)
|
||||
if mobj:
|
||||
extract_format(value, int(mobj.group(1)))
|
||||
video_url = flashvars.get('video_url')
|
||||
if video_url and determine_ext(video_url, None):
|
||||
extract_format(video_url)
|
||||
|
||||
video_url = self._html_search_regex(
|
||||
r'flashvars\.video_url\s*=\s*(["\'])(?P<url>http.+?)\1',
|
||||
webpage, 'video url', default=None, group='url')
|
||||
if video_url:
|
||||
extract_format(compat_urllib_parse_unquote(video_url))
|
||||
|
||||
if not formats:
|
||||
if 'title="This video is no longer available"' in webpage:
|
||||
raise ExtractorError(
|
||||
'Video %s is no longer available' % video_id, expected=True)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
if not title:
|
||||
title = self._html_search_regex(
|
||||
r'<h1[^>]*>([^<]+)', webpage, 'title')
|
||||
|
||||
return webpage, {
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
'display_id': display_id,
|
||||
'title': strip_or_none(title),
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'age_limit': 18,
|
||||
'formats': formats,
|
||||
'age_limit': age_limit,
|
||||
'thumbnail': flashvars.get('image_url')
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
    """Return the shared info dict from ``_extract_info`` plus the view count."""
    webpage, info = self._extract_info(url)
    # The view counter is optional on the page, so a miss is non-fatal.
    raw_views = self._search_regex(
        r'<b>([\d,.]+)</b> Views?', webpage, 'view count', fatal=False)
    info['view_count'] = str_to_int(raw_views)
    return info
|
||||
|
@@ -4,7 +4,10 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
@@ -96,7 +99,7 @@ class LifeNewsIE(InfoExtractor):
|
||||
r'<video[^>]+><source[^>]+src=["\'](.+?)["\']', webpage)
|
||||
|
||||
iframe_links = re.findall(
|
||||
r'<iframe[^>]+src=["\']((?:https?:)?//embed\.life\.ru/embed/.+?)["\']',
|
||||
r'<iframe[^>]+src=["\']((?:https?:)?//embed\.life\.ru/(?:embed|video)/.+?)["\']',
|
||||
webpage)
|
||||
|
||||
if not video_urls and not iframe_links:
|
||||
@@ -164,9 +167,9 @@ class LifeNewsIE(InfoExtractor):
|
||||
|
||||
class LifeEmbedIE(InfoExtractor):
|
||||
IE_NAME = 'life:embed'
|
||||
_VALID_URL = r'https?://embed\.life\.ru/embed/(?P<id>[\da-f]{32})'
|
||||
_VALID_URL = r'https?://embed\.life\.ru/(?:embed|video)/(?P<id>[\da-f]{32})'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://embed.life.ru/embed/e50c2dec2867350528e2574c899b8291',
|
||||
'md5': 'b889715c9e49cb1981281d0e5458fbbe',
|
||||
'info_dict': {
|
||||
@@ -175,30 +178,57 @@ class LifeEmbedIE(InfoExtractor):
|
||||
'title': 'e50c2dec2867350528e2574c899b8291',
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
}
|
||||
}
|
||||
}, {
|
||||
# with 1080p
|
||||
'url': 'https://embed.life.ru/video/e50c2dec2867350528e2574c899b8291',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
thumbnail = None
|
||||
formats = []
|
||||
for video_url in re.findall(r'"file"\s*:\s*"([^"]+)', webpage):
|
||||
video_url = compat_urlparse.urljoin(url, video_url)
|
||||
ext = determine_ext(video_url)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
video_url, video_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='m3u8'))
|
||||
else:
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'format_id': ext,
|
||||
'preference': 1,
|
||||
})
|
||||
|
||||
def extract_m3u8(manifest_url):
    # Add every format found in the HLS manifest to the shared `formats` list.
    hls_formats = self._extract_m3u8_formats(
        manifest_url, video_id, 'mp4',
        entry_protocol='m3u8_native', m3u8_id='m3u8')
    formats.extend(hls_formats)
|
||||
|
||||
def extract_original(original_url):
    # Add the direct file as a format, labelled by its extension
    # and given preference 1.
    direct_format = {
        'url': original_url,
        'format_id': determine_ext(original_url, None),
        'preference': 1,
    }
    formats.append(direct_format)
|
||||
|
||||
playlist = self._parse_json(
|
||||
self._search_regex(
|
||||
r'options\s*=\s*({.+?});', webpage, 'options', default='{}'),
|
||||
video_id).get('playlist', {})
|
||||
if playlist:
|
||||
master = playlist.get('master')
|
||||
if isinstance(master, compat_str) and determine_ext(master) == 'm3u8':
|
||||
extract_m3u8(compat_urlparse.urljoin(url, master))
|
||||
original = playlist.get('original')
|
||||
if isinstance(original, compat_str):
|
||||
extract_original(original)
|
||||
thumbnail = playlist.get('image')
|
||||
|
||||
# Old rendition fallback
|
||||
if not formats:
|
||||
for video_url in re.findall(r'"file"\s*:\s*"([^"]+)', webpage):
|
||||
video_url = compat_urlparse.urljoin(url, video_url)
|
||||
if determine_ext(video_url) == 'm3u8':
|
||||
extract_m3u8(video_url)
|
||||
else:
|
||||
extract_original(video_url)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnail = self._search_regex(
|
||||
thumbnail = thumbnail or self._search_regex(
|
||||
r'"image"\s*:\s*"([^"]+)', webpage, 'thumbnail', default=None)
|
||||
|
||||
return {
|
||||
|
@@ -1,53 +1,56 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import os
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_parse_urlparse,
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
str_to_int,
|
||||
unified_strdate,
|
||||
)
|
||||
from ..utils import sanitized_Request
|
||||
from .keezmovies import KeezMoviesIE
|
||||
|
||||
|
||||
class MofosexIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<url>mofosex\.com/videos/(?P<id>[0-9]+)/.*?\.html)'
|
||||
_TEST = {
|
||||
'url': 'http://www.mofosex.com/videos/5018/japanese-teen-music-video.html',
|
||||
'md5': '1b2eb47ac33cc75d4a80e3026b613c5a',
|
||||
class MofosexIE(KeezMoviesIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?mofosex\.com/videos/(?P<id>\d+)/(?P<display_id>[^/?#&.]+)\.html'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.mofosex.com/videos/318131/amateur-teen-playing-and-masturbating-318131.html',
|
||||
'md5': '39a15853632b7b2e5679f92f69b78e91',
|
||||
'info_dict': {
|
||||
'id': '5018',
|
||||
'id': '318131',
|
||||
'display_id': 'amateur-teen-playing-and-masturbating-318131',
|
||||
'ext': 'mp4',
|
||||
'title': 'Japanese Teen Music Video',
|
||||
'title': 'amateur teen playing and masturbating',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'upload_date': '20121114',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'age_limit': 18,
|
||||
}
|
||||
}
|
||||
}, {
|
||||
# This video is no longer available
|
||||
'url': 'http://www.mofosex.com/videos/5018/japanese-teen-music-video.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
url = 'http://www.' + mobj.group('url')
|
||||
webpage, info = self._extract_info(url)
|
||||
|
||||
req = sanitized_Request(url)
|
||||
req.add_header('Cookie', 'age_verified=1')
|
||||
webpage = self._download_webpage(req, video_id)
|
||||
view_count = str_to_int(self._search_regex(
|
||||
r'VIEWS:</span>\s*([\d,.]+)', webpage, 'view count', fatal=False))
|
||||
like_count = int_or_none(self._search_regex(
|
||||
r'id=["\']amountLikes["\'][^>]*>(\d+)', webpage,
|
||||
'like count', fatal=False))
|
||||
dislike_count = int_or_none(self._search_regex(
|
||||
r'id=["\']amountDislikes["\'][^>]*>(\d+)', webpage,
|
||||
'like count', fatal=False))
|
||||
upload_date = unified_strdate(self._html_search_regex(
|
||||
r'Added:</span>([^<]+)', webpage, 'upload date', fatal=False))
|
||||
|
||||
video_title = self._html_search_regex(r'<h1>(.+?)<', webpage, 'title')
|
||||
video_url = compat_urllib_parse_unquote(self._html_search_regex(r'flashvars.video_url = \'([^\']+)', webpage, 'video_url'))
|
||||
path = compat_urllib_parse_urlparse(video_url).path
|
||||
extension = os.path.splitext(path)[1][1:]
|
||||
format = path.split('/')[5].split('_')[:2]
|
||||
format = '-'.join(format)
|
||||
info.update({
|
||||
'view_count': view_count,
|
||||
'like_count': like_count,
|
||||
'dislike_count': dislike_count,
|
||||
'upload_date': upload_date,
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
})
|
||||
|
||||
age_limit = self._rta_search(webpage)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
'url': video_url,
|
||||
'ext': extension,
|
||||
'format': format,
|
||||
'format_id': format,
|
||||
'age_limit': age_limit,
|
||||
}
|
||||
return info
|
||||
|
@@ -3,7 +3,7 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .theplatform import ThePlatformIE
|
||||
from .adobepass import AdobePassIE
|
||||
from ..utils import (
|
||||
smuggle_url,
|
||||
url_basename,
|
||||
@@ -65,7 +65,7 @@ class NationalGeographicVideoIE(InfoExtractor):
|
||||
}
|
||||
|
||||
|
||||
class NationalGeographicIE(ThePlatformIE):
|
||||
class NationalGeographicIE(AdobePassIE):
|
||||
IE_NAME = 'natgeo'
|
||||
_VALID_URL = r'https?://channel\.nationalgeographic\.com/(?:wild/)?[^/]+/(?:videos|episodes)/(?P<id>[^/?]+)'
|
||||
|
||||
@@ -119,7 +119,7 @@ class NationalGeographicIE(ThePlatformIE):
|
||||
auth_resource_id = self._search_regex(
|
||||
r"video_auth_resourceId\s*=\s*'([^']+)'",
|
||||
webpage, 'auth resource id')
|
||||
query['auth'] = self._extract_mvpd_auth(url, display_id, 'natgeo', auth_resource_id) or ''
|
||||
query['auth'] = self._extract_mvpd_auth(url, display_id, 'natgeo', auth_resource_id)
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
@@ -131,7 +131,7 @@ class NationalGeographicIE(ThePlatformIE):
|
||||
}
|
||||
|
||||
|
||||
class NationalGeographicEpisodeGuideIE(ThePlatformIE):
|
||||
class NationalGeographicEpisodeGuideIE(InfoExtractor):
|
||||
IE_NAME = 'natgeo:episodeguide'
|
||||
_VALID_URL = r'https?://channel\.nationalgeographic\.com/(?:wild/)?(?P<id>[^/]+)/episode-guide'
|
||||
_TESTS = [
|
||||
|
@@ -4,33 +4,43 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .jwplatform import JWPlatformBaseIE
|
||||
from ..compat import compat_parse_qs
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
parse_duration,
|
||||
float_or_none,
|
||||
parse_iso8601,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
class SendtoNewsIE(JWPlatformBaseIE):
|
||||
_VALID_URL = r'https?://embed\.sendtonews\.com/player/embed\.php\?(?P<query>[^#]+)'
|
||||
_VALID_URL = r'https?://embed\.sendtonews\.com/player2/embedplayer\.php\?.*\bSC=(?P<id>[0-9A-Za-z-]+)'
|
||||
|
||||
_TEST = {
|
||||
# From http://cleveland.cbslocal.com/2016/05/16/indians-score-season-high-15-runs-in-blowout-win-over-reds-rapid-reaction/
|
||||
'url': 'http://embed.sendtonews.com/player/embed.php?SK=GxfCe0Zo7D&MK=175909&PK=5588&autoplay=on&sound=yes',
|
||||
'url': 'http://embed.sendtonews.com/player2/embedplayer.php?SC=GxfCe0Zo7D-175909-5588&type=single&autoplay=on&sound=YES',
|
||||
'info_dict': {
|
||||
'id': 'GxfCe0Zo7D-175909-5588',
|
||||
'ext': 'mp4',
|
||||
'title': 'Recap: CLE 15, CIN 6',
|
||||
'description': '5/16/16: Indians\' bats explode for 15 runs in a win',
|
||||
'duration': 49,
|
||||
'id': 'GxfCe0Zo7D-175909-5588'
|
||||
},
|
||||
'playlist_count': 9,
|
||||
# test the first video only to prevent lengthy tests
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'id': '198180',
|
||||
'ext': 'mp4',
|
||||
'title': 'Recap: CLE 5, LAA 4',
|
||||
'description': '8/14/16: Naquin, Almonte lead Indians in 5-4 win',
|
||||
'duration': 57.343,
|
||||
'thumbnail': 're:https?://.*\.jpg$',
|
||||
'upload_date': '20160815',
|
||||
'timestamp': 1471221961,
|
||||
},
|
||||
}],
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
_URL_TEMPLATE = '//embed.sendtonews.com/player/embed.php?SK=%s&MK=%s&PK=%s'
|
||||
_URL_TEMPLATE = '//embed.sendtonews.com/player2/embedplayer.php?SC=%s'
|
||||
|
||||
@classmethod
|
||||
def _extract_url(cls, webpage):
|
||||
@@ -39,48 +49,41 @@ class SendtoNewsIE(JWPlatformBaseIE):
|
||||
.*\bSC=(?P<SC>[0-9a-zA-Z-]+).*
|
||||
\1>''', webpage)
|
||||
if mobj:
|
||||
sk, mk, pk = mobj.group('SC').split('-')
|
||||
return cls._URL_TEMPLATE % (sk, mk, pk)
|
||||
sc = mobj.group('SC')
|
||||
return cls._URL_TEMPLATE % sc
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
params = compat_parse_qs(mobj.group('query'))
|
||||
playlist_id = self._match_id(url)
|
||||
|
||||
if 'SK' not in params or 'MK' not in params or 'PK' not in params:
|
||||
raise ExtractorError('Invalid URL', expected=True)
|
||||
data_url = update_url_query(
|
||||
url.replace('embedplayer.php', 'data_read.php'),
|
||||
{'cmd': 'loadInitial'})
|
||||
playlist_data = self._download_json(data_url, playlist_id)
|
||||
|
||||
video_id = '-'.join([params['SK'][0], params['MK'][0], params['PK'][0]])
|
||||
entries = []
|
||||
for video in playlist_data['playlistData'][0]:
|
||||
info_dict = self._parse_jwplayer_data(
|
||||
video['jwconfiguration'],
|
||||
require_title=False, rtmp_params={'no_resume': True})
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
thumbnails = []
|
||||
if video.get('thumbnailUrl'):
|
||||
thumbnails.append({
|
||||
'id': 'normal',
|
||||
'url': video['thumbnailUrl'],
|
||||
})
|
||||
if video.get('smThumbnailUrl'):
|
||||
thumbnails.append({
|
||||
'id': 'small',
|
||||
'url': video['smThumbnailUrl'],
|
||||
})
|
||||
info_dict.update({
|
||||
'title': video['S_headLine'],
|
||||
'description': video.get('S_fullStory'),
|
||||
'thumbnails': thumbnails,
|
||||
'duration': float_or_none(video.get('SM_length')),
|
||||
'timestamp': parse_iso8601(video.get('S_sysDate'), delimiter=' '),
|
||||
})
|
||||
entries.append(info_dict)
|
||||
|
||||
jwplayer_data_str = self._search_regex(
|
||||
r'jwplayer\("[^"]+"\)\.setup\((.+?)\);', webpage, 'JWPlayer data')
|
||||
js_vars = {
|
||||
'w': 1024,
|
||||
'h': 768,
|
||||
'modeVar': 'html5',
|
||||
}
|
||||
for name, val in js_vars.items():
|
||||
js_val = '%d' % val if isinstance(val, int) else '"%s"' % val
|
||||
jwplayer_data_str = jwplayer_data_str.replace(':%s,' % name, ':%s,' % js_val)
|
||||
|
||||
info_dict = self._parse_jwplayer_data(
|
||||
self._parse_json(jwplayer_data_str, video_id),
|
||||
video_id, require_title=False, rtmp_params={'no_resume': True})
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<div[^>]+class="embedTitle">([^<]+)</div>', webpage, 'title')
|
||||
description = self._html_search_regex(
|
||||
r'<div[^>]+class="embedSubTitle">([^<]+)</div>', webpage,
|
||||
'description', fatal=False)
|
||||
duration = parse_duration(self._html_search_regex(
|
||||
r'<div[^>]+class="embedDetails">([0-9:]+)', webpage,
|
||||
'duration', fatal=False))
|
||||
|
||||
info_dict.update({
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
})
|
||||
|
||||
return info_dict
|
||||
return self.playlist_result(entries, playlist_id)
|
||||
|
@@ -1,13 +1,13 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .theplatform import ThePlatformIE
|
||||
from .adobepass import AdobePassIE
|
||||
from ..utils import (
|
||||
update_url_query,
|
||||
smuggle_url,
|
||||
)
|
||||
|
||||
|
||||
class SyfyIE(ThePlatformIE):
|
||||
class SyfyIE(AdobePassIE):
|
||||
_VALID_URL = r'https?://www\.syfy\.com/(?:[^/]+/)?videos/(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.syfy.com/theinternetruinedmylife/videos/the-internet-ruined-my-life-season-1-trailer',
|
||||
@@ -40,7 +40,9 @@ class SyfyIE(ThePlatformIE):
|
||||
'manifest': 'm3u',
|
||||
}
|
||||
if syfy_mpx.get('entitlement') == 'auth':
|
||||
resource = '<rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/"><channel><title>syfy</title><item><title><![CDATA[%s]]></title><guid>%s</guid><media:rating scheme="urn:v-chip">%s</media:rating></item></channel></rss>' % (title, video_id, syfy_mpx.get('mpxRating', 'TV-14'))
|
||||
resource = self._get_mvpd_resource(
|
||||
'syfy', title, video_id,
|
||||
syfy_mpx.get('mpxRating', 'TV-14'))
|
||||
query['auth'] = self._extract_mvpd_auth(
|
||||
url, video_id, 'syfy', resource)
|
||||
|
||||
|
@@ -6,10 +6,10 @@ import time
|
||||
import hmac
|
||||
import binascii
|
||||
import hashlib
|
||||
import netrc
|
||||
|
||||
|
||||
from .once import OnceIE
|
||||
from .adobepass import AdobePassIE
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urllib_parse_urlparse,
|
||||
@@ -25,9 +25,6 @@ from ..utils import (
|
||||
xpath_with_ns,
|
||||
mimetype2ext,
|
||||
find_xpath_attr,
|
||||
unescapeHTML,
|
||||
urlencode_postdata,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
default_ns = 'http://www.w3.org/2005/SMIL21/Language'
|
||||
@@ -76,10 +73,10 @@ class ThePlatformBaseIE(OnceIE):
|
||||
if isinstance(captions, list):
|
||||
for caption in captions:
|
||||
lang, src, mime = caption.get('lang', 'en'), caption.get('src'), caption.get('type')
|
||||
subtitles[lang] = [{
|
||||
subtitles.setdefault(lang, []).append({
|
||||
'ext': mimetype2ext(mime),
|
||||
'url': src,
|
||||
}]
|
||||
})
|
||||
|
||||
return {
|
||||
'title': info['title'],
|
||||
@@ -96,7 +93,7 @@ class ThePlatformBaseIE(OnceIE):
|
||||
return self._parse_theplatform_metadata(info)
|
||||
|
||||
|
||||
class ThePlatformIE(ThePlatformBaseIE):
|
||||
class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:https?://(?:link|player)\.theplatform\.com/[sp]/(?P<provider_id>[^/]+)/
|
||||
(?:(?:(?:[^/]+/)+select/)?(?P<media>media/(?:guid/\d+/)?)|(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/))?
|
||||
@@ -167,7 +164,6 @@ class ThePlatformIE(ThePlatformBaseIE):
|
||||
'url': 'http://player.theplatform.com/p/NnzsPC/onsite_universal/select/media/guid/2410887629/2928790?fwsitesection=nbc_the_blacklist_video_library&autoPlay=true&carouselID=137781',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_SERVICE_PROVIDER_TEMPLATE = 'https://sp.auth.adobe.com/adobe-services/%s'
|
||||
|
||||
@classmethod
|
||||
def _extract_urls(cls, webpage):
|
||||
@@ -202,96 +198,6 @@ class ThePlatformIE(ThePlatformBaseIE):
|
||||
sig = flags + expiration_date + checksum + str_to_hex(sig_secret)
|
||||
return '%s&sig=%s' % (url, sig)
|
||||
|
||||
def _extract_mvpd_auth(self, url, video_id, requestor_id, resource):
    """Obtain a short media token for ``resource`` from the Adobe service.

    Authentication (authn) and per-resource authorization (authz) tokens
    are cached under the 'mvpd' cache section keyed by ``requestor_id``.
    When no usable authn token is cached, credentials for the MSO are
    read from the user's netrc file and the provider login forms are
    submitted.

    Returns the body of the ``shortAuthorize`` response, or None when the
    netrc file has no entry for the MSO.
    """
    def xml_text(xml_str, tag):
        return self._search_regex(
            '<%s>(.+?)</%s>' % (tag, tag), xml_str, tag)

    mvpd_headers = {
        'ap_42': 'anonymous',
        'ap_11': 'Linux i686',
        'ap_z': 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0',
        'User-Agent': 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0',
    }

    guid = xml_text(resource, 'guid')
    requestor_info = self._downloader.cache.load('mvpd', requestor_id) or {}
    authn_token = requestor_info.get('authn_token')
    if authn_token:
        token_expires = unified_timestamp(xml_text(authn_token, 'simpleTokenExpires').replace('_GMT', ''))
        # Drop the cached token once its expiry time has passed. The
        # previous `>=` comparison discarded still-valid tokens and kept
        # expired ones.
        if token_expires and token_expires <= time.time():
            authn_token = None
    if not authn_token:
        # TODO add support for other TV Providers
        mso_id = 'DTV'
        login_info = netrc.netrc().authenticators(mso_id)
        if not login_info:
            return None

        def post_form(form_page, note, data=None):
            # Submit the first form on the page; without explicit data the
            # form's hidden inputs are posted back unchanged.
            post_url = self._html_search_regex(r'<form[^>]+action=(["\'])(?P<url>.+?)\1', form_page, 'post url', group='url')
            return self._download_webpage(
                post_url, video_id, note, data=urlencode_postdata(data or self._hidden_inputs(form_page)), headers={
                    'Content-Type': 'application/x-www-form-urlencoded',
                })

        provider_redirect_page = self._download_webpage(
            self._SERVICE_PROVIDER_TEMPLATE % 'authenticate/saml', video_id,
            'Downloading Provider Redirect Page', query={
                'noflash': 'true',
                'mso_id': mso_id,
                'requestor_id': requestor_id,
                'no_iframe': 'false',
                'domain_name': 'adobe.com',
                'redirect_url': url,
            })
        provider_login_page = post_form(
            provider_redirect_page, 'Downloading Provider Login Page')
        # netrc authenticators() yields (login, account, password).
        mvpd_confirm_page = post_form(provider_login_page, 'Logging in', {
            'username': login_info[0],
            'password': login_info[2],
        })
        post_form(mvpd_confirm_page, 'Confirming Login')

        session = self._download_webpage(
            self._SERVICE_PROVIDER_TEMPLATE % 'session', video_id,
            'Retrieving Session', data=urlencode_postdata({
                '_method': 'GET',
                'requestor_id': requestor_id,
            }), headers=mvpd_headers)
        authn_token = unescapeHTML(xml_text(session, 'authnToken'))
        requestor_info['authn_token'] = authn_token
        self._downloader.cache.store('mvpd', requestor_id, requestor_info)

    # Authorization tokens are cached per resource guid.
    authz_token = requestor_info.get(guid)
    if not authz_token:
        authorize = self._download_webpage(
            self._SERVICE_PROVIDER_TEMPLATE % 'authorize', video_id,
            'Retrieving Authorization Token', data=urlencode_postdata({
                'resource_id': resource,
                'requestor_id': requestor_id,
                'authentication_token': authn_token,
                'mso_id': xml_text(authn_token, 'simpleTokenMsoID'),
                'userMeta': '1',
            }), headers=mvpd_headers)
        authz_token = unescapeHTML(xml_text(authorize, 'authzToken'))
        requestor_info[guid] = authz_token
        self._downloader.cache.store('mvpd', requestor_id, requestor_info)

    mvpd_headers.update({
        'ap_19': xml_text(authn_token, 'simpleSamlNameID'),
        'ap_23': xml_text(authn_token, 'simpleSamlSessionIndex'),
    })

    return self._download_webpage(
        self._SERVICE_PROVIDER_TEMPLATE % 'shortAuthorize',
        video_id, 'Retrieving Media Token', data=urlencode_postdata({
            'authz_token': authz_token,
            'requestor_id': requestor_id,
            'session_guid': xml_text(authn_token, 'simpleTokenAuthenticationGuid'),
            'hashed_guid': 'false',
        }), headers=mvpd_headers)
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, smuggled_data = unsmuggle_url(url, {})
|
||||
|
||||
|
@@ -1,18 +1,13 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
sanitized_Request,
|
||||
str_to_int,
|
||||
)
|
||||
from ..aes import aes_decrypt_text
|
||||
from .keezmovies import KeezMoviesIE
|
||||
|
||||
|
||||
class Tube8IE(InfoExtractor):
|
||||
class Tube8IE(KeezMoviesIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?tube8\.com/(?:[^/]+/)+(?P<display_id>[^/]+)/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.tube8.com/teen/kasia-music-video/229795/',
|
||||
@@ -33,47 +28,17 @@ class Tube8IE(InfoExtractor):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id')
|
||||
webpage, info = self._extract_info(url)
|
||||
|
||||
req = sanitized_Request(url)
|
||||
req.add_header('Cookie', 'age_verified=1')
|
||||
webpage = self._download_webpage(req, display_id)
|
||||
if not info['title']:
|
||||
info['title'] = self._html_search_regex(
|
||||
r'videoTitle\s*=\s*"([^"]+)', webpage, 'title')
|
||||
|
||||
flashvars = self._parse_json(
|
||||
self._search_regex(
|
||||
r'flashvars\s*=\s*({.+?});\r?\n', webpage, 'flashvars'),
|
||||
video_id)
|
||||
|
||||
formats = []
|
||||
for key, video_url in flashvars.items():
|
||||
if not isinstance(video_url, compat_str) or not video_url.startswith('http'):
|
||||
continue
|
||||
height = self._search_regex(
|
||||
r'quality_(\d+)[pP]', key, 'height', default=None)
|
||||
if not height:
|
||||
continue
|
||||
if flashvars.get('encrypted') is True:
|
||||
video_url = aes_decrypt_text(
|
||||
video_url, flashvars['video_title'], 32).decode('utf-8')
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'format_id': '%sp' % height,
|
||||
'height': int(height),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnail = flashvars.get('image_url')
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'videoTitle\s*=\s*"([^"]+)', webpage, 'title')
|
||||
description = self._html_search_regex(
|
||||
r'>Description:</strong>\s*(.+?)\s*<', webpage, 'description', fatal=False)
|
||||
uploader = self._html_search_regex(
|
||||
r'<span class="username">\s*(.+?)\s*<',
|
||||
webpage, 'uploader', fatal=False)
|
||||
duration = int_or_none(flashvars.get('video_duration'))
|
||||
|
||||
like_count = int_or_none(self._search_regex(
|
||||
r'rupVar\s*=\s*"(\d+)"', webpage, 'like count', fatal=False))
|
||||
@@ -86,18 +51,13 @@ class Tube8IE(InfoExtractor):
|
||||
r'<span id="allCommentsCount">(\d+)</span>',
|
||||
webpage, 'comment count', fatal=False))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
info.update({
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': uploader,
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
'like_count': like_count,
|
||||
'dislike_count': dislike_count,
|
||||
'comment_count': comment_count,
|
||||
'age_limit': 18,
|
||||
'formats': formats,
|
||||
}
|
||||
})
|
||||
|
||||
return info
|
||||
|
@@ -15,21 +15,31 @@ from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
qualities,
|
||||
try_get,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
class TVPlayIE(InfoExtractor):
|
||||
IE_DESC = 'TV3Play and related services'
|
||||
_VALID_URL = r'''(?x)https?://(?:www\.)?
|
||||
(?:tvplay(?:\.skaties)?\.lv/parraides|
|
||||
(?:tv3play|play\.tv3)\.lt/programos|
|
||||
tv3play(?:\.tv3)?\.ee/sisu|
|
||||
tv(?:3|6|8|10)play\.se/program|
|
||||
(?:(?:tv3play|viasat4play|tv6play)\.no|tv3play\.dk)/programmer|
|
||||
play\.novatv\.bg/programi
|
||||
)/[^/]+/(?P<id>\d+)
|
||||
'''
|
||||
IE_NAME = 'mtg'
|
||||
IE_DESC = 'MTG services'
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:
|
||||
mtg:|
|
||||
https?://
|
||||
(?:www\.)?
|
||||
(?:
|
||||
tvplay(?:\.skaties)?\.lv/parraides|
|
||||
(?:tv3play|play\.tv3)\.lt/programos|
|
||||
tv3play(?:\.tv3)?\.ee/sisu|
|
||||
(?:tv(?:3|6|8|10)play|viafree)\.se/program|
|
||||
(?:(?:tv3play|viasat4play|tv6play|viafree)\.no|(?:tv3play|viafree)\.dk)/programmer|
|
||||
play\.novatv\.bg/programi
|
||||
)
|
||||
/(?:[^/]+/)+
|
||||
)
|
||||
(?P<id>\d+)
|
||||
'''
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.tvplay.lv/parraides/vinas-melo-labak/418113?autostart=true',
|
||||
@@ -194,9 +204,22 @@ class TVPlayIE(InfoExtractor):
|
||||
'url': 'http://tvplay.skaties.lv/parraides/vinas-melo-labak/418113?autostart=true',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
# views is null
|
||||
'url': 'http://tvplay.skaties.lv/parraides/tv3-zinas/760183',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'http://tv3play.tv3.ee/sisu/kodu-keset-linna/238551?autostart=true',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'http://www.viafree.se/program/underhallning/i-like-radio-live/sasong-1/676869',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'mtg:418113',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
|
||||
@@ -204,13 +227,13 @@ class TVPlayIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
video = self._download_json(
|
||||
'http://playapi.mtgx.tv/v1/videos/%s' % video_id, video_id, 'Downloading video JSON')
|
||||
'http://playapi.mtgx.tv/v3/videos/%s' % video_id, video_id, 'Downloading video JSON')
|
||||
|
||||
title = video['title']
|
||||
|
||||
try:
|
||||
streams = self._download_json(
|
||||
'http://playapi.mtgx.tv/v1/videos/stream/%s' % video_id,
|
||||
'http://playapi.mtgx.tv/v3/videos/stream/%s' % video_id,
|
||||
video_id, 'Downloading streams JSON')
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||
@@ -289,8 +312,61 @@ class TVPlayIE(InfoExtractor):
|
||||
'season_number': season_number,
|
||||
'duration': int_or_none(video.get('duration')),
|
||||
'timestamp': parse_iso8601(video.get('created_at')),
|
||||
'view_count': int_or_none(video.get('views', {}).get('total')),
|
||||
'view_count': try_get(video, lambda x: x['views']['total'], int),
|
||||
'age_limit': int_or_none(video.get('age_limit', 0)),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
|
||||
class ViafreeIE(InfoExtractor):
    # Resolves Viafree page URLs to an ``mtg:<id>`` URL handled by TVPlayIE.
    # URLs that TVPlayIE already accepts are rejected here (see suitable()).
    _VALID_URL = r'''(?x)
                    https?://
                        (?:www\.)?
                        viafree\.
                        (?:
                            (?:dk|no)/programmer|
                            se/program
                        )
                        /(?:[^/]+/)+(?P<id>[^/?#&]+)
                    '''
    _TESTS = [{
        'url': 'http://www.viafree.se/program/livsstil/husraddarna/sasong-2/avsnitt-2',
        'info_dict': {
            'id': '395375',
            'ext': 'mp4',
            'title': 'Husräddarna S02E02',
            'description': 'md5:4db5c933e37db629b5a2f75dfb34829e',
            'series': 'Husräddarna',
            'season': 'Säsong 2',
            'season_number': 2,
            'duration': 2576,
            'timestamp': 1400596321,
            'upload_date': '20140520',
        },
        'params': {
            'skip_download': True,
        },
        'add_ie': [TVPlayIE.ie_key()],
    }, {
        'url': 'http://www.viafree.no/programmer/underholdning/det-beste-vorspielet/sesong-2/episode-1',
        'only_matching': True,
    }, {
        'url': 'http://www.viafree.dk/programmer/reality/paradise-hotel/saeson-7/episode-5',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Accept *url* only when TVPlayIE does not already claim it."""
        if TVPlayIE.suitable(url):
            return False
        return super(ViafreeIE, cls).suitable(url)

    def _real_extract(self, url):
        """Scrape the numeric video id from the page and delegate to TVPlayIE."""
        # The id captured from the URL is only used to label the download;
        # the id passed on is the one found in the page's currentVideo data.
        display_id = self._match_id(url)
        page = self._download_webpage(url, display_id)

        mtg_id = self._search_regex(
            r'currentVideo["\']\s*:\s*.+?["\']id["\']\s*:\s*["\'](?P<id>\d{6,})',
            page, 'video id')

        return self.url_result('mtg:%s' % mtg_id, TVPlayIE.ie_key())
|
||||
|
70
youtube_dl/extractor/uplynk.py
Normal file
70
youtube_dl/extractor/uplynk.py
Normal file
@@ -0,0 +1,70 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class UplynkIE(InfoExtractor):
    # Extractor for content.uplynk.com style URLs ending in .m3u8 or .json,
    # addressed either by a 32-hex asset id or by an ext/<owner>/<external id>
    # path, optionally carrying a ``pbs`` session id in the query string.
    IE_NAME = 'uplynk'
    _VALID_URL = r'https?://.*?\.uplynk\.com/(?P<path>ext/[0-9a-f]{32}/(?P<external_id>[^/?&]+)|(?P<id>[0-9a-f]{32}))\.(?:m3u8|json)(?:.*?\bpbs=(?P<session_id>[^&]+))?'
    _TEST = {
        'url': 'http://content.uplynk.com/e89eaf2ce9054aa89d92ddb2d817a52e.m3u8',
        'info_dict': {
            'id': 'e89eaf2ce9054aa89d92ddb2d817a52e',
            'ext': 'mp4',
            'title': '030816-kgo-530pm-solar-eclipse-vid_web.mp4',
            'uploader_id': '4413701bf5a1488db55b767f8ae9d4fa',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    def _extract_uplynk_info(self, uplynk_content_url):
        """Build the info dict for an Uplynk content URL.

        Downloads the m3u8 formats and the asset info JSON for the path
        captured from *uplynk_content_url*. Raises ExtractorError (expected)
        when the asset info JSON reports ``error == 1``.
        """
        # Deliberately parse with UplynkIE's own pattern rather than
        # self._VALID_URL, so a subclass that overrides _VALID_URL can still
        # pass a content URL to this helper.
        path, external_id, video_id, session_id = re.match(UplynkIE._VALID_URL, uplynk_content_url).groups()
        display_id = video_id or external_id
        formats = self._extract_m3u8_formats('http://content.uplynk.com/%s.m3u8' % path, display_id, 'mp4')
        if session_id:
            # Attach the session id to every format as an extra segment URL parameter.
            for f in formats:
                f['extra_param_to_segment_url'] = {
                    'pbs': session_id,
                }
        self._sort_formats(formats)
        asset = self._download_json('http://content.uplynk.com/player/assetinfo/%s.json' % path, display_id)
        if asset.get('error') == 1:
            # Fixed format string: the former '% said: %s' parsed "% s" as a
            # conversion and produced "<IE_NAME>aid: <msg>".
            raise ExtractorError('%s said: %s' % (self.IE_NAME, asset['msg']), expected=True)

        return {
            'id': asset['asset'],
            'title': asset['desc'],
            'thumbnail': asset.get('default_poster_url'),
            'duration': float_or_none(asset.get('duration')),
            'uploader_id': asset.get('owner'),
            'formats': formats,
        }

    def _real_extract(self, url):
        """Extract directly from an Uplynk content URL."""
        return self._extract_uplynk_info(url)
|
||||
|
||||
|
||||
class UplynkPreplayIE(UplynkIE):
    # Handles Uplynk preplay JSON URLs: fetches the preplay document, then
    # hands the matching content URL (plus session id, if any) to the
    # shared UplynkIE helper.
    IE_NAME = 'uplynk:preplay'
    _VALID_URL = r'https?://.*?\.uplynk\.com/preplay2?/(?P<path>ext/[0-9a-f]{32}/(?P<external_id>[^/?&]+)|(?P<id>[0-9a-f]{32}))\.json'
    _TEST = None

    def _real_extract(self, url):
        """Download the preplay JSON and extract via the content m3u8 URL."""
        mobj = re.match(self._VALID_URL, url)
        display_id = mobj.group('id') or mobj.group('external_id')

        preplay = self._download_json(url, display_id)
        sid = preplay.get('sid')

        # Only append the pbs parameter when the preplay response has a session id.
        pbs_suffix = '?pbs=' + sid if sid else ''
        content_url = 'http://content.uplynk.com/%s.m3u8' % mobj.group('path') + pbs_suffix
        return self._extract_uplynk_info(content_url)
|
@@ -1,12 +1,14 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import urlencode_postdata
|
||||
|
||||
|
||||
class Vbox7IE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?vbox7\.com/play:(?P<id>[^/]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?vbox7\.com/(?:play:|emb/external\.php\?.*?\bvid=)(?P<id>[\da-fA-F]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://vbox7.com/play:0946fff23c',
|
||||
'md5': 'a60f9ab3a3a2f013ef9a967d5f7be5bf',
|
||||
@@ -24,15 +26,27 @@ class Vbox7IE(InfoExtractor):
|
||||
'title': 'Смях! Чудо - чист за секунди - Скрита камера',
|
||||
},
|
||||
'skip': 'georestricted',
|
||||
}, {
|
||||
'url': 'http://vbox7.com/emb/external.php?vid=a240d20f9c&autoplay=1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
def _extract_url(webpage):
    """Return the src of the first vbox7 embed iframe in *webpage*.

    Returns None when no such iframe is found.
    """
    # Raw string: the pattern contains regex escapes such as ``\.`` that are
    # invalid escape sequences in a normal string literal.
    mobj = re.search(
        r'<iframe[^>]+src=(?P<q>["\'])(?P<url>(?:https?:)?//vbox7\.com/emb/external\.php.+?)(?P=q)',
        webpage)
    if mobj:
        return mobj.group('url')
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
webpage = self._download_webpage(
|
||||
'http://vbox7.com/play:%s' % video_id, video_id)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<title>(.*)</title>', webpage, 'title').split('/')[0].strip()
|
||||
r'<title>(.+?)</title>', webpage, 'title').split('/')[0].strip()
|
||||
|
||||
video_url = self._search_regex(
|
||||
r'src\s*:\s*(["\'])(?P<url>.+?.mp4.*?)\1',
|
||||
|
@@ -8,6 +8,7 @@ from .xstream import XstreamIE
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
@@ -129,6 +130,11 @@ class VGTVIE(XstreamIE):
|
||||
'url': 'http://ap.vgtv.no/webtv#!/video/111084/de-nye-bysyklene-lettere-bedre-gir-stoerre-hjul-og-feste-til-mobil',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
# geoblocked
|
||||
'url': 'http://www.vgtv.no/#!/video/127205/inside-the-mind-of-favela-funk',
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -196,6 +202,12 @@ class VGTVIE(XstreamIE):
|
||||
|
||||
info['formats'].extend(formats)
|
||||
|
||||
if not info['formats']:
|
||||
properties = try_get(
|
||||
data, lambda x: x['streamConfiguration']['properties'], list)
|
||||
if properties and 'geoblocked' in properties:
|
||||
raise self.raise_geo_restricted()
|
||||
|
||||
self._sort_formats(info['formats'])
|
||||
|
||||
info.update({
|
||||
|
107
youtube_dl/extractor/viceland.py
Normal file
107
youtube_dl/extractor/viceland.py
Normal file
@@ -0,0 +1,107 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import time
|
||||
import hashlib
|
||||
import json
|
||||
|
||||
from .adobepass import AdobePassIE
|
||||
from ..compat import compat_HTTPError
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
str_or_none,
|
||||
parse_duration,
|
||||
ExtractorError,
|
||||
extract_attributes,
|
||||
)
|
||||
|
||||
|
||||
class VicelandIE(AdobePassIE):
    # Extractor for viceland.com video pages. Metadata comes from the
    # page's <watch-hub> element and the site's preplay JSON; the media
    # itself is delegated to the UplynkPreplay extractor.
    _VALID_URL = r'https?://(?:www\.)?viceland\.com/[^/]+/video/[^/]+/(?P<id>[a-f0-9]+)'
    _TEST = {
        'url': 'https://www.viceland.com/en_us/video/cyberwar-trailer/57608447973ee7705f6fbd4e',
        'info_dict': {
            'id': '57608447973ee7705f6fbd4e',
            'ext': 'mp4',
            'title': 'CYBERWAR (Trailer)',
            'description': 'Tapping into the geopolitics of hacking and surveillance, Ben Makuch travels the world to meet with hackers, government officials, and dissidents to investigate the ecosystem of cyberwarfare.',
            'age_limit': 14,
            'timestamp': 1466008539,
            'upload_date': '20160615',
            'uploader_id': '11',
            'uploader': 'Viceland',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'add_ie': ['UplynkPreplay'],
    }

    def _real_extract(self, url):
        """Return a url_transparent result pointing at the Uplynk preplay URL.

        Raises ExtractorError (expected) with the server-provided details
        when the preplay request fails with HTTP 400.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)
        watch_hub_data = extract_attributes(self._search_regex(
            r'(?s)(<watch-hub\s*.+?</watch-hub>)', webpage, 'watch hub'))
        # From here on the id is the one published in the watch-hub element.
        video_id = watch_hub_data['vms-id']
        title = watch_hub_data['video-title']

        query = {}
        if watch_hub_data.get('video-locked') == '1':
            # Locked videos need an MVPD token from the Adobe Pass flow.
            resource = self._get_mvpd_resource(
                'VICELAND', title, video_id,
                watch_hub_data.get('video-rating'))
            query['tvetoken'] = self._extract_mvpd_auth(url, video_id, 'VICELAND', resource)

        # signature generation algorithm is reverse engineered from signatureGenerator in
        # webpack:///../shared/~/vice-player/dist/js/vice-player.js in
        # https://www.viceland.com/assets/common/js/web.vendor.bundle.js
        exp = int(time.time()) + 14400
        query.update({
            'exp': exp,
            'sign': hashlib.sha512(('%s:GET:%d' % (video_id, exp)).encode()).hexdigest(),
        })

        try:
            preplay = self._download_json('https://www.viceland.com/en_us/preplay/%s' % video_id, video_id, query=query)
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
                error = json.loads(e.cause.read().decode())
                raise ExtractorError('%s said: %s' % (self.IE_NAME, error['details']), expected=True)
            raise

        video_data = preplay['video']
        base = video_data['base']
        uplynk_preplay_url = preplay['preplayURL']
        # ``or {}`` rather than a .get() default: the default only covers a
        # missing key, while a JSON null value would come back as None and
        # break the chained .get() calls below.
        episode = video_data.get('episode') or {}
        channel = video_data.get('channel') or {}

        subtitles = {}
        cc_url = preplay.get('ccURL')
        if cc_url:
            subtitles['en'] = [{
                'url': cc_url,
            }]

        return {
            '_type': 'url_transparent',
            'url': uplynk_preplay_url,
            'id': video_id,
            'title': title,
            'description': base.get('body'),
            'thumbnail': watch_hub_data.get('cover-image') or watch_hub_data.get('thumbnail'),
            'duration': parse_duration(video_data.get('video_duration') or watch_hub_data.get('video-duration')),
            'timestamp': int_or_none(video_data.get('created_at')),
            'age_limit': parse_age_limit(video_data.get('video_rating')),
            'series': video_data.get('show_title') or watch_hub_data.get('show-title'),
            'episode_number': int_or_none(episode.get('episode_number') or watch_hub_data.get('episode')),
            'episode_id': str_or_none(episode.get('id') or video_data.get('episode_id')),
            'season_number': int_or_none(watch_hub_data.get('season')),
            'season_id': str_or_none(episode.get('season_id')),
            'uploader': (channel.get('base') or {}).get('title') or watch_hub_data.get('channel-title'),
            'uploader_id': str_or_none(channel.get('id')),
            'subtitles': subtitles,
            'ie_key': 'UplynkPreplay',
        }
|
@@ -13,6 +13,7 @@ class XiamiBaseIE(InfoExtractor):
|
||||
webpage = super(XiamiBaseIE, self)._download_webpage(*args, **kwargs)
|
||||
if '>Xiami is currently not available in your country.<' in webpage:
|
||||
self.raise_geo_restricted('Xiami is currently not available in your country')
|
||||
return webpage
|
||||
|
||||
def _extract_track(self, track, track_id=None):
|
||||
title = track['title']
|
||||
|
@@ -15,10 +15,10 @@ class XVideosIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?xvideos\.com/video(?P<id>[0-9]+)(?:.*)'
|
||||
_TEST = {
|
||||
'url': 'http://www.xvideos.com/video4588838/biker_takes_his_girl',
|
||||
'md5': '4b46ae6ea5e6e9086e714d883313c0c9',
|
||||
'md5': '14cea69fcb84db54293b1e971466c2e1',
|
||||
'info_dict': {
|
||||
'id': '4588838',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Biker Takes his Girl',
|
||||
'age_limit': 18,
|
||||
}
|
||||
@@ -42,24 +42,24 @@ class XVideosIE(InfoExtractor):
|
||||
video_url = compat_urllib_parse_unquote(self._search_regex(
|
||||
r'flv_url=(.+?)&', webpage, 'video URL', default=''))
|
||||
if video_url:
|
||||
formats.append({'url': video_url})
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'format_id': 'flv',
|
||||
})
|
||||
|
||||
player_args = self._search_regex(
|
||||
r'(?s)new\s+HTML5Player\((.+?)\)', webpage, ' html5 player', default=None)
|
||||
if player_args:
|
||||
for arg in player_args.split(','):
|
||||
format_url = self._search_regex(
|
||||
r'(["\'])(?P<url>https?://.+?)\1', arg, 'url',
|
||||
default=None, group='url')
|
||||
if not format_url:
|
||||
continue
|
||||
ext = determine_ext(format_url)
|
||||
if ext == 'mp4':
|
||||
formats.append({'url': format_url})
|
||||
elif ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, video_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
|
||||
for kind, _, format_url in re.findall(
|
||||
r'setVideo([^(]+)\((["\'])(http.+?)\2\)', webpage):
|
||||
format_id = kind.lower()
|
||||
if format_id == 'hls':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, video_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
|
||||
elif format_id in ('urllow', 'urlhigh'):
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'format_id': '%s-%s' % (determine_ext(format_url, 'mp4'), format_id[3:]),
|
||||
'quality': -2 if format_id.endswith('low') else None,
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
|
@@ -1,94 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
str_to_int,
|
||||
)
|
||||
|
||||
|
||||
class ZippCastIE(InfoExtractor):
    # Extractor for zippcast.com video pages, addressed either as
    # /video/<id> or as videoview.php?...vplay=<id>.
    _VALID_URL = r'https?://(?:www\.)?zippcast\.com/(?:video/|videoview\.php\?.*\bvplay=)(?P<id>[0-9a-zA-Z]+)'
    _TESTS = [{
        # m3u8, hq direct link
        'url': 'http://www.zippcast.com/video/c9cfd5c7e44dbc29c81',
        'md5': '5ea0263b5606866c4d6cda0fc5e8c6b6',
        'info_dict': {
            'id': 'c9cfd5c7e44dbc29c81',
            'ext': 'mp4',
            'title': '[Vinesauce] Vinny - Digital Space Traveler',
            'description': 'Muted on youtube, but now uploaded in it\'s original form.',
            'thumbnail': 're:^https?://.*\.jpg$',
            'uploader': 'vinesauce',
            'view_count': int,
            'categories': ['Entertainment'],
            'tags': list,
        },
    }, {
        # f4m, lq ipod direct link
        'url': 'http://www.zippcast.com/video/b79c0a233e9c6581775',
        'only_matching': True,
    }, {
        'url': 'http://www.zippcast.com/videoview.php?vplay=c9cfd5c7e44dbc29c81&auto=no',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Collect formats and page metadata from the canonical video page."""
        video_id = self._match_id(url)

        # Always fetch the canonical /video/<id> page, whichever URL form was given.
        page = self._download_webpage(
            'http://www.zippcast.com/video/%s' % video_id, video_id)

        formats = []

        direct_url = self._search_regex(
            r'<source[^>]+src=(["\'])(?P<url>.+?)\1', page,
            'video url', default=None, group='url')
        if direct_url:
            formats.append({
                'url': direct_url,
                'format_id': 'http',
                'preference': 0,  # direct link is almost always of worse quality
            })

        manifest_url = self._search_regex(
            r'src\s*:\s*(?:escape\()?(["\'])(?P<url>http://.+?)\1',
            page, 'src', default=None, group='url')
        manifest_ext = determine_ext(manifest_url)
        if manifest_ext == 'm3u8':
            formats.extend(self._extract_m3u8_formats(
                manifest_url, video_id, 'mp4', entry_protocol='m3u8_native',
                m3u8_id='hls', fatal=False))
        elif manifest_ext == 'f4m':
            formats.extend(self._extract_f4m_formats(
                manifest_url, video_id, f4m_id='hds', fatal=False))

        self._sort_formats(formats)

        # Metadata is looked up in the same order as before so that any
        # non-fatal warnings are reported in the same sequence.
        og_title = self._og_search_title(page)
        summary = self._og_search_description(page) or self._html_search_meta(
            'description', page)
        owner = self._search_regex(
            r'<a[^>]+href="https?://[^/]+/profile/[^>]+>([^<]+)</a>',
            page, 'uploader', fatal=False)
        thumb = self._og_search_thumbnail(page)
        views = str_to_int(self._search_regex(
            r'>([\d,.]+) views!', page, 'view count', fatal=False))

        category_names = re.findall(
            r'<a[^>]+href="https?://[^/]+/categories/[^"]+">([^<]+),?<',
            page)
        tag_names = re.findall(
            r'<a[^>]+href="https?://[^/]+/search/tags/[^"]+">([^<]+),?<',
            page)

        return {
            'id': video_id,
            'title': og_title,
            'description': summary,
            'thumbnail': thumb,
            'uploader': owner,
            'view_count': views,
            'categories': category_names,
            'tags': tag_names,
            'formats': formats,
        }
|
@@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2016.08.13'
|
||||
__version__ = '2016.08.17'
|
||||
|
Reference in New Issue
Block a user