Compare commits

...

16 Commits

Author SHA1 Message Date
55d4de2283 release 2017.02.10 2017-02-10 01:27:33 +07:00
61ee556aea [ChangeLog] Actualize 2017-02-10 01:26:00 +07:00
ff24261ba0 [kaltura] Add explicit port to regexes
They should not match e.g. cdnapi.kaltura.computernetworks.com/...
2017-02-10 01:24:14 +07:00
fbc6dc525e [xtube] Fix shortcuts 2017-02-10 01:06:23 +07:00
9150d1eb69 [xtube] Fix extraction (closes #12023) 2017-02-10 01:03:35 +07:00
b7f9843bec [pornhub] Simplify (closes #12018) 2017-02-10 00:57:44 +07:00
e64b0fca14 [pornhub] Fix extraction (closes #12007) 2017-02-10 00:56:12 +07:00
78ef214d2d [facebook] Improve JS data regex (closes #12042) 2017-02-09 23:42:40 +07:00
be670b8e8f [external:ffmpeg] do not assume that ffmpeg unknown version format is new 2017-02-09 17:36:59 +01:00
37084f6641 [kaltura] improve embed partner id extraction(fixes #12041) 2017-02-09 16:24:54 +01:00
b04975733c [sprout] Add new extractor 2017-02-09 09:13:29 +01:00
c8b8fb0a99 [sixplay] improve extraction
- skip drm protected formats
- extract more and better formats
- skip duplicate asset urls
2017-02-08 22:56:10 +01:00
8298018273 [scrippsnetworks:watch] Add new extractor(closes #10765) 2017-02-08 20:44:23 +01:00
ae8d5a5c59 [go] add support for adobe pass auth(closes #11468)(closes #10831) 2017-02-08 18:57:07 +01:00
b9c9cb5f79 [6play] Fix extraction (closes #12011) 2017-02-08 23:15:39 +07:00
fdf9b959bc [nbc] add support adobe pass auth(closes #12006) 2017-02-08 16:23:42 +01:00
15 changed files with 344 additions and 78 deletions

View File

@ -6,8 +6,8 @@
--- ---
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.07*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. ### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.10*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.07** - [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.10**
### Before submitting an *issue* make sure you have: ### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
[debug] User config: [] [debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2017.02.07 [debug] youtube-dl version 2017.02.10
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {} [debug] Proxy map: {}

View File

@ -1,3 +1,18 @@
version 2017.02.10
Extractors
* [xtube] Fix extraction (#12023)
* [pornhub] Fix extraction (#12007, #12018)
* [facebook] Improve JS data regular expression (#12042)
* [kaltura] Improve embed partner id extraction (#12041)
+ [sprout] Add support for sproutonline.com
* [6play] Improve extraction
+ [scrippsnetworks:watch] Add support for Scripps Networks sites (#10765)
+ [go] Add support for Adobe Pass authentication (#11468, #10831)
* [6play] Fix extraction (#12011)
+ [nbc] Add support for Adobe Pass authentication (#12006)
version 2017.02.07 version 2017.02.07
Core Core

View File

@ -11,6 +11,7 @@
- **4tube** - **4tube**
- **56.com** - **56.com**
- **5min** - **5min**
- **6play**
- **8tracks** - **8tracks**
- **91porn** - **91porn**
- **9c9media** - **9c9media**
@ -667,6 +668,7 @@
- **screen.yahoo:search**: Yahoo screen search - **screen.yahoo:search**: Yahoo screen search
- **Screencast** - **Screencast**
- **ScreencastOMatic** - **ScreencastOMatic**
- **scrippsnetworks:watch**
- **Seeker** - **Seeker**
- **SenateISVP** - **SenateISVP**
- **SendtoNews** - **SendtoNews**
@ -676,7 +678,6 @@
- **Shared**: shared.sx - **Shared**: shared.sx
- **ShowRoomLive** - **ShowRoomLive**
- **Sina** - **Sina**
- **SixPlay**
- **skynewsarabia:article** - **skynewsarabia:article**
- **skynewsarabia:video** - **skynewsarabia:video**
- **SkySports** - **SkySports**
@ -711,6 +712,7 @@
- **SportBoxEmbed** - **SportBoxEmbed**
- **SportDeutschland** - **SportDeutschland**
- **Sportschau** - **Sportschau**
- **Sprout**
- **sr:mediathek**: Saarländischer Rundfunk - **sr:mediathek**: Saarländischer Rundfunk
- **SRGSSR** - **SRGSSR**
- **SRGSSRPlay**: srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites - **SRGSSRPlay**: srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites

View File

@ -275,7 +275,7 @@ class FFmpegFD(ExternalFD):
args += ['-f', 'mpegts'] args += ['-f', 'mpegts']
else: else:
args += ['-f', 'mp4'] args += ['-f', 'mp4']
if (ffpp.basename == 'ffmpeg' and is_outdated_version(ffpp._versions['ffmpeg'], '3.2')) and (not info_dict.get('acodec') or info_dict['acodec'].split('.')[0] in ('aac', 'mp4a')): if (ffpp.basename == 'ffmpeg' and is_outdated_version(ffpp._versions['ffmpeg'], '3.2', False)) and (not info_dict.get('acodec') or info_dict['acodec'].split('.')[0] in ('aac', 'mp4a')):
args += ['-bsf:a', 'aac_adtstoasc'] args += ['-bsf:a', 'aac_adtstoasc']
elif protocol == 'rtmp': elif protocol == 'rtmp':
args += ['-f', 'flv'] args += ['-f', 'flv']

View File

@ -838,6 +838,7 @@ from .sbs import SBSIE
from .scivee import SciVeeIE from .scivee import SciVeeIE
from .screencast import ScreencastIE from .screencast import ScreencastIE
from .screencastomatic import ScreencastOMaticIE from .screencastomatic import ScreencastOMaticIE
from .scrippsnetworks import ScrippsNetworksWatchIE
from .seeker import SeekerIE from .seeker import SeekerIE
from .senateisvp import SenateISVPIE from .senateisvp import SenateISVPIE
from .sendtonews import SendtoNewsIE from .sendtonews import SendtoNewsIE
@ -895,6 +896,7 @@ from .sport5 import Sport5IE
from .sportbox import SportBoxEmbedIE from .sportbox import SportBoxEmbedIE
from .sportdeutschland import SportDeutschlandIE from .sportdeutschland import SportDeutschlandIE
from .sportschau import SportschauIE from .sportschau import SportschauIE
from .sprout import SproutIE
from .srgssr import ( from .srgssr import (
SRGSSRIE, SRGSSRIE,
SRGSSRPlayIE, SRGSSRPlayIE,

View File

@ -134,6 +134,20 @@ class FacebookIE(InfoExtractor):
'upload_date': '20161030', 'upload_date': '20161030',
'uploader': 'CNN', 'uploader': 'CNN',
}, },
}, {
# bigPipe.onPageletArrive ... onPageletArrive pagelet_group_mall
'url': 'https://www.facebook.com/yaroslav.korpan/videos/1417995061575415/',
'info_dict': {
'id': '1417995061575415',
'ext': 'mp4',
'title': 'md5:a7b86ca673f51800cd54687b7f4012fe',
'timestamp': 1486648217,
'upload_date': '20170209',
'uploader': 'Yaroslav Korpan',
},
'params': {
'skip_download': True,
},
}, { }, {
'url': 'https://www.facebook.com/video.php?v=10204634152394104', 'url': 'https://www.facebook.com/video.php?v=10204634152394104',
'only_matching': True, 'only_matching': True,
@ -262,7 +276,7 @@ class FacebookIE(InfoExtractor):
if not video_data: if not video_data:
server_js_data = self._parse_json( server_js_data = self._parse_json(
self._search_regex( self._search_regex(
r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+stream_pagelet', r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+(?:stream_pagelet|pagelet_group_mall)',
webpage, 'js data', default='{}'), webpage, 'js data', default='{}'),
video_id, transform_source=js_to_json, fatal=False) video_id, transform_source=js_to_json, fatal=False)
if server_js_data: if server_js_data:

View File

@ -3,7 +3,7 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .adobepass import AdobePassIE
from ..utils import ( from ..utils import (
int_or_none, int_or_none,
determine_ext, determine_ext,
@ -13,15 +13,30 @@ from ..utils import (
) )
class GoIE(InfoExtractor): class GoIE(AdobePassIE):
_BRANDS = { _SITE_INFO = {
'abc': '001', 'abc': {
'freeform': '002', 'brand': '001',
'watchdisneychannel': '004', 'requestor_id': 'ABC',
'watchdisneyjunior': '008', },
'watchdisneyxd': '009', 'freeform': {
'brand': '002',
'requestor_id': 'ABCFamily',
},
'watchdisneychannel': {
'brand': '004',
'requestor_id': 'Disney',
},
'watchdisneyjunior': {
'brand': '008',
'requestor_id': 'DisneyJunior',
},
'watchdisneyxd': {
'brand': '009',
'requestor_id': 'DisneyXD',
}
} }
_VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:[^/]+/)*(?:vdka(?P<id>\w+)|season-\d+/\d+-(?P<display_id>[^/?#]+))' % '|'.join(_BRANDS.keys()) _VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:[^/]+/)*(?:vdka(?P<id>\w+)|season-\d+/\d+-(?P<display_id>[^/?#]+))' % '|'.join(_SITE_INFO.keys())
_TESTS = [{ _TESTS = [{
'url': 'http://abc.go.com/shows/castle/video/most-recent/vdka0_g86w5onx', 'url': 'http://abc.go.com/shows/castle/video/most-recent/vdka0_g86w5onx',
'info_dict': { 'info_dict': {
@ -47,7 +62,8 @@ class GoIE(InfoExtractor):
# There may be inner quotes, e.g. data-video-id="'VDKA3609139'" # There may be inner quotes, e.g. data-video-id="'VDKA3609139'"
# from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood # from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood
r'data-video-id=["\']*VDKA(\w+)', webpage, 'video id') r'data-video-id=["\']*VDKA(\w+)', webpage, 'video id')
brand = self._BRANDS[sub_domain] site_info = self._SITE_INFO[sub_domain]
brand = site_info['brand']
video_data = self._download_json( video_data = self._download_json(
'http://api.contents.watchabc.go.com/vp2/ws/contents/3000/videos/%s/001/-1/-1/-1/%s/-1/-1.json' % (brand, video_id), 'http://api.contents.watchabc.go.com/vp2/ws/contents/3000/videos/%s/001/-1/-1/-1/%s/-1/-1.json' % (brand, video_id),
video_id)['video'][0] video_id)['video'][0]
@ -63,14 +79,26 @@ class GoIE(InfoExtractor):
if ext == 'm3u8': if ext == 'm3u8':
video_type = video_data.get('type') video_type = video_data.get('type')
if video_type == 'lf': if video_type == 'lf':
data = {
'video_id': video_data['id'],
'video_type': video_type,
'brand': brand,
'device': '001',
}
if video_data.get('accesslevel') == '1':
requestor_id = site_info['requestor_id']
resource = self._get_mvpd_resource(
requestor_id, title, video_id, None)
auth = self._extract_mvpd_auth(
url, video_id, requestor_id, resource)
data.update({
'token': auth,
'token_type': 'ap',
'adobe_requestor_id': requestor_id,
})
entitlement = self._download_json( entitlement = self._download_json(
'https://api.entitlement.watchabc.go.com/vp2/ws-secure/entitlement/2020/authorize.json', 'https://api.entitlement.watchabc.go.com/vp2/ws-secure/entitlement/2020/authorize.json',
video_id, data=urlencode_postdata({ video_id, data=urlencode_postdata(data), headers=self.geo_verification_headers())
'video_id': video_data['id'],
'video_type': video_type,
'brand': brand,
'device': '001',
}))
errors = entitlement.get('errors', {}).get('errors', []) errors = entitlement.get('errors', {}).get('errors', [])
if errors: if errors:
error_message = ', '.join([error['message'] for error in errors]) error_message = ', '.join([error['message'] for error in errors])

View File

@ -23,11 +23,11 @@ class KalturaIE(InfoExtractor):
(?: (?:
kaltura:(?P<partner_id>\d+):(?P<id>[0-9a-z_]+)| kaltura:(?P<partner_id>\d+):(?P<id>[0-9a-z_]+)|
https?:// https?://
(:?(?:www|cdnapi(?:sec)?)\.)?kaltura\.com/ (:?(?:www|cdnapi(?:sec)?)\.)?kaltura\.com(?::\d+)?/
(?: (?:
(?: (?:
# flash player # flash player
index\.php/kwidget| index\.php/(?:kwidget|extwidget/preview)|
# html5 player # html5 player
html5/html5lib/[^/]+/mwEmbedFrame\.php html5/html5lib/[^/]+/mwEmbedFrame\.php
) )
@ -94,6 +94,14 @@ class KalturaIE(InfoExtractor):
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
},
{
'url': 'https://www.kaltura.com/index.php/extwidget/preview/partner_id/1770401/uiconf_id/37307382/entry_id/0_58u8kme7/embed/iframe?&flashvars[streamerType]=auto',
'only_matching': True,
},
{
'url': 'https://www.kaltura.com:443/index.php/extwidget/preview/partner_id/1770401/uiconf_id/37307382/entry_id/0_58u8kme7/embed/iframe?&flashvars[streamerType]=auto',
'only_matching': True,
} }
] ]
@ -112,7 +120,7 @@ class KalturaIE(InfoExtractor):
re.search( re.search(
r'''(?xs) r'''(?xs)
(?P<q1>["\']) (?P<q1>["\'])
(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/(?:(?!(?P=q1)).)*(?:p|partner_id)/(?P<partner_id>\d+)(?:(?!(?P=q1)).)* (?:https?:)?//cdnapi(?:sec)?\.kaltura\.com(?::\d+)?/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)(?:(?!(?P=q1)).)*
(?P=q1).*? (?P=q1).*?
(?: (?:
entry_?[Ii]d| entry_?[Ii]d|
@ -209,6 +217,8 @@ class KalturaIE(InfoExtractor):
partner_id = params['wid'][0][1:] partner_id = params['wid'][0][1:]
elif 'p' in params: elif 'p' in params:
partner_id = params['p'][0] partner_id = params['p'][0]
elif 'partner_id' in params:
partner_id = params['partner_id'][0]
else: else:
raise ExtractorError('Invalid URL', expected=True) raise ExtractorError('Invalid URL', expected=True)
if 'entry_id' in params: if 'entry_id' in params:

View File

@ -4,23 +4,26 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from .theplatform import ThePlatformIE from .theplatform import ThePlatformIE
from .adobepass import AdobePassIE
from ..compat import compat_urllib_parse_urlparse
from ..utils import ( from ..utils import (
find_xpath_attr, find_xpath_attr,
lowercase_escape, lowercase_escape,
smuggle_url, smuggle_url,
unescapeHTML, unescapeHTML,
update_url_query, update_url_query,
int_or_none,
) )
class NBCIE(InfoExtractor): class NBCIE(AdobePassIE):
_VALID_URL = r'https?://(?:www\.)?nbc\.com/(?:[^/]+/)+(?P<id>n?\d+)' _VALID_URL = r'https?://(?:www\.)?nbc\.com/(?:[^/]+/)+(?P<id>n?\d+)'
_TESTS = [ _TESTS = [
{ {
'url': 'http://www.nbc.com/the-tonight-show/segments/112966', 'url': 'http://www.nbc.com/the-tonight-show/video/jimmy-fallon-surprises-fans-at-ben-jerrys/2848237',
'info_dict': { 'info_dict': {
'id': '112966', 'id': '2848237',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Jimmy Fallon Surprises Fans at Ben & Jerry\'s', 'title': 'Jimmy Fallon Surprises Fans at Ben & Jerry\'s',
'description': 'Jimmy gives out free scoops of his new "Tonight Dough" ice cream flavor by surprising customers at the Ben & Jerry\'s scoop shop.', 'description': 'Jimmy gives out free scoops of his new "Tonight Dough" ice cream flavor by surprising customers at the Ben & Jerry\'s scoop shop.',
@ -69,7 +72,7 @@ class NBCIE(InfoExtractor):
# HLS streams requires the 'hdnea3' cookie # HLS streams requires the 'hdnea3' cookie
'url': 'http://www.nbc.com/Kings/video/goliath/n1806', 'url': 'http://www.nbc.com/Kings/video/goliath/n1806',
'info_dict': { 'info_dict': {
'id': 'n1806', 'id': '101528f5a9e8127b107e98c5e6ce4638',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Goliath', 'title': 'Goliath',
'description': 'When an unknown soldier saves the life of the King\'s son in battle, he\'s thrust into the limelight and politics of the kingdom.', 'description': 'When an unknown soldier saves the life of the King\'s son in battle, he\'s thrust into the limelight and politics of the kingdom.',
@ -87,21 +90,57 @@ class NBCIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
theplatform_url = unescapeHTML(lowercase_escape(self._html_search_regex( info = {
[
r'(?:class="video-player video-player-full" data-mpx-url|class="player" src)="(.*?)"',
r'<iframe[^>]+src="((?:https?:)?//player\.theplatform\.com/[^"]+)"',
r'"embedURL"\s*:\s*"([^"]+)"'
],
webpage, 'theplatform url').replace('_no_endcard', '').replace('\\/', '/')))
if theplatform_url.startswith('//'):
theplatform_url = 'http:' + theplatform_url
return {
'_type': 'url_transparent', '_type': 'url_transparent',
'ie_key': 'ThePlatform', 'ie_key': 'ThePlatform',
'url': smuggle_url(theplatform_url, {'source_url': url}),
'id': video_id, 'id': video_id,
} }
video_data = None
preload = self._search_regex(
r'PRELOAD\s*=\s*({.+})', webpage, 'preload data', default=None)
if preload:
preload_data = self._parse_json(preload, video_id)
path = compat_urllib_parse_urlparse(url).path.rstrip('/')
entity_id = preload_data.get('xref', {}).get(path)
video_data = preload_data.get('entities', {}).get(entity_id)
if video_data:
query = {
'mbr': 'true',
'manifest': 'm3u',
}
video_id = video_data['guid']
title = video_data['title']
if video_data.get('entitlement') == 'auth':
resource = self._get_mvpd_resource(
'nbcentertainment', title, video_id,
video_data.get('vChipRating'))
query['auth'] = self._extract_mvpd_auth(
url, video_id, 'nbcentertainment', resource)
theplatform_url = smuggle_url(update_url_query(
'http://link.theplatform.com/s/NnzsPC/media/guid/2410887629/' + video_id,
query), {'force_smil_url': True})
info.update({
'id': video_id,
'title': title,
'url': theplatform_url,
'description': video_data.get('description'),
'keywords': video_data.get('keywords'),
'season_number': int_or_none(video_data.get('seasonNumber')),
'episode_number': int_or_none(video_data.get('episodeNumber')),
'series': video_data.get('showName'),
})
else:
theplatform_url = unescapeHTML(lowercase_escape(self._html_search_regex(
[
r'(?:class="video-player video-player-full" data-mpx-url|class="player" src)="(.*?)"',
r'<iframe[^>]+src="((?:https?:)?//player\.theplatform\.com/[^"]+)"',
r'"embedURL"\s*:\s*"([^"]+)"'
],
webpage, 'theplatform url').replace('_no_endcard', '').replace('\\/', '/')))
if theplatform_url.startswith('//'):
theplatform_url = 'http:' + theplatform_url
info['url'] = smuggle_url(theplatform_url, {'source_url': url})
return info
class NBCSportsVPlayerIE(InfoExtractor): class NBCSportsVPlayerIE(InfoExtractor):

View File

@ -156,11 +156,17 @@ class PornHubIE(InfoExtractor):
comment_count = self._extract_count( comment_count = self._extract_count(
r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment') r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')
video_variables = {}
for video_variablename, quote, video_variable in re.findall(
r'(player_quality_[0-9]{3,4}p\w+)\s*=\s*(["\'])(.+?)\2;', webpage):
video_variables[video_variablename] = video_variable
video_urls = [] video_urls = []
for quote, video_url in re.findall( for encoded_video_url in re.findall(
r'player_quality_[0-9]{3,4}p\s*=\s*(["\'])(.+?)\1;', webpage): r'player_quality_[0-9]{3,4}p\s*=(.+?);', webpage):
video_urls.append(compat_urllib_parse_unquote(re.sub( for varname, varval in video_variables.items():
r'{0}\s*\+\s*{0}'.format(quote), '', video_url))) encoded_video_url = encoded_video_url.replace(varname, varval)
video_urls.append(re.sub(r'[\s+]', '', encoded_video_url))
if webpage.find('"encrypted":true') != -1: if webpage.find('"encrypted":true') != -1:
password = compat_urllib_parse_unquote_plus( password = compat_urllib_parse_unquote_plus(

View File

@ -0,0 +1,60 @@
# coding: utf-8
from __future__ import unicode_literals
from .adobepass import AdobePassIE
from ..utils import (
    ExtractorError,
    int_or_none,
    smuggle_url,
    update_url_query,
)
class ScrippsNetworksWatchIE(AdobePassIE):
    """Extractor for the watch.<site>.com players matched by _VALID_URL.

    The video metadata is read from the "channels" JSON embedded in the
    player page. Restricted videos get an MVPD auth token appended to their
    release URL; the actual format extraction is delegated to the
    ThePlatform extractor through a url_transparent result.
    """

    IE_NAME = 'scrippsnetworks:watch'
    _VALID_URL = r'https?://watch\.(?:hgtv|foodnetwork|travelchannel|diynetwork|cookingchanneltv)\.com/player\.[A-Z0-9]+\.html#(?P<id>\d+)'
    _TEST = {
        'url': 'http://watch.hgtv.com/player.HNT.html#0256538',
        'md5': '26545fd676d939954c6808274bdb905a',
        'info_dict': {
            'id': '0256538',
            'ext': 'mp4',
            'title': 'Seeking a Wow House',
            'description': 'Buyers retiring in Palm Springs, California, want a modern house with major wow factor. They\'re also looking for a pool and a large, open floorplan with tall windows looking out at the views.',
            'uploader': 'SCNI',
            'upload_date': '20170207',
            'timestamp': 1486450493,
        },
        'skip': 'requires TV provider authentication',
    }

    def _real_extract(self, url):
        """Return a url_transparent result pointing at the release URL.

        Raises ExtractorError when none of the videos listed in the page's
        first channel carries the requested id.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Only the first channel of the embedded "channels" array is used.
        channel = self._parse_json(self._search_regex(
            r'"channels"\s*:\s*(\[.+\])',
            webpage, 'channels'), video_id)[0]

        # The id comes from the URL fragment, so the page may simply not
        # list it; report that explicitly instead of leaking StopIteration.
        video_data = next(
            (v for v in channel['videos'] if v.get('nlvid') == video_id),
            None)
        if video_data is None:
            raise ExtractorError(
                'Unable to find video %s in channel data' % video_id)

        title = video_data['title']
        release_url = video_data['releaseUrl']

        if video_data.get('restricted'):
            requestor_id = self._search_regex(
                r'requestorId\s*=\s*"([^"]+)";', webpage, 'requestor id')
            # 'ratings' may be missing, null or an empty list; fall back to
            # a single empty entry so the rating resolves to None.
            ratings = video_data.get('ratings') or [{}]
            resource = self._get_mvpd_resource(
                requestor_id, title, video_id, ratings[0].get('rating'))
            auth = self._extract_mvpd_auth(
                url, video_id, requestor_id, resource)
            release_url = update_url_query(release_url, {'auth': auth})

        return {
            '_type': 'url_transparent',
            'id': video_id,
            'title': title,
            'url': smuggle_url(release_url, {'force_smil_url': True}),
            'description': video_data.get('description'),
            'thumbnail': video_data.get('thumbnailUrl'),
            'series': video_data.get('showTitle'),
            'season_number': int_or_none(video_data.get('season')),
            'episode_number': int_or_none(video_data.get('episodeNumber')),
            'ie_key': 'ThePlatform',
        }

View File

@ -1,64 +1,101 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str
from ..utils import ( from ..utils import (
qualities,
int_or_none,
mimetype2ext,
determine_ext, determine_ext,
int_or_none,
try_get,
qualities,
) )
class SixPlayIE(InfoExtractor): class SixPlayIE(InfoExtractor):
IE_NAME = '6play'
_VALID_URL = r'(?:6play:|https?://(?:www\.)?6play\.fr/.+?-c_)(?P<id>[0-9]+)' _VALID_URL = r'(?:6play:|https?://(?:www\.)?6play\.fr/.+?-c_)(?P<id>[0-9]+)'
_TEST = { _TEST = {
'url': 'http://www.6play.fr/jamel-et-ses-amis-au-marrakech-du-rire-p_1316/jamel-et-ses-amis-au-marrakech-du-rire-2015-c_11495320', 'url': 'http://www.6play.fr/le-meilleur-patissier-p_1807/le-meilleur-patissier-special-fetes-mercredi-a-21-00-sur-m6-c_11638450',
'md5': '42310bffe4ba3982db112b9cd3467328', 'md5': '42310bffe4ba3982db112b9cd3467328',
'info_dict': { 'info_dict': {
'id': '11495320', 'id': '11638450',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Jamel et ses amis au Marrakech du rire 2015', 'title': 'Le Meilleur Pâtissier, spécial fêtes mercredi à 21:00 sur M6',
'description': 'md5:ba2149d5c321d5201b78070ee839d872', 'description': 'md5:308853f6a5f9e2d55a30fc0654de415f',
'duration': 39,
'series': 'Le meilleur pâtissier',
},
'params': {
'skip_download': True,
}, },
} }
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
clip_data = self._download_json(
'https://player.m6web.fr/v2/video/config/6play-auth/FR/%s.json' % video_id,
video_id)
video_data = clip_data['videoInfo']
data = self._download_json(
'https://pc.middleware.6play.fr/6play/v2/platforms/m6group_web/services/6play/videos/clip_%s' % video_id,
video_id, query={
'csa': 5,
'with': 'clips',
})
clip_data = data['clips'][0]
title = clip_data['title']
urls = []
quality_key = qualities(['lq', 'sd', 'hq', 'hd']) quality_key = qualities(['lq', 'sd', 'hq', 'hd'])
formats = [] formats = []
for source in clip_data['sources']: for asset in clip_data['assets']:
source_type, source_url = source.get('type'), source.get('src') asset_url = asset.get('full_physical_path')
if not source_url or source_type == 'hls/primetime': protocol = asset.get('protocol')
if not asset_url or protocol == 'primetime' or asset_url in urls:
continue continue
ext = mimetype2ext(source_type) or determine_ext(source_url) urls.append(asset_url)
if ext == 'm3u8': container = asset.get('video_container')
formats.extend(self._extract_m3u8_formats( ext = determine_ext(asset_url)
source_url, video_id, 'mp4', 'm3u8_native', if container == 'm3u8' or ext == 'm3u8':
m3u8_id='hls', fatal=False)) if protocol == 'usp':
formats.extend(self._extract_f4m_formats( asset_url = re.sub(r'/([^/]+)\.ism/[^/]*\.m3u8', r'/\1.ism/\1.m3u8', asset_url)
source_url.replace('.m3u8', '.f4m'), formats.extend(self._extract_m3u8_formats(
video_id, f4m_id='hds', fatal=False)) asset_url, video_id, 'mp4', 'm3u8_native',
elif ext == 'mp4': m3u8_id='hls', fatal=False))
quality = source.get('quality') formats.extend(self._extract_f4m_formats(
asset_url.replace('.m3u8', '.f4m'),
video_id, f4m_id='hds', fatal=False))
formats.extend(self._extract_mpd_formats(
asset_url.replace('.m3u8', '.mpd'),
video_id, mpd_id='dash', fatal=False))
formats.extend(self._extract_ism_formats(
re.sub('/[^/]+\.m3u8', '/Manifest', asset_url),
video_id, ism_id='mss', fatal=False))
else:
formats.extend(self._extract_m3u8_formats(
asset_url, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
elif container == 'mp4' or ext == 'mp4':
quality = asset.get('video_quality')
formats.append({ formats.append({
'url': source_url, 'url': asset_url,
'format_id': quality, 'format_id': quality,
'quality': quality_key(quality), 'quality': quality_key(quality),
'ext': ext, 'ext': ext,
}) })
self._sort_formats(formats) self._sort_formats(formats)
def get(getter):
for src in (data, clip_data):
v = try_get(src, getter, compat_str)
if v:
return v
return { return {
'id': video_id, 'id': video_id,
'title': video_data['title'].strip(), 'title': title,
'description': video_data.get('description'), 'description': get(lambda x: x['description']),
'duration': int_or_none(video_data.get('duration')), 'duration': int_or_none(clip_data.get('duration')),
'series': video_data.get('titlePgm'), 'series': get(lambda x: x['program']['title']),
'formats': formats, 'formats': formats,
} }

View File

@ -0,0 +1,52 @@
# coding: utf-8
from __future__ import unicode_literals
from .adobepass import AdobePassIE
from ..utils import (
extract_attributes,
update_url_query,
smuggle_url,
)
class SproutIE(AdobePassIE):
    """Extractor for sproutonline.com /watch/ pages.

    Resolves the page to a ThePlatform URL, either from the options of the
    data-component="video" element or, when that element is absent, from
    the sproutVideoIframe iframe.
    """

    _VALID_URL = r'https?://(?:www\.)?sproutonline\.com/watch/(?P<id>[^/?#]+)'
    _TEST = {
        'url': 'http://www.sproutonline.com/watch/cowboy-adventure',
        'md5': '74bf14128578d1e040c3ebc82088f45f',
        'info_dict': {
            'id': '9dexnwtmh8_X',
            'ext': 'mp4',
            'title': 'A Cowboy Adventure',
            'description': 'Ruff-Ruff, Tweet and Dave get to be cowboys for the day at Six Cow Corral.',
            'timestamp': 1437758640,
            'upload_date': '20150724',
            'uploader': 'NBCU-SPROUT-NEW',
        }
    }

    def _real_extract(self, url):
        """Return a url_result handing the media URL to ThePlatform."""
        display_id = self._match_id(url)
        page = self._download_webpage(url, display_id)

        component_tag = self._search_regex(
            r'(?s)(<div[^>]+data-component="video"[^>]*?>)',
            page, 'video component', default=None)

        if not component_tag:
            # No video component on the page: the iframe is mandatory here,
            # so this search is fatal when it does not match.
            iframe_tag = self._search_regex(
                r'(<iframe[^>]+id="sproutVideoIframe"[^>]*?>)',
                page, 'iframe')
            return self.url_result(
                extract_attributes(iframe_tag)['src'], 'ThePlatform')

        component_attrs = extract_attributes(component_tag)
        player_options = self._parse_json(
            component_attrs['data-options'], display_id)
        media_url = player_options['video']

        params = {
            'mbr': 'true',
            'manifest': 'm3u',
        }
        if player_options.get('protected'):
            # Protected videos need an MVPD auth token in the query.
            params['auth'] = self._extract_mvpd_auth(
                url, player_options['pid'], 'sprout', 'sprout')

        smuggled_url = smuggle_url(
            update_url_query(media_url, params), {'force_smil_url': True})
        return self.url_result(smuggled_url, 'ThePlatform')

View File

@ -53,14 +53,15 @@ class XTubeIE(InfoExtractor):
if not display_id: if not display_id:
display_id = video_id display_id = video_id
url = 'http://www.xtube.com/watch.php?v=%s' % video_id url = 'http://www.xtube.com/video-watch/-%s' % video_id
req = sanitized_Request(url) req = sanitized_Request(url)
req.add_header('Cookie', 'age_verified=1; cookiesAccepted=1') req.add_header('Cookie', 'age_verified=1; cookiesAccepted=1')
webpage = self._download_webpage(req, display_id) webpage = self._download_webpage(req, display_id)
sources = self._parse_json(self._search_regex( sources = self._parse_json(self._search_regex(
r'sources\s*:\s*({.+?}),', webpage, 'sources'), video_id) r'(["\'])sources\1\s*:\s*(?P<sources>{.+?}),',
webpage, 'sources', group='sources'), video_id)
formats = [] formats = []
for format_id, format_url in sources.items(): for format_id, format_url in sources.items():
@ -81,10 +82,10 @@ class XTubeIE(InfoExtractor):
r'<span[^>]+class="nickname"[^>]*>([^<]+)'), r'<span[^>]+class="nickname"[^>]*>([^<]+)'),
webpage, 'uploader', fatal=False) webpage, 'uploader', fatal=False)
duration = parse_duration(self._search_regex( duration = parse_duration(self._search_regex(
r'<dt>Runtime:</dt>\s*<dd>([^<]+)</dd>', r'<dt>Runtime:?</dt>\s*<dd>([^<]+)</dd>',
webpage, 'duration', fatal=False)) webpage, 'duration', fatal=False))
view_count = str_to_int(self._search_regex( view_count = str_to_int(self._search_regex(
r'<dt>Views:</dt>\s*<dd>([\d,\.]+)</dd>', r'<dt>Views:?</dt>\s*<dd>([\d,\.]+)</dd>',
webpage, 'view count', fatal=False)) webpage, 'view count', fatal=False))
comment_count = str_to_int(self._html_search_regex( comment_count = str_to_int(self._html_search_regex(
r'>Comments? \(([\d,\.]+)\)<', r'>Comments? \(([\d,\.]+)\)<',

View File

@ -1,3 +1,3 @@
from __future__ import unicode_literals from __future__ import unicode_literals
__version__ = '2017.02.07' __version__ = '2017.02.10'