Compare commits
39 Commits
2017.06.23
...
2017.07.02
Author | SHA1 | Date | |
---|---|---|---|
b6c9fe4162 | |||
4d9ba27bba | |||
50ae3f646e | |||
99a7e76240 | |||
a3a6d01a96 | |||
02d61a65e2 | |||
9b35297be1 | |||
4917478803 | |||
54faac2235 | |||
c69701c6ab | |||
d4f8ce6e91 | |||
b311b0ead2 | |||
72d256c434 | |||
b2ed954fc6 | |||
a919ca0ad6 | |||
88d6b7c2bd | |||
fd1c5fba6b | |||
0646e34c7d | |||
bf2dc9cc6e | |||
f1c051009b | |||
33ffb645a6 | |||
35544690e4 | |||
136503e302 | |||
4a87de72df | |||
a7ce8f16c4 | |||
a5aea53fc8 | |||
0c7a631b61 | |||
fd9ee4de8c | |||
5744cf6c03 | |||
9c48b5a193 | |||
449c665776 | |||
23aec3d623 | |||
27449ad894 | |||
bd65f18153 | |||
73af5cc817 | |||
b5f523ed62 | |||
4f4dd8d797 | |||
4cb18ab1b9 | |||
ac7409eec5 |
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@ -6,8 +6,8 @@
|
||||
|
||||
---
|
||||
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.06.23*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.06.23**
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.07.02*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.07.02**
|
||||
|
||||
### Before submitting an *issue* make sure you have:
|
||||
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||
@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2017.06.23
|
||||
[debug] youtube-dl version 2017.07.02
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
39
ChangeLog
39
ChangeLog
@ -1,3 +1,42 @@
|
||||
version 2017.07.02
|
||||
|
||||
Core
|
||||
* [extractor/common] Improve _json_ld
|
||||
|
||||
Extractors
|
||||
+ [thisoldhouse] Add more fallbacks for video id
|
||||
* [thisoldhouse] Fix video id extraction (#13540, #13541)
|
||||
* [xfileshare] Extend format regular expression (#13536)
|
||||
* [ted] Fix extraction (#13535)
|
||||
+ [tastytrade] Add support for tastytrade.com (#13521)
|
||||
* [dplayit] Relax video id regular expression (#13524)
|
||||
+ [generic] Extract more generic metadata (#13527)
|
||||
+ [bbccouk] Capture and output error message (#13501, #13518)
|
||||
* [cbsnews] Relax video info regular expression (#13284, #13503)
|
||||
+ [facebook] Add support for plugin video embeds and multiple embeds (#13493)
|
||||
* [soundcloud] Switch to https for API requests (#13502)
|
||||
* [pandatv] Switch to https for API and download URLs
|
||||
+ [pandatv] Add support for https URLs (#13491)
|
||||
+ [niconico] Support sp subdomain (#13494)
|
||||
|
||||
|
||||
version 2017.06.25
|
||||
|
||||
Core
|
||||
+ [adobepass] Add support for DIRECTV NOW (mso ATTOTT) (#13472)
|
||||
* [YoutubeDL] Skip malformed formats for better extraction robustness
|
||||
|
||||
Extractors
|
||||
+ [wsj] Add support for barrons.com (#13470)
|
||||
+ [ign] Add another video id pattern (#13328)
|
||||
+ [raiplay:live] Add support for live streams (#13414)
|
||||
+ [redbulltv] Add support for live videos and segments (#13486)
|
||||
+ [onetpl] Add support for videos embedded via pulsembed (#13482)
|
||||
* [ooyala] Make more robust
|
||||
* [ooyala] Skip empty format URLs (#13471, #13476)
|
||||
* [hgtv.com:show] Fix typo
|
||||
|
||||
|
||||
version 2017.06.23
|
||||
|
||||
Core
|
||||
|
@ -644,6 +644,7 @@
|
||||
- **RadioJavan**
|
||||
- **Rai**
|
||||
- **RaiPlay**
|
||||
- **RaiPlayLive**
|
||||
- **RBMARadio**
|
||||
- **RDS**: RDS.ca
|
||||
- **RedBullTV**
|
||||
@ -767,6 +768,7 @@
|
||||
- **Tagesschau**
|
||||
- **tagesschau:player**
|
||||
- **Tass**
|
||||
- **TastyTrade**
|
||||
- **TBS**
|
||||
- **TDSLifeway**
|
||||
- **teachertube**: teachertube.com videos
|
||||
|
@ -1448,17 +1448,25 @@ class YoutubeDL(object):
|
||||
if not formats:
|
||||
raise ExtractorError('No video formats found!')
|
||||
|
||||
def is_wellformed(f):
|
||||
url = f.get('url')
|
||||
valid_url = url and isinstance(url, compat_str)
|
||||
if not valid_url:
|
||||
self.report_warning(
|
||||
'"url" field is missing or empty - skipping format, '
|
||||
'there is an error in extractor')
|
||||
return valid_url
|
||||
|
||||
# Filter out malformed formats for better extraction robustness
|
||||
formats = list(filter(is_wellformed, formats))
|
||||
|
||||
formats_dict = {}
|
||||
|
||||
# We check that all the formats have the format and format_id fields
|
||||
for i, format in enumerate(formats):
|
||||
if 'url' not in format:
|
||||
raise ExtractorError('Missing "url" key in result (index %d)' % i)
|
||||
|
||||
sanitize_string_field(format, 'format_id')
|
||||
sanitize_numeric_fields(format)
|
||||
format['url'] = sanitize_url(format['url'])
|
||||
|
||||
if format.get('format_id') is None:
|
||||
format['format_id'] = compat_str(i)
|
||||
else:
|
||||
|
@ -15,6 +15,7 @@ from ..utils import (
|
||||
urlencode_postdata,
|
||||
unified_timestamp,
|
||||
ExtractorError,
|
||||
NO_DEFAULT,
|
||||
)
|
||||
|
||||
|
||||
@ -24,6 +25,11 @@ MSO_INFO = {
|
||||
'username_field': 'username',
|
||||
'password_field': 'password',
|
||||
},
|
||||
'ATTOTT': {
|
||||
'name': 'DIRECTV NOW',
|
||||
'username_field': 'email',
|
||||
'password_field': 'loginpassword',
|
||||
},
|
||||
'Rogers': {
|
||||
'name': 'Rogers',
|
||||
'username_field': 'UserName',
|
||||
@ -1316,6 +1322,8 @@ class AdobePassIE(InfoExtractor):
|
||||
_USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0'
|
||||
_MVPD_CACHE = 'ap-mvpd'
|
||||
|
||||
_DOWNLOADING_LOGIN_PAGE = 'Downloading Provider Login Page'
|
||||
|
||||
def _download_webpage_handle(self, *args, **kwargs):
|
||||
headers = kwargs.get('headers', {})
|
||||
headers.update(self.geo_verification_headers())
|
||||
@ -1365,6 +1373,21 @@ class AdobePassIE(InfoExtractor):
|
||||
'Use --ap-mso to specify Adobe Pass Multiple-system operator Identifier '
|
||||
'and --ap-username and --ap-password or --netrc to provide account credentials.', expected=True)
|
||||
|
||||
def extract_redirect_url(html, url=None, fatal=False):
|
||||
# TODO: eliminate code duplication with generic extractor and move
|
||||
# redirection code into _download_webpage_handle
|
||||
REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
|
||||
redirect_url = self._search_regex(
|
||||
r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
|
||||
r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
|
||||
html, 'meta refresh redirect',
|
||||
default=NO_DEFAULT if fatal else None, fatal=fatal)
|
||||
if not redirect_url:
|
||||
return None
|
||||
if url:
|
||||
redirect_url = compat_urlparse.urljoin(url, unescapeHTML(redirect_url))
|
||||
return redirect_url
|
||||
|
||||
mvpd_headers = {
|
||||
'ap_42': 'anonymous',
|
||||
'ap_11': 'Linux i686',
|
||||
@ -1414,16 +1437,15 @@ class AdobePassIE(InfoExtractor):
|
||||
if '<form name="signin"' in provider_redirect_page:
|
||||
provider_login_page_res = provider_redirect_page_res
|
||||
elif 'http-equiv="refresh"' in provider_redirect_page:
|
||||
oauth_redirect_url = self._html_search_regex(
|
||||
r'content="0;\s*url=([^\'"]+)',
|
||||
provider_redirect_page, 'meta refresh redirect')
|
||||
oauth_redirect_url = extract_redirect_url(
|
||||
provider_redirect_page, fatal=True)
|
||||
provider_login_page_res = self._download_webpage_handle(
|
||||
oauth_redirect_url, video_id,
|
||||
'Downloading Provider Login Page')
|
||||
self._DOWNLOADING_LOGIN_PAGE)
|
||||
else:
|
||||
provider_login_page_res = post_form(
|
||||
provider_redirect_page_res,
|
||||
'Downloading Provider Login Page')
|
||||
self._DOWNLOADING_LOGIN_PAGE)
|
||||
|
||||
mvpd_confirm_page_res = post_form(
|
||||
provider_login_page_res, 'Logging in', {
|
||||
@ -1470,8 +1492,17 @@ class AdobePassIE(InfoExtractor):
|
||||
'Content-Type': 'application/x-www-form-urlencoded'
|
||||
})
|
||||
else:
|
||||
# Some providers (e.g. DIRECTV NOW) have another meta refresh
|
||||
# based redirect that should be followed.
|
||||
provider_redirect_page, urlh = provider_redirect_page_res
|
||||
provider_refresh_redirect_url = extract_redirect_url(
|
||||
provider_redirect_page, url=urlh.geturl())
|
||||
if provider_refresh_redirect_url:
|
||||
provider_redirect_page_res = self._download_webpage_handle(
|
||||
provider_refresh_redirect_url, video_id,
|
||||
'Downloading Provider Redirect Page (meta refresh)')
|
||||
provider_login_page_res = post_form(
|
||||
provider_redirect_page_res, 'Downloading Provider Login Page')
|
||||
provider_redirect_page_res, self._DOWNLOADING_LOGIN_PAGE)
|
||||
mvpd_confirm_page_res = post_form(provider_login_page_res, 'Logging in', {
|
||||
mso_info.get('username_field', 'username'): username,
|
||||
mso_info.get('password_field', 'password'): password,
|
||||
|
@ -36,7 +36,7 @@ class BBCCoUkIE(InfoExtractor):
|
||||
(?:
|
||||
programmes/(?!articles/)|
|
||||
iplayer(?:/[^/]+)?/(?:episode/|playlist/)|
|
||||
music/clips[/#]|
|
||||
music/(?:clips|audiovideo/popular)[/#]|
|
||||
radio/player/
|
||||
)
|
||||
(?P<id>%s)(?!/(?:episodes|broadcasts|clips))
|
||||
@ -229,8 +229,10 @@ class BBCCoUkIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/radio/player/p03cchwf',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
}, {
|
||||
'url': 'https://www.bbc.co.uk/music/audiovideo/popular#p055bc55',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_USP_RE = r'/([^/]+?)\.ism(?:\.hlsv2\.ism)?/[^/]+\.m3u8'
|
||||
|
||||
@ -523,6 +525,12 @@ class BBCCoUkIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, group_id, 'Downloading video page')
|
||||
|
||||
error = self._search_regex(
|
||||
r'<div\b[^>]+\bclass=["\']smp__message delta["\'][^>]*>([^<]+)<',
|
||||
webpage, 'error', default=None)
|
||||
if error:
|
||||
raise ExtractorError(error, expected=True)
|
||||
|
||||
programme_id = None
|
||||
duration = None
|
||||
|
||||
|
@ -84,9 +84,10 @@ class BuzzFeedIE(InfoExtractor):
|
||||
continue
|
||||
entries.append(self.url_result(video['url']))
|
||||
|
||||
facebook_url = FacebookIE._extract_url(webpage)
|
||||
if facebook_url:
|
||||
entries.append(self.url_result(facebook_url))
|
||||
facebook_urls = FacebookIE._extract_urls(webpage)
|
||||
entries.extend([
|
||||
self.url_result(facebook_url)
|
||||
for facebook_url in facebook_urls])
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
|
@ -15,19 +15,23 @@ class CBSNewsIE(CBSIE):
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.cbsnews.com/news/tesla-and-spacex-elon-musks-industrial-empire/',
|
||||
# 60 minutes
|
||||
'url': 'http://www.cbsnews.com/news/artificial-intelligence-positioned-to-be-a-game-changer/',
|
||||
'info_dict': {
|
||||
'id': 'tesla-and-spacex-elon-musks-industrial-empire',
|
||||
'ext': 'flv',
|
||||
'title': 'Tesla and SpaceX: Elon Musk\'s industrial empire',
|
||||
'thumbnail': 'http://beta.img.cbsnews.com/i/2014/03/30/60147937-2f53-4565-ad64-1bdd6eb64679/60-0330-pelley-640x360.jpg',
|
||||
'duration': 791,
|
||||
'id': '_B6Ga3VJrI4iQNKsir_cdFo9Re_YJHE_',
|
||||
'ext': 'mp4',
|
||||
'title': 'Artificial Intelligence',
|
||||
'description': 'md5:8818145f9974431e0fb58a1b8d69613c',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 1606,
|
||||
'uploader': 'CBSI-NEW',
|
||||
'timestamp': 1498431900,
|
||||
'upload_date': '20170625',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Subscribers only',
|
||||
},
|
||||
{
|
||||
'url': 'http://www.cbsnews.com/videos/fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack/',
|
||||
@ -52,6 +56,22 @@ class CBSNewsIE(CBSIE):
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
# 48 hours
|
||||
'url': 'http://www.cbsnews.com/news/maria-ridulph-murder-will-the-nations-oldest-cold-case-to-go-to-trial-ever-get-solved/',
|
||||
'info_dict': {
|
||||
'id': 'QpM5BJjBVEAUFi7ydR9LusS69DPLqPJ1',
|
||||
'ext': 'mp4',
|
||||
'title': 'Cold as Ice',
|
||||
'description': 'Can a childhood memory of a friend\'s murder solve a 1957 cold case? "48 Hours" correspondent Erin Moriarty has the latest.',
|
||||
'upload_date': '20170604',
|
||||
'timestamp': 1496538000,
|
||||
'uploader': 'CBSI-NEW',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -60,7 +80,7 @@ class CBSNewsIE(CBSIE):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_info = self._parse_json(self._html_search_regex(
|
||||
r'(?:<ul class="media-list items" id="media-related-items"><li data-video-info|<div id="cbsNewsVideoPlayer" data-video-player-options)=\'({.+?})\'',
|
||||
r'(?:<ul class="media-list items" id="media-related-items"[^>]*><li data-video-info|<div id="cbsNewsVideoPlayer" data-video-player-options)=\'({.+?})\'',
|
||||
webpage, 'video JSON info', default='{}'), video_id, fatal=False)
|
||||
|
||||
if video_info:
|
||||
|
@ -1002,17 +1002,17 @@ class InfoExtractor(object):
|
||||
item_type = e.get('@type')
|
||||
if expected_type is not None and expected_type != item_type:
|
||||
return info
|
||||
if item_type == 'TVEpisode':
|
||||
if item_type in ('TVEpisode', 'Episode'):
|
||||
info.update({
|
||||
'episode': unescapeHTML(e.get('name')),
|
||||
'episode_number': int_or_none(e.get('episodeNumber')),
|
||||
'description': unescapeHTML(e.get('description')),
|
||||
})
|
||||
part_of_season = e.get('partOfSeason')
|
||||
if isinstance(part_of_season, dict) and part_of_season.get('@type') == 'TVSeason':
|
||||
if isinstance(part_of_season, dict) and part_of_season.get('@type') in ('TVSeason', 'Season', 'CreativeWorkSeason'):
|
||||
info['season_number'] = int_or_none(part_of_season.get('seasonNumber'))
|
||||
part_of_series = e.get('partOfSeries') or e.get('partOfTVSeries')
|
||||
if isinstance(part_of_series, dict) and part_of_series.get('@type') == 'TVSeries':
|
||||
if isinstance(part_of_series, dict) and part_of_series.get('@type') in ('TVSeries', 'Series', 'CreativeWorkSeries'):
|
||||
info['series'] = unescapeHTML(part_of_series.get('name'))
|
||||
elif item_type == 'Article':
|
||||
info.update({
|
||||
@ -1022,10 +1022,10 @@ class InfoExtractor(object):
|
||||
})
|
||||
elif item_type == 'VideoObject':
|
||||
extract_video_object(e)
|
||||
elif item_type == 'WebPage':
|
||||
video = e.get('video')
|
||||
if isinstance(video, dict) and video.get('@type') == 'VideoObject':
|
||||
extract_video_object(video)
|
||||
continue
|
||||
video = e.get('video')
|
||||
if isinstance(video, dict) and video.get('@type') == 'VideoObject':
|
||||
extract_video_object(video)
|
||||
break
|
||||
return dict((k, v) for k, v in info.items() if v is not None)
|
||||
|
||||
|
@ -184,7 +184,7 @@ class DPlayItIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
info_url = self._search_regex(
|
||||
r'url\s*:\s*["\']((?:https?:)?//[^/]+/playback/videoPlaybackInfo/\d+)',
|
||||
r'url\s*[:=]\s*["\']((?:https?:)?//[^/]+/playback/videoPlaybackInfo/\d+)',
|
||||
webpage, 'video id')
|
||||
|
||||
title = remove_end(self._og_search_title(webpage), ' | Dplay')
|
||||
|
@ -824,6 +824,7 @@ from .radiobremen import RadioBremenIE
|
||||
from .radiofrance import RadioFranceIE
|
||||
from .rai import (
|
||||
RaiPlayIE,
|
||||
RaiPlayLiveIE,
|
||||
RaiIE,
|
||||
)
|
||||
from .rbmaradio import RBMARadioIE
|
||||
@ -972,6 +973,7 @@ from .tagesschau import (
|
||||
TagesschauIE,
|
||||
)
|
||||
from .tass import TassIE
|
||||
from .tastytrade import TastyTradeIE
|
||||
from .tbs import TBSIE
|
||||
from .tdslifeway import TDSLifewayIE
|
||||
from .teachertube import (
|
||||
|
@ -203,19 +203,19 @@ class FacebookIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_url(webpage):
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
|
||||
if mobj is not None:
|
||||
return mobj.group('url')
|
||||
|
||||
def _extract_urls(webpage):
|
||||
urls = []
|
||||
for mobj in re.finditer(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>https?://www\.facebook\.com/(?:video/embed|plugins/video\.php).+?)\1',
|
||||
webpage):
|
||||
urls.append(mobj.group('url'))
|
||||
# Facebook API embed
|
||||
# see https://developers.facebook.com/docs/plugins/embedded-video-player
|
||||
mobj = re.search(r'''(?x)<div[^>]+
|
||||
for mobj in re.finditer(r'''(?x)<div[^>]+
|
||||
class=(?P<q1>[\'"])[^\'"]*\bfb-(?:video|post)\b[^\'"]*(?P=q1)[^>]+
|
||||
data-href=(?P<q2>[\'"])(?P<url>(?:https?:)?//(?:www\.)?facebook.com/.+?)(?P=q2)''', webpage)
|
||||
if mobj is not None:
|
||||
return mobj.group('url')
|
||||
data-href=(?P<q2>[\'"])(?P<url>(?:https?:)?//(?:www\.)?facebook.com/.+?)(?P=q2)''', webpage):
|
||||
urls.append(mobj.group('url'))
|
||||
return urls
|
||||
|
||||
def _login(self):
|
||||
(useremail, password) = self._get_login_info()
|
||||
|
@ -1522,6 +1522,21 @@ class GenericIE(InfoExtractor):
|
||||
'title': 'Facebook video #599637780109885',
|
||||
},
|
||||
},
|
||||
# Facebook <iframe> embed, plugin video
|
||||
{
|
||||
'url': 'http://5pillarsuk.com/2017/06/07/tariq-ramadan-disagrees-with-pr-exercise-by-imams-refusing-funeral-prayers-for-london-attackers/',
|
||||
'info_dict': {
|
||||
'id': '1754168231264132',
|
||||
'ext': 'mp4',
|
||||
'title': 'About the Imams and Religious leaders refusing to perform funeral prayers for...',
|
||||
'uploader': 'Tariq Ramadan (official)',
|
||||
'timestamp': 1496758379,
|
||||
'upload_date': '20170606',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# Facebook API embed
|
||||
{
|
||||
'url': 'http://www.lothype.com/blue-stars-2016-preview-standstill-full-show/',
|
||||
@ -2033,6 +2048,13 @@ class GenericIE(InfoExtractor):
|
||||
video_description = self._og_search_description(webpage, default=None)
|
||||
video_thumbnail = self._og_search_thumbnail(webpage, default=None)
|
||||
|
||||
info_dict.update({
|
||||
'title': video_title,
|
||||
'description': video_description,
|
||||
'thumbnail': video_thumbnail,
|
||||
'age_limit': age_limit,
|
||||
})
|
||||
|
||||
# Look for Brightcove Legacy Studio embeds
|
||||
bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage)
|
||||
if bc_urls:
|
||||
@ -2222,9 +2244,9 @@ class GenericIE(InfoExtractor):
|
||||
return self.url_result(mobj.group('url'))
|
||||
|
||||
# Look for embedded Facebook player
|
||||
facebook_url = FacebookIE._extract_url(webpage)
|
||||
if facebook_url is not None:
|
||||
return self.url_result(facebook_url, 'Facebook')
|
||||
facebook_urls = FacebookIE._extract_urls(webpage)
|
||||
if facebook_urls:
|
||||
return self.playlist_from_matches(facebook_urls, video_id, video_title)
|
||||
|
||||
# Look for embedded VK player
|
||||
mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
|
||||
@ -2669,18 +2691,26 @@ class GenericIE(InfoExtractor):
|
||||
return self.playlist_from_matches(
|
||||
mediaset_urls, video_id, video_title, ie=MediasetIE.ie_key())
|
||||
|
||||
def merge_dicts(dict1, dict2):
|
||||
merged = {}
|
||||
for k, v in dict1.items():
|
||||
if v is not None:
|
||||
merged[k] = v
|
||||
for k, v in dict2.items():
|
||||
if v is None:
|
||||
continue
|
||||
if (k not in merged or
|
||||
(isinstance(v, compat_str) and v and
|
||||
isinstance(merged[k], compat_str) and
|
||||
not merged[k])):
|
||||
merged[k] = v
|
||||
return merged
|
||||
|
||||
# Looking for http://schema.org/VideoObject
|
||||
json_ld = self._search_json_ld(
|
||||
webpage, video_id, default={}, expected_type='VideoObject')
|
||||
if json_ld.get('url'):
|
||||
info_dict.update({
|
||||
'title': video_title or info_dict['title'],
|
||||
'description': video_description,
|
||||
'thumbnail': video_thumbnail,
|
||||
'age_limit': age_limit
|
||||
})
|
||||
info_dict.update(json_ld)
|
||||
return info_dict
|
||||
return merge_dicts(json_ld, info_dict)
|
||||
|
||||
# Look for HTML5 media
|
||||
entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
|
||||
@ -2698,9 +2728,7 @@ class GenericIE(InfoExtractor):
|
||||
if jwplayer_data:
|
||||
info = self._parse_jwplayer_data(
|
||||
jwplayer_data, video_id, require_title=False, base_url=url)
|
||||
if not info.get('title'):
|
||||
info['title'] = video_title
|
||||
return info
|
||||
return merge_dicts(info, info_dict)
|
||||
|
||||
def check_video(vurl):
|
||||
if YoutubeIE.suitable(vurl):
|
||||
|
@ -28,7 +28,7 @@ class HGTVComShowIE(InfoExtractor):
|
||||
|
||||
config = self._parse_json(
|
||||
self._search_regex(
|
||||
r'(?s)data-(?:deferred)?-module=["\']video["\'][^>]*>.*?<script[^>]+type=["\']text/x-config["\'][^>]*>(.+?)</script',
|
||||
r'(?s)data-(?:deferred-)?module=["\']video["\'][^>]*>.*?<script[^>]+type=["\']text/x-config["\'][^>]*>(.+?)</script',
|
||||
webpage, 'video config'),
|
||||
display_id)['channels'][0]
|
||||
|
||||
|
@ -89,6 +89,11 @@ class IGNIE(InfoExtractor):
|
||||
'url': 'http://me.ign.com/ar/angry-birds-2/106533/video/lrd-ldyy-lwl-lfylm-angry-birds',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
# videoId pattern
|
||||
'url': 'http://www.ign.com/articles/2017/06/08/new-ducktales-short-donalds-birthday-doesnt-go-as-planned',
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
|
||||
def _find_video_id(self, webpage):
|
||||
@ -98,6 +103,8 @@ class IGNIE(InfoExtractor):
|
||||
r'data-video-id="(.+?)"',
|
||||
r'<object id="vid_(.+?)"',
|
||||
r'<meta name="og:image" content=".*/(.+?)-(.+?)/.+.jpg"',
|
||||
r'videoId"\s*:\s*"(.+?)"',
|
||||
r'videoId["\']\s*:\s*["\']([^"\']+?)["\']',
|
||||
]
|
||||
return self._search_regex(res_id, webpage, 'video id', default=None)
|
||||
|
||||
|
@ -83,9 +83,12 @@ class NiconicoIE(InfoExtractor):
|
||||
'uploader_id': '312',
|
||||
},
|
||||
'skip': 'The viewing period of the video you were searching for has expired.',
|
||||
}, {
|
||||
'url': 'http://sp.nicovideo.jp/watch/sm28964488?ss_pos=1&cp_in=wt_tg',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_VALID_URL = r'https?://(?:www\.|secure\.)?nicovideo\.jp/watch/(?P<id>(?:[a-z]{2})?[0-9]+)'
|
||||
_VALID_URL = r'https?://(?:www\.|secure\.|sp\.)?nicovideo\.jp/watch/(?P<id>(?:[a-z]{2})?[0-9]+)'
|
||||
_NETRC_MACHINE = 'niconico'
|
||||
|
||||
def _real_initialize(self):
|
||||
|
@ -11,6 +11,7 @@ from ..utils import (
|
||||
get_element_by_class,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
NO_DEFAULT,
|
||||
parse_iso8601,
|
||||
remove_start,
|
||||
strip_or_none,
|
||||
@ -198,6 +199,19 @@ class OnetPlIE(InfoExtractor):
|
||||
'upload_date': '20170214',
|
||||
'timestamp': 1487078046,
|
||||
},
|
||||
}, {
|
||||
# embedded via pulsembed
|
||||
'url': 'http://film.onet.pl/pensjonat-nad-rozlewiskiem-relacja-z-planu-serialu/y428n0',
|
||||
'info_dict': {
|
||||
'id': '501235.965429946',
|
||||
'ext': 'mp4',
|
||||
'title': '"Pensjonat nad rozlewiskiem": relacja z planu serialu',
|
||||
'upload_date': '20170622',
|
||||
'timestamp': 1498159955,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://film.onet.pl/zwiastuny/ghost-in-the-shell-drugi-zwiastun-pl/5q6yl3',
|
||||
'only_matching': True,
|
||||
@ -212,13 +226,25 @@ class OnetPlIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _search_mvp_id(self, webpage, default=NO_DEFAULT):
|
||||
return self._search_regex(
|
||||
r'data-(?:params-)?mvp=["\'](\d+\.\d+)', webpage, 'mvp id',
|
||||
default=default)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
mvp_id = self._search_regex(
|
||||
r'data-params-mvp=["\'](\d+\.\d+)', webpage, 'mvp id')
|
||||
mvp_id = self._search_mvp_id(webpage, default=None)
|
||||
|
||||
if not mvp_id:
|
||||
pulsembed_url = self._search_regex(
|
||||
r'data-src=(["\'])(?P<url>(?:https?:)?//pulsembed\.eu/.+?)\1',
|
||||
webpage, 'pulsembed url', group='url')
|
||||
webpage = self._download_webpage(
|
||||
pulsembed_url, video_id, 'Downloading pulsembed webpage')
|
||||
mvp_id = self._search_mvp_id(webpage)
|
||||
|
||||
return self.url_result(
|
||||
'onetmvp:%s' % mvp_id, OnetMVPIE.ie_key(), video_id=mvp_id)
|
||||
|
@ -3,12 +3,14 @@ import re
|
||||
import base64
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
ExtractorError,
|
||||
unsmuggle_url,
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
try_get,
|
||||
unsmuggle_url,
|
||||
)
|
||||
from ..compat import compat_urllib_parse_urlencode
|
||||
|
||||
@ -39,13 +41,15 @@ class OoyalaBaseIE(InfoExtractor):
|
||||
formats = []
|
||||
if cur_auth_data['authorized']:
|
||||
for stream in cur_auth_data['streams']:
|
||||
s_url = base64.b64decode(
|
||||
stream['url']['data'].encode('ascii')).decode('utf-8')
|
||||
if s_url in urls:
|
||||
url_data = try_get(stream, lambda x: x['url']['data'], compat_str)
|
||||
if not url_data:
|
||||
continue
|
||||
s_url = base64.b64decode(url_data.encode('ascii')).decode('utf-8')
|
||||
if not s_url or s_url in urls:
|
||||
continue
|
||||
urls.append(s_url)
|
||||
ext = determine_ext(s_url, None)
|
||||
delivery_type = stream['delivery_type']
|
||||
delivery_type = stream.get('delivery_type')
|
||||
if delivery_type == 'hls' or ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
re.sub(r'/ip(?:ad|hone)/', '/all/', s_url), embed_code, 'mp4', 'm3u8_native',
|
||||
@ -65,7 +69,7 @@ class OoyalaBaseIE(InfoExtractor):
|
||||
else:
|
||||
formats.append({
|
||||
'url': s_url,
|
||||
'ext': ext or stream.get('delivery_type'),
|
||||
'ext': ext or delivery_type,
|
||||
'vcodec': stream.get('video_codec'),
|
||||
'format_id': delivery_type,
|
||||
'width': int_or_none(stream.get('width')),
|
||||
@ -136,6 +140,11 @@ class OoyalaIE(OoyalaBaseIE):
|
||||
'title': 'Divide Tool Path.mp4',
|
||||
'duration': 204.405,
|
||||
}
|
||||
},
|
||||
{
|
||||
# empty stream['url']['data']
|
||||
'url': 'http://player.ooyala.com/player.js?embedCode=w2bnZtYjE6axZ_dw1Cd0hQtXd_ige2Is',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
|
||||
|
@ -10,13 +10,13 @@ from ..utils import (
|
||||
|
||||
class PandaTVIE(InfoExtractor):
|
||||
IE_DESC = '熊猫TV'
|
||||
_VALID_URL = r'http://(?:www\.)?panda\.tv/(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.panda.tv/10091',
|
||||
_VALID_URL = r'https?://(?:www\.)?panda\.tv/(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.panda.tv/66666',
|
||||
'info_dict': {
|
||||
'id': '10091',
|
||||
'id': '66666',
|
||||
'title': 're:.+',
|
||||
'uploader': '囚徒',
|
||||
'uploader': '刘杀鸡',
|
||||
'ext': 'flv',
|
||||
'is_live': True,
|
||||
},
|
||||
@ -24,13 +24,16 @@ class PandaTVIE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Live stream is offline',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.panda.tv/66666',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
config = self._download_json(
|
||||
'http://www.panda.tv/api_room?roomid=%s' % video_id, video_id)
|
||||
'https://www.panda.tv/api_room?roomid=%s' % video_id, video_id)
|
||||
|
||||
error_code = config.get('errno', 0)
|
||||
if error_code is not 0:
|
||||
@ -74,7 +77,7 @@ class PandaTVIE(InfoExtractor):
|
||||
continue
|
||||
for pref, (ext, pl) in enumerate((('m3u8', '-hls'), ('flv', ''))):
|
||||
formats.append({
|
||||
'url': 'http://pl%s%s.live.panda.tv/live_panda/%s%s%s.%s'
|
||||
'url': 'https://pl%s%s.live.panda.tv/live_panda/%s%s%s.%s'
|
||||
% (pl, plflag1, room_key, live_panda, suffix[quality], ext),
|
||||
'format_id': '%s-%s' % (k, ext),
|
||||
'quality': quality,
|
||||
|
@ -191,11 +191,12 @@ class RaiPlayIE(RaiBaseIE):
|
||||
|
||||
info = {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'title': self._live_title(title) if relinker_info.get(
|
||||
'is_live') else title,
|
||||
'alt_title': media.get('subtitle'),
|
||||
'description': media.get('description'),
|
||||
'uploader': media.get('channel'),
|
||||
'creator': media.get('editor'),
|
||||
'uploader': strip_or_none(media.get('channel')),
|
||||
'creator': strip_or_none(media.get('editor')),
|
||||
'duration': parse_duration(video.get('duration')),
|
||||
'timestamp': timestamp,
|
||||
'thumbnails': thumbnails,
|
||||
@ -208,10 +209,46 @@ class RaiPlayIE(RaiBaseIE):
|
||||
}
|
||||
|
||||
info.update(relinker_info)
|
||||
|
||||
return info
|
||||
|
||||
|
||||
class RaiPlayLiveIE(RaiBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?raiplay\.it/dirette/(?P<id>[^/?#&]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.raiplay.it/dirette/rainews24',
|
||||
'info_dict': {
|
||||
'id': 'd784ad40-e0ae-4a69-aa76-37519d238a9c',
|
||||
'display_id': 'rainews24',
|
||||
'ext': 'mp4',
|
||||
'title': 're:^Diretta di Rai News 24 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'description': 'md5:6eca31500550f9376819f174e5644754',
|
||||
'uploader': 'Rai News 24',
|
||||
'creator': 'Rai News 24',
|
||||
'is_live': True,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
video_id = self._search_regex(
|
||||
r'data-uniquename=["\']ContentItem-(%s)' % RaiBaseIE._UUID_RE,
|
||||
webpage, 'content id')
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': RaiPlayIE.ie_key(),
|
||||
'url': 'http://www.raiplay.it/dirette/ContentItem-%s.html' % video_id,
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
}
|
||||
|
||||
|
||||
class RaiIE(RaiBaseIE):
|
||||
_VALID_URL = r'https?://[^/]+\.(?:rai\.(?:it|tv)|rainews\.it)/dl/.+?-(?P<id>%s)(?:-.+?)?\.html' % RaiBaseIE._UUID_RE
|
||||
_TESTS = [{
|
||||
|
@ -13,7 +13,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class RedBullTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?redbull\.tv/(?:video|film)/(?P<id>AP-\w+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?redbull\.tv/(?:video|film|live)/(?:AP-\w+/segment/)?(?P<id>AP-\w+)'
|
||||
_TESTS = [{
|
||||
# film
|
||||
'url': 'https://www.redbull.tv/video/AP-1Q756YYX51W11/abc-of-wrc',
|
||||
@ -42,6 +42,22 @@ class RedBullTVIE(InfoExtractor):
|
||||
'season_number': 2,
|
||||
'episode_number': 4,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# segment
|
||||
'url': 'https://www.redbull.tv/live/AP-1R5DX49XS1W11/segment/AP-1QSAQJ6V52111/semi-finals',
|
||||
'info_dict': {
|
||||
'id': 'AP-1QSAQJ6V52111',
|
||||
'ext': 'mp4',
|
||||
'title': 'Semi Finals - Vans Park Series Pro Tour',
|
||||
'description': 'md5:306a2783cdafa9e65e39aa62f514fd97',
|
||||
'duration': 11791.991,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.redbull.tv/film/AP-1MSKKF5T92111/in-motion',
|
||||
'only_matching': True,
|
||||
@ -82,7 +98,8 @@ class RedBullTVIE(InfoExtractor):
|
||||
title = info['title'].strip()
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
video['url'], video_id, 'mp4', 'm3u8_native')
|
||||
video['url'], video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls')
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
|
@ -136,7 +136,7 @@ class SoundcloudIE(InfoExtractor):
|
||||
|
||||
@classmethod
|
||||
def _resolv_url(cls, url):
|
||||
return 'http://api.soundcloud.com/resolve.json?url=' + url + '&client_id=' + cls._CLIENT_ID
|
||||
return 'https://api.soundcloud.com/resolve.json?url=' + url + '&client_id=' + cls._CLIENT_ID
|
||||
|
||||
def _extract_info_dict(self, info, full_title=None, quiet=False, secret_token=None):
|
||||
track_id = compat_str(info['id'])
|
||||
@ -174,7 +174,7 @@ class SoundcloudIE(InfoExtractor):
|
||||
|
||||
# We have to retrieve the url
|
||||
format_dict = self._download_json(
|
||||
'http://api.soundcloud.com/i1/tracks/%s/streams' % track_id,
|
||||
'https://api.soundcloud.com/i1/tracks/%s/streams' % track_id,
|
||||
track_id, 'Downloading track url', query={
|
||||
'client_id': self._CLIENT_ID,
|
||||
'secret_token': secret_token,
|
||||
@ -236,7 +236,7 @@ class SoundcloudIE(InfoExtractor):
|
||||
track_id = mobj.group('track_id')
|
||||
|
||||
if track_id is not None:
|
||||
info_json_url = 'http://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID
|
||||
info_json_url = 'https://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID
|
||||
full_title = track_id
|
||||
token = mobj.group('secret_token')
|
||||
if token:
|
||||
@ -261,7 +261,7 @@ class SoundcloudIE(InfoExtractor):
|
||||
|
||||
self.report_resolve(full_title)
|
||||
|
||||
url = 'http://soundcloud.com/%s' % resolve_title
|
||||
url = 'https://soundcloud.com/%s' % resolve_title
|
||||
info_json_url = self._resolv_url(url)
|
||||
info = self._download_json(info_json_url, full_title, 'Downloading info JSON')
|
||||
|
||||
@ -290,7 +290,7 @@ class SoundcloudSetIE(SoundcloudPlaylistBaseIE):
|
||||
'id': '2284613',
|
||||
'title': 'The Royal Concept EP',
|
||||
},
|
||||
'playlist_mincount': 6,
|
||||
'playlist_mincount': 5,
|
||||
}, {
|
||||
'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep/token',
|
||||
'only_matching': True,
|
||||
@ -304,7 +304,7 @@ class SoundcloudSetIE(SoundcloudPlaylistBaseIE):
|
||||
# extract simple title (uploader + slug of song title)
|
||||
slug_title = mobj.group('slug_title')
|
||||
full_title = '%s/sets/%s' % (uploader, slug_title)
|
||||
url = 'http://soundcloud.com/%s/sets/%s' % (uploader, slug_title)
|
||||
url = 'https://soundcloud.com/%s/sets/%s' % (uploader, slug_title)
|
||||
|
||||
token = mobj.group('token')
|
||||
if token:
|
||||
@ -380,7 +380,7 @@ class SoundcloudUserIE(SoundcloudPlaylistBaseIE):
|
||||
'url': 'https://soundcloud.com/grynpyret/spotlight',
|
||||
'info_dict': {
|
||||
'id': '7098329',
|
||||
'title': 'GRYNPYRET (Spotlight)',
|
||||
'title': 'Grynpyret (Spotlight)',
|
||||
},
|
||||
'playlist_mincount': 1,
|
||||
}]
|
||||
@ -410,7 +410,7 @@ class SoundcloudUserIE(SoundcloudPlaylistBaseIE):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
uploader = mobj.group('user')
|
||||
|
||||
url = 'http://soundcloud.com/%s/' % uploader
|
||||
url = 'https://soundcloud.com/%s/' % uploader
|
||||
resolv_url = self._resolv_url(url)
|
||||
user = self._download_json(
|
||||
resolv_url, uploader, 'Downloading user info')
|
||||
@ -473,7 +473,7 @@ class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE):
|
||||
_VALID_URL = r'https?://api\.soundcloud\.com/playlists/(?P<id>[0-9]+)(?:/?\?secret_token=(?P<token>[^&]+?))?$'
|
||||
IE_NAME = 'soundcloud:playlist'
|
||||
_TESTS = [{
|
||||
'url': 'http://api.soundcloud.com/playlists/4110309',
|
||||
'url': 'https://api.soundcloud.com/playlists/4110309',
|
||||
'info_dict': {
|
||||
'id': '4110309',
|
||||
'title': 'TILT Brass - Bowery Poetry Club, August \'03 [Non-Site SCR 02]',
|
||||
|
43
youtube_dl/extractor/tastytrade.py
Normal file
43
youtube_dl/extractor/tastytrade.py
Normal file
@ -0,0 +1,43 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .ooyala import OoyalaIE
|
||||
|
||||
|
||||
class TastyTradeIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?tastytrade\.com/tt/shows/[^/]+/episodes/(?P<id>[^/?#&]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.tastytrade.com/tt/shows/market-measures/episodes/correlation-in-short-volatility-06-28-2017',
|
||||
'info_dict': {
|
||||
'id': 'F3bnlzbToeI6pLEfRyrlfooIILUjz4nM',
|
||||
'ext': 'mp4',
|
||||
'title': 'A History of Teaming',
|
||||
'description': 'md5:2a9033db8da81f2edffa4c99888140b3',
|
||||
'duration': 422.255,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
}, {
|
||||
'url': 'https://www.tastytrade.com/tt/shows/daily-dose/episodes/daily-dose-06-30-2017',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
ooyala_code = self._search_regex(
|
||||
r'data-media-id=(["\'])(?P<code>(?:(?!\1).)+)\1',
|
||||
webpage, 'ooyala code', group='code')
|
||||
|
||||
info = self._search_json_ld(webpage, display_id, fatal=False)
|
||||
info.update({
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': OoyalaIE.ie_key(),
|
||||
'url': 'ooyala:%s' % ooyala_code,
|
||||
'display_id': display_id,
|
||||
})
|
||||
return info
|
@ -6,7 +6,10 @@ import re
|
||||
from .common import InfoExtractor
|
||||
|
||||
from ..compat import compat_str
|
||||
from ..utils import int_or_none
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class TEDIE(InfoExtractor):
|
||||
@ -113,8 +116,9 @@ class TEDIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _extract_info(self, webpage):
|
||||
info_json = self._search_regex(r'q\("\w+.init",({.+})\)</script>',
|
||||
webpage, 'info json')
|
||||
info_json = self._search_regex(
|
||||
r'(?s)q\(\s*"\w+.init"\s*,\s*({.+})\)\s*</script>',
|
||||
webpage, 'info json')
|
||||
return json.loads(info_json)
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -136,11 +140,16 @@ class TEDIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, name,
|
||||
'Downloading playlist webpage')
|
||||
info = self._extract_info(webpage)
|
||||
playlist_info = info['playlist']
|
||||
|
||||
playlist_info = try_get(
|
||||
info, lambda x: x['__INITIAL_DATA__']['playlist'],
|
||||
dict) or info['playlist']
|
||||
|
||||
playlist_entries = [
|
||||
self.url_result('http://www.ted.com/talks/' + talk['slug'], self.ie_key())
|
||||
for talk in info['talks']
|
||||
for talk in try_get(
|
||||
info, lambda x: x['__INITIAL_DATA__']['talks'],
|
||||
dict) or info['talks']
|
||||
]
|
||||
return self.playlist_result(
|
||||
playlist_entries,
|
||||
@ -149,9 +158,14 @@ class TEDIE(InfoExtractor):
|
||||
|
||||
def _talk_info(self, url, video_name):
|
||||
webpage = self._download_webpage(url, video_name)
|
||||
self.report_extraction(video_name)
|
||||
|
||||
talk_info = self._extract_info(webpage)['talks'][0]
|
||||
info = self._extract_info(webpage)
|
||||
|
||||
talk_info = try_get(
|
||||
info, lambda x: x['__INITIAL_DATA__']['talks'][0],
|
||||
dict) or info['talks'][0]
|
||||
|
||||
title = talk_info['title'].strip()
|
||||
|
||||
external = talk_info.get('external')
|
||||
if external:
|
||||
@ -165,19 +179,27 @@ class TEDIE(InfoExtractor):
|
||||
'url': ext_url or external['uri'],
|
||||
}
|
||||
|
||||
native_downloads = try_get(
|
||||
talk_info, lambda x: x['downloads']['nativeDownloads'],
|
||||
dict) or talk_info['nativeDownloads']
|
||||
|
||||
formats = [{
|
||||
'url': format_url,
|
||||
'format_id': format_id,
|
||||
'format': format_id,
|
||||
} for (format_id, format_url) in talk_info['nativeDownloads'].items() if format_url is not None]
|
||||
} for (format_id, format_url) in native_downloads.items() if format_url is not None]
|
||||
if formats:
|
||||
for f in formats:
|
||||
finfo = self._NATIVE_FORMATS.get(f['format_id'])
|
||||
if finfo:
|
||||
f.update(finfo)
|
||||
|
||||
player_talk = talk_info['player_talks'][0]
|
||||
|
||||
resources_ = player_talk.get('resources') or talk_info.get('resources')
|
||||
|
||||
http_url = None
|
||||
for format_id, resources in talk_info['resources'].items():
|
||||
for format_id, resources in resources_.items():
|
||||
if format_id == 'h264':
|
||||
for resource in resources:
|
||||
h264_url = resource.get('file')
|
||||
@ -237,14 +259,11 @@ class TEDIE(InfoExtractor):
|
||||
|
||||
video_id = compat_str(talk_info['id'])
|
||||
|
||||
thumbnail = talk_info['thumb']
|
||||
if not thumbnail.startswith('http'):
|
||||
thumbnail = 'http://' + thumbnail
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': talk_info['title'].strip(),
|
||||
'uploader': talk_info['speaker'],
|
||||
'thumbnail': thumbnail,
|
||||
'title': title,
|
||||
'uploader': player_talk.get('speaker') or talk_info.get('speaker'),
|
||||
'thumbnail': player_talk.get('thumb') or talk_info.get('thumb'),
|
||||
'description': self._og_search_description(webpage),
|
||||
'subtitles': self._get_subtitles(video_id, talk_info),
|
||||
'formats': formats,
|
||||
|
@ -2,13 +2,15 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import try_get
|
||||
|
||||
|
||||
class ThisOldHouseIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?thisoldhouse\.com/(?:watch|how-to|tv-episode)/(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.thisoldhouse.com/how-to/how-to-build-storage-bench',
|
||||
'md5': '946f05bbaa12a33f9ae35580d2dfcfe3',
|
||||
'md5': '568acf9ca25a639f0c4ff905826b662f',
|
||||
'info_dict': {
|
||||
'id': '2REGtUDQ',
|
||||
'ext': 'mp4',
|
||||
@ -28,8 +30,15 @@ class ThisOldHouseIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
drupal_settings = self._parse_json(self._search_regex(
|
||||
r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
|
||||
webpage, 'drupal settings'), display_id)
|
||||
video_id = drupal_settings['jwplatform']['video_id']
|
||||
video_id = self._search_regex(
|
||||
(r'data-mid=(["\'])(?P<id>(?:(?!\1).)+)\1',
|
||||
r'id=(["\'])inline-video-player-(?P<id>(?:(?!\1).)+)\1'),
|
||||
webpage, 'video id', default=None, group='id')
|
||||
if not video_id:
|
||||
drupal_settings = self._parse_json(self._search_regex(
|
||||
r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
|
||||
webpage, 'drupal settings'), display_id)
|
||||
video_id = try_get(
|
||||
drupal_settings, lambda x: x['jwplatform']['video_id'],
|
||||
compat_str) or list(drupal_settings['comScore'])[0]
|
||||
return self.url_result('jwplatform:' + video_id, 'JWPlatform', video_id)
|
||||
|
@ -13,7 +13,7 @@ class WSJIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:
|
||||
https?://video-api\.wsj\.com/api-video/player/iframe\.html\?.*?\bguid=|
|
||||
https?://(?:www\.)?wsj\.com/video/[^/]+/|
|
||||
https?://(?:www\.)?(?:wsj|barrons)\.com/video/[^/]+/|
|
||||
wsj:
|
||||
)
|
||||
(?P<id>[a-fA-F0-9-]{36})
|
||||
@ -35,6 +35,9 @@ class WSJIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://www.wsj.com/video/can-alphabet-build-a-smarter-city/359DDAA8-9AC1-489C-82E6-0429C1E430E0.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.barrons.com/video/capitalism-deserves-more-respect-from-millennials/F301217E-6F46-43AE-B8D2-B7180D642EE9.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@ -157,7 +157,7 @@ class XFileShareIE(InfoExtractor):
|
||||
def extract_formats(default=NO_DEFAULT):
|
||||
urls = []
|
||||
for regex in (
|
||||
r'file\s*:\s*(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1',
|
||||
r'(?:file|src)\s*:\s*(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1',
|
||||
r'file_link\s*=\s*(["\'])(?P<url>http(?:(?!\1).)+)\1',
|
||||
r'addVariable\((\\?["\'])file\1\s*,\s*(\\?["\'])(?P<url>http(?:(?!\2).)+)\2\)',
|
||||
r'<embed[^>]+src=(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1'):
|
||||
|
@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2017.06.23'
|
||||
__version__ = '2017.07.02'
|
||||
|
Reference in New Issue
Block a user