Compare commits
14 Commits
2017.02.10
...
2017.02.11
Author | SHA1 | Date | |
---|---|---|---|
9b92a5917b | |||
3e2274c8b7 | |||
3d7e3aaa0e | |||
624c4b92ff | |||
2af12ad9d2 | |||
97eb9bd2ac | |||
71cdd75628 | |||
c7d6f614f3 | |||
08a00eef79 | |||
9dd5408c99 | |||
9510709575 | |||
5abcca9060 | |||
e01bfc19c3 | |||
4d32b63851 |
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@ -6,8 +6,8 @@
|
||||
|
||||
---
|
||||
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.10*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.10**
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.11*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.11**
|
||||
|
||||
### Before submitting an *issue* make sure you have:
|
||||
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||
@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2017.02.10
|
||||
[debug] youtube-dl version 2017.02.11
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
18
ChangeLog
18
ChangeLog
@ -1,3 +1,21 @@
|
||||
version 2017.02.11
|
||||
|
||||
Core
|
||||
+ [utils] Introduce get_elements_by_class and get_elements_by_attribute
|
||||
utility functions
|
||||
+ [extractor/common] Skip m3u8 manifests protected with Adobe Flash Access
|
||||
|
||||
Extractor
|
||||
* [pluralsight:course] Fix extraction (#12075)
|
||||
+ [bbc] Extract m3u8 formats with 320k audio
|
||||
* [facebook] Relax video id matching (#11017, #12055, #12056)
|
||||
+ [corus] Add support for Corus Entertainment sites (#12060, #9164)
|
||||
+ [pluralsight] Detect blocked account error message (#12070)
|
||||
+ [bloomberg] Add another video id pattern (#12062)
|
||||
* [extractor/commonmistakes] Restrict URL regular expression (#12050)
|
||||
+ [tvplayer] Add support for tvplayer.com
|
||||
|
||||
|
||||
version 2017.02.10
|
||||
|
||||
Extractors
|
||||
|
@ -169,6 +169,7 @@
|
||||
- **ComedyCentralShortname**
|
||||
- **ComedyCentralTV**
|
||||
- **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED
|
||||
- **Corus**
|
||||
- **Coub**
|
||||
- **Cracked**
|
||||
- **Crackle**
|
||||
@ -309,7 +310,6 @@
|
||||
- **HellPorno**
|
||||
- **Helsinki**: helsinki.fi
|
||||
- **HentaiStigma**
|
||||
- **HGTV**
|
||||
- **hgtv.com:show**
|
||||
- **HistoricFilms**
|
||||
- **history:topic**: History.com Topic
|
||||
@ -806,6 +806,7 @@
|
||||
- **tvp**: Telewizja Polska
|
||||
- **tvp:embed**: Telewizja Polska
|
||||
- **tvp:series**
|
||||
- **TVPlayer**
|
||||
- **Tweakers**
|
||||
- **twitch:chapter**
|
||||
- **twitch:clips**
|
||||
|
@ -34,6 +34,9 @@ from youtube_dl.utils import (
|
||||
find_xpath_attr,
|
||||
fix_xml_ampersands,
|
||||
get_element_by_class,
|
||||
get_element_by_attribute,
|
||||
get_elements_by_class,
|
||||
get_elements_by_attribute,
|
||||
InAdvancePagedList,
|
||||
intlist_to_bytes,
|
||||
is_html,
|
||||
@ -1124,6 +1127,32 @@ The first line
|
||||
self.assertEqual(get_element_by_class('foo', html), 'nice')
|
||||
self.assertEqual(get_element_by_class('no-such-class', html), None)
|
||||
|
||||
def test_get_element_by_attribute(self):
|
||||
html = '''
|
||||
<span class="foo bar">nice</span>
|
||||
'''
|
||||
|
||||
self.assertEqual(get_element_by_attribute('class', 'foo bar', html), 'nice')
|
||||
self.assertEqual(get_element_by_attribute('class', 'foo', html), None)
|
||||
self.assertEqual(get_element_by_attribute('class', 'no-such-foo', html), None)
|
||||
|
||||
def test_get_elements_by_class(self):
|
||||
html = '''
|
||||
<span class="foo bar">nice</span><span class="foo bar">also nice</span>
|
||||
'''
|
||||
|
||||
self.assertEqual(get_elements_by_class('foo', html), ['nice', 'also nice'])
|
||||
self.assertEqual(get_elements_by_class('no-such-class', html), [])
|
||||
|
||||
def test_get_elements_by_attribute(self):
|
||||
html = '''
|
||||
<span class="foo bar">nice</span><span class="foo bar">also nice</span>
|
||||
'''
|
||||
|
||||
self.assertEqual(get_elements_by_attribute('class', 'foo bar', html), ['nice', 'also nice'])
|
||||
self.assertEqual(get_elements_by_attribute('class', 'foo', html), [])
|
||||
self.assertEqual(get_elements_by_attribute('class', 'no-such-foo', html), [])
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@ -225,6 +225,8 @@ class BBCCoUkIE(InfoExtractor):
|
||||
}
|
||||
]
|
||||
|
||||
_USP_RE = r'/([^/]+?)\.ism(?:\.hlsv2\.ism)?/[^/]+\.m3u8'
|
||||
|
||||
class MediaSelectionError(Exception):
|
||||
def __init__(self, id):
|
||||
self.id = id
|
||||
@ -336,6 +338,15 @@ class BBCCoUkIE(InfoExtractor):
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
href, programme_id, ext='mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id=format_id, fatal=False))
|
||||
if re.search(self._USP_RE, href):
|
||||
usp_formats = self._extract_m3u8_formats(
|
||||
re.sub(self._USP_RE, r'/\1.ism/\1.m3u8', href),
|
||||
programme_id, ext='mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id=format_id, fatal=False)
|
||||
for f in usp_formats:
|
||||
if f.get('height') and f['height'] > 720:
|
||||
continue
|
||||
formats.append(f)
|
||||
elif transfer_format == 'hds':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
href, programme_id, f4m_id=format_id, fatal=False))
|
||||
|
@ -33,6 +33,10 @@ class BloombergIE(InfoExtractor):
|
||||
'params': {
|
||||
'format': 'best[format_id^=hds]',
|
||||
},
|
||||
}, {
|
||||
# data-bmmrid=
|
||||
'url': 'https://www.bloomberg.com/politics/articles/2017-02-08/le-pen-aide-briefed-french-central-banker-on-plan-to-print-money',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.bloomberg.com/news/articles/2015-11-12/five-strange-things-that-have-been-happening-in-financial-markets',
|
||||
'only_matching': True,
|
||||
@ -45,9 +49,10 @@ class BloombergIE(InfoExtractor):
|
||||
name = self._match_id(url)
|
||||
webpage = self._download_webpage(url, name)
|
||||
video_id = self._search_regex(
|
||||
(r'["\']bmmrId["\']\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||
r'videoId\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1'),
|
||||
webpage, 'id', group='url', default=None)
|
||||
(r'["\']bmmrId["\']\s*:\s*(["\'])(?P<id>(?:(?!\1).)+)\1',
|
||||
r'videoId\s*:\s*(["\'])(?P<id>(?:(?!\1).)+)\1',
|
||||
r'data-bmmrid=(["\'])(?P<id>(?:(?!\1).)+)\1'),
|
||||
webpage, 'id', group='id', default=None)
|
||||
if not video_id:
|
||||
bplayer_data = self._parse_json(self._search_regex(
|
||||
r'BPlayer\(null,\s*({[^;]+})\);', webpage, 'id'), name)
|
||||
|
@ -1208,6 +1208,9 @@ class InfoExtractor(object):
|
||||
m3u8_doc, urlh = res
|
||||
m3u8_url = urlh.geturl()
|
||||
|
||||
if '#EXT-X-FAXS-CM:' in m3u8_doc: # Adobe Flash Access
|
||||
return []
|
||||
|
||||
formats = [self._m3u8_meta_format(m3u8_url, ext, preference, m3u8_id)]
|
||||
|
||||
format_url = lambda u: (
|
||||
|
@ -7,7 +7,7 @@ from ..utils import ExtractorError
|
||||
class CommonMistakesIE(InfoExtractor):
|
||||
IE_DESC = False # Do not list
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:url|URL)
|
||||
(?:url|URL)$
|
||||
'''
|
||||
|
||||
_TESTS = [{
|
||||
|
72
youtube_dl/extractor/corus.py
Normal file
72
youtube_dl/extractor/corus.py
Normal file
@ -0,0 +1,72 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .theplatform import ThePlatformFeedIE
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class CorusIE(ThePlatformFeedIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:globaltv|etcanada)\.com|(?:hgtv|foodnetwork|slice)\.ca)/(?:video/|(?:[^/]+/)+(?:videos/[a-z0-9-]+-|video\.html\?.*?\bv=))(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.hgtv.ca/shows/bryan-inc/videos/movie-night-popcorn-with-bryan-870923331648/',
|
||||
'md5': '05dcbca777bf1e58c2acbb57168ad3a6',
|
||||
'info_dict': {
|
||||
'id': '870923331648',
|
||||
'ext': 'mp4',
|
||||
'title': 'Movie Night Popcorn with Bryan',
|
||||
'description': 'Bryan whips up homemade popcorn, the old fashion way for Jojo and Lincoln.',
|
||||
'uploader': 'SHWM-NEW',
|
||||
'upload_date': '20170206',
|
||||
'timestamp': 1486392197,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.foodnetwork.ca/shows/chopped/video/episode/chocolate-obsession/video.html?v=872683587753',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://etcanada.com/video/873675331955/meet-the-survivor-game-changers-castaways-part-2/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_TP_FEEDS = {
|
||||
'globaltv': {
|
||||
'feed_id': 'ChQqrem0lNUp',
|
||||
'account_id': 2269680845,
|
||||
},
|
||||
'etcanada': {
|
||||
'feed_id': 'ChQqrem0lNUp',
|
||||
'account_id': 2269680845,
|
||||
},
|
||||
'hgtv': {
|
||||
'feed_id': 'L0BMHXi2no43',
|
||||
'account_id': 2414428465,
|
||||
},
|
||||
'foodnetwork': {
|
||||
'feed_id': 'ukK8o58zbRmJ',
|
||||
'account_id': 2414429569,
|
||||
},
|
||||
'slice': {
|
||||
'feed_id': '5tUJLgV2YNJ5',
|
||||
'account_id': 2414427935,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
domain, video_id = re.match(self._VALID_URL, url).groups()
|
||||
feed_info = self._TP_FEEDS[domain.split('.')[0]]
|
||||
return self._extract_feed_info('dtjsEC', feed_info['feed_id'], 'byId=' + video_id, video_id, lambda e: {
|
||||
'episode_number': int_or_none(e.get('pl1$episode')),
|
||||
'season_number': int_or_none(e.get('pl1$season')),
|
||||
'series': e.get('pl1$show'),
|
||||
}, {
|
||||
'HLS': {
|
||||
'manifest': 'm3u',
|
||||
},
|
||||
'DesktopHLS Default': {
|
||||
'manifest': 'm3u',
|
||||
},
|
||||
'MP4 MBR': {
|
||||
'manifest': 'm3u',
|
||||
},
|
||||
}, feed_info['account_id'])
|
@ -202,6 +202,7 @@ from .commonprotocols import (
|
||||
RtmpIE,
|
||||
)
|
||||
from .condenast import CondeNastIE
|
||||
from .corus import CorusIE
|
||||
from .cracked import CrackedIE
|
||||
from .crackle import CrackleIE
|
||||
from .criterion import CriterionIE
|
||||
@ -381,10 +382,7 @@ from .heise import HeiseIE
|
||||
from .hellporno import HellPornoIE
|
||||
from .helsinki import HelsinkiIE
|
||||
from .hentaistigma import HentaiStigmaIE
|
||||
from .hgtv import (
|
||||
HGTVIE,
|
||||
HGTVComShowIE,
|
||||
)
|
||||
from .hgtv import HGTVComShowIE
|
||||
from .historicfilms import HistoricFilmsIE
|
||||
from .hitbox import HitboxIE, HitboxLiveIE
|
||||
from .hitrecord import HitRecordIE
|
||||
@ -1019,6 +1017,7 @@ from .tvplay import (
|
||||
TVPlayIE,
|
||||
ViafreeIE,
|
||||
)
|
||||
from .tvplayer import TVPlayerIE
|
||||
from .tweakers import TweakersIE
|
||||
from .twentyfourvideo import TwentyFourVideoIE
|
||||
from .twentymin import TwentyMinutenIE
|
||||
|
@ -1,3 +1,4 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
@ -148,6 +149,32 @@ class FacebookIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.facebook.com/LaGuiaDelVaron/posts/1072691702860471',
|
||||
'info_dict': {
|
||||
'id': '1072691702860471',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:ae2d22a93fbb12dad20dc393a869739d',
|
||||
'timestamp': 1477305000,
|
||||
'upload_date': '20161024',
|
||||
'uploader': 'La Guía Del Varón',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.facebook.com/groups/1024490957622648/permalink/1396382447100162/',
|
||||
'info_dict': {
|
||||
'id': '1396382447100162',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:e2d2700afdf84e121f5d0f999bad13a3',
|
||||
'timestamp': 1486035494,
|
||||
'upload_date': '20170202',
|
||||
'uploader': 'Elisabeth Ahtn',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.facebook.com/video.php?v=10204634152394104',
|
||||
'only_matching': True,
|
||||
@ -263,7 +290,7 @@ class FacebookIE(InfoExtractor):
|
||||
for item in instances:
|
||||
if item[1][0] == 'VideoConfig':
|
||||
video_item = item[2][0]
|
||||
if video_item.get('video_id') == video_id:
|
||||
if video_item.get('video_id'):
|
||||
return video_item['videoData']
|
||||
|
||||
server_js_data = self._parse_json(self._search_regex(
|
||||
|
@ -2,50 +2,6 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
smuggle_url,
|
||||
)
|
||||
|
||||
|
||||
class HGTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?hgtv\.ca/[^/]+/video/(?P<id>[^/]+)/video.html'
|
||||
_TEST = {
|
||||
'url': 'http://www.hgtv.ca/homefree/video/overnight-success/video.html?v=738081859718&p=1&s=da#video',
|
||||
'md5': '',
|
||||
'info_dict': {
|
||||
'id': 'aFH__I_5FBOX',
|
||||
'ext': 'mp4',
|
||||
'title': 'Overnight Success',
|
||||
'description': 'After weeks of hard work, high stakes, breakdowns and pep talks, the final 2 contestants compete to win the ultimate dream.',
|
||||
'uploader': 'SHWM-NEW',
|
||||
'timestamp': 1470320034,
|
||||
'upload_date': '20160804',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
embed_vars = self._parse_json(self._search_regex(
|
||||
r'(?s)embed_vars\s*=\s*({.*?});',
|
||||
webpage, 'embed vars'), display_id, js_to_json)
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': smuggle_url(
|
||||
'http://link.theplatform.com/s/dtjsEC/%s?mbr=true&manifest=m3u' % embed_vars['pid'], {
|
||||
'force_smil_url': True
|
||||
}),
|
||||
'series': embed_vars.get('show'),
|
||||
'season_number': int_or_none(embed_vars.get('season')),
|
||||
'episode_number': int_or_none(embed_vars.get('episode')),
|
||||
'ie_key': 'ThePlatform',
|
||||
}
|
||||
|
||||
|
||||
class HGTVComShowIE(InfoExtractor):
|
||||
|
@ -18,6 +18,7 @@ from ..utils import (
|
||||
parse_duration,
|
||||
qualities,
|
||||
srt_subtitles_timecode,
|
||||
update_url_query,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
@ -92,6 +93,10 @@ class PluralsightIE(PluralsightBaseIE):
|
||||
raise ExtractorError('Unable to login: %s' % error, expected=True)
|
||||
|
||||
if all(p not in response for p in ('__INITIAL_STATE__', '"currentUser"')):
|
||||
BLOCKED = 'Your account has been blocked due to suspicious activity'
|
||||
if BLOCKED in response:
|
||||
raise ExtractorError(
|
||||
'Unable to login: %s' % BLOCKED, expected=True)
|
||||
raise ExtractorError('Unable to log in')
|
||||
|
||||
def _get_subtitles(self, author, clip_id, lang, name, duration, video_id):
|
||||
@ -327,25 +332,44 @@ class PluralsightCourseIE(PluralsightBaseIE):
|
||||
# TODO: PSM cookie
|
||||
|
||||
course = self._download_json(
|
||||
'%s/data/course/%s' % (self._API_BASE, course_id),
|
||||
course_id, 'Downloading course JSON')
|
||||
'%s/player/functions/rpc' % self._API_BASE, course_id,
|
||||
'Downloading course JSON',
|
||||
data=json.dumps({
|
||||
'fn': 'bootstrapPlayer',
|
||||
'payload': {
|
||||
'courseId': course_id,
|
||||
}
|
||||
}).encode('utf-8'),
|
||||
headers={
|
||||
'Content-Type': 'application/json;charset=utf-8'
|
||||
})['payload']['course']
|
||||
|
||||
title = course['title']
|
||||
course_name = course['name']
|
||||
course_data = course['modules']
|
||||
description = course.get('description') or course.get('shortDescription')
|
||||
|
||||
course_data = self._download_json(
|
||||
'%s/data/course/content/%s' % (self._API_BASE, course_id),
|
||||
course_id, 'Downloading course data JSON')
|
||||
|
||||
entries = []
|
||||
for num, module in enumerate(course_data, 1):
|
||||
author = module.get('author')
|
||||
module_name = module.get('name')
|
||||
if not author or not module_name:
|
||||
continue
|
||||
for clip in module.get('clips', []):
|
||||
player_parameters = clip.get('playerParameters')
|
||||
if not player_parameters:
|
||||
clip_index = int_or_none(clip.get('index'))
|
||||
if clip_index is None:
|
||||
continue
|
||||
clip_url = update_url_query(
|
||||
'%s/player' % self._API_BASE, query={
|
||||
'mode': 'live',
|
||||
'course': course_name,
|
||||
'author': author,
|
||||
'name': module_name,
|
||||
'clip': clip_index,
|
||||
})
|
||||
entries.append({
|
||||
'_type': 'url_transparent',
|
||||
'url': '%s/training/player?%s' % (self._API_BASE, player_parameters),
|
||||
'url': clip_url,
|
||||
'ie_key': PluralsightIE.ie_key(),
|
||||
'chapter': module.get('title'),
|
||||
'chapter_number': num,
|
||||
|
@ -69,7 +69,7 @@ class SixPlayIE(InfoExtractor):
|
||||
asset_url.replace('.m3u8', '.mpd'),
|
||||
video_id, mpd_id='dash', fatal=False))
|
||||
formats.extend(self._extract_ism_formats(
|
||||
re.sub('/[^/]+\.m3u8', '/Manifest', asset_url),
|
||||
re.sub(r'/[^/]+\.m3u8', '/Manifest', asset_url),
|
||||
video_id, ism_id='mss', fatal=False))
|
||||
else:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
|
@ -306,9 +306,10 @@ class ThePlatformFeedIE(ThePlatformBaseIE):
|
||||
},
|
||||
}]
|
||||
|
||||
def _extract_feed_info(self, provider_id, feed_id, filter_query, video_id, custom_fields=None, asset_types_query={}):
|
||||
def _extract_feed_info(self, provider_id, feed_id, filter_query, video_id, custom_fields=None, asset_types_query={}, account_id=None):
|
||||
real_url = self._URL_TEMPLATE % (self.http_scheme(), provider_id, feed_id, filter_query)
|
||||
entry = self._download_json(real_url, video_id)['entries'][0]
|
||||
main_smil_url = 'http://link.theplatform.com/s/%s/media/guid/%d/%s' % (provider_id, account_id, entry['guid']) if account_id else None
|
||||
|
||||
formats = []
|
||||
subtitles = {}
|
||||
@ -333,7 +334,7 @@ class ThePlatformFeedIE(ThePlatformBaseIE):
|
||||
if asset_type in asset_types_query:
|
||||
query.update(asset_types_query[asset_type])
|
||||
cur_formats, cur_subtitles = self._extract_theplatform_smil(update_url_query(
|
||||
smil_url, query), video_id, 'Downloading SMIL data for %s' % asset_type)
|
||||
main_smil_url or smil_url, query), video_id, 'Downloading SMIL data for %s' % asset_type)
|
||||
formats.extend(cur_formats)
|
||||
subtitles = self._merge_subtitles(subtitles, cur_subtitles)
|
||||
|
||||
|
75
youtube_dl/extractor/tvplayer.py
Normal file
75
youtube_dl/extractor/tvplayer.py
Normal file
@ -0,0 +1,75 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_HTTPError
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
urlencode_postdata,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class TVPlayerIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?tvplayer\.com/watch/(?P<id>[^/?#]+)'
|
||||
_TEST = {
|
||||
'url': 'http://tvplayer.com/watch/bbcone',
|
||||
'info_dict': {
|
||||
'id': '89',
|
||||
'ext': 'mp4',
|
||||
'title': r're:^BBC One [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
current_channel = extract_attributes(self._search_regex(
|
||||
r'(<div[^>]+class="[^"]*current-channel[^"]*"[^>]*>)',
|
||||
webpage, 'channel element'))
|
||||
title = current_channel['data-name']
|
||||
|
||||
resource_id = self._search_regex(
|
||||
r'resourceId\s*=\s*"(\d+)"', webpage, 'resource id')
|
||||
platform = self._search_regex(
|
||||
r'platform\s*=\s*"([^"]+)"', webpage, 'platform')
|
||||
token = self._search_regex(
|
||||
r'token\s*=\s*"([^"]+)"', webpage, 'token', default='null')
|
||||
validate = self._search_regex(
|
||||
r'validate\s*=\s*"([^"]+)"', webpage, 'validate', default='null')
|
||||
|
||||
try:
|
||||
response = self._download_json(
|
||||
'http://api.tvplayer.com/api/v2/stream/live',
|
||||
resource_id, headers={
|
||||
'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
|
||||
}, data=urlencode_postdata({
|
||||
'service': 1,
|
||||
'platform': platform,
|
||||
'id': resource_id,
|
||||
'token': token,
|
||||
'validate': validate,
|
||||
}))['tvplayer']['response']
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError):
|
||||
response = self._parse_json(
|
||||
e.cause.read().decode(), resource_id)['tvplayer']['response']
|
||||
raise ExtractorError(
|
||||
'%s said: %s' % (self.IE_NAME, response['error']), expected=True)
|
||||
raise
|
||||
|
||||
formats = self._extract_m3u8_formats(response['stream'], resource_id, 'mp4')
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': resource_id,
|
||||
'display_id': display_id,
|
||||
'title': self._live_title(title),
|
||||
'formats': formats,
|
||||
'is_live': True,
|
||||
}
|
@ -337,17 +337,30 @@ def get_element_by_id(id, html):
|
||||
|
||||
|
||||
def get_element_by_class(class_name, html):
|
||||
return get_element_by_attribute(
|
||||
"""Return the content of the first tag with the specified class in the passed HTML document"""
|
||||
retval = get_elements_by_class(class_name, html)
|
||||
return retval[0] if retval else None
|
||||
|
||||
|
||||
def get_element_by_attribute(attribute, value, html, escape_value=True):
|
||||
retval = get_elements_by_attribute(attribute, value, html, escape_value)
|
||||
return retval[0] if retval else None
|
||||
|
||||
|
||||
def get_elements_by_class(class_name, html):
|
||||
"""Return the content of all tags with the specified class in the passed HTML document as a list"""
|
||||
return get_elements_by_attribute(
|
||||
'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name),
|
||||
html, escape_value=False)
|
||||
|
||||
|
||||
def get_element_by_attribute(attribute, value, html, escape_value=True):
|
||||
def get_elements_by_attribute(attribute, value, html, escape_value=True):
|
||||
"""Return the content of the tag with the specified attribute in the passed HTML document"""
|
||||
|
||||
value = re.escape(value) if escape_value else value
|
||||
|
||||
m = re.search(r'''(?xs)
|
||||
retlist = []
|
||||
for m in re.finditer(r'''(?xs)
|
||||
<([a-zA-Z0-9:._-]+)
|
||||
(?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'))*?
|
||||
\s+%s=['"]?%s['"]?
|
||||
@ -355,16 +368,15 @@ def get_element_by_attribute(attribute, value, html, escape_value=True):
|
||||
\s*>
|
||||
(?P<content>.*?)
|
||||
</\1>
|
||||
''' % (re.escape(attribute), value), html)
|
||||
''' % (re.escape(attribute), value), html):
|
||||
res = m.group('content')
|
||||
|
||||
if not m:
|
||||
return None
|
||||
res = m.group('content')
|
||||
if res.startswith('"') or res.startswith("'"):
|
||||
res = res[1:-1]
|
||||
|
||||
if res.startswith('"') or res.startswith("'"):
|
||||
res = res[1:-1]
|
||||
retlist.append(unescapeHTML(res))
|
||||
|
||||
return unescapeHTML(res)
|
||||
return retlist
|
||||
|
||||
|
||||
class HTMLAttributeParser(compat_HTMLParser):
|
||||
|
@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2017.02.10'
|
||||
__version__ = '2017.02.11'
|
||||
|
Reference in New Issue
Block a user