Compare commits
14 Commits
2017.02.10
...
2017.02.11
Author | SHA1 | Date | |
---|---|---|---|
9b92a5917b | |||
3e2274c8b7 | |||
3d7e3aaa0e | |||
624c4b92ff | |||
2af12ad9d2 | |||
97eb9bd2ac | |||
71cdd75628 | |||
c7d6f614f3 | |||
08a00eef79 | |||
9dd5408c99 | |||
9510709575 | |||
5abcca9060 | |||
e01bfc19c3 | |||
4d32b63851 |
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@ -6,8 +6,8 @@
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.10*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with an outdated version will be rejected.
|
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.11*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with an outdated version will be rejected.
|
||||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.10**
|
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.11**
|
||||||
|
|
||||||
### Before submitting an *issue* make sure you have:
|
### Before submitting an *issue* make sure you have:
|
||||||
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||||
@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
|
|||||||
[debug] User config: []
|
[debug] User config: []
|
||||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||||
[debug] youtube-dl version 2017.02.10
|
[debug] youtube-dl version 2017.02.11
|
||||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||||
[debug] Proxy map: {}
|
[debug] Proxy map: {}
|
||||||
|
18
ChangeLog
18
ChangeLog
@ -1,3 +1,21 @@
|
|||||||
|
version 2017.02.11
|
||||||
|
|
||||||
|
Core
|
||||||
|
+ [utils] Introduce get_elements_by_class and get_elements_by_attribute
|
||||||
|
utility functions
|
||||||
|
+ [extractor/common] Skip m3u8 manifests protected with Adobe Flash Access
|
||||||
|
|
||||||
|
Extractor
|
||||||
|
* [pluralsight:course] Fix extraction (#12075)
|
||||||
|
+ [bbc] Extract m3u8 formats with 320k audio
|
||||||
|
* [facebook] Relax video id matching (#11017, #12055, #12056)
|
||||||
|
+ [corus] Add support for Corus Entertainment sites (#12060, #9164)
|
||||||
|
+ [pluralsight] Detect blocked account error message (#12070)
|
||||||
|
+ [bloomberg] Add another video id pattern (#12062)
|
||||||
|
* [extractor/commonmistakes] Restrict URL regular expression (#12050)
|
||||||
|
+ [tvplayer] Add support for tvplayer.com
|
||||||
|
|
||||||
|
|
||||||
version 2017.02.10
|
version 2017.02.10
|
||||||
|
|
||||||
Extractors
|
Extractors
|
||||||
|
@ -169,6 +169,7 @@
|
|||||||
- **ComedyCentralShortname**
|
- **ComedyCentralShortname**
|
||||||
- **ComedyCentralTV**
|
- **ComedyCentralTV**
|
||||||
- **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED
|
- **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED
|
||||||
|
- **Corus**
|
||||||
- **Coub**
|
- **Coub**
|
||||||
- **Cracked**
|
- **Cracked**
|
||||||
- **Crackle**
|
- **Crackle**
|
||||||
@ -309,7 +310,6 @@
|
|||||||
- **HellPorno**
|
- **HellPorno**
|
||||||
- **Helsinki**: helsinki.fi
|
- **Helsinki**: helsinki.fi
|
||||||
- **HentaiStigma**
|
- **HentaiStigma**
|
||||||
- **HGTV**
|
|
||||||
- **hgtv.com:show**
|
- **hgtv.com:show**
|
||||||
- **HistoricFilms**
|
- **HistoricFilms**
|
||||||
- **history:topic**: History.com Topic
|
- **history:topic**: History.com Topic
|
||||||
@ -806,6 +806,7 @@
|
|||||||
- **tvp**: Telewizja Polska
|
- **tvp**: Telewizja Polska
|
||||||
- **tvp:embed**: Telewizja Polska
|
- **tvp:embed**: Telewizja Polska
|
||||||
- **tvp:series**
|
- **tvp:series**
|
||||||
|
- **TVPlayer**
|
||||||
- **Tweakers**
|
- **Tweakers**
|
||||||
- **twitch:chapter**
|
- **twitch:chapter**
|
||||||
- **twitch:clips**
|
- **twitch:clips**
|
||||||
|
@ -34,6 +34,9 @@ from youtube_dl.utils import (
|
|||||||
find_xpath_attr,
|
find_xpath_attr,
|
||||||
fix_xml_ampersands,
|
fix_xml_ampersands,
|
||||||
get_element_by_class,
|
get_element_by_class,
|
||||||
|
get_element_by_attribute,
|
||||||
|
get_elements_by_class,
|
||||||
|
get_elements_by_attribute,
|
||||||
InAdvancePagedList,
|
InAdvancePagedList,
|
||||||
intlist_to_bytes,
|
intlist_to_bytes,
|
||||||
is_html,
|
is_html,
|
||||||
@ -1124,6 +1127,32 @@ The first line
|
|||||||
self.assertEqual(get_element_by_class('foo', html), 'nice')
|
self.assertEqual(get_element_by_class('foo', html), 'nice')
|
||||||
self.assertEqual(get_element_by_class('no-such-class', html), None)
|
self.assertEqual(get_element_by_class('no-such-class', html), None)
|
||||||
|
|
||||||
|
def test_get_element_by_attribute(self):
|
||||||
|
html = '''
|
||||||
|
<span class="foo bar">nice</span>
|
||||||
|
'''
|
||||||
|
|
||||||
|
self.assertEqual(get_element_by_attribute('class', 'foo bar', html), 'nice')
|
||||||
|
self.assertEqual(get_element_by_attribute('class', 'foo', html), None)
|
||||||
|
self.assertEqual(get_element_by_attribute('class', 'no-such-foo', html), None)
|
||||||
|
|
||||||
|
def test_get_elements_by_class(self):
|
||||||
|
html = '''
|
||||||
|
<span class="foo bar">nice</span><span class="foo bar">also nice</span>
|
||||||
|
'''
|
||||||
|
|
||||||
|
self.assertEqual(get_elements_by_class('foo', html), ['nice', 'also nice'])
|
||||||
|
self.assertEqual(get_elements_by_class('no-such-class', html), [])
|
||||||
|
|
||||||
|
def test_get_elements_by_attribute(self):
|
||||||
|
html = '''
|
||||||
|
<span class="foo bar">nice</span><span class="foo bar">also nice</span>
|
||||||
|
'''
|
||||||
|
|
||||||
|
self.assertEqual(get_elements_by_attribute('class', 'foo bar', html), ['nice', 'also nice'])
|
||||||
|
self.assertEqual(get_elements_by_attribute('class', 'foo', html), [])
|
||||||
|
self.assertEqual(get_elements_by_attribute('class', 'no-such-foo', html), [])
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
@ -225,6 +225,8 @@ class BBCCoUkIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
|
_USP_RE = r'/([^/]+?)\.ism(?:\.hlsv2\.ism)?/[^/]+\.m3u8'
|
||||||
|
|
||||||
class MediaSelectionError(Exception):
|
class MediaSelectionError(Exception):
|
||||||
def __init__(self, id):
|
def __init__(self, id):
|
||||||
self.id = id
|
self.id = id
|
||||||
@ -336,6 +338,15 @@ class BBCCoUkIE(InfoExtractor):
|
|||||||
formats.extend(self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
href, programme_id, ext='mp4', entry_protocol='m3u8_native',
|
href, programme_id, ext='mp4', entry_protocol='m3u8_native',
|
||||||
m3u8_id=format_id, fatal=False))
|
m3u8_id=format_id, fatal=False))
|
||||||
|
if re.search(self._USP_RE, href):
|
||||||
|
usp_formats = self._extract_m3u8_formats(
|
||||||
|
re.sub(self._USP_RE, r'/\1.ism/\1.m3u8', href),
|
||||||
|
programme_id, ext='mp4', entry_protocol='m3u8_native',
|
||||||
|
m3u8_id=format_id, fatal=False)
|
||||||
|
for f in usp_formats:
|
||||||
|
if f.get('height') and f['height'] > 720:
|
||||||
|
continue
|
||||||
|
formats.append(f)
|
||||||
elif transfer_format == 'hds':
|
elif transfer_format == 'hds':
|
||||||
formats.extend(self._extract_f4m_formats(
|
formats.extend(self._extract_f4m_formats(
|
||||||
href, programme_id, f4m_id=format_id, fatal=False))
|
href, programme_id, f4m_id=format_id, fatal=False))
|
||||||
|
@ -33,6 +33,10 @@ class BloombergIE(InfoExtractor):
|
|||||||
'params': {
|
'params': {
|
||||||
'format': 'best[format_id^=hds]',
|
'format': 'best[format_id^=hds]',
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
# data-bmmrid=
|
||||||
|
'url': 'https://www.bloomberg.com/politics/articles/2017-02-08/le-pen-aide-briefed-french-central-banker-on-plan-to-print-money',
|
||||||
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.bloomberg.com/news/articles/2015-11-12/five-strange-things-that-have-been-happening-in-financial-markets',
|
'url': 'http://www.bloomberg.com/news/articles/2015-11-12/five-strange-things-that-have-been-happening-in-financial-markets',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@ -45,9 +49,10 @@ class BloombergIE(InfoExtractor):
|
|||||||
name = self._match_id(url)
|
name = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, name)
|
webpage = self._download_webpage(url, name)
|
||||||
video_id = self._search_regex(
|
video_id = self._search_regex(
|
||||||
(r'["\']bmmrId["\']\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
|
(r'["\']bmmrId["\']\s*:\s*(["\'])(?P<id>(?:(?!\1).)+)\1',
|
||||||
r'videoId\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1'),
|
r'videoId\s*:\s*(["\'])(?P<id>(?:(?!\1).)+)\1',
|
||||||
webpage, 'id', group='url', default=None)
|
r'data-bmmrid=(["\'])(?P<id>(?:(?!\1).)+)\1'),
|
||||||
|
webpage, 'id', group='id', default=None)
|
||||||
if not video_id:
|
if not video_id:
|
||||||
bplayer_data = self._parse_json(self._search_regex(
|
bplayer_data = self._parse_json(self._search_regex(
|
||||||
r'BPlayer\(null,\s*({[^;]+})\);', webpage, 'id'), name)
|
r'BPlayer\(null,\s*({[^;]+})\);', webpage, 'id'), name)
|
||||||
|
@ -1208,6 +1208,9 @@ class InfoExtractor(object):
|
|||||||
m3u8_doc, urlh = res
|
m3u8_doc, urlh = res
|
||||||
m3u8_url = urlh.geturl()
|
m3u8_url = urlh.geturl()
|
||||||
|
|
||||||
|
if '#EXT-X-FAXS-CM:' in m3u8_doc: # Adobe Flash Access
|
||||||
|
return []
|
||||||
|
|
||||||
formats = [self._m3u8_meta_format(m3u8_url, ext, preference, m3u8_id)]
|
formats = [self._m3u8_meta_format(m3u8_url, ext, preference, m3u8_id)]
|
||||||
|
|
||||||
format_url = lambda u: (
|
format_url = lambda u: (
|
||||||
|
@ -7,7 +7,7 @@ from ..utils import ExtractorError
|
|||||||
class CommonMistakesIE(InfoExtractor):
|
class CommonMistakesIE(InfoExtractor):
|
||||||
IE_DESC = False # Do not list
|
IE_DESC = False # Do not list
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
(?:url|URL)
|
(?:url|URL)$
|
||||||
'''
|
'''
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
72
youtube_dl/extractor/corus.py
Normal file
72
youtube_dl/extractor/corus.py
Normal file
@ -0,0 +1,72 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .theplatform import ThePlatformFeedIE
|
||||||
|
from ..utils import int_or_none
|
||||||
|
|
||||||
|
|
||||||
|
class CorusIE(ThePlatformFeedIE):
    # Corus Entertainment sites. Each supported site is mapped to a
    # thePlatform feed/account pair in _TP_FEEDS, keyed by the first label
    # of the matched domain.
    _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:globaltv|etcanada)\.com|(?:hgtv|foodnetwork|slice)\.ca)/(?:video/|(?:[^/]+/)+(?:videos/[a-z0-9-]+-|video\.html\?.*?\bv=))(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://www.hgtv.ca/shows/bryan-inc/videos/movie-night-popcorn-with-bryan-870923331648/',
        'md5': '05dcbca777bf1e58c2acbb57168ad3a6',
        'info_dict': {
            'id': '870923331648',
            'ext': 'mp4',
            'title': 'Movie Night Popcorn with Bryan',
            'description': 'Bryan whips up homemade popcorn, the old fashion way for Jojo and Lincoln.',
            'uploader': 'SHWM-NEW',
            'upload_date': '20170206',
            'timestamp': 1486392197,
        },
    }, {
        'url': 'http://www.foodnetwork.ca/shows/chopped/video/episode/chocolate-obsession/video.html?v=872683587753',
        'only_matching': True,
    }, {
        'url': 'http://etcanada.com/video/873675331955/meet-the-survivor-game-changers-castaways-part-2/',
        'only_matching': True,
    }]

    # globaltv and etcanada intentionally share one feed/account pair.
    _TP_FEEDS = {
        'globaltv': {
            'feed_id': 'ChQqrem0lNUp',
            'account_id': 2269680845,
        },
        'etcanada': {
            'feed_id': 'ChQqrem0lNUp',
            'account_id': 2269680845,
        },
        'hgtv': {
            'feed_id': 'L0BMHXi2no43',
            'account_id': 2414428465,
        },
        'foodnetwork': {
            'feed_id': 'ukK8o58zbRmJ',
            'account_id': 2414429569,
        },
        'slice': {
            'feed_id': '5tUJLgV2YNJ5',
            'account_id': 2414427935,
        },
    }

    def _real_extract(self, url):
        """Resolve a Corus video URL through the matching thePlatform feed."""
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        # 'hgtv.ca' -> 'hgtv', 'globaltv.com' -> 'globaltv'
        site = mobj.group('domain').partition('.')[0]
        feed = self._TP_FEEDS[site]

        def custom_fields(entry):
            # Series metadata is read from the feed entry's pl1$* fields.
            return {
                'episode_number': int_or_none(entry.get('pl1$episode')),
                'season_number': int_or_none(entry.get('pl1$season')),
                'series': entry.get('pl1$show'),
            }

        # Request an m3u manifest for each of these asset types.
        asset_types_query = dict(
            (asset_type, {'manifest': 'm3u'})
            for asset_type in ('HLS', 'DesktopHLS Default', 'MP4 MBR'))

        return self._extract_feed_info(
            'dtjsEC', feed['feed_id'], 'byId=' + video_id, video_id,
            custom_fields=custom_fields,
            asset_types_query=asset_types_query,
            account_id=feed['account_id'])
|
@ -202,6 +202,7 @@ from .commonprotocols import (
|
|||||||
RtmpIE,
|
RtmpIE,
|
||||||
)
|
)
|
||||||
from .condenast import CondeNastIE
|
from .condenast import CondeNastIE
|
||||||
|
from .corus import CorusIE
|
||||||
from .cracked import CrackedIE
|
from .cracked import CrackedIE
|
||||||
from .crackle import CrackleIE
|
from .crackle import CrackleIE
|
||||||
from .criterion import CriterionIE
|
from .criterion import CriterionIE
|
||||||
@ -381,10 +382,7 @@ from .heise import HeiseIE
|
|||||||
from .hellporno import HellPornoIE
|
from .hellporno import HellPornoIE
|
||||||
from .helsinki import HelsinkiIE
|
from .helsinki import HelsinkiIE
|
||||||
from .hentaistigma import HentaiStigmaIE
|
from .hentaistigma import HentaiStigmaIE
|
||||||
from .hgtv import (
|
from .hgtv import HGTVComShowIE
|
||||||
HGTVIE,
|
|
||||||
HGTVComShowIE,
|
|
||||||
)
|
|
||||||
from .historicfilms import HistoricFilmsIE
|
from .historicfilms import HistoricFilmsIE
|
||||||
from .hitbox import HitboxIE, HitboxLiveIE
|
from .hitbox import HitboxIE, HitboxLiveIE
|
||||||
from .hitrecord import HitRecordIE
|
from .hitrecord import HitRecordIE
|
||||||
@ -1019,6 +1017,7 @@ from .tvplay import (
|
|||||||
TVPlayIE,
|
TVPlayIE,
|
||||||
ViafreeIE,
|
ViafreeIE,
|
||||||
)
|
)
|
||||||
|
from .tvplayer import TVPlayerIE
|
||||||
from .tweakers import TweakersIE
|
from .tweakers import TweakersIE
|
||||||
from .twentyfourvideo import TwentyFourVideoIE
|
from .twentyfourvideo import TwentyFourVideoIE
|
||||||
from .twentymin import TwentyMinutenIE
|
from .twentymin import TwentyMinutenIE
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
@ -148,6 +149,32 @@ class FacebookIE(InfoExtractor):
|
|||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.facebook.com/LaGuiaDelVaron/posts/1072691702860471',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1072691702860471',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'md5:ae2d22a93fbb12dad20dc393a869739d',
|
||||||
|
'timestamp': 1477305000,
|
||||||
|
'upload_date': '20161024',
|
||||||
|
'uploader': 'La Guía Del Varón',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.facebook.com/groups/1024490957622648/permalink/1396382447100162/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1396382447100162',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'md5:e2d2700afdf84e121f5d0f999bad13a3',
|
||||||
|
'timestamp': 1486035494,
|
||||||
|
'upload_date': '20170202',
|
||||||
|
'uploader': 'Elisabeth Ahtn',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.facebook.com/video.php?v=10204634152394104',
|
'url': 'https://www.facebook.com/video.php?v=10204634152394104',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@ -263,7 +290,7 @@ class FacebookIE(InfoExtractor):
|
|||||||
for item in instances:
|
for item in instances:
|
||||||
if item[1][0] == 'VideoConfig':
|
if item[1][0] == 'VideoConfig':
|
||||||
video_item = item[2][0]
|
video_item = item[2][0]
|
||||||
if video_item.get('video_id') == video_id:
|
if video_item.get('video_id'):
|
||||||
return video_item['videoData']
|
return video_item['videoData']
|
||||||
|
|
||||||
server_js_data = self._parse_json(self._search_regex(
|
server_js_data = self._parse_json(self._search_regex(
|
||||||
|
@ -2,50 +2,6 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
|
||||||
int_or_none,
|
|
||||||
js_to_json,
|
|
||||||
smuggle_url,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class HGTVIE(InfoExtractor):
|
|
||||||
_VALID_URL = r'https?://(?:www\.)?hgtv\.ca/[^/]+/video/(?P<id>[^/]+)/video.html'
|
|
||||||
_TEST = {
|
|
||||||
'url': 'http://www.hgtv.ca/homefree/video/overnight-success/video.html?v=738081859718&p=1&s=da#video',
|
|
||||||
'md5': '',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'aFH__I_5FBOX',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Overnight Success',
|
|
||||||
'description': 'After weeks of hard work, high stakes, breakdowns and pep talks, the final 2 contestants compete to win the ultimate dream.',
|
|
||||||
'uploader': 'SHWM-NEW',
|
|
||||||
'timestamp': 1470320034,
|
|
||||||
'upload_date': '20160804',
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
# m3u8 download
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
display_id = self._match_id(url)
|
|
||||||
webpage = self._download_webpage(url, display_id)
|
|
||||||
embed_vars = self._parse_json(self._search_regex(
|
|
||||||
r'(?s)embed_vars\s*=\s*({.*?});',
|
|
||||||
webpage, 'embed vars'), display_id, js_to_json)
|
|
||||||
return {
|
|
||||||
'_type': 'url_transparent',
|
|
||||||
'url': smuggle_url(
|
|
||||||
'http://link.theplatform.com/s/dtjsEC/%s?mbr=true&manifest=m3u' % embed_vars['pid'], {
|
|
||||||
'force_smil_url': True
|
|
||||||
}),
|
|
||||||
'series': embed_vars.get('show'),
|
|
||||||
'season_number': int_or_none(embed_vars.get('season')),
|
|
||||||
'episode_number': int_or_none(embed_vars.get('episode')),
|
|
||||||
'ie_key': 'ThePlatform',
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class HGTVComShowIE(InfoExtractor):
|
class HGTVComShowIE(InfoExtractor):
|
||||||
|
@ -18,6 +18,7 @@ from ..utils import (
|
|||||||
parse_duration,
|
parse_duration,
|
||||||
qualities,
|
qualities,
|
||||||
srt_subtitles_timecode,
|
srt_subtitles_timecode,
|
||||||
|
update_url_query,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -92,6 +93,10 @@ class PluralsightIE(PluralsightBaseIE):
|
|||||||
raise ExtractorError('Unable to login: %s' % error, expected=True)
|
raise ExtractorError('Unable to login: %s' % error, expected=True)
|
||||||
|
|
||||||
if all(p not in response for p in ('__INITIAL_STATE__', '"currentUser"')):
|
if all(p not in response for p in ('__INITIAL_STATE__', '"currentUser"')):
|
||||||
|
BLOCKED = 'Your account has been blocked due to suspicious activity'
|
||||||
|
if BLOCKED in response:
|
||||||
|
raise ExtractorError(
|
||||||
|
'Unable to login: %s' % BLOCKED, expected=True)
|
||||||
raise ExtractorError('Unable to log in')
|
raise ExtractorError('Unable to log in')
|
||||||
|
|
||||||
def _get_subtitles(self, author, clip_id, lang, name, duration, video_id):
|
def _get_subtitles(self, author, clip_id, lang, name, duration, video_id):
|
||||||
@ -327,25 +332,44 @@ class PluralsightCourseIE(PluralsightBaseIE):
|
|||||||
# TODO: PSM cookie
|
# TODO: PSM cookie
|
||||||
|
|
||||||
course = self._download_json(
|
course = self._download_json(
|
||||||
'%s/data/course/%s' % (self._API_BASE, course_id),
|
'%s/player/functions/rpc' % self._API_BASE, course_id,
|
||||||
course_id, 'Downloading course JSON')
|
'Downloading course JSON',
|
||||||
|
data=json.dumps({
|
||||||
|
'fn': 'bootstrapPlayer',
|
||||||
|
'payload': {
|
||||||
|
'courseId': course_id,
|
||||||
|
}
|
||||||
|
}).encode('utf-8'),
|
||||||
|
headers={
|
||||||
|
'Content-Type': 'application/json;charset=utf-8'
|
||||||
|
})['payload']['course']
|
||||||
|
|
||||||
title = course['title']
|
title = course['title']
|
||||||
|
course_name = course['name']
|
||||||
|
course_data = course['modules']
|
||||||
description = course.get('description') or course.get('shortDescription')
|
description = course.get('description') or course.get('shortDescription')
|
||||||
|
|
||||||
course_data = self._download_json(
|
|
||||||
'%s/data/course/content/%s' % (self._API_BASE, course_id),
|
|
||||||
course_id, 'Downloading course data JSON')
|
|
||||||
|
|
||||||
entries = []
|
entries = []
|
||||||
for num, module in enumerate(course_data, 1):
|
for num, module in enumerate(course_data, 1):
|
||||||
for clip in module.get('clips', []):
|
author = module.get('author')
|
||||||
player_parameters = clip.get('playerParameters')
|
module_name = module.get('name')
|
||||||
if not player_parameters:
|
if not author or not module_name:
|
||||||
continue
|
continue
|
||||||
|
for clip in module.get('clips', []):
|
||||||
|
clip_index = int_or_none(clip.get('index'))
|
||||||
|
if clip_index is None:
|
||||||
|
continue
|
||||||
|
clip_url = update_url_query(
|
||||||
|
'%s/player' % self._API_BASE, query={
|
||||||
|
'mode': 'live',
|
||||||
|
'course': course_name,
|
||||||
|
'author': author,
|
||||||
|
'name': module_name,
|
||||||
|
'clip': clip_index,
|
||||||
|
})
|
||||||
entries.append({
|
entries.append({
|
||||||
'_type': 'url_transparent',
|
'_type': 'url_transparent',
|
||||||
'url': '%s/training/player?%s' % (self._API_BASE, player_parameters),
|
'url': clip_url,
|
||||||
'ie_key': PluralsightIE.ie_key(),
|
'ie_key': PluralsightIE.ie_key(),
|
||||||
'chapter': module.get('title'),
|
'chapter': module.get('title'),
|
||||||
'chapter_number': num,
|
'chapter_number': num,
|
||||||
|
@ -69,7 +69,7 @@ class SixPlayIE(InfoExtractor):
|
|||||||
asset_url.replace('.m3u8', '.mpd'),
|
asset_url.replace('.m3u8', '.mpd'),
|
||||||
video_id, mpd_id='dash', fatal=False))
|
video_id, mpd_id='dash', fatal=False))
|
||||||
formats.extend(self._extract_ism_formats(
|
formats.extend(self._extract_ism_formats(
|
||||||
re.sub('/[^/]+\.m3u8', '/Manifest', asset_url),
|
re.sub(r'/[^/]+\.m3u8', '/Manifest', asset_url),
|
||||||
video_id, ism_id='mss', fatal=False))
|
video_id, ism_id='mss', fatal=False))
|
||||||
else:
|
else:
|
||||||
formats.extend(self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
@ -306,9 +306,10 @@ class ThePlatformFeedIE(ThePlatformBaseIE):
|
|||||||
},
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _extract_feed_info(self, provider_id, feed_id, filter_query, video_id, custom_fields=None, asset_types_query={}):
|
def _extract_feed_info(self, provider_id, feed_id, filter_query, video_id, custom_fields=None, asset_types_query={}, account_id=None):
|
||||||
real_url = self._URL_TEMPLATE % (self.http_scheme(), provider_id, feed_id, filter_query)
|
real_url = self._URL_TEMPLATE % (self.http_scheme(), provider_id, feed_id, filter_query)
|
||||||
entry = self._download_json(real_url, video_id)['entries'][0]
|
entry = self._download_json(real_url, video_id)['entries'][0]
|
||||||
|
main_smil_url = 'http://link.theplatform.com/s/%s/media/guid/%d/%s' % (provider_id, account_id, entry['guid']) if account_id else None
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
@ -333,7 +334,7 @@ class ThePlatformFeedIE(ThePlatformBaseIE):
|
|||||||
if asset_type in asset_types_query:
|
if asset_type in asset_types_query:
|
||||||
query.update(asset_types_query[asset_type])
|
query.update(asset_types_query[asset_type])
|
||||||
cur_formats, cur_subtitles = self._extract_theplatform_smil(update_url_query(
|
cur_formats, cur_subtitles = self._extract_theplatform_smil(update_url_query(
|
||||||
smil_url, query), video_id, 'Downloading SMIL data for %s' % asset_type)
|
main_smil_url or smil_url, query), video_id, 'Downloading SMIL data for %s' % asset_type)
|
||||||
formats.extend(cur_formats)
|
formats.extend(cur_formats)
|
||||||
subtitles = self._merge_subtitles(subtitles, cur_subtitles)
|
subtitles = self._merge_subtitles(subtitles, cur_subtitles)
|
||||||
|
|
||||||
|
75
youtube_dl/extractor/tvplayer.py
Normal file
75
youtube_dl/extractor/tvplayer.py
Normal file
@ -0,0 +1,75 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_HTTPError
|
||||||
|
from ..utils import (
|
||||||
|
extract_attributes,
|
||||||
|
urlencode_postdata,
|
||||||
|
ExtractorError,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TVPlayerIE(InfoExtractor):
    # Live channel pages of the form http://tvplayer.com/watch/<channel>.
    _VALID_URL = r'https?://(?:www\.)?tvplayer\.com/watch/(?P<id>[^/?#]+)'
    _TEST = {
        'url': 'http://tvplayer.com/watch/bbcone',
        'info_dict': {
            'id': '89',
            'ext': 'mp4',
            'title': r're:^BBC One [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        }
    }

    def _real_extract(self, url):
        """Extract the live m3u8 stream for a tvplayer.com channel page.

        Scrapes the channel name and the stream request parameters from the
        watch page, posts them to the stream API and returns an info dict
        flagged as live.

        Raises ExtractorError (expected=True) carrying the API's own error
        message when the API answers with an HTTP error.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # The opening tag of the "current-channel" element carries the
        # channel name in its data-name attribute.
        current_channel = extract_attributes(self._search_regex(
            r'(<div[^>]+class="[^"]*current-channel[^"]*"[^>]*>)',
            webpage, 'channel element'))
        title = current_channel['data-name']

        resource_id = self._search_regex(
            r'resourceId\s*=\s*"(\d+)"', webpage, 'resource id')
        platform = self._search_regex(
            r'platform\s*=\s*"([^"]+)"', webpage, 'platform')
        # token and validate are optional in the page; the literal string
        # 'null' is posted when they are absent.
        token = self._search_regex(
            r'token\s*=\s*"([^"]+)"', webpage, 'token', default='null')
        validate = self._search_regex(
            r'validate\s*=\s*"([^"]+)"', webpage, 'validate', default='null')

        try:
            response = self._download_json(
                'http://api.tvplayer.com/api/v2/stream/live',
                resource_id, headers={
                    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
                }, data=urlencode_postdata({
                    'service': 1,
                    'platform': platform,
                    'id': resource_id,
                    'token': token,
                    'validate': validate,
                }))['tvplayer']['response']
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError):
                # The body of the HTTP error response is parsed as JSON to
                # surface the API's own message. Decode explicitly: a bare
                # .decode() means ASCII on Python 2 and would raise
                # UnicodeDecodeError on a non-ASCII message, hiding the real
                # error. NOTE(review): assumes the API emits UTF-8 JSON.
                response = self._parse_json(
                    e.cause.read().decode('utf-8'), resource_id)['tvplayer']['response']
                raise ExtractorError(
                    '%s said: %s' % (self.IE_NAME, response['error']), expected=True)
            # Any other failure is re-raised untouched.
            raise

        formats = self._extract_m3u8_formats(response['stream'], resource_id, 'mp4')
        self._sort_formats(formats)

        return {
            'id': resource_id,
            'display_id': display_id,
            'title': self._live_title(title),
            'formats': formats,
            'is_live': True,
        }
|
@ -337,17 +337,30 @@ def get_element_by_id(id, html):
|
|||||||
|
|
||||||
|
|
||||||
def get_element_by_class(class_name, html):
|
def get_element_by_class(class_name, html):
|
||||||
return get_element_by_attribute(
|
"""Return the content of the first tag with the specified class in the passed HTML document"""
|
||||||
|
retval = get_elements_by_class(class_name, html)
|
||||||
|
return retval[0] if retval else None
|
||||||
|
|
||||||
|
|
||||||
|
def get_element_by_attribute(attribute, value, html, escape_value=True):
|
||||||
|
retval = get_elements_by_attribute(attribute, value, html, escape_value)
|
||||||
|
return retval[0] if retval else None
|
||||||
|
|
||||||
|
|
||||||
|
def get_elements_by_class(class_name, html):
|
||||||
|
"""Return the content of all tags with the specified class in the passed HTML document as a list"""
|
||||||
|
return get_elements_by_attribute(
|
||||||
'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name),
|
'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name),
|
||||||
html, escape_value=False)
|
html, escape_value=False)
|
||||||
|
|
||||||
|
|
||||||
def get_element_by_attribute(attribute, value, html, escape_value=True):
|
def get_elements_by_attribute(attribute, value, html, escape_value=True):
|
||||||
"""Return the content of the tag with the specified attribute in the passed HTML document"""
|
"""Return the content of the tag with the specified attribute in the passed HTML document"""
|
||||||
|
|
||||||
value = re.escape(value) if escape_value else value
|
value = re.escape(value) if escape_value else value
|
||||||
|
|
||||||
m = re.search(r'''(?xs)
|
retlist = []
|
||||||
|
for m in re.finditer(r'''(?xs)
|
||||||
<([a-zA-Z0-9:._-]+)
|
<([a-zA-Z0-9:._-]+)
|
||||||
(?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'))*?
|
(?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'))*?
|
||||||
\s+%s=['"]?%s['"]?
|
\s+%s=['"]?%s['"]?
|
||||||
@ -355,16 +368,15 @@ def get_element_by_attribute(attribute, value, html, escape_value=True):
|
|||||||
\s*>
|
\s*>
|
||||||
(?P<content>.*?)
|
(?P<content>.*?)
|
||||||
</\1>
|
</\1>
|
||||||
''' % (re.escape(attribute), value), html)
|
''' % (re.escape(attribute), value), html):
|
||||||
|
|
||||||
if not m:
|
|
||||||
return None
|
|
||||||
res = m.group('content')
|
res = m.group('content')
|
||||||
|
|
||||||
if res.startswith('"') or res.startswith("'"):
|
if res.startswith('"') or res.startswith("'"):
|
||||||
res = res[1:-1]
|
res = res[1:-1]
|
||||||
|
|
||||||
return unescapeHTML(res)
|
retlist.append(unescapeHTML(res))
|
||||||
|
|
||||||
|
return retlist
|
||||||
|
|
||||||
|
|
||||||
class HTMLAttributeParser(compat_HTMLParser):
|
class HTMLAttributeParser(compat_HTMLParser):
|
||||||
|
@ -1,3 +1,3 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
__version__ = '2017.02.10'
|
__version__ = '2017.02.11'
|
||||||
|
Reference in New Issue
Block a user