Compare commits
36 Commits
2016.06.19
...
2016.06.23
Author | SHA1 | Date | |
---|---|---|---|
011bd3221b | |||
b46eabecd3 | |||
0437307a41 | |||
22b7ac13ef | |||
96f88e91b7 | |||
3331a4644d | |||
adf1921dc1 | |||
97674f0419 | |||
73843ae8ac | |||
f2bb8c036a | |||
75ca6bcee2 | |||
089657ed1f | |||
b5eab86c24 | |||
c8e3e0974b | |||
dfc8f46e1c | |||
c143ddce5d | |||
169d836feb | |||
6ae938b295 | |||
cf40fdf5c1 | |||
23bdae0955 | |||
ca74c90bf5 | |||
7cfc1e2a10 | |||
1ac5705f62 | |||
e4f90ea0a7 | |||
cdfc187cd5 | |||
feef925f49 | |||
19e2d1cdea | |||
8369a4fe76 | |||
1f749b6658 | |||
819707920a | |||
43518503a6 | |||
5839d556e4 | |||
6c83e583b3 | |||
6aeb64b673 | |||
6cd64b6806 | |||
e154c65128 |
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@ -6,8 +6,8 @@
|
||||
|
||||
---
|
||||
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.19.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.19.1**
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.23.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.23.1**
|
||||
|
||||
### Before submitting an *issue* make sure you have:
|
||||
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||
@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2016.06.19.1
|
||||
[debug] youtube-dl version 2016.06.23.1
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
@ -14,15 +14,17 @@ if os.path.exists(lazy_extractors_filename):
|
||||
os.remove(lazy_extractors_filename)
|
||||
|
||||
from youtube_dl.extractor import _ALL_CLASSES
|
||||
from youtube_dl.extractor.common import InfoExtractor
|
||||
from youtube_dl.extractor.common import InfoExtractor, SearchInfoExtractor
|
||||
|
||||
with open('devscripts/lazy_load_template.py', 'rt') as f:
|
||||
module_template = f.read()
|
||||
|
||||
module_contents = [module_template + '\n' + getsource(InfoExtractor.suitable)]
|
||||
module_contents = [
|
||||
module_template + '\n' + getsource(InfoExtractor.suitable) + '\n',
|
||||
'class LazyLoadSearchExtractor(LazyLoadExtractor):\n pass\n']
|
||||
|
||||
ie_template = '''
|
||||
class {name}(LazyLoadExtractor):
|
||||
class {name}({bases}):
|
||||
_VALID_URL = {valid_url!r}
|
||||
_module = '{module}'
|
||||
'''
|
||||
@ -34,10 +36,20 @@ make_valid_template = '''
|
||||
'''
|
||||
|
||||
|
||||
def get_base_name(base):
|
||||
if base is InfoExtractor:
|
||||
return 'LazyLoadExtractor'
|
||||
elif base is SearchInfoExtractor:
|
||||
return 'LazyLoadSearchExtractor'
|
||||
else:
|
||||
return base.__name__
|
||||
|
||||
|
||||
def build_lazy_ie(ie, name):
|
||||
valid_url = getattr(ie, '_VALID_URL', None)
|
||||
s = ie_template.format(
|
||||
name=name,
|
||||
bases=', '.join(map(get_base_name, ie.__bases__)),
|
||||
valid_url=valid_url,
|
||||
module=ie.__module__)
|
||||
if ie.suitable.__func__ is not InfoExtractor.suitable.__func__:
|
||||
@ -47,12 +59,35 @@ def build_lazy_ie(ie, name):
|
||||
s += make_valid_template.format(valid_url=ie._make_valid_url())
|
||||
return s
|
||||
|
||||
# find the correct sorting and add the required base classes so that sublcasses
|
||||
# can be correctly created
|
||||
classes = _ALL_CLASSES[:-1]
|
||||
ordered_cls = []
|
||||
while classes:
|
||||
for c in classes[:]:
|
||||
bases = set(c.__bases__) - set((object, InfoExtractor, SearchInfoExtractor))
|
||||
stop = False
|
||||
for b in bases:
|
||||
if b not in classes and b not in ordered_cls:
|
||||
if b.__name__ == 'GenericIE':
|
||||
exit()
|
||||
classes.insert(0, b)
|
||||
stop = True
|
||||
if stop:
|
||||
break
|
||||
if all(b in ordered_cls for b in bases):
|
||||
ordered_cls.append(c)
|
||||
classes.remove(c)
|
||||
break
|
||||
ordered_cls.append(_ALL_CLASSES[-1])
|
||||
|
||||
names = []
|
||||
for ie in list(sorted(_ALL_CLASSES[:-1], key=lambda cls: cls.ie_key())) + _ALL_CLASSES[-1:]:
|
||||
name = ie.ie_key() + 'IE'
|
||||
for ie in ordered_cls:
|
||||
name = ie.__name__
|
||||
src = build_lazy_ie(ie, name)
|
||||
module_contents.append(src)
|
||||
names.append(name)
|
||||
if ie in _ALL_CLASSES:
|
||||
names.append(name)
|
||||
|
||||
module_contents.append(
|
||||
'_ALL_CLASSES = [{0}]'.format(', '.join(names)))
|
||||
|
@ -248,7 +248,6 @@
|
||||
- **Gamersyde**
|
||||
- **GameSpot**
|
||||
- **GameStar**
|
||||
- **Gametrailers**
|
||||
- **Gazeta**
|
||||
- **GDCVault**
|
||||
- **generic**: Generic downloader that works on some sites
|
||||
@ -386,7 +385,6 @@
|
||||
- **MovieFap**
|
||||
- **Moviezine**
|
||||
- **MPORA**
|
||||
- **MSNBC**
|
||||
- **MTV**
|
||||
- **mtv.de**
|
||||
- **mtviggy.com**
|
||||
|
@ -2,14 +2,24 @@ from __future__ import unicode_literals
|
||||
|
||||
import os.path
|
||||
import re
|
||||
import binascii
|
||||
try:
|
||||
from Crypto.Cipher import AES
|
||||
can_decrypt_frag = True
|
||||
except ImportError:
|
||||
can_decrypt_frag = False
|
||||
|
||||
from .fragment import FragmentFD
|
||||
from .external import FFmpegFD
|
||||
|
||||
from ..compat import compat_urlparse
|
||||
from ..compat import (
|
||||
compat_urlparse,
|
||||
compat_struct_pack,
|
||||
)
|
||||
from ..utils import (
|
||||
encodeFilename,
|
||||
sanitize_open,
|
||||
parse_m3u8_attributes,
|
||||
)
|
||||
|
||||
|
||||
@ -21,7 +31,7 @@ class HlsFD(FragmentFD):
|
||||
@staticmethod
|
||||
def can_download(manifest):
|
||||
UNSUPPORTED_FEATURES = (
|
||||
r'#EXT-X-KEY:METHOD=(?!NONE)', # encrypted streams [1]
|
||||
r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)', # encrypted streams [1]
|
||||
r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2]
|
||||
|
||||
# Live streams heuristic does not always work (e.g. geo restricted to Germany
|
||||
@ -39,7 +49,9 @@ class HlsFD(FragmentFD):
|
||||
# 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.2
|
||||
# 4. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.5
|
||||
)
|
||||
return all(not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES)
|
||||
check_results = [not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES]
|
||||
check_results.append(can_decrypt_frag or '#EXT-X-KEY:METHOD=AES-128' not in manifest)
|
||||
return all(check_results)
|
||||
|
||||
def real_download(self, filename, info_dict):
|
||||
man_url = info_dict['url']
|
||||
@ -57,36 +69,60 @@ class HlsFD(FragmentFD):
|
||||
fd.add_progress_hook(ph)
|
||||
return fd.real_download(filename, info_dict)
|
||||
|
||||
fragment_urls = []
|
||||
total_frags = 0
|
||||
for line in s.splitlines():
|
||||
line = line.strip()
|
||||
if line and not line.startswith('#'):
|
||||
segment_url = (
|
||||
line
|
||||
if re.match(r'^https?://', line)
|
||||
else compat_urlparse.urljoin(man_url, line))
|
||||
fragment_urls.append(segment_url)
|
||||
# We only download the first fragment during the test
|
||||
if self.params.get('test', False):
|
||||
break
|
||||
total_frags += 1
|
||||
|
||||
ctx = {
|
||||
'filename': filename,
|
||||
'total_frags': len(fragment_urls),
|
||||
'total_frags': total_frags,
|
||||
}
|
||||
|
||||
self._prepare_and_start_frag_download(ctx)
|
||||
|
||||
i = 0
|
||||
media_sequence = 0
|
||||
decrypt_info = {'METHOD': 'NONE'}
|
||||
frags_filenames = []
|
||||
for i, frag_url in enumerate(fragment_urls):
|
||||
frag_filename = '%s-Frag%d' % (ctx['tmpfilename'], i)
|
||||
success = ctx['dl'].download(frag_filename, {'url': frag_url})
|
||||
if not success:
|
||||
return False
|
||||
down, frag_sanitized = sanitize_open(frag_filename, 'rb')
|
||||
ctx['dest_stream'].write(down.read())
|
||||
down.close()
|
||||
frags_filenames.append(frag_sanitized)
|
||||
for line in s.splitlines():
|
||||
line = line.strip()
|
||||
if line:
|
||||
if not line.startswith('#'):
|
||||
frag_url = (
|
||||
line
|
||||
if re.match(r'^https?://', line)
|
||||
else compat_urlparse.urljoin(man_url, line))
|
||||
frag_filename = '%s-Frag%d' % (ctx['tmpfilename'], i)
|
||||
success = ctx['dl'].download(frag_filename, {'url': frag_url})
|
||||
if not success:
|
||||
return False
|
||||
down, frag_sanitized = sanitize_open(frag_filename, 'rb')
|
||||
frag_content = down.read()
|
||||
down.close()
|
||||
if decrypt_info['METHOD'] == 'AES-128':
|
||||
iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence)
|
||||
frag_content = AES.new(
|
||||
decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content)
|
||||
ctx['dest_stream'].write(frag_content)
|
||||
frags_filenames.append(frag_sanitized)
|
||||
# We only download the first fragment during the test
|
||||
if self.params.get('test', False):
|
||||
break
|
||||
i += 1
|
||||
media_sequence += 1
|
||||
elif line.startswith('#EXT-X-KEY'):
|
||||
decrypt_info = parse_m3u8_attributes(line[11:])
|
||||
if decrypt_info['METHOD'] == 'AES-128':
|
||||
if 'IV' in decrypt_info:
|
||||
decrypt_info['IV'] = binascii.unhexlify(decrypt_info['IV'][2:])
|
||||
if not re.match(r'^https?://', decrypt_info['URI']):
|
||||
decrypt_info['URI'] = compat_urlparse.urljoin(
|
||||
man_url, decrypt_info['URI'])
|
||||
decrypt_info['KEY'] = self.ydl.urlopen(decrypt_info['URI']).read()
|
||||
elif line.startswith('#EXT-X-MEDIA-SEQUENCE'):
|
||||
media_sequence = int(line[22:])
|
||||
|
||||
self._finish_frag_download(ctx)
|
||||
|
||||
|
@ -1,17 +1,13 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .theplatform import ThePlatformIE
|
||||
from .theplatform import ThePlatformFeedIE
|
||||
from ..utils import (
|
||||
xpath_text,
|
||||
xpath_element,
|
||||
int_or_none,
|
||||
find_xpath_attr,
|
||||
)
|
||||
|
||||
|
||||
class CBSBaseIE(ThePlatformIE):
|
||||
class CBSBaseIE(ThePlatformFeedIE):
|
||||
def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
|
||||
closed_caption_e = find_xpath_attr(smil, self._xpath_ns('.//param', namespace), 'name', 'ClosedCaptionURL')
|
||||
return {
|
||||
@ -21,9 +17,22 @@ class CBSBaseIE(ThePlatformIE):
|
||||
}]
|
||||
} if closed_caption_e is not None and closed_caption_e.attrib.get('value') else []
|
||||
|
||||
def _extract_video_info(self, filter_query, video_id):
|
||||
return self._extract_feed_info(
|
||||
'dJ5BDC', 'VxxJg8Ymh8sE', filter_query, video_id, lambda entry: {
|
||||
'series': entry.get('cbs$SeriesTitle'),
|
||||
'season_number': int_or_none(entry.get('cbs$SeasonNumber')),
|
||||
'episode': entry.get('cbs$EpisodeTitle'),
|
||||
'episode_number': int_or_none(entry.get('cbs$EpisodeNumber')),
|
||||
}, {
|
||||
'StreamPack': {
|
||||
'manifest': 'm3u',
|
||||
}
|
||||
})
|
||||
|
||||
|
||||
class CBSIE(CBSBaseIE):
|
||||
_VALID_URL = r'(?:cbs:(?P<content_id>\w+)|https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/(?:video|artist)|colbertlateshow\.com/(?:video|podcasts))/[^/]+/(?P<display_id>[^/]+))'
|
||||
_VALID_URL = r'(?:cbs:|https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/video|colbertlateshow\.com/(?:video|podcasts))/)(?P<id>[\w-]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/',
|
||||
@ -38,25 +47,7 @@ class CBSIE(CBSBaseIE):
|
||||
'upload_date': '20131127',
|
||||
'uploader': 'CBSI-NEW',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
'_skip': 'Blocked outside the US',
|
||||
}, {
|
||||
'url': 'http://www.cbs.com/shows/liveonletterman/artist/221752/st-vincent/',
|
||||
'info_dict': {
|
||||
'id': 'WWF_5KqY3PK1',
|
||||
'display_id': 'st-vincent',
|
||||
'ext': 'flv',
|
||||
'title': 'Live on Letterman - St. Vincent',
|
||||
'description': 'Live On Letterman: St. Vincent in concert from New York\'s Ed Sullivan Theater on Tuesday, July 16, 2014.',
|
||||
'duration': 3221,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['Failed to download m3u8 information'],
|
||||
'_skip': 'Blocked outside the US',
|
||||
}, {
|
||||
'url': 'http://colbertlateshow.com/video/8GmB0oY0McANFvp2aEffk9jZZZ2YyXxy/the-colbeard/',
|
||||
@ -68,44 +59,5 @@ class CBSIE(CBSBaseIE):
|
||||
TP_RELEASE_URL_TEMPLATE = 'http://link.theplatform.com/s/dJ5BDC/%s?mbr=true'
|
||||
|
||||
def _real_extract(self, url):
|
||||
content_id, display_id = re.match(self._VALID_URL, url).groups()
|
||||
if not content_id:
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
content_id = self._search_regex(
|
||||
[r"video\.settings\.content_id\s*=\s*'([^']+)';", r"cbsplayer\.contentId\s*=\s*'([^']+)';"],
|
||||
webpage, 'content id')
|
||||
items_data = self._download_xml(
|
||||
'http://can.cbs.com/thunder/player/videoPlayerService.php',
|
||||
content_id, query={'partner': 'cbs', 'contentId': content_id})
|
||||
video_data = xpath_element(items_data, './/item')
|
||||
title = xpath_text(video_data, 'videoTitle', 'title', True)
|
||||
|
||||
subtitles = {}
|
||||
formats = []
|
||||
for item in items_data.findall('.//item'):
|
||||
pid = xpath_text(item, 'pid')
|
||||
if not pid:
|
||||
continue
|
||||
tp_release_url = self.TP_RELEASE_URL_TEMPLATE % pid
|
||||
if '.m3u8' in xpath_text(item, 'contentUrl', default=''):
|
||||
tp_release_url += '&manifest=m3u'
|
||||
tp_formats, tp_subtitles = self._extract_theplatform_smil(
|
||||
tp_release_url, content_id, 'Downloading %s SMIL data' % pid)
|
||||
formats.extend(tp_formats)
|
||||
subtitles = self._merge_subtitles(subtitles, tp_subtitles)
|
||||
self._sort_formats(formats)
|
||||
|
||||
info = self.get_metadata('dJ5BDC/media/guid/2198311517/%s' % content_id, content_id)
|
||||
info.update({
|
||||
'id': content_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'series': xpath_text(video_data, 'seriesTitle'),
|
||||
'season_number': int_or_none(xpath_text(video_data, 'seasonNumber')),
|
||||
'episode_number': int_or_none(xpath_text(video_data, 'episodeNumber')),
|
||||
'duration': int_or_none(xpath_text(video_data, 'videoLength'), 1000),
|
||||
'thumbnail': xpath_text(video_data, 'previewImageURL'),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
})
|
||||
return info
|
||||
content_id = self._match_id(url)
|
||||
return self._extract_video_info('byGuid=%s' % content_id, content_id)
|
||||
|
@ -30,9 +30,12 @@ class CBSNewsIE(CBSBaseIE):
|
||||
{
|
||||
'url': 'http://www.cbsnews.com/videos/fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack/',
|
||||
'info_dict': {
|
||||
'id': 'fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack',
|
||||
'id': 'SNJBOYzXiWBOvaLsdzwH8fmtP1SCd91Y',
|
||||
'ext': 'mp4',
|
||||
'title': 'Fort Hood shooting: Army downplays mental illness as cause of attack',
|
||||
'description': 'md5:4a6983e480542d8b333a947bfc64ddc7',
|
||||
'upload_date': '19700101',
|
||||
'uploader': 'CBSI-NEW',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'duration': 205,
|
||||
'subtitles': {
|
||||
@ -58,30 +61,8 @@ class CBSNewsIE(CBSBaseIE):
|
||||
webpage, 'video JSON info'), video_id)
|
||||
|
||||
item = video_info['item'] if 'item' in video_info else video_info
|
||||
title = item.get('articleTitle') or item.get('hed')
|
||||
duration = item.get('duration')
|
||||
thumbnail = item.get('mediaImage') or item.get('thumbnail')
|
||||
|
||||
subtitles = {}
|
||||
formats = []
|
||||
for format_id in ['RtmpMobileLow', 'RtmpMobileHigh', 'Hls', 'RtmpDesktop']:
|
||||
pid = item.get('media' + format_id)
|
||||
if not pid:
|
||||
continue
|
||||
release_url = 'http://link.theplatform.com/s/dJ5BDC/%s?mbr=true' % pid
|
||||
tp_formats, tp_subtitles = self._extract_theplatform_smil(release_url, video_id, 'Downloading %s SMIL data' % pid)
|
||||
formats.extend(tp_formats)
|
||||
subtitles = self._merge_subtitles(subtitles, tp_subtitles)
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
guid = item['mpxRefId']
|
||||
return self._extract_video_info('byGuid=%s' % guid, guid)
|
||||
|
||||
|
||||
class CBSNewsLiveVideoIE(InfoExtractor):
|
||||
|
@ -1,30 +1,28 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .cbs import CBSBaseIE
|
||||
|
||||
|
||||
class CBSSportsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.cbssports\.com/video/player/(?P<section>[^/]+)/(?P<id>[^/]+)'
|
||||
class CBSSportsIE(CBSBaseIE):
|
||||
_VALID_URL = r'https?://www\.cbssports\.com/video/player/[^/]+/(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.cbssports.com/video/player/tennis/318462531970/0/us-open-flashbacks-1990s',
|
||||
_TESTS = [{
|
||||
'url': 'http://www.cbssports.com/video/player/videos/708337219968/0/ben-simmons-the-next-lebron?-not-so-fast',
|
||||
'info_dict': {
|
||||
'id': '_d5_GbO8p1sT',
|
||||
'ext': 'flv',
|
||||
'title': 'US Open flashbacks: 1990s',
|
||||
'description': 'Bill Macatee relives the best moments in US Open history from the 1990s.',
|
||||
'id': '708337219968',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ben Simmons the next LeBron? Not so fast',
|
||||
'description': 'md5:854294f627921baba1f4b9a990d87197',
|
||||
'timestamp': 1466293740,
|
||||
'upload_date': '20160618',
|
||||
'uploader': 'CBSI-NEW',
|
||||
},
|
||||
}
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
section = mobj.group('section')
|
||||
video_id = mobj.group('id')
|
||||
all_videos = self._download_json(
|
||||
'http://www.cbssports.com/data/video/player/getVideos/%s?as=json' % section,
|
||||
video_id)
|
||||
# The json file contains the info of all the videos in the section
|
||||
video_info = next(v for v in all_videos if v['pcid'] == video_id)
|
||||
return self.url_result('theplatform:%s' % video_info['pid'], 'ThePlatform')
|
||||
video_id = self._match_id(url)
|
||||
return self._extract_video_info('byId=%s' % video_id, video_id)
|
||||
|
@ -53,6 +53,7 @@ from ..utils import (
|
||||
mimetype2ext,
|
||||
update_Request,
|
||||
update_url_query,
|
||||
parse_m3u8_attributes,
|
||||
)
|
||||
|
||||
|
||||
@ -1150,23 +1151,11 @@ class InfoExtractor(object):
|
||||
}]
|
||||
last_info = None
|
||||
last_media = None
|
||||
kv_rex = re.compile(
|
||||
r'(?P<key>[a-zA-Z_-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)')
|
||||
for line in m3u8_doc.splitlines():
|
||||
if line.startswith('#EXT-X-STREAM-INF:'):
|
||||
last_info = {}
|
||||
for m in kv_rex.finditer(line):
|
||||
v = m.group('val')
|
||||
if v.startswith('"'):
|
||||
v = v[1:-1]
|
||||
last_info[m.group('key')] = v
|
||||
last_info = parse_m3u8_attributes(line)
|
||||
elif line.startswith('#EXT-X-MEDIA:'):
|
||||
last_media = {}
|
||||
for m in kv_rex.finditer(line):
|
||||
v = m.group('val')
|
||||
if v.startswith('"'):
|
||||
v = v[1:-1]
|
||||
last_media[m.group('key')] = v
|
||||
last_media = parse_m3u8_attributes(line)
|
||||
elif line.startswith('#') or not line.strip():
|
||||
continue
|
||||
else:
|
||||
|
@ -285,7 +285,6 @@ from .gameone import (
|
||||
from .gamersyde import GamersydeIE
|
||||
from .gamespot import GameSpotIE
|
||||
from .gamestar import GameStarIE
|
||||
from .gametrailers import GametrailersIE
|
||||
from .gazeta import GazetaIE
|
||||
from .gdcvault import GDCVaultIE
|
||||
from .generic import GenericIE
|
||||
@ -481,7 +480,6 @@ from .nbc import (
|
||||
NBCNewsIE,
|
||||
NBCSportsIE,
|
||||
NBCSportsVPlayerIE,
|
||||
MSNBCIE,
|
||||
)
|
||||
from .ndr import (
|
||||
NDRIE,
|
||||
|
@ -239,6 +239,8 @@ class FacebookIE(InfoExtractor):
|
||||
|
||||
formats = []
|
||||
for format_id, f in video_data.items():
|
||||
if f and isinstance(f, dict):
|
||||
f = [f]
|
||||
if not f or not isinstance(f, list):
|
||||
continue
|
||||
for quality in ('sd', 'hd'):
|
||||
|
@ -1,7 +1,10 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import smuggle_url
|
||||
from ..utils import (
|
||||
smuggle_url,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
class FoxSportsIE(InfoExtractor):
|
||||
@ -9,11 +12,15 @@ class FoxSportsIE(InfoExtractor):
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.foxsports.com/video?vid=432609859715',
|
||||
'md5': 'b49050e955bebe32c301972e4012ac17',
|
||||
'info_dict': {
|
||||
'id': 'gA0bHB3Ladz3',
|
||||
'ext': 'flv',
|
||||
'id': 'i0qKWsk3qJaM',
|
||||
'ext': 'mp4',
|
||||
'title': 'Courtney Lee on going up 2-0 in series vs. Blazers',
|
||||
'description': 'Courtney Lee talks about Memphis being focused.',
|
||||
'upload_date': '20150423',
|
||||
'timestamp': 1429761109,
|
||||
'uploader': 'NEWA-FNG-FOXSPORTS',
|
||||
},
|
||||
'add_ie': ['ThePlatform'],
|
||||
}
|
||||
@ -28,5 +35,8 @@ class FoxSportsIE(InfoExtractor):
|
||||
r"data-player-config='([^']+)'", webpage, 'data player config'),
|
||||
video_id)
|
||||
|
||||
return self.url_result(smuggle_url(
|
||||
config['releaseURL'] + '&manifest=f4m', {'force_smil_url': True}))
|
||||
return self.url_result(smuggle_url(update_url_query(
|
||||
config['releaseURL'], {
|
||||
'mbr': 'true',
|
||||
'switch': 'http',
|
||||
}), {'force_smil_url': True}))
|
||||
|
@ -1,19 +1,19 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .once import OnceIE
|
||||
from ..compat import (
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
unescapeHTML,
|
||||
url_basename,
|
||||
dict_get,
|
||||
)
|
||||
|
||||
|
||||
class GameSpotIE(InfoExtractor):
|
||||
class GameSpotIE(OnceIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?gamespot\.com/.*-(?P<id>\d+)/?'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.gamespot.com/videos/arma-3-community-guide-sitrep-i/2300-6410818/',
|
||||
@ -39,29 +39,73 @@ class GameSpotIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, page_id)
|
||||
data_video_json = self._search_regex(
|
||||
r'data-video=["\'](.*?)["\']', webpage, 'data video')
|
||||
data_video = json.loads(unescapeHTML(data_video_json))
|
||||
data_video = self._parse_json(unescapeHTML(data_video_json), page_id)
|
||||
streams = data_video['videoStreams']
|
||||
|
||||
manifest_url = None
|
||||
formats = []
|
||||
f4m_url = streams.get('f4m_stream')
|
||||
if f4m_url is not None:
|
||||
# Transform the manifest url to a link to the mp4 files
|
||||
# they are used in mobile devices.
|
||||
f4m_path = compat_urlparse.urlparse(f4m_url).path
|
||||
QUALITIES_RE = r'((,\d+)+,?)'
|
||||
qualities = self._search_regex(QUALITIES_RE, f4m_path, 'qualities').strip(',').split(',')
|
||||
http_path = f4m_path[1:].split('/', 1)[1]
|
||||
http_template = re.sub(QUALITIES_RE, r'%s', http_path)
|
||||
http_template = http_template.replace('.csmil/manifest.f4m', '')
|
||||
http_template = compat_urlparse.urljoin(
|
||||
'http://video.gamespotcdn.com/', http_template)
|
||||
for q in qualities:
|
||||
formats.append({
|
||||
'url': http_template % q,
|
||||
'ext': 'mp4',
|
||||
'format_id': q,
|
||||
})
|
||||
else:
|
||||
if f4m_url:
|
||||
manifest_url = f4m_url
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
f4m_url + '?hdcore=3.7.0', page_id, f4m_id='hds', fatal=False))
|
||||
m3u8_url = streams.get('m3u8_stream')
|
||||
if m3u8_url:
|
||||
manifest_url = m3u8_url
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
m3u8_url, page_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False)
|
||||
formats.extend(m3u8_formats)
|
||||
progressive_url = dict_get(
|
||||
streams, ('progressive_hd', 'progressive_high', 'progressive_low'))
|
||||
if progressive_url and manifest_url:
|
||||
qualities_basename = self._search_regex(
|
||||
'/([^/]+)\.csmil/',
|
||||
manifest_url, 'qualities basename', default=None)
|
||||
if qualities_basename:
|
||||
QUALITIES_RE = r'((,\d+)+,?)'
|
||||
qualities = self._search_regex(
|
||||
QUALITIES_RE, qualities_basename,
|
||||
'qualities', default=None)
|
||||
if qualities:
|
||||
qualities = list(map(lambda q: int(q), qualities.strip(',').split(',')))
|
||||
qualities.sort()
|
||||
http_template = re.sub(QUALITIES_RE, r'%d', qualities_basename)
|
||||
http_url_basename = url_basename(progressive_url)
|
||||
if m3u8_formats:
|
||||
self._sort_formats(m3u8_formats)
|
||||
m3u8_formats = list(filter(
|
||||
lambda f: f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
|
||||
m3u8_formats))
|
||||
if len(qualities) == len(m3u8_formats):
|
||||
for q, m3u8_format in zip(qualities, m3u8_formats):
|
||||
f = m3u8_format.copy()
|
||||
f.update({
|
||||
'url': progressive_url.replace(
|
||||
http_url_basename, http_template % q),
|
||||
'format_id': f['format_id'].replace('hls', 'http'),
|
||||
'protocol': 'http',
|
||||
})
|
||||
formats.append(f)
|
||||
else:
|
||||
for q in qualities:
|
||||
formats.append({
|
||||
'url': progressive_url.replace(
|
||||
http_url_basename, http_template % q),
|
||||
'ext': 'mp4',
|
||||
'format_id': 'http-%d' % q,
|
||||
'tbr': q,
|
||||
})
|
||||
|
||||
onceux_json = self._search_regex(
|
||||
r'data-onceux-options=["\'](.*?)["\']', webpage, 'data video', default=None)
|
||||
if onceux_json:
|
||||
onceux_url = self._parse_json(unescapeHTML(onceux_json), page_id).get('metadataUri')
|
||||
if onceux_url:
|
||||
formats.extend(self._extract_once_formats(re.sub(
|
||||
r'https?://[^/]+', 'http://once.unicornmedia.com', onceux_url).replace('ads/vmap/', '')))
|
||||
|
||||
if not formats:
|
||||
for quality in ['sd', 'hd']:
|
||||
# It's actually a link to a flv file
|
||||
flv_url = streams.get('f4m_{0}'.format(quality))
|
||||
@ -71,6 +115,7 @@ class GameSpotIE(InfoExtractor):
|
||||
'ext': 'flv',
|
||||
'format_id': quality,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': data_video['guid'],
|
||||
|
@ -1,62 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
url_basename,
|
||||
)
|
||||
|
||||
|
||||
class GametrailersIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.gametrailers\.com/videos/view/[^/]+/(?P<id>.+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.gametrailers.com/videos/view/gametrailers-com/116437-Just-Cause-3-Review',
|
||||
'md5': 'f28c4efa0bdfaf9b760f6507955b6a6a',
|
||||
'info_dict': {
|
||||
'id': '2983958',
|
||||
'ext': 'mp4',
|
||||
'display_id': '116437-Just-Cause-3-Review',
|
||||
'title': 'Just Cause 3 - Review',
|
||||
'description': 'It\'s a lot of fun to shoot at things and then watch them explode in Just Cause 3, but should there be more to the experience than that?',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
title = self._html_search_regex(
|
||||
r'<title>(.+?)\|', webpage, 'title').strip()
|
||||
embed_url = self._proto_relative_url(
|
||||
self._search_regex(
|
||||
r'src=\'(//embed.gametrailers.com/embed/[^\']+)\'', webpage,
|
||||
'embed url'),
|
||||
scheme='http:')
|
||||
video_id = url_basename(embed_url)
|
||||
embed_page = self._download_webpage(embed_url, video_id)
|
||||
embed_vars_json = self._search_regex(
|
||||
r'(?s)var embedVars = (\{.*?\})\s*</script>', embed_page,
|
||||
'embed vars')
|
||||
info = self._parse_json(embed_vars_json, video_id)
|
||||
|
||||
formats = []
|
||||
for media in info['media']:
|
||||
if media['mediaPurpose'] == 'play':
|
||||
formats.append({
|
||||
'url': media['uri'],
|
||||
'height': media['height'],
|
||||
'width:': media['width'],
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnail': info.get('thumbUri'),
|
||||
'description': self._og_search_description(webpage),
|
||||
'duration': int_or_none(info.get('videoLengthInSeconds')),
|
||||
'age_limit': parse_age_limit(info.get('audienceRating')),
|
||||
}
|
@ -9,10 +9,6 @@ from ..utils import (
|
||||
lowercase_escape,
|
||||
smuggle_url,
|
||||
unescapeHTML,
|
||||
update_url_query,
|
||||
int_or_none,
|
||||
HEADRequest,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
@ -192,9 +188,9 @@ class CSNNEIE(InfoExtractor):
|
||||
|
||||
|
||||
class NBCNewsIE(ThePlatformIE):
|
||||
_VALID_URL = r'''(?x)https?://(?:www\.)?(?:nbcnews|today)\.com/
|
||||
_VALID_URL = r'''(?x)https?://(?:www\.)?(?:nbcnews|today|msnbc)\.com/
|
||||
(?:video/.+?/(?P<id>\d+)|
|
||||
([^/]+/)*(?P<display_id>[^/?]+))
|
||||
([^/]+/)*(?:.*-)?(?P<mpx_id>[^/?]+))
|
||||
'''
|
||||
|
||||
_TESTS = [
|
||||
@ -216,13 +212,16 @@ class NBCNewsIE(ThePlatformIE):
|
||||
'ext': 'mp4',
|
||||
'title': 'How Twitter Reacted To The Snowden Interview',
|
||||
'description': 'md5:65a0bd5d76fe114f3c2727aa3a81fe64',
|
||||
'uploader': 'NBCU-NEWS',
|
||||
'timestamp': 1401363060,
|
||||
'upload_date': '20140529',
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.nbcnews.com/feature/dateline-full-episodes/full-episode-family-business-n285156',
|
||||
'md5': 'fdbf39ab73a72df5896b6234ff98518a',
|
||||
'info_dict': {
|
||||
'id': 'Wjf9EDR3A_60',
|
||||
'id': '529953347624',
|
||||
'ext': 'mp4',
|
||||
'title': 'FULL EPISODE: Family Business',
|
||||
'description': 'md5:757988edbaae9d7be1d585eb5d55cc04',
|
||||
@ -237,6 +236,9 @@ class NBCNewsIE(ThePlatformIE):
|
||||
'ext': 'mp4',
|
||||
'title': 'Nightly News with Brian Williams Full Broadcast (February 4)',
|
||||
'description': 'md5:1c10c1eccbe84a26e5debb4381e2d3c5',
|
||||
'timestamp': 1423104900,
|
||||
'uploader': 'NBCU-NEWS',
|
||||
'upload_date': '20150205',
|
||||
},
|
||||
},
|
||||
{
|
||||
@ -245,10 +247,12 @@ class NBCNewsIE(ThePlatformIE):
|
||||
'info_dict': {
|
||||
'id': '529953347624',
|
||||
'ext': 'mp4',
|
||||
'title': 'Volkswagen U.S. Chief: We \'Totally Screwed Up\'',
|
||||
'description': 'md5:d22d1281a24f22ea0880741bb4dd6301',
|
||||
'title': 'Volkswagen U.S. Chief:\xa0 We Have Totally Screwed Up',
|
||||
'description': 'md5:c8be487b2d80ff0594c005add88d8351',
|
||||
'upload_date': '20150922',
|
||||
'timestamp': 1442917800,
|
||||
'uploader': 'NBCU-NEWS',
|
||||
},
|
||||
'expected_warnings': ['http-6000 is not available']
|
||||
},
|
||||
{
|
||||
'url': 'http://www.today.com/video/see-the-aurora-borealis-from-space-in-stunning-new-nasa-video-669831235788',
|
||||
@ -260,6 +264,22 @@ class NBCNewsIE(ThePlatformIE):
|
||||
'description': 'md5:74752b7358afb99939c5f8bb2d1d04b1',
|
||||
'upload_date': '20160420',
|
||||
'timestamp': 1461152093,
|
||||
'uploader': 'NBCU-NEWS',
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.msnbc.com/all-in-with-chris-hayes/watch/the-chaotic-gop-immigration-vote-314487875924',
|
||||
'md5': '6d236bf4f3dddc226633ce6e2c3f814d',
|
||||
'info_dict': {
|
||||
'id': '314487875924',
|
||||
'ext': 'mp4',
|
||||
'title': 'The chaotic GOP immigration vote',
|
||||
'description': 'The Republican House votes on a border bill that has no chance of getting through the Senate or signed by the President and is drawing criticism from all sides.',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'timestamp': 1406937606,
|
||||
'upload_date': '20140802',
|
||||
'uploader': 'NBCU-NEWS',
|
||||
'categories': ['MSNBC/Topics/Franchise/Best of last night', 'MSNBC/Topics/General/Congress'],
|
||||
},
|
||||
},
|
||||
{
|
||||
@ -290,105 +310,28 @@ class NBCNewsIE(ThePlatformIE):
|
||||
}
|
||||
else:
|
||||
# "feature" and "nightly-news" pages use theplatform.com
|
||||
display_id = mobj.group('display_id')
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
info = None
|
||||
bootstrap_json = self._search_regex(
|
||||
[r'(?m)(?:var\s+(?:bootstrapJson|playlistData)|NEWS\.videoObj)\s*=\s*({.+});?\s*$',
|
||||
r'videoObj\s*:\s*({.+})', r'data-video="([^"]+)"'],
|
||||
webpage, 'bootstrap json', default=None)
|
||||
bootstrap = self._parse_json(
|
||||
bootstrap_json, display_id, transform_source=unescapeHTML)
|
||||
if 'results' in bootstrap:
|
||||
info = bootstrap['results'][0]['video']
|
||||
elif 'video' in bootstrap:
|
||||
info = bootstrap['video']
|
||||
else:
|
||||
info = bootstrap
|
||||
video_id = info['mpxId']
|
||||
title = info['title']
|
||||
|
||||
subtitles = {}
|
||||
caption_links = info.get('captionLinks')
|
||||
if caption_links:
|
||||
for (sub_key, sub_ext) in (('smpte-tt', 'ttml'), ('web-vtt', 'vtt'), ('srt', 'srt')):
|
||||
sub_url = caption_links.get(sub_key)
|
||||
if sub_url:
|
||||
subtitles.setdefault('en', []).append({
|
||||
'url': sub_url,
|
||||
'ext': sub_ext,
|
||||
})
|
||||
|
||||
formats = []
|
||||
for video_asset in info['videoAssets']:
|
||||
video_url = video_asset.get('publicUrl')
|
||||
if not video_url:
|
||||
continue
|
||||
container = video_asset.get('format')
|
||||
asset_type = video_asset.get('assetType') or ''
|
||||
if container == 'ISM' or asset_type == 'FireTV-Once':
|
||||
continue
|
||||
elif asset_type == 'OnceURL':
|
||||
tp_formats, tp_subtitles = self._extract_theplatform_smil(
|
||||
video_url, video_id)
|
||||
formats.extend(tp_formats)
|
||||
subtitles = self._merge_subtitles(subtitles, tp_subtitles)
|
||||
video_id = mobj.group('mpx_id')
|
||||
if not video_id.isdigit():
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
info = None
|
||||
bootstrap_json = self._search_regex(
|
||||
[r'(?m)(?:var\s+(?:bootstrapJson|playlistData)|NEWS\.videoObj)\s*=\s*({.+});?\s*$',
|
||||
r'videoObj\s*:\s*({.+})', r'data-video="([^"]+)"'],
|
||||
webpage, 'bootstrap json', default=None)
|
||||
bootstrap = self._parse_json(
|
||||
bootstrap_json, video_id, transform_source=unescapeHTML)
|
||||
if 'results' in bootstrap:
|
||||
info = bootstrap['results'][0]['video']
|
||||
elif 'video' in bootstrap:
|
||||
info = bootstrap['video']
|
||||
else:
|
||||
tbr = int_or_none(video_asset.get('bitRate') or video_asset.get('bitrate'), 1000)
|
||||
format_id = 'http%s' % ('-%d' % tbr if tbr else '')
|
||||
video_url = update_url_query(
|
||||
video_url, {'format': 'redirect'})
|
||||
# resolve the url so that we can check availability and detect the correct extension
|
||||
head = self._request_webpage(
|
||||
HEADRequest(video_url), video_id,
|
||||
'Checking %s url' % format_id,
|
||||
'%s is not available' % format_id,
|
||||
fatal=False)
|
||||
if head:
|
||||
video_url = head.geturl()
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': video_url,
|
||||
'width': int_or_none(video_asset.get('width')),
|
||||
'height': int_or_none(video_asset.get('height')),
|
||||
'tbr': tbr,
|
||||
'container': video_asset.get('format'),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
info = bootstrap
|
||||
video_id = info['mpxId']
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': info.get('description'),
|
||||
'thumbnail': info.get('thumbnail'),
|
||||
'duration': int_or_none(info.get('duration')),
|
||||
'timestamp': parse_iso8601(info.get('pubDate') or info.get('pub_date')),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
# http://feed.theplatform.com/f/2E2eJC/nbcnews also works
|
||||
'url': 'http://feed.theplatform.com/f/2E2eJC/nnd_NBCNews?byId=%s' % video_id,
|
||||
'ie_key': 'ThePlatformFeed',
|
||||
}
|
||||
|
||||
|
||||
class MSNBCIE(InfoExtractor):
|
||||
# https URLs redirect to corresponding http ones
|
||||
_VALID_URL = r'https?://www\.msnbc\.com/[^/]+/watch/(?P<id>[^/]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.msnbc.com/all-in-with-chris-hayes/watch/the-chaotic-gop-immigration-vote-314487875924',
|
||||
'md5': '6d236bf4f3dddc226633ce6e2c3f814d',
|
||||
'info_dict': {
|
||||
'id': 'n_hayes_Aimm_140801_272214',
|
||||
'ext': 'mp4',
|
||||
'title': 'The chaotic GOP immigration vote',
|
||||
'description': 'The Republican House votes on a border bill that has no chance of getting through the Senate or signed by the President and is drawing criticism from all sides.',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'timestamp': 1406937606,
|
||||
'upload_date': '20140802',
|
||||
'uploader': 'NBCU-NEWS',
|
||||
'categories': ['MSNBC/Topics/Franchise/Best of last night', 'MSNBC/Topics/General/Congress'],
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
embed_url = self._html_search_meta('embedURL', webpage)
|
||||
return self.url_result(embed_url)
|
||||
|
@ -3,7 +3,7 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import(
|
||||
from ..utils import (
|
||||
unified_strdate,
|
||||
str_to_int,
|
||||
)
|
||||
|
@ -6,7 +6,6 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
sanitized_Request,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
@ -45,20 +44,26 @@ class StreamcloudIE(InfoExtractor):
|
||||
(?:id="[^"]+"\s+)?
|
||||
value="([^"]*)"
|
||||
''', orig_webpage)
|
||||
post = urlencode_postdata(fields)
|
||||
|
||||
self._sleep(12, video_id)
|
||||
headers = {
|
||||
b'Content-Type': b'application/x-www-form-urlencoded',
|
||||
}
|
||||
req = sanitized_Request(url, post, headers)
|
||||
|
||||
webpage = self._download_webpage(
|
||||
req, video_id, note='Downloading video page ...')
|
||||
title = self._html_search_regex(
|
||||
r'<h1[^>]*>([^<]+)<', webpage, 'title')
|
||||
video_url = self._search_regex(
|
||||
r'file:\s*"([^"]+)"', webpage, 'video URL')
|
||||
url, video_id, data=urlencode_postdata(fields), headers={
|
||||
b'Content-Type': b'application/x-www-form-urlencoded',
|
||||
})
|
||||
|
||||
try:
|
||||
title = self._html_search_regex(
|
||||
r'<h1[^>]*>([^<]+)<', webpage, 'title')
|
||||
video_url = self._search_regex(
|
||||
r'file:\s*"([^"]+)"', webpage, 'video URL')
|
||||
except ExtractorError:
|
||||
message = self._html_search_regex(
|
||||
r'(?s)<div[^>]+class=(["\']).*?msgboxinfo.*?\1[^>]*>(?P<message>.+?)</div>',
|
||||
webpage, 'message', default=None, group='message')
|
||||
if message:
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True)
|
||||
raise
|
||||
thumbnail = self._search_regex(
|
||||
r'image:\s*"([^"]+)"', webpage, 'thumbnail URL', fatal=False)
|
||||
|
||||
|
@ -6,17 +6,14 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
dict_get,
|
||||
int_or_none,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class SVTBaseIE(InfoExtractor):
|
||||
def _extract_video(self, url, video_id):
|
||||
info = self._download_json(url, video_id)
|
||||
|
||||
title = info['context']['title']
|
||||
thumbnail = info['context'].get('thumbnailImage')
|
||||
|
||||
video_info = info['video']
|
||||
def _extract_video(self, video_info, video_id):
|
||||
formats = []
|
||||
for vr in video_info['videoReferences']:
|
||||
player_type = vr.get('playerType')
|
||||
@ -40,27 +37,49 @@ class SVTBaseIE(InfoExtractor):
|
||||
'format_id': player_type,
|
||||
'url': vurl,
|
||||
})
|
||||
if not formats and video_info.get('rights', {}).get('geoBlockedSweden'):
|
||||
self.raise_geo_restricted('This video is only available in Sweden')
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
subtitle_references = video_info.get('subtitleReferences')
|
||||
subtitle_references = dict_get(video_info, ('subtitles', 'subtitleReferences'))
|
||||
if isinstance(subtitle_references, list):
|
||||
for sr in subtitle_references:
|
||||
subtitle_url = sr.get('url')
|
||||
subtitle_lang = sr.get('language', 'sv')
|
||||
if subtitle_url:
|
||||
subtitles.setdefault('sv', []).append({'url': subtitle_url})
|
||||
if determine_ext(subtitle_url) == 'm3u8':
|
||||
# TODO(yan12125): handle WebVTT in m3u8 manifests
|
||||
continue
|
||||
|
||||
duration = video_info.get('materialLength')
|
||||
age_limit = 18 if video_info.get('inappropriateForChildren') else 0
|
||||
subtitles.setdefault(subtitle_lang, []).append({'url': subtitle_url})
|
||||
|
||||
title = video_info.get('title')
|
||||
|
||||
series = video_info.get('programTitle')
|
||||
season_number = int_or_none(video_info.get('season'))
|
||||
episode = video_info.get('episodeTitle')
|
||||
episode_number = int_or_none(video_info.get('episodeNumber'))
|
||||
|
||||
duration = int_or_none(dict_get(video_info, ('materialLength', 'contentDuration')))
|
||||
age_limit = None
|
||||
adult = dict_get(
|
||||
video_info, ('inappropriateForChildren', 'blockedForChildren'),
|
||||
skip_false_values=False)
|
||||
if adult is not None:
|
||||
age_limit = 18 if adult else 0
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'age_limit': age_limit,
|
||||
'series': series,
|
||||
'season_number': season_number,
|
||||
'episode': episode,
|
||||
'episode_number': episode_number,
|
||||
}
|
||||
|
||||
|
||||
@ -68,11 +87,11 @@ class SVTIE(SVTBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?svt\.se/wd\?(?:.*?&)?widgetId=(?P<widget_id>\d+)&.*?\barticleId=(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.svt.se/wd?widgetId=23991§ionId=541&articleId=2900353&type=embed&contextSectionId=123&autostart=false',
|
||||
'md5': '9648197555fc1b49e3dc22db4af51d46',
|
||||
'md5': '33e9a5d8f646523ce0868ecfb0eed77d',
|
||||
'info_dict': {
|
||||
'id': '2900353',
|
||||
'ext': 'flv',
|
||||
'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)',
|
||||
'ext': 'mp4',
|
||||
'title': 'Stjärnorna skojar till det - under SVT-intervjun',
|
||||
'duration': 27,
|
||||
'age_limit': 0,
|
||||
},
|
||||
@ -89,15 +108,20 @@ class SVTIE(SVTBaseIE):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
widget_id = mobj.group('widget_id')
|
||||
article_id = mobj.group('id')
|
||||
return self._extract_video(
|
||||
|
||||
info = self._download_json(
|
||||
'http://www.svt.se/wd?widgetId=%s&articleId=%s&format=json&type=embed&output=json' % (widget_id, article_id),
|
||||
article_id)
|
||||
|
||||
info_dict = self._extract_video(info['video'], article_id)
|
||||
info_dict['title'] = info['context']['title']
|
||||
return info_dict
|
||||
|
||||
|
||||
class SVTPlayIE(SVTBaseIE):
|
||||
IE_DESC = 'SVT Play and Öppet arkiv'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<host>svtplay|oppetarkiv)\.se/video/(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/video/(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.svtplay.se/video/5996901/flygplan-till-haile-selassie/flygplan-till-haile-selassie-2',
|
||||
'md5': '2b6704fe4a28801e1a098bbf3c5ac611',
|
||||
'info_dict': {
|
||||
@ -113,12 +137,47 @@ class SVTPlayIE(SVTBaseIE):
|
||||
}]
|
||||
},
|
||||
},
|
||||
}
|
||||
}, {
|
||||
# geo restricted to Sweden
|
||||
'url': 'http://www.oppetarkiv.se/video/5219710/trollflojten',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
host = mobj.group('host')
|
||||
return self._extract_video(
|
||||
'http://www.%s.se/video/%s?output=json' % (host, video_id),
|
||||
video_id)
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
data = self._parse_json(
|
||||
self._search_regex(
|
||||
r'root\["__svtplay"\]\s*=\s*([^;]+);',
|
||||
webpage, 'embedded data', default='{}'),
|
||||
video_id, fatal=False)
|
||||
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
|
||||
if data:
|
||||
video_info = try_get(
|
||||
data, lambda x: x['context']['dispatcher']['stores']['VideoTitlePageStore']['data']['video'],
|
||||
dict)
|
||||
if video_info:
|
||||
info_dict = self._extract_video(video_info, video_id)
|
||||
info_dict.update({
|
||||
'title': data['context']['dispatcher']['stores']['MetaStore']['title'],
|
||||
'thumbnail': thumbnail,
|
||||
})
|
||||
return info_dict
|
||||
|
||||
video_id = self._search_regex(
|
||||
r'<video[^>]+data-video-id=["\']([\da-zA-Z-]+)',
|
||||
webpage, 'video id', default=None)
|
||||
|
||||
if video_id:
|
||||
data = self._download_json(
|
||||
'http://www.svt.se/videoplayer-api/video/%s' % video_id, video_id)
|
||||
info_dict = self._extract_video(data, video_id)
|
||||
if not info_dict.get('title'):
|
||||
info_dict['title'] = re.sub(
|
||||
r'\s*\|\s*.+?$', '',
|
||||
info_dict.get('episode') or self._og_search_title(webpage))
|
||||
return info_dict
|
||||
|
@ -48,6 +48,6 @@ class TF1IE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
wat_id = self._html_search_regex(
|
||||
r'(["\'])(?:https?:)?//www\.wat\.tv/embedframe/.*?(?P<id>\d{8}).*?\1',
|
||||
r'(["\'])(?:https?:)?//www\.wat\.tv/embedframe/.*?(?P<id>\d{8})\1',
|
||||
webpage, 'wat id', group='id')
|
||||
return self.url_result('wat:%s' % wat_id, 'Wat')
|
||||
|
@ -277,9 +277,9 @@ class ThePlatformIE(ThePlatformBaseIE):
|
||||
|
||||
|
||||
class ThePlatformFeedIE(ThePlatformBaseIE):
|
||||
_URL_TEMPLATE = '%s//feed.theplatform.com/f/%s/%s?form=json&byGuid=%s'
|
||||
_VALID_URL = r'https?://feed\.theplatform\.com/f/(?P<provider_id>[^/]+)/(?P<feed_id>[^?/]+)\?(?:[^&]+&)*byGuid=(?P<id>[a-zA-Z0-9_]+)'
|
||||
_TEST = {
|
||||
_URL_TEMPLATE = '%s//feed.theplatform.com/f/%s/%s?form=json&%s'
|
||||
_VALID_URL = r'https?://feed\.theplatform\.com/f/(?P<provider_id>[^/]+)/(?P<feed_id>[^?/]+)\?(?:[^&]+&)*(?P<filter>by(?:Gui|I)d=(?P<id>[\w-]+))'
|
||||
_TESTS = [{
|
||||
# From http://player.theplatform.com/p/7wvmTC/MSNBCEmbeddedOffSite?guid=n_hardball_5biden_140207
|
||||
'url': 'http://feed.theplatform.com/f/7wvmTC/msnbc_video-p-test?form=json&pretty=true&range=-40&byGuid=n_hardball_5biden_140207',
|
||||
'md5': '6e32495b5073ab414471b615c5ded394',
|
||||
@ -295,32 +295,38 @@ class ThePlatformFeedIE(ThePlatformBaseIE):
|
||||
'categories': ['MSNBC/Issues/Democrats', 'MSNBC/Issues/Elections/Election 2016'],
|
||||
'uploader': 'NBCU-NEWS',
|
||||
},
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
||||
video_id = mobj.group('id')
|
||||
provider_id = mobj.group('provider_id')
|
||||
feed_id = mobj.group('feed_id')
|
||||
|
||||
real_url = self._URL_TEMPLATE % (self.http_scheme(), provider_id, feed_id, video_id)
|
||||
feed = self._download_json(real_url, video_id)
|
||||
entry = feed['entries'][0]
|
||||
def _extract_feed_info(self, provider_id, feed_id, filter_query, video_id, custom_fields=None, asset_types_query={}):
|
||||
real_url = self._URL_TEMPLATE % (self.http_scheme(), provider_id, feed_id, filter_query)
|
||||
entry = self._download_json(real_url, video_id)['entries'][0]
|
||||
|
||||
formats = []
|
||||
subtitles = {}
|
||||
first_video_id = None
|
||||
duration = None
|
||||
asset_types = []
|
||||
for item in entry['media$content']:
|
||||
smil_url = item['plfile$url'] + '&mbr=true'
|
||||
smil_url = item['plfile$url']
|
||||
cur_video_id = ThePlatformIE._match_id(smil_url)
|
||||
if first_video_id is None:
|
||||
first_video_id = cur_video_id
|
||||
duration = float_or_none(item.get('plfile$duration'))
|
||||
cur_formats, cur_subtitles = self._extract_theplatform_smil(smil_url, video_id, 'Downloading SMIL data for %s' % cur_video_id)
|
||||
formats.extend(cur_formats)
|
||||
subtitles = self._merge_subtitles(subtitles, cur_subtitles)
|
||||
for asset_type in item['plfile$assetTypes']:
|
||||
if asset_type in asset_types:
|
||||
continue
|
||||
asset_types.append(asset_type)
|
||||
query = {
|
||||
'mbr': 'true',
|
||||
'formats': item['plfile$format'],
|
||||
'assetTypes': asset_type,
|
||||
}
|
||||
if asset_type in asset_types_query:
|
||||
query.update(asset_types_query[asset_type])
|
||||
cur_formats, cur_subtitles = self._extract_theplatform_smil(update_url_query(
|
||||
smil_url, query), video_id, 'Downloading SMIL data for %s' % asset_type)
|
||||
formats.extend(cur_formats)
|
||||
subtitles = self._merge_subtitles(subtitles, cur_subtitles)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
@ -344,5 +350,17 @@ class ThePlatformFeedIE(ThePlatformBaseIE):
|
||||
'timestamp': timestamp,
|
||||
'categories': categories,
|
||||
})
|
||||
if custom_fields:
|
||||
ret.update(custom_fields(entry))
|
||||
|
||||
return ret
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
||||
video_id = mobj.group('id')
|
||||
provider_id = mobj.group('provider_id')
|
||||
feed_id = mobj.group('feed_id')
|
||||
filter_query = mobj.group('filter')
|
||||
|
||||
return self._extract_feed_info(provider_id, feed_id, filter_query, video_id)
|
||||
|
@ -146,7 +146,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
\.
|
||||
)?
|
||||
vimeo(?P<pro>pro)?\.com/
|
||||
(?!channels/[^/?#]+/?(?:$|[?#])|[^/]+/review/|(?:album|ondemand)/)
|
||||
(?!(?:channels|album)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/)
|
||||
(?:.*?/)?
|
||||
(?:
|
||||
(?:
|
||||
@ -227,8 +227,6 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
{
|
||||
'url': 'http://vimeo.com/channels/keypeele/75629013',
|
||||
'md5': '2f86a05afe9d7abc0b9126d229bbe15d',
|
||||
'note': 'Video is freely available via original URL '
|
||||
'and protected with password when accessed via http://vimeo.com/75629013',
|
||||
'info_dict': {
|
||||
'id': '75629013',
|
||||
'ext': 'mp4',
|
||||
@ -272,7 +270,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
{
|
||||
# contains original format
|
||||
'url': 'https://vimeo.com/33951933',
|
||||
'md5': '53c688fa95a55bf4b7293d37a89c5c53',
|
||||
'md5': '2d9f5475e0537f013d0073e812ab89e6',
|
||||
'info_dict': {
|
||||
'id': '33951933',
|
||||
'ext': 'mp4',
|
||||
@ -284,6 +282,29 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'description': 'md5:ae23671e82d05415868f7ad1aec21147',
|
||||
},
|
||||
},
|
||||
{
|
||||
# only available via https://vimeo.com/channels/tributes/6213729 and
|
||||
# not via https://vimeo.com/6213729
|
||||
'url': 'https://vimeo.com/channels/tributes/6213729',
|
||||
'info_dict': {
|
||||
'id': '6213729',
|
||||
'ext': 'mp4',
|
||||
'title': 'Vimeo Tribute: The Shining',
|
||||
'uploader': 'Casey Donahue',
|
||||
'uploader_url': 're:https?://(?:www\.)?vimeo\.com/caseydonahue',
|
||||
'uploader_id': 'caseydonahue',
|
||||
'upload_date': '20090821',
|
||||
'description': 'md5:bdbf314014e58713e6e5b66eb252f4a6',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['Unable to download JSON metadata'],
|
||||
},
|
||||
{
|
||||
'url': 'http://vimeo.com/moogaloop.swf?clip_id=2539741',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'https://vimeo.com/109815029',
|
||||
'note': 'Video not completely processed, "failed" seed status',
|
||||
@ -293,6 +314,10 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'url': 'https://vimeo.com/groups/travelhd/videos/22439234',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'https://vimeo.com/album/2632481/video/79010983',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
# source file returns 403: Forbidden
|
||||
'url': 'https://vimeo.com/7809605',
|
||||
@ -369,7 +394,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
orig_url = url
|
||||
if mobj.group('pro') or mobj.group('player'):
|
||||
url = 'https://player.vimeo.com/video/' + video_id
|
||||
else:
|
||||
elif any(p in url for p in ('play_redirect_hls', 'moogaloop.swf')):
|
||||
url = 'https://vimeo.com/' + video_id
|
||||
|
||||
# Retrieve video webpage to extract further information
|
||||
@ -630,8 +655,21 @@ class VimeoChannelIE(VimeoBaseInfoExtractor):
|
||||
webpage = self._login_list_password(page_url, list_id, webpage)
|
||||
yield self._extract_list_title(webpage)
|
||||
|
||||
for video_id in re.findall(r'id="clip_(\d+?)"', webpage):
|
||||
yield self.url_result('https://vimeo.com/%s' % video_id, 'Vimeo')
|
||||
# Try extracting href first since not all videos are available via
|
||||
# short https://vimeo.com/id URL (e.g. https://vimeo.com/channels/tributes/6213729)
|
||||
clips = re.findall(
|
||||
r'id="clip_(\d+)"[^>]*>\s*<a[^>]+href="(/(?:[^/]+/)*\1)', webpage)
|
||||
if clips:
|
||||
for video_id, video_url in clips:
|
||||
yield self.url_result(
|
||||
compat_urlparse.urljoin(base_url, video_url),
|
||||
VimeoIE.ie_key(), video_id=video_id)
|
||||
# More relaxed fallback
|
||||
else:
|
||||
for video_id in re.findall(r'id=["\']clip_(\d+)', webpage):
|
||||
yield self.url_result(
|
||||
'https://vimeo.com/%s' % video_id,
|
||||
VimeoIE.ie_key(), video_id=video_id)
|
||||
|
||||
if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
|
||||
break
|
||||
@ -668,7 +706,7 @@ class VimeoUserIE(VimeoChannelIE):
|
||||
|
||||
class VimeoAlbumIE(VimeoChannelIE):
|
||||
IE_NAME = 'vimeo:album'
|
||||
_VALID_URL = r'https://vimeo\.com/album/(?P<id>\d+)'
|
||||
_VALID_URL = r'https://vimeo\.com/album/(?P<id>\d+)(?:$|[?#]|/(?!video))'
|
||||
_TITLE_RE = r'<header id="page_header">\n\s*<h1>(.*?)</h1>'
|
||||
_TESTS = [{
|
||||
'url': 'https://vimeo.com/album/2632481',
|
||||
@ -688,6 +726,13 @@ class VimeoAlbumIE(VimeoChannelIE):
|
||||
'params': {
|
||||
'videopassword': 'youtube-dl',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://vimeo.com/album/2632481/sort:plays/format:thumbnail',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# TODO: respect page number
|
||||
'url': 'https://vimeo.com/album/2632481/page:2/sort:plays/format:thumbnail',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _page_url(self, base_url, pagenum):
|
||||
|
@ -24,6 +24,7 @@ class VineIE(InfoExtractor):
|
||||
'upload_date': '20130519',
|
||||
'uploader': 'Jack Dorsey',
|
||||
'uploader_id': '76',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'repost_count': int,
|
||||
@ -39,6 +40,7 @@ class VineIE(InfoExtractor):
|
||||
'upload_date': '20140815',
|
||||
'uploader': 'Mars Ruiz',
|
||||
'uploader_id': '1102363502380728320',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'repost_count': int,
|
||||
@ -54,6 +56,7 @@ class VineIE(InfoExtractor):
|
||||
'upload_date': '20130430',
|
||||
'uploader': 'Z3k3',
|
||||
'uploader_id': '936470460173008896',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'repost_count': int,
|
||||
@ -71,6 +74,7 @@ class VineIE(InfoExtractor):
|
||||
'upload_date': '20150705',
|
||||
'uploader': 'Pimry_zaa',
|
||||
'uploader_id': '1135760698325307392',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'repost_count': int,
|
||||
@ -109,6 +113,7 @@ class VineIE(InfoExtractor):
|
||||
'upload_date': unified_strdate(data.get('created')),
|
||||
'uploader': username,
|
||||
'uploader_id': data.get('userIdStr'),
|
||||
'view_count': int_or_none(data.get('loops', {}).get('count')),
|
||||
'like_count': int_or_none(data.get('likes', {}).get('count')),
|
||||
'comment_count': int_or_none(data.get('comments', {}).get('count')),
|
||||
'repost_count': int_or_none(data.get('reposts', {}).get('count')),
|
||||
|
@ -3,6 +3,7 @@ from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
import sys
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
@ -10,7 +11,6 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
orderedSet,
|
||||
sanitized_Request,
|
||||
str_to_int,
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
@ -190,7 +190,7 @@ class VKIE(InfoExtractor):
|
||||
if username is None:
|
||||
return
|
||||
|
||||
login_page = self._download_webpage(
|
||||
login_page, url_handle = self._download_webpage_handle(
|
||||
'https://vk.com', None, 'Downloading login page')
|
||||
|
||||
login_form = self._hidden_inputs(login_page)
|
||||
@ -200,11 +200,26 @@ class VKIE(InfoExtractor):
|
||||
'pass': password.encode('cp1251'),
|
||||
})
|
||||
|
||||
request = sanitized_Request(
|
||||
'https://login.vk.com/?act=login',
|
||||
urlencode_postdata(login_form))
|
||||
# https://new.vk.com/ serves two same remixlhk cookies in Set-Cookie header
|
||||
# and expects the first one to be set rather than second (see
|
||||
# https://github.com/rg3/youtube-dl/issues/9841#issuecomment-227871201).
|
||||
# As of RFC6265 the newer one cookie should be set into cookie store
|
||||
# what actually happens.
|
||||
# We will workaround this VK issue by resetting the remixlhk cookie to
|
||||
# the first one manually.
|
||||
cookies = url_handle.headers.get('Set-Cookie')
|
||||
if sys.version_info[0] >= 3:
|
||||
cookies = cookies.encode('iso-8859-1')
|
||||
cookies = cookies.decode('utf-8')
|
||||
remixlhk = re.search(r'remixlhk=(.+?);.*?\bdomain=(.+?)(?:[,;]|$)', cookies)
|
||||
if remixlhk:
|
||||
value, domain = remixlhk.groups()
|
||||
self._set_cookie(domain, 'remixlhk', value)
|
||||
|
||||
login_page = self._download_webpage(
|
||||
request, None, note='Logging in as %s' % username)
|
||||
'https://login.vk.com/?act=login', None,
|
||||
note='Logging in as %s' % username,
|
||||
data=urlencode_postdata(login_form))
|
||||
|
||||
if re.search(r'onLoginFailed', login_page):
|
||||
raise ExtractorError(
|
||||
|
@ -6,17 +6,23 @@ from ..compat import compat_urllib_parse_unquote
|
||||
|
||||
|
||||
class XNXXIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://(?:video|www)\.xnxx\.com/video(?P<id>[0-9]+)/(.*)'
|
||||
_TEST = {
|
||||
'url': 'http://video.xnxx.com/video1135332/lida_naked_funny_actress_5_',
|
||||
'md5': '0831677e2b4761795f68d417e0b7b445',
|
||||
_VALID_URL = r'https?://(?:video|www)\.xnxx\.com/video-?(?P<id>[0-9a-z]+)/'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.xnxx.com/video-55awb78/skyrim_test_video',
|
||||
'md5': 'ef7ecee5af78f8b03dca2cf31341d3a0',
|
||||
'info_dict': {
|
||||
'id': '1135332',
|
||||
'id': '55awb78',
|
||||
'ext': 'flv',
|
||||
'title': 'lida » Naked Funny Actress (5)',
|
||||
'title': 'Skyrim Test Video',
|
||||
'age_limit': 18,
|
||||
}
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'url': 'http://video.xnxx.com/video1135332/lida_naked_funny_actress_5_',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.xnxx.com/video-55awb78/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
@ -131,8 +131,9 @@ class JSInterpreter(object):
|
||||
if variable in local_vars:
|
||||
obj = local_vars[variable]
|
||||
else:
|
||||
obj = self._objects.setdefault(
|
||||
variable, self.extract_object(variable))
|
||||
if variable not in self._objects:
|
||||
self._objects[variable] = self.extract_object(variable)
|
||||
obj = self._objects[variable]
|
||||
|
||||
if arg_str is None:
|
||||
# Member access
|
||||
@ -203,7 +204,8 @@ class JSInterpreter(object):
|
||||
argvals = tuple([
|
||||
int(v) if v.isdigit() else local_vars[v]
|
||||
for v in m.group('args').split(',')])
|
||||
self._functions.setdefault(fname, self.extract_function(fname))
|
||||
if fname not in self._functions:
|
||||
self._functions[fname] = self.extract_function(fname)
|
||||
return self._functions[fname](argvals)
|
||||
|
||||
raise ExtractorError('Unsupported JS expression %r' % expr)
|
||||
@ -230,7 +232,7 @@ class JSInterpreter(object):
|
||||
def extract_function(self, funcname):
|
||||
func_m = re.search(
|
||||
r'''(?x)
|
||||
(?:function\s+%s|[{;,]%s\s*=\s*function|var\s+%s\s*=\s*function)\s*
|
||||
(?:function\s+%s|[{;,]\s*%s\s*=\s*function|var\s+%s\s*=\s*function)\s*
|
||||
\((?P<args>[^)]*)\)\s*
|
||||
\{(?P<code>[^}]+)\}''' % (
|
||||
re.escape(funcname), re.escape(funcname), re.escape(funcname)),
|
||||
|
@ -2852,3 +2852,12 @@ def decode_packed_codes(code):
|
||||
return re.sub(
|
||||
r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
|
||||
obfucasted_code)
|
||||
|
||||
|
||||
def parse_m3u8_attributes(attrib):
|
||||
info = {}
|
||||
for (key, val) in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
|
||||
if val.startswith('"'):
|
||||
val = val[1:-1]
|
||||
info[key] = val
|
||||
return info
|
||||
|
@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2016.06.19.1'
|
||||
__version__ = '2016.06.23.1'
|
||||
|
Reference in New Issue
Block a user