Compare commits
3 Commits
2014.02.20
...
2014.02.21
Author | SHA1 | Date | |
---|---|---|---|
43e77ca455 | |||
da36297988 | |||
dbb94fb044 |
@ -6,14 +6,15 @@ from .common import InfoExtractor
|
|||||||
|
|
||||||
|
|
||||||
class WimpIE(InfoExtractor):
|
class WimpIE(InfoExtractor):
|
||||||
_VALID_URL = r'(?:http://)?(?:www\.)?wimp\.com/([^/]+)/'
|
_VALID_URL = r'http://(?:www\.)?wimp\.com/([^/]+)/'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.wimp.com/deerfence/',
|
'url': 'http://www.wimp.com/maruexhausted/',
|
||||||
'file': 'deerfence.flv',
|
'md5': 'f1acced123ecb28d9bb79f2479f2b6a1',
|
||||||
'md5': '8b215e2e0168c6081a1cf84b2846a2b5',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
"title": "Watch Till End: Herd of deer jump over a fence.",
|
'id': 'maruexhausted',
|
||||||
"description": "These deer look as fluid as running water when they jump over this fence as a herd. This video is one that needs to be watched until the very end for the true majesty to be witnessed, but once it comes, it's sure to take your breath away.",
|
'ext': 'flv',
|
||||||
|
'title': 'Maru is exhausted.',
|
||||||
|
'description': 'md5:57e099e857c0a4ea312542b684a869b8',
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1443,9 +1443,9 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
|||||||
|
|
|
|
||||||
((?:PL|EC|UU|FL|RD)[0-9A-Za-z-_]{10,})
|
((?:PL|EC|UU|FL|RD)[0-9A-Za-z-_]{10,})
|
||||||
)"""
|
)"""
|
||||||
_TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&page=%s'
|
_TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
|
||||||
_MORE_PAGES_INDICATOR = r'data-link-type="next"'
|
_MORE_PAGES_INDICATOR = r'data-link-type="next"'
|
||||||
_VIDEO_RE = r'href="/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&[^"]*?index=(?P<index>\d+)'
|
_VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&[^"]*?index=(?P<index>\d+)'
|
||||||
IE_NAME = u'youtube:playlist'
|
IE_NAME = u'youtube:playlist'
|
||||||
|
|
||||||
def _real_initialize(self):
|
def _real_initialize(self):
|
||||||
@ -1493,29 +1493,31 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
|||||||
raise ExtractorError(u'For downloading YouTube.com top lists, use '
|
raise ExtractorError(u'For downloading YouTube.com top lists, use '
|
||||||
u'the "yttoplist" keyword, for example "youtube-dl \'yttoplist:music:Top Tracks\'"', expected=True)
|
u'the "yttoplist" keyword, for example "youtube-dl \'yttoplist:music:Top Tracks\'"', expected=True)
|
||||||
|
|
||||||
|
url = self._TEMPLATE_URL % playlist_id
|
||||||
|
page = self._download_webpage(url, playlist_id)
|
||||||
|
more_widget_html = content_html = page
|
||||||
|
|
||||||
# Extract the video ids from the playlist pages
|
# Extract the video ids from the playlist pages
|
||||||
ids = []
|
ids = []
|
||||||
|
|
||||||
for page_num in itertools.count(1):
|
for page_num in itertools.count(1):
|
||||||
url = self._TEMPLATE_URL % (playlist_id, page_num)
|
matches = re.finditer(self._VIDEO_RE, content_html)
|
||||||
page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)
|
|
||||||
matches = re.finditer(self._VIDEO_RE, page)
|
|
||||||
# We remove the duplicates and the link with index 0
|
# We remove the duplicates and the link with index 0
|
||||||
# (it's not the first video of the playlist)
|
# (it's not the first video of the playlist)
|
||||||
new_ids = orderedSet(m.group('id') for m in matches if m.group('index') != '0')
|
new_ids = orderedSet(m.group('id') for m in matches if m.group('index') != '0')
|
||||||
ids.extend(new_ids)
|
ids.extend(new_ids)
|
||||||
|
|
||||||
if re.search(self._MORE_PAGES_INDICATOR, page) is None:
|
mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
|
||||||
|
if not mobj:
|
||||||
break
|
break
|
||||||
|
|
||||||
try:
|
more = self._download_json(
|
||||||
playlist_title = self._og_search_title(page)
|
'https://youtube.com/%s' % mobj.group('more'), playlist_id, 'Downloading page #%s' % page_num)
|
||||||
except RegexNotFoundError:
|
content_html = more['content_html']
|
||||||
self.report_warning(
|
more_widget_html = more['load_more_widget_html']
|
||||||
u'Playlist page is missing OpenGraph title, falling back ...',
|
|
||||||
playlist_id)
|
|
||||||
playlist_title = self._html_search_regex(
|
playlist_title = self._html_search_regex(
|
||||||
r'<h1 class="pl-header-title">(.*?)</h1>', page, u'title')
|
r'<h1 class="pl-header-title">\s*(.*?)\s*</h1>', page, u'title')
|
||||||
|
|
||||||
url_results = self._ids_to_results(ids)
|
url_results = self._ids_to_results(ids)
|
||||||
return self.playlist_result(url_results, playlist_id, playlist_title)
|
return self.playlist_result(url_results, playlist_id, playlist_title)
|
||||||
|
@ -1,2 +1,2 @@
|
|||||||
|
|
||||||
__version__ = '2014.02.20'
|
__version__ = '2014.02.21'
|
||||||
|
Reference in New Issue
Block a user