Compare commits

...

3 Commits

3 changed files with 25 additions and 22 deletions

View File

@ -6,14 +6,15 @@ from .common import InfoExtractor
class WimpIE(InfoExtractor):
_VALID_URL = r'(?:http://)?(?:www\.)?wimp\.com/([^/]+)/'
_VALID_URL = r'http://(?:www\.)?wimp\.com/([^/]+)/'
_TEST = {
'url': 'http://www.wimp.com/deerfence/',
'file': 'deerfence.flv',
'md5': '8b215e2e0168c6081a1cf84b2846a2b5',
'url': 'http://www.wimp.com/maruexhausted/',
'md5': 'f1acced123ecb28d9bb79f2479f2b6a1',
'info_dict': {
"title": "Watch Till End: Herd of deer jump over a fence.",
"description": "These deer look as fluid as running water when they jump over this fence as a herd. This video is one that needs to be watched until the very end for the true majesty to be witnessed, but once it comes, it's sure to take your breath away.",
'id': 'maruexhausted',
'ext': 'flv',
'title': 'Maru is exhausted.',
'description': 'md5:57e099e857c0a4ea312542b684a869b8',
}
}
@ -30,4 +31,4 @@ class WimpIE(InfoExtractor):
'title': self._og_search_title(webpage),
'thumbnail': self._og_search_thumbnail(webpage),
'description': self._og_search_description(webpage),
}
}

View File

@ -1443,9 +1443,9 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
((?:PL|EC|UU|FL|RD)[0-9A-Za-z-_]{10,})
)"""
_TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&page=%s'
_TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
_MORE_PAGES_INDICATOR = r'data-link-type="next"'
_VIDEO_RE = r'href="/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)'
_VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)'
IE_NAME = u'youtube:playlist'
def _real_initialize(self):
@ -1493,29 +1493,31 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
raise ExtractorError(u'For downloading YouTube.com top lists, use '
u'the "yttoplist" keyword, for example "youtube-dl \'yttoplist:music:Top Tracks\'"', expected=True)
url = self._TEMPLATE_URL % playlist_id
page = self._download_webpage(url, playlist_id)
more_widget_html = content_html = page
# Extract the video ids from the playlist pages
ids = []
for page_num in itertools.count(1):
url = self._TEMPLATE_URL % (playlist_id, page_num)
page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)
matches = re.finditer(self._VIDEO_RE, page)
matches = re.finditer(self._VIDEO_RE, content_html)
# We remove the duplicates and the link with index 0
# (it's not the first video of the playlist)
new_ids = orderedSet(m.group('id') for m in matches if m.group('index') != '0')
ids.extend(new_ids)
if re.search(self._MORE_PAGES_INDICATOR, page) is None:
mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
if not mobj:
break
try:
playlist_title = self._og_search_title(page)
except RegexNotFoundError:
self.report_warning(
u'Playlist page is missing OpenGraph title, falling back ...',
playlist_id)
playlist_title = self._html_search_regex(
r'<h1 class="pl-header-title">(.*?)</h1>', page, u'title')
more = self._download_json(
'https://youtube.com/%s' % mobj.group('more'), playlist_id, 'Downloading page #%s' % page_num)
content_html = more['content_html']
more_widget_html = more['load_more_widget_html']
playlist_title = self._html_search_regex(
r'<h1 class="pl-header-title">\s*(.*?)\s*</h1>', page, u'title')
url_results = self._ids_to_results(ids)
return self.playlist_result(url_results, playlist_id, playlist_title)

View File

@ -1,2 +1,2 @@
__version__ = '2014.02.20'
__version__ = '2014.02.21'