parent
2ab2c0d1f5
commit
019f4c0371
@ -1,6 +1,7 @@
|
|||||||
version <unreleased>
|
version <unreleased>
|
||||||
|
|
||||||
Extractors
|
Extractors
|
||||||
|
* [bandcamp] Fix extraction for incomplete albums (#11727)
|
||||||
* [iwara] Fix extraction (#11781)
|
* [iwara] Fix extraction (#11781)
|
||||||
* [googledrive] Fix extraction on Python 3.6
|
* [googledrive] Fix extraction on Python 3.6
|
||||||
|
|
||||||
|
@ -209,6 +209,15 @@ class BandcampAlbumIE(InfoExtractor):
|
|||||||
'id': 'entropy-ep',
|
'id': 'entropy-ep',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 3,
|
'playlist_mincount': 3,
|
||||||
|
}, {
|
||||||
|
# not all tracks have songs
|
||||||
|
'url': 'https://insulters.bandcamp.com/album/we-are-the-plague',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'we-are-the-plague',
|
||||||
|
'title': 'WE ARE THE PLAGUE',
|
||||||
|
'uploader_id': 'insulters',
|
||||||
|
},
|
||||||
|
'playlist_count': 2,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@ -217,12 +226,16 @@ class BandcampAlbumIE(InfoExtractor):
|
|||||||
album_id = mobj.group('album_id')
|
album_id = mobj.group('album_id')
|
||||||
playlist_id = album_id or uploader_id
|
playlist_id = album_id or uploader_id
|
||||||
webpage = self._download_webpage(url, playlist_id)
|
webpage = self._download_webpage(url, playlist_id)
|
||||||
tracks_paths = re.findall(r'<a href="(.*?)" itemprop="url">', webpage)
|
track_elements = re.findall(
|
||||||
if not tracks_paths:
|
r'(?s)<div[^>]*>(.*?<a[^>]+href="([^"]+?)"[^>]+itemprop="url"[^>]*>.*?)</div>', webpage)
|
||||||
|
if not track_elements:
|
||||||
raise ExtractorError('The page doesn\'t contain any tracks')
|
raise ExtractorError('The page doesn\'t contain any tracks')
|
||||||
|
# Only tracks with duration info have songs
|
||||||
entries = [
|
entries = [
|
||||||
self.url_result(compat_urlparse.urljoin(url, t_path), ie=BandcampIE.ie_key())
|
self.url_result(compat_urlparse.urljoin(url, t_path), ie=BandcampIE.ie_key())
|
||||||
for t_path in tracks_paths]
|
for elem_content, t_path in track_elements
|
||||||
|
if self._html_search_meta('duration', elem_content, default=None)]
|
||||||
|
|
||||||
title = self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
r'album_title\s*:\s*"((?:\\.|[^"\\])+?)"',
|
r'album_title\s*:\s*"((?:\\.|[^"\\])+?)"',
|
||||||
webpage, 'title', fatal=False)
|
webpage, 'title', fatal=False)
|
||||||
|
Loading…
Reference in New Issue
Block a user