[youtube] Fix categories and improve tags extraction
This commit is contained in:
parent
ed604ce7bc
commit
dbeafce5d5
@@ -2356,17 +2356,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
m_cat_container = self._search_regex(
|
m_cat_container = self._search_regex(
|
||||||
r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
|
r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
|
||||||
video_webpage, 'categories', default=None)
|
video_webpage, 'categories', default=None)
|
||||||
|
category = None
|
||||||
if m_cat_container:
|
if m_cat_container:
|
||||||
category = self._html_search_regex(
|
category = self._html_search_regex(
|
||||||
r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
|
r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
|
||||||
default=None)
|
default=None)
|
||||||
video_categories = None if category is None else [category]
|
if not category:
|
||||||
else:
|
category = try_get(
|
||||||
video_categories = None
|
microformat, lambda x: x['category'], compat_str)
|
||||||
|
video_categories = None if category is None else [category]
|
||||||
|
|
||||||
video_tags = [
|
video_tags = [
|
||||||
unescapeHTML(m.group('content'))
|
unescapeHTML(m.group('content'))
|
||||||
for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
|
for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
|
||||||
|
if not video_tags:
|
||||||
|
video_tags = try_get(video_details, lambda x: x['keywords'], list)
|
||||||
|
|
||||||
def _extract_count(count_name):
|
def _extract_count(count_name):
|
||||||
return str_to_int(self._search_regex(
|
return str_to_int(self._search_regex(
|
||||||
|
Loading…
Reference in New Issue
Block a user