[extractor/common] Relax interaction count extraction in _json_ld
This commit is contained in:
parent
ad06b99dd4
commit
ce5b904050
@ -68,6 +68,7 @@ from ..utils import (
|
|||||||
sanitized_Request,
|
sanitized_Request,
|
||||||
sanitize_filename,
|
sanitize_filename,
|
||||||
str_or_none,
|
str_or_none,
|
||||||
|
str_to_int,
|
||||||
strip_or_none,
|
strip_or_none,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
@ -1248,7 +1249,10 @@ class InfoExtractor(object):
|
|||||||
interaction_type = is_e.get('interactionType')
|
interaction_type = is_e.get('interactionType')
|
||||||
if not isinstance(interaction_type, compat_str):
|
if not isinstance(interaction_type, compat_str):
|
||||||
continue
|
continue
|
||||||
interaction_count = int_or_none(is_e.get('userInteractionCount'))
|
# For the interaction count, some sites provide a string instead of
|
||||||
|
# an integer (as per spec) containing non-digit characters (e.g. ","),
|
||||||
|
# so extract the count with the more relaxed str_to_int
|
||||||
|
interaction_count = str_to_int(is_e.get('userInteractionCount'))
|
||||||
if interaction_count is None:
|
if interaction_count is None:
|
||||||
continue
|
continue
|
||||||
count_kind = INTERACTION_TYPE_MAP.get(interaction_type.split('/')[-1])
|
count_kind = INTERACTION_TYPE_MAP.get(interaction_type.split('/')[-1])
|
||||||
|
Loading…
Reference in New Issue
Block a user