[bbccouk] Improve extraction (Closes #5530)
This commit is contained in:
parent
1dbd717eb4
commit
8683b4d8d9
@ -3,7 +3,10 @@ from __future__ import unicode_literals
|
|||||||
import xml.etree.ElementTree
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import ExtractorError
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
int_or_none,
|
||||||
|
)
|
||||||
from ..compat import compat_HTTPError
|
from ..compat import compat_HTTPError
|
||||||
|
|
||||||
|
|
||||||
@ -326,16 +329,29 @@ class BBCCoUkIE(InfoExtractor):
|
|||||||
|
|
||||||
webpage = self._download_webpage(url, group_id, 'Downloading video page')
|
webpage = self._download_webpage(url, group_id, 'Downloading video page')
|
||||||
|
|
||||||
|
thumbnail = self._og_search_thumbnail(webpage)
|
||||||
|
|
||||||
|
programme_id = None
|
||||||
|
|
||||||
|
tviplayer = self._search_regex(
|
||||||
|
r'mediator\.bind\(({.+?})\s*,\s*document\.getElementById',
|
||||||
|
webpage, 'player', default=None)
|
||||||
|
|
||||||
|
if tviplayer:
|
||||||
|
player = self._parse_json(tviplayer, group_id).get('player', {})
|
||||||
|
duration = int_or_none(player.get('duration'))
|
||||||
|
programme_id = player.get('vpid')
|
||||||
|
|
||||||
|
if not programme_id:
|
||||||
programme_id = self._search_regex(
|
programme_id = self._search_regex(
|
||||||
r'"vpid"\s*:\s*"([\da-z]{8})"', webpage, 'vpid', fatal=False, default=None)
|
r'"vpid"\s*:\s*"([\da-z]{8})"', webpage, 'vpid', fatal=False, default=None)
|
||||||
|
|
||||||
if programme_id:
|
if programme_id:
|
||||||
player = self._download_json(
|
|
||||||
'http://www.bbc.co.uk/iplayer/episode/%s.json' % group_id,
|
|
||||||
group_id)['jsConf']['player']
|
|
||||||
title = player['title']
|
|
||||||
description = player['subtitle']
|
|
||||||
duration = player['duration']
|
|
||||||
formats, subtitles = self._download_media_selector(programme_id)
|
formats, subtitles = self._download_media_selector(programme_id)
|
||||||
|
title = self._og_search_title(webpage)
|
||||||
|
description = self._search_regex(
|
||||||
|
r'<p class="medium-description">([^<]+)</p>',
|
||||||
|
webpage, 'description', fatal=False)
|
||||||
else:
|
else:
|
||||||
programme_id, title, description, duration, formats, subtitles = self._download_playlist(group_id)
|
programme_id, title, description, duration, formats, subtitles = self._download_playlist(group_id)
|
||||||
|
|
||||||
@ -345,6 +361,7 @@ class BBCCoUkIE(InfoExtractor):
|
|||||||
'id': programme_id,
|
'id': programme_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': description,
|
'description': description,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user