2013-08-29 12:51:38 -05:00
# coding: utf-8
2013-08-29 19:16:07 +02:00
import re
import xml . etree . ElementTree
import json
from . common import InfoExtractor
from . . utils import (
compat_urlparse ,
ExtractorError ,
find_xpath_attr ,
)
class ORFIE ( InfoExtractor ) :
_VALID_URL = r ' https?://tvthek.orf.at/(programs/.+?/episodes|topics/.+?)/(?P<id> \ d+) '
_TEST = {
u ' url ' : u ' http://tvthek.orf.at/programs/1171769-Wetter-ZIB/episodes/6557323-Wetter ' ,
u ' file ' : u ' 6566957.flv ' ,
u ' info_dict ' : {
u ' title ' : u ' Wetter ' ,
u ' description ' : u ' Christa Kummer, Marcus Wadsak und Kollegen präsentieren abwechselnd ihre täglichen Wetterprognosen für Österreich. \r \r Mehr Wetter unter wetter.ORF.at ' ,
} ,
u ' params ' : {
# It uses rtmp
u ' skip_download ' : True ,
}
}
def _real_extract ( self , url ) :
mobj = re . match ( self . _VALID_URL , url )
playlist_id = mobj . group ( ' id ' )
webpage = self . _download_webpage ( url , playlist_id )
flash_xml = self . _search_regex ( ' ORF.flashXML = \' (.+?) \' ' , webpage , u ' flash xml ' )
flash_xml = compat_urlparse . parse_qs ( ' xml= ' + flash_xml ) [ ' xml ' ] [ 0 ]
flash_config = xml . etree . ElementTree . fromstring ( flash_xml . encode ( ' utf-8 ' ) )
playlist_json = self . _search_regex ( r ' playlist \' : \' ( \ [.*? \ ]) \' ' , webpage , u ' playlist ' ) . replace ( r ' \ " ' , ' " ' )
playlist = json . loads ( playlist_json )
videos = [ ]
ns = ' { http://tempuri.org/XMLSchema.xsd} '
xpath = ' %(ns)s Playlist/ %(ns)s Items/ %(ns)s Item ' % { ' ns ' : ns }
webpage_description = self . _og_search_description ( webpage )
for ( i , ( item , info ) ) in enumerate ( zip ( flash_config . findall ( xpath ) , playlist ) , 1 ) :
# Get best quality url
rtmp_url = None
for q in [ ' Q6A ' , ' Q4A ' , ' Q1A ' ] :
video_url = find_xpath_attr ( item , ' %s VideoUrl ' % ns , ' quality ' , q )
if video_url is not None :
rtmp_url = video_url . text
break
if rtmp_url is None :
raise ExtractorError ( u ' Couldn \' t get video url: %s ' % info [ ' id ' ] )
description = self . _html_search_regex (
r ' id= " playlist_entry_ %s " .*?<p>(.*?)</p> ' % i , webpage ,
u ' description ' , default = webpage_description , flags = re . DOTALL )
videos . append ( {
' _type ' : ' video ' ,
' id ' : info [ ' id ' ] ,
' title ' : info [ ' title ' ] ,
' url ' : rtmp_url ,
' ext ' : ' flv ' ,
' description ' : description ,
} )
return videos