2013-08-29 10:51:38 -07:00
# coding: utf-8
2014-01-06 08:15:27 -08:00
from __future__ import unicode_literals
2013-08-29 10:51:38 -07:00
2014-01-06 08:15:27 -08:00
import re
2013-08-29 10:16:07 -07:00
from . common import InfoExtractor
2016-10-14 09:43:09 -07:00
from . . compat import compat_str
2013-08-29 10:16:07 -07:00
from . . utils import (
2020-01-10 11:18:36 -08:00
clean_html ,
2017-09-14 11:37:46 -07:00
determine_ext ,
float_or_none ,
2014-01-06 08:15:27 -08:00
HEADRequest ,
2015-03-07 03:31:03 -08:00
int_or_none ,
2017-09-14 11:37:46 -07:00
orderedSet ,
2015-03-07 03:31:03 -08:00
remove_end ,
2020-01-10 11:18:36 -08:00
str_or_none ,
2017-09-14 11:37:46 -07:00
strip_jsonp ,
2016-10-14 09:43:09 -07:00
unescapeHTML ,
2017-09-14 11:37:46 -07:00
unified_strdate ,
2018-11-02 09:46:56 -07:00
url_or_none ,
2013-08-29 10:16:07 -07:00
)
2014-01-06 08:15:27 -08:00
2014-08-09 11:21:16 -07:00
class ORFTVthekIE(InfoExtractor):
    """Extract the videos listed in the playlist embedded in a TVthek page.

    The page carries a ``<div>`` whose class contains ``VideoPlaylist`` and
    whose ``data-jsb`` attribute holds HTML-escaped JSON; every usable item
    of its ``playlist.videos`` list becomes one entry of the returned
    playlist.
    """
    IE_NAME = 'orf:tvthek'
    IE_DESC = 'ORF TVthek'
    _VALID_URL = r'https?://tvthek\.orf\.at/(?:[^/]+/)+(?P<id>\d+)'

    _TESTS = [{
        'url': 'http://tvthek.orf.at/program/Aufgetischt/2745173/Aufgetischt-Mit-der-Steirischen-Tafelrunde/8891389',
        'playlist': [{
            'md5': '2942210346ed779588f428a92db88712',
            'info_dict': {
                'id': '8896777',
                'ext': 'mp4',
                'title': 'Aufgetischt: Mit der Steirischen Tafelrunde',
                'description': 'md5:c1272f0245537812d4e36419c207b67d',
                'duration': 2668,
                'upload_date': '20141208',
            },
        }],
        'skip': 'Blocked outside of Austria / Germany',
    }, {
        'url': 'http://tvthek.orf.at/topic/Im-Wandel-der-Zeit/8002126/Best-of-Ingrid-Thurnher/7982256',
        'info_dict': {
            'id': '7982259',
            'ext': 'mp4',
            'title': 'Best of Ingrid Thurnher',
            'upload_date': '20140527',
            'description': 'Viele Jahre war Ingrid Thurnher das "Gesicht" der ZIB 2. Vor ihrem Wechsel zur ZIB 2 im Jahr 1995 moderierte sie unter anderem "Land und Leute", "Österreich-Bild" und "Niederösterreich heute".',
        },
        'params': {
            'skip_download': True,  # rtsp downloads
        },
        'skip': 'Blocked outside of Austria / Germany',
    }, {
        'url': 'http://tvthek.orf.at/topic/Fluechtlingskrise/10463081/Heimat-Fremde-Heimat/13879132/Senioren-betreuen-Migrantenkinder/13879141',
        'only_matching': True,
    }, {
        'url': 'http://tvthek.orf.at/profile/Universum/35429',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a playlist dict with one video entry per playlist item.

        Items lacking an id or a title are skipped.  The playlist id is the
        numeric id matched from *url*.
        """
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)

        # The attribute value is HTML-escaped JSON, hence unescapeHTML.
        data_jsb = self._parse_json(
            self._search_regex(
                r'<div[^>]+class=(["\']).*?VideoPlaylist.*?\1[^>]+data-jsb=(["\'])(?P<json>.+?)\2',
                webpage, 'playlist', group='json'),
            playlist_id, transform_source=unescapeHTML)['playlist']['videos']

        entries = []
        for sd in data_jsb:
            video_id, title = sd.get('id'), sd.get('title')
            if not video_id or not title:
                continue
            video_id = compat_str(video_id)

            formats = []
            # A missing/null source list ends up as the regular
            # "no formats" error from _sort_formats instead of a KeyError.
            for fd in sd.get('sources') or []:
                src = url_or_none(fd.get('src'))
                if not src:
                    continue
                # Coerce to text so a non-string part cannot break join().
                format_id = '-'.join(
                    compat_str(fd[key])
                    for key in ('delivery', 'quality', 'quality_string')
                    if fd.get(key))
                ext = determine_ext(src)
                if ext == 'm3u8':
                    formats.extend(self._extract_m3u8_formats(
                        src, video_id, 'mp4', m3u8_id=format_id, fatal=False))
                elif ext == 'f4m':
                    formats.extend(self._extract_f4m_formats(
                        src, video_id, f4m_id=format_id, fatal=False))
                else:
                    formats.append({
                        'format_id': format_id,
                        'url': src,
                        'protocol': fd.get('protocol'),
                    })

            # Check for geoblocking.
            # There is a property is_geoprotection, but that's always false
            geo_str = sd.get('geoprotection_string')
            if geo_str:
                # Probe the first progressive HTTP(S) .mp4 URL, if any.
                http_url = next(
                    (f['url'] for f in formats
                     if re.match(r'^https?://.*\.mp4$', f['url'])),
                    None)
                if http_url is not None:
                    # Non-fatal: a failure only reports the hint below.
                    self._request_webpage(
                        HEADRequest(http_url), video_id,
                        note='Testing for geoblocking',
                        errnote=((
                            'This video seems to be blocked outside of %s. '
                            'You may want to try the streaming-* formats.')
                            % geo_str),
                        fatal=False)

            self._check_formats(formats, video_id)
            self._sort_formats(formats)

            subtitles = {}
            for sub in sd.get('subtitles') or []:
                sub_src = sub.get('src')
                if not sub_src:
                    continue
                # Fall back to de-AT for an absent *or* null language.
                subtitles.setdefault(sub.get('lang') or 'de-AT', []).append({
                    'url': sub_src,
                })

            entries.append({
                '_type': 'video',
                'id': video_id,
                'title': title,
                'formats': formats,
                'subtitles': subtitles,
                'description': sd.get('description'),
                'duration': int_or_none(sd.get('duration_in_seconds')),
                'upload_date': unified_strdate(sd.get('created_date')),
                'thumbnail': sd.get('image_full_url'),
            })

        return {
            '_type': 'playlist',
            'entries': entries,
            'id': playlist_id,
        }
2014-08-09 11:21:16 -07:00
2017-01-08 06:49:54 -08:00
class ORFRadioIE(InfoExtractor):
    """Shared extraction logic for the ORF radio player extractors.

    This class defines no ``_VALID_URL`` itself; ``_real_extract`` reads the
    named groups ``station``, ``date`` and ``show`` from ``self._VALID_URL``.
    """

    def _real_extract(self, url):
        """Return a playlist with one MP3 entry per stream of the broadcast.

        Streams without a ``loopStreamId`` are skipped, and no entry is
        produced when the broadcast has no title.
        """
        mobj = re.match(self._VALID_URL, url)
        station = mobj.group('station')
        show_date = mobj.group('date')
        show_id = mobj.group('show')

        data = self._download_json(
            'http://audioapi.orf.at/%s/api/json/current/broadcast/%s/%s'
            % (station, show_id, show_date), show_id)

        # Broadcast-level metadata is identical for every stream, so it is
        # computed once rather than on each loop iteration.
        title = str_or_none(data.get('title'))
        description = clean_html(data.get('subtitle'))
        series = data.get('programTitle')

        entries = []
        for info in data['streams']:
            loop_stream_id = str_or_none(info.get('loopStreamId'))
            if not loop_stream_id or not title:
                continue
            # 'start' and 'end' are divided by 1000 before use.
            start = int_or_none(info.get('start'), scale=1000)
            end = int_or_none(info.get('end'), scale=1000)
            duration = end - start if end and start else None
            entries.append({
                'id': loop_stream_id.replace('.mp3', ''),
                'url': 'http://loopstream01.apa.at/?channel=%s&id=%s' % (station, loop_stream_id),
                'title': title,
                'description': description,
                'duration': duration,
                'timestamp': start,
                'ext': 'mp3',
                'series': series,
            })

        return {
            '_type': 'playlist',
            'id': show_id,
            'title': data.get('title'),
            'description': description,
            'entries': entries,
        }
2015-03-07 03:31:03 -08:00
2017-01-08 06:49:54 -08:00
class ORFFM4IE(ORFRadioIE):
    """FM4 player URLs; extraction itself is inherited from ORFRadioIE."""
    IE_NAME = 'orf:fm4'
    IE_DESC = 'radio FM4'
    # The show id must start with "4"; the station group is fixed to "fm4".
    _VALID_URL = r'https?://(?P<station>fm4)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>4\w+)'

    _TEST = {
        'url': 'http://fm4.orf.at/player/20170107/4CC',
        'md5': '2b0be47375432a7ef104453432a19212',
        'info_dict': {
            'id': '2017-01-07_2100_tl_54_7DaysSat18_31295',
            'ext': 'mp3',
            'title': 'Solid Steel Radioshow',
            'description': 'Die Mixshow von Coldcut und Ninja Tune.',
            'duration': 3599,
            'timestamp': 1483819257,
            'upload_date': '20170107',
        },
        'skip': 'Shows from ORF radios are only available for 7 days.',
        'only_matching': True,
    }
2017-01-08 06:49:54 -08:00
class ORFOE1IE(ORFRadioIE):
    """Ö1 player URLs; extraction itself is inherited from ORFRadioIE."""
    IE_NAME = 'orf:oe1'
    IE_DESC = 'Radio Österreich 1'
    # The station group is fixed to "oe1"; any word characters form the show id.
    _VALID_URL = r'https?://(?P<station>oe1)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'

    _TEST = {
        'url': 'http://oe1.orf.at/player/20170108/456544',
        'md5': '34d8a6e67ea888293741c86a099b745b',
        'info_dict': {
            'id': '2017-01-08_0759_tl_51_7DaysSun6_256141',
            'ext': 'mp3',
            'title': 'Morgenjournal',
            'duration': 609,
            'timestamp': 1483858796,
            'upload_date': '20170108',
        },
        'skip': 'Shows from ORF radios are only available for 7 days.'
    }
2017-01-08 06:49:54 -08:00
2015-03-07 03:31:03 -08:00
class ORFIPTVIE(InfoExtractor):
    """Extract the single video referenced by an iptv.ORF.at story page."""
    IE_NAME = 'orf:iptv'
    IE_DESC = 'iptv.ORF.at'
    _VALID_URL = r'https?://iptv\.orf\.at/(?:#/)?stories/(?P<id>\d+)'

    _TEST = {
        'url': 'http://iptv.orf.at/stories/2275236/',
        'md5': 'c8b22af4718a4b4af58342529453e3e5',
        'info_dict': {
            'id': '350612',
            'ext': 'flv',
            'title': 'Weitere Evakuierungen um Vulkan Calbuco',
            'description': 'md5:d689c959bdbcf04efeddedbf2299d633',
            'duration': 68.197,
            'thumbnail': r're:^https?://.*\.jpg$',
            'upload_date': '20150425',
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the video embedded in the story page.

        The story page is always fetched from its canonical
        ``/stories/<id>`` address, regardless of the form of *url*.
        """
        story_id = self._match_id(url)

        webpage = self._download_webpage(
            'http://iptv.orf.at/stories/%s' % story_id, story_id)

        video_id = self._search_regex(
            r'data-video(?:id)?="(\d+)"', webpage, 'video id')

        # The endpoint answers with a list; only its first item is used.
        data = self._download_json(
            'http://bits.orf.at/filehandler/static-api/json/current/data.json?file=%s' % video_id,
            video_id)[0]

        # Duration is optional metadata: a missing value yields None
        # instead of aborting the extraction.
        duration = float_or_none(data.get('duration'), 1000)

        video = data['sources']['default']
        thumbnail = video.get('preview')

        # Technical properties; attached to the rtmp format only.
        base_format = {
            'abr': int_or_none(video.get('audioBitrate')),
            'vbr': int_or_none(video.get('bitrate')),
            'fps': int_or_none(video.get('videoFps')),
            'width': int_or_none(video.get('videoWidth')),
            'height': int_or_none(video.get('videoHeight')),
        }

        # The load balancer replies with JSONP mapping format ids to URLs.
        rendition = self._download_json(
            video['loadBalancerUrl'], video_id, transform_source=strip_jsonp)

        formats = []
        for format_id, format_url in rendition['redirect'].items():
            if format_id == 'rtmp':
                rtmp_format = base_format.copy()
                rtmp_format.update({
                    'url': format_url,
                    'format_id': format_id,
                })
                formats.append(rtmp_format)
                continue
            ext = determine_ext(format_url)
            if ext == 'f4m':
                formats.extend(self._extract_f4m_formats(
                    format_url, video_id, f4m_id=format_id))
            elif ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    format_url, video_id, 'mp4', m3u8_id=format_id))
            # Any other kind of URL is ignored.
        self._sort_formats(formats)

        title = remove_end(self._og_search_title(webpage), ' - iptv.ORF.at')
        description = self._og_search_description(webpage)
        upload_date = unified_strdate(self._html_search_meta(
            'dc.date', webpage, 'upload date'))

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
            'formats': formats,
        }
2017-09-14 11:37:46 -07:00
class ORFFM4StoryIE(InfoExtractor):
    """Extract every video embedded in an fm4.orf.at story as a playlist."""
    IE_NAME = 'orf:fm4:story'
    IE_DESC = 'fm4.orf.at stories'
    _VALID_URL = r'https?://fm4\.orf\.at/stories/(?P<id>\d+)'

    _TEST = {
        'url': 'http://fm4.orf.at/stories/2865738/',
        'playlist': [{
            'md5': 'e1c2c706c45c7b34cf478bbf409907ca',
            'info_dict': {
                'id': '547792',
                'ext': 'flv',
                'title': 'Manu Delago und Inner Tongue live',
                'description': 'Manu Delago und Inner Tongue haben bei der FM4 Soundpark Session live alles gegeben. Hier gibt es Fotos und die gesamte Session als Video.',
                'duration': 1748.52,
                'thumbnail': r're:^https?://.*\.jpg$',
                'upload_date': '20170913',
            },
        }, {
            'md5': 'c6dd2179731f86f4f55a7b49899d515f',
            'info_dict': {
                'id': '547798',
                'ext': 'flv',
                'title': 'Manu Delago und Inner Tongue live (2)',
                'duration': 1504.08,
                'thumbnail': r're:^https?://.*\.jpg$',
                'upload_date': '20170913',
                'description': 'Manu Delago und Inner Tongue haben bei der FM4 Soundpark Session live alles gegeben. Hier gibt es Fotos und die gesamte Session als Video.',
            },
        }],
    }

    def _real_extract(self, url):
        """Return a playlist with one entry per distinct video id on the page.

        All entries share the page's title, description and upload date; the
        second and later entries get a " (n)" suffix so titles stay unique.
        """
        story_id = self._match_id(url)
        webpage = self._download_webpage(url, story_id)

        # orderedSet drops repeated ids while keeping page order.
        all_ids = orderedSet(re.findall(r'data-video(?:id)?="(\d+)"', webpage))
        if not all_ids:
            # No embedded video: do not require any page metadata.
            return self.playlist_result([])

        # Page-level metadata is the same for every video, so it is read
        # from the webpage once rather than on each loop iteration.
        base_title = remove_end(self._og_search_title(webpage), ' - fm4.ORF.at')
        description = self._og_search_description(webpage)
        upload_date = unified_strdate(self._html_search_meta(
            'dc.date', webpage, 'upload date'))

        entries = []
        for idx, video_id in enumerate(all_ids):
            # The endpoint answers with a list; only its first item is used.
            data = self._download_json(
                'http://bits.orf.at/filehandler/static-api/json/current/data.json?file=%s' % video_id,
                video_id)[0]

            # Duration is optional metadata: a missing value yields None
            # instead of aborting the extraction.
            duration = float_or_none(data.get('duration'), 1000)

            video = data['sources']['q8c']
            thumbnail = video.get('preview')

            # Technical properties; attached to the rtmp format only.
            base_format = {
                'abr': int_or_none(video.get('audioBitrate')),
                'vbr': int_or_none(video.get('bitrate')),
                'fps': int_or_none(video.get('videoFps')),
                'width': int_or_none(video.get('videoWidth')),
                'height': int_or_none(video.get('videoHeight')),
            }

            # The load balancer replies with JSONP mapping format ids to URLs.
            rendition = self._download_json(
                video['loadBalancerUrl'], video_id, transform_source=strip_jsonp)

            formats = []
            for format_id, format_url in rendition['redirect'].items():
                if format_id == 'rtmp':
                    rtmp_format = base_format.copy()
                    rtmp_format.update({
                        'url': format_url,
                        'format_id': format_id,
                    })
                    formats.append(rtmp_format)
                    continue
                ext = determine_ext(format_url)
                if ext == 'f4m':
                    formats.extend(self._extract_f4m_formats(
                        format_url, video_id, f4m_id=format_id))
                elif ext == 'm3u8':
                    formats.extend(self._extract_m3u8_formats(
                        format_url, video_id, 'mp4', m3u8_id=format_id))
                # Any other kind of URL is ignored.
            self._sort_formats(formats)

            title = base_title
            if idx >= 1:
                # Titles are duplicates, make them unique
                title = '%s (%d)' % (base_title, idx + 1)

            entries.append({
                'id': video_id,
                'title': title,
                'description': description,
                'duration': duration,
                'thumbnail': thumbnail,
                'upload_date': upload_date,
                'formats': formats,
            })

        return self.playlist_result(entries)