2016-01-01 12:03:16 -08:00
# coding: utf-8
from __future__ import unicode_literals
import re
from . common import InfoExtractor
from . . compat import compat_str
from . . utils import (
int_or_none ,
parse_iso8601 ,
unescapeHTML ,
2016-01-01 12:53:19 -08:00
qualities ,
2016-01-01 12:03:16 -08:00
)
class Revision3IE(InfoExtractor):
    """Extractor for pages on revision3.com, testtube.com and animalist.com.

    A matching URL resolves either to a single video (page type ``episode``
    or ``embed``) or to a paginated listing (any other page type, such as
    the show and tag pages in ``_TESTS``), which is returned as a playlist.
    """

    _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:revision3|testtube|animalist)\.com)/(?P<id>[^/]+(?:/[^/?#]+)?)'
    _TESTS = [{
        'url': 'http://www.revision3.com/technobuffalo/5-google-predictions-for-2016',
        'md5': 'd94a72d85d0a829766de4deb8daaf7df',
        'info_dict': {
            'id': '73034',
            'display_id': 'technobuffalo/5-google-predictions-for-2016',
            'ext': 'webm',
            'title': '5 Google Predictions for 2016',
            'description': 'Google had a great 2015, but it\'s already time to look ahead. Here are our five predictions for 2016.',
            'upload_date': '20151228',
            'timestamp': 1451325600,
            'duration': 187,
            'uploader': 'TechnoBuffalo',
            'uploader_id': 'technobuffalo',
        }
    }, {
        # Show
        'url': 'http://testtube.com/brainstuff',
        'info_dict': {
            'id': '251',
            'title': 'BrainStuff',
            'description': 'Whether the topic is popcorn or particle physics, you can count on the HowStuffWorks team to explore-and explain-the everyday science in the world around us on BrainStuff.',
        },
        'playlist_mincount': 93,
    }, {
        'url': 'https://testtube.com/dnews/5-weird-ways-plants-can-eat-animals?utm_source=FB&utm_medium=DNews&utm_campaign=DNewsSocial',
        'info_dict': {
            'id': '60163',
            'display_id': 'dnews/5-weird-ways-plants-can-eat-animals',
            'duration': 275,
            'ext': 'webm',
            'title': '5 Weird Ways Plants Can Eat Animals',
            'description': 'Why have some plants evolved to eat meat?',
            'upload_date': '20150120',
            'timestamp': 1421763300,
            'uploader': 'DNews',
            'uploader_id': 'dnews',
        },
    }, {
        'url': 'http://testtube.com/tt-editors-picks/the-israel-palestine-conflict-explained-in-ten-min',
        'info_dict': {
            'id': '73573',
            'ext': 'mp4',
            'display_id': 'tt-editors-picks/the-israel-palestine-conflict-explained-in-ten-min',
            'title': 'The Israel-Palestine Conflict Explained in Ten Minutes',
            'description': 'If you\'d like to learn about the struggle between Israelis and Palestinians, this video is a great place to start',
            'uploader': 'Editors\' Picks',
            'uploader_id': 'tt-editors-picks',
            'timestamp': 1453309200,
            'upload_date': '20160120',
        },
        'add_ie': ['Youtube'],
    }, {
        # Tag
        'url': 'http://testtube.com/tech-news',
        'info_dict': {
            'id': '21018',
            'title': 'tech news',
        },
        'playlist_mincount': 9,
    }]
    # Filled with (domain, page path, domain); the page path is the display
    # id, optionally followed by '/<page number>' for listing pages.
    _PAGE_DATA_TEMPLATE = 'http://www.%s/apiProxy/ddn/%s?domain=%s'
    _API_KEY = 'ba9c741bce1b9d8e3defcc22193f3651b8867e62'

    def _real_extract(self, url):
        """Extract a single video or a playlist from a matching page URL.

        Args:
            url: A URL matching ``_VALID_URL``.

        Returns:
            For ``episode`` pages, an info dict carrying ``formats``; for
            ``embed`` pages, a ``url_transparent`` result pointing at the
            embedded URL; for every other page type, a playlist result with
            one URL entry per listed episode.
        """
        domain, display_id = re.match(self._VALID_URL, url).groups()
        page_info = self._download_json(
            self._PAGE_DATA_TEMPLATE % (domain, display_id, domain),
            display_id)

        page_type = page_info['data']['type']
        if page_type in ('episode', 'embed'):
            return self._extract_video(
                display_id, page_info['data'], page_type)
        return self._extract_listing(domain, display_id, page_info, page_type)

    def _extract_video(self, display_id, page_data, page_type):
        """Build the result for an ``episode`` or ``embed`` page."""
        show_data = page_data['show']['data']
        video_id = compat_str(page_data['video']['data']['id'])

        # Rank thumbnails by their size label, smallest first.
        preference = qualities(['mini', 'small', 'medium', 'large'])
        thumbnails = [{
            'url': image_url,
            'id': image_id,
            'preference': preference(image_id)
        } for image_id, image_url in page_data.get('images', {}).items()]

        info = {
            'id': video_id,
            'display_id': display_id,
            'title': unescapeHTML(page_data['name']),
            'description': unescapeHTML(page_data.get('summary')),
            'timestamp': parse_iso8601(page_data.get('publishTime'), ' '),
            'author': page_data.get('author'),
            'uploader': show_data.get('name'),
            'uploader_id': show_data.get('slug'),
            'thumbnails': thumbnails,
        }

        if page_type == 'embed':
            # The media is hosted elsewhere: hand the embed URL to whichever
            # extractor matches it, keeping the metadata gathered above.
            info.update({
                '_type': 'url_transparent',
                'url': page_data['video']['data']['embed'],
            })
            return info

        video_data = self._download_json(
            'http://revision3.com/api/getPlaylist.json?api_key=%s&codecs=h264,vp8,theora&video_id=%s' % (self._API_KEY, video_id),
            video_id)['items'][0]

        formats = []
        for vcodec, media in video_data['media'].items():
            for quality_id, quality in media.items():
                if quality_id == 'hls':
                    formats.extend(self._extract_m3u8_formats(
                        quality['url'], video_id, 'mp4',
                        'm3u8_native', m3u8_id='hls', fatal=False))
                else:
                    formats.append({
                        'url': quality['url'],
                        'format_id': '%s-%s' % (vcodec, quality_id),
                        'tbr': int_or_none(quality.get('bitrate')),
                        'vcodec': vcodec,
                    })
        self._sort_formats(formats)

        # Values from the playlist API override the page-level metadata.
        info.update({
            'title': unescapeHTML(video_data['title']),
            'description': unescapeHTML(video_data.get('summary')),
            'uploader': video_data.get('show', {}).get('name'),
            'uploader_id': video_data.get('show', {}).get('slug'),
            'duration': int_or_none(video_data.get('duration')),
            'formats': formats,
        })
        return info

    def _extract_listing(self, domain, display_id, page_info, page_type):
        """Build a playlist from a paginated listing page."""
        list_data = page_info[page_type]['data']
        episodes_data = page_info['episodes']['data']
        num_episodes = int_or_none(page_info['meta']['totalEpisodes'])

        entries = []
        processed_episodes = 0
        page_num = 1
        while True:
            entries.extend(
                self.url_result('http://%s%s' % (domain, episode['path']))
                for episode in episodes_data)
            processed_episodes += len(episodes_data)
            # An empty page means there is nothing left to fetch; without
            # this check a total that is never reached exactly would keep
            # the loop requesting pages forever.
            if not episodes_data:
                break
            if num_episodes is not None and processed_episodes >= num_episodes:
                break
            page_num += 1
            episodes_data = self._download_json(
                self._PAGE_DATA_TEMPLATE % (
                    domain, display_id + '/' + compat_str(page_num), domain),
                display_id)['episodes']['data']

        return self.playlist_result(
            entries, compat_str(list_data['id']),
            list_data.get('name'), list_data.get('summary'))