2013-07-16 11:50:16 -07:00
# coding: utf-8
2014-01-16 18:32:02 -08:00
from __future__ import unicode_literals
2013-07-16 11:50:16 -07:00
import re
from . common import InfoExtractor
2014-12-13 03:24:42 -08:00
from . . compat import (
2013-07-16 11:50:16 -07:00
compat_urllib_parse_urlparse ,
compat_urlparse ,
)
2014-12-13 03:24:42 -08:00
from . . utils import (
2016-08-04 10:28:49 -07:00
determine_ext ,
2017-03-22 09:22:14 -07:00
extract_attributes ,
2016-08-04 10:28:49 -07:00
int_or_none ,
2017-03-22 09:22:14 -07:00
js_to_json ,
mimetype2ext ,
orderedSet ,
2016-08-04 10:28:49 -07:00
parse_iso8601 ,
2014-12-13 03:24:42 -08:00
)
2013-07-16 11:50:16 -07:00
class CondeNastIE(InfoExtractor):
    """
    Condé Nast is a media group, some of its sites use a custom HTML5 player
    that works the same in all of them.

    Handles both direct player URLs (embed / embedjs / script / inline),
    which carry the 24-hex-digit video id in the URL, and ``watch`` /
    ``video`` / ``series`` pages, which have to be downloaded and scraped
    for the player parameters.
    """

    # The keys are the supported sites and the values are the name to be shown
    # to the user and in the extractor description.
    _SITES = {
        'allure': 'Allure',
        'architecturaldigest': 'Architectural Digest',
        'arstechnica': 'Ars Technica',
        'bonappetit': 'Bon Appétit',
        'brides': 'Brides',
        'cnevids': 'Condé Nast',
        'cntraveler': 'Condé Nast Traveler',
        'details': 'Details',
        'epicurious': 'Epicurious',
        'glamour': 'Glamour',
        'golfdigest': 'Golf Digest',
        'gq': 'GQ',
        'newyorker': 'The New Yorker',
        'self': 'SELF',
        'teenvogue': 'Teen Vogue',
        'vanityfair': 'Vanity Fair',
        'vogue': 'Vogue',
        'wired': 'WIRED',
        'wmagazine': 'W Magazine',
    }

    # Capture groups, in order: id, player_id, target, type, display_id.
    # _real_extract unpacks them positionally, so keep this order stable.
    _VALID_URL = r'''(?x)https?://(?:video|www|player(?:-backend)?)\.(?:%s)\.com/
        (?:
            (?:
                embed(?:js)?|
                (?:script|inline)/video
            )/(?P<id>[0-9a-f]{24})(?:/(?P<player_id>[0-9a-f]{24}))?(?:.+?\btarget=(?P<target>[^&]+))?|
            (?P<type>watch|series|video)/(?P<display_id>[^/?#]+)
        )''' % '|'.join(_SITES.keys())
    IE_DESC = 'Condé Nast media group: %s' % ', '.join(sorted(_SITES.values()))

    EMBED_URL = r'(?:https?:)?//player(?:-backend)?\.(?:%s)\.com/(?:embed(?:js)?|(?:script|inline)/video)/.+?' % '|'.join(_SITES.keys())

    _TESTS = [{
        'url': 'http://video.wired.com/watch/3d-printed-speakers-lit-with-led',
        'md5': '1921f713ed48aabd715691f774c451f7',
        'info_dict': {
            'id': '5171b343c2b4c00dd0c1ccb3',
            'ext': 'mp4',
            'title': '3D Printed Speakers Lit With LED',
            'description': 'Check out these beautiful 3D printed LED speakers. You can\'t actually buy them, but LumiGeek is working on a board that will let you make you\'re own.',
            'uploader': 'wired',
            'upload_date': '20130314',
            'timestamp': 1363219200,
        }
    }, {
        'url': 'http://video.gq.com/watch/the-closer-with-keith-olbermann-the-only-true-surprise-trump-s-an-idiot?c=series',
        'info_dict': {
            'id': '58d1865bfd2e6126e2000015',
            'ext': 'mp4',
            'title': 'The Only True Surprise? Trump’s an Idiot',
            'uploader': 'gq',
            'upload_date': '20170321',
            'timestamp': 1490126427,
        },
    }, {
        # JS embed
        'url': 'http://player.cnevids.com/embedjs/55f9cf8b61646d1acf00000c/5511d76261646d5566020000.js',
        'md5': 'f1a6f9cafb7083bab74a710f65d08999',
        'info_dict': {
            'id': '55f9cf8b61646d1acf00000c',
            'ext': 'mp4',
            'title': '3D printed TSA Travel Sentry keys really do open TSA locks',
            'uploader': 'arstechnica',
            'upload_date': '20150916',
            'timestamp': 1442434955,
        }
    }, {
        'url': 'https://player.cnevids.com/inline/video/59138decb57ac36b83000005.js?target=js-cne-player',
        'only_matching': True,
    }, {
        'url': 'http://player-backend.cnevids.com/script/video/59138decb57ac36b83000005.js',
        'only_matching': True,
    }]

    def _extract_series(self, url, webpage):
        """Build a playlist result from a series page.

        The playlist title comes from the ``cne-series-info`` header and the
        entries from every ``/watch/...`` link found under a
        ``cne-thumb-title`` paragraph, de-duplicated in page order and
        resolved against the scheme and host of ``url``.
        """
        title = self._html_search_regex(
            r'(?s)<div class="cne-series-info">.*?<h1>(.+?)</h1>',
            webpage, 'series title')

        url_object = compat_urllib_parse_urlparse(url)
        base_url = '%s://%s' % (url_object.scheme, url_object.netloc)

        watch_paths = orderedSet(
            mobj.group(1) for mobj in re.finditer(
                r'(?s)<p class="cne-thumb-title">.*?<a href="(/watch/.+?)["\?]',
                webpage))
        entries = [
            self.url_result(
                compat_urlparse.urljoin(base_url, path), 'CondeNast')
            for path in watch_paths]
        return self.playlist_result(entries, playlist_title=title)

    def _extract_video_params(self, webpage, display_id):
        """Scrape the player parameters out of a watch/video page.

        Two page layouts are handled:

        * a ``var params = {...}`` JavaScript object, complemented with the
          video id taken from ``data-video-id`` / ``currentVideoId``
          (``None`` when neither is present);
        * a ``data-js="video-player"`` element whose ``data-video``,
          ``data-player`` and ``id`` attributes provide the parameters.

        Returns a dict suitable for passing to ``_extract_video``.
        """
        # _parse_json is called with fatal=False, so a malformed params
        # object does not abort extraction.  Normalise whatever falsy value
        # it yields to an empty dict so the attribute-based fallback below
        # has something to fill in instead of failing on a non-dict.
        query = self._parse_json(
            self._search_regex(
                r'(?s)var\s+params\s*=\s*({.+?})[;,]', webpage,
                'player params', default='{}'),
            display_id, transform_source=js_to_json, fatal=False) or {}

        if query:
            query['videoId'] = self._search_regex(
                r'(?:data-video-id=|currentVideoId\s*=\s*)["\']([\da-f]+)',
                webpage, 'video id', default=None)
            return query

        # No usable inline params: read them from the player element.
        player_attrs = extract_attributes(self._search_regex(
            r'(<[^>]+data-js="video-player"[^>]+>)',
            webpage, 'player params element'))
        query.update({
            'videoId': player_attrs['data-video'],
            'playerId': player_attrs['data-player'],
            'target': player_attrs['id'],
        })
        return query

    def _extract_video(self, params):
        """Resolve player parameters into an info dict for a single video.

        ``params`` must contain ``videoId`` and may contain ``playerId`` and
        ``target`` (either may be ``None``).  The embed API is tried first,
        then the older player endpoints; the first response that yields a
        ``video`` object wins.
        """
        video_id = params['videoId']

        video_info = None

        # New API path
        query = params.copy()
        query['embedType'] = 'inline'
        info_page = self._download_json(
            'http://player.cnevids.com/embed-api.json', video_id,
            'Downloading embed info', fatal=False, query=query)

        # Old fallbacks
        if not info_page:
            if params.get('playerId'):
                info_page = self._download_json(
                    'http://player.cnevids.com/player/video.js', video_id,
                    'Downloading video info', fatal=False, query=params)
        if info_page:
            video_info = info_page.get('video')
        if not video_info:
            # NOTE(review): this response is always replaced by the inline
            # download just below before anything reads it; the request is
            # kept to preserve the existing request sequence.
            info_page = self._download_webpage(
                'http://player.cnevids.com/player/loader.js',
                video_id, 'Downloading loader info', query=params)
        if not video_info:
            info_page = self._download_webpage(
                'https://player.cnevids.com/inline/video/%s.js' % video_id,
                video_id, 'Downloading inline info', query={
                    # ``target`` may be present but None (optional URL
                    # group), in which case a plain .get() default would
                    # never apply.
                    'target': params.get('target') or 'embedplayer',
                })

        if not video_info:
            # The webpage fallbacks embed the metadata as a JS object.
            video_info = self._parse_json(
                self._search_regex(
                    r'(?s)var\s+config\s*=\s*({.+?});', info_page, 'config'),
                video_id, transform_source=js_to_json)['video']

        title = video_info['title']

        formats = []
        for fdata in video_info['sources']:
            src = fdata.get('src')
            if not src:
                continue
            ext = mimetype2ext(fdata.get('type')) or determine_ext(src)
            if ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    src, video_id, 'mp4', entry_protocol='m3u8_native',
                    m3u8_id='hls', fatal=False))
                continue
            quality = fdata.get('quality')
            formats.append({
                'format_id': ext + ('-%s' % quality if quality else ''),
                'url': src,
                'ext': ext,
                'quality': 1 if quality == 'high' else 0,
            })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'formats': formats,
            'title': title,
            'thumbnail': video_info.get('poster_frame'),
            'uploader': video_info.get('brand'),
            'duration': int_or_none(video_info.get('duration')),
            'tags': video_info.get('tags'),
            'series': video_info.get('series_title'),
            'season': video_info.get('season_title'),
            'timestamp': parse_iso8601(video_info.get('premiere_date')),
            'categories': video_info.get('categories'),
        }

    def _real_extract(self, url):
        """Dispatch on URL shape: direct player URL, series page or video page."""
        video_id, player_id, target, url_type, display_id = re.match(
            self._VALID_URL, url).groups()

        # Player/embed URLs already carry the video id: no page to scrape.
        if video_id:
            return self._extract_video({
                'videoId': video_id,
                'playerId': player_id,
                'target': target,
            })

        webpage = self._download_webpage(url, display_id)

        if url_type == 'series':
            return self._extract_series(url, webpage)

        params = self._extract_video_params(webpage, display_id)
        # JSON-LD metadata is used as a base; fields produced by the player
        # API take precedence.
        info = self._search_json_ld(
            webpage, display_id, fatal=False)
        info.update(self._extract_video(params))
        return info