2013-08-22 03:37:34 -07:00
# coding: utf-8
2014-02-08 09:43:12 -08:00
from __future__ import unicode_literals
2013-08-18 07:11:47 -07:00
import re
from . common import InfoExtractor
2013-10-07 05:33:23 -07:00
2013-08-18 07:11:47 -07:00
class JeuxVideoIE(InfoExtractor):
    """Information extractor for video pages hosted on jeuxvideo.com.

    The page itself only embeds a relative link to a JSON configuration
    document; that document lists the actual media sources and the
    thumbnail, so extraction is a two-step download (HTML page, then JSON).
    """

    # Group 1 captures the last path component of the page URL (without the
    # ``.htm`` suffix). It is only a human-readable slug, not the video id.
    _VALID_URL = r'https?://.*?\.jeuxvideo\.com/.*/(.*?)\.htm'

    _TESTS = [{
        'url': 'http://www.jeuxvideo.com/reportages-videos-jeux/0004/00046170/tearaway-playstation-vita-gc-2013-tearaway-nous-presente-ses-papiers-d-identite-00115182.htm',
        'md5': '046e491afb32a8aaac1f44dd4ddd54ee',
        'info_dict': {
            'id': '114765',
            'ext': 'mp4',
            'title': 'Tearaway : GC 2013 : Tearaway nous présente ses papiers d\'identité',
            'description': 'Lorsque les développeurs de LittleBigPlanet proposent un nouveau titre, on ne peut que s\'attendre à un résultat original et fort attrayant.',
        },
    }, {
        'url': 'http://www.jeuxvideo.com/videos/chroniques/434220/l-histoire-du-jeu-video-la-saturn.htm',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Extract the metadata and media formats for a jeuxvideo.com page.

        Args:
            url: Page URL; expected to match ``_VALID_URL``.

        Returns:
            A dict with the keys ``id``, ``title``, ``formats``,
            ``description`` and ``thumbnail``.
        """
        # The URL slug is only used to label the page download; the numeric
        # video id is not known until the config URL has been located.
        mobj = re.match(self._VALID_URL, url)
        slug = mobj.group(1)

        webpage = self._download_webpage(url, slug)

        # Prefer the page's ``name`` meta tag and fall back to the Open Graph
        # title when the meta lookup yields nothing.
        title = (
            self._html_search_meta('name', webpage)
            or self._og_search_title(webpage))

        # The player markup carries a site-relative path to the JSON config
        # in either a ``data-src`` or a ``data-srcset-video`` attribute.
        config_path = self._html_search_regex(
            r'data-src(?:set-video)?="(/contenu/medias/video\.php.*?)"',
            webpage, 'config URL')
        config_url = 'http://www.jeuxvideo.com' + config_path

        # The numeric video id is taken from the config URL's ``id=`` part.
        video_id = self._search_regex(
            r'id=(\d+)', config_url, 'video ID')

        # Label the config download with the video id now that it is known.
        config = self._download_json(
            config_url, video_id, 'Downloading JSON config')

        # Each source's label doubles as format id and resolution. The list
        # is emitted in the reverse of the config's order.
        # NOTE(review): presumably the config lists the best quality first
        # and the consumer expects it last -- confirm.
        formats = [{
            'url': source['file'],
            'format_id': source['label'],
            'resolution': source['label'],
        } for source in reversed(config['sources'])]

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'description': self._og_search_description(webpage),
            'thumbnail': config.get('image'),
        }