2014-01-29 06:34:35 -08:00
from __future__ import unicode_literals
2013-06-23 12:14:19 -07:00
import base64
from . common import InfoExtractor
2015-12-13 01:29:27 -08:00
from . . compat import compat_urllib_parse_unquote
2013-06-23 12:14:19 -07:00
class InfoQIE(InfoExtractor):
    """Information extractor for infoq.com pages.

    Two formats are built from data embedded in the downloaded page: an RTMP
    stream whose play path is decoded from the ``jsclassref`` value, and an
    HTTP URL that is sent together with ``CloudFront-*`` cookies scraped from
    the same page.
    """

    _VALID_URL = r'https?://(?:www\.)?infoq\.com/(?:[^/]+/)+(?P<id>[^/]+)'

    _TESTS = [{
        'url': 'http://www.infoq.com/presentations/A-Few-of-My-Favorite-Python-Things',
        'md5': 'b5ca0e0a8c1fed93b0e65e48e462f9a2',
        'info_dict': {
            'id': '12-jan-pythonthings',
            'ext': 'mp4',
            'description': 'Mike Pirnat presents some tips and tricks, standard libraries and third party packages that make programming in Python a richer experience.',
            'title': 'A Few of My Favorite [Python] Things',
        },
    }, {
        'url': 'http://www.infoq.com/fr/presentations/changez-avis-sur-javascript',
        'only_matching': True,
    }]

    @staticmethod
    def _split_filename(filename):
        """Split *filename* into ``(stem, extension)`` at its last dot.

        Returns ``(filename, None)`` when the name contains no dot, so callers
        never have to deal with an unpacking error on unusual file names.
        """
        stem, dot, extension = filename.rpartition('.')
        if not dot:
            return filename, None
        return stem, extension

    def _real_extract(self, url):
        """Download the page at *url* and return its info dict.

        The returned dict carries ``id``, ``title``, ``description`` and
        ``formats`` (one ``rtmp`` and one ``http`` entry).

        NOTE(review): the ``_search_regex`` / ``_html_search_*`` helpers are
        inherited; presumably they raise when a pattern is not found -
        confirm against InfoExtractor.
        """
        # The id taken from the URL is only used while downloading; the final
        # id is derived from the media file name further down.
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        video_title = self._html_search_regex(r'<title>(.*?)</title>', webpage, 'title')
        video_description = self._html_search_meta('description', webpage, 'description')

        # The server URL is hardcoded
        video_url = 'rtmpe://video.infoq.com/cfx/st/'

        # Extract video URL: the page stores it base64-encoded and then
        # percent-encoded, so both layers are undone here.
        encoded_id = self._search_regex(
            r"jsclassref\s*=\s*'([^']*)'", webpage, 'encoded id')
        real_id = compat_urllib_parse_unquote(
            base64.b64decode(encoded_id.encode('ascii')).decode('utf-8'))
        playpath = 'mp4:' + real_id

        # Split on the LAST dot only: a plain split('.') with two-target
        # unpacking fails on names such as "talk.v2.mp4" or names without
        # any extension.
        video_filename = playpath.split('/')[-1]
        video_id, extension = self._split_filename(video_filename)

        http_video_url = self._search_regex(
            r'P\.s\s*=\s*\'([^\']+)\'', webpage, 'video URL')

        # The dot after "InfoQConstants" is escaped so that it matches a
        # literal '.' only, consistent with the P\.s pattern above.
        policy = self._search_regex(
            r'InfoQConstants\.scp\s*=\s*\'([^\']+)\'', webpage, 'policy')
        signature = self._search_regex(
            r'InfoQConstants\.scs\s*=\s*\'([^\']+)\'', webpage, 'signature')
        key_pair_id = self._search_regex(
            r'InfoQConstants\.sck\s*=\s*\'([^\']+)\'', webpage, 'key-pair-id')

        formats = [{
            'format_id': 'rtmp',
            'url': video_url,
            'ext': extension,
            'play_path': playpath,
        }, {
            'format_id': 'http',
            'url': http_video_url,
            'http_headers': {
                # The three scraped values are passed along as cookies with
                # the HTTP request.
                'Cookie': 'CloudFront-Policy=%s; CloudFront-Signature=%s; CloudFront-Key-Pair-Id=%s' % (
                    policy, signature, key_pair_id),
            },
        }]
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': video_title,
            'description': video_description,
            'formats': formats,
        }