Compare commits

...

12 Commits

Author SHA1 Message Date
Philipp Hagemeister
61aabb9d70 release 2014.08.02 2014-08-02 12:25:40 +02:00
Philipp Hagemeister
62af3a0eb5 [youtube] Use new signature cache ID for in-memory cache as well 2014-08-02 12:23:18 +02:00
Philipp Hagemeister
60064c53f1 [youtube] Make cache ID a tuple of lengths instead of just the whole length 2014-08-02 12:21:53 +02:00
Philipp Hagemeister
98eb1c3fa2 [youtube] Clean up -v signature output 2014-08-02 11:55:20 +02:00
Philipp Hagemeister
201e9eaa0e [youtube] Show format ID in signature deobfuscation -v output 2014-08-02 06:35:18 +02:00
Sergey M․
9afa6ede21 Merge branch 'naglis-izlesene' 2014-08-01 19:08:27 +07:00
Sergey M․
f4776371ae [izlesene] Minor changes 2014-08-01 19:08:09 +07:00
Sergey M․
328a20bf9c Merge branch 'izlesene' of https://github.com/naglis/youtube-dl into naglis-izlesene 2014-08-01 18:16:47 +07:00
Sergey M․
5622f29ae4 [ard] Quote path part instead of whole URL encode 2014-07-31 21:23:15 +07:00
Sergey M․
b4f23afbd1 [ard] Encode url (Closes #3412) 2014-07-31 20:35:29 +07:00
Sergey M․
0138968a6a [vidme] Add extractor (Closes #3404) 2014-07-31 20:26:52 +07:00
Naglis Jonaitis
366b1f3cfe [izlesene] Add new extractor. Closes #3184 2014-07-26 14:35:23 +03:00
7 changed files with 236 additions and 51 deletions

View File

@@ -143,6 +143,7 @@ from .ivi import (
IviIE, IviIE,
IviCompilationIE IviCompilationIE
) )
from .izlesene import IzleseneIE
from .jadorecettepub import JadoreCettePubIE from .jadorecettepub import JadoreCettePubIE
from .jeuxvideo import JeuxVideoIE from .jeuxvideo import JeuxVideoIE
from .jukebox import JukeboxIE from .jukebox import JukeboxIE
@@ -347,6 +348,7 @@ from .videofyme import VideofyMeIE
from .videopremium import VideoPremiumIE from .videopremium import VideoPremiumIE
from .videott import VideoTtIE from .videott import VideoTtIE
from .videoweed import VideoWeedIE from .videoweed import VideoWeedIE
from .vidme import VidmeIE
from .vimeo import ( from .vimeo import (
VimeoIE, VimeoIE,
VimeoChannelIE, VimeoChannelIE,

View File

@@ -8,6 +8,8 @@ from ..utils import (
determine_ext, determine_ext,
ExtractorError, ExtractorError,
qualities, qualities,
compat_urllib_parse_urlparse,
compat_urllib_parse,
) )
@@ -44,6 +46,9 @@ class ARDIE(InfoExtractor):
else: else:
video_id = m.group('video_id') video_id = m.group('video_id')
urlp = compat_urllib_parse_urlparse(url)
url = urlp._replace(path=compat_urllib_parse.quote(urlp.path.encode('utf-8'))).geturl()
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
title = self._html_search_regex( title = self._html_search_regex(

View File

@@ -0,0 +1,97 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
get_element_by_id,
parse_iso8601,
determine_ext,
int_or_none,
str_to_int,
)
class IzleseneIE(InfoExtractor):
    """Information extractor for videos hosted on izlesene.com."""

    _VALID_URL = r'https?://(?:(?:www|m)\.)?izlesene\.com/(?:video|embedplayer)/(?:[^/]+/)?(?P<id>[0-9]+)'
    # Template of the endpoint queried once per quality to get a stream URL.
    _STREAM_URL = 'http://panel.izlesene.com/api/streamurl/{id:}/{format:}'

    _TEST = {
        'url': 'http://www.izlesene.com/video/sevincten-cildirtan-dogum-gunu-hediyesi/7599694',
        'md5': '4384f9f0ea65086734b881085ee05ac2',
        'info_dict': {
            'id': '7599694',
            'ext': 'mp4',
            'title': 'Sevinçten Çıldırtan Doğum Günü Hediyesi',
            'description': 'Annesi oğluna doğum günü hediyesi olarak minecraft cd si alıyor, ve çocuk hunharca seviniyor',
            'thumbnail': r're:^http://.*\.jpg',
            'uploader_id': 'pelikzzle',
            'timestamp': 1404298698,
            'upload_date': '20140702',
            'duration': 95.395,
            'age_limit': 0,
        }
    }

    def _real_extract(self, url):
        """Download the video page for *url* and return its info dict.

        The returned dict carries the video id, Open Graph metadata,
        uploader id, timestamp, duration, view/comment counts, an age
        limit and one format entry per advertised quality.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        # _VALID_URL also accepts the "m." host and "embedplayer" paths;
        # always fetch the page built from the numeric id instead.
        url = 'http://www.izlesene.com/video/%s' % video_id
        webpage = self._download_webpage(url, video_id)

        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage)
        thumbnail = self._og_search_thumbnail(webpage)

        uploader = self._html_search_regex(
            r"adduserUsername\s*=\s*'([^']+)';", webpage, 'uploader', fatal=False, default='')
        timestamp = parse_iso8601(self._html_search_meta(
            'uploadDate', webpage, 'upload date', fatal=False))

        duration = int_or_none(self._html_search_regex(
            r'"videoduration"\s*:\s*"([^"]+)"', webpage, 'duration', fatal=False))
        if duration:
            # NOTE(review): the page value is presumably in milliseconds;
            # the test above expects 95.395 after this division.
            duration /= 1000.0

        view_count = str_to_int(get_element_by_id('videoViewCount', webpage))
        # Label this search 'comment count' (it was mislabelled 'uploader')
        # so that a failed match is reported against the right field.
        comment_count = self._html_search_regex(
            r'comment_count\s*=\s*\'([^\']+)\';', webpage, 'comment count', fatal=False)

        family_friendly = self._html_search_meta(
            'isFamilyFriendly', webpage, 'age limit', fatal=False)

        content_url = self._html_search_meta(
            'contentURL', webpage, 'content URL', fatal=False)
        ext = determine_ext(content_url, 'mp4')

        # Might be empty for some videos.
        qualities = self._html_search_regex(
            r'"quality"\s*:\s*"([^"]+)"', webpage, 'qualities', fatal=False, default='')

        formats = []
        # ''.split('|') yields [''], so a page without qualities still
        # triggers one request, whose format is labelled 'sd' below.
        for quality in qualities.split('|'):
            stream_info = self._download_json(
                self._STREAM_URL.format(id=video_id, format=quality), video_id,
                note='Getting video URL for "%s" quality' % quality,
                errnote='Failed to get video URL for "%s" quality' % quality
            )
            formats.append({
                'url': stream_info.get('streamurl'),
                'ext': ext,
                'format_id': '%sp' % quality if quality else 'sd',
            })

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'uploader_id': uploader,
            'timestamp': timestamp,
            'duration': duration,
            'view_count': int_or_none(view_count),
            'comment_count': int_or_none(comment_count),
            # Only an explicit 'False' marks the video as adult content.
            'age_limit': 18 if family_friendly == 'False' else 0,
            'formats': formats,
        }

View File

@@ -0,0 +1,68 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
float_or_none,
str_to_int,
)
class VidmeIE(InfoExtractor):
    """Information extractor for videos hosted on vid.me."""

    _VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z]+)'
    _TEST = {
        'url': 'https://vid.me/QNB',
        'md5': 'f42d05e7149aeaec5c037b17e5d3dc82',
        'info_dict': {
            'id': 'QNB',
            'ext': 'mp4',
            'title': 'Fishing for piranha - the easy way',
            'description': 'source: https://www.facebook.com/photo.php?v=312276045600871',
            'duration': 119.92,
            'timestamp': 1406313244,
            'upload_date': '20140725',
            'thumbnail': r're:^https?://.*\.jpg',
        },
    }

    def _real_extract(self, url):
        """Download the page for *url* and return its info dict.

        The video URL is taken from the page's ``<source>`` tag; title,
        description, thumbnail, timestamp and dimensions come from Open
        Graph properties; duration and the counters are scraped from
        HTML attributes and text.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)

        # No fatal=False here, unlike the optional fields below.
        video_url = self._html_search_regex(r'<source src="([^"]+)"', webpage, 'video URL')

        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage, default='')
        thumbnail = self._og_search_thumbnail(webpage)
        timestamp = int_or_none(self._og_search_property('updated_time', webpage, fatal=False))
        width = int_or_none(self._og_search_property('video:width', webpage, fatal=False))
        height = int_or_none(self._og_search_property('video:height', webpage, fatal=False))
        duration = float_or_none(self._html_search_regex(
            r'data-duration="([^"]+)"', webpage, 'duration', fatal=False))
        view_count = str_to_int(self._html_search_regex(
            r'<span class="video_views">\s*([\d,\.]+)\s*plays?', webpage, 'view count', fatal=False))
        like_count = str_to_int(self._html_search_regex(
            r'class="score js-video-vote-score"[^>]+data-score="([\d,\.\s]+)">',
            webpage, 'like count', fatal=False))
        comment_count = str_to_int(self._html_search_regex(
            r'class="js-comment-count"[^>]+data-count="([\d,\.\s]+)">',
            webpage, 'comment count', fatal=False))

        return {
            'id': video_id,
            'url': video_url,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'timestamp': timestamp,
            'width': width,
            'height': height,
            'duration': duration,
            'view_count': view_count,
            'like_count': like_count,
            'comment_count': comment_count,
        }

View File

@@ -344,7 +344,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
"""Indicate the download will use the RTMP protocol.""" """Indicate the download will use the RTMP protocol."""
self.to_screen(u'RTMP download detected') self.to_screen(u'RTMP download detected')
def _extract_signature_function(self, video_id, player_url, slen): def _signature_cache_id(self, example_sig):
""" Return a string representation of a signature """
return u'.'.join(compat_str(len(part)) for part in example_sig.split('.'))
def _extract_signature_function(self, video_id, player_url, example_sig):
id_m = re.match( id_m = re.match(
r'.*-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.(?P<ext>[a-z]+)$', r'.*-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.(?P<ext>[a-z]+)$',
player_url) player_url)
@@ -354,7 +358,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
player_id = id_m.group('id') player_id = id_m.group('id')
# Read from filesystem cache # Read from filesystem cache
func_id = '%s_%s_%d' % (player_type, player_id, slen) func_id = '%s_%s_%s' % (
player_type, player_id, self._signature_cache_id(example_sig))
assert os.path.basename(func_id) == func_id assert os.path.basename(func_id) == func_id
cache_dir = get_cachedir(self._downloader.params) cache_dir = get_cachedir(self._downloader.params)
@@ -388,7 +393,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
if cache_enabled: if cache_enabled:
try: try:
test_string = u''.join(map(compat_chr, range(slen))) test_string = u''.join(map(compat_chr, range(len(example_sig))))
cache_res = res(test_string) cache_res = res(test_string)
cache_spec = [ord(c) for c in cache_res] cache_spec = [ord(c) for c in cache_res]
try: try:
@@ -404,7 +409,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
return res return res
def _print_sig_code(self, func, slen): def _print_sig_code(self, func, example_sig):
def gen_sig_code(idxs): def gen_sig_code(idxs):
def _genslice(start, end, step): def _genslice(start, end, step):
starts = u'' if start == 0 else str(start) starts = u'' if start == 0 else str(start)
@@ -433,11 +438,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
else: else:
yield _genslice(start, i, step) yield _genslice(start, i, step)
test_string = u''.join(map(compat_chr, range(slen))) test_string = u''.join(map(compat_chr, range(len(example_sig))))
cache_res = func(test_string) cache_res = func(test_string)
cache_spec = [ord(c) for c in cache_res] cache_spec = [ord(c) for c in cache_res]
expr_code = u' + '.join(gen_sig_code(cache_spec)) expr_code = u' + '.join(gen_sig_code(cache_spec))
code = u'if len(s) == %d:\n return %s\n' % (slen, expr_code) signature_id_tuple = '(%s)' % (
', '.join(compat_str(len(p)) for p in example_sig.split('.')))
code = (u'if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
u' return %s\n') % (signature_id_tuple, expr_code)
self.to_screen(u'Extracted signature function:\n' + code) self.to_screen(u'Extracted signature function:\n' + code)
def _parse_sig_js(self, jscode): def _parse_sig_js(self, jscode):
@@ -465,20 +473,20 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
if player_url.startswith(u'//'): if player_url.startswith(u'//'):
player_url = u'https:' + player_url player_url = u'https:' + player_url
try: try:
player_id = (player_url, len(s)) player_id = (player_url, self._signature_cache_id(s))
if player_id not in self._player_cache: if player_id not in self._player_cache:
func = self._extract_signature_function( func = self._extract_signature_function(
video_id, player_url, len(s) video_id, player_url, s
) )
self._player_cache[player_id] = func self._player_cache[player_id] = func
func = self._player_cache[player_id] func = self._player_cache[player_id]
if self._downloader.params.get('youtube_print_sig_code'): if self._downloader.params.get('youtube_print_sig_code'):
self._print_sig_code(func, len(s)) self._print_sig_code(func, s)
return func(s) return func(s)
except Exception as e: except Exception as e:
tb = traceback.format_exc() tb = traceback.format_exc()
raise ExtractorError( raise ExtractorError(
u'Automatic signature extraction failed: ' + tb, cause=e) u'Signature extraction failed: ' + tb, cause=e)
def _get_available_subtitles(self, video_id, webpage): def _get_available_subtitles(self, video_id, webpage):
try: try:
@@ -806,8 +814,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
url_map = {} url_map = {}
for url_data_str in encoded_url_map.split(','): for url_data_str in encoded_url_map.split(','):
url_data = compat_parse_qs(url_data_str) url_data = compat_parse_qs(url_data_str)
if 'itag' in url_data and 'url' in url_data: if 'itag' not in url_data or 'url' not in url_data:
continue
format_id = url_data['itag'][0]
url = url_data['url'][0] url = url_data['url'][0]
if 'sig' in url_data: if 'sig' in url_data:
url += '&signature=' + url_data['sig'][0] url += '&signature=' + url_data['sig'][0]
elif 's' in url_data: elif 's' in url_data:
@@ -841,16 +852,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
'html5 player', fatal=False) 'html5 player', fatal=False)
player_desc = u'html5 player %s' % player_version player_desc = u'html5 player %s' % player_version
parts_sizes = u'.'.join(compat_str(len(part)) for part in encrypted_sig.split('.')) parts_sizes = self._signature_cache_id(encrypted_sig)
self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' % self.to_screen(u'{%s} signature length %s, %s' %
(len(encrypted_sig), parts_sizes, url_data['itag'][0], player_desc)) (format_id, parts_sizes, player_desc))
signature = self._decrypt_signature( signature = self._decrypt_signature(
encrypted_sig, video_id, player_url, age_gate) encrypted_sig, video_id, player_url, age_gate)
url += '&signature=' + signature url += '&signature=' + signature
if 'ratebypass' not in url: if 'ratebypass' not in url:
url += '&ratebypass=yes' url += '&ratebypass=yes'
url_map[url_data['itag'][0]] = url url_map[format_id] = url
formats = _map_to_format_list(url_map) formats = _map_to_format_list(url_map)
elif video_info.get('hlsvp'): elif video_info.get('hlsvp'):
manifest_url = video_info['hlsvp'][0] manifest_url = video_info['hlsvp'][0]

View File

@@ -852,6 +852,8 @@ def unified_strdate(date_str):
return upload_date return upload_date
def determine_ext(url, default_ext=u'unknown_video'): def determine_ext(url, default_ext=u'unknown_video'):
if url is None:
return default_ext
guess = url.partition(u'?')[0].rpartition(u'.')[2] guess = url.partition(u'?')[0].rpartition(u'.')[2]
if re.match(r'^[A-Za-z0-9]+$', guess): if re.match(r'^[A-Za-z0-9]+$', guess):
return guess return guess

View File

@@ -1,2 +1,2 @@
__version__ = '2014.07.30' __version__ = '2014.08.02'