release 2014.08.02

[youtube] Use new signature cache ID for in-memory cache as well
[youtube] Make cache ID a tuple of lengths instead of just the whole length
2014-08-02 12:25:40 +02:00 · 2014-08-02 12:23:18 +02:00 · 2014-08-02 12:21:53 +02:00 · 2014-08-02 11:55:20 +02:00 · 2014-08-02 06:35:18 +02:00 · 2014-08-01 19:08:27 +07:00
7 changed files with 236 additions and 51 deletions
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -143,6 +143,7 @@ from .ivi import (
    IviIE,
    IviCompilationIE
 )
 from .izlesene import IzleseneIE
 from .jadorecettepub import JadoreCettePubIE
 from .jeuxvideo import JeuxVideoIE
 from .jukebox import JukeboxIE
@@ -347,6 +348,7 @@ from .videofyme import VideofyMeIE
 from .videopremium import VideoPremiumIE
 from .videott import VideoTtIE
 from .videoweed import VideoWeedIE
 from .vidme import VidmeIE
 from .vimeo import (
    VimeoIE,
    VimeoChannelIE,
--- a/youtube_dl/extractor/ard.py
+++ b/youtube_dl/extractor/ard.py
@@ -8,6 +8,8 @@ from ..utils import (
    determine_ext,
    ExtractorError,
    qualities,
    compat_urllib_parse_urlparse,
    compat_urllib_parse,
 )
@@ -44,6 +46,9 @@ class ARDIE(InfoExtractor):
        else:
            video_id = m.group('video_id')
        urlp = compat_urllib_parse_urlparse(url)
        url = urlp._replace(path=compat_urllib_parse.quote(urlp.path.encode('utf-8'))).geturl()
        webpage = self._download_webpage(url, video_id)
        title = self._html_search_regex(
--- a/youtube_dl/extractor/izlesene.py
+++ b/youtube_dl/extractor/izlesene.py
@@ -0,0 +1,97 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..utils import (
    get_element_by_id,
    parse_iso8601,
    determine_ext,
    int_or_none,
    str_to_int,
 )
 class IzleseneIE(InfoExtractor):
    """Extractor for videos on izlesene.com.

    Matches regular and embed-player URLs on the bare, ``www`` and ``m``
    hosts, and builds one format entry per quality listed on the video page.
    """

    _VALID_URL = r'https?://(?:(?:www|m)\.)?izlesene\.com/(?:video|embedplayer)/(?:[^/]+/)?(?P<id>[0-9]+)'
    # Template for the per-quality stream lookup; filled in with the video id
    # and the quality token scraped from the page.
    _STREAM_URL = 'http://panel.izlesene.com/api/streamurl/{id:}/{format:}'
    _TEST = {
        'url': 'http://www.izlesene.com/video/sevincten-cildirtan-dogum-gunu-hediyesi/7599694',
        'md5': '4384f9f0ea65086734b881085ee05ac2',
        'info_dict': {
            'id': '7599694',
            'ext': 'mp4',
            'title': 'Sevinçten Çıldırtan Doğum Günü Hediyesi',
            'description': 'Annesi oğluna doğum günü hediyesi olarak minecraft cd si alıyor, ve çocuk hunharca seviniyor',
            # Raw literal: '\.' is not a valid escape in a normal string.
            'thumbnail': r're:^http://.*\.jpg',
            'uploader_id': 'pelikzzle',
            'timestamp': 1404298698,
            'upload_date': '20140702',
            'duration': 95.395,
            'age_limit': 0,
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the video referenced by *url*.

        The video id is taken from ``_VALID_URL``; the canonical
        ``/video/<id>`` page is downloaded and scraped for metadata, and
        ``_STREAM_URL`` is queried once per quality to collect the formats.

        The result contains the keys ``id``, ``title``, ``description``,
        ``thumbnail``, ``uploader_id``, ``timestamp``, ``duration``,
        ``view_count``, ``comment_count``, ``age_limit`` and ``formats``.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        # Always fetch the canonical page, whichever URL form was given.
        url = 'http://www.izlesene.com/video/%s' % video_id
        webpage = self._download_webpage(url, video_id)

        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage)
        thumbnail = self._og_search_thumbnail(webpage)
        uploader = self._html_search_regex(
            r"adduserUsername\s*=\s*'([^']+)';", webpage, 'uploader', fatal=False, default='')
        timestamp = parse_iso8601(self._html_search_meta(
            'uploadDate', webpage, 'upload date', fatal=False))

        duration = int_or_none(self._html_search_regex(
            r'"videoduration"\s*:\s*"([^"]+)"', webpage, 'duration', fatal=False))
        if duration:
            # The page value is scaled down by 1000; the test expects 95.395,
            # so it is presumably given in milliseconds.
            duration /= 1000.0

        view_count = str_to_int(get_element_by_id('videoViewCount', webpage))
        # Labelled 'comment count' (was mislabelled 'uploader') so a failed
        # match is reported against the right field.
        comment_count = self._html_search_regex(
            r'comment_count\s*=\s*\'([^\']+)\';', webpage, 'comment count', fatal=False)

        family_friendly = self._html_search_meta(
            'isFamilyFriendly', webpage, 'age limit', fatal=False)

        content_url = self._html_search_meta(
            'contentURL', webpage, 'content URL', fatal=False)
        ext = determine_ext(content_url, 'mp4')

        # Might be empty for some videos. An empty string still yields a
        # single iteration (''.split('|') == ['']), labelled 'sd' below.
        qualities = self._html_search_regex(
            r'"quality"\s*:\s*"([^"]+)"', webpage, 'qualities', fatal=False, default='')

        formats = []
        for quality in qualities.split('|'):
            stream_info = self._download_json(
                self._STREAM_URL.format(id=video_id, format=quality), video_id,
                note='Getting video URL for "%s" quality' % quality,
                errnote='Failed to get video URL for "%s" quality' % quality
            )
            formats.append({
                'url': stream_info.get('streamurl'),
                'ext': ext,
                'format_id': '%sp' % quality if quality else 'sd',
            })

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'uploader_id': uploader,
            'timestamp': timestamp,
            'duration': duration,
            'view_count': int_or_none(view_count),
            'comment_count': int_or_none(comment_count),
            # Only an explicit 'False' marks the video as adult content.
            'age_limit': 18 if family_friendly == 'False' else 0,
            'formats': formats,
        }
--- a/youtube_dl/extractor/vidme.py
+++ b/youtube_dl/extractor/vidme.py
@@ -0,0 +1,68 @@
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..utils import (
    int_or_none,
    float_or_none,
    str_to_int,
 )
 class VidmeIE(InfoExtractor):
    """Extractor for videos on vid.me.

    Matches ``vid.me/<id>`` and embed (``vid.me/e/<id>``) URLs and returns a
    single direct video URL together with the metadata scraped from the page.
    """

    _VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z]+)'
    _TEST = {
        'url': 'https://vid.me/QNB',
        'md5': 'f42d05e7149aeaec5c037b17e5d3dc82',
        'info_dict': {
            'id': 'QNB',
            'ext': 'mp4',
            'title': 'Fishing for piranha - the easy way',
            'description': 'source: https://www.facebook.com/photo.php?v=312276045600871',
            'duration': 119.92,
            'timestamp': 1406313244,
            'upload_date': '20140725',
            # Raw literal: '\.' is not a valid escape in a normal string.
            'thumbnail': r're:^https?://.*\.jpg',
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the video referenced by *url*.

        Downloads the page at *url* and scrapes it. The ``<source src=...>``
        URL, the title and the thumbnail are looked up with the helpers'
        default failure handling; the description falls back to ``''`` and
        every other field is requested with ``fatal=False``.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)

        video_url = self._html_search_regex(r'<source src="([^"]+)"', webpage, 'video URL')

        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage, default='')
        thumbnail = self._og_search_thumbnail(webpage)
        timestamp = int_or_none(self._og_search_property('updated_time', webpage, fatal=False))
        width = int_or_none(self._og_search_property('video:width', webpage, fatal=False))
        height = int_or_none(self._og_search_property('video:height', webpage, fatal=False))
        duration = float_or_none(self._html_search_regex(
            r'data-duration="([^"]+)"', webpage, 'duration', fatal=False))

        # Counters are captured as text that may contain ',' or '.'
        # separators and are converted with str_to_int.
        view_count = str_to_int(self._html_search_regex(
            r'<span class="video_views">\s*([\d,\.]+)\s*plays?', webpage, 'view count', fatal=False))
        like_count = str_to_int(self._html_search_regex(
            r'class="score js-video-vote-score"[^>]+data-score="([\d,\.\s]+)">',
            webpage, 'like count', fatal=False))
        comment_count = str_to_int(self._html_search_regex(
            r'class="js-comment-count"[^>]+data-count="([\d,\.\s]+)">',
            webpage, 'comment count', fatal=False))

        return {
            'id': video_id,
            'url': video_url,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'timestamp': timestamp,
            'width': width,
            'height': height,
            'duration': duration,
            'view_count': view_count,
            'like_count': like_count,
            'comment_count': comment_count,
        }
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -344,7 +344,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
        """Indicate the download will use the RTMP protocol."""
        self.to_screen(u'RTMP download detected')
-    def _extract_signature_function(self, video_id, player_url, slen):
+    def _signature_cache_id(self, example_sig):
        """ Return a string representation of a signature """
        return u'.'.join(compat_str(len(part)) for part in example_sig.split('.'))
    def _extract_signature_function(self, video_id, player_url, example_sig):
        id_m = re.match(
            r'.*-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.(?P<ext>[a-z]+)$',
            player_url)
@@ -354,7 +358,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
        player_id = id_m.group('id')
        # Read from filesystem cache
-        func_id = '%s_%s_%d' % (player_type, player_id, slen)
+        func_id = '%s_%s_%s' % (
            player_type, player_id, self._signature_cache_id(example_sig))
        assert os.path.basename(func_id) == func_id
        cache_dir = get_cachedir(self._downloader.params)
@@ -388,7 +393,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
        if cache_enabled:
            try:
-                test_string = u''.join(map(compat_chr, range(slen)))
+                test_string = u''.join(map(compat_chr, range(len(example_sig))))
                cache_res = res(test_string)
                cache_spec = [ord(c) for c in cache_res]
                try:
@@ -404,7 +409,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
        return res
-    def _print_sig_code(self, func, slen):
+    def _print_sig_code(self, func, example_sig):
        def gen_sig_code(idxs):
            def _genslice(start, end, step):
                starts = u'' if start == 0 else str(start)
@@ -433,11 +438,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
            else:
                yield _genslice(start, i, step)
-        test_string = u''.join(map(compat_chr, range(slen)))
+        test_string = u''.join(map(compat_chr, range(len(example_sig))))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = u' + '.join(gen_sig_code(cache_spec))
-        code = u'if len(s) == %d:\n    return %s\n' % (slen, expr_code)
+        signature_id_tuple = '(%s)' % (
            ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
        code = (u'if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                u'    return %s\n') % (signature_id_tuple, expr_code)
        self.to_screen(u'Extracted signature function:\n' + code)
    def _parse_sig_js(self, jscode):
@@ -465,20 +473,20 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
        if player_url.startswith(u'//'):
            player_url = u'https:' + player_url
        try:
-            player_id = (player_url, len(s))
+            player_id = (player_url, self._signature_cache_id(s))
            if player_id not in self._player_cache:
                func = self._extract_signature_function(
-                    video_id, player_url, len(s)
+                    video_id, player_url, s
                )
                self._player_cache[player_id] = func
            func = self._player_cache[player_id]
            if self._downloader.params.get('youtube_print_sig_code'):
-                self._print_sig_code(func, len(s))
+                self._print_sig_code(func, s)
            return func(s)
        except Exception as e:
            tb = traceback.format_exc()
            raise ExtractorError(
-                u'Automatic signature extraction failed: ' + tb, cause=e)
+                u'Signature extraction failed: ' + tb, cause=e)
    def _get_available_subtitles(self, video_id, webpage):
        try:
@@ -806,8 +814,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
            url_map = {}
            for url_data_str in encoded_url_map.split(','):
                url_data = compat_parse_qs(url_data_str)
-                if 'itag' in url_data and 'url' in url_data:
+                if 'itag' not in url_data or 'url' not in url_data:
                    continue
                format_id = url_data['itag'][0]
                url = url_data['url'][0]
                if 'sig' in url_data:
                    url += '&signature=' + url_data['sig'][0]
                elif 's' in url_data:
@@ -841,16 +852,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                                    'html5 player', fatal=False)
                                player_desc = u'html5 player %s' % player_version
-                            parts_sizes = u'.'.join(compat_str(len(part)) for part in encrypted_sig.split('.'))
+                        parts_sizes = self._signature_cache_id(encrypted_sig)
-                            self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
+                        self.to_screen(u'{%s} signature length %s, %s' %
-                                (len(encrypted_sig), parts_sizes, url_data['itag'][0], player_desc))
+                            (format_id, parts_sizes, player_desc))
                    signature = self._decrypt_signature(
                        encrypted_sig, video_id, player_url, age_gate)
                    url += '&signature=' + signature
                if 'ratebypass' not in url:
                    url += '&ratebypass=yes'
-                    url_map[url_data['itag'][0]] = url
+                url_map[format_id] = url
            formats = _map_to_format_list(url_map)
        elif video_info.get('hlsvp'):
            manifest_url = video_info['hlsvp'][0]
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -852,6 +852,8 @@ def unified_strdate(date_str):
    return upload_date
 def determine_ext(url, default_ext=u'unknown_video'):
    if url is None:
        return default_ext
    guess = url.partition(u'?')[0].rpartition(u'.')[2]
    if re.match(r'^[A-Za-z0-9]+$', guess):
        return guess
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@
-__version__ = '2014.07.30'
+__version__ = '2014.08.02'
Author	SHA1	Message	Date
Philipp Hagemeister	61aabb9d70	release 2014.08.02	2014-08-02 12:25:40 +02:00
Philipp Hagemeister	62af3a0eb5	[youtube] Use new signature cache ID for in-memory cache as well	2014-08-02 12:23:18 +02:00
Philipp Hagemeister	60064c53f1	[youtube] Make cache ID a tuple of lengths instead of just the whole length	2014-08-02 12:21:53 +02:00
Philipp Hagemeister	98eb1c3fa2	[youtube] Clean up -v signature output	2014-08-02 11:55:20 +02:00
Philipp Hagemeister	201e9eaa0e	[youtube] Show format ID in signature deobfuscation -v output	2014-08-02 06:35:18 +02:00
Sergey M․	9afa6ede21	Merge branch 'naglis-izlesene'	2014-08-01 19:08:27 +07:00
Sergey M․	f4776371ae	[izlesene] Minor changes	2014-08-01 19:08:09 +07:00
Sergey M․	328a20bf9c	Merge branch 'izlesene' of https://github.com/naglis/youtube-dl into naglis-izlesene	2014-08-01 18:16:47 +07:00
Sergey M․	5622f29ae4	[ard] Quote path part instead of whole URL encode	2014-07-31 21:23:15 +07:00
Sergey M․	b4f23afbd1	[ard] Encode url (Closes #3412 )	2014-07-31 20:35:29 +07:00
Sergey M․	0138968a6a	[vidme] Add extractor (Closes #3404 )	2014-07-31 20:26:52 +07:00
Naglis Jonaitis	366b1f3cfe	[izlesene] Add new extractor. Closes #3184	2014-07-26 14:35:23 +03:00
`@@ -1,2 +1,2 @@`

	`__version__ = '2014.07.30'`	`__version__ = '2014.08.02'`