release 2013.07.18

Remove the test for signatures of lengths 43,43
It's already covered by the test for length 87.
2013-07-18 12:41:49 +02:00 · 2013-07-18 12:29:09 +02:00 · 2013-07-18 12:25:41 +02:00 · 2013-07-18 09:54:56 +02:00 · 2013-07-17 17:43:44 +02:00 · 2013-07-17 14:39:02 +02:00
10 changed files with 200 additions and 22 deletions
--- a/devscripts/youtube_genalgo.py
+++ b/devscripts/youtube_genalgo.py
@@ -5,6 +5,12 @@
 import sys

 tests = [
+    # 92 - vflQw-fB4 2013/07/17
+    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`~\"",
+     "mrtyuioplkjhgfdsazxcvbnq1234567890QWERTY}IOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]\"|:;"),
+    # 90
+    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`",
+     "mrtyuioplkjhgfdsazxcvbne1234567890QWER[YUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={`]}|"),
    # 88
    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<",
     "J:|}][{=+-_)(*&;%$#@>MNBVCXZASDFGH^KLPOIUYTREWQ0987654321mnbvcxzasdfghrklpoiuytej"),
--- a/test/test_youtube_sig.py
+++ b/test/test_youtube_sig.py
@@ -13,9 +13,14 @@ from helper import FakeYDL
 sig = YoutubeIE(FakeYDL())._decrypt_signature

 class TestYoutubeSig(unittest.TestCase):
-    def test_43_43(self):
-        wrong = '5AEEAE0EC39677BC65FD9021CCD115F1F2DBD5A59E4.C0B243A3E2DED6769199AF3461781E75122AE135135'
-        right = '931EA22157E1871643FA9519676DED253A342B0C.4E95A5DBD2F1F511DCC1209DF56CB77693CE0EAE'
+    def test_92(self):
+        wrong = "F9F9B6E6FD47029957AB911A964CC20D95A181A5D37A2DBEFD67D403DB0E8BE4F4910053E4E8A79.0B70B.0B80B8"
+        right = "69B6E6FD47029957AB911A9F4CC20D95A181A5D3.A2DBEFD67D403DB0E8BE4F4910053E4E8A7980B7"
+        self.assertEqual(sig(wrong), right)
+
+    def test_90(self):
+        wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`"
+        right = "mrtyuioplkjhgfdsazxcvbne1234567890QWER[YUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={`]}|"
        self.assertEqual(sig(wrong), right)

    def test_88(self):
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -580,7 +580,7 @@ def _real_main(argv=None):
        })

    if opts.verbose:
-        ydl.to_screen(u'[debug] youtube-dl version ' + __version__)
+        sys.stderr.write(u'[debug] youtube-dl version ' + __version__ + u'\n')
        try:
            sp = subprocess.Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
@@ -589,11 +589,14 @@ def _real_main(argv=None):
            out, err = sp.communicate()
            out = out.decode().strip()
            if re.match('[0-9a-f]+', out):
-                ydl.to_screen(u'[debug] Git HEAD: ' + out)
+                sys.stderr.write(u'[debug] Git HEAD: ' + out + u'\n')
        except:
-            sys.exc_clear()
-        ydl.to_screen(u'[debug] Python version %s - %s' %(platform.python_version(), platform.platform()))
-        ydl.to_screen(u'[debug] Proxy map: ' + str(proxy_handler.proxies))
+            try:
+                sys.exc_clear()
+            except:
+                pass
+        sys.stderr.write(u'[debug] Python version %s - %s' %(platform.python_version(), platform.platform()) + u'\n')
+        sys.stderr.write(u'[debug] Proxy map: ' + str(proxy_handler.proxies) + u'\n')

    ydl.add_default_info_extractors()

--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -9,6 +9,7 @@ from .brightcove import BrightcoveIE
 from .canalplus import CanalplusIE
 from .collegehumor import CollegeHumorIE
 from .comedycentral import ComedyCentralIE
+from .condenast import CondeNastIE
 from .criterion import CriterionIE
 from .cspan import CSpanIE
 from .dailymotion import DailymotionIE
@@ -58,6 +59,7 @@ from .steam import SteamIE
 from .teamcoco import TeamcocoIE
 from .ted import TEDIE
 from .tf1 import TF1IE
+from .thisav import ThisAVIE
 from .traileraddict import TrailerAddictIE
 from .tudou import TudouIE
 from .tumblr import TumblrIE
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -14,6 +14,7 @@ from ..utils import (
    clean_html,
    compiled_regex_type,
    ExtractorError,
+    unescapeHTML,
 )

 class InfoExtractor(object):
@@ -270,7 +271,8 @@ class InfoExtractor(object):
    def _og_search_property(self, prop, html, name=None, **kargs):
        if name is None:
            name = 'OpenGraph %s' % prop
-        return self._html_search_regex(self._og_regex(prop), html, name, flags=re.DOTALL, **kargs)
+        escaped = self._search_regex(self._og_regex(prop), html, name, flags=re.DOTALL, **kargs)
+        return unescapeHTML(escaped)

    def _og_search_thumbnail(self, html, **kargs):
        return self._og_search_property('image', html, u'thumbnail url', fatal=False, **kargs)
--- a/youtube_dl/extractor/condenast.py
+++ b/youtube_dl/extractor/condenast.py
@@ -0,0 +1,106 @@
+# coding: utf-8
+
+import re
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urllib_parse,
+    orderedSet,
+    compat_urllib_parse_urlparse,
+    compat_urlparse,
+)
+
+
+class CondeNastIE(InfoExtractor):
+    """
+    Condé Nast is a media group, some of its sites use a custom HTML5 player
+    that works the same in all of them.
+    """
+
+    # The keys are the supported sites and the values are the name to be shown
+    # to the user and in the extractor description.
+    _SITES = {'wired': u'WIRED',
+              'gq': u'GQ',
+              'vogue': u'Vogue',
+              'glamour': u'Glamour',
+              'wmagazine': u'W Magazine',
+              'vanityfair': u'Vanity Fair',
+              }
+
+    _VALID_URL = r'http://(video|www).(?P<site>%s).com/(?P<type>watch|series|video)/(?P<id>.+)' % '|'.join(_SITES.keys())
+    IE_DESC = u'Condé Nast media group: %s' % ', '.join(sorted(_SITES.values()))
+
+    _TEST = {
+        u'url': u'http://video.wired.com/watch/3d-printed-speakers-lit-with-led',
+        u'file': u'5171b343c2b4c00dd0c1ccb3.mp4',
+        u'md5': u'1921f713ed48aabd715691f774c451f7',
+        u'info_dict': {
+            u'title': u'3D Printed Speakers Lit With LED',
+            u'description': u'Check out these beautiful 3D printed LED speakers.  You can\'t actually buy them, but LumiGeek is working on a board that will let you make you\'re own.',
+        }
+    }
+
+    def _extract_series(self, url, webpage):
+        title = self._html_search_regex(r'<div class="cne-series-info">.*?<h1>(.+?)</h1>',
+                                        webpage, u'series title', flags=re.DOTALL)
+        url_object = compat_urllib_parse_urlparse(url)
+        base_url = '%s://%s' % (url_object.scheme, url_object.netloc)
+        m_paths = re.finditer(r'<p class="cne-thumb-title">.*?<a href="(/watch/.+?)["\?]',
+                              webpage, flags=re.DOTALL)
+        paths = orderedSet(m.group(1) for m in m_paths)
+        build_url = lambda path: compat_urlparse.urljoin(base_url, path)
+        entries = [self.url_result(build_url(path), 'CondeNast') for path in paths]
+        return self.playlist_result(entries, playlist_title=title)
+
+    def _extract_video(self, webpage):
+        description = self._html_search_regex([r'<div class="cne-video-description">(.+?)</div>',
+                                               r'<div class="video-post-content">(.+?)</div>',
+                                               ],
+                                              webpage, u'description',
+                                              fatal=False, flags=re.DOTALL)
+        params = self._search_regex(r'var params = {(.+?)}[;,]', webpage,
+                                    u'player params', flags=re.DOTALL)
+        video_id = self._search_regex(r'videoId: [\'"](.+?)[\'"]', params, u'video id')
+        player_id = self._search_regex(r'playerId: [\'"](.+?)[\'"]', params, u'player id')
+        target = self._search_regex(r'target: [\'"](.+?)[\'"]', params, u'target')
+        data = compat_urllib_parse.urlencode({'videoId': video_id,
+                                              'playerId': player_id,
+                                              'target': target,
+                                              })
+        base_info_url = self._search_regex(r'url = [\'"](.+?)[\'"][,;]',
+                                           webpage, u'base info url',
+                                           default='http://player.cnevids.com/player/loader.js?')
+        info_url = base_info_url + data
+        info_page = self._download_webpage(info_url, video_id,
+                                           u'Downloading video info')
+        video_info = self._search_regex(r'var video = ({.+?});', info_page, u'video info')
+        video_info = json.loads(video_info)
+
+        def _formats_sort_key(f):
+            type_ord = 1 if f['type'] == 'video/mp4' else 0
+            quality_ord = 1 if f['quality'] == 'high' else 0
+            return (quality_ord, type_ord)
+        best_format = sorted(video_info['sources'][0], key=_formats_sort_key)[-1]
+
+        return {'id': video_id,
+                'url': best_format['src'],
+                'ext': best_format['type'].split('/')[-1],
+                'title': video_info['title'],
+                'thumbnail': video_info['poster_frame'],
+                'description': description,
+                }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        site = mobj.group('site')
+        url_type = mobj.group('type')
+        id = mobj.group('id')
+
+        self.to_screen(u'Extracting from %s with the Condé Nast extractor' % self._SITES[site])
+        webpage = self._download_webpage(url, id)
+
+        if url_type == 'series':
+            return self._extract_series(url, webpage)
+        else:
+            return self._extract_video(webpage)
--- a/youtube_dl/extractor/metacafe.py
+++ b/youtube_dl/extractor/metacafe.py
@@ -37,7 +37,8 @@ class MetacafeIE(InfoExtractor):
        u"file": u"an-dVVXnuY7Jh77J.mp4",
        u"info_dict": {
            u"title": u"The Andromeda Strain (1971): Stop the Bomb Part 3",
-            u"uploader": u"AnyClip",
+            u"uploader": u"anyclip",
+            u"description": u"md5:38c711dd98f5bb87acf973d573442e67"
        }
    }]

@@ -118,18 +119,19 @@ class MetacafeIE(InfoExtractor):
                video_url = '%s?__gda__=%s' % (mediaURL, mobj.group('key'))
                video_ext = determine_ext(video_url)

-        mobj = re.search(r'(?im)<title>(.*) - Video</title>', webpage)
-        if mobj is None:
-            raise ExtractorError(u'Unable to extract title')
-        video_title = mobj.group(1).decode('utf-8')
+        video_title = self._html_search_regex(r'(?im)<title>(.*) - Video</title>', webpage, u'title')
+        description = self._og_search_description(webpage)
+        video_uploader = self._html_search_regex(
+                r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("channel","([^"]+)"\);',
+                webpage, u'uploader nickname', fatal=False)

-        video_uploader = self._html_search_regex(r'submitter=(.*?);|<p class="By">\s*By\s*<a[^>]*>(.*?)</a>', webpage, u'uploader nickname', fatal=False)
-
-        return [{
+        return {
+            '_type':    'video',
            'id':       video_id,
            'url':      video_url,
+            'description': description,
            'uploader': video_uploader,
            'upload_date':  None,
            'title':    video_title,
            'ext':      video_ext,
-        }]
+        }
--- a/youtube_dl/extractor/thisav.py
+++ b/youtube_dl/extractor/thisav.py
@@ -0,0 +1,47 @@
+#coding: utf-8
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    determine_ext,
+)
+
+class ThisAVIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?thisav\.com/video/(?P<id>[0-9]+)/.*'
+    _TEST = {
+        u"url": u"http://www.thisav.com/video/47734/%98%26sup1%3B%83%9E%83%82---just-fit.html",
+        u"file": u"47734.flv",
+        u"md5": u"0480f1ef3932d901f0e0e719f188f19b",
+        u"info_dict": {
+            u"title": u"高樹マリア - Just fit",
+            u"uploader": u"dj7970",
+            u"uploader_id": u"dj7970"
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+
+        video_id = mobj.group('id')
+        webpage = self._download_webpage(url, video_id)
+        title = self._html_search_regex(r'<h1>([^<]*)</h1>', webpage, u'title')
+        video_url = self._html_search_regex(
+            r"addVariable\('file','([^']+)'\);", webpage, u'video url')
+        uploader = self._html_search_regex(
+            r': <a href="http://www.thisav.com/user/[0-9]+/(?:[^"]+)">([^<]+)</a>',
+            webpage, u'uploader name', fatal=False)
+        uploader_id = self._html_search_regex(
+            r': <a href="http://www.thisav.com/user/[0-9]+/([^"]+)">(?:[^<]+)</a>',
+            webpage, u'uploader id', fatal=False)
+        ext = determine_ext(video_url)
+        
+        return {
+            '_type':       'video',
+            'id':          video_id,
+            'url':         video_url,
+            'uploader':    uploader,
+            'uploader_id': uploader_id,
+            'title':       title,
+            'ext':         ext,
+        }
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -179,7 +179,11 @@ class YoutubeIE(InfoExtractor):
    def _decrypt_signature(self, s):
        """Turn the encrypted s field into a working signature"""

-        if len(s) == 88:
+        if len(s) == 92:
+            return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83]
+        elif len(s) == 90:
+            return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81]
+        elif len(s) == 88:
            return s[48] + s[81:67:-1] + s[82] + s[66:62:-1] + s[85] + s[61:48:-1] + s[67] + s[47:12:-1] + s[3] + s[11:3:-1] + s[2] + s[12]
        elif len(s) == 87:
            return s[62] + s[82:62:-1] + s[83] + s[61:52:-1] + s[0] + s[51:2:-1]
@@ -589,8 +593,9 @@ class YoutubeIE(InfoExtractor):
                            else:
                                player = u'html5 player %s' % self._search_regex(r'html5player-(.+?)\.js', video_webpage,
                                    'html5 player', fatal=False)
-                            self.to_screen('encrypted signature length %d (%d.%d), itag %s, %s' %
-                                (len(s), len(s.split('.')[0]), len(s.split('.')[1]), url_data['itag'][0], player))
+                            parts_sizes = u'.'.join(compat_str(len(part)) for part in s.split('.'))
+                            self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
+                                (len(s), parts_sizes, url_data['itag'][0], player))
                        signature = self._decrypt_signature(url_data['s'][0])
                        url += '&signature=' + signature
                    if 'ratebypass' not in url:
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@

-__version__ = '2013.07.17'
+__version__ = '2013.07.18'
Author	SHA1	Message	Date
Philipp Hagemeister	ad433bb372	release 2013.07.18	2013-07-18 12:41:49 +02:00
Jaime Marquínez Ferrándiz	3e0b3a1428	Remove the test for signatures of lengths 43,43. It's already covered by the test for length 87.	2013-07-18 12:29:09 +02:00
Jaime Marquínez Ferrándiz	444b116597	YoutubeIE: add algo for length 90 (closes #1064). Order the cases from higher to lower length.	2013-07-18 12:25:41 +02:00
Jaime Marquínez Ferrándiz	2aea08eda1	Merge pull request #1068 from MiLk/genalgo-youtube-92 [youtube] Add generator for signature 92	2013-07-18 09:54:56 +02:00
Emilien Kenler	ccf365475a	[youtube] Add generator for signature 92	2013-07-17 17:43:44 +02:00
Jaime Marquínez Ferrándiz	e1fb245690	Add CondeNastIE. It supports some of the websites of the Condé Nast group: WIRED, GQ, Vogue, Glamour, W Magazine and Vanity Fair.	2013-07-17 14:39:02 +02:00
Jaime Marquínez Ferrándiz	5a76c6517e	YoutubeIE: some encrypted signatures have more than two parts, print the size of all the parts	2013-07-17 12:08:10 +02:00
Philipp Hagemeister	1bb9568776	release 2013.07.17.1	2013-07-17 11:18:35 +02:00
Philipp Hagemeister	ecd1c2f7e9	[thisav] add a test for video MD5	2013-07-17 11:18:14 +02:00
Philipp Hagemeister	466de68801	[thisav] Add IE (Fixes #1056 )	2013-07-17 11:16:53 +02:00
Philipp Hagemeister	88d4111cfa	[youtube] Add code for signature 92 (Closes #1060 )	2013-07-17 11:06:34 +02:00
Philipp Hagemeister	51fb64bab1	Mark test_youtube_sig as non-executable (#1066 )	2013-07-17 11:04:07 +02:00
Philipp Hagemeister	be547e1d3b	Revert "[youtube] improved decrypt_signature, closes #1060 " This reverts commit `fe6fad1242` and closes #1066.	2013-07-17 11:01:40 +02:00
Philipp Hagemeister	bf85454116	[metacafe] Fix test	2013-07-17 10:50:30 +02:00
Philipp Hagemeister	5910724b11	[metacafe] New result format	2013-07-17 10:49:49 +02:00
Philipp Hagemeister	7e24b09da9	[metacafe] Extract description	2013-07-17 10:45:35 +02:00
Philipp Hagemeister	f085f960e7	[metacafe] Fix uploader detection	2013-07-17 10:45:24 +02:00
Philipp Hagemeister	f38de77f6e	Use unescapeHTML for OpenGraph properties. These are attribute values, so we don't need the more complex and whitespace-destroying cleanHTML - we just need to unescape quotes, that's it.	2013-07-17 10:38:23 +02:00
Philipp Hagemeister	58e7d46d1b	Merge remote-tracking branch 'Forever-Young/patch-1'	2013-07-17 09:25:52 +02:00
Philipp Hagemeister	2a5201638d	[youtube] Add sig test for 92 (Thanks to @patrickslin)	2013-07-17 09:23:38 +02:00
Anton Novosyolov	fe6fad1242	[youtube] improved decrypt_signature, closes #1060	2013-07-17 10:41:43 +04:00
Philipp Hagemeister	ec00e1d8a0	[metacafe] Use modern helper methods	2013-07-17 01:35:33 +02:00
Philipp Hagemeister	de29c4144e	Ignore errors in git error handling in verbose mode in Python 3	2013-07-17 01:33:28 +02:00
Philipp Hagemeister	f3bab0044e	Write debugging output to stderr (#1059 )	2013-07-17 01:30:34 +02:00