release 2015.01.23.2

[utils] Simplify HTTPS socket creation
We were duplicating (bad) code and doing crazy things with SSL. Just use TLSv1 across the board, and make do with a single implementation of HTTPSConnection.connect. Fixes #4696.
2015-01-23 11:20:24 +01:00 · 2015-01-23 11:15:18 +01:00 · 2015-01-23 01:34:24 +01:00 · 2015-01-23 01:32:52 +01:00 · 2015-01-23 01:22:19 +01:00 · 2015-01-23 01:21:30 +01:00
6 changed files with 53 additions and 27 deletions
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -28,6 +28,7 @@ from youtube_dl.utils import (
    fix_xml_ampersands,
    InAdvancePagedList,
    intlist_to_bytes,
+    is_html,
    js_to_json,
    limit_length,
    OnDemandPagedList,
@@ -417,5 +418,21 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4')
        self.assertTrue(age_restricted(18, 14))
        self.assertFalse(age_restricted(18, 18))

+    def test_is_html(self):
+        self.assertFalse(is_html(b'\x49\x44\x43<html'))
+        self.assertTrue(is_html(b'<!DOCTYPE foo>\xaaa'))
+        self.assertTrue(is_html(  # UTF-8 with BOM
+            b'\xef\xbb\xbf<!DOCTYPE foo>\xaaa'))
+        self.assertTrue(is_html(  # UTF-16-LE
+            b'\xff\xfe<\x00h\x00t\x00m\x00l\x00>\x00\xe4\x00'
+        ))
+        self.assertTrue(is_html(  # UTF-16-BE
+            b'\xfe\xff\x00<\x00h\x00t\x00m\x00l\x00>\x00\xe4'
+        ))
+        self.assertTrue(is_html(  # UTF-32-BE
+            b'\x00\x00\xFE\xFF\x00\x00\x00<\x00\x00\x00h\x00\x00\x00t\x00\x00\x00m\x00\x00\x00l\x00\x00\x00>\x00\x00\x00\xe4'))
+        self.assertTrue(is_html(  # UTF-32-LE
+            b'\xFF\xFE\x00\x00<\x00\x00\x00h\x00\x00\x00t\x00\x00\x00m\x00\x00\x00l\x00\x00\x00>\x00\x00\x00\xe4\x00\x00\x00'))
+
 if __name__ == '__main__':
    unittest.main()
--- a/youtube_dl/extractor/gamestar.py
+++ b/youtube_dl/extractor/gamestar.py
@@ -1,8 +1,6 @@
 # coding: utf-8
 from __future__ import unicode_literals

-import re
-
 from .common import InfoExtractor
 from ..utils import (
    int_or_none,
@@ -29,9 +27,7 @@ class GameStarIE(InfoExtractor):
    }

    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
+        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        og_title = self._og_search_title(webpage)
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -17,6 +17,7 @@ from ..utils import (
    ExtractorError,
    float_or_none,
    HEADRequest,
+    is_html,
    orderedSet,
    parse_xml,
    smuggle_url,
@@ -647,7 +648,7 @@ class GenericIE(InfoExtractor):
        # Maybe it's a direct link to a video?
        # Be careful not to download the whole thing!
        first_bytes = full_response.read(512)
-        if not re.match(r'^\s*<', first_bytes.decode('utf-8', 'replace')):
+        if not is_html(first_bytes):
            self._downloader.report_warning(
                'URL could be a direct video link, returning it as such.')
            upload_date = unified_strdate(
--- a/youtube_dl/postprocessor/ffmpeg.py
+++ b/youtube_dl/postprocessor/ffmpeg.py
@@ -509,6 +509,10 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
            metadata['artist'] = info['uploader']
        elif info.get('uploader_id') is not None:
            metadata['artist'] = info['uploader_id']
+        if info.get('description') is not None:
+            metadata['description'] = info['description']
+        if info.get('webpage_url') is not None:
+            metadata['comment'] = info['webpage_url']

        if not metadata:
            self._downloader.to_screen('[ffmpeg] There isn\'t any metadata to add')
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -411,25 +411,9 @@ def make_HTTPS_handler(params, **kwargs):
            pass

    if sys.version_info < (3, 2):
-        import httplib
-
-        class HTTPSConnectionV3(httplib.HTTPSConnection):
-            def __init__(self, *args, **kwargs):
-                httplib.HTTPSConnection.__init__(self, *args, **kwargs)
-
-            def connect(self):
-                sock = socket.create_connection((self.host, self.port), self.timeout)
-                if getattr(self, '_tunnel_host', False):
-                    self.sock = sock
-                    self._tunnel()
-                try:
-                    self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_TLSv1)
-                except ssl.SSLError:
-                    self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv23)
-
-        return YoutubeDLHTTPSHandler(params, https_conn_class=HTTPSConnectionV3, **kwargs)
+        return YoutubeDLHTTPSHandler(params, **kwargs)
    else:  # Python < 3.4
-        context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
+        context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
        context.verify_mode = (ssl.CERT_NONE
                               if opts_no_check_certificate
                               else ssl.CERT_REQUIRED)
@@ -560,7 +544,9 @@ def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
                sock = compat_socket_create_connection(
                    (self.host, self.port), self.timeout, sa)
                if is_https:
-                    self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file)
+                    self.sock = ssl.wrap_socket(
+                        sock, self.key_file, self.cert_file,
+                        ssl_version=ssl.PROTOCOL_TLSv1)
                else:
                    self.sock = sock
            hc.connect = functools.partial(_hc_connect, hc)
@@ -612,7 +598,9 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):

    def http_request(self, req):
        for h, v in std_headers.items():
-            if h not in req.headers:
+            # urllib capitalizes the keys of req.headers (Python bug 2275:
+            # http://bugs.python.org/issue2275), so compare the capitalized name
+            if h.capitalize() not in req.headers:
                req.add_header(h, v)
        if 'Youtubedl-no-compression' in req.headers:
            if 'Accept-encoding' in req.headers:
@@ -1631,3 +1619,23 @@ def age_restricted(content_limit, age_limit):
    if content_limit is None:
        return False  # Content available for everyone
    return age_limit < content_limit
+
+
+def is_html(first_bytes):
+    """ Detect whether a file contains HTML by examining its first bytes. """
+
+    BOMS = [
+        (b'\xef\xbb\xbf', 'utf-8'),
+        (b'\x00\x00\xfe\xff', 'utf-32-be'),
+        (b'\xff\xfe\x00\x00', 'utf-32-le'),
+        (b'\xff\xfe', 'utf-16-le'),
+        (b'\xfe\xff', 'utf-16-be'),
+    ]
+    for bom, enc in BOMS:
+        if first_bytes.startswith(bom):
+            s = first_bytes[len(bom):].decode(enc, 'replace')
+            break
+    else:
+        s = first_bytes.decode('utf-8', 'replace')
+
+    return re.match(r'^\s*<', s)
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals

-__version__ = '2015.01.23.1'
+__version__ = '2015.01.23.2'
Author	SHA1	Message	Date
Philipp Hagemeister	ba55168157	release 2015.01.23.2	2015-01-23 11:20:24 +01:00
Philipp Hagemeister	d79323136f	[utils] Simplify HTTPS socket creation We were duplicating (bad) code and doing crazy things with SSL. Just use TLSv1 across the board, and make do with a single implementation of HTTPSConnection.connect. Fixes #4696.	2015-01-23 11:15:18 +01:00
Philipp Hagemeister	08ff6ab07e	[gamestar] Modernize slightly	2015-01-23 01:34:24 +01:00
Philipp Hagemeister	ba655a0e4c	Merge remote-tracking branch 'derrotebaron/master'	2015-01-23 01:32:52 +01:00
Philipp Hagemeister	b59c17e543	Merge pull request #4745 from BitLooter/master Embed description and URL as MP4 tags	2015-01-23 01:22:19 +01:00
Philipp Hagemeister	61ca9a80b3	[generic] Add support for BOMs (Fixes #4753)	2015-01-23 01:21:30 +01:00
David Powell	bd3cbe0716	Embed description and URL as MP4 tags	2015-01-18 15:01:05 -08:00
Johannes Knoedtel	3d5f7a3947	[utils] Prevent override of custom headers. The dict of headers of request objects in urllib always has its keys capitalized. This causes the lookup to fail, so the custom header gets overwritten. If, for example, an extractor tries to add a "User-Agent" header, the internal representation in the request object is "User-agent". The header is therefore clobbered by the "User-Agent" in std_headers, because the strings are not equal.	2015-01-12 22:38:51 +01:00