release 2013.11.29

[podomatic] Add extractor
Add an extractor for Clipsyndicate (closes #1744)
2013-11-29 03:34:26 +01:00 · 2013-11-29 03:33:25 +01:00 · 2013-11-28 14:38:10 +01:00 · 2013-11-28 13:49:00 +01:00 · 2013-11-28 13:32:49 +01:00 · 2013-11-28 06:17:56 +01:00
59 changed files with 820 additions and 404 deletions
--- a/devscripts/bash-completion.in
+++ b/devscripts/bash-completion.in
@@ -1,10 +1,21 @@
 __youtube_dl()
 {
-    local cur prev opts
+    local cur prev opts fileopts diropts keywords
    COMPREPLY=()
    cur="${COMP_WORDS[COMP_CWORD]}"
+    prev="${COMP_WORDS[COMP_CWORD-1]}"
    opts="{{flags}}"
-    keywords=":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater"
+    keywords=":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater :ythistory"
+    fileopts="-a|--batch-file|--download-archive|--cookies"
+    diropts="--cache-dir"
+
+    if [[ ${prev} =~ ${fileopts} ]]; then
+        COMPREPLY=( $(compgen -f -- ${cur}) )
+        return 0
+    elif [[ ${prev} =~ ${diropts} ]]; then
+        COMPREPLY=( $(compgen -d -- ${cur}) )
+        return 0
+    fi

    if [[ ${cur} =~ : ]]; then
        COMPREPLY=( $(compgen -W "${keywords}" -- ${cur}) )
--- a/test/helper.py
+++ b/test/helper.py
@@ -12,10 +12,6 @@ from youtube_dl import YoutubeDL
 from youtube_dl.utils import preferredencoding


-def global_setup():
-    youtube_dl._setup_opener(timeout=10)
-
-
 def get_params(override=None):
    PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                   "parameters.json")
--- a/test/test_age_restriction.py
+++ b/test/test_age_restriction.py
@@ -6,8 +6,7 @@ import sys
 import unittest
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

-from test.helper import global_setup, try_rm
-global_setup()
+from test.helper import try_rm


 from youtube_dl import YoutubeDL
--- a/test/test_all_urls.py
+++ b/test/test_all_urls.py
@@ -100,10 +100,11 @@ class TestAllURLsMatching(unittest.TestCase):
    def test_keywords(self):
        self.assertMatch(':ytsubs', ['youtube:subscriptions'])
        self.assertMatch(':ytsubscriptions', ['youtube:subscriptions'])
-        self.assertMatch(':thedailyshow', ['ComedyCentral'])
-        self.assertMatch(':tds', ['ComedyCentral'])
-        self.assertMatch(':colbertreport', ['ComedyCentral'])
-        self.assertMatch(':cr', ['ComedyCentral'])
+        self.assertMatch(':ythistory', ['youtube:history'])
+        self.assertMatch(':thedailyshow', ['ComedyCentralShows'])
+        self.assertMatch(':tds', ['ComedyCentralShows'])
+        self.assertMatch(':colbertreport', ['ComedyCentralShows'])
+        self.assertMatch(':cr', ['ComedyCentralShows'])


 if __name__ == '__main__':
--- a/test/test_download.py
+++ b/test/test_download.py
@@ -9,12 +9,10 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 from test.helper import (
    get_params,
    get_testcases,
-    global_setup,
    try_rm,
    md5,
    report_warning
 )
-global_setup()


 import hashlib
--- a/test/test_playlists.py
+++ b/test/test_playlists.py
@@ -8,8 +8,7 @@ import sys
 import unittest
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

-from test.helper import FakeYDL, global_setup
-global_setup()
+from test.helper import FakeYDL


 from youtube_dl.extractor import (
--- a/test/test_subtitles.py
+++ b/test/test_subtitles.py
@@ -6,8 +6,7 @@ import sys
 import unittest
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

-from test.helper import FakeYDL, global_setup, md5
-global_setup()
+from test.helper import FakeYDL, md5


 from youtube_dl.extractor import (
@@ -73,7 +72,7 @@ class TestYoutubeSubtitles(BaseTestSubtitles):
        self.DL.params['writesubtitles'] = True
        self.DL.params['subtitlesformat'] = 'vtt'
        subtitles = self.getSubtitles()
-        self.assertEqual(md5(subtitles['en']), '356cdc577fde0c6783b9b822e7206ff7')
+        self.assertEqual(md5(subtitles['en']), '3cb210999d3e021bd6c7f0ea751eab06')

    def test_youtube_list_subtitles(self):
        self.DL.expect_warning(u'Video doesn\'t have automatic captions')
--- a/test/test_write_annotations.py
+++ b/test/test_write_annotations.py
@@ -7,8 +7,7 @@ import sys
 import unittest
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

-from test.helper import get_params, global_setup, try_rm
-global_setup()
+from test.helper import get_params, try_rm


 import io
--- a/test/test_write_info_json.py
+++ b/test/test_write_info_json.py
@@ -7,8 +7,7 @@ import sys
 import unittest
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

-from test.helper import get_params, global_setup
-global_setup()
+from test.helper import get_params


 import io
--- a/test/test_youtube_lists.py
+++ b/test/test_youtube_lists.py
@@ -6,8 +6,7 @@ import sys
 import unittest
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

-from test.helper import FakeYDL, global_setup
-global_setup()
+from test.helper import FakeYDL


 from youtube_dl.extractor import (
@@ -108,5 +107,14 @@ class TestYoutubeLists(unittest.TestCase):
        result = ie.extract('http://www.youtube.com/show/airdisasters')
        self.assertTrue(len(result) >= 3)

+    def test_youtube_mix(self):
+        dl = FakeYDL()
+        ie = YoutubePlaylistIE(dl)
+        result = ie.extract('http://www.youtube.com/watch?v=lLJf9qJHR3E&list=RDrjFaenf1T-Y')
+        entries = result['entries']
+        self.assertTrue(len(entries) >= 20)
+        original_video = entries[0]
+        self.assertEqual(original_video['id'], 'rjFaenf1T-Y')
+
 if __name__ == '__main__':
    unittest.main()
--- a/test/test_youtube_signature.py
+++ b/test/test_youtube_signature.py
@@ -6,9 +6,6 @@ import sys
 import unittest
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

-from test.helper import global_setup
-global_setup()
-

 import io
 import re
--- a/youtube_dl/FileDownloader.py
+++ b/youtube_dl/FileDownloader.py
@@ -1,4 +1,3 @@
-import math
 import os
 import re
 import subprocess
@@ -11,6 +10,7 @@ from .utils import (
    ContentTooShortError,
    determine_ext,
    encodeFilename,
+    format_bytes,
    sanitize_open,
    timeconvert,
 )
@@ -53,20 +53,6 @@ class FileDownloader(object):
        self._progress_hooks = []
        self.params = params

-    @staticmethod
-    def format_bytes(bytes):
-        if bytes is None:
-            return 'N/A'
-        if type(bytes) is str:
-            bytes = float(bytes)
-        if bytes == 0.0:
-            exponent = 0
-        else:
-            exponent = int(math.log(bytes, 1024.0))
-        suffix = ['B','KiB','MiB','GiB','TiB','PiB','EiB','ZiB','YiB'][exponent]
-        converted = float(bytes) / float(1024 ** exponent)
-        return '%.2f%s' % (converted, suffix)
-
    @staticmethod
    def format_seconds(seconds):
        (mins, secs) = divmod(seconds, 60)
@@ -117,7 +103,7 @@ class FileDownloader(object):
    def format_speed(speed):
        if speed is None:
            return '%10s' % '---b/s'
-        return '%10s' % ('%s/s' % FileDownloader.format_bytes(speed))
+        return '%10s' % ('%s/s' % format_bytes(speed))

    @staticmethod
    def best_block_size(elapsed_time, bytes):
@@ -270,6 +256,61 @@ class FileDownloader(object):
                (clear_line, data_len_str, self.format_seconds(tot_time)))

    def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url, live):
+        def run_rtmpdump(args):
+            start = time.time()
+            resume_percent = None
+            resume_downloaded_data_len = None
+            proc = subprocess.Popen(args, stderr=subprocess.PIPE)
+            cursor_in_new_line = True
+            proc_stderr_closed = False
+            while not proc_stderr_closed:
+                # read line from stderr
+                line = u''
+                while True:
+                    char = proc.stderr.read(1)
+                    if not char:
+                        proc_stderr_closed = True
+                        break
+                    if char in [b'\r', b'\n']:
+                        break
+                    line += char.decode('ascii', 'replace')
+                if not line:
+                    # proc_stderr_closed is True
+                    continue
+                mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec \(([0-9]{1,2}\.[0-9])%\)', line)
+                if mobj:
+                    downloaded_data_len = int(float(mobj.group(1))*1024)
+                    percent = float(mobj.group(2))
+                    if not resume_percent:
+                        resume_percent = percent
+                        resume_downloaded_data_len = downloaded_data_len
+                    eta = self.calc_eta(start, time.time(), 100-resume_percent, percent-resume_percent)
+                    speed = self.calc_speed(start, time.time(), downloaded_data_len-resume_downloaded_data_len)
+                    data_len = None
+                    if percent > 0:
+                        data_len = int(downloaded_data_len * 100 / percent)
+                    data_len_str = u'~' + format_bytes(data_len)
+                    self.report_progress(percent, data_len_str, speed, eta)
+                    cursor_in_new_line = False
+                    self._hook_progress({
+                        'downloaded_bytes': downloaded_data_len,
+                        'total_bytes': data_len,
+                        'tmpfilename': tmpfilename,
+                        'filename': filename,
+                        'status': 'downloading',
+                        'eta': eta,
+                        'speed': speed,
+                    })
+                elif self.params.get('verbose', False):
+                    if not cursor_in_new_line:
+                        self.to_screen(u'')
+                    cursor_in_new_line = True
+                    self.to_screen(u'[rtmpdump] '+line)
+            proc.wait()
+            if not cursor_in_new_line:
+                self.to_screen(u'')
+            return proc.returncode
+
        self.report_destination(filename)
        tmpfilename = self.temp_name(filename)
        test = self.params.get('test', False)
@@ -280,12 +321,11 @@ class FileDownloader(object):
        except (OSError, IOError):
            self.report_error(u'RTMP download detected but "rtmpdump" could not be run')
            return False
-        verbosity_option = '--verbose' if self.params.get('verbose', False) else '--quiet'

        # Download using rtmpdump. rtmpdump returns exit code 2 when
        # the connection was interrumpted and resuming appears to be
        # possible. This is part of rtmpdump's normal usage, AFAIK.
-        basic_args = ['rtmpdump', verbosity_option, '-r', url, '-o', tmpfilename]
+        basic_args = ['rtmpdump', '--verbose', '-r', url, '-o', tmpfilename]
        if player_url is not None:
            basic_args += ['--swfVfy', player_url]
        if page_url is not None:
@@ -299,30 +339,48 @@ class FileDownloader(object):
        if live:
            basic_args += ['--live']
        args = basic_args + [[], ['--resume', '--skip', '1']][self.params.get('continuedl', False)]
+
+        if sys.platform == 'win32' and sys.version_info < (3, 0):
+            # Windows subprocess module does not actually support Unicode
+            # on Python 2.x
+            # See http://stackoverflow.com/a/9951851/35070
+            subprocess_encoding = sys.getfilesystemencoding()
+            args = [a.encode(subprocess_encoding, 'ignore') for a in args]
+        else:
+            subprocess_encoding = None
+
        if self.params.get('verbose', False):
+            if subprocess_encoding:
+                str_args = [
+                    a.decode(subprocess_encoding) if isinstance(a, bytes) else a
+                    for a in args]
+            else:
+                str_args = args
            try:
                import pipes
-                shell_quote = lambda args: ' '.join(map(pipes.quote, args))
+                shell_quote = lambda args: ' '.join(map(pipes.quote, str_args))
            except ImportError:
                shell_quote = repr
-            self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(args))
-        retval = subprocess.call(args)
+            self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(str_args))
+
+        retval = run_rtmpdump(args)
+
        while (retval == 2 or retval == 1) and not test:
            prevsize = os.path.getsize(encodeFilename(tmpfilename))
-            self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
+            self.to_screen(u'[rtmpdump] %s bytes' % prevsize)
            time.sleep(5.0) # This seems to be needed
-            retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
+            retval = run_rtmpdump(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
            cursize = os.path.getsize(encodeFilename(tmpfilename))
            if prevsize == cursize and retval == 1:
                break
             # Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
            if prevsize == cursize and retval == 2 and cursize > 1024:
-                self.to_screen(u'\r[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
+                self.to_screen(u'[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
                retval = 0
                break
        if retval == 0 or (test and retval == 2):
            fsize = os.path.getsize(encodeFilename(tmpfilename))
-            self.to_screen(u'\r[rtmpdump] %s bytes' % fsize)
+            self.to_screen(u'[rtmpdump] %s bytes' % fsize)
            self.try_rename(tmpfilename, filename)
            self._hook_progress({
                'downloaded_bytes': fsize,
@@ -525,7 +583,7 @@ class FileDownloader(object):
                self.to_screen(u'\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
                return False

-        data_len_str = self.format_bytes(data_len)
+        data_len_str = format_bytes(data_len)
        byte_counter = 0 + resume_len
        block_size = self.params.get('buffersize', 1024)
        start = time.time()
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -7,8 +7,10 @@ import errno
 import io
 import json
 import os
+import platform
 import re
 import shutil
+import subprocess
 import socket
 import sys
 import time
@@ -18,6 +20,7 @@ if os.name == 'nt':
    import ctypes

 from .utils import (
+    compat_cookiejar,
    compat_http_client,
    compat_print,
    compat_str,
@@ -30,9 +33,12 @@ from .utils import (
    DownloadError,
    encodeFilename,
    ExtractorError,
+    format_bytes,
    locked_file,
+    make_HTTPS_handler,
    MaxDownloadsReached,
    PostProcessingError,
+    platform_name,
    preferredencoding,
    SameFileError,
    sanitize_filename,
@@ -41,9 +47,11 @@ from .utils import (
    UnavailableVideoError,
    write_json_file,
    write_string,
+    YoutubeDLHandler,
 )
 from .extractor import get_info_extractor, gen_extractors
 from .FileDownloader import FileDownloader
+from .version import __version__


 class YoutubeDL(object):
@@ -118,9 +126,12 @@ class YoutubeDL(object):
    noplaylist:        Download single video instead of a playlist if in doubt.
    age_limit:         An integer representing the user's age in years.
                       Unsuitable videos for the given age are skipped.
-    downloadarchive:   File name of a file where all downloads are recorded.
+    download_archive:   File name of a file where all downloads are recorded.
                       Videos already present in the file are not downloaded
                       again.
+    cookiefile:        File name where cookies should be read from and dumped to.
+    nocheckcertificate:Do not verify SSL certificates
+    proxy:             URL of the proxy server to use

    The following parameters are not used by YoutubeDL itself, they are used by
    the FileDownloader:
@@ -135,7 +146,7 @@ class YoutubeDL(object):
    _num_downloads = None
    _screen_file = None

-    def __init__(self, params):
+    def __init__(self, params={}):
        """Create a FileDownloader object with the given options."""
        self._ies = []
        self._ies_instances = {}
@@ -144,6 +155,7 @@ class YoutubeDL(object):
        self._download_retcode = 0
        self._num_downloads = 0
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
+        self.params = params

        if (sys.version_info >= (3,) and sys.platform != 'win32' and
                sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
@@ -153,14 +165,15 @@ class YoutubeDL(object):
                u'Assuming --restrict-filenames since file system encoding '
                u'cannot encode all charactes. '
                u'Set the LC_ALL environment variable to fix this.')
-            params['restrictfilenames'] = True
+            self.params['restrictfilenames'] = True

-        self.params = params
        self.fd = FileDownloader(self, self.params)

-        if '%(stitle)s' in self.params['outtmpl']:
+        if '%(stitle)s' in self.params.get('outtmpl', ''):
            self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')

+        self._setup_opener()
+
    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        self._ies.append(ie)
@@ -241,10 +254,9 @@ class YoutubeDL(object):

    def __exit__(self, *args):
        self.restore_console_title()
-
-    def fixed_template(self):
-        """Checks if the output template is fixed."""
-        return (re.search(u'(?u)%\\(.+?\\)s', self.params['outtmpl']) is None)
+    
+        if self.params.get('cookiefile') is not None:
+            self.cookiejar.save()

    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.
@@ -782,13 +794,15 @@ class YoutubeDL(object):

    def download(self, url_list):
        """Download a given list of URLs."""
-        if len(url_list) > 1 and self.fixed_template():
+        if (len(url_list) > 1 and
+                '%' not in self.params['outtmpl']
+                and self.params.get('max_downloads') != 1):
            raise SameFileError(self.params['outtmpl'])

        for url in url_list:
            try:
                #It also downloads the videos
-                videos = self.extract_info(url)
+                self.extract_info(url)
            except UnavailableVideoError:
                self.report_error(u'unable to download video')
            except MaxDownloadsReached:
@@ -820,20 +834,26 @@ class YoutubeDL(object):
            except (IOError, OSError):
                self.report_warning(u'Unable to remove downloaded video file')

-    def in_download_archive(self, info_dict):
-        fn = self.params.get('download_archive')
-        if fn is None:
-            return False
-        extractor = info_dict.get('extractor_id')
+    def _make_archive_id(self, info_dict):
+        # Future-proof against any change in case
+        # and backwards compatibility with prior versions
+        extractor = info_dict.get('extractor_key')
        if extractor is None:
            if 'id' in info_dict:
                extractor = info_dict.get('ie_key')  # key in a playlist
        if extractor is None:
+            return None  # Incomplete video information
+        return extractor.lower() + u' ' + info_dict['id']
+
+    def in_download_archive(self, info_dict):
+        fn = self.params.get('download_archive')
+        if fn is None:
+            return False
+
+        vid_id = self._make_archive_id(info_dict)
+        if vid_id is None:
            return False  # Incomplete video information
-        # Future-proof against any change in case
-        # and backwards compatibility with prior versions
-        extractor = extractor.lower()
-        vid_id = extractor + u' ' + info_dict['id']
+
        try:
            with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                for line in archive_file:
@@ -848,12 +868,15 @@ class YoutubeDL(object):
        fn = self.params.get('download_archive')
        if fn is None:
            return
-        vid_id = info_dict['extractor'] + u' ' + info_dict['id']
+        vid_id = self._make_archive_id(info_dict)
+        assert vid_id
        with locked_file(fn, 'a', encoding='utf-8') as archive_file:
            archive_file.write(vid_id + u'\n')

    @staticmethod
    def format_resolution(format, default='unknown'):
+        if format.get('vcodec') == 'none':
+            return 'audio only'
        if format.get('_resolution') is not None:
            return format['_resolution']
        if format.get('height') is not None:
@@ -867,10 +890,11 @@ class YoutubeDL(object):

    def list_formats(self, info_dict):
        def format_note(fdict):
-            if fdict.get('format_note') is not None:
-                return fdict['format_note']
            res = u''
-            if fdict.get('vcodec') is not None:
+            if fdict.get('format_note') is not None:
+                res += fdict['format_note'] + u' '
+            if (fdict.get('vcodec') is not None and
+                    fdict.get('vcodec') != 'none'):
                res += u'%-5s' % fdict['vcodec']
            elif fdict.get('vbr') is not None:
                res += u'video'
@@ -886,25 +910,100 @@ class YoutubeDL(object):
                res += 'audio'
            if fdict.get('abr') is not None:
                res += u'@%3dk' % fdict['abr']
+            if fdict.get('filesize') is not None:
+                if res:
+                    res += u', '
+                res += format_bytes(fdict['filesize'])
            return res

-        def line(format):
-            return (u'%-20s%-10s%-12s%s' % (
+        def line(format, idlen=20):
+            return ((u'%-' + compat_str(idlen + 1) + u's%-10s%-12s%s') % (
                format['format_id'],
                format['ext'],
                self.format_resolution(format),
                format_note(format),
-                )
-            )
+            ))

        formats = info_dict.get('formats', [info_dict])
-        formats_s = list(map(line, formats))
+        idlen = max(len(u'format code'),
+                    max(len(f['format_id']) for f in formats))
+        formats_s = [line(f, idlen) for f in formats]
        if len(formats) > 1:
            formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
            formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'

        header_line = line({
            'format_id': u'format code', 'ext': u'extension',
-            '_resolution': u'resolution', 'format_note': u'note'})
+            '_resolution': u'resolution', 'format_note': u'note'}, idlen=idlen)
        self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
                       (info_dict['id'], header_line, u"\n".join(formats_s)))
+
+    def urlopen(self, req):
+        """ Start an HTTP download """
+        return self._opener.open(req)
+
+    def print_debug_header(self):
+        if not self.params.get('verbose'):
+            return
+        write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
+        try:
+            sp = subprocess.Popen(
+                ['git', 'rev-parse', '--short', 'HEAD'],
+                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
+                cwd=os.path.dirname(os.path.abspath(__file__)))
+            out, err = sp.communicate()
+            out = out.decode().strip()
+            if re.match('[0-9a-f]+', out):
+                write_string(u'[debug] Git HEAD: ' + out + u'\n')
+        except:
+            try:
+                sys.exc_clear()
+            except:
+                pass
+        write_string(u'[debug] Python version %s - %s' %
+                     (platform.python_version(), platform_name()) + u'\n')
+
+        proxy_map = {}
+        for handler in self._opener.handlers:
+            if hasattr(handler, 'proxies'):
+                proxy_map.update(handler.proxies)
+        write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
+
+    def _setup_opener(self, timeout=20):
+        opts_cookiefile = self.params.get('cookiefile')
+        opts_proxy = self.params.get('proxy')
+
+        if opts_cookiefile is None:
+            self.cookiejar = compat_cookiejar.CookieJar()
+        else:
+            self.cookiejar = compat_cookiejar.MozillaCookieJar(
+                opts_cookiefile)
+            if os.access(opts_cookiefile, os.R_OK):
+                self.cookiejar.load()
+
+        cookie_processor = compat_urllib_request.HTTPCookieProcessor(
+            self.cookiejar)
+        if opts_proxy is not None:
+            if opts_proxy == '':
+                proxies = {}
+            else:
+                proxies = {'http': opts_proxy, 'https': opts_proxy}
+        else:
+            proxies = compat_urllib_request.getproxies()
+            # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
+            if 'http' in proxies and 'https' not in proxies:
+                proxies['https'] = proxies['http']
+        proxy_handler = compat_urllib_request.ProxyHandler(proxies)
+        https_handler = make_HTTPS_handler(
+            self.params.get('nocheckcertificate', False))
+        opener = compat_urllib_request.build_opener(
+            https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
+        # Delete the default user-agent header, which would otherwise apply in
+        # cases where our custom HTTP handler doesn't come into play
+        # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
+        opener.addheaders = []
+        self._opener = opener
+
+        # TODO remove this global modification
+        compat_urllib_request.install_opener(opener)
+        socket.setdefaulttimeout(timeout)
--- a/youtube_dl/init.py
+++ b/youtube_dl/init.py
@@ -41,45 +41,35 @@ __authors__  = (
 __license__ = 'Public Domain'

 import codecs
-import collections
 import getpass
 import optparse
 import os
 import random
 import re
 import shlex
-import socket
 import subprocess
 import sys
-import traceback
-import platform


 from .utils import (
-    compat_cookiejar,
    compat_print,
-    compat_str,
-    compat_urllib_request,
    DateRange,
    decodeOption,
    determine_ext,
    DownloadError,
    get_cachedir,
-    make_HTTPS_handler,
    MaxDownloadsReached,
-    platform_name,
    preferredencoding,
    SameFileError,
    std_headers,
    write_string,
-    YoutubeDLHandler,
 )
 from .update import update_self
-from .version import __version__
 from .FileDownloader import (
    FileDownloader,
 )
 from .extractor import gen_extractors
+from .version import __version__
 from .YoutubeDL import YoutubeDL
 from .PostProcessor import (
    FFmpegMetadataPP,
@@ -216,7 +206,9 @@ def parseOpts(overrideArguments=None):
            dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)
    selection.add_option('--match-title', dest='matchtitle', metavar='REGEX',help='download only matching titles (regex or caseless sub-string)')
    selection.add_option('--reject-title', dest='rejecttitle', metavar='REGEX',help='skip download for matching titles (regex or caseless sub-string)')
-    selection.add_option('--max-downloads', metavar='NUMBER', dest='max_downloads', help='Abort after downloading NUMBER files', default=None)
+    selection.add_option('--max-downloads', metavar='NUMBER',
+                         dest='max_downloads', type=int, default=None,
+                         help='Abort after downloading NUMBER files')
    selection.add_option('--min-filesize', metavar='SIZE', dest='min_filesize', help="Do not download any videos smaller than SIZE (e.g. 50k or 44.6m)", default=None)
    selection.add_option('--max-filesize', metavar='SIZE', dest='max_filesize', help="Do not download any videos larger than SIZE (e.g. 50k or 44.6m)", default=None)
    selection.add_option('--date', metavar='DATE', dest='date', help='download only videos uploaded in this date', default=None)
@@ -452,19 +444,6 @@ def _real_main(argv=None):

    parser, opts, args = parseOpts(argv)

-    # Open appropriate CookieJar
-    if opts.cookiefile is None:
-        jar = compat_cookiejar.CookieJar()
-    else:
-        try:
-            jar = compat_cookiejar.MozillaCookieJar(opts.cookiefile)
-            if os.access(opts.cookiefile, os.R_OK):
-                jar.load()
-        except (IOError, OSError) as err:
-            if opts.verbose:
-                traceback.print_exc()
-            write_string(u'ERROR: unable to open cookie file\n')
-            sys.exit(101)
    # Set user agent
    if opts.user_agent is not None:
        std_headers['User-Agent'] = opts.user_agent
@@ -496,8 +475,6 @@ def _real_main(argv=None):
    all_urls = batchurls + args
    all_urls = [url.strip() for url in all_urls]

-    opener = _setup_opener(jar=jar, opts=opts)
-
    extractors = gen_extractors()

    if opts.list_extractors:
@@ -552,7 +529,7 @@ def _real_main(argv=None):
    if opts.retries is not None:
        try:
            opts.retries = int(opts.retries)
-        except (TypeError, ValueError) as err:
+        except (TypeError, ValueError):
            parser.error(u'invalid retry count specified')
    if opts.buffersize is not None:
        numeric_buffersize = FileDownloader.parse_bytes(opts.buffersize)
@@ -563,13 +540,13 @@ def _real_main(argv=None):
        opts.playliststart = int(opts.playliststart)
        if opts.playliststart <= 0:
            raise ValueError(u'Playlist start must be positive')
-    except (TypeError, ValueError) as err:
+    except (TypeError, ValueError):
        parser.error(u'invalid playlist start number specified')
    try:
        opts.playlistend = int(opts.playlistend)
        if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart):
            raise ValueError(u'Playlist end must be greater than playlist start')
-    except (TypeError, ValueError) as err:
+    except (TypeError, ValueError):
        parser.error(u'invalid playlist end number specified')
    if opts.extractaudio:
        if opts.audioformat not in ['best', 'aac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav']:
@@ -672,34 +649,13 @@ def _real_main(argv=None):
        'youtube_print_sig_code': opts.youtube_print_sig_code,
        'age_limit': opts.age_limit,
        'download_archive': opts.download_archive,
+        'cookiefile': opts.cookiefile,
+        'nocheckcertificate': opts.no_check_certificate,
+        'proxy': opts.proxy,
    }

    with YoutubeDL(ydl_opts) as ydl:
-        if opts.verbose:
-            write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
-            try:
-                sp = subprocess.Popen(
-                    ['git', 'rev-parse', '--short', 'HEAD'],
-                    stdout=subprocess.PIPE, stderr=subprocess.PIPE,
-                    cwd=os.path.dirname(os.path.abspath(__file__)))
-                out, err = sp.communicate()
-                out = out.decode().strip()
-                if re.match('[0-9a-f]+', out):
-                    write_string(u'[debug] Git HEAD: ' + out + u'\n')
-            except:
-                try:
-                    sys.exc_clear()
-                except:
-                    pass
-            write_string(u'[debug] Python version %s - %s' %
-                         (platform.python_version(), platform_name()) + u'\n')
-
-            proxy_map = {}
-            for handler in opener.handlers:
-                if hasattr(handler, 'proxies'):
-                    proxy_map.update(handler.proxies)
-            write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
-
+        ydl.print_debug_header()
        ydl.add_default_info_extractors()

        # PostProcessors
@@ -730,46 +686,9 @@ def _real_main(argv=None):
            ydl.to_screen(u'--max-download limit reached, aborting.')
            retcode = 101

-    # Dump cookie jar if requested
-    if opts.cookiefile is not None:
-        try:
-            jar.save()
-        except (IOError, OSError):
-            sys.exit(u'ERROR: unable to save cookie jar')
-
    sys.exit(retcode)


-def _setup_opener(jar=None, opts=None, timeout=300):
-    if opts is None:
-        FakeOptions = collections.namedtuple(
-            'FakeOptions', ['proxy', 'no_check_certificate'])
-        opts = FakeOptions(proxy=None, no_check_certificate=False)
-
-    cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar)
-    if opts.proxy is not None:
-        if opts.proxy == '':
-            proxies = {}
-        else:
-            proxies = {'http': opts.proxy, 'https': opts.proxy}
-    else:
-        proxies = compat_urllib_request.getproxies()
-        # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
-        if 'http' in proxies and 'https' not in proxies:
-            proxies['https'] = proxies['http']
-    proxy_handler = compat_urllib_request.ProxyHandler(proxies)
-    https_handler = make_HTTPS_handler(opts)
-    opener = compat_urllib_request.build_opener(
-        https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
-    # Delete the default user-agent header, which would otherwise apply in
-    # cases where our custom HTTP handler doesn't come into play
-    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
-    opener.addheaders = []
-    compat_urllib_request.install_opener(opener)
-    socket.setdefaulttimeout(timeout)
-    return opener
-
-
 def main(argv=None):
    try:
        _real_main(argv)
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -20,9 +20,11 @@ from .c56 import C56IE
 from .canalplus import CanalplusIE
 from .canalc2 import Canalc2IE
 from .cinemassacre import CinemassacreIE
+from .clipfish import ClipfishIE
+from .clipsyndicate import ClipsyndicateIE
 from .cnn import CNNIE
 from .collegehumor import CollegeHumorIE
-from .comedycentral import ComedyCentralIE
+from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
 from .condenast import CondeNastIE
 from .criterion import CriterionIE
 from .cspan import CSpanIE
@@ -70,6 +72,7 @@ from .hotnewhiphop import HotNewHipHopIE
 from .howcast import HowcastIE
 from .hypem import HypemIE
 from .ign import IGNIE, OneUPIE
+from .imdb import ImdbIE
 from .ina import InaIE
 from .infoq import InfoQIE
 from .instagram import InstagramIE
@@ -104,6 +107,7 @@ from .ooyala import OoyalaIE
 from .orf import ORFIE
 from .pbs import PBSIE
 from .photobucket import PhotobucketIE
+from .podomatic import PodomaticIE
 from .pornhub import PornHubIE
 from .pornotube import PornotubeIE
 from .rbmaradio import RBMARadioIE
@@ -185,6 +189,7 @@ from .youtube import (
    YoutubeTruncatedURLIE,
    YoutubeWatchLaterIE,
    YoutubeFavouritesIE,
+    YoutubeHistoryIE,
 )
 from .zdf import ZDFIE

--- a/youtube_dl/extractor/anitube.py
+++ b/youtube_dl/extractor/anitube.py
@@ -1,5 +1,4 @@
 import re
-import xml.etree.ElementTree

 from .common import InfoExtractor

@@ -28,9 +27,8 @@ class AnitubeIE(InfoExtractor):
        key = self._html_search_regex(r'http://www\.anitube\.se/embed/([A-Za-z0-9_-]*)',
                                      webpage, u'key')

-        webpage_config = self._download_webpage('http://www.anitube.se/nuevo/econfig.php?key=%s' % key,
+        config_xml = self._download_xml('http://www.anitube.se/nuevo/econfig.php?key=%s' % key,
                                                key)
-        config_xml = xml.etree.ElementTree.fromstring(webpage_config.encode('utf-8'))

        video_title = config_xml.find('title').text

--- a/youtube_dl/extractor/arte.py
+++ b/youtube_dl/extractor/arte.py
@@ -1,7 +1,6 @@
 # encoding: utf-8
 import re
 import json
-import xml.etree.ElementTree

 from .common import InfoExtractor
 from ..utils import (
@@ -78,8 +77,7 @@ class ArteTvIE(InfoExtractor):
        """Extract from videos.arte.tv"""
        ref_xml_url = url.replace('/videos/', '/do_delegate/videos/')
        ref_xml_url = ref_xml_url.replace('.html', ',view,asPlayerXml.xml')
-        ref_xml = self._download_webpage(ref_xml_url, video_id, note=u'Downloading metadata')
-        ref_xml_doc = xml.etree.ElementTree.fromstring(ref_xml)
+        ref_xml_doc = self._download_xml(ref_xml_url, video_id, note=u'Downloading metadata')
        config_node = find_xpath_attr(ref_xml_doc, './/video', 'lang', lang)
        config_xml_url = config_node.attrib['ref']
        config_xml = self._download_webpage(config_xml_url, video_id, note=u'Downloading configuration')
@@ -109,9 +107,8 @@ class ArteTvIE(InfoExtractor):
        """Extract form http://liveweb.arte.tv/"""
        webpage = self._download_webpage(url, name)
        video_id = self._search_regex(r'eventId=(\d+?)("|&)', webpage, u'event id')
-        config_xml = self._download_webpage('http://download.liveweb.arte.tv/o21/liveweb/events/event-%s.xml' % video_id,
+        config_doc = self._download_xml('http://download.liveweb.arte.tv/o21/liveweb/events/event-%s.xml' % video_id,
                                            video_id, u'Downloading information')
-        config_doc = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
        event_doc = config_doc.find('event')
        url_node = event_doc.find('video').find('urlHd')
        if url_node is None:
--- a/youtube_dl/extractor/bambuser.py
+++ b/youtube_dl/extractor/bambuser.py
@@ -25,6 +25,11 @@ class BambuserIE(InfoExtractor):
            u'uploader': u'pixelversity',
            u'uploader_id': u'344706',
        },
+        u'params': {
+            # It doesn't respect the 'Range' header, so it would download the whole video,
+            # which caused the travis builds to fail: https://travis-ci.org/rg3/youtube-dl/jobs/14493845#L59
+            u'skip_download': True,
+        },
    }

    def _real_extract(self, url):
--- a/youtube_dl/extractor/brightcove.py
+++ b/youtube_dl/extractor/brightcove.py
@@ -76,18 +76,21 @@ class BrightcoveIE(InfoExtractor):
                  'playerID': find_xpath_attr(object_doc, './param', 'name', 'playerID').attrib['value'],
                  }
        def find_param(name):
-            return find_xpath_attr(object_doc, './param', 'name', name)
+            node = find_xpath_attr(object_doc, './param', 'name', name)
+            if node is not None:
+                return node.attrib['value']
+            return None
        playerKey = find_param('playerKey')
        # Not all pages define this value
        if playerKey is not None:
-            params['playerKey'] = playerKey.attrib['value']
+            params['playerKey'] = playerKey
        # The three fields hold the id of the video
        videoPlayer = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID')
        if videoPlayer is not None:
-            params['@videoPlayer'] = videoPlayer.attrib['value']
+            params['@videoPlayer'] = videoPlayer
        linkBase = find_param('linkBaseURL')
        if linkBase is not None:
-            params['linkBaseURL'] = linkBase.attrib['value']
+            params['linkBaseURL'] = linkBase
        data = compat_urllib_parse.urlencode(params)
        return cls._FEDERATED_URL_TEMPLATE % data

--- a/youtube_dl/extractor/canalplus.py
+++ b/youtube_dl/extractor/canalplus.py
@@ -1,6 +1,5 @@
 # encoding: utf-8
 import re
-import xml.etree.ElementTree

 from .common import InfoExtractor
 from ..utils import unified_strdate
@@ -31,11 +30,10 @@ class CanalplusIE(InfoExtractor):
            webpage = self._download_webpage(url, mobj.group('path'))
            video_id = self._search_regex(r'videoId = "(\d+)";', webpage, u'video id')
        info_url = self._VIDEO_INFO_TEMPLATE % video_id
-        info_page = self._download_webpage(info_url,video_id, 
+        doc = self._download_xml(info_url,video_id, 
                                           u'Downloading video info')

        self.report_extraction(video_id)
-        doc = xml.etree.ElementTree.fromstring(info_page.encode('utf-8'))
        video_info = [video for video in doc if video.find('ID').text == video_id][0]
        infos = video_info.find('INFOS')
        media = video_info.find('MEDIA')
--- a/youtube_dl/extractor/clipfish.py
+++ b/youtube_dl/extractor/clipfish.py
@@ -0,0 +1,57 @@
+import re
+import time
+import xml.etree.ElementTree
+
+from .common import InfoExtractor
+from ..utils import ExtractorError
+
+
+class ClipfishIE(InfoExtractor):
+    IE_NAME = u'clipfish'
+
+    _VALID_URL = r'^https?://(?:www\.)?clipfish\.de/.*?/video/(?P<id>[0-9]+)/'
+    _TEST = {
+        u'url': u'http://www.clipfish.de/special/game-trailer/video/3966754/fifa-14-e3-2013-trailer/',
+        u'file': u'3966754.mp4',
+        u'md5': u'2521cd644e862936cf2e698206e47385',
+        u'info_dict': {
+            u'title': u'FIFA 14 - E3 2013 Trailer',
+            u'duration': 82,
+        }
+    }
+
+    def _real_extract(self, url):
+        """Fetch the clipfish videoinfo XML and build the info dict from it."""
+        video_id = re.match(self._VALID_URL, url).group(1)
+
+        # The current timestamp is sent along as the ``ts`` query parameter
+        info_url = ('http://www.clipfish.de/devxml/videoinfo/%s?ts=%d' %
+                    (video_id, int(time.time())))
+        info_doc = self._download_xml(
+            info_url, video_id, note=u'Downloading info page')
+        title = info_doc.find('title').text
+        video_url = info_doc.find('filename').text
+        if video_url is None:
+            raise ExtractorError(
+                u'Cannot find video URL in document %r' %
+                xml.etree.ElementTree.tostring(info_doc))
+        thumbnail = info_doc.find('imageurl').text
+
+        # Durations look like HH:MM:SS:ms; the trailing ms field is ignored
+        duration = None
+        duration_m = re.match(
+            r'^(?P<hours>[0-9]+):(?P<minutes>[0-9]{2}):(?P<seconds>[0-9]{2}):(?P<ms>[0-9]*)$',
+            info_doc.find('duration').text)
+        if duration_m:
+            hours, minutes, seconds = (
+                int(duration_m.group(part))
+                for part in ('hours', 'minutes', 'seconds'))
+            duration = hours * 60 * 60 + minutes * 60 + seconds
+
+        return {
+            'id': video_id,
+            'title': title,
+            'url': video_url,
+            'thumbnail': thumbnail,
+            'duration': duration,
+        }
--- a/youtube_dl/extractor/clipsyndicate.py
+++ b/youtube_dl/extractor/clipsyndicate.py
@@ -0,0 +1,52 @@
+import re
+import xml.etree.ElementTree
+
+from .common import InfoExtractor
+from ..utils import (
+    find_xpath_attr,
+)
+
+
+class ClipsyndicateIE(InfoExtractor):
+    _VALID_URL = r'http://www\.clipsyndicate\.com/video/play(list/\d+)?/(?P<id>\d+)'
+
+    _TEST = {
+        u'url': u'http://www.clipsyndicate.com/video/play/4629301/brick_briscoe',
+        u'md5': u'4d7d549451bad625e0ff3d7bd56d776c',
+        u'info_dict': {
+            u'id': u'4629301',
+            u'ext': u'mp4',
+            u'title': u'Brick Briscoe',
+            u'duration': 612,
+        },
+    }
+
+    def _real_extract(self, url):
+        """Extract a clip via the eplayer playlist referenced by its player.js."""
+        video_id = re.match(self._VALID_URL, url).group('id')
+        js_player = self._download_webpage(
+            'http://eplayer.clipsyndicate.com/embed/player.js?va_id=%s' % video_id,
+            video_id, u'Downloading player')
+        # it includes a required token
+        flvars = self._search_regex(r'flvars: "(.*?)"', js_player, u'flvars')
+
+        playlist_page = self._download_webpage(
+            'http://eplayer.clipsyndicate.com/osmf/playlist?%s' % flvars,
+            video_id, u'Downloading video info')
+        # Fix broken xml: every '&' is escaped before the page is parsed
+        playlist_page = re.sub('&', '&amp;', playlist_page)
+        pdoc = xml.etree.ElementTree.fromstring(playlist_page.encode('utf-8'))
+
+        track_doc = pdoc.find('trackList/track')
+        def find_param(name):
+            node = find_xpath_attr(track_doc, './/param', 'name', name)
+            if node is not None:
+                return node.attrib['value']
+
+        return {
+            'id': video_id,
+            'title': find_param('title'),
+            'url': track_doc.find('location').text,
+            'thumbnail': find_param('thumbnail'),
+            'duration': int(find_param('duration')),
+        }
--- a/youtube_dl/extractor/cnn.py
+++ b/youtube_dl/extractor/cnn.py
@@ -1,5 +1,4 @@
 import re
-import xml.etree.ElementTree

 from .common import InfoExtractor
 from ..utils import determine_ext
@@ -33,8 +32,7 @@ class CNNIE(InfoExtractor):
        path = mobj.group('path')
        page_title = mobj.group('title')
        info_url = u'http://cnn.com/video/data/3.0/%s/index.xml' % path
-        info_xml = self._download_webpage(info_url, page_title)
-        info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
+        info = self._download_xml(info_url, page_title)

        formats = []
        for f in info.findall('files/file'):
--- a/youtube_dl/extractor/collegehumor.py
+++ b/youtube_dl/extractor/collegehumor.py
@@ -1,5 +1,4 @@
 import re
-import xml.etree.ElementTree

 from .common import InfoExtractor
 from ..utils import (
@@ -46,11 +45,10 @@ class CollegeHumorIE(InfoExtractor):

        self.report_extraction(video_id)
        xmlUrl = 'http://www.collegehumor.com/moogaloop/video/' + video_id
-        metaXml = self._download_webpage(xmlUrl, video_id,
+        mdoc = self._download_xml(xmlUrl, video_id,
                                         u'Downloading info XML',
                                         u'Unable to download video info XML')

-        mdoc = xml.etree.ElementTree.fromstring(metaXml)
        try:
            videoNode = mdoc.findall('./video')[0]
            youtubeIdNode = videoNode.find('./youtubeID')
@@ -65,11 +63,10 @@ class CollegeHumorIE(InfoExtractor):

        if next_url.endswith(u'manifest.f4m'):
            manifest_url = next_url + '?hdcore=2.10.3'
-            manifestXml = self._download_webpage(manifest_url, video_id,
+            adoc = self._download_xml(manifest_url, video_id,
                                         u'Downloading XML manifest',
                                         u'Unable to download video info XML')

-            adoc = xml.etree.ElementTree.fromstring(manifestXml)
            try:
                video_id = adoc.findall('./{http://ns.adobe.com/f4m/1.0}id')[0].text
            except IndexError:
--- a/youtube_dl/extractor/comedycentral.py
+++ b/youtube_dl/extractor/comedycentral.py
@@ -1,7 +1,7 @@
 import re
-import xml.etree.ElementTree

 from .common import InfoExtractor
+from .mtv import MTVIE, _media_xml_tag
 from ..utils import (
    compat_str,
    compat_urllib_parse,
@@ -11,7 +11,37 @@ from ..utils import (
 )


-class ComedyCentralIE(InfoExtractor):
+class ComedyCentralIE(MTVIE):
+    _VALID_URL = r'http://www\.comedycentral\.com/(video-clips|episodes|cc-studios)/(?P<title>.*)'
+    _FEED_URL = u'http://comedycentral.com/feeds/mrss/'
+
+    _TEST = {
+        u'url': u'http://www.comedycentral.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother',
+        u'md5': u'4167875aae411f903b751a21f357f1ee',
+        u'info_dict': {
+            u'id': u'cef0cbb3-e776-4bc9-b62e-8016deccb354',
+            u'ext': u'mp4',
+            u'title': u'Uncensored - Greg Fitzsimmons - Too Good of a Mother',
+            u'description': u'After a certain point, breastfeeding becomes c**kblocking.',
+        },
+    }
+    # Overwrite MTVIE properties we don't want
+    _TESTS = []
+
+    def _get_thumbnail_url(self, uri, itemdoc):
+        search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))
+        return itemdoc.find(search_path).attrib['url']
+
+    def _real_extract(self, url):
+        """Find the data-mgid on the page and return the videos of that feed."""
+        title = re.match(self._VALID_URL, url).group('title')
+        webpage = self._download_webpage(url, title)
+        mgid = self._search_regex(r'data-mgid="(?P<mgid>mgid:.*?)"',
+                                  webpage, u'mgid')
+        return self._get_videos_info(mgid)
+
+
+class ComedyCentralShowsIE(InfoExtractor):
    IE_DESC = u'The Daily Show / Colbert Report'
    # urls can be abbreviations like :thedailyshow or :colbert
    # urls for episodes like:
@@ -127,13 +157,12 @@ class ComedyCentralIE(InfoExtractor):

        uri = mMovieParams[0][1]
        indexUrl = 'http://shadow.comedycentral.com/feeds/video_player/mrss/?' + compat_urllib_parse.urlencode({'uri': uri})
-        indexXml = self._download_webpage(indexUrl, epTitle,
+        idoc = self._download_xml(indexUrl, epTitle,
                                          u'Downloading show index',
                                          u'unable to download episode index')

        results = []

-        idoc = xml.etree.ElementTree.fromstring(indexXml)
        itemEls = idoc.findall('.//item')
        for partNum,itemEl in enumerate(itemEls):
            mediaId = itemEl.findall('./guid')[0].text
@@ -144,10 +173,9 @@ class ComedyCentralIE(InfoExtractor):

            configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' +
                        compat_urllib_parse.urlencode({'uri': mediaId}))
-            configXml = self._download_webpage(configUrl, epTitle,
+            cdoc = self._download_xml(configUrl, epTitle,
                                               u'Downloading configuration for %s' % shortMediaId)

-            cdoc = xml.etree.ElementTree.fromstring(configXml)
            turls = []
            for rendition in cdoc.findall('.//rendition'):
                finfo = (rendition.attrib['bitrate'], rendition.findall('./src')[0].text)
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -4,11 +4,11 @@ import re
 import socket
 import sys
 import netrc
+import xml.etree.ElementTree

 from ..utils import (
    compat_http_client,
    compat_urllib_error,
-    compat_urllib_request,
    compat_str,

    clean_html,
@@ -19,6 +19,7 @@ from ..utils import (
    unescapeHTML,
 )

+
 class InfoExtractor(object):
    """Information Extractor class.

@@ -75,6 +76,7 @@ class InfoExtractor(object):
                    * acodec    Name of the audio codec in use
                    * vbr       Average video bitrate in KBit/s
                    * vcodec    Name of the video codec in use
+                    * filesize  The number of bytes, if known in advance
    webpage_url:    The url to the video webpage, if given to youtube-dl it
                    should allow to get the same result again. (It will be set
                    by YoutubeDL if it's missing)
@@ -156,7 +158,7 @@ class InfoExtractor(object):
        elif note is not False:
            self.to_screen(u'%s: %s' % (video_id, note))
        try:
-            return compat_urllib_request.urlopen(url_or_request)
+            return self._downloader.urlopen(url_or_request)
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            if errnote is None:
                errnote = u'Unable to download webpage'
@@ -208,6 +210,12 @@ class InfoExtractor(object):
        """ Returns the data of the page as a string """
        return self._download_webpage_handle(url_or_request, video_id, note, errnote)[0]

+    def _download_xml(self, url_or_request, video_id,
+                      note=u'Downloading XML', errnote=u'Unable to download XML'):
+        """Download the page and return it parsed as an xml.etree.ElementTree.Element"""
+        page = self._download_webpage(url_or_request, video_id, note, errnote)
+        return xml.etree.ElementTree.fromstring(page.encode('utf-8'))
+
    def to_screen(self, msg):
        """Print msg to screen, prefixing it with '[ie_name]'"""
        self._downloader.to_screen(u'[%s] %s' % (self.IE_NAME, msg))
--- a/youtube_dl/extractor/daum.py
+++ b/youtube_dl/extractor/daum.py
@@ -1,6 +1,5 @@
 # encoding: utf-8
 import re
-import xml.etree.ElementTree

 from .common import InfoExtractor
 from ..utils import (
@@ -32,14 +31,12 @@ class DaumIE(InfoExtractor):
        full_id = self._search_regex(r'<link rel="video_src" href=".+?vid=(.+?)"',
            webpage, u'full id')
        query = compat_urllib_parse.urlencode({'vid': full_id})
-        info_xml = self._download_webpage(
+        info = self._download_xml(
            'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query, video_id,
            u'Downloading video info')
-        urls_xml = self._download_webpage(
+        urls = self._download_xml(
            'http://videofarm.daum.net/controller/api/open/v1_2/MovieData.apixml?' + query,
            video_id, u'Downloading video formats info')
-        info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
-        urls = xml.etree.ElementTree.fromstring(urls_xml.encode('utf-8'))

        self.to_screen(u'%s: Getting video urls' % video_id)
        formats = []
@@ -49,10 +46,9 @@ class DaumIE(InfoExtractor):
                'vid': full_id,
                'profile': profile,
            })
-            url_xml = self._download_webpage(
+            url_doc = self._download_xml(
                'http://videofarm.daum.net/controller/api/open/v1_2/MovieLocation.apixml?' + format_query,
                video_id, note=False)
-            url_doc = xml.etree.ElementTree.fromstring(url_xml.encode('utf-8'))
            format_url = url_doc.find('result/url').text
            formats.append({
                'url': format_url,
--- a/youtube_dl/extractor/dreisat.py
+++ b/youtube_dl/extractor/dreisat.py
@@ -1,7 +1,6 @@
 # coding: utf-8

 import re
-import xml.etree.ElementTree

 from .common import InfoExtractor
 from ..utils import (
@@ -30,8 +29,7 @@ class DreiSatIE(InfoExtractor):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
-        details_xml = self._download_webpage(details_url, video_id, note=u'Downloading video details')
-        details_doc = xml.etree.ElementTree.fromstring(details_xml.encode('utf-8'))
+        details_doc = self._download_xml(details_url, video_id, note=u'Downloading video details')

        thumbnail_els = details_doc.findall('.//teaserimage')
        thumbnails = [{
--- a/youtube_dl/extractor/ebaumsworld.py
+++ b/youtube_dl/extractor/ebaumsworld.py
@@ -1,5 +1,4 @@
 import re
-import xml.etree.ElementTree

 from .common import InfoExtractor
 from ..utils import determine_ext
@@ -21,9 +20,8 @@ class EbaumsWorldIE(InfoExtractor):
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
-        config_xml = self._download_webpage(
+        config = self._download_xml(
            'http://www.ebaumsworld.com/video/player/%s' % video_id, video_id)
-        config = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
        video_url = config.find('file').text

        return {
--- a/youtube_dl/extractor/faz.py
+++ b/youtube_dl/extractor/faz.py
@@ -1,6 +1,5 @@
 # encoding: utf-8
 import re
-import xml.etree.ElementTree

 from .common import InfoExtractor
 from ..utils import (
@@ -28,9 +27,8 @@ class FazIE(InfoExtractor):
        webpage = self._download_webpage(url, video_id)
        config_xml_url = self._search_regex(r'writeFLV\(\'(.+?)\',', webpage,
            u'config xml url')
-        config_xml = self._download_webpage(config_xml_url, video_id,
+        config = self._download_xml(config_xml_url, video_id,
            u'Downloading config xml')
-        config = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))

        encodings = config.find('ENCODINGS')
        formats = []
--- a/youtube_dl/extractor/francetv.py
+++ b/youtube_dl/extractor/francetv.py
@@ -1,6 +1,5 @@
 # encoding: utf-8
 import re
-import xml.etree.ElementTree
 import json

 from .common import InfoExtractor
@@ -11,11 +10,10 @@ from ..utils import (

 class FranceTVBaseInfoExtractor(InfoExtractor):
    def _extract_video(self, video_id):
-        xml_desc = self._download_webpage(
+        info = self._download_xml(
            'http://www.francetvinfo.fr/appftv/webservices/video/'
            'getInfosOeuvre.php?id-diffusion='
            + video_id, video_id, 'Downloading XML config')
-        info = xml.etree.ElementTree.fromstring(xml_desc.encode('utf-8'))

        manifest_url = info.find('videos/video/url').text
        video_url = manifest_url.replace('manifest.f4m', 'index_2_av.m3u8')
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -209,7 +209,7 @@ class GenericIE(InfoExtractor):
            mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
        if mobj is None:
            # Broaden the search a little bit: JWPlayer JS loader
-            mobj = re.search(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http[^\'"&]*)', webpage)
+            mobj = re.search(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http[^\'"]*)', webpage)
        if mobj is None:
            # Try to find twitter cards info
            mobj = re.search(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage)
@@ -236,18 +236,16 @@ class GenericIE(InfoExtractor):
        video_id = compat_urllib_parse.unquote(os.path.basename(video_url))

        # here's a fun little line of code for you:
-        video_extension = os.path.splitext(video_id)[1][1:]
        video_id = os.path.splitext(video_id)[0]

        # video uploader is domain name
        video_uploader = self._search_regex(r'(?:https?://)?([^/]*)/.*',
            url, u'video uploader')

-        return [{
+        return {
            'id':       video_id,
            'url':      video_url,
            'uploader': video_uploader,
            'upload_date':  None,
            'title':    video_title,
-            'ext':      video_extension,
-        }]
+        }
--- a/youtube_dl/extractor/imdb.py
+++ b/youtube_dl/extractor/imdb.py
@@ -0,0 +1,60 @@
+import re
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urlparse,
+    get_element_by_attribute,
+)
+
+
+class ImdbIE(InfoExtractor):
+    IE_NAME = u'imdb'
+    IE_DESC = u'Internet Movie Database trailers'
+    _VALID_URL = r'http://www\.imdb\.com/video/imdb/vi(?P<id>\d+)'
+
+    _TEST = {
+        u'url': u'http://www.imdb.com/video/imdb/vi2524815897',
+        u'md5': u'9f34fa777ade3a6e57a054fdbcb3a068',
+        u'info_dict': {
+            u'id': u'2524815897',
+            u'ext': u'mp4',
+            u'title': u'Ice Age: Continental Drift Trailer (No. 2) - IMDb',
+            u'description': u'md5:9061c2219254e5d14e03c25c98e96a81',
+            u'duration': 151,
+        }
+    }
+
+    def _real_extract(self, url):
+        """Collect every format listed on the video page into one info dict."""
+        video_id = re.match(self._VALID_URL, url).group('id')
+        webpage = self._download_webpage(url, video_id)
+        descr = get_element_by_attribute('itemprop', 'description', webpage)
+        available_formats = re.findall(
+            r'case \'(?P<f_id>.*?)\' :$\s+url = \'(?P<path>.*?)\'', webpage,
+            flags=re.MULTILINE)
+        formats = []
+        for f_id, f_path in available_formats:
+            format_page = self._download_webpage(
+                compat_urlparse.urljoin(url, f_path), video_id,
+                u'Downloading info for %s format' % f_id)
+            json_data = self._search_regex(
+                r'<script[^>]+class="imdb-player-data"[^>]*?>(.*?)</script>',
+                format_page, u'json data', flags=re.DOTALL)
+            info = json.loads(json_data)
+            format_info = info['videoPlayerObject']['video']
+            formats.append({
+                'format_id': f_id,
+                'url': format_info['url'],
+                'height': format_info['height'],
+                'width': format_info['width'],
+            })
+        # thumbnail and duration come from the last format page processed
+        return {
+            'id': video_id,
+            'title': self._og_search_title(webpage),
+            'formats': formats,
+            'description': descr,
+            'thumbnail': format_info['slate'],
+            'duration': int(info['titleObject']['title']['duration_seconds']),
+        }
--- a/youtube_dl/extractor/internetvideoarchive.py
+++ b/youtube_dl/extractor/internetvideoarchive.py
@@ -1,5 +1,4 @@
 import re
-import xml.etree.ElementTree

 from .common import InfoExtractor
 from ..utils import (
@@ -43,9 +42,8 @@ class InternetVideoArchiveIE(InfoExtractor):
        video_id = query_dic['publishedid'][0]
        url = self._build_url(query)

-        flashconfiguration_xml = self._download_webpage(url, video_id,
+        flashconfiguration = self._download_xml(url, video_id,
            u'Downloading flash configuration')
-        flashconfiguration = xml.etree.ElementTree.fromstring(flashconfiguration_xml.encode('utf-8'))
        file_url = flashconfiguration.find('file').text
        file_url = file_url.replace('/playlist.aspx', '/mrssplaylist.aspx')
        # Replace some of the parameters in the query to get the best quality
@@ -53,9 +51,8 @@ class InternetVideoArchiveIE(InfoExtractor):
        file_url = re.sub(r'(?<=\?)(.+)$',
            lambda m: self._clean_query(m.group()),
            file_url)
-        info_xml = self._download_webpage(file_url, video_id,
+        info = self._download_xml(file_url, video_id,
            u'Downloading video info')
-        info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
        item = info.find('channel/item')

        def _bp(p):
--- a/youtube_dl/extractor/jeuxvideo.py
+++ b/youtube_dl/extractor/jeuxvideo.py
@@ -2,7 +2,6 @@

 import json
 import re
-import xml.etree.ElementTree

 from .common import InfoExtractor

@@ -32,12 +31,9 @@ class JeuxVideoIE(InfoExtractor):
            r'http://www\.jeuxvideo\.com/config/\w+/\d+/(.*?)/\d+_player\.xml',
            xml_link, u'video ID')

-        xml_config = self._download_webpage(
+        config = self._download_xml(
            xml_link, title, u'Downloading XML config')
-        config = xml.etree.ElementTree.fromstring(xml_config.encode('utf-8'))
-        info_json = self._search_regex(
-            r'(?sm)<format\.json>(.*?)</format\.json>',
-            xml_config, u'JSON information')
+        info_json = config.find('format.json').text
        info = json.loads(info_json)['versions'][0]
        
        video_url = 'http://video720.jeuxvideo.com/' + info['file']
--- a/youtube_dl/extractor/justintv.py
+++ b/youtube_dl/extractor/justintv.py
@@ -1,7 +1,6 @@
 import json
 import os
 import re
-import xml.etree.ElementTree

 from .common import InfoExtractor
 from ..utils import (
@@ -94,10 +93,9 @@ class JustinTVIE(InfoExtractor):
            archive_id = m.group(1)

            api = api_base + '/broadcast/by_chapter/%s.xml' % chapter_id
-            chapter_info_xml = self._download_webpage(api, chapter_id,
+            doc = self._download_xml(api, chapter_id,
                                             note=u'Downloading chapter information',
                                             errnote=u'Chapter information download failed')
-            doc = xml.etree.ElementTree.fromstring(chapter_info_xml)
            for a in doc.findall('.//archive'):
                if archive_id == a.find('./id').text:
                    break
--- a/youtube_dl/extractor/livestream.py
+++ b/youtube_dl/extractor/livestream.py
@@ -1,6 +1,5 @@
 import re
 import json
-import xml.etree.ElementTree

 from .common import InfoExtractor
 from ..utils import (
@@ -80,8 +79,7 @@ class LivestreamOriginalIE(InfoExtractor):
        user = mobj.group('user')
        api_url = 'http://x{0}x.api.channel.livestream.com/2.0/clipdetails?extendedInfo=true&id={1}'.format(user, video_id)

-        api_response = self._download_webpage(api_url, video_id)
-        info = xml.etree.ElementTree.fromstring(api_response.encode('utf-8'))
+        info = self._download_xml(api_url, video_id)
        item = info.find('channel').find('item')
        ns = {'media': 'http://search.yahoo.com/mrss'}
        thumbnail_url = item.find(xpath_with_ns('media:thumbnail', ns)).attrib['url']
--- a/youtube_dl/extractor/mixcloud.py
+++ b/youtube_dl/extractor/mixcloud.py
@@ -60,7 +60,7 @@ class MixcloudIE(InfoExtractor):
            'title': info['name'],
            'url': final_song_url,
            'ext': 'mp3',
-            'description': info['description'],
+            'description': info.get('description'),
            'thumbnail': info['pictures'].get('extra_large'),
            'uploader': info['user']['name'],
            'uploader_id': info['user']['username'],
--- a/youtube_dl/extractor/mtv.py
+++ b/youtube_dl/extractor/mtv.py
@@ -109,9 +109,8 @@ class MTVIE(InfoExtractor):
    def _get_videos_info(self, uri):
        video_id = self._id_from_uri(uri)
        data = compat_urllib_parse.urlencode({'uri': uri})
-        infoXml = self._download_webpage(self._FEED_URL +'?' + data, video_id,
+        idoc = self._download_xml(self._FEED_URL +'?' + data, video_id,
                                         u'Downloading info')
-        idoc = xml.etree.ElementTree.fromstring(infoXml.encode('utf-8'))
        return [self._get_video_info(item) for item in idoc.findall('.//item')]

    def _real_extract(self, url):
--- a/youtube_dl/extractor/myspass.py
+++ b/youtube_dl/extractor/myspass.py
@@ -1,5 +1,4 @@
 import os.path
-import xml.etree.ElementTree

 from .common import InfoExtractor
 from ..utils import (
@@ -33,8 +32,7 @@ class MySpassIE(InfoExtractor):

        # get metadata
        metadata_url = META_DATA_URL_TEMPLATE % video_id
-        metadata_text = self._download_webpage(metadata_url, video_id)
-        metadata = xml.etree.ElementTree.fromstring(metadata_text.encode('utf-8'))
+        metadata = self._download_xml(metadata_url, video_id)

        # extract values from metadata
        url_flv_el = metadata.find('url_flv')
--- a/youtube_dl/extractor/naver.py
+++ b/youtube_dl/extractor/naver.py
@@ -1,6 +1,5 @@
 # encoding: utf-8
 import re
-import xml.etree.ElementTree

 from .common import InfoExtractor
 from ..utils import (
@@ -38,14 +37,12 @@ class NaverIE(InfoExtractor):
            'protocol': 'p2p',
            'inKey': key,
        })
-        info_xml = self._download_webpage(
+        info = self._download_xml(
            'http://serviceapi.rmcnmv.naver.com/flash/videoInfo.nhn?' + query,
            video_id, u'Downloading video info')
-        urls_xml = self._download_webpage(
+        urls = self._download_xml(
            'http://serviceapi.rmcnmv.naver.com/flash/playableEncodingOption.nhn?' + query_urls,
            video_id, u'Downloading video formats info')
-        info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
-        urls = xml.etree.ElementTree.fromstring(urls_xml.encode('utf-8'))

        formats = []
        for format_el in urls.findall('EncodingOptions/EncodingOption'):
--- a/youtube_dl/extractor/nbc.py
+++ b/youtube_dl/extractor/nbc.py
@@ -1,5 +1,4 @@
 import re
-import xml.etree.ElementTree

 from .common import InfoExtractor
 from ..utils import find_xpath_attr, compat_str
@@ -21,8 +20,8 @@ class NBCNewsIE(InfoExtractor):
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
-        info_xml = self._download_webpage('http://www.nbcnews.com/id/%s/displaymode/1219' % video_id, video_id)
-        info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8')).find('video')
+        all_info = self._download_xml('http://www.nbcnews.com/id/%s/displaymode/1219' % video_id, video_id)
+        info = all_info.find('video')

        return {'id': video_id,
                'title': info.find('headline').text,
--- a/youtube_dl/extractor/nhl.py
+++ b/youtube_dl/extractor/nhl.py
@@ -1,6 +1,5 @@
 import re
 import json
-import xml.etree.ElementTree

 from .common import InfoExtractor
 from ..utils import (
@@ -26,9 +25,8 @@ class NHLBaseInfoExtractor(InfoExtractor):
            'path': initial_video_url.replace('.mp4', '_sd.mp4'),
        })
        path_url = 'http://video.nhl.com/videocenter/servlets/encryptvideopath?' + data
-        path_response = self._download_webpage(path_url, video_id,
+        path_doc = self._download_xml(path_url, video_id,
            u'Downloading final video url')
-        path_doc = xml.etree.ElementTree.fromstring(path_response)
        video_url = path_doc.find('path').text

        join = compat_urlparse.urljoin
--- a/youtube_dl/extractor/niconico.py
+++ b/youtube_dl/extractor/niconico.py
@@ -2,7 +2,6 @@

 import re
 import socket
-import xml.etree.ElementTree

 from .common import InfoExtractor
 from ..utils import (
@@ -81,7 +80,7 @@ class NiconicoIE(InfoExtractor):
        # the cookies in order to be able to download the info webpage
        self._download_webpage('http://www.nicovideo.jp/watch/' + video_id, video_id)

-        video_info_webpage = self._download_webpage(
+        video_info = self._download_xml(
            'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id,
            note=u'Downloading video info page')

@@ -92,7 +91,6 @@ class NiconicoIE(InfoExtractor):
        video_real_url = compat_urlparse.parse_qs(flv_info_webpage)['url'][0]

        # Start extracting information
-        video_info = xml.etree.ElementTree.fromstring(video_info_webpage)
        video_title = video_info.find('.//title').text
        video_extension = video_info.find('.//movie_type').text
        video_format = video_extension.upper()
@@ -107,13 +105,11 @@ class NiconicoIE(InfoExtractor):
        video_uploader = video_uploader_id
        url = 'http://seiga.nicovideo.jp/api/user/info?id=' + video_uploader_id
        try:
-            user_info_webpage = self._download_webpage(
+            user_info = self._download_xml(
                url, video_id, note=u'Downloading user information')
+            video_uploader = user_info.find('.//nickname').text
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'Unable to download user info webpage: %s' % compat_str(err))
-        else:
-            user_info = xml.etree.ElementTree.fromstring(user_info_webpage)
-            video_uploader = user_info.find('.//nickname').text

        return {
            'id':          video_id,
--- a/youtube_dl/extractor/podomatic.py
+++ b/youtube_dl/extractor/podomatic.py
@@ -0,0 +1,49 @@
+import json
+import re
+
+from .common import InfoExtractor
+
+
+class PodomaticIE(InfoExtractor):
+    IE_NAME = 'podomatic'
+    _VALID_URL = r'^(?P<proto>https?)://(?P<channel>[^.]+)\.podomatic\.com/entry/(?P<id>[^?]+)'
+
+    _TEST = {
+        u"url": u"http://scienceteachingtips.podomatic.com/entry/2009-01-02T16_03_35-08_00",
+        u"file": u"2009-01-02T16_03_35-08_00.mp3",
+        u"md5": u"84bb855fcf3429e6bf72460e1eed782d",
+        u"info_dict": {
+            u"uploader": u"Science Teaching Tips",
+            u"uploader_id": u"scienceteachingtips",
+            u"title": u"64.  When the Moon Hits Your Eye",
+            u"duration": 446,
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        channel = mobj.group('channel')
+
+        json_url = (('%s://%s.podomatic.com/entry/embed_params/%s' +
+                     '?permalink=true&rtmp=0') %
+                    (mobj.group('proto'), channel, video_id))
+        data_json = self._download_webpage(
+            json_url, video_id, note=u'Downloading video info')
+        data = json.loads(data_json)
+
+        video_url = data['downloadLink']
+        uploader = data['podcast']
+        title = data['title']
+        thumbnail = data['imageLocation']
+        duration = int(data['length'] / 1000.0)
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': title,
+            'uploader': uploader,
+            'uploader_id': channel,
+            'thumbnail': thumbnail,
+            'duration': duration,
+        }
--- a/youtube_dl/extractor/sina.py
+++ b/youtube_dl/extractor/sina.py
@@ -1,7 +1,6 @@
 # coding: utf-8

 import re
-import xml.etree.ElementTree

 from .common import InfoExtractor
 from ..utils import (
@@ -35,12 +34,11 @@ class SinaIE(InfoExtractor):

    def _extract_video(self, video_id):
        data = compat_urllib_parse.urlencode({'vid': video_id})
-        url_page = self._download_webpage('http://v.iask.com/v_play.php?%s' % data,
+        url_doc = self._download_xml('http://v.iask.com/v_play.php?%s' % data,
            video_id, u'Downloading video url')
        image_page = self._download_webpage(
            'http://interface.video.sina.com.cn/interface/common/getVideoImage.php?%s' % data,
            video_id, u'Downloading thumbnail info')
-        url_doc = xml.etree.ElementTree.fromstring(url_page.encode('utf-8'))

        return {'id': video_id,
                'url': url_doc.find('./durl/url').text,
--- a/youtube_dl/extractor/soundcloud.py
+++ b/youtube_dl/extractor/soundcloud.py
@@ -76,44 +76,78 @@ class SoundcloudIE(InfoExtractor):
    def _extract_info_dict(self, info, full_title=None, quiet=False):
        track_id = compat_str(info['id'])
        name = full_title or track_id
-        if quiet == False:
+        if quiet:
            self.report_extraction(name)

        thumbnail = info['artwork_url']
        if thumbnail is not None:
            thumbnail = thumbnail.replace('-large', '-t500x500')
+        ext = info.get('original_format', u'mp3')
        result = {
-            'id':       track_id,
+            'id': track_id,
            'uploader': info['user']['username'],
            'upload_date': unified_strdate(info['created_at']),
-            'title':    info['title'],
-            'ext':      info.get('original_format', u'mp3'),
+            'title': info['title'],
            'description': info['description'],
            'thumbnail': thumbnail,
        }
        if info.get('downloadable', False):
            # We can build a direct link to the song
-            result['url'] = 'https://api.soundcloud.com/tracks/{0}/download?client_id={1}'.format(track_id, self._CLIENT_ID)
+            format_url = (
+                u'https://api.soundcloud.com/tracks/{0}/download?client_id={1}'.format(
+                    track_id, self._CLIENT_ID))
+            result['formats'] = [{
+                'format_id': 'download',
+                'ext': ext,
+                'url': format_url,
+                'vcodec': 'none',
+            }]
        else:
            # We have to retrieve the url
            stream_json = self._download_webpage(
                'http://api.soundcloud.com/i1/tracks/{0}/streams?client_id={1}'.format(track_id, self._IPHONE_CLIENT_ID),
                track_id, u'Downloading track url')
-            # There should be only one entry in the dictionary
-            key, stream_url = list(json.loads(stream_json).items())[0]
-            if key.startswith(u'http'):
-                result['url'] = stream_url
-            elif key.startswith(u'rtmp'):
-                # The url doesn't have an rtmp app, we have to extract the playpath
-                url, path = stream_url.split('mp3:', 1)
-                result.update({
-                    'url': url,
-                    'play_path': 'mp3:' + path,
-                })
-            else:
+
+            formats = []
+            format_dict = json.loads(stream_json)
+            for key, stream_url in format_dict.items():
+                if key.startswith(u'http'):
+                    formats.append({
+                        'format_id': key,
+                        'ext': ext,
+                        'url': stream_url,
+                        'vcodec': 'none',
+                    })
+                elif key.startswith(u'rtmp'):
+                    # The url doesn't have an rtmp app, we have to extract the playpath
+                    url, path = stream_url.split('mp3:', 1)
+                    formats.append({
+                        'format_id': key,
+                        'url': url,
+                        'play_path': 'mp3:' + path,
+                        'ext': ext,
+                        'vcodec': 'none',
+                    })
+
+            if not formats:
                # We fallback to the stream_url in the original info, this
                # cannot be always used, sometimes it can give an HTTP 404 error
-                result['url'] = info['stream_url'] + '?client_id=' + self._CLIENT_ID,
+                formats.append({
+                    'format_id': u'fallback',
+                    'url': info['stream_url'] + '?client_id=' + self._CLIENT_ID,
+                    'ext': ext,
+                    'vcodec': 'none',
+                })
+
+            def format_pref(f):
+                if f['format_id'].startswith('http'):
+                    return 2
+                if f['format_id'].startswith('rtmp'):
+                    return 1
+                return 0
+
+            formats.sort(key=format_pref)
+            result['formats'] = formats

        return result

--- a/youtube_dl/extractor/spiegel.py
+++ b/youtube_dl/extractor/spiegel.py
@@ -1,5 +1,4 @@
 import re
-import xml.etree.ElementTree

 from .common import InfoExtractor

@@ -33,12 +32,10 @@ class SpiegelIE(InfoExtractor):
            r'<div class="module-title">(.*?)</div>', webpage, u'title')

        xml_url = u'http://video2.spiegel.de/flash/' + video_id + u'.xml'
-        xml_code = self._download_webpage(
+        idoc = self._download_xml(
            xml_url, video_id,
            note=u'Downloading XML', errnote=u'Failed to download XML')

-        idoc = xml.etree.ElementTree.fromstring(xml_code)
-
        formats = [
            {
                'format_id': n.tag.rpartition('type')[2],
--- a/youtube_dl/extractor/teamcoco.py
+++ b/youtube_dl/extractor/teamcoco.py
@@ -1,5 +1,4 @@
 import re
-import xml.etree.ElementTree

 from .common import InfoExtractor
 from ..utils import (
@@ -32,8 +31,7 @@ class TeamcocoIE(InfoExtractor):
        self.report_extraction(video_id)

        data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
-        data_xml = self._download_webpage(data_url, video_id, 'Downloading data webpage')
-        data = xml.etree.ElementTree.fromstring(data_xml.encode('utf-8'))
+        data = self._download_xml(data_url, video_id, 'Downloading data webpage')


        qualities = ['500k', '480p', '1000k', '720p', '1080p']
--- a/youtube_dl/extractor/toutv.py
+++ b/youtube_dl/extractor/toutv.py
@@ -1,6 +1,5 @@
 # coding: utf-8
 import re
-import xml.etree.ElementTree

 from .common import InfoExtractor
 from ..utils import (
@@ -40,11 +39,9 @@ class TouTvIE(InfoExtractor):
            r'"idMedia":\s*"([^"]+)"', webpage, u'media ID')

        streams_url = u'http://release.theplatform.com/content.select?pid=' + mediaId
-        streams_webpage = self._download_webpage(
+        streams_doc = self._download_xml(
            streams_url, video_id, note=u'Downloading stream list')

-        streams_doc = xml.etree.ElementTree.fromstring(
-            streams_webpage.encode('utf-8'))
        video_url = next(n.text
                         for n in streams_doc.findall('.//choice/url')
                         if u'//ad.doubleclick' not in n.text)
--- a/youtube_dl/extractor/trilulilu.py
+++ b/youtube_dl/extractor/trilulilu.py
@@ -1,6 +1,5 @@
 import json
 import re
-import xml.etree.ElementTree

 from .common import InfoExtractor

@@ -36,12 +35,10 @@ class TriluliluIE(InfoExtractor):

        format_url = (u'http://fs%(server)s.trilulilu.ro/%(hash)s/'
                      u'video-formats2' % log)
-        format_str = self._download_webpage(
+        format_doc = self._download_xml(
            format_url, video_id,
            note=u'Downloading formats',
            errnote=u'Error while downloading formats')
-
-        format_doc = xml.etree.ElementTree.fromstring(format_str)
 
        video_url_template = (
            u'http://fs%(server)s.trilulilu.ro/stream.php?type=video'
--- a/youtube_dl/extractor/videofyme.py
+++ b/youtube_dl/extractor/videofyme.py
@@ -1,5 +1,4 @@
 import re
-import xml.etree.ElementTree

 from .common import InfoExtractor
 from ..utils import (
@@ -27,9 +26,8 @@ class VideofyMeIE(InfoExtractor):
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
-        config_xml = self._download_webpage('http://sunshine.videofy.me/?videoId=%s' % video_id,
+        config = self._download_xml('http://sunshine.videofy.me/?videoId=%s' % video_id,
                                            video_id)
-        config = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
        video = config.find('video')
        sources = video.find('sources')
        url_node = next(node for node in [find_xpath_attr(sources, 'source', 'id', 'HQ %s' % key) 
--- a/youtube_dl/extractor/videopremium.py
+++ b/youtube_dl/extractor/videopremium.py
@@ -5,7 +5,7 @@ from .common import InfoExtractor


 class VideoPremiumIE(InfoExtractor):
-    _VALID_URL = r'(?:https?://)?(?:www\.)?videopremium\.tv/(?P<id>\w+)(?:/.*)?'
+    _VALID_URL = r'(?:https?://)?(?:www\.)?videopremium\.(?:tv|me)/(?P<id>\w+)(?:/.*)?'
    _TEST = {
        u'url': u'http://videopremium.tv/4w7oadjsf156',
        u'file': u'4w7oadjsf156.f4v',
@@ -41,4 +41,4 @@ class VideoPremiumIE(InfoExtractor):
            'player_url':  "http://videopremium.tv/uplayer/uppod.swf",
            'ext':         'f4v',
            'title':       video_title,
-        }
+        }
--- a/youtube_dl/extractor/viki.py
+++ b/youtube_dl/extractor/viki.py
@@ -1,6 +1,8 @@
 import re

 from ..utils import (
+    ExtractorError,
+    unescapeHTML,
    unified_strdate,
 )
 from .subtitles import SubtitlesInfoExtractor
@@ -20,7 +22,8 @@ class VikiIE(SubtitlesInfoExtractor):
            u'description': u'md5:c4b17b9626dd4b143dcc4d855ba3474e',
            u'upload_date': u'20131121',
            u'age_limit': 13,
-        }
+        },
+        u'skip': u'Blocked in the US',
    }

    def _real_extract(self, url):
@@ -32,11 +35,12 @@ class VikiIE(SubtitlesInfoExtractor):
        description = self._og_search_description(webpage)
        thumbnail = self._og_search_thumbnail(webpage)

-        uploader = self._html_search_regex(
-            r'<strong>Broadcast Network: </strong>\s*([^<]*)<', webpage,
-            u'uploader')
-        if uploader is not None:
-            uploader = uploader.strip()
+        uploader_m = re.search(
+            r'<strong>Broadcast Network: </strong>\s*([^<]*)<', webpage)
+        if uploader_m is None:
+            uploader = None
+        else:
+            uploader = uploader_m.group(1).strip()

        rating_str = self._html_search_regex(
            r'<strong>Rating: </strong>\s*([^<]*)<', webpage,
@@ -51,7 +55,12 @@ class VikiIE(SubtitlesInfoExtractor):
        age_limit = RATINGS.get(rating_str)

        info_url = 'http://www.viki.com/player5_fragment/%s?action=show&controller=videos' % video_id
-        info_webpage = self._download_webpage(info_url, video_id)
+        info_webpage = self._download_webpage(
+            info_url, video_id, note=u'Downloading info page')
+        if re.match(r'\s*<div\s+class="video-error', info_webpage):
+            raise ExtractorError(
+                u'Video %s is blocked from your location.' % video_id,
+                expected=True)
        video_url = self._html_search_regex(
            r'<source[^>]+src="([^"]+)"', info_webpage, u'video URL')

@@ -83,7 +92,8 @@ class VikiIE(SubtitlesInfoExtractor):

    def _get_available_subtitles(self, video_id, info_webpage):
        res = {}
-        for sturl in re.findall(r'<track src="([^"]+)"/>'):
+        for sturl_html in re.findall(r'<track src="([^"]+)"/>', info_webpage):
+            sturl = unescapeHTML(sturl_html)
            m = re.search(r'/(?P<lang>[a-z]+)\.vtt', sturl)
            if not m:
                continue
--- a/youtube_dl/extractor/yahoo.py
+++ b/youtube_dl/extractor/yahoo.py
@@ -46,7 +46,7 @@ class YahooIE(InfoExtractor):
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)

-        items_json = self._search_regex(r'YVIDEO_INIT_ITEMS = ({.*?});$',
+        items_json = self._search_regex(r'mediaItems: ({.*?})$',
            webpage, u'items', flags=re.MULTILINE)
        items = json.loads(items_json)
        info = items['mediaItems']['query']['results']['mediaObj'][0]
@@ -91,17 +91,13 @@ class YahooIE(InfoExtractor):
            formats.append(format_info)
        formats = sorted(formats, key=lambda f:(f['height'], f['width']))

-        info = {
+        return {
            'id': video_id,
            'title': meta['title'],
            'formats': formats,
            'description': clean_html(meta['description']),
            'thumbnail': meta['thumbnail'],
        }
-        # TODO: Remove when #980 has been merged
-        info.update(formats[-1])
-
-        return info


 class YahooSearchIE(SearchInfoExtractor):
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -11,7 +11,6 @@ import socket
 import string
 import struct
 import traceback
-import xml.etree.ElementTree
 import zlib

 from .common import InfoExtractor, SearchInfoExtractor
@@ -29,6 +28,7 @@ from ..utils import (
    clean_html,
    get_cachedir,
    get_element_by_id,
+    get_element_by_attribute,
    ExtractorError,
    unescapeHTML,
    unified_strdate,
@@ -248,21 +248,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
        '248': 'webm',
    }
    _video_dimensions = {
-        '5': '240x400',
+        '5': '400x240',
        '6': '???',
        '13': '???',
-        '17': '144x176',
-        '18': '360x640',
-        '22': '720x1280',
-        '34': '360x640',
-        '35': '480x854',
-        '36': '240x320',
-        '37': '1080x1920',
-        '38': '3072x4096',
-        '43': '360x640',
-        '44': '480x854',
-        '45': '720x1280',
-        '46': '1080x1920',
+        '17': '176x144',
+        '18': '640x360',
+        '22': '1280x720',
+        '34': '640x360',
+        '35': '854x480',
+        '36': '320x240',
+        '37': '1920x1080',
+        '38': '4096x3072',
+        '43': '640x360',
+        '44': '854x480',
+        '45': '1280x720',
+        '46': '1920x1080',
        '82': '360p',
        '83': '480p',
        '84': '720p',
@@ -1144,8 +1144,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                'asrs': 1,
            })
            list_url = caption_url + '&' + list_params
-            list_page = self._download_webpage(list_url, video_id)
-            caption_list = xml.etree.ElementTree.fromstring(list_page.encode('utf-8'))
+            caption_list = self._download_xml(list_url, video_id)
            original_lang_node = caption_list.find('track')
            if original_lang_node is None or original_lang_node.attrib.get('kind') != 'asr' :
                self._downloader.report_warning(u'Video doesn\'t have automatic captions')
@@ -1528,7 +1527,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
                     )"""
    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&page=%s'
    _MORE_PAGES_INDICATOR = r'data-link-type="next"'
-    _VIDEO_RE = r'href="/watch\?v=([0-9A-Za-z_-]{11})&amp;'
+    _VIDEO_RE = r'href="/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)'
    IE_NAME = u'youtube:playlist'

    @classmethod
@@ -1539,6 +1538,24 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
    def _real_initialize(self):
        self._login()

+    def _ids_to_results(self, ids):
+        return [self.url_result(vid_id, 'Youtube', video_id=vid_id)
+                       for vid_id in ids]
+
+    def _extract_mix(self, playlist_id):
+        # The mixes are generated from a single video
+        # the id of the playlist is just 'RD' + video_id
+        url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[2:], playlist_id)
+        webpage = self._download_webpage(url, playlist_id, u'Downloading Youtube mix')
+        title_span = (get_element_by_attribute('class', 'title long-title', webpage) or
+            get_element_by_attribute('class', 'title ', webpage))
+        title = clean_html(title_span)
+        video_re = r'data-index="\d+".*?href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s' % re.escape(playlist_id)
+        ids = orderedSet(re.findall(video_re, webpage))
+        url_results = self._ids_to_results(ids)
+
+        return self.playlist_result(url_results, playlist_id, title)
+
    def _real_extract(self, url):
        # Extract playlist id
        mobj = re.match(self._VALID_URL, url, re.VERBOSE)
@@ -1556,14 +1573,20 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
            else:
                self.to_screen(u'Downloading playlist PL%s - add --no-playlist to just download video %s' % (playlist_id, video_id))

+        if len(playlist_id) == 13:  # 'RD' + 11 characters for the video id
+            # Mixes require a custom extraction process
+            return self._extract_mix(playlist_id)
+
        # Extract the video ids from the playlist pages
        ids = []

        for page_num in itertools.count(1):
            url = self._TEMPLATE_URL % (playlist_id, page_num)
            page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)
-            # The ids are duplicated
-            new_ids = orderedSet(re.findall(self._VIDEO_RE, page))
+            matches = re.finditer(self._VIDEO_RE, page)
+            # We remove the duplicates and the link with index 0
+            # (it's not the first video of the playlist)
+            new_ids = orderedSet(m.group('id') for m in matches if m.group('index') != '0')
            ids.extend(new_ids)

            if re.search(self._MORE_PAGES_INDICATOR, page) is None:
@@ -1571,8 +1594,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):

        playlist_title = self._og_search_title(page)

-        url_results = [self.url_result(vid_id, 'Youtube', video_id=vid_id)
-                       for vid_id in ids]
+        url_results = self._ids_to_results(ids)
        return self.playlist_result(url_results, playlist_id, playlist_title)


@@ -1769,7 +1791,6 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True
-    _PAGING_STEP = 30
    # use action_load_personal_feed instead of action_load_system_feed
    _PERSONAL_FEED = False

@@ -1789,9 +1810,8 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):

    def _real_extract(self, url):
        feed_entries = []
-        # The step argument is available only in 2.7 or higher
-        for i in itertools.count(0):
-            paging = i*self._PAGING_STEP
+        paging = 0
+        for i in itertools.count(1):
            info = self._download_webpage(self._FEED_TEMPLATE % paging,
                                          u'%s feed' % self._FEED_NAME,
                                          u'Downloading page %s' % i)
@@ -1804,6 +1824,7 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
                for video_id in ids)
            if info['paging'] is None:
                break
+            paging = info['paging']
        return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)

 class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
@@ -1823,9 +1844,15 @@ class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
    _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
    _FEED_NAME = 'watch_later'
    _PLAYLIST_TITLE = u'Youtube Watch Later'
-    _PAGING_STEP = 100
    _PERSONAL_FEED = True

+class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
+    IE_DESC = u'Youtube watch history, "ythistory" keyword (requires authentication)'
+    _VALID_URL = r'https?://www\.youtube\.com/feed/history|:ythistory'
+    _FEED_NAME = 'history'
+    _PERSONAL_FEED = True  # use action_load_personal_feed, like the watch-later feed
+    _PLAYLIST_TITLE = u'Youtube Watch History'
+
 class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    IE_NAME = u'youtube:favorites'
    IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
--- a/youtube_dl/extractor/zdf.py
+++ b/youtube_dl/extractor/zdf.py
@@ -1,75 +1,125 @@
+# coding: utf-8
+
+import operator
 import re

 from .common import InfoExtractor
 from ..utils import (
-    determine_ext,
-    ExtractorError,
+    unified_strdate,
 )


 class ZDFIE(InfoExtractor):
-    _VALID_URL = r'^http://www\.zdf\.de\/ZDFmediathek(?P<hash>#)?\/(.*beitrag\/video\/)(?P<video_id>[^/\?]+)(?:\?.*)?'
-    _MEDIA_STREAM = r'<a href="(?P<video_url>.+(?P<media_type>.streaming).+/zdf/(?P<quality>[^\/]+)/[^"]*)".+class="play".+>'
+    _VALID_URL = r'^https?://www\.zdf\.de/ZDFmediathek(?P<hash>#)?/(.*beitrag/(?:video/)?)(?P<video_id>[0-9]+)(?:/[^/?]+)?(?:\?.*)?'
+
+    _TEST = {
+        u"url": u"http://www.zdf.de/ZDFmediathek/beitrag/video/2037704/ZDFspezial---Ende-des-Machtpokers--?bc=sts;stt",
+        u"file": u"2037704.webm",
+        u"info_dict": {
+            u"upload_date": u"20131127",
+            u"description": u"Union und SPD haben sich auf einen Koalitionsvertrag geeinigt. Aber was bedeutet das für die Bürger? Sehen Sie hierzu das ZDFspezial \"Ende des Machtpokers - Große Koalition für Deutschland\".",
+            u"uploader": u"spezial",
+            u"title": u"ZDFspezial - Ende des Machtpokers"
+        },
+        u"skip": u"Videos on ZDF.de are depublicised in short order",
+    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
-        if mobj is None:
-            raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = mobj.group('video_id')

-        if mobj.group('hash'):
-            url = url.replace(u'#', u'', 1)
+        xml_url = u'http://www.zdf.de/ZDFmediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
+        doc = self._download_xml(
+            xml_url, video_id,
+            note=u'Downloading video info',
+            errnote=u'Failed to download video info')

-        html = self._download_webpage(url, video_id)
-        streams = [m.groupdict() for m in re.finditer(self._MEDIA_STREAM, html)]
-        if streams is None:
-            raise ExtractorError(u'No media url found.')
+        title = doc.find('.//information/title').text
+        description = doc.find('.//information/detail').text
+        uploader_node = doc.find('.//details/originChannelTitle')
+        uploader = None if uploader_node is None else uploader_node.text
+        duration_str = doc.find('.//details/length').text
+        duration_m = re.match(r'''(?x)^
+            (?P<hours>[0-9]{2})
+            :(?P<minutes>[0-9]{2})
+            :(?P<seconds>[0-9]{2})
+            (?:\.(?P<ms>[0-9]+)?)
+            ''', duration_str)
+        duration = (
+            (
+                (int(duration_m.group('hours')) * 60 * 60) +
+                (int(duration_m.group('minutes')) * 60) +
+                int(duration_m.group('seconds'))
+            )
+            if duration_m
+            else None
+        )
+        upload_date = unified_strdate(doc.find('.//details/airtime').text)

-        # s['media_type'] == 'wstreaming' -> use 'Windows Media Player' and mms url
-        # s['media_type'] == 'hstreaming' -> use 'Quicktime' and rtsp url
-        # choose first/default media type and highest quality for now
-        def stream_pref(s):
-            TYPE_ORDER = ['ostreaming', 'hstreaming', 'wstreaming']
+        def xml_to_format(fnode):
+            video_url = fnode.find('url').text
+            is_available = u'http://www.metafilegenerator' not in video_url
+
+            format_id = fnode.attrib['basetype']
+            format_m = re.match(r'''(?x)
+                (?P<vcodec>[^_]+)_(?P<acodec>[^_]+)_(?P<container>[^_]+)_
+                (?P<proto>[^_]+)_(?P<index>[^_]+)_(?P<indexproto>[^_]+)
+            ''', format_id)
+
+            ext = format_m.group('container')
+            is_supported = ext != 'f4f'
+
+            PROTO_ORDER = ['http', 'rtmp', 'rtsp']
            try:
-                type_pref = TYPE_ORDER.index(s['media_type'])
+                proto_pref = -PROTO_ORDER.index(format_m.group('proto'))
            except ValueError:
-                type_pref = 999
+                proto_pref = 999

-            QUALITY_ORDER = ['veryhigh', '300']
+            quality = fnode.find('./quality').text
+            QUALITY_ORDER = ['veryhigh', '300', 'high', 'med', 'low']
            try:
-                quality_pref = QUALITY_ORDER.index(s['quality'])
+                quality_pref = -QUALITY_ORDER.index(quality)
            except ValueError:
                quality_pref = 999

-            return (type_pref, quality_pref)
+            abr = int(fnode.find('./audioBitrate').text) // 1000
+            vbr = int(fnode.find('./videoBitrate').text) // 1000
+            pref = (is_available, is_supported,
+                    proto_pref, quality_pref, vbr, abr)

-        sorted_streams = sorted(streams, key=stream_pref)
-        if not sorted_streams:
-            raise ExtractorError(u'No stream found.')
-        stream = sorted_streams[0]
+            format_note = u''
+            if not is_supported:
+                format_note += u'(unsupported)'
+            if not format_note:
+                format_note = None

-        media_link = self._download_webpage(
-            stream['video_url'],
-            video_id,
-            u'Get stream URL')
+            return {
+                'format_id': format_id + u'-' + quality,
+                'url': video_url,
+                'ext': ext,
+                'acodec': format_m.group('acodec'),
+                'vcodec': format_m.group('vcodec'),
+                'abr': abr,
+                'vbr': vbr,
+                'width': int(fnode.find('./width').text),
+                'height': int(fnode.find('./height').text),
+                'filesize': int(fnode.find('./filesize').text),
+                'format_note': format_note,
+                '_pref': pref,
+                '_available': is_available,
+            }

-        #MMS_STREAM = r'href="(?P<video_url>mms://[^"]*)"'
-        RTSP_STREAM = r'(?P<video_url>rtsp://[^"]*.mp4)'
-
-        mobj = re.search(self._MEDIA_STREAM, media_link)
-        if mobj is None:
-            mobj = re.search(RTSP_STREAM, media_link)
-            if mobj is None:
-                raise ExtractorError(u'Cannot extract mms:// or rtsp:// URL')
-        video_url = mobj.group('video_url')
-
-        title = self._html_search_regex(
-            r'<h1(?: class="beitragHeadline")?>(.*?)</h1>',
-            html, u'title')
+        format_nodes = doc.findall('.//formitaeten/formitaet')
+        formats = sorted(filter(lambda f: f['_available'],
+                                map(xml_to_format, format_nodes)),
+                         key=operator.itemgetter('_pref'))

        return {
            'id': video_id,
-            'url': video_url,
            'title': title,
-            'ext': determine_ext(video_url)
+            'formats': formats,
+            'description': description,
+            'uploader': uploader,
+            'duration': duration,
+            'upload_date': upload_date,
        }
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -8,6 +8,7 @@ import gzip
 import io
 import json
 import locale
+import math
 import os
 import pipes
 import platform
@@ -16,6 +17,7 @@ import ssl
 import socket
 import sys
 import traceback
+import xml.etree.ElementTree
 import zlib

 try:
@@ -536,8 +538,7 @@ def formatSeconds(secs):
    else:
        return '%d' % secs

-
-def make_HTTPS_handler(opts):
+def make_HTTPS_handler(opts_no_check_certificate):
    if sys.version_info < (3, 2):
        import httplib

@@ -552,7 +553,7 @@ def make_HTTPS_handler(opts):
                    self._tunnel()
                try:
                    self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv3)
-                except ssl.SSLError as e:
+                except ssl.SSLError:
                    self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv23)

        class HTTPSHandlerV3(compat_urllib_request.HTTPSHandler):
@@ -564,7 +565,7 @@ def make_HTTPS_handler(opts):
        context.set_default_verify_paths()
        
        context.verify_mode = (ssl.CERT_NONE
-                               if opts.no_check_certificate
+                               if opts_no_check_certificate
                               else ssl.CERT_REQUIRED)
        return compat_urllib_request.HTTPSHandler(context=context)

@@ -1006,3 +1007,17 @@ def unsmuggle_url(smug_url):
    jsond = compat_parse_qs(sdata)[u'__youtubedl_smuggle'][0]
    data = json.loads(jsond)
    return url, data
+
+
+def format_bytes(bytes):
+    if bytes is None:
+        return u'N/A'
+    if type(bytes) is str:
+        bytes = float(bytes)
+    if bytes == 0.0:
+        exponent = 0
+    else:
+        exponent = int(math.log(bytes, 1024.0))
+    suffix = [u'B', u'KiB', u'MiB', u'GiB', u'TiB', u'PiB', u'EiB', u'ZiB', u'YiB'][exponent]
+    converted = float(bytes) / float(1024 ** exponent)
+    return u'%.2f%s' % (converted, suffix)
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@

-__version__ = '2013.11.24'
+__version__ = '2013.11.29'
Author	SHA1	Message	Date
Philipp Hagemeister	17769d5a6c	release 2013.11.29	2013-11-29 03:34:26 +01:00
Philipp Hagemeister	677c18092d	[podomatic] Add extractor	2013-11-29 03:33:25 +01:00
Jaime Marquínez Ferrándiz	3862402ff3	Add an extractor for Clipsyndicate (closes #1744 )	2013-11-28 14:38:10 +01:00
Jaime Marquínez Ferrándiz	b03d0d064c	[imdb] Fix extraction in python 2.6 Using a regular expression because the html cannot be parsed.	2013-11-28 13:49:00 +01:00
Jaime Marquínez Ferrándiz	d8d6148628	Add an extractor for Internet Movie Database trailers (closes #1832 )	2013-11-28 13:32:49 +01:00
Philipp Hagemeister	2be54167d0	release 2013.11.28.1	2013-11-28 06:17:56 +01:00
Philipp Hagemeister	4e0084d92e	[youtube/subtitles] Change MD5 of vtt subtitle in test	2013-11-28 06:14:17 +01:00
Philipp Hagemeister	fc9e1cc697	[clipfish] Use FIFA trailer as testcase (#1842 )	2013-11-28 06:10:37 +01:00
Philipp Hagemeister	f8f60d2793	[clipfish] Fix imports (#1842 )	2013-11-28 05:54:46 +01:00
Philipp Hagemeister	ea07dbb8b1	release 2013.11.28	2013-11-28 05:48:32 +01:00
Philipp Hagemeister	2a275ab007	[zdf] Use _download_xml	2013-11-28 05:47:50 +01:00
Philipp Hagemeister	a2e6db365c	[zdf] add a pseudo-testcase and fix URL matching	2013-11-28 05:47:20 +01:00
Philipp Hagemeister	9d93e7da6c	Merge branch 'master' of github.com:rg3/youtube-dl	2013-11-28 04:37:02 +01:00
Jaime Marquínez Ferrándiz	0e44d8381a	[youtube:feeds] Use the 'paging' value from the downloaded json information (fixes #1845 )	2013-11-28 00:33:27 +01:00
Jaime Marquínez Ferrándiz	35907e23ec	[yahoo] Fix video extraction and use the new format system exclusively	2013-11-27 21:24:55 +01:00
Jaime Marquínez Ferrándiz	76d1700b28	[youtube:playlist] Fix the extraction of the title for some mixes (#1844 ) Like https://www.youtube.com/watch?v=g8jDB5xOiuE&list=RDIh2gxLqR7HM	2013-11-27 20:01:51 +01:00
Philipp Hagemeister	dcca796ce4	[clipfish] Effect a better error message (#1842 )	2013-11-27 18:33:51 +01:00
Filippo Valsorda	4b19e38954	[videopremium] support new .me domain	2013-11-27 02:54:51 +01:00
Jaime Marquínez Ferrándiz	5f09bbff4d	[bash-completion] Complete the ':ythistory' keyword	2013-11-27 00:42:59 +01:00
Jaime Marquínez Ferrándiz	c1f9c59d11	[bash-completion] Complete filenames or directories if the previous option requires it	2013-11-27 00:41:30 +01:00
Jaime Marquínez Ferrándiz	652cdaa269	[youtube:playlist] Add support for YouTube mixes (fixes #1839 )	2013-11-26 21:35:03 +01:00
Jaime Marquínez Ferrándiz	e26f871228	Use the new '_download_xml' helper in more extractors	2013-11-26 19:17:25 +01:00
Jaime Marquínez Ferrándiz	6e47b51eef	[youtube:playlist] Remove the link with index 0 It's not the first video of the playlist, it appears in the 'Play all' button (see the test course for an example)	2013-11-26 19:09:14 +01:00
Jaime Marquínez Ferrándiz	4a98cdbf3b	YoutubeDL: set the 'params' property before any message/warning/error is sent (fixes #1840 ) If it sets the 'restrictfilenames' param, it will first report a warning. It will try to get the logger from the 'params' property, which would be set at that moment to None, raising the error 'AttributeError: 'NoneType' object has no attribute 'get''	2013-11-26 18:54:14 +01:00
Philipp Hagemeister	c5ed4e8f7e	release 2013.11.26	2013-11-26 10:41:35 +01:00
Jaime Marquínez Ferrándiz	c2e52508cc	Include the proxy in the parameters for YoutubeDL (fixes #1831 )	2013-11-26 08:03:11 +01:00
Philipp Hagemeister	d8ec4959c8	Merge pull request #1830 from jaimeMF/download-archive Use the 'extractor_key' field for the download archive file	2013-11-25 14:14:25 -08:00
Jaime Marquínez Ferrándiz	d31209a144	Use the 'extractor_key' field for the download archive file It has the same value as the ie_key.	2013-11-25 22:57:15 +01:00
Jaime Marquínez Ferrándiz	529a2e2cc3	Fix typo in the documentation of the 'download_archive' param	2013-11-25 22:52:09 +01:00
Philipp Hagemeister	781a7d0546	release 2013.11.25.3	2013-11-25 22:36:18 +01:00
Philipp Hagemeister	fb04e40396	[soundcloud] Support for listing of audio-only files	2013-11-25 22:34:56 +01:00
Philipp Hagemeister	d9b011f201	Fix rtmpdump with non-ASCII filenames on Windows on 2.x Reported in #1798	2013-11-25 22:31:38 +01:00
Philipp Hagemeister	b0b9eaa196	Merge pull request #1829 from jaimeMF/ydl-empty-params Allow to initialize a YoutubeDL object without parameters	2013-11-25 13:19:59 -08:00
Philipp Hagemeister	8b134b1062	Merge branch 'master' of github.com:rg3/youtube-dl	2013-11-25 22:16:07 +01:00
Philipp Hagemeister	0c75c3fa7a	Do not warn about fixed output template if --max-downloads is 1 Fixes #1828	2013-11-25 22:15:33 +01:00
Jaime Marquínez Ferrándiz	a3927cf7ee	Allow to initialize a YoutubeDL object without parameters Having to pass the 'outtmpl' parameter feels really strange when you just want to extract the info of a video.	2013-11-25 22:03:39 +01:00
Jaime Marquínez Ferrándiz	1a62c18f65	[bambuser] Skip the download in the test It doesn't respect the 'Range' header.	2013-11-25 22:03:20 +01:00
Philipp Hagemeister	2a15e7063b	[soundcloud] Prefer HTTP over RTMP (#1798 )	2013-11-25 20:30:41 +01:00
Philipp Hagemeister	d46cc192d7	Reduce socket timeout	2013-11-25 19:11:01 +01:00
Philipp Hagemeister	bb2bebdbe1	release 2013.11.25.2	2013-11-25 15:47:14 +01:00
Philipp Hagemeister	5db07df634	Fix --download-archive (Fixes #1826 )	2013-11-25 15:46:54 +01:00
Philipp Hagemeister	ea36cbac5e	Merge remote-tracking branch 'rbrito/swap-dimensions'	2013-11-25 06:19:15 +01:00
Philipp Hagemeister	d0d2b49ab7	[FileDownloader] use moved format_bytes method	2013-11-25 06:17:41 +01:00
Philipp Hagemeister	31cb6d8fef	Merge remote-tracking branch 'rzhxeo/rtmpdump'	2013-11-25 06:16:18 +01:00
Philipp Hagemeister	daa0dd2973	release 2013.11.25.1	2013-11-25 06:06:39 +01:00
Philipp Hagemeister	de79c46c8f	[viki] Fix subtitle extraction	2013-11-25 06:06:18 +01:00
Philipp Hagemeister	94ccb6fa2e	[viki] Fix subtitles extraction	2013-11-25 05:58:04 +01:00
Philipp Hagemeister	07e4035879	[viki] Fix uploader extraction	2013-11-25 05:57:55 +01:00
Philipp Hagemeister	d0efb9ec9a	[tests] Remove global_setup function	2013-11-25 03:47:32 +01:00
Philipp Hagemeister	ac05067d3d	release 2013.11.25	2013-11-25 03:37:49 +01:00
Philipp Hagemeister	113577e155	[generic] Improve detection Allow download of http://goo.gl/7X5tOk Fixes #1818	2013-11-25 03:35:53 +01:00
Philipp Hagemeister	79d09f47c2	Merge branch 'opener-to-ydl'	2013-11-25 03:30:37 +01:00
Philipp Hagemeister	c059bdd432	Remove quality_name field and improve zdf extractor	2013-11-25 03:28:55 +01:00
Philipp Hagemeister	02dbf93f0e	[zdf/common] Use API in ZDF extractor. This also comes with a lot of extra format fields Fixes #1518	2013-11-25 03:13:22 +01:00
Philipp Hagemeister	1fb2bcbbf7	[viki] Make uploader field optional (#1813 )	2013-11-25 02:02:34 +01:00
Jaime Marquínez Ferrándiz	16e055849e	Update the keywords tests for the rename of the old ComedyCentralIE	2013-11-24 22:13:20 +01:00
Jaime Marquínez Ferrándiz	66cfab4226	[comedycentral] Add support for comedycentral.com videos (closes #1824 ) It's a subclass of MTVIE The extractor for colbertnation.com and thedailyshow.com is called now ComedyCentralShowsIE	2013-11-24 21:18:35 +01:00
Philipp Hagemeister	6d88bc37a3	[viki] Skip travis test Also provide a better error message for geoblocked videos.	2013-11-24 15:28:50 +01:00
Philipp Hagemeister	b7553b2554	[vik] Clarify output	2013-11-24 15:20:16 +01:00
Philipp Hagemeister	e03db0a077	Merge branch 'master' into opener-to-ydl	2013-11-24 15:18:44 +01:00
Philipp Hagemeister	a1ee09e815	Document proxy	2013-11-24 15:03:25 +01:00
Jaime Marquínez Ferrándiz	267ed0c5d3	[collegehumor] Encode the xml before calling xml.etree.ElementTree.fromstring (fixes #1822 ) Uses a new helper method in InfoExtractor: _download_xml	2013-11-24 14:59:19 +01:00
Jaime Marquínez Ferrándiz	f459d17018	[youtube] Add an extractor for downloading the watch history (closes #1821 )	2013-11-24 14:33:50 +01:00
Jaime Marquínez Ferrándiz	dc65dcbb6d	[mixcloud] The description field may be missing (fixes #1819 )	2013-11-24 11:28:44 +01:00
Jaime Marquínez Ferrándiz	d214fdb8fe	[brightcove] Don't use 'or' with the xml nodes, use the 'value' attribute instead	2013-11-24 11:02:34 +01:00
Philipp Hagemeister	138df537ff	release 2013.11.24.1	2013-11-24 07:51:56 +01:00
Philipp Hagemeister	0c7c19d6bc	[clipfish] Add extractor (Fixes #1760 )	2013-11-24 07:51:44 +01:00
Philipp Hagemeister	dca0872056	Move the opener to the YoutubeDL object. This is the first step towards being able to just import youtube_dl and start using it. Apart from removing global state, this would fix problems like #1805.	2013-11-22 19:57:52 +01:00
rzhxeo	2b35c9ef74	Merge branch 'master' into rtmpdump Conflicts: youtube_dl/FileDownloader.py Merge	2013-11-18 00:27:06 +01:00
rzhxeo	4894fe8c5b	Report download progress of rtmpdump	2013-11-09 11:14:40 +01:00
Rogério Brito	d5a9bb4ea9	extractor: youtube: Swap video dimensions to match standard practice. While working on this, I thought about simplifying things like changing 480x854 to 480p, and that seemed like a good option, until I realized that people (me included) usually link the concept of some number followed by a p with the video being 16:9. So, we would be losing some information and, as we all know, [explicit is better than implicit][]. []: http://www.python.org/dev/peps/pep-0020/ This closes #1446. Signed-off-by: Rogério Brito <rbrito@ime.usp.br>	2013-10-19 14:04:44 -03:00