release 2013.12.08.1

[youtube:channel] Fix automated channel detection
[wimp] simplify
2013-12-08 07:32:25 +01:00 · 2013-12-08 07:30:42 +01:00 · 2013-12-08 07:22:19 +01:00 · 2013-12-08 06:54:52 +01:00 · 2013-12-08 06:54:39 +01:00 · 2013-12-08 06:53:46 +01:00
100 changed files with 1395 additions and 468 deletions
--- a/.travis.yml
+++ b/.travis.yml
@ -3,9 +3,6 @@ python:
  - "2.6"
  - "2.7"
  - "3.3"
-before_install:
-  - sudo apt-get update -qq
-  - sudo apt-get install -qq rtmpdump
 script: nosetests test --verbose
 notifications:
  email:
--- a/README.md
+++ b/README.md
@ -30,7 +30,8 @@ which means you can modify it, redistribute it or use it however you like.
    --list-extractors          List all supported extractors and the URLs they
                               would handle
    --extractor-descriptions   Output descriptions of all supported extractors
-    --proxy URL                Use the specified HTTP/HTTPS proxy
+    --proxy URL                Use the specified HTTP/HTTPS proxy. Pass in an
+                               empty string (--proxy "") for direct connection
    --no-check-certificate     Suppress HTTPS certificate validation.
    --cache-dir DIR            Location in the filesystem where youtube-dl can
                               store downloaded information permanently. By
@ -55,8 +56,9 @@ which means you can modify it, redistribute it or use it however you like.
    --dateafter DATE           download only videos uploaded after this date
    --no-playlist              download only the currently playing video
    --age-limit YEARS          download only videos suitable for the given age
-    --download-archive FILE    Download only videos not present in the archive
-                               file. Record all downloaded videos in it.
+    --download-archive FILE    Download only videos not listed in the archive
+                               file. Record the IDs of all downloaded videos in
+                               it.

 ## Download Options:
    -r, --rate-limit LIMIT     maximum download rate in bytes per second (e.g.
@ -130,11 +132,11 @@ which means you can modify it, redistribute it or use it however you like.
    -v, --verbose              print various debugging information
    --dump-intermediate-pages  print downloaded pages to debug problems(very
                               verbose)
-    --write-pages              Write downloaded pages to files in the current
-                               directory
+    --write-pages              Write downloaded intermediary pages to files in
+                               the current directory to debug problems

 ## Video Format Options:
-    -f, --format FORMAT        video format code, specifiy the order of
+    -f, --format FORMAT        video format code, specify the order of
                               preference using slashes: "-f 22/17/18". "-f mp4"
                               and "-f flv" are also supported
    --all-formats              download all available video formats
@ -182,7 +184,7 @@ which means you can modify it, redistribute it or use it however you like.

 # CONFIGURATION

-You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.config/youtube-dl.conf`.
+You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.config/youtube-dl.conf`. On Windows, the configuration file locations are `%APPDATA%\youtube-dl\config.txt` and `C:\Users\<Yourname>\youtube-dl.conf`.

 # OUTPUT TEMPLATE

--- a/devscripts/bash-completion.in
+++ b/devscripts/bash-completion.in
@ -1,10 +1,21 @@
 __youtube_dl()
 {
-    local cur prev opts
+    local cur prev opts fileopts diropts keywords
    COMPREPLY=()
    cur="${COMP_WORDS[COMP_CWORD]}"
+    prev="${COMP_WORDS[COMP_CWORD-1]}"
    opts="{{flags}}"
-    keywords=":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater"
+    keywords=":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater :ythistory"
+    fileopts="-a|--batch-file|--download-archive|--cookies"
+    diropts="--cache-dir"
+
+    if [[ ${prev} =~ ^(${fileopts})$ ]]; then  # anchored: a bare "-a" must not match inside e.g. --all-formats
+        COMPREPLY=( $(compgen -f -- "${cur}") )
+        return 0
+    elif [[ ${prev} =~ ^(${diropts})$ ]]; then  # anchored: --no-cache-dir takes no argument and must not match
+        COMPREPLY=( $(compgen -d -- "${cur}") )
+        return 0
+    fi

    if [[ ${cur} =~ : ]]; then
        COMPREPLY=( $(compgen -W "${keywords}" -- ${cur}) )
--- a/test/parameters.json
+++ b/test/parameters.json
@ -39,5 +39,6 @@
    "writeinfojson": true, 
    "writesubtitles": false,
    "allsubtitles": false,
-    "listssubtitles": false
+    "listssubtitles": false,
+    "socket_timeout": 20
 }
--- a/test/test_all_urls.py
+++ b/test/test_all_urls.py
@ -106,6 +106,10 @@ class TestAllURLsMatching(unittest.TestCase):
        self.assertMatch(':colbertreport', ['ComedyCentralShows'])
        self.assertMatch(':cr', ['ComedyCentralShows'])

+    def test_vimeo_matching(self):
+        self.assertMatch('http://vimeo.com/channels/tributes', ['vimeo:channel'])
+        self.assertMatch('http://vimeo.com/user7108434', ['vimeo:user'])
+

 if __name__ == '__main__':
    unittest.main()
--- a/test/test_playlists.py
+++ b/test/test_playlists.py
@ -15,13 +15,18 @@ from youtube_dl.extractor import (
    DailymotionPlaylistIE,
    DailymotionUserIE,
    VimeoChannelIE,
+    VimeoUserIE,
+    VimeoAlbumIE,
+    VimeoGroupsIE,
    UstreamChannelIE,
    SoundcloudSetIE,
    SoundcloudUserIE,
    LivestreamIE,
    NHLVideocenterIE,
    BambuserChannelIE,
-    BandcampAlbumIE
+    BandcampAlbumIE,
+    SmotriCommunityIE,
+    SmotriUserIE
 )


@ -54,6 +59,30 @@ class TestPlaylists(unittest.TestCase):
        self.assertEqual(result['title'], u'Vimeo Tributes')
        self.assertTrue(len(result['entries']) > 24)

+    def test_vimeo_user(self):
+        dl = FakeYDL()
+        ie = VimeoUserIE(dl)
+        result = ie.extract('http://vimeo.com/nkistudio/videos')
+        self.assertIsPlaylist(result)
+        self.assertEqual(result['title'], u'Nki')
+        self.assertTrue(len(result['entries']) > 65)
+
+    def test_vimeo_album(self):
+        dl = FakeYDL()
+        ie = VimeoAlbumIE(dl)
+        result = ie.extract('http://vimeo.com/album/2632481')
+        self.assertIsPlaylist(result)
+        self.assertEqual(result['title'], u'Staff Favorites: November 2013')
+        self.assertTrue(len(result['entries']) > 12)
+
+    def test_vimeo_groups(self):
+        dl = FakeYDL()
+        ie = VimeoGroupsIE(dl)
+        result = ie.extract('http://vimeo.com/groups/rolexawards')
+        self.assertIsPlaylist(result)
+        self.assertEqual(result['title'], u'Rolex Awards for Enterprise')
+        self.assertTrue(len(result['entries']) > 72)
+
    def test_ustream_channel(self):
        dl = FakeYDL()
        ie = UstreamChannelIE(dl)
@ -110,6 +139,24 @@ class TestPlaylists(unittest.TestCase):
        self.assertIsPlaylist(result)
        self.assertEqual(result['title'], u'Nightmare Night EP')
        self.assertTrue(len(result['entries']) >= 4)
+        
+    def test_smotri_community(self):
+        dl = FakeYDL()
+        ie = SmotriCommunityIE(dl)
+        result = ie.extract('http://smotri.com/community/video/kommuna')
+        self.assertIsPlaylist(result)
+        self.assertEqual(result['id'], u'kommuna')
+        self.assertEqual(result['title'], u'КПРФ')
+        self.assertTrue(len(result['entries']) >= 4)
+        
+    def test_smotri_user(self):
+        dl = FakeYDL()
+        ie = SmotriUserIE(dl)
+        result = ie.extract('http://smotri.com/user/inspector')
+        self.assertIsPlaylist(result)
+        self.assertEqual(result['id'], u'inspector')
+        self.assertEqual(result['title'], u'Inspector')
+        self.assertTrue(len(result['entries']) >= 9)

 if __name__ == '__main__':
    unittest.main()
--- a/test/test_subtitles.py
+++ b/test/test_subtitles.py
@ -72,7 +72,7 @@ class TestYoutubeSubtitles(BaseTestSubtitles):
        self.DL.params['writesubtitles'] = True
        self.DL.params['subtitlesformat'] = 'vtt'
        subtitles = self.getSubtitles()
-        self.assertEqual(md5(subtitles['en']), '356cdc577fde0c6783b9b822e7206ff7')
+        self.assertEqual(md5(subtitles['en']), '3cb210999d3e021bd6c7f0ea751eab06')

    def test_youtube_list_subtitles(self):
        self.DL.expect_warning(u'Video doesn\'t have automatic captions')
--- a/test/test_utils.py
+++ b/test/test_utils.py
@ -26,6 +26,7 @@ from youtube_dl.utils import (
    unsmuggle_url,
    shell_quote,
    encodeFilename,
+    str_to_int,
 )

 if sys.version_info < (3, 0):
@ -176,6 +177,10 @@ class TestUtil(unittest.TestCase):
        args = ['ffmpeg', '-i', encodeFilename(u'ñ€ß\'.mp4')]
        self.assertEqual(shell_quote(args), u"""ffmpeg -i 'ñ€ß'"'"'.mp4'""")

+    def test_str_to_int(self):
+        self.assertEqual(str_to_int('123,456'), 123456)
+        self.assertEqual(str_to_int('123.456'), 123456)
+

 if __name__ == '__main__':
    unittest.main()
--- a/test/test_write_info_json.py
+++ b/test/test_write_info_json.py
@ -33,6 +33,7 @@ TEST_ID = 'BaW_jenozKc'
 INFO_JSON_FILE = TEST_ID + '.info.json'
 DESCRIPTION_FILE = TEST_ID + '.mp4.description'
 EXPECTED_DESCRIPTION = u'''test chars:  "'/\ä↭𝕐
+test URL: https://github.com/rg3/youtube-dl/issues/1892

 This is a test video for youtube-dl.

--- a/test/test_youtube_lists.py
+++ b/test/test_youtube_lists.py
@ -107,5 +107,14 @@ class TestYoutubeLists(unittest.TestCase):
        result = ie.extract('http://www.youtube.com/show/airdisasters')
        self.assertTrue(len(result) >= 3)

+    def test_youtube_mix(self):
+        dl = FakeYDL()
+        ie = YoutubePlaylistIE(dl)
+        result = ie.extract('http://www.youtube.com/watch?v=lLJf9qJHR3E&list=RDrjFaenf1T-Y')
+        entries = result['entries']
+        self.assertTrue(len(entries) >= 20)
+        original_video = entries[0]
+        self.assertEqual(original_video['id'], 'rjFaenf1T-Y')
+
 if __name__ == '__main__':
    unittest.main()
--- a/youtube_dl/FileDownloader.py
+++ b/youtube_dl/FileDownloader.py
@ -204,11 +204,27 @@ class FileDownloader(object):
        """Report destination filename."""
        self.to_screen(u'[download] Destination: ' + filename)

+    def _report_progress_status(self, msg, is_last_line=False):
+        """Print msg as a '[download] ' status line and mirror it to the console title.
+
+        msg          -- status text, without the '[download] ' prefix
+        is_last_line -- forwarded to to_screen() inverted, as
+                        skip_eol=not is_last_line (ignored when the
+                        'progress_with_newline' param is set)
+        """
+        fullmsg = u'[download] ' + msg
+        if self.params.get('progress_with_newline', False):
+            # One status per output line: no in-place rewriting needed.
+            self.to_screen(fullmsg)
+        else:
+            if os.name == 'nt':
+                # No erase sequence is emitted on 'nt'; instead pad the
+                # message with spaces up to the length of the previously
+                # printed one and remember the (padded) length for next time.
+                prev_len = getattr(self, '_report_progress_prev_line_length',
+                                   0)
+                if prev_len > len(fullmsg):
+                    fullmsg += u' ' * (prev_len - len(fullmsg))
+                self._report_progress_prev_line_length = len(fullmsg)
+                clear_line = u'\r'
+            else:
+                # '\x1b[K' (ANSI erase-to-end-of-line) is only added when
+                # stderr is a terminal; otherwise just a carriage return.
+                clear_line = (u'\r\x1b[K' if sys.stderr.isatty() else u'\r')
+            self.to_screen(clear_line + fullmsg, skip_eol=not is_last_line)
+        # The title is updated in both modes, using the unprefixed message.
+        self.to_console_title(u'youtube-dl ' + msg)
+
    def report_progress(self, percent, data_len_str, speed, eta):
        """Report download progress."""
        if self.params.get('noprogress', False):
            return
-        clear_line = (u'\x1b[K' if sys.stderr.isatty() and os.name != 'nt' else u'')
        if eta is not None:
            eta_str = self.format_eta(eta)
        else:
@ -218,14 +234,20 @@ class FileDownloader(object):
        else:
            percent_str = 'Unknown %'
        speed_str = self.format_speed(speed)
-        if self.params.get('progress_with_newline', False):
-            self.to_screen(u'[download] %s of %s at %s ETA %s' %
-                (percent_str, data_len_str, speed_str, eta_str))
+
+        msg = (u'%s of %s at %s ETA %s' %
+               (percent_str, data_len_str, speed_str, eta_str))
+        self._report_progress_status(msg)
+
+    def report_finish(self, data_len_str, tot_time):
+        """Report download finished."""
+        if self.params.get('noprogress', False):
+            self.to_screen(u'[download] Download completed')
        else:
-            self.to_screen(u'\r%s[download] %s of %s at %s ETA %s' %
-                (clear_line, percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
-        self.to_console_title(u'youtube-dl - %s of %s at %s ETA %s' %
-                (percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
+            self._report_progress_status(
+                (u'100%% of %s in %s' %
+                 (data_len_str, self.format_seconds(tot_time))),
+                is_last_line=True)

    def report_resuming_byte(self, resume_len):
        """Report attempt to resume at given byte."""
@ -246,15 +268,6 @@ class FileDownloader(object):
        """Report it was impossible to resume download."""
        self.to_screen(u'[download] Unable to resume')

-    def report_finish(self, data_len_str, tot_time):
-        """Report download finished."""
-        if self.params.get('noprogress', False):
-            self.to_screen(u'[download] Download completed')
-        else:
-            clear_line = (u'\x1b[K' if sys.stderr.isatty() and os.name != 'nt' else u'')
-            self.to_screen(u'\r%s[download] 100%% of %s in %s' %
-                (clear_line, data_len_str, self.format_seconds(tot_time)))
-
    def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url, live):
        def run_rtmpdump(args):
            start = time.time()
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@ -132,6 +132,7 @@ class YoutubeDL(object):
    cookiefile:        File name where cookies should be read from and dumped to.
    nocheckcertificate:Do not verify SSL certificates
    proxy:             URL of the proxy server to use
+    socket_timeout:    Time to wait for unresponsive hosts, in seconds

    The following parameters are not used by YoutubeDL itself, they are used by
    the FileDownloader:
@ -146,7 +147,7 @@ class YoutubeDL(object):
    _num_downloads = None
    _screen_file = None

-    def __init__(self, params={}):
+    def __init__(self, params=None):
        """Create a FileDownloader object with the given options."""
        self._ies = []
        self._ies_instances = {}
@ -155,6 +156,7 @@ class YoutubeDL(object):
        self._download_retcode = 0
        self._num_downloads = 0
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
+        self.params = {} if params is None else params

        if (sys.version_info >= (3,) and sys.platform != 'win32' and
                sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
@ -164,9 +166,8 @@ class YoutubeDL(object):
                u'Assuming --restrict-filenames since file system encoding '
                u'cannot encode all charactes. '
                u'Set the LC_ALL environment variable to fix this.')
-            params['restrictfilenames'] = True
+            self.params['restrictfilenames'] = True

-        self.params = params
        self.fd = FileDownloader(self, self.params)

        if '%(stitle)s' in self.params.get('outtmpl', ''):
@ -404,7 +405,8 @@ class YoutubeDL(object):
        for key, value in extra_info.items():
            info_dict.setdefault(key, value)

-    def extract_info(self, url, download=True, ie_key=None, extra_info={}):
+    def extract_info(self, url, download=True, ie_key=None, extra_info={},
+                     process=True):
        '''
        Returns a list with a dictionary for each video we find.
        If 'download', also downloads the videos.
@ -440,7 +442,10 @@ class YoutubeDL(object):
                        'webpage_url': url,
                        'extractor_key': ie.ie_key(),
                    })
-                return self.process_ie_result(ie_result, download, extra_info)
+                if process:
+                    return self.process_ie_result(ie_result, download, extra_info)
+                else:
+                    return ie_result
            except ExtractorError as de: # An error we somewhat expected
                self.report_error(compat_str(de), de.format_traceback())
                break
@ -473,8 +478,33 @@ class YoutubeDL(object):
                                     download,
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
-        elif result_type == 'playlist':
+        elif result_type == 'url_transparent':
+            # Use the information from the embedding page
+            info = self.extract_info(
+                ie_result['url'], ie_key=ie_result.get('ie_key'),
+                extra_info=extra_info, download=False, process=False)

+            def make_result(embedded_info):
+                new_result = ie_result.copy()
+                for f in ('_type', 'url', 'ext', 'player_url', 'formats',
+                          'entries', 'urlhandle', 'ie_key', 'duration',
+                          'subtitles', 'annotations', 'format',
+                          'thumbnail', 'thumbnails'):
+                    if f in new_result:
+                        del new_result[f]
+                    if f in embedded_info:
+                        new_result[f] = embedded_info[f]
+                return new_result
+            new_result = make_result(info)
+
+            assert new_result.get('_type') != 'url_transparent'
+            if new_result.get('_type') == 'compat_list':
+                new_result['entries'] = [
+                    make_result(e) for e in new_result['entries']]
+
+            return self.process_ie_result(
+                new_result, download=download, extra_info=extra_info)
+        elif result_type == 'playlist':
            # We process each entry in the playlist
            playlist = ie_result.get('title', None) or ie_result.get('id', None)
            self.to_screen(u'[download] Downloading playlist: %s' % playlist)
@ -969,7 +999,10 @@ class YoutubeDL(object):
                proxy_map.update(handler.proxies)
        write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')

-    def _setup_opener(self, timeout=20):
+    def _setup_opener(self):
+        timeout_val = self.params.get('socket_timeout')
+        timeout = 600 if timeout_val is None else float(timeout_val)
+
        opts_cookiefile = self.params.get('cookiefile')
        opts_proxy = self.params.get('proxy')

--- a/youtube_dl/init.py
+++ b/youtube_dl/init.py
@ -36,6 +36,7 @@ __authors__  = (
    'Marcin Cieślak',
    'Anton Larionov',
    'Takuya Tsuchida',
+    'Sergey M.',
 )

 __license__ = 'Public Domain'
@ -80,11 +81,11 @@ from .PostProcessor import (


 def parseOpts(overrideArguments=None):
-    def _readOptions(filename_bytes):
+    def _readOptions(filename_bytes, default=[]):
        try:
            optionf = open(filename_bytes)
        except IOError:
-            return [] # silently skip if file is not present
+            return default  # silently skip if file is not present
        try:
            res = []
            for l in optionf:
@ -190,7 +191,9 @@ def parseOpts(overrideArguments=None):
    general.add_option('--extractor-descriptions',
            action='store_true', dest='list_extractor_descriptions',
            help='Output descriptions of all supported extractors', default=False)
-    general.add_option('--proxy', dest='proxy', default=None, help='Use the specified HTTP/HTTPS proxy', metavar='URL')
+    general.add_option(
+        '--proxy', dest='proxy', default=None, metavar='URL',
+        help='Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection')
    general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.')
    general.add_option(
        '--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR',
@ -198,6 +201,9 @@ def parseOpts(overrideArguments=None):
    general.add_option(
        '--no-cache-dir', action='store_const', const=None, dest='cachedir',
        help='Disable filesystem caching')
+    general.add_option(
+        '--socket-timeout', dest='socket_timeout',
+        type=float, default=None, help=optparse.SUPPRESS_HELP)


    selection.add_option('--playlist-start',
@ -220,7 +226,7 @@ def parseOpts(overrideArguments=None):
                         default=None, type=int)
    selection.add_option('--download-archive', metavar='FILE',
                         dest='download_archive',
-                         help='Download only videos not present in the archive file. Record all downloaded videos in it.')
+                         help='Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it.')


    authentication.add_option('-u', '--username',
@ -235,7 +241,7 @@ def parseOpts(overrideArguments=None):

    video_format.add_option('-f', '--format',
            action='store', dest='format', metavar='FORMAT', default='best',
-            help='video format code, specifiy the order of preference using slashes: "-f 22/17/18". "-f mp4" and "-f flv" are also supported')
+            help='video format code, specify the order of preference using slashes: "-f 22/17/18". "-f mp4" and "-f flv" are also supported')
    video_format.add_option('--all-formats',
            action='store_const', dest='format', help='download all available video formats', const='all')
    video_format.add_option('--prefer-free-formats',
@ -317,7 +323,7 @@ def parseOpts(overrideArguments=None):
            help='print downloaded pages to debug problems(very verbose)')
    verbosity.add_option('--write-pages',
            action='store_true', dest='write_pages', default=False,
-            help='Write downloaded pages to files in the current directory')
+            help='Write downloaded intermediary pages to files in the current directory to debug problems')
    verbosity.add_option('--youtube-print-sig-code',
            action='store_true', dest='youtube_print_sig_code', default=False,
            help=optparse.SUPPRESS_HELP)
@ -415,6 +421,8 @@ def parseOpts(overrideArguments=None):
        if opts.verbose:
            write_string(u'[debug] Override config: ' + repr(overrideArguments) + '\n')
    else:
+        systemConf = _readOptions('/etc/youtube-dl.conf')
+
        xdg_config_home = os.environ.get('XDG_CONFIG_HOME')
        if xdg_config_home:
            userConfFile = os.path.join(xdg_config_home, 'youtube-dl', 'config')
@ -424,8 +432,31 @@ def parseOpts(overrideArguments=None):
            userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl', 'config')
            if not os.path.isfile(userConfFile):
                userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf')
-        systemConf = _readOptions('/etc/youtube-dl.conf')
-        userConf = _readOptions(userConfFile)
+        userConf = _readOptions(userConfFile, None)
+
+        if userConf is None:
+            appdata_dir = os.environ.get('appdata')
+            if appdata_dir:
+                userConf = _readOptions(
+                    os.path.join(appdata_dir, 'youtube-dl', 'config'),
+                    default=None)
+                if userConf is None:
+                    userConf = _readOptions(
+                        os.path.join(appdata_dir, 'youtube-dl', 'config.txt'),
+                        default=None)
+
+        if userConf is None:
+            userConf = _readOptions(
+                os.path.join(os.path.expanduser('~'), 'youtube-dl.conf'),
+                default=None)
+        if userConf is None:
+            userConf = _readOptions(
+                os.path.join(os.path.expanduser('~'), 'youtube-dl.conf.txt'),
+                default=None)
+
+        if userConf is None:
+            userConf = []
+
        commandLineConf = sys.argv[1:]
        argv = systemConf + userConf + commandLineConf
        opts, args = parser.parse_args(argv)
@ -651,6 +682,8 @@ def _real_main(argv=None):
        'download_archive': opts.download_archive,
        'cookiefile': opts.cookiefile,
        'nocheckcertificate': opts.no_check_certificate,
+        'proxy': opts.proxy,
+        'socket_timeout': opts.socket_timeout,
    }

    with YoutubeDL(ydl_opts) as ydl:
--- a/youtube_dl/extractor/init.py
+++ b/youtube_dl/extractor/init.py
@ -21,6 +21,7 @@ from .canalplus import CanalplusIE
 from .canalc2 import Canalc2IE
 from .cinemassacre import CinemassacreIE
 from .clipfish import ClipfishIE
+from .clipsyndicate import ClipsyndicateIE
 from .cnn import CNNIE
 from .collegehumor import CollegeHumorIE
 from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
@ -55,7 +56,7 @@ from .flickr import FlickrIE
 from .francetv import (
    PluzzIE,
    FranceTvInfoIE,
-    France2IE,
+    FranceTVIE,
    GenerationQuoiIE
 )
 from .freesound import FreesoundIE
@ -71,6 +72,7 @@ from .hotnewhiphop import HotNewHipHopIE
 from .howcast import HowcastIE
 from .hypem import HypemIE
 from .ign import IGNIE, OneUPIE
+from .imdb import ImdbIE
 from .ina import InaIE
 from .infoq import InfoQIE
 from .instagram import InstagramIE
@ -100,13 +102,16 @@ from .nbc import NBCNewsIE
 from .newgrounds import NewgroundsIE
 from .nhl import NHLIE, NHLVideocenterIE
 from .niconico import NiconicoIE
+from .ninegag import NineGagIE
 from .nowvideo import NowVideoIE
 from .ooyala import OoyalaIE
 from .orf import ORFIE
 from .pbs import PBSIE
 from .photobucket import PhotobucketIE
+from .podomatic import PodomaticIE
 from .pornhub import PornHubIE
 from .pornotube import PornotubeIE
+from .pyvideo import PyvideoIE
 from .rbmaradio import RBMARadioIE
 from .redtube import RedTubeIE
 from .ringtv import RingTVIE
@ -118,6 +123,11 @@ from .rutube import RutubeIE
 from .sina import SinaIE
 from .slashdot import SlashdotIE
 from .slideshare import SlideshareIE
+from .smotri import (
+    SmotriIE,
+    SmotriCommunityIE,
+    SmotriUserIE,
+)
 from .sohu import SohuIE
 from .soundcloud import SoundcloudIE, SoundcloudSetIE, SoundcloudUserIE
 from .southparkstudios import (
@ -136,6 +146,7 @@ from .teamcoco import TeamcocoIE
 from .techtalks import TechTalksIE
 from .ted import TEDIE
 from .tf1 import TF1IE
+from .theplatform import ThePlatformIE
 from .thisav import ThisAVIE
 from .toutv import TouTvIE
 from .traileraddict import TrailerAddictIE
@ -156,7 +167,13 @@ from .viddler import ViddlerIE
 from .videodetective import VideoDetectiveIE
 from .videofyme import VideofyMeIE
 from .videopremium import VideoPremiumIE
-from .vimeo import VimeoIE, VimeoChannelIE
+from .vimeo import (
+    VimeoIE,
+    VimeoChannelIE,
+    VimeoUserIE,
+    VimeoAlbumIE,
+    VimeoGroupsIE,
+)
 from .vine import VineIE
 from .viki import VikiIE
 from .vk import VKIE
@ -164,12 +181,17 @@ from .wat import WatIE
 from .websurg import WeBSurgIE
 from .weibo import WeiboIE
 from .wimp import WimpIE
+from .wistia import WistiaIE
 from .worldstarhiphop import WorldStarHipHopIE
 from .xhamster import XHamsterIE
 from .xnxx import XNXXIE
 from .xvideos import XVideosIE
 from .xtube import XTubeIE
-from .yahoo import YahooIE, YahooSearchIE
+from .yahoo import (
+    YahooIE,
+    YahooNewsIE,
+    YahooSearchIE,
+)
 from .youjizz import YouJizzIE
 from .youku import YoukuIE
 from .youporn import YouPornIE
--- a/youtube_dl/extractor/addanime.py
+++ b/youtube_dl/extractor/addanime.py
@ -13,7 +13,7 @@ from ..utils import (

 class AddAnimeIE(InfoExtractor):

-    _VALID_URL = r'^http://(?:\w+\.)?add-anime\.net/watch_video.php\?(?:.*?)v=(?P<video_id>[\w_]+)(?:.*)'
+    _VALID_URL = r'^http://(?:\w+\.)?add-anime\.net/watch_video\.php\?(?:.*?)v=(?P<video_id>[\w_]+)(?:.*)'
    IE_NAME = u'AddAnime'
    _TEST = {
        u'url': u'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9',
--- a/youtube_dl/extractor/anitube.py
+++ b/youtube_dl/extractor/anitube.py
@ -1,5 +1,4 @@
 import re
-import xml.etree.ElementTree

 from .common import InfoExtractor

@ -28,9 +27,8 @@ class AnitubeIE(InfoExtractor):
        key = self._html_search_regex(r'http://www\.anitube\.se/embed/([A-Za-z0-9_-]*)',
                                      webpage, u'key')

-        webpage_config = self._download_webpage('http://www.anitube.se/nuevo/econfig.php?key=%s' % key,
+        config_xml = self._download_xml('http://www.anitube.se/nuevo/econfig.php?key=%s' % key,
                                                key)
-        config_xml = xml.etree.ElementTree.fromstring(webpage_config.encode('utf-8'))

        video_title = config_xml.find('title').text

--- a/youtube_dl/extractor/appletrailers.py
+++ b/youtube_dl/extractor/appletrailers.py
@ -10,7 +10,7 @@ from ..utils import (


 class AppleTrailersIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?trailers.apple.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)'
+    _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)'
    _TEST = {
        u"url": u"http://trailers.apple.com/trailers/wb/manofsteel/",
        u"playlist": [
@ -113,7 +113,7 @@ class AppleTrailersIE(InfoExtractor):
                })
            formats = sorted(formats, key=lambda f: (f['height'], f['width']))

-            info = {
+            playlist.append({
                '_type': 'video',
                'id': video_id,
                'title': title,
@ -124,12 +124,7 @@ class AppleTrailersIE(InfoExtractor):
                'upload_date': upload_date,
                'uploader_id': uploader_id,
                'user_agent': 'QuickTime compatible (youtube-dl)',
-            }
-            # TODO: Remove when #980 has been merged
-            info['url'] = formats[-1]['url']
-            info['ext'] = formats[-1]['ext']
-
-            playlist.append(info)
+            })

        return {
            '_type': 'playlist',
--- a/youtube_dl/extractor/archiveorg.py
+++ b/youtube_dl/extractor/archiveorg.py
@ -11,7 +11,7 @@ from ..utils import (
 class ArchiveOrgIE(InfoExtractor):
    IE_NAME = 'archive.org'
    IE_DESC = 'archive.org videos'
-    _VALID_URL = r'(?:https?://)?(?:www\.)?archive.org/details/(?P<id>[^?/]+)(?:[?].*)?$'
+    _VALID_URL = r'(?:https?://)?(?:www\.)?archive\.org/details/(?P<id>[^?/]+)(?:[?].*)?$'
    _TEST = {
        u"url": u"http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect",
        u'file': u'XD300-23_68HighlightsAResearchCntAugHumanIntellect.ogv',
@ -49,7 +49,7 @@ class ArchiveOrgIE(InfoExtractor):
        for f in formats:
            f['ext'] = determine_ext(f['url'])

-        info = {
+        return {
            '_type': 'video',
            'id': video_id,
            'title': title,
@ -57,12 +57,5 @@ class ArchiveOrgIE(InfoExtractor):
            'description': description,
            'uploader': uploader,
            'upload_date': upload_date,
+            'thumbnail': data.get('misc', {}).get('image'),
        }
-        thumbnail = data.get('misc', {}).get('image')
-        if thumbnail:
-            info['thumbnail'] = thumbnail
-
-        # TODO: Remove when #980 has been merged
-        info.update(formats[-1])
-
-        return info
--- a/youtube_dl/extractor/arte.py
+++ b/youtube_dl/extractor/arte.py
@ -1,7 +1,6 @@
 # encoding: utf-8
 import re
 import json
-import xml.etree.ElementTree

 from .common import InfoExtractor
 from ..utils import (
@ -18,8 +17,8 @@ from ..utils import (
 # add tests.

 class ArteTvIE(InfoExtractor):
-    _VIDEOS_URL = r'(?:http://)?videos.arte.tv/(?P<lang>fr|de)/.*-(?P<id>.*?).html'
-    _LIVEWEB_URL = r'(?:http://)?liveweb.arte.tv/(?P<lang>fr|de)/(?P<subpage>.+?)/(?P<name>.+)'
+    _VIDEOS_URL = r'(?:http://)?videos\.arte\.tv/(?P<lang>fr|de)/.*-(?P<id>.*?)\.html'
+    _LIVEWEB_URL = r'(?:http://)?liveweb\.arte\.tv/(?P<lang>fr|de)/(?P<subpage>.+?)/(?P<name>.+)'
    _LIVE_URL = r'index-[0-9]+\.html$'

    IE_NAME = u'arte.tv'
@ -78,8 +77,7 @@ class ArteTvIE(InfoExtractor):
        """Extract from videos.arte.tv"""
        ref_xml_url = url.replace('/videos/', '/do_delegate/videos/')
        ref_xml_url = ref_xml_url.replace('.html', ',view,asPlayerXml.xml')
-        ref_xml = self._download_webpage(ref_xml_url, video_id, note=u'Downloading metadata')
-        ref_xml_doc = xml.etree.ElementTree.fromstring(ref_xml)
+        ref_xml_doc = self._download_xml(ref_xml_url, video_id, note=u'Downloading metadata')
        config_node = find_xpath_attr(ref_xml_doc, './/video', 'lang', lang)
        config_xml_url = config_node.attrib['ref']
        config_xml = self._download_webpage(config_xml_url, video_id, note=u'Downloading configuration')
@ -109,9 +107,8 @@ class ArteTvIE(InfoExtractor):
        """Extract form http://liveweb.arte.tv/"""
        webpage = self._download_webpage(url, name)
        video_id = self._search_regex(r'eventId=(\d+?)("|&)', webpage, u'event id')
-        config_xml = self._download_webpage('http://download.liveweb.arte.tv/o21/liveweb/events/event-%s.xml' % video_id,
+        config_doc = self._download_xml('http://download.liveweb.arte.tv/o21/liveweb/events/event-%s.xml' % video_id,
                                            video_id, u'Downloading information')
-        config_doc = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
        event_doc = config_doc.find('event')
        url_node = event_doc.find('video').find('urlHd')
        if url_node is None:
--- a/youtube_dl/extractor/auengine.py
+++ b/youtube_dl/extractor/auengine.py
@ -16,7 +16,7 @@ class AUEngineIE(InfoExtractor):
            u"title": u"[Commie]The Legend of the Legendary Heroes - 03 - Replication Eye (Alpha Stigma)[F9410F5A]"
        }
    }
-    _VALID_URL = r'(?:http://)?(?:www\.)?auengine\.com/embed.php\?.*?file=([^&]+).*?'
+    _VALID_URL = r'(?:http://)?(?:www\.)?auengine\.com/embed\.php\?.*?file=([^&]+).*?'

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
--- a/youtube_dl/extractor/bambuser.py
+++ b/youtube_dl/extractor/bambuser.py
@ -54,7 +54,7 @@ class BambuserIE(InfoExtractor):

 class BambuserChannelIE(InfoExtractor):
    IE_NAME = u'bambuser:channel'
-    _VALID_URL = r'http://bambuser.com/channel/(?P<user>.*?)(?:/|#|\?|$)'
+    _VALID_URL = r'https?://bambuser\.com/channel/(?P<user>.*?)(?:/|#|\?|$)'
    # The maximum number we can get with each request
    _STEP = 50

--- a/youtube_dl/extractor/bloomberg.py
+++ b/youtube_dl/extractor/bloomberg.py
@ -4,7 +4,7 @@ from .common import InfoExtractor


 class BloombergIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<name>.+?).html'
+    _VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<name>.+?)\.html'

    _TEST = {
        u'url': u'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html',
--- a/youtube_dl/extractor/canalplus.py
+++ b/youtube_dl/extractor/canalplus.py
@ -1,6 +1,5 @@
 # encoding: utf-8
 import re
-import xml.etree.ElementTree

 from .common import InfoExtractor
 from ..utils import unified_strdate
@ -31,11 +30,10 @@ class CanalplusIE(InfoExtractor):
            webpage = self._download_webpage(url, mobj.group('path'))
            video_id = self._search_regex(r'videoId = "(\d+)";', webpage, u'video id')
        info_url = self._VIDEO_INFO_TEMPLATE % video_id
-        info_page = self._download_webpage(info_url,video_id, 
+        doc = self._download_xml(info_url,video_id, 
                                           u'Downloading video info')

        self.report_extraction(video_id)
-        doc = xml.etree.ElementTree.fromstring(info_page.encode('utf-8'))
        video_info = [video for video in doc if video.find('ID').text == video_id][0]
        infos = video_info.find('INFOS')
        media = video_info.find('MEDIA')
--- a/youtube_dl/extractor/cinemassacre.py
+++ b/youtube_dl/extractor/cinemassacre.py
@ -12,21 +12,27 @@ class CinemassacreIE(InfoExtractor):
    _TESTS = [{
        u'url': u'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/',
        u'file': u'19911.flv',
-        u'md5': u'f9bb7ede54d1229c9846e197b4737e06',
        u'info_dict': {
            u'upload_date': u'20121110',
            u'title': u'“Angry Video Game Nerd: The Movie” – Trailer',
            u'description': u'md5:fb87405fcb42a331742a0dce2708560b',
-        }
+        },
+        u'params': {
+            # rtmp download
+            u'skip_download': True,
+        },
    },
    {
        u'url': u'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940',
        u'file': u'521be8ef82b16.flv',
-        u'md5': u'9509ee44dcaa7c1068604817c19a9e50',
        u'info_dict': {
            u'upload_date': u'20131002',
            u'title': u'The Mummy’s Hand (1940)',
-        }
+        },
+        u'params': {
+            # rtmp download
+            u'skip_download': True,
+        },
    }]

    def _real_extract(self, url):
--- a/youtube_dl/extractor/clipfish.py
+++ b/youtube_dl/extractor/clipfish.py
@ -3,6 +3,7 @@ import time
 import xml.etree.ElementTree

 from .common import InfoExtractor
+from ..utils import ExtractorError


 class ClipfishIE(InfoExtractor):
@ -10,13 +11,14 @@ class ClipfishIE(InfoExtractor):

    _VALID_URL = r'^https?://(?:www\.)?clipfish\.de/.*?/video/(?P<id>[0-9]+)/'
    _TEST = {
-        u'url': u'http://www.clipfish.de/special/supertalent/video/4028320/supertalent-2013-ivana-opacak-singt-nobodys-perfect/',
-        u'file': u'4028320.f4v',
-        u'md5': u'5e38bda8c329fbfb42be0386a3f5a382',
+        u'url': u'http://www.clipfish.de/special/game-trailer/video/3966754/fifa-14-e3-2013-trailer/',
+        u'file': u'3966754.mp4',
+        u'md5': u'2521cd644e862936cf2e698206e47385',
        u'info_dict': {
-            u'title': u'Supertalent 2013: Ivana Opacak singt Nobody\'s Perfect',
-            u'duration': 399,
-        }
+            u'title': u'FIFA 14 - E3 2013 Trailer',
+            u'duration': 82,
+        },
+        u'skip': 'Blocked in the US'
    }

    def _real_extract(self, url):
@ -25,11 +27,14 @@ class ClipfishIE(InfoExtractor):

        info_url = ('http://www.clipfish.de/devxml/videoinfo/%s?ts=%d' %
                    (video_id, int(time.time())))
-        info_xml = self._download_webpage(
+        doc = self._download_xml(
            info_url, video_id, note=u'Downloading info page')
-        doc = xml.etree.ElementTree.fromstring(info_xml)
        title = doc.find('title').text
        video_url = doc.find('filename').text
+        if video_url is None:
+            xml_bytes = xml.etree.ElementTree.tostring(doc)
+            raise ExtractorError(u'Cannot find video URL in document %r' %
+                                 xml_bytes)
        thumbnail = doc.find('imageurl').text
        duration_str = doc.find('duration').text
        m = re.match(
--- a/youtube_dl/extractor/clipsyndicate.py
+++ b/youtube_dl/extractor/clipsyndicate.py
@ -0,0 +1,52 @@
+import re
+import xml.etree.ElementTree
+
+from .common import InfoExtractor
+from ..utils import (
+    find_xpath_attr,
+)
+
+
+class ClipsyndicateIE(InfoExtractor):
+    """Information extractor for clipsyndicate.com video pages."""
+
+    _VALID_URL = r'http://www\.clipsyndicate\.com/video/play(list/\d+)?/(?P<id>\d+)'
+
+    _TEST = {
+        u'url': u'http://www.clipsyndicate.com/video/play/4629301/brick_briscoe',
+        u'md5': u'4d7d549451bad625e0ff3d7bd56d776c',
+        u'info_dict': {
+            u'id': u'4629301',
+            u'ext': u'mp4',
+            u'title': u'Brick Briscoe',
+            u'duration': 612,
+        },
+    }
+
+    def _real_extract(self, url):
+        """Return the info dict (id, title, url, thumbnail, duration) for url."""
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        js_player = self._download_webpage(
+            'http://eplayer.clipsyndicate.com/embed/player.js?va_id=%s' % video_id,
+            video_id, u'Downloading player')
+        # The player script carries the query string (including a required
+        # token) that is passed on to the playlist endpoint below.
+        flvars = self._search_regex(r'flvars: "(.*?)"', js_player, u'flvars')
+
+        playlist_page = self._download_webpage(
+            'http://eplayer.clipsyndicate.com/osmf/playlist?%s' % flvars,
+            video_id, u'Downloading video info')
+        # Fix broken xml: escape every '&' so that the document parses
+        playlist_page = re.sub('&', '&amp;', playlist_page)
+        pdoc = xml.etree.ElementTree.fromstring(playlist_page.encode('utf-8'))
+
+        track_doc = pdoc.find('trackList/track')
+
+        def find_param(name):
+            """Return the value of the <param> called name, or None if absent."""
+            node = find_xpath_attr(track_doc, './/param', 'name', name)
+            if node is not None:
+                return node.attrib['value']
+            return None
+
+        # find_param() yields None for a missing parameter and int(None)
+        # raises TypeError, so only convert when a duration is present.
+        duration = find_param('duration')
+
+        return {
+            'id': video_id,
+            'title': find_param('title'),
+            'url': track_doc.find('location').text,
+            'thumbnail': find_param('thumbnail'),
+            'duration': int(duration) if duration is not None else None,
+        }
--- a/youtube_dl/extractor/cnn.py
+++ b/youtube_dl/extractor/cnn.py
@ -1,5 +1,4 @@
 import re
-import xml.etree.ElementTree

 from .common import InfoExtractor
 from ..utils import determine_ext
@ -33,8 +32,7 @@ class CNNIE(InfoExtractor):
        path = mobj.group('path')
        page_title = mobj.group('title')
        info_url = u'http://cnn.com/video/data/3.0/%s/index.xml' % path
-        info_xml = self._download_webpage(info_url, page_title)
-        info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
+        info = self._download_xml(info_url, page_title)

        formats = []
        for f in info.findall('files/file'):
--- a/youtube_dl/extractor/comedycentral.py
+++ b/youtube_dl/extractor/comedycentral.py
@ -1,8 +1,7 @@
 import re
-import xml.etree.ElementTree

 from .common import InfoExtractor
-from .mtv import MTVIE, _media_xml_tag
+from .mtv import MTVServicesInfoExtractor
 from ..utils import (
    compat_str,
    compat_urllib_parse,
@ -12,8 +11,8 @@ from ..utils import (
 )


-class ComedyCentralIE(MTVIE):
-    _VALID_URL = r'http://www.comedycentral.com/(video-clips|episodes|cc-studios)/(?P<title>.*)'
+class ComedyCentralIE(MTVServicesInfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?comedycentral\.com/(video-clips|episodes|cc-studios)/(?P<title>.*)'
    _FEED_URL = u'http://comedycentral.com/feeds/mrss/'

    _TEST = {
@ -26,12 +25,6 @@ class ComedyCentralIE(MTVIE):
            u'description': u'After a certain point, breastfeeding becomes c**kblocking.',
        },
    }
-    # Overwrite MTVIE properties we don't want
-    _TESTS = []
-
-    def _get_thumbnail_url(self, uri, itemdoc):
-        search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))
-        return itemdoc.find(search_path).attrib['url']

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
@ -158,13 +151,12 @@ class ComedyCentralShowsIE(InfoExtractor):

        uri = mMovieParams[0][1]
        indexUrl = 'http://shadow.comedycentral.com/feeds/video_player/mrss/?' + compat_urllib_parse.urlencode({'uri': uri})
-        indexXml = self._download_webpage(indexUrl, epTitle,
+        idoc = self._download_xml(indexUrl, epTitle,
                                          u'Downloading show index',
                                          u'unable to download episode index')

        results = []

-        idoc = xml.etree.ElementTree.fromstring(indexXml)
        itemEls = idoc.findall('.//item')
        for partNum,itemEl in enumerate(itemEls):
            mediaId = itemEl.findall('./guid')[0].text
@ -175,10 +167,9 @@ class ComedyCentralShowsIE(InfoExtractor):

            configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' +
                        compat_urllib_parse.urlencode({'uri': mediaId}))
-            configXml = self._download_webpage(configUrl, epTitle,
+            cdoc = self._download_xml(configUrl, epTitle,
                                               u'Downloading configuration for %s' % shortMediaId)

-            cdoc = xml.etree.ElementTree.fromstring(configXml)
            turls = []
            for rendition in cdoc.findall('.//rendition'):
                finfo = (rendition.attrib['bitrate'], rendition.findall('./src')[0].text)
@ -200,7 +191,7 @@ class ComedyCentralShowsIE(InfoExtractor):
                })

            effTitle = showId + u'-' + epTitle + u' part ' + compat_str(partNum+1)
-            info = {
+            results.append({
                'id': shortMediaId,
                'formats': formats,
                'uploader': showId,
@ -208,11 +199,6 @@ class ComedyCentralShowsIE(InfoExtractor):
                'title': effTitle,
                'thumbnail': None,
                'description': compat_str(officialTitle),
-            }
-
-            # TODO: Remove when #980 has been merged
-            info.update(info['formats'][-1])
-
-            results.append(info)
+            })

        return results
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@ -55,6 +55,9 @@ class InfoExtractor(object):
    subtitles:      The subtitle file contents as a dictionary in the format
                    {language: subtitles}.
    view_count:     How many users have watched the video on the platform.
+    like_count:     Number of positive ratings of the video
+    dislike_count:  Number of negative ratings of the video
+    comment_count:  Number of comments on the video
    urlhandle:      [internal] The urlHandle to be used to download the file,
                    like returned by urllib.request.urlopen
    age_limit:      Age restriction for the video, as an integer (years)
@ -210,7 +213,8 @@ class InfoExtractor(object):
        """ Returns the data of the page as a string """
        return self._download_webpage_handle(url_or_request, video_id, note, errnote)[0]

-    def _download_xml(self, url_or_request, video_id, note=u'Downloading XML', errnote=u'Unable to downloand XML'):
+    def _download_xml(self, url_or_request, video_id,
+                      note=u'Downloading XML', errnote=u'Unable to download XML'):
        """Return the xml as an xml.etree.ElementTree.Element"""
        xml_string = self._download_webpage(url_or_request, video_id, note, errnote)
        return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8'))
@ -363,7 +367,8 @@ class InfoExtractor(object):
        if display_name is None:
            display_name = name
        return self._html_search_regex(
-            r'''(?ix)<meta(?=[^>]+(?:name|property)=["\']%s["\'])
+            r'''(?ix)<meta
+                    (?=[^>]+(?:itemprop|name|property)=["\']%s["\'])
                    [^>]+content=["\']([^"\']+)["\']''' % re.escape(name),
            html, display_name, fatal=False)

--- a/youtube_dl/extractor/cspan.py
+++ b/youtube_dl/extractor/cspan.py
@ -6,7 +6,7 @@ from ..utils import (
 )

 class CSpanIE(InfoExtractor):
-    _VALID_URL = r'http://www.c-spanvideo.org/program/(.*)'
+    _VALID_URL = r'http://www\.c-spanvideo\.org/program/(.*)'
    _TEST = {
        u'url': u'http://www.c-spanvideo.org/program/HolderonV',
        u'file': u'315139.flv',
--- a/youtube_dl/extractor/dailymotion.py
+++ b/youtube_dl/extractor/dailymotion.py
@ -11,6 +11,7 @@ from ..utils import (
    get_element_by_attribute,
    get_element_by_id,
    orderedSet,
+    str_to_int,

    ExtractorError,
 )
@ -146,6 +147,9 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
            self._list_available_subtitles(video_id, webpage)
            return

+        view_count = str_to_int(self._search_regex(
+            r'video_views_value[^>]+>([\d\.,]+)<', webpage, u'view count'))
+
        return {
            'id':       video_id,
            'formats': formats,
@ -155,6 +159,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
            'subtitles':    video_subtitles,
            'thumbnail': info['thumbnail_url'],
            'age_limit': age_limit,
+            'view_count': view_count,
        }

    def _get_available_subtitles(self, video_id, webpage):
--- a/youtube_dl/extractor/daum.py
+++ b/youtube_dl/extractor/daum.py
@ -1,6 +1,5 @@
 # encoding: utf-8
 import re
-import xml.etree.ElementTree

 from .common import InfoExtractor
 from ..utils import (
@ -29,17 +28,16 @@ class DaumIE(InfoExtractor):
        video_id = mobj.group(1)
        canonical_url = 'http://tvpot.daum.net/v/%s' % video_id
        webpage = self._download_webpage(canonical_url, video_id)
-        full_id = self._search_regex(r'<link rel="video_src" href=".+?vid=(.+?)"',
+        full_id = self._search_regex(
+            r'<iframe src="http://videofarm\.daum\.net/controller/video/viewer/Video\.html\?.*?vid=(.+?)[&"]',
             webpage, u'full id')
        query = compat_urllib_parse.urlencode({'vid': full_id})
-        info_xml = self._download_webpage(
+        info = self._download_xml(
            'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query, video_id,
            u'Downloading video info')
-        urls_xml = self._download_webpage(
+        urls = self._download_xml(
            'http://videofarm.daum.net/controller/api/open/v1_2/MovieData.apixml?' + query,
            video_id, u'Downloading video formats info')
-        info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
-        urls = xml.etree.ElementTree.fromstring(urls_xml.encode('utf-8'))

        self.to_screen(u'%s: Getting video urls' % video_id)
        formats = []
@ -49,10 +47,9 @@ class DaumIE(InfoExtractor):
                'vid': full_id,
                'profile': profile,
            })
-            url_xml = self._download_webpage(
+            url_doc = self._download_xml(
                'http://videofarm.daum.net/controller/api/open/v1_2/MovieLocation.apixml?' + format_query,
                video_id, note=False)
-            url_doc = xml.etree.ElementTree.fromstring(url_xml.encode('utf-8'))
            format_url = url_doc.find('result/url').text
            formats.append({
                'url': format_url,
@ -60,7 +57,7 @@ class DaumIE(InfoExtractor):
                'format_id': profile,
            })

-        info = {
+        return {
            'id': video_id,
            'title': info.find('TITLE').text,
            'formats': formats,
@ -69,6 +66,3 @@ class DaumIE(InfoExtractor):
            'duration': int(info.find('DURATION').text),
            'upload_date': info.find('REGDTTM').text[:8],
        }
-        # TODO: Remove when #980 has been merged
-        info.update(formats[-1])
-        return info
--- a/youtube_dl/extractor/dreisat.py
+++ b/youtube_dl/extractor/dreisat.py
@ -1,7 +1,6 @@
 # coding: utf-8

 import re
-import xml.etree.ElementTree

 from .common import InfoExtractor
 from ..utils import (
@ -12,7 +11,7 @@ from ..utils import (

 class DreiSatIE(InfoExtractor):
    IE_NAME = '3sat'
-    _VALID_URL = r'(?:http://)?(?:www\.)?3sat.de/mediathek/index.php\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'
+    _VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/index\.php\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'
    _TEST = {
        u"url": u"http://www.3sat.de/mediathek/index.php?obj=36983",
        u'file': u'36983.webm',
@ -30,8 +29,7 @@ class DreiSatIE(InfoExtractor):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
-        details_xml = self._download_webpage(details_url, video_id, note=u'Downloading video details')
-        details_doc = xml.etree.ElementTree.fromstring(details_xml.encode('utf-8'))
+        details_doc = self._download_xml(details_url, video_id, note=u'Downloading video details')

        thumbnail_els = details_doc.findall('.//teaserimage')
        thumbnails = [{
@ -67,7 +65,7 @@ class DreiSatIE(InfoExtractor):
            return (qidx, prefer_http, format['video_bitrate'])
        formats.sort(key=_sortkey)

-        info = {
+        return {
            '_type': 'video',
            'id': video_id,
            'title': video_title,
@ -78,8 +76,3 @@ class DreiSatIE(InfoExtractor):
            'uploader': video_uploader,
            'upload_date': upload_date,
        }
-
-        # TODO: Remove when #980 has been merged
-        info.update(formats[-1])
-
-        return info
--- a/youtube_dl/extractor/ebaumsworld.py
+++ b/youtube_dl/extractor/ebaumsworld.py
@ -1,5 +1,4 @@
 import re
-import xml.etree.ElementTree

 from .common import InfoExtractor
 from ..utils import determine_ext
@ -21,9 +20,8 @@ class EbaumsWorldIE(InfoExtractor):
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
-        config_xml = self._download_webpage(
+        config = self._download_xml(
            'http://www.ebaumsworld.com/video/player/%s' % video_id, video_id)
-        config = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
        video_url = config.find('file').text

        return {
--- a/youtube_dl/extractor/eighttracks.py
+++ b/youtube_dl/extractor/eighttracks.py
@ -10,7 +10,7 @@ from ..utils import (

 class EightTracksIE(InfoExtractor):
    IE_NAME = '8tracks'
-    _VALID_URL = r'https?://8tracks.com/(?P<user>[^/]+)/(?P<id>[^/#]+)(?:#.*)?$'
+    _VALID_URL = r'https?://8tracks\.com/(?P<user>[^/]+)/(?P<id>[^/#]+)(?:#.*)?$'
    _TEST = {
        u"name": u"EightTracks",
        u"url": u"http://8tracks.com/ytdl/youtube-dl-test-tracks-a",
--- a/youtube_dl/extractor/exfm.py
+++ b/youtube_dl/extractor/exfm.py
@ -8,7 +8,7 @@ class ExfmIE(InfoExtractor):
    IE_NAME = u'exfm'
    IE_DESC = u'ex.fm'
    _VALID_URL = r'(?:http://)?(?:www\.)?ex\.fm/song/([^/]+)'
-    _SOUNDCLOUD_URL = r'(?:http://)?(?:www\.)?api\.soundcloud.com/tracks/([^/]+)/stream'
+    _SOUNDCLOUD_URL = r'(?:http://)?(?:www\.)?api\.soundcloud\.com/tracks/([^/]+)/stream'
    _TESTS = [
        {
            u'url': u'http://ex.fm/song/eh359',
--- a/youtube_dl/extractor/faz.py
+++ b/youtube_dl/extractor/faz.py
@ -1,6 +1,5 @@
 # encoding: utf-8
 import re
-import xml.etree.ElementTree

 from .common import InfoExtractor
 from ..utils import (
@ -10,7 +9,7 @@ from ..utils import (

 class FazIE(InfoExtractor):
    IE_NAME = u'faz.net'
-    _VALID_URL = r'https?://www\.faz\.net/multimedia/videos/.*?-(?P<id>\d+).html'
+    _VALID_URL = r'https?://www\.faz\.net/multimedia/videos/.*?-(?P<id>\d+)\.html'

    _TEST = {
        u'url': u'http://www.faz.net/multimedia/videos/stockholm-chemie-nobelpreis-fuer-drei-amerikanische-forscher-12610585.html',
@ -28,9 +27,8 @@ class FazIE(InfoExtractor):
        webpage = self._download_webpage(url, video_id)
        config_xml_url = self._search_regex(r'writeFLV\(\'(.+?)\',', webpage,
            u'config xml url')
-        config_xml = self._download_webpage(config_xml_url, video_id,
+        config = self._download_xml(config_xml_url, video_id,
            u'Downloading config xml')
-        config = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))

        encodings = config.find('ENCODINGS')
        formats = []
@ -46,13 +44,10 @@ class FazIE(InfoExtractor):
            })

        descr = self._html_search_regex(r'<p class="Content Copy">(.*?)</p>', webpage, u'description')
-        info = {
+        return {
            'id': video_id,
            'title': self._og_search_title(webpage),
            'formats': formats,
            'description': descr,
            'thumbnail': config.find('STILL/STILL_BIG').text,
        }
-        # TODO: Remove when #980 has been merged
-        info.update(formats[-1])
-        return info
--- a/youtube_dl/extractor/fktv.py
+++ b/youtube_dl/extractor/fktv.py
@ -12,7 +12,7 @@ from ..utils import (

 class FKTVIE(InfoExtractor):
    IE_NAME = u'fernsehkritik.tv'
-    _VALID_URL = r'(?:http://)?(?:www\.)?fernsehkritik.tv/folge-(?P<ep>[0-9]+)(?:/.*)?'
+    _VALID_URL = r'(?:http://)?(?:www\.)?fernsehkritik\.tv/folge-(?P<ep>[0-9]+)(?:/.*)?'

    _TEST = {
        u'url': u'http://fernsehkritik.tv/folge-1',
@ -52,7 +52,7 @@ class FKTVIE(InfoExtractor):

 class FKTVPosteckeIE(InfoExtractor):
    IE_NAME = u'fernsehkritik.tv:postecke'
-    _VALID_URL = r'(?:http://)?(?:www\.)?fernsehkritik.tv/inline-video/postecke.php\?(.*&)?ep=(?P<ep>[0-9]+)(&|$)'
+    _VALID_URL = r'(?:http://)?(?:www\.)?fernsehkritik\.tv/inline-video/postecke\.php\?(.*&)?ep=(?P<ep>[0-9]+)(&|$)'
    _TEST = {
        u'url': u'http://fernsehkritik.tv/inline-video/postecke.php?iframe=true&width=625&height=440&ep=120',
        u'file': u'0120.flv',
--- a/youtube_dl/extractor/francetv.py
+++ b/youtube_dl/extractor/francetv.py
@ -1,6 +1,5 @@
 # encoding: utf-8
 import re
-import xml.etree.ElementTree
 import json

 from .common import InfoExtractor
@ -11,11 +10,10 @@ from ..utils import (

 class FranceTVBaseInfoExtractor(InfoExtractor):
    def _extract_video(self, video_id):
-        xml_desc = self._download_webpage(
+        info = self._download_xml(
            'http://www.francetvinfo.fr/appftv/webservices/video/'
            'getInfosOeuvre.php?id-diffusion='
            + video_id, video_id, 'Downloading XML config')
-        info = xml.etree.ElementTree.fromstring(xml_desc.encode('utf-8'))

        manifest_url = info.find('videos/video/url').text
        video_url = manifest_url.replace('manifest.f4m', 'index_2_av.m3u8')
@ -23,7 +21,7 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
        thumbnail_path = info.find('image').text

        return {'id': video_id,
-                'ext': 'mp4',
+                'ext': 'flv' if video_url.startswith('rtmp') else 'mp4',
                'url': video_url,
                'title': info.find('titre').text,
                'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', thumbnail_path),
@ -47,7 +45,7 @@ class PluzzIE(FranceTVBaseInfoExtractor):

 class FranceTvInfoIE(FranceTVBaseInfoExtractor):
    IE_NAME = u'francetvinfo.fr'
-    _VALID_URL = r'https?://www\.francetvinfo\.fr/replay.*/(?P<title>.+).html'
+    _VALID_URL = r'https?://www\.francetvinfo\.fr/replay.*/(?P<title>.+)\.html'

    _TEST = {
        u'url': u'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html',
@ -68,35 +66,101 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor):
        return self._extract_video(video_id)


-class France2IE(FranceTVBaseInfoExtractor):
-    IE_NAME = u'france2.fr'
-    _VALID_URL = r'''(?x)https?://www\.france2\.fr/
+class FranceTVIE(FranceTVBaseInfoExtractor):
+    IE_NAME = u'francetv'
+    IE_DESC = u'France 2, 3, 4, 5 and Ô'
+    _VALID_URL = r'''(?x)https?://www\.france[2345o]\.fr/
        (?:
-            emissions/.*?/videos/(?P<id>\d+)
-        |   emission/(?P<key>[^/?]+)
+            emissions/.*?/(videos|emissions)/(?P<id>[^/?]+)
+        |   (emissions?|jt)/(?P<key>[^/?]+)
        )'''

-    _TEST = {
-        u'url': u'http://www.france2.fr/emissions/13h15-le-samedi-le-dimanche/videos/75540104',
-        u'file': u'75540104.mp4',
-        u'info_dict': {
-            u'title': u'13h15, le samedi...',
-            u'description': u'md5:2e5b58ba7a2d3692b35c792be081a03d',
+    _TESTS = [
+        # france2
+        {
+            u'url': u'http://www.france2.fr/emissions/13h15-le-samedi-le-dimanche/videos/75540104',
+            u'file': u'75540104.mp4',
+            u'info_dict': {
+                u'title': u'13h15, le samedi...',
+                u'description': u'md5:2e5b58ba7a2d3692b35c792be081a03d',
+            },
+            u'params': {
+                # m3u8 download
+                u'skip_download': True,
+            },
        },
-        u'params': {
-            u'skip_download': True,
+        # france3
+        {
+            u'url': u'http://www.france3.fr/emissions/pieces-a-conviction/diffusions/13-11-2013_145575',
+            u'info_dict': {
+                u'id': u'000702326_CAPP_PicesconvictionExtrait313022013_120220131722_Au',
+                u'ext': u'flv',
+                u'title': u'Le scandale du prix des médicaments',
+                u'description': u'md5:1384089fbee2f04fc6c9de025ee2e9ce',
+            },
+            u'params': {
+                # rtmp download
+                u'skip_download': True,
+            },
        },
-    }
+        # france4
+        {
+            u'url': u'http://www.france4.fr/emissions/hero-corp/videos/rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4',
+            u'info_dict': {
+                u'id': u'rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4',
+                u'ext': u'flv',
+                u'title': u'Hero Corp Making of - Extrait 1',
+                u'description': u'md5:c87d54871b1790679aec1197e73d650a',
+            },
+            u'params': {
+                # rtmp download
+                u'skip_download': True,
+            },
+        },
+        # france5
+        {
+            u'url': u'http://www.france5.fr/emissions/c-a-dire/videos/92837968',
+            u'info_dict': {
+                u'id': u'92837968',
+                u'ext': u'mp4',
+                u'title': u'C à dire ?!',
+                u'description': u'md5:fb1db1cbad784dcce7c7a7bd177c8e2f',
+            },
+            u'params': {
+                # m3u8 download
+                u'skip_download': True,
+            },
+        },
+        # franceo
+        {
+            u'url': u'http://www.franceo.fr/jt/info-afrique/04-12-2013',
+            u'info_dict': {
+                u'id': u'92327925',
+                u'ext': u'mp4',
+                u'title': u'Infô-Afrique',
+                u'description': u'md5:ebf346da789428841bee0fd2a935ea55',
+            },
+            u'params': {
+                # m3u8 download
+                u'skip_download': True,
+            },
+            u'skip': u'The id changes frequently',
+        },
+    ]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        if mobj.group('key'):
            webpage = self._download_webpage(url, mobj.group('key'))
-            video_id = self._html_search_regex(
-                r'''(?x)<div\s+class="video-player">\s*
+            id_res = [
+                (r'''(?x)<div\s+class="video-player">\s*
                    <a\s+href="http://videos.francetv.fr/video/([0-9]+)"\s+
-                    class="francetv-video-player">''',
-                webpage, u'video ID')
+                    class="francetv-video-player">'''),
+                (r'<a id="player_direct" href="http://info\.francetelevisions'
+                 r'\.fr/\?id-video=([^"/&]+)'),
+                (r'<a class="video" id="ftv_player_(.+?)"'),
+            ]
+            video_id = self._html_search_regex(id_res, webpage, u'video ID')
        else:
            video_id = mobj.group('id')
        return self._extract_video(video_id)
--- a/youtube_dl/extractor/gamekings.py
+++ b/youtube_dl/extractor/gamekings.py
@ -4,7 +4,7 @@ from .common import InfoExtractor


 class GamekingsIE(InfoExtractor):
-    _VALID_URL = r'http?://www\.gamekings\.tv/videos/(?P<name>[0-9a-z\-]+)'
+    _VALID_URL = r'http://www\.gamekings\.tv/videos/(?P<name>[0-9a-z\-]+)'
    _TEST = {
        u"url": u"http://www.gamekings.tv/videos/phoenix-wright-ace-attorney-dual-destinies-review/",
        u'file': u'20130811.mp4',
--- a/youtube_dl/extractor/gamespot.py
+++ b/youtube_dl/extractor/gamespot.py
@ -47,13 +47,10 @@ class GameSpotIE(InfoExtractor):
                'format_id': q,
            })

-        info = {
+        return {
            'id': data_video['guid'],
            'title': compat_urllib_parse.unquote(data_video['title']),
            'formats': formats,
            'description': get_meta_content('description', webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
        }
-        # TODO: Remove when #980 has been merged
-        info.update(formats[-1])
-        return info
--- a/youtube_dl/extractor/gametrailers.py
+++ b/youtube_dl/extractor/gametrailers.py
@ -1,13 +1,10 @@
 import re

-from .mtv import MTVIE, _media_xml_tag
+from .mtv import MTVServicesInfoExtractor

-class GametrailersIE(MTVIE):
-    """
-    Gametrailers use the same videos system as MTVIE, it just changes the feed
-    url, where the uri is and the method to get the thumbnails.
-    """
-    _VALID_URL = r'http://www.gametrailers.com/(?P<type>videos|reviews|full-episodes)/(?P<id>.*?)/(?P<title>.*)'
+
+class GametrailersIE(MTVServicesInfoExtractor):
+    _VALID_URL = r'http://www\.gametrailers\.com/(?P<type>videos|reviews|full-episodes)/(?P<id>.*?)/(?P<title>.*)'
    _TEST = {
        u'url': u'http://www.gametrailers.com/videos/zbvr8i/mirror-s-edge-2-e3-2013--debut-trailer',
        u'file': u'70e9a5d7-cf25-4a10-9104-6f3e7342ae0d.mp4',
@ -17,15 +14,9 @@ class GametrailersIE(MTVIE):
            u'description': u'Faith is back!  Check out the World Premiere trailer for Mirror\'s Edge 2 straight from the EA Press Conference at E3 2013!',
        },
    }
-    # Overwrite MTVIE properties we don't want
-    _TESTS = []

    _FEED_URL = 'http://www.gametrailers.com/feeds/mrss'

-    def _get_thumbnail_url(self, uri, itemdoc):
-        search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))
-        return itemdoc.find(search_path).attrib['url']
-
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@ -169,8 +169,13 @@ class GenericIE(InfoExtractor):
        #   Site Name | Video Title
        #   Video Title - Tagline | Site Name
        # and so on and so forth; it's just not practical
-        video_title = self._html_search_regex(r'<title>(.*)</title>',
-            webpage, u'video title', default=u'video', flags=re.DOTALL)
+        video_title = self._html_search_regex(
+            r'(?s)<title>(.*?)</title>', webpage, u'video title',
+            default=u'video')
+
+        # video uploader is domain name
+        video_uploader = self._search_regex(
+            r'^(?:https?://)?([^/]*)/.*', url, u'video uploader')

        # Look for BrightCove:
        bc_url = BrightcoveIE._extract_brightcove_url(webpage)
@ -188,13 +193,35 @@ class GenericIE(InfoExtractor):

        # Look for embedded YouTube player
        matches = re.findall(
-            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?youtube.com/embed/.+?)\1', webpage)
+            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?youtube\.com/embed/.+?)\1', webpage)
        if matches:
            urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Youtube')
                     for tuppl in matches]
            return self.playlist_result(
                urlrs, playlist_id=video_id, playlist_title=video_title)

+        # Look for embedded Dailymotion player
+        matches = re.findall(
+            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage)
+        if matches:
+            urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Dailymotion')
+                     for tuppl in matches]
+            return self.playlist_result(
+                urlrs, playlist_id=video_id, playlist_title=video_title)
+
+        # Look for embedded Wistia player
+        match = re.search(
+            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
+        if match:
+            return {
+                '_type': 'url_transparent',
+                'url': unescapeHTML(match.group('url')),
+                'ie_key': 'Wistia',
+                'uploader': video_uploader,
+                'title': video_title,
+                'id': video_id,
+            }
+
        # Look for Bandcamp pages with custom domain
        mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
        if mobj is not None:
@ -238,14 +265,9 @@ class GenericIE(InfoExtractor):
        # here's a fun little line of code for you:
        video_id = os.path.splitext(video_id)[0]

-        # video uploader is domain name
-        video_uploader = self._search_regex(r'(?:https?://)?([^/]*)/.*',
-            url, u'video uploader')
-
        return {
            'id':       video_id,
            'url':      video_url,
            'uploader': video_uploader,
-            'upload_date':  None,
            'title':    video_title,
        }
--- a/youtube_dl/extractor/hotnewhiphop.py
+++ b/youtube_dl/extractor/hotnewhiphop.py
@ -11,7 +11,7 @@ class HotNewHipHopIE(InfoExtractor):
        u'file': u'1435540.mp3',
        u'md5': u'2c2cd2f76ef11a9b3b581e8b232f3d96',
        u'info_dict': {
-            u"title": u"Freddie Gibbs - Lay It Down"
+            u"title": u'Freddie Gibbs "Lay It Down"'
        }
    }

--- a/youtube_dl/extractor/ign.py
+++ b/youtube_dl/extractor/ign.py
@ -103,7 +103,7 @@ class IGNIE(InfoExtractor):
 class OneUPIE(IGNIE):
    """Extractor for 1up.com, it uses the ign videos system."""

-    _VALID_URL = r'https?://gamevideos.1up.com/(?P<type>video)/id/(?P<name_or_id>.+)'
+    _VALID_URL = r'https?://gamevideos\.1up\.com/(?P<type>video)/id/(?P<name_or_id>.+)'
    IE_NAME = '1up.com'

    _DESCRIPTION_RE = r'<div id="vid_summary">(.+?)</div>'
--- a/youtube_dl/extractor/imdb.py
+++ b/youtube_dl/extractor/imdb.py
@ -0,0 +1,70 @
+import re
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urlparse,
+    get_element_by_attribute,
+)
+
+
+class ImdbIE(InfoExtractor):
+    """Extractor for trailers on imdb.com video pages."""
+
+    IE_NAME = u'imdb'
+    IE_DESC = u'Internet Movie Database trailers'
+    _VALID_URL = r'http://www\.imdb\.com/video/imdb/vi(?P<id>\d+)'
+
+    _TEST = {
+        u'url': u'http://www.imdb.com/video/imdb/vi2524815897',
+        u'md5': u'9f34fa777ade3a6e57a054fdbcb3a068',
+        u'info_dict': {
+            u'id': u'2524815897',
+            u'ext': u'mp4',
+            u'title': u'Ice Age: Continental Drift Trailer (No. 2) - IMDb',
+            u'description': u'md5:9061c2219254e5d14e03c25c98e96a81',
+        }
+    }
+
+    def _real_extract(self, url):
+        """Return the info dict for the video page at url.
+
+        The page lists one relative URL per format; each of those pages
+        embeds JSON (script tag with class "imdb-player-data") holding the
+        media URL and the "slate" image used as the thumbnail.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        webpage = self._download_webpage(url, video_id)
+        descr = get_element_by_attribute('itemprop', 'description', webpage)
+        available_formats = re.findall(
+            r'case \'(?P<f_id>.*?)\' :$\s+url = \'(?P<path>.*?)\'', webpage,
+            flags=re.MULTILINE)
+        formats = []
+        # Stays None when no format is listed instead of hitting an unbound name
+        thumbnail = None
+        for f_id, f_path in available_formats:
+            f_path = f_path.strip()
+            # The second positional argument is the video id; the progress
+            # message has to go into the note slot that follows it.
+            format_page = self._download_webpage(
+                compat_urlparse.urljoin(url, f_path), video_id,
+                u'Downloading info for %s format' % f_id)
+            json_data = self._search_regex(
+                r'<script[^>]+class="imdb-player-data"[^>]*?>(.*?)</script>',
+                format_page, u'json data', flags=re.DOTALL)
+            info = json.loads(json_data)
+            format_info = info['videoPlayerObject']['video']
+            formats.append({
+                'format_id': f_id,
+                'url': format_info['url'],
+            })
+            thumbnail = format_info['slate']
+
+        return {
+            'id': video_id,
+            'title': self._og_search_title(webpage),
+            'formats': formats,
+            'description': descr,
+            'thumbnail': thumbnail,
+        }
--- a/youtube_dl/extractor/instagram.py
+++ b/youtube_dl/extractor/instagram.py
@ -3,7 +3,7 @@ import re
 from .common import InfoExtractor

 class InstagramIE(InfoExtractor):
-    _VALID_URL = r'(?:http://)?instagram.com/p/(.*?)/'
+    _VALID_URL = r'(?:http://)?instagram\.com/p/(.*?)/'
    _TEST = {
        u'url': u'http://instagram.com/p/aye83DjauH/?foo=bar#abc',
        u'file': u'aye83DjauH.mp4',
--- a/youtube_dl/extractor/internetvideoarchive.py
+++ b/youtube_dl/extractor/internetvideoarchive.py
@ -1,5 +1,4 @@
 import re
-import xml.etree.ElementTree

 from .common import InfoExtractor
 from ..utils import (
@ -43,9 +42,8 @@ class InternetVideoArchiveIE(InfoExtractor):
        video_id = query_dic['publishedid'][0]
        url = self._build_url(query)

-        flashconfiguration_xml = self._download_webpage(url, video_id,
+        flashconfiguration = self._download_xml(url, video_id,
            u'Downloading flash configuration')
-        flashconfiguration = xml.etree.ElementTree.fromstring(flashconfiguration_xml.encode('utf-8'))
        file_url = flashconfiguration.find('file').text
        file_url = file_url.replace('/playlist.aspx', '/mrssplaylist.aspx')
        # Replace some of the parameters in the query to get the best quality
@ -53,9 +51,8 @@ class InternetVideoArchiveIE(InfoExtractor):
        file_url = re.sub(r'(?<=\?)(.+)$',
            lambda m: self._clean_query(m.group()),
            file_url)
-        info_xml = self._download_webpage(file_url, video_id,
+        info = self._download_xml(file_url, video_id,
            u'Downloading video info')
-        info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
        item = info.find('channel/item')

        def _bp(p):
--- a/youtube_dl/extractor/jeuxvideo.py
+++ b/youtube_dl/extractor/jeuxvideo.py
@ -2,7 +2,6 @@

 import json
 import re
-import xml.etree.ElementTree

 from .common import InfoExtractor

@ -32,12 +31,9 @@ class JeuxVideoIE(InfoExtractor):
            r'http://www\.jeuxvideo\.com/config/\w+/\d+/(.*?)/\d+_player\.xml',
            xml_link, u'video ID')

-        xml_config = self._download_webpage(
+        config = self._download_xml(
            xml_link, title, u'Downloading XML config')
-        config = xml.etree.ElementTree.fromstring(xml_config.encode('utf-8'))
-        info_json = self._search_regex(
-            r'(?sm)<format\.json>(.*?)</format\.json>',
-            xml_config, u'JSON information')
+        info_json = config.find('format.json').text
        info = json.loads(info_json)['versions'][0]
        
        video_url = 'http://video720.jeuxvideo.com/' + info['file']
--- a/youtube_dl/extractor/jukebox.py
+++ b/youtube_dl/extractor/jukebox.py
@ -8,7 +8,7 @@ from ..utils import (
 )

 class JukeboxIE(InfoExtractor):
-    _VALID_URL = r'^http://www\.jukebox?\..+?\/.+[,](?P<video_id>[a-z0-9\-]+).html'
+    _VALID_URL = r'^http://www\.jukebox?\..+?\/.+[,](?P<video_id>[a-z0-9\-]+)\.html'
    _IFRAME = r'<iframe .*src="(?P<iframe>[^"]*)".*>'
    _VIDEO_URL = r'"config":{"file":"(?P<video_url>http:[^"]+[.](?P<video_ext>[^.?]+)[?]mdtk=[0-9]+)"'
    _TITLE = r'<h1 class="inline">(?P<title>[^<]+)</h1>.*<span id="infos_article_artist">(?P<artist>[^<]+)</span>'
--- a/youtube_dl/extractor/justintv.py
+++ b/youtube_dl/extractor/justintv.py
@ -1,7 +1,6 @@
 import json
 import os
 import re
-import xml.etree.ElementTree

 from .common import InfoExtractor
 from ..utils import (
@ -94,10 +93,9 @@ class JustinTVIE(InfoExtractor):
            archive_id = m.group(1)

            api = api_base + '/broadcast/by_chapter/%s.xml' % chapter_id
-            chapter_info_xml = self._download_webpage(api, chapter_id,
+            doc = self._download_xml(api, chapter_id,
                                             note=u'Downloading chapter information',
                                             errnote=u'Chapter information download failed')
-            doc = xml.etree.ElementTree.fromstring(chapter_info_xml)
            for a in doc.findall('.//archive'):
                if archive_id == a.find('./id').text:
                    break
--- a/youtube_dl/extractor/liveleak.py
+++ b/youtube_dl/extractor/liveleak.py
@ -8,7 +8,7 @@ from ..utils import (

 class LiveLeakIE(InfoExtractor):

-    _VALID_URL = r'^(?:http?://)?(?:\w+\.)?liveleak\.com/view\?(?:.*?)i=(?P<video_id>[\w_]+)(?:.*)'
+    _VALID_URL = r'^(?:http://)?(?:\w+\.)?liveleak\.com/view\?(?:.*?)i=(?P<video_id>[\w_]+)(?:.*)'
    IE_NAME = u'liveleak'
    _TEST = {
        u'url': u'http://www.liveleak.com/view?i=757_1364311680',
--- a/youtube_dl/extractor/livestream.py
+++ b/youtube_dl/extractor/livestream.py
@ -1,6 +1,5 @@
 import re
 import json
-import xml.etree.ElementTree

 from .common import InfoExtractor
 from ..utils import (
@ -12,7 +11,7 @@ from ..utils import (

 class LivestreamIE(InfoExtractor):
    IE_NAME = u'livestream'
-    _VALID_URL = r'http://new.livestream.com/.*?/(?P<event_name>.*?)(/videos/(?P<id>\d+))?/?$'
+    _VALID_URL = r'http://new\.livestream\.com/.*?/(?P<event_name>.*?)(/videos/(?P<id>\d+))?/?$'
    _TEST = {
        u'url': u'http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370',
        u'file': u'4719370.mp4',
@ -80,8 +79,7 @@ class LivestreamOriginalIE(InfoExtractor):
        user = mobj.group('user')
        api_url = 'http://x{0}x.api.channel.livestream.com/2.0/clipdetails?extendedInfo=true&id={1}'.format(user, video_id)

-        api_response = self._download_webpage(api_url, video_id)
-        info = xml.etree.ElementTree.fromstring(api_response.encode('utf-8'))
+        info = self._download_xml(api_url, video_id)
        item = info.find('channel').find('item')
        ns = {'media': 'http://search.yahoo.com/mrss'}
        thumbnail_url = item.find(xpath_with_ns('media:thumbnail', ns)).attrib['url']
--- a/youtube_dl/extractor/metacafe.py
+++ b/youtube_dl/extractor/metacafe.py
@ -69,6 +69,21 @@ class MetacafeIE(InfoExtractor):
            u'age_limit': 18,
        },
    },
+    # cbs video
+    {
+        u'url': u'http://www.metacafe.com/watch/cb-0rOxMBabDXN6/samsung_galaxy_note_2_samsungs_next_generation_phablet/',
+        u'info_dict': {
+            u'id': u'0rOxMBabDXN6',
+            u'ext': u'flv',
+            u'title': u'Samsung Galaxy Note 2: Samsung\'s next-generation phablet',
+            u'description': u'md5:54d49fac53d26d5a0aaeccd061ada09d',
+            u'duration': 129,
+        },
+        u'params': {
+            # rtmp download
+            u'skip_download': True,
+        },
+    },
    ]


@ -106,10 +121,16 @@ class MetacafeIE(InfoExtractor):

        video_id = mobj.group(1)

-        # Check if video comes from YouTube
-        mobj2 = re.match(r'^yt-(.*)$', video_id)
-        if mobj2 is not None:
-            return [self.url_result('http://www.youtube.com/watch?v=%s' % mobj2.group(1), 'Youtube')]
+        # the video may come from an external site
+        m_external = re.match('^(\w{2})-(.*)$', video_id)
+        if m_external is not None:
+            prefix, ext_id = m_external.groups()
+            # Check if video comes from YouTube
+            if prefix == 'yt':
+                return self.url_result('http://www.youtube.com/watch?v=%s' % ext_id, 'Youtube')
+            # CBS videos use theplatform.com
+            if prefix == 'cb':
+                return self.url_result('theplatform:%s' % ext_id, 'ThePlatform')

        # Retrieve video webpage to extract further information
        req = compat_urllib_request.Request('http://www.metacafe.com/watch/%s/' % video_id)
--- a/youtube_dl/extractor/metacritic.py
+++ b/youtube_dl/extractor/metacritic.py
@ -43,13 +43,10 @@ class MetacriticIE(InfoExtractor):
        description = self._html_search_regex(r'<b>Description:</b>(.*?)</p>',
            webpage, u'description', flags=re.DOTALL)

-        info = {
+        return {
            'id': video_id,
            'title': clip.find('title').text,
            'formats': formats,
            'description': description,
            'duration': int(clip.find('duration').text),
        }
-        # TODO: Remove when #980 has been merged
-        info.update(formats[-1])
-        return info
--- a/youtube_dl/extractor/mtv.py
+++ b/youtube_dl/extractor/mtv.py
@ -10,35 +10,8 @@ from ..utils import (
 def _media_xml_tag(tag):
    return '{http://search.yahoo.com/mrss/}%s' % tag

-class MTVIE(InfoExtractor):
-    _VALID_URL = r'^https?://(?:www\.)?mtv\.com/videos/.+?/(?P<videoid>[0-9]+)/[^/]+$'
-
-    _FEED_URL = 'http://www.mtv.com/player/embed/AS3/rss/'
-
-    _TESTS = [
-        {
-            u'url': u'http://www.mtv.com/videos/misc/853555/ours-vh1-storytellers.jhtml',
-            u'file': u'853555.mp4',
-            u'md5': u'850f3f143316b1e71fa56a4edfd6e0f8',
-            u'info_dict': {
-                u'title': u'Taylor Swift - "Ours (VH1 Storytellers)"',
-                u'description': u'Album: Taylor Swift performs "Ours" for VH1 Storytellers at Harvey Mudd College.',
-            },
-        },
-        {
-            u'add_ie': ['Vevo'],
-            u'url': u'http://www.mtv.com/videos/taylor-swift/916187/everything-has-changed-ft-ed-sheeran.jhtml',
-            u'file': u'USCJY1331283.mp4',
-            u'md5': u'73b4e7fcadd88929292fe52c3ced8caf',
-            u'info_dict': {
-                u'title': u'Everything Has Changed',
-                u'upload_date': u'20130606',
-                u'uploader': u'Taylor Swift',
-            },
-            u'skip': u'VEVO is only available in some countries',
-        },
-    ]

+class MTVServicesInfoExtractor(InfoExtractor):
    @staticmethod
    def _id_from_uri(uri):
        return uri.split(':')[-1]
@ -53,7 +26,12 @@ class MTVIE(InfoExtractor):
        return base + m.group('finalid')

    def _get_thumbnail_url(self, uri, itemdoc):
-        return 'http://mtv.mtvnimages.com/uri/' + uri
+        search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))
+        thumb_node = itemdoc.find(search_path)
+        if thumb_node is None:
+            return None
+        else:
+            return thumb_node.attrib['url']

    def _extract_video_formats(self, metadataXml):
        if '/error_country_block.swf' in metadataXml:
@ -93,7 +71,7 @@ class MTVIE(InfoExtractor):
        else:
            description = None

-        info = {
+        return {
            'title': itemdoc.find('title').text,
            'formats': self._extract_video_formats(mediagen_page),
            'id': video_id,
@ -101,19 +79,46 @@ class MTVIE(InfoExtractor):
            'description': description,
        }

-        # TODO: Remove when #980 has been merged
-        info.update(info['formats'][-1])
-
-        return info
-
    def _get_videos_info(self, uri):
        video_id = self._id_from_uri(uri)
        data = compat_urllib_parse.urlencode({'uri': uri})
-        infoXml = self._download_webpage(self._FEED_URL +'?' + data, video_id,
+        idoc = self._download_xml(self._FEED_URL +'?' + data, video_id,
                                         u'Downloading info')
-        idoc = xml.etree.ElementTree.fromstring(infoXml.encode('utf-8'))
        return [self._get_video_info(item) for item in idoc.findall('.//item')]

+
+class MTVIE(MTVServicesInfoExtractor):
+    _VALID_URL = r'^https?://(?:www\.)?mtv\.com/videos/.+?/(?P<videoid>[0-9]+)/[^/]+$'
+
+    _FEED_URL = 'http://www.mtv.com/player/embed/AS3/rss/'
+
+    _TESTS = [
+        {
+            u'url': u'http://www.mtv.com/videos/misc/853555/ours-vh1-storytellers.jhtml',
+            u'file': u'853555.mp4',
+            u'md5': u'850f3f143316b1e71fa56a4edfd6e0f8',
+            u'info_dict': {
+                u'title': u'Taylor Swift - "Ours (VH1 Storytellers)"',
+                u'description': u'Album: Taylor Swift performs "Ours" for VH1 Storytellers at Harvey Mudd College.',
+            },
+        },
+        {
+            u'add_ie': ['Vevo'],
+            u'url': u'http://www.mtv.com/videos/taylor-swift/916187/everything-has-changed-ft-ed-sheeran.jhtml',
+            u'file': u'USCJY1331283.mp4',
+            u'md5': u'73b4e7fcadd88929292fe52c3ced8caf',
+            u'info_dict': {
+                u'title': u'Everything Has Changed',
+                u'upload_date': u'20130606',
+                u'uploader': u'Taylor Swift',
+            },
+            u'skip': u'VEVO is only available in some countries',
+        },
+    ]
+
+    def _get_thumbnail_url(self, uri, itemdoc):
+        return 'http://mtv.mtvnimages.com/uri/' + uri
+
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('videoid')
--- a/youtube_dl/extractor/muzu.py
+++ b/youtube_dl/extractor/muzu.py
@ -9,7 +9,7 @@ from ..utils import (


 class MuzuTVIE(InfoExtractor):
-    _VALID_URL = r'https?://www.muzu.tv/(.+?)/(.+?)/(?P<id>\d+)'
+    _VALID_URL = r'https?://www\.muzu\.tv/(.+?)/(.+?)/(?P<id>\d+)'
    IE_NAME = u'muzu.tv'

    _TEST = {
--- a/youtube_dl/extractor/myspass.py
+++ b/youtube_dl/extractor/myspass.py
@ -1,5 +1,4 @@
 import os.path
-import xml.etree.ElementTree

 from .common import InfoExtractor
 from ..utils import (
@ -10,7 +9,7 @@ from ..utils import (


 class MySpassIE(InfoExtractor):
-    _VALID_URL = r'http://www.myspass.de/.*'
+    _VALID_URL = r'http://www\.myspass\.de/.*'
    _TEST = {
        u'url': u'http://www.myspass.de/myspass/shows/tvshows/absolute-mehrheit/Absolute-Mehrheit-vom-17022013-Die-Highlights-Teil-2--/11741/',
        u'file': u'11741.mp4',
@ -33,8 +32,7 @@ class MySpassIE(InfoExtractor):

        # get metadata
        metadata_url = META_DATA_URL_TEMPLATE % video_id
-        metadata_text = self._download_webpage(metadata_url, video_id)
-        metadata = xml.etree.ElementTree.fromstring(metadata_text.encode('utf-8'))
+        metadata = self._download_xml(metadata_url, video_id)

        # extract values from metadata
        url_flv_el = metadata.find('url_flv')
--- a/youtube_dl/extractor/naver.py
+++ b/youtube_dl/extractor/naver.py
@ -1,6 +1,5 @@
 # encoding: utf-8
 import re
-import xml.etree.ElementTree

 from .common import InfoExtractor
 from ..utils import (
@ -38,14 +37,12 @@ class NaverIE(InfoExtractor):
            'protocol': 'p2p',
            'inKey': key,
        })
-        info_xml = self._download_webpage(
+        info = self._download_xml(
            'http://serviceapi.rmcnmv.naver.com/flash/videoInfo.nhn?' + query,
            video_id, u'Downloading video info')
-        urls_xml = self._download_webpage(
+        urls = self._download_xml(
            'http://serviceapi.rmcnmv.naver.com/flash/playableEncodingOption.nhn?' + query_urls,
            video_id, u'Downloading video formats info')
-        info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
-        urls = xml.etree.ElementTree.fromstring(urls_xml.encode('utf-8'))

        formats = []
        for format_el in urls.findall('EncodingOptions/EncodingOption'):
@ -59,7 +56,7 @@ class NaverIE(InfoExtractor):
                'height': int(format_el.find('height').text),
            })

-        info = {
+        return {
            'id': video_id,
            'title': info.find('Subject').text,
            'formats': formats,
@ -68,6 +65,3 @@ class NaverIE(InfoExtractor):
            'upload_date': info.find('WriteDate').text.replace('.', ''),
            'view_count': int(info.find('PlayCount').text),
        }
-        # TODO: Remove when #980 has been merged
-        info.update(formats[-1])
-        return info
--- a/youtube_dl/extractor/nbc.py
+++ b/youtube_dl/extractor/nbc.py
@ -1,5 +1,4 @@
 import re
-import xml.etree.ElementTree

 from .common import InfoExtractor
 from ..utils import find_xpath_attr, compat_str
@ -21,8 +20,8 @@ class NBCNewsIE(InfoExtractor):
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
-        info_xml = self._download_webpage('http://www.nbcnews.com/id/%s/displaymode/1219' % video_id, video_id)
-        info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8')).find('video')
+        all_info = self._download_xml('http://www.nbcnews.com/id/%s/displaymode/1219' % video_id, video_id)
+        info = all_info.find('video')

        return {'id': video_id,
                'title': info.find('headline').text,
--- a/youtube_dl/extractor/nhl.py
+++ b/youtube_dl/extractor/nhl.py
@ -1,6 +1,5 @@
 import re
 import json
-import xml.etree.ElementTree

 from .common import InfoExtractor
 from ..utils import (
@ -26,9 +25,8 @@ class NHLBaseInfoExtractor(InfoExtractor):
            'path': initial_video_url.replace('.mp4', '_sd.mp4'),
        })
        path_url = 'http://video.nhl.com/videocenter/servlets/encryptvideopath?' + data
-        path_response = self._download_webpage(path_url, video_id,
+        path_doc = self._download_xml(path_url, video_id,
            u'Downloading final video url')
-        path_doc = xml.etree.ElementTree.fromstring(path_response)
        video_url = path_doc.find('path').text

        join = compat_urlparse.urljoin
--- a/youtube_dl/extractor/niconico.py
+++ b/youtube_dl/extractor/niconico.py
@ -2,7 +2,6 @@

 import re
 import socket
-import xml.etree.ElementTree

 from .common import InfoExtractor
 from ..utils import (
@ -81,7 +80,7 @@ class NiconicoIE(InfoExtractor):
        # the cookies in order to be able to download the info webpage
        self._download_webpage('http://www.nicovideo.jp/watch/' + video_id, video_id)

-        video_info_webpage = self._download_webpage(
+        video_info = self._download_xml(
            'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id,
            note=u'Downloading video info page')

@ -92,7 +91,6 @@ class NiconicoIE(InfoExtractor):
        video_real_url = compat_urlparse.parse_qs(flv_info_webpage)['url'][0]

        # Start extracting information
-        video_info = xml.etree.ElementTree.fromstring(video_info_webpage)
        video_title = video_info.find('.//title').text
        video_extension = video_info.find('.//movie_type').text
        video_format = video_extension.upper()
@ -107,13 +105,11 @@ class NiconicoIE(InfoExtractor):
        video_uploader = video_uploader_id
        url = 'http://seiga.nicovideo.jp/api/user/info?id=' + video_uploader_id
        try:
-            user_info_webpage = self._download_webpage(
+            user_info = self._download_xml(
                url, video_id, note=u'Downloading user information')
+            video_uploader = user_info.find('.//nickname').text
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'Unable to download user info webpage: %s' % compat_str(err))
-        else:
-            user_info = xml.etree.ElementTree.fromstring(user_info_webpage)
-            video_uploader = user_info.find('.//nickname').text

        return {
            'id':          video_id,
--- a/youtube_dl/extractor/ninegag.py
+++ b/youtube_dl/extractor/ninegag.py
@ -0,0 +1,49 @
+import json
+import re
+
+from .common import InfoExtractor
+
+
+class NineGagIE(InfoExtractor):
+    """Extractor for 9gag.tv pages that embed a YouTube video."""
+
+    IE_NAME = '9gag'
+    _VALID_URL = r'^https?://(?:www\.)?9gag\.tv/v/(?P<id>[0-9]+)'
+
+    _TEST = {
+        u"url": u"http://9gag.tv/v/1912",
+        u"file": u"1912.mp4",
+        u"info_dict": {
+            u"description": u"This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)",
+            u"title": u"\"People Are Awesome 2013\" Is Absolutely Awesome"
+        },
+        u'add_ie': [u'Youtube']
+    }
+
+    def _real_extract(self, url):
+        """Return a url_transparent result for the embedded YouTube video.
+
+        Title, description, counters and thumbnail come from the JSON
+        stored in the tv-video div's data-video-meta attribute.
+        """
+        video_id = re.match(self._VALID_URL, url).group('id')
+        page = self._download_webpage(url, video_id)
+
+        meta_re = r'''(?x)
+            <div\s*id="tv-video"\s*data-video-source="youtube"\s*
+                data-video-meta="([^"]+)"'''
+        meta = json.loads(
+            self._html_search_regex(meta_re, page, u'video metadata'))
+
+        return {
+            '_type': 'url_transparent',
+            'url': meta['youtubeVideoId'],
+            'ie_key': 'Youtube',
+            'id': video_id,
+            'title': meta['title'],
+            'description': meta['description'],
+            'view_count': int(meta['view_count']),
+            'like_count': int(meta['statistic']['like']),
+            'dislike_count': int(meta['statistic']['dislike']),
+            'thumbnail': meta['thumbnail_url'],
+        }
--- a/youtube_dl/extractor/orf.py
+++ b/youtube_dl/extractor/orf.py
@ -12,7 +12,7 @@ from ..utils import (
 )

 class ORFIE(InfoExtractor):
-    _VALID_URL = r'https?://tvthek.orf.at/(programs/.+?/episodes|topics/.+?)/(?P<id>\d+)'
+    _VALID_URL = r'https?://tvthek\.orf\.at/(programs/.+?/episodes|topics/.+?)/(?P<id>\d+)'

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
--- a/youtube_dl/extractor/pbs.py
+++ b/youtube_dl/extractor/pbs.py
@ -5,7 +5,7 @@ from .common import InfoExtractor


 class PBSIE(InfoExtractor):
-    _VALID_URL = r'https?://video.pbs.org/video/(?P<id>\d+)/?'
+    _VALID_URL = r'https?://video\.pbs\.org/video/(?P<id>\d+)/?'

    _TEST = {
        u'url': u'http://video.pbs.org/video/2365006249/',
--- a/youtube_dl/extractor/podomatic.py
+++ b/youtube_dl/extractor/podomatic.py
@ -0,0 +1,52 @
+import json
+import re
+
+from .common import InfoExtractor
+
+
+class PodomaticIE(InfoExtractor):
+    """Extractor for podcast entries hosted on *.podomatic.com."""
+
+    IE_NAME = 'podomatic'
+    _VALID_URL = r'^(?P<proto>https?)://(?P<channel>[^.]+)\.podomatic\.com/entry/(?P<id>[^?]+)'
+
+    _TEST = {
+        u"url": u"http://scienceteachingtips.podomatic.com/entry/2009-01-02T16_03_35-08_00",
+        u"file": u"2009-01-02T16_03_35-08_00.mp3",
+        u"md5": u"84bb855fcf3429e6bf72460e1eed782d",
+        u"info_dict": {
+            u"uploader": u"Science Teaching Tips",
+            u"uploader_id": u"scienceteachingtips",
+            u"title": u"64.  When the Moon Hits Your Eye",
+            u"duration": 446,
+        }
+    }
+
+    def _real_extract(self, url):
+        """Map the entry's embed_params JSON onto an info dict."""
+        m = re.match(self._VALID_URL, url)
+        video_id, channel = m.group('id', 'channel')
+
+        json_url = (
+            '%s://%s.podomatic.com/entry/embed_params/%s'
+            '?permalink=true&rtmp=0'
+        ) % (m.group('proto'), channel, video_id)
+        params = json.loads(self._download_webpage(
+            json_url, video_id, note=u'Downloading video info'))
+
+        media_url = params['downloadLink']
+        podcast_name = params['podcast']
+        entry_title = params['title']
+        image_url = params['imageLocation']
+        # 'length' is presumably in milliseconds; keep the whole part only
+        seconds = int(params['length'] / 1000.0)
+
+        return {
+            'id': video_id,
+            'url': media_url,
+            'title': entry_title,
+            'uploader': podcast_name,
+            'uploader_id': channel,
+            'thumbnail': image_url,
+            'duration': seconds,
+        }
--- a/youtube_dl/extractor/pyvideo.py
+++ b/youtube_dl/extractor/pyvideo.py
@ -0,0 +1,66 @
+import re
+import os
+
+from .common import InfoExtractor
+
+
+class PyvideoIE(InfoExtractor):
+    """Extractor for pyvideo.org video pages.
+
+    Pages that link to a YouTube watch URL are delegated to the YouTube
+    extractor; otherwise the media URL is read from the page itself.
+    """
+
+    _VALID_URL = r'(?:http://)?(?:www\.)?pyvideo\.org/video/(?P<id>\d+)/(.*)'
+    _TESTS = [{
+        u'url': u'http://pyvideo.org/video/1737/become-a-logging-expert-in-30-minutes',
+        u'file': u'24_4WWkSmNo.mp4',
+        u'md5': u'de317418c8bc76b1fd8633e4f32acbc6',
+        u'info_dict': {
+            u"title": u"Become a logging expert in 30 minutes",
+            u"description": u"md5:9665350d466c67fb5b1598de379021f7",
+            u"upload_date": u"20130320",
+            u"uploader": u"NextDayVideo",
+            u"uploader_id": u"NextDayVideo",
+        },
+        u'add_ie': ['Youtube'],
+    },
+    {
+        u'url': u'http://pyvideo.org/video/2542/gloriajw-spotifywitherikbernhardsson182m4v',
+        u'md5': u'5fe1c7e0a8aa5570330784c847ff6d12',
+        u'info_dict': {
+            u'id': u'2542',
+            u'ext': u'm4v',
+            u'title': u'Gloriajw-SpotifyWithErikBernhardsson182',
+        },
+    },
+    ]
+
+    def _real_extract(self, url):
+        """Return a YouTube url_result or a direct-download info dict."""
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        webpage = self._download_webpage(url, video_id)
+
+        # Stop at the first quote, angle bracket or whitespace: a greedy
+        # ".*" would drag the rest of the line (closing quote, markup)
+        # into the URL handed to the YouTube extractor.
+        m_youtube = re.search(
+            r'(https?://www\.youtube\.com/watch\?v=[^"\'<>\s]+)', webpage)
+        if m_youtube is not None:
+            return self.url_result(m_youtube.group(1), 'Youtube')
+
+        title = self._html_search_regex(
+            r'<div class="section">.*?<h3>([^>]+?)</h3>',
+            webpage, u'title', flags=re.DOTALL)
+        # Two alternative sources: a <source> tag or the "Download" link
+        video_url = self._search_regex(
+            [r'<source src="(.*?)"',
+             r'<dt>Download</dt>.*?<a href="(.+?)"'],
+            webpage, u'video url', flags=re.DOTALL)
+        return {
+            'id': video_id,
+            # Drop a trailing file extension from the title, if any
+            'title': os.path.splitext(title)[0],
+            'url': video_url,
+        }
--- a/youtube_dl/extractor/redtube.py
+++ b/youtube_dl/extractor/redtube.py
@ -30,7 +30,7 @@ class RedTubeIE(InfoExtractor):
            r'<source src="(.+?)" type="video/mp4">', webpage, u'video URL')

        video_title = self._html_search_regex(
-            r'<h1 class="videoTitle slidePanelMovable">(.+?)</h1>',
+            r'<h1 class="videoTitle[^"]*">(.+?)</h1>',
            webpage, u'title')

        # No self-labeling, but they describe themselves as
--- a/youtube_dl/extractor/rutube.py
+++ b/youtube_dl/extractor/rutube.py
@ -11,7 +11,7 @@ from ..utils import (


 class RutubeIE(InfoExtractor):
-    _VALID_URL = r'https?://rutube.ru/video/(?P<long_id>\w+)'
+    _VALID_URL = r'https?://rutube\.ru/video/(?P<long_id>\w+)'

    _TEST = {
        u'url': u'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/',
--- a/youtube_dl/extractor/sina.py
+++ b/youtube_dl/extractor/sina.py
@ -1,7 +1,6 @@
 # coding: utf-8

 import re
-import xml.etree.ElementTree

 from .common import InfoExtractor
 from ..utils import (
@ -35,12 +34,11 @@ class SinaIE(InfoExtractor):

    def _extract_video(self, video_id):
        data = compat_urllib_parse.urlencode({'vid': video_id})
-        url_page = self._download_webpage('http://v.iask.com/v_play.php?%s' % data,
+        url_doc = self._download_xml('http://v.iask.com/v_play.php?%s' % data,
            video_id, u'Downloading video url')
        image_page = self._download_webpage(
            'http://interface.video.sina.com.cn/interface/common/getVideoImage.php?%s' % data,
            video_id, u'Downloading thumbnail info')
-        url_doc = xml.etree.ElementTree.fromstring(url_page.encode('utf-8'))

        return {'id': video_id,
                'url': url_doc.find('./durl/url').text,
--- a/youtube_dl/extractor/slashdot.py
+++ b/youtube_dl/extractor/slashdot.py
@ -4,7 +4,7 @@ from .common import InfoExtractor


 class SlashdotIE(InfoExtractor):
-    _VALID_URL = r'https?://tv.slashdot.org/video/\?embed=(?P<id>.*?)(&|$)'
+    _VALID_URL = r'https?://tv\.slashdot\.org/video/\?embed=(?P<id>.*?)(&|$)'

    _TEST = {
        u'add_ie': ['Ooyala'],
--- a/youtube_dl/extractor/smotri.py
+++ b/youtube_dl/extractor/smotri.py
@ -0,0 +1,251 @@
+# encoding: utf-8
+
+import re
+import json
+import hashlib
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError
+)
+
+
+class SmotriIE(InfoExtractor):
+    """Extractor for single videos on smotri.com.
+
+    The media URL is read from the site's vt.php JSON response; the rest of
+    the metadata is scraped from the video web page. Password-protected
+    videos require the 'videopassword' downloader parameter.
+    """
+    IE_DESC = u'Smotri.com'
+    IE_NAME = u'smotri'
+    _VALID_URL = r'^https?://(?:www\.)?(?P<url>smotri\.com/video/view/\?id=(?P<videoid>v(?P<realvideoid>[0-9]+)[a-z0-9]{4}))'
+
+    _TESTS = [
+        # real video id 2610366
+        {
+            u'url': u'http://smotri.com/video/view/?id=v261036632ab',
+            u'file': u'v261036632ab.mp4',
+            u'md5': u'2a7b08249e6f5636557579c368040eb9',
+            u'info_dict': {
+                u'title': u'катастрофа с камер видеонаблюдения',
+                u'uploader': u'rbc2008',
+                u'uploader_id': u'rbc08',
+                u'upload_date': u'20131118',
+                u'description': u'катастрофа с камер видеонаблюдения, видео катастрофа с камер видеонаблюдения',
+                u'thumbnail': u'http://frame6.loadup.ru/8b/a9/2610366.3.3.jpg',
+            },
+        },
+        # real video id 57591
+        {
+            u'url': u'http://smotri.com/video/view/?id=v57591cb20',
+            u'file': u'v57591cb20.flv',
+            u'md5': u'830266dfc21f077eac5afd1883091bcd',
+            u'info_dict': {
+                u'title': u'test',
+                u'uploader': u'Support Photofile@photofile',
+                u'uploader_id': u'support-photofile',
+                u'upload_date': u'20070704',
+                u'description': u'test, видео test',
+                u'thumbnail': u'http://frame4.loadup.ru/03/ed/57591.2.3.jpg',
+            },
+        },
+        # video-password
+        {
+            u'url': u'http://smotri.com/video/view/?id=v1390466a13c',
+            u'file': u'v1390466a13c.mp4',
+            u'md5': u'f6331cef33cad65a0815ee482a54440b',
+            u'info_dict': {
+                u'title': u'TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1',
+                u'uploader': u'timoxa40',
+                u'uploader_id': u'timoxa40',
+                u'upload_date': u'20100404',
+                u'thumbnail': u'http://frame7.loadup.ru/af/3f/1390466.3.3.jpg',
+                u'description': u'TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1, видео TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1',
+            },
+            u'params': {
+                u'videopassword': u'qwerty',
+            },
+        },
+        # age limit + video-password
+        {
+            u'url': u'http://smotri.com/video/view/?id=v15408898bcf',
+            u'file': u'v15408898bcf.flv',
+            u'md5': u'91e909c9f0521adf5ee86fbe073aad70',
+            u'info_dict': {
+                u'title': u'этот ролик не покажут по ТВ',
+                u'uploader': u'zzxxx',
+                u'uploader_id': u'ueggb',
+                u'upload_date': u'20101001',
+                u'thumbnail': u'http://frame3.loadup.ru/75/75/1540889.1.3.jpg',
+                u'age_limit': 18,
+                u'description': u'этот ролик не покажут по ТВ, видео этот ролик не покажут по ТВ',
+            },
+            u'params': {
+                u'videopassword': u'333'
+            }
+        }
+    ]
+
+    # Values of the 'status' field in the vt.php JSON response
+    _SUCCESS = 0
+    _PASSWORD_NOT_VERIFIED = 1
+    _PASSWORD_DETECTED = 2
+    _VIDEO_NOT_FOUND = 3
+
+    def _search_meta(self, name, html, display_name=None):
+        """Return the content of the <meta itemprop="NAME"> tag found in html.
+
+        display_name is the label used when reporting the field; it defaults
+        to name. The lookup is non-fatal (fatal=False), so callers must be
+        prepared for a missing value.
+        """
+        if display_name is None:
+            display_name = name
+        return self._html_search_regex(
+            r'<meta itemprop="%s" content="([^"]+)" />' % re.escape(name),
+            html, display_name, fatal=False)
+
+    def _real_extract(self, url):
+        """Build the info dict for one smotri.com video.
+
+        Raises ExtractorError when the video does not exist, when a password
+        is required but missing or wrong, or when the JSON status is not one
+        of the known values.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('videoid')
+        real_video_id = mobj.group('realvideoid')
+
+        # Download video JSON data
+        video_json_url = 'http://smotri.com/vt.php?id=%s' % real_video_id
+        video_json_page = self._download_webpage(video_json_url, video_id, u'Downloading video JSON')
+        video_json = json.loads(video_json_page)
+
+        status = video_json['status']
+        if status == self._VIDEO_NOT_FOUND:
+            raise ExtractorError(u'Video %s does not exist' % video_id, expected=True)
+        elif status == self._PASSWORD_DETECTED:
+            # The video is protected by a password, retry with
+            # video-password set
+            video_password = self._downloader.params.get('videopassword', None)
+            if not video_password:
+                raise ExtractorError(u'This video is protected by a password, use the --video-password option', expected=True)
+            video_json_url += '&md5pass=%s' % hashlib.md5(video_password.encode('utf-8')).hexdigest()
+            video_json_page = self._download_webpage(video_json_url, video_id, u'Downloading video JSON (video-password set)')
+            video_json = json.loads(video_json_page)
+            status = video_json['status']
+            if status == self._PASSWORD_NOT_VERIFIED:
+                raise ExtractorError(u'Video password is invalid', expected=True)
+
+        if status != self._SUCCESS:
+            raise ExtractorError(u'Unexpected status value %s' % status)
+
+        # Extract the URL of the video
+        video_url = video_json['file_data']
+
+        # Video JSON does not provide enough meta data
+        # We will extract some from the video web page instead
+        video_page_url = 'http://' + mobj.group('url')
+        video_page = self._download_webpage(video_page_url, video_id, u'Downloading video page')
+
+        # Adult content: the page shows a confirmation link that has to be
+        # followed to reach the real video page
+        if re.search(u'EroConfirmText">', video_page) is not None:
+            self.report_age_confirmation()
+            confirm_string = self._html_search_regex(
+                r'<a href="/video/view/\?id=%s&confirm=([^"]+)" title="[^"]+">' % video_id,
+                video_page, u'confirm string')
+            confirm_url = video_page_url + '&confirm=%s' % confirm_string
+            video_page = self._download_webpage(confirm_url, video_id, u'Downloading video page (age confirmed)')
+            adult_content = True
+        else:
+            adult_content = False
+
+        # Extract the rest of meta data. Every _search_meta lookup is
+        # non-fatal, so each result is checked before it is used.
+        video_title = self._search_meta(u'name', video_page, u'title')
+        if not video_title:
+            # Fall back to the last path component of the media URL
+            video_title = video_url.rsplit('/', 1)[-1]
+
+        video_description = self._search_meta(u'description', video_page)
+        if video_description:
+            # Strip the boilerplate the site wraps around the description
+            END_TEXT = u' на сайте Smotri.com'
+            if video_description.endswith(END_TEXT):
+                video_description = video_description[:-len(END_TEXT)]
+            START_TEXT = u'Смотреть онлайн ролик '
+            if video_description.startswith(START_TEXT):
+                video_description = video_description[len(START_TEXT):]
+        video_thumbnail = self._search_meta(u'thumbnail', video_page)
+
+        upload_date_str = self._search_meta(u'uploadDate', video_page, u'upload date')
+        upload_date_m = (
+            re.search(r'(?P<year>\d{4})\.(?P<month>\d{2})\.(?P<day>\d{2})T', upload_date_str)
+            if upload_date_str else None
+        )
+        # YYYYMMDD, or None when the date is missing or malformed
+        video_upload_date = (
+            (
+                upload_date_m.group('year') +
+                upload_date_m.group('month') +
+                upload_date_m.group('day')
+            )
+            if upload_date_m else None
+        )
+
+        duration_str = self._search_meta(u'duration', video_page)
+        duration_m = (
+            re.search(r'T(?P<hours>[0-9]{2})H(?P<minutes>[0-9]{2})M(?P<seconds>[0-9]{2})S', duration_str)
+            if duration_str else None
+        )
+        # Duration in seconds, or None when missing or malformed
+        video_duration = (
+            (
+                (int(duration_m.group('hours')) * 60 * 60) +
+                (int(duration_m.group('minutes')) * 60) +
+                int(duration_m.group('seconds'))
+            )
+            if duration_m else None
+        )
+
+        video_uploader = self._html_search_regex(
+            u'<div class="DescrUser"><div>Автор.*?onmouseover="popup_user_info[^"]+">(.*?)</a>',
+            video_page, u'uploader', fatal=False, flags=re.MULTILINE|re.DOTALL)
+
+        video_uploader_id = self._html_search_regex(
+            u'<div class="DescrUser"><div>Автор.*?onmouseover="popup_user_info\\(.*?\'([^\']+)\'\\);">',
+            video_page, u'uploader id', fatal=False, flags=re.MULTILINE|re.DOTALL)
+
+        view_count_str = self._html_search_regex(
+            u'Общее количество просмотров.*?<span class="Number">(\\d+)</span>',
+            video_page, u'view count', fatal=False, flags=re.MULTILINE|re.DOTALL)
+        # The regex only captures digits, so int() cannot fail here
+        video_view_count = int(view_count_str) if view_count_str is not None else None
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': video_title,
+            'thumbnail': video_thumbnail,
+            'description': video_description,
+            'uploader': video_uploader,
+            'upload_date': video_upload_date,
+            'uploader_id': video_uploader_id,
+            'duration': video_duration,
+            # 'video_duration' is kept alongside 'duration' so existing
+            # consumers of the old key keep working
+            'video_duration': video_duration,
+            'view_count': video_view_count,
+            'age_limit': 18 if adult_content else 0,
+            'video_page_url': video_page_url
+        }
+
+
+class SmotriCommunityIE(InfoExtractor):
+    """Playlist extractor for the videos of a smotri.com community.
+
+    The community's RSS export lists the video pages; each link is handed to
+    the 'Smotri' extractor.
+    """
+    IE_DESC = u'Smotri.com community videos'
+    IE_NAME = u'smotri:community'
+    _VALID_URL = r'^https?://(?:www\.)?smotri\.com/community/video/(?P<communityid>[0-9A-Za-z_\'-]+)'
+
+    def _real_extract(self, url):
+        """Return a playlist result with one entry per RSS item link."""
+        community_id = re.match(self._VALID_URL, url).group('communityid')
+
+        feed_url = 'http://smotri.com/export/rss/video/by/community/-/%s/video.xml' % community_id
+        feed = self._download_xml(feed_url, community_id, u'Downloading community RSS')
+
+        entries = []
+        for link_node in feed.findall('./channel/item/link'):
+            entries.append(self.url_result(link_node.text, 'Smotri'))
+
+        # The community name is the quoted part of the channel description
+        channel_description = feed.find('./channel/description').text
+        community_title = self._html_search_regex(
+            u'^Видео сообщества "([^"]+)"$', channel_description, u'community title')
+
+        return self.playlist_result(entries, community_id, community_title)
+
+
+class SmotriUserIE(InfoExtractor):
+    """Playlist extractor for the videos uploaded by a smotri.com user.
+
+    The user's RSS export lists the video pages; each link is handed to the
+    'Smotri' extractor.
+    """
+    IE_DESC = u'Smotri.com user videos'
+    IE_NAME = u'smotri:user'
+    _VALID_URL = r'^https?://(?:www\.)?smotri\.com/user/(?P<userid>[0-9A-Za-z_\'-]+)'
+
+    def _real_extract(self, url):
+        """Return a playlist result with one entry per RSS item link."""
+        user_id = re.match(self._VALID_URL, url).group('userid')
+
+        feed_url = 'http://smotri.com/export/rss/user/video/-/%s/video.xml' % user_id
+        feed = self._download_xml(feed_url, user_id, u'Downloading user RSS')
+
+        entries = []
+        for link_node in feed.findall('./channel/item/link'):
+            entries.append(self.url_result(link_node.text, 'Smotri'))
+
+        # The nickname is whatever follows the fixed prefix of the channel
+        # description
+        channel_description = feed.find('./channel/description').text
+        user_nickname = self._html_search_regex(
+            u'^Видео режиссера (.*)$', channel_description,
+            u'user nickname')
+
+        return self.playlist_result(entries, user_id, user_nickname)
--- a/youtube_dl/extractor/soundcloud.py
+++ b/youtube_dl/extractor/soundcloud.py
@ -25,7 +25,7 @@ class SoundcloudIE(InfoExtractor):
    _VALID_URL = r'''^(?:https?://)?
                    (?:(?:(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)/?(?:[?].*)?$)
                       |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+))
-                       |(?P<widget>w.soundcloud.com/player/?.*?url=.*)
+                       |(?P<widget>w\.soundcloud\.com/player/?.*?url=.*)
                    )
                    '''
    IE_NAME = u'soundcloud'
@ -217,7 +217,7 @@ class SoundcloudSetIE(SoundcloudIE):


 class SoundcloudUserIE(SoundcloudIE):
-    _VALID_URL = r'https?://(www\.)?soundcloud.com/(?P<user>[^/]+)(/?(tracks/)?)?(\?.*)?$'
+    _VALID_URL = r'https?://(www\.)?soundcloud\.com/(?P<user>[^/]+)(/?(tracks/)?)?(\?.*)?$'
    IE_NAME = u'soundcloud:user'

    # it's in tests/test_playlists.py
--- a/youtube_dl/extractor/southparkstudios.py
+++ b/youtube_dl/extractor/southparkstudios.py
@ -1,15 +1,14 @@
 import re

-from .mtv import MTVIE, _media_xml_tag
+from .mtv import MTVServicesInfoExtractor


-class SouthParkStudiosIE(MTVIE):
+class SouthParkStudiosIE(MTVServicesInfoExtractor):
    IE_NAME = u'southparkstudios.com'
    _VALID_URL = r'(https?://)?(www\.)?(?P<url>southparkstudios\.com/(clips|full-episodes)/(?P<id>.+?)(\?|#|$))'

    _FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss'

-    # Overwrite MTVIE properties we don't want
    _TESTS = [{
        u'url': u'http://www.southparkstudios.com/clips/104437/bat-daded#tab=featured',
        u'file': u'a7bff6c2-ed00-11e0-aca6-0026b9414f30.mp4',
@ -19,14 +18,6 @@ class SouthParkStudiosIE(MTVIE):
        },
    }]

-    def _get_thumbnail_url(self, uri, itemdoc):
-        search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))
-        thumb_node = itemdoc.find(search_path)
-        if thumb_node is None:
-            return None
-        else:
-            return thumb_node.attrib['url']
-
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        url = u'http://www.' + mobj.group(u'url')
--- a/youtube_dl/extractor/space.py
+++ b/youtube_dl/extractor/space.py
@ -6,7 +6,7 @@ from ..utils import RegexNotFoundError, ExtractorError


 class SpaceIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.space\.com/\d+-(?P<title>[^/\.\?]*?)-video.html'
+    _VALID_URL = r'https?://www\.space\.com/\d+-(?P<title>[^/\.\?]*?)-video\.html'
    _TEST = {
        u'add_ie': ['Brightcove'],
        u'url': u'http://www.space.com/23373-huge-martian-landforms-detail-revealed-by-european-probe-video.html',
--- a/youtube_dl/extractor/spiegel.py
+++ b/youtube_dl/extractor/spiegel.py
@ -1,5 +1,4 @@
 import re
-import xml.etree.ElementTree

 from .common import InfoExtractor

@ -33,12 +32,10 @@ class SpiegelIE(InfoExtractor):
            r'<div class="module-title">(.*?)</div>', webpage, u'title')

        xml_url = u'http://video2.spiegel.de/flash/' + video_id + u'.xml'
-        xml_code = self._download_webpage(
+        idoc = self._download_xml(
            xml_url, video_id,
            note=u'Downloading XML', errnote=u'Failed to download XML')

-        idoc = xml.etree.ElementTree.fromstring(xml_code)
-
        formats = [
            {
                'format_id': n.tag.rpartition('type')[2],
--- a/youtube_dl/extractor/stanfordoc.py
+++ b/youtube_dl/extractor/stanfordoc.py
@ -18,7 +18,7 @@ from ..utils import (
 class StanfordOpenClassroomIE(InfoExtractor):
    IE_NAME = u'stanfordoc'
    IE_DESC = u'Stanford Open ClassRoom'
-    _VALID_URL = r'^(?:https?://)?openclassroom.stanford.edu(?P<path>/?|(/MainFolder/(?:HomePage|CoursePage|VideoPage)\.php([?]course=(?P<course>[^&]+)(&video=(?P<video>[^&]+))?(&.*)?)?))$'
+    _VALID_URL = r'^(?:https?://)?openclassroom\.stanford\.edu(?P<path>/?|(/MainFolder/(?:HomePage|CoursePage|VideoPage)\.php([?]course=(?P<course>[^&]+)(&video=(?P<video>[^&]+))?(&.*)?)?))$'
    _TEST = {
        u'url': u'http://openclassroom.stanford.edu/MainFolder/VideoPage.php?course=PracticalUnix&video=intro-environment&speed=100',
        u'file': u'PracticalUnix_intro-environment.mp4',
--- a/youtube_dl/extractor/teamcoco.py
+++ b/youtube_dl/extractor/teamcoco.py
@ -1,5 +1,4 @@
 import re
-import xml.etree.ElementTree

 from .common import InfoExtractor
 from ..utils import (
@ -32,8 +31,7 @@ class TeamcocoIE(InfoExtractor):
        self.report_extraction(video_id)

        data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
-        data_xml = self._download_webpage(data_url, video_id, 'Downloading data webpage')
-        data = xml.etree.ElementTree.fromstring(data_xml.encode('utf-8'))
+        data = self._download_xml(data_url, video_id, 'Downloading data webpage')


        qualities = ['500k', '480p', '1000k', '720p', '1080p']
--- a/youtube_dl/extractor/tf1.py
+++ b/youtube_dl/extractor/tf1.py
@ -7,7 +7,7 @@ from .common import InfoExtractor

 class TF1IE(InfoExtractor):
    """TF1 uses the wat.tv player."""
-    _VALID_URL = r'http://videos.tf1.fr/.*-(.*?).html'
+    _VALID_URL = r'http://videos\.tf1\.fr/.*-(.*?)\.html'
    _TEST = {
        u'url': u'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html',
        u'file': u'10635995.mp4',
--- a/youtube_dl/extractor/theplatform.py
+++ b/youtube_dl/extractor/theplatform.py
@ -0,0 +1,68 @@
+import re
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+    xpath_with_ns,
+)
+
+def _x(p):
+    """Pass path p to xpath_with_ns with the 'smil' prefix bound to its namespace URI."""
+    return xpath_with_ns(p, {'smil': 'http://www.w3.org/2005/SMIL21/Language'})
+
+
+class ThePlatformIE(InfoExtractor):
+    """Extractor for link.theplatform.com URLs and 'theplatform:<id>' references."""
+    _VALID_URL = r'(?:https?://link\.theplatform\.com/s/[^/]+/|theplatform:)(?P<id>[^/\?]+)'
+
+    _TEST = {
+        # from http://www.metacafe.com/watch/cb-e9I_cZgTgIPd/blackberrys_big_bold_z30/
+        u'url': u'http://link.theplatform.com/s/dJ5BDC/e9I_cZgTgIPd/meta.smil?format=smil&Tracking=true&mbr=true',
+        u'info_dict': {
+            u'id': u'e9I_cZgTgIPd',
+            u'ext': u'flv',
+            u'title': u'Blackberry\'s big, bold Z30',
+            u'description': u'The Z30 is Blackberry\'s biggest, baddest mobile messaging device yet.',
+            u'duration': 247,
+        },
+        u'params': {
+            # rtmp download
+            u'skip_download': True,
+        },
+    }
+
+    def _get_info(self, video_id):
+        """Download the SMIL and preview JSON for video_id and build its info dict.
+
+        Formats come from the <video> nodes of the SMIL <switch>, ordered by
+        ascending (height, width, vbr); title, description, thumbnail and
+        duration come from the preview JSON.
+        """
+        smil_url = ('http://link.theplatform.com/s/dJ5BDC/{0}/meta.smil?'
+            'format=smil&mbr=true'.format(video_id))
+        smil_doc = self._download_xml(smil_url, video_id)
+        preview_url = 'http://link.theplatform.com/s/dJ5BDC/{0}?format=preview'.format(video_id)
+        preview = json.loads(self._download_webpage(preview_url, video_id))
+
+        smil_head = smil_doc.find(_x('smil:head'))
+        smil_body = smil_doc.find(_x('smil:body'))
+        # Every format shares the base URL; they differ in play_path
+        base_url = smil_head.find(_x('smil:meta')).attrib['base']
+        video_nodes = smil_body.find(_x('smil:switch')).findall(_x('smil:video'))
+
+        def build_format(node):
+            attrs = node.attrib
+            return {
+                'url': base_url,
+                'play_path': 'mp4:' + attrs['src'],
+                'ext': 'flv',
+                'width': int(attrs['width']),
+                'height': int(attrs['height']),
+                'vbr': int(attrs['system-bitrate']),
+            }
+
+        formats = sorted(
+            (build_format(node) for node in video_nodes),
+            key=lambda fmt: (fmt['height'], fmt['width'], fmt['vbr']))
+
+        return {
+            'id': video_id,
+            'title': preview['title'],
+            'formats': formats,
+            'description': preview['description'],
+            'thumbnail': preview['defaultThumbnailUrl'],
+            # integer division of the JSON duration by 1000
+            'duration': preview['duration']//1000,
+        }
+
+    def _real_extract(self, url):
+        """Extract the id from url and delegate to _get_info."""
+        video_id = re.match(self._VALID_URL, url).group('id')
+        return self._get_info(video_id)
--- a/youtube_dl/extractor/toutv.py
+++ b/youtube_dl/extractor/toutv.py
@ -1,6 +1,5 @@
 # coding: utf-8
 import re
-import xml.etree.ElementTree

 from .common import InfoExtractor
 from ..utils import (
@ -40,11 +39,9 @@ class TouTvIE(InfoExtractor):
            r'"idMedia":\s*"([^"]+)"', webpage, u'media ID')

        streams_url = u'http://release.theplatform.com/content.select?pid=' + mediaId
-        streams_webpage = self._download_webpage(
+        streams_doc = self._download_xml(
            streams_url, video_id, note=u'Downloading stream list')

-        streams_doc = xml.etree.ElementTree.fromstring(
-            streams_webpage.encode('utf-8'))
        video_url = next(n.text
                         for n in streams_doc.findall('.//choice/url')
                         if u'//ad.doubleclick' not in n.text)
--- a/youtube_dl/extractor/trilulilu.py
+++ b/youtube_dl/extractor/trilulilu.py
@ -1,6 +1,5 @@
 import json
 import re
-import xml.etree.ElementTree

 from .common import InfoExtractor

@ -36,12 +35,10 @@ class TriluliluIE(InfoExtractor):

        format_url = (u'http://fs%(server)s.trilulilu.ro/%(hash)s/'
                      u'video-formats2' % log)
-        format_str = self._download_webpage(
+        format_doc = self._download_xml(
            format_url, video_id,
            note=u'Downloading formats',
            errnote=u'Error while downloading formats')
-
-        format_doc = xml.etree.ElementTree.fromstring(format_str)
 
        video_url_template = (
            u'http://fs%(server)s.trilulilu.ro/stream.php?type=video'
@ -58,7 +55,7 @@ class TriluliluIE(InfoExtractor):
            for fnode in format_doc.findall('./formats/format')
        ]

-        info = {
+        return {
            '_type': 'video',
            'id': video_id,
            'formats': formats,
@ -67,7 +64,3 @@ class TriluliluIE(InfoExtractor):
            'thumbnail': thumbnail,
        }

-        # TODO: Remove when #980 has been merged
-        info.update(formats[-1])
-
-        return info
--- a/youtube_dl/extractor/unistra.py
+++ b/youtube_dl/extractor/unistra.py
@ -3,7 +3,7 @@ import re
 from .common import InfoExtractor

 class UnistraIE(InfoExtractor):
-    _VALID_URL = r'http://utv.unistra.fr/(?:index|video).php\?id_video\=(\d+)'
+    _VALID_URL = r'http://utv\.unistra\.fr/(?:index|video)\.php\?id_video\=(\d+)'

    _TEST = {
        u'url': u'http://utv.unistra.fr/video.php?id_video=154',
--- a/youtube_dl/extractor/veehd.py
+++ b/youtube_dl/extractor/veehd.py
@ -9,7 +9,7 @@ from ..utils import (
 )

 class VeeHDIE(InfoExtractor):
-    _VALID_URL = r'https?://veehd.com/video/(?P<id>\d+)'
+    _VALID_URL = r'https?://veehd\.com/video/(?P<id>\d+)'

    _TEST = {
        u'url': u'http://veehd.com/video/4686958',
--- a/youtube_dl/extractor/vevo.py
+++ b/youtube_dl/extractor/vevo.py
@ -15,7 +15,7 @@ class VevoIE(InfoExtractor):
    Accepts urls from vevo.com or in the format 'vevo:{id}'
    (currently used by MTVIE)
    """
-    _VALID_URL = r'((http://www.vevo.com/watch/.*?/.*?/)|(vevo:))(?P<id>.*?)(\?|$)'
+    _VALID_URL = r'((http://www\.vevo\.com/watch/(?:[^/]+/[^/]+/)?)|(vevo:))(?P<id>.*?)(\?|$)'
    _TESTS = [{
        u'url': u'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
        u'file': u'GB1101300280.mp4',
@ -24,7 +24,7 @@ class VevoIE(InfoExtractor):
            u"upload_date": u"20130624",
            u"uploader": u"Hurts",
            u"title": u"Somebody to Die For",
-            u"duration": 230,
+            u"duration": 230.12,
            u"width": 1920,
            u"height": 1080,
        }
--- a/youtube_dl/extractor/vice.py
+++ b/youtube_dl/extractor/vice.py
@ -6,7 +6,7 @@ from ..utils import ExtractorError


 class ViceIE(InfoExtractor):
-    _VALID_URL = r'http://www.vice.com/.*?/(?P<name>.+)'
+    _VALID_URL = r'http://www\.vice\.com/.*?/(?P<name>.+)'

    _TEST = {
        u'url': u'http://www.vice.com/Fringes/cowboy-capitalists-part-1',
--- a/youtube_dl/extractor/viddler.py
+++ b/youtube_dl/extractor/viddler.py
@ -2,13 +2,10 @@ import json
 import re

 from .common import InfoExtractor
-from ..utils import (
-    determine_ext,
-)


 class ViddlerIE(InfoExtractor):
-    _VALID_URL = r'(?P<domain>https?://(?:www\.)?viddler.com)/(?:v|embed|player)/(?P<id>[a-z0-9]+)'
+    _VALID_URL = r'(?P<domain>https?://(?:www\.)?viddler\.com)/(?:v|embed|player)/(?P<id>[a-z0-9]+)'
    _TEST = {
        u"url": u"http://www.viddler.com/v/43903784",
        u'file': u'43903784.mp4',
@ -47,7 +44,7 @@ class ViddlerIE(InfoExtractor):
            r"thumbnail\s*:\s*'([^']*)'",
            webpage, u'thumbnail', fatal=False)

-        info = {
+        return {
            '_type': 'video',
            'id': video_id,
            'title': title,
@ -56,9 +53,3 @@ class ViddlerIE(InfoExtractor):
            'duration': duration,
            'formats': formats,
        }
-
-        # TODO: Remove when #980 has been merged
-        info['formats'][-1]['ext'] = determine_ext(info['formats'][-1]['url'])
-        info.update(info['formats'][-1])
-
-        return info
--- a/youtube_dl/extractor/videofyme.py
+++ b/youtube_dl/extractor/videofyme.py
@ -1,5 +1,4 @@
 import re
-import xml.etree.ElementTree

 from .common import InfoExtractor
 from ..utils import (
@ -8,7 +7,7 @@ from ..utils import (
 )

 class VideofyMeIE(InfoExtractor):
-    _VALID_URL = r'https?://(www.videofy.me/.+?|p.videofy.me/v)/(?P<id>\d+)(&|#|$)'
+    _VALID_URL = r'https?://(www\.videofy\.me/.+?|p\.videofy\.me/v)/(?P<id>\d+)(&|#|$)'
    IE_NAME = u'videofy.me'

    _TEST = {
@ -27,9 +26,8 @@ class VideofyMeIE(InfoExtractor):
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
-        config_xml = self._download_webpage('http://sunshine.videofy.me/?videoId=%s' % video_id,
+        config = self._download_xml('http://sunshine.videofy.me/?videoId=%s' % video_id,
                                            video_id)
-        config = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
        video = config.find('video')
        sources = video.find('sources')
        url_node = next(node for node in [find_xpath_attr(sources, 'source', 'id', 'HQ %s' % key) 
--- a/youtube_dl/extractor/videopremium.py
+++ b/youtube_dl/extractor/videopremium.py
@ -5,14 +5,16 @@ from .common import InfoExtractor


 class VideoPremiumIE(InfoExtractor):
-    _VALID_URL = r'(?:https?://)?(?:www\.)?videopremium\.tv/(?P<id>\w+)(?:/.*)?'
+    _VALID_URL = r'(?:https?://)?(?:www\.)?videopremium\.(?:tv|me)/(?P<id>\w+)(?:/.*)?'
    _TEST = {
        u'url': u'http://videopremium.tv/4w7oadjsf156',
        u'file': u'4w7oadjsf156.f4v',
-        u'md5': u'e51e4a266aab7531c6ac06f4ffee3b0d',
        u'info_dict': {
            u"title": u"youtube-dl_test_video____a_________-BaW_jenozKc.mp4.mp4"
-        }
+        },
+        u'params': {
+            u'skip_download': True,
+        },
    }

    def _real_extract(self, url):
@ -39,4 +41,4 @@ class VideoPremiumIE(InfoExtractor):
            'player_url':  "http://videopremium.tv/uplayer/uppod.swf",
            'ext':         'f4v',
            'title':       video_title,
-        }
+        }
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@ -20,7 +20,7 @@ class VimeoIE(InfoExtractor):
    """Information extractor for vimeo.com."""

    # _VALID_URL matches Vimeo URLs
-    _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|(?P<player>player))\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)/?(?:[?].*)?(?:#.*)?$'
+    _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|(?P<player>player))\.)?vimeo(?P<pro>pro)?\.com/(?:.*?/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)/?(?:[?].*)?(?:#.*)?$'
    _NETRC_MACHINE = 'vimeo'
    IE_NAME = u'vimeo'
    _TESTS = [
@ -196,6 +196,16 @@ class VimeoIE(InfoExtractor):
        if mobj is not None:
            video_upload_date = mobj.group(1) + mobj.group(2) + mobj.group(3)

+        try:
+            view_count = int(self._search_regex(r'UserPlays:(\d+)', webpage, u'view count'))
+            like_count = int(self._search_regex(r'UserLikes:(\d+)', webpage, u'like count'))
+            comment_count = int(self._search_regex(r'UserComments:(\d+)', webpage, u'comment count'))
+        except RegexNotFoundError:
+            # This info is only available in vimeo.com/{id} urls
+            view_count = None
+            like_count = None
+            comment_count = None
+
        # Vimeo specific: extract request signature and timestamp
        sig = config['request']['signature']
        timestamp = config['request']['timestamp']
@ -242,6 +252,9 @@ class VimeoIE(InfoExtractor):
            'description':  video_description,
            'formats': formats,
            'webpage_url': url,
+            'view_count': view_count,
+            'like_count': like_count,
+            'comment_count': comment_count,
        }


@ -249,25 +262,77 @@ class VimeoChannelIE(InfoExtractor):
    IE_NAME = u'vimeo:channel'
    _VALID_URL = r'(?:https?://)?vimeo.\com/channels/(?P<id>[^/]+)'
    _MORE_PAGES_INDICATOR = r'<a.+?rel="next"'
+    _TITLE_RE = r'<link rel="alternate"[^>]+?title="(.*?)"'

-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        channel_id =  mobj.group('id')
+    def _page_url(self, base_url, pagenum):
+        return '%s/videos/page:%d/' % (base_url, pagenum)
+
+    def _extract_list_title(self, webpage):
+        return self._html_search_regex(self._TITLE_RE, webpage, u'list title')
+
+    def _extract_videos(self, list_id, base_url):
        video_ids = []
-
        for pagenum in itertools.count(1):
-            webpage = self._download_webpage('http://vimeo.com/channels/%s/videos/page:%d' % (channel_id, pagenum),
-                                             channel_id, u'Downloading page %s' % pagenum)
+            webpage = self._download_webpage(
+                self._page_url(base_url, pagenum) ,list_id,
+                u'Downloading page %s' % pagenum)
            video_ids.extend(re.findall(r'id="clip_(\d+?)"', webpage))
            if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
                break

        entries = [self.url_result('http://vimeo.com/%s' % video_id, 'Vimeo')
                   for video_id in video_ids]
-        channel_title = self._html_search_regex(r'<a href="/channels/%s">(.*?)</a>' % channel_id,
-                                                webpage, u'channel title')
        return {'_type': 'playlist',
-                'id': channel_id,
-                'title': channel_title,
+                'id': list_id,
+                'title': self._extract_list_title(webpage),
                'entries': entries,
                }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        channel_id =  mobj.group('id')
+        return self._extract_videos(channel_id, 'http://vimeo.com/channels/%s' % channel_id)
+
+
+class VimeoUserIE(VimeoChannelIE):
+    IE_NAME = u'vimeo:user'
+    _VALID_URL = r'(?:https?://)?vimeo.\com/(?P<name>[^/]+)'
+    _TITLE_RE = r'<a[^>]+?class="user">([^<>]+?)</a>'
+
+    @classmethod
+    def suitable(cls, url):
+        if VimeoChannelIE.suitable(url) or VimeoIE.suitable(url) or VimeoAlbumIE.suitable(url) or VimeoGroupsIE.suitable(url):
+            return False
+        return super(VimeoUserIE, cls).suitable(url)
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        name = mobj.group('name')
+        return self._extract_videos(name, 'http://vimeo.com/%s' % name)
+
+
+class VimeoAlbumIE(VimeoChannelIE):
+    IE_NAME = u'vimeo:album'
+    _VALID_URL = r'(?:https?://)?vimeo.\com/album/(?P<id>\d+)'
+    _TITLE_RE = r'<header id="page_header">\n\s*<h1>(.*?)</h1>'
+
+    def _page_url(self, base_url, pagenum):
+        return '%s/page:%d/' % (base_url, pagenum)
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        album_id =  mobj.group('id')
+        return self._extract_videos(album_id, 'http://vimeo.com/album/%s' % album_id)
+
+
+class VimeoGroupsIE(VimeoAlbumIE):
+    IE_NAME = u'vimeo:group'
+    _VALID_URL = r'(?:https?://)?vimeo.\com/groups/(?P<name>[^/]+)'
+
+    def _extract_list_title(self, webpage):
+        return self._og_search_title(webpage)
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        name = mobj.group('name')
+        return self._extract_videos(name, 'http://vimeo.com/groups/%s' % name)
--- a/youtube_dl/extractor/wat.py
+++ b/youtube_dl/extractor/wat.py
@ -11,7 +11,7 @@ from ..utils import (


 class WatIE(InfoExtractor):
-    _VALID_URL=r'http://www.wat.tv/.*-(?P<shortID>.*?)_.*?.html'
+    _VALID_URL=r'http://www\.wat\.tv/.*-(?P<shortID>.*?)_.*?\.html'
    IE_NAME = 'wat.tv'
    _TEST = {
        u'url': u'http://www.wat.tv/video/world-war-philadelphia-vost-6bv55_2fjr7_.html',
--- a/youtube_dl/extractor/wimp.py
+++ b/youtube_dl/extractor/wimp.py
@ -11,7 +11,8 @@ class WimpIE(InfoExtractor):
        u'file': u'deerfence.flv',
        u'md5': u'8b215e2e0168c6081a1cf84b2846a2b5',
        u'info_dict': {
-            u"title": u"Watch Till End: Herd of deer jump over a fence."
+            u"title": u"Watch Till End: Herd of deer jump over a fence.",
+            u"description": u"These deer look as fluid as running water when they jump over this fence as a herd. This video is one that needs to be watched until the very end for the true majesty to be witnessed, but once it comes, it's sure to take your breath away.",
        }
    }

@ -19,18 +20,15 @@ class WimpIE(InfoExtractor):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group(1)
        webpage = self._download_webpage(url, video_id)
-        title = self._search_regex(r'<meta name="description" content="(.+?)" />',webpage, 'video title')
-        thumbnail_url = self._search_regex(r'<meta property="og\:image" content="(.+?)" />', webpage,'video thumbnail')
+        title = self._html_search_meta('description', webpage, u'video title')
        googleString = self._search_regex("googleCode = '(.*?)'", webpage, 'file url')
        googleString = base64.b64decode(googleString).decode('ascii')
        final_url = self._search_regex('","(.*?)"', googleString,'final video url')
-        ext = final_url.rpartition(u'.')[2]
-
-        return [{
-            'id':        video_id,
-            'url':       final_url,
-            'ext':       ext,
-            'title':     title,
-            'thumbnail': thumbnail_url,
-        }]

+        return {
+            'id': video_id,
+            'url': final_url,
+            'title': self._og_search_title(webpage),
+            'thumbnail': self._og_search_thumbnail(webpage),
+            'description': self._og_search_description(webpage),
+        }
--- a/youtube_dl/extractor/wistia.py
+++ b/youtube_dl/extractor/wistia.py
@ -0,0 +1,55 @@
+import json
+import re
+
+from .common import InfoExtractor
+
+
+class WistiaIE(InfoExtractor):
+    _VALID_URL = r'^https?://(?:fast\.)?wistia\.net/embed/iframe/(?P<id>[a-z0-9]+)'
+
+    _TEST = {
+        u"url": u"http://fast.wistia.net/embed/iframe/sh7fpupwlt",
+        u"file": u"sh7fpupwlt.mov",
+        u"md5": u"cafeb56ec0c53c18c97405eecb3133df",
+        u"info_dict": {
+            u"title": u"cfh_resourceful_zdkh_final_1"
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+        data_json = self._html_search_regex(
+            r'Wistia.iframeInit\((.*?), {}\);', webpage, u'video data')
+
+        data = json.loads(data_json)
+
+        formats = []
+        thumbnails = []
+        for atype, a in data['assets'].items():
+            if atype == 'still':
+                thumbnails.append({
+                    'url': a['url'],
+                    'resolution': '%dx%d' % (a['width'], a['height']),
+                })
+                continue
+            if atype == 'preview':
+                continue
+            formats.append({
+                'format_id': atype,
+                'url': a['url'],
+                'width': a['width'],
+                'height': a['height'],
+                'filesize': a['size'],
+                'ext': a['ext'],
+            })
+        formats.sort(key=lambda a: a['filesize'])
+
+        return {
+            'id': video_id,
+            'title': data['name'],
+            'formats': formats,
+            'thumbnails': thumbnails,
+        }
--- a/youtube_dl/extractor/xhamster.py
+++ b/youtube_dl/extractor/xhamster.py
@ -46,7 +46,7 @@ class XHamsterIE(InfoExtractor):
                return mobj.group('server')+'/key='+mobj.group('file')

        def is_hd(webpage):
-            return webpage.find('<div class=\'icon iconHD\'>') != -1
+            return webpage.find('<div class=\'icon iconHD\'') != -1

        mobj = re.match(self._VALID_URL, url)

--- a/youtube_dl/extractor/yahoo.py
+++ b/youtube_dl/extractor/yahoo.py
@ -17,27 +17,21 @@ class YahooIE(InfoExtractor):
    _TESTS = [
        {
            u'url': u'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
-            u'file': u'214727115.flv',
+            u'file': u'214727115.mp4',
+            u'md5': u'4962b075c08be8690a922ee026d05e69',
            u'info_dict': {
                u'title': u'Julian Smith & Travis Legg Watch Julian Smith',
                u'description': u'Julian and Travis watch Julian Smith',
            },
-            u'params': {
-                # Requires rtmpdump
-                u'skip_download': True,
-            },
        },
        {
            u'url': u'http://screen.yahoo.com/wired/codefellas-s1-ep12-cougar-lies-103000935.html',
-            u'file': u'103000935.flv',
+            u'file': u'103000935.mp4',
+            u'md5': u'd6e6fc6e1313c608f316ddad7b82b306',
            u'info_dict': {
                u'title': u'Codefellas - The Cougar Lies with Spanish Moss',
                u'description': u'Agent Topple\'s mustache does its dirty work, and Nicole brokers a deal for peace. But why is the NSA collecting millions of Instagram brunch photos? And if your waffles have nothing to hide, what are they so worried about?',
            },
-            u'params': {
-                # Requires rtmpdump
-                u'skip_download': True,
-            },
        },
    ]

@ -46,15 +40,19 @@ class YahooIE(InfoExtractor):
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)

-        items_json = self._search_regex(r'YVIDEO_INIT_ITEMS = ({.*?});$',
+        items_json = self._search_regex(r'mediaItems: ({.*?})$',
            webpage, u'items', flags=re.MULTILINE)
        items = json.loads(items_json)
        info = items['mediaItems']['query']['results']['mediaObj'][0]
        # The 'meta' field is not always in the video webpage, we request it
        # from another page
        long_id = info['id']
+        return self._get_info(long_id, video_id)
+
+    def _get_info(self, long_id, video_id):
        query = ('SELECT * FROM yahoo.media.video.streams WHERE id="%s"'
-                 ' AND plrs="86Gj0vCaSzV_Iuf6hNylf2"' % long_id)
+                 ' AND plrs="86Gj0vCaSzV_Iuf6hNylf2" AND region="US"'
+                 ' AND protocol="http"' % long_id)
        data = compat_urllib_parse.urlencode({
            'q': query,
            'env': 'prod',
@ -91,17 +89,39 @@ class YahooIE(InfoExtractor):
            formats.append(format_info)
        formats = sorted(formats, key=lambda f:(f['height'], f['width']))

-        info = {
+        return {
            'id': video_id,
            'title': meta['title'],
            'formats': formats,
            'description': clean_html(meta['description']),
            'thumbnail': meta['thumbnail'],
        }
-        # TODO: Remove when #980 has been merged
-        info.update(formats[-1])

-        return info
+
+class YahooNewsIE(YahooIE):
+    IE_NAME = 'yahoo:news'
+    _VALID_URL = r'http://news\.yahoo\.com/video/.*?-(?P<id>\d*?)\.html'
+
+    _TEST = {
+        u'url': u'http://news.yahoo.com/video/china-moses-crazy-blues-104538833.html',
+        u'md5': u'67010fdf3a08d290e060a4dd96baa07b',
+        u'info_dict': {
+            u'id': u'104538833',
+            u'ext': u'mp4',
+            u'title': u'China Moses Is Crazy About the Blues',
+            u'description': u'md5:9900ab8cd5808175c7b3fe55b979bed0',
+        },
+    }
+
+    # Override YahooIE properties we don't want
+    _TESTS = []
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        webpage = self._download_webpage(url, video_id)
+        long_id = self._search_regex(r'contentId: \'(.+?)\',', webpage, u'long id')
+        return self._get_info(long_id, video_id)


 class YahooSearchIE(SearchInfoExtractor):
--- a/youtube_dl/extractor/youjizz.py
+++ b/youtube_dl/extractor/youjizz.py
@ -7,7 +7,7 @@ from ..utils import (


 class YouJizzIE(InfoExtractor):
-    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?youjizz\.com/videos/(?P<videoid>[^.]+).html$'
+    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?youjizz\.com/videos/(?P<videoid>[^.]+)\.html$'
    _TEST = {
        u'url': u'http://www.youjizz.com/videos/zeichentrick-1-2189178.html',
        u'file': u'2189178.flv',
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@ -11,7 +11,6 @@ import socket
 import string
 import struct
 import traceback
-import xml.etree.ElementTree
 import zlib

 from .common import InfoExtractor, SearchInfoExtractor
@ -29,6 +28,7 @@ from ..utils import (
    clean_html,
    get_cachedir,
    get_element_by_id,
+    get_element_by_attribute,
    ExtractorError,
    unescapeHTML,
    unified_strdate,
@ -336,7 +336,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                u"uploader": u"Philipp Hagemeister",
                u"uploader_id": u"phihag",
                u"upload_date": u"20121002",
-                u"description": u"test chars:  \"'/\\ä↭𝕐\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ."
+                u"description": u"test chars:  \"'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ."
            }
        },
        {
@ -388,10 +388,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
        super(YoutubeIE, self).__init__(*args, **kwargs)
        self._player_cache = {}

-    def report_video_webpage_download(self, video_id):
-        """Report attempt to download video webpage."""
-        self.to_screen(u'%s: Downloading video webpage' % video_id)
-
    def report_video_info_webpage_download(self, video_id):
        """Report attempt to download video info webpage."""
        self.to_screen(u'%s: Downloading video info webpage' % video_id)
@ -1144,8 +1140,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                'asrs': 1,
            })
            list_url = caption_url + '&' + list_params
-            list_page = self._download_webpage(list_url, video_id)
-            caption_list = xml.etree.ElementTree.fromstring(list_page.encode('utf-8'))
+            caption_list = self._download_xml(list_url, video_id)
            original_lang_node = caption_list.find('track')
            if original_lang_node is None or original_lang_node.attrib.get('kind') != 'asr' :
                self._downloader.report_warning(u'Video doesn\'t have automatic captions')
@ -1259,15 +1254,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
        video_id = self._extract_id(url)

        # Get video webpage
-        self.report_video_webpage_download(video_id)
        url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
-        request = compat_urllib_request.Request(url)
-        try:
-            video_webpage_bytes = compat_urllib_request.urlopen(request).read()
-        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            raise ExtractorError(u'Unable to download video webpage: %s' % compat_str(err))
-
-        video_webpage = video_webpage_bytes.decode('utf-8', 'ignore')
+        video_webpage = self._download_webpage(url, video_id)

        # Attempt to extract SWF player URL
        mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
@ -1367,6 +1355,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
        # description
        video_description = get_element_by_id("eow-description", video_webpage)
        if video_description:
+            video_description = re.sub(r'''(?x)
+                <a\s+
+                    (?:[a-zA-Z-]+="[^"]+"\s+)*?
+                    title="([^"]+)"\s+
+                    (?:[a-zA-Z-]+="[^"]+"\s+)*?
+                    class="yt-uix-redirect-link"\s*>
+                [^<]+
+                </a>
+            ''', r'\1', video_description)
            video_description = clean_html(video_description)
        else:
            fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
@ -1375,6 +1372,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
            else:
                video_description = u''

+        def _extract_count(klass):
+            count = self._search_regex(r'class="%s">([\d,]+)</span>' % re.escape(klass), video_webpage, klass, fatal=False)
+            if count is not None:
+                return int(count.replace(',', ''))
+            return None
+        like_count = _extract_count(u'likes-count')
+        dislike_count = _extract_count(u'dislikes-count')
+
        # subtitles
        video_subtitles = self.extract_subtitles(video_id, video_webpage)

@ -1507,6 +1512,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                'annotations':  video_annotations,
                'webpage_url': 'https://www.youtube.com/watch?v=%s' % video_id,
                'view_count': view_count,
+                'like_count': like_count,
+                'dislike_count': dislike_count,
            })
        return results

@ -1521,14 +1528,14 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
                           \? (?:.*?&)*? (?:p|a|list)=
                        |  p/
                        )
-                        ((?:PL|EC|UU|FL)?[0-9A-Za-z-_]{10,})
+                        ((?:PL|EC|UU|FL|RD)?[0-9A-Za-z-_]{10,})
                        .*
                     |
-                        ((?:PL|EC|UU|FL)[0-9A-Za-z-_]{10,})
+                        ((?:PL|EC|UU|FL|RD)[0-9A-Za-z-_]{10,})
                     )"""
    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&page=%s'
    _MORE_PAGES_INDICATOR = r'data-link-type="next"'
-    _VIDEO_RE = r'href="/watch\?v=([0-9A-Za-z_-]{11})&amp;'
+    _VIDEO_RE = r'href="/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)'
    IE_NAME = u'youtube:playlist'

    @classmethod
@ -1539,6 +1546,24 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
    def _real_initialize(self):
        self._login()

+    def _ids_to_results(self, ids):
+        return [self.url_result(vid_id, 'Youtube', video_id=vid_id)
+                       for vid_id in ids]
+
+    def _extract_mix(self, playlist_id):
+        # The mixes are generated from a single video;
+        # the id of the playlist is just 'RD' + video_id
+        url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id)
+        webpage = self._download_webpage(url, playlist_id, u'Downloading Youtube mix')
+        title_span = (get_element_by_attribute('class', 'title long-title', webpage) or
+            get_element_by_attribute('class', 'title ', webpage))
+        title = clean_html(title_span)
+        video_re = r'data-index="\d+".*?href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s' % re.escape(playlist_id)
+        ids = orderedSet(re.findall(video_re, webpage))
+        url_results = self._ids_to_results(ids)
+
+        return self.playlist_result(url_results, playlist_id, title)
+
    def _real_extract(self, url):
        # Extract playlist id
        mobj = re.match(self._VALID_URL, url, re.VERBOSE)
@ -1556,14 +1581,20 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
            else:
                self.to_screen(u'Downloading playlist PL%s - add --no-playlist to just download video %s' % (playlist_id, video_id))

+        if playlist_id.startswith('RD'):
+            # Mixes require a custom extraction process
+            return self._extract_mix(playlist_id)
+
        # Extract the video ids from the playlist pages
        ids = []

        for page_num in itertools.count(1):
            url = self._TEMPLATE_URL % (playlist_id, page_num)
            page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)
-            # The ids are duplicated
-            new_ids = orderedSet(re.findall(self._VIDEO_RE, page))
+            matches = re.finditer(self._VIDEO_RE, page)
+            # We remove the duplicates and the link with index 0
+            # (it's not the first video of the playlist)
+            new_ids = orderedSet(m.group('id') for m in matches if m.group('index') != '0')
            ids.extend(new_ids)

            if re.search(self._MORE_PAGES_INDICATOR, page) is None:
@ -1571,8 +1602,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):

        playlist_title = self._og_search_title(page)

-        url_results = [self.url_result(vid_id, 'Youtube', video_id=vid_id)
-                       for vid_id in ids]
+        url_results = self._ids_to_results(ids)
        return self.playlist_result(url_results, playlist_id, playlist_title)


@ -1601,10 +1631,11 @@ class YoutubeChannelIE(InfoExtractor):
        video_ids = []
        url = 'https://www.youtube.com/channel/%s/videos' % channel_id
        channel_page = self._download_webpage(url, channel_id)
-        if re.search(r'channel-header-autogenerated-label', channel_page) is not None:
-            autogenerated = True
-        else:
-            autogenerated = False
+        autogenerated = re.search(r'''(?x)
+                class="[^"]*?(?:
+                    channel-header-autogenerated-label|
+                    yt-channel-title-autogenerated
+                )[^"]*"''', channel_page) is not None

        if autogenerated:
            # The videos are contained in a single page
@ -1743,6 +1774,7 @@ class YoutubeSearchIE(SearchInfoExtractor):
        return self.playlist_result(videos, query)

 class YoutubeSearchDateIE(YoutubeSearchIE):
+    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc&orderby=published'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = u'YouTube.com searches, newest videos first'
@ -1769,7 +1801,6 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True
-    _PAGING_STEP = 30
    # use action_load_personal_feed instead of action_load_system_feed
    _PERSONAL_FEED = False

@ -1789,9 +1820,8 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):

    def _real_extract(self, url):
        feed_entries = []
-        # The step argument is available only in 2.7 or higher
-        for i in itertools.count(0):
-            paging = i*self._PAGING_STEP
+        paging = 0
+        for i in itertools.count(1):
            info = self._download_webpage(self._FEED_TEMPLATE % paging,
                                          u'%s feed' % self._FEED_NAME,
                                          u'Downloading page %s' % i)
@ -1804,6 +1834,7 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
                for video_id in ids)
            if info['paging'] is None:
                break
+            paging = info['paging']
        return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)

 class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
@ -1823,7 +1854,6 @@ class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
    _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
    _FEED_NAME = 'watch_later'
    _PLAYLIST_TITLE = u'Youtube Watch Later'
-    _PAGING_STEP = 100
    _PERSONAL_FEED = True

 class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
@ -1833,13 +1863,6 @@ class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    _PERSONAL_FEED = True
    _PLAYLIST_TITLE = u'Youtube Watch History'

-    def _real_extract(self, url):
-        webpage = self._download_webpage('https://www.youtube.com/feed/history', u'History')
-        data_paging = self._search_regex(r'data-paging="(\d+)"', webpage, u'data-paging')
-        # The step is actually a ridiculously big number (like 1374343569725646)
-        self._PAGING_STEP = int(data_paging)
-        return super(YoutubeHistoryIE, self)._real_extract(url)
-
 class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    IE_NAME = u'youtube:favorites'
    IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
--- a/youtube_dl/extractor/zdf.py
+++ b/youtube_dl/extractor/zdf.py
@ -1,24 +1,38 @@
+# coding: utf-8
+
 import operator
 import re

 from .common import InfoExtractor
 from ..utils import (
-    parse_xml_doc,
    unified_strdate,
 )


 class ZDFIE(InfoExtractor):
-    _VALID_URL = r'^http://www\.zdf\.de\/ZDFmediathek(?P<hash>#)?\/(.*beitrag\/video\/)(?P<video_id>[^/\?]+)(?:\?.*)?'
+    _VALID_URL = r'^https?://www\.zdf\.de/ZDFmediathek(?P<hash>#)?/(.*beitrag/(?:video/)?)(?P<video_id>[0-9]+)(?:/[^/?]+)?(?:\?.*)?'
+
+    _TEST = {
+        u"url": u"http://www.zdf.de/ZDFmediathek/beitrag/video/2037704/ZDFspezial---Ende-des-Machtpokers--?bc=sts;stt",
+        u"file": u"2037704.webm",
+        u"info_dict": {
+            u"upload_date": u"20131127",
+            u"description": u"Union und SPD haben sich auf einen Koalitionsvertrag geeinigt. Aber was bedeutet das für die Bürger? Sehen Sie hierzu das ZDFspezial \"Ende des Machtpokers - Große Koalition für Deutschland\".",
+            u"uploader": u"spezial",
+            u"title": u"ZDFspezial - Ende des Machtpokers"
+        },
+        u"skip": u"Videos on ZDF.de are depublicised in short order",
+    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('video_id')

        xml_url = u'http://www.zdf.de/ZDFmediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
-        info_xml = self._download_webpage(
-            xml_url, video_id, note=u'Downloading video info')
-        doc = parse_xml_doc(info_xml)
+        doc = self._download_xml(
+            xml_url, video_id,
+            note=u'Downloading video info',
+            errnote=u'Failed to download video info')

        title = doc.find('.//information/title').text
        description = doc.find('.//information/detail').text
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@ -17,7 +17,6 @@ import ssl
 import socket
 import sys
 import traceback
-import xml.etree.ElementTree
 import zlib

 try:
@ -562,11 +561,14 @@ def make_HTTPS_handler(opts_no_check_certificate):
        return HTTPSHandlerV3()
    else:
        context = ssl.SSLContext(ssl.PROTOCOL_SSLv3)
-        context.set_default_verify_paths()
-        
        context.verify_mode = (ssl.CERT_NONE
                               if opts_no_check_certificate
                               else ssl.CERT_REQUIRED)
+        context.set_default_verify_paths()
+        try:
+            context.load_default_certs()
+        except AttributeError:
+            pass  # Python < 3.4
        return compat_urllib_request.HTTPSHandler(context=context)

 class ExtractorError(Exception):
@ -1009,11 +1011,6 @@ def unsmuggle_url(smug_url):
    return url, data


-def parse_xml_doc(s):
-    assert isinstance(s, type(u''))
-    return xml.etree.ElementTree.fromstring(s.encode('utf-8'))
-
-
 def format_bytes(bytes):
    if bytes is None:
        return u'N/A'
@ -1026,3 +1023,7 @@ def format_bytes(bytes):
    suffix = [u'B', u'KiB', u'MiB', u'GiB', u'TiB', u'PiB', u'EiB', u'ZiB', u'YiB'][exponent]
    converted = float(bytes) / float(1024 ** exponent)
    return u'%.2f%s' % (converted, suffix)
+
+def str_to_int(int_str):
+    int_str = re.sub(r'[,\.]', u'', int_str)
+    return int(int_str)
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@ -1,2 +1,2 @@

-__version__ = '2013.11.25.3'
+__version__ = '2013.12.08.1'
Author	SHA1	Message	Date
Philipp Hagemeister	22686b91f0	release 2013.12.08.1	2013-12-08 07:32:25 +01:00
Philipp Hagemeister	31812a9e0e	[youtube:channel] Fix automated channel detection	2013-12-08 07:30:42 +01:00
Philipp Hagemeister	11bf848191	[wimp] simplify	2013-12-08 07:22:19 +01:00
Philipp Hagemeister	d4df5ed14c	release 2013.12.08	2013-12-08 06:54:52 +01:00
Philipp Hagemeister	303b479e0a	Automatically load SSL certs on Windows	2013-12-08 06:54:39 +01:00
Philipp Hagemeister	4c52160646	[FileDownloader] Fix progress report on Windows (Fixes #1918 )	2013-12-08 06:53:46 +01:00
Philipp Hagemeister	a213880aaf	Simplify status reporting (#1918 )	2013-12-08 05:49:35 +01:00
Jaime Marquínez Ferrándiz	42d3bf844a	Merge pull request #1919 from rzhxeo/xhamster [XHamsterIE] Fix HD video detection	2013-12-07 14:35:17 -08:00
rzhxeo	b860967ce4	[XHamsterIE] Fix md5 in second test	2013-12-07 22:17:13 +01:00
rzhxeo	8ca6b8fba1	[XHamsterIE] Fix HD video detection	2013-12-07 21:39:32 +01:00
Jaime Marquínez Ferrándiz	c4d9e6731a	[pyvideo] add support for videos that don't come from Youtube	2013-12-07 11:19:59 +01:00
Jaime Marquínez Ferrándiz	0d9ec5d963	[pyvideo] Cleanup and fix test	2013-12-07 11:00:56 +01:00
Jaime Marquínez Ferrándiz	870fc4e578	Merge remote-tracking branch 'gekitsuu/master' (closes PR #1913 )	2013-12-07 10:50:06 +01:00
Adam Glenn	f623530d6e	removing bad VALID_URL	2013-12-06 21:12:10 -08:00
Adam Glenn	ca9e02dc00	Adding pyvideo support	2013-12-06 21:11:01 -08:00
Jaime Marquínez Ferrándiz	fb30ec22fd	[vimeo] Add an extractor for groups	2013-12-06 22:01:41 +01:00
Jaime Marquínez Ferrándiz	5cc14c2fd7	[vimeo] Add an extractor for albums (closes #1911 )	2013-12-06 21:48:44 +01:00
Jaime Marquínez Ferrándiz	d349cd2240	[imdb] Fix extraction The paths to each format's page may have leading whitespace. The height and the duration can't be extracted.	2013-12-06 20:26:55 +01:00
Jaime Marquínez Ferrándiz	0b6a9f639f	[vevo] Update test video's duration	2013-12-06 20:14:29 +01:00
Jaime Marquínez Ferrándiz	715c8e7bdb	[youtube:playlist] Recognize mix ids for direct use (fixes #1295 )	2013-12-06 19:52:41 +01:00
Jaime Marquínez Ferrándiz	7d4afc557f	[youtube:playlist] Support mix ids longer than 13 (#1295 )	2013-12-06 19:48:54 +01:00
Jaime Marquínez Ferrándiz	563e405411	[dailymotion] Fix view count regex In some languages they can be in the format '123,456' instead of '123.456'	2013-12-06 13:41:07 +01:00
Jaime Marquínez Ferrándiz	f53c966a73	[dailymotion] Extract view count (#1895 )	2013-12-06 13:36:36 +01:00
Jaime Marquínez Ferrándiz	336c3a69bd	[youtube] Extract like and dislike count (#1895 )	2013-12-06 13:22:27 +01:00
Jaime Marquínez Ferrándiz	4e76179476	[vimeo] Extract views count, likes count and comments count (#1895 )	2013-12-06 13:03:08 +01:00
Philipp Hagemeister	ef4fd84857	[wistia] Add extractor	2013-12-06 09:15:04 +01:00
Philipp Hagemeister	72135030d1	Merge remote-tracking branch 'origin/master'	2013-12-05 22:30:04 +01:00
Jaime Marquínez Ferrándiz	3514813d5b	[francetv] Add support for urls in the format http://www.france3.fr/emissions/{program}/diffusions/{date} (fixes #1898 )	2013-12-05 21:49:30 +01:00
Jaime Marquínez Ferrándiz	9e60602084	[francetv] Add support for more channels: 3, 4, 5 and Ô (#1898 ) Rename the France2IE extractor to FranceTVIE	2013-12-05 21:48:41 +01:00
Philipp Hagemeister	19e3dfc9f8	[9gag] Like/dislike count (#1895 )	2013-12-05 18:29:07 +01:00
Philipp Hagemeister	a1ef7e85d6	Remove unused imports	2013-12-05 14:31:54 +01:00
Philipp Hagemeister	ef2fac6f4a	Merge branch 'master' of github.com:rg3/youtube-dl	2013-12-05 14:29:14 +01:00
Philipp Hagemeister	7fc3fa0545	[9gag] Add extractor	2013-12-05 14:29:08 +01:00
Jaime Marquínez Ferrándiz	673d1273ff	[vevo] Support '/watch/{id}' urls	2013-12-05 12:41:58 +01:00
Jaime Marquínez Ferrándiz	b9a2c53833	[metacafe] Add support for cbs videos (fixes #1838 ) They use theplatform.com	2013-12-04 23:43:50 +01:00
Jaime Marquínez Ferrándiz	e9bf7479d2	Add an extractor for theplatform.com	2013-12-04 23:41:22 +01:00
Jaime Marquínez Ferrándiz	bfb9f7bc4c	[hotnewhiphop] Update test's title	2013-12-04 20:36:26 +01:00
Jaime Marquínez Ferrándiz	6a656a843a	Update description value for the write_info_json test (required after `27dcce1904`)	2013-12-04 20:35:00 +01:00
Philipp Hagemeister	29030c0a4c	Merge remote-tracking branch 'dstftw/correct-valid-urls'	2013-12-04 19:56:05 +01:00
dst	c0ade33e16	Correct some extractor _VALID_URL regexes	2013-12-04 20:34:47 +07:00
Philipp Hagemeister	671c0f151d	release 2013.12.04	2013-12-04 14:19:07 +01:00
Philipp Hagemeister	27dcce1904	[youtube] Resolve URLs in comments	2013-12-04 14:18:49 +01:00
Jaime Marquínez Ferrándiz	84db81815a	Move common code for extractors based in MTV services to a new base class Removes the duplication of the thumbnail extraction code (only MTVIE needs to override it)	2013-12-03 14:58:24 +01:00
Jaime Marquínez Ferrándiz	fb7abb31af	Remove the compatibility code used before the new format system was implemented	2013-12-03 14:31:20 +01:00
Philipp Hagemeister	ce93879a9b	[daum] Fix real video ID extraction	2013-12-03 14:16:58 +01:00
Philipp Hagemeister	938384c587	[redtube] Fix search for title	2013-12-03 14:08:16 +01:00
Philipp Hagemeister	e9d8e302aa	[xhamster] Change test checksum	2013-12-03 14:06:16 +01:00
Jaime Marquínez Ferrándiz	cb7fb54600	Change the ie_name of YoutubeSearchDateIE It produced a duplicate entry when listing the extractors with '--list-extractors' and generates noise in the commit log when generating the supported sites webpage (like in `09f355f73b`)	2013-12-03 13:55:25 +01:00
Philipp Hagemeister	cf6758d204	Document disabling proxy (#1882 )	2013-12-03 13:33:07 +01:00
Philipp Hagemeister	731e3dde29	release 2013.12.03	2013-12-03 13:13:09 +01:00
Philipp Hagemeister	a0eaa341e1	[configuration] Undo code breakage	2013-12-03 13:11:20 +01:00
Philipp Hagemeister	fb27c2295e	Correct configuration file locations	2013-12-03 13:09:48 +01:00
Philipp Hagemeister	1b753cb334	Add Windows configuration file locations (#1881 )	2013-12-03 13:04:02 +01:00
Philipp Hagemeister	36a826a50d	Clarify --download-archive help (#1757 )	2013-12-03 11:54:52 +01:00
Philipp Hagemeister	8796857429	Credit @dstftw for smotri IE	2013-12-02 17:43:22 +01:00
Philipp Hagemeister	aaebed13a8	[smotri] Simplify	2013-12-02 17:08:17 +01:00
Philipp Hagemeister	25939ffe56	Merge branch 'smotri.com' of https://github.com/dstftw/youtube-dl	2013-12-02 15:56:35 +01:00
dst	5270d8cb13	Added extractors for smotri.com	2013-12-02 20:10:19 +07:00
Philipp Hagemeister	0037e02921	release 2013.12.02	2013-12-02 13:37:26 +01:00
Philipp Hagemeister	6ad14cab59	Add --socket-timeout option	2013-12-02 13:37:05 +01:00
Philipp Hagemeister	a9be0cc736	Merge branch 'master' of github.com:rg3/youtube-dl	2013-12-02 13:36:20 +01:00
Jaime Marquínez Ferrándiz	55a10eab48	[vimeo] Add an extractor for users (closes #1871 )	2013-12-01 22:36:18 +01:00
Philipp Hagemeister	e344693b65	Make socket timeout configurable, and bump default to 10 minutes (#1862 )	2013-12-01 11:42:02 +01:00
Philipp Hagemeister	355e4fd07e	[generic] Find embedded dailymotion videos (Fixes #1848 )	2013-12-01 01:21:33 +01:00
Philipp Hagemeister	5e09d6abbd	[clipfish] Skip test on travis	2013-12-01 01:16:20 +01:00
Philipp Hagemeister	b138de72f2	Merge branch 'master' of github.com:rg3/youtube-dl	2013-11-30 00:42:56 +01:00
Philipp Hagemeister	06dcbb71d8	Clarify help of --write-pages (#1853 )	2013-11-30 00:42:43 +01:00
Jaime Marquínez Ferrándiz	c5171c454b	[yahoo] Force use of the http protocol for downloading the videos.	2013-11-29 22:06:17 +01:00
Philipp Hagemeister	323ec6ae56	Clarify --download-archive help	2013-11-29 15:57:43 +01:00
Jaime Marquínez Ferrándiz	befd88b786	[yahoo] Add an extractor for yahoo news (closes #1849 )	2013-11-29 15:25:43 +01:00
Philipp Hagemeister	a3fb4675fb	Do not mutate default arguments In this case, it looks rather harmless (since the conditions for --restrict-filenames should not change while a process is running), but just to be sure. This also simplifies the interface for callers, who can just pass in the idiomatic None for "I don't care, whatever is the default".	2013-11-29 15:25:11 +01:00
Philipp Hagemeister	5f077efcb1	Merge pull request #1850 from nikai3d/master fix typo in help	2013-11-29 01:48:14 -08:00
Nicolas Kaiser	9986238ba9	fix typo in help	2013-11-29 09:48:38 +01:00
Nicolas Kaiser	e1f900d6a4	fix typo in README.md	2013-11-29 09:44:05 +01:00
Jaime Marquínez Ferrándiz	acf37ca151	[imdb] Fix the resolution values (fixes #1847 ) We were using the size of the player, it was the same for all the formats	2013-11-29 07:56:14 +01:00
Philipp Hagemeister	17769d5a6c	release 2013.11.29	2013-11-29 03:34:26 +01:00
Philipp Hagemeister	677c18092d	[podomatic] Add extractor	2013-11-29 03:33:25 +01:00
Jaime Marquínez Ferrándiz	3862402ff3	Add an extractor for Clipsyndicate (closes #1744 )	2013-11-28 14:38:10 +01:00
Jaime Marquínez Ferrándiz	b03d0d064c	[imdb] Fix extraction in python 2.6 Using a regular expression because the html cannot be parsed.	2013-11-28 13:49:00 +01:00
Jaime Marquínez Ferrándiz	d8d6148628	Add an extractor for Internet Movie Database trailers (closes #1832 )	2013-11-28 13:32:49 +01:00
Philipp Hagemeister	2be54167d0	release 2013.11.28.1	2013-11-28 06:17:56 +01:00
Philipp Hagemeister	4e0084d92e	[youtube/subtitles] Change MD5 of vtt subtitle in test	2013-11-28 06:14:17 +01:00
Philipp Hagemeister	fc9e1cc697	[clipfish] Use FIFA trailer as testcase (#1842 )	2013-11-28 06:10:37 +01:00
Philipp Hagemeister	f8f60d2793	[clipfish] Fix imports (#1842 )	2013-11-28 05:54:46 +01:00
Philipp Hagemeister	ea07dbb8b1	release 2013.11.28	2013-11-28 05:48:32 +01:00
Philipp Hagemeister	2a275ab007	[zdf] Use _download_xml	2013-11-28 05:47:50 +01:00
Philipp Hagemeister	a2e6db365c	[zdf] add a pseudo-testcase and fix URL matching	2013-11-28 05:47:20 +01:00
Philipp Hagemeister	9d93e7da6c	Merge branch 'master' of github.com:rg3/youtube-dl	2013-11-28 04:37:02 +01:00
Jaime Marquínez Ferrándiz	0e44d8381a	[youtube:feeds] Use the 'paging' value from the downloaded json information (fixes #1845 )	2013-11-28 00:33:27 +01:00
Jaime Marquínez Ferrándiz	35907e23ec	[yahoo] Fix video extraction and use the new format system exclusively	2013-11-27 21:24:55 +01:00
Jaime Marquínez Ferrándiz	76d1700b28	[youtube:playlist] Fix the extraction of the title for some mixes (#1844 ) Like https://www.youtube.com/watch?v=g8jDB5xOiuE&list=RDIh2gxLqR7HM	2013-11-27 20:01:51 +01:00
Philipp Hagemeister	dcca796ce4	[clipfish] Effect a better error message (#1842 )	2013-11-27 18:33:51 +01:00
Filippo Valsorda	4b19e38954	[videopremium] support new .me domain	2013-11-27 02:54:51 +01:00
Jaime Marquínez Ferrándiz	5f09bbff4d	[bash-completion] Complete the ':ythistory' keyword	2013-11-27 00:42:59 +01:00
Jaime Marquínez Ferrándiz	c1f9c59d11	[bash-completion] Complete filenames or directories if the previous option requires it	2013-11-27 00:41:30 +01:00
Jaime Marquínez Ferrándiz	652cdaa269	[youtube:playlist] Add support for YouTube mixes (fixes #1839 )	2013-11-26 21:35:03 +01:00
Jaime Marquínez Ferrándiz	e26f871228	Use the new '_download_xml' helper in more extractors	2013-11-26 19:17:25 +01:00
Jaime Marquínez Ferrándiz	6e47b51eef	[youtube:playlist] Remove the link with index 0 It's not the first video of the playlist, it appears in the 'Play all' button (see the test course for an example)	2013-11-26 19:09:14 +01:00
Jaime Marquínez Ferrándiz	4a98cdbf3b	YoutubeDL: set the 'params' property before any message/warning/error is sent (fixes #1840 ) If it sets the 'restrictfilenames' param, it will first report a warning. It will try to get the logger from the 'params' property, which would be set at that moment to None, raising the error 'AttributeError: 'NoneType' object has no attribute 'get''	2013-11-26 18:54:14 +01:00
Philipp Hagemeister	c5ed4e8f7e	release 2013.11.26	2013-11-26 10:41:35 +01:00
Jaime Marquínez Ferrándiz	c2e52508cc	Include the proxy in the parameters for YoutubeDL (fixes #1831 )	2013-11-26 08:03:11 +01:00