release 2014.04.13

[aol] Add support for playlists (Fixes #2730 )
[tube8] Fix extraction and modernize
2014-04-13 03:22:30 +02:00 · 2014-04-13 03:22:24 +02:00 · 2014-04-13 03:56:32 +07:00 · 2014-04-12 17:15:16 +02:00 · 2014-04-12 15:52:42 +02:00 · 2014-04-12 15:51:40 +02:00
95 changed files with 3112 additions and 1164 deletions
--- a/MANIFEST.in
+++ b/MANIFEST.in
@ -3,5 +3,4 @@ include test/*.py
 include test/*.json
 include youtube-dl.bash-completion
 include youtube-dl.1
-recursive-include docs *
-prune docs/_build
+recursive-include docs Makefile conf.py *.rst
--- a/3
+++ b/3
@ -72,8 +72,9 @@ youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-
 		--exclude '__pycache' \
 		--exclude '.git' \
 		--exclude 'testdata' \
+		--exclude 'docs/_build' \
 		-- \
-		bin devscripts test youtube_dl \
+		bin devscripts test youtube_dl docs \
 		CHANGELOG LICENSE README.md README.txt \
 		Makefile MANIFEST.in youtube-dl.1 youtube-dl.bash-completion setup.py \
 		youtube-dl
--- a/README.md
+++ b/README.md
@ -28,6 +28,9 @@ which means you can modify it, redistribute it or use it however you like.
    --user-agent UA                  specify a custom user agent
    --referer REF                    specify a custom referer, use if the video
                                     access is restricted to one domain
+    --add-header FIELD:VALUE         specify a custom HTTP header and its value,
+                                     separated by a colon ':'. You can use this
+                                     option multiple times
    --list-extractors                List all supported extractors and the URLs
                                     they would handle
    --extractor-descriptions         Output descriptions of all supported
@ -62,6 +65,7 @@ which means you can modify it, redistribute it or use it however you like.
                                     configuration in ~/.config/youtube-dl.conf
                                     (%APPDATA%/youtube-dl/config.txt on
                                     Windows)
+    --encoding ENCODING              Force the specified encoding (experimental)

 ## Video Selection:
    --playlist-start NUMBER          playlist video to start at (default is 1)
@ -166,6 +170,7 @@ which means you can modify it, redistribute it or use it however you like.

 ## Verbosity / Simulation Options:
    -q, --quiet                      activates quiet mode
+    --no-warnings                    Ignore warnings
    -s, --simulate                   do not download the video and do not write
                                     anything to disk
    --skip-download                  do not download the video
@ -177,7 +182,9 @@ which means you can modify it, redistribute it or use it however you like.
    --get-duration                   simulate, quiet but print video length
    --get-filename                   simulate, quiet but print output filename
    --get-format                     simulate, quiet but print output format
-    -j, --dump-json                  simulate, quiet but print JSON information
+    -j, --dump-json                  simulate, quiet but print JSON information.
+                                     See --output for a description of available
+                                     keys.
    --newline                        output progress bar as new lines
    --no-progress                    do not print progress bar
    --console-title                  display progress in console titlebar
@ -243,6 +250,7 @@ which means you can modify it, redistribute it or use it however you like.
                                     default
    --embed-subs                     embed subtitles in the video (only for mp4
                                     videos)
+    --embed-thumbnail                embed thumbnail in the audio as cover art
    --add-metadata                   write metadata to the video file
    --xattrs                         write metadata to the video file's xattrs
                                     (using dublin core and xdg standards)
@ -364,7 +372,67 @@ If you want to create a build of youtube-dl yourself, you'll need

 ### Adding support for a new site

-If you want to add support for a new site, copy *any* [recently modified](https://github.com/rg3/youtube-dl/commits/master/youtube_dl/extractor) file in `youtube_dl/extractor`, add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py). Have a look at [`youtube_dl/common/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L38). Don't forget to run the tests with `python test/test_download.py TestDownload.test_YourExtractor`! For a detailed tutorial, refer to [this blog post](http://filippo.io/add-support-for-a-new-video-site-to-youtube-dl/).
+If you want to add support for a new site, you can follow this quick list (assuming your service is called `yourextractor`):
+
+1. [Fork this repository](https://github.com/rg3/youtube-dl/fork)
+2. Check out the source code with `git clone git@github.com:YOUR_GITHUB_USERNAME/youtube-dl.git`
+3. Start a new git branch with `cd youtube-dl; git checkout -b yourextractor`
+4. Start with this simple template and save it to `youtube_dl/extractor/yourextractor.py`:
+
+        # coding: utf-8
+        from __future__ import unicode_literals
+
+        import re
+
+        from .common import InfoExtractor
+        
+        
+        class YourExtractorIE(InfoExtractor):
+            _VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P<id>[0-9]+)'
+            _TEST = {
+                'url': 'http://yourextractor.com/watch/42',
+                'md5': 'TODO: md5 sum of the first 10KiB of the video file',
+                'info_dict': {
+                    'id': '42',
+                    'ext': 'mp4',
+                    'title': 'Video title goes here',
+                    # TODO more properties, either as:
+                    # * A value
+                    # * MD5 checksum; start the string with md5:
+                    # * A regular expression; start the string with re:
+                    # * Any Python type (for example int or float)
+                }
+            }
+
+            def _real_extract(self, url):
+                mobj = re.match(self._VALID_URL, url)
+                video_id = mobj.group('id')
+
+                # TODO more code goes here, for example ...
+                webpage = self._download_webpage(url, video_id)
+                title = self._html_search_regex(r'<h1>(.*?)</h1>', webpage, 'title')
+
+                return {
+                    'id': video_id,
+                    'title': title,
+                    # TODO more properties (see youtube_dl/extractor/common.py)
+                }
+
+
+5. Add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py).
+6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done.
+7. Have a look at [`youtube_dl/common/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L38). Add tests and code for as many as you want.
+8. If you can, check the code with [pyflakes](https://pypi.python.org/pypi/pyflakes) (a good idea) and [pep8](https://pypi.python.org/pypi/pep8) (optional, ignore E501).
+9. When the tests pass, [add](https://www.kernel.org/pub/software/scm/git/docs/git-add.html) the new files and [commit](https://www.kernel.org/pub/software/scm/git/docs/git-commit.html) them and [push](https://www.kernel.org/pub/software/scm/git/docs/git-push.html) the result, like this:
+
+        $ git add youtube_dl/extractor/__init__.py
+        $ git add youtube_dl/extractor/yourextractor.py
+        $ git commit -m '[yourextractor] Add new extractor'
+        $ git push origin yourextractor
+
+10. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
+
+In any case, thank you very much for your contributions!

 # BUGS

--- a/test/test_YoutubeDL.py
+++ b/test/test_YoutubeDL.py
@ -26,16 +26,27 @@ class YDL(FakeYDL):
        self.msgs.append(msg)


+def _make_result(formats, **kwargs):
+    res = {
+        'formats': formats,
+        'id': 'testid',
+        'title': 'testttitle',
+        'extractor': 'testex',
+    }
+    res.update(**kwargs)
+    return res
+
+
 class TestFormatSelection(unittest.TestCase):
    def test_prefer_free_formats(self):
        # Same resolution => download webm
        ydl = YDL()
        ydl.params['prefer_free_formats'] = True
        formats = [
-            {'ext': 'webm', 'height': 460},
-            {'ext': 'mp4',  'height': 460},
+            {'ext': 'webm', 'height': 460, 'url': 'x'},
+            {'ext': 'mp4', 'height': 460, 'url': 'y'},
        ]
-        info_dict = {'formats': formats, 'extractor': 'test'}
+        info_dict = _make_result(formats)
        yie = YoutubeIE(ydl)
        yie._sort_formats(info_dict['formats'])
        ydl.process_ie_result(info_dict)
@ -46,8 +57,8 @@ class TestFormatSelection(unittest.TestCase):
        ydl = YDL()
        ydl.params['prefer_free_formats'] = True
        formats = [
-            {'ext': 'webm', 'height': 720},
-            {'ext': 'mp4', 'height': 1080},
+            {'ext': 'webm', 'height': 720, 'url': 'a'},
+            {'ext': 'mp4', 'height': 1080, 'url': 'b'},
        ]
        info_dict['formats'] = formats
        yie = YoutubeIE(ydl)
@ -60,9 +71,9 @@ class TestFormatSelection(unittest.TestCase):
        ydl = YDL()
        ydl.params['prefer_free_formats'] = False
        formats = [
-            {'ext': 'webm', 'height': 720},
-            {'ext': 'mp4', 'height': 720},
-            {'ext': 'flv', 'height': 720},
+            {'ext': 'webm', 'height': 720, 'url': '_'},
+            {'ext': 'mp4', 'height': 720, 'url': '_'},
+            {'ext': 'flv', 'height': 720, 'url': '_'},
        ]
        info_dict['formats'] = formats
        yie = YoutubeIE(ydl)
@ -74,8 +85,8 @@ class TestFormatSelection(unittest.TestCase):
        ydl = YDL()
        ydl.params['prefer_free_formats'] = False
        formats = [
-            {'ext': 'flv', 'height': 720},
-            {'ext': 'webm', 'height': 720},
+            {'ext': 'flv', 'height': 720, 'url': '_'},
+            {'ext': 'webm', 'height': 720, 'url': '_'},
        ]
        info_dict['formats'] = formats
        yie = YoutubeIE(ydl)
@ -91,8 +102,7 @@ class TestFormatSelection(unittest.TestCase):
            {'format_id': 'great', 'url': 'http://example.com/great', 'preference': 3},
            {'format_id': 'excellent', 'url': 'http://example.com/exc', 'preference': 4},
        ]
-        info_dict = {
-            'formats': formats, 'extractor': 'test', 'id': 'testvid'}
+        info_dict = _make_result(formats)

        ydl = YDL()
        ydl.process_ie_result(info_dict)
@ -120,12 +130,12 @@ class TestFormatSelection(unittest.TestCase):

    def test_format_selection(self):
        formats = [
-            {'format_id': '35', 'ext': 'mp4', 'preference': 1},
-            {'format_id': '45', 'ext': 'webm', 'preference': 2},
-            {'format_id': '47', 'ext': 'webm', 'preference': 3},
-            {'format_id': '2', 'ext': 'flv', 'preference': 4},
+            {'format_id': '35', 'ext': 'mp4', 'preference': 1, 'url': '_'},
+            {'format_id': '45', 'ext': 'webm', 'preference': 2, 'url': '_'},
+            {'format_id': '47', 'ext': 'webm', 'preference': 3, 'url': '_'},
+            {'format_id': '2', 'ext': 'flv', 'preference': 4, 'url': '_'},
        ]
-        info_dict = {'formats': formats, 'extractor': 'test'}
+        info_dict = _make_result(formats)

        ydl = YDL({'format': '20/47'})
        ydl.process_ie_result(info_dict.copy())
@ -154,12 +164,12 @@ class TestFormatSelection(unittest.TestCase):

    def test_format_selection_audio(self):
        formats = [
-            {'format_id': 'audio-low', 'ext': 'webm', 'preference': 1, 'vcodec': 'none'},
-            {'format_id': 'audio-mid', 'ext': 'webm', 'preference': 2, 'vcodec': 'none'},
-            {'format_id': 'audio-high', 'ext': 'flv', 'preference': 3, 'vcodec': 'none'},
-            {'format_id': 'vid', 'ext': 'mp4', 'preference': 4},
+            {'format_id': 'audio-low', 'ext': 'webm', 'preference': 1, 'vcodec': 'none', 'url': '_'},
+            {'format_id': 'audio-mid', 'ext': 'webm', 'preference': 2, 'vcodec': 'none', 'url': '_'},
+            {'format_id': 'audio-high', 'ext': 'flv', 'preference': 3, 'vcodec': 'none', 'url': '_'},
+            {'format_id': 'vid', 'ext': 'mp4', 'preference': 4, 'url': '_'},
        ]
-        info_dict = {'formats': formats, 'extractor': 'test'}
+        info_dict = _make_result(formats)

        ydl = YDL({'format': 'bestaudio'})
        ydl.process_ie_result(info_dict.copy())
@ -172,10 +182,10 @@ class TestFormatSelection(unittest.TestCase):
        self.assertEqual(downloaded['format_id'], 'audio-low')

        formats = [
-            {'format_id': 'vid-low', 'ext': 'mp4', 'preference': 1},
-            {'format_id': 'vid-high', 'ext': 'mp4', 'preference': 2},
+            {'format_id': 'vid-low', 'ext': 'mp4', 'preference': 1, 'url': '_'},
+            {'format_id': 'vid-high', 'ext': 'mp4', 'preference': 2, 'url': '_'},
        ]
-        info_dict = {'formats': formats, 'extractor': 'test'}
+        info_dict = _make_result(formats)

        ydl = YDL({'format': 'bestaudio/worstaudio/best'})
        ydl.process_ie_result(info_dict.copy())
@ -184,11 +194,11 @@ class TestFormatSelection(unittest.TestCase):

    def test_format_selection_video(self):
        formats = [
-            {'format_id': 'dash-video-low', 'ext': 'mp4', 'preference': 1, 'acodec': 'none'},
-            {'format_id': 'dash-video-high', 'ext': 'mp4', 'preference': 2, 'acodec': 'none'},
-            {'format_id': 'vid', 'ext': 'mp4', 'preference': 3},
+            {'format_id': 'dash-video-low', 'ext': 'mp4', 'preference': 1, 'acodec': 'none', 'url': '_'},
+            {'format_id': 'dash-video-high', 'ext': 'mp4', 'preference': 2, 'acodec': 'none', 'url': '_'},
+            {'format_id': 'vid', 'ext': 'mp4', 'preference': 3, 'url': '_'},
        ]
-        info_dict = {'formats': formats, 'extractor': 'test'}
+        info_dict = _make_result(formats)

        ydl = YDL({'format': 'bestvideo'})
        ydl.process_ie_result(info_dict.copy())
@ -217,10 +227,12 @@ class TestFormatSelection(unittest.TestCase):
        for f1id, f2id in zip(order, order[1:]):
            f1 = YoutubeIE._formats[f1id].copy()
            f1['format_id'] = f1id
+            f1['url'] = 'url:' + f1id
            f2 = YoutubeIE._formats[f2id].copy()
            f2['format_id'] = f2id
+            f2['url'] = 'url:' + f2id

-            info_dict = {'formats': [f1, f2], 'extractor': 'youtube'}
+            info_dict = _make_result([f1, f2], extractor='youtube')
            ydl = YDL()
            yie = YoutubeIE(ydl)
            yie._sort_formats(info_dict['formats'])
@ -228,7 +240,7 @@ class TestFormatSelection(unittest.TestCase):
            downloaded = ydl.downloaded_info_dicts[0]
            self.assertEqual(downloaded['format_id'], f1id)

-            info_dict = {'formats': [f2, f1], 'extractor': 'youtube'}
+            info_dict = _make_result([f2, f1], extractor='youtube')
            ydl = YDL()
            yie = YoutubeIE(ydl)
            yie._sort_formats(info_dict['formats'])
--- a/test/test_all_urls.py
+++ b/test/test_all_urls.py
@ -49,6 +49,7 @@ class TestAllURLsMatching(unittest.TestCase):
        self.assertMatch('http://youtu.be/BaW_jenozKc', ['youtube'])
        self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube'])
        self.assertMatch('https://youtube.googleapis.com/v/BaW_jenozKc', ['youtube'])
+        self.assertMatch('http://www.cleanvideosearch.com/media/action/yt/watch?videoId=8v_4O44sfjM', ['youtube'])

    def test_youtube_channel_matching(self):
        assertChannel = lambda url: self.assertMatch(url, ['youtube:channel'])
@ -143,5 +144,37 @@ class TestAllURLsMatching(unittest.TestCase):
        self.assertMatch('http://video.pbs.org/viralplayer/2365173446/', ['PBS'])
        self.assertMatch('http://video.pbs.org/widget/partnerplayer/980042464/', ['PBS'])

+    def test_ComedyCentralShows(self):
+        self.assertMatch(
+            'http://thedailyshow.cc.com/extended-interviews/xm3fnq/andrew-napolitano-extended-interview',
+            ['ComedyCentralShows'])
+        self.assertMatch(
+            'http://thecolbertreport.cc.com/videos/29w6fx/-realhumanpraise-for-fox-news',
+            ['ComedyCentralShows'])
+        self.assertMatch(
+            'http://thecolbertreport.cc.com/videos/gh6urb/neil-degrasse-tyson-pt--1?xrs=eml_col_031114',
+            ['ComedyCentralShows'])
+        self.assertMatch(
+            'http://thedailyshow.cc.com/guests/michael-lewis/3efna8/exclusive---michael-lewis-extended-interview-pt--3',
+            ['ComedyCentralShows'])
+        self.assertMatch(
+            'http://thedailyshow.cc.com/episodes/sy7yv0/april-8--2014---denis-leary',
+            ['ComedyCentralShows'])
+        self.assertMatch(
+            'http://thecolbertreport.cc.com/episodes/8ase07/april-8--2014---jane-goodall',
+            ['ComedyCentralShows'])
+        self.assertMatch(
+            'http://thedailyshow.cc.com/video-playlists/npde3s/the-daily-show-19088-highlights',
+            ['ComedyCentralShows'])
+        self.assertMatch(
+            'http://thedailyshow.cc.com/special-editions/2l8fdb/special-edition---a-look-back-at-food',
+            ['ComedyCentralShows'])
+
+    def test_yahoo_https(self):
+        # https://github.com/rg3/youtube-dl/issues/2701
+        self.assertMatch(
+            'https://screen.yahoo.com/smartwatches-latest-wearable-gadgets-163745379-cbs.html',
+            ['Yahoo'])
+
 if __name__ == '__main__':
    unittest.main()
--- a/test/test_playlists.py
+++ b/test/test_playlists.py
@ -42,6 +42,8 @@ from youtube_dl.extractor import (
    ToypicsUserIE,
    XTubeUserIE,
    InstagramUserIE,
+    CSpanIE,
+    AolIE,
 )


@ -314,6 +316,28 @@ class TestPlaylists(unittest.TestCase):
        }
        expect_info_dict(self, EXPECTED, test_video)

+    def test_CSpan_playlist(self):
+        dl = FakeYDL()
+        ie = CSpanIE(dl)
+        result = ie.extract(
+            'http://www.c-span.org/video/?318608-1/gm-ignition-switch-recall')
+        self.assertIsPlaylist(result)
+        self.assertEqual(result['id'], '342759')
+        self.assertEqual(
+            result['title'], 'General Motors Ignition Switch Recall')
+        whole_duration = sum(e['duration'] for e in result['entries'])
+        self.assertEqual(whole_duration, 14855)
+
+    def test_aol_playlist(self):
+        dl = FakeYDL()
+        ie = AolIE(dl)
+        result = ie.extract(
+            'http://on.aol.com/playlist/brace-yourself---todays-weirdest-news-152147?icid=OnHomepageC4_Omg_Img#_videoid=518184316')
+        self.assertIsPlaylist(result)
+        self.assertEqual(result['id'], '152147')
+        self.assertEqual(
+            result['title'], 'Brace Yourself - Today\'s Weirdest News')
+        self.assertTrue(len(result['entries']) >= 10)

 if __name__ == '__main__':
    unittest.main()
--- a/test/test_utils.py
+++ b/test/test_utils.py
@ -10,6 +10,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

 # Various small unit tests
 import io
+import json
 import xml.etree.ElementTree

 #from youtube_dl.utils import htmlentity_transform
@ -36,6 +37,8 @@ from youtube_dl.utils import (
    urlencode_postdata,
    xpath_with_ns,
    parse_iso8601,
+    strip_jsonp,
+    uppercase_escape,
 )

 if sys.version_info < (3, 0):
@ -272,5 +275,14 @@ class TestUtil(unittest.TestCase):
        self.assertEqual(parse_iso8601('2014-03-23T22:04:26+0000'), 1395612266)
        self.assertEqual(parse_iso8601('2014-03-23T22:04:26Z'), 1395612266)

+    def test_strip_jsonp(self):
+        stripped = strip_jsonp('cb ([ {"id":"532cb",\n\n\n"x":\n3}\n]\n);')
+        d = json.loads(stripped)
+        self.assertEqual(d, [{"id": "532cb", "x": 3}])
+
+    def test_uppercase_escpae(self):
+        self.assertEqual(uppercase_escape(u'aä'), u'aä')
+        self.assertEqual(uppercase_escape(u'\\U0001d550'), u'𝕐')
+
 if __name__ == '__main__':
    unittest.main()
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@ -8,6 +8,7 @@ import datetime
 import errno
 import io
 import json
+import locale
 import os
 import platform
 import re
@ -94,6 +95,7 @@ class YoutubeDL(object):
    usenetrc:          Use netrc for authentication instead.
    verbose:           Print additional info to stdout.
    quiet:             Do not print messages to stdout.
+    no_warnings:       Do not print out anything for warnings.
    forceurl:          Force printing final URL.
    forcetitle:        Force printing title.
    forceid:           Force printing ID.
@ -158,6 +160,7 @@ class YoutubeDL(object):
    include_ads:       Download ads as well
    default_search:    Prepend this string if an input url is not valid.
                       'auto' for elaborate guessing
+    encoding:          Use this encoding instead of the system-specified.

    The following parameters are not used by YoutubeDL itself, they are used by
    the FileDownloader:
@ -283,6 +286,9 @@ class YoutubeDL(object):
        """Print message to stdout if not in quiet mode."""
        return self.to_stdout(message, skip_eol, check_quiet=True)

+    def _write_string(self, s, out=None):
+        write_string(s, out=out, encoding=self.params.get('encoding'))
+
    def to_stdout(self, message, skip_eol=False, check_quiet=False):
        """Print message to stdout if not in quiet mode."""
        if self.params.get('logger'):
@ -292,7 +298,7 @@ class YoutubeDL(object):
            terminator = ['\n', ''][skip_eol]
            output = message + terminator

-            write_string(output, self._screen_file)
+            self._write_string(output, self._screen_file)

    def to_stderr(self, message):
        """Print message to stderr."""
@ -302,7 +308,7 @@ class YoutubeDL(object):
        else:
            message = self._bidi_workaround(message)
            output = message + '\n'
-            write_string(output, self._err_file)
+            self._write_string(output, self._err_file)

    def to_console_title(self, message):
        if not self.params.get('consoletitle', False):
@ -312,21 +318,21 @@ class YoutubeDL(object):
            # already of type unicode()
            ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        elif 'TERM' in os.environ:
-            write_string('\033]0;%s\007' % message, self._screen_file)
+            self._write_string('\033]0;%s\007' % message, self._screen_file)

    def save_console_title(self):
        if not self.params.get('consoletitle', False):
            return
        if 'TERM' in os.environ:
            # Save the title on stack
-            write_string('\033[22;0t', self._screen_file)
+            self._write_string('\033[22;0t', self._screen_file)

    def restore_console_title(self):
        if not self.params.get('consoletitle', False):
            return
        if 'TERM' in os.environ:
            # Restore the title from stack
-            write_string('\033[23;0t', self._screen_file)
+            self._write_string('\033[23;0t', self._screen_file)

    def __enter__(self):
        self.save_console_title()
@ -376,6 +382,8 @@ class YoutubeDL(object):
        if self.params.get('logger') is not None:
            self.params['logger'].warning(message)
        else:
+            if self.params.get('no_warnings'):
+                return
            if self._err_file.isatty() and os.name != 'nt':
                _msg_header = '\033[0;33mWARNING:\033[0m'
            else:
@ -697,6 +705,11 @@ class YoutubeDL(object):
    def process_video_result(self, info_dict, download=True):
        assert info_dict.get('_type', 'video') == 'video'

+        if 'id' not in info_dict:
+            raise ExtractorError('Missing "id" field in extractor result')
+        if 'title' not in info_dict:
+            raise ExtractorError('Missing "title" field in extractor result')
+
        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
@ -728,6 +741,9 @@ class YoutubeDL(object):

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
+            if 'url' not in format:
+                raise ExtractorError('Missing "url" key in result (index %d)' % i)
+
            if format.get('format_id') is None:
                format['format_id'] = compat_str(i)
            if format.get('format') is None:
@ -738,7 +754,7 @@ class YoutubeDL(object):
                )
            # Automatically determine file extension if missing
            if 'ext' not in format:
-                format['ext'] = determine_ext(format['url'])
+                format['ext'] = determine_ext(format['url']).lower()

        format_limit = self.params.get('format_limit', None)
        if format_limit:
@ -863,7 +879,7 @@ class YoutubeDL(object):

        try:
            dn = os.path.dirname(encodeFilename(filename))
-            if dn != '' and not os.path.exists(dn):
+            if dn and not os.path.exists(dn):
                os.makedirs(dn)
        except (OSError, IOError) as err:
            self.report_error('unable to create directory ' + compat_str(err))
@ -920,7 +936,7 @@ class YoutubeDL(object):
                        with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                                subfile.write(sub)
                except (OSError, IOError):
-                    self.report_error('Cannot write subtitles file ' + descfn)
+                    self.report_error('Cannot write subtitles file ' + sub_filename)
                    return

        if self.params.get('writeinfojson', False):
@ -1197,7 +1213,17 @@ class YoutubeDL(object):
    def print_debug_header(self):
        if not self.params.get('verbose'):
            return
-        write_string('[debug] youtube-dl version ' + __version__ + '\n')
+
+        write_string(
+            '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
+                locale.getpreferredencoding(),
+                sys.getfilesystemencoding(),
+                sys.stdout.encoding,
+                self.get_encoding()),
+            encoding=None
+        )
+
+        self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
        try:
            sp = subprocess.Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
@ -1206,20 +1232,20 @@ class YoutubeDL(object):
            out, err = sp.communicate()
            out = out.decode().strip()
            if re.match('[0-9a-f]+', out):
-                write_string('[debug] Git HEAD: ' + out + '\n')
+                self._write_string('[debug] Git HEAD: ' + out + '\n')
        except:
            try:
                sys.exc_clear()
            except:
                pass
-        write_string('[debug] Python version %s - %s' %
+        self._write_string('[debug] Python version %s - %s' %
                     (platform.python_version(), platform_name()) + '\n')

        proxy_map = {}
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
-        write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
+        self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

    def _setup_opener(self):
        timeout_val = self.params.get('socket_timeout')
@ -1261,3 +1287,19 @@ class YoutubeDL(object):
        # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener
+
+    def encode(self, s):
+        if isinstance(s, bytes):
+            return s  # Already encoded
+
+        try:
+            return s.encode(self.get_encoding())
+        except UnicodeEncodeError as err:
+            err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
+            raise
+
+    def get_encoding(self):
+        encoding = self.params.get('encoding')
+        if encoding is None:
+            encoding = preferredencoding()
+        return encoding
--- a/youtube_dl/init.py
+++ b/youtube_dl/init.py
@ -51,6 +51,8 @@ __authors__  = (
    'David Wagner',
    'Juan C. Olivares',
    'Mattias Harrysson',
+    'phaer',
+    'Sainyam Kapoor',
 )

 __license__ = 'Public Domain'
@ -90,6 +92,8 @@ from .extractor import gen_extractors
 from .version import __version__
 from .YoutubeDL import YoutubeDL
 from .postprocessor import (
+    AtomicParsleyPP,
+    FFmpegAudioFixPP,
    FFmpegMetadataPP,
    FFmpegVideoConvertor,
    FFmpegExtractAudioPP,
@ -227,6 +231,9 @@ def parseOpts(overrideArguments=None):
    general.add_option('--referer',
            dest='referer', help='specify a custom referer, use if the video access is restricted to one domain',
            metavar='REF', default=None)
+    general.add_option('--add-header',
+            dest='headers', help='specify a custom HTTP header and its value, separated by a colon \':\'. You can use this option multiple times', action="append",
+            metavar='FIELD:VALUE')
    general.add_option('--list-extractors',
            action='store_true', dest='list_extractors',
            help='List all supported extractors and the URLs they would handle', default=False)
@ -238,7 +245,7 @@ def parseOpts(overrideArguments=None):
        help='Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection')
    general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.')
    general.add_option(
-        '--prefer-insecure', action='store_true', dest='prefer_insecure',
+        '--prefer-insecure', '--prefer-unsecure', action='store_true', dest='prefer_insecure',
        help='Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube)')
    general.add_option(
        '--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR',
@ -252,13 +259,17 @@ def parseOpts(overrideArguments=None):
    general.add_option(
        '--bidi-workaround', dest='bidi_workaround', action='store_true',
        help=u'Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH')
-    general.add_option('--default-search',
-            dest='default_search', metavar='PREFIX',
-            help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for  youtube-dl "large apple". By default (with value "auto") youtube-dl guesses.')
+    general.add_option(
+        '--default-search',
+        dest='default_search', metavar='PREFIX',
+        help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for  youtube-dl "large apple". By default (with value "auto") youtube-dl guesses.')
    general.add_option(
        '--ignore-config',
        action='store_true',
        help='Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: do not read the user configuration in ~/.config/youtube-dl.conf (%APPDATA%/youtube-dl/config.txt on Windows)')
+    general.add_option(
+        '--encoding', dest='encoding', metavar='ENCODING',
+        help='Force the specified encoding (experimental)')

    selection.add_option(
        '--playlist-start',
@ -361,6 +372,10 @@ def parseOpts(overrideArguments=None):

    verbosity.add_option('-q', '--quiet',
            action='store_true', dest='quiet', help='activates quiet mode', default=False)
+    verbosity.add_option(
+        '--no-warnings',
+        dest='no_warnings', action='store_true', default=False,
+        help='Ignore warnings')
    verbosity.add_option('-s', '--simulate',
            action='store_true', dest='simulate', help='do not download the video and do not write anything to disk', default=False)
    verbosity.add_option('--skip-download',
@ -388,7 +403,7 @@ def parseOpts(overrideArguments=None):
            help='simulate, quiet but print output format', default=False)
    verbosity.add_option('-j', '--dump-json',
            action='store_true', dest='dumpjson',
-            help='simulate, quiet but print JSON information', default=False)
+            help='simulate, quiet but print JSON information. See --output for a description of available keys.', default=False)
    verbosity.add_option('--newline',
            action='store_true', dest='progress_with_newline', help='output progress bar as new lines', default=False)
    verbosity.add_option('--no-progress',
@ -490,6 +505,8 @@ def parseOpts(overrideArguments=None):
            help='do not overwrite post-processed files; the post-processed files are overwritten by default')
    postproc.add_option('--embed-subs', action='store_true', dest='embedsubtitles', default=False,
            help='embed subtitles in the video (only for mp4 videos)')
+    postproc.add_option('--embed-thumbnail', action='store_true', dest='embedthumbnail', default=False,
+            help='embed thumbnail in the audio as cover art')
    postproc.add_option('--add-metadata', action='store_true', dest='addmetadata', default=False,
            help='write metadata to the video file')
    postproc.add_option('--xattrs', action='store_true', dest='xattrs', default=False,
@ -532,8 +549,6 @@ def parseOpts(overrideArguments=None):
            write_string(u'[debug] System config: ' + repr(_hide_login_info(systemConf)) + '\n')
            write_string(u'[debug] User config: ' + repr(_hide_login_info(userConf)) + '\n')
            write_string(u'[debug] Command-line args: ' + repr(_hide_login_info(commandLineConf)) + '\n')
-            write_string(u'[debug] Encodings: locale %r, fs %r, out %r, pref: %r\n' %
-                         (locale.getpreferredencoding(), sys.getfilesystemencoding(), sys.stdout.encoding, preferredencoding()))

    return parser, opts, args

@ -556,6 +571,16 @@ def _real_main(argv=None):
    if opts.referer is not None:
        std_headers['Referer'] = opts.referer

+    # Custom HTTP headers
+    if opts.headers is not None:
+        for h in opts.headers:
+            if h.find(':', 1) < 0:
+                parser.error(u'wrong header formatting, it should be key:value, not "%s"'%h)
+            key, value = h.split(':', 2)
+            if opts.verbose:
+                write_string(u'[debug] Adding header from command line option %s:%s\n'%(key, value))
+            std_headers[key] = value
+
    # Dump user agent
    if opts.dump_user_agent:
        compat_print(std_headers['User-Agent'])
@ -657,7 +682,7 @@ def _real_main(argv=None):
        date = DateRange.day(opts.date)
    else:
        date = DateRange(opts.dateafter, opts.datebefore)
-    if opts.default_search not in ('auto', None) and ':' not in opts.default_search:
+    if opts.default_search not in ('auto', 'auto_warning', None) and ':' not in opts.default_search:
        parser.error(u'--default-search invalid; did you forget a colon (:) at the end?')

    # Do not download videos when there are audio-only formats
@ -695,6 +720,7 @@ def _real_main(argv=None):
        'password': opts.password,
        'videopassword': opts.videopassword,
        'quiet': (opts.quiet or any_printing),
+        'no_warnings': opts.no_warnings,
        'forceurl': opts.geturl,
        'forcetitle': opts.gettitle,
        'forceid': opts.getid,
@ -767,6 +793,7 @@ def _real_main(argv=None):
        'include_ads': opts.include_ads,
        'default_search': opts.default_search,
        'youtube_include_dash_manifest': opts.youtube_include_dash_manifest,
+        'encoding': opts.encoding,
    }

    with YoutubeDL(ydl_opts) as ydl:
@ -785,6 +812,10 @@ def _real_main(argv=None):
            ydl.add_post_processor(FFmpegEmbedSubtitlePP(subtitlesformat=opts.subtitlesformat))
        if opts.xattrs:
            ydl.add_post_processor(XAttrMetadataPP())
+        if opts.embedthumbnail:
+            if not opts.addmetadata:
+                ydl.add_post_processor(FFmpegAudioFixPP())
+            ydl.add_post_processor(AtomicParsleyPP())

        # Update version
        if opts.update_self:
--- a/youtube_dl/downloader/common.py
+++ b/youtube_dl/downloader/common.py
@ -4,9 +4,10 @@ import sys
 import time

 from ..utils import (
+    compat_str,
    encodeFilename,
-    timeconvert,
    format_bytes,
+    timeconvert,
 )


@ -173,7 +174,7 @@ class FileDownloader(object):
                return
            os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
        except (IOError, OSError) as err:
-            self.report_error(u'unable to rename file: %s' % str(err))
+            self.report_error(u'unable to rename file: %s' % compat_str(err))

    def try_utime(self, filename, last_modified_hdr):
        """Try to set the last-modified time of the given file."""
--- a/youtube_dl/downloader/f4m.py
+++ b/youtube_dl/downloader/f4m.py
@ -297,6 +297,7 @@ class F4mFD(FileDownloader):
                        break
            frags_filenames.append(frag_filename)

+        dest_stream.close()
        self.report_finish(format_bytes(state['downloaded_bytes']), time.time() - start)

        self.try_rename(tmpfilename, filename)
--- a/youtube_dl/downloader/hls.py
+++ b/youtube_dl/downloader/hls.py
@ -13,8 +13,10 @@ class HlsFD(FileDownloader):
        self.report_destination(filename)
        tmpfilename = self.temp_name(filename)

-        args = ['-y', '-i', url, '-f', 'mp4', '-c', 'copy',
-            '-bsf:a', 'aac_adtstoasc', tmpfilename]
+        args = [
+            '-y', '-i', url, '-f', 'mp4', '-c', 'copy',
+            '-bsf:a', 'aac_adtstoasc',
+            encodeFilename(tmpfilename, for_subprocess=True)]

        for program in ['avconv', 'ffmpeg']:
            try:
--- a/youtube_dl/downloader/http.py
+++ b/youtube_dl/downloader/http.py
@ -23,6 +23,8 @@ class HttpFD(FileDownloader):
        headers = {'Youtubedl-no-compression': 'True'}
        if 'user_agent' in info_dict:
            headers['Youtubedl-user-agent'] = info_dict['user_agent']
+        if 'http_referer' in info_dict:
+            headers['Referer'] = info_dict['http_referer']
        basic_request = compat_urllib_request.Request(url, None, headers)
        request = compat_urllib_request.Request(url, None, headers)

--- a/youtube_dl/extractor/init.py
+++ b/youtube_dl/extractor/init.py
@ -14,6 +14,7 @@ from .arte import (
    ArteTVConcertIE,
    ArteTVFutureIE,
    ArteTVDDCIE,
+    ArteTVEmbedIE,
 )
 from .auengine import AUEngineIE
 from .bambuser import BambuserIE, BambuserChannelIE
@ -25,11 +26,13 @@ from .bloomberg import BloombergIE
 from .br import BRIE
 from .breakcom import BreakIE
 from .brightcove import BrightcoveIE
+from .byutv import BYUtvIE
 from .c56 import C56IE
 from .canal13cl import Canal13clIE
 from .canalplus import CanalplusIE
 from .canalc2 import Canalc2IE
 from .cbs import CBSIE
+from .cbsnews import CBSNewsIE
 from .ceskatelevize import CeskaTelevizeIE
 from .channel9 import Channel9IE
 from .chilloutzone import ChilloutzoneIE
@ -38,6 +41,7 @@ from .clipfish import ClipfishIE
 from .cliphunter import CliphunterIE
 from .clipsyndicate import ClipsyndicateIE
 from .cmt import CMTIE
+from .cnet import CNETIE
 from .cnn import (
    CNNIE,
    CNNBlogsIE,
@ -59,6 +63,7 @@ from .dotsub import DotsubIE
 from .dreisat import DreiSatIE
 from .defense import DefenseGouvFrIE
 from .discovery import DiscoveryIE
+from .divxstage import DivxStageIE
 from .dropbox import DropboxIE
 from .ebaumsworld import EbaumsWorldIE
 from .ehow import EHowIE
@ -81,6 +86,7 @@ from .fktv import (
 )
 from .flickr import FlickrIE
 from .fourtube import FourTubeIE
+from .franceculture import FranceCultureIE
 from .franceinter import FranceInterIE
 from .francetv import (
    PluzzIE,
@ -150,10 +156,14 @@ from .mixcloud import MixcloudIE
 from .mpora import MporaIE
 from .mofosex import MofosexIE
 from .mooshare import MooshareIE
+from .morningstar import MorningstarIE
+from .motorsport import MotorsportIE
+from .movshare import MovShareIE
 from .mtv import (
    MTVIE,
    MTVIggyIE,
 )
+from .musicplayon import MusicPlayOnIE
 from .muzu import MuzuTVIE
 from .myspace import MySpaceIE
 from .myspass import MySpassIE
@ -175,6 +185,8 @@ from .normalboots import NormalbootsIE
 from .novamov import NovaMovIE
 from .nowness import NownessIE
 from .nowvideo import NowVideoIE
+from .ntv import NTVIE
+from .oe1 import OE1IE
 from .ooyala import OoyalaIE
 from .orf import ORFIE
 from .parliamentliveuk import ParliamentLiveUKIE
@ -196,6 +208,7 @@ from .rottentomatoes import RottenTomatoesIE
 from .roxwel import RoxwelIE
 from .rtlnow import RTLnowIE
 from .rts import RTSIE
+from .rtve import RTVEALaCartaIE
 from .rutube import (
    RutubeIE,
    RutubeChannelIE,
@ -206,7 +219,6 @@ from .rutv import RUTVIE
 from .savefrom import SaveFromIE
 from .servingsys import ServingSysIE
 from .sina import SinaIE
-from .slashdot import SlashdotIE
 from .slideshare import SlideshareIE
 from .smotri import (
    SmotriIE,
@ -255,19 +267,20 @@ from .udemy import (
    UdemyCourseIE
 )
 from .unistra import UnistraIE
+from .urort import UrortIE
 from .ustream import UstreamIE, UstreamChannelIE
 from .vbox7 import Vbox7IE
 from .veehd import VeeHDIE
 from .veoh import VeohIE
 from .vesti import VestiIE
 from .vevo import VevoIE
-from .vice import ViceIE
 from .viddler import ViddlerIE
 from .videobam import VideoBamIE
 from .videodetective import VideoDetectiveIE
 from .videolecturesnet import VideoLecturesNetIE
 from .videofyme import VideofyMeIE
 from .videopremium import VideoPremiumIE
+from .videoweed import VideoWeedIE
 from .vimeo import (
    VimeoIE,
    VimeoChannelIE,
@ -280,8 +293,12 @@ from .vine import VineIE
 from .viki import VikiIE
 from .vk import VKIE
 from .vube import VubeIE
+from .washingtonpost import WashingtonPostIE
 from .wat import WatIE
-from .wdr import WDRIE
+from .wdr import (
+    WDRIE,
+    WDRMausIE,
+)
 from .weibo import WeiboIE
 from .wimp import WimpIE
 from .wistia import WistiaIE
--- a/youtube_dl/extractor/addanime.py
+++ b/youtube_dl/extractor/addanime.py
@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import re

 from .common import InfoExtractor
@ -14,14 +16,14 @@ from ..utils import (
 class AddAnimeIE(InfoExtractor):

    _VALID_URL = r'^http://(?:\w+\.)?add-anime\.net/watch_video\.php\?(?:.*?)v=(?P<video_id>[\w_]+)(?:.*)'
-    IE_NAME = u'AddAnime'
    _TEST = {
-        u'url': u'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9',
-        u'file': u'24MR3YO5SAS9.mp4',
-        u'md5': u'72954ea10bc979ab5e2eb288b21425a0',
-        u'info_dict': {
-            u"description": u"One Piece 606",
-            u"title": u"One Piece 606"
+        'url': 'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9',
+        'md5': '72954ea10bc979ab5e2eb288b21425a0',
+        'info_dict': {
+            'id': '24MR3YO5SAS9',
+            'ext': 'mp4',
+            'description': 'One Piece 606',
+            'title': 'One Piece 606',
        }
    }

@ -38,10 +40,10 @@ class AddAnimeIE(InfoExtractor):
            redir_webpage = ee.cause.read().decode('utf-8')
            action = self._search_regex(
                r'<form id="challenge-form" action="([^"]+)"',
-                redir_webpage, u'Redirect form')
+                redir_webpage, 'Redirect form')
            vc = self._search_regex(
                r'<input type="hidden" name="jschl_vc" value="([^"]+)"/>',
-                redir_webpage, u'redirect vc value')
+                redir_webpage, 'redirect vc value')
            av = re.search(
                r'a\.value = ([0-9]+)[+]([0-9]+)[*]([0-9]+);',
                redir_webpage)
@ -52,19 +54,19 @@ class AddAnimeIE(InfoExtractor):
            parsed_url = compat_urllib_parse_urlparse(url)
            av_val = av_res + len(parsed_url.netloc)
            confirm_url = (
-                parsed_url.scheme + u'://' + parsed_url.netloc +
+                parsed_url.scheme + '://' + parsed_url.netloc +
                action + '?' +
                compat_urllib_parse.urlencode({
                    'jschl_vc': vc, 'jschl_answer': compat_str(av_val)}))
            self._download_webpage(
                confirm_url, video_id,
-                note=u'Confirming after redirect')
+                note='Confirming after redirect')
            webpage = self._download_webpage(url, video_id)

        formats = []
        for format_id in ('normal', 'hq'):
            rex = r"var %s_video_file = '(.*?)';" % re.escape(format_id)
-            video_url = self._search_regex(rex, webpage, u'video file URLx',
+            video_url = self._search_regex(rex, webpage, 'video file URLx',
                                           fatal=False)
            if not video_url:
                continue
@ -72,14 +74,13 @@ class AddAnimeIE(InfoExtractor):
                'format_id': format_id,
                'url': video_url,
            })
-        if not formats:
-            raise ExtractorError(u'Cannot find any video format!')
+        self._sort_formats(formats)
        video_title = self._og_search_title(webpage)
        video_description = self._og_search_description(webpage)

        return {
            '_type': 'video',
-            'id':  video_id,
+            'id': video_id,
            'formats': formats,
            'title': video_title,
            'description': video_description
--- a/youtube_dl/extractor/aol.py
+++ b/youtube_dl/extractor/aol.py
@ -8,7 +8,18 @@ from .fivemin import FiveMinIE

 class AolIE(InfoExtractor):
    IE_NAME = 'on.aol.com'
-    _VALID_URL = r'http://on\.aol\.com/video/.*-(?P<id>\d+)($|\?)'
+    _VALID_URL = r'''(?x)
+        (?:
+            aol-video:|
+            http://on\.aol\.com/
+            (?:
+                video/.*-|
+                playlist/(?P<playlist_display_id>[^/?#]+?)-(?P<playlist_id>[0-9]+)[?#].*_videoid=
+            )
+        )
+        (?P<id>[0-9]+)
+        (?:$|\?)
+    '''

    _TEST = {
        'url': 'http://on.aol.com/video/u-s--official-warns-of-largest-ever-irs-phone-scam-518167793?icid=OnHomepageC2Wide_MustSee_Img',
@ -24,5 +35,31 @@ class AolIE(InfoExtractor):
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
-        self.to_screen('Downloading 5min.com video %s' % video_id)
+
+        playlist_id = mobj.group('playlist_id')
+        if playlist_id and not self._downloader.params.get('noplaylist'):
+            self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
+
+            webpage = self._download_webpage(url, playlist_id)
+            title = self._html_search_regex(
+                r'<h1 class="video-title[^"]*">(.+?)</h1>', webpage, 'title')
+            playlist_html = self._search_regex(
+                r"(?s)<ul\s+class='video-related[^']*'>(.*?)</ul>", webpage,
+                'playlist HTML')
+            entries = [{
+                '_type': 'url',
+                'url': 'aol-video:%s' % m.group('id'),
+                'ie_key': 'Aol',
+            } for m in re.finditer(
+                r"<a\s+href='.*videoid=(?P<id>[0-9]+)'\s+class='video-thumb'>",
+                playlist_html)]
+
+            return {
+                '_type': 'playlist',
+                'id': playlist_id,
+                'display_id': mobj.group('playlist_display_id'),
+                'title': title,
+                'entries': entries,
+            }
+
        return FiveMinIE._build_result(video_id)
--- a/youtube_dl/extractor/appletrailers.py
+++ b/youtube_dl/extractor/appletrailers.py
@ -6,7 +6,6 @@ import json
 from .common import InfoExtractor
 from ..utils import (
    compat_urlparse,
-    determine_ext,
 )


@ -16,9 +15,10 @@ class AppleTrailersIE(InfoExtractor):
        "url": "http://trailers.apple.com/trailers/wb/manofsteel/",
        "playlist": [
            {
-                "file": "manofsteel-trailer4.mov",
                "md5": "d97a8e575432dbcb81b7c3acb741f8a8",
                "info_dict": {
+                    "id": "manofsteel-trailer4",
+                    "ext": "mov",
                    "duration": 111,
                    "title": "Trailer 4",
                    "upload_date": "20130523",
@ -26,9 +26,10 @@ class AppleTrailersIE(InfoExtractor):
                },
            },
            {
-                "file": "manofsteel-trailer3.mov",
                "md5": "b8017b7131b721fb4e8d6f49e1df908c",
                "info_dict": {
+                    "id": "manofsteel-trailer3",
+                    "ext": "mov",
                    "duration": 182,
                    "title": "Trailer 3",
                    "upload_date": "20130417",
@ -36,9 +37,10 @@ class AppleTrailersIE(InfoExtractor):
                },
            },
            {
-                "file": "manofsteel-trailer.mov",
                "md5": "d0f1e1150989b9924679b441f3404d48",
                "info_dict": {
+                    "id": "manofsteel-trailer",
+                    "ext": "mov",
                    "duration": 148,
                    "title": "Trailer",
                    "upload_date": "20121212",
@ -46,15 +48,16 @@ class AppleTrailersIE(InfoExtractor):
                },
            },
            {
-                "file": "manofsteel-teaser.mov",
                "md5": "5fe08795b943eb2e757fa95cb6def1cb",
                "info_dict": {
+                    "id": "manofsteel-teaser",
+                    "ext": "mov",
                    "duration": 93,
                    "title": "Teaser",
                    "upload_date": "20120721",
                    "uploader_id": "wb",
                },
-            }
+            },
        ]
    }

@ -65,16 +68,16 @@ class AppleTrailersIE(InfoExtractor):
        movie = mobj.group('movie')
        uploader_id = mobj.group('company')

-        playlist_url = compat_urlparse.urljoin(url, u'includes/playlists/itunes.inc')
+        playlist_url = compat_urlparse.urljoin(url, 'includes/playlists/itunes.inc')
        def fix_html(s):
-            s = re.sub(r'(?s)<script[^<]*?>.*?</script>', u'', s)
+            s = re.sub(r'(?s)<script[^<]*?>.*?</script>', '', s)
            s = re.sub(r'<img ([^<]*?)>', r'<img \1/>', s)
            # The ' in the onClick attributes are not escaped, it couldn't be parsed
            # like: http://trailers.apple.com/trailers/wb/gravity/
            def _clean_json(m):
-                return u'iTunes.playURL(%s);' % m.group(1).replace('\'', '&#39;')
+                return 'iTunes.playURL(%s);' % m.group(1).replace('\'', '&#39;')
            s = re.sub(self._JSON_RE, _clean_json, s)
-            s = u'<html>' + s + u'</html>'
+            s = '<html>' + s + u'</html>'
            return s
        doc = self._download_xml(playlist_url, movie, transform_source=fix_html)

@ -82,7 +85,7 @@ class AppleTrailersIE(InfoExtractor):
        for li in doc.findall('./div/ul/li'):
            on_click = li.find('.//a').attrib['onClick']
            trailer_info_json = self._search_regex(self._JSON_RE,
-                on_click, u'trailer info')
+                on_click, 'trailer info')
            trailer_info = json.loads(trailer_info_json)
            title = trailer_info['title']
            video_id = movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', title).lower()
@ -98,8 +101,7 @@ class AppleTrailersIE(InfoExtractor):
            first_url = trailer_info['url']
            trailer_id = first_url.split('/')[-1].rpartition('_')[0].lower()
            settings_json_url = compat_urlparse.urljoin(url, 'includes/settings/%s.json' % trailer_id)
-            settings_json = self._download_webpage(settings_json_url, trailer_id, u'Downloading settings json')
-            settings = json.loads(settings_json)
+            settings = self._download_json(settings_json_url, trailer_id, 'Downloading settings json')

            formats = []
            for format in settings['metadata']['sizes']:
@ -107,7 +109,6 @@ class AppleTrailersIE(InfoExtractor):
                format_url = re.sub(r'_(\d*p.mov)', r'_h\1', format['src'])
                formats.append({
                    'url': format_url,
-                    'ext': determine_ext(format_url),
                    'format': format['type'],
                    'width': format['width'],
                    'height': int(format['height']),
--- a/youtube_dl/extractor/arte.py
+++ b/youtube_dl/extractor/arte.py
@ -2,7 +2,6 @@
 from __future__ import unicode_literals

 import re
-import json

 from .common import InfoExtractor
 from ..utils import (
@ -19,114 +18,41 @@ from ..utils import (
 # is different for each one. The videos usually expire in 7 days, so we can't
 # add tests.

-class ArteTvIE(InfoExtractor):
-    _VIDEOS_URL = r'(?:http://)?videos\.arte\.tv/(?P<lang>fr|de)/.*-(?P<id>.*?)\.html'
-    _LIVEWEB_URL = r'(?:http://)?liveweb\.arte\.tv/(?P<lang>fr|de)/(?P<subpage>.+?)/(?P<name>.+)'
-    _LIVE_URL = r'index-[0-9]+\.html$'

+class ArteTvIE(InfoExtractor):
+    _VALID_URL = r'http://videos\.arte\.tv/(?P<lang>fr|de)/.*-(?P<id>.*?)\.html'
    IE_NAME = 'arte.tv'

-    @classmethod
-    def suitable(cls, url):
-        return any(re.match(regex, url) for regex in (cls._VIDEOS_URL, cls._LIVEWEB_URL))
-
-    # TODO implement Live Stream
-    # from ..utils import compat_urllib_parse
-    # def extractLiveStream(self, url):
-    #     video_lang = url.split('/')[-4]
-    #     info = self.grep_webpage(
-    #         url,
-    #         r'src="(.*?/videothek_js.*?\.js)',
-    #         0,
-    #         [
-    #             (1, 'url', 'Invalid URL: %s' % url)
-    #         ]
-    #     )
-    #     http_host = url.split('/')[2]
-    #     next_url = 'http://%s%s' % (http_host, compat_urllib_parse.unquote(info.get('url')))
-    #     info = self.grep_webpage(
-    #         next_url,
-    #         r'(s_artestras_scst_geoFRDE_' + video_lang + '.*?)\'.*?' +
-    #             '(http://.*?\.swf).*?' +
-    #             '(rtmp://.*?)\'',
-    #         re.DOTALL,
-    #         [
-    #             (1, 'path',   'could not extract video path: %s' % url),
-    #             (2, 'player', 'could not extract video player: %s' % url),
-    #             (3, 'url',    'could not extract video url: %s' % url)
-    #         ]
-    #     )
-    #     video_url = '%s/%s' % (info.get('url'), info.get('path'))
-
    def _real_extract(self, url):
-        mobj = re.match(self._VIDEOS_URL, url)
-        if mobj is not None:
-            id = mobj.group('id')
-            lang = mobj.group('lang')
-            return self._extract_video(url, id, lang)
+        mobj = re.match(self._VALID_URL, url)
+        lang = mobj.group('lang')
+        video_id = mobj.group('id')

-        mobj = re.match(self._LIVEWEB_URL, url)
-        if mobj is not None:
-            name = mobj.group('name')
-            lang = mobj.group('lang')
-            return self._extract_liveweb(url, name, lang)
-
-        if re.search(self._LIVE_URL, url) is not None:
-            raise ExtractorError('Arte live streams are not yet supported, sorry')
-            # self.extractLiveStream(url)
-            # return
-
-        raise ExtractorError('No video found')
-
-    def _extract_video(self, url, video_id, lang):
-        """Extract from videos.arte.tv"""
        ref_xml_url = url.replace('/videos/', '/do_delegate/videos/')
        ref_xml_url = ref_xml_url.replace('.html', ',view,asPlayerXml.xml')
        ref_xml_doc = self._download_xml(
            ref_xml_url, video_id, note='Downloading metadata')
        config_node = find_xpath_attr(ref_xml_doc, './/video', 'lang', lang)
        config_xml_url = config_node.attrib['ref']
-        config_xml = self._download_webpage(
+        config = self._download_xml(
            config_xml_url, video_id, note='Downloading configuration')

-        video_urls = list(re.finditer(r'<url quality="(?P<quality>.*?)">(?P<url>.*?)</url>', config_xml))
-        def _key(m):
-            quality = m.group('quality')
-            if quality == 'hd':
-                return 2
-            else:
-                return 1
-        # We pick the best quality
-        video_urls = sorted(video_urls, key=_key)
-        video_url = list(video_urls)[-1].group('url')
-        
-        title = self._html_search_regex(r'<name>(.*?)</name>', config_xml, 'title')
-        thumbnail = self._html_search_regex(r'<firstThumbnailUrl>(.*?)</firstThumbnailUrl>',
-                                            config_xml, 'thumbnail')
-        return {'id': video_id,
-                'title': title,
-                'thumbnail': thumbnail,
-                'url': video_url,
-                'ext': 'flv',
-                }
+        formats = [{
+            'forma_id': q.attrib['quality'],
+            'url': q.text,
+            'ext': 'flv',
+            'quality': 2 if q.attrib['quality'] == 'hd' else 1,
+        } for q in config.findall('./urls/url')]
+        self._sort_formats(formats)

-    def _extract_liveweb(self, url, name, lang):
-        """Extract form http://liveweb.arte.tv/"""
-        webpage = self._download_webpage(url, name)
-        video_id = self._search_regex(r'eventId=(\d+?)("|&)', webpage, 'event id')
-        config_doc = self._download_xml('http://download.liveweb.arte.tv/o21/liveweb/events/event-%s.xml' % video_id,
-                                            video_id, 'Downloading information')
-        event_doc = config_doc.find('event')
-        url_node = event_doc.find('video').find('urlHd')
-        if url_node is None:
-            url_node = event_doc.find('urlSd')
-
-        return {'id': video_id,
-                'title': event_doc.find('name%s' % lang.capitalize()).text,
-                'url': url_node.text.replace('MP4', 'mp4'),
-                'ext': 'flv',
-                'thumbnail': self._og_search_thumbnail(webpage),
-                }
+        title = config.find('.//name').text
+        thumbnail = config.find('.//firstThumbnailUrl').text
+        return {
+            'id': video_id,
+            'title': title,
+            'thumbnail': thumbnail,
+            'formats': formats,
+        }


 class ArteTVPlus7IE(InfoExtractor):
@ -152,9 +78,7 @@ class ArteTVPlus7IE(InfoExtractor):
        return self._extract_from_json_url(json_url, video_id, lang)

    def _extract_from_json_url(self, json_url, video_id, lang):
-        json_info = self._download_webpage(json_url, video_id, 'Downloading info json')
-        self.report_extraction(video_id)
-        info = json.loads(json_info)
+        info = self._download_json(json_url, video_id)
        player_info = info['videoJsonPlayer']

        info_dict = {
@ -176,6 +100,8 @@ class ArteTVPlus7IE(InfoExtractor):
                l = 'F'
            elif lang == 'de':
                l = 'A'
+            else:
+                l = lang
            regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l]
            return any(re.match(r, f['versionCode']) for r in regexes)
        # Some formats may not be in the same language as the url
@ -305,3 +231,22 @@ class ArteTVConcertIE(ArteTVPlus7IE):
            'description': 'md5:486eb08f991552ade77439fe6d82c305',
        },
    }
+
+
+class ArteTVEmbedIE(ArteTVPlus7IE):
+    IE_NAME = 'arte.tv:embed'
+    _VALID_URL = r'''(?x)
+        http://www\.arte\.tv
+        /playerv2/embed\.php\?json_url=
+        (?P<json_url>
+            http://arte\.tv/papi/tvguide/videos/stream/player/
+            (?P<lang>[^/]+)/(?P<id>[^/]+)[^&]*
+        )
+    '''
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        lang = mobj.group('lang')
+        json_url = mobj.group('json_url')
+        return self._extract_from_json_url(json_url, video_id, lang)
--- a/youtube_dl/extractor/auengine.py
+++ b/youtube_dl/extractor/auengine.py
@ -11,22 +11,24 @@ from ..utils import (


 class AUEngineIE(InfoExtractor):
+    _VALID_URL = r'http://(?:www\.)?auengine\.com/embed\.php\?.*?file=(?P<id>[^&]+).*?'
+
    _TEST = {
        'url': 'http://auengine.com/embed.php?file=lfvlytY6&w=650&h=370',
-        'file': 'lfvlytY6.mp4',
        'md5': '48972bdbcf1a3a2f5533e62425b41d4f',
        'info_dict': {
+            'id': 'lfvlytY6',
+            'ext': 'mp4',
            'title': '[Commie]The Legend of the Legendary Heroes - 03 - Replication Eye (Alpha Stigma)[F9410F5A]'
        }
    }
-    _VALID_URL = r'(?:http://)?(?:www\.)?auengine\.com/embed\.php\?.*?file=([^&]+).*?'

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group(1)
+        video_id = mobj.group('id')
+
        webpage = self._download_webpage(url, video_id)
-        title = self._html_search_regex(r'<title>(?P<title>.+?)</title>',
-                webpage, 'title')
+        title = self._html_search_regex(r'<title>(?P<title>.+?)</title>', webpage, 'title')
        title = title.strip()
        links = re.findall(r'\s(?:file|url):\s*["\']([^\'"]+)["\']', webpage)
        links = map(compat_urllib_parse.unquote, links)
@ -39,14 +41,15 @@ class AUEngineIE(InfoExtractor):
            elif '/videos/' in link:
                video_url = link
        if not video_url:
-            raise ExtractorError(u'Could not find video URL')
+            raise ExtractorError('Could not find video URL')
        ext = '.' + determine_ext(video_url)
        if ext == title[-len(ext):]:
            title = title[:-len(ext)]

        return {
-            'id':        video_id,
-            'url':       video_url,
-            'title':     title,
+            'id': video_id,
+            'url': video_url,
+            'title': title,
            'thumbnail': thumbnail,
+            'http_referer': 'http://www.auengine.com/flowplayer/flowplayer.commercial-3.2.14.swf',
        }
--- a/youtube_dl/extractor/bloomberg.py
+++ b/youtube_dl/extractor/bloomberg.py
@ -1,22 +1,21 @@
+from __future__ import unicode_literals
+
 import re

 from .common import InfoExtractor
-from .ooyala import OoyalaIE


 class BloombergIE(InfoExtractor):
    _VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<name>.+?)\.html'

    _TEST = {
-        u'url': u'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html',
-        u'file': u'12bzhqZTqQHmmlA8I-i0NpzJgcG5NNYX.mp4',
-        u'info_dict': {
-            u'title': u'Shah\'s Presentation on Foreign-Exchange Strategies',
-            u'description': u'md5:abc86e5236f9f0e4866c59ad36736686',
-        },
-        u'params': {
-            # Requires ffmpeg (m3u8 manifest)
-            u'skip_download': True,
+        'url': 'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html',
+        'md5': '7bf08858ff7c203c870e8a6190e221e5',
+        'info_dict': {
+            'id': 'qurhIVlJSB6hzkVi229d8g',
+            'ext': 'flv',
+            'title': 'Shah\'s Presentation on Foreign-Exchange Strategies',
+            'description': 'md5:0681e0d30dcdfc6abf34594961d8ea88',
        },
    }

@ -24,7 +23,16 @@ class BloombergIE(InfoExtractor):
        mobj = re.match(self._VALID_URL, url)
        name = mobj.group('name')
        webpage = self._download_webpage(url, name)
-        embed_code = self._search_regex(
-            r'<source src="https?://[^/]+/[^/]+/[^/]+/([^/]+)', webpage,
-            'embed code')
-        return OoyalaIE._build_url_result(embed_code)
+        f4m_url = self._search_regex(
+            r'<source src="(https?://[^"]+\.f4m.*?)"', webpage,
+            'f4m url')
+        title = re.sub(': Video$', '', self._og_search_title(webpage))
+
+        return {
+            'id': name.split('-')[-1],
+            'title': title,
+            'url': f4m_url,
+            'ext': 'flv',
+            'description': self._og_search_description(webpage),
+            'thumbnail': self._og_search_thumbnail(webpage),
+        }
--- a/youtube_dl/extractor/br.py
+++ b/youtube_dl/extractor/br.py
@ -4,39 +4,72 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
-from ..utils import ExtractorError
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+)


 class BRIE(InfoExtractor):
-    IE_DESC = "Bayerischer Rundfunk Mediathek"
-    _VALID_URL = r"^https?://(?:www\.)?br\.de/mediathek/video/(?:sendungen/)?(?:[a-z0-9\-/]+/)?(?P<id>[a-z0-9\-]+)\.html$"
-    _BASE_URL = "http://www.br.de"
+    IE_DESC = 'Bayerischer Rundfunk Mediathek'
+    _VALID_URL = r'https?://(?:www\.)?br\.de/(?:[a-z0-9\-]+/)+(?P<id>[a-z0-9\-]+)\.html'
+    _BASE_URL = 'http://www.br.de'

    _TESTS = [
        {
-            "url": "http://www.br.de/mediathek/video/anselm-gruen-114.html",
-            "md5": "c4f83cf0f023ba5875aba0bf46860df2",
-            "info_dict": {
-                "id": "2c8d81c5-6fb7-4a74-88d4-e768e5856532",
-                "ext": "mp4",
-                "title": "Feiern und Verzichten",
-                "description": "Anselm Grün: Feiern und Verzichten",
-                "uploader": "BR/Birgit Baier",
-                "upload_date": "20140301"
+            'url': 'http://www.br.de/mediathek/video/anselm-gruen-114.html',
+            'md5': 'c4f83cf0f023ba5875aba0bf46860df2',
+            'info_dict': {
+                'id': '2c8d81c5-6fb7-4a74-88d4-e768e5856532',
+                'ext': 'mp4',
+                'title': 'Feiern und Verzichten',
+                'description': 'Anselm Grün: Feiern und Verzichten',
+                'uploader': 'BR/Birgit Baier',
+                'upload_date': '20140301',
            }
        },
        {
-            "url": "http://www.br.de/mediathek/video/sendungen/unter-unserem-himmel/unter-unserem-himmel-alpen-ueber-den-pass-100.html",
-            "md5": "ab451b09d861dbed7d7cc9ab0be19ebe",
-            "info_dict": {
-                "id": "2c060e69-3a27-4e13-b0f0-668fac17d812",
-                "ext": "mp4",
-                "title": "Über den Pass",
-                "description": "Die Eroberung der Alpen: Über den Pass",
-                "uploader": None,
-                "upload_date": None
+            'url': 'http://www.br.de/mediathek/video/sendungen/unter-unserem-himmel/unter-unserem-himmel-alpen-ueber-den-pass-100.html',
+            'md5': 'ab451b09d861dbed7d7cc9ab0be19ebe',
+            'info_dict': {
+                'id': '2c060e69-3a27-4e13-b0f0-668fac17d812',
+                'ext': 'mp4',
+                'title': 'Über den Pass',
+                'description': 'Die Eroberung der Alpen: Über den Pass',
            }
-        }
+        },
+        {
+            'url': 'http://www.br.de/nachrichten/schaeuble-haushaltsentwurf-bundestag-100.html',
+            'md5': '3db0df1a9a9cd9fa0c70e6ea8aa8e820',
+            'info_dict': {
+                'id': 'c6aae3de-2cf9-43f2-957f-f17fef9afaab',
+                'ext': 'aac',
+                'title': '"Keine neuen Schulden im nächsten Jahr"',
+                'description': 'Haushaltsentwurf: "Keine neuen Schulden im nächsten Jahr"',
+            }
+        },
+        {
+            'url': 'http://www.br.de/radio/bayern1/service/team/videos/team-video-erdelt100.html',
+            'md5': 'dbab0aef2e047060ea7a21fc1ce1078a',
+            'info_dict': {
+                'id': '6ba73750-d405-45d3-861d-1ce8c524e059',
+                'ext': 'mp4',
+                'title': 'Umweltbewusster Häuslebauer',
+                'description': 'Uwe Erdelt: Umweltbewusster Häuslebauer',
+            }
+        },
+        {
+            'url': 'http://www.br.de/fernsehen/br-alpha/sendungen/kant-fuer-anfaenger/kritik-der-reinen-vernunft/kant-kritik-01-metaphysik100.html',
+            'md5': '23bca295f1650d698f94fc570977dae3',
+            'info_dict': {
+                'id': 'd982c9ce-8648-4753-b358-98abb8aec43d',
+                'ext': 'mp4',
+                'title': 'Folge 1 - Metaphysik',
+                'description': 'Kant für Anfänger: Folge 1 - Metaphysik',
+                'uploader': 'Eva Maria Steimle',
+                'upload_date': '20140117',
+            }
+        },
    ]

    def _real_extract(self, url):
@ -44,56 +77,63 @@ class BRIE(InfoExtractor):
        display_id = mobj.group('id')
        page = self._download_webpage(url, display_id)
        xml_url = self._search_regex(
-            r"return BRavFramework\.register\(BRavFramework\('avPlayer_(?:[a-f0-9-]{36})'\)\.setup\({dataURL:'(/mediathek/video/[a-z0-9/~_.-]+)'}\)\);", page, "XMLURL")
+            r"return BRavFramework\.register\(BRavFramework\('avPlayer_(?:[a-f0-9-]{36})'\)\.setup\({dataURL:'(/(?:[a-z0-9\-]+/)+[a-z0-9/~_.-]+)'}\)\);", page, 'XMLURL')
        xml = self._download_xml(self._BASE_URL + xml_url, None)

-        videos = []
-        for xml_video in xml.findall("video"):
-            video = {
-                "id": xml_video.get("externalId"),
-                "title": xml_video.find("title").text,
-                "formats": self._extract_formats(xml_video.find("assets")),
-                "thumbnails": self._extract_thumbnails(xml_video.find("teaserImage/variants")),
-                "description": " ".join(xml_video.find("shareTitle").text.splitlines()),
-                "webpage_url": xml_video.find("permalink").text
-            }
-            if xml_video.find("author").text:
-                video["uploader"] = xml_video.find("author").text
-            if xml_video.find("broadcastDate").text:
-                video["upload_date"] =  "".join(reversed(xml_video.find("broadcastDate").text.split(".")))
-            videos.append(video)
+        medias = []

-        if len(videos) > 1:
+        for xml_media in xml.findall('video') + xml.findall('audio'):
+            media = {
+                'id': xml_media.get('externalId'),
+                'title': xml_media.find('title').text,
+                'formats': self._extract_formats(xml_media.find('assets')),
+                'thumbnails': self._extract_thumbnails(xml_media.find('teaserImage/variants')),
+                'description': ' '.join(xml_media.find('shareTitle').text.splitlines()),
+                'webpage_url': xml_media.find('permalink').text
+            }
+            if xml_media.find('author').text:
+                media['uploader'] = xml_media.find('author').text
+            if xml_media.find('broadcastDate').text:
+                media['upload_date'] = ''.join(reversed(xml_media.find('broadcastDate').text.split('.')))
+            medias.append(media)
+
+        if len(medias) > 1:
            self._downloader.report_warning(
-                'found multiple videos; please '
+                'found multiple medias; please '
                'report this with the video URL to http://yt-dl.org/bug')
-        if not videos:
-            raise ExtractorError('No video entries found')
-        return videos[0]
+        if not medias:
+            raise ExtractorError('No media entries found')
+        return medias[0]

    def _extract_formats(self, assets):
+
+        def text_or_none(asset, tag):
+            elem = asset.find(tag)
+            return None if elem is None else elem.text
+
        formats = [{
-            "url": asset.find("downloadUrl").text,
-            "ext": asset.find("mediaType").text,
-            "format_id": asset.get("type"),
-            "width": int(asset.find("frameWidth").text),
-            "height": int(asset.find("frameHeight").text),
-            "tbr": int(asset.find("bitrateVideo").text),
-            "abr": int(asset.find("bitrateAudio").text),
-            "vcodec": asset.find("codecVideo").text,
-            "container": asset.find("mediaType").text,
-            "filesize": int(asset.find("size").text),
-        } for asset in assets.findall("asset")
-            if asset.find("downloadUrl") is not None]
+            'url': text_or_none(asset, 'downloadUrl'),
+            'ext': text_or_none(asset, 'mediaType'),
+            'format_id': asset.get('type'),
+            'width': int_or_none(text_or_none(asset, 'frameWidth')),
+            'height': int_or_none(text_or_none(asset, 'frameHeight')),
+            'tbr': int_or_none(text_or_none(asset, 'bitrateVideo')),
+            'abr': int_or_none(text_or_none(asset, 'bitrateAudio')),
+            'vcodec': text_or_none(asset, 'codecVideo'),
+            'acodec': text_or_none(asset, 'codecAudio'),
+            'container': text_or_none(asset, 'mediaType'),
+            'filesize': int_or_none(text_or_none(asset, 'size')),
+        } for asset in assets.findall('asset')
+            if asset.find('downloadUrl') is not None]

        self._sort_formats(formats)
        return formats

    def _extract_thumbnails(self, variants):
        thumbnails = [{
-            "url": self._BASE_URL + variant.find("url").text,
-            "width": int(variant.find("width").text),
-            "height": int(variant.find("height").text),
-        } for variant in variants.findall("variant")]
-        thumbnails.sort(key=lambda x: x["width"] * x["height"], reverse=True)
+            'url': self._BASE_URL + variant.find('url').text,
+            'width': int_or_none(variant.find('width').text),
+            'height': int_or_none(variant.find('height').text),
+        } for variant in variants.findall('variant')]
+        thumbnails.sort(key=lambda x: x['width'] * x['height'], reverse=True)
        return thumbnails
--- a/youtube_dl/extractor/breakcom.py
+++ b/youtube_dl/extractor/breakcom.py
@ -27,9 +27,10 @@ class BreakIE(InfoExtractor):
            webpage, 'info json', flags=re.DOTALL)
        info = json.loads(info_json)
        video_url = info['videoUri']
-        m_youtube = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', video_url)
-        if m_youtube is not None:
-            return self.url_result(m_youtube.group(1), 'Youtube')
+        youtube_id = info.get('youtubeId')
+        if youtube_id:
+            return self.url_result(youtube_id, 'Youtube')
+
        final_url = video_url + '?' + info['AuthToken']
        return {
            'id': video_id,
--- a/youtube_dl/extractor/brightcove.py
+++ b/youtube_dl/extractor/brightcove.py
@ -87,7 +87,7 @@ class BrightcoveIE(InfoExtractor):
        object_str = object_str.replace('<--', '<!--')
        object_str = fix_xml_ampersands(object_str)

-        object_doc = xml.etree.ElementTree.fromstring(object_str)
+        object_doc = xml.etree.ElementTree.fromstring(object_str.encode('utf-8'))

        fv_el = find_xpath_attr(object_doc, './param', 'name', 'flashVars')
        if fv_el is not None:
@ -140,7 +140,11 @@ class BrightcoveIE(InfoExtractor):

        url_m = re.search(r'<meta\s+property="og:video"\s+content="(http://c.brightcove.com/[^"]+)"', webpage)
        if url_m:
-            return [unescapeHTML(url_m.group(1))]
+            url = unescapeHTML(url_m.group(1))
+            # Some sites don't add it, we can't download with this url, for example:
+            # http://www.ktvu.com/videos/news/raw-video-caltrain-releases-video-of-man-almost/vCTZdY/
+            if 'playerKey' in url:
+                return [url]

        matches = re.findall(
            r'''(?sx)<object
--- a/youtube_dl/extractor/byutv.py
+++ b/youtube_dl/extractor/byutv.py
@ -0,0 +1,48 @@
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import ExtractorError
+
+
+class BYUtvIE(InfoExtractor):
+    _VALID_URL = r'^https?://(?:www\.)?byutv.org/watch/[0-9a-f-]+/(?P<video_id>[^/?#]+)'
+    _TEST = {
+        'url': 'http://www.byutv.org/watch/44e80f7b-e3ba-43ba-8c51-b1fd96c94a79/granite-flats-talking',
+        'info_dict': {
+            'id': 'granite-flats-talking',
+            'ext': 'mp4',
+            'description': 'md5:4e9a7ce60f209a33eca0ac65b4918e1c',
+            'title': 'Talking',
+            'thumbnail': 're:^https?://.*promo.*'
+        },
+        'params': {
+            'skip_download': True,
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('video_id')
+
+        webpage = self._download_webpage(url, video_id)
+        episode_code = self._search_regex(
+            r'(?s)episode:(.*?\}),\s*\n', webpage, 'episode information')
+        episode_json = re.sub(
+            r'(\n\s+)([a-zA-Z]+):\s+\'(.*?)\'', r'\1"\2": "\3"', episode_code)
+        ep = json.loads(episode_json)
+
+        if ep['providerType'] == 'Ooyala':
+            return {
+                '_type': 'url_transparent',
+                'ie_key': 'Ooyala',
+                'url': 'ooyala:%s' % ep['providerId'],
+                'id': video_id,
+                'title': ep['title'],
+                'description': ep.get('description'),
+                'thumbnail': ep.get('imageThumbnail'),
+            }
+        else:
+            raise ExtractorError('Unsupported provider %s' % ep['provider'])
--- a/youtube_dl/extractor/c56.py
+++ b/youtube_dl/extractor/c56.py
@ -2,39 +2,46 @@
 from __future__ import unicode_literals

 import re
-import json

 from .common import InfoExtractor


 class C56IE(InfoExtractor):
-    _VALID_URL = r'https?://((www|player)\.)?56\.com/(.+?/)?(v_|(play_album.+-))(?P<textid>.+?)\.(html|swf)'
+    _VALID_URL = r'https?://(?:(?:www|player)\.)?56\.com/(?:.+?/)?(?:v_|(?:play_album.+-))(?P<textid>.+?)\.(?:html|swf)'
    IE_NAME = '56.com'
    _TEST = {
        'url': 'http://www.56.com/u39/v_OTM0NDA3MTY.html',
-        'file': '93440716.flv',
        'md5': 'e59995ac63d0457783ea05f93f12a866',
        'info_dict': {
+            'id': '93440716',
+            'ext': 'flv',
            'title': '网事知多少 第32期：车怒',
+            'duration': 283.813,
        },
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
        text_id = mobj.group('textid')
-        info_page = self._download_webpage('http://vxml.56.com/json/%s/' % text_id,
-                                           text_id, 'Downloading video info')
-        info = json.loads(info_page)['info']
-        formats = [{
-            'format_id': f['type'],
-            'filesize': int(f['filesize']),
-            'url': f['url']
-        } for f in info['rfiles']]
+
+        page = self._download_json(
+            'http://vxml.56.com/json/%s/' % text_id, text_id, 'Downloading video info')
+
+        info = page['info']
+
+        formats = [
+            {
+                'format_id': f['type'],
+                'filesize': int(f['filesize']),
+                'url': f['url']
+            } for f in info['rfiles']
+        ]
        self._sort_formats(formats)

        return {
            'id': info['vid'],
            'title': info['Subject'],
+            'duration': int(info['duration']) / 1000.0,
            'formats': formats,
            'thumbnail': info.get('bimg') or info.get('img'),
        }
--- a/youtube_dl/extractor/canalplus.py
+++ b/youtube_dl/extractor/canalplus.py
@ -28,7 +28,7 @@ class CanalplusIE(InfoExtractor):
        video_id = mobj.groupdict().get('id')
        if video_id is None:
            webpage = self._download_webpage(url, mobj.group('path'))
-            video_id = self._search_regex(r'videoId = "(\d+)";', webpage, u'video id')
+            video_id = self._search_regex(r'<canal:player videoId="(\d+)"', webpage, u'video id')
        info_url = self._VIDEO_INFO_TEMPLATE % video_id
        doc = self._download_xml(info_url,video_id, 
                                           u'Downloading video info')
--- a/youtube_dl/extractor/cbsnews.py
+++ b/youtube_dl/extractor/cbsnews.py
@ -0,0 +1,87 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+import re
+import json
+
+from .common import InfoExtractor
+
+
+class CBSNewsIE(InfoExtractor):
+    IE_DESC = 'CBS News'
+    _VALID_URL = r'http://(?:www\.)?cbsnews\.com/(?:[^/]+/)+(?P<id>[\da-z_-]+)'
+
+    _TESTS = [
+        {
+            'url': 'http://www.cbsnews.com/news/tesla-and-spacex-elon-musks-industrial-empire/',
+            'info_dict': {
+                'id': 'tesla-and-spacex-elon-musks-industrial-empire',
+                'ext': 'flv',
+                'title': 'Tesla and SpaceX: Elon Musk\'s industrial empire',
+                'thumbnail': 'http://beta.img.cbsnews.com/i/2014/03/30/60147937-2f53-4565-ad64-1bdd6eb64679/60-0330-pelley-640x360.jpg',
+                'duration': 791,
+            },
+            'params': {
+                # rtmp download
+                'skip_download': True,
+            },
+        },
+        {
+            'url': 'http://www.cbsnews.com/videos/fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack/',
+            'info_dict': {
+                'id': 'fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack',
+                'ext': 'flv',
+                'title': 'Fort Hood shooting: Army downplays mental illness as cause of attack',
+                'thumbnail': 'http://cbsnews2.cbsistatic.com/hub/i/r/2014/04/04/0c9fbc66-576b-41ca-8069-02d122060dd2/thumbnail/140x90/6dad7a502f88875ceac38202984b6d58/en-0404-werner-replace-640x360.jpg',
+                'duration': 205,
+            },
+            'params': {
+                # rtmp download
+                'skip_download': True,
+            },
+        },
+    ]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+
+        video_info = json.loads(self._html_search_regex(
+            r'(?:<ul class="media-list items" id="media-related-items"><li data-video-info|<div id="cbsNewsVideoPlayer" data-video-player-options)=\'({.+?})\'',
+            webpage, 'video JSON info'))
+
+        item = video_info['item'] if 'item' in video_info else video_info
+        title = item.get('articleTitle') or item.get('hed')
+        duration = item.get('duration')
+        thumbnail = item.get('mediaImage') or item.get('thumbnail')
+
+        formats = []
+        for format_id in ['RtmpMobileLow', 'RtmpMobileHigh', 'Hls', 'RtmpDesktop']:
+            uri = item.get('media' + format_id + 'URI')
+            if not uri:
+                continue
+            fmt = {
+                'url': uri,
+                'format_id': format_id,
+            }
+            if uri.startswith('rtmp'):
+                fmt.update({
+                    'app': 'ondemand?auth=cbs',
+                    'play_path': 'mp4:' + uri.split('<break>')[-1],
+                    'player_url': 'http://www.cbsnews.com/[[IMPORT]]/vidtech.cbsinteractive.com/player/3_3_0/CBSI_PLAYER_HD.swf',
+                    'page_url': 'http://www.cbsnews.com',
+                    'ext': 'flv',
+                })
+            elif uri.endswith('.m3u8'):
+                fmt['ext'] = 'mp4'
+            formats.append(fmt)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'formats': formats,
+        }
--- a/youtube_dl/extractor/cinemassacre.py
+++ b/youtube_dl/extractor/cinemassacre.py
@ -9,12 +9,12 @@ from ..utils import (


 class CinemassacreIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?cinemassacre\.com/(?P<date_Y>[0-9]{4})/(?P<date_m>[0-9]{2})/(?P<date_d>[0-9]{2})/.+?'
+    _VALID_URL = r'http://(?:www\.)?cinemassacre\.com/(?P<date_Y>[0-9]{4})/(?P<date_m>[0-9]{2})/(?P<date_d>[0-9]{2})/(?P<display_id>[^?#/]+)'
    _TESTS = [
        {
            'url': 'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/',
            'file': '19911.mp4',
-            'md5': 'fde81fbafaee331785f58cd6c0d46190',
+            'md5': '782f8504ca95a0eba8fc9177c373eec7',
            'info_dict': {
                'upload_date': '20121110',
                'title': '“Angry Video Game Nerd: The Movie” – Trailer',
@ -24,7 +24,7 @@ class CinemassacreIE(InfoExtractor):
        {
            'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940',
            'file': '521be8ef82b16.mp4',
-            'md5': 'd72f10cd39eac4215048f62ab477a511',
+            'md5': 'dec39ee5118f8d9cc067f45f9cbe3a35',
            'info_dict': {
                'upload_date': '20131002',
                'title': 'The Mummy’s Hand (1940)',
@ -34,8 +34,9 @@ class CinemassacreIE(InfoExtractor):

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
+        display_id = mobj.group('display_id')

-        webpage = self._download_webpage(url, None)  # Don't know video id yet
+        webpage = self._download_webpage(url, display_id)
        video_date = mobj.group('date_Y') + mobj.group('date_m') + mobj.group('date_d')
        mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?id=(?:Cinemassacre-)?(?P<video_id>.+?))"', webpage)
        if not mobj:
@ -43,33 +44,36 @@ class CinemassacreIE(InfoExtractor):
        playerdata_url = mobj.group('embed_url')
        video_id = mobj.group('video_id')

-        video_title = self._html_search_regex(r'<title>(?P<title>.+?)\|',
-            webpage, 'title')
-        video_description = self._html_search_regex(r'<div class="entry-content">(?P<description>.+?)</div>',
+        video_title = self._html_search_regex(
+            r'<title>(?P<title>.+?)\|', webpage, 'title')
+        video_description = self._html_search_regex(
+            r'<div class="entry-content">(?P<description>.+?)</div>',
            webpage, 'description', flags=re.DOTALL, fatal=False)
-        if len(video_description) == 0:
-            video_description = None

        playerdata = self._download_webpage(playerdata_url, video_id)

-        sd_url = self._html_search_regex(r'file: \'(?P<sd_file>[^\']+)\', label: \'SD\'', playerdata, 'sd_file')
-        hd_url = self._html_search_regex(r'file: \'(?P<hd_file>[^\']+)\', label: \'HD\'', playerdata, 'hd_file')
+        sd_url = self._html_search_regex(r'file: \'([^\']+)\', label: \'SD\'', playerdata, 'sd_file')
+        hd_url = self._html_search_regex(
+            r'file: \'([^\']+)\', label: \'HD\'', playerdata, 'hd_file',
+            default=None)
        video_thumbnail = self._html_search_regex(r'image: \'(?P<thumbnail>[^\']+)\'', playerdata, 'thumbnail', fatal=False)

-        formats = [
-            {
-                'url': sd_url,
-                'ext': 'mp4',
-                'format': 'sd',
-                'format_id': 'sd',
-            },
-            {
+        formats = [{
+            'url': sd_url,
+            'ext': 'mp4',
+            'format': 'sd',
+            'format_id': 'sd',
+            'quality': 1,
+        }]
+        if hd_url:
+            formats.append({
                'url': hd_url,
                'ext': 'mp4',
                'format': 'hd',
                'format_id': 'hd',
-            },
-        ]
+                'quality': 2,
+            })
+        self._sort_formats(formats)

        return {
            'id': video_id,
--- a/youtube_dl/extractor/clipfish.py
+++ b/youtube_dl/extractor/clipfish.py
@ -1,22 +1,28 @@
+from __future__ import unicode_literals
+
 import re
 import time
 import xml.etree.ElementTree

 from .common import InfoExtractor
-from ..utils import ExtractorError
+from ..utils import (
+    ExtractorError,
+    parse_duration,
+)


 class ClipfishIE(InfoExtractor):
-    IE_NAME = u'clipfish'
+    IE_NAME = 'clipfish'

    _VALID_URL = r'^https?://(?:www\.)?clipfish\.de/.*?/video/(?P<id>[0-9]+)/'
    _TEST = {
-        u'url': u'http://www.clipfish.de/special/game-trailer/video/3966754/fifa-14-e3-2013-trailer/',
-        u'file': u'3966754.mp4',
-        u'md5': u'2521cd644e862936cf2e698206e47385',
-        u'info_dict': {
-            u'title': u'FIFA 14 - E3 2013 Trailer',
-            u'duration': 82,
+        'url': 'http://www.clipfish.de/special/game-trailer/video/3966754/fifa-14-e3-2013-trailer/',
+        'md5': '2521cd644e862936cf2e698206e47385',
+        'info_dict': {
+            'id': '3966754',
+            'ext': 'mp4',
+            'title': 'FIFA 14 - E3 2013 Trailer',
+            'duration': 82,
        },
        u'skip': 'Blocked in the US'
    }
@ -33,21 +39,10 @@ class ClipfishIE(InfoExtractor):
        video_url = doc.find('filename').text
        if video_url is None:
            xml_bytes = xml.etree.ElementTree.tostring(doc)
-            raise ExtractorError(u'Cannot find video URL in document %r' %
+            raise ExtractorError('Cannot find video URL in document %r' %
                                 xml_bytes)
        thumbnail = doc.find('imageurl').text
-        duration_str = doc.find('duration').text
-        m = re.match(
-            r'^(?P<hours>[0-9]+):(?P<minutes>[0-9]{2}):(?P<seconds>[0-9]{2}):(?P<ms>[0-9]*)$',
-            duration_str)
-        if m:
-            duration = (
-                (int(m.group('hours')) * 60 * 60) +
-                (int(m.group('minutes')) * 60) +
-                (int(m.group('seconds')))
-            )
-        else:
-            duration = None
+        duration = parse_duration(doc.find('duration').text)

        return {
            'id': video_id,
--- a/youtube_dl/extractor/clipsyndicate.py
+++ b/youtube_dl/extractor/clipsyndicate.py
@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import re

 from .common import InfoExtractor
@ -11,13 +13,14 @@ class ClipsyndicateIE(InfoExtractor):
    _VALID_URL = r'http://www\.clipsyndicate\.com/video/play(list/\d+)?/(?P<id>\d+)'

    _TEST = {
-        u'url': u'http://www.clipsyndicate.com/video/play/4629301/brick_briscoe',
-        u'md5': u'4d7d549451bad625e0ff3d7bd56d776c',
-        u'info_dict': {
-            u'id': u'4629301',
-            u'ext': u'mp4',
-            u'title': u'Brick Briscoe',
-            u'duration': 612,
+        'url': 'http://www.clipsyndicate.com/video/play/4629301/brick_briscoe',
+        'md5': '4d7d549451bad625e0ff3d7bd56d776c',
+        'info_dict': {
+            'id': '4629301',
+            'ext': 'mp4',
+            'title': 'Brick Briscoe',
+            'duration': 612,
+            'thumbnail': 're:^https?://.+\.jpg',
        },
    }

@ -26,13 +29,13 @@ class ClipsyndicateIE(InfoExtractor):
        video_id = mobj.group('id')
        js_player = self._download_webpage(
            'http://eplayer.clipsyndicate.com/embed/player.js?va_id=%s' % video_id,
-            video_id, u'Downlaoding player')
+            video_id, 'Downlaoding player')
        # it includes a required token
-        flvars = self._search_regex(r'flvars: "(.*?)"', js_player, u'flvars')
+        flvars = self._search_regex(r'flvars: "(.*?)"', js_player, 'flvars')

        pdoc = self._download_xml(
            'http://eplayer.clipsyndicate.com/osmf/playlist?%s' % flvars,
-            video_id, u'Downloading video info',
+            video_id, 'Downloading video info',
            transform_source=fix_xml_ampersands)

        track_doc = pdoc.find('trackList/track')
--- a/youtube_dl/extractor/cnet.py
+++ b/youtube_dl/extractor/cnet.py
@ -0,0 +1,75 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+)
+
+
+class CNETIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?cnet\.com/videos/(?P<id>[^/]+)/'
+    _TEST = {
+        'url': 'http://www.cnet.com/videos/hands-on-with-microsofts-windows-8-1-update/',
+        'md5': '041233212a0d06b179c87cbcca1577b8',
+        'info_dict': {
+            'id': '56f4ea68-bd21-4852-b08c-4de5b8354c60',
+            'ext': 'mp4',
+            'title': 'Hands-on with Microsoft Windows 8.1 Update',
+            'description': 'The new update to the Windows 8 OS brings improved performance for mouse and keyboard users.',
+            'thumbnail': 're:^http://.*/flmswindows8.jpg$',
+            'uploader_id': 'sarah.mitroff@cbsinteractive.com',
+            'uploader': 'Sarah Mitroff',
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        display_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, display_id)
+        data_json = self._html_search_regex(
+            r"<div class=\"cnetVideoPlayer\" data-cnet-video-options='([^']+)'",
+            webpage, 'data json')
+        data = json.loads(data_json)
+        vdata = data['video']
+        if not vdata:
+            vdata = data['videos'][0]
+        if not vdata:
+            raise ExtractorError('Cannot find video data')
+
+        video_id = vdata['id']
+        title = vdata['headline']
+        description = vdata.get('dek')
+        thumbnail = vdata.get('image', {}).get('path')
+        author = vdata.get('author')
+        if author:
+            uploader = '%s %s' % (author['firstName'], author['lastName'])
+            uploader_id = author.get('email')
+        else:
+            uploader = None
+            uploader_id = None
+
+        formats = [{
+            'format_id': '%s-%s-%s' % (
+                f['type'], f['format'],
+                int_or_none(f.get('bitrate'), 1000, default='')),
+            'url': f['uri'],
+            'tbr': int_or_none(f.get('bitrate'), 1000),
+        } for f in vdata['files']['data']]
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'title': title,
+            'formats': formats,
+            'description': description,
+            'uploader': uploader,
+            'uploader_id': uploader_id,
+            'thumbnail': thumbnail,
+        }
--- a/youtube_dl/extractor/comedycentral.py
+++ b/youtube_dl/extractor/comedycentral.py
@ -7,8 +7,8 @@ from .mtv import MTVServicesInfoExtractor
 from ..utils import (
    compat_str,
    compat_urllib_parse,
-
    ExtractorError,
+    float_or_none,
    unified_strdate,
 )

@ -21,7 +21,7 @@ class ComedyCentralIE(MTVServicesInfoExtractor):

    _TEST = {
        'url': 'http://www.comedycentral.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother',
-        'md5': '4167875aae411f903b751a21f357f1ee',
+        'md5': 'c4f48e9eda1b16dd10add0744344b6d8',
        'info_dict': {
            'id': 'cef0cbb3-e776-4bc9-b62e-8016deccb354',
            'ext': 'mp4',
@ -32,31 +32,34 @@ class ComedyCentralIE(MTVServicesInfoExtractor):


 class ComedyCentralShowsIE(InfoExtractor):
-    IE_DESC = 'The Daily Show / Colbert Report'
+    IE_DESC = 'The Daily Show / The Colbert Report'
    # urls can be abbreviations like :thedailyshow or :colbert
    # urls for episodes like:
    # or urls for clips like: http://www.thedailyshow.com/watch/mon-december-10-2012/any-given-gun-day
    #                     or: http://www.colbertnation.com/the-colbert-report-videos/421667/november-29-2012/moon-shattering-news
    #                     or: http://www.colbertnation.com/the-colbert-report-collections/422008/festival-of-lights/79524
-    _VALID_URL = r"""^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport)
-                      |(https?://)?(www\.)?
-                          (?P<showname>thedailyshow|colbertnation)\.com/
-                         (full-episodes/(?P<episode>.*)|
+    _VALID_URL = r'''(?x)^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport)
+                      |https?://(:www\.)?
+                          (?P<showname>thedailyshow|thecolbertreport)\.(?:cc\.)?com/
+                         ((?:full-)?episodes/(?:[0-9a-z]{6}/)?(?P<episode>.*)|
                          (?P<clip>
-                              (the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
-                              |(watch/(?P<date>[^/]*)/(?P<tdstitle>.*)))|
+                              (?:(?:guests/[^/]+|videos|video-playlists|special-editions)/[^/]+/(?P<videotitle>[^/?#]+))
+                              |(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
+                              |(watch/(?P<date>[^/]*)/(?P<tdstitle>.*))
+                          )|
                          (?P<interview>
-                              extended-interviews/(?P<interID>[0-9]+)/playlist_tds_extended_(?P<interview_title>.*?)/.*?)))
-                     $"""
+                              extended-interviews/(?P<interID>[0-9a-z]+)/(?:playlist_tds_extended_)?(?P<interview_title>.*?)(/.*?)?)))
+                     (?:[?#].*|$)'''
    _TEST = {
-        'url': 'http://www.thedailyshow.com/watch/thu-december-13-2012/kristen-stewart',
-        'file': '422212.mp4',
+        'url': 'http://thedailyshow.cc.com/watch/thu-december-13-2012/kristen-stewart',
        'md5': '4e2f5cb088a83cd8cdb7756132f9739d',
        'info_dict': {
-            "upload_date": "20121214",
-            "description": "Kristen Stewart",
-            "uploader": "thedailyshow",
-            "title": "thedailyshow-kristen-stewart part 1"
+            'id': 'ab9ab3e7-5a98-4dbe-8b21-551dc0523d55',
+            'ext': 'mp4',
+            'upload_date': '20121213',
+            'description': 'Kristen Stewart learns to let loose in "On the Road."',
+            'uploader': 'thedailyshow',
+            'title': 'thedailyshow kristen-stewart part 1',
        }
    }

@ -79,11 +82,6 @@ class ComedyCentralShowsIE(InfoExtractor):
        '400': (384, 216),
    }

-    @classmethod
-    def suitable(cls, url):
-        """Receives a URL and returns True if suitable for this IE."""
-        return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
-
    @staticmethod
    def _transform_rtmp_url(rtmp_video_url):
        m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp\.comedystor/.*)$', rtmp_video_url)
@ -99,14 +97,16 @@ class ComedyCentralShowsIE(InfoExtractor):

        if mobj.group('shortname'):
            if mobj.group('shortname') in ('tds', 'thedailyshow'):
-                url = 'http://www.thedailyshow.com/full-episodes/'
+                url = 'http://thedailyshow.cc.com/full-episodes/'
            else:
-                url = 'http://www.colbertnation.com/full-episodes/'
+                url = 'http://thecolbertreport.cc.com/full-episodes/'
            mobj = re.match(self._VALID_URL, url, re.VERBOSE)
            assert mobj is not None

        if mobj.group('clip'):
-            if mobj.group('showname') == 'thedailyshow':
+            if mobj.group('videotitle'):
+                epTitle = mobj.group('videotitle')
+            elif mobj.group('showname') == 'thedailyshow':
                epTitle = mobj.group('tdstitle')
            else:
                epTitle = mobj.group('cntitle')
@ -120,9 +120,9 @@ class ComedyCentralShowsIE(InfoExtractor):
                epTitle = mobj.group('showname')
            else:
                epTitle = mobj.group('episode')
+        show_name = mobj.group('showname')

-        self.report_extraction(epTitle)
-        webpage,htmlHandle = self._download_webpage_handle(url, epTitle)
+        webpage, htmlHandle = self._download_webpage_handle(url, epTitle)
        if dlNewest:
            url = htmlHandle.geturl()
            mobj = re.match(self._VALID_URL, url, re.VERBOSE)
@ -130,71 +130,86 @@ class ComedyCentralShowsIE(InfoExtractor):
                raise ExtractorError('Invalid redirected URL: ' + url)
            if mobj.group('episode') == '':
                raise ExtractorError('Redirected URL is still not specific: ' + url)
-            epTitle = mobj.group('episode')
+            epTitle = mobj.group('episode').rpartition('/')[-1]

        mMovieParams = re.findall('(?:<param name="movie" value="|var url = ")(http://media.mtvnservices.com/([^"]*(?:episode|video).*?:.*?))"', webpage)
-
        if len(mMovieParams) == 0:
            # The Colbert Report embeds the information in a without
            # a URL prefix; so extract the alternate reference
            # and then add the URL prefix manually.

-            altMovieParams = re.findall('data-mgid="([^"]*(?:episode|video).*?:.*?)"', webpage)
+            altMovieParams = re.findall('data-mgid="([^"]*(?:episode|video|playlist).*?:.*?)"', webpage)
            if len(altMovieParams) == 0:
                raise ExtractorError('unable to find Flash URL in webpage ' + url)
            else:
                mMovieParams = [("http://media.mtvnservices.com/" + altMovieParams[0], altMovieParams[0])]

        uri = mMovieParams[0][1]
-        indexUrl = 'http://shadow.comedycentral.com/feeds/video_player/mrss/?' + compat_urllib_parse.urlencode({'uri': uri})
-        idoc = self._download_xml(indexUrl, epTitle,
-                                          'Downloading show index',
-                                          'unable to download episode index')
+        # Correct cc.com in uri
+        uri = re.sub(r'(episode:[^.]+)(\.cc)?\.com', r'\1.cc.com', uri)

-        results = []
+        index_url = 'http://%s.cc.com/feeds/mrss?%s' % (show_name, compat_urllib_parse.urlencode({'uri': uri}))
+        idoc = self._download_xml(
+            index_url, epTitle,
+            'Downloading show index', 'Unable to download episode index')

-        itemEls = idoc.findall('.//item')
-        for partNum,itemEl in enumerate(itemEls):
-            mediaId = itemEl.findall('./guid')[0].text
-            shortMediaId = mediaId.split(':')[-1]
-            showId = mediaId.split(':')[-2].replace('.com', '')
-            officialTitle = itemEl.findall('./title')[0].text
-            officialDate = unified_strdate(itemEl.findall('./pubDate')[0].text)
+        title = idoc.find('./channel/title').text
+        description = idoc.find('./channel/description').text

-            configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' +
-                        compat_urllib_parse.urlencode({'uri': mediaId}))
-            cdoc = self._download_xml(configUrl, epTitle,
-                                               'Downloading configuration for %s' % shortMediaId)
+        entries = []
+        item_els = idoc.findall('.//item')
+        for part_num, itemEl in enumerate(item_els):
+            upload_date = unified_strdate(itemEl.findall('./pubDate')[0].text)
+            thumbnail = itemEl.find('.//{http://search.yahoo.com/mrss/}thumbnail').attrib.get('url')
+
+            content = itemEl.find('.//{http://search.yahoo.com/mrss/}content')
+            duration = float_or_none(content.attrib.get('duration'))
+            mediagen_url = content.attrib['url']
+            guid = itemEl.find('./guid').text.rpartition(':')[-1]
+
+            cdoc = self._download_xml(
+                mediagen_url, epTitle,
+                'Downloading configuration for segment %d / %d' % (part_num + 1, len(item_els)))

            turls = []
            for rendition in cdoc.findall('.//rendition'):
                finfo = (rendition.attrib['bitrate'], rendition.findall('./src')[0].text)
                turls.append(finfo)

-            if len(turls) == 0:
-                self._downloader.report_error('unable to download ' + mediaId + ': No videos found')
-                continue
-
            formats = []
            for format, rtmp_video_url in turls:
                w, h = self._video_dimensions.get(format, (None, None))
                formats.append({
+                    'format_id': 'vhttp-%s' % format,
                    'url': self._transform_rtmp_url(rtmp_video_url),
                    'ext': self._video_extensions.get(format, 'mp4'),
-                    'format_id': format,
                    'height': h,
                    'width': w,
                })
+                formats.append({
+                    'format_id': 'rtmp-%s' % format,
+                    'url': rtmp_video_url,
+                    'ext': self._video_extensions.get(format, 'mp4'),
+                    'height': h,
+                    'width': w,
+                })
+                self._sort_formats(formats)

-            effTitle = showId + '-' + epTitle + ' part ' + compat_str(partNum+1)
-            results.append({
-                'id': shortMediaId,
+            virtual_id = show_name + ' ' + epTitle + ' part ' + compat_str(part_num + 1)
+            entries.append({
+                'id': guid,
+                'title': virtual_id,
                'formats': formats,
-                'uploader': showId,
-                'upload_date': officialDate,
-                'title': effTitle,
-                'thumbnail': None,
-                'description': compat_str(officialTitle),
+                'uploader': show_name,
+                'upload_date': upload_date,
+                'duration': duration,
+                'thumbnail': thumbnail,
+                'description': description,
            })

-        return results
+        return {
+            '_type': 'playlist',
+            'entries': entries,
+            'title': show_name + ' ' + title,
+            'description': description,
+        }
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@ -251,7 +251,21 @@ class InfoExtractor(object):
            with open(filename, 'wb') as outf:
                outf.write(webpage_bytes)

-        content = webpage_bytes.decode(encoding, 'replace')
+        try:
+            content = webpage_bytes.decode(encoding, 'replace')
+        except LookupError:
+            content = webpage_bytes.decode('utf-8', 'replace')
+
+        if (u'<title>Access to this site is blocked</title>' in content and
+                u'Websense' in content[:512]):
+            msg = u'Access to this webpage has been blocked by Websense filtering software in your network.'
+            blocked_iframe = self._html_search_regex(
+                r'<iframe src="([^"]+)"', content,
+                u'Websense information URL', default=None)
+            if blocked_iframe:
+                msg += u' Visit %s for more details' % blocked_iframe
+            raise ExtractorError(msg, expected=True)
+
        return (content, urlh)

    def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True):
--- a/youtube_dl/extractor/cspan.py
+++ b/youtube_dl/extractor/cspan.py
@ -4,6 +4,7 @@ import re

 from .common import InfoExtractor
 from ..utils import (
+    int_or_none,
    unescapeHTML,
    find_xpath_attr,
 )
@ -54,18 +55,29 @@ class CSpanIE(InfoExtractor):
        info_url = 'http://c-spanvideo.org/videoLibrary/assets/player/ajax-player.php?os=android&html5=program&id=' + video_id
        data = self._download_json(info_url, video_id)

-        url = unescapeHTML(data['video']['files'][0]['path']['#text'])
-
-        doc = self._download_xml('http://www.c-span.org/common/services/flashXml.php?programid=' + video_id,
+        doc = self._download_xml(
+            'http://www.c-span.org/common/services/flashXml.php?programid=' + video_id,
            video_id)

-        def find_string(s):
-            return find_xpath_attr(doc, './/string', 'name', s).text
+        title = find_xpath_attr(doc, './/string', 'name', 'title').text
+        thumbnail = find_xpath_attr(doc, './/string', 'name', 'poster').text
+
+        files = data['video']['files']
+
+        entries = [{
+            'id': '%s_%d' % (video_id, partnum + 1),
+            'title': (
+                title if len(files) == 1 else
+                '%s part %d' % (title, partnum + 1)),
+            'url': unescapeHTML(f['path']['#text']),
+            'description': description,
+            'thumbnail': thumbnail,
+            'duration': int_or_none(f.get('length', {}).get('#text')),
+        } for partnum, f in enumerate(files)]

        return {
+            '_type': 'playlist',
+            'entries': entries,
+            'title': title,
            'id': video_id,
-            'title': find_string('title'),
-            'url': url,
-            'description': description,
-            'thumbnail': find_string('poster'),
        }
--- a/youtube_dl/extractor/dailymotion.py
+++ b/youtube_dl/extractor/dailymotion.py
@ -8,7 +8,6 @@ from .subtitles import SubtitlesInfoExtractor
 from ..utils import (
    compat_urllib_request,
    compat_str,
-    get_element_by_attribute,
    get_element_by_id,
    orderedSet,
    str_to_int,
@ -180,7 +179,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
 class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
    IE_NAME = u'dailymotion:playlist'
    _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>.+?)/'
-    _MORE_PAGES_INDICATOR = r'<div class="next">.*?<a.*?href="/playlist/.+?".*?>.*?</a>.*?</div>'
+    _MORE_PAGES_INDICATOR = r'(?s)<div class="pages[^"]*">.*?<a\s+class="[^"]*?icon-arrow_right[^"]*?"'
    _PAGE_TEMPLATE = 'https://www.dailymotion.com/playlist/%s/%s'

    def _extract_entries(self, id):
@ -190,10 +189,9 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
            webpage = self._download_webpage(request,
                                             id, u'Downloading page %s' % pagenum)

-            playlist_el = get_element_by_attribute(u'class', u'row video_list', webpage)
-            video_ids.extend(re.findall(r'data-id="(.+?)"', playlist_el))
+            video_ids.extend(re.findall(r'data-id="(.+?)"', webpage))

-            if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
+            if re.search(self._MORE_PAGES_INDICATOR, webpage) is None:
                break
        return [self.url_result('http://www.dailymotion.com/video/%s' % video_id, 'Dailymotion')
                   for video_id in orderedSet(video_ids)]
@ -203,17 +201,17 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
        playlist_id = mobj.group('id')
        webpage = self._download_webpage(url, playlist_id)

-        return {'_type': 'playlist',
-                'id': playlist_id,
-                'title': get_element_by_id(u'playlist_name', webpage),
-                'entries': self._extract_entries(playlist_id),
-                }
+        return {
+            '_type': 'playlist',
+            'id': playlist_id,
+            'title': self._og_search_title(webpage),
+            'entries': self._extract_entries(playlist_id),
+        }


 class DailymotionUserIE(DailymotionPlaylistIE):
    IE_NAME = u'dailymotion:user'
-    _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/user/(?P<user>[^/]+)'
-    _MORE_PAGES_INDICATOR = r'<div class="next">.*?<a.*?href="/user/.+?".*?>.*?</a>.*?</div>'
+    _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/user/(?P<user>[^/]+)'
    _PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s'

    def _real_extract(self, url):
--- a/youtube_dl/extractor/discovery.py
+++ b/youtube_dl/extractor/discovery.py
@ -10,9 +10,10 @@ class DiscoveryIE(InfoExtractor):
    _VALID_URL = r'http://dsc\.discovery\.com\/[a-zA-Z0-9\-]*/[a-zA-Z0-9\-]*/videos/(?P<id>[a-zA-Z0-9\-]*)(.htm)?'
    _TEST = {
        'url': 'http://dsc.discovery.com/tv-shows/mythbusters/videos/mission-impossible-outtakes.htm',
-        'file': '614784.mp4',
        'md5': 'e12614f9ee303a6ccef415cb0793eba2',
        'info_dict': {
+            'id': '614784',
+            'ext': 'mp4',
            'title': 'MythBusters: Mission Impossible Outtakes',
            'description': ('Watch Jamie Hyneman and Adam Savage practice being'
                ' each other -- to the point of confusing Jamie\'s dog -- and '
@ -34,7 +35,7 @@ class DiscoveryIE(InfoExtractor):
        formats = []
        for f in info['mp4']:
            formats.append(
-                {'url': f['src'], r'ext': r'mp4', 'tbr': int(f['bitrate'][:-1])})
+                {'url': f['src'], 'ext': 'mp4', 'tbr': int(f['bitrate'][:-1])})

        return {
            'id': info['contentId'],
--- a/youtube_dl/extractor/divxstage.py
+++ b/youtube_dl/extractor/divxstage.py
@ -0,0 +1,27 @@
+from __future__ import unicode_literals
+
+from .novamov import NovaMovIE
+
+
+class DivxStageIE(NovaMovIE):
+    IE_NAME = 'divxstage'
+    IE_DESC = 'DivxStage'
+
+    _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'divxstage\.(?:eu|net|ch|co|at|ag)'}
+
+    _HOST = 'www.divxstage.eu'
+
+    _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
+    _TITLE_REGEX = r'<div class="video_det">\s*<strong>([^<]+)</strong>'
+    _DESCRIPTION_REGEX = r'<div class="video_det">\s*<strong>[^<]+</strong>\s*<p>([^<]+)</p>'
+
+    _TEST = {
+        'url': 'http://www.divxstage.eu/video/57f238e2e5e01',
+        'md5': '63969f6eb26533a1968c4d325be63e72',
+        'info_dict': {
+            'id': '57f238e2e5e01',
+            'ext': 'flv',
+            'title': 'youtubedl test video',
+            'description': 'This is a test video for youtubedl.',
+        }
+    }
--- a/youtube_dl/extractor/ehow.py
+++ b/youtube_dl/extractor/ehow.py
@ -1,23 +1,25 @@
+from __future__ import unicode_literals
+
 import re

 from ..utils import (
    compat_urllib_parse,
-    determine_ext
 )
 from .common import InfoExtractor


 class EHowIE(InfoExtractor):
-    IE_NAME = u'eHow'
-    _VALID_URL = r'(?:https?://)?(?:www\.)?ehow\.com/[^/_?]*_(?P<id>[0-9]+)'
+    IE_NAME = 'eHow'
+    _VALID_URL = r'https?://(?:www\.)?ehow\.com/[^/_?]*_(?P<id>[0-9]+)'
    _TEST = {
-        u'url': u'http://www.ehow.com/video_12245069_hardwood-flooring-basics.html',
-        u'file': u'12245069.flv',
-        u'md5': u'9809b4e3f115ae2088440bcb4efbf371',
-        u'info_dict': {
-            u"title": u"Hardwood Flooring Basics",
-            u"description": u"Hardwood flooring may be time consuming, but its ultimately a pretty straightforward concept. Learn about hardwood flooring basics with help from a hardware flooring business owner in this free video...",
-   			u"uploader": u"Erick Nathan"
+        'url': 'http://www.ehow.com/video_12245069_hardwood-flooring-basics.html',
+        'md5': '9809b4e3f115ae2088440bcb4efbf371',
+        'info_dict': {
+            'id': '12245069',
+            'ext': 'flv',
+            'title': 'Hardwood Flooring Basics',
+            'description': 'Hardwood flooring may be time consuming, but its ultimately a pretty straightforward concept. Learn about hardwood flooring basics with help from a hardware flooring business owner in this free video...',
+            'uploader': 'Erick Nathan',
        }
    }

@ -26,21 +28,16 @@ class EHowIE(InfoExtractor):
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)
        video_url = self._search_regex(r'(?:file|source)=(http[^\'"&]*)',
-            webpage, u'video URL')
-        final_url = compat_urllib_parse.unquote(video_url)        
-        uploader = self._search_regex(r'<meta name="uploader" content="(.+?)" />',
-            webpage, u'uploader')
+            webpage, 'video URL')
+        final_url = compat_urllib_parse.unquote(video_url)
+        uploader = self._html_search_meta('uploader', webpage)
        title = self._og_search_title(webpage).replace(' | eHow', '')
-        ext = determine_ext(final_url)

        return {
-            '_type':       'video',
-            'id':          video_id,
-            'url':         final_url,
-            'ext':         ext,
-            'title':       title,
-            'thumbnail':   self._og_search_thumbnail(webpage),
+            'id': video_id,
+            'url': final_url,
+            'title': title,
+            'thumbnail': self._og_search_thumbnail(webpage),
            'description': self._og_search_description(webpage),
-            'uploader':    uploader,
+            'uploader': uploader,
        }
-
--- a/youtube_dl/extractor/firstpost.py
+++ b/youtube_dl/extractor/firstpost.py
@ -6,7 +6,6 @@ from .common import InfoExtractor


 class FirstpostIE(InfoExtractor):
-    IE_NAME = 'Firstpost.com'
    _VALID_URL = r'http://(?:www\.)?firstpost\.com/[^/]+/.*-(?P<id>[0-9]+)\.html'

    _TEST = {
@ -16,7 +15,6 @@ class FirstpostIE(InfoExtractor):
            'id': '1025403',
            'ext': 'mp4',
            'title': 'India to launch indigenous aircraft carrier INS Vikrant today',
-            'description': 'Its flight deck is over twice the size of a football field, its power unit can light up the entire Kochi city and the cabling is enough to cover the distance between here to Delhi.',
        }
    }

@ -24,15 +22,26 @@ class FirstpostIE(InfoExtractor):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

-        webpage = self._download_webpage(url, video_id)
-        video_url = self._html_search_regex(
-            r'<div.*?name="div_video".*?flashvars="([^"]+)">',
-            webpage, 'video URL')
+        data = self._download_xml(
+            'http://www.firstpost.com/getvideoxml-%s.xml' % video_id, video_id,
+            'Downloading video XML')
+
+        item = data.find('./playlist/item')
+        thumbnail = item.find('./image').text
+        title = item.find('./title').text
+
+        formats = [
+            {
+                'url': details.find('./file').text,
+                'format_id': details.find('./label').text.strip(),
+                'width': int(details.find('./width').text.strip()),
+                'height': int(details.find('./height').text.strip()),
+            } for details in item.findall('./source/file_details') if details.find('./file').text
+        ]

        return {
            'id': video_id,
-            'url': video_url,
-            'title': self._og_search_title(webpage),
-            'description': self._og_search_description(webpage),
-            'thumbnail': self._og_search_thumbnail(webpage),
+            'title': title,
+            'thumbnail': thumbnail,
+            'formats': formats,
        }
--- a/youtube_dl/extractor/franceculture.py
+++ b/youtube_dl/extractor/franceculture.py
@ -0,0 +1,77 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_parse_qs,
+    compat_urlparse,
+)
+
+
+class FranceCultureIE(InfoExtractor):
+    _VALID_URL = r'(?P<baseurl>http://(?:www\.)?franceculture\.fr/)player/reecouter\?play=(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'http://www.franceculture.fr/player/reecouter?play=4795174',
+        'info_dict': {
+            'id': '4795174',
+            'ext': 'mp3',
+            'title': 'Rendez-vous au pays des geeks',
+            'vcodec': 'none',
+            'uploader': 'Colette Fellous',
+            'upload_date': '20140301',
+            'duration': 3601,
+            'thumbnail': r're:^http://www\.franceculture\.fr/.*/images/player/Carnet-nomade\.jpg$',
+            'description': 'Avec :Jean-Baptiste Péretié pour son documentaire sur Arte "La revanche des « geeks », une enquête menée aux Etats-Unis dans la S ...',
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        baseurl = mobj.group('baseurl')
+
+        webpage = self._download_webpage(url, video_id)
+        params_code = self._search_regex(
+            r"<param name='movie' value='/sites/all/modules/rf/rf_player/swf/loader.swf\?([^']+)' />",
+            webpage, 'parameter code')
+        params = compat_parse_qs(params_code)
+        video_url = compat_urlparse.urljoin(baseurl, params['urlAOD'][0])
+
+        title = self._html_search_regex(
+            r'<h1 class="title[^"]+">(.+?)</h1>', webpage, 'title')
+        uploader = self._html_search_regex(
+            r'(?s)<div id="emission".*?<span class="author">(.*?)</span>',
+            webpage, 'uploader', fatal=False)
+        thumbnail_part = self._html_search_regex(
+            r'(?s)<div id="emission".*?<img src="([^"]+)"', webpage,
+            'thumbnail', fatal=False)
+        if thumbnail_part is None:
+            thumbnail = None
+        else:
+            thumbnail = compat_urlparse.urljoin(baseurl, thumbnail_part)
+        description = self._html_search_regex(
+            r'(?s)<p class="desc">(.*?)</p>', webpage, 'description')
+
+        info = json.loads(params['infoData'][0])[0]
+        duration = info.get('media_length')
+        upload_date_candidate = info.get('media_section5')
+        upload_date = (
+            upload_date_candidate
+            if (upload_date_candidate is not None and
+                re.match(r'[0-9]{8}$', upload_date_candidate))
+            else None)
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'vcodec': 'none' if video_url.lower().endswith('.mp3') else None,
+            'duration': duration,
+            'uploader': uploader,
+            'upload_date': upload_date,
+            'title': title,
+            'thumbnail': thumbnail,
+            'description': description,
+        }
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@ -25,6 +25,7 @@ from ..utils import (
 from .brightcove import BrightcoveIE
 from .ooyala import OoyalaIE
 from .rutv import RUTVIE
+from .smotri import SmotriIE


 class GenericIE(InfoExtractor):
@ -81,6 +82,17 @@ class GenericIE(InfoExtractor):
            },
            'add_ie': ['Brightcove'],
        },
+        {
+            'url': 'http://www.championat.com/video/football/v/87/87499.html',
+            'md5': 'fb973ecf6e4a78a67453647444222983',
+            'info_dict': {
+                'id': '3414141473001',
+                'ext': 'mp4',
+                'title': 'Видео. Удаление Дзагоева (ЦСКА)',
+                'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
+                'uploader': 'Championat',
+            },
+        },
        # Direct link to a video
        {
            'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
@ -102,20 +114,6 @@ class GenericIE(InfoExtractor):
                'title': '2cc213299525360.mov',  # that's what we get
            },
        },
-        # second style of embedded ooyala videos
-        {
-            'url': 'http://www.smh.com.au/tv/business/show/financial-review-sunday/behind-the-scenes-financial-review-sunday--4350201.html',
-            'info_dict': {
-                'id': '13djJjYjptA1XpPx8r9kuzPyj3UZH0Uk',
-                'ext': 'mp4',
-                'title': 'Behind-the-scenes: Financial Review Sunday ',
-                'description': 'Step inside Channel Nine studios for an exclusive tour of its upcoming financial business show.',
-            },
-            'params': {
-                # m3u8 download
-                'skip_download': True,
-            },
-        },
        # google redirect
        {
            'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
@ -186,6 +184,17 @@ class GenericIE(InfoExtractor):
                'description': 'md5:ddb2a40ecd6b6a147e400e535874947b',
            }
        },
+        # Embeded Ustream video
+        {
+            'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm',
+            'md5': '27b99cdb639c9b12a79bca876a073417',
+            'info_dict': {
+                'id': '45734260',
+                'ext': 'flv',
+                'uploader': 'AU SPA:  The NSA and Privacy',
+                'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman'
+            }
+        },
        # nowvideo embed hidden behind percent encoding
        {
            'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
@ -197,6 +206,36 @@ class GenericIE(InfoExtractor):
                'description': 'No description',
            },
        },
+        # arte embed
+        {
+            'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
+            'md5': '7653032cbb25bf6c80d80f217055fa43',
+            'info_dict': {
+                'id': '048195-004_PLUS7-F',
+                'ext': 'flv',
+                'title': 'X:enius',
+                'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
+                'upload_date': '20140320',
+            },
+            'params': {
+                'skip_download': 'Requires rtmpdump'
+            }
+        },
+        # smotri embed
+        {
+            'url': 'http://rbctv.rbc.ru/archive/news/562949990879132.shtml',
+            'md5': 'ec40048448e9284c9a1de77bb188108b',
+            'info_dict': {
+                'id': 'v27008541fad',
+                'ext': 'mp4',
+                'title': 'Крым и Севастополь вошли в состав России',
+                'description': 'md5:fae01b61f68984c7bd2fa741e11c3175',
+                'duration': 900,
+                'upload_date': '20140318',
+                'uploader': 'rbctv_2012_4',
+                'uploader_id': 'rbctv_2012_4',
+            },
+        },
    ]

    def report_download_webpage(self, video_id):
@ -285,13 +324,16 @@ class GenericIE(InfoExtractor):
        if not parsed_url.scheme:
            default_search = self._downloader.params.get('default_search')
            if default_search is None:
-                default_search = 'auto'
+                default_search = 'auto_warning'

-            if default_search == 'auto':
+            if default_search in ('auto', 'auto_warning'):
                if '/' in url:
                    self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
                    return self.url_result('http://' + url)
                else:
+                    if default_search == 'auto_warning':
+                        self._downloader.report_warning(
+                            'Falling back to youtube search for  %s . Set --default-search to "auto" to suppress this warning.' % url)
                    return self.url_result('ytsearch:' + url)
            else:
                assert ':' in default_search
@ -469,17 +511,18 @@ class GenericIE(InfoExtractor):
        if mobj is not None:
            return self.url_result(mobj.group(1), 'Mpora')

-        # Look for embedded NovaMov player
+        # Look for embedded NovaMov-based player
        mobj = re.search(
-            r'<iframe[^>]+?src=(["\'])(?P<url>http://(?:(?:embed|www)\.)?novamov\.com/embed\.php.+?)\1', webpage)
+            r'''(?x)<iframe[^>]+?src=(["\'])
+                    (?P<url>http://(?:(?:embed|www)\.)?
+                        (?:novamov\.com|
+                           nowvideo\.(?:ch|sx|eu|at|ag|co)|
+                           videoweed\.(?:es|com)|
+                           movshare\.(?:net|sx|ag)|
+                           divxstage\.(?:eu|net|ch|co|at|ag))
+                        /embed\.php.+?)\1''', webpage)
        if mobj is not None:
-            return self.url_result(mobj.group('url'), 'NovaMov')
-
-        # Look for embedded NowVideo player
-        mobj = re.search(
-            r'<iframe[^>]+?src=(["\'])(?P<url>http://(?:(?:embed|www)\.)?nowvideo\.(?:ch|sx|eu)/embed\.php.+?)\1', webpage)
-        if mobj is not None:
-            return self.url_result(mobj.group('url'), 'NowVideo')
+            return self.url_result(mobj.group('url'))

        # Look for embedded Facebook player
        mobj = re.search(
@ -525,6 +568,24 @@ class GenericIE(InfoExtractor):
        if mobj is not None:
            return self.url_result(mobj.group('url'), 'TED')

+        # Look for embedded Ustream videos
+        mobj = re.search(
+            r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
+        if mobj is not None:
+            return self.url_result(mobj.group('url'), 'Ustream')
+
+        # Look for embedded arte.tv player
+        mobj = re.search(
+            r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"',
+            webpage)
+        if mobj is not None:
+            return self.url_result(mobj.group('url'), 'ArteTVEmbed')
+
+        # Look for embedded smotri.com player
+        smotri_url = SmotriIE._extract_url(webpage)
+        if smotri_url:
+            return self.url_result(smotri_url, 'Smotri')
+
        # Start with something easy: JW Player in SWFObject
        mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
        if mobj is None:
--- a/youtube_dl/extractor/huffpost.py
+++ b/youtube_dl/extractor/huffpost.py
@ -21,9 +21,10 @@ class HuffPostIE(InfoExtractor):

    _TEST = {
        'url': 'http://live.huffingtonpost.com/r/segment/legalese-it/52dd3e4b02a7602131000677',
-        'file': '52dd3e4b02a7602131000677.mp4',
        'md5': '55f5e8981c1c80a64706a44b74833de8',
        'info_dict': {
+            'id': '52dd3e4b02a7602131000677',
+            'ext': 'mp4',
            'title': 'Legalese It! with @MikeSacksHP',
            'description': 'This week on Legalese It, Mike talks to David Bosco about his new book on the ICC, "Rough Justice," he also discusses the Virginia AG\'s historic stance on gay marriage, the execution of Edgar Tamayo, the ICC\'s delay of Kenya\'s President and more.  ',
            'duration': 1549,
--- a/youtube_dl/extractor/ign.py
+++ b/youtube_dl/extractor/ign.py
@ -1,10 +1,8 @@
+from __future__ import unicode_literals
+
 import re
-import json

 from .common import InfoExtractor
-from ..utils import (
-    determine_ext,
-)


 class IGNIE(InfoExtractor):
@ -14,52 +12,57 @@ class IGNIE(InfoExtractor):
    """

    _VALID_URL = r'https?://.+?\.ign\.com/(?P<type>videos|show_videos|articles|(?:[^/]*/feature))(/.+)?/(?P<name_or_id>.+)'
-    IE_NAME = u'ign.com'
+    IE_NAME = 'ign.com'

    _CONFIG_URL_TEMPLATE = 'http://www.ign.com/videos/configs/id/%s.config'
-    _DESCRIPTION_RE = [r'<span class="page-object-description">(.+?)</span>',
-                       r'id="my_show_video">.*?<p>(.*?)</p>',
-                       ]
+    _DESCRIPTION_RE = [
+        r'<span class="page-object-description">(.+?)</span>',
+        r'id="my_show_video">.*?<p>(.*?)</p>',
+    ]

    _TESTS = [
        {
-            u'url': u'http://www.ign.com/videos/2013/06/05/the-last-of-us-review',
-            u'file': u'8f862beef863986b2785559b9e1aa599.mp4',
-            u'md5': u'eac8bdc1890980122c3b66f14bdd02e9',
-            u'info_dict': {
-                u'title': u'The Last of Us Review',
-                u'description': u'md5:c8946d4260a4d43a00d5ae8ed998870c',
+            'url': 'http://www.ign.com/videos/2013/06/05/the-last-of-us-review',
+            'md5': 'eac8bdc1890980122c3b66f14bdd02e9',
+            'info_dict': {
+                'id': '8f862beef863986b2785559b9e1aa599',
+                'ext': 'mp4',
+                'title': 'The Last of Us Review',
+                'description': 'md5:c8946d4260a4d43a00d5ae8ed998870c',
            }
        },
        {
-            u'url': u'http://me.ign.com/en/feature/15775/100-little-things-in-gta-5-that-will-blow-your-mind',
-            u'playlist': [
+            'url': 'http://me.ign.com/en/feature/15775/100-little-things-in-gta-5-that-will-blow-your-mind',
+            'playlist': [
                {
-                    u'file': u'5ebbd138523268b93c9141af17bec937.mp4',
-                    u'info_dict': {
-                        u'title': u'GTA 5 Video Review',
-                        u'description': u'Rockstar drops the mic on this generation of games. Watch our review of the masterly Grand Theft Auto V.',
+                    'info_dict': {
+                        'id': '5ebbd138523268b93c9141af17bec937',
+                        'ext': 'mp4',
+                        'title': 'GTA 5 Video Review',
+                        'description': 'Rockstar drops the mic on this generation of games. Watch our review of the masterly Grand Theft Auto V.',
                    },
                },
                {
-                    u'file': u'638672ee848ae4ff108df2a296418ee2.mp4',
-                    u'info_dict': {
-                        u'title': u'26 Twisted Moments from GTA 5 in Slow Motion',
-                        u'description': u'The twisted beauty of GTA 5 in stunning slow motion.',
+                    'info_dict': {
+                        'id': '638672ee848ae4ff108df2a296418ee2',
+                        'ext': 'mp4',
+                        'title': '26 Twisted Moments from GTA 5 in Slow Motion',
+                        'description': 'The twisted beauty of GTA 5 in stunning slow motion.',
                    },
                },
            ],
-            u'params': {
-                u'skip_download': True,
+            'params': {
+                'skip_download': True,
            },
        },
    ]

    def _find_video_id(self, webpage):
-        res_id = [r'data-video-id="(.+?)"',
-                  r'<object id="vid_(.+?)"',
-                  r'<meta name="og:image" content=".*/(.+?)-(.+?)/.+.jpg"',
-                  ]
+        res_id = [
+            r'data-video-id="(.+?)"',
+            r'<object id="vid_(.+?)"',
+            r'<meta name="og:image" content=".*/(.+?)-(.+?)/.+.jpg"',
+        ]
        return self._search_regex(res_id, webpage, 'video id')

    def _real_extract(self, url):
@ -68,7 +71,7 @@ class IGNIE(InfoExtractor):
        page_type = mobj.group('type')
        webpage = self._download_webpage(url, name_or_id)
        if page_type == 'articles':
-            video_url = self._search_regex(r'var videoUrl = "(.+?)"', webpage, u'video url')
+            video_url = self._search_regex(r'var videoUrl = "(.+?)"', webpage, 'video url')
            return self.url_result(video_url, ie='IGN')
        elif page_type != 'video':
            multiple_urls = re.findall(
@ -80,41 +83,37 @@ class IGNIE(InfoExtractor):
        video_id = self._find_video_id(webpage)
        result = self._get_video_info(video_id)
        description = self._html_search_regex(self._DESCRIPTION_RE,
-                                              webpage, 'video description',
-                                              flags=re.DOTALL)
+            webpage, 'video description', flags=re.DOTALL)
        result['description'] = description
        return result

    def _get_video_info(self, video_id):
        config_url = self._CONFIG_URL_TEMPLATE % video_id
-        config = json.loads(self._download_webpage(config_url, video_id,
-                            u'Downloading video info'))
+        config = self._download_json(config_url, video_id)
        media = config['playlist']['media']
-        video_url = media['url']

-        return {'id': media['metadata']['videoId'],
-                'url': video_url,
-                'ext': determine_ext(video_url),
-                'title': media['metadata']['title'],
-                'thumbnail': media['poster'][0]['url'].replace('{size}', 'grande'),
-                }
+        return {
+            'id': media['metadata']['videoId'],
+            'url': media['url'],
+            'title': media['metadata']['title'],
+            'thumbnail': media['poster'][0]['url'].replace('{size}', 'grande'),
+        }


 class OneUPIE(IGNIE):
-    """Extractor for 1up.com, it uses the ign videos system."""
-
    _VALID_URL = r'https?://gamevideos\.1up\.com/(?P<type>video)/id/(?P<name_or_id>.+)'
    IE_NAME = '1up.com'

    _DESCRIPTION_RE = r'<div id="vid_summary">(.+?)</div>'

    _TEST = {
-        u'url': u'http://gamevideos.1up.com/video/id/34976',
-        u'file': u'34976.mp4',
-        u'md5': u'68a54ce4ebc772e4b71e3123d413163d',
-        u'info_dict': {
-            u'title': u'Sniper Elite V2 - Trailer',
-            u'description': u'md5:5d289b722f5a6d940ca3136e9dae89cf',
+        'url': 'http://gamevideos.1up.com/video/id/34976',
+        'md5': '68a54ce4ebc772e4b71e3123d413163d',
+        'info_dict': {
+            'id': '34976',
+            'ext': 'mp4',
+            'title': 'Sniper Elite V2 - Trailer',
+            'description': 'md5:5d289b722f5a6d940ca3136e9dae89cf',
        }
    }

@ -123,7 +122,6 @@ class OneUPIE(IGNIE):

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
-        id = mobj.group('name_or_id')
        result = super(OneUPIE, self)._real_extract(url)
-        result['id'] = id
+        result['id'] = mobj.group('name_or_id')
        return result
--- a/youtube_dl/extractor/justintv.py
+++ b/youtube_dl/extractor/justintv.py
@ -1,9 +1,12 @@
+from __future__ import unicode_literals
+
 import json
 import os
 import re

 from .common import InfoExtractor
 from ..utils import (
+    compat_str,
    ExtractorError,
    formatSeconds,
 )
@ -24,34 +27,31 @@ class JustinTVIE(InfoExtractor):
        /?(?:\#.*)?$
        """
    _JUSTIN_PAGE_LIMIT = 100
-    IE_NAME = u'justin.tv'
+    IE_NAME = 'justin.tv'
+    IE_DESC = 'justin.tv and twitch.tv'
    _TEST = {
-        u'url': u'http://www.twitch.tv/thegamedevhub/b/296128360',
-        u'file': u'296128360.flv',
-        u'md5': u'ecaa8a790c22a40770901460af191c9a',
-        u'info_dict': {
-            u"upload_date": u"20110927", 
-            u"uploader_id": 25114803, 
-            u"uploader": u"thegamedevhub", 
-            u"title": u"Beginner Series - Scripting With Python Pt.1"
+        'url': 'http://www.twitch.tv/thegamedevhub/b/296128360',
+        'md5': 'ecaa8a790c22a40770901460af191c9a',
+        'info_dict': {
+            'id': '296128360',
+            'ext': 'flv',
+            'upload_date': '20110927',
+            'uploader_id': 25114803,
+            'uploader': 'thegamedevhub',
+            'title': 'Beginner Series - Scripting With Python Pt.1'
        }
    }

-    def report_download_page(self, channel, offset):
-        """Report attempt to download a single page of videos."""
-        self.to_screen(u'%s: Downloading video information from %d to %d' %
-                (channel, offset, offset + self._JUSTIN_PAGE_LIMIT))
-
    # Return count of items, list of *valid* items
    def _parse_page(self, url, video_id):
        info_json = self._download_webpage(url, video_id,
-                                           u'Downloading video info JSON',
-                                           u'unable to download video info JSON')
+                                           'Downloading video info JSON',
+                                           'unable to download video info JSON')

        response = json.loads(info_json)
        if type(response) != list:
            error_text = response.get('error', 'unknown error')
-            raise ExtractorError(u'Justin.tv API: %s' % error_text)
+            raise ExtractorError('Justin.tv API: %s' % error_text)
        info = []
        for clip in response:
            video_url = clip['video_file_url']
@ -62,7 +62,7 @@ class JustinTVIE(InfoExtractor):
                video_id = clip['id']
                video_title = clip.get('title', video_id)
                info.append({
-                    'id': video_id,
+                    'id': compat_str(video_id),
                    'url': video_url,
                    'title': video_title,
                    'uploader': clip.get('channel_name', video_uploader_id),
@ -74,8 +74,6 @@ class JustinTVIE(InfoExtractor):

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
-        if mobj is None:
-            raise ExtractorError(u'invalid URL: %s' % url)

        api_base = 'http://api.justin.tv'
        paged = False
@ -89,40 +87,41 @@ class JustinTVIE(InfoExtractor):
            webpage = self._download_webpage(url, chapter_id)
            m = re.search(r'PP\.archive_id = "([0-9]+)";', webpage)
            if not m:
-                raise ExtractorError(u'Cannot find archive of a chapter')
+                raise ExtractorError('Cannot find archive of a chapter')
            archive_id = m.group(1)

            api = api_base + '/broadcast/by_chapter/%s.xml' % chapter_id
-            doc = self._download_xml(api, chapter_id,
-                                             note=u'Downloading chapter information',
-                                             errnote=u'Chapter information download failed')
+            doc = self._download_xml(
+                api, chapter_id,
+                note='Downloading chapter information',
+                errnote='Chapter information download failed')
            for a in doc.findall('.//archive'):
                if archive_id == a.find('./id').text:
                    break
            else:
-                raise ExtractorError(u'Could not find chapter in chapter information')
+                raise ExtractorError('Could not find chapter in chapter information')

            video_url = a.find('./video_file_url').text
-            video_ext = video_url.rpartition('.')[2] or u'flv'
+            video_ext = video_url.rpartition('.')[2] or 'flv'

-            chapter_api_url = u'https://api.twitch.tv/kraken/videos/c' + chapter_id
-            chapter_info_json = self._download_webpage(chapter_api_url, u'c' + chapter_id,
-                                   note='Downloading chapter metadata',
-                                   errnote='Download of chapter metadata failed')
-            chapter_info = json.loads(chapter_info_json)
+            chapter_api_url = 'https://api.twitch.tv/kraken/videos/c' + chapter_id
+            chapter_info = self._download_json(
+                chapter_api_url, 'c' + chapter_id,
+                note='Downloading chapter metadata',
+                errnote='Download of chapter metadata failed')

            bracket_start = int(doc.find('.//bracket_start').text)
            bracket_end = int(doc.find('.//bracket_end').text)

            # TODO determine start (and probably fix up file)
            #  youtube-dl -v http://www.twitch.tv/firmbelief/c/1757457
-            #video_url += u'?start=' + TODO:start_timestamp
+            #video_url += '?start=' + TODO:start_timestamp
            # bracket_start is 13290, but we want 51670615
-            self._downloader.report_warning(u'Chapter detected, but we can just download the whole file. '
-                                            u'Chapter starts at %s and ends at %s' % (formatSeconds(bracket_start), formatSeconds(bracket_end)))
+            self._downloader.report_warning('Chapter detected, but we can just download the whole file. '
+                                            'Chapter starts at %s and ends at %s' % (formatSeconds(bracket_start), formatSeconds(bracket_end)))

            info = {
-                'id': u'c' + chapter_id,
+                'id': 'c' + chapter_id,
                'url': video_url,
                'ext': video_ext,
                'title': chapter_info['title'],
@ -131,14 +130,12 @@ class JustinTVIE(InfoExtractor):
                'uploader': chapter_info['channel']['display_name'],
                'uploader_id': chapter_info['channel']['name'],
            }
-            return [info]
+            return info
        else:
            video_id = mobj.group('videoid')
            api = api_base + '/broadcast/by_archive/%s.json' % video_id

-        self.report_extraction(video_id)
-
-        info = []
+        entries = []
        offset = 0
        limit = self._JUSTIN_PAGE_LIMIT
        while True:
@ -146,8 +143,12 @@ class JustinTVIE(InfoExtractor):
                self.report_download_page(video_id, offset)
            page_url = api + ('?offset=%d&limit=%d' % (offset, limit))
            page_count, page_info = self._parse_page(page_url, video_id)
-            info.extend(page_info)
+            entries.extend(page_info)
            if not paged or page_count != limit:
                break
            offset += limit
-        return info
+        return {
+            '_type': 'playlist',
+            'id': video_id,
+            'entries': entries,
+        }
--- a/youtube_dl/extractor/keezmovies.py
+++ b/youtube_dl/extractor/keezmovies.py
@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import os
 import re

@ -11,22 +13,22 @@ from ..aes import (
    aes_decrypt_text
 )

+
 class KeezMoviesIE(InfoExtractor):
-    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>keezmovies\.com/video/.+?(?P<videoid>[0-9]+))(?:[/?&]|$)'
+    _VALID_URL = r'^https?://(?:www\.)?keezmovies\.com/video/.+?(?P<videoid>[0-9]+)(?:[/?&]|$)'
    _TEST = {
-        u'url': u'http://www.keezmovies.com/video/petite-asian-lady-mai-playing-in-bathtub-1214711',
-        u'file': u'1214711.mp4',
-        u'md5': u'6e297b7e789329923fcf83abb67c9289',
-        u'info_dict': {
-            u"title": u"Petite Asian Lady Mai Playing In Bathtub",
-            u"age_limit": 18,
+        'url': 'http://www.keezmovies.com/video/petite-asian-lady-mai-playing-in-bathtub-1214711',
+        'file': '1214711.mp4',
+        'md5': '6e297b7e789329923fcf83abb67c9289',
+        'info_dict': {
+            'title': 'Petite Asian Lady Mai Playing In Bathtub',
+            'age_limit': 18,
        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('videoid')
-        url = 'http://www.' + mobj.group('url')

        req = compat_urllib_request.Request(url)
        req.add_header('Cookie', 'age_verified=1')
@ -38,10 +40,10 @@ class KeezMoviesIE(InfoExtractor):
            embedded_url = mobj.group(1)
            return self.url_result(embedded_url)

-        video_title = self._html_search_regex(r'<h1 [^>]*>([^<]+)', webpage, u'title')
-        video_url = compat_urllib_parse.unquote(self._html_search_regex(r'video_url=(.+?)&amp;', webpage, u'video_url'))
-        if webpage.find('encrypted=true')!=-1:
-            password = self._html_search_regex(r'video_title=(.+?)&amp;', webpage, u'password')
+        video_title = self._html_search_regex(r'<h1 [^>]*>([^<]+)', webpage, 'title')
+        video_url = compat_urllib_parse.unquote(self._html_search_regex(r'video_url=(.+?)&amp;', webpage, 'video_url'))
+        if 'encrypted=true' in webpage:
+            password = self._html_search_regex(r'video_title=(.+?)&amp;', webpage, 'password')
            video_url = aes_decrypt_text(video_url, password, 32).decode('utf-8')
        path = compat_urllib_parse_urlparse(video_url).path
        extension = os.path.splitext(path)[1][1:]
--- a/youtube_dl/extractor/kickstarter.py
+++ b/youtube_dl/extractor/kickstarter.py
@ -1,37 +1,39 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
 import re

 from .common import InfoExtractor


 class KickStarterIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.kickstarter\.com/projects/(?P<id>\d*)/.*'
+    _VALID_URL = r'https?://www\.kickstarter\.com/projects/(?P<id>[^/]*)/.*'
    _TEST = {
-        u"url": u"https://www.kickstarter.com/projects/1404461844/intersection-the-story-of-josh-grant?ref=home_location",
-        u"file": u"1404461844.mp4",
-        u"md5": u"c81addca81327ffa66c642b5d8b08cab",
-        u"info_dict": {
-            u"title": u"Intersection: The Story of Josh Grant by Kyle Cowling",
+        'url': 'https://www.kickstarter.com/projects/1404461844/intersection-the-story-of-josh-grant?ref=home_location',
+        'md5': 'c81addca81327ffa66c642b5d8b08cab',
+        'info_dict': {
+            'id': '1404461844',
+            'ext': 'mp4',
+            'title': 'Intersection: The Story of Josh Grant by Kyle Cowling',
+            'description': 'A unique motocross documentary that examines the '
+                'life and mind of one of sports most elite athletes: Josh Grant.',
        },
    }

    def _real_extract(self, url):
        m = re.match(self._VALID_URL, url)
        video_id = m.group('id')
-        webpage_src = self._download_webpage(url, video_id)
+        webpage = self._download_webpage(url, video_id)

-        video_url = self._search_regex(r'data-video="(.*?)">',
-            webpage_src, u'video URL')
-        if 'mp4' in video_url:
-            ext = 'mp4'
-        else:
-            ext = 'flv'
-        video_title = self._html_search_regex(r"<title>(.*?)</title>",
-            webpage_src, u'title').rpartition(u'\u2014 Kickstarter')[0].strip()
+        video_url = self._search_regex(r'data-video-url="(.*?)"',
+            webpage, 'video URL')
+        video_title = self._html_search_regex(r'<title>(.*?)</title>',
+            webpage, 'title').rpartition('— Kickstarter')[0].strip()

-        results = [{
-                    'id': video_id,
-                    'url': video_url,
-                    'title': video_title,
-                    'ext': ext,
-                    }]
-        return results
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': video_title,
+            'description': self._og_search_description(webpage),
+            'thumbnail': self._og_search_thumbnail(webpage),
+        }
--- a/youtube_dl/extractor/metacritic.py
+++ b/youtube_dl/extractor/metacritic.py
@ -13,8 +13,9 @@ class MetacriticIE(InfoExtractor):

    _TEST = {
        'url': 'http://www.metacritic.com/game/playstation-4/infamous-second-son/trailers/3698222',
-        'file': '3698222.mp4',
        'info_dict': {
+            'id': '3698222',
+            'ext': 'mp4',
            'title': 'inFamous: Second Son - inSide Sucker Punch: Smoke & Mirrors',
            'description': 'Take a peak behind-the-scenes to see how Sucker Punch brings smoke into the universe of inFAMOUS Second Son on the PS4.',
            'duration': 221,
--- a/youtube_dl/extractor/mooshare.py
+++ b/youtube_dl/extractor/mooshare.py
@ -14,7 +14,7 @@ from ..utils import (
 class MooshareIE(InfoExtractor):
    IE_NAME = 'mooshare'
    IE_DESC = 'Mooshare.biz'
-    _VALID_URL = r'http://mooshare\.biz/(?P<id>[\da-z]{12})'
+    _VALID_URL = r'http://(?:www\.)?mooshare\.biz/(?P<id>[\da-z]{12})'

    _TESTS = [
        {
--- a/youtube_dl/extractor/morningstar.py
+++ b/youtube_dl/extractor/morningstar.py
@ -0,0 +1,47 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class MorningstarIE(InfoExtractor):
+    IE_DESC = 'morningstar.com'
+    _VALID_URL = r'https?://(?:www\.)?morningstar\.com/[cC]over/video[cC]enter\.aspx\?id=(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'http://www.morningstar.com/cover/videocenter.aspx?id=615869',
+        'md5': '6c0acface7a787aadc8391e4bbf7b0f5',
+        'info_dict': {
+            'id': '615869',
+            'ext': 'mp4',
+            'title': 'Get Ahead of the Curve on 2013 Taxes',
+            'description': "Vanguard's Joel Dickson on managing higher tax rates for high-income earners and fund capital-gain distributions in 2013.",
+            'thumbnail': r're:^https?://.*m(?:orning)?star\.com/.+thumb\.jpg$'
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+        title = self._html_search_regex(
+            r'<h1 id="titleLink">(.*?)</h1>', webpage, 'title')
+        video_url = self._html_search_regex(
+            r'<input type="hidden" id="hidVideoUrl" value="([^"]+)"',
+            webpage, 'video URL')
+        thumbnail = self._html_search_regex(
+            r'<input type="hidden" id="hidSnapshot" value="([^"]+)"',
+            webpage, 'thumbnail', fatal=False)
+        description = self._html_search_regex(
+            r'<div id="mstarDeck".*?>(.*?)</div>',
+            webpage, 'description', fatal=False)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'url': video_url,
+            'thumbnail': thumbnail,
+            'description': description,
+        }
--- a/youtube_dl/extractor/motorsport.py
+++ b/youtube_dl/extractor/motorsport.py
@ -0,0 +1,63 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import hashlib
+import json
+import re
+import time
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_parse_qs,
+    compat_str,
+    int_or_none,
+)
+
+
+class MotorsportIE(InfoExtractor):
+    IE_DESC = 'motorsport.com'
+    _VALID_URL = r'http://www\.motorsport\.com/[^/?#]+/video/(?:[^/?#]+/)(?P<id>[^/]+)/(?:$|[?#])'
+    _TEST = {
+        'url': 'http://www.motorsport.com/f1/video/main-gallery/red-bull-racing-2014-rules-explained/',
+        'md5': '5592cb7c5005d9b2c163df5ac3dc04e4',
+        'info_dict': {
+            'id': '7063',
+            'ext': 'mp4',
+            'title': 'Red Bull Racing: 2014 Rules Explained',
+            'duration': 207,
+            'description': 'A new clip from Red Bull sees Daniel Ricciardo and Sebastian Vettel explain the 2014 Formula One regulations – which are arguably the most complex the sport has ever seen.',
+            'uploader': 'rainiere',
+            'thumbnail': r're:^http://.*motorsport\.com/.+\.jpg$'
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        display_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, display_id)
+        flashvars_code = self._html_search_regex(
+            r'<embed id="player".*?flashvars="([^"]+)"', webpage, 'flashvars')
+        flashvars = compat_parse_qs(flashvars_code)
+        params = json.loads(flashvars['parameters'][0])
+
+        e = compat_str(int(time.time()) + 24 * 60 * 60)
+        base_video_url = params['location'] + '?e=' + e
+        s = 'h3hg713fh32'
+        h = hashlib.md5((s + base_video_url).encode('utf-8')).hexdigest()
+        video_url = base_video_url + '&h=' + h
+
+        uploader = self._html_search_regex(
+            r'(?s)<span class="label">Video by: </span>(.*?)</a>', webpage,
+            'uploader', fatal=False)
+
+        return {
+            'id': params['video_id'],
+            'display_id': display_id,
+            'title': params['title'],
+            'url': video_url,
+            'description': params.get('description'),
+            'thumbnail': params.get('main_thumb'),
+            'duration': int_or_none(params.get('duration')),
+            'uploader': uploader,
+        }
--- a/youtube_dl/extractor/movshare.py
+++ b/youtube_dl/extractor/movshare.py
@ -0,0 +1,27 @@
+from __future__ import unicode_literals
+
+from .novamov import NovaMovIE
+
+
+class MovShareIE(NovaMovIE):
+    IE_NAME = 'movshare'
+    IE_DESC = 'MovShare'
+
+    _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'movshare\.(?:net|sx|ag)'}
+
+    _HOST = 'www.movshare.net'
+
+    _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
+    _TITLE_REGEX = r'<strong>Title:</strong> ([^<]+)</p>'
+    _DESCRIPTION_REGEX = r'<strong>Description:</strong> ([^<]+)</p>'
+
+    _TEST = {
+        'url': 'http://www.movshare.net/video/559e28be54d96',
+        'md5': 'abd31a2132947262c50429e1d16c1bfd',
+        'info_dict': {
+            'id': '559e28be54d96',
+            'ext': 'flv',
+            'title': 'dissapeared image',
+            'description': 'optical illusion  dissapeared image  magic illusion',
+        }
+    }
--- a/youtube_dl/extractor/mpora.py
+++ b/youtube_dl/extractor/mpora.py
@ -4,9 +4,7 @@ import json
 import re

 from .common import InfoExtractor
-from ..utils import (
-    int_or_none,
-)
+from ..utils import int_or_none


 class MporaIE(InfoExtractor):
@ -20,7 +18,7 @@ class MporaIE(InfoExtractor):
        'info_dict': {
            'title': 'Katy Curd -  Winter in the Forest',
            'duration': 416,
-            'uploader': 'petenewman',
+            'uploader': 'Peter Newman Media',
        },
    }

--- a/youtube_dl/extractor/musicplayon.py
+++ b/youtube_dl/extractor/musicplayon.py
@ -0,0 +1,75 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class MusicPlayOnIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:.+?\.)?musicplayon\.com/play(?:-touch)?\?(?:v|pl=100&play)=(?P<id>\d+)'
+
+    _TEST = {
+        'url': 'http://en.musicplayon.com/play?v=433377',
+        'info_dict': {
+            'id': '433377',
+            'ext': 'mp4',
+            'title': 'Rick Ross - Interview On Chelsea Lately (2014)',
+            'description': 'Rick Ross Interview On Chelsea Lately',
+            'duration': 342,
+            'uploader': 'ultrafish',
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        page = self._download_webpage(url, video_id)
+
+        title = self._og_search_title(page)
+        description = self._og_search_description(page)
+        thumbnail = self._og_search_thumbnail(page)
+        duration = self._html_search_meta('video:duration', page, 'duration', fatal=False)
+        view_count = self._og_search_property('count', page, fatal=False)
+        uploader = self._html_search_regex(
+            r'<div>by&nbsp;<a href="[^"]+" class="purple">([^<]+)</a></div>', page, 'uploader', fatal=False)
+
+        formats = [
+            {
+                'url': 'http://media0-eu-nl.musicplayon.com/stream-mobile?id=%s&type=.mp4' % video_id,
+                'ext': 'mp4',
+            }
+        ]
+
+        manifest = self._download_webpage(
+            'http://en.musicplayon.com/manifest.m3u8?v=%s' % video_id, video_id, 'Downloading manifest')
+
+        for entry in manifest.split('#')[1:]:
+            if entry.startswith('EXT-X-STREAM-INF:'):
+                meta, url, _ = entry.split('\n')
+                params = dict(param.split('=') for param in meta.split(',')[1:])
+                formats.append({
+                    'url': url,
+                    'ext': 'mp4',
+                    'tbr': int(params['BANDWIDTH']),
+                    'width': int(params['RESOLUTION'].split('x')[1]),
+                    'height': int(params['RESOLUTION'].split('x')[-1]),
+                    'format_note': params['NAME'].replace('"', '').strip(),
+                })
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'uploader': uploader,
+            'duration': int_or_none(duration),
+            'view_count': int_or_none(view_count),
+            'formats': formats,
+        }
--- a/youtube_dl/extractor/nba.py
+++ b/youtube_dl/extractor/nba.py
@ -6,12 +6,13 @@ from .common import InfoExtractor


 class NBAIE(InfoExtractor):
-    _VALID_URL = r'^(?:https?://)?(?:watch\.|www\.)?nba\.com/(?:nba/)?video(/[^?]*?)(?:/index\.html)?(?:\?.*)?$'
+    _VALID_URL = r'https?://(?:watch\.|www\.)?nba\.com/(?:nba/)?video(?P<id>/[^?]*?)(?:/index\.html)?(?:\?.*)?$'
    _TEST = {
        'url': 'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html',
-        'file': u'0021200253-okc-bkn-recap.nba.mp4',
        'md5': u'c0edcfc37607344e2ff8f13c378c88a4',
        'info_dict': {
+            'id': '0021200253-okc-bkn-recap.nba',
+            'ext': 'mp4',
            'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.',
            'title': 'Thunder vs. Nets',
        },
@ -19,7 +20,7 @@ class NBAIE(InfoExtractor):

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group(1)
+        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)

@ -33,7 +34,6 @@ class NBAIE(InfoExtractor):
        return {
            'id': shortened_video_id,
            'url': video_url,
-            'ext': 'mp4',
            'title': title,
            'description': description,
        }
--- a/youtube_dl/extractor/niconico.py
+++ b/youtube_dl/extractor/niconico.py
@ -1,12 +1,10 @@
 # encoding: utf-8
+from __future__ import unicode_literals

 import re
-import socket

 from .common import InfoExtractor
 from ..utils import (
-    compat_http_client,
-    compat_urllib_error,
    compat_urllib_parse,
    compat_urllib_request,
    compat_urlparse,
@ -18,57 +16,54 @@ from ..utils import (


 class NiconicoIE(InfoExtractor):
-    IE_NAME = u'niconico'
-    IE_DESC = u'ニコニコ動画'
+    IE_NAME = 'niconico'
+    IE_DESC = 'ニコニコ動画'

    _TEST = {
-        u'url': u'http://www.nicovideo.jp/watch/sm22312215',
-        u'file': u'sm22312215.mp4',
-        u'md5': u'd1a75c0823e2f629128c43e1212760f9',
-        u'info_dict': {
-            u'title': u'Big Buck Bunny',
-            u'uploader': u'takuya0301',
-            u'uploader_id': u'2698420',
-            u'upload_date': u'20131123',
-            u'description': u'(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org',
+        'url': 'http://www.nicovideo.jp/watch/sm22312215',
+        'md5': 'd1a75c0823e2f629128c43e1212760f9',
+        'info_dict': {
+            'id': 'sm22312215',
+            'ext': 'mp4',
+            'title': 'Big Buck Bunny',
+            'uploader': 'takuya0301',
+            'uploader_id': '2698420',
+            'upload_date': '20131123',
+            'description': '(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org',
        },
-        u'params': {
-            u'username': u'ydl.niconico@gmail.com',
-            u'password': u'youtube-dl',
+        'params': {
+            'username': 'ydl.niconico@gmail.com',
+            'password': 'youtube-dl',
        },
    }

    _VALID_URL = r'^https?://(?:www\.|secure\.)?nicovideo\.jp/watch/([a-z][a-z][0-9]+)(?:.*)$'
    _NETRC_MACHINE = 'niconico'
-    # If True it will raise an error if no login info is provided
-    _LOGIN_REQUIRED = True

    def _real_initialize(self):
        self._login()

    def _login(self):
        (username, password) = self._get_login_info()
-        # No authentication to be performed
        if username is None:
-            if self._LOGIN_REQUIRED:
-                raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
-            return False
+            # Login is required
+            raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)

        # Log in
        login_form_strs = {
-            u'mail': username,
-            u'password': password,
+            'mail': username,
+            'password': password,
        }
        # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
        # chokes on unicode
-        login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
+        login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in login_form_strs.items())
        login_data = compat_urllib_parse.urlencode(login_form).encode('utf-8')
        request = compat_urllib_request.Request(
-            u'https://secure.nicovideo.jp/secure/login', login_data)
+            'https://secure.nicovideo.jp/secure/login', login_data)
        login_results = self._download_webpage(
-            request, u'', note=u'Logging in', errnote=u'Unable to log in')
+            request, None, note='Logging in', errnote='Unable to log in')
        if re.search(r'(?i)<h1 class="mb8p4">Log in error</h1>', login_results) is not None:
-            self._downloader.report_warning(u'unable to log in: bad username or password')
+            self._downloader.report_warning('unable to log in: bad username or password')
            return False
        return True

@ -82,12 +77,12 @@ class NiconicoIE(InfoExtractor):

        video_info = self._download_xml(
            'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id,
-            note=u'Downloading video info page')
+            note='Downloading video info page')

        # Get flv info
        flv_info_webpage = self._download_webpage(
-            u'http://flapi.nicovideo.jp/api/getflv?v=' + video_id,
-            video_id, u'Downloading flv info')
+            'http://flapi.nicovideo.jp/api/getflv?v=' + video_id,
+            video_id, 'Downloading flv info')
        video_real_url = compat_urlparse.parse_qs(flv_info_webpage)['url'][0]

        # Start extracting information
@ -106,22 +101,22 @@ class NiconicoIE(InfoExtractor):
        url = 'http://seiga.nicovideo.jp/api/user/info?id=' + video_uploader_id
        try:
            user_info = self._download_xml(
-                url, video_id, note=u'Downloading user information')
+                url, video_id, note='Downloading user information')
            video_uploader = user_info.find('.//nickname').text
-        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            self._downloader.report_warning(u'Unable to download user info webpage: %s' % compat_str(err))
+        except ExtractorError as err:
+            self._downloader.report_warning('Unable to download user info webpage: %s' % compat_str(err))

        return {
-            'id':          video_id,
-            'url':         video_real_url,
-            'title':       video_title,
-            'ext':         video_extension,
-            'format':      video_format,
-            'thumbnail':   video_thumbnail,
+            'id': video_id,
+            'url': video_real_url,
+            'title': video_title,
+            'ext': video_extension,
+            'format': video_format,
+            'thumbnail': video_thumbnail,
            'description': video_description,
-            'uploader':    video_uploader,
+            'uploader': video_uploader,
            'upload_date': video_upload_date,
            'uploader_id': video_uploader_id,
-            'view_count':  video_view_count,
+            'view_count': video_view_count,
            'webpage_url': video_webpage_url,
        }
--- a/youtube_dl/extractor/ninegag.py
+++ b/youtube_dl/extractor/ninegag.py
@ -7,9 +7,14 @@ from .common import InfoExtractor

 class NineGagIE(InfoExtractor):
    IE_NAME = '9gag'
-    _VALID_URL = r'^https?://(?:www\.)?9gag\.tv/v/(?P<id>[0-9]+)'
+    _VALID_URL = r'''(?x)^https?://(?:www\.)?9gag\.tv/
+        (?:
+            v/(?P<numid>[0-9]+)|
+            p/(?P<id>[a-zA-Z0-9]+)/(?P<display_id>[^?#/]+)
+        )
+    '''

-    _TEST = {
+    _TESTS = [{
        "url": "http://9gag.tv/v/1912",
        "info_dict": {
            "id": "1912",
@ -20,17 +25,33 @@ class NineGagIE(InfoExtractor):
            "thumbnail": "re:^https?://",
        },
        'add_ie': ['Youtube']
-    }
+    },
+    {
+        'url': 'http://9gag.tv/p/KklwM/alternate-banned-opening-scene-of-gravity?ref=fsidebar',
+        'info_dict': {
+            'id': 'KklwM',
+            'ext': 'mp4',
+            'display_id': 'alternate-banned-opening-scene-of-gravity',
+            "description": "While Gravity was a pretty awesome movie already, YouTuber Krishna Shenoi came up with a way to improve upon it, introducing a much better solution to Sandra Bullock's seemingly endless tumble in space. The ending is priceless.",
+            'title': "Banned Opening Scene Of \"Gravity\" That Changes The Whole Movie",
+        },
+    }]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = mobj.group('numid') or mobj.group('id')
+        display_id = mobj.group('display_id') or video_id

-        webpage = self._download_webpage(url, video_id)
+        webpage = self._download_webpage(url, display_id)

        youtube_id = self._html_search_regex(
            r'(?s)id="jsid-video-post-container".*?data-external-id="([^"]+)"',
            webpage, 'video ID')
+        title = self._html_search_regex(
+            r'(?s)id="jsid-video-post-container".*?data-title="([^"]+)"',
+            webpage, 'title', default=None)
+        if not title:
+            title = self._og_search_title(webpage)
        description = self._html_search_regex(
            r'(?s)<div class="video-caption">.*?<p>(.*?)</p>', webpage,
            'description', fatal=False)
@ -46,7 +67,8 @@ class NineGagIE(InfoExtractor):
            'url': youtube_id,
            'ie_key': 'Youtube',
            'id': video_id,
-            'title': self._og_search_title(webpage),
+            'display_id': display_id,
+            'title': title,
            'description': description,
            'view_count': view_count,
            'thumbnail': self._og_search_thumbnail(webpage),
--- a/youtube_dl/extractor/novamov.py
+++ b/youtube_dl/extractor/novamov.py
@ -13,7 +13,8 @@ class NovaMovIE(InfoExtractor):
    IE_NAME = 'novamov'
    IE_DESC = 'NovaMov'

-    _VALID_URL = r'http://(?:(?:www\.)?%(host)s/video/|(?:(?:embed|www)\.)%(host)s/embed\.php\?(?:.*?&)?v=)(?P<videoid>[a-z\d]{13})' % {'host': 'novamov\.com'}
+    _VALID_URL_TEMPLATE = r'http://(?:(?:www\.)?%(host)s/(?:file|video)/|(?:(?:embed|www)\.)%(host)s/embed\.php\?(?:.*?&)?v=)(?P<id>[a-z\d]{13})'
+    _VALID_URL = _VALID_URL_TEMPLATE % {'host': 'novamov\.com'}

    _HOST = 'www.novamov.com'

@ -36,18 +37,17 @@ class NovaMovIE(InfoExtractor):

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('videoid')
+        video_id = mobj.group('id')

        page = self._download_webpage(
            'http://%s/video/%s' % (self._HOST, video_id), video_id, 'Downloading video page')

        if re.search(self._FILE_DELETED_REGEX, page) is not None:
-            raise ExtractorError(u'Video %s does not exist' % video_id, expected=True)
+            raise ExtractorError('Video %s does not exist' % video_id, expected=True)

        filekey = self._search_regex(self._FILEKEY_REGEX, page, 'filekey')

        title = self._html_search_regex(self._TITLE_REGEX, page, 'title', fatal=False)
-
        description = self._html_search_regex(self._DESCRIPTION_REGEX, page, 'description', default='', fatal=False)

        api_response = self._download_webpage(
--- a/youtube_dl/extractor/nowvideo.py
+++ b/youtube_dl/extractor/nowvideo.py
@ -7,7 +7,7 @@ class NowVideoIE(NovaMovIE):
    IE_NAME = 'nowvideo'
    IE_DESC = 'NowVideo'

-    _VALID_URL = r'http://(?:(?:www\.)?%(host)s/video/|(?:(?:embed|www)\.)%(host)s/embed\.php\?(?:.*?&)?v=)(?P<videoid>[a-z\d]{13})' % {'host': 'nowvideo\.(?:ch|sx|eu)'}
+    _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'nowvideo\.(?:ch|sx|eu|at|ag|co)'}

    _HOST = 'www.nowvideo.ch'

--- a/youtube_dl/extractor/ntv.py
+++ b/youtube_dl/extractor/ntv.py
@ -0,0 +1,157 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    unescapeHTML
+)
+
+
+class NTVIE(InfoExtractor):
+    _VALID_URL = r'http://(?:www\.)?ntv\.ru/(?P<id>.+)'
+
+    _TESTS = [
+        {
+            'url': 'http://www.ntv.ru/novosti/863142/',
+            'info_dict': {
+                'id': '746000',
+                'ext': 'flv',
+                'title': 'Командующий Черноморским флотом провел переговоры в штабе ВМС Украины',
+                'description': 'Командующий Черноморским флотом провел переговоры в штабе ВМС Украины',
+                'duration': 136,
+            },
+            'params': {
+                    # rtmp download
+                    'skip_download': True,
+                },
+        },
+        {
+            'url': 'http://www.ntv.ru/video/novosti/750370/',
+            'info_dict': {
+                'id': '750370',
+                'ext': 'flv',
+                'title': 'Родные пассажиров пропавшего Boeing не верят в трагический исход',
+                'description': 'Родные пассажиров пропавшего Boeing не верят в трагический исход',
+                'duration': 172,
+            },
+            'params': {
+                    # rtmp download
+                    'skip_download': True,
+                },
+        },
+        {
+            'url': 'http://www.ntv.ru/peredacha/segodnya/m23700/o232416',
+            'info_dict': {
+                'id': '747480',
+                'ext': 'flv',
+                'title': '«Сегодня». 21 марта 2014 года. 16:00 ',
+                'description': '«Сегодня». 21 марта 2014 года. 16:00 ',
+                'duration': 1496,
+            },
+            'params': {
+                    # rtmp download
+                    'skip_download': True,
+                },
+        },
+        {
+            'url': 'http://www.ntv.ru/kino/Koma_film',
+            'info_dict': {
+                'id': '758100',
+                'ext': 'flv',
+                'title': 'Остросюжетный фильм «Кома»',
+                'description': 'Остросюжетный фильм «Кома»',
+                'duration': 5592,
+            },
+            'params': {
+                    # rtmp download
+                    'skip_download': True,
+                },
+        },
+        {
+            'url': 'http://www.ntv.ru/serial/Delo_vrachey/m31760/o233916/',
+            'info_dict': {
+                'id': '751482',
+                'ext': 'flv',
+                'title': '«Дело врачей»: «Деревце жизни»',
+                'description': '«Дело врачей»: «Деревце жизни»',
+                'duration': 2590,
+            },
+            'params': {
+                    # rtmp download
+                    'skip_download': True,
+                },
+        },
+    ]
+
+    _VIDEO_ID_REGEXES = [
+        r'<meta property="og:url" content="http://www\.ntv\.ru/video/(\d+)',
+        r'<video embed=[^>]+><id>(\d+)</id>',
+        r'<video restriction[^>]+><key>(\d+)</key>'
+    ]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        page = self._download_webpage(url, video_id, 'Downloading page')
+
+        for pattern in self._VIDEO_ID_REGEXES:
+            mobj = re.search(pattern, page)
+            if mobj:
+                break
+
+        if not mobj:
+            raise ExtractorError('No media links available for %s' % video_id)
+
+        video_id = mobj.group(1)
+
+        player = self._download_xml('http://www.ntv.ru/vi%s/' % video_id, video_id, 'Downloading video XML')
+        title = unescapeHTML(player.find('./data/title').text)
+        description = unescapeHTML(player.find('./data/description').text)
+
+        video = player.find('./data/video')
+        video_id = video.find('./id').text
+        thumbnail = video.find('./splash').text
+        duration = int(video.find('./totaltime').text)
+        view_count = int(video.find('./views').text)
+        puid22 = video.find('./puid22').text
+
+        apps = {
+            '4': 'video1',
+            '7': 'video2',
+        }
+
+        app = apps[puid22] if puid22 in apps else apps['4']
+
+        formats = []
+        for format_id in ['', 'hi', 'webm']:
+            file = video.find('./%sfile' % format_id)
+            if file is None:
+                continue
+            size = video.find('./%ssize' % format_id)
+            formats.append({
+                'url': 'rtmp://media.ntv.ru/%s' % app,
+                'app': app,
+                'play_path': file.text,
+                'rtmp_conn': 'B:1',
+                'player_url': 'http://www.ntv.ru/swf/vps1.swf?update=20131128',
+                'page_url': 'http://www.ntv.ru',
+                'flash_ver': 'LNX 11,2,202,341',
+                'rtmp_live': True,
+                'ext': 'flv',
+                'filesize': int(size.text),
+            })
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'view_count': view_count,
+            'formats': formats,
+        }
--- a/youtube_dl/extractor/oe1.py
+++ b/youtube_dl/extractor/oe1.py
@ -0,0 +1,40 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import calendar
+import datetime
+import re
+
+from .common import InfoExtractor
+
+# audios on oe1.orf.at are only available for 7 days, so we can't
+# add tests.
+
+
+class OE1IE(InfoExtractor):
+    IE_DESC = 'oe1.orf.at'
+    _VALID_URL = r'http://oe1\.orf\.at/programm/(?P<id>[0-9]+)'
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        show_id = mobj.group('id')
+
+        data = self._download_json(
+            'http://oe1.orf.at/programm/%s/konsole' % show_id,
+            show_id
+        )
+
+        timestamp = datetime.datetime.strptime('%s %s' % (
+            data['item']['day_label'],
+            data['item']['time']
+        ), '%d.%m.%Y %H:%M')
+        unix_timestamp = calendar.timegm(timestamp.utctimetuple())
+
+        return {
+            'id': show_id,
+            'title': data['item']['title'],
+            'url': data['item']['url_stream'],
+            'ext': 'mp3',
+            'description': data['item'].get('info'),
+            'timestamp': unix_timestamp
+        }
--- a/youtube_dl/extractor/ooyala.py
+++ b/youtube_dl/extractor/ooyala.py
@ -7,7 +7,7 @@ from ..utils import unescapeHTML


 class OoyalaIE(InfoExtractor):
-    _VALID_URL = r'https?://.+?\.ooyala\.com/.*?(?:embedCode|ec)=(?P<id>.+?)(&|$)'
+    _VALID_URL = r'(?:ooyala:|https?://.+?\.ooyala\.com/.*?(?:embedCode|ec)=)(?P<id>.+?)(&|$)'

    _TEST = {
        # From http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video
--- a/youtube_dl/extractor/pornhd.py
+++ b/youtube_dl/extractor/pornhd.py
@ -1,44 +1,81 @@
 from __future__ import unicode_literals

 import re
+import json

 from .common import InfoExtractor
-from ..utils import compat_urllib_parse
+from ..utils import int_or_none


 class PornHdIE(InfoExtractor):
-    _VALID_URL = r'(?:http://)?(?:www\.)?pornhd\.com/(?:[a-z]{2,4}/)?videos/(?P<video_id>[0-9]+)/(?P<video_title>.+)'
+    _VALID_URL = r'http://(?:www\.)?pornhd\.com/(?:[a-z]{2,4}/)?videos/(?P<id>\d+)'
    _TEST = {
        'url': 'http://www.pornhd.com/videos/1962/sierra-day-gets-his-cum-all-over-herself-hd-porn-video',
-        'file': '1962.flv',
-        'md5': '35272469887dca97abd30abecc6cdf75',
+        'md5': '956b8ca569f7f4d8ec563e2c41598441',
        'info_dict': {
-            "title": "sierra-day-gets-his-cum-all-over-herself-hd-porn-video",
-            "age_limit": 18,
+            'id': '1962',
+            'ext': 'mp4',
+            'title': 'Sierra loves doing laundry',
+            'description': 'md5:8ff0523848ac2b8f9b065ba781ccf294',
+            'age_limit': 18,
        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
-
-        video_id = mobj.group('video_id')
-        video_title = mobj.group('video_title')
+        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)

-        next_url = self._html_search_regex(
-            r'&hd=(http.+?)&', webpage, 'video URL')
-        next_url = compat_urllib_parse.unquote(next_url)
+        title = self._og_search_title(webpage)
+        TITLE_SUFFIX = ' porn HD Video | PornHD.com '
+        if title.endswith(TITLE_SUFFIX):
+            title = title[:-len(TITLE_SUFFIX)]

-        video_url = self._download_webpage(
-            next_url, video_id, note='Retrieving video URL',
-            errnote='Could not retrieve video URL')
-        age_limit = 18
+        description = self._html_search_regex(
+            r'<div class="description">([^<]+)</div>', webpage, 'description', fatal=False)
+        view_count = int_or_none(self._html_search_regex(
+            r'(\d+) views 	</span>', webpage, 'view count', fatal=False))
+
+        formats = [
+            {
+                'url': format_url,
+                'ext': format.lower(),
+                'format_id': '%s-%s' % (format.lower(), quality.lower()),
+                'quality': 1 if quality.lower() == 'high' else 0,
+            } for format, quality, format_url in re.findall(
+                r'var __video([\da-zA-Z]+?)(Low|High)StreamUrl = \'(http://.+?)\?noProxy=1\'', webpage)
+        ]
+
+        mobj = re.search(r'flashVars = (?P<flashvars>{.+?});', webpage)
+        if mobj:
+            flashvars = json.loads(mobj.group('flashvars'))
+            formats.extend([
+                {
+                    'url': flashvars['hashlink'].replace('?noProxy=1', ''),
+                    'ext': 'flv',
+                    'format_id': 'flv-low',
+                    'quality': 0,
+                },
+                {
+                    'url': flashvars['hd'].replace('?noProxy=1', ''),
+                    'ext': 'flv',
+                    'format_id': 'flv-high',
+                    'quality': 1,
+                }
+            ])
+            thumbnail = flashvars['urlWallpaper']
+        else:
+            thumbnail = self._og_search_thumbnail(webpage)
+
+        self._sort_formats(formats)

        return {
            'id': video_id,
-            'url': video_url,
-            'ext': 'flv',
-            'title': video_title,
-            'age_limit': age_limit,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'view_count': view_count,
+            'formats': formats,
+            'age_limit': 18,
        }
--- a/youtube_dl/extractor/prosiebensat1.py
+++ b/youtube_dl/extractor/prosiebensat1.py
@ -160,6 +160,7 @@ class ProSiebenSat1IE(InfoExtractor):
    _CLIPID_REGEXES = [
        r'"clip_id"\s*:\s+"(\d+)"',
        r'clipid: "(\d+)"',
+        r'clipId=(\d+)',
    ]
    _TITLE_REGEXES = [
        r'<h2 class="subtitle" itemprop="name">\s*(.+?)</h2>',
--- a/youtube_dl/extractor/pyvideo.py
+++ b/youtube_dl/extractor/pyvideo.py
@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import re
 import os

@ -5,45 +7,51 @@ from .common import InfoExtractor


 class PyvideoIE(InfoExtractor):
-    _VALID_URL = r'(?:http://)?(?:www\.)?pyvideo\.org/video/(?P<id>\d+)/(.*)'
-    _TESTS = [{
-        u'url': u'http://pyvideo.org/video/1737/become-a-logging-expert-in-30-minutes',
-        u'file': u'24_4WWkSmNo.mp4',
-        u'md5': u'de317418c8bc76b1fd8633e4f32acbc6',
-        u'info_dict': {
-            u"title": u"Become a logging expert in 30 minutes",
-            u"description": u"md5:9665350d466c67fb5b1598de379021f7",
-            u"upload_date": u"20130320",
-            u"uploader": u"NextDayVideo",
-            u"uploader_id": u"NextDayVideo",
+    _VALID_URL = r'http://(?:www\.)?pyvideo\.org/video/(?P<id>\d+)/(.*)'
+
+    _TESTS = [
+        {
+            'url': 'http://pyvideo.org/video/1737/become-a-logging-expert-in-30-minutes',
+            'md5': 'de317418c8bc76b1fd8633e4f32acbc6',
+            'info_dict': {
+                'id': '24_4WWkSmNo',
+                'ext': 'mp4',
+                'title': 'Become a logging expert in 30 minutes',
+                'description': 'md5:9665350d466c67fb5b1598de379021f7',
+                'upload_date': '20130320',
+                'uploader': 'NextDayVideo',
+                'uploader_id': 'NextDayVideo',
+            },
+            'add_ie': ['Youtube'],
        },
-        u'add_ie': ['Youtube'],
-    },
-    {
-        u'url': u'http://pyvideo.org/video/2542/gloriajw-spotifywitherikbernhardsson182m4v',
-        u'md5': u'5fe1c7e0a8aa5570330784c847ff6d12',
-        u'info_dict': {
-            u'id': u'2542',
-            u'ext': u'm4v',
-            u'title': u'Gloriajw-SpotifyWithErikBernhardsson182',
+        {
+            'url': 'http://pyvideo.org/video/2542/gloriajw-spotifywitherikbernhardsson182m4v',
+            'md5': '5fe1c7e0a8aa5570330784c847ff6d12',
+            'info_dict': {
+                'id': '2542',
+                'ext': 'm4v',
+                'title': 'Gloriajw-SpotifyWithErikBernhardsson182',
+            },
        },
-    },
    ]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
-        webpage = self._download_webpage(url, video_id)
-        m_youtube = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', webpage)

+        webpage = self._download_webpage(url, video_id)
+
+        m_youtube = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', webpage)
        if m_youtube is not None:
            return self.url_result(m_youtube.group(1), 'Youtube')

-        title = self._html_search_regex(r'<div class="section">.*?<h3>([^>]+?)</h3>',
-            webpage, u'title', flags=re.DOTALL)
-        video_url = self._search_regex([r'<source src="(.*?)"',
-            r'<dt>Download</dt>.*?<a href="(.+?)"'],
-            webpage, u'video url', flags=re.DOTALL)
+        title = self._html_search_regex(
+            r'<div class="section">.*?<h3(?:\s+class="[^"]*")?>([^>]+?)</h3>',
+            webpage, 'title', flags=re.DOTALL)
+        video_url = self._search_regex(
+            [r'<source src="(.*?)"', r'<dt>Download</dt>.*?<a href="(.+?)"'],
+            webpage, 'video url', flags=re.DOTALL)
+
        return {
            'id': video_id,
            'title': os.path.splitext(title)[0],
--- a/youtube_dl/extractor/ro220.py
+++ b/youtube_dl/extractor/ro220.py
@ -18,7 +18,7 @@ class Ro220IE(InfoExtractor):
        'md5': '03af18b73a07b4088753930db7a34add',
        'info_dict': {
            "title": "Luati-le Banii sez 4 ep 1",
-            "description": "Iata-ne reveniti dupa o binemeritata vacanta. Va astept si pe Facebook cu pareri si comentarii.",
+            "description": "re:^Iata-ne reveniti dupa o binemeritata vacanta\. +Va astept si pe Facebook cu pareri si comentarii.$",
        }
    }

--- a/youtube_dl/extractor/roxwel.py
+++ b/youtube_dl/extractor/roxwel.py
@ -1,5 +1,6 @@
+from __future__ import unicode_literals
+
 import re
-import json

 from .common import InfoExtractor
 from ..utils import unified_strdate, determine_ext
@ -9,41 +10,44 @@ class RoxwelIE(InfoExtractor):
    _VALID_URL = r'https?://www\.roxwel\.com/player/(?P<filename>.+?)(\.|\?|$)'

    _TEST = {
-        u'url': u'http://www.roxwel.com/player/passionpittakeawalklive.html',
-        u'file': u'passionpittakeawalklive.flv',
-        u'md5': u'd9dea8360a1e7d485d2206db7fe13035',
-        u'info_dict': {
-            u'title': u'Take A Walk (live)',
-            u'uploader': u'Passion Pit',
-            u'description': u'Passion Pit performs "Take A Walk\" live at The Backyard in Austin, Texas. ',
+        'url': 'http://www.roxwel.com/player/passionpittakeawalklive.html',
+        'info_dict': {
+            'id': 'passionpittakeawalklive',
+            'ext': 'flv',
+            'title': 'Take A Walk (live)',
+            'uploader': 'Passion Pit',
+            'uploader_id': 'passionpit',
+            'upload_date': '20120928',
+            'description': 'Passion Pit performs "Take A Walk\" live at The Backyard in Austin, Texas. ',
        },
-        u'skip': u'Requires rtmpdump',
+        'params': {
+            # rtmp download
+            'skip_download': True,
+        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        filename = mobj.group('filename')
        info_url = 'http://www.roxwel.com/api/videos/%s' % filename
-        info_page = self._download_webpage(info_url, filename,
-                                           u'Downloading video info')
+        info = self._download_json(info_url, filename)

-        self.report_extraction(filename)
-        info = json.loads(info_page)
        rtmp_rates = sorted([int(r.replace('flv_', '')) for r in info['media_rates'] if r.startswith('flv_')])
        best_rate = rtmp_rates[-1]
        url_page_url = 'http://roxwel.com/pl_one_time.php?filename=%s&quality=%s' % (filename, best_rate)
-        rtmp_url = self._download_webpage(url_page_url, filename, u'Downloading video url')
+        rtmp_url = self._download_webpage(url_page_url, filename, 'Downloading video url')
        ext = determine_ext(rtmp_url)
        if ext == 'f4v':
            rtmp_url = rtmp_url.replace(filename, 'mp4:%s' % filename)

-        return {'id': filename,
-                'title': info['title'],
-                'url': rtmp_url,
-                'ext': 'flv',
-                'description': info['description'],
-                'thumbnail': info.get('player_image_url') or info.get('image_url_large'),
-                'uploader': info['artist'],
-                'uploader_id': info['artistname'],
-                'upload_date': unified_strdate(info['dbdate']),
-                }
+        return {
+            'id': filename,
+            'title': info['title'],
+            'url': rtmp_url,
+            'ext': 'flv',
+            'description': info['description'],
+            'thumbnail': info.get('player_image_url') or info.get('image_url_large'),
+            'uploader': info['artist'],
+            'uploader_id': info['artistname'],
+            'upload_date': unified_strdate(info['dbdate']),
+        }
--- a/youtube_dl/extractor/rts.py
+++ b/youtube_dl/extractor/rts.py
@ -9,45 +9,136 @@ from ..utils import (
    parse_duration,
    parse_iso8601,
    unescapeHTML,
+    compat_str,
 )


 class RTSIE(InfoExtractor):
    IE_DESC = 'RTS.ch'
-    _VALID_URL = r'^https?://(?:www\.)?rts\.ch/archives/tv/[^/]+/(?P<id>[0-9]+)-.*?\.html'
+    _VALID_URL = r'^https?://(?:www\.)?rts\.ch/(?:[^/]+/){2,}(?P<id>[0-9]+)-.*?\.html'

-    _TEST = {
-        'url': 'http://www.rts.ch/archives/tv/divers/3449373-les-enfants-terribles.html',
-        'md5': '753b877968ad8afaeddccc374d4256a5',
-        'info_dict': {
-            'id': '3449373',
-            'ext': 'mp4',
-            'duration': 1488,
-            'title': 'Les Enfants Terribles',
-            'description': 'France Pommier et sa soeur Luce Feral, les deux filles de ce groupe de 5.',
-            'uploader': 'Divers',
-            'upload_date': '19680921',
-            'timestamp': -40280400,
+    _TESTS = [
+        {
+            'url': 'http://www.rts.ch/archives/tv/divers/3449373-les-enfants-terribles.html',
+            'md5': '753b877968ad8afaeddccc374d4256a5',
+            'info_dict': {
+                'id': '3449373',
+                'ext': 'mp4',
+                'duration': 1488,
+                'title': 'Les Enfants Terribles',
+                'description': 'France Pommier et sa soeur Luce Feral, les deux filles de ce groupe de 5.',
+                'uploader': 'Divers',
+                'upload_date': '19680921',
+                'timestamp': -40280400,
+                'thumbnail': 're:^https?://.*\.image'
+            },
        },
-    }
+        {
+            'url': 'http://www.rts.ch/emissions/passe-moi-les-jumelles/5624067-entre-ciel-et-mer.html',
+            'md5': 'c148457a27bdc9e5b1ffe081a7a8337b',
+            'info_dict': {
+                'id': '5624067',
+                'ext': 'mp4',
+                'duration': 3720,
+                'title': 'Les yeux dans les cieux - Mon homard au Canada',
+                'description': 'md5:d22ee46f5cc5bac0912e5a0c6d44a9f7',
+                'uploader': 'Passe-moi les jumelles',
+                'upload_date': '20140404',
+                'timestamp': 1396635300,
+                'thumbnail': 're:^https?://.*\.image'
+            },
+        },
+        {
+            'url': 'http://www.rts.ch/video/sport/hockey/5745975-1-2-kloten-fribourg-5-2-second-but-pour-gotteron-par-kwiatowski.html',
+            'md5': 'b4326fecd3eb64a458ba73c73e91299d',
+            'info_dict': {
+                'id': '5745975',
+                'ext': 'mp4',
+                'duration': 48,
+                'title': '1/2, Kloten - Fribourg (5-2): second but pour Gottéron par Kwiatowski',
+                'description': 'Hockey - Playoff',
+                'uploader': 'Hockey',
+                'upload_date': '20140403',
+                'timestamp': 1396556882,
+                'thumbnail': 're:^https?://.*\.image'
+            },
+            'skip': 'Blocked outside Switzerland',
+        },
+        {
+            'url': 'http://www.rts.ch/video/info/journal-continu/5745356-londres-cachee-par-un-epais-smog.html',
+            'md5': '9bb06503773c07ce83d3cbd793cebb91',
+            'info_dict': {
+                'id': '5745356',
+                'ext': 'mp4',
+                'duration': 33,
+                'title': 'Londres cachée par un épais smog',
+                'description': 'Un important voile de smog recouvre Londres depuis mercredi, provoqué par la pollution et du sable du Sahara.',
+                'uploader': 'Le Journal en continu',
+                'upload_date': '20140403',
+                'timestamp': 1396537322,
+                'thumbnail': 're:^https?://.*\.image'
+            },
+        },
+        {
+            'url': 'http://www.rts.ch/audio/couleur3/programmes/la-belle-video-de-stephane-laurenceau/5706148-urban-hippie-de-damien-krisl-03-04-2014.html',
+            'md5': 'dd8ef6a22dff163d063e2a52bc8adcae',
+            'info_dict': {
+                'id': '5706148',
+                'ext': 'mp3',
+                'duration': 123,
+                'title': '"Urban Hippie", de Damien Krisl',
+                'description': 'Des Hippies super glam.',
+                'upload_date': '20140403',
+                'timestamp': 1396551600,
+            },
+        },
+    ]

    def _real_extract(self, url):
        m = re.match(self._VALID_URL, url)
        video_id = m.group('id')

-        all_info = self._download_json(
-            'http://www.rts.ch/a/%s.html?f=json/article' % video_id, video_id)
-        info = all_info['video']['JSONinfo']
+        def download_json(internal_id):
+            return self._download_json(
+                'http://www.rts.ch/a/%s.html?f=json/article' % internal_id,
+                video_id)
+
+        all_info = download_json(video_id)
+
+        # video_id extracted out of URL is not always a real id
+        if 'video' not in all_info and 'audio' not in all_info:
+            page = self._download_webpage(url, video_id)
+            internal_id = self._html_search_regex(
+                r'<(?:video|audio) data-id="([0-9]+)"', page,
+                'internal video id')
+            all_info = download_json(internal_id)
+
+        info = all_info['video']['JSONinfo'] if 'video' in all_info else all_info['audio']

        upload_timestamp = parse_iso8601(info.get('broadcast_date'))
-        duration = parse_duration(info.get('duration'))
+        duration = info.get('duration') or info.get('cutout') or info.get('cutduration')
+        if isinstance(duration, compat_str):
+            duration = parse_duration(duration)
+        view_count = info.get('plays')
        thumbnail = unescapeHTML(info.get('preview_image_url'))
+
+        def extract_bitrate(url):
+            return int_or_none(self._search_regex(
+                r'-([0-9]+)k\.', url, 'bitrate', default=None))
+
        formats = [{
            'format_id': fid,
            'url': furl,
-            'tbr': int_or_none(self._search_regex(
-                r'-([0-9]+)k\.', furl, 'bitrate', default=None)),
+            'tbr': extract_bitrate(furl),
        } for fid, furl in info['streams'].items()]
+
+        if 'media' in info:
+            formats.extend([{
+                'format_id': '%s-%sk' % (media['ext'], media['rate']),
+                'url': 'http://download-video.rts.ch/%s' % media['url'],
+                'tbr': media['rate'] or extract_bitrate(media['url']),
+            } for media in info['media'] if media.get('rate')])
+
        self._sort_formats(formats)

        return {
@ -56,6 +147,8 @@ class RTSIE(InfoExtractor):
            'title': info['title'],
            'description': info.get('intro'),
            'duration': duration,
+            'view_count': view_count,
            'uploader': info.get('programName'),
            'timestamp': upload_timestamp,
+            'thumbnail': thumbnail,
        }
--- a/youtube_dl/extractor/rtve.py
+++ b/youtube_dl/extractor/rtve.py
@ -0,0 +1,84 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+import re
+import base64
+
+from .common import InfoExtractor
+from ..utils import (
+    struct_unpack,
+)
+
+
+class RTVEALaCartaIE(InfoExtractor):
+    IE_NAME = 'rtve.es:alacarta'
+    IE_DESC = 'RTVE a la carta'
+    _VALID_URL = r'http://www\.rtve\.es/alacarta/videos/[^/]+/[^/]+/(?P<id>\d+)'
+
+    _TEST = {
+        'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/',
+        'md5': '18fcd45965bdd076efdb12cd7f6d7b9e',
+        'info_dict': {
+            'id': '2491869',
+            'ext': 'mp4',
+            'title': 'Balonmano - Swiss Cup masculina. Final: España-Suecia',
+        },
+    }
+
+    def _decrypt_url(self, png):
+        encrypted_data = base64.b64decode(png)
+        text_index = encrypted_data.find(b'tEXt')
+        text_chunk = encrypted_data[text_index-4:]
+        length = struct_unpack('!I', text_chunk[:4])[0]
+        # Use bytearray to get integers when iterating in both python 2.x and 3.x
+        data = bytearray(text_chunk[8:8+length])
+        data = [chr(b) for b in data if b != 0]
+        hash_index = data.index('#')
+        alphabet_data = data[:hash_index]
+        url_data = data[hash_index+1:]
+
+        alphabet = []
+        e = 0
+        d = 0
+        for l in alphabet_data:
+            if d == 0:
+                alphabet.append(l)
+                d = e = (e + 1) % 4
+            else:
+                d -= 1
+        url = ''
+        f = 0
+        e = 3
+        b = 1
+        for letter in url_data:
+            if f == 0:
+                l = int(letter)*10
+                f = 1
+            else:
+                if e == 0:
+                    l += int(letter)
+                    url += alphabet[l]
+                    e = (b + 3) % 4
+                    f = 0
+                    b += 1
+                else:
+                    e -= 1
+
+        return url
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        info = self._download_json(
+            'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id,
+            video_id)['page']['items'][0]
+        png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % video_id
+        png = self._download_webpage(png_url, video_id, 'Downloading url information')
+        video_url = self._decrypt_url(png)
+
+        return {
+            'id': video_id,
+            'title': info['title'],
+            'url': video_url,
+            'thumbnail': info['image'],
+        }
--- a/youtube_dl/extractor/rutube.py
+++ b/youtube_dl/extractor/rutube.py
@ -2,7 +2,6 @@
 from __future__ import unicode_literals

 import re
-import json
 import itertools

 from .common import InfoExtractor
@ -20,8 +19,9 @@ class RutubeIE(InfoExtractor):

    _TEST = {
        'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/',
-        'file': '3eac3b4561676c17df9132a9a1e62e3e.mp4',
        'info_dict': {
+            'id': '3eac3b4561676c17df9132a9a1e62e3e',
+            'ext': 'mp4',
            'title': 'Раненный кенгуру забежал в аптеку',
            'description': 'http://www.ntdtv.ru ',
            'duration': 80,
@ -38,15 +38,15 @@ class RutubeIE(InfoExtractor):
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
-        
-        api_response = self._download_webpage('http://rutube.ru/api/video/%s/?format=json' % video_id,
-                                              video_id, 'Downloading video JSON')
-        video = json.loads(api_response)
-        
-        api_response = self._download_webpage('http://rutube.ru/api/play/trackinfo/%s/?format=json' % video_id,
-                                              video_id, 'Downloading trackinfo JSON')
-        trackinfo = json.loads(api_response)
-        
+
+        video = self._download_json(
+            'http://rutube.ru/api/video/%s/?format=json' % video_id,
+            video_id, 'Downloading video JSON')
+
+        trackinfo = self._download_json(
+            'http://rutube.ru/api/play/trackinfo/%s/?format=json' % video_id,
+            video_id, 'Downloading trackinfo JSON')
+
        # Some videos don't have the author field
        author = trackinfo.get('author') or {}
        m3u8_url = trackinfo['video_balancer'].get('m3u8')
@ -79,10 +79,9 @@ class RutubeChannelIE(InfoExtractor):
    def _extract_videos(self, channel_id, channel_title=None):
        entries = []
        for pagenum in itertools.count(1):
-            api_response = self._download_webpage(
+            page = self._download_json(
                self._PAGE_TEMPLATE % (channel_id, pagenum),
                channel_id, 'Downloading page %s' % pagenum)
-            page = json.loads(api_response)
            results = page['results']
            if not results:
                break
@ -108,10 +107,9 @@ class RutubeMovieIE(RutubeChannelIE):
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        movie_id = mobj.group('id')
-        api_response = self._download_webpage(
+        movie = self._download_json(
            self._MOVIE_TEMPLATE % movie_id, movie_id,
            'Downloading movie JSON')
-        movie = json.loads(api_response)
        movie_name = movie['name']
        return self._extract_videos(movie_id, movie_name)

--- a/youtube_dl/extractor/slashdot.py
+++ b/youtube_dl/extractor/slashdot.py
@ -1,24 +0,0 @@
-import re
-
-from .common import InfoExtractor
-
-
-class SlashdotIE(InfoExtractor):
-    _VALID_URL = r'https?://tv\.slashdot\.org/video/\?embed=(?P<id>.*?)(&|$)'
-
-    _TEST = {
-        u'add_ie': ['Ooyala'],
-        u'url': u'http://tv.slashdot.org/video/?embed=JscHMzZDplD0p-yNLOzTfzC3Q3xzJaUz',
-        u'file': u'JscHMzZDplD0p-yNLOzTfzC3Q3xzJaUz.mp4',
-        u'md5': u'd2222e7a4a4c1541b3e0cf732fb26735',
-        u'info_dict': {
-            u'title': u' Meet the Stampede Supercomputing Cluster\'s Administrator',
-        },
-    }
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-        webpage = self._download_webpage(url, video_id)
-        ooyala_url = self._search_regex(r'<script src="(.*?)"', webpage, 'ooyala url')
-        return self.url_result(ooyala_url, 'Ooyala')
--- a/youtube_dl/extractor/slideshare.py
+++ b/youtube_dl/extractor/slideshare.py
@ -39,7 +39,8 @@ class SlideshareIE(InfoExtractor):
        ext = info['jsplayer']['video_extension']
        video_url = compat_urlparse.urljoin(bucket, doc + '-SD.' + ext)
        description = self._html_search_regex(
-            r'<p class="description.*?"[^>]*>(.*?)</p>', webpage, 'description')
+            r'<p\s+(?:style="[^"]*"\s+)?class="description.*?"[^>]*>(.*?)</p>', webpage,
+            'description', fatal=False)

        return {
            '_type': 'video',
--- a/youtube_dl/extractor/smotri.py
+++ b/youtube_dl/extractor/smotri.py
@ -13,22 +13,24 @@ from ..utils import (
    compat_urllib_request,
    ExtractorError,
    url_basename,
+    int_or_none,
 )


 class SmotriIE(InfoExtractor):
    IE_DESC = 'Smotri.com'
    IE_NAME = 'smotri'
-    _VALID_URL = r'^https?://(?:www\.)?(?P<url>smotri\.com/video/view/\?id=(?P<videoid>v(?P<realvideoid>[0-9]+)[a-z0-9]{4}))'
+    _VALID_URL = r'^https?://(?:www\.)?(?:smotri\.com/video/view/\?id=|pics\.smotri\.com/(?:player|scrubber_custom8)\.swf\?file=)(?P<videoid>v(?P<realvideoid>[0-9]+)[a-z0-9]{4})'
    _NETRC_MACHINE = 'smotri'

    _TESTS = [
        # real video id 2610366
        {
            'url': 'http://smotri.com/video/view/?id=v261036632ab',
-            'file': 'v261036632ab.mp4',
            'md5': '2a7b08249e6f5636557579c368040eb9',
            'info_dict': {
+                'id': 'v261036632ab',
+                'ext': 'mp4',
                'title': 'катастрофа с камер видеонаблюдения',
                'uploader': 'rbc2008',
                'uploader_id': 'rbc08',
@ -40,9 +42,10 @@ class SmotriIE(InfoExtractor):
        # real video id 57591
        {
            'url': 'http://smotri.com/video/view/?id=v57591cb20',
-            'file': 'v57591cb20.flv',
            'md5': '830266dfc21f077eac5afd1883091bcd',
            'info_dict': {
+                'id': 'v57591cb20',
+                'ext': 'flv',
                'title': 'test',
                'uploader': 'Support Photofile@photofile',
                'uploader_id': 'support-photofile',
@ -54,9 +57,10 @@ class SmotriIE(InfoExtractor):
        # video-password
        {
            'url': 'http://smotri.com/video/view/?id=v1390466a13c',
-            'file': 'v1390466a13c.mp4',
            'md5': 'f6331cef33cad65a0815ee482a54440b',
            'info_dict': {
+                'id': 'v1390466a13c',
+                'ext': 'mp4',
                'title': 'TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1',
                'uploader': 'timoxa40',
                'uploader_id': 'timoxa40',
@ -71,9 +75,10 @@ class SmotriIE(InfoExtractor):
        # age limit + video-password
        {
            'url': 'http://smotri.com/video/view/?id=v15408898bcf',
-            'file': 'v15408898bcf.flv',
            'md5': '91e909c9f0521adf5ee86fbe073aad70',
            'info_dict': {
+                'id': 'v15408898bcf',
+                'ext': 'flv',
                'title': 'этот ролик не покажут по ТВ',
                'uploader': 'zzxxx',
                'uploader_id': 'ueggb',
@ -85,7 +90,22 @@ class SmotriIE(InfoExtractor):
            'params': {
                'videopassword': '333'
            }
-        }
+        },
+        # swf player
+        {
+            'url': 'http://pics.smotri.com/scrubber_custom8.swf?file=v9188090500',
+            'md5': '4d47034979d9390d14acdf59c4935bc2',
+            'info_dict': {
+                'id': 'v9188090500',
+                'ext': 'mp4',
+                'title': 'Shakira - Don\'t Bother',
+                'uploader': 'HannahL',
+                'uploader_id': 'lisaha95',
+                'upload_date': '20090331',
+                'description': 'Shakira - Don\'t Bother, видео Shakira - Don\'t Bother',
+                'thumbnail': 'http://frame8.loadup.ru/44/0b/918809.7.3.jpg',
+            },
+        },
    ]

    _SUCCESS = 0
@ -93,6 +113,21 @@ class SmotriIE(InfoExtractor):
    _PASSWORD_DETECTED = 2
    _VIDEO_NOT_FOUND = 3

+    @classmethod
+    def _extract_url(cls, webpage):
+        mobj = re.search(
+            r'<embed[^>]src=(["\'])(?P<url>http://pics\.smotri\.com/(?:player|scrubber_custom8)\.swf\?file=v.+?\1)',
+            webpage)
+        if mobj is not None:
+            return mobj.group('url')
+
+        mobj = re.search(
+            r'''(?x)<div\s+class="video_file">http://smotri\.com/video/download/file/[^<]+</div>\s*
+                    <div\s+class="video_image">[^<]+</div>\s*
+                    <div\s+class="video_id">(?P<id>[^<]+)</div>''', webpage)
+        if mobj is not None:
+            return 'http://smotri.com/video/view/?id=%s' % mobj.group('id')
+
    def _search_meta(self, name, html, display_name=None):
        if display_name is None:
            display_name = name
@ -134,7 +169,7 @@ class SmotriIE(InfoExtractor):

        # Video JSON does not provide enough meta data
        # We will extract some from the video web page instead
-        video_page_url = 'http://' + mobj.group('url')
+        video_page_url = 'http://smotri.com/video/view/?id=%s' % video_id
        video_page = self._download_webpage(video_page_url, video_id, 'Downloading video page')

        # Warning if video is unavailable
@ -222,7 +257,7 @@ class SmotriIE(InfoExtractor):
            'upload_date': video_upload_date,
            'uploader_id': video_uploader_id,
            'duration': video_duration,
-            'view_count': video_view_count,
+            'view_count': int_or_none(video_view_count),
            'age_limit': 18 if adult_content else 0,
            'video_page_url': video_page_url
        }
--- a/youtube_dl/extractor/teamcoco.py
+++ b/youtube_dl/extractor/teamcoco.py
@ -9,8 +9,18 @@ from ..utils import (


 class TeamcocoIE(InfoExtractor):
-    _VALID_URL = r'http://teamcoco\.com/video/(?P<url_title>.*)'
-    _TEST = {
+    _VALID_URL = r'http://teamcoco\.com/video/(?P<video_id>[0-9]+)?/?(?P<display_id>.*)'
+    _TESTS = [
+    {
+        'url': 'http://teamcoco.com/video/80187/conan-becomes-a-mary-kay-beauty-consultant',
+        'file': '80187.mp4',
+        'md5': '3f7746aa0dc86de18df7539903d399ea',
+        'info_dict': {
+            'title': 'Conan Becomes A Mary Kay Beauty Consultant',
+            'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.'
+        }
+    },
+    {
        'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush',
        'file': '19705.mp4',
        'md5': 'cde9ba0fa3506f5f017ce11ead928f9a',
@ -19,22 +29,23 @@ class TeamcocoIE(InfoExtractor):
            "title": "Louis C.K. Interview Pt. 1 11/3/11"
        }
    }
+    ]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
-        if mobj is None:
-            raise ExtractorError('Invalid URL: %s' % url)
-        url_title = mobj.group('url_title')
-        webpage = self._download_webpage(url, url_title)

-        video_id = self._html_search_regex(
-            r'<article class="video" data-id="(\d+?)"',
-            webpage, 'video id')
-
-        self.report_extraction(video_id)
+        display_id = mobj.group('display_id')
+        webpage = self._download_webpage(url, display_id)
+        
+        video_id = mobj.group("video_id")
+        if not video_id:
+            video_id = self._html_search_regex(
+                r'<article class="video" data-id="(\d+?)"',
+                webpage, 'video id')

        data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
-        data = self._download_xml(data_url, video_id, 'Downloading data webpage')
+        data = self._download_xml(
+            data_url, display_id, 'Downloading data webpage')

        qualities = ['500k', '480p', '1000k', '720p', '1080p']
        formats = []
@ -69,6 +80,7 @@ class TeamcocoIE(InfoExtractor):

        return {
            'id': video_id,
+            'display_id': display_id,
            'formats': formats,
            'title': self._og_search_title(webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
--- a/youtube_dl/extractor/ted.py
+++ b/youtube_dl/extractor/ted.py
@ -18,12 +18,14 @@ class TEDIE(SubtitlesInfoExtractor):
            (?P<type_playlist>playlists(?:/\d+)?) # We have a playlist
            |
            ((?P<type_talk>talks)) # We have a simple talk
+            |
+            (?P<type_watch>watch)/[^/]+/[^/]+
        )
        (/lang/(.*?))? # The url may contain the language
-        /(?P<name>\w+) # Here goes the name and then ".html"
+        /(?P<name>[\w-]+) # Here goes the name and then ".html"
        .*)$
        '''
-    _TEST = {
+    _TESTS = [{
        'url': 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html',
        'md5': '4ea1dada91e4174b53dac2bb8ace429d',
        'info_dict': {
@ -35,13 +37,24 @@ class TEDIE(SubtitlesInfoExtractor):
                'consciousness, but that half the time our brains are '
                'actively fooling us.'),
            'uploader': 'Dan Dennett',
+            'width': 854,
        }
-    }
+    }, {
+        'url': 'http://www.ted.com/watch/ted-institute/ted-bcg/vishal-sikka-the-beauty-and-power-of-algorithms',
+        'md5': '226f4fb9c62380d11b7995efa4c87994',
+        'info_dict': {
+            'id': 'vishal-sikka-the-beauty-and-power-of-algorithms',
+            'ext': 'mp4',
+            'title': 'Vishal Sikka: The beauty and power of algorithms',
+            'thumbnail': 're:^https?://.+\.jpg',
+            'description': 'Adaptive, intelligent, and consistent, algorithms are emerging as the ultimate app for everything from matching consumers to products to assessing medical diagnoses. Vishal Sikka shares his appreciation for the algorithm, charting both its inherent beauty and its growing power.',
+        }
+    }]

-    _FORMATS_PREFERENCE = {
-        'low': 1,
-        'medium': 2,
-        'high': 3,
+    _NATIVE_FORMATS = {
+        'low': {'preference': 1, 'width': 320, 'height': 180},
+        'medium': {'preference': 2, 'width': 512, 'height': 288},
+        'high': {'preference': 3, 'width': 854, 'height': 480},
    }

    def _extract_info(self, webpage):
@ -57,6 +70,8 @@ class TEDIE(SubtitlesInfoExtractor):
        name = m.group('name')
        if m.group('type_talk'):
            return self._talk_info(url, name)
+        elif m.group('type_watch'):
+            return self._watch_info(url, name)
        else:
            return self._playlist_videos_info(url, name)

@ -84,12 +99,14 @@ class TEDIE(SubtitlesInfoExtractor):
        talk_info = self._extract_info(webpage)['talks'][0]

        formats = [{
-            'ext': 'mp4',
            'url': format_url,
            'format_id': format_id,
            'format': format_id,
-            'preference': self._FORMATS_PREFERENCE.get(format_id, -1),
        } for (format_id, format_url) in talk_info['nativeDownloads'].items()]
+        for f in formats:
+            finfo = self._NATIVE_FORMATS.get(f['format_id'])
+            if finfo:
+                f.update(finfo)
        self._sort_formats(formats)

        video_id = compat_str(talk_info['id'])
@ -123,3 +140,26 @@ class TEDIE(SubtitlesInfoExtractor):
        else:
            self._downloader.report_warning(u'video doesn\'t have subtitles')
            return {}
+
+    def _watch_info(self, url, name):
+        webpage = self._download_webpage(url, name)
+
+        config_json = self._html_search_regex(
+            r"data-config='([^']+)", webpage, 'config')
+        config = json.loads(config_json)
+        video_url = config['video']['url']
+        thumbnail = config.get('image', {}).get('url')
+
+        title = self._html_search_regex(
+            r"(?s)<h1(?:\s+class='[^']+')?>(.+?)</h1>", webpage, 'title')
+        description = self._html_search_regex(
+            r'(?s)<h4 class="[^"]+" id="h3--about-this-talk">.*?</h4>(.*?)</div>',
+            webpage, 'description', fatal=False)
+
+        return {
+            'id': name,
+            'url': video_url,
+            'title': title,
+            'thumbnail': thumbnail,
+            'description': description,
+        }
--- a/youtube_dl/extractor/tf1.py
+++ b/youtube_dl/extractor/tf1.py
@ -1,33 +1,37 @@
 # coding: utf-8
+from __future__ import unicode_literals

-import json
 import re

 from .common import InfoExtractor

+
 class TF1IE(InfoExtractor):
    """TF1 uses the wat.tv player."""
-    _VALID_URL = r'http://videos\.tf1\.fr/.*-(.*?)\.html'
+    _VALID_URL = r'http://videos\.tf1\.fr/.*-(?P<id>.*?)\.html'
    _TEST = {
-        u'url': u'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html',
-        u'file': u'10635995.mp4',
-        u'md5': u'2e378cc28b9957607d5e88f274e637d8',
-        u'info_dict': {
-            u'title': u'Citroën Grand C4 Picasso 2013 : présentation officielle',
-            u'description': u'Vidéo officielle du nouveau Citroën Grand C4 Picasso, lancé à l\'automne 2013.',
+        'url': 'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html',
+        'info_dict': {
+            'id': '10635995',
+            'ext': 'mp4',
+            'title': 'Citroën Grand C4 Picasso 2013 : présentation officielle',
+            'description': 'Vidéo officielle du nouveau Citroën Grand C4 Picasso, lancé à l\'automne 2013.',
+        },
+        'params': {
+            # Sometimes wat serves the whole file with the --test option
+            'skip_download': True,
        },
-        u'skip': u'Sometimes wat serves the whole file with the --test option',
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
-        id = mobj.group(1)
-        webpage = self._download_webpage(url, id)
-        embed_url = self._html_search_regex(r'"(https://www.wat.tv/embedframe/.*?)"',
-                                webpage, 'embed url')
-        embed_page = self._download_webpage(embed_url, id, u'Downloading embed player page')
+        video_id = mobj.group('id')
+        webpage = self._download_webpage(url, video_id)
+        embed_url = self._html_search_regex(
+            r'"(https://www.wat.tv/embedframe/.*?)"', webpage, 'embed url')
+        embed_page = self._download_webpage(embed_url, video_id,
+            'Downloading embed player page')
        wat_id = self._search_regex(r'UVID=(.*?)&', embed_page, 'wat id')
-        wat_info = self._download_webpage('http://www.wat.tv/interface/contentv3/%s' % wat_id, id, u'Downloading Wat info')
-        wat_info = json.loads(wat_info)['media']
-        wat_url = wat_info['url']
-        return self.url_result(wat_url, 'Wat')
+        wat_info = self._download_json(
+            'http://www.wat.tv/interface/contentv3/%s' % wat_id, video_id)
+        return self.url_result(wat_info['media']['url'], 'Wat')
--- a/youtube_dl/extractor/tube8.py
+++ b/youtube_dl/extractor/tube8.py
@ -1,63 +1,83 @@
-import os
+from __future__ import unicode_literals
+
+import json
 import re

 from .common import InfoExtractor
 from ..utils import (
    compat_urllib_parse_urlparse,
    compat_urllib_request,
+    int_or_none,
+    str_to_int,
 )
-from ..aes import (
-    aes_decrypt_text
-)
+from ..aes import aes_decrypt_text
+

 class Tube8IE(InfoExtractor):
-    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>tube8\.com/.+?/(?P<videoid>\d+)/?)$'
+    _VALID_URL = r'https?://(?:www\.)?tube8\.com/(?:[^/]+/){2}(?P<id>\d+)'
    _TEST = {
-        u'url': u'http://www.tube8.com/teen/kasia-music-video/229795/',
-        u'file': u'229795.mp4',
-        u'md5': u'e9e0b0c86734e5e3766e653509475db0',
-        u'info_dict': {
-            u"description": u"hot teen Kasia grinding", 
-            u"uploader": u"unknown", 
-            u"title": u"Kasia music video",
-            u"age_limit": 18,
+        'url': 'http://www.tube8.com/teen/kasia-music-video/229795/',
+        'file': '229795.mp4',
+        'md5': 'e9e0b0c86734e5e3766e653509475db0',
+        'info_dict': {
+            'description': 'hot teen Kasia grinding',
+            'uploader': 'unknown',
+            'title': 'Kasia music video',
+            'age_limit': 18,
        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('videoid')
-        url = 'http://www.' + mobj.group('url')
+        video_id = mobj.group('id')

        req = compat_urllib_request.Request(url)
        req.add_header('Cookie', 'age_verified=1')
        webpage = self._download_webpage(req, video_id)

-        video_title = self._html_search_regex(r'videotitle	="([^"]+)', webpage, u'title')
-        video_description = self._html_search_regex(r'>Description:</strong>(.+?)<', webpage, u'description', fatal=False)
-        video_uploader = self._html_search_regex(r'>Submitted by:</strong>(?:\s|<[^>]*>)*(.+?)<', webpage, u'uploader', fatal=False)
-        thumbnail = self._html_search_regex(r'"image_url":"([^"]+)', webpage, u'thumbnail', fatal=False)
-        if thumbnail:
-            thumbnail = thumbnail.replace('\\/', '/')
+        flashvars = json.loads(self._html_search_regex(
+            r'var flashvars\s*=\s*({.+?})', webpage, 'flashvars'))

-        video_url = self._html_search_regex(r'"video_url":"([^"]+)', webpage, u'video_url')
-        if webpage.find('"encrypted":true')!=-1:
-            password = self._html_search_regex(r'"video_title":"([^"]+)', webpage, u'password')
-            video_url = aes_decrypt_text(video_url, password, 32).decode('utf-8')
+        video_url = flashvars['video_url']
+        if flashvars.get('encrypted') is True:
+            video_url = aes_decrypt_text(video_url, flashvars['video_title'], 32).decode('utf-8')
        path = compat_urllib_parse_urlparse(video_url).path
-        extension = os.path.splitext(path)[1][1:]
-        format = path.split('/')[4].split('_')[:2]
-        format = "-".join(format)
+        format_id = '-'.join(path.split('/')[4].split('_')[:2])
+
+        thumbnail = flashvars.get('image_url')
+
+        title = self._html_search_regex(
+            r'videotitle\s*=\s*"([^"]+)', webpage, 'title')
+        description = self._html_search_regex(
+            r'>Description:</strong>(.+?)<', webpage, 'description', fatal=False)
+        uploader = self._html_search_regex(
+            r'<strong class="video-username">(?:<a href="[^"]+">)?([^<]+)(?:</a>)?</strong>',
+            webpage, 'uploader', fatal=False)
+
+        like_count = int_or_none(self._html_search_regex(
+            r"rupVar\s*=\s*'(\d+)'", webpage, 'like count', fatal=False))
+        dislike_count = int_or_none(self._html_search_regex(
+            r"rdownVar\s*=\s*'(\d+)'", webpage, 'dislike count', fatal=False))
+        view_count = self._html_search_regex(
+            r'<strong>Views: </strong>([\d,\.]+)</li>', webpage, 'view count', fatal=False)
+        if view_count:
+            view_count = str_to_int(view_count)
+        comment_count = self._html_search_regex(
+            r'<span id="allCommentsCount">(\d+)</span>', webpage, 'comment count', fatal=False)
+        if comment_count:
+            comment_count = str_to_int(comment_count)

        return {
            'id': video_id,
-            'uploader': video_uploader,
-            'title': video_title,
-            'thumbnail': thumbnail,
-            'description': video_description,
            'url': video_url,
-            'ext': extension,
-            'format': format,
-            'format_id': format,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'uploader': uploader,
+            'format_id': format_id,
+            'view_count': view_count,
+            'like_count': like_count,
+            'dislike_count': dislike_count,
+            'comment_count': comment_count,
            'age_limit': 18,
        }
--- a/youtube_dl/extractor/urort.py
+++ b/youtube_dl/extractor/urort.py
@ -0,0 +1,61 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urllib_parse,
+    unified_strdate,
+)
+
+
+class UrortIE(InfoExtractor):
+    IE_DESC = 'NRK P3 Urørt'
+    _VALID_URL = r'https?://(?:www\.)?urort\.p3\.no/#!/Band/(?P<id>[^/]+)$'
+
+    _TEST = {
+        'url': 'https://urort.p3.no/#!/Band/Gerilja',
+        'md5': '5ed31a924be8a05e47812678a86e127b',
+        'info_dict': {
+            'id': '33124-4',
+            'ext': 'mp3',
+            'title': 'The Bomb',
+            'thumbnail': 're:^https?://.+\.jpg',
+            'like_count': int,
+            'uploader': 'Gerilja',
+            'uploader_id': 'Gerilja',
+            'upload_date': '20100323',
+        },
+        'params': {
+            'matchtitle': '^The Bomb$',  # To test, we want just one video
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        playlist_id = mobj.group('id')
+
+        fstr = compat_urllib_parse.quote("InternalBandUrl eq '%s'" % playlist_id)
+        json_url = 'http://urort.p3.no/breeze/urort/TrackDtos?$filter=' + fstr
+        songs = self._download_json(json_url, playlist_id)
+        print(songs[0])
+
+        entries = [{
+            'id': '%d-%s' % (s['BandId'], s['$id']),
+            'title': s['Title'],
+            'url': s['TrackUrl'],
+            'ext': 'mp3',
+            'uploader_id': playlist_id,
+            'uploader': s.get('BandName', playlist_id),
+            'like_count': s.get('LikeCount'),
+            'thumbnail': 'http://urort.p3.no/cloud/images/%s' % s['Image'],
+            'upload_date': unified_strdate(s.get('Released')),
+        } for s in songs]
+
+        return {
+            '_type': 'playlist',
+            'id': playlist_id,
+            'title': playlist_id,
+            'entries': entries,
+        }
--- a/youtube_dl/extractor/ustream.py
+++ b/youtube_dl/extractor/ustream.py
@ -11,7 +11,7 @@ from ..utils import (


 class UstreamIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.ustream\.tv/recorded/(?P<videoID>\d+)'
+    _VALID_URL = r'https?://www\.ustream\.tv/(?P<type>recorded|embed)/(?P<videoID>\d+)'
    IE_NAME = 'ustream'
    _TEST = {
        'url': 'http://www.ustream.tv/recorded/20274954',
@ -25,6 +25,13 @@ class UstreamIE(InfoExtractor):

    def _real_extract(self, url):
        m = re.match(self._VALID_URL, url)
+        if m.group('type') == 'embed':
+            video_id = m.group('videoID')
+            webpage = self._download_webpage(url, video_id)
+            desktop_video_id = self._html_search_regex(r'ContentVideoIds=\["([^"]*?)"\]', webpage, 'desktop_video_id')
+            desktop_url = 'http://www.ustream.tv/recorded/' + desktop_video_id
+            return self.url_result(desktop_url, 'Ustream')
+
        video_id = m.group('videoID')

        video_url = 'http://tcdn.ustream.tv/video/%s' % video_id
--- a/youtube_dl/extractor/veoh.py
+++ b/youtube_dl/extractor/veoh.py
@ -4,26 +4,99 @@ import re
 import json

 from .common import InfoExtractor
-from ..utils import compat_urllib_request
+from ..utils import (
+    compat_urllib_request,
+    int_or_none,
+)


 class VeohIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?veoh\.com/(?:watch|iphone/#_Watch)/v(?P<id>\d*)'
+    _VALID_URL = r'http://(?:www\.)?veoh\.com/(?:watch|iphone/#_Watch)/(?P<id>(?:v|yapi-)[\da-zA-Z]+)'

-    _TEST = {
-        'url': 'http://www.veoh.com/watch/v56314296nk7Zdmz3',
-        'file': '56314296.mp4',
-        'md5': '620e68e6a3cff80086df3348426c9ca3',
-        'info_dict': {
-            'title': 'Straight Backs Are Stronger',
-            'uploader': 'LUMOback',
-            'description': 'At LUMOback, we believe straight backs are stronger.  The LUMOback Posture & Movement Sensor:  It gently vibrates when you slouch, inspiring improved posture and mobility.  Use the app to track your data and improve your posture over time. ',
+    _TESTS = [
+        {
+            'url': 'http://www.veoh.com/watch/v56314296nk7Zdmz3',
+            'md5': '620e68e6a3cff80086df3348426c9ca3',
+            'info_dict': {
+                'id': '56314296',
+                'ext': 'mp4',
+                'title': 'Straight Backs Are Stronger',
+                'uploader': 'LUMOback',
+                'description': 'At LUMOback, we believe straight backs are stronger.  The LUMOback Posture & Movement Sensor:  It gently vibrates when you slouch, inspiring improved posture and mobility.  Use the app to track your data and improve your posture over time. ',
+            },
+        },
+        {
+            'url': 'http://www.veoh.com/watch/v27701988pbTc4wzN?h1=Chile+workers+cover+up+to+avoid+skin+damage',
+            'md5': '4a6ff84b87d536a6a71e6aa6c0ad07fa',
+            'info_dict': {
+                'id': '27701988',
+                'ext': 'mp4',
+                'title': 'Chile workers cover up to avoid skin damage',
+                'description': 'md5:2bd151625a60a32822873efc246ba20d',
+                'uploader': 'afp-news',
+                'duration': 123,
+            },
+        },
+        {
+            'url': 'http://www.veoh.com/watch/v69525809F6Nc4frX',
+            'md5': '4fde7b9e33577bab2f2f8f260e30e979',
+            'note': 'Embedded ooyala video',
+            'info_dict': {
+                'id': '69525809',
+                'ext': 'mp4',
+                'title': 'Doctors Alter Plan For Preteen\'s Weight Loss Surgery',
+                'description': 'md5:f5a11c51f8fb51d2315bca0937526891',
+                'uploader': 'newsy-videos',
+            },
+        },
+    ]
+
+    def _extract_formats(self, source):
+        formats = []
+        link = source.get('aowPermalink')
+        if link:
+            formats.append({
+                'url': link,
+                'ext': 'mp4',
+                'format_id': 'aow',
+            })
+        link = source.get('fullPreviewHashLowPath')
+        if link:
+            formats.append({
+                'url': link,
+                'format_id': 'low',
+            })
+        link = source.get('fullPreviewHashHighPath')
+        if link:
+            formats.append({
+                'url': link,
+                'format_id': 'high',
+            })
+        return formats
+
+    def _extract_video(self, source):
+        return {
+            'id': source.get('videoId'),
+            'title': source.get('title'),
+            'description': source.get('description'),
+            'thumbnail': source.get('highResImage') or source.get('medResImage'),
+            'uploader': source.get('username'),
+            'duration': int_or_none(source.get('length')),
+            'view_count': int_or_none(source.get('views')),
+            'age_limit': 18 if source.get('isMature') == 'true' or source.get('isSexy') == 'true' else 0,
+            'formats': self._extract_formats(source),
        }
-    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
+
+        if video_id.startswith('v'):
+            rsp = self._download_xml(
+                r'http://www.veoh.com/api/findByPermalink?permalink=%s' % video_id, video_id, 'Downloading video XML')
+            if rsp.get('stat') == 'ok':
+                return self._extract_video(rsp.find('./videoList/video'))
+
        webpage = self._download_webpage(url, video_id)
        age_limit = 0
        if 'class="adultwarning-container"' in webpage:
@ -33,24 +106,16 @@ class VeohIE(InfoExtractor):
            request.add_header('Cookie', 'confirmedAdult=true')
            webpage = self._download_webpage(request, video_id)

-        m_youtube = re.search(r'http://www\.youtube\.com/v/(.*?)(\&|")', webpage)
+        m_youtube = re.search(r'http://www\.youtube\.com/v/(.*?)(\&|"|\?)', webpage)
        if m_youtube is not None:
            youtube_id = m_youtube.group(1)
            self.to_screen('%s: detected Youtube video.' % video_id)
            return self.url_result(youtube_id, 'Youtube')

-        self.report_extraction(video_id)
-        info = self._search_regex(r'videoDetailsJSON = \'({.*?})\';', webpage, 'info')
-        info = json.loads(info)
-        video_url = info.get('fullPreviewHashHighPath') or info.get('fullPreviewHashLowPath')
+        info = json.loads(
+            self._search_regex(r'videoDetailsJSON = \'({.*?})\';', webpage, 'info').replace('\\\'', '\''))

-        return {
-            'id': info['videoId'],
-            'title': info['title'],
-            'url': video_url,
-            'uploader': info['username'],
-            'thumbnail': info.get('highResImage') or info.get('medResImage'),
-            'description': info['description'],
-            'view_count': info['views'],
-            'age_limit': age_limit,
-        }
+        video = self._extract_video(info)
+        video['age_limit'] = age_limit
+
+        return video
--- a/youtube_dl/extractor/vice.py
+++ b/youtube_dl/extractor/vice.py
@ -1,38 +0,0 @@
-import re
-
-from .common import InfoExtractor
-from .ooyala import OoyalaIE
-from ..utils import ExtractorError
-
-
-class ViceIE(InfoExtractor):
-    _VALID_URL = r'http://www\.vice\.com/.*?/(?P<name>.+)'
-
-    _TEST = {
-        u'url': u'http://www.vice.com/Fringes/cowboy-capitalists-part-1',
-        u'file': u'43cW1mYzpia9IlestBjVpd23Yu3afAfp.mp4',
-        u'info_dict': {
-            u'title': u'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov',
-        },
-        u'params': {
-            # Requires ffmpeg (m3u8 manifest)
-            u'skip_download': True,
-        },
-    }
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        name = mobj.group('name')
-        webpage = self._download_webpage(url, name)
-        try:
-            ooyala_url = self._og_search_video_url(webpage)
-        except ExtractorError:
-            try:
-                embed_code = self._search_regex(
-                    r'OO.Player.create\(\'ooyalaplayer\', \'(.+?)\'', webpage,
-                    u'ooyala embed code')
-                ooyala_url = OoyalaIE._url_for_embed_code(embed_code)
-            except ExtractorError:
-                raise ExtractorError(u'The page doesn\'t contain a video', expected=True)
-        return self.url_result(ooyala_url, ie='Ooyala')
-
--- a/youtube_dl/extractor/videoweed.py
+++ b/youtube_dl/extractor/videoweed.py
@ -0,0 +1,26 @@
+from __future__ import unicode_literals
+
+from .novamov import NovaMovIE
+
+
+class VideoWeedIE(NovaMovIE):
+    IE_NAME = 'videoweed'
+    IE_DESC = 'VideoWeed'
+
+    _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'videoweed\.(?:es|com)'}
+
+    _HOST = 'www.videoweed.es'
+
+    _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
+    _TITLE_REGEX = r'<h1 class="text_shadow">([^<]+)</h1>'
+
+    _TEST = {
+        'url': 'http://www.videoweed.es/file/b42178afbea14',
+        'md5': 'abd31a2132947262c50429e1d16c1bfd',
+        'info_dict': {
+            'id': 'b42178afbea14',
+            'ext': 'flv',
+            'title': 'optical illusion  dissapeared image magic illusion',
+            'description': ''
+        },
+    }
--- a/youtube_dl/extractor/vk.py
+++ b/youtube_dl/extractor/vk.py
@ -16,7 +16,7 @@ from ..utils import (

 class VKIE(InfoExtractor):
    IE_NAME = 'vk.com'
-    _VALID_URL = r'https?://vk\.com/(?:video_ext\.php\?.*?\boid=(?P<oid>\d+).*?\bid=(?P<id>\d+)|(?:videos.*?\?.*?z=)?video(?P<videoid>.*?)(?:\?|%2F|$))'
+    _VALID_URL = r'https?://vk\.com/(?:video_ext\.php\?.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+)|(?:videos.*?\?.*?z=)?video(?P<videoid>.*?)(?:\?|%2F|$))'
    _NETRC_MACHINE = 'vk'

    _TESTS = [
--- a/youtube_dl/extractor/washingtonpost.py
+++ b/youtube_dl/extractor/washingtonpost.py
@ -0,0 +1,103 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    strip_jsonp,
+)
+
+
+class WashingtonPostIE(InfoExtractor):
+    _VALID_URL = r'^https?://(?:www\.)?washingtonpost\.com/.*?/(?P<id>[^/]+)/(?:$|[?#])'
+    _TEST = {
+        'url': 'http://www.washingtonpost.com/sf/national/2014/03/22/sinkhole-of-bureaucracy/',
+        'playlist': [{
+            'md5': 'c3f4b4922ffa259243f68e928db2db8c',
+            'info_dict': {
+                'id': 'fc433c38-b146-11e3-b8b3-44b1d1cd4c1f',
+                'ext': 'mp4',
+                'title': 'Breaking Points: The Paper Mine',
+                'duration': 1287,
+                'description': 'Overly complicated paper pushing is nothing new to government bureaucracy. But the way federal retirement applications are filed may be the most outdated. David Fahrenthold explains.',
+                'uploader': 'The Washington Post',
+                'timestamp': 1395527908,
+                'upload_date': '20140322',
+            },
+        }, {
+            'md5': 'f645a07652c2950cd9134bb852c5f5eb',
+            'info_dict': {
+                'id': '41255e28-b14a-11e3-b8b3-44b1d1cd4c1f',
+                'ext': 'mp4',
+                'title': 'The town bureaucracy sustains',
+                'description': 'Underneath the friendly town of Boyers is a sea of government paperwork. In a disused limestone mine, hundreds of locals now track, file and process retirement applications for the federal government. We set out to find out what it\'s like to do paperwork 230 feet underground.',
+                'duration': 2217,
+                'timestamp': 1395528005,
+                'upload_date': '20140322',
+                'uploader': 'The Washington Post',
+            },
+        }]
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        page_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, page_id)
+        title = self._og_search_title(webpage)
+        uuids = re.findall(r'data-video-uuid="([^"]+)"', webpage)
+        entries = []
+        for i, uuid in enumerate(uuids, start=1):
+            vinfo_all = self._download_json(
+                'http://www.washingtonpost.com/posttv/c/videojson/%s?resType=jsonp' % uuid,
+                page_id,
+                transform_source=strip_jsonp,
+                note='Downloading information of video %d/%d' % (i, len(uuids))
+            )
+            vinfo = vinfo_all[0]['contentConfig']
+            uploader = vinfo.get('credits', {}).get('source')
+            timestamp = int_or_none(
+                vinfo.get('dateConfig', {}).get('dateFirstPublished'), 1000)
+
+            formats = [{
+                'format_id': (
+                    '%s-%s-%s' % (s.get('type'), s.get('width'), s.get('bitrate'))
+                    if s.get('width')
+                    else s.get('type')),
+                'vbr': s.get('bitrate') if s.get('width') != 0 else None,
+                'width': s.get('width'),
+                'height': s.get('height'),
+                'acodec': s.get('audioCodec'),
+                'vcodec': s.get('videoCodec') if s.get('width') != 0 else 'none',
+                'filesize': s.get('fileSize'),
+                'url': s.get('url'),
+                'ext': 'mp4',
+                'protocol': {
+                    'MP4': 'http',
+                    'F4F': 'f4m',
+                }.get(s.get('type'))
+            } for s in vinfo.get('streams', [])]
+            source_media_url = vinfo.get('sourceMediaURL')
+            if source_media_url:
+                formats.append({
+                    'format_id': 'source_media',
+                    'url': source_media_url,
+                })
+            self._sort_formats(formats)
+            entries.append({
+                'id': uuid,
+                'title': vinfo['title'],
+                'description': vinfo.get('blurb'),
+                'uploader': uploader,
+                'formats': formats,
+                'duration': int_or_none(vinfo.get('videoDuration'), 100),
+                'timestamp': timestamp,
+            })
+
+        return {
+            '_type': 'playlist',
+            'entries': entries,
+            'id': page_id,
+            'title': title,
+        }
--- a/youtube_dl/extractor/wat.py
+++ b/youtube_dl/extractor/wat.py
@ -1,37 +1,37 @@
 # coding: utf-8
+from __future__ import unicode_literals

-import json
 import re

 from .common import InfoExtractor
-
 from ..utils import (
    unified_strdate,
 )


 class WatIE(InfoExtractor):
-    _VALID_URL=r'http://www\.wat\.tv/.*-(?P<shortID>.*?)_.*?\.html'
+    _VALID_URL = r'http://www\.wat\.tv/.*-(?P<shortID>.*?)_.*?\.html'
    IE_NAME = 'wat.tv'
    _TEST = {
-        u'url': u'http://www.wat.tv/video/world-war-philadelphia-vost-6bv55_2fjr7_.html',
-        u'file': u'10631273.mp4',
-        u'md5': u'd8b2231e1e333acd12aad94b80937e19',
-        u'info_dict': {
-            u'title': u'World War Z - Philadelphia VOST',
-            u'description': u'La menace est partout. Que se passe-t-il à Philadelphia ?\r\nWORLD WAR Z, avec Brad Pitt, au cinéma le 3 juillet.\r\nhttp://www.worldwarz.fr',
+        'url': 'http://www.wat.tv/video/world-war-philadelphia-vost-6bv55_2fjr7_.html',
+        'info_dict': {
+            'id': '10631273',
+            'ext': 'mp4',
+            'title': 'World War Z - Philadelphia VOST',
+            'description': 'La menace est partout. Que se passe-t-il à Philadelphia ?\r\nWORLD WAR Z, avec Brad Pitt, au cinéma le 3 juillet.\r\nhttp://www.worldwarz.fr',
+        },
+        'params': {
+            # Sometimes wat serves the whole file with the --test option
+            'skip_download': True,
        },
-        u'skip': u'Sometimes wat serves the whole file with the --test option',
    }
-    
+
    def download_video_info(self, real_id):
        # 'contentv4' is used in the website, but it also returns the related
        # videos, we don't need them
-        info = self._download_webpage('http://www.wat.tv/interface/contentv3/' + real_id, real_id, 'Downloading video info')
-        info = json.loads(info)
+        info = self._download_json('http://www.wat.tv/interface/contentv3/' + real_id, real_id)
        return info['media']

-
    def _real_extract(self, url):
        def real_id_for_chapter(chapter):
            return chapter['tc_start'].split('-')[0]
@ -56,17 +56,17 @@ class WatIE(InfoExtractor):
            entries = [self.url_result(chapter_url) for chapter_url in chapter_urls]
            return self.playlist_result(entries, real_id, video_info['title'])

+        upload_date = None
+        if 'date_diffusion' in first_chapter:
+            upload_date = unified_strdate(first_chapter['date_diffusion'])
        # Otherwise we can continue and extract just one part, we have to use
        # the short id for getting the video url
-        info = {'id': real_id,
-                'url': 'http://wat.tv/get/android5/%s.mp4' % real_id,
-                'ext': 'mp4',
-                'title': first_chapter['title'],
-                'thumbnail': first_chapter['preview'],
-                'description': first_chapter['description'],
-                'view_count': video_info['views'],
-                }
-        if 'date_diffusion' in first_chapter:
-            info['upload_date'] = unified_strdate(first_chapter['date_diffusion'])
-
-        return info
+        return {
+            'id': real_id,
+            'url': 'http://wat.tv/get/android5/%s.mp4' % real_id,
+            'title': first_chapter['title'],
+            'thumbnail': first_chapter['preview'],
+            'description': first_chapter['description'],
+            'view_count': video_info['views'],
+            'upload_date': upload_date,
+        }
--- a/youtube_dl/extractor/wdr.py
+++ b/youtube_dl/extractor/wdr.py
@ -4,9 +4,10 @@ import re

 from .common import InfoExtractor
 from ..utils import (
-    unified_strdate,
+    compat_parse_qs,
    compat_urlparse,
    determine_ext,
+    unified_strdate,
 )


@ -111,4 +112,85 @@ class WDRIE(InfoExtractor):
            'description': description,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
-        }
+        }
+
+
+class WDRMausIE(InfoExtractor):
+    _VALID_URL = 'http://(?:www\.)?wdrmaus\.de/(?:[^/]+/){,2}(?P<id>[^/?#]+)(?:/index\.php5|(?<!index)\.php5|/(?:$|[?#]))'
+    IE_DESC = 'Sendung mit der Maus'
+    _TESTS = [{
+        'url': 'http://www.wdrmaus.de/aktuelle-sendung/index.php5',
+        'info_dict': {
+            'id': 'aktuelle-sendung',
+            'ext': 'mp4',
+            'thumbnail': 're:^http://.+\.jpg',
+            'upload_date': 're:^[0-9]{8}$',
+            'title': 're:^[0-9.]{10} - Aktuelle Sendung$',
+        }
+    }, {
+        'url': 'http://www.wdrmaus.de/sachgeschichten/sachgeschichten/40_jahre_maus.php5',
+        'md5': '3b1227ca3ed28d73ec5737c65743b2a3',
+        'info_dict': {
+            'id': '40_jahre_maus',
+            'ext': 'mp4',
+            'thumbnail': 're:^http://.+\.jpg',
+            'upload_date': '20131007',
+            'title': '12.03.2011 - 40 Jahre Maus',
+        }
+    }]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+        param_code = self._html_search_regex(
+            r'<a href="\?startVideo=1&amp;([^"]+)"', webpage, 'parameters')
+
+        title_date = self._search_regex(
+            r'<div class="sendedatum"><p>Sendedatum:\s*([0-9\.]+)</p>',
+            webpage, 'air date')
+        title_str = self._html_search_regex(
+            r'<h1>(.*?)</h1>', webpage, 'title')
+        title = '%s - %s' % (title_date, title_str)
+        upload_date = unified_strdate(
+            self._html_search_meta('dc.date', webpage))
+
+        fields = compat_parse_qs(param_code)
+        video_url = fields['firstVideo'][0]
+        thumbnail = compat_urlparse.urljoin(url, fields['startPicture'][0])
+
+        formats = [{
+            'format_id': 'rtmp',
+            'url': video_url,
+        }]
+
+        jscode = self._download_webpage(
+            'http://www.wdrmaus.de/codebase/js/extended-medien.min.js',
+            video_id, fatal=False,
+            note='Downloading URL translation table',
+            errnote='Could not download URL translation table')
+        if jscode:
+            for m in re.finditer(
+                    r"stream:\s*'dslSrc=(?P<stream>[^']+)',\s*download:\s*'(?P<dl>[^']+)'\s*\}",
+                    jscode):
+                if video_url.startswith(m.group('stream')):
+                    http_url = video_url.replace(
+                        m.group('stream'), m.group('dl'))
+                    formats.append({
+                        'format_id': 'http',
+                        'url': http_url,
+                    })
+                    break
+
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'formats': formats,
+            'thumbnail': thumbnail,
+            'upload_date': upload_date,
+        }
+
+# TODO test _1
--- a/youtube_dl/extractor/weibo.py
+++ b/youtube_dl/extractor/weibo.py
@ -1,10 +1,11 @@
 # coding: utf-8
+from __future__ import unicode_literals

 import re
-import json

 from .common import InfoExtractor

+
 class WeiboIE(InfoExtractor):
    """
    The videos in Weibo come from different sites, this IE just finds the link
@ -13,16 +14,16 @@ class WeiboIE(InfoExtractor):
    _VALID_URL = r'https?://video\.weibo\.com/v/weishipin/t_(?P<id>.+?)\.htm'

    _TEST = {
-        u'add_ie': ['Sina'],
-        u'url': u'http://video.weibo.com/v/weishipin/t_zjUw2kZ.htm',
-        u'file': u'98322879.flv',
-        u'info_dict': {
-            u'title': u'魔声耳机最新广告“All Eyes On Us”',
+        'url': 'http://video.weibo.com/v/weishipin/t_zjUw2kZ.htm',
+        'info_dict': {
+            'id': '98322879',
+            'ext': 'flv',
+            'title': '魔声耳机最新广告“All Eyes On Us”',
        },
-        u'note': u'Sina video',
-        u'params': {
-            u'skip_download': True,
+        'params': {
+            'skip_download': True,
        },
+        'add_ie': ['Sina'],
    }

    # Additional example videos from different sites
@ -33,17 +34,16 @@ class WeiboIE(InfoExtractor):
        mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
        video_id = mobj.group('id')
        info_url = 'http://video.weibo.com/?s=v&a=play_list&format=json&mix_video_id=t_%s' % video_id
-        info_page = self._download_webpage(info_url, video_id)
-        info = json.loads(info_page)
+        info = self._download_json(info_url, video_id)

        videos_urls = map(lambda v: v['play_page_url'], info['result']['data'])
-        #Prefer sina video since they have thumbnails
-        videos_urls = sorted(videos_urls, key=lambda u: u'video.sina.com' in u)
+        # Prefer sina video since they have thumbnails
+        videos_urls = sorted(videos_urls, key=lambda u: 'video.sina.com' in u)
        player_url = videos_urls[-1]
-        m_sina = re.match(r'https?://video.sina.com.cn/v/b/(\d+)-\d+.html', player_url)
+        m_sina = re.match(r'https?://video\.sina\.com\.cn/v/b/(\d+)-\d+\.html',
+            player_url)
        if m_sina is not None:
            self.to_screen('Sina video detected')
            sina_id = m_sina.group(1)
            player_url = 'http://you.video.sina.com.cn/swf/quotePlayer.swf?vid=%s' % sina_id
        return self.url_result(player_url)
-
--- a/youtube_dl/extractor/wimp.py
+++ b/youtube_dl/extractor/wimp.py
@ -3,11 +3,12 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
+from .youtube import YoutubeIE


 class WimpIE(InfoExtractor):
    _VALID_URL = r'http://(?:www\.)?wimp\.com/([^/]+)/'
-    _TEST = {
+    _TESTS = [{
        'url': 'http://www.wimp.com/maruexhausted/',
        'md5': 'f1acced123ecb28d9bb79f2479f2b6a1',
        'info_dict': {
@ -16,7 +17,20 @@ class WimpIE(InfoExtractor):
            'title': 'Maru is exhausted.',
            'description': 'md5:57e099e857c0a4ea312542b684a869b8',
        }
-    }
+    }, {
+        # youtube video
+        'url': 'http://www.wimp.com/clowncar/',
+        'info_dict': {
+            'id': 'cG4CEr2aiSg',
+            'ext': 'mp4',
+            'title': 'Basset hound clown car...incredible!',
+            'description': 'md5:8d228485e0719898c017203f900b3a35',
+            'uploader': 'Gretchen Hoey',
+            'uploader_id': 'gretchenandjeff1',
+            'upload_date': '20140303',
+        },
+        'add_ie': ['Youtube'],
+    }]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
@ -24,6 +38,13 @@ class WimpIE(InfoExtractor):
        webpage = self._download_webpage(url, video_id)
        video_url = self._search_regex(
            r's1\.addVariable\("file",\s*"([^"]+)"\);', webpage, 'video URL')
+        if YoutubeIE.suitable(video_url):
+            self.to_screen('Found YouTube video')
+            return {
+                '_type': 'url',
+                'url': video_url,
+                'ie_key': YoutubeIE.ie_key(),
+            }

        return {
            'id': video_id,
@ -31,4 +52,4 @@ class WimpIE(InfoExtractor):
            'title': self._og_search_title(webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
            'description': self._og_search_description(webpage),
-        }
+        }
--- a/youtube_dl/extractor/yahoo.py
+++ b/youtube_dl/extractor/yahoo.py
@ -15,22 +15,24 @@ from ..utils import (

 class YahooIE(InfoExtractor):
    IE_DESC = 'Yahoo screen'
-    _VALID_URL = r'http://screen\.yahoo\.com/.*?-(?P<id>\d*?)\.html'
+    _VALID_URL = r'https?://screen\.yahoo\.com/.*?-(?P<id>[0-9]+)(?:-[a-z]+)?\.html'
    _TESTS = [
        {
            'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
-            'file': '214727115.mp4',
            'md5': '4962b075c08be8690a922ee026d05e69',
            'info_dict': {
+                'id': '214727115',
+                'ext': 'mp4',
                'title': 'Julian Smith & Travis Legg Watch Julian Smith',
                'description': 'Julian and Travis watch Julian Smith',
            },
        },
        {
            'url': 'http://screen.yahoo.com/wired/codefellas-s1-ep12-cougar-lies-103000935.html',
-            'file': '103000935.mp4',
            'md5': 'd6e6fc6e1313c608f316ddad7b82b306',
            'info_dict': {
+                'id': '103000935',
+                'ext': 'mp4',
                'title': 'Codefellas - The Cougar Lies with Spanish Moss',
                'description': 'Agent Topple\'s mustache does its dirty work, and Nicole brokers a deal for peace. But why is the NSA collecting millions of Instagram brunch photos? And if your waffles have nothing to hide, what are they so worried about?',
            },
@ -60,10 +62,9 @@ class YahooIE(InfoExtractor):
            'env': 'prod',
            'format': 'json',
        })
-        query_result_json = self._download_webpage(
+        query_result = self._download_json(
            'http://video.query.yahoo.com/v1/public/yql?' + data,
            video_id, 'Downloading video info')
-        query_result = json.loads(query_result_json)
        info = query_result['query']['results']['mediaObj'][0]
        meta = info['meta']

@ -86,7 +87,6 @@ class YahooIE(InfoExtractor):
            else:
                format_url = compat_urlparse.urljoin(host, path)
                format_info['url'] = format_url
-                
            formats.append(format_info)

        self._sort_formats(formats)
@ -134,27 +134,25 @@ class YahooSearchIE(SearchInfoExtractor):

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
-
-        res = {
-            '_type': 'playlist',
-            'id': query,
-            'entries': []
-        }
-        for pagenum in itertools.count(0): 
+        entries = []
+        for pagenum in itertools.count(0):
            result_url = 'http://video.search.yahoo.com/search/?p=%s&fr=screen&o=js&gs=0&b=%d' % (compat_urllib_parse.quote_plus(query), pagenum * 30)
-            webpage = self._download_webpage(result_url, query,
-                                             note='Downloading results page '+str(pagenum+1))
-            info = json.loads(webpage)
+            info = self._download_json(result_url, query,
+                note='Downloading results page '+str(pagenum+1))
            m = info['m']
            results = info['results']

            for (i, r) in enumerate(results):
-                if (pagenum * 30) +i >= n:
+                if (pagenum * 30) + i >= n:
                    break
                mobj = re.search(r'(?P<url>screen\.yahoo\.com/.*?-\d*?\.html)"', r)
                e = self.url_result('http://' + mobj.group('url'), 'Yahoo')
-                res['entries'].append(e)
-            if (pagenum * 30 +i >= n) or (m['last'] >= (m['total'] -1)):
+                entries.append(e)
+            if (pagenum * 30 + i >= n) or (m['last'] >= (m['total'] - 1)):
                break

-        return res
+        return {
+            '_type': 'playlist',
+            'id': query,
+            'entries': entries,
+        }
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@ -7,13 +7,13 @@ import itertools
 import json
 import os.path
 import re
-import string
 import struct
 import traceback
 import zlib

 from .common import InfoExtractor, SearchInfoExtractor
 from .subtitles import SubtitlesInfoExtractor
+from ..jsinterp import JSInterpreter
 from ..utils import (
    compat_chr,
    compat_parse_qs,
@ -151,6 +151,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                             )
                         ))
                         |youtu\.be/                                          # just youtu.be/xxxx
+                         |https?://(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
                         )
                     )?                                                       # all until now is optional -> you can pass the naked ID
                     ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
@ -438,113 +439,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
    def _parse_sig_js(self, jscode):
        funcname = self._search_regex(
            r'signature=([a-zA-Z]+)', jscode,
-            u'Initial JS player signature function name')
+             u'Initial JS player signature function name')

-        functions = {}
-
-        def argidx(varname):
-            return string.lowercase.index(varname)
-
-        def interpret_statement(stmt, local_vars, allow_recursion=20):
-            if allow_recursion < 0:
-                raise ExtractorError(u'Recursion limit reached')
-
-            if stmt.startswith(u'var '):
-                stmt = stmt[len(u'var '):]
-            ass_m = re.match(r'^(?P<out>[a-z]+)(?:\[(?P<index>[^\]]+)\])?' +
-                             r'=(?P<expr>.*)$', stmt)
-            if ass_m:
-                if ass_m.groupdict().get('index'):
-                    def assign(val):
-                        lvar = local_vars[ass_m.group('out')]
-                        idx = interpret_expression(ass_m.group('index'),
-                                                   local_vars, allow_recursion)
-                        assert isinstance(idx, int)
-                        lvar[idx] = val
-                        return val
-                    expr = ass_m.group('expr')
-                else:
-                    def assign(val):
-                        local_vars[ass_m.group('out')] = val
-                        return val
-                    expr = ass_m.group('expr')
-            elif stmt.startswith(u'return '):
-                assign = lambda v: v
-                expr = stmt[len(u'return '):]
-            else:
-                raise ExtractorError(
-                    u'Cannot determine left side of statement in %r' % stmt)
-
-            v = interpret_expression(expr, local_vars, allow_recursion)
-            return assign(v)
-
-        def interpret_expression(expr, local_vars, allow_recursion):
-            if expr.isdigit():
-                return int(expr)
-
-            if expr.isalpha():
-                return local_vars[expr]
-
-            m = re.match(r'^(?P<in>[a-z]+)\.(?P<member>.*)$', expr)
-            if m:
-                member = m.group('member')
-                val = local_vars[m.group('in')]
-                if member == 'split("")':
-                    return list(val)
-                if member == 'join("")':
-                    return u''.join(val)
-                if member == 'length':
-                    return len(val)
-                if member == 'reverse()':
-                    return val[::-1]
-                slice_m = re.match(r'slice\((?P<idx>.*)\)', member)
-                if slice_m:
-                    idx = interpret_expression(
-                        slice_m.group('idx'), local_vars, allow_recursion-1)
-                    return val[idx:]
-
-            m = re.match(
-                r'^(?P<in>[a-z]+)\[(?P<idx>.+)\]$', expr)
-            if m:
-                val = local_vars[m.group('in')]
-                idx = interpret_expression(m.group('idx'), local_vars,
-                                           allow_recursion-1)
-                return val[idx]
-
-            m = re.match(r'^(?P<a>.+?)(?P<op>[%])(?P<b>.+?)$', expr)
-            if m:
-                a = interpret_expression(m.group('a'),
-                                         local_vars, allow_recursion)
-                b = interpret_expression(m.group('b'),
-                                         local_vars, allow_recursion)
-                return a % b
-
-            m = re.match(
-                r'^(?P<func>[a-zA-Z$]+)\((?P<args>[a-z0-9,]+)\)$', expr)
-            if m:
-                fname = m.group('func')
-                if fname not in functions:
-                    functions[fname] = extract_function(fname)
-                argvals = [int(v) if v.isdigit() else local_vars[v]
-                           for v in m.group('args').split(',')]
-                return functions[fname](argvals)
-            raise ExtractorError(u'Unsupported JS expression %r' % expr)
-
-        def extract_function(funcname):
-            func_m = re.search(
-                r'function ' + re.escape(funcname) +
-                r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
-                jscode)
-            argnames = func_m.group('args').split(',')
-
-            def resf(args):
-                local_vars = dict(zip(argnames, args))
-                for stmt in func_m.group('code').split(';'):
-                    res = interpret_statement(stmt, local_vars)
-                return res
-            return resf
-
-        initial_function = extract_function(funcname)
+        jsi = JSInterpreter(jscode)
+        initial_function = jsi.extract_function(funcname)
        return lambda s: initial_function([s])

    def _parse_sig_swf(self, file_contents):
@ -1549,12 +1447,15 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
                break

            more = self._download_json(
-                'https://youtube.com/%s' % mobj.group('more'), playlist_id, 'Downloading page #%s' % page_num)
+                'https://youtube.com/%s' % mobj.group('more'), playlist_id,
+                'Downloading page #%s' % page_num,
+                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

        playlist_title = self._html_search_regex(
-                r'<h1 class="pl-header-title">\s*(.*?)\s*</h1>', page, u'title')
+            r'(?s)<h1 class="pl-header-title[^"]*">\s*(.*?)\s*</h1>',
+            page, u'title')

        url_results = self._ids_to_results(ids)
        return self.playlist_result(url_results, playlist_id, playlist_title)
@ -1712,7 +1613,7 @@ class YoutubeUserIE(InfoExtractor):

 class YoutubeSearchIE(SearchInfoExtractor):
    IE_DESC = u'YouTube.com searches'
-    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
+    _API_URL = u'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
    _MAX_RESULTS = 1000
    IE_NAME = u'youtube:search'
    _SEARCH_KEY = 'ytsearch'
@ -1723,9 +1624,12 @@ class YoutubeSearchIE(SearchInfoExtractor):
        video_ids = []
        pagenum = 0
        limit = n
+        PAGE_SIZE = 50

-        while (50 * pagenum) < limit:
-            result_url = self._API_URL % (compat_urllib_parse.quote_plus(query), (50*pagenum)+1)
+        while (PAGE_SIZE * pagenum) < limit:
+            result_url = self._API_URL % (
+                compat_urllib_parse.quote_plus(query.encode('utf-8')),
+                (PAGE_SIZE * pagenum) + 1)
            data_json = self._download_webpage(
                result_url, video_id=u'query "%s"' % query,
                note=u'Downloading page %s' % (pagenum + 1),
@ -1836,11 +1740,10 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
        feed_entries = []
        paging = 0
        for i in itertools.count(1):
-            info = self._download_webpage(self._FEED_TEMPLATE % paging,
+            info = self._download_json(self._FEED_TEMPLATE % paging,
                                          u'%s feed' % self._FEED_NAME,
                                          u'Downloading page %s' % i)
-            info = json.loads(info)
-            feed_html = info['feed_html']
+            feed_html = info.get('feed_html') or info.get('content_html')
            m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
            ids = orderedSet(m.group(1) for m in m_ids)
            feed_entries.extend(
@ -1852,7 +1755,7 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
        return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)

 class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
-    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword(requires authentication)'
+    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = u'Youtube Subscriptions'
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@ -0,0 +1,116 @@
+from __future__ import unicode_literals
+
+import re
+
+from .utils import (
+    ExtractorError,
+)
+
+
+class JSInterpreter(object):
+    def __init__(self, code):
+        self.code = code
+        self._functions = {}
+
+    def interpret_statement(self, stmt, local_vars, allow_recursion=20):
+        if allow_recursion < 0:
+            raise ExtractorError('Recursion limit reached')
+
+        if stmt.startswith('var '):
+            stmt = stmt[len('var '):]
+        ass_m = re.match(r'^(?P<out>[a-z]+)(?:\[(?P<index>[^\]]+)\])?' +
+                         r'=(?P<expr>.*)$', stmt)
+        if ass_m:
+            if ass_m.groupdict().get('index'):
+                def assign(val):
+                    lvar = local_vars[ass_m.group('out')]
+                    idx = self.interpret_expression(
+                        ass_m.group('index'), local_vars, allow_recursion)
+                    assert isinstance(idx, int)
+                    lvar[idx] = val
+                    return val
+                expr = ass_m.group('expr')
+            else:
+                def assign(val):
+                    local_vars[ass_m.group('out')] = val
+                    return val
+                expr = ass_m.group('expr')
+        elif stmt.startswith('return '):
+            assign = lambda v: v
+            expr = stmt[len('return '):]
+        else:
+            raise ExtractorError(
+                'Cannot determine left side of statement in %r' % stmt)
+
+        v = self.interpret_expression(expr, local_vars, allow_recursion)
+        return assign(v)
+
+    def interpret_expression(self, expr, local_vars, allow_recursion):
+        if expr.isdigit():
+            return int(expr)
+
+        if expr.isalpha():
+            return local_vars[expr]
+
+        m = re.match(r'^(?P<in>[a-z]+)\.(?P<member>.*)$', expr)
+        if m:
+            member = m.group('member')
+            val = local_vars[m.group('in')]
+            if member == 'split("")':
+                return list(val)
+            if member == 'join("")':
+                return u''.join(val)
+            if member == 'length':
+                return len(val)
+            if member == 'reverse()':
+                return val[::-1]
+            slice_m = re.match(r'slice\((?P<idx>.*)\)', member)
+            if slice_m:
+                idx = self.interpret_expression(
+                    slice_m.group('idx'), local_vars, allow_recursion - 1)
+                return val[idx:]
+
+        m = re.match(
+            r'^(?P<in>[a-z]+)\[(?P<idx>.+)\]$', expr)
+        if m:
+            val = local_vars[m.group('in')]
+            idx = self.interpret_expression(
+                m.group('idx'), local_vars, allow_recursion - 1)
+            return val[idx]
+
+        m = re.match(r'^(?P<a>.+?)(?P<op>[%])(?P<b>.+?)$', expr)
+        if m:
+            a = self.interpret_expression(
+                m.group('a'), local_vars, allow_recursion)
+            b = self.interpret_expression(
+                m.group('b'), local_vars, allow_recursion)
+            return a % b
+
+        m = re.match(
+            r'^(?P<func>[a-zA-Z$]+)\((?P<args>[a-z0-9,]+)\)$', expr)
+        if m:
+            fname = m.group('func')
+            if fname not in self._functions:
+                self._functions[fname] = self.extract_function(fname)
+            argvals = [int(v) if v.isdigit() else local_vars[v]
+                       for v in m.group('args').split(',')]
+            return self._functions[fname](argvals)
+        raise ExtractorError('Unsupported JS expression %r' % expr)
+
+    def extract_function(self, funcname):
+        func_m = re.search(
+            (r'(?:function %s|%s\s*=\s*function)' % (
+                re.escape(funcname), re.escape(funcname))) +
+            r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
+            self.code)
+        if func_m is None:
+            raise ExtractorError('Could not find JS function %r' % funcname)
+        argnames = func_m.group('args').split(',')
+
+        def resf(args):
+            local_vars = dict(zip(argnames, args))
+            for stmt in func_m.group('code').split(';'):
+                res = self.interpret_statement(stmt, local_vars)
+            return res
+        return resf
+
--- a/youtube_dl/postprocessor/init.py
+++ b/youtube_dl/postprocessor/init.py
@ -1,5 +1,7 @@

+from .atomicparsley import AtomicParsleyPP
 from .ffmpeg import (
+    FFmpegAudioFixPP,
    FFmpegMergerPP,
    FFmpegMetadataPP,
    FFmpegVideoConvertor,
@ -9,6 +11,8 @@ from .ffmpeg import (
 from .xattrpp import XAttrMetadataPP

 __all__ = [
+    'AtomicParsleyPP',
+    'FFmpegAudioFixPP',
    'FFmpegMergerPP',
    'FFmpegMetadataPP',
    'FFmpegVideoConvertor',
--- a/youtube_dl/postprocessor/atomicparsley.py
+++ b/youtube_dl/postprocessor/atomicparsley.py
@ -0,0 +1,56 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+
+import os
+import subprocess
+
+from .common import PostProcessor
+
+from ..utils import (
+    check_executable,
+    compat_urlretrieve,
+    encodeFilename,
+    PostProcessingError,
+    prepend_extension,
+    shell_quote
+)
+
+
+class AtomicParsleyPPError(PostProcessingError):
+    pass
+
+
+class AtomicParsleyPP(PostProcessor):
+    def run(self, info):
+        if not check_executable('AtomicParsley', ['-v']):
+            raise AtomicParsleyPPError('AtomicParsley was not found. Please install.')
+
+        filename = info['filepath']
+        temp_filename = prepend_extension(filename, 'temp')
+        temp_thumbnail = prepend_extension(filename, 'thumb')
+
+        if not info.get('thumbnail'):
+            raise AtomicParsleyPPError('Thumbnail was not found. Nothing to do.')
+
+        compat_urlretrieve(info['thumbnail'], temp_thumbnail)
+
+        cmd = ['AtomicParsley', filename, '--artwork', temp_thumbnail, '-o', temp_filename]
+
+        self._downloader.to_screen('[atomicparsley] Adding thumbnail to "%s"' % filename)
+
+        if self._downloader.params.get('verbose', False):
+            self._downloader.to_screen('[debug] AtomicParsley command line: %s' % shell_quote(cmd))
+
+        p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        stdout, stderr = p.communicate()
+
+        if p.returncode != 0:
+            msg = stderr.decode('utf-8', 'replace').strip()
+            raise AtomicParsleyPPError(msg)
+
+        os.remove(encodeFilename(filename))
+        os.remove(encodeFilename(temp_thumbnail))
+        os.rename(encodeFilename(temp_filename), encodeFilename(filename))
+
+        return True, info
--- a/youtube_dl/postprocessor/ffmpeg.py
+++ b/youtube_dl/postprocessor/ffmpeg.py
@ -54,7 +54,7 @@ class FFmpegPostProcessor(PostProcessor):
        if self._downloader.params.get('verbose', False):
            self._downloader.to_screen(u'[debug] ffmpeg command line: %s' % shell_quote(cmd))
        p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-        stdout,stderr = p.communicate()
+        stdout, stderr = p.communicate()
        if p.returncode != 0:
            stderr = stderr.decode('utf-8', 'replace')
            msg = stderr.strip().split('\n')[-1]
@ -464,7 +464,11 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
        filename = info['filepath']
        temp_filename = prepend_extension(filename, 'temp')

-        options = ['-c', 'copy']
+        if info['ext'] == u'm4a':
+            options = ['-vn', '-acodec', 'copy']
+        else:
+            options = ['-c', 'copy']
+
        for (name, value) in metadata.items():
            options.extend(['-metadata', '%s=%s' % (name, value)])

@ -483,3 +487,17 @@ class FFmpegMergerPP(FFmpegPostProcessor):
        self.run_ffmpeg_multiple_files(info['__files_to_merge'], filename, args)
        return True, info

+
+class FFmpegAudioFixPP(FFmpegPostProcessor):
+    def run(self, info):
+        filename = info['filepath']
+        temp_filename = prepend_extension(filename, 'temp')
+
+        options = ['-vn', '-acodec', 'copy']
+        self._downloader.to_screen(u'[ffmpeg] Fixing audio file "%s"' % filename)
+        self.run_ffmpeg(filename, temp_filename, options)
+
+        os.remove(encodeFilename(filename))
+        os.rename(encodeFilename(temp_filename), encodeFilename(filename))
+
+        return True, info
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@ -2,6 +2,7 @@
 # -*- coding: utf-8 -*-

 import calendar
+import codecs
 import contextlib
 import ctypes
 import datetime
@ -539,7 +540,6 @@ def encodeFilename(s, for_subprocess=False):
        encoding = 'utf-8'
    return s.encode(encoding, 'ignore')

-
 def decodeOption(optval):
    if optval is None:
        return optval
@ -910,25 +910,84 @@ def platform_name():
    return res


-def write_string(s, out=None):
+def _windows_write_string(s, out):
+    """ Returns True if the string was written using special methods,
+    False if it has yet to be written out."""
+    # Adapted from http://stackoverflow.com/a/3259271/35070
+
+    import ctypes
+    import ctypes.wintypes
+
+    WIN_OUTPUT_IDS = {
+        1: -11,
+        2: -12,
+    }
+
+    def ucs2_len(s):
+        return sum((2 if ord(c) > 0xffff else 1) for c in s)
+
+    fileno = out.fileno()
+    if fileno not in WIN_OUTPUT_IDS:
+        return False
+
+    GetStdHandle = ctypes.WINFUNCTYPE(
+        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
+        ("GetStdHandle", ctypes.windll.kernel32))
+    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])
+
+    WriteConsoleW = ctypes.WINFUNCTYPE(
+        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
+        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
+        ctypes.wintypes.LPVOID)(("WriteConsoleW", ctypes.windll.kernel32))
+    written = ctypes.wintypes.DWORD(0)
+
+    GetFileType = ctypes.WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(("GetFileType", ctypes.windll.kernel32))
+    FILE_TYPE_CHAR = 0x0002
+    FILE_TYPE_REMOTE = 0x8000
+    GetConsoleMode = ctypes.WINFUNCTYPE(
+        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
+        ctypes.POINTER(ctypes.wintypes.DWORD))(
+        ("GetConsoleMode", ctypes.windll.kernel32))
+    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value
+
+    def not_a_console(handle):
+        if handle == INVALID_HANDLE_VALUE or handle is None:
+            return True
+        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
+                or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)
+
+    if not_a_console(h):
+        return False
+
+    remaining = ucs2_len(s)
+    while remaining > 0:
+        ret = WriteConsoleW(
+            h, s, min(remaining, 1024), ctypes.byref(written), None)
+        if ret == 0:
+            raise OSError('Failed to write string')
+        remaining -= written.value
+    return True
+
+
+def write_string(s, out=None, encoding=None):
    if out is None:
        out = sys.stderr
    assert type(s) == compat_str

+    if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
+        if _windows_write_string(s, out):
+            return
+
    if ('b' in getattr(out, 'mode', '') or
            sys.version_info[0] < 3):  # Python 2 lies about mode of sys.stderr
-        s = s.encode(preferredencoding(), 'ignore')
-    try:
+        byt = s.encode(encoding or preferredencoding(), 'ignore')
+        out.write(byt)
+    elif hasattr(out, 'buffer'):
+        enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
+        byt = s.encode(enc, 'ignore')
+        out.buffer.write(byt)
+    else:
        out.write(s)
-    except UnicodeEncodeError:
-        # In Windows shells, this can fail even when the codec is just charmap!?
-        # See https://wiki.python.org/moin/PrintFails#Issue
-        if sys.platform == 'win32' and hasattr(out, 'encoding'):
-            s = s.encode(out.encoding, 'ignore').decode(out.encoding)
-            out.write(s)
-        else:
-            raise
-
    out.flush()


@ -1177,8 +1236,12 @@ class HEADRequest(compat_urllib_request.Request):
        return "HEAD"


-def int_or_none(v, scale=1):
-    return v if v is None else (int(v) // scale)
+def int_or_none(v, scale=1, default=None):
+    return default if v is None else (int(v) // scale)
+
+
+def float_or_none(v, scale=1, default=None):
+    return default if v is None else (float(v) / scale)


 def parse_duration(s):
@ -1186,7 +1249,7 @@ def parse_duration(s):
        return None

    m = re.match(
-        r'(?:(?:(?P<hours>[0-9]+)[:h])?(?P<mins>[0-9]+)[:m])?(?P<secs>[0-9]+)s?$', s)
+        r'(?:(?:(?P<hours>[0-9]+)[:h])?(?P<mins>[0-9]+)[:m])?(?P<secs>[0-9]+)s?(?::[0-9]+)?$', s)
    if not m:
        return None
    res = int(m.group('secs'))
@ -1260,9 +1323,11 @@ class PagedList(object):


 def uppercase_escape(s):
+    unicode_escape = codecs.getdecoder('unicode_escape')
    return re.sub(
-        r'\\U([0-9a-fA-F]{8})',
-        lambda m: compat_chr(int(m.group(1), base=16)), s)
+        r'\\U[0-9a-fA-F]{8}',
+        lambda m: unicode_escape(m.group(0))[0],
+        s)

 try:
    struct.pack(u'!I', 0)
@ -1328,3 +1393,7 @@ US_RATINGS = {
    'R': 16,
    'NC': 18,
 }
+
+
+def strip_jsonp(code):
+    return re.sub(r'(?s)^[a-zA-Z_]+\s*\(\s*(.*)\);\s*?\s*$', r'\1', code)
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@ -1,2 +1,2 @@

-__version__ = '2014.03.24.1'
+__version__ = '2014.04.13'