release 2014.04.03.3

[cnet] Add new extractor (Fixes #2679)
[comedycentral] Add test for #2677
2014-04-03 16:21:54 +02:00 · 2014-04-03 16:21:21 +02:00 · 2014-04-03 15:31:04 +02:00 · 2014-04-03 15:28:56 +02:00 · 2014-04-03 15:28:41 +02:00 · 2014-04-03 15:13:52 +02:00
58 changed files with 1588 additions and 726 deletions
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -3,5 +3,4 @@ include test/*.py
 include test/*.json
 include youtube-dl.bash-completion
 include youtube-dl.1
-recursive-include docs *
+recursive-include docs Makefile conf.py *.rst
 prune docs/_build
--- a/Makefile
+++ b/Makefile
@@ -72,8 +72,9 @@ youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-
 		--exclude '__pycache' \
 		--exclude '.git' \
 		--exclude 'testdata' \
 		--exclude 'docs/_build' \
 		-- \
-		bin devscripts test youtube_dl \
+		bin devscripts test youtube_dl docs \
 		CHANGELOG LICENSE README.md README.txt \
 		Makefile MANIFEST.in youtube-dl.1 youtube-dl.bash-completion setup.py \
 		youtube-dl
--- a/README.md
+++ b/README.md
@@ -65,6 +65,7 @@ which means you can modify it, redistribute it or use it however you like.
                                     configuration in ~/.config/youtube-dl.conf
                                     (%APPDATA%/youtube-dl/config.txt on
                                     Windows)
    --encoding ENCODING              Force the specified encoding (experimental)
 ## Video Selection:
    --playlist-start NUMBER          playlist video to start at (default is 1)
@@ -169,6 +170,7 @@ which means you can modify it, redistribute it or use it however you like.
 ## Verbosity / Simulation Options:
    -q, --quiet                      activates quiet mode
    --no-warnings                    Ignore warnings
    -s, --simulate                   do not download the video and do not write
                                     anything to disk
    --skip-download                  do not download the video
@@ -180,7 +182,9 @@ which means you can modify it, redistribute it or use it however you like.
    --get-duration                   simulate, quiet but print video length
    --get-filename                   simulate, quiet but print output filename
    --get-format                     simulate, quiet but print output format
-    -j, --dump-json                  simulate, quiet but print JSON information
+    -j, --dump-json                  simulate, quiet but print JSON information.
                                     See --output for a description of available
                                     keys.
    --newline                        output progress bar as new lines
    --no-progress                    do not print progress bar
    --console-title                  display progress in console titlebar
--- a/test/test_all_urls.py
+++ b/test/test_all_urls.py
@@ -143,5 +143,19 @@ class TestAllURLsMatching(unittest.TestCase):
        self.assertMatch('http://video.pbs.org/viralplayer/2365173446/', ['PBS'])
        self.assertMatch('http://video.pbs.org/widget/partnerplayer/980042464/', ['PBS'])
    def test_ComedyCentralShows(self):
        # Each cc.com show URL below is passed, in order, to assertMatch
        # together with the single extractor name 'ComedyCentralShows'.
        show_urls = (
            'http://thedailyshow.cc.com/extended-interviews/xm3fnq/andrew-napolitano-extended-interview',
            'http://thecolbertreport.cc.com/videos/29w6fx/-realhumanpraise-for-fox-news',
            'http://thecolbertreport.cc.com/videos/gh6urb/neil-degrasse-tyson-pt--1?xrs=eml_col_031114',
            'http://thedailyshow.cc.com/guests/michael-lewis/3efna8/exclusive---michael-lewis-extended-interview-pt--3',
        )
        for show_url in show_urls:
            # A fresh list per call, exactly as the literal calls would pass.
            self.assertMatch(show_url, ['ComedyCentralShows'])
 if __name__ == '__main__':
    unittest.main()
--- a/test/test_playlists.py
+++ b/test/test_playlists.py
@@ -42,6 +42,7 @@ from youtube_dl.extractor import (
    ToypicsUserIE,
    XTubeUserIE,
    InstagramUserIE,
    CSpanIE,
 )
@@ -314,6 +315,19 @@ class TestPlaylists(unittest.TestCase):
        }
        expect_info_dict(self, EXPECTED, test_video)
    def test_CSpan_playlist(self):
        # Runs CSpanIE (backed by a FakeYDL) on a c-span.org video URL and
        # checks the playlist id, title, entry count and summed durations.
        extractor = CSpanIE(FakeYDL())
        playlist = extractor.extract(
            'http://www.c-span.org/video/?318608-1/gm-ignition-switch-recall')
        self.assertIsPlaylist(playlist)
        self.assertEqual(playlist['id'], '342759')
        self.assertEqual(
            playlist['title'], 'General Motors Ignition Switch Recall')
        entries = playlist['entries']
        self.assertEqual(len(entries), 9)
        # Sum of the 'duration' field over all entries.
        self.assertEqual(sum(entry['duration'] for entry in entries), 14855)
 if __name__ == '__main__':
    unittest.main()
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -10,6 +10,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 # Various small unit tests
 import io
 import json
 import xml.etree.ElementTree
 #from youtube_dl.utils import htmlentity_transform
@@ -36,6 +37,7 @@ from youtube_dl.utils import (
    urlencode_postdata,
    xpath_with_ns,
    parse_iso8601,
    strip_jsonp,
 )
 if sys.version_info < (3, 0):
@@ -272,5 +274,11 @@ class TestUtil(unittest.TestCase):
        self.assertEqual(parse_iso8601('2014-03-23T22:04:26+0000'), 1395612266)
        self.assertEqual(parse_iso8601('2014-03-23T22:04:26Z'), 1395612266)
    def test_strip_jsonp(self):
        # The payload is a JSON array wrapped in a `cb (...);` call, with
        # newlines inside the JSON; once strip_jsonp has processed it, the
        # result must parse with json.loads to the expected list.
        wrapped = 'cb ([ {"id":"532cb",\n\n\n"x":\n3}\n]\n);'
        self.assertEqual(
            json.loads(strip_jsonp(wrapped)),
            [{"id": "532cb", "x": 3}])
 if __name__ == '__main__':
    unittest.main()
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -8,6 +8,7 @@ import datetime
 import errno
 import io
 import json
 import locale
 import os
 import platform
 import re
@@ -94,6 +95,7 @@ class YoutubeDL(object):
    usenetrc:          Use netrc for authentication instead.
    verbose:           Print additional info to stdout.
    quiet:             Do not print messages to stdout.
    no_warnings:       Do not print out anything for warnings.
    forceurl:          Force printing final URL.
    forcetitle:        Force printing title.
    forceid:           Force printing ID.
@@ -158,6 +160,7 @@ class YoutubeDL(object):
    include_ads:       Download ads as well
    default_search:    Prepend this string if an input url is not valid.
                       'auto' for elaborate guessing
    encoding:          Use this encoding instead of the system-specified.
    The following parameters are not used by YoutubeDL itself, they are used by
    the FileDownloader:
@@ -376,6 +379,8 @@ class YoutubeDL(object):
        if self.params.get('logger') is not None:
            self.params['logger'].warning(message)
        else:
            if self.params.get('no_warnings'):
                return
            if self._err_file.isatty() and os.name != 'nt':
                _msg_header = '\033[0;33mWARNING:\033[0m'
            else:
@@ -697,6 +702,11 @@ class YoutubeDL(object):
    def process_video_result(self, info_dict, download=True):
        assert info_dict.get('_type', 'video') == 'video'
        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result')
        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
@@ -728,6 +738,9 @@ class YoutubeDL(object):
        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            if 'url' not in format:
                raise ExtractorError('Missing "url" key in result (index %d)' % i)
            if format.get('format_id') is None:
                format['format_id'] = compat_str(i)
            if format.get('format') is None:
@@ -738,7 +751,7 @@ class YoutubeDL(object):
                )
            # Automatically determine file extension if missing
            if 'ext' not in format:
-                format['ext'] = determine_ext(format['url'])
+                format['ext'] = determine_ext(format['url']).lower()
        format_limit = self.params.get('format_limit', None)
        if format_limit:
@@ -863,7 +876,7 @@ class YoutubeDL(object):
        try:
            dn = os.path.dirname(encodeFilename(filename))
-            if dn != '' and not os.path.exists(dn):
+            if dn and not os.path.exists(dn):
                os.makedirs(dn)
        except (OSError, IOError) as err:
            self.report_error('unable to create directory ' + compat_str(err))
@@ -1197,6 +1210,9 @@ class YoutubeDL(object):
    def print_debug_header(self):
        if not self.params.get('verbose'):
            return
        write_string('[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' %
                 (locale.getpreferredencoding(), sys.getfilesystemencoding(), sys.stdout.encoding, self.get_encoding()))
        write_string('[debug] youtube-dl version ' + __version__ + '\n')
        try:
            sp = subprocess.Popen(
@@ -1261,3 +1277,19 @@ class YoutubeDL(object):
        # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener
    def encode(self, s):
        """Return *s* as bytes.

        A bytes value is returned unchanged. Anything else is encoded with
        the encoding reported by self.get_encoding(). If that raises
        UnicodeEncodeError, a hint about the system encoding and the
        --encoding option is appended to the error's reason and the same
        exception is re-raised.
        """
        if not isinstance(s, bytes):
            try:
                return s.encode(self.get_encoding())
            except UnicodeEncodeError as err:
                err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
                raise
        return s  # Already encoded
    def get_encoding(self):
        """Return the encoding to use for output.

        The 'encoding' entry of self.params wins whenever it is not None;
        otherwise the result of preferredencoding() is returned.
        """
        configured = self.params.get('encoding')
        return preferredencoding() if configured is None else configured
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -51,6 +51,7 @@ __authors__  = (
    'David Wagner',
    'Juan C. Olivares',
    'Mattias Harrysson',
    'phaer',
 )
 __license__ = 'Public Domain'
@@ -255,13 +256,17 @@ def parseOpts(overrideArguments=None):
    general.add_option(
        '--bidi-workaround', dest='bidi_workaround', action='store_true',
        help=u'Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH')
-    general.add_option('--default-search',
+    general.add_option(
-            dest='default_search', metavar='PREFIX',
+        '--default-search',
-            help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for  youtube-dl "large apple". By default (with value "auto") youtube-dl guesses.')
+        dest='default_search', metavar='PREFIX',
        help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for  youtube-dl "large apple". By default (with value "auto") youtube-dl guesses.')
    general.add_option(
        '--ignore-config',
        action='store_true',
        help='Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: do not read the user configuration in ~/.config/youtube-dl.conf (%APPDATA%/youtube-dl/config.txt on Windows)')
    general.add_option(
        '--encoding', dest='encoding', metavar='ENCODING',
        help='Force the specified encoding (experimental)')
    selection.add_option(
        '--playlist-start',
@@ -364,6 +369,10 @@ def parseOpts(overrideArguments=None):
    verbosity.add_option('-q', '--quiet',
            action='store_true', dest='quiet', help='activates quiet mode', default=False)
    verbosity.add_option(
        '--no-warnings',
        dest='no_warnings', action='store_true', default=False,
        help='Ignore warnings')
    verbosity.add_option('-s', '--simulate',
            action='store_true', dest='simulate', help='do not download the video and do not write anything to disk', default=False)
    verbosity.add_option('--skip-download',
@@ -391,7 +400,7 @@ def parseOpts(overrideArguments=None):
            help='simulate, quiet but print output format', default=False)
    verbosity.add_option('-j', '--dump-json',
            action='store_true', dest='dumpjson',
-            help='simulate, quiet but print JSON information', default=False)
+            help='simulate, quiet but print JSON information. See --output for a description of available keys.', default=False)
    verbosity.add_option('--newline',
            action='store_true', dest='progress_with_newline', help='output progress bar as new lines', default=False)
    verbosity.add_option('--no-progress',
@@ -535,8 +544,6 @@ def parseOpts(overrideArguments=None):
            write_string(u'[debug] System config: ' + repr(_hide_login_info(systemConf)) + '\n')
            write_string(u'[debug] User config: ' + repr(_hide_login_info(userConf)) + '\n')
            write_string(u'[debug] Command-line args: ' + repr(_hide_login_info(commandLineConf)) + '\n')
            write_string(u'[debug] Encodings: locale %r, fs %r, out %r, pref: %r\n' %
                         (locale.getpreferredencoding(), sys.getfilesystemencoding(), sys.stdout.encoding, preferredencoding()))
    return parser, opts, args
@@ -670,7 +677,7 @@ def _real_main(argv=None):
        date = DateRange.day(opts.date)
    else:
        date = DateRange(opts.dateafter, opts.datebefore)
-    if opts.default_search not in ('auto', None) and ':' not in opts.default_search:
+    if opts.default_search not in ('auto', 'auto_warning', None) and ':' not in opts.default_search:
        parser.error(u'--default-search invalid; did you forget a colon (:) at the end?')
    # Do not download videos when there are audio-only formats
@@ -708,6 +715,7 @@ def _real_main(argv=None):
        'password': opts.password,
        'videopassword': opts.videopassword,
        'quiet': (opts.quiet or any_printing),
        'no_warnings': opts.no_warnings,
        'forceurl': opts.geturl,
        'forcetitle': opts.gettitle,
        'forceid': opts.getid,
@@ -780,6 +788,7 @@ def _real_main(argv=None):
        'include_ads': opts.include_ads,
        'default_search': opts.default_search,
        'youtube_include_dash_manifest': opts.youtube_include_dash_manifest,
        'encoding': opts.encoding,
    }
    with YoutubeDL(ydl_opts) as ydl:
--- a/youtube_dl/downloader/f4m.py
+++ b/youtube_dl/downloader/f4m.py
@@ -297,6 +297,7 @@ class F4mFD(FileDownloader):
                        break
            frags_filenames.append(frag_filename)
        dest_stream.close()
        self.report_finish(format_bytes(state['downloaded_bytes']), time.time() - start)
        self.try_rename(tmpfilename, filename)
--- a/youtube_dl/downloader/hls.py
+++ b/youtube_dl/downloader/hls.py
@@ -13,8 +13,10 @@ class HlsFD(FileDownloader):
        self.report_destination(filename)
        tmpfilename = self.temp_name(filename)
-        args = ['-y', '-i', url, '-f', 'mp4', '-c', 'copy',
+        args = [
-            '-bsf:a', 'aac_adtstoasc', tmpfilename]
+            '-y', '-i', url, '-f', 'mp4', '-c', 'copy',
            '-bsf:a', 'aac_adtstoasc',
            encodeFilename(tmpfilename, for_subprocess=True)]
        for program in ['avconv', 'ffmpeg']:
            try:
--- a/youtube_dl/downloader/http.py
+++ b/youtube_dl/downloader/http.py
@@ -23,6 +23,8 @@ class HttpFD(FileDownloader):
        headers = {'Youtubedl-no-compression': 'True'}
        if 'user_agent' in info_dict:
            headers['Youtubedl-user-agent'] = info_dict['user_agent']
        if 'http_referer' in info_dict:
            headers['Referer'] = info_dict['http_referer']
        basic_request = compat_urllib_request.Request(url, None, headers)
        request = compat_urllib_request.Request(url, None, headers)
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -14,6 +14,7 @@ from .arte import (
    ArteTVConcertIE,
    ArteTVFutureIE,
    ArteTVDDCIE,
    ArteTVEmbedIE,
 )
 from .auengine import AUEngineIE
 from .bambuser import BambuserIE, BambuserChannelIE
@@ -39,6 +40,7 @@ from .clipfish import ClipfishIE
 from .cliphunter import CliphunterIE
 from .clipsyndicate import ClipsyndicateIE
 from .cmt import CMTIE
 from .cnet import CNETIE
 from .cnn import (
    CNNIE,
    CNNBlogsIE,
@@ -82,6 +84,7 @@ from .fktv import (
 )
 from .flickr import FlickrIE
 from .fourtube import FourTubeIE
 from .franceculture import FranceCultureIE
 from .franceinter import FranceInterIE
 from .francetv import (
    PluzzIE,
@@ -155,6 +158,7 @@ from .mtv import (
    MTVIE,
    MTVIggyIE,
 )
 from .musicplayon import MusicPlayOnIE
 from .muzu import MuzuTVIE
 from .myspace import MySpaceIE
 from .myspass import MySpassIE
@@ -176,6 +180,8 @@ from .normalboots import NormalbootsIE
 from .novamov import NovaMovIE
 from .nowness import NownessIE
 from .nowvideo import NowVideoIE
 from .ntv import NTVIE
 from .oe1 import OE1IE
 from .ooyala import OoyalaIE
 from .orf import ORFIE
 from .parliamentliveuk import ParliamentLiveUKIE
@@ -207,7 +213,6 @@ from .rutv import RUTVIE
 from .savefrom import SaveFromIE
 from .servingsys import ServingSysIE
 from .sina import SinaIE
 from .slashdot import SlashdotIE
 from .slideshare import SlideshareIE
 from .smotri import (
    SmotriIE,
@@ -256,13 +261,13 @@ from .udemy import (
    UdemyCourseIE
 )
 from .unistra import UnistraIE
 from .urort import UrortIE
 from .ustream import UstreamIE, UstreamChannelIE
 from .vbox7 import Vbox7IE
 from .veehd import VeeHDIE
 from .veoh import VeohIE
 from .vesti import VestiIE
 from .vevo import VevoIE
 from .vice import ViceIE
 from .viddler import ViddlerIE
 from .videobam import VideoBamIE
 from .videodetective import VideoDetectiveIE
@@ -281,8 +286,12 @@ from .vine import VineIE
 from .viki import VikiIE
 from .vk import VKIE
 from .vube import VubeIE
 from .washingtonpost import WashingtonPostIE
 from .wat import WatIE
-from .wdr import WDRIE
+from .wdr import (
    WDRIE,
    WDRMausIE,
 )
 from .weibo import WeiboIE
 from .wimp import WimpIE
 from .wistia import WistiaIE
--- a/youtube_dl/extractor/appletrailers.py
+++ b/youtube_dl/extractor/appletrailers.py
@@ -6,7 +6,6 @@ import json
 from .common import InfoExtractor
 from ..utils import (
    compat_urlparse,
    determine_ext,
 )
@@ -16,9 +15,10 @@ class AppleTrailersIE(InfoExtractor):
        "url": "http://trailers.apple.com/trailers/wb/manofsteel/",
        "playlist": [
            {
                "file": "manofsteel-trailer4.mov",
                "md5": "d97a8e575432dbcb81b7c3acb741f8a8",
                "info_dict": {
                    "id": "manofsteel-trailer4",
                    "ext": "mov",
                    "duration": 111,
                    "title": "Trailer 4",
                    "upload_date": "20130523",
@@ -26,9 +26,10 @@ class AppleTrailersIE(InfoExtractor):
                },
            },
            {
                "file": "manofsteel-trailer3.mov",
                "md5": "b8017b7131b721fb4e8d6f49e1df908c",
                "info_dict": {
                    "id": "manofsteel-trailer3",
                    "ext": "mov",
                    "duration": 182,
                    "title": "Trailer 3",
                    "upload_date": "20130417",
@@ -36,9 +37,10 @@ class AppleTrailersIE(InfoExtractor):
                },
            },
            {
                "file": "manofsteel-trailer.mov",
                "md5": "d0f1e1150989b9924679b441f3404d48",
                "info_dict": {
                    "id": "manofsteel-trailer",
                    "ext": "mov",
                    "duration": 148,
                    "title": "Trailer",
                    "upload_date": "20121212",
@@ -46,15 +48,16 @@ class AppleTrailersIE(InfoExtractor):
                },
            },
            {
                "file": "manofsteel-teaser.mov",
                "md5": "5fe08795b943eb2e757fa95cb6def1cb",
                "info_dict": {
                    "id": "manofsteel-teaser",
                    "ext": "mov",
                    "duration": 93,
                    "title": "Teaser",
                    "upload_date": "20120721",
                    "uploader_id": "wb",
                },
-            }
+            },
        ]
    }
@@ -65,16 +68,16 @@ class AppleTrailersIE(InfoExtractor):
        movie = mobj.group('movie')
        uploader_id = mobj.group('company')
-        playlist_url = compat_urlparse.urljoin(url, u'includes/playlists/itunes.inc')
+        playlist_url = compat_urlparse.urljoin(url, 'includes/playlists/itunes.inc')
        def fix_html(s):
-            s = re.sub(r'(?s)<script[^<]*?>.*?</script>', u'', s)
+            s = re.sub(r'(?s)<script[^<]*?>.*?</script>', '', s)
            s = re.sub(r'<img ([^<]*?)>', r'<img \1/>', s)
            # The ' in the onClick attributes are not escaped, it couldn't be parsed
            # like: http://trailers.apple.com/trailers/wb/gravity/
            def _clean_json(m):
-                return u'iTunes.playURL(%s);' % m.group(1).replace('\'', '&#39;')
+                return 'iTunes.playURL(%s);' % m.group(1).replace('\'', '&#39;')
            s = re.sub(self._JSON_RE, _clean_json, s)
-            s = u'<html>' + s + u'</html>'
+            s = '<html>' + s + u'</html>'
            return s
        doc = self._download_xml(playlist_url, movie, transform_source=fix_html)
@@ -82,7 +85,7 @@ class AppleTrailersIE(InfoExtractor):
        for li in doc.findall('./div/ul/li'):
            on_click = li.find('.//a').attrib['onClick']
            trailer_info_json = self._search_regex(self._JSON_RE,
-                on_click, u'trailer info')
+                on_click, 'trailer info')
            trailer_info = json.loads(trailer_info_json)
            title = trailer_info['title']
            video_id = movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', title).lower()
@@ -98,8 +101,7 @@ class AppleTrailersIE(InfoExtractor):
            first_url = trailer_info['url']
            trailer_id = first_url.split('/')[-1].rpartition('_')[0].lower()
            settings_json_url = compat_urlparse.urljoin(url, 'includes/settings/%s.json' % trailer_id)
-            settings_json = self._download_webpage(settings_json_url, trailer_id, u'Downloading settings json')
+            settings = self._download_json(settings_json_url, trailer_id, 'Downloading settings json')
            settings = json.loads(settings_json)
            formats = []
            for format in settings['metadata']['sizes']:
@@ -107,7 +109,6 @@ class AppleTrailersIE(InfoExtractor):
                format_url = re.sub(r'_(\d*p.mov)', r'_h\1', format['src'])
                formats.append({
                    'url': format_url,
                    'ext': determine_ext(format_url),
                    'format': format['type'],
                    'width': format['width'],
                    'height': int(format['height']),
--- a/youtube_dl/extractor/arte.py
+++ b/youtube_dl/extractor/arte.py
@@ -2,7 +2,6 @@
 from __future__ import unicode_literals
 import re
 import json
 from .common import InfoExtractor
 from ..utils import (
@@ -19,114 +18,41 @@ from ..utils import (
 # is different for each one. The videos usually expire in 7 days, so we can't
 # add tests.
 class ArteTvIE(InfoExtractor):
    _VIDEOS_URL = r'(?:http://)?videos\.arte\.tv/(?P<lang>fr|de)/.*-(?P<id>.*?)\.html'
    _LIVEWEB_URL = r'(?:http://)?liveweb\.arte\.tv/(?P<lang>fr|de)/(?P<subpage>.+?)/(?P<name>.+)'
    _LIVE_URL = r'index-[0-9]+\.html$'
 class ArteTvIE(InfoExtractor):
    _VALID_URL = r'http://videos\.arte\.tv/(?P<lang>fr|de)/.*-(?P<id>.*?)\.html'
    IE_NAME = 'arte.tv'
    @classmethod
    def suitable(cls, url):
        return any(re.match(regex, url) for regex in (cls._VIDEOS_URL, cls._LIVEWEB_URL))
    # TODO implement Live Stream
    # from ..utils import compat_urllib_parse
    # def extractLiveStream(self, url):
    #     video_lang = url.split('/')[-4]
    #     info = self.grep_webpage(
    #         url,
    #         r'src="(.*?/videothek_js.*?\.js)',
    #         0,
    #         [
    #             (1, 'url', 'Invalid URL: %s' % url)
    #         ]
    #     )
    #     http_host = url.split('/')[2]
    #     next_url = 'http://%s%s' % (http_host, compat_urllib_parse.unquote(info.get('url')))
    #     info = self.grep_webpage(
    #         next_url,
    #         r'(s_artestras_scst_geoFRDE_' + video_lang + '.*?)\'.*?' +
    #             '(http://.*?\.swf).*?' +
    #             '(rtmp://.*?)\'',
    #         re.DOTALL,
    #         [
    #             (1, 'path',   'could not extract video path: %s' % url),
    #             (2, 'player', 'could not extract video player: %s' % url),
    #             (3, 'url',    'could not extract video url: %s' % url)
    #         ]
    #     )
    #     video_url = '%s/%s' % (info.get('url'), info.get('path'))
    def _real_extract(self, url):
-        mobj = re.match(self._VIDEOS_URL, url)
+        mobj = re.match(self._VALID_URL, url)
-        if mobj is not None:
+        lang = mobj.group('lang')
-            id = mobj.group('id')
+        video_id = mobj.group('id')
            lang = mobj.group('lang')
            return self._extract_video(url, id, lang)
        mobj = re.match(self._LIVEWEB_URL, url)
        if mobj is not None:
            name = mobj.group('name')
            lang = mobj.group('lang')
            return self._extract_liveweb(url, name, lang)
        if re.search(self._LIVE_URL, url) is not None:
            raise ExtractorError('Arte live streams are not yet supported, sorry')
            # self.extractLiveStream(url)
            # return
        raise ExtractorError('No video found')
    def _extract_video(self, url, video_id, lang):
        """Extract from videos.arte.tv"""
        ref_xml_url = url.replace('/videos/', '/do_delegate/videos/')
        ref_xml_url = ref_xml_url.replace('.html', ',view,asPlayerXml.xml')
        ref_xml_doc = self._download_xml(
            ref_xml_url, video_id, note='Downloading metadata')
        config_node = find_xpath_attr(ref_xml_doc, './/video', 'lang', lang)
        config_xml_url = config_node.attrib['ref']
-        config_xml = self._download_webpage(
+        config = self._download_xml(
            config_xml_url, video_id, note='Downloading configuration')
-        video_urls = list(re.finditer(r'<url quality="(?P<quality>.*?)">(?P<url>.*?)</url>', config_xml))
+        formats = [{
-        def _key(m):
+            'forma_id': q.attrib['quality'],
-            quality = m.group('quality')
+            'url': q.text,
-            if quality == 'hd':
+            'ext': 'flv',
-                return 2
+            'quality': 2 if q.attrib['quality'] == 'hd' else 1,
-            else:
+        } for q in config.findall('./urls/url')]
-                return 1
+        self._sort_formats(formats)
        # We pick the best quality
        video_urls = sorted(video_urls, key=_key)
        video_url = list(video_urls)[-1].group('url')
        title = self._html_search_regex(r'<name>(.*?)</name>', config_xml, 'title')
        thumbnail = self._html_search_regex(r'<firstThumbnailUrl>(.*?)</firstThumbnailUrl>',
                                            config_xml, 'thumbnail')
        return {'id': video_id,
                'title': title,
                'thumbnail': thumbnail,
                'url': video_url,
                'ext': 'flv',
                }
-    def _extract_liveweb(self, url, name, lang):
+        title = config.find('.//name').text
-        """Extract form http://liveweb.arte.tv/"""
+        thumbnail = config.find('.//firstThumbnailUrl').text
-        webpage = self._download_webpage(url, name)
+        return {
-        video_id = self._search_regex(r'eventId=(\d+?)("|&)', webpage, 'event id')
+            'id': video_id,
-        config_doc = self._download_xml('http://download.liveweb.arte.tv/o21/liveweb/events/event-%s.xml' % video_id,
+            'title': title,
-                                            video_id, 'Downloading information')
+            'thumbnail': thumbnail,
-        event_doc = config_doc.find('event')
+            'formats': formats,
-        url_node = event_doc.find('video').find('urlHd')
+        }
        if url_node is None:
            url_node = event_doc.find('urlSd')
        return {'id': video_id,
                'title': event_doc.find('name%s' % lang.capitalize()).text,
                'url': url_node.text.replace('MP4', 'mp4'),
                'ext': 'flv',
                'thumbnail': self._og_search_thumbnail(webpage),
                }
 class ArteTVPlus7IE(InfoExtractor):
@@ -152,9 +78,7 @@ class ArteTVPlus7IE(InfoExtractor):
        return self._extract_from_json_url(json_url, video_id, lang)
    def _extract_from_json_url(self, json_url, video_id, lang):
-        json_info = self._download_webpage(json_url, video_id, 'Downloading info json')
+        info = self._download_json(json_url, video_id)
        self.report_extraction(video_id)
        info = json.loads(json_info)
        player_info = info['videoJsonPlayer']
        info_dict = {
@@ -176,6 +100,8 @@ class ArteTVPlus7IE(InfoExtractor):
                l = 'F'
            elif lang == 'de':
                l = 'A'
            else:
                l = lang
            regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l]
            return any(re.match(r, f['versionCode']) for r in regexes)
        # Some formats may not be in the same language as the url
@@ -305,3 +231,22 @@ class ArteTVConcertIE(ArteTVPlus7IE):
            'description': 'md5:486eb08f991552ade77439fe6d82c305',
        },
    }
 class ArteTVEmbedIE(ArteTVPlus7IE):
    """Extractor for www.arte.tv/playerv2/embed.php URLs.

    The embed URL carries the player JSON URL in its json_url query
    parameter; the language and video id are path components of that
    JSON URL. Extraction is delegated to _extract_from_json_url.
    """
    IE_NAME = 'arte.tv:embed'
    _VALID_URL = r'''(?x)
        http://www\.arte\.tv
        /playerv2/embed\.php\?json_url=
        (?P<json_url>
            http://arte\.tv/papi/tvguide/videos/stream/player/
            (?P<lang>[^/]+)/(?P<id>[^/]+)[^&]*
        )
    '''
    def _real_extract(self, url):
        # Pull all three named groups out of the match in one call.
        json_url, video_id, lang = re.match(
            self._VALID_URL, url).group('json_url', 'id', 'lang')
        return self._extract_from_json_url(json_url, video_id, lang)
--- a/youtube_dl/extractor/auengine.py
+++ b/youtube_dl/extractor/auengine.py
@@ -11,22 +11,24 @@ from ..utils import (
 class AUEngineIE(InfoExtractor):
    _VALID_URL = r'http://(?:www\.)?auengine\.com/embed\.php\?.*?file=(?P<id>[^&]+).*?'
    _TEST = {
        'url': 'http://auengine.com/embed.php?file=lfvlytY6&w=650&h=370',
        'file': 'lfvlytY6.mp4',
        'md5': '48972bdbcf1a3a2f5533e62425b41d4f',
        'info_dict': {
            'id': 'lfvlytY6',
            'ext': 'mp4',
            'title': '[Commie]The Legend of the Legendary Heroes - 03 - Replication Eye (Alpha Stigma)[F9410F5A]'
        }
    }
    _VALID_URL = r'(?:http://)?(?:www\.)?auengine\.com/embed\.php\?.*?file=([^&]+).*?'
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group(1)
+        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)
-        title = self._html_search_regex(r'<title>(?P<title>.+?)</title>',
+        title = self._html_search_regex(r'<title>(?P<title>.+?)</title>', webpage, 'title')
                webpage, 'title')
        title = title.strip()
        links = re.findall(r'\s(?:file|url):\s*["\']([^\'"]+)["\']', webpage)
        links = map(compat_urllib_parse.unquote, links)
@@ -39,14 +41,15 @@ class AUEngineIE(InfoExtractor):
            elif '/videos/' in link:
                video_url = link
        if not video_url:
-            raise ExtractorError(u'Could not find video URL')
+            raise ExtractorError('Could not find video URL')
        ext = '.' + determine_ext(video_url)
        if ext == title[-len(ext):]:
            title = title[:-len(ext)]
        return {
-            'id':        video_id,
+            'id': video_id,
-            'url':       video_url,
+            'url': video_url,
-            'title':     title,
+            'title': title,
            'thumbnail': thumbnail,
            'http_referer': 'http://www.auengine.com/flowplayer/flowplayer.commercial-3.2.14.swf',
        }
--- a/youtube_dl/extractor/bloomberg.py
+++ b/youtube_dl/extractor/bloomberg.py
@@ -1,22 +1,21 @@
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from .ooyala import OoyalaIE
 class BloombergIE(InfoExtractor):
    _VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<name>.+?)\.html'
    _TEST = {
-        u'url': u'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html',
+        'url': 'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html',
-        u'file': u'12bzhqZTqQHmmlA8I-i0NpzJgcG5NNYX.mp4',
+        'md5': '7bf08858ff7c203c870e8a6190e221e5',
-        u'info_dict': {
+        'info_dict': {
-            u'title': u'Shah\'s Presentation on Foreign-Exchange Strategies',
+            'id': 'qurhIVlJSB6hzkVi229d8g',
-            u'description': u'md5:abc86e5236f9f0e4866c59ad36736686',
+            'ext': 'flv',
-        },
+            'title': 'Shah\'s Presentation on Foreign-Exchange Strategies',
-        u'params': {
+            'description': 'md5:0681e0d30dcdfc6abf34594961d8ea88',
            # Requires ffmpeg (m3u8 manifest)
            u'skip_download': True,
        },
    }
@@ -24,7 +23,16 @@ class BloombergIE(InfoExtractor):
        mobj = re.match(self._VALID_URL, url)
        name = mobj.group('name')
        webpage = self._download_webpage(url, name)
-        embed_code = self._search_regex(
+        f4m_url = self._search_regex(
-            r'<source src="https?://[^/]+/[^/]+/[^/]+/([^/]+)', webpage,
+            r'<source src="(https?://[^"]+\.f4m.*?)"', webpage,
-            'embed code')
+            'f4m url')
-        return OoyalaIE._build_url_result(embed_code)
+        title = re.sub(': Video$', '', self._og_search_title(webpage))
        return {
            'id': name.split('-')[-1],
            'title': title,
            'url': f4m_url,
            'ext': 'flv',
            'description': self._og_search_description(webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
        }
--- a/youtube_dl/extractor/brightcove.py
+++ b/youtube_dl/extractor/brightcove.py
@@ -87,7 +87,7 @@ class BrightcoveIE(InfoExtractor):
        object_str = object_str.replace('<--', '<!--')
        object_str = fix_xml_ampersands(object_str)
-        object_doc = xml.etree.ElementTree.fromstring(object_str)
+        object_doc = xml.etree.ElementTree.fromstring(object_str.encode('utf-8'))
        fv_el = find_xpath_attr(object_doc, './param', 'name', 'flashVars')
        if fv_el is not None:
--- a/youtube_dl/extractor/c56.py
+++ b/youtube_dl/extractor/c56.py
@@ -2,39 +2,46 @@
 from __future__ import unicode_literals
 import re
 import json
 from .common import InfoExtractor
 class C56IE(InfoExtractor):
-    _VALID_URL = r'https?://((www|player)\.)?56\.com/(.+?/)?(v_|(play_album.+-))(?P<textid>.+?)\.(html|swf)'
+    _VALID_URL = r'https?://(?:(?:www|player)\.)?56\.com/(?:.+?/)?(?:v_|(?:play_album.+-))(?P<textid>.+?)\.(?:html|swf)'
    IE_NAME = '56.com'
    _TEST = {
        'url': 'http://www.56.com/u39/v_OTM0NDA3MTY.html',
        'file': '93440716.flv',
        'md5': 'e59995ac63d0457783ea05f93f12a866',
        'info_dict': {
            'id': '93440716',
            'ext': 'flv',
            'title': '网事知多少 第32期：车怒',
            'duration': 283.813,
        },
    }
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
        text_id = mobj.group('textid')
-        info_page = self._download_webpage('http://vxml.56.com/json/%s/' % text_id,
+
-                                           text_id, 'Downloading video info')
+        page = self._download_json(
-        info = json.loads(info_page)['info']
+            'http://vxml.56.com/json/%s/' % text_id, text_id, 'Downloading video info')
-        formats = [{
+
-            'format_id': f['type'],
+        info = page['info']
-            'filesize': int(f['filesize']),
+
-            'url': f['url']
+        formats = [
-        } for f in info['rfiles']]
+            {
                'format_id': f['type'],
                'filesize': int(f['filesize']),
                'url': f['url']
            } for f in info['rfiles']
        ]
        self._sort_formats(formats)
        return {
            'id': info['vid'],
            'title': info['Subject'],
            'duration': int(info['duration']) / 1000.0,
            'formats': formats,
            'thumbnail': info.get('bimg') or info.get('img'),
        }
--- a/youtube_dl/extractor/canalplus.py
+++ b/youtube_dl/extractor/canalplus.py
@@ -28,7 +28,7 @@ class CanalplusIE(InfoExtractor):
        video_id = mobj.groupdict().get('id')
        if video_id is None:
            webpage = self._download_webpage(url, mobj.group('path'))
-            video_id = self._search_regex(r'videoId = "(\d+)";', webpage, u'video id')
+            video_id = self._search_regex(r'<canal:player videoId="(\d+)"', webpage, u'video id')
        info_url = self._VIDEO_INFO_TEMPLATE % video_id
        doc = self._download_xml(info_url,video_id, 
                                           u'Downloading video info')
--- a/youtube_dl/extractor/clipfish.py
+++ b/youtube_dl/extractor/clipfish.py
@@ -1,22 +1,28 @@
 from __future__ import unicode_literals
 import re
 import time
 import xml.etree.ElementTree
 from .common import InfoExtractor
-from ..utils import ExtractorError
+from ..utils import (
    ExtractorError,
    parse_duration,
 )
 class ClipfishIE(InfoExtractor):
-    IE_NAME = u'clipfish'
+    IE_NAME = 'clipfish'
    _VALID_URL = r'^https?://(?:www\.)?clipfish\.de/.*?/video/(?P<id>[0-9]+)/'
    _TEST = {
-        u'url': u'http://www.clipfish.de/special/game-trailer/video/3966754/fifa-14-e3-2013-trailer/',
+        'url': 'http://www.clipfish.de/special/game-trailer/video/3966754/fifa-14-e3-2013-trailer/',
-        u'file': u'3966754.mp4',
+        'md5': '2521cd644e862936cf2e698206e47385',
-        u'md5': u'2521cd644e862936cf2e698206e47385',
+        'info_dict': {
-        u'info_dict': {
+            'id': '3966754',
-            u'title': u'FIFA 14 - E3 2013 Trailer',
+            'ext': 'mp4',
-            u'duration': 82,
+            'title': 'FIFA 14 - E3 2013 Trailer',
            'duration': 82,
        },
        u'skip': 'Blocked in the US'
    }
@@ -33,21 +39,10 @@ class ClipfishIE(InfoExtractor):
        video_url = doc.find('filename').text
        if video_url is None:
            xml_bytes = xml.etree.ElementTree.tostring(doc)
-            raise ExtractorError(u'Cannot find video URL in document %r' %
+            raise ExtractorError('Cannot find video URL in document %r' %
                                 xml_bytes)
        thumbnail = doc.find('imageurl').text
-        duration_str = doc.find('duration').text
+        duration = parse_duration(doc.find('duration').text)
        m = re.match(
            r'^(?P<hours>[0-9]+):(?P<minutes>[0-9]{2}):(?P<seconds>[0-9]{2}):(?P<ms>[0-9]*)$',
            duration_str)
        if m:
            duration = (
                (int(m.group('hours')) * 60 * 60) +
                (int(m.group('minutes')) * 60) +
                (int(m.group('seconds')))
            )
        else:
            duration = None
        return {
            'id': video_id,
--- a/youtube_dl/extractor/clipsyndicate.py
+++ b/youtube_dl/extractor/clipsyndicate.py
@@ -1,3 +1,5 @@
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
@@ -11,13 +13,14 @@ class ClipsyndicateIE(InfoExtractor):
    _VALID_URL = r'http://www\.clipsyndicate\.com/video/play(list/\d+)?/(?P<id>\d+)'
    _TEST = {
-        u'url': u'http://www.clipsyndicate.com/video/play/4629301/brick_briscoe',
+        'url': 'http://www.clipsyndicate.com/video/play/4629301/brick_briscoe',
-        u'md5': u'4d7d549451bad625e0ff3d7bd56d776c',
+        'md5': '4d7d549451bad625e0ff3d7bd56d776c',
-        u'info_dict': {
+        'info_dict': {
-            u'id': u'4629301',
+            'id': '4629301',
-            u'ext': u'mp4',
+            'ext': 'mp4',
-            u'title': u'Brick Briscoe',
+            'title': 'Brick Briscoe',
-            u'duration': 612,
+            'duration': 612,
            'thumbnail': 're:^https?://.+\.jpg',
        },
    }
@@ -26,13 +29,13 @@ class ClipsyndicateIE(InfoExtractor):
        video_id = mobj.group('id')
        js_player = self._download_webpage(
            'http://eplayer.clipsyndicate.com/embed/player.js?va_id=%s' % video_id,
-            video_id, u'Downlaoding player')
+            video_id, 'Downlaoding player')
        # it includes a required token
-        flvars = self._search_regex(r'flvars: "(.*?)"', js_player, u'flvars')
+        flvars = self._search_regex(r'flvars: "(.*?)"', js_player, 'flvars')
        pdoc = self._download_xml(
            'http://eplayer.clipsyndicate.com/osmf/playlist?%s' % flvars,
-            video_id, u'Downloading video info',
+            video_id, 'Downloading video info',
            transform_source=fix_xml_ampersands)
        track_doc = pdoc.find('trackList/track')
--- a/youtube_dl/extractor/cnet.py
+++ b/youtube_dl/extractor/cnet.py
@@ -0,0 +1,70 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import json
 import re
 from .common import InfoExtractor
 from ..utils import (
    int_or_none,
 )
 class CNETIE(InfoExtractor):
    """Information extractor for video pages under cnet.com/videos/<slug>/."""
    # The URL slug is captured as the named group ``id``; _real_extract
    # reports it as ``display_id`` (the real video id comes from page JSON).
    _VALID_URL = r'https?://(?:www\.)?cnet\.com/videos/(?P<id>[^/]+)/'
    _TEST = {
        'url': 'http://www.cnet.com/videos/hands-on-with-microsofts-windows-8-1-update/',
        'md5': '041233212a0d06b179c87cbcca1577b8',
        'info_dict': {
            'id': '56f4ea68-bd21-4852-b08c-4de5b8354c60',
            'ext': 'mp4',
            'title': 'Hands-on with Microsoft Windows 8.1 Update',
            'description': 'The new update to the Windows 8 OS brings improved performance for mouse and keyboard users.',
            'thumbnail': 're:^http://.*/flmswindows8.jpg$',
            'uploader_id': 'sarah.mitroff@cbsinteractive.com',
            'uploader': 'Sarah Mitroff',
        }
    }
    def _real_extract(self, url):
        """Build the info dict for the CNET video page at ``url``.
        The page is downloaded, the JSON stored in the player div's
        ``data-cnet-video-options`` attribute is parsed, and its ``video``
        record is mapped onto the result. ``id`` and ``headline`` of that
        record and ``type``/``format``/``uri`` of every file entry are read
        by plain indexing (so they are mandatory); everything else is
        optional and read with ``.get()``.
        """
        display_id = re.match(self._VALID_URL, url).group('id')
        page = self._download_webpage(url, display_id)
        # The player <div> carries its configuration as JSON inside a
        # single-quoted attribute.
        raw_options = self._html_search_regex(
            r"<div class=\"cnetVideoPlayer\" data-cnet-video-options='([^']+)'",
            page, 'data json')
        video = json.loads(raw_options)['video']
        video_id = video['id']
        title = video['headline']
        description = video.get('dek')
        image = video.get('image', {})
        thumbnail = image.get('path')
        # Uploader details stay None unless the record has a truthy author.
        uploader = None
        uploader_id = None
        author = video.get('author')
        if author:
            uploader = '%s %s' % (author['firstName'], author['lastName'])
            uploader_id = author.get('email')
        formats = []
        for media in video['files']['data']:
            kind, container = media['type'], media['format']
            bitrate = media.get('bitrate')
            # NOTE(review): int_or_none's second argument is presumably a
            # divisor (1000) - confirm against ..utils.int_or_none. A missing
            # bitrate yields an empty last component in format_id.
            formats.append({
                'format_id': '%s-%s-%s' % (
                    kind, container,
                    int_or_none(bitrate, 1000, default='')),
                'url': media['uri'],
                'tbr': int_or_none(bitrate, 1000),
            })
        self._sort_formats(formats)
        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'formats': formats,
            'description': description,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'thumbnail': thumbnail,
        }
--- a/youtube_dl/extractor/comedycentral.py
+++ b/youtube_dl/extractor/comedycentral.py
@@ -7,8 +7,8 @@ from .mtv import MTVServicesInfoExtractor
 from ..utils import (
    compat_str,
    compat_urllib_parse,
    ExtractorError,
    float_or_none,
    unified_strdate,
 )
@@ -32,31 +32,34 @@ class ComedyCentralIE(MTVServicesInfoExtractor):
 class ComedyCentralShowsIE(InfoExtractor):
-    IE_DESC = 'The Daily Show / Colbert Report'
+    IE_DESC = 'The Daily Show / The Colbert Report'
    # urls can be abbreviations like :thedailyshow or :colbert
    # urls for episodes like:
    # or urls for clips like: http://www.thedailyshow.com/watch/mon-december-10-2012/any-given-gun-day
    #                     or: http://www.colbertnation.com/the-colbert-report-videos/421667/november-29-2012/moon-shattering-news
    #                     or: http://www.colbertnation.com/the-colbert-report-collections/422008/festival-of-lights/79524
-    _VALID_URL = r"""^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport)
+    _VALID_URL = r'''(?x)^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport)
-                      |(https?://)?(www\.)?
+                      |https?://(:www\.)?
-                          (?P<showname>thedailyshow|colbertnation)\.com/
+                          (?P<showname>thedailyshow|thecolbertreport)\.(?:cc\.)?com/
                         (full-episodes/(?P<episode>.*)|
                          (?P<clip>
-                              (the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
+                              (?:(?:guests/[^/]+|videos)/[^/]+/(?P<videotitle>[^/?#]+))
-                              |(watch/(?P<date>[^/]*)/(?P<tdstitle>.*)))|
+                              |(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
                              |(watch/(?P<date>[^/]*)/(?P<tdstitle>.*))
                          )|
                          (?P<interview>
-                              extended-interviews/(?P<interID>[0-9]+)/playlist_tds_extended_(?P<interview_title>.*?)/.*?)))
+                              extended-interviews/(?P<interID>[0-9a-z]+)/(?:playlist_tds_extended_)?(?P<interview_title>.*?)(/.*?)?)))
-                     $"""
+                     (?:[?#].*|$)'''
    _TEST = {
-        'url': 'http://www.thedailyshow.com/watch/thu-december-13-2012/kristen-stewart',
+        'url': 'http://thedailyshow.cc.com/watch/thu-december-13-2012/kristen-stewart',
        'file': '422212.mp4',
        'md5': '4e2f5cb088a83cd8cdb7756132f9739d',
        'info_dict': {
-            "upload_date": "20121214",
+            'id': 'ab9ab3e7-5a98-4dbe-8b21-551dc0523d55',
-            "description": "Kristen Stewart",
+            'ext': 'mp4',
-            "uploader": "thedailyshow",
+            'upload_date': '20121213',
-            "title": "thedailyshow-kristen-stewart part 1"
+            'description': 'Kristen Stewart learns to let loose in "On the Road."',
            'uploader': 'thedailyshow',
            'title': 'thedailyshow kristen-stewart part 1',
        }
    }
@@ -79,11 +82,6 @@ class ComedyCentralShowsIE(InfoExtractor):
        '400': (384, 216),
    }
    @classmethod
    def suitable(cls, url):
        """Receives a URL and returns True if suitable for this IE."""
        return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
    @staticmethod
    def _transform_rtmp_url(rtmp_video_url):
        m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp\.comedystor/.*)$', rtmp_video_url)
@@ -99,14 +97,16 @@ class ComedyCentralShowsIE(InfoExtractor):
        if mobj.group('shortname'):
            if mobj.group('shortname') in ('tds', 'thedailyshow'):
-                url = 'http://www.thedailyshow.com/full-episodes/'
+                url = 'http://thedailyshow.cc.com/full-episodes/'
            else:
-                url = 'http://www.colbertnation.com/full-episodes/'
+                url = 'http://thecolbertreport.cc.com/full-episodes/'
            mobj = re.match(self._VALID_URL, url, re.VERBOSE)
            assert mobj is not None
        if mobj.group('clip'):
-            if mobj.group('showname') == 'thedailyshow':
+            if mobj.group('videotitle'):
                epTitle = mobj.group('videotitle')
            elif mobj.group('showname') == 'thedailyshow':
                epTitle = mobj.group('tdstitle')
            else:
                epTitle = mobj.group('cntitle')
@@ -120,9 +120,9 @@ class ComedyCentralShowsIE(InfoExtractor):
                epTitle = mobj.group('showname')
            else:
                epTitle = mobj.group('episode')
        show_name = mobj.group('showname')
-        self.report_extraction(epTitle)
+        webpage, htmlHandle = self._download_webpage_handle(url, epTitle)
        webpage,htmlHandle = self._download_webpage_handle(url, epTitle)
        if dlNewest:
            url = htmlHandle.geturl()
            mobj = re.match(self._VALID_URL, url, re.VERBOSE)
@@ -130,71 +130,86 @@ class ComedyCentralShowsIE(InfoExtractor):
                raise ExtractorError('Invalid redirected URL: ' + url)
            if mobj.group('episode') == '':
                raise ExtractorError('Redirected URL is still not specific: ' + url)
-            epTitle = mobj.group('episode')
+            epTitle = mobj.group('episode').rpartition('/')[-1]
        mMovieParams = re.findall('(?:<param name="movie" value="|var url = ")(http://media.mtvnservices.com/([^"]*(?:episode|video).*?:.*?))"', webpage)
        if len(mMovieParams) == 0:
            # The Colbert Report embeds the information without
            # a URL prefix, so extract the alternate reference
            # and then add the URL prefix manually.
-            altMovieParams = re.findall('data-mgid="([^"]*(?:episode|video).*?:.*?)"', webpage)
+            altMovieParams = re.findall('data-mgid="([^"]*(?:episode|video|playlist).*?:.*?)"', webpage)
            if len(altMovieParams) == 0:
                raise ExtractorError('unable to find Flash URL in webpage ' + url)
            else:
                mMovieParams = [("http://media.mtvnservices.com/" + altMovieParams[0], altMovieParams[0])]
        uri = mMovieParams[0][1]
-        indexUrl = 'http://shadow.comedycentral.com/feeds/video_player/mrss/?' + compat_urllib_parse.urlencode({'uri': uri})
+        # Correct cc.com in uri
-        idoc = self._download_xml(indexUrl, epTitle,
+        uri = re.sub(r'(episode:[^.]+)(\.cc)?\.com', r'\1.cc.com', uri)
                                          'Downloading show index',
                                          'unable to download episode index')
-        results = []
+        index_url = 'http://%s.cc.com/feeds/mrss?%s' % (show_name, compat_urllib_parse.urlencode({'uri': uri}))
        idoc = self._download_xml(
            index_url, epTitle,
            'Downloading show index', 'Unable to download episode index')
-        itemEls = idoc.findall('.//item')
+        title = idoc.find('./channel/title').text
-        for partNum,itemEl in enumerate(itemEls):
+        description = idoc.find('./channel/description').text
            mediaId = itemEl.findall('./guid')[0].text
            shortMediaId = mediaId.split(':')[-1]
            showId = mediaId.split(':')[-2].replace('.com', '')
            officialTitle = itemEl.findall('./title')[0].text
            officialDate = unified_strdate(itemEl.findall('./pubDate')[0].text)
-            configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' +
+        entries = []
-                        compat_urllib_parse.urlencode({'uri': mediaId}))
+        item_els = idoc.findall('.//item')
-            cdoc = self._download_xml(configUrl, epTitle,
+        for part_num, itemEl in enumerate(item_els):
-                                               'Downloading configuration for %s' % shortMediaId)
+            upload_date = unified_strdate(itemEl.findall('./pubDate')[0].text)
            thumbnail = itemEl.find('.//{http://search.yahoo.com/mrss/}thumbnail').attrib.get('url')
            content = itemEl.find('.//{http://search.yahoo.com/mrss/}content')
            duration = float_or_none(content.attrib.get('duration'))
            mediagen_url = content.attrib['url']
            guid = itemEl.find('./guid').text.rpartition(':')[-1]
            cdoc = self._download_xml(
                mediagen_url, epTitle,
                'Downloading configuration for segment %d / %d' % (part_num + 1, len(item_els)))
            turls = []
            for rendition in cdoc.findall('.//rendition'):
                finfo = (rendition.attrib['bitrate'], rendition.findall('./src')[0].text)
                turls.append(finfo)
            if len(turls) == 0:
                self._downloader.report_error('unable to download ' + mediaId + ': No videos found')
                continue
            formats = []
            for format, rtmp_video_url in turls:
                w, h = self._video_dimensions.get(format, (None, None))
                formats.append({
                    'format_id': 'vhttp-%s' % format,
                    'url': self._transform_rtmp_url(rtmp_video_url),
                    'ext': self._video_extensions.get(format, 'mp4'),
                    'format_id': format,
                    'height': h,
                    'width': w,
                })
                formats.append({
                    'format_id': 'rtmp-%s' % format,
                    'url': rtmp_video_url,
                    'ext': self._video_extensions.get(format, 'mp4'),
                    'height': h,
                    'width': w,
                })
                self._sort_formats(formats)
-            effTitle = showId + '-' + epTitle + ' part ' + compat_str(partNum+1)
+            virtual_id = show_name + ' ' + epTitle + ' part ' + compat_str(part_num + 1)
-            results.append({
+            entries.append({
-                'id': shortMediaId,
+                'id': guid,
                'title': virtual_id,
                'formats': formats,
-                'uploader': showId,
+                'uploader': show_name,
-                'upload_date': officialDate,
+                'upload_date': upload_date,
-                'title': effTitle,
+                'duration': duration,
-                'thumbnail': None,
+                'thumbnail': thumbnail,
-                'description': compat_str(officialTitle),
+                'description': description,
            })
-        return results
+        return {
            '_type': 'playlist',
            'entries': entries,
            'title': show_name + ' ' + title,
            'description': description,
        }
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -252,6 +252,17 @@ class InfoExtractor(object):
                outf.write(webpage_bytes)
        content = webpage_bytes.decode(encoding, 'replace')
        if (u'<title>Access to this site is blocked</title>' in content and
                u'Websense' in content[:512]):
            msg = u'Access to this webpage has been blocked by Websense filtering software in your network.'
            blocked_iframe = self._html_search_regex(
                r'<iframe src="([^"]+)"', content,
                u'Websense information URL', default=None)
            if blocked_iframe:
                msg += u' Visit %s for more details' % blocked_iframe
            raise ExtractorError(msg, expected=True)
        return (content, urlh)
    def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True):
--- a/youtube_dl/extractor/cspan.py
+++ b/youtube_dl/extractor/cspan.py
@@ -4,6 +4,7 @@ import re
 from .common import InfoExtractor
 from ..utils import (
    int_or_none,
    unescapeHTML,
    find_xpath_attr,
 )
@@ -54,18 +55,29 @@ class CSpanIE(InfoExtractor):
        info_url = 'http://c-spanvideo.org/videoLibrary/assets/player/ajax-player.php?os=android&html5=program&id=' + video_id
        data = self._download_json(info_url, video_id)
-        url = unescapeHTML(data['video']['files'][0]['path']['#text'])
+        doc = self._download_xml(
-
+            'http://www.c-span.org/common/services/flashXml.php?programid=' + video_id,
        doc = self._download_xml('http://www.c-span.org/common/services/flashXml.php?programid=' + video_id,
            video_id)
-        def find_string(s):
+        title = find_xpath_attr(doc, './/string', 'name', 'title').text
-            return find_xpath_attr(doc, './/string', 'name', s).text
+        thumbnail = find_xpath_attr(doc, './/string', 'name', 'poster').text
        files = data['video']['files']
        entries = [{
            'id': '%s_%d' % (video_id, partnum + 1),
            'title': (
                title if len(files) == 1 else
                '%s part %d' % (title, partnum + 1)),
            'url': unescapeHTML(f['path']['#text']),
            'description': description,
            'thumbnail': thumbnail,
            'duration': int_or_none(f.get('length', {}).get('#text')),
        } for partnum, f in enumerate(files)]
        return {
            '_type': 'playlist',
            'entries': entries,
            'title': title,
            'id': video_id,
            'title': find_string('title'),
            'url': url,
            'description': description,
            'thumbnail': find_string('poster'),
        }
--- a/youtube_dl/extractor/discovery.py
+++ b/youtube_dl/extractor/discovery.py
@@ -10,9 +10,10 @@ class DiscoveryIE(InfoExtractor):
    _VALID_URL = r'http://dsc\.discovery\.com\/[a-zA-Z0-9\-]*/[a-zA-Z0-9\-]*/videos/(?P<id>[a-zA-Z0-9\-]*)(.htm)?'
    _TEST = {
        'url': 'http://dsc.discovery.com/tv-shows/mythbusters/videos/mission-impossible-outtakes.htm',
        'file': '614784.mp4',
        'md5': 'e12614f9ee303a6ccef415cb0793eba2',
        'info_dict': {
            'id': '614784',
            'ext': 'mp4',
            'title': 'MythBusters: Mission Impossible Outtakes',
            'description': ('Watch Jamie Hyneman and Adam Savage practice being'
                ' each other -- to the point of confusing Jamie\'s dog -- and '
@@ -34,7 +35,7 @@ class DiscoveryIE(InfoExtractor):
        formats = []
        for f in info['mp4']:
            formats.append(
-                {'url': f['src'], r'ext': r'mp4', 'tbr': int(f['bitrate'][:-1])})
+                {'url': f['src'], 'ext': 'mp4', 'tbr': int(f['bitrate'][:-1])})
        return {
            'id': info['contentId'],
--- a/youtube_dl/extractor/ehow.py
+++ b/youtube_dl/extractor/ehow.py
@@ -1,23 +1,25 @@
 from __future__ import unicode_literals
 import re
 from ..utils import (
    compat_urllib_parse,
    determine_ext
 )
 from .common import InfoExtractor
 class EHowIE(InfoExtractor):
-    IE_NAME = u'eHow'
+    IE_NAME = 'eHow'
-    _VALID_URL = r'(?:https?://)?(?:www\.)?ehow\.com/[^/_?]*_(?P<id>[0-9]+)'
+    _VALID_URL = r'https?://(?:www\.)?ehow\.com/[^/_?]*_(?P<id>[0-9]+)'
    _TEST = {
-        u'url': u'http://www.ehow.com/video_12245069_hardwood-flooring-basics.html',
+        'url': 'http://www.ehow.com/video_12245069_hardwood-flooring-basics.html',
-        u'file': u'12245069.flv',
+        'md5': '9809b4e3f115ae2088440bcb4efbf371',
-        u'md5': u'9809b4e3f115ae2088440bcb4efbf371',
+        'info_dict': {
-        u'info_dict': {
+            'id': '12245069',
-            u"title": u"Hardwood Flooring Basics",
+            'ext': 'flv',
-            u"description": u"Hardwood flooring may be time consuming, but its ultimately a pretty straightforward concept. Learn about hardwood flooring basics with help from a hardware flooring business owner in this free video...",
+            'title': 'Hardwood Flooring Basics',
-   			u"uploader": u"Erick Nathan"
+            'description': 'Hardwood flooring may be time consuming, but its ultimately a pretty straightforward concept. Learn about hardwood flooring basics with help from a hardware flooring business owner in this free video...',
            'uploader': 'Erick Nathan',
        }
    }
@@ -26,21 +28,16 @@ class EHowIE(InfoExtractor):
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)
        video_url = self._search_regex(r'(?:file|source)=(http[^\'"&]*)',
-            webpage, u'video URL')
+            webpage, 'video URL')
-        final_url = compat_urllib_parse.unquote(video_url)        
+        final_url = compat_urllib_parse.unquote(video_url)
-        uploader = self._search_regex(r'<meta name="uploader" content="(.+?)" />',
+        uploader = self._html_search_meta('uploader', webpage)
            webpage, u'uploader')
        title = self._og_search_title(webpage).replace(' | eHow', '')
        ext = determine_ext(final_url)
        return {
-            '_type':       'video',
+            'id': video_id,
-            'id':          video_id,
+            'url': final_url,
-            'url':         final_url,
+            'title': title,
-            'ext':         ext,
+            'thumbnail': self._og_search_thumbnail(webpage),
            'title':       title,
            'thumbnail':   self._og_search_thumbnail(webpage),
            'description': self._og_search_description(webpage),
-            'uploader':    uploader,
+            'uploader': uploader,
        }
--- a/youtube_dl/extractor/franceculture.py
+++ b/youtube_dl/extractor/franceculture.py
@@ -0,0 +1,77 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import json
 import re
 from .common import InfoExtractor
 from ..utils import (
    compat_parse_qs,
    compat_urlparse,
 )
 class FranceCultureIE(InfoExtractor):
    """Information extractor for franceculture.fr ``player/reecouter`` pages."""
    # ``baseurl`` captures the scheme + host prefix so that relative media and
    # thumbnail paths found in the page can be resolved against it.
    _VALID_URL = r'(?P<baseurl>http://(?:www\.)?franceculture\.fr/)player/reecouter\?play=(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.franceculture.fr/player/reecouter?play=4795174',
        'info_dict': {
            'id': '4795174',
            'ext': 'mp3',
            'title': 'Rendez-vous au pays des geeks',
            'vcodec': 'none',
            'uploader': 'Colette Fellous',
            'upload_date': '20140301',
            'duration': 3601,
            'thumbnail': r're:^http://www\.franceculture\.fr/.*/images/player/Carnet-nomade\.jpg$',
            'description': 'Avec :Jean-Baptiste Péretié pour son documentaire sur Arte "La revanche des « geeks », une enquête menée aux Etats-Unis dans la S ...',
        }
    }
    def _real_extract(self, url):
        """Build the info dict for the France Culture player page at ``url``.
        The media path (``urlAOD``) and a JSON metadata blob (``infoData``)
        are taken from the query string of the Flash loader ``<param>``;
        title, uploader, thumbnail and description are scraped from the
        page markup. Uploader and thumbnail are looked up with
        ``fatal=False``; ``upload_date`` is kept only when it is exactly
        eight digits.
        """
        match = re.match(self._VALID_URL, url)
        video_id = match.group('id')
        site_root = match.group('baseurl')
        page = self._download_webpage(url, video_id)
        loader_query = self._search_regex(
            r"<param name='movie' value='/sites/all/modules/rf/rf_player/swf/loader.swf\?([^']+)' />",
            page, 'parameter code')
        flash_vars = compat_parse_qs(loader_query)
        # The media path from the page is resolved against the site root.
        video_url = compat_urlparse.urljoin(site_root, flash_vars['urlAOD'][0])
        title = self._html_search_regex(
            r'<h1 class="title[^"]+">(.+?)</h1>', page, 'title')
        uploader = self._html_search_regex(
            r'(?s)<div id="emission".*?<span class="author">(.*?)</span>',
            page, 'uploader', fatal=False)
        thumbnail_path = self._html_search_regex(
            r'(?s)<div id="emission".*?<img src="([^"]+)"', page,
            'thumbnail', fatal=False)
        thumbnail = (
            None if thumbnail_path is None
            else compat_urlparse.urljoin(site_root, thumbnail_path))
        description = self._html_search_regex(
            r'(?s)<p class="desc">(.*?)</p>', page, 'description')
        # infoData is a JSON list; only its first element is consulted.
        metadata = json.loads(flash_vars['infoData'][0])[0]
        duration = metadata.get('media_length')
        upload_date = metadata.get('media_section5')
        # Discard anything that is not a plain YYYYMMDD-shaped value.
        if upload_date is None or not re.match(r'[0-9]{8}$', upload_date):
            upload_date = None
        # Audio-only downloads are flagged by the .mp3 suffix of the media URL.
        vcodec = 'none' if video_url.lower().endswith('.mp3') else None
        return {
            'id': video_id,
            'url': video_url,
            'vcodec': vcodec,
            'duration': duration,
            'uploader': uploader,
            'upload_date': upload_date,
            'title': title,
            'thumbnail': thumbnail,
            'description': description,
        }
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -25,6 +25,7 @@ from ..utils import (
 from .brightcove import BrightcoveIE
 from .ooyala import OoyalaIE
 from .rutv import RUTVIE
 from .smotri import SmotriIE
 class GenericIE(InfoExtractor):
@@ -81,6 +82,17 @@ class GenericIE(InfoExtractor):
            },
            'add_ie': ['Brightcove'],
        },
        {
            'url': 'http://www.championat.com/video/football/v/87/87499.html',
            'md5': 'fb973ecf6e4a78a67453647444222983',
            'info_dict': {
                'id': '3414141473001',
                'ext': 'mp4',
                'title': 'Видео. Удаление Дзагоева (ЦСКА)',
                'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
                'uploader': 'Championat',
            },
        },
        # Direct link to a video
        {
            'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
@@ -197,6 +209,36 @@ class GenericIE(InfoExtractor):
                'description': 'No description',
            },
        },
        # arte embed
        {
            'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
            'md5': '7653032cbb25bf6c80d80f217055fa43',
            'info_dict': {
                'id': '048195-004_PLUS7-F',
                'ext': 'flv',
                'title': 'X:enius',
                'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
                'upload_date': '20140320',
            },
            'params': {
                'skip_download': 'Requires rtmpdump'
            }
        },
        # smotri embed
        {
            'url': 'http://rbctv.rbc.ru/archive/news/562949990879132.shtml',
            'md5': 'ec40048448e9284c9a1de77bb188108b',
            'info_dict': {
                'id': 'v27008541fad',
                'ext': 'mp4',
                'title': 'Крым и Севастополь вошли в состав России',
                'description': 'md5:fae01b61f68984c7bd2fa741e11c3175',
                'duration': 900,
                'upload_date': '20140318',
                'uploader': 'rbctv_2012_4',
                'uploader_id': 'rbctv_2012_4',
            },
        },
    ]
    def report_download_webpage(self, video_id):
@@ -285,13 +327,16 @@ class GenericIE(InfoExtractor):
        if not parsed_url.scheme:
            default_search = self._downloader.params.get('default_search')
            if default_search is None:
-                default_search = 'auto'
+                default_search = 'auto_warning'
-            if default_search == 'auto':
+            if default_search in ('auto', 'auto_warning'):
                if '/' in url:
                    self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
                    return self.url_result('http://' + url)
                else:
                    if default_search == 'auto_warning':
                        self._downloader.report_warning(
                            'Falling back to youtube search for  %s . Set --default-search to "auto" to suppress this warning.' % url)
                    return self.url_result('ytsearch:' + url)
            else:
                assert ':' in default_search
@@ -525,6 +570,18 @@ class GenericIE(InfoExtractor):
        if mobj is not None:
            return self.url_result(mobj.group('url'), 'TED')
        # Look for embedded arte.tv player
        mobj = re.search(
            r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"',
            webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'), 'ArteTVEmbed')
        # Look for embedded smotri.com player
        smotri_url = SmotriIE._extract_url(webpage)
        if smotri_url:
            return self.url_result(smotri_url, 'Smotri')
        # Start with something easy: JW Player in SWFObject
        mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
        if mobj is None:
--- a/youtube_dl/extractor/huffpost.py
+++ b/youtube_dl/extractor/huffpost.py
@@ -21,9 +21,10 @@ class HuffPostIE(InfoExtractor):
    _TEST = {
        'url': 'http://live.huffingtonpost.com/r/segment/legalese-it/52dd3e4b02a7602131000677',
        'file': '52dd3e4b02a7602131000677.mp4',
        'md5': '55f5e8981c1c80a64706a44b74833de8',
        'info_dict': {
            'id': '52dd3e4b02a7602131000677',
            'ext': 'mp4',
            'title': 'Legalese It! with @MikeSacksHP',
            'description': 'This week on Legalese It, Mike talks to David Bosco about his new book on the ICC, "Rough Justice," he also discusses the Virginia AG\'s historic stance on gay marriage, the execution of Edgar Tamayo, the ICC\'s delay of Kenya\'s President and more.  ',
            'duration': 1549,
--- a/youtube_dl/extractor/ign.py
+++ b/youtube_dl/extractor/ign.py
@@ -1,10 +1,8 @@
 from __future__ import unicode_literals
 import re
 import json
 from .common import InfoExtractor
 from ..utils import (
    determine_ext,
 )
 class IGNIE(InfoExtractor):
@@ -14,52 +12,57 @@ class IGNIE(InfoExtractor):
    """
    _VALID_URL = r'https?://.+?\.ign\.com/(?P<type>videos|show_videos|articles|(?:[^/]*/feature))(/.+)?/(?P<name_or_id>.+)'
-    IE_NAME = u'ign.com'
+    IE_NAME = 'ign.com'
    _CONFIG_URL_TEMPLATE = 'http://www.ign.com/videos/configs/id/%s.config'
-    _DESCRIPTION_RE = [r'<span class="page-object-description">(.+?)</span>',
+    _DESCRIPTION_RE = [
-                       r'id="my_show_video">.*?<p>(.*?)</p>',
+        r'<span class="page-object-description">(.+?)</span>',
-                       ]
+        r'id="my_show_video">.*?<p>(.*?)</p>',
    ]
    _TESTS = [
        {
-            u'url': u'http://www.ign.com/videos/2013/06/05/the-last-of-us-review',
+            'url': 'http://www.ign.com/videos/2013/06/05/the-last-of-us-review',
-            u'file': u'8f862beef863986b2785559b9e1aa599.mp4',
+            'md5': 'eac8bdc1890980122c3b66f14bdd02e9',
-            u'md5': u'eac8bdc1890980122c3b66f14bdd02e9',
+            'info_dict': {
-            u'info_dict': {
+                'id': '8f862beef863986b2785559b9e1aa599',
-                u'title': u'The Last of Us Review',
+                'ext': 'mp4',
-                u'description': u'md5:c8946d4260a4d43a00d5ae8ed998870c',
+                'title': 'The Last of Us Review',
                'description': 'md5:c8946d4260a4d43a00d5ae8ed998870c',
            }
        },
        {
-            u'url': u'http://me.ign.com/en/feature/15775/100-little-things-in-gta-5-that-will-blow-your-mind',
+            'url': 'http://me.ign.com/en/feature/15775/100-little-things-in-gta-5-that-will-blow-your-mind',
-            u'playlist': [
+            'playlist': [
                {
-                    u'file': u'5ebbd138523268b93c9141af17bec937.mp4',
+                    'info_dict': {
-                    u'info_dict': {
+                        'id': '5ebbd138523268b93c9141af17bec937',
-                        u'title': u'GTA 5 Video Review',
+                        'ext': 'mp4',
-                        u'description': u'Rockstar drops the mic on this generation of games. Watch our review of the masterly Grand Theft Auto V.',
+                        'title': 'GTA 5 Video Review',
                        'description': 'Rockstar drops the mic on this generation of games. Watch our review of the masterly Grand Theft Auto V.',
                    },
                },
                {
-                    u'file': u'638672ee848ae4ff108df2a296418ee2.mp4',
+                    'info_dict': {
-                    u'info_dict': {
+                        'id': '638672ee848ae4ff108df2a296418ee2',
-                        u'title': u'26 Twisted Moments from GTA 5 in Slow Motion',
+                        'ext': 'mp4',
-                        u'description': u'The twisted beauty of GTA 5 in stunning slow motion.',
+                        'title': '26 Twisted Moments from GTA 5 in Slow Motion',
                        'description': 'The twisted beauty of GTA 5 in stunning slow motion.',
                    },
                },
            ],
-            u'params': {
+            'params': {
-                u'skip_download': True,
+                'skip_download': True,
            },
        },
    ]
    def _find_video_id(self, webpage):
-        res_id = [r'data-video-id="(.+?)"',
+        res_id = [
-                  r'<object id="vid_(.+?)"',
+            r'data-video-id="(.+?)"',
-                  r'<meta name="og:image" content=".*/(.+?)-(.+?)/.+.jpg"',
+            r'<object id="vid_(.+?)"',
-                  ]
+            r'<meta name="og:image" content=".*/(.+?)-(.+?)/.+.jpg"',
        ]
        return self._search_regex(res_id, webpage, 'video id')
    def _real_extract(self, url):
@@ -68,7 +71,7 @@ class IGNIE(InfoExtractor):
        page_type = mobj.group('type')
        webpage = self._download_webpage(url, name_or_id)
        if page_type == 'articles':
-            video_url = self._search_regex(r'var videoUrl = "(.+?)"', webpage, u'video url')
+            video_url = self._search_regex(r'var videoUrl = "(.+?)"', webpage, 'video url')
            return self.url_result(video_url, ie='IGN')
        elif page_type != 'video':
            multiple_urls = re.findall(
@@ -80,41 +83,37 @@ class IGNIE(InfoExtractor):
        video_id = self._find_video_id(webpage)
        result = self._get_video_info(video_id)
        description = self._html_search_regex(self._DESCRIPTION_RE,
-                                              webpage, 'video description',
+            webpage, 'video description', flags=re.DOTALL)
                                              flags=re.DOTALL)
        result['description'] = description
        return result
    def _get_video_info(self, video_id):
        config_url = self._CONFIG_URL_TEMPLATE % video_id
-        config = json.loads(self._download_webpage(config_url, video_id,
+        config = self._download_json(config_url, video_id)
                            u'Downloading video info'))
        media = config['playlist']['media']
        video_url = media['url']
-        return {'id': media['metadata']['videoId'],
+        return {
-                'url': video_url,
+            'id': media['metadata']['videoId'],
-                'ext': determine_ext(video_url),
+            'url': media['url'],
-                'title': media['metadata']['title'],
+            'title': media['metadata']['title'],
-                'thumbnail': media['poster'][0]['url'].replace('{size}', 'grande'),
+            'thumbnail': media['poster'][0]['url'].replace('{size}', 'grande'),
-                }
+        }
 class OneUPIE(IGNIE):
    """Extractor for 1up.com, it uses the ign videos system."""
    _VALID_URL = r'https?://gamevideos\.1up\.com/(?P<type>video)/id/(?P<name_or_id>.+)'
    IE_NAME = '1up.com'
    _DESCRIPTION_RE = r'<div id="vid_summary">(.+?)</div>'
    _TEST = {
-        u'url': u'http://gamevideos.1up.com/video/id/34976',
+        'url': 'http://gamevideos.1up.com/video/id/34976',
-        u'file': u'34976.mp4',
+        'md5': '68a54ce4ebc772e4b71e3123d413163d',
-        u'md5': u'68a54ce4ebc772e4b71e3123d413163d',
+        'info_dict': {
-        u'info_dict': {
+            'id': '34976',
-            u'title': u'Sniper Elite V2 - Trailer',
+            'ext': 'mp4',
-            u'description': u'md5:5d289b722f5a6d940ca3136e9dae89cf',
+            'title': 'Sniper Elite V2 - Trailer',
            'description': 'md5:5d289b722f5a6d940ca3136e9dae89cf',
        }
    }
@@ -123,7 +122,6 @@ class OneUPIE(IGNIE):
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        id = mobj.group('name_or_id')
        result = super(OneUPIE, self)._real_extract(url)
-        result['id'] = id
+        result['id'] = mobj.group('name_or_id')
        return result
--- a/youtube_dl/extractor/kickstarter.py
+++ b/youtube_dl/extractor/kickstarter.py
@@ -1,37 +1,39 @@
 # encoding: utf-8
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 class KickStarterIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.kickstarter\.com/projects/(?P<id>\d*)/.*'
+    _VALID_URL = r'https?://www\.kickstarter\.com/projects/(?P<id>[^/]*)/.*'
    _TEST = {
-        u"url": u"https://www.kickstarter.com/projects/1404461844/intersection-the-story-of-josh-grant?ref=home_location",
+        'url': 'https://www.kickstarter.com/projects/1404461844/intersection-the-story-of-josh-grant?ref=home_location',
-        u"file": u"1404461844.mp4",
+        'md5': 'c81addca81327ffa66c642b5d8b08cab',
-        u"md5": u"c81addca81327ffa66c642b5d8b08cab",
+        'info_dict': {
-        u"info_dict": {
+            'id': '1404461844',
-            u"title": u"Intersection: The Story of Josh Grant by Kyle Cowling",
+            'ext': 'mp4',
            'title': 'Intersection: The Story of Josh Grant by Kyle Cowling',
            'description': 'A unique motocross documentary that examines the '
                'life and mind of one of sports most elite athletes: Josh Grant.',
        },
    }
    def _real_extract(self, url):
        m = re.match(self._VALID_URL, url)
        video_id = m.group('id')
-        webpage_src = self._download_webpage(url, video_id)
+        webpage = self._download_webpage(url, video_id)
-        video_url = self._search_regex(r'data-video="(.*?)">',
+        video_url = self._search_regex(r'data-video-url="(.*?)"',
-            webpage_src, u'video URL')
+            webpage, 'video URL')
-        if 'mp4' in video_url:
+        video_title = self._html_search_regex(r'<title>(.*?)</title>',
-            ext = 'mp4'
+            webpage, 'title').rpartition('— Kickstarter')[0].strip()
        else:
            ext = 'flv'
        video_title = self._html_search_regex(r"<title>(.*?)</title>",
            webpage_src, u'title').rpartition(u'\u2014 Kickstarter')[0].strip()
-        results = [{
+        return {
-                    'id': video_id,
+            'id': video_id,
-                    'url': video_url,
+            'url': video_url,
-                    'title': video_title,
+            'title': video_title,
-                    'ext': ext,
+            'description': self._og_search_description(webpage),
-                    }]
+            'thumbnail': self._og_search_thumbnail(webpage),
-        return results
+        }
--- a/youtube_dl/extractor/metacritic.py
+++ b/youtube_dl/extractor/metacritic.py
@@ -13,8 +13,9 @@ class MetacriticIE(InfoExtractor):
    _TEST = {
        'url': 'http://www.metacritic.com/game/playstation-4/infamous-second-son/trailers/3698222',
        'file': '3698222.mp4',
        'info_dict': {
            'id': '3698222',
            'ext': 'mp4',
            'title': 'inFamous: Second Son - inSide Sucker Punch: Smoke & Mirrors',
            'description': 'Take a peak behind-the-scenes to see how Sucker Punch brings smoke into the universe of inFAMOUS Second Son on the PS4.',
            'duration': 221,
--- a/youtube_dl/extractor/mooshare.py
+++ b/youtube_dl/extractor/mooshare.py
@@ -14,7 +14,7 @@ from ..utils import (
 class MooshareIE(InfoExtractor):
    IE_NAME = 'mooshare'
    IE_DESC = 'Mooshare.biz'
-    _VALID_URL = r'http://mooshare\.biz/(?P<id>[\da-z]{12})'
+    _VALID_URL = r'http://(?:www\.)?mooshare\.biz/(?P<id>[\da-z]{12})'
    _TESTS = [
        {
--- a/youtube_dl/extractor/musicplayon.py
+++ b/youtube_dl/extractor/musicplayon.py
@@ -0,0 +1,75 @@
 # encoding: utf-8
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..utils import int_or_none
 class MusicPlayOnIE(InfoExtractor):
    """Extractor for musicplayon.com video pages.

    Offers one fixed ``stream-mobile`` MP4 URL plus one format for every
    ``EXT-X-STREAM-INF`` entry found in the site's m3u8 manifest.
    """
    _VALID_URL = r'https?://(?:.+?\.)?musicplayon\.com/play(?:-touch)?\?(?:v|pl=100&play)=(?P<id>\d+)'
    _TEST = {
        'url': 'http://en.musicplayon.com/play?v=433377',
        'info_dict': {
            'id': '433377',
            'ext': 'mp4',
            'title': 'Rick Ross - Interview On Chelsea Lately (2014)',
            'description': 'Rick Ross Interview On Chelsea Lately',
            'duration': 342,
            'uploader': 'ultrafish',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Return the info dict (metadata and formats) for the video at *url*."""
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        page = self._download_webpage(url, video_id)

        title = self._og_search_title(page)
        description = self._og_search_description(page)
        thumbnail = self._og_search_thumbnail(page)
        duration = self._html_search_meta('video:duration', page, 'duration', fatal=False)
        view_count = self._og_search_property('count', page, fatal=False)
        uploader = self._html_search_regex(
            r'<div>by&nbsp;<a href="[^"]+" class="purple">([^<]+)</a></div>', page, 'uploader', fatal=False)

        # The mobile stream URL is derived from the id alone, so it is
        # always offered as the first format.
        formats = [
            {
                'url': 'http://media0-eu-nl.musicplayon.com/stream-mobile?id=%s&type=.mp4' % video_id,
                'ext': 'mp4',
            }
        ]

        manifest = self._download_webpage(
            'http://en.musicplayon.com/manifest.m3u8?v=%s' % video_id, video_id, 'Downloading manifest')

        # Splitting on '#' yields one chunk per manifest tag; the chunk for a
        # stream tag is "<attributes>\n<stream url>\n".
        for entry in manifest.split('#')[1:]:
            if not entry.startswith('EXT-X-STREAM-INF:'):
                continue
            # Named stream_url so the ``url`` argument is not shadowed.
            meta, stream_url, _ = entry.split('\n')
            params = dict(param.split('=') for param in meta.split(',')[1:])
            # RESOLUTION is "<width>x<height>": width is the first
            # component, height the last.
            resolution = params['RESOLUTION'].split('x')
            formats.append({
                'url': stream_url,
                'ext': 'mp4',
                'tbr': int(params['BANDWIDTH']),
                'width': int(resolution[0]),
                'height': int(resolution[-1]),
                'format_note': params['NAME'].replace('"', '').strip(),
            })

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'duration': int_or_none(duration),
            'view_count': int_or_none(view_count),
            'formats': formats,
        }
--- a/youtube_dl/extractor/nba.py
+++ b/youtube_dl/extractor/nba.py
@@ -6,12 +6,13 @@ from .common import InfoExtractor
 class NBAIE(InfoExtractor):
-    _VALID_URL = r'^(?:https?://)?(?:watch\.|www\.)?nba\.com/(?:nba/)?video(/[^?]*?)(?:/index\.html)?(?:\?.*)?$'
+    _VALID_URL = r'https?://(?:watch\.|www\.)?nba\.com/(?:nba/)?video(?P<id>/[^?]*?)(?:/index\.html)?(?:\?.*)?$'
    _TEST = {
        'url': 'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html',
        'file': u'0021200253-okc-bkn-recap.nba.mp4',
        'md5': u'c0edcfc37607344e2ff8f13c378c88a4',
        'info_dict': {
            'id': '0021200253-okc-bkn-recap.nba',
            'ext': 'mp4',
            'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.',
            'title': 'Thunder vs. Nets',
        },
@@ -19,7 +20,7 @@ class NBAIE(InfoExtractor):
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group(1)
+        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)
@@ -33,7 +34,6 @@ class NBAIE(InfoExtractor):
        return {
            'id': shortened_video_id,
            'url': video_url,
            'ext': 'mp4',
            'title': title,
            'description': description,
        }
--- a/youtube_dl/extractor/niconico.py
+++ b/youtube_dl/extractor/niconico.py
@@ -1,12 +1,10 @@
 # encoding: utf-8
 from __future__ import unicode_literals
 import re
 import socket
 from .common import InfoExtractor
 from ..utils import (
    compat_http_client,
    compat_urllib_error,
    compat_urllib_parse,
    compat_urllib_request,
    compat_urlparse,
@@ -18,57 +16,54 @@ from ..utils import (
 class NiconicoIE(InfoExtractor):
-    IE_NAME = u'niconico'
+    IE_NAME = 'niconico'
-    IE_DESC = u'ニコニコ動画'
+    IE_DESC = 'ニコニコ動画'
    _TEST = {
-        u'url': u'http://www.nicovideo.jp/watch/sm22312215',
+        'url': 'http://www.nicovideo.jp/watch/sm22312215',
-        u'file': u'sm22312215.mp4',
+        'md5': 'd1a75c0823e2f629128c43e1212760f9',
-        u'md5': u'd1a75c0823e2f629128c43e1212760f9',
+        'info_dict': {
-        u'info_dict': {
+            'id': 'sm22312215',
-            u'title': u'Big Buck Bunny',
+            'ext': 'mp4',
-            u'uploader': u'takuya0301',
+            'title': 'Big Buck Bunny',
-            u'uploader_id': u'2698420',
+            'uploader': 'takuya0301',
-            u'upload_date': u'20131123',
+            'uploader_id': '2698420',
-            u'description': u'(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org',
+            'upload_date': '20131123',
            'description': '(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org',
        },
-        u'params': {
+        'params': {
-            u'username': u'ydl.niconico@gmail.com',
+            'username': 'ydl.niconico@gmail.com',
-            u'password': u'youtube-dl',
+            'password': 'youtube-dl',
        },
    }
    _VALID_URL = r'^https?://(?:www\.|secure\.)?nicovideo\.jp/watch/([a-z][a-z][0-9]+)(?:.*)$'
    _NETRC_MACHINE = 'niconico'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = True
    def _real_initialize(self):
        self._login()
    def _login(self):
        (username, password) = self._get_login_info()
        # No authentication to be performed
        if username is None:
-            if self._LOGIN_REQUIRED:
+            # Login is required
-                raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
+            raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            return False
        # Log in
        login_form_strs = {
-            u'mail': username,
+            'mail': username,
-            u'password': password,
+            'password': password,
        }
        # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
        # chokes on unicode
-        login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
+        login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in login_form_strs.items())
        login_data = compat_urllib_parse.urlencode(login_form).encode('utf-8')
        request = compat_urllib_request.Request(
-            u'https://secure.nicovideo.jp/secure/login', login_data)
+            'https://secure.nicovideo.jp/secure/login', login_data)
        login_results = self._download_webpage(
-            request, u'', note=u'Logging in', errnote=u'Unable to log in')
+            request, None, note='Logging in', errnote='Unable to log in')
        if re.search(r'(?i)<h1 class="mb8p4">Log in error</h1>', login_results) is not None:
-            self._downloader.report_warning(u'unable to log in: bad username or password')
+            self._downloader.report_warning('unable to log in: bad username or password')
            return False
        return True
@@ -82,12 +77,12 @@ class NiconicoIE(InfoExtractor):
        video_info = self._download_xml(
            'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id,
-            note=u'Downloading video info page')
+            note='Downloading video info page')
        # Get flv info
        flv_info_webpage = self._download_webpage(
-            u'http://flapi.nicovideo.jp/api/getflv?v=' + video_id,
+            'http://flapi.nicovideo.jp/api/getflv?v=' + video_id,
-            video_id, u'Downloading flv info')
+            video_id, 'Downloading flv info')
        video_real_url = compat_urlparse.parse_qs(flv_info_webpage)['url'][0]
        # Start extracting information
@@ -106,22 +101,22 @@ class NiconicoIE(InfoExtractor):
        url = 'http://seiga.nicovideo.jp/api/user/info?id=' + video_uploader_id
        try:
            user_info = self._download_xml(
-                url, video_id, note=u'Downloading user information')
+                url, video_id, note='Downloading user information')
            video_uploader = user_info.find('.//nickname').text
-        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+        except ExtractorError as err:
-            self._downloader.report_warning(u'Unable to download user info webpage: %s' % compat_str(err))
+            self._downloader.report_warning('Unable to download user info webpage: %s' % compat_str(err))
        return {
-            'id':          video_id,
+            'id': video_id,
-            'url':         video_real_url,
+            'url': video_real_url,
-            'title':       video_title,
+            'title': video_title,
-            'ext':         video_extension,
+            'ext': video_extension,
-            'format':      video_format,
+            'format': video_format,
-            'thumbnail':   video_thumbnail,
+            'thumbnail': video_thumbnail,
            'description': video_description,
-            'uploader':    video_uploader,
+            'uploader': video_uploader,
            'upload_date': video_upload_date,
            'uploader_id': video_uploader_id,
-            'view_count':  video_view_count,
+            'view_count': video_view_count,
            'webpage_url': video_webpage_url,
        }
--- a/youtube_dl/extractor/ntv.py
+++ b/youtube_dl/extractor/ntv.py
@@ -0,0 +1,157 @@
 # encoding: utf-8
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..utils import (
    ExtractorError,
    unescapeHTML
 )
 class NTVIE(InfoExtractor):
    """Extractor for ntv.ru pages.

    The numeric video id is located in the page markup, the player XML is
    fetched from ``/vi<id>/`` and one RTMP format is built for every
    ``file`` / ``hifile`` / ``webmfile`` element present in it.
    """
    _VALID_URL = r'http://(?:www\.)?ntv\.ru/(?P<id>.+)'

    _TESTS = [
        {
            'url': 'http://www.ntv.ru/novosti/863142/',
            'info_dict': {
                'id': '746000',
                'ext': 'flv',
                'title': 'Командующий Черноморским флотом провел переговоры в штабе ВМС Украины',
                'description': 'Командующий Черноморским флотом провел переговоры в штабе ВМС Украины',
                'duration': 136,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        },
        {
            'url': 'http://www.ntv.ru/video/novosti/750370/',
            'info_dict': {
                'id': '750370',
                'ext': 'flv',
                'title': 'Родные пассажиров пропавшего Boeing не верят в трагический исход',
                'description': 'Родные пассажиров пропавшего Boeing не верят в трагический исход',
                'duration': 172,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        },
        {
            'url': 'http://www.ntv.ru/peredacha/segodnya/m23700/o232416',
            'info_dict': {
                'id': '747480',
                'ext': 'flv',
                'title': '«Сегодня». 21 марта 2014 года. 16:00 ',
                'description': '«Сегодня». 21 марта 2014 года. 16:00 ',
                'duration': 1496,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        },
        {
            'url': 'http://www.ntv.ru/kino/Koma_film',
            'info_dict': {
                'id': '750783',
                'ext': 'flv',
                'title': 'Остросюжетный фильм «Кома»  4 апреля вечером на НТВ',
                'description': 'Остросюжетный фильм «Кома»  4 апреля вечером на НТВ',
                'duration': 28,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        },
        {
            'url': 'http://www.ntv.ru/serial/Delo_vrachey/m31760/o233916/',
            'info_dict': {
                'id': '751482',
                'ext': 'flv',
                'title': '«Дело врачей»: «Деревце жизни»',
                'description': '«Дело врачей»: «Деревце жизни»',
                'duration': 2590,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        },
    ]

    # Tried in order; the first pattern that matches supplies the video id.
    _VIDEO_ID_REGEXES = [
        r'<meta property="og:url" content="http://www\.ntv\.ru/video/(\d+)',
        r'<video embed=[^>]+><id>(\d+)</id>',
        r'<video restriction[^>]+><key>(\d+)</key>'
    ]

    def _real_extract(self, url):
        """Return the info dict for the video embedded in the page at *url*.

        Raises ExtractorError when none of ``_VIDEO_ID_REGEXES`` matches
        the downloaded page.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        page = self._download_webpage(url, video_id, 'Downloading page')

        for pattern in self._VIDEO_ID_REGEXES:
            mobj = re.search(pattern, page)
            if mobj:
                break

        if not mobj:
            raise ExtractorError('No media links available for %s' % video_id)

        # From here on video_id is the numeric id found in the page rather
        # than the path fragment taken from the URL.
        video_id = mobj.group(1)

        player = self._download_xml('http://www.ntv.ru/vi%s/' % video_id, video_id, 'Downloading video XML')
        title = unescapeHTML(player.find('./data/title').text)
        description = unescapeHTML(player.find('./data/description').text)

        video = player.find('./data/video')
        video_id = video.find('./id').text
        thumbnail = video.find('./splash').text
        duration = int(video.find('./totaltime').text)
        view_count = int(video.find('./views').text)
        puid22 = video.find('./puid22').text

        # puid22 selects the RTMP application; unknown values use the
        # application registered for '4'.
        apps = {
            '4': 'video1',
            '7': 'video2',
        }
        app = apps.get(puid22, apps['4'])

        formats = []
        for format_id in ['', 'hi', 'webm']:
            file_el = video.find('./%sfile' % format_id)
            if file_el is None:
                continue

            # A matching size element may be absent; report an unknown
            # file size instead of failing on None.
            size_el = video.find('./%ssize' % format_id)
            filesize = int(size_el.text) if size_el is not None else None

            formats.append({
                'url': 'rtmp://media.ntv.ru/%s' % app,
                'app': app,
                'play_path': file_el.text,
                'rtmp_conn': 'B:1',
                'player_url': 'http://www.ntv.ru/swf/vps1.swf?update=20131128',
                'page_url': 'http://www.ntv.ru',
                'flash_ver': 'LNX 11,2,202,341',
                'rtmp_live': True,
                'ext': 'flv',
                'filesize': filesize,
            })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'view_count': view_count,
            'formats': formats,
        }
--- a/youtube_dl/extractor/oe1.py
+++ b/youtube_dl/extractor/oe1.py
@@ -0,0 +1,40 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import calendar
 import datetime
 import re
 from .common import InfoExtractor
 # audios on oe1.orf.at are only available for 7 days, so we can't
 # add tests.
 class OE1IE(InfoExtractor):
    """Extractor for programme pages on oe1.orf.at."""
    IE_DESC = 'oe1.orf.at'
    _VALID_URL = r'http://oe1\.orf\.at/programm/(?P<id>[0-9]+)'

    def _real_extract(self, url):
        """Return the info dict for the show addressed by *url*.

        Metadata comes from the show's ``konsole`` JSON document.
        """
        show_id = re.match(self._VALID_URL, url).group('id')

        data = self._download_json(
            'http://oe1.orf.at/programm/%s/konsole' % show_id, show_id)
        item = data['item']

        # day_label and time together read "DD.MM.YYYY HH:MM".
        broadcast_label = '%s %s' % (item['day_label'], item['time'])
        broadcast_time = datetime.datetime.strptime(
            broadcast_label, '%d.%m.%Y %H:%M')
        # NOTE(review): the parsed datetime is naive and is converted as if
        # it were UTC -- confirm which timezone the site reports.
        unix_timestamp = calendar.timegm(broadcast_time.utctimetuple())

        result = {
            'id': show_id,
            'title': item['title'],
            'url': item['url_stream'],
            'ext': 'mp3',
            'description': item.get('info'),
            'timestamp': unix_timestamp,
        }
        return result
--- a/youtube_dl/extractor/pyvideo.py
+++ b/youtube_dl/extractor/pyvideo.py
@@ -1,3 +1,5 @@
 from __future__ import unicode_literals
 import re
 import os
@@ -5,45 +7,50 @@ from .common import InfoExtractor
 class PyvideoIE(InfoExtractor):
-    _VALID_URL = r'(?:http://)?(?:www\.)?pyvideo\.org/video/(?P<id>\d+)/(.*)'
+    _VALID_URL = r'http://(?:www\.)?pyvideo\.org/video/(?P<id>\d+)/(.*)'
-    _TESTS = [{
+
-        u'url': u'http://pyvideo.org/video/1737/become-a-logging-expert-in-30-minutes',
+    _TESTS = [
-        u'file': u'24_4WWkSmNo.mp4',
+        {
-        u'md5': u'de317418c8bc76b1fd8633e4f32acbc6',
+            'url': 'http://pyvideo.org/video/1737/become-a-logging-expert-in-30-minutes',
-        u'info_dict': {
+            'md5': 'de317418c8bc76b1fd8633e4f32acbc6',
-            u"title": u"Become a logging expert in 30 minutes",
+            'info_dict': {
-            u"description": u"md5:9665350d466c67fb5b1598de379021f7",
+                'id': '24_4WWkSmNo',
-            u"upload_date": u"20130320",
+                'ext': 'mp4',
-            u"uploader": u"NextDayVideo",
+                'title': 'Become a logging expert in 30 minutes',
-            u"uploader_id": u"NextDayVideo",
+                'description': 'md5:9665350d466c67fb5b1598de379021f7',
                'upload_date': '20130320',
                'uploader': 'NextDayVideo',
                'uploader_id': 'NextDayVideo',
            },
            'add_ie': ['Youtube'],
        },
-        u'add_ie': ['Youtube'],
+        {
-    },
+            'url': 'http://pyvideo.org/video/2542/gloriajw-spotifywitherikbernhardsson182m4v',
-    {
+            'md5': '5fe1c7e0a8aa5570330784c847ff6d12',
-        u'url': u'http://pyvideo.org/video/2542/gloriajw-spotifywitherikbernhardsson182m4v',
+            'info_dict': {
-        u'md5': u'5fe1c7e0a8aa5570330784c847ff6d12',
+                'id': '2542',
-        u'info_dict': {
+                'ext': 'm4v',
-            u'id': u'2542',
+                'title': 'Gloriajw-SpotifyWithErikBernhardsson182',
-            u'ext': u'm4v',
+            },
            u'title': u'Gloriajw-SpotifyWithErikBernhardsson182',
        },
    },
    ]
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)
        m_youtube = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', webpage)
        webpage = self._download_webpage(url, video_id)
        m_youtube = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', webpage)
        if m_youtube is not None:
            return self.url_result(m_youtube.group(1), 'Youtube')
-        title = self._html_search_regex(r'<div class="section">.*?<h3>([^>]+?)</h3>',
+        title = self._html_search_regex(
-            webpage, u'title', flags=re.DOTALL)
+            r'<div class="section">.*?<h3>([^>]+?)</h3>', webpage, 'title', flags=re.DOTALL)
-        video_url = self._search_regex([r'<source src="(.*?)"',
+        video_url = self._search_regex(
-            r'<dt>Download</dt>.*?<a href="(.+?)"'],
+            [r'<source src="(.*?)"', r'<dt>Download</dt>.*?<a href="(.+?)"'],
-            webpage, u'video url', flags=re.DOTALL)
+            webpage, 'video url', flags=re.DOTALL)
        return {
            'id': video_id,
            'title': os.path.splitext(title)[0],
--- a/youtube_dl/extractor/roxwel.py
+++ b/youtube_dl/extractor/roxwel.py
@@ -1,5 +1,6 @@
 from __future__ import unicode_literals
 import re
 import json
 from .common import InfoExtractor
 from ..utils import unified_strdate, determine_ext
@@ -9,41 +10,44 @@ class RoxwelIE(InfoExtractor):
    _VALID_URL = r'https?://www\.roxwel\.com/player/(?P<filename>.+?)(\.|\?|$)'
    _TEST = {
-        u'url': u'http://www.roxwel.com/player/passionpittakeawalklive.html',
+        'url': 'http://www.roxwel.com/player/passionpittakeawalklive.html',
-        u'file': u'passionpittakeawalklive.flv',
+        'info_dict': {
-        u'md5': u'd9dea8360a1e7d485d2206db7fe13035',
+            'id': 'passionpittakeawalklive',
-        u'info_dict': {
+            'ext': 'flv',
-            u'title': u'Take A Walk (live)',
+            'title': 'Take A Walk (live)',
-            u'uploader': u'Passion Pit',
+            'uploader': 'Passion Pit',
-            u'description': u'Passion Pit performs "Take A Walk\" live at The Backyard in Austin, Texas. ',
+            'uploader_id': 'passionpit',
            'upload_date': '20120928',
            'description': 'Passion Pit performs "Take A Walk\" live at The Backyard in Austin, Texas. ',
        },
-        u'skip': u'Requires rtmpdump',
+        'params': {
            # rtmp download
            'skip_download': True,
        }
    }
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        filename = mobj.group('filename')
        info_url = 'http://www.roxwel.com/api/videos/%s' % filename
-        info_page = self._download_webpage(info_url, filename,
+        info = self._download_json(info_url, filename)
                                           u'Downloading video info')
        self.report_extraction(filename)
        info = json.loads(info_page)
        rtmp_rates = sorted([int(r.replace('flv_', '')) for r in info['media_rates'] if r.startswith('flv_')])
        best_rate = rtmp_rates[-1]
        url_page_url = 'http://roxwel.com/pl_one_time.php?filename=%s&quality=%s' % (filename, best_rate)
-        rtmp_url = self._download_webpage(url_page_url, filename, u'Downloading video url')
+        rtmp_url = self._download_webpage(url_page_url, filename, 'Downloading video url')
        ext = determine_ext(rtmp_url)
        if ext == 'f4v':
            rtmp_url = rtmp_url.replace(filename, 'mp4:%s' % filename)
-        return {'id': filename,
+        return {
-                'title': info['title'],
+            'id': filename,
-                'url': rtmp_url,
+            'title': info['title'],
-                'ext': 'flv',
+            'url': rtmp_url,
-                'description': info['description'],
+            'ext': 'flv',
-                'thumbnail': info.get('player_image_url') or info.get('image_url_large'),
+            'description': info['description'],
-                'uploader': info['artist'],
+            'thumbnail': info.get('player_image_url') or info.get('image_url_large'),
-                'uploader_id': info['artistname'],
+            'uploader': info['artist'],
-                'upload_date': unified_strdate(info['dbdate']),
+            'uploader_id': info['artistname'],
-                }
+            'upload_date': unified_strdate(info['dbdate']),
        }
--- a/youtube_dl/extractor/rts.py
+++ b/youtube_dl/extractor/rts.py
@@ -28,6 +28,7 @@ class RTSIE(InfoExtractor):
            'uploader': 'Divers',
            'upload_date': '19680921',
            'timestamp': -40280400,
            'thumbnail': 're:^https?://.*\.image'
        },
    }
@@ -58,4 +59,5 @@ class RTSIE(InfoExtractor):
            'duration': duration,
            'uploader': info.get('programName'),
            'timestamp': upload_timestamp,
            'thumbnail': thumbnail,
        }
--- a/youtube_dl/extractor/rutube.py
+++ b/youtube_dl/extractor/rutube.py
@@ -2,7 +2,6 @@
 from __future__ import unicode_literals
 import re
 import json
 import itertools
 from .common import InfoExtractor
@@ -20,8 +19,9 @@ class RutubeIE(InfoExtractor):
    _TEST = {
        'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/',
        'file': '3eac3b4561676c17df9132a9a1e62e3e.mp4',
        'info_dict': {
            'id': '3eac3b4561676c17df9132a9a1e62e3e',
            'ext': 'mp4',
            'title': 'Раненный кенгуру забежал в аптеку',
            'description': 'http://www.ntdtv.ru ',
            'duration': 80,
@@ -38,15 +38,15 @@ class RutubeIE(InfoExtractor):
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
-        
+
-        api_response = self._download_webpage('http://rutube.ru/api/video/%s/?format=json' % video_id,
+        video = self._download_json(
-                                              video_id, 'Downloading video JSON')
+            'http://rutube.ru/api/video/%s/?format=json' % video_id,
-        video = json.loads(api_response)
+            video_id, 'Downloading video JSON')
-        
+
-        api_response = self._download_webpage('http://rutube.ru/api/play/trackinfo/%s/?format=json' % video_id,
+        trackinfo = self._download_json(
-                                              video_id, 'Downloading trackinfo JSON')
+            'http://rutube.ru/api/play/trackinfo/%s/?format=json' % video_id,
-        trackinfo = json.loads(api_response)
+            video_id, 'Downloading trackinfo JSON')
-        
+
        # Some videos don't have the author field
        author = trackinfo.get('author') or {}
        m3u8_url = trackinfo['video_balancer'].get('m3u8')
@@ -79,10 +79,9 @@ class RutubeChannelIE(InfoExtractor):
    def _extract_videos(self, channel_id, channel_title=None):
        entries = []
        for pagenum in itertools.count(1):
-            api_response = self._download_webpage(
+            page = self._download_json(
                self._PAGE_TEMPLATE % (channel_id, pagenum),
                channel_id, 'Downloading page %s' % pagenum)
            page = json.loads(api_response)
            results = page['results']
            if not results:
                break
@@ -108,10 +107,9 @@ class RutubeMovieIE(RutubeChannelIE):
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        movie_id = mobj.group('id')
-        api_response = self._download_webpage(
+        movie = self._download_json(
            self._MOVIE_TEMPLATE % movie_id, movie_id,
            'Downloading movie JSON')
        movie = json.loads(api_response)
        movie_name = movie['name']
        return self._extract_videos(movie_id, movie_name)
--- a/youtube_dl/extractor/slashdot.py
+++ b/youtube_dl/extractor/slashdot.py
@@ -1,24 +0,0 @@
 import re
 from .common import InfoExtractor
 class SlashdotIE(InfoExtractor):
    """Extractor for tv.slashdot.org embed pages.

    The page itself only wraps an Ooyala player, so extraction is delegated
    to the Ooyala extractor via the first ``<script src="...">`` found.
    """

    _VALID_URL = r'https?://tv\.slashdot\.org/video/\?embed=(?P<id>.*?)(&|$)'

    _TEST = {
        u'url': u'http://tv.slashdot.org/video/?embed=JscHMzZDplD0p-yNLOzTfzC3Q3xzJaUz',
        u'file': u'JscHMzZDplD0p-yNLOzTfzC3Q3xzJaUz.mp4',
        u'md5': u'd2222e7a4a4c1541b3e0cf732fb26735',
        u'info_dict': {
            u'title': u' Meet the Stampede Supercomputing Cluster\'s Administrator',
        },
        u'add_ie': ['Ooyala'],
    }

    def _real_extract(self, url):
        """Hand the embedded Ooyala script URL over to the Ooyala extractor."""
        video_id = re.match(self._VALID_URL, url).group('id')
        page = self._download_webpage(url, video_id)
        script_url = self._search_regex(
            r'<script src="(.*?)"', page, 'ooyala url')
        return self.url_result(script_url, 'Ooyala')
--- a/youtube_dl/extractor/smotri.py
+++ b/youtube_dl/extractor/smotri.py
@@ -13,22 +13,24 @@ from ..utils import (
    compat_urllib_request,
    ExtractorError,
    url_basename,
    int_or_none,
 )
 class SmotriIE(InfoExtractor):
    IE_DESC = 'Smotri.com'
    IE_NAME = 'smotri'
-    _VALID_URL = r'^https?://(?:www\.)?(?P<url>smotri\.com/video/view/\?id=(?P<videoid>v(?P<realvideoid>[0-9]+)[a-z0-9]{4}))'
+    _VALID_URL = r'^https?://(?:www\.)?(?:smotri\.com/video/view/\?id=|pics\.smotri\.com/(?:player|scrubber_custom8)\.swf\?file=)(?P<videoid>v(?P<realvideoid>[0-9]+)[a-z0-9]{4})'
    _NETRC_MACHINE = 'smotri'
    _TESTS = [
        # real video id 2610366
        {
            'url': 'http://smotri.com/video/view/?id=v261036632ab',
            'file': 'v261036632ab.mp4',
            'md5': '2a7b08249e6f5636557579c368040eb9',
            'info_dict': {
                'id': 'v261036632ab',
                'ext': 'mp4',
                'title': 'катастрофа с камер видеонаблюдения',
                'uploader': 'rbc2008',
                'uploader_id': 'rbc08',
@@ -40,9 +42,10 @@ class SmotriIE(InfoExtractor):
        # real video id 57591
        {
            'url': 'http://smotri.com/video/view/?id=v57591cb20',
            'file': 'v57591cb20.flv',
            'md5': '830266dfc21f077eac5afd1883091bcd',
            'info_dict': {
                'id': 'v57591cb20',
                'ext': 'flv',
                'title': 'test',
                'uploader': 'Support Photofile@photofile',
                'uploader_id': 'support-photofile',
@@ -54,9 +57,10 @@ class SmotriIE(InfoExtractor):
        # video-password
        {
            'url': 'http://smotri.com/video/view/?id=v1390466a13c',
            'file': 'v1390466a13c.mp4',
            'md5': 'f6331cef33cad65a0815ee482a54440b',
            'info_dict': {
                'id': 'v1390466a13c',
                'ext': 'mp4',
                'title': 'TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1',
                'uploader': 'timoxa40',
                'uploader_id': 'timoxa40',
@@ -71,9 +75,10 @@ class SmotriIE(InfoExtractor):
        # age limit + video-password
        {
            'url': 'http://smotri.com/video/view/?id=v15408898bcf',
            'file': 'v15408898bcf.flv',
            'md5': '91e909c9f0521adf5ee86fbe073aad70',
            'info_dict': {
                'id': 'v15408898bcf',
                'ext': 'flv',
                'title': 'этот ролик не покажут по ТВ',
                'uploader': 'zzxxx',
                'uploader_id': 'ueggb',
@@ -85,7 +90,22 @@ class SmotriIE(InfoExtractor):
            'params': {
                'videopassword': '333'
            }
-        }
+        },
        # swf player
        {
            'url': 'http://pics.smotri.com/scrubber_custom8.swf?file=v9188090500',
            'md5': '4d47034979d9390d14acdf59c4935bc2',
            'info_dict': {
                'id': 'v9188090500',
                'ext': 'mp4',
                'title': 'Shakira - Don\'t Bother',
                'uploader': 'HannahL',
                'uploader_id': 'lisaha95',
                'upload_date': '20090331',
                'description': 'Shakira - Don\'t Bother, видео Shakira - Don\'t Bother',
                'thumbnail': 'http://frame8.loadup.ru/44/0b/918809.7.3.jpg',
            },
        },
    ]
    _SUCCESS = 0
@@ -93,6 +113,21 @@ class SmotriIE(InfoExtractor):
    _PASSWORD_DETECTED = 2
    _VIDEO_NOT_FOUND = 3
    @classmethod
    def _extract_url(cls, webpage):
        mobj = re.search(
            r'<embed[^>]src=(["\'])(?P<url>http://pics\.smotri\.com/(?:player|scrubber_custom8)\.swf\?file=v.+?\1)',
            webpage)
        if mobj is not None:
            return mobj.group('url')
        mobj = re.search(
            r'''(?x)<div\s+class="video_file">http://smotri\.com/video/download/file/[^<]+</div>\s*
                    <div\s+class="video_image">[^<]+</div>\s*
                    <div\s+class="video_id">(?P<id>[^<]+)</div>''', webpage)
        if mobj is not None:
            return 'http://smotri.com/video/view/?id=%s' % mobj.group('id')
    def _search_meta(self, name, html, display_name=None):
        if display_name is None:
            display_name = name
@@ -134,7 +169,7 @@ class SmotriIE(InfoExtractor):
        # Video JSON does not provide enough meta data
        # We will extract some from the video web page instead
-        video_page_url = 'http://' + mobj.group('url')
+        video_page_url = 'http://smotri.com/video/view/?id=%s' % video_id
        video_page = self._download_webpage(video_page_url, video_id, 'Downloading video page')
        # Warning if video is unavailable
@@ -222,7 +257,7 @@ class SmotriIE(InfoExtractor):
            'upload_date': video_upload_date,
            'uploader_id': video_uploader_id,
            'duration': video_duration,
-            'view_count': video_view_count,
+            'view_count': int_or_none(video_view_count),
            'age_limit': 18 if adult_content else 0,
            'video_page_url': video_page_url
        }
--- a/youtube_dl/extractor/ted.py
+++ b/youtube_dl/extractor/ted.py
@@ -18,12 +18,14 @@ class TEDIE(SubtitlesInfoExtractor):
            (?P<type_playlist>playlists(?:/\d+)?) # We have a playlist
            |
            ((?P<type_talk>talks)) # We have a simple talk
            |
            (?P<type_watch>watch)/[^/]+/[^/]+
        )
        (/lang/(.*?))? # The url may contain the language
-        /(?P<name>\w+) # Here goes the name and then ".html"
+        /(?P<name>[\w-]+) # Here goes the name and then ".html"
        .*)$
        '''
-    _TEST = {
+    _TESTS = [{
        'url': 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html',
        'md5': '4ea1dada91e4174b53dac2bb8ace429d',
        'info_dict': {
@@ -36,7 +38,17 @@ class TEDIE(SubtitlesInfoExtractor):
                'actively fooling us.'),
            'uploader': 'Dan Dennett',
        }
-    }
+    }, {
        'url': 'http://www.ted.com/watch/ted-institute/ted-bcg/vishal-sikka-the-beauty-and-power-of-algorithms',
        'md5': '226f4fb9c62380d11b7995efa4c87994',
        'info_dict': {
            'id': 'vishal-sikka-the-beauty-and-power-of-algorithms',
            'ext': 'mp4',
            'title': 'Vishal Sikka: The beauty and power of algorithms',
            'thumbnail': 're:^https?://.+\.jpg',
            'description': 'Adaptive, intelligent, and consistent, algorithms are emerging as the ultimate app for everything from matching consumers to products to assessing medical diagnoses. Vishal Sikka shares his appreciation for the algorithm, charting both its inherent beauty and its growing power.',
        }
    }]
    _FORMATS_PREFERENCE = {
        'low': 1,
@@ -57,6 +69,8 @@ class TEDIE(SubtitlesInfoExtractor):
        name = m.group('name')
        if m.group('type_talk'):
            return self._talk_info(url, name)
        elif m.group('type_watch'):
            return self._watch_info(url, name)
        else:
            return self._playlist_videos_info(url, name)
@@ -123,3 +137,26 @@ class TEDIE(SubtitlesInfoExtractor):
        else:
            self._downloader.report_warning(u'video doesn\'t have subtitles')
            return {}
    def _watch_info(self, url, name):
        """Build the info dict for a ``watch`` style page.

        The page at *url* is downloaded; the JSON stored in its
        ``data-config`` attribute provides the video URL and, when present,
        a thumbnail URL.  Title and description are scraped from the HTML;
        a missing description is not fatal.  *name* is used as the video id.
        """
        page = self._download_webpage(url, name)

        config = json.loads(self._html_search_regex(
            r"data-config='([^']+)", page, 'config'))
        media_url = config['video']['url']
        image_url = config.get('image', {}).get('url')

        heading = self._html_search_regex(
            r"(?s)<h1(?:\s+class='[^']+')?>(.+?)</h1>", page, 'title')
        about = self._html_search_regex(
            r'(?s)<h4 class="[^"]+" id="h3--about-this-talk">.*?</h4>(.*?)</div>',
            page, 'description', fatal=False)

        return {
            'id': name,
            'url': media_url,
            'title': heading,
            'thumbnail': image_url,
            'description': about,
        }
--- a/youtube_dl/extractor/tf1.py
+++ b/youtube_dl/extractor/tf1.py
@@ -1,33 +1,37 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import json
 import re
 from .common import InfoExtractor
 class TF1IE(InfoExtractor):
    """TF1 uses the wat.tv player."""
-    _VALID_URL = r'http://videos\.tf1\.fr/.*-(.*?)\.html'
+    _VALID_URL = r'http://videos\.tf1\.fr/.*-(?P<id>.*?)\.html'
    _TEST = {
-        u'url': u'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html',
+        'url': 'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html',
-        u'file': u'10635995.mp4',
+        'info_dict': {
-        u'md5': u'2e378cc28b9957607d5e88f274e637d8',
+            'id': '10635995',
-        u'info_dict': {
+            'ext': 'mp4',
-            u'title': u'Citroën Grand C4 Picasso 2013 : présentation officielle',
+            'title': 'Citroën Grand C4 Picasso 2013 : présentation officielle',
-            u'description': u'Vidéo officielle du nouveau Citroën Grand C4 Picasso, lancé à l\'automne 2013.',
+            'description': 'Vidéo officielle du nouveau Citroën Grand C4 Picasso, lancé à l\'automne 2013.',
        },
        'params': {
            # Sometimes wat serves the whole file with the --test option
            'skip_download': True,
        },
        u'skip': u'Sometimes wat serves the whole file with the --test option',
    }
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
-        id = mobj.group(1)
+        video_id = mobj.group('id')
-        webpage = self._download_webpage(url, id)
+        webpage = self._download_webpage(url, video_id)
-        embed_url = self._html_search_regex(r'"(https://www.wat.tv/embedframe/.*?)"',
+        embed_url = self._html_search_regex(
-                                webpage, 'embed url')
+            r'"(https://www.wat.tv/embedframe/.*?)"', webpage, 'embed url')
-        embed_page = self._download_webpage(embed_url, id, u'Downloading embed player page')
+        embed_page = self._download_webpage(embed_url, video_id,
            'Downloading embed player page')
        wat_id = self._search_regex(r'UVID=(.*?)&', embed_page, 'wat id')
-        wat_info = self._download_webpage('http://www.wat.tv/interface/contentv3/%s' % wat_id, id, u'Downloading Wat info')
+        wat_info = self._download_json(
-        wat_info = json.loads(wat_info)['media']
+            'http://www.wat.tv/interface/contentv3/%s' % wat_id, video_id)
-        wat_url = wat_info['url']
+        return self.url_result(wat_info['media']['url'], 'Wat')
        return self.url_result(wat_url, 'Wat')
--- a/youtube_dl/extractor/urort.py
+++ b/youtube_dl/extractor/urort.py
@@ -0,0 +1,61 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..utils import (
    compat_urllib_parse,
    unified_strdate,
 )
 class UrortIE(InfoExtractor):
    """Extractor for band pages on NRK P3 Urørt (urort.p3.no).

    A band page is returned as a playlist with one entry per track listed
    by the site's ``TrackDtos`` JSON endpoint for that band.
    """

    IE_DESC = 'NRK P3 Urørt'
    _VALID_URL = r'https?://(?:www\.)?urort\.p3\.no/#!/Band/(?P<id>[^/]+)$'
    _TEST = {
        'url': 'https://urort.p3.no/#!/Band/Gerilja',
        'md5': '5ed31a924be8a05e47812678a86e127b',
        'info_dict': {
            'id': '33124-4',
            'ext': 'mp3',
            'title': 'The Bomb',
            'thumbnail': 're:^https?://.+\.jpg',
            'like_count': int,
            'uploader': 'Gerilja',
            'uploader_id': 'Gerilja',
            'upload_date': '20100323',
        },
        'params': {
            'matchtitle': '^The Bomb$',  # To test, we want just one video
        }
    }

    def _real_extract(self, url):
        """Return a playlist dict holding every track of the band in *url*."""
        mobj = re.match(self._VALID_URL, url)
        playlist_id = mobj.group('id')

        # The endpoint takes a filter expression selecting the band by its
        # URL name; it has to be percent-encoded before going into the query.
        fstr = compat_urllib_parse.quote("InternalBandUrl eq '%s'" % playlist_id)
        json_url = 'http://urort.p3.no/breeze/urort/TrackDtos?$filter=' + fstr
        songs = self._download_json(json_url, playlist_id)
        # No debug print of songs[0] here: it polluted stdout and raised
        # IndexError for bands without any tracks.

        entries = [{
            'id': '%d-%s' % (s['BandId'], s['$id']),
            'title': s['Title'],
            'url': s['TrackUrl'],
            'ext': 'mp3',
            'uploader_id': playlist_id,
            'uploader': s.get('BandName', playlist_id),
            'like_count': s.get('LikeCount'),
            'thumbnail': 'http://urort.p3.no/cloud/images/%s' % s['Image'],
            'upload_date': unified_strdate(s.get('Released')),
        } for s in songs]

        return {
            '_type': 'playlist',
            'id': playlist_id,
            'title': playlist_id,
            'entries': entries,
        }
--- a/youtube_dl/extractor/vice.py
+++ b/youtube_dl/extractor/vice.py
@@ -1,38 +0,0 @@
 import re
 from .common import InfoExtractor
 from .ooyala import OoyalaIE
 from ..utils import ExtractorError
 class ViceIE(InfoExtractor):
    """Extractor for vice.com pages, which embed their videos via Ooyala.

    The Ooyala URL is taken from the page's Open Graph video URL when
    available, otherwise rebuilt from the embed code passed to
    ``OO.Player.create``.  The result is delegated to the Ooyala extractor.
    """

    _VALID_URL = r'http://www\.vice\.com/.*?/(?P<name>.+)'

    _TEST = {
        u'url': u'http://www.vice.com/Fringes/cowboy-capitalists-part-1',
        u'file': u'43cW1mYzpia9IlestBjVpd23Yu3afAfp.mp4',
        u'info_dict': {
            u'title': u'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov',
        },
        u'params': {
            # Requires ffmpeg (m3u8 manifest)
            u'skip_download': True,
        },
    }

    def _ooyala_url_from_embed_code(self, webpage):
        """Derive the Ooyala URL from the player embed code in *webpage*.

        Raises an expected ExtractorError when no embed code is present.
        """
        try:
            embed_code = self._search_regex(
                r'OO.Player.create\(\'ooyalaplayer\', \'(.+?)\'', webpage,
                u'ooyala embed code')
            return OoyalaIE._url_for_embed_code(embed_code)
        except ExtractorError:
            raise ExtractorError(u'The page doesn\'t contain a video', expected=True)

    def _real_extract(self, url):
        """Resolve *url* to an Ooyala URL result."""
        name = re.match(self._VALID_URL, url).group('name')
        webpage = self._download_webpage(url, name)
        try:
            ooyala_url = self._og_search_video_url(webpage)
        except ExtractorError:
            # No Open Graph video URL: fall back to the inline player setup.
            ooyala_url = self._ooyala_url_from_embed_code(webpage)
        return self.url_result(ooyala_url, ie='Ooyala')
--- a/youtube_dl/extractor/vk.py
+++ b/youtube_dl/extractor/vk.py
@@ -16,7 +16,7 @@ from ..utils import (
 class VKIE(InfoExtractor):
    IE_NAME = 'vk.com'
-    _VALID_URL = r'https?://vk\.com/(?:video_ext\.php\?.*?\boid=(?P<oid>\d+).*?\bid=(?P<id>\d+)|(?:videos.*?\?.*?z=)?video(?P<videoid>.*?)(?:\?|%2F|$))'
+    _VALID_URL = r'https?://vk\.com/(?:video_ext\.php\?.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+)|(?:videos.*?\?.*?z=)?video(?P<videoid>.*?)(?:\?|%2F|$))'
    _NETRC_MACHINE = 'vk'
    _TESTS = [
--- a/youtube_dl/extractor/washingtonpost.py
+++ b/youtube_dl/extractor/washingtonpost.py
@@ -0,0 +1,103 @@
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..utils import (
    int_or_none,
    strip_jsonp,
 )
 class WashingtonPostIE(InfoExtractor):
    """Extractor for Washington Post article pages containing videos.

    Every ``data-video-uuid`` attribute found in the page yields one
    playlist entry; the metadata and stream list of each video come from
    the site's ``videojson`` JSONP endpoint.
    """

    _VALID_URL = r'^https?://(?:www\.)?washingtonpost\.com/.*?/(?P<id>[^/]+)/(?:$|[?#])'
    _TEST = {
        'url': 'http://www.washingtonpost.com/sf/national/2014/03/22/sinkhole-of-bureaucracy/',
        'playlist': [{
            'md5': 'c3f4b4922ffa259243f68e928db2db8c',
            'info_dict': {
                'id': 'fc433c38-b146-11e3-b8b3-44b1d1cd4c1f',
                'ext': 'mp4',
                'title': 'Breaking Points: The Paper Mine',
                'duration': 1287,
                'description': 'Overly complicated paper pushing is nothing new to government bureaucracy. But the way federal retirement applications are filed may be the most outdated. David Fahrenthold explains.',
                'uploader': 'The Washington Post',
                'timestamp': 1395527908,
                'upload_date': '20140322',
            },
        }, {
            'md5': 'f645a07652c2950cd9134bb852c5f5eb',
            'info_dict': {
                'id': '41255e28-b14a-11e3-b8b3-44b1d1cd4c1f',
                'ext': 'mp4',
                'title': 'The town bureaucracy sustains',
                'description': 'Underneath the friendly town of Boyers is a sea of government paperwork. In a disused limestone mine, hundreds of locals now track, file and process retirement applications for the federal government. We set out to find out what it\'s like to do paperwork 230 feet underground.',
                'duration': 2217,
                'timestamp': 1395528005,
                'upload_date': '20140322',
                'uploader': 'The Washington Post',
            },
        }]
    }

    # Stream ``type`` values from the video JSON mapped to the protocol
    # recorded in the format dict; unknown types map to None.
    _STREAM_PROTOCOLS = {
        'MP4': 'http',
        'F4F': 'f4m',
    }

    def _stream_format(self, stream):
        """Convert one entry of the JSON ``streams`` list to a format dict."""
        stream_type = stream.get('type')
        width = stream.get('width')
        bitrate = stream.get('bitrate')
        # A width of exactly 0 marks the stream as having no video track.
        has_video = width != 0

        if width:
            format_id = '%s-%s-%s' % (stream_type, width, bitrate)
        else:
            format_id = stream_type

        return {
            'format_id': format_id,
            'vbr': bitrate if has_video else None,
            'width': width,
            'height': stream.get('height'),
            'acodec': stream.get('audioCodec'),
            'vcodec': stream.get('videoCodec') if has_video else 'none',
            'filesize': stream.get('fileSize'),
            'url': stream.get('url'),
            'ext': 'mp4',
            'protocol': self._STREAM_PROTOCOLS.get(stream_type),
        }

    def _real_extract(self, url):
        """Return a playlist with one entry per video embedded in *url*."""
        page_id = re.match(self._VALID_URL, url).group('id')
        webpage = self._download_webpage(url, page_id)
        title = self._og_search_title(webpage)

        uuids = re.findall(r'data-video-uuid="([^"]+)"', webpage)
        total = len(uuids)

        entries = []
        for index, uuid in enumerate(uuids, start=1):
            vinfo = self._download_json(
                'http://www.washingtonpost.com/posttv/c/videojson/%s?resType=jsonp' % uuid,
                page_id,
                transform_source=strip_jsonp,
                note='Downloading information of video %d/%d' % (index, total)
            )[0]['contentConfig']

            uploader = vinfo.get('credits', {}).get('source')
            first_published = vinfo.get('dateConfig', {}).get('dateFirstPublished')
            timestamp = int_or_none(first_published, 1000)

            formats = [
                self._stream_format(stream)
                for stream in vinfo.get('streams', [])]

            # The original upload, when advertised, is offered as an
            # additional format.
            source_media_url = vinfo.get('sourceMediaURL')
            if source_media_url:
                formats.append({
                    'format_id': 'source_media',
                    'url': source_media_url,
                })
            self._sort_formats(formats)

            entries.append({
                'id': uuid,
                'title': vinfo['title'],
                'description': vinfo.get('blurb'),
                'uploader': uploader,
                'formats': formats,
                'duration': int_or_none(vinfo.get('videoDuration'), 100),
                'timestamp': timestamp,
            })

        return {
            '_type': 'playlist',
            'entries': entries,
            'id': page_id,
            'title': title,
        }
--- a/youtube_dl/extractor/wat.py
+++ b/youtube_dl/extractor/wat.py
@@ -1,37 +1,37 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import json
 import re
 from .common import InfoExtractor
 from ..utils import (
    unified_strdate,
 )
 class WatIE(InfoExtractor):
-    _VALID_URL=r'http://www\.wat\.tv/.*-(?P<shortID>.*?)_.*?\.html'
+    _VALID_URL = r'http://www\.wat\.tv/.*-(?P<shortID>.*?)_.*?\.html'
    IE_NAME = 'wat.tv'
    _TEST = {
-        u'url': u'http://www.wat.tv/video/world-war-philadelphia-vost-6bv55_2fjr7_.html',
+        'url': 'http://www.wat.tv/video/world-war-philadelphia-vost-6bv55_2fjr7_.html',
-        u'file': u'10631273.mp4',
+        'info_dict': {
-        u'md5': u'd8b2231e1e333acd12aad94b80937e19',
+            'id': '10631273',
-        u'info_dict': {
+            'ext': 'mp4',
-            u'title': u'World War Z - Philadelphia VOST',
+            'title': 'World War Z - Philadelphia VOST',
-            u'description': u'La menace est partout. Que se passe-t-il à Philadelphia ?\r\nWORLD WAR Z, avec Brad Pitt, au cinéma le 3 juillet.\r\nhttp://www.worldwarz.fr',
+            'description': 'La menace est partout. Que se passe-t-il à Philadelphia ?\r\nWORLD WAR Z, avec Brad Pitt, au cinéma le 3 juillet.\r\nhttp://www.worldwarz.fr',
        },
        'params': {
            # Sometimes wat serves the whole file with the --test option
            'skip_download': True,
        },
        u'skip': u'Sometimes wat serves the whole file with the --test option',
    }
-    
+
    def download_video_info(self, real_id):
        # 'contentv4' is used in the website, but it also returns the related
        # videos, we don't need them
-        info = self._download_webpage('http://www.wat.tv/interface/contentv3/' + real_id, real_id, 'Downloading video info')
+        info = self._download_json('http://www.wat.tv/interface/contentv3/' + real_id, real_id)
        info = json.loads(info)
        return info['media']
    def _real_extract(self, url):
        def real_id_for_chapter(chapter):
            return chapter['tc_start'].split('-')[0]
@@ -56,17 +56,17 @@ class WatIE(InfoExtractor):
            entries = [self.url_result(chapter_url) for chapter_url in chapter_urls]
            return self.playlist_result(entries, real_id, video_info['title'])
        upload_date = None
        if 'date_diffusion' in first_chapter:
            upload_date = unified_strdate(first_chapter['date_diffusion'])
        # Otherwise we can continue and extract just one part, we have to use
        # the short id for getting the video url
-        info = {'id': real_id,
+        return {
-                'url': 'http://wat.tv/get/android5/%s.mp4' % real_id,
+            'id': real_id,
-                'ext': 'mp4',
+            'url': 'http://wat.tv/get/android5/%s.mp4' % real_id,
-                'title': first_chapter['title'],
+            'title': first_chapter['title'],
-                'thumbnail': first_chapter['preview'],
+            'thumbnail': first_chapter['preview'],
-                'description': first_chapter['description'],
+            'description': first_chapter['description'],
-                'view_count': video_info['views'],
+            'view_count': video_info['views'],
-                }
+            'upload_date': upload_date,
-        if 'date_diffusion' in first_chapter:
+        }
            info['upload_date'] = unified_strdate(first_chapter['date_diffusion'])
        return info
--- a/youtube_dl/extractor/wdr.py
+++ b/youtube_dl/extractor/wdr.py
@@ -4,9 +4,10 @@ import re
 from .common import InfoExtractor
 from ..utils import (
-    unified_strdate,
+    compat_parse_qs,
    compat_urlparse,
    determine_ext,
    unified_strdate,
 )
@@ -111,4 +112,85 @@ class WDRIE(InfoExtractor):
            'description': description,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
-        }
+        }
 class WDRMausIE(InfoExtractor):
    # Extractor for pages on wdrmaus.de ("Sendung mit der Maus").
    #
    # All regular expressions below are raw strings so that their regex
    # escapes are not interpreted as (invalid) string escapes first.
    _VALID_URL = r'http://(?:www\.)?wdrmaus\.de/(?:[^/]+/){,2}(?P<id>[^/?#]+)(?:/index\.php5|(?<!index)\.php5|/(?:$|[?#]))'
    IE_DESC = 'Sendung mit der Maus'
    _TESTS = [{
        'url': 'http://www.wdrmaus.de/aktuelle-sendung/index.php5',
        'info_dict': {
            'id': 'aktuelle-sendung',
            'ext': 'mp4',
            'thumbnail': r're:^http://.+\.jpg',
            'upload_date': 're:^[0-9]{8}$',
            'title': 're:^[0-9.]{10} - Aktuelle Sendung$',
        }
    }, {
        'url': 'http://www.wdrmaus.de/sachgeschichten/sachgeschichten/40_jahre_maus.php5',
        'md5': '3b1227ca3ed28d73ec5737c65743b2a3',
        'info_dict': {
            'id': '40_jahre_maus',
            'ext': 'mp4',
            'thumbnail': r're:^http://.+\.jpg',
            'upload_date': '20131007',
            'title': '12.03.2011 - 40 Jahre Maus',
        }
    }]

    def _real_extract(self, url):
        """Return the info dict (id, title, formats, thumbnail, upload_date)
        for the wdrmaus.de page at ``url``."""
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)

        # The "startVideo" link carries the player parameters as a query
        # string; everything after "startVideo=1&" is captured.
        param_code = self._html_search_regex(
            r'<a href="\?startVideo=1&amp;([^"]+)"', webpage, 'parameters')

        # The title is built from the air date and the page heading.
        title_date = self._search_regex(
            r'<div class="sendedatum"><p>Sendedatum:\s*([0-9\.]+)</p>',
            webpage, 'air date')
        title_str = self._html_search_regex(
            r'<h1>(.*?)</h1>', webpage, 'title')
        title = '%s - %s' % (title_date, title_str)
        upload_date = unified_strdate(
            self._html_search_meta('dc.date', webpage))

        fields = compat_parse_qs(param_code)
        video_url = fields['firstVideo'][0]
        # The picture path is resolved against the page URL.
        thumbnail = compat_urlparse.urljoin(url, fields['startPicture'][0])

        formats = [{
            'format_id': 'rtmp',
            'url': video_url,
        }]

        # Best effort (fatal=False): the script lists stream prefixes
        # together with their download counterparts.
        jscode = self._download_webpage(
            'http://www.wdrmaus.de/codebase/js/extended-medien.min.js',
            video_id, fatal=False,
            note='Downloading URL translation table',
            errnote='Could not download URL translation table')
        if jscode:
            for m in re.finditer(
                    r"stream:\s*'dslSrc=(?P<stream>[^']+)',\s*download:\s*'(?P<dl>[^']+)'\s*\}",
                    jscode):
                # Only the first table entry whose stream prefix matches
                # the video URL is used to derive the second format.
                if video_url.startswith(m.group('stream')):
                    http_url = video_url.replace(
                        m.group('stream'), m.group('dl'))
                    formats.append({
                        'format_id': 'http',
                        'url': http_url,
                    })
                    break

        self._sort_formats(formats)
        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
        }
 # TODO test _1
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -7,13 +7,13 @@ import itertools
 import json
 import os.path
 import re
 import string
 import struct
 import traceback
 import zlib
 from .common import InfoExtractor, SearchInfoExtractor
 from .subtitles import SubtitlesInfoExtractor
 from ..jsinterp import JSInterpreter
 from ..utils import (
    compat_chr,
    compat_parse_qs,
@@ -438,113 +438,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
    def _parse_sig_js(self, jscode):
        funcname = self._search_regex(
            r'signature=([a-zA-Z]+)', jscode,
-            u'Initial JS player signature function name')
+             u'Initial JS player signature function name')
-        functions = {}
+        jsi = JSInterpreter(jscode)
-
+        initial_function = jsi.extract_function(funcname)
        def argidx(varname):
            return string.lowercase.index(varname)
        def interpret_statement(stmt, local_vars, allow_recursion=20):
            if allow_recursion < 0:
                raise ExtractorError(u'Recursion limit reached')
            if stmt.startswith(u'var '):
                stmt = stmt[len(u'var '):]
            ass_m = re.match(r'^(?P<out>[a-z]+)(?:\[(?P<index>[^\]]+)\])?' +
                             r'=(?P<expr>.*)$', stmt)
            if ass_m:
                if ass_m.groupdict().get('index'):
                    def assign(val):
                        lvar = local_vars[ass_m.group('out')]
                        idx = interpret_expression(ass_m.group('index'),
                                                   local_vars, allow_recursion)
                        assert isinstance(idx, int)
                        lvar[idx] = val
                        return val
                    expr = ass_m.group('expr')
                else:
                    def assign(val):
                        local_vars[ass_m.group('out')] = val
                        return val
                    expr = ass_m.group('expr')
            elif stmt.startswith(u'return '):
                assign = lambda v: v
                expr = stmt[len(u'return '):]
            else:
                raise ExtractorError(
                    u'Cannot determine left side of statement in %r' % stmt)
            v = interpret_expression(expr, local_vars, allow_recursion)
            return assign(v)
        def interpret_expression(expr, local_vars, allow_recursion):
            if expr.isdigit():
                return int(expr)
            if expr.isalpha():
                return local_vars[expr]
            m = re.match(r'^(?P<in>[a-z]+)\.(?P<member>.*)$', expr)
            if m:
                member = m.group('member')
                val = local_vars[m.group('in')]
                if member == 'split("")':
                    return list(val)
                if member == 'join("")':
                    return u''.join(val)
                if member == 'length':
                    return len(val)
                if member == 'reverse()':
                    return val[::-1]
                slice_m = re.match(r'slice\((?P<idx>.*)\)', member)
                if slice_m:
                    idx = interpret_expression(
                        slice_m.group('idx'), local_vars, allow_recursion-1)
                    return val[idx:]
            m = re.match(
                r'^(?P<in>[a-z]+)\[(?P<idx>.+)\]$', expr)
            if m:
                val = local_vars[m.group('in')]
                idx = interpret_expression(m.group('idx'), local_vars,
                                           allow_recursion-1)
                return val[idx]
            m = re.match(r'^(?P<a>.+?)(?P<op>[%])(?P<b>.+?)$', expr)
            if m:
                a = interpret_expression(m.group('a'),
                                         local_vars, allow_recursion)
                b = interpret_expression(m.group('b'),
                                         local_vars, allow_recursion)
                return a % b
            m = re.match(
                r'^(?P<func>[a-zA-Z$]+)\((?P<args>[a-z0-9,]+)\)$', expr)
            if m:
                fname = m.group('func')
                if fname not in functions:
                    functions[fname] = extract_function(fname)
                argvals = [int(v) if v.isdigit() else local_vars[v]
                           for v in m.group('args').split(',')]
                return functions[fname](argvals)
            raise ExtractorError(u'Unsupported JS expression %r' % expr)
        def extract_function(funcname):
            func_m = re.search(
                r'function ' + re.escape(funcname) +
                r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
                jscode)
            argnames = func_m.group('args').split(',')
            def resf(args):
                local_vars = dict(zip(argnames, args))
                for stmt in func_m.group('code').split(';'):
                    res = interpret_statement(stmt, local_vars)
                return res
            return resf
        initial_function = extract_function(funcname)
        return lambda s: initial_function([s])
    def _parse_sig_swf(self, file_contents):
@@ -1549,7 +1446,9 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
                break
            more = self._download_json(
-                'https://youtube.com/%s' % mobj.group('more'), playlist_id, 'Downloading page #%s' % page_num)
+                'https://youtube.com/%s' % mobj.group('more'), playlist_id,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']
@@ -1712,7 +1611,7 @@ class YoutubeUserIE(InfoExtractor):
 class YoutubeSearchIE(SearchInfoExtractor):
    IE_DESC = u'YouTube.com searches'
-    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
+    _API_URL = u'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
    _MAX_RESULTS = 1000
    IE_NAME = u'youtube:search'
    _SEARCH_KEY = 'ytsearch'
@@ -1723,9 +1622,12 @@ class YoutubeSearchIE(SearchInfoExtractor):
        video_ids = []
        pagenum = 0
        limit = n
        PAGE_SIZE = 50
-        while (50 * pagenum) < limit:
+        while (PAGE_SIZE * pagenum) < limit:
-            result_url = self._API_URL % (compat_urllib_parse.quote_plus(query), (50*pagenum)+1)
+            result_url = self._API_URL % (
                compat_urllib_parse.quote_plus(query.encode('utf-8')),
                (PAGE_SIZE * pagenum) + 1)
            data_json = self._download_webpage(
                result_url, video_id=u'query "%s"' % query,
                note=u'Downloading page %s' % (pagenum + 1),
@@ -1836,11 +1738,10 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
        feed_entries = []
        paging = 0
        for i in itertools.count(1):
-            info = self._download_webpage(self._FEED_TEMPLATE % paging,
+            info = self._download_json(self._FEED_TEMPLATE % paging,
                                          u'%s feed' % self._FEED_NAME,
                                          u'Downloading page %s' % i)
-            info = json.loads(info)
+            feed_html = info.get('feed_html') or info.get('content_html')
            feed_html = info['feed_html']
            m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
            ids = orderedSet(m.group(1) for m in m_ids)
            feed_entries.extend(
@@ -1852,7 +1753,7 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
        return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
 class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
-    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword(requires authentication)'
+    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = u'Youtube Subscriptions'
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@@ -0,0 +1,116 @@
 from __future__ import unicode_literals
 import re
 from .utils import (
    ExtractorError,
 )
 class JSInterpreter(object):
    """Interpreter for the small subset of JavaScript handled below.

    Functions are located by name inside ``code`` with a regular
    expression and executed one ``;``-separated statement at a time.
    Supported statements are assignments (optionally to an indexed
    element, optionally prefixed with ``var``) and ``return``.  Supported
    expressions are integer literals, variable names, the members
    ``split("")``, ``join("")``, ``length``, ``reverse()`` and
    ``slice(n)``, indexing, the ``%`` operator and calls to other
    functions defined in ``code``.
    """

    def __init__(self, code):
        # Complete JavaScript source in which functions are looked up.
        self.code = code
        # Functions extracted so far, keyed by their JavaScript name.
        self._functions = {}

    def interpret_statement(self, stmt, local_vars, allow_recursion=20):
        """Execute the statement ``stmt`` and return the value it produced.

        ``local_vars`` maps variable names to values and is updated in
        place by assignments.  Raises ExtractorError when the recursion
        budget is used up or the statement is neither an assignment nor a
        ``return``.
        """
        if allow_recursion < 0:
            raise ExtractorError('Recursion limit reached')
        if stmt.startswith('var '):
            stmt = stmt[len('var '):]
        ass_m = re.match(
            r'^(?P<out>[a-z]+)(?:\[(?P<index>[^\]]+)\])?' +
            r'=(?P<expr>.*)$', stmt)
        if ass_m:
            expr = ass_m.group('expr')
            if ass_m.group('index'):
                def assign(val):
                    # The index expression is evaluated after the right
                    # hand side, when the value is actually stored.
                    lvar = local_vars[ass_m.group('out')]
                    idx = self.interpret_expression(
                        ass_m.group('index'), local_vars, allow_recursion)
                    # Explicit check instead of ``assert`` so it still
                    # runs when Python is started with -O.
                    if not isinstance(idx, int):
                        raise ExtractorError(
                            'Non-integer index %r in statement %r'
                            % (idx, stmt))
                    lvar[idx] = val
                    return val
            else:
                def assign(val):
                    local_vars[ass_m.group('out')] = val
                    return val
        elif stmt.startswith('return '):
            assign = lambda v: v
            expr = stmt[len('return '):]
        else:
            raise ExtractorError(
                'Cannot determine left side of statement in %r' % stmt)
        v = self.interpret_expression(expr, local_vars, allow_recursion)
        return assign(v)

    def interpret_expression(self, expr, local_vars, allow_recursion):
        """Evaluate the expression ``expr`` against ``local_vars``.

        Raises ExtractorError for any expression form that is not
        supported.
        """
        if expr.isdigit():
            return int(expr)
        if expr.isalpha():
            return local_vars[expr]
        m = re.match(r'^(?P<in>[a-z]+)\.(?P<member>.*)$', expr)
        if m:
            member = m.group('member')
            val = local_vars[m.group('in')]
            if member == 'split("")':
                return list(val)
            if member == 'join("")':
                return ''.join(val)
            if member == 'length':
                return len(val)
            if member == 'reverse()':
                return val[::-1]
            slice_m = re.match(r'slice\((?P<idx>.*)\)', member)
            if slice_m:
                idx = self.interpret_expression(
                    slice_m.group('idx'), local_vars, allow_recursion - 1)
                return val[idx:]
            # Unknown members fall through to the remaining patterns.
        m = re.match(
            r'^(?P<in>[a-z]+)\[(?P<idx>.+)\]$', expr)
        if m:
            val = local_vars[m.group('in')]
            idx = self.interpret_expression(
                m.group('idx'), local_vars, allow_recursion - 1)
            return val[idx]
        m = re.match(r'^(?P<a>.+?)(?P<op>[%])(?P<b>.+?)$', expr)
        if m:
            a = self.interpret_expression(
                m.group('a'), local_vars, allow_recursion)
            b = self.interpret_expression(
                m.group('b'), local_vars, allow_recursion)
            return a % b
        m = re.match(
            r'^(?P<func>[a-zA-Z$]+)\((?P<args>[a-z0-9,]+)\)$', expr)
        if m:
            fname = m.group('func')
            if fname not in self._functions:
                self._functions[fname] = self.extract_function(fname)
            # Arguments are either integer literals or variable names.
            argvals = [int(v) if v.isdigit() else local_vars[v]
                       for v in m.group('args').split(',')]
            return self._functions[fname](argvals)
        raise ExtractorError('Unsupported JS expression %r' % expr)

    def extract_function(self, funcname):
        """Return a Python callable that runs the JS function ``funcname``.

        The callable takes one list holding the positional arguments and
        returns the value of the last executed statement.  Raises
        ExtractorError when no matching definition is found in the code.
        """
        func_m = re.search(
            (r'(?:function %s|%s\s*=\s*function)' % (
                re.escape(funcname), re.escape(funcname))) +
            r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
            self.code)
        if func_m is None:
            raise ExtractorError('Could not find JS function %r' % funcname)
        argnames = func_m.group('args').split(',')

        def resf(args):
            local_vars = dict(zip(argnames, args))
            res = None
            for stmt in func_m.group('code').split(';'):
                # A trailing ";" or padding whitespace yields blank
                # statements; skip them instead of failing on them.
                stmt = stmt.strip()
                if not stmt:
                    continue
                res = self.interpret_statement(stmt, local_vars)
            return res
        return resf
--- a/youtube_dl/postprocessor/ffmpeg.py
+++ b/youtube_dl/postprocessor/ffmpeg.py
@@ -53,8 +53,9 @@ class FFmpegPostProcessor(PostProcessor):
        if self._downloader.params.get('verbose', False):
            self._downloader.to_screen(u'[debug] ffmpeg command line: %s' % shell_quote(cmd))
-        p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        bcmd = [self._downloader.encode(c) for c in cmd]
-        stdout,stderr = p.communicate()
+        p = subprocess.Popen(bcmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        stdout, stderr = p.communicate()
        if p.returncode != 0:
            stderr = stderr.decode('utf-8', 'replace')
            msg = stderr.strip().split('\n')[-1]
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -539,7 +539,6 @@ def encodeFilename(s, for_subprocess=False):
        encoding = 'utf-8'
    return s.encode(encoding, 'ignore')
 def decodeOption(optval):
    if optval is None:
        return optval
@@ -1177,8 +1176,12 @@ class HEADRequest(compat_urllib_request.Request):
        return "HEAD"
-def int_or_none(v, scale=1):
+def int_or_none(v, scale=1, default=None):
-    return v if v is None else (int(v) // scale)
+    return default if v is None else (int(v) // scale)
 def float_or_none(v, scale=1, default=None):
    """Return ``float(v) / scale``, or ``default`` when ``v`` is None."""
    if v is None:
        return default
    return float(v) / scale
 def parse_duration(s):
@@ -1186,7 +1189,7 @@ def parse_duration(s):
        return None
    m = re.match(
-        r'(?:(?:(?P<hours>[0-9]+)[:h])?(?P<mins>[0-9]+)[:m])?(?P<secs>[0-9]+)s?$', s)
+        r'(?:(?:(?P<hours>[0-9]+)[:h])?(?P<mins>[0-9]+)[:m])?(?P<secs>[0-9]+)s?(?::[0-9]+)?$', s)
    if not m:
        return None
    res = int(m.group('secs'))
@@ -1261,8 +1264,8 @@ class PagedList(object):
 def uppercase_escape(s):
    return re.sub(
-        r'\\U([0-9a-fA-F]{8})',
+        r'\\U[0-9a-fA-F]{8}',
-        lambda m: compat_chr(int(m.group(1), base=16)), s)
+        lambda m: m.group(0).decode('unicode-escape'), s)
 try:
    struct.pack(u'!I', 0)
@@ -1328,3 +1331,7 @@ US_RATINGS = {
    'R': 16,
    'NC': 18,
 }
 def strip_jsonp(code):
    """Remove a JSONP wrapper from ``code`` and return the payload text.

    A wrapper is a callback name followed by the parenthesised payload,
    an optional ``;`` and optional trailing whitespace.  Callback names
    may contain letters, digits, ``_``, ``.`` and ``$``.  Input that does
    not look like such a wrapper is returned unchanged.
    """
    return re.sub(
        r'(?s)^[a-zA-Z0-9_.$]+\s*\(\s*(.*)\);?\s*$', r'\1', code)
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@
-__version__ = '2014.03.24.3'
+__version__ = '2014.04.03.3'
Author	SHA1	Message	Date
Philipp Hagemeister	a9f304031b	release 2014.04.03.3	2014-04-03 16:21:54 +02:00
Philipp Hagemeister	9271bc8355	[cnet] Add new extractor (Fixes #2679 )	2014-04-03 16:21:21 +02:00
Philipp Hagemeister	968ed2a777	[comedycentral] Add test for #2677	2014-04-03 15:31:04 +02:00
Philipp Hagemeister	24de5d2556	release 2014.04.03.2	2014-04-03 15:28:56 +02:00
Philipp Hagemeister	d26e981df4	Correct check for empty dirname (Fixes #2683 )	2014-04-03 15:28:41 +02:00
Jaime Marquínez Ferrándiz	e45d40b171	[youtube:subscriptions] Add space to the description	2014-04-03 15:13:52 +02:00
Sergey M․	4a419b8851	[c56] Modernize and add duration extraction	2014-04-03 19:53:11 +07:00
Philipp Hagemeister	bec1fad223	[YouTubeDL] Throw an early error if the info_dict result is invalid	2014-04-03 14:38:16 +02:00
Philipp Hagemeister	177fed41bc	[comedycentral:shows] Support guest/ URLs (Fixes #2677 )	2014-04-03 14:38:16 +02:00
Jaime Marquínez Ferrándiz	b900e7cba4	[downloader/f4m] Close the final video	2014-04-03 13:35:07 +02:00
Jaime Marquínez Ferrándiz	14cb4979f0	MANIFEST.in: Only list the files from the docs folder that will be included (closes #2623 ) Pruning the _build folder produced the message `no previously-included directories found matching 'docs/_build'` when installing from the source distribution.	2014-04-03 13:26:27 +02:00
Philipp Hagemeister	69e61e30fe	release 2014.04.03.1	2014-04-03 08:55:59 +02:00
Philipp Hagemeister	cce929eaac	[franceculture] Add extractor (Fixes #2669 )	2014-04-03 08:55:38 +02:00
Philipp Hagemeister	b6cfde99b7	Only mention websense URL once	2014-04-03 08:12:53 +02:00
Philipp Hagemeister	1be99f052d	release 2014.04.03	2014-04-03 06:09:45 +02:00
Philipp Hagemeister	2410c43d83	Detect Websense censorship (Fixes #2670 )	2014-04-03 06:09:38 +02:00
Philipp Hagemeister	aea6e7fc3c	[cspan] Support multiple segments (Fixes #2674 )	2014-04-03 06:09:38 +02:00
Sergey M․	91a76c40c0	[musicplayon] Add support for musicplayon.com	2014-04-02 22:10:20 +07:00
Philipp Hagemeister	d2b194607c	release 2014.04.02	2014-04-02 14:26:34 +02:00
Jaime Marquínez Ferrándiz	f6177462db	[youtube] feeds: Also look for the html in the 'content_html' field (fixes #2671 )	2014-04-02 14:13:08 +02:00
Jaime Marquínez Ferrándiz	9ddaf4ef8c	[comedycentral] Change XPath .//guid to ./guid (fixes #2668 ) It fails to find the element in python 2.6 and it's not required, the element is a direct child of the item node.	2014-04-01 21:38:07 +02:00
Jaime Marquínez Ferrándiz	97b5573848	[comedycentral] Update test title for `34cbc7ee8d`	2014-04-01 21:29:40 +02:00
Jaime Marquínez Ferrándiz	18c95c1ab0	[rutube] Use _download_json	2014-04-01 20:30:22 +02:00
Sergey M․	0479c625a4	[brightcove] Encode object_str with utf-8	2014-04-01 20:17:35 +07:00
Sergey M․	f659951e22	[vk] Support optional dash for oid in embedded links	2014-04-01 19:38:42 +07:00
Philipp Hagemeister	5853a7316e	release 2014.04.01.3	2014-04-01 13:17:15 +02:00
Philipp Hagemeister	a612753db9	[utils] Correct decoding of large unicode codepoints in uppercase_escape (Fixes #2664 )	2014-04-01 13:17:07 +02:00
Philipp Hagemeister	c8fc3fb524	release 2014.04.01.2	2014-04-01 05:57:15 +02:00
Philipp Hagemeister	5912c639df	[youtube] Transform google's JSON dialect (fixes #2663 )	2014-04-01 05:56:56 +02:00
Philipp Hagemeister	017e4dd58c	release 2014.04.01.1	2014-04-01 00:25:17 +02:00
Philipp Hagemeister	651486621d	[comedycentral] Allow URLs with query parts (fixes #2661 )	2014-04-01 00:25:11 +02:00
Philipp Hagemeister	28d9032c88	release 2014.04.01	2014-04-01 00:02:39 +02:00
Philipp Hagemeister	16f4eb723a	[comedycentral] Add support for /videos URLs (Fixes #2660 )	2014-04-01 00:02:32 +02:00
Sergey M․	1cbd410620	[pyvideo] Modernize	2014-03-31 19:31:48 +07:00
Philipp Hagemeister	d41ac5f5dc	release 2014.03.30.1	2014-03-30 15:57:47 +02:00
Philipp Hagemeister	9c1fc022ae	[generic] Warn before fallback to automatic search	2014-03-30 15:57:35 +02:00
Philipp Hagemeister	83d548ef0f	[youtube] Encode ytsearch query	2014-03-30 15:57:35 +02:00
Sergey M․	c72477bd32	[rutube] Modernize	2014-03-30 15:35:07 +07:00
Philipp Hagemeister	9a7b072e38	[wdr] Add support for more wdrmaus subpages	2014-03-30 07:42:35 +02:00
Philipp Hagemeister	cbc4a6cc7e	release 2014.03.30	2014-03-30 07:25:48 +02:00
Philipp Hagemeister	cd7481a39e	[wdr] Add support for wdrmaus.de (Fixes #2651 )	2014-03-30 07:25:42 +02:00
Philipp Hagemeister	acd213ed6d	Remove unusued imports	2014-03-30 07:16:07 +02:00
Philipp Hagemeister	77ffa95701	[jsinterp] Better error messages	2014-03-30 07:15:14 +02:00
Philipp Hagemeister	2b25cb5d76	[youtube] Move JavaScript interpreter into its own module	2014-03-30 07:02:58 +02:00
Philipp Hagemeister	62fec3b2ff	Add new --encoding option (Fixes #2650 )	2014-03-30 06:08:22 +02:00
Jaime Marquínez Ferrándiz	e79162558e	[wat] Modernize	2014-03-29 15:15:16 +01:00
Jaime Marquínez Ferrándiz	2da67107ee	[tf1] Modernize	2014-03-29 15:05:15 +01:00
Jaime Marquínez Ferrándiz	2ff7f8975e	[nba] Modernize	2014-03-29 14:57:48 +01:00
Jaime Marquínez Ferrándiz	87a2566048	[metacritic] Modernize test	2014-03-29 14:57:48 +01:00
Jaime Marquínez Ferrándiz	986f56736b	[roxwel] Modernize	2014-03-29 14:57:44 +01:00
Jaime Marquínez Ferrándiz	2583a0308b	[huffpost] Modernize test	2014-03-29 14:35:45 +01:00
Jaime Marquínez Ferrándiz	40c716d2a2	[ign] Modernize	2014-03-29 14:34:03 +01:00
Jaime Marquínez Ferrándiz	79bfd01001	[kickstarter] Fix extraction, extract more info and modernize	2014-03-29 14:22:28 +01:00
Jaime Marquínez Ferrándiz	f2bcdd8e02	[discovery] modernize	2014-03-29 14:22:27 +01:00
Philipp Hagemeister	8c5850eeb4	release 2014.03.29	2014-03-29 14:01:53 +01:00
Philipp Hagemeister	bd3e077a2d	Merge branch 'master' of github.com:rg3/youtube-dl	2014-03-29 14:01:19 +01:00
Jaime Marquínez Ferrándiz	7e70ac36b3	[bloomberg] Fix extraction (fixes #2154 ) Stop using the OoyalaIE, extract the f4m url instead.	2014-03-29 11:55:12 +01:00
Philipp Hagemeister	2cc0082dc0	Credit @phaer for OE1 (#2646 )	2014-03-29 10:11:32 +01:00
Sergey M․	056b56688a	[ntv] Simplify	2014-03-29 15:55:03 +07:00
Philipp Hagemeister	b17418313f	[oe1] Simplify (#2646 )	2014-03-28 23:23:58 +01:00
Philipp Hagemeister	e9a6fd6a68	Merge remote-tracking branch 'phaer/add-oe1-support'	2014-03-28 23:21:58 +01:00
Philipp Hagemeister	bf30f3bd9d	release 2014.03.28	2014-03-28 23:14:54 +01:00
Philipp Hagemeister	330edf2d84	Mention where to find keys in --dump-json (Fixes #2648 )	2014-03-28 23:13:03 +01:00
Philipp Hagemeister	43f775e4ca	[comedycentral] Duration can now be a float (Fixes #2647 )	2014-03-28 23:06:34 +01:00
Sergey M․	8f6562448c	[ntv] Move app guess outside formats loop	2014-03-28 23:09:56 +07:00
Sergey M․	263f4b514b	[ntv] Add support for ntv.ru (Closes #2581 )	2014-03-28 23:01:08 +07:00
phaer	f0da3f1ef9	[oe1] Add support for oe1.orf.at.	2014-03-28 17:57:25 +02:00
Sergey M․	cb3ac1c610	[smotri] Modernize and add support for emdebbed videos (Closes #2585 )	2014-03-28 19:58:49 +07:00
Sergey M․	8efd15f477	[canalplus] Fix video id extraction (Closes #2645 )	2014-03-28 18:47:15 +07:00
Jaime Marquínez Ferrándiz	d26ebe990f	[ehow] Modernize	2014-03-27 21:23:02 +01:00
Jaime Marquínez Ferrándiz	28acf5500a	[appletrailers] Modernize	2014-03-27 21:10:51 +01:00
Jaime Marquínez Ferrándiz	214c22c704	[niconico] Modernize	2014-03-27 21:01:09 +01:00
Sergey M․	8cdafb47b9	[mooshare] Add support for URLs starting with 'www'	2014-03-27 19:08:35 +07:00
Philipp Hagemeister	0dae5083f1	[urort] Add date	2014-03-27 02:56:23 +01:00
Philipp Hagemeister	4c89bbd22c	release 2014.03.27.1	2014-03-27 02:52:06 +01:00
Philipp Hagemeister	e2b06e76c1	[urort] Add extractor (Fixes #2634 )	2014-03-27 02:51:50 +01:00
Philipp Hagemeister	e9c076c317	[clipsyndicate] Modernize	2014-03-27 02:30:00 +01:00
Philipp Hagemeister	6c072e7d25	release 2014.03.27	2014-03-27 02:22:57 +01:00
Philipp Hagemeister	ac6c104871	[ted] Add support for watch/ URLs (Fixes #2637 )	2014-03-27 02:22:40 +01:00
Philipp Hagemeister	69c01a9f68	[comedycentral] Add a testcase for extended-interviews URLs (#2636 )	2014-03-27 02:02:48 +01:00
Philipp Hagemeister	e55213ce35	Merge remote-tracking branch 'malept/tds-extended-interviews'	2014-03-27 02:02:18 +01:00
Mark Lee	24a2aac445	[comedycentral] fix TDS extended interviews The new website broke the URL format. Added "playlist" as a valid ID keyword.	2014-03-26 10:51:02 -07:00
Jaime Marquínez Ferrándiz	98acdc895b	Merge remote-tracking branch 'dstftw/download-referer-header' (closes #2628 )	2014-03-26 15:20:11 +01:00
Jaime Marquínez Ferrándiz	bd3b5b8b10	[slashdot] Remove extractor The generic ooyala detection works fine.	2014-03-26 15:09:14 +01:00
Jaime Marquínez Ferrándiz	9a90636805	[vice] Remove extractor The generic ooyala detection works fine.	2014-03-26 15:03:34 +01:00
Sergey M․	6a66ae96ed	[cspan] Roll back unfinished rtmp support	2014-03-26 19:51:54 +07:00
Jaime Marquínez Ferrándiz	2c8a4ba6b5	Makefile: include the docs in the tarball	2014-03-26 12:01:08 +01:00
Philipp Hagemeister	ad8915b729	Add --no-warnings option (Fixes #2630 )	2014-03-26 00:43:46 +01:00
Philipp Hagemeister	34cbc7ee8d	[comedycentral] Better titles	2014-03-25 23:46:51 +01:00
Sergey M․	a59e40a1ea	Replace 'referer' with 'http_referer'	2014-03-25 21:53:26 +07:00
Sergey M․	ad0a75db6b	[auengine] Add referer	2014-03-25 21:22:41 +07:00
Sergey M․	1d0e49e1c7	Use explicitly set Referer header for downloading	2014-03-25 21:22:27 +07:00
Sergey M․	b4461b6ebe	[auengine] Modernize	2014-03-25 21:16:10 +07:00
Philipp Hagemeister	80959224fe	release 2014.03.25.1	2014-03-25 14:27:40 +01:00
Philipp Hagemeister	865cbf4fc5	[comedycentral] Correct uri (Fixes #2627 )	2014-03-25 14:27:23 +01:00
Philipp Hagemeister	196f061cac	release 2014.03.25	2014-03-25 04:01:13 +01:00
Philipp Hagemeister	99b380c33b	[comedycentral] Fix thedailyshow / thecolbertreport (Fixes #2600 , #2596 )	2014-03-25 04:00:57 +01:00
Philipp Hagemeister	02e4482e22	release 2014.03.24.5	2014-03-24 23:23:38 +01:00
Philipp Hagemeister	b8a792de80	Merge remote-tracking branch 'origin/master' into HEAD Conflicts: youtube_dl/extractor/arte.py	2014-03-24 23:23:17 +01:00
Philipp Hagemeister	fac55558ad	[washingtonpost] Add extractor (Fixes #2622 )	2014-03-24 23:21:20 +01:00
Jaime Marquínez Ferrándiz	b2799ff96d	[arte] Fix videos.arte.tv extraction	2014-03-24 22:38:51 +01:00
Philipp Hagemeister	7a249480b4	[arte] Fix video.arte.tv extractor	2014-03-24 22:34:03 +01:00
Philipp Hagemeister	f605128d13	[rts] Add thumbnail support	2014-03-24 22:32:04 +01:00
Philipp Hagemeister	ba40a74666	[clipfish] Modernize	2014-03-24 22:30:32 +01:00
Philipp Hagemeister	fb8ae2d438	release 2014.03.24.4	2014-03-24 22:03:51 +01:00
Philipp Hagemeister	893f8832b5	[arte] Add support for embedded videos (Fixes #2620 )	2014-03-24 22:01:47 +01:00
Philipp Hagemeister	878d11ec29	[arte] Add support for multiple formats	2014-03-24 21:36:26 +01:00
Philipp Hagemeister	515bbe4b5b	[arte] Remove liveweb support liveweb.arte.tv is no longer functional, everything has moved to concert	2014-03-24 21:31:19 +01:00
Philipp Hagemeister	75f2e25ba9	[downloader/hls] Encode filename (Fixes #2609 )	2014-03-24 21:23:05 +01:00
`@@ -1,2 +1,2 @@`

	`__version__ = '2014.03.24.3'`	`__version__ = '2014.04.03.3'`