release 2014.01.22.2

Add new --default-search option (#2193)
[comedycentral] Use the generic _real_extract provided by the base class
2014-01-22 14:33:16 +01:00 · 2014-01-22 14:16:43 +01:00 · 2014-01-22 11:44:26 +01:00 · 2014-01-22 02:39:17 -08:00 · 2014-01-22 11:35:17 +01:00 · 2014-01-22 17:25:32 +07:00
47 changed files with 885 additions and 456 deletions
--- a/README.md
+++ b/README.md
@@ -44,6 +44,10 @@ which means you can modify it, redistribute it or use it however you like.
    --bidi-workaround          Work around terminals that lack bidirectional
                               text support. Requires bidiv or fribidi
                               executable in PATH
+    --default-search PREFIX    Use this prefix for unqualified URLs. For example
+                               "gvsearch2:" downloads two videos from google
+                               videos for  youtube-dl "large apple". By default
+                               (with value "auto") youtube-dl guesses.

 ## Video Selection:
    --playlist-start NUMBER    playlist video to start at (default is 1)
@@ -71,6 +75,7 @@ which means you can modify it, redistribute it or use it however you like.
    --download-archive FILE    Download only videos not listed in the archive
                               file. Record the IDs of all downloaded videos in
                               it.
+    --include-ads              Download advertisements as well (experimental)

 ## Download Options:
    -r, --rate-limit LIMIT     maximum download rate in bytes per second (e.g.
--- a/test/test_subtitles.py
+++ b/test/test_subtitles.py
@@ -167,13 +167,13 @@ class TestTedSubtitles(BaseTestSubtitles):
    def test_subtitles(self):
        self.DL.params['writesubtitles'] = True
        subtitles = self.getSubtitles()
-        self.assertEqual(md5(subtitles['en']), '2154f31ff9b9f89a0aa671537559c21d')
+        self.assertEqual(md5(subtitles['en']), '4262c1665ff928a2dada178f62cb8d14')

    def test_subtitles_lang(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['subtitleslangs'] = ['fr']
        subtitles = self.getSubtitles()
-        self.assertEqual(md5(subtitles['fr']), '7616cbc6df20ec2c1204083c83871cf6')
+        self.assertEqual(md5(subtitles['fr']), '66a63f7f42c97a50f8c0e90bc7797bb5')

    def test_allsubtitles(self):
        self.DL.params['writesubtitles'] = True
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -16,6 +16,7 @@ from youtube_dl.utils import (
    DateRange,
    encodeFilename,
    find_xpath_attr,
+    fix_xml_ampersands,
    get_meta_content,
    orderedSet,
    parse_duration,
@@ -200,5 +201,18 @@ class TestUtil(unittest.TestCase):
        self.assertEqual(parse_duration('9:12:43'), 33163)
        self.assertEqual(parse_duration('x:y'), None)

+    def test_fix_xml_ampersands(self):
+        self.assertEqual(
+            fix_xml_ampersands('"&x=y&z=a'), '"&amp;x=y&amp;z=a')
+        self.assertEqual(
+            fix_xml_ampersands('"&amp;x=y&wrong;&z=a'),
+            '"&amp;x=y&amp;wrong;&amp;z=a')
+        self.assertEqual(
+            fix_xml_ampersands('&amp;&apos;&gt;&lt;&quot;'),
+            '&amp;&apos;&gt;&lt;&quot;')
+        self.assertEqual(
+            fix_xml_ampersands('&#1234;&#x1abC;'), '&#1234;&#x1abC;')
+        self.assertEqual(fix_xml_ampersands('&#&#'), '&amp;#&amp;#')
+
 if __name__ == '__main__':
    unittest.main()
--- a/test/test_youtube_signature.py
+++ b/test/test_youtube_signature.py
@@ -27,12 +27,6 @@ _TESTS = [
        85,
        u'3456789a0cdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS[UVWXYZ!"#$%&\'()*+,-./:;<=>?@',
    ),
-    (
-        u'https://s.ytimg.com/yts/swfbin/watch_as3-vflg5GhxU.swf',
-        u'swf',
-        82,
-        u':/.-,+*)=\'&%$#"!ZYX0VUTSRQPONMLKJIHGFEDCBAzyxw>utsrqponmlkjihgfedcba987654321'
-    ),
 ]


--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -151,6 +151,9 @@ class YoutubeDL(object):
    bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fridibi
    debug_printtraffic:Print out sent and received HTTP traffic
+    include_ads:       Download ads as well
+    default_search:    Prepend this string if an input url is not valid.
+                       'auto' for elaborate guessing

    The following parameters are not used by YoutubeDL itself, they are used by
    the FileDownloader:
@@ -908,6 +911,14 @@ class YoutubeDL(object):
                    if info_dict.get('requested_formats') is not None:
                        downloaded = []
                        success = True
+                        merger = FFmpegMergerPP(self)
+                        if not merger._get_executable():
+                            postprocessors = []
+                            self.report_warning('You have requested multiple '
+                                'formats but ffmpeg or avconv are not installed.'
+                                ' The formats won\'t be merged')
+                        else:
+                            postprocessors = [merger]
                        for f in info_dict['requested_formats']:
                            new_info = dict(info_dict)
                            new_info.update(f)
@@ -916,7 +927,7 @@ class YoutubeDL(object):
                            downloaded.append(fname)
                            partial_success = dl(fname, new_info)
                            success = success and partial_success
-                        info_dict['__postprocessors'] = [FFmpegMergerPP(self)]
+                        info_dict['__postprocessors'] = postprocessors
                        info_dict['__files_to_merge'] = downloaded
                    else:
                        # Just a single file
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -39,6 +39,7 @@ __authors__  = (
    'Sergey M.',
    'Michael Orlitzky',
    'Chris Gahan',
+    'Saimadhav Heblikar',
 )

 __license__ = 'Public Domain'
@@ -198,7 +199,9 @@ def parseOpts(overrideArguments=None):
    general.add_option(
        '--bidi-workaround', dest='bidi_workaround', action='store_true',
        help=u'Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH')
-
+    general.add_option('--default-search',
+            dest='default_search', metavar='PREFIX',
+            help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for  youtube-dl "large apple". By default (with value "auto") youtube-dl guesses.')

    selection.add_option(
        '--playlist-start',
@@ -237,7 +240,10 @@ def parseOpts(overrideArguments=None):
    selection.add_option('--download-archive', metavar='FILE',
                         dest='download_archive',
                         help='Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it.')
-
+    selection.add_option(
+        '--include-ads', dest='include_ads',
+        action='store_true',
+        help='Download advertisements as well (experimental)')

    authentication.add_option('-u', '--username',
            dest='username', metavar='USERNAME', help='account username')
@@ -615,6 +621,8 @@ def _real_main(argv=None):
        date = DateRange.day(opts.date)
    else:
        date = DateRange(opts.dateafter, opts.datebefore)
+    if opts.default_search not in ('auto', None) and ':' not in opts.default_search:
+        parser.error(u'--default-search invalid; did you forget a colon (:) at the end?')

    # --all-sub automatically sets --write-sub if --write-auto-sub is not given
    # this was the old behaviour if only --all-sub was given.
@@ -715,6 +723,8 @@ def _real_main(argv=None):
        'bidi_workaround': opts.bidi_workaround,
        'debug_printtraffic': opts.debug_printtraffic,
        'prefer_ffmpeg': opts.prefer_ffmpeg,
+        'include_ads': opts.include_ads,
+        'default_search': opts.default_search,
    }

    with YoutubeDL(ydl_opts) as ydl:
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -47,6 +47,7 @@ from .depositfiles import DepositFilesIE
 from .dotsub import DotsubIE
 from .dreisat import DreiSatIE
 from .defense import DefenseGouvFrIE
+from .dropbox import DropboxIE
 from .ebaumsworld import EbaumsWorldIE
 from .ehow import EHowIE
 from .eighttracks import EightTracksIE
@@ -62,6 +63,7 @@ from .fktv import (
    FKTVPosteckeIE,
 )
 from .flickr import FlickrIE
+from .franceinter import FranceInterIE
 from .francetv import (
    PluzzIE,
    FranceTvInfoIE,
@@ -117,7 +119,10 @@ from .mit import TechTVMITIE, MITIE
 from .mixcloud import MixcloudIE
 from .mpora import MporaIE
 from .mofosex import MofosexIE
-from .mtv import MTVIE
+from .mtv import (
+    MTVIE,
+    MTVIggyIE,
+)
 from .muzu import MuzuTVIE
 from .myspace import MySpaceIE
 from .myspass import MySpassIE
@@ -150,6 +155,7 @@ from .rottentomatoes import RottenTomatoesIE
 from .roxwel import RoxwelIE
 from .rtlnow import RTLnowIE
 from .rutube import RutubeIE
+from .servingsys import ServingSysIE
 from .sina import SinaIE
 from .slashdot import SlashdotIE
 from .slideshare import SlideshareIE
@@ -168,6 +174,7 @@ from .southparkstudios import (
 from .space import SpaceIE
 from .spankwire import SpankwireIE
 from .spiegel import SpiegelIE
+from .spike import SpikeIE
 from .stanfordoc import StanfordOpenClassroomIE
 from .statigram import StatigramIE
 from .steam import SteamIE
--- a/youtube_dl/extractor/brightcove.py
+++ b/youtube_dl/extractor/brightcove.py
@@ -9,9 +9,11 @@ from .common import InfoExtractor
 from ..utils import (
    compat_urllib_parse,
    find_xpath_attr,
+    fix_xml_ampersands,
    compat_urlparse,
    compat_str,
    compat_urllib_request,
+    compat_parse_qs,

    ExtractorError,
    unsmuggle_url,
@@ -83,17 +85,33 @@ class BrightcoveIE(InfoExtractor):
                            lambda m: m.group(1) + '/>', object_str)
        # Fix up some stupid XML, see https://github.com/rg3/youtube-dl/issues/1608
        object_str = object_str.replace('<--', '<!--')
+        object_str = fix_xml_ampersands(object_str)

        object_doc = xml.etree.ElementTree.fromstring(object_str)
-        assert 'BrightcoveExperience' in object_doc.attrib['class']
-        params = {
-            'playerID': find_xpath_attr(object_doc, './param', 'name', 'playerID').attrib['value'],
-        }
+
+        fv_el = find_xpath_attr(object_doc, './param', 'name', 'flashVars')
+        if fv_el is not None:
+            flashvars = dict(
+                (k, v[0])
+                for k, v in compat_parse_qs(fv_el.attrib['value']).items())
+        else:
+            flashvars = {}
+
        def find_param(name):
+            if name in flashvars:
+                return flashvars[name]
            node = find_xpath_attr(object_doc, './param', 'name', name)
            if node is not None:
                return node.attrib['value']
            return None
+
+        params = {}
+
+        playerID = find_param('playerID')
+        if playerID is None:
+            raise ExtractorError('Cannot find player ID')
+        params['playerID'] = playerID
+
        playerKey = find_param('playerKey')
        # Not all pages define this value
        if playerKey is not None:
@@ -114,8 +132,12 @@ class BrightcoveIE(InfoExtractor):
        if it can't be found
        """
        m_brightcove = re.search(
-            r'<object[^>]+?class=([\'"])[^>]*?BrightcoveExperience.*?\1.+?</object>',
-            webpage, re.DOTALL)
+            r'''(?sx)<object
+            (?:
+                [^>]+?class=([\'"])[^>]*?BrightcoveExperience.*?\1 |
+                [^>]*?>\s*<param\s+name="movie"\s+value="https?://[^/]*brightcove\.com/
+            ).+?</object>''',
+            webpage)
        if m_brightcove is not None:
            return cls._build_brighcove_url(m_brightcove.group())
        else:
@@ -156,6 +178,7 @@ class BrightcoveIE(InfoExtractor):
        info = self._search_regex(r'var experienceJSON = ({.*?});', webpage, 'json')
        info = json.loads(info)['data']
        video_info = info['programmedContent']['videoPlayer']['mediaDTO']
+        video_info['_youtubedl_adServerURL'] = info.get('adServerURL')

        return self._extract_video_info(video_info)

@@ -193,6 +216,23 @@ class BrightcoveIE(InfoExtractor):
            info.update({
                'url': video_info['FLVFullLengthURL'],
            })
-        else:
+
+        if self._downloader.params.get('include_ads', False):
+            adServerURL = video_info.get('_youtubedl_adServerURL')
+            if adServerURL:
+                ad_info = {
+                    '_type': 'url',
+                    'url': adServerURL,
+                }
+                if 'url' in info:
+                    return {
+                        '_type': 'playlist',
+                        'title': info['title'],
+                        'entries': [ad_info, info],
+                    }
+                else:
+                    return ad_info
+
+        if 'url' not in info and not info.get('formats'):
            raise ExtractorError('Unable to extract video url for %s' % info['id'])
        return info
--- a/youtube_dl/extractor/clipsyndicate.py
+++ b/youtube_dl/extractor/clipsyndicate.py
@@ -3,7 +3,7 @@ import re
 from .common import InfoExtractor
 from ..utils import (
    find_xpath_attr,
-    fix_xml_all_ampersand,
+    fix_xml_ampersands
 )


@@ -33,7 +33,7 @@ class ClipsyndicateIE(InfoExtractor):
        pdoc = self._download_xml(
            'http://eplayer.clipsyndicate.com/osmf/playlist?%s' % flvars,
            video_id, u'Downloading video info',
-            transform_source=fix_xml_all_ampersand) 
+            transform_source=fix_xml_ampersands)

        track_doc = pdoc.find('trackList/track')
        def find_param(name):
--- a/youtube_dl/extractor/cnn.py
+++ b/youtube_dl/extractor/cnn.py
@@ -25,12 +25,13 @@ class CNNIE(InfoExtractor):
        },
    },
    {
-        u"url": u"http://edition.cnn.com/video/?/video/us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+Top+Stories%29",
-        u"file": u"us_2013_08_21_sot-student-gives-epic-speech.georgia-institute-of-technology.mp4",
-        u"md5": u"b5cc60c60a3477d185af8f19a2a26f4e",
-        u"info_dict": {
-            u"title": "Student's epic speech stuns new freshmen",
-            u"description": "A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from \"2001: A Space Odyssey.\""
+        "url": "http://edition.cnn.com/video/?/video/us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+Top+Stories%29",
+        "file": "us_2013_08_21_sot-student-gives-epic-speech.georgia-institute-of-technology.mp4",
+        "md5": "b5cc60c60a3477d185af8f19a2a26f4e",
+        "info_dict": {
+            "title": "Student's epic speech stuns new freshmen",
+            "description": "A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from \"2001: A Space Odyssey.\"",
+            "upload_date": "20130821",
        }
    }]

--- a/youtube_dl/extractor/comedycentral.py
+++ b/youtube_dl/extractor/comedycentral.py
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import re

 from .common import InfoExtractor
@@ -15,30 +17,22 @@ class ComedyCentralIE(MTVServicesInfoExtractor):
    _VALID_URL = r'''(?x)https?://(?:www.)?comedycentral.com/
        (video-clips|episodes|cc-studios|video-collections)
        /(?P<title>.*)'''
-    _FEED_URL = u'http://comedycentral.com/feeds/mrss/'
+    _FEED_URL = 'http://comedycentral.com/feeds/mrss/'

    _TEST = {
-        u'url': u'http://www.comedycentral.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother',
-        u'md5': u'4167875aae411f903b751a21f357f1ee',
-        u'info_dict': {
-            u'id': u'cef0cbb3-e776-4bc9-b62e-8016deccb354',
-            u'ext': u'mp4',
-            u'title': u'Uncensored - Greg Fitzsimmons - Too Good of a Mother',
-            u'description': u'After a certain point, breastfeeding becomes c**kblocking.',
+        'url': 'http://www.comedycentral.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother',
+        'md5': '4167875aae411f903b751a21f357f1ee',
+        'info_dict': {
+            'id': 'cef0cbb3-e776-4bc9-b62e-8016deccb354',
+            'ext': 'mp4',
+            'title': 'CC:Stand-Up|Greg Fitzsimmons: Life on Stage|Uncensored - Too Good of a Mother',
+            'description': 'After a certain point, breastfeeding becomes c**kblocking.',
        },
    }

-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        title = mobj.group('title')
-        webpage = self._download_webpage(url, title)
-        mgid = self._search_regex(r'data-mgid="(?P<mgid>mgid:.*?)"',
-                                  webpage, u'mgid')
-        return self._get_videos_info(mgid)
-

 class ComedyCentralShowsIE(InfoExtractor):
-    IE_DESC = u'The Daily Show / Colbert Report'
+    IE_DESC = 'The Daily Show / Colbert Report'
    # urls can be abbreviations like :thedailyshow or :colbert
    # urls for episodes like:
    # or urls for clips like: http://www.thedailyshow.com/watch/mon-december-10-2012/any-given-gun-day
@@ -55,14 +49,14 @@ class ComedyCentralShowsIE(InfoExtractor):
                              extended-interviews/(?P<interID>[0-9]+)/playlist_tds_extended_(?P<interview_title>.*?)/.*?)))
                     $"""
    _TEST = {
-        u'url': u'http://www.thedailyshow.com/watch/thu-december-13-2012/kristen-stewart',
-        u'file': u'422212.mp4',
-        u'md5': u'4e2f5cb088a83cd8cdb7756132f9739d',
-        u'info_dict': {
-            u"upload_date": u"20121214", 
-            u"description": u"Kristen Stewart", 
-            u"uploader": u"thedailyshow", 
-            u"title": u"thedailyshow-kristen-stewart part 1"
+        'url': 'http://www.thedailyshow.com/watch/thu-december-13-2012/kristen-stewart',
+        'file': '422212.mp4',
+        'md5': '4e2f5cb088a83cd8cdb7756132f9739d',
+        'info_dict': {
+            "upload_date": "20121214",
+            "description": "Kristen Stewart",
+            "uploader": "thedailyshow",
+            "title": "thedailyshow-kristen-stewart part 1"
        }
    }

@@ -94,20 +88,20 @@ class ComedyCentralShowsIE(InfoExtractor):
    def _transform_rtmp_url(rtmp_video_url):
        m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp.comedystor/.*)$', rtmp_video_url)
        if not m:
-            raise ExtractorError(u'Cannot transform RTMP url')
+            raise ExtractorError('Cannot transform RTMP url')
        base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/'
        return base + m.group('finalid')

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url, re.VERBOSE)
        if mobj is None:
-            raise ExtractorError(u'Invalid URL: %s' % url)
+            raise ExtractorError('Invalid URL: %s' % url)

        if mobj.group('shortname'):
            if mobj.group('shortname') in ('tds', 'thedailyshow'):
-                url = u'http://www.thedailyshow.com/full-episodes/'
+                url = 'http://www.thedailyshow.com/full-episodes/'
            else:
-                url = u'http://www.colbertnation.com/full-episodes/'
+                url = 'http://www.colbertnation.com/full-episodes/'
            mobj = re.match(self._VALID_URL, url, re.VERBOSE)
            assert mobj is not None

@@ -133,9 +127,9 @@ class ComedyCentralShowsIE(InfoExtractor):
            url = htmlHandle.geturl()
            mobj = re.match(self._VALID_URL, url, re.VERBOSE)
            if mobj is None:
-                raise ExtractorError(u'Invalid redirected URL: ' + url)
+                raise ExtractorError('Invalid redirected URL: ' + url)
            if mobj.group('episode') == '':
-                raise ExtractorError(u'Redirected URL is still not specific: ' + url)
+                raise ExtractorError('Redirected URL is still not specific: ' + url)
            epTitle = mobj.group('episode')

        mMovieParams = re.findall('(?:<param name="movie" value="|var url = ")(http://media.mtvnservices.com/([^"]*(?:episode|video).*?:.*?))"', webpage)
@@ -147,15 +141,15 @@ class ComedyCentralShowsIE(InfoExtractor):

            altMovieParams = re.findall('data-mgid="([^"]*(?:episode|video).*?:.*?)"', webpage)
            if len(altMovieParams) == 0:
-                raise ExtractorError(u'unable to find Flash URL in webpage ' + url)
+                raise ExtractorError('unable to find Flash URL in webpage ' + url)
            else:
                mMovieParams = [("http://media.mtvnservices.com/" + altMovieParams[0], altMovieParams[0])]

        uri = mMovieParams[0][1]
        indexUrl = 'http://shadow.comedycentral.com/feeds/video_player/mrss/?' + compat_urllib_parse.urlencode({'uri': uri})
        idoc = self._download_xml(indexUrl, epTitle,
-                                          u'Downloading show index',
-                                          u'unable to download episode index')
+                                          'Downloading show index',
+                                          'unable to download episode index')

        results = []

@@ -170,7 +164,7 @@ class ComedyCentralShowsIE(InfoExtractor):
            configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' +
                        compat_urllib_parse.urlencode({'uri': mediaId}))
            cdoc = self._download_xml(configUrl, epTitle,
-                                               u'Downloading configuration for %s' % shortMediaId)
+                                               'Downloading configuration for %s' % shortMediaId)

            turls = []
            for rendition in cdoc.findall('.//rendition'):
@@ -178,7 +172,7 @@ class ComedyCentralShowsIE(InfoExtractor):
                turls.append(finfo)

            if len(turls) == 0:
-                self._downloader.report_error(u'unable to download ' + mediaId + ': No videos found')
+                self._downloader.report_error('unable to download ' + mediaId + ': No videos found')
                continue

            formats = []
@@ -192,7 +186,7 @@ class ComedyCentralShowsIE(InfoExtractor):
                    'width': w,
                })

-            effTitle = showId + u'-' + epTitle + u' part ' + compat_str(partNum+1)
+            effTitle = showId + '-' + epTitle + ' part ' + compat_str(partNum+1)
            results.append({
                'id': shortMediaId,
                'formats': formats,
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -1,4 +1,5 @@
 import base64
+import hashlib
 import json
 import os
 import re
@@ -219,6 +220,8 @@ class InfoExtractor(object):
                          webpage_bytes[:1024])
            if m:
                encoding = m.group(1).decode('ascii')
+            elif webpage_bytes.startswith(b'\xff\xfe'):
+                encoding = 'utf-16'
            else:
                encoding = 'utf-8'
        if self._downloader.params.get('dump_intermediate_pages', False):
@@ -234,6 +237,9 @@ class InfoExtractor(object):
                url = url_or_request.get_full_url()
            except AttributeError:
                url = url_or_request
+            if len(url) > 200:
+                h = u'___' + hashlib.md5(url).hexdigest()
+                url = url[:200 - len(h)] + h
            raw_filename = ('%s_%s.dump' % (video_id, url))
            filename = sanitize_filename(raw_filename, restricted=True)
            self.to_screen(u'Saving request to ' + filename)
--- a/youtube_dl/extractor/condenast.py
+++ b/youtube_dl/extractor/condenast.py
@@ -1,4 +1,5 @@
 # coding: utf-8
+from __future__ import unicode_literals

 import re
 import json
@@ -20,30 +21,31 @@ class CondeNastIE(InfoExtractor):

    # The keys are the supported sites and the values are the name to be shown
    # to the user and in the extractor description.
-    _SITES = {'wired': u'WIRED',
-              'gq': u'GQ',
-              'vogue': u'Vogue',
-              'glamour': u'Glamour',
-              'wmagazine': u'W Magazine',
-              'vanityfair': u'Vanity Fair',
-              }
+    _SITES = {
+        'wired': 'WIRED',
+        'gq': 'GQ',
+        'vogue': 'Vogue',
+        'glamour': 'Glamour',
+        'wmagazine': 'W Magazine',
+        'vanityfair': 'Vanity Fair',
+    }

    _VALID_URL = r'http://(video|www).(?P<site>%s).com/(?P<type>watch|series|video)/(?P<id>.+)' % '|'.join(_SITES.keys())
-    IE_DESC = u'Condé Nast media group: %s' % ', '.join(sorted(_SITES.values()))
+    IE_DESC = 'Condé Nast media group: %s' % ', '.join(sorted(_SITES.values()))

    _TEST = {
-        u'url': u'http://video.wired.com/watch/3d-printed-speakers-lit-with-led',
-        u'file': u'5171b343c2b4c00dd0c1ccb3.mp4',
-        u'md5': u'1921f713ed48aabd715691f774c451f7',
-        u'info_dict': {
-            u'title': u'3D Printed Speakers Lit With LED',
-            u'description': u'Check out these beautiful 3D printed LED speakers.  You can\'t actually buy them, but LumiGeek is working on a board that will let you make you\'re own.',
+        'url': 'http://video.wired.com/watch/3d-printed-speakers-lit-with-led',
+        'file': '5171b343c2b4c00dd0c1ccb3.mp4',
+        'md5': '1921f713ed48aabd715691f774c451f7',
+        'info_dict': {
+            'title': '3D Printed Speakers Lit With LED',
+            'description': 'Check out these beautiful 3D printed LED speakers.  You can\'t actually buy them, but LumiGeek is working on a board that will let you make you\'re own.',
        }
    }

    def _extract_series(self, url, webpage):
        title = self._html_search_regex(r'<div class="cne-series-info">.*?<h1>(.+?)</h1>',
-                                        webpage, u'series title', flags=re.DOTALL)
+                                        webpage, 'series title', flags=re.DOTALL)
        url_object = compat_urllib_parse_urlparse(url)
        base_url = '%s://%s' % (url_object.scheme, url_object.netloc)
        m_paths = re.finditer(r'<p class="cne-thumb-title">.*?<a href="(/watch/.+?)["\?]',
@@ -57,39 +59,41 @@ class CondeNastIE(InfoExtractor):
        description = self._html_search_regex([r'<div class="cne-video-description">(.+?)</div>',
                                               r'<div class="video-post-content">(.+?)</div>',
                                               ],
-                                              webpage, u'description',
+                                              webpage, 'description',
                                              fatal=False, flags=re.DOTALL)
        params = self._search_regex(r'var params = {(.+?)}[;,]', webpage,
-                                    u'player params', flags=re.DOTALL)
-        video_id = self._search_regex(r'videoId: [\'"](.+?)[\'"]', params, u'video id')
-        player_id = self._search_regex(r'playerId: [\'"](.+?)[\'"]', params, u'player id')
-        target = self._search_regex(r'target: [\'"](.+?)[\'"]', params, u'target')
+                                    'player params', flags=re.DOTALL)
+        video_id = self._search_regex(r'videoId: [\'"](.+?)[\'"]', params, 'video id')
+        player_id = self._search_regex(r'playerId: [\'"](.+?)[\'"]', params, 'player id')
+        target = self._search_regex(r'target: [\'"](.+?)[\'"]', params, 'target')
        data = compat_urllib_parse.urlencode({'videoId': video_id,
                                              'playerId': player_id,
                                              'target': target,
                                              })
        base_info_url = self._search_regex(r'url = [\'"](.+?)[\'"][,;]',
-                                           webpage, u'base info url',
+                                           webpage, 'base info url',
                                           default='http://player.cnevids.com/player/loader.js?')
        info_url = base_info_url + data
        info_page = self._download_webpage(info_url, video_id,
-                                           u'Downloading video info')
-        video_info = self._search_regex(r'var video = ({.+?});', info_page, u'video info')
+                                           'Downloading video info')
+        video_info = self._search_regex(r'var video = ({.+?});', info_page, 'video info')
        video_info = json.loads(video_info)

-        def _formats_sort_key(f):
-            type_ord = 1 if f['type'] == 'video/mp4' else 0
-            quality_ord = 1 if f['quality'] == 'high' else 0
-            return (quality_ord, type_ord)
-        best_format = sorted(video_info['sources'][0], key=_formats_sort_key)[-1]
+        formats = [{
+            'format_id': '%s-%s' % (fdata['type'].split('/')[-1], fdata['quality']),
+            'url': fdata['src'],
+            'ext': fdata['type'].split('/')[-1],
+            'quality': 1 if fdata['quality'] == 'high' else 0,
+        } for fdata in video_info['sources'][0]]
+        self._sort_formats(formats)

-        return {'id': video_id,
-                'url': best_format['src'],
-                'ext': best_format['type'].split('/')[-1],
-                'title': video_info['title'],
-                'thumbnail': video_info['poster_frame'],
-                'description': description,
-                }
+        return {
+            'id': video_id,
+            'formats': formats,
+            'title': video_info['title'],
+            'thumbnail': video_info['poster_frame'],
+            'description': description,
+        }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
--- a/youtube_dl/extractor/cspan.py
+++ b/youtube_dl/extractor/cspan.py
@@ -10,7 +10,7 @@ from ..utils import (


 class CSpanIE(InfoExtractor):
-    _VALID_URL = r'http://www\.c-spanvideo\.org/program/(.*)'
+    _VALID_URL = r'http://(?:www\.)?c-spanvideo\.org/program/(?P<name>.*)'
    IE_DESC = 'C-SPAN'
    _TEST = {
        'url': 'http://www.c-spanvideo.org/program/HolderonV',
@@ -24,9 +24,9 @@ class CSpanIE(InfoExtractor):

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
-        prog_name = mobj.group(1)
+        prog_name = mobj.group('name')
        webpage = self._download_webpage(url, prog_name)
-        video_id = self._search_regex(r'programid=(.*?)&', webpage, 'video id')
+        video_id = self._search_regex(r'prog(?:ram)?id=(.*?)&', webpage, 'video id')

        title = self._html_search_regex(
            r'<!-- title -->\n\s*<h1[^>]*>(.*?)</h1>', webpage, 'title')
--- a/youtube_dl/extractor/d8.py
+++ b/youtube_dl/extractor/d8.py
@@ -1,22 +1,25 @@
 # encoding: utf-8
+from __future__ import unicode_literals
+
 from .canalplus import CanalplusIE


 class D8IE(CanalplusIE):
    _VALID_URL = r'https?://www\.d8\.tv/.*?/(?P<path>.*)'
    _VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/d8/%s'
-    IE_NAME = u'd8.tv'
+    IE_NAME = 'd8.tv'

    _TEST = {
-        u'url': u'http://www.d8.tv/d8-docs-mags/pid6589-d8-campagne-intime.html',
-        u'file': u'966289.flv',
-        u'info_dict': {
-            u'title': u'Campagne intime - Documentaire exceptionnel',
-            u'description': u'md5:d2643b799fb190846ae09c61e59a859f',
-            u'upload_date': u'20131108',
+        'url': 'http://www.d8.tv/d8-docs-mags/pid6589-d8-campagne-intime.html',
+        'file': '966289.flv',
+        'info_dict': {
+            'title': 'Campagne intime - Documentaire exceptionnel',
+            'description': 'md5:d2643b799fb190846ae09c61e59a859f',
+            'upload_date': '20131108',
        },
-        u'params': {
+        'params': {
            # rtmp
-            u'skip_download': True,
+            'skip_download': True,
        },
+        'skip': 'videos get deleted after a while',
    }
--- a/youtube_dl/extractor/dropbox.py
+++ b/youtube_dl/extractor/dropbox.py
@@ -0,0 +1,31 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import os.path
+import re
+
+from .common import InfoExtractor
+
+
+class DropboxIE(InfoExtractor):
+    """Extractor for files shared through dropbox.com ``/s/`` links.
+
+    Everything is derived from the URL itself; no page is downloaded.
+    """
+
+    _VALID_URL = r'https?://(?:www\.)?dropbox[.]com/s/(?P<id>[a-zA-Z0-9]{15})/(?P<title>[^?#]*)'
+    _TEST = {
+        'url': 'https://www.dropbox.com/s/mcnzehi9wo55th4/20131219_085616.mp4',
+        'file': 'mcnzehi9wo55th4.mp4',
+        'md5': 'f6d65b1b326e82fd7ab7720bea3dacae',
+        'info_dict': {
+            'title': '20131219_085616'
+        }
+    }
+
+    def _real_extract(self, url):
+        """Build the info dict for a shared-file URL.
+
+        url -- a URL matching _VALID_URL.
+
+        Returns a dict with 'id' (the 15-character share id), 'title'
+        (the file name without its extension) and 'url' (the share link
+        with the ``dl=1`` query parameter set).
+        """
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        title = os.path.splitext(mobj.group('title'))[0]
+        # Build the link from the matched text only: the pattern stops
+        # before any '?' or '#', so an input that already carries a query
+        # string or fragment (e.g. '...mp4?dl=0') no longer turns into the
+        # malformed '...mp4?dl=0?dl=1'.
+        # NOTE(review): dl=1 is presumably Dropbox's direct-download
+        # switch -- confirm.
+        video_url = mobj.group(0) + '?dl=1'
+
+        return {
+            'id': video_id,
+            'title': title,
+            'url': video_url,
+        }
--- a/youtube_dl/extractor/facebook.py
+++ b/youtube_dl/extractor/facebook.py
@@ -17,7 +17,12 @@ from ..utils import (
 class FacebookIE(InfoExtractor):
    """Information Extractor for Facebook"""

-    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook\.com/(?:[^#?]*#!/)?(?:video/video|photo)\.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)'
+    _VALID_URL = r'''(?x)
+        (?:https?://)?(?:\w+\.)?facebook\.com/
+        (?:[^#?]*\#!/)?
+        (?:video/video\.php|photo\.php|video/embed)\?(?:.*?)
+        (?:v|video_id)=(?P<id>[0-9]+)
+        (?:.*)'''
    _LOGIN_URL = 'https://www.facebook.com/login.php?next=http%3A%2F%2Ffacebook.com%2Fhome.php&login_attempt=1'
    _CHECKPOINT_URL = 'https://www.facebook.com/checkpoint/?next=http%3A%2F%2Ffacebook.com%2Fhome.php&_fb_noscript=1'
    _NETRC_MACHINE = 'facebook'
@@ -90,7 +95,7 @@ class FacebookIE(InfoExtractor):
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
-        video_id = mobj.group('ID')
+        video_id = mobj.group('id')

        url = 'https://www.facebook.com/video/video.php?v=%s' % video_id
        webpage = self._download_webpage(url, video_id)
--- a/youtube_dl/extractor/flickr.py
+++ b/youtube_dl/extractor/flickr.py
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import re

 from .common import InfoExtractor
@@ -11,13 +13,13 @@ class FlickrIE(InfoExtractor):
    """Information Extractor for Flickr videos"""
    _VALID_URL = r'(?:https?://)?(?:www\.|secure\.)?flickr\.com/photos/(?P<uploader_id>[\w\-_@]+)/(?P<id>\d+).*'
    _TEST = {
-        u'url': u'http://www.flickr.com/photos/forestwander-nature-pictures/5645318632/in/photostream/',
-        u'file': u'5645318632.mp4',
-        u'md5': u'6fdc01adbc89d72fc9c4f15b4a4ba87b',
-        u'info_dict': {
-            u"description": u"Waterfalls in the Springtime at Dark Hollow Waterfalls. These are located just off of Skyline Drive in Virginia. They are only about 6/10 of a mile hike but it is a pretty steep hill and a good climb back up.", 
-            u"uploader_id": u"forestwander-nature-pictures", 
-            u"title": u"Dark Hollow Waterfalls"
+        'url': 'http://www.flickr.com/photos/forestwander-nature-pictures/5645318632/in/photostream/',
+        'file': '5645318632.mp4',
+        'md5': '6fdc01adbc89d72fc9c4f15b4a4ba87b',
+        'info_dict': {
+            "description": "Waterfalls in the Springtime at Dark Hollow Waterfalls. These are located just off of Skyline Drive in Virginia. They are only about 6/10 of a mile hike but it is a pretty steep hill and a good climb back up.", 
+            "uploader_id": "forestwander-nature-pictures", 
+            "title": "Dark Hollow Waterfalls"
        }
    }

@@ -29,13 +31,13 @@ class FlickrIE(InfoExtractor):
        webpage_url = 'http://www.flickr.com/photos/' + video_uploader_id + '/' + video_id
        webpage = self._download_webpage(webpage_url, video_id)

-        secret = self._search_regex(r"photo_secret: '(\w+)'", webpage, u'secret')
+        secret = self._search_regex(r"photo_secret: '(\w+)'", webpage, 'secret')

        first_url = 'https://secure.flickr.com/apps/video/video_mtl_xml.gne?v=x&photo_id=' + video_id + '&secret=' + secret + '&bitrate=700&target=_self'
        first_xml = self._download_webpage(first_url, video_id, 'Downloading first data webpage')

        node_id = self._html_search_regex(r'<Item id="id">(\d+-\d+)</Item>',
-            first_xml, u'node_id')
+            first_xml, 'node_id')

        second_url = 'https://secure.flickr.com/video_playlist.gne?node_id=' + node_id + '&tech=flash&mode=playlist&bitrate=700&secret=' + secret + '&rd=video.yahoo.com&noad=1'
        second_xml = self._download_webpage(second_url, video_id, 'Downloading second data webpage')
@@ -44,7 +46,7 @@ class FlickrIE(InfoExtractor):

        mobj = re.search(r'<STREAM APP="(.+?)" FULLPATH="(.+?)"', second_xml)
        if mobj is None:
-            raise ExtractorError(u'Unable to extract video url')
+            raise ExtractorError('Unable to extract video url')
        video_url = mobj.group(1) + unescapeHTML(mobj.group(2))

        return [{
--- a/youtube_dl/extractor/franceinter.py
+++ b/youtube_dl/extractor/franceinter.py
@@ -0,0 +1,38 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class FranceInterIE(InfoExtractor):
+    """Extractor for franceinter.fr "reecouter" player pages."""
+
+    _VALID_URL = r'http://(?:www\.)?franceinter\.fr/player/reecouter\?play=(?P<id>[0-9]{6})'
+    _TEST = {
+        'url': 'http://www.franceinter.fr/player/reecouter?play=793962',
+        'file': '793962.mp3',
+        'md5': '4764932e466e6f6c79c317d2e74f6884',
+        'info_dict': {
+            'title': 'L’Histoire dans les jeux vidéo',
+        },
+    }
+
+    def _real_extract(self, url):
+        """Return the info dict for the item referenced by *url*.
+
+        The result carries a single format whose 'vcodec' is 'none'.
+        """
+        video_id = re.match(self._VALID_URL, url).group('id')
+        webpage = self._download_webpage(url, video_id)
+
+        page_title = self._html_search_regex(
+            r'<span class="roll_overflow">(.*?)</span></h1>', webpage, 'title')
+        # The media path sits between the urlAOD and startTime parameters.
+        media_path = self._search_regex(
+            r'&urlAOD=(.*?)&startTime', webpage, 'video url')
+
+        only_format = {
+            'url': 'http://www.franceinter.fr/' + media_path,
+            'vcodec': 'none',
+        }
+        return {
+            'id': video_id,
+            'formats': [only_format],
+            'title': page_title,
+        }
--- a/youtube_dl/extractor/gamespot.py
+++ b/youtube_dl/extractor/gamespot.py
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import re
 import json

@@ -13,12 +15,12 @@ from ..utils import (
 class GameSpotIE(InfoExtractor):
    _VALID_URL = r'(?:http://)?(?:www\.)?gamespot\.com/.*-(?P<page_id>\d+)/?'
    _TEST = {
-        u"url": u"http://www.gamespot.com/arma-iii/videos/arma-iii-community-guide-sitrep-i-6410818/",
-        u"file": u"gs-2300-6410818.mp4",
-        u"md5": u"b2a30deaa8654fcccd43713a6b6a4825",
-        u"info_dict": {
-            u"title": u"Arma 3 - Community Guide: SITREP I",
-            u'description': u'Check out this video where some of the basics of Arma 3 is explained.',
+        "url": "http://www.gamespot.com/arma-iii/videos/arma-iii-community-guide-sitrep-i-6410818/",
+        "file": "gs-2300-6410818.mp4",
+        "md5": "b2a30deaa8654fcccd43713a6b6a4825",
+        "info_dict": {
+            "title": "Arma 3 - Community Guide: SITREP I",
+            'description': 'Check out this video where some of the basics of Arma 3 is explained.',
        }
    }

--- a/youtube_dl/extractor/gametrailers.py
+++ b/youtube_dl/extractor/gametrailers.py
@@ -1,4 +1,4 @@
-import re
+from __future__ import unicode_literals

 from .mtv import MTVServicesInfoExtractor

@@ -6,22 +6,13 @@ from .mtv import MTVServicesInfoExtractor
 class GametrailersIE(MTVServicesInfoExtractor):
+    """Extractor for gametrailers.com videos, reviews and full episodes.
+
+    Only the URL pattern, the test case and the feed URL are declared
+    here; the extraction logic is inherited from MTVServicesInfoExtractor.
+    """
+
    _VALID_URL = r'http://www\.gametrailers\.com/(?P<type>videos|reviews|full-episodes)/(?P<id>.*?)/(?P<title>.*)'
    _TEST = {
-        u'url': u'http://www.gametrailers.com/videos/zbvr8i/mirror-s-edge-2-e3-2013--debut-trailer',
-        u'file': u'70e9a5d7-cf25-4a10-9104-6f3e7342ae0d.mp4',
-        u'md5': u'4c8e67681a0ea7ec241e8c09b3ea8cf7',
-        u'info_dict': {
-            u'title': u'E3 2013: Debut Trailer',
-            u'description': u'Faith is back!  Check out the World Premiere trailer for Mirror\'s Edge 2 straight from the EA Press Conference at E3 2013!',
+        'url': 'http://www.gametrailers.com/videos/zbvr8i/mirror-s-edge-2-e3-2013--debut-trailer',
+        'file': '70e9a5d7-cf25-4a10-9104-6f3e7342ae0d.mp4',
+        'md5': '4c8e67681a0ea7ec241e8c09b3ea8cf7',
+        'info_dict': {
+            'title': 'Mirror\'s Edge 2|E3 2013: Debut Trailer',
+            'description': 'Faith is back!  Check out the World Premiere trailer for Mirror\'s Edge 2 straight from the EA Press Conference at E3 2013!',
        },
    }

    _FEED_URL = 'http://www.gametrailers.com/feeds/mrss'
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-        webpage = self._download_webpage(url, video_id)
-        mgid = self._search_regex([r'data-video="(?P<mgid>mgid:.*?)"',
-                                   r'data-contentId=\'(?P<mgid>mgid:.*?)\''],
-                                  webpage, u'mgid')
-        return self._get_videos_info(mgid)
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -92,11 +92,12 @@ class GenericIE(InfoExtractor):
        # ooyala video
        {
            'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
+            'file': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ.mp4',
            'md5': '5644c6ca5d5782c1d0d350dad9bd840c',
            'info_dict': {
                'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
                'ext': 'mp4',
-                'title': '2cc213299525360.mov', #that's what we get
+                'title': '2cc213299525360.mov',  # that's what we get
            },
        },
    ]
@@ -161,8 +162,19 @@ class GenericIE(InfoExtractor):
    def _real_extract(self, url):
        parsed_url = compat_urlparse.urlparse(url)
        if not parsed_url.scheme:
-            self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
-            return self.url_result('http://' + url)
+            default_search = self._downloader.params.get('default_search')
+            if default_search is None:
+                default_search = 'auto'
+
+            if default_search == 'auto':
+                if '/' in url:
+                    self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
+                    return self.url_result('http://' + url)
+                else:
+                    return self.url_result('ytsearch:' + url)
+            else:
+                assert ':' in default_search
+                return self.url_result(default_search + url)
        video_id = os.path.splitext(url.split('/')[-1])[0]

        self.to_screen('%s: Requesting header' % video_id)
@@ -318,6 +330,12 @@ class GenericIE(InfoExtractor):
        if mobj is not None:
            return self.url_result(mobj.group('url'), 'Novamov')

+        # Look for embedded Facebook player
+        mobj = re.search(
+            r'<iframe[^>]+?src=(["\'])(?P<url>https://www.facebook.com/video/embed.+?)\1', webpage)
+        if mobj is not None:
+            return self.url_result(mobj.group('url'), 'Facebook')
+
        # Start with something easy: JW Player in SWFObject
        mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
        if mobj is None:
--- a/youtube_dl/extractor/hotnewhiphop.py
+++ b/youtube_dl/extractor/hotnewhiphop.py
@@ -1,17 +1,25 @@
+from __future__ import unicode_literals
+
 import re
 import base64

 from .common import InfoExtractor
+from ..utils import (
+    compat_urllib_parse,
+    compat_urllib_request,
+    ExtractorError,
+    HEADRequest,
+)


 class HotNewHipHopIE(InfoExtractor):
    _VALID_URL = r'http://www\.hotnewhiphop.com/.*\.(?P<id>.*)\.html'
    _TEST = {
-        u'url': u"http://www.hotnewhiphop.com/freddie-gibbs-lay-it-down-song.1435540.html",
-        u'file': u'1435540.mp3',
-        u'md5': u'2c2cd2f76ef11a9b3b581e8b232f3d96',
-        u'info_dict': {
-            u"title": u'Freddie Gibbs "Lay It Down"'
+        'url': 'http://www.hotnewhiphop.com/freddie-gibbs-lay-it-down-song.1435540.html',
+        'file': '1435540.mp3',
+        'md5': '2c2cd2f76ef11a9b3b581e8b232f3d96',
+        'info_dict': {
+            'title': 'Freddie Gibbs - Lay It Down'
        }
    }

@@ -21,24 +29,41 @@ class HotNewHipHopIE(InfoExtractor):

        webpage_src = self._download_webpage(url, video_id)

-        video_url_base64 = self._search_regex(r'data-path="(.*?)"',
-            webpage_src, u'video URL', fatal=False)
+        video_url_base64 = self._search_regex(
+            r'data-path="(.*?)"', webpage_src, u'video URL', fatal=False)

-        if video_url_base64 == None:
-            video_url = self._search_regex(r'"contentUrl" content="(.*?)"', webpage_src,
-                u'video URL')
+        if video_url_base64 is None:
+            video_url = self._search_regex(
+                r'"contentUrl" content="(.*?)"', webpage_src, u'video URL')
            return self.url_result(video_url, ie='Youtube')

-        video_url = base64.b64decode(video_url_base64).decode('utf-8')
+        reqdata = compat_urllib_parse.urlencode([
+            ('mediaType', 's'),
+            ('mediaId', video_id),
+        ])
+        r = compat_urllib_request.Request(
+            'http://www.hotnewhiphop.com/ajax/media/getActions/', data=reqdata)
+        r.add_header('Content-Type', 'application/x-www-form-urlencoded')
+        mkd = self._download_json(
+            r, video_id, note='Requesting media key',
+            errnote='Could not download media key')
+        if 'mediaKey' not in mkd:
+            raise ExtractorError('Did not get a media key')

-        video_title = self._html_search_regex(r"<title>(.*)</title>",
-            webpage_src, u'title')
+        redirect_url = base64.b64decode(video_url_base64).decode('utf-8')
+        redirect_req = HEADRequest(redirect_url)
+        req = self._request_webpage(
+            redirect_req, video_id,
+            note='Resolving final URL', errnote='Could not resolve final URL')
+        video_url = req.geturl()
+        if video_url.endswith('.html'):
+            raise ExtractorError('Redirect failed')

-        results = [{
-                    'id': video_id,
-                    'url' : video_url,
-                    'title' : video_title,
-                    'thumbnail' : self._og_search_thumbnail(webpage_src),
-                    'ext' : 'mp3',
-                    }]
-        return results
+        video_title = self._og_search_title(webpage_src).strip()
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': video_title,
+            'thumbnail': self._og_search_thumbnail(webpage_src),
+        }
--- a/youtube_dl/extractor/imdb.py
+++ b/youtube_dl/extractor/imdb.py
@@ -68,22 +68,15 @@ class ImdbListIE(InfoExtractor):
        mobj = re.match(self._VALID_URL, url)
        list_id = mobj.group('id')

-        # RSS XML is sometimes malformed
-        rss = self._download_webpage('http://rss.imdb.com/list/%s' % list_id, list_id, 'Downloading list RSS')
-        list_title = self._html_search_regex(r'<title>(.*?)</title>', rss, 'list title')
+        webpage = self._download_webpage(url, list_id)
+        list_code = self._search_regex(
+            r'(?s)<div\s+class="list\sdetail">(.*?)class="see-more"',
+            webpage, 'list code')
+        entries = [
+            self.url_result('http://www.imdb.com' + m, 'Imdb')
+            for m in re.findall(r'href="(/video/imdb/vi[^"]+)"', webpage)]

-        # Export is independent of actual author_id, but returns 404 if no author_id is provided.
-        # However, passing dummy author_id seems to be enough.
-        csv = self._download_webpage('http://www.imdb.com/list/export?list_id=%s&author_id=ur00000000' % list_id,
-                                     list_id, 'Downloading list CSV')
-        
-        entries = []
-        for item in csv.split('\n')[1:]:
-            cols = item.split(',')
-            if len(cols) < 2:
-                continue
-            item_id = cols[1][1:-1]
-            if item_id.startswith('vi'):
-                entries.append(self.url_result('http://www.imdb.com/video/imdb/%s' % item_id, 'Imdb'))
+        list_title = self._html_search_regex(
+            r'<h1 class="header">(.*?)</h1>', webpage, 'list title')

        return self.playlist_result(entries, list_id, list_title)
--- a/youtube_dl/extractor/kankan.py
+++ b/youtube_dl/extractor/kankan.py
@@ -1,21 +1,24 @@
+from __future__ import unicode_literals
+
 import re
 import hashlib

 from .common import InfoExtractor
-from ..utils import determine_ext

 _md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()

+
 class KankanIE(InfoExtractor):
    _VALID_URL = r'https?://(?:.*?\.)?kankan\.com/.+?/(?P<id>\d+)\.shtml'
    
    _TEST = {
-        u'url': u'http://yinyue.kankan.com/vod/48/48863.shtml',
-        u'file': u'48863.flv',
-        u'md5': u'29aca1e47ae68fc28804aca89f29507e',
-        u'info_dict': {
-            u'title': u'Ready To Go',
+        'url': 'http://yinyue.kankan.com/vod/48/48863.shtml',
+        'file': '48863.flv',
+        'md5': '29aca1e47ae68fc28804aca89f29507e',
+        'info_dict': {
+            'title': 'Ready To Go',
        },
+        'skip': 'Only available from China',
    }

    def _real_extract(self, url):
@@ -23,22 +26,23 @@ class KankanIE(InfoExtractor):
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)

-        title = self._search_regex(r'(?:G_TITLE=|G_MOVIE_TITLE = )[\'"](.+?)[\'"]', webpage, u'video title')
+        title = self._search_regex(r'(?:G_TITLE=|G_MOVIE_TITLE = )[\'"](.+?)[\'"]', webpage, 'video title')
        surls = re.search(r'surls:\[\'.+?\'\]|lurl:\'.+?\.flv\'', webpage).group(0)
        gcids = re.findall(r"http://.+?/.+?/(.+?)/", surls)
        gcid = gcids[-1]

-        video_info_page = self._download_webpage('http://p2s.cl.kankan.com/getCdnresource_flv?gcid=%s' % gcid,
-                                                 video_id, u'Downloading video url info')
-        ip = self._search_regex(r'ip:"(.+?)"', video_info_page, u'video url ip')
-        path = self._search_regex(r'path:"(.+?)"', video_info_page, u'video url path')
-        param1 = self._search_regex(r'param1:(\d+)', video_info_page, u'param1')
-        param2 = self._search_regex(r'param2:(\d+)', video_info_page, u'param2')
+        info_url = 'http://p2s.cl.kankan.com/getCdnresource_flv?gcid=%s' % gcid
+        video_info_page = self._download_webpage(
+            info_url, video_id, 'Downloading video url info')
+        ip = self._search_regex(r'ip:"(.+?)"', video_info_page, 'video url ip')
+        path = self._search_regex(r'path:"(.+?)"', video_info_page, 'video url path')
+        param1 = self._search_regex(r'param1:(\d+)', video_info_page, 'param1')
+        param2 = self._search_regex(r'param2:(\d+)', video_info_page, 'param2')
        key = _md5('xl_mp43651' + param1 + param2)
        video_url = 'http://%s%s?key=%s&key1=%s' % (ip, path, key, param2)

-        return {'id': video_id,
-                'title': title,
-                'url': video_url,
-                'ext': determine_ext(video_url),
-                }
+        return {
+            'id': video_id,
+            'title': title,
+            'url': video_url,
+        }
--- a/youtube_dl/extractor/metacritic.py
+++ b/youtube_dl/extractor/metacritic.py
@@ -4,7 +4,7 @@ import re

 from .common import InfoExtractor
 from ..utils import (
-    fix_xml_all_ampersand,
+    fix_xml_ampersands,
 )


@@ -27,7 +27,7 @@ class MetacriticIE(InfoExtractor):
        webpage = self._download_webpage(url, video_id)
        # The xml is not well formatted, there are raw '&'
        info = self._download_xml('http://www.metacritic.com/video_data?video=' + video_id,
-            video_id, 'Downloading info xml', transform_source=fix_xml_all_ampersand)
+            video_id, 'Downloading info xml', transform_source=fix_xml_ampersands)

        clip = next(c for c in info.findall('playList/clip') if c.find('id').text == video_id)
        formats = []
--- a/youtube_dl/extractor/mixcloud.py
+++ b/youtube_dl/extractor/mixcloud.py
@@ -1,4 +1,5 @@
-import json
+from __future__ import unicode_literals
+
 import re

 from .common import InfoExtractor
@@ -10,17 +11,17 @@ from ..utils import (

 class MixcloudIE(InfoExtractor):
    _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([\w\d-]+)/([\w\d-]+)'
-    IE_NAME = u'mixcloud'
+    IE_NAME = 'mixcloud'

    _TEST = {
-        u'url': u'http://www.mixcloud.com/dholbach/cryptkeeper/',
-        u'file': u'dholbach-cryptkeeper.mp3',
-        u'info_dict': {
-            u'title': u'Cryptkeeper',
-            u'description': u'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.',
-            u'uploader': u'Daniel Holbach',
-            u'uploader_id': u'dholbach',
-            u'upload_date': u'20111115',
+        'url': 'http://www.mixcloud.com/dholbach/cryptkeeper/',
+        'file': 'dholbach-cryptkeeper.mp3',
+        'info_dict': {
+            'title': 'Cryptkeeper',
+            'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.',
+            'uploader': 'Daniel Holbach',
+            'uploader_id': 'dholbach',
+            'upload_date': '20111115',
        },
    }

@@ -42,17 +43,18 @@ class MixcloudIE(InfoExtractor):

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
-
        uploader = mobj.group(1)
        cloudcast_name = mobj.group(2)
        track_id = '-'.join((uploader, cloudcast_name))
-        api_url = 'http://api.mixcloud.com/%s/%s/' % (uploader, cloudcast_name)
-        webpage = self._download_webpage(url, track_id)
-        json_data = self._download_webpage(api_url, track_id,
-            u'Downloading cloudcast info')
-        info = json.loads(json_data)

-        preview_url = self._search_regex(r'data-preview-url="(.+?)"', webpage, u'preview url')
+        webpage = self._download_webpage(url, track_id)
+
+        api_url = 'http://api.mixcloud.com/%s/%s/' % (uploader, cloudcast_name)
+        info = self._download_json(
+            api_url, track_id, 'Downloading cloudcast info')
+
+        preview_url = self._search_regex(
+            r'\s(?:data-preview-url|m-preview)="(.+?)"', webpage, 'preview url')
        song_url = preview_url.replace('/previews/', '/c/originals/')
        template_url = re.sub(r'(stream\d*)', 'stream%d', song_url)
        final_song_url = self._get_url(template_url)
--- a/youtube_dl/extractor/mpora.py
+++ b/youtube_dl/extractor/mpora.py
@@ -34,7 +34,7 @@ class MporaIE(InfoExtractor):

        data = json.loads(data_json)

-        uploader = data['info_overlay']['name']
+        uploader = data['info_overlay'].get('username')
        duration = data['video']['duration'] // 1000
        thumbnail = data['video']['encodings']['sd']['poster']
        title = data['info_overlay']['title']
--- a/youtube_dl/extractor/mtv.py
+++ b/youtube_dl/extractor/mtv.py
@@ -1,12 +1,18 @@
+from __future__ import unicode_literals
+
 import re
-import xml.etree.ElementTree

 from .common import InfoExtractor
 from ..utils import (
    compat_urllib_parse,
    ExtractorError,
+    find_xpath_attr,
+    fix_xml_ampersands,
+    url_basename,
+    RegexNotFoundError,
 )

+
 def _media_xml_tag(tag):
+    """Return *tag* prefixed with the '{http://search.yahoo.com/mrss/}' namespace.
+
+    The '{uri}tag' result is what this module passes to the XML
+    ``find`` lookups for namespaced elements.
+    """
    return '{http://search.yahoo.com/mrss/}%s' % tag

@@ -33,10 +39,9 @@ class MTVServicesInfoExtractor(InfoExtractor):
        else:
            return thumb_node.attrib['url']

-    def _extract_video_formats(self, metadataXml):
-        if '/error_country_block.swf' in metadataXml:
-            raise ExtractorError(u'This video is not available from your country.', expected=True)
-        mdoc = xml.etree.ElementTree.fromstring(metadataXml.encode('utf-8'))
+    def _extract_video_formats(self, mdoc):
+        if re.match(r'.*/error_country_block\.swf$', mdoc.find('.//src').text) is not None:
+            raise ExtractorError('This video is not available from your country.', expected=True)

        formats = []
        for rendition in mdoc.findall('.//rendition'):
@@ -59,11 +64,12 @@ class MTVServicesInfoExtractor(InfoExtractor):
        self.report_extraction(video_id)
        mediagen_url = itemdoc.find('%s/%s' % (_media_xml_tag('group'), _media_xml_tag('content'))).attrib['url']
        # Remove the templates, like &device={device}
-        mediagen_url = re.sub(r'&[^=]*?={.*?}(?=(&|$))', u'', mediagen_url)
+        mediagen_url = re.sub(r'&[^=]*?={.*?}(?=(&|$))', '', mediagen_url)
        if 'acceptMethods' not in mediagen_url:
            mediagen_url += '&acceptMethods=fms'
-        mediagen_page = self._download_webpage(mediagen_url, video_id,
-                                               u'Downloading video urls')
+
+        mediagen_doc = self._download_xml(mediagen_url, video_id,
+            'Downloading video urls')

        description_node = itemdoc.find('description')
        if description_node is not None:
@@ -71,9 +77,23 @@ class MTVServicesInfoExtractor(InfoExtractor):
        else:
            description = None

+        title_el = None
+        if title_el is None:
+            title_el = find_xpath_attr(
+                itemdoc, './/{http://search.yahoo.com/mrss/}category',
+                'scheme', 'urn:mtvn:video_title')
+        if title_el is None:
+            title_el = itemdoc.find('.//{http://search.yahoo.com/mrss/}title')
+        if title_el is None:
+            title_el = itemdoc.find('.//title')
+        title = title_el.text
+        if title is None:
+            raise ExtractorError('Could not find video title')
+        title = title.strip()
+
        return {
-            'title': itemdoc.find('title').text,
-            'formats': self._extract_video_formats(mediagen_page),
+            'title': title,
+            'formats': self._extract_video_formats(mediagen_doc),
            'id': video_id,
            'thumbnail': self._get_thumbnail_url(uri, itemdoc),
            'description': description,
@@ -83,14 +103,25 @@ class MTVServicesInfoExtractor(InfoExtractor):
        video_id = self._id_from_uri(uri)
        data = compat_urllib_parse.urlencode({'uri': uri})

-        def fix_ampersand(s):
-            """ Fix unencoded ampersand in XML """
-            return s.replace(u'& ', '&amp; ')
        idoc = self._download_xml(
            self._FEED_URL + '?' + data, video_id,
-            u'Downloading info', transform_source=fix_ampersand)
+            'Downloading info', transform_source=fix_xml_ampersands)
        return [self._get_video_info(item) for item in idoc.findall('.//item')]

+    def _real_extract(self, url):
+        """Generic extraction shared by the extractors built on this class.
+
+        Downloads the page at *url*, determines the mgid of its video and
+        returns whatever _get_videos_info produces for that mgid.
+        """
+        title = url_basename(url)
+        webpage = self._download_webpage(url, title)
+        # Keep the try body down to the one lookup whose failure is handled.
+        try:
+            og_url = self._og_search_video_url(webpage)
+        except RegexNotFoundError:
+            # No such URL on the page: fall back to the data-mgid attribute.
+            mgid = self._search_regex(r'data-mgid="(.*?)"', webpage, 'mgid')
+        else:
+            # the url can be http://media.mtvnservices.com/fb/{mgid}.swf
+            # or http://media.mtvnservices.com/{mgid}
+            mgid = url_basename(og_url)
+            if mgid.endswith('.swf'):
+                mgid = mgid[:-4]
+        return self._get_videos_info(mgid)
+

 class MTVIE(MTVServicesInfoExtractor):
    _VALID_URL = r'''(?x)^https?://
@@ -101,25 +132,25 @@ class MTVIE(MTVServicesInfoExtractor):

    _TESTS = [
        {
-            u'url': u'http://www.mtv.com/videos/misc/853555/ours-vh1-storytellers.jhtml',
-            u'file': u'853555.mp4',
-            u'md5': u'850f3f143316b1e71fa56a4edfd6e0f8',
-            u'info_dict': {
-                u'title': u'Taylor Swift - "Ours (VH1 Storytellers)"',
-                u'description': u'Album: Taylor Swift performs "Ours" for VH1 Storytellers at Harvey Mudd College.',
+            'url': 'http://www.mtv.com/videos/misc/853555/ours-vh1-storytellers.jhtml',
+            'file': '853555.mp4',
+            'md5': '850f3f143316b1e71fa56a4edfd6e0f8',
+            'info_dict': {
+                'title': 'Taylor Swift - "Ours (VH1 Storytellers)"',
+                'description': 'Album: Taylor Swift performs "Ours" for VH1 Storytellers at Harvey Mudd College.',
            },
        },
        {
-            u'add_ie': ['Vevo'],
-            u'url': u'http://www.mtv.com/videos/taylor-swift/916187/everything-has-changed-ft-ed-sheeran.jhtml',
-            u'file': u'USCJY1331283.mp4',
-            u'md5': u'73b4e7fcadd88929292fe52c3ced8caf',
-            u'info_dict': {
-                u'title': u'Everything Has Changed',
-                u'upload_date': u'20130606',
-                u'uploader': u'Taylor Swift',
+            'add_ie': ['Vevo'],
+            'url': 'http://www.mtv.com/videos/taylor-swift/916187/everything-has-changed-ft-ed-sheeran.jhtml',
+            'file': 'USCJY1331283.mp4',
+            'md5': '73b4e7fcadd88929292fe52c3ced8caf',
+            'info_dict': {
+                'title': 'Everything Has Changed',
+                'upload_date': '20130606',
+                'uploader': 'Taylor Swift',
            },
-            u'skip': u'VEVO is only available in some countries',
+            'skip': 'VEVO is only available in some countries',
        },
    ]

@@ -138,8 +169,22 @@ class MTVIE(MTVServicesInfoExtractor):
                               webpage, re.DOTALL)
            if m_vevo:
                vevo_id = m_vevo.group(1);
-                self.to_screen(u'Vevo video detected: %s' % vevo_id)
+                self.to_screen('Vevo video detected: %s' % vevo_id)
                return self.url_result('vevo:%s' % vevo_id, ie='Vevo')
    
-            uri = self._html_search_regex(r'/uri/(.*?)\?', webpage, u'uri')
+            uri = self._html_search_regex(r'/uri/(.*?)\?', webpage, 'uri')
        return self._get_videos_info(uri)
+
+
+class MTVIggyIE(MTVServicesInfoExtractor):
+    """Extractor for mtviggy.com video pages.
+
+    Declares only the extractor name, URL pattern, test case and feed
+    URL; it defines no methods of its own.
+    """
+
+    IE_NAME = 'mtviggy.com'
+    _VALID_URL = r'https?://www\.mtviggy\.com/videos/.+'
+    _TEST = {
+        'url': 'http://www.mtviggy.com/videos/arcade-fire-behind-the-scenes-at-the-biggest-music-experiment-yet/',
+        'info_dict': {
+            'id': '984696',
+            'ext': 'mp4',
+            'title': 'Arcade Fire: Behind the Scenes at the Biggest Music Experiment Yet',
+        }
+    }
+    _FEED_URL = 'http://all.mtvworldverticals.com/feed-xml/'
--- a/youtube_dl/extractor/myspace.py
+++ b/youtube_dl/extractor/myspace.py
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import re
 import json

@@ -8,41 +10,75 @@ from ..utils import (


 class MySpaceIE(InfoExtractor):
-    _VALID_URL = r'https?://myspace\.com/([^/]+)/video/[^/]+/(?P<id>\d+)'
+    _VALID_URL = r'https?://myspace\.com/([^/]+)/(?P<mediatype>video/[^/]+/|music/song/.*?)(?P<id>\d+)'

-    _TEST = {
-        u'url': u'https://myspace.com/coldplay/video/viva-la-vida/100008689',
-        u'info_dict': {
-            u'id': u'100008689',
-            u'ext': u'flv',
-            u'title': u'Viva La Vida',
-            u'description': u'The official Viva La Vida video, directed by Hype Williams',
-            u'uploader': u'Coldplay',
-            u'uploader_id': u'coldplay',
+    _TESTS = [
+        {
+            'url': 'https://myspace.com/coldplay/video/viva-la-vida/100008689',
+            'info_dict': {
+                'id': '100008689',
+                'ext': 'flv',
+                'title': 'Viva La Vida',
+                'description': 'The official Viva La Vida video, directed by Hype Williams',
+                'uploader': 'Coldplay',
+                'uploader_id': 'coldplay',
+            },
+            'params': {
+                # rtmp download
+                'skip_download': True,
+            },
        },
-        u'params': {
-            # rtmp download
-            u'skip_download': True,
+        # song
+        {
+            'url': 'https://myspace.com/spiderbags/music/song/darkness-in-my-heart-39008454-27041242',
+            'info_dict': {
+                'id': '39008454',
+                'ext': 'flv',
+                'title': 'Darkness In My Heart',
+                'uploader_id': 'spiderbags',
+            },
+            'params': {
+                # rtmp download
+                'skip_download': True,
+            },
        },
-    }
+    ]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)
-        context = json.loads(self._search_regex(r'context = ({.*?});', webpage,
-            u'context'))
-        video = context['video']
-        rtmp_url, play_path = video['streamUrl'].split(';', 1)

-        return {
-            'id': compat_str(video['mediaId']),
-            'title': video['title'],
+        if mobj.group('mediatype').startswith('music/song'):
+            # songs don't store any useful info in the 'context' variable
+            def search_data(name):
+                return self._search_regex(r'data-%s="(.*?)"' % name, webpage,
+                    name)
+            streamUrl = search_data('stream-url')
+            info = {
+                'id': video_id,
+                'title': self._og_search_title(webpage),
+                'uploader_id': search_data('artist-username'),
+                'thumbnail': self._og_search_thumbnail(webpage),
+            }
+        else:
+            context = json.loads(self._search_regex(r'context = ({.*?});', webpage,
+                u'context'))
+            video = context['video']
+            streamUrl = video['streamUrl']
+            info = {
+                'id': compat_str(video['mediaId']),
+                'title': video['title'],
+                'description': video['description'],
+                'thumbnail': video['imageUrl'],
+                'uploader': video['artistName'],
+                'uploader_id': video['artistUsername'],
+            }
+
+        rtmp_url, play_path = streamUrl.split(';', 1)
+        info.update({
            'url': rtmp_url,
            'play_path': play_path,
            'ext': 'flv',
-            'description': video['description'],
-            'thumbnail': video['imageUrl'],
-            'uploader': video['artistName'],
-            'uploader_id': video['artistUsername'],
-        }
+        })
+        return info
--- a/youtube_dl/extractor/novamov.py
+++ b/youtube_dl/extractor/novamov.py
@@ -19,7 +19,8 @@ class NovamovIE(InfoExtractor):
        'info_dict': {
            'title': 'search engine optimization',
            'description': 'search engine optimization is used to rank the web page in the google search engine'
-        }
+        },
+        'skip': '"Invalid token" errors abound (in web interface as well as youtube-dl, there is nothing we can do about it.)'
    }

    def _real_extract(self, url):
--- a/youtube_dl/extractor/redtube.py
+++ b/youtube_dl/extractor/redtube.py
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import re

 from .common import InfoExtractor
@@ -6,14 +8,14 @@ from .common import InfoExtractor
 class RedTubeIE(InfoExtractor):
    _VALID_URL = r'http://(?:www\.)?redtube\.com/(?P<id>[0-9]+)'
    _TEST = {
-        u'url': u'http://www.redtube.com/66418',
-        u'file': u'66418.mp4',
+        'url': 'http://www.redtube.com/66418',
+        'file': '66418.mp4',
        # md5 varies from time to time, as in
        # https://travis-ci.org/rg3/youtube-dl/jobs/14052463#L295
-        #u'md5': u'7b8c22b5e7098a3e1c09709df1126d2d',
-        u'info_dict': {
-            u"title": u"Sucked on a toilet",
-            u"age_limit": 18,
+        #'md5': u'7b8c22b5e7098a3e1c09709df1126d2d',
+        'info_dict': {
+            "title": "Sucked on a toilet",
+            "age_limit": 18,
        }
    }

@@ -33,14 +35,19 @@ class RedTubeIE(InfoExtractor):
            r'<h1 class="videoTitle[^"]*">(.+?)</h1>',
            webpage, u'title')

+        video_thumbnail = self._html_search_regex(
+            r'playerInnerHTML.+?<img\s+src="(.+?)"',
+            webpage, u'thumbnail', fatal=False)
+
        # No self-labeling, but they describe themselves as
        # "Home of Videos Porno"
        age_limit = 18

        return {
-            'id':        video_id,
-            'url':       video_url,
-            'ext':       video_extension,
-            'title':     video_title,
+            'id': video_id,
+            'url': video_url,
+            'ext': video_extension,
+            'title': video_title,
+            'thumbnail': video_thumbnail,
            'age_limit': age_limit,
        }
--- a/youtube_dl/extractor/ringtv.py
+++ b/youtube_dl/extractor/ringtv.py
@@ -1,37 +1,44 @@
+from __future__ import unicode_literals
+
 import re

 from .common import InfoExtractor


 class RingTVIE(InfoExtractor):
-    _VALID_URL = r'(?:http://)?(?:www\.)?ringtv\.craveonline\.com/videos/video/([^/]+)'
+    _VALID_URL = r'(?:http://)?(?:www\.)?ringtv\.craveonline\.com/(?P<type>news|videos/video)/(?P<id>[^/?#]+)'
    _TEST = {
-        u"url": u"http://ringtv.craveonline.com/videos/video/746619-canelo-alvarez-talks-about-mayweather-showdown",
-        u"file": u"746619.mp4",
-        u"md5": u"7c46b4057d22de32e0a539f017e64ad3",
-        u"info_dict": {
-            u"title": u"Canelo Alvarez talks about Mayweather showdown",
-            u"description": u"Saul \\\"Canelo\\\" Alvarez spoke to the media about his Sept. 14 showdown with Floyd Mayweather after their kick-off presser in NYC. Canelo is motivated and confident that he will have the speed and gameplan to beat the pound-for-pound king."
+        "url": "http://ringtv.craveonline.com/news/310833-luis-collazo-says-victor-ortiz-better-not-quit-on-jan-30",
+        "file": "857645.mp4",
+        "md5": "d25945f5df41cdca2d2587165ac28720",
+        "info_dict": {
+            "title": 'Video: Luis Collazo says Victor Ortiz "better not quit on Jan. 30" - Ring TV',
+            "description": 'Luis Collazo is excited about his Jan. 30 showdown with fellow former welterweight titleholder Victor Ortiz at Barclays Center in his hometown of Brooklyn. The SuperBowl week fight headlines a Golden Boy Live! card on Fox Sports 1.',
        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group(1).split('-')[0]
+        video_id = mobj.group('id').split('-')[0]
        webpage = self._download_webpage(url, video_id)
-        title = self._search_regex(r'<title>(.+?)</title>',
-        		webpage, 'video title').replace(' | RingTV','')
-        description = self._search_regex(r'<div class="blurb">(.+?)</div>',
-        		webpage, 'Description')
-        final_url = "http://ringtv.craveonline.springboardplatform.com/storage/ringtv.craveonline.com/conversion/%s.mp4" %(str(video_id))
-        thumbnail_url = "http://ringtv.craveonline.springboardplatform.com/storage/ringtv.craveonline.com/snapshots/%s.jpg" %(str(video_id))
-        ext = final_url.split('.')[-1]
-        return [{
-            'id'          : video_id,
-            'url'         : final_url,
-            'ext'         : ext,
-            'title'       : title,
-            'thumbnail'   : thumbnail_url,
-            'description' : description,
-        }]
+
+        if mobj.group('type') == 'news':
+            video_id = self._search_regex(
+                r'''(?x)<iframe[^>]+src="http://cms\.springboardplatform\.com/
+                        embed_iframe/[0-9]+/video/([0-9]+)/''',
+                webpage, 'real video ID')
+        title = self._og_search_title(webpage)
+        description = self._html_search_regex(
+            r'addthis:description="([^"]+)"',
+            webpage, 'description', fatal=False)
+        final_url = "http://ringtv.craveonline.springboardplatform.com/storage/ringtv.craveonline.com/conversion/%s.mp4" % video_id
+        thumbnail_url = "http://ringtv.craveonline.springboardplatform.com/storage/ringtv.craveonline.com/snapshots/%s.jpg" % video_id
+
+        return {
+            'id': video_id,
+            'url': final_url,
+            'title': title,
+            'thumbnail': thumbnail_url,
+            'description': description,
+        }

--- a/youtube_dl/extractor/servingsys.py
+++ b/youtube_dl/extractor/servingsys.py
@@ -0,0 +1,71 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+)
+
+
+class ServingSysIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:[^.]+\.)?serving-sys\.com/BurstingPipe/adServer\.bs\?.*?&pli=(?P<id>[0-9]+)'
+
+    _TEST = {
+        'url': 'http://bs.serving-sys.com/BurstingPipe/adServer.bs?cn=is&c=23&pl=VAST&pli=5349193&PluID=0&pos=7135&ord=[timestamp]&cim=1?',
+        'playlist': [{
+            'file': '29955898.flv',
+            'md5': 'baed851342df6846eb8677a60a011a0f',
+            'info_dict': {
+                'title': 'AdAPPter_Hyundai_demo (1)',
+                'duration': 74,
+                'tbr': 1378,
+                'width': 640,
+                'height': 400,
+            },
+        }, {
+            'file': '29907998.flv',
+            'md5': '979b4da2655c4bc2d81aeb915a8c5014',
+            'info_dict': {
+                'title': 'AdAPPter_Hyundai_demo (2)',
+                'duration': 34,
+                'width': 854,
+                'height': 480,
+                'tbr': 516,
+            },
+        }],
+        'params': {
+            'playlistend': 2,
+        },
+        'skip': 'Blocked in the US [sic]',
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        pl_id = mobj.group('id')
+
+        vast_doc = self._download_xml(url, pl_id)
+        title = vast_doc.find('.//AdTitle').text
+        media = vast_doc.find('.//MediaFile').text
+        info_url = self._search_regex(r'&adData=([^&]+)&', media, 'info URL')
+
+        doc = self._download_xml(info_url, pl_id, 'Downloading video info')
+        entries = [{
+            '_type': 'video',
+            'id': a.attrib['id'],
+            'title': '%s (%s)' % (title, a.attrib['assetID']),
+            'url': a.attrib['URL'],
+            'duration': int_or_none(a.attrib.get('length')),
+            'tbr': int_or_none(a.attrib.get('bitrate')),
+            'height': int_or_none(a.attrib.get('height')),
+            'width': int_or_none(a.attrib.get('width')),
+        } for a in doc.findall('.//AdditionalAssets/asset')]
+
+        return {
+            '_type': 'playlist',
+            'id': pl_id,
+            'title': title,
+            'entries': entries,
+        }
+
+ 
--- a/youtube_dl/extractor/soundcloud.py
+++ b/youtube_dl/extractor/soundcloud.py
@@ -1,4 +1,6 @@
 # encoding: utf-8
+from __future__ import unicode_literals
+
 import json
 import re
 import itertools
@@ -32,58 +34,58 @@ class SoundcloudIE(InfoExtractor):
                       |(?P<player>(?:w|player|p.)\.soundcloud\.com/player/?.*?url=.*)
                    )
                    '''
-    IE_NAME = u'soundcloud'
+    IE_NAME = 'soundcloud'
    _TESTS = [
        {
-            u'url': u'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy',
-            u'file': u'62986583.mp3',
-            u'md5': u'ebef0a451b909710ed1d7787dddbf0d7',
-            u'info_dict': {
-                u"upload_date": u"20121011", 
-                u"description": u"No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o'd", 
-                u"uploader": u"E.T. ExTerrestrial Music", 
-                u"title": u"Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1"
+            'url': 'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy',
+            'file': '62986583.mp3',
+            'md5': 'ebef0a451b909710ed1d7787dddbf0d7',
+            'info_dict': {
+                "upload_date": "20121011",
+                "description": "No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o'd",
+                "uploader": "E.T. ExTerrestrial Music",
+                "title": "Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1"
            }
        },
        # not streamable song
        {
-            u'url': u'https://soundcloud.com/the-concept-band/goldrushed-mastered?in=the-concept-band/sets/the-royal-concept-ep',
-            u'info_dict': {
-                u'id': u'47127627',
-                u'ext': u'mp3',
-                u'title': u'Goldrushed',
-                u'uploader': u'The Royal Concept',
-                u'upload_date': u'20120521',
+            'url': 'https://soundcloud.com/the-concept-band/goldrushed-mastered?in=the-concept-band/sets/the-royal-concept-ep',
+            'info_dict': {
+                'id': '47127627',
+                'ext': 'mp3',
+                'title': 'Goldrushed',
+                'uploader': 'The Royal Concept',
+                'upload_date': '20120521',
            },
-            u'params': {
+            'params': {
                # rtmp
-                u'skip_download': True,
+                'skip_download': True,
            },
        },
        # private link
        {
-            u'url': u'https://soundcloud.com/jaimemf/youtube-dl-test-video-a-y-baw/s-8Pjrp',
-            u'md5': u'aa0dd32bfea9b0c5ef4f02aacd080604',
-            u'info_dict': {
-                u'id': u'123998367',
-                u'ext': u'mp3',
-                u'title': u'Youtube - Dl Test Video \'\' Ä↭',
-                u'uploader': u'jaimeMF',
-                u'description': u'test chars:  \"\'/\\ä↭',
-                u'upload_date': u'20131209',
+            'url': 'https://soundcloud.com/jaimemf/youtube-dl-test-video-a-y-baw/s-8Pjrp',
+            'md5': 'aa0dd32bfea9b0c5ef4f02aacd080604',
+            'info_dict': {
+                'id': '123998367',
+                'ext': 'mp3',
+                'title': 'Youtube - Dl Test Video \'\' Ä↭',
+                'uploader': 'jaimeMF',
+                'description': 'test chars:  \"\'/\\ä↭',
+                'upload_date': '20131209',
            },
        },
        # downloadable song
        {
-            u'url': u'https://soundcloud.com/simgretina/just-your-problem-baby-1',
-            u'md5': u'56a8b69568acaa967b4c49f9d1d52d19',
-            u'info_dict': {
-                u'id': u'105614606',
-                u'ext': u'wav',
-                u'title': u'Just Your Problem Baby (Acapella)',
-                u'description': u'Vocals',
-                u'uploader': u'Sim Gretina',
-                u'upload_date': u'20130815',
+            'url': 'https://soundcloud.com/simgretina/just-your-problem-baby-1',
+            'md5': '56a8b69568acaa967b4c49f9d1d52d19',
+            'info_dict': {
+                'id': '105614606',
+                'ext': 'wav',
+                'title': 'Just Your Problem Baby (Acapella)',
+                'description': 'Vocals',
+                'uploader': 'Sim Gretina',
+                'upload_date': '20130815',
            },
        },
    ]
@@ -112,7 +114,7 @@ class SoundcloudIE(InfoExtractor):
        thumbnail = info['artwork_url']
        if thumbnail is not None:
            thumbnail = thumbnail.replace('-large', '-t500x500')
-        ext = u'mp3'
+        ext = 'mp3'
        result = {
            'id': track_id,
            'uploader': info['user']['username'],
@@ -124,11 +126,11 @@ class SoundcloudIE(InfoExtractor):
        if info.get('downloadable', False):
            # We can build a direct link to the song
            format_url = (
-                u'https://api.soundcloud.com/tracks/{0}/download?client_id={1}'.format(
+                'https://api.soundcloud.com/tracks/{0}/download?client_id={1}'.format(
                    track_id, self._CLIENT_ID))
            result['formats'] = [{
                'format_id': 'download',
-                'ext': info.get('original_format', u'mp3'),
+                'ext': info.get('original_format', 'mp3'),
                'url': format_url,
                'vcodec': 'none',
            }]
@@ -138,7 +140,7 @@ class SoundcloudIE(InfoExtractor):
                'client_id={1}&secret_token={2}'.format(track_id, self._IPHONE_CLIENT_ID, secret_token))
            stream_json = self._download_webpage(
                streams_url,
-                track_id, u'Downloading track url')
+                track_id, 'Downloading track url')

            formats = []
            format_dict = json.loads(stream_json)
@@ -165,20 +167,19 @@ class SoundcloudIE(InfoExtractor):
                # We fallback to the stream_url in the original info, this
                # cannot be always used, sometimes it can give an HTTP 404 error
                formats.append({
-                    'format_id': u'fallback',
+                    'format_id': 'fallback',
                    'url': info['stream_url'] + '?client_id=' + self._CLIENT_ID,
                    'ext': ext,
                    'vcodec': 'none',
                })

-            def format_pref(f):
+            for f in formats:
                if f['format_id'].startswith('http'):
-                    return 2
+                    f['protocol'] = 'http'
                if f['format_id'].startswith('rtmp'):
-                    return 1
-                return 0
+                    f['protocol'] = 'rtmp'

-            formats.sort(key=format_pref)
+            self._sort_formats(formats)
            result['formats'] = formats

        return result
@@ -210,14 +211,14 @@ class SoundcloudIE(InfoExtractor):
    
            url = 'http://soundcloud.com/%s' % resolve_title
            info_json_url = self._resolv_url(url)
-        info_json = self._download_webpage(info_json_url, full_title, u'Downloading info JSON')
+        info_json = self._download_webpage(info_json_url, full_title, 'Downloading info JSON')

        info = json.loads(info_json)
        return self._extract_info_dict(info, full_title, secret_token=token)

 class SoundcloudSetIE(SoundcloudIE):
    _VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)(?:[?].*)?$'
-    IE_NAME = u'soundcloud:set'
+    IE_NAME = 'soundcloud:set'
    # it's in tests/test_playlists.py
    _TESTS = []

@@ -254,7 +255,7 @@ class SoundcloudSetIE(SoundcloudIE):

 class SoundcloudUserIE(SoundcloudIE):
    _VALID_URL = r'https?://(www\.)?soundcloud\.com/(?P<user>[^/]+)(/?(tracks/)?)?(\?.*)?$'
-    IE_NAME = u'soundcloud:user'
+    IE_NAME = 'soundcloud:user'

    # it's in tests/test_playlists.py
    _TESTS = []
@@ -266,7 +267,7 @@ class SoundcloudUserIE(SoundcloudIE):
        url = 'http://soundcloud.com/%s/' % uploader
        resolv_url = self._resolv_url(url)
        user_json = self._download_webpage(resolv_url, uploader,
-            u'Downloading user info')
+            'Downloading user info')
        user = json.loads(user_json)

        tracks = []
@@ -276,7 +277,7 @@ class SoundcloudUserIE(SoundcloudIE):
                                                  })
            tracks_url = 'http://api.soundcloud.com/users/%s/tracks.json?' % user['id'] + data
            response = self._download_webpage(tracks_url, uploader, 
-                u'Downloading tracks page %s' % (i+1))
+                'Downloading tracks page %s' % (i+1))
            new_tracks = json.loads(response)
            tracks.extend(self._extract_info_dict(track, quiet=True) for track in new_tracks)
            if len(new_tracks) < 50:
--- a/youtube_dl/extractor/southparkstudios.py
+++ b/youtube_dl/extractor/southparkstudios.py
@@ -18,14 +18,6 @@ class SouthParkStudiosIE(MTVServicesInfoExtractor):
        },
    }]

-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        url = u'http://www.' + mobj.group(u'url')
-        video_id = mobj.group('id')
-        webpage = self._download_webpage(url, video_id)
-        mgid = self._search_regex(r'swfobject.embedSWF\(".*?(mgid:.*?)"',
-                                  webpage, u'mgid')
-        return self._get_videos_info(mgid)

 class SouthparkDeIE(SouthParkStudiosIE):
    IE_NAME = u'southpark.de'
--- a/youtube_dl/extractor/space.py
+++ b/youtube_dl/extractor/space.py
@@ -6,7 +6,7 @@ from ..utils import RegexNotFoundError, ExtractorError


 class SpaceIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.space\.com/\d+-(?P<title>[^/\.\?]*?)-video\.html'
+    _VALID_URL = r'https?://(?:(?:www|m)\.)?space\.com/\d+-(?P<title>[^/\.\?]*?)-video\.html'
    _TEST = {
        u'add_ie': ['Brightcove'],
        u'url': u'http://www.space.com/23373-huge-martian-landforms-detail-revealed-by-european-probe-video.html',
--- a/youtube_dl/extractor/spankwire.py
+++ b/youtube_dl/extractor/spankwire.py
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import os
 import re

@@ -11,17 +13,18 @@ from ..aes import (
    aes_decrypt_text
 )

+
 class SpankwireIE(InfoExtractor):
    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>spankwire\.com/[^/]*/video(?P<videoid>[0-9]+)/?)'
    _TEST = {
-        u'url': u'http://www.spankwire.com/Buckcherry-s-X-Rated-Music-Video-Crazy-Bitch/video103545/',
-        u'file': u'103545.mp4',
-        u'md5': u'1b3f55e345500552dbc252a3e9c1af43',
-        u'info_dict': {
-            u"uploader": u"oreusz", 
-            u"title": u"Buckcherry`s X Rated Music Video Crazy Bitch",
-            u"description": u"Crazy Bitch X rated music video.",
-            u"age_limit": 18,
+        'url': 'http://www.spankwire.com/Buckcherry-s-X-Rated-Music-Video-Crazy-Bitch/video103545/',
+        'file': '103545.mp4',
+        'md5': '1b3f55e345500552dbc252a3e9c1af43',
+        'info_dict': {
+            "uploader": "oreusz",
+            "title": "Buckcherry`s X Rated Music Video Crazy Bitch",
+            "description": "Crazy Bitch X rated music video.",
+            "age_limit": 18,
        }
    }

@@ -34,17 +37,17 @@ class SpankwireIE(InfoExtractor):
        req.add_header('Cookie', 'age_verified=1')
        webpage = self._download_webpage(req, video_id)

-        video_title = self._html_search_regex(r'<h1>([^<]+)', webpage, u'title')
+        video_title = self._html_search_regex(r'<h1>([^<]+)', webpage, 'title')
        video_uploader = self._html_search_regex(
-            r'by:\s*<a [^>]*>(.+?)</a>', webpage, u'uploader', fatal=False)
+            r'by:\s*<a [^>]*>(.+?)</a>', webpage, 'uploader', fatal=False)
        thumbnail = self._html_search_regex(
-            r'flashvars\.image_url = "([^"]+)', webpage, u'thumbnail', fatal=False)
+            r'flashvars\.image_url = "([^"]+)', webpage, 'thumbnail', fatal=False)
        description = self._html_search_regex(
-            r'<div\s+id="descriptionContent">([^<]+)<', webpage, u'description', fatal=False)
+            r'<div\s+id="descriptionContent">([^<]+)<', webpage, 'description', fatal=False)

        video_urls = list(map(compat_urllib_parse.unquote , re.findall(r'flashvars\.quality_[0-9]{3}p = "([^"]+)', webpage)))
        if webpage.find('flashvars\.encrypted = "true"') != -1:
-            password = self._html_search_regex(r'flashvars\.video_title = "([^"]+)', webpage, u'password').replace('+', ' ')
+            password = self._html_search_regex(r'flashvars\.video_title = "([^"]+)', webpage, 'password').replace('+', ' ')
            video_urls = list(map(lambda s: aes_decrypt_text(s, password, 32).decode('utf-8'), video_urls))

        formats = []
@@ -52,14 +55,21 @@ class SpankwireIE(InfoExtractor):
            path = compat_urllib_parse_urlparse(video_url).path
            extension = os.path.splitext(path)[1][1:]
            format = path.split('/')[4].split('_')[:2]
+            resolution, bitrate_str = format
            format = "-".join(format)
+            height = int(resolution.rstrip('P'))
+            tbr = int(bitrate_str.rstrip('K'))
+
            formats.append({
                'url': video_url,
                'ext': extension,
+                'resolution': resolution,
                'format': format,
+                'tbr': tbr,
+                'height': height,
                'format_id': format,
            })
-        formats.sort(key=lambda format: list(map(lambda s: s.zfill(6), format['format'].split('-'))))
+        self._sort_formats(formats)

        age_limit = self._rta_search(webpage)

--- a/youtube_dl/extractor/spike.py
+++ b/youtube_dl/extractor/spike.py
@@ -0,0 +1,19 @@
+from __future__ import unicode_literals
+
+from .mtv import MTVServicesInfoExtractor
+
+
+class SpikeIE(MTVServicesInfoExtractor):
+    _VALID_URL = r'https?://www\.spike\.com/(video-clips|episodes)/.+'
+    _TEST = {
+        'url': 'http://www.spike.com/video-clips/lhtu8m/auction-hunters-can-allen-ride-a-hundred-year-old-motorcycle',
+        'md5': '1a9265f32b0c375793d6c4ce45255256',
+        'info_dict': {
+            'id': 'b9c8221a-4e50-479a-b86d-3333323e38ba',
+            'ext': 'mp4',
+            'title': 'Auction Hunters|Can Allen Ride A Hundred Year-Old Motorcycle?',
+            'description': 'md5:fbed7e82ed5fad493615b3094a9499cb',
+        },
+    }
+
+    _FEED_URL = 'http://www.spike.com/feeds/mrss/'
--- a/youtube_dl/extractor/teamcoco.py
+++ b/youtube_dl/extractor/teamcoco.py
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import re

 from .common import InfoExtractor
@@ -9,61 +11,66 @@ from ..utils import (
 class TeamcocoIE(InfoExtractor):
    _VALID_URL = r'http://teamcoco\.com/video/(?P<url_title>.*)'
    _TEST = {
-        u'url': u'http://teamcoco.com/video/louis-ck-interview-george-w-bush',
-        u'file': u'19705.mp4',
-        u'md5': u'cde9ba0fa3506f5f017ce11ead928f9a',
-        u'info_dict': {
-            u"description": u"Louis C.K. got starstruck by George W. Bush, so what? Part one.", 
-            u"title": u"Louis C.K. Interview Pt. 1 11/3/11"
+        'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush',
+        'file': '19705.mp4',
+        'md5': 'cde9ba0fa3506f5f017ce11ead928f9a',
+        'info_dict': {
+            "description": "Louis C.K. got starstruck by George W. Bush, so what? Part one.",
+            "title": "Louis C.K. Interview Pt. 1 11/3/11"
        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
-            raise ExtractorError(u'Invalid URL: %s' % url)
+            raise ExtractorError('Invalid URL: %s' % url)
        url_title = mobj.group('url_title')
        webpage = self._download_webpage(url, url_title)

-        video_id = self._html_search_regex(r'<article class="video" data-id="(\d+?)"',
-            webpage, u'video id')
+        video_id = self._html_search_regex(
+            r'<article class="video" data-id="(\d+?)"',
+            webpage, 'video id')

        self.report_extraction(video_id)

        data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
        data = self._download_xml(data_url, video_id, 'Downloading data webpage')

-
        qualities = ['500k', '480p', '1000k', '720p', '1080p']
        formats = []
-        for file in data.findall('files/file'):
-            if file.attrib.get('playmode') == 'all':
+        for filed in data.findall('files/file'):
+            if filed.attrib.get('playmode') == 'all':
                # it just duplicates one of the entries
                break
-            file_url = file.text
+            file_url = filed.text
            m_format = re.search(r'(\d+(k|p))\.mp4', file_url)
            if m_format is not None:
                format_id = m_format.group(1)
            else:
-                format_id = file.attrib['bitrate']
+                format_id = filed.attrib['bitrate']
+            tbr = (
+                int(filed.attrib['bitrate'])
+                if filed.attrib['bitrate'].isdigit()
+                else None)
+
+            try:
+                quality = qualities.index(format_id)
+            except ValueError:
+                quality = -1
            formats.append({
                'url': file_url,
                'ext': 'mp4',
+                'tbr': tbr,
                'format_id': format_id,
+                'quality': quality,
            })
-        def sort_key(f):
-            try:
-                return qualities.index(f['format_id'])
-            except ValueError:
-                return -1
-        formats.sort(key=sort_key)
-        if not formats:
-            raise ExtractorError(u'Unable to extract video URL')
+
+        self._sort_formats(formats)

        return {
-            'id':          video_id,
+            'id': video_id,
            'formats': formats,
-            'title':       self._og_search_title(webpage),
-            'thumbnail':   self._og_search_thumbnail(webpage),
+            'title': self._og_search_title(webpage),
+            'thumbnail': self._og_search_thumbnail(webpage),
            'description': self._og_search_description(webpage),
        }
--- a/youtube_dl/extractor/ted.py
+++ b/youtube_dl/extractor/ted.py
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import json
 import re

@@ -7,6 +9,7 @@ from ..utils import (
    RegexNotFoundError,
 )

+
 class TEDIE(SubtitlesInfoExtractor):
    _VALID_URL=r'''http://www\.ted\.com/
                   (
@@ -18,12 +21,12 @@ class TEDIE(SubtitlesInfoExtractor):
                   /(?P<name>\w+) # Here goes the name and then ".html"
                   '''
    _TEST = {
-        u'url': u'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html',
-        u'file': u'102.mp4',
-        u'md5': u'2d76ee1576672e0bd8f187513267adf6',
-        u'info_dict': {
-            u"description": u"md5:c6fa72e6eedbd938c9caf6b2702f5922", 
-            u"title": u"Dan Dennett: The illusion of consciousness"
+        'url': 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html',
+        'file': '102.mp4',
+        'md5': '4ea1dada91e4174b53dac2bb8ace429d',
+        'info_dict': {
+            "description": "md5:c6fa72e6eedbd938c9caf6b2702f5922",
+            "title": "Dan Dennett: The illusion of consciousness"
        }
    }

@@ -47,7 +50,7 @@ class TEDIE(SubtitlesInfoExtractor):
        '''Returns the videos of the playlist'''

        webpage = self._download_webpage(
-            url, playlist_id, u'Downloading playlist webpage')
+            url, playlist_id, 'Downloading playlist webpage')
        matches = re.finditer(
            r'<p\s+class="talk-title[^"]*"><a\s+href="(?P<talk_url>/talks/[^"]+\.html)">[^<]*</a></p>',
            webpage)
--- a/youtube_dl/extractor/vk.py
+++ b/youtube_dl/extractor/vk.py
@@ -1,4 +1,6 @@
 # encoding: utf-8
+from __future__ import unicode_literals
+
 import re
 import json

@@ -10,19 +12,27 @@ from ..utils import (


 class VKIE(InfoExtractor):
-    IE_NAME = u'vk.com'
+    IE_NAME = 'vk.com'
    _VALID_URL = r'https?://vk\.com/(?:videos.*?\?.*?z=)?video(?P<id>.*?)(?:\?|%2F|$)'

-    _TEST = {
-        u'url': u'http://vk.com/videos-77521?z=video-77521_162222515%2Fclub77521',
-        u'md5': u'0deae91935c54e00003c2a00646315f0',
-        u'info_dict': {
-            u'id': u'162222515',
-            u'ext': u'flv',
-            u'title': u'ProtivoGunz - Хуёвая песня',
-            u'uploader': u'Noize MC',
+    _TESTS = [{
+        'url': 'http://vk.com/videos-77521?z=video-77521_162222515%2Fclub77521',
+        'file': '162222515.flv',
+        'md5': '0deae91935c54e00003c2a00646315f0',
+        'info_dict': {
+            'title': 'ProtivoGunz - Хуёвая песня',
+            'uploader': 'Noize MC',
        },
-    }
+    },
+    {
+        'url': 'http://vk.com/video4643923_163339118',
+        'file': '163339118.mp4',
+        'md5': 'f79bccb5cd182b1f43502ca5685b2b36',
+        'info_dict': {
+            'uploader': 'Elvira Dzhonik',
+            'title': 'Dream Theater - Hollow Years Live at Budokan 720*',
+        }
+    }]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
@@ -33,13 +43,21 @@ class VKIE(InfoExtractor):
        if m_yt is not None:
            self.to_screen(u'Youtube video detected')
            return self.url_result(m_yt.group(1), 'Youtube')
-        vars_json = self._search_regex(r'var vars = ({.*?});', info_page, u'vars')
-        vars = json.loads(vars_json)
+        data_json = self._search_regex(r'var vars = ({.*?});', info_page, 'vars')
+        data = json.loads(data_json)
+
+        formats = [{
+            'format_id': k,
+            'url': v,
+            'width': int(k[len('url'):]),
+        } for k, v in data.items()
+            if k.startswith('url')]
+        self._sort_formats(formats)

        return {
-            'id': compat_str(vars['vid']),
-            'url': vars['url240'],
-            'title': unescapeHTML(vars['md_title']),
-            'thumbnail': vars['jpg'],
-            'uploader': vars['md_author'],
+            'id': compat_str(data['vid']),
+            'formats': formats,
+            'title': unescapeHTML(data['md_title']),
+            'thumbnail': data.get('jpg'),
+            'uploader': data.get('md_author'),
        }
--- a/youtube_dl/extractor/wimp.py
+++ b/youtube_dl/extractor/wimp.py
@@ -1,5 +1,6 @@
+from __future__ import unicode_literals
+
 import re
-import base64

 from .common import InfoExtractor

@@ -7,12 +8,12 @@ from .common import InfoExtractor
 class WimpIE(InfoExtractor):
    _VALID_URL = r'(?:http://)?(?:www\.)?wimp\.com/([^/]+)/'
    _TEST = {
-        u'url': u'http://www.wimp.com/deerfence/',
-        u'file': u'deerfence.flv',
-        u'md5': u'8b215e2e0168c6081a1cf84b2846a2b5',
-        u'info_dict': {
-            u"title": u"Watch Till End: Herd of deer jump over a fence.",
-            u"description": u"These deer look as fluid as running water when they jump over this fence as a herd. This video is one that needs to be watched until the very end for the true majesty to be witnessed, but once it comes, it's sure to take your breath away.",
+        'url': 'http://www.wimp.com/deerfence/',
+        'file': 'deerfence.flv',
+        'md5': '8b215e2e0168c6081a1cf84b2846a2b5',
+        'info_dict': {
+            "title": "Watch Till End: Herd of deer jump over a fence.",
+            "description": "These deer look as fluid as running water when they jump over this fence as a herd. This video is one that needs to be watched until the very end for the true majesty to be witnessed, but once it comes, it's sure to take your breath away.",
        }
    }

@@ -20,13 +21,12 @@ class WimpIE(InfoExtractor):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group(1)
        webpage = self._download_webpage(url, video_id)
-        googleString = self._search_regex("googleCode = '(.*?)'", webpage, 'file url')
-        googleString = base64.b64decode(googleString).decode('ascii')
-        final_url = self._search_regex('","(.*?)"', googleString, u'final video url')
+        video_url = self._search_regex(
+            r's1\.addVariable\("file",\s*"([^"]+)"\);', webpage, 'video URL')

        return {
            'id': video_id,
-            'url': final_url,
+            'url': video_url,
            'title': self._og_search_title(webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
            'description': self._og_search_description(webpage),
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -27,6 +27,7 @@ from ..utils import (
    get_element_by_id,
    get_element_by_attribute,
    ExtractorError,
+    RegexNotFoundError,
    unescapeHTML,
    unified_strdate,
    orderedSet,
@@ -1448,7 +1449,14 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
            if re.search(self._MORE_PAGES_INDICATOR, page) is None:
                break

-        playlist_title = self._og_search_title(page)
+        try:
+            playlist_title = self._og_search_title(page)
+        except RegexNotFoundError:
+            self.report_warning(
+                u'Playlist page is missing OpenGraph title, falling back ...',
+                playlist_id)
+            playlist_title = self._html_search_regex(
+                r'<h1 class="pl-header-title">(.*?)</h1>', page, u'title')

        url_results = self._ids_to_results(ids)
        return self.playlist_result(url_results, playlist_id, playlist_title)
--- a/youtube_dl/postprocessor/ffmpeg.py
+++ b/youtube_dl/postprocessor/ffmpeg.py
@@ -479,6 +479,7 @@ class FFmpegMergerPP(FFmpegPostProcessor):
    def run(self, info):
        filename = info['filepath']
        args = ['-c', 'copy']
+        self._downloader.to_screen(u'[ffmpeg] Merging formats into "%s"' % filename)
        self.run_ffmpeg_multiple_files(info['__files_to_merge'], filename, args)
        return True, info

--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -224,7 +224,7 @@ if sys.version_info >= (2,7):
    def find_xpath_attr(node, xpath, key, val):
        """ Find the xpath xpath[@key=val] """
        assert re.match(r'^[a-zA-Z]+$', key)
-        assert re.match(r'^[a-zA-Z0-9@\s]*$', val)
+        assert re.match(r'^[a-zA-Z0-9@\s:._]*$', val)
        expr = xpath + u"[@%s='%s']" % (key, val)
        return node.find(expr)
 else:
@@ -1092,9 +1092,12 @@ def month_by_name(name):
        return None


-def fix_xml_all_ampersand(xml_str):
+def fix_xml_ampersands(xml_str):
    """Replace all the '&' by '&amp;' in XML"""
-    return xml_str.replace(u'&', u'&amp;')
+    return re.sub(
+        r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
+        u'&amp;',
+        xml_str)


 def setproctitle(title):
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@

-__version__ = '2013.01.17.1'
+__version__ = '2014.01.22.2'
Author	SHA1	Message	Date
Philipp Hagemeister	a70c83768e	release 2014.01.22.2	2014-01-22 14:33:16 +01:00
Philipp Hagemeister	04b4d394d9	Add new --default-search option (#2193 )	2014-01-22 14:16:43 +01:00
Jaime Marquínez Ferrándiz	130f12985a	[comedycentral] Use the generic `_real_extract` provided by the base class	2014-01-22 11:44:26 +01:00
Jaime Marquínez Ferrándiz	4ca5d43cd8	Merge pull request #2195 from dstftw/master [space] Add support for mobile URLs	2014-01-22 02:39:17 -08:00
Jaime Marquínez Ferrándiz	4bbf139aa7	[southparkstudios] Use the generic `_real_extract` provided by the base class	2014-01-22 11:35:17 +01:00
dst	47739636a9	[space] Add support for mobile URLs	2014-01-22 17:25:32 +07:00
Jaime Marquínez Ferrándiz	407ae733ab	[cspan] Make ‘www’ optional and improve the regex for extracting the id (fixes #2194 )	2014-01-22 11:06:03 +01:00
Jaime Marquínez Ferrándiz	c39f7013e1	[gametrailers] Use the generic `_real_extract` provided by the base class	2014-01-22 10:51:17 +01:00
Philipp Hagemeister	a4a028323e	[comedycentral] Use unicode_literals	2014-01-22 03:50:49 +01:00
Philipp Hagemeister	780ee4e501	[comedycentral] Adapt testcase In contrast to other sites, ComedyCentral seems to understand how to sensibly use MTV IE, but the additional text shouldn't hurt.	2014-01-22 03:49:17 +01:00
Philipp Hagemeister	d7b51547c0	[imdb:list] Switch to loading the webpage The RSS method seems to be defunct.	2014-01-22 03:41:25 +01:00
Philipp Hagemeister	43030f36db	[d8] typo	2014-01-22 03:10:31 +01:00
Philipp Hagemeister	48c63f1653	[d8] disable test; video got deleted	2014-01-22 03:09:21 +01:00
Philipp Hagemeister	90f479b6d5	[novamov] Skip tests	2014-01-22 03:04:10 +01:00
Philipp Hagemeister	6fd2957163	release 2014.01.22.1	2014-01-22 02:17:00 +01:00
Philipp Hagemeister	d3a1c71917	[ringtv] Fix and add news extraction	2014-01-22 02:16:40 +01:00
Philipp Hagemeister	af1588c05f	[mtv] Update tests and xpath function for new title extraction	2014-01-22 02:04:51 +01:00
Philipp Hagemeister	2250865fb0	[Wimp] Use new URL relay method	2014-01-22 02:01:39 +01:00
Philipp Hagemeister	99f770caa8	[hotnewhiphop] Retrieve media key	2014-01-22 01:55:50 +01:00
Philipp Hagemeister	00122de6a9	[gametrailers/mtv] Fix pre-3.x compatibility function for find_xpath_attr Fixes #2189	2014-01-22 01:04:12 +01:00
Philipp Hagemeister	a70515c0fd	[servingsys] Do not run test on travis Apparently, even the advertisers do geoblocking now!? From the US, this isn't outright blocked, but there are no videos returned.	2014-01-22 00:27:18 +01:00
Philipp Hagemeister	398edd0689	release 2014.01.22	2014-01-22 00:21:41 +01:00
Philipp Hagemeister	6562df768d	Merge branch 'master' of github.com:rg3/youtube-dl Conflicts: youtube_dl/extractor/mtv.py	2014-01-22 00:21:27 +01:00
Philipp Hagemeister	06769acd71	[gametrailers] Use unicode_literals Conflicts: youtube_dl/extractor/gametrailers.py	2014-01-22 00:18:52 +01:00
Philipp Hagemeister	32dac6943d	[mtv] Use unicode_literals	2014-01-22 00:18:09 +01:00
Philipp Hagemeister	90834c78fe	[mtv] Fix title for gametrailers (Fixes #2188 ) We now prefer the title including the category, because that title is what is presented at the actual sites.	2014-01-22 00:17:33 +01:00
Jaime Marquínez Ferrándiz	47917f24c4	[brightcove] Fix extraction of embedded videos There was a leading ‘:’ in the regex. The ‘flashvars’ parameter is not always available.	2014-01-21 22:04:46 +01:00
Jaime Marquínez Ferrándiz	d614aa40e3	[brightcove] Fix check for url in the result It may have the ‘formats’ field instead of ‘url’.	2014-01-21 21:53:10 +01:00
Jaime Marquínez Ferrándiz	bc4ba05fcb	[mtv] Add an extractor for mtviggy.com (#2072 )	2014-01-21 20:59:31 +01:00
Jaime Marquínez Ferrándiz	8d9453b9e8	Add an extractor for spike.com (#2072 ) Added a generic _real_extract to MTVServicesInfoExtractor	2014-01-21 20:54:47 +01:00
Jaime Marquínez Ferrándiz	e4f320a4d0	[mtv] Check for geo-blocked videos in the xml document, not in the xml’s string Allows to use the `_download_xml` method	2014-01-21 19:59:02 +01:00
Jaime Marquínez Ferrándiz	ef9f2ba7af	[mtv] Use unicode_literals	2014-01-21 19:58:21 +01:00
Philipp Hagemeister	4a3b72771f	release 2014.01.21.1	2014-01-21 18:21:53 +01:00
Philipp Hagemeister	913f32929b	[vk] Add support for HQ videos (Fixes #2187 )	2014-01-21 18:21:44 +01:00
Philipp Hagemeister	9834872bf6	[facebook] Add support for embeds Example URL: http://www.hostblogger.de/blog/archives/6181-Auto-jagt-Betonmischer.html	2014-01-21 18:10:17 +01:00
Philipp Hagemeister	94a23d2a1e	[vk] Use unicode_literals	2014-01-21 17:32:03 +01:00
Philipp Hagemeister	608bf69880	[vk] avoid built-in names	2014-01-21 17:29:04 +01:00
Philipp Hagemeister	032b3df5af	[redtube] Use unicode_literals	2014-01-21 14:16:44 +01:00
Mike Col	9d11a41fe4	[redtube] Add support for thumbnails Signed-off-by: Philipp Hagemeister <phihag@phihag.de>	2014-01-21 14:14:55 +01:00
Philipp Hagemeister	2989501131	release 2014.01.21	2014-01-21 14:07:41 +01:00
Philipp Hagemeister	7b0817e8e1	[servingsys] Add support This also adds support for brightcove advertisements. Fixes #2181	2014-01-21 02:09:51 +01:00
Philipp Hagemeister	9d4288b2d4	[extractor/common] Clarify when and when not we generate the filename	2014-01-21 01:41:13 +01:00
Philipp Hagemeister	3486df383b	[generic] Improve testcase	2014-01-21 01:40:34 +01:00
Philipp Hagemeister	b60016e831	Deal with implicitly UTF-16 decoded webpages These webpages don't specify an encoding and rely on the BOM	2014-01-21 01:39:40 +01:00
Philipp Hagemeister	5aafe895fc	Correct XML ampersand fixup	2014-01-20 22:11:34 +01:00
Philipp Hagemeister	b853d2e155	release 2014.01.20	2014-01-20 11:44:37 +01:00
Philipp Hagemeister	c91778f8c0	[youtube] Fall back to header if playlist title is not available Sometimes (in about 10% of requests), the og:title is missing for a weird reason. See #2170 for an example	2014-01-20 02:45:51 +01:00
Philipp Hagemeister	5016f3eac8	[myspace] More robust mediatype check	2014-01-20 02:44:08 +01:00
Jaime Marquínez Ferrándiz	efb1bb90a0	[myspace] Add support for song urls (fixes #2040 )	2014-01-19 11:38:48 +01:00
Philipp Hagemeister	4cf393bb4b	[dropbox] Correct test case (#2171 )	2014-01-19 06:16:40 +01:00
Philipp Hagemeister	ce4e242a6f	[dropbox] PEP8 and simplify (#2171 )	2014-01-19 06:14:24 +01:00
Philipp Hagemeister	b27bec212f	Merge remote-tracking branch 'sahutd/master'	2014-01-19 06:12:20 +01:00
sahutd	704519c7e3	Modified dropbox to reflect small changes	2014-01-19 10:24:20 +05:30
sahutd	6b79f40c3d	Added support for Dropbox	2014-01-19 10:20:26 +05:30
Philipp Hagemeister	dfa50793d8	Merge pull request #2153 from jaimeMF/ffmpeg-merger-check-install Don’t try to merge the formats if ffmpeg or avconv are not installed	2014-01-18 20:42:51 -08:00
sahutd	2a7c35dd46	added dropbox support	2014-01-18 20:50:42 +05:30
sahutd	f2ffd10bb2	Update __init__.py	2014-01-18 20:48:43 +05:30
sahutd	8da531359e	Added dropbox support. issue #2055	2014-01-18 20:45:53 +05:30
Philipp Hagemeister	e2b944cf43	Merge branch 'master' of github.com:rg3/youtube-dl	2014-01-17 14:48:15 +01:00
Philipp Hagemeister	3ec05685f7	[extractor/common] Limit --write-pages filename to 200 chars This avoids problems with very long URLs.	2014-01-17 14:47:47 +01:00
Jaime Marquínez Ferrándiz	e103fd46ca	FFmpegMergerPP: Print an info message with the destination before running ffmpeg	2014-01-17 14:31:23 +01:00
Philipp Hagemeister	877bfd69d1	[cnn] Improve test	2014-01-17 05:06:13 +01:00
Philipp Hagemeister	e0ef49f205	release 2014.01.17.2	2014-01-17 04:22:15 +01:00
Philipp Hagemeister	f68cd00fe3	[kankan] Skip test	2014-01-17 04:21:54 +01:00
Philipp Hagemeister	ca70d215cf	[kankan] Simplify	2014-01-17 04:21:22 +01:00
Philipp Hagemeister	d0390a0c92	[mixcloud] Use unicode_literals	2014-01-17 04:06:18 +01:00
Philipp Hagemeister	dd2535c38a	[mixcloud] Fix URL extraction	2014-01-17 04:05:15 +01:00
Philipp Hagemeister	b78d180170	[mpora] Fix uploader name extraction	2014-01-17 03:59:42 +01:00
Philipp Hagemeister	26dca1661e	[ted] Updated checksums	2014-01-17 03:54:54 +01:00
Philipp Hagemeister	f853f8594d	[ted] Use unicode_literals	2014-01-17 03:52:17 +01:00
Philipp Hagemeister	8307aa73fb	Remove youtube swf signature test Apparently, swf players are no longer in use. If we find one, we'll readd it.	2014-01-17 03:49:59 +01:00
Philipp Hagemeister	d0da491e1e	[condenast] Allow multiple formats, and sort centralized	2014-01-17 03:36:03 +01:00
Philipp Hagemeister	6e249060cf	[condenast] Use unicode_literals	2014-01-17 03:32:02 +01:00
Philipp Hagemeister	fbcd7b5f83	[soundcloud] Use unicode_literals and centralized sorting	2014-01-17 03:29:41 +01:00
Philipp Hagemeister	9ac0a67581	[spankwire] Use centralized format sorting and unicode_literals	2014-01-17 03:26:05 +01:00
Philipp Hagemeister	befdc8f3b6	[teamcoco] Use centralized sorting	2014-01-17 03:22:02 +01:00
Philipp Hagemeister	bb198c95e2	[teamcoco] Use unicode_literals	2014-01-17 03:15:09 +01:00
Philipp Hagemeister	c1195541b7	[gamespot] Use unicode_literals	2014-01-17 03:13:40 +01:00
Philipp Hagemeister	26844eb57b	[franceinter] Remove superfluous whitespace	2014-01-17 03:10:54 +01:00
Philipp Hagemeister	a7732b672e	Credit @sahutd for franceinter (#2152 )	2014-01-17 03:09:34 +01:00
Philipp Hagemeister	677b3ce82f	[franceinter] Minor improvements (#2152 )	2014-01-17 03:09:07 +01:00
Philipp Hagemeister	fabfe17d5e	[flickr] Use unicode literals	2014-01-17 03:07:01 +01:00
Philipp Hagemeister	82696d5d5d	Merge remote-tracking branch 'sahutd/master'	2014-01-17 03:02:55 +01:00
Jaime Marquínez Ferrándiz	58c3c7ae38	Don’t try to merge the formats if ffmpeg or avconv are not installed	2014-01-15 12:59:15 +01:00
sahutd	c8650f7ecd	Made modification as suggested on https://github.com/rg3/youtube-dl/pull/2151	2014-01-15 16:48:55 +05:30
Saimadhav Heblikar	14e7543a5a	franceinter [Issue #2105 ] Added franceinterIE import to reflect addition of FranceInter support. Issue #2105	2014-01-15 11:51:12 +05:30
Saimadhav Heblikar	bf6705f584	Added franceinter [Issue #2105 ]	2014-01-15 11:49:50 +05:30