Compare commits
143 commits between versions 2014.02.21 and 2014.03.04.

Commit SHA1s:
d63516e9cd, e477dcf649, 9d3f7781f3, c7095dada3, 607dbbad76, 17b75c0de1, ab24f4f3be, e1a52d9e10, d0ff838433, b37b94501c,
cb3bb2cfef, e2cc7983e9, c9ae7b9565, 86fb4347f7, 2fcec131f5, 9f62eaf4ef, f92259c026, 0afef30b23, dcdfd1c711, 2acc1f8f50,
2c39b0c695, e77c5b4f63, 409a16cb72, 94d5e90b4f, 2d73b45805, 271a2dbfa2, bf4adcac66, fb8b8fdd62, 5a0b26252e, 7d78f0cc48,
f00fc78674, 392017874c, c3cb92d1ab, aa5590fa07, 8cfb5bbf92, 69bb54ebf9, ca97a56e4b, fc26f3b4c2, f604c93c64, dc3727b65c,
aba3231de1, 9193bab91d, fbcf3e416d, c0e5d85631, ca7fa3dcb3, 4ccfba28d9, abb82f1ddc, cda008cff1, 1877a14049, 546582ec3e,
4534485586, a9ab8855e4, 8a44ef6868, 0c7214c404, 4cf9654693, 50a138d95c, 91346358b0, f3783d4b77, 89ef304bed, 83cebb8b7a,
9e68f9fdf1, 2acea5c03d, 978177527e, 2648c436f3, 33f1f2c455, 995befe0e9, 1bb92aff55, b8e1471d3a, 60daf7f0bb, a83a3139d1,
fdb7ca3b8d, 0d7caf5cdf, a339d7ba91, 7216de55d6, 2437fbca64, 7d75d06b78, 13ef5648c4, 5b2478e2ba, 8b286571c3, f3ac523794,
020cf5ebfd, 54ab193970, 8f563f32ab, 151bae3566, 76df418cba, d0a72674c6, 1d430674c7, 70cb73922b, 344400951c, ea5a0be811,
3c7fd0bdb2, 6cadf8c858, 27579b9e4c, 4d756a9cc0, 3e668e05be, 60d3a2e0f8, cc3a3b6b47, eda1d49a62, 62e609ab77, 2bfe4ead4b,
b1c6c32f78, f6acbdecf4, f1c9dfcc01, ce78943ae1, d6f0d86649, 5bb67dbfea, 47610c4d3e, b732f3581f, 9e57ce716f, cd7ee7aa44,
3cfe791473, 973f2532f5, bc3be21d59, 0bf5cf9886, 919052d094, a2dafe2887, 92661c994b, ffe8fe356a, bc2f773b4f, f919201ecc,
7ff5d5c2e2, 9b77f951c7, a25f2f990a, 78b373975d, 2fcc873c4c, 23c2baadb3, 521ee82334, 1df96e59ce, 3e123c1e28, f38da66731,
06aabfc422, 1052d2bfec, 5e0b652344, 0f8f097183, 491ed3dda2, af284c6d1b, 41d3ec5fba, 0568c352f3, 2e7b4cb714, 9767726b66,
9ddfd84e41, 7928024f57, 3eb38acb43
@@ -124,8 +124,12 @@ which means you can modify it, redistribute it or use it however you like.
                                  video id, %(playlist)s for the playlist the
                                  video is in, %(playlist_index)s for the
                                  position in the playlist and %% for a
-                                 literal percent. Use - to output to stdout.
-                                 Can also be used to download to a different
+                                 literal percent. %(height)s and %(width)s
+                                 for the width and height of the video
+                                 format. %(resolution)s for a textual
+                                 description of the resolution of the video
+                                 format. Use - to output to stdout. Can also
+                                 be used to download to a different
                                  directory, for example with -o '/my/downloa
                                  ds/%(uploader)s/%(title)s-%(id)s.%(ext)s' .
 --autonumber-size NUMBER         Specifies the number of digits in
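The new %(height)s, %(width)s and %(resolution)s fields can be exercised either through -o on the command line or through the embedded API. A minimal sketch, assuming the 'outtmpl' parameter of the YoutubeDL class mirrors the -o option (the URL below is only a placeholder):

from youtube_dl import YoutubeDL

# Name files by title, id and resolution, e.g. "Some clip-abc123-1280x720.mp4"
# once the extractor reports width and height.
ydl = YoutubeDL({
    'outtmpl': u'%(title)s-%(id)s-%(resolution)s.%(ext)s',
})
ydl.add_default_info_extractors()
ydl.download(['http://example.com/some-video'])  # placeholder URL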
@@ -14,9 +14,9 @@
 
 set -e
 
-skip_tests=false
-if [ "$1" = '--skip-test' ]; then
-    skip_tests=true
+skip_tests=true
+if [ "$1" = '--run-tests' ]; then
+    skip_tests=false
     shift
 fi
 
@@ -71,6 +71,10 @@ class TestAllURLsMatching(unittest.TestCase):
     def test_youtube_truncated(self):
         self.assertMatch('http://www.youtube.com/watch?', ['youtube:truncated_url'])
 
+    def test_youtube_search_matching(self):
+        self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url'])
+        self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])
+
     def test_justin_tv_channelid_matching(self):
         self.assertTrue(JustinTVIE.suitable(u"justin.tv/vanillatv"))
         self.assertTrue(JustinTVIE.suitable(u"twitch.tv/vanillatv"))
@@ -18,6 +18,7 @@ from test.helper import (
 import hashlib
 import io
 import json
+import re
 import socket
 
 import youtube_dl.YoutubeDL

@@ -72,9 +73,7 @@ def generator(test_case):
     if 'playlist' not in test_case:
         info_dict = test_case.get('info_dict', {})
         if not test_case.get('file') and not (info_dict.get('id') and info_dict.get('ext')):
-            print_skipping('The output file cannot be know, the "file" '
-                'key is missing or the info_dict is incomplete')
-            return
+            raise Exception('Test definition incorrect. The output file cannot be known. Are both \'id\' and \'ext\' keys present?')
     if 'skip' in test_case:
         print_skipping(test_case['skip'])
         return

@@ -137,12 +136,21 @@ def generator(test_case):
             with io.open(info_json_fn, encoding='utf-8') as infof:
                 info_dict = json.load(infof)
             for (info_field, expected) in tc.get('info_dict', {}).items():
-                if isinstance(expected, compat_str) and expected.startswith('md5:'):
-                    got = 'md5:' + md5(info_dict.get(info_field))
-                else:
-                    got = info_dict.get(info_field)
-                self.assertEqual(expected, got,
-                    u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
+                if isinstance(expected, compat_str) and expected.startswith('re:'):
+                    got = info_dict.get(info_field)
+                    match_str = expected[len('re:'):]
+                    match_rex = re.compile(match_str)
+
+                    self.assertTrue(
+                        isinstance(got, compat_str) and match_rex.match(got),
+                        u'field %s (value: %r) should match %r' % (info_field, got, match_str))
+                else:
+                    if isinstance(expected, compat_str) and expected.startswith('md5:'):
+                        got = 'md5:' + md5(info_dict.get(info_field))
+                    else:
+                        got = info_dict.get(info_field)
+                    self.assertEqual(expected, got,
+                        u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
 
             # If checkable fields are missing from the test case, print the info_dict
             test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))
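The new branch above lets a test definition give a regular expression instead of a literal value for an info_dict field by prefixing it with 're:'. A hypothetical test entry illustrating both modes (URL, id and values are made up):

_TEST = {
    'url': 'http://example.com/watch/1234',  # placeholder
    'info_dict': {
        'id': '1234',
        'ext': 'mp4',
        # literal comparison, as before:
        'uploader': 'Example TV',
        # new: any title matching the pattern after 're:' passes
        'title': 're:^Episode [0-9]+: .+$',
    },
}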
@ -170,12 +170,12 @@ class TestPlaylists(unittest.TestCase):
|
|||||||
def test_AcademicEarthCourse(self):
|
def test_AcademicEarthCourse(self):
|
||||||
dl = FakeYDL()
|
dl = FakeYDL()
|
||||||
ie = AcademicEarthCourseIE(dl)
|
ie = AcademicEarthCourseIE(dl)
|
||||||
result = ie.extract('http://academicearth.org/courses/building-dynamic-websites/')
|
result = ie.extract('http://academicearth.org/playlists/laws-of-nature/')
|
||||||
self.assertIsPlaylist(result)
|
self.assertIsPlaylist(result)
|
||||||
self.assertEqual(result['id'], 'building-dynamic-websites')
|
self.assertEqual(result['id'], 'laws-of-nature')
|
||||||
self.assertEqual(result['title'], 'Building Dynamic Websites')
|
self.assertEqual(result['title'], 'Laws of Nature')
|
||||||
self.assertEqual(result['description'], u"Today's websites are increasingly dynamic. Pages are no longer static HTML files but instead generated by scripts and database calls. User interfaces are more seamless, with technologies like Ajax replacing traditional page reloads. This course teaches students how to build dynamic websites with Ajax and with Linux, Apache, MySQL, and PHP (LAMP), one of today's most popular frameworks. Students learn how to set up domain names with DNS, how to structure pages with XHTML and CSS, how to program in JavaScript and PHP, how to configure Apache and MySQL, how to design and query databases with SQL, how to use Ajax with both XML and JSON, and how to build mashups. The course explores issues of security, scalability, and cross-browser support and also discusses enterprise-level deployments of websites, including third-party hosting, virtualization, colocation in data centers, firewalling, and load-balancing.")
|
self.assertEqual(result['description'],u'Introduce yourself to the laws of nature with these free online college lectures from Yale, Harvard, and MIT.')# u"Today's websites are increasingly dynamic. Pages are no longer static HTML files but instead generated by scripts and database calls. User interfaces are more seamless, with technologies like Ajax replacing traditional page reloads. This course teaches students how to build dynamic websites with Ajax and with Linux, Apache, MySQL, and PHP (LAMP), one of today's most popular frameworks. Students learn how to set up domain names with DNS, how to structure pages with XHTML and CSS, how to program in JavaScript and PHP, how to configure Apache and MySQL, how to design and query databases with SQL, how to use Ajax with both XML and JSON, and how to build mashups. The course explores issues of security, scalability, and cross-browser support and also discusses enterprise-level deployments of websites, including third-party hosting, virtualization, colocation in data centers, firewalling, and load-balancing.")
|
||||||
self.assertEqual(len(result['entries']), 10)
|
self.assertEqual(len(result['entries']), 4)
|
||||||
|
|
||||||
def test_ivi_compilation(self):
|
def test_ivi_compilation(self):
|
||||||
dl = FakeYDL()
|
dl = FakeYDL()
|
||||||
|
@@ -9,6 +9,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
 
 # Various small unit tests
+import io
 import xml.etree.ElementTree
 
 #from youtube_dl.utils import htmlentity_transform

@@ -21,6 +22,7 @@ from youtube_dl.utils import (
     orderedSet,
     PagedList,
     parse_duration,
+    read_batch_urls,
     sanitize_filename,
     shell_quote,
     smuggle_url,

@@ -250,5 +252,14 @@ class TestUtil(unittest.TestCase):
     def test_struct_unpack(self):
         self.assertEqual(struct_unpack(u'!B', b'\x00'), (0,))
 
+    def test_read_batch_urls(self):
+        f = io.StringIO(u'''\xef\xbb\xbf foo
+            bar\r
+            baz
+            # More after this line\r
+            ; or after this
+            bam''')
+        self.assertEqual(read_batch_urls(f), [u'foo', u'bar', u'baz', u'bam'])
+
 if __name__ == '__main__':
     unittest.main()
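The test above pins down the expected behaviour of read_batch_urls: strip a UTF-8 byte-order mark and surrounding whitespace, skip blank lines and lines starting with a comment character, and return the remaining URLs in order. A minimal sketch of a helper with that behaviour, written against the test rather than against the actual youtube_dl.utils implementation:

def read_batch_urls(batch_fd):
    """Return the URLs in a batch file, skipping blanks and comment lines."""
    def fixup(url):
        # The test feeds u'\xef\xbb\xbf' (a UTF-8 BOM read character by
        # character), so strip that sequence as well as a real BOM.
        for bom in (u'\xef\xbb\xbf', u'\ufeff'):
            if url.startswith(bom):
                url = url[len(bom):]
        url = url.strip()
        # Lines starting with '#' or ';' are treated as comments.
        if not url or url.startswith(('#', ';')):
            return None
        return url

    urls = (fixup(line) for line in batch_fd)
    return [u for u in urls if u is not None]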
@ -16,6 +16,7 @@ from youtube_dl.extractor import (
|
|||||||
YoutubeChannelIE,
|
YoutubeChannelIE,
|
||||||
YoutubeShowIE,
|
YoutubeShowIE,
|
||||||
YoutubeTopListIE,
|
YoutubeTopListIE,
|
||||||
|
YoutubeSearchURLIE,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -118,6 +119,8 @@ class TestYoutubeLists(unittest.TestCase):
|
|||||||
self.assertEqual(original_video['id'], 'rjFaenf1T-Y')
|
self.assertEqual(original_video['id'], 'rjFaenf1T-Y')
|
||||||
|
|
||||||
def test_youtube_toptracks(self):
|
def test_youtube_toptracks(self):
|
||||||
|
print('Skipping: The playlist page gives error 500')
|
||||||
|
return
|
||||||
dl = FakeYDL()
|
dl = FakeYDL()
|
||||||
ie = YoutubePlaylistIE(dl)
|
ie = YoutubePlaylistIE(dl)
|
||||||
result = ie.extract('https://www.youtube.com/playlist?list=MCUS')
|
result = ie.extract('https://www.youtube.com/playlist?list=MCUS')
|
||||||
@ -131,5 +134,14 @@ class TestYoutubeLists(unittest.TestCase):
|
|||||||
entries = result['entries']
|
entries = result['entries']
|
||||||
self.assertTrue(len(entries) >= 5)
|
self.assertTrue(len(entries) >= 5)
|
||||||
|
|
||||||
|
def test_youtube_search_url(self):
|
||||||
|
dl = FakeYDL()
|
||||||
|
ie = YoutubeSearchURLIE(dl)
|
||||||
|
result = ie.extract('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video')
|
||||||
|
entries = result['entries']
|
||||||
|
self.assertIsPlaylist(result)
|
||||||
|
self.assertEqual(result['title'], 'youtube-dl test video')
|
||||||
|
self.assertTrue(len(entries) >= 5)
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
@@ -409,6 +409,13 @@ class YoutubeDL(object):
             template_dict['autonumber'] = autonumber_templ % self._num_downloads
             if template_dict.get('playlist_index') is not None:
                 template_dict['playlist_index'] = '%05d' % template_dict['playlist_index']
+            if template_dict.get('resolution') is None:
+                if template_dict.get('width') and template_dict.get('height'):
+                    template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
+                elif template_dict.get('height'):
+                    res = '%sp' % template_dict['height']
+                elif template_dict.get('width'):
+                    res = '?x%d' % template_dict['width']
 
             sanitize = lambda k, v: sanitize_filename(
                 compat_str(v),

@@ -675,6 +682,9 @@ class YoutubeDL(object):
         info_dict['playlist'] = None
         info_dict['playlist_index'] = None
 
+        if 'display_id' not in info_dict and 'id' in info_dict:
+            info_dict['display_id'] = info_dict['id']
+
         # This extractors handle format selection themselves
         if info_dict['extractor'] in ['Youku']:
             if download:
|
@ -46,12 +46,17 @@ __authors__ = (
|
|||||||
'Andreas Schmitz',
|
'Andreas Schmitz',
|
||||||
'Michael Kaiser',
|
'Michael Kaiser',
|
||||||
'Niklas Laxström',
|
'Niklas Laxström',
|
||||||
|
'David Triendl',
|
||||||
|
'Anthony Weems',
|
||||||
|
'David Wagner',
|
||||||
|
'Juan C. Olivares',
|
||||||
)
|
)
|
||||||
|
|
||||||
__license__ = 'Public Domain'
|
__license__ = 'Public Domain'
|
||||||
|
|
||||||
import codecs
|
import codecs
|
||||||
import getpass
|
import getpass
|
||||||
|
import io
|
||||||
import locale
|
import locale
|
||||||
import optparse
|
import optparse
|
||||||
import os
|
import os
|
||||||
@ -70,6 +75,7 @@ from .utils import (
|
|||||||
get_cachedir,
|
get_cachedir,
|
||||||
MaxDownloadsReached,
|
MaxDownloadsReached,
|
||||||
preferredencoding,
|
preferredencoding,
|
||||||
|
read_batch_urls,
|
||||||
SameFileError,
|
SameFileError,
|
||||||
setproctitle,
|
setproctitle,
|
||||||
std_headers,
|
std_headers,
|
||||||
@ -424,6 +430,8 @@ def parseOpts(overrideArguments=None):
|
|||||||
'%(extractor)s for the provider (youtube, metacafe, etc), '
|
'%(extractor)s for the provider (youtube, metacafe, etc), '
|
||||||
'%(id)s for the video id, %(playlist)s for the playlist the video is in, '
|
'%(id)s for the video id, %(playlist)s for the playlist the video is in, '
|
||||||
'%(playlist_index)s for the position in the playlist and %% for a literal percent. '
|
'%(playlist_index)s for the position in the playlist and %% for a literal percent. '
|
||||||
|
'%(height)s and %(width)s for the width and height of the video format. '
|
||||||
|
'%(resolution)s for a textual description of the resolution of the video format. '
|
||||||
'Use - to output to stdout. Can also be used to download to a different directory, '
|
'Use - to output to stdout. Can also be used to download to a different directory, '
|
||||||
'for example with -o \'/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s\' .'))
|
'for example with -o \'/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s\' .'))
|
||||||
filesystem.add_option('--autonumber-size',
|
filesystem.add_option('--autonumber-size',
|
||||||
@@ -551,21 +559,19 @@ def _real_main(argv=None):
         sys.exit(0)
 
     # Batch file verification
-    batchurls = []
+    batch_urls = []
     if opts.batchfile is not None:
         try:
             if opts.batchfile == '-':
                 batchfd = sys.stdin
             else:
-                batchfd = open(opts.batchfile, 'r')
-            batchurls = batchfd.readlines()
-            batchurls = [x.strip() for x in batchurls]
-            batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)]
+                batchfd = io.open(opts.batchfile, 'r', encoding='utf-8', errors='ignore')
+            batch_urls = read_batch_urls(batchfd)
             if opts.verbose:
-                write_string(u'[debug] Batch file urls: ' + repr(batchurls) + u'\n')
+                write_string(u'[debug] Batch file urls: ' + repr(batch_urls) + u'\n')
         except IOError:
             sys.exit(u'ERROR: batch file could not be read')
-    all_urls = batchurls + args
+    all_urls = batch_urls + args
     all_urls = [url.strip() for url in all_urls]
     _enc = preferredencoding()
     all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls]
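For completeness, the helper the rewritten --batch-file path now relies on can also be called directly. A small sketch (the file name and its contents are placeholders):

import io

from youtube_dl.utils import read_batch_urls

# 'urls.txt' holds one URL per line; blank lines and lines starting with
# '#' or ';' are ignored, matching the batch-file behaviour shown above.
with io.open('urls.txt', 'r', encoding='utf-8', errors='ignore') as batchfd:
    batch_urls = read_batch_urls(batchfd)
print(batch_urls)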
@ -12,7 +12,6 @@ from .http import HttpFD
|
|||||||
from ..utils import (
|
from ..utils import (
|
||||||
struct_pack,
|
struct_pack,
|
||||||
struct_unpack,
|
struct_unpack,
|
||||||
compat_urllib_request,
|
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
format_bytes,
|
format_bytes,
|
||||||
encodeFilename,
|
encodeFilename,
|
||||||
@ -117,8 +116,8 @@ class FlvReader(io.BytesIO):
|
|||||||
self.read_unsigned_char()
|
self.read_unsigned_char()
|
||||||
# flags
|
# flags
|
||||||
self.read(3)
|
self.read(3)
|
||||||
# BootstrapinfoVersion
|
|
||||||
bootstrap_info_version = self.read_unsigned_int()
|
self.read_unsigned_int() # BootstrapinfoVersion
|
||||||
# Profile,Live,Update,Reserved
|
# Profile,Live,Update,Reserved
|
||||||
self.read(1)
|
self.read(1)
|
||||||
# time scale
|
# time scale
|
||||||
@ -127,15 +126,15 @@ class FlvReader(io.BytesIO):
|
|||||||
self.read_unsigned_long_long()
|
self.read_unsigned_long_long()
|
||||||
# SmpteTimeCodeOffset
|
# SmpteTimeCodeOffset
|
||||||
self.read_unsigned_long_long()
|
self.read_unsigned_long_long()
|
||||||
# MovieIdentifier
|
|
||||||
movie_identifier = self.read_string()
|
self.read_string() # MovieIdentifier
|
||||||
server_count = self.read_unsigned_char()
|
server_count = self.read_unsigned_char()
|
||||||
# ServerEntryTable
|
# ServerEntryTable
|
||||||
for i in range(server_count):
|
for i in range(server_count):
|
||||||
self.read_string()
|
self.read_string()
|
||||||
quality_count = self.read_unsigned_char()
|
quality_count = self.read_unsigned_char()
|
||||||
# QualityEntryTable
|
# QualityEntryTable
|
||||||
for i in range(server_count):
|
for i in range(quality_count):
|
||||||
self.read_string()
|
self.read_string()
|
||||||
# DrmData
|
# DrmData
|
||||||
self.read_string()
|
self.read_string()
|
||||||
|
@@ -85,6 +85,7 @@ class HttpFD(FileDownloader):
                 else:
                     # The length does not match, we start the download over
                     self.report_unable_to_resume()
+                    resume_len = 0
                     open_mode = 'wb'
                     break
             # Retry
|
@ -19,12 +19,15 @@ from .bbccouk import BBCCoUkIE
|
|||||||
from .blinkx import BlinkxIE
|
from .blinkx import BlinkxIE
|
||||||
from .bliptv import BlipTVIE, BlipTVUserIE
|
from .bliptv import BlipTVIE, BlipTVUserIE
|
||||||
from .bloomberg import BloombergIE
|
from .bloomberg import BloombergIE
|
||||||
|
from .br import BRIE
|
||||||
from .breakcom import BreakIE
|
from .breakcom import BreakIE
|
||||||
from .brightcove import BrightcoveIE
|
from .brightcove import BrightcoveIE
|
||||||
from .c56 import C56IE
|
from .c56 import C56IE
|
||||||
|
from .canal13cl import Canal13clIE
|
||||||
from .canalplus import CanalplusIE
|
from .canalplus import CanalplusIE
|
||||||
from .canalc2 import Canalc2IE
|
from .canalc2 import Canalc2IE
|
||||||
from .cbs import CBSIE
|
from .cbs import CBSIE
|
||||||
|
from .ceskatelevize import CeskaTelevizeIE
|
||||||
from .channel9 import Channel9IE
|
from .channel9 import Channel9IE
|
||||||
from .chilloutzone import ChilloutzoneIE
|
from .chilloutzone import ChilloutzoneIE
|
||||||
from .cinemassacre import CinemassacreIE
|
from .cinemassacre import CinemassacreIE
|
||||||
@ -88,6 +91,7 @@ from .funnyordie import FunnyOrDieIE
|
|||||||
from .gamekings import GamekingsIE
|
from .gamekings import GamekingsIE
|
||||||
from .gamespot import GameSpotIE
|
from .gamespot import GameSpotIE
|
||||||
from .gametrailers import GametrailersIE
|
from .gametrailers import GametrailersIE
|
||||||
|
from .gdcvault import GDCVaultIE
|
||||||
from .generic import GenericIE
|
from .generic import GenericIE
|
||||||
from .googleplus import GooglePlusIE
|
from .googleplus import GooglePlusIE
|
||||||
from .googlesearch import GoogleSearchIE
|
from .googlesearch import GoogleSearchIE
|
||||||
@ -132,11 +136,12 @@ from .lynda import (
|
|||||||
)
|
)
|
||||||
from .m6 import M6IE
|
from .m6 import M6IE
|
||||||
from .macgamestore import MacGameStoreIE
|
from .macgamestore import MacGameStoreIE
|
||||||
|
from .mailru import MailRuIE
|
||||||
from .malemotion import MalemotionIE
|
from .malemotion import MalemotionIE
|
||||||
from .mdr import MDRIE
|
from .mdr import MDRIE
|
||||||
from .metacafe import MetacafeIE
|
from .metacafe import MetacafeIE
|
||||||
from .metacritic import MetacriticIE
|
from .metacritic import MetacriticIE
|
||||||
from .mit import TechTVMITIE, MITIE
|
from .mit import TechTVMITIE, MITIE, OCWMITIE
|
||||||
from .mixcloud import MixcloudIE
|
from .mixcloud import MixcloudIE
|
||||||
from .mpora import MporaIE
|
from .mpora import MporaIE
|
||||||
from .mofosex import MofosexIE
|
from .mofosex import MofosexIE
|
||||||
@ -151,7 +156,10 @@ from .myspass import MySpassIE
|
|||||||
from .myvideo import MyVideoIE
|
from .myvideo import MyVideoIE
|
||||||
from .naver import NaverIE
|
from .naver import NaverIE
|
||||||
from .nba import NBAIE
|
from .nba import NBAIE
|
||||||
from .nbc import NBCNewsIE
|
from .nbc import (
|
||||||
|
NBCIE,
|
||||||
|
NBCNewsIE,
|
||||||
|
)
|
||||||
from .ndr import NDRIE
|
from .ndr import NDRIE
|
||||||
from .ndtv import NDTVIE
|
from .ndtv import NDTVIE
|
||||||
from .newgrounds import NewgroundsIE
|
from .newgrounds import NewgroundsIE
|
||||||
@ -160,7 +168,7 @@ from .nhl import NHLIE, NHLVideocenterIE
|
|||||||
from .niconico import NiconicoIE
|
from .niconico import NiconicoIE
|
||||||
from .ninegag import NineGagIE
|
from .ninegag import NineGagIE
|
||||||
from .normalboots import NormalbootsIE
|
from .normalboots import NormalbootsIE
|
||||||
from .novamov import NovamovIE
|
from .novamov import NovaMovIE
|
||||||
from .nowness import NownessIE
|
from .nowness import NownessIE
|
||||||
from .nowvideo import NowVideoIE
|
from .nowvideo import NowVideoIE
|
||||||
from .ooyala import OoyalaIE
|
from .ooyala import OoyalaIE
|
||||||
@ -171,6 +179,7 @@ from .podomatic import PodomaticIE
|
|||||||
from .pornhd import PornHdIE
|
from .pornhd import PornHdIE
|
||||||
from .pornhub import PornHubIE
|
from .pornhub import PornHubIE
|
||||||
from .pornotube import PornotubeIE
|
from .pornotube import PornotubeIE
|
||||||
|
from .prosiebensat1 import ProSiebenSat1IE
|
||||||
from .pyvideo import PyvideoIE
|
from .pyvideo import PyvideoIE
|
||||||
from .radiofrance import RadioFranceIE
|
from .radiofrance import RadioFranceIE
|
||||||
from .rbmaradio import RBMARadioIE
|
from .rbmaradio import RBMARadioIE
|
||||||
@ -186,6 +195,7 @@ from .rutube import (
|
|||||||
RutubeMovieIE,
|
RutubeMovieIE,
|
||||||
RutubePersonIE,
|
RutubePersonIE,
|
||||||
)
|
)
|
||||||
|
from .savefrom import SaveFromIE
|
||||||
from .servingsys import ServingSysIE
|
from .servingsys import ServingSysIE
|
||||||
from .sina import SinaIE
|
from .sina import SinaIE
|
||||||
from .slashdot import SlashdotIE
|
from .slashdot import SlashdotIE
|
||||||
@ -224,10 +234,12 @@ from .tinypic import TinyPicIE
|
|||||||
from .toutv import TouTvIE
|
from .toutv import TouTvIE
|
||||||
from .traileraddict import TrailerAddictIE
|
from .traileraddict import TrailerAddictIE
|
||||||
from .trilulilu import TriluliluIE
|
from .trilulilu import TriluliluIE
|
||||||
|
from .trutube import TruTubeIE
|
||||||
from .tube8 import Tube8IE
|
from .tube8 import Tube8IE
|
||||||
from .tudou import TudouIE
|
from .tudou import TudouIE
|
||||||
from .tumblr import TumblrIE
|
from .tumblr import TumblrIE
|
||||||
from .tutv import TutvIE
|
from .tutv import TutvIE
|
||||||
|
from .tvigle import TvigleIE
|
||||||
from .tvp import TvpIE
|
from .tvp import TvpIE
|
||||||
from .unistra import UnistraIE
|
from .unistra import UnistraIE
|
||||||
from .ustream import UstreamIE, UstreamChannelIE
|
from .ustream import UstreamIE, UstreamChannelIE
|
||||||
@ -238,6 +250,7 @@ from .vesti import VestiIE
|
|||||||
from .vevo import VevoIE
|
from .vevo import VevoIE
|
||||||
from .vice import ViceIE
|
from .vice import ViceIE
|
||||||
from .viddler import ViddlerIE
|
from .viddler import ViddlerIE
|
||||||
|
from .videobam import VideoBamIE
|
||||||
from .videodetective import VideoDetectiveIE
|
from .videodetective import VideoDetectiveIE
|
||||||
from .videofyme import VideofyMeIE
|
from .videofyme import VideofyMeIE
|
||||||
from .videopremium import VideoPremiumIE
|
from .videopremium import VideoPremiumIE
|
||||||
@ -272,19 +285,20 @@ from .youku import YoukuIE
|
|||||||
from .youporn import YouPornIE
|
from .youporn import YouPornIE
|
||||||
from .youtube import (
|
from .youtube import (
|
||||||
YoutubeIE,
|
YoutubeIE,
|
||||||
YoutubePlaylistIE,
|
|
||||||
YoutubeSearchIE,
|
|
||||||
YoutubeSearchDateIE,
|
|
||||||
YoutubeUserIE,
|
|
||||||
YoutubeChannelIE,
|
YoutubeChannelIE,
|
||||||
YoutubeShowIE,
|
|
||||||
YoutubeSubscriptionsIE,
|
|
||||||
YoutubeRecommendedIE,
|
|
||||||
YoutubeTruncatedURLIE,
|
|
||||||
YoutubeWatchLaterIE,
|
|
||||||
YoutubeFavouritesIE,
|
YoutubeFavouritesIE,
|
||||||
YoutubeHistoryIE,
|
YoutubeHistoryIE,
|
||||||
|
YoutubePlaylistIE,
|
||||||
|
YoutubeRecommendedIE,
|
||||||
|
YoutubeSearchDateIE,
|
||||||
|
YoutubeSearchIE,
|
||||||
|
YoutubeSearchURLIE,
|
||||||
|
YoutubeShowIE,
|
||||||
|
YoutubeSubscriptionsIE,
|
||||||
YoutubeTopListIE,
|
YoutubeTopListIE,
|
||||||
|
YoutubeTruncatedURLIE,
|
||||||
|
YoutubeUserIE,
|
||||||
|
YoutubeWatchLaterIE,
|
||||||
)
|
)
|
||||||
from .zdf import ZDFIE
|
from .zdf import ZDFIE
|
||||||
|
|
||||||
|
@ -5,7 +5,7 @@ from .common import InfoExtractor
|
|||||||
|
|
||||||
|
|
||||||
class AcademicEarthCourseIE(InfoExtractor):
|
class AcademicEarthCourseIE(InfoExtractor):
|
||||||
_VALID_URL = r'^https?://(?:www\.)?academicearth\.org/(?:courses|playlists)/(?P<id>[^?#/]+)'
|
_VALID_URL = r'^https?://(?:www\.)?academicearth\.org/playlists/(?P<id>[^?#/]+)'
|
||||||
IE_NAME = 'AcademicEarth:Course'
|
IE_NAME = 'AcademicEarth:Course'
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@ -14,12 +14,12 @@ class AcademicEarthCourseIE(InfoExtractor):
|
|||||||
|
|
||||||
webpage = self._download_webpage(url, playlist_id)
|
webpage = self._download_webpage(url, playlist_id)
|
||||||
title = self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
r'<h1 class="playlist-name">(.*?)</h1>', webpage, u'title')
|
r'<h1 class="playlist-name"[^>]*?>(.*?)</h1>', webpage, u'title')
|
||||||
description = self._html_search_regex(
|
description = self._html_search_regex(
|
||||||
r'<p class="excerpt">(.*?)</p>',
|
r'<p class="excerpt"[^>]*?>(.*?)</p>',
|
||||||
webpage, u'description', fatal=False)
|
webpage, u'description', fatal=False)
|
||||||
urls = re.findall(
|
urls = re.findall(
|
||||||
r'<h3 class="lecture-title"><a target="_blank" href="([^"]+)">',
|
r'<li class="lecture-preview">\s*?<a target="_blank" href="([^"]+)">',
|
||||||
webpage)
|
webpage)
|
||||||
entries = [self.url_result(u) for u in urls]
|
entries = [self.url_result(u) for u in urls]
|
||||||
|
|
||||||
|
80
youtube_dl/extractor/br.py
Normal file
80
youtube_dl/extractor/br.py
Normal file
@ -0,0 +1,80 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import ExtractorError
|
||||||
|
|
||||||
|
|
||||||
|
class BRIE(InfoExtractor):
|
||||||
|
IE_DESC = "Bayerischer Rundfunk Mediathek"
|
||||||
|
_VALID_URL = r"^https?://(?:www\.)?br\.de/mediathek/video/(?:sendungen/)?(?P<id>[a-z0-9\-]+)\.html$"
|
||||||
|
_BASE_URL = "http://www.br.de"
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
"url": "http://www.br.de/mediathek/video/anselm-gruen-114.html",
|
||||||
|
"md5": "c4f83cf0f023ba5875aba0bf46860df2",
|
||||||
|
"info_dict": {
|
||||||
|
"id": "2c8d81c5-6fb7-4a74-88d4-e768e5856532",
|
||||||
|
"ext": "mp4",
|
||||||
|
"title": "Feiern und Verzichten",
|
||||||
|
"description": "Anselm Grün: Feiern und Verzichten",
|
||||||
|
"uploader": "BR/Birgit Baier",
|
||||||
|
"upload_date": "20140301"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
display_id = mobj.group('id')
|
||||||
|
page = self._download_webpage(url, display_id)
|
||||||
|
xml_url = self._search_regex(
|
||||||
|
r"return BRavFramework\.register\(BRavFramework\('avPlayer_(?:[a-f0-9-]{36})'\)\.setup\({dataURL:'(/mediathek/video/[a-z0-9/~_.-]+)'}\)\);", page, "XMLURL")
|
||||||
|
xml = self._download_xml(self._BASE_URL + xml_url, None)
|
||||||
|
|
||||||
|
videos = [{
|
||||||
|
"id": xml_video.get("externalId"),
|
||||||
|
"title": xml_video.find("title").text,
|
||||||
|
"formats": self._extract_formats(xml_video.find("assets")),
|
||||||
|
"thumbnails": self._extract_thumbnails(xml_video.find("teaserImage/variants")),
|
||||||
|
"description": " ".join(xml_video.find("shareTitle").text.splitlines()),
|
||||||
|
"uploader": xml_video.find("author").text,
|
||||||
|
"upload_date": "".join(reversed(xml_video.find("broadcastDate").text.split("."))),
|
||||||
|
"webpage_url": xml_video.find("permalink").text,
|
||||||
|
} for xml_video in xml.findall("video")]
|
||||||
|
|
||||||
|
if len(videos) > 1:
|
||||||
|
self._downloader.report_warning(
|
||||||
|
'found multiple videos; please '
|
||||||
|
'report this with the video URL to http://yt-dl.org/bug')
|
||||||
|
if not videos:
|
||||||
|
raise ExtractorError('No video entries found')
|
||||||
|
return videos[0]
|
||||||
|
|
||||||
|
def _extract_formats(self, assets):
|
||||||
|
formats = [{
|
||||||
|
"url": asset.find("downloadUrl").text,
|
||||||
|
"ext": asset.find("mediaType").text,
|
||||||
|
"format_id": asset.get("type"),
|
||||||
|
"width": int(asset.find("frameWidth").text),
|
||||||
|
"height": int(asset.find("frameHeight").text),
|
||||||
|
"tbr": int(asset.find("bitrateVideo").text),
|
||||||
|
"abr": int(asset.find("bitrateAudio").text),
|
||||||
|
"vcodec": asset.find("codecVideo").text,
|
||||||
|
"container": asset.find("mediaType").text,
|
||||||
|
"filesize": int(asset.find("size").text),
|
||||||
|
} for asset in assets.findall("asset")
|
||||||
|
if asset.find("downloadUrl") is not None]
|
||||||
|
|
||||||
|
self._sort_formats(formats)
|
||||||
|
return formats
|
||||||
|
|
||||||
|
def _extract_thumbnails(self, variants):
|
||||||
|
thumbnails = [{
|
||||||
|
"url": self._BASE_URL + variant.find("url").text,
|
||||||
|
"width": int(variant.find("width").text),
|
||||||
|
"height": int(variant.find("height").text),
|
||||||
|
} for variant in variants.findall("variant")]
|
||||||
|
thumbnails.sort(key=lambda x: x["width"] * x["height"], reverse=True)
|
||||||
|
return thumbnails
|
@ -23,8 +23,8 @@ class BreakIE(InfoExtractor):
|
|||||||
video_id = mobj.group(1).split("-")[-1]
|
video_id = mobj.group(1).split("-")[-1]
|
||||||
embed_url = 'http://www.break.com/embed/%s' % video_id
|
embed_url = 'http://www.break.com/embed/%s' % video_id
|
||||||
webpage = self._download_webpage(embed_url, video_id)
|
webpage = self._download_webpage(embed_url, video_id)
|
||||||
info_json = self._search_regex(r'var embedVars = ({.*?});', webpage,
|
info_json = self._search_regex(r'var embedVars = ({.*})\s*?</script>',
|
||||||
'info json', flags=re.DOTALL)
|
webpage, 'info json', flags=re.DOTALL)
|
||||||
info = json.loads(info_json)
|
info = json.loads(info_json)
|
||||||
video_url = info['videoUri']
|
video_url = info['videoUri']
|
||||||
m_youtube = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', video_url)
|
m_youtube = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', video_url)
|
||||||
|
48
youtube_dl/extractor/canal13cl.py
Normal file
48
youtube_dl/extractor/canal13cl.py
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class Canal13clIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'^http://(?:www\.)?13\.cl/(?:[^/?#]+/)*(?P<id>[^/?#]+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.13.cl/t13/nacional/el-circulo-de-hierro-de-michelle-bachelet-en-su-regreso-a-la-moneda',
|
||||||
|
'md5': '4cb1fa38adcad8fea88487a078831755',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1403022125',
|
||||||
|
'display_id': 'el-circulo-de-hierro-de-michelle-bachelet-en-su-regreso-a-la-moneda',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'El "círculo de hierro" de Michelle Bachelet en su regreso a La Moneda',
|
||||||
|
'description': '(Foto: Agencia Uno) En nueve días más, Michelle Bachelet va a asumir por segunda vez como presidenta de la República. Entre aquellos que la acompañarán hay caras que se repiten y otras que se consolidan en su entorno de colaboradores más cercanos.',
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
display_id = mobj.group('id')
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
|
title = self._html_search_meta(
|
||||||
|
'twitter:title', webpage, 'title', fatal=True)
|
||||||
|
description = self._html_search_meta(
|
||||||
|
'twitter:description', webpage, 'description')
|
||||||
|
url = self._html_search_regex(
|
||||||
|
r'articuloVideo = \"(.*?)\"', webpage, 'url')
|
||||||
|
real_id = self._search_regex(
|
||||||
|
r'[^0-9]([0-9]{7,})[^0-9]', url, 'id', default=display_id)
|
||||||
|
thumbnail = self._html_search_regex(
|
||||||
|
r'articuloImagen = \"(.*?)\"', webpage, 'thumbnail')
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': real_id,
|
||||||
|
'display_id': display_id,
|
||||||
|
'url': url,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'ext': 'mp4',
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
}
|
@ -1,4 +1,6 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
@ -9,11 +11,12 @@ class Canalc2IE(InfoExtractor):
|
|||||||
_VALID_URL = r'http://.*?\.canalc2\.tv/video\.asp\?.*?idVideo=(?P<id>\d+)'
|
_VALID_URL = r'http://.*?\.canalc2\.tv/video\.asp\?.*?idVideo=(?P<id>\d+)'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://www.canalc2.tv/video.asp?idVideo=12163&voir=oui',
|
'url': 'http://www.canalc2.tv/video.asp?idVideo=12163&voir=oui',
|
||||||
u'file': u'12163.mp4',
|
'md5': '060158428b650f896c542dfbb3d6487f',
|
||||||
u'md5': u'060158428b650f896c542dfbb3d6487f',
|
'info_dict': {
|
||||||
u'info_dict': {
|
'id': '12163',
|
||||||
u'title': u'Terrasses du Numérique'
|
'ext': 'mp4',
|
||||||
|
'title': 'Terrasses du Numérique'
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -28,10 +31,11 @@ class Canalc2IE(InfoExtractor):
|
|||||||
video_url = 'http://vod-flash.u-strasbg.fr:8080/' + file_name
|
video_url = 'http://vod-flash.u-strasbg.fr:8080/' + file_name
|
||||||
|
|
||||||
title = self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
r'class="evenement8">(.*?)</a>', webpage, u'title')
|
r'class="evenement8">(.*?)</a>', webpage, 'title')
|
||||||
|
|
||||||
return {'id': video_id,
|
return {
|
||||||
'ext': 'mp4',
|
'id': video_id,
|
||||||
'url': video_url,
|
'ext': 'mp4',
|
||||||
'title': title,
|
'url': video_url,
|
||||||
}
|
'title': title,
|
||||||
|
}
|
||||||
|
126
youtube_dl/extractor/ceskatelevize.py
Normal file
126
youtube_dl/extractor/ceskatelevize.py
Normal file
@ -0,0 +1,126 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
compat_urllib_request,
|
||||||
|
compat_urllib_parse,
|
||||||
|
compat_urllib_parse_urlparse,
|
||||||
|
ExtractorError,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class CeskaTelevizeIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://www\.ceskatelevize\.cz/(porady|ivysilani)/(.+/)?(?P<id>[^?#]+)'
|
||||||
|
|
||||||
|
_TESTS = [
|
||||||
|
{
|
||||||
|
'url': 'http://www.ceskatelevize.cz/ivysilani/10532695142-prvni-republika/213512120230004-spanelska-chripka',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '213512120230004',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'První republika: Španělská chřipka',
|
||||||
|
'duration': 3107.4,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True, # requires rtmpdump
|
||||||
|
},
|
||||||
|
'skip': 'Works only from Czech Republic.',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://www.ceskatelevize.cz/ivysilani/1030584952-tsatsiki-maminka-a-policajt',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '20138143440',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'Tsatsiki, maminka a policajt',
|
||||||
|
'duration': 6754.1,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True, # requires rtmpdump
|
||||||
|
},
|
||||||
|
'skip': 'Works only from Czech Republic.',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://www.ceskatelevize.cz/ivysilani/10532695142-prvni-republika/bonus/14716-zpevacka-z-duparny-bobina',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '14716',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'První republika: Zpěvačka z Dupárny Bobina',
|
||||||
|
'duration': 90,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True, # requires rtmpdump
|
||||||
|
},
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
url = url.replace('/porady/', '/ivysilani/').replace('/video/', '')
|
||||||
|
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
NOT_AVAILABLE_STRING = 'This content is not available at your territory due to limited copyright.'
|
||||||
|
if '%s</p>' % NOT_AVAILABLE_STRING in webpage:
|
||||||
|
raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)
|
||||||
|
|
||||||
|
typ = self._html_search_regex(r'getPlaylistUrl\(\[\{"type":"(.+?)","id":".+?"\}\],', webpage, 'type')
|
||||||
|
episode_id = self._html_search_regex(r'getPlaylistUrl\(\[\{"type":".+?","id":"(.+?)"\}\],', webpage, 'episode_id')
|
||||||
|
|
||||||
|
data = {
|
||||||
|
'playlist[0][type]': typ,
|
||||||
|
'playlist[0][id]': episode_id,
|
||||||
|
'requestUrl': compat_urllib_parse_urlparse(url).path,
|
||||||
|
'requestSource': 'iVysilani',
|
||||||
|
}
|
||||||
|
|
||||||
|
req = compat_urllib_request.Request('http://www.ceskatelevize.cz/ivysilani/ajax/get-playlist-url',
|
||||||
|
data=compat_urllib_parse.urlencode(data))
|
||||||
|
|
||||||
|
req.add_header('Content-type', 'application/x-www-form-urlencoded')
|
||||||
|
req.add_header('x-addr', '127.0.0.1')
|
||||||
|
req.add_header('X-Requested-With', 'XMLHttpRequest')
|
||||||
|
req.add_header('Referer', url)
|
||||||
|
|
||||||
|
playlistpage = self._download_json(req, video_id)
|
||||||
|
|
||||||
|
req = compat_urllib_request.Request(compat_urllib_parse.unquote(playlistpage['url']))
|
||||||
|
req.add_header('Referer', url)
|
||||||
|
|
||||||
|
playlist = self._download_xml(req, video_id)
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for i in playlist.find('smilRoot/body'):
|
||||||
|
if 'AD' not in i.attrib['id']:
|
||||||
|
base_url = i.attrib['base']
|
||||||
|
parsedurl = compat_urllib_parse_urlparse(base_url)
|
||||||
|
duration = i.attrib['duration']
|
||||||
|
|
||||||
|
for video in i.findall('video'):
|
||||||
|
if video.attrib['label'] != 'AD':
|
||||||
|
format_id = video.attrib['label']
|
||||||
|
play_path = video.attrib['src']
|
||||||
|
vbr = int(video.attrib['system-bitrate'])
|
||||||
|
|
||||||
|
formats.append({
|
||||||
|
'format_id': format_id,
|
||||||
|
'url': base_url,
|
||||||
|
'vbr': vbr,
|
||||||
|
'play_path': play_path,
|
||||||
|
'app': parsedurl.path[1:] + '?' + parsedurl.query,
|
||||||
|
'rtmp_live': True,
|
||||||
|
'ext': 'flv',
|
||||||
|
})
|
||||||
|
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': episode_id,
|
||||||
|
'title': self._html_search_regex(r'<title>(.+?) — iVysílání — Česká televize</title>', webpage, 'title'),
|
||||||
|
'duration': float(duration),
|
||||||
|
'formats': formats,
|
||||||
|
}
|
@ -1,4 +1,5 @@
|
|||||||
# encoding: utf-8
|
# encoding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
@ -8,73 +9,63 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class CinemassacreIE(InfoExtractor):
|
class CinemassacreIE(InfoExtractor):
|
||||||
_VALID_URL = r'(?:http://)?(?:www\.)?(?P<url>cinemassacre\.com/(?P<date_Y>[0-9]{4})/(?P<date_m>[0-9]{2})/(?P<date_d>[0-9]{2})/.+?)(?:[/?].*)?'
|
_VALID_URL = r'http://(?:www\.)?cinemassacre\.com/(?P<date_Y>[0-9]{4})/(?P<date_m>[0-9]{2})/(?P<date_d>[0-9]{2})/.+?'
|
||||||
_TESTS = [{
|
_TESTS = [
|
||||||
u'url': u'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/',
|
{
|
||||||
u'file': u'19911.flv',
|
'url': 'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/',
|
||||||
u'info_dict': {
|
'file': '19911.mp4',
|
||||||
u'upload_date': u'20121110',
|
'md5': 'fde81fbafaee331785f58cd6c0d46190',
|
||||||
u'title': u'“Angry Video Game Nerd: The Movie” – Trailer',
|
'info_dict': {
|
||||||
u'description': u'md5:fb87405fcb42a331742a0dce2708560b',
|
'upload_date': '20121110',
|
||||||
|
'title': '“Angry Video Game Nerd: The Movie” – Trailer',
|
||||||
|
'description': 'md5:fb87405fcb42a331742a0dce2708560b',
|
||||||
|
},
|
||||||
},
|
},
|
||||||
u'params': {
|
{
|
||||||
# rtmp download
|
'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940',
|
||||||
u'skip_download': True,
|
'file': '521be8ef82b16.mp4',
|
||||||
},
|
'md5': 'd72f10cd39eac4215048f62ab477a511',
|
||||||
},
|
'info_dict': {
|
||||||
{
|
'upload_date': '20131002',
|
||||||
u'url': u'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940',
|
'title': 'The Mummy’s Hand (1940)',
|
||||||
u'file': u'521be8ef82b16.flv',
|
},
|
||||||
u'info_dict': {
|
}
|
||||||
u'upload_date': u'20131002',
|
]
|
||||||
u'title': u'The Mummy’s Hand (1940)',
|
|
||||||
},
|
|
||||||
u'params': {
|
|
||||||
# rtmp download
|
|
||||||
u'skip_download': True,
|
|
||||||
},
|
|
||||||
}]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
|
||||||
webpage_url = u'http://' + mobj.group('url')
|
webpage = self._download_webpage(url, None) # Don't know video id yet
|
||||||
webpage = self._download_webpage(webpage_url, None) # Don't know video id yet
|
|
||||||
video_date = mobj.group('date_Y') + mobj.group('date_m') + mobj.group('date_d')
|
video_date = mobj.group('date_Y') + mobj.group('date_m') + mobj.group('date_d')
|
||||||
mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?id=(?:Cinemassacre-)?(?P<video_id>.+?))"', webpage)
|
mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?id=(?:Cinemassacre-)?(?P<video_id>.+?))"', webpage)
|
||||||
if not mobj:
|
if not mobj:
|
||||||
raise ExtractorError(u'Can\'t extract embed url and video id')
|
raise ExtractorError('Can\'t extract embed url and video id')
|
||||||
playerdata_url = mobj.group(u'embed_url')
|
playerdata_url = mobj.group('embed_url')
|
||||||
video_id = mobj.group(u'video_id')
|
video_id = mobj.group('video_id')
|
||||||
|
|
||||||
video_title = self._html_search_regex(r'<title>(?P<title>.+?)\|',
|
video_title = self._html_search_regex(r'<title>(?P<title>.+?)\|',
|
||||||
webpage, u'title')
|
webpage, 'title')
|
||||||
video_description = self._html_search_regex(r'<div class="entry-content">(?P<description>.+?)</div>',
|
video_description = self._html_search_regex(r'<div class="entry-content">(?P<description>.+?)</div>',
|
||||||
webpage, u'description', flags=re.DOTALL, fatal=False)
|
webpage, 'description', flags=re.DOTALL, fatal=False)
|
||||||
if len(video_description) == 0:
|
if len(video_description) == 0:
|
||||||
video_description = None
|
video_description = None
|
||||||
|
|
||||||
playerdata = self._download_webpage(playerdata_url, video_id)
|
playerdata = self._download_webpage(playerdata_url, video_id)
|
||||||
url = self._html_search_regex(r'\'streamer\': \'(?P<url>[^\']+)\'', playerdata, u'url')
|
|
||||||
|
|
||||||
sd_file = self._html_search_regex(r'\'file\': \'(?P<sd_file>[^\']+)\'', playerdata, u'sd_file')
|
sd_url = self._html_search_regex(r'file: \'(?P<sd_file>[^\']+)\', label: \'SD\'', playerdata, 'sd_file')
|
||||||
hd_file = self._html_search_regex(r'\'?file\'?: "(?P<hd_file>[^"]+)"', playerdata, u'hd_file')
|
hd_url = self._html_search_regex(r'file: \'(?P<hd_file>[^\']+)\', label: \'HD\'', playerdata, 'hd_file')
|
||||||
video_thumbnail = self._html_search_regex(r'\'image\': \'(?P<thumbnail>[^\']+)\'', playerdata, u'thumbnail', fatal=False)
|
video_thumbnail = self._html_search_regex(r'image: \'(?P<thumbnail>[^\']+)\'', playerdata, 'thumbnail', fatal=False)
|
||||||
|
|
||||||
formats = [
|
formats = [
|
||||||
{
|
{
|
||||||
'url': url,
|
'url': sd_url,
|
||||||
'play_path': 'mp4:' + sd_file,
|
'ext': 'mp4',
|
||||||
'rtmp_live': True, # workaround
|
|
||||||
'ext': 'flv',
|
|
||||||
'format': 'sd',
|
'format': 'sd',
|
||||||
'format_id': 'sd',
|
'format_id': 'sd',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': url,
|
'url': hd_url,
|
||||||
'play_path': 'mp4:' + hd_file,
|
'ext': 'mp4',
|
||||||
'rtmp_live': True, # workaround
|
|
||||||
'ext': 'flv',
|
|
||||||
'format': 'hd',
|
'format': 'hd',
|
||||||
'format_id': 'hd',
|
'format_id': 'hd',
|
||||||
},
|
},
|
||||||
|
@ -40,7 +40,7 @@ class CollegeHumorIE(InfoExtractor):
|
|||||||
'id': 'W5gMp3ZjYg4',
|
'id': 'W5gMp3ZjYg4',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Funny Dogs Protecting Babies Compilation 2014 [NEW HD]',
|
'title': 'Funny Dogs Protecting Babies Compilation 2014 [NEW HD]',
|
||||||
'uploader': 'Funnyplox TV',
|
'uploader': 'FunnyPlox TV',
|
||||||
'uploader_id': 'funnyploxtv',
|
'uploader_id': 'funnyploxtv',
|
||||||
'description': 'md5:7ded37421526d54afdf005e25bc2b7a3',
|
'description': 'md5:7ded37421526d54afdf005e25bc2b7a3',
|
||||||
'upload_date': '20140128',
|
'upload_date': '20140128',
|
||||||
|
@@ -88,6 +88,10 @@ class InfoExtractor(object):
 
     The following fields are optional:
 
+    display_id      An alternative identifier for the video, not necessarily
+                    unique, but available before title. Typically, id is
+                    something like "4234987", title "Dancing naked mole rats",
+                    and display_id "dancing-naked-mole-rats"
     thumbnails:     A list of dictionaries (with the entries "resolution" and
                     "url") for the varying thumbnails
     thumbnail:      Full URL to a video thumbnail image.

@@ -432,14 +436,14 @@ class InfoExtractor(object):
         if secure: regexes = self._og_regexes('video:secure_url') + regexes
         return self._html_search_regex(regexes, html, name, **kargs)
 
-    def _html_search_meta(self, name, html, display_name=None):
+    def _html_search_meta(self, name, html, display_name=None, fatal=False):
         if display_name is None:
             display_name = name
         return self._html_search_regex(
             r'''(?ix)<meta
             (?=[^>]+(?:itemprop|name|property)=["\']%s["\'])
             [^>]+content=["\']([^"\']+)["\']''' % re.escape(name),
-            html, display_name, fatal=False)
+            html, display_name, fatal=fatal)
 
     def _dc_search_uploader(self, html):
         return self._html_search_meta('dc.creator', html, 'uploader')
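With the added fatal parameter, an extractor can make a missing <meta> tag a hard error instead of silently getting None back. A short usage sketch inside an extractor, mirroring how the Canal13cl extractor added in this range calls it (the meta names are Twitter Card fields):

# inside an InfoExtractor subclass, after `webpage` has been downloaded:
title = self._html_search_meta('twitter:title', webpage, 'title', fatal=True)
# description stays optional; extraction continues if the tag is missing
description = self._html_search_meta('twitter:description', webpage, 'description')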
@ -1,7 +1,11 @@
|
|||||||
# encoding: utf-8
|
# encoding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re, base64, zlib
|
import re
|
||||||
|
import json
|
||||||
|
import base64
|
||||||
|
import zlib
|
||||||
|
|
||||||
from hashlib import sha1
|
from hashlib import sha1
|
||||||
from math import pow, sqrt, floor
|
from math import pow, sqrt, floor
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
@ -19,13 +23,15 @@ from ..aes import (
|
|||||||
inc,
|
inc,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class CrunchyrollIE(InfoExtractor):
|
class CrunchyrollIE(InfoExtractor):
|
||||||
_VALID_URL = r'(?:https?://)?(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?:[^/]*/[^/?&]*?|media/\?id=)(?P<video_id>[0-9]+))(?:[/?&]|$)'
|
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?:[^/]*/[^/?&]*?|media/\?id=)(?P<video_id>[0-9]+))(?:[/?&]|$)'
|
||||||
_TESTS = [{
|
_TEST = {
|
||||||
'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
|
'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
|
||||||
'file': '645513.flv',
|
|
||||||
#'md5': 'b1639fd6ddfaa43788c85f6d1dddd412',
|
#'md5': 'b1639fd6ddfaa43788c85f6d1dddd412',
|
||||||
'info_dict': {
|
         'info_dict': {
+            'id': '645513',
+            'ext': 'flv',
             'title': 'Wanna be the Strongest in the World Episode 1 – An Idol-Wrestler is Born!',
             'description': 'md5:2d17137920c64f2f49981a7797d275ef',
             'thumbnail': 'http://img1.ak.crunchyroll.com/i/spire1-tmb/20c6b5e10f1a47b10516877d3c039cae1380951166_full.jpg',
@@ -36,7 +42,7 @@ class CrunchyrollIE(InfoExtractor):
             # rtmp
             'skip_download': True,
         },
-    }]
+    }

     _FORMAT_IDS = {
         '360': ('60', '106'),
@@ -80,9 +86,8 @@ class CrunchyrollIE(InfoExtractor):
         return zlib.decompress(decrypted_data)

     def _convert_subtitles_to_srt(self, subtitles):
-        i=1
         output = ''
-        for start, end, text in re.findall(r'<event [^>]*?start="([^"]+)" [^>]*?end="([^"]+)" [^>]*?text="([^"]+)"[^>]*?>', subtitles):
+        for i, (start, end, text) in enumerate(re.findall(r'<event [^>]*?start="([^"]+)" [^>]*?end="([^"]+)" [^>]*?text="([^"]+)"[^>]*?>', subtitles), 1):
             start = start.replace('.', ',')
             end = end.replace('.', ',')
             text = clean_html(text)
@@ -90,7 +95,6 @@ class CrunchyrollIE(InfoExtractor):
             if not text:
                 continue
             output += '%d\n%s --> %s\n%s\n\n' % (i, start, end, text)
-            i+=1
         return output

     def _real_extract(self,url):
@@ -108,6 +112,12 @@ class CrunchyrollIE(InfoExtractor):
         if note_m:
             raise ExtractorError(note_m)

+        mobj = re.search(r'Page\.messaging_box_controller\.addItems\(\[(?P<msg>{.+?})\]\)', webpage)
+        if mobj:
+            msg = json.loads(mobj.group('msg'))
+            if msg.get('type') == 'error':
+                raise ExtractorError('crunchyroll returned error: %s' % msg['message_body'], expected=True)
+
         video_title = self._html_search_regex(r'<h1[^>]*>(.+?)</h1>', webpage, 'video_title', flags=re.DOTALL)
         video_title = re.sub(r' {2,}', ' ', video_title)
         video_description = self._html_search_regex(r'"description":"([^"]+)', webpage, 'video_description', default='')
@@ -161,7 +171,7 @@ class CrunchyrollIE(InfoExtractor):
             data = base64.b64decode(data)

             subtitle = self._decrypt_subtitles(data, iv, id).decode('utf-8')
-            lang_code = self._search_regex(r'lang_code=\'([^\']+)', subtitle, 'subtitle_lang_code', fatal=False)
+            lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False)
             if not lang_code:
                 continue
             subtitles[lang_code] = self._convert_subtitles_to_srt(subtitle)
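The SRT-numbering change above replaces a hand-maintained counter with enumerate() started at 1. A minimal standalone sketch of the same pattern, using made-up event tuples rather than real Crunchyroll data:

# Sketch: number SRT blocks with enumerate(..., 1) instead of a manual counter (sample data only).
events = [('1.500', '3.000', 'Hello'), ('3.200', '5.000', 'World')]

output = ''
for i, (start, end, text) in enumerate(events, 1):
    start = start.replace('.', ',')  # SRT timestamps use a comma as the decimal separator
    end = end.replace('.', ',')
    output += '%d\n%s --> %s\n%s\n\n' % (i, start, end, text)

print(output)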
youtube_dl/extractor/facebook.py

@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import json
 import re
 import socket
@@ -26,20 +28,21 @@ class FacebookIE(InfoExtractor):
     _LOGIN_URL = 'https://www.facebook.com/login.php?next=http%3A%2F%2Ffacebook.com%2Fhome.php&login_attempt=1'
     _CHECKPOINT_URL = 'https://www.facebook.com/checkpoint/?next=http%3A%2F%2Ffacebook.com%2Fhome.php&_fb_noscript=1'
     _NETRC_MACHINE = 'facebook'
-    IE_NAME = u'facebook'
+    IE_NAME = 'facebook'
     _TEST = {
-        u'url': u'https://www.facebook.com/photo.php?v=120708114770723',
-        u'file': u'120708114770723.mp4',
-        u'md5': u'48975a41ccc4b7a581abd68651c1a5a8',
-        u'info_dict': {
-            u"duration": 279,
-            u"title": u"PEOPLE ARE AWESOME 2013"
+        'url': 'https://www.facebook.com/photo.php?v=120708114770723',
+        'md5': '48975a41ccc4b7a581abd68651c1a5a8',
+        'info_dict': {
+            'id': '120708114770723',
+            'ext': 'mp4',
+            'duration': 279,
+            'title': 'PEOPLE ARE AWESOME 2013'
         }
     }

     def report_login(self):
         """Report attempt to log in."""
-        self.to_screen(u'Logging in')
+        self.to_screen('Logging in')

     def _login(self):
         (useremail, password) = self._get_login_info()
@@ -50,9 +53,11 @@ class FacebookIE(InfoExtractor):
         login_page_req.add_header('Cookie', 'locale=en_US')
         self.report_login()
         login_page = self._download_webpage(login_page_req, None, note=False,
-            errnote=u'Unable to download login page')
-        lsd = self._search_regex(r'"lsd":"(\w*?)"', login_page, u'lsd')
-        lgnrnd = self._search_regex(r'name="lgnrnd" value="([^"]*?)"', login_page, u'lgnrnd')
+            errnote='Unable to download login page')
+        lsd = self._search_regex(
+            r'<input type="hidden" name="lsd" value="([^"]*)"',
+            login_page, 'lsd')
+        lgnrnd = self._search_regex(r'name="lgnrnd" value="([^"]*?)"', login_page, 'lgnrnd')

         login_form = {
             'email': useremail,
@@ -70,22 +75,22 @@ class FacebookIE(InfoExtractor):
         try:
             login_results = compat_urllib_request.urlopen(request).read()
             if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None:
-                self._downloader.report_warning(u'unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
+                self._downloader.report_warning('unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
                 return

             check_form = {
-                'fb_dtsg': self._search_regex(r'"fb_dtsg":"(.*?)"', login_results, u'fb_dtsg'),
-                'nh': self._search_regex(r'name="nh" value="(\w*?)"', login_results, u'nh'),
+                'fb_dtsg': self._search_regex(r'"fb_dtsg":"(.*?)"', login_results, 'fb_dtsg'),
+                'nh': self._search_regex(r'name="nh" value="(\w*?)"', login_results, 'nh'),
                 'name_action_selected': 'dont_save',
-                'submit[Continue]': self._search_regex(r'<input value="(.*?)" name="submit\[Continue\]"', login_results, u'continue'),
+                'submit[Continue]': self._search_regex(r'<input value="(.*?)" name="submit\[Continue\]"', login_results, 'continue'),
             }
             check_req = compat_urllib_request.Request(self._CHECKPOINT_URL, compat_urllib_parse.urlencode(check_form))
             check_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
             check_response = compat_urllib_request.urlopen(check_req).read()
             if re.search(r'id="checkpointSubmitButton"', check_response) is not None:
-                self._downloader.report_warning(u'Unable to confirm login, you have to login in your brower and authorize the login.')
+                self._downloader.report_warning('Unable to confirm login, you have to login in your brower and authorize the login.')
         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
+            self._downloader.report_warning('unable to log in: %s' % compat_str(err))
             return

     def _real_initialize(self):
@@ -94,7 +99,7 @@ class FacebookIE(InfoExtractor):
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         if mobj is None:
-            raise ExtractorError(u'Invalid URL: %s' % url)
+            raise ExtractorError('Invalid URL: %s' % url)
         video_id = mobj.group('id')

         url = 'https://www.facebook.com/video/video.php?v=%s' % video_id
@@ -107,10 +112,10 @@ class FacebookIE(InfoExtractor):
             m_msg = re.search(r'class="[^"]*uiInterstitialContent[^"]*"><div>(.*?)</div>', webpage)
             if m_msg is not None:
                 raise ExtractorError(
-                    u'The video is not available, Facebook said: "%s"' % m_msg.group(1),
+                    'The video is not available, Facebook said: "%s"' % m_msg.group(1),
                     expected=True)
             else:
-                raise ExtractorError(u'Cannot parse data')
+                raise ExtractorError('Cannot parse data')
         data = dict(json.loads(m.group(1)))
         params_raw = compat_urllib_parse.unquote(data['params'])
         params = json.loads(params_raw)
@@ -119,12 +124,12 @@ class FacebookIE(InfoExtractor):
         if not video_url:
             video_url = video_data['sd_src']
         if not video_url:
-            raise ExtractorError(u'Cannot find video URL')
+            raise ExtractorError('Cannot find video URL')
         video_duration = int(video_data['video_duration'])
         thumbnail = video_data['thumbnail_src']

         video_title = self._html_search_regex(
-            r'<h2 class="uiHeaderTitle">([^<]*)</h2>', webpage, u'title')
+            r'<h2 class="uiHeaderTitle">([^<]*)</h2>', webpage, 'title')

         info = {
             'id': video_id,
youtube_dl/extractor/fourtube.py

@@ -8,8 +8,8 @@ from ..utils import (
     unified_strdate,
     str_to_int,
     parse_duration,
+    clean_html,
 )
-from youtube_dl.utils import clean_html


 class FourTubeIE(InfoExtractor):
youtube_dl/extractor/gdcvault.py (new file, 134 lines)

@@ -0,0 +1,134 @@
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    compat_urllib_parse,
    compat_urllib_request,
)


class GDCVaultIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?gdcvault\.com/play/(?P<id>\d+)/(?P<name>(\w|-)+)'
    _TESTS = [
        {
            'url': 'http://www.gdcvault.com/play/1019721/Doki-Doki-Universe-Sweet-Simple',
            'md5': '7ce8388f544c88b7ac11c7ab1b593704',
            'info_dict': {
                'id': '1019721',
                'ext': 'mp4',
                'title': 'Doki-Doki Universe: Sweet, Simple and Genuine (GDC Next 10)'
            }
        },
        {
            'url': 'http://www.gdcvault.com/play/1015683/Embracing-the-Dark-Art-of',
            'info_dict': {
                'id': '1015683',
                'ext': 'flv',
                'title': 'Embracing the Dark Art of Mathematical Modeling in AI'
            },
            'params': {
                'skip_download': True, # Requires rtmpdump
            }
        },
    ]

    def _parse_mp4(self, xml_description):
        video_formats = []
        mp4_video = xml_description.find('./metadata/mp4video')
        if mp4_video is None:
            return None

        mobj = re.match(r'(?P<root>https?://.*?/).*', mp4_video.text)
        video_root = mobj.group('root')
        formats = xml_description.findall('./metadata/MBRVideos/MBRVideo')
        for format in formats:
            mobj = re.match(r'mp4\:(?P<path>.*)', format.find('streamName').text)
            url = video_root + mobj.group('path')
            vbr = format.find('bitrate').text
            video_formats.append({
                'url': url,
                'vbr': int(vbr),
            })
        return video_formats

    def _parse_flv(self, xml_description):
        video_formats = []
        akami_url = xml_description.find('./metadata/akamaiHost').text
        slide_video_path = xml_description.find('./metadata/slideVideo').text
        video_formats.append({
            'url': 'rtmp://' + akami_url + '/' + slide_video_path,
            'format_note': 'slide deck video',
            'quality': -2,
            'preference': -2,
            'format_id': 'slides',
        })
        speaker_video_path = xml_description.find('./metadata/speakerVideo').text
        video_formats.append({
            'url': 'rtmp://' + akami_url + '/' + speaker_video_path,
            'format_note': 'speaker video',
            'quality': -1,
            'preference': -1,
            'format_id': 'speaker',
        })
        return video_formats

    def _login(self, webpage_url, video_id):
        (username, password) = self._get_login_info()
        if username is None or password is None:
            self.report_warning('It looks like ' + webpage_url + ' requires a login. Try specifying a username and password and try again.')
            return None

        mobj = re.match(r'(?P<root_url>https?://.*?/).*', webpage_url)
        login_url = mobj.group('root_url') + 'api/login.php'
        logout_url = mobj.group('root_url') + 'logout'

        login_form = {
            'email': username,
            'password': password,
        }

        request = compat_urllib_request.Request(login_url, compat_urllib_parse.urlencode(login_form))
        request.add_header('Content-Type', 'application/x-www-form-urlencoded')
        self._download_webpage(request, video_id, 'Logging in')
        start_page = self._download_webpage(webpage_url, video_id, 'Getting authenticated video page')
        self._download_webpage(logout_url, video_id, 'Logging out')

        return start_page

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)

        video_id = mobj.group('id')
        webpage_url = 'http://www.gdcvault.com/play/' + video_id
        start_page = self._download_webpage(webpage_url, video_id)

        xml_root = self._html_search_regex(r'<iframe src="(?P<xml_root>.*?)player.html.*?".*?</iframe>', start_page, 'xml root', None, False)

        if xml_root is None:
            # Probably need to authenticate
            start_page = self._login(webpage_url, video_id)
            if start_page is None:
                self.report_warning('Could not login.')
            else:
                # Grab the url from the authenticated page
                xml_root = self._html_search_regex(r'<iframe src="(?P<xml_root>.*?)player.html.*?".*?</iframe>', start_page, 'xml root')

        xml_name = self._html_search_regex(r'<iframe src=".*?\?xml=(?P<xml_file>.+?\.xml).*?".*?</iframe>', start_page, 'xml filename', None, False)
        if xml_name is None:
            # Fallback to the older format
            xml_name = self._html_search_regex(r'<iframe src=".*?\?xmlURL=xml/(?P<xml_file>.+?\.xml).*?".*?</iframe>', start_page, 'xml filename')

        xml_decription_url = xml_root + 'xml/' + xml_name
        xml_description = self._download_xml(xml_decription_url, video_id)

        video_title = xml_description.find('./metadata/title').text
        video_formats = self._parse_mp4(xml_description)
        if video_formats is None:
            video_formats = self._parse_flv(xml_description)

        return {
            'id': video_id,
            'title': video_title,
            'formats': video_formats,
        }
youtube_dl/extractor/generic.py

@@ -83,10 +83,10 @@ class GenericIE(InfoExtractor):
         # Direct link to a video
         {
             'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
-            'file': 'trailer.mp4',
             'md5': '67d406c2bcb6af27fa886f31aa934bbe',
             'info_dict': {
                 'id': 'trailer',
+                'ext': 'mp4',
                 'title': 'trailer',
                 'upload_date': '20100513',
             }
@@ -94,7 +94,6 @@ class GenericIE(InfoExtractor):
         # ooyala video
         {
             'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
-            'file': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ.mp4',
             'md5': '5644c6ca5d5782c1d0d350dad9bd840c',
             'info_dict': {
                 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
@@ -102,6 +101,22 @@ class GenericIE(InfoExtractor):
                 'title': '2cc213299525360.mov', # that's what we get
             },
         },
+        # google redirect
+        {
+            'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
+            'info_dict': {
+                'id': 'cmQHVoWB5FY',
+                'ext': 'mp4',
+                'upload_date': '20130224',
+                'uploader_id': 'TheVerge',
+                'description': 'Chris Ziegler takes a look at the Alcatel OneTouch Fire and the ZTE Open; two of the first Firefox OS handsets to be officially announced.',
+                'uploader': 'The Verge',
+                'title': 'First Firefox OS phones side-by-side',
+            },
+            'params': {
+                'skip_download': False,
+            }
+        }
     ]

     def report_download_webpage(self, video_id):
@@ -363,11 +378,17 @@ class GenericIE(InfoExtractor):
         if mobj is not None:
             return self.url_result(mobj.group(1), 'Mpora')

-        # Look for embedded Novamov player
+        # Look for embedded NovaMov player
         mobj = re.search(
             r'<iframe[^>]+?src=(["\'])(?P<url>http://(?:(?:embed|www)\.)?novamov\.com/embed\.php.+?)\1', webpage)
         if mobj is not None:
-            return self.url_result(mobj.group('url'), 'Novamov')
+            return self.url_result(mobj.group('url'), 'NovaMov')
+
+        # Look for embedded NowVideo player
+        mobj = re.search(
+            r'<iframe[^>]+?src=(["\'])(?P<url>http://(?:(?:embed|www)\.)?nowvideo\.(?:ch|sx|eu)/embed\.php.+?)\1', webpage)
+        if mobj is not None:
+            return self.url_result(mobj.group('url'), 'NowVideo')

         # Look for embedded Facebook player
         mobj = re.search(
@@ -375,6 +396,11 @@ class GenericIE(InfoExtractor):
         if mobj is not None:
             return self.url_result(mobj.group('url'), 'Facebook')

+        # Look for embedded VK player
+        mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
+        if mobj is not None:
+            return self.url_result(mobj.group('url'), 'VK')
+
         # Look for embedded Huffington Post player
         mobj = re.search(
             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
@@ -405,6 +431,18 @@ class GenericIE(InfoExtractor):
         if mobj is None:
             # HTML5 video
             mobj = re.search(r'<video[^<]*(?:>.*?<source.*?)? src="([^"]+)"', webpage, flags=re.DOTALL)
+        if mobj is None:
+            mobj = re.search(
+                r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
+                r'(?:[a-z-]+="[^"]+"\s+)*?content="[0-9]{,2};url=\'([^\']+)\'"',
+                webpage)
+            if mobj:
+                new_url = mobj.group(1)
+                self.report_following_redirect(new_url)
+                return {
+                    '_type': 'url',
+                    'url': new_url,
+                }
         if mobj is None:
             raise ExtractorError('Unsupported URL: %s' % url)

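The new fallback above follows meta-refresh redirects declared in otherwise unsupported pages. A rough, self-contained illustration of what the added pattern matches; the page snippet below is invented for the example:

import re

webpage = '<meta http-equiv="refresh" content="0;url=\'http://example.com/video\'">'  # sample HTML, not a real page
mobj = re.search(
    r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
    r'(?:[a-z-]+="[^"]+"\s+)*?content="[0-9]{,2};url=\'([^\']+)\'"',
    webpage)
if mobj:
    print(mobj.group(1))  # prints http://example.com/video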
youtube_dl/extractor/iprima.py

@@ -10,7 +10,7 @@ from ..utils import compat_urllib_request


 class IPrimaIE(InfoExtractor):
-    _VALID_URL = r'https?://play\.iprima\.cz/(?P<videogroup>.+)/(?P<videoid>.+)'
+    _VALID_URL = r'https?://play\.iprima\.cz/[^?#]+/(?P<id>[^?#]+)'

     _TESTS = [{
         'url': 'http://play.iprima.cz/particka/particka-92',
@@ -22,20 +22,32 @@ class IPrimaIE(InfoExtractor):
             'thumbnail': 'http://play.iprima.cz/sites/default/files/image_crops/image_620x349/3/491483_particka-92_image_620x349.jpg',
         },
         'params': {
-            'skip_download': True,
+            'skip_download': True, # requires rtmpdump
         },
-    },
-    ]
+    }, {
+        'url': 'http://play.iprima.cz/particka/tchibo-particka-jarni-moda',
+        'info_dict': {
+            'id': '9718337',
+            'ext': 'flv',
+            'title': 'Tchibo Partička - Jarní móda',
+            'description': 'md5:589f8f59f414220621ff8882eb3ce7be',
+            'thumbnail': 're:^http:.*\.jpg$',
+        },
+        'params': {
+            'skip_download': True, # requires rtmpdump
+        },
+    }]

     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('videoid')
+        video_id = mobj.group('id')

         webpage = self._download_webpage(url, video_id)

-        player_url = 'http://embed.livebox.cz/iprimaplay/player-embed-v2.js?__tok%s__=%s' % (
-            floor(random()*1073741824),
-            floor(random()*1073741824))
+        player_url = (
+            'http://embed.livebox.cz/iprimaplay/player-embed-v2.js?__tok%s__=%s' %
+            (floor(random()*1073741824), floor(random()*1073741824))
+        )

         req = compat_urllib_request.Request(player_url)
         req.add_header('Referer', url)
@@ -44,18 +56,20 @@ class IPrimaIE(InfoExtractor):
         base_url = ''.join(re.findall(r"embed\['stream'\] = '(.+?)'.+'(\?auth=)'.+'(.+?)';", playerpage)[1])

         zoneGEO = self._html_search_regex(r'"zoneGEO":(.+?),', webpage, 'zoneGEO')

         if zoneGEO != '0':
-            base_url = base_url.replace('token', 'token_'+zoneGEO)
+            base_url = base_url.replace('token', 'token_' + zoneGEO)

         formats = []
         for format_id in ['lq', 'hq', 'hd']:
-            filename = self._html_search_regex(r'"%s_id":(.+?),' % format_id, webpage, 'filename')
+            filename = self._html_search_regex(
+                r'"%s_id":(.+?),' % format_id, webpage, 'filename')

             if filename == 'null':
                 continue

-            real_id = self._search_regex(r'Prima-[0-9]{10}-([0-9]+)_', filename, 'real video id')
+            real_id = self._search_regex(
+                r'Prima-(?:[0-9]{10}|WEB)-([0-9]+)[-_]',
+                filename, 'real video id')

             if format_id == 'lq':
                 quality = 0
@@ -63,13 +77,13 @@ class IPrimaIE(InfoExtractor):
                 quality = 1
             elif format_id == 'hd':
                 quality = 2
-                filename = 'hq/'+filename
+                filename = 'hq/' + filename

             formats.append({
                 'format_id': format_id,
                 'url': base_url,
                 'quality': quality,
-                'play_path': 'mp4:'+filename.replace('"', '')[:-4],
+                'play_path': 'mp4:' + filename.replace('"', '')[:-4],
                 'rtmp_live': True,
                 'ext': 'flv',
             })
youtube_dl/extractor/lifenews.py

@@ -6,7 +6,8 @@ import re
 from .common import InfoExtractor
 from ..utils import (
     int_or_none,
-    unified_strdate
+    unified_strdate,
+    ExtractorError,
 )


@@ -32,13 +33,11 @@ class LifeNewsIE(InfoExtractor):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')

-        webpage = self._download_webpage('http://lifenews.ru/mobile/news/%s' % video_id, video_id, 'Downloading page')
+        webpage = self._download_webpage('http://lifenews.ru/news/%s' % video_id, video_id, 'Downloading page')

-        video_url = self._html_search_regex(
-            r'<video.*?src="([^"]+)".*?></video>', webpage, 'video URL')
-        thumbnail = self._html_search_regex(
-            r'<video.*?poster="([^"]+)".*?"></video>', webpage, 'video thumbnail')
+        videos = re.findall(r'<video.*?poster="(?P<poster>[^"]+)".*?src="(?P<video>[^"]+)".*?></video>', webpage)
+        if not videos:
+            raise ExtractorError('No media links available for %s' % video_id)

         title = self._og_search_title(webpage)
         TITLE_SUFFIX = ' - Первый по срочным новостям — LIFE | NEWS'
@@ -50,20 +49,26 @@ class LifeNewsIE(InfoExtractor):
         view_count = self._html_search_regex(
             r'<div class=\'views\'>(\d+)</div>', webpage, 'view count', fatal=False)
         comment_count = self._html_search_regex(
-            r'<div class=\'comments\'>(\d+)</div>', webpage, 'comment count', fatal=False)
+            r'<div class=\'comments\'>\s*<span class=\'counter\'>(\d+)</span>', webpage, 'comment count', fatal=False)

         upload_date = self._html_search_regex(
             r'<time datetime=\'([^\']+)\'>', webpage, 'upload date',fatal=False)
         if upload_date is not None:
             upload_date = unified_strdate(upload_date)

-        return {
-            'id': video_id,
-            'url': video_url,
-            'thumbnail': thumbnail,
-            'title': title,
-            'description': description,
-            'view_count': int_or_none(view_count),
-            'comment_count': int_or_none(comment_count),
-            'upload_date': upload_date,
-        }
+        def make_entry(video_id, media, video_number=None):
+            return {
+                'id': video_id,
+                'url': media[1],
+                'thumbnail': media[0],
+                'title': title if video_number is None else '%s-video%s' % (title, video_number),
+                'description': description,
+                'view_count': int_or_none(view_count),
+                'comment_count': int_or_none(comment_count),
+                'upload_date': upload_date,
+            }
+
+        if len(videos) == 1:
+            return make_entry(video_id, videos[0])
+        else:
+            return [make_entry(video_id, media, video_number+1) for video_number, media in enumerate(videos)]
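The rewritten return path above yields one entry per <video> tag that the findall() picks up. A toy sketch of that single-vs-multiple dispatch, with invented (poster, src) tuples standing in for the real matches:

def make_entry(video_id, media, video_number=None):
    # media mirrors one findall() result: (poster URL, video URL)
    return {
        'id': video_id,
        'url': media[1],
        'thumbnail': media[0],
        'title': 'clip' if video_number is None else 'clip-video%s' % video_number,
    }

videos = [('poster1.jpg', 'a.mp4'), ('poster2.jpg', 'b.mp4')]  # sample data
if len(videos) == 1:
    result = make_entry('12345', videos[0])
else:
    result = [make_entry('12345', media, n + 1) for n, media in enumerate(videos)]
print(result)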
youtube_dl/extractor/mailru.py (new file, 66 lines)

@@ -0,0 +1,66 @@
# encoding: utf-8
from __future__ import unicode_literals

import re
import datetime

from .common import InfoExtractor


class MailRuIE(InfoExtractor):
    IE_NAME = 'mailru'
    IE_DESC = 'Видео@Mail.Ru'
    _VALID_URL = r'http://(?:www\.)?my\.mail\.ru/video/.*#video=/?(?P<id>[^/]+/[^/]+/[^/]+/\d+)'

    _TEST = {
        'url': 'http://my.mail.ru/video/top#video=/mail/sonypicturesrus/75/76',
        'md5': 'dea205f03120046894db4ebb6159879a',
        'info_dict': {
            'id': '46301138',
            'ext': 'mp4',
            'title': 'Новый Человек-Паук. Высокое напряжение. Восстание Электро',
            'upload_date': '20140224',
            'uploader': 'sonypicturesrus',
            'uploader_id': 'sonypicturesrus@mail.ru',
            'duration': 184,
        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        video_data = self._download_json(
            'http://videoapi.my.mail.ru/videos/%s.json?new=1' % video_id, video_id, 'Downloading video JSON')

        author = video_data['author']
        uploader = author['name']
        uploader_id = author['id']

        movie = video_data['movie']
        content_id = str(movie['contentId'])
        title = movie['title']
        thumbnail = movie['poster']
        duration = movie['duration']

        upload_date = datetime.datetime.fromtimestamp(video_data['timestamp']).strftime('%Y%m%d')
        view_count = video_data['views_count']

        formats = [
            {
                'url': video['url'],
                'format_id': video['name'],
            } for video in video_data['videos']
        ]

        return {
            'id': content_id,
            'title': title,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'duration': duration,
            'view_count': view_count,
            'formats': formats,
        }
youtube_dl/extractor/metacafe.py

@@ -166,6 +166,7 @@ class MetacafeIE(InfoExtractor):

         video_title = self._html_search_regex(r'(?im)<title>(.*) - Video</title>', webpage, u'title')
         description = self._og_search_description(webpage)
+        thumbnail = self._og_search_thumbnail(webpage)
         video_uploader = self._html_search_regex(
             r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("(?:channel|submiter)","([^"]+)"\);',
             webpage, u'uploader nickname', fatal=False)
@@ -183,6 +184,7 @@ class MetacafeIE(InfoExtractor):
             'uploader': video_uploader,
             'upload_date': None,
             'title': video_title,
+            'thumbnail':thumbnail,
             'ext': video_ext,
             'age_limit': age_limit,
         }
youtube_dl/extractor/mit.py

@@ -1,24 +1,30 @@
+from __future__ import unicode_literals
+
 import re
 import json

 from .common import InfoExtractor
+from .youtube import YoutubeIE
 from ..utils import (
+    compat_urlparse,
     clean_html,
+    ExtractorError,
     get_element_by_id,
 )


 class TechTVMITIE(InfoExtractor):
-    IE_NAME = u'techtv.mit.edu'
+    IE_NAME = 'techtv.mit.edu'
     _VALID_URL = r'https?://techtv\.mit\.edu/(videos|embeds)/(?P<id>\d+)'

     _TEST = {
-        u'url': u'http://techtv.mit.edu/videos/25418-mit-dna-learning-center-set',
-        u'file': u'25418.mp4',
-        u'md5': u'1f8cb3e170d41fd74add04d3c9330e5f',
-        u'info_dict': {
-            u'title': u'MIT DNA Learning Center Set',
-            u'description': u'md5:82313335e8a8a3f243351ba55bc1b474',
+        'url': 'http://techtv.mit.edu/videos/25418-mit-dna-learning-center-set',
+        'md5': '1f8cb3e170d41fd74add04d3c9330e5f',
+        'info_dict': {
+            'id': '25418',
+            'ext': 'mp4',
+            'title': 'MIT DNA Learning Center Set',
+            'description': 'md5:82313335e8a8a3f243351ba55bc1b474',
         },
     }

@@ -27,12 +33,12 @@ class TechTVMITIE(InfoExtractor):
         video_id = mobj.group('id')
         raw_page = self._download_webpage(
             'http://techtv.mit.edu/videos/%s' % video_id, video_id)
-        clean_page = re.compile(u'<!--.*?-->', re.S).sub(u'', raw_page)
+        clean_page = re.compile(r'<!--.*?-->', re.S).sub('', raw_page)

-        base_url = self._search_regex(r'ipadUrl: \'(.+?cloudfront.net/)',
-            raw_page, u'base url')
-        formats_json = self._search_regex(r'bitrates: (\[.+?\])', raw_page,
-            u'video formats')
+        base_url = self._search_regex(
+            r'ipadUrl: \'(.+?cloudfront.net/)', raw_page, 'base url')
+        formats_json = self._search_regex(
+            r'bitrates: (\[.+?\])', raw_page, 'video formats')
         formats_mit = json.loads(formats_json)
         formats = [
             {
@@ -48,28 +54,31 @@ class TechTVMITIE(InfoExtractor):

         title = get_element_by_id('edit-title', clean_page)
         description = clean_html(get_element_by_id('edit-description', clean_page))
-        thumbnail = self._search_regex(r'playlist:.*?url: \'(.+?)\'',
-            raw_page, u'thumbnail', flags=re.DOTALL)
+        thumbnail = self._search_regex(
+            r'playlist:.*?url: \'(.+?)\'',
+            raw_page, 'thumbnail', flags=re.DOTALL)

-        return {'id': video_id,
-                'title': title,
-                'formats': formats,
-                'description': description,
-                'thumbnail': thumbnail,
-                }
+        return {
+            'id': video_id,
+            'title': title,
+            'formats': formats,
+            'description': description,
+            'thumbnail': thumbnail,
+        }


 class MITIE(TechTVMITIE):
-    IE_NAME = u'video.mit.edu'
+    IE_NAME = 'video.mit.edu'
     _VALID_URL = r'https?://video\.mit\.edu/watch/(?P<title>[^/]+)'

     _TEST = {
-        u'url': u'http://video.mit.edu/watch/the-government-is-profiling-you-13222/',
-        u'file': u'21783.mp4',
-        u'md5': u'7db01d5ccc1895fc5010e9c9e13648da',
-        u'info_dict': {
-            u'title': u'The Government is Profiling You',
-            u'description': u'md5:ad5795fe1e1623b73620dbfd47df9afd',
+        'url': 'http://video.mit.edu/watch/the-government-is-profiling-you-13222/',
+        'md5': '7db01d5ccc1895fc5010e9c9e13648da',
+        'info_dict': {
+            'id': '21783',
+            'ext': 'mp4',
+            'title': 'The Government is Profiling You',
+            'description': 'md5:ad5795fe1e1623b73620dbfd47df9afd',
         },
     }

@@ -77,7 +86,73 @@ class MITIE(TechTVMITIE):
         mobj = re.match(self._VALID_URL, url)
         page_title = mobj.group('title')
         webpage = self._download_webpage(url, page_title)
-        self.to_screen('%s: Extracting %s url' % (page_title, TechTVMITIE.IE_NAME))
-        embed_url = self._search_regex(r'<iframe .*?src="(.+?)"', webpage,
-            u'embed url')
+        embed_url = self._search_regex(
+            r'<iframe .*?src="(.+?)"', webpage, 'embed url')
         return self.url_result(embed_url, ie='TechTVMIT')
+
+
+class OCWMITIE(InfoExtractor):
+    IE_NAME = 'ocw.mit.edu'
+    _VALID_URL = r'^http://ocw\.mit\.edu/courses/(?P<topic>[a-z0-9\-]+)'
+    _BASE_URL = 'http://ocw.mit.edu/'
+
+    _TESTS = [
+        {
+            'url': 'http://ocw.mit.edu/courses/electrical-engineering-and-computer-science/6-041-probabilistic-systems-analysis-and-applied-probability-fall-2010/video-lectures/lecture-7-multiple-variables-expectations-independence/',
+            'info_dict': {
+                'id': 'EObHWIEKGjA',
+                'ext': 'mp4',
+                'title': 'Lecture 7: Multiple Discrete Random Variables: Expectations, Conditioning, Independence',
+                'description': 'In this lecture, the professor discussed multiple random variables, expectations, and binomial distribution.',
+                #'subtitles': 'http://ocw.mit.edu/courses/electrical-engineering-and-computer-science/6-041-probabilistic-systems-analysis-and-applied-probability-fall-2010/video-lectures/lecture-7-multiple-variables-expectations-independence/MIT6_041F11_lec07_300k.mp4.srt'
+            }
+        },
+        {
+            'url': 'http://ocw.mit.edu/courses/mathematics/18-01sc-single-variable-calculus-fall-2010/1.-differentiation/part-a-definition-and-basic-rules/session-1-introduction-to-derivatives/',
+            'info_dict': {
+                'id': '7K1sB05pE0A',
+                'ext': 'mp4',
+                'title': 'Session 1: Introduction to Derivatives',
+                'description': 'This section contains lecture video excerpts, lecture notes, an interactive mathlet with supporting documents, and problem solving videos.',
+                #'subtitles': 'http://ocw.mit.edu//courses/mathematics/18-01sc-single-variable-calculus-fall-2010/ocw-18.01-f07-lec01_300k.SRT'
+            }
+        }
+    ]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        topic = mobj.group('topic')
+
+        webpage = self._download_webpage(url, topic)
+        title = self._html_search_meta('WT.cg_s', webpage)
+        description = self._html_search_meta('Description', webpage)
+
+        # search for call to ocw_embed_chapter_media(container_id, media_url, provider, page_url, image_url, start, stop, captions_file)
+        embed_chapter_media = re.search(r'ocw_embed_chapter_media\((.+?)\)', webpage)
+        if embed_chapter_media:
+            metadata = re.sub(r'[\'"]', '', embed_chapter_media.group(1))
+            metadata = re.split(r', ?', metadata)
+            yt = metadata[1]
+            subs = compat_urlparse.urljoin(self._BASE_URL, metadata[7])
+        else:
+            # search for call to ocw_embed_chapter_media(container_id, media_url, provider, page_url, image_url, captions_file)
+            embed_media = re.search(r'ocw_embed_media\((.+?)\)', webpage)
+            if embed_media:
+                metadata = re.sub(r'[\'"]', '', embed_media.group(1))
+                metadata = re.split(r', ?', metadata)
+                yt = metadata[1]
+                subs = compat_urlparse.urljoin(self._BASE_URL, metadata[5])
+            else:
+                raise ExtractorError('Unable to find embedded YouTube video.')
+        video_id = YoutubeIE.extract_id(yt)
+
+        return {
+            '_type': 'url_transparent',
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'url': yt,
+            'url_transparent'
+            'subtitles': subs,
+            'ie_key': 'Youtube',
+        }
youtube_dl/extractor/mixcloud.py

@@ -5,18 +5,20 @@ import re
 from .common import InfoExtractor
 from ..utils import (
     unified_strdate,
+    compat_urllib_parse,
     ExtractorError,
 )


 class MixcloudIE(InfoExtractor):
-    _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([\w\d-]+)/([\w\d-]+)'
+    _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([^/]+)/([^/]+)'
     IE_NAME = 'mixcloud'

     _TEST = {
         'url': 'http://www.mixcloud.com/dholbach/cryptkeeper/',
-        'file': 'dholbach-cryptkeeper.mp3',
         'info_dict': {
+            'id': 'dholbach-cryptkeeper',
+            'ext': 'mp3',
             'title': 'Cryptkeeper',
             'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.',
             'uploader': 'Daniel Holbach',
@@ -45,7 +47,7 @@ class MixcloudIE(InfoExtractor):
         mobj = re.match(self._VALID_URL, url)
         uploader = mobj.group(1)
         cloudcast_name = mobj.group(2)
-        track_id = '-'.join((uploader, cloudcast_name))
+        track_id = compat_urllib_parse.unquote('-'.join((uploader, cloudcast_name)))

         webpage = self._download_webpage(url, track_id)

youtube_dl/extractor/nbc.py

@@ -1,19 +1,46 @@
+from __future__ import unicode_literals
+
 import re

 from .common import InfoExtractor
 from ..utils import find_xpath_attr, compat_str


+class NBCIE(InfoExtractor):
+    _VALID_URL = r'http://www\.nbc\.com/[^/]+/video/[^/]+/(?P<id>n?\d+)'
+
+    _TEST = {
+        'url': 'http://www.nbc.com/chicago-fire/video/i-am-a-firefighter/2734188',
+        'md5': '54d0fbc33e0b853a65d7b4de5c06d64e',
+        'info_dict': {
+            'id': 'u1RInQZRN7QJ',
+            'ext': 'flv',
+            'title': 'I Am a Firefighter',
+            'description': 'An emergency puts Dawson\'sf irefighter skills to the ultimate test in this four-part digital series.',
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        webpage = self._download_webpage(url, video_id)
+        theplatform_url = self._search_regex('class="video-player video-player-full" data-mpx-url="(.*?)"', webpage, 'theplatform url')
+        if theplatform_url.startswith('//'):
+            theplatform_url = 'http:' + theplatform_url
+        return self.url_result(theplatform_url)
+
+
 class NBCNewsIE(InfoExtractor):
     _VALID_URL = r'https?://www\.nbcnews\.com/video/.+?/(?P<id>\d+)'

     _TEST = {
-        u'url': u'http://www.nbcnews.com/video/nbc-news/52753292',
-        u'file': u'52753292.flv',
-        u'md5': u'47abaac93c6eaf9ad37ee6c4463a5179',
-        u'info_dict': {
-            u'title': u'Crew emerges after four-month Mars food study',
-            u'description': u'md5:24e632ffac72b35f8b67a12d1b6ddfc1',
+        'url': 'http://www.nbcnews.com/video/nbc-news/52753292',
+        'md5': '47abaac93c6eaf9ad37ee6c4463a5179',
+        'info_dict': {
+            'id': '52753292',
+            'ext': 'flv',
+            'title': 'Crew emerges after four-month Mars food study',
+            'description': 'md5:24e632ffac72b35f8b67a12d1b6ddfc1',
         },
     }

@@ -23,10 +50,11 @@ class NBCNewsIE(InfoExtractor):
         all_info = self._download_xml('http://www.nbcnews.com/id/%s/displaymode/1219' % video_id, video_id)
         info = all_info.find('video')

-        return {'id': video_id,
-                'title': info.find('headline').text,
-                'ext': 'flv',
-                'url': find_xpath_attr(info, 'media', 'type', 'flashVideo').text,
-                'description': compat_str(info.find('caption').text),
-                'thumbnail': find_xpath_attr(info, 'media', 'type', 'thumbnail').text,
-                }
+        return {
+            'id': video_id,
+            'title': info.find('headline').text,
+            'ext': 'flv',
+            'url': find_xpath_attr(info, 'media', 'type', 'flashVideo').text,
+            'description': compat_str(info.find('caption').text),
+            'thumbnail': find_xpath_attr(info, 'media', 'type', 'thumbnail').text,
+        }
youtube_dl/extractor/normalboots.py

@@ -1,61 +1,51 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
 import re

 from .common import InfoExtractor

 from ..utils import (
-    ExtractorError,
     unified_strdate,
 )


 class NormalbootsIE(InfoExtractor):
-    _VALID_URL = r'(?:http://)?(?:www\.)?normalboots\.com/video/(?P<videoid>[0-9a-z-]*)/?$'
+    _VALID_URL = r'http://(?:www\.)?normalboots\.com/video/(?P<videoid>[0-9a-z-]*)/?$'
     _TEST = {
-        u'url': u'http://normalboots.com/video/home-alone-games-jontron/',
-        u'file': u'home-alone-games-jontron.mp4',
-        u'md5': u'8bf6de238915dd501105b44ef5f1e0f6',
-        u'info_dict': {
-            u'title': u'Home Alone Games - JonTron - NormalBoots',
-            u'description': u'Jon is late for Christmas. Typical. Thanks to: Paul Ritchey for Co-Writing/Filming: http://www.youtube.com/user/ContinueShow Michael Azzi for Christmas Intro Animation: http://michafrar.tumblr.com/ Jerrod Waters for Christmas Intro Music: http://www.youtube.com/user/xXJerryTerryXx Casey Ormond for \u2018Tense Battle Theme\u2019:\xa0http://www.youtube.com/Kiamet/',
-            u'uploader': u'JonTron',
-            u'upload_date': u'20140125',
+        'url': 'http://normalboots.com/video/home-alone-games-jontron/',
+        'md5': '8bf6de238915dd501105b44ef5f1e0f6',
+        'info_dict': {
+            'id': 'home-alone-games-jontron',
+            'ext': 'mp4',
+            'title': 'Home Alone Games - JonTron - NormalBoots',
+            'description': 'Jon is late for Christmas. Typical. Thanks to: Paul Ritchey for Co-Writing/Filming: http://www.youtube.com/user/ContinueShow Michael Azzi for Christmas Intro Animation: http://michafrar.tumblr.com/ Jerrod Waters for Christmas Intro Music: http://www.youtube.com/user/xXJerryTerryXx Casey Ormond for ‘Tense Battle Theme’:\xa0http://www.youtube.com/Kiamet/',
+            'uploader': 'JonTron',
+            'upload_date': '20140125',
         }
     }

     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-        if mobj is None:
-            raise ExtractorError(u'Invalid URL: %s' % url)
         video_id = mobj.group('videoid')

-        info = {
-            'id': video_id,
-            'uploader': None,
-            'upload_date': None,
-        }
-
-        if url[:4] != 'http':
-            url = 'http://' + url
-
         webpage = self._download_webpage(url, video_id)
-        video_title = self._og_search_title(webpage)
-        video_description = self._og_search_description(webpage)
-        video_thumbnail = self._og_search_thumbnail(webpage)
         video_uploader = self._html_search_regex(r'Posted\sby\s<a\shref="[A-Za-z0-9/]*">(?P<uploader>[A-Za-z]*)\s</a>',
             webpage, 'uploader')
         raw_upload_date = self._html_search_regex('<span style="text-transform:uppercase; font-size:inherit;">[A-Za-z]+, (?P<date>.*)</span>',
             webpage, 'date')
         video_upload_date = unified_strdate(raw_upload_date)
-        video_upload_date = unified_strdate(raw_upload_date)

         player_url = self._html_search_regex(r'<iframe\swidth="[0-9]+"\sheight="[0-9]+"\ssrc="(?P<url>[\S]+)"', webpage, 'url')
         player_page = self._download_webpage(player_url, video_id)
-        video_url = u'http://player.screenwavemedia.com/' + self._html_search_regex(r"'file':\s'(?P<file>[0-9A-Za-z-_\.]+)'", player_page, 'file')
+        video_url = self._html_search_regex(r"file:\s'(?P<file>[^']+\.mp4)'", player_page, 'file')

-        info['url'] = video_url
-        info['title'] = video_title
-        info['description'] = video_description
-        info['thumbnail'] = video_thumbnail
-        info['uploader'] = video_uploader
-        info['upload_date'] = video_upload_date
-
-        return info
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': self._og_search_title(webpage),
+            'description': self._og_search_description(webpage),
+            'thumbnail': self._og_search_thumbnail(webpage),
+            'uploader': video_uploader,
+            'upload_date': video_upload_date,
+        }
youtube_dl/extractor/novamov.py

@@ -9,14 +9,25 @@ from ..utils import (
 )


-class NovamovIE(InfoExtractor):
-    _VALID_URL = r'http://(?:(?:www\.)?novamov\.com/video/|(?:(?:embed|www)\.)novamov\.com/embed\.php\?v=)(?P<videoid>[a-z\d]{13})'
+class NovaMovIE(InfoExtractor):
+    IE_NAME = 'novamov'
+    IE_DESC = 'NovaMov'
+
+    _VALID_URL = r'http://(?:(?:www\.)?%(host)s/video/|(?:(?:embed|www)\.)%(host)s/embed\.php\?(?:.*?&)?v=)(?P<videoid>[a-z\d]{13})' % {'host': 'novamov\.com'}
+
+    _HOST = 'www.novamov.com'
+
+    _FILE_DELETED_REGEX = r'This file no longer exists on our servers!</h2>'
+    _FILEKEY_REGEX = r'flashvars\.filekey="(?P<filekey>[^"]+)";'
+    _TITLE_REGEX = r'(?s)<div class="v_tab blockborder rounded5" id="v_tab1">\s*<h3>([^<]+)</h3>'
+    _DESCRIPTION_REGEX = r'(?s)<div class="v_tab blockborder rounded5" id="v_tab1">\s*<h3>[^<]+</h3><p>([^<]+)</p>'

     _TEST = {
         'url': 'http://www.novamov.com/video/4rurhn9x446jj',
-        'file': '4rurhn9x446jj.flv',
         'md5': '7205f346a52bbeba427603ba10d4b935',
         'info_dict': {
+            'id': '4rurhn9x446jj',
+            'ext': 'flv',
             'title': 'search engine optimization',
             'description': 'search engine optimization is used to rank the web page in the google search engine'
         },
@@ -27,31 +38,26 @@ class NovamovIE(InfoExtractor):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('videoid')

-        page = self._download_webpage('http://www.novamov.com/video/%s' % video_id,
-            video_id, 'Downloading video page')
+        page = self._download_webpage(
+            'http://%s/video/%s' % (self._HOST, video_id), video_id, 'Downloading video page')

-        if re.search(r'This file no longer exists on our servers!</h2>', page) is not None:
+        if re.search(self._FILE_DELETED_REGEX, page) is not None:
             raise ExtractorError(u'Video %s does not exist' % video_id, expected=True)

-        filekey = self._search_regex(
-            r'flashvars\.filekey="(?P<filekey>[^"]+)";', page, 'filekey')
+        filekey = self._search_regex(self._FILEKEY_REGEX, page, 'filekey')

-        title = self._html_search_regex(
-            r'(?s)<div class="v_tab blockborder rounded5" id="v_tab1">\s*<h3>([^<]+)</h3>',
-            page, 'title', fatal=False)
+        title = self._html_search_regex(self._TITLE_REGEX, page, 'title', fatal=False)

-        description = self._html_search_regex(
-            r'(?s)<div class="v_tab blockborder rounded5" id="v_tab1">\s*<h3>[^<]+</h3><p>([^<]+)</p>',
-            page, 'description', fatal=False)
+        description = self._html_search_regex(self._DESCRIPTION_REGEX, page, 'description', default='', fatal=False)

         api_response = self._download_webpage(
-            'http://www.novamov.com/api/player.api.php?key=%s&file=%s' % (filekey, video_id),
-            video_id, 'Downloading video api response')
+            'http://%s/api/player.api.php?key=%s&file=%s' % (self._HOST, filekey, video_id), video_id,
+            'Downloading video api response')

         response = compat_urlparse.parse_qs(api_response)

         if 'error_msg' in response:
-            raise ExtractorError('novamov returned error: %s' % response['error_msg'][0], expected=True)
+            raise ExtractorError('%s returned error: %s' % (self.IE_NAME, response['error_msg'][0]), expected=True)

         video_url = response['url'][0]

@@ -1,46 +1,28 @@
-import re
+from __future__ import unicode_literals

-from .common import InfoExtractor
-from ..utils import compat_urlparse
+from .novamov import NovaMovIE


-class NowVideoIE(InfoExtractor):
-    _VALID_URL = r'(?:https?://)?(?:www\.)?nowvideo\.(?:ch|sx)/video/(?P<id>\w+)'
+class NowVideoIE(NovaMovIE):
+    IE_NAME = 'nowvideo'
+    IE_DESC = 'NowVideo'
+
+    _VALID_URL = r'http://(?:(?:www\.)?%(host)s/video/|(?:(?:embed|www)\.)%(host)s/embed\.php\?(?:.*?&)?v=)(?P<videoid>[a-z\d]{13})' % {'host': 'nowvideo\.(?:ch|sx|eu)'}
+
+    _HOST = 'www.nowvideo.ch'
+
+    _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
+    _FILEKEY_REGEX = r'var fkzd="([^"]+)";'
+    _TITLE_REGEX = r'<h4>([^<]+)</h4>'
+    _DESCRIPTION_REGEX = r'</h4>\s*<p>([^<]+)</p>'

     _TEST = {
-        u'url': u'http://www.nowvideo.ch/video/0mw0yow7b6dxa',
-        u'file': u'0mw0yow7b6dxa.flv',
-        u'md5': u'f8fbbc8add72bd95b7850c6a02fc8817',
-        u'info_dict': {
-            u"title": u"youtubedl test video _BaW_jenozKc.mp4"
+        'url': 'http://www.nowvideo.ch/video/0mw0yow7b6dxa',
+        'md5': 'f8fbbc8add72bd95b7850c6a02fc8817',
+        'info_dict': {
+            'id': '0mw0yow7b6dxa',
+            'ext': 'flv',
+            'title': 'youtubedl test video _BaW_jenozKc.mp4',
+            'description': 'Description',
         }
     }
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-
-        video_id = mobj.group('id')
-        webpage_url = 'http://www.nowvideo.ch/video/' + video_id
-        embed_url = 'http://embed.nowvideo.ch/embed.php?v=' + video_id
-        webpage = self._download_webpage(webpage_url, video_id)
-        embed_page = self._download_webpage(embed_url, video_id,
-            u'Downloading embed page')
-
-        self.report_extraction(video_id)
-
-        video_title = self._html_search_regex(r'<h4>(.*)</h4>',
-            webpage, u'video title')
-
-        video_key = self._search_regex(r'var fkzd="(.*)";',
-            embed_page, u'video key')
-
-        api_call = "http://www.nowvideo.ch/api/player.api.php?file={0}&numOfErrors=0&cid=1&key={1}".format(video_id, video_key)
-        api_response = self._download_webpage(api_call, video_id,
-            u'Downloading API page')
-        video_url = compat_urlparse.parse_qs(api_response)[u'url'][0]
-
-        return [{
-            'id': video_id,
-            'url': video_url,
-            'ext': 'flv',
-            'title': video_title,
-        }]
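Note: the NowVideo rewrite above works because the NovaMov extractor (first hunk in this comparison) now exposes the host and the site-specific regexes as class attributes. A minimal sketch of that pattern, with hypothetical class and host names, not the exact youtube-dl code:

    class NovaMovStyleBaseIE(object):
        # Attributes a subclass overrides; the request logic stays in the base class.
        _HOST = None
        _FILEKEY_REGEX = None
        _TITLE_REGEX = None

        def _api_url(self, filekey, video_id):
            # Every mirror exposes the same player.api.php endpoint, only the host differs.
            return 'http://%s/api/player.api.php?key=%s&file=%s' % (self._HOST, filekey, video_id)

    class HypotheticalMirrorIE(NovaMovStyleBaseIE):
        _HOST = 'www.example-mirror.net'          # hypothetical host, for illustration only
        _FILEKEY_REGEX = r'var fkzd="([^"]+)";'
        _TITLE_REGEX = r'<h4>([^<]+)</h4>'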
@@ -8,6 +8,7 @@ from .common import InfoExtractor
 from ..utils import (
     HEADRequest,
     unified_strdate,
+    ExtractorError,
 )


@@ -35,7 +36,15 @@ class ORFIE(InfoExtractor):
         data_json = self._search_regex(
             r'initializeAdworx\((.+?)\);\n', webpage, 'video info')
         all_data = json.loads(data_json)
-        sdata = all_data[0]['values']['segments']
+
+        def get_segments(all_data):
+            for data in all_data:
+                if data['name'] == 'Tracker::EPISODE_DETAIL_PAGE_OVER_PROGRAM':
+                    return data['values']['segments']
+
+        sdata = get_segments(all_data)
+        if not sdata:
+            raise ExtractorError('Unable to extract segments')

         def quality_to_int(s):
             m = re.search('([0-9]+)', s)
@@ -1,7 +1,10 @@
+from __future__ import unicode_literals
+
 import json
 import re

 from .common import InfoExtractor
+from ..utils import int_or_none


 class PodomaticIE(InfoExtractor):
@@ -9,14 +12,14 @@ class PodomaticIE(InfoExtractor):
     _VALID_URL = r'^(?P<proto>https?)://(?P<channel>[^.]+)\.podomatic\.com/entry/(?P<id>[^?]+)'

     _TEST = {
-        u"url": u"http://scienceteachingtips.podomatic.com/entry/2009-01-02T16_03_35-08_00",
-        u"file": u"2009-01-02T16_03_35-08_00.mp3",
-        u"md5": u"84bb855fcf3429e6bf72460e1eed782d",
-        u"info_dict": {
-            u"uploader": u"Science Teaching Tips",
-            u"uploader_id": u"scienceteachingtips",
-            u"title": u"64. When the Moon Hits Your Eye",
-            u"duration": 446,
+        "url": "http://scienceteachingtips.podomatic.com/entry/2009-01-02T16_03_35-08_00",
+        "file": "2009-01-02T16_03_35-08_00.mp3",
+        "md5": "84bb855fcf3429e6bf72460e1eed782d",
+        "info_dict": {
+            "uploader": "Science Teaching Tips",
+            "uploader_id": "scienceteachingtips",
+            "title": "64. When the Moon Hits Your Eye",
+            "duration": 446,
         }
     }

@@ -36,7 +39,7 @@ class PodomaticIE(InfoExtractor):
         uploader = data['podcast']
         title = data['title']
         thumbnail = data['imageLocation']
-        duration = int(data['length'] / 1000.0)
+        duration = int_or_none(data.get('length'), 1000)

         return {
             'id': video_id,
youtube_dl/extractor/prosiebensat1.py (new file)
@@ -0,0 +1,297 @@
# encoding: utf-8
from __future__ import unicode_literals

import re

from hashlib import sha1
from .common import InfoExtractor
from ..utils import (
    compat_urllib_parse,
    unified_strdate,
    clean_html,
    RegexNotFoundError,
)


class ProSiebenSat1IE(InfoExtractor):
    IE_NAME = 'prosiebensat1'
    IE_DESC = 'ProSiebenSat.1 Digital'
    _VALID_URL = r'https?://(?:www\.)?(?:(?:prosieben|prosiebenmaxx|sixx|sat1|kabeleins|ran|the-voice-of-germany)\.de|fem\.com)/(?P<id>.+)'

    _TESTS = [
        {
            'url': 'http://www.prosieben.de/tv/circus-halligalli/videos/218-staffel-2-episode-18-jahresrueckblick-ganze-folge',
            'info_dict': {
                'id': '2104602',
                'ext': 'mp4',
                'title': 'Staffel 2, Episode 18 - Jahresrückblick',
                'description': 'md5:8733c81b702ea472e069bc48bb658fc1',
                'upload_date': '20131231',
                'duration': 5845.04,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        },
        {
            'url': 'http://www.prosieben.de/videokatalog/Gesellschaft/Leben/Trends/video-Lady-Umstyling-f%C3%BCr-Audrina-Rebekka-Audrina-Fergen-billig-aussehen-Battal-Modica-700544.html',
            'info_dict': {
                'id': '2570327',
                'ext': 'mp4',
                'title': 'Lady-Umstyling für Audrina',
                'description': 'md5:4c16d0c17a3461a0d43ea4084e96319d',
                'upload_date': '20131014',
                'duration': 606.76,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
            'skip': 'Seems to be broken',
        },
        {
            'url': 'http://www.prosiebenmaxx.de/yep/one-piece/video/148-folge-48-gold-rogers-heimat-ganze-folge',
            'info_dict': {
                'id': '2437108',
                'ext': 'mp4',
                'title': 'Folge 48: Gold Rogers Heimat',
                'description': 'Ruffy erreicht die Insel, auf der der berühmte Gold Roger lebte und hingerichtet wurde.',
                'upload_date': '20140226',
                'duration': 1401.48,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        },
        {
            'url': 'http://www.sixx.de/stars-style/video/sexy-laufen-in-ugg-boots-clip',
            'info_dict': {
                'id': '2904997',
                'ext': 'mp4',
                'title': 'Sexy laufen in Ugg Boots',
                'description': 'md5:edf42b8bd5bc4e5da4db4222c5acb7d6',
                'upload_date': '20140122',
                'duration': 245.32,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        },
        {
            'url': 'http://www.sat1.de/film/der-ruecktritt/video/im-interview-kai-wiesinger-clip',
            'info_dict': {
                'id': '2906572',
                'ext': 'mp4',
                'title': 'Im Interview: Kai Wiesinger',
                'description': 'md5:e4e5370652ec63b95023e914190b4eb9',
                'upload_date': '20140225',
                'duration': 522.56,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        },
        {
            'url': 'http://www.kabeleins.de/tv/rosins-restaurants/videos/jagd-auf-fertigkost-im-elsthal-teil-2-ganze-folge',
            'info_dict': {
                'id': '2992323',
                'ext': 'mp4',
                'title': 'Jagd auf Fertigkost im Elsthal - Teil 2',
                'description': 'md5:2669cde3febe9bce13904f701e774eb6',
                'upload_date': '20140225',
                'duration': 2410.44,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        },
        {
            'url': 'http://www.ran.de/fussball/bundesliga/video/schalke-toennies-moechte-raul-zurueck-ganze-folge',
            'info_dict': {
                'id': '3004256',
                'ext': 'mp4',
                'title': 'Schalke: Tönnies möchte Raul zurück',
                'description': 'md5:4b5b271d9bcde223b54390754c8ece3f',
                'upload_date': '20140226',
                'duration': 228.96,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        },
        {
            'url': 'http://www.the-voice-of-germany.de/video/31-andreas-kuemmert-rocket-man-clip',
            'info_dict': {
                'id': '2572814',
                'ext': 'mp4',
                'title': 'Andreas Kümmert: Rocket Man',
                'description': 'md5:6ddb02b0781c6adf778afea606652e38',
                'upload_date': '20131017',
                'duration': 469.88,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        },
        {
            'url': 'http://www.fem.com/wellness/videos/wellness-video-clip-kurztripps-zum-valentinstag.html',
            'info_dict': {
                'id': '2156342',
                'ext': 'mp4',
                'title': 'Kurztrips zum Valentinstag',
                'description': 'md5:8ba6301e70351ae0bedf8da00f7ba528',
                'upload_date': '20130206',
                'duration': 307.24,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        },
    ]

    _CLIPID_REGEXES = [
        r'"clip_id"\s*:\s+"(\d+)"',
        r'clipid: "(\d+)"',
    ]
    _TITLE_REGEXES = [
        r'<h2 class="subtitle" itemprop="name">\s*(.+?)</h2>',
        r'<header class="clearfix">\s*<h3>(.+?)</h3>',
        r'<!-- start video -->\s*<h1>(.+?)</h1>',
        r'<div class="ep-femvideos-pi4-video-txt">\s*<h2>(.+?)</h2>',
    ]
    _DESCRIPTION_REGEXES = [
        r'<p itemprop="description">\s*(.+?)</p>',
        r'<div class="videoDecription">\s*<p><strong>Beschreibung</strong>: (.+?)</p>',
        r'<div class="g-plusone" data-size="medium"></div>\s*</div>\s*</header>\s*(.+?)\s*<footer>',
        r'<p>(.+?)</p>\s*<div class="ep-femvideos-pi4-video-footer">',
    ]
    _UPLOAD_DATE_REGEXES = [
        r'<meta property="og:published_time" content="(.+?)">',
        r'<span>\s*(\d{2}\.\d{2}\.\d{4} \d{2}:\d{2}) \|\s*<span itemprop="duration"',
        r'<footer>\s*(\d{2}\.\d{2}\.\d{4}) \d{2}:\d{2} Uhr',
        r'<span style="padding-left: 4px;line-height:20px; color:#404040">(\d{2}\.\d{2}\.\d{4})</span>',
        r'(\d{2}\.\d{2}\.\d{4}) \| \d{2}:\d{2} Min<br/>',
    ]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        page = self._download_webpage(url, video_id, 'Downloading page')

        def extract(patterns, name, page, fatal=False):
            for pattern in patterns:
                mobj = re.search(pattern, page)
                if mobj:
                    return clean_html(mobj.group(1))
            if fatal:
                raise RegexNotFoundError(u'Unable to extract %s' % name)
            return None

        clip_id = extract(self._CLIPID_REGEXES, 'clip id', page, fatal=True)

        access_token = 'testclient'
        client_name = 'kolibri-1.2.5'
        client_location = url

        videos_api_url = 'http://vas.sim-technik.de/vas/live/v2/videos?%s' % compat_urllib_parse.urlencode({
            'access_token': access_token,
            'client_location': client_location,
            'client_name': client_name,
            'ids': clip_id,
        })

        videos = self._download_json(videos_api_url, clip_id, 'Downloading videos JSON')

        duration = float(videos[0]['duration'])
        source_ids = [source['id'] for source in videos[0]['sources']]
        source_ids_str = ','.join(map(str, source_ids))

        g = '01!8d8F_)r9]4s[qeuXfP%'

        client_id = g[:2] + sha1(''.join([clip_id, g, access_token, client_location, g, client_name])
                                 .encode('utf-8')).hexdigest()

        sources_api_url = 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources?%s' % (clip_id, compat_urllib_parse.urlencode({
            'access_token': access_token,
            'client_id': client_id,
            'client_location': client_location,
            'client_name': client_name,
        }))

        sources = self._download_json(sources_api_url, clip_id, 'Downloading sources JSON')
        server_id = sources['server_id']

        client_id = g[:2] + sha1(''.join([g, clip_id, access_token, server_id,
                                          client_location, source_ids_str, g, client_name])
                                 .encode('utf-8')).hexdigest()

        url_api_url = 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources/url?%s' % (clip_id, compat_urllib_parse.urlencode({
            'access_token': access_token,
            'client_id': client_id,
            'client_location': client_location,
            'client_name': client_name,
            'server_id': server_id,
            'source_ids': source_ids_str,
        }))

        urls = self._download_json(url_api_url, clip_id, 'Downloading urls JSON')

        title = extract(self._TITLE_REGEXES, 'title', page, fatal=True)
        description = extract(self._DESCRIPTION_REGEXES, 'description', page)
        thumbnail = self._og_search_thumbnail(page)

        upload_date = extract(self._UPLOAD_DATE_REGEXES, 'upload date', page)
        if upload_date:
            upload_date = unified_strdate(upload_date)

        formats = []

        urls_sources = urls['sources']
        if isinstance(urls_sources, dict):
            urls_sources = urls_sources.values()

        def fix_bitrate(bitrate):
            return bitrate / 1000 if bitrate % 1000 == 0 else bitrate

        for source in urls_sources:
            protocol = source['protocol']
            if protocol == 'rtmp' or protocol == 'rtmpe':
                mobj = re.search(r'^(?P<url>rtmpe?://[^/]+/(?P<app>[^/]+))/(?P<playpath>.+)$', source['url'])
                if not mobj:
                    continue
                formats.append({
                    'url': mobj.group('url'),
                    'app': mobj.group('app'),
                    'play_path': mobj.group('playpath'),
                    'player_url': 'http://livepassdl.conviva.com/hf/ver/2.79.0.17083/LivePassModuleMain.swf',
                    'page_url': 'http://www.prosieben.de',
                    'vbr': fix_bitrate(source['bitrate']),
                    'ext': 'mp4',
                    'format_id': '%s_%s' % (source['cdn'], source['bitrate']),
                })
            else:
                formats.append({
                    'url': source['url'],
                    'vbr': fix_bitrate(source['bitrate']),
                })

        self._sort_formats(formats)

        return {
            'id': clip_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
            'duration': duration,
            'formats': formats,
        }
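Note: the new ProSiebenSat.1 extractor above authenticates its API calls by deriving a client_id from the clip id, a static salt and the client parameters. A condensed sketch of that derivation (salt, token and field order are copied from the file above; treat the concrete values as whatever the site currently accepts):

    from hashlib import sha1

    def make_client_id(clip_id, access_token, client_location, client_name,
                       salt='01!8d8F_)r9]4s[qeuXfP%'):
        # First two characters of the salt, then a SHA1 over the concatenated fields,
        # mirroring the videos-API request in the new extractor.
        digest = sha1(''.join(
            [clip_id, salt, access_token, client_location, salt, client_name]
        ).encode('utf-8')).hexdigest()
        return salt[:2] + digest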
@@ -1,148 +1,165 @@
 # encoding: utf-8
 from __future__ import unicode_literals

 import re

 from .common import InfoExtractor
 from ..utils import (
-    clean_html,
     ExtractorError,
+    clean_html,
+    unified_strdate,
+    int_or_none,
 )


 class RTLnowIE(InfoExtractor):
     """Information Extractor for RTL NOW, RTL2 NOW, RTL NITRO, SUPER RTL NOW, VOX NOW and n-tv NOW"""
-    _VALID_URL = r'(?:http://)?(?P<url>(?P<domain>rtl-now\.rtl\.de|rtl2now\.rtl2\.de|(?:www\.)?voxnow\.de|(?:www\.)?rtlnitronow\.de|(?:www\.)?superrtlnow\.de|(?:www\.)?n-tvnow\.de)/+[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P<video_id>[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)'
-    _TESTS = [{
-        'url': 'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1',
-        'file': '90419.flv',
-        'info_dict': {
-            'upload_date': '20070416',
-            'title': 'Ahornallee - Folge 1 - Der Einzug',
-            'description': 'Folge 1 - Der Einzug',
-        },
-        'params': {
-            'skip_download': True,
-        },
-        'skip': 'Only works from Germany',
-    },
-    {
-        'url': 'http://rtl2now.rtl2.de/aerger-im-revier/episode-15-teil-1.php?film_id=69756&player=1&season=2&index=5',
-        'file': '69756.flv',
-        'info_dict': {
-            'upload_date': '20120519',
-            'title': 'Ärger im Revier - Ein junger Ladendieb, ein handfester Streit...',
-            'description': 'Ärger im Revier - Ein junger Ladendieb, ein handfester Streit u.a.',
-            'thumbnail': 'http://autoimg.static-fra.de/rtl2now/219850/1500x1500/image2.jpg',
-        },
-        'params': {
-            'skip_download': True,
-        },
-        'skip': 'Only works from Germany',
-    },
-    {
-        'url': 'http://www.voxnow.de/voxtours/suedafrika-reporter-ii.php?film_id=13883&player=1&season=17',
-        'file': '13883.flv',
-        'info_dict': {
-            'upload_date': '20090627',
-            'title': 'Voxtours - Südafrika-Reporter II',
-            'description': 'Südafrika-Reporter II',
-        },
-        'params': {
-            'skip_download': True,
-        },
-    },
-    {
-        'url': 'http://superrtlnow.de/medicopter-117/angst.php?film_id=99205&player=1',
-        'file': '99205.flv',
-        'info_dict': {
-            'upload_date': '20080928',
-            'title': 'Medicopter 117 - Angst!',
-            'description': 'Angst!',
-            'thumbnail': 'http://autoimg.static-fra.de/superrtlnow/287529/1500x1500/image2.jpg'
-        },
-        'params': {
-            'skip_download': True,
-        },
-    },
-    {
-        'url': 'http://www.n-tvnow.de/top-gear/episode-1-2013-01-01-00-00-00.php?film_id=124903&player=1&season=10',
-        'file': '124903.flv',
-        'info_dict': {
-            'upload_date': '20130101',
-            'title': 'Top Gear vom 01.01.2013',
-            'description': 'Episode 1',
-        },
-        'params': {
-            'skip_download': True,
-        },
-        'skip': 'Only works from Germany',
-    }]
+    _VALID_URL = r'''(?x)
+                        (?:https?://)?
+                        (?P<url>
+                            (?P<domain>
+                                rtl-now\.rtl\.de|
+                                rtl2now\.rtl2\.de|
+                                (?:www\.)?voxnow\.de|
+                                (?:www\.)?rtlnitronow\.de|
+                                (?:www\.)?superrtlnow\.de|
+                                (?:www\.)?n-tvnow\.de)
+                            /+[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?
+                            (?:container_id|film_id)=(?P<video_id>[0-9]+)&
+                            player=1(?:&season=[0-9]+)?(?:&.*)?
+                    )'''
+
+    _TESTS = [
+        {
+            'url': 'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1',
+            'info_dict': {
+                'id': '90419',
+                'ext': 'flv',
+                'title': 'Ahornallee - Folge 1 - Der Einzug',
+                'description': 'md5:ce843b6b5901d9a7f7d04d1bbcdb12de',
+                'upload_date': '20070416',
+                'duration': 1685,
+            },
+            'params': {
+                'skip_download': True,
+            },
+            'skip': 'Only works from Germany',
+        },
+        {
+            'url': 'http://rtl2now.rtl2.de/aerger-im-revier/episode-15-teil-1.php?film_id=69756&player=1&season=2&index=5',
+            'info_dict': {
+                'id': '69756',
+                'ext': 'flv',
+                'title': 'Ärger im Revier - Ein junger Ladendieb, ein handfester Streit u.a.',
+                'description': 'md5:3fb247005ed21a935ffc82b7dfa70cf0',
+                'thumbnail': 'http://autoimg.static-fra.de/rtl2now/219850/1500x1500/image2.jpg',
+                'upload_date': '20120519',
+                'duration': 1245,
+            },
+            'params': {
+                'skip_download': True,
+            },
+            'skip': 'Only works from Germany',
+        },
+        {
+            'url': 'http://www.voxnow.de/voxtours/suedafrika-reporter-ii.php?film_id=13883&player=1&season=17',
+            'info_dict': {
+                'id': '13883',
+                'ext': 'flv',
+                'title': 'Voxtours - Südafrika-Reporter II',
+                'description': 'md5:de7f8d56be6fd4fed10f10f57786db00',
+                'upload_date': '20090627',
+                'duration': 1800,
+            },
+            'params': {
+                'skip_download': True,
+            },
+        },
+        {
+            'url': 'http://superrtlnow.de/medicopter-117/angst.php?film_id=99205&player=1',
+            'info_dict': {
+                'id': '99205',
+                'ext': 'flv',
+                'title': 'Medicopter 117 - Angst!',
+                'description': 'md5:895b1df01639b5f61a04fc305a5cb94d',
+                'thumbnail': 'http://autoimg.static-fra.de/superrtlnow/287529/1500x1500/image2.jpg',
+                'upload_date': '20080928',
+                'duration': 2691,
+            },
+            'params': {
+                'skip_download': True,
+            },
+        },
+        {
+            'url': 'http://www.n-tvnow.de/deluxe-alles-was-spass-macht/thema-ua-luxushotel-fuer-vierbeiner.php?container_id=153819&player=1&season=0',
+            'info_dict': {
+                'id': '153819',
+                'ext': 'flv',
+                'title': 'Deluxe - Alles was Spaß macht - Thema u.a.: Luxushotel für Vierbeiner',
+                'description': 'md5:c3705e1bb32e1a5b2bcd634fc065c631',
+                'thumbnail': 'http://autoimg.static-fra.de/ntvnow/383157/1500x1500/image2.jpg',
+                'upload_date': '20140221',
+                'duration': 2429,
+            },
+            'skip': 'Only works from Germany',
+        },
+    ]

     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-        webpage_url = 'http://' + mobj.group('url')
-        video_page_url = 'http://' + mobj.group('domain') + '/'
+        video_page_url = 'http://%s/' % mobj.group('domain')
         video_id = mobj.group('video_id')

-        webpage = self._download_webpage(webpage_url, video_id)
+        webpage = self._download_webpage('http://' + mobj.group('url'), video_id)

-        note_m = re.search(r'''(?sx)
-            <div[ ]style="margin-left:[ ]20px;[ ]font-size:[ ]13px;">(.*?)
-            <div[ ]id="playerteaser">''', webpage)
-        if note_m:
-            msg = clean_html(note_m.group(1))
-            raise ExtractorError(msg)
+        mobj = re.search(r'(?s)<div style="margin-left: 20px; font-size: 13px;">(.*?)<div id="playerteaser">', webpage)
+        if mobj:
+            raise ExtractorError(clean_html(mobj.group(1)), expected=True)

-        video_title = self._html_search_regex(
-            r'<title>(?P<title>[^<]+?)( \| [^<]*)?</title>',
-            webpage, 'title')
+        title = self._og_search_title(webpage)
+        description = self._og_search_description(webpage)
+        thumbnail = self._og_search_thumbnail(webpage, default=None)
+
+        upload_date = unified_strdate(self._html_search_meta('uploadDate', webpage, 'upload date'))
+
+        mobj = re.search(r'<meta itemprop="duration" content="PT(?P<seconds>\d+)S" />', webpage)
+        duration = int(mobj.group('seconds')) if mobj else None
+
         playerdata_url = self._html_search_regex(
-            r'\'playerdata\': \'(?P<playerdata_url>[^\']+)\'',
-            webpage, 'playerdata_url')
+            r"'playerdata': '(?P<playerdata_url>[^']+)'", webpage, 'playerdata_url')

-        playerdata = self._download_webpage(playerdata_url, video_id)
-        mobj = re.search(r'<title><!\[CDATA\[(?P<description>.+?)(?:\s+- (?:Sendung )?vom (?P<upload_date_d>[0-9]{2})\.(?P<upload_date_m>[0-9]{2})\.(?:(?P<upload_date_Y>[0-9]{4})|(?P<upload_date_y>[0-9]{2})) [0-9]{2}:[0-9]{2} Uhr)?\]\]></title>', playerdata)
-        if mobj:
-            video_description = mobj.group('description')
-            if mobj.group('upload_date_Y'):
-                video_upload_date = mobj.group('upload_date_Y')
-            elif mobj.group('upload_date_y'):
-                video_upload_date = '20' + mobj.group('upload_date_y')
-            else:
-                video_upload_date = None
-            if video_upload_date:
-                video_upload_date += mobj.group('upload_date_m') + mobj.group('upload_date_d')
-        else:
-            video_description = None
-            video_upload_date = None
-            self._downloader.report_warning('Unable to extract description and upload date')
-
-        # Thumbnail: not every video has an thumbnail
-        mobj = re.search(r'<meta property="og:image" content="(?P<thumbnail>[^"]+)">', webpage)
-        if mobj:
-            video_thumbnail = mobj.group('thumbnail')
-        else:
-            video_thumbnail = None
-
-        mobj = re.search(r'<filename [^>]+><!\[CDATA\[(?P<url>rtmpe://(?:[^/]+/){2})(?P<play_path>[^\]]+)\]\]></filename>', playerdata)
-        if mobj is None:
-            raise ExtractorError('Unable to extract media URL')
-        video_url = mobj.group('url')
-        video_play_path = 'mp4:' + mobj.group('play_path')
-        video_player_url = video_page_url + 'includes/vodplayer.swf'
+        playerdata = self._download_xml(playerdata_url, video_id, 'Downloading player data XML')
+
+        videoinfo = playerdata.find('./playlist/videoinfo')
+
+        formats = []
+        for filename in videoinfo.findall('filename'):
+            mobj = re.search(r'(?P<url>rtmpe://(?:[^/]+/){2})(?P<play_path>.+)', filename.text)
+            if mobj:
+                fmt = {
+                    'url': mobj.group('url'),
+                    'play_path': 'mp4:' + mobj.group('play_path'),
+                    'page_url': video_page_url,
+                    'player_url': video_page_url + 'includes/vodplayer.swf',
+                }
+            else:
+                fmt = {
+                    'url': filename.text,
+                }
+            fmt.update({
+                'width': int_or_none(filename.get('width')),
+                'height': int_or_none(filename.get('height')),
+                'vbr': int_or_none(filename.get('bitrate')),
+                'ext': 'flv',
+            })
+            formats.append(fmt)

         return {
             'id': video_id,
-            'url': video_url,
-            'play_path': video_play_path,
-            'page_url': video_page_url,
-            'player_url': video_player_url,
-            'ext': 'flv',
-            'title': video_title,
-            'description': video_description,
-            'upload_date': video_upload_date,
-            'thumbnail': video_thumbnail,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'upload_date': upload_date,
+            'duration': duration,
+            'formats': formats,
         }
youtube_dl/extractor/savefrom.py (new file)
@@ -0,0 +1,37 @@
# coding: utf-8
from __future__ import unicode_literals

import os.path
import re

from .common import InfoExtractor


class SaveFromIE(InfoExtractor):
    IE_NAME = 'savefrom.net'
    _VALID_URL = r'https?://[^.]+\.savefrom\.net/\#url=(?P<url>.*)$'

    _TEST = {
        'url': 'http://en.savefrom.net/#url=http://youtube.com/watch?v=UlVRAPW2WJY&utm_source=youtube.com&utm_medium=short_domains&utm_campaign=ssyoutube.com',
        'info_dict': {
            'id': 'UlVRAPW2WJY',
            'ext': 'mp4',
            'title': 'About Team Radical MMA | MMA Fighting',
            'upload_date': '20120816',
            'uploader': 'Howcast',
            'uploader_id': 'Howcast',
            'description': 'md5:4f0aac94361a12e1ce57d74f85265175',
        },
        'params': {
            'skip_download': True
        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = os.path.splitext(url.split('/')[-1])[0]
        return {
            '_type': 'url',
            'id': video_id,
            'url': mobj.group('url'),
        }
@@ -217,7 +217,7 @@ class SoundcloudIE(InfoExtractor):
         return self._extract_info_dict(info, full_title, secret_token=token)

 class SoundcloudSetIE(SoundcloudIE):
-    _VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)(?:[?].*)?$'
+    _VALID_URL = r'https?://(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)'
     IE_NAME = 'soundcloud:set'
     # it's in tests/test_playlists.py
     _TESTS = []
@@ -1,6 +1,5 @@
 from __future__ import unicode_literals

-import os
 import re

 from .common import InfoExtractor
@@ -8,23 +7,27 @@ from ..utils import (
     compat_urllib_parse_urlparse,
     compat_urllib_request,
     compat_urllib_parse,
+    unified_strdate,
+    str_to_int,
+    int_or_none,
 )
-from ..aes import (
-    aes_decrypt_text
-)
+from ..aes import aes_decrypt_text


 class SpankwireIE(InfoExtractor):
-    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>spankwire\.com/[^/]*/video(?P<videoid>[0-9]+)/?)'
+    _VALID_URL = r'https?://(?:www\.)?(?P<url>spankwire\.com/[^/]*/video(?P<videoid>[0-9]+)/?)'
     _TEST = {
         'url': 'http://www.spankwire.com/Buckcherry-s-X-Rated-Music-Video-Crazy-Bitch/video103545/',
-        'file': '103545.mp4',
-        'md5': '1b3f55e345500552dbc252a3e9c1af43',
+        'md5': '8bbfde12b101204b39e4b9fe7eb67095',
         'info_dict': {
-            "uploader": "oreusz",
-            "title": "Buckcherry`s X Rated Music Video Crazy Bitch",
-            "description": "Crazy Bitch X rated music video.",
-            "age_limit": 18,
+            'id': '103545',
+            'ext': 'mp4',
+            'title': 'Buckcherry`s X Rated Music Video Crazy Bitch',
+            'description': 'Crazy Bitch X rated music video.',
+            'uploader': 'oreusz',
+            'uploader_id': '124697',
+            'upload_date': '20070508',
+            'age_limit': 18,
         }
     }

@@ -37,13 +40,26 @@ class SpankwireIE(InfoExtractor):
         req.add_header('Cookie', 'age_verified=1')
         webpage = self._download_webpage(req, video_id)

-        video_title = self._html_search_regex(r'<h1>([^<]+)', webpage, 'title')
-        video_uploader = self._html_search_regex(
-            r'by:\s*<a [^>]*>(.+?)</a>', webpage, 'uploader', fatal=False)
-        thumbnail = self._html_search_regex(
-            r'flashvars\.image_url = "([^"]+)', webpage, 'thumbnail', fatal=False)
+        title = self._html_search_regex(r'<h1>([^<]+)', webpage, 'title')
         description = self._html_search_regex(
             r'<div\s+id="descriptionContent">([^<]+)<', webpage, 'description', fatal=False)
+        thumbnail = self._html_search_regex(
+            r'flashvars\.image_url = "([^"]+)', webpage, 'thumbnail', fatal=False)
+
+        uploader = self._html_search_regex(
+            r'by:\s*<a [^>]*>(.+?)</a>', webpage, 'uploader', fatal=False)
+        uploader_id = self._html_search_regex(
+            r'by:\s*<a href="/Profile\.aspx\?.*?UserId=(\d+).*?"', webpage, 'uploader id', fatal=False)
+        upload_date = self._html_search_regex(r'</a> on (.+?) at \d+:\d+', webpage, 'upload date', fatal=False)
+        if upload_date:
+            upload_date = unified_strdate(upload_date)
+
+        view_count = self._html_search_regex(
+            r'<div id="viewsCounter"><span>([^<]+)</span> views</div>', webpage, 'view count', fatal=False)
+        if view_count:
+            view_count = str_to_int(view_count)
+        comment_count = int_or_none(self._html_search_regex(
+            r'<span id="spCommentCount">\s*(\d+)</span> Comments</div>', webpage, 'comment count', fatal=False))

         video_urls = list(map(compat_urllib_parse.unquote , re.findall(r'flashvars\.quality_[0-9]{3}p = "([^"]+)', webpage)))
         if webpage.find('flashvars\.encrypted = "true"') != -1:
@@ -53,16 +69,13 @@ class SpankwireIE(InfoExtractor):
         formats = []
         for video_url in video_urls:
             path = compat_urllib_parse_urlparse(video_url).path
-            extension = os.path.splitext(path)[1][1:]
             format = path.split('/')[4].split('_')[:2]
             resolution, bitrate_str = format
             format = "-".join(format)
-            height = int(resolution.rstrip('P'))
-            tbr = int(bitrate_str.rstrip('K'))
+            height = int(resolution.rstrip('Pp'))
+            tbr = int(bitrate_str.rstrip('Kk'))

             formats.append({
                 'url': video_url,
-                'ext': extension,
                 'resolution': resolution,
                 'format': format,
                 'tbr': tbr,
@@ -75,10 +88,14 @@ class SpankwireIE(InfoExtractor):

         return {
             'id': video_id,
-            'uploader': video_uploader,
-            'title': video_title,
-            'thumbnail': thumbnail,
+            'title': title,
             'description': description,
+            'thumbnail': thumbnail,
+            'uploader': uploader,
+            'uploader_id': uploader_id,
+            'upload_date': upload_date,
+            'view_count': view_count,
+            'comment_count': comment_count,
             'formats': formats,
             'age_limit': age_limit,
         }
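Note: in the Spankwire hunks above, resolution and bitrate come straight out of the video URL path (a segment such as 720P_1500K_...), and the widened rstrip('Pp') / rstrip('Kk') calls are what make lowercase suffixes parse too. A small stand-alone illustration with a made-up URL; the real CDN paths may be shaped differently:

    from urllib.parse import urlparse  # youtube-dl uses compat_urllib_parse_urlparse

    def parse_spankwire_format(video_url):
        # Hypothetical path: /videos/2007/05/720P_1500K_103545.mp4
        path = urlparse(video_url).path
        resolution, bitrate_str = path.split('/')[4].split('_')[:2]
        height = int(resolution.rstrip('Pp'))   # '720P' or '720p' -> 720
        tbr = int(bitrate_str.rstrip('Kk'))     # '1500K' or '1500k' -> 1500
        return height, tbr

    print(parse_spankwire_format('http://cdn.example.com/videos/2007/05/720P_1500K_103545.mp4'))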
@@ -39,6 +39,8 @@ class TestURLIE(InfoExtractor):
                 ('Found multiple matching extractors: %s' %
                  ' '.join(ie.IE_NAME for ie in matching_extractors)),
                 expected=True)
+        else:
+            extractor = matching_extractors[0]

         num_str = mobj.group('num')
         num = int(num_str) if num_str else 0
@@ -13,7 +13,7 @@ _x = lambda p: xpath_with_ns(p, {'smil': 'http://www.w3.org/2005/SMIL21/Language
 class ThePlatformIE(InfoExtractor):
     _VALID_URL = r'''(?x)
         (?:https?://(?:link|player)\.theplatform\.com/[sp]/[^/]+/
-           (?P<config>[^/\?]+/(?:swf|config)/select/)?
+           (?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/)?
          |theplatform:)(?P<id>[^/\?&]+)'''

     _TEST = {
@@ -54,10 +54,15 @@ class ThePlatformIE(InfoExtractor):

         f4m_node = body.find(_x('smil:seq/smil:video'))
         if f4m_node is not None:
+            f4m_url = f4m_node.attrib['src']
+            if 'manifest.f4m?' not in f4m_url:
+                f4m_url += '?'
+            # the parameters are from syfy.com, other sites may use others,
+            # they also work for nbc.com
+            f4m_url += '&g=UXWGVKRWHFSP&hdcore=3.0.3'
             formats = [{
                 'ext': 'flv',
-                # the parameters are from syfy.com, other sites may use others
-                'url': f4m_node.attrib['src'] + '?g=UXWGVKRWHFSP&hdcore=3.0.3',
+                'url': f4m_url,
             }]
         else:
             base_url = head.find(_x('smil:meta')).attrib['base']
@@ -95,9 +100,10 @@ class ThePlatformIE(InfoExtractor):
         if mobj.group('config'):
             config_url = url+ '&form=json'
             config_url = config_url.replace('swf/', 'config/')
+            config_url = config_url.replace('onsite/', 'onsite/config/')
             config_json = self._download_webpage(config_url, video_id, u'Downloading config')
             config = json.loads(config_json)
-            smil_url = config['releaseUrl'] + '&format=SMIL&formats=MPEG4'
+            smil_url = config['releaseUrl'] + '&format=SMIL&formats=MPEG4&manifest=f4m'
         else:
             smil_url = ('http://link.theplatform.com/s/dJ5BDC/{0}/meta.smil?'
                 'format=smil&mbr=true'.format(video_id))
@@ -3,7 +3,7 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
-from youtube_dl.utils import ExtractorError
+from ..utils import ExtractorError


 class TinyPicIE(InfoExtractor):
youtube_dl/extractor/trutube.py (new file)
@@ -0,0 +1,44 @@
from __future__ import unicode_literals

import re

from .common import InfoExtractor


class TruTubeIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?trutube\.tv/video/(?P<id>[0-9]+)/.*'
    _TEST = {
        'url': 'http://trutube.tv/video/14880/Ramses-II-Proven-To-Be-A-Red-Headed-Caucasoid-',
        'md5': 'c5b6e301b0a2040b074746cbeaa26ca1',
        'info_dict': {
            'id': '14880',
            'ext': 'flv',
            'title': 'Ramses II - Proven To Be A Red Headed Caucasoid',
            'thumbnail': 're:^http:.*\.jpg$',
        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)
        video_title = self._og_search_title(webpage).strip()
        thumbnail = self._search_regex(
            r"var splash_img = '([^']+)';", webpage, 'thumbnail', fatal=False)

        all_formats = re.finditer(
            r"var (?P<key>[a-z]+)_video_file\s*=\s*'(?P<url>[^']+)';", webpage)
        formats = [{
            'format_id': m.group('key'),
            'quality': -i,
            'url': m.group('url'),
        } for i, m in enumerate(all_formats)]
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': video_title,
            'formats': formats,
            'thumbnail': thumbnail,
        }
youtube_dl/extractor/tvigle.py (new file)
@@ -0,0 +1,84 @@
# encoding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    unified_strdate,
    clean_html,
    int_or_none,
)


class TvigleIE(InfoExtractor):
    IE_NAME = 'tvigle'
    IE_DESC = 'Интернет-телевидение Tvigle.ru'
    _VALID_URL = r'http://(?:www\.)?tvigle\.ru/category/.+?[\?&]v(?:ideo)?=(?P<id>\d+)'

    _TESTS = [
        {
            'url': 'http://www.tvigle.ru/category/cinema/1608/?video=503081',
            'md5': '09afba4616666249f087efc6dcf83cb3',
            'info_dict': {
                'id': '503081',
                'ext': 'flv',
                'title': 'Брат 2 ',
                'description': 'md5:f5a42970f50648cee3d7ad740f3ae769',
                'upload_date': '20110919',
            },
        },
        {
            'url': 'http://www.tvigle.ru/category/men/vysotskiy_vospominaniya02/?flt=196&v=676433',
            'md5': 'e7efe5350dd5011d0de6550b53c3ba7b',
            'info_dict': {
                'id': '676433',
                'ext': 'flv',
                'title': 'Ведущий телепрограммы «60 минут» (США) о Владимире Высоцком',
                'description': 'md5:027f7dc872948f14c96d19b4178428a4',
                'upload_date': '20121218',
            },
        },
    ]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        video_data = self._download_xml(
            'http://www.tvigle.ru/xml/single.php?obj=%s' % video_id, video_id, 'Downloading video XML')

        video = video_data.find('./video')

        title = video.get('name')
        description = video.get('anons')
        if description:
            description = clean_html(description)
        thumbnail = video_data.get('img')
        upload_date = unified_strdate(video.get('date'))
        like_count = int_or_none(video.get('vtp'))

        formats = []
        for num, (format_id, format_note) in enumerate([['low_file', 'SQ'], ['file', 'HQ'], ['hd', 'HD 720']]):
            video_url = video.get(format_id)
            if not video_url:
                continue
            formats.append({
                'url': video_url,
                'format_id': format_id,
                'format_note': format_note,
                'quality': num,
            })

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
            'like_count': like_count,
            'age_limit': 18,
            'formats': formats,
        }
@@ -4,6 +4,7 @@ import re
 import json

 from .common import InfoExtractor
+from ..utils import compat_urllib_request


 class VeohIE(InfoExtractor):
@@ -24,6 +25,13 @@ class VeohIE(InfoExtractor):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
         webpage = self._download_webpage(url, video_id)
+        age_limit = 0
+        if 'class="adultwarning-container"' in webpage:
+            self.report_age_confirmation()
+            age_limit = 18
+            request = compat_urllib_request.Request(url)
+            request.add_header('Cookie', 'confirmedAdult=true')
+            webpage = self._download_webpage(request, video_id)

         m_youtube = re.search(r'http://www\.youtube\.com/v/(.*?)(\&|")', webpage)
         if m_youtube is not None:
@@ -44,4 +52,5 @@ class VeohIE(InfoExtractor):
             'thumbnail': info.get('highResImage') or info.get('medResImage'),
             'description': info['description'],
             'view_count': info['views'],
+            'age_limit': age_limit,
         }
@ -113,8 +113,8 @@ class VestiIE(InfoExtractor):
|
|||||||
priority_transport = playlist['priority_transport']
|
priority_transport = playlist['priority_transport']
|
||||||
|
|
||||||
thumbnail = media['picture']
|
thumbnail = media['picture']
|
||||||
width = media['width']
|
width = int_or_none(media['width'])
|
||||||
height = media['height']
|
height = int_or_none(media['height'])
|
||||||
description = media['anons']
|
description = media['anons']
|
||||||
title = media['title']
|
title = media['title']
|
||||||
duration = int_or_none(media.get('duration'))
|
duration = int_or_none(media.get('duration'))
|
||||||
|
@ -24,9 +24,10 @@ class VevoIE(InfoExtractor):
|
|||||||
(?P<id>[^&?#]+)'''
|
(?P<id>[^&?#]+)'''
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
|
'url': 'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
|
||||||
'file': 'GB1101300280.mp4',
|
|
||||||
"md5": "06bea460acb744eab74a9d7dcb4bfd61",
|
"md5": "06bea460acb744eab74a9d7dcb4bfd61",
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': 'GB1101300280',
|
||||||
|
'ext': 'mp4',
|
||||||
"upload_date": "20130624",
|
"upload_date": "20130624",
|
||||||
"uploader": "Hurts",
|
"uploader": "Hurts",
|
||||||
"title": "Somebody to Die For",
|
"title": "Somebody to Die For",
|
||||||
@ -34,6 +35,33 @@ class VevoIE(InfoExtractor):
|
|||||||
"width": 1920,
|
"width": 1920,
|
||||||
"height": 1080,
|
"height": 1080,
|
||||||
}
|
}
|
||||||
|
}, {
|
||||||
|
'note': 'v3 SMIL format',
|
||||||
|
'url': 'http://www.vevo.com/watch/cassadee-pope/i-wish-i-could-break-your-heart/USUV71302923',
|
||||||
|
'md5': '893ec0e0d4426a1d96c01de8f2bdff58',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'USUV71302923',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'upload_date': '20140219',
|
||||||
|
'uploader': 'Cassadee Pope',
|
||||||
|
'title': 'I Wish I Could Break Your Heart',
|
||||||
|
'duration': 226.101,
|
||||||
|
'age_limit': 0,
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'note': 'Age-limited video',
|
||||||
|
'url': 'https://www.vevo.com/watch/justin-timberlake/tunnel-vision-explicit/USRV81300282',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'USRV81300282',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'age_limit': 18,
|
||||||
|
'title': 'Tunnel Vision (Explicit)',
|
||||||
|
'uploader': 'Justin Timberlake',
|
||||||
|
'upload_date': '20130704',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': 'true',
|
||||||
|
}
|
||||||
}]
|
}]
|
||||||
_SMIL_BASE_URL = 'http://smil.lvl3.vevo.com/'
|
_SMIL_BASE_URL = 'http://smil.lvl3.vevo.com/'
|
||||||
|
|
||||||
@ -105,9 +133,31 @@ class VevoIE(InfoExtractor):
|
|||||||
video_info = self._download_json(json_url, video_id)['video']
|
video_info = self._download_json(json_url, video_id)['video']
|
||||||
|
|
||||||
formats = self._formats_from_json(video_info)
|
formats = self._formats_from_json(video_info)
|
||||||
|
|
||||||
|
is_explicit = video_info.get('isExplicit')
|
||||||
|
if is_explicit is True:
|
||||||
|
age_limit = 18
|
||||||
|
elif is_explicit is False:
|
||||||
|
age_limit = 0
|
||||||
|
else:
|
||||||
|
age_limit = None
|
||||||
|
|
||||||
|
# Download SMIL
|
||||||
|
smil_blocks = sorted((
|
||||||
|
f for f in video_info['videoVersions']
|
||||||
|
if f['sourceType'] == 13),
|
||||||
|
key=lambda f: f['version'])
|
||||||
|
|
||||||
|
smil_url = '%s/Video/V2/VFILE/%s/%sr.smil' % (
|
||||||
|
self._SMIL_BASE_URL, video_id, video_id.lower())
|
||||||
|
if smil_blocks:
|
||||||
|
smil_url_m = self._search_regex(
|
||||||
|
r'url="([^"]+)"', smil_blocks[-1]['data'], 'SMIL URL',
|
||||||
|
fatal=False)
|
||||||
|
if smil_url_m is not None:
|
||||||
|
smil_url = smil_url_m
|
||||||
|
|
||||||
try:
|
try:
|
||||||
smil_url = '%s/Video/V2/VFILE/%s/%sr.smil' % (
|
|
||||||
self._SMIL_BASE_URL, video_id, video_id.lower())
|
|
||||||
smil_xml = self._download_webpage(smil_url, video_id,
|
smil_xml = self._download_webpage(smil_url, video_id,
|
||||||
'Downloading SMIL info')
|
'Downloading SMIL info')
|
||||||
formats.extend(self._formats_from_smil(smil_xml))
|
formats.extend(self._formats_from_smil(smil_xml))
|
||||||
@ -128,4 +178,5 @@ class VevoIE(InfoExtractor):
|
|||||||
'upload_date': upload_date.strftime('%Y%m%d'),
|
'upload_date': upload_date.strftime('%Y%m%d'),
|
||||||
'uploader': video_info['mainArtists'][0]['artistName'],
|
'uploader': video_info['mainArtists'][0]['artistName'],
|
||||||
'duration': video_info['duration'],
|
'duration': video_info['duration'],
|
||||||
|
'age_limit': age_limit,
|
||||||
}
|
}
|
||||||
|
80
youtube_dl/extractor/videobam.py
Normal file
80
youtube_dl/extractor/videobam.py
Normal file
@ -0,0 +1,80 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
import json
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import int_or_none
|
||||||
|
|
||||||
|
|
||||||
|
class VideoBamIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'http://(?:www\.)?videobam\.com/(?:videos/download/)?(?P<id>[a-zA-Z]+)'
|
||||||
|
|
||||||
|
_TESTS = [
|
||||||
|
{
|
||||||
|
'url': 'http://videobam.com/OiJQM',
|
||||||
|
'md5': 'db471f27763a531f10416a0c58b5a1e0',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'OiJQM',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Is Alcohol Worse Than Ecstasy?',
|
||||||
|
'description': 'md5:d25b96151515c91debc42bfbb3eb2683',
|
||||||
|
'uploader': 'frihetsvinge',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://videobam.com/pqLvq',
|
||||||
|
'md5': 'd9a565b5379a99126ef94e1d7f9a383e',
|
||||||
|
'note': 'HD video',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'pqLvq',
|
||||||
|
'ext': 'mp4',
|
||||||
|
}
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
|
page = self._download_webpage('http://videobam.com/%s' % video_id, video_id, 'Downloading page')
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
|
||||||
|
for preference, format_id in enumerate(['low', 'high']):
|
||||||
|
mobj = re.search(r"%s: '(?P<url>[^']+)'" % format_id, page)
|
||||||
|
if not mobj:
|
||||||
|
continue
|
||||||
|
formats.append({
|
||||||
|
'url': mobj.group('url'),
|
||||||
|
'ext': 'mp4',
|
||||||
|
'format_id': format_id,
|
||||||
|
'preference': preference,
|
||||||
|
})
|
||||||
|
|
||||||
|
if not formats:
|
||||||
|
player_config = json.loads(self._html_search_regex(r'var player_config = ({.+?});', page, 'player config'))
|
||||||
|
formats = [{
|
||||||
|
'url': item['url'],
|
||||||
|
'ext': 'mp4',
|
||||||
|
} for item in player_config['playlist'] if 'autoPlay' in item]
|
||||||
|
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
title = self._og_search_title(page, default='VideoBam', fatal=False)
|
||||||
|
description = self._og_search_description(page, default=None)
|
||||||
|
thumbnail = self._og_search_thumbnail(page)
|
||||||
|
uploader = self._html_search_regex(r'Upload by ([^<]+)</a>', page, 'uploader', fatal=False, default=None)
|
||||||
|
view_count = int_or_none(
|
||||||
|
self._html_search_regex(r'<strong>Views:</strong> (\d+) ', page, 'view count', fatal=False))
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'uploader': uploader,
|
||||||
|
'view_count': view_count,
|
||||||
|
'formats': formats,
|
||||||
|
'age_limit': 18,
|
||||||
|
}
|
@ -221,7 +221,9 @@ class VimeoIE(SubtitlesInfoExtractor):
|
|||||||
# Extract video thumbnail
|
# Extract video thumbnail
|
||||||
video_thumbnail = config["video"].get("thumbnail")
|
video_thumbnail = config["video"].get("thumbnail")
|
||||||
if video_thumbnail is None:
|
if video_thumbnail is None:
|
||||||
_, video_thumbnail = sorted((int(width), t_url) for (width, t_url) in config["video"]["thumbs"].items())[-1]
|
video_thumbs = config["video"].get("thumbs")
|
||||||
|
if video_thumbs and isinstance(video_thumbs, dict):
|
||||||
|
_, video_thumbnail = sorted((int(width), t_url) for (width, t_url) in video_thumbs.items())[-1]
|
||||||
|
|
||||||
# Extract video description
|
# Extract video description
|
||||||
video_description = None
|
video_description = None
|
||||||
|
@ -1,8 +1,10 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
import json
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..utils import unified_strdate
|
||||||
|
|
||||||
|
|
||||||
class VineIE(InfoExtractor):
|
class VineIE(InfoExtractor):
|
||||||
@ -13,31 +15,46 @@ class VineIE(InfoExtractor):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'b9KOOWX7HUx',
|
'id': 'b9KOOWX7HUx',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'uploader': 'Jack Dorsey',
|
|
||||||
'title': 'Chicken.',
|
'title': 'Chicken.',
|
||||||
|
'description': 'Chicken.',
|
||||||
|
'upload_date': '20130519',
|
||||||
|
'uploader': 'Jack Dorsey',
|
||||||
|
'uploader_id': '76',
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
webpage_url = 'https://vine.co/v/' + video_id
|
|
||||||
webpage = self._download_webpage(webpage_url, video_id)
|
|
||||||
|
|
||||||
self.report_extraction(video_id)
|
webpage = self._download_webpage('https://vine.co/v/' + video_id, video_id)
|
||||||
|
|
||||||
video_url = self._html_search_meta('twitter:player:stream', webpage,
|
data = json.loads(self._html_search_regex(
|
||||||
'video URL')
|
r'window\.POST_DATA = { %s: ({.+?}) }' % video_id, webpage, 'vine data'))
|
||||||
|
|
||||||
uploader = self._html_search_regex(r'<p class="username">(.*?)</p>',
|
formats = [
|
||||||
webpage, 'uploader', fatal=False, flags=re.DOTALL)
|
{
|
||||||
|
'url': data['videoLowURL'],
|
||||||
|
'ext': 'mp4',
|
||||||
|
'format_id': 'low',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': data['videoUrl'],
|
||||||
|
'ext': 'mp4',
|
||||||
|
'format_id': 'standard',
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': video_url,
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': self._og_search_title(webpage),
|
'title': self._og_search_title(webpage),
|
||||||
'thumbnail': self._og_search_thumbnail(webpage),
|
'description': data['description'],
|
||||||
'uploader': uploader,
|
'thumbnail': data['thumbnailUrl'],
|
||||||
|
'upload_date': unified_strdate(data['created']),
|
||||||
|
'uploader': data['username'],
|
||||||
|
'uploader_id': data['userIdStr'],
|
||||||
|
'like_count': data['likes']['count'],
|
||||||
|
'comment_count': data['comments']['count'],
|
||||||
|
'repost_count': data['reposts']['count'],
|
||||||
|
'formats': formats,
|
||||||
}
|
}
|

youtube_dl/extractor/vk.py:

@@ -16,7 +16,7 @@ from ..utils import (

 class VKIE(InfoExtractor):
     IE_NAME = 'vk.com'
-    _VALID_URL = r'https?://vk\.com/(?:videos.*?\?.*?z=)?video(?P<id>.*?)(?:\?|%2F|$)'
+    _VALID_URL = r'https?://vk\.com/(?:video_ext\.php\?.*?\boid=(?P<oid>\d+).*?\bid=(?P<id>\d+)|(?:videos.*?\?.*?z=)?video(?P<videoid>.*?)(?:\?|%2F|$))'
     _NETRC_MACHINE = 'vk'

     _TESTS = [
@@ -42,6 +42,18 @@ class VKIE(InfoExtractor):
                 'duration': 558,
             }
         },
+        {
+            'note': 'Embedded video',
+            'url': 'http://vk.com/video_ext.php?oid=32194266&id=162925554&hash=7d8c2e0d5e05aeaa&hd=1',
+            'md5': 'c7ce8f1f87bec05b3de07fdeafe21a0a',
+            'info_dict': {
+                'id': '162925554',
+                'ext': 'mp4',
+                'uploader': 'Vladimir Gavrin',
+                'title': 'Lin Dan',
+                'duration': 101,
+            }
+        },
         {
             'url': 'http://vk.com/video-8871596_164049491',
             'md5': 'a590bcaf3d543576c9bd162812387666',
@@ -54,7 +66,7 @@ class VKIE(InfoExtractor):
                 'duration': 8352,
             },
             'skip': 'Requires vk account credentials',
-        }
+        },
     ]

     def _login(self):
@@ -82,7 +94,10 @@ class VKIE(InfoExtractor):

     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = mobj.group('videoid')
+
+        if not video_id:
+            video_id = '%s_%s' % (mobj.group('oid'), mobj.group('id'))

         info_url = 'http://vk.com/al_video.php?act=show&al=1&video=%s' % video_id
         info_page = self._download_webpage(info_url, video_id)
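The widened _VALID_URL now also accepts embedded-player URLs (video_ext.php), which carry the owner and video ids as separate oid/id parameters; _real_extract then rebuilds the canonical 'oid_id' value. A quick standalone check of both branches with the standard re module, using the URLs from the tests above:

    import re

    VALID_URL = (r'https?://vk\.com/(?:video_ext\.php\?.*?\boid=(?P<oid>\d+).*?\bid=(?P<id>\d+)|'
                 r'(?:videos.*?\?.*?z=)?video(?P<videoid>.*?)(?:\?|%2F|$))')

    for url in ('http://vk.com/video-8871596_164049491',
                'http://vk.com/video_ext.php?oid=32194266&id=162925554&hash=7d8c2e0d5e05aeaa&hd=1'):
        mobj = re.match(VALID_URL, url)
        video_id = mobj.group('videoid')
        if not video_id:
            # Embedded URLs only carry oid/id, so the id is rebuilt as 'oid_id'.
            video_id = '%s_%s' % (mobj.group('oid'), mobj.group('id'))
        print('%s -> %s' % (url, video_id))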

youtube_dl/extractor/worldstarhiphop.py:

@@ -22,7 +22,7 @@ class WorldStarHipHopIE(InfoExtractor):
         webpage_src = self._download_webpage(url, video_id)

         m_vevo_id = re.search(r'videoId=(.*?)&?',
                               webpage_src)

         if m_vevo_id is not None:
             self.to_screen(u'Vevo video detected:')

youtube_dl/extractor/xhamster.py:

@@ -103,6 +103,7 @@ class XHamsterIE(InfoExtractor):
         }]

         if not hd:
+            mrss_url = self._search_regex(r'<link rel="canonical" href="([^"]+)', webpage, 'mrss_url')
             webpage = self._download_webpage(mrss_url + '?hd', video_id, note='Downloading HD webpage')
             if is_hd(webpage):
                 video_url = extract_video_url(webpage)
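The added line re-reads the page's own canonical URL before requesting the '?hd' variant, instead of reusing whatever URL the user passed in. The regex itself is straightforward; here it is on a fabricated snippet:

    import re

    # Fabricated page head; only the canonical link matters for this step.
    webpage = '<head><link rel="canonical" href="http://xhamster.com/movies/1509445/some_title.html"/></head>'

    mrss_url = re.search(r'<link rel="canonical" href="([^"]+)', webpage).group(1)
    print(mrss_url + '?hd')  # the extractor downloads this and re-checks for an HD stream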

youtube_dl/extractor/xtube.py:

@@ -7,19 +7,24 @@ from .common import InfoExtractor
 from ..utils import (
     compat_urllib_parse_urlparse,
     compat_urllib_request,
+    parse_duration,
+    str_to_int,
 )


 class XTubeIE(InfoExtractor):
-    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>xtube\.com/watch\.php\?v=(?P<videoid>[^/?&]+))'
+    _VALID_URL = r'https?://(?:www\.)?(?P<url>xtube\.com/watch\.php\?v=(?P<videoid>[^/?&]+))'
     _TEST = {
         'url': 'http://www.xtube.com/watch.php?v=kVTUy_G222_',
-        'file': 'kVTUy_G222_.mp4',
         'md5': '092fbdd3cbe292c920ef6fc6a8a9cdab',
         'info_dict': {
-            "title": "strange erotica",
-            "description": "surreal gay themed erotica...almost an ET kind of thing",
-            "uploader": "greenshowers",
-            "age_limit": 18,
+            'id': 'kVTUy_G222_',
+            'ext': 'mp4',
+            'title': 'strange erotica',
+            'description': 'surreal gay themed erotica...almost an ET kind of thing',
+            'uploader': 'greenshowers',
+            'duration': 450,
+            'age_limit': 18,
         }
     }

@@ -32,10 +37,23 @@ class XTubeIE(InfoExtractor):
         req.add_header('Cookie', 'age_verified=1')
         webpage = self._download_webpage(req, video_id)

-        video_title = self._html_search_regex(r'<div class="p_5px[^>]*>([^<]+)', webpage, 'title')
-        video_uploader = self._html_search_regex(r'so_s\.addVariable\("owner_u", "([^"]+)', webpage, 'uploader', fatal=False)
-        video_description = self._html_search_regex(r'<p class="video_description">([^<]+)', webpage, 'description', fatal=False)
-        video_url= self._html_search_regex(r'var videoMp4 = "([^"]+)', webpage, 'video_url').replace('\\/', '/')
+        video_title = self._html_search_regex(r'<p class="title">([^<]+)', webpage, 'title')
+        video_uploader = self._html_search_regex(
+            r'so_s\.addVariable\("owner_u", "([^"]+)', webpage, 'uploader', fatal=False)
+        video_description = self._html_search_regex(
+            r'<p class="fieldsDesc">([^<]+)', webpage, 'description', fatal=False)
+        video_url = self._html_search_regex(r'var videoMp4 = "([^"]+)', webpage, 'video_url').replace('\\/', '/')
+        duration = parse_duration(self._html_search_regex(
+            r'<span class="bold">Runtime:</span> ([^<]+)</p>', webpage, 'duration', fatal=False))
+        view_count = self._html_search_regex(
+            r'<span class="bold">Views:</span> ([\d,\.]+)</p>', webpage, 'view count', fatal=False)
+        if view_count:
+            view_count = str_to_int(view_count)
+        comment_count = self._html_search_regex(
+            r'<div id="commentBar">([\d,\.]+) Comments</div>', webpage, 'comment count', fatal=False)
+        if comment_count:
+            comment_count = str_to_int(comment_count)

         path = compat_urllib_parse_urlparse(video_url).path
         extension = os.path.splitext(path)[1][1:]
         format = path.split('/')[5].split('_')[:2]
@@ -48,6 +66,9 @@ class XTubeIE(InfoExtractor):
             'title': video_title,
             'uploader': video_uploader,
             'description': video_description,
+            'duration': duration,
+            'view_count': view_count,
+            'comment_count': comment_count,
             'url': video_url,
             'ext': extension,
             'format': format,
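The new duration, view_count and comment_count fields lean on two small youtube-dl helpers: parse_duration turns a human-readable runtime into seconds, and str_to_int drops thousands separators. Rough standalone stand-ins for what those calls do with the scraped strings (simplified approximations, not the library implementations):

    import re

    def parse_duration_approx(value):
        # 'SS', 'MM:SS' or 'HH:MM:SS' -> seconds
        result = 0
        for part in value.strip().split(':'):
            result = result * 60 + int(part)
        return result

    def str_to_int_approx(value):
        # '12,345' or '12.345' -> 12345
        return int(re.sub(r'[,\.]', '', value))

    print(parse_duration_approx('7:30'))   # 450, the duration in the test above
    print(str_to_int_approx('12,345'))     # 12345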

youtube_dl/extractor/youtube.py:

@@ -29,7 +29,6 @@ from ..utils import (
     ExtractorError,
     int_or_none,
     PagedList,
-    RegexNotFoundError,
     unescapeHTML,
     unified_strdate,
     orderedSet,
@@ -200,9 +199,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         '135': {'ext': 'mp4', 'height': 480, 'resolution': '480p', 'format_note': 'DASH video', 'preference': -40},
         '136': {'ext': 'mp4', 'height': 720, 'resolution': '720p', 'format_note': 'DASH video', 'preference': -40},
         '137': {'ext': 'mp4', 'height': 1080, 'resolution': '1080p', 'format_note': 'DASH video', 'preference': -40},
-        '138': {'ext': 'mp4', 'height': 1081, 'resolution': '>1080p', 'format_note': 'DASH video', 'preference': -40},
+        '138': {'ext': 'mp4', 'height': 2160, 'resolution': '2160p', 'format_note': 'DASH video', 'preference': -40},
         '160': {'ext': 'mp4', 'height': 192, 'resolution': '192p', 'format_note': 'DASH video', 'preference': -40},
-        '264': {'ext': 'mp4', 'height': 1080, 'resolution': '1080p', 'format_note': 'DASH video', 'preference': -40},
+        '264': {'ext': 'mp4', 'height': 1440, 'resolution': '1440p', 'format_note': 'DASH video', 'preference': -40},

         # Dash mp4 audio
         '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 48, 'preference': -50},
@@ -1489,11 +1488,15 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
         # the id of the playlist is just 'RD' + video_id
         url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id)
         webpage = self._download_webpage(url, playlist_id, u'Downloading Youtube mix')
-        title_span = (get_element_by_attribute('class', 'title long-title', webpage) or
-            get_element_by_attribute('class', 'title ', webpage))
+        search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
+        title_span = (search_title('playlist-title') or
+            search_title('title long-title') or search_title('title'))
         title = clean_html(title_span)
-        video_re = r'data-index="\d+".*?href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s' % re.escape(playlist_id)
-        ids = orderedSet(re.findall(video_re, webpage))
+        video_re = r'''(?x)data-video-username="(.*?)".*?
+                       href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re.escape(playlist_id)
+        matches = orderedSet(re.findall(video_re, webpage, flags=re.DOTALL))
+        # Some of the videos may have been deleted, their username field is empty
+        ids = [video_id for (username, video_id) in matches if username]
         url_results = self._ids_to_results(ids)

         return self.playlist_result(url_results, playlist_id, title)
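The mix regex now also captures the uploader next to each video id, because entries for deleted videos are still rendered but with an empty data-video-username attribute; filtering on the username drops them. A toy run of the same idea over a fabricated page fragment:

    import re

    playlist_id = 'RDEMCIvzz5UiVmaz0xvtUUh9HQ'
    webpage = (
        '<li data-video-username="Artist A" ... href="/watch?v=aaaaaaaaaaa&list=%s">'
        '<li data-video-username="" ... href="/watch?v=bbbbbbbbbbb&list=%s">'
        '<li data-video-username="Artist C" ... href="/watch?v=ccccccccccc&list=%s">'
    ) % (playlist_id, playlist_id, playlist_id)

    video_re = r'''(?x)data-video-username="(.*?)".*?
                   href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re.escape(playlist_id)
    matches = re.findall(video_re, webpage, flags=re.DOTALL)

    # Deleted videos have an empty username, so only the first and third id survive.
    ids = [video_id for (username, video_id) in matches if username]
    print(ids)  # ['aaaaaaaaaaa', 'ccccccccccc']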
@@ -1642,7 +1645,7 @@ class YoutubeChannelIE(InfoExtractor):

 class YoutubeUserIE(InfoExtractor):
     IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
-    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
+    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
     _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/users/%s'
     _GDATA_PAGE_SIZE = 50
     _GDATA_URL = 'https://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
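The only change to YoutubeUserIE is the extra 'results' in the negative lookahead, so search-results URLs are no longer claimed as user pages and can fall through to the search-URL extractor added below. A quick before/after check:

    import re

    OLD = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
    NEW = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'

    url = 'https://www.youtube.com/results?search_query=python'
    print(bool(re.match(OLD, url)))  # True  - wrongly treated as the user "results"
    print(bool(re.match(NEW, url)))  # False - left for YoutubeSearchURLIE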
@@ -1741,12 +1744,50 @@ class YoutubeSearchIE(SearchInfoExtractor):
                   for video_id in video_ids]
         return self.playlist_result(videos, query)


 class YoutubeSearchDateIE(YoutubeSearchIE):
     IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
     _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc&orderby=published'
     _SEARCH_KEY = 'ytsearchdate'
     IE_DESC = u'YouTube.com searches, newest videos first'
+
+
+class YoutubeSearchURLIE(InfoExtractor):
+    IE_DESC = u'YouTube.com search URLs'
+    IE_NAME = u'youtube:search_url'
+    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?search_query=(?P<query>[^&]+)(?:[&]|$)'
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        query = compat_urllib_parse.unquote_plus(mobj.group('query'))
+
+        webpage = self._download_webpage(url, query)
+        result_code = self._search_regex(
+            r'(?s)<ol id="search-results"(.*?)</ol>', webpage, u'result HTML')
+
+        part_codes = re.findall(
+            r'(?s)<h3 class="yt-lockup-title">(.*?)</h3>', result_code)
+        entries = []
+        for part_code in part_codes:
+            part_title = self._html_search_regex(
+                r'(?s)title="([^"]+)"', part_code, 'item title', fatal=False)
+            part_url_snippet = self._html_search_regex(
+                r'(?s)href="([^"]+)"', part_code, 'item URL')
+            part_url = compat_urlparse.urljoin(
+                'https://www.youtube.com/', part_url_snippet)
+            entries.append({
+                '_type': 'url',
+                'url': part_url,
+                'title': part_title,
+            })
+
+        return {
+            '_type': 'playlist',
+            'entries': entries,
+            'title': query,
+        }


 class YoutubeShowIE(InfoExtractor):
     IE_DESC = u'YouTube.com (multi-season) shows'
     _VALID_URL = r'https?://www\.youtube\.com/show/(.*)'
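YoutubeSearchURLIE scrapes the ordinary results page rather than the gdata API: it isolates the <ol id="search-results"> block, then reads one title/href pair per yt-lockup-title heading and returns them as url entries. The same two-stage regex pass, shown standalone on a fabricated results snippet (plain re and urljoin instead of the extractor helpers):

    import re
    try:
        from urlparse import urljoin            # Python 2
    except ImportError:
        from urllib.parse import urljoin         # Python 3

    webpage = '''
    <ol id="search-results">
      <li><h3 class="yt-lockup-title">
        <a href="/watch?v=BaW_jenozKc" title="youtube-dl test video"></a></h3></li>
      <li><h3 class="yt-lockup-title">
        <a href="/watch?v=a9LDPn-MO4I" title="some other result"></a></h3></li>
    </ol>'''

    result_code = re.search(r'(?s)<ol id="search-results"(.*?)</ol>', webpage).group(1)
    part_codes = re.findall(r'(?s)<h3 class="yt-lockup-title">(.*?)</h3>', result_code)

    entries = []
    for part_code in part_codes:
        title = re.search(r'(?s)title="([^"]+)"', part_code).group(1)
        href = re.search(r'(?s)href="([^"]+)"', part_code).group(1)
        entries.append({'title': title, 'url': urljoin('https://www.youtube.com/', href)})

    for entry in entries:
        print('%s - %s' % (entry['url'], entry['title']))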

youtube_dl/extractor/zdf.py:

@@ -1,4 +1,5 @@
 # coding: utf-8
+from __future__ import unicode_literals

 import re

@@ -13,52 +14,42 @@ class ZDFIE(InfoExtractor):
     _VALID_URL = r'^https?://www\.zdf\.de/ZDFmediathek(?P<hash>#)?/(.*beitrag/(?:video/)?)(?P<video_id>[0-9]+)(?:/[^/?]+)?(?:\?.*)?'

     _TEST = {
-        u"url": u"http://www.zdf.de/ZDFmediathek/beitrag/video/2037704/ZDFspezial---Ende-des-Machtpokers--?bc=sts;stt",
-        u"file": u"2037704.webm",
-        u"info_dict": {
-            u"upload_date": u"20131127",
-            u"description": u"Union und SPD haben sich auf einen Koalitionsvertrag geeinigt. Aber was bedeutet das für die Bürger? Sehen Sie hierzu das ZDFspezial \"Ende des Machtpokers - Große Koalition für Deutschland\".",
-            u"uploader": u"spezial",
-            u"title": u"ZDFspezial - Ende des Machtpokers"
+        'url': 'http://www.zdf.de/ZDFmediathek/beitrag/video/2037704/ZDFspezial---Ende-des-Machtpokers--?bc=sts;stt',
+        'info_dict': {
+            'id': '2037704',
+            'ext': 'webm',
+            'title': 'ZDFspezial - Ende des Machtpokers',
+            'description': 'Union und SPD haben sich auf einen Koalitionsvertrag geeinigt. Aber was bedeutet das für die Bürger? Sehen Sie hierzu das ZDFspezial "Ende des Machtpokers - Große Koalition für Deutschland".',
+            'duration': 1022,
+            'uploader': 'spezial',
+            'uploader_id': '225948',
+            'upload_date': '20131127',
         },
-        u"skip": u"Videos on ZDF.de are depublicised in short order",
+        'skip': 'Videos on ZDF.de are depublicised in short order',
     }

     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('video_id')

-        xml_url = u'http://www.zdf.de/ZDFmediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
+        xml_url = 'http://www.zdf.de/ZDFmediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
         doc = self._download_xml(
             xml_url, video_id,
-            note=u'Downloading video info',
-            errnote=u'Failed to download video info')
+            note='Downloading video info',
+            errnote='Failed to download video info')

         title = doc.find('.//information/title').text
         description = doc.find('.//information/detail').text
+        duration = int(doc.find('.//details/lengthSec').text)
         uploader_node = doc.find('.//details/originChannelTitle')
         uploader = None if uploader_node is None else uploader_node.text
-        duration_str = doc.find('.//details/length').text
-        duration_m = re.match(r'''(?x)^
-            (?P<hours>[0-9]{2})
-            :(?P<minutes>[0-9]{2})
-            :(?P<seconds>[0-9]{2})
-            (?:\.(?P<ms>[0-9]+)?)
-            ''', duration_str)
-        duration = (
-            (
-                (int(duration_m.group('hours')) * 60 * 60) +
-                (int(duration_m.group('minutes')) * 60) +
-                int(duration_m.group('seconds'))
-            )
-            if duration_m
-            else None
-        )
+        uploader_id_node = doc.find('.//details/originChannelId')
+        uploader_id = None if uploader_id_node is None else uploader_id_node.text
         upload_date = unified_strdate(doc.find('.//details/airtime').text)

         def xml_to_format(fnode):
             video_url = fnode.find('url').text
-            is_available = u'http://www.metafilegenerator' not in video_url
+            is_available = 'http://www.metafilegenerator' not in video_url

             format_id = fnode.attrib['basetype']
             format_m = re.match(r'''(?x)
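Beyond the unicode_literals cleanup, the main simplification is the duration: the old code parsed the formatted <length> value (HH:MM:SS.ms) with a verbose regex and summed the pieces, while the new code reads the <lengthSec> node the same XML service already provides. A standalone comparison over a trimmed, fabricated response document:

    import re
    import xml.etree.ElementTree as ET

    doc = ET.fromstring(
        '<response><video><details>'
        '<length>00:17:02.000</length><lengthSec>1022</lengthSec>'
        '</details></video></response>')

    # Old approach: parse the human-readable string and add the parts up.
    m = re.match(r'(?P<h>\d{2}):(?P<m>\d{2}):(?P<s>\d{2})', doc.find('.//details/length').text)
    old_duration = int(m.group('h')) * 3600 + int(m.group('m')) * 60 + int(m.group('s'))

    # New approach: the service exposes the value in seconds directly.
    new_duration = int(doc.find('.//details/lengthSec').text)

    print('%s %s' % (old_duration, new_duration))  # both 1022, matching the test above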
@@ -71,22 +62,28 @@ class ZDFIE(InfoExtractor):

             quality = fnode.find('./quality').text
             abr = int(fnode.find('./audioBitrate').text) // 1000
-            vbr = int(fnode.find('./videoBitrate').text) // 1000
+            vbr_node = fnode.find('./videoBitrate')
+            vbr = None if vbr_node is None else int(vbr_node.text) // 1000

-            format_note = u''
+            width_node = fnode.find('./width')
+            width = None if width_node is None else int_or_none(width_node.text)
+            height_node = fnode.find('./height')
+            height = None if height_node is None else int_or_none(height_node.text)
+
+            format_note = ''
             if not format_note:
                 format_note = None

             return {
-                'format_id': format_id + u'-' + quality,
+                'format_id': format_id + '-' + quality,
                 'url': video_url,
                 'ext': ext,
                 'acodec': format_m.group('acodec'),
                 'vcodec': format_m.group('vcodec'),
                 'abr': abr,
                 'vbr': vbr,
-                'width': int_or_none(fnode.find('./width').text),
-                'height': int_or_none(fnode.find('./height').text),
+                'width': width,
+                'height': height,
                 'filesize': int_or_none(fnode.find('./filesize').text),
                 'format_note': format_note,
                 'protocol': proto,
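xml_to_format now checks for the videoBitrate, width and height nodes before converting them, so format entries that omit them (audio-only variants, for instance) no longer crash on a missing .text. The same None-safe pattern in isolation, with a fabricated format node and a simplified int_or_none:

    import xml.etree.ElementTree as ET

    def int_or_none(value):
        # simplified stand-in for the youtube-dl helper of the same name
        return int(value) if value is not None else None

    # Fabricated audio-only format node: no videoBitrate, width or height children.
    fnode = ET.fromstring(
        '<formitaet basetype="mp3_http"><audioBitrate>128000</audioBitrate></formitaet>')

    vbr_node = fnode.find('./videoBitrate')
    vbr = None if vbr_node is None else int(vbr_node.text) // 1000

    width_node = fnode.find('./width')
    width = None if width_node is None else int_or_none(width_node.text)

    print('%s %s' % (vbr, width))  # both None instead of an exception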
@@ -103,9 +100,10 @@ class ZDFIE(InfoExtractor):
         return {
             'id': video_id,
             'title': title,
-            'formats': formats,
             'description': description,
-            'uploader': uploader,
             'duration': duration,
+            'uploader': uploader,
+            'uploader_id': uploader_id,
             'upload_date': upload_date,
+            'formats': formats,
         }

youtube_dl/utils.py:

@@ -1,6 +1,7 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-

+import contextlib
 import ctypes
 import datetime
 import email.utils
@@ -771,6 +772,7 @@ def unified_strdate(date_str):
         '%B %d %Y',
         '%b %d %Y',
         '%Y-%m-%d',
+        '%d.%m.%Y',
         '%d/%m/%Y',
         '%Y/%m/%d %H:%M:%S',
         '%Y-%m-%d %H:%M:%S',
@@ -779,6 +781,7 @@ def unified_strdate(date_str):
         '%Y-%m-%dT%H:%M:%S.%fZ',
         '%Y-%m-%dT%H:%M:%S.%f0Z',
         '%Y-%m-%dT%H:%M:%S',
+        '%Y-%m-%dT%H:%M:%S.%f',
         '%Y-%m-%dT%H:%M',
     ]
     for expression in format_expressions:
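unified_strdate essentially walks this list and returns the first layout that datetime.strptime accepts, normalized to YYYYMMDD; the two new entries cover dotted day.month.year dates (as used on German sites) and ISO timestamps with fractional seconds but no trailing Z. A compressed sketch of that loop:

    import datetime

    format_expressions = ['%Y-%m-%d', '%d.%m.%Y', '%Y-%m-%dT%H:%M:%S.%f']

    def unified_strdate_sketch(date_str):
        # try each known layout until one parses, as the real helper does
        for expression in format_expressions:
            try:
                return datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
            except ValueError:
                pass
        return None

    print(unified_strdate_sketch('27.11.2013'))                  # '20131127'
    print(unified_strdate_sketch('2014-03-04T12:30:45.123456'))  # '20140304'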
@@ -1244,3 +1247,19 @@ except TypeError:
 else:
     struct_pack = struct.pack
     struct_unpack = struct.unpack
+
+
+def read_batch_urls(batch_fd):
+    def fixup(url):
+        if not isinstance(url, compat_str):
+            url = url.decode('utf-8', 'replace')
+        BOM_UTF8 = u'\xef\xbb\xbf'
+        if url.startswith(BOM_UTF8):
+            url = url[len(BOM_UTF8):]
+        url = url.strip()
+        if url.startswith(('#', ';', ']')):
+            return False
+        return url
+
+    with contextlib.closing(batch_fd) as fd:
+        return [url for url in map(fixup, fd) if url]
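read_batch_urls is the helper behind batch-file input: it strips a UTF-8 BOM, trims whitespace, and skips lines starting with '#', ';' or ']' so batch files can carry comments. A rough standalone equivalent fed from an in-memory file (simplified: no compat_str/bytes handling):

    import contextlib
    import io

    def read_batch_urls_sketch(batch_fd):
        def fixup(url):
            url = url.lstrip(u'\ufeff')   # drop a leading BOM if the file was saved with one
            url = url.strip()
            if url.startswith(('#', ';', ']')):
                return False              # comment line
            return url

        with contextlib.closing(batch_fd) as fd:
            return [url for url in map(fixup, fd) if url]

    batch = io.StringIO(u'\ufeffhttp://example.com/a\n# a comment\n\nhttp://example.com/b\n')
    print(read_batch_urls_sketch(batch))  # ['http://example.com/a', 'http://example.com/b']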

youtube_dl/version.py:

@@ -1,2 +1,2 @@

-__version__ = '2014.02.21.1'
+__version__ = '2014.03.04.2'