release 2014.08.27.1

[ard] Add support for plain ARD downloads (Fixes #3546)
[generic] Prevent <video> search from skipping over empty sources (#3546)
2014-08-27 02:37:23 +02:00 · 2014-08-27 02:36:57 +02:00 · 2014-08-27 02:09:59 +02:00 · 2014-08-27 02:07:11 +02:00 · 2014-08-27 01:44:54 +02:00 · 2014-08-27 01:44:47 +02:00
50 changed files with 1728 additions and 307 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -26,5 +26,6 @@ updates_key.pem
 *.m4a
 *.m4v
 *.part
 *.swp
 test/testdata
 .tox
--- a/README.md
+++ b/README.md
@@ -255,6 +255,7 @@ which means you can modify it, redistribute it or use it however you like.
 ## Authentication Options:
    -u, --username USERNAME          account username
    -p, --password PASSWORD          account password
    -2, --twofactor TWOFACTOR        two-factor auth code
    -n, --netrc                      use .netrc authentication data
    --video-password PASSWORD        video password (vimeo, smotri)
@@ -287,6 +288,10 @@ which means you can modify it, redistribute it or use it however you like.
                                     postprocessors (default)
    --prefer-ffmpeg                  Prefer ffmpeg over avconv for running the
                                     postprocessors
    --exec CMD                       Execute a command on the file after
                                     downloading, similar to find's -exec
                                     syntax. Example: --exec 'adb push {}
                                     /sdcard/Music/ && rm {}'
 # CONFIGURATION
@@ -429,6 +434,7 @@ If you want to add support for a new site, you can follow this quick list (assum
                'id': '42',
                'ext': 'mp4',
                'title': 'Video title goes here',
                'thumbnail': 're:^https?://.*\.jpg$',
                # TODO more properties, either as:
                # * A value
                # * MD5 checksum; start the string with md5:
--- a/test/helper.py
+++ b/test/helper.py
@@ -102,7 +102,10 @@ def expect_info_dict(self, expected_dict, got_dict):
            match_rex = re.compile(match_str)
            self.assertTrue(
-                isinstance(got, compat_str) and match_rex.match(got),
+                isinstance(got, compat_str),
                'Expected a %r object, but got %r' % (compat_str, type(got)))
            self.assertTrue(
                match_rex.match(got),
                u'field %s (value: %r) should match %r' % (info_field, got, match_str))
        elif isinstance(expected, type):
            got = got_dict.get(info_field)
--- a/test/test_YoutubeDL.py
+++ b/test/test_YoutubeDL.py
@@ -221,7 +221,7 @@ class TestFormatSelection(unittest.TestCase):
            '138', '137', '248', '136', '247', '135', '246',
            '245', '244', '134', '243', '133', '242', '160',
            # Dash audio
-            '141', '172', '140', '139', '171',
+            '141', '172', '140', '171', '139',
        ]
        for f1id, f2id in zip(order, order[1:]):
--- a/test/test_download.py
+++ b/test/test_download.py
@@ -7,6 +7,7 @@ import unittest
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 from test.helper import (
    assertGreaterEqual,
    get_params,
    gettestcases,
    expect_info_dict,
@@ -136,12 +137,18 @@ def generator(test_case):
                self.assertEqual(res_dict['_type'], 'playlist')
                expect_info_dict(self, test_case.get('info_dict', {}), res_dict)
            if 'playlist_mincount' in test_case:
-                self.assertGreaterEqual(
+                assertGreaterEqual(
                    self,
                    len(res_dict['entries']),
                    test_case['playlist_mincount'],
                    'Expected at least %d in playlist %s, but got only %d' % (
                        test_case['playlist_mincount'], test_case['url'],
                        len(res_dict['entries'])))
            if 'playlist_count' in test_case:
                self.assertEqual(
                    len(res_dict['entries']),
                    test_case['playlist_count'],
                    'Expected at %d in playlist %s, but got %d.')
            for tc in test_cases:
                tc_filename = get_tc_filename(tc)
--- a/test/test_playlists.py
+++ b/test/test_playlists.py
@@ -62,6 +62,7 @@ from youtube_dl.extractor import (
    InstagramUserIE,
    CSpanIE,
    AolIE,
    GameOnePlaylistIE,
 )
@@ -309,24 +310,6 @@ class TestPlaylists(unittest.TestCase):
        self.assertEqual(result['title'], 'Always/Never: A Little-Seen Movie About Nuclear Command and Control : The New Yorker')
        self.assertEqual(len(result['entries']), 3)
    def test_GoogleSearch(self):
        dl = FakeYDL()
        ie = GoogleSearchIE(dl)
        result = ie.extract('gvsearch15:python language')
        self.assertIsPlaylist(result)
        self.assertEqual(result['id'], 'python language')
        self.assertEqual(result['title'], 'python language')
        self.assertEqual(len(result['entries']), 15)
    def test_generic_rss_feed(self):
        dl = FakeYDL()
        ie = GenericIE(dl)
        result = ie.extract('http://phihag.de/2014/youtube-dl/rss.xml')
        self.assertIsPlaylist(result)
        self.assertEqual(result['id'], 'http://phihag.de/2014/youtube-dl/rss.xml')
        self.assertEqual(result['title'], 'Zero Punctuation')
        self.assertTrue(len(result['entries']) > 10)
    def test_ted_playlist(self):
        dl = FakeYDL()
        ie = TEDIE(dl)
@@ -407,5 +390,6 @@ class TestPlaylists(unittest.TestCase):
        self.assertEqual(result['id'], 'rbhagwati2')
        assertGreaterEqual(self, len(result['entries']), 179)
 if __name__ == '__main__':
    unittest.main()
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -219,6 +219,7 @@ class TestUtil(unittest.TestCase):
        self.assertEqual(parse_duration('0h0m0s'), 0)
        self.assertEqual(parse_duration('0m0s'), 0)
        self.assertEqual(parse_duration('0s'), 0)
        self.assertEqual(parse_duration('01:02:03.05'), 3723.05)
    def test_fix_xml_ampersands(self):
        self.assertEqual(
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -172,6 +172,7 @@ class YoutubeDL(object):
    The following options are used by the post processors:
    prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
                       otherwise prefer avconv.
    exec_cmd:          Arbitrary command to run after downloading
    """
    params = None
@@ -424,7 +425,7 @@ class YoutubeDL(object):
            autonumber_templ = '%0' + str(autonumber_size) + 'd'
            template_dict['autonumber'] = autonumber_templ % self._num_downloads
            if template_dict.get('playlist_index') is not None:
-                template_dict['playlist_index'] = '%05d' % template_dict['playlist_index']
+                template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
            if template_dict.get('resolution') is None:
                if template_dict.get('width') and template_dict.get('height'):
                    template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
@@ -480,7 +481,10 @@ class YoutubeDL(object):
                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
        age_limit = self.params.get('age_limit')
        if age_limit is not None:
-            if age_limit < info_dict.get('age_limit', 0):
+            actual_age_limit = info_dict.get('age_limit')
            if actual_age_limit is None:
                actual_age_limit = 0
            if age_limit < actual_age_limit:
                return 'Skipping "' + title + '" because it is age restricted'
        if self.in_download_archive(info_dict):
            return '%s has already been recorded in archive' % video_title
@@ -633,6 +637,7 @@ class YoutubeDL(object):
            for i, entry in enumerate(entries, 1):
                self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
                extra = {
                    'n_entries': n_entries,
                    'playlist': playlist,
                    'playlist_index': i + playliststart,
                    'extractor': ie_result['extractor'],
--- a/youtube_dl/init.py
+++ b/youtube_dl/init.py
@@ -71,6 +71,9 @@ __authors__  = (
    'Sebastian Haas',
    'Alexander Kirk',
    'Erik Johnson',
    'Keith Beckman',
    'Ole Ernst',
    'Aaron McDaniel (mcd1992)',
 )
 __license__ = 'Public Domain'
@@ -117,6 +120,7 @@ from .postprocessor import (
    FFmpegExtractAudioPP,
    FFmpegEmbedSubtitlePP,
    XAttrMetadataPP,
    ExecAfterDownloadPP,
 )
@@ -316,6 +320,8 @@ def parseOpts(overrideArguments=None):
            dest='username', metavar='USERNAME', help='account username')
    authentication.add_option('-p', '--password',
            dest='password', metavar='PASSWORD', help='account password')
    authentication.add_option('-2', '--twofactor',
            dest='twofactor', metavar='TWOFACTOR', help='two-factor auth code')
    authentication.add_option('-n', '--netrc',
            action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
    authentication.add_option('--video-password',
@@ -546,7 +552,9 @@ def parseOpts(overrideArguments=None):
        help='Prefer avconv over ffmpeg for running the postprocessors (default)')
    postproc.add_option('--prefer-ffmpeg', action='store_true', dest='prefer_ffmpeg',
        help='Prefer ffmpeg over avconv for running the postprocessors')
-
+    postproc.add_option(
        '--exec', metavar='CMD', dest='exec_cmd',
        help='Execute a command on the file after downloading, similar to find\'s -exec syntax. Example: --exec \'adb push {} /sdcard/Music/ && rm {}\'' )
    parser.add_option_group(general)
    parser.add_option_group(selection)
@@ -750,6 +758,7 @@ def _real_main(argv=None):
        'usenetrc': opts.usenetrc,
        'username': opts.username,
        'password': opts.password,
        'twofactor': opts.twofactor,
        'videopassword': opts.videopassword,
        'quiet': (opts.quiet or any_printing),
        'no_warnings': opts.no_warnings,
@@ -826,6 +835,7 @@ def _real_main(argv=None):
        'default_search': opts.default_search,
        'youtube_include_dash_manifest': opts.youtube_include_dash_manifest,
        'encoding': opts.encoding,
        'exec_cmd': opts.exec_cmd,
    }
    with YoutubeDL(ydl_opts) as ydl:
@@ -849,6 +859,13 @@ def _real_main(argv=None):
                ydl.add_post_processor(FFmpegAudioFixPP())
            ydl.add_post_processor(AtomicParsleyPP())
        # Please keep ExecAfterDownload towards the bottom as it allows the user to modify the final file in any way.
        # So if the user is able to remove the file before your postprocessor runs it might cause a few problems.
        if opts.exec_cmd:
            ydl.add_post_processor(ExecAfterDownloadPP(
                verboseOutput=opts.verbose, exec_cmd=opts.exec_cmd))
        # Update version
        if opts.update_self:
            update_self(ydl.to_screen, opts.verbose)
--- a/youtube_dl/downloader/http.py
+++ b/youtube_dl/downloader/http.py
@@ -27,8 +27,16 @@ class HttpFD(FileDownloader):
            headers['Youtubedl-user-agent'] = info_dict['user_agent']
        if 'http_referer' in info_dict:
            headers['Referer'] = info_dict['http_referer']
-        basic_request = compat_urllib_request.Request(url, None, headers)
+        add_headers = info_dict.get('http_headers')
-        request = compat_urllib_request.Request(url, None, headers)
+        if add_headers:
            headers.update(add_headers)
        data = info_dict.get('http_post_data')
        http_method = info_dict.get('http_method')
        basic_request = compat_urllib_request.Request(url, data, headers)
        request = compat_urllib_request.Request(url, data, headers)
        if http_method is not None:
            basic_request.get_method = lambda: http_method
            request.get_method = lambda: http_method
        is_test = self.params.get('test', False)
--- a/youtube_dl/extractor/init.py
+++ b/youtube_dl/extractor/init.py
@@ -9,7 +9,7 @@ from .allocine import AllocineIE
 from .aparat import AparatIE
 from .appletrailers import AppleTrailersIE
 from .archiveorg import ArchiveOrgIE
-from .ard import ARDIE
+from .ard import ARDIE, ARDMediathekIE
 from .arte import (
    ArteTvIE,
    ArteTVPlus7IE,
@@ -88,6 +88,7 @@ from .engadget import EngadgetIE
 from .escapist import EscapistIE
 from .everyonesmixtape import EveryonesMixtapeIE
 from .exfm import ExfmIE
 from .expotv import ExpoTVIE
 from .extremetube import ExtremeTubeIE
 from .facebook import FacebookIE
 from .faz import FazIE
@@ -115,7 +116,10 @@ from .freesound import FreesoundIE
 from .freespeech import FreespeechIE
 from .funnyordie import FunnyOrDieIE
 from .gamekings import GamekingsIE
-from .gameone import GameOneIE
+from .gameone import (
    GameOneIE,
    GameOnePlaylistIE,
 )
 from .gamespot import GameSpotIE
 from .gamestar import GameStarIE
 from .gametrailers import GametrailersIE
@@ -126,6 +130,7 @@ from .googleplus import GooglePlusIE
 from .googlesearch import GoogleSearchIE
 from .gorillavid import GorillaVidIE
 from .goshgay import GoshgayIE
 from .grooveshark import GroovesharkIE
 from .hark import HarkIE
 from .helsinki import HelsinkiIE
 from .hentaistigma import HentaiStigmaIE
@@ -183,6 +188,7 @@ from .malemotion import MalemotionIE
 from .mdr import MDRIE
 from .metacafe import MetacafeIE
 from .metacritic import MetacriticIE
 from .ministrygrid import MinistryGridIE
 from .mit import TechTVMITIE, MITIE, OCWMITIE
 from .mitele import MiTeleIE
 from .mixcloud import MixcloudIE
@@ -194,6 +200,7 @@ from .mooshare import MooshareIE
 from .morningstar import MorningstarIE
 from .motherless import MotherlessIE
 from .motorsport import MotorsportIE
 from .movieclips import MovieClipsIE
 from .moviezine import MoviezineIE
 from .movshare import MovShareIE
 from .mtv import (
@@ -202,6 +209,7 @@ from .mtv import (
    MTVIggyIE,
 )
 from .musicplayon import MusicPlayOnIE
 from .musicvault import MusicVaultIE
 from .muzu import MuzuTVIE
 from .myspace import MySpaceIE
 from .myspass import MySpassIE
@@ -243,6 +251,7 @@ from .parliamentliveuk import ParliamentLiveUKIE
 from .patreon import PatreonIE
 from .pbs import PBSIE
 from .photobucket import PhotobucketIE
 from .playfm import PlayFMIE
 from .playvid import PlayvidIE
 from .podomatic import PodomaticIE
 from .pornhd import PornHdIE
@@ -263,7 +272,7 @@ from .rtbf import RTBFIE
 from .rtlnl import RtlXlIE
 from .rtlnow import RTLnowIE
 from .rts import RTSIE
-from .rtve import RTVEALaCartaIE
+from .rtve import RTVEALaCartaIE, RTVELiveIE
 from .ruhd import RUHDIE
 from .rutube import (
    RutubeIE,
@@ -274,6 +283,7 @@ from .rutube import (
 from .rutv import RUTVIE
 from .sapo import SapoIE
 from .savefrom import SaveFromIE
 from .sbs import SBSIE
 from .scivee import SciVeeIE
 from .screencast import ScreencastIE
 from .servingsys import ServingSysIE
@@ -306,6 +316,7 @@ from .spankwire import SpankwireIE
 from .spiegel import SpiegelIE
 from .spiegeltv import SpiegeltvIE
 from .spike import SpikeIE
 from .sportdeutschland import SportDeutschlandIE
 from .stanfordoc import StanfordOpenClassroomIE
 from .steam import SteamIE
 from .streamcloud import StreamcloudIE
@@ -386,6 +397,7 @@ from .vuclip import VuClipIE
 from .vulture import VultureIE
 from .washingtonpost import WashingtonPostIE
 from .wat import WatIE
 from .wayofthemaster import WayOfTheMasterIE
 from .wdr import (
    WDRIE,
    WDRMobileIE,
--- a/youtube_dl/extractor/ard.py
+++ b/youtube_dl/extractor/ard.py
@@ -10,10 +10,14 @@ from ..utils import (
    qualities,
    compat_urllib_parse_urlparse,
    compat_urllib_parse,
    int_or_none,
    parse_duration,
    unified_strdate,
 )
-class ARDIE(InfoExtractor):
+class ARDMediathekIE(InfoExtractor):
    IE_NAME = 'ARD:mediathek'
    _VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.daserste\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'
    _TESTS = [{
@@ -128,3 +132,60 @@ class ARDIE(InfoExtractor):
            'formats': formats,
            'thumbnail': thumbnail,
        }
 class ARDIE(InfoExtractor):
    """Extractor for daserste.de video pages (URLs of the form .../videos/<display_id>-<id>.html)."""

    # Raw strings: the patterns contain regex escapes such as "\." which are
    # invalid escape sequences in ordinary string literals.
    _VALID_URL = r'(?P<mainurl>https?://(www\.)?daserste\.de/[^?#]+/videos/(?P<display_id>[^/?#]+)-(?P<id>[0-9]+))\.html'
    _TEST = {
        'url': 'http://www.daserste.de/information/reportage-dokumentation/dokus/videos/die-story-im-ersten-mission-unter-falscher-flagge-100.html',
        'md5': 'd216c3a86493f9322545e045ddc3eb35',
        'info_dict': {
            'display_id': 'die-story-im-ersten-mission-unter-falscher-flagge',
            'id': '100',
            'ext': 'mp4',
            'duration': 2600,
            'title': 'Die Story im Ersten: Mission unter falscher Flagge',
            'upload_date': '20140804',
            'thumbnail': r're:^https?://.*\.jpg$',
        }
    }

    def _real_extract(self, url):
        """Download the player XML belonging to the page and build the info dict from it."""
        mobj = re.match(self._VALID_URL, url)
        display_id = mobj.group('display_id')

        # The metadata document is addressed by the page URL without its
        # ".html" suffix, followed by "~playerXml.xml".
        player_url = mobj.group('mainurl') + '~playerXml.xml'
        doc = self._download_xml(player_url, display_id)
        video_node = doc.find('./video')
        # NOTE(review): every find(...).text below assumes the node is present
        # in the XML; a missing node raises AttributeError - confirm the schema.
        upload_date = unified_strdate(video_node.find('./broadcastDate').text)
        thumbnail = video_node.find('.//teaserImage//variant/url').text

        formats = []
        for a in video_node.findall('.//asset'):
            f = {
                'format_id': a.attrib['type'],
                'width': int_or_none(a.find('./frameWidth').text),
                'height': int_or_none(a.find('./frameHeight').text),
                'vbr': int_or_none(a.find('./bitrateVideo').text),
                'abr': int_or_none(a.find('./bitrateAudio').text),
                'vcodec': a.find('./codecVideo').text,
                'tbr': int_or_none(a.find('./totalBitrate').text),
            }
            server_prefix = a.find('./serverPrefix').text
            if server_prefix:
                # With a server prefix the prefix is the URL and the file
                # name is passed separately as the play path.
                f['url'] = server_prefix
                f['playpath'] = a.find('./fileName').text
            else:
                f['url'] = a.find('./fileName').text
            formats.append(f)
        self._sort_formats(formats)

        return {
            'id': mobj.group('id'),
            'formats': formats,
            'display_id': display_id,
            'title': video_node.find('./title').text,
            'duration': parse_duration(video_node.find('./duration').text),
            'upload_date': upload_date,
            'thumbnail': thumbnail,
        }
--- a/youtube_dl/extractor/arte.py
+++ b/youtube_dl/extractor/arte.py
@@ -177,16 +177,26 @@ class ArteTVPlus7IE(InfoExtractor):
 # It also uses the arte_vp_url url from the webpage to extract the information
 class ArteTVCreativeIE(ArteTVPlus7IE):
    IE_NAME = 'arte.tv:creative'
-    _VALID_URL = r'https?://creative\.arte\.tv/(?P<lang>fr|de)/magazine?/(?P<id>.+)'
+    _VALID_URL = r'https?://creative\.arte\.tv/(?P<lang>fr|de)/(?:magazine?/)?(?P<id>[^?#]+)'
-    _TEST = {
+    _TESTS = [{
        'url': 'http://creative.arte.tv/de/magazin/agentur-amateur-corporate-design',
        'info_dict': {
-            'id': '050489-002',
+            'id': '72176',
            'ext': 'mp4',
-            'title': 'Agentur Amateur / Agence Amateur #2 : Corporate Design',
+            'title': 'Folge 2 - Corporate Design',
            'upload_date': '20131004',
        },
-    }
+    }, {
        'url': 'http://creative.arte.tv/fr/Monty-Python-Reunion',
        'info_dict': {
            'id': '160676',
            'ext': 'mp4',
            'title': 'Monty Python live (mostly)',
            'description': 'Événement ! Quarante-cinq ans après leurs premiers succès, les légendaires Monty Python remontent sur scène.\n',
            'upload_date': '20140805',
        }
    }]
 class ArteTVFutureIE(ArteTVPlus7IE):
--- a/youtube_dl/extractor/bliptv.py
+++ b/youtube_dl/extractor/bliptv.py
@@ -15,7 +15,7 @@ from ..utils import (
 class BlipTVIE(SubtitlesInfoExtractor):
-    _VALID_URL = r'https?://(?:\w+\.)?blip\.tv/(?:(?:.+-|rss/flash/)(?P<id>\d+)|((?:play/|api\.swf#)(?P<lookup_id>[\da-zA-Z+]+)))'
+    _VALID_URL = r'https?://(?:\w+\.)?blip\.tv/(?:(?:.+-|rss/flash/)(?P<id>\d+)|((?:play/|api\.swf#)(?P<lookup_id>[\da-zA-Z+_]+)))'
    _TESTS = [
        {
@@ -49,6 +49,21 @@ class BlipTVIE(SubtitlesInfoExtractor):
                'uploader_id': '792887',
                'duration': 279,
            }
        },
        {
            # https://bugzilla.redhat.com/show_bug.cgi?id=967465
            'url': 'http://a.blip.tv/api.swf#h6Uag5KbVwI',
            'md5': '314e87b1ebe7a48fcbfdd51b791ce5a6',
            'info_dict': {
                'id': '6573122',
                'ext': 'mov',
                'upload_date': '20130520',
                'description': 'Two hapless space marines argue over what to do when they realize they have an astronomically huge problem on their hands.',
                'title': 'Red vs. Blue Season 11 Trailer',
                'timestamp': 1369029609,
                'uploader': 'redvsblue',
                'uploader_id': '792887',
            }
        }
    ]
@@ -150,7 +165,7 @@ class BlipTVIE(SubtitlesInfoExtractor):
 class BlipTVUserIE(InfoExtractor):
-    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?blip\.tv/)|bliptvuser:)([^/]+)/*$'
+    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?blip\.tv/)|bliptvuser:)(?!api\.swf)([^/]+)/*$'
    _PAGE_SIZE = 12
    IE_NAME = 'blip.tv:user'
--- a/youtube_dl/extractor/brightcove.py
+++ b/youtube_dl/extractor/brightcove.py
@@ -154,12 +154,14 @@ class BrightcoveIE(InfoExtractor):
    def _extract_brightcove_urls(cls, webpage):
        """Return a list of all Brightcove URLs from the webpage """
-        url_m = re.search(r'<meta\s+property="og:video"\s+content="(http://c.brightcove.com/[^"]+)"', webpage)
+        url_m = re.search(
            r'<meta\s+property="og:video"\s+content="(https?://(?:secure|c)\.brightcove.com/[^"]+)"',
            webpage)
        if url_m:
            url = unescapeHTML(url_m.group(1))
            # Some sites don't add it, we can't download with this url, for example:
            # http://www.ktvu.com/videos/news/raw-video-caltrain-releases-video-of-man-almost/vCTZdY/
-            if 'playerKey' in url:
+            if 'playerKey' in url or 'videoId' in url:
                return [url]
        matches = re.findall(
@@ -188,9 +190,13 @@ class BrightcoveIE(InfoExtractor):
            referer = smuggled_data.get('Referer', url)
            return self._get_video_info(
                videoPlayer[0], query_str, query, referer=referer)
-        else:
+        elif 'playerKey' in query:
            player_key = query['playerKey']
            return self._get_playlist_info(player_key[0])
        else:
            raise ExtractorError(
                'Cannot find playerKey= variable. Did you forget quotes in a shell invocation?',
                expected=True)
    def _get_video_info(self, video_id, query_str, query, referer=None):
        request_url = self._FEDERATED_URL_TEMPLATE % query_str
@@ -202,6 +208,13 @@ class BrightcoveIE(InfoExtractor):
            req.add_header('Referer', referer)
        webpage = self._download_webpage(req, video_id)
        error_msg = self._html_search_regex(
            r"<h1>We're sorry.</h1>\s*<p>(.*?)</p>", webpage,
            'error message', default=None)
        if error_msg is not None:
            raise ExtractorError(
                'brightcove said: %s' % error_msg, expected=True)
        self.report_extraction(video_id)
        info = self._search_regex(r'var experienceJSON = ({.*});', webpage, 'json')
        info = json.loads(info)['data']
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -84,6 +84,12 @@ class InfoExtractor(object):
                                 format, irrespective of the file format.
                                 -1 for default (order by other properties),
                                 -2 or smaller for less than default.
                    * http_referer  HTTP Referer header value to set.
                    * http_method  HTTP method to use for the download.
                    * http_headers  A dictionary of additional HTTP headers
                                 to add to the request.
                    * http_post_data  Additional data to send with a POST
                                 request.
    url:            Final video URL.
    ext:            Video filename extension.
    format:         The video format, defaults to ext (used for --get-format)
@@ -108,7 +114,7 @@ class InfoExtractor(object):
    upload_date:    Video upload date (YYYYMMDD).
                    If not explicitly set, calculated from timestamp.
    uploader_id:    Nickname or id of the video uploader.
-    location:       Physical location of the video.
+    location:       Physical location where the video was filmed.
    subtitles:      The subtitle file contents as a dictionary in the format
                    {language: subtitles}.
    duration:       Length of the video in seconds, as an integer.
@@ -434,6 +440,22 @@ class InfoExtractor(object):
        return (username, password)
    def _get_tfa_info(self):
        """
        Return the two-factor authentication code, or None if there is none.

        The code is read from the downloader's 'twofactor' parameter, which
        is currently the only source (the command line option).
        TODO - asking the user will be required for sms/phone verify
        """
        downloader = self._downloader
        if downloader is None:
            return None
        # .get() yields None both for a missing entry and for an explicit None
        return downloader.params.get('twofactor')
    # Helper functions for extracting OpenGraph info
    @staticmethod
    def _og_regexes(prop):
@@ -598,11 +620,15 @@ class InfoExtractor(object):
            'Unable to download f4m manifest')
        formats = []
-        for media_el in manifest.findall('{http://ns.adobe.com/f4m/1.0}media'):
+        media_nodes = manifest.findall('{http://ns.adobe.com/f4m/1.0}media')
        for i, media_el in enumerate(media_nodes):
            tbr = int_or_none(media_el.attrib.get('bitrate'))
            format_id = 'f4m-%d' % (i if tbr is None else tbr)
            formats.append({
                'format_id': format_id,
                'url': manifest_url,
                'ext': 'flv',
-                'tbr': int_or_none(media_el.attrib.get('bitrate')),
+                'tbr': tbr,
                'width': int_or_none(media_el.attrib.get('width')),
                'height': int_or_none(media_el.attrib.get('height')),
            })
@@ -610,6 +636,55 @@ class InfoExtractor(object):
        return formats
    def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None):
        """
        Build the list of formats described by an m3u8 playlist.

        The first entry always points at the playlist itself ('m3u8-meta').
        Every URI line in the downloaded playlist adds one more format,
        annotated with the attributes of the #EXT-X-STREAM-INF line that
        precedes it (BANDWIDTH, CODECS, RESOLUTION) when there is one.

        m3u8_url -- URL of the playlist to download
        video_id -- passed to _download_webpage along with the URL
        ext      -- value stored as 'ext' in every generated format

        Returns the formats after passing them through _sort_formats.
        """
        formats = [{
            'format_id': 'm3u8-meta',
            'url': m3u8_url,
            'ext': ext,
            'protocol': 'm3u8',
            'preference': -1,
            'resolution': 'multiple',
            'format_note': 'Quality selection URL',
        }]

        m3u8_doc = self._download_webpage(m3u8_url, video_id)
        # Start with an empty dict (not None) so that a URI line which is not
        # preceded by #EXT-X-STREAM-INF does not fail on last_info.get().
        last_info = {}
        kv_rex = re.compile(
            r'(?P<key>[a-zA-Z_-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)')
        for line in m3u8_doc.splitlines():
            if line.startswith('#EXT-X-STREAM-INF:'):
                # Collect the KEY=VALUE attributes, dropping surrounding quotes
                last_info = {}
                for m in kv_rex.finditer(line):
                    v = m.group('val')
                    if v.startswith('"'):
                        v = v[1:-1]
                    last_info[m.group('key')] = v
            elif line.startswith('#') or not line.strip():
                # Other tags, comments and blank lines carry no format
                continue
            else:
                tbr = int_or_none(last_info.get('BANDWIDTH'), scale=1000)
                f = {
                    # Fall back to a running index when no bandwidth is known
                    'format_id': 'm3u8-%d' % (tbr if tbr else len(formats)),
                    'url': line.strip(),
                    'tbr': tbr,
                    'ext': ext,
                }
                codecs = last_info.get('CODECS')
                if codecs:
                    # CODECS is a comma-separated list that need not contain
                    # exactly two entries; unpacking into (video, audio)
                    # raised ValueError for any other count.
                    # NOTE(review): a single entry is stored as vcodec although
                    # it may describe an audio-only stream - confirm.
                    codec_names = [c.partition('.')[0] for c in codecs.split(',')]
                    f['vcodec'] = codec_names[0]
                    if len(codec_names) > 1:
                        f['acodec'] = codec_names[-1]
                resolution = last_info.get('RESOLUTION')
                if resolution:
                    width_str, height_str = resolution.split('x')
                    f['width'] = int(width_str)
                    f['height'] = int(height_str)
                formats.append(f)
                # Attributes apply only to the URI line directly following them
                last_info = {}
        self._sort_formats(formats)
        return formats
 class SearchInfoExtractor(InfoExtractor):
    """
--- a/youtube_dl/extractor/ebaumsworld.py
+++ b/youtube_dl/extractor/ebaumsworld.py
@@ -1,19 +1,21 @@
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..utils import determine_ext
 class EbaumsWorldIE(InfoExtractor):
    _VALID_URL = r'https?://www\.ebaumsworld\.com/video/watch/(?P<id>\d+)'
    _TEST = {
-        u'url': u'http://www.ebaumsworld.com/video/watch/83367677/',
+        'url': 'http://www.ebaumsworld.com/video/watch/83367677/',
-        u'file': u'83367677.mp4',
+        'info_dict': {
-        u'info_dict': {
+            'id': '83367677',
-            u'title': u'A Giant Python Opens The Door',
+            'ext': 'mp4',
-            u'description': u'This is how nightmares start...',
+            'title': 'A Giant Python Opens The Door',
-            u'uploader': u'jihadpizza',
+            'description': 'This is how nightmares start...',
            'uploader': 'jihadpizza',
        },
    }
@@ -28,7 +30,6 @@ class EbaumsWorldIE(InfoExtractor):
            'id': video_id,
            'title': config.find('title').text,
            'url': video_url,
            'ext': determine_ext(video_url),
            'description': config.find('description').text,
            'thumbnail': config.find('image').text,
            'uploader': config.find('username').text,
--- a/youtube_dl/extractor/eighttracks.py
+++ b/youtube_dl/extractor/eighttracks.py
@@ -1,10 +1,13 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import json
 import random
 import re
 from .common import InfoExtractor
 from ..utils import (
-    ExtractorError,
+    compat_str,
 )
@@ -12,86 +15,98 @@ class EightTracksIE(InfoExtractor):
    IE_NAME = '8tracks'
    _VALID_URL = r'https?://8tracks\.com/(?P<user>[^/]+)/(?P<id>[^/#]+)(?:#.*)?$'
    _TEST = {
-        u"name": u"EightTracks",
+        "name": "EightTracks",
-        u"url": u"http://8tracks.com/ytdl/youtube-dl-test-tracks-a",
+        "url": "http://8tracks.com/ytdl/youtube-dl-test-tracks-a",
-        u"playlist": [
+        "info_dict": {
            'id': '1336550',
            'display_id': 'youtube-dl-test-tracks-a',
            "description": "test chars:  \"'/\\ä↭",
            "title": "youtube-dl test tracks \"'/\\ä↭<>",
        },
        "playlist": [
            {
-                u"file": u"11885610.m4a",
+                "md5": "96ce57f24389fc8734ce47f4c1abcc55",
-                u"md5": u"96ce57f24389fc8734ce47f4c1abcc55",
+                "info_dict": {
-                u"info_dict": {
+                    "id": "11885610",
-                    u"title": u"youtue-dl project<>\"' - youtube-dl test track 1 \"'/\\\u00e4\u21ad",
+                    "ext": "m4a",
-                    u"uploader_id": u"ytdl"
+                    "title": "youtue-dl project<>\"' - youtube-dl test track 1 \"'/\\\u00e4\u21ad",
                    "uploader_id": "ytdl"
                }
            },
            {
-                u"file": u"11885608.m4a",
+                "md5": "4ab26f05c1f7291ea460a3920be8021f",
-                u"md5": u"4ab26f05c1f7291ea460a3920be8021f",
+                "info_dict": {
-                u"info_dict": {
+                    "id": "11885608",
-                    u"title": u"youtube-dl project - youtube-dl test track 2 \"'/\\\u00e4\u21ad",
+                    "ext": "m4a",
-                    u"uploader_id": u"ytdl"
+                    "title": "youtube-dl project - youtube-dl test track 2 \"'/\\\u00e4\u21ad",
                    "uploader_id": "ytdl"
                }
            },
            {
-                u"file": u"11885679.m4a",
+                "md5": "d30b5b5f74217410f4689605c35d1fd7",
-                u"md5": u"d30b5b5f74217410f4689605c35d1fd7",
+                "info_dict": {
-                u"info_dict": {
+                    "id": "11885679",
-                    u"title": u"youtube-dl project as well - youtube-dl test track 3 \"'/\\\u00e4\u21ad",
+                    "ext": "m4a",
-                    u"uploader_id": u"ytdl"
+                    "title": "youtube-dl project as well - youtube-dl test track 3 \"'/\\\u00e4\u21ad",
                    "uploader_id": "ytdl"
                }
            },
            {
-                u"file": u"11885680.m4a",
+                "md5": "4eb0a669317cd725f6bbd336a29f923a",
-                u"md5": u"4eb0a669317cd725f6bbd336a29f923a",
+                "info_dict": {
-                u"info_dict": {
+                    "id": "11885680",
-                    u"title": u"youtube-dl project as well - youtube-dl test track 4 \"'/\\\u00e4\u21ad",
+                    "ext": "m4a",
-                    u"uploader_id": u"ytdl"
+                    "title": "youtube-dl project as well - youtube-dl test track 4 \"'/\\\u00e4\u21ad",
                    "uploader_id": "ytdl"
                }
            },
            {
-                u"file": u"11885682.m4a",
+                "md5": "1893e872e263a2705558d1d319ad19e8",
-                u"md5": u"1893e872e263a2705558d1d319ad19e8",
+                "info_dict": {
-                u"info_dict": {
+                    "id": "11885682",
-                    u"title": u"PH - youtube-dl test track 5 \"'/\\\u00e4\u21ad",
+                    "ext": "m4a",
-                    u"uploader_id": u"ytdl"
+                    "title": "PH - youtube-dl test track 5 \"'/\\\u00e4\u21ad",
                    "uploader_id": "ytdl"
                }
            },
            {
-                u"file": u"11885683.m4a",
+                "md5": "b673c46f47a216ab1741ae8836af5899",
-                u"md5": u"b673c46f47a216ab1741ae8836af5899",
+                "info_dict": {
-                u"info_dict": {
+                    "id": "11885683",
-                    u"title": u"PH - youtube-dl test track 6 \"'/\\\u00e4\u21ad",
+                    "ext": "m4a",
-                    u"uploader_id": u"ytdl"
+                    "title": "PH - youtube-dl test track 6 \"'/\\\u00e4\u21ad",
                    "uploader_id": "ytdl"
                }
            },
            {
-                u"file": u"11885684.m4a",
+                "md5": "1d74534e95df54986da7f5abf7d842b7",
-                u"md5": u"1d74534e95df54986da7f5abf7d842b7",
+                "info_dict": {
-                u"info_dict": {
+                    "id": "11885684",
-                    u"title": u"phihag - youtube-dl test track 7 \"'/\\\u00e4\u21ad",
+                    "ext": "m4a",
-                    u"uploader_id": u"ytdl"
+                    "title": "phihag - youtube-dl test track 7 \"'/\\\u00e4\u21ad",
                    "uploader_id": "ytdl"
                }
            },
            {
-                u"file": u"11885685.m4a",
+                "md5": "f081f47af8f6ae782ed131d38b9cd1c0",
-                u"md5": u"f081f47af8f6ae782ed131d38b9cd1c0",
+                "info_dict": {
-                u"info_dict": {
+                    "id": "11885685",
-                    u"title": u"phihag - youtube-dl test track 8 \"'/\\\u00e4\u21ad",
+                    "ext": "m4a",
-                    u"uploader_id": u"ytdl"
+                    "title": "phihag - youtube-dl test track 8 \"'/\\\u00e4\u21ad",
                    "uploader_id": "ytdl"
                }
            }
        ]
    }
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        playlist_id = mobj.group('id')
        webpage = self._download_webpage(url, playlist_id)
-        json_like = self._search_regex(r"PAGE.mix = (.*?);\n", webpage, u'trax information', flags=re.DOTALL)
+        json_like = self._search_regex(
            r"(?s)PAGE.mix = (.*?);\n", webpage, 'trax information')
        data = json.loads(json_like)
        session = str(random.randint(0, 1000000000))
@@ -99,21 +114,30 @@ class EightTracksIE(InfoExtractor):
        track_count = data['tracks_count']
        first_url = 'http://8tracks.com/sets/%s/play?player=sm&mix_id=%s&format=jsonh' % (session, mix_id)
        next_url = first_url
-        res = []
+        entries = []
        for i in range(track_count):
-            api_json = self._download_webpage(next_url, playlist_id,
+            api_json = self._download_webpage(
-                note=u'Downloading song information %s/%s' % (str(i+1), track_count),
+                next_url, playlist_id,
-                errnote=u'Failed to download song information')
+                note='Downloading song information %d/%d' % (i + 1, track_count),
                errnote='Failed to download song information')
            api_data = json.loads(api_json)
-            track_data = api_data[u'set']['track']
+            track_data = api_data['set']['track']
            info = {
-                'id': track_data['id'],
+                'id': compat_str(track_data['id']),
                'url': track_data['track_file_stream_url'],
                'title': track_data['performer'] + u' - ' + track_data['name'],
                'raw_title': track_data['name'],
                'uploader_id': data['user']['login'],
                'ext': 'm4a',
            }
-            res.append(info)
+            entries.append(info)
-            next_url = 'http://8tracks.com/sets/%s/next?player=sm&mix_id=%s&format=jsonh&track_id=%s' % (session, mix_id, track_data['id'])
+            next_url = 'http://8tracks.com/sets/%s/next?player=sm&mix_id=%s&format=jsonh&track_id=%s' % (
-        return res
+                session, mix_id, track_data['id'])
        return {
            '_type': 'playlist',
            'entries': entries,
            'id': compat_str(mix_id),
            'display_id': playlist_id,
            'title': data.get('name'),
            'description': data.get('description'),
        }
--- a/youtube_dl/extractor/empflix.py
+++ b/youtube_dl/extractor/empflix.py
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..utils import fix_xml_ampersands
 class EmpflixIE(InfoExtractor):
@@ -36,7 +37,8 @@ class EmpflixIE(InfoExtractor):
            webpage, 'flashvars.config')
        cfg_xml = self._download_xml(
-            cfg_url, video_id, note='Downloading metadata')
+            cfg_url, video_id, note='Downloading metadata',
            transform_source=fix_xml_ampersands)
        formats = [
            {
@@ -44,11 +46,13 @@ class EmpflixIE(InfoExtractor):
                'format_id': item.find('res').text,
            } for item in cfg_xml.findall('./quality/item')
        ]
        thumbnail = cfg_xml.find('./startThumb').text
        return {
            'id': video_id,
            'title': video_title,
            'description': video_description,
            'thumbnail': thumbnail,
            'formats': formats,
            'age_limit': age_limit,
        }
--- a/youtube_dl/extractor/expotv.py
+++ b/youtube_dl/extractor/expotv.py
@@ -0,0 +1,73 @@
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..utils import (
    int_or_none,
    unified_strdate,
 )
 class ExpoTVIE(InfoExtractor):
    """Information extractor for review videos on www.expotv.com.

    The watch page provides a player key; together with the numeric video id
    it addresses a JSON configuration that lists the available sources.
    """
    _VALID_URL = r'https?://www\.expotv\.com/videos/[^?#]*/(?P<id>[0-9]+)($|[?#])'
    _TEST = {
        'url': 'http://www.expotv.com/videos/reviews/1/24/LinneCardscom/17561',
        'md5': '2985e6d7a392b2f7a05e0ca350fe41d0',
        'info_dict': {
            'id': '17561',
            'ext': 'mp4',
            'upload_date': '20060212',
            'title': 'My Favorite Online Scrapbook Store',
            'view_count': int,
            'description': 'You\'ll find most everything you need at this virtual store front.',
            'uploader': 'Anna T.',
            # Raw string: '\.' is not a valid escape in a normal literal
            'thumbnail': r're:^https?://.*\.jpg$',
        }
    }
    def _real_extract(self, url):
        """Return the info dict (formats plus metadata) for the video at `url`.

        Only the player key and the video configuration are required; the
        view count, uploader and upload date are looked up non-fatally, so
        they may come back as None.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)
        player_key = self._search_regex(
            r'<param name="playerKey" value="([^"]+)"', webpage, 'player key')
        config_url = 'http://client.expotv.com/video/config/%s/%s' % (
            video_id, player_key)
        config = self._download_json(
            config_url, video_id,
            note='Downloading video configuration')
        formats = [{
            'url': fcfg['file'],
            'height': int_or_none(fcfg.get('height')),
            'format_note': fcfg.get('label'),
            # The extension is taken from the filename= query parameter
            'ext': self._search_regex(
                r'filename=.*\.([a-z0-9_A-Z]+)&', fcfg['file'],
                'file extension', default=None),
        } for fcfg in config['sources']]
        self._sort_formats(formats)
        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage)
        thumbnail = config.get('image')
        # The play counter is optional metadata; a page without it must not
        # abort the extraction, so look it up non-fatally like the fields below.
        view_count = int_or_none(self._search_regex(
            r'<h5>Plays: ([0-9]+)</h5>', webpage, 'view counts',
            fatal=False))
        uploader = self._search_regex(
            r'<div class="reviewer">\s*<img alt="([^"]+)"', webpage, 'uploader',
            fatal=False)
        upload_date = unified_strdate(self._search_regex(
            r'<h5>Reviewed on ([0-9/.]+)</h5>', webpage, 'upload date',
            fatal=False))
        return {
            'id': video_id,
            'formats': formats,
            'title': title,
            'description': description,
            'view_count': view_count,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'upload_date': upload_date,
        }
--- a/youtube_dl/extractor/gameone.py
+++ b/youtube_dl/extractor/gameone.py
@@ -88,3 +88,28 @@ class GameOneIE(InfoExtractor):
            'age_limit': age_limit,
            'timestamp': timestamp,
        }
 class GameOnePlaylistIE(InfoExtractor):
    """Playlist extractor covering every episode listed by GameOne TV."""
    _VALID_URL = r'https?://(?:www\.)?gameone\.de(?:/tv)?/?$'
    IE_NAME = 'gameone:playlist'
    _TEST = {
        'url': 'http://www.gameone.de/tv',
        'info_dict': {
            'title': 'GameOne',
        },
        'playlist_mincount': 294,
    }
    def _real_extract(self, url):
        """Build a playlist with one entry per episode number.

        The overview page is always fetched from the fixed /tv address,
        regardless of which accepted form `url` had. The highest episode
        number linked there is the starting point; every number from it
        down to 1 becomes a 'GameOne' URL result (newest first).
        """
        overview = self._download_webpage('http://www.gameone.de/tv', 'TV')
        linked_numbers = re.findall(r'<a href="/tv/(\d+)"', overview)
        newest = max(int(number) for number in linked_numbers)
        entries = []
        for episode in range(newest, 0, -1):
            episode_url = 'http://www.gameone.de/tv/%d' % episode
            entries.append(self.url_result(episode_url, 'GameOne'))
        return {
            '_type': 'playlist',
            'title': 'GameOne',
            'entries': entries,
        }
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -8,18 +8,19 @@ import re
 from .common import InfoExtractor
 from .youtube import YoutubeIE
 from ..utils import (
    compat_urllib_error,
    compat_urllib_parse,
    compat_urllib_request,
    compat_urlparse,
    compat_xml_parse_error,
    ExtractorError,
    float_or_none,
    HEADRequest,
    orderedSet,
    parse_xml,
    smuggle_url,
    unescapeHTML,
    unified_strdate,
    unsmuggle_url,
    url_basename,
 )
 from .brightcove import BrightcoveIE
@@ -289,6 +290,68 @@ class GenericIE(InfoExtractor):
                'description': 'Mario\'s life in the fast lane has never looked so good.',
            },
        },
        # YouTube embed via <data-embed-url="">
        {
            'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
            'info_dict': {
                'id': 'jpSGZsgga_I',
                'ext': 'mp4',
                'title': 'Asphalt 8: Airborne - Launch Trailer',
                'uploader': 'Gameloft',
                'uploader_id': 'gameloft',
                'upload_date': '20130821',
                'description': 'md5:87bd95f13d8be3e7da87a5f2c443106a',
            },
            'params': {
                'skip_download': True,
            }
        },
        # Camtasia studio
        {
            'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
            'playlist': [{
                'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
                'info_dict': {
                    'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
                    'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
                    'ext': 'flv',
                    'duration': 2235.90,
                }
            }, {
                'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
                'info_dict': {
                    'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
                    'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
                    'ext': 'flv',
                    'duration': 2235.93,
                }
            }],
            'info_dict': {
                'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
            }
        },
        # Flowplayer
        {
            'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
            'md5': '9d65602bf31c6e20014319c7d07fba27',
            'info_dict': {
                'id': '5123ea6d5e5a7',
                'ext': 'mp4',
                'age_limit': 18,
                'uploader': 'www.handjobhub.com',
                'title': 'Busty Blonde Siri Tit Fuck While Wank at Handjob Hub',
            }
        },
        # RSS feed
        {
            'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
            'info_dict': {
                'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
                'title': 'Zero Punctuation',
                'description': 're:'
            },
            'playlist_mincount': 11,
        }
    ]
    def report_download_webpage(self, video_id):
@@ -301,58 +364,6 @@ class GenericIE(InfoExtractor):
        """Report information extraction."""
        self._downloader.to_screen('[redirect] Following redirect to %s' % new_url)
    def _send_head(self, url):
        """Send a HEAD request to `url` and return the final response.

        Redirects (e.g. from URL shorteners) are followed with HEAD requests
        as well, and a 405 answer triggers a retry with a regular request.
        Raises ExtractorError if the opener yields no response at all.
        """
        def _origin_host(req):
            """Return the origin request host of `req` on any Python version."""
            try:
                # This function was deprecated in python 3.3 and removed in 3.4
                return req.get_origin_req_host()
            except AttributeError:
                return req.origin_req_host
        def _forwardable_headers(req):
            """Copy the headers of `req`, dropping those that describe a body."""
            return dict((k, v) for k, v in req.headers.items()
                        if k.lower() not in ("content-length", "content-type"))
        class HEADRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
            """
            Subclass the HTTPRedirectHandler to make it use our
            HEADRequest also on the redirected URL
            """
            def redirect_request(self, req, fp, code, msg, headers, newurl):
                if code in (301, 302, 303, 307):
                    newurl = newurl.replace(' ', '%20')
                    return HEADRequest(newurl,
                                       headers=_forwardable_headers(req),
                                       origin_req_host=_origin_host(req),
                                       unverifiable=True)
                else:
                    raise compat_urllib_error.HTTPError(req.get_full_url(), code, msg, headers, fp)
        class HTTPMethodFallback(compat_urllib_request.BaseHandler):
            """
            Fallback to GET if HEAD is not allowed (405 HTTP error)
            """
            def http_error_405(self, req, fp, code, msg, headers):
                # Drain and close the error response before retrying
                fp.read()
                fp.close()
                # Use the same version-tolerant origin host lookup as the
                # redirect handler; calling get_origin_req_host() directly
                # fails with AttributeError on Python 3.4+.
                return self.parent.open(compat_urllib_request.Request(req.get_full_url(),
                                                 headers=_forwardable_headers(req),
                                                 origin_req_host=_origin_host(req),
                                                 unverifiable=True))
        # Build our opener
        opener = compat_urllib_request.OpenerDirector()
        for handler in [compat_urllib_request.HTTPHandler, compat_urllib_request.HTTPDefaultErrorHandler,
                        HTTPMethodFallback, HEADRedirectHandler,
                        compat_urllib_request.HTTPErrorProcessor, compat_urllib_request.HTTPSHandler]:
            opener.add_handler(handler())
        response = opener.open(HEADRequest(url))
        if response is None:
            raise ExtractorError('Invalid URL protocol')
        return response
    def _extract_rss(self, url, video_id, doc):
        playlist_title = doc.find('./channel/title').text
        playlist_desc_el = doc.find('./channel/description')
@@ -372,6 +383,43 @@ class GenericIE(InfoExtractor):
            'entries': entries,
        }
    def _extract_camtasia(self, url, video_id, webpage):
        """Extract a Camtasia Studio project embedded in `webpage`.

        Looks for the "csConfigFile" flash variable, downloads the XML
        configuration it points to (resolved relative to `url`) and turns
        every fileset child that carries a <uri> into a playlist entry.

        Returns a playlist dict, or None if no camtasia video can be found.
        """
        camtasia_cfg = self._search_regex(
            r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
            webpage, 'camtasia configuration file', default=None)
        if camtasia_cfg is None:
            return None
        title = self._html_search_meta('DC.title', webpage, fatal=True)
        camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
        camtasia_cfg = self._download_xml(
            camtasia_url, video_id,
            note='Downloading camtasia configuration',
            errnote='Failed to download camtasia configuration')
        fileset_node = camtasia_cfg.find('./playlist/array/fileset')
        if fileset_node is None:
            # Configuration without a fileset: nothing to extract
            return None
        entries = []
        # Iterate the element itself; Element.getchildren() is deprecated
        # and no longer exists in Python 3.9+.
        for n in fileset_node:
            url_n = n.find('./uri')
            if url_n is None:
                continue
            duration_n = n.find('./duration')
            entries.append({
                'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
                'title': '%s - %s' % (title, n.tag),
                'url': compat_urlparse.urljoin(url, url_n.text),
                # The duration is optional metadata; tolerate its absence
                'duration': (
                    None if duration_n is None
                    else float_or_none(duration_n.text)),
            })
        return {
            '_type': 'playlist',
            'entries': entries,
            'title': title,
        }
    def _real_extract(self, url):
        if url.startswith('//'):
            return {
@@ -408,17 +456,31 @@ class GenericIE(InfoExtractor):
            else:
                assert ':' in default_search
                return self.url_result(default_search + url)
-        video_id = os.path.splitext(url.rstrip('/').split('/')[-1])[0]
+
        url, smuggled_data = unsmuggle_url(url)
        force_videoid = None
        if smuggled_data and 'force_videoid' in smuggled_data:
            force_videoid = smuggled_data['force_videoid']
            video_id = force_videoid
        else:
            video_id = os.path.splitext(url.rstrip('/').split('/')[-1])[0]
        self.to_screen('%s: Requesting header' % video_id)
-        try:
+        head_req = HEADRequest(url)
-            response = self._send_head(url)
+        response = self._request_webpage(
            head_req, video_id,
            note=False, errnote='Could not send HEAD request to %s' % url,
            fatal=False)
        if response is not False:
            # Check for redirect
            new_url = response.geturl()
            if url != new_url:
                self.report_following_redirect(new_url)
                if force_videoid:
                    new_url = smuggle_url(
                        new_url, {'force_videoid': force_videoid})
                return self.url_result(new_url)
            # Check for direct link to a video
@@ -439,10 +501,6 @@ class GenericIE(InfoExtractor):
                    'upload_date': upload_date,
                }
        except compat_urllib_error.HTTPError:
            # This may be a stupid server that doesn't like HEAD, our UA, or so
            pass
        try:
            webpage = self._download_webpage(url, video_id)
        except ValueError:
@@ -460,6 +518,11 @@ class GenericIE(InfoExtractor):
        except compat_xml_parse_error:
            pass
        # Is it a Camtasia project?
        camtasia_res = self._extract_camtasia(url, video_id, webpage)
        if camtasia_res is not None:
            return camtasia_res
        # Sometimes embedded video player is hidden behind percent encoding
        # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
        # Unescaping the whole page allows to handle those cases in a generic way
@@ -475,10 +538,26 @@ class GenericIE(InfoExtractor):
            r'(?s)<title>(.*?)</title>', webpage, 'video title',
            default='video')
        # Try to detect age limit automatically
        age_limit = self._rta_search(webpage)
        # And then there are the jokers who advertise that they use RTA,
        # but actually don't.
        AGE_LIMIT_MARKERS = [
            r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
        ]
        if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
            age_limit = 18
        # video uploader is domain name
        video_uploader = self._search_regex(
            r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
        # Helper method
        def _playlist_from_matches(matches, getter, ie=None):
            urlrs = orderedSet(self.url_result(getter(m), ie) for m in matches)
            return self.playlist_result(
                urlrs, playlist_id=video_id, playlist_title=video_title)
        # Look for BrightCove:
        bc_urls = BrightcoveIE._extract_brightcove_urls(webpage)
        if bc_urls:
@@ -514,6 +593,7 @@ class GenericIE(InfoExtractor):
        matches = re.findall(r'''(?x)
            (?:
                <iframe[^>]+?src=|
                data-video-url=|
                <embed[^>]+?src=|
                embedSWF\(?:\s*
            )
@@ -522,19 +602,15 @@ class GenericIE(InfoExtractor):
                (?:embed|v)/.+?)
            \1''', webpage)
        if matches:
-            urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Youtube')
+            return _playlist_from_matches(
-                     for tuppl in matches]
+                matches, lambda m: unescapeHTML(m[1]), ie='Youtube')
            return self.playlist_result(
                urlrs, playlist_id=video_id, playlist_title=video_title)
        # Look for embedded Dailymotion player
        matches = re.findall(
            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage)
        if matches:
-            urlrs = [self.url_result(unescapeHTML(tuppl[1]))
+            return _playlist_from_matches(
-                     for tuppl in matches]
+                matches, lambda m: unescapeHTML(m[1]))
            return self.playlist_result(
                urlrs, playlist_id=video_id, playlist_title=video_title)
        # Look for embedded Wistia player
        match = re.search(
@@ -553,7 +629,7 @@ class GenericIE(InfoExtractor):
        mobj = re.search(r'<meta\s[^>]*https?://api\.blip\.tv/\w+/redirect/\w+/(\d+)', webpage)
        if mobj:
            return self.url_result('http://blip.tv/a/a-'+mobj.group(1), 'BlipTV')
-        mobj = re.search(r'<(?:iframe|embed|object)\s[^>]*(https?://(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)[a-zA-Z0-9]+)', webpage)
+        mobj = re.search(r'<(?:iframe|embed|object)\s[^>]*(https?://(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)[a-zA-Z0-9_]+)', webpage)
        if mobj:
            return self.url_result(mobj.group(1), 'BlipTV')
@@ -648,10 +724,8 @@ class GenericIE(InfoExtractor):
        # Look for funnyordie embed
        matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
        if matches:
-            urlrs = [self.url_result(unescapeHTML(eurl), 'FunnyOrDie')
+            return _playlist_from_matches(
-                     for eurl in matches]
+                matches, getter=unescapeHTML, ie='FunnyOrDie')
            return self.playlist_result(
                urlrs, playlist_id=video_id, playlist_title=video_title)
        # Look for embedded RUTV player
        rutv_url = RUTVIE._extract_url(webpage)
@@ -713,6 +787,13 @@ class GenericIE(InfoExtractor):
        if mobj is not None:
            return self.url_result(mobj.group('url'), 'Yahoo')
        # Look for embedded sbs.com.au player
        mobj = re.search(
            r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:www\.)sbs\.com\.au/ondemand/video/single/.+?)\1',
            webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'), 'SBS')
        # Start with something easy: JW Player in SWFObject
        found = re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
        if not found:
@@ -730,6 +811,14 @@ class GenericIE(InfoExtractor):
        if not found:
            # Broaden the findall a little bit: JWPlayer JS loader
            found = re.findall(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage)
        if not found:
            # Flow player
            found = re.findall(r'''(?xs)
                flowplayer\("[^"]+",\s*
                    \{[^}]+?\}\s*,
                    \s*{[^}]+? ["']?clip["']?\s*:\s*\{\s*
                        ["']?url["']?\s*:\s*["']([^"']+)["']
            ''', webpage)
        if not found:
            # Try to find twitter cards info
            found = re.findall(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage)
@@ -739,10 +828,16 @@ class GenericIE(InfoExtractor):
            m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
            # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
            if m_video_type is not None:
-                found = re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage)
+                def check_video(vurl):
                    vpath = compat_urlparse.urlparse(vurl).path
                    vext = determine_ext(vpath)
                    return '.' in vpath and vext not in ('swf', 'png', 'jpg')
                found = list(filter(
                    check_video,
                    re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage)))
        if not found:
            # HTML5 video
-            found = re.findall(r'(?s)<video[^<]*(?:>.*?<source.*?)? src="([^"]+)"', webpage)
+            found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]+)? src="([^"]+)"', webpage)
        if not found:
            found = re.search(
                r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
@@ -776,6 +871,7 @@ class GenericIE(InfoExtractor):
                'url': video_url,
                'uploader': video_uploader,
                'title': video_title,
                'age_limit': age_limit,
            })
        if len(entries) == 1:
--- a/youtube_dl/extractor/googlesearch.py
+++ b/youtube_dl/extractor/googlesearch.py
@@ -14,6 +14,14 @@ class GoogleSearchIE(SearchInfoExtractor):
    _MAX_RESULTS = 1000
    IE_NAME = 'video.google:search'
    _SEARCH_KEY = 'gvsearch'
    _TEST = {
        'url': 'gvsearch15:python language',
        'info_dict': {
            'id': 'python language',
            'title': 'python language',
        },
        'playlist_count': 15,
    }
    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
--- a/youtube_dl/extractor/grooveshark.py
+++ b/youtube_dl/extractor/grooveshark.py
@@ -0,0 +1,190 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import time
 import math
 import os.path
 import re
 from .common import InfoExtractor
 from ..utils import ExtractorError, compat_urllib_request, compat_html_parser
 from ..utils import (
    compat_urllib_parse,
    compat_urlparse,
 )
 class GroovesharkHtmlParser(compat_html_parser.HTMLParser):
    """Collect <object> elements and the <param> tags nested inside them.

    Every finished object is stored in ``self.objects`` as a dict of the form
    ``{'attrs': {name: value}, 'params': [{name: value}, ...]}``.
    """

    def __init__(self):
        # The object currently being filled in, or None when outside <object>
        self._current_object = None
        self.objects = []
        compat_html_parser.HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        """Open a new object on <object>; attach <param> to the open object."""
        attrs = dict(attrs)
        if tag == 'object':
            self._current_object = {'attrs': attrs, 'params': []}
        elif tag == 'param' and self._current_object is not None:
            # A <param> outside of any <object> has no parent to attach to;
            # ignore it instead of failing on the missing object.
            self._current_object['params'].append(attrs)

    def handle_endtag(self, tag):
        """Store the open object when its closing tag is reached."""
        # Unbalanced </object> tags are skipped so that None never ends up
        # in the list of results.
        if tag == 'object' and self._current_object is not None:
            self.objects.append(self._current_object)
            self._current_object = None

    @classmethod
    def extract_object_tags(cls, html):
        """Parse *html* and return the list of collected object dicts."""
        p = cls()
        p.feed(html)
        p.close()
        return p.objects
 class GroovesharkIE(InfoExtractor):
    """Extractor for single songs addressed as grooveshark.com/#!/s/<name>/<token>."""
    _VALID_URL = r'https?://(www\.)?grooveshark\.com/#!/s/([^/]+)/([^/]+)'
    _TEST = {
        'url': 'http://grooveshark.com/#!/s/Jolene+Tenth+Key+Remix+Ft+Will+Sessions/6SS1DW?src=5',
        'md5': '7ecf8aefa59d6b2098517e1baa530023',
        'info_dict': {
            'id': '6SS1DW',
            'title': 'Jolene (Tenth Key Remix ft. Will Sessions)',
            'ext': 'mp3',
            'duration': 227,
        }
    }

    # Switches read by _real_extract: whether to request the player page and
    # the bootstrap data before asking for the track metadata.
    do_playerpage_request = True
    do_bootstrap_request = True

    def _parse_target(self, target):
        """Split *target* into (parsed URL, fragment path without '!' and query, song token)."""
        uri = compat_urlparse.urlparse(target)
        # The fragment looks like '!/s/<name>/<token>?src=5': drop the leading
        # character and anything after '?'.
        song_hash = uri.fragment[1:].split('?')[0]
        token = os.path.basename(song_hash.rstrip('/'))
        return (uri, song_hash, token)

    def _build_preload_url(self, target, query_prefix=''):
        """Return (preload.php URL for *target*, song token).

        *query_prefix* is placed in front of the ``hash=...&<timestamp>`` query.
        """
        (uri, song_hash, token) = self._parse_target(target)
        query = '%shash=%s&%d' % (
            query_prefix, compat_urllib_parse.quote(song_hash, safe=''), self.ts)
        preload_url = compat_urlparse.urlunparse(
            (uri.scheme, uri.netloc, '/preload.php', None, query, None))
        return (preload_url, token)

    def _build_bootstrap_url(self, target):
        """Return (URL of the bootstrap request, song token)."""
        return self._build_preload_url(target, 'getCommunicationToken=1&')

    def _build_meta_url(self, target):
        """Return (URL of the metadata request, song token)."""
        return self._build_preload_url(target)

    def _build_stream_url(self, meta):
        """Return the stream.php URL on the host named in *meta*'s stream key."""
        return compat_urlparse.urlunparse(('http', meta['streamKey']['ip'], '/stream.php', None, None, None))

    def _build_swf_referer(self, target, obj):
        """Return the URL of the player object *obj* on the host of *target*."""
        (uri, _, _) = self._parse_target(target)
        return compat_urlparse.urlunparse((uri.scheme, uri.netloc, obj['attrs']['data'], None, None, None))

    def _transform_bootstrap(self, js):
        """Cut the JSON value out of the bootstrap JavaScript.

        Keeps what precedes the first line starting with ``try {``, then takes
        the right-hand side of the first `` = `` without the trailing ';'.
        """
        return re.split(r'(?m)^\s*try\s*{', js)[0] \
                 .split(' = ', 1)[1].strip().rstrip(';')

    def _transform_meta(self, js):
        """Cut the JSON value out of the first line of the metadata JavaScript."""
        # Split on the first '=' only: the JSON payload itself may contain '='.
        return js.split('\n')[0].split('=', 1)[1].rstrip(';')

    def _get_meta(self, target):
        """Download and return the 'getStreamKeyWithSong' metadata for *target*.

        Raises ExtractorError when the key is absent or its value is None.
        """
        (meta_url, token) = self._build_meta_url(target)
        self.to_screen('Metadata URL: %s' % meta_url)

        headers = {'Referer': compat_urlparse.urldefrag(target)[0]}
        req = compat_urllib_request.Request(meta_url, headers=headers)
        res = self._download_json(req, token,
                                  transform_source=self._transform_meta)

        if 'getStreamKeyWithSong' not in res:
            raise ExtractorError(
                'Metadata not found. URL may be malformed, or Grooveshark API may have changed.')

        if res['getStreamKeyWithSong'] is None:
            raise ExtractorError(
                'Metadata download failed, probably due to Grooveshark anti-abuse throttling. Wait at least an hour before retrying from this IP.',
                expected=True)

        return res['getStreamKeyWithSong']

    def _get_bootstrap(self, target):
        """Download the player bootstrap data (non-fatal) and return the result."""
        (bootstrap_url, token) = self._build_bootstrap_url(target)

        headers = {'Referer': compat_urlparse.urldefrag(target)[0]}
        req = compat_urllib_request.Request(bootstrap_url, headers=headers)
        res = self._download_json(req, token, fatal=False,
                                  note='Downloading player bootstrap data',
                                  errnote='Unable to download player bootstrap data',
                                  transform_source=self._transform_bootstrap)
        return res

    def _get_playerpage(self, target):
        """Download the player page (non-fatal).

        Returns (webpage, objects) where objects is the list of parsed
        <object> tags with id 'jsPlayerEmbed', or None when no page was
        obtained. Raises ExtractorError when the page shows an error message.
        """
        (_, _, token) = self._parse_target(target)

        webpage = self._download_webpage(
            target, token,
            note='Downloading player page',
            errnote='Unable to download player page',
            fatal=False)

        # The download is non-fatal, so there may be no page to inspect.
        if not webpage:
            return (webpage, None)

        # Search (for example German) error message
        error_msg = self._html_search_regex(
            r'<div id="content">\s*<h2>(.*?)</h2>', webpage,
            'error message', default=None)
        if error_msg is not None:
            error_msg = error_msg.replace('\n', ' ')
            raise ExtractorError('Grooveshark said: %s' % error_msg)

        o = GroovesharkHtmlParser.extract_object_tags(webpage)
        # Objects without an id attribute are simply not the player embed.
        return (webpage, [x for x in o if x['attrs'].get('id') == 'jsPlayerEmbed'])

    def _real_initialize(self):
        self.ts = int(time.time() * 1000)  # timestamp in millis

    def _real_extract(self, url):
        """Return the info dict describing the POST stream request for *url*."""
        (_, _, token) = self._parse_target(url)

        # 1. Fill cookiejar by making a request to the player page
        swf_referer = None
        if self.do_playerpage_request:
            (_, player_objs) = self._get_playerpage(url)
            # May be None (no page) or empty (no player embed on the page)
            if player_objs:
                swf_referer = self._build_swf_referer(url, player_objs[0])
                self.to_screen('SWF Referer: %s' % swf_referer)

        # 2. Ask preload.php for swf bootstrap data to better mimic webapp
        if self.do_bootstrap_request:
            bootstrap = self._get_bootstrap(url)
            # The bootstrap request is non-fatal and its result is only
            # reported, so a missing result must not abort the extraction.
            if bootstrap:
                self.to_screen('CommunicationToken: %s' % bootstrap.get('getCommunicationToken'))

        # 3. Ask preload.php for track metadata.
        meta = self._get_meta(url)

        # 4. Construct stream request for track.
        stream_url = self._build_stream_url(meta)
        # uSecs is divided by one million and rounded up to whole seconds
        duration = int(math.ceil(float(meta['streamKey']['uSecs']) / 1000000))
        post_dict = {'streamKey': meta['streamKey']['streamKey']}
        post_data = compat_urllib_parse.urlencode(post_dict).encode('utf-8')
        headers = {
            'Content-Length': len(post_data),
            'Content-Type': 'application/x-www-form-urlencoded'
        }
        if swf_referer is not None:
            headers['Referer'] = swf_referer

        return {
            'id': token,
            'title': meta['song']['Name'],
            'http_method': 'POST',
            'url': stream_url,
            'ext': 'mp3',
            'format': 'mp3 audio',
            'duration': duration,
            'http_post_data': post_data,
            'http_headers': headers,
        }
--- a/youtube_dl/extractor/ign.py
+++ b/youtube_dl/extractor/ign.py
@@ -18,6 +18,7 @@ class IGNIE(InfoExtractor):
    _DESCRIPTION_RE = [
        r'<span class="page-object-description">(.+?)</span>',
        r'id="my_show_video">.*?<p>(.*?)</p>',
        r'<meta name="description" content="(.*?)"',
    ]
    _TESTS = [
@@ -55,6 +56,17 @@ class IGNIE(InfoExtractor):
                'skip_download': True,
            },
        },
        {
            'url': 'http://www.ign.com/articles/2014/08/15/rewind-theater-wild-trailer-gamescom-2014?watch',
            'md5': '4e9a0bda1e5eebd31ddcf86ec0b9b3c7',
            'info_dict': {
                'id': '078fdd005f6d3c02f63d795faa1b984f',
                'ext': 'mp4',
                'title': 'Rewind Theater - Wild Trailer Gamescom 2014',
                'description': 'Giant skeletons, bloody hunts, and captivating'
                    ' natural beauty take our breath away.',
            },
        },
    ]
    def _find_video_id(self, webpage):
@@ -62,6 +74,7 @@ class IGNIE(InfoExtractor):
            r'data-video-id="(.+?)"',
            r'<object id="vid_(.+?)"',
            r'<meta name="og:image" content=".*/(.+?)-(.+?)/.+.jpg"',
            r'class="hero-poster[^"]*?"[^>]*id="(.+?)"',
        ]
        return self._search_regex(res_id, webpage, 'video id')
@@ -70,10 +83,7 @@ class IGNIE(InfoExtractor):
        name_or_id = mobj.group('name_or_id')
        page_type = mobj.group('type')
        webpage = self._download_webpage(url, name_or_id)
-        if page_type == 'articles':
+        if page_type != 'video':
            video_url = self._search_regex(r'var videoUrl = "(.+?)"', webpage, 'video url')
            return self.url_result(video_url, ie='IGN')
        elif page_type != 'video':
            multiple_urls = re.findall(
                '<param name="flashvars" value="[^"]*?url=(https?://www\.ign\.com/videos/.*?)["&]',
                webpage)
--- a/youtube_dl/extractor/ministrygrid.py
+++ b/youtube_dl/extractor/ministrygrid.py
@@ -0,0 +1,57 @@
 from __future__ import unicode_literals
 import json
 import re
 from .common import InfoExtractor
 from ..utils import (
    ExtractorError,
    smuggle_url,
 )
 class MinistryGridIE(InfoExtractor):
    """Extractor for ministrygrid.com pages.

    Probes the portlets listed on the page for an <iframe> and hands the
    iframe URL on as a URL result.
    """
    # The dot in the host name is escaped so that it only matches literally.
    _VALID_URL = r'https?://www\.ministrygrid\.com/([^/?#]*/)*(?P<id>[^/#?]+)/?(?:$|[?#])'
    _TEST = {
        'url': 'http://www.ministrygrid.com/training-viewer/-/training/t4g-2014-conference/the-gospel-by-numbers-4/the-gospel-by-numbers',
        'md5': '844be0d2a1340422759c2a9101bab017',
        'info_dict': {
            'id': '3453494717001',
            'ext': 'mp4',
            'title': 'The Gospel by Numbers',
            'description': 'Coming soon from T4G 2014!',
            'uploader': 'LifeWay Christian Resources (MG)',
        },
    }

    def _real_extract(self, url):
        """Return a URL result for the first portlet that contains an iframe.

        Raises ExtractorError when no portlet yields an iframe.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)
        portlets_json = self._search_regex(
            r'Liferay\.Portlet\.list=(\[.+?\])', webpage, 'portlet list')
        portlets = json.loads(portlets_json)
        pl_id = self._search_regex(
            r'<!--\s*p_l_id - ([0-9]+)<br>', webpage, 'p_l_id')

        for i, portlet in enumerate(portlets):
            portlet_url = 'http://www.ministrygrid.com/c/portal/render_portlet?p_l_id=%s&p_p_id=%s' % (pl_id, portlet)
            portlet_code = self._download_webpage(
                portlet_url, video_id,
                note='Looking in portlet %s (%d/%d)' % (portlet, i + 1, len(portlets)),
                fatal=False)
            # The download is non-fatal; without content there is nothing to
            # search, so move on to the next portlet.
            if not portlet_code:
                continue
            video_iframe_url = self._search_regex(
                r'<iframe.*?src="([^"]+)"', portlet_code, 'video iframe',
                default=None)
            if video_iframe_url:
                # Pass the page's id along with the iframe URL
                surl = smuggle_url(
                    video_iframe_url, {'force_videoid': video_id})
                return {
                    '_type': 'url',
                    'id': video_id,
                    'url': surl,
                }

        raise ExtractorError('Could not find video iframe in any portlets')
--- a/youtube_dl/extractor/mixcloud.py
+++ b/youtube_dl/extractor/mixcloud.py
@@ -6,6 +6,7 @@ from .common import InfoExtractor
 from ..utils import (
    compat_urllib_parse,
    ExtractorError,
    HEADRequest,
    int_or_none,
    parse_iso8601,
 )
@@ -38,7 +39,7 @@ class MixcloudIE(InfoExtractor):
            try:
                # We only want to know if the request succeed
                # don't download the whole file
-                self._request_webpage(url, None, False)
+                self._request_webpage(HEADRequest(url), None, False)
                return url
            except ExtractorError:
                url = None
--- a/youtube_dl/extractor/mofosex.py
+++ b/youtube_dl/extractor/mofosex.py
@@ -1,3 +1,5 @@
 from __future__ import unicode_literals
 import os
 import re
@@ -8,15 +10,17 @@ from ..utils import (
    compat_urllib_parse,
 )
 class MofosexIE(InfoExtractor):
-    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>mofosex\.com/videos/(?P<videoid>[0-9]+)/.*?\.html)'
+    _VALID_URL = r'^https?://(?:www\.)?(?P<url>mofosex\.com/videos/(?P<videoid>[0-9]+)/.*?\.html)'
    _TEST = {
-        u'url': u'http://www.mofosex.com/videos/5018/japanese-teen-music-video.html',
+        'url': 'http://www.mofosex.com/videos/5018/japanese-teen-music-video.html',
-        u'file': u'5018.mp4',
+        'md5': '1b2eb47ac33cc75d4a80e3026b613c5a',
-        u'md5': u'1b2eb47ac33cc75d4a80e3026b613c5a',
+        'info_dict': {
-        u'info_dict': {
+            'id': '5018',
-            u"title": u"Japanese Teen Music Video",
+            'ext': 'mp4',
-            u"age_limit": 18,
+            'title': 'Japanese Teen Music Video',
            'age_limit': 18,
        }
    }
@@ -29,8 +33,8 @@ class MofosexIE(InfoExtractor):
        req.add_header('Cookie', 'age_verified=1')
        webpage = self._download_webpage(req, video_id)
-        video_title = self._html_search_regex(r'<h1>(.+?)<', webpage, u'title')
+        video_title = self._html_search_regex(r'<h1>(.+?)<', webpage, 'title')
-        video_url = compat_urllib_parse.unquote(self._html_search_regex(r'flashvars.video_url = \'([^\']+)', webpage, u'video_url'))
+        video_url = compat_urllib_parse.unquote(self._html_search_regex(r'flashvars.video_url = \'([^\']+)', webpage, 'video_url'))
        path = compat_urllib_parse_urlparse(video_url).path
        extension = os.path.splitext(path)[1][1:]
        format = path.split('/')[5].split('_')[:2]
--- a/youtube_dl/extractor/movieclips.py
+++ b/youtube_dl/extractor/movieclips.py
@@ -0,0 +1,78 @@
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..utils import (
    ExtractorError,
    compat_str,
    clean_html,
 )
 class MovieClipsIE(InfoExtractor):
    """Extractor for movieclips.com clips, built from the player config and SMIL."""
    _VALID_URL = r'https?://movieclips\.com/(?P<id>[\da-zA-Z]+)(?:-(?P<display_id>[\da-z-]+))?'
    _TEST = {
        'url': 'http://movieclips.com/Wy7ZU-my-week-with-marilyn-movie-do-you-love-me/',
        'info_dict': {
            'id': 'Wy7ZU',
            'display_id': 'my-week-with-marilyn-movie-do-you-love-me',
            'ext': 'mp4',
            'title': 'My Week with Marilyn - Do You Love Me?',
            'description': 'md5:e86795bd332fe3cff461e7c8dc542acb',
            'thumbnail': r're:^https?://.*\.jpg$',
        },
        'params': {
            # rtmp download
            'skip_download': True,
        }
    }

    def _real_extract(self, url):
        """Return the info dict (metadata plus one format per SMIL video).

        Raises ExtractorError (expected) when the config's country-region
        element reads 'false'.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        display_id = mobj.group('display_id')
        # Name used when reporting downloads: the readable id when present
        show_id = display_id or video_id

        config = self._download_xml(
            'http://config.movieclips.com/player/config/%s' % video_id,
            show_id, 'Downloading player config')

        if config.find('./country-region').text == 'false':
            raise ExtractorError(
                '%s said: %s' % (self.IE_NAME, config.find('./region_alert').text), expected=True)

        properties = config.find('./video/properties')
        smil_file = properties.attrib['smil_file']

        smil = self._download_xml(smil_file, show_id, 'Downloading SMIL')
        base_url = smil.find('./head/meta').attrib['base']

        formats = []
        for video in smil.findall('./body/switch/video'):
            # Floor division keeps the bitrate an integer on Python 3 as well,
            # matching what plain '/' already produces on Python 2.
            vbr = int(video.attrib['system-bitrate']) // 1000
            src = video.attrib['src']
            formats.append({
                'url': base_url,
                'play_path': src,
                # src has the form '<ext>:<path>'
                'ext': src.split(':')[0],
                'vbr': vbr,
                'format_id': '%dk' % vbr,
            })

        self._sort_formats(formats)

        title = '%s - %s' % (properties.attrib['clip_movie_title'], properties.attrib['clip_title'])
        description = clean_html(compat_str(properties.attrib['clip_description']))
        thumbnail = properties.attrib['image']
        categories = properties.attrib['clip_categories'].split(',')

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'categories': categories,
            'formats': formats,
        }
--- a/youtube_dl/extractor/musicvault.py
+++ b/youtube_dl/extractor/musicvault.py
@@ -0,0 +1,78 @@
 from __future__ import unicode_literals
 import json
 import re
 from .common import InfoExtractor
 from ..utils import (
    strip_jsonp,
    parse_duration,
    unified_strdate,
 )
 class MusicVaultIE(InfoExtractor):
    """Extractor for musicvault.com video pages."""
    _VALID_URL = r'https?://www\.musicvault\.com/(?P<uploader_id>[^/?#]*)/video/(?P<display_id>[^/?#]*)_(?P<id>[0-9]+)\.html'
    _TEST = {
        'url': 'http://www.musicvault.com/the-allman-brothers-band/video/straight-from-the-heart_1010863.html',
        'md5': '2cdbb3ae75f7fb3519821507d2fb3c15',
        'info_dict': {
            'id': '1010863',
            'ext': 'mp4',
            'uploader_id': 'the-allman-brothers-band',
            'title': 'Straight from the Heart',
            'duration': 244,
            'uploader': 'The Allman Brothers Band',
            'thumbnail': 're:^https?://.*/thumbnail/.*',
            'upload_date': '19811216',
            'location': 'Capitol Theatre (Passaic, NJ)',
            'description': 'Listen to The Allman Brothers Band perform Straight from the Heart at Capitol Theatre (Passaic, NJ) on Dec 16, 1981',
        }
    }

    def _real_extract(self, url):
        """Return the info dict scraped from the page at *url*.

        Title, Kaltura id, wid and uid are required; the remaining fields are
        looked up non-fatally.
        """
        mobj = re.match(self._VALID_URL, url)
        display_id = mobj.group('display_id')
        webpage = self._download_webpage(url, display_id)

        thumbnail = self._search_regex(
            r'<meta itemprop="thumbnail" content="([^"]+)"',
            webpage, 'thumbnail', fatal=False)

        # The <h1>..<h4> headings inside the data <div> carry, in order,
        # uploader, title, upload date and location.
        data_div = self._search_regex(
            r'(?s)<div class="data">(.*?)</div>', webpage, 'data fields')
        uploader = self._html_search_regex(
            r'<h1.*?>(.*?)</h1>', data_div, 'uploader', fatal=False)
        title = self._html_search_regex(
            r'<h2.*?>(.*?)</h2>', data_div, 'title')
        upload_date_str = self._html_search_regex(
            r'<h3.*?>(.*?)</h3>', data_div, 'upload date', fatal=False)
        # The lookup above is non-fatal; only convert a date that was found.
        upload_date = unified_strdate(upload_date_str) if upload_date_str else None
        location = self._html_search_regex(
            r'<h4.*?>(.*?)</h4>', data_div, 'location', fatal=False)

        duration = parse_duration(self._html_search_meta('duration', webpage))

        VIDEO_URL_TEMPLATE = 'http://cdnapi.kaltura.com/p/%(uid)s/sp/%(wid)s/playManifest/entryId/%(entry_id)s/format/url/protocol/http'
        kaltura_id = self._search_regex(
            r'<div id="video-detail-player" data-kaltura-id="([^"]+)"',
            webpage, 'kaltura ID')
        video_url = VIDEO_URL_TEMPLATE % {
            'entry_id': kaltura_id,
            'wid': self._search_regex(r'/wid/_([0-9]+)/', webpage, 'wid'),
            'uid': self._search_regex(r'uiconf_id/([0-9]+)/', webpage, 'uid'),
        }

        return {
            'id': mobj.group('id'),
            'url': video_url,
            'ext': 'mp4',
            'display_id': display_id,
            'uploader_id': mobj.group('uploader_id'),
            'thumbnail': thumbnail,
            'description': self._html_search_meta('description', webpage),
            'upload_date': upload_date,
            'location': location,
            'title': title,
            'uploader': uploader,
            'duration': duration,
        }
--- a/youtube_dl/extractor/nuvid.py
+++ b/youtube_dl/extractor/nuvid.py
@@ -38,7 +38,7 @@ class NuvidIE(InfoExtractor):
            webpage = self._download_webpage(
                request, video_id, 'Downloading %s page' % format_id)
            video_url = self._html_search_regex(
-                r'<a href="([^"]+)"\s*>Continue to watch video', webpage, '%s video URL' % format_id, fatal=False)
+                r'<a\s+href="([^"]+)"\s+class="b_link">', webpage, '%s video URL' % format_id, fatal=False)
            if not video_url:
                continue
            formats.append({
@@ -49,19 +49,24 @@ class NuvidIE(InfoExtractor):
        webpage = self._download_webpage(
            'http://m.nuvid.com/video/%s' % video_id, video_id, 'Downloading video page')
        title = self._html_search_regex(
-            r'<div class="title">\s+<h2[^>]*>([^<]+)</h2>', webpage, 'title').strip()
+            [r'<span title="([^"]+)">',
-        thumbnail = self._html_search_regex(
+             r'<div class="thumb-holder video">\s*<h5[^>]*>([^<]+)</h5>'], webpage, 'title').strip()
-            r'href="(/thumbs/[^"]+)"[^>]*data-link_type="thumbs"',
+        thumbnails = [
-            webpage, 'thumbnail URL', fatal=False)
+            {
                'url': thumb_url,
            } for thumb_url in re.findall(r'<img src="([^"]+)" alt="" />', webpage)
        ]
        thumbnail = thumbnails[0]['url'] if thumbnails else None
        duration = parse_duration(self._html_search_regex(
-            r'Length:\s*<span>(\d{2}:\d{2})</span>',webpage, 'duration', fatal=False))
+            r'<i class="fa fa-clock-o"></i>\s*(\d{2}:\d{2})', webpage, 'duration', fatal=False))
        upload_date = unified_strdate(self._html_search_regex(
-            r'Added:\s*<span>(\d{4}-\d{2}-\d{2})</span>', webpage, 'upload date', fatal=False))
+            r'<i class="fa fa-user"></i>\s*(\d{4}-\d{2}-\d{2})', webpage, 'upload date', fatal=False))
        return {
            'id': video_id,
            'title': title,
-            'thumbnail': 'http://m.nuvid.com%s' % thumbnail,
+            'thumbnails': thumbnails,
            'thumbnail': thumbnail,
            'duration': duration,
            'upload_date': upload_date,
            'age_limit': 18,
--- a/youtube_dl/extractor/patreon.py
+++ b/youtube_dl/extractor/patreon.py
@@ -6,7 +6,6 @@ import re
 from .common import InfoExtractor
 from ..utils import (
    compat_urlparse,
    js_to_json,
 )
--- a/youtube_dl/extractor/pbs.py
+++ b/youtube_dl/extractor/pbs.py
@@ -54,6 +54,18 @@ class PBSIE(InfoExtractor):
                'duration': 801,
            },
        },
        {
            'url': 'http://www.pbs.org/wnet/gperf/dudamel-conducts-verdi-requiem-hollywood-bowl-full-episode/3374/',
            'md5': 'c62859342be2a0358d6c9eb306595978',
            'info_dict': {
                'id': '2365297708',
                'ext': 'mp4',
                'description': 'md5:68d87ef760660eb564455eb30ca464fe',
                'title': 'Dudamel Conducts Verdi Requiem at the Hollywood Bowl - Full',
                'duration': 6559,
                'thumbnail': 're:^https?://.*\.jpg$',
            }
        }
    ]
    def _extract_ids(self, url):
@@ -75,7 +87,7 @@ class PBSIE(InfoExtractor):
                return media_id, presumptive_id
            url = self._search_regex(
-                r'<iframe\s+id=["\']partnerPlayer["\'].*?\s+src=["\'](.*?)["\']>',
+                r'<iframe\s+(?:class|id)=["\']partnerPlayer["\'].*?\s+src=["\'](.*?)["\']>',
                webpage, 'player URL')
            mobj = re.match(self._VALID_URL, url)
--- a/youtube_dl/extractor/playfm.py
+++ b/youtube_dl/extractor/playfm.py
@@ -0,0 +1,82 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..utils import (
    compat_urllib_parse,
    compat_urllib_request,
    ExtractorError,
    float_or_none,
    int_or_none,
 )
 class PlayFMIE(InfoExtractor):
    """Extractor for play.fm recordings; upload date and id come from the URL."""
    IE_NAME = 'play.fm'
    _VALID_URL = r'https?://(?:www\.)?play\.fm/[^?#]*(?P<upload_date>[0-9]{8})(?P<id>[0-9]{6})(?:$|[?#])'

    _TEST = {
        'url': 'http://www.play.fm/recording/leipzigelectronicmusicbatofarparis_fr20140712137220',
        'md5': 'c505f8307825a245d0c7ad1850001f22',
        'info_dict': {
            'id': '137220',
            'ext': 'mp3',
            'title': 'LEIPZIG ELECTRONIC MUSIC @ Batofar (Paris,FR) - 2014-07-12',
            'uploader': 'Sven Tasnadi',
            'uploader_id': 'sventasnadi',
            'duration': 5627.428,
            'upload_date': '20140712',
            'view_count': int,
            'thumbnail': r're:^https?://.*\.jpg$',
        },
    }

    def _real_extract(self, url):
        """Return the info dict built from the flexRead recording document.

        Raises ExtractorError when the document contains an <error> node.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        upload_date = mobj.group('upload_date')

        # The request body is sent as bytes, not text
        rec_data = compat_urllib_parse.urlencode({'rec_id': video_id}).encode('utf-8')
        req = compat_urllib_request.Request(
            'http://www.play.fm/flexRead/recording', data=rec_data)
        req.add_header('Content-Type', 'application/x-www-form-urlencoded')
        rec_doc = self._download_xml(req, video_id)

        error_node = rec_doc.find('./error')
        if error_node is not None:
            raise ExtractorError('An error occurred: %s (code %s)' % (
                error_node.text, rec_doc.find('./status').text))

        recording = rec_doc.find('./recording')
        title = recording.find('./title').text
        view_count = int_or_none(recording.find('./stats/playcount').text)
        duration = float_or_none(recording.find('./duration').text, scale=1000)
        thumbnail = recording.find('./image').text

        # Only the first listed artist is used
        artist = recording.find('./artists/artist')
        uploader = artist.find('./name').text
        uploader_id = artist.find('./slug').text

        video_url = '%s//%s/%s/%s/offset/0/sh/%s/rec/%s/jingle/%s/loc/%s' % (
            'http:', recording.find('./url').text,
            recording.find('./_class').text, recording.find('./file_id').text,
            rec_doc.find('./uuid').text, video_id,
            rec_doc.find('./jingle/file_id').text,
            'http%3A%2F%2Fwww.play.fm%2Fplayer',
        )

        return {
            'id': video_id,
            'url': video_url,
            'ext': 'mp3',
            'filesize': int_or_none(recording.find('./size').text),
            'title': title,
            'upload_date': upload_date,
            'view_count': view_count,
            'duration': duration,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'uploader_id': uploader_id,
        }
--- a/youtube_dl/extractor/pornotube.py
+++ b/youtube_dl/extractor/pornotube.py
@@ -1,3 +1,5 @@
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
@@ -9,15 +11,16 @@ from ..utils import (
 class PornotubeIE(InfoExtractor):
-    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?pornotube\.com(/c/(?P<channel>[0-9]+))?(/m/(?P<videoid>[0-9]+))(/(?P<title>.+))$'
+    _VALID_URL = r'https?://(?:\w+\.)?pornotube\.com(/c/(?P<channel>[0-9]+))?(/m/(?P<videoid>[0-9]+))(/(?P<title>.+))$'
    _TEST = {
-        u'url': u'http://pornotube.com/c/173/m/1689755/Marilyn-Monroe-Bathing',
+        'url': 'http://pornotube.com/c/173/m/1689755/Marilyn-Monroe-Bathing',
-        u'file': u'1689755.flv',
+        'md5': '374dd6dcedd24234453b295209aa69b6',
-        u'md5': u'374dd6dcedd24234453b295209aa69b6',
+        'info_dict': {
-        u'info_dict': {
+            'id': '1689755',
-            u"upload_date": u"20090708", 
+            'ext': 'flv',
-            u"title": u"Marilyn-Monroe-Bathing",
+            'upload_date': '20090708',
-            u"age_limit": 18
+            'title': 'Marilyn-Monroe-Bathing',
            'age_limit': 18
        }
    }
@@ -32,22 +35,22 @@ class PornotubeIE(InfoExtractor):
        # Get the video URL
        VIDEO_URL_RE = r'url: "(?P<url>http://video[0-9].pornotube.com/.+\.flv)",'
-        video_url = self._search_regex(VIDEO_URL_RE, webpage, u'video url')
+        video_url = self._search_regex(VIDEO_URL_RE, webpage, 'video url')
        video_url = compat_urllib_parse.unquote(video_url)
        #Get the uploaded date
        VIDEO_UPLOADED_RE = r'<div class="video_added_by">Added (?P<date>[0-9\/]+) by'
-        upload_date = self._html_search_regex(VIDEO_UPLOADED_RE, webpage, u'upload date', fatal=False)
+        upload_date = self._html_search_regex(VIDEO_UPLOADED_RE, webpage, 'upload date', fatal=False)
-        if upload_date: upload_date = unified_strdate(upload_date)
+        if upload_date:
            upload_date = unified_strdate(upload_date)
        age_limit = self._rta_search(webpage)
-        info = {'id': video_id,
+        return {
-                'url': video_url,
+            'id': video_id,
-                'uploader': None,
+            'url': video_url,
-                'upload_date': upload_date,
+            'upload_date': upload_date,
-                'title': video_title,
+            'title': video_title,
-                'ext': 'flv',
+            'ext': 'flv',
-                'format': 'flv',
+            'format': 'flv',
-                'age_limit': age_limit}
+            'age_limit': age_limit,
-
+        }
        return [info]
--- a/youtube_dl/extractor/rtlnl.py
+++ b/youtube_dl/extractor/rtlnl.py
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..utils import parse_duration
 class RtlXlIE(InfoExtractor):
@@ -20,6 +21,7 @@ class RtlXlIE(InfoExtractor):
                'onze mobiele apps.',
            'timestamp': 1408051800,
            'upload_date': '20140814',
            'duration': 576.880,
        },
        'params': {
            # We download the first bytes of the first fragment, it can't be
@@ -35,7 +37,7 @@ class RtlXlIE(InfoExtractor):
        info = self._download_json(
            'http://www.rtl.nl/system/s4m/vfd/version=2/uuid=%s/fmt=flash/' % uuid,
            uuid)
-        meta = info['meta']
+
        material = info['material'][0]
        episode_info = info['episodes'][0]
@@ -45,8 +47,9 @@ class RtlXlIE(InfoExtractor):
        return {
            'id': uuid,
-            'title': '%s - %s' % (progname, subtitle), 
+            'title': '%s - %s' % (progname, subtitle),
            'formats': self._extract_f4m_formats(f4m_url, uuid),
            'timestamp': material['original_date'],
            'description': episode_info['synopsis'],
            'duration': parse_duration(material.get('duration')),
        }
--- a/youtube_dl/extractor/rtve.py
+++ b/youtube_dl/extractor/rtve.py
@@ -1,21 +1,66 @@
 # encoding: utf-8
 from __future__ import unicode_literals
 import re
 import base64
 import re
 import time
 from .common import InfoExtractor
 from ..utils import (
    struct_unpack,
    remove_end,
 )
 def _decrypt_url(png):
    """Decode the URL hidden in the 'tEXt' part of a base64-encoded blob.

    The four bytes in front of the 'tEXt' marker are read as a big-endian
    length; that many bytes after the marker, with zero bytes dropped, form
    the payload '<alphabet data>#<digit data>'. Both halves are interleaved
    with filler characters that get skipped while decoding.
    """
    raw = base64.b64decode(png)
    marker_pos = raw.find(b'tEXt')
    chunk = raw[marker_pos - 4:]
    (payload_len,) = struct_unpack('!I', chunk[:4])
    # Use bytearray to get integers when iterating in both python 2.x and 3.x
    payload = [
        chr(byte)
        for byte in bytearray(chunk[8:8 + payload_len])
        if byte != 0
    ]
    separator = payload.index('#')
    alphabet_part = payload[:separator]
    digits_part = payload[separator + 1:]

    # After each kept character a run of fillers follows whose length
    # cycles through 1, 2, 3, 0.
    alphabet = []
    stride = 0
    to_skip = 0
    for symbol in alphabet_part:
        if to_skip != 0:
            to_skip -= 1
            continue
        alphabet.append(symbol)
        stride = (stride + 1) % 4
        to_skip = stride

    # Every output character is an index into the alphabet, given as a tens
    # digit, some fillers, and a units digit.
    pieces = []
    have_tens = False
    to_skip = 3
    counter = 1
    index = 0
    for digit in digits_part:
        if not have_tens:
            index = int(digit) * 10
            have_tens = True
        elif to_skip != 0:
            to_skip -= 1
        else:
            index += int(digit)
            pieces.append(alphabet[index])
            to_skip = (counter + 3) % 4
            counter += 1
            have_tens = False

    return ''.join(pieces)
 class RTVEALaCartaIE(InfoExtractor):
    IE_NAME = 'rtve.es:alacarta'
    IE_DESC = 'RTVE a la carta'
    _VALID_URL = r'http://www\.rtve\.es/alacarta/videos/[^/]+/[^/]+/(?P<id>\d+)'
-    _TEST = {
+    _TESTS = [{
        'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/',
        'md5': '1d49b7e1ca7a7502c56a4bf1b60f1b43',
        'info_dict': {
@@ -23,48 +68,15 @@ class RTVEALaCartaIE(InfoExtractor):
            'ext': 'mp4',
            'title': 'Balonmano - Swiss Cup masculina. Final: España-Suecia',
        },
-    }
+    }, {
-
+        'note': 'Live stream',
-    def _decrypt_url(self, png):
+        'url': 'http://www.rtve.es/alacarta/videos/television/24h-live/1694255/',
-        encrypted_data = base64.b64decode(png)
+        'info_dict': {
-        text_index = encrypted_data.find(b'tEXt')
+            'id': '1694255',
-        text_chunk = encrypted_data[text_index-4:]
+            'ext': 'flv',
-        length = struct_unpack('!I', text_chunk[:4])[0]
+            'title': 'TODO',
-        # Use bytearray to get integers when iterating in both python 2.x and 3.x
+        }
-        data = bytearray(text_chunk[8:8+length])
+    }]
        data = [chr(b) for b in data if b != 0]
        hash_index = data.index('#')
        alphabet_data = data[:hash_index]
        url_data = data[hash_index+1:]
        alphabet = []
        e = 0
        d = 0
        for l in alphabet_data:
            if d == 0:
                alphabet.append(l)
                d = e = (e + 1) % 4
            else:
                d -= 1
        url = ''
        f = 0
        e = 3
        b = 1
        for letter in url_data:
            if f == 0:
                l = int(letter)*10
                f = 1
            else:
                if e == 0:
                    l += int(letter)
                    url += alphabet[l]
                    e = (b + 3) % 4
                    f = 0
                    b += 1
                else:
                    e -= 1
        return url
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
@@ -74,11 +86,57 @@ class RTVEALaCartaIE(InfoExtractor):
            video_id)['page']['items'][0]
        png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % video_id
        png = self._download_webpage(png_url, video_id, 'Downloading url information')
-        video_url = self._decrypt_url(png)
+        video_url = _decrypt_url(png)
        return {
            'id': video_id,
            'title': info['title'],
            'url': video_url,
-            'thumbnail': info['image'],
+            'thumbnail': info.get('image'),
            'page_url': url,
        }
 class RTVELiveIE(InfoExtractor):
    """Extractor for the live stream pages matched by _VALID_URL on rtve.es."""
    IE_NAME = 'rtve.es:live'
    IE_DESC = 'RTVE.es live streams'
    _VALID_URL = r'http://www\.rtve\.es/(?:deportes/directo|noticias|television)/(?P<id>[a-zA-Z0-9-]+)'
    _TESTS = [{
        'url': 'http://www.rtve.es/noticias/directo-la-1/',
        'info_dict': {
            'id': 'directo-la-1',
            'ext': 'flv',
            'title': 're:^La 1 de TVE [0-9]{4}-[0-9]{2}-[0-9]{2}Z[0-9]{6}$',
        },
        'params': {
            'skip_download': 'live stream',
        }
    }]
    def _real_extract(self, url):
        """Build the info dict for a live page.

        The stream URL is obtained by downloading the PNG associated with
        the page's internal player ID and decoding it with _decrypt_url.
        """
        video_id = re.match(self._VALID_URL, url).group('id')
        # Taken before any download so the title reflects when extraction began
        started_at = time.gmtime()
        page = self._download_webpage(url, video_id)
        player_url = self._search_regex(
            r'<param name="movie" value="([^"]+)"/>', page, 'player URL')
        base_title = remove_end(self._og_search_title(page), ' en directo')
        stamp = time.strftime('%Y-%m-%dZ%H%M%S', started_at)
        internal_id = self._search_regex(
            r' id="vidplayer([0-9]+)"', page, 'internal video ID')
        encoded_png = self._download_webpage(
            'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % internal_id,
            video_id, 'Downloading url information')
        return {
            'id': video_id,
            'ext': 'flv',
            'title': base_title + ' ' + stamp,
            'url': _decrypt_url(encoded_png),
            'app': 'rtve-live-live?ovpfv=2.1.2',
            'player_url': player_url,
            'rtmp_live': True,
        }
--- a/youtube_dl/extractor/sbs.py
+++ b/youtube_dl/extractor/sbs.py
@@ -0,0 +1,56 @@
 # -*- coding: utf-8 -*-
 from __future__ import unicode_literals
 import json
 import re
 from .common import InfoExtractor
 from ..utils import (
    js_to_json,
    remove_end,
 )
 class SBSIE(InfoExtractor):
    """Extractor for single on-demand video pages on sbs.com.au.

    The page's ``playerParams.releaseUrls`` JavaScript object is parsed and
    one of its URLs is handed on as a ``url_transparent`` result, so the
    extractor responsible for that URL supplies the actual formats.
    """
    IE_DESC = 'sbs.com.au'
    _VALID_URL = r'https?://(?:www\.)?sbs\.com\.au/ondemand/video/single/(?P<id>[0-9]+)/'
    _TESTS = [{
        # Original URL is handled by the generic IE which finds the iframe:
        # http://www.sbs.com.au/thefeed/blog/2014/08/21/dingo-conservation
        'url': 'http://www.sbs.com.au/ondemand/video/single/320403011771/?source=drupal&vertical=thefeed',
        'md5': '3150cf278965eeabb5b4cea1c963fe0a',
        'info_dict': {
            'id': '320403011771',
            'ext': 'flv',
            'title': 'Dingo Conservation',
            'description': 'Dingoes are on the brink of extinction; most of the animals we think are dingoes are in fact crossbred with wild dogs. This family run a dingo conservation park to prevent their extinction',
            'thumbnail': r're:http://.*\.jpg',
        },
        'add_ies': ['generic'],
    }]
    def _real_extract(self, url):
        """Return a url_transparent result for the video page at *url*.

        Title, description and thumbnail are taken from the page's meta
        tags; the media URL is the 'progressive' release URL, or the
        'standard' one when no progressive URL is present.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)
        # Name the searched field so that a failed extraction produces a
        # meaningful error message instead of an empty field name.
        release_urls_json = js_to_json(self._search_regex(
            r'(?s)playerParams\.releaseUrls\s*=\s*(\{.*?\n\});\n',
            webpage, 'release URLs'))
        release_urls = json.loads(release_urls_json)
        theplatform_url = (
            release_urls.get('progressive') or release_urls.get('standard'))
        title = remove_end(self._og_search_title(webpage), ' (The Feed)')
        description = self._html_search_meta('description', webpage)
        thumbnail = self._og_search_thumbnail(webpage)
        return {
            '_type': 'url_transparent',
            'id': video_id,
            'url': theplatform_url,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
        }
--- a/youtube_dl/extractor/sockshare.py
+++ b/youtube_dl/extractor/sockshare.py
@@ -61,7 +61,10 @@ class SockshareIE(InfoExtractor):
            r'<a href="([^"]*)".+class="download_file_link"',
            webpage, 'file url')
        video_url = "http://www.sockshare.com" + video_url
-        title = self._html_search_regex(r'<h1>(.+)<strong>', webpage, 'title')
+        title = self._html_search_regex((
            r'<h1>(.+)<strong>',
            r'var name = "([^"]+)";'),
            webpage, 'title', default=None)
        thumbnail = self._html_search_regex(
            r'<img\s+src="([^"]*)".+?name="bg"',
            webpage, 'thumbnail')
--- a/youtube_dl/extractor/sportdeutschland.py
+++ b/youtube_dl/extractor/sportdeutschland.py
@@ -0,0 +1,77 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..utils import (
    compat_urllib_request,
    parse_iso8601,
 )
 class SportDeutschlandIE(InfoExtractor):
    """Extractor for sportdeutschland.tv video pages.

    Metadata comes from the splink.tv permalink API; formats are built from
    the asset's SMIL document (RTMP) and from the m3u8 playlist found by
    swapping the SMIL URL's extension.
    """
    _VALID_URL = r'https?://sportdeutschland\.tv/(?P<sport>[^/?#]+)/(?P<id>[^?#/]+)(?:$|[?#])'
    _TEST = {
        'url': 'http://sportdeutschland.tv/badminton/live-li-ning-badminton-weltmeisterschaft-2014-kopenhagen',
        'info_dict': {
            'id': 'live-li-ning-badminton-weltmeisterschaft-2014-kopenhagen',
            'ext': 'mp4',
            'title': 'LIVE: Li-Ning Badminton Weltmeisterschaft 2014 Kopenhagen',
            'categories': ['Badminton'],
            'view_count': int,
            'thumbnail': r're:^https?://.*\.jpg',
            'description': 're:^Die Badminton-WM 2014 aus Kopenhagen LIVE',
            'timestamp': 1409043600,
            'upload_date': '20140826',
        },
        'params': {
            'skip_download': 'Live stream',
        },
    }
    def _real_extract(self, url):
        """Query the permalink API for *url* and assemble the info dict."""
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        sport_id = mobj.group('sport')
        api_url = 'http://splink.tv/api/permalinks/%s/%s' % (
            sport_id, video_id)
        # The API request carries an explicit Accept type and the page as Referer
        req = compat_urllib_request.Request(api_url, headers={
            'Accept': 'application/vnd.vidibus.v2.html+json',
            'Referer': url,
        })
        data = self._download_json(req, video_id)
        categories = list(data.get('section', {}).get('tags', {}).values())
        asset = data['asset']
        smil_url = asset['video']
        m3u8_url = smil_url.replace('.smil', '.m3u8')
        formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')
        smil_doc = self._download_xml(
            smil_url, video_id, note='Downloading SMIL metadata')
        base_url = smil_doc.find('./head/meta').attrib['base']
        # One low-preference RTMP format per <video> node of the SMIL body
        formats.extend([{
            'format_id': 'rtmp',
            'url': base_url,
            'play_path': n.attrib['src'],
            'ext': 'flv',
            'preference': -100,
            'format_note': 'Seems to fail at example stream',
        } for n in smil_doc.findall('./body/video')])
        self._sort_formats(formats)
        return {
            'id': video_id,
            'formats': formats,
            'title': asset['title'],
            'thumbnail': asset.get('image'),
            'description': asset.get('teaser'),
            'categories': categories,
            'view_count': asset.get('views'),
            # Optional like the other asset metadata: a missing flag must
            # not abort an otherwise successful extraction.
            'rtmp_live': asset.get('live'),
            'timestamp': parse_iso8601(asset.get('date')),
        }
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -151,6 +151,19 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
                'duration': 62,
            }
        },
        {
            'note': 'video player needs Referer',
            'url': 'http://vimeo.com/user22258446/review/91613211/13f927e053',
            'md5': '6295fdab8f4bf6a002d058b2c6dce276',
            'info_dict': {
                'id': '91613211',
                'ext': 'mp4',
                'title': 'Death by dogma versus assembling agile - Sander Hoogendoorn',
                'uploader': 'DevWeek Events',
                'duration': 2773,
                'thumbnail': 're:^https?://.*\.jpg$',
            }
        }
    ]
    @classmethod
@@ -205,6 +218,8 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
        if data is not None:
            headers = headers.copy()
            headers.update(data)
        if 'Referer' not in headers:
            headers['Referer'] = url
        # Extract ID from URL
        mobj = re.match(self._VALID_URL, url)
--- a/youtube_dl/extractor/vodlocker.py
+++ b/youtube_dl/extractor/vodlocker.py
@@ -44,7 +44,7 @@ class VodlockerIE(InfoExtractor):
                req, video_id, 'Downloading video page')
        title = self._search_regex(
-            r'id="file_title".*?>\s*(.*?)\s*<span', webpage, 'title')
+            r'id="file_title".*?>\s*(.*?)\s*<(?:br|span)', webpage, 'title')
        thumbnail = self._search_regex(
            r'image:\s*"(http[^\"]+)",', webpage, 'thumbnail')
        url = self._search_regex(
--- a/youtube_dl/extractor/wat.py
+++ b/youtube_dl/extractor/wat.py
@@ -2,29 +2,43 @@
 from __future__ import unicode_literals
 import re
 import hashlib
 from .common import InfoExtractor
-from ..utils import (
+from ..utils import unified_strdate
    unified_strdate,
 )
 class WatIE(InfoExtractor):
-    _VALID_URL = r'http://www\.wat\.tv/.*-(?P<shortID>.*?)_.*?\.html'
+    _VALID_URL = r'http://www\.wat\.tv/video/(?P<display_id>.*)-(?P<short_id>.*?)_.*?\.html'
    IE_NAME = 'wat.tv'
-    _TEST = {
+    _TESTS = [
-        'url': 'http://www.wat.tv/video/world-war-philadelphia-vost-6bv55_2fjr7_.html',
+        {
-        'info_dict': {
+            'url': 'http://www.wat.tv/video/soupe-figues-l-orange-aux-epices-6z1uz_2hvf7_.html',
-            'id': '10631273',
+            'md5': 'ce70e9223945ed26a8056d413ca55dc9',
-            'ext': 'mp4',
+            'info_dict': {
-            'title': 'World War Z - Philadelphia VOST',
+                'id': '11713067',
-            'description': 'La menace est partout. Que se passe-t-il à Philadelphia ?\r\nWORLD WAR Z, avec Brad Pitt, au cinéma le 3 juillet.\r\nhttp://www.worldwarz.fr',
+                'display_id': 'soupe-figues-l-orange-aux-epices',
                'ext': 'mp4',
                'title': 'Soupe de figues à l\'orange et aux épices',
                'description': 'Retrouvez l\'émission "Petits plats en équilibre", diffusée le 18 août 2014.',
                'upload_date': '20140819',
                'duration': 120,
            },
        },
-        'params': {
+        {
-            # Sometimes wat serves the whole file with the --test option
+            'url': 'http://www.wat.tv/video/gregory-lemarchal-voix-ange-6z1v7_6ygkj_.html',
-            'skip_download': True,
+            'md5': 'fbc84e4378165278e743956d9c1bf16b',
            'info_dict': {
                'id': '11713075',
                'display_id': 'gregory-lemarchal-voix-ange',
                'ext': 'mp4',
                'title': 'Grégory Lemarchal, une voix d\'ange depuis 10 ans (1/3)',
                'description': 'md5:b7a849cf16a2b733d9cd10c52906dee3',
                'upload_date': '20140816',
                'duration': 2910,
            },
        },
-    }
+    ]
    def download_video_info(self, real_id):
        # 'contentv4' is used in the website, but it also returns the related
@@ -36,13 +50,20 @@ class WatIE(InfoExtractor):
        def real_id_for_chapter(chapter):
            return chapter['tc_start'].split('-')[0]
        mobj = re.match(self._VALID_URL, url)
-        short_id = mobj.group('shortID')
+        short_id = mobj.group('short_id')
-        webpage = self._download_webpage(url, short_id)
+        display_id = mobj.group('display_id')
        webpage = self._download_webpage(url, display_id or short_id)
        real_id = self._search_regex(r'xtpage = ".*-(.*?)";', webpage, 'real id')
        video_info = self.download_video_info(real_id)
        geo_list = video_info.get('geoList')
        country = geo_list[0] if geo_list else ''
        chapters = video_info['chapters']
        first_chapter = chapters[0]
        files = video_info['files']
        first_file = files[0]
        if real_id_for_chapter(first_chapter) != real_id:
            self.to_screen('Multipart video detected')
@@ -61,12 +82,47 @@ class WatIE(InfoExtractor):
            upload_date = unified_strdate(first_chapter['date_diffusion'])
        # Otherwise we can continue and extract just one part, we have to use
        # the short id for getting the video url
        formats = [{
            'url': 'http://wat.tv/get/android5/%s.mp4' % real_id,
            'format_id': 'Mobile',
        }]
        fmts = [('SD', 'web')]
        if first_file.get('hasHD'):
            fmts.append(('HD', 'webhd'))
        def compute_token(param):
            timestamp = '%08x' % int(self._download_webpage(
                'http://www.wat.tv/servertime', real_id,
                'Downloading server time').split('|')[0])
            magic = '9b673b13fa4682ed14c3cfa5af5310274b514c4133e9b3a81e6e3aba009l2564'
            return '%s/%s' % (hashlib.md5((magic + param + timestamp).encode('ascii')).hexdigest(), timestamp)
        for fmt in fmts:
            webid = '/%s/%s' % (fmt[1], real_id)
            video_url = self._download_webpage(
                'http://www.wat.tv/get%s?token=%s&getURL=1&country=%s' % (webid, compute_token(webid), country),
                real_id,
                'Downloding %s video URL' % fmt[0],
                'Failed to download %s video URL' % fmt[0],
                False)
            if not video_url:
                continue
            formats.append({
                'url': video_url,
                'ext': 'mp4',
                'format_id': fmt[0],
            })
        return {
            'id': real_id,
-            'url': 'http://wat.tv/get/android5/%s.mp4' % real_id,
+            'display_id': display_id,
            'title': first_chapter['title'],
            'thumbnail': first_chapter['preview'],
            'description': first_chapter['description'],
            'view_count': video_info['views'],
            'upload_date': upload_date,
            'duration': first_file['duration'],
            'formats': formats,
        }
--- a/youtube_dl/extractor/wayofthemaster.py
+++ b/youtube_dl/extractor/wayofthemaster.py
@@ -0,0 +1,52 @@
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 class WayOfTheMasterIE(InfoExtractor):
    """Extractor for video pages on www.wayofthemaster.com."""
    _VALID_URL = r'https?://www\.wayofthemaster\.com/([^/?#]*/)*(?P<id>[^/?#]+)\.s?html(?:$|[?#])'
    _TEST = {
        'url': 'http://www.wayofthemaster.com/hbks.shtml',
        'md5': '5316b57487ada8480606a93cb3d18d24',
        'info_dict': {
            'id': 'hbks',
            'ext': 'mp4',
            'title': 'Intelligent Design vs. Evolution',
        },
    }
    def _real_extract(self, url):
        """Return id, title and a low/high quality format pair for *url*."""
        video_id = re.match(self._VALID_URL, url).group('id')
        page = self._download_webpage(url, video_id)
        # Prefer the alt text of the title image, fall back to <title>
        title = self._search_regex(
            r'<img src="images/title_[^"]+".*?alt="([^"]+)"',
            page, 'title', default=None)
        if title is None:
            title = self._html_search_regex(
                r'<title>(.*?)</title>', page, 'page title')
        # The flash player's "vid" parameter is the common prefix of both files
        url_base = self._search_regex(
            r'<param\s+name="?movie"?\s+value=".*?/wotm_videoplayer_highlow[0-9]*\.swf\?vid=([^"]+)"',
            page, 'URL base')
        variants = (
            ('low', '_low.mp4'),
            ('high', '_high.mp4'),
        )
        formats = [
            {
                'format_id': format_id,
                'quality': quality,
                'url': url_base + suffix,
            }
            for quality, (format_id, suffix) in enumerate(variants, 1)
        ]
        self._sort_formats(formats)
        return {
            'id': video_id,
            'title': title,
            'formats': formats,
        }
--- a/youtube_dl/extractor/xhamster.py
+++ b/youtube_dl/extractor/xhamster.py
@@ -14,7 +14,7 @@ from ..utils import (
 class XHamsterIE(InfoExtractor):
    """Information Extractor for xHamster"""
-    _VALID_URL = r'http://(?:www\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.+?)\.html(?:\?.*)?'
+    _VALID_URL = r'http://(?:.+?\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.+?)\.html(?:\?.*)?'
    _TESTS = [
        {
            'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html',
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -37,6 +37,7 @@ from ..utils import (
 class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/SecondFactor'
    _LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
    _AGE_URL = 'https://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
    _NETRC_MACHINE = 'youtube'
@@ -50,12 +51,19 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
            fatal=False))
    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.
        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
        """
        (username, password) = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED:
                raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
-            return False
+            return True
        login_page = self._download_webpage(
            self._LOGIN_URL, None,
@@ -73,6 +81,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
                u'Email': username,
                u'GALX': galx,
                u'Passwd': password,
                u'PersistentCookie': u'yes',
                u'_utf8': u'霱',
                u'bgresponse': u'js_disabled',
@@ -88,6 +97,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
                u'uilel': u'3',
                u'hl': u'en_US',
        }
        # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
        # chokes on unicode
        login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
@@ -99,6 +109,68 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
            note=u'Logging in', errnote=u'unable to log in', fatal=False)
        if login_results is False:
            return False
        if re.search(r'id="errormsg_0_Passwd"', login_results) is not None:
            raise ExtractorError(u'Please use your account password and a two-factor code instead of an application-specific password.', expected=True)
        # Two-Factor
        # TODO add SMS and phone call support - these require making a request and then prompting the user
        if re.search(r'(?i)<form[^>]* id="gaia_secondfactorform"', login_results) is not None:
            tfa_code = self._get_tfa_info()
            if tfa_code is None:
                self._downloader.report_warning(u'Two-factor authentication required. Provide it with --twofactor <code>')
                self._downloader.report_warning(u'(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                return False
            # Unlike the first login form, secTok and timeStmp are both required for the TFA form
            match = re.search(r'id="secTok"\n\s+value=\'(.+)\'/>', login_results, re.M | re.U)
            if match is None:
                self._downloader.report_warning(u'Failed to get secTok - did the page structure change?')
            secTok = match.group(1)
            match = re.search(r'id="timeStmp"\n\s+value=\'(.+)\'/>', login_results, re.M | re.U)
            if match is None:
                self._downloader.report_warning(u'Failed to get timeStmp - did the page structure change?')
            timeStmp = match.group(1)
            tfa_form_strs = {
                u'continue': u'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
                u'smsToken': u'',
                u'smsUserPin': tfa_code,
                u'smsVerifyPin': u'Verify',
                u'PersistentCookie': u'yes',
                u'checkConnection': u'',
                u'checkedDomains': u'youtube',
                u'pstMsg': u'1',
                u'secTok': secTok,
                u'timeStmp': timeStmp,
                u'service': u'youtube',
                u'hl': u'en_US',
            }
            tfa_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in tfa_form_strs.items())
            tfa_data = compat_urllib_parse.urlencode(tfa_form).encode('ascii')
            tfa_req = compat_urllib_request.Request(self._TWOFACTOR_URL, tfa_data)
            tfa_results = self._download_webpage(
                tfa_req, None,
                note=u'Submitting TFA code', errnote=u'unable to submit tfa', fatal=False)
            if tfa_results is False:
                return False
            if re.search(r'(?i)<form[^>]* id="gaia_secondfactorform"', tfa_results) is not None:
                self._downloader.report_warning(u'Two-factor code expired. Please try again, or use a one-use backup code instead.')
                return False
            if re.search(r'(?i)<form[^>]* id="gaia_loginform"', tfa_results) is not None:
                self._downloader.report_warning(u'unable to log in - did the page structure change?')
                return False
            if re.search(r'smsauth-interstitial-reviewsettings', tfa_results) is not None:
                self._downloader.report_warning(u'Your Google account has a security notice. Please log in on your web browser, resolve the notice, and try again.')
                return False
        if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
            self._downloader.report_warning(u'unable to log in: bad username or password')
            return False
--- a/youtube_dl/postprocessor/init.py
+++ b/youtube_dl/postprocessor/init.py
@@ -9,6 +9,7 @@ from .ffmpeg import (
    FFmpegEmbedSubtitlePP,
 )
 from .xattrpp import XAttrMetadataPP
 from .execafterdownload import ExecAfterDownloadPP
 __all__ = [
    'AtomicParsleyPP',
@@ -19,4 +20,5 @@ __all__ = [
    'FFmpegExtractAudioPP',
    'FFmpegEmbedSubtitlePP',
    'XAttrMetadataPP',
    'ExecAfterDownloadPP',
 ]
--- a/youtube_dl/postprocessor/execafterdownload.py
+++ b/youtube_dl/postprocessor/execafterdownload.py
@@ -0,0 +1,31 @@
 from __future__ import unicode_literals
 import subprocess
 from .common import PostProcessor
 from ..utils import (
    shlex_quote,
    PostProcessingError,
 )
 class ExecAfterDownloadPP(PostProcessor):
    """Post-processor that runs a shell command on the downloaded file.

    Every '{}' in the configured command is replaced with the shell-quoted
    path of the file; when the command contains no '{}', the path is
    appended as the last argument.
    """
    def __init__(self, downloader=None, verboseOutput=None, exec_cmd=None):
        # Forward the downloader instead of silently dropping it; run()
        # reports through self._downloader.
        # NOTE(review): relies on PostProcessor.__init__(downloader) storing
        # it as self._downloader -- confirm against the base class.
        super(ExecAfterDownloadPP, self).__init__(downloader)
        self.verboseOutput = verboseOutput
        self.exec_cmd = exec_cmd
    def run(self, information):
        """Execute the command for ``information['filepath']``.

        Returns ``(None, information)``.  Raises PostProcessingError when
        the command exits with a non-zero status.
        """
        cmd = self.exec_cmd
        if '{}' not in cmd:
            cmd += ' {}'
        # Only the file path is quoted; the command itself is user-supplied
        # and deliberately interpreted by the shell.
        cmd = cmd.replace('{}', shlex_quote(information['filepath']))
        self._downloader.to_screen("[exec] Executing command: %s" % cmd)
        retCode = subprocess.call(cmd, shell=True)
        if retCode != 0:
            raise PostProcessingError(
                'Command returned error code %d' % retCode)
        return None, information  # by default, keep file and do nothing
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -192,6 +192,13 @@ try:
 except ImportError:  # Python 2.6
    from xml.parsers.expat import ExpatError as compat_xml_parse_error
 try:
    from shlex import quote as shlex_quote
 except ImportError:  # Python < 3.3
    def shlex_quote(s):
        """Wrap *s* in single quotes, escaping any single quote inside it."""
        # A single quote cannot appear inside a single-quoted shell word, so
        # close the word, emit a double-quoted quote, and reopen the word.
        escaped = s.replace("'", "'\"'\"'")
        return "'%s'" % escaped
 def compat_ord(c):
    """Return *c* itself if it is exactly an int, otherwise ``ord(c)``."""
    # Indexing bytes gives ints on Python 3 and one-character strings on
    # Python 2; this accepts either.
    return c if type(c) is int else ord(c)
@@ -759,10 +766,9 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
        return ret
    def http_request(self, req):
-        for h,v in std_headers.items():
+        for h, v in std_headers.items():
-            if h in req.headers:
+            if h not in req.headers:
-                del req.headers[h]
+                req.add_header(h, v)
            req.add_header(h, v)
        if 'Youtubedl-no-compression' in req.headers:
            if 'Accept-encoding' in req.headers:
                del req.headers['Accept-encoding']
@@ -855,6 +861,7 @@ def unified_strdate(date_str):
        '%Y/%m/%d',
        '%d.%m.%Y',
        '%d/%m/%Y',
        '%d/%m/%y',
        '%Y/%m/%d %H:%M:%S',
        '%Y-%m-%d %H:%M:%S',
        '%d.%m.%Y %H:%M',
@@ -1285,6 +1292,12 @@ def remove_start(s, start):
    return s
 def remove_end(s, end):
    """Return *s* with the suffix *end* removed.

    *s* is returned unchanged when it does not end with *end* or when
    *end* is empty.
    """
    # Every string "ends with" the empty string, and s[:-0] is s[:0] == '',
    # so an empty suffix has to be treated as a no-op explicitly.
    if end and s.endswith(end):
        return s[:-len(end)]
    return s
 def url_basename(url):
    """Return the last segment of the path component of *url*.

    Leading and trailing slashes of the path are ignored, so a URL with an
    empty path or a path of '/' yields an empty string.
    """
    trimmed_path = compat_urlparse.urlparse(url).path.strip(u'/')
    # Everything after the last remaining slash (the whole path if none)
    return trimmed_path.rpartition(u'/')[2]
@@ -1324,7 +1337,7 @@ def parse_duration(s):
        return None
    m = re.match(
-        r'(?:(?:(?P<hours>[0-9]+)[:h])?(?P<mins>[0-9]+)[:m])?(?P<secs>[0-9]+)s?(?::[0-9]+)?$', s)
+        r'(?:(?:(?P<hours>[0-9]+)[:h])?(?P<mins>[0-9]+)[:m])?(?P<secs>[0-9]+)s?(?::[0-9]+)?(?P<ms>\.[0-9]+)?$', s)
    if not m:
        return None
    res = int(m.group('secs'))
@@ -1332,6 +1345,8 @@ def parse_duration(s):
        res += int(m.group('mins')) * 60
        if m.group('hours'):
            res += int(m.group('hours')) * 60 * 60
    if m.group('ms'):
        res += float(m.group('ms'))
    return res
@@ -1442,6 +1457,12 @@ def urlencode_postdata(*args, **kargs):
    return compat_urllib_parse.urlencode(*args, **kargs).encode('ascii')
 # Use Element.iter where the running Python provides it; otherwise fall
 # back to an XPath query over the element's descendants.
 if hasattr(xml.etree.ElementTree.Element, 'iter'):
    etree_iter = xml.etree.ElementTree.Element.iter
 else:  # Python <=2.6
    def etree_iter(n):
        return n.findall('.//*')
 def parse_xml(s):
    class TreeBuilder(xml.etree.ElementTree.TreeBuilder):
        def doctype(self, name, pubid, system):
@@ -1449,7 +1470,14 @@ def parse_xml(s):
    parser = xml.etree.ElementTree.XMLParser(target=TreeBuilder())
    kwargs = {'parser': parser} if sys.version_info >= (2, 7) else {}
-    return xml.etree.ElementTree.XML(s.encode('utf-8'), **kwargs)
+    tree = xml.etree.ElementTree.XML(s.encode('utf-8'), **kwargs)
    # Fix up XML parser in Python 2.x
    if sys.version_info < (3, 0):
        for n in etree_iter(tree):
            if n.text is not None:
                if not isinstance(n.text, compat_str):
                    n.text = n.text.decode('utf-8')
    return tree
 if sys.version_info < (3, 0) and sys.platform == 'win32':
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@
-__version__ = '2014.08.22.1'
+__version__ = '2014.08.27.1'
Author	SHA1	Message	Date
Philipp Hagemeister	a7680bf330	release 2014.08.27.1	2014-08-27 02:37:23 +02:00
Philipp Hagemeister	6d3d3fc083	[ard] Add suppor for plain ARD downloads (Fixes #3546 )	2014-08-27 02:36:57 +02:00
Philipp Hagemeister	aff216edf4	[generic] Prevent <video> search from skipping over empty sources (#3546 )	2014-08-27 02:09:59 +02:00
Philipp Hagemeister	1cb6dcdbbe	[generic] Do not download images as videos by accident	2014-08-27 02:07:11 +02:00
Philipp Hagemeister	3f514a353e	release 2014.08.27	2014-08-27 01:44:54 +02:00
Philipp Hagemeister	da9ec3b932	[muscivault] Add extractor (Fixes #3593 )	2014-08-27 01:44:47 +02:00
Philipp Hagemeister	191b7cbba9	[mfs] Modernize	2014-08-27 01:04:32 +02:00
Philipp Hagemeister	e8c59b9642	release 2014.08.26	2014-08-26 21:30:52 +02:00
Philipp Hagemeister	6abb066128	[sockshare] Fix title extraction (Fixes #3592 )	2014-08-26 21:30:30 +02:00
Philipp Hagemeister	8f1ea7cbb6	[empflix] Revert to XML parser Don't rely on the XML being broken (if they fix it, our code wouldn't work anymore). Instead, use the transform function we already have :) This partially reverts commit `c7bee2a725`.	2014-08-26 15:51:42 +02:00
Jaime Marquínez Ferrándiz	a204c85408	[ign] Fix extraction of video in articles	2014-08-26 15:38:29 +02:00
Sergey M․	15a1f4b8fe	[empflix] Extract thumbnail	2014-08-26 20:10:36 +07:00
Sergey M․	c7bee2a725	[empflix] Adapt to malformed config XML	2014-08-26 20:07:28 +07:00
Jaime Marquínez Ferrándiz	dbc1366b50	[mixcloud] Use a HEAD request when checking if the url is valid	2014-08-26 14:55:15 +02:00
Philipp Hagemeister	704df56da7	[sportdeutschland] add new extractor	2014-08-26 12:51:13 +02:00
Philipp Hagemeister	33ac271ba7	[utils] Let request headers override standard headers What was I thinking when writing this?	2014-08-26 11:51:48 +02:00
Philipp Hagemeister	0963f92f23	[eighttracks] modernize	2014-08-26 11:31:23 +02:00
Philipp Hagemeister	9a66c1079c	release 2014.08.25.3	2014-08-25 18:38:10 +02:00
Philipp Hagemeister	f971dcbba0	Merge branch 'master' of github.com:rg3/youtube-dl	2014-08-25 18:36:42 +02:00
Philipp Hagemeister	0990305d2a	[generic] Fix rss under Python 2.x and move test to extractor	2014-08-25 18:03:01 +02:00
Jaime Marquínez Ferrándiz	bcc069a937	[generic] Remove debug statement	2014-08-25 17:21:58 +02:00
Jaime Marquínez Ferrándiz	34708e1bb6	[bliptv] Remove superfluous characters in _VALID_URL regex	2014-08-25 17:16:11 +02:00
Philipp Hagemeister	829476b80a	[googlesearch] Move test to extractor	2014-08-25 17:02:52 +02:00
Philipp Hagemeister	1dd70fe330	release 2014.08.25.2	2014-08-25 16:52:28 +02:00
Philipp Hagemeister	067e922295	release 2014.08.25.1	2014-08-25 16:41:05 +02:00
Sergey M․	c28df2478f	[wat] Use server time and pass country argument (Closes #3579 )	2014-08-25 20:21:33 +07:00
Philipp Hagemeister	241f7a8ade	Merge remote-tracking branch 'JGjorgji/fix-leading-zeroes'	2014-08-25 13:59:19 +02:00
Philipp Hagemeister	b252735910	[extractor/common] Generate better f4m format IDs	2014-08-25 13:03:08 +02:00
Philipp Hagemeister	7adcbe7594	[rtlnl] Extract duration	2014-08-25 12:59:53 +02:00
Philipp Hagemeister	8d31fa3cce	[execafterdownload] Simplify (#3569 )	2014-08-25 10:18:01 +02:00
Philipp Hagemeister	1f06864e9a	[wat] Remove unused import	2014-08-25 10:15:32 +02:00
Philipp Hagemeister	348ae0a79e	Merge remote-tracking branch 'mcd1992/exec_after_download'	2014-08-25 09:44:11 +02:00
Philipp Hagemeister	528d455632	release 2014.08.25	2014-08-25 09:35:46 +02:00
Philipp Hagemeister	ba5d51b340	[vimeo] Always pass in referer (Fixes #3582 )	2014-08-25 09:35:37 +02:00
mcd1992	7833d941bb	Rebased with upstream/master	2014-08-24 15:04:50 -05:00
mcd1992	a2360a4c80	Moved from os.system to subprocess.call	2014-08-24 14:38:43 -05:00
mcd1992	a7cacbca2b	Implemented --exec option.	2014-08-24 14:38:43 -05:00
Gjorgji Jankovski	c6b4132a0a	renamed for consistency	2014-08-24 18:49:04 +02:00
Gjorgji Jankovski	ad260c90ab	Filenames are padded according to the playlist length	2014-08-24 18:23:32 +02:00
Philipp Hagemeister	b8313f07bc	release 2014.08.24.6	2014-08-24 15:19:33 +02:00
Philipp Hagemeister	92a17d28ac	[wat] Make geolock a warning (Fixes #3579 )	2014-08-24 15:19:21 +02:00
Philipp Hagemeister	5f90042bd6	[generic] remove unused imports	2014-08-24 14:28:58 +02:00
Philipp Hagemeister	9480d1a566	Merge remote-tracking branch 'riking/twofactor'	2014-08-24 07:14:23 +02:00
Philipp Hagemeister	36b0079f23	Credit @olebowle for GameOne:playlist (#3247 )	2014-08-24 07:06:54 +02:00
Philipp Hagemeister	28028629b9	[gameone:playlist] Move test to extractor	2014-08-24 07:05:49 +02:00
Philipp Hagemeister	11f75cac3d	Merge remote-tracking branch 'olebowle/gameone'	2014-08-24 07:02:29 +02:00
Philipp Hagemeister	e673db0194	release 2014.08.24.5	2014-08-24 06:58:47 +02:00
Philipp Hagemeister	ebab4520ff	[generic] Use default opener for HEAD request (Fixes #3528 )	2014-08-24 06:58:11 +02:00
Philipp Hagemeister	a71d1414eb	release 2014.08.24.4	2014-08-24 06:42:05 +02:00
Philipp Hagemeister	423817c468	[expotv] Add new extractor (Fixes #3552 )	2014-08-24 06:41:55 +02:00
Philipp Hagemeister	51ed9fce09	[pornotube] Modernize	2014-08-24 06:16:24 +02:00
Philipp Hagemeister	d43aeb1d00	release 2014.08.24.3	2014-08-24 05:32:31 +02:00
Philipp Hagemeister	4d805e063c	[generic] Automatic detection of flow player and age_limit (Fixes #3576 )	2014-08-24 05:31:32 +02:00
Philipp Hagemeister	24e5e24166	release 2014.08.24.2	2014-08-24 04:47:38 +02:00
Philipp Hagemeister	4d54ef20a2	[ministrygrid] Add extractor (Fixes #2900 )	2014-08-24 04:47:28 +02:00
Philipp Hagemeister	54036b3991	[wayofthemaster] Remove unused import	2014-08-24 04:18:09 +02:00
Philipp Hagemeister	e5402ac120	[wayofthemaster] Add extractor (Fixes #3575 )	2014-08-24 04:14:02 +02:00
Philipp Hagemeister	f56f8399c7	[ebaumsworld] Remove spurious determine_ext	2014-08-24 03:37:19 +02:00
Philipp Hagemeister	cf0c5fa3a1	[ebaumsworld] Modernize	2014-08-24 03:31:38 +02:00
Philipp Hagemeister	8c2ccefae6	release 2014.08.24.1	2014-08-24 03:20:40 +02:00
Philipp Hagemeister	1f8b6af773	[bip.tv] Allow underscore in lookup ids (Fixes #3573 )	2014-08-24 03:20:31 +02:00
Philipp Hagemeister	8f9b683eeb	[blip.tv] Add legacy test case This was broken in the mean time, so add a test case to make sure it doesn't break silently again.	2014-08-24 03:13:58 +02:00
Philipp Hagemeister	b5f4775b38	[arte.tv:creative] Fix test case	2014-08-24 03:11:00 +02:00
Philipp Hagemeister	01d906ffe9	[arte:creative] Support more URLs (fixes #3572 )	2014-08-24 02:57:32 +02:00
Philipp Hagemeister	614582bcc4	release 2014.08.24	2014-08-24 02:44:36 +02:00
Philipp Hagemeister	e1ab5000b2	[brightcove] Add support for videoId= in og:video meta (Fixes #3571 )	2014-08-24 02:41:21 +02:00
Philipp Hagemeister	a5ed3e571e	[brightcove] Detect geoblocking	2014-08-24 02:40:26 +02:00
Philipp Hagemeister	10eaeb20c5	[generic] Require og:video URLs to contain a dot	2014-08-24 02:29:56 +02:00
Philipp Hagemeister	fa8deaf38b	[generic] Prevent from downloading a .swf as a video We're seeing quite a number of people who do not put a video file in the og:video field, but the player URL. Try to detect some of these and filter them out.	2014-08-24 02:24:49 +02:00
Philipp Hagemeister	6857590059	[brightcove] Add a truncated URL warning message (#3571 )	2014-08-24 02:11:26 +02:00
Philipp Hagemeister	a3db22ebdf	[grooveshark] Use proper imports	2014-08-24 02:06:59 +02:00
Philipp Hagemeister	c8e9a235d9	[generic] Add support for camtasia videos (Fixes #3574 )	2014-08-24 02:02:17 +02:00
Philipp Hagemeister	30b871b0ca	Merge remote-tracking branch 'origin/master'	2014-08-24 01:34:28 +02:00
Philipp Hagemeister	eb9da9b732	[grooveshark] Fix test md5sum	2014-08-24 01:33:55 +02:00
Philipp Hagemeister	d769be6c96	[grooveshark,http] Make HTTP POST downloads work	2014-08-24 01:31:35 +02:00
Sergey M․	a54bda3ae2	[wat] Add support for SD and HD videos (Closes #3558 )	2014-08-24 02:22:10 +07:00
Philipp Hagemeister	00558d9414	Merge remote-tracking branch 'sehrgut/Grooveshark' Conflicts: youtube_dl/__init__.py youtube_dl/extractor/__init__.py	2014-08-23 16:41:14 +02:00
Philipp Hagemeister	49f3c16543	release 2014.08.23	2014-08-23 15:24:31 +02:00
Philipp Hagemeister	2ef6fcb5d8	[sbs] Add new extractor (Fixes #3566 )	2014-08-23 15:20:56 +02:00
Philipp Hagemeister	38fc045253	[rtlnl] Remove unused code	2014-08-23 15:05:21 +02:00
Philipp Hagemeister	af1fd929c6	[patreon] Remove unused import	2014-08-23 15:04:11 +02:00
Philipp Hagemeister	b7b04c9234	[vodlocker] Allow title to end with a <br>	2014-08-23 14:39:47 +02:00
Sergey M․	bc0bb6fd30	[movieclips] Add extractor (Closes #3554 )	2014-08-23 17:44:56 +07:00
Philipp Hagemeister	430826c9d4	Merge pull request #3568 from MikeCol/xhamster_load changed _VALID_URL to allow for country specific subdomains	2014-08-22 22:46:42 +02:00
MikeCol	68909f0c4e	changed _VALID_URL to allow for country specific prefixes	2014-08-22 22:17:07 +02:00
Philipp Hagemeister	9d048a17d8	[rtve.es:live] Start supporting the 24h channel	2014-08-22 18:47:49 +02:00
Philipp Hagemeister	492641d10a	release 2014.08.22.3	2014-08-22 18:41:43 +02:00
Philipp Hagemeister	2b9faf5542	[rtve] Add support for live stream At the moment, only RTVE-1 seems to work flawlessly. -2 seems geoblocked right now. -TDP doesn't seem to be available outside of Spain.	2014-08-22 18:40:28 +02:00
Philipp Hagemeister	ed2d6a1960	[generic] Simplify playlist support (#2948 )	2014-08-22 18:19:56 +02:00
Philipp Hagemeister	be843678b1	[YouTubeDL] Correct handling of age_limit = None in result	2014-08-22 17:46:57 +02:00
Philipp Hagemeister	c71dfccc98	Merge remote-tracking branch 'anovicecodemonkey/generic-data-video-url' Conflicts: youtube_dl/extractor/generic.py	2014-08-22 17:40:36 +02:00
Philipp Hagemeister	1a9ccac7c1	Merge remote-tracking branch 'origin/master'	2014-08-22 17:38:11 +02:00
Philipp Hagemeister	e330d59abb	[playfm] Add extractor (Fixes #3538 )	2014-08-22 17:38:06 +02:00
Sergey M․	394df6d7d0	[nuvid] Adapt to latest layout changes	2014-08-22 21:41:51 +07:00
Philipp Hagemeister	218f754940	[README] Add thumbnail to _TEST example While it's not mandatory, extractors are highly encouraged to provide a thumbnail field.	2014-08-22 11:30:49 +02:00
Philipp Hagemeister	a053c3493a	[test_YoutubeDL] Reorder formats (#3542 )	2014-08-22 03:44:30 +02:00
Philipp Hagemeister	50b294aab8	release 2014.08.22.2	2014-08-22 03:16:16 +02:00
Philipp Hagemeister	756b046f3e	[pbs] recognize class=partnerPlayer as well (Fixes #3564 )	2014-08-22 03:16:08 +02:00
riking	165250ff5e	Remove debug prints	2014-08-16 14:49:30 -07:00
riking	83317f6938	[youtube] Add two-factor account signin (TOTP only) Additional work is required to prompt the user for the SMS or phone call codes, as there is no framework currently to prompt the user during an extraction operation. Fixes #3533	2014-08-16 14:48:17 -07:00
Ole Ernst	8c778adc39	[gameone] simplify playlist extractor	2014-07-23 10:00:50 +02:00
Ole Ernst	71b6065009	[gameone] add playlist test	2014-07-23 09:32:01 +02:00
Ole Ernst	c065fd35ae	[gameone] add playlist capability	2014-07-13 12:16:25 +02:00
anovicecodemonkey	37e3cbe22e	Move duplicate check to generic.py	2014-06-01 01:16:35 +09:30
anovicecodemonkey	610134730a	Add a _TEST_	2014-05-21 19:25:37 +09:30
anovicecodemonkey	212a5e28ba	Add a duplicate check to /extractor/common.py playlist_result function	2014-05-21 19:04:55 +09:30
Keith Beckman	ee1a7032d5	Fixed errors found by travisci: py26: re.split can't take flags. use inline flags or re.compile py27: info_dict must be serializable. remove request object py335, py34: no urlparse module. use utils.compat_urlparse	2014-05-20 22:28:32 -04:00
Keith Beckman	7ed806d241	Fixed pyflakes and pep8 warnings	2014-05-20 02:55:21 -04:00
Keith Beckman	dd06c95e43	Added new IE for Grooveshark	2014-05-20 02:47:34 -04:00
anovicecodemonkey	3442b30ab2	[generic] Support data-video-url for YouTube embeds (Fixes #2862 )	2014-05-18 23:15:09 +09:30
`@@ -1,2 +1,2 @@`

	`__version__ = '2014.08.22.1'`	`__version__ = '2014.08.27.1'`