release 2014.02.10

[youtube] Correct a minor regex typo
Fix #2355 (date parsing with dashes)
2014-02-10 02:01:11 +01:00 · 2014-02-10 01:30:47 +01:00 · 2014-02-09 18:09:57 +01:00 · 2014-02-09 17:56:10 +01:00 · 2014-02-09 14:23:19 +01:00 · 2014-02-09 14:11:45 +01:00
48 changed files with 1457 additions and 254 deletions
--- a/test/test_all_urls.py
+++ b/test/test_all_urls.py
@ -1,5 +1,7 @@
 #!/usr/bin/env python
 from __future__ import unicode_literals
 # Allow direct execution
 import os
 import sys
@ -13,6 +15,7 @@ from youtube_dl.extractor import (
    FacebookIE,
    gen_extractors,
    JustinTVIE,
    PBSIE,
    YoutubeIE,
 )
@ -29,18 +32,20 @@ class TestAllURLsMatching(unittest.TestCase):
    def test_youtube_playlist_matching(self):
        assertPlaylist = lambda url: self.assertMatch(url, ['youtube:playlist'])
-        assertPlaylist(u'ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
+        assertPlaylist('ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
-        assertPlaylist(u'UUBABnxM4Ar9ten8Mdjj1j0Q') #585
+        assertPlaylist('UUBABnxM4Ar9ten8Mdjj1j0Q') #585
-        assertPlaylist(u'PL63F0C78739B09958')
+        assertPlaylist('PL63F0C78739B09958')
-        assertPlaylist(u'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
+        assertPlaylist('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
-        assertPlaylist(u'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
+        assertPlaylist('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
-        assertPlaylist(u'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
+        assertPlaylist('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
-        assertPlaylist(u'https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012') #668
+        assertPlaylist('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012') #668
-        self.assertFalse('youtube:playlist' in self.matching_ies(u'PLtS2H6bU1M'))
+        self.assertFalse('youtube:playlist' in self.matching_ies('PLtS2H6bU1M'))
        # Top tracks
        assertPlaylist('https://www.youtube.com/playlist?list=MCUS.20142101')
    def test_youtube_matching(self):
-        self.assertTrue(YoutubeIE.suitable(u'PLtS2H6bU1M'))
+        self.assertTrue(YoutubeIE.suitable('PLtS2H6bU1M'))
-        self.assertFalse(YoutubeIE.suitable(u'https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) #668
+        self.assertFalse(YoutubeIE.suitable('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) #668
        self.assertMatch('http://youtu.be/BaW_jenozKc', ['youtube'])
        self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube'])
        self.assertMatch('https://youtube.googleapis.com/v/BaW_jenozKc', ['youtube'])
@ -80,7 +85,7 @@ class TestAllURLsMatching(unittest.TestCase):
        self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/tsm_theoddone/c/2349361"))
    def test_youtube_extract(self):
-        assertExtractId = lambda url, id: self.assertEqual(YoutubeIE()._extract_id(url), id)
+        assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id)
        assertExtractId('http://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
        assertExtractId('https://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
        assertExtractId('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc', 'BaW_jenozKc')
@ -89,7 +94,7 @@ class TestAllURLsMatching(unittest.TestCase):
        assertExtractId('BaW_jenozKc', 'BaW_jenozKc')
    def test_facebook_matching(self):
-        self.assertTrue(FacebookIE.suitable(u'https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268'))
+        self.assertTrue(FacebookIE.suitable('https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268'))
    def test_no_duplicates(self):
        ies = gen_extractors()
@ -124,5 +129,9 @@ class TestAllURLsMatching(unittest.TestCase):
        self.assertMatch('http://tatianamaslanydaily.tumblr.com/post/54196191430/orphan-black-dvd-extra-behind-the-scenes', ['Tumblr'])
        self.assertMatch('http://tatianamaslanydaily.tumblr.com/post/54196191430', ['Tumblr'])
    def test_pbs(self):
        """Assert that a PBS ``viralplayer`` URL matches the 'PBS' extractor.

        Regression test for https://github.com/rg3/youtube-dl/issues/2350
        """
        viralplayer_url = 'http://video.pbs.org/viralplayer/2365173446/'
        expected_ies = ['PBS']
        self.assertMatch(viralplayer_url, expected_ies)
 if __name__ == '__main__':
    unittest.main()
--- a/test/test_download.py
+++ b/test/test_download.py
@ -22,6 +22,7 @@ import socket
 import youtube_dl.YoutubeDL
 from youtube_dl.utils import (
    compat_http_client,
    compat_str,
    compat_urllib_error,
    compat_HTTPError,
@ -110,7 +111,7 @@ def generator(test_case):
                    ydl.download([test_case['url']])
                except (DownloadError, ExtractorError) as err:
                    # Check if the exception is not a network related one
-                    if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError) or (err.exc_info[0] == compat_HTTPError and err.exc_info[1].code == 503):
+                    if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError, compat_http_client.BadStatusLine) or (err.exc_info[0] == compat_HTTPError and err.exc_info[1].code == 503):
                        raise
                    if try_num == RETRIES:
--- a/test/test_playlists.py
+++ b/test/test_playlists.py
@ -34,6 +34,8 @@ from youtube_dl.extractor import (
    KhanAcademyIE,
    EveryonesMixtapeIE,
    RutubeChannelIE,
    GoogleSearchIE,
    GenericIE,
 )
@ -229,6 +231,24 @@ class TestPlaylists(unittest.TestCase):
        self.assertEqual(result['id'], '1409')
        self.assertTrue(len(result['entries']) >= 34)
    def test_multiple_brightcove_videos(self):
        """Run GenericIE on a New Yorker page and check the resulting playlist.

        The result is expected to be a playlist with three entries.
        Regression test for https://github.com/rg3/youtube-dl/issues/2283
        """
        page_url = 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html'
        playlist = GenericIE(FakeYDL()).extract(page_url)
        self.assertIsPlaylist(playlist)

        # Metadata fields are checked in a fixed order: id first, then title.
        expected_fields = (
            ('id', 'always-never-nuclear-command-and-control'),
            ('title', 'Always/Never: A Little-Seen Movie About Nuclear Command and Control : The New Yorker'),
        )
        for field, expected in expected_fields:
            self.assertEqual(playlist[field], expected)

        entry_count = len(playlist['entries'])
        self.assertEqual(entry_count, 3)
    def test_GoogleSearch(self):
        """Check that a ``gvsearch15:`` query yields a 15-entry playlist.

        The playlist id and title are both expected to equal the search
        terms ('python language').
        """
        dl = FakeYDL()
        ie = GoogleSearchIE(dl)
        result = ie.extract('gvsearch15:python language')
        self.assertIsPlaylist(result)
        self.assertEqual(result['id'], 'python language')
        self.assertEqual(result['title'], 'python language')
        # assertEqual reports the actual entry count on failure, whereas
        # assertTrue(len(...) == 15) would only say "False is not true".
        self.assertEqual(len(result['entries']), 15)
 if __name__ == '__main__':
    unittest.main()
--- a/test/test_subtitles.py
+++ b/test/test_subtitles.py
@ -14,6 +14,7 @@ from youtube_dl.extractor import (
    YoutubeIE,
    DailymotionIE,
    TEDIE,
    VimeoIE,
 )
@ -223,5 +224,60 @@ class TestBlipTVSubtitles(BaseTestSubtitles):
        self.assertEqual(md5(subtitles['en']), '5b75c300af65fe4476dff79478bb93e4')
 class TestVimeoSubtitles(BaseTestSubtitles):
    """Subtitle extraction tests for VimeoIE against http://vimeo.com/76979871."""
    url = 'http://vimeo.com/76979871'
    IE = VimeoIE
    def test_no_writesubtitles(self):
        """With no subtitle option set, getSubtitles() is expected to be None."""
        subtitles = self.getSubtitles()
        self.assertEqual(subtitles, None)
    def test_subtitles(self):
        """With 'writesubtitles' set, the 'en' track must match a known md5."""
        self.DL.params['writesubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(md5(subtitles['en']), '8062383cf4dec168fc40a088aa6d5888')
    def test_subtitles_lang(self):
        """Requesting 'fr' through 'subtitleslangs' must return the French track."""
        self.DL.params['writesubtitles'] = True
        self.DL.params['subtitleslangs'] = ['fr']
        subtitles = self.getSubtitles()
        self.assertEqual(md5(subtitles['fr']), 'b6191146a6c5d3a452244d853fde6dc8')
    def test_allsubtitles(self):
        """With 'allsubtitles' set, exactly de/en/es/fr are expected."""
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(set(subtitles.keys()), set(['de', 'en', 'es', 'fr']))
    def test_list_subtitles(self):
        """With 'listsubtitles' set, getInfoDict() is expected to return None."""
        self.DL.expect_warning(u'Automatic Captions not supported by this server')
        self.DL.params['listsubtitles'] = True
        info_dict = self.getInfoDict()
        self.assertEqual(info_dict, None)
    def test_automatic_captions(self):
        """Requesting automatic captions is expected to yield no subtitles."""
        self.DL.expect_warning(u'Automatic Captions not supported by this server')
        self.DL.params['writeautomaticsub'] = True
        # The option key is 'subtitleslangs' (plural), as used by the other
        # tests in this class; the singular spelling was silently ignored.
        self.DL.params['subtitleslangs'] = ['en']
        subtitles = self.getSubtitles()
        self.assertEqual(len(subtitles), 0)
    def test_nosubtitles(self):
        """A different video (56015672) is expected to yield an empty result."""
        self.DL.expect_warning(u'video doesn\'t have subtitles')
        self.url = 'http://vimeo.com/56015672'
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(len(subtitles), 0)
    def test_multiple_langs(self):
        """Every language listed in 'subtitleslangs' must be present in the result."""
        self.DL.params['writesubtitles'] = True
        langs = ['es', 'fr', 'de']
        self.DL.params['subtitleslangs'] = langs
        subtitles = self.getSubtitles()
        for lang in langs:
            self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang)
 if __name__ == '__main__':
    unittest.main()
--- a/test/test_utils.py
+++ b/test/test_utils.py
@ -127,6 +127,7 @@ class TestUtil(unittest.TestCase):
        self.assertEqual(unified_strdate('8/7/2009'), '20090708')
        self.assertEqual(unified_strdate('Dec 14, 2012'), '20121214')
        self.assertEqual(unified_strdate('2012/10/11 01:56:38 +0000'), '20121011')
        self.assertEqual(unified_strdate('1968-12-10'), '19681210')
    def test_find_xpath_attr(self):
        testxml = u'''<root>
--- a/test/test_youtube_lists.py
+++ b/test/test_youtube_lists.py
@ -30,7 +30,7 @@ class TestYoutubeLists(unittest.TestCase):
        result = ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')
        self.assertIsPlaylist(result)
        self.assertEqual(result['title'], 'ytdl test PL')
-        ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']]
+        ytie_results = [YoutubeIE().extract_id(url['url']) for url in result['entries']]
        self.assertEqual(ytie_results, [ 'bV9L5Ht9LgY', 'FXxLjLQi3Fg', 'tU3Bgo5qJZE'])
    def test_youtube_playlist_noplaylist(self):
@ -39,7 +39,7 @@ class TestYoutubeLists(unittest.TestCase):
        ie = YoutubePlaylistIE(dl)
        result = ie.extract('https://www.youtube.com/watch?v=FXxLjLQi3Fg&list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')
        self.assertEqual(result['_type'], 'url')
-        self.assertEqual(YoutubeIE()._extract_id(result['url']), 'FXxLjLQi3Fg')
+        self.assertEqual(YoutubeIE().extract_id(result['url']), 'FXxLjLQi3Fg')
    def test_issue_673(self):
        dl = FakeYDL()
@ -59,7 +59,7 @@ class TestYoutubeLists(unittest.TestCase):
        dl = FakeYDL()
        ie = YoutubePlaylistIE(dl)
        result = ie.extract('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
-        ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']]
+        ytie_results = [YoutubeIE().extract_id(url['url']) for url in result['entries']]
        self.assertFalse('pElCt5oNDuI' in ytie_results)
        self.assertFalse('KdPEApIVdWM' in ytie_results)
@ -76,9 +76,9 @@ class TestYoutubeLists(unittest.TestCase):
        # TODO find a > 100 (paginating?) videos course
        result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
        entries = result['entries']
-        self.assertEqual(YoutubeIE()._extract_id(entries[0]['url']), 'j9WZyLZCBzs')
+        self.assertEqual(YoutubeIE().extract_id(entries[0]['url']), 'j9WZyLZCBzs')
        self.assertEqual(len(entries), 25)
-        self.assertEqual(YoutubeIE()._extract_id(entries[-1]['url']), 'rYefUsYuEp0')
+        self.assertEqual(YoutubeIE().extract_id(entries[-1]['url']), 'rYefUsYuEp0')
    def test_youtube_channel(self):
        dl = FakeYDL()
@ -117,6 +117,13 @@ class TestYoutubeLists(unittest.TestCase):
        original_video = entries[0]
        self.assertEqual(original_video['id'], 'rjFaenf1T-Y')
    def test_youtube_toptracks(self):
        """Extract the ``list=MCUS`` playlist URL and expect exactly 100 entries."""
        playlist_ie = YoutubePlaylistIE(FakeYDL())
        playlist = playlist_ie.extract('https://www.youtube.com/playlist?list=MCUS')
        track_count = len(playlist['entries'])
        self.assertEqual(track_count, 100)
    def test_youtube_toplist(self):
        dl = FakeYDL()
        ie = YoutubeTopListIE(dl)
--- a/test/test_youtube_signature.py
+++ b/test/test_youtube_signature.py
@ -27,6 +27,12 @@ _TESTS = [
        85,
        u'3456789a0cdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS[UVWXYZ!"#$%&\'()*+,-./:;<=>?@',
    ),
    (
        u'https://s.ytimg.com/yts/jsbin/html5player-vfle-mVwz.js',
        u'js',
        90,
        u']\\[@?>=<;:/.-,+*)(\'&%$#"hZYXWVUTSRQPONMLKJIHGFEDCBAzyxwvutsrqponmlkjiagfedcb39876',
    ),
 ]
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@ -41,6 +41,7 @@ __authors__  = (
    'Chris Gahan',
    'Saimadhav Heblikar',
    'Mike Col',
    'Andreas Schmitz',
 )
 __license__ = 'Public Domain'
--- a/youtube_dl/downloader/rtmp.py
+++ b/youtube_dl/downloader/rtmp.py
@ -87,8 +87,10 @@ class RtmpFD(FileDownloader):
        url = info_dict['url']
        player_url = info_dict.get('player_url', None)
        page_url = info_dict.get('page_url', None)
        app = info_dict.get('app', None)
        play_path = info_dict.get('play_path', None)
        tc_url = info_dict.get('tc_url', None)
        flash_version = info_dict.get('flash_version', None)
        live = info_dict.get('rtmp_live', False)
        conn = info_dict.get('rtmp_conn', None)
@ -111,12 +113,16 @@ class RtmpFD(FileDownloader):
            basic_args += ['--swfVfy', player_url]
        if page_url is not None:
            basic_args += ['--pageUrl', page_url]
        if app is not None:
            basic_args += ['--app', app]
        if play_path is not None:
            basic_args += ['--playpath', play_path]
        if tc_url is not None:
            # Pass the tc_url value taken from info_dict. The stream URL
            # (``url``) is a separate value and was passed here by mistake.
            basic_args += ['--tcUrl', tc_url]
        if test:
            basic_args += ['--stop', '1']
        if flash_version is not None:
            basic_args += ['--flashVer', flash_version]
        if live:
            basic_args += ['--live']
        if conn:
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@ -15,6 +15,7 @@ from .arte import (
 from .auengine import AUEngineIE
 from .bambuser import BambuserIE, BambuserChannelIE
 from .bandcamp import BandcampIE, BandcampAlbumIE
 from .bbccouk import BBCCoUkIE
 from .blinkx import BlinkxIE
 from .bliptv import BlipTVIE, BlipTVUserIE
 from .bloomberg import BloombergIE
@ -25,6 +26,7 @@ from .canalplus import CanalplusIE
 from .canalc2 import Canalc2IE
 from .cbs import CBSIE
 from .channel9 import Channel9IE
 from .chilloutzone import ChilloutzoneIE
 from .cinemassacre import CinemassacreIE
 from .clipfish import ClipfishIE
 from .cliphunter import CliphunterIE
@ -54,12 +56,14 @@ from .ebaumsworld import EbaumsWorldIE
 from .ehow import EHowIE
 from .eighttracks import EightTracksIE
 from .eitb import EitbIE
 from .elpais import ElPaisIE
 from .escapist import EscapistIE
 from .everyonesmixtape import EveryonesMixtapeIE
 from .exfm import ExfmIE
 from .extremetube import ExtremeTubeIE
 from .facebook import FacebookIE
 from .faz import FazIE
 from .firstpost import FirstpostIE
 from .fktv import (
    FKTVIE,
    FKTVPosteckeIE,
@ -96,10 +100,12 @@ from .ina import InaIE
 from .infoq import InfoQIE
 from .instagram import InstagramIE
 from .internetvideoarchive import InternetVideoArchiveIE
 from .iprima import IPrimaIE
 from .ivi import (
    IviIE,
    IviCompilationIE
 )
 from .jadorecettepub import JadoreCettePubIE
 from .jeuxvideo import JeuxVideoIE
 from .jukebox import JukeboxIE
 from .justintv import JustinTVIE
@ -109,6 +115,7 @@ from .keezmovies import KeezMoviesIE
 from .khanacademy import KhanAcademyIE
 from .kickstarter import KickStarterIE
 from .keek import KeekIE
 from .kontrtube import KontrTubeIE
 from .la7 import LA7IE
 from .lifenews import LifeNewsIE
 from .liveleak import LiveLeakIE
@ -117,6 +124,7 @@ from .lynda import (
    LyndaIE,
    LyndaCourseIE
 )
 from .m6 import M6IE
 from .macgamestore import MacGameStoreIE
 from .malemotion import MalemotionIE
 from .mdr import MDRIE
@ -126,6 +134,7 @@ from .mit import TechTVMITIE, MITIE
 from .mixcloud import MixcloudIE
 from .mpora import MporaIE
 from .mofosex import MofosexIE
 from .mooshare import MooshareIE
 from .mtv import (
    MTVIE,
    MTVIggyIE,
@ -137,8 +146,10 @@ from .myvideo import MyVideoIE
 from .naver import NaverIE
 from .nba import NBAIE
 from .nbc import NBCNewsIE
 from .ndr import NDRIE
 from .ndtv import NDTVIE
 from .newgrounds import NewgroundsIE
 from .nfb import NFBIE
 from .nhl import NHLIE, NHLVideocenterIE
 from .niconico import NiconicoIE
 from .ninegag import NineGagIE
--- a/youtube_dl/extractor/bbccouk.py
+++ b/youtube_dl/extractor/bbccouk.py
@ -0,0 +1,217 @@
 from __future__ import unicode_literals
 import re
 from .subtitles import SubtitlesInfoExtractor
 from ..utils import ExtractorError
 class BBCCoUkIE(SubtitlesInfoExtractor):
    IE_NAME = 'bbc.co.uk'
    IE_DESC = 'BBC iPlayer'
    _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:programmes|iplayer/episode)/(?P<id>[\da-z]{8})'
    _TESTS = [
        {
            'url': 'http://www.bbc.co.uk/programmes/p01q7wz1',
            'info_dict': {
                'id': 'p01q7wz4',
                'ext': 'flv',
                'title': 'Friction: Blu Mar Ten guest mix: Blu Mar Ten - Guest Mix',
                'description': 'Blu Mar Ten deliver a Guest Mix for Friction.',
                'duration': 1936,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            }
        },
        {
            'url': 'http://www.bbc.co.uk/iplayer/episode/b00yng5w/The_Man_in_Black_Series_3_The_Printed_Name/',
            'info_dict': {
                'id': 'b00yng1d',
                'ext': 'flv',
                'title': 'The Man in Black: Series 3: The Printed Name',
                'description': "Mark Gatiss introduces Nicholas Pierpan's chilling tale of a writer's devilish pact with a mysterious man. Stars Ewan Bailey.",
                'duration': 1800,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            }
        },
        {
            'url': 'http://www.bbc.co.uk/iplayer/episode/b03vhd1f/The_Voice_UK_Series_3_Blind_Auditions_5/',
            'info_dict': {
                'id': 'b00yng1d',
                'ext': 'flv',
                'title': 'The Voice UK: Series 3: Blind Auditions 5',
                'description': "Emma Willis and Marvin Humes present the fifth set of blind auditions in the singing competition, as the coaches continue to build their teams based on voice alone.",
                'duration': 5100,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
            'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
        }
    ]
    def _extract_asx_playlist(self, connection, programme_id):
        """Download the ASX playlist referenced by ``connection``.

        Returns the ``href`` attribute of every ``Entry/ref`` element in the
        downloaded document, in document order.
        """
        playlist_url = connection.get('href')
        asx_doc = self._download_xml(
            playlist_url, programme_id, 'Downloading ASX playlist')
        hrefs = []
        for ref_el in asx_doc.findall('./Entry/ref'):
            hrefs.append(ref_el.get('href'))
        return hrefs
    def _extract_connection(self, connection, programme_id):
        formats = []
        protocol = connection.get('protocol')
        supplier = connection.get('supplier')
        if protocol == 'http':
            href = connection.get('href')
            # ASX playlist
            if supplier == 'asx':
                for i, ref in enumerate(self._extract_asx_playlist(connection, programme_id)):
                    formats.append({
                        'url': ref,
                        'format_id': 'ref%s_%s' % (i, supplier),
                    })
            # Direct link
            else:
                formats.append({
                    'url': href,
                    'format_id': supplier,
                })
        elif protocol == 'rtmp':
            application = connection.get('application', 'ondemand')
            auth_string = connection.get('authString')
            identifier = connection.get('identifier')
            server = connection.get('server')
            formats.append({
                'url': '%s://%s/%s?%s' % (protocol, server, application, auth_string),
                'play_path': identifier,
                'app': '%s?%s' % (application, auth_string),
                'page_url': 'http://www.bbc.co.uk',
                'player_url': 'http://www.bbc.co.uk/emp/releases/iplayer/revisions/617463_618125_4/617463_618125_4_emp.swf',
                'rtmp_live': False,
                'ext': 'flv',
                'format_id': supplier,
            })
        return formats
    def _extract_items(self, playlist):
        return playlist.findall('./{http://bbc.co.uk/2008/emp/playlist}item')
    def _extract_medias(self, media_selection):
        return media_selection.findall('./{http://bbc.co.uk/2008/mp/mediaselection}media')
    def _extract_connections(self, media):
        return media.findall('./{http://bbc.co.uk/2008/mp/mediaselection}connection')
    def _extract_video(self, media, programme_id):
        formats = []
        vbr = int(media.get('bitrate'))
        vcodec = media.get('encoding')
        service = media.get('service')
        width = int(media.get('width'))
        height = int(media.get('height'))
        file_size = int(media.get('media_file_size'))
        for connection in self._extract_connections(media):
            conn_formats = self._extract_connection(connection, programme_id)
            for format in conn_formats:
                format.update({
                    'format_id': '%s_%s' % (service, format['format_id']),
                    'width': width,
                    'height': height,
                    'vbr': vbr,
                    'vcodec': vcodec,
                    'filesize': file_size,
                })
            formats.extend(conn_formats)
        return formats
    def _extract_audio(self, media, programme_id):
        formats = []
        abr = int(media.get('bitrate'))
        acodec = media.get('encoding')
        service = media.get('service')
        for connection in self._extract_connections(media):
            conn_formats = self._extract_connection(connection, programme_id)
            for format in conn_formats:
                format.update({
                    'format_id': '%s_%s' % (service, format['format_id']),
                    'abr': abr,
                    'acodec': acodec,
                })
            formats.extend(conn_formats)
        return formats
    def _extract_captions(self, media, programme_id):
        """Download the caption documents of ``media`` and convert them to text cues.

        Returns a dict mapping the document's ``xml:lang`` (default 'en') to
        a string of numbered "begin --> end" cues built from the ttaf1
        ``body/div/p`` elements. Numbering starts at 0. A later connection
        with the same language replaces an earlier one.
        """
        ttaf_ns = '{http://www.w3.org/2006/10/ttaf1}'
        cue_template = '%s\r\n%s --> %s\r\n%s\r\n\r\n'
        subtitles = {}
        for connection in self._extract_connections(media):
            captions = self._download_xml(connection.get('href'), programme_id, 'Downloading captions')
            lang = captions.get('{http://www.w3.org/XML/1998/namespace}lang', 'en')
            paragraphs = captions.findall('./{0}body/{0}div/{0}p'.format(ttaf_ns))
            cues = []
            for index, para in enumerate(paragraphs):
                text = '' if para.text is None else para.text.strip()
                cues.append(cue_template % (str(index), para.get('begin'), para.get('end'), text))
            subtitles[lang] = ''.join(cues)
        return subtitles
    def _real_extract(self, url):
        """Extract formats, subtitles and metadata for a BBC programme URL.

        Downloads the iPlayer playlist XML for the id in ``url``, then the
        media selection XML for each ``programme``/``radioProgramme`` item,
        and collects audio/video formats and captions from it.

        Returns the info dict, or None after listing subtitles when the
        'listsubtitles' option is set.

        Raises ExtractorError when the playlist reports the episode as
        unavailable, or when it contains no programme item at all.
        """
        mobj = re.match(self._VALID_URL, url)
        group_id = mobj.group('id')

        playlist = self._download_xml('http://www.bbc.co.uk/iplayer/playlist/%s' % group_id, group_id,
            'Downloading playlist XML')

        no_items = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}noItems')
        if no_items is not None:
            reason = no_items.get('reason')
            if reason == 'preAvailability':
                msg = 'Episode %s is not yet available' % group_id
            elif reason == 'postAvailability':
                msg = 'Episode %s is no longer available' % group_id
            else:
                msg = 'Episode %s is not available: %s' % (group_id, reason)
            raise ExtractorError(msg, expected=True)

        formats = []
        subtitles = None
        programme_found = False

        for item in self._extract_items(playlist):
            # Only programme items carry media; other kinds are skipped
            if item.get('kind') not in ('programme', 'radioProgramme'):
                continue
            programme_found = True
            title = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}title').text
            description = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}summary').text

            programme_id = item.get('identifier')
            duration = int(item.get('duration'))

            media_selection = self._download_xml(
                'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s'  % programme_id,
                programme_id, 'Downloading media selection XML')

            for media in self._extract_medias(media_selection):
                media_kind = media.get('kind')
                if media_kind == 'audio':
                    formats.extend(self._extract_audio(media, programme_id))
                elif media_kind == 'video':
                    formats.extend(self._extract_video(media, programme_id))
                elif media_kind == 'captions':
                    subtitles = self._extract_captions(media, programme_id)

        if not programme_found:
            # Without this guard the metadata names below are never bound
            # and the method fails with an opaque UnboundLocalError.
            raise ExtractorError('No programme found in playlist for %s' % group_id)

        if self._downloader.params.get('listsubtitles', False):
            self._list_available_subtitles(programme_id, subtitles)
            return

        self._sort_formats(formats)

        return {
            'id': programme_id,
            'title': title,
            'description': description,
            'duration': duration,
            'formats': formats,
            'subtitles': subtitles,
        }
--- a/youtube_dl/extractor/bliptv.py
+++ b/youtube_dl/extractor/bliptv.py
@ -1,19 +1,14 @@
 from __future__ import unicode_literals
 import datetime
 import json
 import re
 import socket
 from .common import InfoExtractor
 from .subtitles import SubtitlesInfoExtractor
 from ..utils import (
    compat_http_client,
    compat_str,
    compat_urllib_error,
    compat_urllib_request,
    ExtractorError,
    unescapeHTML,
 )
--- a/youtube_dl/extractor/bloomberg.py
+++ b/youtube_dl/extractor/bloomberg.py
@ -24,5 +24,7 @@ class BloombergIE(InfoExtractor):
        mobj = re.match(self._VALID_URL, url)
        name = mobj.group('name')
        webpage = self._download_webpage(url, name)
-        ooyala_url = self._twitter_search_player(webpage)
+        embed_code = self._search_regex(
-        return self.url_result(ooyala_url, OoyalaIE.ie_key())
+            r'<source src="https?://[^/]+/[^/]+/[^/]+/([^/]+)', webpage,
            'embed code')
        return OoyalaIE._build_url_result(embed_code)
--- a/youtube_dl/extractor/brightcove.py
+++ b/youtube_dl/extractor/brightcove.py
@ -127,25 +127,28 @@ class BrightcoveIE(InfoExtractor):
    @classmethod
    def _extract_brightcove_url(cls, webpage):
-        """Try to extract the brightcove url from the wepbage, returns None
+        """Try to extract the brightcove url from the webpage, returns None
        if it can't be found
        """
        urls = cls._extract_brightcove_urls(webpage)
        return urls[0] if urls else None
    @classmethod
    def _extract_brightcove_urls(cls, webpage):
        """Return a list of all Brightcove URLs from the webpage """
        url_m = re.search(r'<meta\s+property="og:video"\s+content="(http://c.brightcove.com/[^"]+)"', webpage)
        if url_m:
-            return url_m.group(1)
+            return [url_m.group(1)]
-        m_brightcove = re.search(
+        matches = re.findall(
            r'''(?sx)<object
            (?:
-                [^>]+?class=([\'"])[^>]*?BrightcoveExperience.*?\1 |
+                [^>]+?class=[\'"][^>]*?BrightcoveExperience.*?[\'"] |
                [^>]*?>\s*<param\s+name="movie"\s+value="https?://[^/]*brightcove\.com/
            ).+?</object>''',
            webpage)
-        if m_brightcove is not None:
+        return [cls._build_brighcove_url(m) for m in matches]
            return cls._build_brighcove_url(m_brightcove.group())
        else:
            return None
    def _real_extract(self, url):
        url, smuggled_data = unsmuggle_url(url, {})
--- a/youtube_dl/extractor/channel9.py
+++ b/youtube_dl/extractor/channel9.py
@ -15,14 +15,15 @@ class Channel9IE(InfoExtractor):
    '''
    IE_DESC = 'Channel 9'
    IE_NAME = 'channel9'
-    _VALID_URL = r'^https?://(?:www\.)?channel9\.msdn\.com/(?P<contentpath>.+)/?'
+    _VALID_URL = r'https?://(?:www\.)?channel9\.msdn\.com/(?P<contentpath>.+)/?'
    _TESTS = [
        {
            'url': 'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002',
            'file': 'Events_TechEd_Australia_2013_KOS002.mp4',
            'md5': 'bbd75296ba47916b754e73c3a4bbdf10',
            'info_dict': {
                'id': 'Events/TechEd/Australia/2013/KOS002',
                'ext': 'mp4',
                'title': 'Developer Kick-Off Session: Stuff We Love',
                'description': 'md5:c08d72240b7c87fcecafe2692f80e35f',
                'duration': 4576,
@ -35,9 +36,10 @@ class Channel9IE(InfoExtractor):
        },
        {
            'url': 'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing',
            'file': 'posts_Self-service-BI-with-Power-BI-nuclear-testing.mp4',
            'md5': 'b43ee4529d111bc37ba7ee4f34813e68',
            'info_dict': {
                'id': 'posts/Self-service-BI-with-Power-BI-nuclear-testing',
                'ext': 'mp4',
                'title': 'Self-service BI with Power BI - nuclear testing',
                'description': 'md5:d1e6ecaafa7fb52a2cacdf9599829f5b',
                'duration': 1540,
--- a/youtube_dl/extractor/chilloutzone.py
+++ b/youtube_dl/extractor/chilloutzone.py
@ -0,0 +1,97 @@
 from __future__ import unicode_literals
 import re
 import base64
 import json
 from .common import InfoExtractor
 from ..utils import (
    clean_html,
    ExtractorError
 )
 class ChilloutzoneIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?chilloutzone\.net/video/(?P<id>[\w|-]+)\.html'
    _TESTS = [{
        'url': 'http://www.chilloutzone.net/video/enemene-meck-alle-katzen-weg.html',
        'md5': 'a76f3457e813ea0037e5244f509e66d1',
        'info_dict': {
            'id': 'enemene-meck-alle-katzen-weg',
            'ext': 'mp4',
            'title': 'Enemene Meck - Alle Katzen weg',
            'description': 'Ist das der Umkehrschluss des Niesenden Panda-Babys?',
        },
    }, {
        'note': 'Video hosted at YouTube',
        'url': 'http://www.chilloutzone.net/video/eine-sekunde-bevor.html',
        'info_dict': {
            'id': '1YVQaAgHyRU',
            'ext': 'mp4',
            'title': '16 Photos Taken 1 Second Before Disaster',
            'description': 'md5:58a8fcf6a459fe0a08f54140f0ad1814',
            'uploader': 'BuzzFeedVideo',
            'uploader_id': 'BuzzFeedVideo',
            'upload_date': '20131105',
        },
    }, {
        'note': 'Video hosted at Vimeo',
        'url': 'http://www.chilloutzone.net/video/icon-blending.html',
        'md5': '2645c678b8dc4fefcc0e1b60db18dac1',
        'info_dict': {
            'id': '85523671',
            'ext': 'mp4',
            'title': 'The Sunday Times - Icons',
            'description': 'md5:3e5e8e839f076a637c6b9406c8f25c4c',
            'uploader': 'Us',
            'uploader_id': 'usfilms',
            'upload_date': '20140131'
        },
    }]
    def _real_extract(self, url):
        """Extract a chilloutzone.net video, or delegate to YouTube/Vimeo.

        The page embeds a base64-encoded JSON blob (``cozVidData``). Depending
        on its ``nativePlatform``/``sourcePriority`` fields the result is a
        url_result for the Youtube or Vimeo extractor, or an info dict that
        points at the ``mediaUrl`` from the blob.

        Raises ExtractorError when none of these yields a video.
        """
        video_id = re.match(self._VALID_URL, url).group('id')
        webpage = self._download_webpage(url, video_id)

        encoded_info = self._html_search_regex(
            r'var cozVidData = "(.+?)";', webpage, 'video data')
        info = json.loads(base64.b64decode(encoded_info).decode("utf-8"))

        # Pull the fields out of the decoded blob
        media_url = info['mediaUrl']
        description = clean_html(info.get('description'))
        title = info['title']
        platform = info['nativePlatform']
        native_id = info['nativeVideoId']
        priority = info['sourcePriority']

        # No native platform: fall back to a YouTube iframe on the page, if any
        if platform is None:
            embedded_url = self._html_search_regex(
                r'<iframe.* src="((?:https?:)?//(?:[^.]+\.)?youtube\.com/.+?)"',
                webpage, 'fallback video URL', default=None)
            if embedded_url is not None:
                return self.url_result(embedded_url, ie='Youtube')

        # Otherwise choose between the native host (youtube or vimeo) and
        # the site's own CDN
        prefer_native = priority == 'native'
        if prefer_native and platform == 'youtube':
            return self.url_result(native_id, ie='Youtube')
        if prefer_native and platform == 'vimeo':
            return self.url_result(
                'http://vimeo.com/' + native_id, ie='Vimeo')

        if not media_url:
            raise ExtractorError('No video found')

        return {
            'id': video_id,
            'url': media_url,
            'ext': 'mp4',
            'title': title,
            'description': description,
        }
--- a/youtube_dl/extractor/collegehumor.py
+++ b/youtube_dl/extractor/collegehumor.py
@ -4,6 +4,7 @@ import json
 import re
 from .common import InfoExtractor
 from ..utils import int_or_none
 class CollegeHumorIE(InfoExtractor):
@ -11,22 +12,25 @@ class CollegeHumorIE(InfoExtractor):
    _TESTS = [{
        'url': 'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe',
        'file': '6902724.mp4',
        'md5': 'dcc0f5c1c8be98dc33889a191f4c26bd',
        'info_dict': {
            'id': '6902724',
            'ext': 'mp4',
            'title': 'Comic-Con Cosplay Catastrophe',
-            'description': 'Fans get creative this year at San Diego.  Too',
+            'description': 'Fans get creative this year',
            'age_limit': 13,
        },
    },
    {
        'url': 'http://www.collegehumor.com/video/3505939/font-conference',
        'file': '3505939.mp4',
        'md5': '72fa701d8ef38664a4dbb9e2ab721816',
        'info_dict': {
            'id': '3505939',
            'ext': 'mp4',
            'title': 'Font Conference',
-            'description': 'This video wasn\'t long enough, so we made it double-spaced.',
+            'description': 'This video wasn\'t long enough,',
            'age_limit': 10,
            'duration': 179,
        },
    },
    # embedded youtube video
@ -38,7 +42,7 @@ class CollegeHumorIE(InfoExtractor):
            'title': 'Funny Dogs Protecting Babies Compilation 2014 [NEW HD]',
            'uploader': 'Funnyplox TV',
            'uploader_id': 'funnyploxtv',
-            'description': 'md5:506f69f7a297ed698ced3375f2363b0e',
+            'description': 'md5:11812366244110c3523968aa74f02521',
            'upload_date': '20140128',
        },
        'params': {
@ -82,6 +86,8 @@ class CollegeHumorIE(InfoExtractor):
                })
        self._sort_formats(formats)
        duration = int_or_none(vdata.get('duration'), 1000)
        return {
            'id': video_id,
            'title': vdata['title'],
@ -89,4 +95,5 @@ class CollegeHumorIE(InfoExtractor):
            'thumbnail': vdata.get('thumbnail'),
            'formats': formats,
            'age_limit': age_limit,
            'duration': duration,
        }
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@ -271,8 +271,11 @@ class InfoExtractor(object):
    def _download_json(self, url_or_request, video_id,
                       note=u'Downloading JSON metadata',
-                       errnote=u'Unable to download JSON metadata'):
+                       errnote=u'Unable to download JSON metadata',
                       transform_source=None):
        json_string = self._download_webpage(url_or_request, video_id, note, errnote)
        if transform_source:
            json_string = transform_source(json_string)
        try:
            return json.loads(json_string)
        except ValueError as ve:
--- a/youtube_dl/extractor/elpais.py
+++ b/youtube_dl/extractor/elpais.py
@ -0,0 +1,58 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..utils import unified_strdate
 class ElPaisIE(InfoExtractor):
    _VALID_URL = r'https?://(?:[^.]+\.)?elpais\.com/.*/(?P<id>[^/#?]+)\.html(?:$|[?#])'
    IE_DESC = 'El País'
    _TEST = {
        'url': 'http://blogs.elpais.com/la-voz-de-inaki/2014/02/tiempo-nuevo-recetas-viejas.html',
        'md5': '98406f301f19562170ec071b83433d55',
        'info_dict': {
            'id': 'tiempo-nuevo-recetas-viejas',
            'ext': 'mp4',
            'title': 'Tiempo nuevo, recetas viejas',
            'description': 'De lunes a viernes, a partir de las ocho de la mañana, Iñaki Gabilondo nos cuenta su visión de la actualidad nacional e internacional.',
            'upload_date': '20140206',
        }
    }
    def _real_extract(self, url):
        """Extract the video URL and metadata from an El País page.

        The media and still-image paths are given in the page's JavaScript
        relative to a shared ``url_cache`` prefix.  The thumbnail and the
        upload date are optional and default to None when not found.
        """
        video_id = re.match(self._VALID_URL, url).group('id')
        webpage = self._download_webpage(url, video_id)

        url_cache = self._html_search_regex(
            r'var url_cache = "([^"]+)";', webpage, 'URL prefix')
        media_path = self._search_regex(
            r"URLMediaFile = url_cache \+ '([^']+)'", webpage, 'video URL')
        video_url = url_cache + media_path

        still_path = self._search_regex(
            r"URLMediaStill = url_cache \+ '([^']+)'", webpage, 'thumbnail URL',
            fatal=False)
        thumbnail = None
        if still_path is not None:
            thumbnail = url_cache + still_path

        title = self._html_search_regex(
            '<h2 class="entry-header entry-title.*?>(.*?)</h2>',
            webpage, 'title')

        raw_date = self._search_regex(
            r'<p class="date-header date-int updated"\s+title="([^"]+)">',
            webpage, 'upload date', fatal=False)
        upload_date = None
        if raw_date is not None:
            upload_date = unified_strdate(raw_date)

        return {
            'id': video_id,
            'url': video_url,
            'title': title,
            'description': self._og_search_description(webpage),
            'thumbnail': thumbnail,
            'upload_date': upload_date,
        }
--- a/youtube_dl/extractor/firstpost.py
+++ b/youtube_dl/extractor/firstpost.py
@ -0,0 +1,38 @@
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 class FirstpostIE(InfoExtractor):
    IE_NAME = 'Firstpost.com'
    _VALID_URL = r'http://(?:www\.)?firstpost\.com/[^/]+/.*-(?P<id>[0-9]+)\.html'
    _TEST = {
        'url': 'http://www.firstpost.com/india/india-to-launch-indigenous-aircraft-carrier-monday-1025403.html',
        'md5': 'ee9114957692f01fb1263ed87039112a',
        'info_dict': {
            'id': '1025403',
            'ext': 'mp4',
            'title': 'India to launch indigenous aircraft carrier INS Vikrant today',
            'description': 'Its flight deck is over twice the size of a football field, its power unit can light up the entire Kochi city and the cabling is enough to cover the distance between here to Delhi.',
        }
    }
    def _real_extract(self, url):
        """Extract the video URL and Open Graph metadata from an article page."""
        video_id = re.match(self._VALID_URL, url).group('id')
        webpage = self._download_webpage(url, video_id)

        # The media URL is the value of the player div's flashvars attribute.
        flashvars = self._html_search_regex(
            r'<div.*?name="div_video".*?flashvars="([^"]+)">',
            webpage, 'video URL')

        info = {'id': video_id, 'url': flashvars}
        info['title'] = self._og_search_title(webpage)
        info['description'] = self._og_search_description(webpage)
        info['thumbnail'] = self._og_search_thumbnail(webpage)
        return info
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@ -234,11 +234,21 @@ class GenericIE(InfoExtractor):
            r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
        # Look for BrightCove:
-        bc_url = BrightcoveIE._extract_brightcove_url(webpage)
+        bc_urls = BrightcoveIE._extract_brightcove_urls(webpage)
-        if bc_url is not None:
+        if bc_urls:
            self.to_screen('Brightcove video detected.')
-            surl = smuggle_url(bc_url, {'Referer': url})
+            entries = [{
-            return self.url_result(surl, 'Brightcove')
+                '_type': 'url',
                'url': smuggle_url(bc_url, {'Referer': url}),
                'ie_key': 'Brightcove'
            } for bc_url in bc_urls]
            return {
                '_type': 'playlist',
                'title': video_title,
                'id': video_id,
                'entries': entries,
            }
        # Look for embedded (iframe) Vimeo player
        mobj = re.search(
--- a/youtube_dl/extractor/googlesearch.py
+++ b/youtube_dl/extractor/googlesearch.py
@ -1,3 +1,5 @@
 from __future__ import unicode_literals
 import itertools
 import re
@ -8,32 +10,42 @@ from ..utils import (
 class GoogleSearchIE(SearchInfoExtractor):
-    IE_DESC = u'Google Video search'
+    IE_DESC = 'Google Video search'
    _MORE_PAGES_INDICATOR = r'id="pnnext" class="pn"'
    _MAX_RESULTS = 1000
-    IE_NAME = u'video.google:search'
+    IE_NAME = 'video.google:search'
    _SEARCH_KEY = 'gvsearch'
    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        entries = []
        res = {
            '_type': 'playlist',
            'id': query,
-            'entries': []
+            'title': query,
        }
-        for pagenum in itertools.count(1):
+        for pagenum in itertools.count():
-            result_url = u'http://www.google.com/search?tbm=vid&q=%s&start=%s&hl=en' % (compat_urllib_parse.quote_plus(query), pagenum*10)
+            result_url = (
-            webpage = self._download_webpage(result_url, u'gvsearch:' + query,
+                'http://www.google.com/search?tbm=vid&q=%s&start=%s&hl=en'
-                                             note='Downloading result page ' + str(pagenum))
+                % (compat_urllib_parse.quote_plus(query), pagenum * 10))
-            for mobj in re.finditer(r'<h3 class="r"><a href="([^"]+)"', webpage):
+            webpage = self._download_webpage(
-                e = {
+                result_url, 'gvsearch:' + query,
                note='Downloading result page ' + str(pagenum + 1))
            for hit_idx, mobj in enumerate(re.finditer(
                    r'<h3 class="r"><a href="([^"]+)"', webpage)):
                # Skip playlists
                if not re.search(r'id="vidthumb%d"' % (hit_idx + 1), webpage):
                    continue
                entries.append({
                    '_type': 'url',
                    'url': mobj.group(1)
-                }
+                })
                res['entries'].append(e)
-            if (pagenum * 10 > n) or not re.search(self._MORE_PAGES_INDICATOR, webpage):
+            if (len(entries) >= n) or not re.search(r'class="pn" id="pnnext"', webpage):
                res['entries'] = entries[:n]
                return res
--- a/youtube_dl/extractor/ina.py
+++ b/youtube_dl/extractor/ina.py
@ -1,39 +1,36 @@
 # encoding: utf-8
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 class InaIE(InfoExtractor):
-    """Information Extractor for Ina.fr"""
+    _VALID_URL = r'http://(?:www\.)?ina\.fr/video/(?P<id>I?[A-Z0-9]+)'
    _VALID_URL = r'(?:http://)?(?:www\.)?ina\.fr/video/(?P<id>I?[A-F0-9]+)/.*'
    _TEST = {
-        u'url': u'http://www.ina.fr/video/I12055569/francois-hollande-je-crois-que-c-est-clair-video.html',
+        'url': 'http://www.ina.fr/video/I12055569/francois-hollande-je-crois-que-c-est-clair-video.html',
-        u'file': u'I12055569.mp4',
+        'md5': 'a667021bf2b41f8dc6049479d9bb38a3',
-        u'md5': u'a667021bf2b41f8dc6049479d9bb38a3',
+        'info_dict': {
-        u'info_dict': {
+            'id': 'I12055569',
-            u"title": u"Fran\u00e7ois Hollande \"Je crois que c'est clair\""
+            'ext': 'mp4',
            'title': 'François Hollande "Je crois que c\'est clair"',
        }
    }
-    def _real_extract(self,url):
+    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
-        mrss_url='http://player.ina.fr/notices/%s.mrss' % video_id
+        mrss_url = 'http://player.ina.fr/notices/%s.mrss' % video_id
-        video_extension = 'mp4'
+        info_doc = self._download_xml(mrss_url, video_id)
        webpage = self._download_webpage(mrss_url, video_id)
        self.report_extraction(video_id)
-        video_url = self._html_search_regex(r'<media:player url="(?P<mp4url>http://mp4.ina.fr/[^"]+\.mp4)',
+        video_url = info_doc.find('.//{http://search.yahoo.com/mrss/}player').attrib['url']
            webpage, u'video URL')
-        video_title = self._search_regex(r'<title><!\[CDATA\[(?P<titre>.*?)]]></title>',
+        return {
-            webpage, u'title')
+            'id': video_id,
-
+            'url': video_url,
-        return [{
+            'title': info_doc.find('.//title').text,
-            'id':       video_id,
+        }
            'url':      video_url,
            'ext':      video_extension,
            'title':    video_title,
        }]
--- a/youtube_dl/extractor/iprima.py
+++ b/youtube_dl/extractor/iprima.py
@ -0,0 +1,85 @@
 # -*- coding: utf-8 -*-
 from __future__ import unicode_literals
 import re
 from random import random
 from math import floor
 from .common import InfoExtractor
 from ..utils import compat_urllib_request
 class IPrimaIE(InfoExtractor):
    _VALID_URL = r'https?://play\.iprima\.cz/(?P<videogroup>.+)/(?P<videoid>.+)'
    _TESTS = [{
        'url': 'http://play.iprima.cz/particka/particka-92',
        'info_dict': {
            'id': '39152',
            'ext': 'flv',
            'title': 'Partička (92)',
            'description': 'md5:3740fda51464da35a2d4d0670b8e4fd6',
            'thumbnail': 'http://play.iprima.cz/sites/default/files/image_crops/image_620x349/3/491483_particka-92_image_620x349.jpg',
        },
        'params': {
            'skip_download': True,
        },
    },
    ]
    def _real_extract(self, url):
        """Extract the RTMP formats of a play.iprima.cz video page.

        The stream base URL is read from the embed player script (requested
        with the page URL as Referer); the per-quality file names come from
        the page itself.  One format is produced for each of the 'lq', 'hq'
        and 'hd' ids whose value is not 'null'.
        """
        video_id = re.match(self._VALID_URL, url).group('videoid')
        webpage = self._download_webpage(url, video_id)

        # Both the token name suffix and its value are random numbers.
        token_name = floor(random()*1073741824)
        token_value = floor(random()*1073741824)
        player_url = (
            'http://embed.livebox.cz/iprimaplay/player-embed-v2.js?__tok%s__=%s'
            % (token_name, token_value))

        player_request = compat_urllib_request.Request(player_url)
        player_request.add_header('Referer', url)
        playerpage = self._download_webpage(player_request, video_id)

        # The groups of the second stream match are concatenated to form
        # the base URL.
        stream_matches = re.findall(
            r"embed\['stream'\] = '(.+?)'.+'(\?auth=)'.+'(.+?)';", playerpage)
        base_url = ''.join(stream_matches[1])

        zone_geo = self._html_search_regex(r'"zoneGEO":(.+?),', webpage, 'zoneGEO')
        if zone_geo != '0':
            base_url = base_url.replace('token', 'token_'+zone_geo)

        formats = []
        # The position in this list doubles as the quality rank (lq=0 .. hd=2).
        for quality, format_id in enumerate(['lq', 'hq', 'hd']):
            filename = self._html_search_regex(
                r'"%s_id":(.+?),' % format_id, webpage, 'filename')
            if filename == 'null':
                continue

            # NOTE(review): real_id is only bound when at least one format id
            # is not 'null'; it is read unconditionally in the return below.
            real_id = self._search_regex(
                r'Prima-[0-9]{10}-([0-9]+)_', filename, 'real video id')

            if format_id == 'hd':
                filename = 'hq/'+filename

            # Strip the quotes, then drop the last four characters.
            play_path = 'mp4:'+filename.replace('"', '')[:-4]
            formats.append({
                'format_id': format_id,
                'url': base_url,
                'quality': quality,
                'play_path': play_path,
                'rtmp_live': True,
                'ext': 'flv',
            })

        self._sort_formats(formats)

        return {
            'id': real_id,
            'title': self._og_search_title(webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
            'formats': formats,
            'description': self._og_search_description(webpage),
        }
--- a/youtube_dl/extractor/ivi.py
+++ b/youtube_dl/extractor/ivi.py
@ -14,15 +14,16 @@ from ..utils import (
 class IviIE(InfoExtractor):
    IE_DESC = 'ivi.ru'
    IE_NAME = 'ivi'
-    _VALID_URL = r'^https?://(?:www\.)?ivi\.ru/watch(?:/(?P<compilationid>[^/]+))?/(?P<videoid>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?ivi\.ru/watch(?:/(?P<compilationid>[^/]+))?/(?P<videoid>\d+)'
    _TESTS = [
        # Single movie
        {
            'url': 'http://www.ivi.ru/watch/53141',
            'file': '53141.mp4',
            'md5': '6ff5be2254e796ed346251d117196cf4',
            'info_dict': {
                'id': '53141',
                'ext': 'mp4',
                'title': 'Иван Васильевич меняет профессию',
                'description': 'md5:b924063ea1677c8fe343d8a72ac2195f',
                'duration': 5498,
@ -33,9 +34,10 @@ class IviIE(InfoExtractor):
        # Serial's serie
        {
            'url': 'http://www.ivi.ru/watch/dezhurnyi_angel/74791',
            'file': '74791.mp4',
            'md5': '3e6cc9a848c1d2ebcc6476444967baa9',
            'info_dict': {
                'id': '74791',
                'ext': 'mp4',
                'title': 'Дежурный ангел - 1 серия',
                'duration': 2490,
                'thumbnail': 'http://thumbs.ivi.ru/f7.vcp.digitalaccess.ru/contents/8/e/bc2f6c2b6e5d291152fdd32c059141.jpg',
@ -124,7 +126,7 @@ class IviIE(InfoExtractor):
 class IviCompilationIE(InfoExtractor):
    IE_DESC = 'ivi.ru compilations'
    IE_NAME = 'ivi:compilation'
-    _VALID_URL = r'^https?://(?:www\.)?ivi\.ru/watch/(?!\d+)(?P<compilationid>[a-z\d_-]+)(?:/season(?P<seasonid>\d+))?$'
+    _VALID_URL = r'https?://(?:www\.)?ivi\.ru/watch/(?!\d+)(?P<compilationid>[a-z\d_-]+)(?:/season(?P<seasonid>\d+))?$'
    def _extract_entries(self, html, compilation_id):
        return [self.url_result('http://www.ivi.ru/watch/%s/%s' % (compilation_id, serie), 'Ivi')
--- a/youtube_dl/extractor/jadorecettepub.py
+++ b/youtube_dl/extractor/jadorecettepub.py
@ -0,0 +1,49 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import json
 import re
 from .common import InfoExtractor
 from .youtube import YoutubeIE
 class JadoreCettePubIE(InfoExtractor):
    _VALID_URL = r'http://(?:www\.)?jadorecettepub\.com/[0-9]{4}/[0-9]{2}/(?P<id>.*?)\.html'
    _TEST = {
        'url': 'http://www.jadorecettepub.com/2010/12/star-wars-massacre-par-les-japonais.html',
        'md5': '401286a06067c70b44076044b66515de',
        'info_dict': {
            'id': 'jLMja3tr7a4',
            'ext': 'mp4',
            'title': 'La pire utilisation de Star Wars',
            'description': "Jadorecettepub.com vous a gratifié de plusieurs pubs géniales utilisant Star Wars et Dark Vador plus particulièrement... Mais l'heure est venue de vous proposer une version totalement massacrée, venue du Japon.  Quand les Japonais détruisent l'image de Star Wars pour vendre du thon en boite, ça promet...",
        },
    }
    def _real_extract(self, url):
        """Return a url_transparent result for the video embedded in a post.

        Title and description are taken from the post itself; the video id
        is derived from the embedded URL via YoutubeIE.extract_id.
        """
        display_id = re.match(self._VALID_URL, url).group('id')
        webpage = self._download_webpage(url, display_id)

        title = self._html_search_regex(
            r'<span style="font-size: x-large;"><b>(.*?)</b></span>',
            webpage, 'title')
        # The description is optional (non-fatal lookup).
        description = self._html_search_regex(
            r'(?s)<div id="fb-root">(.*?)<script>', webpage, 'description',
            fatal=False)
        real_url = self._search_regex(
            r'\[/postlink\](.*)endofvid', webpage, 'video URL')

        info = {
            '_type': 'url_transparent',
            'url': real_url,
            'id': YoutubeIE.extract_id(real_url),
            'title': title,
            'description': description,
        }
        return info
--- a/youtube_dl/extractor/jeuxvideo.py
+++ b/youtube_dl/extractor/jeuxvideo.py
@ -1,5 +1,7 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import json
 import re
@ -10,12 +12,13 @@ class JeuxVideoIE(InfoExtractor):
    _VALID_URL = r'http://.*?\.jeuxvideo\.com/.*/(.*?)-\d+\.htm'
    _TEST = {
-        u'url': u'http://www.jeuxvideo.com/reportages-videos-jeux/0004/00046170/tearaway-playstation-vita-gc-2013-tearaway-nous-presente-ses-papiers-d-identite-00115182.htm',
+        'url': 'http://www.jeuxvideo.com/reportages-videos-jeux/0004/00046170/tearaway-playstation-vita-gc-2013-tearaway-nous-presente-ses-papiers-d-identite-00115182.htm',
-        u'file': u'5182.mp4',
+        'md5': '046e491afb32a8aaac1f44dd4ddd54ee',
-        u'md5': u'046e491afb32a8aaac1f44dd4ddd54ee',
+        'info_dict': {
-        u'info_dict': {
+            'id': '5182',
-            u'title': u'GC 2013 : Tearaway nous présente ses papiers d\'identité',
+            'ext': 'mp4',
-            u'description': u'Lorsque les développeurs de LittleBigPlanet proposent un nouveau titre, on ne peut que s\'attendre à un résultat original et fort attrayant.\n',
+            'title': 'GC 2013 : Tearaway nous présente ses papiers d\'identité',
            'description': 'Lorsque les développeurs de LittleBigPlanet proposent un nouveau titre, on ne peut que s\'attendre à un résultat original et fort attrayant.\n',
        },
    }
@ -25,14 +28,14 @@ class JeuxVideoIE(InfoExtractor):
        webpage = self._download_webpage(url, title)
        xml_link = self._html_search_regex(
            r'<param name="flashvars" value="config=(.*?)" />',
-            webpage, u'config URL')
+            webpage, 'config URL')
        video_id = self._search_regex(
            r'http://www\.jeuxvideo\.com/config/\w+/\d+/(.*?)/\d+_player\.xml',
-            xml_link, u'video ID')
+            xml_link, 'video ID')
        config = self._download_xml(
-            xml_link, title, u'Downloading XML config')
+            xml_link, title, 'Downloading XML config')
        info_json = config.find('format.json').text
        info = json.loads(info_json)['versions'][0]
--- a/youtube_dl/extractor/kontrtube.py
+++ b/youtube_dl/extractor/kontrtube.py
@ -0,0 +1,66 @@
 # encoding: utf-8
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 class KontrTubeIE(InfoExtractor):
    IE_NAME = 'kontrtube'
    IE_DESC = 'KontrTube.ru - Труба зовёт'
    _VALID_URL = r'http://(?:www\.)?kontrtube\.ru/videos/(?P<id>\d+)/.+'
    _TEST = {
        'url': 'http://www.kontrtube.ru/videos/2678/nad-olimpiyskoy-derevney-v-sochi-podnyat-rossiyskiy-flag/',
        'md5': '975a991a4926c9a85f383a736a2e6b80',
        'info_dict': {
            'id': '2678',
            'ext': 'mp4',
            'title': 'Над олимпийской деревней в Сочи поднят российский флаг',
            'description': 'md5:80edc4c613d5887ae8ccf1d59432be41',
            'thumbnail': 'http://www.kontrtube.ru/contents/videos_screenshots/2000/2678/preview.mp4.jpg',
            'duration': 270,
        }
    }
    def _real_extract(self, url):
        """Extract the video URL and metadata from a KontrTube video page.

        The thumbnail, duration, view count and comment count are optional:
        each is None when the corresponding markup is not found on the page.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id, 'Downloading page')

        video_url = self._html_search_regex(r"video_url: '(.+?)/?',", webpage, 'video URL')
        thumbnail = self._html_search_regex(r"preview_url: '(.+?)/?',", webpage, 'video thumbnail', fatal=False)
        title = self._html_search_regex(r'<title>(.+?) - Труба зовёт - Интересный видеохостинг</title>', webpage,
            'video title')
        description = self._html_search_meta('description', webpage, 'video description')

        # Duration is shown as "<minutes>м:<seconds>с".
        mobj = re.search(r'<div class="col_2">Длительность: <span>(?P<minutes>\d+)м:(?P<seconds>\d+)с</span></div>',
            webpage)
        duration = int(mobj.group('minutes')) * 60 + int(mobj.group('seconds')) if mobj else None

        view_count = self._html_search_regex(r'<div class="col_2">Просмотров: <span>(\d+)</span></div>', webpage,
            'view count', fatal=False)
        view_count = int(view_count) if view_count is not None else None

        comment_count = None
        comment_str = self._html_search_regex(r'Комментарии: <span>([^<]+)</span>', webpage, 'comment count',
            fatal=False)
        # The lookup above is non-fatal, so comment_str may be None; only
        # parse it when the comments label was actually found.
        if comment_str is not None:
            if comment_str.startswith('комментариев нет'):
                comment_count = 0
            else:
                mobj = re.search(r'\d+ из (?P<total>\d+) комментариев', comment_str)
                if mobj:
                    comment_count = int(mobj.group('total'))

        return {
            'id': video_id,
            'url': video_url,
            'thumbnail': thumbnail,
            'title': title,
            'description': description,
            'duration': duration,
            'view_count': view_count,
            'comment_count': comment_count,
        }
--- a/youtube_dl/extractor/lifenews.py
+++ b/youtube_dl/extractor/lifenews.py
@ -31,7 +31,7 @@ class LifeNewsIE(InfoExtractor):
        webpage = self._download_webpage('http://lifenews.ru/mobile/news/%s' % video_id, video_id, 'Downloading page')
        video_url = self._html_search_regex(
-            r'<video.*?src="([^"]+)"></video>', webpage, 'video URL')
+            r'<video.*?src="([^"]+)".*?></video>', webpage, 'video URL')
        thumbnail = self._html_search_regex(
            r'<video.*?poster="([^"]+)".*?"></video>', webpage, 'video thumbnail')
--- a/youtube_dl/extractor/m6.py
+++ b/youtube_dl/extractor/m6.py
@ -0,0 +1,56 @@
 # encoding: utf-8
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 class M6IE(InfoExtractor):
    IE_NAME = 'm6'
    _VALID_URL = r'http://(?:www\.)?m6\.fr/[^/]+/videos/(?P<id>\d+)-[^\.]+\.html'
    _TEST = {
        'url': 'http://www.m6.fr/emission-les_reines_du_shopping/videos/11323908-emeline_est_la_reine_du_shopping_sur_le_theme_ma_fete_d_8217_anniversaire.html',
        'md5': '242994a87de2c316891428e0176bcb77',
        'info_dict': {
            'id': '11323908',
            'ext': 'mp4',
            'title': 'Emeline est la Reine du Shopping sur le thème « Ma fête d’anniversaire ! »',
            'description': 'md5:1212ae8fb4b7baa4dc3886c5676007c2',
            'duration': 100,
        }
    }
    def _real_extract(self, url):
        """Extract metadata and formats from the m6.fr video info feed.

        All fields are read from the ``channel/item`` element of the XML
        document; a format is emitted for each of the 'lq', 'sd', 'hq' and
        'hd' ``url_video_*`` elements that is present.
        """
        video_id = re.match(self._VALID_URL, url).group('id')

        rss = self._download_xml('http://ws.m6.fr/v1/video/info/m6/bonus/%s' % video_id, video_id,
            'Downloading video RSS')

        def item_text(tag):
            # Text of a child element of the feed's single item.
            return rss.find('./channel/item/' + tag).text

        title = item_text('title')
        description = item_text('description')
        thumbnail = item_text('visuel_clip_big')
        duration = int(item_text('duration'))
        view_count = int(item_text('nombre_vues'))

        candidates = [
            (format_id, rss.find('./channel/item/url_video_%s' % format_id))
            for format_id in ['lq', 'sd', 'hq', 'hd']]
        formats = [
            {
                'url': node.text,
                'format_id': format_id,
            }
            for format_id, node in candidates if node is not None]

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'view_count': view_count,
            'formats': formats,
        }
--- a/youtube_dl/extractor/mooshare.py
+++ b/youtube_dl/extractor/mooshare.py
@ -0,0 +1,114 @@
 from __future__ import unicode_literals
 import re
 import time
 from .common import InfoExtractor
 from ..utils import (
    ExtractorError,
    compat_urllib_request,
    compat_urllib_parse,
 )
 class MooshareIE(InfoExtractor):
    IE_NAME = 'mooshare'
    IE_DESC = 'Mooshare.biz'
    _VALID_URL = r'http://mooshare\.biz/(?P<id>[\da-z]{12})'
    _TESTS = [
        {
            'url': 'http://mooshare.biz/8dqtk4bjbp8g',
            'md5': '4e14f9562928aecd2e42c6f341c8feba',
            'info_dict': {
                'id': '8dqtk4bjbp8g',
                'ext': 'mp4',
                'title': 'Comedy Football 2011 - (part 1-2)',
                'duration': 893,
            },
        },
        {
            'url': 'http://mooshare.biz/aipjtoc4g95j',
            'info_dict': {
                'id': 'aipjtoc4g95j',
                'ext': 'mp4',
                'title': 'Orange Caramel  Dashing Through the Snow',
                'duration': 212,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            }
        }
    ]
    def _real_extract(self, url):
        """Extract the available formats of a mooshare.biz video.

        The landing page only contains a download form; posting it back
        (after a short wait) yields the player page, from which the
        thumbnail, the duration and up to three formats (SD, HD, RTMP) are
        read.

        Raises ExtractorError (expected) when the page reports that the
        video was not found or deleted.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        page = self._download_webpage(url, video_id, 'Downloading page')

        if re.search(r'>Video Not Found or Deleted<', page) is not None:
            raise ExtractorError('Video %s does not exist' % video_id, expected=True)

        hash_key = self._html_search_regex(r'<input type="hidden" name="hash" value="([^"]+)">', page, 'hash')
        title = self._html_search_regex(r'(?m)<div class="blockTitle">\s*<h2>Watch ([^<]+)</h2>', page, 'title')

        download_form = {
            'op': 'download1',
            'id': video_id,
            'hash': hash_key,
        }

        # Request data must be bytes on Python 3; the urlencoded form is
        # plain ASCII, so encoding it is safe on Python 2 as well.
        post_data = compat_urllib_parse.urlencode(download_form).encode('ascii')
        request = compat_urllib_request.Request(
            'http://mooshare.biz/%s' % video_id, post_data)
        request.add_header('Content-Type', 'application/x-www-form-urlencoded')

        # Pause before submitting the download form.
        self.to_screen('%s: Waiting for timeout' % video_id)
        time.sleep(5)

        video_page = self._download_webpage(request, video_id, 'Downloading video page')

        thumbnail = self._html_search_regex(r'image:\s*"([^"]+)",', video_page, 'thumbnail', fatal=False)
        duration_str = self._html_search_regex(r'duration:\s*"(\d+)",', video_page, 'duration', fatal=False)
        duration = int(duration_str) if duration_str is not None else None

        formats = []

        # SD video
        mobj = re.search(r'(?m)file:\s*"(?P<url>[^"]+)",\s*provider:', video_page)
        if mobj is not None:
            formats.append({
                'url': mobj.group('url'),
                'format_id': 'sd',
                'format': 'SD',
            })

        # HD video
        mobj = re.search(r'\'hd-2\': { file: \'(?P<url>[^\']+)\' },', video_page)
        if mobj is not None:
            formats.append({
                'url': mobj.group('url'),
                'format_id': 'hd',
                'format': 'HD',
            })

        # rtmp video
        mobj = re.search(r'(?m)file: "(?P<playpath>[^"]+)",\s*streamer: "(?P<rtmpurl>rtmp://[^"]+)",', video_page)
        if mobj is not None:
            formats.append({
                'url': mobj.group('rtmpurl'),
                'play_path': mobj.group('playpath'),
                'rtmp_live': False,
                'ext': 'mp4',
                'format_id': 'rtmp',
                'format': 'HD',
            })

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'duration': duration,
            'formats': formats,
        }
--- a/youtube_dl/extractor/mtv.py
+++ b/youtube_dl/extractor/mtv.py
@ -82,10 +82,13 @@ class MTVServicesInfoExtractor(InfoExtractor):
            title_el = find_xpath_attr(
                itemdoc, './/{http://search.yahoo.com/mrss/}category',
                'scheme', 'urn:mtvn:video_title')
        if title_el is None:
            title_el = itemdoc.find('.//{http://search.yahoo.com/mrss/}title')
        if title_el is None:
            title_el = itemdoc.find('.//title')
            if title_el.text is None:
                title_el = None
        if title_el is None:
            title_el = itemdoc.find('.//{http://search.yahoo.com/mrss/}title')
        title = title_el.text
        if title is None:
            raise ExtractorError('Could not find video title')
--- a/youtube_dl/extractor/ndr.py
+++ b/youtube_dl/extractor/ndr.py
@ -0,0 +1,89 @@
 # encoding: utf-8
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..utils import ExtractorError
 class NDRIE(InfoExtractor):
    """Information extractor for NDR.de Mediathek pages (video and audio)."""

    IE_NAME = 'ndr'
    IE_DESC = 'NDR.de - Mediathek'
    _VALID_URL = r'https?://www\.ndr\.de/.+?(?P<id>\d+)\.html'
    _TESTS = [
        # video
        {
            'url': 'http://www.ndr.de/fernsehen/sendungen/hallo_niedersachsen/media/hallonds19925.html',
            'md5': '20eba151ff165f386643dad9c1da08f7',
            'info_dict': {
                'id': '19925',
                'ext': 'mp4',
                'title': 'Hallo Niedersachsen  ',
                'description': 'Bei Hallo Niedersachsen um 19:30 Uhr erfahren Sie alles, was am Tag in Niedersachsen los war.',
                'duration': 1722,
            },
        },
        # audio
        {
            'url': 'http://www.ndr.de/903/audio191719.html',
            'md5': '41ed601768534dd18a9ae34d84798129',
            'info_dict': {
                'id': '191719',
                'ext': 'mp3',
                'title': '"Es war schockierend"',
                'description': 'md5:ed7ff8364793545021a6355b97e95f10',
                'duration': 112,
            }
        }
    ]

    def _real_extract(self, url):
        """Download the page at *url* and build the info dict from it.

        The numeric id is taken from the URL, title and description from
        the page's Open Graph data, and the media links from the player
        setup embedded in the page.

        Raises ExtractorError when neither an audio nor a video link is
        found in the page.
        """
        video_id = re.match(self._VALID_URL, url).group('id')

        page = self._download_webpage(url, video_id, 'Downloading page')

        title = self._og_search_title(page)
        description = self._og_search_description(page)

        # The duration is shown as "<minutes>:<seconds>"; it is optional.
        duration_match = re.search(
            r'<div class="duration"><span class="min">(?P<minutes>\d+)</span>:<span class="sec">(?P<seconds>\d+)</span></div>',
            page)
        if duration_match:
            minutes = int(duration_match.group('minutes'))
            seconds = int(duration_match.group('seconds'))
            duration = minutes * 60 + seconds
        else:
            duration = None

        formats = []

        # Audio pages carry a single mp3 source.
        audio_match = re.search(r'''{src:'(?P<audio>[^']+)', type:"audio/mp3"},''', page)
        if audio_match:
            formats.append({
                'url': audio_match.group('audio'),
                'format_id': 'mp3',
            })

        thumbnail = None

        # Video pages: the "hi" source gives the base URL from which the
        # URLs of all three quality variants are derived.
        video_match = re.search(r'''3: {src:'(?P<video>.+?)\.hi\.mp4', type:"video/mp4"},''', page)
        if video_match:
            thumbnail = self._html_search_regex(r'(?m)title: "NDR PLAYER",\s*poster: "([^"]+)",',
                page, 'thumbnail', fatal=False)
            if thumbnail:
                # The poster path found in the page is prefixed with the site host.
                thumbnail = 'http://www.ndr.de' + thumbnail

            base_url = video_match.group('video')
            formats.extend(
                {
                    'url': '%s.%s.mp4' % (base_url, quality),
                    'format_id': quality,
                }
                for quality in ('lo', 'hi', 'hq'))

        if not formats:
            raise ExtractorError('No media links available for %s' % video_id)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'formats': formats,
        }
--- a/youtube_dl/extractor/nfb.py
+++ b/youtube_dl/extractor/nfb.py
@ -0,0 +1,93 @@
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..utils import (
    compat_urllib_request,
    compat_urllib_parse,
 )
 class NFBIE(InfoExtractor):
    """Information extractor for National Film Board of Canada film pages."""

    IE_NAME = 'nfb'
    IE_DESC = 'National Film Board of Canada'
    _VALID_URL = r'https?://(?:www\.)?(nfb|onf)\.ca/film/(?P<id>[\da-z_-]+)'
    _TEST = {
        'url': 'https://www.nfb.ca/film/qallunaat_why_white_people_are_funny',
        'info_dict': {
            'id': 'qallunaat_why_white_people_are_funny',
            'ext': 'mp4',
            'title': 'Qallunaat! Why White People Are Funny ',
            'description': 'md5:836d8aff55e087d04d9f6df554d4e038',
            'duration': 3128,
            'uploader': 'Mark Sandiford',
            'uploader_id': 'mark-sandiford',
        },
        'params': {
            # rtmp download
            'skip_download': True,
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the film referenced by *url*.

        The film page supplies the director (reported as uploader and
        uploader_id); the player config XML, requested with a POST,
        supplies title, description, duration, thumbnail and the
        stream formats.
        """
        video_id = re.match(self._VALID_URL, url).group('id')

        # The film page is always requested from www.nfb.ca, whichever
        # host the input URL used.
        page = self._download_webpage('https://www.nfb.ca/film/%s' % video_id, video_id, 'Downloading film page')

        uploader_id = self._html_search_regex(r'<a class="director-link" href="/explore-all-directors/([^/]+)/"',
            page, 'director id', fatal=False)
        uploader = self._html_search_regex(r'<em class="director-name" itemprop="name">([^<]+)</em>',
            page, 'director name', fatal=False)

        # Passing a data payload makes this a POST request.
        config_url = 'https://www.nfb.ca/film/%s/player_config' % video_id
        post_data = compat_urllib_parse.urlencode({'getConfig': 'true'}).encode('ascii')
        request = compat_urllib_request.Request(config_url, post_data)
        request.add_header('Content-Type', 'application/x-www-form-urlencoded')
        request.add_header('X-NFB-Referer', 'http://www.nfb.ca/medias/flash/NFBVideoPlayer.swf')

        config = self._download_xml(request, video_id, 'Downloading player config XML')

        def extract_thumbnail(media):
            """Return the 'high' quality asset URL, else any asset URL, else None."""
            by_quality = dict(
                (asset.get('quality'), asset.find('default/url').text)
                for asset in media.findall('assets/asset'))
            if not by_quality:
                return None
            if 'high' in by_quality:
                return by_quality['high']
            return next(iter(by_quality.values()))

        title = description = thumbnail = duration = None
        formats = []

        for media in config.findall('./player/stream/media'):
            media_type = media.get('type')
            if media_type == 'posterImage':
                thumbnail = extract_thumbnail(media)
            elif media_type == 'video':
                duration = int(media.get('duration'))
                title = media.find('title').text
                description = media.find('description').text
                # It seems assets always go from lower to better quality, so no need to sort
                video_formats = []
                for asset in media.findall('assets/asset'):
                    video_formats.append({
                        'url': asset.find('default/streamerURI').text + '/',
                        'play_path': asset.find('default/url').text,
                        'rtmp_live': False,
                        'ext': 'mp4',
                        'format_id': asset.get('quality'),
                    })
                formats = video_formats

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'formats': formats,
        }
--- a/youtube_dl/extractor/pbs.py
+++ b/youtube_dl/extractor/pbs.py
@ -1,34 +1,68 @@
 from __future__ import unicode_literals
 import re
 import json
 from .common import InfoExtractor
 class PBSIE(InfoExtractor):
-    _VALID_URL = r'https?://video\.pbs\.org/video/(?P<id>\d+)/?'
+    _VALID_URL = r'''(?x)https?://
        (?:
            # Direct video URL
            video\.pbs\.org/(?:viralplayer|video)/(?P<id>[0-9]+)/? |
            # Article with embedded player
           (?:www\.)?pbs\.org/(?:[^/]+/){2,5}(?P<presumptive_id>[^/]+)/?(?:$|[?\#]) |
           # Player
           video\.pbs\.org/partnerplayer/(?P<player_id>[^/]+)/
        )
    '''
    _TEST = {
-        u'url': u'http://video.pbs.org/video/2365006249/',
+        'url': 'http://www.pbs.org/tpt/constitution-usa-peter-sagal/watch/a-more-perfect-union/',
-        u'file': u'2365006249.mp4',
+        'md5': 'ce1888486f0908d555a8093cac9a7362',
-        u'md5': 'ce1888486f0908d555a8093cac9a7362',
+        'info_dict': {
-        u'info_dict': {
+            'id': '2365006249',
-            u'title': u'A More Perfect Union',
+            'ext': 'mp4',
-            u'description': u'md5:ba0c207295339c8d6eced00b7c363c6a',
+            'title': 'A More Perfect Union',
-            u'duration': 3190,
+            'description': 'md5:ba0c207295339c8d6eced00b7c363c6a',
            'duration': 3190,
        },
    }
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+
        presumptive_id = mobj.group('presumptive_id')
        display_id = presumptive_id
        if presumptive_id:
            webpage = self._download_webpage(url, display_id)
            url = self._search_regex(
                r'<iframe\s+id=["\']partnerPlayer["\'].*?\s+src=["\'](.*?)["\']>',
                webpage, 'player URL')
            mobj = re.match(self._VALID_URL, url)
        player_id = mobj.group('player_id')
        if not display_id:
            display_id = player_id
        if player_id:
            player_page = self._download_webpage(
                url, display_id, note='Downloading player page',
                errnote='Could not download player page')
            video_id = self._search_regex(
                r'<div\s+id="video_([0-9]+)"', player_page, 'video ID')
        else:
            video_id = mobj.group('id')
            display_id = video_id
        info_url = 'http://video.pbs.org/videoInfo/%s?format=json' % video_id
-        info_page = self._download_webpage(info_url, video_id)
+        info = self._download_json(info_url, display_id)
-        info =json.loads(info_page)
+
-        return {'id': video_id,
+        return {
-                'title': info['title'],
+            'id': video_id,
-                'url': info['alternate_encoding']['url'],
+            'title': info['title'],
-                'ext': 'mp4',
+            'url': info['alternate_encoding']['url'],
-                'description': info['program'].get('description'),
+            'ext': 'mp4',
-                'thumbnail': info.get('image_url'),
+            'description': info['program'].get('description'),
-                'duration': info.get('duration'),
+            'thumbnail': info.get('image_url'),
-                }
+            'duration': info.get('duration'),
        }
--- a/youtube_dl/extractor/slideshare.py
+++ b/youtube_dl/extractor/slideshare.py
@ -1,3 +1,5 @@
 from __future__ import unicode_literals
 import re
 import json
@ -12,11 +14,12 @@ class SlideshareIE(InfoExtractor):
    _VALID_URL = r'https?://www\.slideshare\.net/[^/]+?/(?P<title>.+?)($|\?)'
    _TEST = {
-        u'url': u'http://www.slideshare.net/Dataversity/keynote-presentation-managing-scale-and-complexity',
+        'url': 'http://www.slideshare.net/Dataversity/keynote-presentation-managing-scale-and-complexity',
-        u'file': u'25665706.mp4',
+        'info_dict': {
-        u'info_dict': {
+            'id': '25665706',
-            u'title': u'Managing Scale and Complexity',
+            'ext': 'mp4',
-            u'description': u'This was a keynote presentation at the NoSQL Now! 2013 Conference & Expo (http://www.nosqlnow.com). This presentation was given by Adrian Cockcroft from Netflix',
+            'title': 'Managing Scale and Complexity',
            'description': 'This was a keynote presentation at the NoSQL Now! 2013 Conference & Expo (http://www.nosqlnow.com). This presentation was given by Adrian Cockcroft from Netflix.',
        },
    }
@ -26,15 +29,17 @@ class SlideshareIE(InfoExtractor):
        webpage = self._download_webpage(url, page_title)
        slideshare_obj = self._search_regex(
            r'var slideshare_object =  ({.*?}); var user_info =',
-            webpage, u'slideshare object')
+            webpage, 'slideshare object')
        info = json.loads(slideshare_obj)
-        if info['slideshow']['type'] != u'video':
+        if info['slideshow']['type'] != 'video':
-            raise ExtractorError(u'Webpage type is "%s": only video extraction is supported for Slideshare' % info['slideshow']['type'], expected=True)
+            raise ExtractorError('Webpage type is "%s": only video extraction is supported for Slideshare' % info['slideshow']['type'], expected=True)
        doc = info['doc']
        bucket = info['jsplayer']['video_bucket']
        ext = info['jsplayer']['video_extension']
        video_url = compat_urlparse.urljoin(bucket, doc + '-SD.' + ext)
        description = self._html_search_regex(
            r'<p class="description.*?"[^>]*>(.*?)</p>', webpage, 'description')
        return {
            '_type': 'video',
@ -43,5 +48,5 @@ class SlideshareIE(InfoExtractor):
            'ext': ext,
            'url': video_url,
            'thumbnail': info['slideshow']['pin_image_url'],
-            'description': self._og_search_description(webpage),
+            'description': description,
        }
--- a/youtube_dl/extractor/statigram.py
+++ b/youtube_dl/extractor/statigram.py
@ -1,36 +1,38 @@
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 class StatigramIE(InfoExtractor):
-    _VALID_URL = r'(?:http://)?(?:www\.)?statigr\.am/p/([^/]+)'
+    _VALID_URL = r'https?://(www\.)?statigr\.am/p/(?P<id>[^/]+)'
    _TEST = {
-        u'url': u'http://statigr.am/p/522207370455279102_24101272',
+        'url': 'http://statigr.am/p/522207370455279102_24101272',
-        u'file': u'522207370455279102_24101272.mp4',
+        'md5': '6eb93b882a3ded7c378ee1d6884b1814',
-        u'md5': u'6eb93b882a3ded7c378ee1d6884b1814',
+        'info_dict': {
-        u'info_dict': {
+            'id': '522207370455279102_24101272',
-            u'uploader_id': u'aguynamedpatrick',
+            'ext': 'mp4',
-            u'title': u'Instagram photo by @aguynamedpatrick (Patrick Janelle)',
+            'uploader_id': 'aguynamedpatrick',
            'title': 'Instagram photo by @aguynamedpatrick (Patrick Janelle)',
        },
    }
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group(1)
+        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)
        html_title = self._html_search_regex(
            r'<title>(.+?)</title>',
-            webpage, u'title')
+            webpage, 'title')
        title = re.sub(r'(?: *\(Videos?\))? \| Statigram$', '', html_title)
        uploader_id = self._html_search_regex(
-            r'@([^ ]+)', title, u'uploader name', fatal=False)
+            r'@([^ ]+)', title, 'uploader name', fatal=False)
        ext = 'mp4'
-        return [{
+        return {
-            'id':        video_id,
+            'id': video_id,
-            'url':       self._og_search_video_url(webpage),
+            'url': self._og_search_video_url(webpage),
-            'ext':       ext,
+            'title': title,
            'title':     title,
            'thumbnail': self._og_search_thumbnail(webpage),
-            'uploader_id' : uploader_id
+            'uploader_id': uploader_id
-        }]
+        }
--- a/youtube_dl/extractor/subtitles.py
+++ b/youtube_dl/extractor/subtitles.py
@ -68,13 +68,14 @@ class SubtitlesInfoExtractor(InfoExtractor):
    def _request_subtitle_url(self, sub_lang, url):
        """ makes the http request for the subtitle """
        try:
-            return self._download_subtitle_url(sub_lang, url)
+            sub = self._download_subtitle_url(sub_lang, url)
        except ExtractorError as err:
            self._downloader.report_warning(u'unable to download video subtitles for %s: %s' % (sub_lang, compat_str(err)))
            return
        if not sub:
            self._downloader.report_warning(u'Did not fetch video subtitles')
            return
        return sub
    def _get_available_subtitles(self, video_id, webpage):
        """
--- a/youtube_dl/extractor/thisav.py
+++ b/youtube_dl/extractor/thisav.py
@ -1,22 +1,23 @@
 #coding: utf-8
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
-from ..utils import (
+from ..utils import determine_ext
-    determine_ext,
+
 )
 class ThisAVIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?thisav\.com/video/(?P<id>[0-9]+)/.*'
    _TEST = {
-        u"url": u"http://www.thisav.com/video/47734/%98%26sup1%3B%83%9E%83%82---just-fit.html",
+        'url': 'http://www.thisav.com/video/47734/%98%26sup1%3B%83%9E%83%82---just-fit.html',
-        u"file": u"47734.flv",
+        'md5': '0480f1ef3932d901f0e0e719f188f19b',
-        u"md5": u"0480f1ef3932d901f0e0e719f188f19b",
+        'info_dict': {
-        u"info_dict": {
+            'id': '47734',
-            u"title": u"高樹マリア - Just fit",
+            'ext': 'flv',
-            u"uploader": u"dj7970",
+            'title': '高樹マリア - Just fit',
-            u"uploader_id": u"dj7970"
+            'uploader': 'dj7970',
            'uploader_id': 'dj7970'
        }
    }
@ -25,19 +26,18 @@ class ThisAVIE(InfoExtractor):
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)
-        title = self._html_search_regex(r'<h1>([^<]*)</h1>', webpage, u'title')
+        title = self._html_search_regex(r'<h1>([^<]*)</h1>', webpage, 'title')
        video_url = self._html_search_regex(
-            r"addVariable\('file','([^']+)'\);", webpage, u'video url')
+            r"addVariable\('file','([^']+)'\);", webpage, 'video url')
        uploader = self._html_search_regex(
            r': <a href="http://www.thisav.com/user/[0-9]+/(?:[^"]+)">([^<]+)</a>',
-            webpage, u'uploader name', fatal=False)
+            webpage, 'uploader name', fatal=False)
        uploader_id = self._html_search_regex(
            r': <a href="http://www.thisav.com/user/[0-9]+/([^"]+)">(?:[^<]+)</a>',
-            webpage, u'uploader id', fatal=False)
+            webpage, 'uploader id', fatal=False)
        ext = determine_ext(video_url)
        return {
            '_type':       'video',
            'id':          video_id,
            'url':         video_url,
            'uploader':    uploader,
--- a/youtube_dl/extractor/toutv.py
+++ b/youtube_dl/extractor/toutv.py
@ -1,4 +1,6 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
@ -9,25 +11,25 @@ from ..utils import (
 class TouTvIE(InfoExtractor):
-    IE_NAME = u'tou.tv'
+    IE_NAME = 'tou.tv'
    _VALID_URL = r'https?://www\.tou\.tv/(?P<id>[a-zA-Z0-9_-]+(?:/(?P<episode>S[0-9]+E[0-9]+)))'
    _TEST = {
-        u'url': u'http://www.tou.tv/30-vies/S04E41',
+        'url': 'http://www.tou.tv/30-vies/S04E41',
-        u'file': u'30-vies_S04E41.mp4',
+        'file': '30-vies_S04E41.mp4',
-        u'info_dict': {
+        'info_dict': {
-            u'title': u'30 vies Saison 4 / Épisode 41',
+            'title': '30 vies Saison 4 / Épisode 41',
-            u'description': u'md5:da363002db82ccbe4dafeb9cab039b09',
+            'description': 'md5:da363002db82ccbe4dafeb9cab039b09',
-            u'age_limit': 8,
+            'age_limit': 8,
-            u'uploader': u'Groupe des Nouveaux Médias',
+            'uploader': 'Groupe des Nouveaux Médias',
-            u'duration': 1296,
+            'duration': 1296,
-            u'upload_date': u'20131118',
+            'upload_date': '20131118',
-            u'thumbnail': u'http://static.tou.tv/medias/images/2013-11-18_19_00_00_30VIES_0341_01_L.jpeg',
+            'thumbnail': 'http://static.tou.tv/medias/images/2013-11-18_19_00_00_30VIES_0341_01_L.jpeg',
        },
-        u'params': {
+        'params': {
-            u'skip_download': True,  # Requires rtmpdump
+            'skip_download': True,  # Requires rtmpdump
        },
-        u'skip': 'Only available in Canada'
+        'skip': 'Only available in Canada'
    }
    def _real_extract(self, url):
@ -36,25 +38,25 @@ class TouTvIE(InfoExtractor):
        webpage = self._download_webpage(url, video_id)
        mediaId = self._search_regex(
-            r'"idMedia":\s*"([^"]+)"', webpage, u'media ID')
+            r'"idMedia":\s*"([^"]+)"', webpage, 'media ID')
-        streams_url = u'http://release.theplatform.com/content.select?pid=' + mediaId
+        streams_url = 'http://release.theplatform.com/content.select?pid=' + mediaId
        streams_doc = self._download_xml(
-            streams_url, video_id, note=u'Downloading stream list')
+            streams_url, video_id, note='Downloading stream list')
        video_url = next(n.text
                         for n in streams_doc.findall('.//choice/url')
-                         if u'//ad.doubleclick' not in n.text)
+                         if '//ad.doubleclick' not in n.text)
        if video_url.endswith('/Unavailable.flv'):
            raise ExtractorError(
-                u'Access to this video is blocked from outside of Canada',
+                'Access to this video is blocked from outside of Canada',
                expected=True)
        duration_str = self._html_search_meta(
-            'video:duration', webpage, u'duration')
+            'video:duration', webpage, 'duration')
        duration = int(duration_str) if duration_str else None
        upload_date_str = self._html_search_meta(
-            'video:release_date', webpage, u'upload date')
+            'video:release_date', webpage, 'upload date')
        upload_date = unified_strdate(upload_date_str) if upload_date_str else None
        return {
--- a/youtube_dl/extractor/traileraddict.py
+++ b/youtube_dl/extractor/traileraddict.py
@ -6,6 +6,7 @@ from .common import InfoExtractor
 class TrailerAddictIE(InfoExtractor):
    _WORKING = False
    _VALID_URL = r'(?:http://)?(?:www\.)?traileraddict\.com/(?:trailer|clip)/(?P<movie>.+?)/(?P<trailer_name>.+)'
    _TEST = {
        'url': 'http://www.traileraddict.com/trailer/prince-avalanche/trailer',
--- a/youtube_dl/extractor/tube8.py
+++ b/youtube_dl/extractor/tube8.py
@ -11,7 +11,7 @@ from ..aes import (
 )
 class Tube8IE(InfoExtractor):
-    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>tube8\.com/[^/]+/[^/]+/(?P<videoid>[0-9]+)/?)'
+    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>tube8\.com/.+?/(?P<videoid>\d+)/?)$'
    _TEST = {
        u'url': u'http://www.tube8.com/teen/kasia-music-video/229795/',
        u'file': u'229795.mp4',
--- a/youtube_dl/extractor/vbox7.py
+++ b/youtube_dl/extractor/vbox7.py
@ -1,3 +1,6 @@
 # encoding: utf-8
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
@ -10,45 +13,44 @@ from ..utils import (
 class Vbox7IE(InfoExtractor):
-    """Information Extractor for Vbox7"""
+    _VALID_URL = r'http://(www\.)?vbox7\.com/play:(?P<id>[^/]+)'
    _VALID_URL = r'(?:http://)?(?:www\.)?vbox7\.com/play:([^/]+)'
    _TEST = {
-        u'url': u'http://vbox7.com/play:249bb972c2',
+        'url': 'http://vbox7.com/play:249bb972c2',
-        u'file': u'249bb972c2.flv',
+        'md5': '99f65c0c9ef9b682b97313e052734c3f',
-        u'md5': u'99f65c0c9ef9b682b97313e052734c3f',
+        'info_dict': {
-        u'info_dict': {
+            'id': '249bb972c2',
-            u"title": u"\u0421\u043c\u044f\u0445! \u0427\u0443\u0434\u043e - \u0447\u0438\u0441\u0442 \u0437\u0430 \u0441\u0435\u043a\u0443\u043d\u0434\u0438 - \u0421\u043a\u0440\u0438\u0442\u0430 \u043a\u0430\u043c\u0435\u0440\u0430"
+            'ext': 'flv',
-        }
+            'title': 'Смях! Чудо - чист за секунди - Скрита камера',
        },
    }
-    def _real_extract(self,url):
+    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
-        if mobj is None:
+        video_id = mobj.group('id')
            raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = mobj.group(1)
        redirect_page, urlh = self._download_webpage_handle(url, video_id)
-        new_location = self._search_regex(r'window\.location = \'(.*)\';', redirect_page, u'redirect location')
+        new_location = self._search_regex(r'window\.location = \'(.*)\';',
            redirect_page, 'redirect location')
        redirect_url = urlh.geturl() + new_location
-        webpage = self._download_webpage(redirect_url, video_id, u'Downloading redirect page')
+        webpage = self._download_webpage(redirect_url, video_id,
            'Downloading redirect page')
        title = self._html_search_regex(r'<title>(.*)</title>',
-            webpage, u'title').split('/')[0].strip()
+            webpage, 'title').split('/')[0].strip()
        ext = "flv"
        info_url = "http://vbox7.com/play/magare.do"
-        data = compat_urllib_parse.urlencode({'as3':'1','vid':video_id})
+        data = compat_urllib_parse.urlencode({'as3': '1', 'vid': video_id})
        info_request = compat_urllib_request.Request(info_url, data)
        info_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
-        info_response = self._download_webpage(info_request, video_id, u'Downloading info webpage')
+        info_response = self._download_webpage(info_request, video_id, 'Downloading info webpage')
        if info_response is None:
-            raise ExtractorError(u'Unable to extract the media url')
+            raise ExtractorError('Unable to extract the media url')
        (final_url, thumbnail_url) = map(lambda x: x.split('=')[1], info_response.split('&'))
-        return [{
+        return {
-            'id':        video_id,
+            'id': video_id,
-            'url':       final_url,
+            'url': final_url,
-            'ext':       ext,
+            'ext': 'flv',
-            'title':     title,
+            'title': title,
            'thumbnail': thumbnail_url,
-        }]
+        }
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@ -6,10 +6,10 @@ import re
 import itertools
 from .common import InfoExtractor
 from .subtitles import SubtitlesInfoExtractor
 from ..utils import (
    compat_urllib_parse,
    compat_urllib_request,
    clean_html,
    get_element_by_attribute,
    ExtractorError,
@ -19,7 +19,7 @@ from ..utils import (
 )
-class VimeoIE(InfoExtractor):
+class VimeoIE(SubtitlesInfoExtractor):
    """Information extractor for vimeo.com."""
    # _VALID_URL matches Vimeo URLs
@ -84,6 +84,20 @@ class VimeoIE(InfoExtractor):
                'videopassword': 'youtube-dl',
            },
        },
        {
            'url': 'http://vimeo.com/76979871',
            'md5': '3363dd6ffebe3784d56f4132317fd446',
            'note': 'Video with subtitles',
            'info_dict': {
                'id': '76979871',
                'ext': 'mp4',
                'title': 'The New Vimeo Player (You Know, For Videos)',
                'description': 'md5:2ec900bf97c3f389378a96aee11260ea',
                'upload_date': '20131015',
                'uploader_id': 'staff',
                'uploader': 'Vimeo Staff',
            }
        },
    ]
    def _login(self):
@ -273,19 +287,31 @@ class VimeoIE(InfoExtractor):
        if len(formats) == 0:
            raise ExtractorError('No known codec found')
        subtitles = {}
        text_tracks = config['request'].get('text_tracks')
        if text_tracks:
            for tt in text_tracks:
                subtitles[tt['lang']] = 'http://vimeo.com' + tt['url']
        video_subtitles = self.extract_subtitles(video_id, subtitles)
        if self._downloader.params.get('listsubtitles', False):
            self._list_available_subtitles(video_id, subtitles)
            return
        return {
-            'id':       video_id,
+            'id': video_id,
            'uploader': video_uploader,
            'uploader_id': video_uploader_id,
-            'upload_date':  video_upload_date,
+            'upload_date': video_upload_date,
-            'title':    video_title,
+            'title': video_title,
-            'thumbnail':    video_thumbnail,
+            'thumbnail': video_thumbnail,
-            'description':  video_description,
+            'description': video_description,
            'formats': formats,
            'webpage_url': url,
            'view_count': view_count,
            'like_count': like_count,
            'comment_count': comment_count,
            'subtitles': video_subtitles,
        }
--- a/youtube_dl/extractor/vine.py
+++ b/youtube_dl/extractor/vine.py
@ -1,18 +1,21 @@
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 class VineIE(InfoExtractor):
-    _VALID_URL = r'(?:https?://)?(?:www\.)?vine\.co/v/(?P<id>\w+)'
+    _VALID_URL = r'https?://(?:www\.)?vine\.co/v/(?P<id>\w+)'
    _TEST = {
-        u'url': u'https://vine.co/v/b9KOOWX7HUx',
+        'url': 'https://vine.co/v/b9KOOWX7HUx',
-        u'file': u'b9KOOWX7HUx.mp4',
+        'md5': '2f36fed6235b16da96ce9b4dc890940d',
-        u'md5': u'2f36fed6235b16da96ce9b4dc890940d',
+        'info_dict': {
-        u'info_dict': {
+            'id': 'b9KOOWX7HUx',
-            u"uploader": u"Jack Dorsey", 
+            'ext': 'mp4',
-            u"title": u"Chicken."
+            'uploader': 'Jack Dorsey',
-        }
+            'title': 'Chicken.',
        },
    }
    def _real_extract(self, url):
@ -24,17 +27,17 @@ class VineIE(InfoExtractor):
        self.report_extraction(video_id)
-        video_url = self._html_search_regex(r'<meta property="twitter:player:stream" content="(.+?)"',
+        video_url = self._html_search_meta('twitter:player:stream', webpage,
-            webpage, u'video URL')
+            'video URL')
        uploader = self._html_search_regex(r'<p class="username">(.*?)</p>',
-            webpage, u'uploader', fatal=False, flags=re.DOTALL)
+            webpage, 'uploader', fatal=False, flags=re.DOTALL)
-        return [{
+        return {
-            'id':        video_id,
+            'id': video_id,
-            'url':       video_url,
+            'url': video_url,
-            'ext':       'mp4',
+            'ext': 'mp4',
-            'title':     self._og_search_title(webpage),
+            'title': self._og_search_title(webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
-            'uploader':  uploader,
+            'uploader': uploader,
-        }]
+        }
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@ -34,6 +34,7 @@ from ..utils import (
    unified_strdate,
    orderedSet,
    write_json_file,
    uppercase_escape,
 )
 class YoutubeBaseInfoExtractor(InfoExtractor):
@ -136,7 +137,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                         (?:https?://|//)?                                    # http(s):// or protocol-independent URL (optional)
                         (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
                            (?:www\.)?deturl\.com/www\.youtube\.com/|
-                            (?:www\.)?pwnyoutube\.com|
+                            (?:www\.)?pwnyoutube\.com/|
                            tube\.majestyc\.net/|
                            youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
                         (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
@ -502,7 +503,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                return a % b
            m = re.match(
-                r'^(?P<func>[a-zA-Z]+)\((?P<args>[a-z0-9,]+)\)$', expr)
+                r'^(?P<func>[a-zA-Z$]+)\((?P<args>[a-z0-9,]+)\)$', expr)
            if m:
                fname = m.group('func')
                if fname not in functions:
@ -1085,8 +1086,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
            self._downloader.report_warning(err_msg)
            return {}
-    def _extract_id(self, url):
+    @classmethod
-        mobj = re.match(self._VALID_URL, url, re.VERBOSE)
+    def extract_id(cls, url):
        mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = mobj.group(2)
@ -1115,7 +1117,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
        mobj = re.search(self._NEXT_URL_RE, url)
        if mobj:
            url = 'https://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
-        video_id = self._extract_id(url)
+        video_id = self.extract_id(url)
        # Get video webpage
        url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
@ -1422,7 +1424,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
 class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
    IE_DESC = u'YouTube.com playlists'
-    _VALID_URL = r"""(?:
+    _VALID_URL = r"""(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        youtube\.com/
@ -1431,7 +1433,11 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
                           \? (?:.*?&)*? (?:p|a|list)=
                        |  p/
                        )
-                        ((?:PL|EC|UU|FL|RD)?[0-9A-Za-z-_]{10,})
+                        (
                            (?:PL|EC|UU|FL|RD)?[0-9A-Za-z-_]{10,}
                            # Top tracks, they can also include dots 
                            |(?:MC)[\w\.]*
                        )
                        .*
                     |
                        ((?:PL|EC|UU|FL|RD)[0-9A-Za-z-_]{10,})
@ -1441,11 +1447,6 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
    _VIDEO_RE = r'href="/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)'
    IE_NAME = u'youtube:playlist'
    @classmethod
    def suitable(cls, url):
        """Tell whether this IE can handle the given URL.

        The URL is matched from its start against cls._VALID_URL, with the
        pattern interpreted in verbose mode.
        """
        matched = re.match(cls._VALID_URL, url, re.VERBOSE)
        return matched is not None
    def _real_initialize(self):
        self._login()
@ -1469,7 +1470,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
    def _real_extract(self, url):
        # Extract playlist id
-        mobj = re.match(self._VALID_URL, url, re.VERBOSE)
+        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        playlist_id = mobj.group(1) or mobj.group(2)
@ -1590,10 +1591,9 @@ class YoutubeChannelIE(InfoExtractor):
            # Download all channel pages using the json-based channel_ajax query
            for pagenum in itertools.count(1):
                url = self._MORE_PAGES_URL % (pagenum, channel_id)
-                page = self._download_webpage(url, channel_id,
+                page = self._download_json(
-                                              u'Downloading page #%s' % pagenum)
+                    url, channel_id, note=u'Downloading page #%s' % pagenum,
-    
+                    transform_source=uppercase_escape)
                page = json.loads(page)
                ids_in_page = self.extract_videos_from_page(page['content_html'])
                video_ids.extend(ids_in_page)
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@ -751,13 +751,14 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    https_request = http_request
    https_response = http_response
 def unified_strdate(date_str):
    """Return a string with the date in the format YYYYMMDD"""
    upload_date = None
    #Replace commas
-    date_str = date_str.replace(',',' ')
+    date_str = date_str.replace(',', ' ')
    # %z (UTC offset) is only supported in python>=3.2
-    date_str = re.sub(r' (\+|-)[\d]*$', '', date_str)
+    date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
    format_expressions = [
        '%d %B %Y',
        '%B %d %Y',
@ -771,11 +772,12 @@ def unified_strdate(date_str):
        '%Y-%m-%dT%H:%M:%S.%fZ',
        '%Y-%m-%dT%H:%M:%S.%f0Z',
        '%Y-%m-%dT%H:%M:%S',
        '%Y-%m-%dT%H:%M',
    ]
    for expression in format_expressions:
        try:
            upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
-        except:
+        except ValueError:
            pass
    if upload_date is None:
        timetuple = email.utils.parsedate_tz(date_str)
@ -1212,3 +1214,9 @@ class PagedList(object):
            if end == nextfirstid:
                break
        return res
 def uppercase_escape(s):
    """Replace each backslash-U escape followed by eight hex digits with the character for that code point."""
    def _decode(match):
        # The single capture group holds the eight hex digits of the code point.
        code_point = int(match.group(1), 16)
        return compat_chr(code_point)

    pattern = r'\\U([0-9a-fA-F]{8})'
    return re.sub(pattern, _decode, s)
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@ -1,2 +1,2 @@
-__version__ = '2014.02.03'
+__version__ = '2014.02.10'
Author	SHA1	Message	Date
Philipp Hagemeister	2e20bba708	release 2014.02.10	2014-02-10 02:01:11 +01:00
Filippo Valsorda	e70dc1d14b	[youtube] Correct a minor regex typo	2014-02-10 01:30:47 +01:00
Philipp Hagemeister	026fcc0495	Fix #2355 (date parsing with dashes)	2014-02-09 18:09:57 +01:00
Philipp Hagemeister	81c2f20b53	[youtube] Correct invalid JSON (Fixes #2353 )	2014-02-09 17:56:10 +01:00
Jaime Marquínez Ferrándiz	1afe753462	[slideshare] Fix description extraction and modernize The ‘og:description’ property doesn’t contain the full description	2014-02-09 14:23:19 +01:00
Jaime Marquínez Ferrándiz	524c2c716a	[bloomberg] Fix extraction of ooyala embed code	2014-02-09 14:11:45 +01:00
Sergey M.	b542d4bbd7	[kontrtube] Add support for kontrtube.ru (Closes #2354 )	2014-02-09 19:53:11 +07:00
Sergey M.	17968e444c	[bbc.co.uk] Fix TV episode test	2014-02-09 04:04:21 +07:00
Sergey M	2e3fd9ec2f	[bbc.co.uk] Improve overall extractor structure, add subtitles support (#2184) Everything from http://www.bbc.co.uk/iplayer/ should be downloadable now.	2014-02-09 04:00:49 +07:00
Philipp Hagemeister	d6a283b025	release 2014.02.08.2	2014-02-08 19:20:35 +01:00
Philipp Hagemeister	9766538124	[jadorecettepub] Add extractor (Fixes #2148 )	2014-02-08 19:20:23 +01:00
Philipp Hagemeister	98dbee8681	[jeuxvideo] Modernize	2014-02-08 18:43:12 +01:00
Philipp Hagemeister	e421491b3b	release 2014.02.08.1	2014-02-08 18:38:05 +01:00
Philipp Hagemeister	6828d37c41	Merge branch 'master' of github.com:rg3/youtube-dl	2014-02-08 18:37:53 +01:00
Philipp Hagemeister	bf5f610099	[pbs] Add support for viralplayer links (Fixes #2350 )	2014-02-08 18:37:33 +01:00
Sergey M.	8b7f73404a	[bbc.co.uk] Fix regex	2014-02-08 22:55:43 +07:00
Sergey M	85cacb2f51	[bbc.co.uk] Add one more link format	2014-02-08 22:54:05 +07:00
Philipp Hagemeister	b3fa3917e2	release 2014.02.08	2014-02-08 16:25:03 +01:00
Sergey M.	082c6c867a	[bbc.co.uk] Add support for bbc.co.uk radio programmes (Closes #2184 )	2014-02-08 21:55:28 +07:00
Filippo Valsorda	03fcf1ab57	Merge pull request #2342 from MikeCol/tube8 [Tube8] Extended valid urls schema	2014-02-08 04:00:50 +01:00
MikeCol	3b00dea5eb	Extended valid urls schema	2014-02-08 00:09:26 +01:00
Philipp Hagemeister	8bc6c8e3c0	[chilloutzone] Add additional tests (#2340 )	2014-02-07 15:42:31 +01:00
Sergey M.	79bc27b53a	[channel9] Simplify	2014-02-07 19:41:18 +07:00
Sergey M.	84dd703199	[ivi] Simplify	2014-02-07 19:36:50 +07:00
Sergey M.	c6fdba23a6	[nfb] Add workaround for python2.6	2014-02-07 19:23:53 +07:00
Philipp Hagemeister	b19fe521a9	Merge pull request #2340 from Fnordlab/master [chilloutzone] Fixes refactoring bug	2014-02-07 12:46:56 +01:00
Andreas Schmitz	c1e672d121	[chilloutzone] fixes bug with youtube extraction the id used for extracting the video from youtube is stored in native_video_id not video_id. This id is only used on chilloutzone.net	2014-02-07 12:29:58 +01:00
Andreas Schmitz	f4371f4784	Merge remote-tracking branch 'upstream/master'	2014-02-07 12:20:58 +01:00
Philipp Hagemeister	d914d9d187	[chilloutzone] Add import	2014-02-07 12:03:19 +01:00
Philipp Hagemeister	845d14d377	credit @Fnordlab for chilloutzone	2014-02-07 12:00:58 +01:00
Philipp Hagemeister	4a9540b6d2	[chilloutzone] Simplify (#2338 )	2014-02-07 12:00:25 +01:00
Philipp Hagemeister	9f31be7000	Merge remote-tracking branch 'Fnordlab/chilloutzone'	2014-02-07 11:50:26 +01:00
Philipp Hagemeister	41fa1b627d	release 2014.02.06.3	2014-02-07 01:41:01 +01:00
Andreas Schmitz	c0c4e66b29	Merge branch 'chilloutzone'	2014-02-06 21:33:16 +01:00
Andreas Schmitz	cd8662de22	[chilloutzone] Bug fix, runs against tests Fixes a bug with python3.3 and made the extractor run successfully against tox	2014-02-06 21:31:04 +01:00
Sergey M.	3587159614	[nfb] Add encode POST data	2014-02-07 02:13:04 +07:00
Jaime Marquínez Ferrándiz	d67cc9fa7c	[youtube:playlist] Recognize ‘top tracks’ urls (closes #2332 ) The list parameter starts with ‘MC’ and can have more characters after it, including dots	2014-02-06 19:46:26 +01:00
Sergey M.	bf3a2fe923	[elpais] Fix typo	2014-02-07 00:38:29 +07:00
Sergey M.	e9ea0bf123	[ndr] Add support for ndr.de (Closes #2325 )	2014-02-07 00:35:26 +07:00
Philipp Hagemeister	63424b6233	release 2014.02.06.2	2014-02-06 15:45:47 +01:00
Sergey M.	0bf35c5cf5	[nfb] Add support for onf.ca URLs	2014-02-06 21:41:31 +07:00
Sergey M.	95c29381eb	[mooshare] Fix bogus video page URL	2014-02-06 21:26:12 +07:00
Sergey M.	94c4abce7f	[nfb] Add support for nfb.ca (Closes #2069 )	2014-02-06 21:19:13 +07:00
Andreas Schmitz	f2dffe55f8	Merge branch 'chilloutzone'	2014-02-06 11:49:38 +01:00
Andreas Schmitz	46a073bfac	[chilloutzone] Added support for chilloutzone.net Added support for chilloutzone.net videos including embedded youtube and vimeo movies. In case you find a not working movie, drop me an email.	2014-02-06 11:44:44 +01:00
Philipp Hagemeister	df872ec4e7	release 2014.02.06.1	2014-02-06 11:30:00 +01:00
Philipp Hagemeister	5de90176d9	[elpais] Add extractor	2014-02-06 11:29:46 +01:00
Philipp Hagemeister	dcf3eec47a	[test_download] Skip over BadStatusLine errors An error like https://travis-ci.org/rg3/youtube-dl/jobs/18317799#L449 is almost certainly the server's fault.	2014-02-06 04:19:57 +01:00
Philipp Hagemeister	e9e4f30d26	[pbs] Remove unused import	2014-02-06 04:19:43 +01:00
Philipp Hagemeister	83cebd73d4	[collegehumor] We only get shortened descriptions now	2014-02-06 04:16:22 +01:00
Philipp Hagemeister	1df4229bd7	[mtv/gametrailers] Change order of title preference It looks like the plain title is better again	2014-02-06 04:15:12 +01:00
Philipp Hagemeister	3c995527e9	release 2014.02.06	2014-02-06 03:30:30 +01:00
Philipp Hagemeister	7c62b568a2	Merge branch 'master' of github.com:rg3/youtube-dl	2014-02-06 03:30:18 +01:00
Philipp Hagemeister	ccf9114e84	[googlesearch] Fix start, and skip playlists (Fixes #2329 )	2014-02-06 03:29:10 +01:00
Jaime Marquínez Ferrándiz	d8061908bb	[ina] Improve _VALID_URL regex (fixes #2328 ) Accept all letters in upper case and don’t require anything after the id	2014-02-05 23:01:24 +01:00
Philipp Hagemeister	211e17dd43	release 2014.02.05	2014-02-05 21:23:28 +01:00
Philipp Hagemeister	6cb38a9994	[firstpost] Add extractor (Fixes #2324 )	2014-02-05 21:23:21 +01:00
Sergey M.	fa7df757a7	[thisav] Simplify and use unicode literals	2014-02-05 19:13:06 +07:00
Sergey M.	8c82077619	[toutv] Use unicode literals	2014-02-05 19:02:03 +07:00
Sergey M.	e5d1f9e50a	[m6] Add support for m6.fr (Closes #2313 )	2014-02-05 17:38:17 +07:00
Philipp Hagemeister	7ee50ae7b5	release 2014.02.04.1	2014-02-04 23:26:55 +01:00
Jaime Marquínez Ferrándiz	de563c9da0	[ina] Simplify Download the feed with ‘_download_xml’ to make the extraction easier	2014-02-04 23:15:36 +01:00
Jaime Marquínez Ferrándiz	50451f2a18	[vbox7] simplify	2014-02-04 23:02:53 +01:00
Jaime Marquínez Ferrándiz	9bc70948e1	[statigram] Simplify	2014-02-04 22:52:27 +01:00
Jaime Marquínez Ferrándiz	5dc733f071	[vine] Simplify	2014-02-04 22:02:15 +01:00
Jaime Marquínez Ferrándiz	bc4850908c	[test/youtube_signature] Add a test with the last player To verify it correctly handles function with “$” in their names.	2014-02-04 21:56:17 +01:00
Jaime Marquínez Ferrándiz	20650c8654	[youtube] signatures: Recognize javascript functions that contain “$” (fixes #2304 )	2014-02-04 21:38:50 +01:00
Philipp Hagemeister	56dced2670	remove accidentally duplicated test file	2014-02-04 16:35:22 +01:00
Philipp Hagemeister	eef726c04b	release 2014.02.04	2014-02-04 16:33:19 +01:00
Philipp Hagemeister	acf1555d76	Merge remote-tracking branch 'origin/master'	2014-02-04 16:33:06 +01:00
Philipp Hagemeister	22e7f1a6ec	[pbs] Add support for article pages (Fixes #870 )	2014-02-04 16:31:00 +01:00
Sergey M.	3c49325658	[lifenews] Fix video URL extraction (Closes #2302 )	2014-02-04 21:31:25 +07:00
Sergey M	bb1cd2bea1	[mooshare] Add support for mooshare.biz (Closes #2149 )	2014-02-04 20:53:46 +07:00
Philipp Hagemeister	fdf1f8d4ce	[collegehumor] Adapt test to changed video description	2014-02-04 10:37:01 +01:00
Philipp Hagemeister	117c8c6b97	[bliptv] Remove unused imports	2014-02-04 10:25:19 +01:00
Philipp Hagemeister	5cef4ff09b	[subtittles] Check that the result is not empty	2014-02-04 10:24:17 +01:00
Philipp Hagemeister	91264ce572	[iprima] Use centralized format sorting	2014-02-04 10:24:00 +01:00
Philipp Hagemeister	c79ef8e1ae	Merge remote-tracking branch 'pulpe/_iprima'	2014-02-04 10:21:42 +01:00
Philipp Hagemeister	58d915df51	[traileraddict] mark as broken traileraddict has changed their URL encoding scheme. I'm working on restoring support, but that may take some time.	2014-02-04 10:13:52 +01:00
pulpe	7881a64499	[iprima] Add support for play.iprima.cz	2014-02-04 07:45:41 +01:00
Philipp Hagemeister	90159f5561	release 2014.02.03.1	2014-02-03 15:20:41 +01:00
Philipp Hagemeister	99877772d0	[generic] Add support for multiple brightcove URLs (Fixes #2283 )	2014-02-03 15:19:40 +01:00
Sergey M.	b0268cb6ce	[vimeo] Remove superfluous whitespace	2014-02-03 20:24:11 +07:00
Sergey M.	4edff4cfa8	[vimeo] Add subtitle tests	2014-02-03 20:19:23 +07:00
Sergey M.	1eac553e7e	[vimeo] Add support for subtitles (Closes #2239 )	2014-02-03 20:02:58 +07:00
`@ -1,2 +1,2 @@`

	`__version__ = '2014.02.03'`	`__version__ = '2014.02.10'`