release 2014.02.10

[youtube] Correct a minor regex typo
Fix #2355 (date parsing with dashes)
2014-02-10 02:01:11 +01:00 · 2014-02-10 01:30:47 +01:00 · 2014-02-09 18:09:57 +01:00 · 2014-02-09 17:56:10 +01:00 · 2014-02-09 14:23:19 +01:00 · 2014-02-09 14:11:45 +01:00
25 changed files with 734 additions and 70 deletions
--- a/test/test_all_urls.py
+++ b/test/test_all_urls.py
@ -1,5 +1,7 @@
 #!/usr/bin/env python

+from __future__ import unicode_literals
+
 # Allow direct execution
 import os
 import sys
@ -13,6 +15,7 @@ from youtube_dl.extractor import (
    FacebookIE,
    gen_extractors,
    JustinTVIE,
+    PBSIE,
    YoutubeIE,
 )

@ -29,18 +32,20 @@ class TestAllURLsMatching(unittest.TestCase):

    def test_youtube_playlist_matching(self):
        assertPlaylist = lambda url: self.assertMatch(url, ['youtube:playlist'])
-        assertPlaylist(u'ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
-        assertPlaylist(u'UUBABnxM4Ar9ten8Mdjj1j0Q') #585
-        assertPlaylist(u'PL63F0C78739B09958')
-        assertPlaylist(u'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
-        assertPlaylist(u'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
-        assertPlaylist(u'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
-        assertPlaylist(u'https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012') #668
-        self.assertFalse('youtube:playlist' in self.matching_ies(u'PLtS2H6bU1M'))
+        assertPlaylist('ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
+        assertPlaylist('UUBABnxM4Ar9ten8Mdjj1j0Q') #585
+        assertPlaylist('PL63F0C78739B09958')
+        assertPlaylist('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
+        assertPlaylist('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
+        assertPlaylist('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
+        assertPlaylist('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012') #668
+        self.assertFalse('youtube:playlist' in self.matching_ies('PLtS2H6bU1M'))
+        # Top tracks
+        assertPlaylist('https://www.youtube.com/playlist?list=MCUS.20142101')

    def test_youtube_matching(self):
-        self.assertTrue(YoutubeIE.suitable(u'PLtS2H6bU1M'))
-        self.assertFalse(YoutubeIE.suitable(u'https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) #668
+        self.assertTrue(YoutubeIE.suitable('PLtS2H6bU1M'))
+        self.assertFalse(YoutubeIE.suitable('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) #668
        self.assertMatch('http://youtu.be/BaW_jenozKc', ['youtube'])
        self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube'])
        self.assertMatch('https://youtube.googleapis.com/v/BaW_jenozKc', ['youtube'])
@ -80,7 +85,7 @@ class TestAllURLsMatching(unittest.TestCase):
        self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/tsm_theoddone/c/2349361"))

    def test_youtube_extract(self):
-        assertExtractId = lambda url, id: self.assertEqual(YoutubeIE()._extract_id(url), id)
+        assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id)
        assertExtractId('http://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
        assertExtractId('https://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
        assertExtractId('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc', 'BaW_jenozKc')
@ -89,7 +94,7 @@ class TestAllURLsMatching(unittest.TestCase):
        assertExtractId('BaW_jenozKc', 'BaW_jenozKc')

    def test_facebook_matching(self):
-        self.assertTrue(FacebookIE.suitable(u'https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268'))
+        self.assertTrue(FacebookIE.suitable('https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268'))

    def test_no_duplicates(self):
        ies = gen_extractors()
@ -124,5 +129,9 @@ class TestAllURLsMatching(unittest.TestCase):
        self.assertMatch('http://tatianamaslanydaily.tumblr.com/post/54196191430/orphan-black-dvd-extra-behind-the-scenes', ['Tumblr'])
        self.assertMatch('http://tatianamaslanydaily.tumblr.com/post/54196191430', ['Tumblr'])

+    def test_pbs(self):
+        """A PBS 'viralplayer' URL is expected to be matched by the 'PBS' extractor."""
+        # See https://github.com/rg3/youtube-dl/issues/2350
+        # NOTE(review): assertMatch is defined outside this hunk; presumably it
+        # compares the names of all matching extractors with the given list.
+        self.assertMatch('http://video.pbs.org/viralplayer/2365173446/', ['PBS'])
+
 if __name__ == '__main__':
    unittest.main()
--- a/test/test_utils.py
+++ b/test/test_utils.py
@ -127,6 +127,7 @@ class TestUtil(unittest.TestCase):
        self.assertEqual(unified_strdate('8/7/2009'), '20090708')
        self.assertEqual(unified_strdate('Dec 14, 2012'), '20121214')
        self.assertEqual(unified_strdate('2012/10/11 01:56:38 +0000'), '20121011')
+        self.assertEqual(unified_strdate('1968-12-10'), '19681210')

    def test_find_xpath_attr(self):
        testxml = u'''<root>
--- a/test/test_youtube_lists.py
+++ b/test/test_youtube_lists.py
@ -30,7 +30,7 @@ class TestYoutubeLists(unittest.TestCase):
        result = ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')
        self.assertIsPlaylist(result)
        self.assertEqual(result['title'], 'ytdl test PL')
-        ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']]
+        ytie_results = [YoutubeIE().extract_id(url['url']) for url in result['entries']]
        self.assertEqual(ytie_results, [ 'bV9L5Ht9LgY', 'FXxLjLQi3Fg', 'tU3Bgo5qJZE'])

    def test_youtube_playlist_noplaylist(self):
@ -39,7 +39,7 @@ class TestYoutubeLists(unittest.TestCase):
        ie = YoutubePlaylistIE(dl)
        result = ie.extract('https://www.youtube.com/watch?v=FXxLjLQi3Fg&list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')
        self.assertEqual(result['_type'], 'url')
-        self.assertEqual(YoutubeIE()._extract_id(result['url']), 'FXxLjLQi3Fg')
+        self.assertEqual(YoutubeIE().extract_id(result['url']), 'FXxLjLQi3Fg')

    def test_issue_673(self):
        dl = FakeYDL()
@ -59,7 +59,7 @@ class TestYoutubeLists(unittest.TestCase):
        dl = FakeYDL()
        ie = YoutubePlaylistIE(dl)
        result = ie.extract('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
-        ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']]
+        ytie_results = [YoutubeIE().extract_id(url['url']) for url in result['entries']]
        self.assertFalse('pElCt5oNDuI' in ytie_results)
        self.assertFalse('KdPEApIVdWM' in ytie_results)
        
@ -76,9 +76,9 @@ class TestYoutubeLists(unittest.TestCase):
        # TODO find a > 100 (paginating?) videos course
        result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
        entries = result['entries']
-        self.assertEqual(YoutubeIE()._extract_id(entries[0]['url']), 'j9WZyLZCBzs')
+        self.assertEqual(YoutubeIE().extract_id(entries[0]['url']), 'j9WZyLZCBzs')
        self.assertEqual(len(entries), 25)
-        self.assertEqual(YoutubeIE()._extract_id(entries[-1]['url']), 'rYefUsYuEp0')
+        self.assertEqual(YoutubeIE().extract_id(entries[-1]['url']), 'rYefUsYuEp0')

    def test_youtube_channel(self):
        dl = FakeYDL()
@ -117,6 +117,13 @@ class TestYoutubeLists(unittest.TestCase):
        original_video = entries[0]
        self.assertEqual(original_video['id'], 'rjFaenf1T-Y')

+    def test_youtube_toptracks(self):
+        """Extracting the 'MCUS' playlist URL must yield exactly 100 entries."""
+        dl = FakeYDL()
+        ie = YoutubePlaylistIE(dl)
+        # NOTE(review): other tests in this file label 'MC…' lists as "Top tracks";
+        # the fixed count of 100 presumably reflects that list's size — confirm.
+        result = ie.extract('https://www.youtube.com/playlist?list=MCUS')
+        entries = result['entries']
+        self.assertEqual(len(entries), 100)
+
    def test_youtube_toplist(self):
        dl = FakeYDL()
        ie = YoutubeTopListIE(dl)
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@ -41,6 +41,7 @@ __authors__  = (
    'Chris Gahan',
    'Saimadhav Heblikar',
    'Mike Col',
+    'Andreas Schmitz',
 )

 __license__ = 'Public Domain'
--- a/youtube_dl/downloader/rtmp.py
+++ b/youtube_dl/downloader/rtmp.py
@ -87,8 +87,10 @@ class RtmpFD(FileDownloader):
        url = info_dict['url']
        player_url = info_dict.get('player_url', None)
        page_url = info_dict.get('page_url', None)
+        app = info_dict.get('app', None)
        play_path = info_dict.get('play_path', None)
        tc_url = info_dict.get('tc_url', None)
+        flash_version = info_dict.get('flash_version', None)
        live = info_dict.get('rtmp_live', False)
        conn = info_dict.get('rtmp_conn', None)

@ -111,12 +113,16 @@ class RtmpFD(FileDownloader):
            basic_args += ['--swfVfy', player_url]
        if page_url is not None:
            basic_args += ['--pageUrl', page_url]
+        if app is not None:
+            basic_args += ['--app', app]
        if play_path is not None:
            basic_args += ['--playpath', play_path]
        if tc_url is not None:
            basic_args += ['--tcUrl', url]
        if test:
            basic_args += ['--stop', '1']
+        if flash_version is not None:
+            basic_args += ['--flashVer', flash_version]
        if live:
            basic_args += ['--live']
        if conn:
--- a/youtube_dl/extractor/init.py
+++ b/youtube_dl/extractor/init.py
@ -15,6 +15,7 @@ from .arte import (
 from .auengine import AUEngineIE
 from .bambuser import BambuserIE, BambuserChannelIE
 from .bandcamp import BandcampIE, BandcampAlbumIE
+from .bbccouk import BBCCoUkIE
 from .blinkx import BlinkxIE
 from .bliptv import BlipTVIE, BlipTVUserIE
 from .bloomberg import BloombergIE
@ -25,6 +26,7 @@ from .canalplus import CanalplusIE
 from .canalc2 import Canalc2IE
 from .cbs import CBSIE
 from .channel9 import Channel9IE
+from .chilloutzone import ChilloutzoneIE
 from .cinemassacre import CinemassacreIE
 from .clipfish import ClipfishIE
 from .cliphunter import CliphunterIE
@ -103,6 +105,7 @@ from .ivi import (
    IviIE,
    IviCompilationIE
 )
+from .jadorecettepub import JadoreCettePubIE
 from .jeuxvideo import JeuxVideoIE
 from .jukebox import JukeboxIE
 from .justintv import JustinTVIE
@ -112,6 +115,7 @@ from .keezmovies import KeezMoviesIE
 from .khanacademy import KhanAcademyIE
 from .kickstarter import KickStarterIE
 from .keek import KeekIE
+from .kontrtube import KontrTubeIE
 from .la7 import LA7IE
 from .lifenews import LifeNewsIE
 from .liveleak import LiveLeakIE
@ -142,8 +146,10 @@ from .myvideo import MyVideoIE
 from .naver import NaverIE
 from .nba import NBAIE
 from .nbc import NBCNewsIE
+from .ndr import NDRIE
 from .ndtv import NDTVIE
 from .newgrounds import NewgroundsIE
+from .nfb import NFBIE
 from .nhl import NHLIE, NHLVideocenterIE
 from .niconico import NiconicoIE
 from .ninegag import NineGagIE
--- a/youtube_dl/extractor/bbccouk.py
+++ b/youtube_dl/extractor/bbccouk.py
@ -0,0 +1,217 @@
+from __future__ import unicode_literals
+
+import re
+
+from .subtitles import SubtitlesInfoExtractor
+from ..utils import ExtractorError
+
+
+class BBCCoUkIE(SubtitlesInfoExtractor):
+    IE_NAME = 'bbc.co.uk'
+    IE_DESC = 'BBC iPlayer'
+    _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:programmes|iplayer/episode)/(?P<id>[\da-z]{8})'
+
+    _TESTS = [
+        {
+            'url': 'http://www.bbc.co.uk/programmes/p01q7wz1',
+            'info_dict': {
+                'id': 'p01q7wz4',
+                'ext': 'flv',
+                'title': 'Friction: Blu Mar Ten guest mix: Blu Mar Ten - Guest Mix',
+                'description': 'Blu Mar Ten deliver a Guest Mix for Friction.',
+                'duration': 1936,
+            },
+            'params': {
+                # rtmp download
+                'skip_download': True,
+            }
+        },
+        {
+            'url': 'http://www.bbc.co.uk/iplayer/episode/b00yng5w/The_Man_in_Black_Series_3_The_Printed_Name/',
+            'info_dict': {
+                'id': 'b00yng1d',
+                'ext': 'flv',
+                'title': 'The Man in Black: Series 3: The Printed Name',
+                'description': "Mark Gatiss introduces Nicholas Pierpan's chilling tale of a writer's devilish pact with a mysterious man. Stars Ewan Bailey.",
+                'duration': 1800,
+            },
+            'params': {
+                # rtmp download
+                'skip_download': True,
+            }
+        },
+        {
+            'url': 'http://www.bbc.co.uk/iplayer/episode/b03vhd1f/The_Voice_UK_Series_3_Blind_Auditions_5/',
+            'info_dict': {
+                'id': 'b00yng1d',
+                'ext': 'flv',
+                'title': 'The Voice UK: Series 3: Blind Auditions 5',
+                'description': "Emma Willis and Marvin Humes present the fifth set of blind auditions in the singing competition, as the coaches continue to build their teams based on voice alone.",
+                'duration': 5100,
+            },
+            'params': {
+                # rtmp download
+                'skip_download': True,
+            },
+            'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
+        }
+    ]
+
+    def _extract_asx_playlist(self, connection, programme_id):
+        asx = self._download_xml(connection.get('href'), programme_id, 'Downloading ASX playlist')
+        return [ref.get('href') for ref in asx.findall('./Entry/ref')]
+
+    def _extract_connection(self, connection, programme_id):
+        formats = []
+        protocol = connection.get('protocol')
+        supplier = connection.get('supplier')
+        if protocol == 'http':
+            href = connection.get('href')
+            # ASX playlist
+            if supplier == 'asx':
+                for i, ref in enumerate(self._extract_asx_playlist(connection, programme_id)):
+                    formats.append({
+                        'url': ref,
+                        'format_id': 'ref%s_%s' % (i, supplier),
+                    })
+            # Direct link
+            else:
+                formats.append({
+                    'url': href,
+                    'format_id': supplier,
+                })
+        elif protocol == 'rtmp':
+            application = connection.get('application', 'ondemand')
+            auth_string = connection.get('authString')
+            identifier = connection.get('identifier')
+            server = connection.get('server')
+            formats.append({
+                'url': '%s://%s/%s?%s' % (protocol, server, application, auth_string),
+                'play_path': identifier,
+                'app': '%s?%s' % (application, auth_string),
+                'page_url': 'http://www.bbc.co.uk',
+                'player_url': 'http://www.bbc.co.uk/emp/releases/iplayer/revisions/617463_618125_4/617463_618125_4_emp.swf',
+                'rtmp_live': False,
+                'ext': 'flv',
+                'format_id': supplier,
+            })
+        return formats
+
+    def _extract_items(self, playlist):
+        return playlist.findall('./{http://bbc.co.uk/2008/emp/playlist}item')
+
+    def _extract_medias(self, media_selection):
+        return media_selection.findall('./{http://bbc.co.uk/2008/mp/mediaselection}media')
+
+    def _extract_connections(self, media):
+        return media.findall('./{http://bbc.co.uk/2008/mp/mediaselection}connection')
+
+    def _extract_video(self, media, programme_id):
+        formats = []
+        vbr = int(media.get('bitrate'))
+        vcodec = media.get('encoding')
+        service = media.get('service')
+        width = int(media.get('width'))
+        height = int(media.get('height'))
+        file_size = int(media.get('media_file_size'))
+        for connection in self._extract_connections(media):
+            conn_formats = self._extract_connection(connection, programme_id)
+            for format in conn_formats:
+                format.update({
+                    'format_id': '%s_%s' % (service, format['format_id']),
+                    'width': width,
+                    'height': height,
+                    'vbr': vbr,
+                    'vcodec': vcodec,
+                    'filesize': file_size,
+                })
+            formats.extend(conn_formats)
+        return formats
+
+    def _extract_audio(self, media, programme_id):
+        formats = []
+        abr = int(media.get('bitrate'))
+        acodec = media.get('encoding')
+        service = media.get('service')
+        for connection in self._extract_connections(media):
+            conn_formats = self._extract_connection(connection, programme_id)
+            for format in conn_formats:
+                format.update({
+                    'format_id': '%s_%s' % (service, format['format_id']),
+                    'abr': abr,
+                    'acodec': acodec,
+                })
+            formats.extend(conn_formats)
+        return formats
+
+    def _extract_captions(self, media, programme_id):
+        subtitles = {}
+        for connection in self._extract_connections(media):
+            captions = self._download_xml(connection.get('href'), programme_id, 'Downloading captions')
+            lang = captions.get('{http://www.w3.org/XML/1998/namespace}lang', 'en')
+            ps = captions.findall('./{0}body/{0}div/{0}p'.format('{http://www.w3.org/2006/10/ttaf1}'))
+            srt = ''
+            for pos, p in enumerate(ps):
+                srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (str(pos), p.get('begin'), p.get('end'),
+                                                          p.text.strip() if p.text is not None else '')
+            subtitles[lang] = srt
+        return subtitles
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        group_id = mobj.group('id')
+
+        playlist = self._download_xml('http://www.bbc.co.uk/iplayer/playlist/%s' % group_id, group_id,
+            'Downloading playlist XML')
+
+        no_items = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}noItems')
+        if no_items is not None:
+            reason = no_items.get('reason')
+            if reason == 'preAvailability':
+                msg = 'Episode %s is not yet available' % group_id
+            elif reason == 'postAvailability':
+                msg = 'Episode %s is no longer available' % group_id
+            else:
+                msg = 'Episode %s is not available: %s' % (group_id, reason)
+            raise ExtractorError(msg, expected=True)
+
+        formats = []
+        subtitles = None
+
+        for item in self._extract_items(playlist):
+            kind = item.get('kind')
+            if kind != 'programme' and kind != 'radioProgramme':
+                continue
+            title = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}title').text
+            description = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}summary').text
+
+            programme_id = item.get('identifier')
+            duration = int(item.get('duration'))
+
+            media_selection = self._download_xml(
+                'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s'  % programme_id,
+                programme_id, 'Downloading media selection XML')
+
+            for media in self._extract_medias(media_selection):
+                kind = media.get('kind')
+                if kind == 'audio':
+                    formats.extend(self._extract_audio(media, programme_id))
+                elif kind == 'video':
+                    formats.extend(self._extract_video(media, programme_id))
+                elif kind == 'captions':
+                    subtitles = self._extract_captions(media, programme_id)
+
+        if self._downloader.params.get('listsubtitles', False):
+            self._list_available_subtitles(programme_id, subtitles)
+            return
+
+        self._sort_formats(formats)
+
+        return {
+            'id': programme_id,
+            'title': title,
+            'description': description,
+            'duration': duration,
+            'formats': formats,
+            'subtitles': subtitles,
+        }
--- a/youtube_dl/extractor/bloomberg.py
+++ b/youtube_dl/extractor/bloomberg.py
@ -24,5 +24,7 @@ class BloombergIE(InfoExtractor):
        mobj = re.match(self._VALID_URL, url)
        name = mobj.group('name')
        webpage = self._download_webpage(url, name)
-        ooyala_url = self._twitter_search_player(webpage)
-        return self.url_result(ooyala_url, OoyalaIE.ie_key())
+        embed_code = self._search_regex(
+            r'<source src="https?://[^/]+/[^/]+/[^/]+/([^/]+)', webpage,
+            'embed code')
+        return OoyalaIE._build_url_result(embed_code)
--- a/youtube_dl/extractor/channel9.py
+++ b/youtube_dl/extractor/channel9.py
@ -15,14 +15,15 @@ class Channel9IE(InfoExtractor):
    '''
    IE_DESC = 'Channel 9'
    IE_NAME = 'channel9'
-    _VALID_URL = r'^https?://(?:www\.)?channel9\.msdn\.com/(?P<contentpath>.+)/?'
+    _VALID_URL = r'https?://(?:www\.)?channel9\.msdn\.com/(?P<contentpath>.+)/?'

    _TESTS = [
        {
            'url': 'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002',
-            'file': 'Events_TechEd_Australia_2013_KOS002.mp4',
            'md5': 'bbd75296ba47916b754e73c3a4bbdf10',
            'info_dict': {
+                'id': 'Events/TechEd/Australia/2013/KOS002',
+                'ext': 'mp4',
                'title': 'Developer Kick-Off Session: Stuff We Love',
                'description': 'md5:c08d72240b7c87fcecafe2692f80e35f',
                'duration': 4576,
@ -35,9 +36,10 @@ class Channel9IE(InfoExtractor):
        },
        {
            'url': 'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing',
-            'file': 'posts_Self-service-BI-with-Power-BI-nuclear-testing.mp4',
            'md5': 'b43ee4529d111bc37ba7ee4f34813e68',
            'info_dict': {
+                'id': 'posts/Self-service-BI-with-Power-BI-nuclear-testing',
+                'ext': 'mp4',
                'title': 'Self-service BI with Power BI - nuclear testing',
                'description': 'md5:d1e6ecaafa7fb52a2cacdf9599829f5b',
                'duration': 1540,
--- a/youtube_dl/extractor/chilloutzone.py
+++ b/youtube_dl/extractor/chilloutzone.py
@ -0,0 +1,97 @@
+from __future__ import unicode_literals
+
+import re
+import base64
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+    clean_html,
+    ExtractorError
+)
+
+
+class ChilloutzoneIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?chilloutzone\.net/video/(?P<id>[\w|-]+)\.html'
+    _TESTS = [{
+        'url': 'http://www.chilloutzone.net/video/enemene-meck-alle-katzen-weg.html',
+        'md5': 'a76f3457e813ea0037e5244f509e66d1',
+        'info_dict': {
+            'id': 'enemene-meck-alle-katzen-weg',
+            'ext': 'mp4',
+            'title': 'Enemene Meck - Alle Katzen weg',
+            'description': 'Ist das der Umkehrschluss des Niesenden Panda-Babys?',
+        },
+    }, {
+        'note': 'Video hosted at YouTube',
+        'url': 'http://www.chilloutzone.net/video/eine-sekunde-bevor.html',
+        'info_dict': {
+            'id': '1YVQaAgHyRU',
+            'ext': 'mp4',
+            'title': '16 Photos Taken 1 Second Before Disaster',
+            'description': 'md5:58a8fcf6a459fe0a08f54140f0ad1814',
+            'uploader': 'BuzzFeedVideo',
+            'uploader_id': 'BuzzFeedVideo',
+            'upload_date': '20131105',
+        },
+    }, {
+        'note': 'Video hosted at Vimeo',
+        'url': 'http://www.chilloutzone.net/video/icon-blending.html',
+        'md5': '2645c678b8dc4fefcc0e1b60db18dac1',
+        'info_dict': {
+            'id': '85523671',
+            'ext': 'mp4',
+            'title': 'The Sunday Times - Icons',
+            'description': 'md5:3e5e8e839f076a637c6b9406c8f25c4c',
+            'uploader': 'Us',
+            'uploader_id': 'usfilms',
+            'upload_date': '20140131'
+        },
+    }]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+
+        base64_video_info = self._html_search_regex(
+            r'var cozVidData = "(.+?)";', webpage, 'video data')
+        decoded_video_info = base64.b64decode(base64_video_info).decode("utf-8")
+        video_info_dict = json.loads(decoded_video_info)
+
+        # get video information from dict
+        video_url = video_info_dict['mediaUrl']
+        description = clean_html(video_info_dict.get('description'))
+        title = video_info_dict['title']
+        native_platform = video_info_dict['nativePlatform']
+        native_video_id = video_info_dict['nativeVideoId']
+        source_priority = video_info_dict['sourcePriority']
+
+        # If nativePlatform is None a fallback mechanism is used (i.e. youtube embed)
+        if native_platform is None:
+            youtube_url = self._html_search_regex(
+                r'<iframe.* src="((?:https?:)?//(?:[^.]+\.)?youtube\.com/.+?)"',
+                webpage, 'fallback video URL', default=None)
+            if youtube_url is not None:
+                return self.url_result(youtube_url, ie='Youtube')
+
+        # Non Fallback: Decide to use native source (e.g. youtube or vimeo) or
+        # the own CDN
+        if source_priority == 'native':
+            if native_platform == 'youtube':
+                return self.url_result(native_video_id, ie='Youtube')
+            if native_platform == 'vimeo':
+                return self.url_result(
+                    'http://vimeo.com/' + native_video_id, ie='Vimeo')
+
+        if not video_url:
+            raise ExtractorError('No video found')
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'ext': 'mp4',
+            'title': title,
+            'description': description,
+        }
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@ -271,8 +271,11 @@ class InfoExtractor(object):

    def _download_json(self, url_or_request, video_id,
                       note=u'Downloading JSON metadata',
-                       errnote=u'Unable to download JSON metadata'):
+                       errnote=u'Unable to download JSON metadata',
+                       transform_source=None):
        json_string = self._download_webpage(url_or_request, video_id, note, errnote)
+        if transform_source:
+            json_string = transform_source(json_string)
        try:
            return json.loads(json_string)
        except ValueError as ve:
--- a/youtube_dl/extractor/elpais.py
+++ b/youtube_dl/extractor/elpais.py
@ -9,7 +9,7 @@ from ..utils import unified_strdate

 class ElPaisIE(InfoExtractor):
    _VALID_URL = r'https?://(?:[^.]+\.)?elpais\.com/.*/(?P<id>[^/#?]+)\.html(?:$|[?#])'
-    IE_DESCR = 'El País'
+    IE_DESC = 'El País'

    _TEST = {
        'url': 'http://blogs.elpais.com/la-voz-de-inaki/2014/02/tiempo-nuevo-recetas-viejas.html',
--- a/youtube_dl/extractor/ivi.py
+++ b/youtube_dl/extractor/ivi.py
@ -14,15 +14,16 @@ from ..utils import (
 class IviIE(InfoExtractor):
    IE_DESC = 'ivi.ru'
    IE_NAME = 'ivi'
-    _VALID_URL = r'^https?://(?:www\.)?ivi\.ru/watch(?:/(?P<compilationid>[^/]+))?/(?P<videoid>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?ivi\.ru/watch(?:/(?P<compilationid>[^/]+))?/(?P<videoid>\d+)'

    _TESTS = [
        # Single movie
        {
            'url': 'http://www.ivi.ru/watch/53141',
-            'file': '53141.mp4',
            'md5': '6ff5be2254e796ed346251d117196cf4',
            'info_dict': {
+                'id': '53141',
+                'ext': 'mp4',
                'title': 'Иван Васильевич меняет профессию',
                'description': 'md5:b924063ea1677c8fe343d8a72ac2195f',
                'duration': 5498,
@ -33,9 +34,10 @@ class IviIE(InfoExtractor):
        # Serial's serie
        {
            'url': 'http://www.ivi.ru/watch/dezhurnyi_angel/74791',
-            'file': '74791.mp4',
            'md5': '3e6cc9a848c1d2ebcc6476444967baa9',
            'info_dict': {
+                'id': '74791',
+                'ext': 'mp4',
                'title': 'Дежурный ангел - 1 серия',
                'duration': 2490,
                'thumbnail': 'http://thumbs.ivi.ru/f7.vcp.digitalaccess.ru/contents/8/e/bc2f6c2b6e5d291152fdd32c059141.jpg',
@ -124,7 +126,7 @@ class IviIE(InfoExtractor):
 class IviCompilationIE(InfoExtractor):
    IE_DESC = 'ivi.ru compilations'
    IE_NAME = 'ivi:compilation'
-    _VALID_URL = r'^https?://(?:www\.)?ivi\.ru/watch/(?!\d+)(?P<compilationid>[a-z\d_-]+)(?:/season(?P<seasonid>\d+))?$'
+    _VALID_URL = r'https?://(?:www\.)?ivi\.ru/watch/(?!\d+)(?P<compilationid>[a-z\d_-]+)(?:/season(?P<seasonid>\d+))?$'

    def _extract_entries(self, html, compilation_id):
        return [self.url_result('http://www.ivi.ru/watch/%s/%s' % (compilation_id, serie), 'Ivi')
--- a/youtube_dl/extractor/jadorecettepub.py
+++ b/youtube_dl/extractor/jadorecettepub.py
@ -0,0 +1,49 @@
+# coding: utf-8
+
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from .youtube import YoutubeIE
+
+
+class JadoreCettePubIE(InfoExtractor):
+    _VALID_URL = r'http://(?:www\.)?jadorecettepub\.com/[0-9]{4}/[0-9]{2}/(?P<id>.*?)\.html'
+
+    _TEST = {
+        'url': 'http://www.jadorecettepub.com/2010/12/star-wars-massacre-par-les-japonais.html',
+        'md5': '401286a06067c70b44076044b66515de',
+        'info_dict': {
+            'id': 'jLMja3tr7a4',
+            'ext': 'mp4',
+            'title': 'La pire utilisation de Star Wars',
+            'description': "Jadorecettepub.com vous a gratifié de plusieurs pubs géniales utilisant Star Wars et Dark Vador plus particulièrement... Mais l'heure est venue de vous proposer une version totalement massacrée, venue du Japon.  Quand les Japonais détruisent l'image de Star Wars pour vendre du thon en boite, ça promet...",
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        display_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, display_id)
+
+        title = self._html_search_regex(
+            r'<span style="font-size: x-large;"><b>(.*?)</b></span>',
+            webpage, 'title')
+        description = self._html_search_regex(
+            r'(?s)<div id="fb-root">(.*?)<script>', webpage, 'description',
+            fatal=False)
+        real_url = self._search_regex(
+            r'\[/postlink\](.*)endofvid', webpage, 'video URL')
+        video_id = YoutubeIE.extract_id(real_url)
+
+        return {
+            '_type': 'url_transparent',
+            'url': real_url,
+            'id': video_id,
+            'title': title,
+            'description': description,
+        }
+
--- a/youtube_dl/extractor/jeuxvideo.py
+++ b/youtube_dl/extractor/jeuxvideo.py
@ -1,5 +1,7 @@
 # coding: utf-8

+from __future__ import unicode_literals
+
 import json
 import re

@ -10,12 +12,13 @@ class JeuxVideoIE(InfoExtractor):
    _VALID_URL = r'http://.*?\.jeuxvideo\.com/.*/(.*?)-\d+\.htm'

    _TEST = {
-        u'url': u'http://www.jeuxvideo.com/reportages-videos-jeux/0004/00046170/tearaway-playstation-vita-gc-2013-tearaway-nous-presente-ses-papiers-d-identite-00115182.htm',
-        u'file': u'5182.mp4',
-        u'md5': u'046e491afb32a8aaac1f44dd4ddd54ee',
-        u'info_dict': {
-            u'title': u'GC 2013 : Tearaway nous présente ses papiers d\'identité',
-            u'description': u'Lorsque les développeurs de LittleBigPlanet proposent un nouveau titre, on ne peut que s\'attendre à un résultat original et fort attrayant.\n',
+        'url': 'http://www.jeuxvideo.com/reportages-videos-jeux/0004/00046170/tearaway-playstation-vita-gc-2013-tearaway-nous-presente-ses-papiers-d-identite-00115182.htm',
+        'md5': '046e491afb32a8aaac1f44dd4ddd54ee',
+        'info_dict': {
+            'id': '5182',
+            'ext': 'mp4',
+            'title': 'GC 2013 : Tearaway nous présente ses papiers d\'identité',
+            'description': 'Lorsque les développeurs de LittleBigPlanet proposent un nouveau titre, on ne peut que s\'attendre à un résultat original et fort attrayant.\n',
        },
    }

@ -25,14 +28,14 @@ class JeuxVideoIE(InfoExtractor):
        webpage = self._download_webpage(url, title)
        xml_link = self._html_search_regex(
            r'<param name="flashvars" value="config=(.*?)" />',
-            webpage, u'config URL')
+            webpage, 'config URL')
        
        video_id = self._search_regex(
            r'http://www\.jeuxvideo\.com/config/\w+/\d+/(.*?)/\d+_player\.xml',
-            xml_link, u'video ID')
+            xml_link, 'video ID')

        config = self._download_xml(
-            xml_link, title, u'Downloading XML config')
+            xml_link, title, 'Downloading XML config')
        info_json = config.find('format.json').text
        info = json.loads(info_json)['versions'][0]
        
--- a/youtube_dl/extractor/kontrtube.py
+++ b/youtube_dl/extractor/kontrtube.py
@ -0,0 +1,77 @
+# encoding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class KontrTubeIE(InfoExtractor):
+    """Extractor for kontrtube.ru video pages (URLs matching _VALID_URL)."""
+
+    IE_NAME = 'kontrtube'
+    IE_DESC = 'KontrTube.ru - Труба зовёт'
+    _VALID_URL = r'http://(?:www\.)?kontrtube\.ru/videos/(?P<id>\d+)/.+'
+
+    _TEST = {
+        'url': 'http://www.kontrtube.ru/videos/2678/nad-olimpiyskoy-derevney-v-sochi-podnyat-rossiyskiy-flag/',
+        'md5': '975a991a4926c9a85f383a736a2e6b80',
+        'info_dict': {
+            'id': '2678',
+            'ext': 'mp4',
+            'title': 'Над олимпийской деревней в Сочи поднят российский флаг',
+            'description': 'md5:80edc4c613d5887ae8ccf1d59432be41',
+            'thumbnail': 'http://www.kontrtube.ru/contents/videos_screenshots/2000/2678/preview.mp4.jpg',
+            'duration': 270,
+        }
+    }
+
+    def _real_extract(self, url):
+        """Build the info dict for the video page at url.
+
+        The numeric id comes from _VALID_URL; every other field is taken
+        from the downloaded page. duration and comment_count are None
+        when their markup is not matched.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id, 'Downloading page')
+
+        video_url = self._html_search_regex(r"video_url: '(.+?)/?',", webpage, 'video URL')
+        thumbnail = self._html_search_regex(r"preview_url: '(.+?)/?',", webpage, 'video thumbnail', fatal=False)
+        title = self._html_search_regex(r'<title>(.+?) - Труба зовёт - Интересный видеохостинг</title>', webpage,
+            'video title')
+        description = self._html_search_meta('description', webpage, 'video description')
+
+        mobj = re.search(r'<div class="col_2">Длительность: <span>(?P<minutes>\d+)м:(?P<seconds>\d+)с</span></div>',
+            webpage)
+        duration = int(mobj.group('minutes')) * 60 + int(mobj.group('seconds')) if mobj else None
+
+        view_count = self._html_search_regex(r'<div class="col_2">Просмотров: <span>(\d+)</span></div>', webpage,
+            'view count', fatal=False)
+        view_count = int(view_count) if view_count is not None else None
+
+        comment_count = None
+        comment_str = self._html_search_regex(r'Комментарии: <span>([^<]+)</span>', webpage, 'comment count',
+            fatal=False)
+        # Like view_count above, a fatal=False search may give None, so only
+        # parse the text when something was actually found.
+        if comment_str:
+            if comment_str.startswith('комментариев нет'):
+                comment_count = 0
+            else:
+                mobj = re.search(r'\d+ из (?P<total>\d+) комментариев', comment_str)
+                if mobj:
+                    comment_count = int(mobj.group('total'))
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'thumbnail': thumbnail,
+            'title': title,
+            'description': description,
+            'duration': duration,
+            'view_count': view_count,
+            'comment_count': comment_count,
+        }
--- a/youtube_dl/extractor/mooshare.py
+++ b/youtube_dl/extractor/mooshare.py
@ -61,7 +61,7 @@ class MooshareIE(InfoExtractor):
        }

        request = compat_urllib_request.Request(
-            'http://mooshare.biz/8dqtk4bjbp8g', compat_urllib_parse.urlencode(download_form))
+            'http://mooshare.biz/%s' % video_id, compat_urllib_parse.urlencode(download_form))
        request.add_header('Content-Type', 'application/x-www-form-urlencoded')

        self.to_screen('%s: Waiting for timeout' % video_id)
@ -111,4 +111,4 @@ class MooshareIE(InfoExtractor):
            'thumbnail': thumbnail,
            'duration': duration,
            'formats': formats,
-        }
+        }
--- a/youtube_dl/extractor/ndr.py
+++ b/youtube_dl/extractor/ndr.py
@ -0,0 +1,99 @
+# encoding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import ExtractorError
+
+
+class NDRIE(InfoExtractor):
+    """Extractor for ndr.de audio and video pages (URLs matching _VALID_URL)."""
+
+    IE_NAME = 'ndr'
+    IE_DESC = 'NDR.de - Mediathek'
+    _VALID_URL = r'https?://www\.ndr\.de/.+?(?P<id>\d+)\.html'
+
+    _TESTS = [
+        # video
+        {
+            'url': 'http://www.ndr.de/fernsehen/sendungen/hallo_niedersachsen/media/hallonds19925.html',
+            'md5': '20eba151ff165f386643dad9c1da08f7',
+            'info_dict': {
+                'id': '19925',
+                'ext': 'mp4',
+                'title': 'Hallo Niedersachsen  ',
+                'description': 'Bei Hallo Niedersachsen um 19:30 Uhr erfahren Sie alles, was am Tag in Niedersachsen los war.',
+                'duration': 1722,
+            },
+        },
+        # audio
+        {
+            'url': 'http://www.ndr.de/903/audio191719.html',
+            'md5': '41ed601768534dd18a9ae34d84798129',
+            'info_dict': {
+                'id': '191719',
+                'ext': 'mp3',
+                'title': '"Es war schockierend"',
+                'description': 'md5:ed7ff8364793545021a6355b97e95f10',
+                'duration': 112,
+            }
+        }
+    ]
+
+    def _real_extract(self, url):
+        """Collect the audio and/or video formats offered by the page at url.
+
+        Raises ExtractorError when neither an mp3 nor an mp4 source is found
+        in the page.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        page = self._download_webpage(url, video_id, 'Downloading page')
+
+        title = self._og_search_title(page)
+        description = self._og_search_description(page)
+
+        mobj = re.search(
+            r'<div class="duration"><span class="min">(?P<minutes>\d+)</span>:<span class="sec">(?P<seconds>\d+)</span></div>',
+            page)
+        duration = int(mobj.group('minutes')) * 60 + int(mobj.group('seconds')) if mobj else None
+
+        formats = []
+
+        # Raw strings keep the regex backslashes out of string-escape processing.
+        mp3_url = re.search(r'''{src:'(?P<audio>[^']+)', type:"audio/mp3"},''', page)
+        if mp3_url:
+            formats.append({
+                'url': mp3_url.group('audio'),
+                'format_id': 'mp3',
+            })
+
+        thumbnail = None
+
+        # Capture the URL stem in front of ".hi.mp4"; the variants below are built from it.
+        video_url = re.search(r'''3: {src:'(?P<video>.+?)\.hi\.mp4', type:"video/mp4"},''', page)
+        if video_url:
+            thumbnail = self._html_search_regex(r'(?m)title: "NDR PLAYER",\s*poster: "([^"]+)",',
+                page, 'thumbnail', fatal=False)
+            if thumbnail:
+                thumbnail = 'http://www.ndr.de' + thumbnail
+            # One format per quality suffix appended to the captured stem.
+            for format_id in ['lo', 'hi', 'hq']:
+                formats.append({
+                    'url': '%s.%s.mp4' % (video_url.group('video'), format_id),
+                    'format_id': format_id,
+                })
+
+        if not formats:
+            raise ExtractorError('No media links available for %s' % video_id)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'formats': formats,
+        }
--- a/youtube_dl/extractor/nfb.py
+++ b/youtube_dl/extractor/nfb.py
@ -0,0 +1,104 @
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urllib_request,
+    compat_urllib_parse,
+)
+
+
+class NFBIE(InfoExtractor):
+    """Extractor for nfb.ca / onf.ca film pages (URLs matching _VALID_URL)."""
+
+    IE_NAME = 'nfb'
+    IE_DESC = 'National Film Board of Canada'
+    _VALID_URL = r'https?://(?:www\.)?(nfb|onf)\.ca/film/(?P<id>[\da-z_-]+)'
+
+    _TEST = {
+        'url': 'https://www.nfb.ca/film/qallunaat_why_white_people_are_funny',
+        'info_dict': {
+            'id': 'qallunaat_why_white_people_are_funny',
+            'ext': 'mp4',
+            'title': 'Qallunaat! Why White People Are Funny ',
+            'description': 'md5:836d8aff55e087d04d9f6df554d4e038',
+            'duration': 3128,
+            'uploader': 'Mark Sandiford',
+            'uploader_id': 'mark-sandiford',
+        },
+        'params': {
+            # rtmp download
+            'skip_download': True,
+        }
+    }
+
+    def _real_extract(self, url):
+        """Fetch the film page and the player config XML and build the info dict.
+
+        Director id/name come from the film page; title, description,
+        duration, thumbnail and formats come from the player config.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        page = self._download_webpage('https://www.nfb.ca/film/%s' % video_id, video_id, 'Downloading film page')
+
+        uploader_id = self._html_search_regex(r'<a class="director-link" href="/explore-all-directors/([^/]+)/"',
+            page, 'director id', fatal=False)
+        uploader = self._html_search_regex(r'<em class="director-name" itemprop="name">([^<]+)</em>',
+            page, 'director name', fatal=False)
+
+        # The player config is requested with form-encoded data and an X-NFB-Referer header.
+        request = compat_urllib_request.Request('https://www.nfb.ca/film/%s/player_config' % video_id,
+            compat_urllib_parse.urlencode({'getConfig': 'true'}).encode('ascii'))
+        request.add_header('Content-Type', 'application/x-www-form-urlencoded')
+        request.add_header('X-NFB-Referer', 'http://www.nfb.ca/medias/flash/NFBVideoPlayer.swf')
+
+        config = self._download_xml(request, video_id, 'Downloading player config XML')
+
+        title = None
+        description = None
+        thumbnail = None
+        duration = None
+        formats = []
+
+        def extract_thumbnail(media):
+            """Return the 'high' quality asset URL of media, else any asset URL, else None."""
+            thumbnails = {}
+            for asset in media.findall('assets/asset'):
+                thumbnails[asset.get('quality')] = asset.find('default/url').text
+            if not thumbnails:
+                return None
+            if 'high' in thumbnails:
+                return thumbnails['high']
+            return list(thumbnails.values())[0]
+
+        for media in config.findall('./player/stream/media'):
+            if media.get('type') == 'posterImage':
+                thumbnail = extract_thumbnail(media)
+            elif media.get('type') == 'video':
+                duration_str = media.get('duration')
+                # A missing duration attribute gives None, which int() would reject with TypeError.
+                duration = int(duration_str) if duration_str is not None else None
+                title = media.find('title').text
+                description = media.find('description').text
+                # It seems assets always go from lower to better quality, so no need to sort
+                formats = [{
+                    'url': x.find('default/streamerURI').text + '/',
+                    'play_path': x.find('default/url').text,
+                    'rtmp_live': False,
+                    'ext': 'mp4',
+                    'format_id': x.get('quality'),
+                } for x in media.findall('assets/asset')]
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'uploader': uploader,
+            'uploader_id': uploader_id,
+            'formats': formats,
+        }
--- a/youtube_dl/extractor/pbs.py
+++ b/youtube_dl/extractor/pbs.py
@ -9,7 +9,7 @@ class PBSIE(InfoExtractor):
    _VALID_URL = r'''(?x)https?://
        (?:
            # Direct video URL
-            video\.pbs\.org/video/(?P<id>[0-9]+)/? |
+            video\.pbs\.org/(?:viralplayer|video)/(?P<id>[0-9]+)/? |
            # Article with embedded player
           (?:www\.)?pbs\.org/(?:[^/]+/){2,5}(?P<presumptive_id>[^/]+)/?(?:$|[?\#]) |
           # Player
--- a/youtube_dl/extractor/slideshare.py
+++ b/youtube_dl/extractor/slideshare.py
@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import re
 import json

@ -12,11 +14,12 @@ class SlideshareIE(InfoExtractor):
    _VALID_URL = r'https?://www\.slideshare\.net/[^/]+?/(?P<title>.+?)($|\?)'

    _TEST = {
-        u'url': u'http://www.slideshare.net/Dataversity/keynote-presentation-managing-scale-and-complexity',
-        u'file': u'25665706.mp4',
-        u'info_dict': {
-            u'title': u'Managing Scale and Complexity',
-            u'description': u'This was a keynote presentation at the NoSQL Now! 2013 Conference & Expo (http://www.nosqlnow.com). This presentation was given by Adrian Cockcroft from Netflix',
+        'url': 'http://www.slideshare.net/Dataversity/keynote-presentation-managing-scale-and-complexity',
+        'info_dict': {
+            'id': '25665706',
+            'ext': 'mp4',
+            'title': 'Managing Scale and Complexity',
+            'description': 'This was a keynote presentation at the NoSQL Now! 2013 Conference & Expo (http://www.nosqlnow.com). This presentation was given by Adrian Cockcroft from Netflix.',
        },
    }

@ -26,15 +29,17 @@ class SlideshareIE(InfoExtractor):
        webpage = self._download_webpage(url, page_title)
        slideshare_obj = self._search_regex(
            r'var slideshare_object =  ({.*?}); var user_info =',
-            webpage, u'slideshare object')
+            webpage, 'slideshare object')
        info = json.loads(slideshare_obj)
-        if info['slideshow']['type'] != u'video':
-            raise ExtractorError(u'Webpage type is "%s": only video extraction is supported for Slideshare' % info['slideshow']['type'], expected=True)
+        if info['slideshow']['type'] != 'video':
+            raise ExtractorError('Webpage type is "%s": only video extraction is supported for Slideshare' % info['slideshow']['type'], expected=True)

        doc = info['doc']
        bucket = info['jsplayer']['video_bucket']
        ext = info['jsplayer']['video_extension']
        video_url = compat_urlparse.urljoin(bucket, doc + '-SD.' + ext)
+        description = self._html_search_regex(
+            r'<p class="description.*?"[^>]*>(.*?)</p>', webpage, 'description')

        return {
            '_type': 'video',
@ -43,5 +48,5 @@ class SlideshareIE(InfoExtractor):
            'ext': ext,
            'url': video_url,
            'thumbnail': info['slideshow']['pin_image_url'],
-            'description': self._og_search_description(webpage),
+            'description': description,
        }
--- a/youtube_dl/extractor/tube8.py
+++ b/youtube_dl/extractor/tube8.py
@ -11,7 +11,7 @@ from ..aes import (
 )

 class Tube8IE(InfoExtractor):
-    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>tube8\.com/[^/]+/[^/]+/(?P<videoid>[0-9]+)/?)'
+    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>tube8\.com/.+?/(?P<videoid>\d+)/?)$'
    _TEST = {
        u'url': u'http://www.tube8.com/teen/kasia-music-video/229795/',
        u'file': u'229795.mp4',
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@ -34,6 +34,7 @@ from ..utils import (
    unified_strdate,
    orderedSet,
    write_json_file,
+    uppercase_escape,
 )

 class YoutubeBaseInfoExtractor(InfoExtractor):
@ -136,7 +137,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                         (?:https?://|//)?                                    # http(s):// or protocol-independent URL (optional)
                         (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
                            (?:www\.)?deturl\.com/www\.youtube\.com/|
-                            (?:www\.)?pwnyoutube\.com|
+                            (?:www\.)?pwnyoutube\.com/|
                            tube\.majestyc\.net/|
                            youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
                         (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
@ -1085,8 +1086,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
            self._downloader.report_warning(err_msg)
            return {}

-    def _extract_id(self, url):
-        mobj = re.match(self._VALID_URL, url, re.VERBOSE)
+    @classmethod
+    def extract_id(cls, url):
+        mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = mobj.group(2)
@ -1115,7 +1117,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
        mobj = re.search(self._NEXT_URL_RE, url)
        if mobj:
            url = 'https://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
-        video_id = self._extract_id(url)
+        video_id = self.extract_id(url)

        # Get video webpage
        url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
@ -1422,7 +1424,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):

 class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
    IE_DESC = u'YouTube.com playlists'
-    _VALID_URL = r"""(?:
+    _VALID_URL = r"""(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        youtube\.com/
@ -1431,7 +1433,11 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
                           \? (?:.*?&)*? (?:p|a|list)=
                        |  p/
                        )
-                        ((?:PL|EC|UU|FL|RD)?[0-9A-Za-z-_]{10,})
+                        (
+                            (?:PL|EC|UU|FL|RD)?[0-9A-Za-z-_]{10,}
+                            # Top tracks, they can also include dots 
+                            |(?:MC)[\w\.]*
+                        )
                        .*
                     |
                        ((?:PL|EC|UU|FL|RD)[0-9A-Za-z-_]{10,})
@ -1441,11 +1447,6 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
    _VIDEO_RE = r'href="/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)'
    IE_NAME = u'youtube:playlist'

-    @classmethod
-    def suitable(cls, url):
-        """Receives a URL and returns True if suitable for this IE."""
-        return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
-
    def _real_initialize(self):
        self._login()

@ -1469,7 +1470,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):

    def _real_extract(self, url):
        # Extract playlist id
-        mobj = re.match(self._VALID_URL, url, re.VERBOSE)
+        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        playlist_id = mobj.group(1) or mobj.group(2)
@ -1590,11 +1591,10 @@ class YoutubeChannelIE(InfoExtractor):
            # Download all channel pages using the json-based channel_ajax query
            for pagenum in itertools.count(1):
                url = self._MORE_PAGES_URL % (pagenum, channel_id)
-                page = self._download_webpage(url, channel_id,
-                                              u'Downloading page #%s' % pagenum)
-    
-                page = json.loads(page)
-    
+                page = self._download_json(
+                    url, channel_id, note=u'Downloading page #%s' % pagenum,
+                    transform_source=uppercase_escape)
+
                ids_in_page = self.extract_videos_from_page(page['content_html'])
                video_ids.extend(ids_in_page)
    
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@ -756,9 +756,9 @@ def unified_strdate(date_str):
    """Return a string with the date in the format YYYYMMDD"""
    upload_date = None
    #Replace commas
-    date_str = date_str.replace(',',' ')
+    date_str = date_str.replace(',', ' ')
    # %z (UTC offset) is only supported in python>=3.2
-    date_str = re.sub(r' ?(\+|-)[0-9:]*$', '', date_str)
+    date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
    format_expressions = [
        '%d %B %Y',
        '%B %d %Y',
@ -1214,3 +1214,9 @@ class PagedList(object):
            if end == nextfirstid:
                break
        return res
+
+
+def uppercase_escape(s):
+    """Return s with each backslash, capital U, 8 hex digits escape replaced by compat_chr of its value."""
+    pattern = r'\\U([0-9a-fA-F]{8})'
+    return re.sub(pattern, lambda match: compat_chr(int(match.group(1), 16)), s)
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@ -1,2 +1,2 @@

-__version__ = '2014.02.06.1'
+__version__ = '2014.02.10'
Author	SHA1	Message	Date
Philipp Hagemeister	2e20bba708	release 2014.02.10	2014-02-10 02:01:11 +01:00
Filippo Valsorda	e70dc1d14b	[youtube] Correct a minor regex typo	2014-02-10 01:30:47 +01:00
Philipp Hagemeister	026fcc0495	Fix #2355 (date parsing with dashes)	2014-02-09 18:09:57 +01:00
Philipp Hagemeister	81c2f20b53	[youtube] Correct invalid JSON (Fixes #2353 )	2014-02-09 17:56:10 +01:00
Jaime Marquínez Ferrándiz	1afe753462	[slideshare] Fix description extraction and modernize The ‘og:description’ property doesn’t contain the full description	2014-02-09 14:23:19 +01:00
Jaime Marquínez Ferrándiz	524c2c716a	[bloomberg] Fix extraction of ooyala embed code	2014-02-09 14:11:45 +01:00
Sergey M.	b542d4bbd7	[kontrtube] Add support for kontrtube.ru (Closes #2354 )	2014-02-09 19:53:11 +07:00
Sergey M.	17968e444c	[bbc.co.uk] Fix TV episode test	2014-02-09 04:04:21 +07:00
Sergey M	2e3fd9ec2f	[bbc.co.uk] Improve overall extractor structure, add subtitles support (#2184) Everything from http://www.bbc.co.uk/iplayer/ should be downloadable now.	2014-02-09 04:00:49 +07:00
Philipp Hagemeister	d6a283b025	release 2014.02.08.2	2014-02-08 19:20:35 +01:00
Philipp Hagemeister	9766538124	[jadorecettepub] Add extractor (Fixes #2148 )	2014-02-08 19:20:23 +01:00
Philipp Hagemeister	98dbee8681	[jeuxvideo] Modernize	2014-02-08 18:43:12 +01:00
Philipp Hagemeister	e421491b3b	release 2014.02.08.1	2014-02-08 18:38:05 +01:00
Philipp Hagemeister	6828d37c41	Merge branch 'master' of github.com:rg3/youtube-dl	2014-02-08 18:37:53 +01:00
Philipp Hagemeister	bf5f610099	[pbs] Add support for viralplayer links (Fixes #2350 )	2014-02-08 18:37:33 +01:00
Sergey M.	8b7f73404a	[bbc.co.uk] Fix regex	2014-02-08 22:55:43 +07:00
Sergey M	85cacb2f51	[bbc.co.uk] Add one more link format	2014-02-08 22:54:05 +07:00
Philipp Hagemeister	b3fa3917e2	release 2014.02.08	2014-02-08 16:25:03 +01:00
Sergey M.	082c6c867a	[bbc.co.uk] Add support for bbc.co.uk radio programmes (Closes #2184 )	2014-02-08 21:55:28 +07:00
Filippo Valsorda	03fcf1ab57	Merge pull request #2342 from MikeCol/tube8 [Tube8] Extended valid urls schema	2014-02-08 04:00:50 +01:00
MikeCol	3b00dea5eb	Extended valid urls schema	2014-02-08 00:09:26 +01:00
Philipp Hagemeister	8bc6c8e3c0	[chilloutzone] Add additional tests (#2340 )	2014-02-07 15:42:31 +01:00
Sergey M.	79bc27b53a	[channel9] Simplify	2014-02-07 19:41:18 +07:00
Sergey M.	84dd703199	[ivi] Simplify	2014-02-07 19:36:50 +07:00
Sergey M.	c6fdba23a6	[nfb] Add workaround for python2.6	2014-02-07 19:23:53 +07:00
Philipp Hagemeister	b19fe521a9	Merge pull request #2340 from Fnordlab/master [chilloutzone] Fixes refactoring bug	2014-02-07 12:46:56 +01:00
Andreas Schmitz	c1e672d121	[chilloutzone] fixes bug with youtube extraction the id used for extracting the video from youtube is stored in native_video_id not video_id. This id is only used on chilloutzone.net	2014-02-07 12:29:58 +01:00
Andreas Schmitz	f4371f4784	Merge remote-tracking branch 'upstream/master'	2014-02-07 12:20:58 +01:00
Philipp Hagemeister	d914d9d187	[chilloutzone] Add import	2014-02-07 12:03:19 +01:00
Philipp Hagemeister	845d14d377	credit @Fnordlab for chilloutzone	2014-02-07 12:00:58 +01:00
Philipp Hagemeister	4a9540b6d2	[chilloutzone] Simplify (#2338 )	2014-02-07 12:00:25 +01:00
Philipp Hagemeister	9f31be7000	Merge remote-tracking branch 'Fnordlab/chilloutzone'	2014-02-07 11:50:26 +01:00
Philipp Hagemeister	41fa1b627d	release 2014.02.06.3	2014-02-07 01:41:01 +01:00
Andreas Schmitz	c0c4e66b29	Merge branch 'chilloutzone'	2014-02-06 21:33:16 +01:00
Andreas Schmitz	cd8662de22	[chilloutzone] Bug fix, runs against tests Fixes a bug with python3.3 and made the extractor run successfully against tox	2014-02-06 21:31:04 +01:00
Sergey M.	3587159614	[nfb] Add encode POST data	2014-02-07 02:13:04 +07:00
Jaime Marquínez Ferrándiz	d67cc9fa7c	[youtube:playlist] Recognize ‘top tracks’ urls (closes #2332 ) The list parameter starts with ‘MC’ and can have more characters after it, including dots	2014-02-06 19:46:26 +01:00
Sergey M.	bf3a2fe923	[elpais] Fix typo	2014-02-07 00:38:29 +07:00
Sergey M.	e9ea0bf123	[ndr] Add support for ndr.de (Closes #2325 )	2014-02-07 00:35:26 +07:00
Philipp Hagemeister	63424b6233	release 2014.02.06.2	2014-02-06 15:45:47 +01:00
Sergey M.	0bf35c5cf5	[nfb] Add support for onf.ca URLs	2014-02-06 21:41:31 +07:00
Sergey M.	95c29381eb	[mooshare] Fix bogus video page URL	2014-02-06 21:26:12 +07:00
Sergey M.	94c4abce7f	[nfb] Add support for nfb.ca (Closes #2069 )	2014-02-06 21:19:13 +07:00
Andreas Schmitz	f2dffe55f8	Merge branch 'chilloutzone'	2014-02-06 11:49:38 +01:00
Andreas Schmitz	46a073bfac	[chilloutzone] Added support for chilloutzone.net Added support for chilloutzone.net videos including embedded youtube and vimeo movies. In case you find a not working movie, drop me an email.	2014-02-06 11:44:44 +01:00