release 2014.02.08

[bbc.co.uk] Add support for bbc.co.uk radio programmes (Closes #2184)
Merge pull request #2342 from MikeCol/tube8
2014-02-08 16:25:03 +01:00 · 2014-02-08 21:55:28 +07:00 · 2014-02-08 04:00:50 +01:00 · 2014-02-08 00:09:26 +01:00 · 2014-02-07 15:42:31 +01:00 · 2014-02-07 19:41:18 +07:00
10 changed files with 265 additions and 22 deletions
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -41,6 +41,7 @@ __authors__  = (
    'Chris Gahan',
    'Saimadhav Heblikar',
    'Mike Col',
+    'Andreas Schmitz',
 )

 __license__ = 'Public Domain'
--- a/youtube_dl/downloader/rtmp.py
+++ b/youtube_dl/downloader/rtmp.py
@@ -87,8 +87,10 @@ class RtmpFD(FileDownloader):
        url = info_dict['url']
        player_url = info_dict.get('player_url', None)
        page_url = info_dict.get('page_url', None)
+        app = info_dict.get('app', None)
        play_path = info_dict.get('play_path', None)
        tc_url = info_dict.get('tc_url', None)
+        flash_version = info_dict.get('flash_version', None)
        live = info_dict.get('rtmp_live', False)
        conn = info_dict.get('rtmp_conn', None)

@@ -111,12 +113,16 @@ class RtmpFD(FileDownloader):
            basic_args += ['--swfVfy', player_url]
        if page_url is not None:
            basic_args += ['--pageUrl', page_url]
+        if app is not None:
+            basic_args += ['--app', app]
        if play_path is not None:
            basic_args += ['--playpath', play_path]
        if tc_url is not None:
            basic_args += ['--tcUrl', url]
        if test:
            basic_args += ['--stop', '1']
+        if flash_version is not None:
+            basic_args += ['--flashVer', flash_version]
        if live:
            basic_args += ['--live']
        if conn:
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -15,6 +15,7 @@ from .arte import (
 from .auengine import AUEngineIE
 from .bambuser import BambuserIE, BambuserChannelIE
 from .bandcamp import BandcampIE, BandcampAlbumIE
+from .bbccouk import BBCCoUkIE
 from .blinkx import BlinkxIE
 from .bliptv import BlipTVIE, BlipTVUserIE
 from .bloomberg import BloombergIE
@@ -25,6 +26,7 @@ from .canalplus import CanalplusIE
 from .canalc2 import Canalc2IE
 from .cbs import CBSIE
 from .channel9 import Channel9IE
+from .chilloutzone import ChilloutzoneIE
 from .cinemassacre import CinemassacreIE
 from .clipfish import ClipfishIE
 from .cliphunter import CliphunterIE
--- a/youtube_dl/extractor/bbccouk.py
+++ b/youtube_dl/extractor/bbccouk.py
@@ -0,0 +1,116 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import ExtractorError
+
+
+class BBCCoUkIE(InfoExtractor):
+    IE_NAME = 'bbc.co.uk'
+    IE_DESC = 'BBC - iPlayer Radio'
+    _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/programmes/(?P<id>[\da-z]{8})'
+
+    _TEST = {
+        'url': 'http://www.bbc.co.uk/programmes/p01q7wz1',
+        'info_dict': {
+            'id': 'p01q7wz4',
+            'ext': 'flv',
+            'title': 'Friction: Blu Mar Ten guest mix: Blu Mar Ten - Guest Mix',
+            'description': 'Blu Mar Ten deliver a Guest Mix for Friction.',
+            'duration': 1936,
+        },
+        'params': {
+            # rtmp download
+            'skip_download': True,
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        group_id = mobj.group('id')
+
+        playlist = self._download_xml('http://www.bbc.co.uk/iplayer/playlist/%s' % group_id, group_id,
+            'Downloading playlist XML')
+
+        item = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}item')
+        if item is None:
+            no_items = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}noItems')
+            if no_items is not None:
+                reason = no_items.get('reason')
+                if reason == 'preAvailability':
+                    msg = 'Episode %s is not yet available' % group_id
+                elif reason == 'postAvailability':
+                    msg = 'Episode %s is no longer available' % group_id
+                else:
+                    msg = 'Episode %s is not available: %s' % (group_id, reason)
+                raise ExtractorError(msg, expected=True)
+            raise ExtractorError('Failed to extract media for episode %s' % group_id, expected=True)
+
+        title = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}title').text
+        description = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}summary').text
+
+        radio_programme_id = item.get('identifier')
+        duration = int(item.get('duration'))
+
+        media_selection = self._download_xml(
+            'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s'  % radio_programme_id,
+            radio_programme_id, 'Downloading media selection XML')
+
+        formats = []
+        for media in media_selection.findall('./{http://bbc.co.uk/2008/mp/mediaselection}media'):
+            bitrate = int(media.get('bitrate'))
+            encoding = media.get('encoding')
+            service = media.get('service')
+            connection = media.find('./{http://bbc.co.uk/2008/mp/mediaselection}connection')
+            protocol = connection.get('protocol')
+            priority = connection.get('priority')
+            supplier = connection.get('supplier')
+            if protocol == 'http':
+                href = connection.get('href')
+                # ASX playlist
+                if supplier == 'asx':
+                    asx = self._download_xml(href, radio_programme_id, 'Downloading %s ASX playlist' % service)
+                    for i, ref in enumerate(asx.findall('./Entry/ref')):
+                        formats.append({
+                            'url': ref.get('href'),
+                            'format_id': '%s_ref%s' % (service, i),
+                            'abr': bitrate,
+                            'acodec': encoding,
+                            'preference': priority,
+                        })
+                    continue
+                # Direct link
+                formats.append({
+                    'url': href,
+                    'format_id': service,
+                    'abr': bitrate,
+                    'acodec': encoding,
+                    'preference': priority,
+                })
+            elif protocol == 'rtmp':
+                application = connection.get('application', 'ondemand')
+                auth_string = connection.get('authString')
+                identifier = connection.get('identifier')
+                server = connection.get('server')
+                formats.append({
+                    'url': '%s://%s/%s?%s' % (protocol, server, application, auth_string),
+                    'play_path': identifier,
+                    'app': '%s?%s' % (application, auth_string),
+                    'rtmp_live': False,
+                    'ext': 'flv',
+                    'format_id': service,
+                    'abr': bitrate,
+                    'acodec': encoding,
+                    'preference': priority,
+                })
+
+        self._sort_formats(formats)
+
+        return {
+            'id': radio_programme_id,
+            'title': title,
+            'description': description,
+            'duration': duration,
+            'formats': formats,
+        }
--- a/youtube_dl/extractor/channel9.py
+++ b/youtube_dl/extractor/channel9.py
@@ -15,14 +15,15 @@ class Channel9IE(InfoExtractor):
    '''
    IE_DESC = 'Channel 9'
    IE_NAME = 'channel9'
-    _VALID_URL = r'^https?://(?:www\.)?channel9\.msdn\.com/(?P<contentpath>.+)/?'
+    _VALID_URL = r'https?://(?:www\.)?channel9\.msdn\.com/(?P<contentpath>.+)/?'

    _TESTS = [
        {
            'url': 'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002',
-            'file': 'Events_TechEd_Australia_2013_KOS002.mp4',
            'md5': 'bbd75296ba47916b754e73c3a4bbdf10',
            'info_dict': {
+                'id': 'Events/TechEd/Australia/2013/KOS002',
+                'ext': 'mp4',
                'title': 'Developer Kick-Off Session: Stuff We Love',
                'description': 'md5:c08d72240b7c87fcecafe2692f80e35f',
                'duration': 4576,
@@ -35,9 +36,10 @@ class Channel9IE(InfoExtractor):
        },
        {
            'url': 'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing',
-            'file': 'posts_Self-service-BI-with-Power-BI-nuclear-testing.mp4',
            'md5': 'b43ee4529d111bc37ba7ee4f34813e68',
            'info_dict': {
+                'id': 'posts/Self-service-BI-with-Power-BI-nuclear-testing',
+                'ext': 'mp4',
                'title': 'Self-service BI with Power BI - nuclear testing',
                'description': 'md5:d1e6ecaafa7fb52a2cacdf9599829f5b',
                'duration': 1540,
--- a/youtube_dl/extractor/chilloutzone.py
+++ b/youtube_dl/extractor/chilloutzone.py
@@ -0,0 +1,97 @@
+from __future__ import unicode_literals
+
+import re
+import base64
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+    clean_html,
+    ExtractorError
+)
+
+
+class ChilloutzoneIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?chilloutzone\.net/video/(?P<id>[\w|-]+)\.html'
+    _TESTS = [{
+        'url': 'http://www.chilloutzone.net/video/enemene-meck-alle-katzen-weg.html',
+        'md5': 'a76f3457e813ea0037e5244f509e66d1',
+        'info_dict': {
+            'id': 'enemene-meck-alle-katzen-weg',
+            'ext': 'mp4',
+            'title': 'Enemene Meck - Alle Katzen weg',
+            'description': 'Ist das der Umkehrschluss des Niesenden Panda-Babys?',
+        },
+    }, {
+        'note': 'Video hosted at YouTube',
+        'url': 'http://www.chilloutzone.net/video/eine-sekunde-bevor.html',
+        'info_dict': {
+            'id': '1YVQaAgHyRU',
+            'ext': 'mp4',
+            'title': '16 Photos Taken 1 Second Before Disaster',
+            'description': 'md5:58a8fcf6a459fe0a08f54140f0ad1814',
+            'uploader': 'BuzzFeedVideo',
+            'uploader_id': 'BuzzFeedVideo',
+            'upload_date': '20131105',
+        },
+    }, {
+        'note': 'Video hosted at Vimeo',
+        'url': 'http://www.chilloutzone.net/video/icon-blending.html',
+        'md5': '2645c678b8dc4fefcc0e1b60db18dac1',
+        'info_dict': {
+            'id': '85523671',
+            'ext': 'mp4',
+            'title': 'The Sunday Times - Icons',
+            'description': 'md5:3e5e8e839f076a637c6b9406c8f25c4c',
+            'uploader': 'Us',
+            'uploader_id': 'usfilms',
+            'upload_date': '20140131'
+        },
+    }]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+
+        base64_video_info = self._html_search_regex(
+            r'var cozVidData = "(.+?)";', webpage, 'video data')
+        decoded_video_info = base64.b64decode(base64_video_info).decode("utf-8")
+        video_info_dict = json.loads(decoded_video_info)
+
+        # get video information from dict
+        video_url = video_info_dict['mediaUrl']
+        description = clean_html(video_info_dict.get('description'))
+        title = video_info_dict['title']
+        native_platform = video_info_dict['nativePlatform']
+        native_video_id = video_info_dict['nativeVideoId']
+        source_priority = video_info_dict['sourcePriority']
+
+        # If nativePlatform is None a fallback mechanism is used (i.e. youtube embed)
+        if native_platform is None:
+            youtube_url = self._html_search_regex(
+                r'<iframe.* src="((?:https?:)?//(?:[^.]+\.)?youtube\.com/.+?)"',
+                webpage, 'fallback video URL', default=None)
+            if youtube_url is not None:
+                return self.url_result(youtube_url, ie='Youtube')
+
+        # Non Fallback: Decide to use native source (e.g. youtube or vimeo) or
+        # the own CDN
+        if source_priority == 'native':
+            if native_platform == 'youtube':
+                return self.url_result(native_video_id, ie='Youtube')
+            if native_platform == 'vimeo':
+                return self.url_result(
+                    'http://vimeo.com/' + native_video_id, ie='Vimeo')
+
+        if not video_url:
+            raise ExtractorError('No video found')
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'ext': 'mp4',
+            'title': title,
+            'description': description,
+        }
--- a/youtube_dl/extractor/ivi.py
+++ b/youtube_dl/extractor/ivi.py
@@ -14,15 +14,16 @@ from ..utils import (
 class IviIE(InfoExtractor):
    IE_DESC = 'ivi.ru'
    IE_NAME = 'ivi'
-    _VALID_URL = r'^https?://(?:www\.)?ivi\.ru/watch(?:/(?P<compilationid>[^/]+))?/(?P<videoid>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?ivi\.ru/watch(?:/(?P<compilationid>[^/]+))?/(?P<videoid>\d+)'

    _TESTS = [
        # Single movie
        {
            'url': 'http://www.ivi.ru/watch/53141',
-            'file': '53141.mp4',
            'md5': '6ff5be2254e796ed346251d117196cf4',
            'info_dict': {
+                'id': '53141',
+                'ext': 'mp4',
                'title': 'Иван Васильевич меняет профессию',
                'description': 'md5:b924063ea1677c8fe343d8a72ac2195f',
                'duration': 5498,
@@ -33,9 +34,10 @@ class IviIE(InfoExtractor):
        # Serial's serie
        {
            'url': 'http://www.ivi.ru/watch/dezhurnyi_angel/74791',
-            'file': '74791.mp4',
            'md5': '3e6cc9a848c1d2ebcc6476444967baa9',
            'info_dict': {
+                'id': '74791',
+                'ext': 'mp4',
                'title': 'Дежурный ангел - 1 серия',
                'duration': 2490,
                'thumbnail': 'http://thumbs.ivi.ru/f7.vcp.digitalaccess.ru/contents/8/e/bc2f6c2b6e5d291152fdd32c059141.jpg',
@@ -124,7 +126,7 @@ class IviIE(InfoExtractor):
 class IviCompilationIE(InfoExtractor):
    IE_DESC = 'ivi.ru compilations'
    IE_NAME = 'ivi:compilation'
-    _VALID_URL = r'^https?://(?:www\.)?ivi\.ru/watch/(?!\d+)(?P<compilationid>[a-z\d_-]+)(?:/season(?P<seasonid>\d+))?$'
+    _VALID_URL = r'https?://(?:www\.)?ivi\.ru/watch/(?!\d+)(?P<compilationid>[a-z\d_-]+)(?:/season(?P<seasonid>\d+))?$'

    def _extract_entries(self, html, compilation_id):
        return [self.url_result('http://www.ivi.ru/watch/%s/%s' % (compilation_id, serie), 'Ivi')
--- a/youtube_dl/extractor/nfb.py
+++ b/youtube_dl/extractor/nfb.py
@@ -49,20 +49,37 @@ class NFBIE(InfoExtractor):

        config = self._download_xml(request, video_id, 'Downloading player config XML')

-        thumbnail = config.find("./player/stream/media[@type='posterImage']/assets/asset[@quality='high']/default/url").text
-        video = config.find("./player/stream/media[@type='video']")
-        duration = int(video.get('duration'))
-        title = video.find('title').text
-        description = video.find('description').text
+        title = None
+        description = None
+        thumbnail = None
+        duration = None
+        formats = []

-        # It seems assets always go from lower to better quality, so no need to sort
-        formats = [{
-            'url': x.find('default/streamerURI').text + '/',
-            'play_path': x.find('default/url').text,
-            'rtmp_live': False,
-            'ext': 'mp4',
-            'format_id': x.get('quality'),
-        } for x in video.findall('assets/asset')]
+        def extract_thumbnail(media):
+            thumbnails = {}
+            for asset in media.findall('assets/asset'):
+                thumbnails[asset.get('quality')] = asset.find('default/url').text
+            if not thumbnails:
+                return None
+            if 'high' in thumbnails:
+                return thumbnails['high']
+            return list(thumbnails.values())[0]
+
+        for media in config.findall('./player/stream/media'):
+            if media.get('type') == 'posterImage':
+                thumbnail = extract_thumbnail(media)
+            elif media.get('type') == 'video':
+                duration = int(media.get('duration'))
+                title = media.find('title').text
+                description = media.find('description').text
+                # It seems assets always go from lower to better quality, so no need to sort
+                formats = [{
+                    'url': x.find('default/streamerURI').text + '/',
+                    'play_path': x.find('default/url').text,
+                    'rtmp_live': False,
+                    'ext': 'mp4',
+                    'format_id': x.get('quality'),
+                } for x in media.findall('assets/asset')]

        return {
            'id': video_id,
--- a/youtube_dl/extractor/tube8.py
+++ b/youtube_dl/extractor/tube8.py
@@ -11,7 +11,7 @@ from ..aes import (
 )

 class Tube8IE(InfoExtractor):
-    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>tube8\.com/[^/]+/[^/]+/(?P<videoid>[0-9]+)/?)'
+    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>tube8\.com/.+?/(?P<videoid>\d+)/?)$'
    _TEST = {
        u'url': u'http://www.tube8.com/teen/kasia-music-video/229795/',
        u'file': u'229795.mp4',
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@

-__version__ = '2014.02.06.3'
+__version__ = '2014.02.08'
Author	SHA1	Message	Date
Philipp Hagemeister	b3fa3917e2	release 2014.02.08	2014-02-08 16:25:03 +01:00
Sergey M.	082c6c867a	[bbc.co.uk] Add support for bbc.co.uk radio programmes (Closes #2184)	2014-02-08 21:55:28 +07:00
Filippo Valsorda	03fcf1ab57	Merge pull request #2342 from MikeCol/tube8 [Tube8] Extended valid urls schema	2014-02-08 04:00:50 +01:00
MikeCol	3b00dea5eb	Extended valid urls schema	2014-02-08 00:09:26 +01:00
Philipp Hagemeister	8bc6c8e3c0	[chilloutzone] Add additional tests (#2340)	2014-02-07 15:42:31 +01:00
Sergey M.	79bc27b53a	[channel9] Simplify	2014-02-07 19:41:18 +07:00
Sergey M.	84dd703199	[ivi] Simplify	2014-02-07 19:36:50 +07:00
Sergey M.	c6fdba23a6	[nfb] Add workaround for python2.6	2014-02-07 19:23:53 +07:00
Philipp Hagemeister	b19fe521a9	Merge pull request #2340 from Fnordlab/master [chilloutzone] Fixes refactoring bug	2014-02-07 12:46:56 +01:00
Andreas Schmitz	c1e672d121	[chilloutzone] fixes bug with youtube extraction the id used for extracting the video from youtube is stored in native_video_id not video_id. This id is only used on chilloutzone.net	2014-02-07 12:29:58 +01:00
Andreas Schmitz	f4371f4784	Merge remote-tracking branch 'upstream/master'	2014-02-07 12:20:58 +01:00
Philipp Hagemeister	d914d9d187	[chilloutzone] Add import	2014-02-07 12:03:19 +01:00
Philipp Hagemeister	845d14d377	credit @Fnordlab for chilloutzone	2014-02-07 12:00:58 +01:00
Philipp Hagemeister	4a9540b6d2	[chilloutzone] Simplify (#2338)	2014-02-07 12:00:25 +01:00
Philipp Hagemeister	9f31be7000	Merge remote-tracking branch 'Fnordlab/chilloutzone'	2014-02-07 11:50:26 +01:00
Andreas Schmitz	c0c4e66b29	Merge branch 'chilloutzone'	2014-02-06 21:33:16 +01:00
Andreas Schmitz	cd8662de22	[chilloutzone] Bug fix, runs against tests Fixes a bug with python3.3 and made the extractor run successfully against tox	2014-02-06 21:31:04 +01:00
Andreas Schmitz	f2dffe55f8	Merge branch 'chilloutzone'	2014-02-06 11:49:38 +01:00
Andreas Schmitz	46a073bfac	[chilloutzone] Added support for chilloutzone.net Added support for chilloutzone.net videos including embedded youtube and vimeo movies. In case you find a not working movie, drop me an email.	2014-02-06 11:44:44 +01:00