release 2014.08.10

[appletrailers] Support height-less videos
[ard] Show error message for videos that are no longer available (#3422 )
2014-08-10 19:47:15 +02:00 · 2014-08-10 13:04:45 +02:00 · 2014-08-10 17:53:17 +07:00 · 2014-08-10 11:57:15 +02:00 · 2014-08-10 11:08:56 +02:00 · 2014-08-10 11:00:14 +02:00
20 changed files with 317 additions and 117 deletions
--- a/test/test_all_urls.py
+++ b/test/test_all_urls.py
@ -99,6 +99,7 @@ class TestAllURLsMatching(unittest.TestCase):

    def test_facebook_matching(self):
        self.assertTrue(FacebookIE.suitable('https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268'))
+        self.assertTrue(FacebookIE.suitable('https://www.facebook.com/cindyweather?fref=ts#!/photo.php?v=10152183998945793'))

    def test_no_duplicates(self):
        ies = gen_extractors()
--- a/test/test_utils.py
+++ b/test/test_utils.py
@ -280,7 +280,7 @@ class TestUtil(unittest.TestCase):
        d = json.loads(stripped)
        self.assertEqual(d, [{"id": "532cb", "x": 3}])

-    def test_uppercase_escpae(self):
+    def test_uppercase_escape(self):
        self.assertEqual(uppercase_escape(u'aä'), u'aä')
        self.assertEqual(uppercase_escape(u'\\U0001d550'), u'𝕐')

--- a/youtube_dl/init.py
+++ b/youtube_dl/init.py
@ -66,6 +66,9 @@ __authors__  = (
    'Naglis Jonaitis',
    'Charles Chen',
    'Hassaan Ali',
+    'Dobrosław Żybort',
+    'David Fabijan',
+    'Sebastian Haas',
 )

 __license__ = 'Public Domain'
--- a/youtube_dl/downloader/common.py
+++ b/youtube_dl/downloader/common.py
@ -292,7 +292,7 @@ class FileDownloader(object):

    def real_download(self, filename, info_dict):
        """Real download process. Redefine in subclasses."""
-        raise NotImplementedError(u'This method must be implemented by sublcasses')
+        raise NotImplementedError(u'This method must be implemented by subclasses')

    def _hook_progress(self, status):
        for ph in self._progress_hooks:
--- a/youtube_dl/extractor/init.py
+++ b/youtube_dl/extractor/init.py
@ -181,6 +181,7 @@ from .mixcloud import MixcloudIE
 from .mlb import MLBIE
 from .mpora import MporaIE
 from .mofosex import MofosexIE
+from .mojvideo import MojvideoIE
 from .mooshare import MooshareIE
 from .morningstar import MorningstarIE
 from .motherless import MotherlessIE
@ -224,9 +225,12 @@ from .nrk import (
 from .ntv import NTVIE
 from .nytimes import NYTimesIE
 from .nuvid import NuvidIE
-from .oe1 import OE1IE
 from .ooyala import OoyalaIE
-from .orf import ORFIE
+from .orf import (
+    ORFTVthekIE,
+    ORFOE1IE,
+    ORFFM4IE,
+)
 from .parliamentliveuk import ParliamentLiveUKIE
 from .pbs import PBSIE
 from .photobucket import PhotobucketIE
@ -327,7 +331,7 @@ from .tutv import TutvIE
 from .tvigle import TvigleIE
 from .tvp import TvpIE
 from .tvplay import TVPlayIE
-from.ubu import UbuIE
+from .ubu import UbuIE
 from .udemy import (
    UdemyIE,
    UdemyCourseIE
@ -383,6 +387,7 @@ from .wistia import WistiaIE
 from .worldstarhiphop import WorldStarHipHopIE
 from .wrzuta import WrzutaIE
 from .xbef import XBefIE
+from .xboxclips import XboxClipsIE
 from .xhamster import XHamsterIE
 from .xnxx import XNXXIE
 from .xvideos import XVideosIE
--- a/youtube_dl/extractor/appletrailers.py
+++ b/youtube_dl/extractor/appletrailers.py
@ -6,6 +6,7 @@ import json
 from .common import InfoExtractor
 from ..utils import (
    compat_urlparse,
+    int_or_none,
 )


@ -110,8 +111,8 @@ class AppleTrailersIE(InfoExtractor):
                formats.append({
                    'url': format_url,
                    'format': format['type'],
-                    'width': format['width'],
-                    'height': int(format['height']),
+                    'width': int_or_none(format['width']),
+                    'height': int_or_none(format['height']),
                })

            self._sort_formats(formats)
--- a/youtube_dl/extractor/ard.py
+++ b/youtube_dl/extractor/ard.py
@ -51,6 +51,9 @@ class ARDIE(InfoExtractor):

        webpage = self._download_webpage(url, video_id)

+        if '>Der gewünschte Beitrag ist nicht mehr verfügbar.<' in webpage:
+            raise ExtractorError('Video %s is no longer available' % video_id, expected=True)
+
        title = self._html_search_regex(
            [r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',
             r'<meta name="dcterms.title" content="(.*?)"/>',
--- a/youtube_dl/extractor/arte.py
+++ b/youtube_dl/extractor/arte.py
@ -109,15 +109,19 @@ class ArteTVPlus7IE(InfoExtractor):
            regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l]
            return any(re.match(r, f['versionCode']) for r in regexes)
        # Some formats may not be in the same language as the url
+        # TODO: Might not want to drop videos that do not match the requested language,
+        # but rather process those formats with lower precedence
        formats = filter(_match_lang, all_formats)
-        formats = list(formats) # in python3 filter returns an iterator
+        formats = list(formats)  # in python3 filter returns an iterator
        if not formats:
            # Some videos are only available in the 'Originalversion'
            # they aren't tagged as being in French or German
-            if all(f['versionCode'] == 'VO' or f['versionCode'] == 'VA' for f in all_formats):
-                formats = all_formats
-            else:
-                raise ExtractorError(u'The formats list is empty')
+            # Sometimes there are neither videos of requested lang code
+            # nor original version videos available
+            # For such cases we just take all_formats as is
+            formats = all_formats
+            if not formats:
+                raise ExtractorError('The formats list is empty')

        if re.match(r'[A-Z]Q', formats[0]['quality']) is not None:
            def sort_key(f):
--- a/youtube_dl/extractor/facebook.py
+++ b/youtube_dl/extractor/facebook.py
@ -20,7 +20,7 @@ from ..utils import (
 class FacebookIE(InfoExtractor):
    _VALID_URL = r'''(?x)
        https?://(?:\w+\.)?facebook\.com/
-        (?:[^#?]*\#!/)?
+        (?:[^#]*?\#!/)?
        (?:video/video\.php|photo\.php|video/embed)\?(?:.*?)
        (?:v|video_id)=(?P<id>[0-9]+)
        (?:.*)'''
--- a/youtube_dl/extractor/firedrive.py
+++ b/youtube_dl/extractor/firedrive.py
@ -42,7 +42,6 @@ class FiredriveIE(InfoExtractor):
        fields = dict(re.findall(r'''(?x)<input\s+
            type="hidden"\s+
            name="([^"]+)"\s+
-            (?:id="[^"]+"\s+)?
            value="([^"]*)"
            ''', webpage))

@ -66,7 +65,7 @@ class FiredriveIE(InfoExtractor):
        ext = self._search_regex(r'type:\s?\'([^\']+)\',',
                                 webpage, 'extension', fatal=False)
        video_url = self._search_regex(
-            r'file:\s?\'(http[^\']+)\',', webpage, 'file url')
+            r'file:\s?loadURL\(\'(http[^\']+)\'\),', webpage, 'file url')

        formats = [{
            'format_id': 'sd',
--- a/youtube_dl/extractor/mojvideo.py
+++ b/youtube_dl/extractor/mojvideo.py
@ -0,0 +1,58 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    parse_duration,
+)
+
+
+class MojvideoIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?mojvideo\.com/video-(?P<display_id>[^/]+)/(?P<id>[a-f0-9]+)'
+    _TEST = {
+        'url': 'http://www.mojvideo.com/video-v-avtu-pred-mano-rdecelaska-alfi-nipic/3d1ed4497707730b2906',
+        'md5': 'f7fd662cc8ce2be107b0d4f2c0483ae7',
+        'info_dict': {
+            'id': '3d1ed4497707730b2906',
+            'display_id': 'v-avtu-pred-mano-rdecelaska-alfi-nipic',
+            'ext': 'mp4',
+            'title': 'V avtu pred mano rdečelaska - Alfi Nipič',
+            'thumbnail': 're:^http://.*\.jpg$',
+            'duration': 242,
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        display_id = mobj.group('display_id')
+
+        # XML is malformed
+        playerapi = self._download_webpage(
+            'http://www.mojvideo.com/playerapi.php?v=%s&t=1' % video_id, display_id)
+
+        if '<error>true</error>' in playerapi:
+            error_desc = self._html_search_regex(
+                r'<errordesc>([^<]*)</errordesc>', playerapi, 'error description', fatal=False)
+            raise ExtractorError('%s said: %s' % (self.IE_NAME, error_desc), expected=True)
+
+        title = self._html_search_regex(
+            r'<title>([^<]+)</title>', playerapi, 'title')
+        video_url = self._html_search_regex(
+            r'<file>([^<]+)</file>', playerapi, 'video URL')
+        thumbnail = self._html_search_regex(
+            r'<preview>([^<]+)</preview>', playerapi, 'thumbnail', fatal=False)
+        duration = parse_duration(self._html_search_regex(
+            r'<duration>([^<]+)</duration>', playerapi, 'duration', fatal=False))
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'url': video_url,
+            'title': title,
+            'thumbnail': thumbnail,
+            'duration': duration,
+        }
--- a/youtube_dl/extractor/nowness.py
+++ b/youtube_dl/extractor/nowness.py
@ -1,3 +1,4 @@
+# encoding: utf-8
 from __future__ import unicode_literals

 import re
@ -8,19 +9,34 @@ from ..utils import ExtractorError


 class NownessIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?nowness\.com/[^?#]*?/(?P<id>[0-9]+)/(?P<slug>[^/]+?)(?:$|[?#])'
+    _VALID_URL = r'https?://(?:(?:www|cn)\.)?nowness\.com/[^?#]*?/(?P<id>[0-9]+)/(?P<slug>[^/]+?)(?:$|[?#])'

-    _TEST = {
-        'url': 'http://www.nowness.com/day/2013/6/27/3131/candor--the-art-of-gesticulation',
-        'md5': '068bc0202558c2e391924cb8cc470676',
-        'info_dict': {
-            'id': '2520295746001',
-            'ext': 'mp4',
-            'description': 'Candor: The Art of Gesticulation',
-            'uploader': 'Nowness',
-            'title': 'Candor: The Art of Gesticulation',
-        }
-    }
+    _TESTS = [
+        {
+            'url': 'http://www.nowness.com/day/2013/6/27/3131/candor--the-art-of-gesticulation',
+            'md5': '068bc0202558c2e391924cb8cc470676',
+            'info_dict': {
+                'id': '2520295746001',
+                'ext': 'mp4',
+                'title': 'Candor: The Art of Gesticulation',
+                'description': 'Candor: The Art of Gesticulation',
+                'thumbnail': 're:^https?://.*\.jpg',
+                'uploader': 'Nowness',
+            }
+        },
+        {
+            'url': 'http://cn.nowness.com/day/2014/8/7/4069/kasper-bj-rke-ft-jaakko-eino-kalevi--tnr',
+            'md5': 'e79cf125e387216f86b2e0a5b5c63aa3',
+            'info_dict': {
+                'id': '3716354522001',
+                'ext': 'mp4',
+                'title': 'Kasper Bjørke ft. Jaakko Eino Kalevi: TNR',
+                'description': 'Kasper Bjørke ft. Jaakko Eino Kalevi: TNR',
+                'thumbnail': 're:^https?://.*\.jpg',
+                'uploader': 'Nowness',
+            }
+        },
+    ]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
--- a/youtube_dl/extractor/oe1.py
+++ b/youtube_dl/extractor/oe1.py
@ -1,40 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import calendar
-import datetime
-import re
-
-from .common import InfoExtractor
-
-# audios on oe1.orf.at are only available for 7 days, so we can't
-# add tests.
-
-
-class OE1IE(InfoExtractor):
-    IE_DESC = 'oe1.orf.at'
-    _VALID_URL = r'http://oe1\.orf\.at/programm/(?P<id>[0-9]+)'
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        show_id = mobj.group('id')
-
-        data = self._download_json(
-            'http://oe1.orf.at/programm/%s/konsole' % show_id,
-            show_id
-        )
-
-        timestamp = datetime.datetime.strptime('%s %s' % (
-            data['item']['day_label'],
-            data['item']['time']
-        ), '%d.%m.%Y %H:%M')
-        unix_timestamp = calendar.timegm(timestamp.utctimetuple())
-
-        return {
-            'id': show_id,
-            'title': data['item']['title'],
-            'url': data['item']['url_stream'],
-            'ext': 'mp3',
-            'description': data['item'].get('info'),
-            'timestamp': unix_timestamp
-        }
--- a/youtube_dl/extractor/orf.py
+++ b/youtube_dl/extractor/orf.py
@ -3,6 +3,8 @@ from __future__ import unicode_literals

 import json
 import re
+import calendar
+import datetime

 from .common import InfoExtractor
 from ..utils import (
@ -12,7 +14,9 @@ from ..utils import (
 )


-class ORFIE(InfoExtractor):
+class ORFTVthekIE(InfoExtractor):
+    IE_NAME = 'orf:tvthek'
+    IE_DESC = 'ORF TVthek'
    _VALID_URL = r'https?://tvthek\.orf\.at/(?:programs/.+?/episodes|topics/.+?|program/[^/]+)/(?P<id>\d+)'

    _TEST = {
@ -105,3 +109,73 @@ class ORFIE(InfoExtractor):
            'entries': entries,
            'id': playlist_id,
        }
+
+
+# Audios on ORF radio are only available for 7 days, so we can't add tests.
+
+
+class ORFOE1IE(InfoExtractor):
+    IE_NAME = 'orf:oe1'
+    IE_DESC = 'Radio Österreich 1'
+    _VALID_URL = r'http://oe1\.orf\.at/programm/(?P<id>[0-9]+)'
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        show_id = mobj.group('id')
+
+        data = self._download_json(
+            'http://oe1.orf.at/programm/%s/konsole' % show_id,
+            show_id
+        )
+
+        timestamp = datetime.datetime.strptime('%s %s' % (
+            data['item']['day_label'],
+            data['item']['time']
+        ), '%d.%m.%Y %H:%M')
+        unix_timestamp = calendar.timegm(timestamp.utctimetuple())
+
+        return {
+            'id': show_id,
+            'title': data['item']['title'],
+            'url': data['item']['url_stream'],
+            'ext': 'mp3',
+            'description': data['item'].get('info'),
+            'timestamp': unix_timestamp
+        }
+
+
+class ORFFM4IE(InfoExtractor):
+    IE_NAME = 'orf:fm4'  # extractor name; follows the 'orf:<station>' scheme of the sibling ORF extractors
+    IE_DESC = 'radio FM4'
+    _VALID_URL = r'http://fm4\.orf\.at/7tage/?#(?P<date>[0-9]+)/(?P<show>\w+)'
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        show_date = mobj.group('date')
+        show_id = mobj.group('show')
+
+        data = self._download_json(
+            'http://audioapi.orf.at/fm4/json/2.0/broadcasts/%s/4%s' % (show_date, show_id),
+            show_id
+        )
+
+        def extract_entry_dict(info, title, subtitle):  # builds one playlist entry from a single stream record
+            return {
+                'id': info['loopStreamId'].replace('.mp3', ''),
+                'url': 'http://loopstream01.apa.at/?channel=fm4&id=%s' % info['loopStreamId'],
+                'title': title,
+                'description': subtitle,
+                'duration': (info['end'] - info['start']) / 1000,  # presumably milliseconds -> seconds; confirm against the API
+                'timestamp': info['start'] / 1000,
+                'ext': 'mp3'
+            }
+
+        entries = [extract_entry_dict(t, data['title'], data['subtitle']) for t in data['streams']]
+
+        return {
+            '_type': 'playlist',
+            'id': show_id,
+            'title': data['title'],
+            'description': data['subtitle'],
+            'entries': entries
+        }
--- a/youtube_dl/extractor/reverbnation.py
+++ b/youtube_dl/extractor/reverbnation.py
@ -1,23 +1,23 @@
 from __future__ import unicode_literals

 import re
-import time

 from .common import InfoExtractor
-from ..utils import strip_jsonp
+from ..utils import str_or_none


 class ReverbNationIE(InfoExtractor):
    _VALID_URL = r'^https?://(?:www\.)?reverbnation\.com/.*?/song/(?P<id>\d+).*?$'
    _TESTS = [{
        'url': 'http://www.reverbnation.com/alkilados/song/16965047-mona-lisa',
-        'file': '16965047.mp3',
        'md5': '3da12ebca28c67c111a7f8b262d3f7a7',
        'info_dict': {
+            "id": "16965047",
+            "ext": "mp3",
            "title": "MONA LISA",
            "uploader": "ALKILADOS",
-            "uploader_id": 216429,
-            "thumbnail": "//gp1.wac.edgecastcdn.net/802892/production_public/Photo/13761700/image/1366002176_AVATAR_MONA_LISA.jpg"
+            "uploader_id": "216429",
+            "thumbnail": "re:^https://gp1\.wac\.edgecastcdn\.net/.*?\.jpg$"
        },
    }]

@ -26,10 +26,8 @@ class ReverbNationIE(InfoExtractor):
        song_id = mobj.group('id')

        api_res = self._download_json(
-            'https://api.reverbnation.com/song/%s?callback=api_response_5&_=%d'
-                % (song_id, int(time.time() * 1000)),
+            'https://api.reverbnation.com/song/%s' % song_id,
            song_id,
-            transform_source=strip_jsonp,
            note='Downloading information of song %s' % song_id
        )

@ -38,8 +36,9 @@ class ReverbNationIE(InfoExtractor):
            'title': api_res.get('name'),
            'url': api_res.get('url'),
            'uploader': api_res.get('artist', {}).get('name'),
-            'uploader_id': api_res.get('artist', {}).get('id'),
-            'thumbnail': api_res.get('image', api_res.get('thumbnail')),
+            'uploader_id': str_or_none(api_res.get('artist', {}).get('id')),
+            'thumbnail': self._proto_relative_url(
+                api_res.get('image', api_res.get('thumbnail'))),
            'ext': 'mp3',
            'vcodec': 'none',
        }
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@ -121,6 +121,21 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
                'videopassword': 'youtube-dl',
            },
        },
+        {
+            'url': 'http://vimeo.com/channels/keypeele/75629013',
+            'md5': '2f86a05afe9d7abc0b9126d229bbe15d',
+            'note': 'Video is freely available via original URL '
+                    'and protected with password when accessed via http://vimeo.com/75629013',
+            'info_dict': {
+                'id': '75629013',
+                'ext': 'mp4',
+                'title': 'Key & Peele: Terrorist Interrogation',
+                'description': 'md5:8678b246399b070816b12313e8b4eb5c',
+                'uploader_id': 'atencio',
+                'uploader': 'Peter Atencio',
+                'duration': 187,
+            },
+        },
        {
            'url': 'http://vimeo.com/76979871',
            'md5': '3363dd6ffebe3784d56f4132317fd446',
@ -196,8 +211,6 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
        video_id = mobj.group('id')
        if mobj.group('pro') or mobj.group('player'):
            url = 'http://player.vimeo.com/video/' + video_id
-        else:
-            url = 'https://vimeo.com/' + video_id

        # Retrieve video webpage to extract further information
        request = compat_urllib_request.Request(url, None, headers)
@ -263,7 +276,7 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
        if video_thumbnail is None:
            video_thumbs = config["video"].get("thumbs")
            if video_thumbs and isinstance(video_thumbs, dict):
-                _, video_thumbnail = sorted((int(width), t_url) for (width, t_url) in video_thumbs.items())[-1]
+                _, video_thumbnail = sorted((int(width if width.isdigit() else 0), t_url) for (width, t_url) in video_thumbs.items())[-1]

        # Extract video description
        video_description = None
--- a/youtube_dl/extractor/vube.py
+++ b/youtube_dl/extractor/vube.py
@ -1,10 +1,12 @@
 from __future__ import unicode_literals

-import json
 import re

 from .common import InfoExtractor
-from ..utils import int_or_none
+from ..utils import (
+    int_or_none,
+    compat_str,
+)


 class VubeIE(InfoExtractor):
@ -29,6 +31,7 @@ class VubeIE(InfoExtractor):
                'like_count': int,
                'dislike_count': int,
                'comment_count': int,
+                'categories': ['pop', 'music', 'cover', 'singing', 'jessie j', 'price tag', 'chiara grispo'],
            }
        },
        {
@ -47,6 +50,7 @@ class VubeIE(InfoExtractor):
                'like_count': int,
                'dislike_count': int,
                'comment_count': int,
+                'categories': ['seraina', 'jessica', 'krewella', 'alive'],
            }
        }, {
            'url': 'http://vube.com/vote/Siren+Gene/0nmsMY5vEq?n=2&t=s',
@ -56,13 +60,15 @@ class VubeIE(InfoExtractor):
                'ext': 'mp4',
                'title': 'Frozen - Let It Go Cover by Siren Gene',
                'description': 'My rendition of "Let It Go" originally sung by Idina Menzel.',
-                'uploader': 'Siren Gene',
-                'uploader_id': 'Siren',
                'thumbnail': 're:^http://frame\.thestaticvube\.com/snap/[0-9x]+/10283ab622a-86c9-4681-51f2-30d1f65774af\.jpg$',
+                'uploader': 'Siren',
+                'timestamp': 1395448018,
+                'upload_date': '20140322',
                'duration': 221.788,
                'like_count': int,
                'dislike_count': int,
                'comment_count': int,
+                'categories': ['let it go', 'cover', 'idina menzel', 'frozen', 'singing', 'disney', 'siren gene'],
            }
        }
    ]
@ -71,47 +77,40 @@ class VubeIE(InfoExtractor):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

-        webpage = self._download_webpage(url, video_id)
-        data_json = self._search_regex(
-            r'(?s)window\["(?:tapiVideoData|vubeOriginalVideoData)"\]\s*=\s*(\{.*?\n});\n',
-            webpage, 'video data'
-        )
-        data = json.loads(data_json)
-        video = (
-            data.get('video') or
-            data)
-        assert isinstance(video, dict)
+        video = self._download_json(
+            'http://vube.com/t-api/v1/video/%s' % video_id, video_id, 'Downloading video JSON')

        public_id = video['public_id']

-        formats = [
-            {
-                'url': 'http://video.thestaticvube.com/video/%s/%s.mp4' % (fmt['media_resolution_id'], public_id),
-                'height': int(fmt['height']),
-                'abr': int(fmt['audio_bitrate']),
-                'vbr': int(fmt['video_bitrate']),
-                'format_id': fmt['media_resolution_id']
-            } for fmt in video['mtm'] if fmt['transcoding_status'] == 'processed'
-        ]
+        formats = []
+
+        for media in video['media'].get('video', []) + video['media'].get('audio', []):
+            if media['transcoding_status'] != 'processed':
+                continue
+            fmt = {
+                'url': 'http://video.thestaticvube.com/video/%s/%s.mp4' % (media['media_resolution_id'], public_id),
+                'abr': int(media['audio_bitrate']),
+                'format_id': compat_str(media['media_resolution_id']),
+            }
+            vbr = int(media['video_bitrate'])
+            if vbr:
+                fmt.update({
+                    'vbr': vbr,
+                    'height': int(media['height']),
+                })
+            formats.append(fmt)

        self._sort_formats(formats)

        title = video['title']
        description = video.get('description')
-        thumbnail = self._proto_relative_url(
-            video.get('thumbnail') or video.get('thumbnail_src'),
-            scheme='http:')
-        uploader = data.get('user', {}).get('channel', {}).get('name') or video.get('user_alias')
-        uploader_id = data.get('user', {}).get('name')
+        thumbnail = self._proto_relative_url(video.get('thumbnail_src'), scheme='http:')
+        uploader = video.get('user_alias') or video.get('channel')
        timestamp = int_or_none(video.get('upload_time'))
        duration = video['duration']
        view_count = video.get('raw_view_count')
-        like_count = video.get('rlikes')
-        if like_count is None:
-            like_count = video.get('total_likes')
-        dislike_count = video.get('rhates')
-        if dislike_count is None:
-            dislike_count = video.get('total_hates')
+        like_count = video.get('total_likes')
+        dislike_count = video.get('total_hates')

        comments = video.get('comments')
        comment_count = None
@ -124,6 +123,8 @@ class VubeIE(InfoExtractor):
        else:
            comment_count = len(comments)

+        categories = [tag['text'] for tag in video['tags']]
+
        return {
            'id': video_id,
            'formats': formats,
@ -131,11 +132,11 @@ class VubeIE(InfoExtractor):
            'description': description,
            'thumbnail': thumbnail,
            'uploader': uploader,
-            'uploader_id': uploader_id,
            'timestamp': timestamp,
            'duration': duration,
            'view_count': view_count,
            'like_count': like_count,
            'dislike_count': dislike_count,
            'comment_count': comment_count,
+            'categories': categories,
        }
--- a/youtube_dl/extractor/xboxclips.py
+++ b/youtube_dl/extractor/xboxclips.py
@ -0,0 +1,57 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    parse_iso8601,
+    float_or_none,
+    int_or_none,
+)
+
+
+class XboxClipsIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?xboxclips\.com/video\.php\?.*vid=(?P<id>[\w-]{36})'
+    _TEST = {
+        'url': 'https://xboxclips.com/video.php?uid=2533274823424419&gamertag=Iabdulelah&vid=074a69a9-5faf-46aa-b93b-9909c1720325',
+        'md5': 'fbe1ec805e920aeb8eced3c3e657df5d',
+        'info_dict': {
+            'id': '074a69a9-5faf-46aa-b93b-9909c1720325',
+            'ext': 'mp4',
+            'title': 'Iabdulelah playing Upload Studio',
+            'filesize_approx': 28101836.8,
+            'timestamp': 1407388500,
+            'upload_date': '20140807',
+            'duration': 56,
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+
+        video_url = self._html_search_regex(
+            r'>Link: <a href="([^"]+)">', webpage, 'video URL')
+        title = self._html_search_regex(
+            r'<title>XboxClips \| ([^<]+)</title>', webpage, 'title')
+        timestamp = parse_iso8601(self._html_search_regex(
+            r'>Recorded: ([^<]+)<', webpage, 'upload date', fatal=False))
+        filesize = float_or_none(self._html_search_regex(
+            r'>Size: ([\d\.]+)MB<', webpage, 'file size', fatal=False), invscale=1024 * 1024)
+        duration = int_or_none(self._html_search_regex(
+            r'>Duration: (\d+) Seconds<', webpage, 'duration', fatal=False))
+        view_count = int_or_none(self._html_search_regex(
+            r'>Views: (\d+)<', webpage, 'view count', fatal=False))
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': title,
+            'timestamp': timestamp,
+            'filesize_approx': filesize,
+            'duration': duration,
+            'view_count': view_count,
+        }
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@ -1273,9 +1273,15 @@ def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    if get_attr:
        if v is not None:
            v = getattr(v, get_attr, None)
+    if v == '':
+        v = None
    return default if v is None else (int(v) * invscale // scale)


+def str_or_none(v, default=None):
+    return default if v is None else compat_str(v)
+
+
 def str_to_int(int_str):
    if int_str is None:
        return None
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@ -1,2 +1,2 @@

-__version__ = '2014.08.02.1'
+__version__ = '2014.08.10'
Author	SHA1	Message	Date
Philipp Hagemeister	90e075da3a	release 2014.08.10	2014-08-10 19:47:15 +02:00
Philipp Hagemeister	9572013de9	[appletrailers] Support height-less videos	2014-08-10 13:04:45 +02:00
Sergey M․	3a5beb0ca1	[ard] Show error message for videos that are no longer available (#3422 )	2014-08-10 17:53:17 +07:00
Jaime Marquínez Ferrándiz	a6da7b6b96	[facebook] Allow '?' before '#!' (fixes #3477 )	2014-08-10 11:57:15 +02:00
Jaime Marquínez Ferrándiz	173a7026d5	[test/test_utils] Fix typo in method name	2014-08-10 11:08:56 +02:00
Jaime Marquínez Ferrándiz	40a90862f4	[reverbnation] The 'uploader_id' field must be a string	2014-08-10 11:00:14 +02:00
Jaime Marquínez Ferrándiz	511c4325dc	[reverbnation] Simplify json download We can directly get a json file instead of the jsonp.	2014-08-10 10:58:22 +02:00
Jaime Marquínez Ferrándiz	85a699246a	[reverbnation] Modernize test	2014-08-10 10:56:37 +02:00
Jaime Marquínez Ferrándiz	4dc5286e13	[reverbnation] Make sure that the thumbnail url contain the protocol They are protocol relative.	2014-08-10 10:45:27 +02:00
Sergey M․	c767dc74b8	[downloader/common] Fix typo	2014-08-10 01:41:01 +07:00
Sergey M․	56ca04f662	Credit @sehaas for ORF FM4 extractor (#3431 )	2014-08-10 01:26:23 +07:00
Sergey M․	eb3680123a	[orf] Move all ORF extractors in one place	2014-08-10 01:21:16 +07:00
Sergey M․	f5273890ee	[fm4] Remove unused imports and minor changes	2014-08-10 01:04:10 +07:00
Sergey M.	c7a088a816	Merge pull request #3431 from sehaas/fm4 [fm4] Add new extractor	2014-08-10 00:55:56 +07:00
Sergey M․	fb17b60811	[arte] Do not filter formats when there are no videos of requested lang code (Closes #3433 )	2014-08-09 05:45:15 +07:00
Sergey M․	1e58804260	Merge branch 'pyed-xboxclips'	2014-08-08 19:22:31 +07:00
Sergey M․	31bf213032	[xboxclips] PEP8 and extract more metadata	2014-08-08 19:21:24 +07:00
Sergey M․	1cccc41ddc	Merge branch 'xboxclips' of https://github.com/pyed/youtube-dl into pyed-xboxclips	2014-08-08 18:48:10 +07:00
Sergey M․	a91cf27767	[nowness] Add support for cn URLs (Closes #3465 )	2014-08-08 18:43:28 +07:00
pyed	64d02399d8	[xboxclips] Add new extractor	2014-08-08 09:48:02 +03:00
Sergey M․	5961017202	[vube] Extract audio and categories	2014-08-07 20:04:29 +07:00
Sergey M.	d9760fd43c	Merge pull request #3461 from tinybug/patch-2 Update vube.py	2014-08-07 19:14:48 +07:00
tinybug	d42b2d2985	Update vube.py fix extractor is broken #3459	2014-08-07 11:24:51 +08:00
Philipp Hagemeister	cccfab6412	Restore youtube-dl compat binary Be on the lookout, it might be modified in pull requests. When I come back from my vacation (in three days from now), I'll start looking whether we really need the compat binary.	2014-08-06 19:30:16 +02:00
Sergey M․	4665664c92	Credit @DavidFabijan for mojvideo (#3423 )	2014-08-06 20:40:55 +07:00
Sergey M․	0adc996bc3	Merge branch 'DavidFabijan-mojvideo'	2014-08-06 20:38:27 +07:00
Sergey M․	b42a2a720b	[mojvideo] Switch to API, handle errors, remove faked width and height	2014-08-06 20:37:59 +07:00
Sergey M․	37edd7dd4a	Merge branch 'mojvideo' of https://github.com/DavidFabijan/youtube-dl into DavidFabijan-mojvideo	2014-08-06 20:06:48 +07:00
Sergey M.	f87b3500c5	Merge pull request #3453 from naglis/firedrive_fix [firedrive] fix broken extractor	2014-08-06 19:48:45 +07:00
David Fabijan	66420a2db4	Fixed the encoding	2014-08-06 14:44:29 +02:00
Naglis Jonaitis	6b8492a782	[firedrive] fix broken extractor	2014-08-06 02:26:42 +03:00
Philipp Hagemeister	6de0595eb8	release 2014.08.05	2014-08-05 17:02:47 +02:00
Sergey M․	e48a2c646d	Credit @matrixik for #3441	2014-08-05 19:09:11 +07:00
Sergey M.	0f831a1a92	Merge pull request #3441 from matrixik/patch-1 [vimeo] Ignore video 'base' thumbnail (Closes #3438)	2014-08-05 19:07:05 +07:00
Dobrosław Żybort	3e510af38d	[vimeo] Ignore video 'base' thumbnail (Closes #3438 )	2014-08-04 21:37:36 +02:00
Sebastian Haas	5ecd7b0a92	[fm4] Add new extractor	2014-08-03 20:50:46 +02:00
Sergey M․	548f31d99c	[vimeo] Use original URL when for standard vimeo.com links (Closes #3428 ) Some videos that are freely accessible without password via the original URL (e.g. http://vimeo.com/channels/keypeele/75629013) ask for password when accessed via http://vimeo.com/<video_id>.	2014-08-04 00:04:47 +07:00
David Fabijan	78b296b0ff	[Mojvideo] Add new extractor (minor changes)	2014-08-03 11:56:32 +02:00
David Fabijan	be79b07907	[Mojvideo] Add new extractor (minor changes)	2014-08-03 11:55:51 +02:00
David Fabijan	5537dce84d	[Mojvideo] Add new extractor	2014-08-03 10:50:25 +02:00
Sergey M․	493987fefe	[ubu] Add missing whitespace	2014-08-03 01:20:51 +07:00