Compare commits

..

41 Commits

Author SHA1 Message Date
90e075da3a release 2014.08.10 2014-08-10 19:47:15 +02:00
9572013de9 [appletrailers] Support height-less videos 2014-08-10 13:04:45 +02:00
3a5beb0ca1 [ard] Show error message for videos that are no longer available (#3422) 2014-08-10 17:53:17 +07:00
a6da7b6b96 [facebook] Allow '?' before '#!' (fixes #3477) 2014-08-10 11:57:15 +02:00
173a7026d5 [test/test_utils] Fix typo in method name 2014-08-10 11:08:56 +02:00
40a90862f4 [reverbnation] The 'uploader_id' field must be a string 2014-08-10 11:00:14 +02:00
511c4325dc [reverbnation] Simplify json download
We can directly get a json file instead of the jsonp.
2014-08-10 10:58:22 +02:00
85a699246a [reverbnation] Modernize test 2014-08-10 10:56:37 +02:00
4dc5286e13 [reverbnation] Make sure that the thumbnail url contains the protocol
They are protocol relative.
2014-08-10 10:45:27 +02:00
c767dc74b8 [downloader/common] Fix typo 2014-08-10 01:41:01 +07:00
56ca04f662 Credit @sehaas for ORF FM4 extractor (#3431) 2014-08-10 01:26:23 +07:00
eb3680123a [orf] Move all ORF extractors in one place 2014-08-10 01:21:16 +07:00
f5273890ee [fm4] Remove unused imports and minor changes 2014-08-10 01:04:10 +07:00
c7a088a816 Merge pull request #3431 from sehaas/fm4
[fm4] Add new extractor
2014-08-10 00:55:56 +07:00
fb17b60811 [arte] Do not filter formats when there are no videos of requested lang code (Closes #3433) 2014-08-09 05:45:15 +07:00
1e58804260 Merge branch 'pyed-xboxclips' 2014-08-08 19:22:31 +07:00
31bf213032 [xboxclips] PEP8 and extract more metadata 2014-08-08 19:21:24 +07:00
1cccc41ddc Merge branch 'xboxclips' of https://github.com/pyed/youtube-dl into pyed-xboxclips 2014-08-08 18:48:10 +07:00
a91cf27767 [nowness] Add support for cn URLs (Closes #3465) 2014-08-08 18:43:28 +07:00
64d02399d8 [xboxclips] Add new extractor 2014-08-08 09:48:02 +03:00
5961017202 [vube] Extract audio and categories 2014-08-07 20:04:29 +07:00
d9760fd43c Merge pull request #3461 from tinybug/patch-2
Update vube.py
2014-08-07 19:14:48 +07:00
d42b2d2985 Update vube.py
fix extractor is broken #3459
2014-08-07 11:24:51 +08:00
cccfab6412 Restore youtube-dl compat binary
Be on the lookout, it might be modified in pull requests.
When I come back from my vacation (in three days from now), I'll start looking whether we really need the compat binary.
2014-08-06 19:30:16 +02:00
4665664c92 Credit @DavidFabijan for mojvideo (#3423) 2014-08-06 20:40:55 +07:00
0adc996bc3 Merge branch 'DavidFabijan-mojvideo' 2014-08-06 20:38:27 +07:00
b42a2a720b [mojvideo] Switch to API, handle errors, remove faked width and height 2014-08-06 20:37:59 +07:00
37edd7dd4a Merge branch 'mojvideo' of https://github.com/DavidFabijan/youtube-dl into DavidFabijan-mojvideo 2014-08-06 20:06:48 +07:00
f87b3500c5 Merge pull request #3453 from naglis/firedrive_fix
[firedrive] fix broken extractor
2014-08-06 19:48:45 +07:00
66420a2db4 Fixed the encoding 2014-08-06 14:44:29 +02:00
6b8492a782 [firedrive] fix broken extractor 2014-08-06 02:26:42 +03:00
6de0595eb8 release 2014.08.05 2014-08-05 17:02:47 +02:00
e48a2c646d Credit @matrixik for #3441 2014-08-05 19:09:11 +07:00
0f831a1a92 Merge pull request #3441 from matrixik/patch-1
[vimeo] Ignore video 'base' thumbnail (Closes #3438)
2014-08-05 19:07:05 +07:00
3e510af38d [vimeo] Ignore video 'base' thumbnail (Closes #3438) 2014-08-04 21:37:36 +02:00
5ecd7b0a92 [fm4] Add new extractor 2014-08-03 20:50:46 +02:00
548f31d99c [vimeo] Use original URL for standard vimeo.com links (Closes #3428)
Some videos that are freely accessible without password via the original URL (e.g. http://vimeo.com/channels/keypeele/75629013)
ask for password when accessed via http://vimeo.com/<video_id>.
2014-08-04 00:04:47 +07:00
78b296b0ff [Mojvideo] Add new extractor (minor changes) 2014-08-03 11:56:32 +02:00
be79b07907 [Mojvideo] Add new extractor (minor changes) 2014-08-03 11:55:51 +02:00
5537dce84d [Mojvideo] Add new extractor 2014-08-03 10:50:25 +02:00
493987fefe [ubu] Add missing whitespace 2014-08-03 01:20:51 +07:00
20 changed files with 317 additions and 117 deletions

View File

@ -99,6 +99,7 @@ class TestAllURLsMatching(unittest.TestCase):
def test_facebook_matching(self): def test_facebook_matching(self):
self.assertTrue(FacebookIE.suitable('https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268')) self.assertTrue(FacebookIE.suitable('https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268'))
self.assertTrue(FacebookIE.suitable('https://www.facebook.com/cindyweather?fref=ts#!/photo.php?v=10152183998945793'))
def test_no_duplicates(self): def test_no_duplicates(self):
ies = gen_extractors() ies = gen_extractors()

View File

@ -280,7 +280,7 @@ class TestUtil(unittest.TestCase):
d = json.loads(stripped) d = json.loads(stripped)
self.assertEqual(d, [{"id": "532cb", "x": 3}]) self.assertEqual(d, [{"id": "532cb", "x": 3}])
def test_uppercase_escpae(self): def test_uppercase_escape(self):
self.assertEqual(uppercase_escape(u''), u'') self.assertEqual(uppercase_escape(u''), u'')
self.assertEqual(uppercase_escape(u'\\U0001d550'), u'𝕐') self.assertEqual(uppercase_escape(u'\\U0001d550'), u'𝕐')

View File

@ -66,6 +66,9 @@ __authors__ = (
'Naglis Jonaitis', 'Naglis Jonaitis',
'Charles Chen', 'Charles Chen',
'Hassaan Ali', 'Hassaan Ali',
'Dobrosław Żybort',
'David Fabijan',
'Sebastian Haas',
) )
__license__ = 'Public Domain' __license__ = 'Public Domain'

View File

@ -292,7 +292,7 @@ class FileDownloader(object):
def real_download(self, filename, info_dict): def real_download(self, filename, info_dict):
"""Real download process. Redefine in subclasses.""" """Real download process. Redefine in subclasses."""
raise NotImplementedError(u'This method must be implemented by sublcasses') raise NotImplementedError(u'This method must be implemented by subclasses')
def _hook_progress(self, status): def _hook_progress(self, status):
for ph in self._progress_hooks: for ph in self._progress_hooks:

View File

@ -181,6 +181,7 @@ from .mixcloud import MixcloudIE
from .mlb import MLBIE from .mlb import MLBIE
from .mpora import MporaIE from .mpora import MporaIE
from .mofosex import MofosexIE from .mofosex import MofosexIE
from .mojvideo import MojvideoIE
from .mooshare import MooshareIE from .mooshare import MooshareIE
from .morningstar import MorningstarIE from .morningstar import MorningstarIE
from .motherless import MotherlessIE from .motherless import MotherlessIE
@ -224,9 +225,12 @@ from .nrk import (
from .ntv import NTVIE from .ntv import NTVIE
from .nytimes import NYTimesIE from .nytimes import NYTimesIE
from .nuvid import NuvidIE from .nuvid import NuvidIE
from .oe1 import OE1IE
from .ooyala import OoyalaIE from .ooyala import OoyalaIE
from .orf import ORFIE from .orf import (
ORFTVthekIE,
ORFOE1IE,
ORFFM4IE,
)
from .parliamentliveuk import ParliamentLiveUKIE from .parliamentliveuk import ParliamentLiveUKIE
from .pbs import PBSIE from .pbs import PBSIE
from .photobucket import PhotobucketIE from .photobucket import PhotobucketIE
@ -327,7 +331,7 @@ from .tutv import TutvIE
from .tvigle import TvigleIE from .tvigle import TvigleIE
from .tvp import TvpIE from .tvp import TvpIE
from .tvplay import TVPlayIE from .tvplay import TVPlayIE
from.ubu import UbuIE from .ubu import UbuIE
from .udemy import ( from .udemy import (
UdemyIE, UdemyIE,
UdemyCourseIE UdemyCourseIE
@ -383,6 +387,7 @@ from .wistia import WistiaIE
from .worldstarhiphop import WorldStarHipHopIE from .worldstarhiphop import WorldStarHipHopIE
from .wrzuta import WrzutaIE from .wrzuta import WrzutaIE
from .xbef import XBefIE from .xbef import XBefIE
from .xboxclips import XboxClipsIE
from .xhamster import XHamsterIE from .xhamster import XHamsterIE
from .xnxx import XNXXIE from .xnxx import XNXXIE
from .xvideos import XVideosIE from .xvideos import XVideosIE

View File

@ -6,6 +6,7 @@ import json
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
compat_urlparse, compat_urlparse,
int_or_none,
) )
@ -110,8 +111,8 @@ class AppleTrailersIE(InfoExtractor):
formats.append({ formats.append({
'url': format_url, 'url': format_url,
'format': format['type'], 'format': format['type'],
'width': format['width'], 'width': int_or_none(format['width']),
'height': int(format['height']), 'height': int_or_none(format['height']),
}) })
self._sort_formats(formats) self._sort_formats(formats)

View File

@ -51,6 +51,9 @@ class ARDIE(InfoExtractor):
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
if '>Der gewünschte Beitrag ist nicht mehr verfügbar.<' in webpage:
raise ExtractorError('Video %s is no longer available' % video_id, expected=True)
title = self._html_search_regex( title = self._html_search_regex(
[r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>', [r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',
r'<meta name="dcterms.title" content="(.*?)"/>', r'<meta name="dcterms.title" content="(.*?)"/>',

View File

@ -109,15 +109,19 @@ class ArteTVPlus7IE(InfoExtractor):
regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l] regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l]
return any(re.match(r, f['versionCode']) for r in regexes) return any(re.match(r, f['versionCode']) for r in regexes)
# Some formats may not be in the same language as the url # Some formats may not be in the same language as the url
# TODO: Might want not to drop videos that does not match requested language
# but to process those formats with lower precedence
formats = filter(_match_lang, all_formats) formats = filter(_match_lang, all_formats)
formats = list(formats) # in python3 filter returns an iterator formats = list(formats) # in python3 filter returns an iterator
if not formats: if not formats:
# Some videos are only available in the 'Originalversion' # Some videos are only available in the 'Originalversion'
# they aren't tagged as being in French or German # they aren't tagged as being in French or German
if all(f['versionCode'] == 'VO' or f['versionCode'] == 'VA' for f in all_formats): # Sometimes there are neither videos of requested lang code
# nor original version videos available
# For such cases we just take all_formats as is
formats = all_formats formats = all_formats
else: if not formats:
raise ExtractorError(u'The formats list is empty') raise ExtractorError('The formats list is empty')
if re.match(r'[A-Z]Q', formats[0]['quality']) is not None: if re.match(r'[A-Z]Q', formats[0]['quality']) is not None:
def sort_key(f): def sort_key(f):

View File

@ -20,7 +20,7 @@ from ..utils import (
class FacebookIE(InfoExtractor): class FacebookIE(InfoExtractor):
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
https?://(?:\w+\.)?facebook\.com/ https?://(?:\w+\.)?facebook\.com/
(?:[^#?]*\#!/)? (?:[^#]*?\#!/)?
(?:video/video\.php|photo\.php|video/embed)\?(?:.*?) (?:video/video\.php|photo\.php|video/embed)\?(?:.*?)
(?:v|video_id)=(?P<id>[0-9]+) (?:v|video_id)=(?P<id>[0-9]+)
(?:.*)''' (?:.*)'''

View File

@ -42,7 +42,6 @@ class FiredriveIE(InfoExtractor):
fields = dict(re.findall(r'''(?x)<input\s+ fields = dict(re.findall(r'''(?x)<input\s+
type="hidden"\s+ type="hidden"\s+
name="([^"]+)"\s+ name="([^"]+)"\s+
(?:id="[^"]+"\s+)?
value="([^"]*)" value="([^"]*)"
''', webpage)) ''', webpage))
@ -66,7 +65,7 @@ class FiredriveIE(InfoExtractor):
ext = self._search_regex(r'type:\s?\'([^\']+)\',', ext = self._search_regex(r'type:\s?\'([^\']+)\',',
webpage, 'extension', fatal=False) webpage, 'extension', fatal=False)
video_url = self._search_regex( video_url = self._search_regex(
r'file:\s?\'(http[^\']+)\',', webpage, 'file url') r'file:\s?loadURL\(\'(http[^\']+)\'\),', webpage, 'file url')
formats = [{ formats = [{
'format_id': 'sd', 'format_id': 'sd',

View File

@ -0,0 +1,58 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
parse_duration,
)
class MojvideoIE(InfoExtractor):
    """Information extractor for video pages on mojvideo.com.

    Metadata is scraped from the site's ``playerapi.php`` response rather
    than from the HTML page of the video itself.
    """

    _VALID_URL = r'https?://(?:www\.)?mojvideo\.com/video-(?P<display_id>[^/]+)/(?P<id>[a-f0-9]+)'
    _TEST = {
        'url': 'http://www.mojvideo.com/video-v-avtu-pred-mano-rdecelaska-alfi-nipic/3d1ed4497707730b2906',
        'md5': 'f7fd662cc8ce2be107b0d4f2c0483ae7',
        'info_dict': {
            'id': '3d1ed4497707730b2906',
            'display_id': 'v-avtu-pred-mano-rdecelaska-alfi-nipic',
            'ext': 'mp4',
            'title': 'V avtu pred mano rdečelaska - Alfi Nipič',
            'thumbnail': 're:^http://.*\.jpg$',
            'duration': 242,
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the video addressed by *url*.

        Raises ExtractorError (marked as expected) when the player API
        response flags an error; the site's own error description is
        included in the message.
        """
        url_match = re.match(self._VALID_URL, url)
        display_id = url_match.group('display_id')
        video_id = url_match.group('id')

        # The API answers with malformed XML, so it is downloaded as plain
        # text and picked apart with regular expressions instead of an
        # XML parser.
        api_url = 'http://www.mojvideo.com/playerapi.php?v=%s&t=1' % video_id
        api_response = self._download_webpage(api_url, display_id)

        if '<error>true</error>' in api_response:
            reason = self._html_search_regex(
                r'<errordesc>([^<]*)</errordesc>', api_response,
                'error description', fatal=False)
            raise ExtractorError(
                '%s said: %s' % (self.IE_NAME, reason), expected=True)

        # Title and media URL are mandatory; thumbnail and duration are
        # optional and only searched for non-fatally.
        title = self._html_search_regex(
            r'<title>([^<]+)</title>', api_response, 'title')
        media_url = self._html_search_regex(
            r'<file>([^<]+)</file>', api_response, 'video URL')
        preview = self._html_search_regex(
            r'<preview>([^<]+)</preview>', api_response, 'thumbnail',
            fatal=False)
        raw_duration = self._html_search_regex(
            r'<duration>([^<]+)</duration>', api_response, 'duration',
            fatal=False)
        length = parse_duration(raw_duration)

        return {
            'id': video_id,
            'display_id': display_id,
            'url': media_url,
            'title': title,
            'thumbnail': preview,
            'duration': length,
        }

View File

@ -1,3 +1,4 @@
# encoding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re import re
@ -8,19 +9,34 @@ from ..utils import ExtractorError
class NownessIE(InfoExtractor): class NownessIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?nowness\.com/[^?#]*?/(?P<id>[0-9]+)/(?P<slug>[^/]+?)(?:$|[?#])' _VALID_URL = r'https?://(?:(?:www|cn)\.)?nowness\.com/[^?#]*?/(?P<id>[0-9]+)/(?P<slug>[^/]+?)(?:$|[?#])'
_TEST = { _TESTS = [
{
'url': 'http://www.nowness.com/day/2013/6/27/3131/candor--the-art-of-gesticulation', 'url': 'http://www.nowness.com/day/2013/6/27/3131/candor--the-art-of-gesticulation',
'md5': '068bc0202558c2e391924cb8cc470676', 'md5': '068bc0202558c2e391924cb8cc470676',
'info_dict': { 'info_dict': {
'id': '2520295746001', 'id': '2520295746001',
'ext': 'mp4', 'ext': 'mp4',
'description': 'Candor: The Art of Gesticulation',
'uploader': 'Nowness',
'title': 'Candor: The Art of Gesticulation', 'title': 'Candor: The Art of Gesticulation',
'description': 'Candor: The Art of Gesticulation',
'thumbnail': 're:^https?://.*\.jpg',
'uploader': 'Nowness',
} }
},
{
'url': 'http://cn.nowness.com/day/2014/8/7/4069/kasper-bj-rke-ft-jaakko-eino-kalevi--tnr',
'md5': 'e79cf125e387216f86b2e0a5b5c63aa3',
'info_dict': {
'id': '3716354522001',
'ext': 'mp4',
'title': 'Kasper Bjørke ft. Jaakko Eino Kalevi: TNR',
'description': 'Kasper Bjørke ft. Jaakko Eino Kalevi: TNR',
'thumbnail': 're:^https?://.*\.jpg',
'uploader': 'Nowness',
} }
},
]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)

View File

@ -1,40 +0,0 @@
# coding: utf-8
from __future__ import unicode_literals
import calendar
import datetime
import re
from .common import InfoExtractor
# audios on oe1.orf.at are only available for 7 days, so we can't
# add tests.
class OE1IE(InfoExtractor):
    """Information extractor for programme pages on oe1.orf.at."""

    IE_DESC = 'oe1.orf.at'
    _VALID_URL = r'http://oe1\.orf\.at/programm/(?P<id>[0-9]+)'

    def _real_extract(self, url):
        """Return the info dict for the programme addressed by *url*.

        All metadata comes from the ``item`` object of the programme's
        ``konsole`` JSON document.
        """
        show_id = re.match(self._VALID_URL, url).group('id')

        konsole_url = 'http://oe1.orf.at/programm/%s/konsole' % show_id
        item = self._download_json(konsole_url, show_id)['item']

        # Date and time arrive as two separate strings; join them and parse
        # them in one go.  The resulting naive datetime is converted to a
        # Unix timestamp as if it were UTC.
        broadcast_start = datetime.datetime.strptime(
            '%s %s' % (item['day_label'], item['time']),
            '%d.%m.%Y %H:%M')
        unix_timestamp = calendar.timegm(broadcast_start.utctimetuple())

        return {
            'id': show_id,
            'title': item['title'],
            'url': item['url_stream'],
            'ext': 'mp3',
            'description': item.get('info'),
            'timestamp': unix_timestamp
        }

View File

@ -3,6 +3,8 @@ from __future__ import unicode_literals
import json import json
import re import re
import calendar
import datetime
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
@ -12,7 +14,9 @@ from ..utils import (
) )
class ORFIE(InfoExtractor): class ORFTVthekIE(InfoExtractor):
IE_NAME = 'orf:tvthek'
IE_DESC = 'ORF TVthek'
_VALID_URL = r'https?://tvthek\.orf\.at/(?:programs/.+?/episodes|topics/.+?|program/[^/]+)/(?P<id>\d+)' _VALID_URL = r'https?://tvthek\.orf\.at/(?:programs/.+?/episodes|topics/.+?|program/[^/]+)/(?P<id>\d+)'
_TEST = { _TEST = {
@ -105,3 +109,73 @@ class ORFIE(InfoExtractor):
'entries': entries, 'entries': entries,
'id': playlist_id, 'id': playlist_id,
} }
# Audios on ORF radio are only available for 7 days, so we can't add tests.
class ORFOE1IE(InfoExtractor):
    """Information extractor for programme pages of ORF radio Ö1 (oe1.orf.at)."""

    IE_NAME = 'orf:oe1'
    IE_DESC = 'Radio Österreich 1'
    _VALID_URL = r'http://oe1\.orf\.at/programm/(?P<id>[0-9]+)'

    def _real_extract(self, url):
        """Return the info dict for the programme addressed by *url*.

        The programme's ``konsole`` JSON document is downloaded and its
        ``item`` object supplies title, stream URL, description and the
        broadcast date/time.
        """
        show_id = re.match(self._VALID_URL, url).group('id')

        konsole_url = 'http://oe1.orf.at/programm/%s/konsole' % show_id
        item = self._download_json(konsole_url, show_id)['item']

        # 'day_label' and 'time' are separate strings; combine them so one
        # strptime call can parse both.  The naive result is turned into a
        # Unix timestamp as if it were UTC.
        start_text = '%s %s' % (item['day_label'], item['time'])
        start = datetime.datetime.strptime(start_text, '%d.%m.%Y %H:%M')
        unix_timestamp = calendar.timegm(start.utctimetuple())

        return {
            'id': show_id,
            'title': item['title'],
            'url': item['url_stream'],
            'ext': 'mp3',
            'description': item.get('info'),
            'timestamp': unix_timestamp
        }
class ORFFM4IE(InfoExtractor):
    """Information extractor for the "7 Tage" archive of ORF radio FM4.

    One show URL yields a playlist with one entry per stream listed for
    the broadcast.
    """

    # This was a second `IE_DESC` assignment that the line below silently
    # overwrote; the sibling ORF extractors use `IE_NAME = 'orf:...'`.
    IE_NAME = 'orf:fm4'
    IE_DESC = 'radio FM4'
    _VALID_URL = r'http://fm4\.orf\.at/7tage/?#(?P<date>[0-9]+)/(?P<show>\w+)'

    def _real_extract(self, url):
        """Return a playlist info dict for the show addressed by *url*.

        The date and show key are taken from the URL fragment and used to
        query the broadcast JSON on audioapi.orf.at.
        """
        mobj = re.match(self._VALID_URL, url)
        show_date = mobj.group('date')
        show_id = mobj.group('show')

        data = self._download_json(
            'http://audioapi.orf.at/fm4/json/2.0/broadcasts/%s/4%s' % (show_date, show_id),
            show_id
        )

        def extract_entry_dict(info, title, subtitle):
            """Build the info dict for a single stream of the broadcast."""
            return {
                'id': info['loopStreamId'].replace('.mp3', ''),
                'url': 'http://loopstream01.apa.at/?channel=fm4&id=%s' % info['loopStreamId'],
                'title': title,
                'description': subtitle,
                # NOTE(review): 'start'/'end' are presumably milliseconds
                # (hence the division by 1000) — confirm against the API.
                # On Python 3 `/` yields floats here.
                'duration': (info['end'] - info['start']) / 1000,
                'timestamp': info['start'] / 1000,
                'ext': 'mp3'
            }

        # Every stream inherits the title and subtitle of the broadcast.
        entries = [extract_entry_dict(t, data['title'], data['subtitle']) for t in data['streams']]

        return {
            '_type': 'playlist',
            'id': show_id,
            'title': data['title'],
            'description': data['subtitle'],
            'entries': entries
        }

View File

@ -1,23 +1,23 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re import re
import time
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import strip_jsonp from ..utils import str_or_none
class ReverbNationIE(InfoExtractor): class ReverbNationIE(InfoExtractor):
_VALID_URL = r'^https?://(?:www\.)?reverbnation\.com/.*?/song/(?P<id>\d+).*?$' _VALID_URL = r'^https?://(?:www\.)?reverbnation\.com/.*?/song/(?P<id>\d+).*?$'
_TESTS = [{ _TESTS = [{
'url': 'http://www.reverbnation.com/alkilados/song/16965047-mona-lisa', 'url': 'http://www.reverbnation.com/alkilados/song/16965047-mona-lisa',
'file': '16965047.mp3',
'md5': '3da12ebca28c67c111a7f8b262d3f7a7', 'md5': '3da12ebca28c67c111a7f8b262d3f7a7',
'info_dict': { 'info_dict': {
"id": "16965047",
"ext": "mp3",
"title": "MONA LISA", "title": "MONA LISA",
"uploader": "ALKILADOS", "uploader": "ALKILADOS",
"uploader_id": 216429, "uploader_id": "216429",
"thumbnail": "//gp1.wac.edgecastcdn.net/802892/production_public/Photo/13761700/image/1366002176_AVATAR_MONA_LISA.jpg" "thumbnail": "re:^https://gp1\.wac\.edgecastcdn\.net/.*?\.jpg$"
}, },
}] }]
@ -26,10 +26,8 @@ class ReverbNationIE(InfoExtractor):
song_id = mobj.group('id') song_id = mobj.group('id')
api_res = self._download_json( api_res = self._download_json(
'https://api.reverbnation.com/song/%s?callback=api_response_5&_=%d' 'https://api.reverbnation.com/song/%s' % song_id,
% (song_id, int(time.time() * 1000)),
song_id, song_id,
transform_source=strip_jsonp,
note='Downloading information of song %s' % song_id note='Downloading information of song %s' % song_id
) )
@ -38,8 +36,9 @@ class ReverbNationIE(InfoExtractor):
'title': api_res.get('name'), 'title': api_res.get('name'),
'url': api_res.get('url'), 'url': api_res.get('url'),
'uploader': api_res.get('artist', {}).get('name'), 'uploader': api_res.get('artist', {}).get('name'),
'uploader_id': api_res.get('artist', {}).get('id'), 'uploader_id': str_or_none(api_res.get('artist', {}).get('id')),
'thumbnail': api_res.get('image', api_res.get('thumbnail')), 'thumbnail': self._proto_relative_url(
api_res.get('image', api_res.get('thumbnail'))),
'ext': 'mp3', 'ext': 'mp3',
'vcodec': 'none', 'vcodec': 'none',
} }

View File

@ -121,6 +121,21 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
'videopassword': 'youtube-dl', 'videopassword': 'youtube-dl',
}, },
}, },
{
'url': 'http://vimeo.com/channels/keypeele/75629013',
'md5': '2f86a05afe9d7abc0b9126d229bbe15d',
'note': 'Video is freely available via original URL '
'and protected with password when accessed via http://vimeo.com/75629013',
'info_dict': {
'id': '75629013',
'ext': 'mp4',
'title': 'Key & Peele: Terrorist Interrogation',
'description': 'md5:8678b246399b070816b12313e8b4eb5c',
'uploader_id': 'atencio',
'uploader': 'Peter Atencio',
'duration': 187,
},
},
{ {
'url': 'http://vimeo.com/76979871', 'url': 'http://vimeo.com/76979871',
'md5': '3363dd6ffebe3784d56f4132317fd446', 'md5': '3363dd6ffebe3784d56f4132317fd446',
@ -196,8 +211,6 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
video_id = mobj.group('id') video_id = mobj.group('id')
if mobj.group('pro') or mobj.group('player'): if mobj.group('pro') or mobj.group('player'):
url = 'http://player.vimeo.com/video/' + video_id url = 'http://player.vimeo.com/video/' + video_id
else:
url = 'https://vimeo.com/' + video_id
# Retrieve video webpage to extract further information # Retrieve video webpage to extract further information
request = compat_urllib_request.Request(url, None, headers) request = compat_urllib_request.Request(url, None, headers)
@ -263,7 +276,7 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
if video_thumbnail is None: if video_thumbnail is None:
video_thumbs = config["video"].get("thumbs") video_thumbs = config["video"].get("thumbs")
if video_thumbs and isinstance(video_thumbs, dict): if video_thumbs and isinstance(video_thumbs, dict):
_, video_thumbnail = sorted((int(width), t_url) for (width, t_url) in video_thumbs.items())[-1] _, video_thumbnail = sorted((int(width if width.isdigit() else 0), t_url) for (width, t_url) in video_thumbs.items())[-1]
# Extract video description # Extract video description
video_description = None video_description = None

View File

@ -1,10 +1,12 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import json
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import int_or_none from ..utils import (
int_or_none,
compat_str,
)
class VubeIE(InfoExtractor): class VubeIE(InfoExtractor):
@ -29,6 +31,7 @@ class VubeIE(InfoExtractor):
'like_count': int, 'like_count': int,
'dislike_count': int, 'dislike_count': int,
'comment_count': int, 'comment_count': int,
'categories': ['pop', 'music', 'cover', 'singing', 'jessie j', 'price tag', 'chiara grispo'],
} }
}, },
{ {
@ -47,6 +50,7 @@ class VubeIE(InfoExtractor):
'like_count': int, 'like_count': int,
'dislike_count': int, 'dislike_count': int,
'comment_count': int, 'comment_count': int,
'categories': ['seraina', 'jessica', 'krewella', 'alive'],
} }
}, { }, {
'url': 'http://vube.com/vote/Siren+Gene/0nmsMY5vEq?n=2&t=s', 'url': 'http://vube.com/vote/Siren+Gene/0nmsMY5vEq?n=2&t=s',
@ -56,13 +60,15 @@ class VubeIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'Frozen - Let It Go Cover by Siren Gene', 'title': 'Frozen - Let It Go Cover by Siren Gene',
'description': 'My rendition of "Let It Go" originally sung by Idina Menzel.', 'description': 'My rendition of "Let It Go" originally sung by Idina Menzel.',
'uploader': 'Siren Gene',
'uploader_id': 'Siren',
'thumbnail': 're:^http://frame\.thestaticvube\.com/snap/[0-9x]+/10283ab622a-86c9-4681-51f2-30d1f65774af\.jpg$', 'thumbnail': 're:^http://frame\.thestaticvube\.com/snap/[0-9x]+/10283ab622a-86c9-4681-51f2-30d1f65774af\.jpg$',
'uploader': 'Siren',
'timestamp': 1395448018,
'upload_date': '20140322',
'duration': 221.788, 'duration': 221.788,
'like_count': int, 'like_count': int,
'dislike_count': int, 'dislike_count': int,
'comment_count': int, 'comment_count': int,
'categories': ['let it go', 'cover', 'idina menzel', 'frozen', 'singing', 'disney', 'siren gene'],
} }
} }
] ]
@ -71,46 +77,39 @@ class VubeIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id') video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id) video = self._download_json(
data_json = self._search_regex( 'http://vube.com/t-api/v1/video/%s' % video_id, video_id, 'Downloading video JSON')
r'(?s)window\["(?:tapiVideoData|vubeOriginalVideoData)"\]\s*=\s*(\{.*?\n});\n',
webpage, 'video data'
)
data = json.loads(data_json)
video = (
data.get('video') or
data)
assert isinstance(video, dict)
public_id = video['public_id'] public_id = video['public_id']
formats = [ formats = []
{
'url': 'http://video.thestaticvube.com/video/%s/%s.mp4' % (fmt['media_resolution_id'], public_id), for media in video['media'].get('video', []) + video['media'].get('audio', []):
'height': int(fmt['height']), if media['transcoding_status'] != 'processed':
'abr': int(fmt['audio_bitrate']), continue
'vbr': int(fmt['video_bitrate']), fmt = {
'format_id': fmt['media_resolution_id'] 'url': 'http://video.thestaticvube.com/video/%s/%s.mp4' % (media['media_resolution_id'], public_id),
} for fmt in video['mtm'] if fmt['transcoding_status'] == 'processed' 'abr': int(media['audio_bitrate']),
] 'format_id': compat_str(media['media_resolution_id']),
}
vbr = int(media['video_bitrate'])
if vbr:
fmt.update({
'vbr': vbr,
'height': int(media['height']),
})
formats.append(fmt)
self._sort_formats(formats) self._sort_formats(formats)
title = video['title'] title = video['title']
description = video.get('description') description = video.get('description')
thumbnail = self._proto_relative_url( thumbnail = self._proto_relative_url(video.get('thumbnail_src'), scheme='http:')
video.get('thumbnail') or video.get('thumbnail_src'), uploader = video.get('user_alias') or video.get('channel')
scheme='http:')
uploader = data.get('user', {}).get('channel', {}).get('name') or video.get('user_alias')
uploader_id = data.get('user', {}).get('name')
timestamp = int_or_none(video.get('upload_time')) timestamp = int_or_none(video.get('upload_time'))
duration = video['duration'] duration = video['duration']
view_count = video.get('raw_view_count') view_count = video.get('raw_view_count')
like_count = video.get('rlikes')
if like_count is None:
like_count = video.get('total_likes') like_count = video.get('total_likes')
dislike_count = video.get('rhates')
if dislike_count is None:
dislike_count = video.get('total_hates') dislike_count = video.get('total_hates')
comments = video.get('comments') comments = video.get('comments')
@ -124,6 +123,8 @@ class VubeIE(InfoExtractor):
else: else:
comment_count = len(comments) comment_count = len(comments)
categories = [tag['text'] for tag in video['tags']]
return { return {
'id': video_id, 'id': video_id,
'formats': formats, 'formats': formats,
@ -131,11 +132,11 @@ class VubeIE(InfoExtractor):
'description': description, 'description': description,
'thumbnail': thumbnail, 'thumbnail': thumbnail,
'uploader': uploader, 'uploader': uploader,
'uploader_id': uploader_id,
'timestamp': timestamp, 'timestamp': timestamp,
'duration': duration, 'duration': duration,
'view_count': view_count, 'view_count': view_count,
'like_count': like_count, 'like_count': like_count,
'dislike_count': dislike_count, 'dislike_count': dislike_count,
'comment_count': comment_count, 'comment_count': comment_count,
'categories': categories,
} }

View File

@ -0,0 +1,57 @@
# encoding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
parse_iso8601,
float_or_none,
int_or_none,
)
class XboxClipsIE(InfoExtractor):
    """Information extractor for clip pages on xboxclips.com."""

    _VALID_URL = r'https?://(?:www\.)?xboxclips\.com/video\.php\?.*vid=(?P<id>[\w-]{36})'
    _TEST = {
        'url': 'https://xboxclips.com/video.php?uid=2533274823424419&gamertag=Iabdulelah&vid=074a69a9-5faf-46aa-b93b-9909c1720325',
        'md5': 'fbe1ec805e920aeb8eced3c3e657df5d',
        'info_dict': {
            'id': '074a69a9-5faf-46aa-b93b-9909c1720325',
            'ext': 'mp4',
            'title': 'Iabdulelah playing Upload Studio',
            'filesize_approx': 28101836.8,
            'timestamp': 1407388500,
            'upload_date': '20140807',
            'duration': 56,
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the clip addressed by *url*.

        Everything is scraped from the clip's HTML page.  Only the media
        link and the title are mandatory; the remaining fields are looked
        up non-fatally.
        """
        clip_id = re.match(self._VALID_URL, url).group('id')
        page = self._download_webpage(url, clip_id)

        media_url = self._html_search_regex(
            r'>Link: <a href="([^"]+)">', page, 'video URL')
        clip_title = self._html_search_regex(
            r'<title>XboxClips \| ([^<]+)</title>', page, 'title')

        recorded = self._html_search_regex(
            r'>Recorded: ([^<]+)<', page, 'upload date', fatal=False)
        recorded_ts = parse_iso8601(recorded)

        # The page states the size in megabytes; invscale converts it to an
        # approximate byte count.
        size_mb = self._html_search_regex(
            r'>Size: ([\d\.]+)MB<', page, 'file size', fatal=False)
        approx_bytes = float_or_none(size_mb, invscale=1024 * 1024)

        seconds = int_or_none(self._html_search_regex(
            r'>Duration: (\d+) Seconds<', page, 'duration', fatal=False))
        views = int_or_none(self._html_search_regex(
            r'>Views: (\d+)<', page, 'view count', fatal=False))

        return {
            'id': clip_id,
            'url': media_url,
            'title': clip_title,
            'timestamp': recorded_ts,
            'filesize_approx': approx_bytes,
            'duration': seconds,
            'view_count': views,
        }

View File

@ -1273,9 +1273,15 @@ def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
if get_attr: if get_attr:
if v is not None: if v is not None:
v = getattr(v, get_attr, None) v = getattr(v, get_attr, None)
if v == '':
v = None
return default if v is None else (int(v) * invscale // scale) return default if v is None else (int(v) * invscale // scale)
def str_or_none(v, default=None):
    """Return *v* converted with compat_str, or *default* when *v* is None."""
    if v is None:
        return default
    return compat_str(v)
def str_to_int(int_str): def str_to_int(int_str):
if int_str is None: if int_str is None:
return None return None

View File

@ -1,2 +1,2 @@
__version__ = '2014.08.02.1' __version__ = '2014.08.10'