Compare commits

..

17 Commits

Author SHA1 Message Date
Philipp Hagemeister
eef726c04b release 2014.02.04 2014-02-04 16:33:19 +01:00
Philipp Hagemeister
acf1555d76 Merge remote-tracking branch 'origin/master' 2014-02-04 16:33:06 +01:00
Philipp Hagemeister
22e7f1a6ec [pbs] Add support for article pages (Fixes #870) 2014-02-04 16:31:00 +01:00
Sergey M.
3c49325658 [lifenews] Fix video URL extraction (Closes #2302) 2014-02-04 21:31:25 +07:00
Sergey M
bb1cd2bea1 [mooshare] Add support for mooshare.biz (Closes #2149) 2014-02-04 20:53:46 +07:00
Philipp Hagemeister
fdf1f8d4ce [collegehumor] Adapt test to changed video description 2014-02-04 10:37:01 +01:00
Philipp Hagemeister
117c8c6b97 [bliptv] Remove unused imports 2014-02-04 10:25:19 +01:00
Philipp Hagemeister
5cef4ff09b [subtittles] Check that the result is not empty 2014-02-04 10:24:17 +01:00
Philipp Hagemeister
91264ce572 [iprima] Use centralized format sorting 2014-02-04 10:24:00 +01:00
Philipp Hagemeister
c79ef8e1ae Merge remote-tracking branch 'pulpe/_iprima' 2014-02-04 10:21:42 +01:00
Philipp Hagemeister
58d915df51 [traileraddict] mark as broken
traileraddict has changed their URL encoding scheme.
I'm working on restoring support, but that may take some time.
2014-02-04 10:13:52 +01:00
pulpe
7881a64499 [iprima] Add support for play.iprima.cz 2014-02-04 07:45:41 +01:00
Philipp Hagemeister
90159f5561 release 2014.02.03.1 2014-02-03 15:20:41 +01:00
Philipp Hagemeister
99877772d0 [generic] Add support for multiple brightcove URLs (Fixes #2283) 2014-02-03 15:19:40 +01:00
Sergey M.
b0268cb6ce [vimeo] Remove superfluous whitespace 2014-02-03 20:24:11 +07:00
Sergey M.
4edff4cfa8 [vimeo] Add subtitle tests 2014-02-03 20:19:23 +07:00
Sergey M.
1eac553e7e [vimeo] Add support for subtitles (Closes #2239) 2014-02-03 20:02:58 +07:00
16 changed files with 461 additions and 47 deletions

View File

@@ -34,6 +34,7 @@ from youtube_dl.extractor import (
KhanAcademyIE, KhanAcademyIE,
EveryonesMixtapeIE, EveryonesMixtapeIE,
RutubeChannelIE, RutubeChannelIE,
GenericIE,
) )
@@ -229,6 +230,16 @@ class TestPlaylists(unittest.TestCase):
self.assertEqual(result['id'], '1409') self.assertEqual(result['id'], '1409')
self.assertTrue(len(result['entries']) >= 34) self.assertTrue(len(result['entries']) >= 34)
def test_multiple_brightcove_videos(self):
    """A page embedding several Brightcove players is extracted as a playlist.

    Regression test for https://github.com/rg3/youtube-dl/issues/2283
    """
    page_url = 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html'
    extractor = GenericIE(FakeYDL())
    playlist = extractor.extract(page_url)

    self.assertIsPlaylist(playlist)
    self.assertEqual(playlist['id'], 'always-never-nuclear-command-and-control')
    self.assertEqual(playlist['title'], 'Always/Never: A Little-Seen Movie About Nuclear Command and Control : The New Yorker')
    # The article is expected to embed exactly three players.
    self.assertEqual(len(playlist['entries']), 3)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

75
test/test_signatures.py Normal file
View File

@@ -0,0 +1,75 @@
#!/usr/bin/env python
# Allow direct execution
import os
import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import io
import re
import string
from youtube_dl.extractor import YoutubeIE
from youtube_dl.utils import compat_str, compat_urlretrieve
_TESTS = [
(
u'https://s.ytimg.com/yts/jsbin/html5player-vflHOr_nV.js',
u'js',
86,
u'>=<;:/.-[+*)(\'&%$#"!ZYX0VUTSRQPONMLKJIHGFEDCBA\\yxwvutsrqponmlkjihgfedcba987654321',
),
(
u'https://s.ytimg.com/yts/jsbin/html5player-vfldJ8xgI.js',
u'js',
85,
u'3456789a0cdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS[UVWXYZ!"#$%&\'()*+,-./:;<=>?@',
),
]
class TestSignature(unittest.TestCase):
    """Test case that receives its test methods dynamically via ``setattr``."""

    def setUp(self):
        """Set ``self.TESTDATA_DIR`` to ``testdata/`` beside this file.

        The directory is created when missing.  Creation is attempted
        directly instead of checking for existence first, so two test
        processes starting at the same time cannot race between the check
        and the ``mkdir``.

        Raises:
            OSError: if the directory cannot be created and does not already
                exist as a directory.
        """
        test_dir = os.path.dirname(os.path.abspath(__file__))
        self.TESTDATA_DIR = os.path.join(test_dir, 'testdata')
        try:
            os.mkdir(self.TESTDATA_DIR)
        except OSError:
            # "Already exists" is the expected case; anything that leaves us
            # without a usable directory is a real failure.
            if not os.path.isdir(self.TESTDATA_DIR):
                raise
def make_tfunc(url, stype, sig_length, expected_sig):
    """Generate one signature test and attach it to ``TestSignature``.

    Args:
        url: Location of the player file.  Its last path component must end
            in ``-<id>.<ext>``; ``<id>`` becomes part of the test name.
        stype: ``'js'`` or ``'swf'``; selects which parser of ``YoutubeIE``
            is used on the downloaded file.
        sig_length: Number of leading characters of ``string.printable`` fed
            to the parsed signature function.
        expected_sig: Output the parsed function must produce for that input.
    """
    player_name = url.rpartition('/')[2]
    name_match = re.match(r'.*-([a-zA-Z0-9_-]+)\.[a-z]+$', player_name)
    assert name_match, '%r should follow URL format' % player_name
    test_id = name_match.group(1)

    def test_func(self):
        # The player file is fetched once and then reused from TESTDATA_DIR.
        cached_path = os.path.join(self.TESTDATA_DIR, player_name)
        if not os.path.exists(cached_path):
            compat_urlretrieve(url, cached_path)

        ie = YoutubeIE()
        if stype == 'js':
            with io.open(cached_path, encoding='utf-8') as player_file:
                player_source = player_file.read()
            transform = ie._parse_sig_js(player_source)
        else:
            assert stype == 'swf'
            with open(cached_path, 'rb') as player_file:
                player_bytes = player_file.read()
            transform = ie._parse_sig_swf(player_bytes)

        probe = compat_str(string.printable[:sig_length])
        self.assertEqual(transform(probe), expected_sig)

    # str() keeps the method name a native string on every Python version.
    method_name = str('test_signature_' + stype + '_' + test_id)
    test_func.__name__ = method_name
    setattr(TestSignature, method_name, test_func)
for test_spec in _TESTS:
make_tfunc(*test_spec)
if __name__ == '__main__':
unittest.main()

View File

@@ -14,6 +14,7 @@ from youtube_dl.extractor import (
YoutubeIE, YoutubeIE,
DailymotionIE, DailymotionIE,
TEDIE, TEDIE,
VimeoIE,
) )
@@ -223,5 +224,60 @@ class TestBlipTVSubtitles(BaseTestSubtitles):
self.assertEqual(md5(subtitles['en']), '5b75c300af65fe4476dff79478bb93e4') self.assertEqual(md5(subtitles['en']), '5b75c300af65fe4476dff79478bb93e4')
class TestVimeoSubtitles(BaseTestSubtitles):
    """Subtitle extraction tests for ``VimeoIE``."""

    # Default video used by the tests; test_nosubtitles overrides it.
    url = 'http://vimeo.com/76979871'
    IE = VimeoIE

    def test_no_writesubtitles(self):
        """Without any subtitle option the result is None."""
        subtitles = self.getSubtitles()
        self.assertEqual(subtitles, None)

    def test_subtitles(self):
        """With only 'writesubtitles' set, the 'en' track is returned."""
        self.DL.params['writesubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(md5(subtitles['en']), '8062383cf4dec168fc40a088aa6d5888')

    def test_subtitles_lang(self):
        """A single requested language ('fr') is returned."""
        self.DL.params['writesubtitles'] = True
        self.DL.params['subtitleslangs'] = ['fr']
        subtitles = self.getSubtitles()
        self.assertEqual(md5(subtitles['fr']), 'b6191146a6c5d3a452244d853fde6dc8')

    def test_allsubtitles(self):
        """'allsubtitles' yields every language of the video."""
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(set(subtitles.keys()), set(['de', 'en', 'es', 'fr']))

    def test_list_subtitles(self):
        """Listing subtitles produces no info dict."""
        self.DL.expect_warning(u'Automatic Captions not supported by this server')
        self.DL.params['listsubtitles'] = True
        info_dict = self.getInfoDict()
        self.assertEqual(info_dict, None)

    def test_automatic_captions(self):
        """Requesting automatic captions warns and returns no subtitles."""
        self.DL.expect_warning(u'Automatic Captions not supported by this server')
        self.DL.params['writeautomaticsub'] = True
        # The option key is 'subtitleslangs' (plural), as in the other tests
        # of this class; the singular spelling was silently ignored.
        self.DL.params['subtitleslangs'] = ['en']
        subtitles = self.getSubtitles()
        self.assertEqual(len(subtitles), 0)

    def test_nosubtitles(self):
        """A video without subtitles warns and yields an empty result."""
        self.DL.expect_warning(u'video doesn\'t have subtitles')
        self.url = 'http://vimeo.com/56015672'
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(len(subtitles), 0)

    def test_multiple_langs(self):
        """Every language of a multi-language request is present."""
        self.DL.params['writesubtitles'] = True
        langs = ['es', 'fr', 'de']
        self.DL.params['subtitleslangs'] = langs
        subtitles = self.getSubtitles()
        for lang in langs:
            self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View File

@@ -96,6 +96,7 @@ from .ina import InaIE
from .infoq import InfoQIE from .infoq import InfoQIE
from .instagram import InstagramIE from .instagram import InstagramIE
from .internetvideoarchive import InternetVideoArchiveIE from .internetvideoarchive import InternetVideoArchiveIE
from .iprima import IPrimaIE
from .ivi import ( from .ivi import (
IviIE, IviIE,
IviCompilationIE IviCompilationIE
@@ -126,6 +127,7 @@ from .mit import TechTVMITIE, MITIE
from .mixcloud import MixcloudIE from .mixcloud import MixcloudIE
from .mpora import MporaIE from .mpora import MporaIE
from .mofosex import MofosexIE from .mofosex import MofosexIE
from .mooshare import MooshareIE
from .mtv import ( from .mtv import (
MTVIE, MTVIE,
MTVIggyIE, MTVIggyIE,

View File

@@ -1,19 +1,14 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import datetime import datetime
import json
import re import re
import socket
from .common import InfoExtractor from .common import InfoExtractor
from .subtitles import SubtitlesInfoExtractor from .subtitles import SubtitlesInfoExtractor
from ..utils import ( from ..utils import (
compat_http_client,
compat_str, compat_str,
compat_urllib_error,
compat_urllib_request, compat_urllib_request,
ExtractorError,
unescapeHTML, unescapeHTML,
) )

View File

@@ -127,25 +127,28 @@ class BrightcoveIE(InfoExtractor):
@classmethod @classmethod
def _extract_brightcove_url(cls, webpage): def _extract_brightcove_url(cls, webpage):
"""Try to extract the brightcove url from the wepbage, returns None """Try to extract the brightcove url from the webpage, returns None
if it can't be found if it can't be found
""" """
urls = cls._extract_brightcove_urls(webpage)
return urls[0] if urls else None
@classmethod
def _extract_brightcove_urls(cls, webpage):
"""Return a list of all Brightcove URLs from the webpage """
url_m = re.search(r'<meta\s+property="og:video"\s+content="(http://c.brightcove.com/[^"]+)"', webpage) url_m = re.search(r'<meta\s+property="og:video"\s+content="(http://c.brightcove.com/[^"]+)"', webpage)
if url_m: if url_m:
return url_m.group(1) return [url_m.group(1)]
m_brightcove = re.search( matches = re.findall(
r'''(?sx)<object r'''(?sx)<object
(?: (?:
[^>]+?class=([\'"])[^>]*?BrightcoveExperience.*?\1 | [^>]+?class=[\'"][^>]*?BrightcoveExperience.*?[\'"] |
[^>]*?>\s*<param\s+name="movie"\s+value="https?://[^/]*brightcove\.com/ [^>]*?>\s*<param\s+name="movie"\s+value="https?://[^/]*brightcove\.com/
).+?</object>''', ).+?</object>''',
webpage) webpage)
if m_brightcove is not None: return [cls._build_brighcove_url(m) for m in matches]
return cls._build_brighcove_url(m_brightcove.group())
else:
return None
def _real_extract(self, url): def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {}) url, smuggled_data = unsmuggle_url(url, {})

View File

@@ -38,7 +38,7 @@ class CollegeHumorIE(InfoExtractor):
'title': 'Funny Dogs Protecting Babies Compilation 2014 [NEW HD]', 'title': 'Funny Dogs Protecting Babies Compilation 2014 [NEW HD]',
'uploader': 'Funnyplox TV', 'uploader': 'Funnyplox TV',
'uploader_id': 'funnyploxtv', 'uploader_id': 'funnyploxtv',
'description': 'md5:506f69f7a297ed698ced3375f2363b0e', 'description': 'md5:11812366244110c3523968aa74f02521',
'upload_date': '20140128', 'upload_date': '20140128',
}, },
'params': { 'params': {

View File

@@ -234,11 +234,21 @@ class GenericIE(InfoExtractor):
r'^(?:https?://)?([^/]*)/.*', url, 'video uploader') r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
# Look for BrightCove: # Look for BrightCove:
bc_url = BrightcoveIE._extract_brightcove_url(webpage) bc_urls = BrightcoveIE._extract_brightcove_urls(webpage)
if bc_url is not None: if bc_urls:
self.to_screen('Brightcove video detected.') self.to_screen('Brightcove video detected.')
surl = smuggle_url(bc_url, {'Referer': url}) entries = [{
return self.url_result(surl, 'Brightcove') '_type': 'url',
'url': smuggle_url(bc_url, {'Referer': url}),
'ie_key': 'Brightcove'
} for bc_url in bc_urls]
return {
'_type': 'playlist',
'title': video_title,
'id': video_id,
'entries': entries,
}
# Look for embedded (iframe) Vimeo player # Look for embedded (iframe) Vimeo player
mobj = re.search( mobj = re.search(

View File

@@ -0,0 +1,85 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import re
from random import random
from math import floor
from .common import InfoExtractor
from ..utils import compat_urllib_request
class IPrimaIE(InfoExtractor):
    """Information extractor for videos on play.iprima.cz."""

    _VALID_URL = r'https?://play\.iprima\.cz/(?P<videogroup>.+)/(?P<videoid>.+)'

    _TESTS = [{
        'url': 'http://play.iprima.cz/particka/particka-92',
        'info_dict': {
            'id': '39152',
            'ext': 'flv',
            'title': 'Partička (92)',
            'description': 'md5:3740fda51464da35a2d4d0670b8e4fd6',
            'thumbnail': 'http://play.iprima.cz/sites/default/files/image_crops/image_620x349/3/491483_particka-92_image_620x349.jpg',
        },
        'params': {
            'skip_download': True,
        },
    },
    ]

    def _real_extract(self, url):
        """Build the info dict for one play.iprima.cz video page.

        Downloads the video page and the livebox player script, then emits
        one format per quality level ('lq', 'hq', 'hd') whose ``<level>_id``
        value on the page is not ``null``.

        Returns:
            dict with 'id', 'title', 'thumbnail', 'formats' and 'description'.

        Raises:
            ExtractorError: if the page offers none of the quality levels.
                Previously this case ended in a NameError because the video
                id is only known once a format has been found.
        """
        # Local import: the module-level import list only pulls in
        # compat_urllib_request from ..utils.
        from ..utils import ExtractorError

        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('videoid')

        webpage = self._download_webpage(url, video_id)

        # The player script is requested with two random numbers in the query
        # string and the video page as Referer.
        player_url = 'http://embed.livebox.cz/iprimaplay/player-embed-v2.js?__tok%s__=%s' % (
            floor(random()*1073741824),
            floor(random()*1073741824))

        req = compat_urllib_request.Request(player_url)
        req.add_header('Referer', url)
        playerpage = self._download_webpage(req, video_id)

        # NOTE(review): index 1 picks the *second* stream definition of the
        # player script and raises IndexError if there are fewer than two --
        # confirm this is intended.
        base_url = ''.join(re.findall(r"embed\['stream'\] = '(.+?)'.+'(\?auth=)'.+'(.+?)';", playerpage)[1])

        zone_geo = self._html_search_regex(r'"zoneGEO":(.+?),', webpage, 'zoneGEO')
        if zone_geo != '0':
            base_url = base_url.replace('token', 'token_'+zone_geo)

        formats = []
        real_id = None
        # The position in the list doubles as the quality rank (lq=0, hq=1, hd=2).
        for quality, format_id in enumerate(['lq', 'hq', 'hd']):
            filename = self._html_search_regex(r'"%s_id":(.+?),' % format_id, webpage, 'filename')

            if filename == 'null':
                continue

            real_id = self._search_regex(r'Prima-[0-9]{10}-([0-9]+)_', filename, 'real video id')

            if format_id == 'hd':
                # Only the 'hd' play path carries the 'hq/' prefix.
                filename = 'hq/'+filename

            formats.append({
                'format_id': format_id,
                'url': base_url,
                'quality': quality,
                # Drop the surrounding quotes and the 4-character file extension.
                'play_path': 'mp4:'+filename.replace('"', '')[:-4],
                'rtmp_live': True,
                'ext': 'flv',
            })

        if not formats:
            raise ExtractorError('No video formats found for %s' % video_id)

        self._sort_formats(formats)

        return {
            'id': real_id,
            'title': self._og_search_title(webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
            'formats': formats,
            'description': self._og_search_description(webpage),
        }

View File

@@ -31,7 +31,7 @@ class LifeNewsIE(InfoExtractor):
webpage = self._download_webpage('http://lifenews.ru/mobile/news/%s' % video_id, video_id, 'Downloading page') webpage = self._download_webpage('http://lifenews.ru/mobile/news/%s' % video_id, video_id, 'Downloading page')
video_url = self._html_search_regex( video_url = self._html_search_regex(
r'<video.*?src="([^"]+)"></video>', webpage, 'video URL') r'<video.*?src="([^"]+)".*?></video>', webpage, 'video URL')
thumbnail = self._html_search_regex( thumbnail = self._html_search_regex(
r'<video.*?poster="([^"]+)".*?"></video>', webpage, 'video thumbnail') r'<video.*?poster="([^"]+)".*?"></video>', webpage, 'video thumbnail')

View File

@@ -0,0 +1,114 @@
from __future__ import unicode_literals
import re
import time
from .common import InfoExtractor
from ..utils import (
ExtractorError,
compat_urllib_request,
compat_urllib_parse,
)
class MooshareIE(InfoExtractor):
    """Information extractor for videos hosted on mooshare.biz."""

    IE_NAME = 'mooshare'
    IE_DESC = 'Mooshare.biz'
    _VALID_URL = r'http://mooshare\.biz/(?P<id>[\da-z]{12})'

    _TESTS = [
        {
            'url': 'http://mooshare.biz/8dqtk4bjbp8g',
            'md5': '4e14f9562928aecd2e42c6f341c8feba',
            'info_dict': {
                'id': '8dqtk4bjbp8g',
                'ext': 'mp4',
                'title': 'Comedy Football 2011 - (part 1-2)',
                'duration': 893,
            },
        },
        {
            'url': 'http://mooshare.biz/aipjtoc4g95j',
            'info_dict': {
                'id': 'aipjtoc4g95j',
                'ext': 'mp4',
                'title': 'Orange Caramel  Dashing Through the Snow',
                'duration': 212,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            }
        }
    ]

    def _real_extract(self, url):
        """Build the info dict for one mooshare.biz video.

        Loads the landing page, submits its hidden download form after a
        five second wait, and collects every format (SD, HD, RTMP) found on
        the resulting video page.

        Returns:
            dict with 'id', 'title', 'thumbnail', 'duration' and 'formats'.

        Raises:
            ExtractorError: if the landing page reports the video as not
                found or deleted.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        page = self._download_webpage(url, video_id, 'Downloading page')

        if re.search(r'>Video Not Found or Deleted<', page) is not None:
            raise ExtractorError(u'Video %s does not exist' % video_id, expected=True)

        hash_key = self._html_search_regex(r'<input type="hidden" name="hash" value="([^"]+)">', page, 'hash')
        title = self._html_search_regex(r'(?m)<div class="blockTitle">\s*<h2>Watch ([^<]+)</h2>', page, 'title')

        download_form = {
            'op': 'download1',
            'id': video_id,
            'hash': hash_key,
        }

        # The form must be posted to the page of the video being extracted;
        # the URL was previously hard-coded to the first test video.
        request = compat_urllib_request.Request(
            'http://mooshare.biz/%s' % video_id, compat_urllib_parse.urlencode(download_form))
        request.add_header('Content-Type', 'application/x-www-form-urlencoded')

        self.to_screen('%s: Waiting for timeout' % video_id)
        time.sleep(5)

        video_page = self._download_webpage(request, video_id, 'Downloading video page')

        thumbnail = self._html_search_regex(r'image:\s*"([^"]+)",', video_page, 'thumbnail', fatal=False)
        duration_str = self._html_search_regex(r'duration:\s*"(\d+)",', video_page, 'duration', fatal=False)
        duration = int(duration_str) if duration_str is not None else None

        formats = []

        # SD video
        mobj = re.search(r'(?m)file:\s*"(?P<url>[^"]+)",\s*provider:', video_page)
        if mobj is not None:
            formats.append({
                'url': mobj.group('url'),
                'format_id': 'sd',
                'format': 'SD',
            })

        # HD video
        mobj = re.search(r'\'hd-2\': { file: \'(?P<url>[^\']+)\' },', video_page)
        if mobj is not None:
            formats.append({
                'url': mobj.group('url'),
                'format_id': 'hd',
                'format': 'HD',
            })

        # rtmp video
        mobj = re.search(r'(?m)file: "(?P<playpath>[^"]+)",\s*streamer: "(?P<rtmpurl>rtmp://[^"]+)",', video_page)
        if mobj is not None:
            formats.append({
                'url': mobj.group('rtmpurl'),
                'play_path': mobj.group('playpath'),
                'rtmp_live': False,
                'ext': 'mp4',
                'format_id': 'rtmp',
                'format': 'HD',
            })

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'duration': duration,
            'formats': formats,
        }

View File

@@ -1,3 +1,5 @@
from __future__ import unicode_literals
import re import re
import json import json
@@ -5,26 +7,59 @@ from .common import InfoExtractor
class PBSIE(InfoExtractor): class PBSIE(InfoExtractor):
_VALID_URL = r'https?://video\.pbs\.org/video/(?P<id>\d+)/?' _VALID_URL = r'''(?x)https?://
(?:
# Direct video URL
video\.pbs\.org/video/(?P<id>[0-9]+)/? |
# Article with embedded player
(?:www\.)?pbs\.org/(?:[^/]+/){2,5}(?P<presumptive_id>[^/]+)/?(?:$|[?\#]) |
# Player
video\.pbs\.org/partnerplayer/(?P<player_id>[^/]+)/
)
'''
_TEST = { _TEST = {
u'url': u'http://video.pbs.org/video/2365006249/', 'url': 'http://www.pbs.org/tpt/constitution-usa-peter-sagal/watch/a-more-perfect-union/',
u'file': u'2365006249.mp4', 'md5': 'ce1888486f0908d555a8093cac9a7362',
u'md5': 'ce1888486f0908d555a8093cac9a7362', 'info_dict': {
u'info_dict': { 'id': '2365006249',
u'title': u'A More Perfect Union', 'ext': 'mp4',
u'description': u'md5:ba0c207295339c8d6eced00b7c363c6a', 'title': 'A More Perfect Union',
u'duration': 3190, 'description': 'md5:ba0c207295339c8d6eced00b7c363c6a',
'duration': 3190,
}, },
} }
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
presumptive_id = mobj.group('presumptive_id')
display_id = presumptive_id
if presumptive_id:
webpage = self._download_webpage(url, display_id)
url = self._search_regex(
r'<iframe\s+id=["\']partnerPlayer["\'].*?\s+src=["\'](.*?)["\']>',
webpage, 'player URL')
mobj = re.match(self._VALID_URL, url)
player_id = mobj.group('player_id')
if not display_id:
display_id = player_id
if player_id:
player_page = self._download_webpage(
url, display_id, note='Downloading player page',
errnote='Could not download player page')
video_id = self._search_regex(
r'<div\s+id="video_([0-9]+)"', player_page, 'video ID')
else:
video_id = mobj.group('id') video_id = mobj.group('id')
display_id = video_id
info_url = 'http://video.pbs.org/videoInfo/%s?format=json' % video_id info_url = 'http://video.pbs.org/videoInfo/%s?format=json' % video_id
info_page = self._download_webpage(info_url, video_id) info = self._download_json(info_url, display_id)
info =json.loads(info_page)
return {'id': video_id, return {
'id': video_id,
'title': info['title'], 'title': info['title'],
'url': info['alternate_encoding']['url'], 'url': info['alternate_encoding']['url'],
'ext': 'mp4', 'ext': 'mp4',

View File

@@ -68,13 +68,14 @@ class SubtitlesInfoExtractor(InfoExtractor):
def _request_subtitle_url(self, sub_lang, url): def _request_subtitle_url(self, sub_lang, url):
""" makes the http request for the subtitle """ """ makes the http request for the subtitle """
try: try:
return self._download_subtitle_url(sub_lang, url) sub = self._download_subtitle_url(sub_lang, url)
except ExtractorError as err: except ExtractorError as err:
self._downloader.report_warning(u'unable to download video subtitles for %s: %s' % (sub_lang, compat_str(err))) self._downloader.report_warning(u'unable to download video subtitles for %s: %s' % (sub_lang, compat_str(err)))
return return
if not sub: if not sub:
self._downloader.report_warning(u'Did not fetch video subtitles') self._downloader.report_warning(u'Did not fetch video subtitles')
return return
return sub
def _get_available_subtitles(self, video_id, webpage): def _get_available_subtitles(self, video_id, webpage):
""" """

View File

@@ -6,6 +6,7 @@ from .common import InfoExtractor
class TrailerAddictIE(InfoExtractor): class TrailerAddictIE(InfoExtractor):
_WORKING = False
_VALID_URL = r'(?:http://)?(?:www\.)?traileraddict\.com/(?:trailer|clip)/(?P<movie>.+?)/(?P<trailer_name>.+)' _VALID_URL = r'(?:http://)?(?:www\.)?traileraddict\.com/(?:trailer|clip)/(?P<movie>.+?)/(?P<trailer_name>.+)'
_TEST = { _TEST = {
'url': 'http://www.traileraddict.com/trailer/prince-avalanche/trailer', 'url': 'http://www.traileraddict.com/trailer/prince-avalanche/trailer',

View File

@@ -6,10 +6,10 @@ import re
import itertools import itertools
from .common import InfoExtractor from .common import InfoExtractor
from .subtitles import SubtitlesInfoExtractor
from ..utils import ( from ..utils import (
compat_urllib_parse, compat_urllib_parse,
compat_urllib_request, compat_urllib_request,
clean_html, clean_html,
get_element_by_attribute, get_element_by_attribute,
ExtractorError, ExtractorError,
@@ -19,7 +19,7 @@ from ..utils import (
) )
class VimeoIE(InfoExtractor): class VimeoIE(SubtitlesInfoExtractor):
"""Information extractor for vimeo.com.""" """Information extractor for vimeo.com."""
# _VALID_URL matches Vimeo URLs # _VALID_URL matches Vimeo URLs
@@ -84,6 +84,20 @@ class VimeoIE(InfoExtractor):
'videopassword': 'youtube-dl', 'videopassword': 'youtube-dl',
}, },
}, },
{
'url': 'http://vimeo.com/76979871',
'md5': '3363dd6ffebe3784d56f4132317fd446',
'note': 'Video with subtitles',
'info_dict': {
'id': '76979871',
'ext': 'mp4',
'title': 'The New Vimeo Player (You Know, For Videos)',
'description': 'md5:2ec900bf97c3f389378a96aee11260ea',
'upload_date': '20131015',
'uploader_id': 'staff',
'uploader': 'Vimeo Staff',
}
},
] ]
def _login(self): def _login(self):
@@ -273,6 +287,17 @@ class VimeoIE(InfoExtractor):
if len(formats) == 0: if len(formats) == 0:
raise ExtractorError('No known codec found') raise ExtractorError('No known codec found')
subtitles = {}
text_tracks = config['request'].get('text_tracks')
if text_tracks:
for tt in text_tracks:
subtitles[tt['lang']] = 'http://vimeo.com' + tt['url']
video_subtitles = self.extract_subtitles(video_id, subtitles)
if self._downloader.params.get('listsubtitles', False):
self._list_available_subtitles(video_id, subtitles)
return
return { return {
'id': video_id, 'id': video_id,
'uploader': video_uploader, 'uploader': video_uploader,
@@ -286,6 +311,7 @@ class VimeoIE(InfoExtractor):
'view_count': view_count, 'view_count': view_count,
'like_count': like_count, 'like_count': like_count,
'comment_count': comment_count, 'comment_count': comment_count,
'subtitles': video_subtitles,
} }

View File

@@ -1,2 +1,2 @@
__version__ = '2014.02.03' __version__ = '2014.02.04'