Compare commits

..

18 Commits

Author SHA1 Message Date
4242001863 release 2014.06.26 2014-06-26 16:44:01 +02:00
78338f71ca [livestream:original] Add support for folder urls (closes #2631)
The webpage only contains shortened links for the videos; since the server
doesn't support HEAD requests, we use a specific extractor for them.
2014-06-26 16:34:36 +02:00
f5172a3084 [teachertube] Add support for new URL formats 2014-06-26 20:01:59 +07:00
c7df67edbd [teachertube] Improve extraction 2014-06-26 20:00:47 +07:00
d410fee91d [VideoTt] fix ValueError (#3161) 2014-06-26 07:35:47 +02:00
ba7aa464de [soundgasm] PEP8 and add a display_id (#3155) 2014-06-25 23:47:38 +02:00
8333034dce Merge remote-tracking branch 'pachacamac/soundgasm' 2014-06-25 23:45:03 +02:00
637b6af80f release 2014.06.25 2014-06-25 21:24:01 +02:00
1044f8afd2 [Soundgasm] Add new extractor 2014-06-25 18:07:23 +02:00
2f775107f9 Merge branch 'master' of github.com:rg3/youtube-dl 2014-06-25 17:45:24 +02:00
85342674b2 [Dailymotion] fix uploader name (fixes #3153) 2014-06-25 17:44:19 +02:00
fd69098a45 [rutube] Update playlist tests 2014-06-25 19:06:11 +07:00
8867f908fc Merge pull request #3148 from crazedpsyc/master
[BlipTV] Allow plus sign in video ID
2014-06-25 07:14:04 +02:00
b7c33124c8 [BlipTV] Allow plus sign in video ID 2014-06-24 17:55:08 -06:00
89a8c423c7 Merge pull request #3146 from pvdl/patch-1
[discovery] Change default url
2014-06-24 18:11:26 +02:00
cea2582df2 [discovery] Change default url
The URL redirects from dsc.discovery.com to www.discovery.com.
This commit switches to the correct URL.
2014-06-24 17:41:53 +02:00
e423e0baaa [wistia] Add duration and modernize 2014-06-24 19:34:39 +07:00
60b2dd1285 [comedycentral] Correct handling when latest tds episode is a special-episode instead of a regular one 2014-06-24 10:50:41 +02:00
13 changed files with 146 additions and 29 deletions

View File

@ -30,6 +30,7 @@ from youtube_dl.extractor import (
SoundcloudPlaylistIE, SoundcloudPlaylistIE,
TeacherTubeClassroomIE, TeacherTubeClassroomIE,
LivestreamIE, LivestreamIE,
LivestreamOriginalIE,
NHLVideocenterIE, NHLVideocenterIE,
BambuserChannelIE, BambuserChannelIE,
BandcampAlbumIE, BandcampAlbumIE,
@ -40,6 +41,7 @@ from youtube_dl.extractor import (
KhanAcademyIE, KhanAcademyIE,
EveryonesMixtapeIE, EveryonesMixtapeIE,
RutubeChannelIE, RutubeChannelIE,
RutubePersonIE,
GoogleSearchIE, GoogleSearchIE,
GenericIE, GenericIE,
TEDIE, TEDIE,
@ -154,6 +156,14 @@ class TestPlaylists(unittest.TestCase):
self.assertEqual(result['title'], 'TEDCity2.0 (English)') self.assertEqual(result['title'], 'TEDCity2.0 (English)')
self.assertTrue(len(result['entries']) >= 4) self.assertTrue(len(result['entries']) >= 4)
def test_livestreamoriginal_folder(self):
    # A livestream.com "folder" URL must extract as a playlist whose id is
    # the dirId query value and which lists at least 28 entries.
    expected_id = 'a07bf706-d0e4-4e75-a747-b021d84f2fd3'
    extractor = LivestreamOriginalIE(FakeYDL())
    playlist = extractor.extract(
        'https://www.livestream.com/newplay/folder?dirId=a07bf706-d0e4-4e75-a747-b021d84f2fd3')
    self.assertIsPlaylist(playlist)
    self.assertEqual(playlist['id'], expected_id)
    entry_count = len(playlist['entries'])
    self.assertTrue(entry_count >= 28)
def test_nhl_videocenter(self): def test_nhl_videocenter(self):
dl = FakeYDL() dl = FakeYDL()
ie = NHLVideocenterIE(dl) ie = NHLVideocenterIE(dl)
@ -256,10 +266,18 @@ class TestPlaylists(unittest.TestCase):
def test_rutube_channel(self): def test_rutube_channel(self):
dl = FakeYDL() dl = FakeYDL()
ie = RutubeChannelIE(dl) ie = RutubeChannelIE(dl)
result = ie.extract('http://rutube.ru/tags/video/1409') result = ie.extract('http://rutube.ru/tags/video/1800/')
self.assertIsPlaylist(result) self.assertIsPlaylist(result)
self.assertEqual(result['id'], '1409') self.assertEqual(result['id'], '1800')
self.assertTrue(len(result['entries']) >= 34) self.assertTrue(len(result['entries']) >= 68)
def test_rutube_person(self):
    # A rutube.ru person page must extract as a playlist whose id is the
    # numeric id from the URL and which lists at least 37 entries.
    extractor = RutubePersonIE(FakeYDL())
    playlist = extractor.extract('http://rutube.ru/video/person/313878/')
    self.assertIsPlaylist(playlist)
    self.assertEqual(playlist['id'], '313878')
    entry_count = len(playlist['entries'])
    self.assertTrue(entry_count >= 37)
def test_multiple_brightcove_videos(self): def test_multiple_brightcove_videos(self):
# https://github.com/rg3/youtube-dl/issues/2283 # https://github.com/rg3/youtube-dl/issues/2283

View File

@ -147,7 +147,11 @@ from .ku6 import Ku6IE
from .la7 import LA7IE from .la7 import LA7IE
from .lifenews import LifeNewsIE from .lifenews import LifeNewsIE
from .liveleak import LiveLeakIE from .liveleak import LiveLeakIE
from .livestream import LivestreamIE, LivestreamOriginalIE from .livestream import (
LivestreamIE,
LivestreamOriginalIE,
LivestreamShortenerIE,
)
from .lynda import ( from .lynda import (
LyndaIE, LyndaIE,
LyndaCourseIE LyndaCourseIE
@ -255,6 +259,7 @@ from .soundcloud import (
SoundcloudUserIE, SoundcloudUserIE,
SoundcloudPlaylistIE SoundcloudPlaylistIE
) )
from .soundgasm import SoundgasmIE
from .southparkstudios import ( from .southparkstudios import (
SouthParkStudiosIE, SouthParkStudiosIE,
SouthparkDeIE, SouthparkDeIE,

View File

@ -15,7 +15,7 @@ from ..utils import (
class BlipTVIE(SubtitlesInfoExtractor): class BlipTVIE(SubtitlesInfoExtractor):
_VALID_URL = r'https?://(?:\w+\.)?blip\.tv/(?:(?:.+-|rss/flash/)(?P<id>\d+)|((?:play/|api\.swf#)(?P<lookup_id>[\da-zA-Z]+)))' _VALID_URL = r'https?://(?:\w+\.)?blip\.tv/(?:(?:.+-|rss/flash/)(?P<id>\d+)|((?:play/|api\.swf#)(?P<lookup_id>[\da-zA-Z+]+)))'
_TESTS = [ _TESTS = [
{ {

View File

@ -130,7 +130,7 @@ class ComedyCentralShowsIE(InfoExtractor):
raise ExtractorError('Invalid redirected URL: ' + url) raise ExtractorError('Invalid redirected URL: ' + url)
if mobj.group('episode') == '': if mobj.group('episode') == '':
raise ExtractorError('Redirected URL is still not specific: ' + url) raise ExtractorError('Redirected URL is still not specific: ' + url)
epTitle = mobj.group('episode').rpartition('/')[-1] epTitle = (mobj.group('episode') or mobj.group('videotitle')).rpartition('/')[-1]
mMovieParams = re.findall('(?:<param name="movie" value="|var url = ")(http://media.mtvnservices.com/([^"]*(?:episode|video).*?:.*?))"', webpage) mMovieParams = re.findall('(?:<param name="movie" value="|var url = ")(http://media.mtvnservices.com/([^"]*(?:episode|video).*?:.*?))"', webpage)
if len(mMovieParams) == 0: if len(mMovieParams) == 0:

View File

@ -459,6 +459,9 @@ class InfoExtractor(object):
if secure: regexes = self._og_regexes('video:secure_url') + regexes if secure: regexes = self._og_regexes('video:secure_url') + regexes
return self._html_search_regex(regexes, html, name, **kargs) return self._html_search_regex(regexes, html, name, **kargs)
def _og_search_url(self, html, **kargs):
    """Return the ``'url'`` property found by ``_og_search_property``.

    ``html`` and any extra keyword arguments are forwarded unchanged.
    """
    og_property = 'url'
    return self._og_search_property(og_property, html, **kargs)
def _html_search_meta(self, name, html, display_name=None, fatal=False): def _html_search_meta(self, name, html, display_name=None, fatal=False):
if display_name is None: if display_name is None:
display_name = name display_name = name

View File

@ -150,7 +150,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
return { return {
'id': video_id, 'id': video_id,
'formats': formats, 'formats': formats,
'uploader': info['owner_screenname'], 'uploader': info['owner.screenname'],
'upload_date': video_upload_date, 'upload_date': video_upload_date,
'title': self._og_search_title(webpage), 'title': self._og_search_title(webpage),
'subtitles': video_subtitles, 'subtitles': video_subtitles,

View File

@ -7,9 +7,9 @@ from .common import InfoExtractor
class DiscoveryIE(InfoExtractor): class DiscoveryIE(InfoExtractor):
_VALID_URL = r'http://dsc\.discovery\.com\/[a-zA-Z0-9\-]*/[a-zA-Z0-9\-]*/videos/(?P<id>[a-zA-Z0-9\-]*)(.htm)?' _VALID_URL = r'http://www\.discovery\.com\/[a-zA-Z0-9\-]*/[a-zA-Z0-9\-]*/videos/(?P<id>[a-zA-Z0-9\-]*)(.htm)?'
_TEST = { _TEST = {
'url': 'http://dsc.discovery.com/tv-shows/mythbusters/videos/mission-impossible-outtakes.htm', 'url': 'http://www.discovery.com/tv-shows/mythbusters/videos/mission-impossible-outtakes.htm',
'md5': 'e12614f9ee303a6ccef415cb0793eba2', 'md5': 'e12614f9ee303a6ccef415cb0793eba2',
'info_dict': { 'info_dict': {
'id': '614784', 'id': '614784',

View File

@ -9,6 +9,7 @@ from ..utils import (
compat_urlparse, compat_urlparse,
xpath_with_ns, xpath_with_ns,
compat_str, compat_str,
orderedSet,
) )
@ -64,7 +65,10 @@ class LivestreamIE(InfoExtractor):
# The original version of Livestream uses a different system # The original version of Livestream uses a different system
class LivestreamOriginalIE(InfoExtractor): class LivestreamOriginalIE(InfoExtractor):
IE_NAME = 'livestream:original' IE_NAME = 'livestream:original'
_VALID_URL = r'https?://www\.livestream\.com/(?P<user>[^/]+)/video\?.*?clipId=(?P<id>.*?)(&|$)' _VALID_URL = r'''(?x)https?://www\.livestream\.com/
(?P<user>[^/]+)/(?P<type>video|folder)
(?:\?.*?Id=|/)(?P<id>.*?)(&|$)
'''
_TEST = { _TEST = {
'url': 'http://www.livestream.com/dealbook/video?clipId=pla_8aa4a3f1-ba15-46a4-893b-902210e138fb', 'url': 'http://www.livestream.com/dealbook/video?clipId=pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
'info_dict': { 'info_dict': {
@ -78,10 +82,7 @@ class LivestreamOriginalIE(InfoExtractor):
}, },
} }
def _real_extract(self, url): def _extract_video(self, user, video_id):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
user = mobj.group('user')
api_url = 'http://x{0}x.api.channel.livestream.com/2.0/clipdetails?extendedInfo=true&id={1}'.format(user, video_id) api_url = 'http://x{0}x.api.channel.livestream.com/2.0/clipdetails?extendedInfo=true&id={1}'.format(user, video_id)
info = self._download_xml(api_url, video_id) info = self._download_xml(api_url, video_id)
@ -99,3 +100,44 @@ class LivestreamOriginalIE(InfoExtractor):
'ext': 'flv', 'ext': 'flv',
'thumbnail': thumbnail_url, 'thumbnail': thumbnail_url,
} }
def _extract_folder(self, url, folder_id):
    """Build a playlist result from the livestre.am links on a folder page.

    Downloads ``url``, collects every ``<a href>`` that points at
    livestre.am, passes the list through ``orderedSet`` (presumably an
    order-preserving de-duplication -- confirm against utils), and wraps
    each remaining link in a ``url``-type entry.
    """
    page = self._download_webpage(url, folder_id)
    short_links = re.findall(r'<a href="(https?://livestre\.am/.*?)"', page)

    entries = []
    for link in orderedSet(short_links):
        entries.append({
            '_type': 'url',
            'url': link,
        })

    return {
        '_type': 'playlist',
        'id': folder_id,
        'entries': entries,
    }
def _real_extract(self, url):
    """Dispatch ``url`` to folder or single-video extraction.

    The ``type`` group of ``_VALID_URL`` selects the handler: ``'folder'``
    goes to ``_extract_folder``; anything else goes to ``_extract_video``.
    """
    match = re.match(self._VALID_URL, url)
    item_id, user = match.group('id', 'user')
    if match.group('type') == 'folder':
        return self._extract_folder(url, item_id)
    return self._extract_video(user, item_id)
# The server doesn't support HEAD requests, so the generic extractor can't
# detect the redirection
class LivestreamShortenerIE(InfoExtractor):
    """Resolve livestre.am short links to the URL advertised by their page."""

    IE_NAME = 'livestream:shortener'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://livestre\.am/(?P<id>.+)'

    def _real_extract(self, url):
        """Download the short-link page and return a ``url``-type result.

        The target is whatever ``_og_search_url`` finds in the page.
        """
        short_id = re.match(self._VALID_URL, url).group('id')
        page = self._download_webpage(url, short_id)
        target_url = self._og_search_url(page)
        return {
            '_type': 'url',
            'url': target_url,
        }

View File

@ -0,0 +1,40 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
class SoundgasmIE(InfoExtractor):
    """Extractor for soundgasm.net audio pages (``/u/<user>/<title>``)."""

    _VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P<user>[0-9a-zA-Z_\-]+)/(?P<title>[0-9a-zA-Z_\-]+)'
    _TEST = {
        'url': 'http://soundgasm.net/u/ytdl/Piano-sample',
        'md5': '010082a2c802c5275bb00030743e75ad',
        'info_dict': {
            'id': '88abd86ea000cafe98f96321b23cc1206cbcbcc9',
            'ext': 'm4a',
            'title': 'ytdl_Piano-sample',
            'description': 'Royalty Free Sample Music'
        }
    }

    def _real_extract(self, url):
        """Return the info dict for a single soundgasm.net audio page.

        The title is ``<user>_<title>`` taken from the URL. The media URL
        and the optional description are scraped from the downloaded page.
        The id is the second-to-last piece of the media URL when split on
        ``/`` and ``.``.
        """
        mobj = re.match(self._VALID_URL, url)
        display_id = mobj.group('title')
        audio_title = mobj.group('user') + '_' + display_id
        webpage = self._download_webpage(url, display_id)
        audio_url = self._html_search_regex(
            r'(?s)m4a\:\s"([^"]+)"', webpage, 'audio URL')
        # Raw pattern: the previous non-raw '\/|\.' relied on invalid string
        # escapes, which newer Python versions warn about. r'[/.]' splits on
        # exactly the same characters.
        audio_id = re.split(r'[/.]', audio_url)[-2]
        # The description is optional, so a missing one must not abort
        # extraction.
        description = self._html_search_regex(
            r'(?s)<li>Description:\s(.*?)<\/li>', webpage, 'description',
            fatal=False)

        return {
            'id': audio_id,
            'display_id': display_id,
            'url': audio_url,
            'title': audio_title,
            'description': description
        }

View File

@ -14,7 +14,7 @@ class TeacherTubeIE(InfoExtractor):
IE_NAME = 'teachertube' IE_NAME = 'teachertube'
IE_DESC = 'teachertube.com videos' IE_DESC = 'teachertube.com videos'
_VALID_URL = r'https?://(?:www\.)?teachertube\.com/(viewVideo\.php\?video_id=|music\.php\?music_id=)(?P<id>\d+)' _VALID_URL = r'https?://(?:www\.)?teachertube\.com/(viewVideo\.php\?video_id=|music\.php\?music_id=|video/|audio/)(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.teachertube.com/viewVideo.php?video_id=339997', 'url': 'http://www.teachertube.com/viewVideo.php?video_id=339997',
@ -66,6 +66,7 @@ class TeacherTubeIE(InfoExtractor):
media_urls = re.findall(r'data-contenturl="([^"]+)"', webpage) media_urls = re.findall(r'data-contenturl="([^"]+)"', webpage)
media_urls.extend(re.findall(r'var\s+filePath\s*=\s*"([^"]+)"', webpage)) media_urls.extend(re.findall(r'var\s+filePath\s*=\s*"([^"]+)"', webpage))
media_urls.extend(re.findall(r'\'file\'\s*:\s*["\']([^"\']+)["\'],', webpage))
formats = [ formats = [
{ {
@ -79,7 +80,7 @@ class TeacherTubeIE(InfoExtractor):
return { return {
'id': video_id, 'id': video_id,
'title': title, 'title': title,
'thumbnail': self._html_search_regex(r'var\s+thumbUrl\s*=\s*"([^"]+)"', webpage, 'thumbnail'), 'thumbnail': self._html_search_regex(r'\'image\'\s*:\s*["\']([^"\']+)["\']', webpage, 'thumbnail'),
'formats': formats, 'formats': formats,
'description': description, 'description': description,
} }

View File

@ -4,7 +4,10 @@ import re
import base64 import base64
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import unified_strdate from ..utils import (
unified_strdate,
int_or_none,
)
class VideoTtIE(InfoExtractor): class VideoTtIE(InfoExtractor):
@ -50,9 +53,9 @@ class VideoTtIE(InfoExtractor):
'thumbnail': settings['config']['thumbnail'], 'thumbnail': settings['config']['thumbnail'],
'upload_date': unified_strdate(video['added']), 'upload_date': unified_strdate(video['added']),
'uploader': video['owner'], 'uploader': video['owner'],
'view_count': int(video['view_count']), 'view_count': int_or_none(video['view_count']),
'comment_count': int(video['comment_count']), 'comment_count': None if video.get('comment_count') == '--' else int_or_none(video['comment_count']),
'like_count': int(video['liked']), 'like_count': int_or_none(video['liked']),
'dislike_count': int(video['disliked']), 'dislike_count': int_or_none(video['disliked']),
'formats': formats, 'formats': formats,
} }

View File

@ -1,3 +1,5 @@
from __future__ import unicode_literals
import json import json
import re import re
@ -5,14 +7,16 @@ from .common import InfoExtractor
class WistiaIE(InfoExtractor): class WistiaIE(InfoExtractor):
_VALID_URL = r'^https?://(?:fast\.)?wistia\.net/embed/iframe/(?P<id>[a-z0-9]+)' _VALID_URL = r'https?://(?:fast\.)?wistia\.net/embed/iframe/(?P<id>[a-z0-9]+)'
_TEST = { _TEST = {
u"url": u"http://fast.wistia.net/embed/iframe/sh7fpupwlt", 'url': 'http://fast.wistia.net/embed/iframe/sh7fpupwlt',
u"file": u"sh7fpupwlt.mov", 'md5': 'cafeb56ec0c53c18c97405eecb3133df',
u"md5": u"cafeb56ec0c53c18c97405eecb3133df", 'info_dict': {
u"info_dict": { 'id': 'sh7fpupwlt',
u"title": u"cfh_resourceful_zdkh_final_1" 'ext': 'mov',
'title': 'Being Resourceful',
'duration': 117,
}, },
} }
@ -22,7 +26,7 @@ class WistiaIE(InfoExtractor):
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
data_json = self._html_search_regex( data_json = self._html_search_regex(
r'Wistia.iframeInit\((.*?), {}\);', webpage, u'video data') r'Wistia\.iframeInit\((.*?), {}\);', webpage, 'video data')
data = json.loads(data_json) data = json.loads(data_json)
@ -54,4 +58,5 @@ class WistiaIE(InfoExtractor):
'title': data['name'], 'title': data['name'],
'formats': formats, 'formats': formats,
'thumbnails': thumbnails, 'thumbnails': thumbnails,
'duration': data.get('duration'),
} }

View File

@ -1,2 +1,2 @@
__version__ = '2014.06.24.1' __version__ = '2014.06.26'