Compare commits

...

13 Commits

Author SHA1 Message Date
5277f09dfc release 2015.02.11 2015-02-11 19:02:39 +01:00
2d30521ab9 [youtube] Extract average rating (closes #2362) 2015-02-11 18:39:31 +01:00
050fa43561 flake8: Ignore some error added in pep8 1.6
* E402: we exectute code between imports, like modifying 'sys.path' in the tests
* E731: we assign to lambdas in a lot of places, we may want to consider defining functions in a single line instead (what pep8 recommends)
2015-02-11 18:15:15 +01:00
f36f92f4da [aes] style: Put __all__ variable at the end of the file 2015-02-11 18:15:15 +01:00
124f3bc67d [dotsub] Fix extraction and modernize 2015-02-11 22:33:03 +06:00
d304209a85 [test/parameters.json] Set 'fixup' to 'never'
The fixed audio files for YouTube have a size lower than the minimum required.
2015-02-11 17:25:04 +01:00
c56d7d899d [dctptv] Skip rtmp download 2015-02-11 22:10:33 +06:00
ea5db8469e [canalplus] Add support for itele.fr URLs (Closes #4931) 2015-02-11 16:21:52 +02:00
3811c567e7 [teamcoco] Fix video id extraction 2015-02-11 15:47:19 +02:00
054fe3cc40 [ntvru] Adapt to new direct delivery and modernize (Closes #4918) 2015-02-10 21:35:34 +06:00
af0d11f244 release 2015.02.10.5 2015-02-10 15:56:04 +01:00
9650885be9 [escapist] Filter video differently (Fixes #4919) 2015-02-10 15:55:51 +01:00
596ac6e31f [escapist] Modernize 2015-02-10 15:45:36 +01:00
12 changed files with 128 additions and 110 deletions

View File

@@ -3,4 +3,4 @@ universal = True
[flake8]
exclude = youtube_dl/extractor/__init__.py,devscripts/buildserver.py,setup.py,build,.git
ignore = E501
ignore = E402,E501,E731
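
For reference, the two newly ignored codes correspond to patterns the commit message says are used on purpose. The sketch below is a minimal illustration of what each check flags; it is not code from the repository.

import os
import sys

# E402: executable statements between imports, like the tests prepending a
# directory to sys.path before importing their helpers
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

import unittest  # flake8 reports E402 here once code has run above

# E731: assigning a lambda to a name...
sanitize = lambda s: s.strip()

# ...versus the single-line def that pep8 recommends instead
def sanitize_def(s): return s.strip()

print(sanitize(' ok '), sanitize_def(' ok '))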

View File

@@ -39,5 +39,6 @@
"writesubtitles": false,
"allsubtitles": false,
"listssubtitles": false,
"socket_timeout": 20
"socket_timeout": 20,
"fixup": "never"
}
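
The test parameters mirror ordinary YoutubeDL options, so the same setting can be passed programmatically. A minimal sketch, assuming 'fixup' and 'socket_timeout' are accepted by youtube_dl.YoutubeDL exactly as they appear in this test file:

import youtube_dl

ydl_opts = {
    'fixup': 'never',       # skip post-download fixups, as set for the test suite above
    'socket_timeout': 20,
    'skip_download': True,  # only resolve metadata in this example
}
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
    info = ydl.extract_info('http://www.youtube.com/watch?v=BaW_jenozKc')
    print(info.get('title'))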

View File

@@ -1,7 +1,5 @@
from __future__ import unicode_literals
__all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_cbc_decrypt', 'aes_decrypt_text']
import base64
from math import ceil
@@ -329,3 +327,5 @@ def inc(data):
data[i] = data[i] + 1
break
return data
__all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_cbc_decrypt', 'aes_decrypt_text']

View File

@@ -15,12 +15,13 @@ from ..utils import (
class CanalplusIE(InfoExtractor):
IE_DESC = 'canalplus.fr, piwiplus.fr and d8.tv'
_VALID_URL = r'https?://(?:www\.(?P<site>canalplus\.fr|piwiplus\.fr|d8\.tv)/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>[0-9]+))'
_VALID_URL = r'https?://(?:www\.(?P<site>canalplus\.fr|piwiplus\.fr|d8\.tv|itele\.fr)/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>[0-9]+))'
_VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/%s/%s'
_SITE_ID_MAP = {
'canalplus.fr': 'cplus',
'piwiplus.fr': 'teletoon',
'd8.tv': 'd8',
'itele.fr': 'itele',
}
_TESTS = [{
@@ -53,6 +54,16 @@ class CanalplusIE(InfoExtractor):
'upload_date': '20131108',
},
'skip': 'videos get deleted after a while',
}, {
'url': 'http://www.itele.fr/france/video/aubervilliers-un-lycee-en-colere-111559',
'md5': '65aa83ad62fe107ce29e564bb8712580',
'info_dict': {
'id': '1213714',
'ext': 'flv',
'title': 'Aubervilliers : un lycée en colère - Le 11/02/2015 à 06h45',
'description': 'md5:8216206ec53426ea6321321f3b3c16db',
'upload_date': '20150211',
},
}]
def _real_extract(self, url):
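
To see what the widened pattern captures for the new site, here is an illustrative check (not part of the change) that matches the test URL above; the 'site' group is what _SITE_ID_MAP translates to the 'itele' service id:

import re

_VALID_URL = r'https?://(?:www\.(?P<site>canalplus\.fr|piwiplus\.fr|d8\.tv|itele\.fr)/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>[0-9]+))'

mobj = re.match(_VALID_URL, 'http://www.itele.fr/france/video/aubervilliers-un-lycee-en-colere-111559')
print(mobj.group('site'))  # itele.fr, looked up in _SITE_ID_MAP
print(mobj.group('path'))  # video/aubervilliers-un-lycee-en-colere-111559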

View File

@@ -157,6 +157,7 @@ class InfoExtractor(object):
view_count: How many users have watched the video on the platform.
like_count: Number of positive ratings of the video
dislike_count: Number of negative ratings of the video
average_rating: Average rating given by users, the scale used depends on the webpage
comment_count: Number of comments on the video
comments: A list of comments, each with one or more of the following
properties (all but one of text or html optional):
@@ -271,7 +272,7 @@ class InfoExtractor(object):
raise
except compat_http_client.IncompleteRead as e:
raise ExtractorError('A network error has occurred.', cause=e, expected=True)
except (KeyError,) as e:
except (KeyError, StopIteration) as e:
raise ExtractorError('An extractor error has occurred.', cause=e)
def set_downloader(self, downloader):
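
The new StopIteration case matters because extractors increasingly pick an entry with next() over a generator (the escapist change further down does exactly that). A minimal, standalone illustration of the failure mode that is now reported as an ExtractorError instead of leaking a bare StopIteration:

playlist = [{'eventCategory': 'Thumbnail', 'url': 'http://example.com/thumb.jpg'}]
try:
    video_url = next(
        p['url'] for p in playlist
        if p.get('eventCategory') == 'Video')
except StopIteration:
    # with the change above, extract() converts this into an ExtractorError
    print('no matching playlist entry')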

View File

@@ -14,6 +14,10 @@ class DctpTvIE(InfoExtractor):
'display_id': 'videoinstallation-fuer-eine-kaufhausfassade',
'ext': 'flv',
'title': 'Videoinstallation für eine Kaufhausfassade'
},
'params': {
# rtmp download
'skip_download': True,
}
}

View File

@@ -1,13 +1,14 @@
from __future__ import unicode_literals
import re
import time
from .common import InfoExtractor
from ..utils import (
float_or_none,
int_or_none,
)
class DotsubIE(InfoExtractor):
_VALID_URL = r'http://(?:www\.)?dotsub\.com/view/(?P<id>[^/]+)'
_VALID_URL = r'https?://(?:www\.)?dotsub\.com/view/(?P<id>[^/]+)'
_TEST = {
'url': 'http://dotsub.com/view/aed3b8b2-1889-4df5-ae63-ad85f5572f27',
'md5': '0914d4d69605090f623b7ac329fea66e',
@@ -15,28 +16,37 @@ class DotsubIE(InfoExtractor):
'id': 'aed3b8b2-1889-4df5-ae63-ad85f5572f27',
'ext': 'flv',
'title': 'Pyramids of Waste (2010), AKA The Lightbulb Conspiracy - Planned obsolescence documentary',
'description': 'md5:699a0f7f50aeec6042cb3b1db2d0d074',
'thumbnail': 're:^https?://dotsub.com/media/aed3b8b2-1889-4df5-ae63-ad85f5572f27/p',
'duration': 3169,
'uploader': '4v4l0n42',
'description': 'Pyramids of Waste (2010) also known as "The lightbulb conspiracy" is a documentary about how our economic system based on consumerism and planned obsolescence is breaking our planet down.\r\n\r\nSolutions to this can be found at:\r\nhttp://robotswillstealyourjob.com\r\nhttp://www.federicopistono.org\r\n\r\nhttp://opensourceecology.org\r\nhttp://thezeitgeistmovement.com',
'thumbnail': 'http://dotsub.com/media/aed3b8b2-1889-4df5-ae63-ad85f5572f27/p',
'timestamp': 1292248482.625,
'upload_date': '20101213',
'view_count': int,
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
info_url = "https://dotsub.com/api/media/%s/metadata" % video_id
info = self._download_json(info_url, video_id)
date = time.gmtime(info['dateCreated'] / 1000) # The timestamp is in milliseconds
video_id = self._match_id(url)
info = self._download_json(
'https://dotsub.com/api/media/%s/metadata' % video_id, video_id)
video_url = info.get('mediaURI')
if not video_url:
webpage = self._download_webpage(url, video_id)
video_url = self._search_regex(
r'"file"\s*:\s*\'([^\']+)', webpage, 'video url')
return {
'id': video_id,
'url': info['mediaURI'],
'url': video_url,
'ext': 'flv',
'title': info['title'],
'thumbnail': info['screenshotURI'],
'description': info['description'],
'uploader': info['user'],
'view_count': info['numberOfViews'],
'upload_date': '%04i%02i%02i' % (date.tm_year, date.tm_mon, date.tm_mday),
'description': info.get('description'),
'thumbnail': info.get('screenshotURI'),
'duration': int_or_none(info.get('duration'), 1000),
'uploader': info.get('user'),
'timestamp': float_or_none(info.get('dateCreated'), 1000),
'view_count': int_or_none(info.get('numberOfViews')),
}
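
The scaling helpers replace the manual division by 1000. Assuming youtube_dl.utils exposes float_or_none and int_or_none with a divisor as the second argument, as used above, sample millisecond values from the dotsub API map onto the test expectations like this:

from youtube_dl.utils import float_or_none, int_or_none

print(float_or_none(1292248482625, 1000))  # 1292248482.625, the 'timestamp' in the test above
print(int_or_none(3169000, 1000))          # 3169, the 'duration' in the test above (sample input)
print(float_or_none(None, 1000))           # None, which is why the .get() lookups are safe here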

View File

@@ -1,18 +1,17 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
)
from ..utils import (
ExtractorError,
js_to_json,
)
class EscapistIE(InfoExtractor):
_VALID_URL = r'^https?://?(www\.)?escapistmagazine\.com/videos/view/(?P<showname>[^/]+)/(?P<id>[0-9]+)-'
_VALID_URL = r'https?://?(www\.)?escapistmagazine\.com/videos/view/[^/?#]+/(?P<id>[0-9]+)-[^/?#]*(?:$|[?#])'
_TEST = {
'url': 'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate',
'md5': 'ab3a706c681efca53f0a35f1415cf0d1',
@@ -20,31 +19,30 @@ class EscapistIE(InfoExtractor):
'id': '6618',
'ext': 'mp4',
'description': "Baldur's Gate: Original, Modded or Enhanced Edition? I'll break down what you can expect from the new Baldur's Gate: Enhanced Edition.",
'uploader': 'the-escapist-presents',
'uploader_id': 'the-escapist-presents',
'uploader': 'The Escapist Presents',
'title': "Breaking Down Baldur's Gate",
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
showName = mobj.group('showname')
video_id = mobj.group('id')
self.report_extraction(video_id)
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
videoDesc = self._html_search_regex(
r'<meta name="description" content="([^"]*)"',
webpage, 'description', fatal=False)
uploader_id = self._html_search_regex(
r"<h1 class='headline'><a href='/videos/view/(.*?)'",
webpage, 'uploader ID', fatal=False)
uploader = self._html_search_regex(
r"<h1 class='headline'>(.*?)</a>",
webpage, 'uploader', fatal=False)
description = self._html_search_meta('description', webpage)
playerUrl = self._og_search_video_url(webpage, name='player URL')
raw_title = self._html_search_meta('title', webpage, fatal=True)
title = raw_title.partition(' : ')[2]
title = self._html_search_regex(
r'<meta name="title" content="([^"]*)"',
webpage, 'title').split(' : ')[-1]
configUrl = self._search_regex('config=(.*)$', playerUrl, 'config URL')
configUrl = compat_urllib_parse.unquote(configUrl)
player_url = self._og_search_video_url(webpage, name='player URL')
config_url = compat_urllib_parse.unquote(self._search_regex(
r'config=(.*)$', player_url, 'config URL'))
formats = []
@@ -53,18 +51,21 @@ class EscapistIE(InfoExtractor):
cfgurl, video_id,
'Downloading ' + name + ' configuration',
'Unable to download ' + name + ' configuration',
transform_source=lambda s: s.replace("'", '"'))
transform_source=js_to_json)
playlist = config['playlist']
video_url = next(
p['url'] for p in playlist
if p.get('eventCategory') == 'Video')
formats.append({
'url': playlist[1]['url'],
'url': video_url,
'format_id': name,
'quality': quality,
})
_add_format('normal', configUrl, quality=0)
hq_url = (configUrl +
('&hq=1' if '?' in configUrl else configUrl + '?hq=1'))
_add_format('normal', config_url, quality=0)
hq_url = (config_url +
('&hq=1' if '?' in config_url else config_url + '?hq=1'))
try:
_add_format('hq', hq_url, quality=1)
except ExtractorError:
@@ -75,9 +76,10 @@
return {
'id': video_id,
'formats': formats,
'uploader': showName,
'uploader': uploader,
'uploader_id': uploader_id,
'title': title,
'thumbnail': self._og_search_thumbnail(webpage),
'description': videoDesc,
'player_url': playerUrl,
'description': description,
'player_url': player_url,
}
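
The ad-hoc single-quote replacement is swapped for js_to_json. A small sketch, assuming youtube_dl.utils.js_to_json turns a single-quoted, JavaScript-style config (a hypothetical, abbreviated payload below) into parseable JSON:

import json
from youtube_dl.utils import js_to_json

raw_config = "{'playlist': [{'eventCategory': 'Video', 'url': 'http://example.com/video.mp4'}]}"
config = json.loads(js_to_json(raw_config))
video_url = next(
    p['url'] for p in config['playlist']
    if p.get('eventCategory') == 'Video')
print(video_url)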

View File

@@ -3,7 +3,9 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
unescapeHTML
clean_html,
xpath_text,
int_or_none,
)
@@ -14,73 +16,63 @@ class NTVRuIE(InfoExtractor):
_TESTS = [
{
'url': 'http://www.ntv.ru/novosti/863142/',
'md5': 'ba7ea172a91cb83eb734cad18c10e723',
'info_dict': {
'id': '746000',
'ext': 'flv',
'ext': 'mp4',
'title': 'Командующий Черноморским флотом провел переговоры в штабе ВМС Украины',
'description': 'Командующий Черноморским флотом провел переговоры в штабе ВМС Украины',
'thumbnail': 're:^http://.*\.jpg',
'duration': 136,
},
'params': {
# rtmp download
'skip_download': True,
},
},
{
'url': 'http://www.ntv.ru/video/novosti/750370/',
'md5': 'adecff79691b4d71e25220a191477124',
'info_dict': {
'id': '750370',
'ext': 'flv',
'ext': 'mp4',
'title': 'Родные пассажиров пропавшего Boeing не верят в трагический исход',
'description': 'Родные пассажиров пропавшего Boeing не верят в трагический исход',
'thumbnail': 're:^http://.*\.jpg',
'duration': 172,
},
'params': {
# rtmp download
'skip_download': True,
},
},
{
'url': 'http://www.ntv.ru/peredacha/segodnya/m23700/o232416',
'md5': '82dbd49b38e3af1d00df16acbeab260c',
'info_dict': {
'id': '747480',
'ext': 'flv',
'title': '«Сегодня». 21 марта 2014 года. 16:00 ',
'description': '«Сегодня». 21 марта 2014 года. 16:00 ',
'ext': 'mp4',
'title': '«Сегодня». 21 марта 2014 года. 16:00',
'description': '«Сегодня». 21 марта 2014 года. 16:00',
'thumbnail': 're:^http://.*\.jpg',
'duration': 1496,
},
'params': {
# rtmp download
'skip_download': True,
},
},
{
'url': 'http://www.ntv.ru/kino/Koma_film',
'md5': 'f825770930937aa7e5aca0dc0d29319a',
'info_dict': {
'id': '758100',
'ext': 'flv',
'id': '1007609',
'ext': 'mp4',
'title': 'Остросюжетный фильм «Кома»',
'description': 'Остросюжетный фильм «Кома»',
'thumbnail': 're:^http://.*\.jpg',
'duration': 5592,
},
'params': {
# rtmp download
'skip_download': True,
},
},
{
'url': 'http://www.ntv.ru/serial/Delo_vrachey/m31760/o233916/',
'md5': '9320cd0e23f3ea59c330dc744e06ff3b',
'info_dict': {
'id': '751482',
'ext': 'flv',
'ext': 'mp4',
'title': '«Дело врачей»: «Деревце жизни»',
'description': '«Дело врачей»: «Деревце жизни»',
'thumbnail': 're:^http://.*\.jpg',
'duration': 2590,
},
'params': {
# rtmp download
'skip_download': True,
},
},
]
@@ -92,45 +84,36 @@ class NTVRuIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
page = self._download_webpage(url, video_id)
video_id = self._html_search_regex(self._VIDEO_ID_REGEXES, page, 'video id')
webpage = self._download_webpage(url, video_id)
player = self._download_xml('http://www.ntv.ru/vi%s/' % video_id, video_id, 'Downloading video XML')
title = unescapeHTML(player.find('./data/title').text)
description = unescapeHTML(player.find('./data/description').text)
video_id = self._html_search_regex(self._VIDEO_ID_REGEXES, webpage, 'video id')
player = self._download_xml(
'http://www.ntv.ru/vi%s/' % video_id,
video_id, 'Downloading video XML')
title = clean_html(xpath_text(player, './data/title', 'title', fatal=True))
description = clean_html(xpath_text(player, './data/description', 'description'))
video = player.find('./data/video')
video_id = video.find('./id').text
thumbnail = video.find('./splash').text
duration = int(video.find('./totaltime').text)
view_count = int(video.find('./views').text)
puid22 = video.find('./puid22').text
video_id = xpath_text(video, './id', 'video id')
thumbnail = xpath_text(video, './splash', 'thumbnail')
duration = int_or_none(xpath_text(video, './totaltime', 'duration'))
view_count = int_or_none(xpath_text(video, './views', 'view count'))
apps = {
'4': 'video1',
'7': 'video2',
}
app = apps.get(puid22, apps['4'])
token = self._download_webpage(
'http://stat.ntv.ru/services/access/token',
video_id, 'Downloading access token')
formats = []
for format_id in ['', 'hi', 'webm']:
file = video.find('./%sfile' % format_id)
if file is None:
file_ = video.find('./%sfile' % format_id)
if file_ is None:
continue
size = video.find('./%ssize' % format_id)
formats.append({
'url': 'rtmp://media.ntv.ru/%s' % app,
'app': app,
'play_path': file.text,
'rtmp_conn': 'B:1',
'player_url': 'http://www.ntv.ru/swf/vps1.swf?update=20131128',
'page_url': 'http://www.ntv.ru',
'flash_version': 'LNX 11,2,202,341',
'rtmp_live': True,
'ext': 'flv',
'filesize': int(size.text),
'url': 'http://media2.ntv.ru/vod/%s&tok=%s' % (file_.text, token),
'filesize': int_or_none(size.text if size is not None else None),
})
self._sort_formats(formats)
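
The xpath_text/int_or_none helpers make missing elements non-fatal. A small, self-contained illustration (sample XML, not the real player response) of the difference from the old .find(...).text chain:

import xml.etree.ElementTree as ET
from youtube_dl.utils import xpath_text, int_or_none

player = ET.fromstring(
    '<player><data><video><id>746000</id><totaltime>136</totaltime></video></data></player>')
video = player.find('./data/video')

print(xpath_text(video, './id', 'video id'))                      # 746000
print(int_or_none(xpath_text(video, './totaltime', 'duration')))  # 136
print(xpath_text(video, './views', 'view count'))                 # None, where .find('./views').text would raise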

View File

@@ -30,6 +30,11 @@ class TeamcocoIE(InfoExtractor):
}
}
]
_VIDEO_ID_REGEXES = (
r'"eVar42"\s*:\s*(\d+)',
r'Ginger\.TeamCoco\.openInApp\("video",\s*"([^"]+)"',
r'"id_not"\s*:\s*(\d+)'
)
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
@@ -40,8 +45,7 @@ class TeamcocoIE(InfoExtractor):
video_id = mobj.group("video_id")
if not video_id:
video_id = self._html_search_regex(
r'<div\s+class="player".*?data-id="(\d+?)"',
webpage, 'video id')
self._VIDEO_ID_REGEXES, webpage, 'video id')
data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
data = self._download_xml(
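
Since _html_search_regex is now handed the whole _VIDEO_ID_REGEXES tuple, the patterns are tried in order until one matches. A standalone sketch of that fallback against a made-up page fragment (the video id here is an arbitrary sample):

import re

_VIDEO_ID_REGEXES = (
    r'"eVar42"\s*:\s*(\d+)',
    r'Ginger\.TeamCoco\.openInApp\("video",\s*"([^"]+)"',
    r'"id_not"\s*:\s*(\d+)',
)

# hypothetical page fragment; only the second pattern matches
webpage = '<script>Ginger.TeamCoco.openInApp("video", "80187");</script>'

video_id = next(
    (m.group(1) for p in _VIDEO_ID_REGEXES
     for m in [re.search(p, webpage)] if m),
    None)
print(video_id)  # 80187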

View File

@@ -25,6 +25,7 @@ from ..compat import (
from ..utils import (
clean_html,
ExtractorError,
float_or_none,
get_element_by_attribute,
get_element_by_id,
int_or_none,
@@ -1124,6 +1125,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
'view_count': view_count,
'like_count': like_count,
'dislike_count': dislike_count,
'average_rating': float_or_none(video_info.get('avg_rating', [None])[0]),
'formats': formats,
}
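
video_info is parsed from a query string, so every field is a list of strings; the get('avg_rating', [None])[0] lookup plus float_or_none covers both the present and the absent case. A minimal check with a sample value, assuming float_or_none returns None for None input:

from youtube_dl.utils import float_or_none

video_info = {'avg_rating': ['4.8']}  # sample value, as parsed from the query string
print(float_or_none(video_info.get('avg_rating', [None])[0]))  # 4.8
print(float_or_none({}.get('avg_rating', [None])[0]))          # None when the field is missing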

View File

@@ -1,3 +1,3 @@
from __future__ import unicode_literals
__version__ = '2015.02.10.4'
__version__ = '2015.02.11'