Compare commits

..

22 Commits

Author SHA1 Message Date
eaaafc59c2 release 2013.11.24 2013-11-24 07:30:34 +01:00
382ed50e0e [viki] Add extractor (fixes #1813) 2013-11-24 07:30:05 +01:00
66ec019240 [youtube] do not use variable name twice 2013-11-24 06:54:26 +01:00
bd49928f7a [niconico] Clarify download 2013-11-24 06:53:50 +01:00
23e6d50d73 [bandcamp] Remove unused variable 2013-11-24 06:52:53 +01:00
2e767313e4 [update] fix error 2013-11-24 06:52:21 +01:00
38b2db6a66 Credit @takuya0301 for niconico 2013-11-24 06:39:49 +01:00
13ebea791f [niconico] Simplify and make work with old Python versions
The website requires SSLv3, otherwise it just times out during SSL negotiation.
2013-11-24 06:39:10 +01:00
4c9c57428f Merge remote-tracking branch 'takuya0301/niconico' 2013-11-24 06:09:11 +01:00
8bf9319e9c Simplify logger code(#1811) 2013-11-24 06:08:11 +01:00
4914120727 Merge remote-tracking branch 'iTaybb/master' 2013-11-24 06:07:12 +01:00
36de0a0e1a [brightcove] Set the 'videoPlayer' value to the 'videoId' if it's missing in the parameters (fixes #1815) 2013-11-23 23:27:15 +01:00
e5c146d586 [streamcloud] skip test on travis 2013-11-23 15:57:42 +01:00
52ad14aeb0 Add support for niconico 2013-11-23 18:19:44 +09:00
43afe28588 Log to an external logger (fixes #1810)
Sadly applications using youtube-dl's python sources can't directly
access it's log stream. It's pretty much limited to stdout and stderr
only.

It should log to logging.Logger instance passed to YoutubeDL's params
dictionary.
2013-11-23 10:22:18 +02:00
a87b0615aa release 2013.11.22.2 2013-11-22 23:08:15 +01:00
d7386f6276 [update] Check if version from repository is newer before updating
Closes #1704
2013-11-22 23:05:58 +01:00
081640940e Merge branch 'master' of github.com:rg3/youtube-dl 2013-11-22 22:46:57 +01:00
7012b23c94 Match --download-archive during playlist processing (Fixes #1745) 2013-11-22 22:46:46 +01:00
d3b30148ed [bambuser:channel] Update test 2013-11-22 21:26:31 +01:00
9f79463803 [howcast] update test's checksum 2013-11-22 21:25:12 +01:00
d35dc6d3b5 [bandcamp] move the album test to the album extractor and return a single track instead of a playlist 2013-11-22 21:19:31 +01:00
16 changed files with 356 additions and 70 deletions

View File

@ -102,7 +102,7 @@ class TestPlaylists(unittest.TestCase):
result = ie.extract('http://bambuser.com/channel/pixelversity') result = ie.extract('http://bambuser.com/channel/pixelversity')
self.assertIsPlaylist(result) self.assertIsPlaylist(result)
self.assertEqual(result['title'], u'pixelversity') self.assertEqual(result['title'], u'pixelversity')
self.assertTrue(len(result['entries']) >= 66) self.assertTrue(len(result['entries']) >= 60)
def test_bandcamp_album(self): def test_bandcamp_album(self):
dl = FakeYDL() dl = FakeYDL()

View File

@ -84,16 +84,16 @@ class TestYoutubeLists(unittest.TestCase):
dl = FakeYDL() dl = FakeYDL()
ie = YoutubeChannelIE(dl) ie = YoutubeChannelIE(dl)
#test paginated channel #test paginated channel
result = ie.extract('https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w')[0] result = ie.extract('https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w')
self.assertTrue(len(result['entries']) > 90) self.assertTrue(len(result['entries']) > 90)
#test autogenerated channel #test autogenerated channel
result = ie.extract('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')[0] result = ie.extract('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')
self.assertTrue(len(result['entries']) >= 18) self.assertTrue(len(result['entries']) >= 18)
def test_youtube_user(self): def test_youtube_user(self):
dl = FakeYDL() dl = FakeYDL()
ie = YoutubeUserIE(dl) ie = YoutubeUserIE(dl)
result = ie.extract('https://www.youtube.com/user/TheLinuxFoundation')[0] result = ie.extract('https://www.youtube.com/user/TheLinuxFoundation')
self.assertTrue(len(result['entries']) >= 320) self.assertTrue(len(result['entries']) >= 320)
def test_youtube_safe_search(self): def test_youtube_safe_search(self):

View File

@ -97,6 +97,7 @@ class YoutubeDL(object):
playlistend: Playlist item to end at. playlistend: Playlist item to end at.
matchtitle: Download only matching titles. matchtitle: Download only matching titles.
rejecttitle: Reject downloads for matching titles. rejecttitle: Reject downloads for matching titles.
logger: Log messages to a logging.Logger instance.
logtostderr: Log messages to stderr instead of stdout. logtostderr: Log messages to stderr instead of stdout.
writedescription: Write the video description to a .description file writedescription: Write the video description to a .description file
writeinfojson: Write the video description to a .info.json file writeinfojson: Write the video description to a .info.json file
@ -192,7 +193,9 @@ class YoutubeDL(object):
def to_screen(self, message, skip_eol=False): def to_screen(self, message, skip_eol=False):
"""Print message to stdout if not in quiet mode.""" """Print message to stdout if not in quiet mode."""
if not self.params.get('quiet', False): if self.params.get('logger'):
self.params['logger'].debug(message)
elif not self.params.get('quiet', False):
terminator = [u'\n', u''][skip_eol] terminator = [u'\n', u''][skip_eol]
output = message + terminator output = message + terminator
write_string(output, self._screen_file) write_string(output, self._screen_file)
@ -200,10 +203,13 @@ class YoutubeDL(object):
def to_stderr(self, message): def to_stderr(self, message):
"""Print message to stderr.""" """Print message to stderr."""
assert type(message) == type(u'') assert type(message) == type(u'')
output = message + u'\n' if self.params.get('logger'):
if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr self.params['logger'].error(message)
output = output.encode(preferredencoding()) else:
sys.stderr.write(output) output = message + u'\n'
if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
output = output.encode(preferredencoding())
sys.stderr.write(output)
def to_console_title(self, message): def to_console_title(self, message):
if not self.params.get('consoletitle', False): if not self.params.get('consoletitle', False):
@ -355,15 +361,17 @@ class YoutubeDL(object):
def _match_entry(self, info_dict): def _match_entry(self, info_dict):
""" Returns None iff the file should be downloaded """ """ Returns None iff the file should be downloaded """
title = info_dict['title'] if 'title' in info_dict:
matchtitle = self.params.get('matchtitle', False) # This can happen when we're just evaluating the playlist
if matchtitle: title = info_dict['title']
if not re.search(matchtitle, title, re.IGNORECASE): matchtitle = self.params.get('matchtitle', False)
return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"' if matchtitle:
rejecttitle = self.params.get('rejecttitle', False) if not re.search(matchtitle, title, re.IGNORECASE):
if rejecttitle: return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
if re.search(rejecttitle, title, re.IGNORECASE): rejecttitle = self.params.get('rejecttitle', False)
return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"' if rejecttitle:
if re.search(rejecttitle, title, re.IGNORECASE):
return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
date = info_dict.get('upload_date', None) date = info_dict.get('upload_date', None)
if date is not None: if date is not None:
dateRange = self.params.get('daterange', DateRange()) dateRange = self.params.get('daterange', DateRange())
@ -374,8 +382,8 @@ class YoutubeDL(object):
if age_limit < info_dict.get('age_limit', 0): if age_limit < info_dict.get('age_limit', 0):
return u'Skipping "' + title + '" because it is age restricted' return u'Skipping "' + title + '" because it is age restricted'
if self.in_download_archive(info_dict): if self.in_download_archive(info_dict):
return (u'%(title)s has already been recorded in archive' return (u'%s has already been recorded in archive'
% info_dict) % info_dict.get('title', info_dict.get('id', u'video')))
return None return None
@staticmethod @staticmethod
@ -454,7 +462,7 @@ class YoutubeDL(object):
ie_key=ie_result.get('ie_key'), ie_key=ie_result.get('ie_key'),
extra_info=extra_info) extra_info=extra_info)
elif result_type == 'playlist': elif result_type == 'playlist':
self.add_extra_info(ie_result, extra_info)
# We process each entry in the playlist # We process each entry in the playlist
playlist = ie_result.get('title', None) or ie_result.get('id', None) playlist = ie_result.get('title', None) or ie_result.get('id', None)
self.to_screen(u'[download] Downloading playlist: %s' % playlist) self.to_screen(u'[download] Downloading playlist: %s' % playlist)
@ -484,6 +492,12 @@ class YoutubeDL(object):
'webpage_url': ie_result['webpage_url'], 'webpage_url': ie_result['webpage_url'],
'extractor_key': ie_result['extractor_key'], 'extractor_key': ie_result['extractor_key'],
} }
reason = self._match_entry(entry)
if reason is not None:
self.to_screen(u'[download] ' + reason)
continue
entry_result = self.process_ie_result(entry, entry_result = self.process_ie_result(entry,
download=download, download=download,
extra_info=extra) extra_info=extra)
@ -810,7 +824,16 @@ class YoutubeDL(object):
fn = self.params.get('download_archive') fn = self.params.get('download_archive')
if fn is None: if fn is None:
return False return False
vid_id = info_dict['extractor'] + u' ' + info_dict['id'] extractor = info_dict.get('extractor_id')
if extractor is None:
if 'id' in info_dict:
extractor = info_dict.get('ie_key') # key in a playlist
if extractor is None:
return False # Incomplete video information
# Future-proof against any change in case
# and backwards compatibility with prior versions
extractor = extractor.lower()
vid_id = extractor + u' ' + info_dict['id']
try: try:
with locked_file(fn, 'r', encoding='utf-8') as archive_file: with locked_file(fn, 'r', encoding='utf-8') as archive_file:
for line in archive_file: for line in archive_file:

View File

@ -35,6 +35,7 @@ __authors__ = (
'Jelle van der Waa', 'Jelle van der Waa',
'Marcin Cieślak', 'Marcin Cieślak',
'Anton Larionov', 'Anton Larionov',
'Takuya Tsuchida',
) )
__license__ = 'Public Domain' __license__ = 'Public Domain'

View File

@ -98,6 +98,7 @@ from .nba import NBAIE
from .nbc import NBCNewsIE from .nbc import NBCNewsIE
from .newgrounds import NewgroundsIE from .newgrounds import NewgroundsIE
from .nhl import NHLIE, NHLVideocenterIE from .nhl import NHLIE, NHLVideocenterIE
from .niconico import NiconicoIE
from .nowvideo import NowVideoIE from .nowvideo import NowVideoIE
from .ooyala import OoyalaIE from .ooyala import OoyalaIE
from .orf import ORFIE from .orf import ORFIE
@ -156,6 +157,7 @@ from .videofyme import VideofyMeIE
from .videopremium import VideoPremiumIE from .videopremium import VideoPremiumIE
from .vimeo import VimeoIE, VimeoChannelIE from .vimeo import VimeoIE, VimeoChannelIE
from .vine import VineIE from .vine import VineIE
from .viki import VikiIE
from .vk import VKIE from .vk import VKIE
from .wat import WatIE from .wat import WatIE
from .websurg import WeBSurgIE from .websurg import WeBSurgIE

View File

@ -20,28 +20,6 @@ class BandcampIE(InfoExtractor):
u"title": u"youtube-dl test song \"'/\\\u00e4\u21ad" u"title": u"youtube-dl test song \"'/\\\u00e4\u21ad"
}, },
u'skip': u'There is a limit of 200 free downloads / month for the test song' u'skip': u'There is a limit of 200 free downloads / month for the test song'
}, {
u'url': u'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
u'playlist': [
{
u'file': u'1353101989.mp3',
u'md5': u'39bc1eded3476e927c724321ddf116cf',
u'info_dict': {
u'title': u'Intro',
}
},
{
u'file': u'38097443.mp3',
u'md5': u'1a2c32e2691474643e912cc6cd4bffaa',
u'info_dict': {
u'title': u'Kero One - Keep It Alive (Blazo remix)',
}
},
],
u'params': {
u'playlistend': 2
},
u'skip': u'Bancamp imposes download limits. See test_playlists:test_bandcamp_album for the playlist test'
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -56,20 +34,17 @@ class BandcampIE(InfoExtractor):
json_code = m_trackinfo.group(1) json_code = m_trackinfo.group(1)
data = json.loads(json_code) data = json.loads(json_code)
entries = []
for d in data: for d in data:
formats = [{ formats = [{
'format_id': 'format_id', 'format_id': 'format_id',
'url': format_url, 'url': format_url,
'ext': format_id.partition('-')[0] 'ext': format_id.partition('-')[0]
} for format_id, format_url in sorted(d['file'].items())] } for format_id, format_url in sorted(d['file'].items())]
entries.append({ return {
'id': compat_str(d['id']), 'id': compat_str(d['id']),
'title': d['title'], 'title': d['title'],
'formats': formats, 'formats': formats,
}) }
return self.playlist_result(entries, title, title)
else: else:
raise ExtractorError(u'No free songs found') raise ExtractorError(u'No free songs found')
@ -112,6 +87,30 @@ class BandcampAlbumIE(InfoExtractor):
IE_NAME = u'Bandcamp:album' IE_NAME = u'Bandcamp:album'
_VALID_URL = r'http://.*?\.bandcamp\.com/album/(?P<title>.*)' _VALID_URL = r'http://.*?\.bandcamp\.com/album/(?P<title>.*)'
_TEST = {
u'url': u'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
u'playlist': [
{
u'file': u'1353101989.mp3',
u'md5': u'39bc1eded3476e927c724321ddf116cf',
u'info_dict': {
u'title': u'Intro',
}
},
{
u'file': u'38097443.mp3',
u'md5': u'1a2c32e2691474643e912cc6cd4bffaa',
u'info_dict': {
u'title': u'Kero One - Keep It Alive (Blazo remix)',
}
},
],
u'params': {
u'playlistend': 2
},
u'skip': u'Bancamp imposes download limits. See test_playlists:test_bandcamp_album for the playlist test'
}
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
title = mobj.group('title') title = mobj.group('title')

View File

@ -75,14 +75,17 @@ class BrightcoveIE(InfoExtractor):
params = {'flashID': object_doc.attrib['id'], params = {'flashID': object_doc.attrib['id'],
'playerID': find_xpath_attr(object_doc, './param', 'name', 'playerID').attrib['value'], 'playerID': find_xpath_attr(object_doc, './param', 'name', 'playerID').attrib['value'],
} }
playerKey = find_xpath_attr(object_doc, './param', 'name', 'playerKey') def find_param(name):
return find_xpath_attr(object_doc, './param', 'name', name)
playerKey = find_param('playerKey')
# Not all pages define this value # Not all pages define this value
if playerKey is not None: if playerKey is not None:
params['playerKey'] = playerKey.attrib['value'] params['playerKey'] = playerKey.attrib['value']
videoPlayer = find_xpath_attr(object_doc, './param', 'name', '@videoPlayer') # The three fields hold the id of the video
videoPlayer = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID')
if videoPlayer is not None: if videoPlayer is not None:
params['@videoPlayer'] = videoPlayer.attrib['value'] params['@videoPlayer'] = videoPlayer.attrib['value']
linkBase = find_xpath_attr(object_doc, './param', 'name', 'linkBaseURL') linkBase = find_param('linkBaseURL')
if linkBase is not None: if linkBase is not None:
params['linkBaseURL'] = linkBase.attrib['value'] params['linkBaseURL'] = linkBase.attrib['value']
data = compat_urllib_parse.urlencode(params) data = compat_urllib_parse.urlencode(params)

View File

@ -229,12 +229,14 @@ class InfoExtractor(object):
self.to_screen(u'Logging in') self.to_screen(u'Logging in')
#Methods for following #608 #Methods for following #608
def url_result(self, url, ie=None): def url_result(self, url, ie=None, video_id=None):
"""Returns a url that points to a page that should be processed""" """Returns a url that points to a page that should be processed"""
#TODO: ie should be the class used for getting the info #TODO: ie should be the class used for getting the info
video_info = {'_type': 'url', video_info = {'_type': 'url',
'url': url, 'url': url,
'ie_key': ie} 'ie_key': ie}
if video_id is not None:
video_info['id'] = video_id
return video_info return video_info
def playlist_result(self, entries, playlist_id=None, playlist_title=None): def playlist_result(self, entries, playlist_id=None, playlist_title=None):
"""Returns a playlist""" """Returns a playlist"""

View File

@ -8,7 +8,7 @@ class HowcastIE(InfoExtractor):
_TEST = { _TEST = {
u'url': u'http://www.howcast.com/videos/390161-How-to-Tie-a-Square-Knot-Properly', u'url': u'http://www.howcast.com/videos/390161-How-to-Tie-a-Square-Knot-Properly',
u'file': u'390161.mp4', u'file': u'390161.mp4',
u'md5': u'1d7ba54e2c9d7dc6935ef39e00529138', u'md5': u'8b743df908c42f60cf6496586c7f12c3',
u'info_dict': { u'info_dict': {
u"description": u"The square knot, also known as the reef knot, is one of the oldest, most basic knots to tie, and can be used in many different ways. Here's the proper way to tie a square knot.", u"description": u"The square knot, also known as the reef knot, is one of the oldest, most basic knots to tie, and can be used in many different ways. Here's the proper way to tie a square knot.",
u"title": u"How to Tie a Square Knot Properly" u"title": u"How to Tie a Square Knot Properly"

View File

@ -0,0 +1,131 @@
# encoding: utf-8
import re
import socket
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
compat_http_client,
compat_urllib_error,
compat_urllib_parse,
compat_urllib_request,
compat_urlparse,
compat_str,
ExtractorError,
unified_strdate,
)
class NiconicoIE(InfoExtractor):
IE_NAME = u'niconico'
IE_DESC = u'ニコニコ動画'
_TEST = {
u'url': u'http://www.nicovideo.jp/watch/sm22312215',
u'file': u'sm22312215.mp4',
u'md5': u'd1a75c0823e2f629128c43e1212760f9',
u'info_dict': {
u'title': u'Big Buck Bunny',
u'uploader': u'takuya0301',
u'uploader_id': u'2698420',
u'upload_date': u'20131123',
u'description': u'(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org',
},
u'params': {
u'username': u'ydl.niconico@gmail.com',
u'password': u'youtube-dl',
},
}
_VALID_URL = r'^https?://(?:www\.|secure\.)?nicovideo\.jp/watch/([a-z][a-z][0-9]+)(?:.*)$'
_NETRC_MACHINE = 'niconico'
# If True it will raise an error if no login info is provided
_LOGIN_REQUIRED = True
def _real_initialize(self):
self._login()
def _login(self):
(username, password) = self._get_login_info()
# No authentication to be performed
if username is None:
if self._LOGIN_REQUIRED:
raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
return False
# Log in
login_form_strs = {
u'mail': username,
u'password': password,
}
# Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
# chokes on unicode
login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
login_data = compat_urllib_parse.urlencode(login_form).encode('utf-8')
request = compat_urllib_request.Request(
u'https://secure.nicovideo.jp/secure/login', login_data)
login_results = self._download_webpage(
request, u'', note=u'Logging in', errnote=u'Unable to log in')
if re.search(r'(?i)<h1 class="mb8p4">Log in error</h1>', login_results) is not None:
self._downloader.report_warning(u'unable to log in: bad username or password')
return False
return True
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group(1)
# Get video webpage. We are not actually interested in it, but need
# the cookies in order to be able to download the info webpage
self._download_webpage('http://www.nicovideo.jp/watch/' + video_id, video_id)
video_info_webpage = self._download_webpage(
'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id,
note=u'Downloading video info page')
# Get flv info
flv_info_webpage = self._download_webpage(
u'http://flapi.nicovideo.jp/api/getflv?v=' + video_id,
video_id, u'Downloading flv info')
video_real_url = compat_urlparse.parse_qs(flv_info_webpage)['url'][0]
# Start extracting information
video_info = xml.etree.ElementTree.fromstring(video_info_webpage)
video_title = video_info.find('.//title').text
video_extension = video_info.find('.//movie_type').text
video_format = video_extension.upper()
video_thumbnail = video_info.find('.//thumbnail_url').text
video_description = video_info.find('.//description').text
video_uploader_id = video_info.find('.//user_id').text
video_upload_date = unified_strdate(video_info.find('.//first_retrieve').text.split('+')[0])
video_view_count = video_info.find('.//view_counter').text
video_webpage_url = video_info.find('.//watch_url').text
# uploader
video_uploader = video_uploader_id
url = 'http://seiga.nicovideo.jp/api/user/info?id=' + video_uploader_id
try:
user_info_webpage = self._download_webpage(
url, video_id, note=u'Downloading user information')
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.report_warning(u'Unable to download user info webpage: %s' % compat_str(err))
else:
user_info = xml.etree.ElementTree.fromstring(user_info_webpage)
video_uploader = user_info.find('.//nickname').text
return {
'id': video_id,
'url': video_real_url,
'title': video_title,
'ext': video_extension,
'format': video_format,
'thumbnail': video_thumbnail,
'description': video_description,
'uploader': video_uploader,
'upload_date': video_upload_date,
'uploader_id': video_uploader_id,
'view_count': video_view_count,
'webpage_url': video_webpage_url,
}

View File

@ -21,6 +21,7 @@ class StreamcloudIE(InfoExtractor):
u'title': u'youtube-dl test video \'/\\ ä ↭', u'title': u'youtube-dl test video \'/\\ ä ↭',
u'duration': 9, u'duration': 9,
}, },
u'skip': u'Only available from the EU'
} }
def _real_extract(self, url): def _real_extract(self, url):

View File

@ -0,0 +1,91 @@
import re
from ..utils import (
unified_strdate,
)
from .subtitles import SubtitlesInfoExtractor
class VikiIE(SubtitlesInfoExtractor):
IE_NAME = u'viki'
_VALID_URL = r'^https?://(?:www\.)?viki\.com/videos/(?P<id>[0-9]+v)'
_TEST = {
u'url': u'http://www.viki.com/videos/1023585v-heirs-episode-14',
u'file': u'1023585v.mp4',
u'md5': u'a21454021c2646f5433514177e2caa5f',
u'info_dict': {
u'title': u'Heirs Episode 14',
u'uploader': u'SBS',
u'description': u'md5:c4b17b9626dd4b143dcc4d855ba3474e',
u'upload_date': u'20131121',
u'age_limit': 13,
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group(1)
webpage = self._download_webpage(url, video_id)
title = self._og_search_title(webpage)
description = self._og_search_description(webpage)
thumbnail = self._og_search_thumbnail(webpage)
uploader = self._html_search_regex(
r'<strong>Broadcast Network: </strong>\s*([^<]*)<', webpage,
u'uploader')
if uploader is not None:
uploader = uploader.strip()
rating_str = self._html_search_regex(
r'<strong>Rating: </strong>\s*([^<]*)<', webpage,
u'rating information', default='').strip()
RATINGS = {
'G': 0,
'PG': 10,
'PG-13': 13,
'R': 16,
'NC': 18,
}
age_limit = RATINGS.get(rating_str)
info_url = 'http://www.viki.com/player5_fragment/%s?action=show&controller=videos' % video_id
info_webpage = self._download_webpage(info_url, video_id)
video_url = self._html_search_regex(
r'<source[^>]+src="([^"]+)"', info_webpage, u'video URL')
upload_date_str = self._html_search_regex(
r'"created_at":"([^"]+)"', info_webpage, u'upload date')
upload_date = (
unified_strdate(upload_date_str)
if upload_date_str is not None
else None
)
# subtitles
video_subtitles = self.extract_subtitles(video_id, info_webpage)
if self._downloader.params.get('listsubtitles', False):
self._list_available_subtitles(video_id, info_webpage)
return
return {
'id': video_id,
'title': title,
'url': video_url,
'description': description,
'thumbnail': thumbnail,
'age_limit': age_limit,
'uploader': uploader,
'subtitles': video_subtitles,
'upload_date': upload_date,
}
def _get_available_subtitles(self, video_id, info_webpage):
res = {}
for sturl in re.findall(r'<track src="([^"]+)"/>'):
m = re.search(r'/(?P<lang>[a-z]+)\.vtt', sturl)
if not m:
continue
res[m.group('lang')] = sturl
return res

View File

@ -1552,7 +1552,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
video_id = query_dict['v'][0] video_id = query_dict['v'][0]
if self._downloader.params.get('noplaylist'): if self._downloader.params.get('noplaylist'):
self.to_screen(u'Downloading just video %s because of --no-playlist' % video_id) self.to_screen(u'Downloading just video %s because of --no-playlist' % video_id)
return self.url_result('https://www.youtube.com/watch?v=' + video_id, 'Youtube') return self.url_result(video_id, 'Youtube', video_id=video_id)
else: else:
self.to_screen(u'Downloading playlist PL%s - add --no-playlist to just download video %s' % (playlist_id, video_id)) self.to_screen(u'Downloading playlist PL%s - add --no-playlist to just download video %s' % (playlist_id, video_id))
@ -1571,7 +1571,8 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
playlist_title = self._og_search_title(page) playlist_title = self._og_search_title(page)
url_results = [self.url_result(vid, 'Youtube') for vid in ids] url_results = [self.url_result(vid_id, 'Youtube', video_id=vid_id)
for vid_id in ids]
return self.playlist_result(url_results, playlist_id, playlist_title) return self.playlist_result(url_results, playlist_id, playlist_title)
@ -1626,9 +1627,9 @@ class YoutubeChannelIE(InfoExtractor):
self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids))) self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))
urls = ['http://www.youtube.com/watch?v=%s' % id for id in video_ids] url_entries = [self.url_result(video_id, 'Youtube', video_id=video_id)
url_entries = [self.url_result(eurl, 'Youtube') for eurl in urls] for video_id in video_ids]
return [self.playlist_result(url_entries, channel_id)] return self.playlist_result(url_entries, channel_id)
class YoutubeUserIE(InfoExtractor): class YoutubeUserIE(InfoExtractor):
@ -1692,9 +1693,11 @@ class YoutubeUserIE(InfoExtractor):
if len(ids_in_page) < self._GDATA_PAGE_SIZE: if len(ids_in_page) < self._GDATA_PAGE_SIZE:
break break
urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids] url_results = [
url_results = [self.url_result(rurl, 'Youtube') for rurl in urls] self.url_result(video_id, 'Youtube', video_id=video_id)
return [self.playlist_result(url_results, playlist_title = username)] for video_id in video_ids]
return self.playlist_result(url_results, playlist_title=username)
class YoutubeSearchIE(SearchInfoExtractor): class YoutubeSearchIE(SearchInfoExtractor):
IE_DESC = u'YouTube.com searches' IE_DESC = u'YouTube.com searches'
@ -1735,7 +1738,8 @@ class YoutubeSearchIE(SearchInfoExtractor):
if len(video_ids) > n: if len(video_ids) > n:
video_ids = video_ids[:n] video_ids = video_ids[:n]
videos = [self.url_result('http://www.youtube.com/watch?v=%s' % id, 'Youtube') for id in video_ids] videos = [self.url_result(video_id, 'Youtube', video_id=video_id)
for video_id in video_ids]
return self.playlist_result(videos, query) return self.playlist_result(videos, query)
class YoutubeSearchDateIE(YoutubeSearchIE): class YoutubeSearchDateIE(YoutubeSearchIE):
@ -1795,7 +1799,9 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
feed_html = info['feed_html'] feed_html = info['feed_html']
m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html) m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
ids = orderedSet(m.group(1) for m in m_ids) ids = orderedSet(m.group(1) for m in m_ids)
feed_entries.extend(self.url_result(id, 'Youtube') for id in ids) feed_entries.extend(
self.url_result(video_id, 'Youtube', video_id=video_id)
for video_id in ids)
if info['paging'] is None: if info['paging'] is None:
break break
return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE) return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)

View File

@ -41,6 +41,7 @@ def rsa_verify(message, signature, key):
if signature != sha256(message).digest(): return False if signature != sha256(message).digest(): return False
return True return True
def update_self(to_screen, verbose): def update_self(to_screen, verbose):
"""Update the program file with the latest version from the repository""" """Update the program file with the latest version from the repository"""
@ -82,6 +83,13 @@ def update_self(to_screen, verbose):
return return
version_id = versions_info['latest'] version_id = versions_info['latest']
def version_tuple(version_str):
return tuple(map(int, version_str.split('.')))
if version_tuple(__version__) >= version_tuple(version_id):
to_screen(u'youtube-dl is up to date (%s)' % __version__)
return
to_screen(u'Updating to version ' + version_id + '...') to_screen(u'Updating to version ' + version_id + '...')
version = versions_info['versions'][version_id] version = versions_info['versions'][version_id]

View File

@ -12,6 +12,7 @@ import os
import pipes import pipes
import platform import platform
import re import re
import ssl
import socket import socket
import sys import sys
import traceback import traceback
@ -535,13 +536,31 @@ def formatSeconds(secs):
else: else:
return '%d' % secs return '%d' % secs
def make_HTTPS_handler(opts): def make_HTTPS_handler(opts):
if sys.version_info < (3,2): if sys.version_info < (3, 2):
# Python's 2.x handler is very simplistic import httplib
return compat_urllib_request.HTTPSHandler()
class HTTPSConnectionV3(httplib.HTTPSConnection):
def __init__(self, *args, **kwargs):
httplib.HTTPSConnection.__init__(self, *args, **kwargs)
def connect(self):
sock = socket.create_connection((self.host, self.port), self.timeout)
if self._tunnel_host:
self.sock = sock
self._tunnel()
try:
self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv3)
except ssl.SSLError as e:
self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv23)
class HTTPSHandlerV3(compat_urllib_request.HTTPSHandler):
def https_open(self, req):
return self.do_open(HTTPSConnectionV3, req)
return HTTPSHandlerV3()
else: else:
import ssl context = ssl.SSLContext(ssl.PROTOCOL_SSLv3)
context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
context.set_default_verify_paths() context.set_default_verify_paths()
context.verify_mode = (ssl.CERT_NONE context.verify_mode = (ssl.CERT_NONE

View File

@ -1,2 +1,2 @@
__version__ = '2013.11.22.1' __version__ = '2013.11.24'