Compare commits
29 Commits
2014.03.04
...
2014.03.07
Author | SHA1 | Date | |
---|---|---|---|
|
dae313e725 | ||
|
b74fa8cd2c | ||
|
94eae04c94 | ||
|
16ff7ebc77 | ||
|
c361c505b0 | ||
|
d37c07c575 | ||
|
9d6105c9f0 | ||
|
8dec03ecba | ||
|
826547870b | ||
|
52d6a9a61d | ||
|
ad242b5fbc | ||
|
3524175625 | ||
|
7b9965ea93 | ||
|
0a5bce566f | ||
|
8012bd2424 | ||
|
f55a1f0a88 | ||
|
bacac173a9 | ||
|
ca1fee34f2 | ||
|
6dadaa9930 | ||
|
553f6e4633 | ||
|
652bee05f0 | ||
|
d63516e9cd | ||
|
e477dcf649 | ||
|
9d3f7781f3 | ||
|
c7095dada3 | ||
|
607dbbad76 | ||
|
17b75c0de1 | ||
|
ab24f4f3be | ||
|
1b86cc41cf |
@@ -124,8 +124,12 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
video id, %(playlist)s for the playlist the
|
video id, %(playlist)s for the playlist the
|
||||||
video is in, %(playlist_index)s for the
|
video is in, %(playlist_index)s for the
|
||||||
position in the playlist and %% for a
|
position in the playlist and %% for a
|
||||||
literal percent. Use - to output to stdout.
|
literal percent. %(height)s and %(width)s
|
||||||
Can also be used to download to a different
|
for the width and height of the video
|
||||||
|
format. %(resolution)s for a textual
|
||||||
|
description of the resolution of the video
|
||||||
|
format. Use - to output to stdout. Can also
|
||||||
|
be used to download to a different
|
||||||
directory, for example with -o '/my/downloa
|
directory, for example with -o '/my/downloa
|
||||||
ds/%(uploader)s/%(title)s-%(id)s.%(ext)s' .
|
ds/%(uploader)s/%(title)s-%(id)s.%(ext)s' .
|
||||||
--autonumber-size NUMBER Specifies the number of digits in
|
--autonumber-size NUMBER Specifies the number of digits in
|
||||||
|
@@ -36,6 +36,7 @@ from youtube_dl.extractor import (
|
|||||||
RutubeChannelIE,
|
RutubeChannelIE,
|
||||||
GoogleSearchIE,
|
GoogleSearchIE,
|
||||||
GenericIE,
|
GenericIE,
|
||||||
|
TEDIE,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -259,5 +260,14 @@ class TestPlaylists(unittest.TestCase):
|
|||||||
self.assertEqual(result['title'], 'Zero Punctuation')
|
self.assertEqual(result['title'], 'Zero Punctuation')
|
||||||
self.assertTrue(len(result['entries']) > 10)
|
self.assertTrue(len(result['entries']) > 10)
|
||||||
|
|
||||||
|
def test_ted_playlist(self):
|
||||||
|
dl = FakeYDL()
|
||||||
|
ie = TEDIE(dl)
|
||||||
|
result = ie.extract('http://www.ted.com/playlists/who_are_the_hackers')
|
||||||
|
self.assertIsPlaylist(result)
|
||||||
|
self.assertEqual(result['id'], '10')
|
||||||
|
self.assertEqual(result['title'], 'Who are the hackers?')
|
||||||
|
self.assertTrue(len(result['entries']) >= 6)
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
@@ -33,6 +33,7 @@ from youtube_dl.utils import (
|
|||||||
unified_strdate,
|
unified_strdate,
|
||||||
unsmuggle_url,
|
unsmuggle_url,
|
||||||
url_basename,
|
url_basename,
|
||||||
|
urlencode_postdata,
|
||||||
xpath_with_ns,
|
xpath_with_ns,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -261,5 +262,9 @@ class TestUtil(unittest.TestCase):
|
|||||||
bam''')
|
bam''')
|
||||||
self.assertEqual(read_batch_urls(f), [u'foo', u'bar', u'baz', u'bam'])
|
self.assertEqual(read_batch_urls(f), [u'foo', u'bar', u'baz', u'bam'])
|
||||||
|
|
||||||
|
def test_urlencode_postdata(self):
|
||||||
|
data = urlencode_postdata({'username': 'foo@bar.com', 'password': '1234'})
|
||||||
|
self.assertTrue(isinstance(data, bytes))
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
@@ -409,6 +409,13 @@ class YoutubeDL(object):
|
|||||||
template_dict['autonumber'] = autonumber_templ % self._num_downloads
|
template_dict['autonumber'] = autonumber_templ % self._num_downloads
|
||||||
if template_dict.get('playlist_index') is not None:
|
if template_dict.get('playlist_index') is not None:
|
||||||
template_dict['playlist_index'] = '%05d' % template_dict['playlist_index']
|
template_dict['playlist_index'] = '%05d' % template_dict['playlist_index']
|
||||||
|
if template_dict.get('resolution') is None:
|
||||||
|
if template_dict.get('width') and template_dict.get('height'):
|
||||||
|
template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
|
||||||
|
elif template_dict.get('height'):
|
||||||
|
res = '%sp' % template_dict['height']
|
||||||
|
elif template_dict.get('width'):
|
||||||
|
res = '?x%d' % template_dict['width']
|
||||||
|
|
||||||
sanitize = lambda k, v: sanitize_filename(
|
sanitize = lambda k, v: sanitize_filename(
|
||||||
compat_str(v),
|
compat_str(v),
|
||||||
|
@@ -430,6 +430,8 @@ def parseOpts(overrideArguments=None):
|
|||||||
'%(extractor)s for the provider (youtube, metacafe, etc), '
|
'%(extractor)s for the provider (youtube, metacafe, etc), '
|
||||||
'%(id)s for the video id, %(playlist)s for the playlist the video is in, '
|
'%(id)s for the video id, %(playlist)s for the playlist the video is in, '
|
||||||
'%(playlist_index)s for the position in the playlist and %% for a literal percent. '
|
'%(playlist_index)s for the position in the playlist and %% for a literal percent. '
|
||||||
|
'%(height)s and %(width)s for the width and height of the video format. '
|
||||||
|
'%(resolution)s for a textual description of the resolution of the video format. '
|
||||||
'Use - to output to stdout. Can also be used to download to a different directory, '
|
'Use - to output to stdout. Can also be used to download to a different directory, '
|
||||||
'for example with -o \'/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s\' .'))
|
'for example with -o \'/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s\' .'))
|
||||||
filesystem.add_option('--autonumber-size',
|
filesystem.add_option('--autonumber-size',
|
||||||
|
@@ -1,3 +1,5 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import subprocess
|
import subprocess
|
||||||
@@ -22,7 +24,7 @@ class RtmpFD(FileDownloader):
|
|||||||
proc_stderr_closed = False
|
proc_stderr_closed = False
|
||||||
while not proc_stderr_closed:
|
while not proc_stderr_closed:
|
||||||
# read line from stderr
|
# read line from stderr
|
||||||
line = u''
|
line = ''
|
||||||
while True:
|
while True:
|
||||||
char = proc.stderr.read(1)
|
char = proc.stderr.read(1)
|
||||||
if not char:
|
if not char:
|
||||||
@@ -46,7 +48,7 @@ class RtmpFD(FileDownloader):
|
|||||||
data_len = None
|
data_len = None
|
||||||
if percent > 0:
|
if percent > 0:
|
||||||
data_len = int(downloaded_data_len * 100 / percent)
|
data_len = int(downloaded_data_len * 100 / percent)
|
||||||
data_len_str = u'~' + format_bytes(data_len)
|
data_len_str = '~' + format_bytes(data_len)
|
||||||
self.report_progress(percent, data_len_str, speed, eta)
|
self.report_progress(percent, data_len_str, speed, eta)
|
||||||
cursor_in_new_line = False
|
cursor_in_new_line = False
|
||||||
self._hook_progress({
|
self._hook_progress({
|
||||||
@@ -76,12 +78,12 @@ class RtmpFD(FileDownloader):
|
|||||||
})
|
})
|
||||||
elif self.params.get('verbose', False):
|
elif self.params.get('verbose', False):
|
||||||
if not cursor_in_new_line:
|
if not cursor_in_new_line:
|
||||||
self.to_screen(u'')
|
self.to_screen('')
|
||||||
cursor_in_new_line = True
|
cursor_in_new_line = True
|
||||||
self.to_screen(u'[rtmpdump] '+line)
|
self.to_screen('[rtmpdump] '+line)
|
||||||
proc.wait()
|
proc.wait()
|
||||||
if not cursor_in_new_line:
|
if not cursor_in_new_line:
|
||||||
self.to_screen(u'')
|
self.to_screen('')
|
||||||
return proc.returncode
|
return proc.returncode
|
||||||
|
|
||||||
url = info_dict['url']
|
url = info_dict['url']
|
||||||
@@ -102,7 +104,7 @@ class RtmpFD(FileDownloader):
|
|||||||
try:
|
try:
|
||||||
subprocess.call(['rtmpdump', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
|
subprocess.call(['rtmpdump', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
|
||||||
except (OSError, IOError):
|
except (OSError, IOError):
|
||||||
self.report_error(u'RTMP download detected but "rtmpdump" could not be run')
|
self.report_error('RTMP download detected but "rtmpdump" could not be run')
|
||||||
return False
|
return False
|
||||||
|
|
||||||
# Download using rtmpdump. rtmpdump returns exit code 2 when
|
# Download using rtmpdump. rtmpdump returns exit code 2 when
|
||||||
@@ -127,7 +129,7 @@ class RtmpFD(FileDownloader):
|
|||||||
basic_args += ['--live']
|
basic_args += ['--live']
|
||||||
if conn:
|
if conn:
|
||||||
basic_args += ['--conn', conn]
|
basic_args += ['--conn', conn]
|
||||||
args = basic_args + [[], ['--resume', '--skip', '1']][self.params.get('continuedl', False)]
|
args = basic_args + [[], ['--resume', '--skip', '1']][not live and self.params.get('continuedl', False)]
|
||||||
|
|
||||||
if sys.platform == 'win32' and sys.version_info < (3, 0):
|
if sys.platform == 'win32' and sys.version_info < (3, 0):
|
||||||
# Windows subprocess module does not actually support Unicode
|
# Windows subprocess module does not actually support Unicode
|
||||||
@@ -150,26 +152,35 @@ class RtmpFD(FileDownloader):
|
|||||||
shell_quote = lambda args: ' '.join(map(pipes.quote, str_args))
|
shell_quote = lambda args: ' '.join(map(pipes.quote, str_args))
|
||||||
except ImportError:
|
except ImportError:
|
||||||
shell_quote = repr
|
shell_quote = repr
|
||||||
self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(str_args))
|
self.to_screen('[debug] rtmpdump command line: ' + shell_quote(str_args))
|
||||||
|
|
||||||
|
RD_SUCCESS = 0
|
||||||
|
RD_FAILED = 1
|
||||||
|
RD_INCOMPLETE = 2
|
||||||
|
RD_NO_CONNECT = 3
|
||||||
|
|
||||||
retval = run_rtmpdump(args)
|
retval = run_rtmpdump(args)
|
||||||
|
|
||||||
while (retval == 2 or retval == 1) and not test:
|
if retval == RD_NO_CONNECT:
|
||||||
|
self.report_error('[rtmpdump] Could not connect to RTMP server.')
|
||||||
|
return False
|
||||||
|
|
||||||
|
while (retval == RD_INCOMPLETE or retval == RD_FAILED) and not test and not live:
|
||||||
prevsize = os.path.getsize(encodeFilename(tmpfilename))
|
prevsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||||
self.to_screen(u'[rtmpdump] %s bytes' % prevsize)
|
self.to_screen('[rtmpdump] %s bytes' % prevsize)
|
||||||
time.sleep(5.0) # This seems to be needed
|
time.sleep(5.0) # This seems to be needed
|
||||||
retval = run_rtmpdump(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
|
retval = run_rtmpdump(basic_args + ['-e'] + [[], ['-k', '1']][retval == RD_FAILED])
|
||||||
cursize = os.path.getsize(encodeFilename(tmpfilename))
|
cursize = os.path.getsize(encodeFilename(tmpfilename))
|
||||||
if prevsize == cursize and retval == 1:
|
if prevsize == cursize and retval == RD_FAILED:
|
||||||
break
|
break
|
||||||
# Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
|
# Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
|
||||||
if prevsize == cursize and retval == 2 and cursize > 1024:
|
if prevsize == cursize and retval == RD_INCOMPLETE and cursize > 1024:
|
||||||
self.to_screen(u'[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
|
self.to_screen('[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
|
||||||
retval = 0
|
retval = RD_SUCCESS
|
||||||
break
|
break
|
||||||
if retval == 0 or (test and retval == 2):
|
if retval == RD_SUCCESS or (test and retval == RD_INCOMPLETE):
|
||||||
fsize = os.path.getsize(encodeFilename(tmpfilename))
|
fsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||||
self.to_screen(u'[rtmpdump] %s bytes' % fsize)
|
self.to_screen('[rtmpdump] %s bytes' % fsize)
|
||||||
self.try_rename(tmpfilename, filename)
|
self.try_rename(tmpfilename, filename)
|
||||||
self._hook_progress({
|
self._hook_progress({
|
||||||
'downloaded_bytes': fsize,
|
'downloaded_bytes': fsize,
|
||||||
@@ -179,6 +190,6 @@ class RtmpFD(FileDownloader):
|
|||||||
})
|
})
|
||||||
return True
|
return True
|
||||||
else:
|
else:
|
||||||
self.to_stderr(u"\n")
|
self.to_stderr('\n')
|
||||||
self.report_error(u'rtmpdump exited with code %d' % retval)
|
self.report_error('rtmpdump exited with code %d' % retval)
|
||||||
return False
|
return False
|
||||||
|
@@ -12,6 +12,7 @@ from ..utils import (
|
|||||||
get_element_by_id,
|
get_element_by_id,
|
||||||
orderedSet,
|
orderedSet,
|
||||||
str_to_int,
|
str_to_int,
|
||||||
|
int_or_none,
|
||||||
|
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
)
|
)
|
||||||
@@ -124,7 +125,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
if video_url is not None:
|
if video_url is not None:
|
||||||
m_size = re.search(r'H264-(\d+)x(\d+)', video_url)
|
m_size = re.search(r'H264-(\d+)x(\d+)', video_url)
|
||||||
if m_size is not None:
|
if m_size is not None:
|
||||||
width, height = m_size.group(1), m_size.group(2)
|
width, height = map(int_or_none, (m_size.group(1), m_size.group(2)))
|
||||||
else:
|
else:
|
||||||
width, height = None, None
|
width, height = None, None
|
||||||
formats.append({
|
formats.append({
|
||||||
|
@@ -11,6 +11,7 @@ from ..utils import (
|
|||||||
compat_urllib_error,
|
compat_urllib_error,
|
||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
|
urlencode_postdata,
|
||||||
|
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
)
|
)
|
||||||
@@ -35,8 +36,8 @@ class FacebookIE(InfoExtractor):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '120708114770723',
|
'id': '120708114770723',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
u"duration": 279,
|
'duration': 279,
|
||||||
u"title": u"PEOPLE ARE AWESOME 2013"
|
'title': 'PEOPLE ARE AWESOME 2013'
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -51,8 +52,8 @@ class FacebookIE(InfoExtractor):
|
|||||||
|
|
||||||
login_page_req = compat_urllib_request.Request(self._LOGIN_URL)
|
login_page_req = compat_urllib_request.Request(self._LOGIN_URL)
|
||||||
login_page_req.add_header('Cookie', 'locale=en_US')
|
login_page_req.add_header('Cookie', 'locale=en_US')
|
||||||
self.report_login()
|
login_page = self._download_webpage(login_page_req, None,
|
||||||
login_page = self._download_webpage(login_page_req, None, note=False,
|
note='Downloading login page',
|
||||||
errnote='Unable to download login page')
|
errnote='Unable to download login page')
|
||||||
lsd = self._search_regex(
|
lsd = self._search_regex(
|
||||||
r'<input type="hidden" name="lsd" value="([^"]*)"',
|
r'<input type="hidden" name="lsd" value="([^"]*)"',
|
||||||
@@ -70,23 +71,25 @@ class FacebookIE(InfoExtractor):
|
|||||||
'timezone': '-60',
|
'timezone': '-60',
|
||||||
'trynum': '1',
|
'trynum': '1',
|
||||||
}
|
}
|
||||||
request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
|
request = compat_urllib_request.Request(self._LOGIN_URL, urlencode_postdata(login_form))
|
||||||
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||||
try:
|
try:
|
||||||
login_results = compat_urllib_request.urlopen(request).read()
|
login_results = self._download_webpage(request, None,
|
||||||
|
note='Logging in', errnote='unable to fetch login page')
|
||||||
if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None:
|
if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None:
|
||||||
self._downloader.report_warning('unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
|
self._downloader.report_warning('unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
|
||||||
return
|
return
|
||||||
|
|
||||||
check_form = {
|
check_form = {
|
||||||
'fb_dtsg': self._search_regex(r'"fb_dtsg":"(.*?)"', login_results, 'fb_dtsg'),
|
'fb_dtsg': self._search_regex(r'name="fb_dtsg" value="(.+?)"', login_results, 'fb_dtsg'),
|
||||||
'nh': self._search_regex(r'name="nh" value="(\w*?)"', login_results, 'nh'),
|
'nh': self._search_regex(r'name="nh" value="(\w*?)"', login_results, 'nh'),
|
||||||
'name_action_selected': 'dont_save',
|
'name_action_selected': 'dont_save',
|
||||||
'submit[Continue]': self._search_regex(r'<input value="(.*?)" name="submit\[Continue\]"', login_results, 'continue'),
|
'submit[Continue]': self._search_regex(r'<button[^>]+value="(.*?)"[^>]+name="submit\[Continue\]"', login_results, 'continue'),
|
||||||
}
|
}
|
||||||
check_req = compat_urllib_request.Request(self._CHECKPOINT_URL, compat_urllib_parse.urlencode(check_form))
|
check_req = compat_urllib_request.Request(self._CHECKPOINT_URL, urlencode_postdata(check_form))
|
||||||
check_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
check_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||||
check_response = compat_urllib_request.urlopen(check_req).read()
|
check_response = self._download_webpage(check_req, None,
|
||||||
|
note='Confirming login')
|
||||||
if re.search(r'id="checkpointSubmitButton"', check_response) is not None:
|
if re.search(r'id="checkpointSubmitButton"', check_response) is not None:
|
||||||
self._downloader.report_warning('Unable to confirm login, you have to login in your brower and authorize the login.')
|
self._downloader.report_warning('Unable to confirm login, you have to login in your brower and authorize the login.')
|
||||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||||
|
@@ -116,7 +116,24 @@ class GenericIE(InfoExtractor):
|
|||||||
'params': {
|
'params': {
|
||||||
'skip_download': False,
|
'skip_download': False,
|
||||||
}
|
}
|
||||||
}
|
},
|
||||||
|
# embed.ly video
|
||||||
|
{
|
||||||
|
'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '9ODmcdjQcHQ',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
|
||||||
|
'upload_date': '20140225',
|
||||||
|
'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
|
||||||
|
'uploader': 'Tested',
|
||||||
|
'uploader_id': 'testedcom',
|
||||||
|
},
|
||||||
|
# No need to test YoutubeIE here
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
def report_download_webpage(self, video_id):
|
def report_download_webpage(self, video_id):
|
||||||
@@ -211,7 +228,7 @@ class GenericIE(InfoExtractor):
|
|||||||
else:
|
else:
|
||||||
assert ':' in default_search
|
assert ':' in default_search
|
||||||
return self.url_result(default_search + url)
|
return self.url_result(default_search + url)
|
||||||
video_id = os.path.splitext(url.split('/')[-1])[0]
|
video_id = os.path.splitext(url.rstrip('/').split('/')[-1])[0]
|
||||||
|
|
||||||
self.to_screen('%s: Requesting header' % video_id)
|
self.to_screen('%s: Requesting header' % video_id)
|
||||||
|
|
||||||
@@ -407,6 +424,14 @@ class GenericIE(InfoExtractor):
|
|||||||
if mobj is not None:
|
if mobj is not None:
|
||||||
return self.url_result(mobj.group('url'), 'HuffPost')
|
return self.url_result(mobj.group('url'), 'HuffPost')
|
||||||
|
|
||||||
|
# Look for embed.ly
|
||||||
|
mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
|
||||||
|
if mobj is not None:
|
||||||
|
return self.url_result(mobj.group('url'))
|
||||||
|
mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
|
||||||
|
if mobj is not None:
|
||||||
|
return self.url_result(compat_urllib_parse.unquote(mobj.group('url')))
|
||||||
|
|
||||||
# Start with something easy: JW Player in SWFObject
|
# Start with something easy: JW Player in SWFObject
|
||||||
mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
|
mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
|
||||||
if mobj is None:
|
if mobj is None:
|
||||||
|
@@ -8,7 +8,8 @@ from .common import InfoExtractor
|
|||||||
from ..utils import (
|
from ..utils import (
|
||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
ExtractorError
|
ExtractorError,
|
||||||
|
int_or_none,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -19,7 +20,7 @@ class LyndaIE(SubtitlesInfoExtractor):
|
|||||||
_LOGIN_URL = 'https://www.lynda.com/login/login.aspx'
|
_LOGIN_URL = 'https://www.lynda.com/login/login.aspx'
|
||||||
_NETRC_MACHINE = 'lynda'
|
_NETRC_MACHINE = 'lynda'
|
||||||
|
|
||||||
_SUCCESSFUL_LOGIN_REGEX = r'<a href="https://www.lynda.com/home/userAccount/ChangeContactInfo.aspx" data-qa="eyebrow_account_menu">My account'
|
_SUCCESSFUL_LOGIN_REGEX = r'isLoggedIn: true'
|
||||||
_TIMECODE_REGEX = r'\[(?P<timecode>\d+:\d+:\d+[\.,]\d+)\]'
|
_TIMECODE_REGEX = r'\[(?P<timecode>\d+:\d+:\d+[\.,]\d+)\]'
|
||||||
|
|
||||||
ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.'
|
ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.'
|
||||||
@@ -55,13 +56,29 @@ class LyndaIE(SubtitlesInfoExtractor):
|
|||||||
duration = video_json['DurationInSeconds']
|
duration = video_json['DurationInSeconds']
|
||||||
title = video_json['Title']
|
title = video_json['Title']
|
||||||
|
|
||||||
formats = [{'url': fmt['Url'],
|
formats = []
|
||||||
|
|
||||||
|
fmts = video_json.get('Formats')
|
||||||
|
if fmts:
|
||||||
|
formats.extend([
|
||||||
|
{
|
||||||
|
'url': fmt['Url'],
|
||||||
'ext': fmt['Extension'],
|
'ext': fmt['Extension'],
|
||||||
'width': fmt['Width'],
|
'width': fmt['Width'],
|
||||||
'height': fmt['Height'],
|
'height': fmt['Height'],
|
||||||
'filesize': fmt['FileSize'],
|
'filesize': fmt['FileSize'],
|
||||||
'format_id': str(fmt['Resolution'])
|
'format_id': str(fmt['Resolution'])
|
||||||
} for fmt in video_json['Formats']]
|
} for fmt in fmts])
|
||||||
|
|
||||||
|
prioritized_streams = video_json.get('PrioritizedStreams')
|
||||||
|
if prioritized_streams:
|
||||||
|
formats.extend([
|
||||||
|
{
|
||||||
|
'url': video_url,
|
||||||
|
'width': int_or_none(format_id),
|
||||||
|
'format_id': format_id,
|
||||||
|
} for format_id, video_url in prioritized_streams['0'].items()
|
||||||
|
])
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
@@ -179,6 +196,9 @@ class LyndaCourseIE(InfoExtractor):
|
|||||||
videos = []
|
videos = []
|
||||||
(username, _) = self._get_login_info()
|
(username, _) = self._get_login_info()
|
||||||
|
|
||||||
|
# Might want to extract videos right here from video['Formats'] as it seems 'Formats' is not provided
|
||||||
|
# by single video API anymore
|
||||||
|
|
||||||
for chapter in course_json['Chapters']:
|
for chapter in course_json['Chapters']:
|
||||||
for video in chapter['Videos']:
|
for video in chapter['Videos']:
|
||||||
if username is None and video['HasAccess'] is False:
|
if username is None and video['HasAccess'] is False:
|
||||||
|
@@ -51,14 +51,14 @@ class ProSiebenSat1IE(InfoExtractor):
|
|||||||
'skip': 'Seems to be broken',
|
'skip': 'Seems to be broken',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://www.prosiebenmaxx.de/yep/one-piece/video/148-folge-48-gold-rogers-heimat-ganze-folge',
|
'url': 'http://www.prosiebenmaxx.de/tv/experience/video/144-countdown-fuer-die-autowerkstatt-ganze-folge',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2437108',
|
'id': '2429369',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Folge 48: Gold Rogers Heimat',
|
'title': 'Countdown für die Autowerkstatt',
|
||||||
'description': 'Ruffy erreicht die Insel, auf der der berühmte Gold Roger lebte und hingerichtet wurde.',
|
'description': 'md5:809fc051a457b5d8666013bc40698817',
|
||||||
'upload_date': '20140226',
|
'upload_date': '20140223',
|
||||||
'duration': 1401.48,
|
'duration': 2595.04,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
# rtmp download
|
# rtmp download
|
||||||
|
@@ -217,7 +217,7 @@ class SoundcloudIE(InfoExtractor):
|
|||||||
return self._extract_info_dict(info, full_title, secret_token=token)
|
return self._extract_info_dict(info, full_title, secret_token=token)
|
||||||
|
|
||||||
class SoundcloudSetIE(SoundcloudIE):
|
class SoundcloudSetIE(SoundcloudIE):
|
||||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)(?:[?].*)?$'
|
_VALID_URL = r'https?://(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)'
|
||||||
IE_NAME = 'soundcloud:set'
|
IE_NAME = 'soundcloud:set'
|
||||||
# it's in tests/test_playlists.py
|
# it's in tests/test_playlists.py
|
||||||
_TESTS = []
|
_TESTS = []
|
||||||
|
@@ -6,115 +6,111 @@ import re
|
|||||||
from .subtitles import SubtitlesInfoExtractor
|
from .subtitles import SubtitlesInfoExtractor
|
||||||
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
RegexNotFoundError,
|
compat_str,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TEDIE(SubtitlesInfoExtractor):
|
class TEDIE(SubtitlesInfoExtractor):
|
||||||
_VALID_URL=r'''http://www\.ted\.com/
|
_VALID_URL = r'''(?x)http://www\.ted\.com/
|
||||||
(
|
(
|
||||||
((?P<type_playlist>playlists)/(?P<playlist_id>\d+)) # We have a playlist
|
(?P<type_playlist>playlists(?:/\d+)?) # We have a playlist
|
||||||
|
|
|
|
||||||
((?P<type_talk>talks)) # We have a simple talk
|
((?P<type_talk>talks)) # We have a simple talk
|
||||||
)
|
)
|
||||||
(/lang/(.*?))? # The url may contain the language
|
(/lang/(.*?))? # The url may contain the language
|
||||||
/(?P<name>\w+) # Here goes the name and then ".html"
|
/(?P<name>\w+) # Here goes the name and then ".html"
|
||||||
'''
|
'''
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html',
|
'url': 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html',
|
||||||
'file': '102.mp4',
|
|
||||||
'md5': '4ea1dada91e4174b53dac2bb8ace429d',
|
'md5': '4ea1dada91e4174b53dac2bb8ace429d',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
"description": "md5:c6fa72e6eedbd938c9caf6b2702f5922",
|
'id': '102',
|
||||||
"title": "Dan Dennett: The illusion of consciousness"
|
'ext': 'mp4',
|
||||||
|
'title': 'The illusion of consciousness',
|
||||||
|
'description': ('Philosopher Dan Dennett makes a compelling '
|
||||||
|
'argument that not only don\'t we understand our own '
|
||||||
|
'consciousness, but that half the time our brains are '
|
||||||
|
'actively fooling us.'),
|
||||||
|
'uploader': 'Dan Dennett',
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@classmethod
|
_FORMATS_PREFERENCE = {
|
||||||
def suitable(cls, url):
|
'low': 1,
|
||||||
"""Receives a URL and returns True if suitable for this IE."""
|
'medium': 2,
|
||||||
return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
|
'high': 3,
|
||||||
|
}
|
||||||
|
|
||||||
|
def _extract_info(self, webpage):
|
||||||
|
info_json = self._search_regex(r'q\("\w+.init",({.+})\)</script>',
|
||||||
|
webpage, 'info json')
|
||||||
|
return json.loads(info_json)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
m=re.match(self._VALID_URL, url, re.VERBOSE)
|
m = re.match(self._VALID_URL, url, re.VERBOSE)
|
||||||
|
name = m.group('name')
|
||||||
if m.group('type_talk'):
|
if m.group('type_talk'):
|
||||||
return self._talk_info(url)
|
return self._talk_info(url, name)
|
||||||
else :
|
else:
|
||||||
playlist_id=m.group('playlist_id')
|
return self._playlist_videos_info(url, name)
|
||||||
name=m.group('name')
|
|
||||||
self.to_screen(u'Getting info of playlist %s: "%s"' % (playlist_id,name))
|
|
||||||
return [self._playlist_videos_info(url,name,playlist_id)]
|
|
||||||
|
|
||||||
|
def _playlist_videos_info(self, url, name):
|
||||||
def _playlist_videos_info(self, url, name, playlist_id):
|
|
||||||
'''Returns the videos of the playlist'''
|
'''Returns the videos of the playlist'''
|
||||||
|
|
||||||
webpage = self._download_webpage(
|
webpage = self._download_webpage(url, name,
|
||||||
url, playlist_id, 'Downloading playlist webpage')
|
'Downloading playlist webpage')
|
||||||
matches = re.finditer(
|
info = self._extract_info(webpage)
|
||||||
r'<p\s+class="talk-title[^"]*"><a\s+href="(?P<talk_url>/talks/[^"]+\.html)">[^<]*</a></p>',
|
playlist_info = info['playlist']
|
||||||
webpage)
|
|
||||||
|
|
||||||
playlist_title = self._html_search_regex(r'div class="headline">\s*?<h1>\s*?<span>(.*?)</span>',
|
|
||||||
webpage, 'playlist title')
|
|
||||||
|
|
||||||
playlist_entries = [
|
playlist_entries = [
|
||||||
self.url_result(u'http://www.ted.com' + m.group('talk_url'), 'TED')
|
self.url_result(u'http://www.ted.com/talks/' + talk['slug'], self.ie_key())
|
||||||
for m in matches
|
for talk in info['talks']
|
||||||
]
|
]
|
||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
playlist_entries, playlist_id=playlist_id, playlist_title=playlist_title)
|
playlist_entries,
|
||||||
|
playlist_id=compat_str(playlist_info['id']),
|
||||||
|
playlist_title=playlist_info['title'])
|
||||||
|
|
||||||
def _talk_info(self, url, video_id=0):
|
def _talk_info(self, url, video_name):
|
||||||
"""Return the video for the talk in the url"""
|
webpage = self._download_webpage(url, video_name)
|
||||||
m = re.match(self._VALID_URL, url,re.VERBOSE)
|
|
||||||
video_name = m.group('name')
|
|
||||||
webpage = self._download_webpage(url, video_id, 'Downloading \"%s\" page' % video_name)
|
|
||||||
self.report_extraction(video_name)
|
self.report_extraction(video_name)
|
||||||
# If the url includes the language we get the title translated
|
|
||||||
title = self._html_search_regex(r'<span .*?id="altHeadline".+?>(?P<title>.*)</span>',
|
talk_info = self._extract_info(webpage)['talks'][0]
|
||||||
webpage, 'title')
|
|
||||||
json_data = self._search_regex(r'<script.*?>var talkDetails = ({.*?})</script>',
|
|
||||||
webpage, 'json data')
|
|
||||||
info = json.loads(json_data)
|
|
||||||
desc = self._html_search_regex(r'<div class="talk-intro">.*?<p.*?>(.*?)</p>',
|
|
||||||
webpage, 'description', flags = re.DOTALL)
|
|
||||||
|
|
||||||
thumbnail = self._search_regex(r'</span>[\s.]*</div>[\s.]*<img src="(.*?)"',
|
|
||||||
webpage, 'thumbnail')
|
|
||||||
formats = [{
|
formats = [{
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'url': stream['file'],
|
'url': format_url,
|
||||||
'format': stream['id']
|
'format_id': format_id,
|
||||||
} for stream in info['htmlStreams']]
|
'format': format_id,
|
||||||
|
'preference': self._FORMATS_PREFERENCE.get(format_id, -1),
|
||||||
video_id = info['id']
|
} for (format_id, format_url) in talk_info['nativeDownloads'].items()]
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
video_id = compat_str(talk_info['id'])
|
||||||
# subtitles
|
# subtitles
|
||||||
video_subtitles = self.extract_subtitles(video_id, webpage)
|
video_subtitles = self.extract_subtitles(video_id, talk_info)
|
||||||
if self._downloader.params.get('listsubtitles', False):
|
if self._downloader.params.get('listsubtitles', False):
|
||||||
self._list_available_subtitles(video_id, webpage)
|
self._list_available_subtitles(video_id, talk_info)
|
||||||
return
|
return
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': talk_info['title'],
|
||||||
'thumbnail': thumbnail,
|
'uploader': talk_info['speaker'],
|
||||||
'description': desc,
|
'thumbnail': talk_info['thumb'],
|
||||||
|
'description': self._og_search_description(webpage),
|
||||||
'subtitles': video_subtitles,
|
'subtitles': video_subtitles,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
|
||||||
def _get_available_subtitles(self, video_id, webpage):
|
def _get_available_subtitles(self, video_id, talk_info):
|
||||||
try:
|
languages = [lang['languageCode'] for lang in talk_info.get('languages', [])]
|
||||||
options = self._search_regex(r'(?:<select name="subtitles_language_select" id="subtitles_language_select">)(.*?)(?:</select>)', webpage, 'subtitles_language_select', flags=re.DOTALL)
|
if languages:
|
||||||
languages = re.findall(r'(?:<option value=")(\S+)"', options)
|
sub_lang_list = {}
|
||||||
if languages:
|
for l in languages:
|
||||||
sub_lang_list = {}
|
url = 'http://www.ted.com/talks/subtitles/id/%s/lang/%s/format/srt' % (video_id, l)
|
||||||
for l in languages:
|
sub_lang_list[l] = url
|
||||||
url = 'http://www.ted.com/talks/subtitles/id/%s/lang/%s/format/srt' % (video_id, l)
|
return sub_lang_list
|
||||||
sub_lang_list[l] = url
|
else:
|
||||||
return sub_lang_list
|
|
||||||
except RegexNotFoundError:
|
|
||||||
self._downloader.report_warning(u'video doesn\'t have subtitles')
|
self._downloader.report_warning(u'video doesn\'t have subtitles')
|
||||||
return {}
|
return {}
|
||||||
|
@@ -14,19 +14,32 @@ from ..utils import (
|
|||||||
class TvigleIE(InfoExtractor):
|
class TvigleIE(InfoExtractor):
|
||||||
IE_NAME = 'tvigle'
|
IE_NAME = 'tvigle'
|
||||||
IE_DESC = 'Интернет-телевидение Tvigle.ru'
|
IE_DESC = 'Интернет-телевидение Tvigle.ru'
|
||||||
_VALID_URL = r'http://(?:www\.)?tvigle\.ru/category/.+?video=(?P<id>\d+)'
|
_VALID_URL = r'http://(?:www\.)?tvigle\.ru/category/.+?[\?&]v(?:ideo)?=(?P<id>\d+)'
|
||||||
|
|
||||||
_TEST = {
|
_TESTS = [
|
||||||
'url': 'http://www.tvigle.ru/category/cinema/1608/?video=503081',
|
{
|
||||||
'md5': '09afba4616666249f087efc6dcf83cb3',
|
'url': 'http://www.tvigle.ru/category/cinema/1608/?video=503081',
|
||||||
'info_dict': {
|
'md5': '09afba4616666249f087efc6dcf83cb3',
|
||||||
'id': '503081',
|
'info_dict': {
|
||||||
'ext': 'flv',
|
'id': '503081',
|
||||||
'title': 'Брат 2 ',
|
'ext': 'flv',
|
||||||
'description': 'md5:f5a42970f50648cee3d7ad740f3ae769',
|
'title': 'Брат 2 ',
|
||||||
'upload_date': '20110919',
|
'description': 'md5:f5a42970f50648cee3d7ad740f3ae769',
|
||||||
}
|
'upload_date': '20110919',
|
||||||
}
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://www.tvigle.ru/category/men/vysotskiy_vospominaniya02/?flt=196&v=676433',
|
||||||
|
'md5': 'e7efe5350dd5011d0de6550b53c3ba7b',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '676433',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'Ведущий телепрограммы «60 минут» (США) о Владимире Высоцком',
|
||||||
|
'description': 'md5:027f7dc872948f14c96d19b4178428a4',
|
||||||
|
'upload_date': '20121218',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
@@ -13,7 +13,7 @@ from ..utils import (
|
|||||||
class VestiIE(InfoExtractor):
|
class VestiIE(InfoExtractor):
|
||||||
IE_NAME = 'vesti'
|
IE_NAME = 'vesti'
|
||||||
IE_DESC = 'Вести.Ru'
|
IE_DESC = 'Вести.Ru'
|
||||||
_VALID_URL = r'http://(?:.+?\.)?vesti\.ru/(?P<id>.+)'
|
_VALID_URL = r'http://(?:.+?\.)?(?:vesti\.ru|russia\.tv)/(?P<id>.+)'
|
||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
@@ -30,6 +30,20 @@ class VestiIE(InfoExtractor):
|
|||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://www.vesti.ru/doc.html?id=1349233',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '773865',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Участники митинга штурмуют Донецкую областную администрацию',
|
||||||
|
'description': 'md5:1a160e98b3195379b4c849f2f4958009',
|
||||||
|
'duration': 210,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://www.vesti.ru/only_video.html?vid=576180',
|
'url': 'http://www.vesti.ru/only_video.html?vid=576180',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@@ -44,6 +58,48 @@ class VestiIE(InfoExtractor):
|
|||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://hitech.vesti.ru/news/view/id/4000',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '766888',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Вести.net: интернет-гиганты начали перетягивание программных "одеял"',
|
||||||
|
'description': 'md5:65ddd47f9830c4f42ed6475f8730c995',
|
||||||
|
'duration': 279,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://russia.tv/video/show/brand_id/5169/episode_id/970443/video_id/975648',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '771852',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Прямой эфир. Жертвы загадочной болезни: смерть от старости в 17 лет',
|
||||||
|
'description': 'md5:b81c8c55247a4bd996b43ce17395b2d8',
|
||||||
|
'duration': 3096,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://russia.tv/brand/show/brand_id/57638',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '774016',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Чужой в семье Сталина',
|
||||||
|
'description': '',
|
||||||
|
'duration': 2539,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://sochi2014.vesti.ru/video/index/video_id/766403',
|
'url': 'http://sochi2014.vesti.ru/video/index/video_id/766403',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@@ -81,16 +137,26 @@ class VestiIE(InfoExtractor):
|
|||||||
|
|
||||||
page = self._download_webpage(url, video_id, 'Downloading page')
|
page = self._download_webpage(url, video_id, 'Downloading page')
|
||||||
|
|
||||||
mobj = re.search(r'<meta property="og:video" content=".+?\.swf\?v?id=(?P<id>\d+).*?" />', page)
|
mobj = re.search(
|
||||||
|
r'<meta property="og:video" content="http://www\.vesti\.ru/i/flvplayer_videoHost\.swf\?vid=(?P<id>\d+)',
|
||||||
|
page)
|
||||||
|
if mobj:
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
page = self._download_webpage('http://www.vesti.ru/only_video.html?vid=%s' % video_id, video_id,
|
||||||
|
'Downloading video page')
|
||||||
|
|
||||||
|
mobj = re.search(
|
||||||
|
r'<meta property="og:video" content="http://player\.rutv\.ru/flash2v/container\.swf\?id=(?P<id>\d+)', page)
|
||||||
if mobj:
|
if mobj:
|
||||||
video_type = 'video'
|
video_type = 'video'
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
else:
|
else:
|
||||||
mobj = re.search(
|
mobj = re.search(
|
||||||
r'<iframe.+?src="http://player\.rutv\.ru/iframe/(?P<type>[^/]+)/id/(?P<id>\d+)[^"]*".*?></iframe>', page)
|
r'<iframe.+?src="http://player\.rutv\.ru/iframe/(?P<type>[^/]+)/id/(?P<id>\d+)[^"]*".*?></iframe>',
|
||||||
|
page)
|
||||||
|
|
||||||
if not mobj:
|
if not mobj:
|
||||||
raise ExtractorError('No media found')
|
raise ExtractorError('No media found', expected=True)
|
||||||
|
|
||||||
video_type = mobj.group('type')
|
video_type = mobj.group('type')
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
@@ -113,8 +179,8 @@ class VestiIE(InfoExtractor):
|
|||||||
priority_transport = playlist['priority_transport']
|
priority_transport = playlist['priority_transport']
|
||||||
|
|
||||||
thumbnail = media['picture']
|
thumbnail = media['picture']
|
||||||
width = media['width']
|
width = int_or_none(media['width'])
|
||||||
height = media['height']
|
height = int_or_none(media['height'])
|
||||||
description = media['anons']
|
description = media['anons']
|
||||||
title = media['title']
|
title = media['title']
|
||||||
duration = int_or_none(media.get('duration'))
|
duration = int_or_none(media.get('duration'))
|
||||||
|
@@ -7,19 +7,24 @@ from .common import InfoExtractor
|
|||||||
from ..utils import (
|
from ..utils import (
|
||||||
compat_urllib_parse_urlparse,
|
compat_urllib_parse_urlparse,
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
|
parse_duration,
|
||||||
|
str_to_int,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class XTubeIE(InfoExtractor):
|
class XTubeIE(InfoExtractor):
|
||||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>xtube\.com/watch\.php\?v=(?P<videoid>[^/?&]+))'
|
_VALID_URL = r'https?://(?:www\.)?(?P<url>xtube\.com/watch\.php\?v=(?P<videoid>[^/?&]+))'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.xtube.com/watch.php?v=kVTUy_G222_',
|
'url': 'http://www.xtube.com/watch.php?v=kVTUy_G222_',
|
||||||
'file': 'kVTUy_G222_.mp4',
|
|
||||||
'md5': '092fbdd3cbe292c920ef6fc6a8a9cdab',
|
'md5': '092fbdd3cbe292c920ef6fc6a8a9cdab',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
"title": "strange erotica",
|
'id': 'kVTUy_G222_',
|
||||||
"description": "surreal gay themed erotica...almost an ET kind of thing",
|
'ext': 'mp4',
|
||||||
"uploader": "greenshowers",
|
'title': 'strange erotica',
|
||||||
"age_limit": 18,
|
'description': 'surreal gay themed erotica...almost an ET kind of thing',
|
||||||
|
'uploader': 'greenshowers',
|
||||||
|
'duration': 450,
|
||||||
|
'age_limit': 18,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -32,10 +37,23 @@ class XTubeIE(InfoExtractor):
|
|||||||
req.add_header('Cookie', 'age_verified=1')
|
req.add_header('Cookie', 'age_verified=1')
|
||||||
webpage = self._download_webpage(req, video_id)
|
webpage = self._download_webpage(req, video_id)
|
||||||
|
|
||||||
video_title = self._html_search_regex(r'<div class="p_5px[^>]*>([^<]+)', webpage, 'title')
|
video_title = self._html_search_regex(r'<p class="title">([^<]+)', webpage, 'title')
|
||||||
video_uploader = self._html_search_regex(r'so_s\.addVariable\("owner_u", "([^"]+)', webpage, 'uploader', fatal=False)
|
video_uploader = self._html_search_regex(
|
||||||
video_description = self._html_search_regex(r'<p class="video_description">([^<]+)', webpage, 'description', fatal=False)
|
r'so_s\.addVariable\("owner_u", "([^"]+)', webpage, 'uploader', fatal=False)
|
||||||
video_url= self._html_search_regex(r'var videoMp4 = "([^"]+)', webpage, 'video_url').replace('\\/', '/')
|
video_description = self._html_search_regex(
|
||||||
|
r'<p class="fieldsDesc">([^<]+)', webpage, 'description', fatal=False)
|
||||||
|
video_url = self._html_search_regex(r'var videoMp4 = "([^"]+)', webpage, 'video_url').replace('\\/', '/')
|
||||||
|
duration = parse_duration(self._html_search_regex(
|
||||||
|
r'<span class="bold">Runtime:</span> ([^<]+)</p>', webpage, 'duration', fatal=False))
|
||||||
|
view_count = self._html_search_regex(
|
||||||
|
r'<span class="bold">Views:</span> ([\d,\.]+)</p>', webpage, 'view count', fatal=False)
|
||||||
|
if view_count:
|
||||||
|
view_count = str_to_int(view_count)
|
||||||
|
comment_count = self._html_search_regex(
|
||||||
|
r'<div id="commentBar">([\d,\.]+) Comments</div>', webpage, 'comment count', fatal=False)
|
||||||
|
if comment_count:
|
||||||
|
comment_count = str_to_int(comment_count)
|
||||||
|
|
||||||
path = compat_urllib_parse_urlparse(video_url).path
|
path = compat_urllib_parse_urlparse(video_url).path
|
||||||
extension = os.path.splitext(path)[1][1:]
|
extension = os.path.splitext(path)[1][1:]
|
||||||
format = path.split('/')[5].split('_')[:2]
|
format = path.split('/')[5].split('_')[:2]
|
||||||
@@ -48,6 +66,9 @@ class XTubeIE(InfoExtractor):
|
|||||||
'title': video_title,
|
'title': video_title,
|
||||||
'uploader': video_uploader,
|
'uploader': video_uploader,
|
||||||
'description': video_description,
|
'description': video_description,
|
||||||
|
'duration': duration,
|
||||||
|
'view_count': view_count,
|
||||||
|
'comment_count': comment_count,
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'ext': extension,
|
'ext': extension,
|
||||||
'format': format,
|
'format': format,
|
||||||
|
@@ -1263,3 +1263,7 @@ def read_batch_urls(batch_fd):
|
|||||||
|
|
||||||
with contextlib.closing(batch_fd) as fd:
|
with contextlib.closing(batch_fd) as fd:
|
||||||
return [url for url in map(fixup, fd) if url]
|
return [url for url in map(fixup, fd) if url]
|
||||||
|
|
||||||
|
|
||||||
|
def urlencode_postdata(*args, **kargs):
|
||||||
|
return compat_urllib_parse.urlencode(*args, **kargs).encode('ascii')
|
||||||
|
@@ -1,2 +1,2 @@
|
|||||||
|
|
||||||
__version__ = '2014.03.04.1'
|
__version__ = '2014.03.07.1'
|
||||||
|
Reference in New Issue
Block a user