Compare commits
31 Commits
2013.10.28
...
2013.11.02
Author | SHA1 | Date | |
---|---|---|---|
|
aa2484e390 | ||
|
8eddf3e91d | ||
|
60d142aa8d | ||
|
66cf3ac342 | ||
|
5f1ea943ab | ||
|
0ef7ad5cd4 | ||
|
9f1109a564 | ||
|
33b1d9595d | ||
|
7193498811 | ||
|
72321ead7b | ||
|
b5d0d817bc | ||
|
94badb2599 | ||
|
b9a836515f | ||
|
21c924f406 | ||
|
e54fd4b23b | ||
|
57dd9a8f2f | ||
|
912cbf5d4e | ||
|
43d7895ea0 | ||
|
f7ff55aa78 | ||
|
795f28f871 | ||
|
f6cc16f5d8 | ||
|
321a01f971 | ||
|
646e17a53d | ||
|
dd508b7c4f | ||
|
2563bcc85c | ||
|
702665c085 | ||
|
369a759acc | ||
|
79b3f61228 | ||
|
216d71d001 | ||
|
78a3a9f89e | ||
|
a7685f3bf4 |
1
setup.py
1
setup.py
@@ -11,6 +11,7 @@ try:
|
|||||||
setuptools_available = True
|
setuptools_available = True
|
||||||
except ImportError:
|
except ImportError:
|
||||||
from distutils.core import setup
|
from distutils.core import setup
|
||||||
|
setuptools_available = False
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# This will create an exe that needs Microsoft Visual C++ 2008
|
# This will create an exe that needs Microsoft Visual C++ 2008
|
||||||
|
@@ -5,9 +5,11 @@ import json
|
|||||||
import os.path
|
import os.path
|
||||||
import re
|
import re
|
||||||
import types
|
import types
|
||||||
|
import sys
|
||||||
|
|
||||||
import youtube_dl.extractor
|
import youtube_dl.extractor
|
||||||
from youtube_dl import YoutubeDL
|
from youtube_dl import YoutubeDL
|
||||||
|
from youtube_dl.utils import preferredencoding
|
||||||
|
|
||||||
|
|
||||||
def global_setup():
|
def global_setup():
|
||||||
@@ -33,6 +35,21 @@ def try_rm(filename):
|
|||||||
raise
|
raise
|
||||||
|
|
||||||
|
|
||||||
|
def report_warning(message):
|
||||||
|
'''
|
||||||
|
Print the message to stderr, it will be prefixed with 'WARNING:'
|
||||||
|
If stderr is a tty file the 'WARNING:' will be colored
|
||||||
|
'''
|
||||||
|
if sys.stderr.isatty() and os.name != 'nt':
|
||||||
|
_msg_header = u'\033[0;33mWARNING:\033[0m'
|
||||||
|
else:
|
||||||
|
_msg_header = u'WARNING:'
|
||||||
|
output = u'%s %s\n' % (_msg_header, message)
|
||||||
|
if 'b' in getattr(sys.stderr, 'mode', '') or sys.version_info[0] < 3:
|
||||||
|
output = output.encode(preferredencoding())
|
||||||
|
sys.stderr.write(output)
|
||||||
|
|
||||||
|
|
||||||
class FakeYDL(YoutubeDL):
|
class FakeYDL(YoutubeDL):
|
||||||
def __init__(self, override=None):
|
def __init__(self, override=None):
|
||||||
# Different instances of the downloader can't share the same dictionary
|
# Different instances of the downloader can't share the same dictionary
|
||||||
|
@@ -62,10 +62,10 @@ class TestFormatSelection(unittest.TestCase):
|
|||||||
|
|
||||||
def test_format_limit(self):
|
def test_format_limit(self):
|
||||||
formats = [
|
formats = [
|
||||||
{u'format_id': u'meh'},
|
{u'format_id': u'meh', u'url': u'http://example.com/meh'},
|
||||||
{u'format_id': u'good'},
|
{u'format_id': u'good', u'url': u'http://example.com/good'},
|
||||||
{u'format_id': u'great'},
|
{u'format_id': u'great', u'url': u'http://example.com/great'},
|
||||||
{u'format_id': u'excellent'},
|
{u'format_id': u'excellent', u'url': u'http://example.com/exc'},
|
||||||
]
|
]
|
||||||
info_dict = {
|
info_dict = {
|
||||||
u'formats': formats, u'extractor': u'test', 'id': 'testvid'}
|
u'formats': formats, u'extractor': u'test', 'id': 'testvid'}
|
||||||
|
@@ -22,7 +22,7 @@ class TestDailymotionSubtitles(unittest.TestCase):
|
|||||||
return info_dict
|
return info_dict
|
||||||
def getSubtitles(self):
|
def getSubtitles(self):
|
||||||
info_dict = self.getInfoDict()
|
info_dict = self.getInfoDict()
|
||||||
return info_dict[0]['subtitles']
|
return info_dict['subtitles']
|
||||||
def test_no_writesubtitles(self):
|
def test_no_writesubtitles(self):
|
||||||
subtitles = self.getSubtitles()
|
subtitles = self.getSubtitles()
|
||||||
self.assertEqual(subtitles, None)
|
self.assertEqual(subtitles, None)
|
||||||
|
@@ -6,7 +6,14 @@ import sys
|
|||||||
import unittest
|
import unittest
|
||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
from test.helper import get_params, get_testcases, global_setup, try_rm, md5
|
from test.helper import (
|
||||||
|
get_params,
|
||||||
|
get_testcases,
|
||||||
|
global_setup,
|
||||||
|
try_rm,
|
||||||
|
md5,
|
||||||
|
report_warning
|
||||||
|
)
|
||||||
global_setup()
|
global_setup()
|
||||||
|
|
||||||
|
|
||||||
@@ -19,6 +26,7 @@ import youtube_dl.YoutubeDL
|
|||||||
from youtube_dl.utils import (
|
from youtube_dl.utils import (
|
||||||
compat_str,
|
compat_str,
|
||||||
compat_urllib_error,
|
compat_urllib_error,
|
||||||
|
compat_HTTPError,
|
||||||
DownloadError,
|
DownloadError,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
UnavailableVideoError,
|
UnavailableVideoError,
|
||||||
@@ -60,9 +68,12 @@ def generator(test_case):
|
|||||||
if not ie._WORKING:
|
if not ie._WORKING:
|
||||||
print_skipping('IE marked as not _WORKING')
|
print_skipping('IE marked as not _WORKING')
|
||||||
return
|
return
|
||||||
if 'playlist' not in test_case and not test_case['file']:
|
if 'playlist' not in test_case:
|
||||||
print_skipping('No output file specified')
|
info_dict = test_case.get('info_dict', {})
|
||||||
return
|
if not test_case.get('file') and not (info_dict.get('id') and info_dict.get('ext')):
|
||||||
|
print_skipping('The output file cannot be know, the "file" '
|
||||||
|
'key is missing or the info_dict is incomplete')
|
||||||
|
return
|
||||||
if 'skip' in test_case:
|
if 'skip' in test_case:
|
||||||
print_skipping(test_case['skip'])
|
print_skipping(test_case['skip'])
|
||||||
return
|
return
|
||||||
@@ -77,35 +88,47 @@ def generator(test_case):
|
|||||||
finished_hook_called.add(status['filename'])
|
finished_hook_called.add(status['filename'])
|
||||||
ydl.fd.add_progress_hook(_hook)
|
ydl.fd.add_progress_hook(_hook)
|
||||||
|
|
||||||
|
def get_tc_filename(tc):
|
||||||
|
return tc.get('file') or ydl.prepare_filename(tc.get('info_dict', {}))
|
||||||
|
|
||||||
test_cases = test_case.get('playlist', [test_case])
|
test_cases = test_case.get('playlist', [test_case])
|
||||||
for tc in test_cases:
|
def try_rm_tcs_files():
|
||||||
try_rm(tc['file'])
|
for tc in test_cases:
|
||||||
try_rm(tc['file'] + '.part')
|
tc_filename = get_tc_filename(tc)
|
||||||
try_rm(tc['file'] + '.info.json')
|
try_rm(tc_filename)
|
||||||
|
try_rm(tc_filename + '.part')
|
||||||
|
try_rm(tc_filename + '.info.json')
|
||||||
|
try_rm_tcs_files()
|
||||||
try:
|
try:
|
||||||
for retry in range(1, RETRIES + 1):
|
try_num = 1
|
||||||
|
while True:
|
||||||
try:
|
try:
|
||||||
ydl.download([test_case['url']])
|
ydl.download([test_case['url']])
|
||||||
except (DownloadError, ExtractorError) as err:
|
except (DownloadError, ExtractorError) as err:
|
||||||
if retry == RETRIES: raise
|
|
||||||
|
|
||||||
# Check if the exception is not a network related one
|
# Check if the exception is not a network related one
|
||||||
if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
|
if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError) or (err.exc_info[0] == compat_HTTPError and err.exc_info[1].code == 503):
|
||||||
raise
|
raise
|
||||||
|
|
||||||
print('Retrying: {0} failed tries\n\n##########\n\n'.format(retry))
|
if try_num == RETRIES:
|
||||||
|
report_warning(u'Failed due to network errors, skipping...')
|
||||||
|
return
|
||||||
|
|
||||||
|
print('Retrying: {0} failed tries\n\n##########\n\n'.format(try_num))
|
||||||
|
|
||||||
|
try_num += 1
|
||||||
else:
|
else:
|
||||||
break
|
break
|
||||||
|
|
||||||
for tc in test_cases:
|
for tc in test_cases:
|
||||||
|
tc_filename = get_tc_filename(tc)
|
||||||
if not test_case.get('params', {}).get('skip_download', False):
|
if not test_case.get('params', {}).get('skip_download', False):
|
||||||
self.assertTrue(os.path.exists(tc['file']), msg='Missing file ' + tc['file'])
|
self.assertTrue(os.path.exists(tc_filename), msg='Missing file ' + tc_filename)
|
||||||
self.assertTrue(tc['file'] in finished_hook_called)
|
self.assertTrue(tc_filename in finished_hook_called)
|
||||||
self.assertTrue(os.path.exists(tc['file'] + '.info.json'))
|
self.assertTrue(os.path.exists(tc_filename + '.info.json'))
|
||||||
if 'md5' in tc:
|
if 'md5' in tc:
|
||||||
md5_for_file = _file_md5(tc['file'])
|
md5_for_file = _file_md5(tc_filename)
|
||||||
self.assertEqual(md5_for_file, tc['md5'])
|
self.assertEqual(md5_for_file, tc['md5'])
|
||||||
with io.open(tc['file'] + '.info.json', encoding='utf-8') as infof:
|
with io.open(tc_filename + '.info.json', encoding='utf-8') as infof:
|
||||||
info_dict = json.load(infof)
|
info_dict = json.load(infof)
|
||||||
for (info_field, expected) in tc.get('info_dict', {}).items():
|
for (info_field, expected) in tc.get('info_dict', {}).items():
|
||||||
if isinstance(expected, compat_str) and expected.startswith('md5:'):
|
if isinstance(expected, compat_str) and expected.startswith('md5:'):
|
||||||
@@ -126,10 +149,7 @@ def generator(test_case):
|
|||||||
for key in ('id', 'url', 'title', 'ext'):
|
for key in ('id', 'url', 'title', 'ext'):
|
||||||
self.assertTrue(key in info_dict.keys() and info_dict[key])
|
self.assertTrue(key in info_dict.keys() and info_dict[key])
|
||||||
finally:
|
finally:
|
||||||
for tc in test_cases:
|
try_rm_tcs_files()
|
||||||
try_rm(tc['file'])
|
|
||||||
try_rm(tc['file'] + '.part')
|
|
||||||
try_rm(tc['file'] + '.info.json')
|
|
||||||
|
|
||||||
return test_template
|
return test_template
|
||||||
|
|
||||||
|
@@ -272,7 +272,7 @@ class YoutubeDL(object):
|
|||||||
autonumber_size = 5
|
autonumber_size = 5
|
||||||
autonumber_templ = u'%0' + str(autonumber_size) + u'd'
|
autonumber_templ = u'%0' + str(autonumber_size) + u'd'
|
||||||
template_dict['autonumber'] = autonumber_templ % self._num_downloads
|
template_dict['autonumber'] = autonumber_templ % self._num_downloads
|
||||||
if template_dict['playlist_index'] is not None:
|
if template_dict.get('playlist_index') is not None:
|
||||||
template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']
|
template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']
|
||||||
|
|
||||||
sanitize = lambda k, v: sanitize_filename(
|
sanitize = lambda k, v: sanitize_filename(
|
||||||
@@ -462,7 +462,7 @@ class YoutubeDL(object):
|
|||||||
info_dict['playlist_index'] = None
|
info_dict['playlist_index'] = None
|
||||||
|
|
||||||
# This extractors handle format selection themselves
|
# This extractors handle format selection themselves
|
||||||
if info_dict['extractor'] in [u'youtube', u'Youku', u'mixcloud']:
|
if info_dict['extractor'] in [u'youtube', u'Youku']:
|
||||||
if download:
|
if download:
|
||||||
self.process_info(info_dict)
|
self.process_info(info_dict)
|
||||||
return info_dict
|
return info_dict
|
||||||
@@ -482,7 +482,7 @@ class YoutubeDL(object):
|
|||||||
format['format'] = u'{id} - {res}{note}'.format(
|
format['format'] = u'{id} - {res}{note}'.format(
|
||||||
id=format['format_id'],
|
id=format['format_id'],
|
||||||
res=self.format_resolution(format),
|
res=self.format_resolution(format),
|
||||||
note=u' ({})'.format(format['format_note']) if format.get('format_note') is not None else '',
|
note=u' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
|
||||||
)
|
)
|
||||||
# Automatically determine file extension if missing
|
# Automatically determine file extension if missing
|
||||||
if 'ext' not in format:
|
if 'ext' not in format:
|
||||||
@@ -524,7 +524,8 @@ class YoutubeDL(object):
|
|||||||
formats_to_download = [selected_format]
|
formats_to_download = [selected_format]
|
||||||
break
|
break
|
||||||
if not formats_to_download:
|
if not formats_to_download:
|
||||||
raise ExtractorError(u'requested format not available')
|
raise ExtractorError(u'requested format not available',
|
||||||
|
expected=True)
|
||||||
|
|
||||||
if download:
|
if download:
|
||||||
if len(formats_to_download) > 1:
|
if len(formats_to_download) > 1:
|
||||||
@@ -574,9 +575,9 @@ class YoutubeDL(object):
|
|||||||
if self.params.get('forceurl', False):
|
if self.params.get('forceurl', False):
|
||||||
# For RTMP URLs, also include the playpath
|
# For RTMP URLs, also include the playpath
|
||||||
compat_print(info_dict['url'] + info_dict.get('play_path', u''))
|
compat_print(info_dict['url'] + info_dict.get('play_path', u''))
|
||||||
if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
|
if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
|
||||||
compat_print(info_dict['thumbnail'])
|
compat_print(info_dict['thumbnail'])
|
||||||
if self.params.get('forcedescription', False) and 'description' in info_dict:
|
if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
|
||||||
compat_print(info_dict['description'])
|
compat_print(info_dict['description'])
|
||||||
if self.params.get('forcefilename', False) and filename is not None:
|
if self.params.get('forcefilename', False) and filename is not None:
|
||||||
compat_print(filename)
|
compat_print(filename)
|
||||||
@@ -758,6 +759,8 @@ class YoutubeDL(object):
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def format_resolution(format, default='unknown'):
|
def format_resolution(format, default='unknown'):
|
||||||
|
if format.get('_resolution') is not None:
|
||||||
|
return format['_resolution']
|
||||||
if format.get('height') is not None:
|
if format.get('height') is not None:
|
||||||
if format.get('width') is not None:
|
if format.get('width') is not None:
|
||||||
res = u'%sx%s' % (format['width'], format['height'])
|
res = u'%sx%s' % (format['width'], format['height'])
|
||||||
@@ -768,19 +771,23 @@ class YoutubeDL(object):
|
|||||||
return res
|
return res
|
||||||
|
|
||||||
def list_formats(self, info_dict):
|
def list_formats(self, info_dict):
|
||||||
formats_s = []
|
def line(format):
|
||||||
for format in info_dict.get('formats', [info_dict]):
|
return (u'%-15s%-10s%-12s%s' % (
|
||||||
formats_s.append(u'%-15s%-7s %-15s%s' % (
|
|
||||||
format['format_id'],
|
format['format_id'],
|
||||||
format['ext'],
|
format['ext'],
|
||||||
format.get('format_note', ''),
|
|
||||||
self.format_resolution(format),
|
self.format_resolution(format),
|
||||||
|
format.get('format_note', ''),
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
if len(formats_s) != 1:
|
|
||||||
formats_s[0] += ' (worst)'
|
formats = info_dict.get('formats', [info_dict])
|
||||||
formats_s[-1] += ' (best)'
|
formats_s = list(map(line, formats))
|
||||||
formats_s = "\n".join(formats_s)
|
if len(formats) > 1:
|
||||||
self.to_screen(u'[info] Available formats for %s:\n'
|
formats_s[0] += (' ' if formats[0].get('format_note') else '') + '(worst)'
|
||||||
u'format code extension note resolution\n%s' % (
|
formats_s[-1] += (' ' if formats[-1].get('format_note') else '') + '(best)'
|
||||||
info_dict['id'], formats_s))
|
|
||||||
|
header_line = line({
|
||||||
|
'format_id': u'format code', 'ext': u'extension',
|
||||||
|
'_resolution': u'resolution', 'format_note': u'note'})
|
||||||
|
self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
|
||||||
|
(info_dict['id'], header_line, u"\n".join(formats_s)))
|
||||||
|
@@ -83,6 +83,7 @@ from .mit import TechTVMITIE, MITIE
|
|||||||
from .mixcloud import MixcloudIE
|
from .mixcloud import MixcloudIE
|
||||||
from .mtv import MTVIE
|
from .mtv import MTVIE
|
||||||
from .muzu import MuzuTVIE
|
from .muzu import MuzuTVIE
|
||||||
|
from .myspace import MySpaceIE
|
||||||
from .myspass import MySpassIE
|
from .myspass import MySpassIE
|
||||||
from .myvideo import MyVideoIE
|
from .myvideo import MyVideoIE
|
||||||
from .naver import NaverIE
|
from .naver import NaverIE
|
||||||
@@ -141,6 +142,7 @@ from .videofyme import VideofyMeIE
|
|||||||
from .videopremium import VideoPremiumIE
|
from .videopremium import VideoPremiumIE
|
||||||
from .vimeo import VimeoIE, VimeoChannelIE
|
from .vimeo import VimeoIE, VimeoChannelIE
|
||||||
from .vine import VineIE
|
from .vine import VineIE
|
||||||
|
from .vk import VKIE
|
||||||
from .wat import WatIE
|
from .wat import WatIE
|
||||||
from .websurg import WeBSurgIE
|
from .websurg import WeBSurgIE
|
||||||
from .weibo import WeiboIE
|
from .weibo import WeiboIE
|
||||||
|
@@ -158,7 +158,9 @@ class ArteTVPlus7IE(InfoExtractor):
|
|||||||
'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'),
|
'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'),
|
||||||
}
|
}
|
||||||
|
|
||||||
formats = player_info['VSR'].values()
|
all_formats = player_info['VSR'].values()
|
||||||
|
# Some formats use the m3u8 protocol
|
||||||
|
all_formats = list(filter(lambda f: f.get('videoFormat') != 'M3U8', all_formats))
|
||||||
def _match_lang(f):
|
def _match_lang(f):
|
||||||
if f.get('versionCode') is None:
|
if f.get('versionCode') is None:
|
||||||
return True
|
return True
|
||||||
@@ -170,11 +172,16 @@ class ArteTVPlus7IE(InfoExtractor):
|
|||||||
regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l]
|
regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l]
|
||||||
return any(re.match(r, f['versionCode']) for r in regexes)
|
return any(re.match(r, f['versionCode']) for r in regexes)
|
||||||
# Some formats may not be in the same language as the url
|
# Some formats may not be in the same language as the url
|
||||||
formats = filter(_match_lang, formats)
|
formats = filter(_match_lang, all_formats)
|
||||||
# Some formats use the m3u8 protocol
|
|
||||||
formats = filter(lambda f: f.get('videoFormat') != 'M3U8', formats)
|
|
||||||
# We order the formats by quality
|
|
||||||
formats = list(formats) # in python3 filter returns an iterator
|
formats = list(formats) # in python3 filter returns an iterator
|
||||||
|
if not formats:
|
||||||
|
# Some videos are only available in the 'Originalversion'
|
||||||
|
# they aren't tagged as being in French or German
|
||||||
|
if all(f['versionCode'] == 'VO' for f in all_formats):
|
||||||
|
formats = all_formats
|
||||||
|
else:
|
||||||
|
raise ExtractorError(u'The formats list is empty')
|
||||||
|
# We order the formats by quality
|
||||||
if re.match(r'[A-Z]Q', formats[0]['quality']) is not None:
|
if re.match(r'[A-Z]Q', formats[0]['quality']) is not None:
|
||||||
sort_key = lambda f: ['HQ', 'MQ', 'EQ', 'SQ'].index(f['quality'])
|
sort_key = lambda f: ['HQ', 'MQ', 'EQ', 'SQ'].index(f['quality'])
|
||||||
else:
|
else:
|
||||||
|
@@ -63,7 +63,7 @@ class InfoExtractor(object):
|
|||||||
* ext Will be calculated from url if missing
|
* ext Will be calculated from url if missing
|
||||||
* format A human-readable description of the format
|
* format A human-readable description of the format
|
||||||
("mp4 container with h264/opus").
|
("mp4 container with h264/opus").
|
||||||
Calculated from the format_id, width, height
|
Calculated from the format_id, width, height.
|
||||||
and format_note fields if missing.
|
and format_note fields if missing.
|
||||||
* format_id A short description of the format
|
* format_id A short description of the format
|
||||||
("mp4_h264_opus" or "19")
|
("mp4_h264_opus" or "19")
|
||||||
|
@@ -21,6 +21,7 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
|
|||||||
"""Build a request with the family filter disabled"""
|
"""Build a request with the family filter disabled"""
|
||||||
request = compat_urllib_request.Request(url)
|
request = compat_urllib_request.Request(url)
|
||||||
request.add_header('Cookie', 'family_filter=off')
|
request.add_header('Cookie', 'family_filter=off')
|
||||||
|
request.add_header('Cookie', 'ff=off')
|
||||||
return request
|
return request
|
||||||
|
|
||||||
class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
|
class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||||
@@ -61,6 +62,18 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
},
|
},
|
||||||
u'skip': u'VEVO is only available in some countries',
|
u'skip': u'VEVO is only available in some countries',
|
||||||
},
|
},
|
||||||
|
# age-restricted video
|
||||||
|
{
|
||||||
|
u'url': u'http://www.dailymotion.com/video/xyh2zz_leanna-decker-cyber-girl-of-the-year-desires-nude-playboy-plus_redband',
|
||||||
|
u'file': u'xyh2zz.mp4',
|
||||||
|
u'md5': u'0d667a7b9cebecc3c89ee93099c4159d',
|
||||||
|
u'info_dict': {
|
||||||
|
u'title': 'Leanna Decker - Cyber Girl Of The Year Desires Nude [Playboy Plus]',
|
||||||
|
u'uploader': 'HotWaves1012',
|
||||||
|
u'age_limit': 18,
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@@ -90,7 +103,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
video_uploader = self._search_regex([r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a>',
|
video_uploader = self._search_regex([r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a>',
|
||||||
# Looking for official user
|
# Looking for official user
|
||||||
r'<(?:span|a) .*?rel="author".*?>([^<]+?)</'],
|
r'<(?:span|a) .*?rel="author".*?>([^<]+?)</'],
|
||||||
webpage, 'video uploader')
|
webpage, 'video uploader', fatal=False)
|
||||||
|
age_limit = self._rta_search(webpage)
|
||||||
|
|
||||||
video_upload_date = None
|
video_upload_date = None
|
||||||
mobj = re.search(r'<div class="[^"]*uploaded_cont[^"]*" title="[^"]*">([0-9]{2})-([0-9]{2})-([0-9]{4})</div>', webpage)
|
mobj = re.search(r'<div class="[^"]*uploaded_cont[^"]*" title="[^"]*">([0-9]{2})-([0-9]{2})-([0-9]{4})</div>', webpage)
|
||||||
@@ -132,15 +146,16 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
self._list_available_subtitles(video_id)
|
self._list_available_subtitles(video_id)
|
||||||
return
|
return
|
||||||
|
|
||||||
return [{
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'uploader': video_uploader,
|
'uploader': video_uploader,
|
||||||
'upload_date': video_upload_date,
|
'upload_date': video_upload_date,
|
||||||
'title': self._og_search_title(webpage),
|
'title': self._og_search_title(webpage),
|
||||||
'subtitles': video_subtitles,
|
'subtitles': video_subtitles,
|
||||||
'thumbnail': info['thumbnail_url']
|
'thumbnail': info['thumbnail_url'],
|
||||||
}]
|
'age_limit': age_limit,
|
||||||
|
}
|
||||||
|
|
||||||
def _get_available_subtitles(self, video_id):
|
def _get_available_subtitles(self, video_id):
|
||||||
try:
|
try:
|
||||||
|
@@ -40,13 +40,9 @@ class LivestreamIE(InfoExtractor):
|
|||||||
|
|
||||||
if video_id is None:
|
if video_id is None:
|
||||||
# This is an event page:
|
# This is an event page:
|
||||||
player = get_meta_content('twitter:player', webpage)
|
config_json = self._search_regex(r'window.config = ({.*?});',
|
||||||
if player is None:
|
webpage, u'window config')
|
||||||
raise ExtractorError('Couldn\'t extract event api url')
|
info = json.loads(config_json)['event']
|
||||||
api_url = player.replace('/player', '')
|
|
||||||
api_url = re.sub(r'^(https?://)(new\.)', r'\1api.\2', api_url)
|
|
||||||
info = json.loads(self._download_webpage(api_url, event_name,
|
|
||||||
u'Downloading event info'))
|
|
||||||
videos = [self._extract_video_info(video_data['data'])
|
videos = [self._extract_video_info(video_data['data'])
|
||||||
for video_data in info['feed']['data'] if video_data['type'] == u'video']
|
for video_data in info['feed']['data'] if video_data['type'] == u'video']
|
||||||
return self.playlist_result(videos, info['id'], info['full_name'])
|
return self.playlist_result(videos, info['id'], info['full_name'])
|
||||||
|
@@ -20,7 +20,9 @@ class MetacafeIE(InfoExtractor):
|
|||||||
_DISCLAIMER = 'http://www.metacafe.com/family_filter/'
|
_DISCLAIMER = 'http://www.metacafe.com/family_filter/'
|
||||||
_FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
|
_FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
|
||||||
IE_NAME = u'metacafe'
|
IE_NAME = u'metacafe'
|
||||||
_TESTS = [{
|
_TESTS = [
|
||||||
|
# Youtube video
|
||||||
|
{
|
||||||
u"add_ie": ["Youtube"],
|
u"add_ie": ["Youtube"],
|
||||||
u"url": u"http://metacafe.com/watch/yt-_aUehQsCQtM/the_electric_company_short_i_pbs_kids_go/",
|
u"url": u"http://metacafe.com/watch/yt-_aUehQsCQtM/the_electric_company_short_i_pbs_kids_go/",
|
||||||
u"file": u"_aUehQsCQtM.mp4",
|
u"file": u"_aUehQsCQtM.mp4",
|
||||||
@@ -32,15 +34,42 @@ class MetacafeIE(InfoExtractor):
|
|||||||
u"uploader_id": u"PBS"
|
u"uploader_id": u"PBS"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
# Normal metacafe video
|
||||||
|
{
|
||||||
|
u'url': u'http://www.metacafe.com/watch/11121940/news_stuff_you_wont_do_with_your_playstation_4/',
|
||||||
|
u'md5': u'6e0bca200eaad2552e6915ed6fd4d9ad',
|
||||||
|
u'info_dict': {
|
||||||
|
u'id': u'11121940',
|
||||||
|
u'ext': u'mp4',
|
||||||
|
u'title': u'News: Stuff You Won\'t Do with Your PlayStation 4',
|
||||||
|
u'uploader': u'ign',
|
||||||
|
u'description': u'Sony released a massive FAQ on the PlayStation Blog detailing the PS4\'s capabilities and limitations.',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
# AnyClip video
|
||||||
{
|
{
|
||||||
u"url": u"http://www.metacafe.com/watch/an-dVVXnuY7Jh77J/the_andromeda_strain_1971_stop_the_bomb_part_3/",
|
u"url": u"http://www.metacafe.com/watch/an-dVVXnuY7Jh77J/the_andromeda_strain_1971_stop_the_bomb_part_3/",
|
||||||
u"file": u"an-dVVXnuY7Jh77J.mp4",
|
u"file": u"an-dVVXnuY7Jh77J.mp4",
|
||||||
u"info_dict": {
|
u"info_dict": {
|
||||||
u"title": u"The Andromeda Strain (1971): Stop the Bomb Part 3",
|
u"title": u"The Andromeda Strain (1971): Stop the Bomb Part 3",
|
||||||
u"uploader": u"anyclip",
|
u"uploader": u"anyclip",
|
||||||
u"description": u"md5:38c711dd98f5bb87acf973d573442e67"
|
u"description": u"md5:38c711dd98f5bb87acf973d573442e67",
|
||||||
}
|
},
|
||||||
}]
|
},
|
||||||
|
# age-restricted video
|
||||||
|
{
|
||||||
|
u'url': u'http://www.metacafe.com/watch/5186653/bbc_internal_christmas_tape_79_uncensored_outtakes_etc/',
|
||||||
|
u'md5': u'98dde7c1a35d02178e8ab7560fe8bd09',
|
||||||
|
u'info_dict': {
|
||||||
|
u'id': u'5186653',
|
||||||
|
u'ext': u'mp4',
|
||||||
|
u'title': u'BBC INTERNAL Christmas Tape \'79 - UNCENSORED Outtakes, Etc.',
|
||||||
|
u'uploader': u'Dwayne Pipe',
|
||||||
|
u'description': u'md5:950bf4c581e2c059911fa3ffbe377e4b',
|
||||||
|
u'age_limit': 18,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
def report_disclaimer(self):
|
def report_disclaimer(self):
|
||||||
@@ -62,6 +91,7 @@ class MetacafeIE(InfoExtractor):
|
|||||||
'submit': "Continue - I'm over 18",
|
'submit': "Continue - I'm over 18",
|
||||||
}
|
}
|
||||||
request = compat_urllib_request.Request(self._FILTER_POST, compat_urllib_parse.urlencode(disclaimer_form))
|
request = compat_urllib_request.Request(self._FILTER_POST, compat_urllib_parse.urlencode(disclaimer_form))
|
||||||
|
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||||
try:
|
try:
|
||||||
self.report_age_confirmation()
|
self.report_age_confirmation()
|
||||||
compat_urllib_request.urlopen(request).read()
|
compat_urllib_request.urlopen(request).read()
|
||||||
@@ -83,7 +113,12 @@ class MetacafeIE(InfoExtractor):
|
|||||||
|
|
||||||
# Retrieve video webpage to extract further information
|
# Retrieve video webpage to extract further information
|
||||||
req = compat_urllib_request.Request('http://www.metacafe.com/watch/%s/' % video_id)
|
req = compat_urllib_request.Request('http://www.metacafe.com/watch/%s/' % video_id)
|
||||||
req.headers['Cookie'] = 'flashVersion=0;'
|
|
||||||
|
# AnyClip videos require the flashversion cookie so that we get the link
|
||||||
|
# to the mp4 file
|
||||||
|
mobj_an = re.match(r'^an-(.*?)$', video_id)
|
||||||
|
if mobj_an:
|
||||||
|
req.headers['Cookie'] = 'flashVersion=0;'
|
||||||
webpage = self._download_webpage(req, video_id)
|
webpage = self._download_webpage(req, video_id)
|
||||||
|
|
||||||
# Extract URL, uploader and title from webpage
|
# Extract URL, uploader and title from webpage
|
||||||
@@ -125,6 +160,11 @@ class MetacafeIE(InfoExtractor):
|
|||||||
r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("(?:channel|submiter)","([^"]+)"\);',
|
r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("(?:channel|submiter)","([^"]+)"\);',
|
||||||
webpage, u'uploader nickname', fatal=False)
|
webpage, u'uploader nickname', fatal=False)
|
||||||
|
|
||||||
|
if re.search(r'"contentRating":"restricted"', webpage) is not None:
|
||||||
|
age_limit = 18
|
||||||
|
else:
|
||||||
|
age_limit = 0
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'_type': 'video',
|
'_type': 'video',
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
@@ -134,4 +174,5 @@ class MetacafeIE(InfoExtractor):
|
|||||||
'upload_date': None,
|
'upload_date': None,
|
||||||
'title': video_title,
|
'title': video_title,
|
||||||
'ext': video_ext,
|
'ext': video_ext,
|
||||||
|
'age_limit': age_limit,
|
||||||
}
|
}
|
||||||
|
@@ -80,6 +80,8 @@ class MTVIE(InfoExtractor):
|
|||||||
video_id = self._id_from_uri(uri)
|
video_id = self._id_from_uri(uri)
|
||||||
self.report_extraction(video_id)
|
self.report_extraction(video_id)
|
||||||
mediagen_url = itemdoc.find('%s/%s' % (_media_xml_tag('group'), _media_xml_tag('content'))).attrib['url']
|
mediagen_url = itemdoc.find('%s/%s' % (_media_xml_tag('group'), _media_xml_tag('content'))).attrib['url']
|
||||||
|
# Remove the templates, like &device={device}
|
||||||
|
mediagen_url = re.sub(r'&[^=]*?={.*?}(?=(&|$))', u'', mediagen_url)
|
||||||
if 'acceptMethods' not in mediagen_url:
|
if 'acceptMethods' not in mediagen_url:
|
||||||
mediagen_url += '&acceptMethods=fms'
|
mediagen_url += '&acceptMethods=fms'
|
||||||
mediagen_page = self._download_webpage(mediagen_url, video_id,
|
mediagen_page = self._download_webpage(mediagen_url, video_id,
|
||||||
|
48
youtube_dl/extractor/myspace.py
Normal file
48
youtube_dl/extractor/myspace.py
Normal file
@@ -0,0 +1,48 @@
|
|||||||
|
import re
|
||||||
|
import json
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
compat_str,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class MySpaceIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://myspace\.com/([^/]+)/video/[^/]+/(?P<id>\d+)'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
u'url': u'https://myspace.com/coldplay/video/viva-la-vida/100008689',
|
||||||
|
u'info_dict': {
|
||||||
|
u'id': u'100008689',
|
||||||
|
u'ext': u'flv',
|
||||||
|
u'title': u'Viva La Vida',
|
||||||
|
u'description': u'The official Viva La Vida video, directed by Hype Williams',
|
||||||
|
u'uploader': u'Coldplay',
|
||||||
|
u'uploader_id': u'coldplay',
|
||||||
|
},
|
||||||
|
u'params': {
|
||||||
|
# rtmp download
|
||||||
|
u'skip_download': True,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
context = json.loads(self._search_regex(r'context = ({.*?});', webpage,
|
||||||
|
u'context'))
|
||||||
|
video = context['video']
|
||||||
|
rtmp_url, play_path = video['streamUrl'].split(';', 1)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': compat_str(video['mediaId']),
|
||||||
|
'title': video['title'],
|
||||||
|
'url': rtmp_url,
|
||||||
|
'play_path': play_path,
|
||||||
|
'ext': 'flv',
|
||||||
|
'description': video['description'],
|
||||||
|
'thumbnail': video['imageUrl'],
|
||||||
|
'uploader': video['artistName'],
|
||||||
|
'uploader_id': video['artistUsername'],
|
||||||
|
}
|
@@ -5,7 +5,7 @@ import datetime
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
compat_HTTPError,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -16,26 +16,22 @@ class VevoIE(InfoExtractor):
|
|||||||
(currently used by MTVIE)
|
(currently used by MTVIE)
|
||||||
"""
|
"""
|
||||||
_VALID_URL = r'((http://www.vevo.com/watch/.*?/.*?/)|(vevo:))(?P<id>.*?)(\?|$)'
|
_VALID_URL = r'((http://www.vevo.com/watch/.*?/.*?/)|(vevo:))(?P<id>.*?)(\?|$)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
u'url': u'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
|
u'url': u'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
|
||||||
u'file': u'GB1101300280.mp4',
|
u'file': u'GB1101300280.mp4',
|
||||||
|
u"md5": u"06bea460acb744eab74a9d7dcb4bfd61",
|
||||||
u'info_dict': {
|
u'info_dict': {
|
||||||
u"upload_date": u"20130624",
|
u"upload_date": u"20130624",
|
||||||
u"uploader": u"Hurts",
|
u"uploader": u"Hurts",
|
||||||
u"title": u"Somebody to Die For",
|
u"title": u"Somebody to Die For",
|
||||||
u'duration': 230,
|
u"duration": 230,
|
||||||
|
u"width": 1920,
|
||||||
|
u"height": 1080,
|
||||||
}
|
}
|
||||||
}
|
}]
|
||||||
|
_SMIL_BASE_URL = 'http://smil.lvl3.vevo.com/'
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _formats_from_json(self, video_info):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
|
||||||
video_id = mobj.group('id')
|
|
||||||
|
|
||||||
json_url = 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id
|
|
||||||
info_json = self._download_webpage(json_url, video_id, u'Downloading json info')
|
|
||||||
|
|
||||||
self.report_extraction(video_id)
|
|
||||||
video_info = json.loads(info_json)['video']
|
|
||||||
last_version = {'version': -1}
|
last_version = {'version': -1}
|
||||||
for version in video_info['videoVersions']:
|
for version in video_info['videoVersions']:
|
||||||
# These are the HTTP downloads, other types are for different manifests
|
# These are the HTTP downloads, other types are for different manifests
|
||||||
@@ -50,17 +46,74 @@ class VevoIE(InfoExtractor):
|
|||||||
# Already sorted from worst to best quality
|
# Already sorted from worst to best quality
|
||||||
for rend in renditions.findall('rendition'):
|
for rend in renditions.findall('rendition'):
|
||||||
attr = rend.attrib
|
attr = rend.attrib
|
||||||
f_url = attr['url']
|
format_note = '%(videoCodec)s@%(videoBitrate)4sk, %(audioCodec)s@%(audioBitrate)3sk' % attr
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': f_url,
|
'url': attr['url'],
|
||||||
'ext': determine_ext(f_url),
|
'format_id': attr['name'],
|
||||||
|
'format_note': format_note,
|
||||||
'height': int(attr['frameheight']),
|
'height': int(attr['frameheight']),
|
||||||
'width': int(attr['frameWidth']),
|
'width': int(attr['frameWidth']),
|
||||||
})
|
})
|
||||||
|
return formats
|
||||||
|
|
||||||
date_epoch = int(self._search_regex(
|
def _formats_from_smil(self, smil_xml):
|
||||||
r'/Date\((\d+)\)/', video_info['launchDate'], u'launch date'))/1000
|
formats = []
|
||||||
upload_date = datetime.datetime.fromtimestamp(date_epoch)
|
smil_doc = xml.etree.ElementTree.fromstring(smil_xml.encode('utf-8'))
|
||||||
|
els = smil_doc.findall('.//{http://www.w3.org/2001/SMIL20/Language}video')
|
||||||
|
for el in els:
|
||||||
|
src = el.attrib['src']
|
||||||
|
m = re.match(r'''(?xi)
|
||||||
|
(?P<ext>[a-z0-9]+):
|
||||||
|
(?P<path>
|
||||||
|
[/a-z0-9]+ # The directory and main part of the URL
|
||||||
|
_(?P<cbr>[0-9]+)k
|
||||||
|
_(?P<width>[0-9]+)x(?P<height>[0-9]+)
|
||||||
|
_(?P<vcodec>[a-z0-9]+)
|
||||||
|
_(?P<vbr>[0-9]+)
|
||||||
|
_(?P<acodec>[a-z0-9]+)
|
||||||
|
_(?P<abr>[0-9]+)
|
||||||
|
\.[a-z0-9]+ # File extension
|
||||||
|
)''', src)
|
||||||
|
if not m:
|
||||||
|
continue
|
||||||
|
|
||||||
|
format_url = self._SMIL_BASE_URL + m.group('path')
|
||||||
|
format_note = ('%(vcodec)s@%(vbr)4sk, %(acodec)s@%(abr)3sk' %
|
||||||
|
m.groupdict())
|
||||||
|
formats.append({
|
||||||
|
'url': format_url,
|
||||||
|
'format_id': u'SMIL_' + m.group('cbr'),
|
||||||
|
'format_note': format_note,
|
||||||
|
'ext': m.group('ext'),
|
||||||
|
'width': int(m.group('width')),
|
||||||
|
'height': int(m.group('height')),
|
||||||
|
})
|
||||||
|
return formats
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
|
json_url = 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id
|
||||||
|
info_json = self._download_webpage(json_url, video_id, u'Downloading json info')
|
||||||
|
video_info = json.loads(info_json)['video']
|
||||||
|
|
||||||
|
formats = self._formats_from_json(video_info)
|
||||||
|
try:
|
||||||
|
smil_url = '%s/Video/V2/VFILE/%s/%sr.smil' % (
|
||||||
|
self._SMIL_BASE_URL, video_id, video_id.lower())
|
||||||
|
smil_xml = self._download_webpage(smil_url, video_id,
|
||||||
|
u'Downloading SMIL info')
|
||||||
|
formats.extend(self._formats_from_smil(smil_xml))
|
||||||
|
except ExtractorError as ee:
|
||||||
|
if not isinstance(ee.cause, compat_HTTPError):
|
||||||
|
raise
|
||||||
|
self._downloader.report_warning(
|
||||||
|
u'Cannot download SMIL information, falling back to JSON ..')
|
||||||
|
|
||||||
|
timestamp_ms = int(self._search_regex(
|
||||||
|
r'/Date\((\d+)\)/', video_info['launchDate'], u'launch date'))
|
||||||
|
upload_date = datetime.datetime.fromtimestamp(timestamp_ms // 1000)
|
||||||
info = {
|
info = {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': video_info['title'],
|
'title': video_info['title'],
|
||||||
@@ -71,7 +124,4 @@ class VevoIE(InfoExtractor):
|
|||||||
'duration': video_info['duration'],
|
'duration': video_info['duration'],
|
||||||
}
|
}
|
||||||
|
|
||||||
# TODO: Remove when #980 has been merged
|
|
||||||
info.update(formats[-1])
|
|
||||||
|
|
||||||
return info
|
return info
|
||||||
|
@@ -27,7 +27,7 @@ class VimeoIE(InfoExtractor):
|
|||||||
{
|
{
|
||||||
u'url': u'http://vimeo.com/56015672#at=0',
|
u'url': u'http://vimeo.com/56015672#at=0',
|
||||||
u'file': u'56015672.mp4',
|
u'file': u'56015672.mp4',
|
||||||
u'md5': u'ae7a1d8b183758a0506b0622f37dfa14',
|
u'md5': u'8879b6cc097e987f02484baf890129e5',
|
||||||
u'info_dict': {
|
u'info_dict': {
|
||||||
u"upload_date": u"20121220",
|
u"upload_date": u"20121220",
|
||||||
u"description": u"This is a test case for youtube-dl.\nFor more information, see github.com/rg3/youtube-dl\nTest chars: \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550",
|
u"description": u"This is a test case for youtube-dl.\nFor more information, see github.com/rg3/youtube-dl\nTest chars: \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550",
|
||||||
|
45
youtube_dl/extractor/vk.py
Normal file
45
youtube_dl/extractor/vk.py
Normal file
@@ -0,0 +1,45 @@
|
|||||||
|
# encoding: utf-8
|
||||||
|
import re
|
||||||
|
import json
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
compat_str,
|
||||||
|
unescapeHTML,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class VKIE(InfoExtractor):
    """Information extractor for videos on vk.com.

    Handles both direct ``video<id>`` URLs and ``videos...?z=video<id>``
    overlay URLs, as accepted by ``_VALID_URL``.
    """

    IE_NAME = u'vk.com'
    _VALID_URL = r'https?://vk\.com/(?:videos.*?\?.*?z=)?video(?P<id>.*?)(?:\?|%2F|$)'

    _TEST = {
        u'url': u'http://vk.com/videos-77521?z=video-77521_162222515%2Fclub77521',
        u'md5': u'0deae91935c54e00003c2a00646315f0',
        u'info_dict': {
            u'id': u'162222515',
            u'ext': u'flv',
            u'title': u'ProtivoGunz - Хуёвая песня',
            u'uploader': u'Noize MC',
        },
    }

    def _real_extract(self, url):
        """Return the info dictionary (or a Youtube url result) for *url*.

        Fetches the ``al_video.php`` info page for the video id. When that
        page embeds a youtube.com ``src``, extraction is handed over to the
        'Youtube' extractor; otherwise the page's ``var vars`` JSON object
        provides the metadata and the media URL.
        """
        video_id = re.match(self._VALID_URL, url).group('id')
        info_page = self._download_webpage(
            'http://vk.com/al_video.php?act=show&al=1&video=%s' % video_id,
            video_id)

        youtube_embed = re.search(
            r'src="(http://www.youtube.com/.*?)"', info_page)
        if youtube_embed is not None:
            self.to_screen(u'Youtube video detected')
            return self.url_result(youtube_embed.group(1), 'Youtube')

        player_vars = json.loads(self._search_regex(
            r'var vars = ({.*?});', info_page, u'vars'))

        return {
            'id': compat_str(player_vars['vid']),
            # Only the 'url240' entry is used as the media URL.
            'url': player_vars['url240'],
            'title': unescapeHTML(player_vars['md_title']),
            'thumbnail': player_vars['jpg'],
            'uploader': player_vars['md_author'],
        }
|
@@ -74,14 +74,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
|||||||
self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err))
|
self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err))
|
||||||
return False
|
return False
|
||||||
|
|
||||||
galx = None
|
galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"',
|
||||||
dsh = None
|
login_page, u'Login GALX parameter')
|
||||||
match = re.search(re.compile(r'<input.+?name="GALX".+?value="(.+?)"', re.DOTALL), login_page)
|
|
||||||
if match:
|
|
||||||
galx = match.group(1)
|
|
||||||
match = re.search(re.compile(r'<input.+?name="dsh".+?value="(.+?)"', re.DOTALL), login_page)
|
|
||||||
if match:
|
|
||||||
dsh = match.group(1)
|
|
||||||
|
|
||||||
# Log in
|
# Log in
|
||||||
login_form_strs = {
|
login_form_strs = {
|
||||||
@@ -95,7 +89,6 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
|||||||
u'checkConnection': u'',
|
u'checkConnection': u'',
|
||||||
u'checkedDomains': u'youtube',
|
u'checkedDomains': u'youtube',
|
||||||
u'dnConn': u'',
|
u'dnConn': u'',
|
||||||
u'dsh': dsh,
|
|
||||||
u'pstMsg': u'0',
|
u'pstMsg': u'0',
|
||||||
u'rmShown': u'1',
|
u'rmShown': u'1',
|
||||||
u'secTok': u'',
|
u'secTok': u'',
|
||||||
@@ -1118,7 +1111,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
'lang': lang,
|
'lang': lang,
|
||||||
'v': video_id,
|
'v': video_id,
|
||||||
'fmt': self._downloader.params.get('subtitlesformat'),
|
'fmt': self._downloader.params.get('subtitlesformat'),
|
||||||
'name': l[0],
|
'name': l[0].encode('utf-8'),
|
||||||
})
|
})
|
||||||
url = u'http://www.youtube.com/api/timedtext?' + params
|
url = u'http://www.youtube.com/api/timedtext?' + params
|
||||||
sub_lang_list[lang] = url
|
sub_lang_list[lang] = url
|
||||||
|
@@ -1,2 +1,2 @@
|
|||||||
|
|
||||||
__version__ = '2013.10.28'
|
__version__ = '2013.11.02'
|
||||||
|
Reference in New Issue
Block a user