Compare commits


31 Commits

Author SHA1 Message Date
Philipp Hagemeister
aa2484e390 release 2013.11.02 2013-11-02 11:21:36 +01:00
Philipp Hagemeister
8eddf3e91d [youtube] Encode subtitle track name in request (Fixes #1700) 2013-11-02 11:21:05 +01:00
Jaime Marquínez Ferrándiz
60d142aa8d Add an extractor for vk.com (closes #1635) 2013-11-01 22:34:18 +01:00
Jaime Marquínez Ferrándiz
66cf3ac342 [metacafe] Fix support for age-restricted videos (fixes #1696)
The 'Content-Type' header must be set to disable the family filter.
The 'flashversion' cookie is only needed for AnyClip videos.
Added tests for standard metacafe videos and for age-restricted videos.
Also set the 'age_limit' field.
2013-11-01 11:56:15 +01:00
Jaime Marquínez Ferrándiz
5f1ea943ab [livestream] fix the extraction of events
It now uses a json dictionary from the webpage.
2013-10-31 08:07:26 +01:00
Jaime Marquínez Ferrándiz
0ef7ad5cd4 Fix the test for dailymotion subtitles
The extractor returns a single info_dict now.
2013-10-31 07:55:03 +01:00
Philipp Hagemeister
9f1109a564 [dailymotion] Fix support for age-restricted videos (Fixes #1688) 2013-10-31 00:20:49 +01:00
Philipp Hagemeister
33b1d9595d release 2013.10.30 2013-10-30 01:17:20 +01:00
Philipp Hagemeister
7193498811 Use index in format string (Fixes vevo test on Python 2.6) 2013-10-30 01:17:00 +01:00
Philipp Hagemeister
72321ead7b [vevo] Readd support for SMIL (Fixes #1683) 2013-10-30 01:14:17 +01:00
Philipp Hagemeister
b5d0d817bc Remove superfluous space 2013-10-30 01:09:44 +01:00
Philipp Hagemeister
94badb2599 Fix output indenting for --list-formats 2013-10-30 01:09:26 +01:00
Filippo Valsorda
b9a836515f Update the Vimeo test vector md5
Confirmed that this is indeed the first 10241 bytes (we went off by one with
byte range 0-10240) of the full, playing mp4, so they probably
re-encoded it or something
2013-10-29 16:44:35 -04:00
Jaime Marquínez Ferrándiz
21c924f406 [arte] Download the 'Originalversion' version if it's the only one available (fixes #1682) 2013-10-29 20:58:49 +01:00
Philipp Hagemeister
e54fd4b23b [vevo] Add more format details 2013-10-29 15:10:09 +01:00
Philipp Hagemeister
57dd9a8f2f Nicer --list-formats output 2013-10-29 15:09:45 +01:00
Philipp Hagemeister
912cbf5d4e [vevo] Fix timestamp handling
( / 1000 is implicit float division )
2013-10-29 14:00:23 +01:00
Philipp Hagemeister
43d7895ea0 release 2013.10.29 2013-10-29 06:48:39 +01:00
Philipp Hagemeister
f7ff55aa78 Merge remote-tracking branch 'origin/master' 2013-10-29 06:48:18 +01:00
Philipp Hagemeister
795f28f871 [youtube] Fix login (Fixes #1681) 2013-10-29 06:45:54 +01:00
Filippo Valsorda
f6cc16f5d8 [tests] an HTTP 503 is a transient issue 2013-10-28 19:07:16 -04:00
Jaime Marquínez Ferrándiz
321a01f971 [mtv] Remove the templates from the mediagen url 2013-10-28 23:37:01 +01:00
Philipp Hagemeister
646e17a53d Fix YouTubeDL test 2013-10-28 23:18:13 +01:00
Filippo Valsorda
dd508b7c4f [tests] don't fail on network errors
This is suboptimal, but at least this way we will only need to look at the logs
to check for network errors that happen too often, instead of
parsing a ton of lines each time to see whether there is some true test failure
2013-10-28 18:03:26 -04:00
Jaime Marquínez Ferrándiz
2563bcc85c Add an extractor for MySpace (closes #1666) 2013-10-28 22:02:17 +01:00
Jaime Marquínez Ferrándiz
702665c085 tests: build the filename from the info_dict if the 'file' key is missing
It will need to have the 'id' and 'ext' keys to work.
2013-10-28 22:01:37 +01:00
Jaime Marquínez Ferrándiz
369a759acc setup.py: Make sure the setuptools_available variable is set
Otherwise it would crash if it can't import setuptools.
2013-10-28 16:54:48 +01:00
Philipp Hagemeister
79b3f61228 Merge pull request #1675 from rzhxeo/fix
Check if description and thumbnail are None to prevent crash
2013-10-28 08:35:40 -07:00
rzhxeo
216d71d001 Check if description and thumbnail are None to prevent crash 2013-10-28 16:28:35 +01:00
Philipp Hagemeister
78a3a9f89e Make "requested format not available" expected (#1655) 2013-10-28 11:41:59 +01:00
Philipp Hagemeister
a7685f3bf4 mixcloud does not do any format selection 2013-10-28 11:41:32 +01:00
19 changed files with 344 additions and 100 deletions

View File

@@ -11,6 +11,7 @@ try:
setuptools_available = True
except ImportError:
from distutils.core import setup
setuptools_available = False
try:
# This will create an exe that needs Microsoft Visual C++ 2008
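The hunk above (commit 369a759acc) makes sure setuptools_available is assigned in both branches of the import. A minimal sketch of the resulting pattern; the entry_points and scripts values below are illustrative, not copied from the repository:

# Prefer setuptools, fall back to distutils, and remember which one was found
# so later code can branch on it.
try:
    from setuptools import setup
    setuptools_available = True
except ImportError:
    from distutils.core import setup
    setuptools_available = False

params = {}
if setuptools_available:
    # setuptools-only arguments can then be guarded like this (illustrative):
    params['entry_points'] = {'console_scripts': ['youtube-dl = youtube_dl:main']}
else:
    params['scripts'] = ['bin/youtube-dl']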

View File

@@ -5,9 +5,11 @@ import json
import os.path
import re
import types
import sys
import youtube_dl.extractor
from youtube_dl import YoutubeDL
from youtube_dl.utils import preferredencoding
def global_setup():
@@ -33,6 +35,21 @@ def try_rm(filename):
raise
def report_warning(message):
'''
Print the message to stderr, it will be prefixed with 'WARNING:'
If stderr is a tty file the 'WARNING:' will be colored
'''
if sys.stderr.isatty() and os.name != 'nt':
_msg_header = u'\033[0;33mWARNING:\033[0m'
else:
_msg_header = u'WARNING:'
output = u'%s %s\n' % (_msg_header, message)
if 'b' in getattr(sys.stderr, 'mode', '') or sys.version_info[0] < 3:
output = output.encode(preferredencoding())
sys.stderr.write(output)
class FakeYDL(YoutubeDL):
def __init__(self, override=None):
# Different instances of the downloader can't share the same dictionary

View File

@@ -62,10 +62,10 @@ class TestFormatSelection(unittest.TestCase):
def test_format_limit(self):
formats = [
{u'format_id': u'meh'},
{u'format_id': u'good'},
{u'format_id': u'great'},
{u'format_id': u'excellent'},
{u'format_id': u'meh', u'url': u'http://example.com/meh'},
{u'format_id': u'good', u'url': u'http://example.com/good'},
{u'format_id': u'great', u'url': u'http://example.com/great'},
{u'format_id': u'excellent', u'url': u'http://example.com/exc'},
]
info_dict = {
u'formats': formats, u'extractor': u'test', 'id': 'testvid'}

View File

@@ -22,7 +22,7 @@ class TestDailymotionSubtitles(unittest.TestCase):
return info_dict
def getSubtitles(self):
info_dict = self.getInfoDict()
return info_dict[0]['subtitles']
return info_dict['subtitles']
def test_no_writesubtitles(self):
subtitles = self.getSubtitles()
self.assertEqual(subtitles, None)

View File

@@ -6,7 +6,14 @@ import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import get_params, get_testcases, global_setup, try_rm, md5
from test.helper import (
get_params,
get_testcases,
global_setup,
try_rm,
md5,
report_warning
)
global_setup()
@@ -19,6 +26,7 @@ import youtube_dl.YoutubeDL
from youtube_dl.utils import (
compat_str,
compat_urllib_error,
compat_HTTPError,
DownloadError,
ExtractorError,
UnavailableVideoError,
@@ -60,9 +68,12 @@ def generator(test_case):
if not ie._WORKING:
print_skipping('IE marked as not _WORKING')
return
if 'playlist' not in test_case and not test_case['file']:
print_skipping('No output file specified')
return
if 'playlist' not in test_case:
info_dict = test_case.get('info_dict', {})
if not test_case.get('file') and not (info_dict.get('id') and info_dict.get('ext')):
print_skipping('The output file cannot be known, the "file" '
'key is missing or the info_dict is incomplete')
return
if 'skip' in test_case:
print_skipping(test_case['skip'])
return
@@ -77,35 +88,47 @@ def generator(test_case):
finished_hook_called.add(status['filename'])
ydl.fd.add_progress_hook(_hook)
def get_tc_filename(tc):
return tc.get('file') or ydl.prepare_filename(tc.get('info_dict', {}))
test_cases = test_case.get('playlist', [test_case])
for tc in test_cases:
try_rm(tc['file'])
try_rm(tc['file'] + '.part')
try_rm(tc['file'] + '.info.json')
def try_rm_tcs_files():
for tc in test_cases:
tc_filename = get_tc_filename(tc)
try_rm(tc_filename)
try_rm(tc_filename + '.part')
try_rm(tc_filename + '.info.json')
try_rm_tcs_files()
try:
for retry in range(1, RETRIES + 1):
try_num = 1
while True:
try:
ydl.download([test_case['url']])
except (DownloadError, ExtractorError) as err:
if retry == RETRIES: raise
# Check if the exception is not a network related one
if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError) or (err.exc_info[0] == compat_HTTPError and err.exc_info[1].code == 503):
raise
print('Retrying: {0} failed tries\n\n##########\n\n'.format(retry))
if try_num == RETRIES:
report_warning(u'Failed due to network errors, skipping...')
return
print('Retrying: {0} failed tries\n\n##########\n\n'.format(try_num))
try_num += 1
else:
break
for tc in test_cases:
tc_filename = get_tc_filename(tc)
if not test_case.get('params', {}).get('skip_download', False):
self.assertTrue(os.path.exists(tc['file']), msg='Missing file ' + tc['file'])
self.assertTrue(tc['file'] in finished_hook_called)
self.assertTrue(os.path.exists(tc['file'] + '.info.json'))
self.assertTrue(os.path.exists(tc_filename), msg='Missing file ' + tc_filename)
self.assertTrue(tc_filename in finished_hook_called)
self.assertTrue(os.path.exists(tc_filename + '.info.json'))
if 'md5' in tc:
md5_for_file = _file_md5(tc['file'])
md5_for_file = _file_md5(tc_filename)
self.assertEqual(md5_for_file, tc['md5'])
with io.open(tc['file'] + '.info.json', encoding='utf-8') as infof:
with io.open(tc_filename + '.info.json', encoding='utf-8') as infof:
info_dict = json.load(infof)
for (info_field, expected) in tc.get('info_dict', {}).items():
if isinstance(expected, compat_str) and expected.startswith('md5:'):
@@ -126,10 +149,7 @@ def generator(test_case):
for key in ('id', 'url', 'title', 'ext'):
self.assertTrue(key in info_dict.keys() and info_dict[key])
finally:
for tc in test_cases:
try_rm(tc['file'])
try_rm(tc['file'] + '.part')
try_rm(tc['file'] + '.info.json')
try_rm_tcs_files()
return test_template
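The rewritten retry loop above keeps retrying on transient network problems (URL errors, timeouts, unavailable videos, and now HTTP 503) and, after RETRIES attempts, downgrades the failure to a warning instead of failing the test. A standalone sketch of that classification, using Python 3 stdlib names as stand-ins for the compat_* aliases in the real code:

import socket
import urllib.error  # stand-in for compat_urllib_error / compat_HTTPError

RETRIES = 3

def is_transient(exc):
    # A 503 is a transient issue; so are plain URL errors and socket timeouts.
    if isinstance(exc, urllib.error.HTTPError):
        return exc.code == 503
    return isinstance(exc, (urllib.error.URLError, socket.timeout))

def download_with_retries(download, url):
    try_num = 1
    while True:
        try:
            return download(url)
        except Exception as err:
            if not is_transient(err):
                raise  # a genuine failure, let the test fail
            if try_num == RETRIES:
                print(u'WARNING: Failed due to network errors, skipping...')
                return None
            print('Retrying: {0} failed tries'.format(try_num))
            try_num += 1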

View File

@@ -272,7 +272,7 @@ class YoutubeDL(object):
autonumber_size = 5
autonumber_templ = u'%0' + str(autonumber_size) + u'd'
template_dict['autonumber'] = autonumber_templ % self._num_downloads
if template_dict['playlist_index'] is not None:
if template_dict.get('playlist_index') is not None:
template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']
sanitize = lambda k, v: sanitize_filename(
@@ -462,7 +462,7 @@ class YoutubeDL(object):
info_dict['playlist_index'] = None
# These extractors handle format selection themselves
if info_dict['extractor'] in [u'youtube', u'Youku', u'mixcloud']:
if info_dict['extractor'] in [u'youtube', u'Youku']:
if download:
self.process_info(info_dict)
return info_dict
@@ -482,7 +482,7 @@ class YoutubeDL(object):
format['format'] = u'{id} - {res}{note}'.format(
id=format['format_id'],
res=self.format_resolution(format),
note=u' ({})'.format(format['format_note']) if format.get('format_note') is not None else '',
note=u' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
)
# Automatically determine file extension if missing
if 'ext' not in format:
@@ -524,7 +524,8 @@ class YoutubeDL(object):
formats_to_download = [selected_format]
break
if not formats_to_download:
raise ExtractorError(u'requested format not available')
raise ExtractorError(u'requested format not available',
expected=True)
if download:
if len(formats_to_download) > 1:
@@ -574,9 +575,9 @@ class YoutubeDL(object):
if self.params.get('forceurl', False):
# For RTMP URLs, also include the playpath
compat_print(info_dict['url'] + info_dict.get('play_path', u''))
if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
compat_print(info_dict['thumbnail'])
if self.params.get('forcedescription', False) and 'description' in info_dict:
if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
compat_print(info_dict['description'])
if self.params.get('forcefilename', False) and filename is not None:
compat_print(filename)
@@ -758,6 +759,8 @@ class YoutubeDL(object):
@staticmethod
def format_resolution(format, default='unknown'):
if format.get('_resolution') is not None:
return format['_resolution']
if format.get('height') is not None:
if format.get('width') is not None:
res = u'%sx%s' % (format['width'], format['height'])
@@ -768,19 +771,23 @@ class YoutubeDL(object):
return res
def list_formats(self, info_dict):
formats_s = []
for format in info_dict.get('formats', [info_dict]):
formats_s.append(u'%-15s%-7s %-15s%s' % (
def line(format):
return (u'%-15s%-10s%-12s%s' % (
format['format_id'],
format['ext'],
format.get('format_note', ''),
self.format_resolution(format),
format.get('format_note', ''),
)
)
if len(formats_s) != 1:
formats_s[0] += ' (worst)'
formats_s[-1] += ' (best)'
formats_s = "\n".join(formats_s)
self.to_screen(u'[info] Available formats for %s:\n'
u'format code extension note resolution\n%s' % (
info_dict['id'], formats_s))
formats = info_dict.get('formats', [info_dict])
formats_s = list(map(line, formats))
if len(formats) > 1:
formats_s[0] += (' ' if formats[0].get('format_note') else '') + '(worst)'
formats_s[-1] += (' ' if formats[-1].get('format_note') else '') + '(best)'
header_line = line({
'format_id': u'format code', 'ext': u'extension',
'_resolution': u'resolution', 'format_note': u'note'})
self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
(info_dict['id'], header_line, u"\n".join(formats_s)))
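The new --list-formats output builds every row, including the header, through the same line() formatter so the columns stay aligned. A self-contained sketch of the idea with invented format dicts:

def line(fmt):
    # Fixed-width columns: format id, extension, resolution, free-form note.
    return u'%-15s%-10s%-12s%s' % (
        fmt['format_id'], fmt['ext'],
        fmt.get('_resolution') or '%sx%s' % (fmt.get('width', '?'), fmt.get('height', '?')),
        fmt.get('format_note', ''))

formats = [  # invented example data, ordered worst to best
    {'format_id': '18', 'ext': 'mp4', 'width': 640, 'height': 360, 'format_note': ''},
    {'format_id': '22', 'ext': 'mp4', 'width': 1280, 'height': 720, 'format_note': 'HD'},
]
rows = [line(f) for f in formats]
if len(formats) > 1:
    rows[0] += ' (worst)'
    rows[-1] += ' (best)'
header_line = line({'format_id': 'format code', 'ext': 'extension',
                    '_resolution': 'resolution', 'format_note': 'note'})
print(header_line + '\n' + '\n'.join(rows))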

View File

@@ -83,6 +83,7 @@ from .mit import TechTVMITIE, MITIE
from .mixcloud import MixcloudIE
from .mtv import MTVIE
from .muzu import MuzuTVIE
from .myspace import MySpaceIE
from .myspass import MySpassIE
from .myvideo import MyVideoIE
from .naver import NaverIE
@@ -141,6 +142,7 @@ from .videofyme import VideofyMeIE
from .videopremium import VideoPremiumIE
from .vimeo import VimeoIE, VimeoChannelIE
from .vine import VineIE
from .vk import VKIE
from .wat import WatIE
from .websurg import WeBSurgIE
from .weibo import WeiboIE

View File

@@ -158,7 +158,9 @@ class ArteTVPlus7IE(InfoExtractor):
'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'),
}
formats = player_info['VSR'].values()
all_formats = player_info['VSR'].values()
# Some formats use the m3u8 protocol
all_formats = list(filter(lambda f: f.get('videoFormat') != 'M3U8', all_formats))
def _match_lang(f):
if f.get('versionCode') is None:
return True
@@ -170,11 +172,16 @@ class ArteTVPlus7IE(InfoExtractor):
regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l]
return any(re.match(r, f['versionCode']) for r in regexes)
# Some formats may not be in the same language as the url
formats = filter(_match_lang, formats)
# Some formats use the m3u8 protocol
formats = filter(lambda f: f.get('videoFormat') != 'M3U8', formats)
# We order the formats by quality
formats = filter(_match_lang, all_formats)
formats = list(formats) # in python3 filter returns an iterator
if not formats:
# Some videos are only available in the 'Originalversion'
# they aren't tagged as being in French or German
if all(f['versionCode'] == 'VO' for f in all_formats):
formats = all_formats
else:
raise ExtractorError(u'The formats list is empty')
# We order the formats by quality
if re.match(r'[A-Z]Q', formats[0]['quality']) is not None:
sort_key = lambda f: ['HQ', 'MQ', 'EQ', 'SQ'].index(f['quality'])
else:
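The arte.tv change first drops m3u8 entries, then filters by the language implied by the URL, and only falls back to the full list when every remaining entry is tagged 'VO' (Originalversion). A condensed sketch of that decision; the format entries are invented, and 'F' is used here as the French language code the extractor matches against:

import re

all_formats = [
    {'versionCode': 'VO', 'videoFormat': 'HBBTV', 'quality': 'HQ'},
    {'versionCode': 'VO', 'videoFormat': 'M3U8', 'quality': 'SQ'},
]
# Some formats use the m3u8 protocol; drop them first.
all_formats = [f for f in all_formats if f.get('videoFormat') != 'M3U8']

lang_code = 'F'  # illustrative; how the extractor abbreviates the French site
regexes = [r'VO?%s' % lang_code, r'VO?.-ST%s' % lang_code]

def _match_lang(f):
    # Keep untagged formats and those matching the requested language.
    if f.get('versionCode') is None:
        return True
    return any(re.match(r, f['versionCode']) for r in regexes)

formats = [f for f in all_formats if _match_lang(f)]
if not formats:
    if all(f['versionCode'] == 'VO' for f in all_formats):
        # Only the 'Originalversion' exists; use it instead of failing.
        formats = all_formats
    else:
        raise ValueError('The formats list is empty')
print(formats)  # [{'versionCode': 'VO', 'videoFormat': 'HBBTV', 'quality': 'HQ'}]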

View File

@@ -63,7 +63,7 @@ class InfoExtractor(object):
* ext Will be calculated from url if missing
* format A human-readable description of the format
("mp4 container with h264/opus").
Calculated from the format_id, width, height
Calculated from the format_id, width, height.
and format_note fields if missing.
* format_id A short description of the format
("mp4_h264_opus" or "19")

View File

@@ -21,6 +21,7 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
"""Build a request with the family filter disabled"""
request = compat_urllib_request.Request(url)
request.add_header('Cookie', 'family_filter=off')
request.add_header('Cookie', 'ff=off')
return request
class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
@@ -61,6 +62,18 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
},
u'skip': u'VEVO is only available in some countries',
},
# age-restricted video
{
u'url': u'http://www.dailymotion.com/video/xyh2zz_leanna-decker-cyber-girl-of-the-year-desires-nude-playboy-plus_redband',
u'file': u'xyh2zz.mp4',
u'md5': u'0d667a7b9cebecc3c89ee93099c4159d',
u'info_dict': {
u'title': 'Leanna Decker - Cyber Girl Of The Year Desires Nude [Playboy Plus]',
u'uploader': 'HotWaves1012',
u'age_limit': 18,
}
}
]
def _real_extract(self, url):
@@ -90,7 +103,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
video_uploader = self._search_regex([r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a>',
# Looking for official user
r'<(?:span|a) .*?rel="author".*?>([^<]+?)</'],
webpage, 'video uploader')
webpage, 'video uploader', fatal=False)
age_limit = self._rta_search(webpage)
video_upload_date = None
mobj = re.search(r'<div class="[^"]*uploaded_cont[^"]*" title="[^"]*">([0-9]{2})-([0-9]{2})-([0-9]{4})</div>', webpage)
@@ -132,15 +146,16 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
self._list_available_subtitles(video_id)
return
return [{
return {
'id': video_id,
'formats': formats,
'uploader': video_uploader,
'upload_date': video_upload_date,
'title': self._og_search_title(webpage),
'subtitles': video_subtitles,
'thumbnail': info['thumbnail_url']
}]
'thumbnail': info['thumbnail_url'],
'age_limit': age_limit,
}
def _get_available_subtitles(self, video_id):
try:
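Besides returning a single info dict instead of a one-element list, the Dailymotion extractor now fills in age_limit via _rta_search, which looks for the standard RTA ("Restricted To Adults") meta tag. A hedged approximation of that check; the label string is the published RTA value, but treat this helper as a sketch rather than the extractor's exact implementation:

import re

def rta_age_limit(webpage):
    # Pages carrying the standard RTA label (http://www.rtalabel.org/) are
    # treated as 18+, everything else as unrestricted.
    if re.search(r'(?ix)<meta\s+name="rating"\s+'
                 r'content="RTA-5042-1996-1400-1577-RTA"', webpage):
        return 18
    return 0

# Illustrative snippets, not real Dailymotion markup:
print(rta_age_limit('<meta name="rating" content="RTA-5042-1996-1400-1577-RTA">'))  # 18
print(rta_age_limit('<html><body>regular page</body></html>'))                      # 0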

View File

@@ -40,13 +40,9 @@ class LivestreamIE(InfoExtractor):
if video_id is None:
# This is an event page:
player = get_meta_content('twitter:player', webpage)
if player is None:
raise ExtractorError('Couldn\'t extract event api url')
api_url = player.replace('/player', '')
api_url = re.sub(r'^(https?://)(new\.)', r'\1api.\2', api_url)
info = json.loads(self._download_webpage(api_url, event_name,
u'Downloading event info'))
config_json = self._search_regex(r'window.config = ({.*?});',
webpage, u'window config')
info = json.loads(config_json)['event']
videos = [self._extract_video_info(video_data['data'])
for video_data in info['feed']['data'] if video_data['type'] == u'video']
return self.playlist_result(videos, info['id'], info['full_name'])
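Instead of deriving an API URL from the twitter:player meta tag, the Livestream event extractor now reads the JSON blob assigned to window.config in the event page and walks its feed. A minimal sketch against a fabricated page fragment:

import json
import re

# Fabricated page fragment in the shape the extractor expects.
webpage = ('var x = 1; window.config = {"event": {"id": 123, "full_name": "Demo event", '
           '"feed": {"data": [{"type": "video", "data": {"id": 456}}]}}}; more();')

config_json = re.search(r'window.config = ({.*?});', webpage).group(1)
info = json.loads(config_json)['event']
videos = [item['data'] for item in info['feed']['data'] if item['type'] == 'video']
print(info['full_name'], videos)  # Demo event [{'id': 456}]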

View File

@@ -20,7 +20,9 @@ class MetacafeIE(InfoExtractor):
_DISCLAIMER = 'http://www.metacafe.com/family_filter/'
_FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
IE_NAME = u'metacafe'
_TESTS = [{
_TESTS = [
# Youtube video
{
u"add_ie": ["Youtube"],
u"url": u"http://metacafe.com/watch/yt-_aUehQsCQtM/the_electric_company_short_i_pbs_kids_go/",
u"file": u"_aUehQsCQtM.mp4",
@@ -32,15 +34,42 @@ class MetacafeIE(InfoExtractor):
u"uploader_id": u"PBS"
}
},
# Normal metacafe video
{
u'url': u'http://www.metacafe.com/watch/11121940/news_stuff_you_wont_do_with_your_playstation_4/',
u'md5': u'6e0bca200eaad2552e6915ed6fd4d9ad',
u'info_dict': {
u'id': u'11121940',
u'ext': u'mp4',
u'title': u'News: Stuff You Won\'t Do with Your PlayStation 4',
u'uploader': u'ign',
u'description': u'Sony released a massive FAQ on the PlayStation Blog detailing the PS4\'s capabilities and limitations.',
},
},
# AnyClip video
{
u"url": u"http://www.metacafe.com/watch/an-dVVXnuY7Jh77J/the_andromeda_strain_1971_stop_the_bomb_part_3/",
u"file": u"an-dVVXnuY7Jh77J.mp4",
u"info_dict": {
u"title": u"The Andromeda Strain (1971): Stop the Bomb Part 3",
u"uploader": u"anyclip",
u"description": u"md5:38c711dd98f5bb87acf973d573442e67"
}
}]
u"description": u"md5:38c711dd98f5bb87acf973d573442e67",
},
},
# age-restricted video
{
u'url': u'http://www.metacafe.com/watch/5186653/bbc_internal_christmas_tape_79_uncensored_outtakes_etc/',
u'md5': u'98dde7c1a35d02178e8ab7560fe8bd09',
u'info_dict': {
u'id': u'5186653',
u'ext': u'mp4',
u'title': u'BBC INTERNAL Christmas Tape \'79 - UNCENSORED Outtakes, Etc.',
u'uploader': u'Dwayne Pipe',
u'description': u'md5:950bf4c581e2c059911fa3ffbe377e4b',
u'age_limit': 18,
},
},
]
def report_disclaimer(self):
@@ -62,6 +91,7 @@ class MetacafeIE(InfoExtractor):
'submit': "Continue - I'm over 18",
}
request = compat_urllib_request.Request(self._FILTER_POST, compat_urllib_parse.urlencode(disclaimer_form))
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
try:
self.report_age_confirmation()
compat_urllib_request.urlopen(request).read()
@@ -83,7 +113,12 @@ class MetacafeIE(InfoExtractor):
# Retrieve video webpage to extract further information
req = compat_urllib_request.Request('http://www.metacafe.com/watch/%s/' % video_id)
req.headers['Cookie'] = 'flashVersion=0;'
# AnyClip videos require the flashversion cookie so that we get the link
# to the mp4 file
mobj_an = re.match(r'^an-(.*?)$', video_id)
if mobj_an:
req.headers['Cookie'] = 'flashVersion=0;'
webpage = self._download_webpage(req, video_id)
# Extract URL, uploader and title from webpage
@@ -125,6 +160,11 @@ class MetacafeIE(InfoExtractor):
r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("(?:channel|submiter)","([^"]+)"\);',
webpage, u'uploader nickname', fatal=False)
if re.search(r'"contentRating":"restricted"', webpage) is not None:
age_limit = 18
else:
age_limit = 0
return {
'_type': 'video',
'id': video_id,
@@ -134,4 +174,5 @@ class MetacafeIE(InfoExtractor):
'upload_date': None,
'title': video_title,
'ext': video_ext,
'age_limit': age_limit,
}
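Per the commit message, the family-filter disclaimer POST only takes effect when the Content-Type header is set, and the age limit comes from a "contentRating":"restricted" marker in the page. A sketch of both pieces, using Python 3 stdlib modules in place of the compat_* aliases, reducing the form to the one field visible in the hunk, and never touching the network:

import re
import urllib.parse
import urllib.request

FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
disclaimer_form = {'submit': "Continue - I'm over 18"}  # other fields omitted

request = urllib.request.Request(
    FILTER_POST, urllib.parse.urlencode(disclaimer_form).encode('ascii'))
# Without this header the form is ignored and the family filter stays on.
request.add_header('Content-Type', 'application/x-www-form-urlencoded')

# Age limit detection from the video page (fabricated fragment):
webpage = '... "contentRating":"restricted" ...'
age_limit = 18 if re.search(r'"contentRating":"restricted"', webpage) else 0
print(age_limit)  # 18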

View File

@@ -80,6 +80,8 @@ class MTVIE(InfoExtractor):
video_id = self._id_from_uri(uri)
self.report_extraction(video_id)
mediagen_url = itemdoc.find('%s/%s' % (_media_xml_tag('group'), _media_xml_tag('content'))).attrib['url']
# Remove the templates, like &device={device}
mediagen_url = re.sub(r'&[^=]*?={.*?}(?=(&|$))', u'', mediagen_url)
if 'acceptMethods' not in mediagen_url:
mediagen_url += '&acceptMethods=fms'
mediagen_page = self._download_webpage(mediagen_url, video_id,
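The mediagen URL in MTV's feed can carry literal URI-template parameters such as &device={device}; the change strips them before the URL is requested. A standalone check of that substitution on an invented URL:

import re

mediagen_url = ('http://mediagen.example.invalid/path?uri=mgid:demo:123'
                '&device={device}&aspectRatio={aspectRatio}')
# Remove the templates, like &device={device}
mediagen_url = re.sub(r'&[^=]*?={.*?}(?=(&|$))', u'', mediagen_url)
if 'acceptMethods' not in mediagen_url:
    mediagen_url += '&acceptMethods=fms'
print(mediagen_url)
# http://mediagen.example.invalid/path?uri=mgid:demo:123&acceptMethods=fms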

View File

@@ -0,0 +1,48 @@
import re
import json
from .common import InfoExtractor
from ..utils import (
compat_str,
)
class MySpaceIE(InfoExtractor):
_VALID_URL = r'https?://myspace\.com/([^/]+)/video/[^/]+/(?P<id>\d+)'
_TEST = {
u'url': u'https://myspace.com/coldplay/video/viva-la-vida/100008689',
u'info_dict': {
u'id': u'100008689',
u'ext': u'flv',
u'title': u'Viva La Vida',
u'description': u'The official Viva La Vida video, directed by Hype Williams',
u'uploader': u'Coldplay',
u'uploader_id': u'coldplay',
},
u'params': {
# rtmp download
u'skip_download': True,
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
context = json.loads(self._search_regex(r'context = ({.*?});', webpage,
u'context'))
video = context['video']
rtmp_url, play_path = video['streamUrl'].split(';', 1)
return {
'id': compat_str(video['mediaId']),
'title': video['title'],
'url': rtmp_url,
'play_path': play_path,
'ext': 'flv',
'description': video['description'],
'thumbnail': video['imageUrl'],
'uploader': video['artistName'],
'uploader_id': video['artistUsername'],
}
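MySpace delivers the stream location as one string with the RTMP base URL and the play path joined by a semicolon, which the new extractor splits once. A tiny illustration with a made-up value:

# Illustrative only: the streamUrl value below is invented, but shows the shape
# the extractor expects -- "<rtmp base>;<play path>" split at the first ';'.
stream_url = 'rtmp://videos.myspacecdn.example/fms;mp4:path/to/viva-la-vida.mp4'
rtmp_url, play_path = stream_url.split(';', 1)
print(rtmp_url)   # rtmp://videos.myspacecdn.example/fms
print(play_path)  # mp4:path/to/viva-la-vida.mp4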

View File

@@ -5,7 +5,7 @@ import datetime
from .common import InfoExtractor
from ..utils import (
determine_ext,
compat_HTTPError,
ExtractorError,
)
@@ -16,26 +16,22 @@ class VevoIE(InfoExtractor):
(currently used by MTVIE)
"""
_VALID_URL = r'((http://www.vevo.com/watch/.*?/.*?/)|(vevo:))(?P<id>.*?)(\?|$)'
_TEST = {
_TESTS = [{
u'url': u'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
u'file': u'GB1101300280.mp4',
u"md5": u"06bea460acb744eab74a9d7dcb4bfd61",
u'info_dict': {
u"upload_date": u"20130624",
u"uploader": u"Hurts",
u"title": u"Somebody to Die For",
u'duration': 230,
u"duration": 230,
u"width": 1920,
u"height": 1080,
}
}
}]
_SMIL_BASE_URL = 'http://smil.lvl3.vevo.com/'
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
json_url = 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id
info_json = self._download_webpage(json_url, video_id, u'Downloading json info')
self.report_extraction(video_id)
video_info = json.loads(info_json)['video']
def _formats_from_json(self, video_info):
last_version = {'version': -1}
for version in video_info['videoVersions']:
# These are the HTTP downloads, other types are for different manifests
@@ -50,17 +46,74 @@ class VevoIE(InfoExtractor):
# Already sorted from worst to best quality
for rend in renditions.findall('rendition'):
attr = rend.attrib
f_url = attr['url']
format_note = '%(videoCodec)s@%(videoBitrate)4sk, %(audioCodec)s@%(audioBitrate)3sk' % attr
formats.append({
'url': f_url,
'ext': determine_ext(f_url),
'url': attr['url'],
'format_id': attr['name'],
'format_note': format_note,
'height': int(attr['frameheight']),
'width': int(attr['frameWidth']),
})
return formats
date_epoch = int(self._search_regex(
r'/Date\((\d+)\)/', video_info['launchDate'], u'launch date'))/1000
upload_date = datetime.datetime.fromtimestamp(date_epoch)
def _formats_from_smil(self, smil_xml):
formats = []
smil_doc = xml.etree.ElementTree.fromstring(smil_xml.encode('utf-8'))
els = smil_doc.findall('.//{http://www.w3.org/2001/SMIL20/Language}video')
for el in els:
src = el.attrib['src']
m = re.match(r'''(?xi)
(?P<ext>[a-z0-9]+):
(?P<path>
[/a-z0-9]+ # The directory and main part of the URL
_(?P<cbr>[0-9]+)k
_(?P<width>[0-9]+)x(?P<height>[0-9]+)
_(?P<vcodec>[a-z0-9]+)
_(?P<vbr>[0-9]+)
_(?P<acodec>[a-z0-9]+)
_(?P<abr>[0-9]+)
\.[a-z0-9]+ # File extension
)''', src)
if not m:
continue
format_url = self._SMIL_BASE_URL + m.group('path')
format_note = ('%(vcodec)s@%(vbr)4sk, %(acodec)s@%(abr)3sk' %
m.groupdict())
formats.append({
'url': format_url,
'format_id': u'SMIL_' + m.group('cbr'),
'format_note': format_note,
'ext': m.group('ext'),
'width': int(m.group('width')),
'height': int(m.group('height')),
})
return formats
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
json_url = 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id
info_json = self._download_webpage(json_url, video_id, u'Downloading json info')
video_info = json.loads(info_json)['video']
formats = self._formats_from_json(video_info)
try:
smil_url = '%s/Video/V2/VFILE/%s/%sr.smil' % (
self._SMIL_BASE_URL, video_id, video_id.lower())
smil_xml = self._download_webpage(smil_url, video_id,
u'Downloading SMIL info')
formats.extend(self._formats_from_smil(smil_xml))
except ExtractorError as ee:
if not isinstance(ee.cause, compat_HTTPError):
raise
self._downloader.report_warning(
u'Cannot download SMIL information, falling back to JSON ..')
timestamp_ms = int(self._search_regex(
r'/Date\((\d+)\)/', video_info['launchDate'], u'launch date'))
upload_date = datetime.datetime.fromtimestamp(timestamp_ms // 1000)
info = {
'id': video_id,
'title': video_info['title'],
@@ -71,7 +124,4 @@ class VevoIE(InfoExtractor):
'duration': video_info['duration'],
}
# TODO: Remove when #980 has been merged
info.update(formats[-1])
return info
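The Vevo launchDate arrives in the .NET /Date(milliseconds)/ form; the fix parses the number and uses floor division so the epoch value stays an integer on Python 2 and 3 alike. A small check with an invented value:

import datetime
import re

launch_date = '/Date(1372075200000)/'  # invented, in the form the Vevo API returns
timestamp_ms = int(re.search(r'/Date\((\d+)\)/', launch_date).group(1))
upload_date = datetime.datetime.fromtimestamp(timestamp_ms // 1000)
print(upload_date.strftime('%Y%m%d'))  # 20130624 (interpreted in local time)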

View File

@@ -27,7 +27,7 @@ class VimeoIE(InfoExtractor):
{
u'url': u'http://vimeo.com/56015672#at=0',
u'file': u'56015672.mp4',
u'md5': u'ae7a1d8b183758a0506b0622f37dfa14',
u'md5': u'8879b6cc097e987f02484baf890129e5',
u'info_dict': {
u"upload_date": u"20121220",
u"description": u"This is a test case for youtube-dl.\nFor more information, see github.com/rg3/youtube-dl\nTest chars: \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550",

View File

@@ -0,0 +1,45 @@
# encoding: utf-8
import re
import json
from .common import InfoExtractor
from ..utils import (
compat_str,
unescapeHTML,
)
class VKIE(InfoExtractor):
IE_NAME = u'vk.com'
_VALID_URL = r'https?://vk\.com/(?:videos.*?\?.*?z=)?video(?P<id>.*?)(?:\?|%2F|$)'
_TEST = {
u'url': u'http://vk.com/videos-77521?z=video-77521_162222515%2Fclub77521',
u'md5': u'0deae91935c54e00003c2a00646315f0',
u'info_dict': {
u'id': u'162222515',
u'ext': u'flv',
u'title': u'ProtivoGunz - Хуёвая песня',
u'uploader': u'Noize MC',
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
info_url = 'http://vk.com/al_video.php?act=show&al=1&video=%s' % video_id
info_page = self._download_webpage(info_url, video_id)
m_yt = re.search(r'src="(http://www.youtube.com/.*?)"', info_page)
if m_yt is not None:
self.to_screen(u'Youtube video detected')
return self.url_result(m_yt.group(1), 'Youtube')
vars_json = self._search_regex(r'var vars = ({.*?});', info_page, u'vars')
vars = json.loads(vars_json)
return {
'id': compat_str(vars['vid']),
'url': vars['url240'],
'title': unescapeHTML(vars['md_title']),
'thumbnail': vars['jpg'],
'uploader': vars['md_author'],
}
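A quick, self-contained check of the _VALID_URL pattern above against the test URL, showing what lands in the id group before the extractor resolves the numeric id from the page:

import re

_VALID_URL = r'https?://vk\.com/(?:videos.*?\?.*?z=)?video(?P<id>.*?)(?:\?|%2F|$)'
m = re.match(_VALID_URL, 'http://vk.com/videos-77521?z=video-77521_162222515%2Fclub77521')
print(m.group('id'))  # -77521_162222515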

View File

@@ -74,14 +74,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err))
return False
galx = None
dsh = None
match = re.search(re.compile(r'<input.+?name="GALX".+?value="(.+?)"', re.DOTALL), login_page)
if match:
galx = match.group(1)
match = re.search(re.compile(r'<input.+?name="dsh".+?value="(.+?)"', re.DOTALL), login_page)
if match:
dsh = match.group(1)
galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"',
login_page, u'Login GALX parameter')
# Log in
login_form_strs = {
@@ -95,7 +89,6 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
u'checkConnection': u'',
u'checkedDomains': u'youtube',
u'dnConn': u'',
u'dsh': dsh,
u'pstMsg': u'0',
u'rmShown': u'1',
u'secTok': u'',
@@ -1118,7 +1111,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
'lang': lang,
'v': video_id,
'fmt': self._downloader.params.get('subtitlesformat'),
'name': l[0],
'name': l[0].encode('utf-8'),
})
url = u'http://www.youtube.com/api/timedtext?' + params
sub_lang_list[lang] = url
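On Python 2, urlencode chokes on unicode values outside ASCII, so the subtitle track name is encoded to UTF-8 bytes before the timedtext query string is built (the crash behind #1700). A minimal sketch of the idea; the parameter values are illustrative and the import fallback mirrors where urlencode lives on each Python version:

try:
    from urllib.parse import urlencode   # Python 3
except ImportError:
    from urllib import urlencode         # Python 2, where the fix matters

params = urlencode({
    'lang': 'ru',
    'v': 'abcdefghijk',                   # illustrative video id
    'fmt': 'srt',
    'name': u'Русские субтитры'.encode('utf-8'),  # encode before urlencode
})
url = u'http://www.youtube.com/api/timedtext?' + params
print(url)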

View File

@@ -1,2 +1,2 @@
__version__ = '2013.10.28'
__version__ = '2013.11.02'