Merge branch 'master' of github.com:rg3/youtube-dl

Conflicts:
	youtube_dl/extractor/mtv.py
This commit is contained in:
Philipp Hagemeister 2014-01-22 00:21:27 +01:00
commit 6562df768d
4 changed files with 65 additions and 13 deletions

View File

@ -119,7 +119,10 @@ from .mit import TechTVMITIE, MITIE
from .mixcloud import MixcloudIE from .mixcloud import MixcloudIE
from .mpora import MporaIE from .mpora import MporaIE
from .mofosex import MofosexIE from .mofosex import MofosexIE
from .mtv import MTVIE from .mtv import (
MTVIE,
MTVIggyIE,
)
from .muzu import MuzuTVIE from .muzu import MuzuTVIE
from .myspace import MySpaceIE from .myspace import MySpaceIE
from .myspass import MySpassIE from .myspass import MySpassIE
@ -171,6 +174,7 @@ from .southparkstudios import (
from .space import SpaceIE from .space import SpaceIE
from .spankwire import SpankwireIE from .spankwire import SpankwireIE
from .spiegel import SpiegelIE from .spiegel import SpiegelIE
from .spike import SpikeIE
from .stanfordoc import StanfordOpenClassroomIE from .stanfordoc import StanfordOpenClassroomIE
from .statigram import StatigramIE from .statigram import StatigramIE
from .steam import SteamIE from .steam import SteamIE

View File

@ -90,9 +90,12 @@ class BrightcoveIE(InfoExtractor):
object_doc = xml.etree.ElementTree.fromstring(object_str) object_doc = xml.etree.ElementTree.fromstring(object_str)
fv_el = find_xpath_attr(object_doc, './param', 'name', 'flashVars') fv_el = find_xpath_attr(object_doc, './param', 'name', 'flashVars')
flashvars = dict( if fv_el is not None:
(k, v[0]) flashvars = dict(
for k, v in compat_parse_qs(fv_el.attrib['value']).items()) (k, v[0])
for k, v in compat_parse_qs(fv_el.attrib['value']).items())
else:
flashvars = {}
def find_param(name): def find_param(name):
if name in flashvars: if name in flashvars:
@ -131,7 +134,7 @@ class BrightcoveIE(InfoExtractor):
m_brightcove = re.search( m_brightcove = re.search(
r'''(?sx)<object r'''(?sx)<object
(?: (?:
:[^>]+?class=([\'"])[^>]*?BrightcoveExperience.*?\1 | [^>]+?class=([\'"])[^>]*?BrightcoveExperience.*?\1 |
[^>]*?>\s*<param\s+name="movie"\s+value="https?://[^/]*brightcove\.com/ [^>]*?>\s*<param\s+name="movie"\s+value="https?://[^/]*brightcove\.com/
).+?</object>''', ).+?</object>''',
webpage) webpage)
@ -230,6 +233,6 @@ class BrightcoveIE(InfoExtractor):
else: else:
return ad_info return ad_info
if 'url' not in info: if 'url' not in info and not info.get('formats'):
raise ExtractorError('Unable to extract video url for %s' % info['id']) raise ExtractorError('Unable to extract video url for %s' % info['id'])
return info return info

View File

@ -1,7 +1,6 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re import re
import xml.etree.ElementTree
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
@ -9,6 +8,8 @@ from ..utils import (
ExtractorError, ExtractorError,
find_xpath_attr, find_xpath_attr,
fix_xml_ampersands, fix_xml_ampersands,
url_basename,
RegexNotFoundError,
) )
@ -38,10 +39,9 @@ class MTVServicesInfoExtractor(InfoExtractor):
else: else:
return thumb_node.attrib['url'] return thumb_node.attrib['url']
def _extract_video_formats(self, metadataXml): def _extract_video_formats(self, mdoc):
if '/error_country_block.swf' in metadataXml: if re.match(r'.*/error_country_block\.swf$', mdoc.find('.//src').text) is not None:
raise ExtractorError('This video is not available from your country.', expected=True) raise ExtractorError('This video is not available from your country.', expected=True)
mdoc = xml.etree.ElementTree.fromstring(metadataXml.encode('utf-8'))
formats = [] formats = []
for rendition in mdoc.findall('.//rendition'): for rendition in mdoc.findall('.//rendition'):
@ -67,8 +67,9 @@ class MTVServicesInfoExtractor(InfoExtractor):
mediagen_url = re.sub(r'&[^=]*?={.*?}(?=(&|$))', '', mediagen_url) mediagen_url = re.sub(r'&[^=]*?={.*?}(?=(&|$))', '', mediagen_url)
if 'acceptMethods' not in mediagen_url: if 'acceptMethods' not in mediagen_url:
mediagen_url += '&acceptMethods=fms' mediagen_url += '&acceptMethods=fms'
mediagen_page = self._download_webpage(mediagen_url, video_id,
'Downloading video urls') mediagen_doc = self._download_xml(mediagen_url, video_id,
'Downloading video urls')
description_node = itemdoc.find('description') description_node = itemdoc.find('description')
if description_node is not None: if description_node is not None:
@ -91,7 +92,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
return { return {
'title': title, 'title': title,
'formats': self._extract_video_formats(mediagen_page), 'formats': self._extract_video_formats(mediagen_doc),
'id': video_id, 'id': video_id,
'thumbnail': self._get_thumbnail_url(uri, itemdoc), 'thumbnail': self._get_thumbnail_url(uri, itemdoc),
'description': description, 'description': description,
@ -106,6 +107,17 @@ class MTVServicesInfoExtractor(InfoExtractor):
'Downloading info', transform_source=fix_xml_ampersands) 'Downloading info', transform_source=fix_xml_ampersands)
return [self._get_video_info(item) for item in idoc.findall('.//item')] return [self._get_video_info(item) for item in idoc.findall('.//item')]
def _real_extract(self, url):
    """Resolve a page URL to the videos of the player embedded in it.

    The page is downloaded and the media id (mgid) is taken from the
    basename of the URL returned by ``self._og_search_video_url``.  When
    that lookup raises ``RegexNotFoundError`` the mgid is read from the
    page's ``data-mgid`` attribute instead.

    Returns the result of ``self._get_videos_info`` for that mgid.
    """
    # The last path component of the page URL is the identifier handed to
    # _download_webpage.
    title = url_basename(url)
    webpage = self._download_webpage(url, title)
    try:
        # the url is in the format http://media.mtvnservices.com/fb/{mgid}.swf
        fb_url = self._og_search_video_url(webpage)
        mgid = url_basename(fb_url)
        # Strip only the '.swf' suffix.  Cutting at the last '.' would
        # truncate a basename that lacks the suffix but contains a dot,
        # and would reduce a basename without any dot to ''.
        swf_ext = '.swf'
        if mgid.endswith(swf_ext):
            mgid = mgid[:-len(swf_ext)]
    except RegexNotFoundError:
        mgid = self._search_regex(r'data-mgid="(.*?)"', webpage, 'mgid')
    return self._get_videos_info(mgid)
class MTVIE(MTVServicesInfoExtractor): class MTVIE(MTVServicesInfoExtractor):
_VALID_URL = r'''(?x)^https?:// _VALID_URL = r'''(?x)^https?://
@ -158,3 +170,17 @@ class MTVIE(MTVServicesInfoExtractor):
uri = self._html_search_regex(r'/uri/(.*?)\?', webpage, 'uri') uri = self._html_search_regex(r'/uri/(.*?)\?', webpage, 'uri')
return self._get_videos_info(uri) return self._get_videos_info(uri)
class MTVIggyIE(MTVServicesInfoExtractor):
    """Extractor for video pages under http(s)://www.mtviggy.com/videos/.

    Declares only site-specific settings; all methods are inherited from
    MTVServicesInfoExtractor.
    """

    IE_NAME = 'mtviggy.com'

    # Feed address for this site.
    _FEED_URL = 'http://all.mtvworldverticals.com/feed-xml/'

    # Any path below /videos/ on www.mtviggy.com is accepted.
    _VALID_URL = r'https?://www\.mtviggy\.com/videos/.+'

    # Sample page together with the metadata expected from it.
    _TEST = {
        'url': 'http://www.mtviggy.com/videos/arcade-fire-behind-the-scenes-at-the-biggest-music-experiment-yet/',
        'info_dict': {
            'id': '984696',
            'ext': 'mp4',
            'title': 'Short',
        },
    }

View File

@ -0,0 +1,19 @@
from __future__ import unicode_literals
from .mtv import MTVServicesInfoExtractor
class SpikeIE(MTVServicesInfoExtractor):
    """Extractor for www.spike.com video-clip and episode pages.

    Declares only site-specific settings; all methods are inherited from
    MTVServicesInfoExtractor.
    """

    # Feed address for this site.
    _FEED_URL = 'http://www.spike.com/feeds/mrss/'

    # Matches pages under /video-clips/ or /episodes/.
    _VALID_URL = r'https?://www\.spike\.com/(video-clips|episodes)/.+'

    # Sample page together with the file checksum and metadata expected
    # from it.
    _TEST = {
        'url': 'http://www.spike.com/video-clips/lhtu8m/auction-hunters-can-allen-ride-a-hundred-year-old-motorcycle',
        'md5': '1a9265f32b0c375793d6c4ce45255256',
        'info_dict': {
            'id': 'b9c8221a-4e50-479a-b86d-3333323e38ba',
            'ext': 'mp4',
            'title': 'Can Allen Ride A Hundred Year-Old Motorcycle?',
            'description': 'md5:fbed7e82ed5fad493615b3094a9499cb',
        },
    }