Compare commits
1 Commits
2013.11.28
...
rtmp_test
Author | SHA1 | Date | |
---|---|---|---|
00b350d209 |
@ -3,6 +3,9 @@ python:
|
|||||||
- "2.6"
|
- "2.6"
|
||||||
- "2.7"
|
- "2.7"
|
||||||
- "3.3"
|
- "3.3"
|
||||||
|
before_install:
|
||||||
|
- sudo apt-get update -qq
|
||||||
|
- sudo apt-get install -qq rtmpdump
|
||||||
script: nosetests test --verbose
|
script: nosetests test --verbose
|
||||||
notifications:
|
notifications:
|
||||||
email:
|
email:
|
||||||
|
@ -1,21 +1,10 @@
|
|||||||
__youtube_dl()
|
__youtube_dl()
|
||||||
{
|
{
|
||||||
local cur prev opts fileopts diropts keywords
|
local cur prev opts
|
||||||
COMPREPLY=()
|
COMPREPLY=()
|
||||||
cur="${COMP_WORDS[COMP_CWORD]}"
|
cur="${COMP_WORDS[COMP_CWORD]}"
|
||||||
prev="${COMP_WORDS[COMP_CWORD-1]}"
|
|
||||||
opts="{{flags}}"
|
opts="{{flags}}"
|
||||||
keywords=":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater :ythistory"
|
keywords=":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater"
|
||||||
fileopts="-a|--batch-file|--download-archive|--cookies"
|
|
||||||
diropts="--cache-dir"
|
|
||||||
|
|
||||||
if [[ ${prev} =~ ${fileopts} ]]; then
|
|
||||||
COMPREPLY=( $(compgen -f -- ${cur}) )
|
|
||||||
return 0
|
|
||||||
elif [[ ${prev} =~ ${diropts} ]]; then
|
|
||||||
COMPREPLY=( $(compgen -d -- ${cur}) )
|
|
||||||
return 0
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [[ ${cur} =~ : ]]; then
|
if [[ ${cur} =~ : ]]; then
|
||||||
COMPREPLY=( $(compgen -W "${keywords}" -- ${cur}) )
|
COMPREPLY=( $(compgen -W "${keywords}" -- ${cur}) )
|
||||||
|
@ -107,14 +107,5 @@ class TestYoutubeLists(unittest.TestCase):
|
|||||||
result = ie.extract('http://www.youtube.com/show/airdisasters')
|
result = ie.extract('http://www.youtube.com/show/airdisasters')
|
||||||
self.assertTrue(len(result) >= 3)
|
self.assertTrue(len(result) >= 3)
|
||||||
|
|
||||||
def test_youtube_mix(self):
|
|
||||||
dl = FakeYDL()
|
|
||||||
ie = YoutubePlaylistIE(dl)
|
|
||||||
result = ie.extract('http://www.youtube.com/watch?v=lLJf9qJHR3E&list=RDrjFaenf1T-Y')
|
|
||||||
entries = result['entries']
|
|
||||||
self.assertTrue(len(entries) >= 20)
|
|
||||||
original_video = entries[0]
|
|
||||||
self.assertEqual(original_video['id'], 'rjFaenf1T-Y')
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
@ -155,7 +155,6 @@ class YoutubeDL(object):
|
|||||||
self._download_retcode = 0
|
self._download_retcode = 0
|
||||||
self._num_downloads = 0
|
self._num_downloads = 0
|
||||||
self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
|
self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
|
||||||
self.params = params
|
|
||||||
|
|
||||||
if (sys.version_info >= (3,) and sys.platform != 'win32' and
|
if (sys.version_info >= (3,) and sys.platform != 'win32' and
|
||||||
sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
|
sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
|
||||||
@ -165,8 +164,9 @@ class YoutubeDL(object):
|
|||||||
u'Assuming --restrict-filenames since file system encoding '
|
u'Assuming --restrict-filenames since file system encoding '
|
||||||
u'cannot encode all charactes. '
|
u'cannot encode all charactes. '
|
||||||
u'Set the LC_ALL environment variable to fix this.')
|
u'Set the LC_ALL environment variable to fix this.')
|
||||||
self.params['restrictfilenames'] = True
|
params['restrictfilenames'] = True
|
||||||
|
|
||||||
|
self.params = params
|
||||||
self.fd = FileDownloader(self, self.params)
|
self.fd = FileDownloader(self, self.params)
|
||||||
|
|
||||||
if '%(stitle)s' in self.params.get('outtmpl', ''):
|
if '%(stitle)s' in self.params.get('outtmpl', ''):
|
||||||
|
@ -651,7 +651,6 @@ def _real_main(argv=None):
|
|||||||
'download_archive': opts.download_archive,
|
'download_archive': opts.download_archive,
|
||||||
'cookiefile': opts.cookiefile,
|
'cookiefile': opts.cookiefile,
|
||||||
'nocheckcertificate': opts.no_check_certificate,
|
'nocheckcertificate': opts.no_check_certificate,
|
||||||
'proxy': opts.proxy,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
with YoutubeDL(ydl_opts) as ydl:
|
with YoutubeDL(ydl_opts) as ydl:
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
import re
|
import re
|
||||||
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
@ -27,8 +28,9 @@ class AnitubeIE(InfoExtractor):
|
|||||||
key = self._html_search_regex(r'http://www\.anitube\.se/embed/([A-Za-z0-9_-]*)',
|
key = self._html_search_regex(r'http://www\.anitube\.se/embed/([A-Za-z0-9_-]*)',
|
||||||
webpage, u'key')
|
webpage, u'key')
|
||||||
|
|
||||||
config_xml = self._download_xml('http://www.anitube.se/nuevo/econfig.php?key=%s' % key,
|
webpage_config = self._download_webpage('http://www.anitube.se/nuevo/econfig.php?key=%s' % key,
|
||||||
key)
|
key)
|
||||||
|
config_xml = xml.etree.ElementTree.fromstring(webpage_config.encode('utf-8'))
|
||||||
|
|
||||||
video_title = config_xml.find('title').text
|
video_title = config_xml.find('title').text
|
||||||
|
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
# encoding: utf-8
|
# encoding: utf-8
|
||||||
import re
|
import re
|
||||||
import json
|
import json
|
||||||
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@ -77,7 +78,8 @@ class ArteTvIE(InfoExtractor):
|
|||||||
"""Extract from videos.arte.tv"""
|
"""Extract from videos.arte.tv"""
|
||||||
ref_xml_url = url.replace('/videos/', '/do_delegate/videos/')
|
ref_xml_url = url.replace('/videos/', '/do_delegate/videos/')
|
||||||
ref_xml_url = ref_xml_url.replace('.html', ',view,asPlayerXml.xml')
|
ref_xml_url = ref_xml_url.replace('.html', ',view,asPlayerXml.xml')
|
||||||
ref_xml_doc = self._download_xml(ref_xml_url, video_id, note=u'Downloading metadata')
|
ref_xml = self._download_webpage(ref_xml_url, video_id, note=u'Downloading metadata')
|
||||||
|
ref_xml_doc = xml.etree.ElementTree.fromstring(ref_xml)
|
||||||
config_node = find_xpath_attr(ref_xml_doc, './/video', 'lang', lang)
|
config_node = find_xpath_attr(ref_xml_doc, './/video', 'lang', lang)
|
||||||
config_xml_url = config_node.attrib['ref']
|
config_xml_url = config_node.attrib['ref']
|
||||||
config_xml = self._download_webpage(config_xml_url, video_id, note=u'Downloading configuration')
|
config_xml = self._download_webpage(config_xml_url, video_id, note=u'Downloading configuration')
|
||||||
@ -107,8 +109,9 @@ class ArteTvIE(InfoExtractor):
|
|||||||
"""Extract form http://liveweb.arte.tv/"""
|
"""Extract form http://liveweb.arte.tv/"""
|
||||||
webpage = self._download_webpage(url, name)
|
webpage = self._download_webpage(url, name)
|
||||||
video_id = self._search_regex(r'eventId=(\d+?)("|&)', webpage, u'event id')
|
video_id = self._search_regex(r'eventId=(\d+?)("|&)', webpage, u'event id')
|
||||||
config_doc = self._download_xml('http://download.liveweb.arte.tv/o21/liveweb/events/event-%s.xml' % video_id,
|
config_xml = self._download_webpage('http://download.liveweb.arte.tv/o21/liveweb/events/event-%s.xml' % video_id,
|
||||||
video_id, u'Downloading information')
|
video_id, u'Downloading information')
|
||||||
|
config_doc = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
|
||||||
event_doc = config_doc.find('event')
|
event_doc = config_doc.find('event')
|
||||||
url_node = event_doc.find('video').find('urlHd')
|
url_node = event_doc.find('video').find('urlHd')
|
||||||
if url_node is None:
|
if url_node is None:
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
# encoding: utf-8
|
# encoding: utf-8
|
||||||
import re
|
import re
|
||||||
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import unified_strdate
|
from ..utils import unified_strdate
|
||||||
@ -30,10 +31,11 @@ class CanalplusIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, mobj.group('path'))
|
webpage = self._download_webpage(url, mobj.group('path'))
|
||||||
video_id = self._search_regex(r'videoId = "(\d+)";', webpage, u'video id')
|
video_id = self._search_regex(r'videoId = "(\d+)";', webpage, u'video id')
|
||||||
info_url = self._VIDEO_INFO_TEMPLATE % video_id
|
info_url = self._VIDEO_INFO_TEMPLATE % video_id
|
||||||
doc = self._download_xml(info_url,video_id,
|
info_page = self._download_webpage(info_url,video_id,
|
||||||
u'Downloading video info')
|
u'Downloading video info')
|
||||||
|
|
||||||
self.report_extraction(video_id)
|
self.report_extraction(video_id)
|
||||||
|
doc = xml.etree.ElementTree.fromstring(info_page.encode('utf-8'))
|
||||||
video_info = [video for video in doc if video.find('ID').text == video_id][0]
|
video_info = [video for video in doc if video.find('ID').text == video_id][0]
|
||||||
infos = video_info.find('INFOS')
|
infos = video_info.find('INFOS')
|
||||||
media = video_info.find('MEDIA')
|
media = video_info.find('MEDIA')
|
||||||
|
@ -12,27 +12,21 @@ class CinemassacreIE(InfoExtractor):
|
|||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
u'url': u'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/',
|
u'url': u'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/',
|
||||||
u'file': u'19911.flv',
|
u'file': u'19911.flv',
|
||||||
|
u'md5': u'f9bb7ede54d1229c9846e197b4737e06',
|
||||||
u'info_dict': {
|
u'info_dict': {
|
||||||
u'upload_date': u'20121110',
|
u'upload_date': u'20121110',
|
||||||
u'title': u'“Angry Video Game Nerd: The Movie” – Trailer',
|
u'title': u'“Angry Video Game Nerd: The Movie” – Trailer',
|
||||||
u'description': u'md5:fb87405fcb42a331742a0dce2708560b',
|
u'description': u'md5:fb87405fcb42a331742a0dce2708560b',
|
||||||
},
|
}
|
||||||
u'params': {
|
|
||||||
# rtmp download
|
|
||||||
u'skip_download': True,
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
u'url': u'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940',
|
u'url': u'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940',
|
||||||
u'file': u'521be8ef82b16.flv',
|
u'file': u'521be8ef82b16.flv',
|
||||||
|
u'md5': u'9509ee44dcaa7c1068604817c19a9e50',
|
||||||
u'info_dict': {
|
u'info_dict': {
|
||||||
u'upload_date': u'20131002',
|
u'upload_date': u'20131002',
|
||||||
u'title': u'The Mummy’s Hand (1940)',
|
u'title': u'The Mummy’s Hand (1940)',
|
||||||
},
|
}
|
||||||
u'params': {
|
|
||||||
# rtmp download
|
|
||||||
u'skip_download': True,
|
|
||||||
},
|
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -25,14 +25,11 @@ class ClipfishIE(InfoExtractor):
|
|||||||
|
|
||||||
info_url = ('http://www.clipfish.de/devxml/videoinfo/%s?ts=%d' %
|
info_url = ('http://www.clipfish.de/devxml/videoinfo/%s?ts=%d' %
|
||||||
(video_id, int(time.time())))
|
(video_id, int(time.time())))
|
||||||
doc = self._download_xml(
|
info_xml = self._download_webpage(
|
||||||
info_url, video_id, note=u'Downloading info page')
|
info_url, video_id, note=u'Downloading info page')
|
||||||
|
doc = xml.etree.ElementTree.fromstring(info_xml)
|
||||||
title = doc.find('title').text
|
title = doc.find('title').text
|
||||||
video_url = doc.find('filename').text
|
video_url = doc.find('filename').text
|
||||||
if video_url is None:
|
|
||||||
xml_bytes = xml.etree.ElementTree.tostring(doc)
|
|
||||||
raise ExtractorError(u'Cannot find video URL in document %r' %
|
|
||||||
xml_bytes)
|
|
||||||
thumbnail = doc.find('imageurl').text
|
thumbnail = doc.find('imageurl').text
|
||||||
duration_str = doc.find('duration').text
|
duration_str = doc.find('duration').text
|
||||||
m = re.match(
|
m = re.match(
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
import re
|
import re
|
||||||
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import determine_ext
|
from ..utils import determine_ext
|
||||||
@ -32,7 +33,8 @@ class CNNIE(InfoExtractor):
|
|||||||
path = mobj.group('path')
|
path = mobj.group('path')
|
||||||
page_title = mobj.group('title')
|
page_title = mobj.group('title')
|
||||||
info_url = u'http://cnn.com/video/data/3.0/%s/index.xml' % path
|
info_url = u'http://cnn.com/video/data/3.0/%s/index.xml' % path
|
||||||
info = self._download_xml(info_url, page_title)
|
info_xml = self._download_webpage(info_url, page_title)
|
||||||
|
info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for f in info.findall('files/file'):
|
for f in info.findall('files/file'):
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
import re
|
import re
|
||||||
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from .mtv import MTVIE, _media_xml_tag
|
from .mtv import MTVIE, _media_xml_tag
|
||||||
@ -157,12 +158,13 @@ class ComedyCentralShowsIE(InfoExtractor):
|
|||||||
|
|
||||||
uri = mMovieParams[0][1]
|
uri = mMovieParams[0][1]
|
||||||
indexUrl = 'http://shadow.comedycentral.com/feeds/video_player/mrss/?' + compat_urllib_parse.urlencode({'uri': uri})
|
indexUrl = 'http://shadow.comedycentral.com/feeds/video_player/mrss/?' + compat_urllib_parse.urlencode({'uri': uri})
|
||||||
idoc = self._download_xml(indexUrl, epTitle,
|
indexXml = self._download_webpage(indexUrl, epTitle,
|
||||||
u'Downloading show index',
|
u'Downloading show index',
|
||||||
u'unable to download episode index')
|
u'unable to download episode index')
|
||||||
|
|
||||||
results = []
|
results = []
|
||||||
|
|
||||||
|
idoc = xml.etree.ElementTree.fromstring(indexXml)
|
||||||
itemEls = idoc.findall('.//item')
|
itemEls = idoc.findall('.//item')
|
||||||
for partNum,itemEl in enumerate(itemEls):
|
for partNum,itemEl in enumerate(itemEls):
|
||||||
mediaId = itemEl.findall('./guid')[0].text
|
mediaId = itemEl.findall('./guid')[0].text
|
||||||
@ -173,9 +175,10 @@ class ComedyCentralShowsIE(InfoExtractor):
|
|||||||
|
|
||||||
configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' +
|
configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' +
|
||||||
compat_urllib_parse.urlencode({'uri': mediaId}))
|
compat_urllib_parse.urlencode({'uri': mediaId}))
|
||||||
cdoc = self._download_xml(configUrl, epTitle,
|
configXml = self._download_webpage(configUrl, epTitle,
|
||||||
u'Downloading configuration for %s' % shortMediaId)
|
u'Downloading configuration for %s' % shortMediaId)
|
||||||
|
|
||||||
|
cdoc = xml.etree.ElementTree.fromstring(configXml)
|
||||||
turls = []
|
turls = []
|
||||||
for rendition in cdoc.findall('.//rendition'):
|
for rendition in cdoc.findall('.//rendition'):
|
||||||
finfo = (rendition.attrib['bitrate'], rendition.findall('./src')[0].text)
|
finfo = (rendition.attrib['bitrate'], rendition.findall('./src')[0].text)
|
||||||
|
@ -210,8 +210,7 @@ class InfoExtractor(object):
|
|||||||
""" Returns the data of the page as a string """
|
""" Returns the data of the page as a string """
|
||||||
return self._download_webpage_handle(url_or_request, video_id, note, errnote)[0]
|
return self._download_webpage_handle(url_or_request, video_id, note, errnote)[0]
|
||||||
|
|
||||||
def _download_xml(self, url_or_request, video_id,
|
def _download_xml(self, url_or_request, video_id, note=u'Downloading XML', errnote=u'Unable to downloand XML'):
|
||||||
note=u'Downloading XML', errnote=u'Unable to download XML'):
|
|
||||||
"""Return the xml as an xml.etree.ElementTree.Element"""
|
"""Return the xml as an xml.etree.ElementTree.Element"""
|
||||||
xml_string = self._download_webpage(url_or_request, video_id, note, errnote)
|
xml_string = self._download_webpage(url_or_request, video_id, note, errnote)
|
||||||
return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8'))
|
return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8'))
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
# encoding: utf-8
|
# encoding: utf-8
|
||||||
import re
|
import re
|
||||||
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@ -31,12 +32,14 @@ class DaumIE(InfoExtractor):
|
|||||||
full_id = self._search_regex(r'<link rel="video_src" href=".+?vid=(.+?)"',
|
full_id = self._search_regex(r'<link rel="video_src" href=".+?vid=(.+?)"',
|
||||||
webpage, u'full id')
|
webpage, u'full id')
|
||||||
query = compat_urllib_parse.urlencode({'vid': full_id})
|
query = compat_urllib_parse.urlencode({'vid': full_id})
|
||||||
info = self._download_xml(
|
info_xml = self._download_webpage(
|
||||||
'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query, video_id,
|
'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query, video_id,
|
||||||
u'Downloading video info')
|
u'Downloading video info')
|
||||||
urls = self._download_xml(
|
urls_xml = self._download_webpage(
|
||||||
'http://videofarm.daum.net/controller/api/open/v1_2/MovieData.apixml?' + query,
|
'http://videofarm.daum.net/controller/api/open/v1_2/MovieData.apixml?' + query,
|
||||||
video_id, u'Downloading video formats info')
|
video_id, u'Downloading video formats info')
|
||||||
|
info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
|
||||||
|
urls = xml.etree.ElementTree.fromstring(urls_xml.encode('utf-8'))
|
||||||
|
|
||||||
self.to_screen(u'%s: Getting video urls' % video_id)
|
self.to_screen(u'%s: Getting video urls' % video_id)
|
||||||
formats = []
|
formats = []
|
||||||
@ -46,9 +49,10 @@ class DaumIE(InfoExtractor):
|
|||||||
'vid': full_id,
|
'vid': full_id,
|
||||||
'profile': profile,
|
'profile': profile,
|
||||||
})
|
})
|
||||||
url_doc = self._download_xml(
|
url_xml = self._download_webpage(
|
||||||
'http://videofarm.daum.net/controller/api/open/v1_2/MovieLocation.apixml?' + format_query,
|
'http://videofarm.daum.net/controller/api/open/v1_2/MovieLocation.apixml?' + format_query,
|
||||||
video_id, note=False)
|
video_id, note=False)
|
||||||
|
url_doc = xml.etree.ElementTree.fromstring(url_xml.encode('utf-8'))
|
||||||
format_url = url_doc.find('result/url').text
|
format_url = url_doc.find('result/url').text
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': format_url,
|
'url': format_url,
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@ -29,7 +30,8 @@ class DreiSatIE(InfoExtractor):
|
|||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
|
details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
|
||||||
details_doc = self._download_xml(details_url, video_id, note=u'Downloading video details')
|
details_xml = self._download_webpage(details_url, video_id, note=u'Downloading video details')
|
||||||
|
details_doc = xml.etree.ElementTree.fromstring(details_xml.encode('utf-8'))
|
||||||
|
|
||||||
thumbnail_els = details_doc.findall('.//teaserimage')
|
thumbnail_els = details_doc.findall('.//teaserimage')
|
||||||
thumbnails = [{
|
thumbnails = [{
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
import re
|
import re
|
||||||
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import determine_ext
|
from ..utils import determine_ext
|
||||||
@ -20,8 +21,9 @@ class EbaumsWorldIE(InfoExtractor):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
config = self._download_xml(
|
config_xml = self._download_webpage(
|
||||||
'http://www.ebaumsworld.com/video/player/%s' % video_id, video_id)
|
'http://www.ebaumsworld.com/video/player/%s' % video_id, video_id)
|
||||||
|
config = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
|
||||||
video_url = config.find('file').text
|
video_url = config.find('file').text
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
# encoding: utf-8
|
# encoding: utf-8
|
||||||
import re
|
import re
|
||||||
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@ -27,8 +28,9 @@ class FazIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
config_xml_url = self._search_regex(r'writeFLV\(\'(.+?)\',', webpage,
|
config_xml_url = self._search_regex(r'writeFLV\(\'(.+?)\',', webpage,
|
||||||
u'config xml url')
|
u'config xml url')
|
||||||
config = self._download_xml(config_xml_url, video_id,
|
config_xml = self._download_webpage(config_xml_url, video_id,
|
||||||
u'Downloading config xml')
|
u'Downloading config xml')
|
||||||
|
config = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
|
||||||
|
|
||||||
encodings = config.find('ENCODINGS')
|
encodings = config.find('ENCODINGS')
|
||||||
formats = []
|
formats = []
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
# encoding: utf-8
|
# encoding: utf-8
|
||||||
import re
|
import re
|
||||||
|
import xml.etree.ElementTree
|
||||||
import json
|
import json
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
@ -10,10 +11,11 @@ from ..utils import (
|
|||||||
|
|
||||||
class FranceTVBaseInfoExtractor(InfoExtractor):
|
class FranceTVBaseInfoExtractor(InfoExtractor):
|
||||||
def _extract_video(self, video_id):
|
def _extract_video(self, video_id):
|
||||||
info = self._download_xml(
|
xml_desc = self._download_webpage(
|
||||||
'http://www.francetvinfo.fr/appftv/webservices/video/'
|
'http://www.francetvinfo.fr/appftv/webservices/video/'
|
||||||
'getInfosOeuvre.php?id-diffusion='
|
'getInfosOeuvre.php?id-diffusion='
|
||||||
+ video_id, video_id, 'Downloading XML config')
|
+ video_id, video_id, 'Downloading XML config')
|
||||||
|
info = xml.etree.ElementTree.fromstring(xml_desc.encode('utf-8'))
|
||||||
|
|
||||||
manifest_url = info.find('videos/video/url').text
|
manifest_url = info.find('videos/video/url').text
|
||||||
video_url = manifest_url.replace('manifest.f4m', 'index_2_av.m3u8')
|
video_url = manifest_url.replace('manifest.f4m', 'index_2_av.m3u8')
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
import re
|
import re
|
||||||
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@ -42,8 +43,9 @@ class InternetVideoArchiveIE(InfoExtractor):
|
|||||||
video_id = query_dic['publishedid'][0]
|
video_id = query_dic['publishedid'][0]
|
||||||
url = self._build_url(query)
|
url = self._build_url(query)
|
||||||
|
|
||||||
flashconfiguration = self._download_xml(url, video_id,
|
flashconfiguration_xml = self._download_webpage(url, video_id,
|
||||||
u'Downloading flash configuration')
|
u'Downloading flash configuration')
|
||||||
|
flashconfiguration = xml.etree.ElementTree.fromstring(flashconfiguration_xml.encode('utf-8'))
|
||||||
file_url = flashconfiguration.find('file').text
|
file_url = flashconfiguration.find('file').text
|
||||||
file_url = file_url.replace('/playlist.aspx', '/mrssplaylist.aspx')
|
file_url = file_url.replace('/playlist.aspx', '/mrssplaylist.aspx')
|
||||||
# Replace some of the parameters in the query to get the best quality
|
# Replace some of the parameters in the query to get the best quality
|
||||||
@ -51,8 +53,9 @@ class InternetVideoArchiveIE(InfoExtractor):
|
|||||||
file_url = re.sub(r'(?<=\?)(.+)$',
|
file_url = re.sub(r'(?<=\?)(.+)$',
|
||||||
lambda m: self._clean_query(m.group()),
|
lambda m: self._clean_query(m.group()),
|
||||||
file_url)
|
file_url)
|
||||||
info = self._download_xml(file_url, video_id,
|
info_xml = self._download_webpage(file_url, video_id,
|
||||||
u'Downloading video info')
|
u'Downloading video info')
|
||||||
|
info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
|
||||||
item = info.find('channel/item')
|
item = info.find('channel/item')
|
||||||
|
|
||||||
def _bp(p):
|
def _bp(p):
|
||||||
|
@ -2,6 +2,7 @@
|
|||||||
|
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
@ -31,9 +32,12 @@ class JeuxVideoIE(InfoExtractor):
|
|||||||
r'http://www\.jeuxvideo\.com/config/\w+/\d+/(.*?)/\d+_player\.xml',
|
r'http://www\.jeuxvideo\.com/config/\w+/\d+/(.*?)/\d+_player\.xml',
|
||||||
xml_link, u'video ID')
|
xml_link, u'video ID')
|
||||||
|
|
||||||
config = self._download_xml(
|
xml_config = self._download_webpage(
|
||||||
xml_link, title, u'Downloading XML config')
|
xml_link, title, u'Downloading XML config')
|
||||||
info_json = config.find('format.json').text
|
config = xml.etree.ElementTree.fromstring(xml_config.encode('utf-8'))
|
||||||
|
info_json = self._search_regex(
|
||||||
|
r'(?sm)<format\.json>(.*?)</format\.json>',
|
||||||
|
xml_config, u'JSON information')
|
||||||
info = json.loads(info_json)['versions'][0]
|
info = json.loads(info_json)['versions'][0]
|
||||||
|
|
||||||
video_url = 'http://video720.jeuxvideo.com/' + info['file']
|
video_url = 'http://video720.jeuxvideo.com/' + info['file']
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@ -93,9 +94,10 @@ class JustinTVIE(InfoExtractor):
|
|||||||
archive_id = m.group(1)
|
archive_id = m.group(1)
|
||||||
|
|
||||||
api = api_base + '/broadcast/by_chapter/%s.xml' % chapter_id
|
api = api_base + '/broadcast/by_chapter/%s.xml' % chapter_id
|
||||||
doc = self._download_xml(api, chapter_id,
|
chapter_info_xml = self._download_webpage(api, chapter_id,
|
||||||
note=u'Downloading chapter information',
|
note=u'Downloading chapter information',
|
||||||
errnote=u'Chapter information download failed')
|
errnote=u'Chapter information download failed')
|
||||||
|
doc = xml.etree.ElementTree.fromstring(chapter_info_xml)
|
||||||
for a in doc.findall('.//archive'):
|
for a in doc.findall('.//archive'):
|
||||||
if archive_id == a.find('./id').text:
|
if archive_id == a.find('./id').text:
|
||||||
break
|
break
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
import re
|
import re
|
||||||
import json
|
import json
|
||||||
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@ -79,7 +80,8 @@ class LivestreamOriginalIE(InfoExtractor):
|
|||||||
user = mobj.group('user')
|
user = mobj.group('user')
|
||||||
api_url = 'http://x{0}x.api.channel.livestream.com/2.0/clipdetails?extendedInfo=true&id={1}'.format(user, video_id)
|
api_url = 'http://x{0}x.api.channel.livestream.com/2.0/clipdetails?extendedInfo=true&id={1}'.format(user, video_id)
|
||||||
|
|
||||||
info = self._download_xml(api_url, video_id)
|
api_response = self._download_webpage(api_url, video_id)
|
||||||
|
info = xml.etree.ElementTree.fromstring(api_response.encode('utf-8'))
|
||||||
item = info.find('channel').find('item')
|
item = info.find('channel').find('item')
|
||||||
ns = {'media': 'http://search.yahoo.com/mrss'}
|
ns = {'media': 'http://search.yahoo.com/mrss'}
|
||||||
thumbnail_url = item.find(xpath_with_ns('media:thumbnail', ns)).attrib['url']
|
thumbnail_url = item.find(xpath_with_ns('media:thumbnail', ns)).attrib['url']
|
||||||
|
@ -109,8 +109,9 @@ class MTVIE(InfoExtractor):
|
|||||||
def _get_videos_info(self, uri):
|
def _get_videos_info(self, uri):
|
||||||
video_id = self._id_from_uri(uri)
|
video_id = self._id_from_uri(uri)
|
||||||
data = compat_urllib_parse.urlencode({'uri': uri})
|
data = compat_urllib_parse.urlencode({'uri': uri})
|
||||||
idoc = self._download_xml(self._FEED_URL +'?' + data, video_id,
|
infoXml = self._download_webpage(self._FEED_URL +'?' + data, video_id,
|
||||||
u'Downloading info')
|
u'Downloading info')
|
||||||
|
idoc = xml.etree.ElementTree.fromstring(infoXml.encode('utf-8'))
|
||||||
return [self._get_video_info(item) for item in idoc.findall('.//item')]
|
return [self._get_video_info(item) for item in idoc.findall('.//item')]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
import os.path
|
import os.path
|
||||||
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@ -32,7 +33,8 @@ class MySpassIE(InfoExtractor):
|
|||||||
|
|
||||||
# get metadata
|
# get metadata
|
||||||
metadata_url = META_DATA_URL_TEMPLATE % video_id
|
metadata_url = META_DATA_URL_TEMPLATE % video_id
|
||||||
metadata = self._download_xml(metadata_url, video_id)
|
metadata_text = self._download_webpage(metadata_url, video_id)
|
||||||
|
metadata = xml.etree.ElementTree.fromstring(metadata_text.encode('utf-8'))
|
||||||
|
|
||||||
# extract values from metadata
|
# extract values from metadata
|
||||||
url_flv_el = metadata.find('url_flv')
|
url_flv_el = metadata.find('url_flv')
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
# encoding: utf-8
|
# encoding: utf-8
|
||||||
import re
|
import re
|
||||||
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@ -37,12 +38,14 @@ class NaverIE(InfoExtractor):
|
|||||||
'protocol': 'p2p',
|
'protocol': 'p2p',
|
||||||
'inKey': key,
|
'inKey': key,
|
||||||
})
|
})
|
||||||
info = self._download_xml(
|
info_xml = self._download_webpage(
|
||||||
'http://serviceapi.rmcnmv.naver.com/flash/videoInfo.nhn?' + query,
|
'http://serviceapi.rmcnmv.naver.com/flash/videoInfo.nhn?' + query,
|
||||||
video_id, u'Downloading video info')
|
video_id, u'Downloading video info')
|
||||||
urls = self._download_xml(
|
urls_xml = self._download_webpage(
|
||||||
'http://serviceapi.rmcnmv.naver.com/flash/playableEncodingOption.nhn?' + query_urls,
|
'http://serviceapi.rmcnmv.naver.com/flash/playableEncodingOption.nhn?' + query_urls,
|
||||||
video_id, u'Downloading video formats info')
|
video_id, u'Downloading video formats info')
|
||||||
|
info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
|
||||||
|
urls = xml.etree.ElementTree.fromstring(urls_xml.encode('utf-8'))
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for format_el in urls.findall('EncodingOptions/EncodingOption'):
|
for format_el in urls.findall('EncodingOptions/EncodingOption'):
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
import re
|
import re
|
||||||
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import find_xpath_attr, compat_str
|
from ..utils import find_xpath_attr, compat_str
|
||||||
@ -20,8 +21,8 @@ class NBCNewsIE(InfoExtractor):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
all_info = self._download_xml('http://www.nbcnews.com/id/%s/displaymode/1219' % video_id, video_id)
|
info_xml = self._download_webpage('http://www.nbcnews.com/id/%s/displaymode/1219' % video_id, video_id)
|
||||||
info = all_info.find('video')
|
info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8')).find('video')
|
||||||
|
|
||||||
return {'id': video_id,
|
return {'id': video_id,
|
||||||
'title': info.find('headline').text,
|
'title': info.find('headline').text,
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
import re
|
import re
|
||||||
import json
|
import json
|
||||||
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@ -25,8 +26,9 @@ class NHLBaseInfoExtractor(InfoExtractor):
|
|||||||
'path': initial_video_url.replace('.mp4', '_sd.mp4'),
|
'path': initial_video_url.replace('.mp4', '_sd.mp4'),
|
||||||
})
|
})
|
||||||
path_url = 'http://video.nhl.com/videocenter/servlets/encryptvideopath?' + data
|
path_url = 'http://video.nhl.com/videocenter/servlets/encryptvideopath?' + data
|
||||||
path_doc = self._download_xml(path_url, video_id,
|
path_response = self._download_webpage(path_url, video_id,
|
||||||
u'Downloading final video url')
|
u'Downloading final video url')
|
||||||
|
path_doc = xml.etree.ElementTree.fromstring(path_response)
|
||||||
video_url = path_doc.find('path').text
|
video_url = path_doc.find('path').text
|
||||||
|
|
||||||
join = compat_urlparse.urljoin
|
join = compat_urlparse.urljoin
|
||||||
|
@ -2,6 +2,7 @@
|
|||||||
|
|
||||||
import re
|
import re
|
||||||
import socket
|
import socket
|
||||||
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@ -80,7 +81,7 @@ class NiconicoIE(InfoExtractor):
|
|||||||
# the cookies in order to be able to download the info webpage
|
# the cookies in order to be able to download the info webpage
|
||||||
self._download_webpage('http://www.nicovideo.jp/watch/' + video_id, video_id)
|
self._download_webpage('http://www.nicovideo.jp/watch/' + video_id, video_id)
|
||||||
|
|
||||||
video_info = self._download_xml(
|
video_info_webpage = self._download_webpage(
|
||||||
'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id,
|
'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id,
|
||||||
note=u'Downloading video info page')
|
note=u'Downloading video info page')
|
||||||
|
|
||||||
@ -91,6 +92,7 @@ class NiconicoIE(InfoExtractor):
|
|||||||
video_real_url = compat_urlparse.parse_qs(flv_info_webpage)['url'][0]
|
video_real_url = compat_urlparse.parse_qs(flv_info_webpage)['url'][0]
|
||||||
|
|
||||||
# Start extracting information
|
# Start extracting information
|
||||||
|
video_info = xml.etree.ElementTree.fromstring(video_info_webpage)
|
||||||
video_title = video_info.find('.//title').text
|
video_title = video_info.find('.//title').text
|
||||||
video_extension = video_info.find('.//movie_type').text
|
video_extension = video_info.find('.//movie_type').text
|
||||||
video_format = video_extension.upper()
|
video_format = video_extension.upper()
|
||||||
@ -105,11 +107,13 @@ class NiconicoIE(InfoExtractor):
|
|||||||
video_uploader = video_uploader_id
|
video_uploader = video_uploader_id
|
||||||
url = 'http://seiga.nicovideo.jp/api/user/info?id=' + video_uploader_id
|
url = 'http://seiga.nicovideo.jp/api/user/info?id=' + video_uploader_id
|
||||||
try:
|
try:
|
||||||
user_info = self._download_xml(
|
user_info_webpage = self._download_webpage(
|
||||||
url, video_id, note=u'Downloading user information')
|
url, video_id, note=u'Downloading user information')
|
||||||
video_uploader = user_info.find('.//nickname').text
|
|
||||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||||
self._downloader.report_warning(u'Unable to download user info webpage: %s' % compat_str(err))
|
self._downloader.report_warning(u'Unable to download user info webpage: %s' % compat_str(err))
|
||||||
|
else:
|
||||||
|
user_info = xml.etree.ElementTree.fromstring(user_info_webpage)
|
||||||
|
video_uploader = user_info.find('.//nickname').text
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@ -34,11 +35,12 @@ class SinaIE(InfoExtractor):
|
|||||||
|
|
||||||
def _extract_video(self, video_id):
|
def _extract_video(self, video_id):
|
||||||
data = compat_urllib_parse.urlencode({'vid': video_id})
|
data = compat_urllib_parse.urlencode({'vid': video_id})
|
||||||
url_doc = self._download_xml('http://v.iask.com/v_play.php?%s' % data,
|
url_page = self._download_webpage('http://v.iask.com/v_play.php?%s' % data,
|
||||||
video_id, u'Downloading video url')
|
video_id, u'Downloading video url')
|
||||||
image_page = self._download_webpage(
|
image_page = self._download_webpage(
|
||||||
'http://interface.video.sina.com.cn/interface/common/getVideoImage.php?%s' % data,
|
'http://interface.video.sina.com.cn/interface/common/getVideoImage.php?%s' % data,
|
||||||
video_id, u'Downloading thumbnail info')
|
video_id, u'Downloading thumbnail info')
|
||||||
|
url_doc = xml.etree.ElementTree.fromstring(url_page.encode('utf-8'))
|
||||||
|
|
||||||
return {'id': video_id,
|
return {'id': video_id,
|
||||||
'url': url_doc.find('./durl/url').text,
|
'url': url_doc.find('./durl/url').text,
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
import re
|
import re
|
||||||
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
@ -32,10 +33,12 @@ class SpiegelIE(InfoExtractor):
|
|||||||
r'<div class="module-title">(.*?)</div>', webpage, u'title')
|
r'<div class="module-title">(.*?)</div>', webpage, u'title')
|
||||||
|
|
||||||
xml_url = u'http://video2.spiegel.de/flash/' + video_id + u'.xml'
|
xml_url = u'http://video2.spiegel.de/flash/' + video_id + u'.xml'
|
||||||
idoc = self._download_xml(
|
xml_code = self._download_webpage(
|
||||||
xml_url, video_id,
|
xml_url, video_id,
|
||||||
note=u'Downloading XML', errnote=u'Failed to download XML')
|
note=u'Downloading XML', errnote=u'Failed to download XML')
|
||||||
|
|
||||||
|
idoc = xml.etree.ElementTree.fromstring(xml_code)
|
||||||
|
|
||||||
formats = [
|
formats = [
|
||||||
{
|
{
|
||||||
'format_id': n.tag.rpartition('type')[2],
|
'format_id': n.tag.rpartition('type')[2],
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
import re
|
import re
|
||||||
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@ -31,7 +32,8 @@ class TeamcocoIE(InfoExtractor):
|
|||||||
self.report_extraction(video_id)
|
self.report_extraction(video_id)
|
||||||
|
|
||||||
data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
|
data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
|
||||||
data = self._download_xml(data_url, video_id, 'Downloading data webpage')
|
data_xml = self._download_webpage(data_url, video_id, 'Downloading data webpage')
|
||||||
|
data = xml.etree.ElementTree.fromstring(data_xml.encode('utf-8'))
|
||||||
|
|
||||||
|
|
||||||
qualities = ['500k', '480p', '1000k', '720p', '1080p']
|
qualities = ['500k', '480p', '1000k', '720p', '1080p']
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
import re
|
import re
|
||||||
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@ -39,9 +40,11 @@ class TouTvIE(InfoExtractor):
|
|||||||
r'"idMedia":\s*"([^"]+)"', webpage, u'media ID')
|
r'"idMedia":\s*"([^"]+)"', webpage, u'media ID')
|
||||||
|
|
||||||
streams_url = u'http://release.theplatform.com/content.select?pid=' + mediaId
|
streams_url = u'http://release.theplatform.com/content.select?pid=' + mediaId
|
||||||
streams_doc = self._download_xml(
|
streams_webpage = self._download_webpage(
|
||||||
streams_url, video_id, note=u'Downloading stream list')
|
streams_url, video_id, note=u'Downloading stream list')
|
||||||
|
|
||||||
|
streams_doc = xml.etree.ElementTree.fromstring(
|
||||||
|
streams_webpage.encode('utf-8'))
|
||||||
video_url = next(n.text
|
video_url = next(n.text
|
||||||
for n in streams_doc.findall('.//choice/url')
|
for n in streams_doc.findall('.//choice/url')
|
||||||
if u'//ad.doubleclick' not in n.text)
|
if u'//ad.doubleclick' not in n.text)
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
@ -35,10 +36,12 @@ class TriluliluIE(InfoExtractor):
|
|||||||
|
|
||||||
format_url = (u'http://fs%(server)s.trilulilu.ro/%(hash)s/'
|
format_url = (u'http://fs%(server)s.trilulilu.ro/%(hash)s/'
|
||||||
u'video-formats2' % log)
|
u'video-formats2' % log)
|
||||||
format_doc = self._download_xml(
|
format_str = self._download_webpage(
|
||||||
format_url, video_id,
|
format_url, video_id,
|
||||||
note=u'Downloading formats',
|
note=u'Downloading formats',
|
||||||
errnote=u'Error while downloading formats')
|
errnote=u'Error while downloading formats')
|
||||||
|
|
||||||
|
format_doc = xml.etree.ElementTree.fromstring(format_str)
|
||||||
|
|
||||||
video_url_template = (
|
video_url_template = (
|
||||||
u'http://fs%(server)s.trilulilu.ro/stream.php?type=video'
|
u'http://fs%(server)s.trilulilu.ro/stream.php?type=video'
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
import re
|
import re
|
||||||
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@ -26,8 +27,9 @@ class VideofyMeIE(InfoExtractor):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
config = self._download_xml('http://sunshine.videofy.me/?videoId=%s' % video_id,
|
config_xml = self._download_webpage('http://sunshine.videofy.me/?videoId=%s' % video_id,
|
||||||
video_id)
|
video_id)
|
||||||
|
config = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
|
||||||
video = config.find('video')
|
video = config.find('video')
|
||||||
sources = video.find('sources')
|
sources = video.find('sources')
|
||||||
url_node = next(node for node in [find_xpath_attr(sources, 'source', 'id', 'HQ %s' % key)
|
url_node = next(node for node in [find_xpath_attr(sources, 'source', 'id', 'HQ %s' % key)
|
||||||
|
@ -5,16 +5,14 @@ from .common import InfoExtractor
|
|||||||
|
|
||||||
|
|
||||||
class VideoPremiumIE(InfoExtractor):
|
class VideoPremiumIE(InfoExtractor):
|
||||||
_VALID_URL = r'(?:https?://)?(?:www\.)?videopremium\.(?:tv|me)/(?P<id>\w+)(?:/.*)?'
|
_VALID_URL = r'(?:https?://)?(?:www\.)?videopremium\.tv/(?P<id>\w+)(?:/.*)?'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://videopremium.tv/4w7oadjsf156',
|
u'url': u'http://videopremium.tv/4w7oadjsf156',
|
||||||
u'file': u'4w7oadjsf156.f4v',
|
u'file': u'4w7oadjsf156.f4v',
|
||||||
|
u'md5': u'e51e4a266aab7531c6ac06f4ffee3b0d',
|
||||||
u'info_dict': {
|
u'info_dict': {
|
||||||
u"title": u"youtube-dl_test_video____a_________-BaW_jenozKc.mp4.mp4"
|
u"title": u"youtube-dl_test_video____a_________-BaW_jenozKc.mp4.mp4"
|
||||||
},
|
}
|
||||||
u'params': {
|
|
||||||
u'skip_download': True,
|
|
||||||
},
|
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@ -41,4 +39,4 @@ class VideoPremiumIE(InfoExtractor):
|
|||||||
'player_url': "http://videopremium.tv/uplayer/uppod.swf",
|
'player_url': "http://videopremium.tv/uplayer/uppod.swf",
|
||||||
'ext': 'f4v',
|
'ext': 'f4v',
|
||||||
'title': video_title,
|
'title': video_title,
|
||||||
}
|
}
|
@ -46,7 +46,7 @@ class YahooIE(InfoExtractor):
|
|||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
items_json = self._search_regex(r'mediaItems: ({.*?})$',
|
items_json = self._search_regex(r'YVIDEO_INIT_ITEMS = ({.*?});$',
|
||||||
webpage, u'items', flags=re.MULTILINE)
|
webpage, u'items', flags=re.MULTILINE)
|
||||||
items = json.loads(items_json)
|
items = json.loads(items_json)
|
||||||
info = items['mediaItems']['query']['results']['mediaObj'][0]
|
info = items['mediaItems']['query']['results']['mediaObj'][0]
|
||||||
@ -91,13 +91,17 @@ class YahooIE(InfoExtractor):
|
|||||||
formats.append(format_info)
|
formats.append(format_info)
|
||||||
formats = sorted(formats, key=lambda f:(f['height'], f['width']))
|
formats = sorted(formats, key=lambda f:(f['height'], f['width']))
|
||||||
|
|
||||||
return {
|
info = {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': meta['title'],
|
'title': meta['title'],
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'description': clean_html(meta['description']),
|
'description': clean_html(meta['description']),
|
||||||
'thumbnail': meta['thumbnail'],
|
'thumbnail': meta['thumbnail'],
|
||||||
}
|
}
|
||||||
|
# TODO: Remove when #980 has been merged
|
||||||
|
info.update(formats[-1])
|
||||||
|
|
||||||
|
return info
|
||||||
|
|
||||||
|
|
||||||
class YahooSearchIE(SearchInfoExtractor):
|
class YahooSearchIE(SearchInfoExtractor):
|
||||||
|
@ -11,6 +11,7 @@ import socket
|
|||||||
import string
|
import string
|
||||||
import struct
|
import struct
|
||||||
import traceback
|
import traceback
|
||||||
|
import xml.etree.ElementTree
|
||||||
import zlib
|
import zlib
|
||||||
|
|
||||||
from .common import InfoExtractor, SearchInfoExtractor
|
from .common import InfoExtractor, SearchInfoExtractor
|
||||||
@ -28,7 +29,6 @@ from ..utils import (
|
|||||||
clean_html,
|
clean_html,
|
||||||
get_cachedir,
|
get_cachedir,
|
||||||
get_element_by_id,
|
get_element_by_id,
|
||||||
get_element_by_attribute,
|
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
@ -1144,7 +1144,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
'asrs': 1,
|
'asrs': 1,
|
||||||
})
|
})
|
||||||
list_url = caption_url + '&' + list_params
|
list_url = caption_url + '&' + list_params
|
||||||
caption_list = self._download_xml(list_url, video_id)
|
list_page = self._download_webpage(list_url, video_id)
|
||||||
|
caption_list = xml.etree.ElementTree.fromstring(list_page.encode('utf-8'))
|
||||||
original_lang_node = caption_list.find('track')
|
original_lang_node = caption_list.find('track')
|
||||||
if original_lang_node is None or original_lang_node.attrib.get('kind') != 'asr' :
|
if original_lang_node is None or original_lang_node.attrib.get('kind') != 'asr' :
|
||||||
self._downloader.report_warning(u'Video doesn\'t have automatic captions')
|
self._downloader.report_warning(u'Video doesn\'t have automatic captions')
|
||||||
@ -1527,7 +1528,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
|||||||
)"""
|
)"""
|
||||||
_TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&page=%s'
|
_TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&page=%s'
|
||||||
_MORE_PAGES_INDICATOR = r'data-link-type="next"'
|
_MORE_PAGES_INDICATOR = r'data-link-type="next"'
|
||||||
_VIDEO_RE = r'href="/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&[^"]*?index=(?P<index>\d+)'
|
_VIDEO_RE = r'href="/watch\?v=([0-9A-Za-z_-]{11})&'
|
||||||
IE_NAME = u'youtube:playlist'
|
IE_NAME = u'youtube:playlist'
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
@ -1538,24 +1539,6 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
|||||||
def _real_initialize(self):
|
def _real_initialize(self):
|
||||||
self._login()
|
self._login()
|
||||||
|
|
||||||
def _ids_to_results(self, ids):
|
|
||||||
return [self.url_result(vid_id, 'Youtube', video_id=vid_id)
|
|
||||||
for vid_id in ids]
|
|
||||||
|
|
||||||
def _extract_mix(self, playlist_id):
|
|
||||||
# The mixes are generated from a a single video
|
|
||||||
# the id of the playlist is just 'RD' + video_id
|
|
||||||
url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[2:], playlist_id)
|
|
||||||
webpage = self._download_webpage(url, playlist_id, u'Downloading Youtube mix')
|
|
||||||
title_span = (get_element_by_attribute('class', 'title long-title', webpage) or
|
|
||||||
get_element_by_attribute('class', 'title ', webpage))
|
|
||||||
title = clean_html(title_span)
|
|
||||||
video_re = r'data-index="\d+".*?href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s' % re.escape(playlist_id)
|
|
||||||
ids = orderedSet(re.findall(video_re, webpage))
|
|
||||||
url_results = self._ids_to_results(ids)
|
|
||||||
|
|
||||||
return self.playlist_result(url_results, playlist_id, title)
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
# Extract playlist id
|
# Extract playlist id
|
||||||
mobj = re.match(self._VALID_URL, url, re.VERBOSE)
|
mobj = re.match(self._VALID_URL, url, re.VERBOSE)
|
||||||
@ -1573,20 +1556,14 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
|||||||
else:
|
else:
|
||||||
self.to_screen(u'Downloading playlist PL%s - add --no-playlist to just download video %s' % (playlist_id, video_id))
|
self.to_screen(u'Downloading playlist PL%s - add --no-playlist to just download video %s' % (playlist_id, video_id))
|
||||||
|
|
||||||
if len(playlist_id) == 13: # 'RD' + 11 characters for the video id
|
|
||||||
# Mixes require a custom extraction process
|
|
||||||
return self._extract_mix(playlist_id)
|
|
||||||
|
|
||||||
# Extract the video ids from the playlist pages
|
# Extract the video ids from the playlist pages
|
||||||
ids = []
|
ids = []
|
||||||
|
|
||||||
for page_num in itertools.count(1):
|
for page_num in itertools.count(1):
|
||||||
url = self._TEMPLATE_URL % (playlist_id, page_num)
|
url = self._TEMPLATE_URL % (playlist_id, page_num)
|
||||||
page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)
|
page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)
|
||||||
matches = re.finditer(self._VIDEO_RE, page)
|
# The ids are duplicated
|
||||||
# We remove the duplicates and the link with index 0
|
new_ids = orderedSet(re.findall(self._VIDEO_RE, page))
|
||||||
# (it's not the first video of the playlist)
|
|
||||||
new_ids = orderedSet(m.group('id') for m in matches if m.group('index') != '0')
|
|
||||||
ids.extend(new_ids)
|
ids.extend(new_ids)
|
||||||
|
|
||||||
if re.search(self._MORE_PAGES_INDICATOR, page) is None:
|
if re.search(self._MORE_PAGES_INDICATOR, page) is None:
|
||||||
@ -1594,7 +1571,8 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
|||||||
|
|
||||||
playlist_title = self._og_search_title(page)
|
playlist_title = self._og_search_title(page)
|
||||||
|
|
||||||
url_results = self._ids_to_results(ids)
|
url_results = [self.url_result(vid_id, 'Youtube', video_id=vid_id)
|
||||||
|
for vid_id in ids]
|
||||||
return self.playlist_result(url_results, playlist_id, playlist_title)
|
return self.playlist_result(url_results, playlist_id, playlist_title)
|
||||||
|
|
||||||
|
|
||||||
@ -1791,6 +1769,7 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
|
|||||||
Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
|
Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
|
||||||
"""
|
"""
|
||||||
_LOGIN_REQUIRED = True
|
_LOGIN_REQUIRED = True
|
||||||
|
_PAGING_STEP = 30
|
||||||
# use action_load_personal_feed instead of action_load_system_feed
|
# use action_load_personal_feed instead of action_load_system_feed
|
||||||
_PERSONAL_FEED = False
|
_PERSONAL_FEED = False
|
||||||
|
|
||||||
@ -1810,8 +1789,9 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
feed_entries = []
|
feed_entries = []
|
||||||
paging = 0
|
# The step argument is available only in 2.7 or higher
|
||||||
for i in itertools.count(1):
|
for i in itertools.count(0):
|
||||||
|
paging = i*self._PAGING_STEP
|
||||||
info = self._download_webpage(self._FEED_TEMPLATE % paging,
|
info = self._download_webpage(self._FEED_TEMPLATE % paging,
|
||||||
u'%s feed' % self._FEED_NAME,
|
u'%s feed' % self._FEED_NAME,
|
||||||
u'Downloading page %s' % i)
|
u'Downloading page %s' % i)
|
||||||
@ -1824,7 +1804,6 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
|
|||||||
for video_id in ids)
|
for video_id in ids)
|
||||||
if info['paging'] is None:
|
if info['paging'] is None:
|
||||||
break
|
break
|
||||||
paging = info['paging']
|
|
||||||
return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
|
return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
|
||||||
|
|
||||||
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
|
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
|
||||||
@ -1844,6 +1823,7 @@ class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
|
|||||||
_VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
|
_VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
|
||||||
_FEED_NAME = 'watch_later'
|
_FEED_NAME = 'watch_later'
|
||||||
_PLAYLIST_TITLE = u'Youtube Watch Later'
|
_PLAYLIST_TITLE = u'Youtube Watch Later'
|
||||||
|
_PAGING_STEP = 100
|
||||||
_PERSONAL_FEED = True
|
_PERSONAL_FEED = True
|
||||||
|
|
||||||
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
|
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
|
||||||
@ -1853,6 +1833,13 @@ class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
|
|||||||
_PERSONAL_FEED = True
|
_PERSONAL_FEED = True
|
||||||
_PLAYLIST_TITLE = u'Youtube Watch History'
|
_PLAYLIST_TITLE = u'Youtube Watch History'
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
webpage = self._download_webpage('https://www.youtube.com/feed/history', u'History')
|
||||||
|
data_paging = self._search_regex(r'data-paging="(\d+)"', webpage, u'data-paging')
|
||||||
|
# The step is actually a ridiculously big number (like 1374343569725646)
|
||||||
|
self._PAGING_STEP = int(data_paging)
|
||||||
|
return super(YoutubeHistoryIE, self)._real_extract(url)
|
||||||
|
|
||||||
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
|
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
|
||||||
IE_NAME = u'youtube:favorites'
|
IE_NAME = u'youtube:favorites'
|
||||||
IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
|
IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
|
||||||
|
@ -1,38 +1,24 @@
|
|||||||
# coding: utf-8
|
|
||||||
|
|
||||||
import operator
|
import operator
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
parse_xml_doc,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class ZDFIE(InfoExtractor):
|
class ZDFIE(InfoExtractor):
|
||||||
_VALID_URL = r'^https?://www\.zdf\.de/ZDFmediathek(?P<hash>#)?/(.*beitrag/(?:video/)?)(?P<video_id>[0-9]+)(?:/[^/?]+)?(?:\?.*)?'
|
_VALID_URL = r'^http://www\.zdf\.de\/ZDFmediathek(?P<hash>#)?\/(.*beitrag\/video\/)(?P<video_id>[^/\?]+)(?:\?.*)?'
|
||||||
|
|
||||||
_TEST = {
|
|
||||||
u"url": u"http://www.zdf.de/ZDFmediathek/beitrag/video/2037704/ZDFspezial---Ende-des-Machtpokers--?bc=sts;stt",
|
|
||||||
u"file": u"2037704.webm",
|
|
||||||
u"info_dict": {
|
|
||||||
u"upload_date": u"20131127",
|
|
||||||
u"description": u"Union und SPD haben sich auf einen Koalitionsvertrag geeinigt. Aber was bedeutet das für die Bürger? Sehen Sie hierzu das ZDFspezial \"Ende des Machtpokers - Große Koalition für Deutschland\".",
|
|
||||||
u"uploader": u"spezial",
|
|
||||||
u"title": u"ZDFspezial - Ende des Machtpokers"
|
|
||||||
},
|
|
||||||
u"skip": u"Videos on ZDF.de are depublicised in short order",
|
|
||||||
}
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('video_id')
|
video_id = mobj.group('video_id')
|
||||||
|
|
||||||
xml_url = u'http://www.zdf.de/ZDFmediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
|
xml_url = u'http://www.zdf.de/ZDFmediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
|
||||||
doc = self._download_xml(
|
info_xml = self._download_webpage(
|
||||||
xml_url, video_id,
|
xml_url, video_id, note=u'Downloading video info')
|
||||||
note=u'Downloading video info',
|
doc = parse_xml_doc(info_xml)
|
||||||
errnote=u'Failed to download video info')
|
|
||||||
|
|
||||||
title = doc.find('.//information/title').text
|
title = doc.find('.//information/title').text
|
||||||
description = doc.find('.//information/detail').text
|
description = doc.find('.//information/detail').text
|
||||||
|
@ -1009,6 +1009,11 @@ def unsmuggle_url(smug_url):
|
|||||||
return url, data
|
return url, data
|
||||||
|
|
||||||
|
|
||||||
|
def parse_xml_doc(s):
|
||||||
|
assert isinstance(s, type(u''))
|
||||||
|
return xml.etree.ElementTree.fromstring(s.encode('utf-8'))
|
||||||
|
|
||||||
|
|
||||||
def format_bytes(bytes):
|
def format_bytes(bytes):
|
||||||
if bytes is None:
|
if bytes is None:
|
||||||
return u'N/A'
|
return u'N/A'
|
||||||
|
@ -1,2 +1,2 @@
|
|||||||
|
|
||||||
__version__ = '2013.11.28'
|
__version__ = '2013.11.25.3'
|
||||||
|
Reference in New Issue
Block a user