Compare commits
56 commits: 2014.04.13 ... 2014.04.21

SHA1
6ea74538e3
24b8924b46
86a3c67112
8be874370d
aec74dd95a
6890574256
d03745c684
28746fbd59
0321213c11
3f0aae4244
48099643cc
621f33c9d0
f07a9f6f43
e51880fd32
88ce273da4
b9ba5dfa28
4086f11929
478c2c6193
d2d6481afb
43acb120f3
e8f2025edf
a4eb9578af
fa35cdad02
d1b9c912a4
edec83a025
c0a7c60815
117a7d1944
a40e0dd434
188b086dd9
1f27d2c0e1
7560096db5
282cb9c7ba
3a9d6790ad
0610a3e0b2
7f9c31df88
3fa6b6e293
3c50b99ab4
52fadd5fb2
5367fe7f4d
427588f6e7
51745be312
d7f1e7c88f
4145a257be
525dc9809e
1bf3210816
e6c6d10d99
f270256e06
f401c6f69f
b075d25bed
3d1bb6b4dd
1db2666916
8f5c0218d8
d7666dff82
2d4c98dbd1
fd50bf623c
d360a14678
@@ -74,13 +74,19 @@ class FakeYDL(YoutubeDL):
             old_report_warning(message)
         self.report_warning = types.MethodType(report_warning, self)
 
+
-def gettestcases():
+def gettestcases(include_onlymatching=False):
     for ie in youtube_dl.extractor.gen_extractors():
         t = getattr(ie, '_TEST', None)
         if t:
-            t['name'] = type(ie).__name__[:-len('IE')]
-            yield t
-        for t in getattr(ie, '_TESTS', []):
+            assert not hasattr(ie, '_TESTS'), \
+                '%s has _TEST and _TESTS' % type(ie).__name__
+            tests = [t]
+        else:
+            tests = getattr(ie, '_TESTS', [])
+        for t in tests:
+            if not include_onlymatching and t.get('only_matching', False):
+                continue
             t['name'] = type(ie).__name__[:-len('IE')]
             yield t
 
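Aside (illustration only, not part of the diff): a minimal sketch of how the new include_onlymatching flag changes what the helper yields, assuming the helper is imported from test/helper.py as elsewhere in the test suite; test_no_duplicates in the next hunk passes the flag explicitly.

```python
from test.helper import gettestcases

# By default, test cases marked 'only_matching' are skipped; passing
# include_onlymatching=True keeps them (useful for URL-matching checks).
run_cases = list(gettestcases())
all_cases = list(gettestcases(include_onlymatching=True))
assert len(all_cases) >= len(run_cases)
```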
@@ -77,20 +77,20 @@ class TestAllURLsMatching(unittest.TestCase):
         self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])
 
     def test_justin_tv_channelid_matching(self):
-        self.assertTrue(JustinTVIE.suitable(u"justin.tv/vanillatv"))
-        self.assertTrue(JustinTVIE.suitable(u"twitch.tv/vanillatv"))
-        self.assertTrue(JustinTVIE.suitable(u"www.justin.tv/vanillatv"))
-        self.assertTrue(JustinTVIE.suitable(u"www.twitch.tv/vanillatv"))
-        self.assertTrue(JustinTVIE.suitable(u"http://www.justin.tv/vanillatv"))
-        self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/vanillatv"))
-        self.assertTrue(JustinTVIE.suitable(u"http://www.justin.tv/vanillatv/"))
-        self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/vanillatv/"))
+        self.assertTrue(JustinTVIE.suitable('justin.tv/vanillatv'))
+        self.assertTrue(JustinTVIE.suitable('twitch.tv/vanillatv'))
+        self.assertTrue(JustinTVIE.suitable('www.justin.tv/vanillatv'))
+        self.assertTrue(JustinTVIE.suitable('www.twitch.tv/vanillatv'))
+        self.assertTrue(JustinTVIE.suitable('http://www.justin.tv/vanillatv'))
+        self.assertTrue(JustinTVIE.suitable('http://www.twitch.tv/vanillatv'))
+        self.assertTrue(JustinTVIE.suitable('http://www.justin.tv/vanillatv/'))
+        self.assertTrue(JustinTVIE.suitable('http://www.twitch.tv/vanillatv/'))
 
     def test_justintv_videoid_matching(self):
-        self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/vanillatv/b/328087483"))
+        self.assertTrue(JustinTVIE.suitable('http://www.twitch.tv/vanillatv/b/328087483'))
 
     def test_justin_tv_chapterid_matching(self):
-        self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/tsm_theoddone/c/2349361"))
+        self.assertTrue(JustinTVIE.suitable('http://www.twitch.tv/tsm_theoddone/c/2349361'))
 
     def test_youtube_extract(self):
         assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id)

@@ -106,7 +106,7 @@ class TestAllURLsMatching(unittest.TestCase):
 
     def test_no_duplicates(self):
         ies = gen_extractors()
-        for tc in gettestcases():
+        for tc in gettestcases(include_onlymatching=True):
             url = tc['url']
             for ie in ies:
                 if type(ie).__name__ in ('GenericIE', tc['name'] + 'IE'):

@@ -176,5 +176,6 @@ class TestAllURLsMatching(unittest.TestCase):
             'https://screen.yahoo.com/smartwatches-latest-wearable-gadgets-163745379-cbs.html',
             ['Yahoo'])
 
+
 if __name__ == '__main__':
     unittest.main()
@@ -192,8 +192,8 @@ class TestPlaylists(unittest.TestCase):
         self.assertIsPlaylist(result)
         self.assertEqual(result['id'], 'dezhurnyi_angel')
         self.assertEqual(result['title'], 'Дежурный ангел (2010 - 2012)')
-        self.assertTrue(len(result['entries']) >= 36)
+        self.assertTrue(len(result['entries']) >= 23)
 
     def test_ivi_compilation_season(self):
         dl = FakeYDL()
         ie = IviCompilationIE(dl)

@@ -201,7 +201,7 @@ class TestPlaylists(unittest.TestCase):
         self.assertIsPlaylist(result)
         self.assertEqual(result['id'], 'dezhurnyi_angel/season2')
         self.assertEqual(result['title'], 'Дежурный ангел (2010 - 2012) 2 сезон')
-        self.assertTrue(len(result['entries']) >= 20)
+        self.assertTrue(len(result['entries']) >= 7)
 
     def test_imdb_list(self):
         dl = FakeYDL()
@@ -181,7 +181,7 @@ class TestTedSubtitles(BaseTestSubtitles):
         self.DL.params['writesubtitles'] = True
         self.DL.params['allsubtitles'] = True
         subtitles = self.getSubtitles()
-        self.assertEqual(len(subtitles.keys()), 28)
+        self.assertTrue(len(subtitles.keys()) >= 28)
 
     def test_list_subtitles(self):
         self.DL.expect_warning(u'Automatic Captions not supported by this server')
@@ -20,6 +20,7 @@ from .auengine import AUEngineIE
 from .bambuser import BambuserIE, BambuserChannelIE
 from .bandcamp import BandcampIE, BandcampAlbumIE
 from .bbccouk import BBCCoUkIE
+from .bilibili import BiliBiliIE
 from .blinkx import BlinkxIE
 from .bliptv import BlipTVIE, BlipTVUserIE
 from .bloomberg import BloombergIE

@@ -40,6 +41,7 @@ from .cinemassacre import CinemassacreIE
 from .clipfish import ClipfishIE
 from .cliphunter import CliphunterIE
 from .clipsyndicate import ClipsyndicateIE
+from .clubic import ClubicIE
 from .cmt import CMTIE
 from .cnet import CNETIE
 from .cnn import (

@@ -181,6 +183,7 @@ from .nfb import NFBIE
 from .nhl import NHLIE, NHLVideocenterIE
 from .niconico import NiconicoIE
 from .ninegag import NineGagIE
+from .noco import NocoIE
 from .normalboots import NormalbootsIE
 from .novamov import NovaMovIE
 from .nowness import NownessIE

@@ -251,6 +254,7 @@ from .tf1 import TF1IE
 from .theplatform import ThePlatformIE
 from .thisav import ThisAVIE
 from .tinypic import TinyPicIE
+from .tlc import TlcIE, TlcDeIE
 from .toutv import TouTvIE
 from .toypics import ToypicsUserIE, ToypicsIE
 from .traileraddict import TrailerAddictIE

@@ -293,6 +297,7 @@ from .vine import VineIE
 from .viki import VikiIE
 from .vk import VKIE
 from .vube import VubeIE
+from .vuclip import VuClipIE
 from .washingtonpost import WashingtonPostIE
 from .wat import WatIE
 from .wdr import (
@@ -74,7 +74,8 @@ class ArteTVPlus7IE(InfoExtractor):
         return self._extract_from_webpage(webpage, video_id, lang)
 
     def _extract_from_webpage(self, webpage, video_id, lang):
-        json_url = self._html_search_regex(r'arte_vp_url="(.*?)"', webpage, 'json url')
+        json_url = self._html_search_regex(
+            r'arte_vp_url="(.*?)"', webpage, 'json vp url')
         return self._extract_from_json_url(json_url, video_id, lang)
 
     def _extract_from_json_url(self, json_url, video_id, lang):
youtube_dl/extractor/bilibili.py (new file, 106 lines)
@@ -0,0 +1,106 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_parse_qs,
+    ExtractorError,
+    int_or_none,
+    unified_strdate,
+)
+
+
+class BiliBiliIE(InfoExtractor):
+    _VALID_URL = r'http://www\.bilibili\.tv/video/av(?P<id>[0-9]+)/'
+
+    _TEST = {
+        'url': 'http://www.bilibili.tv/video/av1074402/',
+        'md5': '2c301e4dab317596e837c3e7633e7d86',
+        'info_dict': {
+            'id': '1074402',
+            'ext': 'flv',
+            'title': '【金坷垃】金泡沫',
+            'duration': 308,
+            'upload_date': '20140420',
+            'thumbnail': 're:^https?://.+\.jpg',
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+        video_code = self._search_regex(
+            r'(?s)<div itemprop="video".*?>(.*?)</div>', webpage, 'video code')
+
+        title = self._html_search_meta(
+            'media:title', video_code, 'title', fatal=True)
+        duration_str = self._html_search_meta(
+            'duration', video_code, 'duration')
+        if duration_str is None:
+            duration = None
+        else:
+            duration_mobj = re.match(
+                r'^T(?:(?P<hours>[0-9]+)H)?(?P<minutes>[0-9]+)M(?P<seconds>[0-9]+)S$',
+                duration_str)
+            duration = (
+                int_or_none(duration_mobj.group('hours'), default=0) * 3600 +
+                int(duration_mobj.group('minutes')) * 60 +
+                int(duration_mobj.group('seconds')))
+        upload_date = unified_strdate(self._html_search_meta(
+            'uploadDate', video_code, fatal=False))
+        thumbnail = self._html_search_meta(
+            'thumbnailUrl', video_code, 'thumbnail', fatal=False)
+
+        player_params = compat_parse_qs(self._html_search_regex(
+            r'<iframe .*?class="player" src="https://secure.bilibili.tv/secure,([^"]+)"',
+            webpage, 'player params'))
+
+        if 'cid' in player_params:
+            cid = player_params['cid'][0]
+
+            lq_doc = self._download_xml(
+                'http://interface.bilibili.cn/v_cdn_play?cid=%s' % cid,
+                video_id,
+                note='Downloading LQ video info'
+            )
+            lq_durl = lq_doc.find('.//durl')
+            formats = [{
+                'format_id': 'lq',
+                'quality': 1,
+                'url': lq_durl.find('./url').text,
+                'filesize': int_or_none(
+                    lq_durl.find('./size'), get_attr='text'),
+            }]
+
+            hq_doc = self._download_xml(
+                'http://interface.bilibili.cn/playurl?cid=%s' % cid,
+                video_id,
+                note='Downloading HQ video info',
+                fatal=False,
+            )
+            if hq_doc is not False:
+                hq_durl = hq_doc.find('.//durl')
+                formats.append({
+                    'format_id': 'hq',
+                    'quality': 2,
+                    'ext': 'flv',
+                    'url': hq_durl.find('./url').text,
+                    'filesize': int_or_none(
+                        hq_durl.find('./size'), get_attr='text'),
+                })
+        else:
+            raise ExtractorError('Unsupported player parameters: %r' % (player_params,))
+
+        self._sort_formats(formats)
+        return {
+            'id': video_id,
+            'title': title,
+            'formats': formats,
+            'duration': duration,
+            'upload_date': upload_date,
+            'thumbnail': thumbnail,
+        }
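Aside (illustration only, not part of the new extractor): the duration meta value is an ISO-8601-style "T…H…M…S" string, and the regex in the extractor above converts it to seconds. The input string here is a hypothetical example; the expected result matches the 308-second duration in the _TEST block.

```python
import re

duration_str = 'T5M8S'  # hypothetical input in the format the regex expects
mobj = re.match(
    r'^T(?:(?P<hours>[0-9]+)H)?(?P<minutes>[0-9]+)M(?P<seconds>[0-9]+)S$',
    duration_str)
duration = (
    int(mobj.group('hours') or 0) * 3600 +
    int(mobj.group('minutes')) * 60 +
    int(mobj.group('seconds')))
assert duration == 308  # matches the 'duration' field in the _TEST above
```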
@@ -1,4 +1,6 @@
 # encoding: utf-8
+from __future__ import unicode_literals
+
 import re
 
 from .common import InfoExtractor

@@ -8,46 +10,56 @@ from ..utils import unified_strdate
 class CanalplusIE(InfoExtractor):
     _VALID_URL = r'https?://(www\.canalplus\.fr/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>\d+))'
     _VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/cplus/%s'
-    IE_NAME = u'canalplus.fr'
+    IE_NAME = 'canalplus.fr'
 
     _TEST = {
-        u'url': u'http://www.canalplus.fr/c-infos-documentaires/pid1830-c-zapping.html?vid=922470',
-        u'file': u'922470.flv',
-        u'info_dict': {
-            u'title': u'Zapping - 26/08/13',
-            u'description': u'Le meilleur de toutes les chaînes, tous les jours.\nEmission du 26 août 2013',
-            u'upload_date': u'20130826',
-        },
-        u'params': {
-            u'skip_download': True,
+        'url': 'http://www.canalplus.fr/c-infos-documentaires/pid1830-c-zapping.html?vid=922470',
+        'md5': '60c29434a416a83c15dae2587d47027d',
+        'info_dict': {
+            'id': '922470',
+            'ext': 'flv',
+            'title': 'Zapping - 26/08/13',
+            'description': 'Le meilleur de toutes les chaînes, tous les jours.\nEmission du 26 août 2013',
+            'upload_date': '20130826',
         },
     }
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.groupdict().get('id')
+        video_id = mobj.group('id')
 
         if video_id is None:
             webpage = self._download_webpage(url, mobj.group('path'))
-            video_id = self._search_regex(r'<canal:player videoId="(\d+)"', webpage, u'video id')
+            video_id = self._search_regex(r'<canal:player videoId="(\d+)"', webpage, 'video id')
 
         info_url = self._VIDEO_INFO_TEMPLATE % video_id
-        doc = self._download_xml(info_url,video_id,
-                                 u'Downloading video info')
+        doc = self._download_xml(info_url, video_id, 'Downloading video XML')
 
-        self.report_extraction(video_id)
         video_info = [video for video in doc if video.find('ID').text == video_id][0]
-        infos = video_info.find('INFOS')
         media = video_info.find('MEDIA')
-        formats = [media.find('VIDEOS/%s' % format)
-                   for format in ['BAS_DEBIT', 'HAUT_DEBIT', 'HD']]
-        video_url = [format.text for format in formats if format is not None][-1]
+        infos = video_info.find('INFOS')
 
-        return {'id': video_id,
-                'title': u'%s - %s' % (infos.find('TITRAGE/TITRE').text,
-                                       infos.find('TITRAGE/SOUS_TITRE').text),
-                'url': video_url,
-                'ext': 'flv',
-                'upload_date': unified_strdate(infos.find('PUBLICATION/DATE').text),
-                'thumbnail': media.find('IMAGES/GRAND').text,
-                'description': infos.find('DESCRIPTION').text,
-                'view_count': int(infos.find('NB_VUES').text),
-                }
+        preferences = ['MOBILE', 'BAS_DEBIT', 'HAUT_DEBIT', 'HD', 'HLS', 'HDS']
+
+        formats = [
+            {
+                'url': fmt.text + '?hdcore=2.11.3' if fmt.tag == 'HDS' else fmt.text,
+                'format_id': fmt.tag,
+                'ext': 'mp4' if fmt.tag == 'HLS' else 'flv',
+                'preference': preferences.index(fmt.tag) if fmt.tag in preferences else -1,
+            } for fmt in media.find('VIDEOS') if fmt.text
+        ]
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': '%s - %s' % (infos.find('TITRAGE/TITRE').text,
+                                  infos.find('TITRAGE/SOUS_TITRE').text),
+            'upload_date': unified_strdate(infos.find('PUBLICATION/DATE').text),
+            'thumbnail': media.find('IMAGES/GRAND').text,
+            'description': infos.find('DESCRIPTION').text,
+            'view_count': int(infos.find('NB_VUES').text),
+            'like_count': int(infos.find('NB_LIKES').text),
+            'comment_count': int(infos.find('NB_COMMENTS').text),
+            'formats': formats,
+        }
youtube_dl/extractor/clubic.py (new file, 58 lines)
@@ -0,0 +1,58 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    clean_html,
+    qualities,
+)
+
+
+class ClubicIE(InfoExtractor):
+    _VALID_URL = r'http://(?:www\.)?clubic\.com/video/[^/]+/video.*-(?P<id>[0-9]+)\.html'
+
+    _TEST = {
+        'url': 'http://www.clubic.com/video/clubic-week/video-clubic-week-2-0-le-fbi-se-lance-dans-la-photo-d-identite-448474.html',
+        'md5': '1592b694ba586036efac1776b0b43cd3',
+        'info_dict': {
+            'id': '448474',
+            'ext': 'mp4',
+            'title': 'Clubic Week 2.0 : le FBI se lance dans la photo d\u0092identité',
+            'description': 're:Gueule de bois chez Nokia. Le constructeur a indiqué cette.*',
+            'thumbnail': 're:^http://img\.clubic\.com/.*\.jpg$',
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        player_url = 'http://player.m6web.fr/v1/player/clubic/%s.html' % video_id
+        player_page = self._download_webpage(player_url, video_id)
+
+        config_json = self._search_regex(
+            r'(?m)M6\.Player\.config\s*=\s*(\{.+?\});$', player_page,
+            'configuration')
+        config = json.loads(config_json)
+
+        video_info = config['videoInfo']
+        sources = config['sources']
+        quality_order = qualities(['sd', 'hq'])
+
+        formats = [{
+            'format_id': src['streamQuality'],
+            'url': src['src'],
+            'quality': quality_order(src['streamQuality']),
+        } for src in sources]
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': video_info['title'],
+            'formats': formats,
+            'description': clean_html(video_info.get('description')),
+            'thumbnail': config.get('poster'),
+        }
@@ -33,7 +33,7 @@ class CNETIE(InfoExtractor):
 
         webpage = self._download_webpage(url, display_id)
         data_json = self._html_search_regex(
-            r"<div class=\"cnetVideoPlayer\" data-cnet-video-options='([^']+)'",
+            r"<div class=\"cnetVideoPlayer\"\s+.*?data-cnet-video-options='([^']+)'",
             webpage, 'data json')
         data = json.loads(data_json)
         vdata = data['video']
@@ -279,9 +279,12 @@ class InfoExtractor(object):
 
     def _download_xml(self, url_or_request, video_id,
                       note=u'Downloading XML', errnote=u'Unable to download XML',
-                      transform_source=None):
+                      transform_source=None, fatal=True):
         """Return the xml as an xml.etree.ElementTree.Element"""
-        xml_string = self._download_webpage(url_or_request, video_id, note, errnote)
+        xml_string = self._download_webpage(
+            url_or_request, video_id, note, errnote, fatal=fatal)
+        if xml_string is False:
+            return xml_string
         if transform_source:
             xml_string = transform_source(xml_string)
         return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8'))
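Sketch (not part of the patch, and the URL name is a placeholder): with the new fatal parameter a failed download returns False instead of raising, so callers that pass fatal=False must check the return value before parsing, as the BiliBili extractor above does.

```python
# Inside an InfoExtractor subclass's _real_extract(); optional_xml_url is a
# hypothetical URL whose absence should not abort the whole extraction.
doc = self._download_xml(
    optional_xml_url, video_id,
    note='Downloading optional XML', fatal=False)
if doc is not False:
    durl = doc.find('.//durl')  # only parse when the download succeeded
```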
@@ -28,16 +28,18 @@ class CondeNastIE(InfoExtractor):
         'glamour': 'Glamour',
         'wmagazine': 'W Magazine',
         'vanityfair': 'Vanity Fair',
+        'cnevids': 'Condé Nast',
     }
 
-    _VALID_URL = r'http://(video|www)\.(?P<site>%s)\.com/(?P<type>watch|series|video)/(?P<id>.+)' % '|'.join(_SITES.keys())
+    _VALID_URL = r'http://(video|www|player)\.(?P<site>%s)\.com/(?P<type>watch|series|video|embed)/(?P<id>[^/?#]+)' % '|'.join(_SITES.keys())
     IE_DESC = 'Condé Nast media group: %s' % ', '.join(sorted(_SITES.values()))
 
     _TEST = {
         'url': 'http://video.wired.com/watch/3d-printed-speakers-lit-with-led',
-        'file': '5171b343c2b4c00dd0c1ccb3.mp4',
         'md5': '1921f713ed48aabd715691f774c451f7',
         'info_dict': {
+            'id': '5171b343c2b4c00dd0c1ccb3',
+            'ext': 'mp4',
             'title': '3D Printed Speakers Lit With LED',
             'description': 'Check out these beautiful 3D printed LED speakers. You can\'t actually buy them, but LumiGeek is working on a board that will let you make you\'re own.',
         }

@@ -55,12 +57,16 @@ class CondeNastIE(InfoExtractor):
         entries = [self.url_result(build_url(path), 'CondeNast') for path in paths]
         return self.playlist_result(entries, playlist_title=title)
 
-    def _extract_video(self, webpage):
-        description = self._html_search_regex([r'<div class="cne-video-description">(.+?)</div>',
-                                                r'<div class="video-post-content">(.+?)</div>',
-                                                ],
-                                               webpage, 'description',
-                                               fatal=False, flags=re.DOTALL)
+    def _extract_video(self, webpage, url_type):
+        if url_type != 'embed':
+            description = self._html_search_regex(
+                [
+                    r'<div class="cne-video-description">(.+?)</div>',
+                    r'<div class="video-post-content">(.+?)</div>',
+                ],
+                webpage, 'description', fatal=False, flags=re.DOTALL)
+        else:
+            description = None
         params = self._search_regex(r'var params = {(.+?)}[;,]', webpage,
                                     'player params', flags=re.DOTALL)
         video_id = self._search_regex(r'videoId: [\'"](.+?)[\'"]', params, 'video id')

@@ -99,12 +105,12 @@ class CondeNastIE(InfoExtractor):
         mobj = re.match(self._VALID_URL, url)
         site = mobj.group('site')
         url_type = mobj.group('type')
-        id = mobj.group('id')
+        item_id = mobj.group('id')
 
-        self.to_screen(u'Extracting from %s with the Condé Nast extractor' % self._SITES[site])
-        webpage = self._download_webpage(url, id)
+        self.to_screen('Extracting from %s with the Condé Nast extractor' % self._SITES[site])
+        webpage = self._download_webpage(url, item_id)
 
         if url_type == 'series':
             return self._extract_series(url, webpage)
         else:
-            return self._extract_video(webpage)
+            return self._extract_video(webpage, url_type)
@@ -8,7 +8,6 @@ from .subtitles import SubtitlesInfoExtractor
 from ..utils import (
     compat_urllib_request,
     compat_str,
-    get_element_by_id,
     orderedSet,
     str_to_int,
     int_or_none,
@@ -1,4 +1,5 @@
-import os
+from __future__ import unicode_literals
+
 import re
 
 from .common import InfoExtractor

@@ -8,18 +9,23 @@ from ..utils import (
     compat_urllib_parse,
 )
 
 
 class ExtremeTubeIE(InfoExtractor):
-    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>extremetube\.com/video/.+?(?P<videoid>[0-9]+))(?:[/?&]|$)'
-    _TEST = {
-        u'url': u'http://www.extremetube.com/video/music-video-14-british-euro-brit-european-cumshots-swallow-652431',
-        u'file': u'652431.mp4',
-        u'md5': u'1fb9228f5e3332ec8c057d6ac36f33e0',
-        u'info_dict': {
-            u"title": u"Music Video 14 british euro brit european cumshots swallow",
-            u"uploader": u"unknown",
-            u"age_limit": 18,
+    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>extremetube\.com/.*?video/.+?(?P<videoid>[0-9]+))(?:[/?&]|$)'
+    _TESTS = [{
+        'url': 'http://www.extremetube.com/video/music-video-14-british-euro-brit-european-cumshots-swallow-652431',
+        'md5': '1fb9228f5e3332ec8c057d6ac36f33e0',
+        'info_dict': {
+            'id': '652431',
+            'ext': 'mp4',
+            'title': 'Music Video 14 british euro brit european cumshots swallow',
+            'uploader': 'unknown',
+            'age_limit': 18,
         }
-    }
+    }, {
+        'url': 'http://www.extremetube.com/gay/video/abcde-1234',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)

@@ -30,11 +36,14 @@ class ExtremeTubeIE(InfoExtractor):
         req.add_header('Cookie', 'age_verified=1')
         webpage = self._download_webpage(req, video_id)
 
-        video_title = self._html_search_regex(r'<h1 [^>]*?title="([^"]+)"[^>]*>\1<', webpage, u'title')
-        uploader = self._html_search_regex(r'>Posted by:(?=<)(?:\s|<[^>]*>)*(.+?)\|', webpage, u'uploader', fatal=False)
-        video_url = compat_urllib_parse.unquote(self._html_search_regex(r'video_url=(.+?)&', webpage, u'video_url'))
+        video_title = self._html_search_regex(
+            r'<h1 [^>]*?title="([^"]+)"[^>]*>\1<', webpage, 'title')
+        uploader = self._html_search_regex(
+            r'>Posted by:(?=<)(?:\s|<[^>]*>)*(.+?)\|', webpage, 'uploader',
+            fatal=False)
+        video_url = compat_urllib_parse.unquote(self._html_search_regex(
+            r'video_url=(.+?)&', webpage, 'video_url'))
         path = compat_urllib_parse_urlparse(video_url).path
-        extension = os.path.splitext(path)[1][1:]
         format = path.split('/')[5].split('_')[:2]
         format = "-".join(format)
 

@@ -43,7 +52,6 @@ class ExtremeTubeIE(InfoExtractor):
             'title': video_title,
             'uploader': uploader,
             'url': video_url,
-            'ext': extension,
             'format': format,
             'format_id': format,
             'age_limit': 18,
@@ -76,9 +76,8 @@ class FacebookIE(InfoExtractor):
 
         check_form = {
             'fb_dtsg': self._search_regex(r'name="fb_dtsg" value="(.+?)"', login_results, 'fb_dtsg'),
-            'nh': self._search_regex(r'name="nh" value="(\w*?)"', login_results, 'nh'),
+            'h': self._search_regex(r'name="h" value="(\w*?)"', login_results, 'h'),
             'name_action_selected': 'dont_save',
-            'submit[Continue]': self._search_regex(r'<button[^>]+value="(.*?)"[^>]+name="submit\[Continue\]"', login_results, 'continue'),
         }
         check_req = compat_urllib_request.Request(self._CHECKPOINT_URL, urlencode_postdata(check_form))
         check_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
@@ -5,6 +5,7 @@ import re
 from .common import InfoExtractor
 from ..utils import (
     compat_str,
+    compat_urllib_parse,
 )
 
 

@@ -16,16 +17,28 @@ class FiveMinIE(InfoExtractor):
         (?P<id>\d+)
         '''
 
-    _TEST = {
-        # From http://www.engadget.com/2013/11/15/ipad-mini-retina-display-review/
-        'url': 'http://pshared.5min.com/Scripts/PlayerSeed.js?sid=281&width=560&height=345&playList=518013791',
-        'md5': '4f7b0b79bf1a470e5004f7112385941d',
-        'info_dict': {
-            'id': '518013791',
-            'ext': 'mp4',
-            'title': 'iPad Mini with Retina Display Review',
+    _TESTS = [
+        {
+            # From http://www.engadget.com/2013/11/15/ipad-mini-retina-display-review/
+            'url': 'http://pshared.5min.com/Scripts/PlayerSeed.js?sid=281&width=560&height=345&playList=518013791',
+            'md5': '4f7b0b79bf1a470e5004f7112385941d',
+            'info_dict': {
+                'id': '518013791',
+                'ext': 'mp4',
+                'title': 'iPad Mini with Retina Display Review',
+            },
         },
-    }
+        {
+            # From http://on.aol.com/video/how-to-make-a-next-level-fruit-salad-518086247
+            'url': '5min:518086247',
+            'md5': 'e539a9dd682c288ef5a498898009f69e',
+            'info_dict': {
+                'id': '518086247',
+                'ext': 'mp4',
+                'title': 'How to Make a Next-Level Fruit Salad',
+            },
+        },
+    ]
 
     @classmethod
     def _build_result(cls, video_id):

@@ -34,9 +47,19 @@ class FiveMinIE(InfoExtractor):
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
+        embed_url = 'https://embed.5min.com/playerseed/?playList=%s' % video_id
+        embed_page = self._download_webpage(embed_url, video_id,
+                                            'Downloading embed page')
+        sid = self._search_regex(r'sid=(\d+)', embed_page, 'sid')
+        query = compat_urllib_parse.urlencode({
+            'func': 'GetResults',
+            'playlist': video_id,
+            'sid': sid,
+            'isPlayerSeed': 'true',
+            'url': embed_url,
+        })
         info = self._download_json(
-            'https://syn.5min.com/handlers/SenseHandler.ashx?func=GetResults&'
-            'playlist=%s&url=https' % video_id,
+            'https://syn.5min.com/handlers/SenseHandler.ashx?' + query,
             video_id)['binding'][0]
 
         second_id = compat_str(int(video_id[:-2]) + 1)
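Aside (illustration only, not from the patch): the urlencode call above simply builds the query string appended to SenseHandler.ashx. The sid value and embed URL below are placeholders chosen to resemble the test case.

```python
try:
    from urllib.parse import urlencode  # Python 3
except ImportError:
    from urllib import urlencode        # Python 2

query = urlencode({
    'func': 'GetResults',
    'playlist': '518013791',                                        # video id from the test above
    'sid': '281',                                                   # placeholder sid value
    'isPlayerSeed': 'true',
    'url': 'https://embed.5min.com/playerseed/?playList=518013791', # placeholder embed URL
})
print('https://syn.5min.com/handlers/SenseHandler.ashx?' + query)
```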
@@ -35,9 +35,10 @@ class GenericIE(InfoExtractor):
     _TESTS = [
         {
             'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
-            'file': '13601338388002.mp4',
-            'md5': '6e15c93721d7ec9e9ca3fdbf07982cfd',
+            'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
             'info_dict': {
+                'id': '13601338388002',
+                'ext': 'mp4',
                 'uploader': 'www.hodiho.fr',
                 'title': 'R\u00e9gis plante sa Jeep',
             }

@@ -46,8 +47,9 @@ class GenericIE(InfoExtractor):
         {
             'add_ie': ['Bandcamp'],
             'url': 'http://bronyrock.com/track/the-pony-mash',
-            'file': '3235767654.mp3',
             'info_dict': {
+                'id': '3235767654',
+                'ext': 'mp3',
                 'title': 'The Pony Mash',
                 'uploader': 'M_Pallante',
             },

@@ -73,9 +75,10 @@ class GenericIE(InfoExtractor):
         {
             # https://github.com/rg3/youtube-dl/issues/2253
             'url': 'http://bcove.me/i6nfkrc3',
-            'file': '3101154703001.mp4',
             'md5': '0ba9446db037002366bab3b3eb30c88c',
             'info_dict': {
+                'id': '3101154703001',
+                'ext': 'mp4',
                 'title': 'Still no power',
                 'uploader': 'thestar.com',
                 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',

@@ -236,6 +239,16 @@ class GenericIE(InfoExtractor):
                 'uploader_id': 'rbctv_2012_4',
             },
         },
+        # Condé Nast embed
+        {
+            'url': 'http://www.wired.com/2014/04/honda-asimo/',
+            'md5': 'ba0dfe966fa007657bd1443ee672db0f',
+            'info_dict': {
+                'id': '53501be369702d3275860000',
+                'ext': 'mp4',
+                'title': 'Honda’s New Asimo Robot Is More Human Than Ever',
+            }
+        }
     ]
 
     def report_download_webpage(self, video_id):

@@ -482,6 +495,22 @@ class GenericIE(InfoExtractor):
         if mobj:
             return self.url_result(mobj.group(1), 'BlipTV')
 
+        # Look for embedded condenast player
+        matches = re.findall(
+            r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+")',
+            webpage)
+        if matches:
+            return {
+                '_type': 'playlist',
+                'entries': [{
+                    '_type': 'url',
+                    'ie_key': 'CondeNast',
+                    'url': ma,
+                } for ma in matches],
+                'title': video_title,
+                'id': video_id,
+            }
+
         # Look for Bandcamp pages with custom domain
         mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
         if mobj is not None:

@@ -502,7 +531,7 @@ class GenericIE(InfoExtractor):
             return OoyalaIE._build_url_result(mobj.group('ec'))
 
         # Look for Aparat videos
-        mobj = re.search(r'<iframe src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
+        mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
         if mobj is not None:
             return self.url_result(mobj.group(1), 'Aparat')
 
@@ -106,7 +106,7 @@ class OneUPIE(IGNIE):
 
     _DESCRIPTION_RE = r'<div id="vid_summary">(.+?)</div>'
 
-    _TEST = {
+    _TESTS = [{
         'url': 'http://gamevideos.1up.com/video/id/34976',
         'md5': '68a54ce4ebc772e4b71e3123d413163d',
         'info_dict': {

@@ -115,10 +115,7 @@ class OneUPIE(IGNIE):
             'title': 'Sniper Elite V2 - Trailer',
             'description': 'md5:5d289b722f5a6d940ca3136e9dae89cf',
         }
-    }
+    }]
 
-    # Override IGN tests
-    _TESTS = []
-
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
@@ -11,16 +11,15 @@ from ..utils import (
 
 class InfoQIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?infoq\.com/[^/]+/(?P<id>[^/]+)$'
 
     _TEST = {
-        "name": "InfoQ",
-        "url": "http://www.infoq.com/presentations/A-Few-of-My-Favorite-Python-Things",
-        "file": "12-jan-pythonthings.mp4",
-        "info_dict": {
-            "description": "Mike Pirnat presents some tips and tricks, standard libraries and third party packages that make programming in Python a richer experience.",
-            "title": "A Few of My Favorite [Python] Things",
-        },
-        "params": {
-            "skip_download": True,
+        'url': 'http://www.infoq.com/presentations/A-Few-of-My-Favorite-Python-Things',
+        'md5': 'b5ca0e0a8c1fed93b0e65e48e462f9a2',
+        'info_dict': {
+            'id': '12-jan-pythonthings',
+            'ext': 'mp4',
+            'description': 'Mike Pirnat presents some tips and tricks, standard libraries and third party packages that make programming in Python a richer experience.',
+            'title': 'A Few of My Favorite [Python] Things',
         },
     }
 

@@ -30,26 +29,39 @@ class InfoQIE(InfoExtractor):
 
         webpage = self._download_webpage(url, video_id)
 
+        video_title = self._html_search_regex(r'<title>(.*?)</title>', webpage, 'title')
+        video_description = self._html_search_meta('description', webpage, 'description')
+
+        # The server URL is hardcoded
+        video_url = 'rtmpe://video.infoq.com/cfx/st/'
+
         # Extract video URL
-        encoded_id = self._search_regex(r"jsclassref ?= ?'([^']*)'", webpage, 'encoded id')
+        encoded_id = self._search_regex(
+            r"jsclassref\s*=\s*'([^']*)'", webpage, 'encoded id')
         real_id = compat_urllib_parse.unquote(base64.b64decode(encoded_id.encode('ascii')).decode('utf-8'))
-        video_url = 'rtmpe://video.infoq.com/cfx/st/' + real_id
+        playpath = 'mp4:' + real_id
 
-        # Extract title
-        video_title = self._search_regex(r'contentTitle = "(.*?)";',
-                                         webpage, 'title')
-
-        # Extract description
-        video_description = self._html_search_regex(r'<meta name="description" content="(.*)"(?:\s*/)?>',
-                                                     webpage, 'description', fatal=False)
-
-        video_filename = video_url.split('/')[-1]
+        video_filename = playpath.split('/')[-1]
         video_id, extension = video_filename.split('.')
 
+        http_base = self._search_regex(
+            r'EXPRESSINSTALL_SWF\s*=\s*"(https?://[^/"]+/)', webpage,
+            'HTTP base URL')
+
+        formats = [{
+            'format_id': 'rtmp',
+            'url': video_url,
+            'ext': extension,
+            'play_path': playpath,
+        }, {
+            'format_id': 'http',
+            'url': http_base + real_id,
+        }]
+        self._sort_formats(formats)
+
         return {
             'id': video_id,
-            'url': video_url,
             'title': video_title,
-            'ext': extension, # Extension is always(?) mp4, but seems to be flv
             'description': video_description,
+            'formats': formats,
         }
@@ -14,7 +14,7 @@ class JukeboxIE(InfoExtractor):
     _VALID_URL = r'^http://www\.jukebox?\..+?\/.+[,](?P<video_id>[a-z0-9\-]+)\.html'
     _TEST = {
         'url': 'http://www.jukebox.es/kosheen/videoclip,pride,r303r.html',
-        'md5': '5dc6477e74b1e37042ac5acedd8413e5',
+        'md5': '1574e9b4d6438446d5b7dbcdf2786276',
         'info_dict': {
             'id': 'r303r',
             'ext': 'flv',
@@ -1,15 +1,18 @@
+from __future__ import unicode_literals
+
 import re
 
 from .common import InfoExtractor
-from ..utils import (
-    ExtractorError,
-)
 
 
 class MDRIE(InfoExtractor):
-    _VALID_URL = r'^(?P<domain>(?:https?://)?(?:www\.)?mdr\.de)/mediathek/(?:.*)/(?P<type>video|audio)(?P<video_id>[^/_]+)_.*'
+    _VALID_URL = r'^(?P<domain>https?://(?:www\.)?mdr\.de)/(?:.*)/(?P<type>video|audio)(?P<video_id>[^/_]+)(?:_|\.html)'
 
     # No tests, MDR regularily deletes its videos
+    _TEST = {
+        'url': 'http://www.mdr.de/fakt/video189002.html',
+        'only_matching': True,
+    }
 
     def _real_extract(self, url):
         m = re.match(self._VALID_URL, url)

@@ -19,9 +22,9 @@ class MDRIE(InfoExtractor):
         # determine title and media streams from webpage
         html = self._download_webpage(url, video_id)
 
-        title = self._html_search_regex(r'<h2>(.*?)</h2>', html, u'title')
+        title = self._html_search_regex(r'<h[12]>(.*?)</h[12]>', html, 'title')
         xmlurl = self._search_regex(
-            r'(/mediathek/(?:.+)/(?:video|audio)[0-9]+-avCustom.xml)', html, u'XML URL')
+            r'dataURL:\'(/(?:.+)/(?:video|audio)[0-9]+-avCustom.xml)', html, 'XML URL')
 
         doc = self._download_xml(domain + xmlurl, video_id)
         formats = []

@@ -41,7 +44,7 @@ class MDRIE(InfoExtractor):
             if vbr_el is None:
                 format.update({
                     'vcodec': 'none',
-                    'format_id': u'%s-%d' % (media_type, abr),
+                    'format_id': '%s-%d' % (media_type, abr),
                 })
             else:
                 vbr = int(vbr_el.text) // 1000

@@ -49,12 +52,9 @@ class MDRIE(InfoExtractor):
                     'vbr': vbr,
                     'width': int(a.find('frameWidth').text),
                     'height': int(a.find('frameHeight').text),
-                    'format_id': u'%s-%d' % (media_type, vbr),
+                    'format_id': '%s-%d' % (media_type, vbr),
                 })
             formats.append(format)
-        if not formats:
-            raise ExtractorError(u'Could not find any valid formats')
-
         self._sort_formats(formats)
 
         return {
@@ -1,8 +1,10 @@
 from __future__ import unicode_literals
 
 import re
+import json
 
 from .common import InfoExtractor
+from ..utils import str_to_int
 
 
 class NineGagIE(InfoExtractor):

@@ -44,23 +46,14 @@ class NineGagIE(InfoExtractor):
 
         webpage = self._download_webpage(url, display_id)
 
-        youtube_id = self._html_search_regex(
-            r'(?s)id="jsid-video-post-container".*?data-external-id="([^"]+)"',
-            webpage, 'video ID')
-        title = self._html_search_regex(
-            r'(?s)id="jsid-video-post-container".*?data-title="([^"]+)"',
-            webpage, 'title', default=None)
-        if not title:
-            title = self._og_search_title(webpage)
-        description = self._html_search_regex(
-            r'(?s)<div class="video-caption">.*?<p>(.*?)</p>', webpage,
-            'description', fatal=False)
-        view_count_str = self._html_search_regex(
-            r'<p><b>([0-9][0-9,]*)</b> views</p>', webpage, 'view count',
-            fatal=False)
-        view_count = (
-            None if view_count_str is None
-            else int(view_count_str.replace(',', '')))
+        post_view = json.loads(self._html_search_regex(
+            r'var postView = new app\.PostView\({ post: ({.+?}),', webpage, 'post view'))
+
+        youtube_id = post_view['videoExternalId']
+        title = post_view['title']
+        description = post_view['description']
+        view_count = str_to_int(post_view['externalView'])
+        thumbnail = post_view.get('thumbnail_700w') or post_view.get('ogImageUrl') or post_view.get('thumbnail_300w')
 
         return {
             '_type': 'url_transparent',

@@ -71,5 +64,5 @@ class NineGagIE(InfoExtractor):
             'title': title,
             'description': description,
             'view_count': view_count,
-            'thumbnail': self._og_search_thumbnail(webpage),
+            'thumbnail': thumbnail,
         }
youtube_dl/extractor/noco.py (new file, 105 lines)
@@ -0,0 +1,105 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    unified_strdate,
+    compat_str,
+)
+
+
+class NocoIE(InfoExtractor):
+    _VALID_URL = r'http://(?:(?:www\.)?noco\.tv/emission/|player\.noco\.tv/\?idvideo=)(?P<id>\d+)'
+
+    _TEST = {
+        'url': 'http://noco.tv/emission/11538/nolife/ami-ami-idol-hello-france/',
+        'md5': '0a993f0058ddbcd902630b2047ef710e',
+        'info_dict': {
+            'id': '11538',
+            'ext': 'mp4',
+            'title': 'Ami Ami Idol - Hello! France',
+            'description': 'md5:4eaab46ab68fa4197a317a88a53d3b86',
+            'upload_date': '20140412',
+            'uploader': 'Nolife',
+            'uploader_id': 'NOL',
+            'duration': 2851.2,
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        medias = self._download_json(
+            'http://api.noco.tv/1.0/video/medias/%s' % video_id, video_id, 'Downloading video JSON')
+
+        formats = []
+
+        for fmt in medias['fr']['video_list']['default']['quality_list']:
+            format_id = fmt['quality_key']
+
+            file = self._download_json(
+                'http://api.noco.tv/1.0/video/file/%s/fr/%s' % (format_id.lower(), video_id),
+                video_id, 'Downloading %s video JSON' % format_id)
+
+            file_url = file['file']
+            if not file_url:
+                continue
+
+            if file_url == 'forbidden':
+                raise ExtractorError(
+                    '%s returned error: %s - %s' % (
+                        self.IE_NAME, file['popmessage']['title'], file['popmessage']['message']),
+                    expected=True)
+
+            formats.append({
+                'url': file_url,
+                'format_id': format_id,
+                'width': fmt['res_width'],
+                'height': fmt['res_lines'],
+                'abr': fmt['audiobitrate'],
+                'vbr': fmt['videobitrate'],
+                'filesize': fmt['filesize'],
+                'format_note': fmt['quality_name'],
+                'preference': fmt['priority'],
+            })
+
+        self._sort_formats(formats)
+
+        show = self._download_json(
+            'http://api.noco.tv/1.0/shows/show/%s' % video_id, video_id, 'Downloading show JSON')[0]
+
+        upload_date = unified_strdate(show['indexed'])
+        uploader = show['partner_name']
+        uploader_id = show['partner_key']
+        duration = show['duration_ms'] / 1000.0
+        thumbnail = show['screenshot']
+
+        episode = show.get('show_TT') or show.get('show_OT')
+        family = show.get('family_TT') or show.get('family_OT')
+        episode_number = show.get('episode_number')
+
+        title = ''
+        if family:
+            title += family
+        if episode_number:
+            title += ' #' + compat_str(episode_number)
+        if episode:
+            title += ' - ' + episode
+
+        description = show.get('show_resume') or show.get('family_resume')
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'upload_date': upload_date,
+            'uploader': uploader,
+            'uploader_id': uploader_id,
+            'duration': duration,
+            'formats': formats,
+        }
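Illustration (not part of the patch): how the title-assembly block above composes the expected _TEST title from the show fields. The field values below are assumptions inferred from the test metadata, not API output.

```python
family = 'Ami Ami Idol'    # assumed show family title
episode = 'Hello! France'  # assumed episode title
episode_number = None      # assumed absent for this episode

title = ''
if family:
    title += family
if episode_number:
    title += ' #' + str(episode_number)
if episode:
    title += ' - ' + episode

assert title == 'Ami Ami Idol - Hello! France'  # matches the _TEST 'title'
```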
@@ -6,22 +6,36 @@ import re
 from .common import InfoExtractor
 from ..utils import int_or_none


 class PodomaticIE(InfoExtractor):
     IE_NAME = 'podomatic'
     _VALID_URL = r'^(?P<proto>https?)://(?P<channel>[^.]+)\.podomatic\.com/entry/(?P<id>[^?]+)'

-    _TEST = {
-        "url": "http://scienceteachingtips.podomatic.com/entry/2009-01-02T16_03_35-08_00",
-        "file": "2009-01-02T16_03_35-08_00.mp3",
-        "md5": "84bb855fcf3429e6bf72460e1eed782d",
-        "info_dict": {
-            "uploader": "Science Teaching Tips",
-            "uploader_id": "scienceteachingtips",
-            "title": "64. When the Moon Hits Your Eye",
-            "duration": 446,
-        }
-    }
+    _TESTS = [
+        {
+            'url': 'http://scienceteachingtips.podomatic.com/entry/2009-01-02T16_03_35-08_00',
+            'md5': '84bb855fcf3429e6bf72460e1eed782d',
+            'info_dict': {
+                'id': '2009-01-02T16_03_35-08_00',
+                'ext': 'mp3',
+                'uploader': 'Science Teaching Tips',
+                'uploader_id': 'scienceteachingtips',
+                'title': '64. When the Moon Hits Your Eye',
+                'duration': 446,
+            }
+        },
+        {
+            'url': 'http://ostbahnhof.podomatic.com/entry/2013-11-15T16_31_21-08_00',
+            'md5': 'd2cf443931b6148e27638650e2638297',
+            'info_dict': {
+                'id': '2013-11-15T16_31_21-08_00',
+                'ext': 'mp3',
+                'uploader': 'Ostbahnhof / Techno Mix',
+                'uploader_id': 'ostbahnhof',
+                'title': 'Einunddreizig',
+                'duration': 3799,
+            }
+        },
+    ]

     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
@@ -32,10 +46,12 @@ class PodomaticIE(InfoExtractor):
             '?permalink=true&rtmp=0') %
             (mobj.group('proto'), channel, video_id))
         data_json = self._download_webpage(
-            json_url, video_id, note=u'Downloading video info')
+            json_url, video_id, 'Downloading video info')
         data = json.loads(data_json)

         video_url = data['downloadLink']
+        if not video_url:
+            video_url = '%s/%s' % (data['streamer'].replace('rtmp', 'http'), data['mediaLocation'])
         uploader = data['podcast']
         title = data['title']
         thumbnail = data['imageLocation']
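For reference, a small standalone sketch of the new rtmp-to-http fallback when downloadLink comes back empty. The streamer and mediaLocation values here are invented; only the keys match what the extractor reads above:

# Invented payload; the keys are the ones the extractor reads above.
data = {
    'downloadLink': '',
    'streamer': 'rtmp://media.podomatic.example/streaming',
    'mediaLocation': 'mp3/premium/abc123/episode.mp3',
}

video_url = data['downloadLink']
if not video_url:
    # Rewrite the RTMP streamer base to plain HTTP and append the media path.
    video_url = '%s/%s' % (data['streamer'].replace('rtmp', 'http'), data['mediaLocation'])

print(video_url)
# -> http://media.podomatic.example/streaming/mp3/premium/abc123/episode.mp3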
@@ -43,13 +43,14 @@ class RutubeIE(InfoExtractor):
             'http://rutube.ru/api/video/%s/?format=json' % video_id,
             video_id, 'Downloading video JSON')

-        trackinfo = self._download_json(
-            'http://rutube.ru/api/play/trackinfo/%s/?format=json' % video_id,
-            video_id, 'Downloading trackinfo JSON')
-
         # Some videos don't have the author field
-        author = trackinfo.get('author') or {}
-        m3u8_url = trackinfo['video_balancer'].get('m3u8')
+        author = video.get('author') or {}
+
+        options = self._download_json(
+            'http://rutube.ru/api/play/options/%s/?format=json' % video_id,
+            video_id, 'Downloading options JSON')
+
+        m3u8_url = options['video_balancer'].get('m3u8')
         if m3u8_url is None:
             raise ExtractorError('Couldn\'t find m3u8 manifest url')

@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import re

 from .common import InfoExtractor
@@ -8,78 +10,114 @@ from ..utils import (


 class SteamIE(InfoExtractor):
-    _VALID_URL = r"""http://store\.steampowered\.com/
-                (agecheck/)?
-                (?P<urltype>video|app)/ #If the page is only for videos or for a game
-                (?P<gameID>\d+)/?
-                (?P<videoID>\d*)(?P<extra>\??) #For urltype == video we sometimes get the videoID
-                """
+    _VALID_URL = r"""(?x)
+        https?://store\.steampowered\.com/
+            (agecheck/)?
+            (?P<urltype>video|app)/ #If the page is only for videos or for a game
+            (?P<gameID>\d+)/?
+            (?P<videoID>\d*)(?P<extra>\??) # For urltype == video we sometimes get the videoID
+        |
+        https?://(?:www\.)?steamcommunity\.com/sharedfiles/filedetails/\?id=(?P<fileID>[0-9]+)
+    """
     _VIDEO_PAGE_TEMPLATE = 'http://store.steampowered.com/video/%s/'
     _AGECHECK_TEMPLATE = 'http://store.steampowered.com/agecheck/video/%s/?snr=1_agecheck_agecheck__age-gate&ageDay=1&ageMonth=January&ageYear=1970'
-    _TEST = {
-        u"url": u"http://store.steampowered.com/video/105600/",
-        u"playlist": [
+    _TESTS = [{
+        "url": "http://store.steampowered.com/video/105600/",
+        "playlist": [
             {
-                u"file": u"81300.flv",
-                u"md5": u"f870007cee7065d7c76b88f0a45ecc07",
-                u"info_dict": {
-                    u"title": u"Terraria 1.1 Trailer",
-                    u'playlist_index': 1,
+                "md5": "f870007cee7065d7c76b88f0a45ecc07",
+                "info_dict": {
+                    'id': '81300',
+                    'ext': 'flv',
+                    "title": "Terraria 1.1 Trailer",
+                    'playlist_index': 1,
                 }
             },
             {
-                u"file": u"80859.flv",
-                u"md5": u"61aaf31a5c5c3041afb58fb83cbb5751",
-                u"info_dict": {
-                    u"title": u"Terraria Trailer",
-                    u'playlist_index': 2,
+                "md5": "61aaf31a5c5c3041afb58fb83cbb5751",
+                "info_dict": {
+                    'id': '80859',
+                    'ext': 'flv',
+                    "title": "Terraria Trailer",
+                    'playlist_index': 2,
                 }
             }
-        ]
-    }
-
-    @classmethod
-    def suitable(cls, url):
-        """Receives a URL and returns True if suitable for this IE."""
-        return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
+        ],
+        'params': {
+            'playlistend': 2,
+        }
+    }, {
+        'url': 'http://steamcommunity.com/sharedfiles/filedetails/?id=242472205',
+        'info_dict': {
+            'id': 'WB5DvDOOvAY',
+            'ext': 'mp4',
+            'upload_date': '20140329',
+            'title': 'FRONTIERS - Final Greenlight Trailer',
+            'description': "The final trailer for the Steam Greenlight launch. Hooray, progress! Here's the official Greenlight page: http://steamcommunity.com/sharedfiles/filedetails/?id=242472205",
+            'uploader': 'AAD Productions',
+            'uploader_id': 'AtomicAgeDogGames',
+        }
+    }]

     def _real_extract(self, url):
-        m = re.match(self._VALID_URL, url, re.VERBOSE)
-        gameID = m.group('gameID')
-
-        videourl = self._VIDEO_PAGE_TEMPLATE % gameID
-        webpage = self._download_webpage(videourl, gameID)
+        m = re.match(self._VALID_URL, url)
+        fileID = m.group('fileID')
+        if fileID:
+            videourl = url
+            playlist_id = fileID
+        else:
+            gameID = m.group('gameID')
+            playlist_id = gameID
+            videourl = self._VIDEO_PAGE_TEMPLATE % playlist_id
+        webpage = self._download_webpage(videourl, playlist_id)

         if re.search('<h2>Please enter your birth date to continue:</h2>', webpage) is not None:
-            videourl = self._AGECHECK_TEMPLATE % gameID
+            videourl = self._AGECHECK_TEMPLATE % playlist_id
             self.report_age_confirmation()
-            webpage = self._download_webpage(videourl, gameID)
+            webpage = self._download_webpage(videourl, playlist_id)

-        self.report_extraction(gameID)
-        game_title = self._html_search_regex(r'<h2 class="pageheader">(.*?)</h2>',
-                                             webpage, 'game title')
-
-        urlRE = r"'movie_(?P<videoID>\d+)': \{\s*FILENAME: \"(?P<videoURL>[\w:/\.\?=]+)\"(,\s*MOVIE_NAME: \"(?P<videoName>[\w:/\.\?=\+-]+)\")?\s*\},"
-        mweb = re.finditer(urlRE, webpage)
-        namesRE = r'<span class="title">(?P<videoName>.+?)</span>'
-        titles = re.finditer(namesRE, webpage)
-        thumbsRE = r'<img class="movie_thumb" src="(?P<thumbnail>.+?)">'
-        thumbs = re.finditer(thumbsRE, webpage)
-        videos = []
-        for vid,vtitle,thumb in zip(mweb,titles,thumbs):
-            video_id = vid.group('videoID')
-            title = vtitle.group('videoName')
-            video_url = vid.group('videoURL')
-            video_thumb = thumb.group('thumbnail')
-            if not video_url:
-                raise ExtractorError(u'Cannot find video url for %s' % video_id)
-            info = {
-                'id':video_id,
-                'url':video_url,
-                'ext': 'flv',
-                'title': unescapeHTML(title),
-                'thumbnail': video_thumb
-            }
-            videos.append(info)
-        return [self.playlist_result(videos, gameID, game_title)]
+        if fileID:
+            playlist_title = self._html_search_regex(
+                r'<div class="workshopItemTitle">(.+)</div>', webpage, 'title')
+            mweb = re.finditer(r'''(?x)
+                'movie_(?P<videoID>[0-9]+)':\s*\{\s*
+                YOUTUBE_VIDEO_ID:\s*"(?P<youtube_id>[^"]+)",
+                ''', webpage)
+            videos = [{
+                '_type': 'url',
+                'url': vid.group('youtube_id'),
+                'ie_key': 'Youtube',
+            } for vid in mweb]
+        else:
+            playlist_title = self._html_search_regex(
+                r'<h2 class="pageheader">(.*?)</h2>', webpage, 'game title')
+
+            mweb = re.finditer(r'''(?x)
+                'movie_(?P<videoID>[0-9]+)':\s*\{\s*
+                FILENAME:\s*"(?P<videoURL>[\w:/\.\?=]+)"
+                (,\s*MOVIE_NAME:\s*\"(?P<videoName>[\w:/\.\?=\+-]+)\")?\s*\},
+                ''', webpage)
+            titles = re.finditer(
+                r'<span class="title">(?P<videoName>.+?)</span>', webpage)
+            thumbs = re.finditer(
+                r'<img class="movie_thumb" src="(?P<thumbnail>.+?)">', webpage)
+            videos = []
+
+            for vid, vtitle, thumb in zip(mweb, titles, thumbs):
+                video_id = vid.group('videoID')
+                title = vtitle.group('videoName')
+                video_url = vid.group('videoURL')
+                video_thumb = thumb.group('thumbnail')
+                if not video_url:
+                    raise ExtractorError('Cannot find video url for %s' % video_id)
+                videos.append({
+                    'id': video_id,
+                    'url': video_url,
+                    'ext': 'flv',
+                    'title': unescapeHTML(title),
+                    'thumbnail': video_thumb
+                })
+        if not videos:
+            raise ExtractorError('Could not find any videos')
+
+        return self.playlist_result(videos, playlist_id, playlist_title)
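As a quick sanity check of the reworked pattern (copied verbatim from the hunk above; the two URLs are the ones used in the extractor's own tests), the single verbose regex now covers both URL families:

import re

VALID_URL = r"""(?x)
    https?://store\.steampowered\.com/
        (agecheck/)?
        (?P<urltype>video|app)/ #If the page is only for videos or for a game
        (?P<gameID>\d+)/?
        (?P<videoID>\d*)(?P<extra>\??) # For urltype == video we sometimes get the videoID
    |
    https?://(?:www\.)?steamcommunity\.com/sharedfiles/filedetails/\?id=(?P<fileID>[0-9]+)
"""

store = re.match(VALID_URL, 'http://store.steampowered.com/video/105600/')
community = re.match(VALID_URL, 'http://steamcommunity.com/sharedfiles/filedetails/?id=242472205')

print(store.group('gameID'))      # 105600
print(store.group('fileID'))      # None (store URLs have no fileID)
print(community.group('fileID'))  # 242472205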
@@ -3,9 +3,6 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
-from ..utils import (
-    ExtractorError,
-)


 class TeamcocoIE(InfoExtractor):
@@ -49,6 +49,19 @@ class TEDIE(SubtitlesInfoExtractor):
             'thumbnail': 're:^https?://.+\.jpg',
             'description': 'Adaptive, intelligent, and consistent, algorithms are emerging as the ultimate app for everything from matching consumers to products to assessing medical diagnoses. Vishal Sikka shares his appreciation for the algorithm, charting both its inherent beauty and its growing power.',
         }
+    }, {
+        'url': 'http://www.ted.com/talks/gabby_giffords_and_mark_kelly_be_passionate_be_courageous_be_your_best',
+        'info_dict': {
+            'id': '1972',
+            'ext': 'flv',
+            'title': 'Be passionate. Be courageous. Be your best.',
+            'uploader': 'Gabby Giffords and Mark Kelly',
+            'description': 'md5:d89e1d8ebafdac8e55df4c219ecdbfe9',
+        },
+        'params': {
+            # rtmp download
+            'skip_download': True,
+        },
     }]

     _NATIVE_FORMATS = {
@@ -84,7 +97,7 @@ class TEDIE(SubtitlesInfoExtractor):
         playlist_info = info['playlist']

         playlist_entries = [
-            self.url_result(u'http://www.ted.com/talks/' + talk['slug'], self.ie_key())
+            self.url_result('http://www.ted.com/talks/' + talk['slug'], self.ie_key())
             for talk in info['talks']
         ]
         return self.playlist_result(
@@ -102,11 +115,23 @@ class TEDIE(SubtitlesInfoExtractor):
             'url': format_url,
             'format_id': format_id,
             'format': format_id,
-        } for (format_id, format_url) in talk_info['nativeDownloads'].items()]
-        for f in formats:
-            finfo = self._NATIVE_FORMATS.get(f['format_id'])
-            if finfo:
-                f.update(finfo)
+        } for (format_id, format_url) in talk_info['nativeDownloads'].items() if format_url is not None]
+        if formats:
+            for f in formats:
+                finfo = self._NATIVE_FORMATS.get(f['format_id'])
+                if finfo:
+                    f.update(finfo)
+        else:
+            # Use rtmp downloads
+            formats = [{
+                'format_id': f['name'],
+                'url': talk_info['streamer'],
+                'play_path': f['file'],
+                'ext': 'flv',
+                'width': f['width'],
+                'height': f['height'],
+                'tbr': f['bitrate'],
+            } for f in talk_info['resources']['rtmp']]
         self._sort_formats(formats)

         video_id = compat_str(talk_info['id'])
@@ -138,7 +163,7 @@ class TEDIE(SubtitlesInfoExtractor):
                 sub_lang_list[l] = url
             return sub_lang_list
         else:
-            self._downloader.report_warning(u'video doesn\'t have subtitles')
+            self._downloader.report_warning('video doesn\'t have subtitles')
             return {}

     def _watch_info(self, url, name):
@@ -153,7 +178,10 @@ class TEDIE(SubtitlesInfoExtractor):
         title = self._html_search_regex(
             r"(?s)<h1(?:\s+class='[^']+')?>(.+?)</h1>", webpage, 'title')
         description = self._html_search_regex(
-            r'(?s)<h4 class="[^"]+" id="h3--about-this-talk">.*?</h4>(.*?)</div>',
+            [
+                r'(?s)<h4 class="[^"]+" id="h3--about-this-talk">.*?</h4>(.*?)</div>',
+                r'(?s)<p><strong>About this talk:</strong>\s+(.*?)</p>',
+            ],
             webpage, 'description', fatal=False)

         return {
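The description lookup now receives a list of patterns, which youtube-dl's _html_search_regex tries in order. As a rough standalone equivalent of that "first pattern that matches wins" idea (not the library's actual implementation; the page snippet is invented):

import re

def first_matching_group(patterns, html):
    """Return group 1 of the first pattern that matches, or None."""
    for pattern in patterns:
        m = re.search(pattern, html)
        if m:
            return m.group(1)
    return None

# Invented snippet shaped like the older TED talk page markup.
html = '<p><strong>About this talk:</strong> A short talk summary.</p>'
print(first_matching_group([
    r'(?s)<h4 class="[^"]+" id="h3--about-this-talk">.*?</h4>(.*?)</div>',
    r'(?s)<p><strong>About this talk:</strong>\s+(.*?)</p>',
], html))
# -> A short talk summary.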
youtube_dl/extractor/tlc.py (new file, 60 lines)
@@ -0,0 +1,60 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+import re
+
+from .common import InfoExtractor
+from .brightcove import BrightcoveIE
+from .discovery import DiscoveryIE
+
+
+class TlcIE(DiscoveryIE):
+    IE_NAME = 'tlc.com'
+    _VALID_URL = r'http://www\.tlc\.com\/[a-zA-Z0-9\-]*/[a-zA-Z0-9\-]*/videos/(?P<id>[a-zA-Z0-9\-]*)(.htm)?'
+
+    _TEST = {
+        'url': 'http://www.tlc.com/tv-shows/cake-boss/videos/too-big-to-fly.htm',
+        'md5': 'c4038f4a9b44d0b5d74caaa64ed2a01a',
+        'info_dict': {
+            'id': '853232',
+            'ext': 'mp4',
+            'title': 'Cake Boss: Too Big to Fly',
+            'description': 'Buddy has taken on a high flying task.',
+            'duration': 119,
+        },
+    }
+
+
+class TlcDeIE(InfoExtractor):
+    IE_NAME = 'tlc.de'
+    _VALID_URL = r'http://www\.tlc\.de/sendungen/[^/]+/videos/(?P<title>[^/?]+)'
+
+    _TEST = {
+        'url': 'http://www.tlc.de/sendungen/breaking-amish/videos/#3235167922001',
+        'info_dict': {
+            'id': '3235167922001',
+            'ext': 'mp4',
+            'title': 'Breaking Amish: Die Welt da draußen',
+            'uploader': 'Discovery Networks - Germany',
+            'description': 'Vier Amische und eine Mennonitin wagen in New York'
+                ' den Sprung in ein komplett anderes Leben. Begleitet sie auf'
+                ' ihrem spannenden Weg.',
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        title = mobj.group('title')
+        webpage = self._download_webpage(url, title)
+        iframe_url = self._search_regex(
+            '<iframe src="(http://www\.tlc\.de/wp-content/.+?)"', webpage,
+            'iframe url')
+        # Otherwise we don't get the correct 'BrightcoveExperience' element,
+        # example: http://www.tlc.de/sendungen/cake-boss/videos/cake-boss-cannoli-drama/
+        iframe_url = iframe_url.replace('.htm?', '.php?')
+        iframe = self._download_webpage(iframe_url, title)
+
+        return {
+            '_type': 'url',
+            'url': BrightcoveIE._extract_brightcove_url(iframe),
+            'ie': BrightcoveIE.ie_key(),
+        }
youtube_dl/extractor/vuclip.py (new file, 66 lines)
@@ -0,0 +1,66 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urllib_parse_urlparse,
+    parse_duration,
+    qualities,
+)
+
+
+class VuClipIE(InfoExtractor):
+    _VALID_URL = r'http://(?:m)?\.vuclip\.com/w\?.*?cid=(?P<id>[0-9]+)'
+
+    _TEST = {
+        'url': 'http://m.vuclip.com/w?cid=843902317&fid=63532&z=1007&nvar&frm=index.html&bu=4757321434',
+        'md5': '92ac9d1ccefec4f0bb474661ab144fcf',
+        'info_dict': {
+            'id': '843902317',
+            'ext': '3gp',
+            'title': 'Movie Trailer: Noah',
+            'duration': 139,
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+        ad_m = re.search(
+            r'''value="No.*?" onClick="location.href='([^"']+)'"''', webpage)
+        if ad_m:
+            urlr = compat_urllib_parse_urlparse(url)
+            adfree_url = urlr.scheme + '://' + urlr.netloc + ad_m.group(1)
+            webpage = self._download_webpage(
+                adfree_url, video_id, note='Download post-ad page')
+
+        links_code = self._search_regex(
+            r'(?s)<div class="social align_c".*?>(.*?)<hr\s*/?>', webpage,
+            'links')
+        title = self._html_search_regex(
+            r'<title>(.*?)-\s*Vuclip</title>', webpage, 'title').strip()
+
+        quality_order = qualities(['Reg', 'Hi'])
+        formats = []
+        for url, q in re.findall(
+                r'<a href="(?P<url>[^"]+)".*?>(?P<q>[^<]+)</a>', links_code):
+            format_id = compat_urllib_parse_urlparse(url).scheme + '-' + q
+            formats.append({
+                'format_id': format_id,
+                'url': url,
+                'quality': quality_order(q),
+            })
+        self._sort_formats(formats)
+
+        duration = parse_duration(self._search_regex(
+            r'\(([0-9:]+)\)</span></h1>', webpage, 'duration', fatal=False))
+
+        return {
+            'id': video_id,
+            'formats': formats,
+            'title': title,
+            'duration': duration,
+        }
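The 139-second duration in the vuclip test comes from a "(2:19)" fragment on the page, fed through youtube-dl's parse_duration helper. A minimal standalone sketch of the same conversion, using an invented page snippet shaped like the one the extractor scrapes:

import re

def mmss_to_seconds(text):
    """Turn 'M:SS' (or 'H:MM:SS') into a number of seconds."""
    seconds = 0
    for part in text.split(':'):
        seconds = seconds * 60 + int(part)
    return seconds

# Invented page fragment shaped like the one the extractor scrapes.
page = '<h1>Movie Trailer: Noah <span>(2:19)</span></h1>'
raw = re.search(r'\(([0-9:]+)\)</span></h1>', page).group(1)
print(mmss_to_seconds(raw))  # -> 139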
@@ -14,8 +14,8 @@ from ..utils import (


 class YahooIE(InfoExtractor):
-    IE_DESC = 'Yahoo screen'
-    _VALID_URL = r'https?://screen\.yahoo\.com/.*?-(?P<id>[0-9]+)(?:-[a-z]+)?\.html'
+    IE_DESC = 'Yahoo screen and movies'
+    _VALID_URL = r'https?://(?:screen|movies)\.yahoo\.com/.*?-(?P<id>[0-9]+)(?:-[a-z]+)?\.html'
     _TESTS = [
         {
             'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
@@ -37,6 +37,16 @@ class YahooIE(InfoExtractor):
                 'description': 'Agent Topple\'s mustache does its dirty work, and Nicole brokers a deal for peace. But why is the NSA collecting millions of Instagram brunch photos? And if your waffles have nothing to hide, what are they so worried about?',
             },
         },
+        {
+            'url': 'https://movies.yahoo.com/video/world-loves-spider-man-190819223.html',
+            'md5': '410b7104aa9893b765bc22787a22f3d9',
+            'info_dict': {
+                'id': '516ed8e2-2c4f-339f-a211-7a8b49d30845',
+                'ext': 'mp4',
+                'title': 'The World Loves Spider-Man',
+                'description': '''People all over the world are celebrating the release of \"The Amazing Spider-Man 2.\" We're taking a look at the enthusiastic response Spider-Man has received from viewers all over the world.''',
+            }
+        }
     ]

     def _real_extract(self, url):
@@ -44,13 +54,20 @@ class YahooIE(InfoExtractor):
         video_id = mobj.group('id')
         webpage = self._download_webpage(url, video_id)

-        items_json = self._search_regex(r'mediaItems: ({.*?})$',
-            webpage, 'items', flags=re.MULTILINE)
-        items = json.loads(items_json)
-        info = items['mediaItems']['query']['results']['mediaObj'][0]
-        # The 'meta' field is not always in the video webpage, we request it
-        # from another page
-        long_id = info['id']
+        items_json = self._search_regex(
+            r'mediaItems: ({.*?})$', webpage, 'items', flags=re.MULTILINE,
+            default=None)
+        if items_json is None:
+            long_id = self._search_regex(
+                r'YUI\.namespace\("Media"\)\.CONTENT_ID\s*=\s*"([^"]+)"',
+                webpage, 'content ID')
+            video_id = long_id
+        else:
+            items = json.loads(items_json)
+            info = items['mediaItems']['query']['results']['mediaObj'][0]
+            # The 'meta' field is not always in the video webpage, we request it
+            # from another page
+            long_id = info['id']
         return self._get_info(long_id, video_id)

     def _get_info(self, long_id, video_id):
@@ -104,7 +121,7 @@ class YahooNewsIE(YahooIE):
     IE_NAME = 'yahoo:news'
     _VALID_URL = r'http://news\.yahoo\.com/video/.*?-(?P<id>\d*?)\.html'

-    _TEST = {
+    _TESTS = [{
         'url': 'http://news.yahoo.com/video/china-moses-crazy-blues-104538833.html',
         'md5': '67010fdf3a08d290e060a4dd96baa07b',
         'info_dict': {
@@ -113,10 +130,7 @@ class YahooNewsIE(YahooIE):
             'title': 'China Moses Is Crazy About the Blues',
             'description': 'md5:9900ab8cd5808175c7b3fe55b979bed0',
         },
-    }
-
-    # Overwrite YahooIE properties we don't want
-    _TESTS = []
+    }]

     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
@@ -1419,7 +1419,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
                 self.to_screen(u'Downloading just video %s because of --no-playlist' % video_id)
                 return self.url_result(video_id, 'Youtube', video_id=video_id)
             else:
-                self.to_screen(u'Downloading playlist PL%s - add --no-playlist to just download video %s' % (playlist_id, video_id))
+                self.to_screen(u'Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))

         if playlist_id.startswith('RD'):
             # Mixes require a custom extraction process
@@ -923,9 +923,6 @@ def _windows_write_string(s, out):
         2: -12,
     }

-    def ucs2_len(s):
-        return sum((2 if ord(c) > 0xffff else 1) for c in s)
-
     fileno = out.fileno()
     if fileno not in WIN_OUTPUT_IDS:
         return False
@@ -959,13 +956,25 @@ def _windows_write_string(s, out):
     if not_a_console(h):
         return False

-    remaining = ucs2_len(s)
-    while remaining > 0:
+    def next_nonbmp_pos(s):
+        try:
+            return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
+        except StopIteration:
+            return len(s)
+
+    while s:
+        count = min(next_nonbmp_pos(s), 1024)
+
         ret = WriteConsoleW(
-            h, s, min(remaining, 1024), ctypes.byref(written), None)
+            h, s, count if count else 2, ctypes.byref(written), None)
         if ret == 0:
             raise OSError('Failed to write string')
-        remaining -= written.value
+        if not count:  # We just wrote a non-BMP character
+            assert written.value == 2
+            s = s[1:]
+        else:
+            assert written.value > 0
+            s = s[written.value:]
     return True

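To make the new chunking logic concrete, here is a standalone check of next_nonbmp_pos exactly as defined above. It assumes a wide Unicode build, where ord() sees astral (non-BMP) characters directly:

def next_nonbmp_pos(s):
    try:
        return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
    except StopIteration:
        return len(s)

print(next_nonbmp_pos('plain ascii'))          # 11 -> the whole string is BMP-only
print(next_nonbmp_pos('ab\U0001F600cd'))       # 2  -> write 'ab' first, then the astral char on its own
print(min(next_nonbmp_pos('x' * 5000), 1024))  # 1024 -> the chunk cap used in the write loop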
@@ -1236,7 +1245,10 @@ class HEADRequest(compat_urllib_request.Request):
         return "HEAD"


-def int_or_none(v, scale=1, default=None):
+def int_or_none(v, scale=1, default=None, get_attr=None):
+    if get_attr:
+        if v is not None:
+            v = getattr(v, get_attr, None)
     return default if v is None else (int(v) // scale)

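The new get_attr parameter lets callers pull a numeric attribute off an object before the usual None/scale handling. A standalone sketch of the extended helper with a few illustrative calls (the FakeResponse class is invented for the example):

def int_or_none(v, scale=1, default=None, get_attr=None):
    # Same shape as the patched helper above.
    if get_attr:
        if v is not None:
            v = getattr(v, get_attr, None)
    return default if v is None else (int(v) // scale)


class FakeResponse(object):
    """Invented stand-in for an object carrying a numeric attribute."""
    code = '206'


print(int_or_none('5000', scale=1000))                  # 5  (e.g. milliseconds -> seconds)
print(int_or_none(None, default=-1))                    # -1
print(int_or_none(FakeResponse(), get_attr='code'))     # 206
print(int_or_none(FakeResponse(), get_attr='missing'))  # None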
@@ -1397,3 +1409,14 @@ US_RATINGS = {

 def strip_jsonp(code):
     return re.sub(r'(?s)^[a-zA-Z_]+\s*\(\s*(.*)\);\s*?\s*$', r'\1', code)
+
+
+def qualities(quality_ids):
+    """ Get a numeric quality value out of a list of possible values """
+    def q(qid):
+        try:
+            return quality_ids.index(qid)
+        except ValueError:
+            return -1
+    return q
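A small self-contained check of the new qualities helper, using the same preference list the vuclip extractor passes above; the returned closure maps a quality label to its position in the list, with unknown labels sorting last:

def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values """
    def q(qid):
        try:
            return quality_ids.index(qid)
        except ValueError:
            return -1
    return q

quality_order = qualities(['Reg', 'Hi'])  # same preference list as in vuclip.py above
print(quality_order('Reg'))      # 0
print(quality_order('Hi'))       # 1  -> preferred by _sort_formats
print(quality_order('Unknown'))  # -1 -> unknown labels sort last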
@@ -1,2 +1,2 @@

-__version__ = '2014.04.13'
+__version__ = '2014.04.21.5'