Compare commits
54 Commits
2014.09.04
...
2014.09.12
Author | SHA1 | Date | |
---|---|---|---|
|
473219a778 | ||
|
aa37e3d486 | ||
|
edb53e2dc3 | ||
|
f401a4173b | ||
|
de00ff6494 | ||
|
3616997929 | ||
|
eef93b0912 | ||
|
08d037309e | ||
|
6024b0f25e | ||
|
561266641f | ||
|
6899f2fe9e | ||
|
136c8bd275 | ||
|
1bf5423e82 | ||
|
2c5c1f48e9 | ||
|
d8e487fbd9 | ||
|
bc7ff0a8dd | ||
|
8e9da53140 | ||
|
f1d15e6dbc | ||
|
91ebb17ede | ||
|
c63b30901b | ||
|
f009f19ece | ||
|
68477e8839 | ||
|
0dc5365564 | ||
|
9face18d08 | ||
|
ff0ba8ce0f | ||
|
759c6293bd | ||
|
3fbeb95e14 | ||
|
6e25f51cdf | ||
|
321c1e44f9 | ||
|
cc7fec5818 | ||
|
5fb9077e8c | ||
|
8e20f81c5b | ||
|
e154762c74 | ||
|
ba92ab3d05 | ||
|
a2f0cdc074 | ||
|
70a1ecd2c1 | ||
|
88a23aef5a | ||
|
140d8d77b3 | ||
|
665cd96929 | ||
|
4d067a58ca | ||
|
1c1cff6a52 | ||
|
f063a04f07 | ||
|
af8812bb9b | ||
|
78149a962b | ||
|
f2d9e3a370 | ||
|
16e6f396b4 | ||
|
c6ec6b2e8b | ||
|
7bbc6428b6 | ||
|
c1a3c9ddb2 | ||
|
feec0f56f5 | ||
|
8029857d27 | ||
|
aa61802c1e | ||
|
f54aee0209 | ||
|
5df921b0e3 |
@@ -43,16 +43,16 @@ class TestCache(unittest.TestCase):
|
||||
})
|
||||
c = Cache(ydl)
|
||||
obj = {'x': 1, 'y': ['ä', '\\a', True]}
|
||||
self.assertEqual(c.load('test_cache', 'k'), None)
|
||||
c.store('test_cache', 'k', obj)
|
||||
self.assertEqual(c.load('test_cache', 'k.'), None)
|
||||
c.store('test_cache', 'k.', obj)
|
||||
self.assertEqual(c.load('test_cache', 'k2'), None)
|
||||
self.assertFalse(_is_empty(self.test_dir))
|
||||
self.assertEqual(c.load('test_cache', 'k'), obj)
|
||||
self.assertEqual(c.load('test_cache', 'k.'), obj)
|
||||
self.assertEqual(c.load('test_cache', 'y'), None)
|
||||
self.assertEqual(c.load('test_cache2', 'k'), None)
|
||||
self.assertEqual(c.load('test_cache2', 'k.'), None)
|
||||
c.remove()
|
||||
self.assertFalse(os.path.exists(self.test_dir))
|
||||
self.assertEqual(c.load('test_cache', 'k'), None)
|
||||
self.assertEqual(c.load('test_cache', 'k.'), None)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
@@ -74,6 +74,7 @@ __authors__ = (
|
||||
'Keith Beckman',
|
||||
'Ole Ernst',
|
||||
'Aaron McDaniel (mcd1992)',
|
||||
'Magnus Kolstad',
|
||||
)
|
||||
|
||||
__license__ = 'Public Domain'
|
||||
|
@@ -193,7 +193,8 @@ class HttpFD(FileDownloader):
|
||||
self.to_stderr(u"\n")
|
||||
self.report_error(u'Did not get any data blocks')
|
||||
return False
|
||||
stream.close()
|
||||
if tmpfilename != u'-':
|
||||
stream.close()
|
||||
self.report_finish(data_len_str, (time.time() - start))
|
||||
if data_len is not None and byte_counter != data_len:
|
||||
raise ContentTooShortError(byte_counter, int(data_len))
|
||||
|
@@ -67,6 +67,7 @@ from .dailymotion import (
|
||||
DailymotionUserIE,
|
||||
)
|
||||
from .daum import DaumIE
|
||||
from .dbtv import DBTVIE
|
||||
from .dfb import DFBIE
|
||||
from .dotsub import DotsubIE
|
||||
from .dreisat import DreiSatIE
|
||||
@@ -139,6 +140,7 @@ from .hark import HarkIE
|
||||
from .helsinki import HelsinkiIE
|
||||
from .hentaistigma import HentaiStigmaIE
|
||||
from .hornbunny import HornBunnyIE
|
||||
from .hostingbulk import HostingBulkIE
|
||||
from .hotnewhiphop import HotNewHipHopIE
|
||||
from .howcast import HowcastIE
|
||||
from .howstuffworks import HowStuffWorksIE
|
||||
@@ -199,6 +201,7 @@ from .mitele import MiTeleIE
|
||||
from .mixcloud import MixcloudIE
|
||||
from .mlb import MLBIE
|
||||
from .mpora import MporaIE
|
||||
from .moevideo import MoeVideoIE
|
||||
from .mofosex import MofosexIE
|
||||
from .mojvideo import MojvideoIE
|
||||
from .mooshare import MooshareIE
|
||||
@@ -295,6 +298,7 @@ from .scivee import SciVeeIE
|
||||
from .screencast import ScreencastIE
|
||||
from .servingsys import ServingSysIE
|
||||
from .shared import SharedIE
|
||||
from .sharesix import ShareSixIE
|
||||
from .sina import SinaIE
|
||||
from .slideshare import SlideshareIE
|
||||
from .slutload import SlutloadIE
|
||||
@@ -374,6 +378,7 @@ from .veehd import VeeHDIE
|
||||
from .veoh import VeohIE
|
||||
from .vesti import VestiIE
|
||||
from .vevo import VevoIE
|
||||
from .vgtv import VGTVIE
|
||||
from .vh1 import VH1IE
|
||||
from .viddler import ViddlerIE
|
||||
from .videobam import VideoBamIE
|
||||
|
@@ -78,7 +78,8 @@ class ArteTVPlus7IE(InfoExtractor):
|
||||
|
||||
def _extract_from_webpage(self, webpage, video_id, lang):
|
||||
json_url = self._html_search_regex(
|
||||
r'arte_vp_url="(.*?)"', webpage, 'json vp url')
|
||||
[r'arte_vp_url=["\'](.*?)["\']', r'data-url=["\']([^"]+)["\']'],
|
||||
webpage, 'json vp url')
|
||||
return self._extract_from_json_url(json_url, video_id, lang)
|
||||
|
||||
def _extract_from_json_url(self, json_url, video_id, lang):
|
||||
|
@@ -1,11 +1,13 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
translation_table = {
|
||||
_translation_table = {
|
||||
'a': 'h', 'd': 'e', 'e': 'v', 'f': 'o', 'g': 'f', 'i': 'd', 'l': 'n',
|
||||
'm': 'a', 'n': 'm', 'p': 'u', 'q': 't', 'r': 's', 'v': 'p', 'x': 'r',
|
||||
'y': 'l', 'z': 'i',
|
||||
@@ -13,6 +15,10 @@ translation_table = {
|
||||
}
|
||||
|
||||
|
||||
def _decode(s):
|
||||
return ''.join(_translation_table.get(c, c) for c in s)
|
||||
|
||||
|
||||
class CliphunterIE(InfoExtractor):
|
||||
IE_NAME = 'cliphunter'
|
||||
|
||||
@@ -22,10 +28,14 @@ class CliphunterIE(InfoExtractor):
|
||||
'''
|
||||
_TEST = {
|
||||
'url': 'http://www.cliphunter.com/w/1012420/Fun_Jynx_Maze_solo',
|
||||
'file': '1012420.flv',
|
||||
'md5': '15e7740f30428abf70f4223478dc1225',
|
||||
'md5': 'a2ba71eebf523859fe527a61018f723e',
|
||||
'info_dict': {
|
||||
'id': '1012420',
|
||||
'ext': 'mp4',
|
||||
'title': 'Fun Jynx Maze solo',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'age_limit': 18,
|
||||
'duration': 1317,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -35,22 +45,55 @@ class CliphunterIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_title = self._search_regex(
|
||||
r'mediaTitle = "([^"]+)"', webpage, 'title')
|
||||
|
||||
pl_fiji = self._search_regex(
|
||||
r'pl_fiji = \'([^\']+)\'', webpage, 'video data')
|
||||
pl_c_qual = self._search_regex(
|
||||
r'pl_c_qual = "(.)"', webpage, 'video quality')
|
||||
video_title = self._search_regex(
|
||||
r'mediaTitle = "([^"]+)"', webpage, 'title')
|
||||
|
||||
video_url = ''.join(translation_table.get(c, c) for c in pl_fiji)
|
||||
|
||||
video_url = _decode(pl_fiji)
|
||||
formats = [{
|
||||
'url': video_url,
|
||||
'format_id': pl_c_qual,
|
||||
'format_id': 'default-%s' % pl_c_qual,
|
||||
}]
|
||||
|
||||
qualities_json = self._search_regex(
|
||||
r'var pl_qualities\s*=\s*(.*?);\n', webpage, 'quality info')
|
||||
qualities_data = json.loads(qualities_json)
|
||||
|
||||
for i, t in enumerate(
|
||||
re.findall(r"pl_fiji_([a-z0-9]+)\s*=\s*'([^']+')", webpage)):
|
||||
quality_id, crypted_url = t
|
||||
video_url = _decode(crypted_url)
|
||||
f = {
|
||||
'format_id': quality_id,
|
||||
'url': video_url,
|
||||
'quality': i,
|
||||
}
|
||||
if quality_id in qualities_data:
|
||||
qd = qualities_data[quality_id]
|
||||
m = re.match(
|
||||
r'''(?x)<b>(?P<width>[0-9]+)x(?P<height>[0-9]+)<\\/b>
|
||||
\s*\(\s*(?P<tbr>[0-9]+)\s*kb\\/s''', qd)
|
||||
if m:
|
||||
f['width'] = int(m.group('width'))
|
||||
f['height'] = int(m.group('height'))
|
||||
f['tbr'] = int(m.group('tbr'))
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnail = self._search_regex(
|
||||
r"var\s+mov_thumb\s*=\s*'([^']+)';",
|
||||
webpage, 'thumbnail', fatal=False)
|
||||
duration = int_or_none(self._search_regex(
|
||||
r'pl_dur\s*=\s*([0-9]+)', webpage, 'duration', fatal=False))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
'formats': formats,
|
||||
'duration': duration,
|
||||
'age_limit': self._rta_search(webpage),
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
|
74
youtube_dl/extractor/dbtv.py
Normal file
74
youtube_dl/extractor/dbtv.py
Normal file
@@ -0,0 +1,74 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
clean_html,
|
||||
)
|
||||
|
||||
|
||||
class DBTVIE(InfoExtractor):
|
||||
_VALID_URL = r'http://dbtv\.no/(?P<id>[0-9]+)#(?P<display_id>.+)'
|
||||
_TEST = {
|
||||
'url': 'http://dbtv.no/3649835190001#Skulle_teste_ut_fornøyelsespark,_men_kollegaen_var_bare_opptatt_av_bikinikroppen',
|
||||
'md5': 'b89953ed25dacb6edb3ef6c6f430f8bc',
|
||||
'info_dict': {
|
||||
'id': '33100',
|
||||
'display_id': 'Skulle_teste_ut_fornøyelsespark,_men_kollegaen_var_bare_opptatt_av_bikinikroppen',
|
||||
'ext': 'mp4',
|
||||
'title': 'Skulle teste ut fornøyelsespark, men kollegaen var bare opptatt av bikinikroppen',
|
||||
'description': 'md5:1504a54606c4dde3e4e61fc97aa857e0',
|
||||
'thumbnail': 're:https?://.*\.jpg$',
|
||||
'timestamp': 1404039863.438,
|
||||
'upload_date': '20140629',
|
||||
'duration': 69.544,
|
||||
'view_count': int,
|
||||
'categories': list,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id')
|
||||
|
||||
data = self._download_json(
|
||||
'http://api.dbtv.no/discovery/%s' % video_id, display_id)
|
||||
|
||||
video = data['playlist'][0]
|
||||
|
||||
formats = [{
|
||||
'url': f['URL'],
|
||||
'vcodec': f.get('container'),
|
||||
'width': int_or_none(f.get('width')),
|
||||
'height': int_or_none(f.get('height')),
|
||||
'vbr': float_or_none(f.get('rate'), 1000),
|
||||
'filesize': int_or_none(f.get('size')),
|
||||
} for f in video['renditions'] if 'URL' in f]
|
||||
|
||||
if not formats:
|
||||
for url_key, format_id in [('URL', 'mp4'), ('HLSURL', 'hls')]:
|
||||
if url_key in video:
|
||||
formats.append({
|
||||
'url': video[url_key],
|
||||
'format_id': format_id,
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video['id'],
|
||||
'display_id': display_id,
|
||||
'title': video['title'],
|
||||
'description': clean_html(video['desc']),
|
||||
'thumbnail': video.get('splash') or video.get('thumb'),
|
||||
'timestamp': float_or_none(video.get('publishedAt'), 1000),
|
||||
'duration': float_or_none(video.get('length'), 1000),
|
||||
'view_count': int_or_none(video.get('views')),
|
||||
'categories': video.get('tags'),
|
||||
'formats': formats,
|
||||
}
|
@@ -628,7 +628,7 @@ class GenericIE(InfoExtractor):
|
||||
embedSWF\(?:\s*
|
||||
)
|
||||
(["\'])
|
||||
(?P<url>(?:https?:)?//(?:www\.)?youtube\.com/
|
||||
(?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
|
||||
(?:embed|v)/.+?)
|
||||
\1''', webpage)
|
||||
if matches:
|
||||
|
84
youtube_dl/extractor/hostingbulk.py
Normal file
84
youtube_dl/extractor/hostingbulk.py
Normal file
@@ -0,0 +1,84 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
compat_urllib_request,
|
||||
int_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class HostingBulkIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?:www\.)?hostingbulk\.com/
|
||||
(?:embed-)?(?P<id>[A-Za-z0-9]{12})(?:-\d+x\d+)?\.html'''
|
||||
_FILE_DELETED_REGEX = r'<b>File Not Found</b>'
|
||||
_TEST = {
|
||||
'url': 'http://hostingbulk.com/n0ulw1hv20fm.html',
|
||||
'md5': '6c8653c8ecf7ebfa83b76e24b7b2fe3f',
|
||||
'info_dict': {
|
||||
'id': 'n0ulw1hv20fm',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:5afeba33f48ec87219c269e054afd622',
|
||||
'filesize': 6816081,
|
||||
'thumbnail': 're:^http://.*\.jpg$',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
url = 'http://hostingbulk.com/{0:}.html'.format(video_id)
|
||||
|
||||
# Custom request with cookie to set language to English, so our file
|
||||
# deleted regex would work.
|
||||
request = compat_urllib_request.Request(
|
||||
url, headers={'Cookie': 'lang=english'})
|
||||
webpage = self._download_webpage(request, video_id)
|
||||
|
||||
if re.search(self._FILE_DELETED_REGEX, webpage) is not None:
|
||||
raise ExtractorError('Video %s does not exist' % video_id,
|
||||
expected=True)
|
||||
|
||||
title = self._html_search_regex(r'<h3>(.*?)</h3>', webpage, 'title')
|
||||
filesize = int_or_none(
|
||||
self._search_regex(
|
||||
r'<small>\((\d+)\sbytes?\)</small>',
|
||||
webpage,
|
||||
'filesize',
|
||||
fatal=False
|
||||
)
|
||||
)
|
||||
thumbnail = self._search_regex(
|
||||
r'<img src="([^"]+)".+?class="pic"',
|
||||
webpage, 'thumbnail', fatal=False)
|
||||
|
||||
fields = dict(re.findall(r'''(?x)<input\s+
|
||||
type="hidden"\s+
|
||||
name="([^"]+)"\s+
|
||||
value="([^"]*)"
|
||||
''', webpage))
|
||||
|
||||
request = compat_urllib_request.Request(url, urlencode_postdata(fields))
|
||||
request.add_header('Content-type', 'application/x-www-form-urlencoded')
|
||||
response = self._request_webpage(request, video_id,
|
||||
'Submiting download request')
|
||||
video_url = response.geturl()
|
||||
|
||||
formats = [{
|
||||
'format_id': 'sd',
|
||||
'filesize': filesize,
|
||||
'url': video_url,
|
||||
}]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'formats': formats,
|
||||
}
|
@@ -9,29 +9,50 @@ from ..utils import (
|
||||
parse_iso8601,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
str_to_int,
|
||||
)
|
||||
|
||||
|
||||
class IzleseneIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:(?:www|m)\.)?izlesene\.com/(?:video|embedplayer)/(?:[^/]+/)?(?P<id>[0-9]+)'
|
||||
_STREAM_URL = 'http://panel.izlesene.com/api/streamurl/{id:}/{format:}'
|
||||
_TEST = {
|
||||
'url': 'http://www.izlesene.com/video/sevincten-cildirtan-dogum-gunu-hediyesi/7599694',
|
||||
'md5': '4384f9f0ea65086734b881085ee05ac2',
|
||||
'info_dict': {
|
||||
'id': '7599694',
|
||||
'ext': 'mp4',
|
||||
'title': 'Sevinçten Çıldırtan Doğum Günü Hediyesi',
|
||||
'description': 'Annesi oğluna doğum günü hediyesi olarak minecraft cd si alıyor, ve çocuk hunharca seviniyor',
|
||||
'thumbnail': 're:^http://.*\.jpg',
|
||||
'uploader_id': 'pelikzzle',
|
||||
'timestamp': 1404298698,
|
||||
'upload_date': '20140702',
|
||||
'duration': 95.395,
|
||||
'age_limit': 0,
|
||||
}
|
||||
}
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?:(?:www|m)\.)?izlesene\.com/
|
||||
(?:video|embedplayer)/(?:[^/]+/)?(?P<id>[0-9]+)
|
||||
'''
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.izlesene.com/video/sevincten-cildirtan-dogum-gunu-hediyesi/7599694',
|
||||
'md5': '4384f9f0ea65086734b881085ee05ac2',
|
||||
'info_dict': {
|
||||
'id': '7599694',
|
||||
'ext': 'mp4',
|
||||
'title': 'Sevinçten Çıldırtan Doğum Günü Hediyesi',
|
||||
'description': 'md5:253753e2655dde93f59f74b572454f6d',
|
||||
'thumbnail': 're:^http://.*\.jpg',
|
||||
'uploader_id': 'pelikzzle',
|
||||
'timestamp': 1404298698,
|
||||
'upload_date': '20140702',
|
||||
'duration': 95.395,
|
||||
'age_limit': 0,
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://www.izlesene.com/video/tarkan-dortmund-2006-konseri/17997',
|
||||
'md5': '97f09b6872bffa284cb7fa4f6910cb72',
|
||||
'info_dict': {
|
||||
'id': '17997',
|
||||
'ext': 'mp4',
|
||||
'title': 'Tarkan Dortmund 2006 Konseri',
|
||||
'description': 'Tarkan Dortmund 2006 Konseri',
|
||||
'thumbnail': 're:^http://.*\.jpg',
|
||||
'uploader_id': 'parlayankiz',
|
||||
'timestamp': 1163318593,
|
||||
'upload_date': '20061112',
|
||||
'duration': 253.666,
|
||||
'age_limit': 0,
|
||||
}
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
@@ -45,18 +66,19 @@ class IzleseneIE(InfoExtractor):
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
|
||||
uploader = self._html_search_regex(
|
||||
r"adduserUsername\s*=\s*'([^']+)';", webpage, 'uploader', fatal=False, default='')
|
||||
r"adduserUsername\s*=\s*'([^']+)';",
|
||||
webpage, 'uploader', fatal=False, default='')
|
||||
timestamp = parse_iso8601(self._html_search_meta(
|
||||
'uploadDate', webpage, 'upload date', fatal=False))
|
||||
|
||||
duration = int_or_none(self._html_search_regex(
|
||||
r'"videoduration"\s*:\s*"([^"]+)"', webpage, 'duration', fatal=False))
|
||||
if duration:
|
||||
duration /= 1000.0
|
||||
duration = float_or_none(self._html_search_regex(
|
||||
r'"videoduration"\s*:\s*"([^"]+)"',
|
||||
webpage, 'duration', fatal=False), scale=1000)
|
||||
|
||||
view_count = str_to_int(get_element_by_id('videoViewCount', webpage))
|
||||
comment_count = self._html_search_regex(
|
||||
r'comment_count\s*=\s*\'([^\']+)\';', webpage, 'uploader', fatal=False)
|
||||
r'comment_count\s*=\s*\'([^\']+)\';',
|
||||
webpage, 'comment_count', fatal=False)
|
||||
|
||||
family_friendly = self._html_search_meta(
|
||||
'isFamilyFriendly', webpage, 'age limit', fatal=False)
|
||||
@@ -66,20 +88,26 @@ class IzleseneIE(InfoExtractor):
|
||||
ext = determine_ext(content_url, 'mp4')
|
||||
|
||||
# Might be empty for some videos.
|
||||
qualities = self._html_search_regex(
|
||||
r'"quality"\s*:\s*"([^"]+)"', webpage, 'qualities', fatal=False, default='')
|
||||
streams = self._html_search_regex(
|
||||
r'"qualitylevel"\s*:\s*"([^"]+)"',
|
||||
webpage, 'streams', fatal=False, default='')
|
||||
|
||||
formats = []
|
||||
for quality in qualities.split('|'):
|
||||
json = self._download_json(
|
||||
self._STREAM_URL.format(id=video_id, format=quality), video_id,
|
||||
note='Getting video URL for "%s" quality' % quality,
|
||||
errnote='Failed to get video URL for "%s" quality' % quality
|
||||
)
|
||||
if streams:
|
||||
for stream in streams.split('|'):
|
||||
quality, url = re.search(r'\[(\w+)\](.+)', stream).groups()
|
||||
formats.append({
|
||||
'format_id': '%sp' % quality if quality else 'sd',
|
||||
'url': url,
|
||||
'ext': ext,
|
||||
})
|
||||
else:
|
||||
stream_url = self._search_regex(
|
||||
r'"streamurl"\s?:\s?"([^"]+)"', webpage, 'stream URL')
|
||||
formats.append({
|
||||
'url': json.get('streamurl'),
|
||||
'format_id': 'sd',
|
||||
'url': stream_url,
|
||||
'ext': ext,
|
||||
'format_id': '%sp' % quality if quality else 'sd',
|
||||
})
|
||||
|
||||
return {
|
||||
|
@@ -9,7 +9,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class KhanAcademyIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://(?:www\.)?khanacademy\.org/(?P<key>[^/]+)/(?:[^/]+/){,2}(?P<id>[^?#/]+)(?:$|[?#])'
|
||||
_VALID_URL = r'^https?://(?:(?:www|api)\.)?khanacademy\.org/(?P<key>[^/]+)/(?:[^/]+/){,2}(?P<id>[^?#/]+)(?:$|[?#])'
|
||||
IE_NAME = 'KhanAcademy'
|
||||
|
||||
_TESTS = [{
|
||||
|
112
youtube_dl/extractor/moevideo.py
Normal file
112
youtube_dl/extractor/moevideo.py
Normal file
@@ -0,0 +1,112 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class MoeVideoIE(InfoExtractor):
|
||||
IE_DESC = 'LetitBit video services: moevideo.net, playreplay.net and videochart.net'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?P<host>(?:www\.)?
|
||||
(?:(?:moevideo|playreplay|videochart)\.net))/
|
||||
(?:video|framevideo)/(?P<id>[0-9]+\.[0-9A-Za-z]+)'''
|
||||
_API_URL = 'http://api.letitbit.net/'
|
||||
_API_KEY = 'tVL0gjqo5'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://moevideo.net/video/00297.0036103fe3d513ef27915216fd29',
|
||||
'md5': '129f5ae1f6585d0e9bb4f38e774ffb3a',
|
||||
'info_dict': {
|
||||
'id': '00297.0036103fe3d513ef27915216fd29',
|
||||
'ext': 'flv',
|
||||
'title': 'Sink cut out machine',
|
||||
'description': 'md5:f29ff97b663aefa760bf7ca63c8ca8a8',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'width': 540,
|
||||
'height': 360,
|
||||
'duration': 179,
|
||||
'filesize': 17822500,
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://playreplay.net/video/77107.7f325710a627383d40540d8e991a',
|
||||
'md5': '74f0a014d5b661f0f0e2361300d1620e',
|
||||
'info_dict': {
|
||||
'id': '77107.7f325710a627383d40540d8e991a',
|
||||
'ext': 'flv',
|
||||
'title': 'Operacion Condor.',
|
||||
'description': 'md5:7e68cb2fcda66833d5081c542491a9a3',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'width': 480,
|
||||
'height': 296,
|
||||
'duration': 6027,
|
||||
'filesize': 588257923,
|
||||
}
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(
|
||||
'http://%s/video/%s' % (mobj.group('host'), video_id),
|
||||
video_id, 'Downloading webpage')
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
description = self._og_search_description(webpage)
|
||||
|
||||
r = [
|
||||
self._API_KEY,
|
||||
[
|
||||
'preview/flv_link',
|
||||
{
|
||||
'uid': video_id,
|
||||
},
|
||||
],
|
||||
]
|
||||
r_json = json.dumps(r)
|
||||
post = compat_urllib_parse.urlencode({'r': r_json})
|
||||
req = compat_urllib_request.Request(self._API_URL, post)
|
||||
req.add_header('Content-type', 'application/x-www-form-urlencoded')
|
||||
|
||||
response = self._download_json(req, video_id)
|
||||
if response['status'] != 'OK':
|
||||
raise ExtractorError(
|
||||
'%s returned error: %s' % (self.IE_NAME, response['data']),
|
||||
expected=True
|
||||
)
|
||||
item = response['data'][0]
|
||||
video_url = item['link']
|
||||
duration = int_or_none(item['length'])
|
||||
width = int_or_none(item['width'])
|
||||
height = int_or_none(item['height'])
|
||||
filesize = int_or_none(item['convert_size'])
|
||||
|
||||
formats = [{
|
||||
'format_id': 'sd',
|
||||
'http_headers': {'Range': 'bytes=0-'}, # Required to download
|
||||
'url': video_url,
|
||||
'width': width,
|
||||
'height': height,
|
||||
'filesize': filesize,
|
||||
}]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}
|
@@ -3,18 +3,23 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
remove_end,
|
||||
parse_duration,
|
||||
)
|
||||
|
||||
|
||||
class NBAIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:watch\.|www\.)?nba\.com/(?:nba/)?video(?P<id>/[^?]*?)(?:/index\.html)?(?:\?.*)?$'
|
||||
_TEST = {
|
||||
'url': 'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html',
|
||||
'md5': u'c0edcfc37607344e2ff8f13c378c88a4',
|
||||
'md5': 'c0edcfc37607344e2ff8f13c378c88a4',
|
||||
'info_dict': {
|
||||
'id': '0021200253-okc-bkn-recap.nba',
|
||||
'ext': 'mp4',
|
||||
'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.',
|
||||
'title': 'Thunder vs. Nets',
|
||||
'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.',
|
||||
'duration': 181,
|
||||
},
|
||||
}
|
||||
|
||||
@@ -27,13 +32,18 @@ class NBAIE(InfoExtractor):
|
||||
video_url = 'http://ht-mobile.cdn.turner.com/nba/big' + video_id + '_nba_1280x720.mp4'
|
||||
|
||||
shortened_video_id = video_id.rpartition('/')[2]
|
||||
title = self._og_search_title(webpage, default=shortened_video_id).replace('NBA.com: ', '')
|
||||
title = remove_end(
|
||||
self._og_search_title(webpage, default=shortened_video_id), ' : NBA.com')
|
||||
|
||||
description = self._og_search_description(webpage)
|
||||
duration = parse_duration(
|
||||
self._html_search_meta('duration', webpage, 'duration', fatal=False))
|
||||
|
||||
description = self._html_search_regex(r'<meta name="description" (?:content|value)="(.*?)" />', webpage, 'description', fatal=False)
|
||||
|
||||
return {
|
||||
'id': shortened_video_id,
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
}
|
||||
|
@@ -5,7 +5,10 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_request,
|
||||
compat_urllib_parse,
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
unified_strdate,
|
||||
compat_str,
|
||||
)
|
||||
@@ -13,6 +16,8 @@ from ..utils import (
|
||||
|
||||
class NocoIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:(?:www\.)?noco\.tv/emission/|player\.noco\.tv/\?idvideo=)(?P<id>\d+)'
|
||||
_LOGIN_URL = 'http://noco.tv/do.php'
|
||||
_NETRC_MACHINE = 'noco'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://noco.tv/emission/11538/nolife/ami-ami-idol-hello-france/',
|
||||
@@ -30,6 +35,28 @@ class NocoIE(InfoExtractor):
|
||||
'skip': 'Requires noco account',
|
||||
}
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
def _login(self):
|
||||
(username, password) = self._get_login_info()
|
||||
if username is None:
|
||||
return
|
||||
|
||||
login_form = {
|
||||
'a': 'login',
|
||||
'cookie': '1',
|
||||
'username': username,
|
||||
'password': password,
|
||||
}
|
||||
request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
|
||||
request.add_header('Content-Type', 'application/x-www-form-urlencoded; charset=UTF-8')
|
||||
|
||||
login = self._download_json(request, None, 'Logging in as %s' % username)
|
||||
|
||||
if 'erreur' in login:
|
||||
raise ExtractorError('Unable to login: %s' % clean_html(login['erreur']), expected=True)
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
@@ -5,8 +5,9 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse,
|
||||
ExtractorError,
|
||||
compat_urllib_request,
|
||||
urlencode_postdata,
|
||||
xpath_with_ns,
|
||||
)
|
||||
|
||||
@@ -18,11 +19,12 @@ class NosVideoIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?nosvideo\.com/' + \
|
||||
'(?:embed/|\?v=)(?P<id>[A-Za-z0-9]{12})/?'
|
||||
_PLAYLIST_URL = 'http://nosvideo.com/xml/{xml_id:s}.xml'
|
||||
_FILE_DELETED_REGEX = r'<b>File Not Found</b>'
|
||||
_TEST = {
|
||||
'url': 'http://nosvideo.com/?v=drlp6s40kg54',
|
||||
'md5': '4b4ac54c6ad5d70ab88f2c2c6ccec71c',
|
||||
'url': 'http://nosvideo.com/?v=mu8fle7g7rpq',
|
||||
'md5': '6124ed47130d8be3eacae635b071e6b6',
|
||||
'info_dict': {
|
||||
'id': 'drlp6s40kg54',
|
||||
'id': 'mu8fle7g7rpq',
|
||||
'ext': 'mp4',
|
||||
'title': 'big_buck_bunny_480p_surround-fix.avi.mp4',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
@@ -38,11 +40,14 @@ class NosVideoIE(InfoExtractor):
|
||||
'op': 'download1',
|
||||
'method_free': 'Continue to Video',
|
||||
}
|
||||
post = compat_urllib_parse.urlencode(fields)
|
||||
req = compat_urllib_request.Request(url, post)
|
||||
req = compat_urllib_request.Request(url, urlencode_postdata(fields))
|
||||
req.add_header('Content-type', 'application/x-www-form-urlencoded')
|
||||
webpage = self._download_webpage(req, video_id,
|
||||
'Downloading download page')
|
||||
if re.search(self._FILE_DELETED_REGEX, webpage) is not None:
|
||||
raise ExtractorError('Video %s does not exist' % video_id,
|
||||
expected=True)
|
||||
|
||||
xml_id = self._search_regex(r'php\|([^\|]+)\|', webpage, 'XML ID')
|
||||
playlist_url = self._PLAYLIST_URL.format(xml_id=xml_id)
|
||||
playlist = self._download_xml(playlist_url, video_id)
|
||||
|
@@ -100,7 +100,7 @@ class RUTVIE(InfoExtractor):
|
||||
return mobj.group('url')
|
||||
|
||||
mobj = re.search(
|
||||
r'<meta[^>]+?property=(["\'])og:video\1[^>]+?content=(["\'])(?P<url>http://player\.(?:rutv\.ru|vgtrk\.com)/flash2v/container\.swf\?id=.+?\2)',
|
||||
r'<meta[^>]+?property=(["\'])og:video\1[^>]+?content=(["\'])(?P<url>https?://player\.(?:rutv\.ru|vgtrk\.com)/flash2v/container\.swf\?id=.+?\2)',
|
||||
webpage)
|
||||
if mobj:
|
||||
return mobj.group('url')
|
||||
|
91
youtube_dl/extractor/sharesix.py
Normal file
91
youtube_dl/extractor/sharesix.py
Normal file
@@ -0,0 +1,91 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
parse_duration,
|
||||
)
|
||||
|
||||
|
||||
class ShareSixIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?sharesix\.com/(?:f/)?(?P<id>[0-9a-zA-Z]+)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://sharesix.com/f/OXjQ7Y6',
|
||||
'md5': '9e8e95d8823942815a7d7c773110cc93',
|
||||
'info_dict': {
|
||||
'id': 'OXjQ7Y6',
|
||||
'ext': 'mp4',
|
||||
'title': 'big_buck_bunny_480p_surround-fix.avi',
|
||||
'duration': 596,
|
||||
'width': 854,
|
||||
'height': 480,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://sharesix.com/lfrwoxp35zdd',
|
||||
'md5': 'dd19f1435b7cec2d7912c64beeee8185',
|
||||
'info_dict': {
|
||||
'id': 'lfrwoxp35zdd',
|
||||
'ext': 'flv',
|
||||
'title': 'WhiteBoard___a_Mac_vs_PC_Parody_Cartoon.mp4.flv',
|
||||
'duration': 65,
|
||||
'width': 1280,
|
||||
'height': 720,
|
||||
},
|
||||
}
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
fields = {
|
||||
'method_free': 'Free'
|
||||
}
|
||||
post = compat_urllib_parse.urlencode(fields)
|
||||
req = compat_urllib_request.Request(url, post)
|
||||
req.add_header('Content-type', 'application/x-www-form-urlencoded')
|
||||
|
||||
webpage = self._download_webpage(req, video_id,
|
||||
'Downloading video page')
|
||||
|
||||
video_url = self._search_regex(
|
||||
r"var\slnk1\s=\s'([^']+)'", webpage, 'video URL')
|
||||
title = self._html_search_regex(
|
||||
r'(?s)<dt>Filename:</dt>.+?<dd>(.+?)</dd>', webpage, 'title')
|
||||
duration = parse_duration(
|
||||
self._search_regex(
|
||||
r'(?s)<dt>Length:</dt>.+?<dd>(.+?)</dd>',
|
||||
webpage,
|
||||
'duration',
|
||||
fatal=False
|
||||
)
|
||||
)
|
||||
|
||||
m = re.search(
|
||||
r'''(?xs)<dt>Width\sx\sHeight</dt>.+?
|
||||
<dd>(?P<width>\d+)\sx\s(?P<height>\d+)</dd>''',
|
||||
webpage
|
||||
)
|
||||
width = height = None
|
||||
if m:
|
||||
width, height = int(m.group('width')), int(m.group('height'))
|
||||
|
||||
formats = [{
|
||||
'format_id': 'sd',
|
||||
'url': video_url,
|
||||
'width': width,
|
||||
'height': height,
|
||||
}]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}
|
@@ -1,3 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -11,24 +13,30 @@ class TechTalksIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://techtalks\.tv/talks/[^/]*/(?P<id>\d+)/'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://techtalks.tv/talks/learning-topic-models-going-beyond-svd/57758/',
|
||||
u'playlist': [
|
||||
'url': 'http://techtalks.tv/talks/learning-topic-models-going-beyond-svd/57758/',
|
||||
'info_dict': {
|
||||
'id': '57758',
|
||||
'title': 'Learning Topic Models --- Going beyond SVD',
|
||||
},
|
||||
'playlist': [
|
||||
{
|
||||
u'file': u'57758.flv',
|
||||
u'info_dict': {
|
||||
u'title': u'Learning Topic Models --- Going beyond SVD',
|
||||
'info_dict': {
|
||||
'id': '57758',
|
||||
'ext': 'flv',
|
||||
'title': 'Learning Topic Models --- Going beyond SVD',
|
||||
},
|
||||
},
|
||||
{
|
||||
u'file': u'57758-slides.flv',
|
||||
u'info_dict': {
|
||||
u'title': u'Learning Topic Models --- Going beyond SVD',
|
||||
'info_dict': {
|
||||
'id': '57758-slides',
|
||||
'ext': 'flv',
|
||||
'title': 'Learning Topic Models --- Going beyond SVD',
|
||||
},
|
||||
},
|
||||
],
|
||||
u'params': {
|
||||
'params': {
|
||||
# rtmp download
|
||||
u'skip_download': True,
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
@@ -36,30 +44,36 @@ class TechTalksIE(InfoExtractor):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
talk_id = mobj.group('id')
|
||||
webpage = self._download_webpage(url, talk_id)
|
||||
rtmp_url = self._search_regex(r'netConnectionUrl: \'(.*?)\'', webpage,
|
||||
u'rtmp url')
|
||||
play_path = self._search_regex(r'href=\'(.*?)\' [^>]*id="flowplayer_presenter"',
|
||||
webpage, u'presenter play path')
|
||||
rtmp_url = self._search_regex(
|
||||
r'netConnectionUrl: \'(.*?)\'', webpage, 'rtmp url')
|
||||
play_path = self._search_regex(
|
||||
r'href=\'(.*?)\' [^>]*id="flowplayer_presenter"',
|
||||
webpage, 'presenter play path')
|
||||
title = clean_html(get_element_by_attribute('class', 'title', webpage))
|
||||
video_info = {
|
||||
'id': talk_id,
|
||||
'title': title,
|
||||
'url': rtmp_url,
|
||||
'play_path': play_path,
|
||||
'ext': 'flv',
|
||||
}
|
||||
'id': talk_id,
|
||||
'title': title,
|
||||
'url': rtmp_url,
|
||||
'play_path': play_path,
|
||||
'ext': 'flv',
|
||||
}
|
||||
m_slides = re.search(r'<a class="slides" href=\'(.*?)\'', webpage)
|
||||
if m_slides is None:
|
||||
return video_info
|
||||
else:
|
||||
return [
|
||||
video_info,
|
||||
# The slides video
|
||||
{
|
||||
'id': talk_id + '-slides',
|
||||
'title': title,
|
||||
'url': rtmp_url,
|
||||
'play_path': m_slides.group(1),
|
||||
'ext': 'flv',
|
||||
},
|
||||
]
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': talk_id,
|
||||
'title': title,
|
||||
'entries': [
|
||||
video_info,
|
||||
# The slides video
|
||||
{
|
||||
'id': talk_id + '-slides',
|
||||
'title': title,
|
||||
'url': rtmp_url,
|
||||
'play_path': m_slides.group(1),
|
||||
'ext': 'flv',
|
||||
},
|
||||
],
|
||||
}
|
||||
|
@@ -47,7 +47,7 @@ class ToypicsUserIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'Mikey',
|
||||
},
|
||||
'playlist_mincount': 9917,
|
||||
'playlist_mincount': 19,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -6,13 +6,28 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
compat_str,
|
||||
parse_iso8601,
|
||||
qualities,
|
||||
)
|
||||
|
||||
|
||||
class TVPlayIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?tvplay\.lv/parraides/[^/]+/(?P<id>\d+)'
|
||||
IE_DESC = 'TV3Play and related services'
|
||||
_VALID_URL = r'''(?x)http://(?:www\.)?
|
||||
(?:tvplay\.lv/parraides|
|
||||
tv3play\.lt/programos|
|
||||
tv3play\.ee/sisu|
|
||||
tv3play\.se/program|
|
||||
tv6play\.se/program|
|
||||
tv8play\.se/program|
|
||||
tv10play\.se/program|
|
||||
tv3play\.no/programmer|
|
||||
viasat4play\.no/programmer|
|
||||
tv6play\.no/programmer|
|
||||
tv3play\.dk/programmer|
|
||||
)/[^/]+/(?P<id>\d+)
|
||||
'''
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.tvplay.lv/parraides/vinas-melo-labak/418113?autostart=true',
|
||||
@@ -30,6 +45,134 @@ class TVPlayIE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.tv3play.lt/programos/moterys-meluoja-geriau/409229?autostart=true',
|
||||
'info_dict': {
|
||||
'id': '409229',
|
||||
'ext': 'flv',
|
||||
'title': 'Moterys meluoja geriau',
|
||||
'description': 'md5:9aec0fc68e2cbc992d2a140bd41fa89e',
|
||||
'duration': 1330,
|
||||
'timestamp': 1403769181,
|
||||
'upload_date': '20140626',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.tv3play.ee/sisu/kodu-keset-linna/238551?autostart=true',
|
||||
'info_dict': {
|
||||
'id': '238551',
|
||||
'ext': 'flv',
|
||||
'title': 'Kodu keset linna 398537',
|
||||
'description': 'md5:7df175e3c94db9e47c0d81ffa5d68701',
|
||||
'duration': 1257,
|
||||
'timestamp': 1292449761,
|
||||
'upload_date': '20101215',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.tv3play.se/program/husraddarna/395385?autostart=true',
|
||||
'info_dict': {
|
||||
'id': '395385',
|
||||
'ext': 'flv',
|
||||
'title': 'Husräddarna S02E07',
|
||||
'description': 'md5:f210c6c89f42d4fc39faa551be813777',
|
||||
'duration': 2574,
|
||||
'timestamp': 1400596321,
|
||||
'upload_date': '20140520',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.tv6play.se/program/den-sista-dokusapan/266636?autostart=true',
|
||||
'info_dict': {
|
||||
'id': '266636',
|
||||
'ext': 'flv',
|
||||
'title': 'Den sista dokusåpan S01E08',
|
||||
'description': 'md5:295be39c872520221b933830f660b110',
|
||||
'duration': 1492,
|
||||
'timestamp': 1330522854,
|
||||
'upload_date': '20120229',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.tv8play.se/program/antikjakten/282756?autostart=true',
|
||||
'info_dict': {
|
||||
'id': '282756',
|
||||
'ext': 'flv',
|
||||
'title': 'Antikjakten S01E10',
|
||||
'description': 'md5:1b201169beabd97e20c5ad0ad67b13b8',
|
||||
'duration': 2646,
|
||||
'timestamp': 1348575868,
|
||||
'upload_date': '20120925',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.tv3play.no/programmer/anna-anka-soker-assistent/230898?autostart=true',
|
||||
'info_dict': {
|
||||
'id': '230898',
|
||||
'ext': 'flv',
|
||||
'title': 'Anna Anka søker assistent - Ep. 8',
|
||||
'description': 'md5:f80916bf5bbe1c5f760d127f8dd71474',
|
||||
'duration': 2656,
|
||||
'timestamp': 1277720005,
|
||||
'upload_date': '20100628',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.viasat4play.no/programmer/budbringerne/21873?autostart=true',
|
||||
'info_dict': {
|
||||
'id': '21873',
|
||||
'ext': 'flv',
|
||||
'title': 'Budbringerne program 10',
|
||||
'description': 'md5:4db78dc4ec8a85bb04fd322a3ee5092d',
|
||||
'duration': 1297,
|
||||
'timestamp': 1254205102,
|
||||
'upload_date': '20090929',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.tv6play.no/programmer/hotelinspektor-alex-polizzi/361883?autostart=true',
|
||||
'info_dict': {
|
||||
'id': '361883',
|
||||
'ext': 'flv',
|
||||
'title': 'Hotelinspektør Alex Polizzi - Ep. 10',
|
||||
'description': 'md5:3ecf808db9ec96c862c8ecb3a7fdaf81',
|
||||
'duration': 2594,
|
||||
'timestamp': 1393236292,
|
||||
'upload_date': '20140224',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -49,7 +192,7 @@ class TVPlayIE(InfoExtractor):
|
||||
quality = qualities(['hls', 'medium', 'high'])
|
||||
formats = []
|
||||
for format_id, video_url in streams['streams'].items():
|
||||
if not video_url:
|
||||
if not video_url or not isinstance(video_url, compat_str):
|
||||
continue
|
||||
fmt = {
|
||||
'format_id': format_id,
|
||||
|
@@ -1,32 +1,66 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import qualities
|
||||
|
||||
|
||||
class UnistraIE(InfoExtractor):
|
||||
_VALID_URL = r'http://utv\.unistra\.fr/(?:index|video)\.php\?id_video\=(\d+)'
|
||||
_VALID_URL = r'http://utv\.unistra\.fr/(?:index|video)\.php\?id_video\=(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://utv.unistra.fr/video.php?id_video=154',
|
||||
u'file': u'154.mp4',
|
||||
u'md5': u'736f605cfdc96724d55bb543ab3ced24',
|
||||
u'info_dict': {
|
||||
u'title': u'M!ss Yella',
|
||||
u'description': u'md5:104892c71bd48e55d70b902736b81bbf',
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://utv.unistra.fr/video.php?id_video=154',
|
||||
'md5': '736f605cfdc96724d55bb543ab3ced24',
|
||||
'info_dict': {
|
||||
'id': '154',
|
||||
'ext': 'mp4',
|
||||
'title': 'M!ss Yella',
|
||||
'description': 'md5:104892c71bd48e55d70b902736b81bbf',
|
||||
},
|
||||
},
|
||||
}
|
||||
{
|
||||
'url': 'http://utv.unistra.fr/index.php?id_video=437',
|
||||
'md5': '1ddddd6cccaae76f622ce29b8779636d',
|
||||
'info_dict': {
|
||||
'id': '437',
|
||||
'ext': 'mp4',
|
||||
'title': 'Prix Louise Weiss 2014',
|
||||
'description': 'md5:cc3a8735f079f4fb6b0b570fc10c135a',
|
||||
},
|
||||
}
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
id = re.match(self._VALID_URL, url).group(1)
|
||||
webpage = self._download_webpage(url, id)
|
||||
file = re.search(r'file: "(.*?)",', webpage).group(1)
|
||||
title = self._html_search_regex(r'<title>UTV - (.*?)</', webpage, u'title')
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
video_url = 'http://vod-flash.u-strasbg.fr:8080/' + file
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
return {'id': id,
|
||||
'title': title,
|
||||
'ext': 'mp4',
|
||||
'url': video_url,
|
||||
'description': self._html_search_regex(r'<meta name="Description" content="(.*?)"', webpage, u'description', flags=re.DOTALL),
|
||||
'thumbnail': self._search_regex(r'image: "(.*?)"', webpage, u'thumbnail'),
|
||||
}
|
||||
files = set(re.findall(r'file\s*:\s*"([^"]+)"', webpage))
|
||||
|
||||
quality = qualities(['SD', 'HD'])
|
||||
formats = []
|
||||
for file_path in files:
|
||||
format_id = 'HD' if file_path.endswith('-HD.mp4') else 'SD'
|
||||
formats.append({
|
||||
'url': 'http://vod-flash.u-strasbg.fr:8080%s' % file_path,
|
||||
'format_id': format_id,
|
||||
'quality': quality(format_id)
|
||||
})
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<title>UTV - (.*?)</', webpage, 'title')
|
||||
description = self._html_search_regex(
|
||||
r'<meta name="Description" content="(.*?)"', webpage, 'description', flags=re.DOTALL)
|
||||
thumbnail = self._search_regex(
|
||||
r'image: "(.*?)"', webpage, 'thumbnail')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'formats': formats
|
||||
}
|
||||
|
119
youtube_dl/extractor/vgtv.py
Normal file
119
youtube_dl/extractor/vgtv.py
Normal file
@@ -0,0 +1,119 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import float_or_none
|
||||
|
||||
|
||||
class VGTVIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?vgtv\.no/#!/(?:.*)/(?P<id>[0-9]+)'
|
||||
_TESTS = [
|
||||
{
|
||||
# streamType: vod
|
||||
'url': 'http://www.vgtv.no/#!/video/84196/hevnen-er-soet-episode-10-abu',
|
||||
'md5': 'b8be7a234cebb840c0d512c78013e02f',
|
||||
'info_dict': {
|
||||
'id': '84196',
|
||||
'ext': 'mp4',
|
||||
'title': 'Hevnen er søt episode 10: Abu',
|
||||
'description': 'md5:e25e4badb5f544b04341e14abdc72234',
|
||||
'thumbnail': 're:^https?://.*\.jpg',
|
||||
'duration': 648.000,
|
||||
'timestamp': 1404626400,
|
||||
'upload_date': '20140706',
|
||||
'view_count': int,
|
||||
},
|
||||
},
|
||||
{
|
||||
# streamType: wasLive
|
||||
'url': 'http://www.vgtv.no/#!/live/100764/opptak-vgtv-foelger-em-kvalifiseringen',
|
||||
'info_dict': {
|
||||
'id': '100764',
|
||||
'ext': 'mp4',
|
||||
'title': 'OPPTAK: VGTV følger EM-kvalifiseringen',
|
||||
'description': 'md5:3772d9c0dc2dff92a886b60039a7d4d3',
|
||||
'thumbnail': 're:^https?://.*\.jpg',
|
||||
'duration': 9056.000,
|
||||
'timestamp': 1410113864,
|
||||
'upload_date': '20140907',
|
||||
'view_count': int,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
# streamType: live
|
||||
'url': 'http://www.vgtv.no/#!/live/100015/direkte-her-kan-du-se-laksen-live-fra-suldalslaagen',
|
||||
'info_dict': {
|
||||
'id': '100015',
|
||||
'ext': 'mp4',
|
||||
'title': 'DIREKTE: Her kan du se laksen live fra Suldalslågen!',
|
||||
'description': 'md5:9a60cc23fa349f761628924e56eeec2d',
|
||||
'thumbnail': 're:^https?://.*\.jpg',
|
||||
'duration': 0,
|
||||
'timestamp': 1407423348,
|
||||
'upload_date': '20140807',
|
||||
'view_count': int,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
data = self._download_json(
|
||||
'http://svp.vg.no/svp/api/v1/vgtv/assets/%s?appName=vgtv-website' % video_id,
|
||||
video_id, 'Downloading media JSON')
|
||||
|
||||
streams = data['streamUrls']
|
||||
|
||||
formats = []
|
||||
|
||||
hls_url = streams.get('hls')
|
||||
if hls_url:
|
||||
formats.extend(self._extract_m3u8_formats(hls_url, video_id, 'mp4'))
|
||||
|
||||
hds_url = streams.get('hds')
|
||||
if hds_url:
|
||||
formats.extend(self._extract_f4m_formats(hds_url + '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18', video_id))
|
||||
|
||||
mp4_url = streams.get('mp4')
|
||||
if mp4_url:
|
||||
_url = hls_url or hds_url
|
||||
MP4_URL_TEMPLATE = '%s/%%s.%s' % (mp4_url.rpartition('/')[0], mp4_url.rpartition('.')[-1])
|
||||
for mp4_format in _url.split(','):
|
||||
m = re.search('(?P<width>\d+)_(?P<height>\d+)_(?P<vbr>\d+)', mp4_format)
|
||||
if not m:
|
||||
continue
|
||||
width = int(m.group('width'))
|
||||
height = int(m.group('height'))
|
||||
vbr = int(m.group('vbr'))
|
||||
formats.append({
|
||||
'url': MP4_URL_TEMPLATE % mp4_format,
|
||||
'format_id': 'mp4-%s' % vbr,
|
||||
'width': width,
|
||||
'height': height,
|
||||
'vbr': vbr,
|
||||
'preference': 1,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': data['title'],
|
||||
'description': data['description'],
|
||||
'thumbnail': data['images']['main'] + '?t[]=900x506q80',
|
||||
'timestamp': data['published'],
|
||||
'duration': float_or_none(data['duration'], 1000),
|
||||
'view_count': data['displays'],
|
||||
'formats': formats,
|
||||
}
|
@@ -13,6 +13,9 @@ class WashingtonPostIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://(?:www\.)?washingtonpost\.com/.*?/(?P<id>[^/]+)/(?:$|[?#])'
|
||||
_TEST = {
|
||||
'url': 'http://www.washingtonpost.com/sf/national/2014/03/22/sinkhole-of-bureaucracy/',
|
||||
'info_dict': {
|
||||
'title': 'Sinkhole of bureaucracy',
|
||||
},
|
||||
'playlist': [{
|
||||
'md5': 'c3f4b4922ffa259243f68e928db2db8c',
|
||||
'info_dict': {
|
||||
|
@@ -9,7 +9,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class YouJizzIE(InfoExtractor):
|
||||
_VALID_URL = r'^(?:https?://)?(?:\w+\.)?youjizz\.com/videos/(?P<videoid>[^.]+)\.html$'
|
||||
_VALID_URL = r'^https?://(?:\w+\.)?youjizz\.com/videos/(?P<videoid>[^.]+)\.html$'
|
||||
_TEST = {
|
||||
'url': 'http://www.youjizz.com/videos/zeichentrick-1-2189178.html',
|
||||
'file': '2189178.flv',
|
||||
|
@@ -199,7 +199,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
IE_DESC = u'YouTube.com'
|
||||
_VALID_URL = r"""(?x)^
|
||||
(
|
||||
(?:https?://|//)? # http(s):// or protocol-independent URL (optional)
|
||||
(?:https?://|//) # http(s):// or protocol-independent URL
|
||||
(?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
|
||||
(?:www\.)?deturl\.com/www\.youtube\.com/|
|
||||
(?:www\.)?pwnyoutube\.com/|
|
||||
@@ -217,7 +217,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
)
|
||||
))
|
||||
|youtu\.be/ # just youtu.be/xxxx
|
||||
|https?://(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
|
||||
|(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
|
||||
)
|
||||
)? # all until now is optional -> you can pass the naked ID
|
||||
([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
|
||||
|
@@ -617,7 +617,7 @@ def make_HTTPS_handler(opts_no_check_certificate, **kwargs):
|
||||
self.sock = sock
|
||||
self._tunnel()
|
||||
try:
|
||||
self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv3)
|
||||
self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_TLSv1)
|
||||
except ssl.SSLError:
|
||||
self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv23)
|
||||
|
||||
@@ -625,8 +625,14 @@ def make_HTTPS_handler(opts_no_check_certificate, **kwargs):
|
||||
def https_open(self, req):
|
||||
return self.do_open(HTTPSConnectionV3, req)
|
||||
return HTTPSHandlerV3(**kwargs)
|
||||
else:
|
||||
context = ssl.SSLContext(ssl.PROTOCOL_SSLv3)
|
||||
elif hasattr(ssl, 'create_default_context'): # Python >= 3.4
|
||||
context = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH)
|
||||
context.options &= ~ssl.OP_NO_SSLv3 # Allow older, not-as-secure SSLv3
|
||||
if opts_no_check_certificate:
|
||||
context.verify_mode = ssl.CERT_NONE
|
||||
return compat_urllib_request.HTTPSHandler(context=context, **kwargs)
|
||||
else: # Python < 3.4
|
||||
context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
|
||||
context.verify_mode = (ssl.CERT_NONE
|
||||
if opts_no_check_certificate
|
||||
else ssl.CERT_REQUIRED)
|
||||
|
@@ -1,2 +1,2 @@
|
||||
|
||||
__version__ = '2014.09.04.1'
|
||||
__version__ = '2014.09.12'
|
||||
|
Reference in New Issue
Block a user