Compare commits
12 Commits
2014.02.03
...
2014.02.04
Author | SHA1 | Date | |
---|---|---|---|
eef726c04b | |||
acf1555d76 | |||
22e7f1a6ec | |||
3c49325658 | |||
bb1cd2bea1 | |||
fdf1f8d4ce | |||
117c8c6b97 | |||
5cef4ff09b | |||
91264ce572 | |||
c79ef8e1ae | |||
58d915df51 | |||
7881a64499 |
75
test/test_signatures.py
Normal file
75
test/test_signatures.py
Normal file
@ -0,0 +1,75 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
# Allow direct execution
|
||||
import os
|
||||
import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
|
||||
import io
|
||||
import re
|
||||
import string
|
||||
|
||||
from youtube_dl.extractor import YoutubeIE
|
||||
from youtube_dl.utils import compat_str, compat_urlretrieve
|
||||
|
||||
_TESTS = [
|
||||
(
|
||||
u'https://s.ytimg.com/yts/jsbin/html5player-vflHOr_nV.js',
|
||||
u'js',
|
||||
86,
|
||||
u'>=<;:/.-[+*)(\'&%$#"!ZYX0VUTSRQPONMLKJIHGFEDCBA\\yxwvutsrqponmlkjihgfedcba987654321',
|
||||
),
|
||||
(
|
||||
u'https://s.ytimg.com/yts/jsbin/html5player-vfldJ8xgI.js',
|
||||
u'js',
|
||||
85,
|
||||
u'3456789a0cdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS[UVWXYZ!"#$%&\'()*+,-./:;<=>?@',
|
||||
),
|
||||
]
|
||||
|
||||
|
||||
class TestSignature(unittest.TestCase):
|
||||
def setUp(self):
|
||||
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
self.TESTDATA_DIR = os.path.join(TEST_DIR, 'testdata')
|
||||
if not os.path.exists(self.TESTDATA_DIR):
|
||||
os.mkdir(self.TESTDATA_DIR)
|
||||
|
||||
|
||||
def make_tfunc(url, stype, sig_length, expected_sig):
|
||||
basename = url.rpartition('/')[2]
|
||||
m = re.match(r'.*-([a-zA-Z0-9_-]+)\.[a-z]+$', basename)
|
||||
assert m, '%r should follow URL format' % basename
|
||||
test_id = m.group(1)
|
||||
|
||||
def test_func(self):
|
||||
fn = os.path.join(self.TESTDATA_DIR, basename)
|
||||
|
||||
if not os.path.exists(fn):
|
||||
compat_urlretrieve(url, fn)
|
||||
|
||||
ie = YoutubeIE()
|
||||
if stype == 'js':
|
||||
with io.open(fn, encoding='utf-8') as testf:
|
||||
jscode = testf.read()
|
||||
func = ie._parse_sig_js(jscode)
|
||||
else:
|
||||
assert stype == 'swf'
|
||||
with open(fn, 'rb') as testf:
|
||||
swfcode = testf.read()
|
||||
func = ie._parse_sig_swf(swfcode)
|
||||
src_sig = compat_str(string.printable[:sig_length])
|
||||
got_sig = func(src_sig)
|
||||
self.assertEqual(got_sig, expected_sig)
|
||||
|
||||
test_func.__name__ = str('test_signature_' + stype + '_' + test_id)
|
||||
setattr(TestSignature, test_func.__name__, test_func)
|
||||
|
||||
for test_spec in _TESTS:
|
||||
make_tfunc(*test_spec)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
@ -96,6 +96,7 @@ from .ina import InaIE
|
||||
from .infoq import InfoQIE
|
||||
from .instagram import InstagramIE
|
||||
from .internetvideoarchive import InternetVideoArchiveIE
|
||||
from .iprima import IPrimaIE
|
||||
from .ivi import (
|
||||
IviIE,
|
||||
IviCompilationIE
|
||||
@ -126,6 +127,7 @@ from .mit import TechTVMITIE, MITIE
|
||||
from .mixcloud import MixcloudIE
|
||||
from .mpora import MporaIE
|
||||
from .mofosex import MofosexIE
|
||||
from .mooshare import MooshareIE
|
||||
from .mtv import (
|
||||
MTVIE,
|
||||
MTVIggyIE,
|
||||
|
@ -1,19 +1,14 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import datetime
|
||||
import json
|
||||
import re
|
||||
import socket
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .subtitles import SubtitlesInfoExtractor
|
||||
from ..utils import (
|
||||
compat_http_client,
|
||||
compat_str,
|
||||
compat_urllib_error,
|
||||
compat_urllib_request,
|
||||
|
||||
ExtractorError,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
@ -38,7 +38,7 @@ class CollegeHumorIE(InfoExtractor):
|
||||
'title': 'Funny Dogs Protecting Babies Compilation 2014 [NEW HD]',
|
||||
'uploader': 'Funnyplox TV',
|
||||
'uploader_id': 'funnyploxtv',
|
||||
'description': 'md5:506f69f7a297ed698ced3375f2363b0e',
|
||||
'description': 'md5:11812366244110c3523968aa74f02521',
|
||||
'upload_date': '20140128',
|
||||
},
|
||||
'params': {
|
||||
|
85
youtube_dl/extractor/iprima.py
Normal file
85
youtube_dl/extractor/iprima.py
Normal file
@ -0,0 +1,85 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
from random import random
|
||||
from math import floor
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import compat_urllib_request
|
||||
|
||||
|
||||
class IPrimaIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://play\.iprima\.cz/(?P<videogroup>.+)/(?P<videoid>.+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://play.iprima.cz/particka/particka-92',
|
||||
'info_dict': {
|
||||
'id': '39152',
|
||||
'ext': 'flv',
|
||||
'title': 'Partička (92)',
|
||||
'description': 'md5:3740fda51464da35a2d4d0670b8e4fd6',
|
||||
'thumbnail': 'http://play.iprima.cz/sites/default/files/image_crops/image_620x349/3/491483_particka-92_image_620x349.jpg',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('videoid')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
player_url = 'http://embed.livebox.cz/iprimaplay/player-embed-v2.js?__tok%s__=%s' % (
|
||||
floor(random()*1073741824),
|
||||
floor(random()*1073741824))
|
||||
|
||||
req = compat_urllib_request.Request(player_url)
|
||||
req.add_header('Referer', url)
|
||||
playerpage = self._download_webpage(req, video_id)
|
||||
|
||||
base_url = ''.join(re.findall(r"embed\['stream'\] = '(.+?)'.+'(\?auth=)'.+'(.+?)';", playerpage)[1])
|
||||
|
||||
zoneGEO = self._html_search_regex(r'"zoneGEO":(.+?),', webpage, 'zoneGEO')
|
||||
|
||||
if zoneGEO != '0':
|
||||
base_url = base_url.replace('token', 'token_'+zoneGEO)
|
||||
|
||||
formats = []
|
||||
for format_id in ['lq', 'hq', 'hd']:
|
||||
filename = self._html_search_regex(r'"%s_id":(.+?),' % format_id, webpage, 'filename')
|
||||
|
||||
if filename == 'null':
|
||||
continue
|
||||
|
||||
real_id = self._search_regex(r'Prima-[0-9]{10}-([0-9]+)_', filename, 'real video id')
|
||||
|
||||
if format_id == 'lq':
|
||||
quality = 0
|
||||
elif format_id == 'hq':
|
||||
quality = 1
|
||||
elif format_id == 'hd':
|
||||
quality = 2
|
||||
filename = 'hq/'+filename
|
||||
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': base_url,
|
||||
'quality': quality,
|
||||
'play_path': 'mp4:'+filename.replace('"', '')[:-4],
|
||||
'rtmp_live': True,
|
||||
'ext': 'flv',
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': real_id,
|
||||
'title': self._og_search_title(webpage),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'formats': formats,
|
||||
'description': self._og_search_description(webpage),
|
||||
}
|
@ -31,7 +31,7 @@ class LifeNewsIE(InfoExtractor):
|
||||
webpage = self._download_webpage('http://lifenews.ru/mobile/news/%s' % video_id, video_id, 'Downloading page')
|
||||
|
||||
video_url = self._html_search_regex(
|
||||
r'<video.*?src="([^"]+)"></video>', webpage, 'video URL')
|
||||
r'<video.*?src="([^"]+)".*?></video>', webpage, 'video URL')
|
||||
|
||||
thumbnail = self._html_search_regex(
|
||||
r'<video.*?poster="([^"]+)".*?"></video>', webpage, 'video thumbnail')
|
||||
|
114
youtube_dl/extractor/mooshare.py
Normal file
114
youtube_dl/extractor/mooshare.py
Normal file
@ -0,0 +1,114 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
compat_urllib_request,
|
||||
compat_urllib_parse,
|
||||
)
|
||||
|
||||
|
||||
class MooshareIE(InfoExtractor):
|
||||
IE_NAME = 'mooshare'
|
||||
IE_DESC = 'Mooshare.biz'
|
||||
_VALID_URL = r'http://mooshare\.biz/(?P<id>[\da-z]{12})'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://mooshare.biz/8dqtk4bjbp8g',
|
||||
'md5': '4e14f9562928aecd2e42c6f341c8feba',
|
||||
'info_dict': {
|
||||
'id': '8dqtk4bjbp8g',
|
||||
'ext': 'mp4',
|
||||
'title': 'Comedy Football 2011 - (part 1-2)',
|
||||
'duration': 893,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://mooshare.biz/aipjtoc4g95j',
|
||||
'info_dict': {
|
||||
'id': 'aipjtoc4g95j',
|
||||
'ext': 'mp4',
|
||||
'title': 'Orange Caramel Dashing Through the Snow',
|
||||
'duration': 212,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
page = self._download_webpage(url, video_id, 'Downloading page')
|
||||
|
||||
if re.search(r'>Video Not Found or Deleted<', page) is not None:
|
||||
raise ExtractorError(u'Video %s does not exist' % video_id, expected=True)
|
||||
|
||||
hash_key = self._html_search_regex(r'<input type="hidden" name="hash" value="([^"]+)">', page, 'hash')
|
||||
title = self._html_search_regex(r'(?m)<div class="blockTitle">\s*<h2>Watch ([^<]+)</h2>', page, 'title')
|
||||
|
||||
download_form = {
|
||||
'op': 'download1',
|
||||
'id': video_id,
|
||||
'hash': hash_key,
|
||||
}
|
||||
|
||||
request = compat_urllib_request.Request(
|
||||
'http://mooshare.biz/8dqtk4bjbp8g', compat_urllib_parse.urlencode(download_form))
|
||||
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
|
||||
self.to_screen('%s: Waiting for timeout' % video_id)
|
||||
time.sleep(5)
|
||||
|
||||
video_page = self._download_webpage(request, video_id, 'Downloading video page')
|
||||
|
||||
thumbnail = self._html_search_regex(r'image:\s*"([^"]+)",', video_page, 'thumbnail', fatal=False)
|
||||
duration_str = self._html_search_regex(r'duration:\s*"(\d+)",', video_page, 'duration', fatal=False)
|
||||
duration = int(duration_str) if duration_str is not None else None
|
||||
|
||||
formats = []
|
||||
|
||||
# SD video
|
||||
mobj = re.search(r'(?m)file:\s*"(?P<url>[^"]+)",\s*provider:', video_page)
|
||||
if mobj is not None:
|
||||
formats.append({
|
||||
'url': mobj.group('url'),
|
||||
'format_id': 'sd',
|
||||
'format': 'SD',
|
||||
})
|
||||
|
||||
# HD video
|
||||
mobj = re.search(r'\'hd-2\': { file: \'(?P<url>[^\']+)\' },', video_page)
|
||||
if mobj is not None:
|
||||
formats.append({
|
||||
'url': mobj.group('url'),
|
||||
'format_id': 'hd',
|
||||
'format': 'HD',
|
||||
})
|
||||
|
||||
# rtmp video
|
||||
mobj = re.search(r'(?m)file: "(?P<playpath>[^"]+)",\s*streamer: "(?P<rtmpurl>rtmp://[^"]+)",', video_page)
|
||||
if mobj is not None:
|
||||
formats.append({
|
||||
'url': mobj.group('rtmpurl'),
|
||||
'play_path': mobj.group('playpath'),
|
||||
'rtmp_live': False,
|
||||
'ext': 'mp4',
|
||||
'format_id': 'rtmp',
|
||||
'format': 'HD',
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}
|
@ -1,3 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
@ -5,30 +7,63 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class PBSIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://video\.pbs\.org/video/(?P<id>\d+)/?'
|
||||
_VALID_URL = r'''(?x)https?://
|
||||
(?:
|
||||
# Direct video URL
|
||||
video\.pbs\.org/video/(?P<id>[0-9]+)/? |
|
||||
# Article with embedded player
|
||||
(?:www\.)?pbs\.org/(?:[^/]+/){2,5}(?P<presumptive_id>[^/]+)/?(?:$|[?\#]) |
|
||||
# Player
|
||||
video\.pbs\.org/partnerplayer/(?P<player_id>[^/]+)/
|
||||
)
|
||||
'''
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://video.pbs.org/video/2365006249/',
|
||||
u'file': u'2365006249.mp4',
|
||||
u'md5': 'ce1888486f0908d555a8093cac9a7362',
|
||||
u'info_dict': {
|
||||
u'title': u'A More Perfect Union',
|
||||
u'description': u'md5:ba0c207295339c8d6eced00b7c363c6a',
|
||||
u'duration': 3190,
|
||||
'url': 'http://www.pbs.org/tpt/constitution-usa-peter-sagal/watch/a-more-perfect-union/',
|
||||
'md5': 'ce1888486f0908d555a8093cac9a7362',
|
||||
'info_dict': {
|
||||
'id': '2365006249',
|
||||
'ext': 'mp4',
|
||||
'title': 'A More Perfect Union',
|
||||
'description': 'md5:ba0c207295339c8d6eced00b7c363c6a',
|
||||
'duration': 3190,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
presumptive_id = mobj.group('presumptive_id')
|
||||
display_id = presumptive_id
|
||||
if presumptive_id:
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
url = self._search_regex(
|
||||
r'<iframe\s+id=["\']partnerPlayer["\'].*?\s+src=["\'](.*?)["\']>',
|
||||
webpage, 'player URL')
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
||||
player_id = mobj.group('player_id')
|
||||
if not display_id:
|
||||
display_id = player_id
|
||||
if player_id:
|
||||
player_page = self._download_webpage(
|
||||
url, display_id, note='Downloading player page',
|
||||
errnote='Could not download player page')
|
||||
video_id = self._search_regex(
|
||||
r'<div\s+id="video_([0-9]+)"', player_page, 'video ID')
|
||||
else:
|
||||
video_id = mobj.group('id')
|
||||
display_id = video_id
|
||||
|
||||
info_url = 'http://video.pbs.org/videoInfo/%s?format=json' % video_id
|
||||
info_page = self._download_webpage(info_url, video_id)
|
||||
info =json.loads(info_page)
|
||||
return {'id': video_id,
|
||||
'title': info['title'],
|
||||
'url': info['alternate_encoding']['url'],
|
||||
'ext': 'mp4',
|
||||
'description': info['program'].get('description'),
|
||||
'thumbnail': info.get('image_url'),
|
||||
'duration': info.get('duration'),
|
||||
}
|
||||
info = self._download_json(info_url, display_id)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': info['title'],
|
||||
'url': info['alternate_encoding']['url'],
|
||||
'ext': 'mp4',
|
||||
'description': info['program'].get('description'),
|
||||
'thumbnail': info.get('image_url'),
|
||||
'duration': info.get('duration'),
|
||||
}
|
||||
|
@ -68,13 +68,14 @@ class SubtitlesInfoExtractor(InfoExtractor):
|
||||
def _request_subtitle_url(self, sub_lang, url):
|
||||
""" makes the http request for the subtitle """
|
||||
try:
|
||||
return self._download_subtitle_url(sub_lang, url)
|
||||
sub = self._download_subtitle_url(sub_lang, url)
|
||||
except ExtractorError as err:
|
||||
self._downloader.report_warning(u'unable to download video subtitles for %s: %s' % (sub_lang, compat_str(err)))
|
||||
return
|
||||
if not sub:
|
||||
self._downloader.report_warning(u'Did not fetch video subtitles')
|
||||
return
|
||||
return sub
|
||||
|
||||
def _get_available_subtitles(self, video_id, webpage):
|
||||
"""
|
||||
|
@ -6,6 +6,7 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class TrailerAddictIE(InfoExtractor):
|
||||
_WORKING = False
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?traileraddict\.com/(?:trailer|clip)/(?P<movie>.+?)/(?P<trailer_name>.+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.traileraddict.com/trailer/prince-avalanche/trailer',
|
||||
|
@ -1,2 +1,2 @@
|
||||
|
||||
__version__ = '2014.02.03.1'
|
||||
__version__ = '2014.02.04'
|
||||
|
Reference in New Issue
Block a user