Compare commits

...

18 Commits

Author SHA1 Message Date
69ede8ef81 release 2014.11.13.3 2014-11-13 16:28:24 +01:00
609a61e3e6 [npo] Improve npo.nl (Fixes #4173) 2014-11-13 16:28:05 +01:00
bf951c5e29 release 2014.11.13.2 2014-11-13 16:12:54 +01:00
af63fed7d8 [generic] Add support for livestream embeds (Fixes #4185) 2014-11-13 16:12:51 +01:00
68d1d41c03 Credit @yaccz for freevideo (#4131) 2014-11-13 15:59:48 +01:00
3deed1e91a [freevideo] Simplify and raise error for foreigners (Fixes #4131) 2014-11-13 15:59:22 +01:00
11b28e93d3 Merge remote-tracking branch 'yaccz/add-extractor/freevideo' 2014-11-13 15:53:16 +01:00
c3d582985f release 2014.11.13.1 2014-11-13 15:42:48 +01:00
4c0924bb24 [utils] Fix intlist_to_bytes in Python 2 (#4181) 2014-11-13 15:28:42 +01:00
3fa5bb3802 [sexu] Modernize (#4171) 2014-11-13 15:20:49 +01:00
c47ec62b83 Merge remote-tracking branch 'peugeot/sexu' 2014-11-13 15:18:38 +01:00
e4bdb37ec6 [spiegel] Add support for embeds 2014-11-13 15:02:31 +01:00
3e6e4999ca [test/helper] Improve output 2014-11-13 14:55:45 +01:00
0e15e725a0 [spiegel] Modernize 2014-11-13 14:45:17 +01:00
437f68d868 Update sexu.py 2014-11-13 14:02:53 +01:00
d91d124081 fix python 2 test 2014-11-13 13:57:10 +01:00
bbd5f2de5e [sexu] initial support 2014-11-12 20:41:13 +01:00
yac
3898c8a7b2 [FreeVideo] Add new extractor 2014-11-08 00:13:28 +01:00
12 changed files with 187 additions and 29 deletions

View File

@ -81,3 +81,4 @@ winwon
Xavier Beynon Xavier Beynon
Gabriel Schubiner Gabriel Schubiner
xantares xantares
Jan Matějka

View File

@ -145,7 +145,8 @@ def expect_info_dict(self, expected_dict, got_dict):
info_dict_str = ''.join( info_dict_str = ''.join(
' %s: %s,\n' % (_repr(k), _repr(v)) ' %s: %s,\n' % (_repr(k), _repr(v))
for k, v in test_info_dict.items()) for k, v in test_info_dict.items())
write_string('\n"info_dict": {\n' + info_dict_str + '}\n', out=sys.stderr) write_string(
'\n\'info_dict\': {\n' + info_dict_str + '}\n', out=sys.stderr)
self.assertFalse( self.assertFalse(
missing_keys, missing_keys,
'Missing keys in test definition: %s' % ( 'Missing keys in test definition: %s' % (

View File

@ -16,6 +16,7 @@ import json
import xml.etree.ElementTree import xml.etree.ElementTree
from youtube_dl.utils import ( from youtube_dl.utils import (
clean_html,
DateRange, DateRange,
encodeFilename, encodeFilename,
find_xpath_attr, find_xpath_attr,
@ -45,6 +46,7 @@ from youtube_dl.utils import (
escape_url, escape_url,
js_to_json, js_to_json,
get_filesystem_encoding, get_filesystem_encoding,
intlist_to_bytes,
) )
@ -282,6 +284,10 @@ class TestUtil(unittest.TestCase):
d = json.loads(stripped) d = json.loads(stripped)
self.assertEqual(d, [{"id": "532cb", "x": 3}]) self.assertEqual(d, [{"id": "532cb", "x": 3}])
stripped = strip_jsonp('parseMetadata({"STATUS":"OK"})\n\n\n//epc')
d = json.loads(stripped)
self.assertEqual(d, {'STATUS': 'OK'})
def test_uppercase_escape(self): def test_uppercase_escape(self):
self.assertEqual(uppercase_escape(''), '') self.assertEqual(uppercase_escape(''), '')
self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐') self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐')
@ -345,5 +351,14 @@ class TestUtil(unittest.TestCase):
on = js_to_json('{"abc": true}') on = js_to_json('{"abc": true}')
self.assertEqual(json.loads(on), {'abc': True}) self.assertEqual(json.loads(on), {'abc': True})
def test_clean_html(self):
self.assertEqual(clean_html('a:\nb'), 'a: b')
self.assertEqual(clean_html('a:\n "b"'), 'a: "b"')
def test_intlist_to_bytes(self):
self.assertEqual(
intlist_to_bytes([0, 1, 127, 128, 255]),
b'\x00\x01\x7f\x80\xff')
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View File

@ -127,6 +127,7 @@ from .francetv import (
) )
from .freesound import FreesoundIE from .freesound import FreesoundIE
from .freespeech import FreespeechIE from .freespeech import FreespeechIE
from .freevideo import FreeVideoIE
from .funnyordie import FunnyOrDieIE from .funnyordie import FunnyOrDieIE
from .gamekings import GamekingsIE from .gamekings import GamekingsIE
from .gameone import ( from .gameone import (
@ -324,6 +325,7 @@ from .sbs import SBSIE
from .scivee import SciVeeIE from .scivee import SciVeeIE
from .screencast import ScreencastIE from .screencast import ScreencastIE
from .servingsys import ServingSysIE from .servingsys import ServingSysIE
from .sexu import SexuIE
from .sexykarma import SexyKarmaIE from .sexykarma import SexyKarmaIE
from .shared import SharedIE from .shared import SharedIE
from .sharesix import ShareSixIE from .sharesix import ShareSixIE

View File

@ -0,0 +1,38 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import ExtractorError
class FreeVideoIE(InfoExtractor):
    """Extractor for ``vase-videa`` pages on www.freevideo.cz.

    The video id is the page slug taken from the URL; it is also used as
    the title because the page title is not parsed.  Every result is
    marked with an age limit of 18.
    """

    # Literal dots are escaped so that they only match an actual '.'.
    _VALID_URL = r'^http://www\.freevideo\.cz/vase-videa/(?P<id>[^.]+)\.html(?:$|[?#])'
    _TEST = {
        'url': 'http://www.freevideo.cz/vase-videa/vysukany-zadecek-22033.html',
        'info_dict': {
            'id': 'vysukany-zadecek-22033',
            'ext': 'mp4',
            'title': 'vysukany-zadecek-22033',
            'age_limit': 18,
        },
        'skip': 'Blocked outside .cz',
    }

    def _real_extract(self, url):
        """Return the info dict for the freevideo.cz page at *url*.

        Raises ExtractorError (marked as expected) when the final URL of
        the response points at www.czechav.com, which this extractor treats
        as the site blocking access from the current location.
        """
        video_id = self._match_id(url)
        webpage, handle = self._download_webpage_handle(url, video_id)

        # The check is on the URL the response ended up at, not on the
        # requested one.
        if '//www.czechav.com/' in handle.geturl():
            raise ExtractorError(
                'Access to freevideo is blocked from your location',
                expected=True)

        # Dots in the host name and file name are escaped; unescaped they
        # would match arbitrary characters.
        video_url = self._search_regex(
            r'\s+url: "(http://[a-z0-9-]+\.cdn\.freevideo\.cz/stream/.*?/video\.mp4)"',
            webpage, 'video URL')

        return {
            'id': video_id,
            'url': video_url,
            'title': video_id,
            'age_limit': 18,
        }

View File

@ -434,7 +434,17 @@ class GenericIE(InfoExtractor):
'title': 'Chet Chat 171 - Oct 29, 2014', 'title': 'Chet Chat 171 - Oct 29, 2014',
'upload_date': '20141029', 'upload_date': '20141029',
} }
} },
# Livestream embed
{
'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
'info_dict': {
'id': '67864563',
'ext': 'flv',
'upload_date': '20141112',
'title': 'Rosetta #CometLanding webcast HL 10',
}
},
] ]
def report_following_redirect(self, new_url): def report_following_redirect(self, new_url):
@ -916,6 +926,12 @@ class GenericIE(InfoExtractor):
if mobj is not None: if mobj is not None:
return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast') return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
mobj = re.search(
r'<iframe[^>]+src="(?P<url>https?://new\.livestream\.com/[^"]+/player[^"]+)"',
webpage)
if mobj is not None:
return self.url_result(mobj.group('url'), 'Livestream')
def check_video(vurl): def check_video(vurl):
vpath = compat_urlparse.urlparse(vurl).path vpath = compat_urlparse.urlparse(vurl).path
vext = determine_ext(vpath) vext = determine_ext(vpath)

View File

@ -18,7 +18,7 @@ from ..utils import (
class LivestreamIE(InfoExtractor): class LivestreamIE(InfoExtractor):
IE_NAME = 'livestream' IE_NAME = 'livestream'
_VALID_URL = r'http://new\.livestream\.com/.*?/(?P<event_name>.*?)(/videos/(?P<id>\d+))?/?$' _VALID_URL = r'https?://new\.livestream\.com/.*?/(?P<event_name>.*?)(/videos/(?P<id>[0-9]+)(?:/player)?)?/?(?:$|[?#])'
_TESTS = [{ _TESTS = [{
'url': 'http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370', 'url': 'http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370',
'md5': '53274c76ba7754fb0e8d072716f2292b', 'md5': '53274c76ba7754fb0e8d072716f2292b',
@ -37,6 +37,9 @@ class LivestreamIE(InfoExtractor):
'title': 'TEDCity2.0 (English)', 'title': 'TEDCity2.0 (English)',
}, },
'playlist_mincount': 4, 'playlist_mincount': 4,
}, {
'url': 'https://new.livestream.com/accounts/362/events/3557232/videos/67864563/player?autoPlay=false&height=360&mute=false&width=640',
'only_matching': True,
}] }]
def _parse_smil(self, video_id, smil_url): def _parse_smil(self, video_id, smil_url):

View File

@ -7,6 +7,7 @@ from ..utils import (
unified_strdate, unified_strdate,
parse_duration, parse_duration,
qualities, qualities,
strip_jsonp,
url_basename, url_basename,
) )
@ -63,7 +64,7 @@ class NPOIE(InfoExtractor):
'http://e.omroep.nl/metadata/aflevering/%s' % video_id, 'http://e.omroep.nl/metadata/aflevering/%s' % video_id,
video_id, video_id,
# We have to remove the javascript callback # We have to remove the javascript callback
transform_source=lambda j: re.sub(r'parseMetadata\((.*?)\);\n//.*$', r'\1', j) transform_source=strip_jsonp,
) )
token_page = self._download_webpage( token_page = self._download_webpage(
'http://ida.omroep.nl/npoplayer/i.js', 'http://ida.omroep.nl/npoplayer/i.js',

View File

@ -0,0 +1,61 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
class SexuIE(InfoExtractor):
    """Extractor for video pages on sexu.com.

    Formats are read from the ``sources: [...]`` list embedded in the page;
    title, description, thumbnail and categories come from the page markup.
    Every result is marked with an age limit of 18.
    """

    _VALID_URL = r'https?://(?:www\.)?sexu\.com/(?P<id>\d+)'
    _TEST = {
        'url': 'http://sexu.com/961791/',
        'md5': 'ff615aca9691053c94f8f10d96cd7884',
        'info_dict': {
            'id': '961791',
            'ext': 'mp4',
            'title': 'md5:4d05a19a5fc049a63dbbaf05fb71d91b',
            'description': 'md5:c5ed8625eb386855d5a7967bd7b77a54',
            'categories': list,  # NSFW
            # Raw string: '\.' is not a valid escape in a normal literal.
            'thumbnail': r're:https?://.*\.jpg$',
            'age_limit': 18,
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the sexu.com page at *url*."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        quality_arr = self._search_regex(
            r'sources:\s*\[([^\]]+)\]', webpage, 'format string')

        formats = []
        for file_url, label in re.findall(
                r'"file":"([^"]+)","label":"([^"]+)"', quality_arr):
            # Use all leading digits of the label as the height; slicing a
            # fixed three characters would turn '1080p' into 108 and fail
            # on labels that do not start with a number.
            height_mobj = re.match(r'\d+', label)
            formats.append({
                # The URL is embedded with backslash-escaped slashes.
                'url': file_url.replace('\\', ''),
                'format_id': label,
                'height': int(height_mobj.group()) if height_mobj else None,
            })
        self._sort_formats(formats)

        title = self._html_search_regex(
            r'<title>([^<]+)\s*-\s*Sexu\.Com</title>', webpage, 'title')

        description = self._html_search_meta(
            'description', webpage, 'description')

        thumbnail = self._html_search_regex(
            r'image:\s*"([^"]+)"',
            webpage, 'thumbnail', fatal=False)

        # The keywords meta tag doubles as a comma-separated category list.
        categories_str = self._html_search_meta(
            'keywords', webpage, 'categories')
        categories = (
            None if categories_str is None
            else categories_str.split(','))

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'categories': categories,
            'formats': formats,
            'age_limit': 18,
        }

View File

@ -4,11 +4,11 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import compat_urlparse from ..compat import compat_urlparse
class SpiegelIE(InfoExtractor): class SpiegelIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<videoID>[0-9]+)(?:\.html)?(?:#.*)?$' _VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<id>[0-9]+)(?:-embed)?(?:\.html)?(?:#.*)?$'
_TESTS = [{ _TESTS = [{
'url': 'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html', 'url': 'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html',
'md5': '2c2754212136f35fb4b19767d242f66e', 'md5': '2c2754212136f35fb4b19767d242f66e',
@ -29,16 +29,24 @@ class SpiegelIE(InfoExtractor):
'description': 'md5:c2322b65e58f385a820c10fa03b2d088', 'description': 'md5:c2322b65e58f385a820c10fa03b2d088',
'duration': 983, 'duration': 983,
}, },
}, {
'url': 'http://www.spiegel.de/video/astronaut-alexander-gerst-von-der-iss-station-beantwortet-fragen-video-1519126-embed.html',
'md5': 'd8eeca6bfc8f1cd6f490eb1f44695d51',
'info_dict': {
'id': '1519126',
'ext': 'mp4',
'description': 'SPIEGEL ONLINE-Nutzer durften den deutschen Astronauten Alexander Gerst über sein Leben auf der ISS-Station befragen. Hier kommen seine Antworten auf die besten sechs Fragen.',
'title': 'Fragen an Astronaut Alexander Gerst: "Bekommen Sie die Tageszeiten mit?"',
}
}] }]
def _real_extract(self, url): def _real_extract(self, url):
m = re.match(self._VALID_URL, url) video_id = self._match_id(url)
video_id = m.group('videoID')
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
title = self._html_search_regex( title = re.sub(r'\s+', ' ', self._html_search_regex(
r'<div class="module-title">(.*?)</div>', webpage, 'title') r'(?s)<(?:h1|div) class="module-title"[^>]*>(.*?)</(?:h1|div)>',
webpage, 'title'))
description = self._html_search_meta('description', webpage, 'description') description = self._html_search_meta('description', webpage, 'description')
base_url = self._search_regex( base_url = self._search_regex(
@ -79,7 +87,7 @@ class SpiegelArticleIE(InfoExtractor):
_VALID_URL = 'https?://www\.spiegel\.de/(?!video/)[^?#]*?-(?P<id>[0-9]+)\.html' _VALID_URL = 'https?://www\.spiegel\.de/(?!video/)[^?#]*?-(?P<id>[0-9]+)\.html'
IE_NAME = 'Spiegel:Article' IE_NAME = 'Spiegel:Article'
IE_DESC = 'Articles on spiegel.de' IE_DESC = 'Articles on spiegel.de'
_TEST = { _TESTS = [{
'url': 'http://www.spiegel.de/sport/sonst/badminton-wm-die-randsportart-soll-populaerer-werden-a-987092.html', 'url': 'http://www.spiegel.de/sport/sonst/badminton-wm-die-randsportart-soll-populaerer-werden-a-987092.html',
'info_dict': { 'info_dict': {
'id': '1516455', 'id': '1516455',
@ -87,20 +95,34 @@ class SpiegelArticleIE(InfoExtractor):
'title': 'Faszination Badminton: Nennt es bloß nicht Federball', 'title': 'Faszination Badminton: Nennt es bloß nicht Federball',
'description': 're:^Patrick Kämnitz gehört.{100,}', 'description': 're:^Patrick Kämnitz gehört.{100,}',
}, },
} }, {
'url': 'http://www.spiegel.de/wissenschaft/weltall/astronaut-alexander-gerst-antwortet-spiegel-online-lesern-a-989876.html',
'info_dict': {
},
'playlist_count': 6,
}]
def _real_extract(self, url): def _real_extract(self, url):
m = re.match(self._VALID_URL, url) video_id = self._match_id(url)
video_id = m.group('id')
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
# Single video on top of the page
video_link = self._search_regex( video_link = self._search_regex(
r'<a href="([^"]+)" onclick="return spOpenVideo\(this,', webpage, r'<a href="([^"]+)" onclick="return spOpenVideo\(this,', webpage,
'video page URL') 'video page URL', default=None)
video_url = compat_urlparse.urljoin( if video_link:
self.http_scheme() + '//spiegel.de/', video_link) video_url = compat_urlparse.urljoin(
self.http_scheme() + '//spiegel.de/', video_link)
return self.url_result(video_url)
return { # Multiple embedded videos
'_type': 'url', embeds = re.findall(
'url': video_url, r'<div class="vid_holder[0-9]+.*?</div>\s*.*?url\s*=\s*"([^"]+)"',
} webpage)
entries = [
self.url_result(compat_urlparse.urljoin(
self.http_scheme() + '//spiegel.de/', embed_path))
for embed_path in embeds
]
return self.playlist_result(entries)

View File

@ -843,10 +843,7 @@ def bytes_to_intlist(bs):
def intlist_to_bytes(xs): def intlist_to_bytes(xs):
if not xs: if not xs:
return b'' return b''
if isinstance(chr(0), bytes): # Python 2 return struct.pack('%dB' % len(xs), *xs)
return ''.join([chr(x) for x in xs])
else:
return bytes(xs)
# Cross-platform file locking # Cross-platform file locking
@ -1334,7 +1331,8 @@ def parse_age_limit(s):
def strip_jsonp(code): def strip_jsonp(code):
return re.sub(r'(?s)^[a-zA-Z0-9_]+\s*\(\s*(.*)\);?\s*?\s*$', r'\1', code) return re.sub(
r'(?s)^[a-zA-Z0-9_]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$', r'\1', code)
def js_to_json(code): def js_to_json(code):

View File

@ -1,2 +1,2 @@
__version__ = '2014.11.13' __version__ = '2014.11.13.3'