Compare commits

...

15 Commits

Author SHA1 Message Date
Sergey M․
e642347492 release 2016.06.05 2016-06-05 00:56:21 +07:00
Sergey M․
4ea8d2ee76 release 2016.06.04 2016-06-04 22:42:10 +07:00
Sergey M․
bf4c6a38e1 release 2016.06.03 2016-06-03 23:25:24 +07:00
Sergey M․
7f3c3dfa52 [loc] Improve (Closes #9521) 2016-06-03 23:19:11 +07:00
TRox1972
9c3c447eb3 [loc] Add extractor (Closes #3188)
Added extractor of loc.gov, which closes #3188. I am not an experienced programmer, so I am sure I did a bunch of mistakes, but the extractor works (for me at least).

[LibraryOfCongress] don't use video_id for _search_regex()

[LibraryOfCongress] Improvements
2016-06-03 22:17:35 +07:00
Yen Chi Hsuan
ad73083ff0 [bilibili] Add _part%d suffixes back (closes #9660) 2016-06-02 19:29:27 +08:00
Yen Chi Hsuan
1e8b59243f Merge pull request #9669 from bzc6p/master
Added sanitization support for Hungarian letters Ő and Ű
2016-06-02 18:23:54 +08:00
bzc6p
c88270271e Added sanitization support for Hungarian letters Ő and Ű 2016-06-02 11:51:48 +02:00
bzc6p
b96f007eeb Added sanitization support for Hungarian letters Ő and Ű 2016-06-02 11:39:32 +02:00
Yen Chi Hsuan
9a4aec8b7e [utils] Use bytes-like objects as header values on Python 2 2016-06-02 15:00:49 +08:00
Yen Chi Hsuan
54fb199681 [test/test_http] Fix getsockname() on Jython 2016-06-02 15:00:49 +08:00
Yen Chi Hsuan
8c32e5dc32 [test/test_utils] Add test for #9588 2016-06-02 15:00:49 +08:00
Yen Chi Hsuan
0ea590076f [utils] Always decode Location header
escape_url is broken for bytes-like objects
2016-06-02 15:00:49 +08:00
Remita Amine
4a684895c0 [seeker] Add new extractor(closes #9619) 2016-06-01 21:20:25 +01:00
Remita Amine
f4e4aa9b6b [revision3:embed] Add new extractor 2016-06-01 21:20:25 +01:00
11 changed files with 333 additions and 88 deletions

View File

@@ -6,8 +6,8 @@
--- ---
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.02*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. ### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.05*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.02** - [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.05**
### Before submitting an *issue* make sure you have: ### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
[debug] User config: [] [debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2016.06.02 [debug] youtube-dl version 2016.06.05
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {} [debug] Proxy map: {}

View File

@@ -339,6 +339,7 @@
- **livestream** - **livestream**
- **livestream:original** - **livestream:original**
- **LnkGo** - **LnkGo**
- **loc**: Library of Congress
- **LocalNews8** - **LocalNews8**
- **LoveHomePorn** - **LoveHomePorn**
- **lrt.lt** - **lrt.lt**
@@ -528,7 +529,8 @@
- **Restudy** - **Restudy**
- **Reuters** - **Reuters**
- **ReverbNation** - **ReverbNation**
- **Revision3** - **revision**
- **revision3:embed**
- **RICE** - **RICE**
- **RingTV** - **RingTV**
- **RottenTomatoes** - **RottenTomatoes**
@@ -567,6 +569,7 @@
- **ScreencastOMatic** - **ScreencastOMatic**
- **ScreenJunkies** - **ScreenJunkies**
- **ScreenwaveMedia** - **ScreenwaveMedia**
- **Seeker**
- **SenateISVP** - **SenateISVP**
- **SendtoNews** - **SendtoNews**
- **ServingSys** - **ServingSys**

View File

@@ -16,6 +16,15 @@ import threading
TEST_DIR = os.path.dirname(os.path.abspath(__file__)) TEST_DIR = os.path.dirname(os.path.abspath(__file__))
def http_server_port(httpd):
if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket):
# In Jython SSLSocket is not a subclass of socket.socket
sock = httpd.socket.sock
else:
sock = httpd.socket
return sock.getsockname()[1]
class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler): class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
def log_message(self, format, *args): def log_message(self, format, *args):
pass pass
@@ -31,6 +40,22 @@ class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
self.send_header('Content-Type', 'video/mp4') self.send_header('Content-Type', 'video/mp4')
self.end_headers() self.end_headers()
self.wfile.write(b'\x00\x00\x00\x00\x20\x66\x74[video]') self.wfile.write(b'\x00\x00\x00\x00\x20\x66\x74[video]')
elif self.path == '/302':
if sys.version_info[0] == 3:
# XXX: Python 3 http server does not allow non-ASCII header values
self.send_response(404)
self.end_headers()
return
new_url = 'http://localhost:%d/中文.html' % http_server_port(self.server)
self.send_response(302)
self.send_header(b'Location', new_url.encode('utf-8'))
self.end_headers()
elif self.path == '/%E4%B8%AD%E6%96%87.html':
self.send_response(200)
self.send_header('Content-Type', 'text/html; charset=utf-8')
self.end_headers()
self.wfile.write(b'<html><video src="/vid.mp4" /></html>')
else: else:
assert False assert False
@@ -47,18 +72,32 @@ class FakeLogger(object):
class TestHTTP(unittest.TestCase): class TestHTTP(unittest.TestCase):
def setUp(self):
self.httpd = compat_http_server.HTTPServer(
('localhost', 0), HTTPTestRequestHandler)
self.port = http_server_port(self.httpd)
self.server_thread = threading.Thread(target=self.httpd.serve_forever)
self.server_thread.daemon = True
self.server_thread.start()
def test_unicode_path_redirection(self):
# XXX: Python 3 http server does not allow non-ASCII header values
if sys.version_info[0] == 3:
return
ydl = YoutubeDL({'logger': FakeLogger()})
r = ydl.extract_info('http://localhost:%d/302' % self.port)
self.assertEqual(r['url'], 'http://localhost:%d/vid.mp4' % self.port)
class TestHTTPS(unittest.TestCase):
def setUp(self): def setUp(self):
certfn = os.path.join(TEST_DIR, 'testcert.pem') certfn = os.path.join(TEST_DIR, 'testcert.pem')
self.httpd = compat_http_server.HTTPServer( self.httpd = compat_http_server.HTTPServer(
('localhost', 0), HTTPTestRequestHandler) ('localhost', 0), HTTPTestRequestHandler)
self.httpd.socket = ssl.wrap_socket( self.httpd.socket = ssl.wrap_socket(
self.httpd.socket, certfile=certfn, server_side=True) self.httpd.socket, certfile=certfn, server_side=True)
if os.name == 'java': self.port = http_server_port(self.httpd)
# In Jython SSLSocket is not a subclass of socket.socket
sock = self.httpd.socket.sock
else:
sock = self.httpd.socket
self.port = sock.getsockname()[1]
self.server_thread = threading.Thread(target=self.httpd.serve_forever) self.server_thread = threading.Thread(target=self.httpd.serve_forever)
self.server_thread.daemon = True self.server_thread.daemon = True
self.server_thread.start() self.server_thread.start()
@@ -94,14 +133,14 @@ class TestProxy(unittest.TestCase):
def setUp(self): def setUp(self):
self.proxy = compat_http_server.HTTPServer( self.proxy = compat_http_server.HTTPServer(
('localhost', 0), _build_proxy_handler('normal')) ('localhost', 0), _build_proxy_handler('normal'))
self.port = self.proxy.socket.getsockname()[1] self.port = http_server_port(self.proxy)
self.proxy_thread = threading.Thread(target=self.proxy.serve_forever) self.proxy_thread = threading.Thread(target=self.proxy.serve_forever)
self.proxy_thread.daemon = True self.proxy_thread.daemon = True
self.proxy_thread.start() self.proxy_thread.start()
self.cn_proxy = compat_http_server.HTTPServer( self.cn_proxy = compat_http_server.HTTPServer(
('localhost', 0), _build_proxy_handler('cn')) ('localhost', 0), _build_proxy_handler('cn'))
self.cn_port = self.cn_proxy.socket.getsockname()[1] self.cn_port = http_server_port(self.cn_proxy)
self.cn_proxy_thread = threading.Thread(target=self.cn_proxy.serve_forever) self.cn_proxy_thread = threading.Thread(target=self.cn_proxy.serve_forever)
self.cn_proxy_thread.daemon = True self.cn_proxy_thread.daemon = True
self.cn_proxy_thread.start() self.cn_proxy_thread.start()

View File

@@ -157,8 +157,8 @@ class TestUtil(unittest.TestCase):
self.assertTrue(sanitize_filename(':', restricted=True) != '') self.assertTrue(sanitize_filename(':', restricted=True) != '')
self.assertEqual(sanitize_filename( self.assertEqual(sanitize_filename(
'ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØŒÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøœùúûüýþÿ', restricted=True), 'ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ', restricted=True),
'AAAAAAAECEEEEIIIIDNOOOOOOOEUUUUYPssaaaaaaaeceeeeiiiionoooooooeuuuuypy') 'AAAAAAAECEEEEIIIIDNOOOOOOOOEUUUUUYPssaaaaaaaeceeeeiiiionooooooooeuuuuuypy')
def test_sanitize_ids(self): def test_sanitize_ids(self):
self.assertEqual(sanitize_filename('_n_cd26wFpw', is_id=True), '_n_cd26wFpw') self.assertEqual(sanitize_filename('_n_cd26wFpw', is_id=True), '_n_cd26wFpw')

View File

@@ -46,6 +46,62 @@ class BiliBiliIE(InfoExtractor):
'description': '这是个神奇的故事~每个人不留弹幕不给走哦~切利哦!~', 'description': '这是个神奇的故事~每个人不留弹幕不给走哦~切利哦!~',
}, },
'playlist_count': 9, 'playlist_count': 9,
}, {
'url': 'http://www.bilibili.com/video/av4808130/',
'info_dict': {
'id': '4808130',
'title': '【长篇】哆啦A梦443【钉铛】',
'description': '(2016.05.27)来组合客人的脸吧&amp;amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;amp;illust_id=56912929',
},
'playlist': [{
'md5': '55cdadedf3254caaa0d5d27cf20a8f9c',
'info_dict': {
'id': '4808130_part1',
'ext': 'flv',
'title': '【长篇】哆啦A梦443【钉铛】',
'description': '(2016.05.27)来组合客人的脸吧&amp;amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;amp;illust_id=56912929',
'timestamp': 1464564180,
'upload_date': '20160529',
'uploader': '喜欢拉面',
'uploader_id': '151066',
},
}, {
'md5': '926f9f67d0c482091872fbd8eca7ea3d',
'info_dict': {
'id': '4808130_part2',
'ext': 'flv',
'title': '【长篇】哆啦A梦443【钉铛】',
'description': '(2016.05.27)来组合客人的脸吧&amp;amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;amp;illust_id=56912929',
'timestamp': 1464564180,
'upload_date': '20160529',
'uploader': '喜欢拉面',
'uploader_id': '151066',
},
}, {
'md5': '4b7b225b968402d7c32348c646f1fd83',
'info_dict': {
'id': '4808130_part3',
'ext': 'flv',
'title': '【长篇】哆啦A梦443【钉铛】',
'description': '(2016.05.27)来组合客人的脸吧&amp;amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;amp;illust_id=56912929',
'timestamp': 1464564180,
'upload_date': '20160529',
'uploader': '喜欢拉面',
'uploader_id': '151066',
},
}, {
'md5': '7b795e214166501e9141139eea236e91',
'info_dict': {
'id': '4808130_part4',
'ext': 'flv',
'title': '【长篇】哆啦A梦443【钉铛】',
'description': '(2016.05.27)来组合客人的脸吧&amp;amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;amp;illust_id=56912929',
'timestamp': 1464564180,
'upload_date': '20160529',
'uploader': '喜欢拉面',
'uploader_id': '151066',
},
}],
}] }]
# BiliBili blocks keys from time to time. The current key is extracted from # BiliBili blocks keys from time to time. The current key is extracted from
@@ -144,6 +200,9 @@ class BiliBiliIE(InfoExtractor):
if len(entries) == 1: if len(entries) == 1:
return entries[0] return entries[0]
else: else:
for idx, entry in enumerate(entries):
entry['id'] = '%s_part%d' % (video_id, (idx + 1))
return { return {
'_type': 'multi_video', '_type': 'multi_video',
'id': video_id, 'id': video_id,

View File

@@ -382,6 +382,7 @@ from .leeco import (
LePlaylistIE, LePlaylistIE,
LetvCloudIE, LetvCloudIE,
) )
from .libraryofcongress import LibraryOfCongressIE
from .libsyn import LibsynIE from .libsyn import LibsynIE
from .lifenews import ( from .lifenews import (
LifeNewsIE, LifeNewsIE,
@@ -639,7 +640,10 @@ from .regiotv import RegioTVIE
from .restudy import RestudyIE from .restudy import RestudyIE
from .reuters import ReutersIE from .reuters import ReutersIE
from .reverbnation import ReverbNationIE from .reverbnation import ReverbNationIE
from .revision3 import Revision3IE from .revision3 import (
Revision3EmbedIE,
Revision3IE,
)
from .rice import RICEIE from .rice import RICEIE
from .ringtv import RingTVIE from .ringtv import RingTVIE
from .ro220 import Ro220IE from .ro220 import Ro220IE
@@ -678,6 +682,7 @@ from .screencast import ScreencastIE
from .screencastomatic import ScreencastOMaticIE from .screencastomatic import ScreencastOMaticIE
from .screenjunkies import ScreenJunkiesIE from .screenjunkies import ScreenJunkiesIE
from .screenwavemedia import ScreenwaveMediaIE, TeamFourIE from .screenwavemedia import ScreenwaveMediaIE, TeamFourIE
from .seeker import SeekerIE
from .senateisvp import SenateISVPIE from .senateisvp import SenateISVPIE
from .sendtonews import SendtoNewsIE from .sendtonews import SendtoNewsIE
from .servingsys import ServingSysIE from .servingsys import ServingSysIE

View File

@@ -0,0 +1,84 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
determine_ext,
float_or_none,
int_or_none,
)
class LibraryOfCongressIE(InfoExtractor):
IE_NAME = 'loc'
IE_DESC = 'Library of Congress'
_VALID_URL = r'https?://(?:www\.)?loc\.gov/item/(?P<id>[0-9]+)'
_TEST = {
'url': 'http://loc.gov/item/90716351/',
'md5': '353917ff7f0255aa6d4b80a034833de8',
'info_dict': {
'id': '90716351',
'ext': 'mp4',
'title': "Pa's trip to Mars",
'thumbnail': 're:^https?://.*\.jpg$',
'duration': 0,
'view_count': int,
},
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
media_id = self._search_regex(
(r'id=(["\'])media-player-(?P<id>.+?)\1',
r'<video[^>]+id=(["\'])uuid-(?P<id>.+?)\1',
r'<video[^>]+data-uuid=(["\'])(?P<id>.+?)\1'),
webpage, 'media id', group='id')
data = self._parse_json(
self._download_webpage(
'https://media.loc.gov/services/v1/media?id=%s&context=json' % media_id,
video_id),
video_id)['mediaObject']
derivative = data['derivatives'][0]
media_url = derivative['derivativeUrl']
# Following algorithm was extracted from setAVSource js function
# found in webpage
media_url = media_url.replace('rtmp', 'https')
is_video = data.get('mediaType', 'v').lower() == 'v'
ext = determine_ext(media_url)
if ext not in ('mp4', 'mp3'):
media_url += '.mp4' if is_video else '.mp3'
if 'vod/mp4:' in media_url:
formats = [{
'url': media_url.replace('vod/mp4:', 'hls-vod/media/') + '.m3u8',
'format_id': 'hls',
'ext': 'mp4',
'protocol': 'm3u8_native',
}]
elif 'vod/mp3:' in media_url:
formats = [{
'url': media_url.replace('vod/mp3:', ''),
'vcodec': 'none',
}]
self._sort_formats(formats)
title = derivative.get('shortName') or data.get('shortName') or self._og_search_title(webpage)
duration = float_or_none(data.get('duration'))
view_count = int_or_none(data.get('viewCount'))
return {
'id': video_id,
'title': title,
'thumbnail': self._og_search_thumbnail(webpage),
'duration': duration,
'view_count': view_count,
'formats': formats,
}

View File

@@ -13,8 +13,64 @@ from ..utils import (
) )
class Revision3EmbedIE(InfoExtractor):
IE_NAME = 'revision3:embed'
_VALID_URL = r'(?:revision3:(?:(?P<playlist_type>[^:]+):)?|https?://(?:(?:(?:www|embed)\.)?(?:revision3|animalist)|(?:(?:api|embed)\.)?seekernetwork)\.com/player/embed\?videoId=)(?P<playlist_id>\d+)'
_TEST = {
'url': 'http://api.seekernetwork.com/player/embed?videoId=67558',
'md5': '83bcd157cab89ad7318dd7b8c9cf1306',
'info_dict': {
'id': '67558',
'ext': 'mp4',
'title': 'The Pros & Cons Of Zoos',
'description': 'Zoos are often depicted as a terrible place for animals to live, but is there any truth to this?',
'uploader_id': 'dnews',
'uploader': 'DNews',
}
}
_API_KEY = 'ba9c741bce1b9d8e3defcc22193f3651b8867e62'
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
playlist_id = mobj.group('playlist_id')
playlist_type = mobj.group('playlist_type') or 'video_id'
video_data = self._download_json(
'http://revision3.com/api/getPlaylist.json', playlist_id, query={
'api_key': self._API_KEY,
'codecs': 'h264,vp8,theora',
playlist_type: playlist_id,
})['items'][0]
formats = []
for vcodec, media in video_data['media'].items():
for quality_id, quality in media.items():
if quality_id == 'hls':
formats.extend(self._extract_m3u8_formats(
quality['url'], playlist_id, 'mp4',
'm3u8_native', m3u8_id='hls', fatal=False))
else:
formats.append({
'url': quality['url'],
'format_id': '%s-%s' % (vcodec, quality_id),
'tbr': int_or_none(quality.get('bitrate')),
'vcodec': vcodec,
})
self._sort_formats(formats)
return {
'id': playlist_id,
'title': unescapeHTML(video_data['title']),
'description': unescapeHTML(video_data.get('summary')),
'uploader': video_data.get('show', {}).get('name'),
'uploader_id': video_data.get('show', {}).get('slug'),
'duration': int_or_none(video_data.get('duration')),
'formats': formats,
}
class Revision3IE(InfoExtractor): class Revision3IE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:revision3|testtube|animalist)\.com)/(?P<id>[^/]+(?:/[^/?#]+)?)' IE_NAME = 'revision'
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:revision3|animalist)\.com)/(?P<id>[^/]+(?:/[^/?#]+)?)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.revision3.com/technobuffalo/5-google-predictions-for-2016', 'url': 'http://www.revision3.com/technobuffalo/5-google-predictions-for-2016',
'md5': 'd94a72d85d0a829766de4deb8daaf7df', 'md5': 'd94a72d85d0a829766de4deb8daaf7df',
@@ -32,52 +88,14 @@ class Revision3IE(InfoExtractor):
} }
}, { }, {
# Show # Show
'url': 'http://testtube.com/brainstuff', 'url': 'http://revision3.com/variant',
'info_dict': { 'only_matching': True,
'id': '251',
'title': 'BrainStuff',
'description': 'Whether the topic is popcorn or particle physics, you can count on the HowStuffWorks team to explore-and explain-the everyday science in the world around us on BrainStuff.',
},
'playlist_mincount': 93,
}, {
'url': 'https://testtube.com/dnews/5-weird-ways-plants-can-eat-animals?utm_source=FB&utm_medium=DNews&utm_campaign=DNewsSocial',
'info_dict': {
'id': '58227',
'display_id': 'dnews/5-weird-ways-plants-can-eat-animals',
'duration': 275,
'ext': 'webm',
'title': '5 Weird Ways Plants Can Eat Animals',
'description': 'Why have some plants evolved to eat meat?',
'upload_date': '20150120',
'timestamp': 1421763300,
'uploader': 'DNews',
'uploader_id': 'dnews',
},
}, {
'url': 'http://testtube.com/tt-editors-picks/the-israel-palestine-conflict-explained-in-ten-min',
'info_dict': {
'id': '71618',
'ext': 'mp4',
'display_id': 'tt-editors-picks/the-israel-palestine-conflict-explained-in-ten-min',
'title': 'The Israel-Palestine Conflict Explained in Ten Minutes',
'description': 'If you\'d like to learn about the struggle between Israelis and Palestinians, this video is a great place to start',
'uploader': 'Editors\' Picks',
'uploader_id': 'tt-editors-picks',
'timestamp': 1453309200,
'upload_date': '20160120',
},
'add_ie': ['Youtube'],
}, { }, {
# Tag # Tag
'url': 'http://testtube.com/tech-news', 'url': 'http://revision3.com/vr',
'info_dict': { 'only_matching': True,
'id': '21018',
'title': 'tech news',
},
'playlist_mincount': 9,
}] }]
_PAGE_DATA_TEMPLATE = 'http://www.%s/apiProxy/ddn/%s?domain=%s' _PAGE_DATA_TEMPLATE = 'http://www.%s/apiProxy/ddn/%s?domain=%s'
_API_KEY = 'ba9c741bce1b9d8e3defcc22193f3651b8867e62'
def _real_extract(self, url): def _real_extract(self, url):
domain, display_id = re.match(self._VALID_URL, url).groups() domain, display_id = re.match(self._VALID_URL, url).groups()
@@ -119,33 +137,9 @@ class Revision3IE(InfoExtractor):
}) })
return info return info
video_data = self._download_json(
'http://revision3.com/api/getPlaylist.json?api_key=%s&codecs=h264,vp8,theora&video_id=%s' % (self._API_KEY, video_id),
video_id)['items'][0]
formats = []
for vcodec, media in video_data['media'].items():
for quality_id, quality in media.items():
if quality_id == 'hls':
formats.extend(self._extract_m3u8_formats(
quality['url'], video_id, 'mp4',
'm3u8_native', m3u8_id='hls', fatal=False))
else:
formats.append({
'url': quality['url'],
'format_id': '%s-%s' % (vcodec, quality_id),
'tbr': int_or_none(quality.get('bitrate')),
'vcodec': vcodec,
})
self._sort_formats(formats)
info.update({ info.update({
'title': unescapeHTML(video_data['title']), '_type': 'url_transparent',
'description': unescapeHTML(video_data.get('summary')), 'url': 'revision3:%s' % video_id,
'uploader': video_data.get('show', {}).get('name'),
'uploader_id': video_data.get('show', {}).get('slug'),
'duration': int_or_none(video_data.get('duration')),
'formats': formats,
}) })
return info return info
else: else:

View File

@@ -0,0 +1,57 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
class SeekerIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?seeker\.com/(?P<display_id>.*)-(?P<article_id>\d+)\.html'
_TESTS = [{
# player.loadRevision3Item
'url': 'http://www.seeker.com/should-trump-be-required-to-release-his-tax-returns-1833805621.html',
'md5': '30c1dc4030cc715cf05b423d0947ac18',
'info_dict': {
'id': '76243',
'ext': 'webm',
'title': 'Should Trump Be Required To Release His Tax Returns?',
'description': 'Donald Trump has been secretive about his "big," "beautiful" tax returns. So what can we learn if he decides to release them?',
'uploader': 'Seeker Daily',
'uploader_id': 'seekerdaily',
}
}, {
'url': 'http://www.seeker.com/changes-expected-at-zoos-following-recent-gorilla-lion-shootings-1834116536.html',
'playlist': [
{
'md5': '83bcd157cab89ad7318dd7b8c9cf1306',
'info_dict': {
'id': '67558',
'ext': 'mp4',
'title': 'The Pros & Cons Of Zoos',
'description': 'Zoos are often depicted as a terrible place for animals to live, but is there any truth to this?',
'uploader': 'DNews',
'uploader_id': 'dnews',
},
}
],
'info_dict': {
'id': '1834116536',
'title': 'After Gorilla Killing, Changes Ahead for Zoos',
'description': 'The largest association of zoos and others are hoping to learn from recent incidents that led to the shooting deaths of a gorilla and two lions.',
},
}]
def _real_extract(self, url):
display_id, article_id = re.match(self._VALID_URL, url).groups()
webpage = self._download_webpage(url, display_id)
mobj = re.search(r"player\.loadRevision3Item\('([^']+)'\s*,\s*(\d+)\);", webpage)
if mobj:
playlist_type, playlist_id = mobj.groups()
return self.url_result(
'revision3:%s:%s' % (playlist_type, playlist_id), 'Revision3Embed', playlist_id)
else:
entries = [self.url_result('revision3:video_id:%s' % video_id, 'Revision3Embed', video_id) for video_id in re.findall(
r'<iframe[^>]+src=[\'"](?:https?:)?//api\.seekernetwork\.com/player/embed\?videoId=(\d+)', webpage)]
return self.playlist_result(
entries, article_id, self._og_search_title(webpage), self._og_search_description(webpage))

View File

@@ -105,9 +105,9 @@ KNOWN_EXTENSIONS = (
'f4f', 'f4m', 'm3u8', 'smil') 'f4f', 'f4m', 'm3u8', 'smil')
# needed for sanitizing filenames in restricted mode # needed for sanitizing filenames in restricted mode
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØŒÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøœùúûüýþÿ', ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOO', ['OE'], 'UUUUYP', ['ss'], itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUYP', ['ss'],
'aaaaaa', ['ae'], 'ceeeeiiiionoooooo', ['oe'], 'uuuuypy'))) 'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuypy')))
def preferredencoding(): def preferredencoding():
@@ -861,9 +861,13 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
# As of RFC 2616 default charset is iso-8859-1 that is respected by python 3 # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
if sys.version_info >= (3, 0): if sys.version_info >= (3, 0):
location = location.encode('iso-8859-1').decode('utf-8') location = location.encode('iso-8859-1').decode('utf-8')
else:
location = location.decode('utf-8')
location_escaped = escape_url(location) location_escaped = escape_url(location)
if location != location_escaped: if location != location_escaped:
del resp.headers['Location'] del resp.headers['Location']
if sys.version_info < (3, 0):
location_escaped = location_escaped.encode('utf-8')
resp.headers['Location'] = location_escaped resp.headers['Location'] = location_escaped
return resp return resp

View File

@@ -1,3 +1,3 @@
from __future__ import unicode_literals from __future__ import unicode_literals
__version__ = '2016.06.02' __version__ = '2016.06.05'