Compare commits
48 Commits
2015.01.25
...
2015.01.30
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
5a8a29cfea | ||
|
|
c1708b89c0 | ||
|
|
83fddfd493 | ||
|
|
1798791df1 | ||
|
|
6ebb0dca9f | ||
|
|
cf8d6ec865 | ||
|
|
f452f72c6b | ||
|
|
3198291f26 | ||
|
|
02c1d5e285 | ||
|
|
ec4161a57d | ||
|
|
03d8d4df38 | ||
|
|
03d2d6d51b | ||
|
|
83fda3c000 | ||
|
|
4fe8495a23 | ||
|
|
a16f6643f0 | ||
|
|
adc0ae3ceb | ||
|
|
7bb3ceb4c7 | ||
|
|
75a4fc5b72 | ||
|
|
87673cd438 | ||
|
|
f345fe9db7 | ||
|
|
e683a48d0e | ||
|
|
a7a14d9586 | ||
|
|
219337990b | ||
|
|
376a770cc4 | ||
|
|
7e500dbd93 | ||
|
|
affd04a45d | ||
|
|
c84130e865 | ||
|
|
4f264c02c7 | ||
|
|
d205476103 | ||
|
|
367cc95aa7 | ||
|
|
206dba27a4 | ||
|
|
dcf53d4408 | ||
|
|
63be3b8989 | ||
|
|
18b4e9e79d | ||
|
|
cb454b333d | ||
|
|
e0d9f85aee | ||
|
|
b04fbd789c | ||
|
|
aad9556414 | ||
|
|
48a1e5141a | ||
|
|
0865f397ae | ||
|
|
796df3c631 | ||
|
|
a28383834b | ||
|
|
3a0d2f520a | ||
|
|
6348ad12a0 | ||
|
|
fe7710cbcc | ||
|
|
2103d038b3 | ||
|
|
9f0df77ab1 | ||
|
|
e72c7e4123 |
3
AUTHORS
3
AUTHORS
@@ -106,3 +106,6 @@ Johan K. Jensen
|
||||
Yen Chi Hsuan
|
||||
Enam Mijbah Noor
|
||||
David Luhmer
|
||||
Shaya Goldberg
|
||||
Yen Chi Hsuan
|
||||
Paul Hartmann
|
||||
|
||||
@@ -292,9 +292,9 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
video results by putting a condition in
|
||||
brackets, as in -f "best[height=720]" (or
|
||||
-f "[filesize>10M]"). This works for
|
||||
filesize, height, width, tbr, abr, and vbr
|
||||
and the comparisons <, <=, >, >=, =, != .
|
||||
Formats for which the value is not known
|
||||
filesize, height, width, tbr, abr, vbr, and
|
||||
fps and the comparisons <, <=, >, >=, =, !=
|
||||
. Formats for which the value is not known
|
||||
are excluded unless you put a question mark
|
||||
(?) after the operator. You can combine
|
||||
format filters, so -f "[height <=?
|
||||
|
||||
72
test/test_http.py
Normal file
72
test/test_http.py
Normal file
@@ -0,0 +1,72 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import unicode_literals
|
||||
|
||||
# Allow direct execution
|
||||
import os
|
||||
import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from youtube_dl import YoutubeDL
|
||||
from youtube_dl.compat import compat_http_server
|
||||
import ssl
|
||||
import threading
|
||||
|
||||
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
|
||||
|
||||
class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
|
||||
def log_message(self, format, *args):
|
||||
pass
|
||||
|
||||
def do_GET(self):
|
||||
if self.path == '/video.html':
|
||||
self.send_response(200)
|
||||
self.send_header('Content-Type', 'text/html; charset=utf-8')
|
||||
self.end_headers()
|
||||
self.wfile.write(b'<html><video src="/vid.mp4" /></html>')
|
||||
elif self.path == '/vid.mp4':
|
||||
self.send_response(200)
|
||||
self.send_header('Content-Type', 'video/mp4')
|
||||
self.end_headers()
|
||||
self.wfile.write(b'\x00\x00\x00\x00\x20\x66\x74[video]')
|
||||
else:
|
||||
assert False
|
||||
|
||||
|
||||
class FakeLogger(object):
|
||||
def debug(self, msg):
|
||||
pass
|
||||
|
||||
def warning(self, msg):
|
||||
pass
|
||||
|
||||
def error(self, msg):
|
||||
pass
|
||||
|
||||
|
||||
class TestHTTP(unittest.TestCase):
|
||||
def setUp(self):
|
||||
certfn = os.path.join(TEST_DIR, 'testcert.pem')
|
||||
self.httpd = compat_http_server.HTTPServer(
|
||||
('localhost', 0), HTTPTestRequestHandler)
|
||||
self.httpd.socket = ssl.wrap_socket(
|
||||
self.httpd.socket, certfile=certfn, server_side=True)
|
||||
self.port = self.httpd.socket.getsockname()[1]
|
||||
self.server_thread = threading.Thread(target=self.httpd.serve_forever)
|
||||
self.server_thread.daemon = True
|
||||
self.server_thread.start()
|
||||
|
||||
def test_nocheckcertificate(self):
|
||||
if sys.version_info >= (2, 7, 9): # No certificate checking anyways
|
||||
ydl = YoutubeDL({'logger': FakeLogger()})
|
||||
self.assertRaises(
|
||||
Exception,
|
||||
ydl.extract_info, 'https://localhost:%d/video.html' % self.port)
|
||||
|
||||
ydl = YoutubeDL({'logger': FakeLogger(), 'nocheckcertificate': True})
|
||||
r = ydl.extract_info('https://localhost:%d/video.html' % self.port)
|
||||
self.assertEqual(r['url'], 'https://localhost:%d/vid.mp4' % self.port)
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
52
test/testcert.pem
Normal file
52
test/testcert.pem
Normal file
@@ -0,0 +1,52 @@
|
||||
-----BEGIN PRIVATE KEY-----
|
||||
MIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQDMF0bAzaHAdIyB
|
||||
HRmnIp4vv40lGqEePmWqicCl0QZ0wsb5dNysSxSa7330M2QeQopGfdaUYF1uTcNp
|
||||
Qx6ECgBSfg+RrOBI7r/u4F+sKX8MUXVaf/5QoBUrGNGSn/pp7HMGOuQqO6BVg4+h
|
||||
A1ySSwUG8mZItLRry1ISyErmW8b9xlqfd97uLME/5tX+sMelRFjUbAx8A4CK58Ev
|
||||
mMguHVTlXzx5RMdYcf1VScYcjlV/qA45uzP8zwI5aigfcmUD+tbGuQRhKxUhmw0J
|
||||
aobtOR6+JSOAULW5gYa/egE4dWLwbyM6b6eFbdnjlQzEA1EW7ChMPAW/Mo83KyiP
|
||||
tKMCSQulAgMBAAECggEALCfBDAexPjU5DNoh6bIorUXxIJzxTNzNHCdvgbCGiA54
|
||||
BBKPh8s6qwazpnjT6WQWDIg/O5zZufqjE4wM9x4+0Zoqfib742ucJO9wY4way6x4
|
||||
Clt0xzbLPabB+MoZ4H7ip+9n2+dImhe7pGdYyOHoNYeOL57BBi1YFW42Hj6u/8pd
|
||||
63YCXisto3Rz1YvRQVjwsrS+cRKZlzAFQRviL30jav7Wh1aWEfcXxjj4zhm8pJdk
|
||||
ITGtq6howz57M0NtX6hZnfe8ywzTnDFIGKIMA2cYHuYJcBh9bc4tCGubTvTKK9UE
|
||||
8fM+f6UbfGqfpKCq1mcgs0XMoFDSzKS9+mSJn0+5JQKBgQD+OCKaeH3Yzw5zGnlw
|
||||
XuQfMJGNcgNr+ImjmvzUAC2fAZUJLAcQueE5kzMv5Fmd+EFE2CEX1Vit3tg0SXvA
|
||||
G+bq609doILHMA03JHnV1npO/YNIhG3AAtJlKYGxQNfWH9mflYj9mEui8ZFxG52o
|
||||
zWhHYuifOjjZszUR+/eio6NPzwKBgQDNhUBTrT8LIX4SE/EFUiTlYmWIvOMgXYvN
|
||||
8Cm3IRNQ/yyphZaXEU0eJzfX5uCDfSVOgd6YM/2pRah+t+1Hvey4H8e0GVTu5wMP
|
||||
gkkqwKPGIR1YOmlw6ippqwvoJD7LuYrm6Q4D6e1PvkjwCq6lEndrOPmPrrXNd0JJ
|
||||
XO60y3U2SwKBgQDLkyZarryQXxcCI6Q10Tc6pskYDMIit095PUbTeiUOXNT9GE28
|
||||
Hi32ziLCakk9kCysNasii81MxtQ54tJ/f5iGbNMMddnkKl2a19Hc5LjjAm4cJzg/
|
||||
98KGEhvyVqvAo5bBDZ06/rcrD+lZOzUglQS5jcIcqCIYa0LHWQ/wJLxFzwKBgFcZ
|
||||
1SRhdSmDfUmuF+S4ZpistflYjC3IV5rk4NkS9HvMWaJS0nqdw4A3AMzItXgkjq4S
|
||||
DkOVLTkTI5Do5HAWRv/VwC5M2hkR4NMu1VGAKSisGiKtRsirBWSZMEenLNHshbjN
|
||||
Jrpz5rZ4H7NT46ZkCCZyFBpX4gb9NyOedjA7Via3AoGARF8RxbYjnEGGFuhnbrJB
|
||||
FTPR0vaL4faY3lOgRZ8jOG9V2c9Hzi/y8a8TU4C11jnJSDqYCXBTd5XN28npYxtD
|
||||
pjRsCwy6ze+yvYXPO7C978eMG3YRyj366NXUxnXN59ibwe/lxi2OD9z8J1LEdF6z
|
||||
VJua1Wn8HKxnXMI61DhTCSo=
|
||||
-----END PRIVATE KEY-----
|
||||
-----BEGIN CERTIFICATE-----
|
||||
MIIEEzCCAvugAwIBAgIJAK1haYi6gmSKMA0GCSqGSIb3DQEBCwUAMIGeMQswCQYD
|
||||
VQQGEwJERTEMMAoGA1UECAwDTlJXMRQwEgYDVQQHDAtEdWVzc2VsZG9yZjEbMBkG
|
||||
A1UECgwSeW91dHViZS1kbCBwcm9qZWN0MRkwFwYDVQQLDBB5b3V0dWJlLWRsIHRl
|
||||
c3RzMRIwEAYDVQQDDAlsb2NhbGhvc3QxHzAdBgkqhkiG9w0BCQEWEHBoaWhhZ0Bw
|
||||
aGloYWcuZGUwIBcNMTUwMTMwMDExNTA4WhgPMjExNTAxMDYwMTE1MDhaMIGeMQsw
|
||||
CQYDVQQGEwJERTEMMAoGA1UECAwDTlJXMRQwEgYDVQQHDAtEdWVzc2VsZG9yZjEb
|
||||
MBkGA1UECgwSeW91dHViZS1kbCBwcm9qZWN0MRkwFwYDVQQLDBB5b3V0dWJlLWRs
|
||||
IHRlc3RzMRIwEAYDVQQDDAlsb2NhbGhvc3QxHzAdBgkqhkiG9w0BCQEWEHBoaWhh
|
||||
Z0BwaGloYWcuZGUwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDMF0bA
|
||||
zaHAdIyBHRmnIp4vv40lGqEePmWqicCl0QZ0wsb5dNysSxSa7330M2QeQopGfdaU
|
||||
YF1uTcNpQx6ECgBSfg+RrOBI7r/u4F+sKX8MUXVaf/5QoBUrGNGSn/pp7HMGOuQq
|
||||
O6BVg4+hA1ySSwUG8mZItLRry1ISyErmW8b9xlqfd97uLME/5tX+sMelRFjUbAx8
|
||||
A4CK58EvmMguHVTlXzx5RMdYcf1VScYcjlV/qA45uzP8zwI5aigfcmUD+tbGuQRh
|
||||
KxUhmw0JaobtOR6+JSOAULW5gYa/egE4dWLwbyM6b6eFbdnjlQzEA1EW7ChMPAW/
|
||||
Mo83KyiPtKMCSQulAgMBAAGjUDBOMB0GA1UdDgQWBBTBUZoqhQkzHQ6xNgZfFxOd
|
||||
ZEVt8TAfBgNVHSMEGDAWgBTBUZoqhQkzHQ6xNgZfFxOdZEVt8TAMBgNVHRMEBTAD
|
||||
AQH/MA0GCSqGSIb3DQEBCwUAA4IBAQCUOCl3T/J9B08Z+ijfOJAtkbUaEHuVZb4x
|
||||
5EpZSy2ZbkLvtsftMFieHVNXn9dDswQc5qjYStCC4o60LKw4M6Y63FRsAZ/DNaqb
|
||||
PY3jyCyuugZ8/sNf50vHYkAcF7SQYqOQFQX4TQsNUk2xMJIt7H0ErQFmkf/u3dg6
|
||||
cy89zkT462IwxzSG7NNhIlRkL9o5qg+Y1mF9eZA1B0rcL6hO24PPTHOd90HDChBu
|
||||
SZ6XMi/LzYQSTf0Vg2R+uMIVlzSlkdcZ6sqVnnqeLL8dFyIa4e9sj/D4ZCYP8Mqe
|
||||
Z73H5/NNhmwCHRqVUTgm307xblQaWGhwAiDkaRvRW2aJQ0qGEdZK
|
||||
-----END CERTIFICATE-----
|
||||
@@ -543,6 +543,11 @@ class YoutubeDL(object):
|
||||
outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
|
||||
tmpl = compat_expanduser(outtmpl)
|
||||
filename = tmpl % template_dict
|
||||
# Temporary fix for #4787
|
||||
# 'Treat' all problem characters by passing filename through preferredencoding
|
||||
# to workaround encoding issues with subprocess on python2 @ Windows
|
||||
if sys.version_info < (3, 0) and sys.platform == 'win32':
|
||||
filename = encodeFilename(filename, True).decode(preferredencoding())
|
||||
return filename
|
||||
except ValueError as err:
|
||||
self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
|
||||
@@ -820,7 +825,7 @@ class YoutubeDL(object):
|
||||
'!=': operator.ne,
|
||||
}
|
||||
operator_rex = re.compile(r'''(?x)\s*\[
|
||||
(?P<key>width|height|tbr|abr|vbr|filesize)
|
||||
(?P<key>width|height|tbr|abr|vbr|filesize|fps)
|
||||
\s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
|
||||
(?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
|
||||
\]$
|
||||
@@ -953,7 +958,7 @@ class YoutubeDL(object):
|
||||
if thumbnails is None:
|
||||
thumbnail = info_dict.get('thumbnail')
|
||||
if thumbnail:
|
||||
thumbnails = [{'url': thumbnail}]
|
||||
info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
|
||||
if thumbnails:
|
||||
thumbnails.sort(key=lambda t: (
|
||||
t.get('preference'), t.get('width'), t.get('height'),
|
||||
@@ -1069,6 +1074,7 @@ class YoutubeDL(object):
|
||||
selected_format = {
|
||||
'requested_formats': formats_info,
|
||||
'format': rf,
|
||||
'format_id': rf,
|
||||
'ext': formats_info[0]['ext'],
|
||||
'width': formats_info[0].get('width'),
|
||||
'height': formats_info[0].get('height'),
|
||||
@@ -1130,7 +1136,7 @@ class YoutubeDL(object):
|
||||
|
||||
self._num_downloads += 1
|
||||
|
||||
filename = self.prepare_filename(info_dict)
|
||||
info_dict['_filename'] = filename = self.prepare_filename(info_dict)
|
||||
|
||||
# Forced printings
|
||||
if self.params.get('forcetitle', False):
|
||||
@@ -1155,10 +1161,7 @@ class YoutubeDL(object):
|
||||
if self.params.get('forceformat', False):
|
||||
self.to_stdout(info_dict['format'])
|
||||
if self.params.get('forcejson', False):
|
||||
info_dict['_filename'] = filename
|
||||
self.to_stdout(json.dumps(info_dict))
|
||||
if self.params.get('dump_single_json', False):
|
||||
info_dict['_filename'] = filename
|
||||
|
||||
# Do nothing else if in simulate mode
|
||||
if self.params.get('simulate', False):
|
||||
|
||||
@@ -361,7 +361,9 @@ def _real_main(argv=None):
|
||||
sys.exit()
|
||||
|
||||
ydl.warn_if_short_id(sys.argv[1:] if argv is None else argv)
|
||||
parser.error('you must provide at least one URL')
|
||||
parser.error(
|
||||
'You must provide at least one URL.\n'
|
||||
'Type youtube-dl --help to see a list of all options.')
|
||||
|
||||
try:
|
||||
if opts.load_info_filename is not None:
|
||||
|
||||
@@ -71,6 +71,11 @@ try:
|
||||
except ImportError:
|
||||
compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w')
|
||||
|
||||
try:
|
||||
import http.server as compat_http_server
|
||||
except ImportError:
|
||||
import BaseHTTPServer as compat_http_server
|
||||
|
||||
try:
|
||||
from urllib.parse import unquote as compat_urllib_parse_unquote
|
||||
except ImportError:
|
||||
@@ -365,6 +370,7 @@ __all__ = [
|
||||
'compat_html_entities',
|
||||
'compat_html_parser',
|
||||
'compat_http_client',
|
||||
'compat_http_server',
|
||||
'compat_kwargs',
|
||||
'compat_ord',
|
||||
'compat_parse_qs',
|
||||
|
||||
@@ -104,6 +104,7 @@ class RtmpFD(FileDownloader):
|
||||
live = info_dict.get('rtmp_live', False)
|
||||
conn = info_dict.get('rtmp_conn', None)
|
||||
protocol = info_dict.get('rtmp_protocol', None)
|
||||
real_time = info_dict.get('rtmp_real_time', False)
|
||||
no_resume = info_dict.get('no_resume', False)
|
||||
continue_dl = info_dict.get('continuedl', False)
|
||||
|
||||
@@ -143,6 +144,8 @@ class RtmpFD(FileDownloader):
|
||||
basic_args += ['--conn', conn]
|
||||
if protocol is not None:
|
||||
basic_args += ['--protocol', protocol]
|
||||
if real_time:
|
||||
basic_args += ['--realtime']
|
||||
|
||||
args = basic_args
|
||||
if not no_resume and continue_dl and not live:
|
||||
|
||||
@@ -82,6 +82,7 @@ from .crunchyroll import (
|
||||
CrunchyrollShowPlaylistIE
|
||||
)
|
||||
from .cspan import CSpanIE
|
||||
from .ctsnews import CtsNewsIE
|
||||
from .dailymotion import (
|
||||
DailymotionIE,
|
||||
DailymotionPlaylistIE,
|
||||
@@ -89,6 +90,7 @@ from .dailymotion import (
|
||||
)
|
||||
from .daum import DaumIE
|
||||
from .dbtv import DBTVIE
|
||||
from .dctp import DctpTvIE
|
||||
from .deezer import DeezerPlaylistIE
|
||||
from .dfb import DFBIE
|
||||
from .dotsub import DotsubIE
|
||||
@@ -284,6 +286,12 @@ from .netzkino import NetzkinoIE
|
||||
from .nerdcubed import NerdCubedFeedIE
|
||||
from .newgrounds import NewgroundsIE
|
||||
from .newstube import NewstubeIE
|
||||
from .nextmedia import (
|
||||
NextMediaIE,
|
||||
NextMediaActionNewsIE,
|
||||
AppleDailyRealtimeNewsIE,
|
||||
AppleDailyAnimationNewsIE
|
||||
)
|
||||
from .nfb import NFBIE
|
||||
from .nfl import NFLIE
|
||||
from .nhl import NHLIE, NHLVideocenterIE
|
||||
@@ -546,6 +554,7 @@ from .xminus import XMinusIE
|
||||
from .xnxx import XNXXIE
|
||||
from .xvideos import XVideosIE
|
||||
from .xtube import XTubeUserIE, XTubeIE
|
||||
from .xuite import XuiteIE
|
||||
from .xxxymovies import XXXYMoviesIE
|
||||
from .yahoo import (
|
||||
YahooIE,
|
||||
|
||||
93
youtube_dl/extractor/ctsnews.py
Normal file
93
youtube_dl/extractor/ctsnews.py
Normal file
@@ -0,0 +1,93 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import parse_iso8601, ExtractorError
|
||||
|
||||
|
||||
class CtsNewsIE(InfoExtractor):
|
||||
# https connection failed (Connection reset)
|
||||
_VALID_URL = r'http://news\.cts\.com\.tw/[a-z]+/[a-z]+/\d+/(?P<id>\d+)\.html'
|
||||
_TESTS = [{
|
||||
'url': 'http://news.cts.com.tw/cts/international/201501/201501291578109.html',
|
||||
'md5': 'a9875cb790252b08431186d741beaabe',
|
||||
'info_dict': {
|
||||
'id': '201501291578109',
|
||||
'ext': 'mp4',
|
||||
'title': '以色列.真主黨交火 3人死亡',
|
||||
'description': 'md5:95e9b295c898b7ff294f09d450178d7d',
|
||||
'timestamp': 1422528540,
|
||||
'upload_date': '20150129',
|
||||
}
|
||||
}, {
|
||||
# News count not appear on page but still available in database
|
||||
'url': 'http://news.cts.com.tw/cts/international/201309/201309031304098.html',
|
||||
'md5': '3aee7e0df7cdff94e43581f54c22619e',
|
||||
'info_dict': {
|
||||
'id': '201309031304098',
|
||||
'ext': 'mp4',
|
||||
'title': '韓國31歲童顏男 貌如十多歲小孩',
|
||||
'description': 'md5:f183feeba3752b683827aab71adad584',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'timestamp': 1378205880,
|
||||
'upload_date': '20130903',
|
||||
}
|
||||
}, {
|
||||
# With Youtube embedded video
|
||||
'url': 'http://news.cts.com.tw/cts/money/201501/201501291578003.html',
|
||||
'md5': '1d842c771dc94c8c3bca5af2cc1db9c5',
|
||||
'add_ie': ['Youtube'],
|
||||
'info_dict': {
|
||||
'id': 'OVbfO7d0_hQ',
|
||||
'ext': 'mp4',
|
||||
'title': 'iPhone6熱銷 蘋果財報亮眼',
|
||||
'description': 'md5:f395d4f485487bb0f992ed2c4b07aa7d',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'upload_date': '20150128',
|
||||
'uploader_id': 'TBSCTS',
|
||||
'uploader': '中華電視公司',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
news_id = self._match_id(url)
|
||||
page = self._download_webpage(url, news_id)
|
||||
|
||||
if self._search_regex(r'(CTSPlayer2)', page, 'CTSPlayer2 identifier', default=None):
|
||||
feed_url = self._html_search_regex(
|
||||
r'(http://news\.cts\.com\.tw/action/mp4feed\.php\?news_id=\d+)',
|
||||
page, 'feed url')
|
||||
video_url = self._download_webpage(
|
||||
feed_url, news_id, note='Fetching feed')
|
||||
else:
|
||||
self.to_screen('Not CTSPlayer video, trying Youtube...')
|
||||
youtube_url = self._search_regex(
|
||||
r'src="(//www\.youtube\.com/embed/[^"]+)"', page, 'youtube url',
|
||||
default=None)
|
||||
if not youtube_url:
|
||||
raise ExtractorError('The news includes no videos!', expected=True)
|
||||
|
||||
return {
|
||||
'_type': 'url',
|
||||
'url': youtube_url,
|
||||
'ie_key': 'Youtube',
|
||||
}
|
||||
|
||||
description = self._html_search_meta('description', page)
|
||||
title = self._html_search_meta('title', page)
|
||||
thumbnail = self._html_search_meta('image', page)
|
||||
|
||||
datetime_str = self._html_search_regex(
|
||||
r'(\d{4}/\d{2}/\d{2} \d{2}:\d{2})', page, 'date and time')
|
||||
# Transform into ISO 8601 format with timezone info
|
||||
datetime_str = datetime_str.replace('/', '-') + ':00+0800'
|
||||
timestamp = parse_iso8601(datetime_str, delimiter=' ')
|
||||
|
||||
return {
|
||||
'id': news_id,
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'timestamp': timestamp,
|
||||
}
|
||||
57
youtube_dl/extractor/dctp.py
Normal file
57
youtube_dl/extractor/dctp.py
Normal file
@@ -0,0 +1,57 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
|
||||
|
||||
class DctpTvIE(InfoExtractor):
|
||||
_VALID_URL = r'http://www.dctp.tv/(#/)?filme/(?P<id>.+?)/$'
|
||||
_TEST = {
|
||||
'url': 'http://www.dctp.tv/filme/videoinstallation-fuer-eine-kaufhausfassade/',
|
||||
'info_dict': {
|
||||
'id': '1324',
|
||||
'display_id': 'videoinstallation-fuer-eine-kaufhausfassade',
|
||||
'ext': 'flv',
|
||||
'title': 'Videoinstallation für eine Kaufhausfassade'
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
base_url = 'http://dctp-ivms2-restapi.s3.amazonaws.com/'
|
||||
version_json = self._download_json(
|
||||
base_url + 'version.json',
|
||||
video_id, note='Determining file version')
|
||||
version = version_json['version_name']
|
||||
info_json = self._download_json(
|
||||
'{0}{1}/restapi/slugs/{2}.json'.format(base_url, version, video_id),
|
||||
video_id, note='Fetching object ID')
|
||||
object_id = compat_str(info_json['object_id'])
|
||||
meta_json = self._download_json(
|
||||
'{0}{1}/restapi/media/{2}.json'.format(base_url, version, object_id),
|
||||
video_id, note='Downloading metadata')
|
||||
uuid = meta_json['uuid']
|
||||
title = meta_json['title']
|
||||
wide = meta_json['is_wide']
|
||||
if wide:
|
||||
ratio = '16x9'
|
||||
else:
|
||||
ratio = '4x3'
|
||||
play_path = 'mp4:{0}_dctp_0500_{1}.m4v'.format(uuid, ratio)
|
||||
|
||||
servers_json = self._download_json(
|
||||
'http://www.dctp.tv/streaming_servers/',
|
||||
video_id, note='Downloading server list')
|
||||
url = servers_json[0]['endpoint']
|
||||
|
||||
return {
|
||||
'id': object_id,
|
||||
'title': title,
|
||||
'format': 'rtmp',
|
||||
'url': url,
|
||||
'play_path': play_path,
|
||||
'rtmp_real_time': True,
|
||||
'ext': 'flv',
|
||||
'display_id': video_id
|
||||
}
|
||||
@@ -498,6 +498,19 @@ class GenericIE(InfoExtractor):
|
||||
'uploader': 'www.abc.net.au',
|
||||
'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
|
||||
}
|
||||
},
|
||||
# embedded viddler video
|
||||
{
|
||||
'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
|
||||
'info_dict': {
|
||||
'id': '4d03aad9',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'deadspin',
|
||||
'title': 'WALL-TO-GORTAT',
|
||||
'timestamp': 1422285291,
|
||||
'upload_date': '20150126',
|
||||
},
|
||||
'add_ie': ['Viddler'],
|
||||
}
|
||||
]
|
||||
|
||||
@@ -860,9 +873,16 @@ class GenericIE(InfoExtractor):
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'))
|
||||
|
||||
# Look for embedded Viddler player
|
||||
mobj = re.search(
|
||||
r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
|
||||
webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'))
|
||||
|
||||
# Look for Ooyala videos
|
||||
mobj = (re.search(r'player.ooyala.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
|
||||
re.search(r'OO.Player.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage))
|
||||
mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
|
||||
re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage))
|
||||
if mobj is not None:
|
||||
return OoyalaIE._build_url_result(mobj.group('ec'))
|
||||
|
||||
|
||||
@@ -16,7 +16,7 @@ from ..utils import (
|
||||
class IviIE(InfoExtractor):
|
||||
IE_DESC = 'ivi.ru'
|
||||
IE_NAME = 'ivi'
|
||||
_VALID_URL = r'https?://(?:www\.)?ivi\.ru/(?:watch/(?:[^/]+/)?|video/player\?.*?videoId=)(?P<videoid>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?ivi\.ru/(?:watch/(?:[^/]+/)?|video/player\?.*?videoId=)(?P<id>\d+)'
|
||||
|
||||
_TESTS = [
|
||||
# Single movie
|
||||
@@ -63,29 +63,34 @@ class IviIE(InfoExtractor):
|
||||
return int(m.group('commentcount')) if m is not None else 0
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('videoid')
|
||||
video_id = self._match_id(url)
|
||||
|
||||
api_url = 'http://api.digitalaccess.ru/api/json/'
|
||||
|
||||
data = {'method': 'da.content.get',
|
||||
'params': [video_id, {'site': 's183',
|
||||
'referrer': 'http://www.ivi.ru/watch/%s' % video_id,
|
||||
'contentid': video_id
|
||||
}
|
||||
]
|
||||
data = {
|
||||
'method': 'da.content.get',
|
||||
'params': [
|
||||
video_id, {
|
||||
'site': 's183',
|
||||
'referrer': 'http://www.ivi.ru/watch/%s' % video_id,
|
||||
'contentid': video_id
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
request = compat_urllib_request.Request(api_url, json.dumps(data))
|
||||
|
||||
video_json_page = self._download_webpage(request, video_id, 'Downloading video JSON')
|
||||
video_json_page = self._download_webpage(
|
||||
request, video_id, 'Downloading video JSON')
|
||||
video_json = json.loads(video_json_page)
|
||||
|
||||
if 'error' in video_json:
|
||||
error = video_json['error']
|
||||
if error['origin'] == 'NoRedisValidData':
|
||||
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
|
||||
raise ExtractorError('Unable to download video %s: %s' % (video_id, error['message']), expected=True)
|
||||
raise ExtractorError(
|
||||
'Unable to download video %s: %s' % (video_id, error['message']),
|
||||
expected=True)
|
||||
|
||||
result = video_json['result']
|
||||
|
||||
|
||||
@@ -6,13 +6,12 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class LnkGoIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?lnkgo\.alfa\.lt/visi\-video/(?P<show>[^/]+)/ziurek\-(?P<display_id>[A-Za-z0-9\-]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?lnkgo\.alfa\.lt/visi-video/(?P<show>[^/]+)/ziurek-(?P<id>[A-Za-z0-9-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://lnkgo.alfa.lt/visi-video/yra-kaip-yra/ziurek-yra-kaip-yra-162',
|
||||
'info_dict': {
|
||||
@@ -51,8 +50,7 @@ class LnkGoIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
display_id = mobj.group('display_id')
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(
|
||||
url, display_id, 'Downloading player webpage')
|
||||
@@ -61,6 +59,8 @@ class LnkGoIE(InfoExtractor):
|
||||
r'data-ep="([^"]+)"', webpage, 'video ID')
|
||||
title = self._og_search_title(webpage)
|
||||
description = self._og_search_description(webpage)
|
||||
upload_date = unified_strdate(self._search_regex(
|
||||
r'class="[^"]*meta-item[^"]*air-time[^"]*">.*?<strong>([^<]+)</strong>', webpage, 'upload date', fatal=False))
|
||||
|
||||
thumbnail_w = int_or_none(
|
||||
self._og_search_property('image:width', webpage, 'thumbnail width', fatal=False))
|
||||
@@ -75,39 +75,28 @@ class LnkGoIE(InfoExtractor):
|
||||
'height': thumbnail_h,
|
||||
})
|
||||
|
||||
upload_date = unified_strdate(self._search_regex(
|
||||
r'class="meta-item\sair-time">.*?<strong>([^<]+)</strong>', webpage, 'upload date', fatal=False))
|
||||
duration = int_or_none(self._search_regex(
|
||||
r'VideoDuration = "([^"]+)"', webpage, 'duration', fatal=False))
|
||||
config = self._parse_json(self._search_regex(
|
||||
r'episodePlayer\((\{.*?\}),\s*\{', webpage, 'sources'), video_id)
|
||||
|
||||
pg_rating = self._search_regex(
|
||||
r'pgrating="([^"]+)"', webpage, 'PG rating', fatal=False, default='')
|
||||
age_limit = self._AGE_LIMITS.get(pg_rating.upper(), 0)
|
||||
if config.get('pGeo'):
|
||||
self.report_warning(
|
||||
'This content might not be available in your country due to copyright reasons')
|
||||
|
||||
sources_js = self._search_regex(
|
||||
r'(?s)sources:\s(\[.*?\]),', webpage, 'sources')
|
||||
sources = self._parse_json(
|
||||
sources_js, video_id, transform_source=js_to_json)
|
||||
formats = [{
|
||||
'format_id': 'hls',
|
||||
'ext': 'mp4',
|
||||
'url': config['EpisodeVideoLink_HLS'],
|
||||
}]
|
||||
|
||||
formats = []
|
||||
for source in sources:
|
||||
if source.get('provider') == 'rtmp':
|
||||
m = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<play_path>.+)$', source['file'])
|
||||
if not m:
|
||||
continue
|
||||
formats.append({
|
||||
'format_id': 'rtmp',
|
||||
'ext': 'flv',
|
||||
'url': m.group('url'),
|
||||
'play_path': m.group('play_path'),
|
||||
'page_url': url,
|
||||
})
|
||||
elif source.get('file').endswith('.m3u8'):
|
||||
formats.append({
|
||||
'format_id': 'hls',
|
||||
'ext': source.get('type', 'mp4'),
|
||||
'url': source['file'],
|
||||
})
|
||||
m = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<play_path>.+)$', config['EpisodeVideoLink'])
|
||||
if m:
|
||||
formats.append({
|
||||
'format_id': 'rtmp',
|
||||
'ext': 'flv',
|
||||
'url': m.group('url'),
|
||||
'play_path': m.group('play_path'),
|
||||
'page_url': url,
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
@@ -117,8 +106,8 @@ class LnkGoIE(InfoExtractor):
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnails': [thumbnail],
|
||||
'duration': duration,
|
||||
'duration': int_or_none(config.get('VideoTime')),
|
||||
'description': description,
|
||||
'age_limit': age_limit,
|
||||
'age_limit': self._AGE_LIMITS.get(config.get('PGRating'), 0),
|
||||
'upload_date': upload_date,
|
||||
}
|
||||
|
||||
163
youtube_dl/extractor/nextmedia.py
Normal file
163
youtube_dl/extractor/nextmedia.py
Normal file
@@ -0,0 +1,163 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import parse_iso8601
|
||||
|
||||
|
||||
class NextMediaIE(InfoExtractor):
|
||||
_VALID_URL = r'http://hk.apple.nextmedia.com/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://hk.apple.nextmedia.com/realtime/news/20141108/53109199',
|
||||
'md5': 'dff9fad7009311c421176d1ac90bfe4f',
|
||||
'info_dict': {
|
||||
'id': '53109199',
|
||||
'ext': 'mp4',
|
||||
'title': '【佔領金鐘】50外國領事議員撐場 讚學生勇敢香港有希望',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'description': 'md5:28222b9912b6665a21011b034c70fcc7',
|
||||
'timestamp': 1415456273,
|
||||
'upload_date': '20141108',
|
||||
}
|
||||
}]
|
||||
|
||||
_URL_PATTERN = r'\{ url: \'(.+)\' \}'
|
||||
|
||||
def _real_extract(self, url):
|
||||
news_id = self._match_id(url)
|
||||
page = self._download_webpage(url, news_id)
|
||||
return self._extract_from_nextmedia_page(news_id, url, page)
|
||||
|
||||
def _extract_from_nextmedia_page(self, news_id, url, page):
|
||||
title = self._fetch_title(page)
|
||||
video_url = self._search_regex(self._URL_PATTERN, page, 'video url')
|
||||
|
||||
attrs = {
|
||||
'id': news_id,
|
||||
'title': title,
|
||||
'url': video_url, # ext can be inferred from url
|
||||
'thumbnail': self._fetch_thumbnail(page),
|
||||
'description': self._fetch_description(page),
|
||||
}
|
||||
|
||||
timestamp = self._fetch_timestamp(page)
|
||||
if timestamp:
|
||||
attrs['timestamp'] = timestamp
|
||||
else:
|
||||
attrs['upload_date'] = self._fetch_upload_date(url)
|
||||
|
||||
return attrs
|
||||
|
||||
def _fetch_title(self, page):
|
||||
return self._og_search_title(page)
|
||||
|
||||
def _fetch_thumbnail(self, page):
|
||||
return self._og_search_thumbnail(page)
|
||||
|
||||
def _fetch_timestamp(self, page):
|
||||
dateCreated = self._search_regex('"dateCreated":"([^"]+)"', page, 'created time')
|
||||
return parse_iso8601(dateCreated)
|
||||
|
||||
def _fetch_upload_date(self, url):
|
||||
return self._search_regex(self._VALID_URL, url, 'upload date', group='date')
|
||||
|
||||
def _fetch_description(self, page):
|
||||
return self._og_search_property('description', page)
|
||||
|
||||
|
||||
class NextMediaActionNewsIE(NextMediaIE):
|
||||
_VALID_URL = r'http://hk.dv.nextmedia.com/actionnews/[^/]+/(?P<date>\d+)/(?P<id>\d+)/\d+'
|
||||
_TESTS = [{
|
||||
'url': 'http://hk.dv.nextmedia.com/actionnews/hit/20150121/19009428/20061460',
|
||||
'md5': '05fce8ffeed7a5e00665d4b7cf0f9201',
|
||||
'info_dict': {
|
||||
'id': '19009428',
|
||||
'ext': 'mp4',
|
||||
'title': '【壹週刊】細10年男友偷食 50歲邵美琪再失戀',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'description': 'md5:cd802fad1f40fd9ea178c1e2af02d659',
|
||||
'timestamp': 1421791200,
|
||||
'upload_date': '20150120',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
news_id = self._match_id(url)
|
||||
actionnews_page = self._download_webpage(url, news_id)
|
||||
article_url = self._og_search_url(actionnews_page)
|
||||
article_page = self._download_webpage(article_url, news_id)
|
||||
return self._extract_from_nextmedia_page(news_id, url, article_page)
|
||||
|
||||
|
||||
class AppleDailyRealtimeNewsIE(NextMediaIE):
|
||||
_VALID_URL = r'http://(www|ent).appledaily.com.tw/(realtimenews|enews)/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)(/.*)?'
|
||||
_TESTS = [{
|
||||
'url': 'http://ent.appledaily.com.tw/enews/article/entertainment/20150128/36354694',
|
||||
'md5': 'a843ab23d150977cc55ef94f1e2c1e4d',
|
||||
'info_dict': {
|
||||
'id': '36354694',
|
||||
'ext': 'mp4',
|
||||
'title': '周亭羽走過摩鐵陰霾2男陪吃 九把刀孤寒看醫生',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'description': 'md5:b23787119933404ce515c6356a8c355c',
|
||||
'upload_date': '20150128',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.appledaily.com.tw/realtimenews/article/strange/20150128/550549/%E4%B8%8D%E6%BB%BF%E8%A2%AB%E8%B8%A9%E8%85%B3%E3%80%80%E5%B1%B1%E6%9D%B1%E5%85%A9%E5%A4%A7%E5%AA%BD%E4%B8%80%E8%B7%AF%E6%89%93%E4%B8%8B%E8%BB%8A',
|
||||
'md5': '86b4e9132d158279c7883822d94ccc49',
|
||||
'info_dict': {
|
||||
'id': '550549',
|
||||
'ext': 'mp4',
|
||||
'title': '不滿被踩腳 山東兩大媽一路打下車',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'description': 'md5:2648aaf6fc4f401f6de35a91d111aa1d',
|
||||
'upload_date': '20150128',
|
||||
}
|
||||
}]
|
||||
|
||||
_URL_PATTERN = r'\{url: \'(.+)\'\}'
|
||||
|
||||
def _fetch_title(self, page):
|
||||
return self._html_search_regex(r'<h1 id="h1">([^<>]+)</h1>', page, 'news title')
|
||||
|
||||
def _fetch_thumbnail(self, page):
|
||||
return self._html_search_regex(r"setInitialImage\(\'([^']+)'\)", page, 'video thumbnail', fatal=False)
|
||||
|
||||
def _fetch_timestamp(self, page):
|
||||
return None
|
||||
|
||||
|
||||
class AppleDailyAnimationNewsIE(AppleDailyRealtimeNewsIE):
    """Extractor for appledaily.com.tw animation-news pages.

    Inherits from AppleDailyRealtimeNewsIE and replaces the URL pattern and
    the title/description lookups.
    """

    # Raw string (the pattern contains '\d'), matching the sibling
    # extractor's style; hostname dots are escaped so they match a
    # literal '.' only.
    _VALID_URL = r'http://www\.appledaily\.com\.tw/animation/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)(/.*)?'
    _TESTS = [{
        'url': 'http://www.appledaily.com.tw/animation/realtimenews/new/20150128/5003671',
        'md5': '03df296d95dedc2d5886debbb80cb43f',
        'info_dict': {
            'id': '5003671',
            'ext': 'mp4',
            'title': '20正妹熱舞 《刀龍傳說Online》火辣上市',
            'thumbnail': r're:^https?://.*\.jpg$',
            'description': 'md5:23c0aac567dc08c9c16a3161a2c2e3cd',
            'upload_date': '20150128',
        }
    }, {
        # No thumbnail
        'url': 'http://www.appledaily.com.tw/animation/realtimenews/new/20150128/5003673/',
        'md5': 'b06182cd386ea7bc6115ec7ff0f72aeb',
        'info_dict': {
            'id': '5003673',
            'ext': 'mp4',
            'title': '半夜尿尿 好像會看到___',
            'description': 'md5:61d2da7fe117fede148706cdb85ac066',
            'upload_date': '20150128',
        },
        'expected_warnings': [
            'video thumbnail',
        ]
    }]

    def _fetch_title(self, page):
        """Return the title, read from the ``description`` meta tag.

        NOTE(review): this reads the same meta tag as _fetch_description;
        the test titles differ from the description hashes, so confirm
        how the base class post-processes these values.
        """
        return self._html_search_meta('description', page, 'news title')

    def _fetch_description(self, page):
        """Return the content of the ``description`` meta tag."""
        return self._html_search_meta('description', page, 'news description')
|
||||
@@ -8,7 +8,7 @@ from ..utils import js_to_json
|
||||
|
||||
|
||||
class SRMediathekIE(InfoExtractor):
|
||||
IE_DESC = 'Süddeutscher Rundfunk'
|
||||
IE_DESC = 'Saarländischer Rundfunk'
|
||||
_VALID_URL = r'https?://sr-mediathek\.sr-online\.de/index\.php\?.*?&id=(?P<id>[0-9]+)'
|
||||
|
||||
_TEST = {
|
||||
|
||||
@@ -5,27 +5,58 @@ from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
)
|
||||
from ..compat import (
|
||||
compat_urllib_request
|
||||
)
|
||||
|
||||
|
||||
class ViddlerIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?viddler\.com/(?:v|embed|player)/(?P<id>[a-z0-9]+)'
|
||||
_TEST = {
|
||||
"url": "http://www.viddler.com/v/43903784",
|
||||
_TESTS = [{
|
||||
'url': 'http://www.viddler.com/v/43903784',
|
||||
'md5': 'ae43ad7cb59431ce043f0ff7fa13cbf4',
|
||||
'info_dict': {
|
||||
'id': '43903784',
|
||||
'ext': 'mp4',
|
||||
"title": "Video Made Easy",
|
||||
'description': 'You don\'t need to be a professional to make high-quality video content. Viddler provides some quick and easy tips on how to produce great video content with limited resources. ',
|
||||
"uploader": "viddler",
|
||||
'title': 'Video Made Easy',
|
||||
'description': 'md5:6a697ebd844ff3093bd2e82c37b409cd',
|
||||
'uploader': 'viddler',
|
||||
'timestamp': 1335371429,
|
||||
'upload_date': '20120425',
|
||||
"duration": 100.89,
|
||||
'duration': 100.89,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'categories': ['video content', 'high quality video', 'video made easy', 'how to produce video with limited resources', 'viddler'],
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.viddler.com/v/4d03aad9/',
|
||||
'md5': 'faa71fbf70c0bee7ab93076fd007f4b0',
|
||||
'info_dict': {
|
||||
'id': '4d03aad9',
|
||||
'ext': 'mp4',
|
||||
'title': 'WALL-TO-GORTAT',
|
||||
'upload_date': '20150126',
|
||||
'uploader': 'deadspin',
|
||||
'timestamp': 1422285291,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.viddler.com/player/221ebbbd/0/',
|
||||
'md5': '0defa2bd0ea613d14a6e9bd1db6be326',
|
||||
'info_dict': {
|
||||
'id': '221ebbbd',
|
||||
'ext': 'mp4',
|
||||
'title': 'LETeens-Grammar-snack-third-conditional',
|
||||
'description': ' ',
|
||||
'upload_date': '20140929',
|
||||
'uploader': 'BCLETeens',
|
||||
'timestamp': 1411997190,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
@@ -33,14 +64,17 @@ class ViddlerIE(InfoExtractor):
|
||||
json_url = (
|
||||
'http://api.viddler.com/api/v2/viddler.videos.getPlaybackDetails.json?video_id=%s&key=v0vhrt7bg2xq1vyxhkct' %
|
||||
video_id)
|
||||
data = self._download_json(json_url, video_id)['video']
|
||||
headers = {'Referer': 'http://static.cdn-ec.viddler.com/js/arpeggio/v2/embed.html'}
|
||||
request = compat_urllib_request.Request(json_url, None, headers)
|
||||
data = self._download_json(request, video_id)['video']
|
||||
|
||||
formats = []
|
||||
for filed in data['files']:
|
||||
if filed.get('status', 'ready') != 'ready':
|
||||
continue
|
||||
format_id = filed.get('profile_id') or filed['profile_name']
|
||||
f = {
|
||||
'format_id': filed['profile_id'],
|
||||
'format_id': format_id,
|
||||
'format_note': filed['profile_name'],
|
||||
'url': self._proto_relative_url(filed['url']),
|
||||
'width': int_or_none(filed.get('width')),
|
||||
@@ -53,16 +87,15 @@ class ViddlerIE(InfoExtractor):
|
||||
|
||||
if filed.get('cdn_url'):
|
||||
f = f.copy()
|
||||
f['url'] = self._proto_relative_url(filed['cdn_url'])
|
||||
f['format_id'] = filed['profile_id'] + '-cdn'
|
||||
f['url'] = self._proto_relative_url(filed['cdn_url'], 'http:')
|
||||
f['format_id'] = format_id + '-cdn'
|
||||
f['source_preference'] = 1
|
||||
formats.append(f)
|
||||
|
||||
if filed.get('html5_video_source'):
|
||||
f = f.copy()
|
||||
f['url'] = self._proto_relative_url(
|
||||
filed['html5_video_source'])
|
||||
f['format_id'] = filed['profile_id'] + '-html5'
|
||||
f['url'] = self._proto_relative_url(filed['html5_video_source'])
|
||||
f['format_id'] = format_id + '-html5'
|
||||
f['source_preference'] = 0
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
@@ -71,7 +104,6 @@ class ViddlerIE(InfoExtractor):
|
||||
t.get('text') for t in data.get('tags', []) if 'text' in t]
|
||||
|
||||
return {
|
||||
'_type': 'video',
|
||||
'id': video_id,
|
||||
'title': data['title'],
|
||||
'formats': formats,
|
||||
@@ -81,5 +113,6 @@ class ViddlerIE(InfoExtractor):
|
||||
'uploader': data.get('author'),
|
||||
'duration': float_or_none(data.get('length')),
|
||||
'view_count': int_or_none(data.get('view_count')),
|
||||
'comment_count': int_or_none(data.get('comment_count')),
|
||||
'categories': categories,
|
||||
}
|
||||
|
||||
142
youtube_dl/extractor/xuite.py
Normal file
142
youtube_dl/extractor/xuite.py
Normal file
@@ -0,0 +1,142 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
parse_iso8601,
|
||||
parse_duration,
|
||||
)
|
||||
|
||||
|
||||
class XuiteIE(InfoExtractor):
    """Extractor for vlog.xuite.net play/embed pages (audio and video).

    The id in the URL is a base64 string; the numeric media id used in the
    result is read from the page's ``data-mediaid`` attribute, and the
    media metadata comes from the flash player's XML configuration.
    """

    _REGEX_BASE64 = r'(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?'
    _VALID_URL = r'https?://vlog\.xuite\.net/(?:play|embed)/(?P<id>%s)' % _REGEX_BASE64
    _TESTS = [{
        # Audio
        'url': 'http://vlog.xuite.net/play/RGkzc1ZULTM4NjA5MTQuZmx2',
        'md5': '63a42c705772aa53fd4c1a0027f86adf',
        'info_dict': {
            'id': '3860914',
            'ext': 'mp3',
            'title': '孤單南半球-歐德陽',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 247.246,
            'timestamp': 1314932940,
            'upload_date': '20110902',
            'uploader': '阿能',
            'uploader_id': '15973816',
            'categories': ['個人短片'],
        },
    }, {
        # Video with only one format
        'url': 'http://vlog.xuite.net/play/TkRZNjhULTM0NDE2MjkuZmx2',
        'md5': 'c45737fc8ac5dc8ac2f92ecbcecf505e',
        'info_dict': {
            'id': '3441629',
            'ext': 'mp4',
            'title': '孫燕姿 - 眼淚成詩',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 217.399,
            'timestamp': 1299383640,
            'upload_date': '20110306',
            'uploader': 'Valen',
            'uploader_id': '10400126',
            'categories': ['影視娛樂'],
        },
    }, {
        # Video with two formats
        'url': 'http://vlog.xuite.net/play/bWo1N1pLLTIxMzAxMTcwLmZsdg==',
        'md5': '1166e0f461efe55b62e26a2d2a68e6de',
        'info_dict': {
            'id': '21301170',
            'ext': 'mp4',
            'title': '暗殺教室 02',
            'description': '字幕:【極影字幕社】',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 1384.907,
            'timestamp': 1421481240,
            'upload_date': '20150117',
            'uploader': '我只是想認真點',
            'uploader_id': '242127761',
            'categories': ['電玩動漫'],
        },
    }, {
        'url': 'http://vlog.xuite.net/play/S1dDUjdyLTMyOTc3NjcuZmx2/%E5%AD%AB%E7%87%95%E5%A7%BF-%E7%9C%BC%E6%B7%9A%E6%88%90%E8%A9%A9',
        'only_matching': True,
    }]

    def _extract_flv_config(self, media_id):
        """Download the flash player config and return it as a plain dict.

        Both the property ids and their contents are base64-encoded in the
        XML; the contents are additionally URL-quoted. Properties whose
        text is empty are left out of the returned dict, so callers must
        not assume that any particular key is present.
        """
        base64_media_id = base64.b64encode(media_id.encode('utf-8')).decode('utf-8')
        # NOTE(review): 'flv config' is passed in the position where the
        # other download helpers used in this class receive the video id -
        # confirm this is intended.
        flv_config = self._download_xml(
            'http://vlog.xuite.net/flash/player?media=%s' % base64_media_id,
            'flv config')
        prop_dict = {}
        for prop in flv_config.findall('./property'):
            prop_id = base64.b64decode(prop.attrib['id']).decode('utf-8')
            # CDATA may be empty in flv config
            if not prop.text:
                continue
            encoded_content = base64.b64decode(prop.text).decode('utf-8')
            prop_dict[prop_id] = compat_urllib_parse_unquote(encoded_content)
        return prop_dict

    def _real_extract(self, url):
        """Return the info dict for a Xuite vlog URL.

        Raises ExtractorError (expected=True) when the page shows an
        error message instead of the player.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        error_msg = self._search_regex(
            r'<div id="error-message-content">([^<]+)',
            webpage, 'error message', default=None)
        if error_msg:
            raise ExtractorError(
                '%s returned error: %s' % (self.IE_NAME, error_msg),
                expected=True)

        # From here on the numeric media id replaces the base64 URL id.
        video_id = self._html_search_regex(
            r'data-mediaid="(\d+)"', webpage, 'media id')
        flv_config = self._extract_flv_config(video_id)

        FORMATS = {
            'audio': 'mp3',
            'video': 'mp4',
        }
        # 'type' may be missing (empty properties are dropped from the
        # config), so look it up defensively and fall back to mp4 rather
        # than raising KeyError.
        ext = FORMATS.get(flv_config.get('type'), 'mp4')

        formats = []
        for format_tag in ('src', 'hq_src'):
            video_url = flv_config.get(format_tag)
            if not video_url:
                continue
            # Prefer the 'q=' URL parameter as the format id; fall back to
            # the config key when the URL does not carry one.
            format_id = self._search_regex(
                r'\bq=(.+?)\b', video_url, 'format id', default=format_tag)
            formats.append({
                'url': video_url,
                'ext': ext,
                'format_id': format_id,
                'height': int(format_id) if format_id.isnumeric() else None,
            })
        self._sort_formats(formats)

        timestamp = flv_config.get('publish_datetime')
        if timestamp:
            # The config value has no zone; a fixed +0800 offset is appended.
            timestamp = parse_iso8601(timestamp + ' +0800', ' ')

        category = flv_config.get('category')
        categories = [category] if category else []

        return {
            'id': video_id,
            'title': flv_config['title'],
            'description': flv_config.get('description'),
            'thumbnail': flv_config.get('thumb'),
            'timestamp': timestamp,
            'uploader': flv_config.get('author_name'),
            'uploader_id': flv_config.get('author_id'),
            'duration': parse_duration(flv_config.get('duration')),
            'categories': categories,
            'formats': formats,
        }
|
||||
@@ -1688,6 +1688,7 @@ class YoutubeTruncatedURLIE(InfoExtractor):
|
||||
feature=[a-z_]+|
|
||||
annotation_id=annotation_[^&]+|
|
||||
x-yt-cl=[0-9]+|
|
||||
hl=[^&]*|
|
||||
)?
|
||||
|
|
||||
attribution_link\?a=[^&]+
|
||||
@@ -1707,6 +1708,9 @@ class YoutubeTruncatedURLIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.youtube.com/watch?feature=foo',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.youtube.com/watch?hl=en-GB',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -297,7 +297,7 @@ def parseOpts(overrideArguments=None):
|
||||
' You can filter the video results by putting a condition in'
|
||||
' brackets, as in -f "best[height=720]"'
|
||||
' (or -f "[filesize>10M]"). '
|
||||
' This works for filesize, height, width, tbr, abr, and vbr'
|
||||
' This works for filesize, height, width, tbr, abr, vbr, and fps'
|
||||
' and the comparisons <, <=, >, >=, =, != .'
|
||||
' Formats for which the value is not known are excluded unless you'
|
||||
' put a question mark (?) after the operator.'
|
||||
|
||||
@@ -654,9 +654,14 @@ class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
|
||||
self._params = params
|
||||
|
||||
def https_open(self, req):
|
||||
kwargs = {}
|
||||
if hasattr(self, '_context'): # python > 2.6
|
||||
kwargs['context'] = self._context
|
||||
if hasattr(self, '_check_hostname'): # python 3.x
|
||||
kwargs['check_hostname'] = self._check_hostname
|
||||
return self.do_open(functools.partial(
|
||||
_create_http_connection, self, self._https_conn_class, True),
|
||||
req)
|
||||
req, **kwargs)
|
||||
|
||||
|
||||
def parse_iso8601(date_str, delimiter='T'):
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2015.01.25'
|
||||
__version__ = '2015.01.30'
|
||||
|
||||
Reference in New Issue
Block a user