Compare commits
48 Commits
2013.09.20
...
2013.09.24
Author | SHA1 | Date | |
---|---|---|---|
|
8b25323ae2 | ||
|
f426de8460 | ||
|
695dc094ab | ||
|
e80d861064 | ||
|
2cdeb20135 | ||
|
7f74773254 | ||
|
f2c327fd39 | ||
|
e35e4ddc9a | ||
|
c3c88a2664 | ||
|
bb0eee71e7 | ||
|
6f56389b88 | ||
|
5b333c1ce6 | ||
|
a825f33030 | ||
|
92f618f2e2 | ||
|
81ec7c7901 | ||
|
dd5d2eb03c | ||
|
4ae720042c | ||
|
c705320f48 | ||
|
d2d8f89531 | ||
|
bdde940e90 | ||
|
45f4a76dbc | ||
|
13dc64ce74 | ||
|
c35f9e72ce | ||
|
f8061589e6 | ||
|
0ca96d48c7 | ||
|
4ba146f35d | ||
|
edf3e38ebd | ||
|
c4417ddb61 | ||
|
4a2080e407 | ||
|
2f2ffea9ca | ||
|
ba552f542f | ||
|
8379969834 | ||
|
95dbd2f990 | ||
|
a7177865b1 | ||
|
e0df6211cc | ||
|
b00ca882a4 | ||
|
39baacc49f | ||
|
3a1d48d6de | ||
|
34308b30d6 | ||
|
bc1506f8c0 | ||
|
b61067fa4f | ||
|
69b227a9bc | ||
|
0fd49457f5 | ||
|
58f289d013 | ||
|
3d60bb96e1 | ||
|
38d025b3f0 | ||
|
c40c6aaaaa | ||
|
1a810f0d4e |
1
.gitignore
vendored
1
.gitignore
vendored
@@ -24,3 +24,4 @@ updates_key.pem
|
|||||||
*.flv
|
*.flv
|
||||||
*.mp4
|
*.mp4
|
||||||
*.part
|
*.part
|
||||||
|
test/testdata
|
||||||
|
@@ -30,6 +30,10 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
--extractor-descriptions Output descriptions of all supported extractors
|
--extractor-descriptions Output descriptions of all supported extractors
|
||||||
--proxy URL Use the specified HTTP/HTTPS proxy
|
--proxy URL Use the specified HTTP/HTTPS proxy
|
||||||
--no-check-certificate Suppress HTTPS certificate validation.
|
--no-check-certificate Suppress HTTPS certificate validation.
|
||||||
|
--cache-dir None Location in the filesystem where youtube-dl can
|
||||||
|
store downloaded information permanently.
|
||||||
|
~/.youtube-dl/cache by default
|
||||||
|
--no-cache-dir Disable filesystem caching
|
||||||
|
|
||||||
## Video Selection:
|
## Video Selection:
|
||||||
--playlist-start NUMBER playlist video to start at (default is 1)
|
--playlist-start NUMBER playlist video to start at (default is 1)
|
||||||
|
@@ -12,6 +12,9 @@ tests = [
|
|||||||
# 92 - vflQw-fB4 2013/07/17
|
# 92 - vflQw-fB4 2013/07/17
|
||||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`~\"",
|
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`~\"",
|
||||||
"mrtyuioplkjhgfdsazxcvbnq1234567890QWERTY}IOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]\"|:;"),
|
"mrtyuioplkjhgfdsazxcvbnq1234567890QWERTY}IOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]\"|:;"),
|
||||||
|
# 91 - vfl79wBKW 2013/07/20 (sporadic)
|
||||||
|
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`~",
|
||||||
|
"/?;:|}][{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ09876543.1mnbvcxzasdfghjklpoiu"),
|
||||||
# 90
|
# 90
|
||||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`",
|
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`",
|
||||||
"mrtyuioplkjhgfdsazxcvbne1234567890QWER[YUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={`]}|"),
|
"mrtyuioplkjhgfdsazxcvbne1234567890QWER[YUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={`]}|"),
|
||||||
@@ -24,15 +27,15 @@ tests = [
|
|||||||
# 87
|
# 87
|
||||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<",
|
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<",
|
||||||
"uioplkjhgfdsazxcvbnm1t34567890QWE2TYUIOPLKJHGFDSAZXCVeNM!@#$^&*()_-+={[]}|:;?/>.<"),
|
"uioplkjhgfdsazxcvbnm1t34567890QWE2TYUIOPLKJHGFDSAZXCVeNM!@#$^&*()_-+={[]}|:;?/>.<"),
|
||||||
# 86 - vfluy6kdb 2013/09/06
|
# 86 - vflHql6Pr 2013/09/24
|
||||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<",
|
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<",
|
||||||
"yuioplkjhgfdsazxcvbnm12345678q0QWrRTYUIOELKJHGFD-AZXCVBNM!@#$%^&*()_<+={[|};?/>.S"),
|
";}|[{=+-d)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYT_EWQ0987654321mnbvcxzas/fghjklpoiuytrewq"),
|
||||||
# 85 - vflkuzxcs 2013/09/11
|
# 85 - vflkuzxcs 2013/09/11
|
||||||
('0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&\'()*+,-./:;<=>?@[',
|
('0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&\'()*+,-./:;<=>?@[',
|
||||||
'3456789a0cdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS[UVWXYZ!"#$%&\'()*+,-./:;<=>?@'),
|
'3456789a0cdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS[UVWXYZ!"#$%&\'()*+,-./:;<=>?@'),
|
||||||
# 84 - vflg0g8PQ 2013/08/29 (sporadic)
|
# 84 - vflHql6Pr 2013/09/24 (sporadic)
|
||||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<",
|
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<",
|
||||||
">?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWq0987654321mnbvcxzasdfghjklpoiuytr"),
|
"}[{=+-_)g*&^%$#@!MNBVCXZASDFGHJKLPOIUYTRE(Q0987654321mnbvcxzasdf?hjklpoiuytrewq"),
|
||||||
# 83
|
# 83
|
||||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<",
|
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<",
|
||||||
".>/?;}[{=+_)(*&^%<#!MNBVCXZASPFGHJKLwOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytreq"),
|
".>/?;}[{=+_)(*&^%<#!MNBVCXZASPFGHJKLwOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytreq"),
|
||||||
|
@@ -1,4 +1,5 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
# encoding: utf-8
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
import unittest
|
import unittest
|
||||||
@@ -8,7 +9,14 @@ import json
|
|||||||
import os
|
import os
|
||||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
from youtube_dl.extractor import DailymotionPlaylistIE, VimeoChannelIE, UstreamChannelIE, SoundcloudUserIE
|
from youtube_dl.extractor import (
|
||||||
|
DailymotionPlaylistIE,
|
||||||
|
DailymotionUserIE,
|
||||||
|
VimeoChannelIE,
|
||||||
|
UstreamChannelIE,
|
||||||
|
SoundcloudUserIE,
|
||||||
|
LivestreamIE,
|
||||||
|
)
|
||||||
from youtube_dl.utils import *
|
from youtube_dl.utils import *
|
||||||
|
|
||||||
from helper import FakeYDL
|
from helper import FakeYDL
|
||||||
@@ -26,6 +34,14 @@ class TestPlaylists(unittest.TestCase):
|
|||||||
self.assertEqual(result['title'], u'SPORT')
|
self.assertEqual(result['title'], u'SPORT')
|
||||||
self.assertTrue(len(result['entries']) > 20)
|
self.assertTrue(len(result['entries']) > 20)
|
||||||
|
|
||||||
|
def test_dailymotion_user(self):
|
||||||
|
dl = FakeYDL()
|
||||||
|
ie = DailymotionUserIE(dl)
|
||||||
|
result = ie.extract('http://www.dailymotion.com/user/generation-quoi/')
|
||||||
|
self.assertIsPlaylist(result)
|
||||||
|
self.assertEqual(result['title'], u'Génération Quoi')
|
||||||
|
self.assertTrue(len(result['entries']) >= 26)
|
||||||
|
|
||||||
def test_vimeo_channel(self):
|
def test_vimeo_channel(self):
|
||||||
dl = FakeYDL()
|
dl = FakeYDL()
|
||||||
ie = VimeoChannelIE(dl)
|
ie = VimeoChannelIE(dl)
|
||||||
@@ -50,5 +66,13 @@ class TestPlaylists(unittest.TestCase):
|
|||||||
self.assertEqual(result['id'], u'9615865')
|
self.assertEqual(result['id'], u'9615865')
|
||||||
self.assertTrue(len(result['entries']) >= 12)
|
self.assertTrue(len(result['entries']) >= 12)
|
||||||
|
|
||||||
|
def test_livestream_event(self):
|
||||||
|
dl = FakeYDL()
|
||||||
|
ie = LivestreamIE(dl)
|
||||||
|
result = ie.extract('http://new.livestream.com/tedx/cityenglish')
|
||||||
|
self.assertIsPlaylist(result)
|
||||||
|
self.assertEqual(result['title'], u'TEDCity2.0 (English)')
|
||||||
|
self.assertTrue(len(result['entries']) >= 4)
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
80
test/test_youtube_signature.py
Normal file
80
test/test_youtube_signature.py
Normal file
@@ -0,0 +1,80 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
import io
|
||||||
|
import re
|
||||||
|
import string
|
||||||
|
import sys
|
||||||
|
import unittest
|
||||||
|
|
||||||
|
# Allow direct execution
|
||||||
|
import os
|
||||||
|
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
|
from youtube_dl.extractor import YoutubeIE
|
||||||
|
from youtube_dl.utils import compat_str, compat_urlretrieve
|
||||||
|
|
||||||
|
_TESTS = [
|
||||||
|
(
|
||||||
|
u'https://s.ytimg.com/yts/jsbin/html5player-vflHOr_nV.js',
|
||||||
|
u'js',
|
||||||
|
86,
|
||||||
|
u'>=<;:/.-[+*)(\'&%$#"!ZYX0VUTSRQPONMLKJIHGFEDCBA\\yxwvutsrqponmlkjihgfedcba987654321',
|
||||||
|
),
|
||||||
|
(
|
||||||
|
u'https://s.ytimg.com/yts/jsbin/html5player-vfldJ8xgI.js',
|
||||||
|
u'js',
|
||||||
|
85,
|
||||||
|
u'3456789a0cdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS[UVWXYZ!"#$%&\'()*+,-./:;<=>?@',
|
||||||
|
),
|
||||||
|
(
|
||||||
|
u'https://s.ytimg.com/yts/swfbin/watch_as3-vflg5GhxU.swf',
|
||||||
|
u'swf',
|
||||||
|
82,
|
||||||
|
u':/.-,+*)=\'&%$#"!ZYX0VUTSRQPONMLKJIHGFEDCBAzyxw>utsrqponmlkjihgfedcba987654321'
|
||||||
|
),
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
class TestSignature(unittest.TestCase):
|
||||||
|
def setUp(self):
|
||||||
|
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||||
|
self.TESTDATA_DIR = os.path.join(TEST_DIR, 'testdata')
|
||||||
|
if not os.path.exists(self.TESTDATA_DIR):
|
||||||
|
os.mkdir(self.TESTDATA_DIR)
|
||||||
|
|
||||||
|
|
||||||
|
def make_tfunc(url, stype, sig_length, expected_sig):
|
||||||
|
basename = url.rpartition('/')[2]
|
||||||
|
m = re.match(r'.*-([a-zA-Z0-9_-]+)\.[a-z]+$', basename)
|
||||||
|
assert m, '%r should follow URL format' % basename
|
||||||
|
test_id = m.group(1)
|
||||||
|
|
||||||
|
def test_func(self):
|
||||||
|
fn = os.path.join(self.TESTDATA_DIR, basename)
|
||||||
|
|
||||||
|
if not os.path.exists(fn):
|
||||||
|
compat_urlretrieve(url, fn)
|
||||||
|
|
||||||
|
ie = YoutubeIE()
|
||||||
|
if stype == 'js':
|
||||||
|
with io.open(fn, encoding='utf-8') as testf:
|
||||||
|
jscode = testf.read()
|
||||||
|
func = ie._parse_sig_js(jscode)
|
||||||
|
else:
|
||||||
|
assert stype == 'swf'
|
||||||
|
with open(fn, 'rb') as testf:
|
||||||
|
swfcode = testf.read()
|
||||||
|
func = ie._parse_sig_swf(swfcode)
|
||||||
|
src_sig = compat_str(string.printable[:sig_length])
|
||||||
|
got_sig = func(src_sig)
|
||||||
|
self.assertEqual(got_sig, expected_sig)
|
||||||
|
|
||||||
|
test_func.__name__ = str('test_signature_' + stype + '_' + test_id)
|
||||||
|
setattr(TestSignature, test_func.__name__, test_func)
|
||||||
|
|
||||||
|
for test_spec in _TESTS:
|
||||||
|
make_tfunc(*test_spec)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
unittest.main()
|
@@ -77,26 +77,43 @@ class FileDownloader(object):
|
|||||||
@staticmethod
|
@staticmethod
|
||||||
def calc_percent(byte_counter, data_len):
|
def calc_percent(byte_counter, data_len):
|
||||||
if data_len is None:
|
if data_len is None:
|
||||||
|
return None
|
||||||
|
return float(byte_counter) / float(data_len) * 100.0
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def format_percent(percent):
|
||||||
|
if percent is None:
|
||||||
return '---.-%'
|
return '---.-%'
|
||||||
return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))
|
return '%6s' % ('%3.1f%%' % percent)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def calc_eta(start, now, total, current):
|
def calc_eta(start, now, total, current):
|
||||||
if total is None:
|
if total is None:
|
||||||
return '--:--'
|
return None
|
||||||
dif = now - start
|
dif = now - start
|
||||||
if current == 0 or dif < 0.001: # One millisecond
|
if current == 0 or dif < 0.001: # One millisecond
|
||||||
return '--:--'
|
return None
|
||||||
rate = float(current) / dif
|
rate = float(current) / dif
|
||||||
eta = int((float(total) - float(current)) / rate)
|
return int((float(total) - float(current)) / rate)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def format_eta(eta):
|
||||||
|
if eta is None:
|
||||||
|
return '--:--'
|
||||||
return FileDownloader.format_seconds(eta)
|
return FileDownloader.format_seconds(eta)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def calc_speed(start, now, bytes):
|
def calc_speed(start, now, bytes):
|
||||||
dif = now - start
|
dif = now - start
|
||||||
if bytes == 0 or dif < 0.001: # One millisecond
|
if bytes == 0 or dif < 0.001: # One millisecond
|
||||||
|
return None
|
||||||
|
return float(bytes) / dif
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def format_speed(speed):
|
||||||
|
if speed is None:
|
||||||
return '%10s' % '---b/s'
|
return '%10s' % '---b/s'
|
||||||
return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))
|
return '%10s' % ('%s/s' % FileDownloader.format_bytes(speed))
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def best_block_size(elapsed_time, bytes):
|
def best_block_size(elapsed_time, bytes):
|
||||||
@@ -205,11 +222,14 @@ class FileDownloader(object):
|
|||||||
"""Report destination filename."""
|
"""Report destination filename."""
|
||||||
self.to_screen(u'[download] Destination: ' + filename)
|
self.to_screen(u'[download] Destination: ' + filename)
|
||||||
|
|
||||||
def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
|
def report_progress(self, percent, data_len_str, speed, eta):
|
||||||
"""Report download progress."""
|
"""Report download progress."""
|
||||||
if self.params.get('noprogress', False):
|
if self.params.get('noprogress', False):
|
||||||
return
|
return
|
||||||
clear_line = (u'\x1b[K' if sys.stderr.isatty() and os.name != 'nt' else u'')
|
clear_line = (u'\x1b[K' if sys.stderr.isatty() and os.name != 'nt' else u'')
|
||||||
|
eta_str = self.format_eta(eta)
|
||||||
|
percent_str = self.format_percent(percent)
|
||||||
|
speed_str = self.format_speed(speed)
|
||||||
if self.params.get('progress_with_newline', False):
|
if self.params.get('progress_with_newline', False):
|
||||||
self.to_screen(u'[download] %s of %s at %s ETA %s' %
|
self.to_screen(u'[download] %s of %s at %s ETA %s' %
|
||||||
(percent_str, data_len_str, speed_str, eta_str))
|
(percent_str, data_len_str, speed_str, eta_str))
|
||||||
@@ -378,6 +398,7 @@ class FileDownloader(object):
|
|||||||
self._hook_progress({
|
self._hook_progress({
|
||||||
'filename': filename,
|
'filename': filename,
|
||||||
'status': 'finished',
|
'status': 'finished',
|
||||||
|
'total_bytes': os.path.getsize(encodeFilename(filename)),
|
||||||
})
|
})
|
||||||
return True
|
return True
|
||||||
|
|
||||||
@@ -524,13 +545,14 @@ class FileDownloader(object):
|
|||||||
block_size = self.best_block_size(after - before, len(data_block))
|
block_size = self.best_block_size(after - before, len(data_block))
|
||||||
|
|
||||||
# Progress message
|
# Progress message
|
||||||
speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len)
|
speed = self.calc_speed(start, time.time(), byte_counter - resume_len)
|
||||||
if data_len is None:
|
if data_len is None:
|
||||||
self.report_progress('Unknown %', data_len_str, speed_str, 'Unknown ETA')
|
self.report_progress('Unknown %', data_len_str, speed_str, 'Unknown ETA')
|
||||||
|
eta = None
|
||||||
else:
|
else:
|
||||||
percent_str = self.calc_percent(byte_counter, data_len)
|
percent = self.calc_percent(byte_counter, data_len)
|
||||||
eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
|
eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
|
||||||
self.report_progress(percent_str, data_len_str, speed_str, eta_str)
|
self.report_progress(percent, data_len_str, speed, eta)
|
||||||
|
|
||||||
self._hook_progress({
|
self._hook_progress({
|
||||||
'downloaded_bytes': byte_counter,
|
'downloaded_bytes': byte_counter,
|
||||||
@@ -538,6 +560,8 @@ class FileDownloader(object):
|
|||||||
'tmpfilename': tmpfilename,
|
'tmpfilename': tmpfilename,
|
||||||
'filename': filename,
|
'filename': filename,
|
||||||
'status': 'downloading',
|
'status': 'downloading',
|
||||||
|
'eta': eta,
|
||||||
|
'speed': speed,
|
||||||
})
|
})
|
||||||
|
|
||||||
# Apply rate limit
|
# Apply rate limit
|
||||||
@@ -580,6 +604,8 @@ class FileDownloader(object):
|
|||||||
* downloaded_bytes: Bytes on disks
|
* downloaded_bytes: Bytes on disks
|
||||||
* total_bytes: Total bytes, None if unknown
|
* total_bytes: Total bytes, None if unknown
|
||||||
* tmpfilename: The filename we're currently writing to
|
* tmpfilename: The filename we're currently writing to
|
||||||
|
* eta: The estimated time in seconds, None if unknown
|
||||||
|
* speed: The download speed in bytes/second, None if unknown
|
||||||
|
|
||||||
Hooks are guaranteed to be called at least once (with status "finished")
|
Hooks are guaranteed to be called at least once (with status "finished")
|
||||||
if the download is successful.
|
if the download is successful.
|
||||||
|
@@ -81,6 +81,8 @@ class YoutubeDL(object):
|
|||||||
keepvideo: Keep the video file after post-processing
|
keepvideo: Keep the video file after post-processing
|
||||||
daterange: A DateRange object, download only if the upload_date is in the range.
|
daterange: A DateRange object, download only if the upload_date is in the range.
|
||||||
skip_download: Skip the actual download of the video file
|
skip_download: Skip the actual download of the video file
|
||||||
|
cachedir: Location of the cache files in the filesystem.
|
||||||
|
None to disable filesystem cache.
|
||||||
|
|
||||||
The following parameters are not used by YoutubeDL itself, they are used by
|
The following parameters are not used by YoutubeDL itself, they are used by
|
||||||
the FileDownloader:
|
the FileDownloader:
|
||||||
@@ -104,6 +106,17 @@ class YoutubeDL(object):
|
|||||||
self._download_retcode = 0
|
self._download_retcode = 0
|
||||||
self._num_downloads = 0
|
self._num_downloads = 0
|
||||||
self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
|
self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
|
||||||
|
|
||||||
|
if (sys.version_info >= (3,) and sys.platform != 'win32' and
|
||||||
|
sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
|
||||||
|
and not params['restrictfilenames']):
|
||||||
|
# On Python 3, the Unicode filesystem API will throw errors (#1474)
|
||||||
|
self.report_warning(
|
||||||
|
u'Assuming --restrict-filenames isnce file system encoding '
|
||||||
|
u'cannot encode all charactes. '
|
||||||
|
u'Set the LC_ALL environment variable to fix this.')
|
||||||
|
params['restrictfilenames'] = True
|
||||||
|
|
||||||
self.params = params
|
self.params = params
|
||||||
self.fd = FileDownloader(self, self.params)
|
self.fd = FileDownloader(self, self.params)
|
||||||
|
|
||||||
@@ -544,11 +557,11 @@ class YoutubeDL(object):
|
|||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
success = self.fd._do_download(filename, info_dict)
|
success = self.fd._do_download(filename, info_dict)
|
||||||
except (OSError, IOError) as err:
|
|
||||||
raise UnavailableVideoError(err)
|
|
||||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||||
self.report_error(u'unable to download video data: %s' % str(err))
|
self.report_error(u'unable to download video data: %s' % str(err))
|
||||||
return
|
return
|
||||||
|
except (OSError, IOError) as err:
|
||||||
|
raise UnavailableVideoError(err)
|
||||||
except (ContentTooShortError, ) as err:
|
except (ContentTooShortError, ) as err:
|
||||||
self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
|
self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
|
||||||
return
|
return
|
||||||
|
@@ -167,6 +167,12 @@ def parseOpts(overrideArguments=None):
|
|||||||
help='Output descriptions of all supported extractors', default=False)
|
help='Output descriptions of all supported extractors', default=False)
|
||||||
general.add_option('--proxy', dest='proxy', default=None, help='Use the specified HTTP/HTTPS proxy', metavar='URL')
|
general.add_option('--proxy', dest='proxy', default=None, help='Use the specified HTTP/HTTPS proxy', metavar='URL')
|
||||||
general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.')
|
general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.')
|
||||||
|
general.add_option(
|
||||||
|
'--cache-dir', dest='cachedir', default=u'~/.youtube-dl/cache',
|
||||||
|
help='Location in the filesystem where youtube-dl can store downloaded information permanently. %default by default')
|
||||||
|
general.add_option(
|
||||||
|
'--no-cache-dir', action='store_const', const=None, dest='cachedir',
|
||||||
|
help='Disable filesystem caching')
|
||||||
|
|
||||||
|
|
||||||
selection.add_option('--playlist-start',
|
selection.add_option('--playlist-start',
|
||||||
@@ -272,6 +278,10 @@ def parseOpts(overrideArguments=None):
|
|||||||
verbosity.add_option('--dump-intermediate-pages',
|
verbosity.add_option('--dump-intermediate-pages',
|
||||||
action='store_true', dest='dump_intermediate_pages', default=False,
|
action='store_true', dest='dump_intermediate_pages', default=False,
|
||||||
help='print downloaded pages to debug problems(very verbose)')
|
help='print downloaded pages to debug problems(very verbose)')
|
||||||
|
verbosity.add_option('--youtube-print-sig-code',
|
||||||
|
action='store_true', dest='youtube_print_sig_code', default=False,
|
||||||
|
help=optparse.SUPPRESS_HELP)
|
||||||
|
|
||||||
|
|
||||||
filesystem.add_option('-t', '--title',
|
filesystem.add_option('-t', '--title',
|
||||||
action='store_true', dest='usetitle', help='use title in file name (default)', default=False)
|
action='store_true', dest='usetitle', help='use title in file name (default)', default=False)
|
||||||
@@ -551,7 +561,11 @@ def _real_main(argv=None):
|
|||||||
or (opts.useid and u'%(id)s.%(ext)s')
|
or (opts.useid and u'%(id)s.%(ext)s')
|
||||||
or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
|
or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
|
||||||
or u'%(title)s-%(id)s.%(ext)s')
|
or u'%(title)s-%(id)s.%(ext)s')
|
||||||
|
if '%(ext)s' not in outtmpl and opts.extractaudio:
|
||||||
|
parser.error(u'Cannot download a video and extract audio into the same'
|
||||||
|
u' file! Use "%%(ext)s" instead of %r' %
|
||||||
|
determine_ext(outtmpl, u''))
|
||||||
|
raise ValueError(repr(opts.cachedir))
|
||||||
# YoutubeDL
|
# YoutubeDL
|
||||||
ydl = YoutubeDL({
|
ydl = YoutubeDL({
|
||||||
'usenetrc': opts.usenetrc,
|
'usenetrc': opts.usenetrc,
|
||||||
@@ -609,6 +623,8 @@ def _real_main(argv=None):
|
|||||||
'min_filesize': opts.min_filesize,
|
'min_filesize': opts.min_filesize,
|
||||||
'max_filesize': opts.max_filesize,
|
'max_filesize': opts.max_filesize,
|
||||||
'daterange': date,
|
'daterange': date,
|
||||||
|
'cachedir': opts.cachedir,
|
||||||
|
'youtube_print_sig_code': opts.youtube_print_sig_code,
|
||||||
})
|
})
|
||||||
|
|
||||||
if opts.verbose:
|
if opts.verbose:
|
||||||
|
@@ -18,12 +18,17 @@ from .comedycentral import ComedyCentralIE
|
|||||||
from .condenast import CondeNastIE
|
from .condenast import CondeNastIE
|
||||||
from .criterion import CriterionIE
|
from .criterion import CriterionIE
|
||||||
from .cspan import CSpanIE
|
from .cspan import CSpanIE
|
||||||
from .dailymotion import DailymotionIE, DailymotionPlaylistIE
|
from .dailymotion import (
|
||||||
|
DailymotionIE,
|
||||||
|
DailymotionPlaylistIE,
|
||||||
|
DailymotionUserIE,
|
||||||
|
)
|
||||||
from .daum import DaumIE
|
from .daum import DaumIE
|
||||||
from .depositfiles import DepositFilesIE
|
from .depositfiles import DepositFilesIE
|
||||||
from .dotsub import DotsubIE
|
from .dotsub import DotsubIE
|
||||||
from .dreisat import DreiSatIE
|
from .dreisat import DreiSatIE
|
||||||
from .defense import DefenseGouvFrIE
|
from .defense import DefenseGouvFrIE
|
||||||
|
from .ebaumsworld import EbaumsWorldIE
|
||||||
from .ehow import EHowIE
|
from .ehow import EHowIE
|
||||||
from .eighttracks import EightTracksIE
|
from .eighttracks import EightTracksIE
|
||||||
from .escapist import EscapistIE
|
from .escapist import EscapistIE
|
||||||
@@ -37,6 +42,8 @@ from .flickr import FlickrIE
|
|||||||
from .francetv import (
|
from .francetv import (
|
||||||
PluzzIE,
|
PluzzIE,
|
||||||
FranceTvInfoIE,
|
FranceTvInfoIE,
|
||||||
|
France2IE,
|
||||||
|
GenerationQuoiIE
|
||||||
)
|
)
|
||||||
from .freesound import FreesoundIE
|
from .freesound import FreesoundIE
|
||||||
from .funnyordie import FunnyOrDieIE
|
from .funnyordie import FunnyOrDieIE
|
||||||
|
@@ -63,6 +63,9 @@ class DailymotionIE(SubtitlesInfoExtractor):
|
|||||||
info = self._search_regex(r'var info = ({.*?}),$', embed_page,
|
info = self._search_regex(r'var info = ({.*?}),$', embed_page,
|
||||||
'video info', flags=re.MULTILINE)
|
'video info', flags=re.MULTILINE)
|
||||||
info = json.loads(info)
|
info = json.loads(info)
|
||||||
|
if info.get('error') is not None:
|
||||||
|
msg = 'Couldn\'t get video, Dailymotion says: %s' % info['error']['title']
|
||||||
|
raise ExtractorError(msg, expected=True)
|
||||||
|
|
||||||
# TODO: support choosing qualities
|
# TODO: support choosing qualities
|
||||||
|
|
||||||
@@ -111,28 +114,54 @@ class DailymotionIE(SubtitlesInfoExtractor):
|
|||||||
|
|
||||||
|
|
||||||
class DailymotionPlaylistIE(InfoExtractor):
|
class DailymotionPlaylistIE(InfoExtractor):
|
||||||
|
IE_NAME = u'dailymotion:playlist'
|
||||||
_VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>.+?)/'
|
_VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>.+?)/'
|
||||||
_MORE_PAGES_INDICATOR = r'<div class="next">.*?<a.*?href="/playlist/.+?".*?>.*?</a>.*?</div>'
|
_MORE_PAGES_INDICATOR = r'<div class="next">.*?<a.*?href="/playlist/.+?".*?>.*?</a>.*?</div>'
|
||||||
|
_PAGE_TEMPLATE = 'https://www.dailymotion.com/playlist/%s/%s'
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _extract_entries(self, id):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
|
||||||
playlist_id = mobj.group('id')
|
|
||||||
video_ids = []
|
video_ids = []
|
||||||
|
|
||||||
for pagenum in itertools.count(1):
|
for pagenum in itertools.count(1):
|
||||||
webpage = self._download_webpage('https://www.dailymotion.com/playlist/%s/%s' % (playlist_id, pagenum),
|
webpage = self._download_webpage(self._PAGE_TEMPLATE % (id, pagenum),
|
||||||
playlist_id, u'Downloading page %s' % pagenum)
|
id, u'Downloading page %s' % pagenum)
|
||||||
|
|
||||||
playlist_el = get_element_by_attribute(u'class', u'video_list', webpage)
|
playlist_el = get_element_by_attribute(u'class', u'video_list', webpage)
|
||||||
video_ids.extend(re.findall(r'data-id="(.+?)" data-ext-id', playlist_el))
|
video_ids.extend(re.findall(r'data-id="(.+?)" data-ext-id', playlist_el))
|
||||||
|
|
||||||
if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
|
if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
|
||||||
break
|
break
|
||||||
|
return [self.url_result('http://www.dailymotion.com/video/%s' % video_id, 'Dailymotion')
|
||||||
entries = [self.url_result('http://www.dailymotion.com/video/%s' % video_id, 'Dailymotion')
|
|
||||||
for video_id in video_ids]
|
for video_id in video_ids]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
playlist_id = mobj.group('id')
|
||||||
|
webpage = self._download_webpage(url, playlist_id)
|
||||||
|
|
||||||
return {'_type': 'playlist',
|
return {'_type': 'playlist',
|
||||||
'id': playlist_id,
|
'id': playlist_id,
|
||||||
'title': get_element_by_id(u'playlist_name', webpage),
|
'title': get_element_by_id(u'playlist_name', webpage),
|
||||||
'entries': entries,
|
'entries': self._extract_entries(playlist_id),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class DailymotionUserIE(DailymotionPlaylistIE):
|
||||||
|
IE_NAME = u'dailymotion:user'
|
||||||
|
_VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/user/(?P<user>[^/]+)'
|
||||||
|
_MORE_PAGES_INDICATOR = r'<div class="next">.*?<a.*?href="/user/.+?".*?>.*?</a>.*?</div>'
|
||||||
|
_PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s'
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
user = mobj.group('user')
|
||||||
|
webpage = self._download_webpage(url, user)
|
||||||
|
full_user = self._html_search_regex(
|
||||||
|
r'<a class="label" href="/%s".*?>(.*?)</' % re.escape(user),
|
||||||
|
webpage, u'user', flags=re.DOTALL)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'_type': 'playlist',
|
||||||
|
'id': user,
|
||||||
|
'title': full_user,
|
||||||
|
'entries': self._extract_entries(user),
|
||||||
|
}
|
||||||
|
37
youtube_dl/extractor/ebaumsworld.py
Normal file
37
youtube_dl/extractor/ebaumsworld.py
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
import re
|
||||||
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import determine_ext
|
||||||
|
|
||||||
|
|
||||||
|
class EbaumsWorldIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://www\.ebaumsworld\.com/video/watch/(?P<id>\d+)'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
u'url': u'http://www.ebaumsworld.com/video/watch/83367677/',
|
||||||
|
u'file': u'83367677.mp4',
|
||||||
|
u'info_dict': {
|
||||||
|
u'title': u'A Giant Python Opens The Door',
|
||||||
|
u'description': u'This is how nightmares start...',
|
||||||
|
u'uploader': u'jihadpizza',
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
config_xml = self._download_webpage(
|
||||||
|
'http://www.ebaumsworld.com/video/player/%s' % video_id, video_id)
|
||||||
|
config = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
|
||||||
|
video_url = config.find('file').text
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': config.find('title').text,
|
||||||
|
'url': video_url,
|
||||||
|
'ext': determine_ext(video_url),
|
||||||
|
'description': config.find('description').text,
|
||||||
|
'thumbnail': config.find('image').text,
|
||||||
|
'uploader': config.find('username').text,
|
||||||
|
}
|
@@ -106,8 +106,8 @@ class FacebookIE(InfoExtractor):
|
|||||||
video_duration = int(video_data['video_duration'])
|
video_duration = int(video_data['video_duration'])
|
||||||
thumbnail = video_data['thumbnail_src']
|
thumbnail = video_data['thumbnail_src']
|
||||||
|
|
||||||
video_title = self._html_search_regex('<h2 class="uiHeaderTitle">([^<]+)</h2>',
|
video_title = self._html_search_regex(
|
||||||
webpage, u'title')
|
r'<h2 class="uiHeaderTitle">([^<]*)</h2>', webpage, u'title')
|
||||||
|
|
||||||
info = {
|
info = {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
@@ -1,6 +1,7 @@
|
|||||||
# encoding: utf-8
|
# encoding: utf-8
|
||||||
import re
|
import re
|
||||||
import xml.etree.ElementTree
|
import xml.etree.ElementTree
|
||||||
|
import json
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@@ -65,3 +66,52 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor):
|
|||||||
webpage = self._download_webpage(url, page_title)
|
webpage = self._download_webpage(url, page_title)
|
||||||
video_id = self._search_regex(r'id-video=(\d+?)"', webpage, u'video id')
|
video_id = self._search_regex(r'id-video=(\d+?)"', webpage, u'video id')
|
||||||
return self._extract_video(video_id)
|
return self._extract_video(video_id)
|
||||||
|
|
||||||
|
|
||||||
|
class France2IE(FranceTVBaseInfoExtractor):
    # Extractor for france2.fr programme pages; the numeric id at the end of
    # the URL is handed to the shared _extract_video helper.
    IE_NAME = u'france2.fr'
    _VALID_URL = r'https?://www\.france2\.fr/emissions/.*?/videos/(?P<id>\d+)'

    _TEST = {
        u'url': u'http://www.france2.fr/emissions/13h15-le-samedi-le-dimanche/videos/75540104',
        u'file': u'75540104.mp4',
        u'info_dict': {
            u'title': u'13h15, le samedi...',
            u'description': u'md5:2e5b58ba7a2d3692b35c792be081a03d',
        },
        u'params': {
            u'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Extract the video id from the URL and delegate to _extract_video."""
        url_match = re.match(self._VALID_URL, url)
        return self._extract_video(url_match.group('id'))
|
||||||
|
|
||||||
|
|
||||||
|
class GenerationQuoiIE(InfoExtractor):
    # Extractor for generation-quoi.france2.fr portrait pages. The page name
    # is used to fetch a JSON description whose 'id' field is handed over to
    # the Dailymotion extractor.
    IE_NAME = u'http://generation-quoi.france2.fr'
    # The name group is non-greedy so that a query string or fragment is not
    # captured as part of the portrait name (a greedy `.*` would swallow
    # everything up to the end of the URL).
    _VALID_URL = r'https?://generation-quoi\.france2\.fr/portrait/(?P<name>.*?)(\?|#|$)'

    _TEST = {
        u'url': u'http://generation-quoi.france2.fr/portrait/garde-a-vous',
        u'file': u'k7FJX8VBcvvLmX4wA5Q.mp4',
        u'info_dict': {
            u'title': u'Génération Quoi - Garde à Vous',
            u'uploader': u'Génération Quoi',
        },
        u'params': {
            # It uses Dailymotion
            u'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Resolve a portrait page to the Dailymotion video it embeds.

        Downloads /medias/video/<name>.json relative to the page URL and
        returns a url_result pointing at the Dailymotion video whose id is
        stored under the 'id' key of that JSON document.
        """
        mobj = re.match(self._VALID_URL, url)
        name = mobj.group('name')
        info_url = compat_urlparse.urljoin(url, '/medias/video/%s.json' % name)
        info_json = self._download_webpage(info_url, name)
        info = json.loads(info_json)
        return self.url_result('http://www.dailymotion.com/video/%s' % info['id'],
                               ie='Dailymotion')
|
||||||
|
@@ -21,7 +21,8 @@ class FunnyOrDieIE(InfoExtractor):
|
|||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
video_url = self._search_regex(r'type="video/mp4" src="(.*?)"',
|
video_url = self._search_regex(
|
||||||
|
[r'type="video/mp4" src="(.*?)"', r'src="([^>]*?)" type=\'video/mp4\''],
|
||||||
webpage, u'video URL', flags=re.DOTALL)
|
webpage, u'video URL', flags=re.DOTALL)
|
||||||
|
|
||||||
info = {
|
info = {
|
||||||
|
@@ -2,7 +2,12 @@ import re
|
|||||||
import json
|
import json
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import compat_urllib_parse_urlparse, compat_urlparse
|
from ..utils import (
|
||||||
|
compat_urllib_parse_urlparse,
|
||||||
|
compat_urlparse,
|
||||||
|
get_meta_content,
|
||||||
|
ExtractorError,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class LivestreamIE(InfoExtractor):
|
class LivestreamIE(InfoExtractor):
|
||||||
@@ -35,8 +40,11 @@ class LivestreamIE(InfoExtractor):
|
|||||||
|
|
||||||
if video_id is None:
|
if video_id is None:
|
||||||
# This is an event page:
|
# This is an event page:
|
||||||
api_url = self._search_regex(r'event_design_eventId: \'(.+?)\'',
|
player = get_meta_content('twitter:player', webpage)
|
||||||
webpage, 'api url')
|
if player is None:
|
||||||
|
raise ExtractorError('Couldn\'t extract event api url')
|
||||||
|
api_url = player.replace('/player', '')
|
||||||
|
api_url = re.sub(r'^(https?://)(new\.)', r'\1api.\2', api_url)
|
||||||
info = json.loads(self._download_webpage(api_url, event_name,
|
info = json.loads(self._download_webpage(api_url, event_name,
|
||||||
u'Downloading event info'))
|
u'Downloading event info'))
|
||||||
videos = [self._extract_video_info(video_data['data'])
|
videos = [self._extract_video_info(video_data['data'])
|
||||||
|
@@ -5,7 +5,7 @@ from .mtv import MTVIE, _media_xml_tag
|
|||||||
|
|
||||||
class SouthParkStudiosIE(MTVIE):
|
class SouthParkStudiosIE(MTVIE):
|
||||||
IE_NAME = u'southparkstudios.com'
|
IE_NAME = u'southparkstudios.com'
|
||||||
_VALID_URL = r'https?://www\.southparkstudios\.com/clips/(?P<id>\d+)'
|
_VALID_URL = r'https?://www\.southparkstudios\.com/(clips|full-episodes)/(?P<id>.+?)(\?|#|$)'
|
||||||
|
|
||||||
_FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss'
|
_FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss'
|
||||||
|
|
||||||
@@ -23,7 +23,11 @@ class SouthParkStudiosIE(MTVIE):
|
|||||||
|
|
||||||
def _get_thumbnail_url(self, uri, itemdoc):
    """Return the thumbnail URL found in itemdoc, or None if there is none."""
    group_tag = _media_xml_tag('group')
    thumbnail_tag = _media_xml_tag('thumbnail')
    thumb_node = itemdoc.find('%s/%s' % (group_tag, thumbnail_tag))
    # Guard clause: items without a thumbnail element yield no URL.
    if thumb_node is None:
        return None
    return thumb_node.attrib['url']
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
@@ -66,6 +66,12 @@ class YoukuIE(InfoExtractor):
|
|||||||
self.report_extraction(video_id)
|
self.report_extraction(video_id)
|
||||||
try:
|
try:
|
||||||
config = json.loads(jsondata)
|
config = json.loads(jsondata)
|
||||||
|
error_code = config['data'][0].get('error_code')
|
||||||
|
if error_code:
|
||||||
|
# -8 means blocked outside China.
|
||||||
|
error = config['data'][0].get('error') # Chinese and English, separated by newline.
|
||||||
|
raise ExtractorError(error or u'Server reported error %i' % error_code,
|
||||||
|
expected=True)
|
||||||
|
|
||||||
video_title = config['data'][0]['title']
|
video_title = config['data'][0]['title']
|
||||||
seed = config['data'][0]['seed']
|
seed = config['data'][0]['seed']
|
||||||
@@ -89,6 +95,7 @@ class YoukuIE(InfoExtractor):
|
|||||||
|
|
||||||
fileid = config['data'][0]['streamfileids'][format]
|
fileid = config['data'][0]['streamfileids'][format]
|
||||||
keys = [s['k'] for s in config['data'][0]['segs'][format]]
|
keys = [s['k'] for s in config['data'][0]['segs'][format]]
|
||||||
|
# segs is usually a dictionary, but an empty *list* if an error occurred.
|
||||||
except (UnicodeDecodeError, ValueError, KeyError):
|
except (UnicodeDecodeError, ValueError, KeyError):
|
||||||
raise ExtractorError(u'Unable to extract info section')
|
raise ExtractorError(u'Unable to extract info section')
|
||||||
|
|
||||||
|
@@ -1,15 +1,23 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
|
|
||||||
|
import collections
|
||||||
|
import errno
|
||||||
|
import io
|
||||||
|
import itertools
|
||||||
import json
|
import json
|
||||||
import netrc
|
import os.path
|
||||||
import re
|
import re
|
||||||
import socket
|
import socket
|
||||||
import itertools
|
import string
|
||||||
|
import struct
|
||||||
|
import traceback
|
||||||
import xml.etree.ElementTree
|
import xml.etree.ElementTree
|
||||||
|
import zlib
|
||||||
|
|
||||||
from .common import InfoExtractor, SearchInfoExtractor
|
from .common import InfoExtractor, SearchInfoExtractor
|
||||||
from .subtitles import SubtitlesInfoExtractor
|
from .subtitles import SubtitlesInfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
compat_chr,
|
||||||
compat_http_client,
|
compat_http_client,
|
||||||
compat_parse_qs,
|
compat_parse_qs,
|
||||||
compat_urllib_error,
|
compat_urllib_error,
|
||||||
@@ -23,6 +31,7 @@ from ..utils import (
|
|||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
orderedSet,
|
orderedSet,
|
||||||
|
write_json_file,
|
||||||
)
|
)
|
||||||
|
|
||||||
class YoutubeBaseInfoExtractor(InfoExtractor):
|
class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||||
@@ -352,7 +361,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
u"info_dict": {
|
u"info_dict": {
|
||||||
u"upload_date": u"20120506",
|
u"upload_date": u"20120506",
|
||||||
u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
|
u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
|
||||||
u"description": u"md5:3e2666e0a55044490499ea45fe9037b7",
|
u"description": u"md5:bdac09887d209a4ed54b8f76b2bdaa8b",
|
||||||
u"uploader": u"Icona Pop",
|
u"uploader": u"Icona Pop",
|
||||||
u"uploader_id": u"IconaPop"
|
u"uploader_id": u"IconaPop"
|
||||||
}
|
}
|
||||||
@@ -393,6 +402,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
if YoutubePlaylistIE.suitable(url): return False
|
if YoutubePlaylistIE.suitable(url): return False
|
||||||
return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
|
return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
|
||||||
|
|
||||||
|
def __init__(self, *args, **kwargs):
    """Initialise the extractor and its in-memory player function cache."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Signature functions already extracted during this run.
    self._player_cache = dict()
|
||||||
|
|
||||||
def report_video_webpage_download(self, video_id):
    """Report attempt to download video webpage."""
    message = u'%s: Downloading video webpage' % video_id
    self.to_screen(message)
|
||||||
@@ -413,13 +426,663 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
"""Indicate the download will use the RTMP protocol."""
|
"""Indicate the download will use the RTMP protocol."""
|
||||||
self.to_screen(u'RTMP download detected')
|
self.to_screen(u'RTMP download detected')
|
||||||
|
|
||||||
def _decrypt_signature(self, s):
|
def _extract_signature_function(self, video_id, player_url, slen):
|
||||||
|
id_m = re.match(r'.*-(?P<id>[a-zA-Z0-9_-]+)\.(?P<ext>[a-z]+)$',
|
||||||
|
player_url)
|
||||||
|
player_type = id_m.group('ext')
|
||||||
|
player_id = id_m.group('id')
|
||||||
|
|
||||||
|
# Read from filesystem cache
|
||||||
|
func_id = '%s_%s_%d' % (player_type, player_id, slen)
|
||||||
|
assert os.path.basename(func_id) == func_id
|
||||||
|
cache_dir = self._downloader.params.get('cachedir',
|
||||||
|
u'~/.youtube-dl/cache')
|
||||||
|
|
||||||
|
cache_enabled = cache_dir is not None
|
||||||
|
if cache_enabled:
|
||||||
|
cache_fn = os.path.join(os.path.expanduser(cache_dir),
|
||||||
|
u'youtube-sigfuncs',
|
||||||
|
func_id + '.json')
|
||||||
|
try:
|
||||||
|
with io.open(cache_fn, 'r', encoding='utf-8') as cachef:
|
||||||
|
cache_spec = json.load(cachef)
|
||||||
|
return lambda s: u''.join(s[i] for i in cache_spec)
|
||||||
|
except IOError:
|
||||||
|
pass # No cache available
|
||||||
|
|
||||||
|
if player_type == 'js':
|
||||||
|
code = self._download_webpage(
|
||||||
|
player_url, video_id,
|
||||||
|
note=u'Downloading %s player %s' % (player_type, player_id),
|
||||||
|
errnote=u'Download of %s failed' % player_url)
|
||||||
|
res = self._parse_sig_js(code)
|
||||||
|
elif player_type == 'swf':
|
||||||
|
urlh = self._request_webpage(
|
||||||
|
player_url, video_id,
|
||||||
|
note=u'Downloading %s player %s' % (player_type, player_id),
|
||||||
|
errnote=u'Download of %s failed' % player_url)
|
||||||
|
code = urlh.read()
|
||||||
|
res = self._parse_sig_swf(code)
|
||||||
|
else:
|
||||||
|
assert False, 'Invalid player type %r' % player_type
|
||||||
|
|
||||||
|
if cache_enabled:
|
||||||
|
try:
|
||||||
|
test_string = u''.join(map(compat_chr, range(slen)))
|
||||||
|
cache_res = res(test_string)
|
||||||
|
cache_spec = [ord(c) for c in cache_res]
|
||||||
|
try:
|
||||||
|
os.makedirs(os.path.dirname(cache_fn))
|
||||||
|
except OSError as ose:
|
||||||
|
if ose.errno != errno.EEXIST:
|
||||||
|
raise
|
||||||
|
write_json_file(cache_spec, cache_fn)
|
||||||
|
except Exception:
|
||||||
|
tb = traceback.format_exc()
|
||||||
|
self._downloader.report_warning(
|
||||||
|
u'Writing cache to %r failed: %s' % (cache_fn, tb))
|
||||||
|
|
||||||
|
return res
|
||||||
|
|
||||||
|
def _print_sig_code(self, func, slen):
    """Print Python source equivalent to func for signatures of length slen.

    func is run on a probe string of distinct characters; the positions of
    those characters in the result are then rendered as a sum of index and
    slice expressions on a string named s, and shown via to_screen.
    """
    def format_slice(first, last, stride):
        # Render the run first..last (inclusive, stepping by stride) as a
        # Python slice expression on s.
        lower = u'' if first == 0 else str(first)
        stop = last + stride
        upper = (u':%d' % stop) if stop >= 0 else u':'
        suffix = u'' if stride == 1 else (u':%d' % stride)
        return u's[%s%s%s]' % (lower, upper, suffix)

    def gen_sig_code(idxs):
        stride = None
        run_start = '(Never used)'  # Squelch pyflakes warnings - run_start
                                    # is set as soon as stride is set
        for cur, prev in zip(idxs[1:], idxs[:-1]):
            delta = cur - prev
            if stride is not None:
                # Inside a run: keep going while the step stays the same,
                # otherwise emit the finished run.
                if delta != stride:
                    yield format_slice(run_start, prev, stride)
                    stride = None
                continue
            if delta in (-1, 1):
                # Two neighbouring indices start a new run.
                stride = delta
                run_start = prev
            else:
                yield u's[%d]' % prev
        # Emit whatever is still pending after the last pair.
        if stride is None:
            yield u's[%d]' % cur
        else:
            yield format_slice(run_start, cur, stride)

    probe = u''.join(compat_chr(n) for n in range(slen))
    permutation = [ord(ch) for ch in func(probe)]
    expr_code = u' + '.join(gen_sig_code(permutation))
    code = u'if len(s) == %d:\n return %s\n' % (slen, expr_code)
    self.to_screen(u'Extracted signature function:\n' + code)
|
||||||
|
|
||||||
|
def _parse_sig_js(self, jscode):
|
||||||
|
funcname = self._search_regex(
|
||||||
|
r'signature=([a-zA-Z]+)', jscode,
|
||||||
|
u'Initial JS player signature function name')
|
||||||
|
|
||||||
|
functions = {}
|
||||||
|
|
||||||
|
def argidx(varname):
|
||||||
|
return string.lowercase.index(varname)
|
||||||
|
|
||||||
|
def interpret_statement(stmt, local_vars, allow_recursion=20):
|
||||||
|
if allow_recursion < 0:
|
||||||
|
raise ExtractorError(u'Recursion limit reached')
|
||||||
|
|
||||||
|
if stmt.startswith(u'var '):
|
||||||
|
stmt = stmt[len(u'var '):]
|
||||||
|
ass_m = re.match(r'^(?P<out>[a-z]+)(?:\[(?P<index>[^\]]+)\])?' +
|
||||||
|
r'=(?P<expr>.*)$', stmt)
|
||||||
|
if ass_m:
|
||||||
|
if ass_m.groupdict().get('index'):
|
||||||
|
def assign(val):
|
||||||
|
lvar = local_vars[ass_m.group('out')]
|
||||||
|
idx = interpret_expression(ass_m.group('index'),
|
||||||
|
local_vars, allow_recursion)
|
||||||
|
assert isinstance(idx, int)
|
||||||
|
lvar[idx] = val
|
||||||
|
return val
|
||||||
|
expr = ass_m.group('expr')
|
||||||
|
else:
|
||||||
|
def assign(val):
|
||||||
|
local_vars[ass_m.group('out')] = val
|
||||||
|
return val
|
||||||
|
expr = ass_m.group('expr')
|
||||||
|
elif stmt.startswith(u'return '):
|
||||||
|
assign = lambda v: v
|
||||||
|
expr = stmt[len(u'return '):]
|
||||||
|
else:
|
||||||
|
raise ExtractorError(
|
||||||
|
u'Cannot determine left side of statement in %r' % stmt)
|
||||||
|
|
||||||
|
v = interpret_expression(expr, local_vars, allow_recursion)
|
||||||
|
return assign(v)
|
||||||
|
|
||||||
|
def interpret_expression(expr, local_vars, allow_recursion):
|
||||||
|
if expr.isdigit():
|
||||||
|
return int(expr)
|
||||||
|
|
||||||
|
if expr.isalpha():
|
||||||
|
return local_vars[expr]
|
||||||
|
|
||||||
|
m = re.match(r'^(?P<in>[a-z]+)\.(?P<member>.*)$', expr)
|
||||||
|
if m:
|
||||||
|
member = m.group('member')
|
||||||
|
val = local_vars[m.group('in')]
|
||||||
|
if member == 'split("")':
|
||||||
|
return list(val)
|
||||||
|
if member == 'join("")':
|
||||||
|
return u''.join(val)
|
||||||
|
if member == 'length':
|
||||||
|
return len(val)
|
||||||
|
if member == 'reverse()':
|
||||||
|
return val[::-1]
|
||||||
|
slice_m = re.match(r'slice\((?P<idx>.*)\)', member)
|
||||||
|
if slice_m:
|
||||||
|
idx = interpret_expression(
|
||||||
|
slice_m.group('idx'), local_vars, allow_recursion-1)
|
||||||
|
return val[idx:]
|
||||||
|
|
||||||
|
m = re.match(
|
||||||
|
r'^(?P<in>[a-z]+)\[(?P<idx>.+)\]$', expr)
|
||||||
|
if m:
|
||||||
|
val = local_vars[m.group('in')]
|
||||||
|
idx = interpret_expression(m.group('idx'), local_vars,
|
||||||
|
allow_recursion-1)
|
||||||
|
return val[idx]
|
||||||
|
|
||||||
|
m = re.match(r'^(?P<a>.+?)(?P<op>[%])(?P<b>.+?)$', expr)
|
||||||
|
if m:
|
||||||
|
a = interpret_expression(m.group('a'),
|
||||||
|
local_vars, allow_recursion)
|
||||||
|
b = interpret_expression(m.group('b'),
|
||||||
|
local_vars, allow_recursion)
|
||||||
|
return a % b
|
||||||
|
|
||||||
|
m = re.match(
|
||||||
|
r'^(?P<func>[a-zA-Z]+)\((?P<args>[a-z0-9,]+)\)$', expr)
|
||||||
|
if m:
|
||||||
|
fname = m.group('func')
|
||||||
|
if fname not in functions:
|
||||||
|
functions[fname] = extract_function(fname)
|
||||||
|
argvals = [int(v) if v.isdigit() else local_vars[v]
|
||||||
|
for v in m.group('args').split(',')]
|
||||||
|
return functions[fname](argvals)
|
||||||
|
raise ExtractorError(u'Unsupported JS expression %r' % expr)
|
||||||
|
|
||||||
|
def extract_function(funcname):
|
||||||
|
func_m = re.search(
|
||||||
|
r'function ' + re.escape(funcname) +
|
||||||
|
r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
|
||||||
|
jscode)
|
||||||
|
argnames = func_m.group('args').split(',')
|
||||||
|
|
||||||
|
def resf(args):
|
||||||
|
local_vars = dict(zip(argnames, args))
|
||||||
|
for stmt in func_m.group('code').split(';'):
|
||||||
|
res = interpret_statement(stmt, local_vars)
|
||||||
|
return res
|
||||||
|
return resf
|
||||||
|
|
||||||
|
initial_function = extract_function(funcname)
|
||||||
|
return lambda s: initial_function([s])
|
||||||
|
|
||||||
|
def _parse_sig_swf(self, file_contents):
|
||||||
|
if file_contents[1:3] != b'WS':
|
||||||
|
raise ExtractorError(
|
||||||
|
u'Not an SWF file; header is %r' % file_contents[:3])
|
||||||
|
if file_contents[:1] == b'C':
|
||||||
|
content = zlib.decompress(file_contents[8:])
|
||||||
|
else:
|
||||||
|
raise NotImplementedError(u'Unsupported compression format %r' %
|
||||||
|
file_contents[:1])
|
||||||
|
|
||||||
|
def extract_tags(content):
|
||||||
|
pos = 0
|
||||||
|
while pos < len(content):
|
||||||
|
header16 = struct.unpack('<H', content[pos:pos+2])[0]
|
||||||
|
pos += 2
|
||||||
|
tag_code = header16 >> 6
|
||||||
|
tag_len = header16 & 0x3f
|
||||||
|
if tag_len == 0x3f:
|
||||||
|
tag_len = struct.unpack('<I', content[pos:pos+4])[0]
|
||||||
|
pos += 4
|
||||||
|
assert pos+tag_len <= len(content)
|
||||||
|
yield (tag_code, content[pos:pos+tag_len])
|
||||||
|
pos += tag_len
|
||||||
|
|
||||||
|
code_tag = next(tag
|
||||||
|
for tag_code, tag in extract_tags(content)
|
||||||
|
if tag_code == 82)
|
||||||
|
p = code_tag.index(b'\0', 4) + 1
|
||||||
|
code_reader = io.BytesIO(code_tag[p:])
|
||||||
|
|
||||||
|
# Parse ABC (AVM2 ByteCode)
|
||||||
|
def read_int(reader=None):
|
||||||
|
if reader is None:
|
||||||
|
reader = code_reader
|
||||||
|
res = 0
|
||||||
|
shift = 0
|
||||||
|
for _ in range(5):
|
||||||
|
buf = reader.read(1)
|
||||||
|
assert len(buf) == 1
|
||||||
|
b = struct.unpack('<B', buf)[0]
|
||||||
|
res = res | ((b & 0x7f) << shift)
|
||||||
|
if b & 0x80 == 0:
|
||||||
|
break
|
||||||
|
shift += 7
|
||||||
|
return res
|
||||||
|
|
||||||
|
def u30(reader=None):
|
||||||
|
res = read_int(reader)
|
||||||
|
assert res & 0xf0000000 == 0
|
||||||
|
return res
|
||||||
|
u32 = read_int
|
||||||
|
|
||||||
|
def s32(reader=None):
|
||||||
|
v = read_int(reader)
|
||||||
|
if v & 0x80000000 != 0:
|
||||||
|
v = - ((v ^ 0xffffffff) + 1)
|
||||||
|
return v
|
||||||
|
|
||||||
|
def read_string(reader=None):
|
||||||
|
if reader is None:
|
||||||
|
reader = code_reader
|
||||||
|
slen = u30(reader)
|
||||||
|
resb = reader.read(slen)
|
||||||
|
assert len(resb) == slen
|
||||||
|
return resb.decode('utf-8')
|
||||||
|
|
||||||
|
def read_bytes(count, reader=None):
|
||||||
|
if reader is None:
|
||||||
|
reader = code_reader
|
||||||
|
resb = reader.read(count)
|
||||||
|
assert len(resb) == count
|
||||||
|
return resb
|
||||||
|
|
||||||
|
def read_byte(reader=None):
|
||||||
|
resb = read_bytes(1, reader=reader)
|
||||||
|
res = struct.unpack('<B', resb)[0]
|
||||||
|
return res
|
||||||
|
|
||||||
|
# minor_version + major_version
|
||||||
|
read_bytes(2 + 2)
|
||||||
|
|
||||||
|
# Constant pool
|
||||||
|
int_count = u30()
|
||||||
|
for _c in range(1, int_count):
|
||||||
|
s32()
|
||||||
|
uint_count = u30()
|
||||||
|
for _c in range(1, uint_count):
|
||||||
|
u32()
|
||||||
|
double_count = u30()
|
||||||
|
read_bytes((double_count-1) * 8)
|
||||||
|
string_count = u30()
|
||||||
|
constant_strings = [u'']
|
||||||
|
for _c in range(1, string_count):
|
||||||
|
s = read_string()
|
||||||
|
constant_strings.append(s)
|
||||||
|
namespace_count = u30()
|
||||||
|
for _c in range(1, namespace_count):
|
||||||
|
read_bytes(1) # kind
|
||||||
|
u30() # name
|
||||||
|
ns_set_count = u30()
|
||||||
|
for _c in range(1, ns_set_count):
|
||||||
|
count = u30()
|
||||||
|
for _c2 in range(count):
|
||||||
|
u30()
|
||||||
|
multiname_count = u30()
|
||||||
|
MULTINAME_SIZES = {
|
||||||
|
0x07: 2, # QName
|
||||||
|
0x0d: 2, # QNameA
|
||||||
|
0x0f: 1, # RTQName
|
||||||
|
0x10: 1, # RTQNameA
|
||||||
|
0x11: 0, # RTQNameL
|
||||||
|
0x12: 0, # RTQNameLA
|
||||||
|
0x09: 2, # Multiname
|
||||||
|
0x0e: 2, # MultinameA
|
||||||
|
0x1b: 1, # MultinameL
|
||||||
|
0x1c: 1, # MultinameLA
|
||||||
|
}
|
||||||
|
multinames = [u'']
|
||||||
|
for _c in range(1, multiname_count):
|
||||||
|
kind = u30()
|
||||||
|
assert kind in MULTINAME_SIZES, u'Invalid multiname kind %r' % kind
|
||||||
|
if kind == 0x07:
|
||||||
|
u30() # namespace_idx
|
||||||
|
name_idx = u30()
|
||||||
|
multinames.append(constant_strings[name_idx])
|
||||||
|
else:
|
||||||
|
multinames.append('[MULTINAME kind: %d]' % kind)
|
||||||
|
for _c2 in range(MULTINAME_SIZES[kind]):
|
||||||
|
u30()
|
||||||
|
|
||||||
|
# Methods
|
||||||
|
method_count = u30()
|
||||||
|
MethodInfo = collections.namedtuple(
|
||||||
|
'MethodInfo',
|
||||||
|
['NEED_ARGUMENTS', 'NEED_REST'])
|
||||||
|
method_infos = []
|
||||||
|
for method_id in range(method_count):
|
||||||
|
param_count = u30()
|
||||||
|
u30() # return type
|
||||||
|
for _ in range(param_count):
|
||||||
|
u30() # param type
|
||||||
|
u30() # name index (always 0 for youtube)
|
||||||
|
flags = read_byte()
|
||||||
|
if flags & 0x08 != 0:
|
||||||
|
# Options present
|
||||||
|
option_count = u30()
|
||||||
|
for c in range(option_count):
|
||||||
|
u30() # val
|
||||||
|
read_bytes(1) # kind
|
||||||
|
if flags & 0x80 != 0:
|
||||||
|
# Param names present
|
||||||
|
for _ in range(param_count):
|
||||||
|
u30() # param name
|
||||||
|
mi = MethodInfo(flags & 0x01 != 0, flags & 0x04 != 0)
|
||||||
|
method_infos.append(mi)
|
||||||
|
|
||||||
|
# Metadata
|
||||||
|
metadata_count = u30()
|
||||||
|
for _c in range(metadata_count):
|
||||||
|
u30() # name
|
||||||
|
item_count = u30()
|
||||||
|
for _c2 in range(item_count):
|
||||||
|
u30() # key
|
||||||
|
u30() # value
|
||||||
|
|
||||||
|
def parse_traits_info():
|
||||||
|
trait_name_idx = u30()
|
||||||
|
kind_full = read_byte()
|
||||||
|
kind = kind_full & 0x0f
|
||||||
|
attrs = kind_full >> 4
|
||||||
|
methods = {}
|
||||||
|
if kind in [0x00, 0x06]: # Slot or Const
|
||||||
|
u30() # Slot id
|
||||||
|
u30() # type_name_idx
|
||||||
|
vindex = u30()
|
||||||
|
if vindex != 0:
|
||||||
|
read_byte() # vkind
|
||||||
|
elif kind in [0x01, 0x02, 0x03]: # Method / Getter / Setter
|
||||||
|
u30() # disp_id
|
||||||
|
method_idx = u30()
|
||||||
|
methods[multinames[trait_name_idx]] = method_idx
|
||||||
|
elif kind == 0x04: # Class
|
||||||
|
u30() # slot_id
|
||||||
|
u30() # classi
|
||||||
|
elif kind == 0x05: # Function
|
||||||
|
u30() # slot_id
|
||||||
|
function_idx = u30()
|
||||||
|
methods[function_idx] = multinames[trait_name_idx]
|
||||||
|
else:
|
||||||
|
raise ExtractorError(u'Unsupported trait kind %d' % kind)
|
||||||
|
|
||||||
|
if attrs & 0x4 != 0: # Metadata present
|
||||||
|
metadata_count = u30()
|
||||||
|
for _c3 in range(metadata_count):
|
||||||
|
u30() # metadata index
|
||||||
|
|
||||||
|
return methods
|
||||||
|
|
||||||
|
# Classes
|
||||||
|
TARGET_CLASSNAME = u'SignatureDecipher'
|
||||||
|
searched_idx = multinames.index(TARGET_CLASSNAME)
|
||||||
|
searched_class_id = None
|
||||||
|
class_count = u30()
|
||||||
|
for class_id in range(class_count):
|
||||||
|
name_idx = u30()
|
||||||
|
if name_idx == searched_idx:
|
||||||
|
# We found the class we're looking for!
|
||||||
|
searched_class_id = class_id
|
||||||
|
u30() # super_name idx
|
||||||
|
flags = read_byte()
|
||||||
|
if flags & 0x08 != 0: # Protected namespace is present
|
||||||
|
u30() # protected_ns_idx
|
||||||
|
intrf_count = u30()
|
||||||
|
for _c2 in range(intrf_count):
|
||||||
|
u30()
|
||||||
|
u30() # iinit
|
||||||
|
trait_count = u30()
|
||||||
|
for _c2 in range(trait_count):
|
||||||
|
parse_traits_info()
|
||||||
|
|
||||||
|
if searched_class_id is None:
|
||||||
|
raise ExtractorError(u'Target class %r not found' %
|
||||||
|
TARGET_CLASSNAME)
|
||||||
|
|
||||||
|
method_names = {}
|
||||||
|
method_idxs = {}
|
||||||
|
for class_id in range(class_count):
|
||||||
|
u30() # cinit
|
||||||
|
trait_count = u30()
|
||||||
|
for _c2 in range(trait_count):
|
||||||
|
trait_methods = parse_traits_info()
|
||||||
|
if class_id == searched_class_id:
|
||||||
|
method_names.update(trait_methods.items())
|
||||||
|
method_idxs.update(dict(
|
||||||
|
(idx, name)
|
||||||
|
for name, idx in trait_methods.items()))
|
||||||
|
|
||||||
|
# Scripts
|
||||||
|
script_count = u30()
|
||||||
|
for _c in range(script_count):
|
||||||
|
u30() # init
|
||||||
|
trait_count = u30()
|
||||||
|
for _c2 in range(trait_count):
|
||||||
|
parse_traits_info()
|
||||||
|
|
||||||
|
# Method bodies
|
||||||
|
method_body_count = u30()
|
||||||
|
Method = collections.namedtuple('Method', ['code', 'local_count'])
|
||||||
|
methods = {}
|
||||||
|
for _c in range(method_body_count):
|
||||||
|
method_idx = u30()
|
||||||
|
u30() # max_stack
|
||||||
|
local_count = u30()
|
||||||
|
u30() # init_scope_depth
|
||||||
|
u30() # max_scope_depth
|
||||||
|
code_length = u30()
|
||||||
|
code = read_bytes(code_length)
|
||||||
|
if method_idx in method_idxs:
|
||||||
|
m = Method(code, local_count)
|
||||||
|
methods[method_idxs[method_idx]] = m
|
||||||
|
exception_count = u30()
|
||||||
|
for _c2 in range(exception_count):
|
||||||
|
u30() # from
|
||||||
|
u30() # to
|
||||||
|
u30() # target
|
||||||
|
u30() # exc_type
|
||||||
|
u30() # var_name
|
||||||
|
trait_count = u30()
|
||||||
|
for _c2 in range(trait_count):
|
||||||
|
parse_traits_info()
|
||||||
|
|
||||||
|
assert p + code_reader.tell() == len(code_tag)
|
||||||
|
assert len(methods) == len(method_idxs)
|
||||||
|
|
||||||
|
method_pyfunctions = {}
|
||||||
|
|
||||||
|
def extract_function(func_name):
|
||||||
|
if func_name in method_pyfunctions:
|
||||||
|
return method_pyfunctions[func_name]
|
||||||
|
if func_name not in methods:
|
||||||
|
raise ExtractorError(u'Cannot find function %r' % func_name)
|
||||||
|
m = methods[func_name]
|
||||||
|
|
||||||
|
def resfunc(args):
|
||||||
|
registers = ['(this)'] + list(args) + [None] * m.local_count
|
||||||
|
stack = []
|
||||||
|
coder = io.BytesIO(m.code)
|
||||||
|
while True:
|
||||||
|
opcode = struct.unpack('!B', coder.read(1))[0]
|
||||||
|
if opcode == 36: # pushbyte
|
||||||
|
v = struct.unpack('!B', coder.read(1))[0]
|
||||||
|
stack.append(v)
|
||||||
|
elif opcode == 44: # pushstring
|
||||||
|
idx = u30(coder)
|
||||||
|
stack.append(constant_strings[idx])
|
||||||
|
elif opcode == 48: # pushscope
|
||||||
|
# We don't implement the scope register, so we'll just
|
||||||
|
# ignore the popped value
|
||||||
|
stack.pop()
|
||||||
|
elif opcode == 70: # callproperty
|
||||||
|
index = u30(coder)
|
||||||
|
mname = multinames[index]
|
||||||
|
arg_count = u30(coder)
|
||||||
|
args = list(reversed(
|
||||||
|
[stack.pop() for _ in range(arg_count)]))
|
||||||
|
obj = stack.pop()
|
||||||
|
if mname == u'split':
|
||||||
|
assert len(args) == 1
|
||||||
|
assert isinstance(args[0], compat_str)
|
||||||
|
assert isinstance(obj, compat_str)
|
||||||
|
if args[0] == u'':
|
||||||
|
res = list(obj)
|
||||||
|
else:
|
||||||
|
res = obj.split(args[0])
|
||||||
|
stack.append(res)
|
||||||
|
elif mname == u'slice':
|
||||||
|
assert len(args) == 1
|
||||||
|
assert isinstance(args[0], int)
|
||||||
|
assert isinstance(obj, list)
|
||||||
|
res = obj[args[0]:]
|
||||||
|
stack.append(res)
|
||||||
|
elif mname == u'join':
|
||||||
|
assert len(args) == 1
|
||||||
|
assert isinstance(args[0], compat_str)
|
||||||
|
assert isinstance(obj, list)
|
||||||
|
res = args[0].join(obj)
|
||||||
|
stack.append(res)
|
||||||
|
elif mname in method_pyfunctions:
|
||||||
|
stack.append(method_pyfunctions[mname](args))
|
||||||
|
else:
|
||||||
|
raise NotImplementedError(
|
||||||
|
u'Unsupported property %r on %r'
|
||||||
|
% (mname, obj))
|
||||||
|
elif opcode == 72: # returnvalue
|
||||||
|
res = stack.pop()
|
||||||
|
return res
|
||||||
|
elif opcode == 79: # callpropvoid
|
||||||
|
index = u30(coder)
|
||||||
|
mname = multinames[index]
|
||||||
|
arg_count = u30(coder)
|
||||||
|
args = list(reversed(
|
||||||
|
[stack.pop() for _ in range(arg_count)]))
|
||||||
|
obj = stack.pop()
|
||||||
|
if mname == u'reverse':
|
||||||
|
assert isinstance(obj, list)
|
||||||
|
obj.reverse()
|
||||||
|
else:
|
||||||
|
raise NotImplementedError(
|
||||||
|
u'Unsupported (void) property %r on %r'
|
||||||
|
% (mname, obj))
|
||||||
|
elif opcode == 93: # findpropstrict
|
||||||
|
index = u30(coder)
|
||||||
|
mname = multinames[index]
|
||||||
|
res = extract_function(mname)
|
||||||
|
stack.append(res)
|
||||||
|
elif opcode == 97: # setproperty
|
||||||
|
index = u30(coder)
|
||||||
|
value = stack.pop()
|
||||||
|
idx = stack.pop()
|
||||||
|
obj = stack.pop()
|
||||||
|
assert isinstance(obj, list)
|
||||||
|
assert isinstance(idx, int)
|
||||||
|
obj[idx] = value
|
||||||
|
elif opcode == 98: # getlocal
|
||||||
|
index = u30(coder)
|
||||||
|
stack.append(registers[index])
|
||||||
|
elif opcode == 99: # setlocal
|
||||||
|
index = u30(coder)
|
||||||
|
value = stack.pop()
|
||||||
|
registers[index] = value
|
||||||
|
elif opcode == 102: # getproperty
|
||||||
|
index = u30(coder)
|
||||||
|
pname = multinames[index]
|
||||||
|
if pname == u'length':
|
||||||
|
obj = stack.pop()
|
||||||
|
assert isinstance(obj, list)
|
||||||
|
stack.append(len(obj))
|
||||||
|
else: # Assume attribute access
|
||||||
|
idx = stack.pop()
|
||||||
|
assert isinstance(idx, int)
|
||||||
|
obj = stack.pop()
|
||||||
|
assert isinstance(obj, list)
|
||||||
|
stack.append(obj[idx])
|
||||||
|
elif opcode == 128: # coerce
|
||||||
|
u30(coder)
|
||||||
|
elif opcode == 133: # coerce_s
|
||||||
|
assert isinstance(stack[-1], (type(None), compat_str))
|
||||||
|
elif opcode == 164: # modulo
|
||||||
|
value2 = stack.pop()
|
||||||
|
value1 = stack.pop()
|
||||||
|
res = value1 % value2
|
||||||
|
stack.append(res)
|
||||||
|
elif opcode == 208: # getlocal_0
|
||||||
|
stack.append(registers[0])
|
||||||
|
elif opcode == 209: # getlocal_1
|
||||||
|
stack.append(registers[1])
|
||||||
|
elif opcode == 210: # getlocal_2
|
||||||
|
stack.append(registers[2])
|
||||||
|
elif opcode == 211: # getlocal_3
|
||||||
|
stack.append(registers[3])
|
||||||
|
elif opcode == 214: # setlocal_2
|
||||||
|
registers[2] = stack.pop()
|
||||||
|
elif opcode == 215: # setlocal_3
|
||||||
|
registers[3] = stack.pop()
|
||||||
|
else:
|
||||||
|
raise NotImplementedError(
|
||||||
|
u'Unsupported opcode %d' % opcode)
|
||||||
|
|
||||||
|
method_pyfunctions[func_name] = resfunc
|
||||||
|
return resfunc
|
||||||
|
|
||||||
|
initial_function = extract_function(u'decipher')
|
||||||
|
return lambda s: initial_function([s])
|
||||||
|
|
||||||
|
def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
|
||||||
"""Turn the encrypted s field into a working signature"""
|
"""Turn the encrypted s field into a working signature"""
|
||||||
|
|
||||||
|
if player_url is not None:
|
||||||
|
try:
|
||||||
|
if player_url not in self._player_cache:
|
||||||
|
func = self._extract_signature_function(
|
||||||
|
video_id, player_url, len(s)
|
||||||
|
)
|
||||||
|
self._player_cache[player_url] = func
|
||||||
|
func = self._player_cache[player_url]
|
||||||
|
if self._downloader.params.get('youtube_print_sig_code'):
|
||||||
|
self._print_sig_code(func, len(s))
|
||||||
|
return func(s)
|
||||||
|
except Exception:
|
||||||
|
tb = traceback.format_exc()
|
||||||
|
self._downloader.report_warning(
|
||||||
|
u'Automatic signature extraction failed: ' + tb)
|
||||||
|
|
||||||
|
self._downloader.report_warning(
|
||||||
|
u'Warning: Falling back to static signature algorithm')
|
||||||
|
return self._static_decrypt_signature(
|
||||||
|
s, video_id, player_url, age_gate)
|
||||||
|
|
||||||
|
def _static_decrypt_signature(self, s, video_id, player_url, age_gate):
|
||||||
|
if age_gate:
|
||||||
|
# The videos with age protection use another player, so the
|
||||||
|
# algorithms can be different.
|
||||||
|
if len(s) == 86:
|
||||||
|
return s[2:63] + s[82] + s[64:82] + s[63]
|
||||||
|
|
||||||
if len(s) == 93:
|
if len(s) == 93:
|
||||||
return s[86:29:-1] + s[88] + s[28:5:-1]
|
return s[86:29:-1] + s[88] + s[28:5:-1]
|
||||||
elif len(s) == 92:
|
elif len(s) == 92:
|
||||||
return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83]
|
return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83]
|
||||||
|
elif len(s) == 91:
|
||||||
|
return s[84:27:-1] + s[86] + s[26:5:-1]
|
||||||
elif len(s) == 90:
|
elif len(s) == 90:
|
||||||
return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81]
|
return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81]
|
||||||
elif len(s) == 89:
|
elif len(s) == 89:
|
||||||
@@ -429,11 +1092,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
elif len(s) == 87:
|
elif len(s) == 87:
|
||||||
return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
|
return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
|
||||||
elif len(s) == 86:
|
elif len(s) == 86:
|
||||||
return s[5:34] + s[0] + s[35:38] + s[3] + s[39:45] + s[38] + s[46:53] + s[73] + s[54:73] + s[85] + s[74:85] + s[53]
|
return s[80:72:-1] + s[16] + s[71:39:-1] + s[72] + s[38:16:-1] + s[82] + s[15::-1]
|
||||||
elif len(s) == 85:
|
elif len(s) == 85:
|
||||||
return s[3:11] + s[0] + s[12:55] + s[84] + s[56:84]
|
return s[3:11] + s[0] + s[12:55] + s[84] + s[56:84]
|
||||||
elif len(s) == 84:
|
elif len(s) == 84:
|
||||||
return s[81:36:-1] + s[0] + s[35:2:-1]
|
return s[78:70:-1] + s[14] + s[69:37:-1] + s[70] + s[36:14:-1] + s[80] + s[:14][::-1]
|
||||||
elif len(s) == 83:
|
elif len(s) == 83:
|
||||||
return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0]
|
return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0]
|
||||||
elif len(s) == 82:
|
elif len(s) == 82:
|
||||||
@@ -448,15 +1111,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
else:
|
else:
|
||||||
raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
|
raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
|
||||||
|
|
||||||
def _decrypt_signature_age_gate(self, s):
|
|
||||||
# The videos with age protection use another player, so the algorithms
|
|
||||||
# can be different.
|
|
||||||
if len(s) == 86:
|
|
||||||
return s[2:63] + s[82] + s[64:82] + s[63]
|
|
||||||
else:
|
|
||||||
# Fallback to the other algortihms
|
|
||||||
return self._decrypt_signature(s)
|
|
||||||
|
|
||||||
def _get_available_subtitles(self, video_id):
|
def _get_available_subtitles(self, video_id):
|
||||||
try:
|
try:
|
||||||
sub_list = self._download_webpage(
|
sub_list = self._download_webpage(
|
||||||
@@ -629,7 +1283,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
video_webpage = video_webpage_bytes.decode('utf-8', 'ignore')
|
video_webpage = video_webpage_bytes.decode('utf-8', 'ignore')
|
||||||
|
|
||||||
# Attempt to extract SWF player URL
|
# Attempt to extract SWF player URL
|
||||||
mobj = re.search(r'swfConfig.*?"(http:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
|
mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
|
||||||
if mobj is not None:
|
if mobj is not None:
|
||||||
player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
|
player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
|
||||||
else:
|
else:
|
||||||
@@ -782,21 +1436,34 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
if 'sig' in url_data:
|
if 'sig' in url_data:
|
||||||
url += '&signature=' + url_data['sig'][0]
|
url += '&signature=' + url_data['sig'][0]
|
||||||
elif 's' in url_data:
|
elif 's' in url_data:
|
||||||
if self._downloader.params.get('verbose'):
|
|
||||||
s = url_data['s'][0]
|
|
||||||
if age_gate:
|
|
||||||
player = 'flash player'
|
|
||||||
else:
|
|
||||||
player = u'html5 player %s' % self._search_regex(r'html5player-(.+?)\.js', video_webpage,
|
|
||||||
'html5 player', fatal=False)
|
|
||||||
parts_sizes = u'.'.join(compat_str(len(part)) for part in s.split('.'))
|
|
||||||
self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
|
|
||||||
(len(s), parts_sizes, url_data['itag'][0], player))
|
|
||||||
encrypted_sig = url_data['s'][0]
|
encrypted_sig = url_data['s'][0]
|
||||||
if age_gate:
|
if self._downloader.params.get('verbose'):
|
||||||
signature = self._decrypt_signature_age_gate(encrypted_sig)
|
if age_gate:
|
||||||
else:
|
if player_url is None:
|
||||||
signature = self._decrypt_signature(encrypted_sig)
|
player_version = 'unknown'
|
||||||
|
else:
|
||||||
|
player_version = self._search_regex(
|
||||||
|
r'-(.+)\.swf$', player_url,
|
||||||
|
u'flash player', fatal=False)
|
||||||
|
player_desc = 'flash player %s' % player_version
|
||||||
|
else:
|
||||||
|
player_version = self._search_regex(
|
||||||
|
r'html5player-(.+?)\.js', video_webpage,
|
||||||
|
'html5 player', fatal=False)
|
||||||
|
player_desc = u'html5 player %s' % player_version
|
||||||
|
|
||||||
|
parts_sizes = u'.'.join(compat_str(len(part)) for part in encrypted_sig.split('.'))
|
||||||
|
self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
|
||||||
|
(len(encrypted_sig), parts_sizes, url_data['itag'][0], player_desc))
|
||||||
|
|
||||||
|
if not age_gate:
|
||||||
|
jsplayer_url_json = self._search_regex(
|
||||||
|
r'"assets":.+?"js":\s*("[^"]+")',
|
||||||
|
video_webpage, u'JS player URL')
|
||||||
|
player_url = json.loads(jsplayer_url_json)
|
||||||
|
|
||||||
|
signature = self._decrypt_signature(
|
||||||
|
encrypted_sig, video_id, player_url, age_gate)
|
||||||
url += '&signature=' + signature
|
url += '&signature=' + signature
|
||||||
if 'ratebypass' not in url:
|
if 'ratebypass' not in url:
|
||||||
url += '&ratebypass=yes'
|
url += '&ratebypass=yes'
|
||||||
|
@@ -66,6 +66,12 @@ try:
|
|||||||
except ImportError: # Python 2
|
except ImportError: # Python 2
|
||||||
from urllib2 import HTTPError as compat_HTTPError
|
from urllib2 import HTTPError as compat_HTTPError
|
||||||
|
|
||||||
|
try:
|
||||||
|
from urllib.request import urlretrieve as compat_urlretrieve
|
||||||
|
except ImportError: # Python 2
|
||||||
|
from urllib import urlretrieve as compat_urlretrieve
|
||||||
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from subprocess import DEVNULL
|
from subprocess import DEVNULL
|
||||||
compat_subprocess_get_DEVNULL = lambda: DEVNULL
|
compat_subprocess_get_DEVNULL = lambda: DEVNULL
|
||||||
|
@@ -1,2 +1,2 @@
|
|||||||
|
|
||||||
__version__ = '2013.09.20'
|
__version__ = '2013.09.24.1'
|
||||||
|
Reference in New Issue
Block a user