Compare commits
106 Commits
2013.12.23
...
2014.01.05
Author | SHA1 | Date | |
---|---|---|---|
|
a5f1e12a02 | ||
|
ca9e792253 | ||
|
aff24732b9 | ||
|
455fa214b6 | ||
|
a9c5e5ca6e | ||
|
cefcb9fde3 | ||
|
bca4e93076 | ||
|
67c20aebb7 | ||
|
448711e39f | ||
|
8bf48f237d | ||
|
7c0578dc86 | ||
|
55033ffb0a | ||
|
b4a9bf701a | ||
|
a015dce0e2 | ||
|
28ab2e48ae | ||
|
6febd1c1df | ||
|
6350728be2 | ||
|
a7c26e7338 | ||
|
c880557666 | ||
|
85689a531f | ||
|
cc14dfb8ec | ||
|
91d7d0b333 | ||
|
9887c9b2d6 | ||
|
d2fee313ec | ||
|
fa7f58e433 | ||
|
71cd2a571e | ||
|
7c094bfe2f | ||
|
0f30658329 | ||
|
31c1cf5a9d | ||
|
efa1739b74 | ||
|
5ffecde73f | ||
|
08d13955dd | ||
|
531147dd5e | ||
|
a17c95f5e4 | ||
|
eadaf08c16 | ||
|
4a9c9b6fdb | ||
|
b969ab48d9 | ||
|
8fa8a6299b | ||
|
b2b0870b3a | ||
|
4fb757d1e0 | ||
|
241bce7aaf | ||
|
33ec2ae8d9 | ||
|
c801b2051a | ||
|
7976fcac55 | ||
|
e9f9a10fba | ||
|
1cdfc31e1f | ||
|
19dab5e6cc | ||
|
c0f9969b9e | ||
|
a0ddb8a2fa | ||
|
c1d1facd06 | ||
|
b26559878f | ||
|
fd46a318a2 | ||
|
5d4f3985be | ||
|
360babf799 | ||
|
a1b92edbb3 | ||
|
12c978739a | ||
|
4bc60dafeb | ||
|
bf5b0a1bfb | ||
|
bfe9de8510 | ||
|
5ecd3c6a09 | ||
|
608d11f515 | ||
|
c7f8537dd9 | ||
|
723f839911 | ||
|
61224dbcdd | ||
|
c3afc93a69 | ||
|
7b8af56340 | ||
|
539179f45b | ||
|
7217e148fb | ||
|
d29b5e812b | ||
|
1e923b0d29 | ||
|
f7e9d77f34 | ||
|
41cc67c542 | ||
|
c645c7658d | ||
|
b874fe2da8 | ||
|
c7deaa4c74 | ||
|
e6812ac99d | ||
|
719d3927d7 | ||
|
55e663a8d7 | ||
|
2c62dc26c8 | ||
|
3d4a70b821 | ||
|
4bcc7bd1f2 | ||
|
f49d89ee04 | ||
|
dabc127362 | ||
|
c25c991809 | ||
|
f45f96f8f8 | ||
|
1538eff6d8 | ||
|
00b2685b9c | ||
|
8e3e03229e | ||
|
9d8d675e0e | ||
|
933605d7e8 | ||
|
b3d9ef88ec | ||
|
8958b6916c | ||
|
9fc3bef87a | ||
|
d80044c235 | ||
|
bc2103f3bf | ||
|
f82b18efc1 | ||
|
504c668d3b | ||
|
466617f539 | ||
|
196938835a | ||
|
62d68c43ed | ||
|
bfaae0a768 | ||
|
e56f22ae20 | ||
|
dbd1988ed9 | ||
|
4ea3be0a5c | ||
|
3bc2ddccc8 | ||
|
8ab470f1b2 |
2
.gitignore
vendored
2
.gitignore
vendored
@@ -23,6 +23,8 @@ updates_key.pem
|
||||
*.vtt
|
||||
*.flv
|
||||
*.mp4
|
||||
*.m4a
|
||||
*.m4v
|
||||
*.part
|
||||
test/testdata
|
||||
.tox
|
||||
|
10
README.md
10
README.md
@@ -34,9 +34,11 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
empty string (--proxy "") for direct connection
|
||||
--no-check-certificate Suppress HTTPS certificate validation.
|
||||
--cache-dir DIR Location in the filesystem where youtube-dl can
|
||||
store downloaded information permanently. By
|
||||
store some downloaded information permanently. By
|
||||
default $XDG_CACHE_HOME/youtube-dl or ~/.cache
|
||||
/youtube-dl .
|
||||
/youtube-dl . At the moment, only YouTube player
|
||||
files (for videos with obfuscated signatures) are
|
||||
cached, but that may change.
|
||||
--no-cache-dir Disable filesystem caching
|
||||
--bidi-workaround Work around terminals that lack bidirectional
|
||||
text support. Requires bidiv or fribidi
|
||||
@@ -335,3 +337,7 @@ In particular, every site support request issue should only pertain to services
|
||||
### Is anyone going to need the feature?
|
||||
|
||||
Only post features that you (or an incapicated friend you can personally talk to) require. Do not post features because they seem like a good idea. If they are really useful, they will be requested by someone who requires them.
|
||||
|
||||
### Is your question about youtube-dl?
|
||||
|
||||
It may sound strange, but some bug reports we receive are completely unrelated to youtube-dl and relate to a different or even the reporter's own application. Please make sure that you are actually using youtube-dl. If you are using a UI for youtube-dl, report the bug to the maintainer of the actual application providing the UI. On the other hand, if your UI for youtube-dl fails in some way you believe is related to youtube-dl, by all means, go ahead and report the bug.
|
||||
|
4
setup.py
4
setup.py
@@ -1,7 +1,7 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from __future__ import print_function
|
||||
from __future__ import print_function, unicode_literals
|
||||
|
||||
import pkg_resources
|
||||
import sys
|
||||
@@ -71,7 +71,7 @@ setup(
|
||||
author_email='ytdl@yt-dl.org',
|
||||
maintainer='Philipp Hagemeister',
|
||||
maintainer_email='phihag@phihag.de',
|
||||
packages=['youtube_dl', 'youtube_dl.extractor'],
|
||||
packages=['youtube_dl', 'youtube_dl.extractor', 'youtube_dl.downloader'],
|
||||
|
||||
# Provokes warning on most systems (why?!)
|
||||
# test_suite = 'nose.collector',
|
||||
|
@@ -8,6 +8,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from test.helper import FakeYDL
|
||||
from youtube_dl import YoutubeDL
|
||||
from youtube_dl.extractor import YoutubeIE
|
||||
|
||||
|
||||
class YDL(FakeYDL):
|
||||
@@ -33,6 +34,8 @@ class TestFormatSelection(unittest.TestCase):
|
||||
{u'ext': u'mp4', u'height': 460},
|
||||
]
|
||||
info_dict = {u'formats': formats, u'extractor': u'test'}
|
||||
yie = YoutubeIE(ydl)
|
||||
yie._sort_formats(info_dict['formats'])
|
||||
ydl.process_ie_result(info_dict)
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded[u'ext'], u'webm')
|
||||
@@ -45,28 +48,46 @@ class TestFormatSelection(unittest.TestCase):
|
||||
{u'ext': u'mp4', u'height': 1080},
|
||||
]
|
||||
info_dict[u'formats'] = formats
|
||||
yie = YoutubeIE(ydl)
|
||||
yie._sort_formats(info_dict['formats'])
|
||||
ydl.process_ie_result(info_dict)
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded[u'ext'], u'mp4')
|
||||
|
||||
# No prefer_free_formats => keep original formats order
|
||||
# No prefer_free_formats => prefer mp4 and flv for greater compatibilty
|
||||
ydl = YDL()
|
||||
ydl.params['prefer_free_formats'] = False
|
||||
formats = [
|
||||
{u'ext': u'webm', u'height': 720},
|
||||
{u'ext': u'mp4', u'height': 720},
|
||||
{u'ext': u'flv', u'height': 720},
|
||||
]
|
||||
info_dict[u'formats'] = formats
|
||||
yie = YoutubeIE(ydl)
|
||||
yie._sort_formats(info_dict['formats'])
|
||||
ydl.process_ie_result(info_dict)
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded[u'ext'], u'mp4')
|
||||
|
||||
ydl = YDL()
|
||||
ydl.params['prefer_free_formats'] = False
|
||||
formats = [
|
||||
{u'ext': u'flv', u'height': 720},
|
||||
{u'ext': u'webm', u'height': 720},
|
||||
]
|
||||
info_dict[u'formats'] = formats
|
||||
yie = YoutubeIE(ydl)
|
||||
yie._sort_formats(info_dict['formats'])
|
||||
ydl.process_ie_result(info_dict)
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded[u'ext'], u'flv')
|
||||
|
||||
def test_format_limit(self):
|
||||
formats = [
|
||||
{u'format_id': u'meh', u'url': u'http://example.com/meh'},
|
||||
{u'format_id': u'good', u'url': u'http://example.com/good'},
|
||||
{u'format_id': u'great', u'url': u'http://example.com/great'},
|
||||
{u'format_id': u'excellent', u'url': u'http://example.com/exc'},
|
||||
{u'format_id': u'meh', u'url': u'http://example.com/meh', 'preference': 1},
|
||||
{u'format_id': u'good', u'url': u'http://example.com/good', 'preference': 2},
|
||||
{u'format_id': u'great', u'url': u'http://example.com/great', 'preference': 3},
|
||||
{u'format_id': u'excellent', u'url': u'http://example.com/exc', 'preference': 4},
|
||||
]
|
||||
info_dict = {
|
||||
u'formats': formats, u'extractor': u'test', 'id': 'testvid'}
|
||||
@@ -78,12 +99,12 @@ class TestFormatSelection(unittest.TestCase):
|
||||
|
||||
ydl = YDL({'format_limit': 'good'})
|
||||
assert ydl.params['format_limit'] == 'good'
|
||||
ydl.process_ie_result(info_dict)
|
||||
ydl.process_ie_result(info_dict.copy())
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded[u'format_id'], u'good')
|
||||
|
||||
ydl = YDL({'format_limit': 'great', 'format': 'all'})
|
||||
ydl.process_ie_result(info_dict)
|
||||
ydl.process_ie_result(info_dict.copy())
|
||||
self.assertEqual(ydl.downloaded_info_dicts[0][u'format_id'], u'meh')
|
||||
self.assertEqual(ydl.downloaded_info_dicts[1][u'format_id'], u'good')
|
||||
self.assertEqual(ydl.downloaded_info_dicts[2][u'format_id'], u'great')
|
||||
@@ -91,44 +112,80 @@ class TestFormatSelection(unittest.TestCase):
|
||||
|
||||
ydl = YDL()
|
||||
ydl.params['format_limit'] = 'excellent'
|
||||
ydl.process_ie_result(info_dict)
|
||||
ydl.process_ie_result(info_dict.copy())
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded[u'format_id'], u'excellent')
|
||||
|
||||
def test_format_selection(self):
|
||||
formats = [
|
||||
{u'format_id': u'35', u'ext': u'mp4'},
|
||||
{u'format_id': u'45', u'ext': u'webm'},
|
||||
{u'format_id': u'47', u'ext': u'webm'},
|
||||
{u'format_id': u'2', u'ext': u'flv'},
|
||||
{u'format_id': u'35', u'ext': u'mp4', 'preference': 1},
|
||||
{u'format_id': u'45', u'ext': u'webm', 'preference': 2},
|
||||
{u'format_id': u'47', u'ext': u'webm', 'preference': 3},
|
||||
{u'format_id': u'2', u'ext': u'flv', 'preference': 4},
|
||||
]
|
||||
info_dict = {u'formats': formats, u'extractor': u'test'}
|
||||
|
||||
ydl = YDL({'format': u'20/47'})
|
||||
ydl.process_ie_result(info_dict)
|
||||
ydl.process_ie_result(info_dict.copy())
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], u'47')
|
||||
|
||||
ydl = YDL({'format': u'20/71/worst'})
|
||||
ydl.process_ie_result(info_dict)
|
||||
ydl.process_ie_result(info_dict.copy())
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], u'35')
|
||||
|
||||
ydl = YDL()
|
||||
ydl.process_ie_result(info_dict)
|
||||
ydl.process_ie_result(info_dict.copy())
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], u'2')
|
||||
|
||||
ydl = YDL({'format': u'webm/mp4'})
|
||||
ydl.process_ie_result(info_dict)
|
||||
ydl.process_ie_result(info_dict.copy())
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], u'47')
|
||||
|
||||
ydl = YDL({'format': u'3gp/40/mp4'})
|
||||
ydl.process_ie_result(info_dict)
|
||||
ydl.process_ie_result(info_dict.copy())
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], u'35')
|
||||
|
||||
def test_youtube_format_selection(self):
|
||||
order = [
|
||||
'38', '37', '46', '22', '45', '35', '44', '18', '34', '43', '6', '5', '36', '17', '13',
|
||||
# Apple HTTP Live Streaming
|
||||
'96', '95', '94', '93', '92', '132', '151',
|
||||
# 3D
|
||||
'85', '84', '102', '83', '101', '82', '100',
|
||||
# Dash video
|
||||
'138', '137', '248', '136', '247', '135', '246',
|
||||
'245', '244', '134', '243', '133', '242', '160',
|
||||
# Dash audio
|
||||
'141', '172', '140', '139', '171',
|
||||
]
|
||||
|
||||
for f1id, f2id in zip(order, order[1:]):
|
||||
f1 = YoutubeIE._formats[f1id].copy()
|
||||
f1['format_id'] = f1id
|
||||
f2 = YoutubeIE._formats[f2id].copy()
|
||||
f2['format_id'] = f2id
|
||||
|
||||
info_dict = {'formats': [f1, f2], 'extractor': 'youtube'}
|
||||
ydl = YDL()
|
||||
yie = YoutubeIE(ydl)
|
||||
yie._sort_formats(info_dict['formats'])
|
||||
ydl.process_ie_result(info_dict)
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], f1id)
|
||||
|
||||
info_dict = {'formats': [f2, f1], 'extractor': 'youtube'}
|
||||
ydl = YDL()
|
||||
yie = YoutubeIE(ydl)
|
||||
yie._sort_formats(info_dict['formats'])
|
||||
ydl.process_ie_result(info_dict)
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], f1id)
|
||||
|
||||
def test_add_extra_info(self):
|
||||
test_dict = {
|
||||
'extractor': 'Foo',
|
||||
|
@@ -90,7 +90,7 @@ def generator(test_case):
|
||||
def _hook(status):
|
||||
if status['status'] == 'finished':
|
||||
finished_hook_called.add(status['filename'])
|
||||
ydl.fd.add_progress_hook(_hook)
|
||||
ydl.add_progress_hook(_hook)
|
||||
|
||||
def get_tc_filename(tc):
|
||||
return tc.get('file') or ydl.prepare_filename(tc.get('info_dict', {}))
|
||||
|
@@ -28,7 +28,8 @@ from youtube_dl.extractor import (
|
||||
BandcampAlbumIE,
|
||||
SmotriCommunityIE,
|
||||
SmotriUserIE,
|
||||
IviCompilationIE
|
||||
IviCompilationIE,
|
||||
ImdbListIE,
|
||||
)
|
||||
|
||||
|
||||
@@ -187,6 +188,15 @@ class TestPlaylists(unittest.TestCase):
|
||||
self.assertEqual(result['id'], u'dezhurnyi_angel/season2')
|
||||
self.assertEqual(result['title'], u'Дежурный ангел (2010 - 2012) 2 сезон')
|
||||
self.assertTrue(len(result['entries']) >= 20)
|
||||
|
||||
def test_imdb_list(self):
|
||||
dl = FakeYDL()
|
||||
ie = ImdbListIE(dl)
|
||||
result = ie.extract('http://www.imdb.com/list/sMjedvGDd8U')
|
||||
self.assertIsPlaylist(result)
|
||||
self.assertEqual(result['id'], u'sMjedvGDd8U')
|
||||
self.assertEqual(result['title'], u'Animated and Family Films')
|
||||
self.assertTrue(len(result['entries']) >= 48)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
@@ -36,10 +36,6 @@ class TestYoutubeSubtitles(BaseTestSubtitles):
|
||||
url = 'QRS8MkLhQmM'
|
||||
IE = YoutubeIE
|
||||
|
||||
def getSubtitles(self):
|
||||
info_dict = self.getInfoDict()
|
||||
return info_dict[0]['subtitles']
|
||||
|
||||
def test_youtube_no_writesubtitles(self):
|
||||
self.DL.params['writesubtitles'] = False
|
||||
subtitles = self.getSubtitles()
|
||||
|
40
test/test_unicode_literals.py
Normal file
40
test/test_unicode_literals.py
Normal file
@@ -0,0 +1,40 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import io
|
||||
import os
|
||||
import re
|
||||
import unittest
|
||||
|
||||
rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
|
||||
|
||||
class TestUnicodeLiterals(unittest.TestCase):
|
||||
def test_all_files(self):
|
||||
print('Skipping this test (not yet fully implemented)')
|
||||
return
|
||||
|
||||
for dirpath, _, filenames in os.walk(rootDir):
|
||||
for basename in filenames:
|
||||
if not basename.endswith('.py'):
|
||||
continue
|
||||
fn = os.path.join(dirpath, basename)
|
||||
with io.open(fn, encoding='utf-8') as inf:
|
||||
code = inf.read()
|
||||
|
||||
if "'" not in code and '"' not in code:
|
||||
continue
|
||||
imps = 'from __future__ import unicode_literals'
|
||||
self.assertTrue(
|
||||
imps in code,
|
||||
' %s missing in %s' % (imps, fn))
|
||||
|
||||
m = re.search(r'(?<=\s)u[\'"](?!\)|,|$)', code)
|
||||
if m is not None:
|
||||
self.assertTrue(
|
||||
m is None,
|
||||
'u present in %s, around %s' % (
|
||||
fn, code[m.start() - 10:m.end() + 10]))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
@@ -18,6 +18,7 @@ from youtube_dl.utils import (
|
||||
find_xpath_attr,
|
||||
get_meta_content,
|
||||
orderedSet,
|
||||
parse_duration,
|
||||
sanitize_filename,
|
||||
shell_quote,
|
||||
smuggle_url,
|
||||
@@ -192,5 +193,12 @@ class TestUtil(unittest.TestCase):
|
||||
url_basename(u'http://media.w3.org/2010/05/sintel/trailer.mp4'),
|
||||
u'trailer.mp4')
|
||||
|
||||
def test_parse_duration(self):
|
||||
self.assertEqual(parse_duration(None), None)
|
||||
self.assertEqual(parse_duration('1'), 1)
|
||||
self.assertEqual(parse_duration('1337:12'), 80232)
|
||||
self.assertEqual(parse_duration('9:12:43'), 33163)
|
||||
self.assertEqual(parse_duration('x:y'), None)
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -1,724 +1,12 @@
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
|
||||
from .utils import (
|
||||
compat_urllib_error,
|
||||
compat_urllib_request,
|
||||
ContentTooShortError,
|
||||
determine_ext,
|
||||
encodeFilename,
|
||||
format_bytes,
|
||||
sanitize_open,
|
||||
timeconvert,
|
||||
)
|
||||
|
||||
|
||||
class FileDownloader(object):
|
||||
"""File Downloader class.
|
||||
|
||||
File downloader objects are the ones responsible of downloading the
|
||||
actual video file and writing it to disk.
|
||||
|
||||
File downloaders accept a lot of parameters. In order not to saturate
|
||||
the object constructor with arguments, it receives a dictionary of
|
||||
options instead.
|
||||
|
||||
Available options:
|
||||
|
||||
verbose: Print additional info to stdout.
|
||||
quiet: Do not print messages to stdout.
|
||||
ratelimit: Download speed limit, in bytes/sec.
|
||||
retries: Number of times to retry for HTTP error 5xx
|
||||
buffersize: Size of download buffer in bytes.
|
||||
noresizebuffer: Do not automatically resize the download buffer.
|
||||
continuedl: Try to continue downloads if possible.
|
||||
noprogress: Do not print the progress bar.
|
||||
logtostderr: Log messages to stderr instead of stdout.
|
||||
consoletitle: Display progress in console window's titlebar.
|
||||
nopart: Do not use temporary .part files.
|
||||
updatetime: Use the Last-modified header to set output file timestamps.
|
||||
test: Download only first bytes to test the downloader.
|
||||
min_filesize: Skip files smaller than this size
|
||||
max_filesize: Skip files larger than this size
|
||||
"""
|
||||
|
||||
params = None
|
||||
|
||||
def __init__(self, ydl, params):
|
||||
"""Create a FileDownloader object with the given options."""
|
||||
self.ydl = ydl
|
||||
self._progress_hooks = []
|
||||
self.params = params
|
||||
|
||||
@staticmethod
|
||||
def format_seconds(seconds):
|
||||
(mins, secs) = divmod(seconds, 60)
|
||||
(hours, mins) = divmod(mins, 60)
|
||||
if hours > 99:
|
||||
return '--:--:--'
|
||||
if hours == 0:
|
||||
return '%02d:%02d' % (mins, secs)
|
||||
else:
|
||||
return '%02d:%02d:%02d' % (hours, mins, secs)
|
||||
|
||||
@staticmethod
|
||||
def calc_percent(byte_counter, data_len):
|
||||
if data_len is None:
|
||||
return None
|
||||
return float(byte_counter) / float(data_len) * 100.0
|
||||
|
||||
@staticmethod
|
||||
def format_percent(percent):
|
||||
if percent is None:
|
||||
return '---.-%'
|
||||
return '%6s' % ('%3.1f%%' % percent)
|
||||
|
||||
@staticmethod
|
||||
def calc_eta(start, now, total, current):
|
||||
if total is None:
|
||||
return None
|
||||
dif = now - start
|
||||
if current == 0 or dif < 0.001: # One millisecond
|
||||
return None
|
||||
rate = float(current) / dif
|
||||
return int((float(total) - float(current)) / rate)
|
||||
|
||||
@staticmethod
|
||||
def format_eta(eta):
|
||||
if eta is None:
|
||||
return '--:--'
|
||||
return FileDownloader.format_seconds(eta)
|
||||
|
||||
@staticmethod
|
||||
def calc_speed(start, now, bytes):
|
||||
dif = now - start
|
||||
if bytes == 0 or dif < 0.001: # One millisecond
|
||||
return None
|
||||
return float(bytes) / dif
|
||||
|
||||
@staticmethod
|
||||
def format_speed(speed):
|
||||
if speed is None:
|
||||
return '%10s' % '---b/s'
|
||||
return '%10s' % ('%s/s' % format_bytes(speed))
|
||||
|
||||
@staticmethod
|
||||
def best_block_size(elapsed_time, bytes):
|
||||
new_min = max(bytes / 2.0, 1.0)
|
||||
new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
|
||||
if elapsed_time < 0.001:
|
||||
return int(new_max)
|
||||
rate = bytes / elapsed_time
|
||||
if rate > new_max:
|
||||
return int(new_max)
|
||||
if rate < new_min:
|
||||
return int(new_min)
|
||||
return int(rate)
|
||||
|
||||
@staticmethod
|
||||
def parse_bytes(bytestr):
|
||||
"""Parse a string indicating a byte quantity into an integer."""
|
||||
matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
|
||||
if matchobj is None:
|
||||
return None
|
||||
number = float(matchobj.group(1))
|
||||
multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
|
||||
return int(round(number * multiplier))
|
||||
|
||||
def to_screen(self, *args, **kargs):
|
||||
self.ydl.to_screen(*args, **kargs)
|
||||
|
||||
def to_stderr(self, message):
|
||||
self.ydl.to_screen(message)
|
||||
|
||||
def to_console_title(self, message):
|
||||
self.ydl.to_console_title(message)
|
||||
|
||||
def trouble(self, *args, **kargs):
|
||||
self.ydl.trouble(*args, **kargs)
|
||||
|
||||
def report_warning(self, *args, **kargs):
|
||||
self.ydl.report_warning(*args, **kargs)
|
||||
|
||||
def report_error(self, *args, **kargs):
|
||||
self.ydl.report_error(*args, **kargs)
|
||||
|
||||
def slow_down(self, start_time, byte_counter):
|
||||
"""Sleep if the download speed is over the rate limit."""
|
||||
rate_limit = self.params.get('ratelimit', None)
|
||||
if rate_limit is None or byte_counter == 0:
|
||||
return
|
||||
now = time.time()
|
||||
elapsed = now - start_time
|
||||
if elapsed <= 0.0:
|
||||
return
|
||||
speed = float(byte_counter) / elapsed
|
||||
if speed > rate_limit:
|
||||
time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
|
||||
|
||||
def temp_name(self, filename):
|
||||
"""Returns a temporary filename for the given filename."""
|
||||
if self.params.get('nopart', False) or filename == u'-' or \
|
||||
(os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))):
|
||||
return filename
|
||||
return filename + u'.part'
|
||||
|
||||
def undo_temp_name(self, filename):
|
||||
if filename.endswith(u'.part'):
|
||||
return filename[:-len(u'.part')]
|
||||
return filename
|
||||
|
||||
def try_rename(self, old_filename, new_filename):
|
||||
try:
|
||||
if old_filename == new_filename:
|
||||
return
|
||||
os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
|
||||
except (IOError, OSError):
|
||||
self.report_error(u'unable to rename file')
|
||||
|
||||
def try_utime(self, filename, last_modified_hdr):
|
||||
"""Try to set the last-modified time of the given file."""
|
||||
if last_modified_hdr is None:
|
||||
return
|
||||
if not os.path.isfile(encodeFilename(filename)):
|
||||
return
|
||||
timestr = last_modified_hdr
|
||||
if timestr is None:
|
||||
return
|
||||
filetime = timeconvert(timestr)
|
||||
if filetime is None:
|
||||
return filetime
|
||||
# Ignore obviously invalid dates
|
||||
if filetime == 0:
|
||||
return
|
||||
try:
|
||||
os.utime(filename, (time.time(), filetime))
|
||||
except:
|
||||
pass
|
||||
return filetime
|
||||
|
||||
def report_destination(self, filename):
|
||||
"""Report destination filename."""
|
||||
self.to_screen(u'[download] Destination: ' + filename)
|
||||
|
||||
def _report_progress_status(self, msg, is_last_line=False):
|
||||
fullmsg = u'[download] ' + msg
|
||||
if self.params.get('progress_with_newline', False):
|
||||
self.to_screen(fullmsg)
|
||||
else:
|
||||
if os.name == 'nt':
|
||||
prev_len = getattr(self, '_report_progress_prev_line_length',
|
||||
0)
|
||||
if prev_len > len(fullmsg):
|
||||
fullmsg += u' ' * (prev_len - len(fullmsg))
|
||||
self._report_progress_prev_line_length = len(fullmsg)
|
||||
clear_line = u'\r'
|
||||
else:
|
||||
clear_line = (u'\r\x1b[K' if sys.stderr.isatty() else u'\r')
|
||||
self.to_screen(clear_line + fullmsg, skip_eol=not is_last_line)
|
||||
self.to_console_title(u'youtube-dl ' + msg)
|
||||
|
||||
def report_progress(self, percent, data_len_str, speed, eta):
|
||||
"""Report download progress."""
|
||||
if self.params.get('noprogress', False):
|
||||
return
|
||||
if eta is not None:
|
||||
eta_str = self.format_eta(eta)
|
||||
else:
|
||||
eta_str = 'Unknown ETA'
|
||||
if percent is not None:
|
||||
percent_str = self.format_percent(percent)
|
||||
else:
|
||||
percent_str = 'Unknown %'
|
||||
speed_str = self.format_speed(speed)
|
||||
|
||||
msg = (u'%s of %s at %s ETA %s' %
|
||||
(percent_str, data_len_str, speed_str, eta_str))
|
||||
self._report_progress_status(msg)
|
||||
|
||||
def report_progress_live_stream(self, downloaded_data_len, speed, elapsed):
|
||||
if self.params.get('noprogress', False):
|
||||
return
|
||||
downloaded_str = format_bytes(downloaded_data_len)
|
||||
speed_str = self.format_speed(speed)
|
||||
elapsed_str = FileDownloader.format_seconds(elapsed)
|
||||
msg = u'%s at %s (%s)' % (downloaded_str, speed_str, elapsed_str)
|
||||
self._report_progress_status(msg)
|
||||
|
||||
def report_finish(self, data_len_str, tot_time):
|
||||
"""Report download finished."""
|
||||
if self.params.get('noprogress', False):
|
||||
self.to_screen(u'[download] Download completed')
|
||||
else:
|
||||
self._report_progress_status(
|
||||
(u'100%% of %s in %s' %
|
||||
(data_len_str, self.format_seconds(tot_time))),
|
||||
is_last_line=True)
|
||||
|
||||
def report_resuming_byte(self, resume_len):
|
||||
"""Report attempt to resume at given byte."""
|
||||
self.to_screen(u'[download] Resuming download at byte %s' % resume_len)
|
||||
|
||||
def report_retry(self, count, retries):
|
||||
"""Report retry in case of HTTP error 5xx"""
|
||||
self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))
|
||||
|
||||
def report_file_already_downloaded(self, file_name):
|
||||
"""Report file has already been fully downloaded."""
|
||||
try:
|
||||
self.to_screen(u'[download] %s has already been downloaded' % file_name)
|
||||
except UnicodeEncodeError:
|
||||
self.to_screen(u'[download] The file has already been downloaded')
|
||||
|
||||
def report_unable_to_resume(self):
|
||||
"""Report it was impossible to resume download."""
|
||||
self.to_screen(u'[download] Unable to resume')
|
||||
|
||||
def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url, live, conn):
|
||||
def run_rtmpdump(args):
|
||||
start = time.time()
|
||||
resume_percent = None
|
||||
resume_downloaded_data_len = None
|
||||
proc = subprocess.Popen(args, stderr=subprocess.PIPE)
|
||||
cursor_in_new_line = True
|
||||
proc_stderr_closed = False
|
||||
while not proc_stderr_closed:
|
||||
# read line from stderr
|
||||
line = u''
|
||||
while True:
|
||||
char = proc.stderr.read(1)
|
||||
if not char:
|
||||
proc_stderr_closed = True
|
||||
break
|
||||
if char in [b'\r', b'\n']:
|
||||
break
|
||||
line += char.decode('ascii', 'replace')
|
||||
if not line:
|
||||
# proc_stderr_closed is True
|
||||
continue
|
||||
mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec \(([0-9]{1,2}\.[0-9])%\)', line)
|
||||
if mobj:
|
||||
downloaded_data_len = int(float(mobj.group(1))*1024)
|
||||
percent = float(mobj.group(2))
|
||||
if not resume_percent:
|
||||
resume_percent = percent
|
||||
resume_downloaded_data_len = downloaded_data_len
|
||||
eta = self.calc_eta(start, time.time(), 100-resume_percent, percent-resume_percent)
|
||||
speed = self.calc_speed(start, time.time(), downloaded_data_len-resume_downloaded_data_len)
|
||||
data_len = None
|
||||
if percent > 0:
|
||||
data_len = int(downloaded_data_len * 100 / percent)
|
||||
data_len_str = u'~' + format_bytes(data_len)
|
||||
self.report_progress(percent, data_len_str, speed, eta)
|
||||
cursor_in_new_line = False
|
||||
self._hook_progress({
|
||||
'downloaded_bytes': downloaded_data_len,
|
||||
'total_bytes': data_len,
|
||||
'tmpfilename': tmpfilename,
|
||||
'filename': filename,
|
||||
'status': 'downloading',
|
||||
'eta': eta,
|
||||
'speed': speed,
|
||||
})
|
||||
else:
|
||||
# no percent for live streams
|
||||
mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec', line)
|
||||
if mobj:
|
||||
downloaded_data_len = int(float(mobj.group(1))*1024)
|
||||
time_now = time.time()
|
||||
speed = self.calc_speed(start, time_now, downloaded_data_len)
|
||||
self.report_progress_live_stream(downloaded_data_len, speed, time_now - start)
|
||||
cursor_in_new_line = False
|
||||
self._hook_progress({
|
||||
'downloaded_bytes': downloaded_data_len,
|
||||
'tmpfilename': tmpfilename,
|
||||
'filename': filename,
|
||||
'status': 'downloading',
|
||||
'speed': speed,
|
||||
})
|
||||
elif self.params.get('verbose', False):
|
||||
if not cursor_in_new_line:
|
||||
self.to_screen(u'')
|
||||
cursor_in_new_line = True
|
||||
self.to_screen(u'[rtmpdump] '+line)
|
||||
proc.wait()
|
||||
if not cursor_in_new_line:
|
||||
self.to_screen(u'')
|
||||
return proc.returncode
|
||||
|
||||
self.report_destination(filename)
|
||||
tmpfilename = self.temp_name(filename)
|
||||
test = self.params.get('test', False)
|
||||
|
||||
# Check for rtmpdump first
|
||||
try:
|
||||
subprocess.call(['rtmpdump', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
|
||||
except (OSError, IOError):
|
||||
self.report_error(u'RTMP download detected but "rtmpdump" could not be run')
|
||||
return False
|
||||
|
||||
# Download using rtmpdump. rtmpdump returns exit code 2 when
|
||||
# the connection was interrumpted and resuming appears to be
|
||||
# possible. This is part of rtmpdump's normal usage, AFAIK.
|
||||
basic_args = ['rtmpdump', '--verbose', '-r', url, '-o', tmpfilename]
|
||||
if player_url is not None:
|
||||
basic_args += ['--swfVfy', player_url]
|
||||
if page_url is not None:
|
||||
basic_args += ['--pageUrl', page_url]
|
||||
if play_path is not None:
|
||||
basic_args += ['--playpath', play_path]
|
||||
if tc_url is not None:
|
||||
basic_args += ['--tcUrl', url]
|
||||
if test:
|
||||
basic_args += ['--stop', '1']
|
||||
if live:
|
||||
basic_args += ['--live']
|
||||
if conn:
|
||||
basic_args += ['--conn', conn]
|
||||
args = basic_args + [[], ['--resume', '--skip', '1']][self.params.get('continuedl', False)]
|
||||
|
||||
if sys.platform == 'win32' and sys.version_info < (3, 0):
|
||||
# Windows subprocess module does not actually support Unicode
|
||||
# on Python 2.x
|
||||
# See http://stackoverflow.com/a/9951851/35070
|
||||
subprocess_encoding = sys.getfilesystemencoding()
|
||||
args = [a.encode(subprocess_encoding, 'ignore') for a in args]
|
||||
else:
|
||||
subprocess_encoding = None
|
||||
|
||||
if self.params.get('verbose', False):
|
||||
if subprocess_encoding:
|
||||
str_args = [
|
||||
a.decode(subprocess_encoding) if isinstance(a, bytes) else a
|
||||
for a in args]
|
||||
else:
|
||||
str_args = args
|
||||
try:
|
||||
import pipes
|
||||
shell_quote = lambda args: ' '.join(map(pipes.quote, str_args))
|
||||
except ImportError:
|
||||
shell_quote = repr
|
||||
self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(str_args))
|
||||
|
||||
retval = run_rtmpdump(args)
|
||||
|
||||
while (retval == 2 or retval == 1) and not test:
|
||||
prevsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
self.to_screen(u'[rtmpdump] %s bytes' % prevsize)
|
||||
time.sleep(5.0) # This seems to be needed
|
||||
retval = run_rtmpdump(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
|
||||
cursize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
if prevsize == cursize and retval == 1:
|
||||
break
|
||||
# Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
|
||||
if prevsize == cursize and retval == 2 and cursize > 1024:
|
||||
self.to_screen(u'[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
|
||||
retval = 0
|
||||
break
|
||||
if retval == 0 or (test and retval == 2):
|
||||
fsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
self.to_screen(u'[rtmpdump] %s bytes' % fsize)
|
||||
self.try_rename(tmpfilename, filename)
|
||||
self._hook_progress({
|
||||
'downloaded_bytes': fsize,
|
||||
'total_bytes': fsize,
|
||||
'filename': filename,
|
||||
'status': 'finished',
|
||||
})
|
||||
return True
|
||||
else:
|
||||
self.to_stderr(u"\n")
|
||||
self.report_error(u'rtmpdump exited with code %d' % retval)
|
||||
return False
|
||||
|
||||
def _download_with_mplayer(self, filename, url):
|
||||
self.report_destination(filename)
|
||||
tmpfilename = self.temp_name(filename)
|
||||
|
||||
args = ['mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy', '-dumpstream', '-dumpfile', tmpfilename, url]
|
||||
# Check for mplayer first
|
||||
try:
|
||||
subprocess.call(['mplayer', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
|
||||
except (OSError, IOError):
|
||||
self.report_error(u'MMS or RTSP download detected but "%s" could not be run' % args[0] )
|
||||
return False
|
||||
|
||||
# Download using mplayer.
|
||||
retval = subprocess.call(args)
|
||||
if retval == 0:
|
||||
fsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
self.to_screen(u'\r[%s] %s bytes' % (args[0], fsize))
|
||||
self.try_rename(tmpfilename, filename)
|
||||
self._hook_progress({
|
||||
'downloaded_bytes': fsize,
|
||||
'total_bytes': fsize,
|
||||
'filename': filename,
|
||||
'status': 'finished',
|
||||
})
|
||||
return True
|
||||
else:
|
||||
self.to_stderr(u"\n")
|
||||
self.report_error(u'mplayer exited with code %d' % retval)
|
||||
return False
|
||||
|
||||
def _download_m3u8_with_ffmpeg(self, filename, url):
|
||||
self.report_destination(filename)
|
||||
tmpfilename = self.temp_name(filename)
|
||||
|
||||
args = ['-y', '-i', url, '-f', 'mp4', '-c', 'copy',
|
||||
'-bsf:a', 'aac_adtstoasc', tmpfilename]
|
||||
|
||||
for program in ['avconv', 'ffmpeg']:
|
||||
try:
|
||||
subprocess.call([program, '-version'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
|
||||
break
|
||||
except (OSError, IOError):
|
||||
pass
|
||||
else:
|
||||
self.report_error(u'm3u8 download detected but ffmpeg or avconv could not be found')
|
||||
cmd = [program] + args
|
||||
|
||||
retval = subprocess.call(cmd)
|
||||
if retval == 0:
|
||||
fsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
self.to_screen(u'\r[%s] %s bytes' % (args[0], fsize))
|
||||
self.try_rename(tmpfilename, filename)
|
||||
self._hook_progress({
|
||||
'downloaded_bytes': fsize,
|
||||
'total_bytes': fsize,
|
||||
'filename': filename,
|
||||
'status': 'finished',
|
||||
})
|
||||
return True
|
||||
else:
|
||||
self.to_stderr(u"\n")
|
||||
self.report_error(u'ffmpeg exited with code %d' % retval)
|
||||
return False
|
||||
# Legacy file for backwards compatibility, use youtube_dl.downloader instead!
|
||||
from .downloader import FileDownloader as RealFileDownloader
|
||||
from .downloader import get_suitable_downloader
|
||||
|
||||
|
||||
# This class reproduces the old behaviour of FileDownloader
|
||||
class FileDownloader(RealFileDownloader):
|
||||
def _do_download(self, filename, info_dict):
|
||||
url = info_dict['url']
|
||||
|
||||
# Check file already present
|
||||
if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False):
|
||||
self.report_file_already_downloaded(filename)
|
||||
self._hook_progress({
|
||||
'filename': filename,
|
||||
'status': 'finished',
|
||||
'total_bytes': os.path.getsize(encodeFilename(filename)),
|
||||
})
|
||||
return True
|
||||
|
||||
# Attempt to download using rtmpdump
|
||||
if url.startswith('rtmp'):
|
||||
return self._download_with_rtmpdump(filename, url,
|
||||
info_dict.get('player_url', None),
|
||||
info_dict.get('page_url', None),
|
||||
info_dict.get('play_path', None),
|
||||
info_dict.get('tc_url', None),
|
||||
info_dict.get('rtmp_live', False),
|
||||
info_dict.get('rtmp_conn', None))
|
||||
|
||||
# Attempt to download using mplayer
|
||||
if url.startswith('mms') or url.startswith('rtsp'):
|
||||
return self._download_with_mplayer(filename, url)
|
||||
|
||||
# m3u8 manifest are downloaded with ffmpeg
|
||||
if determine_ext(url) == u'm3u8':
|
||||
return self._download_m3u8_with_ffmpeg(filename, url)
|
||||
|
||||
tmpfilename = self.temp_name(filename)
|
||||
stream = None
|
||||
|
||||
# Do not include the Accept-Encoding header
|
||||
headers = {'Youtubedl-no-compression': 'True'}
|
||||
if 'user_agent' in info_dict:
|
||||
headers['Youtubedl-user-agent'] = info_dict['user_agent']
|
||||
basic_request = compat_urllib_request.Request(url, None, headers)
|
||||
request = compat_urllib_request.Request(url, None, headers)
|
||||
|
||||
if self.params.get('test', False):
|
||||
request.add_header('Range','bytes=0-10240')
|
||||
|
||||
# Establish possible resume length
|
||||
if os.path.isfile(encodeFilename(tmpfilename)):
|
||||
resume_len = os.path.getsize(encodeFilename(tmpfilename))
|
||||
else:
|
||||
resume_len = 0
|
||||
|
||||
open_mode = 'wb'
|
||||
if resume_len != 0:
|
||||
if self.params.get('continuedl', False):
|
||||
self.report_resuming_byte(resume_len)
|
||||
request.add_header('Range','bytes=%d-' % resume_len)
|
||||
open_mode = 'ab'
|
||||
else:
|
||||
resume_len = 0
|
||||
|
||||
count = 0
|
||||
retries = self.params.get('retries', 0)
|
||||
while count <= retries:
|
||||
# Establish connection
|
||||
try:
|
||||
if count == 0 and 'urlhandle' in info_dict:
|
||||
data = info_dict['urlhandle']
|
||||
data = compat_urllib_request.urlopen(request)
|
||||
break
|
||||
except (compat_urllib_error.HTTPError, ) as err:
|
||||
if (err.code < 500 or err.code >= 600) and err.code != 416:
|
||||
# Unexpected HTTP error
|
||||
raise
|
||||
elif err.code == 416:
|
||||
# Unable to resume (requested range not satisfiable)
|
||||
try:
|
||||
# Open the connection again without the range header
|
||||
data = compat_urllib_request.urlopen(basic_request)
|
||||
content_length = data.info()['Content-Length']
|
||||
except (compat_urllib_error.HTTPError, ) as err:
|
||||
if err.code < 500 or err.code >= 600:
|
||||
raise
|
||||
else:
|
||||
# Examine the reported length
|
||||
if (content_length is not None and
|
||||
(resume_len - 100 < int(content_length) < resume_len + 100)):
|
||||
# The file had already been fully downloaded.
|
||||
# Explanation to the above condition: in issue #175 it was revealed that
|
||||
# YouTube sometimes adds or removes a few bytes from the end of the file,
|
||||
# changing the file size slightly and causing problems for some users. So
|
||||
# I decided to implement a suggested change and consider the file
|
||||
# completely downloaded if the file size differs less than 100 bytes from
|
||||
# the one in the hard drive.
|
||||
self.report_file_already_downloaded(filename)
|
||||
self.try_rename(tmpfilename, filename)
|
||||
self._hook_progress({
|
||||
'filename': filename,
|
||||
'status': 'finished',
|
||||
})
|
||||
return True
|
||||
else:
|
||||
# The length does not match, we start the download over
|
||||
self.report_unable_to_resume()
|
||||
open_mode = 'wb'
|
||||
break
|
||||
# Retry
|
||||
count += 1
|
||||
if count <= retries:
|
||||
self.report_retry(count, retries)
|
||||
|
||||
if count > retries:
|
||||
self.report_error(u'giving up after %s retries' % retries)
|
||||
return False
|
||||
|
||||
data_len = data.info().get('Content-length', None)
|
||||
if data_len is not None:
|
||||
data_len = int(data_len) + resume_len
|
||||
min_data_len = self.params.get("min_filesize", None)
|
||||
max_data_len = self.params.get("max_filesize", None)
|
||||
if min_data_len is not None and data_len < min_data_len:
|
||||
self.to_screen(u'\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
|
||||
return False
|
||||
if max_data_len is not None and data_len > max_data_len:
|
||||
self.to_screen(u'\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
|
||||
return False
|
||||
|
||||
data_len_str = format_bytes(data_len)
|
||||
byte_counter = 0 + resume_len
|
||||
block_size = self.params.get('buffersize', 1024)
|
||||
start = time.time()
|
||||
while True:
|
||||
# Download and write
|
||||
before = time.time()
|
||||
data_block = data.read(block_size)
|
||||
after = time.time()
|
||||
if len(data_block) == 0:
|
||||
break
|
||||
byte_counter += len(data_block)
|
||||
|
||||
# Open file just in time
|
||||
if stream is None:
|
||||
try:
|
||||
(stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
|
||||
assert stream is not None
|
||||
filename = self.undo_temp_name(tmpfilename)
|
||||
self.report_destination(filename)
|
||||
except (OSError, IOError) as err:
|
||||
self.report_error(u'unable to open for writing: %s' % str(err))
|
||||
return False
|
||||
try:
|
||||
stream.write(data_block)
|
||||
except (IOError, OSError) as err:
|
||||
self.to_stderr(u"\n")
|
||||
self.report_error(u'unable to write data: %s' % str(err))
|
||||
return False
|
||||
if not self.params.get('noresizebuffer', False):
|
||||
block_size = self.best_block_size(after - before, len(data_block))
|
||||
|
||||
# Progress message
|
||||
speed = self.calc_speed(start, time.time(), byte_counter - resume_len)
|
||||
if data_len is None:
|
||||
eta = percent = None
|
||||
else:
|
||||
percent = self.calc_percent(byte_counter, data_len)
|
||||
eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
|
||||
self.report_progress(percent, data_len_str, speed, eta)
|
||||
|
||||
self._hook_progress({
|
||||
'downloaded_bytes': byte_counter,
|
||||
'total_bytes': data_len,
|
||||
'tmpfilename': tmpfilename,
|
||||
'filename': filename,
|
||||
'status': 'downloading',
|
||||
'eta': eta,
|
||||
'speed': speed,
|
||||
})
|
||||
|
||||
# Apply rate limit
|
||||
self.slow_down(start, byte_counter - resume_len)
|
||||
|
||||
if stream is None:
|
||||
self.to_stderr(u"\n")
|
||||
self.report_error(u'Did not get any data blocks')
|
||||
return False
|
||||
stream.close()
|
||||
self.report_finish(data_len_str, (time.time() - start))
|
||||
if data_len is not None and byte_counter != data_len:
|
||||
raise ContentTooShortError(byte_counter, int(data_len))
|
||||
self.try_rename(tmpfilename, filename)
|
||||
|
||||
# Update file modification time
|
||||
if self.params.get('updatetime', True):
|
||||
info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))
|
||||
|
||||
self._hook_progress({
|
||||
'downloaded_bytes': byte_counter,
|
||||
'total_bytes': byte_counter,
|
||||
'filename': filename,
|
||||
'status': 'finished',
|
||||
})
|
||||
|
||||
return True
|
||||
|
||||
def _hook_progress(self, status):
|
||||
real_fd = get_suitable_downloader(info_dict)(self.ydl, self.params)
|
||||
for ph in self._progress_hooks:
|
||||
ph(status)
|
||||
|
||||
def add_progress_hook(self, ph):
|
||||
""" ph gets called on download progress, with a dictionary with the entries
|
||||
* filename: The final filename
|
||||
* status: One of "downloading" and "finished"
|
||||
|
||||
It can also have some of the following entries:
|
||||
|
||||
* downloaded_bytes: Bytes on disks
|
||||
* total_bytes: Total bytes, None if unknown
|
||||
* tmpfilename: The filename we're currently writing to
|
||||
* eta: The estimated time in seconds, None if unknown
|
||||
* speed: The download speed in bytes/second, None if unknown
|
||||
|
||||
Hooks are guaranteed to be called at least once (with status "finished")
|
||||
if the download is successful.
|
||||
"""
|
||||
self._progress_hooks.append(ph)
|
||||
real_fd.add_progress_hook(ph)
|
||||
return real_fd.download(filename, info_dict)
|
||||
|
@@ -10,6 +10,7 @@ from .utils import (
|
||||
PostProcessingError,
|
||||
shell_quote,
|
||||
subtitles_filename,
|
||||
prepend_extension,
|
||||
)
|
||||
|
||||
|
||||
@@ -84,10 +85,10 @@ class FFmpegPostProcessor(PostProcessor):
|
||||
|
||||
files_cmd = []
|
||||
for path in input_paths:
|
||||
files_cmd.extend(['-i', encodeFilename(path)])
|
||||
files_cmd.extend(['-i', encodeFilename(path, True)])
|
||||
cmd = ([self._exes['avconv'] or self._exes['ffmpeg'], '-y'] + files_cmd
|
||||
+ opts +
|
||||
[encodeFilename(self._ffmpeg_filename_argument(out_path))])
|
||||
[encodeFilename(self._ffmpeg_filename_argument(out_path), True)])
|
||||
|
||||
if self._downloader.params.get('verbose', False):
|
||||
self._downloader.to_screen(u'[debug] ffmpeg command line: %s' % shell_quote(cmd))
|
||||
@@ -120,7 +121,10 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
|
||||
if not self._exes['ffprobe'] and not self._exes['avprobe']:
|
||||
raise PostProcessingError(u'ffprobe or avprobe not found. Please install one.')
|
||||
try:
|
||||
cmd = [self._exes['avprobe'] or self._exes['ffprobe'], '-show_streams', encodeFilename(self._ffmpeg_filename_argument(path))]
|
||||
cmd = [
|
||||
self._exes['avprobe'] or self._exes['ffprobe'],
|
||||
'-show_streams',
|
||||
encodeFilename(self._ffmpeg_filename_argument(path), True)]
|
||||
handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE)
|
||||
output = handle.communicate()[0]
|
||||
if handle.wait() != 0:
|
||||
@@ -496,16 +500,22 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
|
||||
return True, info
|
||||
|
||||
filename = info['filepath']
|
||||
ext = os.path.splitext(filename)[1][1:]
|
||||
temp_filename = filename + u'.temp'
|
||||
temp_filename = prepend_extension(filename, 'temp')
|
||||
|
||||
options = ['-c', 'copy']
|
||||
for (name, value) in metadata.items():
|
||||
options.extend(['-metadata', '%s=%s' % (name, value)])
|
||||
options.extend(['-f', ext])
|
||||
|
||||
self._downloader.to_screen(u'[ffmpeg] Adding metadata to \'%s\'' % filename)
|
||||
self.run_ffmpeg(filename, temp_filename, options)
|
||||
os.remove(encodeFilename(filename))
|
||||
os.rename(encodeFilename(temp_filename), encodeFilename(filename))
|
||||
return True, info
|
||||
|
||||
|
||||
class FFmpegMergerPP(FFmpegPostProcessor):
|
||||
def run(self, info):
|
||||
filename = info['filepath']
|
||||
args = ['-c', 'copy']
|
||||
self.run_ffmpeg_multiple_files(info['__files_to_merge'], filename, args)
|
||||
return True, info
|
||||
|
@@ -1,7 +1,7 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import absolute_import, unicode_literals
|
||||
|
||||
import collections
|
||||
import errno
|
||||
@@ -51,9 +51,11 @@ from .utils import (
|
||||
write_json_file,
|
||||
write_string,
|
||||
YoutubeDLHandler,
|
||||
prepend_extension,
|
||||
)
|
||||
from .extractor import get_info_extractor, gen_extractors
|
||||
from .FileDownloader import FileDownloader
|
||||
from .downloader import get_suitable_downloader
|
||||
from .PostProcessor import FFmpegMergerPP
|
||||
from .version import __version__
|
||||
|
||||
|
||||
@@ -148,6 +150,7 @@ class YoutubeDL(object):
|
||||
socket_timeout: Time to wait for unresponsive hosts, in seconds
|
||||
bidi_workaround: Work around buggy terminals without bidirectional text
|
||||
support, using fridibi
|
||||
debug_printtraffic:Print out sent and received HTTP traffic
|
||||
|
||||
The following parameters are not used by YoutubeDL itself, they are used by
|
||||
the FileDownloader:
|
||||
@@ -164,6 +167,8 @@ class YoutubeDL(object):
|
||||
|
||||
def __init__(self, params=None):
|
||||
"""Create a FileDownloader object with the given options."""
|
||||
if params is None:
|
||||
params = {}
|
||||
self._ies = []
|
||||
self._ies_instances = {}
|
||||
self._pps = []
|
||||
@@ -172,7 +177,7 @@ class YoutubeDL(object):
|
||||
self._num_downloads = 0
|
||||
self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
|
||||
self._err_file = sys.stderr
|
||||
self.params = {} if params is None else params
|
||||
self.params = params
|
||||
|
||||
if params.get('bidi_workaround', False):
|
||||
try:
|
||||
@@ -192,13 +197,12 @@ class YoutubeDL(object):
|
||||
['bidiv'] + width_args, **sp_kwargs
|
||||
)
|
||||
except OSError:
|
||||
print('Falling back to fribidi')
|
||||
self._output_process = subprocess.Popen(
|
||||
['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
|
||||
self._output_channel = os.fdopen(master, 'rb')
|
||||
except OSError as ose:
|
||||
if ose.errno == 2:
|
||||
self.report_warning(u'Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
|
||||
self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
|
||||
else:
|
||||
raise
|
||||
|
||||
@@ -207,15 +211,13 @@ class YoutubeDL(object):
|
||||
and not params['restrictfilenames']):
|
||||
# On Python 3, the Unicode filesystem API will throw errors (#1474)
|
||||
self.report_warning(
|
||||
u'Assuming --restrict-filenames since file system encoding '
|
||||
u'cannot encode all charactes. '
|
||||
u'Set the LC_ALL environment variable to fix this.')
|
||||
'Assuming --restrict-filenames since file system encoding '
|
||||
'cannot encode all charactes. '
|
||||
'Set the LC_ALL environment variable to fix this.')
|
||||
self.params['restrictfilenames'] = True
|
||||
|
||||
self.fd = FileDownloader(self, self.params)
|
||||
|
||||
if '%(stitle)s' in self.params.get('outtmpl', ''):
|
||||
self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
|
||||
self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
|
||||
|
||||
self._setup_opener()
|
||||
|
||||
@@ -249,19 +251,22 @@ class YoutubeDL(object):
|
||||
self._pps.append(pp)
|
||||
pp.set_downloader(self)
|
||||
|
||||
def add_progress_hook(self, ph):
|
||||
"""Add the progress hook (currently only for the file downloader)"""
|
||||
self._progress_hooks.append(ph)
|
||||
|
||||
def _bidi_workaround(self, message):
|
||||
if not hasattr(self, '_output_channel'):
|
||||
print('WORKAROUND NOT ENABLED')
|
||||
return message
|
||||
|
||||
assert hasattr(self, '_output_process')
|
||||
assert type(message) == type(u'')
|
||||
line_count = message.count(u'\n') + 1
|
||||
self._output_process.stdin.write((message + u'\n').encode('utf-8'))
|
||||
assert type(message) == type('')
|
||||
line_count = message.count('\n') + 1
|
||||
self._output_process.stdin.write((message + '\n').encode('utf-8'))
|
||||
self._output_process.stdin.flush()
|
||||
res = u''.join(self._output_channel.readline().decode('utf-8')
|
||||
res = ''.join(self._output_channel.readline().decode('utf-8')
|
||||
for _ in range(line_count))
|
||||
return res[:-len(u'\n')]
|
||||
return res[:-len('\n')]
|
||||
|
||||
def to_screen(self, message, skip_eol=False):
|
||||
"""Print message to stdout if not in quiet mode."""
|
||||
@@ -273,19 +278,19 @@ class YoutubeDL(object):
|
||||
self.params['logger'].debug(message)
|
||||
elif not check_quiet or not self.params.get('quiet', False):
|
||||
message = self._bidi_workaround(message)
|
||||
terminator = [u'\n', u''][skip_eol]
|
||||
terminator = ['\n', ''][skip_eol]
|
||||
output = message + terminator
|
||||
|
||||
write_string(output, self._screen_file)
|
||||
|
||||
def to_stderr(self, message):
|
||||
"""Print message to stderr."""
|
||||
assert type(message) == type(u'')
|
||||
assert type(message) == type('')
|
||||
if self.params.get('logger'):
|
||||
self.params['logger'].error(message)
|
||||
else:
|
||||
message = self._bidi_workaround(message)
|
||||
output = message + u'\n'
|
||||
output = message + '\n'
|
||||
write_string(output, self._err_file)
|
||||
|
||||
def to_console_title(self, message):
|
||||
@@ -296,21 +301,21 @@ class YoutubeDL(object):
|
||||
# already of type unicode()
|
||||
ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
|
||||
elif 'TERM' in os.environ:
|
||||
write_string(u'\033]0;%s\007' % message, self._screen_file)
|
||||
write_string('\033]0;%s\007' % message, self._screen_file)
|
||||
|
||||
def save_console_title(self):
|
||||
if not self.params.get('consoletitle', False):
|
||||
return
|
||||
if 'TERM' in os.environ:
|
||||
# Save the title on stack
|
||||
write_string(u'\033[22;0t', self._screen_file)
|
||||
write_string('\033[22;0t', self._screen_file)
|
||||
|
||||
def restore_console_title(self):
|
||||
if not self.params.get('consoletitle', False):
|
||||
return
|
||||
if 'TERM' in os.environ:
|
||||
# Restore the title from stack
|
||||
write_string(u'\033[23;0t', self._screen_file)
|
||||
write_string('\033[23;0t', self._screen_file)
|
||||
|
||||
def __enter__(self):
|
||||
self.save_console_title()
|
||||
@@ -336,13 +341,13 @@ class YoutubeDL(object):
|
||||
if self.params.get('verbose'):
|
||||
if tb is None:
|
||||
if sys.exc_info()[0]: # if .trouble has been called from an except block
|
||||
tb = u''
|
||||
tb = ''
|
||||
if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
|
||||
tb += u''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
|
||||
tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
|
||||
tb += compat_str(traceback.format_exc())
|
||||
else:
|
||||
tb_data = traceback.format_list(traceback.extract_stack())
|
||||
tb = u''.join(tb_data)
|
||||
tb = ''.join(tb_data)
|
||||
self.to_stderr(tb)
|
||||
if not self.params.get('ignoreerrors', False):
|
||||
if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
|
||||
@@ -358,10 +363,10 @@ class YoutubeDL(object):
|
||||
If stderr is a tty file the 'WARNING:' will be colored
|
||||
'''
|
||||
if self._err_file.isatty() and os.name != 'nt':
|
||||
_msg_header = u'\033[0;33mWARNING:\033[0m'
|
||||
_msg_header = '\033[0;33mWARNING:\033[0m'
|
||||
else:
|
||||
_msg_header = u'WARNING:'
|
||||
warning_message = u'%s %s' % (_msg_header, message)
|
||||
_msg_header = 'WARNING:'
|
||||
warning_message = '%s %s' % (_msg_header, message)
|
||||
self.to_stderr(warning_message)
|
||||
|
||||
def report_error(self, message, tb=None):
|
||||
@@ -370,18 +375,18 @@ class YoutubeDL(object):
|
||||
in red if stderr is a tty file.
|
||||
'''
|
||||
if self._err_file.isatty() and os.name != 'nt':
|
||||
_msg_header = u'\033[0;31mERROR:\033[0m'
|
||||
_msg_header = '\033[0;31mERROR:\033[0m'
|
||||
else:
|
||||
_msg_header = u'ERROR:'
|
||||
error_message = u'%s %s' % (_msg_header, message)
|
||||
_msg_header = 'ERROR:'
|
||||
error_message = '%s %s' % (_msg_header, message)
|
||||
self.trouble(error_message, tb)
|
||||
|
||||
def report_file_already_downloaded(self, file_name):
|
||||
"""Report file has already been fully downloaded."""
|
||||
try:
|
||||
self.to_screen(u'[download] %s has already been downloaded' % file_name)
|
||||
self.to_screen('[download] %s has already been downloaded' % file_name)
|
||||
except UnicodeEncodeError:
|
||||
self.to_screen(u'[download] The file has already been downloaded')
|
||||
self.to_screen('[download] The file has already been downloaded')
|
||||
|
||||
def increment_downloads(self):
|
||||
"""Increment the ordinal that assigns a number to each file."""
|
||||
@@ -396,61 +401,61 @@ class YoutubeDL(object):
|
||||
autonumber_size = self.params.get('autonumber_size')
|
||||
if autonumber_size is None:
|
||||
autonumber_size = 5
|
||||
autonumber_templ = u'%0' + str(autonumber_size) + u'd'
|
||||
autonumber_templ = '%0' + str(autonumber_size) + 'd'
|
||||
template_dict['autonumber'] = autonumber_templ % self._num_downloads
|
||||
if template_dict.get('playlist_index') is not None:
|
||||
template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']
|
||||
template_dict['playlist_index'] = '%05d' % template_dict['playlist_index']
|
||||
|
||||
sanitize = lambda k, v: sanitize_filename(
|
||||
compat_str(v),
|
||||
restricted=self.params.get('restrictfilenames'),
|
||||
is_id=(k == u'id'))
|
||||
is_id=(k == 'id'))
|
||||
template_dict = dict((k, sanitize(k, v))
|
||||
for k, v in template_dict.items()
|
||||
if v is not None)
|
||||
template_dict = collections.defaultdict(lambda: u'NA', template_dict)
|
||||
template_dict = collections.defaultdict(lambda: 'NA', template_dict)
|
||||
|
||||
tmpl = os.path.expanduser(self.params['outtmpl'])
|
||||
filename = tmpl % template_dict
|
||||
return filename
|
||||
except ValueError as err:
|
||||
self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')')
|
||||
self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
|
||||
return None
|
||||
|
||||
def _match_entry(self, info_dict):
|
||||
""" Returns None iff the file should be downloaded """
|
||||
|
||||
video_title = info_dict.get('title', info_dict.get('id', u'video'))
|
||||
video_title = info_dict.get('title', info_dict.get('id', 'video'))
|
||||
if 'title' in info_dict:
|
||||
# This can happen when we're just evaluating the playlist
|
||||
title = info_dict['title']
|
||||
matchtitle = self.params.get('matchtitle', False)
|
||||
if matchtitle:
|
||||
if not re.search(matchtitle, title, re.IGNORECASE):
|
||||
return u'"' + title + '" title did not match pattern "' + matchtitle + '"'
|
||||
return '"' + title + '" title did not match pattern "' + matchtitle + '"'
|
||||
rejecttitle = self.params.get('rejecttitle', False)
|
||||
if rejecttitle:
|
||||
if re.search(rejecttitle, title, re.IGNORECASE):
|
||||
return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
|
||||
return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
|
||||
date = info_dict.get('upload_date', None)
|
||||
if date is not None:
|
||||
dateRange = self.params.get('daterange', DateRange())
|
||||
if date not in dateRange:
|
||||
return u'%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
|
||||
return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
|
||||
view_count = info_dict.get('view_count', None)
|
||||
if view_count is not None:
|
||||
min_views = self.params.get('min_views')
|
||||
if min_views is not None and view_count < min_views:
|
||||
return u'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
|
||||
return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
|
||||
max_views = self.params.get('max_views')
|
||||
if max_views is not None and view_count > max_views:
|
||||
return u'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
|
||||
return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
|
||||
age_limit = self.params.get('age_limit')
|
||||
if age_limit is not None:
|
||||
if age_limit < info_dict.get('age_limit', 0):
|
||||
return u'Skipping "' + title + '" because it is age restricted'
|
||||
return 'Skipping "' + title + '" because it is age restricted'
|
||||
if self.in_download_archive(info_dict):
|
||||
return u'%s has already been recorded in archive' % video_title
|
||||
return '%s has already been recorded in archive' % video_title
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
@@ -477,8 +482,8 @@ class YoutubeDL(object):
|
||||
continue
|
||||
|
||||
if not ie.working():
|
||||
self.report_warning(u'The program functionality for this site has been marked as broken, '
|
||||
u'and will probably not work.')
|
||||
self.report_warning('The program functionality for this site has been marked as broken, '
|
||||
'and will probably not work.')
|
||||
|
||||
try:
|
||||
ie_result = ie.extract(url)
|
||||
@@ -511,7 +516,7 @@ class YoutubeDL(object):
|
||||
else:
|
||||
raise
|
||||
else:
|
||||
self.report_error(u'no suitable InfoExtractor: %s' % url)
|
||||
self.report_error('no suitable InfoExtractor: %s' % url)
|
||||
|
||||
def process_ie_result(self, ie_result, download=True, extra_info={}):
|
||||
"""
|
||||
@@ -542,7 +547,7 @@ class YoutubeDL(object):
|
||||
def make_result(embedded_info):
|
||||
new_result = ie_result.copy()
|
||||
for f in ('_type', 'url', 'ext', 'player_url', 'formats',
|
||||
'entries', 'urlhandle', 'ie_key', 'duration',
|
||||
'entries', 'ie_key', 'duration',
|
||||
'subtitles', 'annotations', 'format',
|
||||
'thumbnail', 'thumbnails'):
|
||||
if f in new_result:
|
||||
@@ -562,7 +567,7 @@ class YoutubeDL(object):
|
||||
elif result_type == 'playlist':
|
||||
# We process each entry in the playlist
|
||||
playlist = ie_result.get('title', None) or ie_result.get('id', None)
|
||||
self.to_screen(u'[download] Downloading playlist: %s' % playlist)
|
||||
self.to_screen('[download] Downloading playlist: %s' % playlist)
|
||||
|
||||
playlist_results = []
|
||||
|
||||
@@ -577,11 +582,11 @@ class YoutubeDL(object):
|
||||
n_entries = len(entries)
|
||||
|
||||
self.to_screen(
|
||||
u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
|
||||
"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
|
||||
(ie_result['extractor'], playlist, n_all_entries, n_entries))
|
||||
|
||||
for i, entry in enumerate(entries, 1):
|
||||
self.to_screen(u'[download] Downloading video #%s of %s' % (i, n_entries))
|
||||
self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
|
||||
extra = {
|
||||
'playlist': playlist,
|
||||
'playlist_index': i + playliststart,
|
||||
@@ -593,7 +598,7 @@ class YoutubeDL(object):
|
||||
|
||||
reason = self._match_entry(entry)
|
||||
if reason is not None:
|
||||
self.to_screen(u'[download] ' + reason)
|
||||
self.to_screen('[download] ' + reason)
|
||||
continue
|
||||
|
||||
entry_result = self.process_ie_result(entry,
|
||||
@@ -626,7 +631,7 @@ class YoutubeDL(object):
|
||||
elif format_spec == 'worst':
|
||||
return available_formats[0]
|
||||
else:
|
||||
extensions = [u'mp4', u'flv', u'webm', u'3gp']
|
||||
extensions = ['mp4', 'flv', 'webm', '3gp']
|
||||
if format_spec in extensions:
|
||||
filter_f = lambda f: f['ext'] == format_spec
|
||||
else:
|
||||
@@ -645,7 +650,7 @@ class YoutubeDL(object):
|
||||
info_dict['playlist_index'] = None
|
||||
|
||||
# This extractors handle format selection themselves
|
||||
if info_dict['extractor'] in [u'youtube', u'Youku']:
|
||||
if info_dict['extractor'] in ['Youku']:
|
||||
if download:
|
||||
self.process_info(info_dict)
|
||||
return info_dict
|
||||
@@ -662,33 +667,32 @@ class YoutubeDL(object):
|
||||
if format.get('format_id') is None:
|
||||
format['format_id'] = compat_str(i)
|
||||
if format.get('format') is None:
|
||||
format['format'] = u'{id} - {res}{note}'.format(
|
||||
format['format'] = '{id} - {res}{note}'.format(
|
||||
id=format['format_id'],
|
||||
res=self.format_resolution(format),
|
||||
note=u' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
|
||||
note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
|
||||
)
|
||||
# Automatically determine file extension if missing
|
||||
if 'ext' not in format:
|
||||
format['ext'] = determine_ext(format['url'])
|
||||
|
||||
if self.params.get('listformats', None):
|
||||
self.list_formats(info_dict)
|
||||
return
|
||||
|
||||
format_limit = self.params.get('format_limit', None)
|
||||
if format_limit:
|
||||
formats = list(takewhile_inclusive(
|
||||
lambda f: f['format_id'] != format_limit, formats
|
||||
))
|
||||
if self.params.get('prefer_free_formats'):
|
||||
def _free_formats_key(f):
|
||||
try:
|
||||
ext_ord = [u'flv', u'mp4', u'webm'].index(f['ext'])
|
||||
except ValueError:
|
||||
ext_ord = -1
|
||||
# We only compare the extension if they have the same height and width
|
||||
return (f.get('height'), f.get('width'), ext_ord)
|
||||
formats = sorted(formats, key=_free_formats_key)
|
||||
|
||||
# TODO Central sorting goes here
|
||||
|
||||
if formats[0] is not info_dict:
|
||||
# only set the 'formats' fields if the original info_dict list them
|
||||
# otherwise we end up with a circular reference, the first (and unique)
|
||||
# element in the 'formats' field in info_dict is info_dict itself,
|
||||
# wich can't be exported to json
|
||||
info_dict['formats'] = formats
|
||||
if self.params.get('listformats', None):
|
||||
self.list_formats(info_dict)
|
||||
return
|
||||
|
||||
req_format = self.params.get('format', 'best')
|
||||
if req_format is None:
|
||||
@@ -698,21 +702,35 @@ class YoutubeDL(object):
|
||||
if req_format in ('-1', 'all'):
|
||||
formats_to_download = formats
|
||||
else:
|
||||
# We can accept formats requestd in the format: 34/5/best, we pick
|
||||
# We can accept formats requested in the format: 34/5/best, we pick
|
||||
# the first that is available, starting from left
|
||||
req_formats = req_format.split('/')
|
||||
for rf in req_formats:
|
||||
selected_format = self.select_format(rf, formats)
|
||||
if re.match(r'.+?\+.+?', rf) is not None:
|
||||
# Two formats have been requested like '137+139'
|
||||
format_1, format_2 = rf.split('+')
|
||||
formats_info = (self.select_format(format_1, formats),
|
||||
self.select_format(format_2, formats))
|
||||
if all(formats_info):
|
||||
selected_format = {
|
||||
'requested_formats': formats_info,
|
||||
'format': rf,
|
||||
'ext': formats_info[0]['ext'],
|
||||
}
|
||||
else:
|
||||
selected_format = None
|
||||
else:
|
||||
selected_format = self.select_format(rf, formats)
|
||||
if selected_format is not None:
|
||||
formats_to_download = [selected_format]
|
||||
break
|
||||
if not formats_to_download:
|
||||
raise ExtractorError(u'requested format not available',
|
||||
raise ExtractorError('requested format not available',
|
||||
expected=True)
|
||||
|
||||
if download:
|
||||
if len(formats_to_download) > 1:
|
||||
self.to_screen(u'[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
|
||||
self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
|
||||
for format in formats_to_download:
|
||||
new_info = dict(info_dict)
|
||||
new_info.update(format)
|
||||
@@ -730,7 +748,7 @@ class YoutubeDL(object):
|
||||
|
||||
info_dict['fulltitle'] = info_dict['title']
|
||||
if len(info_dict['title']) > 200:
|
||||
info_dict['title'] = info_dict['title'][:197] + u'...'
|
||||
info_dict['title'] = info_dict['title'][:197] + '...'
|
||||
|
||||
# Keep for backwards compatibility
|
||||
info_dict['stitle'] = info_dict['title']
|
||||
@@ -740,7 +758,7 @@ class YoutubeDL(object):
|
||||
|
||||
reason = self._match_entry(info_dict)
|
||||
if reason is not None:
|
||||
self.to_screen(u'[download] ' + reason)
|
||||
self.to_screen('[download] ' + reason)
|
||||
return
|
||||
|
||||
max_downloads = self.params.get('max_downloads')
|
||||
@@ -757,7 +775,7 @@ class YoutubeDL(object):
|
||||
self.to_stdout(info_dict['id'])
|
||||
if self.params.get('forceurl', False):
|
||||
# For RTMP URLs, also include the playpath
|
||||
self.to_stdout(info_dict['url'] + info_dict.get('play_path', u''))
|
||||
self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
|
||||
if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
|
||||
self.to_stdout(info_dict['thumbnail'])
|
||||
if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
|
||||
@@ -784,37 +802,37 @@ class YoutubeDL(object):
|
||||
if dn != '' and not os.path.exists(dn):
|
||||
os.makedirs(dn)
|
||||
except (OSError, IOError) as err:
|
||||
self.report_error(u'unable to create directory ' + compat_str(err))
|
||||
self.report_error('unable to create directory ' + compat_str(err))
|
||||
return
|
||||
|
||||
if self.params.get('writedescription', False):
|
||||
descfn = filename + u'.description'
|
||||
descfn = filename + '.description'
|
||||
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
|
||||
self.to_screen(u'[info] Video description is already present')
|
||||
self.to_screen('[info] Video description is already present')
|
||||
else:
|
||||
try:
|
||||
self.to_screen(u'[info] Writing video description to: ' + descfn)
|
||||
self.to_screen('[info] Writing video description to: ' + descfn)
|
||||
with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
|
||||
descfile.write(info_dict['description'])
|
||||
except (KeyError, TypeError):
|
||||
self.report_warning(u'There\'s no description to write.')
|
||||
self.report_warning('There\'s no description to write.')
|
||||
except (OSError, IOError):
|
||||
self.report_error(u'Cannot write description file ' + descfn)
|
||||
self.report_error('Cannot write description file ' + descfn)
|
||||
return
|
||||
|
||||
if self.params.get('writeannotations', False):
|
||||
annofn = filename + u'.annotations.xml'
|
||||
annofn = filename + '.annotations.xml'
|
||||
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
|
||||
self.to_screen(u'[info] Video annotations are already present')
|
||||
self.to_screen('[info] Video annotations are already present')
|
||||
else:
|
||||
try:
|
||||
self.to_screen(u'[info] Writing video annotations to: ' + annofn)
|
||||
self.to_screen('[info] Writing video annotations to: ' + annofn)
|
||||
with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
|
||||
annofile.write(info_dict['annotations'])
|
||||
except (KeyError, TypeError):
|
||||
self.report_warning(u'There are no annotations to write.')
|
||||
self.report_warning('There are no annotations to write.')
|
||||
except (OSError, IOError):
|
||||
self.report_error(u'Cannot write annotations file: ' + annofn)
|
||||
self.report_error('Cannot write annotations file: ' + annofn)
|
||||
return
|
||||
|
||||
subtitles_are_requested = any([self.params.get('writesubtitles', False),
|
||||
@@ -832,46 +850,45 @@ class YoutubeDL(object):
|
||||
try:
|
||||
sub_filename = subtitles_filename(filename, sub_lang, sub_format)
|
||||
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
|
||||
self.to_screen(u'[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
|
||||
self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
|
||||
else:
|
||||
self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename)
|
||||
self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
|
||||
with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
|
||||
subfile.write(sub)
|
||||
except (OSError, IOError):
|
||||
self.report_error(u'Cannot write subtitles file ' + descfn)
|
||||
self.report_error('Cannot write subtitles file ' + descfn)
|
||||
return
|
||||
|
||||
if self.params.get('writeinfojson', False):
|
||||
infofn = os.path.splitext(filename)[0] + u'.info.json'
|
||||
infofn = os.path.splitext(filename)[0] + '.info.json'
|
||||
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
|
||||
self.to_screen(u'[info] Video description metadata is already present')
|
||||
self.to_screen('[info] Video description metadata is already present')
|
||||
else:
|
||||
self.to_screen(u'[info] Writing video description metadata as JSON to: ' + infofn)
|
||||
self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
|
||||
try:
|
||||
json_info_dict = dict((k, v) for k, v in info_dict.items() if not k in ['urlhandle'])
|
||||
write_json_file(json_info_dict, encodeFilename(infofn))
|
||||
write_json_file(info_dict, encodeFilename(infofn))
|
||||
except (OSError, IOError):
|
||||
self.report_error(u'Cannot write metadata to JSON file ' + infofn)
|
||||
self.report_error('Cannot write metadata to JSON file ' + infofn)
|
||||
return
|
||||
|
||||
if self.params.get('writethumbnail', False):
|
||||
if info_dict.get('thumbnail') is not None:
|
||||
thumb_format = determine_ext(info_dict['thumbnail'], u'jpg')
|
||||
thumb_filename = os.path.splitext(filename)[0] + u'.' + thumb_format
|
||||
thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
|
||||
thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
|
||||
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
|
||||
self.to_screen(u'[%s] %s: Thumbnail is already present' %
|
||||
self.to_screen('[%s] %s: Thumbnail is already present' %
|
||||
(info_dict['extractor'], info_dict['id']))
|
||||
else:
|
||||
self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
|
||||
self.to_screen('[%s] %s: Downloading thumbnail ...' %
|
||||
(info_dict['extractor'], info_dict['id']))
|
||||
try:
|
||||
uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
|
||||
with open(thumb_filename, 'wb') as thumbf:
|
||||
shutil.copyfileobj(uf, thumbf)
|
||||
self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
|
||||
self.to_screen('[%s] %s: Writing thumbnail to: %s' %
|
||||
(info_dict['extractor'], info_dict['id'], thumb_filename))
|
||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||
self.report_warning(u'Unable to download thumbnail "%s": %s' %
|
||||
self.report_warning('Unable to download thumbnail "%s": %s' %
|
||||
(info_dict['thumbnail'], compat_str(err)))
|
||||
|
||||
if not self.params.get('skip_download', False):
|
||||
@@ -879,21 +896,41 @@ class YoutubeDL(object):
|
||||
success = True
|
||||
else:
|
||||
try:
|
||||
success = self.fd._do_download(filename, info_dict)
|
||||
def dl(name, info):
|
||||
fd = get_suitable_downloader(info)(self, self.params)
|
||||
for ph in self._progress_hooks:
|
||||
fd.add_progress_hook(ph)
|
||||
return fd.download(name, info)
|
||||
if info_dict.get('requested_formats') is not None:
|
||||
downloaded = []
|
||||
success = True
|
||||
for f in info_dict['requested_formats']:
|
||||
new_info = dict(info_dict)
|
||||
new_info.update(f)
|
||||
fname = self.prepare_filename(new_info)
|
||||
fname = prepend_extension(fname, 'f%s' % f['format_id'])
|
||||
downloaded.append(fname)
|
||||
partial_success = dl(fname, new_info)
|
||||
success = success and partial_success
|
||||
info_dict['__postprocessors'] = [FFmpegMergerPP(self)]
|
||||
info_dict['__files_to_merge'] = downloaded
|
||||
else:
|
||||
# Just a single file
|
||||
success = dl(filename, info_dict)
|
||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||
self.report_error(u'unable to download video data: %s' % str(err))
|
||||
self.report_error('unable to download video data: %s' % str(err))
|
||||
return
|
||||
except (OSError, IOError) as err:
|
||||
raise UnavailableVideoError(err)
|
||||
except (ContentTooShortError, ) as err:
|
||||
self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
|
||||
self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
|
||||
return
|
||||
|
||||
if success:
|
||||
try:
|
||||
self.post_process(filename, info_dict)
|
||||
except (PostProcessingError) as err:
|
||||
self.report_error(u'postprocessing: %s' % str(err))
|
||||
self.report_error('postprocessing: %s' % str(err))
|
||||
return
|
||||
|
||||
self.record_download_archive(info_dict)
|
||||
@@ -910,9 +947,9 @@ class YoutubeDL(object):
|
||||
#It also downloads the videos
|
||||
self.extract_info(url)
|
||||
except UnavailableVideoError:
|
||||
self.report_error(u'unable to download video')
|
||||
self.report_error('unable to download video')
|
||||
except MaxDownloadsReached:
|
||||
self.to_screen(u'[info] Maximum number of downloaded files reached.')
|
||||
self.to_screen('[info] Maximum number of downloaded files reached.')
|
||||
raise
|
||||
|
||||
return self._download_retcode
|
||||
@@ -925,7 +962,7 @@ class YoutubeDL(object):
|
||||
except DownloadError:
|
||||
webpage_url = info.get('webpage_url')
|
||||
if webpage_url is not None:
|
||||
self.report_warning(u'The info failed to download, trying with "%s"' % webpage_url)
|
||||
self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
|
||||
return self.download([webpage_url])
|
||||
else:
|
||||
raise
|
||||
@@ -936,7 +973,11 @@ class YoutubeDL(object):
|
||||
info = dict(ie_info)
|
||||
info['filepath'] = filename
|
||||
keep_video = None
|
||||
for pp in self._pps:
|
||||
pps_chain = []
|
||||
if ie_info.get('__postprocessors') is not None:
|
||||
pps_chain.extend(ie_info['__postprocessors'])
|
||||
pps_chain.extend(self._pps)
|
||||
for pp in pps_chain:
|
||||
try:
|
||||
keep_video_wish, new_info = pp.run(info)
|
||||
if keep_video_wish is not None:
|
||||
@@ -949,10 +990,10 @@ class YoutubeDL(object):
|
||||
self.report_error(e.msg)
|
||||
if keep_video is False and not self.params.get('keepvideo', False):
|
||||
try:
|
||||
self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
|
||||
self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
|
||||
os.remove(encodeFilename(filename))
|
||||
except (IOError, OSError):
|
||||
self.report_warning(u'Unable to remove downloaded video file')
|
||||
self.report_warning('Unable to remove downloaded video file')
|
||||
|
||||
def _make_archive_id(self, info_dict):
|
||||
# Future-proof against any change in case
|
||||
@@ -963,7 +1004,7 @@ class YoutubeDL(object):
|
||||
extractor = info_dict.get('ie_key') # key in a playlist
|
||||
if extractor is None:
|
||||
return None # Incomplete video information
|
||||
return extractor.lower() + u' ' + info_dict['id']
|
||||
return extractor.lower() + ' ' + info_dict['id']
|
||||
|
||||
def in_download_archive(self, info_dict):
|
||||
fn = self.params.get('download_archive')
|
||||
@@ -991,53 +1032,59 @@ class YoutubeDL(object):
|
||||
vid_id = self._make_archive_id(info_dict)
|
||||
assert vid_id
|
||||
with locked_file(fn, 'a', encoding='utf-8') as archive_file:
|
||||
archive_file.write(vid_id + u'\n')
|
||||
archive_file.write(vid_id + '\n')
|
||||
|
||||
@staticmethod
|
||||
def format_resolution(format, default='unknown'):
|
||||
if format.get('vcodec') == 'none':
|
||||
return 'audio only'
|
||||
if format.get('_resolution') is not None:
|
||||
return format['_resolution']
|
||||
if format.get('resolution') is not None:
|
||||
return format['resolution']
|
||||
if format.get('height') is not None:
|
||||
if format.get('width') is not None:
|
||||
res = u'%sx%s' % (format['width'], format['height'])
|
||||
res = '%sx%s' % (format['width'], format['height'])
|
||||
else:
|
||||
res = u'%sp' % format['height']
|
||||
res = '%sp' % format['height']
|
||||
elif format.get('width') is not None:
|
||||
res = '?x%d' % format['width']
|
||||
else:
|
||||
res = default
|
||||
return res
|
||||
|
||||
def list_formats(self, info_dict):
|
||||
def format_note(fdict):
|
||||
res = u''
|
||||
res = ''
|
||||
if fdict.get('ext') in ['f4f', 'f4m']:
|
||||
res += '(unsupported) '
|
||||
if fdict.get('format_note') is not None:
|
||||
res += fdict['format_note'] + u' '
|
||||
res += fdict['format_note'] + ' '
|
||||
if fdict.get('tbr') is not None:
|
||||
res += '%4dk ' % fdict['tbr']
|
||||
if (fdict.get('vcodec') is not None and
|
||||
fdict.get('vcodec') != 'none'):
|
||||
res += u'%-5s' % fdict['vcodec']
|
||||
elif fdict.get('vbr') is not None:
|
||||
res += u'video'
|
||||
res += '%-5s@' % fdict['vcodec']
|
||||
elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
|
||||
res += 'video@'
|
||||
if fdict.get('vbr') is not None:
|
||||
res += u'@%4dk' % fdict['vbr']
|
||||
res += '%4dk' % fdict['vbr']
|
||||
if fdict.get('acodec') is not None:
|
||||
if res:
|
||||
res += u', '
|
||||
res += u'%-5s' % fdict['acodec']
|
||||
res += ', '
|
||||
res += '%-5s' % fdict['acodec']
|
||||
elif fdict.get('abr') is not None:
|
||||
if res:
|
||||
res += u', '
|
||||
res += ', '
|
||||
res += 'audio'
|
||||
if fdict.get('abr') is not None:
|
||||
res += u'@%3dk' % fdict['abr']
|
||||
res += '@%3dk' % fdict['abr']
|
||||
if fdict.get('filesize') is not None:
|
||||
if res:
|
||||
res += u', '
|
||||
res += ', '
|
||||
res += format_bytes(fdict['filesize'])
|
||||
return res
|
||||
|
||||
def line(format, idlen=20):
|
||||
return ((u'%-' + compat_str(idlen + 1) + u's%-10s%-12s%s') % (
|
||||
return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
|
||||
format['format_id'],
|
||||
format['ext'],
|
||||
self.format_resolution(format),
|
||||
@@ -1045,7 +1092,7 @@ class YoutubeDL(object):
|
||||
))
|
||||
|
||||
formats = info_dict.get('formats', [info_dict])
|
||||
idlen = max(len(u'format code'),
|
||||
idlen = max(len('format code'),
|
||||
max(len(f['format_id']) for f in formats))
|
||||
formats_s = [line(f, idlen) for f in formats]
|
||||
if len(formats) > 1:
|
||||
@@ -1053,10 +1100,10 @@ class YoutubeDL(object):
|
||||
formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'
|
||||
|
||||
header_line = line({
|
||||
'format_id': u'format code', 'ext': u'extension',
|
||||
'_resolution': u'resolution', 'format_note': u'note'}, idlen=idlen)
|
||||
self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
|
||||
(info_dict['id'], header_line, u"\n".join(formats_s)))
|
||||
'format_id': 'format code', 'ext': 'extension',
|
||||
'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
|
||||
self.to_screen('[info] Available formats for %s:\n%s\n%s' %
|
||||
(info_dict['id'], header_line, '\n'.join(formats_s)))
|
||||
|
||||
def urlopen(self, req):
|
||||
""" Start an HTTP download """
|
||||
@@ -1065,7 +1112,7 @@ class YoutubeDL(object):
|
||||
def print_debug_header(self):
|
||||
if not self.params.get('verbose'):
|
||||
return
|
||||
write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
|
||||
write_string('[debug] youtube-dl version ' + __version__ + '\n')
|
||||
try:
|
||||
sp = subprocess.Popen(
|
||||
['git', 'rev-parse', '--short', 'HEAD'],
|
||||
@@ -1074,20 +1121,20 @@ class YoutubeDL(object):
|
||||
out, err = sp.communicate()
|
||||
out = out.decode().strip()
|
||||
if re.match('[0-9a-f]+', out):
|
||||
write_string(u'[debug] Git HEAD: ' + out + u'\n')
|
||||
write_string('[debug] Git HEAD: ' + out + '\n')
|
||||
except:
|
||||
try:
|
||||
sys.exc_clear()
|
||||
except:
|
||||
pass
|
||||
write_string(u'[debug] Python version %s - %s' %
|
||||
(platform.python_version(), platform_name()) + u'\n')
|
||||
write_string('[debug] Python version %s - %s' %
|
||||
(platform.python_version(), platform_name()) + '\n')
|
||||
|
||||
proxy_map = {}
|
||||
for handler in self._opener.handlers:
|
||||
if hasattr(handler, 'proxies'):
|
||||
proxy_map.update(handler.proxies)
|
||||
write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
|
||||
write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
|
||||
|
||||
def _setup_opener(self):
|
||||
timeout_val = self.params.get('socket_timeout')
|
||||
@@ -1117,10 +1164,13 @@ class YoutubeDL(object):
|
||||
if 'http' in proxies and 'https' not in proxies:
|
||||
proxies['https'] = proxies['http']
|
||||
proxy_handler = compat_urllib_request.ProxyHandler(proxies)
|
||||
|
||||
debuglevel = 1 if self.params.get('debug_printtraffic') else 0
|
||||
https_handler = make_HTTPS_handler(
|
||||
self.params.get('nocheckcertificate', False))
|
||||
self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
|
||||
ydlh = YoutubeDLHandler(debuglevel=debuglevel)
|
||||
opener = compat_urllib_request.build_opener(
|
||||
https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
|
||||
https_handler, proxy_handler, cookie_processor, ydlh)
|
||||
# Delete the default user-agent header, which would otherwise apply in
|
||||
# cases where our custom HTTP handler doesn't come into play
|
||||
# (See https://github.com/rg3/youtube-dl/issues/1309 for details)
|
||||
|
@@ -44,6 +44,7 @@ __license__ = 'Public Domain'
|
||||
|
||||
import codecs
|
||||
import getpass
|
||||
import locale
|
||||
import optparse
|
||||
import os
|
||||
import random
|
||||
@@ -185,7 +186,7 @@ def parseOpts(overrideArguments=None):
|
||||
general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.')
|
||||
general.add_option(
|
||||
'--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR',
|
||||
help='Location in the filesystem where youtube-dl can store downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl .')
|
||||
help='Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.')
|
||||
general.add_option(
|
||||
'--no-cache-dir', action='store_const', const=None, dest='cachedir',
|
||||
help='Disable filesystem caching')
|
||||
@@ -333,7 +334,9 @@ def parseOpts(overrideArguments=None):
|
||||
verbosity.add_option('--youtube-print-sig-code',
|
||||
action='store_true', dest='youtube_print_sig_code', default=False,
|
||||
help=optparse.SUPPRESS_HELP)
|
||||
|
||||
verbosity.add_option('--print-traffic',
|
||||
dest='debug_printtraffic', action='store_true', default=False,
|
||||
help=optparse.SUPPRESS_HELP)
|
||||
|
||||
filesystem.add_option('-t', '--title',
|
||||
action='store_true', dest='usetitle', help='use title in file name (default)', default=False)
|
||||
@@ -473,6 +476,8 @@ def parseOpts(overrideArguments=None):
|
||||
write_string(u'[debug] System config: ' + repr(_hide_login_info(systemConf)) + '\n')
|
||||
write_string(u'[debug] User config: ' + repr(_hide_login_info(userConf)) + '\n')
|
||||
write_string(u'[debug] Command-line args: ' + repr(_hide_login_info(commandLineConf)) + '\n')
|
||||
write_string(u'[debug] Encodings: locale %r, fs %r, out %r, pref: %r\n' %
|
||||
(locale.getpreferredencoding(), sys.getfilesystemencoding(), sys.stdout.encoding, preferredencoding()))
|
||||
|
||||
return parser, opts, args
|
||||
|
||||
@@ -693,6 +698,7 @@ def _real_main(argv=None):
|
||||
'proxy': opts.proxy,
|
||||
'socket_timeout': opts.socket_timeout,
|
||||
'bidi_workaround': opts.bidi_workaround,
|
||||
'debug_printtraffic': opts.debug_printtraffic,
|
||||
}
|
||||
|
||||
with YoutubeDL(ydl_opts) as ydl:
|
||||
|
23
youtube_dl/downloader/__init__.py
Normal file
23
youtube_dl/downloader/__init__.py
Normal file
@@ -0,0 +1,23 @@
|
||||
from .common import FileDownloader
|
||||
from .hls import HlsFD
|
||||
from .http import HttpFD
|
||||
from .mplayer import MplayerFD
|
||||
from .rtmp import RtmpFD
|
||||
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
)
|
||||
|
||||
def get_suitable_downloader(info_dict):
|
||||
"""Get the downloader class that can handle the info dict."""
|
||||
url = info_dict['url']
|
||||
|
||||
if url.startswith('rtmp'):
|
||||
return RtmpFD
|
||||
if determine_ext(url) == u'm3u8':
|
||||
return HlsFD
|
||||
if url.startswith('mms') or url.startswith('rtsp'):
|
||||
return MplayerFD
|
||||
else:
|
||||
return HttpFD
|
||||
|
317
youtube_dl/downloader/common.py
Normal file
317
youtube_dl/downloader/common.py
Normal file
@@ -0,0 +1,317 @@
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
|
||||
from ..utils import (
|
||||
encodeFilename,
|
||||
timeconvert,
|
||||
format_bytes,
|
||||
)
|
||||
|
||||
|
||||
class FileDownloader(object):
|
||||
"""File Downloader class.
|
||||
|
||||
File downloader objects are the ones responsible of downloading the
|
||||
actual video file and writing it to disk.
|
||||
|
||||
File downloaders accept a lot of parameters. In order not to saturate
|
||||
the object constructor with arguments, it receives a dictionary of
|
||||
options instead.
|
||||
|
||||
Available options:
|
||||
|
||||
verbose: Print additional info to stdout.
|
||||
quiet: Do not print messages to stdout.
|
||||
ratelimit: Download speed limit, in bytes/sec.
|
||||
retries: Number of times to retry for HTTP error 5xx
|
||||
buffersize: Size of download buffer in bytes.
|
||||
noresizebuffer: Do not automatically resize the download buffer.
|
||||
continuedl: Try to continue downloads if possible.
|
||||
noprogress: Do not print the progress bar.
|
||||
logtostderr: Log messages to stderr instead of stdout.
|
||||
consoletitle: Display progress in console window's titlebar.
|
||||
nopart: Do not use temporary .part files.
|
||||
updatetime: Use the Last-modified header to set output file timestamps.
|
||||
test: Download only first bytes to test the downloader.
|
||||
min_filesize: Skip files smaller than this size
|
||||
max_filesize: Skip files larger than this size
|
||||
|
||||
Subclasses of this one must re-define the real_download method.
|
||||
"""
|
||||
|
||||
params = None
|
||||
|
||||
def __init__(self, ydl, params):
|
||||
"""Create a FileDownloader object with the given options."""
|
||||
self.ydl = ydl
|
||||
self._progress_hooks = []
|
||||
self.params = params
|
||||
|
||||
@staticmethod
|
||||
def format_seconds(seconds):
|
||||
(mins, secs) = divmod(seconds, 60)
|
||||
(hours, mins) = divmod(mins, 60)
|
||||
if hours > 99:
|
||||
return '--:--:--'
|
||||
if hours == 0:
|
||||
return '%02d:%02d' % (mins, secs)
|
||||
else:
|
||||
return '%02d:%02d:%02d' % (hours, mins, secs)
|
||||
|
||||
@staticmethod
|
||||
def calc_percent(byte_counter, data_len):
|
||||
if data_len is None:
|
||||
return None
|
||||
return float(byte_counter) / float(data_len) * 100.0
|
||||
|
||||
@staticmethod
|
||||
def format_percent(percent):
|
||||
if percent is None:
|
||||
return '---.-%'
|
||||
return '%6s' % ('%3.1f%%' % percent)
|
||||
|
||||
@staticmethod
|
||||
def calc_eta(start, now, total, current):
|
||||
if total is None:
|
||||
return None
|
||||
dif = now - start
|
||||
if current == 0 or dif < 0.001: # One millisecond
|
||||
return None
|
||||
rate = float(current) / dif
|
||||
return int((float(total) - float(current)) / rate)
|
||||
|
||||
@staticmethod
|
||||
def format_eta(eta):
|
||||
if eta is None:
|
||||
return '--:--'
|
||||
return FileDownloader.format_seconds(eta)
|
||||
|
||||
@staticmethod
|
||||
def calc_speed(start, now, bytes):
|
||||
dif = now - start
|
||||
if bytes == 0 or dif < 0.001: # One millisecond
|
||||
return None
|
||||
return float(bytes) / dif
|
||||
|
||||
@staticmethod
|
||||
def format_speed(speed):
|
||||
if speed is None:
|
||||
return '%10s' % '---b/s'
|
||||
return '%10s' % ('%s/s' % format_bytes(speed))
|
||||
|
||||
@staticmethod
|
||||
def best_block_size(elapsed_time, bytes):
|
||||
new_min = max(bytes / 2.0, 1.0)
|
||||
new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
|
||||
if elapsed_time < 0.001:
|
||||
return int(new_max)
|
||||
rate = bytes / elapsed_time
|
||||
if rate > new_max:
|
||||
return int(new_max)
|
||||
if rate < new_min:
|
||||
return int(new_min)
|
||||
return int(rate)
|
||||
|
||||
@staticmethod
|
||||
def parse_bytes(bytestr):
|
||||
"""Parse a string indicating a byte quantity into an integer."""
|
||||
matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
|
||||
if matchobj is None:
|
||||
return None
|
||||
number = float(matchobj.group(1))
|
||||
multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
|
||||
return int(round(number * multiplier))
|
||||
|
||||
def to_screen(self, *args, **kargs):
|
||||
self.ydl.to_screen(*args, **kargs)
|
||||
|
||||
def to_stderr(self, message):
|
||||
self.ydl.to_screen(message)
|
||||
|
||||
def to_console_title(self, message):
|
||||
self.ydl.to_console_title(message)
|
||||
|
||||
def trouble(self, *args, **kargs):
|
||||
self.ydl.trouble(*args, **kargs)
|
||||
|
||||
def report_warning(self, *args, **kargs):
|
||||
self.ydl.report_warning(*args, **kargs)
|
||||
|
||||
def report_error(self, *args, **kargs):
|
||||
self.ydl.report_error(*args, **kargs)
|
||||
|
||||
def slow_down(self, start_time, byte_counter):
|
||||
"""Sleep if the download speed is over the rate limit."""
|
||||
rate_limit = self.params.get('ratelimit', None)
|
||||
if rate_limit is None or byte_counter == 0:
|
||||
return
|
||||
now = time.time()
|
||||
elapsed = now - start_time
|
||||
if elapsed <= 0.0:
|
||||
return
|
||||
speed = float(byte_counter) / elapsed
|
||||
if speed > rate_limit:
|
||||
time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
|
||||
|
||||
def temp_name(self, filename):
|
||||
"""Returns a temporary filename for the given filename."""
|
||||
if self.params.get('nopart', False) or filename == u'-' or \
|
||||
(os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))):
|
||||
return filename
|
||||
return filename + u'.part'
|
||||
|
||||
def undo_temp_name(self, filename):
|
||||
if filename.endswith(u'.part'):
|
||||
return filename[:-len(u'.part')]
|
||||
return filename
|
||||
|
||||
def try_rename(self, old_filename, new_filename):
|
||||
try:
|
||||
if old_filename == new_filename:
|
||||
return
|
||||
os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
|
||||
except (IOError, OSError) as err:
|
||||
self.report_error(u'unable to rename file: %s' % str(err))
|
||||
|
||||
def try_utime(self, filename, last_modified_hdr):
|
||||
"""Try to set the last-modified time of the given file."""
|
||||
if last_modified_hdr is None:
|
||||
return
|
||||
if not os.path.isfile(encodeFilename(filename)):
|
||||
return
|
||||
timestr = last_modified_hdr
|
||||
if timestr is None:
|
||||
return
|
||||
filetime = timeconvert(timestr)
|
||||
if filetime is None:
|
||||
return filetime
|
||||
# Ignore obviously invalid dates
|
||||
if filetime == 0:
|
||||
return
|
||||
try:
|
||||
os.utime(filename, (time.time(), filetime))
|
||||
except:
|
||||
pass
|
||||
return filetime
|
||||
|
||||
def report_destination(self, filename):
|
||||
"""Report destination filename."""
|
||||
self.to_screen(u'[download] Destination: ' + filename)
|
||||
|
||||
def _report_progress_status(self, msg, is_last_line=False):
|
||||
fullmsg = u'[download] ' + msg
|
||||
if self.params.get('progress_with_newline', False):
|
||||
self.to_screen(fullmsg)
|
||||
else:
|
||||
if os.name == 'nt':
|
||||
prev_len = getattr(self, '_report_progress_prev_line_length',
|
||||
0)
|
||||
if prev_len > len(fullmsg):
|
||||
fullmsg += u' ' * (prev_len - len(fullmsg))
|
||||
self._report_progress_prev_line_length = len(fullmsg)
|
||||
clear_line = u'\r'
|
||||
else:
|
||||
clear_line = (u'\r\x1b[K' if sys.stderr.isatty() else u'\r')
|
||||
self.to_screen(clear_line + fullmsg, skip_eol=not is_last_line)
|
||||
self.to_console_title(u'youtube-dl ' + msg)
|
||||
|
||||
def report_progress(self, percent, data_len_str, speed, eta):
|
||||
"""Report download progress."""
|
||||
if self.params.get('noprogress', False):
|
||||
return
|
||||
if eta is not None:
|
||||
eta_str = self.format_eta(eta)
|
||||
else:
|
||||
eta_str = 'Unknown ETA'
|
||||
if percent is not None:
|
||||
percent_str = self.format_percent(percent)
|
||||
else:
|
||||
percent_str = 'Unknown %'
|
||||
speed_str = self.format_speed(speed)
|
||||
|
||||
msg = (u'%s of %s at %s ETA %s' %
|
||||
(percent_str, data_len_str, speed_str, eta_str))
|
||||
self._report_progress_status(msg)
|
||||
|
||||
def report_progress_live_stream(self, downloaded_data_len, speed, elapsed):
|
||||
if self.params.get('noprogress', False):
|
||||
return
|
||||
downloaded_str = format_bytes(downloaded_data_len)
|
||||
speed_str = self.format_speed(speed)
|
||||
elapsed_str = FileDownloader.format_seconds(elapsed)
|
||||
msg = u'%s at %s (%s)' % (downloaded_str, speed_str, elapsed_str)
|
||||
self._report_progress_status(msg)
|
||||
|
||||
def report_finish(self, data_len_str, tot_time):
|
||||
"""Report download finished."""
|
||||
if self.params.get('noprogress', False):
|
||||
self.to_screen(u'[download] Download completed')
|
||||
else:
|
||||
self._report_progress_status(
|
||||
(u'100%% of %s in %s' %
|
||||
(data_len_str, self.format_seconds(tot_time))),
|
||||
is_last_line=True)
|
||||
|
||||
def report_resuming_byte(self, resume_len):
|
||||
"""Report attempt to resume at given byte."""
|
||||
self.to_screen(u'[download] Resuming download at byte %s' % resume_len)
|
||||
|
||||
def report_retry(self, count, retries):
|
||||
"""Report retry in case of HTTP error 5xx"""
|
||||
self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))
|
||||
|
||||
def report_file_already_downloaded(self, file_name):
|
||||
"""Report file has already been fully downloaded."""
|
||||
try:
|
||||
self.to_screen(u'[download] %s has already been downloaded' % file_name)
|
||||
except UnicodeEncodeError:
|
||||
self.to_screen(u'[download] The file has already been downloaded')
|
||||
|
||||
def report_unable_to_resume(self):
|
||||
"""Report it was impossible to resume download."""
|
||||
self.to_screen(u'[download] Unable to resume')
|
||||
|
||||
def download(self, filename, info_dict):
|
||||
"""Download to a filename using the info from info_dict
|
||||
Return True on success and False otherwise
|
||||
"""
|
||||
# Check file already present
|
||||
if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False):
|
||||
self.report_file_already_downloaded(filename)
|
||||
self._hook_progress({
|
||||
'filename': filename,
|
||||
'status': 'finished',
|
||||
'total_bytes': os.path.getsize(encodeFilename(filename)),
|
||||
})
|
||||
return True
|
||||
|
||||
return self.real_download(filename, info_dict)
|
||||
|
||||
def real_download(self, filename, info_dict):
|
||||
"""Real download process. Redefine in subclasses."""
|
||||
raise NotImplementedError(u'This method must be implemented by sublcasses')
|
||||
|
||||
def _hook_progress(self, status):
|
||||
for ph in self._progress_hooks:
|
||||
ph(status)
|
||||
|
||||
def add_progress_hook(self, ph):
|
||||
""" ph gets called on download progress, with a dictionary with the entries
|
||||
* filename: The final filename
|
||||
* status: One of "downloading" and "finished"
|
||||
|
||||
It can also have some of the following entries:
|
||||
|
||||
* downloaded_bytes: Bytes on disks
|
||||
* total_bytes: Total bytes, None if unknown
|
||||
* tmpfilename: The filename we're currently writing to
|
||||
* eta: The estimated time in seconds, None if unknown
|
||||
* speed: The download speed in bytes/second, None if unknown
|
||||
|
||||
Hooks are guaranteed to be called at least once (with status "finished")
|
||||
if the download is successful.
|
||||
"""
|
||||
self._progress_hooks.append(ph)
|
||||
|
44
youtube_dl/downloader/hls.py
Normal file
44
youtube_dl/downloader/hls.py
Normal file
@@ -0,0 +1,44 @@
|
||||
import os
|
||||
import subprocess
|
||||
|
||||
from .common import FileDownloader
|
||||
from ..utils import (
|
||||
encodeFilename,
|
||||
)
|
||||
|
||||
|
||||
class HlsFD(FileDownloader):
|
||||
def real_download(self, filename, info_dict):
|
||||
url = info_dict['url']
|
||||
self.report_destination(filename)
|
||||
tmpfilename = self.temp_name(filename)
|
||||
|
||||
args = ['-y', '-i', url, '-f', 'mp4', '-c', 'copy',
|
||||
'-bsf:a', 'aac_adtstoasc', tmpfilename]
|
||||
|
||||
for program in ['avconv', 'ffmpeg']:
|
||||
try:
|
||||
subprocess.call([program, '-version'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
|
||||
break
|
||||
except (OSError, IOError):
|
||||
pass
|
||||
else:
|
||||
self.report_error(u'm3u8 download detected but ffmpeg or avconv could not be found')
|
||||
cmd = [program] + args
|
||||
|
||||
retval = subprocess.call(cmd)
|
||||
if retval == 0:
|
||||
fsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
self.to_screen(u'\r[%s] %s bytes' % (args[0], fsize))
|
||||
self.try_rename(tmpfilename, filename)
|
||||
self._hook_progress({
|
||||
'downloaded_bytes': fsize,
|
||||
'total_bytes': fsize,
|
||||
'filename': filename,
|
||||
'status': 'finished',
|
||||
})
|
||||
return True
|
||||
else:
|
||||
self.to_stderr(u"\n")
|
||||
self.report_error(u'ffmpeg exited with code %d' % retval)
|
||||
return False
|
186
youtube_dl/downloader/http.py
Normal file
186
youtube_dl/downloader/http.py
Normal file
@@ -0,0 +1,186 @@
|
||||
import os
|
||||
import time
|
||||
|
||||
from .common import FileDownloader
|
||||
from ..utils import (
|
||||
compat_urllib_request,
|
||||
compat_urllib_error,
|
||||
ContentTooShortError,
|
||||
|
||||
encodeFilename,
|
||||
sanitize_open,
|
||||
format_bytes,
|
||||
)
|
||||
|
||||
|
||||
class HttpFD(FileDownloader):
|
||||
def real_download(self, filename, info_dict):
|
||||
url = info_dict['url']
|
||||
tmpfilename = self.temp_name(filename)
|
||||
stream = None
|
||||
|
||||
# Do not include the Accept-Encoding header
|
||||
headers = {'Youtubedl-no-compression': 'True'}
|
||||
if 'user_agent' in info_dict:
|
||||
headers['Youtubedl-user-agent'] = info_dict['user_agent']
|
||||
basic_request = compat_urllib_request.Request(url, None, headers)
|
||||
request = compat_urllib_request.Request(url, None, headers)
|
||||
|
||||
if self.params.get('test', False):
|
||||
request.add_header('Range','bytes=0-10240')
|
||||
|
||||
# Establish possible resume length
|
||||
if os.path.isfile(encodeFilename(tmpfilename)):
|
||||
resume_len = os.path.getsize(encodeFilename(tmpfilename))
|
||||
else:
|
||||
resume_len = 0
|
||||
|
||||
open_mode = 'wb'
|
||||
if resume_len != 0:
|
||||
if self.params.get('continuedl', False):
|
||||
self.report_resuming_byte(resume_len)
|
||||
request.add_header('Range','bytes=%d-' % resume_len)
|
||||
open_mode = 'ab'
|
||||
else:
|
||||
resume_len = 0
|
||||
|
||||
count = 0
|
||||
retries = self.params.get('retries', 0)
|
||||
while count <= retries:
|
||||
# Establish connection
|
||||
try:
|
||||
data = compat_urllib_request.urlopen(request)
|
||||
break
|
||||
except (compat_urllib_error.HTTPError, ) as err:
|
||||
if (err.code < 500 or err.code >= 600) and err.code != 416:
|
||||
# Unexpected HTTP error
|
||||
raise
|
||||
elif err.code == 416:
|
||||
# Unable to resume (requested range not satisfiable)
|
||||
try:
|
||||
# Open the connection again without the range header
|
||||
data = compat_urllib_request.urlopen(basic_request)
|
||||
content_length = data.info()['Content-Length']
|
||||
except (compat_urllib_error.HTTPError, ) as err:
|
||||
if err.code < 500 or err.code >= 600:
|
||||
raise
|
||||
else:
|
||||
# Examine the reported length
|
||||
if (content_length is not None and
|
||||
(resume_len - 100 < int(content_length) < resume_len + 100)):
|
||||
# The file had already been fully downloaded.
|
||||
# Explanation to the above condition: in issue #175 it was revealed that
|
||||
# YouTube sometimes adds or removes a few bytes from the end of the file,
|
||||
# changing the file size slightly and causing problems for some users. So
|
||||
# I decided to implement a suggested change and consider the file
|
||||
# completely downloaded if the file size differs less than 100 bytes from
|
||||
# the one in the hard drive.
|
||||
self.report_file_already_downloaded(filename)
|
||||
self.try_rename(tmpfilename, filename)
|
||||
self._hook_progress({
|
||||
'filename': filename,
|
||||
'status': 'finished',
|
||||
})
|
||||
return True
|
||||
else:
|
||||
# The length does not match, we start the download over
|
||||
self.report_unable_to_resume()
|
||||
open_mode = 'wb'
|
||||
break
|
||||
# Retry
|
||||
count += 1
|
||||
if count <= retries:
|
||||
self.report_retry(count, retries)
|
||||
|
||||
if count > retries:
|
||||
self.report_error(u'giving up after %s retries' % retries)
|
||||
return False
|
||||
|
||||
data_len = data.info().get('Content-length', None)
|
||||
if data_len is not None:
|
||||
data_len = int(data_len) + resume_len
|
||||
min_data_len = self.params.get("min_filesize", None)
|
||||
max_data_len = self.params.get("max_filesize", None)
|
||||
if min_data_len is not None and data_len < min_data_len:
|
||||
self.to_screen(u'\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
|
||||
return False
|
||||
if max_data_len is not None and data_len > max_data_len:
|
||||
self.to_screen(u'\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
|
||||
return False
|
||||
|
||||
data_len_str = format_bytes(data_len)
|
||||
byte_counter = 0 + resume_len
|
||||
block_size = self.params.get('buffersize', 1024)
|
||||
start = time.time()
|
||||
while True:
|
||||
# Download and write
|
||||
before = time.time()
|
||||
data_block = data.read(block_size)
|
||||
after = time.time()
|
||||
if len(data_block) == 0:
|
||||
break
|
||||
byte_counter += len(data_block)
|
||||
|
||||
# Open file just in time
|
||||
if stream is None:
|
||||
try:
|
||||
(stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
|
||||
assert stream is not None
|
||||
filename = self.undo_temp_name(tmpfilename)
|
||||
self.report_destination(filename)
|
||||
except (OSError, IOError) as err:
|
||||
self.report_error(u'unable to open for writing: %s' % str(err))
|
||||
return False
|
||||
try:
|
||||
stream.write(data_block)
|
||||
except (IOError, OSError) as err:
|
||||
self.to_stderr(u"\n")
|
||||
self.report_error(u'unable to write data: %s' % str(err))
|
||||
return False
|
||||
if not self.params.get('noresizebuffer', False):
|
||||
block_size = self.best_block_size(after - before, len(data_block))
|
||||
|
||||
# Progress message
|
||||
speed = self.calc_speed(start, time.time(), byte_counter - resume_len)
|
||||
if data_len is None:
|
||||
eta = percent = None
|
||||
else:
|
||||
percent = self.calc_percent(byte_counter, data_len)
|
||||
eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
|
||||
self.report_progress(percent, data_len_str, speed, eta)
|
||||
|
||||
self._hook_progress({
|
||||
'downloaded_bytes': byte_counter,
|
||||
'total_bytes': data_len,
|
||||
'tmpfilename': tmpfilename,
|
||||
'filename': filename,
|
||||
'status': 'downloading',
|
||||
'eta': eta,
|
||||
'speed': speed,
|
||||
})
|
||||
|
||||
# Apply rate limit
|
||||
self.slow_down(start, byte_counter - resume_len)
|
||||
|
||||
if stream is None:
|
||||
self.to_stderr(u"\n")
|
||||
self.report_error(u'Did not get any data blocks')
|
||||
return False
|
||||
stream.close()
|
||||
self.report_finish(data_len_str, (time.time() - start))
|
||||
if data_len is not None and byte_counter != data_len:
|
||||
raise ContentTooShortError(byte_counter, int(data_len))
|
||||
self.try_rename(tmpfilename, filename)
|
||||
|
||||
# Update file modification time
|
||||
if self.params.get('updatetime', True):
|
||||
info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))
|
||||
|
||||
self._hook_progress({
|
||||
'downloaded_bytes': byte_counter,
|
||||
'total_bytes': byte_counter,
|
||||
'filename': filename,
|
||||
'status': 'finished',
|
||||
})
|
||||
|
||||
return True
|
40
youtube_dl/downloader/mplayer.py
Normal file
40
youtube_dl/downloader/mplayer.py
Normal file
@@ -0,0 +1,40 @@
|
||||
import os
|
||||
import subprocess
|
||||
|
||||
from .common import FileDownloader
|
||||
from ..utils import (
|
||||
encodeFilename,
|
||||
)
|
||||
|
||||
|
||||
class MplayerFD(FileDownloader):
|
||||
def real_download(self, filename, info_dict):
|
||||
url = info_dict['url']
|
||||
self.report_destination(filename)
|
||||
tmpfilename = self.temp_name(filename)
|
||||
|
||||
args = ['mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy', '-dumpstream', '-dumpfile', tmpfilename, url]
|
||||
# Check for mplayer first
|
||||
try:
|
||||
subprocess.call(['mplayer', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
|
||||
except (OSError, IOError):
|
||||
self.report_error(u'MMS or RTSP download detected but "%s" could not be run' % args[0] )
|
||||
return False
|
||||
|
||||
# Download using mplayer.
|
||||
retval = subprocess.call(args)
|
||||
if retval == 0:
|
||||
fsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
self.to_screen(u'\r[%s] %s bytes' % (args[0], fsize))
|
||||
self.try_rename(tmpfilename, filename)
|
||||
self._hook_progress({
|
||||
'downloaded_bytes': fsize,
|
||||
'total_bytes': fsize,
|
||||
'filename': filename,
|
||||
'status': 'finished',
|
||||
})
|
||||
return True
|
||||
else:
|
||||
self.to_stderr(u"\n")
|
||||
self.report_error(u'mplayer exited with code %d' % retval)
|
||||
return False
|
178
youtube_dl/downloader/rtmp.py
Normal file
178
youtube_dl/downloader/rtmp.py
Normal file
@@ -0,0 +1,178 @@
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
|
||||
from .common import FileDownloader
|
||||
from ..utils import (
|
||||
encodeFilename,
|
||||
format_bytes,
|
||||
)
|
||||
|
||||
|
||||
class RtmpFD(FileDownloader):
|
||||
def real_download(self, filename, info_dict):
|
||||
def run_rtmpdump(args):
|
||||
start = time.time()
|
||||
resume_percent = None
|
||||
resume_downloaded_data_len = None
|
||||
proc = subprocess.Popen(args, stderr=subprocess.PIPE)
|
||||
cursor_in_new_line = True
|
||||
proc_stderr_closed = False
|
||||
while not proc_stderr_closed:
|
||||
# read line from stderr
|
||||
line = u''
|
||||
while True:
|
||||
char = proc.stderr.read(1)
|
||||
if not char:
|
||||
proc_stderr_closed = True
|
||||
break
|
||||
if char in [b'\r', b'\n']:
|
||||
break
|
||||
line += char.decode('ascii', 'replace')
|
||||
if not line:
|
||||
# proc_stderr_closed is True
|
||||
continue
|
||||
mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec \(([0-9]{1,2}\.[0-9])%\)', line)
|
||||
if mobj:
|
||||
downloaded_data_len = int(float(mobj.group(1))*1024)
|
||||
percent = float(mobj.group(2))
|
||||
if not resume_percent:
|
||||
resume_percent = percent
|
||||
resume_downloaded_data_len = downloaded_data_len
|
||||
eta = self.calc_eta(start, time.time(), 100-resume_percent, percent-resume_percent)
|
||||
speed = self.calc_speed(start, time.time(), downloaded_data_len-resume_downloaded_data_len)
|
||||
data_len = None
|
||||
if percent > 0:
|
||||
data_len = int(downloaded_data_len * 100 / percent)
|
||||
data_len_str = u'~' + format_bytes(data_len)
|
||||
self.report_progress(percent, data_len_str, speed, eta)
|
||||
cursor_in_new_line = False
|
||||
self._hook_progress({
|
||||
'downloaded_bytes': downloaded_data_len,
|
||||
'total_bytes': data_len,
|
||||
'tmpfilename': tmpfilename,
|
||||
'filename': filename,
|
||||
'status': 'downloading',
|
||||
'eta': eta,
|
||||
'speed': speed,
|
||||
})
|
||||
else:
|
||||
# no percent for live streams
|
||||
mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec', line)
|
||||
if mobj:
|
||||
downloaded_data_len = int(float(mobj.group(1))*1024)
|
||||
time_now = time.time()
|
||||
speed = self.calc_speed(start, time_now, downloaded_data_len)
|
||||
self.report_progress_live_stream(downloaded_data_len, speed, time_now - start)
|
||||
cursor_in_new_line = False
|
||||
self._hook_progress({
|
||||
'downloaded_bytes': downloaded_data_len,
|
||||
'tmpfilename': tmpfilename,
|
||||
'filename': filename,
|
||||
'status': 'downloading',
|
||||
'speed': speed,
|
||||
})
|
||||
elif self.params.get('verbose', False):
|
||||
if not cursor_in_new_line:
|
||||
self.to_screen(u'')
|
||||
cursor_in_new_line = True
|
||||
self.to_screen(u'[rtmpdump] '+line)
|
||||
proc.wait()
|
||||
if not cursor_in_new_line:
|
||||
self.to_screen(u'')
|
||||
return proc.returncode
|
||||
|
||||
url = info_dict['url']
|
||||
player_url = info_dict.get('player_url', None)
|
||||
page_url = info_dict.get('page_url', None)
|
||||
play_path = info_dict.get('play_path', None)
|
||||
tc_url = info_dict.get('tc_url', None)
|
||||
live = info_dict.get('rtmp_live', False)
|
||||
conn = info_dict.get('rtmp_conn', None)
|
||||
|
||||
self.report_destination(filename)
|
||||
tmpfilename = self.temp_name(filename)
|
||||
test = self.params.get('test', False)
|
||||
|
||||
# Check for rtmpdump first
|
||||
try:
|
||||
subprocess.call(['rtmpdump', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
|
||||
except (OSError, IOError):
|
||||
self.report_error(u'RTMP download detected but "rtmpdump" could not be run')
|
||||
return False
|
||||
|
||||
# Download using rtmpdump. rtmpdump returns exit code 2 when
|
||||
# the connection was interrumpted and resuming appears to be
|
||||
# possible. This is part of rtmpdump's normal usage, AFAIK.
|
||||
basic_args = ['rtmpdump', '--verbose', '-r', url, '-o', tmpfilename]
|
||||
if player_url is not None:
|
||||
basic_args += ['--swfVfy', player_url]
|
||||
if page_url is not None:
|
||||
basic_args += ['--pageUrl', page_url]
|
||||
if play_path is not None:
|
||||
basic_args += ['--playpath', play_path]
|
||||
if tc_url is not None:
|
||||
basic_args += ['--tcUrl', url]
|
||||
if test:
|
||||
basic_args += ['--stop', '1']
|
||||
if live:
|
||||
basic_args += ['--live']
|
||||
if conn:
|
||||
basic_args += ['--conn', conn]
|
||||
args = basic_args + [[], ['--resume', '--skip', '1']][self.params.get('continuedl', False)]
|
||||
|
||||
if sys.platform == 'win32' and sys.version_info < (3, 0):
|
||||
# Windows subprocess module does not actually support Unicode
|
||||
# on Python 2.x
|
||||
# See http://stackoverflow.com/a/9951851/35070
|
||||
subprocess_encoding = sys.getfilesystemencoding()
|
||||
args = [a.encode(subprocess_encoding, 'ignore') for a in args]
|
||||
else:
|
||||
subprocess_encoding = None
|
||||
|
||||
if self.params.get('verbose', False):
|
||||
if subprocess_encoding:
|
||||
str_args = [
|
||||
a.decode(subprocess_encoding) if isinstance(a, bytes) else a
|
||||
for a in args]
|
||||
else:
|
||||
str_args = args
|
||||
try:
|
||||
import pipes
|
||||
shell_quote = lambda args: ' '.join(map(pipes.quote, str_args))
|
||||
except ImportError:
|
||||
shell_quote = repr
|
||||
self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(str_args))
|
||||
|
||||
retval = run_rtmpdump(args)
|
||||
|
||||
while (retval == 2 or retval == 1) and not test:
|
||||
prevsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
self.to_screen(u'[rtmpdump] %s bytes' % prevsize)
|
||||
time.sleep(5.0) # This seems to be needed
|
||||
retval = run_rtmpdump(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
|
||||
cursize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
if prevsize == cursize and retval == 1:
|
||||
break
|
||||
# Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
|
||||
if prevsize == cursize and retval == 2 and cursize > 1024:
|
||||
self.to_screen(u'[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
|
||||
retval = 0
|
||||
break
|
||||
if retval == 0 or (test and retval == 2):
|
||||
fsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
self.to_screen(u'[rtmpdump] %s bytes' % fsize)
|
||||
self.try_rename(tmpfilename, filename)
|
||||
self._hook_progress({
|
||||
'downloaded_bytes': fsize,
|
||||
'total_bytes': fsize,
|
||||
'filename': filename,
|
||||
'status': 'finished',
|
||||
})
|
||||
return True
|
||||
else:
|
||||
self.to_stderr(u"\n")
|
||||
self.report_error(u'rtmpdump exited with code %d' % retval)
|
||||
return False
|
@@ -28,6 +28,7 @@ from .channel9 import Channel9IE
|
||||
from .cinemassacre import CinemassacreIE
|
||||
from .clipfish import ClipfishIE
|
||||
from .clipsyndicate import ClipsyndicateIE
|
||||
from .cmt import CMTIE
|
||||
from .cnn import CNNIE
|
||||
from .collegehumor import CollegeHumorIE
|
||||
from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
|
||||
@@ -79,7 +80,10 @@ from .hotnewhiphop import HotNewHipHopIE
|
||||
from .howcast import HowcastIE
|
||||
from .hypem import HypemIE
|
||||
from .ign import IGNIE, OneUPIE
|
||||
from .imdb import ImdbIE
|
||||
from .imdb import (
|
||||
ImdbIE,
|
||||
ImdbListIE
|
||||
)
|
||||
from .ina import InaIE
|
||||
from .infoq import InfoQIE
|
||||
from .instagram import InstagramIE
|
||||
@@ -91,12 +95,18 @@ from .ivi import (
|
||||
from .jeuxvideo import JeuxVideoIE
|
||||
from .jukebox import JukeboxIE
|
||||
from .justintv import JustinTVIE
|
||||
from .jpopsukitv import JpopsukiIE
|
||||
from .kankan import KankanIE
|
||||
from .keezmovies import KeezMoviesIE
|
||||
from .kickstarter import KickStarterIE
|
||||
from .keek import KeekIE
|
||||
from .liveleak import LiveLeakIE
|
||||
from .livestream import LivestreamIE, LivestreamOriginalIE
|
||||
from .lynda import (
|
||||
LyndaIE,
|
||||
LyndaCourseIE
|
||||
)
|
||||
from .macgamestore import MacGameStoreIE
|
||||
from .mdr import MDRIE
|
||||
from .metacafe import MetacafeIE
|
||||
from .metacritic import MetacriticIE
|
||||
|
@@ -110,7 +110,8 @@ class AppleTrailersIE(InfoExtractor):
|
||||
'width': format['width'],
|
||||
'height': int(format['height']),
|
||||
})
|
||||
formats = sorted(formats, key=lambda f: (f['height'], f['width']))
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
playlist.append({
|
||||
'_type': 'video',
|
||||
|
@@ -10,14 +10,14 @@ from ..utils import (
|
||||
|
||||
|
||||
class BandcampIE(InfoExtractor):
|
||||
IE_NAME = u'Bandcamp'
|
||||
_VALID_URL = r'http://.*?\.bandcamp\.com/track/(?P<title>.*)'
|
||||
_TESTS = [{
|
||||
u'url': u'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
|
||||
u'file': u'1812978515.mp3',
|
||||
u'md5': u'cdeb30cdae1921719a3cbcab696ef53c',
|
||||
u'md5': u'c557841d5e50261777a6585648adf439',
|
||||
u'info_dict': {
|
||||
u"title": u"youtube-dl test song \"'/\\\u00e4\u21ad"
|
||||
u"title": u"youtube-dl \"'/\\\u00e4\u21ad - youtube-dl test song \"'/\\\u00e4\u21ad",
|
||||
u"duration": 10,
|
||||
},
|
||||
u'skip': u'There is a limit of 200 free downloads / month for the test song'
|
||||
}]
|
||||
@@ -30,29 +30,42 @@ class BandcampIE(InfoExtractor):
|
||||
m_download = re.search(r'freeDownloadPage: "(.*?)"', webpage)
|
||||
if m_download is None:
|
||||
m_trackinfo = re.search(r'trackinfo: (.+),\s*?\n', webpage)
|
||||
if m_trackinfo:
|
||||
json_code = m_trackinfo.group(1)
|
||||
data = json.loads(json_code)
|
||||
if m_trackinfo:
|
||||
json_code = m_trackinfo.group(1)
|
||||
data = json.loads(json_code)
|
||||
d = data[0]
|
||||
|
||||
duration = int(round(d['duration']))
|
||||
formats = []
|
||||
for format_id, format_url in d['file'].items():
|
||||
ext, _, abr_str = format_id.partition('-')
|
||||
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': format_url,
|
||||
'ext': format_id.partition('-')[0],
|
||||
'vcodec': 'none',
|
||||
'acodec': format_id.partition('-')[0],
|
||||
'abr': int(format_id.partition('-')[2]),
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
for d in data:
|
||||
formats = [{
|
||||
'format_id': 'format_id',
|
||||
'url': format_url,
|
||||
'ext': format_id.partition('-')[0]
|
||||
} for format_id, format_url in sorted(d['file'].items())]
|
||||
return {
|
||||
'id': compat_str(d['id']),
|
||||
'title': d['title'],
|
||||
'formats': formats,
|
||||
'duration': duration,
|
||||
}
|
||||
else:
|
||||
raise ExtractorError(u'No free songs found')
|
||||
else:
|
||||
raise ExtractorError(u'No free songs found')
|
||||
|
||||
download_link = m_download.group(1)
|
||||
id = re.search(r'var TralbumData = {(.*?)id: (?P<id>\d*?)$',
|
||||
webpage, re.MULTILINE|re.DOTALL).group('id')
|
||||
video_id = re.search(
|
||||
r'var TralbumData = {(.*?)id: (?P<id>\d*?)$',
|
||||
webpage, re.MULTILINE | re.DOTALL).group('id')
|
||||
|
||||
download_webpage = self._download_webpage(download_link, id,
|
||||
download_webpage = self._download_webpage(download_link, video_id,
|
||||
'Downloading free downloads page')
|
||||
# We get the dictionary of the track from some javascrip code
|
||||
info = re.search(r'items: (.*?),$',
|
||||
@@ -66,21 +79,21 @@ class BandcampIE(InfoExtractor):
|
||||
m_url = re.match(re_url, initial_url)
|
||||
#We build the url we will use to get the final track url
|
||||
# This url is build in Bandcamp in the script download_bunde_*.js
|
||||
request_url = '%s/statdownload/track?enc=mp3-320&fsig=%s&id=%s&ts=%s&.rand=665028774616&.vrs=1' % (m_url.group('server'), m_url.group('fsig'), id, m_url.group('ts'))
|
||||
request_url = '%s/statdownload/track?enc=mp3-320&fsig=%s&id=%s&ts=%s&.rand=665028774616&.vrs=1' % (m_url.group('server'), m_url.group('fsig'), video_id, m_url.group('ts'))
|
||||
final_url_webpage = self._download_webpage(request_url, id, 'Requesting download url')
|
||||
# If we could correctly generate the .rand field the url would be
|
||||
#in the "download_url" key
|
||||
final_url = re.search(r'"retry_url":"(.*?)"', final_url_webpage).group(1)
|
||||
|
||||
track_info = {'id':id,
|
||||
'title' : info[u'title'],
|
||||
'ext' : 'mp3',
|
||||
'url' : final_url,
|
||||
'thumbnail' : info[u'thumb_url'],
|
||||
'uploader' : info[u'artist']
|
||||
}
|
||||
|
||||
return [track_info]
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': info[u'title'],
|
||||
'ext': 'mp3',
|
||||
'vcodec': 'none',
|
||||
'url': final_url,
|
||||
'thumbnail': info[u'thumb_url'],
|
||||
'uploader': info[u'artist'],
|
||||
}
|
||||
|
||||
|
||||
class BandcampAlbumIE(InfoExtractor):
|
||||
@@ -117,7 +130,7 @@ class BandcampAlbumIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, title)
|
||||
tracks_paths = re.findall(r'<a href="(.*?)" itemprop="url">', webpage)
|
||||
if not tracks_paths:
|
||||
raise ExtractorError(u'The page doesn\'t contain any track')
|
||||
raise ExtractorError(u'The page doesn\'t contain any tracks')
|
||||
entries = [
|
||||
self.url_result(compat_urlparse.urljoin(url, t_path), ie=BandcampIE.ie_key())
|
||||
for t_path in tracks_paths]
|
||||
|
@@ -61,9 +61,10 @@ class BlinkxIE(InfoExtractor):
|
||||
elif m['type'] in ('flv', 'mp4'):
|
||||
vcodec = remove_start(m['vcodec'], 'ff')
|
||||
acodec = remove_start(m['acodec'], 'ff')
|
||||
tbr = (int(m['vbr']) + int(m['abr'])) // 1000
|
||||
format_id = (u'%s-%sk-%s' %
|
||||
(vcodec,
|
||||
(int(m['vbr']) + int(m['abr'])) // 1000,
|
||||
tbr,
|
||||
m['w']))
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
@@ -72,10 +73,12 @@ class BlinkxIE(InfoExtractor):
|
||||
'acodec': acodec,
|
||||
'abr': int(m['abr']) // 1000,
|
||||
'vbr': int(m['vbr']) // 1000,
|
||||
'tbr': tbr,
|
||||
'width': int(m['w']),
|
||||
'height': int(m['h']),
|
||||
})
|
||||
formats.sort(key=lambda f: (f['width'], f['vbr'], f['abr']))
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': display_id,
|
||||
|
@@ -1,6 +1,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import datetime
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import socket
|
||||
|
||||
@@ -22,42 +23,35 @@ class BlipTVIE(InfoExtractor):
|
||||
"""Information extractor for blip.tv"""
|
||||
|
||||
_VALID_URL = r'^(?:https?://)?(?:\w+\.)?blip\.tv/((.+/)|(play/)|(api\.swf#))(.+)$'
|
||||
_URL_EXT = r'^.*\.([a-z0-9]+)$'
|
||||
IE_NAME = u'blip.tv'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352',
|
||||
u'file': u'5779306.m4v',
|
||||
u'md5': u'80baf1ec5c3d2019037c1c707d676b9f',
|
||||
u'info_dict': {
|
||||
u"upload_date": u"20111205",
|
||||
u"description": u"md5:9bc31f227219cde65e47eeec8d2dc596",
|
||||
u"uploader": u"Comic Book Resources - CBR TV",
|
||||
u"title": u"CBR EXCLUSIVE: \"Gotham City Imposters\" Bats VS Jokerz Short 3"
|
||||
'url': 'http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352',
|
||||
'file': '5779306.mov',
|
||||
'md5': 'c6934ad0b6acf2bd920720ec888eb812',
|
||||
'info_dict': {
|
||||
'upload_date': '20111205',
|
||||
'description': 'md5:9bc31f227219cde65e47eeec8d2dc596',
|
||||
'uploader': 'Comic Book Resources - CBR TV',
|
||||
'title': 'CBR EXCLUSIVE: "Gotham City Imposters" Bats VS Jokerz Short 3',
|
||||
}
|
||||
}
|
||||
|
||||
def report_direct_download(self, title):
|
||||
"""Report information extraction."""
|
||||
self.to_screen(u'%s: Direct download detected' % title)
|
||||
self.to_screen('%s: Direct download detected' % title)
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
||||
raise ExtractorError('Invalid URL: %s' % url)
|
||||
|
||||
# See https://github.com/rg3/youtube-dl/issues/857
|
||||
api_mobj = re.match(r'http://a\.blip\.tv/api\.swf#(?P<video_id>[\d\w]+)', url)
|
||||
if api_mobj is not None:
|
||||
url = 'http://blip.tv/play/g_%s' % api_mobj.group('video_id')
|
||||
urlp = compat_urllib_parse_urlparse(url)
|
||||
if urlp.path.startswith('/play/'):
|
||||
response = self._request_webpage(url, None, False)
|
||||
redirecturl = response.geturl()
|
||||
rurlp = compat_urllib_parse_urlparse(redirecturl)
|
||||
file_id = compat_parse_qs(rurlp.fragment)['file'][0].rpartition('/')[2]
|
||||
url = 'http://blip.tv/a/a-' + file_id
|
||||
return self._real_extract(url)
|
||||
|
||||
embed_mobj = re.search(r'^(?:https?://)?(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)([a-zA-Z0-9]+)', url)
|
||||
if embed_mobj:
|
||||
info_url = 'http://blip.tv/play/%s.x?p=1' % embed_mobj.group(1)
|
||||
info_page = self._download_webpage(info_url, embed_mobj.group(1))
|
||||
video_id = self._search_regex(r'data-episode-id="(\d+)', info_page, 'video_id')
|
||||
return self.url_result('http://blip.tv/a/a-' + video_id, 'BlipTV')
|
||||
|
||||
if '?' in url:
|
||||
cchar = '&'
|
||||
@@ -67,67 +61,55 @@ class BlipTVIE(InfoExtractor):
|
||||
request = compat_urllib_request.Request(json_url)
|
||||
request.add_header('User-Agent', 'iTunes/10.6.1')
|
||||
self.report_extraction(mobj.group(1))
|
||||
info = None
|
||||
urlh = self._request_webpage(request, None, False,
|
||||
u'unable to download video info webpage')
|
||||
if urlh.headers.get('Content-Type', '').startswith('video/'): # Direct download
|
||||
basename = url.split('/')[-1]
|
||||
title,ext = os.path.splitext(basename)
|
||||
title = title.decode('UTF-8')
|
||||
ext = ext.replace('.', '')
|
||||
self.report_direct_download(title)
|
||||
info = {
|
||||
'id': title,
|
||||
'url': url,
|
||||
'uploader': None,
|
||||
'upload_date': None,
|
||||
'title': title,
|
||||
'ext': ext,
|
||||
'urlhandle': urlh
|
||||
'unable to download video info webpage')
|
||||
|
||||
try:
|
||||
json_code_bytes = urlh.read()
|
||||
json_code = json_code_bytes.decode('utf-8')
|
||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||
raise ExtractorError('Unable to read video info webpage: %s' % compat_str(err))
|
||||
|
||||
try:
|
||||
json_data = json.loads(json_code)
|
||||
if 'Post' in json_data:
|
||||
data = json_data['Post']
|
||||
else:
|
||||
data = json_data
|
||||
|
||||
upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d')
|
||||
formats = []
|
||||
if 'additionalMedia' in data:
|
||||
for f in sorted(data['additionalMedia'], key=lambda f: int(f['media_height'])):
|
||||
if not int(f['media_width']): # filter m3u8
|
||||
continue
|
||||
formats.append({
|
||||
'url': f['url'],
|
||||
'format_id': f['role'],
|
||||
'width': int(f['media_width']),
|
||||
'height': int(f['media_height']),
|
||||
})
|
||||
else:
|
||||
formats.append({
|
||||
'url': data['media']['url'],
|
||||
'width': int(data['media']['width']),
|
||||
'height': int(data['media']['height']),
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': compat_str(data['item_id']),
|
||||
'uploader': data['display_name'],
|
||||
'upload_date': upload_date,
|
||||
'title': data['title'],
|
||||
'thumbnail': data['thumbnailUrl'],
|
||||
'description': data['description'],
|
||||
'user_agent': 'iTunes/10.6.1',
|
||||
'formats': formats,
|
||||
}
|
||||
if info is None: # Regular URL
|
||||
try:
|
||||
json_code_bytes = urlh.read()
|
||||
json_code = json_code_bytes.decode('utf-8')
|
||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||
raise ExtractorError(u'Unable to read video info webpage: %s' % compat_str(err))
|
||||
|
||||
try:
|
||||
json_data = json.loads(json_code)
|
||||
if 'Post' in json_data:
|
||||
data = json_data['Post']
|
||||
else:
|
||||
data = json_data
|
||||
|
||||
upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d')
|
||||
if 'additionalMedia' in data:
|
||||
formats = sorted(data['additionalMedia'], key=lambda f: int(f['media_height']))
|
||||
best_format = formats[-1]
|
||||
video_url = best_format['url']
|
||||
else:
|
||||
video_url = data['media']['url']
|
||||
umobj = re.match(self._URL_EXT, video_url)
|
||||
if umobj is None:
|
||||
raise ValueError('Can not determine filename extension')
|
||||
ext = umobj.group(1)
|
||||
|
||||
info = {
|
||||
'id': compat_str(data['item_id']),
|
||||
'url': video_url,
|
||||
'uploader': data['display_name'],
|
||||
'upload_date': upload_date,
|
||||
'title': data['title'],
|
||||
'ext': ext,
|
||||
'format': data['media']['mimeType'],
|
||||
'thumbnail': data['thumbnailUrl'],
|
||||
'description': data['description'],
|
||||
'player_url': data['embedUrl'],
|
||||
'user_agent': 'iTunes/10.6.1',
|
||||
}
|
||||
except (ValueError,KeyError) as err:
|
||||
raise ExtractorError(u'Unable to parse video information: %s' % repr(err))
|
||||
|
||||
return [info]
|
||||
except (ValueError, KeyError) as err:
|
||||
raise ExtractorError('Unable to parse video information: %s' % repr(err))
|
||||
|
||||
|
||||
class BlipTVUserIE(InfoExtractor):
|
||||
@@ -135,19 +117,19 @@ class BlipTVUserIE(InfoExtractor):
|
||||
|
||||
_VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?blip\.tv/)|bliptvuser:)([^/]+)/*$'
|
||||
_PAGE_SIZE = 12
|
||||
IE_NAME = u'blip.tv:user'
|
||||
IE_NAME = 'blip.tv:user'
|
||||
|
||||
def _real_extract(self, url):
|
||||
# Extract username
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
||||
raise ExtractorError('Invalid URL: %s' % url)
|
||||
|
||||
username = mobj.group(1)
|
||||
|
||||
page_base = 'http://m.blip.tv/pr/show_get_full_episode_list?users_id=%s&lite=0&esi=1'
|
||||
|
||||
page = self._download_webpage(url, username, u'Downloading user page')
|
||||
page = self._download_webpage(url, username, 'Downloading user page')
|
||||
mobj = re.search(r'data-users-id="([^"]+)"', page)
|
||||
page_base = page_base % mobj.group(1)
|
||||
|
||||
@@ -163,7 +145,7 @@ class BlipTVUserIE(InfoExtractor):
|
||||
while True:
|
||||
url = page_base + "&page=" + str(pagenum)
|
||||
page = self._download_webpage(url, username,
|
||||
u'Downloading video ids from page %d' % pagenum)
|
||||
'Downloading video ids from page %d' % pagenum)
|
||||
|
||||
# Extract video identifiers
|
||||
ids_in_page = []
|
||||
@@ -185,6 +167,6 @@ class BlipTVUserIE(InfoExtractor):
|
||||
|
||||
pagenum += 1
|
||||
|
||||
urls = [u'http://blip.tv/%s' % video_id for video_id in video_ids]
|
||||
urls = ['http://blip.tv/%s' % video_id for video_id in video_ids]
|
||||
url_entries = [self.url_result(vurl, 'BlipTV') for vurl in urls]
|
||||
return [self.playlist_result(url_entries, playlist_title = username)]
|
||||
|
@@ -76,14 +76,18 @@ class Channel9IE(InfoExtractor):
|
||||
</div>)? # File size part may be missing
|
||||
'''
|
||||
# Extract known formats
|
||||
formats = [{'url': x.group('url'),
|
||||
'format_id': x.group('quality'),
|
||||
'format_note': x.group('note'),
|
||||
'format': '%s (%s)' % (x.group('quality'), x.group('note')),
|
||||
'filesize': self._restore_bytes(x.group('filesize')), # File size is approximate
|
||||
} for x in list(re.finditer(FORMAT_REGEX, html)) if x.group('quality') in self._known_formats]
|
||||
# Sort according to known formats list
|
||||
formats.sort(key=lambda fmt: self._known_formats.index(fmt['format_id']))
|
||||
formats = [{
|
||||
'url': x.group('url'),
|
||||
'format_id': x.group('quality'),
|
||||
'format_note': x.group('note'),
|
||||
'format': u'%s (%s)' % (x.group('quality'), x.group('note')),
|
||||
'filesize': self._restore_bytes(x.group('filesize')), # File size is approximate
|
||||
'preference': self._known_formats.index(x.group('quality')),
|
||||
'vcodec': 'none' if x.group('note') == 'Audio only' else None,
|
||||
} for x in list(re.finditer(FORMAT_REGEX, html)) if x.group('quality') in self._known_formats]
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return formats
|
||||
|
||||
def _extract_title(self, html):
|
||||
|
19
youtube_dl/extractor/cmt.py
Normal file
19
youtube_dl/extractor/cmt.py
Normal file
@@ -0,0 +1,19 @@
|
||||
from .mtv import MTVIE
|
||||
|
||||
class CMTIE(MTVIE):
|
||||
IE_NAME = u'cmt.com'
|
||||
_VALID_URL = r'https?://www\.cmt\.com/videos/.+?/(?P<videoid>[^/]+)\.jhtml'
|
||||
_FEED_URL = 'http://www.cmt.com/sitewide/apps/player/embed/rss/'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
u'url': u'http://www.cmt.com/videos/garth-brooks/989124/the-call-featuring-trisha-yearwood.jhtml#artist=30061',
|
||||
u'md5': u'e6b7ef3c4c45bbfae88061799bbba6c2',
|
||||
u'info_dict': {
|
||||
u'id': u'989124',
|
||||
u'ext': u'mp4',
|
||||
u'title': u'Garth Brooks - "The Call (featuring Trisha Yearwood)"',
|
||||
u'description': u'Blame It All On My Roots',
|
||||
},
|
||||
},
|
||||
]
|
@@ -1,7 +1,10 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import determine_ext
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
)
|
||||
|
||||
|
||||
class CNNIE(InfoExtractor):
|
||||
@@ -15,6 +18,8 @@ class CNNIE(InfoExtractor):
|
||||
u'info_dict': {
|
||||
u'title': u'Nadal wins 8th French Open title',
|
||||
u'description': u'World Sport\'s Amanda Davies chats with 2013 French Open champion Rafael Nadal.',
|
||||
u'duration': 135,
|
||||
u'upload_date': u'20130609',
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -35,22 +40,58 @@ class CNNIE(InfoExtractor):
|
||||
info = self._download_xml(info_url, page_title)
|
||||
|
||||
formats = []
|
||||
rex = re.compile(r'''(?x)
|
||||
(?P<width>[0-9]+)x(?P<height>[0-9]+)
|
||||
(?:_(?P<bitrate>[0-9]+)k)?
|
||||
''')
|
||||
for f in info.findall('files/file'):
|
||||
mf = re.match(r'(\d+)x(\d+)(?:_(.*)k)?',f.attrib['bitrate'])
|
||||
if mf is not None:
|
||||
formats.append((int(mf.group(1)), int(mf.group(2)), int(mf.group(3) or 0), f.text))
|
||||
formats = sorted(formats)
|
||||
(_,_,_, video_path) = formats[-1]
|
||||
video_url = 'http://ht.cdn.turner.com/cnn/big%s' % video_path
|
||||
video_url = 'http://ht.cdn.turner.com/cnn/big%s' % (f.text.strip())
|
||||
fdct = {
|
||||
'format_id': f.attrib['bitrate'],
|
||||
'url': video_url,
|
||||
}
|
||||
|
||||
mf = rex.match(f.attrib['bitrate'])
|
||||
if mf:
|
||||
fdct['width'] = int(mf.group('width'))
|
||||
fdct['height'] = int(mf.group('height'))
|
||||
fdct['tbr'] = int_or_none(mf.group('bitrate'))
|
||||
else:
|
||||
mf = rex.search(f.text)
|
||||
if mf:
|
||||
fdct['width'] = int(mf.group('width'))
|
||||
fdct['height'] = int(mf.group('height'))
|
||||
fdct['tbr'] = int_or_none(mf.group('bitrate'))
|
||||
else:
|
||||
mi = re.match(r'ios_(audio|[0-9]+)$', f.attrib['bitrate'])
|
||||
if mi:
|
||||
if mi.group(1) == 'audio':
|
||||
fdct['vcodec'] = 'none'
|
||||
fdct['ext'] = 'm4a'
|
||||
else:
|
||||
fdct['tbr'] = int(mi.group(1))
|
||||
|
||||
formats.append(fdct)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnails = sorted([((int(t.attrib['height']),int(t.attrib['width'])), t.text) for t in info.findall('images/image')])
|
||||
thumbs_dict = [{'resolution': res, 'url': t_url} for (res, t_url) in thumbnails]
|
||||
|
||||
return {'id': info.attrib['id'],
|
||||
'title': info.find('headline').text,
|
||||
'url': video_url,
|
||||
'ext': determine_ext(video_url),
|
||||
'thumbnail': thumbnails[-1][1],
|
||||
'thumbnails': thumbs_dict,
|
||||
'description': info.find('description').text,
|
||||
}
|
||||
metas_el = info.find('metas')
|
||||
upload_date = (
|
||||
metas_el.attrib.get('version') if metas_el is not None else None)
|
||||
|
||||
duration_el = info.find('length')
|
||||
duration = parse_duration(duration_el.text)
|
||||
|
||||
return {
|
||||
'id': info.attrib['id'],
|
||||
'title': info.find('headline').text,
|
||||
'formats': formats,
|
||||
'thumbnail': thumbnails[-1][1],
|
||||
'thumbnails': thumbs_dict,
|
||||
'description': info.find('description').text,
|
||||
'duration': duration,
|
||||
'upload_date': upload_date,
|
||||
}
|
||||
|
@@ -1,82 +1,68 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse_urlparse,
|
||||
determine_ext,
|
||||
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class CollegeHumorIE(InfoExtractor):
|
||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/(video|embed|e)/(?P<videoid>[0-9]+)/?(?P<shorttitle>.*)$'
|
||||
|
||||
_TESTS = [{
|
||||
u'url': u'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe',
|
||||
u'file': u'6902724.mp4',
|
||||
u'md5': u'1264c12ad95dca142a9f0bf7968105a0',
|
||||
u'info_dict': {
|
||||
u'title': u'Comic-Con Cosplay Catastrophe',
|
||||
u'description': u'Fans get creative this year at San Diego. Too creative. And yes, that\'s really Joss Whedon.',
|
||||
'url': 'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe',
|
||||
'file': '6902724.mp4',
|
||||
'md5': 'dcc0f5c1c8be98dc33889a191f4c26bd',
|
||||
'info_dict': {
|
||||
'title': 'Comic-Con Cosplay Catastrophe',
|
||||
'description': 'Fans get creative this year at San Diego. Too',
|
||||
'age_limit': 13,
|
||||
},
|
||||
},
|
||||
{
|
||||
u'url': u'http://www.collegehumor.com/video/3505939/font-conference',
|
||||
u'file': u'3505939.mp4',
|
||||
u'md5': u'c51ca16b82bb456a4397987791a835f5',
|
||||
u'info_dict': {
|
||||
u'title': u'Font Conference',
|
||||
u'description': u'This video wasn\'t long enough, so we made it double-spaced.',
|
||||
'url': 'http://www.collegehumor.com/video/3505939/font-conference',
|
||||
'file': '3505939.mp4',
|
||||
'md5': '72fa701d8ef38664a4dbb9e2ab721816',
|
||||
'info_dict': {
|
||||
'title': 'Font Conference',
|
||||
'description': 'This video wasn\'t long enough, so we made it double-spaced.',
|
||||
'age_limit': 10,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
||||
video_id = mobj.group('videoid')
|
||||
|
||||
info = {
|
||||
'id': video_id,
|
||||
'uploader': None,
|
||||
'upload_date': None,
|
||||
}
|
||||
jsonUrl = 'http://www.collegehumor.com/moogaloop/video/' + video_id + '.json'
|
||||
data = json.loads(self._download_webpage(
|
||||
jsonUrl, video_id, 'Downloading info JSON'))
|
||||
vdata = data['video']
|
||||
|
||||
self.report_extraction(video_id)
|
||||
xmlUrl = 'http://www.collegehumor.com/moogaloop/video/' + video_id
|
||||
mdoc = self._download_xml(xmlUrl, video_id,
|
||||
u'Downloading info XML',
|
||||
u'Unable to download video info XML')
|
||||
|
||||
try:
|
||||
videoNode = mdoc.findall('./video')[0]
|
||||
youtubeIdNode = videoNode.find('./youtubeID')
|
||||
if youtubeIdNode is not None:
|
||||
return self.url_result(youtubeIdNode.text, 'Youtube')
|
||||
info['description'] = videoNode.findall('./description')[0].text
|
||||
info['title'] = videoNode.findall('./caption')[0].text
|
||||
info['thumbnail'] = videoNode.findall('./thumbnail')[0].text
|
||||
next_url = videoNode.findall('./file')[0].text
|
||||
except IndexError:
|
||||
raise ExtractorError(u'Invalid metadata XML file')
|
||||
|
||||
if next_url.endswith(u'manifest.f4m'):
|
||||
manifest_url = next_url + '?hdcore=2.10.3'
|
||||
adoc = self._download_xml(manifest_url, video_id,
|
||||
u'Downloading XML manifest',
|
||||
u'Unable to download video info XML')
|
||||
|
||||
try:
|
||||
video_id = adoc.findall('./{http://ns.adobe.com/f4m/1.0}id')[0].text
|
||||
except IndexError:
|
||||
raise ExtractorError(u'Invalid manifest file')
|
||||
url_pr = compat_urllib_parse_urlparse(info['thumbnail'])
|
||||
info['url'] = url_pr.scheme + '://' + url_pr.netloc + video_id[:-2].replace('.csmil','').replace(',','')
|
||||
info['ext'] = 'mp4'
|
||||
AGE_LIMITS = {'nc17': 18, 'r': 18, 'pg13': 13, 'pg': 10, 'g': 0}
|
||||
rating = vdata.get('rating')
|
||||
if rating:
|
||||
age_limit = AGE_LIMITS.get(rating.lower())
|
||||
else:
|
||||
# Old-style direct links
|
||||
info['url'] = next_url
|
||||
info['ext'] = determine_ext(info['url'])
|
||||
age_limit = None # None = No idea
|
||||
|
||||
return info
|
||||
PREFS = {'high_quality': 2, 'low_quality': 0}
|
||||
formats = []
|
||||
for format_key in ('mp4', 'webm'):
|
||||
for qname, qurl in vdata[format_key].items():
|
||||
formats.append({
|
||||
'format_id': format_key + '_' + qname,
|
||||
'url': qurl,
|
||||
'format': format_key,
|
||||
'preference': PREFS.get(qname),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': vdata['title'],
|
||||
'description': vdata.get('description'),
|
||||
'thumbnail': vdata.get('thumbnail'),
|
||||
'formats': formats,
|
||||
'age_limit': age_limit,
|
||||
}
|
||||
|
@@ -12,7 +12,9 @@ from ..utils import (
|
||||
|
||||
|
||||
class ComedyCentralIE(MTVServicesInfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www.)?comedycentral.com/(video-clips|episodes|cc-studios)/(?P<title>.*)'
|
||||
_VALID_URL = r'''(?x)https?://(?:www.)?comedycentral.com/
|
||||
(video-clips|episodes|cc-studios|video-collections)
|
||||
/(?P<title>.*)'''
|
||||
_FEED_URL = u'http://comedycentral.com/feeds/mrss/'
|
||||
|
||||
_TEST = {
|
||||
|
@@ -9,6 +9,7 @@ import xml.etree.ElementTree
|
||||
from ..utils import (
|
||||
compat_http_client,
|
||||
compat_urllib_error,
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_str,
|
||||
|
||||
clean_html,
|
||||
@@ -37,10 +38,12 @@ class InfoExtractor(object):
|
||||
id: Video identifier.
|
||||
title: Video title, unescaped.
|
||||
|
||||
Additionally, it must contain either a formats entry or url and ext:
|
||||
Additionally, it must contain either a formats entry or a url one:
|
||||
|
||||
formats: A list of dictionaries for each format available, it must
|
||||
be ordered from worst to best quality. Potential fields:
|
||||
formats: A list of dictionaries for each format available, ordered
|
||||
from worst to best quality.
|
||||
|
||||
Potential fields:
|
||||
* url Mandatory. The URL of the video file
|
||||
* ext Will be calculated from url if missing
|
||||
* format A human-readable description of the format
|
||||
@@ -48,23 +51,32 @@ class InfoExtractor(object):
|
||||
Calculated from the format_id, width, height.
|
||||
and format_note fields if missing.
|
||||
* format_id A short description of the format
|
||||
("mp4_h264_opus" or "19")
|
||||
("mp4_h264_opus" or "19").
|
||||
Technically optional, but strongly recommended.
|
||||
* format_note Additional info about the format
|
||||
("3D" or "DASH video")
|
||||
* width Width of the video, if known
|
||||
* height Height of the video, if known
|
||||
* resolution Textual description of width and height
|
||||
* tbr Average bitrate of audio and video in KBit/s
|
||||
* abr Average audio bitrate in KBit/s
|
||||
* acodec Name of the audio codec in use
|
||||
* vbr Average video bitrate in KBit/s
|
||||
* vcodec Name of the video codec in use
|
||||
* filesize The number of bytes, if known in advance
|
||||
* player_url SWF Player URL (used for rtmpdump).
|
||||
* protocol The protocol that will be used for the actual
|
||||
download, lower-case.
|
||||
"http", "https", "rtsp", "rtmp" or so.
|
||||
* preference Order number of this format. If this field is
|
||||
present and not None, the formats get sorted
|
||||
by this field.
|
||||
-1 for default (order by other properties),
|
||||
-2 or smaller for less than default.
|
||||
url: Final video URL.
|
||||
ext: Video filename extension.
|
||||
format: The video format, defaults to ext (used for --get-format)
|
||||
player_url: SWF Player URL (used for rtmpdump).
|
||||
urlhandle: [internal] The urlHandle to be used to download the file,
|
||||
like returned by urllib.request.urlopen
|
||||
|
||||
The following fields are optional:
|
||||
|
||||
@@ -244,6 +256,11 @@ class InfoExtractor(object):
|
||||
xml_string = transform_source(xml_string)
|
||||
return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8'))
|
||||
|
||||
def report_warning(self, msg, video_id=None):
|
||||
idstr = u'' if video_id is None else u'%s: ' % video_id
|
||||
self._downloader.report_warning(
|
||||
u'[%s] %s%s' % (self.IE_NAME, idstr, msg))
|
||||
|
||||
def to_screen(self, msg):
|
||||
"""Print msg to screen, prefixing it with '[ie_name]'"""
|
||||
self._downloader.to_screen(u'[%s] %s' % (self.IE_NAME, msg))
|
||||
@@ -361,7 +378,7 @@ class InfoExtractor(object):
|
||||
@staticmethod
|
||||
def _og_regexes(prop):
|
||||
content_re = r'content=(?:"([^>]+?)"|\'(.+?)\')'
|
||||
property_re = r'property=[\'"]og:%s[\'"]' % re.escape(prop)
|
||||
property_re = r'(?:name|property)=[\'"]og:%s[\'"]' % re.escape(prop)
|
||||
template = r'<meta[^>]+?%s[^>]+?%s'
|
||||
return [
|
||||
template % (property_re, content_re),
|
||||
@@ -426,6 +443,56 @@ class InfoExtractor(object):
|
||||
}
|
||||
return RATING_TABLE.get(rating.lower(), None)
|
||||
|
||||
def _sort_formats(self, formats):
|
||||
def _formats_key(f):
|
||||
# TODO remove the following workaround
|
||||
from ..utils import determine_ext
|
||||
if not f.get('ext') and 'url' in f:
|
||||
f['ext'] = determine_ext(f['url'])
|
||||
|
||||
preference = f.get('preference')
|
||||
if preference is None:
|
||||
proto = f.get('protocol')
|
||||
if proto is None:
|
||||
proto = compat_urllib_parse_urlparse(f.get('url', '')).scheme
|
||||
|
||||
preference = 0 if proto in ['http', 'https'] else -0.1
|
||||
if f.get('ext') in ['f4f', 'f4m']: # Not yet supported
|
||||
preference -= 0.5
|
||||
|
||||
if f.get('vcodec') == 'none': # audio only
|
||||
if self._downloader.params.get('prefer_free_formats'):
|
||||
ORDER = [u'aac', u'mp3', u'm4a', u'webm', u'ogg', u'opus']
|
||||
else:
|
||||
ORDER = [u'webm', u'opus', u'ogg', u'mp3', u'aac', u'm4a']
|
||||
ext_preference = 0
|
||||
try:
|
||||
audio_ext_preference = ORDER.index(f['ext'])
|
||||
except ValueError:
|
||||
audio_ext_preference = -1
|
||||
else:
|
||||
if self._downloader.params.get('prefer_free_formats'):
|
||||
ORDER = [u'flv', u'mp4', u'webm']
|
||||
else:
|
||||
ORDER = [u'webm', u'flv', u'mp4']
|
||||
try:
|
||||
ext_preference = ORDER.index(f['ext'])
|
||||
except ValueError:
|
||||
ext_preference = -1
|
||||
audio_ext_preference = 0
|
||||
|
||||
return (
|
||||
preference,
|
||||
f.get('height') if f.get('height') is not None else -1,
|
||||
f.get('width') if f.get('width') is not None else -1,
|
||||
ext_preference,
|
||||
f.get('vbr') if f.get('vbr') is not None else -1,
|
||||
f.get('abr') if f.get('abr') is not None else -1,
|
||||
audio_ext_preference,
|
||||
f.get('filesize') if f.get('filesize') is not None else -1,
|
||||
f.get('format_id'),
|
||||
)
|
||||
formats.sort(key=_formats_key)
|
||||
|
||||
|
||||
class SearchInfoExtractor(InfoExtractor):
|
||||
|
@@ -1,20 +1,25 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
||||
class CSpanIE(InfoExtractor):
|
||||
_VALID_URL = r'http://www\.c-spanvideo\.org/program/(.*)'
|
||||
IE_DESC = 'C-SPAN'
|
||||
_TEST = {
|
||||
u'url': u'http://www.c-spanvideo.org/program/HolderonV',
|
||||
u'file': u'315139.flv',
|
||||
u'md5': u'74a623266956f69e4df0068ab6c80fe4',
|
||||
u'info_dict': {
|
||||
u"title": u"Attorney General Eric Holder on Voting Rights Act Decision"
|
||||
'url': 'http://www.c-spanvideo.org/program/HolderonV',
|
||||
'file': '315139.mp4',
|
||||
'md5': '8e44ce11f0f725527daccc453f553eb0',
|
||||
'info_dict': {
|
||||
'title': 'Attorney General Eric Holder on Voting Rights Act Decision',
|
||||
'description': 'Attorney General Eric Holder spoke to reporters following the Supreme Court decision in [Shelby County v. Holder] in which the court ruled that the preclearance provisions of the Voting Rights Act could not be enforced until Congress established new guidelines for review.',
|
||||
},
|
||||
u'skip': u'Requires rtmpdump'
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -22,30 +27,22 @@ class CSpanIE(InfoExtractor):
|
||||
prog_name = mobj.group(1)
|
||||
webpage = self._download_webpage(url, prog_name)
|
||||
video_id = self._search_regex(r'programid=(.*?)&', webpage, 'video id')
|
||||
data = compat_urllib_parse.urlencode({'programid': video_id,
|
||||
'dynamic':'1'})
|
||||
info_url = 'http://www.c-spanvideo.org/common/services/flashXml.php?' + data
|
||||
video_info = self._download_webpage(info_url, video_id, u'Downloading video info')
|
||||
|
||||
self.report_extraction(video_id)
|
||||
title = self._html_search_regex(
|
||||
r'<!-- title -->\n\s*<h1[^>]*>(.*?)</h1>', webpage, 'title')
|
||||
description = self._og_search_description(webpage)
|
||||
|
||||
title = self._html_search_regex(r'<string name="title">(.*?)</string>',
|
||||
video_info, 'title')
|
||||
description = self._html_search_regex(r'<meta (?:property="og:|name=")description" content="(.*?)"',
|
||||
webpage, 'description',
|
||||
flags=re.MULTILINE|re.DOTALL)
|
||||
info_url = 'http://c-spanvideo.org/videoLibrary/assets/player/ajax-player.php?os=android&html5=program&id=' + video_id
|
||||
data_json = self._download_webpage(
|
||||
info_url, video_id, 'Downloading video info')
|
||||
data = json.loads(data_json)
|
||||
|
||||
url = self._search_regex(r'<string name="URL">(.*?)</string>',
|
||||
video_info, 'video url')
|
||||
url = url.replace('$(protocol)', 'rtmp').replace('$(port)', '443')
|
||||
path = self._search_regex(r'<string name="path">(.*?)</string>',
|
||||
video_info, 'rtmp play path')
|
||||
url = unescapeHTML(data['video']['files'][0]['path']['#text'])
|
||||
|
||||
return {'id': video_id,
|
||||
'title': title,
|
||||
'ext': 'flv',
|
||||
'url': url,
|
||||
'play_path': path,
|
||||
'description': description,
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
}
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'url': url,
|
||||
'description': description,
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
}
|
||||
|
@@ -4,18 +4,17 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class DreiSatIE(InfoExtractor):
|
||||
IE_NAME = '3sat'
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/index\.php\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/(?:index\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'
|
||||
_TEST = {
|
||||
u"url": u"http://www.3sat.de/mediathek/index.php?obj=36983",
|
||||
u'file': u'36983.webm',
|
||||
u'md5': u'57c97d0469d71cf874f6815aa2b7c944',
|
||||
u'file': u'36983.mp4',
|
||||
u'md5': u'9dcfe344732808dbfcc901537973c922',
|
||||
u'info_dict': {
|
||||
u"title": u"Kaffeeland Schweiz",
|
||||
u"description": u"Über 80 Kaffeeröstereien liefern in der Schweiz das Getränk, in das das Land so vernarrt ist: Mehr als 1000 Tassen trinkt ein Schweizer pro Jahr. SCHWEIZWEIT nimmt die Kaffeekultur unter die...",
|
||||
@@ -52,18 +51,12 @@ class DreiSatIE(InfoExtractor):
|
||||
'width': int(fe.find('./width').text),
|
||||
'height': int(fe.find('./height').text),
|
||||
'url': fe.find('./url').text,
|
||||
'ext': determine_ext(fe.find('./url').text),
|
||||
'filesize': int(fe.find('./filesize').text),
|
||||
'video_bitrate': int(fe.find('./videoBitrate').text),
|
||||
'3sat_qualityname': fe.find('./quality').text,
|
||||
} for fe in format_els
|
||||
if not fe.find('./url').text.startswith('http://www.metafilegenerator.de/')]
|
||||
|
||||
def _sortkey(format):
|
||||
qidx = ['low', 'med', 'high', 'veryhigh'].index(format['3sat_qualityname'])
|
||||
prefer_http = 1 if 'rtmp' in format['url'] else 0
|
||||
return (qidx, prefer_http, format['video_bitrate'])
|
||||
formats.sort(key=_sortkey)
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'_type': 'video',
|
||||
|
@@ -162,6 +162,8 @@ class GenericIE(InfoExtractor):
|
||||
return self.url_result('http://' + url)
|
||||
video_id = os.path.splitext(url.split('/')[-1])[0]
|
||||
|
||||
self.to_screen(u'%s: Requesting header' % video_id)
|
||||
|
||||
try:
|
||||
response = self._send_head(url)
|
||||
|
||||
@@ -271,16 +273,12 @@ class GenericIE(InfoExtractor):
|
||||
}
|
||||
|
||||
# Look for embedded blip.tv player
|
||||
mobj = re.search(r'<meta\s[^>]*https?://api.blip.tv/\w+/redirect/\w+/(\d+)', webpage)
|
||||
mobj = re.search(r'<meta\s[^>]*https?://api\.blip\.tv/\w+/redirect/\w+/(\d+)', webpage)
|
||||
if mobj:
|
||||
return self.url_result('http://blip.tv/seo/-'+mobj.group(1), 'BlipTV')
|
||||
mobj = re.search(r'<(?:iframe|embed|object)\s[^>]*https?://(?:\w+\.)?blip.tv/(?:play/|api\.swf#)([a-zA-Z0-9]+)', webpage)
|
||||
return self.url_result('http://blip.tv/a/a-'+mobj.group(1), 'BlipTV')
|
||||
mobj = re.search(r'<(?:iframe|embed|object)\s[^>]*(https?://(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)[a-zA-Z0-9]+)', webpage)
|
||||
if mobj:
|
||||
player_url = 'http://blip.tv/play/%s.x?p=1' % mobj.group(1)
|
||||
player_page = self._download_webpage(player_url, mobj.group(1))
|
||||
blip_video_id = self._search_regex(r'data-episode-id="(\d+)', player_page, u'blip_video_id', fatal=False)
|
||||
if blip_video_id:
|
||||
return self.url_result('http://blip.tv/seo/-'+blip_video_id, 'BlipTV')
|
||||
return self.url_result(mobj.group(1), 'BlipTV')
|
||||
|
||||
# Look for Bandcamp pages with custom domain
|
||||
mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
|
||||
|
@@ -55,3 +55,32 @@ class ImdbIE(InfoExtractor):
|
||||
'description': descr,
|
||||
'thumbnail': format_info['slate'],
|
||||
}
|
||||
|
||||
class ImdbListIE(InfoExtractor):
|
||||
IE_NAME = u'imdb:list'
|
||||
IE_DESC = u'Internet Movie Database lists'
|
||||
_VALID_URL = r'http://www\.imdb\.com/list/(?P<id>[\da-zA-Z_-]{11})'
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
list_id = mobj.group('id')
|
||||
|
||||
# RSS XML is sometimes malformed
|
||||
rss = self._download_webpage('http://rss.imdb.com/list/%s' % list_id, list_id, u'Downloading list RSS')
|
||||
list_title = self._html_search_regex(r'<title>(.*?)</title>', rss, u'list title')
|
||||
|
||||
# Export is independent of actual author_id, but returns 404 if no author_id is provided.
|
||||
# However, passing dummy author_id seems to be enough.
|
||||
csv = self._download_webpage('http://www.imdb.com/list/export?list_id=%s&author_id=ur00000000' % list_id,
|
||||
list_id, u'Downloading list CSV')
|
||||
|
||||
entries = []
|
||||
for item in csv.split('\n')[1:]:
|
||||
cols = item.split(',')
|
||||
if len(cols) < 2:
|
||||
continue
|
||||
item_id = cols[1][1:-1]
|
||||
if item_id.startswith('vi'):
|
||||
entries.append(self.url_result('http://www.imdb.com/video/imdb/%s' % item_id, 'Imdb'))
|
||||
|
||||
return self.playlist_result(entries, list_id, list_title)
|
@@ -5,7 +5,6 @@ from ..utils import (
|
||||
compat_urlparse,
|
||||
compat_urllib_parse,
|
||||
xpath_with_ns,
|
||||
determine_ext,
|
||||
)
|
||||
|
||||
|
||||
@@ -63,13 +62,17 @@ class InternetVideoArchiveIE(InfoExtractor):
|
||||
for content in item.findall(_bp('media:group/media:content')):
|
||||
attr = content.attrib
|
||||
f_url = attr['url']
|
||||
width = int(attr['width'])
|
||||
bitrate = int(attr['bitrate'])
|
||||
format_id = '%d-%dk' % (width, bitrate)
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': f_url,
|
||||
'ext': determine_ext(f_url),
|
||||
'width': int(attr['width']),
|
||||
'bitrate': int(attr['bitrate']),
|
||||
'width': width,
|
||||
'tbr': bitrate,
|
||||
})
|
||||
formats = sorted(formats, key=lambda f: f['bitrate'])
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@@ -84,14 +84,16 @@ class IviIE(InfoExtractor):
|
||||
|
||||
result = video_json[u'result']
|
||||
|
||||
formats = [{'url': x[u'url'],
|
||||
'format_id': x[u'content_format']
|
||||
} for x in result[u'files'] if x[u'content_format'] in self._known_formats]
|
||||
formats.sort(key=lambda fmt: self._known_formats.index(fmt['format_id']))
|
||||
formats = [{
|
||||
'url': x[u'url'],
|
||||
'format_id': x[u'content_format'],
|
||||
'preference': self._known_formats.index(x[u'content_format']),
|
||||
} for x in result[u'files'] if x[u'content_format'] in self._known_formats]
|
||||
|
||||
if len(formats) == 0:
|
||||
self._downloader.report_warning(u'No media links available for %s' % video_id)
|
||||
return
|
||||
self._sort_formats(formats)
|
||||
|
||||
if not formats:
|
||||
raise ExtractorError(u'No media links available for %s' % video_id)
|
||||
|
||||
duration = result[u'duration']
|
||||
compilation = result[u'compilation']
|
||||
|
73
youtube_dl/extractor/jpopsukitv.py
Normal file
73
youtube_dl/extractor/jpopsukitv.py
Normal file
@@ -0,0 +1,73 @@
|
||||
# coding=utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class JpopsukiIE(InfoExtractor):
|
||||
IE_NAME = 'jpopsuki.tv'
|
||||
_VALID_URL = r'https?://(?:www\.)?jpopsuki\.tv/video/(.*?)/(?P<id>\S+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.jpopsuki.tv/video/ayumi-hamasaki---evolution/00be659d23b0b40508169cdee4545771',
|
||||
'md5': '88018c0c1a9b1387940e90ec9e7e198e',
|
||||
'file': '00be659d23b0b40508169cdee4545771.mp4',
|
||||
'info_dict': {
|
||||
'id': '00be659d23b0b40508169cdee4545771',
|
||||
'title': 'ayumi hamasaki - evolution',
|
||||
'description': 'Release date: 2001.01.31\r\n浜崎あゆみ - evolution',
|
||||
'thumbnail': 'http://www.jpopsuki.tv/cache/89722c74d2a2ebe58bcac65321c115b2.jpg',
|
||||
'uploader': 'plama_chan',
|
||||
'uploader_id': '404',
|
||||
'upload_date': '20121101'
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_url = 'http://www.jpopsuki.tv' + self._html_search_regex(
|
||||
r'<source src="(.*?)" type', webpage, 'video url')
|
||||
|
||||
video_title = self._og_search_title(webpage)
|
||||
description = self._og_search_description(webpage)
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
uploader = self._html_search_regex(
|
||||
r'<li>from: <a href="/user/view/user/(.*?)/uid/',
|
||||
webpage, 'video uploader', fatal=False)
|
||||
uploader_id = self._html_search_regex(
|
||||
r'<li>from: <a href="/user/view/user/\S*?/uid/(\d*)',
|
||||
webpage, 'video uploader_id', fatal=False)
|
||||
upload_date = self._html_search_regex(
|
||||
r'<li>uploaded: (.*?)</li>', webpage, 'video upload_date',
|
||||
fatal=False)
|
||||
if upload_date is not None:
|
||||
upload_date = unified_strdate(upload_date)
|
||||
view_count_str = self._html_search_regex(
|
||||
r'<li>Hits: ([0-9]+?)</li>', webpage, 'video view_count',
|
||||
fatal=False)
|
||||
comment_count_str = self._html_search_regex(
|
||||
r'<h2>([0-9]+?) comments</h2>', webpage, 'video comment_count',
|
||||
fatal=False)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': video_title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'upload_date': upload_date,
|
||||
'view_count': int_or_none(view_count_str),
|
||||
'comment_count': int_or_none(comment_count_str),
|
||||
}
|
102
youtube_dl/extractor/lynda.py
Normal file
102
youtube_dl/extractor/lynda.py
Normal file
@@ -0,0 +1,102 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError
|
||||
|
||||
|
||||
class LyndaIE(InfoExtractor):
|
||||
IE_NAME = 'lynda'
|
||||
IE_DESC = 'lynda.com videos'
|
||||
_VALID_URL = r'https?://www\.lynda\.com/[^/]+/[^/]+/\d+/(\d+)-\d\.html'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html',
|
||||
'file': '114408.mp4',
|
||||
'md5': 'ecfc6862da89489161fb9cd5f5a6fac1',
|
||||
u"info_dict": {
|
||||
'title': 'Using the exercise files',
|
||||
'duration': 68
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group(1)
|
||||
|
||||
page = self._download_webpage('http://www.lynda.com/ajax/player?videoId=%s&type=video' % video_id,
|
||||
video_id, 'Downloading video JSON')
|
||||
video_json = json.loads(page)
|
||||
|
||||
if 'Status' in video_json and video_json['Status'] == 'NotFound':
|
||||
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
|
||||
|
||||
if video_json['HasAccess'] is False:
|
||||
raise ExtractorError('Video %s is only available for members' % video_id, expected=True)
|
||||
|
||||
video_id = video_json['ID']
|
||||
duration = video_json['DurationInSeconds']
|
||||
title = video_json['Title']
|
||||
|
||||
formats = [{'url': fmt['Url'],
|
||||
'ext': fmt['Extension'],
|
||||
'width': fmt['Width'],
|
||||
'height': fmt['Height'],
|
||||
'filesize': fmt['FileSize'],
|
||||
'format_id': fmt['Resolution']
|
||||
} for fmt in video_json['Formats']]
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'duration': duration,
|
||||
'formats': formats
|
||||
}
|
||||
|
||||
|
||||
class LyndaCourseIE(InfoExtractor):
|
||||
IE_NAME = 'lynda:course'
|
||||
IE_DESC = 'lynda.com online courses'
|
||||
|
||||
# Course link equals to welcome/introduction video link of same course
|
||||
# We will recognize it as course link
|
||||
_VALID_URL = r'https?://(?:www|m)\.lynda\.com/(?P<coursepath>[^/]+/[^/]+/(?P<courseid>\d+))-\d\.html'
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
course_path = mobj.group('coursepath')
|
||||
course_id = mobj.group('courseid')
|
||||
|
||||
page = self._download_webpage('http://www.lynda.com/ajax/player?courseId=%s&type=course' % course_id,
|
||||
course_id, 'Downloading course JSON')
|
||||
course_json = json.loads(page)
|
||||
|
||||
if 'Status' in course_json and course_json['Status'] == 'NotFound':
|
||||
raise ExtractorError('Course %s does not exist' % course_id, expected=True)
|
||||
|
||||
unaccessible_videos = 0
|
||||
videos = []
|
||||
|
||||
for chapter in course_json['Chapters']:
|
||||
for video in chapter['Videos']:
|
||||
if video['HasAccess'] is not True:
|
||||
unaccessible_videos += 1
|
||||
continue
|
||||
videos.append(video['ID'])
|
||||
|
||||
if unaccessible_videos > 0:
|
||||
self._downloader.report_warning('%s videos are only available for members and will not be downloaded' % unaccessible_videos)
|
||||
|
||||
entries = [
|
||||
self.url_result('http://www.lynda.com/%s/%s-4.html' %
|
||||
(course_path, video_id),
|
||||
'Lynda')
|
||||
for video_id in videos]
|
||||
|
||||
course_title = course_json['Title']
|
||||
|
||||
return self.playlist_result(entries, course_id, course_title)
|
43
youtube_dl/extractor/macgamestore.py
Normal file
43
youtube_dl/extractor/macgamestore.py
Normal file
@@ -0,0 +1,43 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError
|
||||
|
||||
|
||||
class MacGameStoreIE(InfoExtractor):
|
||||
IE_NAME = 'macgamestore'
|
||||
IE_DESC = 'MacGameStore trailers'
|
||||
_VALID_URL = r'https?://www\.macgamestore\.com/mediaviewer\.php\?trailer=(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.macgamestore.com/mediaviewer.php?trailer=2450',
|
||||
'file': '2450.m4v',
|
||||
'md5': '8649b8ea684b6666b4c5be736ecddc61',
|
||||
'info_dict': {
|
||||
'title': 'Crow',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id, 'Downloading trailer page')
|
||||
|
||||
if re.search(r'>Missing Media<', webpage) is not None:
|
||||
raise ExtractorError('Trailer %s does not exist' % video_id, expected=True)
|
||||
|
||||
video_title = self._html_search_regex(
|
||||
r'<title>MacGameStore: (.*?) Trailer</title>', webpage, 'title')
|
||||
|
||||
video_url = self._html_search_regex(
|
||||
r'(?s)<div\s+id="video-player".*?href="([^"]+)"\s*>',
|
||||
webpage, 'video URL')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': video_title
|
||||
}
|
@@ -52,10 +52,11 @@ class MDRIE(InfoExtractor):
|
||||
'format_id': u'%s-%d' % (media_type, vbr),
|
||||
})
|
||||
formats.append(format)
|
||||
formats.sort(key=lambda f: (f.get('vbr'), f['abr']))
|
||||
if not formats:
|
||||
raise ExtractorError(u'Could not find any valid formats')
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
|
@@ -33,8 +33,18 @@ class TechTVMITIE(InfoExtractor):
|
||||
raw_page, u'base url')
|
||||
formats_json = self._search_regex(r'bitrates: (\[.+?\])', raw_page,
|
||||
u'video formats')
|
||||
formats = json.loads(formats_json)
|
||||
formats = sorted(formats, key=lambda f: f['bitrate'])
|
||||
formats_mit = json.loads(formats_json)
|
||||
formats = [
|
||||
{
|
||||
'format_id': f['label'],
|
||||
'url': base_url + f['url'].partition(':')[2],
|
||||
'ext': f['url'].partition(':')[0],
|
||||
'format': f['label'],
|
||||
'width': f['width'],
|
||||
'vbr': f['bitrate'],
|
||||
}
|
||||
for f in formats_mit
|
||||
]
|
||||
|
||||
title = get_element_by_id('edit-title', clean_page)
|
||||
description = clean_html(get_element_by_id('edit-description', clean_page))
|
||||
@@ -43,8 +53,7 @@ class TechTVMITIE(InfoExtractor):
|
||||
|
||||
return {'id': video_id,
|
||||
'title': title,
|
||||
'url': base_url + formats[-1]['url'].replace('mp4:', ''),
|
||||
'ext': 'mp4',
|
||||
'formats': formats,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
|
@@ -53,7 +53,7 @@ class MixcloudIE(InfoExtractor):
|
||||
info = json.loads(json_data)
|
||||
|
||||
preview_url = self._search_regex(r'data-preview-url="(.+?)"', webpage, u'preview url')
|
||||
song_url = preview_url.replace('/previews/', '/cloudcasts/originals/')
|
||||
song_url = preview_url.replace('/previews/', '/c/originals/')
|
||||
template_url = re.sub(r'(stream\d*)', 'stream%d', song_url)
|
||||
final_song_url = self._get_url(template_url)
|
||||
if final_song_url is None:
|
||||
|
@@ -129,7 +129,7 @@ class MTVIE(MTVServicesInfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('videoid')
|
||||
uri = mobj.group('mgid')
|
||||
uri = mobj.groupdict().get('mgid')
|
||||
if uri is None:
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
|
@@ -143,8 +143,10 @@ class MyVideoIE(InfoExtractor):
|
||||
if mobj:
|
||||
video_url = compat_urllib_parse.unquote(mobj.group(1))
|
||||
if 'myvideo2flash' in video_url:
|
||||
self._downloader.report_warning(u'forcing RTMPT ...')
|
||||
video_url = video_url.replace('rtmpe://', 'rtmpt://')
|
||||
self.report_warning(
|
||||
u'Rewriting URL to use unencrypted rtmp:// ...',
|
||||
video_id)
|
||||
video_url = video_url.replace('rtmpe://', 'rtmp://')
|
||||
|
||||
if not video_url:
|
||||
# extract non rtmp videos
|
||||
|
@@ -5,7 +5,7 @@ from ..utils import compat_urllib_parse
|
||||
|
||||
|
||||
class PornHdIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?pornhd\.com/videos/(?P<video_id>[0-9]+)/(?P<video_title>.+)'
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?pornhd\.com/(?:[a-z]{2,4}/)?videos/(?P<video_id>[0-9]+)/(?P<video_title>.+)'
|
||||
_TEST = {
|
||||
u'url': u'http://www.pornhd.com/videos/1962/sierra-day-gets-his-cum-all-over-herself-hd-porn-video',
|
||||
u'file': u'1962.flv',
|
||||
|
@@ -138,7 +138,7 @@ class SmotriIE(InfoExtractor):
|
||||
# Warning if video is unavailable
|
||||
warning = self._html_search_regex(
|
||||
r'<div class="videoUnModer">(.*?)</div>', video_page,
|
||||
u'warning messagef', default=None)
|
||||
u'warning message', default=None)
|
||||
if warning is not None:
|
||||
self._downloader.report_warning(
|
||||
u'Video %s may not be available; smotri said: %s ' %
|
||||
|
@@ -29,7 +29,7 @@ class SoundcloudIE(InfoExtractor):
|
||||
(?!sets/)(?P<title>[\w\d-]+)/?
|
||||
(?P<token>[^?]+?)?(?:[?].*)?$)
|
||||
|(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+))
|
||||
|(?P<widget>w\.soundcloud\.com/player/?.*?url=.*)
|
||||
|(?P<player>(?:w|player|p.)\.soundcloud\.com/player/?.*?url=.*)
|
||||
)
|
||||
'''
|
||||
IE_NAME = u'soundcloud'
|
||||
@@ -193,7 +193,7 @@ class SoundcloudIE(InfoExtractor):
|
||||
if track_id is not None:
|
||||
info_json_url = 'http://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID
|
||||
full_title = track_id
|
||||
elif mobj.group('widget'):
|
||||
elif mobj.group('player'):
|
||||
query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
|
||||
return self.url_result(query['url'][0], ie='Soundcloud')
|
||||
else:
|
||||
|
@@ -51,9 +51,10 @@ class SpiegelIE(InfoExtractor):
|
||||
# Blacklist type 6, it's extremely LQ and not available on the same server
|
||||
if n.tag.startswith('type') and n.tag != 'type6'
|
||||
]
|
||||
formats.sort(key=lambda f: f['vbr'])
|
||||
duration = float(idoc[0].findall('./duration')[0].text)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
info = {
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
|
@@ -55,15 +55,21 @@ class ThePlatformIE(InfoExtractor):
|
||||
formats = []
|
||||
for f in switch.findall(_x('smil:video')):
|
||||
attr = f.attrib
|
||||
width = int(attr['width'])
|
||||
height = int(attr['height'])
|
||||
vbr = int(attr['system-bitrate']) // 1000
|
||||
format_id = '%dx%d_%dk' % (width, height, vbr)
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': base_url,
|
||||
'play_path': 'mp4:' + attr['src'],
|
||||
'ext': 'flv',
|
||||
'width': int(attr['width']),
|
||||
'height': int(attr['height']),
|
||||
'vbr': int(attr['system-bitrate']),
|
||||
'width': width,
|
||||
'height': height,
|
||||
'vbr': vbr,
|
||||
})
|
||||
formats.sort(key=lambda f: (f['height'], f['width'], f['vbr']))
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@@ -44,8 +44,10 @@ class WistiaIE(InfoExtractor):
|
||||
'height': a['height'],
|
||||
'filesize': a['size'],
|
||||
'ext': a['ext'],
|
||||
'preference': 1 if atype == 'original' else None,
|
||||
})
|
||||
formats.sort(key=lambda a: a['filesize'])
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@@ -6,8 +6,8 @@ from .common import InfoExtractor, SearchInfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse,
|
||||
compat_urlparse,
|
||||
determine_ext,
|
||||
clean_html,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -68,9 +68,9 @@ class YahooIE(InfoExtractor):
|
||||
formats = []
|
||||
for s in info['streams']:
|
||||
format_info = {
|
||||
'width': s.get('width'),
|
||||
'height': s.get('height'),
|
||||
'bitrate': s.get('bitrate'),
|
||||
'width': int_or_none(s.get('width')),
|
||||
'height': int_or_none(s.get('height')),
|
||||
'tbr': int_or_none(s.get('bitrate')),
|
||||
}
|
||||
|
||||
host = s['host']
|
||||
@@ -84,10 +84,10 @@ class YahooIE(InfoExtractor):
|
||||
else:
|
||||
format_url = compat_urlparse.urljoin(host, path)
|
||||
format_info['url'] = format_url
|
||||
format_info['ext'] = determine_ext(format_url)
|
||||
|
||||
formats.append(format_info)
|
||||
formats = sorted(formats, key=lambda f:(f['height'], f['width']))
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@@ -1,5 +1,4 @@
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
|
||||
@@ -16,6 +15,7 @@ from ..aes import (
|
||||
aes_decrypt_text
|
||||
)
|
||||
|
||||
|
||||
class YouPornIE(InfoExtractor):
|
||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+))'
|
||||
_TEST = {
|
||||
@@ -23,9 +23,9 @@ class YouPornIE(InfoExtractor):
|
||||
u'file': u'505835.mp4',
|
||||
u'md5': u'71ec5fcfddacf80f495efa8b6a8d9a89',
|
||||
u'info_dict': {
|
||||
u"upload_date": u"20101221",
|
||||
u"description": u"Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?",
|
||||
u"uploader": u"Ask Dan And Jennifer",
|
||||
u"upload_date": u"20101221",
|
||||
u"description": u"Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?",
|
||||
u"uploader": u"Ask Dan And Jennifer",
|
||||
u"title": u"Sex Ed: Is It Safe To Masturbate Daily?",
|
||||
u"age_limit": 18,
|
||||
}
|
||||
@@ -71,38 +71,36 @@ class YouPornIE(InfoExtractor):
|
||||
link = aes_decrypt_text(encrypted_link, video_title, 32).decode('utf-8')
|
||||
links.append(link)
|
||||
|
||||
if not links:
|
||||
raise ExtractorError(u'ERROR: no known formats available for video')
|
||||
|
||||
formats = []
|
||||
for link in links:
|
||||
|
||||
# A link looks like this:
|
||||
# http://cdn1.download.youporn.phncdn.com/201210/31/8004515/480p_370k_8004515/YouPorn%20-%20Nubile%20Films%20The%20Pillow%20Fight.mp4?nvb=20121113051249&nva=20121114051249&ir=1200&sr=1200&hash=014b882080310e95fb6a0
|
||||
# A path looks like this:
|
||||
# /201210/31/8004515/480p_370k_8004515/YouPorn%20-%20Nubile%20Films%20The%20Pillow%20Fight.mp4
|
||||
video_url = unescapeHTML(link)
|
||||
path = compat_urllib_parse_urlparse(video_url).path
|
||||
extension = os.path.splitext(path)[1][1:]
|
||||
format = path.split('/')[4].split('_')[:2]
|
||||
format_parts = path.split('/')[4].split('_')[:2]
|
||||
|
||||
# size = format[0]
|
||||
# bitrate = format[1]
|
||||
format = "-".join(format)
|
||||
# title = u'%s-%s-%s' % (video_title, size, bitrate)
|
||||
dn = compat_urllib_parse_urlparse(video_url).netloc.partition('.')[0]
|
||||
|
||||
resolution = format_parts[0]
|
||||
height = int(resolution[:-len('p')])
|
||||
bitrate = int(format_parts[1][:-len('k')])
|
||||
format = u'-'.join(format_parts) + u'-' + dn
|
||||
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'ext': extension,
|
||||
'format': format,
|
||||
'format_id': format,
|
||||
'height': height,
|
||||
'tbr': bitrate,
|
||||
'resolution': resolution,
|
||||
})
|
||||
|
||||
# Sort and remove doubles
|
||||
formats.sort(key=lambda format: list(map(lambda s: s.zfill(6), format['format'].split('-'))))
|
||||
for i in range(len(formats)-1,0,-1):
|
||||
if formats[i]['format_id'] == formats[i-1]['format_id']:
|
||||
del formats[i]
|
||||
self._sort_formats(formats)
|
||||
|
||||
if not formats:
|
||||
raise ExtractorError(u'ERROR: no known formats available for video')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@@ -150,168 +150,69 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
(?(1).+)? # if we found the ID, everything can follow
|
||||
$"""
|
||||
_NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
|
||||
# Listed in order of quality
|
||||
_available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '36', '17', '13',
|
||||
# Apple HTTP Live Streaming
|
||||
'96', '95', '94', '93', '92', '132', '151',
|
||||
# 3D
|
||||
'85', '84', '102', '83', '101', '82', '100',
|
||||
# Dash video
|
||||
'138', '137', '248', '136', '247', '135', '246',
|
||||
'245', '244', '134', '243', '133', '242', '160',
|
||||
# Dash audio
|
||||
'141', '172', '140', '171', '139',
|
||||
]
|
||||
_available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '36', '17', '13',
|
||||
# Apple HTTP Live Streaming
|
||||
'96', '95', '94', '93', '92', '132', '151',
|
||||
# 3D
|
||||
'85', '102', '84', '101', '83', '100', '82',
|
||||
# Dash video
|
||||
'138', '248', '137', '247', '136', '246', '245',
|
||||
'244', '135', '243', '134', '242', '133', '160',
|
||||
# Dash audio
|
||||
'172', '141', '171', '140', '139',
|
||||
]
|
||||
_video_formats_map = {
|
||||
'flv': ['35', '34', '6', '5'],
|
||||
'3gp': ['36', '17', '13'],
|
||||
'mp4': ['38', '37', '22', '18'],
|
||||
'webm': ['46', '45', '44', '43'],
|
||||
}
|
||||
_video_extensions = {
|
||||
'13': '3gp',
|
||||
'17': '3gp',
|
||||
'18': 'mp4',
|
||||
'22': 'mp4',
|
||||
'36': '3gp',
|
||||
'37': 'mp4',
|
||||
'38': 'mp4',
|
||||
'43': 'webm',
|
||||
'44': 'webm',
|
||||
'45': 'webm',
|
||||
'46': 'webm',
|
||||
_formats = {
|
||||
'5': {'ext': 'flv', 'width': 400, 'height': 240},
|
||||
'6': {'ext': 'flv', 'width': 450, 'height': 270},
|
||||
'13': {'ext': '3gp'},
|
||||
'17': {'ext': '3gp', 'width': 176, 'height': 144},
|
||||
'18': {'ext': 'mp4', 'width': 640, 'height': 360},
|
||||
'22': {'ext': 'mp4', 'width': 1280, 'height': 720},
|
||||
'34': {'ext': 'flv', 'width': 640, 'height': 360},
|
||||
'35': {'ext': 'flv', 'width': 854, 'height': 480},
|
||||
'36': {'ext': '3gp', 'width': 320, 'height': 240},
|
||||
'37': {'ext': 'mp4', 'width': 1920, 'height': 1080},
|
||||
'38': {'ext': 'mp4', 'width': 4096, 'height': 3072},
|
||||
'43': {'ext': 'webm', 'width': 640, 'height': 360},
|
||||
'44': {'ext': 'webm', 'width': 854, 'height': 480},
|
||||
'45': {'ext': 'webm', 'width': 1280, 'height': 720},
|
||||
'46': {'ext': 'webm', 'width': 1920, 'height': 1080},
|
||||
|
||||
|
||||
# 3d videos
|
||||
'82': 'mp4',
|
||||
'83': 'mp4',
|
||||
'84': 'mp4',
|
||||
'85': 'mp4',
|
||||
'100': 'webm',
|
||||
'101': 'webm',
|
||||
'102': 'webm',
|
||||
'82': {'ext': 'mp4', 'height': 360, 'resolution': '360p', 'format_note': '3D', 'preference': -20},
|
||||
'83': {'ext': 'mp4', 'height': 480, 'resolution': '480p', 'format_note': '3D', 'preference': -20},
|
||||
'84': {'ext': 'mp4', 'height': 720, 'resolution': '720p', 'format_note': '3D', 'preference': -20},
|
||||
'85': {'ext': 'mp4', 'height': 1080, 'resolution': '1080p', 'format_note': '3D', 'preference': -20},
|
||||
'100': {'ext': 'webm', 'height': 360, 'resolution': '360p', 'format_note': '3D', 'preference': -20},
|
||||
'101': {'ext': 'webm', 'height': 480, 'resolution': '480p', 'format_note': '3D', 'preference': -20},
|
||||
'102': {'ext': 'webm', 'height': 720, 'resolution': '720p', 'format_note': '3D', 'preference': -20},
|
||||
|
||||
# Apple HTTP Live Streaming
|
||||
'92': 'mp4',
|
||||
'93': 'mp4',
|
||||
'94': 'mp4',
|
||||
'95': 'mp4',
|
||||
'96': 'mp4',
|
||||
'132': 'mp4',
|
||||
'151': 'mp4',
|
||||
'92': {'ext': 'mp4', 'height': 240, 'resolution': '240p', 'format_note': 'HLS', 'preference': -10},
|
||||
'93': {'ext': 'mp4', 'height': 360, 'resolution': '360p', 'format_note': 'HLS', 'preference': -10},
|
||||
'94': {'ext': 'mp4', 'height': 480, 'resolution': '480p', 'format_note': 'HLS', 'preference': -10},
|
||||
'95': {'ext': 'mp4', 'height': 720, 'resolution': '720p', 'format_note': 'HLS', 'preference': -10},
|
||||
'96': {'ext': 'mp4', 'height': 1080, 'resolution': '1080p', 'format_note': 'HLS', 'preference': -10},
|
||||
'132': {'ext': 'mp4', 'height': 240, 'resolution': '240p', 'format_note': 'HLS', 'preference': -10},
|
||||
'151': {'ext': 'mp4', 'height': 72, 'resolution': '72p', 'format_note': 'HLS', 'preference': -10},
|
||||
|
||||
# Dash mp4
|
||||
'133': 'mp4',
|
||||
'134': 'mp4',
|
||||
'135': 'mp4',
|
||||
'136': 'mp4',
|
||||
'137': 'mp4',
|
||||
'138': 'mp4',
|
||||
'160': 'mp4',
|
||||
# DASH mp4 video
|
||||
'133': {'ext': 'mp4', 'height': 240, 'resolution': '240p', 'format_note': 'DASH video', 'preference': -40},
|
||||
'134': {'ext': 'mp4', 'height': 360, 'resolution': '360p', 'format_note': 'DASH video', 'preference': -40},
|
||||
'135': {'ext': 'mp4', 'height': 480, 'resolution': '480p', 'format_note': 'DASH video', 'preference': -40},
|
||||
'136': {'ext': 'mp4', 'height': 720, 'resolution': '720p', 'format_note': 'DASH video', 'preference': -40},
|
||||
'137': {'ext': 'mp4', 'height': 1080, 'resolution': '1080p', 'format_note': 'DASH video', 'preference': -40},
|
||||
'138': {'ext': 'mp4', 'height': 1081, 'resolution': '>1080p', 'format_note': 'DASH video', 'preference': -40},
|
||||
'160': {'ext': 'mp4', 'height': 192, 'resolution': '192p', 'format_note': 'DASH video', 'preference': -40},
|
||||
'264': {'ext': 'mp4', 'height': 1080, 'resolution': '1080p', 'format_note': 'DASH video', 'preference': -40},
|
||||
|
||||
# Dash mp4 audio
|
||||
'139': 'm4a',
|
||||
'140': 'm4a',
|
||||
'141': 'm4a',
|
||||
'139': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 48, 'preference': -50},
|
||||
'140': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 128, 'preference': -50},
|
||||
'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 256, 'preference': -50},
|
||||
|
||||
# Dash webm
|
||||
'171': 'webm',
|
||||
'172': 'webm',
|
||||
'242': 'webm',
|
||||
'243': 'webm',
|
||||
'244': 'webm',
|
||||
'245': 'webm',
|
||||
'246': 'webm',
|
||||
'247': 'webm',
|
||||
'248': 'webm',
|
||||
}
|
||||
_video_dimensions = {
|
||||
'5': '400x240',
|
||||
'6': '???',
|
||||
'13': '???',
|
||||
'17': '176x144',
|
||||
'18': '640x360',
|
||||
'22': '1280x720',
|
||||
'34': '640x360',
|
||||
'35': '854x480',
|
||||
'36': '320x240',
|
||||
'37': '1920x1080',
|
||||
'38': '4096x3072',
|
||||
'43': '640x360',
|
||||
'44': '854x480',
|
||||
'45': '1280x720',
|
||||
'46': '1920x1080',
|
||||
'82': '360p',
|
||||
'83': '480p',
|
||||
'84': '720p',
|
||||
'85': '1080p',
|
||||
'92': '240p',
|
||||
'93': '360p',
|
||||
'94': '480p',
|
||||
'95': '720p',
|
||||
'96': '1080p',
|
||||
'100': '360p',
|
||||
'101': '480p',
|
||||
'102': '720p',
|
||||
'132': '240p',
|
||||
'151': '72p',
|
||||
'133': '240p',
|
||||
'134': '360p',
|
||||
'135': '480p',
|
||||
'136': '720p',
|
||||
'137': '1080p',
|
||||
'138': '>1080p',
|
||||
'139': '48k',
|
||||
'140': '128k',
|
||||
'141': '256k',
|
||||
'160': '192p',
|
||||
'171': '128k',
|
||||
'172': '256k',
|
||||
'242': '240p',
|
||||
'243': '360p',
|
||||
'244': '480p',
|
||||
'245': '480p',
|
||||
'246': '480p',
|
||||
'247': '720p',
|
||||
'248': '1080p',
|
||||
}
|
||||
_special_itags = {
|
||||
'82': '3D',
|
||||
'83': '3D',
|
||||
'84': '3D',
|
||||
'85': '3D',
|
||||
'100': '3D',
|
||||
'101': '3D',
|
||||
'102': '3D',
|
||||
'133': 'DASH Video',
|
||||
'134': 'DASH Video',
|
||||
'135': 'DASH Video',
|
||||
'136': 'DASH Video',
|
||||
'137': 'DASH Video',
|
||||
'138': 'DASH Video',
|
||||
'139': 'DASH Audio',
|
||||
'140': 'DASH Audio',
|
||||
'141': 'DASH Audio',
|
||||
'160': 'DASH Video',
|
||||
'171': 'DASH Audio',
|
||||
'172': 'DASH Audio',
|
||||
'242': 'DASH Video',
|
||||
'243': 'DASH Video',
|
||||
'244': 'DASH Video',
|
||||
'245': 'DASH Video',
|
||||
'246': 'DASH Video',
|
||||
'247': 'DASH Video',
|
||||
'248': 'DASH Video',
|
||||
'242': {'ext': 'webm', 'height': 240, 'resolution': '240p', 'format_note': 'DASH webm', 'preference': -40},
|
||||
'243': {'ext': 'webm', 'height': 360, 'resolution': '360p', 'format_note': 'DASH webm', 'preference': -40},
|
||||
'244': {'ext': 'webm', 'height': 480, 'resolution': '480p', 'format_note': 'DASH webm', 'preference': -40},
|
||||
'245': {'ext': 'webm', 'height': 480, 'resolution': '480p', 'format_note': 'DASH webm', 'preference': -40},
|
||||
'246': {'ext': 'webm', 'height': 480, 'resolution': '480p', 'format_note': 'DASH webm', 'preference': -40},
|
||||
'247': {'ext': 'webm', 'height': 720, 'resolution': '720p', 'format_note': 'DASH webm', 'preference': -40},
|
||||
'248': {'ext': 'webm', 'height': 1080, 'resolution': '1080p', 'format_note': 'DASH webm', 'preference': -40},
|
||||
|
||||
# Dash webm audio
|
||||
'171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH webm audio', 'abr': 48, 'preference': -50},
|
||||
'172': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH webm audio', 'abr': 256, 'preference': -50},
|
||||
}
|
||||
|
||||
IE_NAME = u'youtube'
|
||||
@@ -1153,13 +1054,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
self._downloader.report_warning(err_msg)
|
||||
return {}
|
||||
|
||||
def _print_formats(self, formats):
|
||||
print('Available formats:')
|
||||
for x in formats:
|
||||
print('%s\t:\t%s\t[%s]%s' %(x, self._video_extensions.get(x, 'flv'),
|
||||
self._video_dimensions.get(x, '???'),
|
||||
' ('+self._special_itags[x]+')' if x in self._special_itags else ''))
|
||||
|
||||
def _extract_id(self, url):
|
||||
mobj = re.match(self._VALID_URL, url, re.VERBOSE)
|
||||
if mobj is None:
|
||||
@@ -1172,48 +1066,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
Transform a dictionary in the format {itag:url} to a list of (itag, url)
|
||||
with the requested formats.
|
||||
"""
|
||||
req_format = self._downloader.params.get('format', None)
|
||||
format_limit = self._downloader.params.get('format_limit', None)
|
||||
available_formats = self._available_formats_prefer_free if self._downloader.params.get('prefer_free_formats', False) else self._available_formats
|
||||
if format_limit is not None and format_limit in available_formats:
|
||||
format_list = available_formats[available_formats.index(format_limit):]
|
||||
else:
|
||||
format_list = available_formats
|
||||
existing_formats = [x for x in format_list if x in url_map]
|
||||
existing_formats = [x for x in self._formats if x in url_map]
|
||||
if len(existing_formats) == 0:
|
||||
raise ExtractorError(u'no known formats available for video')
|
||||
if self._downloader.params.get('listformats', None):
|
||||
self._print_formats(existing_formats)
|
||||
return
|
||||
if req_format is None or req_format == 'best':
|
||||
video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
|
||||
elif req_format == 'worst':
|
||||
video_url_list = [(existing_formats[-1], url_map[existing_formats[-1]])] # worst quality
|
||||
elif req_format in ('-1', 'all'):
|
||||
video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
|
||||
else:
|
||||
# Specific formats. We pick the first in a slash-delimeted sequence.
|
||||
# Format can be specified as itag or 'mp4' or 'flv' etc. We pick the highest quality
|
||||
# available in the specified format. For example,
|
||||
# if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
|
||||
# if '1/mp4/3/4' is requested and '1' and '5' (is a mp4) are available, we pick '1'.
|
||||
# if '1/mp4/3/4' is requested and '4' and '5' (is a mp4) are available, we pick '5'.
|
||||
req_formats = req_format.split('/')
|
||||
video_url_list = None
|
||||
for rf in req_formats:
|
||||
if rf in url_map:
|
||||
video_url_list = [(rf, url_map[rf])]
|
||||
break
|
||||
if rf in self._video_formats_map:
|
||||
for srf in self._video_formats_map[rf]:
|
||||
if srf in url_map:
|
||||
video_url_list = [(srf, url_map[srf])]
|
||||
break
|
||||
else:
|
||||
continue
|
||||
break
|
||||
if video_url_list is None:
|
||||
raise ExtractorError(u'requested format not available')
|
||||
video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
|
||||
video_url_list.reverse() # order worst to best
|
||||
return video_url_list
|
||||
|
||||
def _extract_from_m3u8(self, manifest_url, video_id):
|
||||
@@ -1462,50 +1319,43 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
url += '&ratebypass=yes'
|
||||
url_map[url_data['itag'][0]] = url
|
||||
video_url_list = self._get_video_url_list(url_map)
|
||||
if not video_url_list:
|
||||
return
|
||||
elif video_info.get('hlsvp'):
|
||||
manifest_url = video_info['hlsvp'][0]
|
||||
url_map = self._extract_from_m3u8(manifest_url, video_id)
|
||||
video_url_list = self._get_video_url_list(url_map)
|
||||
if not video_url_list:
|
||||
return
|
||||
|
||||
else:
|
||||
raise ExtractorError(u'no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
|
||||
|
||||
results = []
|
||||
formats = []
|
||||
for itag, video_real_url in video_url_list:
|
||||
# Extension
|
||||
video_extension = self._video_extensions.get(itag, 'flv')
|
||||
|
||||
video_format = '{0} - {1}{2}'.format(itag if itag else video_extension,
|
||||
self._video_dimensions.get(itag, '???'),
|
||||
' ('+self._special_itags[itag]+')' if itag in self._special_itags else '')
|
||||
|
||||
results.append({
|
||||
'id': video_id,
|
||||
'url': video_real_url,
|
||||
'uploader': video_uploader,
|
||||
'uploader_id': video_uploader_id,
|
||||
'upload_date': upload_date,
|
||||
'title': video_title,
|
||||
'ext': video_extension,
|
||||
'format': video_format,
|
||||
dct = {
|
||||
'format_id': itag,
|
||||
'thumbnail': video_thumbnail,
|
||||
'description': video_description,
|
||||
'player_url': player_url,
|
||||
'subtitles': video_subtitles,
|
||||
'duration': video_duration,
|
||||
'age_limit': 18 if age_gate else 0,
|
||||
'annotations': video_annotations,
|
||||
'webpage_url': 'https://www.youtube.com/watch?v=%s' % video_id,
|
||||
'view_count': view_count,
|
||||
'like_count': like_count,
|
||||
'dislike_count': dislike_count,
|
||||
})
|
||||
return results
|
||||
'url': video_real_url,
|
||||
'player_url': player_url,
|
||||
}
|
||||
dct.update(self._formats[itag])
|
||||
formats.append(dct)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'uploader': video_uploader,
|
||||
'uploader_id': video_uploader_id,
|
||||
'upload_date': upload_date,
|
||||
'title': video_title,
|
||||
'thumbnail': video_thumbnail,
|
||||
'description': video_description,
|
||||
'subtitles': video_subtitles,
|
||||
'duration': video_duration,
|
||||
'age_limit': 18 if age_gate else 0,
|
||||
'annotations': video_annotations,
|
||||
'webpage_url': 'https://www.youtube.com/watch?v=%s' % video_id,
|
||||
'view_count': view_count,
|
||||
'like_count': like_count,
|
||||
'dislike_count': dislike_count,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
||||
IE_DESC = u'YouTube.com playlists'
|
||||
|
@@ -1,10 +1,10 @@
|
||||
# coding: utf-8
|
||||
|
||||
import operator
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
@@ -67,29 +67,13 @@ class ZDFIE(InfoExtractor):
|
||||
''', format_id)
|
||||
|
||||
ext = format_m.group('container')
|
||||
is_supported = ext != 'f4f'
|
||||
|
||||
PROTO_ORDER = ['http', 'rtmp', 'rtsp']
|
||||
try:
|
||||
proto_pref = -PROTO_ORDER.index(format_m.group('proto'))
|
||||
except ValueError:
|
||||
proto_pref = -999
|
||||
proto = format_m.group('proto').lower()
|
||||
|
||||
quality = fnode.find('./quality').text
|
||||
QUALITY_ORDER = ['veryhigh', '300', 'high', 'med', 'low']
|
||||
try:
|
||||
quality_pref = -QUALITY_ORDER.index(quality)
|
||||
except ValueError:
|
||||
quality_pref = -999
|
||||
|
||||
abr = int(fnode.find('./audioBitrate').text) // 1000
|
||||
vbr = int(fnode.find('./videoBitrate').text) // 1000
|
||||
pref = (is_available, is_supported,
|
||||
proto_pref, quality_pref, vbr, abr)
|
||||
|
||||
format_note = u''
|
||||
if not is_supported:
|
||||
format_note += u'(unsupported)'
|
||||
if not format_note:
|
||||
format_note = None
|
||||
|
||||
@@ -101,18 +85,20 @@ class ZDFIE(InfoExtractor):
|
||||
'vcodec': format_m.group('vcodec'),
|
||||
'abr': abr,
|
||||
'vbr': vbr,
|
||||
'width': int(fnode.find('./width').text),
|
||||
'height': int(fnode.find('./height').text),
|
||||
'filesize': int(fnode.find('./filesize').text),
|
||||
'width': int_or_none(fnode.find('./width').text),
|
||||
'height': int_or_none(fnode.find('./height').text),
|
||||
'filesize': int_or_none(fnode.find('./filesize').text),
|
||||
'format_note': format_note,
|
||||
'_pref': pref,
|
||||
'protocol': proto,
|
||||
'_available': is_available,
|
||||
}
|
||||
|
||||
format_nodes = doc.findall('.//formitaeten/formitaet')
|
||||
formats = sorted(filter(lambda f: f['_available'],
|
||||
map(xml_to_format, format_nodes)),
|
||||
key=operator.itemgetter('_pref'))
|
||||
formats = list(filter(
|
||||
lambda f: f['_available'],
|
||||
map(xml_to_format, format_nodes)))
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@@ -500,12 +500,13 @@ def unescapeHTML(s):
|
||||
result = re.sub(u'(?u)&(.+?);', htmlentity_transform, s)
|
||||
return result
|
||||
|
||||
def encodeFilename(s):
|
||||
|
||||
def encodeFilename(s, for_subprocess=False):
|
||||
"""
|
||||
@param s The name of the file
|
||||
"""
|
||||
|
||||
assert type(s) == type(u'')
|
||||
assert type(s) == compat_str
|
||||
|
||||
# Python 3 has a Unicode API
|
||||
if sys.version_info >= (3, 0):
|
||||
@@ -515,12 +516,18 @@ def encodeFilename(s):
|
||||
# Pass u'' directly to use Unicode APIs on Windows 2000 and up
|
||||
# (Detecting Windows NT 4 is tricky because 'major >= 4' would
|
||||
# match Windows 9x series as well. Besides, NT 4 is obsolete.)
|
||||
return s
|
||||
if not for_subprocess:
|
||||
return s
|
||||
else:
|
||||
# For subprocess calls, encode with locale encoding
|
||||
# Refer to http://stackoverflow.com/a/9951851/35070
|
||||
encoding = preferredencoding()
|
||||
else:
|
||||
encoding = sys.getfilesystemencoding()
|
||||
if encoding is None:
|
||||
encoding = 'utf-8'
|
||||
return s.encode(encoding, 'ignore')
|
||||
if encoding is None:
|
||||
encoding = 'utf-8'
|
||||
return s.encode(encoding, 'ignore')
|
||||
|
||||
|
||||
def decodeOption(optval):
|
||||
if optval is None:
|
||||
@@ -539,7 +546,8 @@ def formatSeconds(secs):
|
||||
else:
|
||||
return '%d' % secs
|
||||
|
||||
def make_HTTPS_handler(opts_no_check_certificate):
|
||||
|
||||
def make_HTTPS_handler(opts_no_check_certificate, **kwargs):
|
||||
if sys.version_info < (3, 2):
|
||||
import httplib
|
||||
|
||||
@@ -560,7 +568,7 @@ def make_HTTPS_handler(opts_no_check_certificate):
|
||||
class HTTPSHandlerV3(compat_urllib_request.HTTPSHandler):
|
||||
def https_open(self, req):
|
||||
return self.do_open(HTTPSConnectionV3, req)
|
||||
return HTTPSHandlerV3()
|
||||
return HTTPSHandlerV3(**kwargs)
|
||||
else:
|
||||
context = ssl.SSLContext(ssl.PROTOCOL_SSLv3)
|
||||
context.verify_mode = (ssl.CERT_NONE
|
||||
@@ -571,7 +579,7 @@ def make_HTTPS_handler(opts_no_check_certificate):
|
||||
context.load_default_certs()
|
||||
except AttributeError:
|
||||
pass # Python < 3.4
|
||||
return compat_urllib_request.HTTPSHandler(context=context)
|
||||
return compat_urllib_request.HTTPSHandler(context=context, **kwargs)
|
||||
|
||||
class ExtractorError(Exception):
|
||||
"""Error during info extraction."""
|
||||
@@ -849,12 +857,22 @@ def platform_name():
|
||||
def write_string(s, out=None):
|
||||
if out is None:
|
||||
out = sys.stderr
|
||||
assert type(s) == type(u'')
|
||||
assert type(s) == compat_str
|
||||
|
||||
if ('b' in getattr(out, 'mode', '') or
|
||||
sys.version_info[0] < 3): # Python 2 lies about mode of sys.stderr
|
||||
s = s.encode(preferredencoding(), 'ignore')
|
||||
out.write(s)
|
||||
try:
|
||||
out.write(s)
|
||||
except UnicodeEncodeError:
|
||||
# In Windows shells, this can fail even when the codec is just charmap!?
|
||||
# See https://wiki.python.org/moin/PrintFails#Issue
|
||||
if sys.platform == 'win32' and hasattr(out, 'encoding'):
|
||||
s = s.encode(out.encoding, 'ignore').decode(out.encoding)
|
||||
out.write(s)
|
||||
else:
|
||||
raise
|
||||
|
||||
out.flush()
|
||||
|
||||
|
||||
@@ -1070,7 +1088,7 @@ def fix_xml_all_ampersand(xml_str):
|
||||
|
||||
|
||||
def setproctitle(title):
|
||||
assert isinstance(title, type(u''))
|
||||
assert isinstance(title, compat_str)
|
||||
try:
|
||||
libc = ctypes.cdll.LoadLibrary("libc.so.6")
|
||||
except OSError:
|
||||
@@ -1098,3 +1116,28 @@ def url_basename(url):
|
||||
class HEADRequest(compat_urllib_request.Request):
|
||||
def get_method(self):
|
||||
return "HEAD"
|
||||
|
||||
|
||||
def int_or_none(v):
|
||||
return v if v is None else int(v)
|
||||
|
||||
|
||||
def parse_duration(s):
|
||||
if s is None:
|
||||
return None
|
||||
|
||||
m = re.match(
|
||||
r'(?:(?:(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)$', s)
|
||||
if not m:
|
||||
return None
|
||||
res = int(m.group('secs'))
|
||||
if m.group('mins'):
|
||||
res += int(m.group('mins')) * 60
|
||||
if m.group('hours'):
|
||||
res += int(m.group('hours')) * 60 * 60
|
||||
return res
|
||||
|
||||
|
||||
def prepend_extension(filename, ext):
|
||||
name, real_ext = os.path.splitext(filename)
|
||||
return u'{0}.{1}{2}'.format(name, ext, real_ext)
|
||||
|
@@ -1,2 +1,2 @@
|
||||
|
||||
__version__ = '2013.12.23.1'
|
||||
__version__ = '2014.01.05'
|
||||
|
Reference in New Issue
Block a user