Compare commits
71 Commits
2013.11.24
...
2013.11.29
Author | SHA1 | Date | |
---|---|---|---|
|
17769d5a6c | ||
|
677c18092d | ||
|
3862402ff3 | ||
|
b03d0d064c | ||
|
d8d6148628 | ||
|
2be54167d0 | ||
|
4e0084d92e | ||
|
fc9e1cc697 | ||
|
f8f60d2793 | ||
|
ea07dbb8b1 | ||
|
2a275ab007 | ||
|
a2e6db365c | ||
|
9d93e7da6c | ||
|
0e44d8381a | ||
|
35907e23ec | ||
|
76d1700b28 | ||
|
dcca796ce4 | ||
|
4b19e38954 | ||
|
5f09bbff4d | ||
|
c1f9c59d11 | ||
|
652cdaa269 | ||
|
e26f871228 | ||
|
6e47b51eef | ||
|
4a98cdbf3b | ||
|
c5ed4e8f7e | ||
|
c2e52508cc | ||
|
d8ec4959c8 | ||
|
d31209a144 | ||
|
529a2e2cc3 | ||
|
781a7d0546 | ||
|
fb04e40396 | ||
|
d9b011f201 | ||
|
b0b9eaa196 | ||
|
8b134b1062 | ||
|
0c75c3fa7a | ||
|
a3927cf7ee | ||
|
1a62c18f65 | ||
|
2a15e7063b | ||
|
d46cc192d7 | ||
|
bb2bebdbe1 | ||
|
5db07df634 | ||
|
ea36cbac5e | ||
|
d0d2b49ab7 | ||
|
31cb6d8fef | ||
|
daa0dd2973 | ||
|
de79c46c8f | ||
|
94ccb6fa2e | ||
|
07e4035879 | ||
|
d0efb9ec9a | ||
|
ac05067d3d | ||
|
113577e155 | ||
|
79d09f47c2 | ||
|
c059bdd432 | ||
|
02dbf93f0e | ||
|
1fb2bcbbf7 | ||
|
16e055849e | ||
|
66cfab4226 | ||
|
6d88bc37a3 | ||
|
b7553b2554 | ||
|
e03db0a077 | ||
|
a1ee09e815 | ||
|
267ed0c5d3 | ||
|
f459d17018 | ||
|
dc65dcbb6d | ||
|
d214fdb8fe | ||
|
138df537ff | ||
|
0c7c19d6bc | ||
|
dca0872056 | ||
|
2b35c9ef74 | ||
|
4894fe8c5b | ||
|
d5a9bb4ea9 |
@@ -1,10 +1,21 @@
|
|||||||
__youtube_dl()
|
__youtube_dl()
|
||||||
{
|
{
|
||||||
local cur prev opts
|
local cur prev opts fileopts diropts keywords
|
||||||
COMPREPLY=()
|
COMPREPLY=()
|
||||||
cur="${COMP_WORDS[COMP_CWORD]}"
|
cur="${COMP_WORDS[COMP_CWORD]}"
|
||||||
|
prev="${COMP_WORDS[COMP_CWORD-1]}"
|
||||||
opts="{{flags}}"
|
opts="{{flags}}"
|
||||||
keywords=":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater"
|
keywords=":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater :ythistory"
|
||||||
|
fileopts="-a|--batch-file|--download-archive|--cookies"
|
||||||
|
diropts="--cache-dir"
|
||||||
|
|
||||||
|
if [[ ${prev} =~ ${fileopts} ]]; then
|
||||||
|
COMPREPLY=( $(compgen -f -- ${cur}) )
|
||||||
|
return 0
|
||||||
|
elif [[ ${prev} =~ ${diropts} ]]; then
|
||||||
|
COMPREPLY=( $(compgen -d -- ${cur}) )
|
||||||
|
return 0
|
||||||
|
fi
|
||||||
|
|
||||||
if [[ ${cur} =~ : ]]; then
|
if [[ ${cur} =~ : ]]; then
|
||||||
COMPREPLY=( $(compgen -W "${keywords}" -- ${cur}) )
|
COMPREPLY=( $(compgen -W "${keywords}" -- ${cur}) )
|
||||||
|
@@ -12,10 +12,6 @@ from youtube_dl import YoutubeDL
|
|||||||
from youtube_dl.utils import preferredencoding
|
from youtube_dl.utils import preferredencoding
|
||||||
|
|
||||||
|
|
||||||
def global_setup():
|
|
||||||
youtube_dl._setup_opener(timeout=10)
|
|
||||||
|
|
||||||
|
|
||||||
def get_params(override=None):
|
def get_params(override=None):
|
||||||
PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)),
|
PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)),
|
||||||
"parameters.json")
|
"parameters.json")
|
||||||
|
@@ -6,8 +6,7 @@ import sys
|
|||||||
import unittest
|
import unittest
|
||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
from test.helper import global_setup, try_rm
|
from test.helper import try_rm
|
||||||
global_setup()
|
|
||||||
|
|
||||||
|
|
||||||
from youtube_dl import YoutubeDL
|
from youtube_dl import YoutubeDL
|
||||||
|
@@ -100,10 +100,11 @@ class TestAllURLsMatching(unittest.TestCase):
|
|||||||
def test_keywords(self):
|
def test_keywords(self):
|
||||||
self.assertMatch(':ytsubs', ['youtube:subscriptions'])
|
self.assertMatch(':ytsubs', ['youtube:subscriptions'])
|
||||||
self.assertMatch(':ytsubscriptions', ['youtube:subscriptions'])
|
self.assertMatch(':ytsubscriptions', ['youtube:subscriptions'])
|
||||||
self.assertMatch(':thedailyshow', ['ComedyCentral'])
|
self.assertMatch(':ythistory', ['youtube:history'])
|
||||||
self.assertMatch(':tds', ['ComedyCentral'])
|
self.assertMatch(':thedailyshow', ['ComedyCentralShows'])
|
||||||
self.assertMatch(':colbertreport', ['ComedyCentral'])
|
self.assertMatch(':tds', ['ComedyCentralShows'])
|
||||||
self.assertMatch(':cr', ['ComedyCentral'])
|
self.assertMatch(':colbertreport', ['ComedyCentralShows'])
|
||||||
|
self.assertMatch(':cr', ['ComedyCentralShows'])
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
@@ -9,12 +9,10 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|||||||
from test.helper import (
|
from test.helper import (
|
||||||
get_params,
|
get_params,
|
||||||
get_testcases,
|
get_testcases,
|
||||||
global_setup,
|
|
||||||
try_rm,
|
try_rm,
|
||||||
md5,
|
md5,
|
||||||
report_warning
|
report_warning
|
||||||
)
|
)
|
||||||
global_setup()
|
|
||||||
|
|
||||||
|
|
||||||
import hashlib
|
import hashlib
|
||||||
|
@@ -8,8 +8,7 @@ import sys
|
|||||||
import unittest
|
import unittest
|
||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
from test.helper import FakeYDL, global_setup
|
from test.helper import FakeYDL
|
||||||
global_setup()
|
|
||||||
|
|
||||||
|
|
||||||
from youtube_dl.extractor import (
|
from youtube_dl.extractor import (
|
||||||
|
@@ -6,8 +6,7 @@ import sys
|
|||||||
import unittest
|
import unittest
|
||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
from test.helper import FakeYDL, global_setup, md5
|
from test.helper import FakeYDL, md5
|
||||||
global_setup()
|
|
||||||
|
|
||||||
|
|
||||||
from youtube_dl.extractor import (
|
from youtube_dl.extractor import (
|
||||||
@@ -73,7 +72,7 @@ class TestYoutubeSubtitles(BaseTestSubtitles):
|
|||||||
self.DL.params['writesubtitles'] = True
|
self.DL.params['writesubtitles'] = True
|
||||||
self.DL.params['subtitlesformat'] = 'vtt'
|
self.DL.params['subtitlesformat'] = 'vtt'
|
||||||
subtitles = self.getSubtitles()
|
subtitles = self.getSubtitles()
|
||||||
self.assertEqual(md5(subtitles['en']), '356cdc577fde0c6783b9b822e7206ff7')
|
self.assertEqual(md5(subtitles['en']), '3cb210999d3e021bd6c7f0ea751eab06')
|
||||||
|
|
||||||
def test_youtube_list_subtitles(self):
|
def test_youtube_list_subtitles(self):
|
||||||
self.DL.expect_warning(u'Video doesn\'t have automatic captions')
|
self.DL.expect_warning(u'Video doesn\'t have automatic captions')
|
||||||
|
@@ -7,8 +7,7 @@ import sys
|
|||||||
import unittest
|
import unittest
|
||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
from test.helper import get_params, global_setup, try_rm
|
from test.helper import get_params, try_rm
|
||||||
global_setup()
|
|
||||||
|
|
||||||
|
|
||||||
import io
|
import io
|
||||||
|
@@ -7,8 +7,7 @@ import sys
|
|||||||
import unittest
|
import unittest
|
||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
from test.helper import get_params, global_setup
|
from test.helper import get_params
|
||||||
global_setup()
|
|
||||||
|
|
||||||
|
|
||||||
import io
|
import io
|
||||||
|
@@ -6,8 +6,7 @@ import sys
|
|||||||
import unittest
|
import unittest
|
||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
from test.helper import FakeYDL, global_setup
|
from test.helper import FakeYDL
|
||||||
global_setup()
|
|
||||||
|
|
||||||
|
|
||||||
from youtube_dl.extractor import (
|
from youtube_dl.extractor import (
|
||||||
@@ -108,5 +107,14 @@ class TestYoutubeLists(unittest.TestCase):
|
|||||||
result = ie.extract('http://www.youtube.com/show/airdisasters')
|
result = ie.extract('http://www.youtube.com/show/airdisasters')
|
||||||
self.assertTrue(len(result) >= 3)
|
self.assertTrue(len(result) >= 3)
|
||||||
|
|
||||||
|
def test_youtube_mix(self):
|
||||||
|
dl = FakeYDL()
|
||||||
|
ie = YoutubePlaylistIE(dl)
|
||||||
|
result = ie.extract('http://www.youtube.com/watch?v=lLJf9qJHR3E&list=RDrjFaenf1T-Y')
|
||||||
|
entries = result['entries']
|
||||||
|
self.assertTrue(len(entries) >= 20)
|
||||||
|
original_video = entries[0]
|
||||||
|
self.assertEqual(original_video['id'], 'rjFaenf1T-Y')
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
@@ -6,9 +6,6 @@ import sys
|
|||||||
import unittest
|
import unittest
|
||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
from test.helper import global_setup
|
|
||||||
global_setup()
|
|
||||||
|
|
||||||
|
|
||||||
import io
|
import io
|
||||||
import re
|
import re
|
||||||
|
@@ -1,4 +1,3 @@
|
|||||||
import math
|
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import subprocess
|
import subprocess
|
||||||
@@ -11,6 +10,7 @@ from .utils import (
|
|||||||
ContentTooShortError,
|
ContentTooShortError,
|
||||||
determine_ext,
|
determine_ext,
|
||||||
encodeFilename,
|
encodeFilename,
|
||||||
|
format_bytes,
|
||||||
sanitize_open,
|
sanitize_open,
|
||||||
timeconvert,
|
timeconvert,
|
||||||
)
|
)
|
||||||
@@ -53,20 +53,6 @@ class FileDownloader(object):
|
|||||||
self._progress_hooks = []
|
self._progress_hooks = []
|
||||||
self.params = params
|
self.params = params
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def format_bytes(bytes):
|
|
||||||
if bytes is None:
|
|
||||||
return 'N/A'
|
|
||||||
if type(bytes) is str:
|
|
||||||
bytes = float(bytes)
|
|
||||||
if bytes == 0.0:
|
|
||||||
exponent = 0
|
|
||||||
else:
|
|
||||||
exponent = int(math.log(bytes, 1024.0))
|
|
||||||
suffix = ['B','KiB','MiB','GiB','TiB','PiB','EiB','ZiB','YiB'][exponent]
|
|
||||||
converted = float(bytes) / float(1024 ** exponent)
|
|
||||||
return '%.2f%s' % (converted, suffix)
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def format_seconds(seconds):
|
def format_seconds(seconds):
|
||||||
(mins, secs) = divmod(seconds, 60)
|
(mins, secs) = divmod(seconds, 60)
|
||||||
@@ -117,7 +103,7 @@ class FileDownloader(object):
|
|||||||
def format_speed(speed):
|
def format_speed(speed):
|
||||||
if speed is None:
|
if speed is None:
|
||||||
return '%10s' % '---b/s'
|
return '%10s' % '---b/s'
|
||||||
return '%10s' % ('%s/s' % FileDownloader.format_bytes(speed))
|
return '%10s' % ('%s/s' % format_bytes(speed))
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def best_block_size(elapsed_time, bytes):
|
def best_block_size(elapsed_time, bytes):
|
||||||
@@ -270,6 +256,61 @@ class FileDownloader(object):
|
|||||||
(clear_line, data_len_str, self.format_seconds(tot_time)))
|
(clear_line, data_len_str, self.format_seconds(tot_time)))
|
||||||
|
|
||||||
def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url, live):
|
def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url, live):
|
||||||
|
def run_rtmpdump(args):
|
||||||
|
start = time.time()
|
||||||
|
resume_percent = None
|
||||||
|
resume_downloaded_data_len = None
|
||||||
|
proc = subprocess.Popen(args, stderr=subprocess.PIPE)
|
||||||
|
cursor_in_new_line = True
|
||||||
|
proc_stderr_closed = False
|
||||||
|
while not proc_stderr_closed:
|
||||||
|
# read line from stderr
|
||||||
|
line = u''
|
||||||
|
while True:
|
||||||
|
char = proc.stderr.read(1)
|
||||||
|
if not char:
|
||||||
|
proc_stderr_closed = True
|
||||||
|
break
|
||||||
|
if char in [b'\r', b'\n']:
|
||||||
|
break
|
||||||
|
line += char.decode('ascii', 'replace')
|
||||||
|
if not line:
|
||||||
|
# proc_stderr_closed is True
|
||||||
|
continue
|
||||||
|
mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec \(([0-9]{1,2}\.[0-9])%\)', line)
|
||||||
|
if mobj:
|
||||||
|
downloaded_data_len = int(float(mobj.group(1))*1024)
|
||||||
|
percent = float(mobj.group(2))
|
||||||
|
if not resume_percent:
|
||||||
|
resume_percent = percent
|
||||||
|
resume_downloaded_data_len = downloaded_data_len
|
||||||
|
eta = self.calc_eta(start, time.time(), 100-resume_percent, percent-resume_percent)
|
||||||
|
speed = self.calc_speed(start, time.time(), downloaded_data_len-resume_downloaded_data_len)
|
||||||
|
data_len = None
|
||||||
|
if percent > 0:
|
||||||
|
data_len = int(downloaded_data_len * 100 / percent)
|
||||||
|
data_len_str = u'~' + format_bytes(data_len)
|
||||||
|
self.report_progress(percent, data_len_str, speed, eta)
|
||||||
|
cursor_in_new_line = False
|
||||||
|
self._hook_progress({
|
||||||
|
'downloaded_bytes': downloaded_data_len,
|
||||||
|
'total_bytes': data_len,
|
||||||
|
'tmpfilename': tmpfilename,
|
||||||
|
'filename': filename,
|
||||||
|
'status': 'downloading',
|
||||||
|
'eta': eta,
|
||||||
|
'speed': speed,
|
||||||
|
})
|
||||||
|
elif self.params.get('verbose', False):
|
||||||
|
if not cursor_in_new_line:
|
||||||
|
self.to_screen(u'')
|
||||||
|
cursor_in_new_line = True
|
||||||
|
self.to_screen(u'[rtmpdump] '+line)
|
||||||
|
proc.wait()
|
||||||
|
if not cursor_in_new_line:
|
||||||
|
self.to_screen(u'')
|
||||||
|
return proc.returncode
|
||||||
|
|
||||||
self.report_destination(filename)
|
self.report_destination(filename)
|
||||||
tmpfilename = self.temp_name(filename)
|
tmpfilename = self.temp_name(filename)
|
||||||
test = self.params.get('test', False)
|
test = self.params.get('test', False)
|
||||||
@@ -280,12 +321,11 @@ class FileDownloader(object):
|
|||||||
except (OSError, IOError):
|
except (OSError, IOError):
|
||||||
self.report_error(u'RTMP download detected but "rtmpdump" could not be run')
|
self.report_error(u'RTMP download detected but "rtmpdump" could not be run')
|
||||||
return False
|
return False
|
||||||
verbosity_option = '--verbose' if self.params.get('verbose', False) else '--quiet'
|
|
||||||
|
|
||||||
# Download using rtmpdump. rtmpdump returns exit code 2 when
|
# Download using rtmpdump. rtmpdump returns exit code 2 when
|
||||||
# the connection was interrumpted and resuming appears to be
|
# the connection was interrumpted and resuming appears to be
|
||||||
# possible. This is part of rtmpdump's normal usage, AFAIK.
|
# possible. This is part of rtmpdump's normal usage, AFAIK.
|
||||||
basic_args = ['rtmpdump', verbosity_option, '-r', url, '-o', tmpfilename]
|
basic_args = ['rtmpdump', '--verbose', '-r', url, '-o', tmpfilename]
|
||||||
if player_url is not None:
|
if player_url is not None:
|
||||||
basic_args += ['--swfVfy', player_url]
|
basic_args += ['--swfVfy', player_url]
|
||||||
if page_url is not None:
|
if page_url is not None:
|
||||||
@@ -299,30 +339,48 @@ class FileDownloader(object):
|
|||||||
if live:
|
if live:
|
||||||
basic_args += ['--live']
|
basic_args += ['--live']
|
||||||
args = basic_args + [[], ['--resume', '--skip', '1']][self.params.get('continuedl', False)]
|
args = basic_args + [[], ['--resume', '--skip', '1']][self.params.get('continuedl', False)]
|
||||||
|
|
||||||
|
if sys.platform == 'win32' and sys.version_info < (3, 0):
|
||||||
|
# Windows subprocess module does not actually support Unicode
|
||||||
|
# on Python 2.x
|
||||||
|
# See http://stackoverflow.com/a/9951851/35070
|
||||||
|
subprocess_encoding = sys.getfilesystemencoding()
|
||||||
|
args = [a.encode(subprocess_encoding, 'ignore') for a in args]
|
||||||
|
else:
|
||||||
|
subprocess_encoding = None
|
||||||
|
|
||||||
if self.params.get('verbose', False):
|
if self.params.get('verbose', False):
|
||||||
|
if subprocess_encoding:
|
||||||
|
str_args = [
|
||||||
|
a.decode(subprocess_encoding) if isinstance(a, bytes) else a
|
||||||
|
for a in args]
|
||||||
|
else:
|
||||||
|
str_args = args
|
||||||
try:
|
try:
|
||||||
import pipes
|
import pipes
|
||||||
shell_quote = lambda args: ' '.join(map(pipes.quote, args))
|
shell_quote = lambda args: ' '.join(map(pipes.quote, str_args))
|
||||||
except ImportError:
|
except ImportError:
|
||||||
shell_quote = repr
|
shell_quote = repr
|
||||||
self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(args))
|
self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(str_args))
|
||||||
retval = subprocess.call(args)
|
|
||||||
|
retval = run_rtmpdump(args)
|
||||||
|
|
||||||
while (retval == 2 or retval == 1) and not test:
|
while (retval == 2 or retval == 1) and not test:
|
||||||
prevsize = os.path.getsize(encodeFilename(tmpfilename))
|
prevsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||||
self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
|
self.to_screen(u'[rtmpdump] %s bytes' % prevsize)
|
||||||
time.sleep(5.0) # This seems to be needed
|
time.sleep(5.0) # This seems to be needed
|
||||||
retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
|
retval = run_rtmpdump(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
|
||||||
cursize = os.path.getsize(encodeFilename(tmpfilename))
|
cursize = os.path.getsize(encodeFilename(tmpfilename))
|
||||||
if prevsize == cursize and retval == 1:
|
if prevsize == cursize and retval == 1:
|
||||||
break
|
break
|
||||||
# Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
|
# Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
|
||||||
if prevsize == cursize and retval == 2 and cursize > 1024:
|
if prevsize == cursize and retval == 2 and cursize > 1024:
|
||||||
self.to_screen(u'\r[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
|
self.to_screen(u'[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
|
||||||
retval = 0
|
retval = 0
|
||||||
break
|
break
|
||||||
if retval == 0 or (test and retval == 2):
|
if retval == 0 or (test and retval == 2):
|
||||||
fsize = os.path.getsize(encodeFilename(tmpfilename))
|
fsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||||
self.to_screen(u'\r[rtmpdump] %s bytes' % fsize)
|
self.to_screen(u'[rtmpdump] %s bytes' % fsize)
|
||||||
self.try_rename(tmpfilename, filename)
|
self.try_rename(tmpfilename, filename)
|
||||||
self._hook_progress({
|
self._hook_progress({
|
||||||
'downloaded_bytes': fsize,
|
'downloaded_bytes': fsize,
|
||||||
@@ -525,7 +583,7 @@ class FileDownloader(object):
|
|||||||
self.to_screen(u'\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
|
self.to_screen(u'\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
|
||||||
return False
|
return False
|
||||||
|
|
||||||
data_len_str = self.format_bytes(data_len)
|
data_len_str = format_bytes(data_len)
|
||||||
byte_counter = 0 + resume_len
|
byte_counter = 0 + resume_len
|
||||||
block_size = self.params.get('buffersize', 1024)
|
block_size = self.params.get('buffersize', 1024)
|
||||||
start = time.time()
|
start = time.time()
|
||||||
|
@@ -7,8 +7,10 @@ import errno
|
|||||||
import io
|
import io
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
|
import platform
|
||||||
import re
|
import re
|
||||||
import shutil
|
import shutil
|
||||||
|
import subprocess
|
||||||
import socket
|
import socket
|
||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
@@ -18,6 +20,7 @@ if os.name == 'nt':
|
|||||||
import ctypes
|
import ctypes
|
||||||
|
|
||||||
from .utils import (
|
from .utils import (
|
||||||
|
compat_cookiejar,
|
||||||
compat_http_client,
|
compat_http_client,
|
||||||
compat_print,
|
compat_print,
|
||||||
compat_str,
|
compat_str,
|
||||||
@@ -30,9 +33,12 @@ from .utils import (
|
|||||||
DownloadError,
|
DownloadError,
|
||||||
encodeFilename,
|
encodeFilename,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
format_bytes,
|
||||||
locked_file,
|
locked_file,
|
||||||
|
make_HTTPS_handler,
|
||||||
MaxDownloadsReached,
|
MaxDownloadsReached,
|
||||||
PostProcessingError,
|
PostProcessingError,
|
||||||
|
platform_name,
|
||||||
preferredencoding,
|
preferredencoding,
|
||||||
SameFileError,
|
SameFileError,
|
||||||
sanitize_filename,
|
sanitize_filename,
|
||||||
@@ -41,9 +47,11 @@ from .utils import (
|
|||||||
UnavailableVideoError,
|
UnavailableVideoError,
|
||||||
write_json_file,
|
write_json_file,
|
||||||
write_string,
|
write_string,
|
||||||
|
YoutubeDLHandler,
|
||||||
)
|
)
|
||||||
from .extractor import get_info_extractor, gen_extractors
|
from .extractor import get_info_extractor, gen_extractors
|
||||||
from .FileDownloader import FileDownloader
|
from .FileDownloader import FileDownloader
|
||||||
|
from .version import __version__
|
||||||
|
|
||||||
|
|
||||||
class YoutubeDL(object):
|
class YoutubeDL(object):
|
||||||
@@ -118,9 +126,12 @@ class YoutubeDL(object):
|
|||||||
noplaylist: Download single video instead of a playlist if in doubt.
|
noplaylist: Download single video instead of a playlist if in doubt.
|
||||||
age_limit: An integer representing the user's age in years.
|
age_limit: An integer representing the user's age in years.
|
||||||
Unsuitable videos for the given age are skipped.
|
Unsuitable videos for the given age are skipped.
|
||||||
downloadarchive: File name of a file where all downloads are recorded.
|
download_archive: File name of a file where all downloads are recorded.
|
||||||
Videos already present in the file are not downloaded
|
Videos already present in the file are not downloaded
|
||||||
again.
|
again.
|
||||||
|
cookiefile: File name where cookies should be read from and dumped to.
|
||||||
|
nocheckcertificate:Do not verify SSL certificates
|
||||||
|
proxy: URL of the proxy server to use
|
||||||
|
|
||||||
The following parameters are not used by YoutubeDL itself, they are used by
|
The following parameters are not used by YoutubeDL itself, they are used by
|
||||||
the FileDownloader:
|
the FileDownloader:
|
||||||
@@ -135,7 +146,7 @@ class YoutubeDL(object):
|
|||||||
_num_downloads = None
|
_num_downloads = None
|
||||||
_screen_file = None
|
_screen_file = None
|
||||||
|
|
||||||
def __init__(self, params):
|
def __init__(self, params={}):
|
||||||
"""Create a FileDownloader object with the given options."""
|
"""Create a FileDownloader object with the given options."""
|
||||||
self._ies = []
|
self._ies = []
|
||||||
self._ies_instances = {}
|
self._ies_instances = {}
|
||||||
@@ -144,6 +155,7 @@ class YoutubeDL(object):
|
|||||||
self._download_retcode = 0
|
self._download_retcode = 0
|
||||||
self._num_downloads = 0
|
self._num_downloads = 0
|
||||||
self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
|
self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
|
||||||
|
self.params = params
|
||||||
|
|
||||||
if (sys.version_info >= (3,) and sys.platform != 'win32' and
|
if (sys.version_info >= (3,) and sys.platform != 'win32' and
|
||||||
sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
|
sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
|
||||||
@@ -153,14 +165,15 @@ class YoutubeDL(object):
|
|||||||
u'Assuming --restrict-filenames since file system encoding '
|
u'Assuming --restrict-filenames since file system encoding '
|
||||||
u'cannot encode all charactes. '
|
u'cannot encode all charactes. '
|
||||||
u'Set the LC_ALL environment variable to fix this.')
|
u'Set the LC_ALL environment variable to fix this.')
|
||||||
params['restrictfilenames'] = True
|
self.params['restrictfilenames'] = True
|
||||||
|
|
||||||
self.params = params
|
|
||||||
self.fd = FileDownloader(self, self.params)
|
self.fd = FileDownloader(self, self.params)
|
||||||
|
|
||||||
if '%(stitle)s' in self.params['outtmpl']:
|
if '%(stitle)s' in self.params.get('outtmpl', ''):
|
||||||
self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
|
self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
|
||||||
|
|
||||||
|
self._setup_opener()
|
||||||
|
|
||||||
def add_info_extractor(self, ie):
|
def add_info_extractor(self, ie):
|
||||||
"""Add an InfoExtractor object to the end of the list."""
|
"""Add an InfoExtractor object to the end of the list."""
|
||||||
self._ies.append(ie)
|
self._ies.append(ie)
|
||||||
@@ -241,10 +254,9 @@ class YoutubeDL(object):
|
|||||||
|
|
||||||
def __exit__(self, *args):
|
def __exit__(self, *args):
|
||||||
self.restore_console_title()
|
self.restore_console_title()
|
||||||
|
|
||||||
def fixed_template(self):
|
if self.params.get('cookiefile') is not None:
|
||||||
"""Checks if the output template is fixed."""
|
self.cookiejar.save()
|
||||||
return (re.search(u'(?u)%\\(.+?\\)s', self.params['outtmpl']) is None)
|
|
||||||
|
|
||||||
def trouble(self, message=None, tb=None):
|
def trouble(self, message=None, tb=None):
|
||||||
"""Determine action to take when a download problem appears.
|
"""Determine action to take when a download problem appears.
|
||||||
@@ -782,13 +794,15 @@ class YoutubeDL(object):
|
|||||||
|
|
||||||
def download(self, url_list):
|
def download(self, url_list):
|
||||||
"""Download a given list of URLs."""
|
"""Download a given list of URLs."""
|
||||||
if len(url_list) > 1 and self.fixed_template():
|
if (len(url_list) > 1 and
|
||||||
|
'%' not in self.params['outtmpl']
|
||||||
|
and self.params.get('max_downloads') != 1):
|
||||||
raise SameFileError(self.params['outtmpl'])
|
raise SameFileError(self.params['outtmpl'])
|
||||||
|
|
||||||
for url in url_list:
|
for url in url_list:
|
||||||
try:
|
try:
|
||||||
#It also downloads the videos
|
#It also downloads the videos
|
||||||
videos = self.extract_info(url)
|
self.extract_info(url)
|
||||||
except UnavailableVideoError:
|
except UnavailableVideoError:
|
||||||
self.report_error(u'unable to download video')
|
self.report_error(u'unable to download video')
|
||||||
except MaxDownloadsReached:
|
except MaxDownloadsReached:
|
||||||
@@ -820,20 +834,26 @@ class YoutubeDL(object):
|
|||||||
except (IOError, OSError):
|
except (IOError, OSError):
|
||||||
self.report_warning(u'Unable to remove downloaded video file')
|
self.report_warning(u'Unable to remove downloaded video file')
|
||||||
|
|
||||||
def in_download_archive(self, info_dict):
|
def _make_archive_id(self, info_dict):
|
||||||
fn = self.params.get('download_archive')
|
# Future-proof against any change in case
|
||||||
if fn is None:
|
# and backwards compatibility with prior versions
|
||||||
return False
|
extractor = info_dict.get('extractor_key')
|
||||||
extractor = info_dict.get('extractor_id')
|
|
||||||
if extractor is None:
|
if extractor is None:
|
||||||
if 'id' in info_dict:
|
if 'id' in info_dict:
|
||||||
extractor = info_dict.get('ie_key') # key in a playlist
|
extractor = info_dict.get('ie_key') # key in a playlist
|
||||||
if extractor is None:
|
if extractor is None:
|
||||||
|
return None # Incomplete video information
|
||||||
|
return extractor.lower() + u' ' + info_dict['id']
|
||||||
|
|
||||||
|
def in_download_archive(self, info_dict):
|
||||||
|
fn = self.params.get('download_archive')
|
||||||
|
if fn is None:
|
||||||
|
return False
|
||||||
|
|
||||||
|
vid_id = self._make_archive_id(info_dict)
|
||||||
|
if vid_id is None:
|
||||||
return False # Incomplete video information
|
return False # Incomplete video information
|
||||||
# Future-proof against any change in case
|
|
||||||
# and backwards compatibility with prior versions
|
|
||||||
extractor = extractor.lower()
|
|
||||||
vid_id = extractor + u' ' + info_dict['id']
|
|
||||||
try:
|
try:
|
||||||
with locked_file(fn, 'r', encoding='utf-8') as archive_file:
|
with locked_file(fn, 'r', encoding='utf-8') as archive_file:
|
||||||
for line in archive_file:
|
for line in archive_file:
|
||||||
@@ -848,12 +868,15 @@ class YoutubeDL(object):
|
|||||||
fn = self.params.get('download_archive')
|
fn = self.params.get('download_archive')
|
||||||
if fn is None:
|
if fn is None:
|
||||||
return
|
return
|
||||||
vid_id = info_dict['extractor'] + u' ' + info_dict['id']
|
vid_id = self._make_archive_id(info_dict)
|
||||||
|
assert vid_id
|
||||||
with locked_file(fn, 'a', encoding='utf-8') as archive_file:
|
with locked_file(fn, 'a', encoding='utf-8') as archive_file:
|
||||||
archive_file.write(vid_id + u'\n')
|
archive_file.write(vid_id + u'\n')
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def format_resolution(format, default='unknown'):
|
def format_resolution(format, default='unknown'):
|
||||||
|
if format.get('vcodec') == 'none':
|
||||||
|
return 'audio only'
|
||||||
if format.get('_resolution') is not None:
|
if format.get('_resolution') is not None:
|
||||||
return format['_resolution']
|
return format['_resolution']
|
||||||
if format.get('height') is not None:
|
if format.get('height') is not None:
|
||||||
@@ -867,10 +890,11 @@ class YoutubeDL(object):
|
|||||||
|
|
||||||
def list_formats(self, info_dict):
|
def list_formats(self, info_dict):
|
||||||
def format_note(fdict):
|
def format_note(fdict):
|
||||||
if fdict.get('format_note') is not None:
|
|
||||||
return fdict['format_note']
|
|
||||||
res = u''
|
res = u''
|
||||||
if fdict.get('vcodec') is not None:
|
if fdict.get('format_note') is not None:
|
||||||
|
res += fdict['format_note'] + u' '
|
||||||
|
if (fdict.get('vcodec') is not None and
|
||||||
|
fdict.get('vcodec') != 'none'):
|
||||||
res += u'%-5s' % fdict['vcodec']
|
res += u'%-5s' % fdict['vcodec']
|
||||||
elif fdict.get('vbr') is not None:
|
elif fdict.get('vbr') is not None:
|
||||||
res += u'video'
|
res += u'video'
|
||||||
@@ -886,25 +910,100 @@ class YoutubeDL(object):
|
|||||||
res += 'audio'
|
res += 'audio'
|
||||||
if fdict.get('abr') is not None:
|
if fdict.get('abr') is not None:
|
||||||
res += u'@%3dk' % fdict['abr']
|
res += u'@%3dk' % fdict['abr']
|
||||||
|
if fdict.get('filesize') is not None:
|
||||||
|
if res:
|
||||||
|
res += u', '
|
||||||
|
res += format_bytes(fdict['filesize'])
|
||||||
return res
|
return res
|
||||||
|
|
||||||
def line(format):
|
def line(format, idlen=20):
|
||||||
return (u'%-20s%-10s%-12s%s' % (
|
return ((u'%-' + compat_str(idlen + 1) + u's%-10s%-12s%s') % (
|
||||||
format['format_id'],
|
format['format_id'],
|
||||||
format['ext'],
|
format['ext'],
|
||||||
self.format_resolution(format),
|
self.format_resolution(format),
|
||||||
format_note(format),
|
format_note(format),
|
||||||
)
|
))
|
||||||
)
|
|
||||||
|
|
||||||
formats = info_dict.get('formats', [info_dict])
|
formats = info_dict.get('formats', [info_dict])
|
||||||
formats_s = list(map(line, formats))
|
idlen = max(len(u'format code'),
|
||||||
|
max(len(f['format_id']) for f in formats))
|
||||||
|
formats_s = [line(f, idlen) for f in formats]
|
||||||
if len(formats) > 1:
|
if len(formats) > 1:
|
||||||
formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
|
formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
|
||||||
formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'
|
formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'
|
||||||
|
|
||||||
header_line = line({
|
header_line = line({
|
||||||
'format_id': u'format code', 'ext': u'extension',
|
'format_id': u'format code', 'ext': u'extension',
|
||||||
'_resolution': u'resolution', 'format_note': u'note'})
|
'_resolution': u'resolution', 'format_note': u'note'}, idlen=idlen)
|
||||||
self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
|
self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
|
||||||
(info_dict['id'], header_line, u"\n".join(formats_s)))
|
(info_dict['id'], header_line, u"\n".join(formats_s)))
|
||||||
|
|
||||||
|
def urlopen(self, req):
|
||||||
|
""" Start an HTTP download """
|
||||||
|
return self._opener.open(req)
|
||||||
|
|
||||||
|
def print_debug_header(self):
|
||||||
|
if not self.params.get('verbose'):
|
||||||
|
return
|
||||||
|
write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
|
||||||
|
try:
|
||||||
|
sp = subprocess.Popen(
|
||||||
|
['git', 'rev-parse', '--short', 'HEAD'],
|
||||||
|
stdout=subprocess.PIPE, stderr=subprocess.PIPE,
|
||||||
|
cwd=os.path.dirname(os.path.abspath(__file__)))
|
||||||
|
out, err = sp.communicate()
|
||||||
|
out = out.decode().strip()
|
||||||
|
if re.match('[0-9a-f]+', out):
|
||||||
|
write_string(u'[debug] Git HEAD: ' + out + u'\n')
|
||||||
|
except:
|
||||||
|
try:
|
||||||
|
sys.exc_clear()
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
write_string(u'[debug] Python version %s - %s' %
|
||||||
|
(platform.python_version(), platform_name()) + u'\n')
|
||||||
|
|
||||||
|
proxy_map = {}
|
||||||
|
for handler in self._opener.handlers:
|
||||||
|
if hasattr(handler, 'proxies'):
|
||||||
|
proxy_map.update(handler.proxies)
|
||||||
|
write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
|
||||||
|
|
||||||
|
def _setup_opener(self, timeout=20):
|
||||||
|
opts_cookiefile = self.params.get('cookiefile')
|
||||||
|
opts_proxy = self.params.get('proxy')
|
||||||
|
|
||||||
|
if opts_cookiefile is None:
|
||||||
|
self.cookiejar = compat_cookiejar.CookieJar()
|
||||||
|
else:
|
||||||
|
self.cookiejar = compat_cookiejar.MozillaCookieJar(
|
||||||
|
opts_cookiefile)
|
||||||
|
if os.access(opts_cookiefile, os.R_OK):
|
||||||
|
self.cookiejar.load()
|
||||||
|
|
||||||
|
cookie_processor = compat_urllib_request.HTTPCookieProcessor(
|
||||||
|
self.cookiejar)
|
||||||
|
if opts_proxy is not None:
|
||||||
|
if opts_proxy == '':
|
||||||
|
proxies = {}
|
||||||
|
else:
|
||||||
|
proxies = {'http': opts_proxy, 'https': opts_proxy}
|
||||||
|
else:
|
||||||
|
proxies = compat_urllib_request.getproxies()
|
||||||
|
# Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
|
||||||
|
if 'http' in proxies and 'https' not in proxies:
|
||||||
|
proxies['https'] = proxies['http']
|
||||||
|
proxy_handler = compat_urllib_request.ProxyHandler(proxies)
|
||||||
|
https_handler = make_HTTPS_handler(
|
||||||
|
self.params.get('nocheckcertificate', False))
|
||||||
|
opener = compat_urllib_request.build_opener(
|
||||||
|
https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
|
||||||
|
# Delete the default user-agent header, which would otherwise apply in
|
||||||
|
# cases where our custom HTTP handler doesn't come into play
|
||||||
|
# (See https://github.com/rg3/youtube-dl/issues/1309 for details)
|
||||||
|
opener.addheaders = []
|
||||||
|
self._opener = opener
|
||||||
|
|
||||||
|
# TODO remove this global modification
|
||||||
|
compat_urllib_request.install_opener(opener)
|
||||||
|
socket.setdefaulttimeout(timeout)
|
||||||
|
@@ -41,45 +41,35 @@ __authors__ = (
|
|||||||
__license__ = 'Public Domain'
|
__license__ = 'Public Domain'
|
||||||
|
|
||||||
import codecs
|
import codecs
|
||||||
import collections
|
|
||||||
import getpass
|
import getpass
|
||||||
import optparse
|
import optparse
|
||||||
import os
|
import os
|
||||||
import random
|
import random
|
||||||
import re
|
import re
|
||||||
import shlex
|
import shlex
|
||||||
import socket
|
|
||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
import traceback
|
|
||||||
import platform
|
|
||||||
|
|
||||||
|
|
||||||
from .utils import (
|
from .utils import (
|
||||||
compat_cookiejar,
|
|
||||||
compat_print,
|
compat_print,
|
||||||
compat_str,
|
|
||||||
compat_urllib_request,
|
|
||||||
DateRange,
|
DateRange,
|
||||||
decodeOption,
|
decodeOption,
|
||||||
determine_ext,
|
determine_ext,
|
||||||
DownloadError,
|
DownloadError,
|
||||||
get_cachedir,
|
get_cachedir,
|
||||||
make_HTTPS_handler,
|
|
||||||
MaxDownloadsReached,
|
MaxDownloadsReached,
|
||||||
platform_name,
|
|
||||||
preferredencoding,
|
preferredencoding,
|
||||||
SameFileError,
|
SameFileError,
|
||||||
std_headers,
|
std_headers,
|
||||||
write_string,
|
write_string,
|
||||||
YoutubeDLHandler,
|
|
||||||
)
|
)
|
||||||
from .update import update_self
|
from .update import update_self
|
||||||
from .version import __version__
|
|
||||||
from .FileDownloader import (
|
from .FileDownloader import (
|
||||||
FileDownloader,
|
FileDownloader,
|
||||||
)
|
)
|
||||||
from .extractor import gen_extractors
|
from .extractor import gen_extractors
|
||||||
|
from .version import __version__
|
||||||
from .YoutubeDL import YoutubeDL
|
from .YoutubeDL import YoutubeDL
|
||||||
from .PostProcessor import (
|
from .PostProcessor import (
|
||||||
FFmpegMetadataPP,
|
FFmpegMetadataPP,
|
||||||
@@ -216,7 +206,9 @@ def parseOpts(overrideArguments=None):
|
|||||||
dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)
|
dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)
|
||||||
selection.add_option('--match-title', dest='matchtitle', metavar='REGEX',help='download only matching titles (regex or caseless sub-string)')
|
selection.add_option('--match-title', dest='matchtitle', metavar='REGEX',help='download only matching titles (regex or caseless sub-string)')
|
||||||
selection.add_option('--reject-title', dest='rejecttitle', metavar='REGEX',help='skip download for matching titles (regex or caseless sub-string)')
|
selection.add_option('--reject-title', dest='rejecttitle', metavar='REGEX',help='skip download for matching titles (regex or caseless sub-string)')
|
||||||
selection.add_option('--max-downloads', metavar='NUMBER', dest='max_downloads', help='Abort after downloading NUMBER files', default=None)
|
selection.add_option('--max-downloads', metavar='NUMBER',
|
||||||
|
dest='max_downloads', type=int, default=None,
|
||||||
|
help='Abort after downloading NUMBER files')
|
||||||
selection.add_option('--min-filesize', metavar='SIZE', dest='min_filesize', help="Do not download any videos smaller than SIZE (e.g. 50k or 44.6m)", default=None)
|
selection.add_option('--min-filesize', metavar='SIZE', dest='min_filesize', help="Do not download any videos smaller than SIZE (e.g. 50k or 44.6m)", default=None)
|
||||||
selection.add_option('--max-filesize', metavar='SIZE', dest='max_filesize', help="Do not download any videos larger than SIZE (e.g. 50k or 44.6m)", default=None)
|
selection.add_option('--max-filesize', metavar='SIZE', dest='max_filesize', help="Do not download any videos larger than SIZE (e.g. 50k or 44.6m)", default=None)
|
||||||
selection.add_option('--date', metavar='DATE', dest='date', help='download only videos uploaded in this date', default=None)
|
selection.add_option('--date', metavar='DATE', dest='date', help='download only videos uploaded in this date', default=None)
|
||||||
@@ -452,19 +444,6 @@ def _real_main(argv=None):
|
|||||||
|
|
||||||
parser, opts, args = parseOpts(argv)
|
parser, opts, args = parseOpts(argv)
|
||||||
|
|
||||||
# Open appropriate CookieJar
|
|
||||||
if opts.cookiefile is None:
|
|
||||||
jar = compat_cookiejar.CookieJar()
|
|
||||||
else:
|
|
||||||
try:
|
|
||||||
jar = compat_cookiejar.MozillaCookieJar(opts.cookiefile)
|
|
||||||
if os.access(opts.cookiefile, os.R_OK):
|
|
||||||
jar.load()
|
|
||||||
except (IOError, OSError) as err:
|
|
||||||
if opts.verbose:
|
|
||||||
traceback.print_exc()
|
|
||||||
write_string(u'ERROR: unable to open cookie file\n')
|
|
||||||
sys.exit(101)
|
|
||||||
# Set user agent
|
# Set user agent
|
||||||
if opts.user_agent is not None:
|
if opts.user_agent is not None:
|
||||||
std_headers['User-Agent'] = opts.user_agent
|
std_headers['User-Agent'] = opts.user_agent
|
||||||
@@ -496,8 +475,6 @@ def _real_main(argv=None):
|
|||||||
all_urls = batchurls + args
|
all_urls = batchurls + args
|
||||||
all_urls = [url.strip() for url in all_urls]
|
all_urls = [url.strip() for url in all_urls]
|
||||||
|
|
||||||
opener = _setup_opener(jar=jar, opts=opts)
|
|
||||||
|
|
||||||
extractors = gen_extractors()
|
extractors = gen_extractors()
|
||||||
|
|
||||||
if opts.list_extractors:
|
if opts.list_extractors:
|
||||||
@@ -552,7 +529,7 @@ def _real_main(argv=None):
|
|||||||
if opts.retries is not None:
|
if opts.retries is not None:
|
||||||
try:
|
try:
|
||||||
opts.retries = int(opts.retries)
|
opts.retries = int(opts.retries)
|
||||||
except (TypeError, ValueError) as err:
|
except (TypeError, ValueError):
|
||||||
parser.error(u'invalid retry count specified')
|
parser.error(u'invalid retry count specified')
|
||||||
if opts.buffersize is not None:
|
if opts.buffersize is not None:
|
||||||
numeric_buffersize = FileDownloader.parse_bytes(opts.buffersize)
|
numeric_buffersize = FileDownloader.parse_bytes(opts.buffersize)
|
||||||
@@ -563,13 +540,13 @@ def _real_main(argv=None):
|
|||||||
opts.playliststart = int(opts.playliststart)
|
opts.playliststart = int(opts.playliststart)
|
||||||
if opts.playliststart <= 0:
|
if opts.playliststart <= 0:
|
||||||
raise ValueError(u'Playlist start must be positive')
|
raise ValueError(u'Playlist start must be positive')
|
||||||
except (TypeError, ValueError) as err:
|
except (TypeError, ValueError):
|
||||||
parser.error(u'invalid playlist start number specified')
|
parser.error(u'invalid playlist start number specified')
|
||||||
try:
|
try:
|
||||||
opts.playlistend = int(opts.playlistend)
|
opts.playlistend = int(opts.playlistend)
|
||||||
if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart):
|
if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart):
|
||||||
raise ValueError(u'Playlist end must be greater than playlist start')
|
raise ValueError(u'Playlist end must be greater than playlist start')
|
||||||
except (TypeError, ValueError) as err:
|
except (TypeError, ValueError):
|
||||||
parser.error(u'invalid playlist end number specified')
|
parser.error(u'invalid playlist end number specified')
|
||||||
if opts.extractaudio:
|
if opts.extractaudio:
|
||||||
if opts.audioformat not in ['best', 'aac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav']:
|
if opts.audioformat not in ['best', 'aac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav']:
|
||||||
@@ -672,34 +649,13 @@ def _real_main(argv=None):
|
|||||||
'youtube_print_sig_code': opts.youtube_print_sig_code,
|
'youtube_print_sig_code': opts.youtube_print_sig_code,
|
||||||
'age_limit': opts.age_limit,
|
'age_limit': opts.age_limit,
|
||||||
'download_archive': opts.download_archive,
|
'download_archive': opts.download_archive,
|
||||||
|
'cookiefile': opts.cookiefile,
|
||||||
|
'nocheckcertificate': opts.no_check_certificate,
|
||||||
|
'proxy': opts.proxy,
|
||||||
}
|
}
|
||||||
|
|
||||||
with YoutubeDL(ydl_opts) as ydl:
|
with YoutubeDL(ydl_opts) as ydl:
|
||||||
if opts.verbose:
|
ydl.print_debug_header()
|
||||||
write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
|
|
||||||
try:
|
|
||||||
sp = subprocess.Popen(
|
|
||||||
['git', 'rev-parse', '--short', 'HEAD'],
|
|
||||||
stdout=subprocess.PIPE, stderr=subprocess.PIPE,
|
|
||||||
cwd=os.path.dirname(os.path.abspath(__file__)))
|
|
||||||
out, err = sp.communicate()
|
|
||||||
out = out.decode().strip()
|
|
||||||
if re.match('[0-9a-f]+', out):
|
|
||||||
write_string(u'[debug] Git HEAD: ' + out + u'\n')
|
|
||||||
except:
|
|
||||||
try:
|
|
||||||
sys.exc_clear()
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
write_string(u'[debug] Python version %s - %s' %
|
|
||||||
(platform.python_version(), platform_name()) + u'\n')
|
|
||||||
|
|
||||||
proxy_map = {}
|
|
||||||
for handler in opener.handlers:
|
|
||||||
if hasattr(handler, 'proxies'):
|
|
||||||
proxy_map.update(handler.proxies)
|
|
||||||
write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
|
|
||||||
|
|
||||||
ydl.add_default_info_extractors()
|
ydl.add_default_info_extractors()
|
||||||
|
|
||||||
# PostProcessors
|
# PostProcessors
|
||||||
@@ -730,46 +686,9 @@ def _real_main(argv=None):
|
|||||||
ydl.to_screen(u'--max-download limit reached, aborting.')
|
ydl.to_screen(u'--max-download limit reached, aborting.')
|
||||||
retcode = 101
|
retcode = 101
|
||||||
|
|
||||||
# Dump cookie jar if requested
|
|
||||||
if opts.cookiefile is not None:
|
|
||||||
try:
|
|
||||||
jar.save()
|
|
||||||
except (IOError, OSError):
|
|
||||||
sys.exit(u'ERROR: unable to save cookie jar')
|
|
||||||
|
|
||||||
sys.exit(retcode)
|
sys.exit(retcode)
|
||||||
|
|
||||||
|
|
||||||
def _setup_opener(jar=None, opts=None, timeout=300):
|
|
||||||
if opts is None:
|
|
||||||
FakeOptions = collections.namedtuple(
|
|
||||||
'FakeOptions', ['proxy', 'no_check_certificate'])
|
|
||||||
opts = FakeOptions(proxy=None, no_check_certificate=False)
|
|
||||||
|
|
||||||
cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar)
|
|
||||||
if opts.proxy is not None:
|
|
||||||
if opts.proxy == '':
|
|
||||||
proxies = {}
|
|
||||||
else:
|
|
||||||
proxies = {'http': opts.proxy, 'https': opts.proxy}
|
|
||||||
else:
|
|
||||||
proxies = compat_urllib_request.getproxies()
|
|
||||||
# Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
|
|
||||||
if 'http' in proxies and 'https' not in proxies:
|
|
||||||
proxies['https'] = proxies['http']
|
|
||||||
proxy_handler = compat_urllib_request.ProxyHandler(proxies)
|
|
||||||
https_handler = make_HTTPS_handler(opts)
|
|
||||||
opener = compat_urllib_request.build_opener(
|
|
||||||
https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
|
|
||||||
# Delete the default user-agent header, which would otherwise apply in
|
|
||||||
# cases where our custom HTTP handler doesn't come into play
|
|
||||||
# (See https://github.com/rg3/youtube-dl/issues/1309 for details)
|
|
||||||
opener.addheaders = []
|
|
||||||
compat_urllib_request.install_opener(opener)
|
|
||||||
socket.setdefaulttimeout(timeout)
|
|
||||||
return opener
|
|
||||||
|
|
||||||
|
|
||||||
def main(argv=None):
|
def main(argv=None):
|
||||||
try:
|
try:
|
||||||
_real_main(argv)
|
_real_main(argv)
|
||||||
|
@@ -20,9 +20,11 @@ from .c56 import C56IE
|
|||||||
from .canalplus import CanalplusIE
|
from .canalplus import CanalplusIE
|
||||||
from .canalc2 import Canalc2IE
|
from .canalc2 import Canalc2IE
|
||||||
from .cinemassacre import CinemassacreIE
|
from .cinemassacre import CinemassacreIE
|
||||||
|
from .clipfish import ClipfishIE
|
||||||
|
from .clipsyndicate import ClipsyndicateIE
|
||||||
from .cnn import CNNIE
|
from .cnn import CNNIE
|
||||||
from .collegehumor import CollegeHumorIE
|
from .collegehumor import CollegeHumorIE
|
||||||
from .comedycentral import ComedyCentralIE
|
from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
|
||||||
from .condenast import CondeNastIE
|
from .condenast import CondeNastIE
|
||||||
from .criterion import CriterionIE
|
from .criterion import CriterionIE
|
||||||
from .cspan import CSpanIE
|
from .cspan import CSpanIE
|
||||||
@@ -70,6 +72,7 @@ from .hotnewhiphop import HotNewHipHopIE
|
|||||||
from .howcast import HowcastIE
|
from .howcast import HowcastIE
|
||||||
from .hypem import HypemIE
|
from .hypem import HypemIE
|
||||||
from .ign import IGNIE, OneUPIE
|
from .ign import IGNIE, OneUPIE
|
||||||
|
from .imdb import ImdbIE
|
||||||
from .ina import InaIE
|
from .ina import InaIE
|
||||||
from .infoq import InfoQIE
|
from .infoq import InfoQIE
|
||||||
from .instagram import InstagramIE
|
from .instagram import InstagramIE
|
||||||
@@ -104,6 +107,7 @@ from .ooyala import OoyalaIE
|
|||||||
from .orf import ORFIE
|
from .orf import ORFIE
|
||||||
from .pbs import PBSIE
|
from .pbs import PBSIE
|
||||||
from .photobucket import PhotobucketIE
|
from .photobucket import PhotobucketIE
|
||||||
|
from .podomatic import PodomaticIE
|
||||||
from .pornhub import PornHubIE
|
from .pornhub import PornHubIE
|
||||||
from .pornotube import PornotubeIE
|
from .pornotube import PornotubeIE
|
||||||
from .rbmaradio import RBMARadioIE
|
from .rbmaradio import RBMARadioIE
|
||||||
@@ -185,6 +189,7 @@ from .youtube import (
|
|||||||
YoutubeTruncatedURLIE,
|
YoutubeTruncatedURLIE,
|
||||||
YoutubeWatchLaterIE,
|
YoutubeWatchLaterIE,
|
||||||
YoutubeFavouritesIE,
|
YoutubeFavouritesIE,
|
||||||
|
YoutubeHistoryIE,
|
||||||
)
|
)
|
||||||
from .zdf import ZDFIE
|
from .zdf import ZDFIE
|
||||||
|
|
||||||
|
@@ -1,5 +1,4 @@
|
|||||||
import re
|
import re
|
||||||
import xml.etree.ElementTree
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
@@ -28,9 +27,8 @@ class AnitubeIE(InfoExtractor):
|
|||||||
key = self._html_search_regex(r'http://www\.anitube\.se/embed/([A-Za-z0-9_-]*)',
|
key = self._html_search_regex(r'http://www\.anitube\.se/embed/([A-Za-z0-9_-]*)',
|
||||||
webpage, u'key')
|
webpage, u'key')
|
||||||
|
|
||||||
webpage_config = self._download_webpage('http://www.anitube.se/nuevo/econfig.php?key=%s' % key,
|
config_xml = self._download_xml('http://www.anitube.se/nuevo/econfig.php?key=%s' % key,
|
||||||
key)
|
key)
|
||||||
config_xml = xml.etree.ElementTree.fromstring(webpage_config.encode('utf-8'))
|
|
||||||
|
|
||||||
video_title = config_xml.find('title').text
|
video_title = config_xml.find('title').text
|
||||||
|
|
||||||
|
@@ -1,7 +1,6 @@
|
|||||||
# encoding: utf-8
|
# encoding: utf-8
|
||||||
import re
|
import re
|
||||||
import json
|
import json
|
||||||
import xml.etree.ElementTree
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@@ -78,8 +77,7 @@ class ArteTvIE(InfoExtractor):
|
|||||||
"""Extract from videos.arte.tv"""
|
"""Extract from videos.arte.tv"""
|
||||||
ref_xml_url = url.replace('/videos/', '/do_delegate/videos/')
|
ref_xml_url = url.replace('/videos/', '/do_delegate/videos/')
|
||||||
ref_xml_url = ref_xml_url.replace('.html', ',view,asPlayerXml.xml')
|
ref_xml_url = ref_xml_url.replace('.html', ',view,asPlayerXml.xml')
|
||||||
ref_xml = self._download_webpage(ref_xml_url, video_id, note=u'Downloading metadata')
|
ref_xml_doc = self._download_xml(ref_xml_url, video_id, note=u'Downloading metadata')
|
||||||
ref_xml_doc = xml.etree.ElementTree.fromstring(ref_xml)
|
|
||||||
config_node = find_xpath_attr(ref_xml_doc, './/video', 'lang', lang)
|
config_node = find_xpath_attr(ref_xml_doc, './/video', 'lang', lang)
|
||||||
config_xml_url = config_node.attrib['ref']
|
config_xml_url = config_node.attrib['ref']
|
||||||
config_xml = self._download_webpage(config_xml_url, video_id, note=u'Downloading configuration')
|
config_xml = self._download_webpage(config_xml_url, video_id, note=u'Downloading configuration')
|
||||||
@@ -109,9 +107,8 @@ class ArteTvIE(InfoExtractor):
|
|||||||
"""Extract form http://liveweb.arte.tv/"""
|
"""Extract form http://liveweb.arte.tv/"""
|
||||||
webpage = self._download_webpage(url, name)
|
webpage = self._download_webpage(url, name)
|
||||||
video_id = self._search_regex(r'eventId=(\d+?)("|&)', webpage, u'event id')
|
video_id = self._search_regex(r'eventId=(\d+?)("|&)', webpage, u'event id')
|
||||||
config_xml = self._download_webpage('http://download.liveweb.arte.tv/o21/liveweb/events/event-%s.xml' % video_id,
|
config_doc = self._download_xml('http://download.liveweb.arte.tv/o21/liveweb/events/event-%s.xml' % video_id,
|
||||||
video_id, u'Downloading information')
|
video_id, u'Downloading information')
|
||||||
config_doc = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
|
|
||||||
event_doc = config_doc.find('event')
|
event_doc = config_doc.find('event')
|
||||||
url_node = event_doc.find('video').find('urlHd')
|
url_node = event_doc.find('video').find('urlHd')
|
||||||
if url_node is None:
|
if url_node is None:
|
||||||
|
@@ -25,6 +25,11 @@ class BambuserIE(InfoExtractor):
|
|||||||
u'uploader': u'pixelversity',
|
u'uploader': u'pixelversity',
|
||||||
u'uploader_id': u'344706',
|
u'uploader_id': u'344706',
|
||||||
},
|
},
|
||||||
|
u'params': {
|
||||||
|
# It doesn't respect the 'Range' header, it would download the whole video
|
||||||
|
# caused the travis builds to fail: https://travis-ci.org/rg3/youtube-dl/jobs/14493845#L59
|
||||||
|
u'skip_download': True,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@@ -76,18 +76,21 @@ class BrightcoveIE(InfoExtractor):
|
|||||||
'playerID': find_xpath_attr(object_doc, './param', 'name', 'playerID').attrib['value'],
|
'playerID': find_xpath_attr(object_doc, './param', 'name', 'playerID').attrib['value'],
|
||||||
}
|
}
|
||||||
def find_param(name):
|
def find_param(name):
|
||||||
return find_xpath_attr(object_doc, './param', 'name', name)
|
node = find_xpath_attr(object_doc, './param', 'name', name)
|
||||||
|
if node is not None:
|
||||||
|
return node.attrib['value']
|
||||||
|
return None
|
||||||
playerKey = find_param('playerKey')
|
playerKey = find_param('playerKey')
|
||||||
# Not all pages define this value
|
# Not all pages define this value
|
||||||
if playerKey is not None:
|
if playerKey is not None:
|
||||||
params['playerKey'] = playerKey.attrib['value']
|
params['playerKey'] = playerKey
|
||||||
# The three fields hold the id of the video
|
# The three fields hold the id of the video
|
||||||
videoPlayer = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID')
|
videoPlayer = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID')
|
||||||
if videoPlayer is not None:
|
if videoPlayer is not None:
|
||||||
params['@videoPlayer'] = videoPlayer.attrib['value']
|
params['@videoPlayer'] = videoPlayer
|
||||||
linkBase = find_param('linkBaseURL')
|
linkBase = find_param('linkBaseURL')
|
||||||
if linkBase is not None:
|
if linkBase is not None:
|
||||||
params['linkBaseURL'] = linkBase.attrib['value']
|
params['linkBaseURL'] = linkBase
|
||||||
data = compat_urllib_parse.urlencode(params)
|
data = compat_urllib_parse.urlencode(params)
|
||||||
return cls._FEDERATED_URL_TEMPLATE % data
|
return cls._FEDERATED_URL_TEMPLATE % data
|
||||||
|
|
||||||
|
@@ -1,6 +1,5 @@
|
|||||||
# encoding: utf-8
|
# encoding: utf-8
|
||||||
import re
|
import re
|
||||||
import xml.etree.ElementTree
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import unified_strdate
|
from ..utils import unified_strdate
|
||||||
@@ -31,11 +30,10 @@ class CanalplusIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, mobj.group('path'))
|
webpage = self._download_webpage(url, mobj.group('path'))
|
||||||
video_id = self._search_regex(r'videoId = "(\d+)";', webpage, u'video id')
|
video_id = self._search_regex(r'videoId = "(\d+)";', webpage, u'video id')
|
||||||
info_url = self._VIDEO_INFO_TEMPLATE % video_id
|
info_url = self._VIDEO_INFO_TEMPLATE % video_id
|
||||||
info_page = self._download_webpage(info_url,video_id,
|
doc = self._download_xml(info_url,video_id,
|
||||||
u'Downloading video info')
|
u'Downloading video info')
|
||||||
|
|
||||||
self.report_extraction(video_id)
|
self.report_extraction(video_id)
|
||||||
doc = xml.etree.ElementTree.fromstring(info_page.encode('utf-8'))
|
|
||||||
video_info = [video for video in doc if video.find('ID').text == video_id][0]
|
video_info = [video for video in doc if video.find('ID').text == video_id][0]
|
||||||
infos = video_info.find('INFOS')
|
infos = video_info.find('INFOS')
|
||||||
media = video_info.find('MEDIA')
|
media = video_info.find('MEDIA')
|
||||||
|
57
youtube_dl/extractor/clipfish.py
Normal file
57
youtube_dl/extractor/clipfish.py
Normal file
@@ -0,0 +1,57 @@
|
|||||||
|
import re
|
||||||
|
import time
|
||||||
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import ExtractorError
|
||||||
|
|
||||||
|
|
||||||
|
class ClipfishIE(InfoExtractor):
|
||||||
|
IE_NAME = u'clipfish'
|
||||||
|
|
||||||
|
_VALID_URL = r'^https?://(?:www\.)?clipfish\.de/.*?/video/(?P<id>[0-9]+)/'
|
||||||
|
_TEST = {
|
||||||
|
u'url': u'http://www.clipfish.de/special/game-trailer/video/3966754/fifa-14-e3-2013-trailer/',
|
||||||
|
u'file': u'3966754.mp4',
|
||||||
|
u'md5': u'2521cd644e862936cf2e698206e47385',
|
||||||
|
u'info_dict': {
|
||||||
|
u'title': u'FIFA 14 - E3 2013 Trailer',
|
||||||
|
u'duration': 82,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group(1)
|
||||||
|
|
||||||
|
info_url = ('http://www.clipfish.de/devxml/videoinfo/%s?ts=%d' %
|
||||||
|
(video_id, int(time.time())))
|
||||||
|
doc = self._download_xml(
|
||||||
|
info_url, video_id, note=u'Downloading info page')
|
||||||
|
title = doc.find('title').text
|
||||||
|
video_url = doc.find('filename').text
|
||||||
|
if video_url is None:
|
||||||
|
xml_bytes = xml.etree.ElementTree.tostring(doc)
|
||||||
|
raise ExtractorError(u'Cannot find video URL in document %r' %
|
||||||
|
xml_bytes)
|
||||||
|
thumbnail = doc.find('imageurl').text
|
||||||
|
duration_str = doc.find('duration').text
|
||||||
|
m = re.match(
|
||||||
|
r'^(?P<hours>[0-9]+):(?P<minutes>[0-9]{2}):(?P<seconds>[0-9]{2}):(?P<ms>[0-9]*)$',
|
||||||
|
duration_str)
|
||||||
|
if m:
|
||||||
|
duration = (
|
||||||
|
(int(m.group('hours')) * 60 * 60) +
|
||||||
|
(int(m.group('minutes')) * 60) +
|
||||||
|
(int(m.group('seconds')))
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
duration = None
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'url': video_url,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'duration': duration,
|
||||||
|
}
|
52
youtube_dl/extractor/clipsyndicate.py
Normal file
52
youtube_dl/extractor/clipsyndicate.py
Normal file
@@ -0,0 +1,52 @@
|
|||||||
|
import re
|
||||||
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
find_xpath_attr,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class ClipsyndicateIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'http://www\.clipsyndicate\.com/video/play(list/\d+)?/(?P<id>\d+)'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
u'url': u'http://www.clipsyndicate.com/video/play/4629301/brick_briscoe',
|
||||||
|
u'md5': u'4d7d549451bad625e0ff3d7bd56d776c',
|
||||||
|
u'info_dict': {
|
||||||
|
u'id': u'4629301',
|
||||||
|
u'ext': u'mp4',
|
||||||
|
u'title': u'Brick Briscoe',
|
||||||
|
u'duration': 612,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
js_player = self._download_webpage(
|
||||||
|
'http://eplayer.clipsyndicate.com/embed/player.js?va_id=%s' % video_id,
|
||||||
|
video_id, u'Downlaoding player')
|
||||||
|
# it includes a required token
|
||||||
|
flvars = self._search_regex(r'flvars: "(.*?)"', js_player, u'flvars')
|
||||||
|
|
||||||
|
playlist_page = self._download_webpage(
|
||||||
|
'http://eplayer.clipsyndicate.com/osmf/playlist?%s' % flvars,
|
||||||
|
video_id, u'Downloading video info')
|
||||||
|
# Fix broken xml
|
||||||
|
playlist_page = re.sub('&', '&', playlist_page)
|
||||||
|
pdoc = xml.etree.ElementTree.fromstring(playlist_page.encode('utf-8'))
|
||||||
|
|
||||||
|
track_doc = pdoc.find('trackList/track')
|
||||||
|
def find_param(name):
|
||||||
|
node = find_xpath_attr(track_doc, './/param', 'name', name)
|
||||||
|
if node is not None:
|
||||||
|
return node.attrib['value']
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': find_param('title'),
|
||||||
|
'url': track_doc.find('location').text,
|
||||||
|
'thumbnail': find_param('thumbnail'),
|
||||||
|
'duration': int(find_param('duration')),
|
||||||
|
}
|
@@ -1,5 +1,4 @@
|
|||||||
import re
|
import re
|
||||||
import xml.etree.ElementTree
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import determine_ext
|
from ..utils import determine_ext
|
||||||
@@ -33,8 +32,7 @@ class CNNIE(InfoExtractor):
|
|||||||
path = mobj.group('path')
|
path = mobj.group('path')
|
||||||
page_title = mobj.group('title')
|
page_title = mobj.group('title')
|
||||||
info_url = u'http://cnn.com/video/data/3.0/%s/index.xml' % path
|
info_url = u'http://cnn.com/video/data/3.0/%s/index.xml' % path
|
||||||
info_xml = self._download_webpage(info_url, page_title)
|
info = self._download_xml(info_url, page_title)
|
||||||
info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
|
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for f in info.findall('files/file'):
|
for f in info.findall('files/file'):
|
||||||
|
@@ -1,5 +1,4 @@
|
|||||||
import re
|
import re
|
||||||
import xml.etree.ElementTree
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@@ -46,11 +45,10 @@ class CollegeHumorIE(InfoExtractor):
|
|||||||
|
|
||||||
self.report_extraction(video_id)
|
self.report_extraction(video_id)
|
||||||
xmlUrl = 'http://www.collegehumor.com/moogaloop/video/' + video_id
|
xmlUrl = 'http://www.collegehumor.com/moogaloop/video/' + video_id
|
||||||
metaXml = self._download_webpage(xmlUrl, video_id,
|
mdoc = self._download_xml(xmlUrl, video_id,
|
||||||
u'Downloading info XML',
|
u'Downloading info XML',
|
||||||
u'Unable to download video info XML')
|
u'Unable to download video info XML')
|
||||||
|
|
||||||
mdoc = xml.etree.ElementTree.fromstring(metaXml)
|
|
||||||
try:
|
try:
|
||||||
videoNode = mdoc.findall('./video')[0]
|
videoNode = mdoc.findall('./video')[0]
|
||||||
youtubeIdNode = videoNode.find('./youtubeID')
|
youtubeIdNode = videoNode.find('./youtubeID')
|
||||||
@@ -65,11 +63,10 @@ class CollegeHumorIE(InfoExtractor):
|
|||||||
|
|
||||||
if next_url.endswith(u'manifest.f4m'):
|
if next_url.endswith(u'manifest.f4m'):
|
||||||
manifest_url = next_url + '?hdcore=2.10.3'
|
manifest_url = next_url + '?hdcore=2.10.3'
|
||||||
manifestXml = self._download_webpage(manifest_url, video_id,
|
adoc = self._download_xml(manifest_url, video_id,
|
||||||
u'Downloading XML manifest',
|
u'Downloading XML manifest',
|
||||||
u'Unable to download video info XML')
|
u'Unable to download video info XML')
|
||||||
|
|
||||||
adoc = xml.etree.ElementTree.fromstring(manifestXml)
|
|
||||||
try:
|
try:
|
||||||
video_id = adoc.findall('./{http://ns.adobe.com/f4m/1.0}id')[0].text
|
video_id = adoc.findall('./{http://ns.adobe.com/f4m/1.0}id')[0].text
|
||||||
except IndexError:
|
except IndexError:
|
||||||
|
@@ -1,7 +1,7 @@
|
|||||||
import re
|
import re
|
||||||
import xml.etree.ElementTree
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from .mtv import MTVIE, _media_xml_tag
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
compat_str,
|
compat_str,
|
||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
@@ -11,7 +11,37 @@ from ..utils import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class ComedyCentralIE(InfoExtractor):
|
class ComedyCentralIE(MTVIE):
|
||||||
|
_VALID_URL = r'http://www.comedycentral.com/(video-clips|episodes|cc-studios)/(?P<title>.*)'
|
||||||
|
_FEED_URL = u'http://comedycentral.com/feeds/mrss/'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
u'url': u'http://www.comedycentral.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother',
|
||||||
|
u'md5': u'4167875aae411f903b751a21f357f1ee',
|
||||||
|
u'info_dict': {
|
||||||
|
u'id': u'cef0cbb3-e776-4bc9-b62e-8016deccb354',
|
||||||
|
u'ext': u'mp4',
|
||||||
|
u'title': u'Uncensored - Greg Fitzsimmons - Too Good of a Mother',
|
||||||
|
u'description': u'After a certain point, breastfeeding becomes c**kblocking.',
|
||||||
|
},
|
||||||
|
}
|
||||||
|
# Overwrite MTVIE properties we don't want
|
||||||
|
_TESTS = []
|
||||||
|
|
||||||
|
def _get_thumbnail_url(self, uri, itemdoc):
|
||||||
|
search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))
|
||||||
|
return itemdoc.find(search_path).attrib['url']
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
title = mobj.group('title')
|
||||||
|
webpage = self._download_webpage(url, title)
|
||||||
|
mgid = self._search_regex(r'data-mgid="(?P<mgid>mgid:.*?)"',
|
||||||
|
webpage, u'mgid')
|
||||||
|
return self._get_videos_info(mgid)
|
||||||
|
|
||||||
|
|
||||||
|
class ComedyCentralShowsIE(InfoExtractor):
|
||||||
IE_DESC = u'The Daily Show / Colbert Report'
|
IE_DESC = u'The Daily Show / Colbert Report'
|
||||||
# urls can be abbreviations like :thedailyshow or :colbert
|
# urls can be abbreviations like :thedailyshow or :colbert
|
||||||
# urls for episodes like:
|
# urls for episodes like:
|
||||||
@@ -127,13 +157,12 @@ class ComedyCentralIE(InfoExtractor):
|
|||||||
|
|
||||||
uri = mMovieParams[0][1]
|
uri = mMovieParams[0][1]
|
||||||
indexUrl = 'http://shadow.comedycentral.com/feeds/video_player/mrss/?' + compat_urllib_parse.urlencode({'uri': uri})
|
indexUrl = 'http://shadow.comedycentral.com/feeds/video_player/mrss/?' + compat_urllib_parse.urlencode({'uri': uri})
|
||||||
indexXml = self._download_webpage(indexUrl, epTitle,
|
idoc = self._download_xml(indexUrl, epTitle,
|
||||||
u'Downloading show index',
|
u'Downloading show index',
|
||||||
u'unable to download episode index')
|
u'unable to download episode index')
|
||||||
|
|
||||||
results = []
|
results = []
|
||||||
|
|
||||||
idoc = xml.etree.ElementTree.fromstring(indexXml)
|
|
||||||
itemEls = idoc.findall('.//item')
|
itemEls = idoc.findall('.//item')
|
||||||
for partNum,itemEl in enumerate(itemEls):
|
for partNum,itemEl in enumerate(itemEls):
|
||||||
mediaId = itemEl.findall('./guid')[0].text
|
mediaId = itemEl.findall('./guid')[0].text
|
||||||
@@ -144,10 +173,9 @@ class ComedyCentralIE(InfoExtractor):
|
|||||||
|
|
||||||
configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' +
|
configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' +
|
||||||
compat_urllib_parse.urlencode({'uri': mediaId}))
|
compat_urllib_parse.urlencode({'uri': mediaId}))
|
||||||
configXml = self._download_webpage(configUrl, epTitle,
|
cdoc = self._download_xml(configUrl, epTitle,
|
||||||
u'Downloading configuration for %s' % shortMediaId)
|
u'Downloading configuration for %s' % shortMediaId)
|
||||||
|
|
||||||
cdoc = xml.etree.ElementTree.fromstring(configXml)
|
|
||||||
turls = []
|
turls = []
|
||||||
for rendition in cdoc.findall('.//rendition'):
|
for rendition in cdoc.findall('.//rendition'):
|
||||||
finfo = (rendition.attrib['bitrate'], rendition.findall('./src')[0].text)
|
finfo = (rendition.attrib['bitrate'], rendition.findall('./src')[0].text)
|
||||||
|
@@ -4,11 +4,11 @@ import re
|
|||||||
import socket
|
import socket
|
||||||
import sys
|
import sys
|
||||||
import netrc
|
import netrc
|
||||||
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
compat_http_client,
|
compat_http_client,
|
||||||
compat_urllib_error,
|
compat_urllib_error,
|
||||||
compat_urllib_request,
|
|
||||||
compat_str,
|
compat_str,
|
||||||
|
|
||||||
clean_html,
|
clean_html,
|
||||||
@@ -19,6 +19,7 @@ from ..utils import (
|
|||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class InfoExtractor(object):
|
class InfoExtractor(object):
|
||||||
"""Information Extractor class.
|
"""Information Extractor class.
|
||||||
|
|
||||||
@@ -75,6 +76,7 @@ class InfoExtractor(object):
|
|||||||
* acodec Name of the audio codec in use
|
* acodec Name of the audio codec in use
|
||||||
* vbr Average video bitrate in KBit/s
|
* vbr Average video bitrate in KBit/s
|
||||||
* vcodec Name of the video codec in use
|
* vcodec Name of the video codec in use
|
||||||
|
* filesize The number of bytes, if known in advance
|
||||||
webpage_url: The url to the video webpage, if given to youtube-dl it
|
webpage_url: The url to the video webpage, if given to youtube-dl it
|
||||||
should allow to get the same result again. (It will be set
|
should allow to get the same result again. (It will be set
|
||||||
by YoutubeDL if it's missing)
|
by YoutubeDL if it's missing)
|
||||||
@@ -156,7 +158,7 @@ class InfoExtractor(object):
|
|||||||
elif note is not False:
|
elif note is not False:
|
||||||
self.to_screen(u'%s: %s' % (video_id, note))
|
self.to_screen(u'%s: %s' % (video_id, note))
|
||||||
try:
|
try:
|
||||||
return compat_urllib_request.urlopen(url_or_request)
|
return self._downloader.urlopen(url_or_request)
|
||||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||||
if errnote is None:
|
if errnote is None:
|
||||||
errnote = u'Unable to download webpage'
|
errnote = u'Unable to download webpage'
|
||||||
@@ -208,6 +210,12 @@ class InfoExtractor(object):
|
|||||||
""" Returns the data of the page as a string """
|
""" Returns the data of the page as a string """
|
||||||
return self._download_webpage_handle(url_or_request, video_id, note, errnote)[0]
|
return self._download_webpage_handle(url_or_request, video_id, note, errnote)[0]
|
||||||
|
|
||||||
|
def _download_xml(self, url_or_request, video_id,
|
||||||
|
note=u'Downloading XML', errnote=u'Unable to download XML'):
|
||||||
|
"""Return the xml as an xml.etree.ElementTree.Element"""
|
||||||
|
xml_string = self._download_webpage(url_or_request, video_id, note, errnote)
|
||||||
|
return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8'))
|
||||||
|
|
||||||
def to_screen(self, msg):
|
def to_screen(self, msg):
|
||||||
"""Print msg to screen, prefixing it with '[ie_name]'"""
|
"""Print msg to screen, prefixing it with '[ie_name]'"""
|
||||||
self._downloader.to_screen(u'[%s] %s' % (self.IE_NAME, msg))
|
self._downloader.to_screen(u'[%s] %s' % (self.IE_NAME, msg))
|
||||||
|
@@ -1,6 +1,5 @@
|
|||||||
# encoding: utf-8
|
# encoding: utf-8
|
||||||
import re
|
import re
|
||||||
import xml.etree.ElementTree
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@@ -32,14 +31,12 @@ class DaumIE(InfoExtractor):
|
|||||||
full_id = self._search_regex(r'<link rel="video_src" href=".+?vid=(.+?)"',
|
full_id = self._search_regex(r'<link rel="video_src" href=".+?vid=(.+?)"',
|
||||||
webpage, u'full id')
|
webpage, u'full id')
|
||||||
query = compat_urllib_parse.urlencode({'vid': full_id})
|
query = compat_urllib_parse.urlencode({'vid': full_id})
|
||||||
info_xml = self._download_webpage(
|
info = self._download_xml(
|
||||||
'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query, video_id,
|
'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query, video_id,
|
||||||
u'Downloading video info')
|
u'Downloading video info')
|
||||||
urls_xml = self._download_webpage(
|
urls = self._download_xml(
|
||||||
'http://videofarm.daum.net/controller/api/open/v1_2/MovieData.apixml?' + query,
|
'http://videofarm.daum.net/controller/api/open/v1_2/MovieData.apixml?' + query,
|
||||||
video_id, u'Downloading video formats info')
|
video_id, u'Downloading video formats info')
|
||||||
info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
|
|
||||||
urls = xml.etree.ElementTree.fromstring(urls_xml.encode('utf-8'))
|
|
||||||
|
|
||||||
self.to_screen(u'%s: Getting video urls' % video_id)
|
self.to_screen(u'%s: Getting video urls' % video_id)
|
||||||
formats = []
|
formats = []
|
||||||
@@ -49,10 +46,9 @@ class DaumIE(InfoExtractor):
|
|||||||
'vid': full_id,
|
'vid': full_id,
|
||||||
'profile': profile,
|
'profile': profile,
|
||||||
})
|
})
|
||||||
url_xml = self._download_webpage(
|
url_doc = self._download_xml(
|
||||||
'http://videofarm.daum.net/controller/api/open/v1_2/MovieLocation.apixml?' + format_query,
|
'http://videofarm.daum.net/controller/api/open/v1_2/MovieLocation.apixml?' + format_query,
|
||||||
video_id, note=False)
|
video_id, note=False)
|
||||||
url_doc = xml.etree.ElementTree.fromstring(url_xml.encode('utf-8'))
|
|
||||||
format_url = url_doc.find('result/url').text
|
format_url = url_doc.find('result/url').text
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': format_url,
|
'url': format_url,
|
||||||
|
@@ -1,7 +1,6 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import xml.etree.ElementTree
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@@ -30,8 +29,7 @@ class DreiSatIE(InfoExtractor):
|
|||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
|
details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
|
||||||
details_xml = self._download_webpage(details_url, video_id, note=u'Downloading video details')
|
details_doc = self._download_xml(details_url, video_id, note=u'Downloading video details')
|
||||||
details_doc = xml.etree.ElementTree.fromstring(details_xml.encode('utf-8'))
|
|
||||||
|
|
||||||
thumbnail_els = details_doc.findall('.//teaserimage')
|
thumbnail_els = details_doc.findall('.//teaserimage')
|
||||||
thumbnails = [{
|
thumbnails = [{
|
||||||
|
@@ -1,5 +1,4 @@
|
|||||||
import re
|
import re
|
||||||
import xml.etree.ElementTree
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import determine_ext
|
from ..utils import determine_ext
|
||||||
@@ -21,9 +20,8 @@ class EbaumsWorldIE(InfoExtractor):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
config_xml = self._download_webpage(
|
config = self._download_xml(
|
||||||
'http://www.ebaumsworld.com/video/player/%s' % video_id, video_id)
|
'http://www.ebaumsworld.com/video/player/%s' % video_id, video_id)
|
||||||
config = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
|
|
||||||
video_url = config.find('file').text
|
video_url = config.find('file').text
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
@@ -1,6 +1,5 @@
|
|||||||
# encoding: utf-8
|
# encoding: utf-8
|
||||||
import re
|
import re
|
||||||
import xml.etree.ElementTree
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@@ -28,9 +27,8 @@ class FazIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
config_xml_url = self._search_regex(r'writeFLV\(\'(.+?)\',', webpage,
|
config_xml_url = self._search_regex(r'writeFLV\(\'(.+?)\',', webpage,
|
||||||
u'config xml url')
|
u'config xml url')
|
||||||
config_xml = self._download_webpage(config_xml_url, video_id,
|
config = self._download_xml(config_xml_url, video_id,
|
||||||
u'Downloading config xml')
|
u'Downloading config xml')
|
||||||
config = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
|
|
||||||
|
|
||||||
encodings = config.find('ENCODINGS')
|
encodings = config.find('ENCODINGS')
|
||||||
formats = []
|
formats = []
|
||||||
|
@@ -1,6 +1,5 @@
|
|||||||
# encoding: utf-8
|
# encoding: utf-8
|
||||||
import re
|
import re
|
||||||
import xml.etree.ElementTree
|
|
||||||
import json
|
import json
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
@@ -11,11 +10,10 @@ from ..utils import (
|
|||||||
|
|
||||||
class FranceTVBaseInfoExtractor(InfoExtractor):
|
class FranceTVBaseInfoExtractor(InfoExtractor):
|
||||||
def _extract_video(self, video_id):
|
def _extract_video(self, video_id):
|
||||||
xml_desc = self._download_webpage(
|
info = self._download_xml(
|
||||||
'http://www.francetvinfo.fr/appftv/webservices/video/'
|
'http://www.francetvinfo.fr/appftv/webservices/video/'
|
||||||
'getInfosOeuvre.php?id-diffusion='
|
'getInfosOeuvre.php?id-diffusion='
|
||||||
+ video_id, video_id, 'Downloading XML config')
|
+ video_id, video_id, 'Downloading XML config')
|
||||||
info = xml.etree.ElementTree.fromstring(xml_desc.encode('utf-8'))
|
|
||||||
|
|
||||||
manifest_url = info.find('videos/video/url').text
|
manifest_url = info.find('videos/video/url').text
|
||||||
video_url = manifest_url.replace('manifest.f4m', 'index_2_av.m3u8')
|
video_url = manifest_url.replace('manifest.f4m', 'index_2_av.m3u8')
|
||||||
|
@@ -209,7 +209,7 @@ class GenericIE(InfoExtractor):
|
|||||||
mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
|
mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
|
||||||
if mobj is None:
|
if mobj is None:
|
||||||
# Broaden the search a little bit: JWPlayer JS loader
|
# Broaden the search a little bit: JWPlayer JS loader
|
||||||
mobj = re.search(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http[^\'"&]*)', webpage)
|
mobj = re.search(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http[^\'"]*)', webpage)
|
||||||
if mobj is None:
|
if mobj is None:
|
||||||
# Try to find twitter cards info
|
# Try to find twitter cards info
|
||||||
mobj = re.search(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage)
|
mobj = re.search(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage)
|
||||||
@@ -236,18 +236,16 @@ class GenericIE(InfoExtractor):
|
|||||||
video_id = compat_urllib_parse.unquote(os.path.basename(video_url))
|
video_id = compat_urllib_parse.unquote(os.path.basename(video_url))
|
||||||
|
|
||||||
# here's a fun little line of code for you:
|
# here's a fun little line of code for you:
|
||||||
video_extension = os.path.splitext(video_id)[1][1:]
|
|
||||||
video_id = os.path.splitext(video_id)[0]
|
video_id = os.path.splitext(video_id)[0]
|
||||||
|
|
||||||
# video uploader is domain name
|
# video uploader is domain name
|
||||||
video_uploader = self._search_regex(r'(?:https?://)?([^/]*)/.*',
|
video_uploader = self._search_regex(r'(?:https?://)?([^/]*)/.*',
|
||||||
url, u'video uploader')
|
url, u'video uploader')
|
||||||
|
|
||||||
return [{
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'uploader': video_uploader,
|
'uploader': video_uploader,
|
||||||
'upload_date': None,
|
'upload_date': None,
|
||||||
'title': video_title,
|
'title': video_title,
|
||||||
'ext': video_extension,
|
}
|
||||||
}]
|
|
||||||
|
60
youtube_dl/extractor/imdb.py
Normal file
60
youtube_dl/extractor/imdb.py
Normal file
@@ -0,0 +1,60 @@
|
|||||||
|
import re
|
||||||
|
import json
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
compat_urlparse,
|
||||||
|
get_element_by_attribute,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class ImdbIE(InfoExtractor):
|
||||||
|
IE_NAME = u'imdb'
|
||||||
|
IE_DESC = u'Internet Movie Database trailers'
|
||||||
|
_VALID_URL = r'http://www\.imdb\.com/video/imdb/vi(?P<id>\d+)'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
u'url': u'http://www.imdb.com/video/imdb/vi2524815897',
|
||||||
|
u'md5': u'9f34fa777ade3a6e57a054fdbcb3a068',
|
||||||
|
u'info_dict': {
|
||||||
|
u'id': u'2524815897',
|
||||||
|
u'ext': u'mp4',
|
||||||
|
u'title': u'Ice Age: Continental Drift Trailer (No. 2) - IMDb',
|
||||||
|
u'description': u'md5:9061c2219254e5d14e03c25c98e96a81',
|
||||||
|
u'duration': 151,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
webpage = self._download_webpage(url,video_id)
|
||||||
|
descr = get_element_by_attribute('itemprop', 'description', webpage)
|
||||||
|
available_formats = re.findall(
|
||||||
|
r'case \'(?P<f_id>.*?)\' :$\s+url = \'(?P<path>.*?)\'', webpage,
|
||||||
|
flags=re.MULTILINE)
|
||||||
|
formats = []
|
||||||
|
for f_id, f_path in available_formats:
|
||||||
|
format_page = self._download_webpage(
|
||||||
|
compat_urlparse.urljoin(url, f_path),
|
||||||
|
u'Downloading info for %s format' % f_id)
|
||||||
|
json_data = self._search_regex(
|
||||||
|
r'<script[^>]+class="imdb-player-data"[^>]*?>(.*?)</script>',
|
||||||
|
format_page, u'json data', flags=re.DOTALL)
|
||||||
|
info = json.loads(json_data)
|
||||||
|
format_info = info['videoPlayerObject']['video']
|
||||||
|
formats.append({
|
||||||
|
'format_id': f_id,
|
||||||
|
'url': format_info['url'],
|
||||||
|
'height': format_info['height'],
|
||||||
|
'width': format_info['width'],
|
||||||
|
})
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': self._og_search_title(webpage),
|
||||||
|
'formats': formats,
|
||||||
|
'description': descr,
|
||||||
|
'thumbnail': format_info['slate'],
|
||||||
|
'duration': int(info['titleObject']['title']['duration_seconds']),
|
||||||
|
}
|
@@ -1,5 +1,4 @@
|
|||||||
import re
|
import re
|
||||||
import xml.etree.ElementTree
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@@ -43,9 +42,8 @@ class InternetVideoArchiveIE(InfoExtractor):
|
|||||||
video_id = query_dic['publishedid'][0]
|
video_id = query_dic['publishedid'][0]
|
||||||
url = self._build_url(query)
|
url = self._build_url(query)
|
||||||
|
|
||||||
flashconfiguration_xml = self._download_webpage(url, video_id,
|
flashconfiguration = self._download_xml(url, video_id,
|
||||||
u'Downloading flash configuration')
|
u'Downloading flash configuration')
|
||||||
flashconfiguration = xml.etree.ElementTree.fromstring(flashconfiguration_xml.encode('utf-8'))
|
|
||||||
file_url = flashconfiguration.find('file').text
|
file_url = flashconfiguration.find('file').text
|
||||||
file_url = file_url.replace('/playlist.aspx', '/mrssplaylist.aspx')
|
file_url = file_url.replace('/playlist.aspx', '/mrssplaylist.aspx')
|
||||||
# Replace some of the parameters in the query to get the best quality
|
# Replace some of the parameters in the query to get the best quality
|
||||||
@@ -53,9 +51,8 @@ class InternetVideoArchiveIE(InfoExtractor):
|
|||||||
file_url = re.sub(r'(?<=\?)(.+)$',
|
file_url = re.sub(r'(?<=\?)(.+)$',
|
||||||
lambda m: self._clean_query(m.group()),
|
lambda m: self._clean_query(m.group()),
|
||||||
file_url)
|
file_url)
|
||||||
info_xml = self._download_webpage(file_url, video_id,
|
info = self._download_xml(file_url, video_id,
|
||||||
u'Downloading video info')
|
u'Downloading video info')
|
||||||
info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
|
|
||||||
item = info.find('channel/item')
|
item = info.find('channel/item')
|
||||||
|
|
||||||
def _bp(p):
|
def _bp(p):
|
||||||
|
@@ -2,7 +2,6 @@
|
|||||||
|
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
import xml.etree.ElementTree
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
@@ -32,12 +31,9 @@ class JeuxVideoIE(InfoExtractor):
|
|||||||
r'http://www\.jeuxvideo\.com/config/\w+/\d+/(.*?)/\d+_player\.xml',
|
r'http://www\.jeuxvideo\.com/config/\w+/\d+/(.*?)/\d+_player\.xml',
|
||||||
xml_link, u'video ID')
|
xml_link, u'video ID')
|
||||||
|
|
||||||
xml_config = self._download_webpage(
|
config = self._download_xml(
|
||||||
xml_link, title, u'Downloading XML config')
|
xml_link, title, u'Downloading XML config')
|
||||||
config = xml.etree.ElementTree.fromstring(xml_config.encode('utf-8'))
|
info_json = config.find('format.json').text
|
||||||
info_json = self._search_regex(
|
|
||||||
r'(?sm)<format\.json>(.*?)</format\.json>',
|
|
||||||
xml_config, u'JSON information')
|
|
||||||
info = json.loads(info_json)['versions'][0]
|
info = json.loads(info_json)['versions'][0]
|
||||||
|
|
||||||
video_url = 'http://video720.jeuxvideo.com/' + info['file']
|
video_url = 'http://video720.jeuxvideo.com/' + info['file']
|
||||||
|
@@ -1,7 +1,6 @@
|
|||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import xml.etree.ElementTree
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@@ -94,10 +93,9 @@ class JustinTVIE(InfoExtractor):
|
|||||||
archive_id = m.group(1)
|
archive_id = m.group(1)
|
||||||
|
|
||||||
api = api_base + '/broadcast/by_chapter/%s.xml' % chapter_id
|
api = api_base + '/broadcast/by_chapter/%s.xml' % chapter_id
|
||||||
chapter_info_xml = self._download_webpage(api, chapter_id,
|
doc = self._download_xml(api, chapter_id,
|
||||||
note=u'Downloading chapter information',
|
note=u'Downloading chapter information',
|
||||||
errnote=u'Chapter information download failed')
|
errnote=u'Chapter information download failed')
|
||||||
doc = xml.etree.ElementTree.fromstring(chapter_info_xml)
|
|
||||||
for a in doc.findall('.//archive'):
|
for a in doc.findall('.//archive'):
|
||||||
if archive_id == a.find('./id').text:
|
if archive_id == a.find('./id').text:
|
||||||
break
|
break
|
||||||
|
@@ -1,6 +1,5 @@
|
|||||||
import re
|
import re
|
||||||
import json
|
import json
|
||||||
import xml.etree.ElementTree
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@@ -80,8 +79,7 @@ class LivestreamOriginalIE(InfoExtractor):
|
|||||||
user = mobj.group('user')
|
user = mobj.group('user')
|
||||||
api_url = 'http://x{0}x.api.channel.livestream.com/2.0/clipdetails?extendedInfo=true&id={1}'.format(user, video_id)
|
api_url = 'http://x{0}x.api.channel.livestream.com/2.0/clipdetails?extendedInfo=true&id={1}'.format(user, video_id)
|
||||||
|
|
||||||
api_response = self._download_webpage(api_url, video_id)
|
info = self._download_xml(api_url, video_id)
|
||||||
info = xml.etree.ElementTree.fromstring(api_response.encode('utf-8'))
|
|
||||||
item = info.find('channel').find('item')
|
item = info.find('channel').find('item')
|
||||||
ns = {'media': 'http://search.yahoo.com/mrss'}
|
ns = {'media': 'http://search.yahoo.com/mrss'}
|
||||||
thumbnail_url = item.find(xpath_with_ns('media:thumbnail', ns)).attrib['url']
|
thumbnail_url = item.find(xpath_with_ns('media:thumbnail', ns)).attrib['url']
|
||||||
|
@@ -60,7 +60,7 @@ class MixcloudIE(InfoExtractor):
|
|||||||
'title': info['name'],
|
'title': info['name'],
|
||||||
'url': final_song_url,
|
'url': final_song_url,
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'description': info['description'],
|
'description': info.get('description'),
|
||||||
'thumbnail': info['pictures'].get('extra_large'),
|
'thumbnail': info['pictures'].get('extra_large'),
|
||||||
'uploader': info['user']['name'],
|
'uploader': info['user']['name'],
|
||||||
'uploader_id': info['user']['username'],
|
'uploader_id': info['user']['username'],
|
||||||
|
@@ -109,9 +109,8 @@ class MTVIE(InfoExtractor):
|
|||||||
def _get_videos_info(self, uri):
|
def _get_videos_info(self, uri):
|
||||||
video_id = self._id_from_uri(uri)
|
video_id = self._id_from_uri(uri)
|
||||||
data = compat_urllib_parse.urlencode({'uri': uri})
|
data = compat_urllib_parse.urlencode({'uri': uri})
|
||||||
infoXml = self._download_webpage(self._FEED_URL +'?' + data, video_id,
|
idoc = self._download_xml(self._FEED_URL +'?' + data, video_id,
|
||||||
u'Downloading info')
|
u'Downloading info')
|
||||||
idoc = xml.etree.ElementTree.fromstring(infoXml.encode('utf-8'))
|
|
||||||
return [self._get_video_info(item) for item in idoc.findall('.//item')]
|
return [self._get_video_info(item) for item in idoc.findall('.//item')]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@@ -1,5 +1,4 @@
|
|||||||
import os.path
|
import os.path
|
||||||
import xml.etree.ElementTree
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@@ -33,8 +32,7 @@ class MySpassIE(InfoExtractor):
|
|||||||
|
|
||||||
# get metadata
|
# get metadata
|
||||||
metadata_url = META_DATA_URL_TEMPLATE % video_id
|
metadata_url = META_DATA_URL_TEMPLATE % video_id
|
||||||
metadata_text = self._download_webpage(metadata_url, video_id)
|
metadata = self._download_xml(metadata_url, video_id)
|
||||||
metadata = xml.etree.ElementTree.fromstring(metadata_text.encode('utf-8'))
|
|
||||||
|
|
||||||
# extract values from metadata
|
# extract values from metadata
|
||||||
url_flv_el = metadata.find('url_flv')
|
url_flv_el = metadata.find('url_flv')
|
||||||
|
@@ -1,6 +1,5 @@
|
|||||||
# encoding: utf-8
|
# encoding: utf-8
|
||||||
import re
|
import re
|
||||||
import xml.etree.ElementTree
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@@ -38,14 +37,12 @@ class NaverIE(InfoExtractor):
|
|||||||
'protocol': 'p2p',
|
'protocol': 'p2p',
|
||||||
'inKey': key,
|
'inKey': key,
|
||||||
})
|
})
|
||||||
info_xml = self._download_webpage(
|
info = self._download_xml(
|
||||||
'http://serviceapi.rmcnmv.naver.com/flash/videoInfo.nhn?' + query,
|
'http://serviceapi.rmcnmv.naver.com/flash/videoInfo.nhn?' + query,
|
||||||
video_id, u'Downloading video info')
|
video_id, u'Downloading video info')
|
||||||
urls_xml = self._download_webpage(
|
urls = self._download_xml(
|
||||||
'http://serviceapi.rmcnmv.naver.com/flash/playableEncodingOption.nhn?' + query_urls,
|
'http://serviceapi.rmcnmv.naver.com/flash/playableEncodingOption.nhn?' + query_urls,
|
||||||
video_id, u'Downloading video formats info')
|
video_id, u'Downloading video formats info')
|
||||||
info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
|
|
||||||
urls = xml.etree.ElementTree.fromstring(urls_xml.encode('utf-8'))
|
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for format_el in urls.findall('EncodingOptions/EncodingOption'):
|
for format_el in urls.findall('EncodingOptions/EncodingOption'):
|
||||||
|
@@ -1,5 +1,4 @@
|
|||||||
import re
|
import re
|
||||||
import xml.etree.ElementTree
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import find_xpath_attr, compat_str
|
from ..utils import find_xpath_attr, compat_str
|
||||||
@@ -21,8 +20,8 @@ class NBCNewsIE(InfoExtractor):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
info_xml = self._download_webpage('http://www.nbcnews.com/id/%s/displaymode/1219' % video_id, video_id)
|
all_info = self._download_xml('http://www.nbcnews.com/id/%s/displaymode/1219' % video_id, video_id)
|
||||||
info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8')).find('video')
|
info = all_info.find('video')
|
||||||
|
|
||||||
return {'id': video_id,
|
return {'id': video_id,
|
||||||
'title': info.find('headline').text,
|
'title': info.find('headline').text,
|
||||||
|
@@ -1,6 +1,5 @@
|
|||||||
import re
|
import re
|
||||||
import json
|
import json
|
||||||
import xml.etree.ElementTree
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@@ -26,9 +25,8 @@ class NHLBaseInfoExtractor(InfoExtractor):
|
|||||||
'path': initial_video_url.replace('.mp4', '_sd.mp4'),
|
'path': initial_video_url.replace('.mp4', '_sd.mp4'),
|
||||||
})
|
})
|
||||||
path_url = 'http://video.nhl.com/videocenter/servlets/encryptvideopath?' + data
|
path_url = 'http://video.nhl.com/videocenter/servlets/encryptvideopath?' + data
|
||||||
path_response = self._download_webpage(path_url, video_id,
|
path_doc = self._download_xml(path_url, video_id,
|
||||||
u'Downloading final video url')
|
u'Downloading final video url')
|
||||||
path_doc = xml.etree.ElementTree.fromstring(path_response)
|
|
||||||
video_url = path_doc.find('path').text
|
video_url = path_doc.find('path').text
|
||||||
|
|
||||||
join = compat_urlparse.urljoin
|
join = compat_urlparse.urljoin
|
||||||
|
@@ -2,7 +2,6 @@
|
|||||||
|
|
||||||
import re
|
import re
|
||||||
import socket
|
import socket
|
||||||
import xml.etree.ElementTree
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@@ -81,7 +80,7 @@ class NiconicoIE(InfoExtractor):
|
|||||||
# the cookies in order to be able to download the info webpage
|
# the cookies in order to be able to download the info webpage
|
||||||
self._download_webpage('http://www.nicovideo.jp/watch/' + video_id, video_id)
|
self._download_webpage('http://www.nicovideo.jp/watch/' + video_id, video_id)
|
||||||
|
|
||||||
video_info_webpage = self._download_webpage(
|
video_info = self._download_xml(
|
||||||
'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id,
|
'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id,
|
||||||
note=u'Downloading video info page')
|
note=u'Downloading video info page')
|
||||||
|
|
||||||
@@ -92,7 +91,6 @@ class NiconicoIE(InfoExtractor):
|
|||||||
video_real_url = compat_urlparse.parse_qs(flv_info_webpage)['url'][0]
|
video_real_url = compat_urlparse.parse_qs(flv_info_webpage)['url'][0]
|
||||||
|
|
||||||
# Start extracting information
|
# Start extracting information
|
||||||
video_info = xml.etree.ElementTree.fromstring(video_info_webpage)
|
|
||||||
video_title = video_info.find('.//title').text
|
video_title = video_info.find('.//title').text
|
||||||
video_extension = video_info.find('.//movie_type').text
|
video_extension = video_info.find('.//movie_type').text
|
||||||
video_format = video_extension.upper()
|
video_format = video_extension.upper()
|
||||||
@@ -107,13 +105,11 @@ class NiconicoIE(InfoExtractor):
|
|||||||
video_uploader = video_uploader_id
|
video_uploader = video_uploader_id
|
||||||
url = 'http://seiga.nicovideo.jp/api/user/info?id=' + video_uploader_id
|
url = 'http://seiga.nicovideo.jp/api/user/info?id=' + video_uploader_id
|
||||||
try:
|
try:
|
||||||
user_info_webpage = self._download_webpage(
|
user_info = self._download_xml(
|
||||||
url, video_id, note=u'Downloading user information')
|
url, video_id, note=u'Downloading user information')
|
||||||
|
video_uploader = user_info.find('.//nickname').text
|
||||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||||
self._downloader.report_warning(u'Unable to download user info webpage: %s' % compat_str(err))
|
self._downloader.report_warning(u'Unable to download user info webpage: %s' % compat_str(err))
|
||||||
else:
|
|
||||||
user_info = xml.etree.ElementTree.fromstring(user_info_webpage)
|
|
||||||
video_uploader = user_info.find('.//nickname').text
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
49
youtube_dl/extractor/podomatic.py
Normal file
49
youtube_dl/extractor/podomatic.py
Normal file
@@ -0,0 +1,49 @@
|
|||||||
|
import json
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class PodomaticIE(InfoExtractor):
|
||||||
|
IE_NAME = 'podomatic'
|
||||||
|
_VALID_URL = r'^(?P<proto>https?)://(?P<channel>[^.]+)\.podomatic\.com/entry/(?P<id>[^?]+)'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
u"url": u"http://scienceteachingtips.podomatic.com/entry/2009-01-02T16_03_35-08_00",
|
||||||
|
u"file": u"2009-01-02T16_03_35-08_00.mp3",
|
||||||
|
u"md5": u"84bb855fcf3429e6bf72460e1eed782d",
|
||||||
|
u"info_dict": {
|
||||||
|
u"uploader": u"Science Teaching Tips",
|
||||||
|
u"uploader_id": u"scienceteachingtips",
|
||||||
|
u"title": u"64. When the Moon Hits Your Eye",
|
||||||
|
u"duration": 446,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
channel = mobj.group('channel')
|
||||||
|
|
||||||
|
json_url = (('%s://%s.podomatic.com/entry/embed_params/%s' +
|
||||||
|
'?permalink=true&rtmp=0') %
|
||||||
|
(mobj.group('proto'), channel, video_id))
|
||||||
|
data_json = self._download_webpage(
|
||||||
|
json_url, video_id, note=u'Downloading video info')
|
||||||
|
data = json.loads(data_json)
|
||||||
|
|
||||||
|
video_url = data['downloadLink']
|
||||||
|
uploader = data['podcast']
|
||||||
|
title = data['title']
|
||||||
|
thumbnail = data['imageLocation']
|
||||||
|
duration = int(data['length'] / 1000.0)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'url': video_url,
|
||||||
|
'title': title,
|
||||||
|
'uploader': uploader,
|
||||||
|
'uploader_id': channel,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'duration': duration,
|
||||||
|
}
|
@@ -1,7 +1,6 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import xml.etree.ElementTree
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@@ -35,12 +34,11 @@ class SinaIE(InfoExtractor):
|
|||||||
|
|
||||||
def _extract_video(self, video_id):
|
def _extract_video(self, video_id):
|
||||||
data = compat_urllib_parse.urlencode({'vid': video_id})
|
data = compat_urllib_parse.urlencode({'vid': video_id})
|
||||||
url_page = self._download_webpage('http://v.iask.com/v_play.php?%s' % data,
|
url_doc = self._download_xml('http://v.iask.com/v_play.php?%s' % data,
|
||||||
video_id, u'Downloading video url')
|
video_id, u'Downloading video url')
|
||||||
image_page = self._download_webpage(
|
image_page = self._download_webpage(
|
||||||
'http://interface.video.sina.com.cn/interface/common/getVideoImage.php?%s' % data,
|
'http://interface.video.sina.com.cn/interface/common/getVideoImage.php?%s' % data,
|
||||||
video_id, u'Downloading thumbnail info')
|
video_id, u'Downloading thumbnail info')
|
||||||
url_doc = xml.etree.ElementTree.fromstring(url_page.encode('utf-8'))
|
|
||||||
|
|
||||||
return {'id': video_id,
|
return {'id': video_id,
|
||||||
'url': url_doc.find('./durl/url').text,
|
'url': url_doc.find('./durl/url').text,
|
||||||
|
@@ -76,44 +76,78 @@ class SoundcloudIE(InfoExtractor):
|
|||||||
def _extract_info_dict(self, info, full_title=None, quiet=False):
|
def _extract_info_dict(self, info, full_title=None, quiet=False):
|
||||||
track_id = compat_str(info['id'])
|
track_id = compat_str(info['id'])
|
||||||
name = full_title or track_id
|
name = full_title or track_id
|
||||||
if quiet == False:
|
if quiet:
|
||||||
self.report_extraction(name)
|
self.report_extraction(name)
|
||||||
|
|
||||||
thumbnail = info['artwork_url']
|
thumbnail = info['artwork_url']
|
||||||
if thumbnail is not None:
|
if thumbnail is not None:
|
||||||
thumbnail = thumbnail.replace('-large', '-t500x500')
|
thumbnail = thumbnail.replace('-large', '-t500x500')
|
||||||
|
ext = info.get('original_format', u'mp3')
|
||||||
result = {
|
result = {
|
||||||
'id': track_id,
|
'id': track_id,
|
||||||
'uploader': info['user']['username'],
|
'uploader': info['user']['username'],
|
||||||
'upload_date': unified_strdate(info['created_at']),
|
'upload_date': unified_strdate(info['created_at']),
|
||||||
'title': info['title'],
|
'title': info['title'],
|
||||||
'ext': info.get('original_format', u'mp3'),
|
|
||||||
'description': info['description'],
|
'description': info['description'],
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
}
|
}
|
||||||
if info.get('downloadable', False):
|
if info.get('downloadable', False):
|
||||||
# We can build a direct link to the song
|
# We can build a direct link to the song
|
||||||
result['url'] = 'https://api.soundcloud.com/tracks/{0}/download?client_id={1}'.format(track_id, self._CLIENT_ID)
|
format_url = (
|
||||||
|
u'https://api.soundcloud.com/tracks/{0}/download?client_id={1}'.format(
|
||||||
|
track_id, self._CLIENT_ID))
|
||||||
|
result['formats'] = [{
|
||||||
|
'format_id': 'download',
|
||||||
|
'ext': ext,
|
||||||
|
'url': format_url,
|
||||||
|
'vcodec': 'none',
|
||||||
|
}]
|
||||||
else:
|
else:
|
||||||
# We have to retrieve the url
|
# We have to retrieve the url
|
||||||
stream_json = self._download_webpage(
|
stream_json = self._download_webpage(
|
||||||
'http://api.soundcloud.com/i1/tracks/{0}/streams?client_id={1}'.format(track_id, self._IPHONE_CLIENT_ID),
|
'http://api.soundcloud.com/i1/tracks/{0}/streams?client_id={1}'.format(track_id, self._IPHONE_CLIENT_ID),
|
||||||
track_id, u'Downloading track url')
|
track_id, u'Downloading track url')
|
||||||
# There should be only one entry in the dictionary
|
|
||||||
key, stream_url = list(json.loads(stream_json).items())[0]
|
formats = []
|
||||||
if key.startswith(u'http'):
|
format_dict = json.loads(stream_json)
|
||||||
result['url'] = stream_url
|
for key, stream_url in format_dict.items():
|
||||||
elif key.startswith(u'rtmp'):
|
if key.startswith(u'http'):
|
||||||
# The url doesn't have an rtmp app, we have to extract the playpath
|
formats.append({
|
||||||
url, path = stream_url.split('mp3:', 1)
|
'format_id': key,
|
||||||
result.update({
|
'ext': ext,
|
||||||
'url': url,
|
'url': stream_url,
|
||||||
'play_path': 'mp3:' + path,
|
'vcodec': 'none',
|
||||||
})
|
})
|
||||||
else:
|
elif key.startswith(u'rtmp'):
|
||||||
|
# The url doesn't have an rtmp app, we have to extract the playpath
|
||||||
|
url, path = stream_url.split('mp3:', 1)
|
||||||
|
formats.append({
|
||||||
|
'format_id': key,
|
||||||
|
'url': url,
|
||||||
|
'play_path': 'mp3:' + path,
|
||||||
|
'ext': ext,
|
||||||
|
'vcodec': 'none',
|
||||||
|
})
|
||||||
|
|
||||||
|
if not formats:
|
||||||
# We fallback to the stream_url in the original info, this
|
# We fallback to the stream_url in the original info, this
|
||||||
# cannot be always used, sometimes it can give an HTTP 404 error
|
# cannot be always used, sometimes it can give an HTTP 404 error
|
||||||
result['url'] = info['stream_url'] + '?client_id=' + self._CLIENT_ID,
|
formats.append({
|
||||||
|
'format_id': u'fallback',
|
||||||
|
'url': info['stream_url'] + '?client_id=' + self._CLIENT_ID,
|
||||||
|
'ext': ext,
|
||||||
|
'vcodec': 'none',
|
||||||
|
})
|
||||||
|
|
||||||
|
def format_pref(f):
|
||||||
|
if f['format_id'].startswith('http'):
|
||||||
|
return 2
|
||||||
|
if f['format_id'].startswith('rtmp'):
|
||||||
|
return 1
|
||||||
|
return 0
|
||||||
|
|
||||||
|
formats.sort(key=format_pref)
|
||||||
|
result['formats'] = formats
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
@@ -1,5 +1,4 @@
|
|||||||
import re
|
import re
|
||||||
import xml.etree.ElementTree
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
@@ -33,12 +32,10 @@ class SpiegelIE(InfoExtractor):
|
|||||||
r'<div class="module-title">(.*?)</div>', webpage, u'title')
|
r'<div class="module-title">(.*?)</div>', webpage, u'title')
|
||||||
|
|
||||||
xml_url = u'http://video2.spiegel.de/flash/' + video_id + u'.xml'
|
xml_url = u'http://video2.spiegel.de/flash/' + video_id + u'.xml'
|
||||||
xml_code = self._download_webpage(
|
idoc = self._download_xml(
|
||||||
xml_url, video_id,
|
xml_url, video_id,
|
||||||
note=u'Downloading XML', errnote=u'Failed to download XML')
|
note=u'Downloading XML', errnote=u'Failed to download XML')
|
||||||
|
|
||||||
idoc = xml.etree.ElementTree.fromstring(xml_code)
|
|
||||||
|
|
||||||
formats = [
|
formats = [
|
||||||
{
|
{
|
||||||
'format_id': n.tag.rpartition('type')[2],
|
'format_id': n.tag.rpartition('type')[2],
|
||||||
|
@@ -1,5 +1,4 @@
|
|||||||
import re
|
import re
|
||||||
import xml.etree.ElementTree
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@@ -32,8 +31,7 @@ class TeamcocoIE(InfoExtractor):
|
|||||||
self.report_extraction(video_id)
|
self.report_extraction(video_id)
|
||||||
|
|
||||||
data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
|
data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
|
||||||
data_xml = self._download_webpage(data_url, video_id, 'Downloading data webpage')
|
data = self._download_xml(data_url, video_id, 'Downloading data webpage')
|
||||||
data = xml.etree.ElementTree.fromstring(data_xml.encode('utf-8'))
|
|
||||||
|
|
||||||
|
|
||||||
qualities = ['500k', '480p', '1000k', '720p', '1080p']
|
qualities = ['500k', '480p', '1000k', '720p', '1080p']
|
||||||
|
@@ -1,6 +1,5 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
import re
|
import re
|
||||||
import xml.etree.ElementTree
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@@ -40,11 +39,9 @@ class TouTvIE(InfoExtractor):
|
|||||||
r'"idMedia":\s*"([^"]+)"', webpage, u'media ID')
|
r'"idMedia":\s*"([^"]+)"', webpage, u'media ID')
|
||||||
|
|
||||||
streams_url = u'http://release.theplatform.com/content.select?pid=' + mediaId
|
streams_url = u'http://release.theplatform.com/content.select?pid=' + mediaId
|
||||||
streams_webpage = self._download_webpage(
|
streams_doc = self._download_xml(
|
||||||
streams_url, video_id, note=u'Downloading stream list')
|
streams_url, video_id, note=u'Downloading stream list')
|
||||||
|
|
||||||
streams_doc = xml.etree.ElementTree.fromstring(
|
|
||||||
streams_webpage.encode('utf-8'))
|
|
||||||
video_url = next(n.text
|
video_url = next(n.text
|
||||||
for n in streams_doc.findall('.//choice/url')
|
for n in streams_doc.findall('.//choice/url')
|
||||||
if u'//ad.doubleclick' not in n.text)
|
if u'//ad.doubleclick' not in n.text)
|
||||||
|
@@ -1,6 +1,5 @@
|
|||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
import xml.etree.ElementTree
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
@@ -36,12 +35,10 @@ class TriluliluIE(InfoExtractor):
|
|||||||
|
|
||||||
format_url = (u'http://fs%(server)s.trilulilu.ro/%(hash)s/'
|
format_url = (u'http://fs%(server)s.trilulilu.ro/%(hash)s/'
|
||||||
u'video-formats2' % log)
|
u'video-formats2' % log)
|
||||||
format_str = self._download_webpage(
|
format_doc = self._download_xml(
|
||||||
format_url, video_id,
|
format_url, video_id,
|
||||||
note=u'Downloading formats',
|
note=u'Downloading formats',
|
||||||
errnote=u'Error while downloading formats')
|
errnote=u'Error while downloading formats')
|
||||||
|
|
||||||
format_doc = xml.etree.ElementTree.fromstring(format_str)
|
|
||||||
|
|
||||||
video_url_template = (
|
video_url_template = (
|
||||||
u'http://fs%(server)s.trilulilu.ro/stream.php?type=video'
|
u'http://fs%(server)s.trilulilu.ro/stream.php?type=video'
|
||||||
|
@@ -1,5 +1,4 @@
|
|||||||
import re
|
import re
|
||||||
import xml.etree.ElementTree
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@@ -27,9 +26,8 @@ class VideofyMeIE(InfoExtractor):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
config_xml = self._download_webpage('http://sunshine.videofy.me/?videoId=%s' % video_id,
|
config = self._download_xml('http://sunshine.videofy.me/?videoId=%s' % video_id,
|
||||||
video_id)
|
video_id)
|
||||||
config = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
|
|
||||||
video = config.find('video')
|
video = config.find('video')
|
||||||
sources = video.find('sources')
|
sources = video.find('sources')
|
||||||
url_node = next(node for node in [find_xpath_attr(sources, 'source', 'id', 'HQ %s' % key)
|
url_node = next(node for node in [find_xpath_attr(sources, 'source', 'id', 'HQ %s' % key)
|
||||||
|
@@ -5,7 +5,7 @@ from .common import InfoExtractor
|
|||||||
|
|
||||||
|
|
||||||
class VideoPremiumIE(InfoExtractor):
|
class VideoPremiumIE(InfoExtractor):
|
||||||
_VALID_URL = r'(?:https?://)?(?:www\.)?videopremium\.tv/(?P<id>\w+)(?:/.*)?'
|
_VALID_URL = r'(?:https?://)?(?:www\.)?videopremium\.(?:tv|me)/(?P<id>\w+)(?:/.*)?'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://videopremium.tv/4w7oadjsf156',
|
u'url': u'http://videopremium.tv/4w7oadjsf156',
|
||||||
u'file': u'4w7oadjsf156.f4v',
|
u'file': u'4w7oadjsf156.f4v',
|
||||||
@@ -41,4 +41,4 @@ class VideoPremiumIE(InfoExtractor):
|
|||||||
'player_url': "http://videopremium.tv/uplayer/uppod.swf",
|
'player_url': "http://videopremium.tv/uplayer/uppod.swf",
|
||||||
'ext': 'f4v',
|
'ext': 'f4v',
|
||||||
'title': video_title,
|
'title': video_title,
|
||||||
}
|
}
|
||||||
|
@@ -1,6 +1,8 @@
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
unescapeHTML,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
)
|
)
|
||||||
from .subtitles import SubtitlesInfoExtractor
|
from .subtitles import SubtitlesInfoExtractor
|
||||||
@@ -20,7 +22,8 @@ class VikiIE(SubtitlesInfoExtractor):
|
|||||||
u'description': u'md5:c4b17b9626dd4b143dcc4d855ba3474e',
|
u'description': u'md5:c4b17b9626dd4b143dcc4d855ba3474e',
|
||||||
u'upload_date': u'20131121',
|
u'upload_date': u'20131121',
|
||||||
u'age_limit': 13,
|
u'age_limit': 13,
|
||||||
}
|
},
|
||||||
|
u'skip': u'Blocked in the US',
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@@ -32,11 +35,12 @@ class VikiIE(SubtitlesInfoExtractor):
|
|||||||
description = self._og_search_description(webpage)
|
description = self._og_search_description(webpage)
|
||||||
thumbnail = self._og_search_thumbnail(webpage)
|
thumbnail = self._og_search_thumbnail(webpage)
|
||||||
|
|
||||||
uploader = self._html_search_regex(
|
uploader_m = re.search(
|
||||||
r'<strong>Broadcast Network: </strong>\s*([^<]*)<', webpage,
|
r'<strong>Broadcast Network: </strong>\s*([^<]*)<', webpage)
|
||||||
u'uploader')
|
if uploader_m is None:
|
||||||
if uploader is not None:
|
uploader = None
|
||||||
uploader = uploader.strip()
|
else:
|
||||||
|
uploader = uploader_m.group(1).strip()
|
||||||
|
|
||||||
rating_str = self._html_search_regex(
|
rating_str = self._html_search_regex(
|
||||||
r'<strong>Rating: </strong>\s*([^<]*)<', webpage,
|
r'<strong>Rating: </strong>\s*([^<]*)<', webpage,
|
||||||
@@ -51,7 +55,12 @@ class VikiIE(SubtitlesInfoExtractor):
|
|||||||
age_limit = RATINGS.get(rating_str)
|
age_limit = RATINGS.get(rating_str)
|
||||||
|
|
||||||
info_url = 'http://www.viki.com/player5_fragment/%s?action=show&controller=videos' % video_id
|
info_url = 'http://www.viki.com/player5_fragment/%s?action=show&controller=videos' % video_id
|
||||||
info_webpage = self._download_webpage(info_url, video_id)
|
info_webpage = self._download_webpage(
|
||||||
|
info_url, video_id, note=u'Downloading info page')
|
||||||
|
if re.match(r'\s*<div\s+class="video-error', info_webpage):
|
||||||
|
raise ExtractorError(
|
||||||
|
u'Video %s is blocked from your location.' % video_id,
|
||||||
|
expected=True)
|
||||||
video_url = self._html_search_regex(
|
video_url = self._html_search_regex(
|
||||||
r'<source[^>]+src="([^"]+)"', info_webpage, u'video URL')
|
r'<source[^>]+src="([^"]+)"', info_webpage, u'video URL')
|
||||||
|
|
||||||
@@ -83,7 +92,8 @@ class VikiIE(SubtitlesInfoExtractor):
|
|||||||
|
|
||||||
def _get_available_subtitles(self, video_id, info_webpage):
|
def _get_available_subtitles(self, video_id, info_webpage):
|
||||||
res = {}
|
res = {}
|
||||||
for sturl in re.findall(r'<track src="([^"]+)"/>'):
|
for sturl_html in re.findall(r'<track src="([^"]+)"/>', info_webpage):
|
||||||
|
sturl = unescapeHTML(sturl_html)
|
||||||
m = re.search(r'/(?P<lang>[a-z]+)\.vtt', sturl)
|
m = re.search(r'/(?P<lang>[a-z]+)\.vtt', sturl)
|
||||||
if not m:
|
if not m:
|
||||||
continue
|
continue
|
||||||
|
@@ -46,7 +46,7 @@ class YahooIE(InfoExtractor):
|
|||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
items_json = self._search_regex(r'YVIDEO_INIT_ITEMS = ({.*?});$',
|
items_json = self._search_regex(r'mediaItems: ({.*?})$',
|
||||||
webpage, u'items', flags=re.MULTILINE)
|
webpage, u'items', flags=re.MULTILINE)
|
||||||
items = json.loads(items_json)
|
items = json.loads(items_json)
|
||||||
info = items['mediaItems']['query']['results']['mediaObj'][0]
|
info = items['mediaItems']['query']['results']['mediaObj'][0]
|
||||||
@@ -91,17 +91,13 @@ class YahooIE(InfoExtractor):
|
|||||||
formats.append(format_info)
|
formats.append(format_info)
|
||||||
formats = sorted(formats, key=lambda f:(f['height'], f['width']))
|
formats = sorted(formats, key=lambda f:(f['height'], f['width']))
|
||||||
|
|
||||||
info = {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': meta['title'],
|
'title': meta['title'],
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'description': clean_html(meta['description']),
|
'description': clean_html(meta['description']),
|
||||||
'thumbnail': meta['thumbnail'],
|
'thumbnail': meta['thumbnail'],
|
||||||
}
|
}
|
||||||
# TODO: Remove when #980 has been merged
|
|
||||||
info.update(formats[-1])
|
|
||||||
|
|
||||||
return info
|
|
||||||
|
|
||||||
|
|
||||||
class YahooSearchIE(SearchInfoExtractor):
|
class YahooSearchIE(SearchInfoExtractor):
|
||||||
|
@@ -11,7 +11,6 @@ import socket
|
|||||||
import string
|
import string
|
||||||
import struct
|
import struct
|
||||||
import traceback
|
import traceback
|
||||||
import xml.etree.ElementTree
|
|
||||||
import zlib
|
import zlib
|
||||||
|
|
||||||
from .common import InfoExtractor, SearchInfoExtractor
|
from .common import InfoExtractor, SearchInfoExtractor
|
||||||
@@ -29,6 +28,7 @@ from ..utils import (
|
|||||||
clean_html,
|
clean_html,
|
||||||
get_cachedir,
|
get_cachedir,
|
||||||
get_element_by_id,
|
get_element_by_id,
|
||||||
|
get_element_by_attribute,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
@@ -248,21 +248,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
'248': 'webm',
|
'248': 'webm',
|
||||||
}
|
}
|
||||||
_video_dimensions = {
|
_video_dimensions = {
|
||||||
'5': '240x400',
|
'5': '400x240',
|
||||||
'6': '???',
|
'6': '???',
|
||||||
'13': '???',
|
'13': '???',
|
||||||
'17': '144x176',
|
'17': '176x144',
|
||||||
'18': '360x640',
|
'18': '640x360',
|
||||||
'22': '720x1280',
|
'22': '1280x720',
|
||||||
'34': '360x640',
|
'34': '640x360',
|
||||||
'35': '480x854',
|
'35': '854x480',
|
||||||
'36': '240x320',
|
'36': '320x240',
|
||||||
'37': '1080x1920',
|
'37': '1920x1080',
|
||||||
'38': '3072x4096',
|
'38': '4096x3072',
|
||||||
'43': '360x640',
|
'43': '640x360',
|
||||||
'44': '480x854',
|
'44': '854x480',
|
||||||
'45': '720x1280',
|
'45': '1280x720',
|
||||||
'46': '1080x1920',
|
'46': '1920x1080',
|
||||||
'82': '360p',
|
'82': '360p',
|
||||||
'83': '480p',
|
'83': '480p',
|
||||||
'84': '720p',
|
'84': '720p',
|
||||||
@@ -1144,8 +1144,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
'asrs': 1,
|
'asrs': 1,
|
||||||
})
|
})
|
||||||
list_url = caption_url + '&' + list_params
|
list_url = caption_url + '&' + list_params
|
||||||
list_page = self._download_webpage(list_url, video_id)
|
caption_list = self._download_xml(list_url, video_id)
|
||||||
caption_list = xml.etree.ElementTree.fromstring(list_page.encode('utf-8'))
|
|
||||||
original_lang_node = caption_list.find('track')
|
original_lang_node = caption_list.find('track')
|
||||||
if original_lang_node is None or original_lang_node.attrib.get('kind') != 'asr' :
|
if original_lang_node is None or original_lang_node.attrib.get('kind') != 'asr' :
|
||||||
self._downloader.report_warning(u'Video doesn\'t have automatic captions')
|
self._downloader.report_warning(u'Video doesn\'t have automatic captions')
|
||||||
@@ -1528,7 +1527,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
|||||||
)"""
|
)"""
|
||||||
_TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&page=%s'
|
_TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&page=%s'
|
||||||
_MORE_PAGES_INDICATOR = r'data-link-type="next"'
|
_MORE_PAGES_INDICATOR = r'data-link-type="next"'
|
||||||
_VIDEO_RE = r'href="/watch\?v=([0-9A-Za-z_-]{11})&'
|
_VIDEO_RE = r'href="/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&[^"]*?index=(?P<index>\d+)'
|
||||||
IE_NAME = u'youtube:playlist'
|
IE_NAME = u'youtube:playlist'
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
@@ -1539,6 +1538,24 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
|||||||
def _real_initialize(self):
|
def _real_initialize(self):
|
||||||
self._login()
|
self._login()
|
||||||
|
|
||||||
|
def _ids_to_results(self, ids):
|
||||||
|
return [self.url_result(vid_id, 'Youtube', video_id=vid_id)
|
||||||
|
for vid_id in ids]
|
||||||
|
|
||||||
|
def _extract_mix(self, playlist_id):
|
||||||
|
# The mixes are generated from a a single video
|
||||||
|
# the id of the playlist is just 'RD' + video_id
|
||||||
|
url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[2:], playlist_id)
|
||||||
|
webpage = self._download_webpage(url, playlist_id, u'Downloading Youtube mix')
|
||||||
|
title_span = (get_element_by_attribute('class', 'title long-title', webpage) or
|
||||||
|
get_element_by_attribute('class', 'title ', webpage))
|
||||||
|
title = clean_html(title_span)
|
||||||
|
video_re = r'data-index="\d+".*?href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s' % re.escape(playlist_id)
|
||||||
|
ids = orderedSet(re.findall(video_re, webpage))
|
||||||
|
url_results = self._ids_to_results(ids)
|
||||||
|
|
||||||
|
return self.playlist_result(url_results, playlist_id, title)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
# Extract playlist id
|
# Extract playlist id
|
||||||
mobj = re.match(self._VALID_URL, url, re.VERBOSE)
|
mobj = re.match(self._VALID_URL, url, re.VERBOSE)
|
||||||
@@ -1556,14 +1573,20 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
|||||||
else:
|
else:
|
||||||
self.to_screen(u'Downloading playlist PL%s - add --no-playlist to just download video %s' % (playlist_id, video_id))
|
self.to_screen(u'Downloading playlist PL%s - add --no-playlist to just download video %s' % (playlist_id, video_id))
|
||||||
|
|
||||||
|
if len(playlist_id) == 13: # 'RD' + 11 characters for the video id
|
||||||
|
# Mixes require a custom extraction process
|
||||||
|
return self._extract_mix(playlist_id)
|
||||||
|
|
||||||
# Extract the video ids from the playlist pages
|
# Extract the video ids from the playlist pages
|
||||||
ids = []
|
ids = []
|
||||||
|
|
||||||
for page_num in itertools.count(1):
|
for page_num in itertools.count(1):
|
||||||
url = self._TEMPLATE_URL % (playlist_id, page_num)
|
url = self._TEMPLATE_URL % (playlist_id, page_num)
|
||||||
page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)
|
page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)
|
||||||
# The ids are duplicated
|
matches = re.finditer(self._VIDEO_RE, page)
|
||||||
new_ids = orderedSet(re.findall(self._VIDEO_RE, page))
|
# We remove the duplicates and the link with index 0
|
||||||
|
# (it's not the first video of the playlist)
|
||||||
|
new_ids = orderedSet(m.group('id') for m in matches if m.group('index') != '0')
|
||||||
ids.extend(new_ids)
|
ids.extend(new_ids)
|
||||||
|
|
||||||
if re.search(self._MORE_PAGES_INDICATOR, page) is None:
|
if re.search(self._MORE_PAGES_INDICATOR, page) is None:
|
||||||
@@ -1571,8 +1594,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
|||||||
|
|
||||||
playlist_title = self._og_search_title(page)
|
playlist_title = self._og_search_title(page)
|
||||||
|
|
||||||
url_results = [self.url_result(vid_id, 'Youtube', video_id=vid_id)
|
url_results = self._ids_to_results(ids)
|
||||||
for vid_id in ids]
|
|
||||||
return self.playlist_result(url_results, playlist_id, playlist_title)
|
return self.playlist_result(url_results, playlist_id, playlist_title)
|
||||||
|
|
||||||
|
|
||||||
@@ -1769,7 +1791,6 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
|
|||||||
Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
|
Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
|
||||||
"""
|
"""
|
||||||
_LOGIN_REQUIRED = True
|
_LOGIN_REQUIRED = True
|
||||||
_PAGING_STEP = 30
|
|
||||||
# use action_load_personal_feed instead of action_load_system_feed
|
# use action_load_personal_feed instead of action_load_system_feed
|
||||||
_PERSONAL_FEED = False
|
_PERSONAL_FEED = False
|
||||||
|
|
||||||
@@ -1789,9 +1810,8 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
feed_entries = []
|
feed_entries = []
|
||||||
# The step argument is available only in 2.7 or higher
|
paging = 0
|
||||||
for i in itertools.count(0):
|
for i in itertools.count(1):
|
||||||
paging = i*self._PAGING_STEP
|
|
||||||
info = self._download_webpage(self._FEED_TEMPLATE % paging,
|
info = self._download_webpage(self._FEED_TEMPLATE % paging,
|
||||||
u'%s feed' % self._FEED_NAME,
|
u'%s feed' % self._FEED_NAME,
|
||||||
u'Downloading page %s' % i)
|
u'Downloading page %s' % i)
|
||||||
@@ -1804,6 +1824,7 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
|
|||||||
for video_id in ids)
|
for video_id in ids)
|
||||||
if info['paging'] is None:
|
if info['paging'] is None:
|
||||||
break
|
break
|
||||||
|
paging = info['paging']
|
||||||
return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
|
return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
|
||||||
|
|
||||||
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
|
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
|
||||||
@@ -1823,9 +1844,15 @@ class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
|
|||||||
_VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
|
_VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
|
||||||
_FEED_NAME = 'watch_later'
|
_FEED_NAME = 'watch_later'
|
||||||
_PLAYLIST_TITLE = u'Youtube Watch Later'
|
_PLAYLIST_TITLE = u'Youtube Watch Later'
|
||||||
_PAGING_STEP = 100
|
|
||||||
_PERSONAL_FEED = True
|
_PERSONAL_FEED = True
|
||||||
|
|
||||||
|
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
|
||||||
|
IE_DESC = u'Youtube watch history, "ythistory" keyword (requires authentication)'
|
||||||
|
_VALID_URL = u'https?://www\.youtube\.com/feed/history|:ythistory'
|
||||||
|
_FEED_NAME = 'history'
|
||||||
|
_PERSONAL_FEED = True
|
||||||
|
_PLAYLIST_TITLE = u'Youtube Watch History'
|
||||||
|
|
||||||
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
|
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
|
||||||
IE_NAME = u'youtube:favorites'
|
IE_NAME = u'youtube:favorites'
|
||||||
IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
|
IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
|
||||||
|
@@ -1,75 +1,125 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
|
||||||
|
import operator
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
unified_strdate,
|
||||||
ExtractorError,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class ZDFIE(InfoExtractor):
|
class ZDFIE(InfoExtractor):
|
||||||
_VALID_URL = r'^http://www\.zdf\.de\/ZDFmediathek(?P<hash>#)?\/(.*beitrag\/video\/)(?P<video_id>[^/\?]+)(?:\?.*)?'
|
_VALID_URL = r'^https?://www\.zdf\.de/ZDFmediathek(?P<hash>#)?/(.*beitrag/(?:video/)?)(?P<video_id>[0-9]+)(?:/[^/?]+)?(?:\?.*)?'
|
||||||
_MEDIA_STREAM = r'<a href="(?P<video_url>.+(?P<media_type>.streaming).+/zdf/(?P<quality>[^\/]+)/[^"]*)".+class="play".+>'
|
|
||||||
|
_TEST = {
|
||||||
|
u"url": u"http://www.zdf.de/ZDFmediathek/beitrag/video/2037704/ZDFspezial---Ende-des-Machtpokers--?bc=sts;stt",
|
||||||
|
u"file": u"2037704.webm",
|
||||||
|
u"info_dict": {
|
||||||
|
u"upload_date": u"20131127",
|
||||||
|
u"description": u"Union und SPD haben sich auf einen Koalitionsvertrag geeinigt. Aber was bedeutet das für die Bürger? Sehen Sie hierzu das ZDFspezial \"Ende des Machtpokers - Große Koalition für Deutschland\".",
|
||||||
|
u"uploader": u"spezial",
|
||||||
|
u"title": u"ZDFspezial - Ende des Machtpokers"
|
||||||
|
},
|
||||||
|
u"skip": u"Videos on ZDF.de are depublicised in short order",
|
||||||
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
if mobj is None:
|
|
||||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
|
||||||
video_id = mobj.group('video_id')
|
video_id = mobj.group('video_id')
|
||||||
|
|
||||||
if mobj.group('hash'):
|
xml_url = u'http://www.zdf.de/ZDFmediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
|
||||||
url = url.replace(u'#', u'', 1)
|
doc = self._download_xml(
|
||||||
|
xml_url, video_id,
|
||||||
|
note=u'Downloading video info',
|
||||||
|
errnote=u'Failed to download video info')
|
||||||
|
|
||||||
html = self._download_webpage(url, video_id)
|
title = doc.find('.//information/title').text
|
||||||
streams = [m.groupdict() for m in re.finditer(self._MEDIA_STREAM, html)]
|
description = doc.find('.//information/detail').text
|
||||||
if streams is None:
|
uploader_node = doc.find('.//details/originChannelTitle')
|
||||||
raise ExtractorError(u'No media url found.')
|
uploader = None if uploader_node is None else uploader_node.text
|
||||||
|
duration_str = doc.find('.//details/length').text
|
||||||
|
duration_m = re.match(r'''(?x)^
|
||||||
|
(?P<hours>[0-9]{2})
|
||||||
|
:(?P<minutes>[0-9]{2})
|
||||||
|
:(?P<seconds>[0-9]{2})
|
||||||
|
(?:\.(?P<ms>[0-9]+)?)
|
||||||
|
''', duration_str)
|
||||||
|
duration = (
|
||||||
|
(
|
||||||
|
(int(duration_m.group('hours')) * 60 * 60) +
|
||||||
|
(int(duration_m.group('minutes')) * 60) +
|
||||||
|
int(duration_m.group('seconds'))
|
||||||
|
)
|
||||||
|
if duration_m
|
||||||
|
else None
|
||||||
|
)
|
||||||
|
upload_date = unified_strdate(doc.find('.//details/airtime').text)
|
||||||
|
|
||||||
# s['media_type'] == 'wstreaming' -> use 'Windows Media Player' and mms url
|
def xml_to_format(fnode):
|
||||||
# s['media_type'] == 'hstreaming' -> use 'Quicktime' and rtsp url
|
video_url = fnode.find('url').text
|
||||||
# choose first/default media type and highest quality for now
|
is_available = u'http://www.metafilegenerator' not in video_url
|
||||||
def stream_pref(s):
|
|
||||||
TYPE_ORDER = ['ostreaming', 'hstreaming', 'wstreaming']
|
format_id = fnode.attrib['basetype']
|
||||||
|
format_m = re.match(r'''(?x)
|
||||||
|
(?P<vcodec>[^_]+)_(?P<acodec>[^_]+)_(?P<container>[^_]+)_
|
||||||
|
(?P<proto>[^_]+)_(?P<index>[^_]+)_(?P<indexproto>[^_]+)
|
||||||
|
''', format_id)
|
||||||
|
|
||||||
|
ext = format_m.group('container')
|
||||||
|
is_supported = ext != 'f4f'
|
||||||
|
|
||||||
|
PROTO_ORDER = ['http', 'rtmp', 'rtsp']
|
||||||
try:
|
try:
|
||||||
type_pref = TYPE_ORDER.index(s['media_type'])
|
proto_pref = -PROTO_ORDER.index(format_m.group('proto'))
|
||||||
except ValueError:
|
except ValueError:
|
||||||
type_pref = 999
|
proto_pref = 999
|
||||||
|
|
||||||
QUALITY_ORDER = ['veryhigh', '300']
|
quality = fnode.find('./quality').text
|
||||||
|
QUALITY_ORDER = ['veryhigh', '300', 'high', 'med', 'low']
|
||||||
try:
|
try:
|
||||||
quality_pref = QUALITY_ORDER.index(s['quality'])
|
quality_pref = -QUALITY_ORDER.index(quality)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
quality_pref = 999
|
quality_pref = 999
|
||||||
|
|
||||||
return (type_pref, quality_pref)
|
abr = int(fnode.find('./audioBitrate').text) // 1000
|
||||||
|
vbr = int(fnode.find('./videoBitrate').text) // 1000
|
||||||
|
pref = (is_available, is_supported,
|
||||||
|
proto_pref, quality_pref, vbr, abr)
|
||||||
|
|
||||||
sorted_streams = sorted(streams, key=stream_pref)
|
format_note = u''
|
||||||
if not sorted_streams:
|
if not is_supported:
|
||||||
raise ExtractorError(u'No stream found.')
|
format_note += u'(unsupported)'
|
||||||
stream = sorted_streams[0]
|
if not format_note:
|
||||||
|
format_note = None
|
||||||
|
|
||||||
media_link = self._download_webpage(
|
return {
|
||||||
stream['video_url'],
|
'format_id': format_id + u'-' + quality,
|
||||||
video_id,
|
'url': video_url,
|
||||||
u'Get stream URL')
|
'ext': ext,
|
||||||
|
'acodec': format_m.group('acodec'),
|
||||||
|
'vcodec': format_m.group('vcodec'),
|
||||||
|
'abr': abr,
|
||||||
|
'vbr': vbr,
|
||||||
|
'width': int(fnode.find('./width').text),
|
||||||
|
'height': int(fnode.find('./height').text),
|
||||||
|
'filesize': int(fnode.find('./filesize').text),
|
||||||
|
'format_note': format_note,
|
||||||
|
'_pref': pref,
|
||||||
|
'_available': is_available,
|
||||||
|
}
|
||||||
|
|
||||||
#MMS_STREAM = r'href="(?P<video_url>mms://[^"]*)"'
|
format_nodes = doc.findall('.//formitaeten/formitaet')
|
||||||
RTSP_STREAM = r'(?P<video_url>rtsp://[^"]*.mp4)'
|
formats = sorted(filter(lambda f: f['_available'],
|
||||||
|
map(xml_to_format, format_nodes)),
|
||||||
mobj = re.search(self._MEDIA_STREAM, media_link)
|
key=operator.itemgetter('_pref'))
|
||||||
if mobj is None:
|
|
||||||
mobj = re.search(RTSP_STREAM, media_link)
|
|
||||||
if mobj is None:
|
|
||||||
raise ExtractorError(u'Cannot extract mms:// or rtsp:// URL')
|
|
||||||
video_url = mobj.group('video_url')
|
|
||||||
|
|
||||||
title = self._html_search_regex(
|
|
||||||
r'<h1(?: class="beitragHeadline")?>(.*?)</h1>',
|
|
||||||
html, u'title')
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': video_url,
|
|
||||||
'title': title,
|
'title': title,
|
||||||
'ext': determine_ext(video_url)
|
'formats': formats,
|
||||||
|
'description': description,
|
||||||
|
'uploader': uploader,
|
||||||
|
'duration': duration,
|
||||||
|
'upload_date': upload_date,
|
||||||
}
|
}
|
||||||
|
@@ -8,6 +8,7 @@ import gzip
|
|||||||
import io
|
import io
|
||||||
import json
|
import json
|
||||||
import locale
|
import locale
|
||||||
|
import math
|
||||||
import os
|
import os
|
||||||
import pipes
|
import pipes
|
||||||
import platform
|
import platform
|
||||||
@@ -16,6 +17,7 @@ import ssl
|
|||||||
import socket
|
import socket
|
||||||
import sys
|
import sys
|
||||||
import traceback
|
import traceback
|
||||||
|
import xml.etree.ElementTree
|
||||||
import zlib
|
import zlib
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@@ -536,8 +538,7 @@ def formatSeconds(secs):
|
|||||||
else:
|
else:
|
||||||
return '%d' % secs
|
return '%d' % secs
|
||||||
|
|
||||||
|
def make_HTTPS_handler(opts_no_check_certificate):
|
||||||
def make_HTTPS_handler(opts):
|
|
||||||
if sys.version_info < (3, 2):
|
if sys.version_info < (3, 2):
|
||||||
import httplib
|
import httplib
|
||||||
|
|
||||||
@@ -552,7 +553,7 @@ def make_HTTPS_handler(opts):
|
|||||||
self._tunnel()
|
self._tunnel()
|
||||||
try:
|
try:
|
||||||
self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv3)
|
self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv3)
|
||||||
except ssl.SSLError as e:
|
except ssl.SSLError:
|
||||||
self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv23)
|
self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv23)
|
||||||
|
|
||||||
class HTTPSHandlerV3(compat_urllib_request.HTTPSHandler):
|
class HTTPSHandlerV3(compat_urllib_request.HTTPSHandler):
|
||||||
@@ -564,7 +565,7 @@ def make_HTTPS_handler(opts):
|
|||||||
context.set_default_verify_paths()
|
context.set_default_verify_paths()
|
||||||
|
|
||||||
context.verify_mode = (ssl.CERT_NONE
|
context.verify_mode = (ssl.CERT_NONE
|
||||||
if opts.no_check_certificate
|
if opts_no_check_certificate
|
||||||
else ssl.CERT_REQUIRED)
|
else ssl.CERT_REQUIRED)
|
||||||
return compat_urllib_request.HTTPSHandler(context=context)
|
return compat_urllib_request.HTTPSHandler(context=context)
|
||||||
|
|
||||||
@@ -1006,3 +1007,17 @@ def unsmuggle_url(smug_url):
|
|||||||
jsond = compat_parse_qs(sdata)[u'__youtubedl_smuggle'][0]
|
jsond = compat_parse_qs(sdata)[u'__youtubedl_smuggle'][0]
|
||||||
data = json.loads(jsond)
|
data = json.loads(jsond)
|
||||||
return url, data
|
return url, data
|
||||||
|
|
||||||
|
|
||||||
|
def format_bytes(bytes):
    """Render a byte count as a human-readable string with a binary suffix.

    bytes may be None, a number, or anything float() accepts (for example
    a numeric string).  None yields u'N/A'; every other value is scaled by
    powers of 1024 and formatted with two decimals followed by the unit,
    e.g. 1536 -> u'1.50KiB'.

    Values below 1 stay in plain bytes, negative values keep their sign,
    and values beyond the largest known unit are expressed in that unit
    (YiB) rather than raising IndexError.
    """
    if bytes is None:
        return u'N/A'

    suffixes = [u'B', u'KiB', u'MiB', u'GiB', u'TiB', u'PiB', u'EiB', u'ZiB', u'YiB']

    # float() covers ints, floats and every string type uniformly.
    value = float(bytes)
    magnitude = abs(value)

    # Repeated division by 1024 (a power of two) is exact for floats, so the
    # exponent cannot be off by one the way int(math.log(x, 1024)) can be at
    # unit boundaries, and it is never negative or past the end of the
    # suffix table.
    exponent = 0
    while magnitude >= 1024.0 and exponent < len(suffixes) - 1:
        magnitude /= 1024.0
        exponent += 1

    converted = -magnitude if value < 0 else magnitude
    return u'%.2f%s' % (converted, suffixes[exponent])
|
||||||
|
@@ -1,2 +1,2 @@
|
|||||||
|
|
||||||
__version__ = '2013.11.24'
|
__version__ = '2013.11.29'
|
||||||
|
Reference in New Issue
Block a user