Compare commits
54 Commits
2014.07.24
...
2014.08.02
Author | SHA1 | Date | |
---|---|---|---|
|
61aabb9d70 | ||
|
62af3a0eb5 | ||
|
60064c53f1 | ||
|
98eb1c3fa2 | ||
|
201e9eaa0e | ||
|
9afa6ede21 | ||
|
f4776371ae | ||
|
328a20bf9c | ||
|
5622f29ae4 | ||
|
b4f23afbd1 | ||
|
0138968a6a | ||
|
4f31d0f2b7 | ||
|
bff74bdd1a | ||
|
10b04ff7f4 | ||
|
1f7ccb9014 | ||
|
c7b3209668 | ||
|
895ba7d1dd | ||
|
a2a1b0baa2 | ||
|
8646eb790e | ||
|
f036a6328e | ||
|
31bb8d3f51 | ||
|
4958ae2058 | ||
|
7e8d73c183 | ||
|
65bc504db8 | ||
|
0fc74a0d91 | ||
|
8d2cc6fbb1 | ||
|
a954584f63 | ||
|
cb3ff6fb01 | ||
|
71aa656d13 | ||
|
366b1f3cfe | ||
|
64ce58db38 | ||
|
11b85ce62e | ||
|
1220352ff7 | ||
|
8f3034d871 | ||
|
7fa547ab02 | ||
|
3182f3e2dc | ||
|
cbf915f3f6 | ||
|
b490b8849a | ||
|
5d2519e5bf | ||
|
c3415d1bac | ||
|
36f3542883 | ||
|
4cb71e9b6a | ||
|
4bc7009e8a | ||
|
16f8e9df8a | ||
|
b081cebefa | ||
|
916c145217 | ||
|
4192b51c7c | ||
|
052421ff09 | ||
|
4e99f48817 | ||
|
a11165ecc6 | ||
|
fbb2fc5580 | ||
|
2fe3d240cc | ||
|
42f4dcfe41 | ||
|
892e3192fb |
71
README.md
71
README.md
@@ -38,12 +38,6 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
playlist or the command line) if an error
|
||||
occurs
|
||||
--dump-user-agent display the current browser identification
|
||||
--user-agent UA specify a custom user agent
|
||||
--referer REF specify a custom referer, use if the video
|
||||
access is restricted to one domain
|
||||
--add-header FIELD:VALUE specify a custom HTTP header and its value,
|
||||
separated by a colon ':'. You can use this
|
||||
option multiple times
|
||||
--list-extractors List all supported extractors and the URLs
|
||||
they would handle
|
||||
--extractor-descriptions Output descriptions of all supported
|
||||
@@ -51,35 +45,22 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
--proxy URL Use the specified HTTP/HTTPS proxy. Pass in
|
||||
an empty string (--proxy "") for direct
|
||||
connection
|
||||
--no-check-certificate Suppress HTTPS certificate validation.
|
||||
--prefer-insecure Use an unencrypted connection to retrieve
|
||||
information about the video. (Currently
|
||||
supported only for YouTube)
|
||||
--cache-dir DIR Location in the filesystem where youtube-dl
|
||||
can store some downloaded information
|
||||
permanently. By default $XDG_CACHE_HOME
|
||||
/youtube-dl or ~/.cache/youtube-dl . At the
|
||||
moment, only YouTube player files (for
|
||||
videos with obfuscated signatures) are
|
||||
cached, but that may change.
|
||||
--no-cache-dir Disable filesystem caching
|
||||
--socket-timeout None Time to wait before giving up, in seconds
|
||||
--bidi-workaround Work around terminals that lack
|
||||
bidirectional text support. Requires bidiv
|
||||
or fribidi executable in PATH
|
||||
--default-search PREFIX Use this prefix for unqualified URLs. For
|
||||
example "gvsearch2:" downloads two videos
|
||||
from google videos for youtube-dl "large
|
||||
apple". Use the value "auto" to let
|
||||
youtube-dl guess. The default value "error"
|
||||
just throws an error.
|
||||
youtube-dl guess ("auto_warning" to emit a
|
||||
warning when guessing). "error" just throws
|
||||
an error. The default value "fixup_error"
|
||||
repairs broken URLs, but emits an error if
|
||||
this is not possible instead of searching.
|
||||
--ignore-config Do not read configuration files. When given
|
||||
in the global configuration file /etc
|
||||
/youtube-dl.conf: do not read the user
|
||||
configuration in ~/.config/youtube-dl.conf
|
||||
(%APPDATA%/youtube-dl/config.txt on
|
||||
Windows)
|
||||
--encoding ENCODING Force the specified encoding (experimental)
|
||||
|
||||
## Video Selection:
|
||||
--playlist-start NUMBER playlist video to start at (default is 1)
|
||||
@@ -125,9 +106,9 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
of SIZE.
|
||||
|
||||
## Filesystem Options:
|
||||
-t, --title use title in file name (default)
|
||||
-a, --batch-file FILE file containing URLs to download ('-' for
|
||||
stdin)
|
||||
--id use only video ID in file name
|
||||
-l, --literal [deprecated] alias of --title
|
||||
-A, --auto-number number downloaded files starting from 00000
|
||||
-o, --output TEMPLATE output filename template. Use %(title)s to
|
||||
get the title, %(uploader)s for the
|
||||
@@ -160,18 +141,15 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
--restrict-filenames Restrict filenames to only ASCII
|
||||
characters, and avoid "&" and spaces in
|
||||
filenames
|
||||
-a, --batch-file FILE file containing URLs to download ('-' for
|
||||
stdin)
|
||||
--load-info FILE json file containing the video information
|
||||
(created with the "--write-json" option)
|
||||
-t, --title [deprecated] use title in file name
|
||||
(default)
|
||||
-l, --literal [deprecated] alias of --title
|
||||
-w, --no-overwrites do not overwrite files
|
||||
-c, --continue force resume of partially downloaded files.
|
||||
By default, youtube-dl will resume
|
||||
downloads if possible.
|
||||
--no-continue do not resume partially downloaded files
|
||||
(restart from beginning)
|
||||
--cookies FILE file to read cookies from and dump cookie
|
||||
jar in
|
||||
--no-part do not use .part files
|
||||
--no-mtime do not use the Last-modified header to set
|
||||
the file modification time
|
||||
@@ -181,6 +159,19 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
--write-annotations write video annotations to a .annotation
|
||||
file
|
||||
--write-thumbnail write thumbnail image to disk
|
||||
--load-info FILE json file containing the video information
|
||||
(created with the "--write-json" option)
|
||||
--cookies FILE file to read cookies from and dump cookie
|
||||
jar in
|
||||
--cache-dir DIR Location in the filesystem where youtube-dl
|
||||
can store some downloaded information
|
||||
permanently. By default $XDG_CACHE_HOME
|
||||
/youtube-dl or ~/.cache/youtube-dl . At the
|
||||
moment, only YouTube player files (for
|
||||
videos with obfuscated signatures) are
|
||||
cached, but that may change.
|
||||
--no-cache-dir Disable filesystem caching
|
||||
--rm-cache-dir Delete all filesystem cache files
|
||||
|
||||
## Verbosity / Simulation Options:
|
||||
-q, --quiet activates quiet mode
|
||||
@@ -210,6 +201,22 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
problems
|
||||
--print-traffic Display sent and read HTTP traffic
|
||||
|
||||
## Workarounds:
|
||||
--encoding ENCODING Force the specified encoding (experimental)
|
||||
--no-check-certificate Suppress HTTPS certificate validation.
|
||||
--prefer-insecure Use an unencrypted connection to retrieve
|
||||
information about the video. (Currently
|
||||
supported only for YouTube)
|
||||
--user-agent UA specify a custom user agent
|
||||
--referer REF specify a custom referer, use if the video
|
||||
access is restricted to one domain
|
||||
--add-header FIELD:VALUE specify a custom HTTP header and its value,
|
||||
separated by a colon ':'. You can use this
|
||||
option multiple times
|
||||
--bidi-workaround Work around terminals that lack
|
||||
bidirectional text support. Requires bidiv
|
||||
or fribidi executable in PATH
|
||||
|
||||
## Video Format Options:
|
||||
-f, --format FORMAT video format code, specify the order of
|
||||
preference using slashes: "-f 22/17/18".
|
||||
|
@@ -193,10 +193,10 @@ class TestPlaylists(unittest.TestCase):
|
||||
def test_bandcamp_album(self):
|
||||
dl = FakeYDL()
|
||||
ie = BandcampAlbumIE(dl)
|
||||
result = ie.extract('http://mpallante.bandcamp.com/album/nightmare-night-ep')
|
||||
result = ie.extract('http://nightbringer.bandcamp.com/album/hierophany-of-the-open-grave')
|
||||
self.assertIsPlaylist(result)
|
||||
self.assertEqual(result['title'], 'Nightmare Night EP')
|
||||
assertGreaterEqual(self, len(result['entries']), 4)
|
||||
self.assertEqual(result['title'], 'Hierophany of the Open Grave')
|
||||
assertGreaterEqual(self, len(result['entries']), 9)
|
||||
|
||||
def test_smotri_community(self):
|
||||
dl = FakeYDL()
|
||||
|
@@ -1,5 +1,7 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
# Allow direct execution
|
||||
import os
|
||||
import sys
|
||||
@@ -16,59 +18,65 @@ from youtube_dl.utils import compat_str, compat_urlretrieve
|
||||
|
||||
_TESTS = [
|
||||
(
|
||||
u'https://s.ytimg.com/yts/jsbin/html5player-vflHOr_nV.js',
|
||||
u'js',
|
||||
'https://s.ytimg.com/yts/jsbin/html5player-vflHOr_nV.js',
|
||||
'js',
|
||||
86,
|
||||
u'>=<;:/.-[+*)(\'&%$#"!ZYX0VUTSRQPONMLKJIHGFEDCBA\\yxwvutsrqponmlkjihgfedcba987654321',
|
||||
'>=<;:/.-[+*)(\'&%$#"!ZYX0VUTSRQPONMLKJIHGFEDCBA\\yxwvutsrqponmlkjihgfedcba987654321',
|
||||
),
|
||||
(
|
||||
u'https://s.ytimg.com/yts/jsbin/html5player-vfldJ8xgI.js',
|
||||
u'js',
|
||||
'https://s.ytimg.com/yts/jsbin/html5player-vfldJ8xgI.js',
|
||||
'js',
|
||||
85,
|
||||
u'3456789a0cdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS[UVWXYZ!"#$%&\'()*+,-./:;<=>?@',
|
||||
'3456789a0cdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS[UVWXYZ!"#$%&\'()*+,-./:;<=>?@',
|
||||
),
|
||||
(
|
||||
u'https://s.ytimg.com/yts/jsbin/html5player-vfle-mVwz.js',
|
||||
u'js',
|
||||
'https://s.ytimg.com/yts/jsbin/html5player-vfle-mVwz.js',
|
||||
'js',
|
||||
90,
|
||||
u']\\[@?>=<;:/.-,+*)(\'&%$#"hZYXWVUTSRQPONMLKJIHGFEDCBAzyxwvutsrqponmlkjiagfedcb39876',
|
||||
']\\[@?>=<;:/.-,+*)(\'&%$#"hZYXWVUTSRQPONMLKJIHGFEDCBAzyxwvutsrqponmlkjiagfedcb39876',
|
||||
),
|
||||
(
|
||||
u'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl0Cbn9e.js',
|
||||
u'js',
|
||||
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl0Cbn9e.js',
|
||||
'js',
|
||||
84,
|
||||
u'O1I3456789abcde0ghijklmnopqrstuvwxyzABCDEFGHfJKLMN2PQRSTUVW@YZ!"#$%&\'()*+,-./:;<=',
|
||||
'O1I3456789abcde0ghijklmnopqrstuvwxyzABCDEFGHfJKLMN2PQRSTUVW@YZ!"#$%&\'()*+,-./:;<=',
|
||||
),
|
||||
(
|
||||
u'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js',
|
||||
u'js',
|
||||
u'2ACFC7A61CA478CD21425E5A57EBD73DDC78E22A.2094302436B2D377D14A3BBA23022D023B8BC25AA',
|
||||
u'A52CB8B320D22032ABB3A41D773D2B6342034902.A22E87CDD37DBE75A5E52412DC874AC16A7CFCA2',
|
||||
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js',
|
||||
'js',
|
||||
'2ACFC7A61CA478CD21425E5A57EBD73DDC78E22A.2094302436B2D377D14A3BBA23022D023B8BC25AA',
|
||||
'A52CB8B320D22032ABB3A41D773D2B6342034902.A22E87CDD37DBE75A5E52412DC874AC16A7CFCA2',
|
||||
),
|
||||
(
|
||||
u'http://s.ytimg.com/yts/swfbin/player-vfl5vIhK2/watch_as3.swf',
|
||||
u'swf',
|
||||
'http://s.ytimg.com/yts/swfbin/player-vfl5vIhK2/watch_as3.swf',
|
||||
'swf',
|
||||
86,
|
||||
u'O1I3456789abcde0ghijklmnopqrstuvwxyzABCDEFGHfJKLMN2PQRSTUVWXY\\!"#$%&\'()*+,-./:;<=>?'
|
||||
'O1I3456789abcde0ghijklmnopqrstuvwxyzABCDEFGHfJKLMN2PQRSTUVWXY\\!"#$%&\'()*+,-./:;<=>?'
|
||||
),
|
||||
(
|
||||
u'http://s.ytimg.com/yts/swfbin/player-vflmDyk47/watch_as3.swf',
|
||||
u'swf',
|
||||
u'F375F75BF2AFDAAF2666E43868D46816F83F13E81C46.3725A8218E446A0DECD33F79DC282994D6AA92C92C9',
|
||||
u'9C29AA6D499282CD97F33DCED0A644E8128A5273.64C18E31F38361864D86834E6662FAADFA2FB57F'
|
||||
'http://s.ytimg.com/yts/swfbin/player-vflmDyk47/watch_as3.swf',
|
||||
'swf',
|
||||
'F375F75BF2AFDAAF2666E43868D46816F83F13E81C46.3725A8218E446A0DECD33F79DC282994D6AA92C92C9',
|
||||
'9C29AA6D499282CD97F33DCED0A644E8128A5273.64C18E31F38361864D86834E6662FAADFA2FB57F'
|
||||
),
|
||||
(
|
||||
u'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflBb0OQx.js',
|
||||
u'js',
|
||||
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflBb0OQx.js',
|
||||
'js',
|
||||
84,
|
||||
u'123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQ0STUVWXYZ!"#$%&\'()*+,@./:;<=>'
|
||||
'123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQ0STUVWXYZ!"#$%&\'()*+,@./:;<=>'
|
||||
),
|
||||
(
|
||||
u'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl9FYC6l.js',
|
||||
u'js',
|
||||
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl9FYC6l.js',
|
||||
'js',
|
||||
83,
|
||||
u'123456789abcdefghijklmnopqr0tuvwxyzABCDETGHIJKLMNOPQRS>UVWXYZ!"#$%&\'()*+,-./:;<=F'
|
||||
'123456789abcdefghijklmnopqr0tuvwxyzABCDETGHIJKLMNOPQRS>UVWXYZ!"#$%&\'()*+,-./:;<=F'
|
||||
),
|
||||
(
|
||||
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflCGk6yw/html5player.js',
|
||||
'js',
|
||||
'4646B5181C6C3020DF1D9C7FCFEA.AD80ABF70C39BD369CCCAE780AFBB98FA6B6CB42766249D9488C288',
|
||||
'82C8849D94266724DC6B6AF89BBFA087EACCD963.B93C07FBA084ACAEFCF7C9D1FD0203C6C1815B6B'
|
||||
)
|
||||
]
|
||||
|
||||
|
||||
@@ -81,7 +89,7 @@ class TestSignature(unittest.TestCase):
|
||||
|
||||
|
||||
def make_tfunc(url, stype, sig_input, expected_sig):
|
||||
m = re.match(r'.*-([a-zA-Z0-9_-]+)(?:/watch_as3)?\.[a-z]+$', url)
|
||||
m = re.match(r'.*-([a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.[a-z]+$', url)
|
||||
assert m, '%r should follow URL format' % url
|
||||
test_id = m.group(1)
|
||||
|
||||
|
@@ -275,7 +275,7 @@ class YoutubeDL(object):
|
||||
return message
|
||||
|
||||
assert hasattr(self, '_output_process')
|
||||
assert type(message) == type('')
|
||||
assert isinstance(message, compat_str)
|
||||
line_count = message.count('\n') + 1
|
||||
self._output_process.stdin.write((message + '\n').encode('utf-8'))
|
||||
self._output_process.stdin.flush()
|
||||
@@ -303,7 +303,7 @@ class YoutubeDL(object):
|
||||
|
||||
def to_stderr(self, message):
|
||||
"""Print message to stderr."""
|
||||
assert type(message) == type('')
|
||||
assert isinstance(message, compat_str)
|
||||
if self.params.get('logger'):
|
||||
self.params['logger'].error(message)
|
||||
else:
|
||||
@@ -849,7 +849,7 @@ class YoutubeDL(object):
|
||||
# Keep for backwards compatibility
|
||||
info_dict['stitle'] = info_dict['title']
|
||||
|
||||
if not 'format' in info_dict:
|
||||
if 'format' not in info_dict:
|
||||
info_dict['format'] = info_dict['ext']
|
||||
|
||||
reason = self._match_entry(info_dict)
|
||||
@@ -1234,21 +1234,18 @@ class YoutubeDL(object):
|
||||
if not self.params.get('verbose'):
|
||||
return
|
||||
|
||||
if type('') is not compat_str:
|
||||
# Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
|
||||
self.report_warning(
|
||||
'Your Python is broken! Update to a newer and supported version')
|
||||
|
||||
encoding_str = (
|
||||
'[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
|
||||
locale.getpreferredencoding(),
|
||||
sys.getfilesystemencoding(),
|
||||
sys.stdout.encoding,
|
||||
self.get_encoding()))
|
||||
try:
|
||||
write_string(encoding_str, encoding=None)
|
||||
except:
|
||||
errmsg = 'Failed to write encoding string %r' % encoding_str
|
||||
try:
|
||||
sys.stdout.write(errmsg)
|
||||
except:
|
||||
pass
|
||||
raise IOError(errmsg)
|
||||
write_string(encoding_str, encoding=None)
|
||||
|
||||
self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
|
||||
try:
|
||||
|
@@ -76,6 +76,7 @@ import optparse
|
||||
import os
|
||||
import random
|
||||
import shlex
|
||||
import shutil
|
||||
import sys
|
||||
|
||||
|
||||
@@ -222,6 +223,7 @@ def parseOpts(overrideArguments=None):
|
||||
downloader = optparse.OptionGroup(parser, 'Download Options')
|
||||
postproc = optparse.OptionGroup(parser, 'Post-processing Options')
|
||||
filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
|
||||
workarounds = optparse.OptionGroup(parser, 'Workarounds')
|
||||
verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
|
||||
|
||||
general.add_option('-h', '--help',
|
||||
@@ -238,14 +240,6 @@ def parseOpts(overrideArguments=None):
|
||||
general.add_option('--dump-user-agent',
|
||||
action='store_true', dest='dump_user_agent',
|
||||
help='display the current browser identification', default=False)
|
||||
general.add_option('--user-agent',
|
||||
dest='user_agent', help='specify a custom user agent', metavar='UA')
|
||||
general.add_option('--referer',
|
||||
dest='referer', help='specify a custom referer, use if the video access is restricted to one domain',
|
||||
metavar='REF', default=None)
|
||||
general.add_option('--add-header',
|
||||
dest='headers', help='specify a custom HTTP header and its value, separated by a colon \':\'. You can use this option multiple times', action="append",
|
||||
metavar='FIELD:VALUE')
|
||||
general.add_option('--list-extractors',
|
||||
action='store_true', dest='list_extractors',
|
||||
help='List all supported extractors and the URLs they would handle', default=False)
|
||||
@@ -255,33 +249,17 @@ def parseOpts(overrideArguments=None):
|
||||
general.add_option(
|
||||
'--proxy', dest='proxy', default=None, metavar='URL',
|
||||
help='Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection')
|
||||
general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.')
|
||||
general.add_option(
|
||||
'--prefer-insecure', '--prefer-unsecure', action='store_true', dest='prefer_insecure',
|
||||
help='Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube)')
|
||||
general.add_option(
|
||||
'--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR',
|
||||
help='Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.')
|
||||
general.add_option(
|
||||
'--no-cache-dir', action='store_const', const=None, dest='cachedir',
|
||||
help='Disable filesystem caching')
|
||||
general.add_option(
|
||||
'--socket-timeout', dest='socket_timeout',
|
||||
type=float, default=None, help=u'Time to wait before giving up, in seconds')
|
||||
general.add_option(
|
||||
'--bidi-workaround', dest='bidi_workaround', action='store_true',
|
||||
help=u'Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH')
|
||||
general.add_option(
|
||||
'--default-search',
|
||||
dest='default_search', metavar='PREFIX',
|
||||
help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". Use the value "auto" to let youtube-dl guess. The default value "error" just throws an error.')
|
||||
help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". Use the value "auto" to let youtube-dl guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching.')
|
||||
general.add_option(
|
||||
'--ignore-config',
|
||||
action='store_true',
|
||||
help='Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: do not read the user configuration in ~/.config/youtube-dl.conf (%APPDATA%/youtube-dl/config.txt on Windows)')
|
||||
general.add_option(
|
||||
'--encoding', dest='encoding', metavar='ENCODING',
|
||||
help='Force the specified encoding (experimental)')
|
||||
|
||||
selection.add_option(
|
||||
'--playlist-start',
|
||||
@@ -382,6 +360,33 @@ def parseOpts(overrideArguments=None):
|
||||
help='do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.', default=False)
|
||||
downloader.add_option('--test', action='store_true', dest='test', default=False, help=optparse.SUPPRESS_HELP)
|
||||
|
||||
workarounds.add_option(
|
||||
'--encoding', dest='encoding', metavar='ENCODING',
|
||||
help='Force the specified encoding (experimental)')
|
||||
workarounds.add_option(
|
||||
'--no-check-certificate', action='store_true',
|
||||
dest='no_check_certificate', default=False,
|
||||
help='Suppress HTTPS certificate validation.')
|
||||
workarounds.add_option(
|
||||
'--prefer-insecure', '--prefer-unsecure', action='store_true', dest='prefer_insecure',
|
||||
help='Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube)')
|
||||
workarounds.add_option(
|
||||
'--user-agent', metavar='UA',
|
||||
dest='user_agent', help='specify a custom user agent')
|
||||
workarounds.add_option(
|
||||
'--referer', metavar='REF',
|
||||
dest='referer', default=None,
|
||||
help='specify a custom referer, use if the video access is restricted to one domain',
|
||||
)
|
||||
workarounds.add_option(
|
||||
'--add-header', metavar='FIELD:VALUE',
|
||||
dest='headers', action='append',
|
||||
help='specify a custom HTTP header and its value, separated by a colon \':\'. You can use this option multiple times',
|
||||
)
|
||||
workarounds.add_option(
|
||||
'--bidi-workaround', dest='bidi_workaround', action='store_true',
|
||||
help=u'Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH')
|
||||
|
||||
verbosity.add_option('-q', '--quiet',
|
||||
action='store_true', dest='quiet', help='activates quiet mode', default=False)
|
||||
verbosity.add_option(
|
||||
@@ -439,12 +444,10 @@ def parseOpts(overrideArguments=None):
|
||||
help='Display sent and read HTTP traffic')
|
||||
|
||||
|
||||
filesystem.add_option('-t', '--title',
|
||||
action='store_true', dest='usetitle', help='use title in file name (default)', default=False)
|
||||
filesystem.add_option('-a', '--batch-file',
|
||||
dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
|
||||
filesystem.add_option('--id',
|
||||
action='store_true', dest='useid', help='use only video ID in file name', default=False)
|
||||
filesystem.add_option('-l', '--literal',
|
||||
action='store_true', dest='usetitle', help='[deprecated] alias of --title', default=False)
|
||||
filesystem.add_option('-A', '--auto-number',
|
||||
action='store_true', dest='autonumber',
|
||||
help='number downloaded files starting from 00000', default=False)
|
||||
@@ -470,11 +473,10 @@ def parseOpts(overrideArguments=None):
|
||||
filesystem.add_option('--restrict-filenames',
|
||||
action='store_true', dest='restrictfilenames',
|
||||
help='Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames', default=False)
|
||||
filesystem.add_option('-a', '--batch-file',
|
||||
dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
|
||||
filesystem.add_option('--load-info',
|
||||
dest='load_info_filename', metavar='FILE',
|
||||
help='json file containing the video information (created with the "--write-json" option)')
|
||||
filesystem.add_option('-t', '--title',
|
||||
action='store_true', dest='usetitle', help='[deprecated] use title in file name (default)', default=False)
|
||||
filesystem.add_option('-l', '--literal',
|
||||
action='store_true', dest='usetitle', help='[deprecated] alias of --title', default=False)
|
||||
filesystem.add_option('-w', '--no-overwrites',
|
||||
action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
|
||||
filesystem.add_option('-c', '--continue',
|
||||
@@ -482,8 +484,6 @@ def parseOpts(overrideArguments=None):
|
||||
filesystem.add_option('--no-continue',
|
||||
action='store_false', dest='continue_dl',
|
||||
help='do not resume partially downloaded files (restart from beginning)')
|
||||
filesystem.add_option('--cookies',
|
||||
dest='cookiefile', metavar='FILE', help='file to read cookies from and dump cookie jar in')
|
||||
filesystem.add_option('--no-part',
|
||||
action='store_true', dest='nopart', help='do not use .part files', default=False)
|
||||
filesystem.add_option('--no-mtime',
|
||||
@@ -501,6 +501,20 @@ def parseOpts(overrideArguments=None):
|
||||
filesystem.add_option('--write-thumbnail',
|
||||
action='store_true', dest='writethumbnail',
|
||||
help='write thumbnail image to disk', default=False)
|
||||
filesystem.add_option('--load-info',
|
||||
dest='load_info_filename', metavar='FILE',
|
||||
help='json file containing the video information (created with the "--write-json" option)')
|
||||
filesystem.add_option('--cookies',
|
||||
dest='cookiefile', metavar='FILE', help='file to read cookies from and dump cookie jar in')
|
||||
filesystem.add_option(
|
||||
'--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR',
|
||||
help='Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.')
|
||||
filesystem.add_option(
|
||||
'--no-cache-dir', action='store_const', const=None, dest='cachedir',
|
||||
help='Disable filesystem caching')
|
||||
filesystem.add_option(
|
||||
'--rm-cache-dir', action='store_true', dest='rm_cachedir',
|
||||
help='Delete all filesystem cache files')
|
||||
|
||||
|
||||
postproc.add_option('-x', '--extract-audio', action='store_true', dest='extractaudio', default=False,
|
||||
@@ -534,6 +548,7 @@ def parseOpts(overrideArguments=None):
|
||||
parser.add_option_group(downloader)
|
||||
parser.add_option_group(filesystem)
|
||||
parser.add_option_group(verbosity)
|
||||
parser.add_option_group(workarounds)
|
||||
parser.add_option_group(video_format)
|
||||
parser.add_option_group(subtitles)
|
||||
parser.add_option_group(authentication)
|
||||
@@ -694,7 +709,7 @@ def _real_main(argv=None):
|
||||
date = DateRange.day(opts.date)
|
||||
else:
|
||||
date = DateRange(opts.dateafter, opts.datebefore)
|
||||
if opts.default_search not in ('auto', 'auto_warning', None) and ':' not in opts.default_search:
|
||||
if opts.default_search not in ('auto', 'auto_warning', 'error', 'fixup_error', None) and ':' not in opts.default_search:
|
||||
parser.error(u'--default-search invalid; did you forget a colon (:) at the end?')
|
||||
|
||||
# Do not download videos when there are audio-only formats
|
||||
@@ -833,9 +848,26 @@ def _real_main(argv=None):
|
||||
if opts.update_self:
|
||||
update_self(ydl.to_screen, opts.verbose)
|
||||
|
||||
# Remove cache dir
|
||||
if opts.rm_cachedir:
|
||||
if opts.cachedir is None:
|
||||
ydl.to_screen(u'No cache dir specified (Did you combine --no-cache-dir and --rm-cache-dir?)')
|
||||
else:
|
||||
if ('.cache' not in opts.cachedir) or ('youtube-dl' not in opts.cachedir):
|
||||
ydl.to_screen(u'Not removing directory %s - this does not look like a cache dir')
|
||||
retcode = 141
|
||||
else:
|
||||
ydl.to_screen(
|
||||
u'Removing cache dir %s .' % opts.cachedir,
|
||||
skip_eol=True)
|
||||
if os.path.exists(opts.cachedir):
|
||||
ydl.to_screen(u'.', skip_eol=True)
|
||||
shutil.rmtree(opts.cachedir)
|
||||
ydl.to_screen(u'.')
|
||||
|
||||
# Maybe do nothing
|
||||
if (len(all_urls) < 1) and (opts.load_info_filename is None):
|
||||
if not opts.update_self:
|
||||
if not (opts.update_self or opts.rm_cachedir):
|
||||
parser.error(u'you must provide at least one URL')
|
||||
else:
|
||||
sys.exit()
|
||||
|
@@ -220,6 +220,7 @@ class F4mFD(FileDownloader):
|
||||
|
||||
def real_download(self, filename, info_dict):
|
||||
man_url = info_dict['url']
|
||||
requested_bitrate = info_dict.get('tbr')
|
||||
self.to_screen('[download] Downloading f4m manifest')
|
||||
manifest = self.ydl.urlopen(man_url).read()
|
||||
self.report_destination(filename)
|
||||
@@ -233,8 +234,14 @@ class F4mFD(FileDownloader):
|
||||
|
||||
doc = etree.fromstring(manifest)
|
||||
formats = [(int(f.attrib.get('bitrate', -1)), f) for f in doc.findall(_add_ns('media'))]
|
||||
formats = sorted(formats, key=lambda f: f[0])
|
||||
rate, media = formats[-1]
|
||||
if requested_bitrate is None:
|
||||
# get the best format
|
||||
formats = sorted(formats, key=lambda f: f[0])
|
||||
rate, media = formats[-1]
|
||||
else:
|
||||
rate, media = list(filter(
|
||||
lambda f: int(f[0]) == requested_bitrate, formats))[0]
|
||||
|
||||
base_url = compat_urlparse.urljoin(man_url, media.attrib['url'])
|
||||
bootstrap = base64.b64decode(doc.find(_add_ns('bootstrapInfo')).text)
|
||||
metadata = base64.b64decode(media.find(_add_ns('metadata')).text)
|
||||
|
@@ -1,3 +1,4 @@
|
||||
from .abc import ABCIE
|
||||
from .academicearth import AcademicEarthCourseIE
|
||||
from .addanime import AddAnimeIE
|
||||
from .adultswim import AdultSwimIE
|
||||
@@ -111,9 +112,11 @@ from .funnyordie import FunnyOrDieIE
|
||||
from .gamekings import GamekingsIE
|
||||
from .gameone import GameOneIE
|
||||
from .gamespot import GameSpotIE
|
||||
from .gamestar import GameStarIE
|
||||
from .gametrailers import GametrailersIE
|
||||
from .gdcvault import GDCVaultIE
|
||||
from .generic import GenericIE
|
||||
from .godtube import GodTubeIE
|
||||
from .googleplus import GooglePlusIE
|
||||
from .googlesearch import GoogleSearchIE
|
||||
from .gorillavid import GorillaVidIE
|
||||
@@ -140,6 +143,7 @@ from .ivi import (
|
||||
IviIE,
|
||||
IviCompilationIE
|
||||
)
|
||||
from .izlesene import IzleseneIE
|
||||
from .jadorecettepub import JadoreCettePubIE
|
||||
from .jeuxvideo import JeuxVideoIE
|
||||
from .jukebox import JukeboxIE
|
||||
@@ -259,6 +263,7 @@ from .savefrom import SaveFromIE
|
||||
from .scivee import SciVeeIE
|
||||
from .screencast import ScreencastIE
|
||||
from .servingsys import ServingSysIE
|
||||
from .shared import SharedIE
|
||||
from .sina import SinaIE
|
||||
from .slideshare import SlideshareIE
|
||||
from .slutload import SlutloadIE
|
||||
@@ -321,6 +326,7 @@ from .tumblr import TumblrIE
|
||||
from .tutv import TutvIE
|
||||
from .tvigle import TvigleIE
|
||||
from .tvp import TvpIE
|
||||
from .tvplay import TVPlayIE
|
||||
from .udemy import (
|
||||
UdemyIE,
|
||||
UdemyCourseIE
|
||||
@@ -342,6 +348,7 @@ from .videofyme import VideofyMeIE
|
||||
from .videopremium import VideoPremiumIE
|
||||
from .videott import VideoTtIE
|
||||
from .videoweed import VideoWeedIE
|
||||
from .vidme import VidmeIE
|
||||
from .vimeo import (
|
||||
VimeoIE,
|
||||
VimeoChannelIE,
|
||||
|
48
youtube_dl/extractor/abc.py
Normal file
48
youtube_dl/extractor/abc.py
Normal file
@@ -0,0 +1,48 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class ABCIE(InfoExtractor):
|
||||
IE_NAME = 'abc.net.au'
|
||||
_VALID_URL = r'http://www\.abc\.net\.au/news/[^/]+/[^/]+/(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.abc.net.au/news/2014-07-25/bringing-asylum-seekers-to-australia-would-give/5624716',
|
||||
'md5': 'dad6f8ad011a70d9ddf887ce6d5d0742',
|
||||
'info_dict': {
|
||||
'id': '5624716',
|
||||
'ext': 'mp4',
|
||||
'title': 'Bringing asylum seekers to Australia would give them right to asylum claims: professor',
|
||||
'description': 'md5:ba36fa5e27e5c9251fd929d339aea4af',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
urls_info_json = self._search_regex(
|
||||
r'inlineVideoData\.push\((.*?)\);', webpage, 'video urls',
|
||||
flags=re.DOTALL)
|
||||
urls_info = json.loads(urls_info_json.replace('\'', '"'))
|
||||
formats = [{
|
||||
'url': url_info['url'],
|
||||
'width': int(url_info['width']),
|
||||
'height': int(url_info['height']),
|
||||
'tbr': int(url_info['bitrate']),
|
||||
'filesize': int(url_info['filesize']),
|
||||
} for url_info in urls_info]
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': self._og_search_title(webpage),
|
||||
'formats': formats,
|
||||
'description': self._og_search_description(webpage),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
}
|
@@ -8,6 +8,8 @@ from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
qualities,
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urllib_parse,
|
||||
)
|
||||
|
||||
|
||||
@@ -44,6 +46,9 @@ class ARDIE(InfoExtractor):
|
||||
else:
|
||||
video_id = m.group('video_id')
|
||||
|
||||
urlp = compat_urllib_parse_urlparse(url)
|
||||
url = urlp._replace(path=compat_urllib_parse.quote(urlp.path.encode('utf-8'))).geturl()
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._html_search_regex(
|
||||
|
@@ -52,7 +52,7 @@ class BlinkxIE(InfoExtractor):
|
||||
'height': int(m['h']),
|
||||
})
|
||||
elif m['type'] == 'original':
|
||||
duration = m['d']
|
||||
duration = float(m['d'])
|
||||
elif m['type'] == 'youtube':
|
||||
yt_id = m['link']
|
||||
self.to_screen('Youtube video detected: %s' % yt_id)
|
||||
|
@@ -10,7 +10,7 @@ class BloombergIE(InfoExtractor):
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html',
|
||||
'md5': '7bf08858ff7c203c870e8a6190e221e5',
|
||||
# The md5 checksum changes
|
||||
'info_dict': {
|
||||
'id': 'qurhIVlJSB6hzkVi229d8g',
|
||||
'ext': 'flv',
|
||||
@@ -31,8 +31,7 @@ class BloombergIE(InfoExtractor):
|
||||
return {
|
||||
'id': name.split('-')[-1],
|
||||
'title': title,
|
||||
'url': f4m_url,
|
||||
'ext': 'flv',
|
||||
'formats': self._extract_f4m_formats(f4m_url, name),
|
||||
'description': self._og_search_description(webpage),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
}
|
||||
|
@@ -7,6 +7,7 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
)
|
||||
|
||||
|
||||
@@ -22,8 +23,9 @@ class BRIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '25e279aa-1ffd-40fd-9955-5325bd48a53a',
|
||||
'ext': 'mp4',
|
||||
'title': 'Am 1. und 2. August in Oberammergau',
|
||||
'description': 'md5:dfd224e5aa6819bc1fcbb7826a932021',
|
||||
'title': 'Wenn das Traditions-Theater wackelt',
|
||||
'description': 'Heimatsound-Festival 2014: Wenn das Traditions-Theater wackelt',
|
||||
'duration': 34,
|
||||
}
|
||||
},
|
||||
{
|
||||
@@ -34,6 +36,7 @@ class BRIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Über den Pass',
|
||||
'description': 'Die Eroberung der Alpen: Über den Pass',
|
||||
'duration': 2588,
|
||||
}
|
||||
},
|
||||
{
|
||||
@@ -44,6 +47,7 @@ class BRIE(InfoExtractor):
|
||||
'ext': 'aac',
|
||||
'title': '"Keine neuen Schulden im nächsten Jahr"',
|
||||
'description': 'Haushaltsentwurf: "Keine neuen Schulden im nächsten Jahr"',
|
||||
'duration': 64,
|
||||
}
|
||||
},
|
||||
{
|
||||
@@ -54,6 +58,7 @@ class BRIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Umweltbewusster Häuslebauer',
|
||||
'description': 'Uwe Erdelt: Umweltbewusster Häuslebauer',
|
||||
'duration': 116,
|
||||
}
|
||||
},
|
||||
{
|
||||
@@ -64,6 +69,7 @@ class BRIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Folge 1 - Metaphysik',
|
||||
'description': 'Kant für Anfänger: Folge 1 - Metaphysik',
|
||||
'duration': 893,
|
||||
'uploader': 'Eva Maria Steimle',
|
||||
'upload_date': '20140117',
|
||||
}
|
||||
@@ -84,6 +90,7 @@ class BRIE(InfoExtractor):
|
||||
media = {
|
||||
'id': xml_media.get('externalId'),
|
||||
'title': xml_media.find('title').text,
|
||||
'duration': parse_duration(xml_media.find('duration').text),
|
||||
'formats': self._extract_formats(xml_media.find('assets')),
|
||||
'thumbnails': self._extract_thumbnails(xml_media.find('teaserImage/variants')),
|
||||
'description': ' '.join(xml_media.find('shareTitle').text.splitlines()),
|
||||
|
@@ -18,6 +18,7 @@ from ..utils import (
|
||||
clean_html,
|
||||
compiled_regex_type,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
RegexNotFoundError,
|
||||
sanitize_filename,
|
||||
unescapeHTML,
|
||||
@@ -373,7 +374,8 @@ class InfoExtractor(object):
|
||||
else:
|
||||
for p in pattern:
|
||||
mobj = re.search(p, string, flags)
|
||||
if mobj: break
|
||||
if mobj:
|
||||
break
|
||||
|
||||
if os.name != 'nt' and sys.stderr.isatty():
|
||||
_name = u'\033[0;34m%s\033[0m' % name
|
||||
@@ -589,6 +591,24 @@ class InfoExtractor(object):
|
||||
self.to_screen(msg)
|
||||
time.sleep(timeout)
|
||||
|
||||
def _extract_f4m_formats(self, manifest_url, video_id):
|
||||
manifest = self._download_xml(
|
||||
manifest_url, video_id, 'Downloading f4m manifest',
|
||||
'Unable to download f4m manifest')
|
||||
|
||||
formats = []
|
||||
for media_el in manifest.findall('{http://ns.adobe.com/f4m/1.0}media'):
|
||||
formats.append({
|
||||
'url': manifest_url,
|
||||
'ext': 'flv',
|
||||
'tbr': int_or_none(media_el.attrib.get('bitrate')),
|
||||
'width': int_or_none(media_el.attrib.get('width')),
|
||||
'height': int_or_none(media_el.attrib.get('height')),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return formats
|
||||
|
||||
|
||||
class SearchInfoExtractor(InfoExtractor):
|
||||
"""
|
||||
|
@@ -19,17 +19,35 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
|
||||
+ video_id, video_id, 'Downloading XML config')
|
||||
|
||||
manifest_url = info.find('videos/video/url').text
|
||||
video_url = manifest_url.replace('manifest.f4m', 'index_2_av.m3u8')
|
||||
video_url = video_url.replace('/z/', '/i/')
|
||||
manifest_url = manifest_url.replace('/z/', '/i/')
|
||||
|
||||
if manifest_url.startswith('rtmp'):
|
||||
formats = [{'url': manifest_url, 'ext': 'flv'}]
|
||||
else:
|
||||
formats = []
|
||||
available_formats = self._search_regex(r'/[^,]*,(.*?),k\.mp4', manifest_url, 'available formats')
|
||||
for index, format_descr in enumerate(available_formats.split(',')):
|
||||
format_info = {
|
||||
'url': manifest_url.replace('manifest.f4m', 'index_%d_av.m3u8' % index),
|
||||
'ext': 'mp4',
|
||||
}
|
||||
m_resolution = re.search(r'(?P<width>\d+)x(?P<height>\d+)', format_descr)
|
||||
if m_resolution is not None:
|
||||
format_info.update({
|
||||
'width': int(m_resolution.group('width')),
|
||||
'height': int(m_resolution.group('height')),
|
||||
})
|
||||
formats.append(format_info)
|
||||
|
||||
thumbnail_path = info.find('image').text
|
||||
|
||||
return {'id': video_id,
|
||||
'ext': 'flv' if video_url.startswith('rtmp') else 'mp4',
|
||||
'url': video_url,
|
||||
'title': info.find('titre').text,
|
||||
'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', thumbnail_path),
|
||||
'description': info.find('synopsis').text,
|
||||
}
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': info.find('titre').text,
|
||||
'formats': formats,
|
||||
'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', thumbnail_path),
|
||||
'description': info.find('synopsis').text,
|
||||
}
|
||||
|
||||
|
||||
class PluzzIE(FranceTVBaseInfoExtractor):
|
||||
|
74
youtube_dl/extractor/gamestar.py
Normal file
74
youtube_dl/extractor/gamestar.py
Normal file
@@ -0,0 +1,74 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
str_to_int,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class GameStarIE(InfoExtractor):
|
||||
_VALID_URL = r'http://www\.gamestar\.de/videos/.*,(?P<id>[0-9]+)\.html'
|
||||
_TEST = {
|
||||
'url': 'http://www.gamestar.de/videos/trailer,3/hobbit-3-die-schlacht-der-fuenf-heere,76110.html',
|
||||
'md5': '96974ecbb7fd8d0d20fca5a00810cea7',
|
||||
'info_dict': {
|
||||
'id': '76110',
|
||||
'ext': 'mp4',
|
||||
'title': 'Hobbit 3: Die Schlacht der Fünf Heere - Teaser-Trailer zum dritten Teil',
|
||||
'description': 'Der Teaser-Trailer zu Hobbit 3: Die Schlacht der Fünf Heere zeigt einige Szenen aus dem dritten Teil der Saga und kündigt den vollständigen Trailer an.',
|
||||
'thumbnail': 'http://images.gamestar.de/images/idgwpgsgp/bdb/2494525/600x.jpg',
|
||||
'upload_date': '20140728',
|
||||
'duration': 17
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
og_title = self._og_search_title(webpage)
|
||||
title = og_title.replace(' - Video bei GameStar.de', '').strip()
|
||||
|
||||
url = 'http://gamestar.de/_misc/videos/portal/getVideoUrl.cfm?premium=0&videoId=' + video_id
|
||||
|
||||
description = self._og_search_description(webpage).strip()
|
||||
|
||||
thumbnail = self._proto_relative_url(
|
||||
self._og_search_thumbnail(webpage), scheme='http:')
|
||||
|
||||
upload_date = unified_strdate(self._html_search_regex(
|
||||
r'<span style="float:left;font-size:11px;">Datum: ([0-9]+\.[0-9]+\.[0-9]+) ',
|
||||
webpage, 'upload_date', fatal=False))
|
||||
|
||||
duration = parse_duration(self._html_search_regex(
|
||||
r' Länge: ([0-9]+:[0-9]+)</span>', webpage, 'duration',
|
||||
fatal=False))
|
||||
|
||||
view_count = str_to_int(self._html_search_regex(
|
||||
r' Zuschauer: ([0-9\.]+) ', webpage,
|
||||
'view_count', fatal=False))
|
||||
|
||||
comment_count = int_or_none(self._html_search_regex(
|
||||
r'>Kommentieren \(([0-9]+)\)</a>', webpage, 'comment_count',
|
||||
fatal=False))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'url': url,
|
||||
'ext': 'mp4',
|
||||
'thumbnail': thumbnail,
|
||||
'description': description,
|
||||
'upload_date': upload_date,
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
'comment_count': comment_count
|
||||
}
|
@@ -8,6 +8,7 @@ from ..utils import (
|
||||
compat_urllib_request,
|
||||
)
|
||||
|
||||
|
||||
class GDCVaultIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?gdcvault\.com/play/(?P<id>\d+)/(?P<name>(\w|-)+)'
|
||||
_TESTS = [
|
||||
@@ -31,6 +32,15 @@ class GDCVaultIE(InfoExtractor):
|
||||
'skip_download': True, # Requires rtmpdump
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://www.gdcvault.com/play/1015301/Thexder-Meets-Windows-95-or',
|
||||
'md5': 'a5eb77996ef82118afbbe8e48731b98e',
|
||||
'info_dict': {
|
||||
'id': '1015301',
|
||||
'ext': 'flv',
|
||||
'title': 'Thexder Meets Windows 95, or Writing Great Games in the Windows 95 Environment',
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
def _parse_mp4(self, xml_description):
|
||||
@@ -103,18 +113,40 @@ class GDCVaultIE(InfoExtractor):
|
||||
webpage_url = 'http://www.gdcvault.com/play/' + video_id
|
||||
start_page = self._download_webpage(webpage_url, video_id)
|
||||
|
||||
xml_root = self._html_search_regex(r'<iframe src="(?P<xml_root>.*?)player.html.*?".*?</iframe>', start_page, 'xml root', None, False)
|
||||
direct_url = self._search_regex(
|
||||
r's1\.addVariable\("file",\s*encodeURIComponent\("(/[^"]+)"\)\);',
|
||||
start_page, 'url', default=None)
|
||||
if direct_url:
|
||||
video_url = 'http://www.gdcvault.com/' + direct_url
|
||||
title = self._html_search_regex(
|
||||
r'<td><strong>Session Name</strong></td>\s*<td>(.*?)</td>',
|
||||
start_page, 'title')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'ext': 'flv',
|
||||
'title': title,
|
||||
}
|
||||
|
||||
xml_root = self._html_search_regex(
|
||||
r'<iframe src="(?P<xml_root>.*?)player.html.*?".*?</iframe>',
|
||||
start_page, 'xml root', default=None)
|
||||
if xml_root is None:
|
||||
# Probably need to authenticate
|
||||
start_page = self._login(webpage_url, video_id)
|
||||
if start_page is None:
|
||||
login_res = self._login(webpage_url, video_id)
|
||||
if login_res is None:
|
||||
self.report_warning('Could not login.')
|
||||
else:
|
||||
start_page = login_res
|
||||
# Grab the url from the authenticated page
|
||||
xml_root = self._html_search_regex(r'<iframe src="(?P<xml_root>.*?)player.html.*?".*?</iframe>', start_page, 'xml root')
|
||||
xml_root = self._html_search_regex(
|
||||
r'<iframe src="(.*?)player.html.*?".*?</iframe>',
|
||||
start_page, 'xml root')
|
||||
|
||||
xml_name = self._html_search_regex(r'<iframe src=".*?\?xml=(?P<xml_file>.+?\.xml).*?".*?</iframe>', start_page, 'xml filename', None, False)
|
||||
xml_name = self._html_search_regex(
|
||||
r'<iframe src=".*?\?xml=(.+?\.xml).*?".*?</iframe>',
|
||||
start_page, 'xml filename', default=None)
|
||||
if xml_name is None:
|
||||
# Fallback to the older format
|
||||
xml_name = self._html_search_regex(r'<iframe src=".*?\?xmlURL=xml/(?P<xml_file>.+?\.xml).*?".*?</iframe>', start_page, 'xml filename')
|
||||
|
@@ -383,13 +383,13 @@ class GenericIE(InfoExtractor):
|
||||
if not parsed_url.scheme:
|
||||
default_search = self._downloader.params.get('default_search')
|
||||
if default_search is None:
|
||||
default_search = 'error'
|
||||
default_search = 'fixup_error'
|
||||
|
||||
if default_search in ('auto', 'auto_warning'):
|
||||
if default_search in ('auto', 'auto_warning', 'fixup_error'):
|
||||
if '/' in url:
|
||||
self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
|
||||
return self.url_result('http://' + url)
|
||||
else:
|
||||
elif default_search != 'fixup_error':
|
||||
if default_search == 'auto_warning':
|
||||
if re.match(r'^(?:url|URL)$', url):
|
||||
raise ExtractorError(
|
||||
@@ -399,7 +399,8 @@ class GenericIE(InfoExtractor):
|
||||
self._downloader.report_warning(
|
||||
'Falling back to youtube search for %s . Set --default-search "auto" to suppress this warning.' % url)
|
||||
return self.url_result('ytsearch:' + url)
|
||||
elif default_search == 'error':
|
||||
|
||||
if default_search in ('error', 'fixup_error'):
|
||||
raise ExtractorError(
|
||||
('%r is not a valid URL. '
|
||||
'Set --default-search "ytsearch" (or run youtube-dl "ytsearch:%s" ) to search YouTube'
|
||||
|
58
youtube_dl/extractor/godtube.py
Normal file
58
youtube_dl/extractor/godtube.py
Normal file
@@ -0,0 +1,58 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class GodTubeIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?godtube\.com/watch/\?v=(?P<id>[\da-zA-Z]+)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'https://www.godtube.com/watch/?v=0C0CNNNU',
|
||||
'md5': '77108c1e4ab58f48031101a1a2119789',
|
||||
'info_dict': {
|
||||
'id': '0C0CNNNU',
|
||||
'ext': 'mp4',
|
||||
'title': 'Woman at the well.',
|
||||
'duration': 159,
|
||||
'timestamp': 1205712000,
|
||||
'uploader': 'beverlybmusic',
|
||||
'upload_date': '20080317',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
config = self._download_xml(
|
||||
'http://www.godtube.com/resource/mediaplayer/%s.xml' % video_id.lower(),
|
||||
video_id, 'Downloading player config XML')
|
||||
|
||||
video_url = config.find('.//file').text
|
||||
uploader = config.find('.//author').text
|
||||
timestamp = parse_iso8601(config.find('.//date').text)
|
||||
duration = parse_duration(config.find('.//duration').text)
|
||||
thumbnail = config.find('.//image').text
|
||||
|
||||
media = self._download_xml(
|
||||
'http://www.godtube.com/media/xml/?v=%s' % video_id, video_id, 'Downloading media XML')
|
||||
|
||||
title = media.find('.//title').text
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'timestamp': timestamp,
|
||||
'uploader': uploader,
|
||||
'duration': duration,
|
||||
}
|
97
youtube_dl/extractor/izlesene.py
Normal file
97
youtube_dl/extractor/izlesene.py
Normal file
@@ -0,0 +1,97 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
get_element_by_id,
|
||||
parse_iso8601,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
str_to_int,
|
||||
)
|
||||
|
||||
|
||||
class IzleseneIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:(?:www|m)\.)?izlesene\.com/(?:video|embedplayer)/(?:[^/]+/)?(?P<id>[0-9]+)'
|
||||
_STREAM_URL = 'http://panel.izlesene.com/api/streamurl/{id:}/{format:}'
|
||||
_TEST = {
|
||||
'url': 'http://www.izlesene.com/video/sevincten-cildirtan-dogum-gunu-hediyesi/7599694',
|
||||
'md5': '4384f9f0ea65086734b881085ee05ac2',
|
||||
'info_dict': {
|
||||
'id': '7599694',
|
||||
'ext': 'mp4',
|
||||
'title': 'Sevinçten Çıldırtan Doğum Günü Hediyesi',
|
||||
'description': 'Annesi oğluna doğum günü hediyesi olarak minecraft cd si alıyor, ve çocuk hunharca seviniyor',
|
||||
'thumbnail': 're:^http://.*\.jpg',
|
||||
'uploader_id': 'pelikzzle',
|
||||
'timestamp': 1404298698,
|
||||
'upload_date': '20140702',
|
||||
'duration': 95.395,
|
||||
'age_limit': 0,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
url = 'http://www.izlesene.com/video/%s' % video_id
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
description = self._og_search_description(webpage)
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
|
||||
uploader = self._html_search_regex(
|
||||
r"adduserUsername\s*=\s*'([^']+)';", webpage, 'uploader', fatal=False, default='')
|
||||
timestamp = parse_iso8601(self._html_search_meta(
|
||||
'uploadDate', webpage, 'upload date', fatal=False))
|
||||
|
||||
duration = int_or_none(self._html_search_regex(
|
||||
r'"videoduration"\s*:\s*"([^"]+)"', webpage, 'duration', fatal=False))
|
||||
if duration:
|
||||
duration /= 1000.0
|
||||
|
||||
view_count = str_to_int(get_element_by_id('videoViewCount', webpage))
|
||||
comment_count = self._html_search_regex(
|
||||
r'comment_count\s*=\s*\'([^\']+)\';', webpage, 'uploader', fatal=False)
|
||||
|
||||
family_friendly = self._html_search_meta(
|
||||
'isFamilyFriendly', webpage, 'age limit', fatal=False)
|
||||
|
||||
content_url = self._html_search_meta(
|
||||
'contentURL', webpage, 'content URL', fatal=False)
|
||||
ext = determine_ext(content_url, 'mp4')
|
||||
|
||||
# Might be empty for some videos.
|
||||
qualities = self._html_search_regex(
|
||||
r'"quality"\s*:\s*"([^"]+)"', webpage, 'qualities', fatal=False, default='')
|
||||
|
||||
formats = []
|
||||
for quality in qualities.split('|'):
|
||||
json = self._download_json(
|
||||
self._STREAM_URL.format(id=video_id, format=quality), video_id,
|
||||
note='Getting video URL for "%s" quality' % quality,
|
||||
errnote='Failed to get video URL for "%s" quality' % quality
|
||||
)
|
||||
formats.append({
|
||||
'url': json.get('streamurl'),
|
||||
'ext': ext,
|
||||
'format_id': '%sp' % quality if quality else 'sd',
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader_id': uploader,
|
||||
'timestamp': timestamp,
|
||||
'duration': duration,
|
||||
'view_count': int_or_none(view_count),
|
||||
'comment_count': int_or_none(comment_count),
|
||||
'age_limit': 18 if family_friendly == 'False' else 0,
|
||||
'formats': formats,
|
||||
}
|
@@ -1,5 +1,6 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import itertools
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
@@ -43,10 +44,11 @@ class JustinTVIE(InfoExtractor):
|
||||
}
|
||||
|
||||
# Return count of items, list of *valid* items
|
||||
def _parse_page(self, url, video_id):
|
||||
info_json = self._download_webpage(url, video_id,
|
||||
'Downloading video info JSON',
|
||||
'unable to download video info JSON')
|
||||
def _parse_page(self, url, video_id, counter):
|
||||
info_json = self._download_webpage(
|
||||
url, video_id,
|
||||
'Downloading video info JSON on page %d' % counter,
|
||||
'Unable to download video info JSON %d' % counter)
|
||||
|
||||
response = json.loads(info_json)
|
||||
if type(response) != list:
|
||||
@@ -138,11 +140,10 @@ class JustinTVIE(InfoExtractor):
|
||||
entries = []
|
||||
offset = 0
|
||||
limit = self._JUSTIN_PAGE_LIMIT
|
||||
while True:
|
||||
if paged:
|
||||
self.report_download_page(video_id, offset)
|
||||
for counter in itertools.count(1):
|
||||
page_url = api + ('?offset=%d&limit=%d' % (offset, limit))
|
||||
page_count, page_info = self._parse_page(page_url, video_id)
|
||||
page_count, page_info = self._parse_page(
|
||||
page_url, video_id, counter)
|
||||
entries.extend(page_info)
|
||||
if not paged or page_count != limit:
|
||||
break
|
||||
|
@@ -5,11 +5,14 @@ import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_str,
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urlparse,
|
||||
xpath_with_ns,
|
||||
compat_str,
|
||||
ExtractorError,
|
||||
find_xpath_attr,
|
||||
int_or_none,
|
||||
orderedSet,
|
||||
xpath_with_ns,
|
||||
)
|
||||
|
||||
|
||||
@@ -24,20 +27,82 @@ class LivestreamIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Live from Webster Hall NYC',
|
||||
'upload_date': '20121012',
|
||||
'like_count': int,
|
||||
'view_count': int,
|
||||
'thumbnail': 're:^http://.*\.jpg$'
|
||||
}
|
||||
}
|
||||
|
||||
def _parse_smil(self, video_id, smil_url):
|
||||
formats = []
|
||||
_SWITCH_XPATH = (
|
||||
'.//{http://www.w3.org/2001/SMIL20/Language}body/'
|
||||
'{http://www.w3.org/2001/SMIL20/Language}switch')
|
||||
smil_doc = self._download_xml(
|
||||
smil_url, video_id,
|
||||
note='Downloading SMIL information',
|
||||
errnote='Unable to download SMIL information',
|
||||
fatal=False)
|
||||
if smil_doc is False: # Download failed
|
||||
return formats
|
||||
title_node = find_xpath_attr(
|
||||
smil_doc, './/{http://www.w3.org/2001/SMIL20/Language}meta',
|
||||
'name', 'title')
|
||||
if title_node is None:
|
||||
self.report_warning('Cannot find SMIL id')
|
||||
switch_node = smil_doc.find(_SWITCH_XPATH)
|
||||
else:
|
||||
title_id = title_node.attrib['content']
|
||||
switch_node = find_xpath_attr(
|
||||
smil_doc, _SWITCH_XPATH, 'id', title_id)
|
||||
if switch_node is None:
|
||||
raise ExtractorError('Cannot find switch node')
|
||||
video_nodes = switch_node.findall(
|
||||
'{http://www.w3.org/2001/SMIL20/Language}video')
|
||||
|
||||
for vn in video_nodes:
|
||||
tbr = int_or_none(vn.attrib.get('system-bitrate'))
|
||||
furl = (
|
||||
'http://livestream-f.akamaihd.net/%s?v=3.0.3&fp=WIN%%2014,0,0,145' %
|
||||
(vn.attrib['src']))
|
||||
if 'clipBegin' in vn.attrib:
|
||||
furl += '&ssek=' + vn.attrib['clipBegin']
|
||||
formats.append({
|
||||
'url': furl,
|
||||
'format_id': 'smil_%d' % tbr,
|
||||
'ext': 'flv',
|
||||
'tbr': tbr,
|
||||
'preference': -1000,
|
||||
})
|
||||
return formats
|
||||
|
||||
def _extract_video_info(self, video_data):
|
||||
video_url = (
|
||||
video_data.get('progressive_url_hd') or
|
||||
video_data.get('progressive_url')
|
||||
video_id = compat_str(video_data['id'])
|
||||
|
||||
FORMAT_KEYS = (
|
||||
('sd', 'progressive_url'),
|
||||
('hd', 'progressive_url_hd'),
|
||||
)
|
||||
formats = [{
|
||||
'format_id': format_id,
|
||||
'url': video_data[key],
|
||||
'quality': i + 1,
|
||||
} for i, (format_id, key) in enumerate(FORMAT_KEYS)
|
||||
if video_data.get(key)]
|
||||
|
||||
smil_url = video_data.get('smil_url')
|
||||
if smil_url:
|
||||
formats.extend(self._parse_smil(video_id, smil_url))
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': compat_str(video_data['id']),
|
||||
'url': video_url,
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'title': video_data['caption'],
|
||||
'thumbnail': video_data['thumbnail_url'],
|
||||
'thumbnail': video_data.get('thumbnail_url'),
|
||||
'upload_date': video_data['updated_at'].replace('-', '')[:8],
|
||||
'like_count': video_data.get('likes', {}).get('total'),
|
||||
'view_count': video_data.get('views'),
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
57
youtube_dl/extractor/shared.py
Normal file
57
youtube_dl/extractor/shared.py
Normal file
@@ -0,0 +1,57 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import base64
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
compat_urllib_request,
|
||||
compat_urllib_parse,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class SharedIE(InfoExtractor):
|
||||
_VALID_URL = r'http://shared\.sx/(?P<id>[\da-z]{10})'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://shared.sx/0060718775',
|
||||
'md5': '53e1c58fc3e777ae1dfe9e57ba2f9c72',
|
||||
'info_dict': {
|
||||
'id': '0060718775',
|
||||
'ext': 'mp4',
|
||||
'title': 'Big Buck Bunny Trailer',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
page = self._download_webpage(url, video_id)
|
||||
|
||||
if re.search(r'>File does not exist<', page) is not None:
|
||||
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
|
||||
|
||||
download_form = dict(re.findall(r'<input type="hidden" name="([^"]+)" value="([^"]*)"', page))
|
||||
|
||||
request = compat_urllib_request.Request(url, compat_urllib_parse.urlencode(download_form))
|
||||
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
|
||||
video_page = self._download_webpage(request, video_id, 'Downloading video page')
|
||||
|
||||
video_url = self._html_search_regex(r'data-url="([^"]+)"', video_page, 'video URL')
|
||||
title = base64.b64decode(self._html_search_meta('full:title', page, 'title')).decode('utf-8')
|
||||
filesize = int_or_none(self._html_search_meta('full:size', page, 'file size', fatal=False))
|
||||
thumbnail = self._html_search_regex(
|
||||
r'data-poster="([^"]+)"', video_page, 'thumbnail', fatal=False, default=None)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'ext': 'mp4',
|
||||
'filesize': filesize,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
@@ -1,4 +1,6 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import time
|
||||
|
||||
@@ -10,18 +12,18 @@ from ..utils import (
|
||||
|
||||
|
||||
class StreamcloudIE(InfoExtractor):
|
||||
IE_NAME = u'streamcloud.eu'
|
||||
IE_NAME = 'streamcloud.eu'
|
||||
_VALID_URL = r'https?://streamcloud\.eu/(?P<id>[a-zA-Z0-9_-]+)/(?P<fname>[^#?]*)\.html'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://streamcloud.eu/skp9j99s4bpz/youtube-dl_test_video_____________-BaW_jenozKc.mp4.html',
|
||||
u'file': u'skp9j99s4bpz.mp4',
|
||||
u'md5': u'6bea4c7fa5daaacc2a946b7146286686',
|
||||
u'info_dict': {
|
||||
u'title': u'youtube-dl test video \'/\\ ä ↭',
|
||||
u'duration': 9,
|
||||
'url': 'http://streamcloud.eu/skp9j99s4bpz/youtube-dl_test_video_____________-BaW_jenozKc.mp4.html',
|
||||
'md5': '6bea4c7fa5daaacc2a946b7146286686',
|
||||
'info_dict': {
|
||||
'id': 'skp9j99s4bpz',
|
||||
'ext': 'mp4',
|
||||
'title': 'youtube-dl test video \'/\\ ä ↭',
|
||||
},
|
||||
u'skip': u'Only available from the EU'
|
||||
'skip': 'Only available from the EU'
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -46,21 +48,17 @@ class StreamcloudIE(InfoExtractor):
|
||||
req = compat_urllib_request.Request(url, post, headers)
|
||||
|
||||
webpage = self._download_webpage(
|
||||
req, video_id, note=u'Downloading video page ...')
|
||||
req, video_id, note='Downloading video page ...')
|
||||
title = self._html_search_regex(
|
||||
r'<h1[^>]*>([^<]+)<', webpage, u'title')
|
||||
r'<h1[^>]*>([^<]+)<', webpage, 'title')
|
||||
video_url = self._search_regex(
|
||||
r'file:\s*"([^"]+)"', webpage, u'video URL')
|
||||
duration_str = self._search_regex(
|
||||
r'duration:\s*"?([0-9]+)"?', webpage, u'duration', fatal=False)
|
||||
duration = None if duration_str is None else int(duration_str)
|
||||
r'file:\s*"([^"]+)"', webpage, 'video URL')
|
||||
thumbnail = self._search_regex(
|
||||
r'image:\s*"([^"]+)"', webpage, u'thumbnail URL', fatal=False)
|
||||
r'image:\s*"([^"]+)"', webpage, 'thumbnail URL', fatal=False)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'url': video_url,
|
||||
'duration': duration,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
|
@@ -8,7 +8,7 @@ from ..utils import parse_duration
|
||||
|
||||
|
||||
class SWRMediathekIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?swrmediathek\.de/player\.htm\?show=(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
|
||||
_VALID_URL = r'https?://(?:www\.)?swrmediathek\.de/(?:content/)?player\.htm\?show=(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://swrmediathek.de/player.htm?show=849790d0-dab8-11e3-a953-0026b975f2e6',
|
||||
@@ -52,6 +52,20 @@ class SWRMediathekIE(InfoExtractor):
|
||||
'uploader': 'SWR 2',
|
||||
'uploader_id': '284670',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://swrmediathek.de/content/player.htm?show=52dc7e00-15c5-11e4-84bc-0026b975f2e6',
|
||||
'md5': '881531487d0633080a8cc88d31ef896f',
|
||||
'info_dict': {
|
||||
'id': '52dc7e00-15c5-11e4-84bc-0026b975f2e6',
|
||||
'ext': 'mp4',
|
||||
'title': 'Familienspaß am Bodensee',
|
||||
'description': 'md5:0b591225a32cfde7be1629ed49fe4315',
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
'duration': 1784,
|
||||
'upload_date': '20140727',
|
||||
'uploader': 'SWR Fernsehen BW',
|
||||
'uploader_id': '281130',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
85
youtube_dl/extractor/tvplay.py
Normal file
85
youtube_dl/extractor/tvplay.py
Normal file
@@ -0,0 +1,85 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
parse_iso8601,
|
||||
qualities,
|
||||
)
|
||||
|
||||
|
||||
class TVPlayIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?tvplay\.lv/parraides/[^/]+/(?P<id>\d+)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.tvplay.lv/parraides/vinas-melo-labak/418113?autostart=true',
|
||||
'info_dict': {
|
||||
'id': '418113',
|
||||
'ext': 'flv',
|
||||
'title': 'Kādi ir īri? - Viņas melo labāk',
|
||||
'description': 'Baiba apsmej īrus, kādi tie ir un ko viņi dara.',
|
||||
'duration': 25,
|
||||
'timestamp': 1406097056,
|
||||
'upload_date': '20140723',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
video = self._download_json(
|
||||
'http://playapi.mtgx.tv/v1/videos/%s' % video_id, video_id, 'Downloading video JSON')
|
||||
|
||||
if video['is_geo_blocked']:
|
||||
raise ExtractorError(
|
||||
'This content is not available in your country due to copyright reasons', expected=True)
|
||||
|
||||
streams = self._download_json(
|
||||
'http://playapi.mtgx.tv/v1/videos/stream/%s' % video_id, video_id, 'Downloading streams JSON')
|
||||
|
||||
quality = qualities(['hls', 'medium', 'high'])
|
||||
formats = []
|
||||
for format_id, video_url in streams['streams'].items():
|
||||
if not video_url:
|
||||
continue
|
||||
fmt = {
|
||||
'format_id': format_id,
|
||||
'preference': quality(format_id),
|
||||
}
|
||||
if video_url.startswith('rtmp'):
|
||||
m = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<playpath>.+)$', video_url)
|
||||
if not m:
|
||||
continue
|
||||
fmt.update({
|
||||
'ext': 'flv',
|
||||
'url': m.group('url'),
|
||||
'app': m.group('app'),
|
||||
'play_path': m.group('playpath'),
|
||||
})
|
||||
else:
|
||||
fmt.update({
|
||||
'url': video_url,
|
||||
})
|
||||
formats.append(fmt)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video['title'],
|
||||
'description': video['description'],
|
||||
'duration': video['duration'],
|
||||
'timestamp': parse_iso8601(video['created_at']),
|
||||
'view_count': video['views']['total'],
|
||||
'age_limit': video.get('age_limit', 0),
|
||||
'formats': formats,
|
||||
}
|
@@ -177,6 +177,7 @@ class VevoIE(InfoExtractor):
|
||||
self._downloader.report_warning(
|
||||
'Cannot download SMIL information, falling back to JSON ..')
|
||||
|
||||
self._sort_formats(formats)
|
||||
timestamp_ms = int(self._search_regex(
|
||||
r'/Date\((\d+)\)/', video_info['launchDate'], 'launch date'))
|
||||
|
||||
|
68
youtube_dl/extractor/vidme.py
Normal file
68
youtube_dl/extractor/vidme.py
Normal file
@@ -0,0 +1,68 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
str_to_int,
|
||||
)
|
||||
|
||||
|
||||
class VidmeIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z]+)'
|
||||
_TEST = {
|
||||
'url': 'https://vid.me/QNB',
|
||||
'md5': 'f42d05e7149aeaec5c037b17e5d3dc82',
|
||||
'info_dict': {
|
||||
'id': 'QNB',
|
||||
'ext': 'mp4',
|
||||
'title': 'Fishing for piranha - the easy way',
|
||||
'description': 'source: https://www.facebook.com/photo.php?v=312276045600871',
|
||||
'duration': 119.92,
|
||||
'timestamp': 1406313244,
|
||||
'upload_date': '20140725',
|
||||
'thumbnail': 're:^https?://.*\.jpg',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_url = self._html_search_regex(r'<source src="([^"]+)"', webpage, 'video URL')
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
description = self._og_search_description(webpage, default='')
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
timestamp = int_or_none(self._og_search_property('updated_time', webpage, fatal=False))
|
||||
width = int_or_none(self._og_search_property('video:width', webpage, fatal=False))
|
||||
height = int_or_none(self._og_search_property('video:height', webpage, fatal=False))
|
||||
duration = float_or_none(self._html_search_regex(
|
||||
r'data-duration="([^"]+)"', webpage, 'duration', fatal=False))
|
||||
view_count = str_to_int(self._html_search_regex(
|
||||
r'<span class="video_views">\s*([\d,\.]+)\s*plays?', webpage, 'view count', fatal=False))
|
||||
like_count = str_to_int(self._html_search_regex(
|
||||
r'class="score js-video-vote-score"[^>]+data-score="([\d,\.\s]+)">',
|
||||
webpage, 'like count', fatal=False))
|
||||
comment_count = str_to_int(self._html_search_regex(
|
||||
r'class="js-comment-count"[^>]+data-count="([\d,\.\s]+)">',
|
||||
webpage, 'comment count', fatal=False))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'timestamp': timestamp,
|
||||
'width': width,
|
||||
'height': height,
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
'like_count': like_count,
|
||||
'comment_count': comment_count,
|
||||
}
|
@@ -344,7 +344,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
"""Indicate the download will use the RTMP protocol."""
|
||||
self.to_screen(u'RTMP download detected')
|
||||
|
||||
def _extract_signature_function(self, video_id, player_url, slen):
|
||||
def _signature_cache_id(self, example_sig):
|
||||
""" Return a string representation of a signature """
|
||||
return u'.'.join(compat_str(len(part)) for part in example_sig.split('.'))
|
||||
|
||||
def _extract_signature_function(self, video_id, player_url, example_sig):
|
||||
id_m = re.match(
|
||||
r'.*-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.(?P<ext>[a-z]+)$',
|
||||
player_url)
|
||||
@@ -354,7 +358,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
player_id = id_m.group('id')
|
||||
|
||||
# Read from filesystem cache
|
||||
func_id = '%s_%s_%d' % (player_type, player_id, slen)
|
||||
func_id = '%s_%s_%s' % (
|
||||
player_type, player_id, self._signature_cache_id(example_sig))
|
||||
assert os.path.basename(func_id) == func_id
|
||||
cache_dir = get_cachedir(self._downloader.params)
|
||||
|
||||
@@ -388,7 +393,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
|
||||
if cache_enabled:
|
||||
try:
|
||||
test_string = u''.join(map(compat_chr, range(slen)))
|
||||
test_string = u''.join(map(compat_chr, range(len(example_sig))))
|
||||
cache_res = res(test_string)
|
||||
cache_spec = [ord(c) for c in cache_res]
|
||||
try:
|
||||
@@ -404,7 +409,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
|
||||
return res
|
||||
|
||||
def _print_sig_code(self, func, slen):
|
||||
def _print_sig_code(self, func, example_sig):
|
||||
def gen_sig_code(idxs):
|
||||
def _genslice(start, end, step):
|
||||
starts = u'' if start == 0 else str(start)
|
||||
@@ -433,11 +438,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
else:
|
||||
yield _genslice(start, i, step)
|
||||
|
||||
test_string = u''.join(map(compat_chr, range(slen)))
|
||||
test_string = u''.join(map(compat_chr, range(len(example_sig))))
|
||||
cache_res = func(test_string)
|
||||
cache_spec = [ord(c) for c in cache_res]
|
||||
expr_code = u' + '.join(gen_sig_code(cache_spec))
|
||||
code = u'if len(s) == %d:\n return %s\n' % (slen, expr_code)
|
||||
signature_id_tuple = '(%s)' % (
|
||||
', '.join(compat_str(len(p)) for p in example_sig.split('.')))
|
||||
code = (u'if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
|
||||
u' return %s\n') % (signature_id_tuple, expr_code)
|
||||
self.to_screen(u'Extracted signature function:\n' + code)
|
||||
|
||||
def _parse_sig_js(self, jscode):
|
||||
@@ -465,20 +473,20 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
if player_url.startswith(u'//'):
|
||||
player_url = u'https:' + player_url
|
||||
try:
|
||||
player_id = (player_url, len(s))
|
||||
player_id = (player_url, self._signature_cache_id(s))
|
||||
if player_id not in self._player_cache:
|
||||
func = self._extract_signature_function(
|
||||
video_id, player_url, len(s)
|
||||
video_id, player_url, s
|
||||
)
|
||||
self._player_cache[player_id] = func
|
||||
func = self._player_cache[player_id]
|
||||
if self._downloader.params.get('youtube_print_sig_code'):
|
||||
self._print_sig_code(func, len(s))
|
||||
self._print_sig_code(func, s)
|
||||
return func(s)
|
||||
except Exception as e:
|
||||
tb = traceback.format_exc()
|
||||
raise ExtractorError(
|
||||
u'Automatic signature extraction failed: ' + tb, cause=e)
|
||||
u'Signature extraction failed: ' + tb, cause=e)
|
||||
|
||||
def _get_available_subtitles(self, video_id, webpage):
|
||||
try:
|
||||
@@ -806,50 +814,54 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
url_map = {}
|
||||
for url_data_str in encoded_url_map.split(','):
|
||||
url_data = compat_parse_qs(url_data_str)
|
||||
if 'itag' in url_data and 'url' in url_data:
|
||||
url = url_data['url'][0]
|
||||
if 'sig' in url_data:
|
||||
url += '&signature=' + url_data['sig'][0]
|
||||
elif 's' in url_data:
|
||||
encrypted_sig = url_data['s'][0]
|
||||
if 'itag' not in url_data or 'url' not in url_data:
|
||||
continue
|
||||
format_id = url_data['itag'][0]
|
||||
url = url_data['url'][0]
|
||||
|
||||
if not age_gate:
|
||||
jsplayer_url_json = self._search_regex(
|
||||
r'"assets":.+?"js":\s*("[^"]+")',
|
||||
video_webpage, u'JS player URL')
|
||||
player_url = json.loads(jsplayer_url_json)
|
||||
if 'sig' in url_data:
|
||||
url += '&signature=' + url_data['sig'][0]
|
||||
elif 's' in url_data:
|
||||
encrypted_sig = url_data['s'][0]
|
||||
|
||||
if not age_gate:
|
||||
jsplayer_url_json = self._search_regex(
|
||||
r'"assets":.+?"js":\s*("[^"]+")',
|
||||
video_webpage, u'JS player URL')
|
||||
player_url = json.loads(jsplayer_url_json)
|
||||
if player_url is None:
|
||||
player_url_json = self._search_regex(
|
||||
r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
|
||||
video_webpage, u'age gate player URL')
|
||||
player_url = json.loads(player_url_json)
|
||||
|
||||
if self._downloader.params.get('verbose'):
|
||||
if player_url is None:
|
||||
player_url_json = self._search_regex(
|
||||
r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
|
||||
video_webpage, u'age gate player URL')
|
||||
player_url = json.loads(player_url_json)
|
||||
|
||||
if self._downloader.params.get('verbose'):
|
||||
if player_url is None:
|
||||
player_version = 'unknown'
|
||||
player_desc = 'unknown'
|
||||
player_version = 'unknown'
|
||||
player_desc = 'unknown'
|
||||
else:
|
||||
if player_url.endswith('swf'):
|
||||
player_version = self._search_regex(
|
||||
r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
|
||||
u'flash player', fatal=False)
|
||||
player_desc = 'flash player %s' % player_version
|
||||
else:
|
||||
if player_url.endswith('swf'):
|
||||
player_version = self._search_regex(
|
||||
r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
|
||||
u'flash player', fatal=False)
|
||||
player_desc = 'flash player %s' % player_version
|
||||
else:
|
||||
player_version = self._search_regex(
|
||||
r'html5player-(.+?)\.js', video_webpage,
|
||||
'html5 player', fatal=False)
|
||||
player_desc = u'html5 player %s' % player_version
|
||||
player_version = self._search_regex(
|
||||
r'html5player-([^/]+?)(?:/html5player)?\.js',
|
||||
player_url,
|
||||
'html5 player', fatal=False)
|
||||
player_desc = u'html5 player %s' % player_version
|
||||
|
||||
parts_sizes = u'.'.join(compat_str(len(part)) for part in encrypted_sig.split('.'))
|
||||
self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
|
||||
(len(encrypted_sig), parts_sizes, url_data['itag'][0], player_desc))
|
||||
parts_sizes = self._signature_cache_id(encrypted_sig)
|
||||
self.to_screen(u'{%s} signature length %s, %s' %
|
||||
(format_id, parts_sizes, player_desc))
|
||||
|
||||
signature = self._decrypt_signature(
|
||||
encrypted_sig, video_id, player_url, age_gate)
|
||||
url += '&signature=' + signature
|
||||
if 'ratebypass' not in url:
|
||||
url += '&ratebypass=yes'
|
||||
url_map[url_data['itag'][0]] = url
|
||||
signature = self._decrypt_signature(
|
||||
encrypted_sig, video_id, player_url, age_gate)
|
||||
url += '&signature=' + signature
|
||||
if 'ratebypass' not in url:
|
||||
url += '&ratebypass=yes'
|
||||
url_map[format_id] = url
|
||||
formats = _map_to_format_list(url_map)
|
||||
elif video_info.get('hlsvp'):
|
||||
manifest_url = video_info['hlsvp'][0]
|
||||
|
@@ -61,7 +61,7 @@ class JSInterpreter(object):
|
||||
pass
|
||||
|
||||
m = re.match(
|
||||
r'^(?P<var>[a-z]+)\.(?P<member>[^(]+)(?:\(+(?P<args>[^()]*)\))?$',
|
||||
r'^(?P<var>[a-zA-Z0-9_]+)\.(?P<member>[^(]+)(?:\(+(?P<args>[^()]*)\))?$',
|
||||
expr)
|
||||
if m:
|
||||
variable = m.group('var')
|
||||
@@ -108,7 +108,7 @@ class JSInterpreter(object):
|
||||
index, howMany = argvals
|
||||
res = []
|
||||
for i in range(index, min(index + howMany, len(obj))):
|
||||
res.append(obj.pop(i))
|
||||
res.append(obj.pop(index))
|
||||
return res
|
||||
|
||||
return obj[member](argvals)
|
||||
@@ -130,7 +130,7 @@ class JSInterpreter(object):
|
||||
return a % b
|
||||
|
||||
m = re.match(
|
||||
r'^(?P<func>[.a-zA-Z$]+)\((?P<args>[a-z0-9,]+)\)$', expr)
|
||||
r'^(?P<func>[a-zA-Z$]+)\((?P<args>[a-z0-9,]+)\)$', expr)
|
||||
if m:
|
||||
fname = m.group('func')
|
||||
argvals = tuple([
|
||||
|
@@ -242,8 +242,8 @@ else:
|
||||
if sys.version_info >= (2,7):
|
||||
def find_xpath_attr(node, xpath, key, val):
|
||||
""" Find the xpath xpath[@key=val] """
|
||||
assert re.match(r'^[a-zA-Z]+$', key)
|
||||
assert re.match(r'^[a-zA-Z0-9@\s:._]*$', val)
|
||||
assert re.match(r'^[a-zA-Z-]+$', key)
|
||||
assert re.match(r'^[a-zA-Z0-9@\s:._-]*$', val)
|
||||
expr = xpath + u"[@%s='%s']" % (key, val)
|
||||
return node.find(expr)
|
||||
else:
|
||||
@@ -852,6 +852,8 @@ def unified_strdate(date_str):
|
||||
return upload_date
|
||||
|
||||
def determine_ext(url, default_ext=u'unknown_video'):
|
||||
if url is None:
|
||||
return default_ext
|
||||
guess = url.partition(u'?')[0].rpartition(u'.')[2]
|
||||
if re.match(r'^[A-Za-z0-9]+$', guess):
|
||||
return guess
|
||||
|
@@ -1,2 +1,2 @@
|
||||
|
||||
__version__ = '2014.07.24'
|
||||
__version__ = '2014.08.02'
|
||||
|
Reference in New Issue
Block a user