Compare commits
114 Commits
2015.02.16
...
2015.02.23
Author | SHA1 | Date | |
---|---|---|---|
bd61a9e770 | |||
3438e7acd2 | |||
09c200acf2 | |||
716889cab1 | |||
409693984f | |||
04e8c11080 | |||
80af2b73ab | |||
3cc57f9645 | |||
a65d4e7f14 | |||
543ec2136b | |||
93b5071f73 | |||
ddc369f073 | |||
fcc3e6138b | |||
9fe6ef7ab2 | |||
c010af6f19 | |||
35b7982303 | |||
f311cfa231 | |||
e086e0eb6c | |||
314368c822 | |||
c5181ab410 | |||
ea5152cae1 | |||
255fca5eea | |||
4aeccadf4e | |||
93540ee10e | |||
8fb3ac3649 | |||
77b2986b5b | |||
62b013df0d | |||
fad6768bd1 | |||
a78125f925 | |||
a00a8bcc8a | |||
1e9a9e167d | |||
3da0db62e6 | |||
e14ced7918 | |||
ab9d02f53b | |||
a461a11989 | |||
1bd838608f | |||
365577f567 | |||
50efb383f0 | |||
5da6bd0083 | |||
5e9a033e6e | |||
dd0a58f5f0 | |||
a21420389e | |||
6140baf4e1 | |||
8fc642eb5b | |||
e66e1a0046 | |||
d5c69f1da4 | |||
5c8a3f862a | |||
a3b9157f49 | |||
b88ba05356 | |||
b74d505577 | |||
9e2d7dca87 | |||
d236b37ac9 | |||
e880c66bd8 | |||
383456aa29 | |||
1a13940c8d | |||
3d54788495 | |||
71d53ace2f | |||
f37e3f99f0 | |||
bd03ffc16e | |||
1ac1af9b47 | |||
3bf5705316 | |||
1c2528c8a3 | |||
7bd15b1a03 | |||
6b961a85fd | |||
7707004043 | |||
a025d3c5a5 | |||
c460bdd56b | |||
b81a359eb6 | |||
d61aefb24c | |||
d305dd73a3 | |||
93a16ba238 | |||
85d5866177 | |||
9789d7535d | |||
d8443cd3f7 | |||
d47c26e168 | |||
81975f4693 | |||
b8b928d5cb | |||
3eff81fbf7 | |||
785521bf4f | |||
6d1a55a521 | |||
9cad27008b | |||
11e611a7fa | |||
72c1f8de06 | |||
6e99868e4c | |||
4d278fde64 | |||
f21e915fb9 | |||
6f53c63df6 | |||
1def5f359e | |||
15ec669374 | |||
a3fa5da496 | |||
30965ac66a | |||
09ab40b7d1 | |||
fa15607773 | |||
a91a2c1a83 | |||
16e7711e22 | |||
5cda4eda72 | |||
98f000409f | |||
4a8d4a53b1 | |||
4cd95bcbc3 | |||
be24c8697f | |||
0d93378887 | |||
4069766c52 | |||
7010577720 | |||
8ac27a68e6 | |||
46312e0b46 | |||
f9216ed6ad | |||
65bf37ef83 | |||
f740fae2a4 | |||
fbc503d696 | |||
662435f728 | |||
163d966707 | |||
85729c51af | |||
1db5fbcfe3 | |||
59b8ab5834 |
1
AUTHORS
1
AUTHORS
@ -111,3 +111,4 @@ Paul Hartmann
|
||||
Frans de Jonge
|
||||
Robin de Rooij
|
||||
Ryan Schmidt
|
||||
Leslie P. Polzer
|
||||
|
2
Makefile
2
Makefile
@ -1,7 +1,7 @@
|
||||
all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites
|
||||
|
||||
clean:
|
||||
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 CONTRIBUTING.md.tmp youtube-dl youtube-dl.exe
|
||||
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi CONTRIBUTING.md.tmp youtube-dl youtube-dl.exe
|
||||
|
||||
PREFIX ?= /usr/local
|
||||
BINDIR ?= $(PREFIX)/bin
|
||||
|
12
README.md
12
README.md
@ -161,6 +161,8 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
--playlist-reverse Download playlist videos in reverse order
|
||||
--xattr-set-filesize (experimental) set file xattribute
|
||||
ytdl.filesize with expected filesize
|
||||
--hls-prefer-native (experimental) Use the native HLS
|
||||
downloader instead of ffmpeg.
|
||||
--external-downloader COMMAND (experimental) Use the specified external
|
||||
downloader. Currently supports
|
||||
aria2c,curl,wget
|
||||
@ -513,11 +515,15 @@ If you want to play the video on a machine that is not running youtube-dl, you c
|
||||
|
||||
### ERROR: no fmt_url_map or conn information found in video info
|
||||
|
||||
youtube has switched to a new video info format in July 2011 which is not supported by old versions of youtube-dl. You can update youtube-dl with `sudo youtube-dl --update`.
|
||||
YouTube switched to a new video info format in July 2011 which is not supported by old versions of youtube-dl. See [above](#how-do-i-update-youtube-dl) for how to update youtube-dl.
|
||||
|
||||
### ERROR: unable to download video ###
|
||||
|
||||
youtube requires an additional signature since September 2012 which is not supported by old versions of youtube-dl. You can update youtube-dl with `sudo youtube-dl --update`.
|
||||
YouTube has required an additional signature since September 2012 which is not supported by old versions of youtube-dl. See [above](#how-do-i-update-youtube-dl) for how to update youtube-dl.
|
||||
|
||||
### ExtractorError: Could not find JS function u'OF'
|
||||
|
||||
In February 2015, the new YouTube player contained a character sequence in a string that was misinterpreted by old versions of youtube-dl. See [above](#how-do-i-update-youtube-dl) for how to update youtube-dl.
|
||||
|
||||
### SyntaxError: Non-ASCII character ###
|
||||
|
||||
@ -565,7 +571,7 @@ Support requests for services that **do** purchase the rights to distribute thei
|
||||
|
||||
### How can I detect whether a given URL is supported by youtube-dl?
|
||||
|
||||
For one, have a look at the [list of supported sites](docs/supportedsites.md). Note that it can sometimes happen that the site changes its URL scheme (say, from http://example.com/v/1234567 to http://example.com/v/1234567 ) and youtube-dl reports an URL of a service in that list as unsupported. In that case, simply report a bug.
|
||||
For one, have a look at the [list of supported sites](docs/supportedsites.md). Note that it can sometimes happen that the site changes its URL scheme (say, from http://example.com/video/1234567 to http://example.com/v/1234567 ) and youtube-dl reports a URL of a service in that list as unsupported. In that case, simply report a bug.
|
||||
|
||||
It is *not* possible to detect whether a URL is supported or not. That's because youtube-dl contains a generic extractor which matches **all** URLs. You may be tempted to disable, exclude, or remove the generic extractor, but the generic extractor not only allows users to extract videos from lots of websites that embed a video from another service, but may also be used to extract video from a service that it's hosting itself. Therefore, we neither recommend nor support disabling, excluding, or removing the generic extractor.
|
||||
|
||||
|
@ -45,12 +45,12 @@ for test in get_testcases():
|
||||
|
||||
RESULT = ('.' + domain + '\n' in LIST or '\n' + domain + '\n' in LIST)
|
||||
|
||||
if RESULT and ('info_dict' not in test or 'age_limit' not in test['info_dict']
|
||||
or test['info_dict']['age_limit'] != 18):
|
||||
if RESULT and ('info_dict' not in test or 'age_limit' not in test['info_dict'] or
|
||||
test['info_dict']['age_limit'] != 18):
|
||||
print('\nPotential missing age_limit check: {0}'.format(test['name']))
|
||||
|
||||
elif not RESULT and ('info_dict' in test and 'age_limit' in test['info_dict']
|
||||
and test['info_dict']['age_limit'] == 18):
|
||||
elif not RESULT and ('info_dict' in test and 'age_limit' in test['info_dict'] and
|
||||
test['info_dict']['age_limit'] == 18):
|
||||
print('\nPotential false negative: {0}'.format(test['name']))
|
||||
|
||||
else:
|
||||
|
@ -68,9 +68,12 @@
|
||||
- **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv
|
||||
- **CBS**
|
||||
- **CBSNews**: CBS News
|
||||
- **CBSSports**
|
||||
- **CeskaTelevize**
|
||||
- **channel9**: Channel 9
|
||||
- **Chilloutzone**
|
||||
- **chirbit**
|
||||
- **chirbit:profile**
|
||||
- **Cinchcast**
|
||||
- **Cinemassacre**
|
||||
- **clipfish**
|
||||
@ -121,6 +124,7 @@
|
||||
- **EllenTV**
|
||||
- **EllenTV:clips**
|
||||
- **ElPais**: El País
|
||||
- **Embedly**
|
||||
- **EMPFlix**
|
||||
- **Engadget**
|
||||
- **Eporner**
|
||||
@ -190,6 +194,7 @@
|
||||
- **ign.com**
|
||||
- **imdb**: Internet Movie Database trailers
|
||||
- **imdb:list**: Internet Movie Database lists
|
||||
- **Imgur**
|
||||
- **Ina**
|
||||
- **InfoQ**
|
||||
- **Instagram**
|
||||
@ -262,6 +267,7 @@
|
||||
- **myvideo**
|
||||
- **MyVidster**
|
||||
- **n-tv.de**
|
||||
- **NationalGeographic**
|
||||
- **Naver**
|
||||
- **NBA**
|
||||
- **NBC**
|
||||
@ -319,12 +325,14 @@
|
||||
- **podomatic**
|
||||
- **PornHd**
|
||||
- **PornHub**
|
||||
- **PornHubPlaylist**
|
||||
- **Pornotube**
|
||||
- **PornoXO**
|
||||
- **PromptFile**
|
||||
- **prosiebensat1**: ProSiebenSat.1 Digital
|
||||
- **Pyvideo**
|
||||
- **QuickVid**
|
||||
- **R7**
|
||||
- **radio.de**
|
||||
- **radiobremen**
|
||||
- **radiofrance**
|
||||
@ -338,9 +346,9 @@
|
||||
- **Roxwel**
|
||||
- **RTBF**
|
||||
- **Rte**
|
||||
- **rtl.nl**: rtl.nl and rtlxl.nl
|
||||
- **RTL2**
|
||||
- **RTLnow**
|
||||
- **rtlxl.nl**
|
||||
- **RTP**
|
||||
- **RTS**: RTS.ch
|
||||
- **rtve.es:alacarta**: RTVE a la carta
|
||||
@ -352,6 +360,7 @@
|
||||
- **rutube:movie**: Rutube movies
|
||||
- **rutube:person**: Rutube person videos
|
||||
- **RUTV**: RUTV.RU
|
||||
- **Sandia**: Sandia National Laboratories
|
||||
- **Sapo**: SAPO Vídeos
|
||||
- **savefrom.net**
|
||||
- **SBS**: sbs.com.au
|
||||
@ -379,7 +388,8 @@
|
||||
- **soundcloud:playlist**
|
||||
- **soundcloud:set**
|
||||
- **soundcloud:user**
|
||||
- **Soundgasm**
|
||||
- **soundgasm**
|
||||
- **soundgasm:profile**
|
||||
- **southpark.cc.com**
|
||||
- **southpark.de**
|
||||
- **Space**
|
||||
@ -445,6 +455,7 @@
|
||||
- **Turbo**
|
||||
- **Tutv**
|
||||
- **tv.dfb.de**
|
||||
- **TV4**: tv4.se and tv4play.se
|
||||
- **tvigle**: Интернет-телевидение Tvigle.ru
|
||||
- **tvp.pl**
|
||||
- **tvp.pl:Series**
|
||||
@ -552,6 +563,7 @@
|
||||
- **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)
|
||||
- **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword)
|
||||
- **youtube:watch_later**: YouTube watch later list, ":ytwatchlater" for short (requires authentication)
|
||||
- **Zapiks**
|
||||
- **ZDF**
|
||||
- **ZDFChannel**
|
||||
- **zingmp3:album**: mp3.zing.vn albums
|
||||
|
@ -113,6 +113,16 @@ def expect_info_dict(self, got_dict, expected_dict):
|
||||
self.assertTrue(
|
||||
got.startswith(start_str),
|
||||
'field %s (value: %r) should start with %r' % (info_field, got, start_str))
|
||||
elif isinstance(expected, compat_str) and expected.startswith('contains:'):
|
||||
got = got_dict.get(info_field)
|
||||
contains_str = expected[len('contains:'):]
|
||||
self.assertTrue(
|
||||
isinstance(got, compat_str),
|
||||
'Expected a %s object, but got %s for field %s' % (
|
||||
compat_str.__name__, type(got).__name__, info_field))
|
||||
self.assertTrue(
|
||||
contains_str in got,
|
||||
'field %s (value: %r) should contain %r' % (info_field, got, contains_str))
|
||||
elif isinstance(expected, type):
|
||||
got = got_dict.get(info_field)
|
||||
self.assertTrue(isinstance(got, expected),
|
||||
@ -163,12 +173,14 @@ def expect_info_dict(self, got_dict, expected_dict):
|
||||
info_dict_str += ''.join(
|
||||
' %s: %s,\n' % (_repr(k), _repr(v))
|
||||
for k, v in test_info_dict.items() if k not in missing_keys)
|
||||
info_dict_str += '\n'
|
||||
|
||||
if info_dict_str:
|
||||
info_dict_str += '\n'
|
||||
info_dict_str += ''.join(
|
||||
' %s: %s,\n' % (_repr(k), _repr(test_info_dict[k]))
|
||||
for k in missing_keys)
|
||||
write_string(
|
||||
'\n\'info_dict\': {\n' + info_dict_str + '}\n', out=sys.stderr)
|
||||
'\n\'info_dict\': {\n' + info_dict_str + '},\n', out=sys.stderr)
|
||||
self.assertFalse(
|
||||
missing_keys,
|
||||
'Missing keys in test definition: %s' % (
|
||||
|
@ -70,6 +70,8 @@ class TestJSInterpreter(unittest.TestCase):
|
||||
self.assertEqual(jsi.call_function('f'), -11)
|
||||
|
||||
def test_comments(self):
|
||||
'Skipping: Not yet fully implemented'
|
||||
return
|
||||
jsi = JSInterpreter('''
|
||||
function x() {
|
||||
var x = /* 1 + */ 2;
|
||||
@ -80,6 +82,15 @@ class TestJSInterpreter(unittest.TestCase):
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('x'), 52)
|
||||
|
||||
jsi = JSInterpreter('''
|
||||
function f() {
|
||||
var x = "/*";
|
||||
var y = 1 /* comment */ + 2;
|
||||
return y;
|
||||
}
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('f'), 3)
|
||||
|
||||
def test_precedence(self):
|
||||
jsi = JSInterpreter('''
|
||||
function x() {
|
||||
|
@ -34,8 +34,8 @@ def _make_testfunc(testfile):
|
||||
def test_func(self):
|
||||
as_file = os.path.join(TEST_DIR, testfile)
|
||||
swf_file = os.path.join(TEST_DIR, test_id + '.swf')
|
||||
if ((not os.path.exists(swf_file))
|
||||
or os.path.getmtime(swf_file) < os.path.getmtime(as_file)):
|
||||
if ((not os.path.exists(swf_file)) or
|
||||
os.path.getmtime(swf_file) < os.path.getmtime(as_file)):
|
||||
# Recompile
|
||||
try:
|
||||
subprocess.check_call([
|
||||
|
@ -370,6 +370,10 @@ class TestUtil(unittest.TestCase):
|
||||
"playlist":[{"controls":{"all":null}}]
|
||||
}''')
|
||||
|
||||
inp = '"SAND Number: SAND 2013-7800P\\nPresenter: Tom Russo\\nHabanero Software Training - Xyce Software\\nXyce, Sandia\\u0027s"'
|
||||
json_code = js_to_json(inp)
|
||||
self.assertEqual(json.loads(json_code), json.loads(inp))
|
||||
|
||||
def test_js_to_json_edgecases(self):
|
||||
on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}")
|
||||
self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"})
|
||||
|
@ -64,6 +64,12 @@ _TESTS = [
|
||||
'js',
|
||||
'4646B5181C6C3020DF1D9C7FCFEA.AD80ABF70C39BD369CCCAE780AFBB98FA6B6CB42766249D9488C288',
|
||||
'82C8849D94266724DC6B6AF89BBFA087EACCD963.B93C07FBA084ACAEFCF7C9D1FD0203C6C1815B6B'
|
||||
),
|
||||
(
|
||||
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js',
|
||||
'js',
|
||||
'312AA52209E3623129A412D56A40F11CB0AF14AE.3EE09501CB14E3BCDC3B2AE808BF3F1D14E7FBF12',
|
||||
'112AA5220913623229A412D56A40F11CB0AF14AE.3EE0950FCB14EEBCDC3B2AE808BF331D14E7FBF3',
|
||||
)
|
||||
]
|
||||
|
||||
|
@ -199,18 +199,25 @@ class YoutubeDL(object):
|
||||
postprocessor.
|
||||
progress_hooks: A list of functions that get called on download
|
||||
progress, with a dictionary with the entries
|
||||
* status: One of "downloading" and "finished".
|
||||
* status: One of "downloading", "error", or "finished".
|
||||
Check this first and ignore unknown values.
|
||||
|
||||
If status is one of "downloading" or "finished", the
|
||||
If status is one of "downloading" or "finished", the
|
||||
following properties may also be present:
|
||||
* filename: The final filename (always present)
|
||||
* tmpfilename: The filename we're currently writing to
|
||||
* downloaded_bytes: Bytes on disk
|
||||
* total_bytes: Size of the whole file, None if unknown
|
||||
* tmpfilename: The filename we're currently writing to
|
||||
* total_bytes_estimate: Guess of the eventual file size,
|
||||
None if unavailable.
|
||||
* elapsed: The number of seconds since download started.
|
||||
* eta: The estimated time in seconds, None if unknown
|
||||
* speed: The download speed in bytes/second, None if
|
||||
unknown
|
||||
* fragment_index: The counter of the currently
|
||||
downloaded video fragment.
|
||||
* fragment_count: The number of fragments (= individual
|
||||
files that will be merged)
|
||||
|
||||
Progress hooks are guaranteed to be called at least once
|
||||
(with status "finished") if the download is successful.
|
||||
@ -225,7 +232,6 @@ class YoutubeDL(object):
|
||||
call_home: Boolean, true iff we are allowed to contact the
|
||||
youtube-dl servers for debugging.
|
||||
sleep_interval: Number of seconds to sleep before each download.
|
||||
external_downloader: Executable of the external downloader to call.
|
||||
listformats: Print an overview of available video formats and exit.
|
||||
list_thumbnails: Print a table of all thumbnails and exit.
|
||||
match_filter: A function that gets called with the info_dict of
|
||||
@ -235,6 +241,10 @@ class YoutubeDL(object):
|
||||
match_filter_func in utils.py is one example for this.
|
||||
no_color: Do not emit color codes in output.
|
||||
|
||||
The following options determine which downloader is picked:
|
||||
external_downloader: Executable of the external downloader to call.
|
||||
None or unset for standard (built-in) downloader.
|
||||
hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
|
||||
|
||||
The following parameters are not used by YoutubeDL itself, they are used by
|
||||
the FileDownloader:
|
||||
@ -298,8 +308,8 @@ class YoutubeDL(object):
|
||||
raise
|
||||
|
||||
if (sys.version_info >= (3,) and sys.platform != 'win32' and
|
||||
sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
|
||||
and not params.get('restrictfilenames', False)):
|
||||
sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
|
||||
not params.get('restrictfilenames', False)):
|
||||
# On Python 3, the Unicode filesystem API will throw errors (#1474)
|
||||
self.report_warning(
|
||||
'Assuming --restrict-filenames since file system encoding '
|
||||
@ -951,30 +961,9 @@ class YoutubeDL(object):
|
||||
return res
|
||||
|
||||
def _calc_cookies(self, info_dict):
|
||||
class _PseudoRequest(object):
|
||||
def __init__(self, url):
|
||||
self.url = url
|
||||
self.headers = {}
|
||||
self.unverifiable = False
|
||||
|
||||
def add_unredirected_header(self, k, v):
|
||||
self.headers[k] = v
|
||||
|
||||
def get_full_url(self):
|
||||
return self.url
|
||||
|
||||
def is_unverifiable(self):
|
||||
return self.unverifiable
|
||||
|
||||
def has_header(self, h):
|
||||
return h in self.headers
|
||||
|
||||
def get_header(self, h, default=None):
|
||||
return self.headers.get(h, default)
|
||||
|
||||
pr = _PseudoRequest(info_dict['url'])
|
||||
pr = compat_urllib_request.Request(info_dict['url'])
|
||||
self.cookiejar.add_cookie_header(pr)
|
||||
return pr.headers.get('Cookie')
|
||||
return pr.get_header('Cookie')
|
||||
|
||||
def process_video_result(self, info_dict, download=True):
|
||||
assert info_dict.get('_type', 'video') == 'video'
|
||||
@ -1298,7 +1287,7 @@ class YoutubeDL(object):
|
||||
downloaded = []
|
||||
success = True
|
||||
merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
|
||||
if not merger.available():
|
||||
if not merger.available:
|
||||
postprocessors = []
|
||||
self.report_warning('You have requested multiple '
|
||||
'formats but ffmpeg or avconv are not installed.'
|
||||
@ -1377,8 +1366,8 @@ class YoutubeDL(object):
|
||||
"""Download a given list of URLs."""
|
||||
outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
|
||||
if (len(url_list) > 1 and
|
||||
'%' not in outtmpl
|
||||
and self.params.get('max_downloads') != 1):
|
||||
'%' not in outtmpl and
|
||||
self.params.get('max_downloads') != 1):
|
||||
raise SameFileError(outtmpl)
|
||||
|
||||
for url in url_list:
|
||||
@ -1545,29 +1534,18 @@ class YoutubeDL(object):
|
||||
return res
|
||||
|
||||
def list_formats(self, info_dict):
|
||||
def line(format, idlen=20):
|
||||
return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
|
||||
format['format_id'],
|
||||
format['ext'],
|
||||
self.format_resolution(format),
|
||||
self._format_note(format),
|
||||
))
|
||||
|
||||
formats = info_dict.get('formats', [info_dict])
|
||||
idlen = max(len('format code'),
|
||||
max(len(f['format_id']) for f in formats))
|
||||
formats_s = [
|
||||
line(f, idlen) for f in formats
|
||||
table = [
|
||||
[f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
|
||||
for f in formats
|
||||
if f.get('preference') is None or f['preference'] >= -1000]
|
||||
if len(formats) > 1:
|
||||
formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
|
||||
table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
|
||||
|
||||
header_line = line({
|
||||
'format_id': 'format code', 'ext': 'extension',
|
||||
'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
|
||||
header_line = ['format code', 'extension', 'resolution', 'note']
|
||||
self.to_screen(
|
||||
'[info] Available formats for %s:\n%s\n%s' %
|
||||
(info_dict['id'], header_line, '\n'.join(formats_s)))
|
||||
'[info] Available formats for %s:\n%s' %
|
||||
(info_dict['id'], render_table(header_line, table)))
|
||||
|
||||
def list_thumbnails(self, info_dict):
|
||||
thumbnails = info_dict.get('thumbnails')
|
||||
|
@ -189,14 +189,14 @@ def _real_main(argv=None):
|
||||
# In Python 2, sys.argv is a bytestring (also note http://bugs.python.org/issue2128 for Windows systems)
|
||||
if opts.outtmpl is not None:
|
||||
opts.outtmpl = opts.outtmpl.decode(preferredencoding())
|
||||
outtmpl = ((opts.outtmpl is not None and opts.outtmpl)
|
||||
or (opts.format == '-1' and opts.usetitle and '%(title)s-%(id)s-%(format)s.%(ext)s')
|
||||
or (opts.format == '-1' and '%(id)s-%(format)s.%(ext)s')
|
||||
or (opts.usetitle and opts.autonumber and '%(autonumber)s-%(title)s-%(id)s.%(ext)s')
|
||||
or (opts.usetitle and '%(title)s-%(id)s.%(ext)s')
|
||||
or (opts.useid and '%(id)s.%(ext)s')
|
||||
or (opts.autonumber and '%(autonumber)s-%(id)s.%(ext)s')
|
||||
or DEFAULT_OUTTMPL)
|
||||
outtmpl = ((opts.outtmpl is not None and opts.outtmpl) or
|
||||
(opts.format == '-1' and opts.usetitle and '%(title)s-%(id)s-%(format)s.%(ext)s') or
|
||||
(opts.format == '-1' and '%(id)s-%(format)s.%(ext)s') or
|
||||
(opts.usetitle and opts.autonumber and '%(autonumber)s-%(title)s-%(id)s.%(ext)s') or
|
||||
(opts.usetitle and '%(title)s-%(id)s.%(ext)s') or
|
||||
(opts.useid and '%(id)s.%(ext)s') or
|
||||
(opts.autonumber and '%(autonumber)s-%(id)s.%(ext)s') or
|
||||
DEFAULT_OUTTMPL)
|
||||
if not os.path.splitext(outtmpl)[1] and opts.extractaudio:
|
||||
parser.error('Cannot download a video and extract audio into the same'
|
||||
' file! Use "{0}.%(ext)s" instead of "{0}" as the output'
|
||||
@ -351,6 +351,7 @@ def _real_main(argv=None):
|
||||
'match_filter': match_filter,
|
||||
'no_color': opts.no_color,
|
||||
'ffmpeg_location': opts.ffmpeg_location,
|
||||
'hls_prefer_native': opts.hls_prefer_native,
|
||||
}
|
||||
|
||||
with YoutubeDL(ydl_opts) as ydl:
|
||||
|
@ -34,6 +34,9 @@ def get_suitable_downloader(info_dict, params={}):
|
||||
if ed.supports(info_dict):
|
||||
return ed
|
||||
|
||||
if protocol == 'm3u8' and params.get('hls_prefer_native'):
|
||||
return NativeHlsFD
|
||||
|
||||
return PROTOCOL_MAP.get(protocol, HttpFD)
|
||||
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
from __future__ import unicode_literals
|
||||
from __future__ import division, unicode_literals
|
||||
|
||||
import os
|
||||
import re
|
||||
@ -54,6 +54,7 @@ class FileDownloader(object):
|
||||
self.ydl = ydl
|
||||
self._progress_hooks = []
|
||||
self.params = params
|
||||
self.add_progress_hook(self.report_progress)
|
||||
|
||||
@staticmethod
|
||||
def format_seconds(seconds):
|
||||
@ -226,42 +227,64 @@ class FileDownloader(object):
|
||||
self.to_screen(clear_line + fullmsg, skip_eol=not is_last_line)
|
||||
self.to_console_title('youtube-dl ' + msg)
|
||||
|
||||
def report_progress(self, percent, data_len_str, speed, eta):
|
||||
"""Report download progress."""
|
||||
if self.params.get('noprogress', False):
|
||||
def report_progress(self, s):
|
||||
if s['status'] == 'finished':
|
||||
if self.params.get('noprogress', False):
|
||||
self.to_screen('[download] Download completed')
|
||||
else:
|
||||
s['_total_bytes_str'] = format_bytes(s['total_bytes'])
|
||||
if s.get('elapsed') is not None:
|
||||
s['_elapsed_str'] = self.format_seconds(s['elapsed'])
|
||||
msg_template = '100%% of %(_total_bytes_str)s in %(_elapsed_str)s'
|
||||
else:
|
||||
msg_template = '100%% of %(_total_bytes_str)s'
|
||||
self._report_progress_status(
|
||||
msg_template % s, is_last_line=True)
|
||||
|
||||
if self.params.get('noprogress'):
|
||||
return
|
||||
if eta is not None:
|
||||
eta_str = self.format_eta(eta)
|
||||
else:
|
||||
eta_str = 'Unknown ETA'
|
||||
if percent is not None:
|
||||
percent_str = self.format_percent(percent)
|
||||
else:
|
||||
percent_str = 'Unknown %'
|
||||
speed_str = self.format_speed(speed)
|
||||
|
||||
msg = ('%s of %s at %s ETA %s' %
|
||||
(percent_str, data_len_str, speed_str, eta_str))
|
||||
self._report_progress_status(msg)
|
||||
|
||||
def report_progress_live_stream(self, downloaded_data_len, speed, elapsed):
|
||||
if self.params.get('noprogress', False):
|
||||
if s['status'] != 'downloading':
|
||||
return
|
||||
downloaded_str = format_bytes(downloaded_data_len)
|
||||
speed_str = self.format_speed(speed)
|
||||
elapsed_str = FileDownloader.format_seconds(elapsed)
|
||||
msg = '%s at %s (%s)' % (downloaded_str, speed_str, elapsed_str)
|
||||
self._report_progress_status(msg)
|
||||
|
||||
def report_finish(self, data_len_str, tot_time):
|
||||
"""Report download finished."""
|
||||
if self.params.get('noprogress', False):
|
||||
self.to_screen('[download] Download completed')
|
||||
if s.get('eta') is not None:
|
||||
s['_eta_str'] = self.format_eta(s['eta'])
|
||||
else:
|
||||
self._report_progress_status(
|
||||
('100%% of %s in %s' %
|
||||
(data_len_str, self.format_seconds(tot_time))),
|
||||
is_last_line=True)
|
||||
s['_eta_str'] = 'Unknown ETA'
|
||||
|
||||
if s.get('total_bytes') and s.get('downloaded_bytes') is not None:
|
||||
s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes'])
|
||||
elif s.get('total_bytes_estimate') and s.get('downloaded_bytes') is not None:
|
||||
s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes_estimate'])
|
||||
else:
|
||||
if s.get('downloaded_bytes') == 0:
|
||||
s['_percent_str'] = self.format_percent(0)
|
||||
else:
|
||||
s['_percent_str'] = 'Unknown %'
|
||||
|
||||
if s.get('speed') is not None:
|
||||
s['_speed_str'] = self.format_speed(s['speed'])
|
||||
else:
|
||||
s['_speed_str'] = 'Unknown speed'
|
||||
|
||||
if s.get('total_bytes') is not None:
|
||||
s['_total_bytes_str'] = format_bytes(s['total_bytes'])
|
||||
msg_template = '%(_percent_str)s of %(_total_bytes_str)s at %(_speed_str)s ETA %(_eta_str)s'
|
||||
elif s.get('total_bytes_estimate') is not None:
|
||||
s['_total_bytes_estimate_str'] = format_bytes(s['total_bytes_estimate'])
|
||||
msg_template = '%(_percent_str)s of ~%(_total_bytes_estimate_str)s at %(_speed_str)s ETA %(_eta_str)s'
|
||||
else:
|
||||
if s.get('downloaded_bytes') is not None:
|
||||
s['_downloaded_bytes_str'] = format_bytes(s['downloaded_bytes'])
|
||||
if s.get('elapsed'):
|
||||
s['_elapsed_str'] = self.format_seconds(s['elapsed'])
|
||||
msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s (%(_elapsed_str)s)'
|
||||
else:
|
||||
msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s'
|
||||
else:
|
||||
msg_template = '%(_percent_str)s % at %(_speed_str)s ETA %(_eta_str)s'
|
||||
|
||||
self._report_progress_status(msg_template % s)
|
||||
|
||||
def report_resuming_byte(self, resume_len):
|
||||
"""Report attempt to resume at given byte."""
|
||||
@ -288,14 +311,14 @@ class FileDownloader(object):
|
||||
"""
|
||||
|
||||
nooverwrites_and_exists = (
|
||||
self.params.get('nooverwrites', False)
|
||||
and os.path.exists(encodeFilename(filename))
|
||||
self.params.get('nooverwrites', False) and
|
||||
os.path.exists(encodeFilename(filename))
|
||||
)
|
||||
|
||||
continuedl_and_exists = (
|
||||
self.params.get('continuedl', False)
|
||||
and os.path.isfile(encodeFilename(filename))
|
||||
and not self.params.get('nopart', False)
|
||||
self.params.get('continuedl', False) and
|
||||
os.path.isfile(encodeFilename(filename)) and
|
||||
not self.params.get('nopart', False)
|
||||
)
|
||||
|
||||
# Check file already present
|
||||
|
@ -75,7 +75,7 @@ class ExternalFD(FileDownloader):
|
||||
|
||||
class CurlFD(ExternalFD):
|
||||
def _make_cmd(self, tmpfilename, info_dict):
|
||||
cmd = [self.exe, '-o', tmpfilename]
|
||||
cmd = [self.exe, '--location', '-o', tmpfilename]
|
||||
for key, val in info_dict['http_headers'].items():
|
||||
cmd += ['--header', '%s: %s' % (key, val)]
|
||||
cmd += self._source_address('--interface')
|
||||
|
@ -1,4 +1,4 @@
|
||||
from __future__ import unicode_literals
|
||||
from __future__ import division, unicode_literals
|
||||
|
||||
import base64
|
||||
import io
|
||||
@ -15,7 +15,6 @@ from ..compat import (
|
||||
from ..utils import (
|
||||
struct_pack,
|
||||
struct_unpack,
|
||||
format_bytes,
|
||||
encodeFilename,
|
||||
sanitize_open,
|
||||
xpath_text,
|
||||
@ -252,17 +251,6 @@ class F4mFD(FileDownloader):
|
||||
requested_bitrate = info_dict.get('tbr')
|
||||
self.to_screen('[download] Downloading f4m manifest')
|
||||
manifest = self.ydl.urlopen(man_url).read()
|
||||
self.report_destination(filename)
|
||||
http_dl = HttpQuietDownloader(
|
||||
self.ydl,
|
||||
{
|
||||
'continuedl': True,
|
||||
'quiet': True,
|
||||
'noprogress': True,
|
||||
'ratelimit': self.params.get('ratelimit', None),
|
||||
'test': self.params.get('test', False),
|
||||
}
|
||||
)
|
||||
|
||||
doc = etree.fromstring(manifest)
|
||||
formats = [(int(f.attrib.get('bitrate', -1)), f)
|
||||
@ -298,39 +286,65 @@ class F4mFD(FileDownloader):
|
||||
# For some akamai manifests we'll need to add a query to the fragment url
|
||||
akamai_pv = xpath_text(doc, _add_ns('pv-2.0'))
|
||||
|
||||
self.report_destination(filename)
|
||||
http_dl = HttpQuietDownloader(
|
||||
self.ydl,
|
||||
{
|
||||
'continuedl': True,
|
||||
'quiet': True,
|
||||
'noprogress': True,
|
||||
'ratelimit': self.params.get('ratelimit', None),
|
||||
'test': self.params.get('test', False),
|
||||
}
|
||||
)
|
||||
tmpfilename = self.temp_name(filename)
|
||||
(dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb')
|
||||
|
||||
write_flv_header(dest_stream)
|
||||
write_metadata_tag(dest_stream, metadata)
|
||||
|
||||
# This dict stores the download progress, it's updated by the progress
|
||||
# hook
|
||||
state = {
|
||||
'status': 'downloading',
|
||||
'downloaded_bytes': 0,
|
||||
'frag_counter': 0,
|
||||
'frag_index': 0,
|
||||
'frag_count': total_frags,
|
||||
'filename': filename,
|
||||
'tmpfilename': tmpfilename,
|
||||
}
|
||||
start = time.time()
|
||||
|
||||
def frag_progress_hook(status):
|
||||
frag_total_bytes = status.get('total_bytes', 0)
|
||||
estimated_size = (state['downloaded_bytes'] +
|
||||
(total_frags - state['frag_counter']) * frag_total_bytes)
|
||||
if status['status'] == 'finished':
|
||||
def frag_progress_hook(s):
|
||||
if s['status'] not in ('downloading', 'finished'):
|
||||
return
|
||||
|
||||
frag_total_bytes = s.get('total_bytes', 0)
|
||||
if s['status'] == 'finished':
|
||||
state['downloaded_bytes'] += frag_total_bytes
|
||||
state['frag_counter'] += 1
|
||||
progress = self.calc_percent(state['frag_counter'], total_frags)
|
||||
byte_counter = state['downloaded_bytes']
|
||||
state['frag_index'] += 1
|
||||
|
||||
estimated_size = (
|
||||
(state['downloaded_bytes'] + frag_total_bytes) /
|
||||
(state['frag_index'] + 1) * total_frags)
|
||||
time_now = time.time()
|
||||
state['total_bytes_estimate'] = estimated_size
|
||||
state['elapsed'] = time_now - start
|
||||
|
||||
if s['status'] == 'finished':
|
||||
progress = self.calc_percent(state['frag_index'], total_frags)
|
||||
else:
|
||||
frag_downloaded_bytes = status['downloaded_bytes']
|
||||
byte_counter = state['downloaded_bytes'] + frag_downloaded_bytes
|
||||
frag_downloaded_bytes = s['downloaded_bytes']
|
||||
frag_progress = self.calc_percent(frag_downloaded_bytes,
|
||||
frag_total_bytes)
|
||||
progress = self.calc_percent(state['frag_counter'], total_frags)
|
||||
progress = self.calc_percent(state['frag_index'], total_frags)
|
||||
progress += frag_progress / float(total_frags)
|
||||
|
||||
eta = self.calc_eta(start, time.time(), estimated_size, byte_counter)
|
||||
self.report_progress(progress, format_bytes(estimated_size),
|
||||
status.get('speed'), eta)
|
||||
state['eta'] = self.calc_eta(
|
||||
start, time_now, estimated_size, state['downloaded_bytes'] + frag_downloaded_bytes)
|
||||
state['speed'] = s.get('speed')
|
||||
self._hook_progress(state)
|
||||
|
||||
http_dl.add_progress_hook(frag_progress_hook)
|
||||
|
||||
frags_filenames = []
|
||||
@ -354,8 +368,8 @@ class F4mFD(FileDownloader):
|
||||
frags_filenames.append(frag_filename)
|
||||
|
||||
dest_stream.close()
|
||||
self.report_finish(format_bytes(state['downloaded_bytes']), time.time() - start)
|
||||
|
||||
elapsed = time.time() - start
|
||||
self.try_rename(tmpfilename, filename)
|
||||
for frag_file in frags_filenames:
|
||||
os.remove(frag_file)
|
||||
@ -366,6 +380,7 @@ class F4mFD(FileDownloader):
|
||||
'total_bytes': fsize,
|
||||
'filename': filename,
|
||||
'status': 'finished',
|
||||
'elapsed': elapsed,
|
||||
})
|
||||
|
||||
return True
|
||||
|
@ -1,10 +1,9 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import os
|
||||
import time
|
||||
|
||||
from socket import error as SocketError
|
||||
import errno
|
||||
import os
|
||||
import socket
|
||||
import time
|
||||
|
||||
from .common import FileDownloader
|
||||
from ..compat import (
|
||||
@ -15,7 +14,6 @@ from ..utils import (
|
||||
ContentTooShortError,
|
||||
encodeFilename,
|
||||
sanitize_open,
|
||||
format_bytes,
|
||||
)
|
||||
|
||||
|
||||
@ -102,7 +100,7 @@ class HttpFD(FileDownloader):
|
||||
resume_len = 0
|
||||
open_mode = 'wb'
|
||||
break
|
||||
except SocketError as e:
|
||||
except socket.error as e:
|
||||
if e.errno != errno.ECONNRESET:
|
||||
# Connection reset is no problem, just retry
|
||||
raise
|
||||
@ -137,7 +135,6 @@ class HttpFD(FileDownloader):
|
||||
self.to_screen('\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
|
||||
return False
|
||||
|
||||
data_len_str = format_bytes(data_len)
|
||||
byte_counter = 0 + resume_len
|
||||
block_size = self.params.get('buffersize', 1024)
|
||||
start = time.time()
|
||||
@ -196,20 +193,19 @@ class HttpFD(FileDownloader):
|
||||
# Progress message
|
||||
speed = self.calc_speed(start, now, byte_counter - resume_len)
|
||||
if data_len is None:
|
||||
eta = percent = None
|
||||
eta = None
|
||||
else:
|
||||
percent = self.calc_percent(byte_counter, data_len)
|
||||
eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
|
||||
self.report_progress(percent, data_len_str, speed, eta)
|
||||
|
||||
self._hook_progress({
|
||||
'status': 'downloading',
|
||||
'downloaded_bytes': byte_counter,
|
||||
'total_bytes': data_len,
|
||||
'tmpfilename': tmpfilename,
|
||||
'filename': filename,
|
||||
'status': 'downloading',
|
||||
'eta': eta,
|
||||
'speed': speed,
|
||||
'elapsed': now - start,
|
||||
})
|
||||
|
||||
if is_test and byte_counter == data_len:
|
||||
@ -221,7 +217,13 @@ class HttpFD(FileDownloader):
|
||||
return False
|
||||
if tmpfilename != '-':
|
||||
stream.close()
|
||||
self.report_finish(data_len_str, (time.time() - start))
|
||||
|
||||
self._hook_progress({
|
||||
'downloaded_bytes': byte_counter,
|
||||
'total_bytes': data_len,
|
||||
'tmpfilename': tmpfilename,
|
||||
'status': 'error',
|
||||
})
|
||||
if data_len is not None and byte_counter != data_len:
|
||||
raise ContentTooShortError(byte_counter, int(data_len))
|
||||
self.try_rename(tmpfilename, filename)
|
||||
@ -235,6 +237,7 @@ class HttpFD(FileDownloader):
|
||||
'total_bytes': byte_counter,
|
||||
'filename': filename,
|
||||
'status': 'finished',
|
||||
'elapsed': time.time() - start,
|
||||
})
|
||||
|
||||
return True
|
||||
|
@ -11,7 +11,6 @@ from ..compat import compat_str
|
||||
from ..utils import (
|
||||
check_executable,
|
||||
encodeFilename,
|
||||
format_bytes,
|
||||
get_exe_version,
|
||||
)
|
||||
|
||||
@ -51,23 +50,23 @@ class RtmpFD(FileDownloader):
|
||||
if not resume_percent:
|
||||
resume_percent = percent
|
||||
resume_downloaded_data_len = downloaded_data_len
|
||||
eta = self.calc_eta(start, time.time(), 100 - resume_percent, percent - resume_percent)
|
||||
speed = self.calc_speed(start, time.time(), downloaded_data_len - resume_downloaded_data_len)
|
||||
time_now = time.time()
|
||||
eta = self.calc_eta(start, time_now, 100 - resume_percent, percent - resume_percent)
|
||||
speed = self.calc_speed(start, time_now, downloaded_data_len - resume_downloaded_data_len)
|
||||
data_len = None
|
||||
if percent > 0:
|
||||
data_len = int(downloaded_data_len * 100 / percent)
|
||||
data_len_str = '~' + format_bytes(data_len)
|
||||
self.report_progress(percent, data_len_str, speed, eta)
|
||||
cursor_in_new_line = False
|
||||
self._hook_progress({
|
||||
'status': 'downloading',
|
||||
'downloaded_bytes': downloaded_data_len,
|
||||
'total_bytes': data_len,
|
||||
'total_bytes_estimate': data_len,
|
||||
'tmpfilename': tmpfilename,
|
||||
'filename': filename,
|
||||
'status': 'downloading',
|
||||
'eta': eta,
|
||||
'elapsed': time_now - start,
|
||||
'speed': speed,
|
||||
})
|
||||
cursor_in_new_line = False
|
||||
else:
|
||||
# no percent for live streams
|
||||
mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec', line)
|
||||
@ -75,15 +74,15 @@ class RtmpFD(FileDownloader):
|
||||
downloaded_data_len = int(float(mobj.group(1)) * 1024)
|
||||
time_now = time.time()
|
||||
speed = self.calc_speed(start, time_now, downloaded_data_len)
|
||||
self.report_progress_live_stream(downloaded_data_len, speed, time_now - start)
|
||||
cursor_in_new_line = False
|
||||
self._hook_progress({
|
||||
'downloaded_bytes': downloaded_data_len,
|
||||
'tmpfilename': tmpfilename,
|
||||
'filename': filename,
|
||||
'status': 'downloading',
|
||||
'elapsed': time_now - start,
|
||||
'speed': speed,
|
||||
})
|
||||
cursor_in_new_line = False
|
||||
elif self.params.get('verbose', False):
|
||||
if not cursor_in_new_line:
|
||||
self.to_screen('')
|
||||
|
@ -58,10 +58,15 @@ from .canalplus import CanalplusIE
|
||||
from .canalc2 import Canalc2IE
|
||||
from .cbs import CBSIE
|
||||
from .cbsnews import CBSNewsIE
|
||||
from .cbssports import CBSSportsIE
|
||||
from .ccc import CCCIE
|
||||
from .ceskatelevize import CeskaTelevizeIE
|
||||
from .channel9 import Channel9IE
|
||||
from .chilloutzone import ChilloutzoneIE
|
||||
from .chirbit import (
|
||||
ChirbitIE,
|
||||
ChirbitProfileIE,
|
||||
)
|
||||
from .cinchcast import CinchcastIE
|
||||
from .clipfish import ClipfishIE
|
||||
from .cliphunter import CliphunterIE
|
||||
@ -121,6 +126,7 @@ from .ellentv import (
|
||||
EllenTVClipsIE,
|
||||
)
|
||||
from .elpais import ElPaisIE
|
||||
from .embedly import EmbedlyIE
|
||||
from .empflix import EMPFlixIE
|
||||
from .engadget import EngadgetIE
|
||||
from .eporner import EpornerIE
|
||||
@ -204,6 +210,7 @@ from .imdb import (
|
||||
ImdbIE,
|
||||
ImdbListIE
|
||||
)
|
||||
from .imgur import ImgurIE
|
||||
from .ina import InaIE
|
||||
from .infoq import InfoQIE
|
||||
from .instagram import InstagramIE, InstagramUserIE
|
||||
@ -282,6 +289,7 @@ from .myspace import MySpaceIE, MySpaceAlbumIE
|
||||
from .myspass import MySpassIE
|
||||
from .myvideo import MyVideoIE
|
||||
from .myvidster import MyVidsterIE
|
||||
from .nationalgeographic import NationalGeographicIE
|
||||
from .naver import NaverIE
|
||||
from .nba import NBAIE
|
||||
from .nbc import (
|
||||
@ -350,13 +358,17 @@ from .playfm import PlayFMIE
|
||||
from .playvid import PlayvidIE
|
||||
from .podomatic import PodomaticIE
|
||||
from .pornhd import PornHdIE
|
||||
from .pornhub import PornHubIE
|
||||
from .pornhub import (
|
||||
PornHubIE,
|
||||
PornHubPlaylistIE,
|
||||
)
|
||||
from .pornotube import PornotubeIE
|
||||
from .pornoxo import PornoXOIE
|
||||
from .promptfile import PromptFileIE
|
||||
from .prosiebensat1 import ProSiebenSat1IE
|
||||
from .pyvideo import PyvideoIE
|
||||
from .quickvid import QuickVidIE
|
||||
from .r7 import R7IE
|
||||
from .radiode import RadioDeIE
|
||||
from .radiobremen import RadioBremenIE
|
||||
from .radiofrance import RadioFranceIE
|
||||
@ -371,7 +383,7 @@ from .rottentomatoes import RottenTomatoesIE
|
||||
from .roxwel import RoxwelIE
|
||||
from .rtbf import RTBFIE
|
||||
from .rte import RteIE
|
||||
from .rtlnl import RtlXlIE
|
||||
from .rtlnl import RtlNlIE
|
||||
from .rtlnow import RTLnowIE
|
||||
from .rtl2 import RTL2IE
|
||||
from .rtp import RTPIE
|
||||
@ -386,6 +398,7 @@ from .rutube import (
|
||||
RutubePersonIE,
|
||||
)
|
||||
from .rutv import RUTVIE
|
||||
from .sandia import SandiaIE
|
||||
from .sapo import SapoIE
|
||||
from .savefrom import SaveFromIE
|
||||
from .sbs import SBSIE
|
||||
@ -416,7 +429,10 @@ from .soundcloud import (
|
||||
SoundcloudUserIE,
|
||||
SoundcloudPlaylistIE
|
||||
)
|
||||
from .soundgasm import SoundgasmIE
|
||||
from .soundgasm import (
|
||||
SoundgasmIE,
|
||||
SoundgasmProfileIE
|
||||
)
|
||||
from .southpark import (
|
||||
SouthParkIE,
|
||||
SouthparkDeIE,
|
||||
@ -482,6 +498,7 @@ from .tumblr import TumblrIE
|
||||
from .tunein import TuneInIE
|
||||
from .turbo import TurboIE
|
||||
from .tutv import TutvIE
|
||||
from .tv4 import TV4IE
|
||||
from .tvigle import TvigleIE
|
||||
from .tvp import TvpIE, TvpSeriesIE
|
||||
from .tvplay import TVPlayIE
|
||||
@ -603,6 +620,7 @@ from .youtube import (
|
||||
YoutubeUserIE,
|
||||
YoutubeWatchLaterIE,
|
||||
)
|
||||
from .zapiks import ZapiksIE
|
||||
from .zdf import ZDFIE, ZDFChannelIE
|
||||
from .zingmp3 import (
|
||||
ZingMp3SongIE,
|
||||
|
@ -28,7 +28,6 @@ class AdobeTVIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
player = self._parse_json(
|
||||
@ -44,8 +43,10 @@ class AdobeTVIE(InfoExtractor):
|
||||
self._html_search_meta('datepublished', webpage, 'upload date'))
|
||||
|
||||
duration = parse_duration(
|
||||
self._html_search_meta('duration', webpage, 'duration')
|
||||
or self._search_regex(r'Runtime:\s*(\d{2}:\d{2}:\d{2})', webpage, 'duration'))
|
||||
self._html_search_meta('duration', webpage, 'duration') or
|
||||
self._search_regex(
|
||||
r'Runtime:\s*(\d{2}:\d{2}:\d{2})',
|
||||
webpage, 'duration', fatal=False))
|
||||
|
||||
view_count = str_to_int(self._search_regex(
|
||||
r'<div class="views">\s*Views?:\s*([\d,.]+)\s*</div>',
|
||||
|
@ -38,6 +38,7 @@ class AdultSwimIE(InfoExtractor):
|
||||
},
|
||||
],
|
||||
'info_dict': {
|
||||
'id': 'rQxZvXQ4ROaSOqq-or2Mow',
|
||||
'title': 'Rick and Morty - Pilot',
|
||||
'description': "Rick moves in with his daughter's family and establishes himself as a bad influence on his grandson, Morty. "
|
||||
}
|
||||
@ -55,6 +56,7 @@ class AdultSwimIE(InfoExtractor):
|
||||
}
|
||||
],
|
||||
'info_dict': {
|
||||
'id': '-t8CamQlQ2aYZ49ItZCFog',
|
||||
'title': 'American Dad - Putting Francine Out of Business',
|
||||
'description': 'Stan hatches a plan to get Francine out of the real estate business.Watch more American Dad on [adult swim].'
|
||||
},
|
||||
|
@ -11,9 +11,12 @@ from ..utils import (
|
||||
|
||||
|
||||
class AppleTrailersIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/(?:trailers|ca)/(?P<company>[^/]+)/(?P<movie>[^/]+)'
|
||||
_TESTS = [{
|
||||
"url": "http://trailers.apple.com/trailers/wb/manofsteel/",
|
||||
'info_dict': {
|
||||
'id': 'manofsteel',
|
||||
},
|
||||
"playlist": [
|
||||
{
|
||||
"md5": "d97a8e575432dbcb81b7c3acb741f8a8",
|
||||
@ -60,7 +63,10 @@ class AppleTrailersIE(InfoExtractor):
|
||||
},
|
||||
},
|
||||
]
|
||||
}
|
||||
}, {
|
||||
'url': 'http://trailers.apple.com/ca/metropole/autrui/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_JSON_RE = r'iTunes.playURL\((.*?)\);'
|
||||
|
||||
|
@ -109,7 +109,7 @@ class BandcampIE(InfoExtractor):
|
||||
|
||||
class BandcampAlbumIE(InfoExtractor):
|
||||
IE_NAME = 'Bandcamp:album'
|
||||
_VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<title>[^?#]+)|/?(?:$|[?#]))'
|
||||
_VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<album_id>[^?#]+)|/?(?:$|[?#]))'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
|
||||
@ -133,31 +133,37 @@ class BandcampAlbumIE(InfoExtractor):
|
||||
],
|
||||
'info_dict': {
|
||||
'title': 'Jazz Format Mixtape vol.1',
|
||||
'id': 'jazz-format-mixtape-vol-1',
|
||||
'uploader_id': 'blazo',
|
||||
},
|
||||
'params': {
|
||||
'playlistend': 2
|
||||
},
|
||||
'skip': 'Bandcamp imposes download limits. See test_playlists:test_bandcamp_album for the playlist test'
|
||||
'skip': 'Bandcamp imposes download limits.'
|
||||
}, {
|
||||
'url': 'http://nightbringer.bandcamp.com/album/hierophany-of-the-open-grave',
|
||||
'info_dict': {
|
||||
'title': 'Hierophany of the Open Grave',
|
||||
'uploader_id': 'nightbringer',
|
||||
'id': 'hierophany-of-the-open-grave',
|
||||
},
|
||||
'playlist_mincount': 9,
|
||||
}, {
|
||||
'url': 'http://dotscale.bandcamp.com',
|
||||
'info_dict': {
|
||||
'title': 'Loom',
|
||||
'id': 'dotscale',
|
||||
'uploader_id': 'dotscale',
|
||||
},
|
||||
'playlist_mincount': 7,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
playlist_id = mobj.group('subdomain')
|
||||
title = mobj.group('title')
|
||||
display_id = title or playlist_id
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
uploader_id = mobj.group('subdomain')
|
||||
album_id = mobj.group('album_id')
|
||||
playlist_id = album_id or uploader_id
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
tracks_paths = re.findall(r'<a href="(.*?)" itemprop="url">', webpage)
|
||||
if not tracks_paths:
|
||||
raise ExtractorError('The page doesn\'t contain any tracks')
|
||||
@ -168,8 +174,8 @@ class BandcampAlbumIE(InfoExtractor):
|
||||
r'album_title\s*:\s*"(.*?)"', webpage, 'title', fatal=False)
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'uploader_id': uploader_id,
|
||||
'id': playlist_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'entries': entries,
|
||||
}
|
||||
|
@ -1,40 +1,35 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import remove_start
|
||||
from ..utils import (
|
||||
remove_start,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class BlinkxIE(InfoExtractor):
|
||||
_VALID_URL = r'^(?:https?://(?:www\.)blinkx\.com/#?ce/|blinkx:)(?P<id>[^?]+)'
|
||||
_VALID_URL = r'(?:https?://(?:www\.)blinkx\.com/#?ce/|blinkx:)(?P<id>[^?]+)'
|
||||
IE_NAME = 'blinkx'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.blinkx.com/ce/8aQUy7GVFYgFzpKhT0oqsilwOGFRVXk3R1ZGWWdGenBLaFQwb3FzaWx3OGFRVXk3R1ZGWWdGenB',
|
||||
'md5': '2e9a07364af40163a908edbf10bb2492',
|
||||
'url': 'http://www.blinkx.com/ce/Da0Gw3xc5ucpNduzLuDDlv4WC9PuI4fDi1-t6Y3LyfdY2SZS5Urbvn-UPJvrvbo8LTKTc67Wu2rPKSQDJyZeeORCR8bYkhs8lI7eqddznH2ofh5WEEdjYXnoRtj7ByQwt7atMErmXIeYKPsSDuMAAqJDlQZ-3Ff4HJVeH_s3Gh8oQ',
|
||||
'md5': '337cf7a344663ec79bf93a526a2e06c7',
|
||||
'info_dict': {
|
||||
'id': '8aQUy7GV',
|
||||
'id': 'Da0Gw3xc',
|
||||
'ext': 'mp4',
|
||||
'title': 'Police Car Rolls Away',
|
||||
'uploader': 'stupidvideos.com',
|
||||
'upload_date': '20131215',
|
||||
'timestamp': 1387068000,
|
||||
'description': 'A police car gently rolls away from a fight. Maybe it felt weird being around a confrontation and just had to get out of there!',
|
||||
'duration': 14.886,
|
||||
'thumbnails': [{
|
||||
'width': 100,
|
||||
'height': 76,
|
||||
'resolution': '100x76',
|
||||
'url': 'http://cdn.blinkx.com/stream/b/41/StupidVideos/20131215/1873969261/1873969261_tn_0.jpg',
|
||||
}],
|
||||
'title': 'No Daily Show for John Oliver; HBO Show Renewed - IGN News',
|
||||
'uploader': 'IGN News',
|
||||
'upload_date': '20150217',
|
||||
'timestamp': 1424215740,
|
||||
'description': 'HBO has renewed Last Week Tonight With John Oliver for two more seasons.',
|
||||
'duration': 47.743333,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, rl):
|
||||
m = re.match(self._VALID_URL, rl)
|
||||
video_id = m.group('id')
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
display_id = video_id[:8]
|
||||
|
||||
api_url = ('https://apib4.blinkx.com/api.php?action=play_video&' +
|
||||
@ -60,18 +55,20 @@ class BlinkxIE(InfoExtractor):
|
||||
elif m['type'] in ('flv', 'mp4'):
|
||||
vcodec = remove_start(m['vcodec'], 'ff')
|
||||
acodec = remove_start(m['acodec'], 'ff')
|
||||
tbr = (int(m['vbr']) + int(m['abr'])) // 1000
|
||||
vbr = int_or_none(m.get('vbr') or m.get('vbitrate'), 1000)
|
||||
abr = int_or_none(m.get('abr') or m.get('abitrate'), 1000)
|
||||
tbr = vbr + abr if vbr and abr else None
|
||||
format_id = '%s-%sk-%s' % (vcodec, tbr, m['w'])
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': m['link'],
|
||||
'vcodec': vcodec,
|
||||
'acodec': acodec,
|
||||
'abr': int(m['abr']) // 1000,
|
||||
'vbr': int(m['vbr']) // 1000,
|
||||
'abr': abr,
|
||||
'vbr': vbr,
|
||||
'tbr': tbr,
|
||||
'width': int(m['w']),
|
||||
'height': int(m['h']),
|
||||
'width': int_or_none(m.get('w')),
|
||||
'height': int_or_none(m.get('h')),
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
@ -95,6 +95,7 @@ class BrightcoveIE(InfoExtractor):
|
||||
'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=3550052898001&playerKey=AQ%7E%7E%2CAAABmA9XpXk%7E%2C-Kp7jNgisre1fG5OdqpAFUTcs0lP_ZoL',
|
||||
'info_dict': {
|
||||
'title': 'Sealife',
|
||||
'id': '3550319591001',
|
||||
},
|
||||
'playlist_mincount': 7,
|
||||
},
|
||||
@ -247,7 +248,7 @@ class BrightcoveIE(InfoExtractor):
|
||||
playlist_info = json_data['videoList']
|
||||
videos = [self._extract_video_info(video_info) for video_info in playlist_info['mediaCollectionDTO']['videoDTOs']]
|
||||
|
||||
return self.playlist_result(videos, playlist_id=playlist_info['id'],
|
||||
return self.playlist_result(videos, playlist_id='%s' % playlist_info['id'],
|
||||
playlist_title=playlist_info['mediaCollectionDTO']['displayName'])
|
||||
|
||||
def _extract_video_info(self, video_info):
|
||||
|
@ -33,6 +33,7 @@ class BuzzFeedIE(InfoExtractor):
|
||||
'skip_download': True, # Got enough YouTube download tests
|
||||
},
|
||||
'info_dict': {
|
||||
'id': 'look-at-this-cute-dog-omg',
|
||||
'description': 're:Munchkin the Teddy Bear is back ?!',
|
||||
'title': 'You Need To Stop What You\'re Doing And Watching This Dog Walk On A Treadmill',
|
||||
},
|
||||
@ -42,8 +43,8 @@ class BuzzFeedIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20141124',
|
||||
'uploader_id': 'CindysMunchkin',
|
||||
'description': 're:© 2014 Munchkin the Shih Tzu',
|
||||
'uploader': 'Munchkin the Shih Tzu',
|
||||
'description': 're:© 2014 Munchkin the',
|
||||
'uploader': 're:^Munchkin the',
|
||||
'title': 're:Munchkin the Teddy Bear gets her exercise',
|
||||
},
|
||||
}]
|
||||
|
@ -1,7 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
@ -39,8 +37,7 @@ class CBSIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
real_id = self._search_regex(
|
||||
r"video\.settings\.pid\s*=\s*'([^']+)';",
|
||||
|
30
youtube_dl/extractor/cbssports.py
Normal file
30
youtube_dl/extractor/cbssports.py
Normal file
@ -0,0 +1,30 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class CBSSportsIE(InfoExtractor):
    """Extractor for cbssports.com video player pages.

    The page URL carries a section name and a video id.  The JSON listing of
    the section is downloaded, the entry whose ``pcid`` equals the id is
    looked up, and the actual extraction is delegated to the ThePlatform
    extractor through a ``theplatform:`` URL.
    """
    _VALID_URL = r'http://www\.cbssports\.com/video/player/(?P<section>[^/]+)/(?P<id>[^/]+)'

    _TEST = {
        'url': 'http://www.cbssports.com/video/player/tennis/318462531970/0/us-open-flashbacks-1990s',
        'info_dict': {
            'id': '_d5_GbO8p1sT',
            'ext': 'flv',
            'title': 'US Open flashbacks: 1990s',
            'description': 'Bill Macatee relives the best moments in US Open history from the 1990s.',
        },
    }

    def _real_extract(self, url):
        """Return a url_result pointing at the ThePlatform video for *url*.

        Raises ExtractorError when the section listing contains no entry
        whose ``pcid`` matches the id taken from the URL.
        """
        # Imported locally because this module only imports InfoExtractor
        # at file level.
        from ..utils import ExtractorError

        mobj = re.match(self._VALID_URL, url)
        section = mobj.group('section')
        video_id = mobj.group('id')
        all_videos = self._download_json(
            'http://www.cbssports.com/data/video/player/getVideos/%s?as=json' % section,
            video_id)
        # The json file contains the info of all the videos in the section.
        # Use a default so that a missing entry is reported as an extractor
        # error instead of leaking a bare StopIteration to the caller.
        video_info = next(
            (v for v in all_videos if v.get('pcid') == video_id), None)
        if video_info is None:
            raise ExtractorError(
                'Unable to find video %s in section %s' % (video_id, section))
        return self.url_result('theplatform:%s' % video_info['pid'], 'ThePlatform')
|
84
youtube_dl/extractor/chirbit.py
Normal file
84
youtube_dl/extractor/chirbit.py
Normal file
@ -0,0 +1,84 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
parse_duration,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class ChirbitIE(InfoExtractor):
    """Extractor for a single chirb.it audio page.

    Accepts the plain, ``wp/``, ``pl/`` and flash-player URL forms and always
    scrapes the plain ``http://chirb.it/<id>`` page for the metadata.
    """
    IE_NAME = 'chirbit'
    _VALID_URL = r'https?://(?:www\.)?chirb\.it/(?:(?:wp|pl)/|fb_chirbit_player\.swf\?key=)?(?P<id>[\da-zA-Z]+)'
    _TESTS = [{
        'url': 'http://chirb.it/PrIPv5',
        'md5': '9847b0dad6ac3e074568bf2cfb197de8',
        'info_dict': {
            'id': 'PrIPv5',
            'ext': 'mp3',
            'title': 'Фасадстрой',
            'duration': 52,
            'view_count': int,
            'comment_count': int,
        }
    }, {
        'url': 'https://chirb.it/fb_chirbit_player.swf?key=PrIPv5',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Scrape the audio URL, title, duration and counters for *url*.

        The audio URL and the title are mandatory lookups; duration, listen
        count and comment count are looked up with ``fatal=False``.
        """
        chirb_id = self._match_id(url)

        # Whatever URL form matched, the metadata is read from the plain page.
        page_url = 'http://chirb.it/%s' % chirb_id
        webpage = self._download_webpage(page_url, chirb_id)

        info = {'id': chirb_id}

        info['url'] = self._search_regex(
            r'"setFile"\s*,\s*"([^"]+)"', webpage, 'audio url')
        info['title'] = self._search_regex(
            r'itemprop="name">([^<]+)', webpage, 'title')

        raw_duration = self._html_search_meta(
            'duration', webpage, 'duration', fatal=False)
        info['duration'] = parse_duration(raw_duration)

        raw_listens = self._search_regex(
            r'itemprop="playCount"\s*>(\d+)', webpage,
            'listen count', fatal=False)
        info['view_count'] = int_or_none(raw_listens)

        raw_comments = self._search_regex(
            r'>(\d+) Comments?:', webpage,
            'comment count', fatal=False)
        info['comment_count'] = int_or_none(raw_comments)

        return info
|
||||
|
||||
|
||||
class ChirbitProfileIE(InfoExtractor):
    """Extractor for chirbit.com profiles.

    Downloads the profile's RSS feed and returns a playlist with one
    'Chirbit' url_result per ``<item><link>`` element of the feed.
    """
    IE_NAME = 'chirbit:profile'
    # The dot of the host name is escaped; an unescaped '.' would match any
    # character and accept hosts such as "chirbitXcom".
    _VALID_URL = r'https?://(?:www\.)?chirbit\.com/(?:rss/)?(?P<id>[^/]+)'
    _TEST = {
        'url': 'http://chirbit.com/ScarletBeauty',
        'info_dict': {
            'id': 'ScarletBeauty',
            'title': 'Chirbits by ScarletBeauty',
        },
        'playlist_mincount': 3,
    }

    def _real_extract(self, url):
        """Return a playlist result built from the profile's RSS feed."""
        profile_id = self._match_id(url)

        rss = self._download_xml(
            'http://chirbit.com/rss/%s' % profile_id, profile_id)

        # Each feed item link is handed over to the single-audio extractor.
        entries = [
            self.url_result(audio_url.text, 'Chirbit')
            for audio_url in rss.findall('./channel/item/link')]

        title = rss.find('./channel/title').text

        return self.playlist_result(entries, profile_id, title)
|
@ -27,7 +27,6 @@ from ..utils import (
|
||||
compiled_regex_type,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
HEADRequest,
|
||||
int_or_none,
|
||||
RegexNotFoundError,
|
||||
sanitize_filename,
|
||||
@ -392,6 +391,16 @@ class InfoExtractor(object):
|
||||
if blocked_iframe:
|
||||
msg += ' Visit %s for more details' % blocked_iframe
|
||||
raise ExtractorError(msg, expected=True)
|
||||
if '<title>The URL you requested has been blocked</title>' in content[:512]:
|
||||
msg = (
|
||||
'Access to this webpage has been blocked by Indian censorship. '
|
||||
'Use a VPN or proxy server (with --proxy) to route around it.')
|
||||
block_msg = self._html_search_regex(
|
||||
r'</h1><p>(.*?)</p>',
|
||||
content, 'block message', default=None)
|
||||
if block_msg:
|
||||
msg += ' (Message: "%s")' % block_msg.replace('\n', ' ')
|
||||
raise ExtractorError(msg, expected=True)
|
||||
|
||||
return content
|
||||
|
||||
@ -753,9 +762,7 @@ class InfoExtractor(object):
|
||||
|
||||
def _is_valid_url(self, url, video_id, item='video'):
|
||||
try:
|
||||
self._request_webpage(
|
||||
HEADRequest(url), video_id,
|
||||
'Checking %s URL' % item)
|
||||
self._request_webpage(url, video_id, 'Checking %s URL' % item)
|
||||
return True
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError):
|
||||
@ -801,8 +808,8 @@ class InfoExtractor(object):
|
||||
media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media')
|
||||
for i, media_el in enumerate(media_nodes):
|
||||
if manifest_version == '2.0':
|
||||
manifest_url = ('/'.join(manifest_url.split('/')[:-1]) + '/'
|
||||
+ (media_el.attrib.get('href') or media_el.attrib.get('url')))
|
||||
manifest_url = ('/'.join(manifest_url.split('/')[:-1]) + '/' +
|
||||
(media_el.attrib.get('href') or media_el.attrib.get('url')))
|
||||
tbr = int_or_none(media_el.attrib.get('bitrate'))
|
||||
formats.append({
|
||||
'format_id': '-'.join(filter(None, [f4m_id, 'f4m-%d' % (i if tbr is None else tbr)])),
|
||||
@ -826,7 +833,7 @@ class InfoExtractor(object):
|
||||
'url': m3u8_url,
|
||||
'ext': ext,
|
||||
'protocol': 'm3u8',
|
||||
'preference': -1,
|
||||
'preference': preference - 1 if preference else -1,
|
||||
'resolution': 'multiple',
|
||||
'format_note': 'Quality selection URL',
|
||||
}]
|
||||
@ -841,6 +848,7 @@ class InfoExtractor(object):
|
||||
note='Downloading m3u8 information',
|
||||
errnote='Failed to download m3u8 information')
|
||||
last_info = None
|
||||
last_media = None
|
||||
kv_rex = re.compile(
|
||||
r'(?P<key>[a-zA-Z_-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)')
|
||||
for line in m3u8_doc.splitlines():
|
||||
@ -851,6 +859,13 @@ class InfoExtractor(object):
|
||||
if v.startswith('"'):
|
||||
v = v[1:-1]
|
||||
last_info[m.group('key')] = v
|
||||
elif line.startswith('#EXT-X-MEDIA:'):
|
||||
last_media = {}
|
||||
for m in kv_rex.finditer(line):
|
||||
v = m.group('val')
|
||||
if v.startswith('"'):
|
||||
v = v[1:-1]
|
||||
last_media[m.group('key')] = v
|
||||
elif line.startswith('#') or not line.strip():
|
||||
continue
|
||||
else:
|
||||
@ -879,6 +894,9 @@ class InfoExtractor(object):
|
||||
width_str, height_str = resolution.split('x')
|
||||
f['width'] = int(width_str)
|
||||
f['height'] = int(height_str)
|
||||
if last_media is not None:
|
||||
f['m3u8_media'] = last_media
|
||||
last_media = None
|
||||
formats.append(f)
|
||||
last_info = {}
|
||||
self._sort_formats(formats)
|
||||
|
@ -194,6 +194,7 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
|
||||
'url': 'http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q',
|
||||
'info_dict': {
|
||||
'title': 'SPORT',
|
||||
'id': 'xv4bw_nqtv_sport',
|
||||
},
|
||||
'playlist_mincount': 20,
|
||||
}]
|
||||
|
@ -25,8 +25,9 @@ class DefenseGouvFrIE(InfoExtractor):
|
||||
r"flashvars.pvg_id=\"(\d+)\";",
|
||||
webpage, 'ID')
|
||||
|
||||
json_url = ('http://static.videos.gouv.fr/brightcovehub/export/json/'
|
||||
+ video_id)
|
||||
json_url = (
|
||||
'http://static.videos.gouv.fr/brightcovehub/export/json/%s' %
|
||||
video_id)
|
||||
info = self._download_json(json_url, title, 'Downloading JSON config')
|
||||
video_url = info['renditions'][0]['url']
|
||||
|
||||
|
16
youtube_dl/extractor/embedly.py
Normal file
16
youtube_dl/extractor/embedly.py
Normal file
@ -0,0 +1,16 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
|
||||
|
||||
class EmbedlyIE(InfoExtractor):
    """Extractor for embedly.com media widget URLs.

    The widget URL carries the real media URL percent-encoded in its ``url``
    query parameter; that value is unquoted and returned as a url_result.
    """
    # The optional host prefix is "www." or "cdn.".  The dot belongs to both
    # alternatives, so it sits outside the inner group; with the dot attached
    # only to "cdn", "www.embedly.com" would not match.
    _VALID_URL = r'https?://(?:(?:www|cdn)\.)?embedly\.com/widgets/media\.html\?(?:[^#]*?&)?url=(?P<id>[^#&]+)'
    _TESTS = [{
        'url': 'https://cdn.embedly.com/widgets/media.html?src=http%3A%2F%2Fwww.youtube.com%2Fembed%2Fvideoseries%3Flist%3DUUGLim4T2loE5rwCMdpCIPVg&url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DSU4fj_aEMVw%26list%3DUUGLim4T2loE5rwCMdpCIPVg&image=http%3A%2F%2Fi.ytimg.com%2Fvi%2FSU4fj_aEMVw%2Fhqdefault.jpg&key=8ee8a2e6a8cc47aab1a5ee67f9a178e0&type=text%2Fhtml&schema=youtube&autoplay=1',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url_result for the unquoted ``url`` query parameter."""
        return self.url_result(compat_urllib_parse_unquote(self._match_id(url)))
|
@ -22,6 +22,7 @@ class EscapistIE(InfoExtractor):
|
||||
'uploader_id': 'the-escapist-presents',
|
||||
'uploader': 'The Escapist Presents',
|
||||
'title': "Breaking Down Baldur's Gate",
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
}
|
||||
}
|
||||
|
||||
@ -30,19 +31,18 @@ class EscapistIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
uploader_id = self._html_search_regex(
|
||||
r"<h1 class='headline'><a href='/videos/view/(.*?)'",
|
||||
r"<h1\s+class='headline'>\s*<a\s+href='/videos/view/(.*?)'",
|
||||
webpage, 'uploader ID', fatal=False)
|
||||
uploader = self._html_search_regex(
|
||||
r"<h1 class='headline'>(.*?)</a>",
|
||||
r"<h1\s+class='headline'>(.*?)</a>",
|
||||
webpage, 'uploader', fatal=False)
|
||||
description = self._html_search_meta('description', webpage)
|
||||
|
||||
raw_title = self._html_search_meta('title', webpage, fatal=True)
|
||||
title = raw_title.partition(' : ')[2]
|
||||
|
||||
player_url = self._og_search_video_url(webpage, name='player URL')
|
||||
config_url = compat_urllib_parse.unquote(self._search_regex(
|
||||
r'config=(.*)$', player_url, 'config URL'))
|
||||
config_url = compat_urllib_parse.unquote(self._html_search_regex(
|
||||
r'<param\s+name="flashvars"\s+value="config=([^"&]+)', webpage, 'config URL'))
|
||||
|
||||
formats = []
|
||||
|
||||
@ -81,5 +81,4 @@ class EscapistIE(InfoExtractor):
|
||||
'title': title,
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'description': description,
|
||||
'player_url': player_url,
|
||||
}
|
||||
|
@ -14,6 +14,7 @@ class FiveMinIE(InfoExtractor):
|
||||
IE_NAME = '5min'
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js\?(?:.*?&)?playList=|
|
||||
https?://(?:(?:massively|www)\.)?joystiq\.com/video/|
|
||||
5min:)
|
||||
(?P<id>\d+)
|
||||
'''
|
||||
|
@ -7,6 +7,7 @@ from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import remove_end
|
||||
|
||||
|
||||
class GDCVaultIE(InfoExtractor):
|
||||
@ -68,7 +69,9 @@ class GDCVaultIE(InfoExtractor):
|
||||
akami_url = xml_description.find('./metadata/akamaiHost').text
|
||||
slide_video_path = xml_description.find('./metadata/slideVideo').text
|
||||
video_formats.append({
|
||||
'url': 'rtmp://' + akami_url + '/' + slide_video_path,
|
||||
'url': 'rtmp://%s/ondemand?ovpfv=1.1' % 'fms.digitallyspeaking.com/cfx/st',
|
||||
'play_path': remove_end(slide_video_path, '.flv'),
|
||||
'ext': 'flv',
|
||||
'format_note': 'slide deck video',
|
||||
'quality': -2,
|
||||
'preference': -2,
|
||||
@ -76,7 +79,9 @@ class GDCVaultIE(InfoExtractor):
|
||||
})
|
||||
speaker_video_path = xml_description.find('./metadata/speakerVideo').text
|
||||
video_formats.append({
|
||||
'url': 'rtmp://' + akami_url + '/' + speaker_video_path,
|
||||
'url': 'rtmp://%s/ondemand?ovpfv=1.1' % 'fms.digitallyspeaking.com/cfx/st',
|
||||
'play_path': remove_end(speaker_video_path, '.flv'),
|
||||
'ext': 'flv',
|
||||
'format_note': 'speaker video',
|
||||
'quality': -1,
|
||||
'preference': -1,
|
||||
|
@ -473,6 +473,7 @@ class GenericIE(InfoExtractor):
|
||||
{
|
||||
'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
|
||||
'info_dict': {
|
||||
'id': '1986',
|
||||
'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
|
||||
},
|
||||
'playlist_mincount': 2,
|
||||
@ -531,13 +532,31 @@ class GenericIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'Mrj4DVp2zeA',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20150204',
|
||||
'upload_date': '20150212',
|
||||
'uploader': 'The National Archives UK',
|
||||
'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
|
||||
'uploader_id': 'NationalArchives08',
|
||||
'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
|
||||
},
|
||||
}
|
||||
},
|
||||
# rtl.nl embed
|
||||
{
|
||||
'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
|
||||
'playlist_mincount': 5,
|
||||
'info_dict': {
|
||||
'id': 'aanslagen-kopenhagen',
|
||||
'title': 'Aanslagen Kopenhagen | RTL Nieuws',
|
||||
}
|
||||
},
|
||||
# Zapiks embed
|
||||
{
|
||||
'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
|
||||
'info_dict': {
|
||||
'id': '118046',
|
||||
'ext': 'mp4',
|
||||
'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
|
||||
}
|
||||
},
|
||||
]
|
||||
|
||||
def report_following_redirect(self, new_url):
|
||||
@ -782,6 +801,13 @@ class GenericIE(InfoExtractor):
|
||||
'entries': entries,
|
||||
}
|
||||
|
||||
# Look for embedded rtl.nl player
|
||||
matches = re.findall(
|
||||
r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+video_embed[^"]+)"',
|
||||
webpage)
|
||||
if matches:
|
||||
return _playlist_from_matches(matches, ie='RtlNl')
|
||||
|
||||
# Look for embedded (iframe) Vimeo player
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage)
|
||||
@ -789,7 +815,6 @@ class GenericIE(InfoExtractor):
|
||||
player_url = unescapeHTML(mobj.group('url'))
|
||||
surl = smuggle_url(player_url, {'Referer': url})
|
||||
return self.url_result(surl)
|
||||
|
||||
# Look for embedded (swf embed) Vimeo player
|
||||
mobj = re.search(
|
||||
r'<embed[^>]+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
|
||||
@ -1082,6 +1107,12 @@ class GenericIE(InfoExtractor):
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'), 'Livestream')
|
||||
|
||||
# Look for Zapiks embed
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'), 'Zapiks')
|
||||
|
||||
def check_video(vurl):
|
||||
if YoutubeIE.suitable(vurl):
|
||||
return True
|
||||
|
@ -34,6 +34,9 @@ class IGNIE(InfoExtractor):
|
||||
},
|
||||
{
|
||||
'url': 'http://me.ign.com/en/feature/15775/100-little-things-in-gta-5-that-will-blow-your-mind',
|
||||
'info_dict': {
|
||||
'id': '100-little-things-in-gta-5-that-will-blow-your-mind',
|
||||
},
|
||||
'playlist': [
|
||||
{
|
||||
'info_dict': {
|
||||
|
97
youtube_dl/extractor/imgur.py
Normal file
97
youtube_dl/extractor/imgur.py
Normal file
@ -0,0 +1,97 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
mimetype2ext,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class ImgurIE(InfoExtractor):
    """Information extractor for single imgur.com / i.imgur.com posts.

    The video formats come from the ``<source>`` tags inside the page's
    ``video-elements`` div; an additional low-preference GIF format is built
    from the ``videoItem`` JavaScript object when the page contains one.
    """
    _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?P<id>[a-zA-Z0-9]+)(?:\.mp4|\.gifv)?'

    _TESTS = [{
        'url': 'https://i.imgur.com/A61SaA1.gifv',
        'info_dict': {
            'id': 'A61SaA1',
            'ext': 'mp4',
            'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$',
            'description': 're:The origin of the Internet\'s most viral images$|The Internet\'s visual storytelling community\. Explore, share, and discuss the best visual stories the Internet has to offer\.$',
        },
    }, {
        'url': 'https://imgur.com/A61SaA1',
        'info_dict': {
            'id': 'A61SaA1',
            'ext': 'mp4',
            'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$',
            'description': 're:The origin of the Internet\'s most viral images$|The Internet\'s visual storytelling community\. Explore, share, and discuss the best visual stories the Internet has to offer\.$',
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for the imgur post at ``url``.

        Raises ExtractorError (marked as expected) when the page has no
        ``video-elements`` div.
        """
        video_id = self._match_id(url)
        page = self._download_webpage(url, video_id)

        # Both dimensions are optional (fatal=False); int_or_none() is applied
        # to whatever the search returns.
        width_str = self._search_regex(
            r'<param name="width" value="([0-9]+)"',
            page, 'width', fatal=False)
        width = int_or_none(width_str)
        height_str = self._search_regex(
            r'<param name="height" value="([0-9]+)"',
            page, 'height', fatal=False)
        height = int_or_none(height_str)

        sources_html = self._search_regex(
            r'(?s)<div class="video-elements">(.*?)</div>',
            page, 'video elements', default=None)
        if not sources_html:
            raise ExtractorError(
                'No sources found for video %s. Maybe an image?' % video_id,
                expected=True)

        # One format per <source src=... type=...> tag; the format id is the
        # MIME subtype (the part after the slash).
        # NOTE(review): the explicit User-Agent is presumably required by
        # imgur's servers - confirm before changing it.
        formats = [{
            'format_id': source.group('type').partition('/')[2],
            'url': self._proto_relative_url(source.group('src')),
            'ext': mimetype2ext(source.group('type')),
            'acodec': 'none',
            'width': width,
            'height': height,
            'http_headers': {
                'User-Agent': 'youtube-dl (like wget)',
            },
        } for source in re.finditer(
            r'<source\s+src="(?P<src>[^"]+)"\s+type="(?P<type>[^"]+)"',
            sources_html)]

        gif_js = self._search_regex(
            r'(?s)var\s+videoItem\s*=\s*(\{.*?\})',
            page, 'GIF code', fatal=False)
        if gif_js:
            gif_info = self._parse_json(
                gif_js, video_id, transform_source=js_to_json)
            # The GIF rendition is offered with a low preference (-10).
            formats.append({
                'format_id': 'gif',
                'preference': -10,
                'width': width,
                'height': height,
                'ext': 'gif',
                'acodec': 'none',
                'vcodec': 'gif',
                'container': 'gif',
                'url': self._proto_relative_url(gif_info['gifUrl']),
                'filesize': gif_info.get('size'),
                'http_headers': {
                    'User-Agent': 'youtube-dl (like wget)',
                },
            })

        self._sort_formats(formats)

        return {
            'id': video_id,
            'formats': formats,
            'description': self._og_search_description(page),
            'title': self._og_search_title(page),
        }
|
@ -37,6 +37,7 @@ class LivestreamIE(InfoExtractor):
|
||||
'url': 'http://new.livestream.com/tedx/cityenglish',
|
||||
'info_dict': {
|
||||
'title': 'TEDCity2.0 (English)',
|
||||
'id': '2245590',
|
||||
},
|
||||
'playlist_mincount': 4,
|
||||
}, {
|
||||
@ -148,7 +149,8 @@ class LivestreamIE(InfoExtractor):
|
||||
if is_relevant(video_data, video_id)]
|
||||
if video_id is None:
|
||||
# This is an event page:
|
||||
return self.playlist_result(videos, info['id'], info['full_name'])
|
||||
return self.playlist_result(
|
||||
videos, '%s' % info['id'], info['full_name'])
|
||||
else:
|
||||
if not videos:
|
||||
raise ExtractorError('Cannot find video %s' % video_id)
|
||||
|
38
youtube_dl/extractor/nationalgeographic.py
Normal file
38
youtube_dl/extractor/nationalgeographic.py
Normal file
@ -0,0 +1,38 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
smuggle_url,
|
||||
url_basename,
|
||||
)
|
||||
|
||||
|
||||
class NationalGeographicIE(InfoExtractor):
    """Information extractor for video.nationalgeographic.com pages.

    The page only yields a feed URL and a video GUID; the actual extraction
    is delegated to another extractor through a link.theplatform.com URL.
    """
    _VALID_URL = r'http://video\.nationalgeographic\.com/video/.*?'

    _TEST = {
        'url': 'http://video.nationalgeographic.com/video/news/150210-news-crab-mating-vin?source=featuredvideo',
        'info_dict': {
            'id': '4DmDACA6Qtk_',
            'ext': 'flv',
            'title': 'Mating Crabs Busted by Sharks',
            'description': 'md5:16f25aeffdeba55aaa8ec37e093ad8b3',
        },
        'add_ie': ['ThePlatform'],
    }

    def _real_extract(self, url):
        """Resolve ``url`` to a smuggled link.theplatform.com URL result."""
        # The last path component of the URL doubles as the display name
        # passed to the download helpers.
        name = url_basename(url)

        page = self._download_webpage(url, name)
        feed_url = self._search_regex(
            r'data-feed-url="([^"]+)"', page, 'feed url')
        guid = self._search_regex(
            r'data-video-guid="([^"]+)"', page, 'guid')

        # Query the feed for this GUID and take the first MRSS <content>
        # element; the basename of its "url" attribute is the platform id.
        feed_query = '%s?byGuid=%s' % (feed_url, guid)
        feed = self._download_xml(feed_query, name)
        content = feed.find('.//{http://search.yahoo.com/mrss/}content')
        content_url = content.attrib.get('url')
        theplatform_id = url_basename(content_url)

        # For some reason, the normal links don't work and we must force the
        # use of f4m
        smil_url = (
            'http://link.theplatform.com/s/ngs/%s?format=SMIL&formats=MPEG4&manifest=f4m'
            % theplatform_id)
        return self.url_result(
            smuggle_url(smil_url, {'force_smil_url': True}))
|
@ -18,13 +18,13 @@ class NBCIE(InfoExtractor):
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.nbc.com/chicago-fire/video/i-am-a-firefighter/2734188',
|
||||
'url': 'http://www.nbc.com/the-tonight-show/segments/112966',
|
||||
# md5 checksum is not stable
|
||||
'info_dict': {
|
||||
'id': 'bTmnLCvIbaaH',
|
||||
'id': 'c9xnCo0YPOPH',
|
||||
'ext': 'flv',
|
||||
'title': 'I Am a Firefighter',
|
||||
'description': 'An emergency puts Dawson\'sf irefighter skills to the ultimate test in this four-part digital series.',
|
||||
'title': 'Jimmy Fallon Surprises Fans at Ben & Jerry\'s',
|
||||
'description': 'Jimmy gives out free scoops of his new "Tonight Dough" ice cream flavor by surprising customers at the Ben & Jerry\'s scoop shop.',
|
||||
},
|
||||
},
|
||||
{
|
||||
|
@ -29,6 +29,9 @@ class NetzkinoIE(InfoExtractor):
|
||||
'timestamp': 1344858571,
|
||||
'age_limit': 12,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'Download only works from Germany',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@ -1,9 +1,6 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
js_to_json,
|
||||
@ -11,7 +8,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class PatreonIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?patreon\.com/creation\?hid=(.+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?patreon\.com/creation\?hid=(?P<id>[^&#]+)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.patreon.com/creation?hid=743933',
|
||||
@ -35,6 +32,23 @@ class PatreonIE(InfoExtractor):
|
||||
'thumbnail': 're:^https?://.*$',
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'https://www.patreon.com/creation?hid=1682498',
|
||||
'info_dict': {
|
||||
'id': 'SU4fj_aEMVw',
|
||||
'ext': 'mp4',
|
||||
'title': 'I\'m on Patreon!',
|
||||
'uploader': 'TraciJHines',
|
||||
'thumbnail': 're:^https?://.*$',
|
||||
'upload_date': '20150211',
|
||||
'description': 'md5:c5a706b1f687817a3de09db1eb93acd4',
|
||||
'uploader_id': 'TraciJHines',
|
||||
},
|
||||
'params': {
|
||||
'noplaylist': True,
|
||||
'skip_download': True,
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
# Currently Patreon exposes download URL via hidden CSS, so login is not
|
||||
@ -65,26 +79,29 @@ class PatreonIE(InfoExtractor):
|
||||
'''
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group(1)
|
||||
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
title = self._og_search_title(webpage).strip()
|
||||
|
||||
attach_fn = self._html_search_regex(
|
||||
r'<div class="attach"><a target="_blank" href="([^"]+)">',
|
||||
webpage, 'attachment URL', default=None)
|
||||
embed = self._html_search_regex(
|
||||
r'<div id="watchCreation">\s*<iframe class="embedly-embed" src="([^"]+)"',
|
||||
webpage, 'embedded URL', default=None)
|
||||
|
||||
if attach_fn is not None:
|
||||
video_url = 'http://www.patreon.com' + attach_fn
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
uploader = self._html_search_regex(
|
||||
r'<strong>(.*?)</strong> is creating', webpage, 'uploader')
|
||||
elif embed is not None:
|
||||
return self.url_result(embed)
|
||||
else:
|
||||
playlist_js = self._search_regex(
|
||||
playlist = self._parse_json(self._search_regex(
|
||||
r'(?s)new\s+jPlayerPlaylist\(\s*\{\s*[^}]*},\s*(\[.*?,?\s*\])',
|
||||
webpage, 'playlist JSON')
|
||||
playlist_json = js_to_json(playlist_js)
|
||||
playlist = json.loads(playlist_json)
|
||||
webpage, 'playlist JSON'),
|
||||
video_id, transform_source=js_to_json)
|
||||
data = playlist[0]
|
||||
video_url = self._proto_relative_url(data['mp3'])
|
||||
thumbnail = self._proto_relative_url(data.get('cover'))
|
||||
|
@ -56,7 +56,7 @@ class PornHubIE(InfoExtractor):
|
||||
|
||||
video_title = self._html_search_regex(r'<h1 [^>]+>([^<]+)', webpage, 'title')
|
||||
video_uploader = self._html_search_regex(
|
||||
r'(?s)From: .+?<(?:a href="/users/|a href="/channels/|<span class="username)[^>]+>(.+?)<',
|
||||
r'(?s)From: .+?<(?:a href="/users/|a href="/channels/|span class="username)[^>]+>(.+?)<',
|
||||
webpage, 'uploader', fatal=False)
|
||||
thumbnail = self._html_search_regex(r'"image_url":"([^"]+)', webpage, 'thumbnail', fatal=False)
|
||||
if thumbnail:
|
||||
@ -110,3 +110,33 @@ class PornHubIE(InfoExtractor):
|
||||
'formats': formats,
|
||||
'age_limit': 18,
|
||||
}
|
||||
|
||||
|
||||
class PornHubPlaylistIE(InfoExtractor):
    """Information extractor for pornhub.com playlist pages.

    Collects every ``view_video.php?viewkey=...`` link on the page as a
    PornHub entry and reads the playlist title and description from the
    page's ``playlistObject`` JSON.
    """
    _VALID_URL = r'https?://(?:www\.)?pornhub\.com/playlist/(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://www.pornhub.com/playlist/6201671',
        'info_dict': {
            'id': '6201671',
            'title': 'P0p4',
        },
        'playlist_mincount': 35,
    }]

    def _real_extract(self, url):
        """Return a playlist result for the playlist page at ``url``."""
        playlist_id = self._match_id(url)

        webpage = self._download_webpage(url, playlist_id)

        # De-duplicate the links while keeping the order in which they appear
        # on the page (a plain set() would yield them in arbitrary order).
        # The pattern is a raw string so that \. \? and \d reach the regex
        # engine instead of being treated as (invalid) string escapes.
        entries = []
        seen = set()
        for video_url in re.findall(
                r'href="/?(view_video\.php\?viewkey=\d+[^"]*)"', webpage):
            if video_url in seen:
                continue
            seen.add(video_url)
            entries.append(self.url_result(
                'http://www.pornhub.com/%s' % video_url, 'PornHub'))

        playlist = self._parse_json(
            self._search_regex(
                r'playlistObject\s*=\s*({.+?});', webpage, 'playlist'),
            playlist_id)

        return self.playlist_result(
            entries, playlist_id,
            playlist.get('title'), playlist.get('description'))
|
||||
|
88
youtube_dl/extractor/r7.py
Normal file
88
youtube_dl/extractor/r7.py
Normal file
@ -0,0 +1,88 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
js_to_json,
|
||||
unescapeHTML,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class R7IE(InfoExtractor):
    """Information extractor for r7.com videos.

    Whatever page form the URL has, the 24-hex-digit id is used to download
    the player page at player.r7.com, whose ``item`` JavaScript object
    carries the title, statistics and the list of media sources.
    """
    _VALID_URL = r'''(?x)https?://
                        (?:
                            (?:[a-zA-Z]+)\.r7\.com(?:/[^/]+)+/idmedia/|
                            noticias\.r7\.com(?:/[^/]+)+/[^/]+-|
                            player\.r7\.com/video/i/
                        )
                        (?P<id>[\da-f]{24})
                        '''
    _TESTS = [{
        'url': 'http://videos.r7.com/policiais-humilham-suspeito-a-beira-da-morte-morre-com-dignidade-/idmedia/54e7050b0cf2ff57e0279389.html',
        'md5': '403c4e393617e8e8ddc748978ee8efde',
        'info_dict': {
            'id': '54e7050b0cf2ff57e0279389',
            'ext': 'mp4',
            'title': 'Policiais humilham suspeito à beira da morte: "Morre com dignidade"',
            'thumbnail': 're:^https?://.*\.jpg$',
            'duration': 98,
            'like_count': int,
            'view_count': int,
        },
    }, {
        'url': 'http://esportes.r7.com/videos/cigano-manda-recado-aos-fas/idmedia/4e176727b51a048ee6646a1b.html',
        'only_matching': True,
    }, {
        'url': 'http://noticias.r7.com/record-news/video/representante-do-instituto-sou-da-paz-fala-sobre-fim-do-estatuto-do-desarmamento-5480fc580cf2285b117f438d/',
        'only_matching': True,
    }, {
        'url': 'http://player.r7.com/video/i/54e7050b0cf2ff57e0279389?play=true&video=http://vsh.r7.com/54e7050b0cf2ff57e0279389/ER7_RE_BG_MORTE_JOVENS_570kbps_2015-02-2009f17818-cc82-4c8f-86dc-89a66934e633-ATOS_copy.mp4&linkCallback=http://videos.r7.com/policiais-humilham-suspeito-a-beira-da-morte-morre-com-dignidade-/idmedia/54e7050b0cf2ff57e0279389.html&thumbnail=http://vtb.r7.com/ER7_RE_BG_MORTE_JOVENS_570kbps_2015-02-2009f17818-cc82-4c8f-86dc-89a66934e633-thumb.jpg&idCategory=192&share=true&layout=full&full=true',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the r7.com video referenced by ``url``."""
        video_id = self._match_id(url)

        player_page = self._download_webpage(
            'http://player.r7.com/video/i/%s' % video_id, video_id)

        # The player page embeds its data as a JS object literal
        # ("var item = {...};"), converted to JSON before parsing.
        item_js = self._search_regex(
            r'(?s)var\s+item\s*=\s*({.+?});', player_page, 'player')
        item = self._parse_json(js_to_json(item_js), video_id)

        title = unescapeHTML(item['title'])
        thumbnail = item.get('init', {}).get('thumbUri')

        stats = item.get('statistics', {})
        like_count = int_or_none(stats.get('likes'))
        view_count = int_or_none(stats.get('views'))

        # The duration is taken from the first source entry that has a "src";
        # later entries are only consulted while it is still None.
        duration = None
        formats = []
        for key, source in item['playlist'][0].items():
            src = source.get('src')
            if not src:
                continue
            if duration is None:
                duration = source.get('duration')
            if '.f4m' in src:
                formats.extend(
                    self._extract_f4m_formats(src, video_id, preference=-1))
                continue
            if src.endswith('.m3u8'):
                formats.extend(self._extract_m3u8_formats(
                    src, video_id, 'mp4', preference=-2))
                continue
            # Anything else is used as a direct URL, labelled by its own
            # "format" value or, failing that, by its key in the playlist.
            formats.append({
                'url': src,
                'format_id': source.get('format') or key,
            })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'duration': duration,
            'like_count': like_count,
            'view_count': view_count,
            'formats': formats,
        }
|
@ -1,7 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
@ -10,13 +8,13 @@ class RadioDeIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?P<id>.+?)\.(?:radio\.(?:de|at|fr|pt|es|pl|it)|rad\.io)'
|
||||
_TEST = {
|
||||
'url': 'http://ndr2.radio.de/',
|
||||
'md5': '3b4cdd011bc59174596b6145cda474a4',
|
||||
'info_dict': {
|
||||
'id': 'ndr2',
|
||||
'ext': 'mp3',
|
||||
'title': 're:^NDR 2 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'description': 'md5:591c49c702db1a33751625ebfb67f273',
|
||||
'thumbnail': 're:^https?://.*\.png',
|
||||
'is_live': True,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@ -25,16 +23,15 @@ class RadioDeIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
radio_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, radio_id)
|
||||
jscode = self._search_regex(
|
||||
r"'components/station/stationService':\s*\{\s*'?station'?:\s*(\{.*?\s*\}),\n",
|
||||
webpage, 'broadcast')
|
||||
|
||||
broadcast = json.loads(self._search_regex(
|
||||
r'_getBroadcast\s*=\s*function\(\s*\)\s*{\s*return\s+({.+?})\s*;\s*}',
|
||||
webpage, 'broadcast'))
|
||||
|
||||
broadcast = self._parse_json(jscode, radio_id)
|
||||
title = self._live_title(broadcast['name'])
|
||||
description = broadcast.get('description') or broadcast.get('shortDescription')
|
||||
thumbnail = broadcast.get('picture4Url') or broadcast.get('picture4TransUrl')
|
||||
thumbnail = broadcast.get('picture4Url') or broadcast.get('picture4TransUrl') or broadcast.get('logo100x100')
|
||||
|
||||
formats = [{
|
||||
'url': stream['streamUrl'],
|
||||
|
@ -1,16 +1,25 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import parse_duration
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
)
|
||||
|
||||
|
||||
class RtlXlIE(InfoExtractor):
|
||||
IE_NAME = 'rtlxl.nl'
|
||||
_VALID_URL = r'https?://(www\.)?rtlxl\.nl/#!/[^/]+/(?P<uuid>[^/?]+)'
|
||||
class RtlNlIE(InfoExtractor):
|
||||
IE_NAME = 'rtl.nl'
|
||||
IE_DESC = 'rtl.nl and rtlxl.nl'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(www\.)?
|
||||
(?:
|
||||
rtlxl\.nl/\#!/[^/]+/|
|
||||
rtl\.nl/system/videoplayer/[^?#]+?/video_embed\.html\#uuid=
|
||||
)
|
||||
(?P<id>[0-9a-f-]+)'''
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.rtlxl.nl/#!/rtl-nieuws-132237/6e4203a6-0a5e-3596-8424-c599a59e0677',
|
||||
'md5': 'cc16baa36a6c169391f0764fa6b16654',
|
||||
'info_dict': {
|
||||
@ -22,21 +31,30 @@ class RtlXlIE(InfoExtractor):
|
||||
'upload_date': '20140814',
|
||||
'duration': 576.880,
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.rtl.nl/system/videoplayer/derden/rtlnieuws/video_embed.html#uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed/autoplay=false',
|
||||
'md5': 'dea7474214af1271d91ef332fb8be7ea',
|
||||
'info_dict': {
|
||||
'id': '84ae5571-ac25-4225-ae0c-ef8d9efb2aed',
|
||||
'ext': 'mp4',
|
||||
'timestamp': 1424039400,
|
||||
'title': 'RTL Nieuws - Nieuwe beelden Kopenhagen: chaos direct na aanslag',
|
||||
'thumbnail': 're:^https?://screenshots\.rtl\.nl/system/thumb/sz=[0-9]+x[0-9]+/uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed$',
|
||||
'upload_date': '20150215',
|
||||
'description': 'Er zijn nieuwe beelden vrijgegeven die vlak na de aanslag in Kopenhagen zijn gemaakt. Op de video is goed te zien hoe omstanders zich bekommeren om één van de slachtoffers, terwijl de eerste agenten ter plaatse komen.',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
uuid = mobj.group('uuid')
|
||||
|
||||
uuid = self._match_id(url)
|
||||
info = self._download_json(
|
||||
'http://www.rtl.nl/system/s4m/vfd/version=2/uuid=%s/fmt=flash/' % uuid,
|
||||
uuid)
|
||||
|
||||
material = info['material'][0]
|
||||
episode_info = info['episodes'][0]
|
||||
|
||||
progname = info['abstracts'][0]['name']
|
||||
subtitle = material['title'] or info['episodes'][0]['name']
|
||||
description = material.get('synopsis') or info['episodes'][0]['synopsis']
|
||||
|
||||
# Use unencrypted m3u8 streams (See https://github.com/rg3/youtube-dl/issues/4118)
|
||||
videopath = material['videopath'].replace('.f4m', '.m3u8')
|
||||
@ -58,14 +76,29 @@ class RtlXlIE(InfoExtractor):
|
||||
'quality': 0,
|
||||
}
|
||||
])
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnails = []
|
||||
meta = info.get('meta', {})
|
||||
for p in ('poster_base_url', '"thumb_base_url"'):
|
||||
if not meta.get(p):
|
||||
continue
|
||||
|
||||
thumbnails.append({
|
||||
'url': self._proto_relative_url(meta[p] + uuid),
|
||||
'width': int_or_none(self._search_regex(
|
||||
r'/sz=([0-9]+)', meta[p], 'thumbnail width', fatal=False)),
|
||||
'height': int_or_none(self._search_regex(
|
||||
r'/sz=[0-9]+x([0-9]+)',
|
||||
meta[p], 'thumbnail height', fatal=False))
|
||||
})
|
||||
|
||||
return {
|
||||
'id': uuid,
|
||||
'title': '%s - %s' % (progname, subtitle),
|
||||
'formats': formats,
|
||||
'timestamp': material['original_date'],
|
||||
'description': episode_info['synopsis'],
|
||||
'description': description,
|
||||
'duration': parse_duration(material.get('duration')),
|
||||
'thumbnails': thumbnails,
|
||||
}
|
||||
|
@ -6,6 +6,7 @@ import re
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
struct_unpack,
|
||||
remove_end,
|
||||
@ -96,12 +97,10 @@ class RTVEALaCartaIE(InfoExtractor):
|
||||
).replace('.net.rtve', '.multimedia.cdn.rtve')
|
||||
video_path = self._download_webpage(
|
||||
auth_url, video_id, 'Getting video url')
|
||||
# Use mvod.akcdn instead of flash.akamaihd.multimedia.cdn to get
|
||||
# Use mvod1.akcdn instead of flash.akamaihd.multimedia.cdn to get
|
||||
# the right Content-Length header and the mp4 format
|
||||
video_url = (
|
||||
'http://mvod.akcdn.rtve.es/{0}&v=2.6.8'
|
||||
'&fp=MAC%2016,0,0,296&r=MRUGG&g=OEOJWFXNFGCP'.format(video_path)
|
||||
)
|
||||
video_url = compat_urlparse.urljoin(
|
||||
'http://mvod1.akcdn.rtve.es/', video_path)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
117
youtube_dl/extractor/sandia.py
Normal file
117
youtube_dl/extractor/sandia.py
Normal file
@ -0,0 +1,117 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import itertools
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
mimetype2ext,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class SandiaIE(InfoExtractor):
    """Information extractor for Mediasite presentations at digitalops.sandia.gov.

    All metadata is read from ``Mediasite.PlaybackManifest.<key> = <value>;``
    assignments in a JavaScript file referenced by the player page.
    """
    IE_DESC = 'Sandia National Laboratories'
    _VALID_URL = r'https?://digitalops\.sandia\.gov/Mediasite/Play/(?P<id>[0-9a-f]+)'
    _TEST = {
        'url': 'http://digitalops.sandia.gov/Mediasite/Play/24aace4429fc450fb5b38cdbf424a66e1d',
        'md5': '9422edc9b9a60151727e4b6d8bef393d',
        'info_dict': {
            'id': '24aace4429fc450fb5b38cdbf424a66e1d',
            'ext': 'mp4',
            'title': 'Xyce Software Training - Section 1',
            'description': 're:(?s)SAND Number: SAND 2013-7800.{200,}',
            'upload_date': '20120904',
            'duration': 7794,
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the presentation at ``url``."""
        video_id = self._match_id(url)

        # NOTE(review): the cookie presumably makes the server return the
        # player variant whose manifest is parsed below - confirm.
        req = compat_urllib_request.Request(url)
        req.add_header('Cookie', 'MediasitePlayerCaps=ClientPlugins=4')
        webpage = self._download_webpage(req, video_id)

        js_path = self._search_regex(
            r'<script type="text/javascript" src="(/Mediasite/FileServer/Presentation/[^"]+)"',
            webpage, 'JS code URL')
        js_url = compat_urlparse.urljoin(url, js_path)

        js_code = self._download_webpage(
            js_url, video_id, note='Downloading player')

        def extract_str(key, **args):
            # Raw right-hand side of "Mediasite.PlaybackManifest.<key> = ...;"
            # Extra keyword arguments are forwarded to _search_regex.
            return self._search_regex(
                r'Mediasite\.PlaybackManifest\.%s\s*=\s*(.+);\s*?\n' % re.escape(key),
                js_code, key, **args)

        def extract_data(key, **args):
            # Same as extract_str, but the value is parsed as a JS literal.
            # A None result from the search is passed through unchanged.
            data_json = extract_str(key, **args)
            if data_json is None:
                return data_json
            return self._parse_json(
                data_json, video_id, transform_source=js_to_json)

        # VideoUrls[0], VideoUrls[1], ... are read until one is missing.
        formats = []
        for i in itertools.count():
            fd = extract_data('VideoUrls[%d]' % i, default=None)
            if fd is None:
                break
            formats.append({
                'format_id': '%s' % i,
                'format_note': fd['MimeType'].partition('/')[2],
                'ext': mimetype2ext(fd['MimeType']),
                'url': fd['Location'],
                'protocol': 'f4m' if fd['MimeType'] == 'video/x-mp4-fragmented' else None,
            })
        # Sorted once here, before the slideshow pseudo-format is appended.
        # NOTE(review): presumably this also lets _sort_formats report a
        # presentation without any real video stream - confirm before
        # merging it with the second call below.
        self._sort_formats(formats)

        slide_baseurl = compat_urlparse.urljoin(
            url, extract_data('SlideBaseUrl'))
        # Convert a "{0:D4}"-style placeholder into a "%04d" printf
        # placeholder. The width is matched with [0-9]+ so that multi-digit
        # widths are converted as well (the character class "[0-9+]" would
        # only ever match a single character).
        slide_template = slide_baseurl + re.sub(
            r'\{0:D?([0-9]+)\}', r'%0\1d', extract_data('SlideImageFileNameTemplate'))
        slides = []
        last_slide_time = 0
        # Slides are numbered from 1 and read until one is missing.
        for i in itertools.count(1):
            sd = extract_str('Slides[%d]' % i, default=None)
            if sd is None:
                break
            timestamp = int_or_none(self._search_regex(
                r'^Mediasite\.PlaybackManifest\.CreateSlide\("[^"]*"\s*,\s*([0-9]+),',
                sd, 'slide %s timestamp' % i, fatal=False))
            # The timestamp lookup is non-fatal, so it may be None; in that
            # case the slide gets no duration and the reference time is left
            # untouched instead of failing on "None - int".
            if timestamp is None:
                slide_duration = None
            else:
                slide_duration = timestamp - last_slide_time
                last_slide_time = timestamp
            slides.append({
                'url': slide_template % i,
                'duration': slide_duration,
            })
        formats.append({
            'format_id': 'slides',
            'protocol': 'slideshow',
            'url': json.dumps(slides),
            'preference': -10000,  # Downloader not yet written
        })
        self._sort_formats(formats)

        title = extract_data('Title')
        description = extract_data('Description', fatal=False)
        # The manifest value is divided by 1000 (scale=1000).
        duration = int_or_none(extract_data(
            'Duration', fatal=False), scale=1000)
        upload_date = unified_strdate(extract_data('AirDate', fatal=False))

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'formats': formats,
            'upload_date': upload_date,
            'duration': duration,
        }
|
@ -25,7 +25,6 @@ class SockshareIE(InfoExtractor):
|
||||
'id': '437BE28B89D799D7',
|
||||
'title': 'big_buck_bunny_720p_surround.avi',
|
||||
'ext': 'avi',
|
||||
'thumbnail': 're:^http://.*\.jpg$',
|
||||
}
|
||||
}
|
||||
|
||||
@ -45,7 +44,7 @@ class SockshareIE(InfoExtractor):
|
||||
''', webpage, 'hash')
|
||||
|
||||
fields = {
|
||||
"hash": confirm_hash,
|
||||
"hash": confirm_hash.encode('utf-8'),
|
||||
"confirm": "Continue as Free User"
|
||||
}
|
||||
|
||||
@ -68,7 +67,7 @@ class SockshareIE(InfoExtractor):
|
||||
webpage, 'title', default=None)
|
||||
thumbnail = self._html_search_regex(
|
||||
r'<img\s+src="([^"]*)".+?name="bg"',
|
||||
webpage, 'thumbnail')
|
||||
webpage, 'thumbnail', default=None)
|
||||
|
||||
formats = [{
|
||||
'format_id': 'sd',
|
||||
|
@ -7,6 +7,7 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class SoundgasmIE(InfoExtractor):
|
||||
IE_NAME = 'soundgasm'
|
||||
_VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P<user>[0-9a-zA-Z_\-]+)/(?P<title>[0-9a-zA-Z_\-]+)'
|
||||
_TEST = {
|
||||
'url': 'http://soundgasm.net/u/ytdl/Piano-sample',
|
||||
@ -38,3 +39,25 @@ class SoundgasmIE(InfoExtractor):
|
||||
'title': audio_title,
|
||||
'description': description
|
||||
}
|
||||
|
||||
class SoundgasmProfileIE(InfoExtractor):
    """Information extractor for soundgasm.net user profile pages.

    Every link on the profile page that points below ``/u/<profile id>/`` is
    returned as a Soundgasm entry of the resulting playlist.
    """
    IE_NAME = 'soundgasm:profile'
    _VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P<id>[^/]+)/?(?:\#.*)?$'
    _TEST = {
        'url': 'http://soundgasm.net/u/ytdl',
        'info_dict': {
            'id': 'ytdl',
        },
        'playlist_count': 1,
    }

    def _real_extract(self, url):
        """Return a playlist result with one entry per audio link found."""
        profile_id = self._match_id(url)

        webpage = self._download_webpage(url, profile_id)

        # _VALID_URL accepts any non-slash characters as the profile id, so
        # it is escaped before being embedded in the pattern; otherwise
        # characters such as "." or "+" would be read as regex syntax.
        audio_link_re = r'href="([^"]+/u/%s/[^"]+)' % re.escape(profile_id)
        entries = [
            self.url_result(audio_url, 'Soundgasm')
            for audio_url in re.findall(audio_link_re, webpage)]

        return self.playlist_result(entries, profile_id)
|
||||
|
@ -1,8 +1,10 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import qualities
|
||||
|
||||
|
||||
class TeamcocoIE(InfoExtractor):
|
||||
@ -24,8 +26,8 @@ class TeamcocoIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '19705',
|
||||
'ext': 'mp4',
|
||||
"description": "Louis C.K. got starstruck by George W. Bush, so what? Part one.",
|
||||
"title": "Louis C.K. Interview Pt. 1 11/3/11",
|
||||
'description': 'Louis C.K. got starstruck by George W. Bush, so what? Part one.',
|
||||
'title': 'Louis C.K. Interview Pt. 1 11/3/11',
|
||||
'age_limit': 0,
|
||||
}
|
||||
}
|
||||
@ -42,42 +44,39 @@ class TeamcocoIE(InfoExtractor):
|
||||
display_id = mobj.group('display_id')
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
video_id = mobj.group("video_id")
|
||||
video_id = mobj.group('video_id')
|
||||
if not video_id:
|
||||
video_id = self._html_search_regex(
|
||||
self._VIDEO_ID_REGEXES, webpage, 'video id')
|
||||
|
||||
data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
|
||||
data = self._download_xml(
|
||||
data_url, display_id, 'Downloading data webpage')
|
||||
embed_url = 'http://teamcoco.com/embed/v/%s' % video_id
|
||||
embed = self._download_webpage(
|
||||
embed_url, video_id, 'Downloading embed page')
|
||||
|
||||
encoded_data = self._search_regex(
|
||||
r'"preload"\s*:\s*"([^"]+)"', embed, 'encoded data')
|
||||
data = self._parse_json(
|
||||
base64.b64decode(encoded_data.encode('ascii')).decode('utf-8'), video_id)
|
||||
|
||||
qualities = ['500k', '480p', '1000k', '720p', '1080p']
|
||||
formats = []
|
||||
for filed in data.findall('files/file'):
|
||||
if filed.attrib.get('playmode') == 'all':
|
||||
# it just duplicates one of the entries
|
||||
break
|
||||
file_url = filed.text
|
||||
m_format = re.search(r'(\d+(k|p))\.mp4', file_url)
|
||||
get_quality = qualities(['500k', '480p', '1000k', '720p', '1080p'])
|
||||
for filed in data['files']:
|
||||
m_format = re.search(r'(\d+(k|p))\.mp4', filed['url'])
|
||||
if m_format is not None:
|
||||
format_id = m_format.group(1)
|
||||
else:
|
||||
format_id = filed.attrib['bitrate']
|
||||
format_id = filed['bitrate']
|
||||
tbr = (
|
||||
int(filed.attrib['bitrate'])
|
||||
if filed.attrib['bitrate'].isdigit()
|
||||
int(filed['bitrate'])
|
||||
if filed['bitrate'].isdigit()
|
||||
else None)
|
||||
|
||||
try:
|
||||
quality = qualities.index(format_id)
|
||||
except ValueError:
|
||||
quality = -1
|
||||
formats.append({
|
||||
'url': file_url,
|
||||
'url': filed['url'],
|
||||
'ext': 'mp4',
|
||||
'tbr': tbr,
|
||||
'format_id': format_id,
|
||||
'quality': quality,
|
||||
'quality': get_quality(format_id),
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
@ -86,8 +85,8 @@ class TeamcocoIE(InfoExtractor):
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'formats': formats,
|
||||
'title': self._og_search_title(webpage),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'description': self._og_search_description(webpage),
|
||||
'title': data['title'],
|
||||
'thumbnail': data.get('thumb', {}).get('href'),
|
||||
'description': data.get('teaser'),
|
||||
'age_limit': self._family_friendly_search(webpage),
|
||||
}
|
||||
|
@ -83,6 +83,22 @@ class TEDIE(SubtitlesInfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# YouTube video
|
||||
'url': 'http://www.ted.com/talks/jeffrey_kluger_the_sibling_bond',
|
||||
'add_ie': ['Youtube'],
|
||||
'info_dict': {
|
||||
'id': 'aFBIPO-P7LM',
|
||||
'ext': 'mp4',
|
||||
'title': 'The hidden power of siblings: Jeff Kluger at TEDxAsheville',
|
||||
'description': 'md5:3d7a4f50d95ca5dd67104e2a20f43fe1',
|
||||
'uploader': 'TEDx Talks',
|
||||
'uploader_id': 'TEDxTalks',
|
||||
'upload_date': '20111216',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
_NATIVE_FORMATS = {
|
||||
@ -132,11 +148,16 @@ class TEDIE(SubtitlesInfoExtractor):
|
||||
|
||||
talk_info = self._extract_info(webpage)['talks'][0]
|
||||
|
||||
if talk_info.get('external') is not None:
|
||||
self.to_screen('Found video from %s' % talk_info['external']['service'])
|
||||
external = talk_info.get('external')
|
||||
if external:
|
||||
service = external['service']
|
||||
self.to_screen('Found video from %s' % service)
|
||||
ext_url = None
|
||||
if service.lower() == 'youtube':
|
||||
ext_url = external.get('code')
|
||||
return {
|
||||
'_type': 'url',
|
||||
'url': talk_info['external']['uri'],
|
||||
'url': ext_url or external['uri'],
|
||||
}
|
||||
|
||||
formats = [{
|
||||
|
@ -4,11 +4,10 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError
|
||||
|
||||
|
||||
class TheOnionIE(InfoExtractor):
|
||||
_VALID_URL = r'(?x)https?://(?:www\.)?theonion\.com/video/[^,]+,(?P<article_id>[0-9]+)/?'
|
||||
_VALID_URL = r'https?://(?:www\.)?theonion\.com/video/[^,]+,(?P<id>[0-9]+)/?'
|
||||
_TEST = {
|
||||
'url': 'http://www.theonion.com/video/man-wearing-mm-jacket-gods-image,36918/',
|
||||
'md5': '19eaa9a39cf9b9804d982e654dc791ee',
|
||||
@ -22,10 +21,8 @@ class TheOnionIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
article_id = mobj.group('article_id')
|
||||
|
||||
webpage = self._download_webpage(url, article_id)
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
video_id = self._search_regex(
|
||||
r'"videoId":\s(\d+),', webpage, 'video ID')
|
||||
@ -34,10 +31,6 @@ class TheOnionIE(InfoExtractor):
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
|
||||
sources = re.findall(r'<source src="([^"]+)" type="([^"]+)"', webpage)
|
||||
if not sources:
|
||||
raise ExtractorError(
|
||||
'No sources found for video %s' % video_id, expected=True)
|
||||
|
||||
formats = []
|
||||
for src, type_ in sources:
|
||||
if type_ == 'video/mp4':
|
||||
@ -54,15 +47,15 @@ class TheOnionIE(InfoExtractor):
|
||||
})
|
||||
elif type_ == 'application/x-mpegURL':
|
||||
formats.extend(
|
||||
self._extract_m3u8_formats(src, video_id, preference=-1))
|
||||
self._extract_m3u8_formats(src, display_id, preference=-1))
|
||||
else:
|
||||
self.report_warning(
|
||||
'Encountered unexpected format: %s' % type_)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnail': thumbnail,
|
||||
|
@ -71,7 +71,9 @@ class ThePlatformIE(SubtitlesInfoExtractor):
|
||||
if not provider_id:
|
||||
provider_id = 'dJ5BDC'
|
||||
|
||||
if mobj.group('config'):
|
||||
if smuggled_data.get('force_smil_url', False):
|
||||
smil_url = url
|
||||
elif mobj.group('config'):
|
||||
config_url = url + '&form=json'
|
||||
config_url = config_url.replace('swf/', 'config/')
|
||||
config_url = config_url.replace('onsite/', 'onsite/config/')
|
||||
|
100
youtube_dl/extractor/tv4.py
Normal file
100
youtube_dl/extractor/tv4.py
Normal file
@ -0,0 +1,100 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class TV4IE(InfoExtractor):
    """Information extractor for videos hosted on tv4.se and tv4play.se."""

    IE_DESC = 'tv4.se and tv4play.se'
    # Verbose regex: whitespace inside the pattern is ignored.  The numeric
    # video id is always the trailing component of the URL.
    _VALID_URL = r'''(?x)https?://(?:www\.)?
        (?:
            tv4\.se/(?:[^/]+)/klipp/(?:.*)-|
            tv4play\.se/
            (?:
                (?:program|barn)/(?:[^\?]+)\?video_id=|
                iframe/video/|
                film/|
                sport/|
            )
        )(?P<id>[0-9]+)'''
    _TESTS = [
        {
            'url': 'http://www.tv4.se/kalla-fakta/klipp/kalla-fakta-5-english-subtitles-2491650',
            'md5': '909d6454b87b10a25aa04c4bdd416a9b',
            'info_dict': {
                'id': '2491650',
                'ext': 'mp4',
                'title': 'Kalla Fakta 5 (english subtitles)',
                # Raw string: '\.' is a regex escape, not a string escape.
                'thumbnail': r're:^https?://.*\.jpg$',
                'timestamp': int,
                'upload_date': '20131125',
            },
        },
        {
            'url': 'http://www.tv4play.se/iframe/video/3054113',
            'md5': '77f851c55139ffe0ebd41b6a5552489b',
            'info_dict': {
                'id': '3054113',
                'ext': 'mp4',
                'title': 'Så här jobbar ficktjuvarna - se avslöjande bilder',
                'thumbnail': r're:^https?://.*\.jpg$',
                'description': 'Unika bilder avslöjar hur turisternas fickor vittjas mitt på Stockholms central. Två experter på ficktjuvarna avslöjar knepen du ska se upp för.',
                'timestamp': int,
                'upload_date': '20150130',
            },
        },
        {
            'url': 'http://www.tv4play.se/sport/3060959',
            'only_matching': True,
        },
        {
            'url': 'http://www.tv4play.se/film/2378136',
            'only_matching': True,
        },
        {
            'url': 'http://www.tv4play.se/barn/looney-tunes?video_id=3062412',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Build the info dict for the video identified by *url*.

        Downloads the asset metadata JSON and the playback sources JSON for
        the video id matched by ``_VALID_URL`` and turns every playback item
        into a format entry.

        Raises ExtractorError (marked as expected) when the metadata says the
        content requires a subscription.
        """
        video_id = self._match_id(url)

        info = self._download_json(
            'http://www.tv4play.se/player/assets/%s.json' % video_id,
            video_id, 'Downloading video info JSON')

        # A geo-restriction flag does not necessarily mean the download will
        # fail, so only warn.  Use .get() so that a response lacking either
        # flag is treated as "not restricted" instead of raising KeyError.
        if info.get('is_geo_restricted'):
            self.report_warning('This content might not be available in your country due to licensing restrictions.')
        if info.get('requires_subscription'):
            raise ExtractorError('This content requires subscription.', expected=True)

        sources_data = self._download_json(
            'https://prima.tv4play.se/api/web/asset/%s/play.json?protocol=http&videoFormat=MP4' % video_id,
            video_id, 'Downloading sources JSON')
        sources = sources_data['playback']

        # Tolerate a null "items" container and a missing "item" entry.
        items = (sources.get('items') or {}).get('item') or []
        # NOTE(review): presumably a single playback item can be delivered as
        # a bare object rather than a one-element list -- confirm against a
        # live response.  Wrapping it is harmless either way.
        if isinstance(items, dict):
            items = [items]

        formats = []
        for item in items:
            ext, bitrate = item['mediaFormat'], item['bitrate']
            formats.append({
                'format_id': '%s_%s' % (ext, bitrate),
                'tbr': bitrate,
                'ext': ext,
                'url': item['url'],
            })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': info['title'],
            'formats': formats,
            'description': info.get('description'),
            'timestamp': parse_iso8601(info.get('broadcast_date_time')),
            'duration': info.get('duration'),
            'thumbnail': info.get('image'),
            'is_live': sources.get('live'),
        }
|
@ -349,6 +349,13 @@ class TwitchStreamIE(TwitchBaseIE):
|
||||
% (self._USHER_BASE, channel_id, compat_urllib_parse.urlencode(query).encode('utf-8')),
|
||||
channel_id, 'mp4')
|
||||
|
||||
# prefer the 'source' stream, the others are limited to 30 fps
|
||||
def _sort_source(f):
|
||||
if f.get('m3u8_media') is not None and f['m3u8_media'].get('NAME') == 'Source':
|
||||
return 1
|
||||
return 0
|
||||
formats = sorted(formats, key=_sort_source)
|
||||
|
||||
view_count = stream.get('viewers')
|
||||
timestamp = parse_iso8601(stream.get('created_at'))
|
||||
|
||||
|
@ -49,15 +49,31 @@ class VideoLecturesNetIE(InfoExtractor):
|
||||
thumbnail = (
|
||||
None if thumbnail_el is None else thumbnail_el.attrib.get('src'))
|
||||
|
||||
formats = [{
|
||||
'url': v.attrib['src'],
|
||||
'width': int_or_none(v.attrib.get('width')),
|
||||
'height': int_or_none(v.attrib.get('height')),
|
||||
'filesize': int_or_none(v.attrib.get('size')),
|
||||
'tbr': int_or_none(v.attrib.get('systemBitrate')) / 1000.0,
|
||||
'ext': v.attrib.get('ext'),
|
||||
} for v in switch.findall('./video')
|
||||
if v.attrib.get('proto') == 'http']
|
||||
formats = []
|
||||
for v in switch.findall('./video'):
|
||||
proto = v.attrib.get('proto')
|
||||
if proto not in ['http', 'rtmp']:
|
||||
continue
|
||||
f = {
|
||||
'width': int_or_none(v.attrib.get('width')),
|
||||
'height': int_or_none(v.attrib.get('height')),
|
||||
'filesize': int_or_none(v.attrib.get('size')),
|
||||
'tbr': int_or_none(v.attrib.get('systemBitrate')) / 1000.0,
|
||||
'ext': v.attrib.get('ext'),
|
||||
}
|
||||
src = v.attrib['src']
|
||||
if proto == 'http':
|
||||
if self._is_valid_url(src, video_id):
|
||||
f['url'] = src
|
||||
formats.append(f)
|
||||
elif proto == 'rtmp':
|
||||
f.update({
|
||||
'url': v.attrib['streamer'],
|
||||
'play_path': src,
|
||||
'rtmp_real_time': True,
|
||||
})
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@ -4,6 +4,7 @@ from __future__ import unicode_literals
|
||||
import json
|
||||
import re
|
||||
import itertools
|
||||
import hashlib
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .subtitles import SubtitlesInfoExtractor
|
||||
@ -18,6 +19,7 @@ from ..utils import (
|
||||
InAdvancePagedList,
|
||||
int_or_none,
|
||||
RegexNotFoundError,
|
||||
smuggle_url,
|
||||
std_headers,
|
||||
unsmuggle_url,
|
||||
urlencode_postdata,
|
||||
@ -174,7 +176,7 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
def _verify_video_password(self, url, video_id, webpage):
|
||||
password = self._downloader.params.get('videopassword', None)
|
||||
if password is None:
|
||||
raise ExtractorError('This video is protected by a password, use the --video-password option')
|
||||
raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True)
|
||||
token = self._search_regex(r'xsrft: \'(.*?)\'', webpage, 'login token')
|
||||
data = compat_urllib_parse.urlencode({
|
||||
'password': password,
|
||||
@ -224,6 +226,11 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
if mobj.group('pro') or mobj.group('player'):
|
||||
url = 'http://player.vimeo.com/video/' + video_id
|
||||
|
||||
password = self._downloader.params.get('videopassword', None)
|
||||
if password:
|
||||
headers['Cookie'] = '%s_password=%s' % (
|
||||
video_id, hashlib.md5(password.encode('utf-8')).hexdigest())
|
||||
|
||||
# Retrieve video webpage to extract further information
|
||||
request = compat_urllib_request.Request(url, None, headers)
|
||||
try:
|
||||
@ -267,8 +274,11 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
raise ExtractorError('The author has restricted the access to this video, try with the "--referer" option')
|
||||
|
||||
if re.search(r'<form[^>]+?id="pw_form"', webpage) is not None:
|
||||
if data and '_video_password_verified' in data:
|
||||
raise ExtractorError('video password verification failed!')
|
||||
self._verify_video_password(url, video_id, webpage)
|
||||
return self._real_extract(url)
|
||||
return self._real_extract(
|
||||
smuggle_url(url, {'_video_password_verified': 'verified'}))
|
||||
else:
|
||||
raise ExtractorError('Unable to extract info section',
|
||||
cause=e)
|
||||
@ -401,6 +411,7 @@ class VimeoChannelIE(InfoExtractor):
|
||||
_TESTS = [{
|
||||
'url': 'http://vimeo.com/channels/tributes',
|
||||
'info_dict': {
|
||||
'id': 'tributes',
|
||||
'title': 'Vimeo Tributes',
|
||||
},
|
||||
'playlist_mincount': 25,
|
||||
@ -479,6 +490,7 @@ class VimeoUserIE(VimeoChannelIE):
|
||||
'url': 'http://vimeo.com/nkistudio/videos',
|
||||
'info_dict': {
|
||||
'title': 'Nki',
|
||||
'id': 'nkistudio',
|
||||
},
|
||||
'playlist_mincount': 66,
|
||||
}]
|
||||
@ -496,6 +508,7 @@ class VimeoAlbumIE(VimeoChannelIE):
|
||||
_TESTS = [{
|
||||
'url': 'http://vimeo.com/album/2632481',
|
||||
'info_dict': {
|
||||
'id': '2632481',
|
||||
'title': 'Staff Favorites: November 2013',
|
||||
},
|
||||
'playlist_mincount': 13,
|
||||
@ -526,6 +539,7 @@ class VimeoGroupsIE(VimeoAlbumIE):
|
||||
_TESTS = [{
|
||||
'url': 'http://vimeo.com/groups/rolexawards',
|
||||
'info_dict': {
|
||||
'id': 'rolexawards',
|
||||
'title': 'Rolex Awards for Enterprise',
|
||||
},
|
||||
'playlist_mincount': 73,
|
||||
@ -608,6 +622,7 @@ class VimeoLikesIE(InfoExtractor):
|
||||
'url': 'https://vimeo.com/user755559/likes/',
|
||||
'playlist_mincount': 293,
|
||||
"info_dict": {
|
||||
'id': 'user755559_likes',
|
||||
"description": "See all the videos urza likes",
|
||||
"title": 'Videos urza likes',
|
||||
},
|
||||
|
@ -217,6 +217,9 @@ class VKUserVideosIE(InfoExtractor):
|
||||
_TEMPLATE_URL = 'https://vk.com/videos'
|
||||
_TEST = {
|
||||
'url': 'http://vk.com/videos205387401',
|
||||
'info_dict': {
|
||||
'id': '205387401',
|
||||
},
|
||||
'playlist_mincount': 4,
|
||||
}
|
||||
|
||||
|
@ -45,19 +45,17 @@ class WebOfStoriesIE(InfoExtractor):
|
||||
description = self._html_search_meta('description', webpage)
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
|
||||
story_filename = self._search_regex(
|
||||
r'\.storyFileName\("([^"]+)"\)', webpage, 'story filename')
|
||||
speaker_id = self._search_regex(
|
||||
r'\.speakerId\("([^"]+)"\)', webpage, 'speaker ID')
|
||||
story_id = self._search_regex(
|
||||
r'\.storyId\((\d+)\)', webpage, 'story ID')
|
||||
speaker_type = self._search_regex(
|
||||
r'\.speakerType\("([^"]+)"\)', webpage, 'speaker type')
|
||||
great_life = self._search_regex(
|
||||
r'isGreatLifeStory\s*=\s*(true|false)', webpage, 'great life story')
|
||||
embed_params = [s.strip(" \r\n\t'") for s in self._search_regex(
|
||||
r'(?s)\$\("#embedCode"\).html\(getEmbedCode\((.*?)\)',
|
||||
webpage, 'embed params').split(',')]
|
||||
|
||||
(
|
||||
_, speaker_id, story_id, story_duration,
|
||||
speaker_type, great_life, _thumbnail, _has_subtitles,
|
||||
story_filename, _story_order) = embed_params
|
||||
|
||||
is_great_life_series = great_life == 'true'
|
||||
duration = int_or_none(self._search_regex(
|
||||
r'\.duration\((\d+)\)', webpage, 'duration', fatal=False))
|
||||
duration = int_or_none(story_duration)
|
||||
|
||||
# URL building, see: http://www.webofstories.com/scripts/player.js
|
||||
ms_prefix = ''
|
||||
|
@ -18,8 +18,8 @@ class WSJIE(InfoExtractor):
|
||||
'id': '1BD01A4C-BFE8-40A5-A42F-8A8AF9898B1A',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20150202',
|
||||
'uploader_id': 'bbright',
|
||||
'creator': 'bbright',
|
||||
'uploader_id': 'jdesai',
|
||||
'creator': 'jdesai',
|
||||
'categories': list, # a long list
|
||||
'duration': 90,
|
||||
'title': 'Bills Coach Rex Ryan Updates His Old Jets Tattoo',
|
||||
|
@ -22,7 +22,7 @@ class XTubeIE(InfoExtractor):
|
||||
'id': 'kVTUy_G222_',
|
||||
'ext': 'mp4',
|
||||
'title': 'strange erotica',
|
||||
'description': 'http://www.xtube.com an ET kind of thing',
|
||||
'description': 'contains:an ET kind of thing',
|
||||
'uploader': 'greenshowers',
|
||||
'duration': 450,
|
||||
'age_limit': 18,
|
||||
|
@ -24,7 +24,6 @@ class YahooIE(InfoExtractor):
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
|
||||
'md5': '4962b075c08be8690a922ee026d05e69',
|
||||
'info_dict': {
|
||||
'id': '2d25e626-2378-391f-ada0-ddaf1417e588',
|
||||
'ext': 'mp4',
|
||||
|
@ -541,26 +541,30 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
if cache_spec is not None:
|
||||
return lambda s: ''.join(s[i] for i in cache_spec)
|
||||
|
||||
download_note = (
|
||||
'Downloading player %s' % player_url
|
||||
if self._downloader.params.get('verbose') else
|
||||
'Downloading %s player %s' % (player_type, player_id)
|
||||
)
|
||||
if player_type == 'js':
|
||||
code = self._download_webpage(
|
||||
player_url, video_id,
|
||||
note='Downloading %s player %s' % (player_type, player_id),
|
||||
note=download_note,
|
||||
errnote='Download of %s failed' % player_url)
|
||||
res = self._parse_sig_js(code)
|
||||
elif player_type == 'swf':
|
||||
urlh = self._request_webpage(
|
||||
player_url, video_id,
|
||||
note='Downloading %s player %s' % (player_type, player_id),
|
||||
note=download_note,
|
||||
errnote='Download of %s failed' % player_url)
|
||||
code = urlh.read()
|
||||
res = self._parse_sig_swf(code)
|
||||
else:
|
||||
assert False, 'Invalid player type %r' % player_type
|
||||
|
||||
if cache_spec is None:
|
||||
test_string = ''.join(map(compat_chr, range(len(example_sig))))
|
||||
cache_res = res(test_string)
|
||||
cache_spec = [ord(c) for c in cache_res]
|
||||
test_string = ''.join(map(compat_chr, range(len(example_sig))))
|
||||
cache_res = res(test_string)
|
||||
cache_spec = [ord(c) for c in cache_res]
|
||||
|
||||
self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
|
||||
return res
|
||||
|
110
youtube_dl/extractor/zapiks.py
Normal file
110
youtube_dl/extractor/zapiks.py
Normal file
@ -0,0 +1,110 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
xpath_with_ns,
|
||||
xpath_text,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class ZapiksIE(InfoExtractor):
    """Information extractor for zapiks.fr / zapiks.com video pages."""

    # Either a "<display_id>.html" page (optionally under a two-letter
    # language prefix) or an index.php URL carrying a numeric media_id.
    _VALID_URL = r'https?://(?:www\.)?zapiks\.(?:fr|com)/(?:(?:[a-z]{2}/)?(?P<display_id>.+?)\.html|index\.php\?.*\bmedia_id=(?P<id>\d+))'
    _TESTS = [
        {
            'url': 'http://www.zapiks.fr/ep2s3-bon-appetit-eh-be-viva.html',
            'md5': 'aeb3c473b2d564b2d46d664d28d5f050',
            'info_dict': {
                'id': '80798',
                'ext': 'mp4',
                'title': 'EP2S3 - Bon Appétit - Eh bé viva les pyrénées con!',
                'description': 'md5:7054d6f6f620c6519be1fe710d4da847',
                # Raw string: '\.' is a regex escape, not a string escape.
                'thumbnail': r're:^https?://.*\.jpg$',
                'duration': 528,
                'timestamp': 1359044972,
                'upload_date': '20130124',
                'view_count': int,
                'comment_count': int,
            },
        },
        {
            'url': 'http://www.zapiks.com/ep3s5-bon-appetit-baqueira-m-1.html',
            'only_matching': True,
        },
        {
            'url': 'http://www.zapiks.com/nl/ep3s5-bon-appetit-baqueira-m-1.html',
            'only_matching': True,
        },
        {
            'url': 'http://www.zapiks.fr/index.php?action=playerIframe&media_id=118046&width=640&height=360&autoStart=false&language=fr',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Build the info dict for the video identified by *url*.

        The numeric media id is taken from the URL when present, otherwise
        from the ``data-media-id`` attribute in the downloaded page.  Formats
        come from the ``jwplayer:source`` elements of the playlist XML; the
        remaining metadata is read from the playlist item and the web page.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        # ".html" URLs carry no numeric id, only a display id.
        display_id = mobj.group('display_id') or video_id

        webpage = self._download_webpage(url, display_id)

        if not video_id:
            video_id = self._search_regex(
                r'data-media-id="(\d+)"', webpage, 'video id')

        playlist = self._download_xml(
            'http://www.zapiks.fr/view/index.php?action=playlist&media_id=%s&lang=en' % video_id,
            display_id)

        NS_MAP = {
            'jwplayer': 'http://rss.jwpcdn.com/'
        }

        def ns(path):
            # Expand the "jwplayer:" prefix using NS_MAP.
            return xpath_with_ns(path, NS_MAP)

        item = playlist.find('./channel/item')

        # Prefer the playlist metadata; fall back to the page's Open Graph tags.
        title = xpath_text(item, 'title', 'title') or self._og_search_title(webpage)
        description = self._og_search_description(webpage, default=None)
        thumbnail = xpath_text(
            item, ns('./jwplayer:image'), 'thumbnail') or self._og_search_thumbnail(webpage, default=None)
        duration = parse_duration(self._html_search_meta(
            'duration', webpage, 'duration', default=None))
        # NOTE(review): the ' ' argument is presumably the date/time
        # delimiter expected by parse_iso8601 -- confirm against utils.
        timestamp = parse_iso8601(self._html_search_meta(
            'uploadDate', webpage, 'upload date', default=None), ' ')

        view_count = int_or_none(self._search_regex(
            r'UserPlays:(\d+)', webpage, 'view count', default=None))
        comment_count = int_or_none(self._search_regex(
            r'UserComments:(\d+)', webpage, 'comment count', default=None))

        formats = []
        for source in item.findall(ns('./jwplayer:source')):
            file_url = source.attrib.get('file')
            if not file_url:
                # A source without a URL is unusable; skip it rather than
                # abort the whole extraction with a KeyError.
                continue
            format_id = source.attrib.get('label')
            f = {
                'url': file_url,
                'format_id': format_id,
            }
            # Labels such as "720p" encode the frame height.
            m = re.search(r'^(?P<height>\d+)[pP]', format_id) if format_id else None
            if m:
                f['height'] = int(m.group('height'))
            formats.append(f)
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'timestamp': timestamp,
            'view_count': view_count,
            'comment_count': comment_count,
            'formats': formats,
        }
|
@ -30,13 +30,10 @@ class JSInterpreter(object):
|
||||
def __init__(self, code, objects=None):
|
||||
if objects is None:
|
||||
objects = {}
|
||||
self.code = self._remove_comments(code)
|
||||
self.code = code
|
||||
self._functions = {}
|
||||
self._objects = objects
|
||||
|
||||
def _remove_comments(self, code):
|
||||
return re.sub(r'(?s)/\*.*?\*/', '', code)
|
||||
|
||||
def interpret_statement(self, stmt, local_vars, allow_recursion=100):
|
||||
if allow_recursion < 0:
|
||||
raise ExtractorError('Recursion limit reached')
|
||||
|
@ -424,6 +424,10 @@ def parseOpts(overrideArguments=None):
|
||||
'--xattr-set-filesize',
|
||||
dest='xattr_set_filesize', action='store_true',
|
||||
help='(experimental) set file xattribute ytdl.filesize with expected filesize')
|
||||
downloader.add_option(
|
||||
'--hls-prefer-native',
|
||||
dest='hls_prefer_native', action='store_true',
|
||||
help='(experimental) Use the native HLS downloader instead of ffmpeg.')
|
||||
downloader.add_option(
|
||||
'--external-downloader',
|
||||
dest='external_downloader', metavar='COMMAND',
|
||||
|
@ -34,10 +34,10 @@ class FFmpegPostProcessor(PostProcessor):
|
||||
self._determine_executables()
|
||||
|
||||
def check_version(self):
|
||||
if not self.available():
|
||||
if not self.available:
|
||||
raise FFmpegPostProcessorError('ffmpeg or avconv not found. Please install one.')
|
||||
|
||||
required_version = '10-0' if self._uses_avconv() else '1.0'
|
||||
required_version = '10-0' if self.basename == 'avconv' else '1.0'
|
||||
if is_outdated_version(
|
||||
self._versions[self.basename], required_version):
|
||||
warning = 'Your copy of %s is outdated, update %s to version %s or newer if you encounter any errors.' % (
|
||||
@ -108,12 +108,10 @@ class FFmpegPostProcessor(PostProcessor):
|
||||
self.probe_basename = p
|
||||
break
|
||||
|
||||
@property
|
||||
def available(self):
|
||||
return self.basename is not None
|
||||
|
||||
def _uses_avconv(self):
|
||||
return self.basename == 'avconv'
|
||||
|
||||
@property
|
||||
def executable(self):
|
||||
return self._paths[self.basename]
|
||||
|
@ -900,8 +900,8 @@ def _windows_write_string(s, out):
|
||||
def not_a_console(handle):
|
||||
if handle == INVALID_HANDLE_VALUE or handle is None:
|
||||
return True
|
||||
return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
|
||||
or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)
|
||||
return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR or
|
||||
GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)
|
||||
|
||||
if not_a_console(h):
|
||||
return False
|
||||
@ -1560,8 +1560,8 @@ def js_to_json(code):
|
||||
return '"%s"' % v
|
||||
|
||||
res = re.sub(r'''(?x)
|
||||
"(?:[^"\\]*(?:\\\\|\\")?)*"|
|
||||
'(?:[^'\\]*(?:\\\\|\\')?)*'|
|
||||
"(?:[^"\\]*(?:\\\\|\\['"nu]))*[^"\\]*"|
|
||||
'(?:[^'\\]*(?:\\\\|\\['"nu]))*[^'\\]*'|
|
||||
[a-zA-Z_][.a-zA-Z_0-9]*
|
||||
''', fix_kv, code)
|
||||
res = re.sub(r',(\s*\])', lambda m: m.group(1), res)
|
||||
@ -1616,6 +1616,15 @@ def args_to_str(args):
|
||||
return ' '.join(shlex_quote(a) for a in args)
|
||||
|
||||
|
||||
def mimetype2ext(mt):
    """Map a MIME type string to a file extension.

    Any parameters after ';' are discarded and the subtype (the part after
    the last '/') is lower-cased.  A few subtypes that differ from their
    usual extension are translated; every other subtype is returned as-is.

    Returns None when *mt* is None (e.g. a response without a Content-Type
    header) instead of raising AttributeError.
    """
    if mt is None:
        return None

    # Strip parameters first ("video/mp4; charset=utf-8" -> "video/mp4") so
    # that a '/' inside a parameter value cannot be mistaken for the
    # type/subtype separator.
    mt = mt.split(';', 1)[0].strip().lower()
    _, _, res = mt.rpartition('/')

    return {
        'x-ms-wmv': 'wmv',
        'x-mp4-fragmented': 'mp4',
    }.get(res, res)
|
||||
|
||||
|
||||
def urlhandle_detect_ext(url_handle):
|
||||
try:
|
||||
url_handle.headers
|
||||
@ -1631,7 +1640,7 @@ def urlhandle_detect_ext(url_handle):
|
||||
if e:
|
||||
return e
|
||||
|
||||
return getheader('Content-Type').split("/")[1]
|
||||
return mimetype2ext(getheader('Content-Type'))
|
||||
|
||||
|
||||
def age_restricted(content_limit, age_limit):
|
||||
|
@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2015.02.16'
|
||||
__version__ = '2015.02.23'
|
||||
|
Reference in New Issue
Block a user