Ignore BOM in batch files (Fixes #2450)
This commit is contained in:
		| @@ -9,6 +9,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | ||||
|  | ||||
|  | ||||
| # Various small unit tests | ||||
| import io | ||||
| import xml.etree.ElementTree | ||||
|  | ||||
| #from youtube_dl.utils import htmlentity_transform | ||||
| @@ -21,6 +22,7 @@ from youtube_dl.utils import ( | ||||
|     orderedSet, | ||||
|     PagedList, | ||||
|     parse_duration, | ||||
|     read_batch_urls, | ||||
|     sanitize_filename, | ||||
|     shell_quote, | ||||
|     smuggle_url, | ||||
| @@ -250,5 +252,14 @@ class TestUtil(unittest.TestCase): | ||||
|     def test_struct_unpack(self): | ||||
|         self.assertEqual(struct_unpack(u'!B', b'\x00'), (0,)) | ||||
|  | ||||
|     def test_read_batch_urls(self): | ||||
|         f = io.StringIO(u'''\xef\xbb\xbf foo | ||||
|             bar\r | ||||
|             baz | ||||
|             # More after this line\r | ||||
|             ; or after this | ||||
|             bam''') | ||||
|         self.assertEqual(read_batch_urls(f), [u'foo', u'bar', u'baz', u'bam']) | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
|   | ||||
| @@ -71,6 +71,7 @@ from .utils import ( | ||||
|     get_cachedir, | ||||
|     MaxDownloadsReached, | ||||
|     preferredencoding, | ||||
|     read_batch_urls, | ||||
|     SameFileError, | ||||
|     setproctitle, | ||||
|     std_headers, | ||||
| @@ -552,21 +553,19 @@ def _real_main(argv=None): | ||||
|         sys.exit(0) | ||||
|  | ||||
|     # Batch file verification | ||||
|     batchurls = [] | ||||
|     batch_urls = [] | ||||
|     if opts.batchfile is not None: | ||||
|         try: | ||||
|             if opts.batchfile == '-': | ||||
|                 batchfd = sys.stdin | ||||
|             else: | ||||
|                 batchfd = open(opts.batchfile, 'r') | ||||
|             batchurls = batchfd.readlines() | ||||
|             batchurls = [x.strip() for x in batchurls] | ||||
|             batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)] | ||||
|                 batchfd = io.open(opts.batchfile, 'r', encoding='utf-8', errors='ignore') | ||||
|             batch_urls = read_batch_urls(batchfd) | ||||
|             if opts.verbose: | ||||
|                 write_string(u'[debug] Batch file urls: ' + repr(batchurls) + u'\n') | ||||
|                 write_string(u'[debug] Batch file urls: ' + repr(batch_urls) + u'\n') | ||||
|         except IOError: | ||||
|             sys.exit(u'ERROR: batch file could not be read') | ||||
|     all_urls = batchurls + args | ||||
|     all_urls = batch_urls + args | ||||
|     all_urls = [url.strip() for url in all_urls] | ||||
|     _enc = preferredencoding() | ||||
|     all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls] | ||||
|   | ||||
| @@ -1,6 +1,7 @@ | ||||
| #!/usr/bin/env python | ||||
| # -*- coding: utf-8 -*- | ||||
|  | ||||
| import contextlib | ||||
| import ctypes | ||||
| import datetime | ||||
| import email.utils | ||||
| @@ -1245,3 +1246,19 @@ except TypeError: | ||||
| else: | ||||
|     struct_pack = struct.pack | ||||
|     struct_unpack = struct.unpack | ||||
|  | ||||
|  | ||||
def read_batch_urls(batch_fd):
    """Return the list of URLs contained in an opened batch file.

    batch_fd is iterated line by line and is closed before returning.
    Lines may be text or bytes; bytes are decoded as UTF-8 with undecodable
    sequences replaced.  A leading UTF-8 byte order mark and surrounding
    whitespace are removed from every line.  Empty lines and lines starting
    with '#', ';' or ']' are dropped.
    """
    # The UTF-8 BOM is the single character U+FEFF once the data has been
    # decoded as UTF-8, but shows up as three separate characters when the
    # same bytes were decoded with a single-byte codec such as latin-1.
    # Both spellings have to be recognised.
    bom_variants = (u'\ufeff', u'\xef\xbb\xbf')

    def fixup(url):
        if isinstance(url, bytes):
            url = url.decode('utf-8', 'replace')
        for bom in bom_variants:
            if url.startswith(bom):
                url = url[len(bom):]
                break
        url = url.strip()
        if url.startswith(('#', ';', ']')):
            # Falsy marker: filtered out together with empty lines below.
            return False
        return url

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(fixup, fd) if url]
|   | ||||
		Reference in New Issue
	
	Block a user