Compare commits
68 Commits
2013.08.14
...
2013.08.23
Author | SHA1 | Date | |
---|---|---|---|
|
8ae7be3ef4 | ||
|
306170518f | ||
|
aa6a10c44a | ||
|
9af73dc4fc | ||
|
fc483bb6af | ||
|
53b0f3e4e2 | ||
|
4353cf51a0 | ||
|
ce34e9ce5e | ||
|
d4051a8e05 | ||
|
df3df7fb64 | ||
|
9e9c164052 | ||
|
066090dd3f | ||
|
74e6672beb | ||
|
02bcf0d389 | ||
|
10204dc898 | ||
|
1865ed31b9 | ||
|
3669cdba10 | ||
|
939fbd26ac | ||
|
e6ddb4e7af | ||
|
83390b83d9 | ||
|
4a55479fa9 | ||
|
f527115b5f | ||
|
75e1b46add | ||
|
063fcc9676 | ||
|
8403612258 | ||
|
25b51c7816 | ||
|
9779b63bb6 | ||
|
d81aef3adf | ||
|
45ed795cb0 | ||
|
683e98a8a4 | ||
|
e0cfeb2ea7 | ||
|
75340ee383 | ||
|
668de34c6b | ||
|
a91b954bb4 | ||
|
37b6d5f684 | ||
|
b7a6838407 | ||
|
cde846b3d3 | ||
|
6c3e6e88d3 | ||
|
739674cd77 | ||
|
4b2d7cae11 | ||
|
7fea7156cb | ||
|
3093468977 | ||
|
79cb25776f | ||
|
87f78946a5 | ||
|
211fbc1328 | ||
|
836a086ce9 | ||
|
90d3989b99 | ||
|
d741e55a42 | ||
|
17d3aaaf16 | ||
|
ea55b2a4ca | ||
|
3f0537dd4a | ||
|
12e895fc5a | ||
|
bda2c49d75 | ||
|
01b32990da | ||
|
dbda1b5147 | ||
|
ddf3bd328b | ||
|
b9c37b92cf | ||
|
f9c3c90ca8 | ||
|
6daccbe317 | ||
|
71ea844c0e | ||
|
3a7256697e | ||
|
2b9213cdc1 | ||
|
95fdc7d69c | ||
|
6804038d06 | ||
|
2f799533ae | ||
|
88ae5991cd | ||
|
5d51a883c2 | ||
|
c4a91be726 |
16
README.md
16
README.md
@@ -120,18 +120,20 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
--max-quality FORMAT highest quality format to download
|
||||
-F, --list-formats list all available formats (currently youtube
|
||||
only)
|
||||
|
||||
## Subtitle Options:
|
||||
--write-sub write subtitle file (currently youtube only)
|
||||
--write-auto-sub write automatic subtitle file (currently youtube
|
||||
only)
|
||||
--only-sub [deprecated] alias of --skip-download
|
||||
--all-subs downloads all the available subtitles of the
|
||||
video (currently youtube only)
|
||||
video
|
||||
--list-subs lists all available subtitles for the video
|
||||
(currently youtube only)
|
||||
--sub-format FORMAT subtitle format [srt/sbv/vtt] (default=srt)
|
||||
(currently youtube only)
|
||||
--sub-lang LANG language of the subtitles to download (optional)
|
||||
use IETF language tags like 'en'
|
||||
--sub-format FORMAT subtitle format (default=srt) ([sbv/vtt] youtube
|
||||
only)
|
||||
--sub-lang LANGS languages of the subtitles to download (optional)
|
||||
separated by commas, use IETF language tags like
|
||||
'en,pt'
|
||||
|
||||
## Authentication Options:
|
||||
-u, --username USERNAME account username
|
||||
@@ -153,6 +155,8 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
processing; the video is erased by default
|
||||
--no-post-overwrites do not overwrite post-processed files; the post-
|
||||
processed files are overwritten by default
|
||||
--embed-subs embed subtitles in the video (only for mp4
|
||||
videos)
|
||||
|
||||
# CONFIGURATION
|
||||
|
||||
|
@@ -11,30 +11,36 @@ tests = [
|
||||
# 90
|
||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`",
|
||||
"mrtyuioplkjhgfdsazxcvbne1234567890QWER[YUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={`]}|"),
|
||||
# 89
|
||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'",
|
||||
"/?;:|}<[{=+-_)(*&^%$#@!MqBVCXZASDFGHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuyt"),
|
||||
# 88
|
||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<",
|
||||
"J:|}][{=+-_)(*&;%$#@>MNBVCXZASDFGH^KLPOIUYTREWQ0987654321mnbvcxzasdfghrklpoiuytej"),
|
||||
# 87 - vflART1Nf 2013/07/24
|
||||
# 87
|
||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<",
|
||||
"tyuioplkjhgfdsazxcv<nm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>"),
|
||||
# 86 - vflm_D8eE 2013/07/31
|
||||
"uioplkjhgfdsazxcvbnm1t34567890QWE2TYUIOPLKJHGFDSAZXCVeNM!@#$^&*()_-+={[]}|:;?/>.<"),
|
||||
# 86 - vflh9ybst 2013/08/23
|
||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<",
|
||||
">.1}|[{=+-_)(*&^%$#@!MNBVCXZASDFGHJK<POIUYTREW509876L432/mnbvcxzasdfghjklpoiuytre"),
|
||||
# 85 - vflSAFCP9 2013/07/19
|
||||
"yuioplkjhgfdsazxcvbnm1234567890QWERrYUIOPLKqHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<"),
|
||||
# 85
|
||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<",
|
||||
"ertyuiqplkjhgfdsazx$vbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#<%^&*()_-+={[};?/c"),
|
||||
".>/?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ0q876543r1mnbvcx9asdfghjklpoiuyt2"),
|
||||
# 84
|
||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<",
|
||||
"<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWe098765432rmnbvcxzasdfghjklpoiuyt1"),
|
||||
# 83 - vflTWC9KW 2013/08/01
|
||||
"<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ09876543q1mnbvcxzasdfghjklpoiuew2"),
|
||||
# 83
|
||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<",
|
||||
"qwertyuioplkjhg>dsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/f"),
|
||||
# 82
|
||||
".>/?;}[{=+_)(*&^%<#!MNBVCXZASPFGHJKLwOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytreq"),
|
||||
# 82 - vflZK4ZYR 2013/08/23
|
||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<",
|
||||
"Q>/?;}[{=+-(*<^%$#@!MNBVCXZASDFGHKLPOIUY8REWT0q&7654321mnbvcxzasdfghjklpoiuytrew9"),
|
||||
"wertyuioplkjhgfdsaqxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&z(-+={[};?/>.<"),
|
||||
# 81 - vflLC8JvQ 2013/07/25
|
||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.",
|
||||
"C>/?;}[{=+-(*&^%$#@!MNBVYXZASDFGHKLPOIU.TREWQ0q87659321mnbvcxzasdfghjkl4oiuytrewp"),
|
||||
# 80 - vflZK4ZYR 2013/08/23 (sporadic)
|
||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>",
|
||||
"wertyuioplkjhgfdsaqxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&z(-+={[};?/>"),
|
||||
# 79 - vflLC8JvQ 2013/07/25 (sporadic)
|
||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/",
|
||||
"Z?;}[{=+-(*&^%$#@!MNBVCXRASDFGHKLPOIUYT/EWQ0q87659321mnbvcxzasdfghjkl4oiuytrewp"),
|
||||
|
@@ -50,6 +50,7 @@ class TestAllURLsMatching(unittest.TestCase):
|
||||
self.assertEqual(YoutubeIE()._extract_id('http://www.youtube.com/watch?&v=BaW_jenozKc'), 'BaW_jenozKc')
|
||||
self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch?&v=BaW_jenozKc'), 'BaW_jenozKc')
|
||||
self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc'), 'BaW_jenozKc')
|
||||
self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch_popup?v=BaW_jenozKc'), 'BaW_jenozKc')
|
||||
|
||||
def test_no_duplicates(self):
|
||||
ies = gen_extractors()
|
||||
|
@@ -35,47 +35,47 @@ class TestYoutubeSubtitles(unittest.TestCase):
|
||||
DL.params['writesubtitles'] = True
|
||||
IE = YoutubeIE(DL)
|
||||
info_dict = IE.extract('QRS8MkLhQmM')
|
||||
sub = info_dict[0]['subtitles'][0]
|
||||
self.assertEqual(md5(sub[2]), '4cd9278a35ba2305f47354ee13472260')
|
||||
sub = info_dict[0]['subtitles']['en']
|
||||
self.assertEqual(md5(sub), '4cd9278a35ba2305f47354ee13472260')
|
||||
def test_youtube_subtitles_it(self):
|
||||
DL = FakeYDL()
|
||||
DL.params['writesubtitles'] = True
|
||||
DL.params['subtitleslang'] = 'it'
|
||||
DL.params['subtitleslangs'] = ['it']
|
||||
IE = YoutubeIE(DL)
|
||||
info_dict = IE.extract('QRS8MkLhQmM')
|
||||
sub = info_dict[0]['subtitles'][0]
|
||||
self.assertEqual(md5(sub[2]), '164a51f16f260476a05b50fe4c2f161d')
|
||||
sub = info_dict[0]['subtitles']['it']
|
||||
self.assertEqual(md5(sub), '164a51f16f260476a05b50fe4c2f161d')
|
||||
def test_youtube_onlysubtitles(self):
|
||||
DL = FakeYDL()
|
||||
DL.params['writesubtitles'] = True
|
||||
DL.params['onlysubtitles'] = True
|
||||
IE = YoutubeIE(DL)
|
||||
info_dict = IE.extract('QRS8MkLhQmM')
|
||||
sub = info_dict[0]['subtitles'][0]
|
||||
self.assertEqual(md5(sub[2]), '4cd9278a35ba2305f47354ee13472260')
|
||||
sub = info_dict[0]['subtitles']['en']
|
||||
self.assertEqual(md5(sub), '4cd9278a35ba2305f47354ee13472260')
|
||||
def test_youtube_allsubtitles(self):
|
||||
DL = FakeYDL()
|
||||
DL.params['allsubtitles'] = True
|
||||
IE = YoutubeIE(DL)
|
||||
info_dict = IE.extract('QRS8MkLhQmM')
|
||||
subtitles = info_dict[0]['subtitles']
|
||||
self.assertEqual(len(subtitles), 13)
|
||||
self.assertEqual(len(subtitles.keys()), 13)
|
||||
def test_youtube_subtitles_sbv_format(self):
|
||||
DL = FakeYDL()
|
||||
DL.params['writesubtitles'] = True
|
||||
DL.params['subtitlesformat'] = 'sbv'
|
||||
IE = YoutubeIE(DL)
|
||||
info_dict = IE.extract('QRS8MkLhQmM')
|
||||
sub = info_dict[0]['subtitles'][0]
|
||||
self.assertEqual(md5(sub[2]), '13aeaa0c245a8bed9a451cb643e3ad8b')
|
||||
sub = info_dict[0]['subtitles']['en']
|
||||
self.assertEqual(md5(sub), '13aeaa0c245a8bed9a451cb643e3ad8b')
|
||||
def test_youtube_subtitles_vtt_format(self):
|
||||
DL = FakeYDL()
|
||||
DL.params['writesubtitles'] = True
|
||||
DL.params['subtitlesformat'] = 'vtt'
|
||||
IE = YoutubeIE(DL)
|
||||
info_dict = IE.extract('QRS8MkLhQmM')
|
||||
sub = info_dict[0]['subtitles'][0]
|
||||
self.assertEqual(md5(sub[2]), '356cdc577fde0c6783b9b822e7206ff7')
|
||||
sub = info_dict[0]['subtitles']['en']
|
||||
self.assertEqual(md5(sub), '356cdc577fde0c6783b9b822e7206ff7')
|
||||
def test_youtube_list_subtitles(self):
|
||||
DL = FakeYDL()
|
||||
DL.params['listsubtitles'] = True
|
||||
@@ -85,11 +85,20 @@ class TestYoutubeSubtitles(unittest.TestCase):
|
||||
def test_youtube_automatic_captions(self):
|
||||
DL = FakeYDL()
|
||||
DL.params['writeautomaticsub'] = True
|
||||
DL.params['subtitleslang'] = 'it'
|
||||
DL.params['subtitleslangs'] = ['it']
|
||||
IE = YoutubeIE(DL)
|
||||
info_dict = IE.extract('8YoUxe5ncPo')
|
||||
sub = info_dict[0]['subtitles'][0]
|
||||
self.assertTrue(sub[2] is not None)
|
||||
sub = info_dict[0]['subtitles']['it']
|
||||
self.assertTrue(sub is not None)
|
||||
def test_youtube_multiple_langs(self):
|
||||
DL = FakeYDL()
|
||||
DL.params['writesubtitles'] = True
|
||||
langs = ['it', 'fr', 'de']
|
||||
DL.params['subtitleslangs'] = langs
|
||||
IE = YoutubeIE(DL)
|
||||
subtitles = IE.extract('QRS8MkLhQmM')[0]['subtitles']
|
||||
for lang in langs:
|
||||
self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang)
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -79,9 +79,13 @@ class FileDownloader(object):
|
||||
rate = float(current) / dif
|
||||
eta = int((float(total) - float(current)) / rate)
|
||||
(eta_mins, eta_secs) = divmod(eta, 60)
|
||||
if eta_mins > 99:
|
||||
return '--:--'
|
||||
return '%02d:%02d' % (eta_mins, eta_secs)
|
||||
(eta_hours, eta_mins) = divmod(eta_mins, 60)
|
||||
if eta_hours > 99:
|
||||
return '--:--:--'
|
||||
if eta_hours == 0:
|
||||
return '%02d:%02d' % (eta_mins, eta_secs)
|
||||
else:
|
||||
return '%02d:%02d:%02d' % (eta_hours, eta_mins, eta_secs)
|
||||
|
||||
@staticmethod
|
||||
def calc_speed(start, now, bytes):
|
||||
|
@@ -71,12 +71,17 @@ class FFmpegPostProcessor(PostProcessor):
|
||||
programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe']
|
||||
return dict((program, executable(program)) for program in programs)
|
||||
|
||||
def run_ffmpeg(self, path, out_path, opts):
|
||||
def run_ffmpeg_multiple_files(self, input_paths, out_path, opts):
|
||||
if not self._exes['ffmpeg'] and not self._exes['avconv']:
|
||||
raise FFmpegPostProcessorError(u'ffmpeg or avconv not found. Please install one.')
|
||||
cmd = ([self._exes['avconv'] or self._exes['ffmpeg'], '-y', '-i', encodeFilename(path)]
|
||||
|
||||
files_cmd = []
|
||||
for path in input_paths:
|
||||
files_cmd.extend(['-i', encodeFilename(path)])
|
||||
cmd = ([self._exes['avconv'] or self._exes['ffmpeg'], '-y'] + files_cmd
|
||||
+ opts +
|
||||
[encodeFilename(self._ffmpeg_filename_argument(out_path))])
|
||||
|
||||
p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||
stdout,stderr = p.communicate()
|
||||
if p.returncode != 0:
|
||||
@@ -84,6 +89,9 @@ class FFmpegPostProcessor(PostProcessor):
|
||||
msg = stderr.strip().split('\n')[-1]
|
||||
raise FFmpegPostProcessorError(msg)
|
||||
|
||||
def run_ffmpeg(self, path, out_path, opts):
|
||||
self.run_ffmpeg_multiple_files([path], out_path, opts)
|
||||
|
||||
def _ffmpeg_filename_argument(self, fn):
|
||||
# ffmpeg broke --, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details
|
||||
if fn.startswith(u'-'):
|
||||
@@ -232,3 +240,227 @@ class FFmpegVideoConvertor(FFmpegPostProcessor):
|
||||
information['format'] = self._preferedformat
|
||||
information['ext'] = self._preferedformat
|
||||
return False,information
|
||||
|
||||
|
||||
class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
|
||||
# See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
|
||||
_lang_map = {
|
||||
'aa': 'aar',
|
||||
'ab': 'abk',
|
||||
'ae': 'ave',
|
||||
'af': 'afr',
|
||||
'ak': 'aka',
|
||||
'am': 'amh',
|
||||
'an': 'arg',
|
||||
'ar': 'ara',
|
||||
'as': 'asm',
|
||||
'av': 'ava',
|
||||
'ay': 'aym',
|
||||
'az': 'aze',
|
||||
'ba': 'bak',
|
||||
'be': 'bel',
|
||||
'bg': 'bul',
|
||||
'bh': 'bih',
|
||||
'bi': 'bis',
|
||||
'bm': 'bam',
|
||||
'bn': 'ben',
|
||||
'bo': 'bod',
|
||||
'br': 'bre',
|
||||
'bs': 'bos',
|
||||
'ca': 'cat',
|
||||
'ce': 'che',
|
||||
'ch': 'cha',
|
||||
'co': 'cos',
|
||||
'cr': 'cre',
|
||||
'cs': 'ces',
|
||||
'cu': 'chu',
|
||||
'cv': 'chv',
|
||||
'cy': 'cym',
|
||||
'da': 'dan',
|
||||
'de': 'deu',
|
||||
'dv': 'div',
|
||||
'dz': 'dzo',
|
||||
'ee': 'ewe',
|
||||
'el': 'ell',
|
||||
'en': 'eng',
|
||||
'eo': 'epo',
|
||||
'es': 'spa',
|
||||
'et': 'est',
|
||||
'eu': 'eus',
|
||||
'fa': 'fas',
|
||||
'ff': 'ful',
|
||||
'fi': 'fin',
|
||||
'fj': 'fij',
|
||||
'fo': 'fao',
|
||||
'fr': 'fra',
|
||||
'fy': 'fry',
|
||||
'ga': 'gle',
|
||||
'gd': 'gla',
|
||||
'gl': 'glg',
|
||||
'gn': 'grn',
|
||||
'gu': 'guj',
|
||||
'gv': 'glv',
|
||||
'ha': 'hau',
|
||||
'he': 'heb',
|
||||
'hi': 'hin',
|
||||
'ho': 'hmo',
|
||||
'hr': 'hrv',
|
||||
'ht': 'hat',
|
||||
'hu': 'hun',
|
||||
'hy': 'hye',
|
||||
'hz': 'her',
|
||||
'ia': 'ina',
|
||||
'id': 'ind',
|
||||
'ie': 'ile',
|
||||
'ig': 'ibo',
|
||||
'ii': 'iii',
|
||||
'ik': 'ipk',
|
||||
'io': 'ido',
|
||||
'is': 'isl',
|
||||
'it': 'ita',
|
||||
'iu': 'iku',
|
||||
'ja': 'jpn',
|
||||
'jv': 'jav',
|
||||
'ka': 'kat',
|
||||
'kg': 'kon',
|
||||
'ki': 'kik',
|
||||
'kj': 'kua',
|
||||
'kk': 'kaz',
|
||||
'kl': 'kal',
|
||||
'km': 'khm',
|
||||
'kn': 'kan',
|
||||
'ko': 'kor',
|
||||
'kr': 'kau',
|
||||
'ks': 'kas',
|
||||
'ku': 'kur',
|
||||
'kv': 'kom',
|
||||
'kw': 'cor',
|
||||
'ky': 'kir',
|
||||
'la': 'lat',
|
||||
'lb': 'ltz',
|
||||
'lg': 'lug',
|
||||
'li': 'lim',
|
||||
'ln': 'lin',
|
||||
'lo': 'lao',
|
||||
'lt': 'lit',
|
||||
'lu': 'lub',
|
||||
'lv': 'lav',
|
||||
'mg': 'mlg',
|
||||
'mh': 'mah',
|
||||
'mi': 'mri',
|
||||
'mk': 'mkd',
|
||||
'ml': 'mal',
|
||||
'mn': 'mon',
|
||||
'mr': 'mar',
|
||||
'ms': 'msa',
|
||||
'mt': 'mlt',
|
||||
'my': 'mya',
|
||||
'na': 'nau',
|
||||
'nb': 'nob',
|
||||
'nd': 'nde',
|
||||
'ne': 'nep',
|
||||
'ng': 'ndo',
|
||||
'nl': 'nld',
|
||||
'nn': 'nno',
|
||||
'no': 'nor',
|
||||
'nr': 'nbl',
|
||||
'nv': 'nav',
|
||||
'ny': 'nya',
|
||||
'oc': 'oci',
|
||||
'oj': 'oji',
|
||||
'om': 'orm',
|
||||
'or': 'ori',
|
||||
'os': 'oss',
|
||||
'pa': 'pan',
|
||||
'pi': 'pli',
|
||||
'pl': 'pol',
|
||||
'ps': 'pus',
|
||||
'pt': 'por',
|
||||
'qu': 'que',
|
||||
'rm': 'roh',
|
||||
'rn': 'run',
|
||||
'ro': 'ron',
|
||||
'ru': 'rus',
|
||||
'rw': 'kin',
|
||||
'sa': 'san',
|
||||
'sc': 'srd',
|
||||
'sd': 'snd',
|
||||
'se': 'sme',
|
||||
'sg': 'sag',
|
||||
'si': 'sin',
|
||||
'sk': 'slk',
|
||||
'sl': 'slv',
|
||||
'sm': 'smo',
|
||||
'sn': 'sna',
|
||||
'so': 'som',
|
||||
'sq': 'sqi',
|
||||
'sr': 'srp',
|
||||
'ss': 'ssw',
|
||||
'st': 'sot',
|
||||
'su': 'sun',
|
||||
'sv': 'swe',
|
||||
'sw': 'swa',
|
||||
'ta': 'tam',
|
||||
'te': 'tel',
|
||||
'tg': 'tgk',
|
||||
'th': 'tha',
|
||||
'ti': 'tir',
|
||||
'tk': 'tuk',
|
||||
'tl': 'tgl',
|
||||
'tn': 'tsn',
|
||||
'to': 'ton',
|
||||
'tr': 'tur',
|
||||
'ts': 'tso',
|
||||
'tt': 'tat',
|
||||
'tw': 'twi',
|
||||
'ty': 'tah',
|
||||
'ug': 'uig',
|
||||
'uk': 'ukr',
|
||||
'ur': 'urd',
|
||||
'uz': 'uzb',
|
||||
've': 'ven',
|
||||
'vi': 'vie',
|
||||
'vo': 'vol',
|
||||
'wa': 'wln',
|
||||
'wo': 'wol',
|
||||
'xh': 'xho',
|
||||
'yi': 'yid',
|
||||
'yo': 'yor',
|
||||
'za': 'zha',
|
||||
'zh': 'zho',
|
||||
'zu': 'zul',
|
||||
}
|
||||
|
||||
def __init__(self, downloader=None, subtitlesformat='srt'):
|
||||
super(FFmpegEmbedSubtitlePP, self).__init__(downloader)
|
||||
self._subformat = subtitlesformat
|
||||
|
||||
@classmethod
|
||||
def _conver_lang_code(cls, code):
|
||||
"""Convert language code from ISO 639-1 to ISO 639-2/T"""
|
||||
return cls._lang_map.get(code[:2])
|
||||
|
||||
def run(self, information):
|
||||
if information['ext'] != u'mp4':
|
||||
self._downloader.to_screen(u'[ffmpeg] Subtitles can only be embedded in mp4 files')
|
||||
return True, information
|
||||
sub_langs = [key for key in information['subtitles']]
|
||||
|
||||
filename = information['filepath']
|
||||
input_files = [filename] + [subtitles_filename(filename, lang, self._subformat) for lang in sub_langs]
|
||||
|
||||
opts = ['-map', '0:0', '-map', '0:1', '-c:v', 'copy', '-c:a', 'copy']
|
||||
for (i, lang) in enumerate(sub_langs):
|
||||
opts.extend(['-map', '%d:0' % (i+1), '-c:s:%d' % i, 'mov_text'])
|
||||
lang_code = self._conver_lang_code(lang)
|
||||
if lang_code is not None:
|
||||
opts.extend(['-metadata:s:s:%d' % i, 'language=%s' % lang_code])
|
||||
opts.extend(['-f', 'mp4'])
|
||||
|
||||
temp_filename = filename + u'.temp'
|
||||
self._downloader.to_screen(u'[ffmpeg] Embedding subtitles in \'%s\'' % filename)
|
||||
self.run_ffmpeg_multiple_files(input_files, temp_filename, opts)
|
||||
os.remove(encodeFilename(filename))
|
||||
os.rename(encodeFilename(temp_filename), encodeFilename(filename))
|
||||
|
||||
return True, information
|
||||
|
@@ -76,7 +76,7 @@ class YoutubeDL(object):
|
||||
allsubtitles: Downloads all the subtitles of the video
|
||||
listsubtitles: Lists all available subtitles for the video
|
||||
subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
|
||||
subtitleslang: Language of the subtitles to download
|
||||
subtitleslangs: List of languages of the subtitles to download
|
||||
keepvideo: Keep the video file after post-processing
|
||||
daterange: A DateRange object, download only if the upload_date is in the range.
|
||||
skip_download: Skip the actual download of the video file
|
||||
@@ -483,41 +483,28 @@ class YoutubeDL(object):
|
||||
self.report_error(u'Cannot write description file ' + descfn)
|
||||
return
|
||||
|
||||
if (self.params.get('writesubtitles', False) or self.params.get('writeautomaticsub')) and 'subtitles' in info_dict and info_dict['subtitles']:
|
||||
subtitles_are_requested = any([self.params.get('writesubtitles', False),
|
||||
self.params.get('writeautomaticsub'),
|
||||
self.params.get('allsubtitles', False)])
|
||||
|
||||
if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
|
||||
# subtitles download errors are already managed as troubles in relevant IE
|
||||
# that way it will silently go on when used with unsupporting IE
|
||||
subtitle = info_dict['subtitles'][0]
|
||||
(sub_error, sub_lang, sub) = subtitle
|
||||
subtitles = info_dict['subtitles']
|
||||
sub_format = self.params.get('subtitlesformat')
|
||||
if sub_error:
|
||||
self.report_warning("Some error while getting the subtitles")
|
||||
else:
|
||||
for sub_lang in subtitles.keys():
|
||||
sub = subtitles[sub_lang]
|
||||
if sub is None:
|
||||
continue
|
||||
try:
|
||||
sub_filename = filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format
|
||||
sub_filename = subtitles_filename(filename, sub_lang, sub_format)
|
||||
self.report_writesubtitles(sub_filename)
|
||||
with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
|
||||
subfile.write(sub)
|
||||
subfile.write(sub)
|
||||
except (OSError, IOError):
|
||||
self.report_error(u'Cannot write subtitles file ' + descfn)
|
||||
return
|
||||
|
||||
if self.params.get('allsubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']:
|
||||
subtitles = info_dict['subtitles']
|
||||
sub_format = self.params.get('subtitlesformat')
|
||||
for subtitle in subtitles:
|
||||
(sub_error, sub_lang, sub) = subtitle
|
||||
if sub_error:
|
||||
self.report_warning("Some error while getting the subtitles")
|
||||
else:
|
||||
try:
|
||||
sub_filename = filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format
|
||||
self.report_writesubtitles(sub_filename)
|
||||
with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
|
||||
subfile.write(sub)
|
||||
except (OSError, IOError):
|
||||
self.report_error(u'Cannot write subtitles file ' + descfn)
|
||||
return
|
||||
|
||||
if self.params.get('writeinfojson', False):
|
||||
infofn = filename + u'.info.json'
|
||||
self.report_writeinfojson(infofn)
|
||||
|
@@ -27,6 +27,7 @@ __authors__ = (
|
||||
'Johny Mo Swag',
|
||||
'Axel Noack',
|
||||
'Albert Kim',
|
||||
'Pierre Rudloff',
|
||||
)
|
||||
|
||||
__license__ = 'Public Domain'
|
||||
@@ -82,6 +83,9 @@ def parseOpts(overrideArguments=None):
|
||||
|
||||
return "".join(opts)
|
||||
|
||||
def _comma_separated_values_options_callback(option, opt_str, value, parser):
|
||||
setattr(parser.values, option.dest, value.split(','))
|
||||
|
||||
def _find_term_columns():
|
||||
columns = os.environ.get('COLUMNS', None)
|
||||
if columns:
|
||||
@@ -119,6 +123,7 @@ def parseOpts(overrideArguments=None):
|
||||
selection = optparse.OptionGroup(parser, 'Video Selection')
|
||||
authentication = optparse.OptionGroup(parser, 'Authentication Options')
|
||||
video_format = optparse.OptionGroup(parser, 'Video Format Options')
|
||||
subtitles = optparse.OptionGroup(parser, 'Subtitle Options')
|
||||
downloader = optparse.OptionGroup(parser, 'Download Options')
|
||||
postproc = optparse.OptionGroup(parser, 'Post-processing Options')
|
||||
filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
|
||||
@@ -185,27 +190,29 @@ def parseOpts(overrideArguments=None):
|
||||
action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download')
|
||||
video_format.add_option('-F', '--list-formats',
|
||||
action='store_true', dest='listformats', help='list all available formats (currently youtube only)')
|
||||
video_format.add_option('--write-sub', '--write-srt',
|
||||
|
||||
subtitles.add_option('--write-sub', '--write-srt',
|
||||
action='store_true', dest='writesubtitles',
|
||||
help='write subtitle file (currently youtube only)', default=False)
|
||||
video_format.add_option('--write-auto-sub', '--write-automatic-sub',
|
||||
subtitles.add_option('--write-auto-sub', '--write-automatic-sub',
|
||||
action='store_true', dest='writeautomaticsub',
|
||||
help='write automatic subtitle file (currently youtube only)', default=False)
|
||||
video_format.add_option('--only-sub',
|
||||
subtitles.add_option('--only-sub',
|
||||
action='store_true', dest='skip_download',
|
||||
help='[deprecated] alias of --skip-download', default=False)
|
||||
video_format.add_option('--all-subs',
|
||||
subtitles.add_option('--all-subs',
|
||||
action='store_true', dest='allsubtitles',
|
||||
help='downloads all the available subtitles of the video (currently youtube only)', default=False)
|
||||
video_format.add_option('--list-subs',
|
||||
help='downloads all the available subtitles of the video', default=False)
|
||||
subtitles.add_option('--list-subs',
|
||||
action='store_true', dest='listsubtitles',
|
||||
help='lists all available subtitles for the video (currently youtube only)', default=False)
|
||||
video_format.add_option('--sub-format',
|
||||
help='lists all available subtitles for the video', default=False)
|
||||
subtitles.add_option('--sub-format',
|
||||
action='store', dest='subtitlesformat', metavar='FORMAT',
|
||||
help='subtitle format [srt/sbv/vtt] (default=srt) (currently youtube only)', default='srt')
|
||||
video_format.add_option('--sub-lang', '--srt-lang',
|
||||
action='store', dest='subtitleslang', metavar='LANG',
|
||||
help='language of the subtitles to download (optional) use IETF language tags like \'en\'')
|
||||
help='subtitle format (default=srt) ([sbv/vtt] youtube only)', default='srt')
|
||||
subtitles.add_option('--sub-lang', '--sub-langs', '--srt-lang',
|
||||
action='callback', dest='subtitleslang', metavar='LANGS', type='str',
|
||||
default=[], callback=_comma_separated_values_options_callback,
|
||||
help='languages of the subtitles to download (optional) separated by commas, use IETF language tags like \'en,pt\'')
|
||||
|
||||
downloader.add_option('-r', '--rate-limit',
|
||||
dest='ratelimit', metavar='LIMIT', help='maximum download rate (e.g. 50k or 44.6m)')
|
||||
@@ -320,6 +327,8 @@ def parseOpts(overrideArguments=None):
|
||||
help='keeps the video file on disk after the post-processing; the video is erased by default')
|
||||
postproc.add_option('--no-post-overwrites', action='store_true', dest='nopostoverwrites', default=False,
|
||||
help='do not overwrite post-processed files; the post-processed files are overwritten by default')
|
||||
postproc.add_option('--embed-subs', action='store_true', dest='embedsubtitles', default=False,
|
||||
help='embed subtitles in the video (only for mp4 videos)')
|
||||
|
||||
|
||||
parser.add_option_group(general)
|
||||
@@ -328,6 +337,7 @@ def parseOpts(overrideArguments=None):
|
||||
parser.add_option_group(filesystem)
|
||||
parser.add_option_group(verbosity)
|
||||
parser.add_option_group(video_format)
|
||||
parser.add_option_group(subtitles)
|
||||
parser.add_option_group(authentication)
|
||||
parser.add_option_group(postproc)
|
||||
|
||||
@@ -343,7 +353,7 @@ def parseOpts(overrideArguments=None):
|
||||
userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf')
|
||||
systemConf = _readOptions('/etc/youtube-dl.conf')
|
||||
userConf = _readOptions(userConfFile)
|
||||
commandLineConf = sys.argv[1:]
|
||||
commandLineConf = sys.argv[1:]
|
||||
argv = systemConf + userConf + commandLineConf
|
||||
opts, args = parser.parse_args(argv)
|
||||
if opts.verbose:
|
||||
@@ -377,7 +387,7 @@ def _real_main(argv=None):
|
||||
# Set user agent
|
||||
if opts.user_agent is not None:
|
||||
std_headers['User-Agent'] = opts.user_agent
|
||||
|
||||
|
||||
# Set referer
|
||||
if opts.referer is not None:
|
||||
std_headers['Referer'] = opts.referer
|
||||
@@ -567,7 +577,7 @@ def _real_main(argv=None):
|
||||
'allsubtitles': opts.allsubtitles,
|
||||
'listsubtitles': opts.listsubtitles,
|
||||
'subtitlesformat': opts.subtitlesformat,
|
||||
'subtitleslang': opts.subtitleslang,
|
||||
'subtitleslangs': opts.subtitleslang,
|
||||
'matchtitle': decodeOption(opts.matchtitle),
|
||||
'rejecttitle': decodeOption(opts.rejecttitle),
|
||||
'max_downloads': opts.max_downloads,
|
||||
@@ -607,6 +617,8 @@ def _real_main(argv=None):
|
||||
ydl.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, nopostoverwrites=opts.nopostoverwrites))
|
||||
if opts.recodevideo:
|
||||
ydl.add_post_processor(FFmpegVideoConvertor(preferedformat=opts.recodevideo))
|
||||
if opts.embedsubtitles:
|
||||
ydl.add_post_processor(FFmpegEmbedSubtitlePP(subtitlesformat=opts.subtitlesformat))
|
||||
|
||||
# Update version
|
||||
if opts.update_self:
|
||||
|
@@ -36,6 +36,7 @@ from .ign import IGNIE, OneUPIE
|
||||
from .ina import InaIE
|
||||
from .infoq import InfoQIE
|
||||
from .instagram import InstagramIE
|
||||
from .jeuxvideo import JeuxVideoIE
|
||||
from .jukebox import JukeboxIE
|
||||
from .justintv import JustinTVIE
|
||||
from .kankan import KankanIE
|
||||
@@ -50,13 +51,16 @@ from .myspass import MySpassIE
|
||||
from .myvideo import MyVideoIE
|
||||
from .nba import NBAIE
|
||||
from .ooyala import OoyalaIE
|
||||
from .pbs import PBSIE
|
||||
from .photobucket import PhotobucketIE
|
||||
from .pornotube import PornotubeIE
|
||||
from .rbmaradio import RBMARadioIE
|
||||
from .redtube import RedTubeIE
|
||||
from .ringtv import RingTVIE
|
||||
from .roxwel import RoxwelIE
|
||||
from .rtlnow import RTLnowIE
|
||||
from .sina import SinaIE
|
||||
from .slashdot import SlashdotIE
|
||||
from .soundcloud import SoundcloudIE, SoundcloudSetIE
|
||||
from .spiegel import SpiegelIE
|
||||
from .stanfordoc import StanfordOpenClassroomIE
|
||||
@@ -71,6 +75,7 @@ from .tudou import TudouIE
|
||||
from .tumblr import TumblrIE
|
||||
from .tutv import TutvIE
|
||||
from .ustream import UstreamIE
|
||||
from .unistra import UnistraIE
|
||||
from .vbox7 import Vbox7IE
|
||||
from .veoh import VeohIE
|
||||
from .vevo import VevoIE
|
||||
|
@@ -4,6 +4,7 @@ import xml.etree.ElementTree
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse_urlparse,
|
||||
determine_ext,
|
||||
|
||||
ExtractorError,
|
||||
)
|
||||
@@ -12,7 +13,7 @@ from ..utils import (
|
||||
class CollegeHumorIE(InfoExtractor):
|
||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/(video|embed|e)/(?P<videoid>[0-9]+)/?(?P<shorttitle>.*)$'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
u'url': u'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe',
|
||||
u'file': u'6902724.mp4',
|
||||
u'md5': u'1264c12ad95dca142a9f0bf7968105a0',
|
||||
@@ -20,7 +21,16 @@ class CollegeHumorIE(InfoExtractor):
|
||||
u'title': u'Comic-Con Cosplay Catastrophe',
|
||||
u'description': u'Fans get creative this year at San Diego. Too creative. And yes, that\'s really Joss Whedon.',
|
||||
},
|
||||
}
|
||||
},
|
||||
{
|
||||
u'url': u'http://www.collegehumor.com/video/3505939/font-conference',
|
||||
u'file': u'3505939.mp4',
|
||||
u'md5': u'c51ca16b82bb456a4397987791a835f5',
|
||||
u'info_dict': {
|
||||
u'title': u'Font Conference',
|
||||
u'description': u'This video wasn\'t long enough, so we made it double-spaced.',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
@@ -49,25 +59,29 @@ class CollegeHumorIE(InfoExtractor):
|
||||
info['description'] = videoNode.findall('./description')[0].text
|
||||
info['title'] = videoNode.findall('./caption')[0].text
|
||||
info['thumbnail'] = videoNode.findall('./thumbnail')[0].text
|
||||
manifest_url = videoNode.findall('./file')[0].text
|
||||
next_url = videoNode.findall('./file')[0].text
|
||||
except IndexError:
|
||||
raise ExtractorError(u'Invalid metadata XML file')
|
||||
|
||||
manifest_url += '?hdcore=2.10.3'
|
||||
manifestXml = self._download_webpage(manifest_url, video_id,
|
||||
u'Downloading XML manifest',
|
||||
u'Unable to download video info XML')
|
||||
if next_url.endswith(u'manifest.f4m'):
|
||||
manifest_url = next_url + '?hdcore=2.10.3'
|
||||
manifestXml = self._download_webpage(manifest_url, video_id,
|
||||
u'Downloading XML manifest',
|
||||
u'Unable to download video info XML')
|
||||
|
||||
adoc = xml.etree.ElementTree.fromstring(manifestXml)
|
||||
try:
|
||||
media_node = adoc.findall('./{http://ns.adobe.com/f4m/1.0}media')[0]
|
||||
node_id = media_node.attrib['url']
|
||||
video_id = adoc.findall('./{http://ns.adobe.com/f4m/1.0}id')[0].text
|
||||
except IndexError as err:
|
||||
raise ExtractorError(u'Invalid manifest file')
|
||||
adoc = xml.etree.ElementTree.fromstring(manifestXml)
|
||||
try:
|
||||
media_node = adoc.findall('./{http://ns.adobe.com/f4m/1.0}media')[0]
|
||||
node_id = media_node.attrib['url']
|
||||
video_id = adoc.findall('./{http://ns.adobe.com/f4m/1.0}id')[0].text
|
||||
except IndexError as err:
|
||||
raise ExtractorError(u'Invalid manifest file')
|
||||
url_pr = compat_urllib_parse_urlparse(info['thumbnail'])
|
||||
info['url'] = url_pr.scheme + '://' + url_pr.netloc + video_id[:-2].replace('.csmil','').replace(',','')
|
||||
info['ext'] = 'mp4'
|
||||
else:
|
||||
# Old-style direct links
|
||||
info['url'] = next_url
|
||||
info['ext'] = determine_ext(info['url'])
|
||||
|
||||
url_pr = compat_urllib_parse_urlparse(info['thumbnail'])
|
||||
|
||||
info['url'] = url_pr.scheme + '://' + url_pr.netloc + video_id[:-2].replace('.csmil','').replace(',','')
|
||||
info['ext'] = 'mp4'
|
||||
return [info]
|
||||
return info
|
||||
|
@@ -47,7 +47,8 @@ class InfoExtractor(object):
|
||||
uploader_id: Nickname or id of the video uploader.
|
||||
location: Physical location of the video.
|
||||
player_url: SWF Player URL (used for rtmpdump).
|
||||
subtitles: The subtitle file contents.
|
||||
subtitles: The subtitle file contents as a dictionary in the format
|
||||
{language: subtitles}.
|
||||
view_count: How many users have watched the video on the platform.
|
||||
urlhandle: [internal] The urlHandle to be used to download the file,
|
||||
like returned by urllib.request.urlopen
|
||||
@@ -77,7 +78,13 @@ class InfoExtractor(object):
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
"""Receives a URL and returns True if suitable for this IE."""
|
||||
return re.match(cls._VALID_URL, url) is not None
|
||||
|
||||
# This does not use has/getattr intentionally - we want to know whether
|
||||
# we have cached the regexp for *this* class, whereas getattr would also
|
||||
# match the superclass
|
||||
if '_VALID_URL_RE' not in cls.__dict__:
|
||||
cls._VALID_URL_RE = re.compile(cls._VALID_URL)
|
||||
return cls._VALID_URL_RE.match(url) is not None
|
||||
|
||||
@classmethod
|
||||
def working(cls):
|
||||
|
@@ -21,17 +21,14 @@ class FunnyOrDieIE(InfoExtractor):
|
||||
video_id = mobj.group('id')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_url = self._html_search_regex(r'<video[^>]*>\s*<source[^>]*>\s*<source src="(?P<url>[^"]+)"',
|
||||
video_url = self._search_regex(r'type: "video/mp4", src: "(.*?)"',
|
||||
webpage, u'video URL', flags=re.DOTALL)
|
||||
|
||||
title = self._html_search_regex((r"<h1 class='player_page_h1'.*?>(?P<title>.*?)</h1>",
|
||||
r'<title>(?P<title>[^<]+?)</title>'), webpage, 'title', flags=re.DOTALL)
|
||||
|
||||
info = {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'ext': 'mp4',
|
||||
'title': title,
|
||||
'title': self._og_search_title(webpage),
|
||||
'description': self._og_search_description(webpage),
|
||||
}
|
||||
return [info]
|
||||
|
@@ -107,8 +107,13 @@ class GenericIE(InfoExtractor):
|
||||
return new_url
|
||||
|
||||
def _real_extract(self, url):
|
||||
new_url = self._test_redirect(url)
|
||||
if new_url: return [self.url_result(new_url)]
|
||||
try:
|
||||
new_url = self._test_redirect(url)
|
||||
if new_url:
|
||||
return [self.url_result(new_url)]
|
||||
except compat_urllib_error.HTTPError:
|
||||
# This may be a stupid server that doesn't like HEAD, our UA, or so
|
||||
pass
|
||||
|
||||
video_id = url.split('/')[-1]
|
||||
try:
|
||||
@@ -144,6 +149,9 @@ class GenericIE(InfoExtractor):
|
||||
# We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
|
||||
if m_video_type is not None:
|
||||
mobj = re.search(r'<meta.*?property="og:video".*?content="(.*?)"', webpage)
|
||||
if mobj is None:
|
||||
# HTML5 video
|
||||
mobj = re.search(r'<video[^<]*>.*?<source .*?src="([^"]+)"', webpage, flags=re.DOTALL)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
||||
|
||||
|
47
youtube_dl/extractor/jeuxvideo.py
Normal file
47
youtube_dl/extractor/jeuxvideo.py
Normal file
@@ -0,0 +1,47 @@
|
||||
# coding: utf-8
|
||||
|
||||
import json
|
||||
import re
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
class JeuxVideoIE(InfoExtractor):
|
||||
_VALID_URL = r'http://.*?\.jeuxvideo\.com/.*/(.*?)-\d+\.htm'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://www.jeuxvideo.com/reportages-videos-jeux/0004/00046170/tearaway-playstation-vita-gc-2013-tearaway-nous-presente-ses-papiers-d-identite-00115182.htm',
|
||||
u'file': u'5182.mp4',
|
||||
u'md5': u'e0fdb0cd3ce98713ef9c1e1e025779d0',
|
||||
u'info_dict': {
|
||||
u'title': u'GC 2013 : Tearaway nous présente ses papiers d\'identité',
|
||||
u'description': u'Lorsque les développeurs de LittleBigPlanet proposent un nouveau titre, on ne peut que s\'attendre à un résultat original et fort attrayant.\n',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
title = re.match(self._VALID_URL, url).group(1)
|
||||
webpage = self._download_webpage(url, title)
|
||||
m_download = re.search(r'<param name="flashvars" value="config=(.*?)" />', webpage)
|
||||
|
||||
xml_link = m_download.group(1)
|
||||
|
||||
id = re.search(r'http://www.jeuxvideo.com/config/\w+/0011/(.*?)/\d+_player\.xml', xml_link).group(1)
|
||||
|
||||
xml_config = self._download_webpage(xml_link, title,
|
||||
'Downloading XML config')
|
||||
config = xml.etree.ElementTree.fromstring(xml_config.encode('utf-8'))
|
||||
info = re.search(r'<format\.json>(.*?)</format\.json>',
|
||||
xml_config, re.MULTILINE|re.DOTALL).group(1)
|
||||
info = json.loads(info)['versions'][0]
|
||||
|
||||
video_url = 'http://video720.jeuxvideo.com/' + info['file']
|
||||
|
||||
return {'id': id,
|
||||
'title' : config.find('titre_video').text,
|
||||
'ext' : 'mp4',
|
||||
'url' : video_url,
|
||||
'description': self._og_search_description(webpage),
|
||||
'thumbnail': config.find('image').text,
|
||||
}
|
34
youtube_dl/extractor/pbs.py
Normal file
34
youtube_dl/extractor/pbs.py
Normal file
@@ -0,0 +1,34 @@
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class PBSIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://video.pbs.org/video/(?P<id>\d+)/?'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://video.pbs.org/video/2365006249/',
|
||||
u'file': u'2365006249.mp4',
|
||||
u'md5': 'ce1888486f0908d555a8093cac9a7362',
|
||||
u'info_dict': {
|
||||
u'title': u'A More Perfect Union',
|
||||
u'description': u'md5:ba0c207295339c8d6eced00b7c363c6a',
|
||||
u'duration': 3190,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
info_url = 'http://video.pbs.org/videoInfo/%s?format=json' % video_id
|
||||
info_page = self._download_webpage(info_url, video_id)
|
||||
info =json.loads(info_page)
|
||||
return {'id': video_id,
|
||||
'title': info['title'],
|
||||
'url': info['alternate_encoding']['url'],
|
||||
'ext': 'mp4',
|
||||
'description': info['program'].get('description'),
|
||||
'thumbnail': info.get('image_url'),
|
||||
'duration': info.get('duration'),
|
||||
}
|
113
youtube_dl/extractor/rtlnow.py
Normal file
113
youtube_dl/extractor/rtlnow.py
Normal file
@@ -0,0 +1,113 @@
|
||||
# encoding: utf-8
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
class RTLnowIE(InfoExtractor):
|
||||
"""Information Extractor for RTLnow, RTL2now and VOXnow"""
|
||||
_VALID_URL = r'(?:http://)?(?P<url>(?P<base_url>rtl(?:(?P<is_rtl2>2)|-)now\.rtl(?(is_rtl2)2|)\.de/|(?:www\.)?voxnow\.de/)[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P<video_id>[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)'
|
||||
_TESTS = [{
|
||||
u'url': u'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1',
|
||||
u'file': u'90419.flv',
|
||||
u'info_dict': {
|
||||
u'upload_date': u'20070416',
|
||||
u'title': u'Ahornallee - Folge 1 - Der Einzug',
|
||||
u'description': u'Folge 1 - Der Einzug',
|
||||
},
|
||||
u'params': {
|
||||
u'skip_download': True,
|
||||
},
|
||||
u'skip': u'Only works from Germany',
|
||||
},
|
||||
{
|
||||
u'url': u'http://rtl2now.rtl2.de/aerger-im-revier/episode-15-teil-1.php?film_id=69756&player=1&season=2&index=5',
|
||||
u'file': u'69756.flv',
|
||||
u'info_dict': {
|
||||
u'upload_date': u'20120519',
|
||||
u'title': u'Ärger im Revier - Ein junger Ladendieb, ein handfester Streit...',
|
||||
u'description': u'Ärger im Revier - Ein junger Ladendieb, ein handfester Streit u.a.',
|
||||
u'thumbnail': u'http://autoimg.static-fra.de/rtl2now/219850/1500x1500/image2.jpg',
|
||||
},
|
||||
u'params': {
|
||||
u'skip_download': True,
|
||||
},
|
||||
u'skip': u'Only works from Germany',
|
||||
},
|
||||
{
|
||||
u'url': u'www.voxnow.de/voxtours/suedafrika-reporter-ii.php?film_id=13883&player=1&season=17',
|
||||
u'file': u'13883.flv',
|
||||
u'info_dict': {
|
||||
u'upload_date': u'20090627',
|
||||
u'title': u'Voxtours - Südafrika-Reporter II',
|
||||
u'description': u'Südafrika-Reporter II',
|
||||
},
|
||||
u'params': {
|
||||
u'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self,url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
||||
webpage_url = u'http://' + mobj.group('url')
|
||||
video_page_url = u'http://' + mobj.group('base_url')
|
||||
video_id = mobj.group(u'video_id')
|
||||
|
||||
webpage = self._download_webpage(webpage_url, video_id)
|
||||
|
||||
note_m = re.search(r'''(?sx)
|
||||
<div[ ]style="margin-left:[ ]20px;[ ]font-size:[ ]13px;">(.*?)
|
||||
<div[ ]id="playerteaser">''', webpage)
|
||||
if note_m:
|
||||
msg = clean_html(note_m.group(1))
|
||||
raise ExtractorError(msg)
|
||||
|
||||
video_title = self._html_search_regex(r'<title>(?P<title>[^<]+)</title>',
|
||||
webpage, u'title')
|
||||
playerdata_url = self._html_search_regex(r'\'playerdata\': \'(?P<playerdata_url>[^\']+)\'',
|
||||
webpage, u'playerdata_url')
|
||||
|
||||
playerdata = self._download_webpage(playerdata_url, video_id)
|
||||
mobj = re.search(r'<title><!\[CDATA\[(?P<description>.+?)\s+- (?:Sendung )?vom (?P<upload_date_d>[0-9]{2})\.(?P<upload_date_m>[0-9]{2})\.(?:(?P<upload_date_Y>[0-9]{4})|(?P<upload_date_y>[0-9]{2})) [0-9]{2}:[0-9]{2} Uhr\]\]></title>', playerdata)
|
||||
if mobj:
|
||||
video_description = mobj.group(u'description')
|
||||
if mobj.group('upload_date_Y'):
|
||||
video_upload_date = mobj.group('upload_date_Y')
|
||||
else:
|
||||
video_upload_date = u'20' + mobj.group('upload_date_y')
|
||||
video_upload_date += mobj.group('upload_date_m')+mobj.group('upload_date_d')
|
||||
else:
|
||||
video_description = None
|
||||
video_upload_date = None
|
||||
self._downloader.report_warning(u'Unable to extract description and upload date')
|
||||
|
||||
# Thumbnail: not every video has an thumbnail
|
||||
mobj = re.search(r'<meta property="og:image" content="(?P<thumbnail>[^"]+)">', webpage)
|
||||
if mobj:
|
||||
video_thumbnail = mobj.group(u'thumbnail')
|
||||
else:
|
||||
video_thumbnail = None
|
||||
|
||||
mobj = re.search(r'<filename [^>]+><!\[CDATA\[(?P<url>rtmpe://(?:[^/]+/){2})(?P<play_path>[^\]]+)\]\]></filename>', playerdata)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Unable to extract media URL')
|
||||
video_url = mobj.group(u'url')
|
||||
video_play_path = u'mp4:' + mobj.group(u'play_path')
|
||||
video_player_url = video_page_url + u'includes/vodplayer.swf'
|
||||
|
||||
return [{
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'play_path': video_play_path,
|
||||
'page_url': video_page_url,
|
||||
'player_url': video_player_url,
|
||||
'ext': 'flv',
|
||||
'title': video_title,
|
||||
'description': video_description,
|
||||
'upload_date': video_upload_date,
|
||||
'thumbnail': video_thumbnail,
|
||||
}]
|
23
youtube_dl/extractor/slashdot.py
Normal file
23
youtube_dl/extractor/slashdot.py
Normal file
@@ -0,0 +1,23 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class SlashdotIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://tv.slashdot.org/video/\?embed=(?P<id>.*?)(&|$)'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://tv.slashdot.org/video/?embed=JscHMzZDplD0p-yNLOzTfzC3Q3xzJaUz',
|
||||
u'file': u'JscHMzZDplD0p-yNLOzTfzC3Q3xzJaUz.mp4',
|
||||
u'md5': u'd2222e7a4a4c1541b3e0cf732fb26735',
|
||||
u'info_dict': {
|
||||
u'title': u' Meet the Stampede Supercomputing Cluster\'s Administrator',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
ooyala_url = self._search_regex(r'<script src="(.*?)"', webpage, 'ooyala url')
|
||||
return self.url_result(ooyala_url, 'Ooyala')
|
@@ -4,6 +4,7 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_str,
|
||||
compat_urlparse,
|
||||
|
||||
ExtractorError,
|
||||
unified_strdate,
|
||||
@@ -22,6 +23,7 @@ class SoundcloudIE(InfoExtractor):
|
||||
_VALID_URL = r'''^(?:https?://)?
|
||||
(?:(?:(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)/?(?:[?].*)?$)
|
||||
|(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+))
|
||||
|(?P<widget>w.soundcloud.com/player/?.*?url=.*)
|
||||
)
|
||||
'''
|
||||
IE_NAME = u'soundcloud'
|
||||
@@ -79,6 +81,9 @@ class SoundcloudIE(InfoExtractor):
|
||||
if track_id is not None:
|
||||
info_json_url = 'http://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID
|
||||
full_title = track_id
|
||||
elif mobj.group('widget'):
|
||||
query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
|
||||
return self.url_result(query['url'][0], ie='Soundcloud')
|
||||
else:
|
||||
# extract uploader (which is in the url)
|
||||
uploader = mobj.group(1)
|
||||
|
@@ -5,13 +5,13 @@ from .common import InfoExtractor
|
||||
class StatigramIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?statigr\.am/p/([^/]+)'
|
||||
_TEST = {
|
||||
u'url': u'http://statigr.am/p/484091715184808010_284179915',
|
||||
u'file': u'484091715184808010_284179915.mp4',
|
||||
u'md5': u'deda4ff333abe2e118740321e992605b',
|
||||
u'url': u'http://statigr.am/p/522207370455279102_24101272',
|
||||
u'file': u'522207370455279102_24101272.mp4',
|
||||
u'md5': u'6eb93b882a3ded7c378ee1d6884b1814',
|
||||
u'info_dict': {
|
||||
u"uploader_id": u"videoseconds",
|
||||
u"title": u"Instagram photo by @videoseconds"
|
||||
}
|
||||
u'uploader_id': u'aguynamedpatrick',
|
||||
u'title': u'Instagram photo by @aguynamedpatrick (Patrick Janelle)',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
32
youtube_dl/extractor/unistra.py
Normal file
32
youtube_dl/extractor/unistra.py
Normal file
@@ -0,0 +1,32 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
class UnistraIE(InfoExtractor):
|
||||
_VALID_URL = r'http://utv.unistra.fr/(?:index|video).php\?id_video\=(\d+)'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://utv.unistra.fr/video.php?id_video=154',
|
||||
u'file': u'154.mp4',
|
||||
u'md5': u'736f605cfdc96724d55bb543ab3ced24',
|
||||
u'info_dict': {
|
||||
u'title': u'M!ss Yella',
|
||||
u'description': u'md5:75e8439a3e2981cd5d4b6db232e8fdfc',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
id = re.match(self._VALID_URL, url).group(1)
|
||||
webpage = self._download_webpage(url, id)
|
||||
file = re.search(r'file: "(.*?)",', webpage).group(1)
|
||||
title = self._html_search_regex(r'<title>UTV - (.*?)</', webpage, u'title')
|
||||
|
||||
video_url = 'http://vod-flash.u-strasbg.fr:8080/' + file
|
||||
|
||||
return {'id': id,
|
||||
'title': title,
|
||||
'ext': 'mp4',
|
||||
'url': video_url,
|
||||
'description': self._html_search_regex(r'<meta name="Description" content="(.*?)"', webpage, u'description', flags=re.DOTALL),
|
||||
'thumbnail': self._search_regex(r'image: "(.*?)"', webpage, u'thumbnail'),
|
||||
}
|
@@ -11,14 +11,14 @@ class VevoIE(InfoExtractor):
|
||||
Accepts urls from vevo.com or in the format 'vevo:{id}'
|
||||
(currently used by MTVIE)
|
||||
"""
|
||||
_VALID_URL = r'((http://www.vevo.com/watch/.*?/.*?/)|(vevo:))(?P<id>.*)$'
|
||||
_VALID_URL = r'((http://www.vevo.com/watch/.*?/.*?/)|(vevo:))(?P<id>.*?)(\?|$)'
|
||||
_TEST = {
|
||||
u'url': u'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
|
||||
u'file': u'GB1101300280.mp4',
|
||||
u'md5': u'06bea460acb744eab74a9d7dcb4bfd61',
|
||||
u'info_dict': {
|
||||
u"upload_date": u"20130624",
|
||||
u"uploader": u"Hurts",
|
||||
u"upload_date": u"20130624",
|
||||
u"uploader": u"Hurts",
|
||||
u"title": u"Somebody to Die For"
|
||||
}
|
||||
}
|
||||
|
@@ -14,7 +14,7 @@ class VideofyMeIE(InfoExtractor):
|
||||
_TEST = {
|
||||
u'url': u'http://www.videofy.me/thisisvideofyme/1100701',
|
||||
u'file': u'1100701.mp4',
|
||||
u'md5': u'2046dd5758541d630bfa93e741e2fd79',
|
||||
u'md5': u'c77d700bdc16ae2e9f3c26019bd96143',
|
||||
u'info_dict': {
|
||||
u'title': u'This is VideofyMe',
|
||||
u'description': None,
|
||||
@@ -32,9 +32,8 @@ class VideofyMeIE(InfoExtractor):
|
||||
config = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
|
||||
video = config.find('video')
|
||||
sources = video.find('sources')
|
||||
url_node = find_xpath_attr(sources, 'source', 'id', 'HQ on')
|
||||
if url_node is None:
|
||||
url_node = find_xpath_attr(sources, 'source', 'id', 'HQ off')
|
||||
url_node = next(node for node in [find_xpath_attr(sources, 'source', 'id', 'HQ %s' % key)
|
||||
for key in ['on', 'av', 'off']] if node is not None)
|
||||
video_url = url_node.find('url').text
|
||||
|
||||
return {'id': video_id,
|
||||
|
@@ -20,18 +20,31 @@ class VimeoIE(InfoExtractor):
|
||||
_VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)(?:[?].*)?$'
|
||||
_NETRC_MACHINE = 'vimeo'
|
||||
IE_NAME = u'vimeo'
|
||||
_TEST = {
|
||||
u'url': u'http://vimeo.com/56015672',
|
||||
u'file': u'56015672.mp4',
|
||||
u'md5': u'8879b6cc097e987f02484baf890129e5',
|
||||
u'info_dict': {
|
||||
u"upload_date": u"20121220",
|
||||
u"description": u"This is a test case for youtube-dl.\nFor more information, see github.com/rg3/youtube-dl\nTest chars: \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550",
|
||||
u"uploader_id": u"user7108434",
|
||||
u"uploader": u"Filippo Valsorda",
|
||||
u"title": u"youtube-dl test video - \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550"
|
||||
}
|
||||
}
|
||||
_TESTS = [
|
||||
{
|
||||
u'url': u'http://vimeo.com/56015672',
|
||||
u'file': u'56015672.mp4',
|
||||
u'md5': u'8879b6cc097e987f02484baf890129e5',
|
||||
u'info_dict': {
|
||||
u"upload_date": u"20121220",
|
||||
u"description": u"This is a test case for youtube-dl.\nFor more information, see github.com/rg3/youtube-dl\nTest chars: \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550",
|
||||
u"uploader_id": u"user7108434",
|
||||
u"uploader": u"Filippo Valsorda",
|
||||
u"title": u"youtube-dl test video - \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550",
|
||||
},
|
||||
},
|
||||
{
|
||||
u'url': u'http://vimeopro.com/openstreetmapus/state-of-the-map-us-2013/video/68093876',
|
||||
u'file': u'68093876.mp4',
|
||||
u'md5': u'3b5ca6aa22b60dfeeadf50b72e44ed82',
|
||||
u'note': u'Vimeo Pro video (#1197)',
|
||||
u'info_dict': {
|
||||
u'uploader_id': u'openstreetmapus',
|
||||
u'uploader': u'OpenStreetMap US',
|
||||
u'title': u'Andy Allan - Putting the Carto into OpenStreetMap Cartography',
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def _login(self):
|
||||
(username, password) = self._get_login_info()
|
||||
@@ -83,7 +96,9 @@ class VimeoIE(InfoExtractor):
|
||||
video_id = mobj.group('id')
|
||||
if not mobj.group('proto'):
|
||||
url = 'https://' + url
|
||||
if mobj.group('direct_link') or mobj.group('pro'):
|
||||
elif mobj.group('pro'):
|
||||
url = 'http://player.vimeo.com/video/' + video_id
|
||||
elif mobj.group('direct_link'):
|
||||
url = 'https://vimeo.com/' + video_id
|
||||
|
||||
# Retrieve video webpage to extract further information
|
||||
|
@@ -3,7 +3,8 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse,
|
||||
|
||||
unescapeHTML,
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
@@ -36,15 +37,16 @@ class XHamsterIE(InfoExtractor):
|
||||
video_url = compat_urllib_parse.unquote(mobj.group('file'))
|
||||
else:
|
||||
video_url = mobj.group('server')+'/key='+mobj.group('file')
|
||||
video_extension = video_url.split('.')[-1]
|
||||
|
||||
video_title = self._html_search_regex(r'<title>(?P<title>.+?) - xHamster\.com</title>',
|
||||
webpage, u'title')
|
||||
|
||||
# Can't see the description anywhere in the UI
|
||||
# video_description = self._html_search_regex(r'<span>Description: </span>(?P<description>[^<]+)',
|
||||
# webpage, u'description', fatal=False)
|
||||
# if video_description: video_description = unescapeHTML(video_description)
|
||||
# Only a few videos have an description
|
||||
mobj = re.search('<span>Description: </span>(?P<description>[^<]+)', webpage)
|
||||
if mobj:
|
||||
video_description = unescapeHTML(mobj.group('description'))
|
||||
else:
|
||||
video_description = None
|
||||
|
||||
mobj = re.search(r'hint=\'(?P<upload_date_Y>[0-9]{4})-(?P<upload_date_m>[0-9]{2})-(?P<upload_date_d>[0-9]{2}) [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{3,4}\'', webpage)
|
||||
if mobj:
|
||||
@@ -62,9 +64,9 @@ class XHamsterIE(InfoExtractor):
|
||||
return [{
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'ext': video_extension,
|
||||
'ext': determine_ext(video_url),
|
||||
'title': video_title,
|
||||
# 'description': video_description,
|
||||
'description': video_description,
|
||||
'upload_date': video_upload_date,
|
||||
'uploader_id': video_uploader_id,
|
||||
'thumbnail': video_thumbnail
|
||||
|
@@ -141,7 +141,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
(?: # the various things that can precede the ID:
|
||||
(?:(?:v|embed|e)/) # v/ or embed/ or e/
|
||||
|(?: # or the v= param in all its forms
|
||||
(?:watch|movie(?:_popup)?(?:\.php)?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
|
||||
(?:(?:watch|movie)(?:_popup)?(?:\.php)?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
|
||||
(?:\?|\#!?) # the params delimiter ? or # or #!
|
||||
(?:.*?&)? # any other preceding param (like /?s=tuff&v=xxxx)
|
||||
v=
|
||||
@@ -155,11 +155,22 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
# Listed in order of quality
|
||||
_available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '17', '13',
|
||||
'95', '94', '93', '92', '132', '151',
|
||||
# 3D
|
||||
'85', '84', '102', '83', '101', '82', '100',
|
||||
# Dash video
|
||||
'138', '137', '248', '136', '247', '135', '246',
|
||||
'245', '244', '134', '243', '133', '242', '160',
|
||||
# Dash audio
|
||||
'141', '172', '140', '171', '139',
|
||||
]
|
||||
_available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '17', '13',
|
||||
'95', '94', '93', '92', '132', '151',
|
||||
'85', '102', '84', '101', '83', '100', '82',
|
||||
# Dash video
|
||||
'138', '248', '137', '247', '136', '246', '245',
|
||||
'244', '135', '243', '134', '242', '133', '160',
|
||||
# Dash audio
|
||||
'172', '141', '171', '140', '139',
|
||||
]
|
||||
_video_extensions = {
|
||||
'13': '3gp',
|
||||
@@ -181,7 +192,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'100': 'webm',
|
||||
'101': 'webm',
|
||||
'102': 'webm',
|
||||
|
||||
|
||||
# videos that use m3u8
|
||||
'92': 'mp4',
|
||||
'93': 'mp4',
|
||||
@@ -190,6 +201,29 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'96': 'mp4',
|
||||
'132': 'mp4',
|
||||
'151': 'mp4',
|
||||
|
||||
# Dash mp4
|
||||
'133': 'mp4',
|
||||
'134': 'mp4',
|
||||
'135': 'mp4',
|
||||
'136': 'mp4',
|
||||
'137': 'mp4',
|
||||
'138': 'mp4',
|
||||
'139': 'mp4',
|
||||
'140': 'mp4',
|
||||
'141': 'mp4',
|
||||
'160': 'mp4',
|
||||
|
||||
# Dash webm
|
||||
'171': 'webm',
|
||||
'172': 'webm',
|
||||
'242': 'webm',
|
||||
'243': 'webm',
|
||||
'244': 'webm',
|
||||
'245': 'webm',
|
||||
'246': 'webm',
|
||||
'247': 'webm',
|
||||
'248': 'webm',
|
||||
}
|
||||
_video_dimensions = {
|
||||
'5': '240x400',
|
||||
@@ -217,11 +251,58 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'96': '1080p',
|
||||
'100': '360p',
|
||||
'101': '480p',
|
||||
'102': '720p',
|
||||
'102': '720p',
|
||||
'132': '240p',
|
||||
'151': '72p',
|
||||
'133': '240p',
|
||||
'134': '360p',
|
||||
'135': '480p',
|
||||
'136': '720p',
|
||||
'137': '1080p',
|
||||
'138': '>1080p',
|
||||
'139': '48k',
|
||||
'140': '128k',
|
||||
'141': '256k',
|
||||
'160': '192p',
|
||||
'171': '128k',
|
||||
'172': '256k',
|
||||
'242': '240p',
|
||||
'243': '360p',
|
||||
'244': '480p',
|
||||
'245': '480p',
|
||||
'246': '480p',
|
||||
'247': '720p',
|
||||
'248': '1080p',
|
||||
}
|
||||
_3d_itags = ['85', '84', '102', '83', '101', '82', '100']
|
||||
_special_itags = {
|
||||
'82': '3D',
|
||||
'83': '3D',
|
||||
'84': '3D',
|
||||
'85': '3D',
|
||||
'100': '3D',
|
||||
'101': '3D',
|
||||
'102': '3D',
|
||||
'133': 'DASH Video',
|
||||
'134': 'DASH Video',
|
||||
'135': 'DASH Video',
|
||||
'136': 'DASH Video',
|
||||
'137': 'DASH Video',
|
||||
'138': 'DASH Video',
|
||||
'139': 'DASH Audio',
|
||||
'140': 'DASH Audio',
|
||||
'141': 'DASH Audio',
|
||||
'160': 'DASH Video',
|
||||
'171': 'DASH Audio',
|
||||
'172': 'DASH Audio',
|
||||
'242': 'DASH Video',
|
||||
'243': 'DASH Video',
|
||||
'244': 'DASH Video',
|
||||
'245': 'DASH Video',
|
||||
'246': 'DASH Video',
|
||||
'247': 'DASH Video',
|
||||
'248': 'DASH Video',
|
||||
}
|
||||
|
||||
IE_NAME = u'youtube'
|
||||
_TESTS = [
|
||||
{
|
||||
@@ -255,7 +336,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
u"upload_date": u"20120506",
|
||||
u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
|
||||
u"description": u"md5:b085c9804f5ab69f4adea963a2dceb3c",
|
||||
u"uploader": u"IconaPop",
|
||||
u"uploader": u"Icona Pop",
|
||||
u"uploader_id": u"IconaPop"
|
||||
}
|
||||
},
|
||||
@@ -340,19 +421,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
elif len(s) == 88:
|
||||
return s[48] + s[81:67:-1] + s[82] + s[66:62:-1] + s[85] + s[61:48:-1] + s[67] + s[47:12:-1] + s[3] + s[11:3:-1] + s[2] + s[12]
|
||||
elif len(s) == 87:
|
||||
return s[83:53:-1] + s[3] + s[52:40:-1] + s[86] + s[39:10:-1] + s[0] + s[9:3:-1] + s[53]
|
||||
return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
|
||||
elif len(s) == 86:
|
||||
return s[5:20] + s[2] + s[21:]
|
||||
return s[5:40] + s[3] + s[41:48] + s[0] + s[49:86]
|
||||
elif len(s) == 85:
|
||||
return s[2:8] + s[0] + s[9:21] + s[65] + s[22:65] + s[84] + s[66:82] + s[21]
|
||||
return s[83:34:-1] + s[0] + s[33:27:-1] + s[3] + s[26:19:-1] + s[34] + s[18:3:-1] + s[27]
|
||||
elif len(s) == 84:
|
||||
return s[83:27:-1] + s[0] + s[26:5:-1] + s[2:0:-1] + s[27]
|
||||
elif len(s) == 83:
|
||||
return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0]
|
||||
elif len(s) == 82:
|
||||
return s[36] + s[79:67:-1] + s[81] + s[66:40:-1] + s[33] + s[39:36:-1] + s[40] + s[35] + s[0] + s[67] + s[32:0:-1] + s[34]
|
||||
return s[1:19] + s[0] + s[20:68] + s[19] + s[69:82]
|
||||
elif len(s) == 81:
|
||||
return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
|
||||
elif len(s) == 80:
|
||||
return s[1:19] + s[0] + s[20:68] + s[19] + s[69:80]
|
||||
elif len(s) == 79:
|
||||
return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
|
||||
|
||||
@@ -375,11 +458,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
try:
|
||||
sub_list = compat_urllib_request.urlopen(request).read().decode('utf-8')
|
||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||
return (u'unable to download video subtitles: %s' % compat_str(err), None)
|
||||
self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
|
||||
return {}
|
||||
sub_lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
|
||||
sub_lang_list = dict((l[1], l[0]) for l in sub_lang_list)
|
||||
if not sub_lang_list:
|
||||
return (u'video doesn\'t have subtitles', None)
|
||||
self._downloader.report_warning(u'video doesn\'t have subtitles')
|
||||
return {}
|
||||
return sub_lang_list
|
||||
|
||||
def _list_available_subtitles(self, video_id):
|
||||
@@ -388,8 +473,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
|
||||
def _request_subtitle(self, sub_lang, sub_name, video_id, format):
|
||||
"""
|
||||
Return tuple:
|
||||
(error_message, sub_lang, sub)
|
||||
Return the subtitle as a string or None if they are not found
|
||||
"""
|
||||
self.report_video_subtitles_request(video_id, sub_lang, format)
|
||||
params = compat_urllib_parse.urlencode({
|
||||
@@ -402,21 +486,24 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
try:
|
||||
sub = compat_urllib_request.urlopen(url).read().decode('utf-8')
|
||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||
return (u'unable to download video subtitles: %s' % compat_str(err), None, None)
|
||||
self._downloader.report_warning(u'unable to download video subtitles for %s: %s' % (sub_lang, compat_str(err)))
|
||||
return
|
||||
if not sub:
|
||||
return (u'Did not fetch video subtitles', None, None)
|
||||
return (None, sub_lang, sub)
|
||||
self._downloader.report_warning(u'Did not fetch video subtitles')
|
||||
return
|
||||
return sub
|
||||
|
||||
def _request_automatic_caption(self, video_id, webpage):
|
||||
"""We need the webpage for getting the captions url, pass it as an
|
||||
argument to speed up the process."""
|
||||
sub_lang = self._downloader.params.get('subtitleslang') or 'en'
|
||||
sub_lang = (self._downloader.params.get('subtitleslangs') or ['en'])[0]
|
||||
sub_format = self._downloader.params.get('subtitlesformat')
|
||||
self.to_screen(u'%s: Looking for automatic captions' % video_id)
|
||||
mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
|
||||
err_msg = u'Couldn\'t find automatic captions for "%s"' % sub_lang
|
||||
if mobj is None:
|
||||
return [(err_msg, None, None)]
|
||||
self._downloader.report_warning(err_msg)
|
||||
return {}
|
||||
player_config = json.loads(mobj.group(1))
|
||||
try:
|
||||
args = player_config[u'args']
|
||||
@@ -431,40 +518,43 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
})
|
||||
subtitles_url = caption_url + '&' + params
|
||||
sub = self._download_webpage(subtitles_url, video_id, u'Downloading automatic captions')
|
||||
return [(None, sub_lang, sub)]
|
||||
except KeyError:
|
||||
return [(err_msg, None, None)]
|
||||
|
||||
def _extract_subtitle(self, video_id):
|
||||
return {sub_lang: sub}
|
||||
# An extractor error can be raise by the download process if there are
|
||||
# no automatic captions but there are subtitles
|
||||
except (KeyError, ExtractorError):
|
||||
self._downloader.report_warning(err_msg)
|
||||
return {}
|
||||
|
||||
def _extract_subtitles(self, video_id):
|
||||
"""
|
||||
Return a list with a tuple:
|
||||
[(error_message, sub_lang, sub)]
|
||||
Return a dictionary: {language: subtitles} or {} if the subtitles
|
||||
couldn't be found
|
||||
"""
|
||||
sub_lang_list = self._get_available_subtitles(video_id)
|
||||
available_subs_list = self._get_available_subtitles(video_id)
|
||||
sub_format = self._downloader.params.get('subtitlesformat')
|
||||
if isinstance(sub_lang_list,tuple): #There was some error, it didn't get the available subtitles
|
||||
return [(sub_lang_list[0], None, None)]
|
||||
if self._downloader.params.get('subtitleslang', False):
|
||||
sub_lang = self._downloader.params.get('subtitleslang')
|
||||
elif 'en' in sub_lang_list:
|
||||
sub_lang = 'en'
|
||||
if not available_subs_list: #There was some error, it didn't get the available subtitles
|
||||
return {}
|
||||
if self._downloader.params.get('allsubtitles', False):
|
||||
sub_lang_list = available_subs_list
|
||||
else:
|
||||
sub_lang = list(sub_lang_list.keys())[0]
|
||||
if not sub_lang in sub_lang_list:
|
||||
return [(u'no closed captions found in the specified language "%s"' % sub_lang, None, None)]
|
||||
if self._downloader.params.get('subtitleslangs', False):
|
||||
reqested_langs = self._downloader.params.get('subtitleslangs')
|
||||
elif 'en' in available_subs_list:
|
||||
reqested_langs = ['en']
|
||||
else:
|
||||
reqested_langs = [list(available_subs_list.keys())[0]]
|
||||
|
||||
subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id, sub_format)
|
||||
return [subtitle]
|
||||
|
||||
def _extract_all_subtitles(self, video_id):
|
||||
sub_lang_list = self._get_available_subtitles(video_id)
|
||||
sub_format = self._downloader.params.get('subtitlesformat')
|
||||
if isinstance(sub_lang_list,tuple): #There was some error, it didn't get the available subtitles
|
||||
return [(sub_lang_list[0], None, None)]
|
||||
subtitles = []
|
||||
sub_lang_list = {}
|
||||
for sub_lang in reqested_langs:
|
||||
if not sub_lang in available_subs_list:
|
||||
self._downloader.report_warning(u'no closed captions found in the specified language "%s"' % sub_lang)
|
||||
continue
|
||||
sub_lang_list[sub_lang] = available_subs_list[sub_lang]
|
||||
subtitles = {}
|
||||
for sub_lang in sub_lang_list:
|
||||
subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id, sub_format)
|
||||
subtitles.append(subtitle)
|
||||
if subtitle:
|
||||
subtitles[sub_lang] = subtitle
|
||||
return subtitles
|
||||
|
||||
def _print_formats(self, formats):
|
||||
@@ -472,7 +562,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
for x in formats:
|
||||
print('%s\t:\t%s\t[%s]%s' %(x, self._video_extensions.get(x, 'flv'),
|
||||
self._video_dimensions.get(x, '???'),
|
||||
' (3D)' if x in self._3d_itags else ''))
|
||||
' ('+self._special_itags[x]+')' if x in self._special_itags else ''))
|
||||
|
||||
def _extract_id(self, url):
|
||||
mobj = re.match(self._VALID_URL, url, re.VERBOSE)
|
||||
@@ -655,25 +745,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
# subtitles
|
||||
video_subtitles = None
|
||||
|
||||
if self._downloader.params.get('writesubtitles', False):
|
||||
video_subtitles = self._extract_subtitle(video_id)
|
||||
if video_subtitles:
|
||||
(sub_error, sub_lang, sub) = video_subtitles[0]
|
||||
if sub_error:
|
||||
self._downloader.report_warning(sub_error)
|
||||
|
||||
if self._downloader.params.get('writeautomaticsub', False):
|
||||
if self._downloader.params.get('writesubtitles', False) or self._downloader.params.get('allsubtitles', False):
|
||||
video_subtitles = self._extract_subtitles(video_id)
|
||||
elif self._downloader.params.get('writeautomaticsub', False):
|
||||
video_subtitles = self._request_automatic_caption(video_id, video_webpage)
|
||||
(sub_error, sub_lang, sub) = video_subtitles[0]
|
||||
if sub_error:
|
||||
self._downloader.report_warning(sub_error)
|
||||
|
||||
if self._downloader.params.get('allsubtitles', False):
|
||||
video_subtitles = self._extract_all_subtitles(video_id)
|
||||
for video_subtitle in video_subtitles:
|
||||
(sub_error, sub_lang, sub) = video_subtitle
|
||||
if sub_error:
|
||||
self._downloader.report_warning(sub_error)
|
||||
|
||||
if self._downloader.params.get('listsubtitles', False):
|
||||
self._list_available_subtitles(video_id)
|
||||
@@ -699,6 +774,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
if m_s is not None:
|
||||
self.to_screen(u'%s: Encrypted signatures detected.' % video_id)
|
||||
video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
|
||||
m_s = re.search(r'[&,]s=', args.get('adaptive_fmts', u''))
|
||||
if m_s is not None:
|
||||
if 'url_encoded_fmt_stream_map' in video_info:
|
||||
video_info['url_encoded_fmt_stream_map'][0] += ',' + args['adaptive_fmts']
|
||||
else:
|
||||
video_info['url_encoded_fmt_stream_map'] = [args['adaptive_fmts']]
|
||||
elif 'adaptive_fmts' in video_info:
|
||||
if 'url_encoded_fmt_stream_map' in video_info:
|
||||
video_info['url_encoded_fmt_stream_map'][0] += ',' + video_info['adaptive_fmts'][0]
|
||||
else:
|
||||
video_info['url_encoded_fmt_stream_map'] = video_info['adaptive_fmts']
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
@@ -758,7 +844,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
|
||||
video_format = '{0} - {1}{2}'.format(format_param if format_param else video_extension,
|
||||
self._video_dimensions.get(format_param, '???'),
|
||||
' (3D)' if format_param in self._3d_itags else '')
|
||||
' ('+self._special_itags[format_param]+')' if format_param in self._special_itags else '')
|
||||
|
||||
results.append({
|
||||
'id': video_id,
|
||||
|
@@ -657,6 +657,9 @@ def determine_ext(url, default_ext=u'unknown_video'):
|
||||
else:
|
||||
return default_ext
|
||||
|
||||
def subtitles_filename(filename, sub_lang, sub_format):
|
||||
return filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format
|
||||
|
||||
def date_from_str(date_str):
|
||||
"""
|
||||
Return a datetime object from a string in the format YYYYMMDD or
|
||||
|
@@ -1,2 +1,2 @@
|
||||
|
||||
__version__ = '2013.08.14'
|
||||
__version__ = '2013.08.23'
|
||||
|
Reference in New Issue
Block a user