Compare commits
110 Commits
2013.11.22
...
2013.12.02
Author | SHA1 | Date | |
---|---|---|---|
0037e02921 | |||
6ad14cab59 | |||
a9be0cc736 | |||
55a10eab48 | |||
e344693b65 | |||
355e4fd07e | |||
5e09d6abbd | |||
b138de72f2 | |||
06dcbb71d8 | |||
c5171c454b | |||
323ec6ae56 | |||
befd88b786 | |||
a3fb4675fb | |||
5f077efcb1 | |||
9986238ba9 | |||
e1f900d6a4 | |||
acf37ca151 | |||
17769d5a6c | |||
677c18092d | |||
3862402ff3 | |||
b03d0d064c | |||
d8d6148628 | |||
2be54167d0 | |||
4e0084d92e | |||
fc9e1cc697 | |||
f8f60d2793 | |||
ea07dbb8b1 | |||
2a275ab007 | |||
a2e6db365c | |||
9d93e7da6c | |||
0e44d8381a | |||
35907e23ec | |||
76d1700b28 | |||
dcca796ce4 | |||
4b19e38954 | |||
5f09bbff4d | |||
c1f9c59d11 | |||
652cdaa269 | |||
e26f871228 | |||
6e47b51eef | |||
4a98cdbf3b | |||
c5ed4e8f7e | |||
c2e52508cc | |||
d8ec4959c8 | |||
d31209a144 | |||
529a2e2cc3 | |||
781a7d0546 | |||
fb04e40396 | |||
d9b011f201 | |||
b0b9eaa196 | |||
8b134b1062 | |||
0c75c3fa7a | |||
a3927cf7ee | |||
1a62c18f65 | |||
2a15e7063b | |||
d46cc192d7 | |||
bb2bebdbe1 | |||
5db07df634 | |||
ea36cbac5e | |||
d0d2b49ab7 | |||
31cb6d8fef | |||
daa0dd2973 | |||
de79c46c8f | |||
94ccb6fa2e | |||
07e4035879 | |||
d0efb9ec9a | |||
ac05067d3d | |||
113577e155 | |||
79d09f47c2 | |||
c059bdd432 | |||
02dbf93f0e | |||
1fb2bcbbf7 | |||
16e055849e | |||
66cfab4226 | |||
6d88bc37a3 | |||
b7553b2554 | |||
e03db0a077 | |||
a1ee09e815 | |||
267ed0c5d3 | |||
f459d17018 | |||
dc65dcbb6d | |||
d214fdb8fe | |||
138df537ff | |||
0c7c19d6bc | |||
eaaafc59c2 | |||
382ed50e0e | |||
66ec019240 | |||
bd49928f7a | |||
23e6d50d73 | |||
2e767313e4 | |||
38b2db6a66 | |||
13ebea791f | |||
4c9c57428f | |||
8bf9319e9c | |||
4914120727 | |||
36de0a0e1a | |||
e5c146d586 | |||
52ad14aeb0 | |||
43afe28588 | |||
a87b0615aa | |||
d7386f6276 | |||
081640940e | |||
7012b23c94 | |||
d3b30148ed | |||
9f79463803 | |||
d35dc6d3b5 | |||
dca0872056 | |||
2b35c9ef74 | |||
4894fe8c5b | |||
d5a9bb4ea9 |
@ -56,7 +56,8 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
--no-playlist download only the currently playing video
|
--no-playlist download only the currently playing video
|
||||||
--age-limit YEARS download only videos suitable for the given age
|
--age-limit YEARS download only videos suitable for the given age
|
||||||
--download-archive FILE Download only videos not present in the archive
|
--download-archive FILE Download only videos not present in the archive
|
||||||
file. Record all downloaded videos in it.
|
file. Record the IDs of all downloaded videos in
|
||||||
|
it.
|
||||||
|
|
||||||
## Download Options:
|
## Download Options:
|
||||||
-r, --rate-limit LIMIT maximum download rate in bytes per second (e.g.
|
-r, --rate-limit LIMIT maximum download rate in bytes per second (e.g.
|
||||||
@ -130,11 +131,11 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
-v, --verbose print various debugging information
|
-v, --verbose print various debugging information
|
||||||
--dump-intermediate-pages print downloaded pages to debug problems(very
|
--dump-intermediate-pages print downloaded pages to debug problems(very
|
||||||
verbose)
|
verbose)
|
||||||
--write-pages Write downloaded pages to files in the current
|
--write-pages Write downloaded intermediary pages to files in
|
||||||
directory
|
the current directory to debug problems
|
||||||
|
|
||||||
## Video Format Options:
|
## Video Format Options:
|
||||||
-f, --format FORMAT video format code, specifiy the order of
|
-f, --format FORMAT video format code, specify the order of
|
||||||
preference using slashes: "-f 22/17/18". "-f mp4"
|
preference using slashes: "-f 22/17/18". "-f mp4"
|
||||||
and "-f flv" are also supported
|
and "-f flv" are also supported
|
||||||
--all-formats download all available video formats
|
--all-formats download all available video formats
|
||||||
|
@ -1,10 +1,21 @@
|
|||||||
__youtube_dl()
|
__youtube_dl()
|
||||||
{
|
{
|
||||||
local cur prev opts
|
local cur prev opts fileopts diropts keywords
|
||||||
COMPREPLY=()
|
COMPREPLY=()
|
||||||
cur="${COMP_WORDS[COMP_CWORD]}"
|
cur="${COMP_WORDS[COMP_CWORD]}"
|
||||||
|
prev="${COMP_WORDS[COMP_CWORD-1]}"
|
||||||
opts="{{flags}}"
|
opts="{{flags}}"
|
||||||
keywords=":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater"
|
keywords=":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater :ythistory"
|
||||||
|
fileopts="-a|--batch-file|--download-archive|--cookies"
|
||||||
|
diropts="--cache-dir"
|
||||||
|
|
||||||
|
if [[ ${prev} =~ ${fileopts} ]]; then
|
||||||
|
COMPREPLY=( $(compgen -f -- ${cur}) )
|
||||||
|
return 0
|
||||||
|
elif [[ ${prev} =~ ${diropts} ]]; then
|
||||||
|
COMPREPLY=( $(compgen -d -- ${cur}) )
|
||||||
|
return 0
|
||||||
|
fi
|
||||||
|
|
||||||
if [[ ${cur} =~ : ]]; then
|
if [[ ${cur} =~ : ]]; then
|
||||||
COMPREPLY=( $(compgen -W "${keywords}" -- ${cur}) )
|
COMPREPLY=( $(compgen -W "${keywords}" -- ${cur}) )
|
||||||
|
@ -12,10 +12,6 @@ from youtube_dl import YoutubeDL
|
|||||||
from youtube_dl.utils import preferredencoding
|
from youtube_dl.utils import preferredencoding
|
||||||
|
|
||||||
|
|
||||||
def global_setup():
|
|
||||||
youtube_dl._setup_opener(timeout=10)
|
|
||||||
|
|
||||||
|
|
||||||
def get_params(override=None):
|
def get_params(override=None):
|
||||||
PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)),
|
PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)),
|
||||||
"parameters.json")
|
"parameters.json")
|
||||||
|
@ -39,5 +39,6 @@
|
|||||||
"writeinfojson": true,
|
"writeinfojson": true,
|
||||||
"writesubtitles": false,
|
"writesubtitles": false,
|
||||||
"allsubtitles": false,
|
"allsubtitles": false,
|
||||||
"listssubtitles": false
|
"listssubtitles": false,
|
||||||
|
"socket_timeout": 20
|
||||||
}
|
}
|
||||||
|
@ -6,8 +6,7 @@ import sys
|
|||||||
import unittest
|
import unittest
|
||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
from test.helper import global_setup, try_rm
|
from test.helper import try_rm
|
||||||
global_setup()
|
|
||||||
|
|
||||||
|
|
||||||
from youtube_dl import YoutubeDL
|
from youtube_dl import YoutubeDL
|
||||||
|
@ -100,10 +100,15 @@ class TestAllURLsMatching(unittest.TestCase):
|
|||||||
def test_keywords(self):
|
def test_keywords(self):
|
||||||
self.assertMatch(':ytsubs', ['youtube:subscriptions'])
|
self.assertMatch(':ytsubs', ['youtube:subscriptions'])
|
||||||
self.assertMatch(':ytsubscriptions', ['youtube:subscriptions'])
|
self.assertMatch(':ytsubscriptions', ['youtube:subscriptions'])
|
||||||
self.assertMatch(':thedailyshow', ['ComedyCentral'])
|
self.assertMatch(':ythistory', ['youtube:history'])
|
||||||
self.assertMatch(':tds', ['ComedyCentral'])
|
self.assertMatch(':thedailyshow', ['ComedyCentralShows'])
|
||||||
self.assertMatch(':colbertreport', ['ComedyCentral'])
|
self.assertMatch(':tds', ['ComedyCentralShows'])
|
||||||
self.assertMatch(':cr', ['ComedyCentral'])
|
self.assertMatch(':colbertreport', ['ComedyCentralShows'])
|
||||||
|
self.assertMatch(':cr', ['ComedyCentralShows'])
|
||||||
|
|
||||||
|
def test_vimeo_matching(self):
|
||||||
|
self.assertMatch('http://vimeo.com/channels/tributes', ['vimeo:channel'])
|
||||||
|
self.assertMatch('http://vimeo.com/user7108434', ['vimeo:user'])
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
@ -9,12 +9,10 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|||||||
from test.helper import (
|
from test.helper import (
|
||||||
get_params,
|
get_params,
|
||||||
get_testcases,
|
get_testcases,
|
||||||
global_setup,
|
|
||||||
try_rm,
|
try_rm,
|
||||||
md5,
|
md5,
|
||||||
report_warning
|
report_warning
|
||||||
)
|
)
|
||||||
global_setup()
|
|
||||||
|
|
||||||
|
|
||||||
import hashlib
|
import hashlib
|
||||||
|
@ -8,14 +8,14 @@ import sys
|
|||||||
import unittest
|
import unittest
|
||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
from test.helper import FakeYDL, global_setup
|
from test.helper import FakeYDL
|
||||||
global_setup()
|
|
||||||
|
|
||||||
|
|
||||||
from youtube_dl.extractor import (
|
from youtube_dl.extractor import (
|
||||||
DailymotionPlaylistIE,
|
DailymotionPlaylistIE,
|
||||||
DailymotionUserIE,
|
DailymotionUserIE,
|
||||||
VimeoChannelIE,
|
VimeoChannelIE,
|
||||||
|
VimeoUserIE,
|
||||||
UstreamChannelIE,
|
UstreamChannelIE,
|
||||||
SoundcloudSetIE,
|
SoundcloudSetIE,
|
||||||
SoundcloudUserIE,
|
SoundcloudUserIE,
|
||||||
@ -55,6 +55,14 @@ class TestPlaylists(unittest.TestCase):
|
|||||||
self.assertEqual(result['title'], u'Vimeo Tributes')
|
self.assertEqual(result['title'], u'Vimeo Tributes')
|
||||||
self.assertTrue(len(result['entries']) > 24)
|
self.assertTrue(len(result['entries']) > 24)
|
||||||
|
|
||||||
|
def test_vimeo_user(self):
|
||||||
|
dl = FakeYDL()
|
||||||
|
ie = VimeoUserIE(dl)
|
||||||
|
result = ie.extract('http://vimeo.com/nkistudio/videos')
|
||||||
|
self.assertIsPlaylist(result)
|
||||||
|
self.assertEqual(result['title'], u'Nki')
|
||||||
|
self.assertTrue(len(result['entries']) > 65)
|
||||||
|
|
||||||
def test_ustream_channel(self):
|
def test_ustream_channel(self):
|
||||||
dl = FakeYDL()
|
dl = FakeYDL()
|
||||||
ie = UstreamChannelIE(dl)
|
ie = UstreamChannelIE(dl)
|
||||||
@ -102,7 +110,7 @@ class TestPlaylists(unittest.TestCase):
|
|||||||
result = ie.extract('http://bambuser.com/channel/pixelversity')
|
result = ie.extract('http://bambuser.com/channel/pixelversity')
|
||||||
self.assertIsPlaylist(result)
|
self.assertIsPlaylist(result)
|
||||||
self.assertEqual(result['title'], u'pixelversity')
|
self.assertEqual(result['title'], u'pixelversity')
|
||||||
self.assertTrue(len(result['entries']) >= 66)
|
self.assertTrue(len(result['entries']) >= 60)
|
||||||
|
|
||||||
def test_bandcamp_album(self):
|
def test_bandcamp_album(self):
|
||||||
dl = FakeYDL()
|
dl = FakeYDL()
|
||||||
|
@ -6,8 +6,7 @@ import sys
|
|||||||
import unittest
|
import unittest
|
||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
from test.helper import FakeYDL, global_setup, md5
|
from test.helper import FakeYDL, md5
|
||||||
global_setup()
|
|
||||||
|
|
||||||
|
|
||||||
from youtube_dl.extractor import (
|
from youtube_dl.extractor import (
|
||||||
@ -73,7 +72,7 @@ class TestYoutubeSubtitles(BaseTestSubtitles):
|
|||||||
self.DL.params['writesubtitles'] = True
|
self.DL.params['writesubtitles'] = True
|
||||||
self.DL.params['subtitlesformat'] = 'vtt'
|
self.DL.params['subtitlesformat'] = 'vtt'
|
||||||
subtitles = self.getSubtitles()
|
subtitles = self.getSubtitles()
|
||||||
self.assertEqual(md5(subtitles['en']), '356cdc577fde0c6783b9b822e7206ff7')
|
self.assertEqual(md5(subtitles['en']), '3cb210999d3e021bd6c7f0ea751eab06')
|
||||||
|
|
||||||
def test_youtube_list_subtitles(self):
|
def test_youtube_list_subtitles(self):
|
||||||
self.DL.expect_warning(u'Video doesn\'t have automatic captions')
|
self.DL.expect_warning(u'Video doesn\'t have automatic captions')
|
||||||
|
@ -7,8 +7,7 @@ import sys
|
|||||||
import unittest
|
import unittest
|
||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
from test.helper import get_params, global_setup, try_rm
|
from test.helper import get_params, try_rm
|
||||||
global_setup()
|
|
||||||
|
|
||||||
|
|
||||||
import io
|
import io
|
||||||
|
@ -7,8 +7,7 @@ import sys
|
|||||||
import unittest
|
import unittest
|
||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
from test.helper import get_params, global_setup
|
from test.helper import get_params
|
||||||
global_setup()
|
|
||||||
|
|
||||||
|
|
||||||
import io
|
import io
|
||||||
|
@ -6,8 +6,7 @@ import sys
|
|||||||
import unittest
|
import unittest
|
||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
from test.helper import FakeYDL, global_setup
|
from test.helper import FakeYDL
|
||||||
global_setup()
|
|
||||||
|
|
||||||
|
|
||||||
from youtube_dl.extractor import (
|
from youtube_dl.extractor import (
|
||||||
@ -84,16 +83,16 @@ class TestYoutubeLists(unittest.TestCase):
|
|||||||
dl = FakeYDL()
|
dl = FakeYDL()
|
||||||
ie = YoutubeChannelIE(dl)
|
ie = YoutubeChannelIE(dl)
|
||||||
#test paginated channel
|
#test paginated channel
|
||||||
result = ie.extract('https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w')[0]
|
result = ie.extract('https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w')
|
||||||
self.assertTrue(len(result['entries']) > 90)
|
self.assertTrue(len(result['entries']) > 90)
|
||||||
#test autogenerated channel
|
#test autogenerated channel
|
||||||
result = ie.extract('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')[0]
|
result = ie.extract('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')
|
||||||
self.assertTrue(len(result['entries']) >= 18)
|
self.assertTrue(len(result['entries']) >= 18)
|
||||||
|
|
||||||
def test_youtube_user(self):
|
def test_youtube_user(self):
|
||||||
dl = FakeYDL()
|
dl = FakeYDL()
|
||||||
ie = YoutubeUserIE(dl)
|
ie = YoutubeUserIE(dl)
|
||||||
result = ie.extract('https://www.youtube.com/user/TheLinuxFoundation')[0]
|
result = ie.extract('https://www.youtube.com/user/TheLinuxFoundation')
|
||||||
self.assertTrue(len(result['entries']) >= 320)
|
self.assertTrue(len(result['entries']) >= 320)
|
||||||
|
|
||||||
def test_youtube_safe_search(self):
|
def test_youtube_safe_search(self):
|
||||||
@ -108,5 +107,14 @@ class TestYoutubeLists(unittest.TestCase):
|
|||||||
result = ie.extract('http://www.youtube.com/show/airdisasters')
|
result = ie.extract('http://www.youtube.com/show/airdisasters')
|
||||||
self.assertTrue(len(result) >= 3)
|
self.assertTrue(len(result) >= 3)
|
||||||
|
|
||||||
|
def test_youtube_mix(self):
|
||||||
|
dl = FakeYDL()
|
||||||
|
ie = YoutubePlaylistIE(dl)
|
||||||
|
result = ie.extract('http://www.youtube.com/watch?v=lLJf9qJHR3E&list=RDrjFaenf1T-Y')
|
||||||
|
entries = result['entries']
|
||||||
|
self.assertTrue(len(entries) >= 20)
|
||||||
|
original_video = entries[0]
|
||||||
|
self.assertEqual(original_video['id'], 'rjFaenf1T-Y')
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
@ -6,9 +6,6 @@ import sys
|
|||||||
import unittest
|
import unittest
|
||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
from test.helper import global_setup
|
|
||||||
global_setup()
|
|
||||||
|
|
||||||
|
|
||||||
import io
|
import io
|
||||||
import re
|
import re
|
||||||
|
@ -1,4 +1,3 @@
|
|||||||
import math
|
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import subprocess
|
import subprocess
|
||||||
@ -11,6 +10,7 @@ from .utils import (
|
|||||||
ContentTooShortError,
|
ContentTooShortError,
|
||||||
determine_ext,
|
determine_ext,
|
||||||
encodeFilename,
|
encodeFilename,
|
||||||
|
format_bytes,
|
||||||
sanitize_open,
|
sanitize_open,
|
||||||
timeconvert,
|
timeconvert,
|
||||||
)
|
)
|
||||||
@ -53,20 +53,6 @@ class FileDownloader(object):
|
|||||||
self._progress_hooks = []
|
self._progress_hooks = []
|
||||||
self.params = params
|
self.params = params
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def format_bytes(bytes):
|
|
||||||
if bytes is None:
|
|
||||||
return 'N/A'
|
|
||||||
if type(bytes) is str:
|
|
||||||
bytes = float(bytes)
|
|
||||||
if bytes == 0.0:
|
|
||||||
exponent = 0
|
|
||||||
else:
|
|
||||||
exponent = int(math.log(bytes, 1024.0))
|
|
||||||
suffix = ['B','KiB','MiB','GiB','TiB','PiB','EiB','ZiB','YiB'][exponent]
|
|
||||||
converted = float(bytes) / float(1024 ** exponent)
|
|
||||||
return '%.2f%s' % (converted, suffix)
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def format_seconds(seconds):
|
def format_seconds(seconds):
|
||||||
(mins, secs) = divmod(seconds, 60)
|
(mins, secs) = divmod(seconds, 60)
|
||||||
@ -117,7 +103,7 @@ class FileDownloader(object):
|
|||||||
def format_speed(speed):
|
def format_speed(speed):
|
||||||
if speed is None:
|
if speed is None:
|
||||||
return '%10s' % '---b/s'
|
return '%10s' % '---b/s'
|
||||||
return '%10s' % ('%s/s' % FileDownloader.format_bytes(speed))
|
return '%10s' % ('%s/s' % format_bytes(speed))
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def best_block_size(elapsed_time, bytes):
|
def best_block_size(elapsed_time, bytes):
|
||||||
@ -270,6 +256,61 @@ class FileDownloader(object):
|
|||||||
(clear_line, data_len_str, self.format_seconds(tot_time)))
|
(clear_line, data_len_str, self.format_seconds(tot_time)))
|
||||||
|
|
||||||
def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url, live):
|
def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url, live):
|
||||||
|
def run_rtmpdump(args):
|
||||||
|
start = time.time()
|
||||||
|
resume_percent = None
|
||||||
|
resume_downloaded_data_len = None
|
||||||
|
proc = subprocess.Popen(args, stderr=subprocess.PIPE)
|
||||||
|
cursor_in_new_line = True
|
||||||
|
proc_stderr_closed = False
|
||||||
|
while not proc_stderr_closed:
|
||||||
|
# read line from stderr
|
||||||
|
line = u''
|
||||||
|
while True:
|
||||||
|
char = proc.stderr.read(1)
|
||||||
|
if not char:
|
||||||
|
proc_stderr_closed = True
|
||||||
|
break
|
||||||
|
if char in [b'\r', b'\n']:
|
||||||
|
break
|
||||||
|
line += char.decode('ascii', 'replace')
|
||||||
|
if not line:
|
||||||
|
# proc_stderr_closed is True
|
||||||
|
continue
|
||||||
|
mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec \(([0-9]{1,2}\.[0-9])%\)', line)
|
||||||
|
if mobj:
|
||||||
|
downloaded_data_len = int(float(mobj.group(1))*1024)
|
||||||
|
percent = float(mobj.group(2))
|
||||||
|
if not resume_percent:
|
||||||
|
resume_percent = percent
|
||||||
|
resume_downloaded_data_len = downloaded_data_len
|
||||||
|
eta = self.calc_eta(start, time.time(), 100-resume_percent, percent-resume_percent)
|
||||||
|
speed = self.calc_speed(start, time.time(), downloaded_data_len-resume_downloaded_data_len)
|
||||||
|
data_len = None
|
||||||
|
if percent > 0:
|
||||||
|
data_len = int(downloaded_data_len * 100 / percent)
|
||||||
|
data_len_str = u'~' + format_bytes(data_len)
|
||||||
|
self.report_progress(percent, data_len_str, speed, eta)
|
||||||
|
cursor_in_new_line = False
|
||||||
|
self._hook_progress({
|
||||||
|
'downloaded_bytes': downloaded_data_len,
|
||||||
|
'total_bytes': data_len,
|
||||||
|
'tmpfilename': tmpfilename,
|
||||||
|
'filename': filename,
|
||||||
|
'status': 'downloading',
|
||||||
|
'eta': eta,
|
||||||
|
'speed': speed,
|
||||||
|
})
|
||||||
|
elif self.params.get('verbose', False):
|
||||||
|
if not cursor_in_new_line:
|
||||||
|
self.to_screen(u'')
|
||||||
|
cursor_in_new_line = True
|
||||||
|
self.to_screen(u'[rtmpdump] '+line)
|
||||||
|
proc.wait()
|
||||||
|
if not cursor_in_new_line:
|
||||||
|
self.to_screen(u'')
|
||||||
|
return proc.returncode
|
||||||
|
|
||||||
self.report_destination(filename)
|
self.report_destination(filename)
|
||||||
tmpfilename = self.temp_name(filename)
|
tmpfilename = self.temp_name(filename)
|
||||||
test = self.params.get('test', False)
|
test = self.params.get('test', False)
|
||||||
@ -280,12 +321,11 @@ class FileDownloader(object):
|
|||||||
except (OSError, IOError):
|
except (OSError, IOError):
|
||||||
self.report_error(u'RTMP download detected but "rtmpdump" could not be run')
|
self.report_error(u'RTMP download detected but "rtmpdump" could not be run')
|
||||||
return False
|
return False
|
||||||
verbosity_option = '--verbose' if self.params.get('verbose', False) else '--quiet'
|
|
||||||
|
|
||||||
# Download using rtmpdump. rtmpdump returns exit code 2 when
|
# Download using rtmpdump. rtmpdump returns exit code 2 when
|
||||||
# the connection was interrupted and resuming appears to be
|
# the connection was interrupted and resuming appears to be
|
||||||
# possible. This is part of rtmpdump's normal usage, AFAIK.
|
# possible. This is part of rtmpdump's normal usage, AFAIK.
|
||||||
basic_args = ['rtmpdump', verbosity_option, '-r', url, '-o', tmpfilename]
|
basic_args = ['rtmpdump', '--verbose', '-r', url, '-o', tmpfilename]
|
||||||
if player_url is not None:
|
if player_url is not None:
|
||||||
basic_args += ['--swfVfy', player_url]
|
basic_args += ['--swfVfy', player_url]
|
||||||
if page_url is not None:
|
if page_url is not None:
|
||||||
@ -299,30 +339,48 @@ class FileDownloader(object):
|
|||||||
if live:
|
if live:
|
||||||
basic_args += ['--live']
|
basic_args += ['--live']
|
||||||
args = basic_args + [[], ['--resume', '--skip', '1']][self.params.get('continuedl', False)]
|
args = basic_args + [[], ['--resume', '--skip', '1']][self.params.get('continuedl', False)]
|
||||||
|
|
||||||
|
if sys.platform == 'win32' and sys.version_info < (3, 0):
|
||||||
|
# Windows subprocess module does not actually support Unicode
|
||||||
|
# on Python 2.x
|
||||||
|
# See http://stackoverflow.com/a/9951851/35070
|
||||||
|
subprocess_encoding = sys.getfilesystemencoding()
|
||||||
|
args = [a.encode(subprocess_encoding, 'ignore') for a in args]
|
||||||
|
else:
|
||||||
|
subprocess_encoding = None
|
||||||
|
|
||||||
if self.params.get('verbose', False):
|
if self.params.get('verbose', False):
|
||||||
|
if subprocess_encoding:
|
||||||
|
str_args = [
|
||||||
|
a.decode(subprocess_encoding) if isinstance(a, bytes) else a
|
||||||
|
for a in args]
|
||||||
|
else:
|
||||||
|
str_args = args
|
||||||
try:
|
try:
|
||||||
import pipes
|
import pipes
|
||||||
shell_quote = lambda args: ' '.join(map(pipes.quote, args))
|
shell_quote = lambda args: ' '.join(map(pipes.quote, str_args))
|
||||||
except ImportError:
|
except ImportError:
|
||||||
shell_quote = repr
|
shell_quote = repr
|
||||||
self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(args))
|
self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(str_args))
|
||||||
retval = subprocess.call(args)
|
|
||||||
|
retval = run_rtmpdump(args)
|
||||||
|
|
||||||
while (retval == 2 or retval == 1) and not test:
|
while (retval == 2 or retval == 1) and not test:
|
||||||
prevsize = os.path.getsize(encodeFilename(tmpfilename))
|
prevsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||||
self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
|
self.to_screen(u'[rtmpdump] %s bytes' % prevsize)
|
||||||
time.sleep(5.0) # This seems to be needed
|
time.sleep(5.0) # This seems to be needed
|
||||||
retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
|
retval = run_rtmpdump(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
|
||||||
cursize = os.path.getsize(encodeFilename(tmpfilename))
|
cursize = os.path.getsize(encodeFilename(tmpfilename))
|
||||||
if prevsize == cursize and retval == 1:
|
if prevsize == cursize and retval == 1:
|
||||||
break
|
break
|
||||||
# Some rtmp streams seem to abort after ~ 99.8%. Don't complain for those
|
# Some rtmp streams seem to abort after ~ 99.8%. Don't complain for those
|
||||||
if prevsize == cursize and retval == 2 and cursize > 1024:
|
if prevsize == cursize and retval == 2 and cursize > 1024:
|
||||||
self.to_screen(u'\r[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
|
self.to_screen(u'[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
|
||||||
retval = 0
|
retval = 0
|
||||||
break
|
break
|
||||||
if retval == 0 or (test and retval == 2):
|
if retval == 0 or (test and retval == 2):
|
||||||
fsize = os.path.getsize(encodeFilename(tmpfilename))
|
fsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||||
self.to_screen(u'\r[rtmpdump] %s bytes' % fsize)
|
self.to_screen(u'[rtmpdump] %s bytes' % fsize)
|
||||||
self.try_rename(tmpfilename, filename)
|
self.try_rename(tmpfilename, filename)
|
||||||
self._hook_progress({
|
self._hook_progress({
|
||||||
'downloaded_bytes': fsize,
|
'downloaded_bytes': fsize,
|
||||||
@ -525,7 +583,7 @@ class FileDownloader(object):
|
|||||||
self.to_screen(u'\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
|
self.to_screen(u'\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
|
||||||
return False
|
return False
|
||||||
|
|
||||||
data_len_str = self.format_bytes(data_len)
|
data_len_str = format_bytes(data_len)
|
||||||
byte_counter = 0 + resume_len
|
byte_counter = 0 + resume_len
|
||||||
block_size = self.params.get('buffersize', 1024)
|
block_size = self.params.get('buffersize', 1024)
|
||||||
start = time.time()
|
start = time.time()
|
||||||
|
@ -7,8 +7,10 @@ import errno
|
|||||||
import io
|
import io
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
|
import platform
|
||||||
import re
|
import re
|
||||||
import shutil
|
import shutil
|
||||||
|
import subprocess
|
||||||
import socket
|
import socket
|
||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
@ -18,6 +20,7 @@ if os.name == 'nt':
|
|||||||
import ctypes
|
import ctypes
|
||||||
|
|
||||||
from .utils import (
|
from .utils import (
|
||||||
|
compat_cookiejar,
|
||||||
compat_http_client,
|
compat_http_client,
|
||||||
compat_print,
|
compat_print,
|
||||||
compat_str,
|
compat_str,
|
||||||
@ -30,9 +33,12 @@ from .utils import (
|
|||||||
DownloadError,
|
DownloadError,
|
||||||
encodeFilename,
|
encodeFilename,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
format_bytes,
|
||||||
locked_file,
|
locked_file,
|
||||||
|
make_HTTPS_handler,
|
||||||
MaxDownloadsReached,
|
MaxDownloadsReached,
|
||||||
PostProcessingError,
|
PostProcessingError,
|
||||||
|
platform_name,
|
||||||
preferredencoding,
|
preferredencoding,
|
||||||
SameFileError,
|
SameFileError,
|
||||||
sanitize_filename,
|
sanitize_filename,
|
||||||
@ -41,9 +47,11 @@ from .utils import (
|
|||||||
UnavailableVideoError,
|
UnavailableVideoError,
|
||||||
write_json_file,
|
write_json_file,
|
||||||
write_string,
|
write_string,
|
||||||
|
YoutubeDLHandler,
|
||||||
)
|
)
|
||||||
from .extractor import get_info_extractor, gen_extractors
|
from .extractor import get_info_extractor, gen_extractors
|
||||||
from .FileDownloader import FileDownloader
|
from .FileDownloader import FileDownloader
|
||||||
|
from .version import __version__
|
||||||
|
|
||||||
|
|
||||||
class YoutubeDL(object):
|
class YoutubeDL(object):
|
||||||
@ -97,6 +105,7 @@ class YoutubeDL(object):
|
|||||||
playlistend: Playlist item to end at.
|
playlistend: Playlist item to end at.
|
||||||
matchtitle: Download only matching titles.
|
matchtitle: Download only matching titles.
|
||||||
rejecttitle: Reject downloads for matching titles.
|
rejecttitle: Reject downloads for matching titles.
|
||||||
|
logger: Log messages to a logging.Logger instance.
|
||||||
logtostderr: Log messages to stderr instead of stdout.
|
logtostderr: Log messages to stderr instead of stdout.
|
||||||
writedescription: Write the video description to a .description file
|
writedescription: Write the video description to a .description file
|
||||||
writeinfojson: Write the video description to a .info.json file
|
writeinfojson: Write the video description to a .info.json file
|
||||||
@ -117,9 +126,13 @@ class YoutubeDL(object):
|
|||||||
noplaylist: Download single video instead of a playlist if in doubt.
|
noplaylist: Download single video instead of a playlist if in doubt.
|
||||||
age_limit: An integer representing the user's age in years.
|
age_limit: An integer representing the user's age in years.
|
||||||
Unsuitable videos for the given age are skipped.
|
Unsuitable videos for the given age are skipped.
|
||||||
downloadarchive: File name of a file where all downloads are recorded.
|
download_archive: File name of a file where all downloads are recorded.
|
||||||
Videos already present in the file are not downloaded
|
Videos already present in the file are not downloaded
|
||||||
again.
|
again.
|
||||||
|
cookiefile: File name where cookies should be read from and dumped to.
|
||||||
|
nocheckcertificate:Do not verify SSL certificates
|
||||||
|
proxy: URL of the proxy server to use
|
||||||
|
socket_timeout: Time to wait for unresponsive hosts, in seconds
|
||||||
|
|
||||||
The following parameters are not used by YoutubeDL itself, they are used by
|
The following parameters are not used by YoutubeDL itself, they are used by
|
||||||
the FileDownloader:
|
the FileDownloader:
|
||||||
@ -134,7 +147,7 @@ class YoutubeDL(object):
|
|||||||
_num_downloads = None
|
_num_downloads = None
|
||||||
_screen_file = None
|
_screen_file = None
|
||||||
|
|
||||||
def __init__(self, params):
|
def __init__(self, params=None):
|
||||||
"""Create a YoutubeDL object with the given options."""
|
"""Create a YoutubeDL object with the given options."""
|
||||||
self._ies = []
|
self._ies = []
|
||||||
self._ies_instances = {}
|
self._ies_instances = {}
|
||||||
@ -143,6 +156,7 @@ class YoutubeDL(object):
|
|||||||
self._download_retcode = 0
|
self._download_retcode = 0
|
||||||
self._num_downloads = 0
|
self._num_downloads = 0
|
||||||
self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
|
self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
|
||||||
|
self.params = {} if params is None else params
|
||||||
|
|
||||||
if (sys.version_info >= (3,) and sys.platform != 'win32' and
|
if (sys.version_info >= (3,) and sys.platform != 'win32' and
|
||||||
sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
|
sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
|
||||||
@ -152,14 +166,15 @@ class YoutubeDL(object):
|
|||||||
u'Assuming --restrict-filenames since file system encoding '
|
u'Assuming --restrict-filenames since file system encoding '
|
||||||
u'cannot encode all charactes. '
|
u'cannot encode all charactes. '
|
||||||
u'Set the LC_ALL environment variable to fix this.')
|
u'Set the LC_ALL environment variable to fix this.')
|
||||||
params['restrictfilenames'] = True
|
self.params['restrictfilenames'] = True
|
||||||
|
|
||||||
self.params = params
|
|
||||||
self.fd = FileDownloader(self, self.params)
|
self.fd = FileDownloader(self, self.params)
|
||||||
|
|
||||||
if '%(stitle)s' in self.params['outtmpl']:
|
if '%(stitle)s' in self.params.get('outtmpl', ''):
|
||||||
self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
|
self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
|
||||||
|
|
||||||
|
self._setup_opener()
|
||||||
|
|
||||||
def add_info_extractor(self, ie):
|
def add_info_extractor(self, ie):
|
||||||
"""Add an InfoExtractor object to the end of the list."""
|
"""Add an InfoExtractor object to the end of the list."""
|
||||||
self._ies.append(ie)
|
self._ies.append(ie)
|
||||||
@ -192,7 +207,9 @@ class YoutubeDL(object):
|
|||||||
|
|
||||||
def to_screen(self, message, skip_eol=False):
|
def to_screen(self, message, skip_eol=False):
|
||||||
"""Print message to stdout if not in quiet mode."""
|
"""Print message to stdout if not in quiet mode."""
|
||||||
if not self.params.get('quiet', False):
|
if self.params.get('logger'):
|
||||||
|
self.params['logger'].debug(message)
|
||||||
|
elif not self.params.get('quiet', False):
|
||||||
terminator = [u'\n', u''][skip_eol]
|
terminator = [u'\n', u''][skip_eol]
|
||||||
output = message + terminator
|
output = message + terminator
|
||||||
write_string(output, self._screen_file)
|
write_string(output, self._screen_file)
|
||||||
@ -200,10 +217,13 @@ class YoutubeDL(object):
|
|||||||
def to_stderr(self, message):
|
def to_stderr(self, message):
|
||||||
"""Print message to stderr."""
|
"""Print message to stderr."""
|
||||||
assert type(message) == type(u'')
|
assert type(message) == type(u'')
|
||||||
output = message + u'\n'
|
if self.params.get('logger'):
|
||||||
if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
|
self.params['logger'].error(message)
|
||||||
output = output.encode(preferredencoding())
|
else:
|
||||||
sys.stderr.write(output)
|
output = message + u'\n'
|
||||||
|
if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
|
||||||
|
output = output.encode(preferredencoding())
|
||||||
|
sys.stderr.write(output)
|
||||||
|
|
||||||
def to_console_title(self, message):
|
def to_console_title(self, message):
|
||||||
if not self.params.get('consoletitle', False):
|
if not self.params.get('consoletitle', False):
|
||||||
@ -235,10 +255,9 @@ class YoutubeDL(object):
|
|||||||
|
|
||||||
def __exit__(self, *args):
|
def __exit__(self, *args):
|
||||||
self.restore_console_title()
|
self.restore_console_title()
|
||||||
|
|
||||||
def fixed_template(self):
|
if self.params.get('cookiefile') is not None:
|
||||||
"""Checks if the output template is fixed."""
|
self.cookiejar.save()
|
||||||
return (re.search(u'(?u)%\\(.+?\\)s', self.params['outtmpl']) is None)
|
|
||||||
|
|
||||||
def trouble(self, message=None, tb=None):
|
def trouble(self, message=None, tb=None):
|
||||||
"""Determine action to take when a download problem appears.
|
"""Determine action to take when a download problem appears.
|
||||||
@ -355,15 +374,17 @@ class YoutubeDL(object):
|
|||||||
def _match_entry(self, info_dict):
|
def _match_entry(self, info_dict):
|
||||||
""" Returns None iff the file should be downloaded """
|
""" Returns None iff the file should be downloaded """
|
||||||
|
|
||||||
title = info_dict['title']
|
if 'title' in info_dict:
|
||||||
matchtitle = self.params.get('matchtitle', False)
|
# This can happen when we're just evaluating the playlist
|
||||||
if matchtitle:
|
title = info_dict['title']
|
||||||
if not re.search(matchtitle, title, re.IGNORECASE):
|
matchtitle = self.params.get('matchtitle', False)
|
||||||
return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
|
if matchtitle:
|
||||||
rejecttitle = self.params.get('rejecttitle', False)
|
if not re.search(matchtitle, title, re.IGNORECASE):
|
||||||
if rejecttitle:
|
return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
|
||||||
if re.search(rejecttitle, title, re.IGNORECASE):
|
rejecttitle = self.params.get('rejecttitle', False)
|
||||||
return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
|
if rejecttitle:
|
||||||
|
if re.search(rejecttitle, title, re.IGNORECASE):
|
||||||
|
return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
|
||||||
date = info_dict.get('upload_date', None)
|
date = info_dict.get('upload_date', None)
|
||||||
if date is not None:
|
if date is not None:
|
||||||
dateRange = self.params.get('daterange', DateRange())
|
dateRange = self.params.get('daterange', DateRange())
|
||||||
@ -374,8 +395,8 @@ class YoutubeDL(object):
|
|||||||
if age_limit < info_dict.get('age_limit', 0):
|
if age_limit < info_dict.get('age_limit', 0):
|
||||||
return u'Skipping "' + title + '" because it is age restricted'
|
return u'Skipping "' + title + '" because it is age restricted'
|
||||||
if self.in_download_archive(info_dict):
|
if self.in_download_archive(info_dict):
|
||||||
return (u'%(title)s has already been recorded in archive'
|
return (u'%s has already been recorded in archive'
|
||||||
% info_dict)
|
% info_dict.get('title', info_dict.get('id', u'video')))
|
||||||
return None
|
return None
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@ -454,7 +475,7 @@ class YoutubeDL(object):
|
|||||||
ie_key=ie_result.get('ie_key'),
|
ie_key=ie_result.get('ie_key'),
|
||||||
extra_info=extra_info)
|
extra_info=extra_info)
|
||||||
elif result_type == 'playlist':
|
elif result_type == 'playlist':
|
||||||
self.add_extra_info(ie_result, extra_info)
|
|
||||||
# We process each entry in the playlist
|
# We process each entry in the playlist
|
||||||
playlist = ie_result.get('title', None) or ie_result.get('id', None)
|
playlist = ie_result.get('title', None) or ie_result.get('id', None)
|
||||||
self.to_screen(u'[download] Downloading playlist: %s' % playlist)
|
self.to_screen(u'[download] Downloading playlist: %s' % playlist)
|
||||||
@ -484,6 +505,12 @@ class YoutubeDL(object):
|
|||||||
'webpage_url': ie_result['webpage_url'],
|
'webpage_url': ie_result['webpage_url'],
|
||||||
'extractor_key': ie_result['extractor_key'],
|
'extractor_key': ie_result['extractor_key'],
|
||||||
}
|
}
|
||||||
|
|
||||||
|
reason = self._match_entry(entry)
|
||||||
|
if reason is not None:
|
||||||
|
self.to_screen(u'[download] ' + reason)
|
||||||
|
continue
|
||||||
|
|
||||||
entry_result = self.process_ie_result(entry,
|
entry_result = self.process_ie_result(entry,
|
||||||
download=download,
|
download=download,
|
||||||
extra_info=extra)
|
extra_info=extra)
|
||||||
@ -768,13 +795,15 @@ class YoutubeDL(object):
|
|||||||
|
|
||||||
def download(self, url_list):
|
def download(self, url_list):
|
||||||
"""Download a given list of URLs."""
|
"""Download a given list of URLs."""
|
||||||
if len(url_list) > 1 and self.fixed_template():
|
if (len(url_list) > 1 and
|
||||||
|
'%' not in self.params['outtmpl']
|
||||||
|
and self.params.get('max_downloads') != 1):
|
||||||
raise SameFileError(self.params['outtmpl'])
|
raise SameFileError(self.params['outtmpl'])
|
||||||
|
|
||||||
for url in url_list:
|
for url in url_list:
|
||||||
try:
|
try:
|
||||||
#It also downloads the videos
|
#It also downloads the videos
|
||||||
videos = self.extract_info(url)
|
self.extract_info(url)
|
||||||
except UnavailableVideoError:
|
except UnavailableVideoError:
|
||||||
self.report_error(u'unable to download video')
|
self.report_error(u'unable to download video')
|
||||||
except MaxDownloadsReached:
|
except MaxDownloadsReached:
|
||||||
@ -806,11 +835,26 @@ class YoutubeDL(object):
|
|||||||
except (IOError, OSError):
|
except (IOError, OSError):
|
||||||
self.report_warning(u'Unable to remove downloaded video file')
|
self.report_warning(u'Unable to remove downloaded video file')
|
||||||
|
|
||||||
|
def _make_archive_id(self, info_dict):
|
||||||
|
# Future-proof against any change in case
|
||||||
|
# and backwards compatibility with prior versions
|
||||||
|
extractor = info_dict.get('extractor_key')
|
||||||
|
if extractor is None:
|
||||||
|
if 'id' in info_dict:
|
||||||
|
extractor = info_dict.get('ie_key') # key in a playlist
|
||||||
|
if extractor is None:
|
||||||
|
return None # Incomplete video information
|
||||||
|
return extractor.lower() + u' ' + info_dict['id']
|
||||||
|
|
||||||
def in_download_archive(self, info_dict):
|
def in_download_archive(self, info_dict):
|
||||||
fn = self.params.get('download_archive')
|
fn = self.params.get('download_archive')
|
||||||
if fn is None:
|
if fn is None:
|
||||||
return False
|
return False
|
||||||
vid_id = info_dict['extractor'] + u' ' + info_dict['id']
|
|
||||||
|
vid_id = self._make_archive_id(info_dict)
|
||||||
|
if vid_id is None:
|
||||||
|
return False # Incomplete video information
|
||||||
|
|
||||||
try:
|
try:
|
||||||
with locked_file(fn, 'r', encoding='utf-8') as archive_file:
|
with locked_file(fn, 'r', encoding='utf-8') as archive_file:
|
||||||
for line in archive_file:
|
for line in archive_file:
|
||||||
@ -825,12 +869,15 @@ class YoutubeDL(object):
|
|||||||
fn = self.params.get('download_archive')
|
fn = self.params.get('download_archive')
|
||||||
if fn is None:
|
if fn is None:
|
||||||
return
|
return
|
||||||
vid_id = info_dict['extractor'] + u' ' + info_dict['id']
|
vid_id = self._make_archive_id(info_dict)
|
||||||
|
assert vid_id
|
||||||
with locked_file(fn, 'a', encoding='utf-8') as archive_file:
|
with locked_file(fn, 'a', encoding='utf-8') as archive_file:
|
||||||
archive_file.write(vid_id + u'\n')
|
archive_file.write(vid_id + u'\n')
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def format_resolution(format, default='unknown'):
|
def format_resolution(format, default='unknown'):
|
||||||
|
if format.get('vcodec') == 'none':
|
||||||
|
return 'audio only'
|
||||||
if format.get('_resolution') is not None:
|
if format.get('_resolution') is not None:
|
||||||
return format['_resolution']
|
return format['_resolution']
|
||||||
if format.get('height') is not None:
|
if format.get('height') is not None:
|
||||||
@ -844,10 +891,11 @@ class YoutubeDL(object):
|
|||||||
|
|
||||||
def list_formats(self, info_dict):
|
def list_formats(self, info_dict):
|
||||||
def format_note(fdict):
|
def format_note(fdict):
|
||||||
if fdict.get('format_note') is not None:
|
|
||||||
return fdict['format_note']
|
|
||||||
res = u''
|
res = u''
|
||||||
if fdict.get('vcodec') is not None:
|
if fdict.get('format_note') is not None:
|
||||||
|
res += fdict['format_note'] + u' '
|
||||||
|
if (fdict.get('vcodec') is not None and
|
||||||
|
fdict.get('vcodec') != 'none'):
|
||||||
res += u'%-5s' % fdict['vcodec']
|
res += u'%-5s' % fdict['vcodec']
|
||||||
elif fdict.get('vbr') is not None:
|
elif fdict.get('vbr') is not None:
|
||||||
res += u'video'
|
res += u'video'
|
||||||
@ -863,25 +911,103 @@ class YoutubeDL(object):
|
|||||||
res += 'audio'
|
res += 'audio'
|
||||||
if fdict.get('abr') is not None:
|
if fdict.get('abr') is not None:
|
||||||
res += u'@%3dk' % fdict['abr']
|
res += u'@%3dk' % fdict['abr']
|
||||||
|
if fdict.get('filesize') is not None:
|
||||||
|
if res:
|
||||||
|
res += u', '
|
||||||
|
res += format_bytes(fdict['filesize'])
|
||||||
return res
|
return res
|
||||||
|
|
||||||
def line(format):
|
def line(format, idlen=20):
|
||||||
return (u'%-20s%-10s%-12s%s' % (
|
return ((u'%-' + compat_str(idlen + 1) + u's%-10s%-12s%s') % (
|
||||||
format['format_id'],
|
format['format_id'],
|
||||||
format['ext'],
|
format['ext'],
|
||||||
self.format_resolution(format),
|
self.format_resolution(format),
|
||||||
format_note(format),
|
format_note(format),
|
||||||
)
|
))
|
||||||
)
|
|
||||||
|
|
||||||
formats = info_dict.get('formats', [info_dict])
|
formats = info_dict.get('formats', [info_dict])
|
||||||
formats_s = list(map(line, formats))
|
idlen = max(len(u'format code'),
|
||||||
|
max(len(f['format_id']) for f in formats))
|
||||||
|
formats_s = [line(f, idlen) for f in formats]
|
||||||
if len(formats) > 1:
|
if len(formats) > 1:
|
||||||
formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
|
formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
|
||||||
formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'
|
formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'
|
||||||
|
|
||||||
header_line = line({
|
header_line = line({
|
||||||
'format_id': u'format code', 'ext': u'extension',
|
'format_id': u'format code', 'ext': u'extension',
|
||||||
'_resolution': u'resolution', 'format_note': u'note'})
|
'_resolution': u'resolution', 'format_note': u'note'}, idlen=idlen)
|
||||||
self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
|
self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
|
||||||
(info_dict['id'], header_line, u"\n".join(formats_s)))
|
(info_dict['id'], header_line, u"\n".join(formats_s)))
|
||||||
|
|
||||||
|
def urlopen(self, req):
|
||||||
|
""" Start an HTTP download """
|
||||||
|
return self._opener.open(req)
|
||||||
|
|
||||||
|
def print_debug_header(self):
|
||||||
|
if not self.params.get('verbose'):
|
||||||
|
return
|
||||||
|
write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
|
||||||
|
try:
|
||||||
|
sp = subprocess.Popen(
|
||||||
|
['git', 'rev-parse', '--short', 'HEAD'],
|
||||||
|
stdout=subprocess.PIPE, stderr=subprocess.PIPE,
|
||||||
|
cwd=os.path.dirname(os.path.abspath(__file__)))
|
||||||
|
out, err = sp.communicate()
|
||||||
|
out = out.decode().strip()
|
||||||
|
if re.match('[0-9a-f]+', out):
|
||||||
|
write_string(u'[debug] Git HEAD: ' + out + u'\n')
|
||||||
|
except:
|
||||||
|
try:
|
||||||
|
sys.exc_clear()
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
write_string(u'[debug] Python version %s - %s' %
|
||||||
|
(platform.python_version(), platform_name()) + u'\n')
|
||||||
|
|
||||||
|
proxy_map = {}
|
||||||
|
for handler in self._opener.handlers:
|
||||||
|
if hasattr(handler, 'proxies'):
|
||||||
|
proxy_map.update(handler.proxies)
|
||||||
|
write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
|
||||||
|
|
||||||
|
def _setup_opener(self):
|
||||||
|
timeout_val = self.params.get('socket_timeout')
|
||||||
|
timeout = 600 if timeout_val is None else float(timeout_val)
|
||||||
|
|
||||||
|
opts_cookiefile = self.params.get('cookiefile')
|
||||||
|
opts_proxy = self.params.get('proxy')
|
||||||
|
|
||||||
|
if opts_cookiefile is None:
|
||||||
|
self.cookiejar = compat_cookiejar.CookieJar()
|
||||||
|
else:
|
||||||
|
self.cookiejar = compat_cookiejar.MozillaCookieJar(
|
||||||
|
opts_cookiefile)
|
||||||
|
if os.access(opts_cookiefile, os.R_OK):
|
||||||
|
self.cookiejar.load()
|
||||||
|
|
||||||
|
cookie_processor = compat_urllib_request.HTTPCookieProcessor(
|
||||||
|
self.cookiejar)
|
||||||
|
if opts_proxy is not None:
|
||||||
|
if opts_proxy == '':
|
||||||
|
proxies = {}
|
||||||
|
else:
|
||||||
|
proxies = {'http': opts_proxy, 'https': opts_proxy}
|
||||||
|
else:
|
||||||
|
proxies = compat_urllib_request.getproxies()
|
||||||
|
# Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
|
||||||
|
if 'http' in proxies and 'https' not in proxies:
|
||||||
|
proxies['https'] = proxies['http']
|
||||||
|
proxy_handler = compat_urllib_request.ProxyHandler(proxies)
|
||||||
|
https_handler = make_HTTPS_handler(
|
||||||
|
self.params.get('nocheckcertificate', False))
|
||||||
|
opener = compat_urllib_request.build_opener(
|
||||||
|
https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
|
||||||
|
# Delete the default user-agent header, which would otherwise apply in
|
||||||
|
# cases where our custom HTTP handler doesn't come into play
|
||||||
|
# (See https://github.com/rg3/youtube-dl/issues/1309 for details)
|
||||||
|
opener.addheaders = []
|
||||||
|
self._opener = opener
|
||||||
|
|
||||||
|
# TODO remove this global modification
|
||||||
|
compat_urllib_request.install_opener(opener)
|
||||||
|
socket.setdefaulttimeout(timeout)
|
||||||
|
@ -35,50 +35,41 @@ __authors__ = (
|
|||||||
'Jelle van der Waa',
|
'Jelle van der Waa',
|
||||||
'Marcin Cieślak',
|
'Marcin Cieślak',
|
||||||
'Anton Larionov',
|
'Anton Larionov',
|
||||||
|
'Takuya Tsuchida',
|
||||||
)
|
)
|
||||||
|
|
||||||
__license__ = 'Public Domain'
|
__license__ = 'Public Domain'
|
||||||
|
|
||||||
import codecs
|
import codecs
|
||||||
import collections
|
|
||||||
import getpass
|
import getpass
|
||||||
import optparse
|
import optparse
|
||||||
import os
|
import os
|
||||||
import random
|
import random
|
||||||
import re
|
import re
|
||||||
import shlex
|
import shlex
|
||||||
import socket
|
|
||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
import traceback
|
|
||||||
import platform
|
|
||||||
|
|
||||||
|
|
||||||
from .utils import (
|
from .utils import (
|
||||||
compat_cookiejar,
|
|
||||||
compat_print,
|
compat_print,
|
||||||
compat_str,
|
|
||||||
compat_urllib_request,
|
|
||||||
DateRange,
|
DateRange,
|
||||||
decodeOption,
|
decodeOption,
|
||||||
determine_ext,
|
determine_ext,
|
||||||
DownloadError,
|
DownloadError,
|
||||||
get_cachedir,
|
get_cachedir,
|
||||||
make_HTTPS_handler,
|
|
||||||
MaxDownloadsReached,
|
MaxDownloadsReached,
|
||||||
platform_name,
|
|
||||||
preferredencoding,
|
preferredencoding,
|
||||||
SameFileError,
|
SameFileError,
|
||||||
std_headers,
|
std_headers,
|
||||||
write_string,
|
write_string,
|
||||||
YoutubeDLHandler,
|
|
||||||
)
|
)
|
||||||
from .update import update_self
|
from .update import update_self
|
||||||
from .version import __version__
|
|
||||||
from .FileDownloader import (
|
from .FileDownloader import (
|
||||||
FileDownloader,
|
FileDownloader,
|
||||||
)
|
)
|
||||||
from .extractor import gen_extractors
|
from .extractor import gen_extractors
|
||||||
|
from .version import __version__
|
||||||
from .YoutubeDL import YoutubeDL
|
from .YoutubeDL import YoutubeDL
|
||||||
from .PostProcessor import (
|
from .PostProcessor import (
|
||||||
FFmpegMetadataPP,
|
FFmpegMetadataPP,
|
||||||
@ -207,6 +198,9 @@ def parseOpts(overrideArguments=None):
|
|||||||
general.add_option(
|
general.add_option(
|
||||||
'--no-cache-dir', action='store_const', const=None, dest='cachedir',
|
'--no-cache-dir', action='store_const', const=None, dest='cachedir',
|
||||||
help='Disable filesystem caching')
|
help='Disable filesystem caching')
|
||||||
|
general.add_option(
|
||||||
|
'--socket-timeout', dest='socket_timeout',
|
||||||
|
type=float, default=None, help=optparse.SUPPRESS_HELP)
|
||||||
|
|
||||||
|
|
||||||
selection.add_option('--playlist-start',
|
selection.add_option('--playlist-start',
|
||||||
@ -215,7 +209,9 @@ def parseOpts(overrideArguments=None):
|
|||||||
dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)
|
dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)
|
||||||
selection.add_option('--match-title', dest='matchtitle', metavar='REGEX',help='download only matching titles (regex or caseless sub-string)')
|
selection.add_option('--match-title', dest='matchtitle', metavar='REGEX',help='download only matching titles (regex or caseless sub-string)')
|
||||||
selection.add_option('--reject-title', dest='rejecttitle', metavar='REGEX',help='skip download for matching titles (regex or caseless sub-string)')
|
selection.add_option('--reject-title', dest='rejecttitle', metavar='REGEX',help='skip download for matching titles (regex or caseless sub-string)')
|
||||||
selection.add_option('--max-downloads', metavar='NUMBER', dest='max_downloads', help='Abort after downloading NUMBER files', default=None)
|
selection.add_option('--max-downloads', metavar='NUMBER',
|
||||||
|
dest='max_downloads', type=int, default=None,
|
||||||
|
help='Abort after downloading NUMBER files')
|
||||||
selection.add_option('--min-filesize', metavar='SIZE', dest='min_filesize', help="Do not download any videos smaller than SIZE (e.g. 50k or 44.6m)", default=None)
|
selection.add_option('--min-filesize', metavar='SIZE', dest='min_filesize', help="Do not download any videos smaller than SIZE (e.g. 50k or 44.6m)", default=None)
|
||||||
selection.add_option('--max-filesize', metavar='SIZE', dest='max_filesize', help="Do not download any videos larger than SIZE (e.g. 50k or 44.6m)", default=None)
|
selection.add_option('--max-filesize', metavar='SIZE', dest='max_filesize', help="Do not download any videos larger than SIZE (e.g. 50k or 44.6m)", default=None)
|
||||||
selection.add_option('--date', metavar='DATE', dest='date', help='download only videos uploaded in this date', default=None)
|
selection.add_option('--date', metavar='DATE', dest='date', help='download only videos uploaded in this date', default=None)
|
||||||
@ -227,7 +223,7 @@ def parseOpts(overrideArguments=None):
|
|||||||
default=None, type=int)
|
default=None, type=int)
|
||||||
selection.add_option('--download-archive', metavar='FILE',
|
selection.add_option('--download-archive', metavar='FILE',
|
||||||
dest='download_archive',
|
dest='download_archive',
|
||||||
help='Download only videos not present in the archive file. Record all downloaded videos in it.')
|
help='Download only videos not present in the archive file. Record the IDs of all downloaded videos in it.')
|
||||||
|
|
||||||
|
|
||||||
authentication.add_option('-u', '--username',
|
authentication.add_option('-u', '--username',
|
||||||
@ -242,7 +238,7 @@ def parseOpts(overrideArguments=None):
|
|||||||
|
|
||||||
video_format.add_option('-f', '--format',
|
video_format.add_option('-f', '--format',
|
||||||
action='store', dest='format', metavar='FORMAT', default='best',
|
action='store', dest='format', metavar='FORMAT', default='best',
|
||||||
help='video format code, specifiy the order of preference using slashes: "-f 22/17/18". "-f mp4" and "-f flv" are also supported')
|
help='video format code, specify the order of preference using slashes: "-f 22/17/18". "-f mp4" and "-f flv" are also supported')
|
||||||
video_format.add_option('--all-formats',
|
video_format.add_option('--all-formats',
|
||||||
action='store_const', dest='format', help='download all available video formats', const='all')
|
action='store_const', dest='format', help='download all available video formats', const='all')
|
||||||
video_format.add_option('--prefer-free-formats',
|
video_format.add_option('--prefer-free-formats',
|
||||||
@ -324,7 +320,7 @@ def parseOpts(overrideArguments=None):
|
|||||||
help='print downloaded pages to debug problems(very verbose)')
|
help='print downloaded pages to debug problems(very verbose)')
|
||||||
verbosity.add_option('--write-pages',
|
verbosity.add_option('--write-pages',
|
||||||
action='store_true', dest='write_pages', default=False,
|
action='store_true', dest='write_pages', default=False,
|
||||||
help='Write downloaded pages to files in the current directory')
|
help='Write downloaded intermediary pages to files in the current directory to debug problems')
|
||||||
verbosity.add_option('--youtube-print-sig-code',
|
verbosity.add_option('--youtube-print-sig-code',
|
||||||
action='store_true', dest='youtube_print_sig_code', default=False,
|
action='store_true', dest='youtube_print_sig_code', default=False,
|
||||||
help=optparse.SUPPRESS_HELP)
|
help=optparse.SUPPRESS_HELP)
|
||||||
@ -451,19 +447,6 @@ def _real_main(argv=None):
|
|||||||
|
|
||||||
parser, opts, args = parseOpts(argv)
|
parser, opts, args = parseOpts(argv)
|
||||||
|
|
||||||
# Open appropriate CookieJar
|
|
||||||
if opts.cookiefile is None:
|
|
||||||
jar = compat_cookiejar.CookieJar()
|
|
||||||
else:
|
|
||||||
try:
|
|
||||||
jar = compat_cookiejar.MozillaCookieJar(opts.cookiefile)
|
|
||||||
if os.access(opts.cookiefile, os.R_OK):
|
|
||||||
jar.load()
|
|
||||||
except (IOError, OSError) as err:
|
|
||||||
if opts.verbose:
|
|
||||||
traceback.print_exc()
|
|
||||||
write_string(u'ERROR: unable to open cookie file\n')
|
|
||||||
sys.exit(101)
|
|
||||||
# Set user agent
|
# Set user agent
|
||||||
if opts.user_agent is not None:
|
if opts.user_agent is not None:
|
||||||
std_headers['User-Agent'] = opts.user_agent
|
std_headers['User-Agent'] = opts.user_agent
|
||||||
@ -495,8 +478,6 @@ def _real_main(argv=None):
|
|||||||
all_urls = batchurls + args
|
all_urls = batchurls + args
|
||||||
all_urls = [url.strip() for url in all_urls]
|
all_urls = [url.strip() for url in all_urls]
|
||||||
|
|
||||||
opener = _setup_opener(jar=jar, opts=opts)
|
|
||||||
|
|
||||||
extractors = gen_extractors()
|
extractors = gen_extractors()
|
||||||
|
|
||||||
if opts.list_extractors:
|
if opts.list_extractors:
|
||||||
@ -551,7 +532,7 @@ def _real_main(argv=None):
|
|||||||
if opts.retries is not None:
|
if opts.retries is not None:
|
||||||
try:
|
try:
|
||||||
opts.retries = int(opts.retries)
|
opts.retries = int(opts.retries)
|
||||||
except (TypeError, ValueError) as err:
|
except (TypeError, ValueError):
|
||||||
parser.error(u'invalid retry count specified')
|
parser.error(u'invalid retry count specified')
|
||||||
if opts.buffersize is not None:
|
if opts.buffersize is not None:
|
||||||
numeric_buffersize = FileDownloader.parse_bytes(opts.buffersize)
|
numeric_buffersize = FileDownloader.parse_bytes(opts.buffersize)
|
||||||
@ -562,13 +543,13 @@ def _real_main(argv=None):
|
|||||||
opts.playliststart = int(opts.playliststart)
|
opts.playliststart = int(opts.playliststart)
|
||||||
if opts.playliststart <= 0:
|
if opts.playliststart <= 0:
|
||||||
raise ValueError(u'Playlist start must be positive')
|
raise ValueError(u'Playlist start must be positive')
|
||||||
except (TypeError, ValueError) as err:
|
except (TypeError, ValueError):
|
||||||
parser.error(u'invalid playlist start number specified')
|
parser.error(u'invalid playlist start number specified')
|
||||||
try:
|
try:
|
||||||
opts.playlistend = int(opts.playlistend)
|
opts.playlistend = int(opts.playlistend)
|
||||||
if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart):
|
if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart):
|
||||||
raise ValueError(u'Playlist end must be greater than playlist start')
|
raise ValueError(u'Playlist end must be greater than playlist start')
|
||||||
except (TypeError, ValueError) as err:
|
except (TypeError, ValueError):
|
||||||
parser.error(u'invalid playlist end number specified')
|
parser.error(u'invalid playlist end number specified')
|
||||||
if opts.extractaudio:
|
if opts.extractaudio:
|
||||||
if opts.audioformat not in ['best', 'aac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav']:
|
if opts.audioformat not in ['best', 'aac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav']:
|
||||||
@ -671,34 +652,14 @@ def _real_main(argv=None):
|
|||||||
'youtube_print_sig_code': opts.youtube_print_sig_code,
|
'youtube_print_sig_code': opts.youtube_print_sig_code,
|
||||||
'age_limit': opts.age_limit,
|
'age_limit': opts.age_limit,
|
||||||
'download_archive': opts.download_archive,
|
'download_archive': opts.download_archive,
|
||||||
|
'cookiefile': opts.cookiefile,
|
||||||
|
'nocheckcertificate': opts.no_check_certificate,
|
||||||
|
'proxy': opts.proxy,
|
||||||
|
'socket_timeout': opts.socket_timeout,
|
||||||
}
|
}
|
||||||
|
|
||||||
with YoutubeDL(ydl_opts) as ydl:
|
with YoutubeDL(ydl_opts) as ydl:
|
||||||
if opts.verbose:
|
ydl.print_debug_header()
|
||||||
write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
|
|
||||||
try:
|
|
||||||
sp = subprocess.Popen(
|
|
||||||
['git', 'rev-parse', '--short', 'HEAD'],
|
|
||||||
stdout=subprocess.PIPE, stderr=subprocess.PIPE,
|
|
||||||
cwd=os.path.dirname(os.path.abspath(__file__)))
|
|
||||||
out, err = sp.communicate()
|
|
||||||
out = out.decode().strip()
|
|
||||||
if re.match('[0-9a-f]+', out):
|
|
||||||
write_string(u'[debug] Git HEAD: ' + out + u'\n')
|
|
||||||
except:
|
|
||||||
try:
|
|
||||||
sys.exc_clear()
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
write_string(u'[debug] Python version %s - %s' %
|
|
||||||
(platform.python_version(), platform_name()) + u'\n')
|
|
||||||
|
|
||||||
proxy_map = {}
|
|
||||||
for handler in opener.handlers:
|
|
||||||
if hasattr(handler, 'proxies'):
|
|
||||||
proxy_map.update(handler.proxies)
|
|
||||||
write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
|
|
||||||
|
|
||||||
ydl.add_default_info_extractors()
|
ydl.add_default_info_extractors()
|
||||||
|
|
||||||
# PostProcessors
|
# PostProcessors
|
||||||
@ -729,46 +690,9 @@ def _real_main(argv=None):
|
|||||||
ydl.to_screen(u'--max-download limit reached, aborting.')
|
ydl.to_screen(u'--max-download limit reached, aborting.')
|
||||||
retcode = 101
|
retcode = 101
|
||||||
|
|
||||||
# Dump cookie jar if requested
|
|
||||||
if opts.cookiefile is not None:
|
|
||||||
try:
|
|
||||||
jar.save()
|
|
||||||
except (IOError, OSError):
|
|
||||||
sys.exit(u'ERROR: unable to save cookie jar')
|
|
||||||
|
|
||||||
sys.exit(retcode)
|
sys.exit(retcode)
|
||||||
|
|
||||||
|
|
||||||
def _setup_opener(jar=None, opts=None, timeout=300):
|
|
||||||
if opts is None:
|
|
||||||
FakeOptions = collections.namedtuple(
|
|
||||||
'FakeOptions', ['proxy', 'no_check_certificate'])
|
|
||||||
opts = FakeOptions(proxy=None, no_check_certificate=False)
|
|
||||||
|
|
||||||
cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar)
|
|
||||||
if opts.proxy is not None:
|
|
||||||
if opts.proxy == '':
|
|
||||||
proxies = {}
|
|
||||||
else:
|
|
||||||
proxies = {'http': opts.proxy, 'https': opts.proxy}
|
|
||||||
else:
|
|
||||||
proxies = compat_urllib_request.getproxies()
|
|
||||||
# Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
|
|
||||||
if 'http' in proxies and 'https' not in proxies:
|
|
||||||
proxies['https'] = proxies['http']
|
|
||||||
proxy_handler = compat_urllib_request.ProxyHandler(proxies)
|
|
||||||
https_handler = make_HTTPS_handler(opts)
|
|
||||||
opener = compat_urllib_request.build_opener(
|
|
||||||
https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
|
|
||||||
# Delete the default user-agent header, which would otherwise apply in
|
|
||||||
# cases where our custom HTTP handler doesn't come into play
|
|
||||||
# (See https://github.com/rg3/youtube-dl/issues/1309 for details)
|
|
||||||
opener.addheaders = []
|
|
||||||
compat_urllib_request.install_opener(opener)
|
|
||||||
socket.setdefaulttimeout(timeout)
|
|
||||||
return opener
|
|
||||||
|
|
||||||
|
|
||||||
def main(argv=None):
|
def main(argv=None):
|
||||||
try:
|
try:
|
||||||
_real_main(argv)
|
_real_main(argv)
|
||||||
|
@ -20,9 +20,11 @@ from .c56 import C56IE
|
|||||||
from .canalplus import CanalplusIE
|
from .canalplus import CanalplusIE
|
||||||
from .canalc2 import Canalc2IE
|
from .canalc2 import Canalc2IE
|
||||||
from .cinemassacre import CinemassacreIE
|
from .cinemassacre import CinemassacreIE
|
||||||
|
from .clipfish import ClipfishIE
|
||||||
|
from .clipsyndicate import ClipsyndicateIE
|
||||||
from .cnn import CNNIE
|
from .cnn import CNNIE
|
||||||
from .collegehumor import CollegeHumorIE
|
from .collegehumor import CollegeHumorIE
|
||||||
from .comedycentral import ComedyCentralIE
|
from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
|
||||||
from .condenast import CondeNastIE
|
from .condenast import CondeNastIE
|
||||||
from .criterion import CriterionIE
|
from .criterion import CriterionIE
|
||||||
from .cspan import CSpanIE
|
from .cspan import CSpanIE
|
||||||
@ -70,6 +72,7 @@ from .hotnewhiphop import HotNewHipHopIE
|
|||||||
from .howcast import HowcastIE
|
from .howcast import HowcastIE
|
||||||
from .hypem import HypemIE
|
from .hypem import HypemIE
|
||||||
from .ign import IGNIE, OneUPIE
|
from .ign import IGNIE, OneUPIE
|
||||||
|
from .imdb import ImdbIE
|
||||||
from .ina import InaIE
|
from .ina import InaIE
|
||||||
from .infoq import InfoQIE
|
from .infoq import InfoQIE
|
||||||
from .instagram import InstagramIE
|
from .instagram import InstagramIE
|
||||||
@ -98,11 +101,13 @@ from .nba import NBAIE
|
|||||||
from .nbc import NBCNewsIE
|
from .nbc import NBCNewsIE
|
||||||
from .newgrounds import NewgroundsIE
|
from .newgrounds import NewgroundsIE
|
||||||
from .nhl import NHLIE, NHLVideocenterIE
|
from .nhl import NHLIE, NHLVideocenterIE
|
||||||
|
from .niconico import NiconicoIE
|
||||||
from .nowvideo import NowVideoIE
|
from .nowvideo import NowVideoIE
|
||||||
from .ooyala import OoyalaIE
|
from .ooyala import OoyalaIE
|
||||||
from .orf import ORFIE
|
from .orf import ORFIE
|
||||||
from .pbs import PBSIE
|
from .pbs import PBSIE
|
||||||
from .photobucket import PhotobucketIE
|
from .photobucket import PhotobucketIE
|
||||||
|
from .podomatic import PodomaticIE
|
||||||
from .pornhub import PornHubIE
|
from .pornhub import PornHubIE
|
||||||
from .pornotube import PornotubeIE
|
from .pornotube import PornotubeIE
|
||||||
from .rbmaradio import RBMARadioIE
|
from .rbmaradio import RBMARadioIE
|
||||||
@ -154,8 +159,13 @@ from .viddler import ViddlerIE
|
|||||||
from .videodetective import VideoDetectiveIE
|
from .videodetective import VideoDetectiveIE
|
||||||
from .videofyme import VideofyMeIE
|
from .videofyme import VideofyMeIE
|
||||||
from .videopremium import VideoPremiumIE
|
from .videopremium import VideoPremiumIE
|
||||||
from .vimeo import VimeoIE, VimeoChannelIE
|
from .vimeo import (
|
||||||
|
VimeoIE,
|
||||||
|
VimeoChannelIE,
|
||||||
|
VimeoUserIE,
|
||||||
|
)
|
||||||
from .vine import VineIE
|
from .vine import VineIE
|
||||||
|
from .viki import VikiIE
|
||||||
from .vk import VKIE
|
from .vk import VKIE
|
||||||
from .wat import WatIE
|
from .wat import WatIE
|
||||||
from .websurg import WeBSurgIE
|
from .websurg import WeBSurgIE
|
||||||
@ -166,7 +176,11 @@ from .xhamster import XHamsterIE
|
|||||||
from .xnxx import XNXXIE
|
from .xnxx import XNXXIE
|
||||||
from .xvideos import XVideosIE
|
from .xvideos import XVideosIE
|
||||||
from .xtube import XTubeIE
|
from .xtube import XTubeIE
|
||||||
from .yahoo import YahooIE, YahooSearchIE
|
from .yahoo import (
|
||||||
|
YahooIE,
|
||||||
|
YahooNewsIE,
|
||||||
|
YahooSearchIE,
|
||||||
|
)
|
||||||
from .youjizz import YouJizzIE
|
from .youjizz import YouJizzIE
|
||||||
from .youku import YoukuIE
|
from .youku import YoukuIE
|
||||||
from .youporn import YouPornIE
|
from .youporn import YouPornIE
|
||||||
@ -183,6 +197,7 @@ from .youtube import (
|
|||||||
YoutubeTruncatedURLIE,
|
YoutubeTruncatedURLIE,
|
||||||
YoutubeWatchLaterIE,
|
YoutubeWatchLaterIE,
|
||||||
YoutubeFavouritesIE,
|
YoutubeFavouritesIE,
|
||||||
|
YoutubeHistoryIE,
|
||||||
)
|
)
|
||||||
from .zdf import ZDFIE
|
from .zdf import ZDFIE
|
||||||
|
|
||||||
|
@ -1,5 +1,4 @@
|
|||||||
import re
|
import re
|
||||||
import xml.etree.ElementTree
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
@ -28,9 +27,8 @@ class AnitubeIE(InfoExtractor):
|
|||||||
key = self._html_search_regex(r'http://www\.anitube\.se/embed/([A-Za-z0-9_-]*)',
|
key = self._html_search_regex(r'http://www\.anitube\.se/embed/([A-Za-z0-9_-]*)',
|
||||||
webpage, u'key')
|
webpage, u'key')
|
||||||
|
|
||||||
webpage_config = self._download_webpage('http://www.anitube.se/nuevo/econfig.php?key=%s' % key,
|
config_xml = self._download_xml('http://www.anitube.se/nuevo/econfig.php?key=%s' % key,
|
||||||
key)
|
key)
|
||||||
config_xml = xml.etree.ElementTree.fromstring(webpage_config.encode('utf-8'))
|
|
||||||
|
|
||||||
video_title = config_xml.find('title').text
|
video_title = config_xml.find('title').text
|
||||||
|
|
||||||
|
@ -1,7 +1,6 @@
|
|||||||
# encoding: utf-8
|
# encoding: utf-8
|
||||||
import re
|
import re
|
||||||
import json
|
import json
|
||||||
import xml.etree.ElementTree
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@ -78,8 +77,7 @@ class ArteTvIE(InfoExtractor):
|
|||||||
"""Extract from videos.arte.tv"""
|
"""Extract from videos.arte.tv"""
|
||||||
ref_xml_url = url.replace('/videos/', '/do_delegate/videos/')
|
ref_xml_url = url.replace('/videos/', '/do_delegate/videos/')
|
||||||
ref_xml_url = ref_xml_url.replace('.html', ',view,asPlayerXml.xml')
|
ref_xml_url = ref_xml_url.replace('.html', ',view,asPlayerXml.xml')
|
||||||
ref_xml = self._download_webpage(ref_xml_url, video_id, note=u'Downloading metadata')
|
ref_xml_doc = self._download_xml(ref_xml_url, video_id, note=u'Downloading metadata')
|
||||||
ref_xml_doc = xml.etree.ElementTree.fromstring(ref_xml)
|
|
||||||
config_node = find_xpath_attr(ref_xml_doc, './/video', 'lang', lang)
|
config_node = find_xpath_attr(ref_xml_doc, './/video', 'lang', lang)
|
||||||
config_xml_url = config_node.attrib['ref']
|
config_xml_url = config_node.attrib['ref']
|
||||||
config_xml = self._download_webpage(config_xml_url, video_id, note=u'Downloading configuration')
|
config_xml = self._download_webpage(config_xml_url, video_id, note=u'Downloading configuration')
|
||||||
@ -109,9 +107,8 @@ class ArteTvIE(InfoExtractor):
|
|||||||
"""Extract form http://liveweb.arte.tv/"""
|
"""Extract form http://liveweb.arte.tv/"""
|
||||||
webpage = self._download_webpage(url, name)
|
webpage = self._download_webpage(url, name)
|
||||||
video_id = self._search_regex(r'eventId=(\d+?)("|&)', webpage, u'event id')
|
video_id = self._search_regex(r'eventId=(\d+?)("|&)', webpage, u'event id')
|
||||||
config_xml = self._download_webpage('http://download.liveweb.arte.tv/o21/liveweb/events/event-%s.xml' % video_id,
|
config_doc = self._download_xml('http://download.liveweb.arte.tv/o21/liveweb/events/event-%s.xml' % video_id,
|
||||||
video_id, u'Downloading information')
|
video_id, u'Downloading information')
|
||||||
config_doc = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
|
|
||||||
event_doc = config_doc.find('event')
|
event_doc = config_doc.find('event')
|
||||||
url_node = event_doc.find('video').find('urlHd')
|
url_node = event_doc.find('video').find('urlHd')
|
||||||
if url_node is None:
|
if url_node is None:
|
||||||
|
@ -25,6 +25,11 @@ class BambuserIE(InfoExtractor):
|
|||||||
u'uploader': u'pixelversity',
|
u'uploader': u'pixelversity',
|
||||||
u'uploader_id': u'344706',
|
u'uploader_id': u'344706',
|
||||||
},
|
},
|
||||||
|
u'params': {
|
||||||
|
# It doesn't respect the 'Range' header, it would download the whole video
|
||||||
|
# caused the travis builds to fail: https://travis-ci.org/rg3/youtube-dl/jobs/14493845#L59
|
||||||
|
u'skip_download': True,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -20,28 +20,6 @@ class BandcampIE(InfoExtractor):
|
|||||||
u"title": u"youtube-dl test song \"'/\\\u00e4\u21ad"
|
u"title": u"youtube-dl test song \"'/\\\u00e4\u21ad"
|
||||||
},
|
},
|
||||||
u'skip': u'There is a limit of 200 free downloads / month for the test song'
|
u'skip': u'There is a limit of 200 free downloads / month for the test song'
|
||||||
}, {
|
|
||||||
u'url': u'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
|
|
||||||
u'playlist': [
|
|
||||||
{
|
|
||||||
u'file': u'1353101989.mp3',
|
|
||||||
u'md5': u'39bc1eded3476e927c724321ddf116cf',
|
|
||||||
u'info_dict': {
|
|
||||||
u'title': u'Intro',
|
|
||||||
}
|
|
||||||
},
|
|
||||||
{
|
|
||||||
u'file': u'38097443.mp3',
|
|
||||||
u'md5': u'1a2c32e2691474643e912cc6cd4bffaa',
|
|
||||||
u'info_dict': {
|
|
||||||
u'title': u'Kero One - Keep It Alive (Blazo remix)',
|
|
||||||
}
|
|
||||||
},
|
|
||||||
],
|
|
||||||
u'params': {
|
|
||||||
u'playlistend': 2
|
|
||||||
},
|
|
||||||
u'skip': u'Bancamp imposes download limits. See test_playlists:test_bandcamp_album for the playlist test'
|
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@ -56,20 +34,17 @@ class BandcampIE(InfoExtractor):
|
|||||||
json_code = m_trackinfo.group(1)
|
json_code = m_trackinfo.group(1)
|
||||||
data = json.loads(json_code)
|
data = json.loads(json_code)
|
||||||
|
|
||||||
entries = []
|
|
||||||
for d in data:
|
for d in data:
|
||||||
formats = [{
|
formats = [{
|
||||||
'format_id': 'format_id',
|
'format_id': 'format_id',
|
||||||
'url': format_url,
|
'url': format_url,
|
||||||
'ext': format_id.partition('-')[0]
|
'ext': format_id.partition('-')[0]
|
||||||
} for format_id, format_url in sorted(d['file'].items())]
|
} for format_id, format_url in sorted(d['file'].items())]
|
||||||
entries.append({
|
return {
|
||||||
'id': compat_str(d['id']),
|
'id': compat_str(d['id']),
|
||||||
'title': d['title'],
|
'title': d['title'],
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
})
|
}
|
||||||
|
|
||||||
return self.playlist_result(entries, title, title)
|
|
||||||
else:
|
else:
|
||||||
raise ExtractorError(u'No free songs found')
|
raise ExtractorError(u'No free songs found')
|
||||||
|
|
||||||
@ -112,6 +87,30 @@ class BandcampAlbumIE(InfoExtractor):
|
|||||||
IE_NAME = u'Bandcamp:album'
|
IE_NAME = u'Bandcamp:album'
|
||||||
_VALID_URL = r'http://.*?\.bandcamp\.com/album/(?P<title>.*)'
|
_VALID_URL = r'http://.*?\.bandcamp\.com/album/(?P<title>.*)'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
u'url': u'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
|
||||||
|
u'playlist': [
|
||||||
|
{
|
||||||
|
u'file': u'1353101989.mp3',
|
||||||
|
u'md5': u'39bc1eded3476e927c724321ddf116cf',
|
||||||
|
u'info_dict': {
|
||||||
|
u'title': u'Intro',
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
u'file': u'38097443.mp3',
|
||||||
|
u'md5': u'1a2c32e2691474643e912cc6cd4bffaa',
|
||||||
|
u'info_dict': {
|
||||||
|
u'title': u'Kero One - Keep It Alive (Blazo remix)',
|
||||||
|
}
|
||||||
|
},
|
||||||
|
],
|
||||||
|
u'params': {
|
||||||
|
u'playlistend': 2
|
||||||
|
},
|
||||||
|
u'skip': u'Bancamp imposes download limits. See test_playlists:test_bandcamp_album for the playlist test'
|
||||||
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
title = mobj.group('title')
|
title = mobj.group('title')
|
||||||
|
@ -75,16 +75,22 @@ class BrightcoveIE(InfoExtractor):
|
|||||||
params = {'flashID': object_doc.attrib['id'],
|
params = {'flashID': object_doc.attrib['id'],
|
||||||
'playerID': find_xpath_attr(object_doc, './param', 'name', 'playerID').attrib['value'],
|
'playerID': find_xpath_attr(object_doc, './param', 'name', 'playerID').attrib['value'],
|
||||||
}
|
}
|
||||||
playerKey = find_xpath_attr(object_doc, './param', 'name', 'playerKey')
|
def find_param(name):
|
||||||
|
node = find_xpath_attr(object_doc, './param', 'name', name)
|
||||||
|
if node is not None:
|
||||||
|
return node.attrib['value']
|
||||||
|
return None
|
||||||
|
playerKey = find_param('playerKey')
|
||||||
# Not all pages define this value
|
# Not all pages define this value
|
||||||
if playerKey is not None:
|
if playerKey is not None:
|
||||||
params['playerKey'] = playerKey.attrib['value']
|
params['playerKey'] = playerKey
|
||||||
videoPlayer = find_xpath_attr(object_doc, './param', 'name', '@videoPlayer')
|
# The three fields hold the id of the video
|
||||||
|
videoPlayer = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID')
|
||||||
if videoPlayer is not None:
|
if videoPlayer is not None:
|
||||||
params['@videoPlayer'] = videoPlayer.attrib['value']
|
params['@videoPlayer'] = videoPlayer
|
||||||
linkBase = find_xpath_attr(object_doc, './param', 'name', 'linkBaseURL')
|
linkBase = find_param('linkBaseURL')
|
||||||
if linkBase is not None:
|
if linkBase is not None:
|
||||||
params['linkBaseURL'] = linkBase.attrib['value']
|
params['linkBaseURL'] = linkBase
|
||||||
data = compat_urllib_parse.urlencode(params)
|
data = compat_urllib_parse.urlencode(params)
|
||||||
return cls._FEDERATED_URL_TEMPLATE % data
|
return cls._FEDERATED_URL_TEMPLATE % data
|
||||||
|
|
||||||
|
@ -1,6 +1,5 @@
|
|||||||
# encoding: utf-8
|
# encoding: utf-8
|
||||||
import re
|
import re
|
||||||
import xml.etree.ElementTree
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import unified_strdate
|
from ..utils import unified_strdate
|
||||||
@ -31,11 +30,10 @@ class CanalplusIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, mobj.group('path'))
|
webpage = self._download_webpage(url, mobj.group('path'))
|
||||||
video_id = self._search_regex(r'videoId = "(\d+)";', webpage, u'video id')
|
video_id = self._search_regex(r'videoId = "(\d+)";', webpage, u'video id')
|
||||||
info_url = self._VIDEO_INFO_TEMPLATE % video_id
|
info_url = self._VIDEO_INFO_TEMPLATE % video_id
|
||||||
info_page = self._download_webpage(info_url,video_id,
|
doc = self._download_xml(info_url,video_id,
|
||||||
u'Downloading video info')
|
u'Downloading video info')
|
||||||
|
|
||||||
self.report_extraction(video_id)
|
self.report_extraction(video_id)
|
||||||
doc = xml.etree.ElementTree.fromstring(info_page.encode('utf-8'))
|
|
||||||
video_info = [video for video in doc if video.find('ID').text == video_id][0]
|
video_info = [video for video in doc if video.find('ID').text == video_id][0]
|
||||||
infos = video_info.find('INFOS')
|
infos = video_info.find('INFOS')
|
||||||
media = video_info.find('MEDIA')
|
media = video_info.find('MEDIA')
|
||||||
|
58
youtube_dl/extractor/clipfish.py
Normal file
58
youtube_dl/extractor/clipfish.py
Normal file
@ -0,0 +1,58 @@
|
|||||||
|
import re
|
||||||
|
import time
|
||||||
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import ExtractorError
|
||||||
|
|
||||||
|
|
||||||
|
class ClipfishIE(InfoExtractor):
|
||||||
|
IE_NAME = u'clipfish'
|
||||||
|
|
||||||
|
_VALID_URL = r'^https?://(?:www\.)?clipfish\.de/.*?/video/(?P<id>[0-9]+)/'
|
||||||
|
_TEST = {
|
||||||
|
u'url': u'http://www.clipfish.de/special/game-trailer/video/3966754/fifa-14-e3-2013-trailer/',
|
||||||
|
u'file': u'3966754.mp4',
|
||||||
|
u'md5': u'2521cd644e862936cf2e698206e47385',
|
||||||
|
u'info_dict': {
|
||||||
|
u'title': u'FIFA 14 - E3 2013 Trailer',
|
||||||
|
u'duration': 82,
|
||||||
|
},
|
||||||
|
u'skip': 'Blocked in the US'
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group(1)
|
||||||
|
|
||||||
|
info_url = ('http://www.clipfish.de/devxml/videoinfo/%s?ts=%d' %
|
||||||
|
(video_id, int(time.time())))
|
||||||
|
doc = self._download_xml(
|
||||||
|
info_url, video_id, note=u'Downloading info page')
|
||||||
|
title = doc.find('title').text
|
||||||
|
video_url = doc.find('filename').text
|
||||||
|
if video_url is None:
|
||||||
|
xml_bytes = xml.etree.ElementTree.tostring(doc)
|
||||||
|
raise ExtractorError(u'Cannot find video URL in document %r' %
|
||||||
|
xml_bytes)
|
||||||
|
thumbnail = doc.find('imageurl').text
|
||||||
|
duration_str = doc.find('duration').text
|
||||||
|
m = re.match(
|
||||||
|
r'^(?P<hours>[0-9]+):(?P<minutes>[0-9]{2}):(?P<seconds>[0-9]{2}):(?P<ms>[0-9]*)$',
|
||||||
|
duration_str)
|
||||||
|
if m:
|
||||||
|
duration = (
|
||||||
|
(int(m.group('hours')) * 60 * 60) +
|
||||||
|
(int(m.group('minutes')) * 60) +
|
||||||
|
(int(m.group('seconds')))
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
duration = None
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'url': video_url,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'duration': duration,
|
||||||
|
}
|
52
youtube_dl/extractor/clipsyndicate.py
Normal file
52
youtube_dl/extractor/clipsyndicate.py
Normal file
@ -0,0 +1,52 @@
|
|||||||
|
import re
|
||||||
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
find_xpath_attr,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class ClipsyndicateIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'http://www\.clipsyndicate\.com/video/play(list/\d+)?/(?P<id>\d+)'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
u'url': u'http://www.clipsyndicate.com/video/play/4629301/brick_briscoe',
|
||||||
|
u'md5': u'4d7d549451bad625e0ff3d7bd56d776c',
|
||||||
|
u'info_dict': {
|
||||||
|
u'id': u'4629301',
|
||||||
|
u'ext': u'mp4',
|
||||||
|
u'title': u'Brick Briscoe',
|
||||||
|
u'duration': 612,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
js_player = self._download_webpage(
|
||||||
|
'http://eplayer.clipsyndicate.com/embed/player.js?va_id=%s' % video_id,
|
||||||
|
video_id, u'Downlaoding player')
|
||||||
|
# it includes a required token
|
||||||
|
flvars = self._search_regex(r'flvars: "(.*?)"', js_player, u'flvars')
|
||||||
|
|
||||||
|
playlist_page = self._download_webpage(
|
||||||
|
'http://eplayer.clipsyndicate.com/osmf/playlist?%s' % flvars,
|
||||||
|
video_id, u'Downloading video info')
|
||||||
|
# Fix broken xml
|
||||||
|
playlist_page = re.sub('&', '&amp;', playlist_page)
|
||||||
|
pdoc = xml.etree.ElementTree.fromstring(playlist_page.encode('utf-8'))
|
||||||
|
|
||||||
|
track_doc = pdoc.find('trackList/track')
|
||||||
|
def find_param(name):
|
||||||
|
node = find_xpath_attr(track_doc, './/param', 'name', name)
|
||||||
|
if node is not None:
|
||||||
|
return node.attrib['value']
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': find_param('title'),
|
||||||
|
'url': track_doc.find('location').text,
|
||||||
|
'thumbnail': find_param('thumbnail'),
|
||||||
|
'duration': int(find_param('duration')),
|
||||||
|
}
|
@ -1,5 +1,4 @@
|
|||||||
import re
|
import re
|
||||||
import xml.etree.ElementTree
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import determine_ext
|
from ..utils import determine_ext
|
||||||
@ -33,8 +32,7 @@ class CNNIE(InfoExtractor):
|
|||||||
path = mobj.group('path')
|
path = mobj.group('path')
|
||||||
page_title = mobj.group('title')
|
page_title = mobj.group('title')
|
||||||
info_url = u'http://cnn.com/video/data/3.0/%s/index.xml' % path
|
info_url = u'http://cnn.com/video/data/3.0/%s/index.xml' % path
|
||||||
info_xml = self._download_webpage(info_url, page_title)
|
info = self._download_xml(info_url, page_title)
|
||||||
info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
|
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for f in info.findall('files/file'):
|
for f in info.findall('files/file'):
|
||||||
|
@ -1,5 +1,4 @@
|
|||||||
import re
|
import re
|
||||||
import xml.etree.ElementTree
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@ -46,11 +45,10 @@ class CollegeHumorIE(InfoExtractor):
|
|||||||
|
|
||||||
self.report_extraction(video_id)
|
self.report_extraction(video_id)
|
||||||
xmlUrl = 'http://www.collegehumor.com/moogaloop/video/' + video_id
|
xmlUrl = 'http://www.collegehumor.com/moogaloop/video/' + video_id
|
||||||
metaXml = self._download_webpage(xmlUrl, video_id,
|
mdoc = self._download_xml(xmlUrl, video_id,
|
||||||
u'Downloading info XML',
|
u'Downloading info XML',
|
||||||
u'Unable to download video info XML')
|
u'Unable to download video info XML')
|
||||||
|
|
||||||
mdoc = xml.etree.ElementTree.fromstring(metaXml)
|
|
||||||
try:
|
try:
|
||||||
videoNode = mdoc.findall('./video')[0]
|
videoNode = mdoc.findall('./video')[0]
|
||||||
youtubeIdNode = videoNode.find('./youtubeID')
|
youtubeIdNode = videoNode.find('./youtubeID')
|
||||||
@ -65,11 +63,10 @@ class CollegeHumorIE(InfoExtractor):
|
|||||||
|
|
||||||
if next_url.endswith(u'manifest.f4m'):
|
if next_url.endswith(u'manifest.f4m'):
|
||||||
manifest_url = next_url + '?hdcore=2.10.3'
|
manifest_url = next_url + '?hdcore=2.10.3'
|
||||||
manifestXml = self._download_webpage(manifest_url, video_id,
|
adoc = self._download_xml(manifest_url, video_id,
|
||||||
u'Downloading XML manifest',
|
u'Downloading XML manifest',
|
||||||
u'Unable to download video info XML')
|
u'Unable to download video info XML')
|
||||||
|
|
||||||
adoc = xml.etree.ElementTree.fromstring(manifestXml)
|
|
||||||
try:
|
try:
|
||||||
video_id = adoc.findall('./{http://ns.adobe.com/f4m/1.0}id')[0].text
|
video_id = adoc.findall('./{http://ns.adobe.com/f4m/1.0}id')[0].text
|
||||||
except IndexError:
|
except IndexError:
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
import re
|
import re
|
||||||
import xml.etree.ElementTree
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from .mtv import MTVIE, _media_xml_tag
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
compat_str,
|
compat_str,
|
||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
@ -11,7 +11,37 @@ from ..utils import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class ComedyCentralIE(InfoExtractor):
|
class ComedyCentralIE(MTVIE):
|
||||||
|
_VALID_URL = r'http://www.comedycentral.com/(video-clips|episodes|cc-studios)/(?P<title>.*)'
|
||||||
|
_FEED_URL = u'http://comedycentral.com/feeds/mrss/'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
u'url': u'http://www.comedycentral.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother',
|
||||||
|
u'md5': u'4167875aae411f903b751a21f357f1ee',
|
||||||
|
u'info_dict': {
|
||||||
|
u'id': u'cef0cbb3-e776-4bc9-b62e-8016deccb354',
|
||||||
|
u'ext': u'mp4',
|
||||||
|
u'title': u'Uncensored - Greg Fitzsimmons - Too Good of a Mother',
|
||||||
|
u'description': u'After a certain point, breastfeeding becomes c**kblocking.',
|
||||||
|
},
|
||||||
|
}
|
||||||
|
# Overwrite MTVIE properties we don't want
|
||||||
|
_TESTS = []
|
||||||
|
|
||||||
|
def _get_thumbnail_url(self, uri, itemdoc):
|
||||||
|
search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))
|
||||||
|
return itemdoc.find(search_path).attrib['url']
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
title = mobj.group('title')
|
||||||
|
webpage = self._download_webpage(url, title)
|
||||||
|
mgid = self._search_regex(r'data-mgid="(?P<mgid>mgid:.*?)"',
|
||||||
|
webpage, u'mgid')
|
||||||
|
return self._get_videos_info(mgid)
|
||||||
|
|
||||||
|
|
||||||
|
class ComedyCentralShowsIE(InfoExtractor):
|
||||||
IE_DESC = u'The Daily Show / Colbert Report'
|
IE_DESC = u'The Daily Show / Colbert Report'
|
||||||
# urls can be abbreviations like :thedailyshow or :colbert
|
# urls can be abbreviations like :thedailyshow or :colbert
|
||||||
# urls for episodes like:
|
# urls for episodes like:
|
||||||
@ -127,13 +157,12 @@ class ComedyCentralIE(InfoExtractor):
|
|||||||
|
|
||||||
uri = mMovieParams[0][1]
|
uri = mMovieParams[0][1]
|
||||||
indexUrl = 'http://shadow.comedycentral.com/feeds/video_player/mrss/?' + compat_urllib_parse.urlencode({'uri': uri})
|
indexUrl = 'http://shadow.comedycentral.com/feeds/video_player/mrss/?' + compat_urllib_parse.urlencode({'uri': uri})
|
||||||
indexXml = self._download_webpage(indexUrl, epTitle,
|
idoc = self._download_xml(indexUrl, epTitle,
|
||||||
u'Downloading show index',
|
u'Downloading show index',
|
||||||
u'unable to download episode index')
|
u'unable to download episode index')
|
||||||
|
|
||||||
results = []
|
results = []
|
||||||
|
|
||||||
idoc = xml.etree.ElementTree.fromstring(indexXml)
|
|
||||||
itemEls = idoc.findall('.//item')
|
itemEls = idoc.findall('.//item')
|
||||||
for partNum,itemEl in enumerate(itemEls):
|
for partNum,itemEl in enumerate(itemEls):
|
||||||
mediaId = itemEl.findall('./guid')[0].text
|
mediaId = itemEl.findall('./guid')[0].text
|
||||||
@ -144,10 +173,9 @@ class ComedyCentralIE(InfoExtractor):
|
|||||||
|
|
||||||
configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' +
|
configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' +
|
||||||
compat_urllib_parse.urlencode({'uri': mediaId}))
|
compat_urllib_parse.urlencode({'uri': mediaId}))
|
||||||
configXml = self._download_webpage(configUrl, epTitle,
|
cdoc = self._download_xml(configUrl, epTitle,
|
||||||
u'Downloading configuration for %s' % shortMediaId)
|
u'Downloading configuration for %s' % shortMediaId)
|
||||||
|
|
||||||
cdoc = xml.etree.ElementTree.fromstring(configXml)
|
|
||||||
turls = []
|
turls = []
|
||||||
for rendition in cdoc.findall('.//rendition'):
|
for rendition in cdoc.findall('.//rendition'):
|
||||||
finfo = (rendition.attrib['bitrate'], rendition.findall('./src')[0].text)
|
finfo = (rendition.attrib['bitrate'], rendition.findall('./src')[0].text)
|
||||||
|
@ -4,11 +4,11 @@ import re
|
|||||||
import socket
|
import socket
|
||||||
import sys
|
import sys
|
||||||
import netrc
|
import netrc
|
||||||
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
compat_http_client,
|
compat_http_client,
|
||||||
compat_urllib_error,
|
compat_urllib_error,
|
||||||
compat_urllib_request,
|
|
||||||
compat_str,
|
compat_str,
|
||||||
|
|
||||||
clean_html,
|
clean_html,
|
||||||
@ -19,6 +19,7 @@ from ..utils import (
|
|||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class InfoExtractor(object):
|
class InfoExtractor(object):
|
||||||
"""Information Extractor class.
|
"""Information Extractor class.
|
||||||
|
|
||||||
@ -75,6 +76,7 @@ class InfoExtractor(object):
|
|||||||
* acodec Name of the audio codec in use
|
* acodec Name of the audio codec in use
|
||||||
* vbr Average video bitrate in KBit/s
|
* vbr Average video bitrate in KBit/s
|
||||||
* vcodec Name of the video codec in use
|
* vcodec Name of the video codec in use
|
||||||
|
* filesize The number of bytes, if known in advance
|
||||||
webpage_url: The url to the video webpage, if given to youtube-dl it
|
webpage_url: The url to the video webpage, if given to youtube-dl it
|
||||||
should allow to get the same result again. (It will be set
|
should allow to get the same result again. (It will be set
|
||||||
by YoutubeDL if it's missing)
|
by YoutubeDL if it's missing)
|
||||||
@ -156,7 +158,7 @@ class InfoExtractor(object):
|
|||||||
elif note is not False:
|
elif note is not False:
|
||||||
self.to_screen(u'%s: %s' % (video_id, note))
|
self.to_screen(u'%s: %s' % (video_id, note))
|
||||||
try:
|
try:
|
||||||
return compat_urllib_request.urlopen(url_or_request)
|
return self._downloader.urlopen(url_or_request)
|
||||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||||
if errnote is None:
|
if errnote is None:
|
||||||
errnote = u'Unable to download webpage'
|
errnote = u'Unable to download webpage'
|
||||||
@ -208,6 +210,12 @@ class InfoExtractor(object):
|
|||||||
""" Returns the data of the page as a string """
|
""" Returns the data of the page as a string """
|
||||||
return self._download_webpage_handle(url_or_request, video_id, note, errnote)[0]
|
return self._download_webpage_handle(url_or_request, video_id, note, errnote)[0]
|
||||||
|
|
||||||
|
def _download_xml(self, url_or_request, video_id,
|
||||||
|
note=u'Downloading XML', errnote=u'Unable to download XML'):
|
||||||
|
"""Return the xml as an xml.etree.ElementTree.Element"""
|
||||||
|
xml_string = self._download_webpage(url_or_request, video_id, note, errnote)
|
||||||
|
return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8'))
|
||||||
|
|
||||||
def to_screen(self, msg):
|
def to_screen(self, msg):
|
||||||
"""Print msg to screen, prefixing it with '[ie_name]'"""
|
"""Print msg to screen, prefixing it with '[ie_name]'"""
|
||||||
self._downloader.to_screen(u'[%s] %s' % (self.IE_NAME, msg))
|
self._downloader.to_screen(u'[%s] %s' % (self.IE_NAME, msg))
|
||||||
@ -229,12 +237,14 @@ class InfoExtractor(object):
|
|||||||
self.to_screen(u'Logging in')
|
self.to_screen(u'Logging in')
|
||||||
|
|
||||||
#Methods for following #608
|
#Methods for following #608
|
||||||
def url_result(self, url, ie=None):
|
def url_result(self, url, ie=None, video_id=None):
|
||||||
"""Returns a url that points to a page that should be processed"""
|
"""Returns a url that points to a page that should be processed"""
|
||||||
#TODO: ie should be the class used for getting the info
|
#TODO: ie should be the class used for getting the info
|
||||||
video_info = {'_type': 'url',
|
video_info = {'_type': 'url',
|
||||||
'url': url,
|
'url': url,
|
||||||
'ie_key': ie}
|
'ie_key': ie}
|
||||||
|
if video_id is not None:
|
||||||
|
video_info['id'] = video_id
|
||||||
return video_info
|
return video_info
|
||||||
def playlist_result(self, entries, playlist_id=None, playlist_title=None):
|
def playlist_result(self, entries, playlist_id=None, playlist_title=None):
|
||||||
"""Returns a playlist"""
|
"""Returns a playlist"""
|
||||||
|
@ -1,6 +1,5 @@
|
|||||||
# encoding: utf-8
|
# encoding: utf-8
|
||||||
import re
|
import re
|
||||||
import xml.etree.ElementTree
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@ -32,14 +31,12 @@ class DaumIE(InfoExtractor):
|
|||||||
full_id = self._search_regex(r'<link rel="video_src" href=".+?vid=(.+?)"',
|
full_id = self._search_regex(r'<link rel="video_src" href=".+?vid=(.+?)"',
|
||||||
webpage, u'full id')
|
webpage, u'full id')
|
||||||
query = compat_urllib_parse.urlencode({'vid': full_id})
|
query = compat_urllib_parse.urlencode({'vid': full_id})
|
||||||
info_xml = self._download_webpage(
|
info = self._download_xml(
|
||||||
'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query, video_id,
|
'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query, video_id,
|
||||||
u'Downloading video info')
|
u'Downloading video info')
|
||||||
urls_xml = self._download_webpage(
|
urls = self._download_xml(
|
||||||
'http://videofarm.daum.net/controller/api/open/v1_2/MovieData.apixml?' + query,
|
'http://videofarm.daum.net/controller/api/open/v1_2/MovieData.apixml?' + query,
|
||||||
video_id, u'Downloading video formats info')
|
video_id, u'Downloading video formats info')
|
||||||
info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
|
|
||||||
urls = xml.etree.ElementTree.fromstring(urls_xml.encode('utf-8'))
|
|
||||||
|
|
||||||
self.to_screen(u'%s: Getting video urls' % video_id)
|
self.to_screen(u'%s: Getting video urls' % video_id)
|
||||||
formats = []
|
formats = []
|
||||||
@ -49,10 +46,9 @@ class DaumIE(InfoExtractor):
|
|||||||
'vid': full_id,
|
'vid': full_id,
|
||||||
'profile': profile,
|
'profile': profile,
|
||||||
})
|
})
|
||||||
url_xml = self._download_webpage(
|
url_doc = self._download_xml(
|
||||||
'http://videofarm.daum.net/controller/api/open/v1_2/MovieLocation.apixml?' + format_query,
|
'http://videofarm.daum.net/controller/api/open/v1_2/MovieLocation.apixml?' + format_query,
|
||||||
video_id, note=False)
|
video_id, note=False)
|
||||||
url_doc = xml.etree.ElementTree.fromstring(url_xml.encode('utf-8'))
|
|
||||||
format_url = url_doc.find('result/url').text
|
format_url = url_doc.find('result/url').text
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': format_url,
|
'url': format_url,
|
||||||
|
@ -1,7 +1,6 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import xml.etree.ElementTree
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@ -30,8 +29,7 @@ class DreiSatIE(InfoExtractor):
|
|||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
|
details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
|
||||||
details_xml = self._download_webpage(details_url, video_id, note=u'Downloading video details')
|
details_doc = self._download_xml(details_url, video_id, note=u'Downloading video details')
|
||||||
details_doc = xml.etree.ElementTree.fromstring(details_xml.encode('utf-8'))
|
|
||||||
|
|
||||||
thumbnail_els = details_doc.findall('.//teaserimage')
|
thumbnail_els = details_doc.findall('.//teaserimage')
|
||||||
thumbnails = [{
|
thumbnails = [{
|
||||||
|
@ -1,5 +1,4 @@
|
|||||||
import re
|
import re
|
||||||
import xml.etree.ElementTree
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import determine_ext
|
from ..utils import determine_ext
|
||||||
@ -21,9 +20,8 @@ class EbaumsWorldIE(InfoExtractor):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
config_xml = self._download_webpage(
|
config = self._download_xml(
|
||||||
'http://www.ebaumsworld.com/video/player/%s' % video_id, video_id)
|
'http://www.ebaumsworld.com/video/player/%s' % video_id, video_id)
|
||||||
config = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
|
|
||||||
video_url = config.find('file').text
|
video_url = config.find('file').text
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
@ -1,6 +1,5 @@
|
|||||||
# encoding: utf-8
|
# encoding: utf-8
|
||||||
import re
|
import re
|
||||||
import xml.etree.ElementTree
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@ -28,9 +27,8 @@ class FazIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
config_xml_url = self._search_regex(r'writeFLV\(\'(.+?)\',', webpage,
|
config_xml_url = self._search_regex(r'writeFLV\(\'(.+?)\',', webpage,
|
||||||
u'config xml url')
|
u'config xml url')
|
||||||
config_xml = self._download_webpage(config_xml_url, video_id,
|
config = self._download_xml(config_xml_url, video_id,
|
||||||
u'Downloading config xml')
|
u'Downloading config xml')
|
||||||
config = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
|
|
||||||
|
|
||||||
encodings = config.find('ENCODINGS')
|
encodings = config.find('ENCODINGS')
|
||||||
formats = []
|
formats = []
|
||||||
|
@ -1,6 +1,5 @@
|
|||||||
# encoding: utf-8
|
# encoding: utf-8
|
||||||
import re
|
import re
|
||||||
import xml.etree.ElementTree
|
|
||||||
import json
|
import json
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
@ -11,11 +10,10 @@ from ..utils import (
|
|||||||
|
|
||||||
class FranceTVBaseInfoExtractor(InfoExtractor):
|
class FranceTVBaseInfoExtractor(InfoExtractor):
|
||||||
def _extract_video(self, video_id):
|
def _extract_video(self, video_id):
|
||||||
xml_desc = self._download_webpage(
|
info = self._download_xml(
|
||||||
'http://www.francetvinfo.fr/appftv/webservices/video/'
|
'http://www.francetvinfo.fr/appftv/webservices/video/'
|
||||||
'getInfosOeuvre.php?id-diffusion='
|
'getInfosOeuvre.php?id-diffusion='
|
||||||
+ video_id, video_id, 'Downloading XML config')
|
+ video_id, video_id, 'Downloading XML config')
|
||||||
info = xml.etree.ElementTree.fromstring(xml_desc.encode('utf-8'))
|
|
||||||
|
|
||||||
manifest_url = info.find('videos/video/url').text
|
manifest_url = info.find('videos/video/url').text
|
||||||
video_url = manifest_url.replace('manifest.f4m', 'index_2_av.m3u8')
|
video_url = manifest_url.replace('manifest.f4m', 'index_2_av.m3u8')
|
||||||
|
@ -195,6 +195,15 @@ class GenericIE(InfoExtractor):
|
|||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
urlrs, playlist_id=video_id, playlist_title=video_title)
|
urlrs, playlist_id=video_id, playlist_title=video_title)
|
||||||
|
|
||||||
|
# Look for embedded Dailymotion player
|
||||||
|
matches = re.findall(
|
||||||
|
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion.com/embed/video/.+?)\1', webpage)
|
||||||
|
if matches:
|
||||||
|
urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Dailymotion')
|
||||||
|
for tuppl in matches]
|
||||||
|
return self.playlist_result(
|
||||||
|
urlrs, playlist_id=video_id, playlist_title=video_title)
|
||||||
|
|
||||||
# Look for Bandcamp pages with custom domain
|
# Look for Bandcamp pages with custom domain
|
||||||
mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
|
mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
|
||||||
if mobj is not None:
|
if mobj is not None:
|
||||||
@ -209,7 +218,7 @@ class GenericIE(InfoExtractor):
|
|||||||
mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
|
mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
|
||||||
if mobj is None:
|
if mobj is None:
|
||||||
# Broaden the search a little bit: JWPlayer JS loader
|
# Broaden the search a little bit: JWPlayer JS loader
|
||||||
mobj = re.search(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http[^\'"&]*)', webpage)
|
mobj = re.search(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http[^\'"]*)', webpage)
|
||||||
if mobj is None:
|
if mobj is None:
|
||||||
# Try to find twitter cards info
|
# Try to find twitter cards info
|
||||||
mobj = re.search(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage)
|
mobj = re.search(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage)
|
||||||
@ -236,18 +245,16 @@ class GenericIE(InfoExtractor):
|
|||||||
video_id = compat_urllib_parse.unquote(os.path.basename(video_url))
|
video_id = compat_urllib_parse.unquote(os.path.basename(video_url))
|
||||||
|
|
||||||
# here's a fun little line of code for you:
|
# here's a fun little line of code for you:
|
||||||
video_extension = os.path.splitext(video_id)[1][1:]
|
|
||||||
video_id = os.path.splitext(video_id)[0]
|
video_id = os.path.splitext(video_id)[0]
|
||||||
|
|
||||||
# video uploader is domain name
|
# video uploader is domain name
|
||||||
video_uploader = self._search_regex(r'(?:https?://)?([^/]*)/.*',
|
video_uploader = self._search_regex(r'(?:https?://)?([^/]*)/.*',
|
||||||
url, u'video uploader')
|
url, u'video uploader')
|
||||||
|
|
||||||
return [{
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'uploader': video_uploader,
|
'uploader': video_uploader,
|
||||||
'upload_date': None,
|
'upload_date': None,
|
||||||
'title': video_title,
|
'title': video_title,
|
||||||
'ext': video_extension,
|
}
|
||||||
}]
|
|
||||||
|
@ -8,7 +8,7 @@ class HowcastIE(InfoExtractor):
|
|||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://www.howcast.com/videos/390161-How-to-Tie-a-Square-Knot-Properly',
|
u'url': u'http://www.howcast.com/videos/390161-How-to-Tie-a-Square-Knot-Properly',
|
||||||
u'file': u'390161.mp4',
|
u'file': u'390161.mp4',
|
||||||
u'md5': u'1d7ba54e2c9d7dc6935ef39e00529138',
|
u'md5': u'8b743df908c42f60cf6496586c7f12c3',
|
||||||
u'info_dict': {
|
u'info_dict': {
|
||||||
u"description": u"The square knot, also known as the reef knot, is one of the oldest, most basic knots to tie, and can be used in many different ways. Here's the proper way to tie a square knot.",
|
u"description": u"The square knot, also known as the reef knot, is one of the oldest, most basic knots to tie, and can be used in many different ways. Here's the proper way to tie a square knot.",
|
||||||
u"title": u"How to Tie a Square Knot Properly"
|
u"title": u"How to Tie a Square Knot Properly"
|
||||||
|
59
youtube_dl/extractor/imdb.py
Normal file
59
youtube_dl/extractor/imdb.py
Normal file
@ -0,0 +1,59 @@
|
|||||||
|
import re
|
||||||
|
import json
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
compat_urlparse,
|
||||||
|
get_element_by_attribute,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class ImdbIE(InfoExtractor):
    """Information extractor for trailers served from imdb.com video pages."""
    IE_NAME = u'imdb'
    IE_DESC = u'Internet Movie Database trailers'
    _VALID_URL = r'http://www\.imdb\.com/video/imdb/vi(?P<id>\d+)'

    _TEST = {
        u'url': u'http://www.imdb.com/video/imdb/vi2524815897',
        u'md5': u'9f34fa777ade3a6e57a054fdbcb3a068',
        u'info_dict': {
            u'id': u'2524815897',
            u'ext': u'mp4',
            u'title': u'Ice Age: Continental Drift Trailer (No. 2) - IMDb',
            u'description': u'md5:9061c2219254e5d14e03c25c98e96a81',
            u'duration': 151,
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the video page at *url*.

        The main page lists one path per available format; each of those
        pages embeds a JSON blob (``imdb-player-data``) carrying the media
        URL, the selected encoding, the slate image and the duration.

        Returns a dict with the keys ``id``, ``title``, ``formats``,
        ``description``, ``thumbnail`` and ``duration``.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)
        descr = get_element_by_attribute('itemprop', 'description', webpage)
        available_formats = re.findall(
            r'case \'(?P<f_id>.*?)\' :$\s+url = \'(?P<path>.*?)\'', webpage,
            flags=re.MULTILINE)

        formats = []
        # Initialised up front so that a page without any matching format
        # does not fail with a NameError when the result dict is built.
        thumbnail = None
        duration = None
        for f_id, f_path in available_formats:
            # video_id must be the second positional argument; the progress
            # note comes after it (it used to be passed in video_id's place).
            format_page = self._download_webpage(
                compat_urlparse.urljoin(url, f_path), video_id,
                u'Downloading info for %s format' % f_id)
            json_data = self._search_regex(
                r'<script[^>]+class="imdb-player-data"[^>]*?>(.*?)</script>',
                format_page, u'json data', flags=re.DOTALL)
            info = json.loads(json_data)
            format_info = info['videoPlayerObject']['video']
            formats.append({
                'format_id': f_id,
                'url': format_info['url'],
                # The last character of the selected encoding is dropped
                # before the int conversion.
                'height': int(info['titleObject']['encoding']['selected'][:-1]),
            })
            # As before, the values from the last format page win.
            thumbnail = format_info['slate']
            duration = int(info['titleObject']['title']['duration_seconds'])

        return {
            'id': video_id,
            'title': self._og_search_title(webpage),
            'formats': formats,
            'description': descr,
            'thumbnail': thumbnail,
            'duration': duration,
        }
|
@ -1,5 +1,4 @@
|
|||||||
import re
|
import re
|
||||||
import xml.etree.ElementTree
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@ -43,9 +42,8 @@ class InternetVideoArchiveIE(InfoExtractor):
|
|||||||
video_id = query_dic['publishedid'][0]
|
video_id = query_dic['publishedid'][0]
|
||||||
url = self._build_url(query)
|
url = self._build_url(query)
|
||||||
|
|
||||||
flashconfiguration_xml = self._download_webpage(url, video_id,
|
flashconfiguration = self._download_xml(url, video_id,
|
||||||
u'Downloading flash configuration')
|
u'Downloading flash configuration')
|
||||||
flashconfiguration = xml.etree.ElementTree.fromstring(flashconfiguration_xml.encode('utf-8'))
|
|
||||||
file_url = flashconfiguration.find('file').text
|
file_url = flashconfiguration.find('file').text
|
||||||
file_url = file_url.replace('/playlist.aspx', '/mrssplaylist.aspx')
|
file_url = file_url.replace('/playlist.aspx', '/mrssplaylist.aspx')
|
||||||
# Replace some of the parameters in the query to get the best quality
|
# Replace some of the parameters in the query to get the best quality
|
||||||
@ -53,9 +51,8 @@ class InternetVideoArchiveIE(InfoExtractor):
|
|||||||
file_url = re.sub(r'(?<=\?)(.+)$',
|
file_url = re.sub(r'(?<=\?)(.+)$',
|
||||||
lambda m: self._clean_query(m.group()),
|
lambda m: self._clean_query(m.group()),
|
||||||
file_url)
|
file_url)
|
||||||
info_xml = self._download_webpage(file_url, video_id,
|
info = self._download_xml(file_url, video_id,
|
||||||
u'Downloading video info')
|
u'Downloading video info')
|
||||||
info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
|
|
||||||
item = info.find('channel/item')
|
item = info.find('channel/item')
|
||||||
|
|
||||||
def _bp(p):
|
def _bp(p):
|
||||||
|
@ -2,7 +2,6 @@
|
|||||||
|
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
import xml.etree.ElementTree
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
@ -32,12 +31,9 @@ class JeuxVideoIE(InfoExtractor):
|
|||||||
r'http://www\.jeuxvideo\.com/config/\w+/\d+/(.*?)/\d+_player\.xml',
|
r'http://www\.jeuxvideo\.com/config/\w+/\d+/(.*?)/\d+_player\.xml',
|
||||||
xml_link, u'video ID')
|
xml_link, u'video ID')
|
||||||
|
|
||||||
xml_config = self._download_webpage(
|
config = self._download_xml(
|
||||||
xml_link, title, u'Downloading XML config')
|
xml_link, title, u'Downloading XML config')
|
||||||
config = xml.etree.ElementTree.fromstring(xml_config.encode('utf-8'))
|
info_json = config.find('format.json').text
|
||||||
info_json = self._search_regex(
|
|
||||||
r'(?sm)<format\.json>(.*?)</format\.json>',
|
|
||||||
xml_config, u'JSON information')
|
|
||||||
info = json.loads(info_json)['versions'][0]
|
info = json.loads(info_json)['versions'][0]
|
||||||
|
|
||||||
video_url = 'http://video720.jeuxvideo.com/' + info['file']
|
video_url = 'http://video720.jeuxvideo.com/' + info['file']
|
||||||
|
@ -1,7 +1,6 @@
|
|||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import xml.etree.ElementTree
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@ -94,10 +93,9 @@ class JustinTVIE(InfoExtractor):
|
|||||||
archive_id = m.group(1)
|
archive_id = m.group(1)
|
||||||
|
|
||||||
api = api_base + '/broadcast/by_chapter/%s.xml' % chapter_id
|
api = api_base + '/broadcast/by_chapter/%s.xml' % chapter_id
|
||||||
chapter_info_xml = self._download_webpage(api, chapter_id,
|
doc = self._download_xml(api, chapter_id,
|
||||||
note=u'Downloading chapter information',
|
note=u'Downloading chapter information',
|
||||||
errnote=u'Chapter information download failed')
|
errnote=u'Chapter information download failed')
|
||||||
doc = xml.etree.ElementTree.fromstring(chapter_info_xml)
|
|
||||||
for a in doc.findall('.//archive'):
|
for a in doc.findall('.//archive'):
|
||||||
if archive_id == a.find('./id').text:
|
if archive_id == a.find('./id').text:
|
||||||
break
|
break
|
||||||
|
@ -1,6 +1,5 @@
|
|||||||
import re
|
import re
|
||||||
import json
|
import json
|
||||||
import xml.etree.ElementTree
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@ -80,8 +79,7 @@ class LivestreamOriginalIE(InfoExtractor):
|
|||||||
user = mobj.group('user')
|
user = mobj.group('user')
|
||||||
api_url = 'http://x{0}x.api.channel.livestream.com/2.0/clipdetails?extendedInfo=true&id={1}'.format(user, video_id)
|
api_url = 'http://x{0}x.api.channel.livestream.com/2.0/clipdetails?extendedInfo=true&id={1}'.format(user, video_id)
|
||||||
|
|
||||||
api_response = self._download_webpage(api_url, video_id)
|
info = self._download_xml(api_url, video_id)
|
||||||
info = xml.etree.ElementTree.fromstring(api_response.encode('utf-8'))
|
|
||||||
item = info.find('channel').find('item')
|
item = info.find('channel').find('item')
|
||||||
ns = {'media': 'http://search.yahoo.com/mrss'}
|
ns = {'media': 'http://search.yahoo.com/mrss'}
|
||||||
thumbnail_url = item.find(xpath_with_ns('media:thumbnail', ns)).attrib['url']
|
thumbnail_url = item.find(xpath_with_ns('media:thumbnail', ns)).attrib['url']
|
||||||
|
@ -60,7 +60,7 @@ class MixcloudIE(InfoExtractor):
|
|||||||
'title': info['name'],
|
'title': info['name'],
|
||||||
'url': final_song_url,
|
'url': final_song_url,
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'description': info['description'],
|
'description': info.get('description'),
|
||||||
'thumbnail': info['pictures'].get('extra_large'),
|
'thumbnail': info['pictures'].get('extra_large'),
|
||||||
'uploader': info['user']['name'],
|
'uploader': info['user']['name'],
|
||||||
'uploader_id': info['user']['username'],
|
'uploader_id': info['user']['username'],
|
||||||
|
@ -109,9 +109,8 @@ class MTVIE(InfoExtractor):
|
|||||||
def _get_videos_info(self, uri):
|
def _get_videos_info(self, uri):
|
||||||
video_id = self._id_from_uri(uri)
|
video_id = self._id_from_uri(uri)
|
||||||
data = compat_urllib_parse.urlencode({'uri': uri})
|
data = compat_urllib_parse.urlencode({'uri': uri})
|
||||||
infoXml = self._download_webpage(self._FEED_URL +'?' + data, video_id,
|
idoc = self._download_xml(self._FEED_URL +'?' + data, video_id,
|
||||||
u'Downloading info')
|
u'Downloading info')
|
||||||
idoc = xml.etree.ElementTree.fromstring(infoXml.encode('utf-8'))
|
|
||||||
return [self._get_video_info(item) for item in idoc.findall('.//item')]
|
return [self._get_video_info(item) for item in idoc.findall('.//item')]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -1,5 +1,4 @@
|
|||||||
import os.path
|
import os.path
|
||||||
import xml.etree.ElementTree
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@ -33,8 +32,7 @@ class MySpassIE(InfoExtractor):
|
|||||||
|
|
||||||
# get metadata
|
# get metadata
|
||||||
metadata_url = META_DATA_URL_TEMPLATE % video_id
|
metadata_url = META_DATA_URL_TEMPLATE % video_id
|
||||||
metadata_text = self._download_webpage(metadata_url, video_id)
|
metadata = self._download_xml(metadata_url, video_id)
|
||||||
metadata = xml.etree.ElementTree.fromstring(metadata_text.encode('utf-8'))
|
|
||||||
|
|
||||||
# extract values from metadata
|
# extract values from metadata
|
||||||
url_flv_el = metadata.find('url_flv')
|
url_flv_el = metadata.find('url_flv')
|
||||||
|
@ -1,6 +1,5 @@
|
|||||||
# encoding: utf-8
|
# encoding: utf-8
|
||||||
import re
|
import re
|
||||||
import xml.etree.ElementTree
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@ -38,14 +37,12 @@ class NaverIE(InfoExtractor):
|
|||||||
'protocol': 'p2p',
|
'protocol': 'p2p',
|
||||||
'inKey': key,
|
'inKey': key,
|
||||||
})
|
})
|
||||||
info_xml = self._download_webpage(
|
info = self._download_xml(
|
||||||
'http://serviceapi.rmcnmv.naver.com/flash/videoInfo.nhn?' + query,
|
'http://serviceapi.rmcnmv.naver.com/flash/videoInfo.nhn?' + query,
|
||||||
video_id, u'Downloading video info')
|
video_id, u'Downloading video info')
|
||||||
urls_xml = self._download_webpage(
|
urls = self._download_xml(
|
||||||
'http://serviceapi.rmcnmv.naver.com/flash/playableEncodingOption.nhn?' + query_urls,
|
'http://serviceapi.rmcnmv.naver.com/flash/playableEncodingOption.nhn?' + query_urls,
|
||||||
video_id, u'Downloading video formats info')
|
video_id, u'Downloading video formats info')
|
||||||
info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
|
|
||||||
urls = xml.etree.ElementTree.fromstring(urls_xml.encode('utf-8'))
|
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for format_el in urls.findall('EncodingOptions/EncodingOption'):
|
for format_el in urls.findall('EncodingOptions/EncodingOption'):
|
||||||
|
@ -1,5 +1,4 @@
|
|||||||
import re
|
import re
|
||||||
import xml.etree.ElementTree
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import find_xpath_attr, compat_str
|
from ..utils import find_xpath_attr, compat_str
|
||||||
@ -21,8 +20,8 @@ class NBCNewsIE(InfoExtractor):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
info_xml = self._download_webpage('http://www.nbcnews.com/id/%s/displaymode/1219' % video_id, video_id)
|
all_info = self._download_xml('http://www.nbcnews.com/id/%s/displaymode/1219' % video_id, video_id)
|
||||||
info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8')).find('video')
|
info = all_info.find('video')
|
||||||
|
|
||||||
return {'id': video_id,
|
return {'id': video_id,
|
||||||
'title': info.find('headline').text,
|
'title': info.find('headline').text,
|
||||||
|
@ -1,6 +1,5 @@
|
|||||||
import re
|
import re
|
||||||
import json
|
import json
|
||||||
import xml.etree.ElementTree
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@ -26,9 +25,8 @@ class NHLBaseInfoExtractor(InfoExtractor):
|
|||||||
'path': initial_video_url.replace('.mp4', '_sd.mp4'),
|
'path': initial_video_url.replace('.mp4', '_sd.mp4'),
|
||||||
})
|
})
|
||||||
path_url = 'http://video.nhl.com/videocenter/servlets/encryptvideopath?' + data
|
path_url = 'http://video.nhl.com/videocenter/servlets/encryptvideopath?' + data
|
||||||
path_response = self._download_webpage(path_url, video_id,
|
path_doc = self._download_xml(path_url, video_id,
|
||||||
u'Downloading final video url')
|
u'Downloading final video url')
|
||||||
path_doc = xml.etree.ElementTree.fromstring(path_response)
|
|
||||||
video_url = path_doc.find('path').text
|
video_url = path_doc.find('path').text
|
||||||
|
|
||||||
join = compat_urlparse.urljoin
|
join = compat_urlparse.urljoin
|
||||||
|
127
youtube_dl/extractor/niconico.py
Normal file
127
youtube_dl/extractor/niconico.py
Normal file
@ -0,0 +1,127 @@
|
|||||||
|
# encoding: utf-8
|
||||||
|
|
||||||
|
import re
|
||||||
|
import socket
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
compat_http_client,
|
||||||
|
compat_urllib_error,
|
||||||
|
compat_urllib_parse,
|
||||||
|
compat_urllib_request,
|
||||||
|
compat_urlparse,
|
||||||
|
compat_str,
|
||||||
|
|
||||||
|
ExtractorError,
|
||||||
|
unified_strdate,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class NiconicoIE(InfoExtractor):
    """Information extractor for nicovideo.jp watch pages (login required)."""
    IE_NAME = u'niconico'
    IE_DESC = u'ニコニコ動画'

    _TEST = {
        u'url': u'http://www.nicovideo.jp/watch/sm22312215',
        u'file': u'sm22312215.mp4',
        u'md5': u'd1a75c0823e2f629128c43e1212760f9',
        u'info_dict': {
            u'title': u'Big Buck Bunny',
            u'uploader': u'takuya0301',
            u'uploader_id': u'2698420',
            u'upload_date': u'20131123',
            u'description': u'(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org',
        },
        u'params': {
            u'username': u'ydl.niconico@gmail.com',
            u'password': u'youtube-dl',
        },
    }

    _VALID_URL = r'^https?://(?:www\.|secure\.)?nicovideo\.jp/watch/([a-z][a-z][0-9]+)(?:.*)$'
    _NETRC_MACHINE = 'niconico'
    # When True, a missing username aborts with an ExtractorError
    _LOGIN_REQUIRED = True

    def _real_initialize(self):
        """Log in before any extraction takes place."""
        self._login()

    def _login(self):
        """Submit the login form.

        Returns True on success and False when no credentials were given
        (and none are required) or when the site reports a login error.
        Raises ExtractorError when credentials are required but missing.
        """
        username, password = self._get_login_info()
        if username is None:
            # Nothing to authenticate with
            if not self._LOGIN_REQUIRED:
                return False
            raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)

        form_fields = {
            u'mail': username,
            u'password': password,
        }
        # Python 2.x's urlencode chokes on unicode, so every key and value
        # is turned into UTF-8 bytes *before* being url-encoded.
        encoded_fields = {}
        for field, value in form_fields.items():
            encoded_fields[field.encode('utf-8')] = value.encode('utf-8')
        post_body = compat_urllib_parse.urlencode(encoded_fields).encode('utf-8')

        login_request = compat_urllib_request.Request(
            u'https://secure.nicovideo.jp/secure/login', post_body)
        response_page = self._download_webpage(
            login_request, u'', note=u'Logging in', errnote=u'Unable to log in')

        if re.search(r'(?i)<h1 class="mb8p4">Log in error</h1>', response_page):
            self._downloader.report_warning(u'unable to log in: bad username or password')
            return False
        return True

    def _real_extract(self, url):
        """Collect the media URL and metadata for the watch page at *url*."""
        video_id = re.match(self._VALID_URL, url).group(1)

        # The watch page content itself is not used; it is fetched only for
        # the cookies needed to download the info pages afterwards.
        self._download_webpage('http://www.nicovideo.jp/watch/' + video_id, video_id)

        thumb_info = self._download_xml(
            'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id,
            note=u'Downloading video info page')

        # The flv info response is a query string holding the media URL
        flv_info = self._download_webpage(
            u'http://flapi.nicovideo.jp/api/getflv?v=' + video_id,
            video_id, u'Downloading flv info')
        media_url = compat_urlparse.parse_qs(flv_info)['url'][0]

        def text_of(xpath):
            # Text content of the first element matching xpath
            return thumb_info.find(xpath).text

        title = text_of('.//title')
        extension = text_of('.//movie_type')
        format_name = extension.upper()
        thumbnail = text_of('.//thumbnail_url')
        description = text_of('.//description')
        uploader_id = text_of('.//user_id')
        upload_date = unified_strdate(text_of('.//first_retrieve').split('+')[0])
        view_count = text_of('.//view_counter')
        webpage_url = text_of('.//watch_url')

        # Fall back to the numeric id when the nickname cannot be fetched
        uploader = uploader_id
        user_info_url = 'http://seiga.nicovideo.jp/api/user/info?id=' + uploader_id
        try:
            user_doc = self._download_xml(
                user_info_url, video_id, note=u'Downloading user information')
            uploader = user_doc.find('.//nickname').text
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'Unable to download user info webpage: %s' % compat_str(err))

        return {
            'id': video_id,
            'url': media_url,
            'title': title,
            'ext': extension,
            'format': format_name,
            'thumbnail': thumbnail,
            'description': description,
            'uploader': uploader,
            'upload_date': upload_date,
            'uploader_id': uploader_id,
            'view_count': view_count,
            'webpage_url': webpage_url,
        }
|
49
youtube_dl/extractor/podomatic.py
Normal file
49
youtube_dl/extractor/podomatic.py
Normal file
@ -0,0 +1,49 @@
|
|||||||
|
import json
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class PodomaticIE(InfoExtractor):
    """Information extractor for podomatic.com podcast entries."""
    IE_NAME = 'podomatic'
    _VALID_URL = r'^(?P<proto>https?)://(?P<channel>[^.]+)\.podomatic\.com/entry/(?P<id>[^?]+)'

    _TEST = {
        u"url": u"http://scienceteachingtips.podomatic.com/entry/2009-01-02T16_03_35-08_00",
        u"file": u"2009-01-02T16_03_35-08_00.mp3",
        u"md5": u"84bb855fcf3429e6bf72460e1eed782d",
        u"info_dict": {
            u"uploader": u"Science Teaching Tips",
            u"uploader_id": u"scienceteachingtips",
            u"title": u"64. When the Moon Hits Your Eye",
            u"duration": 446,
        }
    }

    def _real_extract(self, url):
        """Fetch the entry's embed parameters (JSON) and map them to an info dict."""
        match = re.match(self._VALID_URL, url)
        proto, channel, video_id = match.group('proto', 'channel', 'id')

        # Same scheme and channel subdomain as the entry URL itself
        params_url = (
            '%s://%s.podomatic.com/entry/embed_params/%s'
            '?permalink=true&rtmp=0' % (proto, channel, video_id))
        params = json.loads(self._download_webpage(
            params_url, video_id, note=u'Downloading video info'))

        media_url = params['downloadLink']
        podcast_name = params['podcast']
        entry_title = params['title']
        image_url = params['imageLocation']
        # 'length' is divided by 1000 and truncated to an int
        length = int(params['length'] / 1000.0)

        return {
            'id': video_id,
            'url': media_url,
            'title': entry_title,
            'uploader': podcast_name,
            'uploader_id': channel,
            'thumbnail': image_url,
            'duration': length,
        }
|
@ -1,7 +1,6 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import xml.etree.ElementTree
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@ -35,12 +34,11 @@ class SinaIE(InfoExtractor):
|
|||||||
|
|
||||||
def _extract_video(self, video_id):
|
def _extract_video(self, video_id):
|
||||||
data = compat_urllib_parse.urlencode({'vid': video_id})
|
data = compat_urllib_parse.urlencode({'vid': video_id})
|
||||||
url_page = self._download_webpage('http://v.iask.com/v_play.php?%s' % data,
|
url_doc = self._download_xml('http://v.iask.com/v_play.php?%s' % data,
|
||||||
video_id, u'Downloading video url')
|
video_id, u'Downloading video url')
|
||||||
image_page = self._download_webpage(
|
image_page = self._download_webpage(
|
||||||
'http://interface.video.sina.com.cn/interface/common/getVideoImage.php?%s' % data,
|
'http://interface.video.sina.com.cn/interface/common/getVideoImage.php?%s' % data,
|
||||||
video_id, u'Downloading thumbnail info')
|
video_id, u'Downloading thumbnail info')
|
||||||
url_doc = xml.etree.ElementTree.fromstring(url_page.encode('utf-8'))
|
|
||||||
|
|
||||||
return {'id': video_id,
|
return {'id': video_id,
|
||||||
'url': url_doc.find('./durl/url').text,
|
'url': url_doc.find('./durl/url').text,
|
||||||
|
@ -76,44 +76,78 @@ class SoundcloudIE(InfoExtractor):
|
|||||||
def _extract_info_dict(self, info, full_title=None, quiet=False):
    """Build the result dictionary for one SoundCloud track.

    info is the decoded track metadata; the keys read here are 'id',
    'artwork_url', 'user', 'created_at', 'title', 'description',
    'stream_url' and, optionally, 'original_format' and 'downloadable'.
    full_title, when given, is used instead of the track id in the
    progress message.  quiet suppresses that progress message.

    Returns a dict with the track metadata and a 'formats' list.
    """
    track_id = compat_str(info['id'])
    name = full_title or track_id
    # Report progress unless the caller asked for silence (the check must
    # be negated: quiet=True means "do not report").
    if not quiet:
        self.report_extraction(name)

    thumbnail = info['artwork_url']
    if thumbnail is not None:
        thumbnail = thumbnail.replace('-large', '-t500x500')
    ext = info.get('original_format', u'mp3')
    result = {
        'id': track_id,
        'uploader': info['user']['username'],
        'upload_date': unified_strdate(info['created_at']),
        'title': info['title'],
        'description': info['description'],
        'thumbnail': thumbnail,
    }
    if info.get('downloadable', False):
        # We can build a direct link to the song
        format_url = (
            u'https://api.soundcloud.com/tracks/{0}/download?client_id={1}'.format(
                track_id, self._CLIENT_ID))
        result['formats'] = [{
            'format_id': 'download',
            'ext': ext,
            'url': format_url,
            'vcodec': 'none',
        }]
    else:
        # We have to retrieve the url
        stream_json = self._download_webpage(
            'http://api.soundcloud.com/i1/tracks/{0}/streams?client_id={1}'.format(track_id, self._IPHONE_CLIENT_ID),
            track_id, u'Downloading track url')

        formats = []
        format_dict = json.loads(stream_json)
        for key, stream_url in format_dict.items():
            if key.startswith(u'http'):
                formats.append({
                    'format_id': key,
                    'ext': ext,
                    'url': stream_url,
                    'vcodec': 'none',
                })
            elif key.startswith(u'rtmp'):
                # The url doesn't have an rtmp app, we have to extract the playpath
                url, path = stream_url.split('mp3:', 1)
                formats.append({
                    'format_id': key,
                    'url': url,
                    'play_path': 'mp3:' + path,
                    'ext': ext,
                    'vcodec': 'none',
                })

        if not formats:
            # We fall back to the stream_url in the original info; this
            # cannot always be used, sometimes it gives an HTTP 404 error
            formats.append({
                'format_id': u'fallback',
                'url': info['stream_url'] + '?client_id=' + self._CLIENT_ID,
                'ext': ext,
                'vcodec': 'none',
            })

        def format_pref(f):
            # Sort key: http entries last, rtmp before them, anything
            # else (the fallback) first.
            if f['format_id'].startswith('http'):
                return 2
            if f['format_id'].startswith('rtmp'):
                return 1
            return 0

        formats.sort(key=format_pref)
        result['formats'] = formats

    return result
|
||||||
|
|
||||||
|
@ -1,5 +1,4 @@
|
|||||||
import re
|
import re
|
||||||
import xml.etree.ElementTree
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
@ -33,12 +32,10 @@ class SpiegelIE(InfoExtractor):
|
|||||||
r'<div class="module-title">(.*?)</div>', webpage, u'title')
|
r'<div class="module-title">(.*?)</div>', webpage, u'title')
|
||||||
|
|
||||||
xml_url = u'http://video2.spiegel.de/flash/' + video_id + u'.xml'
|
xml_url = u'http://video2.spiegel.de/flash/' + video_id + u'.xml'
|
||||||
xml_code = self._download_webpage(
|
idoc = self._download_xml(
|
||||||
xml_url, video_id,
|
xml_url, video_id,
|
||||||
note=u'Downloading XML', errnote=u'Failed to download XML')
|
note=u'Downloading XML', errnote=u'Failed to download XML')
|
||||||
|
|
||||||
idoc = xml.etree.ElementTree.fromstring(xml_code)
|
|
||||||
|
|
||||||
formats = [
|
formats = [
|
||||||
{
|
{
|
||||||
'format_id': n.tag.rpartition('type')[2],
|
'format_id': n.tag.rpartition('type')[2],
|
||||||
|
@ -21,6 +21,7 @@ class StreamcloudIE(InfoExtractor):
|
|||||||
u'title': u'youtube-dl test video \'/\\ ä ↭',
|
u'title': u'youtube-dl test video \'/\\ ä ↭',
|
||||||
u'duration': 9,
|
u'duration': 9,
|
||||||
},
|
},
|
||||||
|
u'skip': u'Only available from the EU'
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -1,5 +1,4 @@
|
|||||||
import re
|
import re
|
||||||
import xml.etree.ElementTree
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@ -32,8 +31,7 @@ class TeamcocoIE(InfoExtractor):
|
|||||||
self.report_extraction(video_id)
|
self.report_extraction(video_id)
|
||||||
|
|
||||||
data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
|
data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
|
||||||
data_xml = self._download_webpage(data_url, video_id, 'Downloading data webpage')
|
data = self._download_xml(data_url, video_id, 'Downloading data webpage')
|
||||||
data = xml.etree.ElementTree.fromstring(data_xml.encode('utf-8'))
|
|
||||||
|
|
||||||
|
|
||||||
qualities = ['500k', '480p', '1000k', '720p', '1080p']
|
qualities = ['500k', '480p', '1000k', '720p', '1080p']
|
||||||
|
@ -1,6 +1,5 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
import re
|
import re
|
||||||
import xml.etree.ElementTree
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@ -40,11 +39,9 @@ class TouTvIE(InfoExtractor):
|
|||||||
r'"idMedia":\s*"([^"]+)"', webpage, u'media ID')
|
r'"idMedia":\s*"([^"]+)"', webpage, u'media ID')
|
||||||
|
|
||||||
streams_url = u'http://release.theplatform.com/content.select?pid=' + mediaId
|
streams_url = u'http://release.theplatform.com/content.select?pid=' + mediaId
|
||||||
streams_webpage = self._download_webpage(
|
streams_doc = self._download_xml(
|
||||||
streams_url, video_id, note=u'Downloading stream list')
|
streams_url, video_id, note=u'Downloading stream list')
|
||||||
|
|
||||||
streams_doc = xml.etree.ElementTree.fromstring(
|
|
||||||
streams_webpage.encode('utf-8'))
|
|
||||||
video_url = next(n.text
|
video_url = next(n.text
|
||||||
for n in streams_doc.findall('.//choice/url')
|
for n in streams_doc.findall('.//choice/url')
|
||||||
if u'//ad.doubleclick' not in n.text)
|
if u'//ad.doubleclick' not in n.text)
|
||||||
|
@ -1,6 +1,5 @@
|
|||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
import xml.etree.ElementTree
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
@ -36,12 +35,10 @@ class TriluliluIE(InfoExtractor):
|
|||||||
|
|
||||||
format_url = (u'http://fs%(server)s.trilulilu.ro/%(hash)s/'
|
format_url = (u'http://fs%(server)s.trilulilu.ro/%(hash)s/'
|
||||||
u'video-formats2' % log)
|
u'video-formats2' % log)
|
||||||
format_str = self._download_webpage(
|
format_doc = self._download_xml(
|
||||||
format_url, video_id,
|
format_url, video_id,
|
||||||
note=u'Downloading formats',
|
note=u'Downloading formats',
|
||||||
errnote=u'Error while downloading formats')
|
errnote=u'Error while downloading formats')
|
||||||
|
|
||||||
format_doc = xml.etree.ElementTree.fromstring(format_str)
|
|
||||||
|
|
||||||
video_url_template = (
|
video_url_template = (
|
||||||
u'http://fs%(server)s.trilulilu.ro/stream.php?type=video'
|
u'http://fs%(server)s.trilulilu.ro/stream.php?type=video'
|
||||||
|
@ -1,5 +1,4 @@
|
|||||||
import re
|
import re
|
||||||
import xml.etree.ElementTree
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@ -27,9 +26,8 @@ class VideofyMeIE(InfoExtractor):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
config_xml = self._download_webpage('http://sunshine.videofy.me/?videoId=%s' % video_id,
|
config = self._download_xml('http://sunshine.videofy.me/?videoId=%s' % video_id,
|
||||||
video_id)
|
video_id)
|
||||||
config = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
|
|
||||||
video = config.find('video')
|
video = config.find('video')
|
||||||
sources = video.find('sources')
|
sources = video.find('sources')
|
||||||
url_node = next(node for node in [find_xpath_attr(sources, 'source', 'id', 'HQ %s' % key)
|
url_node = next(node for node in [find_xpath_attr(sources, 'source', 'id', 'HQ %s' % key)
|
||||||
|
@ -5,7 +5,7 @@ from .common import InfoExtractor
|
|||||||
|
|
||||||
|
|
||||||
class VideoPremiumIE(InfoExtractor):
|
class VideoPremiumIE(InfoExtractor):
|
||||||
_VALID_URL = r'(?:https?://)?(?:www\.)?videopremium\.tv/(?P<id>\w+)(?:/.*)?'
|
_VALID_URL = r'(?:https?://)?(?:www\.)?videopremium\.(?:tv|me)/(?P<id>\w+)(?:/.*)?'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://videopremium.tv/4w7oadjsf156',
|
u'url': u'http://videopremium.tv/4w7oadjsf156',
|
||||||
u'file': u'4w7oadjsf156.f4v',
|
u'file': u'4w7oadjsf156.f4v',
|
||||||
@ -41,4 +41,4 @@ class VideoPremiumIE(InfoExtractor):
|
|||||||
'player_url': "http://videopremium.tv/uplayer/uppod.swf",
|
'player_url': "http://videopremium.tv/uplayer/uppod.swf",
|
||||||
'ext': 'f4v',
|
'ext': 'f4v',
|
||||||
'title': video_title,
|
'title': video_title,
|
||||||
}
|
}
|
||||||
|
101
youtube_dl/extractor/viki.py
Normal file
101
youtube_dl/extractor/viki.py
Normal file
@ -0,0 +1,101 @@
|
|||||||
|
import re
|
||||||
|
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
unescapeHTML,
|
||||||
|
unified_strdate,
|
||||||
|
)
|
||||||
|
from .subtitles import SubtitlesInfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class VikiIE(SubtitlesInfoExtractor):
    """Extractor for viki.com video pages, including their subtitle tracks."""
    IE_NAME = u'viki'

    _VALID_URL = r'^https?://(?:www\.)?viki\.com/videos/(?P<id>[0-9]+v)'
    _TEST = {
        u'url': u'http://www.viki.com/videos/1023585v-heirs-episode-14',
        u'file': u'1023585v.mp4',
        u'md5': u'a21454021c2646f5433514177e2caa5f',
        u'info_dict': {
            u'title': u'Heirs Episode 14',
            u'uploader': u'SBS',
            u'description': u'md5:c4b17b9626dd4b143dcc4d855ba3474e',
            u'upload_date': u'20131121',
            u'age_limit': 13,
        },
        u'skip': u'Blocked in the US',
    }

    def _real_extract(self, url):
        """Download the video page and the player fragment and build the
        info dictionary.

        Raises ExtractorError (expected) when the player fragment starts
        with a "video-error" block.  Returns None after listing subtitles
        when the 'listsubtitles' option is set.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)
        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage)
        thumbnail = self._og_search_thumbnail(webpage)

        # The broadcast network is optional on the page
        uploader_m = re.search(
            r'<strong>Broadcast Network: </strong>\s*([^<]*)<', webpage)
        if uploader_m is None:
            uploader = None
        else:
            uploader = uploader_m.group(1).strip()

        rating_str = self._html_search_regex(
            r'<strong>Rating: </strong>\s*([^<]*)<', webpage,
            u'rating information', default='').strip()
        # Maps the rating label shown on the page to a minimum age;
        # unknown or missing labels yield None
        RATINGS = {
            'G': 0,
            'PG': 10,
            'PG-13': 13,
            'R': 16,
            'NC': 18,
        }
        age_limit = RATINGS.get(rating_str)

        info_url = 'http://www.viki.com/player5_fragment/%s?action=show&controller=videos' % video_id
        info_webpage = self._download_webpage(
            info_url, video_id, note=u'Downloading info page')
        if re.match(r'\s*<div\s+class="video-error', info_webpage):
            raise ExtractorError(
                u'Video %s is blocked from your location.' % video_id,
                expected=True)
        video_url = self._html_search_regex(
            r'<source[^>]+src="([^"]+)"', info_webpage, u'video URL')

        # The upload date is optional metadata: supply a default so that a
        # missing "created_at" field results in None instead of relying on
        # a None check that the lookup never satisfied.
        upload_date_str = self._html_search_regex(
            r'"created_at":"([^"]+)"', info_webpage, u'upload date',
            default='')
        upload_date = (
            unified_strdate(upload_date_str)
            if upload_date_str
            else None
        )

        # subtitles
        video_subtitles = self.extract_subtitles(video_id, info_webpage)
        if self._downloader.params.get('listsubtitles', False):
            self._list_available_subtitles(video_id, info_webpage)
            return

        return {
            'id': video_id,
            'title': title,
            'url': video_url,
            'description': description,
            'thumbnail': thumbnail,
            'age_limit': age_limit,
            'uploader': uploader,
            'subtitles': video_subtitles,
            'upload_date': upload_date,
        }

    def _get_available_subtitles(self, video_id, info_webpage):
        """Return a dict mapping language code to subtitle URL, taken from
        the <track> elements of the player fragment.

        Tracks whose URL does not contain a '/<lang>.vtt' component are
        skipped.
        """
        res = {}
        for sturl_html in re.findall(r'<track src="([^"]+)"/>', info_webpage):
            sturl = unescapeHTML(sturl_html)
            m = re.search(r'/(?P<lang>[a-z]+)\.vtt', sturl)
            if not m:
                continue
            res[m.group('lang')] = sturl
        return res
|
@ -249,25 +249,46 @@ class VimeoChannelIE(InfoExtractor):
|
|||||||
IE_NAME = u'vimeo:channel'
|
IE_NAME = u'vimeo:channel'
|
||||||
_VALID_URL = r'(?:https?://)?vimeo.\com/channels/(?P<id>[^/]+)'
|
_VALID_URL = r'(?:https?://)?vimeo.\com/channels/(?P<id>[^/]+)'
|
||||||
_MORE_PAGES_INDICATOR = r'<a.+?rel="next"'
|
_MORE_PAGES_INDICATOR = r'<a.+?rel="next"'
|
||||||
|
_TITLE_RE = r'<link rel="alternate"[^>]+?title="(.*?)"'
|
||||||
|
|
||||||
def _extract_videos(self, list_id, base_url):
    """Collect every clip listed under base_url and return a playlist.

    Pages '<base_url>/videos/page:N/' are downloaded starting at N=1
    until a page no longer matches _MORE_PAGES_INDICATOR.  The playlist
    title is searched with _TITLE_RE in the last page downloaded.
    """
    clip_ids = []
    pagenum = 1
    while True:
        page_url = '%s/videos/page:%d/' % (base_url, pagenum)
        webpage = self._download_webpage(
            page_url, list_id, u'Downloading page %s' % pagenum)
        clip_ids += re.findall(r'id="clip_(\d+?)"', webpage)
        has_next = re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL)
        if has_next is None:
            break
        pagenum += 1

    entries = []
    for clip_id in clip_ids:
        entries.append(self.url_result('http://vimeo.com/%s' % clip_id, 'Vimeo'))
    list_title = self._html_search_regex(
        self._TITLE_RE, webpage, u'list title')
    playlist = {
        '_type': 'playlist',
        'id': list_id,
        'title': list_title,
        'entries': entries,
    }
    return playlist
|
||||||
|
|
||||||
|
def _real_extract(self, url):
    """Extract the channel id from the URL and list the channel's videos."""
    channel_id = re.match(self._VALID_URL, url).group('id')
    channel_url = 'http://vimeo.com/channels/%s' % channel_id
    return self._extract_videos(channel_id, channel_url)
|
||||||
|
|
||||||
|
|
||||||
|
class VimeoUserIE(VimeoChannelIE):
    """Extractor for the video list of a Vimeo user page."""
    IE_NAME = u'vimeo:user'
    # The dot of the host name is escaped: the previous 'vimeo.\com'
    # matched any character there and used the invalid escape '\c'.
    _VALID_URL = r'(?:https?://)?vimeo\.com/(?P<name>[^/]+)'
    _TITLE_RE = r'<a[^>]+?class="user">([^<>]+?)</a>'

    @classmethod
    def suitable(cls, url):
        """Return False for URLs that the channel or the single-video
        extractor accepts; otherwise defer to the inherited check."""
        if VimeoChannelIE.suitable(url) or VimeoIE.suitable(url):
            return False
        return super(VimeoUserIE, cls).suitable(url)

    def _real_extract(self, url):
        """Extract the user name from the URL and list the user's videos."""
        mobj = re.match(self._VALID_URL, url)
        name = mobj.group('name')
        return self._extract_videos(name, 'http://vimeo.com/%s' % name)
|
||||||
|
@ -17,27 +17,21 @@ class YahooIE(InfoExtractor):
|
|||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
u'url': u'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
|
u'url': u'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
|
||||||
u'file': u'214727115.flv',
|
u'file': u'214727115.mp4',
|
||||||
|
u'md5': u'4962b075c08be8690a922ee026d05e69',
|
||||||
u'info_dict': {
|
u'info_dict': {
|
||||||
u'title': u'Julian Smith & Travis Legg Watch Julian Smith',
|
u'title': u'Julian Smith & Travis Legg Watch Julian Smith',
|
||||||
u'description': u'Julian and Travis watch Julian Smith',
|
u'description': u'Julian and Travis watch Julian Smith',
|
||||||
},
|
},
|
||||||
u'params': {
|
|
||||||
# Requires rtmpdump
|
|
||||||
u'skip_download': True,
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
u'url': u'http://screen.yahoo.com/wired/codefellas-s1-ep12-cougar-lies-103000935.html',
|
u'url': u'http://screen.yahoo.com/wired/codefellas-s1-ep12-cougar-lies-103000935.html',
|
||||||
u'file': u'103000935.flv',
|
u'file': u'103000935.mp4',
|
||||||
|
u'md5': u'd6e6fc6e1313c608f316ddad7b82b306',
|
||||||
u'info_dict': {
|
u'info_dict': {
|
||||||
u'title': u'Codefellas - The Cougar Lies with Spanish Moss',
|
u'title': u'Codefellas - The Cougar Lies with Spanish Moss',
|
||||||
u'description': u'Agent Topple\'s mustache does its dirty work, and Nicole brokers a deal for peace. But why is the NSA collecting millions of Instagram brunch photos? And if your waffles have nothing to hide, what are they so worried about?',
|
u'description': u'Agent Topple\'s mustache does its dirty work, and Nicole brokers a deal for peace. But why is the NSA collecting millions of Instagram brunch photos? And if your waffles have nothing to hide, what are they so worried about?',
|
||||||
},
|
},
|
||||||
u'params': {
|
|
||||||
# Requires rtmpdump
|
|
||||||
u'skip_download': True,
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
@ -46,15 +40,19 @@ class YahooIE(InfoExtractor):
|
|||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
items_json = self._search_regex(r'YVIDEO_INIT_ITEMS = ({.*?});$',
|
items_json = self._search_regex(r'mediaItems: ({.*?})$',
|
||||||
webpage, u'items', flags=re.MULTILINE)
|
webpage, u'items', flags=re.MULTILINE)
|
||||||
items = json.loads(items_json)
|
items = json.loads(items_json)
|
||||||
info = items['mediaItems']['query']['results']['mediaObj'][0]
|
info = items['mediaItems']['query']['results']['mediaObj'][0]
|
||||||
# The 'meta' field is not always in the video webpage, we request it
|
# The 'meta' field is not always in the video webpage, we request it
|
||||||
# from another page
|
# from another page
|
||||||
long_id = info['id']
|
long_id = info['id']
|
||||||
|
return self._get_info(info['id'], video_id)
|
||||||
|
|
||||||
|
def _get_info(self, long_id, video_id):
|
||||||
query = ('SELECT * FROM yahoo.media.video.streams WHERE id="%s"'
|
query = ('SELECT * FROM yahoo.media.video.streams WHERE id="%s"'
|
||||||
' AND plrs="86Gj0vCaSzV_Iuf6hNylf2"' % long_id)
|
' AND plrs="86Gj0vCaSzV_Iuf6hNylf2" AND region="US"'
|
||||||
|
' AND protocol="http"' % long_id)
|
||||||
data = compat_urllib_parse.urlencode({
|
data = compat_urllib_parse.urlencode({
|
||||||
'q': query,
|
'q': query,
|
||||||
'env': 'prod',
|
'env': 'prod',
|
||||||
@ -91,17 +89,39 @@ class YahooIE(InfoExtractor):
|
|||||||
formats.append(format_info)
|
formats.append(format_info)
|
||||||
formats = sorted(formats, key=lambda f:(f['height'], f['width']))
|
formats = sorted(formats, key=lambda f:(f['height'], f['width']))
|
||||||
|
|
||||||
info = {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': meta['title'],
|
'title': meta['title'],
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'description': clean_html(meta['description']),
|
'description': clean_html(meta['description']),
|
||||||
'thumbnail': meta['thumbnail'],
|
'thumbnail': meta['thumbnail'],
|
||||||
}
|
}
|
||||||
# TODO: Remove when #980 has been merged
|
|
||||||
info.update(formats[-1])
|
|
||||||
|
|
||||||
return info
|
|
||||||
|
class YahooNewsIE(YahooIE):
    """Extractor for news.yahoo.com video pages; reuses YahooIE._get_info."""
    IE_NAME = 'yahoo:news'
    # The id must contain at least one digit, so a URL ending in '-.html'
    # no longer yields an empty video id.
    _VALID_URL = r'http://news\.yahoo\.com/video/.*?-(?P<id>\d+)\.html'

    _TEST = {
        u'url': u'http://news.yahoo.com/video/china-moses-crazy-blues-104538833.html',
        u'md5': u'67010fdf3a08d290e060a4dd96baa07b',
        u'info_dict': {
            u'id': u'104538833',
            u'ext': u'mp4',
            u'title': u'China Moses Is Crazy About the Blues',
            u'description': u'md5:9900ab8cd5808175c7b3fe55b979bed0',
        },
    }

    # Override YahooIE properties we don't want
    _TESTS = []

    def _real_extract(self, url):
        """Read the 'contentId' value from the page and fetch the video
        information for it."""
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)
        long_id = self._search_regex(r'contentId: \'(.+?)\',', webpage, u'long id')
        return self._get_info(long_id, video_id)
|
||||||
|
|
||||||
|
|
||||||
class YahooSearchIE(SearchInfoExtractor):
|
class YahooSearchIE(SearchInfoExtractor):
|
||||||
|
@ -11,7 +11,6 @@ import socket
|
|||||||
import string
|
import string
|
||||||
import struct
|
import struct
|
||||||
import traceback
|
import traceback
|
||||||
import xml.etree.ElementTree
|
|
||||||
import zlib
|
import zlib
|
||||||
|
|
||||||
from .common import InfoExtractor, SearchInfoExtractor
|
from .common import InfoExtractor, SearchInfoExtractor
|
||||||
@ -29,6 +28,7 @@ from ..utils import (
|
|||||||
clean_html,
|
clean_html,
|
||||||
get_cachedir,
|
get_cachedir,
|
||||||
get_element_by_id,
|
get_element_by_id,
|
||||||
|
get_element_by_attribute,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
@ -248,21 +248,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
'248': 'webm',
|
'248': 'webm',
|
||||||
}
|
}
|
||||||
_video_dimensions = {
|
_video_dimensions = {
|
||||||
'5': '240x400',
|
'5': '400x240',
|
||||||
'6': '???',
|
'6': '???',
|
||||||
'13': '???',
|
'13': '???',
|
||||||
'17': '144x176',
|
'17': '176x144',
|
||||||
'18': '360x640',
|
'18': '640x360',
|
||||||
'22': '720x1280',
|
'22': '1280x720',
|
||||||
'34': '360x640',
|
'34': '640x360',
|
||||||
'35': '480x854',
|
'35': '854x480',
|
||||||
'36': '240x320',
|
'36': '320x240',
|
||||||
'37': '1080x1920',
|
'37': '1920x1080',
|
||||||
'38': '3072x4096',
|
'38': '4096x3072',
|
||||||
'43': '360x640',
|
'43': '640x360',
|
||||||
'44': '480x854',
|
'44': '854x480',
|
||||||
'45': '720x1280',
|
'45': '1280x720',
|
||||||
'46': '1080x1920',
|
'46': '1920x1080',
|
||||||
'82': '360p',
|
'82': '360p',
|
||||||
'83': '480p',
|
'83': '480p',
|
||||||
'84': '720p',
|
'84': '720p',
|
||||||
@ -1144,8 +1144,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
'asrs': 1,
|
'asrs': 1,
|
||||||
})
|
})
|
||||||
list_url = caption_url + '&' + list_params
|
list_url = caption_url + '&' + list_params
|
||||||
list_page = self._download_webpage(list_url, video_id)
|
caption_list = self._download_xml(list_url, video_id)
|
||||||
caption_list = xml.etree.ElementTree.fromstring(list_page.encode('utf-8'))
|
|
||||||
original_lang_node = caption_list.find('track')
|
original_lang_node = caption_list.find('track')
|
||||||
if original_lang_node is None or original_lang_node.attrib.get('kind') != 'asr' :
|
if original_lang_node is None or original_lang_node.attrib.get('kind') != 'asr' :
|
||||||
self._downloader.report_warning(u'Video doesn\'t have automatic captions')
|
self._downloader.report_warning(u'Video doesn\'t have automatic captions')
|
||||||
@ -1528,7 +1527,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
|||||||
)"""
|
)"""
|
||||||
_TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&page=%s'
|
_TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&page=%s'
|
||||||
_MORE_PAGES_INDICATOR = r'data-link-type="next"'
|
_MORE_PAGES_INDICATOR = r'data-link-type="next"'
|
||||||
_VIDEO_RE = r'href="/watch\?v=([0-9A-Za-z_-]{11})&'
|
_VIDEO_RE = r'href="/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&[^"]*?index=(?P<index>\d+)'
|
||||||
IE_NAME = u'youtube:playlist'
|
IE_NAME = u'youtube:playlist'
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
@ -1539,6 +1538,24 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
|||||||
def _real_initialize(self):
|
def _real_initialize(self):
|
||||||
self._login()
|
self._login()
|
||||||
|
|
||||||
|
def _ids_to_results(self, ids):
    """Wrap each video id in a url_result entry handled by 'Youtube'."""
    results = []
    for video_id in ids:
        results.append(self.url_result(video_id, 'Youtube', video_id=video_id))
    return results
|
||||||
|
|
||||||
|
def _extract_mix(self, playlist_id):
    """Extract a YouTube mix: download its watch page and return a
    playlist of the videos linked with this list id."""
    # A mix is generated from a single video; the playlist id is just
    # 'RD' + video_id, so the seed video id is everything after 'RD'.
    seed_video_id = playlist_id[2:]
    url = 'https://youtube.com/watch?v=%s&list=%s' % (seed_video_id, playlist_id)
    webpage = self._download_webpage(url, playlist_id, u'Downloading Youtube mix')

    # Try the 'title long-title' element first, then the plain 'title ' one
    title_span = get_element_by_attribute('class', 'title long-title', webpage)
    if not title_span:
        title_span = get_element_by_attribute('class', 'title ', webpage)
    title = clean_html(title_span)

    video_re = r'data-index="\d+".*?href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s' % re.escape(playlist_id)
    found_ids = re.findall(video_re, webpage)
    ids = orderedSet(found_ids)

    return self.playlist_result(self._ids_to_results(ids), playlist_id, title)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
# Extract playlist id
|
# Extract playlist id
|
||||||
mobj = re.match(self._VALID_URL, url, re.VERBOSE)
|
mobj = re.match(self._VALID_URL, url, re.VERBOSE)
|
||||||
@ -1552,18 +1569,24 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
|||||||
video_id = query_dict['v'][0]
|
video_id = query_dict['v'][0]
|
||||||
if self._downloader.params.get('noplaylist'):
|
if self._downloader.params.get('noplaylist'):
|
||||||
self.to_screen(u'Downloading just video %s because of --no-playlist' % video_id)
|
self.to_screen(u'Downloading just video %s because of --no-playlist' % video_id)
|
||||||
return self.url_result('https://www.youtube.com/watch?v=' + video_id, 'Youtube')
|
return self.url_result(video_id, 'Youtube', video_id=video_id)
|
||||||
else:
|
else:
|
||||||
self.to_screen(u'Downloading playlist PL%s - add --no-playlist to just download video %s' % (playlist_id, video_id))
|
self.to_screen(u'Downloading playlist PL%s - add --no-playlist to just download video %s' % (playlist_id, video_id))
|
||||||
|
|
||||||
|
if len(playlist_id) == 13: # 'RD' + 11 characters for the video id
|
||||||
|
# Mixes require a custom extraction process
|
||||||
|
return self._extract_mix(playlist_id)
|
||||||
|
|
||||||
# Extract the video ids from the playlist pages
|
# Extract the video ids from the playlist pages
|
||||||
ids = []
|
ids = []
|
||||||
|
|
||||||
for page_num in itertools.count(1):
|
for page_num in itertools.count(1):
|
||||||
url = self._TEMPLATE_URL % (playlist_id, page_num)
|
url = self._TEMPLATE_URL % (playlist_id, page_num)
|
||||||
page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)
|
page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)
|
||||||
# The ids are duplicated
|
matches = re.finditer(self._VIDEO_RE, page)
|
||||||
new_ids = orderedSet(re.findall(self._VIDEO_RE, page))
|
# We remove the duplicates and the link with index 0
|
||||||
|
# (it's not the first video of the playlist)
|
||||||
|
new_ids = orderedSet(m.group('id') for m in matches if m.group('index') != '0')
|
||||||
ids.extend(new_ids)
|
ids.extend(new_ids)
|
||||||
|
|
||||||
if re.search(self._MORE_PAGES_INDICATOR, page) is None:
|
if re.search(self._MORE_PAGES_INDICATOR, page) is None:
|
||||||
@ -1571,7 +1594,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
|||||||
|
|
||||||
playlist_title = self._og_search_title(page)
|
playlist_title = self._og_search_title(page)
|
||||||
|
|
||||||
url_results = [self.url_result(vid, 'Youtube') for vid in ids]
|
url_results = self._ids_to_results(ids)
|
||||||
return self.playlist_result(url_results, playlist_id, playlist_title)
|
return self.playlist_result(url_results, playlist_id, playlist_title)
|
||||||
|
|
||||||
|
|
||||||
@ -1626,9 +1649,9 @@ class YoutubeChannelIE(InfoExtractor):
|
|||||||
|
|
||||||
self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))
|
self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))
|
||||||
|
|
||||||
urls = ['http://www.youtube.com/watch?v=%s' % id for id in video_ids]
|
url_entries = [self.url_result(video_id, 'Youtube', video_id=video_id)
|
||||||
url_entries = [self.url_result(eurl, 'Youtube') for eurl in urls]
|
for video_id in video_ids]
|
||||||
return [self.playlist_result(url_entries, channel_id)]
|
return self.playlist_result(url_entries, channel_id)
|
||||||
|
|
||||||
|
|
||||||
class YoutubeUserIE(InfoExtractor):
|
class YoutubeUserIE(InfoExtractor):
|
||||||
@ -1692,9 +1715,11 @@ class YoutubeUserIE(InfoExtractor):
|
|||||||
if len(ids_in_page) < self._GDATA_PAGE_SIZE:
|
if len(ids_in_page) < self._GDATA_PAGE_SIZE:
|
||||||
break
|
break
|
||||||
|
|
||||||
urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids]
|
url_results = [
|
||||||
url_results = [self.url_result(rurl, 'Youtube') for rurl in urls]
|
self.url_result(video_id, 'Youtube', video_id=video_id)
|
||||||
return [self.playlist_result(url_results, playlist_title = username)]
|
for video_id in video_ids]
|
||||||
|
return self.playlist_result(url_results, playlist_title=username)
|
||||||
|
|
||||||
|
|
||||||
class YoutubeSearchIE(SearchInfoExtractor):
|
class YoutubeSearchIE(SearchInfoExtractor):
|
||||||
IE_DESC = u'YouTube.com searches'
|
IE_DESC = u'YouTube.com searches'
|
||||||
@ -1735,7 +1760,8 @@ class YoutubeSearchIE(SearchInfoExtractor):
|
|||||||
|
|
||||||
if len(video_ids) > n:
|
if len(video_ids) > n:
|
||||||
video_ids = video_ids[:n]
|
video_ids = video_ids[:n]
|
||||||
videos = [self.url_result('http://www.youtube.com/watch?v=%s' % id, 'Youtube') for id in video_ids]
|
videos = [self.url_result(video_id, 'Youtube', video_id=video_id)
|
||||||
|
for video_id in video_ids]
|
||||||
return self.playlist_result(videos, query)
|
return self.playlist_result(videos, query)
|
||||||
|
|
||||||
class YoutubeSearchDateIE(YoutubeSearchIE):
|
class YoutubeSearchDateIE(YoutubeSearchIE):
|
||||||
@ -1765,7 +1791,6 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
|
|||||||
Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
|
Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
|
||||||
"""
|
"""
|
||||||
_LOGIN_REQUIRED = True
|
_LOGIN_REQUIRED = True
|
||||||
_PAGING_STEP = 30
|
|
||||||
# use action_load_personal_feed instead of action_load_system_feed
|
# use action_load_personal_feed instead of action_load_system_feed
|
||||||
_PERSONAL_FEED = False
|
_PERSONAL_FEED = False
|
||||||
|
|
||||||
@ -1785,9 +1810,8 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
feed_entries = []
|
feed_entries = []
|
||||||
# The step argument is available only in 2.7 or higher
|
paging = 0
|
||||||
for i in itertools.count(0):
|
for i in itertools.count(1):
|
||||||
paging = i*self._PAGING_STEP
|
|
||||||
info = self._download_webpage(self._FEED_TEMPLATE % paging,
|
info = self._download_webpage(self._FEED_TEMPLATE % paging,
|
||||||
u'%s feed' % self._FEED_NAME,
|
u'%s feed' % self._FEED_NAME,
|
||||||
u'Downloading page %s' % i)
|
u'Downloading page %s' % i)
|
||||||
@ -1795,9 +1819,12 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
|
|||||||
feed_html = info['feed_html']
|
feed_html = info['feed_html']
|
||||||
m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
|
m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
|
||||||
ids = orderedSet(m.group(1) for m in m_ids)
|
ids = orderedSet(m.group(1) for m in m_ids)
|
||||||
feed_entries.extend(self.url_result(id, 'Youtube') for id in ids)
|
feed_entries.extend(
|
||||||
|
self.url_result(video_id, 'Youtube', video_id=video_id)
|
||||||
|
for video_id in ids)
|
||||||
if info['paging'] is None:
|
if info['paging'] is None:
|
||||||
break
|
break
|
||||||
|
paging = info['paging']
|
||||||
return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
|
return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
|
||||||
|
|
||||||
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
|
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
|
||||||
@ -1817,9 +1844,15 @@ class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
|
|||||||
_VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
|
_VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
|
||||||
_FEED_NAME = 'watch_later'
|
_FEED_NAME = 'watch_later'
|
||||||
_PLAYLIST_TITLE = u'Youtube Watch Later'
|
_PLAYLIST_TITLE = u'Youtube Watch Later'
|
||||||
_PAGING_STEP = 100
|
|
||||||
_PERSONAL_FEED = True
|
_PERSONAL_FEED = True
|
||||||
|
|
||||||
|
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
|
||||||
|
IE_DESC = u'Youtube watch history, "ythistory" keyword (requires authentication)'
|
||||||
|
_VALID_URL = u'https?://www\.youtube\.com/feed/history|:ythistory'
|
||||||
|
_FEED_NAME = 'history'
|
||||||
|
_PERSONAL_FEED = True
|
||||||
|
_PLAYLIST_TITLE = u'Youtube Watch History'
|
||||||
|
|
||||||
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
|
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
|
||||||
IE_NAME = u'youtube:favorites'
|
IE_NAME = u'youtube:favorites'
|
||||||
IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
|
IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
|
||||||
|
@ -1,75 +1,125 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
|
||||||
|
import operator
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
unified_strdate,
|
||||||
ExtractorError,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class ZDFIE(InfoExtractor):
|
class ZDFIE(InfoExtractor):
|
||||||
_VALID_URL = r'^http://www\.zdf\.de\/ZDFmediathek(?P<hash>#)?\/(.*beitrag\/video\/)(?P<video_id>[^/\?]+)(?:\?.*)?'
|
_VALID_URL = r'^https?://www\.zdf\.de/ZDFmediathek(?P<hash>#)?/(.*beitrag/(?:video/)?)(?P<video_id>[0-9]+)(?:/[^/?]+)?(?:\?.*)?'
|
||||||
_MEDIA_STREAM = r'<a href="(?P<video_url>.+(?P<media_type>.streaming).+/zdf/(?P<quality>[^\/]+)/[^"]*)".+class="play".+>'
|
|
||||||
|
_TEST = {
|
||||||
|
u"url": u"http://www.zdf.de/ZDFmediathek/beitrag/video/2037704/ZDFspezial---Ende-des-Machtpokers--?bc=sts;stt",
|
||||||
|
u"file": u"2037704.webm",
|
||||||
|
u"info_dict": {
|
||||||
|
u"upload_date": u"20131127",
|
||||||
|
u"description": u"Union und SPD haben sich auf einen Koalitionsvertrag geeinigt. Aber was bedeutet das für die Bürger? Sehen Sie hierzu das ZDFspezial \"Ende des Machtpokers - Große Koalition für Deutschland\".",
|
||||||
|
u"uploader": u"spezial",
|
||||||
|
u"title": u"ZDFspezial - Ende des Machtpokers"
|
||||||
|
},
|
||||||
|
u"skip": u"Videos on ZDF.de are depublicised in short order",
|
||||||
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
if mobj is None:
|
|
||||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
|
||||||
video_id = mobj.group('video_id')
|
video_id = mobj.group('video_id')
|
||||||
|
|
||||||
if mobj.group('hash'):
|
xml_url = u'http://www.zdf.de/ZDFmediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
|
||||||
url = url.replace(u'#', u'', 1)
|
doc = self._download_xml(
|
||||||
|
xml_url, video_id,
|
||||||
|
note=u'Downloading video info',
|
||||||
|
errnote=u'Failed to download video info')
|
||||||
|
|
||||||
html = self._download_webpage(url, video_id)
|
title = doc.find('.//information/title').text
|
||||||
streams = [m.groupdict() for m in re.finditer(self._MEDIA_STREAM, html)]
|
description = doc.find('.//information/detail').text
|
||||||
if streams is None:
|
uploader_node = doc.find('.//details/originChannelTitle')
|
||||||
raise ExtractorError(u'No media url found.')
|
uploader = None if uploader_node is None else uploader_node.text
|
||||||
|
duration_str = doc.find('.//details/length').text
|
||||||
|
duration_m = re.match(r'''(?x)^
|
||||||
|
(?P<hours>[0-9]{2})
|
||||||
|
:(?P<minutes>[0-9]{2})
|
||||||
|
:(?P<seconds>[0-9]{2})
|
||||||
|
(?:\.(?P<ms>[0-9]+)?)
|
||||||
|
''', duration_str)
|
||||||
|
duration = (
|
||||||
|
(
|
||||||
|
(int(duration_m.group('hours')) * 60 * 60) +
|
||||||
|
(int(duration_m.group('minutes')) * 60) +
|
||||||
|
int(duration_m.group('seconds'))
|
||||||
|
)
|
||||||
|
if duration_m
|
||||||
|
else None
|
||||||
|
)
|
||||||
|
upload_date = unified_strdate(doc.find('.//details/airtime').text)
|
||||||
|
|
||||||
# s['media_type'] == 'wstreaming' -> use 'Windows Media Player' and mms url
|
def xml_to_format(fnode):
|
||||||
# s['media_type'] == 'hstreaming' -> use 'Quicktime' and rtsp url
|
video_url = fnode.find('url').text
|
||||||
# choose first/default media type and highest quality for now
|
is_available = u'http://www.metafilegenerator' not in video_url
|
||||||
def stream_pref(s):
|
|
||||||
TYPE_ORDER = ['ostreaming', 'hstreaming', 'wstreaming']
|
format_id = fnode.attrib['basetype']
|
||||||
|
format_m = re.match(r'''(?x)
|
||||||
|
(?P<vcodec>[^_]+)_(?P<acodec>[^_]+)_(?P<container>[^_]+)_
|
||||||
|
(?P<proto>[^_]+)_(?P<index>[^_]+)_(?P<indexproto>[^_]+)
|
||||||
|
''', format_id)
|
||||||
|
|
||||||
|
ext = format_m.group('container')
|
||||||
|
is_supported = ext != 'f4f'
|
||||||
|
|
||||||
|
PROTO_ORDER = ['http', 'rtmp', 'rtsp']
|
||||||
try:
|
try:
|
||||||
type_pref = TYPE_ORDER.index(s['media_type'])
|
proto_pref = -PROTO_ORDER.index(format_m.group('proto'))
|
||||||
except ValueError:
|
except ValueError:
|
||||||
type_pref = 999
|
proto_pref = 999
|
||||||
|
|
||||||
QUALITY_ORDER = ['veryhigh', '300']
|
quality = fnode.find('./quality').text
|
||||||
|
QUALITY_ORDER = ['veryhigh', '300', 'high', 'med', 'low']
|
||||||
try:
|
try:
|
||||||
quality_pref = QUALITY_ORDER.index(s['quality'])
|
quality_pref = -QUALITY_ORDER.index(quality)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
quality_pref = 999
|
quality_pref = 999
|
||||||
|
|
||||||
return (type_pref, quality_pref)
|
abr = int(fnode.find('./audioBitrate').text) // 1000
|
||||||
|
vbr = int(fnode.find('./videoBitrate').text) // 1000
|
||||||
|
pref = (is_available, is_supported,
|
||||||
|
proto_pref, quality_pref, vbr, abr)
|
||||||
|
|
||||||
sorted_streams = sorted(streams, key=stream_pref)
|
format_note = u''
|
||||||
if not sorted_streams:
|
if not is_supported:
|
||||||
raise ExtractorError(u'No stream found.')
|
format_note += u'(unsupported)'
|
||||||
stream = sorted_streams[0]
|
if not format_note:
|
||||||
|
format_note = None
|
||||||
|
|
||||||
media_link = self._download_webpage(
|
return {
|
||||||
stream['video_url'],
|
'format_id': format_id + u'-' + quality,
|
||||||
video_id,
|
'url': video_url,
|
||||||
u'Get stream URL')
|
'ext': ext,
|
||||||
|
'acodec': format_m.group('acodec'),
|
||||||
|
'vcodec': format_m.group('vcodec'),
|
||||||
|
'abr': abr,
|
||||||
|
'vbr': vbr,
|
||||||
|
'width': int(fnode.find('./width').text),
|
||||||
|
'height': int(fnode.find('./height').text),
|
||||||
|
'filesize': int(fnode.find('./filesize').text),
|
||||||
|
'format_note': format_note,
|
||||||
|
'_pref': pref,
|
||||||
|
'_available': is_available,
|
||||||
|
}
|
||||||
|
|
||||||
#MMS_STREAM = r'href="(?P<video_url>mms://[^"]*)"'
|
format_nodes = doc.findall('.//formitaeten/formitaet')
|
||||||
RTSP_STREAM = r'(?P<video_url>rtsp://[^"]*.mp4)'
|
formats = sorted(filter(lambda f: f['_available'],
|
||||||
|
map(xml_to_format, format_nodes)),
|
||||||
mobj = re.search(self._MEDIA_STREAM, media_link)
|
key=operator.itemgetter('_pref'))
|
||||||
if mobj is None:
|
|
||||||
mobj = re.search(RTSP_STREAM, media_link)
|
|
||||||
if mobj is None:
|
|
||||||
raise ExtractorError(u'Cannot extract mms:// or rtsp:// URL')
|
|
||||||
video_url = mobj.group('video_url')
|
|
||||||
|
|
||||||
title = self._html_search_regex(
|
|
||||||
r'<h1(?: class="beitragHeadline")?>(.*?)</h1>',
|
|
||||||
html, u'title')
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': video_url,
|
|
||||||
'title': title,
|
'title': title,
|
||||||
'ext': determine_ext(video_url)
|
'formats': formats,
|
||||||
|
'description': description,
|
||||||
|
'uploader': uploader,
|
||||||
|
'duration': duration,
|
||||||
|
'upload_date': upload_date,
|
||||||
}
|
}
|
||||||
|
@ -41,6 +41,7 @@ def rsa_verify(message, signature, key):
|
|||||||
if signature != sha256(message).digest(): return False
|
if signature != sha256(message).digest(): return False
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
def update_self(to_screen, verbose):
|
def update_self(to_screen, verbose):
|
||||||
"""Update the program file with the latest version from the repository"""
|
"""Update the program file with the latest version from the repository"""
|
||||||
|
|
||||||
@ -82,6 +83,13 @@ def update_self(to_screen, verbose):
|
|||||||
return
|
return
|
||||||
|
|
||||||
version_id = versions_info['latest']
|
version_id = versions_info['latest']
|
||||||
|
|
||||||
|
def version_tuple(version_str):
|
||||||
|
return tuple(map(int, version_str.split('.')))
|
||||||
|
if version_tuple(__version__) >= version_tuple(version_id):
|
||||||
|
to_screen(u'youtube-dl is up to date (%s)' % __version__)
|
||||||
|
return
|
||||||
|
|
||||||
to_screen(u'Updating to version ' + version_id + '...')
|
to_screen(u'Updating to version ' + version_id + '...')
|
||||||
version = versions_info['versions'][version_id]
|
version = versions_info['versions'][version_id]
|
||||||
|
|
||||||
|
@ -8,13 +8,16 @@ import gzip
|
|||||||
import io
|
import io
|
||||||
import json
|
import json
|
||||||
import locale
|
import locale
|
||||||
|
import math
|
||||||
import os
|
import os
|
||||||
import pipes
|
import pipes
|
||||||
import platform
|
import platform
|
||||||
import re
|
import re
|
||||||
|
import ssl
|
||||||
import socket
|
import socket
|
||||||
import sys
|
import sys
|
||||||
import traceback
|
import traceback
|
||||||
|
import xml.etree.ElementTree
|
||||||
import zlib
|
import zlib
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@ -535,17 +538,34 @@ def formatSeconds(secs):
|
|||||||
else:
|
else:
|
||||||
return '%d' % secs
|
return '%d' % secs
|
||||||
|
|
||||||
def make_HTTPS_handler(opts):
|
def make_HTTPS_handler(opts_no_check_certificate):
|
||||||
if sys.version_info < (3,2):
|
if sys.version_info < (3, 2):
|
||||||
# Python's 2.x handler is very simplistic
|
import httplib
|
||||||
return compat_urllib_request.HTTPSHandler()
|
|
||||||
|
class HTTPSConnectionV3(httplib.HTTPSConnection):
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
httplib.HTTPSConnection.__init__(self, *args, **kwargs)
|
||||||
|
|
||||||
|
def connect(self):
|
||||||
|
sock = socket.create_connection((self.host, self.port), self.timeout)
|
||||||
|
if self._tunnel_host:
|
||||||
|
self.sock = sock
|
||||||
|
self._tunnel()
|
||||||
|
try:
|
||||||
|
self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv3)
|
||||||
|
except ssl.SSLError:
|
||||||
|
self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv23)
|
||||||
|
|
||||||
|
class HTTPSHandlerV3(compat_urllib_request.HTTPSHandler):
|
||||||
|
def https_open(self, req):
|
||||||
|
return self.do_open(HTTPSConnectionV3, req)
|
||||||
|
return HTTPSHandlerV3()
|
||||||
else:
|
else:
|
||||||
import ssl
|
context = ssl.SSLContext(ssl.PROTOCOL_SSLv3)
|
||||||
context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
|
|
||||||
context.set_default_verify_paths()
|
context.set_default_verify_paths()
|
||||||
|
|
||||||
context.verify_mode = (ssl.CERT_NONE
|
context.verify_mode = (ssl.CERT_NONE
|
||||||
if opts.no_check_certificate
|
if opts_no_check_certificate
|
||||||
else ssl.CERT_REQUIRED)
|
else ssl.CERT_REQUIRED)
|
||||||
return compat_urllib_request.HTTPSHandler(context=context)
|
return compat_urllib_request.HTTPSHandler(context=context)
|
||||||
|
|
||||||
@ -987,3 +1007,17 @@ def unsmuggle_url(smug_url):
|
|||||||
jsond = compat_parse_qs(sdata)[u'__youtubedl_smuggle'][0]
|
jsond = compat_parse_qs(sdata)[u'__youtubedl_smuggle'][0]
|
||||||
data = json.loads(jsond)
|
data = json.loads(jsond)
|
||||||
return url, data
|
return url, data
|
||||||
|
|
||||||
|
|
||||||
|
def format_bytes(bytes):
|
||||||
|
if bytes is None:
|
||||||
|
return u'N/A'
|
||||||
|
if type(bytes) is str:
|
||||||
|
bytes = float(bytes)
|
||||||
|
if bytes == 0.0:
|
||||||
|
exponent = 0
|
||||||
|
else:
|
||||||
|
exponent = int(math.log(bytes, 1024.0))
|
||||||
|
suffix = [u'B', u'KiB', u'MiB', u'GiB', u'TiB', u'PiB', u'EiB', u'ZiB', u'YiB'][exponent]
|
||||||
|
converted = float(bytes) / float(1024 ** exponent)
|
||||||
|
return u'%.2f%s' % (converted, suffix)
|
||||||
|
@ -1,2 +1,2 @@
|
|||||||
|
|
||||||
__version__ = '2013.11.22.1'
|
__version__ = '2013.12.02'
|
||||||
|
Reference in New Issue
Block a user