Compare commits

..

71 Commits

Author SHA1 Message Date
Philipp Hagemeister
17769d5a6c release 2013.11.29 2013-11-29 03:34:26 +01:00
Philipp Hagemeister
677c18092d [podomatic] Add extractor 2013-11-29 03:33:25 +01:00
Jaime Marquínez Ferrándiz
3862402ff3 Add an extractor for Clipsyndicate (closes #1744) 2013-11-28 14:38:10 +01:00
Jaime Marquínez Ferrándiz
b03d0d064c [imdb] Fix extraction in python 2.6
Using a regular expression because the html cannot be parsed.
2013-11-28 13:49:00 +01:00
Jaime Marquínez Ferrándiz
d8d6148628 Add an extractor for Internet Movie Database trailers (closes #1832) 2013-11-28 13:32:49 +01:00
Philipp Hagemeister
2be54167d0 release 2013.11.28.1 2013-11-28 06:17:56 +01:00
Philipp Hagemeister
4e0084d92e [youtube/subtitles] Change MD5 of vtt subtitle in test 2013-11-28 06:14:17 +01:00
Philipp Hagemeister
fc9e1cc697 [clipfish] Use FIFA trailer as testcase (#1842) 2013-11-28 06:10:37 +01:00
Philipp Hagemeister
f8f60d2793 [clipfish] Fix imports (#1842) 2013-11-28 05:54:46 +01:00
Philipp Hagemeister
ea07dbb8b1 release 2013.11.28 2013-11-28 05:48:32 +01:00
Philipp Hagemeister
2a275ab007 [zdf] Use _download_xml 2013-11-28 05:47:50 +01:00
Philipp Hagemeister
a2e6db365c [zdf] add a pseudo-testcase and fix URL matching 2013-11-28 05:47:20 +01:00
Philipp Hagemeister
9d93e7da6c Merge branch 'master' of github.com:rg3/youtube-dl 2013-11-28 04:37:02 +01:00
Jaime Marquínez Ferrándiz
0e44d8381a [youtube:feeds] Use the 'paging' value from the downloaded json information (fixes #1845) 2013-11-28 00:33:27 +01:00
Jaime Marquínez Ferrándiz
35907e23ec [yahoo] Fix video extraction and use the new format system exclusively 2013-11-27 21:24:55 +01:00
Jaime Marquínez Ferrándiz
76d1700b28 [youtube:playlist] Fix the extraction of the title for some mixes (#1844)
Like https://www.youtube.com/watch?v=g8jDB5xOiuE&list=RDIh2gxLqR7HM
2013-11-27 20:01:51 +01:00
Philipp Hagemeister
dcca796ce4 [clipfish] Effect a better error message (#1842) 2013-11-27 18:33:51 +01:00
Filippo Valsorda
4b19e38954 [videopremium] support new .me domain 2013-11-27 02:54:51 +01:00
Jaime Marquínez Ferrándiz
5f09bbff4d [bash-completion] Complete the ':ythistory' keyword 2013-11-27 00:42:59 +01:00
Jaime Marquínez Ferrándiz
c1f9c59d11 [bash-completion] Complete filenames or directories if the previous option requires it 2013-11-27 00:41:30 +01:00
Jaime Marquínez Ferrándiz
652cdaa269 [youtube:playlist] Add support for YouTube mixes (fixes #1839) 2013-11-26 21:35:03 +01:00
Jaime Marquínez Ferrándiz
e26f871228 Use the new '_download_xml' helper in more extractors 2013-11-26 19:17:25 +01:00
Jaime Marquínez Ferrándiz
6e47b51eef [youtube:playlist] Remove the link with index 0
It's not the first video of the playlist, it appears in the 'Play all' button (see the test course for an example)
2013-11-26 19:09:14 +01:00
Jaime Marquínez Ferrándiz
4a98cdbf3b YoutubeDL: set the 'params' property before any message/warning/error is sent (fixes #1840)
If it sets the 'restrictfilenames' param, it will first report a warning. It will try to get the logger from the 'params' property, which would be set at that moment to None, raising the error 'AttributeError: 'NoneType' object has no attribute 'get''
2013-11-26 18:54:14 +01:00
Philipp Hagemeister
c5ed4e8f7e release 2013.11.26 2013-11-26 10:41:35 +01:00
Jaime Marquínez Ferrándiz
c2e52508cc Include the proxy in the parameters for YoutubeDL (fixes #1831) 2013-11-26 08:03:11 +01:00
Philipp Hagemeister
d8ec4959c8 Merge pull request #1830 from jaimeMF/download-archive
Use the 'extractor_key' field for the download archive file
2013-11-25 14:14:25 -08:00
Jaime Marquínez Ferrándiz
d31209a144 Use the 'extractor_key' field for the download archive file
It has the same value as the ie_key.
2013-11-25 22:57:15 +01:00
Jaime Marquínez Ferrándiz
529a2e2cc3 Fix typo in the documentation of the 'download_archive' param 2013-11-25 22:52:09 +01:00
Philipp Hagemeister
781a7d0546 release 2013.11.25.3 2013-11-25 22:36:18 +01:00
Philipp Hagemeister
fb04e40396 [soundcloud] Support for listing of audio-only files 2013-11-25 22:34:56 +01:00
Philipp Hagemeister
d9b011f201 Fix rtmpdump with non-ASCII filenames on Windows on 2.x
Reported in #1798
2013-11-25 22:31:38 +01:00
Philipp Hagemeister
b0b9eaa196 Merge pull request #1829 from jaimeMF/ydl-empty-params
Allow to initialize a YoutubeDL object without parameters
2013-11-25 13:19:59 -08:00
Philipp Hagemeister
8b134b1062 Merge branch 'master' of github.com:rg3/youtube-dl 2013-11-25 22:16:07 +01:00
Philipp Hagemeister
0c75c3fa7a Do not warn about fixed output template if --max-downloads is 1
Fixes #1828
2013-11-25 22:15:33 +01:00
Jaime Marquínez Ferrándiz
a3927cf7ee Allow to initialize a YoutubeDL object without parameters
Having to pass the 'outtmpl' parameter feels really strange when you just want to extract the info of a video.
2013-11-25 22:03:39 +01:00
Jaime Marquínez Ferrándiz
1a62c18f65 [bambuser] Skip the download in the test
It doesn't respect the 'Range' header.
2013-11-25 22:03:20 +01:00
Philipp Hagemeister
2a15e7063b [soundcloud] Prefer HTTP over RTMP (#1798) 2013-11-25 20:30:41 +01:00
Philipp Hagemeister
d46cc192d7 Reduce socket timeout 2013-11-25 19:11:01 +01:00
Philipp Hagemeister
bb2bebdbe1 release 2013.11.25.2 2013-11-25 15:47:14 +01:00
Philipp Hagemeister
5db07df634 Fix --download-archive (Fixes #1826) 2013-11-25 15:46:54 +01:00
Philipp Hagemeister
ea36cbac5e Merge remote-tracking branch 'rbrito/swap-dimensions' 2013-11-25 06:19:15 +01:00
Philipp Hagemeister
d0d2b49ab7 [FileDownloader] use moved format_bytes method 2013-11-25 06:17:41 +01:00
Philipp Hagemeister
31cb6d8fef Merge remote-tracking branch 'rzhxeo/rtmpdump' 2013-11-25 06:16:18 +01:00
Philipp Hagemeister
daa0dd2973 release 2013.11.25.1 2013-11-25 06:06:39 +01:00
Philipp Hagemeister
de79c46c8f [viki] Fix subtitle extraction 2013-11-25 06:06:18 +01:00
Philipp Hagemeister
94ccb6fa2e [viki] Fix subtitles extraction 2013-11-25 05:58:04 +01:00
Philipp Hagemeister
07e4035879 [viki] Fix uploader extraction 2013-11-25 05:57:55 +01:00
Philipp Hagemeister
d0efb9ec9a [tests] Remove global_setup function 2013-11-25 03:47:32 +01:00
Philipp Hagemeister
ac05067d3d release 2013.11.25 2013-11-25 03:37:49 +01:00
Philipp Hagemeister
113577e155 [generic] Improve detection
Allow download of http://goo.gl/7X5tOk
Fixes #1818
2013-11-25 03:35:53 +01:00
Philipp Hagemeister
79d09f47c2 Merge branch 'opener-to-ydl' 2013-11-25 03:30:37 +01:00
Philipp Hagemeister
c059bdd432 Remove quality_name field and improve zdf extractor 2013-11-25 03:28:55 +01:00
Philipp Hagemeister
02dbf93f0e [zdf/common] Use API in ZDF extractor.
This also comes with a lot of extra format fields
Fixes #1518
2013-11-25 03:13:22 +01:00
Philipp Hagemeister
1fb2bcbbf7 [viki] Make uploader field optional (#1813) 2013-11-25 02:02:34 +01:00
Jaime Marquínez Ferrándiz
16e055849e Update the keywords tests for the rename of the old ComedyCentralIE 2013-11-24 22:13:20 +01:00
Jaime Marquínez Ferrándiz
66cfab4226 [comedycentral] Add support for comedycentral.com videos (closes #1824)
It's a subclass of MTVIE

The extractor for colbertnation.com and thedailyshow.com is called now ComedyCentralShowsIE
2013-11-24 21:18:35 +01:00
Philipp Hagemeister
6d88bc37a3 [viki] Skip travis test
Also provide a better error message for geoblocked videos.
2013-11-24 15:28:50 +01:00
Philipp Hagemeister
b7553b2554 [vik] Clarify output 2013-11-24 15:20:16 +01:00
Philipp Hagemeister
e03db0a077 Merge branch 'master' into opener-to-ydl 2013-11-24 15:18:44 +01:00
Philipp Hagemeister
a1ee09e815 Document proxy 2013-11-24 15:03:25 +01:00
Jaime Marquínez Ferrándiz
267ed0c5d3 [collegehumor] Encode the xml before calling xml.etree.ElementTree.fromstring (fixes #1822)
Uses a new helper method in InfoExtractor: _download_xml
2013-11-24 14:59:19 +01:00
Jaime Marquínez Ferrándiz
f459d17018 [youtube] Add an extractor for downloading the watch history (closes #1821) 2013-11-24 14:33:50 +01:00
Jaime Marquínez Ferrándiz
dc65dcbb6d [mixcloud] The description field may be missing (fixes #1819) 2013-11-24 11:28:44 +01:00
Jaime Marquínez Ferrándiz
d214fdb8fe [brightcove] Don't use 'or' with the xml nodes, use the 'value' attribute instead 2013-11-24 11:02:34 +01:00
Philipp Hagemeister
138df537ff release 2013.11.24.1 2013-11-24 07:51:56 +01:00
Philipp Hagemeister
0c7c19d6bc [clipfish] Add extractor (Fixes #1760) 2013-11-24 07:51:44 +01:00
Philipp Hagemeister
dca0872056 Move the opener to the YoutubeDL object.
This is the first step towards being able to just import youtube_dl and start using it.
Apart from removing global state, this would fix problems like #1805.
2013-11-22 19:57:52 +01:00
rzhxeo
2b35c9ef74 Merge branch 'master' into rtmpdump
Conflicts:
	youtube_dl/FileDownloader.py

Merge
2013-11-18 00:27:06 +01:00
rzhxeo
4894fe8c5b Report download progress of rtmpdump 2013-11-09 11:14:40 +01:00
Rogério Brito
d5a9bb4ea9 extractor: youtube: Swap video dimensions to match standard practice.
While working on this, I thought about simplifying things like changing
480x854 to 480p, and that seemed like a good option, until I realized that
people (me included) usually link the concept of some number followed by a p
with the video being 16:9.

So, we would be losing some information and, as we all know,
[explicit is better than implicit][*].

[*]: http://www.python.org/dev/peps/pep-0020/

This closes #1446.

Signed-off-by: Rogério Brito <rbrito@ime.usp.br>
2013-10-19 14:04:44 -03:00
59 changed files with 820 additions and 404 deletions

View File

@@ -1,10 +1,21 @@
__youtube_dl()
{
local cur prev opts
local cur prev opts fileopts diropts keywords
COMPREPLY=()
cur="${COMP_WORDS[COMP_CWORD]}"
prev="${COMP_WORDS[COMP_CWORD-1]}"
opts="{{flags}}"
keywords=":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater"
keywords=":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater :ythistory"
fileopts="-a|--batch-file|--download-archive|--cookies"
diropts="--cache-dir"
if [[ ${prev} =~ ${fileopts} ]]; then
COMPREPLY=( $(compgen -f -- ${cur}) )
return 0
elif [[ ${prev} =~ ${diropts} ]]; then
COMPREPLY=( $(compgen -d -- ${cur}) )
return 0
fi
if [[ ${cur} =~ : ]]; then
COMPREPLY=( $(compgen -W "${keywords}" -- ${cur}) )

View File

@@ -12,10 +12,6 @@ from youtube_dl import YoutubeDL
from youtube_dl.utils import preferredencoding
def global_setup():
youtube_dl._setup_opener(timeout=10)
def get_params(override=None):
PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)),
"parameters.json")

View File

@@ -6,8 +6,7 @@ import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import global_setup, try_rm
global_setup()
from test.helper import try_rm
from youtube_dl import YoutubeDL

View File

@@ -100,10 +100,11 @@ class TestAllURLsMatching(unittest.TestCase):
def test_keywords(self):
self.assertMatch(':ytsubs', ['youtube:subscriptions'])
self.assertMatch(':ytsubscriptions', ['youtube:subscriptions'])
self.assertMatch(':thedailyshow', ['ComedyCentral'])
self.assertMatch(':tds', ['ComedyCentral'])
self.assertMatch(':colbertreport', ['ComedyCentral'])
self.assertMatch(':cr', ['ComedyCentral'])
self.assertMatch(':ythistory', ['youtube:history'])
self.assertMatch(':thedailyshow', ['ComedyCentralShows'])
self.assertMatch(':tds', ['ComedyCentralShows'])
self.assertMatch(':colbertreport', ['ComedyCentralShows'])
self.assertMatch(':cr', ['ComedyCentralShows'])
if __name__ == '__main__':

View File

@@ -9,12 +9,10 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import (
get_params,
get_testcases,
global_setup,
try_rm,
md5,
report_warning
)
global_setup()
import hashlib

View File

@@ -8,8 +8,7 @@ import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import FakeYDL, global_setup
global_setup()
from test.helper import FakeYDL
from youtube_dl.extractor import (

View File

@@ -6,8 +6,7 @@ import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import FakeYDL, global_setup, md5
global_setup()
from test.helper import FakeYDL, md5
from youtube_dl.extractor import (
@@ -73,7 +72,7 @@ class TestYoutubeSubtitles(BaseTestSubtitles):
self.DL.params['writesubtitles'] = True
self.DL.params['subtitlesformat'] = 'vtt'
subtitles = self.getSubtitles()
self.assertEqual(md5(subtitles['en']), '356cdc577fde0c6783b9b822e7206ff7')
self.assertEqual(md5(subtitles['en']), '3cb210999d3e021bd6c7f0ea751eab06')
def test_youtube_list_subtitles(self):
self.DL.expect_warning(u'Video doesn\'t have automatic captions')

View File

@@ -7,8 +7,7 @@ import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import get_params, global_setup, try_rm
global_setup()
from test.helper import get_params, try_rm
import io

View File

@@ -7,8 +7,7 @@ import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import get_params, global_setup
global_setup()
from test.helper import get_params
import io

View File

@@ -6,8 +6,7 @@ import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import FakeYDL, global_setup
global_setup()
from test.helper import FakeYDL
from youtube_dl.extractor import (
@@ -108,5 +107,14 @@ class TestYoutubeLists(unittest.TestCase):
result = ie.extract('http://www.youtube.com/show/airdisasters')
self.assertTrue(len(result) >= 3)
def test_youtube_mix(self):
    # A mix URL (list=RD<video id>) must be expanded into a playlist whose
    # first entry is the video the mix was generated from.
    extractor = YoutubePlaylistIE(FakeYDL())
    playlist = extractor.extract('http://www.youtube.com/watch?v=lLJf9qJHR3E&list=RDrjFaenf1T-Y')
    videos = playlist['entries']
    self.assertTrue(len(videos) >= 20)
    # The seed video id is the part of the list id after the 'RD' prefix
    self.assertEqual(videos[0]['id'], 'rjFaenf1T-Y')
if __name__ == '__main__':
unittest.main()

View File

@@ -6,9 +6,6 @@ import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import global_setup
global_setup()
import io
import re

View File

@@ -1,4 +1,3 @@
import math
import os
import re
import subprocess
@@ -11,6 +10,7 @@ from .utils import (
ContentTooShortError,
determine_ext,
encodeFilename,
format_bytes,
sanitize_open,
timeconvert,
)
@@ -53,20 +53,6 @@ class FileDownloader(object):
self._progress_hooks = []
self.params = params
@staticmethod
def format_bytes(bytes):
if bytes is None:
return 'N/A'
if type(bytes) is str:
bytes = float(bytes)
if bytes == 0.0:
exponent = 0
else:
exponent = int(math.log(bytes, 1024.0))
suffix = ['B','KiB','MiB','GiB','TiB','PiB','EiB','ZiB','YiB'][exponent]
converted = float(bytes) / float(1024 ** exponent)
return '%.2f%s' % (converted, suffix)
@staticmethod
def format_seconds(seconds):
(mins, secs) = divmod(seconds, 60)
@@ -117,7 +103,7 @@ class FileDownloader(object):
def format_speed(speed):
if speed is None:
return '%10s' % '---b/s'
return '%10s' % ('%s/s' % FileDownloader.format_bytes(speed))
return '%10s' % ('%s/s' % format_bytes(speed))
@staticmethod
def best_block_size(elapsed_time, bytes):
@@ -270,6 +256,61 @@ class FileDownloader(object):
(clear_line, data_len_str, self.format_seconds(tot_time)))
def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url, live):
def run_rtmpdump(args):
    """Run rtmpdump with the given argument list and report its progress.

    The stderr stream of the process is split into lines on either a
    carriage return or a newline.  Lines that match the progress pattern
    are turned into a progress report and a progress-hook call; any other
    non-empty line is echoed only if the 'verbose' param is set.

    Returns the exit code of the rtmpdump process.
    """
    start = time.time()
    # Percentage / byte count seen in the first progress line.  They serve
    # as the baseline for the speed and ETA calculations.
    resume_percent = None
    resume_downloaded_data_len = None
    proc = subprocess.Popen(args, stderr=subprocess.PIPE)
    cursor_in_new_line = True
    proc_stderr_closed = False
    while not proc_stderr_closed:
        # Read byte by byte so that both '\r' and '\n' terminate a line
        line = u''
        while True:
            char = proc.stderr.read(1)
            if not char:
                proc_stderr_closed = True
                break
            if char in [b'\r', b'\n']:
                break
            line += char.decode('ascii', 'replace')
        if not line:
            # Either stderr was closed or two terminators followed each
            # other; the outer loop condition decides whether to go on.
            continue
        mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec \(([0-9]{1,2}\.[0-9])%\)', line)
        if mobj:
            downloaded_data_len = int(float(mobj.group(1))*1024)
            percent = float(mobj.group(2))
            # Compare against None: a first reading of 0.0% is a valid
            # baseline and must not be overwritten by later readings.
            if resume_percent is None:
                resume_percent = percent
                resume_downloaded_data_len = downloaded_data_len
            eta = self.calc_eta(start, time.time(), 100-resume_percent, percent-resume_percent)
            speed = self.calc_speed(start, time.time(), downloaded_data_len-resume_downloaded_data_len)
            # The total size is extrapolated from the percentage, hence
            # the '~' prefix; it stays None while the percentage is 0.
            data_len = None
            if percent > 0:
                data_len = int(downloaded_data_len * 100 / percent)
            data_len_str = u'~' + format_bytes(data_len)
            self.report_progress(percent, data_len_str, speed, eta)
            cursor_in_new_line = False
            self._hook_progress({
                'downloaded_bytes': downloaded_data_len,
                'total_bytes': data_len,
                'tmpfilename': tmpfilename,
                'filename': filename,
                'status': 'downloading',
                'eta': eta,
                'speed': speed,
            })
        elif self.params.get('verbose', False):
            # Finish a pending progress line before printing the message
            if not cursor_in_new_line:
                self.to_screen(u'')
            cursor_in_new_line = True
            self.to_screen(u'[rtmpdump] '+line)
    proc.wait()
    if not cursor_in_new_line:
        self.to_screen(u'')
    return proc.returncode
self.report_destination(filename)
tmpfilename = self.temp_name(filename)
test = self.params.get('test', False)
@@ -280,12 +321,11 @@ class FileDownloader(object):
except (OSError, IOError):
self.report_error(u'RTMP download detected but "rtmpdump" could not be run')
return False
verbosity_option = '--verbose' if self.params.get('verbose', False) else '--quiet'
# Download using rtmpdump. rtmpdump returns exit code 2 when
# the connection was interrupted and resuming appears to be
# possible. This is part of rtmpdump's normal usage, AFAIK.
basic_args = ['rtmpdump', verbosity_option, '-r', url, '-o', tmpfilename]
basic_args = ['rtmpdump', '--verbose', '-r', url, '-o', tmpfilename]
if player_url is not None:
basic_args += ['--swfVfy', player_url]
if page_url is not None:
@@ -299,30 +339,48 @@ class FileDownloader(object):
if live:
basic_args += ['--live']
args = basic_args + [[], ['--resume', '--skip', '1']][self.params.get('continuedl', False)]
if sys.platform == 'win32' and sys.version_info < (3, 0):
# Windows subprocess module does not actually support Unicode
# on Python 2.x
# See http://stackoverflow.com/a/9951851/35070
subprocess_encoding = sys.getfilesystemencoding()
args = [a.encode(subprocess_encoding, 'ignore') for a in args]
else:
subprocess_encoding = None
if self.params.get('verbose', False):
if subprocess_encoding:
str_args = [
a.decode(subprocess_encoding) if isinstance(a, bytes) else a
for a in args]
else:
str_args = args
try:
import pipes
shell_quote = lambda args: ' '.join(map(pipes.quote, args))
shell_quote = lambda args: ' '.join(map(pipes.quote, str_args))
except ImportError:
shell_quote = repr
self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(args))
retval = subprocess.call(args)
self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(str_args))
retval = run_rtmpdump(args)
while (retval == 2 or retval == 1) and not test:
prevsize = os.path.getsize(encodeFilename(tmpfilename))
self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
self.to_screen(u'[rtmpdump] %s bytes' % prevsize)
time.sleep(5.0) # This seems to be needed
retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
retval = run_rtmpdump(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
cursize = os.path.getsize(encodeFilename(tmpfilename))
if prevsize == cursize and retval == 1:
break
# Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
if prevsize == cursize and retval == 2 and cursize > 1024:
self.to_screen(u'\r[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
self.to_screen(u'[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
retval = 0
break
if retval == 0 or (test and retval == 2):
fsize = os.path.getsize(encodeFilename(tmpfilename))
self.to_screen(u'\r[rtmpdump] %s bytes' % fsize)
self.to_screen(u'[rtmpdump] %s bytes' % fsize)
self.try_rename(tmpfilename, filename)
self._hook_progress({
'downloaded_bytes': fsize,
@@ -525,7 +583,7 @@ class FileDownloader(object):
self.to_screen(u'\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
return False
data_len_str = self.format_bytes(data_len)
data_len_str = format_bytes(data_len)
byte_counter = 0 + resume_len
block_size = self.params.get('buffersize', 1024)
start = time.time()

View File

@@ -7,8 +7,10 @@ import errno
import io
import json
import os
import platform
import re
import shutil
import subprocess
import socket
import sys
import time
@@ -18,6 +20,7 @@ if os.name == 'nt':
import ctypes
from .utils import (
compat_cookiejar,
compat_http_client,
compat_print,
compat_str,
@@ -30,9 +33,12 @@ from .utils import (
DownloadError,
encodeFilename,
ExtractorError,
format_bytes,
locked_file,
make_HTTPS_handler,
MaxDownloadsReached,
PostProcessingError,
platform_name,
preferredencoding,
SameFileError,
sanitize_filename,
@@ -41,9 +47,11 @@ from .utils import (
UnavailableVideoError,
write_json_file,
write_string,
YoutubeDLHandler,
)
from .extractor import get_info_extractor, gen_extractors
from .FileDownloader import FileDownloader
from .version import __version__
class YoutubeDL(object):
@@ -118,9 +126,12 @@ class YoutubeDL(object):
noplaylist: Download single video instead of a playlist if in doubt.
age_limit: An integer representing the user's age in years.
Unsuitable videos for the given age are skipped.
downloadarchive: File name of a file where all downloads are recorded.
download_archive: File name of a file where all downloads are recorded.
Videos already present in the file are not downloaded
again.
cookiefile: File name where cookies should be read from and dumped to.
nocheckcertificate:Do not verify SSL certificates
proxy: URL of the proxy server to use
The following parameters are not used by YoutubeDL itself, they are used by
the FileDownloader:
@@ -135,7 +146,7 @@ class YoutubeDL(object):
_num_downloads = None
_screen_file = None
def __init__(self, params):
def __init__(self, params={}):
"""Create a FileDownloader object with the given options."""
self._ies = []
self._ies_instances = {}
@@ -144,6 +155,7 @@ class YoutubeDL(object):
self._download_retcode = 0
self._num_downloads = 0
self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
self.params = params
if (sys.version_info >= (3,) and sys.platform != 'win32' and
sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
@@ -153,14 +165,15 @@ class YoutubeDL(object):
u'Assuming --restrict-filenames since file system encoding '
u'cannot encode all charactes. '
u'Set the LC_ALL environment variable to fix this.')
params['restrictfilenames'] = True
self.params['restrictfilenames'] = True
self.params = params
self.fd = FileDownloader(self, self.params)
if '%(stitle)s' in self.params['outtmpl']:
if '%(stitle)s' in self.params.get('outtmpl', ''):
self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
self._setup_opener()
def add_info_extractor(self, ie):
"""Add an InfoExtractor object to the end of the list."""
self._ies.append(ie)
@@ -241,10 +254,9 @@ class YoutubeDL(object):
def __exit__(self, *args):
self.restore_console_title()
def fixed_template(self):
"""Checks if the output template is fixed."""
return (re.search(u'(?u)%\\(.+?\\)s', self.params['outtmpl']) is None)
if self.params.get('cookiefile') is not None:
self.cookiejar.save()
def trouble(self, message=None, tb=None):
"""Determine action to take when a download problem appears.
@@ -782,13 +794,15 @@ class YoutubeDL(object):
def download(self, url_list):
"""Download a given list of URLs."""
if len(url_list) > 1 and self.fixed_template():
if (len(url_list) > 1 and
'%' not in self.params['outtmpl']
and self.params.get('max_downloads') != 1):
raise SameFileError(self.params['outtmpl'])
for url in url_list:
try:
#It also downloads the videos
videos = self.extract_info(url)
self.extract_info(url)
except UnavailableVideoError:
self.report_error(u'unable to download video')
except MaxDownloadsReached:
@@ -820,20 +834,26 @@ class YoutubeDL(object):
except (IOError, OSError):
self.report_warning(u'Unable to remove downloaded video file')
def in_download_archive(self, info_dict):
fn = self.params.get('download_archive')
if fn is None:
return False
extractor = info_dict.get('extractor_id')
def _make_archive_id(self, info_dict):
# Future-proof against any change in case
# and backwards compatibility with prior versions
extractor = info_dict.get('extractor_key')
if extractor is None:
if 'id' in info_dict:
extractor = info_dict.get('ie_key') # key in a playlist
if extractor is None:
return None # Incomplete video information
return extractor.lower() + u' ' + info_dict['id']
def in_download_archive(self, info_dict):
fn = self.params.get('download_archive')
if fn is None:
return False
vid_id = self._make_archive_id(info_dict)
if vid_id is None:
return False # Incomplete video information
# Future-proof against any change in case
# and backwards compatibility with prior versions
extractor = extractor.lower()
vid_id = extractor + u' ' + info_dict['id']
try:
with locked_file(fn, 'r', encoding='utf-8') as archive_file:
for line in archive_file:
@@ -848,12 +868,15 @@ class YoutubeDL(object):
fn = self.params.get('download_archive')
if fn is None:
return
vid_id = info_dict['extractor'] + u' ' + info_dict['id']
vid_id = self._make_archive_id(info_dict)
assert vid_id
with locked_file(fn, 'a', encoding='utf-8') as archive_file:
archive_file.write(vid_id + u'\n')
@staticmethod
def format_resolution(format, default='unknown'):
if format.get('vcodec') == 'none':
return 'audio only'
if format.get('_resolution') is not None:
return format['_resolution']
if format.get('height') is not None:
@@ -867,10 +890,11 @@ class YoutubeDL(object):
def list_formats(self, info_dict):
def format_note(fdict):
if fdict.get('format_note') is not None:
return fdict['format_note']
res = u''
if fdict.get('vcodec') is not None:
if fdict.get('format_note') is not None:
res += fdict['format_note'] + u' '
if (fdict.get('vcodec') is not None and
fdict.get('vcodec') != 'none'):
res += u'%-5s' % fdict['vcodec']
elif fdict.get('vbr') is not None:
res += u'video'
@@ -886,25 +910,100 @@ class YoutubeDL(object):
res += 'audio'
if fdict.get('abr') is not None:
res += u'@%3dk' % fdict['abr']
if fdict.get('filesize') is not None:
if res:
res += u', '
res += format_bytes(fdict['filesize'])
return res
def line(format):
return (u'%-20s%-10s%-12s%s' % (
def line(format, idlen=20):
return ((u'%-' + compat_str(idlen + 1) + u's%-10s%-12s%s') % (
format['format_id'],
format['ext'],
self.format_resolution(format),
format_note(format),
)
)
))
formats = info_dict.get('formats', [info_dict])
formats_s = list(map(line, formats))
idlen = max(len(u'format code'),
max(len(f['format_id']) for f in formats))
formats_s = [line(f, idlen) for f in formats]
if len(formats) > 1:
formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'
header_line = line({
'format_id': u'format code', 'ext': u'extension',
'_resolution': u'resolution', 'format_note': u'note'})
'_resolution': u'resolution', 'format_note': u'note'}, idlen=idlen)
self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
(info_dict['id'], header_line, u"\n".join(formats_s)))
def urlopen(self, req):
    """ Start an HTTP download """
    # Delegate to the opener stored on this object
    opener = self._opener
    return opener.open(req)
def print_debug_header(self):
    """Write version, interpreter and proxy information for debugging.

    Does nothing unless the 'verbose' param is set.  Otherwise it writes
    the youtube-dl version, the short git HEAD hash (only if it can be
    determined), the Python version with the platform name, and the
    proxies of all handlers of the opener.
    """
    if not self.params.get('verbose'):
        return
    write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
    try:
        sp = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out, _ = sp.communicate()
        out = out.decode().strip()
        if re.match('[0-9a-f]+', out):
            write_string(u'[debug] Git HEAD: ' + out + u'\n')
    except Exception:
        # Best effort only: git may be missing or this may not be a
        # checkout.  Catch Exception rather than everything so that
        # KeyboardInterrupt and SystemExit still propagate.
        try:
            sys.exc_clear()
        except AttributeError:
            # sys.exc_clear does not exist on Python 3
            pass
    write_string(u'[debug] Python version %s - %s' %
                 (platform.python_version(), platform_name()) + u'\n')

    # Collect the proxies of every handler that exposes them
    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
def _setup_opener(self, timeout=20):
    """Build the URL opener from the 'cookiefile', 'proxy' and
    'nocheckcertificate' params and store it in self._opener.

    Also sets self.cookiejar, installs the opener globally and sets the
    default socket timeout to ``timeout``.
    """
    cookie_path = self.params.get('cookiefile')
    proxy_url = self.params.get('proxy')

    if cookie_path is not None:
        self.cookiejar = compat_cookiejar.MozillaCookieJar(
            cookie_path)
        # Only load the file if it is readable
        if os.access(cookie_path, os.R_OK):
            self.cookiejar.load()
    else:
        self.cookiejar = compat_cookiejar.CookieJar()

    cookie_processor = compat_urllib_request.HTTPCookieProcessor(
        self.cookiejar)

    if proxy_url is None:
        # No proxy param: use what getproxies() reports
        proxy_map = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
        if 'http' in proxy_map and 'https' not in proxy_map:
            proxy_map['https'] = proxy_map['http']
    elif proxy_url == '':
        # An empty proxy param yields an empty proxy mapping
        proxy_map = {}
    else:
        proxy_map = {'http': proxy_url, 'https': proxy_url}
    proxy_handler = compat_urllib_request.ProxyHandler(proxy_map)

    https_handler = make_HTTPS_handler(
        self.params.get('nocheckcertificate', False))
    built_opener = compat_urllib_request.build_opener(
        https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
    built_opener.addheaders = []
    self._opener = built_opener

    # TODO remove this global modification
    compat_urllib_request.install_opener(built_opener)
    socket.setdefaulttimeout(timeout)

View File

@@ -41,45 +41,35 @@ __authors__ = (
__license__ = 'Public Domain'
import codecs
import collections
import getpass
import optparse
import os
import random
import re
import shlex
import socket
import subprocess
import sys
import traceback
import platform
from .utils import (
compat_cookiejar,
compat_print,
compat_str,
compat_urllib_request,
DateRange,
decodeOption,
determine_ext,
DownloadError,
get_cachedir,
make_HTTPS_handler,
MaxDownloadsReached,
platform_name,
preferredencoding,
SameFileError,
std_headers,
write_string,
YoutubeDLHandler,
)
from .update import update_self
from .version import __version__
from .FileDownloader import (
FileDownloader,
)
from .extractor import gen_extractors
from .version import __version__
from .YoutubeDL import YoutubeDL
from .PostProcessor import (
FFmpegMetadataPP,
@@ -216,7 +206,9 @@ def parseOpts(overrideArguments=None):
dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)
selection.add_option('--match-title', dest='matchtitle', metavar='REGEX',help='download only matching titles (regex or caseless sub-string)')
selection.add_option('--reject-title', dest='rejecttitle', metavar='REGEX',help='skip download for matching titles (regex or caseless sub-string)')
selection.add_option('--max-downloads', metavar='NUMBER', dest='max_downloads', help='Abort after downloading NUMBER files', default=None)
selection.add_option('--max-downloads', metavar='NUMBER',
dest='max_downloads', type=int, default=None,
help='Abort after downloading NUMBER files')
selection.add_option('--min-filesize', metavar='SIZE', dest='min_filesize', help="Do not download any videos smaller than SIZE (e.g. 50k or 44.6m)", default=None)
selection.add_option('--max-filesize', metavar='SIZE', dest='max_filesize', help="Do not download any videos larger than SIZE (e.g. 50k or 44.6m)", default=None)
selection.add_option('--date', metavar='DATE', dest='date', help='download only videos uploaded in this date', default=None)
@@ -452,19 +444,6 @@ def _real_main(argv=None):
parser, opts, args = parseOpts(argv)
# Open appropriate CookieJar
if opts.cookiefile is None:
jar = compat_cookiejar.CookieJar()
else:
try:
jar = compat_cookiejar.MozillaCookieJar(opts.cookiefile)
if os.access(opts.cookiefile, os.R_OK):
jar.load()
except (IOError, OSError) as err:
if opts.verbose:
traceback.print_exc()
write_string(u'ERROR: unable to open cookie file\n')
sys.exit(101)
# Set user agent
if opts.user_agent is not None:
std_headers['User-Agent'] = opts.user_agent
@@ -496,8 +475,6 @@ def _real_main(argv=None):
all_urls = batchurls + args
all_urls = [url.strip() for url in all_urls]
opener = _setup_opener(jar=jar, opts=opts)
extractors = gen_extractors()
if opts.list_extractors:
@@ -552,7 +529,7 @@ def _real_main(argv=None):
if opts.retries is not None:
try:
opts.retries = int(opts.retries)
except (TypeError, ValueError) as err:
except (TypeError, ValueError):
parser.error(u'invalid retry count specified')
if opts.buffersize is not None:
numeric_buffersize = FileDownloader.parse_bytes(opts.buffersize)
@@ -563,13 +540,13 @@ def _real_main(argv=None):
opts.playliststart = int(opts.playliststart)
if opts.playliststart <= 0:
raise ValueError(u'Playlist start must be positive')
except (TypeError, ValueError) as err:
except (TypeError, ValueError):
parser.error(u'invalid playlist start number specified')
try:
opts.playlistend = int(opts.playlistend)
if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart):
raise ValueError(u'Playlist end must be greater than playlist start')
except (TypeError, ValueError) as err:
except (TypeError, ValueError):
parser.error(u'invalid playlist end number specified')
if opts.extractaudio:
if opts.audioformat not in ['best', 'aac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav']:
@@ -672,34 +649,13 @@ def _real_main(argv=None):
'youtube_print_sig_code': opts.youtube_print_sig_code,
'age_limit': opts.age_limit,
'download_archive': opts.download_archive,
'cookiefile': opts.cookiefile,
'nocheckcertificate': opts.no_check_certificate,
'proxy': opts.proxy,
}
with YoutubeDL(ydl_opts) as ydl:
if opts.verbose:
write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
try:
sp = subprocess.Popen(
['git', 'rev-parse', '--short', 'HEAD'],
stdout=subprocess.PIPE, stderr=subprocess.PIPE,
cwd=os.path.dirname(os.path.abspath(__file__)))
out, err = sp.communicate()
out = out.decode().strip()
if re.match('[0-9a-f]+', out):
write_string(u'[debug] Git HEAD: ' + out + u'\n')
except:
try:
sys.exc_clear()
except:
pass
write_string(u'[debug] Python version %s - %s' %
(platform.python_version(), platform_name()) + u'\n')
proxy_map = {}
for handler in opener.handlers:
if hasattr(handler, 'proxies'):
proxy_map.update(handler.proxies)
write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
ydl.print_debug_header()
ydl.add_default_info_extractors()
# PostProcessors
@@ -730,46 +686,9 @@ def _real_main(argv=None):
ydl.to_screen(u'--max-download limit reached, aborting.')
retcode = 101
# Dump cookie jar if requested
if opts.cookiefile is not None:
try:
jar.save()
except (IOError, OSError):
sys.exit(u'ERROR: unable to save cookie jar')
sys.exit(retcode)
def _setup_opener(jar=None, opts=None, timeout=300):
    """Build the urllib opener, install it globally and return it.

    jar     -- cookie jar passed to the HTTPCookieProcessor (may be None)
    opts    -- options object exposing ``proxy`` and
               ``no_check_certificate``; when None, a stand-in with
               proxy=None and no_check_certificate=False is used
    timeout -- value installed as the process-wide default socket timeout

    Besides returning the opener, this installs it as the default urllib
    opener and changes the default socket timeout.
    """
    if opts is None:
        FakeOptions = collections.namedtuple(
            'FakeOptions', ['proxy', 'no_check_certificate'])
        opts = FakeOptions(proxy=None, no_check_certificate=False)

    cookie_handler = compat_urllib_request.HTTPCookieProcessor(jar)

    requested_proxy = opts.proxy
    if requested_proxy is None:
        # No explicit proxy option: fall back to the system settings
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
        if 'https' not in proxies and 'http' in proxies:
            proxies['https'] = proxies['http']
    elif requested_proxy == '':
        # An empty proxy option means "use no proxy at all"
        proxies = {}
    else:
        proxies = dict.fromkeys(('http', 'https'), requested_proxy)

    proxy_handler = compat_urllib_request.ProxyHandler(proxies)
    https_handler = make_HTTPS_handler(opts)
    opener = compat_urllib_request.build_opener(
        https_handler, proxy_handler, cookie_handler, YoutubeDLHandler())

    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
    opener.addheaders = []

    compat_urllib_request.install_opener(opener)
    socket.setdefaulttimeout(timeout)
    return opener
def main(argv=None):
try:
_real_main(argv)

View File

@@ -20,9 +20,11 @@ from .c56 import C56IE
from .canalplus import CanalplusIE
from .canalc2 import Canalc2IE
from .cinemassacre import CinemassacreIE
from .clipfish import ClipfishIE
from .clipsyndicate import ClipsyndicateIE
from .cnn import CNNIE
from .collegehumor import CollegeHumorIE
from .comedycentral import ComedyCentralIE
from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
from .condenast import CondeNastIE
from .criterion import CriterionIE
from .cspan import CSpanIE
@@ -70,6 +72,7 @@ from .hotnewhiphop import HotNewHipHopIE
from .howcast import HowcastIE
from .hypem import HypemIE
from .ign import IGNIE, OneUPIE
from .imdb import ImdbIE
from .ina import InaIE
from .infoq import InfoQIE
from .instagram import InstagramIE
@@ -104,6 +107,7 @@ from .ooyala import OoyalaIE
from .orf import ORFIE
from .pbs import PBSIE
from .photobucket import PhotobucketIE
from .podomatic import PodomaticIE
from .pornhub import PornHubIE
from .pornotube import PornotubeIE
from .rbmaradio import RBMARadioIE
@@ -185,6 +189,7 @@ from .youtube import (
YoutubeTruncatedURLIE,
YoutubeWatchLaterIE,
YoutubeFavouritesIE,
YoutubeHistoryIE,
)
from .zdf import ZDFIE

View File

@@ -1,5 +1,4 @@
import re
import xml.etree.ElementTree
from .common import InfoExtractor
@@ -28,9 +27,8 @@ class AnitubeIE(InfoExtractor):
key = self._html_search_regex(r'http://www\.anitube\.se/embed/([A-Za-z0-9_-]*)',
webpage, u'key')
webpage_config = self._download_webpage('http://www.anitube.se/nuevo/econfig.php?key=%s' % key,
config_xml = self._download_xml('http://www.anitube.se/nuevo/econfig.php?key=%s' % key,
key)
config_xml = xml.etree.ElementTree.fromstring(webpage_config.encode('utf-8'))
video_title = config_xml.find('title').text

View File

@@ -1,7 +1,6 @@
# encoding: utf-8
import re
import json
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
@@ -78,8 +77,7 @@ class ArteTvIE(InfoExtractor):
"""Extract from videos.arte.tv"""
ref_xml_url = url.replace('/videos/', '/do_delegate/videos/')
ref_xml_url = ref_xml_url.replace('.html', ',view,asPlayerXml.xml')
ref_xml = self._download_webpage(ref_xml_url, video_id, note=u'Downloading metadata')
ref_xml_doc = xml.etree.ElementTree.fromstring(ref_xml)
ref_xml_doc = self._download_xml(ref_xml_url, video_id, note=u'Downloading metadata')
config_node = find_xpath_attr(ref_xml_doc, './/video', 'lang', lang)
config_xml_url = config_node.attrib['ref']
config_xml = self._download_webpage(config_xml_url, video_id, note=u'Downloading configuration')
@@ -109,9 +107,8 @@ class ArteTvIE(InfoExtractor):
"""Extract form http://liveweb.arte.tv/"""
webpage = self._download_webpage(url, name)
video_id = self._search_regex(r'eventId=(\d+?)("|&)', webpage, u'event id')
config_xml = self._download_webpage('http://download.liveweb.arte.tv/o21/liveweb/events/event-%s.xml' % video_id,
config_doc = self._download_xml('http://download.liveweb.arte.tv/o21/liveweb/events/event-%s.xml' % video_id,
video_id, u'Downloading information')
config_doc = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
event_doc = config_doc.find('event')
url_node = event_doc.find('video').find('urlHd')
if url_node is None:

View File

@@ -25,6 +25,11 @@ class BambuserIE(InfoExtractor):
u'uploader': u'pixelversity',
u'uploader_id': u'344706',
},
u'params': {
# The server doesn't respect the 'Range' header, so the whole video would be downloaded;
# this caused the travis builds to fail: https://travis-ci.org/rg3/youtube-dl/jobs/14493845#L59
u'skip_download': True,
},
}
def _real_extract(self, url):

View File

@@ -76,18 +76,21 @@ class BrightcoveIE(InfoExtractor):
'playerID': find_xpath_attr(object_doc, './param', 'name', 'playerID').attrib['value'],
}
def find_param(name):
return find_xpath_attr(object_doc, './param', 'name', name)
node = find_xpath_attr(object_doc, './param', 'name', name)
if node is not None:
return node.attrib['value']
return None
playerKey = find_param('playerKey')
# Not all pages define this value
if playerKey is not None:
params['playerKey'] = playerKey.attrib['value']
params['playerKey'] = playerKey
# The three fields hold the id of the video
videoPlayer = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID')
if videoPlayer is not None:
params['@videoPlayer'] = videoPlayer.attrib['value']
params['@videoPlayer'] = videoPlayer
linkBase = find_param('linkBaseURL')
if linkBase is not None:
params['linkBaseURL'] = linkBase.attrib['value']
params['linkBaseURL'] = linkBase
data = compat_urllib_parse.urlencode(params)
return cls._FEDERATED_URL_TEMPLATE % data

View File

@@ -1,6 +1,5 @@
# encoding: utf-8
import re
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import unified_strdate
@@ -31,11 +30,10 @@ class CanalplusIE(InfoExtractor):
webpage = self._download_webpage(url, mobj.group('path'))
video_id = self._search_regex(r'videoId = "(\d+)";', webpage, u'video id')
info_url = self._VIDEO_INFO_TEMPLATE % video_id
info_page = self._download_webpage(info_url,video_id,
doc = self._download_xml(info_url,video_id,
u'Downloading video info')
self.report_extraction(video_id)
doc = xml.etree.ElementTree.fromstring(info_page.encode('utf-8'))
video_info = [video for video in doc if video.find('ID').text == video_id][0]
infos = video_info.find('INFOS')
media = video_info.find('MEDIA')

View File

@@ -0,0 +1,57 @@
import re
import time
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import ExtractorError
class ClipfishIE(InfoExtractor):
    """Information extractor for single videos on clipfish.de.

    The metadata is read from the site's ``devxml/videoinfo`` XML document
    instead of the HTML page.
    """
    IE_NAME = u'clipfish'

    _VALID_URL = r'^https?://(?:www\.)?clipfish\.de/.*?/video/(?P<id>[0-9]+)/'
    _TEST = {
        u'url': u'http://www.clipfish.de/special/game-trailer/video/3966754/fifa-14-e3-2013-trailer/',
        u'file': u'3966754.mp4',
        u'md5': u'2521cd644e862936cf2e698206e47385',
        u'info_dict': {
            u'title': u'FIFA 14 - E3 2013 Trailer',
            u'duration': 82,
        }
    }

    def _real_extract(self, url):
        """Return the info dict (id, title, url, thumbnail, duration) for url.

        Raises ExtractorError when the info document carries no video URL.
        Thumbnail and duration are optional and are None when the document
        does not provide them.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        # The current timestamp is appended as the ``ts`` query parameter
        info_url = ('http://www.clipfish.de/devxml/videoinfo/%s?ts=%d' %
                    (video_id, int(time.time())))
        doc = self._download_xml(
            info_url, video_id, note=u'Downloading info page')
        title = doc.find('title').text

        # findtext() gives None for a missing element and '' for an empty
        # one, so both cases end up in the explicit error below instead of
        # an AttributeError on a missing <filename>.
        video_url = doc.findtext('filename')
        if not video_url:
            xml_bytes = xml.etree.ElementTree.tostring(doc)
            raise ExtractorError(u'Cannot find video URL in document %r' %
                                 xml_bytes)

        thumbnail = doc.findtext('imageurl') or None

        # Duration is formatted as "H:MM:SS:<ms>"; the last group is ignored
        duration = None
        duration_str = doc.findtext('duration')
        if duration_str:
            m = re.match(
                r'^(?P<hours>[0-9]+):(?P<minutes>[0-9]{2}):(?P<seconds>[0-9]{2}):(?P<ms>[0-9]*)$',
                duration_str)
            if m:
                duration = (
                    (int(m.group('hours')) * 60 * 60) +
                    (int(m.group('minutes')) * 60) +
                    (int(m.group('seconds')))
                )

        return {
            'id': video_id,
            'title': title,
            'url': video_url,
            'thumbnail': thumbnail,
            'duration': duration,
        }

View File

@@ -0,0 +1,52 @@
import re
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
find_xpath_attr,
)
class ClipsyndicateIE(InfoExtractor):
    """Information extractor for videos on clipsyndicate.com.

    The embed player script exposes the ``flvars`` query string (which
    includes a required token); it is used to request the playlist XML
    that describes the track.
    """
    _VALID_URL = r'http://www\.clipsyndicate\.com/video/play(list/\d+)?/(?P<id>\d+)'

    _TEST = {
        u'url': u'http://www.clipsyndicate.com/video/play/4629301/brick_briscoe',
        u'md5': u'4d7d549451bad625e0ff3d7bd56d776c',
        u'info_dict': {
            u'id': u'4629301',
            u'ext': u'mp4',
            u'title': u'Brick Briscoe',
            u'duration': 612,
        },
    }

    def _real_extract(self, url):
        """Return the info dict (id, title, url, thumbnail, duration) for url.

        ``duration`` is None when the playlist has no duration param.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        js_player = self._download_webpage(
            'http://eplayer.clipsyndicate.com/embed/player.js?va_id=%s' % video_id,
            video_id, u'Downloading player')
        # it includes a required token
        flvars = self._search_regex(r'flvars: "(.*?)"', js_player, u'flvars')

        playlist_page = self._download_webpage(
            'http://eplayer.clipsyndicate.com/osmf/playlist?%s' % flvars,
            video_id, u'Downloading video info')
        # Fix broken xml: escape bare ampersands, but leave predefined
        # entities and character references alone so they are not
        # double-escaped (e.g. '&amp;' must not become '&amp;amp;').
        playlist_page = re.sub(
            r'&(?!(?:amp|lt|gt|apos|quot|#[0-9]+|#x[0-9a-fA-F]+);)',
            '&amp;', playlist_page)
        pdoc = xml.etree.ElementTree.fromstring(playlist_page.encode('utf-8'))

        track_doc = pdoc.find('trackList/track')

        def find_param(name):
            """Return the value of the named <param> of the track, or None."""
            node = find_xpath_attr(track_doc, './/param', 'name', name)
            if node is not None:
                return node.attrib['value']
            return None

        duration = find_param('duration')

        return {
            'id': video_id,
            'title': find_param('title'),
            'url': track_doc.find('location').text,
            'thumbnail': find_param('thumbnail'),
            'duration': int(duration) if duration is not None else None,
        }

View File

@@ -1,5 +1,4 @@
import re
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import determine_ext
@@ -33,8 +32,7 @@ class CNNIE(InfoExtractor):
path = mobj.group('path')
page_title = mobj.group('title')
info_url = u'http://cnn.com/video/data/3.0/%s/index.xml' % path
info_xml = self._download_webpage(info_url, page_title)
info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
info = self._download_xml(info_url, page_title)
formats = []
for f in info.findall('files/file'):

View File

@@ -1,5 +1,4 @@
import re
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
@@ -46,11 +45,10 @@ class CollegeHumorIE(InfoExtractor):
self.report_extraction(video_id)
xmlUrl = 'http://www.collegehumor.com/moogaloop/video/' + video_id
metaXml = self._download_webpage(xmlUrl, video_id,
mdoc = self._download_xml(xmlUrl, video_id,
u'Downloading info XML',
u'Unable to download video info XML')
mdoc = xml.etree.ElementTree.fromstring(metaXml)
try:
videoNode = mdoc.findall('./video')[0]
youtubeIdNode = videoNode.find('./youtubeID')
@@ -65,11 +63,10 @@ class CollegeHumorIE(InfoExtractor):
if next_url.endswith(u'manifest.f4m'):
manifest_url = next_url + '?hdcore=2.10.3'
manifestXml = self._download_webpage(manifest_url, video_id,
adoc = self._download_xml(manifest_url, video_id,
u'Downloading XML manifest',
u'Unable to download video info XML')
adoc = xml.etree.ElementTree.fromstring(manifestXml)
try:
video_id = adoc.findall('./{http://ns.adobe.com/f4m/1.0}id')[0].text
except IndexError:

View File

@@ -1,7 +1,7 @@
import re
import xml.etree.ElementTree
from .common import InfoExtractor
from .mtv import MTVIE, _media_xml_tag
from ..utils import (
compat_str,
compat_urllib_parse,
@@ -11,7 +11,37 @@ from ..utils import (
)
class ComedyCentralIE(InfoExtractor):
class ComedyCentralIE(MTVIE):
    """Extractor for comedycentral.com video pages.

    Reuses the MTVIE feed handling: the page only supplies the ``mgid``,
    which is then resolved through ``_get_videos_info`` using the
    Comedy Central feed URL.
    """
    # Dots are escaped so that they only match a literal '.'
    _VALID_URL = r'http://www\.comedycentral\.com/(video-clips|episodes|cc-studios)/(?P<title>.*)'
    _FEED_URL = u'http://comedycentral.com/feeds/mrss/'

    _TEST = {
        u'url': u'http://www.comedycentral.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother',
        u'md5': u'4167875aae411f903b751a21f357f1ee',
        u'info_dict': {
            u'id': u'cef0cbb3-e776-4bc9-b62e-8016deccb354',
            u'ext': u'mp4',
            u'title': u'Uncensored - Greg Fitzsimmons - Too Good of a Mother',
            u'description': u'After a certain point, breastfeeding becomes c**kblocking.',
        },
    }
    # Overwrite MTVIE properties we don't want
    _TESTS = []

    def _get_thumbnail_url(self, uri, itemdoc):
        """Return the ``url`` attribute of the item's media group thumbnail.

        ``uri`` is accepted for signature compatibility and is not used.
        """
        search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))
        return itemdoc.find(search_path).attrib['url']

    def _real_extract(self, url):
        """Find the mgid on the page and return the info for its videos."""
        mobj = re.match(self._VALID_URL, url)
        title = mobj.group('title')
        webpage = self._download_webpage(url, title)
        mgid = self._search_regex(r'data-mgid="(?P<mgid>mgid:.*?)"',
                                  webpage, u'mgid')
        return self._get_videos_info(mgid)
class ComedyCentralShowsIE(InfoExtractor):
IE_DESC = u'The Daily Show / Colbert Report'
# urls can be abbreviations like :thedailyshow or :colbert
# urls for episodes like:
@@ -127,13 +157,12 @@ class ComedyCentralIE(InfoExtractor):
uri = mMovieParams[0][1]
indexUrl = 'http://shadow.comedycentral.com/feeds/video_player/mrss/?' + compat_urllib_parse.urlencode({'uri': uri})
indexXml = self._download_webpage(indexUrl, epTitle,
idoc = self._download_xml(indexUrl, epTitle,
u'Downloading show index',
u'unable to download episode index')
results = []
idoc = xml.etree.ElementTree.fromstring(indexXml)
itemEls = idoc.findall('.//item')
for partNum,itemEl in enumerate(itemEls):
mediaId = itemEl.findall('./guid')[0].text
@@ -144,10 +173,9 @@ class ComedyCentralIE(InfoExtractor):
configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' +
compat_urllib_parse.urlencode({'uri': mediaId}))
configXml = self._download_webpage(configUrl, epTitle,
cdoc = self._download_xml(configUrl, epTitle,
u'Downloading configuration for %s' % shortMediaId)
cdoc = xml.etree.ElementTree.fromstring(configXml)
turls = []
for rendition in cdoc.findall('.//rendition'):
finfo = (rendition.attrib['bitrate'], rendition.findall('./src')[0].text)

View File

@@ -4,11 +4,11 @@ import re
import socket
import sys
import netrc
import xml.etree.ElementTree
from ..utils import (
compat_http_client,
compat_urllib_error,
compat_urllib_request,
compat_str,
clean_html,
@@ -19,6 +19,7 @@ from ..utils import (
unescapeHTML,
)
class InfoExtractor(object):
"""Information Extractor class.
@@ -75,6 +76,7 @@ class InfoExtractor(object):
* acodec Name of the audio codec in use
* vbr Average video bitrate in KBit/s
* vcodec Name of the video codec in use
* filesize The number of bytes, if known in advance
webpage_url: The url to the video webpage, if given to youtube-dl it
should allow to get the same result again. (It will be set
by YoutubeDL if it's missing)
@@ -156,7 +158,7 @@ class InfoExtractor(object):
elif note is not False:
self.to_screen(u'%s: %s' % (video_id, note))
try:
return compat_urllib_request.urlopen(url_or_request)
return self._downloader.urlopen(url_or_request)
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
if errnote is None:
errnote = u'Unable to download webpage'
@@ -208,6 +210,12 @@ class InfoExtractor(object):
""" Returns the data of the page as a string """
return self._download_webpage_handle(url_or_request, video_id, note, errnote)[0]
def _download_xml(self, url_or_request, video_id,
note=u'Downloading XML', errnote=u'Unable to download XML'):
"""Return the xml as an xml.etree.ElementTree.Element"""
xml_string = self._download_webpage(url_or_request, video_id, note, errnote)
return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8'))
def to_screen(self, msg):
"""Print msg to screen, prefixing it with '[ie_name]'"""
self._downloader.to_screen(u'[%s] %s' % (self.IE_NAME, msg))

View File

@@ -1,6 +1,5 @@
# encoding: utf-8
import re
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
@@ -32,14 +31,12 @@ class DaumIE(InfoExtractor):
full_id = self._search_regex(r'<link rel="video_src" href=".+?vid=(.+?)"',
webpage, u'full id')
query = compat_urllib_parse.urlencode({'vid': full_id})
info_xml = self._download_webpage(
info = self._download_xml(
'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query, video_id,
u'Downloading video info')
urls_xml = self._download_webpage(
urls = self._download_xml(
'http://videofarm.daum.net/controller/api/open/v1_2/MovieData.apixml?' + query,
video_id, u'Downloading video formats info')
info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
urls = xml.etree.ElementTree.fromstring(urls_xml.encode('utf-8'))
self.to_screen(u'%s: Getting video urls' % video_id)
formats = []
@@ -49,10 +46,9 @@ class DaumIE(InfoExtractor):
'vid': full_id,
'profile': profile,
})
url_xml = self._download_webpage(
url_doc = self._download_xml(
'http://videofarm.daum.net/controller/api/open/v1_2/MovieLocation.apixml?' + format_query,
video_id, note=False)
url_doc = xml.etree.ElementTree.fromstring(url_xml.encode('utf-8'))
format_url = url_doc.find('result/url').text
formats.append({
'url': format_url,

View File

@@ -1,7 +1,6 @@
# coding: utf-8
import re
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
@@ -30,8 +29,7 @@ class DreiSatIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
details_xml = self._download_webpage(details_url, video_id, note=u'Downloading video details')
details_doc = xml.etree.ElementTree.fromstring(details_xml.encode('utf-8'))
details_doc = self._download_xml(details_url, video_id, note=u'Downloading video details')
thumbnail_els = details_doc.findall('.//teaserimage')
thumbnails = [{

View File

@@ -1,5 +1,4 @@
import re
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import determine_ext
@@ -21,9 +20,8 @@ class EbaumsWorldIE(InfoExtractor):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
config_xml = self._download_webpage(
config = self._download_xml(
'http://www.ebaumsworld.com/video/player/%s' % video_id, video_id)
config = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
video_url = config.find('file').text
return {

View File

@@ -1,6 +1,5 @@
# encoding: utf-8
import re
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
@@ -28,9 +27,8 @@ class FazIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
config_xml_url = self._search_regex(r'writeFLV\(\'(.+?)\',', webpage,
u'config xml url')
config_xml = self._download_webpage(config_xml_url, video_id,
config = self._download_xml(config_xml_url, video_id,
u'Downloading config xml')
config = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
encodings = config.find('ENCODINGS')
formats = []

View File

@@ -1,6 +1,5 @@
# encoding: utf-8
import re
import xml.etree.ElementTree
import json
from .common import InfoExtractor
@@ -11,11 +10,10 @@ from ..utils import (
class FranceTVBaseInfoExtractor(InfoExtractor):
def _extract_video(self, video_id):
xml_desc = self._download_webpage(
info = self._download_xml(
'http://www.francetvinfo.fr/appftv/webservices/video/'
'getInfosOeuvre.php?id-diffusion='
+ video_id, video_id, 'Downloading XML config')
info = xml.etree.ElementTree.fromstring(xml_desc.encode('utf-8'))
manifest_url = info.find('videos/video/url').text
video_url = manifest_url.replace('manifest.f4m', 'index_2_av.m3u8')

View File

@@ -209,7 +209,7 @@ class GenericIE(InfoExtractor):
mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
if mobj is None:
# Broaden the search a little bit: JWPlayer JS loader
mobj = re.search(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http[^\'"&]*)', webpage)
mobj = re.search(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http[^\'"]*)', webpage)
if mobj is None:
# Try to find twitter cards info
mobj = re.search(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage)
@@ -236,18 +236,16 @@ class GenericIE(InfoExtractor):
video_id = compat_urllib_parse.unquote(os.path.basename(video_url))
# here's a fun little line of code for you:
video_extension = os.path.splitext(video_id)[1][1:]
video_id = os.path.splitext(video_id)[0]
# video uploader is domain name
video_uploader = self._search_regex(r'(?:https?://)?([^/]*)/.*',
url, u'video uploader')
return [{
return {
'id': video_id,
'url': video_url,
'uploader': video_uploader,
'upload_date': None,
'title': video_title,
'ext': video_extension,
}]
}

View File

@@ -0,0 +1,60 @@
import re
import json
from .common import InfoExtractor
from ..utils import (
compat_urlparse,
get_element_by_attribute,
)
class ImdbIE(InfoExtractor):
    """Information extractor for trailers hosted on imdb.com.

    The video page lists one player page per format; each of those embeds
    a JSON blob (``imdb-player-data``) from which one entry of ``formats``
    is built.
    """
    IE_NAME = u'imdb'
    IE_DESC = u'Internet Movie Database trailers'
    _VALID_URL = r'http://www\.imdb\.com/video/imdb/vi(?P<id>\d+)'

    _TEST = {
        u'url': u'http://www.imdb.com/video/imdb/vi2524815897',
        u'md5': u'9f34fa777ade3a6e57a054fdbcb3a068',
        u'info_dict': {
            u'id': u'2524815897',
            u'ext': u'mp4',
            u'title': u'Ice Age: Continental Drift Trailer (No. 2) - IMDb',
            u'description': u'md5:9061c2219254e5d14e03c25c98e96a81',
            u'duration': 151,
        }
    }

    def _real_extract(self, url):
        """Return the info dict for url, with one ``formats`` entry per format.

        ``thumbnail`` and ``duration`` come from the player data of the last
        format page processed; they are None when the page lists no formats.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)
        descr = get_element_by_attribute('itemprop', 'description', webpage)
        available_formats = re.findall(
            r'case \'(?P<f_id>.*?)\' :$\s+url = \'(?P<path>.*?)\'', webpage,
            flags=re.MULTILINE)

        formats = []
        # Remember the last player data seen; initialised here so that an
        # empty format list does not end in an unbound-variable error.
        info = None
        format_info = None
        for f_id, f_path in available_formats:
            # video_id must be passed explicitly: the second positional
            # argument of _download_webpage is the id, not the note.
            format_page = self._download_webpage(
                compat_urlparse.urljoin(url, f_path), video_id,
                u'Downloading info for %s format' % f_id)
            json_data = self._search_regex(
                r'<script[^>]+class="imdb-player-data"[^>]*?>(.*?)</script>',
                format_page, u'json data', flags=re.DOTALL)
            info = json.loads(json_data)
            format_info = info['videoPlayerObject']['video']
            formats.append({
                'format_id': f_id,
                'url': format_info['url'],
                'height': format_info['height'],
                'width': format_info['width'],
            })

        thumbnail = None
        duration = None
        if info is not None:
            thumbnail = format_info['slate']
            duration = int(info['titleObject']['title']['duration_seconds'])

        return {
            'id': video_id,
            'title': self._og_search_title(webpage),
            'formats': formats,
            'description': descr,
            'thumbnail': thumbnail,
            'duration': duration,
        }

View File

@@ -1,5 +1,4 @@
import re
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
@@ -43,9 +42,8 @@ class InternetVideoArchiveIE(InfoExtractor):
video_id = query_dic['publishedid'][0]
url = self._build_url(query)
flashconfiguration_xml = self._download_webpage(url, video_id,
flashconfiguration = self._download_xml(url, video_id,
u'Downloading flash configuration')
flashconfiguration = xml.etree.ElementTree.fromstring(flashconfiguration_xml.encode('utf-8'))
file_url = flashconfiguration.find('file').text
file_url = file_url.replace('/playlist.aspx', '/mrssplaylist.aspx')
# Replace some of the parameters in the query to get the best quality
@@ -53,9 +51,8 @@ class InternetVideoArchiveIE(InfoExtractor):
file_url = re.sub(r'(?<=\?)(.+)$',
lambda m: self._clean_query(m.group()),
file_url)
info_xml = self._download_webpage(file_url, video_id,
info = self._download_xml(file_url, video_id,
u'Downloading video info')
info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
item = info.find('channel/item')
def _bp(p):

View File

@@ -2,7 +2,6 @@
import json
import re
import xml.etree.ElementTree
from .common import InfoExtractor
@@ -32,12 +31,9 @@ class JeuxVideoIE(InfoExtractor):
r'http://www\.jeuxvideo\.com/config/\w+/\d+/(.*?)/\d+_player\.xml',
xml_link, u'video ID')
xml_config = self._download_webpage(
config = self._download_xml(
xml_link, title, u'Downloading XML config')
config = xml.etree.ElementTree.fromstring(xml_config.encode('utf-8'))
info_json = self._search_regex(
r'(?sm)<format\.json>(.*?)</format\.json>',
xml_config, u'JSON information')
info_json = config.find('format.json').text
info = json.loads(info_json)['versions'][0]
video_url = 'http://video720.jeuxvideo.com/' + info['file']

View File

@@ -1,7 +1,6 @@
import json
import os
import re
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
@@ -94,10 +93,9 @@ class JustinTVIE(InfoExtractor):
archive_id = m.group(1)
api = api_base + '/broadcast/by_chapter/%s.xml' % chapter_id
chapter_info_xml = self._download_webpage(api, chapter_id,
doc = self._download_xml(api, chapter_id,
note=u'Downloading chapter information',
errnote=u'Chapter information download failed')
doc = xml.etree.ElementTree.fromstring(chapter_info_xml)
for a in doc.findall('.//archive'):
if archive_id == a.find('./id').text:
break

View File

@@ -1,6 +1,5 @@
import re
import json
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
@@ -80,8 +79,7 @@ class LivestreamOriginalIE(InfoExtractor):
user = mobj.group('user')
api_url = 'http://x{0}x.api.channel.livestream.com/2.0/clipdetails?extendedInfo=true&id={1}'.format(user, video_id)
api_response = self._download_webpage(api_url, video_id)
info = xml.etree.ElementTree.fromstring(api_response.encode('utf-8'))
info = self._download_xml(api_url, video_id)
item = info.find('channel').find('item')
ns = {'media': 'http://search.yahoo.com/mrss'}
thumbnail_url = item.find(xpath_with_ns('media:thumbnail', ns)).attrib['url']

View File

@@ -60,7 +60,7 @@ class MixcloudIE(InfoExtractor):
'title': info['name'],
'url': final_song_url,
'ext': 'mp3',
'description': info['description'],
'description': info.get('description'),
'thumbnail': info['pictures'].get('extra_large'),
'uploader': info['user']['name'],
'uploader_id': info['user']['username'],

View File

@@ -109,9 +109,8 @@ class MTVIE(InfoExtractor):
def _get_videos_info(self, uri):
video_id = self._id_from_uri(uri)
data = compat_urllib_parse.urlencode({'uri': uri})
infoXml = self._download_webpage(self._FEED_URL +'?' + data, video_id,
idoc = self._download_xml(self._FEED_URL +'?' + data, video_id,
u'Downloading info')
idoc = xml.etree.ElementTree.fromstring(infoXml.encode('utf-8'))
return [self._get_video_info(item) for item in idoc.findall('.//item')]
def _real_extract(self, url):

View File

@@ -1,5 +1,4 @@
import os.path
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
@@ -33,8 +32,7 @@ class MySpassIE(InfoExtractor):
# get metadata
metadata_url = META_DATA_URL_TEMPLATE % video_id
metadata_text = self._download_webpage(metadata_url, video_id)
metadata = xml.etree.ElementTree.fromstring(metadata_text.encode('utf-8'))
metadata = self._download_xml(metadata_url, video_id)
# extract values from metadata
url_flv_el = metadata.find('url_flv')

View File

@@ -1,6 +1,5 @@
# encoding: utf-8
import re
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
@@ -38,14 +37,12 @@ class NaverIE(InfoExtractor):
'protocol': 'p2p',
'inKey': key,
})
info_xml = self._download_webpage(
info = self._download_xml(
'http://serviceapi.rmcnmv.naver.com/flash/videoInfo.nhn?' + query,
video_id, u'Downloading video info')
urls_xml = self._download_webpage(
urls = self._download_xml(
'http://serviceapi.rmcnmv.naver.com/flash/playableEncodingOption.nhn?' + query_urls,
video_id, u'Downloading video formats info')
info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
urls = xml.etree.ElementTree.fromstring(urls_xml.encode('utf-8'))
formats = []
for format_el in urls.findall('EncodingOptions/EncodingOption'):

View File

@@ -1,5 +1,4 @@
import re
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import find_xpath_attr, compat_str
@@ -21,8 +20,8 @@ class NBCNewsIE(InfoExtractor):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
info_xml = self._download_webpage('http://www.nbcnews.com/id/%s/displaymode/1219' % video_id, video_id)
info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8')).find('video')
all_info = self._download_xml('http://www.nbcnews.com/id/%s/displaymode/1219' % video_id, video_id)
info = all_info.find('video')
return {'id': video_id,
'title': info.find('headline').text,

View File

@@ -1,6 +1,5 @@
import re
import json
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
@@ -26,9 +25,8 @@ class NHLBaseInfoExtractor(InfoExtractor):
'path': initial_video_url.replace('.mp4', '_sd.mp4'),
})
path_url = 'http://video.nhl.com/videocenter/servlets/encryptvideopath?' + data
path_response = self._download_webpage(path_url, video_id,
path_doc = self._download_xml(path_url, video_id,
u'Downloading final video url')
path_doc = xml.etree.ElementTree.fromstring(path_response)
video_url = path_doc.find('path').text
join = compat_urlparse.urljoin

View File

@@ -2,7 +2,6 @@
import re
import socket
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
@@ -81,7 +80,7 @@ class NiconicoIE(InfoExtractor):
# the cookies in order to be able to download the info webpage
self._download_webpage('http://www.nicovideo.jp/watch/' + video_id, video_id)
video_info_webpage = self._download_webpage(
video_info = self._download_xml(
'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id,
note=u'Downloading video info page')
@@ -92,7 +91,6 @@ class NiconicoIE(InfoExtractor):
video_real_url = compat_urlparse.parse_qs(flv_info_webpage)['url'][0]
# Start extracting information
video_info = xml.etree.ElementTree.fromstring(video_info_webpage)
video_title = video_info.find('.//title').text
video_extension = video_info.find('.//movie_type').text
video_format = video_extension.upper()
@@ -107,13 +105,11 @@ class NiconicoIE(InfoExtractor):
video_uploader = video_uploader_id
url = 'http://seiga.nicovideo.jp/api/user/info?id=' + video_uploader_id
try:
user_info_webpage = self._download_webpage(
user_info = self._download_xml(
url, video_id, note=u'Downloading user information')
video_uploader = user_info.find('.//nickname').text
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.report_warning(u'Unable to download user info webpage: %s' % compat_str(err))
else:
user_info = xml.etree.ElementTree.fromstring(user_info_webpage)
video_uploader = user_info.find('.//nickname').text
return {
'id': video_id,

View File

@@ -0,0 +1,49 @@
import json
import re
from .common import InfoExtractor
class PodomaticIE(InfoExtractor):
    """Extractor for single podcast entries hosted on <channel>.podomatic.com."""

    IE_NAME = 'podomatic'
    _VALID_URL = r'^(?P<proto>https?)://(?P<channel>[^.]+)\.podomatic\.com/entry/(?P<id>[^?]+)'

    _TEST = {
        u"url": u"http://scienceteachingtips.podomatic.com/entry/2009-01-02T16_03_35-08_00",
        u"file": u"2009-01-02T16_03_35-08_00.mp3",
        u"md5": u"84bb855fcf3429e6bf72460e1eed782d",
        u"info_dict": {
            u"uploader": u"Science Teaching Tips",
            u"uploader_id": u"scienceteachingtips",
            u"title": u"64. When the Moon Hits Your Eye",
            u"duration": 446,
        }
    }

    def _real_extract(self, url):
        """Fetch the entry's embed parameters (JSON) and build the info dict.

        The scheme, channel and entry id are all taken from the URL as
        matched by _VALID_URL.
        """
        match = re.match(self._VALID_URL, url)
        video_id = match.group('id')
        channel = match.group('channel')
        scheme = match.group('proto')

        # The embed_params endpoint lives on the same scheme and channel
        # host as the entry page itself.
        url_template = ('%s://%s.podomatic.com/entry/embed_params/%s' +
                        '?permalink=true&rtmp=0')
        json_url = url_template % (scheme, channel, video_id)
        raw_json = self._download_webpage(
            json_url, video_id, note=u'Downloading video info')
        params = json.loads(raw_json)

        media_url = params['downloadLink']
        podcast_name = params['podcast']
        entry_title = params['title']
        image_url = params['imageLocation']
        # 'length' is divided by 1000 and truncated to an int
        # (presumably milliseconds -> seconds; the test expects 446).
        seconds = int(params['length'] / 1000.0)

        return {
            'id': video_id,
            'url': media_url,
            'title': entry_title,
            'uploader': podcast_name,
            'uploader_id': channel,
            'thumbnail': image_url,
            'duration': seconds,
        }

View File

@@ -1,7 +1,6 @@
# coding: utf-8
import re
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
@@ -35,12 +34,11 @@ class SinaIE(InfoExtractor):
def _extract_video(self, video_id):
data = compat_urllib_parse.urlencode({'vid': video_id})
url_page = self._download_webpage('http://v.iask.com/v_play.php?%s' % data,
url_doc = self._download_xml('http://v.iask.com/v_play.php?%s' % data,
video_id, u'Downloading video url')
image_page = self._download_webpage(
'http://interface.video.sina.com.cn/interface/common/getVideoImage.php?%s' % data,
video_id, u'Downloading thumbnail info')
url_doc = xml.etree.ElementTree.fromstring(url_page.encode('utf-8'))
return {'id': video_id,
'url': url_doc.find('./durl/url').text,

View File

@@ -76,44 +76,78 @@ class SoundcloudIE(InfoExtractor):
def _extract_info_dict(self, info, full_title=None, quiet=False):
track_id = compat_str(info['id'])
name = full_title or track_id
if quiet == False:
if quiet:
self.report_extraction(name)
thumbnail = info['artwork_url']
if thumbnail is not None:
thumbnail = thumbnail.replace('-large', '-t500x500')
ext = info.get('original_format', u'mp3')
result = {
'id': track_id,
'id': track_id,
'uploader': info['user']['username'],
'upload_date': unified_strdate(info['created_at']),
'title': info['title'],
'ext': info.get('original_format', u'mp3'),
'title': info['title'],
'description': info['description'],
'thumbnail': thumbnail,
}
if info.get('downloadable', False):
# We can build a direct link to the song
result['url'] = 'https://api.soundcloud.com/tracks/{0}/download?client_id={1}'.format(track_id, self._CLIENT_ID)
format_url = (
u'https://api.soundcloud.com/tracks/{0}/download?client_id={1}'.format(
track_id, self._CLIENT_ID))
result['formats'] = [{
'format_id': 'download',
'ext': ext,
'url': format_url,
'vcodec': 'none',
}]
else:
# We have to retrieve the url
stream_json = self._download_webpage(
'http://api.soundcloud.com/i1/tracks/{0}/streams?client_id={1}'.format(track_id, self._IPHONE_CLIENT_ID),
track_id, u'Downloading track url')
# There should be only one entry in the dictionary
key, stream_url = list(json.loads(stream_json).items())[0]
if key.startswith(u'http'):
result['url'] = stream_url
elif key.startswith(u'rtmp'):
# The url doesn't have an rtmp app, we have to extract the playpath
url, path = stream_url.split('mp3:', 1)
result.update({
'url': url,
'play_path': 'mp3:' + path,
})
else:
formats = []
format_dict = json.loads(stream_json)
for key, stream_url in format_dict.items():
if key.startswith(u'http'):
formats.append({
'format_id': key,
'ext': ext,
'url': stream_url,
'vcodec': 'none',
})
elif key.startswith(u'rtmp'):
# The url doesn't have an rtmp app, we have to extract the playpath
url, path = stream_url.split('mp3:', 1)
formats.append({
'format_id': key,
'url': url,
'play_path': 'mp3:' + path,
'ext': ext,
'vcodec': 'none',
})
if not formats:
# We fallback to the stream_url in the original info, this
# cannot be always used, sometimes it can give an HTTP 404 error
result['url'] = info['stream_url'] + '?client_id=' + self._CLIENT_ID,
formats.append({
'format_id': u'fallback',
'url': info['stream_url'] + '?client_id=' + self._CLIENT_ID,
'ext': ext,
'vcodec': 'none',
})
def format_pref(f):
if f['format_id'].startswith('http'):
return 2
if f['format_id'].startswith('rtmp'):
return 1
return 0
formats.sort(key=format_pref)
result['formats'] = formats
return result

View File

@@ -1,5 +1,4 @@
import re
import xml.etree.ElementTree
from .common import InfoExtractor
@@ -33,12 +32,10 @@ class SpiegelIE(InfoExtractor):
r'<div class="module-title">(.*?)</div>', webpage, u'title')
xml_url = u'http://video2.spiegel.de/flash/' + video_id + u'.xml'
xml_code = self._download_webpage(
idoc = self._download_xml(
xml_url, video_id,
note=u'Downloading XML', errnote=u'Failed to download XML')
idoc = xml.etree.ElementTree.fromstring(xml_code)
formats = [
{
'format_id': n.tag.rpartition('type')[2],

View File

@@ -1,5 +1,4 @@
import re
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
@@ -32,8 +31,7 @@ class TeamcocoIE(InfoExtractor):
self.report_extraction(video_id)
data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
data_xml = self._download_webpage(data_url, video_id, 'Downloading data webpage')
data = xml.etree.ElementTree.fromstring(data_xml.encode('utf-8'))
data = self._download_xml(data_url, video_id, 'Downloading data webpage')
qualities = ['500k', '480p', '1000k', '720p', '1080p']

View File

@@ -1,6 +1,5 @@
# coding: utf-8
import re
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
@@ -40,11 +39,9 @@ class TouTvIE(InfoExtractor):
r'"idMedia":\s*"([^"]+)"', webpage, u'media ID')
streams_url = u'http://release.theplatform.com/content.select?pid=' + mediaId
streams_webpage = self._download_webpage(
streams_doc = self._download_xml(
streams_url, video_id, note=u'Downloading stream list')
streams_doc = xml.etree.ElementTree.fromstring(
streams_webpage.encode('utf-8'))
video_url = next(n.text
for n in streams_doc.findall('.//choice/url')
if u'//ad.doubleclick' not in n.text)

View File

@@ -1,6 +1,5 @@
import json
import re
import xml.etree.ElementTree
from .common import InfoExtractor
@@ -36,12 +35,10 @@ class TriluliluIE(InfoExtractor):
format_url = (u'http://fs%(server)s.trilulilu.ro/%(hash)s/'
u'video-formats2' % log)
format_str = self._download_webpage(
format_doc = self._download_xml(
format_url, video_id,
note=u'Downloading formats',
errnote=u'Error while downloading formats')
format_doc = xml.etree.ElementTree.fromstring(format_str)
video_url_template = (
u'http://fs%(server)s.trilulilu.ro/stream.php?type=video'

View File

@@ -1,5 +1,4 @@
import re
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
@@ -27,9 +26,8 @@ class VideofyMeIE(InfoExtractor):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
config_xml = self._download_webpage('http://sunshine.videofy.me/?videoId=%s' % video_id,
config = self._download_xml('http://sunshine.videofy.me/?videoId=%s' % video_id,
video_id)
config = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
video = config.find('video')
sources = video.find('sources')
url_node = next(node for node in [find_xpath_attr(sources, 'source', 'id', 'HQ %s' % key)

View File

@@ -5,7 +5,7 @@ from .common import InfoExtractor
class VideoPremiumIE(InfoExtractor):
_VALID_URL = r'(?:https?://)?(?:www\.)?videopremium\.tv/(?P<id>\w+)(?:/.*)?'
_VALID_URL = r'(?:https?://)?(?:www\.)?videopremium\.(?:tv|me)/(?P<id>\w+)(?:/.*)?'
_TEST = {
u'url': u'http://videopremium.tv/4w7oadjsf156',
u'file': u'4w7oadjsf156.f4v',
@@ -41,4 +41,4 @@ class VideoPremiumIE(InfoExtractor):
'player_url': "http://videopremium.tv/uplayer/uppod.swf",
'ext': 'f4v',
'title': video_title,
}
}

View File

@@ -1,6 +1,8 @@
import re
from ..utils import (
ExtractorError,
unescapeHTML,
unified_strdate,
)
from .subtitles import SubtitlesInfoExtractor
@@ -20,7 +22,8 @@ class VikiIE(SubtitlesInfoExtractor):
u'description': u'md5:c4b17b9626dd4b143dcc4d855ba3474e',
u'upload_date': u'20131121',
u'age_limit': 13,
}
},
u'skip': u'Blocked in the US',
}
def _real_extract(self, url):
@@ -32,11 +35,12 @@ class VikiIE(SubtitlesInfoExtractor):
description = self._og_search_description(webpage)
thumbnail = self._og_search_thumbnail(webpage)
uploader = self._html_search_regex(
r'<strong>Broadcast Network: </strong>\s*([^<]*)<', webpage,
u'uploader')
if uploader is not None:
uploader = uploader.strip()
uploader_m = re.search(
r'<strong>Broadcast Network: </strong>\s*([^<]*)<', webpage)
if uploader_m is None:
uploader = None
else:
uploader = uploader_m.group(1).strip()
rating_str = self._html_search_regex(
r'<strong>Rating: </strong>\s*([^<]*)<', webpage,
@@ -51,7 +55,12 @@ class VikiIE(SubtitlesInfoExtractor):
age_limit = RATINGS.get(rating_str)
info_url = 'http://www.viki.com/player5_fragment/%s?action=show&controller=videos' % video_id
info_webpage = self._download_webpage(info_url, video_id)
info_webpage = self._download_webpage(
info_url, video_id, note=u'Downloading info page')
if re.match(r'\s*<div\s+class="video-error', info_webpage):
raise ExtractorError(
u'Video %s is blocked from your location.' % video_id,
expected=True)
video_url = self._html_search_regex(
r'<source[^>]+src="([^"]+)"', info_webpage, u'video URL')
@@ -83,7 +92,8 @@ class VikiIE(SubtitlesInfoExtractor):
def _get_available_subtitles(self, video_id, info_webpage):
res = {}
for sturl in re.findall(r'<track src="([^"]+)"/>'):
for sturl_html in re.findall(r'<track src="([^"]+)"/>', info_webpage):
sturl = unescapeHTML(sturl_html)
m = re.search(r'/(?P<lang>[a-z]+)\.vtt', sturl)
if not m:
continue

View File

@@ -46,7 +46,7 @@ class YahooIE(InfoExtractor):
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
items_json = self._search_regex(r'YVIDEO_INIT_ITEMS = ({.*?});$',
items_json = self._search_regex(r'mediaItems: ({.*?})$',
webpage, u'items', flags=re.MULTILINE)
items = json.loads(items_json)
info = items['mediaItems']['query']['results']['mediaObj'][0]
@@ -91,17 +91,13 @@ class YahooIE(InfoExtractor):
formats.append(format_info)
formats = sorted(formats, key=lambda f:(f['height'], f['width']))
info = {
return {
'id': video_id,
'title': meta['title'],
'formats': formats,
'description': clean_html(meta['description']),
'thumbnail': meta['thumbnail'],
}
# TODO: Remove when #980 has been merged
info.update(formats[-1])
return info
class YahooSearchIE(SearchInfoExtractor):

View File

@@ -11,7 +11,6 @@ import socket
import string
import struct
import traceback
import xml.etree.ElementTree
import zlib
from .common import InfoExtractor, SearchInfoExtractor
@@ -29,6 +28,7 @@ from ..utils import (
clean_html,
get_cachedir,
get_element_by_id,
get_element_by_attribute,
ExtractorError,
unescapeHTML,
unified_strdate,
@@ -248,21 +248,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
'248': 'webm',
}
_video_dimensions = {
'5': '240x400',
'5': '400x240',
'6': '???',
'13': '???',
'17': '144x176',
'18': '360x640',
'22': '720x1280',
'34': '360x640',
'35': '480x854',
'36': '240x320',
'37': '1080x1920',
'38': '3072x4096',
'43': '360x640',
'44': '480x854',
'45': '720x1280',
'46': '1080x1920',
'17': '176x144',
'18': '640x360',
'22': '1280x720',
'34': '640x360',
'35': '854x480',
'36': '320x240',
'37': '1920x1080',
'38': '4096x3072',
'43': '640x360',
'44': '854x480',
'45': '1280x720',
'46': '1920x1080',
'82': '360p',
'83': '480p',
'84': '720p',
@@ -1144,8 +1144,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
'asrs': 1,
})
list_url = caption_url + '&' + list_params
list_page = self._download_webpage(list_url, video_id)
caption_list = xml.etree.ElementTree.fromstring(list_page.encode('utf-8'))
caption_list = self._download_xml(list_url, video_id)
original_lang_node = caption_list.find('track')
if original_lang_node is None or original_lang_node.attrib.get('kind') != 'asr' :
self._downloader.report_warning(u'Video doesn\'t have automatic captions')
@@ -1528,7 +1527,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
)"""
_TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&page=%s'
_MORE_PAGES_INDICATOR = r'data-link-type="next"'
_VIDEO_RE = r'href="/watch\?v=([0-9A-Za-z_-]{11})&amp;'
_VIDEO_RE = r'href="/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)'
IE_NAME = u'youtube:playlist'
@classmethod
@@ -1539,6 +1538,24 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
def _real_initialize(self):
self._login()
def _ids_to_results(self, ids):
    """Wrap each video id in a url_result entry handled by the 'Youtube' IE."""
    results = []
    for vid_id in ids:
        results.append(self.url_result(vid_id, 'Youtube', video_id=vid_id))
    return results
def _extract_mix(self, playlist_id):
    """Build a playlist result for a mix from its watch page.

    Mixes are generated from a single video; the playlist id is just
    'RD' followed by that video's id, so the seed video id is
    playlist_id[2:].
    """
    seed_video_id = playlist_id[2:]
    url = 'https://youtube.com/watch?v=%s&list=%s' % (seed_video_id, playlist_id)
    webpage = self._download_webpage(url, playlist_id, u'Downloading Youtube mix')

    # Try the 'title long-title' class first, then fall back to 'title '.
    title_span = get_element_by_attribute('class', 'title long-title', webpage)
    if not title_span:
        title_span = get_element_by_attribute('class', 'title ', webpage)
    title = clean_html(title_span)

    # Only links that carry a data-index and point back into this same
    # list are taken as entries of the mix.
    escaped_list_id = re.escape(playlist_id)
    video_re = r'data-index="\d+".*?href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s' % escaped_list_id
    found_ids = re.findall(video_re, webpage)
    ids = orderedSet(found_ids)

    entries = self._ids_to_results(ids)
    return self.playlist_result(entries, playlist_id, title)
def _real_extract(self, url):
# Extract playlist id
mobj = re.match(self._VALID_URL, url, re.VERBOSE)
@@ -1556,14 +1573,20 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
else:
self.to_screen(u'Downloading playlist PL%s - add --no-playlist to just download video %s' % (playlist_id, video_id))
if len(playlist_id) == 13: # 'RD' + 11 characters for the video id
# Mixes require a custom extraction process
return self._extract_mix(playlist_id)
# Extract the video ids from the playlist pages
ids = []
for page_num in itertools.count(1):
url = self._TEMPLATE_URL % (playlist_id, page_num)
page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)
# The ids are duplicated
new_ids = orderedSet(re.findall(self._VIDEO_RE, page))
matches = re.finditer(self._VIDEO_RE, page)
# We remove the duplicates and the link with index 0
# (it's not the first video of the playlist)
new_ids = orderedSet(m.group('id') for m in matches if m.group('index') != '0')
ids.extend(new_ids)
if re.search(self._MORE_PAGES_INDICATOR, page) is None:
@@ -1571,8 +1594,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
playlist_title = self._og_search_title(page)
url_results = [self.url_result(vid_id, 'Youtube', video_id=vid_id)
for vid_id in ids]
url_results = self._ids_to_results(ids)
return self.playlist_result(url_results, playlist_id, playlist_title)
@@ -1769,7 +1791,6 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
"""
_LOGIN_REQUIRED = True
_PAGING_STEP = 30
# use action_load_personal_feed instead of action_load_system_feed
_PERSONAL_FEED = False
@@ -1789,9 +1810,8 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
def _real_extract(self, url):
feed_entries = []
# The step argument is available only in 2.7 or higher
for i in itertools.count(0):
paging = i*self._PAGING_STEP
paging = 0
for i in itertools.count(1):
info = self._download_webpage(self._FEED_TEMPLATE % paging,
u'%s feed' % self._FEED_NAME,
u'Downloading page %s' % i)
@@ -1804,6 +1824,7 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
for video_id in ids)
if info['paging'] is None:
break
paging = info['paging']
return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
@@ -1823,9 +1844,15 @@ class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
_VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
_FEED_NAME = 'watch_later'
_PLAYLIST_TITLE = u'Youtube Watch Later'
_PAGING_STEP = 100
_PERSONAL_FEED = True
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the watch history ("ythistory" keyword)."""

    IE_DESC = u'Youtube watch history, "ythistory" keyword (requires authentication)'
    # Raw string so that '\.' reaches the regex engine as written instead of
    # relying on Python passing an unrecognised escape sequence through.
    _VALID_URL = r'https?://www\.youtube\.com/feed/history|:ythistory'
    _FEED_NAME = 'history'
    # Makes the base class use action_load_personal_feed.
    _PERSONAL_FEED = True
    _PLAYLIST_TITLE = u'Youtube Watch History'
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
IE_NAME = u'youtube:favorites'
IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'

View File

@@ -1,75 +1,125 @@
# coding: utf-8
import operator
import re
from .common import InfoExtractor
from ..utils import (
determine_ext,
ExtractorError,
unified_strdate,
)
class ZDFIE(InfoExtractor):
_VALID_URL = r'^http://www\.zdf\.de\/ZDFmediathek(?P<hash>#)?\/(.*beitrag\/video\/)(?P<video_id>[^/\?]+)(?:\?.*)?'
_MEDIA_STREAM = r'<a href="(?P<video_url>.+(?P<media_type>.streaming).+/zdf/(?P<quality>[^\/]+)/[^"]*)".+class="play".+>'
_VALID_URL = r'^https?://www\.zdf\.de/ZDFmediathek(?P<hash>#)?/(.*beitrag/(?:video/)?)(?P<video_id>[0-9]+)(?:/[^/?]+)?(?:\?.*)?'
_TEST = {
u"url": u"http://www.zdf.de/ZDFmediathek/beitrag/video/2037704/ZDFspezial---Ende-des-Machtpokers--?bc=sts;stt",
u"file": u"2037704.webm",
u"info_dict": {
u"upload_date": u"20131127",
u"description": u"Union und SPD haben sich auf einen Koalitionsvertrag geeinigt. Aber was bedeutet das für die Bürger? Sehen Sie hierzu das ZDFspezial \"Ende des Machtpokers - Große Koalition für Deutschland\".",
u"uploader": u"spezial",
u"title": u"ZDFspezial - Ende des Machtpokers"
},
u"skip": u"Videos on ZDF.de are depublicised in short order",
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
if mobj is None:
raise ExtractorError(u'Invalid URL: %s' % url)
video_id = mobj.group('video_id')
if mobj.group('hash'):
url = url.replace(u'#', u'', 1)
xml_url = u'http://www.zdf.de/ZDFmediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
doc = self._download_xml(
xml_url, video_id,
note=u'Downloading video info',
errnote=u'Failed to download video info')
html = self._download_webpage(url, video_id)
streams = [m.groupdict() for m in re.finditer(self._MEDIA_STREAM, html)]
if streams is None:
raise ExtractorError(u'No media url found.')
title = doc.find('.//information/title').text
description = doc.find('.//information/detail').text
uploader_node = doc.find('.//details/originChannelTitle')
uploader = None if uploader_node is None else uploader_node.text
duration_str = doc.find('.//details/length').text
duration_m = re.match(r'''(?x)^
(?P<hours>[0-9]{2})
:(?P<minutes>[0-9]{2})
:(?P<seconds>[0-9]{2})
(?:\.(?P<ms>[0-9]+)?)
''', duration_str)
duration = (
(
(int(duration_m.group('hours')) * 60 * 60) +
(int(duration_m.group('minutes')) * 60) +
int(duration_m.group('seconds'))
)
if duration_m
else None
)
upload_date = unified_strdate(doc.find('.//details/airtime').text)
# s['media_type'] == 'wstreaming' -> use 'Windows Media Player' and mms url
# s['media_type'] == 'hstreaming' -> use 'Quicktime' and rtsp url
# choose first/default media type and highest quality for now
def stream_pref(s):
TYPE_ORDER = ['ostreaming', 'hstreaming', 'wstreaming']
def xml_to_format(fnode):
video_url = fnode.find('url').text
is_available = u'http://www.metafilegenerator' not in video_url
format_id = fnode.attrib['basetype']
format_m = re.match(r'''(?x)
(?P<vcodec>[^_]+)_(?P<acodec>[^_]+)_(?P<container>[^_]+)_
(?P<proto>[^_]+)_(?P<index>[^_]+)_(?P<indexproto>[^_]+)
''', format_id)
ext = format_m.group('container')
is_supported = ext != 'f4f'
PROTO_ORDER = ['http', 'rtmp', 'rtsp']
try:
type_pref = TYPE_ORDER.index(s['media_type'])
proto_pref = -PROTO_ORDER.index(format_m.group('proto'))
except ValueError:
type_pref = 999
proto_pref = 999
QUALITY_ORDER = ['veryhigh', '300']
quality = fnode.find('./quality').text
QUALITY_ORDER = ['veryhigh', '300', 'high', 'med', 'low']
try:
quality_pref = QUALITY_ORDER.index(s['quality'])
quality_pref = -QUALITY_ORDER.index(quality)
except ValueError:
quality_pref = 999
return (type_pref, quality_pref)
abr = int(fnode.find('./audioBitrate').text) // 1000
vbr = int(fnode.find('./videoBitrate').text) // 1000
pref = (is_available, is_supported,
proto_pref, quality_pref, vbr, abr)
sorted_streams = sorted(streams, key=stream_pref)
if not sorted_streams:
raise ExtractorError(u'No stream found.')
stream = sorted_streams[0]
format_note = u''
if not is_supported:
format_note += u'(unsupported)'
if not format_note:
format_note = None
media_link = self._download_webpage(
stream['video_url'],
video_id,
u'Get stream URL')
return {
'format_id': format_id + u'-' + quality,
'url': video_url,
'ext': ext,
'acodec': format_m.group('acodec'),
'vcodec': format_m.group('vcodec'),
'abr': abr,
'vbr': vbr,
'width': int(fnode.find('./width').text),
'height': int(fnode.find('./height').text),
'filesize': int(fnode.find('./filesize').text),
'format_note': format_note,
'_pref': pref,
'_available': is_available,
}
#MMS_STREAM = r'href="(?P<video_url>mms://[^"]*)"'
RTSP_STREAM = r'(?P<video_url>rtsp://[^"]*.mp4)'
mobj = re.search(self._MEDIA_STREAM, media_link)
if mobj is None:
mobj = re.search(RTSP_STREAM, media_link)
if mobj is None:
raise ExtractorError(u'Cannot extract mms:// or rtsp:// URL')
video_url = mobj.group('video_url')
title = self._html_search_regex(
r'<h1(?: class="beitragHeadline")?>(.*?)</h1>',
html, u'title')
format_nodes = doc.findall('.//formitaeten/formitaet')
formats = sorted(filter(lambda f: f['_available'],
map(xml_to_format, format_nodes)),
key=operator.itemgetter('_pref'))
return {
'id': video_id,
'url': video_url,
'title': title,
'ext': determine_ext(video_url)
'formats': formats,
'description': description,
'uploader': uploader,
'duration': duration,
'upload_date': upload_date,
}

View File

@@ -8,6 +8,7 @@ import gzip
import io
import json
import locale
import math
import os
import pipes
import platform
@@ -16,6 +17,7 @@ import ssl
import socket
import sys
import traceback
import xml.etree.ElementTree
import zlib
try:
@@ -536,8 +538,7 @@ def formatSeconds(secs):
else:
return '%d' % secs
def make_HTTPS_handler(opts):
def make_HTTPS_handler(opts_no_check_certificate):
if sys.version_info < (3, 2):
import httplib
@@ -552,7 +553,7 @@ def make_HTTPS_handler(opts):
self._tunnel()
try:
self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv3)
except ssl.SSLError as e:
except ssl.SSLError:
self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv23)
class HTTPSHandlerV3(compat_urllib_request.HTTPSHandler):
@@ -564,7 +565,7 @@ def make_HTTPS_handler(opts):
context.set_default_verify_paths()
context.verify_mode = (ssl.CERT_NONE
if opts.no_check_certificate
if opts_no_check_certificate
else ssl.CERT_REQUIRED)
return compat_urllib_request.HTTPSHandler(context=context)
@@ -1006,3 +1007,17 @@ def unsmuggle_url(smug_url):
jsond = compat_parse_qs(sdata)[u'__youtubedl_smuggle'][0]
data = json.loads(jsond)
return url, data
def format_bytes(bytes):
    """Format a byte count as a string with a binary (1024-based) suffix.

    bytes may be None, a number, or a str holding a number.  None yields
    u'N/A'; anything else yields the value scaled to the largest fitting
    unit with two decimals, e.g. u'1.50KiB'.

    Raises ValueError for negative values (math.log rejects them) and for
    strings that float() cannot parse.
    """
    if bytes is None:
        return u'N/A'
    if isinstance(bytes, str):
        bytes = float(bytes)

    suffixes = [u'B', u'KiB', u'MiB', u'GiB', u'TiB', u'PiB', u'EiB', u'ZiB', u'YiB']
    if bytes == 0.0:
        exponent = 0
    else:
        exponent = int(math.log(bytes, 1024.0))
        # Clamp to the available suffixes: values below 1 would otherwise
        # produce a negative exponent (indexing the list from the end) and
        # values of 1024**9 or more would index past the end.
        exponent = min(max(exponent, 0), len(suffixes) - 1)

    converted = float(bytes) / float(1024 ** exponent)
    return u'%.2f%s' % (converted, suffixes[exponent])

View File

@@ -1,2 +1,2 @@
__version__ = '2013.11.24'
__version__ = '2013.11.29'