Compare commits

...

88 Commits

Author SHA1 Message Date
Philipp Hagemeister
ddfd0f2727 release 2014.11.26 2014-11-26 10:46:12 +01:00
Philipp Hagemeister
d0720e7118 Merge branch 'master' of github.com:rg3/youtube-dl 2014-11-26 10:45:57 +01:00
Philipp Hagemeister
4e262a8838 [generic] Detect direct video links (Fixes #4149, #4313) 2014-11-26 10:44:39 +01:00
Sergey M․
b9ed3af343 [tass] Add extractor (Closes #4296) 2014-11-25 22:24:33 +06:00
Philipp Hagemeister
63c9b2c1d9 release 2014.11.25.1 2014-11-25 14:34:29 +01:00
Philipp Hagemeister
65f3a228b1 [generic] Add support for LazyYT embeds (Fixes #4306) 2014-11-25 14:34:19 +01:00
Philipp Hagemeister
3004ae2c3a Credit @t0mm0 for xminus (#4302) 2014-11-25 12:16:48 +01:00
Philipp Hagemeister
d9836a5917 release 2014.11.25 2014-11-25 09:56:52 +01:00
Philipp Hagemeister
be64b5b098 [xminus] Simplify and extend (#4302) 2014-11-25 09:54:54 +01:00
Philipp Hagemeister
c3e74731c2 [README] Mention _og_search_description (#4304)
Lots of sites do have this meta tag, so just add it to the example.
2014-11-25 09:36:27 +01:00
Philipp Hagemeister
c920d7f00d [README] Adapt code to new style
Next to every IE will download the webpage first anyways.
2014-11-25 09:23:46 +01:00
Philipp Hagemeister
0bbf12239c Merge remote-tracking branch 't0mm0/x-minus' 2014-11-25 09:22:33 +01:00
Philipp Hagemeister
70d68eb46f Credit @MatthewRayfield for tmz (#4304) 2014-11-25 09:17:59 +01:00
Philipp Hagemeister
c553fe5d29 [tmz] Simplify (#4304) 2014-11-25 09:16:40 +01:00
Matthew Rayfield
f0c3d729d7 [tmz] Add new extractor 2014-11-25 02:54:13 -05:00
t0mm0
1cdedfee10 [XMinus] Added new extractor. 2014-11-25 03:25:28 +00:00
Philipp Hagemeister
93129d9442 release 2014.11.24 2014-11-24 22:56:43 +01:00
Philipp Hagemeister
e8c8653e9d Merge remote-tracking branch 'origin/master' 2014-11-24 22:52:04 +01:00
Philipp Hagemeister
fab89c67c5 Credit @ossi96 for bpb (#4298) 2014-11-24 22:47:49 +01:00
Philipp Hagemeister
3d960a22fa [bpb] Simplify (#4298) 2014-11-24 22:47:23 +01:00
Philipp Hagemeister
51bbb084d3 Merge remote-tracking branch 'ossi96/bpb' 2014-11-24 22:42:56 +01:00
Naglis Jonaitis
2c25a2bd29 [tunein] Add new extractor (Closes #4097) 2014-11-24 23:15:33 +02:00
Oskar Jauch
355682be01 bpb Add new extractor 2014-11-24 20:02:00 +01:00
Jaime Marquínez Ferrándiz
00e9d396ab [francetv] Use the m3u8 manifest for georestricted videos (closes #3963)
Generating the correct urls for the f4m segments seems to require a lot of work.
Also raise an error if the video is not available from your location.
2014-11-24 19:49:43 +01:00
Philipp Hagemeister
14d4e90eb1 [downloader/__init__] Define proper __all__ 2014-11-23 22:25:12 +01:00
Philipp Hagemeister
b74e86f48a Fix all PEP8 issues except E501 2014-11-23 22:21:46 +01:00
Philipp Hagemeister
3d36cea4ac [vk] PEP8 2014-11-23 22:14:27 +01:00
Philipp Hagemeister
380b822003 Remove outdated transition helper scripts 2014-11-23 22:13:03 +01:00
Philipp Hagemeister
b66e699877 [myspace] pep8 and modernization 2014-11-23 22:12:18 +01:00
Philipp Hagemeister
27f8b0994e Merge remote-tracking branch 'jtwaleson/master' 2014-11-23 22:10:26 +01:00
Philipp Hagemeister
e311b6389a Credit @daohoangson for zingmp3 (#4288) 2014-11-23 22:01:15 +01:00
Jouke Waleson
fab6d4c048 remove useless line, the result is never used 2014-11-23 22:00:35 +01:00
Philipp Hagemeister
4ffc31033e [zingmp3] Simplify and PEP8 (#4288) 2014-11-23 22:00:25 +01:00
Philipp Hagemeister
c1777d5cb3 Merge remote-tracking branch 'daohoangson/zing-mp3' 2014-11-23 21:55:51 +01:00
Jouke Waleson
9e1a5b8455 PEP8: applied even more rules 2014-11-23 21:39:15 +01:00
Philipp Hagemeister
784b6d3a9b Merge remote-tracking branch 'jtwaleson/master' 2014-11-23 21:33:31 +01:00
Dao Hoang Son
c66bdc4869 [zingmp3] Added support for songs and albums 2014-11-24 03:25:47 +07:00
Jouke Waleson
2514d2635e PEP8: E225,E227 2014-11-23 21:23:05 +01:00
Jouke Waleson
8bcc875676 PEP8: more applied 2014-11-23 21:20:46 +01:00
Jouke Waleson
5f6a1245ff PEP8 applied 2014-11-23 20:41:03 +01:00
Philipp Hagemeister
f3a3407226 [youtube] Clarify keywords 2014-11-23 20:09:10 +01:00
Sergey M․
598c218f7b [smotri] Adapt to new API and modernize 2014-11-23 23:53:41 +06:00
Naglis Jonaitis
4698b14b76 [rtlxl] Strip additional dot from video URL (#4115) 2014-11-23 13:28:09 +02:00
Philipp Hagemeister
835a22ef3f release 2014.11.23.1 2014-11-23 10:51:16 +01:00
Philipp Hagemeister
7d4111ed14 Provide guidance when called with a YouTube ID starting with a dash.
Reported at https://news.ycombinator.com/item?id=8648121
2014-11-23 10:51:09 +01:00
Philipp Hagemeister
d37cab2a9d Credit @WillSewell for vk:user (#4233) 2014-11-23 10:12:35 +01:00
Philipp Hagemeister
d16abf434a [vk] Some PEP8 love 2014-11-23 10:11:52 +01:00
Philipp Hagemeister
a8363f3ab7 [vk] Clarify test 2014-11-23 10:11:04 +01:00
Philipp Hagemeister
010cd3a3ee Merge remote-tracking branch 'WillSewell/vk-playlists' 2014-11-23 10:09:45 +01:00
Philipp Hagemeister
b9042def9d release 2014.11.23 2014-11-23 09:59:42 +01:00
Philipp Hagemeister
aa79ac0c82 [youtube] Support controversy videos (Fixes #4275) 2014-11-23 09:59:02 +01:00
Philipp Hagemeister
88125905cf Credit @nulloz for telebruxelles (#4270) 2014-11-23 09:49:23 +01:00
Philipp Hagemeister
dd60be2bf9 [telebruxelles] Simplify (#4270) 2014-11-23 09:44:42 +01:00
Philipp Hagemeister
119b3caa46 Merge remote-tracking branch 'nulloz/telebruxelles' 2014-11-23 09:38:18 +01:00
Naglis Jonaitis
49f0da7ae1 [rtlxl] Use unencrypted m3u8 streams (#4115) 2014-11-22 21:06:45 +02:00
nulloz
2cead7e7bc telebruxelles Add new extractor 2014-11-22 13:34:29 +01:00
Will Sewell
9262867e86 [vk.com] Added newline at the end of the file. 2014-11-21 23:25:05 +00:00
Will Sewell
b9272e8f8f [vk.com] Removed redundant log message -- this information is already being logged. 2014-11-21 23:22:52 +00:00
Will Sewell
021a0db8f7 [vk.com] Simplified the page_id acquisition by using the id matched in the URL earlier on. 2014-11-21 23:22:44 +00:00
Will Sewell
e1e8b6897b [vk.com] Updated the extract_videos_from_page function with a much simpler 1-liner. 2014-11-21 23:16:12 +00:00
Will Sewell
53d1cd1f77 [vk.com] Updated the _VALID_URL regex for the playlist IE. Removed optional m, and named the id group. 2014-11-21 23:03:31 +00:00
Will Sewell
cad985ab4d [vk.com] Updated the description to include vk.com. 2014-11-21 23:00:43 +00:00
Will Sewell
c52331f30c [vk.com] Updated a test video that has been removed, and added a comment for others to update two other test videos that are also now removed. 2014-11-21 23:00:33 +00:00
Will Sewell
42e1ff8665 [vk.com] Added upload_date variable to the test cases that still work. 2014-11-21 23:00:17 +00:00
Philipp Hagemeister
2c64b8ba63 release 2014.11.21.1 2014-11-21 22:47:23 +01:00
Philipp Hagemeister
42e12102a9 [YoutubeDL] Fix multi_video check 2014-11-21 22:39:57 +01:00
Philipp Hagemeister
6127693ed9 [folketinget] Add extractor (Fixes #4262) 2014-11-21 22:36:24 +01:00
Philipp Hagemeister
71069d2157 [sztv] Remove useless determine_ext call 2014-11-21 22:03:29 +01:00
Philipp Hagemeister
f3391db889 [sztvhu] Modernize 2014-11-21 22:02:16 +01:00
Philipp Hagemeister
9b32eca3ce [generic] Add support for single quotes in HTML5 videos (Fixes #4265) 2014-11-21 22:01:25 +01:00
Philipp Hagemeister
ec06f0f610 release 2014.11.21 2014-11-21 10:41:18 +01:00
Philipp Hagemeister
e6c9c8f6ee Merge pull request #4261 from tinybug/patch-4
Update jsinterp.py
2014-11-21 10:41:02 +01:00
tinybug
85b9275517 Update jsinterp.py
http://s.ytimg.com/yts/jsbin/html5player-zh_HK-vfl1NK6PR/html5player.js 
fix raise ExtractorError
2014-11-21 17:09:22 +08:00
Philipp Hagemeister
dfd5313afd [YoutubeDL] Support new _type multi_video 2014-11-21 00:25:46 +01:00
Philipp Hagemeister
be53e2a737 [blip.tv:user] Modernize and add a test 2014-11-21 00:25:13 +01:00
Philipp Hagemeister
a1c68b9ef2 Merge remote-tracking branch 'origin/master' 2014-11-21 00:17:58 +01:00
Jaime Marquínez Ferrándiz
4d46c1c68c [brightcove] Improve error message detection (#4256) 2014-11-20 18:44:54 +01:00
Jaime Marquínez Ferrándiz
d6f714f321 [brightcove] Remove the namespace from the BrightcoveExperience html object 2014-11-20 18:37:08 +01:00
Philipp Hagemeister
8569f3d629 [vh1] Modernize 2014-11-20 16:51:33 +01:00
Philipp Hagemeister
fed5d03260 [extractor/common] Document _type values (Motivated by #4254) 2014-11-20 16:47:59 +01:00
Philipp Hagemeister
6adeffa7c6 [comedycentral] Modernize 2014-11-20 16:36:53 +01:00
Philipp Hagemeister
b244b5c3f9 remove unused imports 2014-11-20 16:36:13 +01:00
Philipp Hagemeister
f42c190769 [stanfordoc] Modernize 2014-11-20 16:34:54 +01:00
Philipp Hagemeister
c9bf41145f [YoutubeDL] Warn if an extractor returns compat_list 2014-11-20 16:29:31 +01:00
Philipp Hagemeister
5239075bb6 [mtv] Return a proper playlist result (#4254) 2014-11-20 16:25:19 +01:00
Will Sewell
02a12f9fe6 [vk] date_added is now extracted from the video page. 2014-11-18 20:19:56 +00:00
Will Sewell
6fcd6e0e21 [vk] Updated the regex for matching user video pages. It now matches optional URL parameters too. 2014-11-18 19:34:12 +00:00
Will Sewell
469d4c8968 [vk] Added a new information extractor for pages that are a list of a user\'s videos on vk.com. It works in a same way to playlist style pages for the YT information extractors. 2014-11-17 17:53:34 -05:00
220 changed files with 1758 additions and 1094 deletions

View File

@@ -82,3 +82,9 @@ Xavier Beynon
Gabriel Schubiner
xantares
Jan Matějka
Mauroy Sébastien
William Sewell
Dao Hoang Son
Oskar Jauch
Matthew Rayfield
t0mm0

View File

@@ -492,14 +492,15 @@ If you want to add support for a new site, you can follow this quick list (assum
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
# TODO more code goes here, for example ...
webpage = self._download_webpage(url, video_id)
title = self._html_search_regex(r'<h1>(.*?)</h1>', webpage, 'title')
return {
'id': video_id,
'title': title,
'description': self._og_search_description(webpage),
# TODO more properties (see youtube_dl/extractor/common.py)
}
```

View File

@@ -9,6 +9,7 @@ import youtube_dl
BASH_COMPLETION_FILE = "youtube-dl.bash-completion"
BASH_COMPLETION_TEMPLATE = "devscripts/bash-completion.in"
def build_completion(opt_parser):
opts_flag = []
for group in opt_parser.option_groups:

View File

@@ -233,6 +233,7 @@ def rmtree(path):
#==============================================================================
class BuildError(Exception):
def __init__(self, output, code=500):
self.output = output

View File

@@ -23,13 +23,13 @@ EXTRA_ARGS = {
'batch-file': ['--require-parameter'],
}
def build_completion(opt_parser):
commands = []
for group in opt_parser.option_groups:
for option in group.option_list:
long_option = option.get_opt_string().strip('-')
help_msg = shell_quote([option.help])
complete_cmd = ['complete', '--command', 'youtube-dl', '--long-option', long_option]
if option._short_opts:
complete_cmd += ['--short-option', option._short_opts[0].strip('-')]

View File

@@ -1,8 +1,5 @@
#!/usr/bin/env python3
import hashlib
import shutil
import subprocess
import tempfile
import urllib.request
import json

View File

@@ -73,4 +73,3 @@ atom_template = atom_template.replace('@ENTRIES@', entries_str)
with io.open('update/releases.atom', 'w', encoding='utf-8') as atom_file:
atom_file.write(atom_template)

View File

@@ -9,6 +9,7 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(
import youtube_dl
def main():
with open('supportedsites.html.in', 'r', encoding='utf-8') as tmplf:
template = tmplf.read()
@@ -21,7 +22,7 @@ def main():
continue
elif ie_desc is not None:
ie_html += ': {}'.format(ie.IE_DESC)
if ie.working() == False:
if not ie.working():
ie_html += ' (Currently broken)'
ie_htmls.append('<li>{}</li>'.format(ie_html))

View File

@@ -1,40 +0,0 @@
#!/usr/bin/env python
import sys, os
try:
import urllib.request as compat_urllib_request
except ImportError: # Python 2
import urllib2 as compat_urllib_request
sys.stderr.write(u'Hi! We changed distribution method and now youtube-dl needs to update itself one more time.\n')
sys.stderr.write(u'This will only happen once. Simply press enter to go on. Sorry for the trouble!\n')
sys.stderr.write(u'The new location of the binaries is https://github.com/rg3/youtube-dl/downloads, not the git repository.\n\n')
try:
raw_input()
except NameError: # Python 3
input()
filename = sys.argv[0]
API_URL = "https://api.github.com/repos/rg3/youtube-dl/downloads"
BIN_URL = "https://github.com/downloads/rg3/youtube-dl/youtube-dl"
if not os.access(filename, os.W_OK):
sys.exit('ERROR: no write permissions on %s' % filename)
try:
urlh = compat_urllib_request.urlopen(BIN_URL)
newcontent = urlh.read()
urlh.close()
except (IOError, OSError) as err:
sys.exit('ERROR: unable to download latest version')
try:
with open(filename, 'wb') as outf:
outf.write(newcontent)
except (IOError, OSError) as err:
sys.exit('ERROR: unable to overwrite current version')
sys.stderr.write(u'Done! Now you can run youtube-dl.\n')

View File

@@ -1,12 +0,0 @@
from distutils.core import setup
import py2exe
py2exe_options = {
"bundle_files": 1,
"compressed": 1,
"optimize": 2,
"dist_dir": '.',
"dll_excludes": ['w9xpopen.exe']
}
setup(console=['youtube-dl.py'], options={ "py2exe": py2exe_options }, zipfile=None)

View File

@@ -1,102 +0,0 @@
#!/usr/bin/env python
import sys, os
import urllib2
import json, hashlib
def rsa_verify(message, signature, key):
from struct import pack
from hashlib import sha256
from sys import version_info
def b(x):
if version_info[0] == 2: return x
else: return x.encode('latin1')
assert(type(message) == type(b('')))
block_size = 0
n = key[0]
while n:
block_size += 1
n >>= 8
signature = pow(int(signature, 16), key[1], key[0])
raw_bytes = []
while signature:
raw_bytes.insert(0, pack("B", signature & 0xFF))
signature >>= 8
signature = (block_size - len(raw_bytes)) * b('\x00') + b('').join(raw_bytes)
if signature[0:2] != b('\x00\x01'): return False
signature = signature[2:]
if not b('\x00') in signature: return False
signature = signature[signature.index(b('\x00'))+1:]
if not signature.startswith(b('\x30\x31\x30\x0D\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x01\x05\x00\x04\x20')): return False
signature = signature[19:]
if signature != sha256(message).digest(): return False
return True
sys.stderr.write(u'Hi! We changed distribution method and now youtube-dl needs to update itself one more time.\n')
sys.stderr.write(u'This will only happen once. Simply press enter to go on. Sorry for the trouble!\n')
sys.stderr.write(u'From now on, get the binaries from http://rg3.github.com/youtube-dl/download.html, not from the git repository.\n\n')
raw_input()
filename = sys.argv[0]
UPDATE_URL = "http://rg3.github.io/youtube-dl/update/"
VERSION_URL = UPDATE_URL + 'LATEST_VERSION'
JSON_URL = UPDATE_URL + 'versions.json'
UPDATES_RSA_KEY = (0x9d60ee4d8f805312fdb15a62f87b95bd66177b91df176765d13514a0f1754bcd2057295c5b6f1d35daa6742c3ffc9a82d3e118861c207995a8031e151d863c9927e304576bc80692bc8e094896fcf11b66f3e29e04e3a71e9a11558558acea1840aec37fc396fb6b65dc81a1c4144e03bd1c011de62e3f1357b327d08426fe93, 65537)
if not os.access(filename, os.W_OK):
sys.exit('ERROR: no write permissions on %s' % filename)
exe = os.path.abspath(filename)
directory = os.path.dirname(exe)
if not os.access(directory, os.W_OK):
sys.exit('ERROR: no write permissions on %s' % directory)
try:
versions_info = urllib2.urlopen(JSON_URL).read().decode('utf-8')
versions_info = json.loads(versions_info)
except:
sys.exit(u'ERROR: can\'t obtain versions info. Please try again later.')
if not 'signature' in versions_info:
sys.exit(u'ERROR: the versions file is not signed or corrupted. Aborting.')
signature = versions_info['signature']
del versions_info['signature']
if not rsa_verify(json.dumps(versions_info, sort_keys=True), signature, UPDATES_RSA_KEY):
sys.exit(u'ERROR: the versions file signature is invalid. Aborting.')
version = versions_info['versions'][versions_info['latest']]
try:
urlh = urllib2.urlopen(version['exe'][0])
newcontent = urlh.read()
urlh.close()
except (IOError, OSError) as err:
sys.exit('ERROR: unable to download latest version')
newcontent_hash = hashlib.sha256(newcontent).hexdigest()
if newcontent_hash != version['exe'][1]:
sys.exit(u'ERROR: the downloaded file hash does not match. Aborting.')
try:
with open(exe + '.new', 'wb') as outf:
outf.write(newcontent)
except (IOError, OSError) as err:
sys.exit(u'ERROR: unable to write the new version')
try:
bat = os.path.join(directory, 'youtube-dl-updater.bat')
b = open(bat, 'w')
b.write("""
echo Updating youtube-dl...
ping 127.0.0.1 -n 5 -w 1000 > NUL
move /Y "%s.new" "%s"
del "%s"
\n""" %(exe, exe, bat))
b.close()
os.startfile(bat)
except (IOError, OSError) as err:
sys.exit('ERROR: unable to overwrite current version')
sys.stderr.write(u'Done! Now you can run youtube-dl.\n')

View File

@@ -4,7 +4,6 @@
from __future__ import print_function
import os.path
import pkg_resources
import warnings
import sys

View File

@@ -72,8 +72,10 @@ class FakeYDL(YoutubeDL):
def expect_warning(self, regex):
# Silence an expected warning matching a regex
old_report_warning = self.report_warning
def report_warning(self, message):
if re.match(regex, message): return
if re.match(regex, message):
return
old_report_warning(message)
self.report_warning = types.MethodType(report_warning, self)

View File

@@ -266,6 +266,7 @@ class TestFormatSelection(unittest.TestCase):
'ext': 'mp4',
'width': None,
}
def fname(templ):
ydl = YoutubeDL({'outtmpl': templ})
return ydl.prepare_filename(info)

View File

@@ -40,18 +40,22 @@ from youtube_dl.extractor import get_info_extractor
RETRIES = 3
class YoutubeDL(youtube_dl.YoutubeDL):
def __init__(self, *args, **kwargs):
self.to_stderr = self.to_screen
self.processed_info_dicts = []
super(YoutubeDL, self).__init__(*args, **kwargs)
def report_warning(self, message):
# Don't accept warnings during tests
raise ExtractorError(message)
def process_info(self, info_dict):
self.processed_info_dicts.append(info_dict)
return super(YoutubeDL, self).process_info(info_dict)
def _file_md5(fn):
with open(fn, 'rb') as f:
return hashlib.md5(f.read()).hexdigest()
@@ -61,10 +65,13 @@ defs = gettestcases()
class TestDownload(unittest.TestCase):
maxDiff = None
def setUp(self):
self.defs = defs
### Dynamically generate tests
# Dynamically generate tests
def generator(test_case):
def test_template(self):
@@ -101,6 +108,7 @@ def generator(test_case):
ydl = YoutubeDL(params, auto_init=False)
ydl.add_default_info_extractors()
finished_hook_called = set()
def _hook(status):
if status['status'] == 'finished':
finished_hook_called.add(status['filename'])
@@ -111,6 +119,7 @@ def generator(test_case):
return tc.get('file') or ydl.prepare_filename(tc.get('info_dict', {}))
res_dict = None
def try_rm_tcs_files(tcs=None):
if tcs is None:
tcs = test_cases
@@ -206,7 +215,7 @@ def generator(test_case):
return test_template
### And add them to TestDownload
# And add them to TestDownload
for n, test_case in enumerate(defs):
test_method = generator(test_case)
tname = 'test_' + str(test_case['name'])

View File

@@ -23,6 +23,7 @@ from youtube_dl.extractor import (
class BaseTestSubtitles(unittest.TestCase):
url = None
IE = None
def setUp(self):
self.DL = FakeYDL()
self.ie = self.IE(self.DL)

View File

@@ -45,8 +45,9 @@ from youtube_dl.utils import (
escape_rfc3986,
escape_url,
js_to_json,
get_filesystem_encoding,
intlist_to_bytes,
args_to_str,
parse_filesize,
)
@@ -361,5 +362,20 @@ class TestUtil(unittest.TestCase):
intlist_to_bytes([0, 1, 127, 128, 255]),
b'\x00\x01\x7f\x80\xff')
def test_args_to_str(self):
self.assertEqual(
args_to_str(['foo', 'ba/r', '-baz', '2 be', '']),
'foo ba/r -baz \'2 be\' \'\''
)
def test_parse_filesize(self):
self.assertEqual(parse_filesize(None), None)
self.assertEqual(parse_filesize(''), None)
self.assertEqual(parse_filesize('91 B'), 91)
self.assertEqual(parse_filesize('foobar'), None)
self.assertEqual(parse_filesize('2 MiB'), 2097152)
self.assertEqual(parse_filesize('5 GB'), 5000000000)
self.assertEqual(parse_filesize('1.2Tb'), 1200000000000)
if __name__ == '__main__':
unittest.main()

View File

@@ -31,17 +31,16 @@ params = get_params({
})
TEST_ID = 'gr51aVj-mLg'
ANNOTATIONS_FILE = TEST_ID + '.flv.annotations.xml'
EXPECTED_ANNOTATIONS = ['Speech bubble', 'Note', 'Title', 'Spotlight', 'Label']
class TestAnnotations(unittest.TestCase):
def setUp(self):
# Clear old files
self.tearDown()
def test_info_json(self):
expected = list(EXPECTED_ANNOTATIONS) # Two annotations could have the same text.
ie = youtube_dl.extractor.YoutubeIE()
@@ -71,7 +70,6 @@ class TestAnnotations(unittest.TestCase):
# We should have seen (and removed) all the expected annotation texts.
self.assertEqual(len(expected), 0, 'Not all expected annotations were found.')
def tearDown(self):
try_rm(ANNOTATIONS_FILE)

View File

@@ -12,10 +12,6 @@ from test.helper import FakeYDL
from youtube_dl.extractor import (
YoutubePlaylistIE,
YoutubeIE,
YoutubeChannelIE,
YoutubeShowIE,
YoutubeTopListIE,
YoutubeSearchURLIE,
)

View File

@@ -60,6 +60,7 @@ from .utils import (
write_string,
YoutubeDLHandler,
prepend_extension,
args_to_str,
)
from .cache import Cache
from .extractor import get_info_extractor, gen_extractors
@@ -253,6 +254,22 @@ class YoutubeDL(object):
self.print_debug_header()
self.add_default_info_extractors()
def warn_if_short_id(self, argv):
# short YouTube ID starting with dash?
idxs = [
i for i, a in enumerate(argv)
if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
if idxs:
correct_argv = (
['youtube-dl'] +
[a for i, a in enumerate(argv) if i not in idxs] +
['--'] + [argv[i] for i in idxs]
)
self.report_warning(
'Long argument string detected. '
'Use -- to separate parameters and URLs, like this:\n%s\n' %
args_to_str(correct_argv))
def add_info_extractor(self, ie):
"""Add an InfoExtractor object to the end of the list."""
self._ies.append(ie)
@@ -624,7 +641,7 @@ class YoutubeDL(object):
return self.process_ie_result(
new_result, download=download, extra_info=extra_info)
elif result_type == 'playlist':
elif result_type == 'playlist' or result_type == 'multi_video':
# We process each entry in the playlist
playlist = ie_result.get('title', None) or ie_result.get('id', None)
self.to_screen('[download] Downloading playlist: %s' % playlist)
@@ -679,14 +696,20 @@ class YoutubeDL(object):
ie_result['entries'] = playlist_results
return ie_result
elif result_type == 'compat_list':
self.report_warning(
'Extractor %s returned a compat_list result. '
'It needs to be updated.' % ie_result.get('extractor'))
def _fixup(r):
self.add_extra_info(r,
self.add_extra_info(
r,
{
'extractor': ie_result['extractor'],
'webpage_url': ie_result['webpage_url'],
'webpage_url_basename': url_basename(ie_result['webpage_url']),
'extractor_key': ie_result['extractor_key'],
})
}
)
return r
ie_result['entries'] = [
self.process_ie_result(_fixup(r), download, extra_info)

View File

@@ -128,7 +128,6 @@ def _real_main(argv=None):
compat_print(desc)
sys.exit(0)
# Conflicting, missing and erroneous options
if opts.usenetrc and (opts.username is not None or opts.password is not None):
parser.error('using .netrc conflicts with giving username/password')
@@ -190,7 +189,7 @@ def _real_main(argv=None):
# --all-sub automatically sets --write-sub if --write-auto-sub is not given
# this was the old behaviour if only --all-sub was given.
if opts.allsubtitles and (opts.writeautomaticsub == False):
if opts.allsubtitles and not opts.writeautomaticsub:
opts.writesubtitles = True
if sys.version_info < (3,):
@@ -317,7 +316,6 @@ def _real_main(argv=None):
ydl.add_post_processor(FFmpegAudioFixPP())
ydl.add_post_processor(AtomicParsleyPP())
# Please keep ExecAfterDownload towards the bottom as it allows the user to modify the final file in any way.
# So if the user is able to remove the file before your postprocessor runs it might cause a few problems.
if opts.exec_cmd:
@@ -334,11 +332,12 @@ def _real_main(argv=None):
# Maybe do nothing
if (len(all_urls) < 1) and (opts.load_info_filename is None):
if not (opts.update_self or opts.rm_cachedir):
parser.error('you must provide at least one URL')
else:
if opts.update_self or opts.rm_cachedir:
sys.exit()
ydl.warn_if_short_id(sys.argv[1:] if argv is None else argv)
parser.error('you must provide at least one URL')
try:
if opts.load_info_filename is not None:
retcode = ydl.download_with_info_file(opts.load_info_filename)

View File

@@ -7,6 +7,7 @@ from .utils import bytes_to_intlist, intlist_to_bytes
BLOCK_SIZE_BYTES = 16
def aes_ctr_decrypt(data, key, counter):
"""
Decrypt with aes in counter mode
@@ -32,6 +33,7 @@ def aes_ctr_decrypt(data, key, counter):
return decrypted_data
def aes_cbc_decrypt(data, key, iv):
"""
Decrypt with aes in CBC mode
@@ -57,6 +59,7 @@ def aes_cbc_decrypt(data, key, iv):
return decrypted_data
def key_expansion(data):
"""
Generate key schedule
@@ -91,6 +94,7 @@ def key_expansion(data):
return data
def aes_encrypt(data, expanded_key):
"""
Encrypt one block with aes
@@ -111,6 +115,7 @@ def aes_encrypt(data, expanded_key):
return data
def aes_decrypt(data, expanded_key):
"""
Decrypt one block with aes
@@ -131,6 +136,7 @@ def aes_decrypt(data, expanded_key):
return data
def aes_decrypt_text(data, password, key_size_bytes):
"""
Decrypt text
@@ -157,6 +163,7 @@ def aes_decrypt_text(data, password, key_size_bytes):
class Counter:
__value = nonce + [0] * (BLOCK_SIZE_BYTES - NONCE_LENGTH_BYTES)
def next_value(self):
temp = self.__value
self.__value = inc(self.__value)
@@ -241,15 +248,19 @@ RIJNDAEL_LOG_TABLE = (0x00, 0x00, 0x19, 0x01, 0x32, 0x02, 0x1a, 0xc6, 0x4b, 0xc7
0x44, 0x11, 0x92, 0xd9, 0x23, 0x20, 0x2e, 0x89, 0xb4, 0x7c, 0xb8, 0x26, 0x77, 0x99, 0xe3, 0xa5,
0x67, 0x4a, 0xed, 0xde, 0xc5, 0x31, 0xfe, 0x18, 0x0d, 0x63, 0x8c, 0x80, 0xc0, 0xf7, 0x70, 0x07)
def sub_bytes(data):
return [SBOX[x] for x in data]
def sub_bytes_inv(data):
return [SBOX_INV[x] for x in data]
def rotate(data):
return data[1:] + [data[0]]
def key_schedule_core(data, rcon_iteration):
data = rotate(data)
data = sub_bytes(data)
@@ -257,14 +268,17 @@ def key_schedule_core(data, rcon_iteration):
return data
def xor(data1, data2):
return [x ^ y for x, y in zip(data1, data2)]
def rijndael_mul(a, b):
if(a == 0 or b == 0):
return 0
return RIJNDAEL_EXP_TABLE[(RIJNDAEL_LOG_TABLE[a] + RIJNDAEL_LOG_TABLE[b]) % 0xFF]
def mix_column(data, matrix):
data_mixed = []
for row in range(4):
@@ -275,6 +289,7 @@ def mix_column(data, matrix):
data_mixed.append(mixed)
return data_mixed
def mix_columns(data, matrix=MIX_COLUMN_MATRIX):
data_mixed = []
for i in range(4):
@@ -282,9 +297,11 @@ def mix_columns(data, matrix=MIX_COLUMN_MATRIX):
data_mixed += mix_column(column, matrix)
return data_mixed
def mix_columns_inv(data):
return mix_columns(data, MIX_COLUMN_MATRIX_INV)
def shift_rows(data):
data_shifted = []
for column in range(4):
@@ -292,6 +309,7 @@ def shift_rows(data):
data_shifted.append(data[((column + row) & 0b11) * 4 + row])
return data_shifted
def shift_rows_inv(data):
data_shifted = []
for column in range(4):
@@ -299,6 +317,7 @@ def shift_rows_inv(data):
data_shifted.append(data[((column - row) & 0b11) * 4 + row])
return data_shifted
def inc(data):
data = data[:] # copy
for i in range(len(data) - 1, -1, -1):

View File

@@ -3,6 +3,7 @@ from __future__ import unicode_literals
import getpass
import optparse
import os
import re
import subprocess
import sys
@@ -174,12 +175,17 @@ try:
from shlex import quote as shlex_quote
except ImportError: # Python < 3.3
def shlex_quote(s):
if re.match(r'^[-_\w./]+$', s):
return s
else:
return "'" + s.replace("'", "'\"'\"'") + "'"
def compat_ord(c):
if type(c) is int: return c
else: return ord(c)
if type(c) is int:
return c
else:
return ord(c)
if sys.version_info >= (3, 0):
@@ -306,7 +312,7 @@ def workaround_optparse_bug9161():
og = optparse.OptionGroup(op, 'foo')
try:
og.add_option('-t')
except TypeError as te:
except TypeError:
real_add_option = optparse.OptionGroup.add_option
def _compat_add_option(self, *args, **kwargs):

View File

@@ -30,3 +30,8 @@ def get_suitable_downloader(info_dict):
return F4mFD
else:
return HttpFD
__all__ = [
'get_suitable_downloader',
'FileDownloader',
]

View File

@@ -225,13 +225,15 @@ class F4mFD(FileDownloader):
self.to_screen('[download] Downloading f4m manifest')
manifest = self.ydl.urlopen(man_url).read()
self.report_destination(filename)
http_dl = HttpQuietDownloader(self.ydl,
http_dl = HttpQuietDownloader(
self.ydl,
{
'continuedl': True,
'quiet': True,
'noprogress': True,
'test': self.params.get('test', False),
})
}
)
doc = etree.fromstring(manifest)
formats = [(int(f.attrib.get('bitrate', -1)), f) for f in doc.findall(_add_ns('media'))]

View File

@@ -101,4 +101,3 @@ class NativeHlsFD(FileDownloader):
})
self.try_rename(tmpfilename, filename)
return True

View File

@@ -32,6 +32,7 @@ from .bilibili import BiliBiliIE
from .blinkx import BlinkxIE
from .bliptv import BlipTVIE, BlipTVUserIE
from .bloomberg import BloombergIE
from .bpb import BpbIE
from .br import BRIE
from .breakcom import BreakIE
from .brightcove import BrightcoveIE
@@ -115,6 +116,7 @@ from .fktv import (
FKTVPosteckeIE,
)
from .flickr import FlickrIE
from .folketinget import FolketingetIE
from .fourtube import FourTubeIE
from .franceculture import FranceCultureIE
from .franceinter import FranceInterIE
@@ -371,6 +373,7 @@ from .syfy import SyfyIE
from .sztvhu import SztvHuIE
from .tagesschau import TagesschauIE
from .tapely import TapelyIE
from .tass import TassIE
from .teachertube import (
TeacherTubeIE,
TeacherTubeUserIE,
@@ -379,6 +382,7 @@ from .teachingchannel import TeachingChannelIE
from .teamcoco import TeamcocoIE
from .techtalks import TechTalksIE
from .ted import TEDIE
from .telebruxelles import TeleBruxellesIE
from .telecinco import TelecincoIE
from .telemb import TeleMBIE
from .tenplay import TenPlayIE
@@ -390,6 +394,7 @@ from .thesixtyone import TheSixtyOneIE
from .thisav import ThisAVIE
from .tinypic import TinyPicIE
from .tlc import TlcIE, TlcDeIE
from .tmz import TMZIE
from .tnaflix import TNAFlixIE
from .thvideo import (
THVideoIE,
@@ -403,6 +408,7 @@ from .trutube import TruTubeIE
from .tube8 import Tube8IE
from .tudou import TudouIE
from .tumblr import TumblrIE
from .tunein import TuneInIE
from .turbo import TurboIE
from .tutv import TutvIE
from .tvigle import TvigleIE
@@ -452,7 +458,10 @@ from .vine import (
VineUserIE,
)
from .viki import VikiIE
from .vk import VKIE
from .vk import (
VKIE,
VKUserVideosIE,
)
from .vodlocker import VodlockerIE
from .vporn import VpornIE
from .vrt import VRTIE
@@ -476,6 +485,7 @@ from .wrzuta import WrzutaIE
from .xbef import XBefIE
from .xboxclips import XboxClipsIE
from .xhamster import XHamsterIE
from .xminus import XMinusIE
from .xnxx import XNXXIE
from .xvideos import XVideosIE
from .xtube import XTubeUserIE, XTubeIE
@@ -506,6 +516,10 @@ from .youtube import (
YoutubeWatchLaterIE,
)
from .zdf import ZDFIE
from .zingmp3 import (
ZingMp3SongIE,
ZingMp3AlbumIE,
)
_ALL_CLASSES = [
klass

View File

@@ -5,6 +5,7 @@ import re
from .common import InfoExtractor
class AdultSwimIE(InfoExtractor):
_VALID_URL = r'https?://video\.adultswim\.com/(?P<path>.+?)(?:\.html)?(?:\?.*)?(?:#.*)?$'
_TEST = {

View File

@@ -70,11 +70,13 @@ class AppleTrailersIE(InfoExtractor):
uploader_id = mobj.group('company')
playlist_url = compat_urlparse.urljoin(url, 'includes/playlists/itunes.inc')
def fix_html(s):
s = re.sub(r'(?s)<script[^<]*?>.*?</script>', '', s)
s = re.sub(r'<img ([^<]*?)>', r'<img \1/>', s)
# The ' in the onClick attributes are not escaped, it couldn't be parsed
# like: http://trailers.apple.com/trailers/wb/gravity/
def _clean_json(m):
return 'iTunes.playURL(%s);' % m.group(1).replace('\'', '&#39;')
s = re.sub(self._JSON_RE, _clean_json, s)

View File

@@ -192,4 +192,3 @@ class ARDIE(InfoExtractor):
'upload_date': upload_date,
'thumbnail': thumbnail,
}

View File

@@ -5,7 +5,6 @@ import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
find_xpath_attr,
unified_strdate,
get_element_by_id,

View File

@@ -73,7 +73,8 @@ class BambuserChannelIE(InfoExtractor):
urls = []
last_id = ''
for i in itertools.count(1):
req_url = ('http://bambuser.com/xhr-api/index.php?username={user}'
req_url = (
'http://bambuser.com/xhr-api/index.php?username={user}'
'&sort=created&access_mode=0%2C1%2C2&limit={count}'
'&method=broadcast&format=json&vid_older_than={last}'
).format(user=user, count=self._STEP, last=last_id)

View File

@@ -166,9 +166,17 @@ class BlipTVIE(SubtitlesInfoExtractor):
class BlipTVUserIE(InfoExtractor):
_VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?blip\.tv/)|bliptvuser:)(?!api\.swf)([^/]+)/*$'
_VALID_URL = r'(?:(?:https?://(?:\w+\.)?blip\.tv/)|bliptvuser:)(?!api\.swf)([^/]+)/*$'
_PAGE_SIZE = 12
IE_NAME = 'blip.tv:user'
_TEST = {
'url': 'http://blip.tv/actone',
'info_dict': {
'id': 'actone',
'title': 'Act One: The Series',
},
'playlist_count': 5,
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
@@ -179,6 +187,7 @@ class BlipTVUserIE(InfoExtractor):
page = self._download_webpage(url, username, 'Downloading user page')
mobj = re.search(r'data-users-id="([^"]+)"', page)
page_base = page_base % mobj.group(1)
title = self._og_search_title(page)
# Download video ids using BlipTV Ajax calls. Result size per
# query is limited (currently to 12 videos) so we need to query
@@ -215,4 +224,5 @@ class BlipTVUserIE(InfoExtractor):
urls = ['http://blip.tv/%s' % video_id for video_id in video_ids]
url_entries = [self.url_result(vurl, 'BlipTV') for vurl in urls]
return [self.playlist_result(url_entries, playlist_title=username)]
return self.playlist_result(
url_entries, playlist_title=title, playlist_id=username)

View File

@@ -0,0 +1,37 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
class BpbIE(InfoExtractor):
IE_DESC = 'Bundeszentrale für politische Bildung'
_VALID_URL = r'http://www\.bpb\.de/mediathek/(?P<id>[0-9]+)/'
_TEST = {
'url': 'http://www.bpb.de/mediathek/297/joachim-gauck-zu-1989-und-die-erinnerung-an-die-ddr',
'md5': '0792086e8e2bfbac9cdf27835d5f2093',
'info_dict': {
'id': '297',
'ext': 'mp4',
'title': 'Joachim Gauck zu 1989 und die Erinnerung an die DDR',
'description': 'Joachim Gauck, erster Beauftragter für die Stasi-Unterlagen, spricht auf dem Geschichtsforum über die friedliche Revolution 1989 und eine "gewisse Traurigkeit" im Umgang mit der DDR-Vergangenheit.'
}
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
title = self._html_search_regex(
r'<h2 class="white">(.*?)</h2>', webpage, 'title')
video_url = self._html_search_regex(
r'(http://film\.bpb\.de/player/dokument_[0-9]+\.mp4)',
webpage, 'video URL')
return {
'id': video_id,
'url': video_url,
'title': title,
'description': self._og_search_description(webpage),
}

View File

@@ -111,6 +111,8 @@ class BrightcoveIE(InfoExtractor):
lambda m: m.group(1) + '/>', object_str)
# Fix up some stupid XML, see https://github.com/rg3/youtube-dl/issues/1608
object_str = object_str.replace('<--', '<!--')
# remove namespace to simplify extraction
object_str = re.sub(r'(<object[^>]*)(xmlns=".*?")', r'\1', object_str)
object_str = fix_xml_ampersands(object_str)
object_doc = xml.etree.ElementTree.fromstring(object_str.encode('utf-8'))
@@ -219,7 +221,7 @@ class BrightcoveIE(InfoExtractor):
webpage = self._download_webpage(req, video_id)
error_msg = self._html_search_regex(
r"<h1>We're sorry.</h1>\s*<p>(.*?)</p>", webpage,
r"<h1>We're sorry.</h1>([\s\n]*<p>.*?</p>)+", webpage,
'error message', default=None)
if error_msg is not None:
raise ExtractorError(

View File

@@ -5,6 +5,7 @@ import re
from .common import InfoExtractor
from ..utils import ExtractorError
class Channel9IE(InfoExtractor):
'''
Common extractor for channel9.msdn.com.
@@ -187,7 +188,8 @@ class Channel9IE(InfoExtractor):
view_count = self._extract_view_count(html)
comment_count = self._extract_comment_count(html)
common = {'_type': 'video',
common = {
'_type': 'video',
'id': content_path,
'description': description,
'thumbnail': thumbnail,

View File

@@ -39,6 +39,7 @@ class ClipsyndicateIE(InfoExtractor):
transform_source=fix_xml_ampersands)
track_doc = pdoc.find('trackList/track')
def find_param(name):
node = find_xpath_attr(track_doc, './/param', 'name', name)
if node is not None:

View File

@@ -25,8 +25,7 @@ class CNNIE(InfoExtractor):
'duration': 135,
'upload_date': '20130609',
},
},
{
}, {
"url": "http://edition.cnn.com/video/?/video/us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+Top+Stories%29",
"md5": "b5cc60c60a3477d185af8f19a2a26f4e",
"info_dict": {

View File

@@ -10,7 +10,8 @@ from ..utils import int_or_none
class CollegeHumorIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/(video|embed|e)/(?P<videoid>[0-9]+)/?(?P<shorttitle>.*)$'
_TESTS = [{
_TESTS = [
{
'url': 'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe',
'md5': 'dcc0f5c1c8be98dc33889a191f4c26bd',
'info_dict': {
@@ -21,8 +22,7 @@ class CollegeHumorIE(InfoExtractor):
'age_limit': 13,
'duration': 187,
},
},
{
}, {
'url': 'http://www.collegehumor.com/video/3505939/font-conference',
'md5': '72fa701d8ef38664a4dbb9e2ab721816',
'info_dict': {
@@ -33,9 +33,8 @@ class CollegeHumorIE(InfoExtractor):
'age_limit': 10,
'duration': 179,
},
},
}, {
# embedded youtube video
{
'url': 'http://www.collegehumor.com/embed/6950306',
'info_dict': {
'id': 'Z-bao9fg6Yc',

View File

@@ -2,7 +2,6 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from .mtv import MTVServicesInfoExtractor
from ..utils import (
compat_str,
@@ -110,9 +109,7 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor):
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url, re.VERBOSE)
if mobj is None:
raise ExtractorError('Invalid URL: %s' % url)
mobj = re.match(self._VALID_URL, url)
if mobj.group('shortname'):
if mobj.group('shortname') in ('tds', 'thedailyshow'):

View File

@@ -43,7 +43,11 @@ class InfoExtractor(object):
information possibly downloading the video to the file system, among
other possible outcomes.
The dictionaries must include the following fields:
The type field determines the the type of the result.
By far the most common value (and the default if _type is missing) is
"video", which indicates a single video.
For a video, the dictionaries must include the following fields:
id: Video identifier.
title: Video title, unescaped.
@@ -151,6 +155,38 @@ class InfoExtractor(object):
Unless mentioned otherwise, None is equivalent to absence of information.
_type "playlist" indicates multiple videos.
There must be a key "entries", which is a list or a PagedList object, each
element of which is a valid dictionary under this specfication.
Additionally, playlists can have "title" and "id" attributes with the same
semantics as videos (see above).
_type "multi_video" indicates that there are multiple videos that
form a single show, for examples multiple acts of an opera or TV episode.
It must have an entries key like a playlist and contain all the keys
required for a video at the same time.
_type "url" indicates that the video must be extracted from another
location, possibly by a different extractor. Its only required key is:
"url" - the next URL to extract.
Additionally, it may have properties believed to be identical to the
resolved entity, for example "title" if the title of the referred video is
known ahead of time.
_type "url_transparent" entities have the same specification as "url", but
indicate that the given additional information is more precise than the one
associated with the resolved URL.
This is useful when a site employs a video service that hosts the video and
its technical metadata, but that video service does not embed a useful
title, description etc.
Subclasses of this one should re-define the _real_initialize() and
_real_extract() methods and define a _VALID_URL regexp.
Probably, they should also be added to the list of extractors.
@@ -260,9 +296,11 @@ class InfoExtractor(object):
content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal)
return (content, urlh)
def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True):
def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True, prefix=None):
content_type = urlh.headers.get('Content-Type', '')
webpage_bytes = urlh.read()
if prefix is not None:
webpage_bytes = prefix + webpage_bytes
m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type)
if m:
encoding = m.group(1)
@@ -398,6 +436,7 @@ class InfoExtractor(object):
if video_id is not None:
video_info['id'] = video_id
return video_info
@staticmethod
def playlist_result(entries, playlist_id=None, playlist_title=None):
"""Returns a playlist"""

View File

@@ -69,11 +69,9 @@ class CrunchyrollIE(SubtitlesInfoExtractor):
login_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
self._download_webpage(login_request, None, False, 'Wrong login info')
def _real_initialize(self):
self._login()
def _decrypt_subtitles(self, data, iv, id):
data = bytes_to_intlist(data)
iv = bytes_to_intlist(iv)
@@ -99,8 +97,10 @@ class CrunchyrollIE(SubtitlesInfoExtractor):
return shaHash + [0] * 12
key = obfuscate_key(id)
class Counter:
__value = iv
def next_value(self):
temp = self.__value
self.__value = inc(self.__value)
@@ -248,7 +248,8 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
subtitles = {}
sub_format = self._downloader.params.get('subtitlesformat', 'srt')
for sub_id, sub_name in re.findall(r'\?ssid=([0-9]+)" title="([^"]+)', webpage):
sub_page = self._download_webpage('http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id='+sub_id,\
sub_page = self._download_webpage(
'http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id=' + sub_id,
video_id, note='Downloading subtitles for ' + sub_name)
id = self._search_regex(r'id=\'([0-9]+)', sub_page, 'subtitle_id', fatal=False)
iv = self._search_regex(r'<iv>([^<]+)', sub_page, 'subtitle_iv', fatal=False)

View File

@@ -18,6 +18,7 @@ from ..utils import (
unescapeHTML,
)
class DailymotionBaseInfoExtractor(InfoExtractor):
@staticmethod
def _build_request(url):
@@ -27,6 +28,7 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
request.add_header('Cookie', 'ff=off')
return request
class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
"""Information Extractor for Dailymotion"""

View File

@@ -11,15 +11,15 @@ from ..utils import url_basename
class DropboxIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?dropbox[.]com/sh?/(?P<id>[a-zA-Z0-9]{15})/.*'
_TESTS = [{
_TESTS = [
{
'url': 'https://www.dropbox.com/s/nelirfsxnmcfbfh/youtube-dl%20test%20video%20%27%C3%A4%22BaW_jenozKc.mp4?dl=0',
'info_dict': {
'id': 'nelirfsxnmcfbfh',
'ext': 'mp4',
'title': 'youtube-dl test video \'ä"BaW_jenozKc'
}
},
{
}, {
'url': 'https://www.dropbox.com/sh/662glsejgzoj9sr/AAByil3FGH9KFNZ13e08eSa1a/Pregame%20Ceremony%20Program%20PA%2020140518.m4v',
'only_matching': True,
},

View File

@@ -0,0 +1,75 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_parse_qs
from ..utils import (
int_or_none,
parse_duration,
parse_iso8601,
xpath_text,
)
class FolketingetIE(InfoExtractor):
IE_DESC = 'Folketinget (ft.dk; Danish parliament)'
_VALID_URL = r'https?://(?:www\.)?ft\.dk/webtv/video/[^?#]*?\.(?P<id>[0-9]+)\.aspx'
_TEST = {
'url': 'http://www.ft.dk/webtv/video/20141/eru/td.1165642.aspx?as=1#player',
'info_dict': {
'id': '1165642',
'ext': 'mp4',
'title': 'Åbent samråd i Erhvervsudvalget',
'description': 'Åbent samråd med erhvervs- og vækstministeren om regeringens politik på teleområdet',
'view_count': int,
'width': 768,
'height': 432,
'tbr': 928000,
'timestamp': 1416493800,
'upload_date': '20141120',
'duration': 3960,
},
'params': {
'skip_download': 'rtmpdump required',
}
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
title = self._og_search_title(webpage)
description = self._html_search_regex(
r'(?s)<div class="video-item-agenda"[^>]*>(.*?)<',
webpage, 'description', fatal=False)
player_params = compat_parse_qs(self._search_regex(
r'<embed src="http://ft\.arkena\.tv/flash/ftplayer\.swf\?([^"]+)"',
webpage, 'player params'))
xml_url = player_params['xml'][0]
doc = self._download_xml(xml_url, video_id)
timestamp = parse_iso8601(xpath_text(doc, './/date'))
duration = parse_duration(xpath_text(doc, './/duration'))
width = int_or_none(xpath_text(doc, './/width'))
height = int_or_none(xpath_text(doc, './/height'))
view_count = int_or_none(xpath_text(doc, './/views'))
formats = [{
'format_id': n.attrib['bitrate'],
'url': xpath_text(n, './url', fatal=True),
'tbr': int_or_none(n.attrib['bitrate']),
} for n in doc.findall('.//streams/stream')]
self._sort_formats(formats)
return {
'id': video_id,
'title': title,
'formats': formats,
'description': description,
'timestamp': timestamp,
'width': width,
'height': height,
'duration': duration,
'view_count': view_count,
}

View File

@@ -26,6 +26,21 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
if info.get('status') == 'NOK':
raise ExtractorError(
'%s returned error: %s' % (self.IE_NAME, info['message']), expected=True)
allowed_countries = info['videos'][0].get('geoblocage')
if allowed_countries:
georestricted = True
geo_info = self._download_json(
'http://geo.francetv.fr/ws/edgescape.json', video_id,
'Downloading geo restriction info')
country = geo_info['reponse']['geo_info']['country_code']
if country not in allowed_countries:
raise ExtractorError(
'The video is not available from your location',
expected=True)
else:
georestricted = False
formats = []
for video in info['videos']:
@@ -36,6 +51,10 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
continue
format_id = video['format']
if video_url.endswith('.f4m'):
if georestricted:
# See https://github.com/rg3/youtube-dl/issues/3963
# m3u8 urls work fine
continue
video_url_parsed = compat_urllib_parse_urlparse(video_url)
f4m_url = self._download_webpage(
'http://hdfauth.francetv.fr/esi/urltokengen2.html?url=%s' % video_url_parsed.path,

View File

@@ -445,6 +445,30 @@ class GenericIE(InfoExtractor):
'title': 'Rosetta #CometLanding webcast HL 10',
}
},
# LazyYT
{
'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
'info_dict': {
'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
},
'playlist_mincount': 2,
},
# Direct link with incorrect MIME type
{
'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
'md5': '4ccbebe5f36706d85221f204d7eb5913',
'info_dict': {
'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
'id': '5_Lennart_Poettering_-_Systemd',
'ext': 'webm',
'title': '5_Lennart_Poettering_-_Systemd',
'upload_date': '20141120',
},
'expected_warnings': [
'URL could be a direct video link, returning it as such.'
]
}
]
def report_following_redirect(self, new_url):
@@ -537,9 +561,9 @@ class GenericIE(InfoExtractor):
if default_search in ('error', 'fixup_error'):
raise ExtractorError(
('%r is not a valid URL. '
'%r is not a valid URL. '
'Set --default-search "ytsearch" (or run youtube-dl "ytsearch:%s" ) to search YouTube'
) % (url, url), expected=True)
% (url, url), expected=True)
else:
if ':' not in default_search:
default_search += ':'
@@ -598,10 +622,28 @@ class GenericIE(InfoExtractor):
if not self._downloader.params.get('test', False) and not is_intentional:
self._downloader.report_warning('Falling back on generic information extractor.')
if full_response:
webpage = self._webpage_read_content(full_response, url, video_id)
else:
webpage = self._download_webpage(url, video_id)
if not full_response:
full_response = self._request_webpage(url, video_id)
# Maybe it's a direct link to a video?
# Be careful not to download the whole thing!
first_bytes = full_response.read(512)
if not re.match(r'^\s*<', first_bytes.decode('utf-8', 'replace')):
self._downloader.report_warning(
'URL could be a direct video link, returning it as such.')
upload_date = unified_strdate(
head_response.headers.get('Last-Modified'))
return {
'id': video_id,
'title': os.path.splitext(url_basename(url))[0],
'direct': True,
'url': url,
'upload_date': upload_date,
}
webpage = self._webpage_read_content(
full_response, url, video_id, prefix=first_bytes)
self.report_extraction(video_id)
# Is it an RSS feed?
@@ -702,6 +744,12 @@ class GenericIE(InfoExtractor):
return _playlist_from_matches(
matches, lambda m: unescapeHTML(m[1]))
# Look for lazyYT YouTube embed
matches = re.findall(
r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
if matches:
return _playlist_from_matches(matches, lambda m: unescapeHTML(m))
# Look for embedded Dailymotion player
matches = re.findall(
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage)
@@ -979,7 +1027,7 @@ class GenericIE(InfoExtractor):
found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
if not found:
# HTML5 video
found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]*)?\s+src="([^"]+)"', webpage)
found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
if not found:
found = re.search(
r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
@@ -1025,4 +1073,3 @@ class GenericIE(InfoExtractor):
'_type': 'playlist',
'entries': entries,
}

View File

@@ -1,9 +1,7 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
parse_duration,
int_or_none,
)

View File

@@ -63,8 +63,10 @@ class IGNIE(InfoExtractor):
'id': '078fdd005f6d3c02f63d795faa1b984f',
'ext': 'mp4',
'title': 'Rewind Theater - Wild Trailer Gamescom 2014',
'description': 'Giant skeletons, bloody hunts, and captivating'
' natural beauty take our breath away.',
'description': (
'Giant skeletons, bloody hunts, and captivating'
' natural beauty take our breath away.'
),
},
},
]

View File

@@ -58,9 +58,13 @@ class InternetVideoArchiveIE(InfoExtractor):
item = info.find('channel/item')
def _bp(p):
return xpath_with_ns(p,
{'media': 'http://search.yahoo.com/mrss/',
'jwplayer': 'http://developer.longtailvideo.com/trac/wiki/FlashFormats'})
return xpath_with_ns(
p,
{
'media': 'http://search.yahoo.com/mrss/',
'jwplayer': 'http://developer.longtailvideo.com/trac/wiki/FlashFormats',
}
)
formats = []
for content in item.findall(_bp('media:group/media:content')):
attr = content.attrib

View File

@@ -45,4 +45,3 @@ class JadoreCettePubIE(InfoExtractor):
'title': title,
'description': description,
}

View File

@@ -13,8 +13,10 @@ class KickStarterIE(InfoExtractor):
'id': '1404461844',
'ext': 'mp4',
'title': 'Intersection: The Story of Josh Grant by Kyle Cowling',
'description': 'A unique motocross documentary that examines the '
'life and mind of one of sports most elite athletes: Josh Grant.',
'description': (
'A unique motocross documentary that examines the '
'life and mind of one of sports most elite athletes: Josh Grant.'
),
},
}, {
'note': 'Embedded video (not using the native kickstarter video service)',

View File

@@ -30,4 +30,3 @@ class Ku6IE(InfoExtractor):
'title': title,
'url': downloadUrl
}

View File

@@ -75,4 +75,3 @@ class Laola1TvIE(InfoExtractor):
'categories': categories,
'ext': 'mp4',
}

View File

@@ -19,8 +19,7 @@ class LiveLeakIE(InfoExtractor):
'uploader': 'ljfriel2',
'title': 'Most unlucky car accident'
}
},
{
}, {
'url': 'http://www.liveleak.com/view?i=f93_1390833151',
'md5': 'd3f1367d14cc3c15bf24fbfbe04b9abf',
'info_dict': {
@@ -30,8 +29,7 @@ class LiveLeakIE(InfoExtractor):
'uploader': 'ARD_Stinkt',
'title': 'German Television does first Edward Snowden Interview (ENGLISH)',
}
},
{
}, {
'url': 'http://www.liveleak.com/view?i=4f7_1392687779',
'md5': '42c6d97d54f1db107958760788c5f48f',
'info_dict': {

View File

@@ -7,6 +7,7 @@ from ..utils import (
compat_urllib_parse,
)
class MalemotionIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?malemotion\.com/video/(.+?)\.(?P<id>.+?)(#|$)'
_TEST = {

View File

@@ -145,7 +145,8 @@ class MTVServicesInfoExtractor(InfoExtractor):
idoc = self._download_xml(
feed_url + '?' + data, video_id,
'Downloading info', transform_source=fix_xml_ampersands)
return [self._get_video_info(item) for item in idoc.findall('.//item')]
return self.playlist_result(
[self._get_video_info(item) for item in idoc.findall('.//item')])
def _real_extract(self, url):
title = url_basename(url)
@@ -244,7 +245,7 @@ class MTVIE(MTVServicesInfoExtractor):
m_vevo = re.search(r'isVevoVideo = true;.*?vevoVideoId = "(.*?)";',
webpage, re.DOTALL)
if m_vevo:
vevo_id = m_vevo.group(1);
vevo_id = m_vevo.group(1)
self.to_screen('Vevo video detected: %s' % vevo_id)
return self.url_result('vevo:%s' % vevo_id, ie='Vevo')

View File

@@ -73,4 +73,3 @@ class MuenchenTVIE(InfoExtractor):
'is_live': True,
'thumbnail': thumbnail,
}

View File

@@ -4,7 +4,7 @@ import re
import json
from .common import InfoExtractor
from ..utils import (
from ..compat import (
compat_str,
)
@@ -52,8 +52,8 @@ class MySpaceIE(InfoExtractor):
if mobj.group('mediatype').startswith('music/song'):
# songs don't store any useful info in the 'context' variable
def search_data(name):
return self._search_regex(r'data-%s="(.*?)"' % name, webpage,
name)
return self._search_regex(
r'data-%s="(.*?)"' % name, webpage, name)
streamUrl = search_data('stream-url')
info = {
'id': video_id,
@@ -62,8 +62,8 @@ class MySpaceIE(InfoExtractor):
'thumbnail': self._og_search_thumbnail(webpage),
}
else:
context = json.loads(self._search_regex(r'context = ({.*?});', webpage,
u'context'))
context = json.loads(self._search_regex(
r'context = ({.*?});', webpage, 'context'))
video = context['video']
streamUrl = video['streamUrl']
info = {

View File

@@ -173,4 +173,3 @@ class MyVideoIE(InfoExtractor):
'play_path': video_playpath,
'player_url': video_swfobj,
}

View File

@@ -39,7 +39,6 @@ class NBAIE(InfoExtractor):
duration = parse_duration(
self._html_search_meta('duration', webpage, 'duration', fatal=False))
return {
'id': shortened_video_id,
'url': video_url,

View File

@@ -27,8 +27,7 @@ class NineGagIE(InfoExtractor):
"thumbnail": "re:^https?://",
},
'add_ie': ['Youtube']
},
{
}, {
'url': 'http://9gag.tv/p/KklwM/alternate-banned-opening-scene-of-gravity?ref=fsidebar',
'info_dict': {
'id': 'KklwM',

View File

@@ -97,4 +97,3 @@ class OoyalaIE(InfoExtractor):
}
else:
return self._extract_result(videos_info[0], videos_more_info)

View File

@@ -6,6 +6,7 @@ import re
from .common import InfoExtractor
from ..utils import int_or_none
class PodomaticIE(InfoExtractor):
IE_NAME = 'podomatic'
_VALID_URL = r'^(?P<proto>https?)://(?P<channel>[^.]+)\.podomatic\.com/entry/(?P<id>[^?]+)'

View File

@@ -41,4 +41,3 @@ class RingTVIE(InfoExtractor):
'thumbnail': thumbnail_url,
'description': description,
}

View File

@@ -28,9 +28,8 @@ class RtlXlIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url)
uuid = mobj.group('uuid')
# Use m3u8 streams (see https://github.com/rg3/youtube-dl/issues/4118)
info = self._download_json(
'http://www.rtl.nl/system/s4m/vfd/version=2/uuid=%s/d=pc/fmt=adaptive/' % uuid,
'http://www.rtl.nl/system/s4m/vfd/version=2/uuid=%s/fmt=flash/' % uuid,
uuid)
material = info['material'][0]
@@ -39,12 +38,13 @@ class RtlXlIE(InfoExtractor):
progname = info['abstracts'][0]['name']
subtitle = material['title'] or info['episodes'][0]['name']
videopath = material['videopath']
# Use unencrypted m3u8 streams (See https://github.com/rg3/youtube-dl/issues/4118)
videopath = material['videopath'].replace('.f4m', '.m3u8')
m3u8_url = 'http://manifest.us.rtl.nl' + videopath
formats = self._extract_m3u8_formats(m3u8_url, uuid, ext='mp4')
video_urlpart = videopath.split('/adaptive/')[1][:-4]
video_urlpart = videopath.split('/flash/')[1][:-5]
PG_URL_TEMPLATE = 'http://pg.us.rtl.nl/rtlxl/network/%s/progressive/%s.mp4'
formats.extend([

View File

@@ -54,7 +54,6 @@ def _decrypt_url(png):
return url
class RTVEALaCartaIE(InfoExtractor):
IE_NAME = 'rtve.es:alacarta'
IE_DESC = 'RTVE a la carta'

View File

@@ -27,8 +27,7 @@ class SBSIE(InfoExtractor):
'thumbnail': 're:http://.*\.jpg',
},
'add_ies': ['generic'],
},
{
}, {
'url': 'http://www.sbs.com.au/ondemand/video/320403011771/Dingo-Conservation-The-Feed',
'only_matching': True,
}]

View File

@@ -67,5 +67,3 @@ class ServingSysIE(InfoExtractor):
'title': title,
'entries': entries,
}

View File

@@ -1,7 +1,6 @@
# encoding: utf-8
from __future__ import unicode_literals
import os.path
import re
import json
import hashlib
@@ -12,15 +11,15 @@ from ..utils import (
compat_urllib_parse,
compat_urllib_request,
ExtractorError,
url_basename,
int_or_none,
unified_strdate,
)
class SmotriIE(InfoExtractor):
IE_DESC = 'Smotri.com'
IE_NAME = 'smotri'
_VALID_URL = r'^https?://(?:www\.)?(?:smotri\.com/video/view/\?id=|pics\.smotri\.com/(?:player|scrubber_custom8)\.swf\?file=)(?P<videoid>v(?P<realvideoid>[0-9]+)[a-z0-9]{4})'
_VALID_URL = r'^https?://(?:www\.)?(?:smotri\.com/video/view/\?id=|pics\.smotri\.com/(?:player|scrubber_custom8)\.swf\?file=)(?P<id>v(?P<realvideoid>[0-9]+)[a-z0-9]{4})'
_NETRC_MACHINE = 'smotri'
_TESTS = [
@@ -35,7 +34,6 @@ class SmotriIE(InfoExtractor):
'uploader': 'rbc2008',
'uploader_id': 'rbc08',
'upload_date': '20131118',
'description': 'катастрофа с камер видеонаблюдения, видео катастрофа с камер видеонаблюдения',
'thumbnail': 'http://frame6.loadup.ru/8b/a9/2610366.3.3.jpg',
},
},
@@ -50,7 +48,6 @@ class SmotriIE(InfoExtractor):
'uploader': 'Support Photofile@photofile',
'uploader_id': 'support-photofile',
'upload_date': '20070704',
'description': 'test, видео test',
'thumbnail': 'http://frame4.loadup.ru/03/ed/57591.2.3.jpg',
},
},
@@ -66,7 +63,6 @@ class SmotriIE(InfoExtractor):
'uploader_id': 'timoxa40',
'upload_date': '20100404',
'thumbnail': 'http://frame7.loadup.ru/af/3f/1390466.3.3.jpg',
'description': 'TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1, видео TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1',
},
'params': {
'videopassword': 'qwerty',
@@ -85,7 +81,6 @@ class SmotriIE(InfoExtractor):
'upload_date': '20101001',
'thumbnail': 'http://frame3.loadup.ru/75/75/1540889.1.3.jpg',
'age_limit': 18,
'description': 'этот ролик не покажут по ТВ, видео этот ролик не покажут по ТВ',
},
'params': {
'videopassword': '333'
@@ -102,17 +97,11 @@ class SmotriIE(InfoExtractor):
'uploader': 'HannahL',
'uploader_id': 'lisaha95',
'upload_date': '20090331',
'description': 'Shakira - Don\'t Bother, видео Shakira - Don\'t Bother',
'thumbnail': 'http://frame8.loadup.ru/44/0b/918809.7.3.jpg',
},
},
]
_SUCCESS = 0
_PASSWORD_NOT_VERIFIED = 1
_PASSWORD_DETECTED = 2
_VIDEO_NOT_FOUND = 3
@classmethod
def _extract_url(cls, webpage):
mobj = re.search(
@@ -137,44 +126,44 @@ class SmotriIE(InfoExtractor):
return self._html_search_meta(name, html, display_name)
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('videoid')
real_video_id = mobj.group('realvideoid')
video_id = self._match_id(url)
# Download video JSON data
video_json_url = 'http://smotri.com/vt.php?id=%s' % real_video_id
video_json_page = self._download_webpage(video_json_url, video_id, 'Downloading video JSON')
video_json = json.loads(video_json_page)
video_form = {
'ticket': video_id,
'video_url': '1',
'frame_url': '1',
'devid': 'LoadupFlashPlayer',
'getvideoinfo': '1',
}
status = video_json['status']
if status == self._VIDEO_NOT_FOUND:
request = compat_urllib_request.Request(
'http://smotri.com/video/view/url/bot/', compat_urllib_parse.urlencode(video_form))
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
video = self._download_json(request, video_id, 'Downloading video JSON')
if video.get('_moderate_no') or not video.get('moderated'):
raise ExtractorError('Video %s has not been approved by moderator' % video_id, expected=True)
if video.get('error'):
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
elif status == self._PASSWORD_DETECTED: # The video is protected by a password, retry with
# video-password set
video_password = self._downloader.params.get('videopassword', None)
if not video_password:
raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True)
video_json_url += '&md5pass=%s' % hashlib.md5(video_password.encode('utf-8')).hexdigest()
video_json_page = self._download_webpage(video_json_url, video_id, 'Downloading video JSON (video-password set)')
video_json = json.loads(video_json_page)
status = video_json['status']
if status == self._PASSWORD_NOT_VERIFIED:
raise ExtractorError('Video password is invalid', expected=True)
if status != self._SUCCESS:
raise ExtractorError('Unexpected status value %s' % status)
# Extract the URL of the video
video_url = video_json['file_data']
video_url = video.get('_vidURL') or video.get('_vidURL_mp4')
title = video['title']
thumbnail = video['_imgURL']
upload_date = unified_strdate(video['added'])
uploader = video['userNick']
uploader_id = video['userLogin']
duration = int_or_none(video['duration'])
# Video JSON does not provide enough meta data
# We will extract some from the video web page instead
video_page_url = 'http://smotri.com/video/view/?id=%s' % video_id
video_page = self._download_webpage(video_page_url, video_id, 'Downloading video page')
webpage_url = 'http://smotri.com/video/view/?id=%s' % video_id
webpage = self._download_webpage(webpage_url, video_id, 'Downloading video page')
# Warning if video is unavailable
warning = self._html_search_regex(
r'<div class="videoUnModer">(.*?)</div>', video_page,
r'<div class="videoUnModer">(.*?)</div>', webpage,
'warning message', default=None)
if warning is not None:
self._downloader.report_warning(
@@ -182,84 +171,32 @@ class SmotriIE(InfoExtractor):
(video_id, warning))
# Adult content
if re.search('EroConfirmText">', video_page) is not None:
if re.search('EroConfirmText">', webpage) is not None:
self.report_age_confirmation()
confirm_string = self._html_search_regex(
r'<a href="/video/view/\?id=%s&confirm=([^"]+)" title="[^"]+">' % video_id,
video_page, 'confirm string')
confirm_url = video_page_url + '&confirm=%s' % confirm_string
video_page = self._download_webpage(confirm_url, video_id, 'Downloading video page (age confirmed)')
webpage, 'confirm string')
confirm_url = webpage_url + '&confirm=%s' % confirm_string
webpage = self._download_webpage(confirm_url, video_id, 'Downloading video page (age confirmed)')
adult_content = True
else:
adult_content = False
# Extract the rest of meta data
video_title = self._search_meta('name', video_page, 'title')
if not video_title:
video_title = os.path.splitext(url_basename(video_url))[0]
video_description = self._search_meta('description', video_page)
END_TEXT = ' на сайте Smotri.com'
if video_description and video_description.endswith(END_TEXT):
video_description = video_description[:-len(END_TEXT)]
START_TEXT = 'Смотреть онлайн ролик '
if video_description and video_description.startswith(START_TEXT):
video_description = video_description[len(START_TEXT):]
video_thumbnail = self._search_meta('thumbnail', video_page)
upload_date_str = self._search_meta('uploadDate', video_page, 'upload date')
if upload_date_str:
upload_date_m = re.search(r'(?P<year>\d{4})\.(?P<month>\d{2})\.(?P<day>\d{2})T', upload_date_str)
video_upload_date = (
(
upload_date_m.group('year') +
upload_date_m.group('month') +
upload_date_m.group('day')
)
if upload_date_m else None
)
else:
video_upload_date = None
duration_str = self._search_meta('duration', video_page)
if duration_str:
duration_m = re.search(r'T(?P<hours>[0-9]{2})H(?P<minutes>[0-9]{2})M(?P<seconds>[0-9]{2})S', duration_str)
video_duration = (
(
(int(duration_m.group('hours')) * 60 * 60) +
(int(duration_m.group('minutes')) * 60) +
int(duration_m.group('seconds'))
)
if duration_m else None
)
else:
video_duration = None
video_uploader = self._html_search_regex(
'<div class="DescrUser"><div>Автор.*?onmouseover="popup_user_info[^"]+">(.*?)</a>',
video_page, 'uploader', fatal=False, flags=re.MULTILINE|re.DOTALL)
video_uploader_id = self._html_search_regex(
'<div class="DescrUser"><div>Автор.*?onmouseover="popup_user_info\\(.*?\'([^\']+)\'\\);">',
video_page, 'uploader id', fatal=False, flags=re.MULTILINE|re.DOTALL)
video_view_count = self._html_search_regex(
view_count = self._html_search_regex(
'Общее количество просмотров.*?<span class="Number">(\\d+)</span>',
video_page, 'view count', fatal=False, flags=re.MULTILINE|re.DOTALL)
webpage, 'view count', fatal=False, flags=re.MULTILINE | re.DOTALL)
return {
'id': video_id,
'url': video_url,
'title': video_title,
'thumbnail': video_thumbnail,
'description': video_description,
'uploader': video_uploader,
'upload_date': video_upload_date,
'uploader_id': video_uploader_id,
'duration': video_duration,
'view_count': int_or_none(video_view_count),
'title': title,
'thumbnail': thumbnail,
'uploader': uploader,
'upload_date': upload_date,
'uploader_id': uploader_id,
'duration': duration,
'view_count': int_or_none(view_count),
'age_limit': 18 if adult_content else 0,
'video_page_url': video_page_url
}

View File

@@ -93,4 +93,3 @@ class SportDeutschlandIE(InfoExtractor):
'rtmp_live': asset.get('live'),
'timestamp': parse_iso8601(asset.get('date')),
}

View File

@@ -1,3 +1,5 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
@@ -9,22 +11,21 @@ from ..utils import (
class StanfordOpenClassroomIE(InfoExtractor):
IE_NAME = u'stanfordoc'
IE_DESC = u'Stanford Open ClassRoom'
_VALID_URL = r'^(?:https?://)?openclassroom\.stanford\.edu(?P<path>/?|(/MainFolder/(?:HomePage|CoursePage|VideoPage)\.php([?]course=(?P<course>[^&]+)(&video=(?P<video>[^&]+))?(&.*)?)?))$'
IE_NAME = 'stanfordoc'
IE_DESC = 'Stanford Open ClassRoom'
_VALID_URL = r'https?://openclassroom\.stanford\.edu(?P<path>/?|(/MainFolder/(?:HomePage|CoursePage|VideoPage)\.php([?]course=(?P<course>[^&]+)(&video=(?P<video>[^&]+))?(&.*)?)?))$'
_TEST = {
u'url': u'http://openclassroom.stanford.edu/MainFolder/VideoPage.php?course=PracticalUnix&video=intro-environment&speed=100',
u'file': u'PracticalUnix_intro-environment.mp4',
u'md5': u'544a9468546059d4e80d76265b0443b8',
u'info_dict': {
u"title": u"Intro Environment"
'url': 'http://openclassroom.stanford.edu/MainFolder/VideoPage.php?course=PracticalUnix&video=intro-environment&speed=100',
'md5': '544a9468546059d4e80d76265b0443b8',
'info_dict': {
'id': 'PracticalUnix_intro-environment',
'ext': 'mp4',
'title': 'Intro Environment',
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
if mobj is None:
raise ExtractorError(u'Invalid URL: %s' % url)
if mobj.group('course') and mobj.group('video'): # A specific video
course = mobj.group('course')
@@ -35,7 +36,6 @@ class StanfordOpenClassroomIE(InfoExtractor):
'upload_date': None,
}
self.report_extraction(info['id'])
baseUrl = 'http://openclassroom.stanford.edu/MainFolder/courses/' + course + '/videos/'
xmlUrl = baseUrl + video + '.xml'
mdoc = self._download_xml(xmlUrl, info['id'])
@@ -43,63 +43,49 @@ class StanfordOpenClassroomIE(InfoExtractor):
info['title'] = mdoc.findall('./title')[0].text
info['url'] = baseUrl + mdoc.findall('./videoFile')[0].text
except IndexError:
raise ExtractorError(u'Invalid metadata XML file')
info['ext'] = info['url'].rpartition('.')[2]
return [info]
raise ExtractorError('Invalid metadata XML file')
return info
elif mobj.group('course'): # A course page
course = mobj.group('course')
info = {
'id': course,
'type': 'playlist',
'_type': 'playlist',
'uploader': None,
'upload_date': None,
}
coursepage = self._download_webpage(url, info['id'],
coursepage = self._download_webpage(
url, info['id'],
note='Downloading course info page',
errnote='Unable to download course info page')
info['title'] = self._html_search_regex('<h1>([^<]+)</h1>', coursepage, 'title', default=info['id'])
info['title'] = self._html_search_regex(
r'<h1>([^<]+)</h1>', coursepage, 'title', default=info['id'])
info['description'] = self._html_search_regex('<description>([^<]+)</description>',
coursepage, u'description', fatal=False)
info['description'] = self._html_search_regex(
r'(?s)<description>([^<]+)</description>',
coursepage, 'description', fatal=False)
links = orderedSet(re.findall('<a href="(VideoPage.php\?[^"]+)">', coursepage))
info['list'] = [
{
'type': 'reference',
'url': 'http://openclassroom.stanford.edu/MainFolder/' + unescapeHTML(vpage),
}
for vpage in links]
results = []
for entry in info['list']:
assert entry['type'] == 'reference'
results += self.extract(entry['url'])
return results
info['entries'] = [self.url_result(
'http://openclassroom.stanford.edu/MainFolder/%s' % unescapeHTML(l)
) for l in links]
return info
else: # Root page
info = {
'id': 'Stanford OpenClassroom',
'type': 'playlist',
'_type': 'playlist',
'uploader': None,
'upload_date': None,
}
info['title'] = info['id']
rootURL = 'http://openclassroom.stanford.edu/MainFolder/HomePage.php'
rootpage = self._download_webpage(rootURL, info['id'],
errnote=u'Unable to download course info page')
info['title'] = info['id']
errnote='Unable to download course info page')
links = orderedSet(re.findall('<a href="(CoursePage.php\?[^"]+)">', rootpage))
info['list'] = [
{
'type': 'reference',
'url': 'http://openclassroom.stanford.edu/MainFolder/' + unescapeHTML(cpage),
}
for cpage in links]
results = []
for entry in info['list']:
assert entry['type'] == 'reference'
results += self.extract(entry['url'])
return results
info['entries'] = [self.url_result(
'http://openclassroom.stanford.edu/MainFolder/%s' % unescapeHTML(l)
) for l in links]
return info

View File

@@ -50,7 +50,7 @@ class SubtitlesInfoExtractor(InfoExtractor):
sub_lang_list = {}
for sub_lang in requested_langs:
if not sub_lang in available_subs_list:
if sub_lang not in available_subs_list:
self._downloader.report_warning(u'no closed captions found in the specified language "%s"' % sub_lang)
continue
sub_lang_list[sub_lang] = available_subs_list[sub_lang]

View File

@@ -1,27 +1,24 @@
# -*- coding: utf-8 -*-
import re
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import determine_ext
class SztvHuIE(InfoExtractor):
_VALID_URL = r'(?:http://)?(?:(?:www\.)?sztv\.hu|www\.tvszombathely\.hu)/(?:[^/]+)/.+-(?P<id>[0-9]+)'
_VALID_URL = r'http://(?:(?:www\.)?sztv\.hu|www\.tvszombathely\.hu)/(?:[^/]+)/.+-(?P<id>[0-9]+)'
_TEST = {
u'url': u'http://sztv.hu/hirek/cserkeszek-nepszerusitettek-a-kornyezettudatos-eletmodot-a-savaria-teren-20130909',
u'file': u'20130909.mp4',
u'md5': u'a6df607b11fb07d0e9f2ad94613375cb',
u'info_dict': {
u"title": u"Cserkészek népszerűsítették a környezettudatos életmódot a Savaria téren",
u"description": u'A zöld nap játékos ismeretterjesztő programjait a Magyar Cserkész Szövetség szervezte, akik az ország nyolc városában adják át tudásukat az érdeklődőknek. A PET...',
'url': 'http://sztv.hu/hirek/cserkeszek-nepszerusitettek-a-kornyezettudatos-eletmodot-a-savaria-teren-20130909',
'md5': 'a6df607b11fb07d0e9f2ad94613375cb',
'info_dict': {
'id': '20130909',
'ext': 'mp4',
'title': 'Cserkészek népszerűsítették a környezettudatos életmódot a Savaria téren',
'description': 'A zöld nap játékos ismeretterjesztő programjait a Magyar Cserkész Szövetség szervezte, akik az ország nyolc városában adják át tudásukat az érdeklődőknek. A PET...',
},
u'skip': u'Service temporarily disabled as of 2013-11-20'
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
video_file = self._search_regex(
r'file: "...:(.*?)",', webpage, 'video file')
@@ -39,7 +36,6 @@ class SztvHuIE(InfoExtractor):
'id': video_id,
'url': video_url,
'title': title,
'ext': determine_ext(video_url),
'description': description,
'thumbnail': thumbnail,
}

View File

@@ -0,0 +1,62 @@
# encoding: utf-8
from __future__ import unicode_literals
import json
from .common import InfoExtractor
from ..utils import (
js_to_json,
qualities,
)
class TassIE(InfoExtractor):
_VALID_URL = r'https?://(?:tass\.ru|itar-tass\.com)/[^/]+/(?P<id>\d+)'
_TESTS = [
{
'url': 'http://tass.ru/obschestvo/1586870',
'md5': '3b4cdd011bc59174596b6145cda474a4',
'info_dict': {
'id': '1586870',
'ext': 'mp4',
'title': 'Посетителям московского зоопарка показали красную панду',
'description': 'Приехавшую из Дублина Зейну можно увидеть в павильоне "Кошки тропиков"',
'thumbnail': 're:^https?://.*\.jpg$',
},
},
{
'url': 'http://itar-tass.com/obschestvo/1600009',
'only_matching': True,
},
]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
sources = json.loads(js_to_json(self._search_regex(
r'(?s)sources\s*:\s*(\[.+?\])', webpage, 'sources')))
quality = qualities(['sd', 'hd'])
formats = []
for source in sources:
video_url = source.get('file')
if not video_url or not video_url.startswith('http') or not video_url.endswith('.mp4'):
continue
label = source.get('label')
formats.append({
'url': video_url,
'format_id': label,
'quality': quality(label),
})
self._sort_formats(formats)
return {
'id': video_id,
'title': self._og_search_title(webpage),
'description': self._og_search_description(webpage),
'thumbnail': self._og_search_thumbnail(webpage),
'formats': formats,
}

View File

@@ -16,8 +16,7 @@ class TeamcocoIE(InfoExtractor):
'title': 'Conan Becomes A Mary Kay Beauty Consultant',
'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.'
}
},
{
}, {
'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush',
'file': '19705.mp4',
'md5': 'cde9ba0fa3506f5f017ce11ead928f9a',

View File

@@ -0,0 +1,60 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
class TeleBruxellesIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?telebruxelles\.be/(news|sport|dernier-jt)/?(?P<id>[^/#?]+)'
_TESTS = [{
'url': 'http://www.telebruxelles.be/news/auditions-devant-parlement-francken-galant-tres-attendus/',
'md5': '59439e568c9ee42fb77588b2096b214f',
'info_dict': {
'id': '11942',
'display_id': 'auditions-devant-parlement-francken-galant-tres-attendus',
'ext': 'flv',
'title': 'Parlement : Francken et Galant répondent aux interpellations de lopposition',
'description': 're:Les auditions des ministres se poursuivent*'
},
'params': {
'skip_download': 'requires rtmpdump'
},
}, {
'url': 'http://www.telebruxelles.be/sport/basket-brussels-bat-mons-80-74/',
'md5': '181d3fbdcf20b909309e5aef5c6c6047',
'info_dict': {
'id': '10091',
'display_id': 'basket-brussels-bat-mons-80-74',
'ext': 'flv',
'title': 'Basket : le Brussels bat Mons 80-74',
'description': 're:^Ils l\u2019on fait ! En basket, le B*',
},
'params': {
'skip_download': 'requires rtmpdump'
},
}]
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
article_id = self._html_search_regex(
r"<article id=\"post-(\d+)\"", webpage, 'article ID')
title = self._html_search_regex(
r'<h1 class=\"entry-title\">(.*?)</h1>', webpage, 'title')
description = self._og_search_description(webpage)
rtmp_url = self._html_search_regex(
r"file: \"(rtmp://\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d{1,5}/vod/mp4:\" \+ \"\w+\" \+ \".mp4)\"",
webpage, 'RTMP url')
rtmp_url = rtmp_url.replace("\" + \"", "")
return {
'id': article_id,
'display_id': display_id,
'title': title,
'description': description,
'url': rtmp_url,
'ext': 'flv',
'rtmp_live': True # if rtmpdump is not called with "--live" argument, the download is blocked and can be completed
}

View File

@@ -35,6 +35,7 @@ class ThePlatformIE(InfoExtractor):
'skip_download': True,
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
@@ -48,7 +49,6 @@ class ThePlatformIE(InfoExtractor):
smil_url = ('http://link.theplatform.com/s/dJ5BDC/{0}/meta.smil?'
'format=smil&mbr=true'.format(video_id))
meta = self._download_xml(smil_url, video_id)
try:
error_msg = next(

View File

@@ -36,9 +36,10 @@ class TlcDeIE(InfoExtractor):
'ext': 'mp4',
'title': 'Breaking Amish: Die Welt da draußen',
'uploader': 'Discovery Networks - Germany',
'description': 'Vier Amische und eine Mennonitin wagen in New York'
'description': (
'Vier Amische und eine Mennonitin wagen in New York'
' den Sprung in ein komplett anderes Leben. Begleitet sie auf'
' ihrem spannenden Weg.',
' ihrem spannenden Weg.'),
},
}

View File

@@ -0,0 +1,32 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
class TMZIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?tmz\.com/videos/(?P<id>[^/]+)/?'
_TEST = {
'url': 'http://www.tmz.com/videos/0_okj015ty/',
'md5': '791204e3bf790b1426cb2db0706184c0',
'info_dict': {
'id': '0_okj015ty',
'url': 'http://tmz.vo.llnwd.net/o28/2014-03/13/0_okj015ty_0_rt8ro3si_2.mp4',
'ext': 'mp4',
'title': 'Kim Kardashian\'s Boobs Unlock a Mystery!',
'description': 'Did Kim Kardasain try to one-up Khloe by one-upping Kylie??? Or is she just showing off her amazing boobs?',
'thumbnail': 'http://cdnbakmi.kaltura.com/p/591531/sp/59153100/thumbnail/entry_id/0_okj015ty/version/100002/acv/182/width/640',
}
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
return {
'id': video_id,
'url': self._html_search_meta('VideoURL', webpage, fatal=True),
'title': self._og_search_title(webpage),
'description': self._og_search_description(webpage),
'thumbnail': self._html_search_meta('ThumbURL', webpage),
}

View File

@@ -63,4 +63,3 @@ class TriluliluIE(InfoExtractor):
'description': description,
'thumbnail': thumbnail,
}

View File

@@ -0,0 +1,101 @@
# coding: utf-8
from __future__ import unicode_literals
import json
import re
from .common import InfoExtractor
from ..utils import ExtractorError
class TuneInIE(InfoExtractor):
_VALID_URL = r'''(?x)https?://(?:www\.)?
(?:
tunein\.com/
(?:
radio/.*?-s|
station/.*?StationId\=
)(?P<id>[0-9]+)
|tun\.in/(?P<redirect_id>[A-Za-z0-9]+)
)
'''
_INFO_DICT = {
'id': '34682',
'title': 'Jazz 24 on 88.5 Jazz24 - KPLU-HD2',
'ext': 'AAC',
'thumbnail': 're:^https?://.*\.png$',
'location': 'Tacoma, WA',
}
_TESTS = [
{
'url': 'http://tunein.com/radio/Jazz24-885-s34682/',
'info_dict': _INFO_DICT,
'params': {
'skip_download': True, # live stream
},
},
{ # test redirection
'url': 'http://tun.in/ser7s',
'info_dict': _INFO_DICT,
'params': {
'skip_download': True, # live stream
},
},
]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
redirect_id = mobj.group('redirect_id')
if redirect_id:
# The server doesn't support HEAD requests
urlh = self._request_webpage(
url, redirect_id, note='Downloading redirect page')
url = urlh.geturl()
self.to_screen('Following redirect: %s' % url)
mobj = re.match(self._VALID_URL, url)
station_id = mobj.group('id')
webpage = self._download_webpage(
url, station_id, note='Downloading station webpage')
payload = self._html_search_regex(
r'(?m)TuneIn\.payload\s*=\s*(\{[^$]+?)$', webpage, 'JSON data')
json_data = json.loads(payload)
station_info = json_data['Station']['broadcast']
title = station_info['Title']
thumbnail = station_info.get('Logo')
location = station_info.get('Location')
streams_url = station_info.get('StreamUrl')
if not streams_url:
raise ExtractorError('No downloadable streams found',
expected=True)
stream_data = self._download_webpage(
streams_url, station_id, note='Downloading stream data')
streams = json.loads(self._search_regex(
r'\((.*)\);', stream_data, 'stream info'))['Streams']
is_live = None
formats = []
for stream in streams:
if stream.get('Type') == 'Live':
is_live = True
formats.append({
'abr': stream.get('Bandwidth'),
'ext': stream.get('MediaType'),
'acodec': stream.get('MediaType'),
'vcodec': 'none',
'url': stream.get('Url'),
# Sometimes streams with the highest quality do not exist
'preference': stream.get('Reliability'),
})
self._sort_formats(formats)
return {
'id': station_id,
'title': title,
'formats': formats,
'thumbnail': thumbnail,
'location': location,
'is_live': is_live,
}

View File

@@ -121,4 +121,7 @@ class VH1IE(MTVIE):
idoc = self._download_xml(
doc_url, video_id,
'Downloading info', transform_source=fix_xml_ampersands)
return [self._get_video_info(item) for item in idoc.findall('.//item')]
return self.playlist_result(
[self._get_video_info(item) for item in idoc.findall('.//item')],
playlist_id=video_id,
)

View File

@@ -35,4 +35,3 @@ class ViceIE(InfoExtractor):
except ExtractorError:
raise ExtractorError('The page doesn\'t contain a video', expected=True)
return self.url_result(ooyala_url, ie='Ooyala')

View File

@@ -6,6 +6,7 @@ from ..utils import (
determine_ext,
)
class VideofyMeIE(InfoExtractor):
_VALID_URL = r'https?://(www\.videofy\.me/.+?|p\.videofy\.me/v)/(?P<id>\d+)(&|#|$)'
IE_NAME = u'videofy.me'

View File

@@ -30,4 +30,3 @@ class VidziIE(InfoExtractor):
'title': title,
'url': video_url,
}

View File

@@ -11,12 +11,13 @@ from ..utils import (
compat_urllib_parse,
compat_str,
unescapeHTML,
)
unified_strdate,
orderedSet)
class VKIE(InfoExtractor):
IE_NAME = 'vk.com'
_VALID_URL = r'https?://(?:m\.)?vk\.com/(?:video_ext\.php\?.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+)|(?:.+?\?.*?z=)?video(?P<videoid>.*?)(?:\?|%2F|$))'
_VALID_URL = r'https?://(?:m\.)?vk\.com/(?:video_ext\.php\?.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+)|(?:.+?\?.*?z=)?video(?P<videoid>[^s].*?)(?:\?|%2F|$))'
_NETRC_MACHINE = 'vk'
_TESTS = [
@@ -29,17 +30,19 @@ class VKIE(InfoExtractor):
'title': 'ProtivoGunz - Хуёвая песня',
'uploader': 're:Noize MC.*',
'duration': 195,
'upload_date': '20120212',
},
},
{
'url': 'http://vk.com/video4643923_163339118',
'md5': 'f79bccb5cd182b1f43502ca5685b2b36',
'url': 'http://vk.com/video205387401_165548505',
'md5': '6c0aeb2e90396ba97035b9cbde548700',
'info_dict': {
'id': '163339118',
'id': '165548505',
'ext': 'mp4',
'uploader': 'Elya Iskhakova',
'title': 'Dream Theater - Hollow Years Live at Budokan 720*',
'duration': 558,
'uploader': 'Tom Cruise',
'title': 'No name',
'duration': 9,
'upload_date': '20130721'
}
},
{
@@ -52,9 +55,12 @@ class VKIE(InfoExtractor):
'uploader': 'Vladimir Gavrin',
'title': 'Lin Dan',
'duration': 101,
'upload_date': '20120730',
}
},
{
# VIDEO NOW REMOVED
# please update if you find a video whose URL follows the same pattern
'url': 'http://vk.com/video-8871596_164049491',
'md5': 'a590bcaf3d543576c9bd162812387666',
'note': 'Only available for registered users',
@@ -64,18 +70,7 @@ class VKIE(InfoExtractor):
'uploader': 'Триллеры',
'title': '► Бойцовский клуб / Fight Club 1999 [HD 720]',
'duration': 8352,
},
'skip': 'Requires vk account credentials',
},
{
'url': 'http://vk.com/feed?z=video-43215063_166094326%2Fbb50cacd3177146d7a',
'md5': 'd82c22e449f036282d1d3f7f4d276869',
'info_dict': {
'id': '166094326',
'ext': 'mp4',
'uploader': 'Киномания - лучшее из мира кино',
'title': 'Запах женщины (1992)',
'duration': 9392,
'upload_date': '20121218'
},
'skip': 'Requires vk account credentials',
},
@@ -88,6 +83,7 @@ class VKIE(InfoExtractor):
'uploader': 'Киномания - лучшее из мира кино',
'title': ' ',
'duration': 7291,
'upload_date': '20140328',
},
'skip': 'Requires vk account credentials',
},
@@ -100,9 +96,15 @@ class VKIE(InfoExtractor):
'ext': 'mp4',
'title': 'Книга Илая',
'duration': 6771,
'upload_date': '20140626',
},
'skip': 'Only works from Russia',
},
{
# removed video, just testing that we match the pattern
'url': 'http://vk.com/feed?z=video-43215063_166094326%2Fbb50cacd3177146d7a',
'only_matching': True,
},
]
def _login(self):
@@ -141,10 +143,12 @@ class VKIE(InfoExtractor):
ERRORS = {
r'>Видеозапись .*? была изъята из публичного доступа в связи с обращением правообладателя.<':
'Video %s has been removed from public access due to rightholder complaint.',
r'<!>Please log in or <':
'Video %s is only available for registered users, '
'use --username and --password options to provide account credentials.',
'<!>Unknown error':
r'<!>Unknown error':
'Video %s does not exist.'
}
@@ -169,6 +173,13 @@ class VKIE(InfoExtractor):
data_json = self._search_regex(r'var vars = ({.*?});', info_page, 'vars')
data = json.loads(data_json)
# Extract upload date
upload_date = None
mobj = re.search(r'id="mv_date_wrap".*?Added ([a-zA-Z]+ [0-9]+), ([0-9]+) at', info_page)
if mobj is not None:
mobj.group(1) + ' ' + mobj.group(2)
upload_date = unified_strdate(mobj.group(1) + ' ' + mobj.group(2))
formats = [{
'format_id': k,
'url': v,
@@ -183,5 +194,28 @@ class VKIE(InfoExtractor):
'title': unescapeHTML(data['md_title']),
'thumbnail': data.get('jpg'),
'uploader': data.get('md_author'),
'duration': data.get('duration')
'duration': data.get('duration'),
'upload_date': upload_date,
}
class VKUserVideosIE(InfoExtractor):
IE_NAME = 'vk.com:user-videos'
IE_DESC = 'vk.com:All of a user\'s videos'
_VALID_URL = r'https?://vk\.com/videos(?P<id>[0-9]+)(?:m\?.*)?'
_TEMPLATE_URL = 'https://vk.com/videos'
_TEST = {
'url': 'http://vk.com/videos205387401',
'playlist_mincount': 4,
}
def _real_extract(self, url):
page_id = self._match_id(url)
page = self._download_webpage(url, page_id)
video_ids = orderedSet(
m.group(1) for m in re.finditer(r'href="/video([0-9_]+)"', page))
url_entries = [
self.url_result(
'http://vk.com/video' + video_id, 'VK', video_id=video_id)
for video_id in video_ids]
return self.playlist_result(url_entries, page_id)

View File

@@ -51,4 +51,3 @@ class WorldStarHipHopIE(InfoExtractor):
'title': video_title,
'thumbnail': thumbnail,
}

View File

@@ -47,4 +47,3 @@ class XBefIE(InfoExtractor):
'thumbnail': thumbnail,
'age_limit': 18,
}

View File

@@ -0,0 +1,67 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import (
compat_chr,
compat_ord,
)
from ..utils import (
int_or_none,
parse_filesize,
)
class XMinusIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?x-minus\.org/track/(?P<id>[0-9]+)'
_TEST = {
'url': 'http://x-minus.org/track/4542/%D0%BF%D0%B5%D1%81%D0%B5%D0%BD%D0%BA%D0%B0-%D1%88%D0%BE%D1%84%D0%B5%D1%80%D0%B0.html',
'md5': '401a15f2d2dcf6d592cb95528d72a2a8',
'info_dict': {
'id': '4542',
'ext': 'mp3',
'title': 'Леонид Агутин-Песенка шофера',
'duration': 156,
'tbr': 320,
'filesize_approx': 5900000,
'view_count': int,
}
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
artist = self._html_search_regex(
r'minus_track\.artist="(.+?)"', webpage, 'artist')
title = artist + '-' + self._html_search_regex(
r'minus_track\.title="(.+?)"', webpage, 'title')
duration = int_or_none(self._html_search_regex(
r'minus_track\.dur_sec=\'([0-9]*?)\'',
webpage, 'duration', fatal=False))
filesize_approx = parse_filesize(self._html_search_regex(
r'<div class="filesize[^"]*"></div>\s*([0-9.]+\s*[a-zA-Z][bB])',
webpage, 'approximate filesize', fatal=False))
tbr = int_or_none(self._html_search_regex(
r'<div class="quality[^"]*"></div>\s*([0-9]+)\s*kbps',
webpage, 'bitrate', fatal=False))
view_count = int_or_none(self._html_search_regex(
r'<div class="quality.*?► ([0-9]+)',
webpage, 'view count', fatal=False))
enc_token = self._html_search_regex(
r'data-mt="(.*?)"', webpage, 'enc_token')
token = ''.join(
c if pos == 3 else compat_chr(compat_ord(c) - 1)
for pos, c in enumerate(reversed(enc_token)))
video_url = 'http://x-minus.org/dwlf/%s/%s.mp3' % (video_id, token)
return {
'id': video_id,
'title': title,
'url': video_url,
'duration': duration,
'filesize_approx': filesize_approx,
'tbr': tbr,
'view_count': view_count,
}

View File

@@ -33,6 +33,7 @@ from ..utils import (
uppercase_escape,
)
class YoutubeBaseInfoExtractor(InfoExtractor):
"""Provide base functions for Youtube extractors"""
_LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
@@ -180,8 +181,10 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
'next_url': '/',
'action_confirm': 'Confirm',
}
req = compat_urllib_request.Request(self._AGE_URL,
compat_urllib_parse.urlencode(age_form).encode('ascii'))
req = compat_urllib_request.Request(
self._AGE_URL,
compat_urllib_parse.urlencode(age_form).encode('ascii')
)
self._download_webpage(
req, None,
@@ -406,6 +409,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
'format': '141',
},
},
# Controversy video
{
'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
'info_dict': {
'id': 'T4XJQO3qol8',
'ext': 'mp4',
'upload_date': '20100909',
'uploader': 'The Amazing Atheist',
'uploader_id': 'TheAmazingAtheist',
'title': 'Burning Everyone\'s Koran',
'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
}
}
]
def __init__(self, *args, **kwargs):
@@ -638,6 +654,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
def _extract_from_m3u8(self, manifest_url, video_id):
url_map = {}
def _get_urls(_manifest):
lines = _manifest.split('\n')
urls = filter(lambda l: l and not l.startswith('#'),
@@ -666,7 +683,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
video_id = self.extract_id(url)
# Get video webpage
url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
pref_cookies = [
c for c in self._downloader.cookiejar
if c.domain == '.youtube.com' and c.name == 'PREF']
@@ -961,6 +978,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
dash_manifest_url = video_info.get('dashmpd')[0]
else:
dash_manifest_url = ytplayer_config['args']['dashmpd']
def decrypt_sig(mobj):
s = mobj.group(1)
dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
@@ -1020,6 +1038,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
'formats': formats,
}
class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
IE_DESC = 'YouTube.com playlists'
_VALID_URL = r"""(?x)(?:
@@ -1320,8 +1339,10 @@ class YoutubeUserIE(InfoExtractor):
# Don't return True if the url can be extracted with other youtube
# extractor, the regex would is too permissive and it would match.
other_ies = iter(klass for (name, klass) in globals().items() if name.endswith('IE') and klass is not cls)
if any(ie.suitable(url) for ie in other_ies): return False
else: return super(YoutubeUserIE, cls).suitable(url)
if any(ie.suitable(url) for ie in other_ies):
return False
else:
return super(YoutubeUserIE, cls).suitable(url)
def _real_extract(self, url):
# Extract username
@@ -1544,29 +1565,33 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
paging = mobj.group('paging')
return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
IE_DESC = 'YouTube.com recommended videos, "ytrec" keyword (requires authentication)'
IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
_VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
_FEED_NAME = 'recommended'
_PLAYLIST_TITLE = 'Youtube Recommended videos'
class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
IE_DESC = 'Youtube watch later list, "ytwatchlater" keyword (requires authentication)'
IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
_VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
_FEED_NAME = 'watch_later'
_PLAYLIST_TITLE = 'Youtube Watch Later'
_PERSONAL_FEED = True
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
IE_DESC = 'Youtube watch history, "ythistory" keyword (requires authentication)'
IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
_VALID_URL = 'https?://www\.youtube\.com/feed/history|:ythistory'
_FEED_NAME = 'history'
_PERSONAL_FEED = True
_PLAYLIST_TITLE = 'Youtube Watch History'
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
IE_NAME = 'youtube:favorites'
IE_DESC = 'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
_VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
_LOGIN_REQUIRED = True

View File

@@ -0,0 +1,107 @@
# coding=utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
class ZingMp3BaseInfoExtractor(InfoExtractor):
@staticmethod
def _extract_item(item):
title = item.find('./title').text.strip()
source = item.find('./source').text
extension = item.attrib['type']
thumbnail = item.find('./backimage').text
return {
'title': title,
'url': source,
'ext': extension,
'thumbnail': thumbnail,
}
def _extract_player_xml(self, player_xml_url, id, playlist_title=None):
player_xml = self._download_xml(player_xml_url, id, 'Downloading Player XML')
items = player_xml.findall('./item')
if len(items) == 1:
# one single song
data = self._extract_item(items[0])
data['id'] = id
return data
else:
# playlist of songs
entries = []
for i, item in enumerate(items, 1):
entry = self._extract_item(item)
entry['id'] = '%s-%d' % (id, i)
entries.append(entry)
return {
'_type': 'playlist',
'id': id,
'title': playlist_title,
'entries': entries,
}
class ZingMp3SongIE(ZingMp3BaseInfoExtractor):
_VALID_URL = r'https?://mp3\.zing\.vn/bai-hat/(?P<slug>[^/]+)/(?P<song_id>\w+)\.html'
_TESTS = [{
'url': 'http://mp3.zing.vn/bai-hat/Xa-Mai-Xa-Bao-Thy/ZWZB9WAB.html',
'md5': 'ead7ae13693b3205cbc89536a077daed',
'info_dict': {
'id': 'ZWZB9WAB',
'title': 'Xa Mãi Xa',
'ext': 'mp3',
'thumbnail': 're:^https?://.*\.jpg$',
},
}]
IE_NAME = 'zingmp3:song'
IE_DESC = 'mp3.zing.vn songs'
def _real_extract(self, url):
matched = re.match(self._VALID_URL, url)
slug = matched.group('slug')
song_id = matched.group('song_id')
webpage = self._download_webpage(
'http://mp3.zing.vn/bai-hat/%s/%s.html' % (slug, song_id), song_id)
player_xml_url = self._search_regex(
r'&amp;xmlURL=(?P<xml_url>[^&]+)&', webpage, 'player xml url')
return self._extract_player_xml(player_xml_url, song_id)
class ZingMp3AlbumIE(ZingMp3BaseInfoExtractor):
_VALID_URL = r'https?://mp3\.zing\.vn/album/(?P<slug>[^/]+)/(?P<album_id>\w+)\.html'
_TESTS = [{
'url': 'http://mp3.zing.vn/album/Lau-Dai-Tinh-Ai-Bang-Kieu-Minh-Tuyet/ZWZBWDAF.html',
'info_dict': {
'_type': 'playlist',
'id': 'ZWZBWDAF',
'title': 'Lâu Đài Tình Ái - Bằng Kiều ft. Minh Tuyết | Album 320 lossless',
},
'playlist_count': 10,
}]
IE_NAME = 'zingmp3:album'
IE_DESC = 'mp3.zing.vn albums'
def _real_extract(self, url):
matched = re.match(self._VALID_URL, url)
slug = matched.group('slug')
album_id = matched.group('album_id')
webpage = self._download_webpage(
'http://mp3.zing.vn/album/%s/%s.html' % (slug, album_id), album_id)
player_xml_url = self._search_regex(
r'&amp;xmlURL=(?P<xml_url>[^&]+)&', webpage, 'player xml url')
return self._extract_player_xml(
player_xml_url, album_id,
playlist_title=self._og_search_title(webpage))

View File

@@ -61,7 +61,7 @@ class JSInterpreter(object):
pass
m = re.match(
r'^(?P<var>[a-zA-Z0-9_]+)\.(?P<member>[^(]+)(?:\(+(?P<args>[^()]*)\))?$',
r'^(?P<var>[$a-zA-Z0-9_]+)\.(?P<member>[^(]+)(?:\(+(?P<args>[^()]*)\))?$',
expr)
if m:
variable = m.group('var')

View File

@@ -262,7 +262,8 @@ def parseOpts(overrideArguments=None):
video_format.add_option(
'-f', '--format',
action='store', dest='format', metavar='FORMAT', default=None,
help='video format code, specify the order of preference using'
help=(
'video format code, specify the order of preference using'
' slashes: -f 22/17/18 . -f mp4 , -f m4a and -f flv are also'
' supported. You can also use the special names "best",'
' "bestvideo", "bestaudio", "worst", "worstvideo" and'
@@ -271,7 +272,7 @@ def parseOpts(overrideArguments=None):
' -f 136/137/mp4/bestvideo,140/m4a/bestaudio.'
' You can merge the video and audio of two formats into a single'
' file using -f <video-format>+<audio-format> (requires ffmpeg or'
' avconv), for example -f bestvideo+bestaudio.')
' avconv), for example -f bestvideo+bestaudio.'))
video_format.add_option(
'--all-formats',
action='store_const', dest='format', const='all',

View File

@@ -26,4 +26,3 @@ class ExecAfterDownloadPP(PostProcessor):
'Command returned error code %d' % retCode)
return None, information # by default, keep file and do nothing

View File

@@ -108,4 +108,3 @@ class XAttrMetadataPP(PostProcessor):
except (subprocess.CalledProcessError, OSError):
self._downloader.report_error("This filesystem doesn't support extended attributes. (You may have to enable them in your /etc/fstab)")
return False, info

View File

@@ -827,4 +827,3 @@ class SWFInterpreter(object):
avm_class.method_pyfunctions[func_name] = resfunc
return resfunc

View File

@@ -13,13 +13,17 @@ from .utils import (
)
from .version import __version__
def rsa_verify(message, signature, key):
from struct import pack
from hashlib import sha256
from sys import version_info
def b(x):
if version_info[0] == 2: return x
else: return x.encode('latin1')
if version_info[0] == 2:
return x
else:
return x.encode('latin1')
assert(type(message) == type(b('')))
block_size = 0
n = key[0]
@@ -32,13 +36,17 @@ def rsa_verify(message, signature, key):
raw_bytes.insert(0, pack("B", signature & 0xFF))
signature >>= 8
signature = (block_size - len(raw_bytes)) * b('\x00') + b('').join(raw_bytes)
if signature[0:2] != b('\x00\x01'): return False
if signature[0:2] != b('\x00\x01'):
return False
signature = signature[2:]
if not b('\x00') in signature: return False
if not b('\x00') in signature:
return False
signature = signature[signature.index(b('\x00')) + 1:]
if not signature.startswith(b('\x30\x31\x30\x0D\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x01\x05\x00\x04\x20')): return False
if not signature.startswith(b('\x30\x31\x30\x0D\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x01\x05\x00\x04\x20')):
return False
signature = signature[19:]
if signature != sha256(message).digest(): return False
if signature != sha256(message).digest():
return False
return True
@@ -58,7 +66,8 @@ def update_self(to_screen, verbose):
try:
newversion = compat_urllib_request.urlopen(VERSION_URL).read().decode('utf-8').strip()
except:
if verbose: to_screen(compat_str(traceback.format_exc()))
if verbose:
to_screen(compat_str(traceback.format_exc()))
to_screen(u'ERROR: can\'t find the current version. Please try again later.')
return
if newversion == __version__:
@@ -70,7 +79,8 @@ def update_self(to_screen, verbose):
versions_info = compat_urllib_request.urlopen(JSON_URL).read().decode('utf-8')
versions_info = json.loads(versions_info)
except:
if verbose: to_screen(compat_str(traceback.format_exc()))
if verbose:
to_screen(compat_str(traceback.format_exc()))
to_screen(u'ERROR: can\'t obtain versions info. Please try again later.')
return
if not 'signature' in versions_info:
@@ -118,7 +128,8 @@ def update_self(to_screen, verbose):
newcontent = urlh.read()
urlh.close()
except (IOError, OSError):
if verbose: to_screen(compat_str(traceback.format_exc()))
if verbose:
to_screen(compat_str(traceback.format_exc()))
to_screen(u'ERROR: unable to download latest version')
return
@@ -131,7 +142,8 @@ def update_self(to_screen, verbose):
with open(exe + '.new', 'wb') as outf:
outf.write(newcontent)
except (IOError, OSError):
if verbose: to_screen(compat_str(traceback.format_exc()))
if verbose:
to_screen(compat_str(traceback.format_exc()))
to_screen(u'ERROR: unable to write the new version')
return
@@ -150,7 +162,8 @@ start /b "" cmd /c del "%%~f0"&exit /b"
subprocess.Popen([bat]) # Continues to run in the background
return # Do not show premature success messages
except (IOError, OSError):
if verbose: to_screen(compat_str(traceback.format_exc()))
if verbose:
to_screen(compat_str(traceback.format_exc()))
to_screen(u'ERROR: unable to overwrite current version')
return
@@ -161,7 +174,8 @@ start /b "" cmd /c del "%%~f0"&exit /b"
newcontent = urlh.read()
urlh.close()
except (IOError, OSError):
if verbose: to_screen(compat_str(traceback.format_exc()))
if verbose:
to_screen(compat_str(traceback.format_exc()))
to_screen(u'ERROR: unable to download latest version')
return
@@ -174,12 +188,14 @@ start /b "" cmd /c del "%%~f0"&exit /b"
with open(filename, 'wb') as outf:
outf.write(newcontent)
except (IOError, OSError):
if verbose: to_screen(compat_str(traceback.format_exc()))
if verbose:
to_screen(compat_str(traceback.format_exc()))
to_screen(u'ERROR: unable to overwrite current version')
return
to_screen(u'Updated youtube-dl. Restart youtube-dl to use the new version.')
def get_notes(versions, fromVersion):
notes = []
for v, vdata in sorted(versions.items()):
@@ -187,6 +203,7 @@ def get_notes(versions, fromVersion):
notes.extend(vdata.get('notes', []))
return notes
def print_notes(to_screen, versions, fromVersion=__version__):
notes = get_notes(versions, fromVersion)
if notes:

View File

@@ -41,6 +41,7 @@ from .compat import (
compat_urllib_parse_urlparse,
compat_urllib_request,
compat_urlparse,
shlex_quote,
)
@@ -55,6 +56,7 @@ std_headers = {
'Accept-Language': 'en-us,en;q=0.5',
}
def preferredencoding():
"""Get preferred encoding.
@@ -145,6 +147,8 @@ else:
# On python2.6 the xml.etree.ElementTree.Element methods don't support
# the namespace parameter
def xpath_with_ns(path, ns_map):
components = [c.split(':') for c in path.split('/')]
replaced = []
@@ -255,6 +259,7 @@ def timeconvert(timestr):
timestamp = email.utils.mktime_tz(timetuple)
return timestamp
def sanitize_filename(s, restricted=False, is_id=False):
"""Sanitizes a string so it could be used as part of a filename.
If restricted is set, use a stricter subset of allowed characters.
@@ -287,6 +292,7 @@ def sanitize_filename(s, restricted=False, is_id=False):
result = '_'
return result
def orderedSet(iterable):
""" Remove all duplicates from the input iterable """
res = []
@@ -371,6 +377,7 @@ def decodeOption(optval):
assert isinstance(optval, compat_str)
return optval
def formatSeconds(secs):
if secs > 3600:
return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
@@ -423,6 +430,7 @@ def make_HTTPS_handler(opts_no_check_certificate, **kwargs):
class ExtractorError(Exception):
"""Error during info extraction."""
def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
""" tb, if given, is the original traceback (so that it can be printed out).
If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
@@ -467,6 +475,7 @@ class DownloadError(Exception):
configured to continue on errors. They will contain the appropriate
error message.
"""
def __init__(self, msg, exc_info=None):
""" exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
super(DownloadError, self).__init__(msg)
@@ -488,9 +497,11 @@ class PostProcessingError(Exception):
This exception may be raised by PostProcessor's .run() method to
indicate an error in the postprocessing task.
"""
def __init__(self, msg):
self.msg = msg
class MaxDownloadsReached(Exception):
""" --max-downloads limit has been reached. """
pass
@@ -520,6 +531,7 @@ class ContentTooShortError(Exception):
self.downloaded = downloaded
self.expected = expected
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
"""Handler for HTTP requests and responses.
@@ -680,6 +692,7 @@ def unified_strdate(date_str):
upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
return upload_date
def determine_ext(url, default_ext='unknown_video'):
if url is None:
return default_ext
@@ -689,9 +702,11 @@ def determine_ext(url, default_ext='unknown_video'):
else:
return default_ext
def subtitles_filename(filename, sub_lang, sub_format):
return filename.rsplit('.', 1)[0] + '.' + sub_lang + '.' + sub_format
def date_from_str(date_str):
"""
Return a datetime object from a string in the format YYYYMMDD or
@@ -718,6 +733,7 @@ def date_from_str(date_str):
return today + delta
return datetime.datetime.strptime(date_str, "%Y%m%d").date()
def hyphenate_date(date_str):
"""
Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
@@ -727,8 +743,10 @@ def hyphenate_date(date_str):
else:
return date_str
class DateRange(object):
"""Represents a time interval between two dates"""
def __init__(self, start=None, end=None):
"""start and end must be strings in the format accepted by date"""
if start is not None:
@@ -741,15 +759,18 @@ class DateRange(object):
self.end = datetime.datetime.max.date()
if self.start > self.end:
raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
@classmethod
def day(cls, day):
"""Returns a range that only contains the given day"""
return cls(day, day)
def __contains__(self, date):
"""Check if the date is in the range"""
if not isinstance(date, datetime.date):
date = date_from_str(date)
return self.start <= date <= self.end
def __str__(self):
return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
@@ -1025,6 +1046,57 @@ def format_bytes(bytes):
return '%.2f%s' % (converted, suffix)
def parse_filesize(s):
if s is None:
return None
# The lower-case forms are of course incorrect and inofficial,
# but we support those too
_UNIT_TABLE = {
'B': 1,
'b': 1,
'KiB': 1024,
'KB': 1000,
'kB': 1024,
'Kb': 1000,
'MiB': 1024 ** 2,
'MB': 1000 ** 2,
'mB': 1024 ** 2,
'Mb': 1000 ** 2,
'GiB': 1024 ** 3,
'GB': 1000 ** 3,
'gB': 1024 ** 3,
'Gb': 1000 ** 3,
'TiB': 1024 ** 4,
'TB': 1000 ** 4,
'tB': 1024 ** 4,
'Tb': 1000 ** 4,
'PiB': 1024 ** 5,
'PB': 1000 ** 5,
'pB': 1024 ** 5,
'Pb': 1000 ** 5,
'EiB': 1024 ** 6,
'EB': 1000 ** 6,
'eB': 1024 ** 6,
'Eb': 1000 ** 6,
'ZiB': 1024 ** 7,
'ZB': 1000 ** 7,
'zB': 1024 ** 7,
'Zb': 1000 ** 7,
'YiB': 1024 ** 8,
'YB': 1000 ** 8,
'yB': 1024 ** 8,
'Yb': 1000 ** 8,
}
units_re = '|'.join(re.escape(u) for u in _UNIT_TABLE)
m = re.match(r'(?P<num>[0-9]+(?:\.[0-9]*)?)\s*(?P<unit>%s)' % units_re, s)
if not m:
return None
return int(float(m.group('num')) * _UNIT_TABLE[m.group('unit')])
def get_term_width():
columns = compat_getenv('COLUMNS', None)
if columns:
@@ -1433,3 +1505,8 @@ def ytdl_is_updateable():
from zipimport import zipimporter
return isinstance(globals().get('__loader__'), zipimporter) or hasattr(sys, 'frozen')
def args_to_str(args):
# Get a short string representation for a subprocess command
return ' '.join(shlex_quote(a) for a in args)

View File

@@ -1,2 +1,2 @@
__version__ = '2014.11.20.1'
__version__ = '2014.11.26'