Compare commits

...

67 Commits

Author SHA1 Message Date
4be0aa3539 release 2012.02.22 2013-02-22 16:41:36 +01:00
f636c34481 Stop early in nosetests (in release script) 2013-02-22 16:40:19 +01:00
3bf79c752e Print *all* release notes 2013-02-22 00:36:23 +01:00
8271226a55 Fix --match-title and --reject-title decoding (Closes #690) 2013-02-21 17:09:39 +01:00
1013186a17 Also check for JSLoader of JWSPlayer (thanks to @maximeg, Closes #685) 2013-02-21 16:56:48 +01:00
7c038b3c32 Import HTTPErrorProcessor from the correct module (Closes #696) 2013-02-21 16:49:05 +01:00
c8cd8e5f55 release 2013.02.19 2013-02-19 00:06:04 +01:00
471cf47796 include bash completion and manpage in PyPi dist 2013-02-18 23:56:13 +01:00
d8f64574a4 release 2013.02.18 2013-02-18 23:37:20 +01:00
e711babbd1 Fix YP IE 2013-02-18 23:30:33 +01:00
a72b0f2b6f Use proper echo commands 2013-02-18 23:22:01 +01:00
434eb6f26b Include man and bash completion in PyPi release 2013-02-18 23:19:57 +01:00
197080b10b Merge remote-tracking branch 'jaimeMF/TED' 2013-02-18 23:12:56 +01:00
7796e8c2cb facebook: also download lq videos 2013-02-18 23:12:48 +01:00
6d4363368a Fix MyVideo IE 2013-02-18 22:32:56 +01:00
414638cd50 TED: Add support for playlists 2013-02-18 21:42:06 +01:00
2a9983b78f Fix 8tracks 2013-02-18 19:11:32 +01:00
b17c974a88 Mark DailyMotion as broken for now (#680) 2013-02-18 18:53:40 +01:00
5717d91ab7 Correct --newline and give it a more meaningful title 2013-02-18 18:52:06 +01:00
79eb0287ab Merge remote-tracking branch 'glisignoli/master' 2013-02-18 18:47:35 +01:00
58994225bc Add tests to MySpass 2013-02-18 18:45:09 +01:00
59d4c2fe1b fix some titles in TED 2013-02-17 17:25:02 +01:00
3a468f2d8b Basic support for TED 2013-02-17 17:13:06 +01:00
1ad5d872b9 added new InfoExtractor for myspass.de 2013-02-16 13:46:13 +01:00
355fc8e944 Update README.md 2013-02-15 15:57:40 +13:00
380a29dbf7 Update youtube_dl/__init__.py 2013-02-15 15:55:11 +13:00
1528d6642d Forgot to remove \r 2013-02-13 16:43:08 +13:00
7311fef854 Modified youtube-dl to write new lines with the --newline switch. This
enables easier process monitoring when being called with external
scripts.
2013-02-13 14:02:31 +13:00
906417c7c5 Fix delayed title display in --console-title
With Python 3, the titlebar wouldn't get updated for a long time (due to
stderr buffering), and when it did, the title would be shown as b'...'
representation.
2013-02-09 22:58:12 +02:00
6aabe82035 Credit Osama Khalid for Keek support 2013-02-08 11:01:09 +01:00
f0877a445e Add tests for keek 2013-02-08 11:00:28 +01:00
da06e2daf8 Add KeekIE() 2013-02-08 10:25:55 +03:00
d3f5f9f6b9 Fix login (Closes #658) 2013-02-06 21:22:53 +01:00
bfc6ea7935 Ignore PyPi metadata 2013-02-05 13:42:52 +01:00
8edc2cf8ca Support direct vimeo links (Closes #666) 2013-02-05 13:42:08 +01:00
fb778e66df Fix encoding in youtube subtitle download (Closes #669) 2013-02-05 13:30:02 +01:00
3a9918d37f Escapist continues to be flaky on travis 2013-02-02 14:53:34 +01:00
ccb0cae134 Fix automatic release (oops) 2013-02-02 14:52:38 +01:00
085c8b75a6 release 2013.02.02 2013-02-02 14:45:38 +01:00
dbf2ba3d61 Better help for new options 2013-02-02 14:44:22 +01:00
b47bbac393 Disable Stanford OC test for now, and enable escapist 2013-02-02 14:40:41 +01:00
229cac754a Improve cookie error handling 2013-02-02 13:51:54 +01:00
0e33684194 Switch to m4a by default (Closes #240) 2013-02-01 18:23:20 +01:00
9e982f9e4e Added "min-filesize" and "max-filesize" options 2013-02-01 18:09:34 +01:00
c7a725cfad Merge remote-tracking branch 'dcoppa/master' 2013-02-01 18:05:42 +01:00
450a30cae8 Add PyPi upload to release script 2013-02-01 18:01:53 +01:00
9cd5e4fce8 release 2013.02.01 2013-02-01 17:57:32 +01:00
edba5137b8 Fix Facebook IE 2013-02-01 17:56:22 +01:00
233a22960a Switch ComedyCentral test to a permanent URL (They delete full episodes older than a month) 2013-02-01 17:46:03 +01:00
3b024e17af Work around buggy HTML Parser in Python < 2.7.3 (Closes #662) 2013-02-01 17:29:50 +01:00
a32b573ccb Try setuptools first, then fallback to distutils.core 2013-01-30 15:31:38 +01:00
ec71c13ab8 release 2013.01.28 2013-01-27 18:33:58 +01:00
f0bad2b026 Fix Stanford (Closes #653) 2013-01-27 15:23:26 +01:00
25580f3251 8tracks: Ignore hashes 2013-01-27 04:15:12 +01:00
da4de959df 8tracks: Better default titles 2013-01-27 04:05:53 +01:00
d0d51a8afa 8tracks: Include performer as uploader 2013-01-27 03:27:46 +01:00
c67598c3e1 Remove space before shebang 2013-01-27 03:07:07 +01:00
811d253bc2 Merge remote-tracking branch 'jaimeMF/makefilePythonversion' 2013-01-27 03:06:32 +01:00
c3a1642ead release 2013.01.27 2013-01-27 03:03:02 +01:00
ccf65f9dee 8tracks IE (Closes #652) 2013-01-27 03:01:23 +01:00
b954070d70 Fix Facebook (Closes #375) 2013-01-25 16:54:48 +01:00
30e9f4496b Drop md5: spec for now (unused and breaks int values) 2013-01-25 16:54:25 +01:00
271d3fbdaa Option in makefile to select python interpreter 2013-01-25 15:11:03 +01:00
6df40dcbe0 Guard against sys.getfilesystemencoding() == None (#503) 2013-01-20 01:48:05 +01:00
97f194c1fb twitch.tv: Use id as title if no title is present (Closes #638) 2013-01-16 09:55:45 +01:00
4da769ccca Do not backup version.py (under version control and frankly, not that complex) 2013-01-12 23:04:46 +01:00
253d96f2e2 Force build removal 2013-01-12 22:25:54 +01:00
16 changed files with 540 additions and 246 deletions

1
.gitignore vendored
View File

@ -17,3 +17,4 @@ youtube-dl.tar.gz
.coverage
cover/
updates_key.pem
*.egg-info

View File

@ -1,3 +1,5 @@
include README.md
include test/*.py
include test/*.json
include youtube-dl.bash-completion
include youtube-dl.1

View File

@ -1,12 +1,16 @@
all: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion
clean:
rm -rf youtube-dl youtube-dl.exe youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz
rm -rf youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz
cleanall: clean
rm -f youtube-dl youtube-dl.exe
PREFIX=/usr/local
BINDIR=$(PREFIX)/bin
MANDIR=$(PREFIX)/man
SYSCONFDIR=/etc
PYTHON=/usr/bin/env python
install: youtube-dl youtube-dl.1 youtube-dl.bash-completion
install -d $(DESTDIR)$(BINDIR)
@ -22,12 +26,14 @@ test:
tar: youtube-dl.tar.gz
.PHONY: all clean install test tar
.PHONY: all clean install test tar bash-completion pypi-files
pypi-files: youtube-dl.bash-completion README.txt youtube-dl.1
youtube-dl: youtube_dl/*.py
zip --quiet youtube-dl youtube_dl/*.py
zip --quiet --junk-paths youtube-dl youtube_dl/__main__.py
echo '#!/usr/bin/env python' > youtube-dl
echo '#!$(PYTHON)' > youtube-dl
cat youtube-dl.zip >> youtube-dl
rm youtube-dl.zip
chmod a+x youtube-dl
@ -44,6 +50,8 @@ youtube-dl.1: README.md
youtube-dl.bash-completion: youtube_dl/*.py devscripts/bash-completion.in
python devscripts/bash-completion.py
bash-completion: youtube-dl.bash-completion
youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion
@tar -czf youtube-dl.tar.gz --transform "s|^|youtube-dl/|" --owner 0 --group 0 \
--exclude '*.DS_Store' \

View File

@ -38,6 +38,10 @@ which means you can modify it, redistribute it or use it however you like.
--reject-title REGEX skip download for matching titles (regex or
caseless sub-string)
--max-downloads NUMBER Abort after downloading NUMBER files
--min-filesize SIZE Do not download any videos smaller than SIZE (e.g.
50k or 44.6m)
--max-filesize SIZE Do not download any videos larger than SIZE (e.g.
50k or 44.6m)
## Filesystem Options:
-t, --title use title in file name
@ -81,6 +85,7 @@ which means you can modify it, redistribute it or use it however you like.
--get-description simulate, quiet but print video description
--get-filename simulate, quiet but print output filename
--get-format simulate, quiet but print output format
--newline output progress bar as new lines
--no-progress do not print progress bar
--console-title display progress in console titlebar
-v, --verbose print various debugging information

View File

@ -20,19 +20,19 @@ if [ ! -z "`git tag | grep "$version"`" ]; then echo 'ERROR: version already pre
if [ ! -z "`git status --porcelain | grep -v CHANGELOG`" ]; then echo 'ERROR: the working directory is not clean; commit or stash changes'; exit 1; fi
if [ ! -f "updates_key.pem" ]; then echo 'ERROR: updates_key.pem missing'; exit 1; fi
echo "\n### First of all, testing..."
make clean
nosetests --with-coverage --cover-package=youtube_dl --cover-html test || exit 1
/bin/echo -e "\n### First of all, testing..."
make cleanall
nosetests --with-coverage --cover-package=youtube_dl --cover-html test --stop || exit 1
echo "\n### Changing version in version.py..."
sed -i~ "s/__version__ = '.*'/__version__ = '$version'/" youtube_dl/version.py
/bin/echo -e "\n### Changing version in version.py..."
sed -i "s/__version__ = '.*'/__version__ = '$version'/" youtube_dl/version.py
echo "\n### Committing CHANGELOG README.md and youtube_dl/version.py..."
/bin/echo -e "\n### Committing CHANGELOG README.md and youtube_dl/version.py..."
make README.md
git add CHANGELOG README.md youtube_dl/version.py
git commit -m "release $version"
echo "\n### Now tagging, signing and pushing..."
/bin/echo -e "\n### Now tagging, signing and pushing..."
git tag -s -m "Release $version" "$version"
git show "$version"
read -p "Is it good, can I push? (y/n) " -n 1
@ -42,7 +42,7 @@ MASTER=$(git rev-parse --abbrev-ref HEAD)
git push origin $MASTER:master
git push origin "$version"
echo "\n### OK, now it is time to build the binaries..."
/bin/echo -e "\n### OK, now it is time to build the binaries..."
REV=$(git rev-parse HEAD)
make youtube-dl youtube-dl.tar.gz
wget "http://jeromelaheurte.net:8142/download/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe || \
@ -57,11 +57,11 @@ RELEASE_FILES="youtube-dl youtube-dl.exe youtube-dl-$version.tar.gz"
(cd build/$version/ && sha512sum $RELEASE_FILES > SHA2-512SUMS)
git checkout HEAD -- youtube-dl youtube-dl.exe
echo "\n### Signing and uploading the new binaries to youtube-dl.org..."
/bin/echo -e "\n### Signing and uploading the new binaries to youtube-dl.org..."
for f in $RELEASE_FILES; do gpg --detach-sig "build/$version/$f"; done
scp -r "build/$version" ytdl@youtube-dl.org:html/downloads/
echo "\n### Now switching to gh-pages..."
/bin/echo -e "\n### Now switching to gh-pages..."
git clone --branch gh-pages --single-branch . build/gh-pages
ROOT=$(pwd)
(
@ -81,6 +81,11 @@ ROOT=$(pwd)
git push "$ROOT" gh-pages
git push "$ORIGIN_URL" gh-pages
)
rm -r build
rm -rf build
echo "\n### DONE!"
make pypi-files
echo "Uploading to PyPi ..."
python setup.py sdist upload
make clean
/bin/echo -e "\n### DONE!"

View File

@ -2,10 +2,14 @@
# -*- coding: utf-8 -*-
from __future__ import print_function
from distutils.core import setup
import pkg_resources
import sys
try:
from setuptools import setup
except ImportError:
from distutils.core import setup
try:
import py2exe
"""This will create an exe that needs Microsoft Visual C++ 2008 Redistributable Package"""

View File

@ -98,7 +98,7 @@ def generator(test_case):
for tc in test_cases:
if not test_case.get('params', {}).get('skip_download', False):
self.assertTrue(os.path.exists(tc['file']))
self.assertTrue(os.path.exists(tc['file']), msg='Missing file ' + tc['file'])
self.assertTrue(tc['file'] in finished_hook_called)
self.assertTrue(os.path.exists(tc['file'] + '.info.json'))
if 'md5' in tc:
@ -107,10 +107,6 @@ def generator(test_case):
with io.open(tc['file'] + '.info.json', encoding='utf-8') as infof:
info_dict = json.load(infof)
for (info_field, value) in tc.get('info_dict', {}).items():
if value.startswith('md5:'):
md5_info_value = hashlib.md5(info_dict.get(info_field, '')).hexdigest()
self.assertEqual(value[3:], md5_info_value)
else:
self.assertEqual(value, info_dict.get(info_field))
finally:
for tc in test_cases:

View File

@ -76,7 +76,8 @@
"name": "StanfordOpenClassroom",
"md5": "544a9468546059d4e80d76265b0443b8",
"url": "http://openclassroom.stanford.edu/MainFolder/VideoPage.php?course=PracticalUnix&video=intro-environment&speed=100",
"file": "PracticalUnix_intro-environment.mp4"
"file": "PracticalUnix_intro-environment.mp4",
"skip": "Currently offline"
},
{
"name": "XNXX",
@ -181,38 +182,13 @@
},
{
"name": "ComedyCentral",
"url": "http://www.thedailyshow.com/full-episodes/thu-december-13-2012-kristen-stewart",
"playlist": [
{
"file": "422204.mp4",
"md5": "7a7abe068b31ff03e7b8a37596e72380",
"url": "http://www.thedailyshow.com/watch/thu-december-13-2012/kristen-stewart",
"file": "422212.mp4",
"md5": "4e2f5cb088a83cd8cdb7756132f9739d",
"info_dict": {
"title": "thedailyshow-thu-december-13-2012-kristen-stewart part 1"
"title": "thedailyshow-kristen-stewart part 1"
}
},
{
"file": "422205.mp4",
"md5": "30552b7274c94dbb933f64600eadddd2",
"info_dict": {
"title": "thedailyshow-thu-december-13-2012-kristen-stewart part 2"
}
},
{
"file": "422206.mp4",
"md5": "1f4c0664b352cb8e8fe85d5da4fbee91",
"info_dict": {
"title": "thedailyshow-thu-december-13-2012-kristen-stewart part 3"
}
},
{
"file": "422207.mp4",
"md5": "f61ee8a4e6bd1308438e03badad78554",
"info_dict": {
"title": "thedailyshow-thu-december-13-2012-kristen-stewart part 4"
}
}
]
},
{
"name": "RBMARadio",
"url": "http://www.rbmaradio.com/shows/ford-lopatin-live-at-primavera-sound-2011",
@ -225,5 +201,108 @@
"uploader_id": "ford-lopatin",
"location": "Spain"
}
},
{
"name": "Facebook",
"url": "https://www.facebook.com/photo.php?v=120708114770723",
"file": "120708114770723.mp4",
"md5": "48975a41ccc4b7a581abd68651c1a5a8",
"info_dict": {
"title": "PEOPLE ARE AWESOME 2013",
"duration": 279
}
},
{
"name": "EightTracks",
"url": "http://8tracks.com/ytdl/youtube-dl-test-tracks-a",
"playlist": [
{
"file": "11885610.m4a",
"md5": "96ce57f24389fc8734ce47f4c1abcc55",
"info_dict": {
"title": "youtue-dl project<>\"' - youtube-dl test track 1 \"'/\\\u00e4\u21ad",
"uploader_id": "ytdl"
}
},
{
"file": "11885608.m4a",
"md5": "4ab26f05c1f7291ea460a3920be8021f",
"info_dict": {
"title": "youtube-dl project - youtube-dl test track 2 \"'/\\\u00e4\u21ad",
"uploader_id": "ytdl"
}
},
{
"file": "11885679.m4a",
"md5": "d30b5b5f74217410f4689605c35d1fd7",
"info_dict": {
"title": "youtube-dl project as well - youtube-dl test track 3 \"'/\\\u00e4\u21ad"
}
},
{
"file": "11885680.m4a",
"md5": "4eb0a669317cd725f6bbd336a29f923a",
"info_dict": {
"title": "youtube-dl project as well - youtube-dl test track 4 \"'/\\\u00e4\u21ad"
}
},
{
"file": "11885682.m4a",
"md5": "1893e872e263a2705558d1d319ad19e8",
"info_dict": {
"title": "PH - youtube-dl test track 5 \"'/\\\u00e4\u21ad"
}
},
{
"file": "11885683.m4a",
"md5": "b673c46f47a216ab1741ae8836af5899",
"info_dict": {
"title": "PH - youtube-dl test track 6 \"'/\\\u00e4\u21ad"
}
},
{
"file": "11885684.m4a",
"md5": "1d74534e95df54986da7f5abf7d842b7",
"info_dict": {
"title": "phihag - youtube-dl test track 7 \"'/\\\u00e4\u21ad"
}
},
{
"file": "11885685.m4a",
"md5": "f081f47af8f6ae782ed131d38b9cd1c0",
"info_dict": {
"title": "phihag - youtube-dl test track 8 \"'/\\\u00e4\u21ad"
}
}
]
},
{
"name": "Keek",
"url": "http://www.keek.com/ytdl/keeks/NODfbab",
"file": "NODfbab.mp4",
"md5": "9b0636f8c0f7614afa4ea5e4c6e57e83",
"info_dict": {
"title": "test chars: \"'/\\ä<>This is a test video for youtube-dl.For more information, contact phihag@phihag.de ."
}
},
{
"name": "TED",
"url": "http://www.ted.com/talks/dan_dennett_on_our_consciousness.html",
"file": "102.mp4",
"md5": "7bc087e71d16f18f9b8ab9fa62a8a031",
"info_dict": {
"title": "Dan Dennett: The illusion of consciousness"
}
},
{
"name": "MySpass",
"url": "http://www.myspass.de/myspass/shows/tvshows/absolute-mehrheit/Absolute-Mehrheit-vom-17022013-Die-Highlights-Teil-2--/11741/",
"file": "11741.mp4",
"md5": "0b49f4844a068f8b33f4b7c88405862b",
"info_dict": {
"title": "Absolute Mehrheit vom 17.02.2013 - Die Highlights, Teil 2"
}
}
]

Binary file not shown.

View File

@ -82,6 +82,8 @@ class FileDownloader(object):
subtitleslang: Language of the subtitles to download
test: Download only first bytes to test the downloader.
keepvideo: Keep the video file after post-processing
min_filesize: Skip files smaller than this size
max_filesize: Skip files larger than this size
"""
params = None
@ -206,7 +208,7 @@ class FileDownloader(object):
# already of type unicode()
ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
elif 'TERM' in os.environ:
sys.stderr.write('\033]0;%s\007' % message.encode(preferredencoding()))
self.to_screen('\033]0;%s\007' % message, skip_eol=True)
def fixed_template(self):
"""Checks if the output template is fixed."""
@ -303,6 +305,10 @@ class FileDownloader(object):
"""Report download progress."""
if self.params.get('noprogress', False):
return
if self.params.get('progress_with_newline', False):
self.to_screen(u'[download] %s of %s at %s ETA %s' %
(percent_str, data_len_str, speed_str, eta_str))
else:
self.to_screen(u'\r[download] %s of %s at %s ETA %s' %
(percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
@ -364,12 +370,10 @@ class FileDownloader(object):
title = info_dict['title']
matchtitle = self.params.get('matchtitle', False)
if matchtitle:
matchtitle = matchtitle.decode('utf8')
if not re.search(matchtitle, title, re.IGNORECASE):
return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
rejecttitle = self.params.get('rejecttitle', False)
if rejecttitle:
rejecttitle = rejecttitle.decode('utf8')
if re.search(rejecttitle, title, re.IGNORECASE):
return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
return None
@ -712,6 +716,15 @@ class FileDownloader(object):
data_len = data.info().get('Content-length', None)
if data_len is not None:
data_len = int(data_len) + resume_len
min_data_len = self.params.get("min_filesize", None)
max_data_len = self.params.get("max_filesize", None)
if min_data_len is not None and data_len < min_data_len:
self.to_screen(u'\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
return False
if max_data_len is not None and data_len > max_data_len:
self.to_screen(u'\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
return False
data_len_str = self.format_bytes(data_len)
byte_counter = 0 + resume_len
block_size = self.params.get('buffersize', 1024)

View File

@ -5,6 +5,7 @@ from __future__ import absolute_import
import base64
import datetime
import itertools
import netrc
import os
import re
@ -150,7 +151,7 @@ class YoutubeIE(InfoExtractor):
(?(1).+)? # if we found the ID, everything can follow
$"""
_LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
_LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en'
_LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
_AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
_NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
_NETRC_MACHINE = 'youtube'
@ -263,13 +264,18 @@ class YoutubeIE(InfoExtractor):
srt_lang = list(srt_lang_list.keys())[0]
if not srt_lang in srt_lang_list:
return (u'WARNING: no closed captions found in the specified language', None)
request = compat_urllib_request.Request('http://www.youtube.com/api/timedtext?lang=%s&name=%s&v=%s' % (srt_lang, srt_lang_list[srt_lang], video_id))
params = compat_urllib_parse.urlencode({
'lang': srt_lang,
'name': srt_lang_list[srt_lang].encode('utf-8'),
'v': video_id,
})
url = 'http://www.youtube.com/api/timedtext?' + params
try:
srt_xml = compat_urllib_request.urlopen(request).read().decode('utf-8')
srt_xml = compat_urllib_request.urlopen(url).read().decode('utf-8')
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
return (u'WARNING: unable to download video subtitles: %s' % compat_str(err), None)
if not srt_xml:
return (u'WARNING: unable to download video subtitles', None)
return (u'WARNING: Did not fetch video subtitles', None)
return (None, self._closed_captions_xml_to_srt(srt_xml))
def _print_formats(self, formats):
@ -314,19 +320,54 @@ class YoutubeIE(InfoExtractor):
if username is None:
return
request = compat_urllib_request.Request(self._LOGIN_URL)
try:
login_page = compat_urllib_request.urlopen(request).read().decode('utf-8')
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.to_stderr(u'WARNING: unable to fetch login page: %s' % compat_str(err))
return
galx = None
dsh = None
match = re.search(re.compile(r'<input.+?name="GALX".+?value="(.+?)"', re.DOTALL), login_page)
if match:
galx = match.group(1)
match = re.search(re.compile(r'<input.+?name="dsh".+?value="(.+?)"', re.DOTALL), login_page)
if match:
dsh = match.group(1)
# Log in
login_form = {
'current_form': 'loginForm',
'next': '/',
'action_login': 'Log In',
'username': username,
'password': password,
login_form_strs = {
u'continue': u'http://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
u'Email': username,
u'GALX': galx,
u'Passwd': password,
u'PersistentCookie': u'yes',
u'_utf8': u'',
u'bgresponse': u'js_disabled',
u'checkConnection': u'',
u'checkedDomains': u'youtube',
u'dnConn': u'',
u'dsh': dsh,
u'pstMsg': u'0',
u'rmShown': u'1',
u'secTok': u'',
u'signIn': u'Sign in',
u'timeStmp': u'',
u'service': u'youtube',
u'uilel': u'3',
u'hl': u'en_US',
}
request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
# Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
# chokes on unicode
login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
request = compat_urllib_request.Request(self._LOGIN_URL, login_data)
try:
self.report_login()
login_results = compat_urllib_request.urlopen(request).read().decode('utf-8')
if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
return
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
@ -677,6 +718,7 @@ class DailymotionIE(InfoExtractor):
_VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^/]+)'
IE_NAME = u'dailymotion'
_WORKING = False
def __init__(self, downloader=None):
InfoExtractor.__init__(self, downloader)
@ -972,7 +1014,7 @@ class VimeoIE(InfoExtractor):
"""Information extractor for vimeo.com."""
# _VALID_URL matches Vimeo URLs
_VALID_URL = r'(?:https?://)?(?:(?:www|player).)?vimeo\.com/(?:(?:groups|album)/[^/]+/)?(?:videos?/)?([0-9]+)'
_VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo\.com/(?:(?:groups|album)/[^/]+/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)'
IE_NAME = u'vimeo'
def __init__(self, downloader=None):
@ -993,7 +1035,11 @@ class VimeoIE(InfoExtractor):
self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
return
video_id = mobj.group(1)
video_id = mobj.group('id')
if not mobj.group('proto'):
url = 'https://' + url
if mobj.group('direct_link'):
url = 'https://vimeo.com/' + video_id
# Retrieve video webpage to extract further information
request = compat_urllib_request.Request(url, None, std_headers)
@ -1284,7 +1330,7 @@ class GenericIE(InfoExtractor):
opener = compat_urllib_request.OpenerDirector()
for handler in [compat_urllib_request.HTTPHandler, compat_urllib_request.HTTPDefaultErrorHandler,
HTTPMethodFallback, HEADRedirectHandler,
compat_urllib_error.HTTPErrorProcessor, compat_urllib_request.HTTPSHandler]:
compat_urllib_request.HTTPErrorProcessor, compat_urllib_request.HTTPSHandler]:
opener.add_handler(handler())
response = opener.open(HeadRequest(url))
@ -1320,6 +1366,9 @@ class GenericIE(InfoExtractor):
if mobj is None:
# Broaden the search a little bit
mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
if mobj is None:
# Broaden the search a little bit: JWPlayer JS loader
mobj = re.search(r'[^A-Za-z0-9]?file:\s*["\'](http[^\'"&]*)', webpage)
if mobj is None:
self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
return
@ -1980,62 +2029,14 @@ class DepositFilesIE(InfoExtractor):
class FacebookIE(InfoExtractor):
"""Information Extractor for Facebook"""
_WORKING = False
_VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook\.com/(?:video/video|photo)\.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)'
_LOGIN_URL = 'https://login.facebook.com/login.php?m&next=http%3A%2F%2Fm.facebook.com%2Fhome.php&'
_NETRC_MACHINE = 'facebook'
_available_formats = ['video', 'highqual', 'lowqual']
_video_extensions = {
'video': 'mp4',
'highqual': 'mp4',
'lowqual': 'mp4',
}
IE_NAME = u'facebook'
def __init__(self, downloader=None):
InfoExtractor.__init__(self, downloader)
def _reporter(self, message):
"""Add header and report message."""
self._downloader.to_screen(u'[facebook] %s' % message)
def report_login(self):
"""Report attempt to log in."""
self._reporter(u'Logging in')
def report_video_webpage_download(self, video_id):
"""Report attempt to download video webpage."""
self._reporter(u'%s: Downloading video webpage' % video_id)
def report_information_extraction(self, video_id):
"""Report attempt to extract video information."""
self._reporter(u'%s: Extracting video information' % video_id)
def _parse_page(self, video_webpage):
"""Extract video information from page"""
# General data
data = {'title': r'\("video_title", "(.*?)"\)',
'description': r'<div class="datawrap">(.*?)</div>',
'owner': r'\("video_owner_name", "(.*?)"\)',
'thumbnail': r'\("thumb_url", "(?P<THUMB>.*?)"\)',
}
video_info = {}
for piece in data.keys():
mobj = re.search(data[piece], video_webpage)
if mobj is not None:
video_info[piece] = compat_urllib_parse.unquote_plus(mobj.group(1).decode("unicode_escape"))
# Video urls
video_urls = {}
for fmt in self._available_formats:
mobj = re.search(r'\("%s_src\", "(.+?)"\)' % fmt, video_webpage)
if mobj is not None:
# URL is in a Javascript segment inside an escaped Unicode format within
# the generally utf-8 page
video_urls[fmt] = compat_urllib_parse.unquote_plus(mobj.group(1).decode("unicode_escape"))
video_info['video_urls'] = video_urls
return video_info
self._downloader.to_screen(u'[%s] Logging in' % self.IE_NAME)
def _real_initialize(self):
if self._downloader is None:
@ -2088,100 +2089,39 @@ class FacebookIE(InfoExtractor):
return
video_id = mobj.group('ID')
# Get video webpage
self.report_video_webpage_download(video_id)
request = compat_urllib_request.Request('https://www.facebook.com/video/video.php?v=%s' % video_id)
try:
page = compat_urllib_request.urlopen(request)
video_webpage = page.read()
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err))
return
url = 'https://www.facebook.com/video/video.php?v=%s' % video_id
webpage = self._download_webpage(url, video_id)
# Start extracting information
self.report_information_extraction(video_id)
BEFORE = '[["allowFullScreen","true"],["allowScriptAccess","always"],["salign","tl"],["scale","noscale"],["wmode","opaque"]].forEach(function(param) {swf.addParam(param[0], param[1]);});\n'
AFTER = '.forEach(function(variable) {swf.addVariable(variable[0], variable[1]);});'
m = re.search(re.escape(BEFORE) + '(.*?)' + re.escape(AFTER), webpage)
if not m:
raise ExtractorError(u'Cannot parse data')
data = dict(json.loads(m.group(1)))
params_raw = compat_urllib_parse.unquote(data['params'])
params = json.loads(params_raw)
video_url = params['hd_src']
if not video_url:
video_url = params['sd_src']
if not video_url:
raise ExtractorError(u'Cannot find video URL')
video_duration = int(params['video_duration'])
# Extract information
video_info = self._parse_page(video_webpage)
m = re.search('<h2 class="uiHeaderTitle">([^<]+)</h2>', webpage)
if not m:
raise ExtractorError(u'Cannot find title in webpage')
video_title = unescapeHTML(m.group(1))
# uploader
if 'owner' not in video_info:
self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
return
video_uploader = video_info['owner']
# title
if 'title' not in video_info:
self._downloader.trouble(u'ERROR: unable to extract video title')
return
video_title = video_info['title']
video_title = video_title.decode('utf-8')
# thumbnail image
if 'thumbnail' not in video_info:
self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
video_thumbnail = ''
else:
video_thumbnail = video_info['thumbnail']
# upload date
upload_date = None
if 'upload_date' in video_info:
upload_time = video_info['upload_date']
timetuple = email.utils.parsedate_tz(upload_time)
if timetuple is not None:
try:
upload_date = time.strftime('%Y%m%d', timetuple[0:9])
except:
pass
# description
video_description = video_info.get('description', 'No description available.')
url_map = video_info['video_urls']
if url_map:
# Decide which formats to download
req_format = self._downloader.params.get('format', None)
format_limit = self._downloader.params.get('format_limit', None)
if format_limit is not None and format_limit in self._available_formats:
format_list = self._available_formats[self._available_formats.index(format_limit):]
else:
format_list = self._available_formats
existing_formats = [x for x in format_list if x in url_map]
if len(existing_formats) == 0:
self._downloader.trouble(u'ERROR: no known formats available for video')
return
if req_format is None:
video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
elif req_format == 'worst':
video_url_list = [(existing_formats[len(existing_formats)-1], url_map[existing_formats[len(existing_formats)-1]])] # worst quality
elif req_format == '-1':
video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
else:
# Specific format
if req_format not in url_map:
self._downloader.trouble(u'ERROR: requested format not available')
return
video_url_list = [(req_format, url_map[req_format])] # Specific format
results = []
for format_param, video_real_url in video_url_list:
# Extension
video_extension = self._video_extensions.get(format_param, 'mp4')
results.append({
'id': video_id.decode('utf-8'),
'url': video_real_url.decode('utf-8'),
'uploader': video_uploader.decode('utf-8'),
'upload_date': upload_date,
info = {
'id': video_id,
'title': video_title,
'ext': video_extension.decode('utf-8'),
'format': (format_param is None and u'NA' or format_param.decode('utf-8')),
'thumbnail': video_thumbnail.decode('utf-8'),
'description': video_description.decode('utf-8'),
})
return results
'url': video_url,
'ext': 'mp4',
'duration': video_duration,
'thumbnail': params['thumbnail_src'],
}
return [info]
class BlipTVIE(InfoExtractor):
"""Information extractor for blip.tv"""
@ -2300,7 +2240,7 @@ class MyVideoIE(InfoExtractor):
webpage = self._download_webpage(webpage_url, video_id)
self.report_extraction(video_id)
mobj = re.search(r'<link rel=\'image_src\' href=\'(http://is[0-9].myvideo\.de/de/movie[0-9]+/[a-f0-9]+)/thumbs/[^.]+\.jpg\' />',
mobj = re.search(r'<link rel=\'image_src\' href=\'(http://is[0-9].myvideo\.de/de/movie[0-9]+/[a-f0-9]+)/thumbs/.*?\.jpg\' />',
webpage)
if mobj is None:
self._downloader.trouble(u'ERROR: unable to extract media URL')
@ -2983,8 +2923,7 @@ class StanfordOpenClassroomIE(InfoExtractor):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
if mobj is None:
self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
return
raise ExtractorError(u'Invalid URL: %s' % url)
if mobj.group('course') and mobj.group('video'): # A specific video
course = mobj.group('course')
@ -3021,12 +2960,9 @@ class StanfordOpenClassroomIE(InfoExtractor):
'upload_date': None,
}
self.report_download_webpage(info['id'])
try:
coursepage = compat_urllib_request.urlopen(url).read()
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.trouble(u'ERROR: unable to download course info page: ' + compat_str(err))
return
coursepage = self._download_webpage(url, info['id'],
note='Downloading course info page',
errnote='Unable to download course info page')
m = re.search('<h1>([^<]+)</h1>', coursepage)
if m:
@ -3050,7 +2986,6 @@ class StanfordOpenClassroomIE(InfoExtractor):
assert entry['type'] == 'reference'
results += self.extract(entry['url'])
return results
else: # Root page
info = {
'id': 'Stanford OpenClassroom',
@ -3541,10 +3476,12 @@ class JustinTVIE(InfoExtractor):
video_extension = os.path.splitext(video_url)[1][1:]
video_date = re.sub('-', '', clip['start_time'][:10])
video_uploader_id = clip.get('user_id', clip.get('channel_id'))
video_id = clip['id']
video_title = clip.get('title', video_id)
info.append({
'id': clip['id'],
'id': video_id,
'url': video_url,
'title': clip['title'],
'title': video_title,
'uploader': clip.get('channel_name', video_uploader_id),
'uploader_id': video_uploader_id,
'upload_date': video_date,
@ -3795,13 +3732,13 @@ class YouPornIE(InfoExtractor):
webpage = self._download_webpage(req, video_id)
# Get the video title
result = re.search(r'videoTitleArea">(?P<title>.*)</h1>', webpage)
result = re.search(r'<h1.*?>(?P<title>.*)</h1>', webpage)
if result is None:
raise ExtractorError(u'ERROR: unable to extract video title')
raise ExtractorError(u'Unable to extract video title')
video_title = result.group('title').strip()
# Get the video date
result = re.search(r'Date:</b>(?P<date>.*)</li>', webpage)
result = re.search(r'Date:</label>(?P<date>.*) </li>', webpage)
if result is None:
self._downloader.to_stderr(u'WARNING: unable to extract video date')
upload_date = None
@ -3809,9 +3746,9 @@ class YouPornIE(InfoExtractor):
upload_date = result.group('date').strip()
# Get the video uploader
result = re.search(r'Submitted:</b>(?P<uploader>.*)</li>', webpage)
result = re.search(r'Submitted:</label>(?P<uploader>.*)</li>', webpage)
if result is None:
self._downloader.to_stderr(u'ERROR: unable to extract uploader')
self._downloader.to_stderr(u'WARNING: unable to extract uploader')
video_uploader = None
else:
video_uploader = result.group('uploader').strip()
@ -3925,8 +3862,6 @@ class PornotubeIE(InfoExtractor):
return [info]
class YouJizzIE(InfoExtractor):
"""Information extractor for youjizz.com."""
_VALID_URL = r'^(?:https?://)?(?:\w+\.)?youjizz\.com/videos/(?P<videoid>[^.]+).html$'
@ -3973,6 +3908,201 @@ class YouJizzIE(InfoExtractor):
return [info]
class EightTracksIE(InfoExtractor):
IE_NAME = '8tracks'
_VALID_URL = r'https?://8tracks.com/(?P<user>[^/]+)/(?P<id>[^/#]+)(?:#.*)?$'
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
if mobj is None:
raise ExtractorError(u'Invalid URL: %s' % url)
playlist_id = mobj.group('id')
webpage = self._download_webpage(url, playlist_id)
m = re.search(r"PAGE.mix = (.*?);\n", webpage, flags=re.DOTALL)
if not m:
raise ExtractorError(u'Cannot find trax information')
json_like = m.group(1)
data = json.loads(json_like)
session = str(random.randint(0, 1000000000))
mix_id = data['id']
track_count = data['tracks_count']
first_url = 'http://8tracks.com/sets/%s/play?player=sm&mix_id=%s&format=jsonh' % (session, mix_id)
next_url = first_url
res = []
for i in itertools.count():
api_json = self._download_webpage(next_url, playlist_id,
note=u'Downloading song information %s/%s' % (str(i+1), track_count),
errnote=u'Failed to download song information')
api_data = json.loads(api_json)
track_data = api_data[u'set']['track']
info = {
'id': track_data['id'],
'url': track_data['track_file_stream_url'],
'title': track_data['performer'] + u' - ' + track_data['name'],
'raw_title': track_data['name'],
'uploader_id': data['user']['login'],
'ext': 'm4a',
}
res.append(info)
if api_data['set']['at_last_track']:
break
next_url = 'http://8tracks.com/sets/%s/next?player=sm&mix_id=%s&format=jsonh&track_id=%s' % (session, mix_id, track_data['id'])
return res
class KeekIE(InfoExtractor):
_VALID_URL = r'http://(?:www\.)?keek\.com/(?:!|\w+/keeks/)(?P<videoID>\w+)'
IE_NAME = u'keek'
def _real_extract(self, url):
m = re.match(self._VALID_URL, url)
video_id = m.group('videoID')
video_url = u'http://cdn.keek.com/keek/video/%s' % video_id
thumbnail = u'http://cdn.keek.com/keek/thumbnail/%s/w100/h75' % video_id
webpage = self._download_webpage(url, video_id)
m = re.search(r'<meta property="og:title" content="(?P<title>.+)"', webpage)
title = unescapeHTML(m.group('title'))
m = re.search(r'<div class="bio-names-and-report">[\s\n]+<h4>(?P<uploader>\w+)</h4>', webpage)
uploader = unescapeHTML(m.group('uploader'))
info = {
'id':video_id,
'url':video_url,
'ext': 'mp4',
'title': title,
'thumbnail': thumbnail,
'uploader': uploader
}
return [info]
class TEDIE(InfoExtractor):
_VALID_URL=r'''http://www.ted.com/
(
((?P<type_playlist>playlists)/(?P<playlist_id>\d+)) # We have a playlist
|
((?P<type_talk>talks)) # We have a simple talk
)
/(?P<name>\w+) # Here goes the name and then ".html"
'''
def suitable(self, url):
"""Receives a URL and returns True if suitable for this IE."""
return re.match(self._VALID_URL, url, re.VERBOSE) is not None
def _real_extract(self, url):
m=re.match(self._VALID_URL, url, re.VERBOSE)
if m.group('type_talk'):
return [self._talk_info(url)]
else :
playlist_id=m.group('playlist_id')
name=m.group('name')
self._downloader.to_screen(u'[%s] Getting info of playlist %s: "%s"' % (self.IE_NAME,playlist_id,name))
return self._playlist_videos_info(url,name,playlist_id)
def _talk_video_link(self,mediaSlug):
'''Returns the video link for that mediaSlug'''
return 'http://download.ted.com/talks/%s.mp4' % mediaSlug
def _playlist_videos_info(self,url,name,playlist_id=0):
'''Returns the videos of the playlist'''
video_RE=r'''
<li\ id="talk_(\d+)"([.\s]*?)data-id="(?P<video_id>\d+)"
([.\s]*?)data-playlist_item_id="(\d+)"
([.\s]*?)data-mediaslug="(?P<mediaSlug>.+?)"
'''
video_name_RE=r'<p\ class="talk-title"><a href="/talks/(.+).html">(?P<fullname>.+?)</a></p>'
webpage=self._download_webpage(url, playlist_id, 'Downloading playlist webpage')
m_videos=re.finditer(video_RE,webpage,re.VERBOSE)
m_names=re.finditer(video_name_RE,webpage)
info=[]
for m_video, m_name in zip(m_videos,m_names):
video_dic={
'id': m_video.group('video_id'),
'url': self._talk_video_link(m_video.group('mediaSlug')),
'ext': 'mp4',
'title': m_name.group('fullname')
}
info.append(video_dic)
return info
def _talk_info(self, url, video_id=0):
"""Return the video for the talk in the url"""
m=re.match(self._VALID_URL, url,re.VERBOSE)
videoName=m.group('name')
webpage=self._download_webpage(url, video_id, 'Downloading \"%s\" page' % videoName)
# If the url includes the language we get the title translated
title_RE=r'<h1><span id="altHeadline" >(?P<title>[\s\w:/\.\?=\+-\\\']*)</span></h1>'
title=re.search(title_RE, webpage).group('title')
info_RE=r'''<script\ type="text/javascript">var\ talkDetails\ =(.*?)
"id":(?P<videoID>[\d]+).*?
"mediaSlug":"(?P<mediaSlug>[\w\d]+?)"'''
info_match=re.search(info_RE,webpage,re.VERBOSE)
video_id=info_match.group('videoID')
mediaSlug=info_match.group('mediaSlug')
video_url=self._talk_video_link(mediaSlug)
info = {
'id': video_id,
'url': video_url,
'ext': 'mp4',
'title': title
}
return info
class MySpassIE(InfoExtractor):
_VALID_URL = r'http://www.myspass.de/.*'
def _real_extract(self, url):
META_DATA_URL_TEMPLATE = 'http://www.myspass.de/myspass/includes/apps/video/getvideometadataxml.php?id=%s'
# video id is the last path element of the URL
# usually there is a trailing slash, so also try the second but last
url_path = compat_urllib_parse_urlparse(url).path
url_parent_path, video_id = os.path.split(url_path)
if not video_id:
_, video_id = os.path.split(url_parent_path)
# get metadata
metadata_url = META_DATA_URL_TEMPLATE % video_id
metadata_text = self._download_webpage(metadata_url, video_id)
metadata = xml.etree.ElementTree.fromstring(metadata_text.encode('utf-8'))
# extract values from metadata
url_flv_el = metadata.find('url_flv')
if url_flv_el is None:
self._downloader.trouble(u'ERROR: unable to extract download url')
return
video_url = url_flv_el.text
extension = os.path.splitext(video_url)[1][1:]
title_el = metadata.find('title')
if title_el is None:
self._downloader.trouble(u'ERROR: unable to extract title')
return
title = title_el.text
format_id_el = metadata.find('format_id')
if format_id_el is None:
format = ext
else:
format = format_id_el.text
description_el = metadata.find('description')
if description_el is not None:
description = description_el.text
else:
description = None
imagePreview_el = metadata.find('imagePreview')
if imagePreview_el is not None:
thumbnail = imagePreview_el.text
else:
thumbnail = None
info = {
'id': video_id,
'url': video_url,
'title': title,
'ext': extension,
'format': format,
'thumbnail': thumbnail,
'description': description
}
return [info]
def gen_extractors():
""" Return a list of an instance of every supported extractor.
@ -4019,6 +4149,10 @@ def gen_extractors():
SteamIE(),
UstreamIE(),
RBMARadioIE(),
EightTracksIE(),
KeekIE(),
TEDIE(),
MySpassIE(),
GenericIE()
]

View File

@ -143,10 +143,10 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
more_opts = []
if self._preferredcodec == 'best' or self._preferredcodec == filecodec or (self._preferredcodec == 'm4a' and filecodec == 'aac'):
if self._preferredcodec == 'm4a' and filecodec == 'aac':
if filecodec == 'aac' and self._preferredcodec in ['m4a', 'best']:
# Lossless, but in another container
acodec = 'copy'
extension = self._preferredcodec
extension = 'm4a'
more_opts = [self._exes['avconv'] and '-bsf:a' or '-absf', 'aac_adtstoasc']
elif filecodec in ['aac', 'mp3', 'vorbis', 'opus']:
# Lossless if possible

View File

@ -23,6 +23,7 @@ __authors__ = (
'Dave Vasilevsky',
'Jaime Marquínez Ferrándiz',
'Jeff Crouse',
'Osama Khalid',
)
__license__ = 'Public Domain'
@ -150,6 +151,9 @@ def parseOpts():
selection.add_option('--match-title', dest='matchtitle', metavar='REGEX',help='download only matching titles (regex or caseless sub-string)')
selection.add_option('--reject-title', dest='rejecttitle', metavar='REGEX',help='skip download for matching titles (regex or caseless sub-string)')
selection.add_option('--max-downloads', metavar='NUMBER', dest='max_downloads', help='Abort after downloading NUMBER files', default=None)
selection.add_option('--min-filesize', metavar='SIZE', dest='min_filesize', help="Do not download any videos smaller than SIZE (e.g. 50k or 44.6m)", default=None)
selection.add_option('--max-filesize', metavar='SIZE', dest='max_filesize', help="Do not download any videos larger than SIZE (e.g. 50k or 44.6m)", default=None)
authentication.add_option('-u', '--username',
dest='username', metavar='USERNAME', help='account username')
@ -198,6 +202,8 @@ def parseOpts():
verbosity.add_option('--get-format',
action='store_true', dest='getformat',
help='simulate, quiet but print output format', default=False)
verbosity.add_option('--newline',
action='store_true', dest='progress_with_newline', help='output progress bar as new lines', default=False)
verbosity.add_option('--no-progress',
action='store_true', dest='noprogress', help='do not print progress bar', default=False)
verbosity.add_option('--console-title',
@ -206,7 +212,6 @@ def parseOpts():
verbosity.add_option('-v', '--verbose',
action='store_true', dest='verbose', help='print various debugging information', default=False)
filesystem.add_option('-t', '--title',
action='store_true', dest='usetitle', help='use title in file name', default=False)
filesystem.add_option('--id',
@ -286,10 +291,13 @@ def _real_main():
else:
try:
jar = compat_cookiejar.MozillaCookieJar(opts.cookiefile)
if os.path.isfile(opts.cookiefile) and os.access(opts.cookiefile, os.R_OK):
if os.access(opts.cookiefile, os.R_OK):
jar.load()
except (IOError, OSError) as err:
sys.exit(u'ERROR: unable to open cookie file')
if opts.verbose:
traceback.print_exc()
sys.stderr.write(u'ERROR: unable to open cookie file\n')
sys.exit(101)
# Set user agent
if opts.user_agent is not None:
std_headers['User-Agent'] = opts.user_agent
@ -349,6 +357,16 @@ def _real_main():
if numeric_limit is None:
parser.error(u'invalid rate limit specified')
opts.ratelimit = numeric_limit
if opts.min_filesize is not None:
numeric_limit = FileDownloader.parse_bytes(opts.min_filesize)
if numeric_limit is None:
parser.error(u'invalid min_filesize specified')
opts.min_filesize = numeric_limit
if opts.max_filesize is not None:
numeric_limit = FileDownloader.parse_bytes(opts.max_filesize)
if numeric_limit is None:
parser.error(u'invalid max_filesize specified')
opts.max_filesize = numeric_limit
if opts.retries is not None:
try:
opts.retries = int(opts.retries)
@ -394,6 +412,7 @@ def _real_main():
or (opts.useid and u'%(id)s.%(ext)s')
or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
or u'%(id)s.%(ext)s')
# File downloader
fd = FileDownloader({
'usenetrc': opts.usenetrc,
@ -421,6 +440,7 @@ def _real_main():
'noresizebuffer': opts.noresizebuffer,
'continuedl': opts.continue_dl,
'noprogress': opts.noprogress,
'progress_with_newline': opts.progress_with_newline,
'playliststart': opts.playliststart,
'playlistend': opts.playlistend,
'logtostderr': opts.outtmpl == '-',
@ -431,13 +451,15 @@ def _real_main():
'writeinfojson': opts.writeinfojson,
'writesubtitles': opts.writesubtitles,
'subtitleslang': opts.subtitleslang,
'matchtitle': opts.matchtitle,
'rejecttitle': opts.rejecttitle,
'matchtitle': decodeOption(opts.matchtitle),
'rejecttitle': decodeOption(opts.rejecttitle),
'max_downloads': opts.max_downloads,
'prefer_free_formats': opts.prefer_free_formats,
'verbose': opts.verbose,
'test': opts.test,
'keepvideo': opts.keepvideo,
'min_filesize': opts.min_filesize,
'max_filesize': opts.max_filesize
})
if opts.verbose:

View File

@ -77,10 +77,8 @@ def update_self(to_screen, verbose, filename):
to_screen(u'Updating to version ' + versions_info['latest'] + '...')
version = versions_info['versions'][versions_info['latest']]
if version.get('notes'):
to_screen(u'PLEASE NOTE:')
for note in version['notes']:
to_screen(note)
print_notes(version_info['versions'])
if not os.access(filename, os.W_OK):
to_screen(u'ERROR: no write permissions on %s' % filename)
@ -158,3 +156,13 @@ del "%s"
return
to_screen(u'Updated youtube-dl. Restart youtube-dl to use the new version.')
def print_notes(versions, fromVersion=__version__):
notes = []
for v,vdata in sorted(versions.items()):
if v > fromVersion:
notes.extend(vdata.get('notes', []))
if notes:
to_screen(u'PLEASE NOTE:')
for note in notes:
to_screen(note)

View File

@ -280,6 +280,12 @@ class AttrParser(compat_html_parser.HTMLParser):
lines[-1] = lines[-1][:self.result[2][1]-self.result[1][1]]
lines[-1] = lines[-1][:self.result[2][1]]
return '\n'.join(lines).strip()
# Hack for https://github.com/rg3/youtube-dl/issues/662
if sys.version_info < (2, 7, 3):
AttrParser.parse_endtag = (lambda self, i:
i + len("</scr'+'ipt>")
if self.rawdata[i:].startswith("</scr'+'ipt>")
else compat_html_parser.HTMLParser.parse_endtag(self, i))
def get_element_by_id(id, html):
"""Return the content of the tag with the specified ID in the passed HTML document"""
@ -409,8 +415,19 @@ def encodeFilename(s):
# match Windows 9x series as well. Besides, NT 4 is obsolete.)
return s
else:
return s.encode(sys.getfilesystemencoding(), 'ignore')
encoding = sys.getfilesystemencoding()
if encoding is None:
encoding = 'utf-8'
return s.encode(encoding, 'ignore')
def decodeOption(optval):
if optval is None:
return optval
if isinstance(optval, bytes):
optval = optval.decode(preferredencoding())
assert isinstance(optval, compat_str)
return optval
class ExtractorError(Exception):
"""Error during info extraction."""

View File

@ -1,2 +1,2 @@
__version__ = '2013.01.13'
__version__ = '2012.02.22'