Compare commits
94 Commits
2013.01.02
...
2013.02.01
Author | SHA1 | Date | |
---|---|---|---|
|
9cd5e4fce8 | ||
|
edba5137b8 | ||
|
233a22960a | ||
|
3b024e17af | ||
|
ec71c13ab8 | ||
|
f0bad2b026 | ||
|
25580f3251 | ||
|
da4de959df | ||
|
d0d51a8afa | ||
|
c67598c3e1 | ||
|
811d253bc2 | ||
|
c3a1642ead | ||
|
ccf65f9dee | ||
|
b954070d70 | ||
|
30e9f4496b | ||
|
271d3fbdaa | ||
|
6df40dcbe0 | ||
|
97f194c1fb | ||
|
4da769ccca | ||
|
253d96f2e2 | ||
|
bbc3e2753a | ||
|
67353612ba | ||
|
bffbd5f038 | ||
|
d8bbf2018e | ||
|
187f491ad2 | ||
|
335959e778 | ||
|
3b83bf8f6a | ||
|
51719893bf | ||
|
1841f65e64 | ||
|
bb28998920 | ||
|
fbc5f99db9 | ||
|
ca0a0bbeec | ||
|
6119f78cb9 | ||
|
539679c7f9 | ||
|
b642cd44c1 | ||
|
fffec3b9d9 | ||
|
3446dfb7cb | ||
|
db16276b7c | ||
|
629fcdd135 | ||
|
64ce2aada8 | ||
|
565f751967 | ||
|
6017964580 | ||
|
1d16b0c3fe | ||
|
7851b37993 | ||
|
d81edc573e | ||
|
ef0c8d5f9f | ||
|
db30f02b50 | ||
|
4ba7262467 | ||
|
67d0c25eab | ||
|
09f9552b40 | ||
|
142d38f776 | ||
|
6dd3471900 | ||
|
280d67896a | ||
|
510e6f6dc1 | ||
|
712e86b999 | ||
|
74fdba620d | ||
|
dc1c479a6f | ||
|
119d536e07 | ||
|
fa1bf9c653 | ||
|
814eed0ea1 | ||
|
0aa3068e9e | ||
|
db2d6124b1 | ||
|
039dc61bd2 | ||
|
4b879984ea | ||
|
55e286ba55 | ||
|
9450bfa26e | ||
|
18be482a6f | ||
|
ca6710ee41 | ||
|
9314810243 | ||
|
7717ae19fa | ||
|
32635ec685 | ||
|
caec7618a1 | ||
|
7e7ab2815c | ||
|
d7744f2219 | ||
|
7161829de5 | ||
|
991ba7fae3 | ||
|
a7539296ce | ||
|
258d5850c9 | ||
|
20759b340a | ||
|
8e5f761870 | ||
|
26714799c9 | ||
|
5e9d042d8f | ||
|
9cf98a2bcc | ||
|
f5ebb61495 | ||
|
431d88dd31 | ||
|
876f1a86af | ||
|
01951dda7a | ||
|
6e3dba168b | ||
|
187da2c093 | ||
|
9a2cf56d51 | ||
|
5f7ad21633 | ||
|
089d47f8d5 | ||
|
fdef722fa1 | ||
|
110d4f4c91 |
17
.tarignore
17
.tarignore
@@ -1,17 +0,0 @@
|
|||||||
updates_key.pem
|
|
||||||
*.pyc
|
|
||||||
*.pyo
|
|
||||||
youtube-dl.exe
|
|
||||||
wine-py2exe/
|
|
||||||
py2exe.log
|
|
||||||
*.kate-swp
|
|
||||||
build/
|
|
||||||
dist/
|
|
||||||
MANIFEST
|
|
||||||
*.DS_Store
|
|
||||||
youtube-dl.tar.gz
|
|
||||||
.coverage
|
|
||||||
cover/
|
|
||||||
__pycache__/
|
|
||||||
.git/
|
|
||||||
*~
|
|
@@ -8,7 +8,7 @@ notifications:
|
|||||||
email:
|
email:
|
||||||
- filippo.valsorda@gmail.com
|
- filippo.valsorda@gmail.com
|
||||||
- phihag@phihag.de
|
- phihag@phihag.de
|
||||||
irc:
|
# irc:
|
||||||
channels:
|
# channels:
|
||||||
- "irc.freenode.org#youtube-dl"
|
# - "irc.freenode.org#youtube-dl"
|
||||||
skip_join: true
|
# skip_join: true
|
||||||
|
@@ -1 +1 @@
|
|||||||
9999.99.99
|
2012.12.99
|
||||||
|
26
Makefile
26
Makefile
@@ -1,12 +1,13 @@
|
|||||||
all: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion
|
all: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -rf youtube-dl youtube-dl.exe youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/
|
rm -rf youtube-dl youtube-dl.exe youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz
|
||||||
|
|
||||||
PREFIX=/usr/local
|
PREFIX=/usr/local
|
||||||
BINDIR=$(PREFIX)/bin
|
BINDIR=$(PREFIX)/bin
|
||||||
MANDIR=$(PREFIX)/man
|
MANDIR=$(PREFIX)/man
|
||||||
SYSCONFDIR=/etc
|
SYSCONFDIR=/etc
|
||||||
|
PYTHON=/usr/bin/env python
|
||||||
|
|
||||||
install: youtube-dl youtube-dl.1 youtube-dl.bash-completion
|
install: youtube-dl youtube-dl.1 youtube-dl.bash-completion
|
||||||
install -d $(DESTDIR)$(BINDIR)
|
install -d $(DESTDIR)$(BINDIR)
|
||||||
@@ -20,12 +21,14 @@ test:
|
|||||||
#nosetests --with-coverage --cover-package=youtube_dl --cover-html --verbose --processes 4 test
|
#nosetests --with-coverage --cover-package=youtube_dl --cover-html --verbose --processes 4 test
|
||||||
nosetests --verbose test
|
nosetests --verbose test
|
||||||
|
|
||||||
.PHONY: all clean install test
|
tar: youtube-dl.tar.gz
|
||||||
|
|
||||||
|
.PHONY: all clean install test tar
|
||||||
|
|
||||||
youtube-dl: youtube_dl/*.py
|
youtube-dl: youtube_dl/*.py
|
||||||
zip --quiet youtube-dl youtube_dl/*.py
|
zip --quiet youtube-dl youtube_dl/*.py
|
||||||
zip --quiet --junk-paths youtube-dl youtube_dl/__main__.py
|
zip --quiet --junk-paths youtube-dl youtube_dl/__main__.py
|
||||||
echo '#!/usr/bin/env python' > youtube-dl
|
echo '#!$(PYTHON)' > youtube-dl
|
||||||
cat youtube-dl.zip >> youtube-dl
|
cat youtube-dl.zip >> youtube-dl
|
||||||
rm youtube-dl.zip
|
rm youtube-dl.zip
|
||||||
chmod a+x youtube-dl
|
chmod a+x youtube-dl
|
||||||
@@ -42,6 +45,17 @@ youtube-dl.1: README.md
|
|||||||
youtube-dl.bash-completion: youtube_dl/*.py devscripts/bash-completion.in
|
youtube-dl.bash-completion: youtube_dl/*.py devscripts/bash-completion.in
|
||||||
python devscripts/bash-completion.py
|
python devscripts/bash-completion.py
|
||||||
|
|
||||||
youtube-dl.tar.gz: all
|
youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion
|
||||||
tar -cvzf youtube-dl.tar.gz -s "|^./|./youtube-dl/|" \
|
@tar -czf youtube-dl.tar.gz --transform "s|^|youtube-dl/|" --owner 0 --group 0 \
|
||||||
--exclude-from=".tarignore" -- .
|
--exclude '*.DS_Store' \
|
||||||
|
--exclude '*.kate-swp' \
|
||||||
|
--exclude '*.pyc' \
|
||||||
|
--exclude '*.pyo' \
|
||||||
|
--exclude '*~' \
|
||||||
|
--exclude '__pycache' \
|
||||||
|
--exclude '.git' \
|
||||||
|
-- \
|
||||||
|
bin devscripts test youtube_dl \
|
||||||
|
CHANGELOG LICENSE README.md README.txt \
|
||||||
|
Makefile MANIFEST.in youtube-dl.1 youtube-dl.bash-completion setup.py \
|
||||||
|
youtube-dl
|
||||||
|
12
README.md
12
README.md
@@ -9,8 +9,8 @@ youtube-dl
|
|||||||
# DESCRIPTION
|
# DESCRIPTION
|
||||||
**youtube-dl** is a small command-line program to download videos from
|
**youtube-dl** is a small command-line program to download videos from
|
||||||
YouTube.com and a few more sites. It requires the Python interpreter, version
|
YouTube.com and a few more sites. It requires the Python interpreter, version
|
||||||
2.x (x being at least 6), and it is not platform specific. It should work in
|
2.6, 2.7, or 3.3+, and it is not platform specific. It should work on
|
||||||
your Unix box, in Windows or in Mac OS X. It is released to the public domain,
|
your Unix box, on Windows or on Mac OS X. It is released to the public domain,
|
||||||
which means you can modify it, redistribute it or use it however you like.
|
which means you can modify it, redistribute it or use it however you like.
|
||||||
|
|
||||||
# OPTIONS
|
# OPTIONS
|
||||||
@@ -105,11 +105,13 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
## Post-processing Options:
|
## Post-processing Options:
|
||||||
-x, --extract-audio convert video files to audio-only files (requires
|
-x, --extract-audio convert video files to audio-only files (requires
|
||||||
ffmpeg or avconv and ffprobe or avprobe)
|
ffmpeg or avconv and ffprobe or avprobe)
|
||||||
--audio-format FORMAT "best", "aac", "vorbis", "mp3", "m4a", or "wav";
|
--audio-format FORMAT "best", "aac", "vorbis", "mp3", "m4a", "opus", or
|
||||||
best by default
|
"wav"; best by default
|
||||||
--audio-quality QUALITY ffmpeg/avconv audio quality specification, insert a
|
--audio-quality QUALITY ffmpeg/avconv audio quality specification, insert a
|
||||||
value between 0 (better) and 9 (worse) for VBR or a
|
value between 0 (better) and 9 (worse) for VBR or a
|
||||||
specific bitrate like 128K (default 5)
|
specific bitrate like 128K (default 5)
|
||||||
|
--recode-video FORMAT Encode the video to another format if necessary
|
||||||
|
(currently supported: mp4|flv|ogg|webm)
|
||||||
-k, --keep-video keeps the video file on disk after the post-
|
-k, --keep-video keeps the video file on disk after the post-
|
||||||
processing; the video is erased by default
|
processing; the video is erased by default
|
||||||
--no-post-overwrites do not overwrite post-processed files; the post-
|
--no-post-overwrites do not overwrite post-processed files; the post-
|
||||||
@@ -117,7 +119,7 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
|
|
||||||
# CONFIGURATION
|
# CONFIGURATION
|
||||||
|
|
||||||
You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.local/config/youtube-dl.conf`.
|
You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.config/youtube-dl.conf`.
|
||||||
|
|
||||||
# OUTPUT TEMPLATE
|
# OUTPUT TEMPLATE
|
||||||
|
|
||||||
|
@@ -4,12 +4,17 @@ import rsa
|
|||||||
import json
|
import json
|
||||||
from binascii import hexlify
|
from binascii import hexlify
|
||||||
|
|
||||||
|
try:
|
||||||
|
input = raw_input
|
||||||
|
except NameError:
|
||||||
|
pass
|
||||||
|
|
||||||
versions_info = json.load(open('update/versions.json'))
|
versions_info = json.load(open('update/versions.json'))
|
||||||
if 'signature' in versions_info:
|
if 'signature' in versions_info:
|
||||||
del versions_info['signature']
|
del versions_info['signature']
|
||||||
|
|
||||||
print('Enter the PKCS1 private key, followed by a blank line:')
|
print('Enter the PKCS1 private key, followed by a blank line:')
|
||||||
privkey = ''
|
privkey = b''
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
line = input()
|
line = input()
|
||||||
@@ -17,8 +22,7 @@ while True:
|
|||||||
break
|
break
|
||||||
if line == '':
|
if line == '':
|
||||||
break
|
break
|
||||||
privkey += line + '\n'
|
privkey += line.encode('ascii') + b'\n'
|
||||||
privkey = bytes(privkey, 'ascii')
|
|
||||||
privkey = rsa.PrivateKey.load_pkcs1(privkey)
|
privkey = rsa.PrivateKey.load_pkcs1(privkey)
|
||||||
|
|
||||||
signature = hexlify(rsa.pkcs1.sign(json.dumps(versions_info, sort_keys=True).encode('utf-8'), privkey, 'SHA-256')).decode()
|
signature = hexlify(rsa.pkcs1.sign(json.dumps(versions_info, sort_keys=True).encode('utf-8'), privkey, 'SHA-256')).decode()
|
||||||
|
@@ -1,13 +1,17 @@
|
|||||||
#!/bin/sh
|
#!/bin/bash
|
||||||
|
|
||||||
# IMPORTANT: the following assumptions are made
|
# IMPORTANT: the following assumptions are made
|
||||||
# * you did --set-upstream
|
# * the GH repo is on the origin remote
|
||||||
# * the gh-pages branch is named so locally
|
# * the gh-pages branch is named so locally
|
||||||
# * the git config user.signingkey is properly set
|
# * the git config user.signingkey is properly set
|
||||||
|
|
||||||
# You will need
|
# You will need
|
||||||
# pip install coverage nose rsa
|
# pip install coverage nose rsa
|
||||||
|
|
||||||
|
# TODO
|
||||||
|
# release notes
|
||||||
|
# make hash on local files
|
||||||
|
|
||||||
set -e
|
set -e
|
||||||
|
|
||||||
if [ -z "$1" ]; then echo "ERROR: specify version number like this: $0 1994.09.06"; exit 1; fi
|
if [ -z "$1" ]; then echo "ERROR: specify version number like this: $0 1994.09.06"; exit 1; fi
|
||||||
@@ -21,7 +25,7 @@ make clean
|
|||||||
nosetests --with-coverage --cover-package=youtube_dl --cover-html test || exit 1
|
nosetests --with-coverage --cover-package=youtube_dl --cover-html test || exit 1
|
||||||
|
|
||||||
echo "\n### Changing version in version.py..."
|
echo "\n### Changing version in version.py..."
|
||||||
sed -i~ "s/__version__ = '.*'/__version__ = '$version'/" youtube_dl/version.py
|
sed -i "s/__version__ = '.*'/__version__ = '$version'/" youtube_dl/version.py
|
||||||
|
|
||||||
echo "\n### Committing CHANGELOG README.md and youtube_dl/version.py..."
|
echo "\n### Committing CHANGELOG README.md and youtube_dl/version.py..."
|
||||||
make README.md
|
make README.md
|
||||||
@@ -34,41 +38,49 @@ git show "$version"
|
|||||||
read -p "Is it good, can I push? (y/n) " -n 1
|
read -p "Is it good, can I push? (y/n) " -n 1
|
||||||
if [[ ! $REPLY =~ ^[Yy]$ ]]; then exit 1; fi
|
if [[ ! $REPLY =~ ^[Yy]$ ]]; then exit 1; fi
|
||||||
echo
|
echo
|
||||||
git push
|
MASTER=$(git rev-parse --abbrev-ref HEAD)
|
||||||
|
git push origin $MASTER:master
|
||||||
|
git push origin "$version"
|
||||||
|
|
||||||
echo "\n### OK, now it is time to build the binaries..."
|
echo "\n### OK, now it is time to build the binaries..."
|
||||||
REV=$(git rev-parse HEAD)
|
REV=$(git rev-parse HEAD)
|
||||||
make youtube-dl youtube-dl.tar.gz
|
make youtube-dl youtube-dl.tar.gz
|
||||||
wget "http://jeromelaheurte.net:8142/download/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe || \
|
wget "http://jeromelaheurte.net:8142/download/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe || \
|
||||||
wget "http://jeromelaheurte.net:8142/build/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe
|
wget "http://jeromelaheurte.net:8142/build/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe
|
||||||
mkdir -p "update_staging/$version"
|
mkdir -p "build/$version"
|
||||||
mv youtube-dl youtube-dl.exe "update_staging/$version"
|
mv youtube-dl youtube-dl.exe "build/$version"
|
||||||
mv youtube-dl.tar.gz "update_staging/$version/youtube-dl-$version.tar.gz"
|
mv youtube-dl.tar.gz "build/$version/youtube-dl-$version.tar.gz"
|
||||||
|
RELEASE_FILES="youtube-dl youtube-dl.exe youtube-dl-$version.tar.gz"
|
||||||
|
(cd build/$version/ && md5sum $RELEASE_FILES > MD5SUMS)
|
||||||
|
(cd build/$version/ && sha1sum $RELEASE_FILES > SHA1SUMS)
|
||||||
|
(cd build/$version/ && sha256sum $RELEASE_FILES > SHA2-256SUMS)
|
||||||
|
(cd build/$version/ && sha512sum $RELEASE_FILES > SHA2-512SUMS)
|
||||||
git checkout HEAD -- youtube-dl youtube-dl.exe
|
git checkout HEAD -- youtube-dl youtube-dl.exe
|
||||||
|
|
||||||
echo "\n### Signing and uploading the new binaries to youtube-dl.org..."
|
echo "\n### Signing and uploading the new binaries to youtube-dl.org..."
|
||||||
for f in update_staging/$version/*; do gpg --detach-sig "$f"; done
|
for f in $RELEASE_FILES; do gpg --detach-sig "build/$version/$f"; done
|
||||||
scp -r "update_staging/$version" ytdl@youtube-dl.org:html/downloads/
|
scp -r "build/$version" ytdl@youtube-dl.org:html/downloads/
|
||||||
rm -r update_staging
|
|
||||||
|
|
||||||
echo "\n### Now switching to gh-pages..."
|
echo "\n### Now switching to gh-pages..."
|
||||||
MASTER=$(git rev-parse --abbrev-ref HEAD)
|
git clone --branch gh-pages --single-branch . build/gh-pages
|
||||||
git checkout gh-pages
|
ROOT=$(pwd)
|
||||||
git checkout "$MASTER" -- devscripts/gh-pages/
|
(
|
||||||
git reset devscripts/gh-pages/
|
set -e
|
||||||
devscripts/gh-pages/add-version.py $version
|
ORIGIN_URL=$(git config --get remote.origin.url)
|
||||||
devscripts/gh-pages/sign-versions.py < updates_key.pem
|
cd build/gh-pages
|
||||||
devscripts/gh-pages/generate-download.py
|
"$ROOT/devscripts/gh-pages/add-version.py" $version
|
||||||
devscripts/gh-pages/update-copyright.py
|
"$ROOT/devscripts/gh-pages/sign-versions.py" < "$ROOT/updates_key.pem"
|
||||||
rm -r test_coverage
|
"$ROOT/devscripts/gh-pages/generate-download.py"
|
||||||
mv cover test_coverage
|
"$ROOT/devscripts/gh-pages/update-copyright.py"
|
||||||
git add *.html *.html.in update test_coverage
|
git add *.html *.html.in update
|
||||||
git commit -m "release $version"
|
git commit -m "release $version"
|
||||||
git show HEAD
|
git show HEAD
|
||||||
read -p "Is it good, can I push? (y/n) " -n 1
|
read -p "Is it good, can I push? (y/n) " -n 1
|
||||||
if [[ ! $REPLY =~ ^[Yy]$ ]]; then exit 1; fi
|
if [[ ! $REPLY =~ ^[Yy]$ ]]; then exit 1; fi
|
||||||
echo
|
echo
|
||||||
git push
|
git push "$ROOT" gh-pages
|
||||||
|
git push "$ORIGIN_URL" gh-pages
|
||||||
|
)
|
||||||
|
rm -rf build
|
||||||
|
|
||||||
echo "\n### DONE!"
|
echo "\n### DONE!"
|
||||||
git checkout $MASTER
|
|
||||||
|
@@ -26,6 +26,7 @@ cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar)
|
|||||||
proxy_handler = compat_urllib_request.ProxyHandler()
|
proxy_handler = compat_urllib_request.ProxyHandler()
|
||||||
opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler())
|
opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler())
|
||||||
compat_urllib_request.install_opener(opener)
|
compat_urllib_request.install_opener(opener)
|
||||||
|
socket.setdefaulttimeout(10)
|
||||||
|
|
||||||
def _try_rm(filename):
|
def _try_rm(filename):
|
||||||
""" Remove a file if it exists """
|
""" Remove a file if it exists """
|
||||||
@@ -81,6 +82,11 @@ def generator(test_case):
|
|||||||
fd.add_info_extractor(ie())
|
fd.add_info_extractor(ie())
|
||||||
for ien in test_case.get('add_ie', []):
|
for ien in test_case.get('add_ie', []):
|
||||||
fd.add_info_extractor(getattr(youtube_dl.InfoExtractors, ien + 'IE')())
|
fd.add_info_extractor(getattr(youtube_dl.InfoExtractors, ien + 'IE')())
|
||||||
|
finished_hook_called = set()
|
||||||
|
def _hook(status):
|
||||||
|
if status['status'] == 'finished':
|
||||||
|
finished_hook_called.add(status['filename'])
|
||||||
|
fd.add_progress_hook(_hook)
|
||||||
|
|
||||||
test_cases = test_case.get('playlist', [test_case])
|
test_cases = test_case.get('playlist', [test_case])
|
||||||
for tc in test_cases:
|
for tc in test_cases:
|
||||||
@@ -92,7 +98,8 @@ def generator(test_case):
|
|||||||
|
|
||||||
for tc in test_cases:
|
for tc in test_cases:
|
||||||
if not test_case.get('params', {}).get('skip_download', False):
|
if not test_case.get('params', {}).get('skip_download', False):
|
||||||
self.assertTrue(os.path.exists(tc['file']))
|
self.assertTrue(os.path.exists(tc['file']), msg='Missing file ' + tc['file'])
|
||||||
|
self.assertTrue(tc['file'] in finished_hook_called)
|
||||||
self.assertTrue(os.path.exists(tc['file'] + '.info.json'))
|
self.assertTrue(os.path.exists(tc['file'] + '.info.json'))
|
||||||
if 'md5' in tc:
|
if 'md5' in tc:
|
||||||
md5_for_file = _file_md5(tc['file'])
|
md5_for_file = _file_md5(tc['file'])
|
||||||
@@ -100,10 +107,6 @@ def generator(test_case):
|
|||||||
with io.open(tc['file'] + '.info.json', encoding='utf-8') as infof:
|
with io.open(tc['file'] + '.info.json', encoding='utf-8') as infof:
|
||||||
info_dict = json.load(infof)
|
info_dict = json.load(infof)
|
||||||
for (info_field, value) in tc.get('info_dict', {}).items():
|
for (info_field, value) in tc.get('info_dict', {}).items():
|
||||||
if value.startswith('md5:'):
|
|
||||||
md5_info_value = hashlib.md5(info_dict.get(info_field, '')).hexdigest()
|
|
||||||
self.assertEqual(value[3:], md5_info_value)
|
|
||||||
else:
|
|
||||||
self.assertEqual(value, info_dict.get(info_field))
|
self.assertEqual(value, info_dict.get(info_field))
|
||||||
finally:
|
finally:
|
||||||
for tc in test_cases:
|
for tc in test_cases:
|
||||||
|
115
test/tests.json
115
test/tests.json
@@ -35,6 +35,24 @@
|
|||||||
"url": "http://www.xvideos.com/video939581/funny_porns_by_s_-1",
|
"url": "http://www.xvideos.com/video939581/funny_porns_by_s_-1",
|
||||||
"file": "939581.flv"
|
"file": "939581.flv"
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"name": "YouPorn",
|
||||||
|
"md5": "c37ddbaaa39058c76a7e86c6813423c1",
|
||||||
|
"url": "http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/",
|
||||||
|
"file": "505835.mp4"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "Pornotube",
|
||||||
|
"md5": "374dd6dcedd24234453b295209aa69b6",
|
||||||
|
"url": "http://pornotube.com/c/173/m/1689755/Marilyn-Monroe-Bathing",
|
||||||
|
"file": "1689755.flv"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "YouJizz",
|
||||||
|
"md5": "07e15fa469ba384c7693fd246905547c",
|
||||||
|
"url": "http://www.youjizz.com/videos/zeichentrick-1-2189178.html",
|
||||||
|
"file": "2189178.flv"
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"name": "Vimeo",
|
"name": "Vimeo",
|
||||||
"md5": "8879b6cc097e987f02484baf890129e5",
|
"md5": "8879b6cc097e987f02484baf890129e5",
|
||||||
@@ -160,5 +178,102 @@
|
|||||||
"params": {
|
"params": {
|
||||||
"skip_download": true
|
"skip_download": true
|
||||||
}
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "ComedyCentral",
|
||||||
|
"url": "http://www.thedailyshow.com/watch/thu-december-13-2012/kristen-stewart",
|
||||||
|
"file": "422212.mp4",
|
||||||
|
"md5": "4e2f5cb088a83cd8cdb7756132f9739d",
|
||||||
|
"info_dict": {
|
||||||
|
"title": "thedailyshow-kristen-stewart part 1"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "RBMARadio",
|
||||||
|
"url": "http://www.rbmaradio.com/shows/ford-lopatin-live-at-primavera-sound-2011",
|
||||||
|
"file": "ford-lopatin-live-at-primavera-sound-2011.mp3",
|
||||||
|
"md5": "6bc6f9bcb18994b4c983bc3bf4384d95",
|
||||||
|
"info_dict": {
|
||||||
|
"title": "Live at Primavera Sound 2011",
|
||||||
|
"description": "Joel Ford and Daniel \u2019Oneohtrix Point Never\u2019 Lopatin fly their midified pop extravaganza to Spain. Live at Primavera Sound 2011.",
|
||||||
|
"uploader": "Ford & Lopatin",
|
||||||
|
"uploader_id": "ford-lopatin",
|
||||||
|
"location": "Spain"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "Facebook",
|
||||||
|
"url": "https://www.facebook.com/photo.php?v=120708114770723",
|
||||||
|
"file": "120708114770723.mp4",
|
||||||
|
"md5": "48975a41ccc4b7a581abd68651c1a5a8",
|
||||||
|
"info_dict": {
|
||||||
|
"title": "PEOPLE ARE AWESOME 2013",
|
||||||
|
"duration": 279
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "EightTracks",
|
||||||
|
"url": "http://8tracks.com/ytdl/youtube-dl-test-tracks-a",
|
||||||
|
"playlist": [
|
||||||
|
{
|
||||||
|
"file": "11885610.m4a",
|
||||||
|
"md5": "96ce57f24389fc8734ce47f4c1abcc55",
|
||||||
|
"info_dict": {
|
||||||
|
"title": "youtue-dl project<>\"' - youtube-dl test track 1 \"'/\\\u00e4\u21ad",
|
||||||
|
"uploader_id": "ytdl"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"file": "11885608.m4a",
|
||||||
|
"md5": "4ab26f05c1f7291ea460a3920be8021f",
|
||||||
|
"info_dict": {
|
||||||
|
"title": "youtube-dl project - youtube-dl test track 2 \"'/\\\u00e4\u21ad",
|
||||||
|
"uploader_id": "ytdl"
|
||||||
|
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"file": "11885679.m4a",
|
||||||
|
"md5": "d30b5b5f74217410f4689605c35d1fd7",
|
||||||
|
"info_dict": {
|
||||||
|
"title": "youtube-dl project as well - youtube-dl test track 3 \"'/\\\u00e4\u21ad"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"file": "11885680.m4a",
|
||||||
|
"md5": "4eb0a669317cd725f6bbd336a29f923a",
|
||||||
|
"info_dict": {
|
||||||
|
"title": "youtube-dl project as well - youtube-dl test track 4 \"'/\\\u00e4\u21ad"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"file": "11885682.m4a",
|
||||||
|
"md5": "1893e872e263a2705558d1d319ad19e8",
|
||||||
|
"info_dict": {
|
||||||
|
"title": "PH - youtube-dl test track 5 \"'/\\\u00e4\u21ad"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"file": "11885683.m4a",
|
||||||
|
"md5": "b673c46f47a216ab1741ae8836af5899",
|
||||||
|
"info_dict": {
|
||||||
|
"title": "PH - youtube-dl test track 6 \"'/\\\u00e4\u21ad"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"file": "11885684.m4a",
|
||||||
|
"md5": "1d74534e95df54986da7f5abf7d842b7",
|
||||||
|
"info_dict": {
|
||||||
|
"title": "phihag - youtube-dl test track 7 \"'/\\\u00e4\u21ad"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"file": "11885685.m4a",
|
||||||
|
"md5": "f081f47af8f6ae782ed131d38b9cd1c0",
|
||||||
|
"info_dict": {
|
||||||
|
"title": "phihag - youtube-dl test track 8 \"'/\\\u00e4\u21ad"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
@@ -81,6 +81,7 @@ class FileDownloader(object):
|
|||||||
writesubtitles: Write the video subtitles to a .srt file
|
writesubtitles: Write the video subtitles to a .srt file
|
||||||
subtitleslang: Language of the subtitles to download
|
subtitleslang: Language of the subtitles to download
|
||||||
test: Download only first bytes to test the downloader.
|
test: Download only first bytes to test the downloader.
|
||||||
|
keepvideo: Keep the video file after post-processing
|
||||||
"""
|
"""
|
||||||
|
|
||||||
params = None
|
params = None
|
||||||
@@ -94,6 +95,7 @@ class FileDownloader(object):
|
|||||||
"""Create a FileDownloader object with the given options."""
|
"""Create a FileDownloader object with the given options."""
|
||||||
self._ies = []
|
self._ies = []
|
||||||
self._pps = []
|
self._pps = []
|
||||||
|
self._progress_hooks = []
|
||||||
self._download_retcode = 0
|
self._download_retcode = 0
|
||||||
self._num_downloads = 0
|
self._num_downloads = 0
|
||||||
self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
|
self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
|
||||||
@@ -216,12 +218,15 @@ class FileDownloader(object):
|
|||||||
Depending on if the downloader has been configured to ignore
|
Depending on if the downloader has been configured to ignore
|
||||||
download errors or not, this method may throw an exception or
|
download errors or not, this method may throw an exception or
|
||||||
not when errors are found, after printing the message.
|
not when errors are found, after printing the message.
|
||||||
|
|
||||||
|
tb, if given, is additional traceback information.
|
||||||
"""
|
"""
|
||||||
if message is not None:
|
if message is not None:
|
||||||
self.to_stderr(message)
|
self.to_stderr(message)
|
||||||
if self.params.get('verbose'):
|
if self.params.get('verbose'):
|
||||||
if tb is None:
|
if tb is None:
|
||||||
tb = u''.join(traceback.format_list(traceback.extract_stack()))
|
tb_data = traceback.format_list(traceback.extract_stack())
|
||||||
|
tb = u''.join(tb_data)
|
||||||
self.to_stderr(tb)
|
self.to_stderr(tb)
|
||||||
if not self.params.get('ignoreerrors', False):
|
if not self.params.get('ignoreerrors', False):
|
||||||
raise DownloadError(message)
|
raise DownloadError(message)
|
||||||
@@ -497,7 +502,7 @@ class FileDownloader(object):
|
|||||||
try:
|
try:
|
||||||
videos = ie.extract(url)
|
videos = ie.extract(url)
|
||||||
except ExtractorError as de: # An error we somewhat expected
|
except ExtractorError as de: # An error we somewhat expected
|
||||||
self.trouble(u'ERROR: ' + compat_str(de), compat_str(u''.join(traceback.format_tb(de.traceback))))
|
self.trouble(u'ERROR: ' + compat_str(de), de.format_traceback())
|
||||||
break
|
break
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
if self.params.get('ignoreerrors', False):
|
if self.params.get('ignoreerrors', False):
|
||||||
@@ -526,15 +531,29 @@ class FileDownloader(object):
|
|||||||
return self._download_retcode
|
return self._download_retcode
|
||||||
|
|
||||||
def post_process(self, filename, ie_info):
|
def post_process(self, filename, ie_info):
|
||||||
"""Run the postprocessing chain on the given file."""
|
"""Run all the postprocessors on the given file."""
|
||||||
info = dict(ie_info)
|
info = dict(ie_info)
|
||||||
info['filepath'] = filename
|
info['filepath'] = filename
|
||||||
|
keep_video = None
|
||||||
for pp in self._pps:
|
for pp in self._pps:
|
||||||
info = pp.run(info)
|
try:
|
||||||
if info is None:
|
keep_video_wish,new_info = pp.run(info)
|
||||||
break
|
if keep_video_wish is not None:
|
||||||
|
if keep_video_wish:
|
||||||
|
keep_video = keep_video_wish
|
||||||
|
elif keep_video is None:
|
||||||
|
# No clear decision yet, let IE decide
|
||||||
|
keep_video = keep_video_wish
|
||||||
|
except PostProcessingError as e:
|
||||||
|
self.to_stderr(u'ERROR: ' + e.msg)
|
||||||
|
if keep_video is False and not self.params.get('keepvideo', False):
|
||||||
|
try:
|
||||||
|
self.to_stderr(u'Deleting original file %s (pass -k to keep)' % filename)
|
||||||
|
os.remove(encodeFilename(filename))
|
||||||
|
except (IOError, OSError):
|
||||||
|
self.to_stderr(u'WARNING: Unable to remove downloaded video file')
|
||||||
|
|
||||||
def _download_with_rtmpdump(self, filename, url, player_url):
|
def _download_with_rtmpdump(self, filename, url, player_url, page_url):
|
||||||
self.report_destination(filename)
|
self.report_destination(filename)
|
||||||
tmpfilename = self.temp_name(filename)
|
tmpfilename = self.temp_name(filename)
|
||||||
|
|
||||||
@@ -548,7 +567,11 @@ class FileDownloader(object):
|
|||||||
# Download using rtmpdump. rtmpdump returns exit code 2 when
|
# Download using rtmpdump. rtmpdump returns exit code 2 when
|
||||||
# the connection was interrumpted and resuming appears to be
|
# the connection was interrumpted and resuming appears to be
|
||||||
# possible. This is part of rtmpdump's normal usage, AFAIK.
|
# possible. This is part of rtmpdump's normal usage, AFAIK.
|
||||||
basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename]
|
basic_args = ['rtmpdump', '-q', '-r', url, '-o', tmpfilename]
|
||||||
|
if player_url is not None:
|
||||||
|
basic_args += ['-W', player_url]
|
||||||
|
if page_url is not None:
|
||||||
|
basic_args += ['--pageUrl', page_url]
|
||||||
args = basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)]
|
args = basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)]
|
||||||
if self.params.get('verbose', False):
|
if self.params.get('verbose', False):
|
||||||
try:
|
try:
|
||||||
@@ -572,8 +595,15 @@ class FileDownloader(object):
|
|||||||
retval = 0
|
retval = 0
|
||||||
break
|
break
|
||||||
if retval == 0:
|
if retval == 0:
|
||||||
self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(encodeFilename(tmpfilename)))
|
fsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||||
|
self.to_screen(u'\r[rtmpdump] %s bytes' % fsize)
|
||||||
self.try_rename(tmpfilename, filename)
|
self.try_rename(tmpfilename, filename)
|
||||||
|
self._hook_progress({
|
||||||
|
'downloaded_bytes': fsize,
|
||||||
|
'total_bytes': fsize,
|
||||||
|
'filename': filename,
|
||||||
|
'status': 'finished',
|
||||||
|
})
|
||||||
return True
|
return True
|
||||||
else:
|
else:
|
||||||
self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval)
|
self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval)
|
||||||
@@ -581,22 +611,29 @@ class FileDownloader(object):
|
|||||||
|
|
||||||
def _do_download(self, filename, info_dict):
|
def _do_download(self, filename, info_dict):
|
||||||
url = info_dict['url']
|
url = info_dict['url']
|
||||||
player_url = info_dict.get('player_url', None)
|
|
||||||
|
|
||||||
# Check file already present
|
# Check file already present
|
||||||
if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False):
|
if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False):
|
||||||
self.report_file_already_downloaded(filename)
|
self.report_file_already_downloaded(filename)
|
||||||
|
self._hook_progress({
|
||||||
|
'filename': filename,
|
||||||
|
'status': 'finished',
|
||||||
|
})
|
||||||
return True
|
return True
|
||||||
|
|
||||||
# Attempt to download using rtmpdump
|
# Attempt to download using rtmpdump
|
||||||
if url.startswith('rtmp'):
|
if url.startswith('rtmp'):
|
||||||
return self._download_with_rtmpdump(filename, url, player_url)
|
return self._download_with_rtmpdump(filename, url,
|
||||||
|
info_dict.get('player_url', None),
|
||||||
|
info_dict.get('page_url', None))
|
||||||
|
|
||||||
tmpfilename = self.temp_name(filename)
|
tmpfilename = self.temp_name(filename)
|
||||||
stream = None
|
stream = None
|
||||||
|
|
||||||
# Do not include the Accept-Encoding header
|
# Do not include the Accept-Encoding header
|
||||||
headers = {'Youtubedl-no-compression': 'True'}
|
headers = {'Youtubedl-no-compression': 'True'}
|
||||||
|
if 'user_agent' in info_dict:
|
||||||
|
headers['Youtubedl-user-agent'] = info_dict['user_agent']
|
||||||
basic_request = compat_urllib_request.Request(url, None, headers)
|
basic_request = compat_urllib_request.Request(url, None, headers)
|
||||||
request = compat_urllib_request.Request(url, None, headers)
|
request = compat_urllib_request.Request(url, None, headers)
|
||||||
|
|
||||||
@@ -653,6 +690,10 @@ class FileDownloader(object):
|
|||||||
# the one in the hard drive.
|
# the one in the hard drive.
|
||||||
self.report_file_already_downloaded(filename)
|
self.report_file_already_downloaded(filename)
|
||||||
self.try_rename(tmpfilename, filename)
|
self.try_rename(tmpfilename, filename)
|
||||||
|
self._hook_progress({
|
||||||
|
'filename': filename,
|
||||||
|
'status': 'finished',
|
||||||
|
})
|
||||||
return True
|
return True
|
||||||
else:
|
else:
|
||||||
# The length does not match, we start the download over
|
# The length does not match, we start the download over
|
||||||
@@ -711,6 +752,14 @@ class FileDownloader(object):
|
|||||||
eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
|
eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
|
||||||
self.report_progress(percent_str, data_len_str, speed_str, eta_str)
|
self.report_progress(percent_str, data_len_str, speed_str, eta_str)
|
||||||
|
|
||||||
|
self._hook_progress({
|
||||||
|
'downloaded_bytes': byte_counter,
|
||||||
|
'total_bytes': data_len,
|
||||||
|
'tmpfilename': tmpfilename,
|
||||||
|
'filename': filename,
|
||||||
|
'status': 'downloading',
|
||||||
|
})
|
||||||
|
|
||||||
# Apply rate limit
|
# Apply rate limit
|
||||||
self.slow_down(start, byte_counter - resume_len)
|
self.slow_down(start, byte_counter - resume_len)
|
||||||
|
|
||||||
@@ -727,4 +776,31 @@ class FileDownloader(object):
|
|||||||
if self.params.get('updatetime', True):
|
if self.params.get('updatetime', True):
|
||||||
info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))
|
info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))
|
||||||
|
|
||||||
|
self._hook_progress({
|
||||||
|
'downloaded_bytes': byte_counter,
|
||||||
|
'total_bytes': byte_counter,
|
||||||
|
'filename': filename,
|
||||||
|
'status': 'finished',
|
||||||
|
})
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
def _hook_progress(self, status):
|
||||||
|
for ph in self._progress_hooks:
|
||||||
|
ph(status)
|
||||||
|
|
||||||
|
def add_progress_hook(self, ph):
|
||||||
|
""" ph gets called on download progress, with a dictionary with the entries
|
||||||
|
* filename: The final filename
|
||||||
|
* status: One of "downloading" and "finished"
|
||||||
|
|
||||||
|
It can also have some of the following entries:
|
||||||
|
|
||||||
|
* downloaded_bytes: Bytes on disks
|
||||||
|
* total_bytes: Total bytes, None if unknown
|
||||||
|
* tmpfilename: The filename we're currently writing to
|
||||||
|
|
||||||
|
Hooks are guaranteed to be called at least once (with status "finished")
|
||||||
|
if the download is successful.
|
||||||
|
"""
|
||||||
|
self._progress_hooks.append(ph)
|
||||||
|
@@ -5,6 +5,7 @@ from __future__ import absolute_import
|
|||||||
|
|
||||||
import base64
|
import base64
|
||||||
import datetime
|
import datetime
|
||||||
|
import itertools
|
||||||
import netrc
|
import netrc
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
@@ -35,15 +36,16 @@ class InfoExtractor(object):
|
|||||||
url: Final video URL.
|
url: Final video URL.
|
||||||
title: Video title, unescaped.
|
title: Video title, unescaped.
|
||||||
ext: Video filename extension.
|
ext: Video filename extension.
|
||||||
uploader: Full name of the video uploader.
|
|
||||||
upload_date: Video upload date (YYYYMMDD).
|
|
||||||
|
|
||||||
The following fields are optional:
|
The following fields are optional:
|
||||||
|
|
||||||
format: The video format, defaults to ext (used for --get-format)
|
format: The video format, defaults to ext (used for --get-format)
|
||||||
thumbnail: Full URL to a video thumbnail image.
|
thumbnail: Full URL to a video thumbnail image.
|
||||||
description: One-line video description.
|
description: One-line video description.
|
||||||
|
uploader: Full name of the video uploader.
|
||||||
|
upload_date: Video upload date (YYYYMMDD).
|
||||||
uploader_id: Nickname or id of the video uploader.
|
uploader_id: Nickname or id of the video uploader.
|
||||||
|
location: Physical location of the video.
|
||||||
player_url: SWF Player URL (used for rtmpdump).
|
player_url: SWF Player URL (used for rtmpdump).
|
||||||
subtitles: The .srt file contents.
|
subtitles: The .srt file contents.
|
||||||
urlhandle: [internal] The urlHandle to be used to download the file,
|
urlhandle: [internal] The urlHandle to be used to download the file,
|
||||||
@@ -106,18 +108,23 @@ class InfoExtractor(object):
|
|||||||
def IE_NAME(self):
|
def IE_NAME(self):
|
||||||
return type(self).__name__[:-2]
|
return type(self).__name__[:-2]
|
||||||
|
|
||||||
def _download_webpage(self, url_or_request, video_id, note=None, errnote=None):
|
def _request_webpage(self, url_or_request, video_id, note=None, errnote=None):
|
||||||
|
""" Returns the response handle """
|
||||||
if note is None:
|
if note is None:
|
||||||
note = u'Downloading video webpage'
|
note = u'Downloading video webpage'
|
||||||
self._downloader.to_screen(u'[%s] %s: %s' % (self.IE_NAME, video_id, note))
|
self._downloader.to_screen(u'[%s] %s: %s' % (self.IE_NAME, video_id, note))
|
||||||
try:
|
try:
|
||||||
urlh = compat_urllib_request.urlopen(url_or_request)
|
return compat_urllib_request.urlopen(url_or_request)
|
||||||
webpage_bytes = urlh.read()
|
|
||||||
return webpage_bytes.decode('utf-8', 'replace')
|
|
||||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||||
if errnote is None:
|
if errnote is None:
|
||||||
errnote = u'Unable to download webpage'
|
errnote = u'Unable to download webpage'
|
||||||
raise ExtractorError(u'%s: %s' % (errnote, compat_str(err)))
|
raise ExtractorError(u'%s: %s' % (errnote, compat_str(err)), sys.exc_info()[2])
|
||||||
|
|
||||||
|
def _download_webpage(self, url_or_request, video_id, note=None, errnote=None):
|
||||||
|
""" Returns the data of the page as a string """
|
||||||
|
urlh = self._request_webpage(url_or_request, video_id, note, errnote)
|
||||||
|
webpage_bytes = urlh.read()
|
||||||
|
return webpage_bytes.decode('utf-8', 'replace')
|
||||||
|
|
||||||
|
|
||||||
class YoutubeIE(InfoExtractor):
|
class YoutubeIE(InfoExtractor):
|
||||||
@@ -1974,62 +1981,14 @@ class DepositFilesIE(InfoExtractor):
|
|||||||
class FacebookIE(InfoExtractor):
|
class FacebookIE(InfoExtractor):
|
||||||
"""Information Extractor for Facebook"""
|
"""Information Extractor for Facebook"""
|
||||||
|
|
||||||
_WORKING = False
|
|
||||||
_VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook\.com/(?:video/video|photo)\.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)'
|
_VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook\.com/(?:video/video|photo)\.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)'
|
||||||
_LOGIN_URL = 'https://login.facebook.com/login.php?m&next=http%3A%2F%2Fm.facebook.com%2Fhome.php&'
|
_LOGIN_URL = 'https://login.facebook.com/login.php?m&next=http%3A%2F%2Fm.facebook.com%2Fhome.php&'
|
||||||
_NETRC_MACHINE = 'facebook'
|
_NETRC_MACHINE = 'facebook'
|
||||||
_available_formats = ['video', 'highqual', 'lowqual']
|
|
||||||
_video_extensions = {
|
|
||||||
'video': 'mp4',
|
|
||||||
'highqual': 'mp4',
|
|
||||||
'lowqual': 'mp4',
|
|
||||||
}
|
|
||||||
IE_NAME = u'facebook'
|
IE_NAME = u'facebook'
|
||||||
|
|
||||||
def __init__(self, downloader=None):
|
|
||||||
InfoExtractor.__init__(self, downloader)
|
|
||||||
|
|
||||||
def _reporter(self, message):
|
|
||||||
"""Add header and report message."""
|
|
||||||
self._downloader.to_screen(u'[facebook] %s' % message)
|
|
||||||
|
|
||||||
def report_login(self):
|
def report_login(self):
|
||||||
"""Report attempt to log in."""
|
"""Report attempt to log in."""
|
||||||
self._reporter(u'Logging in')
|
self._downloader.to_screen(u'[%s] Logging in' % self.IE_NAME)
|
||||||
|
|
||||||
def report_video_webpage_download(self, video_id):
|
|
||||||
"""Report attempt to download video webpage."""
|
|
||||||
self._reporter(u'%s: Downloading video webpage' % video_id)
|
|
||||||
|
|
||||||
def report_information_extraction(self, video_id):
|
|
||||||
"""Report attempt to extract video information."""
|
|
||||||
self._reporter(u'%s: Extracting video information' % video_id)
|
|
||||||
|
|
||||||
def _parse_page(self, video_webpage):
|
|
||||||
"""Extract video information from page"""
|
|
||||||
# General data
|
|
||||||
data = {'title': r'\("video_title", "(.*?)"\)',
|
|
||||||
'description': r'<div class="datawrap">(.*?)</div>',
|
|
||||||
'owner': r'\("video_owner_name", "(.*?)"\)',
|
|
||||||
'thumbnail': r'\("thumb_url", "(?P<THUMB>.*?)"\)',
|
|
||||||
}
|
|
||||||
video_info = {}
|
|
||||||
for piece in data.keys():
|
|
||||||
mobj = re.search(data[piece], video_webpage)
|
|
||||||
if mobj is not None:
|
|
||||||
video_info[piece] = compat_urllib_parse.unquote_plus(mobj.group(1).decode("unicode_escape"))
|
|
||||||
|
|
||||||
# Video urls
|
|
||||||
video_urls = {}
|
|
||||||
for fmt in self._available_formats:
|
|
||||||
mobj = re.search(r'\("%s_src\", "(.+?)"\)' % fmt, video_webpage)
|
|
||||||
if mobj is not None:
|
|
||||||
# URL is in a Javascript segment inside an escaped Unicode format within
|
|
||||||
# the generally utf-8 page
|
|
||||||
video_urls[fmt] = compat_urllib_parse.unquote_plus(mobj.group(1).decode("unicode_escape"))
|
|
||||||
video_info['video_urls'] = video_urls
|
|
||||||
|
|
||||||
return video_info
|
|
||||||
|
|
||||||
def _real_initialize(self):
|
def _real_initialize(self):
|
||||||
if self._downloader is None:
|
if self._downloader is None:
|
||||||
@@ -2082,100 +2041,35 @@ class FacebookIE(InfoExtractor):
|
|||||||
return
|
return
|
||||||
video_id = mobj.group('ID')
|
video_id = mobj.group('ID')
|
||||||
|
|
||||||
# Get video webpage
|
url = 'https://www.facebook.com/video/video.php?v=%s' % video_id
|
||||||
self.report_video_webpage_download(video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
request = compat_urllib_request.Request('https://www.facebook.com/video/video.php?v=%s' % video_id)
|
|
||||||
try:
|
|
||||||
page = compat_urllib_request.urlopen(request)
|
|
||||||
video_webpage = page.read()
|
|
||||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
|
||||||
self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err))
|
|
||||||
return
|
|
||||||
|
|
||||||
# Start extracting information
|
BEFORE = '[["allowFullScreen","true"],["allowScriptAccess","always"],["salign","tl"],["scale","noscale"],["wmode","opaque"]].forEach(function(param) {swf.addParam(param[0], param[1]);});\n'
|
||||||
self.report_information_extraction(video_id)
|
AFTER = '.forEach(function(variable) {swf.addVariable(variable[0], variable[1]);});'
|
||||||
|
m = re.search(re.escape(BEFORE) + '(.*?)' + re.escape(AFTER), webpage)
|
||||||
|
if not m:
|
||||||
|
raise ExtractorError(u'Cannot parse data')
|
||||||
|
data = dict(json.loads(m.group(1)))
|
||||||
|
params_raw = compat_urllib_parse.unquote(data['params'])
|
||||||
|
params = json.loads(params_raw)
|
||||||
|
video_url = params['hd_src']
|
||||||
|
video_duration = int(params['video_duration'])
|
||||||
|
|
||||||
# Extract information
|
m = re.search('<h2 class="uiHeaderTitle">([^<]+)</h2>', webpage)
|
||||||
video_info = self._parse_page(video_webpage)
|
if not m:
|
||||||
|
raise ExtractorError(u'Cannot find title in webpage')
|
||||||
|
video_title = unescapeHTML(m.group(1))
|
||||||
|
|
||||||
# uploader
|
info = {
|
||||||
if 'owner' not in video_info:
|
'id': video_id,
|
||||||
self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
|
|
||||||
return
|
|
||||||
video_uploader = video_info['owner']
|
|
||||||
|
|
||||||
# title
|
|
||||||
if 'title' not in video_info:
|
|
||||||
self._downloader.trouble(u'ERROR: unable to extract video title')
|
|
||||||
return
|
|
||||||
video_title = video_info['title']
|
|
||||||
video_title = video_title.decode('utf-8')
|
|
||||||
|
|
||||||
# thumbnail image
|
|
||||||
if 'thumbnail' not in video_info:
|
|
||||||
self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
|
|
||||||
video_thumbnail = ''
|
|
||||||
else:
|
|
||||||
video_thumbnail = video_info['thumbnail']
|
|
||||||
|
|
||||||
# upload date
|
|
||||||
upload_date = None
|
|
||||||
if 'upload_date' in video_info:
|
|
||||||
upload_time = video_info['upload_date']
|
|
||||||
timetuple = email.utils.parsedate_tz(upload_time)
|
|
||||||
if timetuple is not None:
|
|
||||||
try:
|
|
||||||
upload_date = time.strftime('%Y%m%d', timetuple[0:9])
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
|
|
||||||
# description
|
|
||||||
video_description = video_info.get('description', 'No description available.')
|
|
||||||
|
|
||||||
url_map = video_info['video_urls']
|
|
||||||
if url_map:
|
|
||||||
# Decide which formats to download
|
|
||||||
req_format = self._downloader.params.get('format', None)
|
|
||||||
format_limit = self._downloader.params.get('format_limit', None)
|
|
||||||
|
|
||||||
if format_limit is not None and format_limit in self._available_formats:
|
|
||||||
format_list = self._available_formats[self._available_formats.index(format_limit):]
|
|
||||||
else:
|
|
||||||
format_list = self._available_formats
|
|
||||||
existing_formats = [x for x in format_list if x in url_map]
|
|
||||||
if len(existing_formats) == 0:
|
|
||||||
self._downloader.trouble(u'ERROR: no known formats available for video')
|
|
||||||
return
|
|
||||||
if req_format is None:
|
|
||||||
video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
|
|
||||||
elif req_format == 'worst':
|
|
||||||
video_url_list = [(existing_formats[len(existing_formats)-1], url_map[existing_formats[len(existing_formats)-1]])] # worst quality
|
|
||||||
elif req_format == '-1':
|
|
||||||
video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
|
|
||||||
else:
|
|
||||||
# Specific format
|
|
||||||
if req_format not in url_map:
|
|
||||||
self._downloader.trouble(u'ERROR: requested format not available')
|
|
||||||
return
|
|
||||||
video_url_list = [(req_format, url_map[req_format])] # Specific format
|
|
||||||
|
|
||||||
results = []
|
|
||||||
for format_param, video_real_url in video_url_list:
|
|
||||||
# Extension
|
|
||||||
video_extension = self._video_extensions.get(format_param, 'mp4')
|
|
||||||
|
|
||||||
results.append({
|
|
||||||
'id': video_id.decode('utf-8'),
|
|
||||||
'url': video_real_url.decode('utf-8'),
|
|
||||||
'uploader': video_uploader.decode('utf-8'),
|
|
||||||
'upload_date': upload_date,
|
|
||||||
'title': video_title,
|
'title': video_title,
|
||||||
'ext': video_extension.decode('utf-8'),
|
'url': video_url,
|
||||||
'format': (format_param is None and u'NA' or format_param.decode('utf-8')),
|
'ext': 'mp4',
|
||||||
'thumbnail': video_thumbnail.decode('utf-8'),
|
'duration': video_duration,
|
||||||
'description': video_description.decode('utf-8'),
|
'thumbnail': params['thumbnail_src'],
|
||||||
})
|
}
|
||||||
return results
|
return [info]
|
||||||
|
|
||||||
|
|
||||||
class BlipTVIE(InfoExtractor):
|
class BlipTVIE(InfoExtractor):
|
||||||
"""Information extractor for blip.tv"""
|
"""Information extractor for blip.tv"""
|
||||||
@@ -2204,6 +2098,7 @@ class BlipTVIE(InfoExtractor):
|
|||||||
cchar = '?'
|
cchar = '?'
|
||||||
json_url = url + cchar + 'skin=json&version=2&no_wrap=1'
|
json_url = url + cchar + 'skin=json&version=2&no_wrap=1'
|
||||||
request = compat_urllib_request.Request(json_url)
|
request = compat_urllib_request.Request(json_url)
|
||||||
|
request.add_header('User-Agent', 'iTunes/10.6.1')
|
||||||
self.report_extraction(mobj.group(1))
|
self.report_extraction(mobj.group(1))
|
||||||
info = None
|
info = None
|
||||||
try:
|
try:
|
||||||
@@ -2224,8 +2119,7 @@ class BlipTVIE(InfoExtractor):
|
|||||||
'urlhandle': urlh
|
'urlhandle': urlh
|
||||||
}
|
}
|
||||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||||
self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % compat_str(err))
|
raise ExtractorError(u'ERROR: unable to download video info webpage: %s' % compat_str(err))
|
||||||
return
|
|
||||||
if info is None: # Regular URL
|
if info is None: # Regular URL
|
||||||
try:
|
try:
|
||||||
json_code_bytes = urlh.read()
|
json_code_bytes = urlh.read()
|
||||||
@@ -2258,13 +2152,13 @@ class BlipTVIE(InfoExtractor):
|
|||||||
'format': data['media']['mimeType'],
|
'format': data['media']['mimeType'],
|
||||||
'thumbnail': data['thumbnailUrl'],
|
'thumbnail': data['thumbnailUrl'],
|
||||||
'description': data['description'],
|
'description': data['description'],
|
||||||
'player_url': data['embedUrl']
|
'player_url': data['embedUrl'],
|
||||||
|
'user_agent': 'iTunes/10.6.1',
|
||||||
}
|
}
|
||||||
except (ValueError,KeyError) as err:
|
except (ValueError,KeyError) as err:
|
||||||
self._downloader.trouble(u'ERROR: unable to parse video information: %s' % repr(err))
|
self._downloader.trouble(u'ERROR: unable to parse video information: %s' % repr(err))
|
||||||
return
|
return
|
||||||
|
|
||||||
std_headers['User-Agent'] = 'iTunes/10.6.1'
|
|
||||||
return [info]
|
return [info]
|
||||||
|
|
||||||
|
|
||||||
@@ -2333,7 +2227,6 @@ class ComedyCentralIE(InfoExtractor):
|
|||||||
(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
|
(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
|
||||||
|(watch/(?P<date>[^/]*)/(?P<tdstitle>.*)))))
|
|(watch/(?P<date>[^/]*)/(?P<tdstitle>.*)))))
|
||||||
$"""
|
$"""
|
||||||
IE_NAME = u'comedycentral'
|
|
||||||
|
|
||||||
_available_formats = ['3500', '2200', '1700', '1200', '750', '400']
|
_available_formats = ['3500', '2200', '1700', '1200', '750', '400']
|
||||||
|
|
||||||
@@ -2361,16 +2254,12 @@ class ComedyCentralIE(InfoExtractor):
|
|||||||
def report_extraction(self, episode_id):
|
def report_extraction(self, episode_id):
|
||||||
self._downloader.to_screen(u'[comedycentral] %s: Extracting information' % episode_id)
|
self._downloader.to_screen(u'[comedycentral] %s: Extracting information' % episode_id)
|
||||||
|
|
||||||
def report_config_download(self, episode_id):
|
def report_config_download(self, episode_id, media_id):
|
||||||
self._downloader.to_screen(u'[comedycentral] %s: Downloading configuration' % episode_id)
|
self._downloader.to_screen(u'[comedycentral] %s: Downloading configuration for %s' % (episode_id, media_id))
|
||||||
|
|
||||||
def report_index_download(self, episode_id):
|
def report_index_download(self, episode_id):
|
||||||
self._downloader.to_screen(u'[comedycentral] %s: Downloading show index' % episode_id)
|
self._downloader.to_screen(u'[comedycentral] %s: Downloading show index' % episode_id)
|
||||||
|
|
||||||
def report_player_url(self, episode_id):
|
|
||||||
self._downloader.to_screen(u'[comedycentral] %s: Determining player URL' % episode_id)
|
|
||||||
|
|
||||||
|
|
||||||
def _print_formats(self, formats):
|
def _print_formats(self, formats):
|
||||||
print('Available formats:')
|
print('Available formats:')
|
||||||
for x in formats:
|
for x in formats:
|
||||||
@@ -2409,6 +2298,7 @@ class ComedyCentralIE(InfoExtractor):
|
|||||||
try:
|
try:
|
||||||
htmlHandle = compat_urllib_request.urlopen(req)
|
htmlHandle = compat_urllib_request.urlopen(req)
|
||||||
html = htmlHandle.read()
|
html = htmlHandle.read()
|
||||||
|
webpage = html.decode('utf-8')
|
||||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||||
self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err))
|
self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err))
|
||||||
return
|
return
|
||||||
@@ -2423,29 +2313,20 @@ class ComedyCentralIE(InfoExtractor):
|
|||||||
return
|
return
|
||||||
epTitle = mobj.group('episode')
|
epTitle = mobj.group('episode')
|
||||||
|
|
||||||
mMovieParams = re.findall('(?:<param name="movie" value="|var url = ")(http://media.mtvnservices.com/([^"]*(?:episode|video).*?:.*?))"', html)
|
mMovieParams = re.findall('(?:<param name="movie" value="|var url = ")(http://media.mtvnservices.com/([^"]*(?:episode|video).*?:.*?))"', webpage)
|
||||||
|
|
||||||
if len(mMovieParams) == 0:
|
if len(mMovieParams) == 0:
|
||||||
# The Colbert Report embeds the information in a without
|
# The Colbert Report embeds the information in a without
|
||||||
# a URL prefix; so extract the alternate reference
|
# a URL prefix; so extract the alternate reference
|
||||||
# and then add the URL prefix manually.
|
# and then add the URL prefix manually.
|
||||||
|
|
||||||
altMovieParams = re.findall('data-mgid="([^"]*(?:episode|video).*?:.*?)"', html)
|
altMovieParams = re.findall('data-mgid="([^"]*(?:episode|video).*?:.*?)"', webpage)
|
||||||
if len(altMovieParams) == 0:
|
if len(altMovieParams) == 0:
|
||||||
self._downloader.trouble(u'ERROR: unable to find Flash URL in webpage ' + url)
|
self._downloader.trouble(u'ERROR: unable to find Flash URL in webpage ' + url)
|
||||||
return
|
return
|
||||||
else:
|
else:
|
||||||
mMovieParams = [("http://media.mtvnservices.com/" + altMovieParams[0], altMovieParams[0])]
|
mMovieParams = [("http://media.mtvnservices.com/" + altMovieParams[0], altMovieParams[0])]
|
||||||
|
|
||||||
playerUrl_raw = mMovieParams[0][0]
|
|
||||||
self.report_player_url(epTitle)
|
|
||||||
try:
|
|
||||||
urlHandle = compat_urllib_request.urlopen(playerUrl_raw)
|
|
||||||
playerUrl = urlHandle.geturl()
|
|
||||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
|
||||||
self._downloader.trouble(u'ERROR: unable to find out player URL: ' + compat_str(err))
|
|
||||||
return
|
|
||||||
|
|
||||||
uri = mMovieParams[0][1]
|
uri = mMovieParams[0][1]
|
||||||
indexUrl = 'http://shadow.comedycentral.com/feeds/video_player/mrss/?' + compat_urllib_parse.urlencode({'uri': uri})
|
indexUrl = 'http://shadow.comedycentral.com/feeds/video_player/mrss/?' + compat_urllib_parse.urlencode({'uri': uri})
|
||||||
self.report_index_download(epTitle)
|
self.report_index_download(epTitle)
|
||||||
@@ -2459,7 +2340,7 @@ class ComedyCentralIE(InfoExtractor):
|
|||||||
|
|
||||||
idoc = xml.etree.ElementTree.fromstring(indexXml)
|
idoc = xml.etree.ElementTree.fromstring(indexXml)
|
||||||
itemEls = idoc.findall('.//item')
|
itemEls = idoc.findall('.//item')
|
||||||
for itemEl in itemEls:
|
for partNum,itemEl in enumerate(itemEls):
|
||||||
mediaId = itemEl.findall('./guid')[0].text
|
mediaId = itemEl.findall('./guid')[0].text
|
||||||
shortMediaId = mediaId.split(':')[-1]
|
shortMediaId = mediaId.split(':')[-1]
|
||||||
showId = mediaId.split(':')[-2].replace('.com', '')
|
showId = mediaId.split(':')[-2].replace('.com', '')
|
||||||
@@ -2469,7 +2350,7 @@ class ComedyCentralIE(InfoExtractor):
|
|||||||
configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' +
|
configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' +
|
||||||
compat_urllib_parse.urlencode({'uri': mediaId}))
|
compat_urllib_parse.urlencode({'uri': mediaId}))
|
||||||
configReq = compat_urllib_request.Request(configUrl)
|
configReq = compat_urllib_request.Request(configUrl)
|
||||||
self.report_config_download(epTitle)
|
self.report_config_download(epTitle, shortMediaId)
|
||||||
try:
|
try:
|
||||||
configXml = compat_urllib_request.urlopen(configReq).read()
|
configXml = compat_urllib_request.urlopen(configReq).read()
|
||||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||||
@@ -2491,7 +2372,7 @@ class ComedyCentralIE(InfoExtractor):
|
|||||||
return
|
return
|
||||||
|
|
||||||
# For now, just pick the highest bitrate
|
# For now, just pick the highest bitrate
|
||||||
format,video_url = turls[-1]
|
format,rtmp_video_url = turls[-1]
|
||||||
|
|
||||||
# Get the format arg from the arg stream
|
# Get the format arg from the arg stream
|
||||||
req_format = self._downloader.params.get('format', None)
|
req_format = self._downloader.params.get('format', None)
|
||||||
@@ -2499,18 +2380,16 @@ class ComedyCentralIE(InfoExtractor):
|
|||||||
# Select format if we can find one
|
# Select format if we can find one
|
||||||
for f,v in turls:
|
for f,v in turls:
|
||||||
if f == req_format:
|
if f == req_format:
|
||||||
format, video_url = f, v
|
format, rtmp_video_url = f, v
|
||||||
break
|
break
|
||||||
|
|
||||||
# Patch to download from alternative CDN, which does not
|
m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp.comedystor/.*)$', rtmp_video_url)
|
||||||
# break on current RTMPDump builds
|
if not m:
|
||||||
broken_cdn = "rtmpe://viacomccstrmfs.fplive.net/viacomccstrm/gsp.comedystor/"
|
raise ExtractorError(u'Cannot transform RTMP url')
|
||||||
better_cdn = "rtmpe://cp10740.edgefcs.net/ondemand/mtvnorigin/gsp.comedystor/"
|
base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/'
|
||||||
|
video_url = base + m.group('finalid')
|
||||||
|
|
||||||
if video_url.startswith(broken_cdn):
|
effTitle = showId + u'-' + epTitle + u' part ' + compat_str(partNum+1)
|
||||||
video_url = video_url.replace(broken_cdn, better_cdn)
|
|
||||||
|
|
||||||
effTitle = showId + u'-' + epTitle
|
|
||||||
info = {
|
info = {
|
||||||
'id': shortMediaId,
|
'id': shortMediaId,
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
@@ -2521,9 +2400,7 @@ class ComedyCentralIE(InfoExtractor):
|
|||||||
'format': format,
|
'format': format,
|
||||||
'thumbnail': None,
|
'thumbnail': None,
|
||||||
'description': officialTitle,
|
'description': officialTitle,
|
||||||
'player_url': None #playerUrl
|
|
||||||
}
|
}
|
||||||
|
|
||||||
results.append(info)
|
results.append(info)
|
||||||
|
|
||||||
return results
|
return results
|
||||||
@@ -2603,7 +2480,6 @@ class EscapistIE(InfoExtractor):
|
|||||||
|
|
||||||
return [info]
|
return [info]
|
||||||
|
|
||||||
|
|
||||||
class CollegeHumorIE(InfoExtractor):
|
class CollegeHumorIE(InfoExtractor):
|
||||||
"""Information extractor for collegehumor.com"""
|
"""Information extractor for collegehumor.com"""
|
||||||
|
|
||||||
@@ -2995,8 +2871,7 @@ class StanfordOpenClassroomIE(InfoExtractor):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
if mobj is None:
|
if mobj is None:
|
||||||
self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
|
raise ExtractorError(u'Invalid URL: %s' % url)
|
||||||
return
|
|
||||||
|
|
||||||
if mobj.group('course') and mobj.group('video'): # A specific video
|
if mobj.group('course') and mobj.group('video'): # A specific video
|
||||||
course = mobj.group('course')
|
course = mobj.group('course')
|
||||||
@@ -3033,12 +2908,9 @@ class StanfordOpenClassroomIE(InfoExtractor):
|
|||||||
'upload_date': None,
|
'upload_date': None,
|
||||||
}
|
}
|
||||||
|
|
||||||
self.report_download_webpage(info['id'])
|
coursepage = self._download_webpage(url, info['id'],
|
||||||
try:
|
note='Downloading course info page',
|
||||||
coursepage = compat_urllib_request.urlopen(url).read()
|
errnote='Unable to download course info page')
|
||||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
|
||||||
self._downloader.trouble(u'ERROR: unable to download course info page: ' + compat_str(err))
|
|
||||||
return
|
|
||||||
|
|
||||||
m = re.search('<h1>([^<]+)</h1>', coursepage)
|
m = re.search('<h1>([^<]+)</h1>', coursepage)
|
||||||
if m:
|
if m:
|
||||||
@@ -3062,7 +2934,6 @@ class StanfordOpenClassroomIE(InfoExtractor):
|
|||||||
assert entry['type'] == 'reference'
|
assert entry['type'] == 'reference'
|
||||||
results += self.extract(entry['url'])
|
results += self.extract(entry['url'])
|
||||||
return results
|
return results
|
||||||
|
|
||||||
else: # Root page
|
else: # Root page
|
||||||
info = {
|
info = {
|
||||||
'id': 'Stanford OpenClassroom',
|
'id': 'Stanford OpenClassroom',
|
||||||
@@ -3290,7 +3161,7 @@ class YoukuIE(InfoExtractor):
|
|||||||
class XNXXIE(InfoExtractor):
|
class XNXXIE(InfoExtractor):
|
||||||
"""Information extractor for xnxx.com"""
|
"""Information extractor for xnxx.com"""
|
||||||
|
|
||||||
_VALID_URL = r'^http://video\.xnxx\.com/video([0-9]+)/(.*)'
|
_VALID_URL = r'^(?:https?://)?video\.xnxx\.com/video([0-9]+)/(.*)'
|
||||||
IE_NAME = u'xnxx'
|
IE_NAME = u'xnxx'
|
||||||
VIDEO_URL_RE = r'flv_url=(.*?)&'
|
VIDEO_URL_RE = r'flv_url=(.*?)&'
|
||||||
VIDEO_TITLE_RE = r'<title>(.*?)\s+-\s+XNXX.COM'
|
VIDEO_TITLE_RE = r'<title>(.*?)\s+-\s+XNXX.COM'
|
||||||
@@ -3542,17 +3413,25 @@ class JustinTVIE(InfoExtractor):
|
|||||||
return
|
return
|
||||||
|
|
||||||
response = json.loads(webpage)
|
response = json.loads(webpage)
|
||||||
|
if type(response) != list:
|
||||||
|
error_text = response.get('error', 'unknown error')
|
||||||
|
self._downloader.trouble(u'ERROR: Justin.tv API: %s' % error_text)
|
||||||
|
return
|
||||||
info = []
|
info = []
|
||||||
for clip in response:
|
for clip in response:
|
||||||
video_url = clip['video_file_url']
|
video_url = clip['video_file_url']
|
||||||
if video_url:
|
if video_url:
|
||||||
video_extension = os.path.splitext(video_url)[1][1:]
|
video_extension = os.path.splitext(video_url)[1][1:]
|
||||||
video_date = re.sub('-', '', clip['created_on'][:10])
|
video_date = re.sub('-', '', clip['start_time'][:10])
|
||||||
|
video_uploader_id = clip.get('user_id', clip.get('channel_id'))
|
||||||
|
video_id = clip['id']
|
||||||
|
video_title = clip.get('title', video_id)
|
||||||
info.append({
|
info.append({
|
||||||
'id': clip['id'],
|
'id': video_id,
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'title': clip['title'],
|
'title': video_title,
|
||||||
'uploader': clip.get('user_id', clip.get('channel_id')),
|
'uploader': clip.get('channel_name', video_uploader_id),
|
||||||
|
'uploader_id': video_uploader_id,
|
||||||
'upload_date': video_date,
|
'upload_date': video_date,
|
||||||
'ext': video_extension,
|
'ext': video_extension,
|
||||||
})
|
})
|
||||||
@@ -3571,7 +3450,7 @@ class JustinTVIE(InfoExtractor):
|
|||||||
paged = True
|
paged = True
|
||||||
api += '/channel/archives/%s.json'
|
api += '/channel/archives/%s.json'
|
||||||
else:
|
else:
|
||||||
api += '/clip/show/%s.json'
|
api += '/broadcast/by_archive/%s.json'
|
||||||
api = api % (video_id,)
|
api = api % (video_id,)
|
||||||
|
|
||||||
self.report_extraction(video_id)
|
self.report_extraction(video_id)
|
||||||
@@ -3694,8 +3573,8 @@ class SteamIE(InfoExtractor):
|
|||||||
videourl = 'http://store.steampowered.com/video/%s/' % gameID
|
videourl = 'http://store.steampowered.com/video/%s/' % gameID
|
||||||
webpage = self._download_webpage(videourl, gameID)
|
webpage = self._download_webpage(videourl, gameID)
|
||||||
mweb = re.finditer(urlRE, webpage)
|
mweb = re.finditer(urlRE, webpage)
|
||||||
namesRE = r'<span class=\"title\">(?P<videoName>[\w:/\.\?=\+\s-]+)</span>'
|
namesRE = r'<span class="title">(?P<videoName>.+?)</span>'
|
||||||
titles = list(re.finditer(namesRE, webpage))
|
titles = re.finditer(namesRE, webpage)
|
||||||
videos = []
|
videos = []
|
||||||
for vid,vtitle in zip(mweb,titles):
|
for vid,vtitle in zip(mweb,titles):
|
||||||
video_id = vid.group('videoID')
|
video_id = vid.group('videoID')
|
||||||
@@ -3707,13 +3586,13 @@ class SteamIE(InfoExtractor):
|
|||||||
'id':video_id,
|
'id':video_id,
|
||||||
'url':video_url,
|
'url':video_url,
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'title': title
|
'title': unescapeHTML(title)
|
||||||
}
|
}
|
||||||
videos.append(info)
|
videos.append(info)
|
||||||
return videos
|
return videos
|
||||||
|
|
||||||
class UstreamIE(InfoExtractor):
|
class UstreamIE(InfoExtractor):
|
||||||
_VALID_URL = r'http://www.ustream.tv/recorded/(?P<videoID>\d+)'
|
_VALID_URL = r'https?://www\.ustream\.tv/recorded/(?P<videoID>\d+)'
|
||||||
IE_NAME = u'ustream'
|
IE_NAME = u'ustream'
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@@ -3734,6 +3613,292 @@ class UstreamIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
return [info]
|
return [info]
|
||||||
|
|
||||||
|
class RBMARadioIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?rbmaradio\.com/shows/(?P<videoID>[^/]+)$'
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
m = re.match(self._VALID_URL, url)
|
||||||
|
video_id = m.group('videoID')
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
m = re.search(r'<script>window.gon = {.*?};gon\.show=(.+?);</script>', webpage)
|
||||||
|
if not m:
|
||||||
|
raise ExtractorError(u'Cannot find metadata')
|
||||||
|
json_data = m.group(1)
|
||||||
|
|
||||||
|
try:
|
||||||
|
data = json.loads(json_data)
|
||||||
|
except ValueError as e:
|
||||||
|
raise ExtractorError(u'Invalid JSON: ' + str(e))
|
||||||
|
|
||||||
|
video_url = data['akamai_url'] + '&cbr=256'
|
||||||
|
url_parts = compat_urllib_parse_urlparse(video_url)
|
||||||
|
video_ext = url_parts.path.rpartition('.')[2]
|
||||||
|
info = {
|
||||||
|
'id': video_id,
|
||||||
|
'url': video_url,
|
||||||
|
'ext': video_ext,
|
||||||
|
'title': data['title'],
|
||||||
|
'description': data.get('teaser_text'),
|
||||||
|
'location': data.get('country_of_origin'),
|
||||||
|
'uploader': data.get('host', {}).get('name'),
|
||||||
|
'uploader_id': data.get('host', {}).get('slug'),
|
||||||
|
'thumbnail': data.get('image', {}).get('large_url_2x'),
|
||||||
|
'duration': data.get('duration'),
|
||||||
|
}
|
||||||
|
return [info]
|
||||||
|
|
||||||
|
|
||||||
|
class YouPornIE(InfoExtractor):
|
||||||
|
"""Information extractor for youporn.com."""
|
||||||
|
_VALID_URL = r'^(?:https?://)?(?:\w+\.)?youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+)'
|
||||||
|
|
||||||
|
def _print_formats(self, formats):
|
||||||
|
"""Print all available formats"""
|
||||||
|
print(u'Available formats:')
|
||||||
|
print(u'ext\t\tformat')
|
||||||
|
print(u'---------------------------------')
|
||||||
|
for format in formats:
|
||||||
|
print(u'%s\t\t%s' % (format['ext'], format['format']))
|
||||||
|
|
||||||
|
def _specific(self, req_format, formats):
|
||||||
|
for x in formats:
|
||||||
|
if(x["format"]==req_format):
|
||||||
|
return x
|
||||||
|
return None
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
if mobj is None:
|
||||||
|
self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
|
||||||
|
return
|
||||||
|
|
||||||
|
video_id = mobj.group('videoid')
|
||||||
|
|
||||||
|
req = compat_urllib_request.Request(url)
|
||||||
|
req.add_header('Cookie', 'age_verified=1')
|
||||||
|
webpage = self._download_webpage(req, video_id)
|
||||||
|
|
||||||
|
# Get the video title
|
||||||
|
result = re.search(r'videoTitleArea">(?P<title>.*)</h1>', webpage)
|
||||||
|
if result is None:
|
||||||
|
raise ExtractorError(u'ERROR: unable to extract video title')
|
||||||
|
video_title = result.group('title').strip()
|
||||||
|
|
||||||
|
# Get the video date
|
||||||
|
result = re.search(r'Date:</b>(?P<date>.*)</li>', webpage)
|
||||||
|
if result is None:
|
||||||
|
self._downloader.to_stderr(u'WARNING: unable to extract video date')
|
||||||
|
upload_date = None
|
||||||
|
else:
|
||||||
|
upload_date = result.group('date').strip()
|
||||||
|
|
||||||
|
# Get the video uploader
|
||||||
|
result = re.search(r'Submitted:</b>(?P<uploader>.*)</li>', webpage)
|
||||||
|
if result is None:
|
||||||
|
self._downloader.to_stderr(u'ERROR: unable to extract uploader')
|
||||||
|
video_uploader = None
|
||||||
|
else:
|
||||||
|
video_uploader = result.group('uploader').strip()
|
||||||
|
video_uploader = clean_html( video_uploader )
|
||||||
|
|
||||||
|
# Get all of the formats available
|
||||||
|
DOWNLOAD_LIST_RE = r'(?s)<ul class="downloadList">(?P<download_list>.*?)</ul>'
|
||||||
|
result = re.search(DOWNLOAD_LIST_RE, webpage)
|
||||||
|
if result is None:
|
||||||
|
raise ExtractorError(u'Unable to extract download list')
|
||||||
|
download_list_html = result.group('download_list').strip()
|
||||||
|
|
||||||
|
# Get all of the links from the page
|
||||||
|
LINK_RE = r'(?s)<a href="(?P<url>[^"]+)">'
|
||||||
|
links = re.findall(LINK_RE, download_list_html)
|
||||||
|
if(len(links) == 0):
|
||||||
|
raise ExtractorError(u'ERROR: no known formats available for video')
|
||||||
|
|
||||||
|
self._downloader.to_screen(u'[youporn] Links found: %d' % len(links))
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for link in links:
|
||||||
|
|
||||||
|
# A link looks like this:
|
||||||
|
# http://cdn1.download.youporn.phncdn.com/201210/31/8004515/480p_370k_8004515/YouPorn%20-%20Nubile%20Films%20The%20Pillow%20Fight.mp4?nvb=20121113051249&nva=20121114051249&ir=1200&sr=1200&hash=014b882080310e95fb6a0
|
||||||
|
# A path looks like this:
|
||||||
|
# /201210/31/8004515/480p_370k_8004515/YouPorn%20-%20Nubile%20Films%20The%20Pillow%20Fight.mp4
|
||||||
|
video_url = unescapeHTML( link )
|
||||||
|
path = compat_urllib_parse_urlparse( video_url ).path
|
||||||
|
extension = os.path.splitext( path )[1][1:]
|
||||||
|
format = path.split('/')[4].split('_')[:2]
|
||||||
|
size = format[0]
|
||||||
|
bitrate = format[1]
|
||||||
|
format = "-".join( format )
|
||||||
|
title = u'%s-%s-%s' % (video_title, size, bitrate)
|
||||||
|
|
||||||
|
formats.append({
|
||||||
|
'id': video_id,
|
||||||
|
'url': video_url,
|
||||||
|
'uploader': video_uploader,
|
||||||
|
'upload_date': upload_date,
|
||||||
|
'title': title,
|
||||||
|
'ext': extension,
|
||||||
|
'format': format,
|
||||||
|
'thumbnail': None,
|
||||||
|
'description': None,
|
||||||
|
'player_url': None
|
||||||
|
})
|
||||||
|
|
||||||
|
if self._downloader.params.get('listformats', None):
|
||||||
|
self._print_formats(formats)
|
||||||
|
return
|
||||||
|
|
||||||
|
req_format = self._downloader.params.get('format', None)
|
||||||
|
self._downloader.to_screen(u'[youporn] Format: %s' % req_format)
|
||||||
|
|
||||||
|
if req_format is None or req_format == 'best':
|
||||||
|
return [formats[0]]
|
||||||
|
elif req_format == 'worst':
|
||||||
|
return [formats[-1]]
|
||||||
|
elif req_format in ('-1', 'all'):
|
||||||
|
return formats
|
||||||
|
else:
|
||||||
|
format = self._specific( req_format, formats )
|
||||||
|
if result is None:
|
||||||
|
self._downloader.trouble(u'ERROR: requested format not available')
|
||||||
|
return
|
||||||
|
return [format]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class PornotubeIE(InfoExtractor):
|
||||||
|
"""Information extractor for pornotube.com."""
|
||||||
|
_VALID_URL = r'^(?:https?://)?(?:\w+\.)?pornotube\.com(/c/(?P<channel>[0-9]+))?(/m/(?P<videoid>[0-9]+))(/(?P<title>.+))$'
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
if mobj is None:
|
||||||
|
self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
|
||||||
|
return
|
||||||
|
|
||||||
|
video_id = mobj.group('videoid')
|
||||||
|
video_title = mobj.group('title')
|
||||||
|
|
||||||
|
# Get webpage content
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
# Get the video URL
|
||||||
|
VIDEO_URL_RE = r'url: "(?P<url>http://video[0-9].pornotube.com/.+\.flv)",'
|
||||||
|
result = re.search(VIDEO_URL_RE, webpage)
|
||||||
|
if result is None:
|
||||||
|
self._downloader.trouble(u'ERROR: unable to extract video url')
|
||||||
|
return
|
||||||
|
video_url = compat_urllib_parse.unquote(result.group('url'))
|
||||||
|
|
||||||
|
#Get the uploaded date
|
||||||
|
VIDEO_UPLOADED_RE = r'<div class="video_added_by">Added (?P<date>[0-9\/]+) by'
|
||||||
|
result = re.search(VIDEO_UPLOADED_RE, webpage)
|
||||||
|
if result is None:
|
||||||
|
self._downloader.trouble(u'ERROR: unable to extract video title')
|
||||||
|
return
|
||||||
|
upload_date = result.group('date')
|
||||||
|
|
||||||
|
info = {'id': video_id,
|
||||||
|
'url': video_url,
|
||||||
|
'uploader': None,
|
||||||
|
'upload_date': upload_date,
|
||||||
|
'title': video_title,
|
||||||
|
'ext': 'flv',
|
||||||
|
'format': 'flv'}
|
||||||
|
|
||||||
|
return [info]
|
||||||
|
|
||||||
|
class YouJizzIE(InfoExtractor):
|
||||||
|
"""Information extractor for youjizz.com."""
|
||||||
|
_VALID_URL = r'^(?:https?://)?(?:\w+\.)?youjizz\.com/videos/(?P<videoid>[^.]+).html$'
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
if mobj is None:
|
||||||
|
self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
|
||||||
|
return
|
||||||
|
|
||||||
|
video_id = mobj.group('videoid')
|
||||||
|
|
||||||
|
# Get webpage content
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
# Get the video title
|
||||||
|
result = re.search(r'<title>(?P<title>.*)</title>', webpage)
|
||||||
|
if result is None:
|
||||||
|
raise ExtractorError(u'ERROR: unable to extract video title')
|
||||||
|
video_title = result.group('title').strip()
|
||||||
|
|
||||||
|
# Get the embed page
|
||||||
|
result = re.search(r'https?://www.youjizz.com/videos/embed/(?P<videoid>[0-9]+)', webpage)
|
||||||
|
if result is None:
|
||||||
|
raise ExtractorError(u'ERROR: unable to extract embed page')
|
||||||
|
|
||||||
|
embed_page_url = result.group(0).strip()
|
||||||
|
video_id = result.group('videoid')
|
||||||
|
|
||||||
|
webpage = self._download_webpage(embed_page_url, video_id)
|
||||||
|
|
||||||
|
# Get the video URL
|
||||||
|
result = re.search(r'so.addVariable\("file",encodeURIComponent\("(?P<source>[^"]+)"\)\);', webpage)
|
||||||
|
if result is None:
|
||||||
|
raise ExtractorError(u'ERROR: unable to extract video url')
|
||||||
|
video_url = result.group('source')
|
||||||
|
|
||||||
|
info = {'id': video_id,
|
||||||
|
'url': video_url,
|
||||||
|
'title': video_title,
|
||||||
|
'ext': 'flv',
|
||||||
|
'format': 'flv',
|
||||||
|
'player_url': embed_page_url}
|
||||||
|
|
||||||
|
return [info]
|
||||||
|
|
||||||
|
class EightTracksIE(InfoExtractor):
|
||||||
|
IE_NAME = '8tracks'
|
||||||
|
_VALID_URL = r'https?://8tracks.com/(?P<user>[^/]+)/(?P<id>[^/#]+)(?:#.*)?$'
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
if mobj is None:
|
||||||
|
raise ExtractorError(u'Invalid URL: %s' % url)
|
||||||
|
playlist_id = mobj.group('id')
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, playlist_id)
|
||||||
|
|
||||||
|
m = re.search(r"new TRAX.Mix\((.*?)\);\n*\s*TRAX.initSearchAutocomplete\('#search'\);", webpage, flags=re.DOTALL)
|
||||||
|
if not m:
|
||||||
|
raise ExtractorError(u'Cannot find trax information')
|
||||||
|
json_like = m.group(1)
|
||||||
|
data = json.loads(json_like)
|
||||||
|
|
||||||
|
session = str(random.randint(0, 1000000000))
|
||||||
|
mix_id = data['id']
|
||||||
|
track_count = data['tracks_count']
|
||||||
|
first_url = 'http://8tracks.com/sets/%s/play?player=sm&mix_id=%s&format=jsonh' % (session, mix_id)
|
||||||
|
next_url = first_url
|
||||||
|
res = []
|
||||||
|
for i in itertools.count():
|
||||||
|
api_json = self._download_webpage(next_url, playlist_id,
|
||||||
|
note=u'Downloading song information %s/%s' % (str(i+1), track_count),
|
||||||
|
errnote=u'Failed to download song information')
|
||||||
|
api_data = json.loads(api_json)
|
||||||
|
track_data = api_data[u'set']['track']
|
||||||
|
info = {
|
||||||
|
'id': track_data['id'],
|
||||||
|
'url': track_data['track_file_stream_url'],
|
||||||
|
'title': track_data['performer'] + u' - ' + track_data['name'],
|
||||||
|
'raw_title': track_data['name'],
|
||||||
|
'uploader_id': data['user']['login'],
|
||||||
|
'ext': 'm4a',
|
||||||
|
}
|
||||||
|
res.append(info)
|
||||||
|
if api_data['set']['at_last_track']:
|
||||||
|
break
|
||||||
|
next_url = 'http://8tracks.com/sets/%s/next?player=sm&mix_id=%s&format=jsonh&track_id=%s' % (session, mix_id, track_data['id'])
|
||||||
|
return res
|
||||||
|
|
||||||
def gen_extractors():
|
def gen_extractors():
|
||||||
""" Return a list of an instance of every supported extractor.
|
""" Return a list of an instance of every supported extractor.
|
||||||
@@ -3768,6 +3933,9 @@ def gen_extractors():
|
|||||||
MTVIE(),
|
MTVIE(),
|
||||||
YoukuIE(),
|
YoukuIE(),
|
||||||
XNXXIE(),
|
XNXXIE(),
|
||||||
|
YouJizzIE(),
|
||||||
|
PornotubeIE(),
|
||||||
|
YouPornIE(),
|
||||||
GooglePlusIE(),
|
GooglePlusIE(),
|
||||||
ArteTvIE(),
|
ArteTvIE(),
|
||||||
NBAIE(),
|
NBAIE(),
|
||||||
@@ -3776,6 +3944,8 @@ def gen_extractors():
|
|||||||
TweetReelIE(),
|
TweetReelIE(),
|
||||||
SteamIE(),
|
SteamIE(),
|
||||||
UstreamIE(),
|
UstreamIE(),
|
||||||
|
RBMARadioIE(),
|
||||||
|
EightTracksIE(),
|
||||||
GenericIE()
|
GenericIE()
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@@ -45,31 +45,24 @@ class PostProcessor(object):
|
|||||||
one has an extra field called "filepath" that points to the
|
one has an extra field called "filepath" that points to the
|
||||||
downloaded file.
|
downloaded file.
|
||||||
|
|
||||||
When this method returns None, the postprocessing chain is
|
This method returns a tuple, the first element of which describes
|
||||||
stopped. However, this method may return an information
|
whether the original file should be kept (i.e. not deleted - None for
|
||||||
dictionary that will be passed to the next postprocessing
|
no preference), and the second of which is the updated information.
|
||||||
object in the chain. It can be the one it received after
|
|
||||||
changing some fields.
|
|
||||||
|
|
||||||
In addition, this method may raise a PostProcessingError
|
In addition, this method may raise a PostProcessingError
|
||||||
exception that will be taken into account by the downloader
|
exception if post processing fails.
|
||||||
it was called from.
|
|
||||||
"""
|
"""
|
||||||
return information # by default, do nothing
|
return None, information # by default, keep file and do nothing
|
||||||
|
|
||||||
class AudioConversionError(BaseException):
|
class FFmpegPostProcessorError(PostProcessingError):
|
||||||
def __init__(self, message):
|
pass
|
||||||
self.message = message
|
|
||||||
|
|
||||||
class FFmpegExtractAudioPP(PostProcessor):
|
class AudioConversionError(PostProcessingError):
|
||||||
def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, keepvideo=False, nopostoverwrites=False):
|
pass
|
||||||
|
|
||||||
|
class FFmpegPostProcessor(PostProcessor):
|
||||||
|
def __init__(self,downloader=None):
|
||||||
PostProcessor.__init__(self, downloader)
|
PostProcessor.__init__(self, downloader)
|
||||||
if preferredcodec is None:
|
|
||||||
preferredcodec = 'best'
|
|
||||||
self._preferredcodec = preferredcodec
|
|
||||||
self._preferredquality = preferredquality
|
|
||||||
self._keepvideo = keepvideo
|
|
||||||
self._nopostoverwrites = nopostoverwrites
|
|
||||||
self._exes = self.detect_executables()
|
self._exes = self.detect_executables()
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@@ -83,10 +76,37 @@ class FFmpegExtractAudioPP(PostProcessor):
|
|||||||
programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe']
|
programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe']
|
||||||
return dict((program, executable(program)) for program in programs)
|
return dict((program, executable(program)) for program in programs)
|
||||||
|
|
||||||
|
def run_ffmpeg(self, path, out_path, opts):
|
||||||
|
if not self._exes['ffmpeg'] and not self._exes['avconv']:
|
||||||
|
raise FFmpegPostProcessorError(u'ffmpeg or avconv not found. Please install one.')
|
||||||
|
cmd = ([self._exes['avconv'] or self._exes['ffmpeg'], '-y', '-i', encodeFilename(path)]
|
||||||
|
+ opts +
|
||||||
|
[encodeFilename(self._ffmpeg_filename_argument(out_path))])
|
||||||
|
p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||||
|
stdout,stderr = p.communicate()
|
||||||
|
if p.returncode != 0:
|
||||||
|
msg = stderr.strip().split('\n')[-1]
|
||||||
|
raise FFmpegPostProcessorError(msg.decode('utf-8', 'replace'))
|
||||||
|
|
||||||
|
def _ffmpeg_filename_argument(self, fn):
|
||||||
|
# ffmpeg broke --, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details
|
||||||
|
if fn.startswith(u'-'):
|
||||||
|
return u'./' + fn
|
||||||
|
return fn
|
||||||
|
|
||||||
|
class FFmpegExtractAudioPP(FFmpegPostProcessor):
|
||||||
|
def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, nopostoverwrites=False):
|
||||||
|
FFmpegPostProcessor.__init__(self, downloader)
|
||||||
|
if preferredcodec is None:
|
||||||
|
preferredcodec = 'best'
|
||||||
|
self._preferredcodec = preferredcodec
|
||||||
|
self._preferredquality = preferredquality
|
||||||
|
self._nopostoverwrites = nopostoverwrites
|
||||||
|
|
||||||
def get_audio_codec(self, path):
|
def get_audio_codec(self, path):
|
||||||
if not self._exes['ffprobe'] and not self._exes['avprobe']: return None
|
if not self._exes['ffprobe'] and not self._exes['avprobe']: return None
|
||||||
try:
|
try:
|
||||||
cmd = [self._exes['avprobe'] or self._exes['ffprobe'], '-show_streams', '--', encodeFilename(path)]
|
cmd = [self._exes['avprobe'] or self._exes['ffprobe'], '-show_streams', encodeFilename(self._ffmpeg_filename_argument(path))]
|
||||||
handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE)
|
handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE)
|
||||||
output = handle.communicate()[0]
|
output = handle.communicate()[0]
|
||||||
if handle.wait() != 0:
|
if handle.wait() != 0:
|
||||||
@@ -108,22 +128,18 @@ class FFmpegExtractAudioPP(PostProcessor):
|
|||||||
acodec_opts = []
|
acodec_opts = []
|
||||||
else:
|
else:
|
||||||
acodec_opts = ['-acodec', codec]
|
acodec_opts = ['-acodec', codec]
|
||||||
cmd = ([self._exes['avconv'] or self._exes['ffmpeg'], '-y', '-i', encodeFilename(path), '-vn']
|
opts = ['-vn'] + acodec_opts + more_opts
|
||||||
+ acodec_opts + more_opts +
|
try:
|
||||||
['--', encodeFilename(out_path)])
|
FFmpegPostProcessor.run_ffmpeg(self, path, out_path, opts)
|
||||||
p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
except FFmpegPostProcessorError as err:
|
||||||
stdout,stderr = p.communicate()
|
raise AudioConversionError(err.message)
|
||||||
if p.returncode != 0:
|
|
||||||
msg = stderr.strip().split('\n')[-1]
|
|
||||||
raise AudioConversionError(msg)
|
|
||||||
|
|
||||||
def run(self, information):
|
def run(self, information):
|
||||||
path = information['filepath']
|
path = information['filepath']
|
||||||
|
|
||||||
filecodec = self.get_audio_codec(path)
|
filecodec = self.get_audio_codec(path)
|
||||||
if filecodec is None:
|
if filecodec is None:
|
||||||
self._downloader.to_stderr(u'WARNING: unable to obtain file audio codec with ffprobe')
|
raise PostProcessingError(u'WARNING: unable to obtain file audio codec with ffprobe')
|
||||||
return None
|
|
||||||
|
|
||||||
more_opts = []
|
more_opts = []
|
||||||
if self._preferredcodec == 'best' or self._preferredcodec == filecodec or (self._preferredcodec == 'm4a' and filecodec == 'aac'):
|
if self._preferredcodec == 'best' or self._preferredcodec == filecodec or (self._preferredcodec == 'm4a' and filecodec == 'aac'):
|
||||||
@@ -132,7 +148,7 @@ class FFmpegExtractAudioPP(PostProcessor):
|
|||||||
acodec = 'copy'
|
acodec = 'copy'
|
||||||
extension = self._preferredcodec
|
extension = self._preferredcodec
|
||||||
more_opts = [self._exes['avconv'] and '-bsf:a' or '-absf', 'aac_adtstoasc']
|
more_opts = [self._exes['avconv'] and '-bsf:a' or '-absf', 'aac_adtstoasc']
|
||||||
elif filecodec in ['aac', 'mp3', 'vorbis']:
|
elif filecodec in ['aac', 'mp3', 'vorbis', 'opus']:
|
||||||
# Lossless if possible
|
# Lossless if possible
|
||||||
acodec = 'copy'
|
acodec = 'copy'
|
||||||
extension = filecodec
|
extension = filecodec
|
||||||
@@ -152,7 +168,7 @@ class FFmpegExtractAudioPP(PostProcessor):
|
|||||||
more_opts += [self._exes['avconv'] and '-b:a' or '-ab', self._preferredquality + 'k']
|
more_opts += [self._exes['avconv'] and '-b:a' or '-ab', self._preferredquality + 'k']
|
||||||
else:
|
else:
|
||||||
# We convert the audio (lossy)
|
# We convert the audio (lossy)
|
||||||
acodec = {'mp3': 'libmp3lame', 'aac': 'aac', 'm4a': 'aac', 'vorbis': 'libvorbis', 'wav': None}[self._preferredcodec]
|
acodec = {'mp3': 'libmp3lame', 'aac': 'aac', 'm4a': 'aac', 'opus': 'opus', 'vorbis': 'libvorbis', 'wav': None}[self._preferredcodec]
|
||||||
extension = self._preferredcodec
|
extension = self._preferredcodec
|
||||||
more_opts = []
|
more_opts = []
|
||||||
if self._preferredquality is not None:
|
if self._preferredquality is not None:
|
||||||
@@ -181,10 +197,10 @@ class FFmpegExtractAudioPP(PostProcessor):
|
|||||||
except:
|
except:
|
||||||
etype,e,tb = sys.exc_info()
|
etype,e,tb = sys.exc_info()
|
||||||
if isinstance(e, AudioConversionError):
|
if isinstance(e, AudioConversionError):
|
||||||
self._downloader.to_stderr(u'ERROR: audio conversion failed: ' + e.message)
|
msg = u'audio conversion failed: ' + e.message
|
||||||
else:
|
else:
|
||||||
self._downloader.to_stderr(u'ERROR: error running ' + (self._exes['avconv'] and 'avconv' or 'ffmpeg'))
|
msg = u'error running ' + (self._exes['avconv'] and 'avconv' or 'ffmpeg')
|
||||||
return None
|
raise PostProcessingError(msg)
|
||||||
|
|
||||||
# Try to update the date time for extracted audio file.
|
# Try to update the date time for extracted audio file.
|
||||||
if information.get('filetime') is not None:
|
if information.get('filetime') is not None:
|
||||||
@@ -193,12 +209,24 @@ class FFmpegExtractAudioPP(PostProcessor):
|
|||||||
except:
|
except:
|
||||||
self._downloader.to_stderr(u'WARNING: Cannot update utime of audio file')
|
self._downloader.to_stderr(u'WARNING: Cannot update utime of audio file')
|
||||||
|
|
||||||
if not self._keepvideo:
|
|
||||||
try:
|
|
||||||
os.remove(encodeFilename(path))
|
|
||||||
except (IOError, OSError):
|
|
||||||
self._downloader.to_stderr(u'WARNING: Unable to remove downloaded video file')
|
|
||||||
return None
|
|
||||||
|
|
||||||
information['filepath'] = new_path
|
information['filepath'] = new_path
|
||||||
return information
|
return False,information
|
||||||
|
|
||||||
|
class FFmpegVideoConvertor(FFmpegPostProcessor):
|
||||||
|
def __init__(self, downloader=None,preferedformat=None):
|
||||||
|
super(FFmpegVideoConvertor, self).__init__(downloader)
|
||||||
|
self._preferedformat=preferedformat
|
||||||
|
|
||||||
|
def run(self, information):
|
||||||
|
path = information['filepath']
|
||||||
|
prefix, sep, ext = path.rpartition(u'.')
|
||||||
|
outpath = prefix + sep + self._preferedformat
|
||||||
|
if information['ext'] == self._preferedformat:
|
||||||
|
self._downloader.to_screen(u'[ffmpeg] Not converting video file %s - already is in target format %s' % (path, self._preferedformat))
|
||||||
|
return True,information
|
||||||
|
self._downloader.to_screen(u'['+'ffmpeg'+'] Converting video from %s to %s, Destination: ' % (information['ext'], self._preferedformat) +outpath)
|
||||||
|
self.run_ffmpeg(path, outpath, [])
|
||||||
|
information['filepath'] = outpath
|
||||||
|
information['format'] = self._preferedformat
|
||||||
|
information['ext'] = self._preferedformat
|
||||||
|
return False,information
|
||||||
|
@@ -22,6 +22,7 @@ __authors__ = (
|
|||||||
'Christian Albrecht',
|
'Christian Albrecht',
|
||||||
'Dave Vasilevsky',
|
'Dave Vasilevsky',
|
||||||
'Jaime Marquínez Ferrándiz',
|
'Jaime Marquínez Ferrándiz',
|
||||||
|
'Jeff Crouse',
|
||||||
)
|
)
|
||||||
|
|
||||||
__license__ = 'Public Domain'
|
__license__ = 'Public Domain'
|
||||||
@@ -175,7 +176,6 @@ def parseOpts():
|
|||||||
action='store', dest='subtitleslang', metavar='LANG',
|
action='store', dest='subtitleslang', metavar='LANG',
|
||||||
help='language of the closed captions to download (optional) use IETF language tags like \'en\'')
|
help='language of the closed captions to download (optional) use IETF language tags like \'en\'')
|
||||||
|
|
||||||
|
|
||||||
verbosity.add_option('-q', '--quiet',
|
verbosity.add_option('-q', '--quiet',
|
||||||
action='store_true', dest='quiet', help='activates quiet mode', default=False)
|
action='store_true', dest='quiet', help='activates quiet mode', default=False)
|
||||||
verbosity.add_option('-s', '--simulate',
|
verbosity.add_option('-s', '--simulate',
|
||||||
@@ -248,9 +248,11 @@ def parseOpts():
|
|||||||
postproc.add_option('-x', '--extract-audio', action='store_true', dest='extractaudio', default=False,
|
postproc.add_option('-x', '--extract-audio', action='store_true', dest='extractaudio', default=False,
|
||||||
help='convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)')
|
help='convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)')
|
||||||
postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best',
|
postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best',
|
||||||
help='"best", "aac", "vorbis", "mp3", "m4a", or "wav"; best by default')
|
help='"best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; best by default')
|
||||||
postproc.add_option('--audio-quality', metavar='QUALITY', dest='audioquality', default='5',
|
postproc.add_option('--audio-quality', metavar='QUALITY', dest='audioquality', default='5',
|
||||||
help='ffmpeg/avconv audio quality specification, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default 5)')
|
help='ffmpeg/avconv audio quality specification, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default 5)')
|
||||||
|
postproc.add_option('--recode-video', metavar='FORMAT', dest='recodevideo', default=None,
|
||||||
|
help='Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm)')
|
||||||
postproc.add_option('-k', '--keep-video', action='store_true', dest='keepvideo', default=False,
|
postproc.add_option('-k', '--keep-video', action='store_true', dest='keepvideo', default=False,
|
||||||
help='keeps the video file on disk after the post-processing; the video is erased by default')
|
help='keeps the video file on disk after the post-processing; the video is erased by default')
|
||||||
postproc.add_option('--no-post-overwrites', action='store_true', dest='nopostoverwrites', default=False,
|
postproc.add_option('--no-post-overwrites', action='store_true', dest='nopostoverwrites', default=False,
|
||||||
@@ -370,12 +372,15 @@ def _real_main():
|
|||||||
except (TypeError, ValueError) as err:
|
except (TypeError, ValueError) as err:
|
||||||
parser.error(u'invalid playlist end number specified')
|
parser.error(u'invalid playlist end number specified')
|
||||||
if opts.extractaudio:
|
if opts.extractaudio:
|
||||||
if opts.audioformat not in ['best', 'aac', 'mp3', 'vorbis', 'm4a', 'wav']:
|
if opts.audioformat not in ['best', 'aac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav']:
|
||||||
parser.error(u'invalid audio format specified')
|
parser.error(u'invalid audio format specified')
|
||||||
if opts.audioquality:
|
if opts.audioquality:
|
||||||
opts.audioquality = opts.audioquality.strip('k').strip('K')
|
opts.audioquality = opts.audioquality.strip('k').strip('K')
|
||||||
if not opts.audioquality.isdigit():
|
if not opts.audioquality.isdigit():
|
||||||
parser.error(u'invalid audio quality specified')
|
parser.error(u'invalid audio quality specified')
|
||||||
|
if opts.recodevideo is not None:
|
||||||
|
if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg']:
|
||||||
|
parser.error(u'invalid video recode format specified')
|
||||||
|
|
||||||
if sys.version_info < (3,):
|
if sys.version_info < (3,):
|
||||||
# In Python 2, sys.argv is a bytestring (also note http://bugs.python.org/issue2128 for Windows systems)
|
# In Python 2, sys.argv is a bytestring (also note http://bugs.python.org/issue2128 for Windows systems)
|
||||||
@@ -432,6 +437,7 @@ def _real_main():
|
|||||||
'prefer_free_formats': opts.prefer_free_formats,
|
'prefer_free_formats': opts.prefer_free_formats,
|
||||||
'verbose': opts.verbose,
|
'verbose': opts.verbose,
|
||||||
'test': opts.test,
|
'test': opts.test,
|
||||||
|
'keepvideo': opts.keepvideo,
|
||||||
})
|
})
|
||||||
|
|
||||||
if opts.verbose:
|
if opts.verbose:
|
||||||
@@ -453,7 +459,9 @@ def _real_main():
|
|||||||
|
|
||||||
# PostProcessors
|
# PostProcessors
|
||||||
if opts.extractaudio:
|
if opts.extractaudio:
|
||||||
fd.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, keepvideo=opts.keepvideo, nopostoverwrites=opts.nopostoverwrites))
|
fd.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, nopostoverwrites=opts.nopostoverwrites))
|
||||||
|
if opts.recodevideo:
|
||||||
|
fd.add_post_processor(FFmpegVideoConvertor(preferedformat=opts.recodevideo))
|
||||||
|
|
||||||
# Update version
|
# Update version
|
||||||
if opts.update_self:
|
if opts.update_self:
|
||||||
|
@@ -8,6 +8,7 @@ import locale
|
|||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
|
import traceback
|
||||||
import zlib
|
import zlib
|
||||||
import email.utils
|
import email.utils
|
||||||
import json
|
import json
|
||||||
@@ -279,6 +280,12 @@ class AttrParser(compat_html_parser.HTMLParser):
|
|||||||
lines[-1] = lines[-1][:self.result[2][1]-self.result[1][1]]
|
lines[-1] = lines[-1][:self.result[2][1]-self.result[1][1]]
|
||||||
lines[-1] = lines[-1][:self.result[2][1]]
|
lines[-1] = lines[-1][:self.result[2][1]]
|
||||||
return '\n'.join(lines).strip()
|
return '\n'.join(lines).strip()
|
||||||
|
# Hack for https://github.com/rg3/youtube-dl/issues/662
|
||||||
|
if sys.version_info < (2, 7, 3):
|
||||||
|
AttrParser.parse_endtag = (lambda self, i:
|
||||||
|
i + len("</scr'+'ipt>")
|
||||||
|
if self.rawdata[i:].startswith("</scr'+'ipt>")
|
||||||
|
else compat_html_parser.HTMLParser.parse_endtag(self, i))
|
||||||
|
|
||||||
def get_element_by_id(id, html):
|
def get_element_by_id(id, html):
|
||||||
"""Return the content of the tag with the specified ID in the passed HTML document"""
|
"""Return the content of the tag with the specified ID in the passed HTML document"""
|
||||||
@@ -408,18 +415,24 @@ def encodeFilename(s):
|
|||||||
# match Windows 9x series as well. Besides, NT 4 is obsolete.)
|
# match Windows 9x series as well. Besides, NT 4 is obsolete.)
|
||||||
return s
|
return s
|
||||||
else:
|
else:
|
||||||
return s.encode(sys.getfilesystemencoding(), 'ignore')
|
encoding = sys.getfilesystemencoding()
|
||||||
|
if encoding is None:
|
||||||
|
encoding = 'utf-8'
|
||||||
|
return s.encode(encoding, 'ignore')
|
||||||
|
|
||||||
|
|
||||||
class ExtractorError(Exception):
|
class ExtractorError(Exception):
|
||||||
"""Error during info extraction."""
|
"""Error during info extraction."""
|
||||||
def __init__(self, msg, tb=None):
|
def __init__(self, msg, tb=None):
|
||||||
""" tb is the original traceback (so that it can be printed out) """
|
""" tb, if given, is the original traceback (so that it can be printed out). """
|
||||||
super(ExtractorError, self).__init__(msg)
|
super(ExtractorError, self).__init__(msg)
|
||||||
if tb is None:
|
|
||||||
tb = sys.exc_info()[2]
|
|
||||||
self.traceback = tb
|
self.traceback = tb
|
||||||
|
|
||||||
|
def format_traceback(self):
|
||||||
|
if self.traceback is None:
|
||||||
|
return None
|
||||||
|
return u''.join(traceback.format_tb(self.traceback))
|
||||||
|
|
||||||
|
|
||||||
class DownloadError(Exception):
|
class DownloadError(Exception):
|
||||||
"""Download Error exception.
|
"""Download Error exception.
|
||||||
@@ -446,7 +459,8 @@ class PostProcessingError(Exception):
|
|||||||
This exception may be raised by PostProcessor's .run() method to
|
This exception may be raised by PostProcessor's .run() method to
|
||||||
indicate an error in the postprocessing task.
|
indicate an error in the postprocessing task.
|
||||||
"""
|
"""
|
||||||
pass
|
def __init__(self, msg):
|
||||||
|
self.msg = msg
|
||||||
|
|
||||||
class MaxDownloadsReached(Exception):
|
class MaxDownloadsReached(Exception):
|
||||||
""" --max-downloads limit has been reached. """
|
""" --max-downloads limit has been reached. """
|
||||||
@@ -511,14 +525,19 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
|
|||||||
return ret
|
return ret
|
||||||
|
|
||||||
def http_request(self, req):
|
def http_request(self, req):
|
||||||
for h in std_headers:
|
for h,v in std_headers.items():
|
||||||
if h in req.headers:
|
if h in req.headers:
|
||||||
del req.headers[h]
|
del req.headers[h]
|
||||||
req.add_header(h, std_headers[h])
|
req.add_header(h, v)
|
||||||
if 'Youtubedl-no-compression' in req.headers:
|
if 'Youtubedl-no-compression' in req.headers:
|
||||||
if 'Accept-encoding' in req.headers:
|
if 'Accept-encoding' in req.headers:
|
||||||
del req.headers['Accept-encoding']
|
del req.headers['Accept-encoding']
|
||||||
del req.headers['Youtubedl-no-compression']
|
del req.headers['Youtubedl-no-compression']
|
||||||
|
if 'Youtubedl-user-agent' in req.headers:
|
||||||
|
if 'User-agent' in req.headers:
|
||||||
|
del req.headers['User-agent']
|
||||||
|
req.headers['User-agent'] = req.headers['Youtubedl-user-agent']
|
||||||
|
del req.headers['Youtubedl-user-agent']
|
||||||
return req
|
return req
|
||||||
|
|
||||||
def http_response(self, req, resp):
|
def http_response(self, req, resp):
|
||||||
|
@@ -1,2 +1,2 @@
|
|||||||
|
|
||||||
__version__ = '2013.01.02'
|
__version__ = '2013.02.01'
|
||||||
|
Reference in New Issue
Block a user