Compare commits

..

94 Commits

Author SHA1 Message Date
Philipp Hagemeister
9cd5e4fce8 release 2013.02.01 2013-02-01 17:57:32 +01:00
Philipp Hagemeister
edba5137b8 Fix Facebook IE 2013-02-01 17:56:22 +01:00
Philipp Hagemeister
233a22960a Switch ComedyCentral test to a permanent URL (They delete full episodes older than a month) 2013-02-01 17:46:03 +01:00
Philipp Hagemeister
3b024e17af Work around buggy HTML Parser in Python < 2.7.3 (Closes #662) 2013-02-01 17:29:50 +01:00
Philipp Hagemeister
ec71c13ab8 release 2013.01.28 2013-01-27 18:33:58 +01:00
Philipp Hagemeister
f0bad2b026 Fix Stanford (Closes #653) 2013-01-27 15:23:26 +01:00
Philipp Hagemeister
25580f3251 8tracks: Ignore hashes 2013-01-27 04:15:12 +01:00
Philipp Hagemeister
da4de959df 8tracks: Better default titles 2013-01-27 04:05:53 +01:00
Philipp Hagemeister
d0d51a8afa 8tracks: Include performer as uploader 2013-01-27 03:27:46 +01:00
Philipp Hagemeister
c67598c3e1 Remove space before shebang 2013-01-27 03:07:07 +01:00
Philipp Hagemeister
811d253bc2 Merge remote-tracking branch 'jaimeMF/makefilePythonversion' 2013-01-27 03:06:32 +01:00
Philipp Hagemeister
c3a1642ead release 2013.01.27 2013-01-27 03:03:02 +01:00
Philipp Hagemeister
ccf65f9dee 8tracks IE (Closes #652) 2013-01-27 03:01:23 +01:00
Philipp Hagemeister
b954070d70 Fix Facebook (Closes #375) 2013-01-25 16:54:48 +01:00
Philipp Hagemeister
30e9f4496b Drop md5: spec for now (unused and breaks int values) 2013-01-25 16:54:25 +01:00
Jaime Marquínez Ferrándiz
271d3fbdaa Option in makefile to select python interpreter 2013-01-25 15:11:03 +01:00
Philipp Hagemeister
6df40dcbe0 Guard against sys.getfilesystemencoding() == None (#503) 2013-01-20 01:48:05 +01:00
Philipp Hagemeister
97f194c1fb twitch.tv: Use id as title if no title is present (Closes #638) 2013-01-16 09:55:45 +01:00
Philipp Hagemeister
4da769ccca Do not backup version.py (under version control and frankly, not that complex) 2013-01-12 23:04:46 +01:00
Philipp Hagemeister
253d96f2e2 Force build removal 2013-01-12 22:25:54 +01:00
Philipp Hagemeister
bbc3e2753a release 2013.01.13 2013-01-12 22:18:13 +01:00
Philipp Hagemeister
67353612ba Revert "Move update to front"
This reverts commit db30f02b50.
2013-01-12 22:10:36 +01:00
Philipp Hagemeister
bffbd5f038 Download progress hooks 2013-01-12 20:34:50 +01:00
Philipp Hagemeister
d8bbf2018e Aggressive test timeout to catch hanging servers 2013-01-12 20:33:03 +01:00
Philipp Hagemeister
187f491ad2 [RBMA] Do not fail if thumbnail is empty 2013-01-12 18:45:50 +01:00
Philipp Hagemeister
335959e778 Correct Blip.tv on 2.6, where HTTP headers are case-sensitive (wtf?) 2013-01-12 18:38:23 +01:00
Philipp Hagemeister
3b83bf8f6a correct pushes in release script 2013-01-12 18:37:21 +01:00
Philipp Hagemeister
51719893bf Default to py3 in sign-versions 2013-01-12 18:14:07 +01:00
Philipp Hagemeister
1841f65e64 Python 2-proof versions.py 2013-01-12 18:12:24 +01:00
Philipp Hagemeister
bb28998920 fix location of updates_key in devscripts/release 2013-01-12 18:07:31 +01:00
Philipp Hagemeister
fbc5f99db9 release 2013.01.12 2013-01-12 17:59:58 +01:00
Philipp Hagemeister
ca0a0bbeec RBMA IE (Closes #630) 2013-01-12 17:58:39 +01:00
Philipp Hagemeister
6119f78cb9 Add location field 2013-01-12 17:34:31 +01:00
Philipp Hagemeister
539679c7f9 Make uploader and upload_date fields optional 2013-01-12 17:34:09 +01:00
Philipp Hagemeister
b642cd44c1 restore youtube-dl (update) binary 2013-01-12 17:07:12 +01:00
Philipp Hagemeister
fffec3b9d9 Credit jefftimesten for YouPornIE, PornoTubeIE, YouJizzIE 2013-01-12 16:51:20 +01:00
Philipp Hagemeister
3446dfb7cb Proper support for changing User-Agents from IEs 2013-01-12 16:49:13 +01:00
Philipp Hagemeister
db16276b7c Improve YouJizz 2013-01-12 16:41:04 +01:00
Philipp Hagemeister
629fcdd135 Add agecheck and various improvements to YouPorn IE 2013-01-12 16:10:35 +01:00
Philipp Hagemeister
64ce2aada8 _request_webpage helper methods for queries that need the final URL 2013-01-12 16:10:16 +01:00
Philipp Hagemeister
565f751967 Clean up porno IEs 2013-01-12 15:17:04 +01:00
Philipp Hagemeister
6017964580 Merge remote-tracking branch 'jefftimesten/master' 2013-01-12 15:12:50 +01:00
Philipp Hagemeister
1d16b0c3fe Keep file without any PPs (oops, missed the obvious case) 2013-01-12 15:12:28 +01:00
Philipp Hagemeister
7851b37993 --recode-video option (Closes #18) 2013-01-12 15:09:09 +01:00
Philipp Hagemeister
d81edc573e Merge 'jaimeMF/videoconversion' (sans actual option for now) 2013-01-12 14:04:30 +01:00
Philipp Hagemeister
ef0c8d5f9f Make ustream IE more robust 2013-01-12 13:49:14 +01:00
Philipp Hagemeister
db30f02b50 Move update to front 2013-01-12 13:45:39 +01:00
Philipp Hagemeister
4ba7262467 Less confusing player version 2013-01-12 13:35:16 +01:00
Jaime Marquínez Ferrándiz
67d0c25eab Add a PostProcessor for converting video format 2013-01-11 20:50:49 +01:00
Philipp Hagemeister
09f9552b40 Less git acrobatics in devscripts/release.sh 2013-01-11 08:28:37 +01:00
Philipp Hagemeister
142d38f776 release 2013.01.11 2013-01-11 08:05:30 +01:00
Philipp Hagemeister
6dd3471900 Add Makefile in tarball (Closes #626) 2013-01-11 08:00:27 +01:00
Philipp Hagemeister
280d67896a Correct documentation (Closes #625) 2013-01-10 23:20:26 +01:00
Philipp Hagemeister
510e6f6dc1 Support --audio-format=opus 2013-01-10 19:15:04 +01:00
Philipp Hagemeister
712e86b999 Fix broken ffmpeg (Closes #623) 2013-01-09 14:46:19 +01:00
Philipp Hagemeister
74fdba620d release 2013.01.08 2013-01-08 10:29:53 +01:00
Philipp Hagemeister
dc1c479a6f Merge pull request #621 from atomizer/master
justin.tv tweaks
2013-01-08 00:57:46 -08:00
atomizer
119d536e07 Merge branch 'my-origin/master' 2013-01-07 17:03:58 +04:00
atomizer
fa1bf9c653 justin.tv tweaks
- download all parts of a broadcast, fixes #614
- set "uploader" variable to channel_name if available
- catch api errors even if http status is 200
2013-01-07 16:59:39 +04:00
Philipp Hagemeister
814eed0ea1 Fix tar target (--exclude-vcs is not supported everywhere, and reading . while writing to it can fail randomly) 2013-01-07 12:48:07 +01:00
Philipp Hagemeister
0aa3068e9e Do not check in test_coverage 2013-01-06 23:38:36 +01:00
Philipp Hagemeister
db2d6124b1 correct quoting 2013-01-06 23:14:56 +01:00
Philipp Hagemeister
039dc61bd2 Simplify Makefile 2013-01-06 23:02:31 +01:00
Philipp Hagemeister
4b879984ea release 2013.01.06 2013-01-06 22:52:04 +01:00
Philipp Hagemeister
55e286ba55 read -n is bash-specific 2013-01-06 22:50:20 +01:00
Jeff Crouse
9450bfa26e fixed tests (used the --test option) so that they pass. go figure 2013-01-06 16:33:37 -05:00
Jeff Crouse
18be482a6f oops - didn't remove some reminders 2013-01-06 15:52:33 -05:00
Jeff Crouse
ca6710ee41 made changes recommended in pull request 2013-01-06 15:40:50 -05:00
Philipp Hagemeister
9314810243 fix ComedyCentral IE in Python3 2013-01-06 21:36:01 +01:00
Philipp Hagemeister
7717ae19fa Add tests for ComedyCentral IE 2013-01-06 21:35:20 +01:00
Philipp Hagemeister
32635ec685 Switch comedycentral IE to http downloads 2013-01-06 21:26:31 +01:00
Jeff Crouse
caec7618a1 re-fixed XNXX regex problem 2013-01-05 16:05:23 -05:00
Jeff Crouse
7e7ab2815c Merge branch 'master' of https://github.com/jefftimesten/youtube-dl 2013-01-05 16:01:03 -05:00
Jeff Crouse
d7744f2219 Merge branch 'master' of https://github.com/jefftimesten/youtube-dl 2013-01-05 16:00:50 -05:00
Jeff Crouse
7161829de5 Merge branch 'master' of https://github.com/jefftimesten/youtube-dl 2013-01-05 15:59:28 -05:00
Jeff Crouse
991ba7fae3 Added extractors for 3 porn sites 2013-01-05 15:59:01 -05:00
Jeff Crouse
a7539296ce Added extractors for 3 porn sites 2013-01-05 15:42:35 -05:00
Jeff Crouse
258d5850c9 Merge branch 'master' of https://github.com/rg3/youtube-dl
Conflicts:
	.gitignore
	LATEST_VERSION
	Makefile
	youtube-dl
	youtube-dl.exe
	youtube_dl/InfoExtractors.py
	youtube_dl/__init__.py
2013-01-05 15:03:54 -05:00
Philipp Hagemeister
20759b340a Disable travis irc notifications
travis is much to verbose for that, with random IEs constantly failing
2013-01-04 00:34:02 +01:00
Philipp Hagemeister
8e5f761870 Merge pull request #617 from jaimeMF/steamIE
[steamIE]Allow downloading videos with other characters in their titles
2013-01-03 15:16:27 -08:00
Jaime Marquínez Ferrándiz
26714799c9 steamIE remove the HTMLparser object 2013-01-03 23:56:02 +01:00
Jaime Marquínez Ferrándiz
5e9d042d8f steamIE follow @phihag suggestions 2013-01-03 23:51:48 +01:00
Jaime Marquínez Ferrándiz
9cf98a2bcc Allow downloading videos with other characters in their titles
Especially html entities
2013-01-03 21:17:35 +01:00
Philipp Hagemeister
f5ebb61495 Support page URL in RTMP downloads 2013-01-03 20:26:38 +01:00
Philipp Hagemeister
431d88dd31 Also generate SHA2-256 2013-01-03 19:49:06 +01:00
Philipp Hagemeister
876f1a86af Also publish hashsums 2013-01-03 19:18:55 +01:00
Philipp Hagemeister
01951dda7a Make ExtractorError usable for other causes 2013-01-03 15:39:55 +01:00
Filippo Valsorda
6e3dba168b release.sh edits based on 2013.01.02 experience 2013-01-02 23:40:24 +01:00
Jeff Crouse
187da2c093 added YouJizz extractor 2012-12-16 00:26:27 -05:00
Jeff Crouse
9a2cf56d51 Fixed a problem with the XNXXIE Regex 2012-12-15 23:22:07 -05:00
Jeff Crouse
5f7ad21633 Strip HTML out of uploader name 2012-11-13 17:48:30 -05:00
Jeff Crouse
089d47f8d5 Removed the README.md build target in the makefile. It is broken... 2012-11-13 17:48:10 -05:00
Jeff Crouse
fdef722fa1 Added YouPorn infoExtractor 2012-11-13 13:10:56 -05:00
Jeff Crouse
110d4f4c91 Added Pornotube support (for Laborers of Love) 2012-11-12 16:17:55 -05:00
15 changed files with 774 additions and 340 deletions

View File

@@ -1,17 +0,0 @@
updates_key.pem
*.pyc
*.pyo
youtube-dl.exe
wine-py2exe/
py2exe.log
*.kate-swp
build/
dist/
MANIFEST
*.DS_Store
youtube-dl.tar.gz
.coverage
cover/
__pycache__/
.git/
*~

View File

@@ -8,7 +8,7 @@ notifications:
email: email:
- filippo.valsorda@gmail.com - filippo.valsorda@gmail.com
- phihag@phihag.de - phihag@phihag.de
irc: # irc:
channels: # channels:
- "irc.freenode.org#youtube-dl" # - "irc.freenode.org#youtube-dl"
skip_join: true # skip_join: true

View File

@@ -1 +1 @@
9999.99.99 2012.12.99

View File

@@ -1,12 +1,13 @@
all: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion all: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion
clean: clean:
rm -rf youtube-dl youtube-dl.exe youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ rm -rf youtube-dl youtube-dl.exe youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz
PREFIX=/usr/local PREFIX=/usr/local
BINDIR=$(PREFIX)/bin BINDIR=$(PREFIX)/bin
MANDIR=$(PREFIX)/man MANDIR=$(PREFIX)/man
SYSCONFDIR=/etc SYSCONFDIR=/etc
PYTHON=/usr/bin/env python
install: youtube-dl youtube-dl.1 youtube-dl.bash-completion install: youtube-dl youtube-dl.1 youtube-dl.bash-completion
install -d $(DESTDIR)$(BINDIR) install -d $(DESTDIR)$(BINDIR)
@@ -20,12 +21,14 @@ test:
#nosetests --with-coverage --cover-package=youtube_dl --cover-html --verbose --processes 4 test #nosetests --with-coverage --cover-package=youtube_dl --cover-html --verbose --processes 4 test
nosetests --verbose test nosetests --verbose test
.PHONY: all clean install test tar: youtube-dl.tar.gz
.PHONY: all clean install test tar
youtube-dl: youtube_dl/*.py youtube-dl: youtube_dl/*.py
zip --quiet youtube-dl youtube_dl/*.py zip --quiet youtube-dl youtube_dl/*.py
zip --quiet --junk-paths youtube-dl youtube_dl/__main__.py zip --quiet --junk-paths youtube-dl youtube_dl/__main__.py
echo '#!/usr/bin/env python' > youtube-dl echo '#!$(PYTHON)' > youtube-dl
cat youtube-dl.zip >> youtube-dl cat youtube-dl.zip >> youtube-dl
rm youtube-dl.zip rm youtube-dl.zip
chmod a+x youtube-dl chmod a+x youtube-dl
@@ -42,6 +45,17 @@ youtube-dl.1: README.md
youtube-dl.bash-completion: youtube_dl/*.py devscripts/bash-completion.in youtube-dl.bash-completion: youtube_dl/*.py devscripts/bash-completion.in
python devscripts/bash-completion.py python devscripts/bash-completion.py
youtube-dl.tar.gz: all youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion
tar -cvzf youtube-dl.tar.gz -s "|^./|./youtube-dl/|" \ @tar -czf youtube-dl.tar.gz --transform "s|^|youtube-dl/|" --owner 0 --group 0 \
--exclude-from=".tarignore" -- . --exclude '*.DS_Store' \
--exclude '*.kate-swp' \
--exclude '*.pyc' \
--exclude '*.pyo' \
--exclude '*~' \
--exclude '__pycache' \
--exclude '.git' \
-- \
bin devscripts test youtube_dl \
CHANGELOG LICENSE README.md README.txt \
Makefile MANIFEST.in youtube-dl.1 youtube-dl.bash-completion setup.py \
youtube-dl

View File

@@ -9,8 +9,8 @@ youtube-dl
# DESCRIPTION # DESCRIPTION
**youtube-dl** is a small command-line program to download videos from **youtube-dl** is a small command-line program to download videos from
YouTube.com and a few more sites. It requires the Python interpreter, version YouTube.com and a few more sites. It requires the Python interpreter, version
2.x (x being at least 6), and it is not platform specific. It should work in 2.6, 2.7, or 3.3+, and it is not platform specific. It should work on
your Unix box, in Windows or in Mac OS X. It is released to the public domain, your Unix box, on Windows or on Mac OS X. It is released to the public domain,
which means you can modify it, redistribute it or use it however you like. which means you can modify it, redistribute it or use it however you like.
# OPTIONS # OPTIONS
@@ -105,11 +105,13 @@ which means you can modify it, redistribute it or use it however you like.
## Post-processing Options: ## Post-processing Options:
-x, --extract-audio convert video files to audio-only files (requires -x, --extract-audio convert video files to audio-only files (requires
ffmpeg or avconv and ffprobe or avprobe) ffmpeg or avconv and ffprobe or avprobe)
--audio-format FORMAT "best", "aac", "vorbis", "mp3", "m4a", or "wav"; --audio-format FORMAT "best", "aac", "vorbis", "mp3", "m4a", "opus", or
best by default "wav"; best by default
--audio-quality QUALITY ffmpeg/avconv audio quality specification, insert a --audio-quality QUALITY ffmpeg/avconv audio quality specification, insert a
value between 0 (better) and 9 (worse) for VBR or a value between 0 (better) and 9 (worse) for VBR or a
specific bitrate like 128K (default 5) specific bitrate like 128K (default 5)
--recode-video FORMAT Encode the video to another format if necessary
(currently supported: mp4|flv|ogg|webm)
-k, --keep-video keeps the video file on disk after the post- -k, --keep-video keeps the video file on disk after the post-
processing; the video is erased by default processing; the video is erased by default
--no-post-overwrites do not overwrite post-processed files; the post- --no-post-overwrites do not overwrite post-processed files; the post-
@@ -117,7 +119,7 @@ which means you can modify it, redistribute it or use it however you like.
# CONFIGURATION # CONFIGURATION
You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.local/config/youtube-dl.conf`. You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.config/youtube-dl.conf`.
# OUTPUT TEMPLATE # OUTPUT TEMPLATE

View File

@@ -4,12 +4,17 @@ import rsa
import json import json
from binascii import hexlify from binascii import hexlify
try:
input = raw_input
except NameError:
pass
versions_info = json.load(open('update/versions.json')) versions_info = json.load(open('update/versions.json'))
if 'signature' in versions_info: if 'signature' in versions_info:
del versions_info['signature'] del versions_info['signature']
print('Enter the PKCS1 private key, followed by a blank line:') print('Enter the PKCS1 private key, followed by a blank line:')
privkey = '' privkey = b''
while True: while True:
try: try:
line = input() line = input()
@@ -17,8 +22,7 @@ while True:
break break
if line == '': if line == '':
break break
privkey += line + '\n' privkey += line.encode('ascii') + b'\n'
privkey = bytes(privkey, 'ascii')
privkey = rsa.PrivateKey.load_pkcs1(privkey) privkey = rsa.PrivateKey.load_pkcs1(privkey)
signature = hexlify(rsa.pkcs1.sign(json.dumps(versions_info, sort_keys=True).encode('utf-8'), privkey, 'SHA-256')).decode() signature = hexlify(rsa.pkcs1.sign(json.dumps(versions_info, sort_keys=True).encode('utf-8'), privkey, 'SHA-256')).decode()

View File

@@ -1,13 +1,17 @@
#!/bin/sh #!/bin/bash
# IMPORTANT: the following assumptions are made # IMPORTANT: the following assumptions are made
# * you did --set-upstream # * the GH repo is on the origin remote
# * the gh-pages branch is named so locally # * the gh-pages branch is named so locally
# * the git config user.signingkey is properly set # * the git config user.signingkey is properly set
# You will need # You will need
# pip install coverage nose rsa # pip install coverage nose rsa
# TODO
# release notes
# make hash on local files
set -e set -e
if [ -z "$1" ]; then echo "ERROR: specify version number like this: $0 1994.09.06"; exit 1; fi if [ -z "$1" ]; then echo "ERROR: specify version number like this: $0 1994.09.06"; exit 1; fi
@@ -21,7 +25,7 @@ make clean
nosetests --with-coverage --cover-package=youtube_dl --cover-html test || exit 1 nosetests --with-coverage --cover-package=youtube_dl --cover-html test || exit 1
echo "\n### Changing version in version.py..." echo "\n### Changing version in version.py..."
sed -i~ "s/__version__ = '.*'/__version__ = '$version'/" youtube_dl/version.py sed -i "s/__version__ = '.*'/__version__ = '$version'/" youtube_dl/version.py
echo "\n### Committing CHANGELOG README.md and youtube_dl/version.py..." echo "\n### Committing CHANGELOG README.md and youtube_dl/version.py..."
make README.md make README.md
@@ -34,41 +38,49 @@ git show "$version"
read -p "Is it good, can I push? (y/n) " -n 1 read -p "Is it good, can I push? (y/n) " -n 1
if [[ ! $REPLY =~ ^[Yy]$ ]]; then exit 1; fi if [[ ! $REPLY =~ ^[Yy]$ ]]; then exit 1; fi
echo echo
git push MASTER=$(git rev-parse --abbrev-ref HEAD)
git push origin $MASTER:master
git push origin "$version"
echo "\n### OK, now it is time to build the binaries..." echo "\n### OK, now it is time to build the binaries..."
REV=$(git rev-parse HEAD) REV=$(git rev-parse HEAD)
make youtube-dl youtube-dl.tar.gz make youtube-dl youtube-dl.tar.gz
wget "http://jeromelaheurte.net:8142/download/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe || \ wget "http://jeromelaheurte.net:8142/download/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe || \
wget "http://jeromelaheurte.net:8142/build/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe wget "http://jeromelaheurte.net:8142/build/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe
mkdir -p "update_staging/$version" mkdir -p "build/$version"
mv youtube-dl youtube-dl.exe "update_staging/$version" mv youtube-dl youtube-dl.exe "build/$version"
mv youtube-dl.tar.gz "update_staging/$version/youtube-dl-$version.tar.gz" mv youtube-dl.tar.gz "build/$version/youtube-dl-$version.tar.gz"
RELEASE_FILES="youtube-dl youtube-dl.exe youtube-dl-$version.tar.gz"
(cd build/$version/ && md5sum $RELEASE_FILES > MD5SUMS)
(cd build/$version/ && sha1sum $RELEASE_FILES > SHA1SUMS)
(cd build/$version/ && sha256sum $RELEASE_FILES > SHA2-256SUMS)
(cd build/$version/ && sha512sum $RELEASE_FILES > SHA2-512SUMS)
git checkout HEAD -- youtube-dl youtube-dl.exe git checkout HEAD -- youtube-dl youtube-dl.exe
echo "\n### Signing and uploading the new binaries to youtube-dl.org..." echo "\n### Signing and uploading the new binaries to youtube-dl.org..."
for f in update_staging/$version/*; do gpg --detach-sig "$f"; done for f in $RELEASE_FILES; do gpg --detach-sig "build/$version/$f"; done
scp -r "update_staging/$version" ytdl@youtube-dl.org:html/downloads/ scp -r "build/$version" ytdl@youtube-dl.org:html/downloads/
rm -r update_staging
echo "\n### Now switching to gh-pages..." echo "\n### Now switching to gh-pages..."
MASTER=$(git rev-parse --abbrev-ref HEAD) git clone --branch gh-pages --single-branch . build/gh-pages
git checkout gh-pages ROOT=$(pwd)
git checkout "$MASTER" -- devscripts/gh-pages/ (
git reset devscripts/gh-pages/ set -e
devscripts/gh-pages/add-version.py $version ORIGIN_URL=$(git config --get remote.origin.url)
devscripts/gh-pages/sign-versions.py < updates_key.pem cd build/gh-pages
devscripts/gh-pages/generate-download.py "$ROOT/devscripts/gh-pages/add-version.py" $version
devscripts/gh-pages/update-copyright.py "$ROOT/devscripts/gh-pages/sign-versions.py" < "$ROOT/updates_key.pem"
rm -r test_coverage "$ROOT/devscripts/gh-pages/generate-download.py"
mv cover test_coverage "$ROOT/devscripts/gh-pages/update-copyright.py"
git add *.html *.html.in update test_coverage git add *.html *.html.in update
git commit -m "release $version" git commit -m "release $version"
git show HEAD git show HEAD
read -p "Is it good, can I push? (y/n) " -n 1 read -p "Is it good, can I push? (y/n) " -n 1
if [[ ! $REPLY =~ ^[Yy]$ ]]; then exit 1; fi if [[ ! $REPLY =~ ^[Yy]$ ]]; then exit 1; fi
echo echo
git push git push "$ROOT" gh-pages
git push "$ORIGIN_URL" gh-pages
)
rm -rf build
echo "\n### DONE!" echo "\n### DONE!"
git checkout $MASTER

View File

@@ -26,6 +26,7 @@ cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar)
proxy_handler = compat_urllib_request.ProxyHandler() proxy_handler = compat_urllib_request.ProxyHandler()
opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler()) opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler())
compat_urllib_request.install_opener(opener) compat_urllib_request.install_opener(opener)
socket.setdefaulttimeout(10)
def _try_rm(filename): def _try_rm(filename):
""" Remove a file if it exists """ """ Remove a file if it exists """
@@ -81,6 +82,11 @@ def generator(test_case):
fd.add_info_extractor(ie()) fd.add_info_extractor(ie())
for ien in test_case.get('add_ie', []): for ien in test_case.get('add_ie', []):
fd.add_info_extractor(getattr(youtube_dl.InfoExtractors, ien + 'IE')()) fd.add_info_extractor(getattr(youtube_dl.InfoExtractors, ien + 'IE')())
finished_hook_called = set()
def _hook(status):
if status['status'] == 'finished':
finished_hook_called.add(status['filename'])
fd.add_progress_hook(_hook)
test_cases = test_case.get('playlist', [test_case]) test_cases = test_case.get('playlist', [test_case])
for tc in test_cases: for tc in test_cases:
@@ -92,7 +98,8 @@ def generator(test_case):
for tc in test_cases: for tc in test_cases:
if not test_case.get('params', {}).get('skip_download', False): if not test_case.get('params', {}).get('skip_download', False):
self.assertTrue(os.path.exists(tc['file'])) self.assertTrue(os.path.exists(tc['file']), msg='Missing file ' + tc['file'])
self.assertTrue(tc['file'] in finished_hook_called)
self.assertTrue(os.path.exists(tc['file'] + '.info.json')) self.assertTrue(os.path.exists(tc['file'] + '.info.json'))
if 'md5' in tc: if 'md5' in tc:
md5_for_file = _file_md5(tc['file']) md5_for_file = _file_md5(tc['file'])
@@ -100,10 +107,6 @@ def generator(test_case):
with io.open(tc['file'] + '.info.json', encoding='utf-8') as infof: with io.open(tc['file'] + '.info.json', encoding='utf-8') as infof:
info_dict = json.load(infof) info_dict = json.load(infof)
for (info_field, value) in tc.get('info_dict', {}).items(): for (info_field, value) in tc.get('info_dict', {}).items():
if value.startswith('md5:'):
md5_info_value = hashlib.md5(info_dict.get(info_field, '')).hexdigest()
self.assertEqual(value[3:], md5_info_value)
else:
self.assertEqual(value, info_dict.get(info_field)) self.assertEqual(value, info_dict.get(info_field))
finally: finally:
for tc in test_cases: for tc in test_cases:

View File

@@ -35,6 +35,24 @@
"url": "http://www.xvideos.com/video939581/funny_porns_by_s_-1", "url": "http://www.xvideos.com/video939581/funny_porns_by_s_-1",
"file": "939581.flv" "file": "939581.flv"
}, },
{
"name": "YouPorn",
"md5": "c37ddbaaa39058c76a7e86c6813423c1",
"url": "http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/",
"file": "505835.mp4"
},
{
"name": "Pornotube",
"md5": "374dd6dcedd24234453b295209aa69b6",
"url": "http://pornotube.com/c/173/m/1689755/Marilyn-Monroe-Bathing",
"file": "1689755.flv"
},
{
"name": "YouJizz",
"md5": "07e15fa469ba384c7693fd246905547c",
"url": "http://www.youjizz.com/videos/zeichentrick-1-2189178.html",
"file": "2189178.flv"
},
{ {
"name": "Vimeo", "name": "Vimeo",
"md5": "8879b6cc097e987f02484baf890129e5", "md5": "8879b6cc097e987f02484baf890129e5",
@@ -160,5 +178,102 @@
"params": { "params": {
"skip_download": true "skip_download": true
} }
},
{
"name": "ComedyCentral",
"url": "http://www.thedailyshow.com/watch/thu-december-13-2012/kristen-stewart",
"file": "422212.mp4",
"md5": "4e2f5cb088a83cd8cdb7756132f9739d",
"info_dict": {
"title": "thedailyshow-kristen-stewart part 1"
}
},
{
"name": "RBMARadio",
"url": "http://www.rbmaradio.com/shows/ford-lopatin-live-at-primavera-sound-2011",
"file": "ford-lopatin-live-at-primavera-sound-2011.mp3",
"md5": "6bc6f9bcb18994b4c983bc3bf4384d95",
"info_dict": {
"title": "Live at Primavera Sound 2011",
"description": "Joel Ford and Daniel \u2019Oneohtrix Point Never\u2019 Lopatin fly their midified pop extravaganza to Spain. Live at Primavera Sound 2011.",
"uploader": "Ford & Lopatin",
"uploader_id": "ford-lopatin",
"location": "Spain"
}
},
{
"name": "Facebook",
"url": "https://www.facebook.com/photo.php?v=120708114770723",
"file": "120708114770723.mp4",
"md5": "48975a41ccc4b7a581abd68651c1a5a8",
"info_dict": {
"title": "PEOPLE ARE AWESOME 2013",
"duration": 279
}
},
{
"name": "EightTracks",
"url": "http://8tracks.com/ytdl/youtube-dl-test-tracks-a",
"playlist": [
{
"file": "11885610.m4a",
"md5": "96ce57f24389fc8734ce47f4c1abcc55",
"info_dict": {
"title": "youtue-dl project<>\"' - youtube-dl test track 1 \"'/\\\u00e4\u21ad",
"uploader_id": "ytdl"
}
},
{
"file": "11885608.m4a",
"md5": "4ab26f05c1f7291ea460a3920be8021f",
"info_dict": {
"title": "youtube-dl project - youtube-dl test track 2 \"'/\\\u00e4\u21ad",
"uploader_id": "ytdl"
}
},
{
"file": "11885679.m4a",
"md5": "d30b5b5f74217410f4689605c35d1fd7",
"info_dict": {
"title": "youtube-dl project as well - youtube-dl test track 3 \"'/\\\u00e4\u21ad"
}
},
{
"file": "11885680.m4a",
"md5": "4eb0a669317cd725f6bbd336a29f923a",
"info_dict": {
"title": "youtube-dl project as well - youtube-dl test track 4 \"'/\\\u00e4\u21ad"
}
},
{
"file": "11885682.m4a",
"md5": "1893e872e263a2705558d1d319ad19e8",
"info_dict": {
"title": "PH - youtube-dl test track 5 \"'/\\\u00e4\u21ad"
}
},
{
"file": "11885683.m4a",
"md5": "b673c46f47a216ab1741ae8836af5899",
"info_dict": {
"title": "PH - youtube-dl test track 6 \"'/\\\u00e4\u21ad"
}
},
{
"file": "11885684.m4a",
"md5": "1d74534e95df54986da7f5abf7d842b7",
"info_dict": {
"title": "phihag - youtube-dl test track 7 \"'/\\\u00e4\u21ad"
}
},
{
"file": "11885685.m4a",
"md5": "f081f47af8f6ae782ed131d38b9cd1c0",
"info_dict": {
"title": "phihag - youtube-dl test track 8 \"'/\\\u00e4\u21ad"
}
}
]
} }
] ]

View File

@@ -81,6 +81,7 @@ class FileDownloader(object):
writesubtitles: Write the video subtitles to a .srt file writesubtitles: Write the video subtitles to a .srt file
subtitleslang: Language of the subtitles to download subtitleslang: Language of the subtitles to download
test: Download only first bytes to test the downloader. test: Download only first bytes to test the downloader.
keepvideo: Keep the video file after post-processing
""" """
params = None params = None
@@ -94,6 +95,7 @@ class FileDownloader(object):
"""Create a FileDownloader object with the given options.""" """Create a FileDownloader object with the given options."""
self._ies = [] self._ies = []
self._pps = [] self._pps = []
self._progress_hooks = []
self._download_retcode = 0 self._download_retcode = 0
self._num_downloads = 0 self._num_downloads = 0
self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)] self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
@@ -216,12 +218,15 @@ class FileDownloader(object):
Depending on if the downloader has been configured to ignore Depending on if the downloader has been configured to ignore
download errors or not, this method may throw an exception or download errors or not, this method may throw an exception or
not when errors are found, after printing the message. not when errors are found, after printing the message.
tb, if given, is additional traceback information.
""" """
if message is not None: if message is not None:
self.to_stderr(message) self.to_stderr(message)
if self.params.get('verbose'): if self.params.get('verbose'):
if tb is None: if tb is None:
tb = u''.join(traceback.format_list(traceback.extract_stack())) tb_data = traceback.format_list(traceback.extract_stack())
tb = u''.join(tb_data)
self.to_stderr(tb) self.to_stderr(tb)
if not self.params.get('ignoreerrors', False): if not self.params.get('ignoreerrors', False):
raise DownloadError(message) raise DownloadError(message)
@@ -497,7 +502,7 @@ class FileDownloader(object):
try: try:
videos = ie.extract(url) videos = ie.extract(url)
except ExtractorError as de: # An error we somewhat expected except ExtractorError as de: # An error we somewhat expected
self.trouble(u'ERROR: ' + compat_str(de), compat_str(u''.join(traceback.format_tb(de.traceback)))) self.trouble(u'ERROR: ' + compat_str(de), de.format_traceback())
break break
except Exception as e: except Exception as e:
if self.params.get('ignoreerrors', False): if self.params.get('ignoreerrors', False):
@@ -526,15 +531,29 @@ class FileDownloader(object):
return self._download_retcode return self._download_retcode
def post_process(self, filename, ie_info): def post_process(self, filename, ie_info):
"""Run the postprocessing chain on the given file.""" """Run all the postprocessors on the given file."""
info = dict(ie_info) info = dict(ie_info)
info['filepath'] = filename info['filepath'] = filename
keep_video = None
for pp in self._pps: for pp in self._pps:
info = pp.run(info) try:
if info is None: keep_video_wish,new_info = pp.run(info)
break if keep_video_wish is not None:
if keep_video_wish:
keep_video = keep_video_wish
elif keep_video is None:
# No clear decision yet, let IE decide
keep_video = keep_video_wish
except PostProcessingError as e:
self.to_stderr(u'ERROR: ' + e.msg)
if keep_video is False and not self.params.get('keepvideo', False):
try:
self.to_stderr(u'Deleting original file %s (pass -k to keep)' % filename)
os.remove(encodeFilename(filename))
except (IOError, OSError):
self.to_stderr(u'WARNING: Unable to remove downloaded video file')
def _download_with_rtmpdump(self, filename, url, player_url): def _download_with_rtmpdump(self, filename, url, player_url, page_url):
self.report_destination(filename) self.report_destination(filename)
tmpfilename = self.temp_name(filename) tmpfilename = self.temp_name(filename)
@@ -548,7 +567,11 @@ class FileDownloader(object):
# Download using rtmpdump. rtmpdump returns exit code 2 when # Download using rtmpdump. rtmpdump returns exit code 2 when
# the connection was interrumpted and resuming appears to be # the connection was interrumpted and resuming appears to be
# possible. This is part of rtmpdump's normal usage, AFAIK. # possible. This is part of rtmpdump's normal usage, AFAIK.
basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename] basic_args = ['rtmpdump', '-q', '-r', url, '-o', tmpfilename]
if player_url is not None:
basic_args += ['-W', player_url]
if page_url is not None:
basic_args += ['--pageUrl', page_url]
args = basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)] args = basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)]
if self.params.get('verbose', False): if self.params.get('verbose', False):
try: try:
@@ -572,8 +595,15 @@ class FileDownloader(object):
retval = 0 retval = 0
break break
if retval == 0: if retval == 0:
self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(encodeFilename(tmpfilename))) fsize = os.path.getsize(encodeFilename(tmpfilename))
self.to_screen(u'\r[rtmpdump] %s bytes' % fsize)
self.try_rename(tmpfilename, filename) self.try_rename(tmpfilename, filename)
self._hook_progress({
'downloaded_bytes': fsize,
'total_bytes': fsize,
'filename': filename,
'status': 'finished',
})
return True return True
else: else:
self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval) self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval)
@@ -581,22 +611,29 @@ class FileDownloader(object):
def _do_download(self, filename, info_dict): def _do_download(self, filename, info_dict):
url = info_dict['url'] url = info_dict['url']
player_url = info_dict.get('player_url', None)
# Check file already present # Check file already present
if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False): if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False):
self.report_file_already_downloaded(filename) self.report_file_already_downloaded(filename)
self._hook_progress({
'filename': filename,
'status': 'finished',
})
return True return True
# Attempt to download using rtmpdump # Attempt to download using rtmpdump
if url.startswith('rtmp'): if url.startswith('rtmp'):
return self._download_with_rtmpdump(filename, url, player_url) return self._download_with_rtmpdump(filename, url,
info_dict.get('player_url', None),
info_dict.get('page_url', None))
tmpfilename = self.temp_name(filename) tmpfilename = self.temp_name(filename)
stream = None stream = None
# Do not include the Accept-Encoding header # Do not include the Accept-Encoding header
headers = {'Youtubedl-no-compression': 'True'} headers = {'Youtubedl-no-compression': 'True'}
if 'user_agent' in info_dict:
headers['Youtubedl-user-agent'] = info_dict['user_agent']
basic_request = compat_urllib_request.Request(url, None, headers) basic_request = compat_urllib_request.Request(url, None, headers)
request = compat_urllib_request.Request(url, None, headers) request = compat_urllib_request.Request(url, None, headers)
@@ -653,6 +690,10 @@ class FileDownloader(object):
# the one in the hard drive. # the one in the hard drive.
self.report_file_already_downloaded(filename) self.report_file_already_downloaded(filename)
self.try_rename(tmpfilename, filename) self.try_rename(tmpfilename, filename)
self._hook_progress({
'filename': filename,
'status': 'finished',
})
return True return True
else: else:
# The length does not match, we start the download over # The length does not match, we start the download over
@@ -711,6 +752,14 @@ class FileDownloader(object):
eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len) eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
self.report_progress(percent_str, data_len_str, speed_str, eta_str) self.report_progress(percent_str, data_len_str, speed_str, eta_str)
self._hook_progress({
'downloaded_bytes': byte_counter,
'total_bytes': data_len,
'tmpfilename': tmpfilename,
'filename': filename,
'status': 'downloading',
})
# Apply rate limit # Apply rate limit
self.slow_down(start, byte_counter - resume_len) self.slow_down(start, byte_counter - resume_len)
@@ -727,4 +776,31 @@ class FileDownloader(object):
if self.params.get('updatetime', True): if self.params.get('updatetime', True):
info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None)) info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))
self._hook_progress({
'downloaded_bytes': byte_counter,
'total_bytes': byte_counter,
'filename': filename,
'status': 'finished',
})
return True return True
def _hook_progress(self, status):
for ph in self._progress_hooks:
ph(status)
def add_progress_hook(self, ph):
""" ph gets called on download progress, with a dictionary with the entries
* filename: The final filename
* status: One of "downloading" and "finished"
It can also have some of the following entries:
* downloaded_bytes: Bytes on disks
* total_bytes: Total bytes, None if unknown
* tmpfilename: The filename we're currently writing to
Hooks are guaranteed to be called at least once (with status "finished")
if the download is successful.
"""
self._progress_hooks.append(ph)

View File

@@ -5,6 +5,7 @@ from __future__ import absolute_import
import base64 import base64
import datetime import datetime
import itertools
import netrc import netrc
import os import os
import re import re
@@ -35,15 +36,16 @@ class InfoExtractor(object):
url: Final video URL. url: Final video URL.
title: Video title, unescaped. title: Video title, unescaped.
ext: Video filename extension. ext: Video filename extension.
uploader: Full name of the video uploader.
upload_date: Video upload date (YYYYMMDD).
The following fields are optional: The following fields are optional:
format: The video format, defaults to ext (used for --get-format) format: The video format, defaults to ext (used for --get-format)
thumbnail: Full URL to a video thumbnail image. thumbnail: Full URL to a video thumbnail image.
description: One-line video description. description: One-line video description.
uploader: Full name of the video uploader.
upload_date: Video upload date (YYYYMMDD).
uploader_id: Nickname or id of the video uploader. uploader_id: Nickname or id of the video uploader.
location: Physical location of the video.
player_url: SWF Player URL (used for rtmpdump). player_url: SWF Player URL (used for rtmpdump).
subtitles: The .srt file contents. subtitles: The .srt file contents.
urlhandle: [internal] The urlHandle to be used to download the file, urlhandle: [internal] The urlHandle to be used to download the file,
@@ -106,18 +108,23 @@ class InfoExtractor(object):
def IE_NAME(self): def IE_NAME(self):
return type(self).__name__[:-2] return type(self).__name__[:-2]
def _download_webpage(self, url_or_request, video_id, note=None, errnote=None): def _request_webpage(self, url_or_request, video_id, note=None, errnote=None):
""" Returns the response handle """
if note is None: if note is None:
note = u'Downloading video webpage' note = u'Downloading video webpage'
self._downloader.to_screen(u'[%s] %s: %s' % (self.IE_NAME, video_id, note)) self._downloader.to_screen(u'[%s] %s: %s' % (self.IE_NAME, video_id, note))
try: try:
urlh = compat_urllib_request.urlopen(url_or_request) return compat_urllib_request.urlopen(url_or_request)
webpage_bytes = urlh.read()
return webpage_bytes.decode('utf-8', 'replace')
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
if errnote is None: if errnote is None:
errnote = u'Unable to download webpage' errnote = u'Unable to download webpage'
raise ExtractorError(u'%s: %s' % (errnote, compat_str(err))) raise ExtractorError(u'%s: %s' % (errnote, compat_str(err)), sys.exc_info()[2])
def _download_webpage(self, url_or_request, video_id, note=None, errnote=None):
""" Returns the data of the page as a string """
urlh = self._request_webpage(url_or_request, video_id, note, errnote)
webpage_bytes = urlh.read()
return webpage_bytes.decode('utf-8', 'replace')
class YoutubeIE(InfoExtractor): class YoutubeIE(InfoExtractor):
@@ -1974,62 +1981,14 @@ class DepositFilesIE(InfoExtractor):
class FacebookIE(InfoExtractor): class FacebookIE(InfoExtractor):
"""Information Extractor for Facebook""" """Information Extractor for Facebook"""
_WORKING = False
_VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook\.com/(?:video/video|photo)\.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)' _VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook\.com/(?:video/video|photo)\.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)'
_LOGIN_URL = 'https://login.facebook.com/login.php?m&next=http%3A%2F%2Fm.facebook.com%2Fhome.php&' _LOGIN_URL = 'https://login.facebook.com/login.php?m&next=http%3A%2F%2Fm.facebook.com%2Fhome.php&'
_NETRC_MACHINE = 'facebook' _NETRC_MACHINE = 'facebook'
_available_formats = ['video', 'highqual', 'lowqual']
_video_extensions = {
'video': 'mp4',
'highqual': 'mp4',
'lowqual': 'mp4',
}
IE_NAME = u'facebook' IE_NAME = u'facebook'
def __init__(self, downloader=None):
InfoExtractor.__init__(self, downloader)
def _reporter(self, message):
"""Add header and report message."""
self._downloader.to_screen(u'[facebook] %s' % message)
def report_login(self): def report_login(self):
"""Report attempt to log in.""" """Report attempt to log in."""
self._reporter(u'Logging in') self._downloader.to_screen(u'[%s] Logging in' % self.IE_NAME)
def report_video_webpage_download(self, video_id):
"""Report attempt to download video webpage."""
self._reporter(u'%s: Downloading video webpage' % video_id)
def report_information_extraction(self, video_id):
"""Report attempt to extract video information."""
self._reporter(u'%s: Extracting video information' % video_id)
def _parse_page(self, video_webpage):
"""Extract video information from page"""
# General data
data = {'title': r'\("video_title", "(.*?)"\)',
'description': r'<div class="datawrap">(.*?)</div>',
'owner': r'\("video_owner_name", "(.*?)"\)',
'thumbnail': r'\("thumb_url", "(?P<THUMB>.*?)"\)',
}
video_info = {}
for piece in data.keys():
mobj = re.search(data[piece], video_webpage)
if mobj is not None:
video_info[piece] = compat_urllib_parse.unquote_plus(mobj.group(1).decode("unicode_escape"))
# Video urls
video_urls = {}
for fmt in self._available_formats:
mobj = re.search(r'\("%s_src\", "(.+?)"\)' % fmt, video_webpage)
if mobj is not None:
# URL is in a Javascript segment inside an escaped Unicode format within
# the generally utf-8 page
video_urls[fmt] = compat_urllib_parse.unquote_plus(mobj.group(1).decode("unicode_escape"))
video_info['video_urls'] = video_urls
return video_info
def _real_initialize(self): def _real_initialize(self):
if self._downloader is None: if self._downloader is None:
@@ -2082,100 +2041,35 @@ class FacebookIE(InfoExtractor):
return return
video_id = mobj.group('ID') video_id = mobj.group('ID')
# Get video webpage url = 'https://www.facebook.com/video/video.php?v=%s' % video_id
self.report_video_webpage_download(video_id) webpage = self._download_webpage(url, video_id)
request = compat_urllib_request.Request('https://www.facebook.com/video/video.php?v=%s' % video_id)
try:
page = compat_urllib_request.urlopen(request)
video_webpage = page.read()
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err))
return
# Start extracting information BEFORE = '[["allowFullScreen","true"],["allowScriptAccess","always"],["salign","tl"],["scale","noscale"],["wmode","opaque"]].forEach(function(param) {swf.addParam(param[0], param[1]);});\n'
self.report_information_extraction(video_id) AFTER = '.forEach(function(variable) {swf.addVariable(variable[0], variable[1]);});'
m = re.search(re.escape(BEFORE) + '(.*?)' + re.escape(AFTER), webpage)
if not m:
raise ExtractorError(u'Cannot parse data')
data = dict(json.loads(m.group(1)))
params_raw = compat_urllib_parse.unquote(data['params'])
params = json.loads(params_raw)
video_url = params['hd_src']
video_duration = int(params['video_duration'])
# Extract information m = re.search('<h2 class="uiHeaderTitle">([^<]+)</h2>', webpage)
video_info = self._parse_page(video_webpage) if not m:
raise ExtractorError(u'Cannot find title in webpage')
video_title = unescapeHTML(m.group(1))
# uploader info = {
if 'owner' not in video_info: 'id': video_id,
self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
return
video_uploader = video_info['owner']
# title
if 'title' not in video_info:
self._downloader.trouble(u'ERROR: unable to extract video title')
return
video_title = video_info['title']
video_title = video_title.decode('utf-8')
# thumbnail image
if 'thumbnail' not in video_info:
self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
video_thumbnail = ''
else:
video_thumbnail = video_info['thumbnail']
# upload date
upload_date = None
if 'upload_date' in video_info:
upload_time = video_info['upload_date']
timetuple = email.utils.parsedate_tz(upload_time)
if timetuple is not None:
try:
upload_date = time.strftime('%Y%m%d', timetuple[0:9])
except:
pass
# description
video_description = video_info.get('description', 'No description available.')
url_map = video_info['video_urls']
if url_map:
# Decide which formats to download
req_format = self._downloader.params.get('format', None)
format_limit = self._downloader.params.get('format_limit', None)
if format_limit is not None and format_limit in self._available_formats:
format_list = self._available_formats[self._available_formats.index(format_limit):]
else:
format_list = self._available_formats
existing_formats = [x for x in format_list if x in url_map]
if len(existing_formats) == 0:
self._downloader.trouble(u'ERROR: no known formats available for video')
return
if req_format is None:
video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
elif req_format == 'worst':
video_url_list = [(existing_formats[len(existing_formats)-1], url_map[existing_formats[len(existing_formats)-1]])] # worst quality
elif req_format == '-1':
video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
else:
# Specific format
if req_format not in url_map:
self._downloader.trouble(u'ERROR: requested format not available')
return
video_url_list = [(req_format, url_map[req_format])] # Specific format
results = []
for format_param, video_real_url in video_url_list:
# Extension
video_extension = self._video_extensions.get(format_param, 'mp4')
results.append({
'id': video_id.decode('utf-8'),
'url': video_real_url.decode('utf-8'),
'uploader': video_uploader.decode('utf-8'),
'upload_date': upload_date,
'title': video_title, 'title': video_title,
'ext': video_extension.decode('utf-8'), 'url': video_url,
'format': (format_param is None and u'NA' or format_param.decode('utf-8')), 'ext': 'mp4',
'thumbnail': video_thumbnail.decode('utf-8'), 'duration': video_duration,
'description': video_description.decode('utf-8'), 'thumbnail': params['thumbnail_src'],
}) }
return results return [info]
class BlipTVIE(InfoExtractor): class BlipTVIE(InfoExtractor):
"""Information extractor for blip.tv""" """Information extractor for blip.tv"""
@@ -2204,6 +2098,7 @@ class BlipTVIE(InfoExtractor):
cchar = '?' cchar = '?'
json_url = url + cchar + 'skin=json&version=2&no_wrap=1' json_url = url + cchar + 'skin=json&version=2&no_wrap=1'
request = compat_urllib_request.Request(json_url) request = compat_urllib_request.Request(json_url)
request.add_header('User-Agent', 'iTunes/10.6.1')
self.report_extraction(mobj.group(1)) self.report_extraction(mobj.group(1))
info = None info = None
try: try:
@@ -2224,8 +2119,7 @@ class BlipTVIE(InfoExtractor):
'urlhandle': urlh 'urlhandle': urlh
} }
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % compat_str(err)) raise ExtractorError(u'ERROR: unable to download video info webpage: %s' % compat_str(err))
return
if info is None: # Regular URL if info is None: # Regular URL
try: try:
json_code_bytes = urlh.read() json_code_bytes = urlh.read()
@@ -2258,13 +2152,13 @@ class BlipTVIE(InfoExtractor):
'format': data['media']['mimeType'], 'format': data['media']['mimeType'],
'thumbnail': data['thumbnailUrl'], 'thumbnail': data['thumbnailUrl'],
'description': data['description'], 'description': data['description'],
'player_url': data['embedUrl'] 'player_url': data['embedUrl'],
'user_agent': 'iTunes/10.6.1',
} }
except (ValueError,KeyError) as err: except (ValueError,KeyError) as err:
self._downloader.trouble(u'ERROR: unable to parse video information: %s' % repr(err)) self._downloader.trouble(u'ERROR: unable to parse video information: %s' % repr(err))
return return
std_headers['User-Agent'] = 'iTunes/10.6.1'
return [info] return [info]
@@ -2333,7 +2227,6 @@ class ComedyCentralIE(InfoExtractor):
(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?)) (the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
|(watch/(?P<date>[^/]*)/(?P<tdstitle>.*))))) |(watch/(?P<date>[^/]*)/(?P<tdstitle>.*)))))
$""" $"""
IE_NAME = u'comedycentral'
_available_formats = ['3500', '2200', '1700', '1200', '750', '400'] _available_formats = ['3500', '2200', '1700', '1200', '750', '400']
@@ -2361,16 +2254,12 @@ class ComedyCentralIE(InfoExtractor):
def report_extraction(self, episode_id): def report_extraction(self, episode_id):
self._downloader.to_screen(u'[comedycentral] %s: Extracting information' % episode_id) self._downloader.to_screen(u'[comedycentral] %s: Extracting information' % episode_id)
def report_config_download(self, episode_id): def report_config_download(self, episode_id, media_id):
self._downloader.to_screen(u'[comedycentral] %s: Downloading configuration' % episode_id) self._downloader.to_screen(u'[comedycentral] %s: Downloading configuration for %s' % (episode_id, media_id))
def report_index_download(self, episode_id): def report_index_download(self, episode_id):
self._downloader.to_screen(u'[comedycentral] %s: Downloading show index' % episode_id) self._downloader.to_screen(u'[comedycentral] %s: Downloading show index' % episode_id)
def report_player_url(self, episode_id):
self._downloader.to_screen(u'[comedycentral] %s: Determining player URL' % episode_id)
def _print_formats(self, formats): def _print_formats(self, formats):
print('Available formats:') print('Available formats:')
for x in formats: for x in formats:
@@ -2409,6 +2298,7 @@ class ComedyCentralIE(InfoExtractor):
try: try:
htmlHandle = compat_urllib_request.urlopen(req) htmlHandle = compat_urllib_request.urlopen(req)
html = htmlHandle.read() html = htmlHandle.read()
webpage = html.decode('utf-8')
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err))
return return
@@ -2423,29 +2313,20 @@ class ComedyCentralIE(InfoExtractor):
return return
epTitle = mobj.group('episode') epTitle = mobj.group('episode')
mMovieParams = re.findall('(?:<param name="movie" value="|var url = ")(http://media.mtvnservices.com/([^"]*(?:episode|video).*?:.*?))"', html) mMovieParams = re.findall('(?:<param name="movie" value="|var url = ")(http://media.mtvnservices.com/([^"]*(?:episode|video).*?:.*?))"', webpage)
if len(mMovieParams) == 0: if len(mMovieParams) == 0:
# The Colbert Report embeds the information in a without # The Colbert Report embeds the information in a without
# a URL prefix; so extract the alternate reference # a URL prefix; so extract the alternate reference
# and then add the URL prefix manually. # and then add the URL prefix manually.
altMovieParams = re.findall('data-mgid="([^"]*(?:episode|video).*?:.*?)"', html) altMovieParams = re.findall('data-mgid="([^"]*(?:episode|video).*?:.*?)"', webpage)
if len(altMovieParams) == 0: if len(altMovieParams) == 0:
self._downloader.trouble(u'ERROR: unable to find Flash URL in webpage ' + url) self._downloader.trouble(u'ERROR: unable to find Flash URL in webpage ' + url)
return return
else: else:
mMovieParams = [("http://media.mtvnservices.com/" + altMovieParams[0], altMovieParams[0])] mMovieParams = [("http://media.mtvnservices.com/" + altMovieParams[0], altMovieParams[0])]
playerUrl_raw = mMovieParams[0][0]
self.report_player_url(epTitle)
try:
urlHandle = compat_urllib_request.urlopen(playerUrl_raw)
playerUrl = urlHandle.geturl()
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.trouble(u'ERROR: unable to find out player URL: ' + compat_str(err))
return
uri = mMovieParams[0][1] uri = mMovieParams[0][1]
indexUrl = 'http://shadow.comedycentral.com/feeds/video_player/mrss/?' + compat_urllib_parse.urlencode({'uri': uri}) indexUrl = 'http://shadow.comedycentral.com/feeds/video_player/mrss/?' + compat_urllib_parse.urlencode({'uri': uri})
self.report_index_download(epTitle) self.report_index_download(epTitle)
@@ -2459,7 +2340,7 @@ class ComedyCentralIE(InfoExtractor):
idoc = xml.etree.ElementTree.fromstring(indexXml) idoc = xml.etree.ElementTree.fromstring(indexXml)
itemEls = idoc.findall('.//item') itemEls = idoc.findall('.//item')
for itemEl in itemEls: for partNum,itemEl in enumerate(itemEls):
mediaId = itemEl.findall('./guid')[0].text mediaId = itemEl.findall('./guid')[0].text
shortMediaId = mediaId.split(':')[-1] shortMediaId = mediaId.split(':')[-1]
showId = mediaId.split(':')[-2].replace('.com', '') showId = mediaId.split(':')[-2].replace('.com', '')
@@ -2469,7 +2350,7 @@ class ComedyCentralIE(InfoExtractor):
configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' + configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' +
compat_urllib_parse.urlencode({'uri': mediaId})) compat_urllib_parse.urlencode({'uri': mediaId}))
configReq = compat_urllib_request.Request(configUrl) configReq = compat_urllib_request.Request(configUrl)
self.report_config_download(epTitle) self.report_config_download(epTitle, shortMediaId)
try: try:
configXml = compat_urllib_request.urlopen(configReq).read() configXml = compat_urllib_request.urlopen(configReq).read()
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
@@ -2491,7 +2372,7 @@ class ComedyCentralIE(InfoExtractor):
return return
# For now, just pick the highest bitrate # For now, just pick the highest bitrate
format,video_url = turls[-1] format,rtmp_video_url = turls[-1]
# Get the format arg from the arg stream # Get the format arg from the arg stream
req_format = self._downloader.params.get('format', None) req_format = self._downloader.params.get('format', None)
@@ -2499,18 +2380,16 @@ class ComedyCentralIE(InfoExtractor):
# Select format if we can find one # Select format if we can find one
for f,v in turls: for f,v in turls:
if f == req_format: if f == req_format:
format, video_url = f, v format, rtmp_video_url = f, v
break break
# Patch to download from alternative CDN, which does not m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp.comedystor/.*)$', rtmp_video_url)
# break on current RTMPDump builds if not m:
broken_cdn = "rtmpe://viacomccstrmfs.fplive.net/viacomccstrm/gsp.comedystor/" raise ExtractorError(u'Cannot transform RTMP url')
better_cdn = "rtmpe://cp10740.edgefcs.net/ondemand/mtvnorigin/gsp.comedystor/" base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/'
video_url = base + m.group('finalid')
if video_url.startswith(broken_cdn): effTitle = showId + u'-' + epTitle + u' part ' + compat_str(partNum+1)
video_url = video_url.replace(broken_cdn, better_cdn)
effTitle = showId + u'-' + epTitle
info = { info = {
'id': shortMediaId, 'id': shortMediaId,
'url': video_url, 'url': video_url,
@@ -2521,9 +2400,7 @@ class ComedyCentralIE(InfoExtractor):
'format': format, 'format': format,
'thumbnail': None, 'thumbnail': None,
'description': officialTitle, 'description': officialTitle,
'player_url': None #playerUrl
} }
results.append(info) results.append(info)
return results return results
@@ -2603,7 +2480,6 @@ class EscapistIE(InfoExtractor):
return [info] return [info]
class CollegeHumorIE(InfoExtractor): class CollegeHumorIE(InfoExtractor):
"""Information extractor for collegehumor.com""" """Information extractor for collegehumor.com"""
@@ -2995,8 +2871,7 @@ class StanfordOpenClassroomIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
if mobj is None: if mobj is None:
self._downloader.trouble(u'ERROR: invalid URL: %s' % url) raise ExtractorError(u'Invalid URL: %s' % url)
return
if mobj.group('course') and mobj.group('video'): # A specific video if mobj.group('course') and mobj.group('video'): # A specific video
course = mobj.group('course') course = mobj.group('course')
@@ -3033,12 +2908,9 @@ class StanfordOpenClassroomIE(InfoExtractor):
'upload_date': None, 'upload_date': None,
} }
self.report_download_webpage(info['id']) coursepage = self._download_webpage(url, info['id'],
try: note='Downloading course info page',
coursepage = compat_urllib_request.urlopen(url).read() errnote='Unable to download course info page')
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.trouble(u'ERROR: unable to download course info page: ' + compat_str(err))
return
m = re.search('<h1>([^<]+)</h1>', coursepage) m = re.search('<h1>([^<]+)</h1>', coursepage)
if m: if m:
@@ -3062,7 +2934,6 @@ class StanfordOpenClassroomIE(InfoExtractor):
assert entry['type'] == 'reference' assert entry['type'] == 'reference'
results += self.extract(entry['url']) results += self.extract(entry['url'])
return results return results
else: # Root page else: # Root page
info = { info = {
'id': 'Stanford OpenClassroom', 'id': 'Stanford OpenClassroom',
@@ -3290,7 +3161,7 @@ class YoukuIE(InfoExtractor):
class XNXXIE(InfoExtractor): class XNXXIE(InfoExtractor):
"""Information extractor for xnxx.com""" """Information extractor for xnxx.com"""
_VALID_URL = r'^http://video\.xnxx\.com/video([0-9]+)/(.*)' _VALID_URL = r'^(?:https?://)?video\.xnxx\.com/video([0-9]+)/(.*)'
IE_NAME = u'xnxx' IE_NAME = u'xnxx'
VIDEO_URL_RE = r'flv_url=(.*?)&amp;' VIDEO_URL_RE = r'flv_url=(.*?)&amp;'
VIDEO_TITLE_RE = r'<title>(.*?)\s+-\s+XNXX.COM' VIDEO_TITLE_RE = r'<title>(.*?)\s+-\s+XNXX.COM'
@@ -3542,17 +3413,25 @@ class JustinTVIE(InfoExtractor):
return return
response = json.loads(webpage) response = json.loads(webpage)
if type(response) != list:
error_text = response.get('error', 'unknown error')
self._downloader.trouble(u'ERROR: Justin.tv API: %s' % error_text)
return
info = [] info = []
for clip in response: for clip in response:
video_url = clip['video_file_url'] video_url = clip['video_file_url']
if video_url: if video_url:
video_extension = os.path.splitext(video_url)[1][1:] video_extension = os.path.splitext(video_url)[1][1:]
video_date = re.sub('-', '', clip['created_on'][:10]) video_date = re.sub('-', '', clip['start_time'][:10])
video_uploader_id = clip.get('user_id', clip.get('channel_id'))
video_id = clip['id']
video_title = clip.get('title', video_id)
info.append({ info.append({
'id': clip['id'], 'id': video_id,
'url': video_url, 'url': video_url,
'title': clip['title'], 'title': video_title,
'uploader': clip.get('user_id', clip.get('channel_id')), 'uploader': clip.get('channel_name', video_uploader_id),
'uploader_id': video_uploader_id,
'upload_date': video_date, 'upload_date': video_date,
'ext': video_extension, 'ext': video_extension,
}) })
@@ -3571,7 +3450,7 @@ class JustinTVIE(InfoExtractor):
paged = True paged = True
api += '/channel/archives/%s.json' api += '/channel/archives/%s.json'
else: else:
api += '/clip/show/%s.json' api += '/broadcast/by_archive/%s.json'
api = api % (video_id,) api = api % (video_id,)
self.report_extraction(video_id) self.report_extraction(video_id)
@@ -3694,8 +3573,8 @@ class SteamIE(InfoExtractor):
videourl = 'http://store.steampowered.com/video/%s/' % gameID videourl = 'http://store.steampowered.com/video/%s/' % gameID
webpage = self._download_webpage(videourl, gameID) webpage = self._download_webpage(videourl, gameID)
mweb = re.finditer(urlRE, webpage) mweb = re.finditer(urlRE, webpage)
namesRE = r'<span class=\"title\">(?P<videoName>[\w:/\.\?=\+\s-]+)</span>' namesRE = r'<span class="title">(?P<videoName>.+?)</span>'
titles = list(re.finditer(namesRE, webpage)) titles = re.finditer(namesRE, webpage)
videos = [] videos = []
for vid,vtitle in zip(mweb,titles): for vid,vtitle in zip(mweb,titles):
video_id = vid.group('videoID') video_id = vid.group('videoID')
@@ -3707,13 +3586,13 @@ class SteamIE(InfoExtractor):
'id':video_id, 'id':video_id,
'url':video_url, 'url':video_url,
'ext': 'flv', 'ext': 'flv',
'title': title 'title': unescapeHTML(title)
} }
videos.append(info) videos.append(info)
return videos return videos
class UstreamIE(InfoExtractor): class UstreamIE(InfoExtractor):
_VALID_URL = r'http://www.ustream.tv/recorded/(?P<videoID>\d+)' _VALID_URL = r'https?://www\.ustream\.tv/recorded/(?P<videoID>\d+)'
IE_NAME = u'ustream' IE_NAME = u'ustream'
def _real_extract(self, url): def _real_extract(self, url):
@@ -3734,6 +3613,292 @@ class UstreamIE(InfoExtractor):
} }
return [info] return [info]
class RBMARadioIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?rbmaradio\.com/shows/(?P<videoID>[^/]+)$'
def _real_extract(self, url):
m = re.match(self._VALID_URL, url)
video_id = m.group('videoID')
webpage = self._download_webpage(url, video_id)
m = re.search(r'<script>window.gon = {.*?};gon\.show=(.+?);</script>', webpage)
if not m:
raise ExtractorError(u'Cannot find metadata')
json_data = m.group(1)
try:
data = json.loads(json_data)
except ValueError as e:
raise ExtractorError(u'Invalid JSON: ' + str(e))
video_url = data['akamai_url'] + '&cbr=256'
url_parts = compat_urllib_parse_urlparse(video_url)
video_ext = url_parts.path.rpartition('.')[2]
info = {
'id': video_id,
'url': video_url,
'ext': video_ext,
'title': data['title'],
'description': data.get('teaser_text'),
'location': data.get('country_of_origin'),
'uploader': data.get('host', {}).get('name'),
'uploader_id': data.get('host', {}).get('slug'),
'thumbnail': data.get('image', {}).get('large_url_2x'),
'duration': data.get('duration'),
}
return [info]
class YouPornIE(InfoExtractor):
"""Information extractor for youporn.com."""
_VALID_URL = r'^(?:https?://)?(?:\w+\.)?youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+)'
def _print_formats(self, formats):
"""Print all available formats"""
print(u'Available formats:')
print(u'ext\t\tformat')
print(u'---------------------------------')
for format in formats:
print(u'%s\t\t%s' % (format['ext'], format['format']))
def _specific(self, req_format, formats):
for x in formats:
if(x["format"]==req_format):
return x
return None
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
if mobj is None:
self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
return
video_id = mobj.group('videoid')
req = compat_urllib_request.Request(url)
req.add_header('Cookie', 'age_verified=1')
webpage = self._download_webpage(req, video_id)
# Get the video title
result = re.search(r'videoTitleArea">(?P<title>.*)</h1>', webpage)
if result is None:
raise ExtractorError(u'ERROR: unable to extract video title')
video_title = result.group('title').strip()
# Get the video date
result = re.search(r'Date:</b>(?P<date>.*)</li>', webpage)
if result is None:
self._downloader.to_stderr(u'WARNING: unable to extract video date')
upload_date = None
else:
upload_date = result.group('date').strip()
# Get the video uploader
result = re.search(r'Submitted:</b>(?P<uploader>.*)</li>', webpage)
if result is None:
self._downloader.to_stderr(u'ERROR: unable to extract uploader')
video_uploader = None
else:
video_uploader = result.group('uploader').strip()
video_uploader = clean_html( video_uploader )
# Get all of the formats available
DOWNLOAD_LIST_RE = r'(?s)<ul class="downloadList">(?P<download_list>.*?)</ul>'
result = re.search(DOWNLOAD_LIST_RE, webpage)
if result is None:
raise ExtractorError(u'Unable to extract download list')
download_list_html = result.group('download_list').strip()
# Get all of the links from the page
LINK_RE = r'(?s)<a href="(?P<url>[^"]+)">'
links = re.findall(LINK_RE, download_list_html)
if(len(links) == 0):
raise ExtractorError(u'ERROR: no known formats available for video')
self._downloader.to_screen(u'[youporn] Links found: %d' % len(links))
formats = []
for link in links:
# A link looks like this:
# http://cdn1.download.youporn.phncdn.com/201210/31/8004515/480p_370k_8004515/YouPorn%20-%20Nubile%20Films%20The%20Pillow%20Fight.mp4?nvb=20121113051249&nva=20121114051249&ir=1200&sr=1200&hash=014b882080310e95fb6a0
# A path looks like this:
# /201210/31/8004515/480p_370k_8004515/YouPorn%20-%20Nubile%20Films%20The%20Pillow%20Fight.mp4
video_url = unescapeHTML( link )
path = compat_urllib_parse_urlparse( video_url ).path
extension = os.path.splitext( path )[1][1:]
format = path.split('/')[4].split('_')[:2]
size = format[0]
bitrate = format[1]
format = "-".join( format )
title = u'%s-%s-%s' % (video_title, size, bitrate)
formats.append({
'id': video_id,
'url': video_url,
'uploader': video_uploader,
'upload_date': upload_date,
'title': title,
'ext': extension,
'format': format,
'thumbnail': None,
'description': None,
'player_url': None
})
if self._downloader.params.get('listformats', None):
self._print_formats(formats)
return
req_format = self._downloader.params.get('format', None)
self._downloader.to_screen(u'[youporn] Format: %s' % req_format)
if req_format is None or req_format == 'best':
return [formats[0]]
elif req_format == 'worst':
return [formats[-1]]
elif req_format in ('-1', 'all'):
return formats
else:
format = self._specific( req_format, formats )
if result is None:
self._downloader.trouble(u'ERROR: requested format not available')
return
return [format]
class PornotubeIE(InfoExtractor):
"""Information extractor for pornotube.com."""
_VALID_URL = r'^(?:https?://)?(?:\w+\.)?pornotube\.com(/c/(?P<channel>[0-9]+))?(/m/(?P<videoid>[0-9]+))(/(?P<title>.+))$'
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
if mobj is None:
self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
return
video_id = mobj.group('videoid')
video_title = mobj.group('title')
# Get webpage content
webpage = self._download_webpage(url, video_id)
# Get the video URL
VIDEO_URL_RE = r'url: "(?P<url>http://video[0-9].pornotube.com/.+\.flv)",'
result = re.search(VIDEO_URL_RE, webpage)
if result is None:
self._downloader.trouble(u'ERROR: unable to extract video url')
return
video_url = compat_urllib_parse.unquote(result.group('url'))
#Get the uploaded date
VIDEO_UPLOADED_RE = r'<div class="video_added_by">Added (?P<date>[0-9\/]+) by'
result = re.search(VIDEO_UPLOADED_RE, webpage)
if result is None:
self._downloader.trouble(u'ERROR: unable to extract video title')
return
upload_date = result.group('date')
info = {'id': video_id,
'url': video_url,
'uploader': None,
'upload_date': upload_date,
'title': video_title,
'ext': 'flv',
'format': 'flv'}
return [info]
class YouJizzIE(InfoExtractor):
"""Information extractor for youjizz.com."""
_VALID_URL = r'^(?:https?://)?(?:\w+\.)?youjizz\.com/videos/(?P<videoid>[^.]+).html$'
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
if mobj is None:
self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
return
video_id = mobj.group('videoid')
# Get webpage content
webpage = self._download_webpage(url, video_id)
# Get the video title
result = re.search(r'<title>(?P<title>.*)</title>', webpage)
if result is None:
raise ExtractorError(u'ERROR: unable to extract video title')
video_title = result.group('title').strip()
# Get the embed page
result = re.search(r'https?://www.youjizz.com/videos/embed/(?P<videoid>[0-9]+)', webpage)
if result is None:
raise ExtractorError(u'ERROR: unable to extract embed page')
embed_page_url = result.group(0).strip()
video_id = result.group('videoid')
webpage = self._download_webpage(embed_page_url, video_id)
# Get the video URL
result = re.search(r'so.addVariable\("file",encodeURIComponent\("(?P<source>[^"]+)"\)\);', webpage)
if result is None:
raise ExtractorError(u'ERROR: unable to extract video url')
video_url = result.group('source')
info = {'id': video_id,
'url': video_url,
'title': video_title,
'ext': 'flv',
'format': 'flv',
'player_url': embed_page_url}
return [info]
class EightTracksIE(InfoExtractor):
IE_NAME = '8tracks'
_VALID_URL = r'https?://8tracks.com/(?P<user>[^/]+)/(?P<id>[^/#]+)(?:#.*)?$'
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
if mobj is None:
raise ExtractorError(u'Invalid URL: %s' % url)
playlist_id = mobj.group('id')
webpage = self._download_webpage(url, playlist_id)
m = re.search(r"new TRAX.Mix\((.*?)\);\n*\s*TRAX.initSearchAutocomplete\('#search'\);", webpage, flags=re.DOTALL)
if not m:
raise ExtractorError(u'Cannot find trax information')
json_like = m.group(1)
data = json.loads(json_like)
session = str(random.randint(0, 1000000000))
mix_id = data['id']
track_count = data['tracks_count']
first_url = 'http://8tracks.com/sets/%s/play?player=sm&mix_id=%s&format=jsonh' % (session, mix_id)
next_url = first_url
res = []
for i in itertools.count():
api_json = self._download_webpage(next_url, playlist_id,
note=u'Downloading song information %s/%s' % (str(i+1), track_count),
errnote=u'Failed to download song information')
api_data = json.loads(api_json)
track_data = api_data[u'set']['track']
info = {
'id': track_data['id'],
'url': track_data['track_file_stream_url'],
'title': track_data['performer'] + u' - ' + track_data['name'],
'raw_title': track_data['name'],
'uploader_id': data['user']['login'],
'ext': 'm4a',
}
res.append(info)
if api_data['set']['at_last_track']:
break
next_url = 'http://8tracks.com/sets/%s/next?player=sm&mix_id=%s&format=jsonh&track_id=%s' % (session, mix_id, track_data['id'])
return res
def gen_extractors(): def gen_extractors():
""" Return a list of an instance of every supported extractor. """ Return a list of an instance of every supported extractor.
@@ -3768,6 +3933,9 @@ def gen_extractors():
MTVIE(), MTVIE(),
YoukuIE(), YoukuIE(),
XNXXIE(), XNXXIE(),
YouJizzIE(),
PornotubeIE(),
YouPornIE(),
GooglePlusIE(), GooglePlusIE(),
ArteTvIE(), ArteTvIE(),
NBAIE(), NBAIE(),
@@ -3776,6 +3944,8 @@ def gen_extractors():
TweetReelIE(), TweetReelIE(),
SteamIE(), SteamIE(),
UstreamIE(), UstreamIE(),
RBMARadioIE(),
EightTracksIE(),
GenericIE() GenericIE()
] ]

View File

@@ -45,31 +45,24 @@ class PostProcessor(object):
one has an extra field called "filepath" that points to the one has an extra field called "filepath" that points to the
downloaded file. downloaded file.
When this method returns None, the postprocessing chain is This method returns a tuple, the first element of which describes
stopped. However, this method may return an information whether the original file should be kept (i.e. not deleted - None for
dictionary that will be passed to the next postprocessing no preference), and the second of which is the updated information.
object in the chain. It can be the one it received after
changing some fields.
In addition, this method may raise a PostProcessingError In addition, this method may raise a PostProcessingError
exception that will be taken into account by the downloader exception if post processing fails.
it was called from.
""" """
return information # by default, do nothing return None, information # by default, keep file and do nothing
class AudioConversionError(BaseException): class FFmpegPostProcessorError(PostProcessingError):
def __init__(self, message): pass
self.message = message
class FFmpegExtractAudioPP(PostProcessor): class AudioConversionError(PostProcessingError):
def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, keepvideo=False, nopostoverwrites=False): pass
class FFmpegPostProcessor(PostProcessor):
def __init__(self,downloader=None):
PostProcessor.__init__(self, downloader) PostProcessor.__init__(self, downloader)
if preferredcodec is None:
preferredcodec = 'best'
self._preferredcodec = preferredcodec
self._preferredquality = preferredquality
self._keepvideo = keepvideo
self._nopostoverwrites = nopostoverwrites
self._exes = self.detect_executables() self._exes = self.detect_executables()
@staticmethod @staticmethod
@@ -83,10 +76,37 @@ class FFmpegExtractAudioPP(PostProcessor):
programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe'] programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe']
return dict((program, executable(program)) for program in programs) return dict((program, executable(program)) for program in programs)
def run_ffmpeg(self, path, out_path, opts):
if not self._exes['ffmpeg'] and not self._exes['avconv']:
raise FFmpegPostProcessorError(u'ffmpeg or avconv not found. Please install one.')
cmd = ([self._exes['avconv'] or self._exes['ffmpeg'], '-y', '-i', encodeFilename(path)]
+ opts +
[encodeFilename(self._ffmpeg_filename_argument(out_path))])
p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
stdout,stderr = p.communicate()
if p.returncode != 0:
msg = stderr.strip().split('\n')[-1]
raise FFmpegPostProcessorError(msg.decode('utf-8', 'replace'))
def _ffmpeg_filename_argument(self, fn):
# ffmpeg broke --, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details
if fn.startswith(u'-'):
return u'./' + fn
return fn
class FFmpegExtractAudioPP(FFmpegPostProcessor):
def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, nopostoverwrites=False):
FFmpegPostProcessor.__init__(self, downloader)
if preferredcodec is None:
preferredcodec = 'best'
self._preferredcodec = preferredcodec
self._preferredquality = preferredquality
self._nopostoverwrites = nopostoverwrites
def get_audio_codec(self, path): def get_audio_codec(self, path):
if not self._exes['ffprobe'] and not self._exes['avprobe']: return None if not self._exes['ffprobe'] and not self._exes['avprobe']: return None
try: try:
cmd = [self._exes['avprobe'] or self._exes['ffprobe'], '-show_streams', '--', encodeFilename(path)] cmd = [self._exes['avprobe'] or self._exes['ffprobe'], '-show_streams', encodeFilename(self._ffmpeg_filename_argument(path))]
handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE) handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE)
output = handle.communicate()[0] output = handle.communicate()[0]
if handle.wait() != 0: if handle.wait() != 0:
@@ -108,22 +128,18 @@ class FFmpegExtractAudioPP(PostProcessor):
acodec_opts = [] acodec_opts = []
else: else:
acodec_opts = ['-acodec', codec] acodec_opts = ['-acodec', codec]
cmd = ([self._exes['avconv'] or self._exes['ffmpeg'], '-y', '-i', encodeFilename(path), '-vn'] opts = ['-vn'] + acodec_opts + more_opts
+ acodec_opts + more_opts + try:
['--', encodeFilename(out_path)]) FFmpegPostProcessor.run_ffmpeg(self, path, out_path, opts)
p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) except FFmpegPostProcessorError as err:
stdout,stderr = p.communicate() raise AudioConversionError(err.message)
if p.returncode != 0:
msg = stderr.strip().split('\n')[-1]
raise AudioConversionError(msg)
def run(self, information): def run(self, information):
path = information['filepath'] path = information['filepath']
filecodec = self.get_audio_codec(path) filecodec = self.get_audio_codec(path)
if filecodec is None: if filecodec is None:
self._downloader.to_stderr(u'WARNING: unable to obtain file audio codec with ffprobe') raise PostProcessingError(u'WARNING: unable to obtain file audio codec with ffprobe')
return None
more_opts = [] more_opts = []
if self._preferredcodec == 'best' or self._preferredcodec == filecodec or (self._preferredcodec == 'm4a' and filecodec == 'aac'): if self._preferredcodec == 'best' or self._preferredcodec == filecodec or (self._preferredcodec == 'm4a' and filecodec == 'aac'):
@@ -132,7 +148,7 @@ class FFmpegExtractAudioPP(PostProcessor):
acodec = 'copy' acodec = 'copy'
extension = self._preferredcodec extension = self._preferredcodec
more_opts = [self._exes['avconv'] and '-bsf:a' or '-absf', 'aac_adtstoasc'] more_opts = [self._exes['avconv'] and '-bsf:a' or '-absf', 'aac_adtstoasc']
elif filecodec in ['aac', 'mp3', 'vorbis']: elif filecodec in ['aac', 'mp3', 'vorbis', 'opus']:
# Lossless if possible # Lossless if possible
acodec = 'copy' acodec = 'copy'
extension = filecodec extension = filecodec
@@ -152,7 +168,7 @@ class FFmpegExtractAudioPP(PostProcessor):
more_opts += [self._exes['avconv'] and '-b:a' or '-ab', self._preferredquality + 'k'] more_opts += [self._exes['avconv'] and '-b:a' or '-ab', self._preferredquality + 'k']
else: else:
# We convert the audio (lossy) # We convert the audio (lossy)
acodec = {'mp3': 'libmp3lame', 'aac': 'aac', 'm4a': 'aac', 'vorbis': 'libvorbis', 'wav': None}[self._preferredcodec] acodec = {'mp3': 'libmp3lame', 'aac': 'aac', 'm4a': 'aac', 'opus': 'opus', 'vorbis': 'libvorbis', 'wav': None}[self._preferredcodec]
extension = self._preferredcodec extension = self._preferredcodec
more_opts = [] more_opts = []
if self._preferredquality is not None: if self._preferredquality is not None:
@@ -181,10 +197,10 @@ class FFmpegExtractAudioPP(PostProcessor):
except: except:
etype,e,tb = sys.exc_info() etype,e,tb = sys.exc_info()
if isinstance(e, AudioConversionError): if isinstance(e, AudioConversionError):
self._downloader.to_stderr(u'ERROR: audio conversion failed: ' + e.message) msg = u'audio conversion failed: ' + e.message
else: else:
self._downloader.to_stderr(u'ERROR: error running ' + (self._exes['avconv'] and 'avconv' or 'ffmpeg')) msg = u'error running ' + (self._exes['avconv'] and 'avconv' or 'ffmpeg')
return None raise PostProcessingError(msg)
# Try to update the date time for extracted audio file. # Try to update the date time for extracted audio file.
if information.get('filetime') is not None: if information.get('filetime') is not None:
@@ -193,12 +209,24 @@ class FFmpegExtractAudioPP(PostProcessor):
except: except:
self._downloader.to_stderr(u'WARNING: Cannot update utime of audio file') self._downloader.to_stderr(u'WARNING: Cannot update utime of audio file')
if not self._keepvideo:
try:
os.remove(encodeFilename(path))
except (IOError, OSError):
self._downloader.to_stderr(u'WARNING: Unable to remove downloaded video file')
return None
information['filepath'] = new_path information['filepath'] = new_path
return information return False,information
class FFmpegVideoConvertor(FFmpegPostProcessor):
def __init__(self, downloader=None,preferedformat=None):
super(FFmpegVideoConvertor, self).__init__(downloader)
self._preferedformat=preferedformat
def run(self, information):
path = information['filepath']
prefix, sep, ext = path.rpartition(u'.')
outpath = prefix + sep + self._preferedformat
if information['ext'] == self._preferedformat:
self._downloader.to_screen(u'[ffmpeg] Not converting video file %s - already is in target format %s' % (path, self._preferedformat))
return True,information
self._downloader.to_screen(u'['+'ffmpeg'+'] Converting video from %s to %s, Destination: ' % (information['ext'], self._preferedformat) +outpath)
self.run_ffmpeg(path, outpath, [])
information['filepath'] = outpath
information['format'] = self._preferedformat
information['ext'] = self._preferedformat
return False,information

View File

@@ -22,6 +22,7 @@ __authors__ = (
'Christian Albrecht', 'Christian Albrecht',
'Dave Vasilevsky', 'Dave Vasilevsky',
'Jaime Marquínez Ferrándiz', 'Jaime Marquínez Ferrándiz',
'Jeff Crouse',
) )
__license__ = 'Public Domain' __license__ = 'Public Domain'
@@ -175,7 +176,6 @@ def parseOpts():
action='store', dest='subtitleslang', metavar='LANG', action='store', dest='subtitleslang', metavar='LANG',
help='language of the closed captions to download (optional) use IETF language tags like \'en\'') help='language of the closed captions to download (optional) use IETF language tags like \'en\'')
verbosity.add_option('-q', '--quiet', verbosity.add_option('-q', '--quiet',
action='store_true', dest='quiet', help='activates quiet mode', default=False) action='store_true', dest='quiet', help='activates quiet mode', default=False)
verbosity.add_option('-s', '--simulate', verbosity.add_option('-s', '--simulate',
@@ -248,9 +248,11 @@ def parseOpts():
postproc.add_option('-x', '--extract-audio', action='store_true', dest='extractaudio', default=False, postproc.add_option('-x', '--extract-audio', action='store_true', dest='extractaudio', default=False,
help='convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)') help='convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)')
postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best', postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best',
help='"best", "aac", "vorbis", "mp3", "m4a", or "wav"; best by default') help='"best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; best by default')
postproc.add_option('--audio-quality', metavar='QUALITY', dest='audioquality', default='5', postproc.add_option('--audio-quality', metavar='QUALITY', dest='audioquality', default='5',
help='ffmpeg/avconv audio quality specification, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default 5)') help='ffmpeg/avconv audio quality specification, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default 5)')
postproc.add_option('--recode-video', metavar='FORMAT', dest='recodevideo', default=None,
help='Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm)')
postproc.add_option('-k', '--keep-video', action='store_true', dest='keepvideo', default=False, postproc.add_option('-k', '--keep-video', action='store_true', dest='keepvideo', default=False,
help='keeps the video file on disk after the post-processing; the video is erased by default') help='keeps the video file on disk after the post-processing; the video is erased by default')
postproc.add_option('--no-post-overwrites', action='store_true', dest='nopostoverwrites', default=False, postproc.add_option('--no-post-overwrites', action='store_true', dest='nopostoverwrites', default=False,
@@ -370,12 +372,15 @@ def _real_main():
except (TypeError, ValueError) as err: except (TypeError, ValueError) as err:
parser.error(u'invalid playlist end number specified') parser.error(u'invalid playlist end number specified')
if opts.extractaudio: if opts.extractaudio:
if opts.audioformat not in ['best', 'aac', 'mp3', 'vorbis', 'm4a', 'wav']: if opts.audioformat not in ['best', 'aac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav']:
parser.error(u'invalid audio format specified') parser.error(u'invalid audio format specified')
if opts.audioquality: if opts.audioquality:
opts.audioquality = opts.audioquality.strip('k').strip('K') opts.audioquality = opts.audioquality.strip('k').strip('K')
if not opts.audioquality.isdigit(): if not opts.audioquality.isdigit():
parser.error(u'invalid audio quality specified') parser.error(u'invalid audio quality specified')
if opts.recodevideo is not None:
if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg']:
parser.error(u'invalid video recode format specified')
if sys.version_info < (3,): if sys.version_info < (3,):
# In Python 2, sys.argv is a bytestring (also note http://bugs.python.org/issue2128 for Windows systems) # In Python 2, sys.argv is a bytestring (also note http://bugs.python.org/issue2128 for Windows systems)
@@ -432,6 +437,7 @@ def _real_main():
'prefer_free_formats': opts.prefer_free_formats, 'prefer_free_formats': opts.prefer_free_formats,
'verbose': opts.verbose, 'verbose': opts.verbose,
'test': opts.test, 'test': opts.test,
'keepvideo': opts.keepvideo,
}) })
if opts.verbose: if opts.verbose:
@@ -453,7 +459,9 @@ def _real_main():
# PostProcessors # PostProcessors
if opts.extractaudio: if opts.extractaudio:
fd.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, keepvideo=opts.keepvideo, nopostoverwrites=opts.nopostoverwrites)) fd.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, nopostoverwrites=opts.nopostoverwrites))
if opts.recodevideo:
fd.add_post_processor(FFmpegVideoConvertor(preferedformat=opts.recodevideo))
# Update version # Update version
if opts.update_self: if opts.update_self:

View File

@@ -8,6 +8,7 @@ import locale
import os import os
import re import re
import sys import sys
import traceback
import zlib import zlib
import email.utils import email.utils
import json import json
@@ -279,6 +280,12 @@ class AttrParser(compat_html_parser.HTMLParser):
lines[-1] = lines[-1][:self.result[2][1]-self.result[1][1]] lines[-1] = lines[-1][:self.result[2][1]-self.result[1][1]]
lines[-1] = lines[-1][:self.result[2][1]] lines[-1] = lines[-1][:self.result[2][1]]
return '\n'.join(lines).strip() return '\n'.join(lines).strip()
# Hack for https://github.com/rg3/youtube-dl/issues/662
if sys.version_info < (2, 7, 3):
AttrParser.parse_endtag = (lambda self, i:
i + len("</scr'+'ipt>")
if self.rawdata[i:].startswith("</scr'+'ipt>")
else compat_html_parser.HTMLParser.parse_endtag(self, i))
def get_element_by_id(id, html): def get_element_by_id(id, html):
"""Return the content of the tag with the specified ID in the passed HTML document""" """Return the content of the tag with the specified ID in the passed HTML document"""
@@ -408,18 +415,24 @@ def encodeFilename(s):
# match Windows 9x series as well. Besides, NT 4 is obsolete.) # match Windows 9x series as well. Besides, NT 4 is obsolete.)
return s return s
else: else:
return s.encode(sys.getfilesystemencoding(), 'ignore') encoding = sys.getfilesystemencoding()
if encoding is None:
encoding = 'utf-8'
return s.encode(encoding, 'ignore')
class ExtractorError(Exception): class ExtractorError(Exception):
"""Error during info extraction.""" """Error during info extraction."""
def __init__(self, msg, tb=None): def __init__(self, msg, tb=None):
""" tb is the original traceback (so that it can be printed out) """ """ tb, if given, is the original traceback (so that it can be printed out). """
super(ExtractorError, self).__init__(msg) super(ExtractorError, self).__init__(msg)
if tb is None:
tb = sys.exc_info()[2]
self.traceback = tb self.traceback = tb
def format_traceback(self):
if self.traceback is None:
return None
return u''.join(traceback.format_tb(self.traceback))
class DownloadError(Exception): class DownloadError(Exception):
"""Download Error exception. """Download Error exception.
@@ -446,7 +459,8 @@ class PostProcessingError(Exception):
This exception may be raised by PostProcessor's .run() method to This exception may be raised by PostProcessor's .run() method to
indicate an error in the postprocessing task. indicate an error in the postprocessing task.
""" """
pass def __init__(self, msg):
self.msg = msg
class MaxDownloadsReached(Exception): class MaxDownloadsReached(Exception):
""" --max-downloads limit has been reached. """ """ --max-downloads limit has been reached. """
@@ -511,14 +525,19 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
return ret return ret
def http_request(self, req): def http_request(self, req):
for h in std_headers: for h,v in std_headers.items():
if h in req.headers: if h in req.headers:
del req.headers[h] del req.headers[h]
req.add_header(h, std_headers[h]) req.add_header(h, v)
if 'Youtubedl-no-compression' in req.headers: if 'Youtubedl-no-compression' in req.headers:
if 'Accept-encoding' in req.headers: if 'Accept-encoding' in req.headers:
del req.headers['Accept-encoding'] del req.headers['Accept-encoding']
del req.headers['Youtubedl-no-compression'] del req.headers['Youtubedl-no-compression']
if 'Youtubedl-user-agent' in req.headers:
if 'User-agent' in req.headers:
del req.headers['User-agent']
req.headers['User-agent'] = req.headers['Youtubedl-user-agent']
del req.headers['Youtubedl-user-agent']
return req return req
def http_response(self, req, resp): def http_response(self, req, resp):

View File

@@ -1,2 +1,2 @@
__version__ = '2013.01.02' __version__ = '2013.02.01'