Compare commits

...

107 Commits

Author SHA1 Message Date
Philipp Hagemeister
c8cd8e5f55 release 2013.02.19 2013-02-19 00:06:04 +01:00
Philipp Hagemeister
471cf47796 include bash completion and manpage in PyPi dist 2013-02-18 23:56:13 +01:00
Philipp Hagemeister
d8f64574a4 release 2013.02.18 2013-02-18 23:37:20 +01:00
Philipp Hagemeister
e711babbd1 Fix YP IE 2013-02-18 23:30:33 +01:00
Philipp Hagemeister
a72b0f2b6f Use proper echo commands 2013-02-18 23:22:01 +01:00
Philipp Hagemeister
434eb6f26b Include man and bash completion in PyPi release 2013-02-18 23:19:57 +01:00
Philipp Hagemeister
197080b10b Merge remote-tracking branch 'jaimeMF/TED' 2013-02-18 23:12:56 +01:00
Philipp Hagemeister
7796e8c2cb facebook: also download lq videos 2013-02-18 23:12:48 +01:00
Philipp Hagemeister
6d4363368a Fix MyVideo IE 2013-02-18 22:32:56 +01:00
Jaime Marquínez Ferrándiz
414638cd50 TED: Add support for playlists 2013-02-18 21:42:06 +01:00
Philipp Hagemeister
2a9983b78f Fix 8tracks 2013-02-18 19:11:32 +01:00
Philipp Hagemeister
b17c974a88 Mark DailyMotion as broken for now (#680) 2013-02-18 18:53:40 +01:00
Philipp Hagemeister
5717d91ab7 Correct --newline and give it a more meaningful title 2013-02-18 18:52:06 +01:00
Philipp Hagemeister
79eb0287ab Merge remote-tracking branch 'glisignoli/master' 2013-02-18 18:47:35 +01:00
Philipp Hagemeister
58994225bc Add tests to MySpass 2013-02-18 18:45:09 +01:00
Jaime Marquínez Ferrándiz
59d4c2fe1b fix some titles in TED 2013-02-17 17:25:02 +01:00
Jaime Marquínez Ferrándiz
3a468f2d8b Basic support for TED 2013-02-17 17:13:06 +01:00
bastik
1ad5d872b9 added new InfoExtractor for myspass.de 2013-02-16 13:46:13 +01:00
glisignoli
355fc8e944 Update README.md 2013-02-15 15:57:40 +13:00
glisignoli
380a29dbf7 Update youtube_dl/__init__.py 2013-02-15 15:55:11 +13:00
Gino Lisignoli
1528d6642d Forgot to remove \r 2013-02-13 16:43:08 +13:00
Gino Lisignoli
7311fef854 Modified youtube-dl to write new lines with the --newline switch. This
enables easier process monitoring when being called with external
scripts.
2013-02-13 14:02:31 +13:00
Mantas Mikulėnas
906417c7c5 Fix delayed title display in --console-title
With Python 3, the titlebar wouldn't get updated for a long time (due to
stderr buffering), and when it did, the title would be shown as b'...'
representation.
2013-02-09 22:58:12 +02:00
Philipp Hagemeister
6aabe82035 Credit Osama Khalid for Keek support 2013-02-08 11:01:09 +01:00
Philipp Hagemeister
f0877a445e Add tests for keek 2013-02-08 11:00:28 +01:00
Osama Khalid
da06e2daf8 Add KeekIE() 2013-02-08 10:25:55 +03:00
Philipp Hagemeister
d3f5f9f6b9 Fix login (Closes #658) 2013-02-06 21:22:53 +01:00
Philipp Hagemeister
bfc6ea7935 Ignore PyPi metadata 2013-02-05 13:42:52 +01:00
Philipp Hagemeister
8edc2cf8ca Support direct vimeo links (Closes #666) 2013-02-05 13:42:08 +01:00
Philipp Hagemeister
fb778e66df Fix encoding in youtube subtitle download (Closes #669) 2013-02-05 13:30:02 +01:00
Philipp Hagemeister
3a9918d37f Escapist continues to be flaky on travis 2013-02-02 14:53:34 +01:00
Philipp Hagemeister
ccb0cae134 Fix automatic release (oops) 2013-02-02 14:52:38 +01:00
Philipp Hagemeister
085c8b75a6 release 2013.02.02 2013-02-02 14:45:38 +01:00
Philipp Hagemeister
dbf2ba3d61 Better help for new options 2013-02-02 14:44:22 +01:00
Philipp Hagemeister
b47bbac393 Disable Stanford OC test for now, and enable escapist 2013-02-02 14:40:41 +01:00
Philipp Hagemeister
229cac754a Improve cookie error handling 2013-02-02 13:51:54 +01:00
Philipp Hagemeister
0e33684194 Switch to m4a by default (Closes #240) 2013-02-01 18:23:20 +01:00
Jeff Crouse
9e982f9e4e Added "min-filesize" and "max-filesize" options 2013-02-01 18:09:34 +01:00
Philipp Hagemeister
c7a725cfad Merge remote-tracking branch 'dcoppa/master' 2013-02-01 18:05:42 +01:00
Philipp Hagemeister
450a30cae8 Add PyPi upload to release script 2013-02-01 18:01:53 +01:00
Philipp Hagemeister
9cd5e4fce8 release 2013.02.01 2013-02-01 17:57:32 +01:00
Philipp Hagemeister
edba5137b8 Fix Facebook IE 2013-02-01 17:56:22 +01:00
Philipp Hagemeister
233a22960a Switch ComedyCentral test to a permanent URL (They delete full episodes older than a month) 2013-02-01 17:46:03 +01:00
Philipp Hagemeister
3b024e17af Work around buggy HTML Parser in Python < 2.7.3 (Closes #662) 2013-02-01 17:29:50 +01:00
David Coppa
a32b573ccb Try setuptools first, then fallback to distutils.core 2013-01-30 15:31:38 +01:00
Philipp Hagemeister
ec71c13ab8 release 2013.01.28 2013-01-27 18:33:58 +01:00
Philipp Hagemeister
f0bad2b026 Fix Stanford (Closes #653) 2013-01-27 15:23:26 +01:00
Philipp Hagemeister
25580f3251 8tracks: Ignore hashes 2013-01-27 04:15:12 +01:00
Philipp Hagemeister
da4de959df 8tracks: Better default titles 2013-01-27 04:05:53 +01:00
Philipp Hagemeister
d0d51a8afa 8tracks: Include performer as uploader 2013-01-27 03:27:46 +01:00
Philipp Hagemeister
c67598c3e1 Remove space before shebang 2013-01-27 03:07:07 +01:00
Philipp Hagemeister
811d253bc2 Merge remote-tracking branch 'jaimeMF/makefilePythonversion' 2013-01-27 03:06:32 +01:00
Philipp Hagemeister
c3a1642ead release 2013.01.27 2013-01-27 03:03:02 +01:00
Philipp Hagemeister
ccf65f9dee 8tracks IE (Closes #652) 2013-01-27 03:01:23 +01:00
Philipp Hagemeister
b954070d70 Fix Facebook (Closes #375) 2013-01-25 16:54:48 +01:00
Philipp Hagemeister
30e9f4496b Drop md5: spec for now (unused and breaks int values) 2013-01-25 16:54:25 +01:00
Jaime Marquínez Ferrándiz
271d3fbdaa Option in makefile to select python interpreter 2013-01-25 15:11:03 +01:00
Philipp Hagemeister
6df40dcbe0 Guard against sys.getfilesystemencoding() == None (#503) 2013-01-20 01:48:05 +01:00
Philipp Hagemeister
97f194c1fb twitch.tv: Use id as title if no title is present (Closes #638) 2013-01-16 09:55:45 +01:00
Philipp Hagemeister
4da769ccca Do not backup version.py (under version control and frankly, not that complex) 2013-01-12 23:04:46 +01:00
Philipp Hagemeister
253d96f2e2 Force build removal 2013-01-12 22:25:54 +01:00
Philipp Hagemeister
bbc3e2753a release 2013.01.13 2013-01-12 22:18:13 +01:00
Philipp Hagemeister
67353612ba Revert "Move update to front"
This reverts commit db30f02b50.
2013-01-12 22:10:36 +01:00
Philipp Hagemeister
bffbd5f038 Download progress hooks 2013-01-12 20:34:50 +01:00
Philipp Hagemeister
d8bbf2018e Aggressive test timeout to catch hanging servers 2013-01-12 20:33:03 +01:00
Philipp Hagemeister
187f491ad2 [RBMA] Do not fail if thumbnail is empty 2013-01-12 18:45:50 +01:00
Philipp Hagemeister
335959e778 Correct Blip.tv on 2.6, where HTTP headers are case-sensitive (wtf?) 2013-01-12 18:38:23 +01:00
Philipp Hagemeister
3b83bf8f6a correct pushes in release script 2013-01-12 18:37:21 +01:00
Philipp Hagemeister
51719893bf Default to py3 in sign-versions 2013-01-12 18:14:07 +01:00
Philipp Hagemeister
1841f65e64 Python 2-proof versions.py 2013-01-12 18:12:24 +01:00
Philipp Hagemeister
bb28998920 fix location of updates_key in devscripts/release 2013-01-12 18:07:31 +01:00
Philipp Hagemeister
fbc5f99db9 release 2013.01.12 2013-01-12 17:59:58 +01:00
Philipp Hagemeister
ca0a0bbeec RBMA IE (Closes #630) 2013-01-12 17:58:39 +01:00
Philipp Hagemeister
6119f78cb9 Add location field 2013-01-12 17:34:31 +01:00
Philipp Hagemeister
539679c7f9 Make uploader and upload_date fields optional 2013-01-12 17:34:09 +01:00
Philipp Hagemeister
b642cd44c1 restore youtube-dl (update) binary 2013-01-12 17:07:12 +01:00
Philipp Hagemeister
fffec3b9d9 Credit jefftimesten for YouPornIE, PornoTubeIE, YouJizzIE 2013-01-12 16:51:20 +01:00
Philipp Hagemeister
3446dfb7cb Proper support for changing User-Agents from IEs 2013-01-12 16:49:13 +01:00
Philipp Hagemeister
db16276b7c Improve YouJizz 2013-01-12 16:41:04 +01:00
Philipp Hagemeister
629fcdd135 Add agecheck and various improvements to YouPorn IE 2013-01-12 16:10:35 +01:00
Philipp Hagemeister
64ce2aada8 _request_webpage helper methods for queries that need the final URL 2013-01-12 16:10:16 +01:00
Philipp Hagemeister
565f751967 Clean up porno IEs 2013-01-12 15:17:04 +01:00
Philipp Hagemeister
6017964580 Merge remote-tracking branch 'jefftimesten/master' 2013-01-12 15:12:50 +01:00
Philipp Hagemeister
1d16b0c3fe Keep file without any PPs (oops, missed the obvious case) 2013-01-12 15:12:28 +01:00
Philipp Hagemeister
7851b37993 --recode-video option (Closes #18) 2013-01-12 15:09:09 +01:00
Philipp Hagemeister
d81edc573e Merge 'jaimeMF/videoconversion' (sans actual option for now) 2013-01-12 14:04:30 +01:00
Philipp Hagemeister
ef0c8d5f9f Make ustream IE more robust 2013-01-12 13:49:14 +01:00
Philipp Hagemeister
db30f02b50 Move update to front 2013-01-12 13:45:39 +01:00
Philipp Hagemeister
4ba7262467 Less confusing player version 2013-01-12 13:35:16 +01:00
Jaime Marquínez Ferrándiz
67d0c25eab Add a PostProcessor for converting video format 2013-01-11 20:50:49 +01:00
Philipp Hagemeister
09f9552b40 Less git acrobatics in devscripts/release.sh 2013-01-11 08:28:37 +01:00
Jeff Crouse
9450bfa26e fixed tests (used the --test option) so that they pass. go figure 2013-01-06 16:33:37 -05:00
Jeff Crouse
18be482a6f oops - didn't remove some reminders 2013-01-06 15:52:33 -05:00
Jeff Crouse
ca6710ee41 made changes recommended in pull request 2013-01-06 15:40:50 -05:00
Jeff Crouse
caec7618a1 re-fixed XNXX regex problem 2013-01-05 16:05:23 -05:00
Jeff Crouse
7e7ab2815c Merge branch 'master' of https://github.com/jefftimesten/youtube-dl 2013-01-05 16:01:03 -05:00
Jeff Crouse
d7744f2219 Merge branch 'master' of https://github.com/jefftimesten/youtube-dl 2013-01-05 16:00:50 -05:00
Jeff Crouse
7161829de5 Merge branch 'master' of https://github.com/jefftimesten/youtube-dl 2013-01-05 15:59:28 -05:00
Jeff Crouse
991ba7fae3 Added extractors for 3 porn sites 2013-01-05 15:59:01 -05:00
Jeff Crouse
a7539296ce Added extractors for 3 porn sites 2013-01-05 15:42:35 -05:00
Jeff Crouse
258d5850c9 Merge branch 'master' of https://github.com/rg3/youtube-dl
Conflicts:
	.gitignore
	LATEST_VERSION
	Makefile
	youtube-dl
	youtube-dl.exe
	youtube_dl/InfoExtractors.py
	youtube_dl/__init__.py
2013-01-05 15:03:54 -05:00
Jeff Crouse
187da2c093 added YouJizz extractor 2012-12-16 00:26:27 -05:00
Jeff Crouse
9a2cf56d51 Fixed a problem with the XNXXIE Regex 2012-12-15 23:22:07 -05:00
Jeff Crouse
5f7ad21633 Strip HTML out of uploader name 2012-11-13 17:48:30 -05:00
Jeff Crouse
089d47f8d5 Removed the README.md build target in the makefile. It is broken... 2012-11-13 17:48:10 -05:00
Jeff Crouse
fdef722fa1 Added YouPorn infoExtractor 2012-11-13 13:10:56 -05:00
Jeff Crouse
110d4f4c91 Added Pornotube support (for Laborers of Love) 2012-11-12 16:17:55 -05:00
17 changed files with 990 additions and 309 deletions

1
.gitignore vendored
View File

@@ -17,3 +17,4 @@ youtube-dl.tar.gz
.coverage
cover/
updates_key.pem
*.egg-info

View File

@@ -1 +1 @@
9999.99.99
2012.12.99

View File

@@ -1,3 +1,5 @@
include README.md
include test/*.py
include test/*.json
include test/*.json
include youtube-dl.bash-completion
include youtube-dl.1

View File

@@ -1,12 +1,16 @@
all: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion
clean:
rm -rf youtube-dl youtube-dl.exe youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz
rm -rf youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz
cleanall: clean
rm -f youtube-dl youtube-dl.exe
PREFIX=/usr/local
BINDIR=$(PREFIX)/bin
MANDIR=$(PREFIX)/man
SYSCONFDIR=/etc
PYTHON=/usr/bin/env python
install: youtube-dl youtube-dl.1 youtube-dl.bash-completion
install -d $(DESTDIR)$(BINDIR)
@@ -22,12 +26,14 @@ test:
tar: youtube-dl.tar.gz
.PHONY: all clean install test tar
.PHONY: all clean install test tar bash-completion pypi-files
pypi-files: youtube-dl.bash-completion README.txt youtube-dl.1
youtube-dl: youtube_dl/*.py
zip --quiet youtube-dl youtube_dl/*.py
zip --quiet --junk-paths youtube-dl youtube_dl/__main__.py
echo '#!/usr/bin/env python' > youtube-dl
echo '#!$(PYTHON)' > youtube-dl
cat youtube-dl.zip >> youtube-dl
rm youtube-dl.zip
chmod a+x youtube-dl
@@ -44,6 +50,8 @@ youtube-dl.1: README.md
youtube-dl.bash-completion: youtube_dl/*.py devscripts/bash-completion.in
python devscripts/bash-completion.py
bash-completion: youtube-dl.bash-completion
youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion
@tar -czf youtube-dl.tar.gz --transform "s|^|youtube-dl/|" --owner 0 --group 0 \
--exclude '*.DS_Store' \

View File

@@ -38,6 +38,10 @@ which means you can modify it, redistribute it or use it however you like.
--reject-title REGEX skip download for matching titles (regex or
caseless sub-string)
--max-downloads NUMBER Abort after downloading NUMBER files
--min-filesize SIZE Do not download any videos smaller than SIZE (e.g.
50k or 44.6m)
--max-filesize SIZE Do not download any videos larger than SIZE (e.g.
50k or 44.6m)
## Filesystem Options:
-t, --title use title in file name
@@ -81,6 +85,7 @@ which means you can modify it, redistribute it or use it however you like.
--get-description simulate, quiet but print video description
--get-filename simulate, quiet but print output filename
--get-format simulate, quiet but print output format
--newline output progress bar as new lines
--no-progress do not print progress bar
--console-title display progress in console titlebar
-v, --verbose print various debugging information
@@ -110,6 +115,8 @@ which means you can modify it, redistribute it or use it however you like.
--audio-quality QUALITY ffmpeg/avconv audio quality specification, insert a
value between 0 (better) and 9 (worse) for VBR or a
specific bitrate like 128K (default 5)
--recode-video FORMAT Encode the video to another format if necessary
(currently supported: mp4|flv|ogg|webm)
-k, --keep-video keeps the video file on disk after the post-
processing; the video is erased by default
--no-post-overwrites do not overwrite post-processed files; the post-

View File

@@ -4,12 +4,17 @@ import rsa
import json
from binascii import hexlify
try:
input = raw_input
except NameError:
pass
versions_info = json.load(open('update/versions.json'))
if 'signature' in versions_info:
del versions_info['signature']
print('Enter the PKCS1 private key, followed by a blank line:')
privkey = ''
privkey = b''
while True:
try:
line = input()
@@ -17,8 +22,7 @@ while True:
break
if line == '':
break
privkey += line + '\n'
privkey = bytes(privkey, 'ascii')
privkey += line.encode('ascii') + b'\n'
privkey = rsa.PrivateKey.load_pkcs1(privkey)
signature = hexlify(rsa.pkcs1.sign(json.dumps(versions_info, sort_keys=True).encode('utf-8'), privkey, 'SHA-256')).decode()

View File

@@ -20,19 +20,19 @@ if [ ! -z "`git tag | grep "$version"`" ]; then echo 'ERROR: version already pre
if [ ! -z "`git status --porcelain | grep -v CHANGELOG`" ]; then echo 'ERROR: the working directory is not clean; commit or stash changes'; exit 1; fi
if [ ! -f "updates_key.pem" ]; then echo 'ERROR: updates_key.pem missing'; exit 1; fi
echo "\n### First of all, testing..."
make clean
/bin/echo -e "\n### First of all, testing..."
make cleanall
nosetests --with-coverage --cover-package=youtube_dl --cover-html test || exit 1
echo "\n### Changing version in version.py..."
sed -i~ "s/__version__ = '.*'/__version__ = '$version'/" youtube_dl/version.py
/bin/echo -e "\n### Changing version in version.py..."
sed -i "s/__version__ = '.*'/__version__ = '$version'/" youtube_dl/version.py
echo "\n### Committing CHANGELOG README.md and youtube_dl/version.py..."
/bin/echo -e "\n### Committing CHANGELOG README.md and youtube_dl/version.py..."
make README.md
git add CHANGELOG README.md youtube_dl/version.py
git commit -m "release $version"
echo "\n### Now tagging, signing and pushing..."
/bin/echo -e "\n### Now tagging, signing and pushing..."
git tag -s -m "Release $version" "$version"
git show "$version"
read -p "Is it good, can I push? (y/n) " -n 1
@@ -42,42 +42,50 @@ MASTER=$(git rev-parse --abbrev-ref HEAD)
git push origin $MASTER:master
git push origin "$version"
echo "\n### OK, now it is time to build the binaries..."
/bin/echo -e "\n### OK, now it is time to build the binaries..."
REV=$(git rev-parse HEAD)
make youtube-dl youtube-dl.tar.gz
wget "http://jeromelaheurte.net:8142/download/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe || \
wget "http://jeromelaheurte.net:8142/build/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe
mkdir -p "update_staging/$version"
mv youtube-dl youtube-dl.exe "update_staging/$version"
mv youtube-dl.tar.gz "update_staging/$version/youtube-dl-$version.tar.gz"
mkdir -p "build/$version"
mv youtube-dl youtube-dl.exe "build/$version"
mv youtube-dl.tar.gz "build/$version/youtube-dl-$version.tar.gz"
RELEASE_FILES="youtube-dl youtube-dl.exe youtube-dl-$version.tar.gz"
(cd update_staging/$version/ && md5sum $RELEASE_FILES > MD5SUMS)
(cd update_staging/$version/ && sha1sum $RELEASE_FILES > SHA1SUMS)
(cd update_staging/$version/ && sha256sum $RELEASE_FILES > SHA2-256SUMS)
(cd update_staging/$version/ && sha512sum $RELEASE_FILES > SHA2-512SUMS)
(cd build/$version/ && md5sum $RELEASE_FILES > MD5SUMS)
(cd build/$version/ && sha1sum $RELEASE_FILES > SHA1SUMS)
(cd build/$version/ && sha256sum $RELEASE_FILES > SHA2-256SUMS)
(cd build/$version/ && sha512sum $RELEASE_FILES > SHA2-512SUMS)
git checkout HEAD -- youtube-dl youtube-dl.exe
echo "\n### Signing and uploading the new binaries to youtube-dl.org..."
for f in $RELEASE_FILES; do gpg --detach-sig "update_staging/$version/$f"; done
scp -r "update_staging/$version" ytdl@youtube-dl.org:html/downloads/
rm -r update_staging
/bin/echo -e "\n### Signing and uploading the new binaries to youtube-dl.org..."
for f in $RELEASE_FILES; do gpg --detach-sig "build/$version/$f"; done
scp -r "build/$version" ytdl@youtube-dl.org:html/downloads/
echo "\n### Now switching to gh-pages..."
git checkout gh-pages
git checkout "$MASTER" -- devscripts/gh-pages/
git reset devscripts/gh-pages/
devscripts/gh-pages/add-version.py $version
devscripts/gh-pages/sign-versions.py < updates_key.pem
devscripts/gh-pages/generate-download.py
devscripts/gh-pages/update-copyright.py
git add *.html *.html.in update
git commit -m "release $version"
git show HEAD
read -p "Is it good, can I push? (y/n) " -n 1
if [[ ! $REPLY =~ ^[Yy]$ ]]; then exit 1; fi
echo
git push origin gh-pages
/bin/echo -e "\n### Now switching to gh-pages..."
git clone --branch gh-pages --single-branch . build/gh-pages
ROOT=$(pwd)
(
set -e
ORIGIN_URL=$(git config --get remote.origin.url)
cd build/gh-pages
"$ROOT/devscripts/gh-pages/add-version.py" $version
"$ROOT/devscripts/gh-pages/sign-versions.py" < "$ROOT/updates_key.pem"
"$ROOT/devscripts/gh-pages/generate-download.py"
"$ROOT/devscripts/gh-pages/update-copyright.py"
git add *.html *.html.in update
git commit -m "release $version"
git show HEAD
read -p "Is it good, can I push? (y/n) " -n 1
if [[ ! $REPLY =~ ^[Yy]$ ]]; then exit 1; fi
echo
git push "$ROOT" gh-pages
git push "$ORIGIN_URL" gh-pages
)
rm -rf build
echo "\n### DONE!"
rm -r devscripts
git checkout $MASTER
make pypi-files
echo "Uploading to PyPi ..."
python setup.py sdist upload
make clean
/bin/echo -e "\n### DONE!"

View File

@@ -2,10 +2,14 @@
# -*- coding: utf-8 -*-
from __future__ import print_function
from distutils.core import setup
import pkg_resources
import sys
try:
from setuptools import setup
except ImportError:
from distutils.core import setup
try:
import py2exe
"""This will create an exe that needs Microsoft Visual C++ 2008 Redistributable Package"""

View File

@@ -26,6 +26,7 @@ cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar)
proxy_handler = compat_urllib_request.ProxyHandler()
opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler())
compat_urllib_request.install_opener(opener)
socket.setdefaulttimeout(10)
def _try_rm(filename):
""" Remove a file if it exists """
@@ -81,6 +82,11 @@ def generator(test_case):
fd.add_info_extractor(ie())
for ien in test_case.get('add_ie', []):
fd.add_info_extractor(getattr(youtube_dl.InfoExtractors, ien + 'IE')())
finished_hook_called = set()
def _hook(status):
if status['status'] == 'finished':
finished_hook_called.add(status['filename'])
fd.add_progress_hook(_hook)
test_cases = test_case.get('playlist', [test_case])
for tc in test_cases:
@@ -92,7 +98,8 @@ def generator(test_case):
for tc in test_cases:
if not test_case.get('params', {}).get('skip_download', False):
self.assertTrue(os.path.exists(tc['file']))
self.assertTrue(os.path.exists(tc['file']), msg='Missing file ' + tc['file'])
self.assertTrue(tc['file'] in finished_hook_called)
self.assertTrue(os.path.exists(tc['file'] + '.info.json'))
if 'md5' in tc:
md5_for_file = _file_md5(tc['file'])
@@ -100,11 +107,7 @@ def generator(test_case):
with io.open(tc['file'] + '.info.json', encoding='utf-8') as infof:
info_dict = json.load(infof)
for (info_field, value) in tc.get('info_dict', {}).items():
if value.startswith('md5:'):
md5_info_value = hashlib.md5(info_dict.get(info_field, '')).hexdigest()
self.assertEqual(value[3:], md5_info_value)
else:
self.assertEqual(value, info_dict.get(info_field))
self.assertEqual(value, info_dict.get(info_field))
finally:
for tc in test_cases:
_try_rm(tc['file'])

View File

@@ -35,6 +35,24 @@
"url": "http://www.xvideos.com/video939581/funny_porns_by_s_-1",
"file": "939581.flv"
},
{
"name": "YouPorn",
"md5": "c37ddbaaa39058c76a7e86c6813423c1",
"url": "http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/",
"file": "505835.mp4"
},
{
"name": "Pornotube",
"md5": "374dd6dcedd24234453b295209aa69b6",
"url": "http://pornotube.com/c/173/m/1689755/Marilyn-Monroe-Bathing",
"file": "1689755.flv"
},
{
"name": "YouJizz",
"md5": "07e15fa469ba384c7693fd246905547c",
"url": "http://www.youjizz.com/videos/zeichentrick-1-2189178.html",
"file": "2189178.flv"
},
{
"name": "Vimeo",
"md5": "8879b6cc097e987f02484baf890129e5",
@@ -58,7 +76,8 @@
"name": "StanfordOpenClassroom",
"md5": "544a9468546059d4e80d76265b0443b8",
"url": "http://openclassroom.stanford.edu/MainFolder/VideoPage.php?course=PracticalUnix&video=intro-environment&speed=100",
"file": "PracticalUnix_intro-environment.mp4"
"file": "PracticalUnix_intro-environment.mp4",
"skip": "Currently offline"
},
{
"name": "XNXX",
@@ -163,36 +182,127 @@
},
{
"name": "ComedyCentral",
"url": "http://www.thedailyshow.com/full-episodes/thu-december-13-2012-kristen-stewart",
"url": "http://www.thedailyshow.com/watch/thu-december-13-2012/kristen-stewart",
"file": "422212.mp4",
"md5": "4e2f5cb088a83cd8cdb7756132f9739d",
"info_dict": {
"title": "thedailyshow-kristen-stewart part 1"
}
},
{
"name": "RBMARadio",
"url": "http://www.rbmaradio.com/shows/ford-lopatin-live-at-primavera-sound-2011",
"file": "ford-lopatin-live-at-primavera-sound-2011.mp3",
"md5": "6bc6f9bcb18994b4c983bc3bf4384d95",
"info_dict": {
"title": "Live at Primavera Sound 2011",
"description": "Joel Ford and Daniel \u2019Oneohtrix Point Never\u2019 Lopatin fly their midified pop extravaganza to Spain. Live at Primavera Sound 2011.",
"uploader": "Ford & Lopatin",
"uploader_id": "ford-lopatin",
"location": "Spain"
}
},
{
"name": "Facebook",
"url": "https://www.facebook.com/photo.php?v=120708114770723",
"file": "120708114770723.mp4",
"md5": "48975a41ccc4b7a581abd68651c1a5a8",
"info_dict": {
"title": "PEOPLE ARE AWESOME 2013",
"duration": 279
}
},
{
"name": "EightTracks",
"url": "http://8tracks.com/ytdl/youtube-dl-test-tracks-a",
"playlist": [
{
"file": "422204.mp4",
"md5": "7a7abe068b31ff03e7b8a37596e72380",
"file": "11885610.m4a",
"md5": "96ce57f24389fc8734ce47f4c1abcc55",
"info_dict": {
"title": "thedailyshow-thu-december-13-2012-kristen-stewart part 1"
"title": "youtue-dl project<>\"' - youtube-dl test track 1 \"'/\\\u00e4\u21ad",
"uploader_id": "ytdl"
}
},
{
"file": "422205.mp4",
"md5": "30552b7274c94dbb933f64600eadddd2",
"file": "11885608.m4a",
"md5": "4ab26f05c1f7291ea460a3920be8021f",
"info_dict": {
"title": "thedailyshow-thu-december-13-2012-kristen-stewart part 2"
"title": "youtube-dl project - youtube-dl test track 2 \"'/\\\u00e4\u21ad",
"uploader_id": "ytdl"
}
},
{
"file": "422206.mp4",
"md5": "1f4c0664b352cb8e8fe85d5da4fbee91",
"file": "11885679.m4a",
"md5": "d30b5b5f74217410f4689605c35d1fd7",
"info_dict": {
"title": "thedailyshow-thu-december-13-2012-kristen-stewart part 3"
"title": "youtube-dl project as well - youtube-dl test track 3 \"'/\\\u00e4\u21ad"
}
},
{
"file": "422207.mp4",
"md5": "f61ee8a4e6bd1308438e03badad78554",
"file": "11885680.m4a",
"md5": "4eb0a669317cd725f6bbd336a29f923a",
"info_dict": {
"title": "thedailyshow-thu-december-13-2012-kristen-stewart part 4"
"title": "youtube-dl project as well - youtube-dl test track 4 \"'/\\\u00e4\u21ad"
}
},
{
"file": "11885682.m4a",
"md5": "1893e872e263a2705558d1d319ad19e8",
"info_dict": {
"title": "PH - youtube-dl test track 5 \"'/\\\u00e4\u21ad"
}
},
{
"file": "11885683.m4a",
"md5": "b673c46f47a216ab1741ae8836af5899",
"info_dict": {
"title": "PH - youtube-dl test track 6 \"'/\\\u00e4\u21ad"
}
},
{
"file": "11885684.m4a",
"md5": "1d74534e95df54986da7f5abf7d842b7",
"info_dict": {
"title": "phihag - youtube-dl test track 7 \"'/\\\u00e4\u21ad"
}
},
{
"file": "11885685.m4a",
"md5": "f081f47af8f6ae782ed131d38b9cd1c0",
"info_dict": {
"title": "phihag - youtube-dl test track 8 \"'/\\\u00e4\u21ad"
}
}
]
},
{
"name": "Keek",
"url": "http://www.keek.com/ytdl/keeks/NODfbab",
"file": "NODfbab.mp4",
"md5": "9b0636f8c0f7614afa4ea5e4c6e57e83",
"info_dict": {
"title": "test chars: \"'/\\ä<>This is a test video for youtube-dl.For more information, contact phihag@phihag.de ."
}
},
{
"name": "TED",
"url": "http://www.ted.com/talks/dan_dennett_on_our_consciousness.html",
"file": "102.mp4",
"md5": "7bc087e71d16f18f9b8ab9fa62a8a031",
"info_dict": {
"title": "Dan Dennett: The illusion of consciousness"
}
},
{
"name": "MySpass",
"url": "http://www.myspass.de/myspass/shows/tvshows/absolute-mehrheit/Absolute-Mehrheit-vom-17022013-Die-Highlights-Teil-2--/11741/",
"file": "11741.mp4",
"md5": "0b49f4844a068f8b33f4b7c88405862b",
"info_dict": {
"title": "Absolute Mehrheit vom 17.02.2013 - Die Highlights, Teil 2"
}
}
]

Binary file not shown.

View File

@@ -81,6 +81,9 @@ class FileDownloader(object):
writesubtitles: Write the video subtitles to a .srt file
subtitleslang: Language of the subtitles to download
test: Download only first bytes to test the downloader.
keepvideo: Keep the video file after post-processing
min_filesize: Skip files smaller than this size
max_filesize: Skip files larger than this size
"""
params = None
@@ -94,6 +97,7 @@ class FileDownloader(object):
"""Create a FileDownloader object with the given options."""
self._ies = []
self._pps = []
self._progress_hooks = []
self._download_retcode = 0
self._num_downloads = 0
self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
@@ -204,7 +208,7 @@ class FileDownloader(object):
# already of type unicode()
ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
elif 'TERM' in os.environ:
sys.stderr.write('\033]0;%s\007' % message.encode(preferredencoding()))
self.to_screen('\033]0;%s\007' % message, skip_eol=True)
def fixed_template(self):
"""Checks if the output template is fixed."""
@@ -301,7 +305,11 @@ class FileDownloader(object):
"""Report download progress."""
if self.params.get('noprogress', False):
return
self.to_screen(u'\r[download] %s of %s at %s ETA %s' %
if self.params.get('progress_with_newline', False):
self.to_screen(u'[download] %s of %s at %s ETA %s' %
(percent_str, data_len_str, speed_str, eta_str))
else:
self.to_screen(u'\r[download] %s of %s at %s ETA %s' %
(percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
(percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
@@ -529,13 +537,27 @@ class FileDownloader(object):
return self._download_retcode
def post_process(self, filename, ie_info):
"""Run the postprocessing chain on the given file."""
"""Run all the postprocessors on the given file."""
info = dict(ie_info)
info['filepath'] = filename
keep_video = None
for pp in self._pps:
info = pp.run(info)
if info is None:
break
try:
keep_video_wish,new_info = pp.run(info)
if keep_video_wish is not None:
if keep_video_wish:
keep_video = keep_video_wish
elif keep_video is None:
# No clear decision yet, let IE decide
keep_video = keep_video_wish
except PostProcessingError as e:
self.to_stderr(u'ERROR: ' + e.msg)
if keep_video is False and not self.params.get('keepvideo', False):
try:
self.to_stderr(u'Deleting original file %s (pass -k to keep)' % filename)
os.remove(encodeFilename(filename))
except (IOError, OSError):
self.to_stderr(u'WARNING: Unable to remove downloaded video file')
def _download_with_rtmpdump(self, filename, url, player_url, page_url):
self.report_destination(filename)
@@ -579,8 +601,15 @@ class FileDownloader(object):
retval = 0
break
if retval == 0:
self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(encodeFilename(tmpfilename)))
fsize = os.path.getsize(encodeFilename(tmpfilename))
self.to_screen(u'\r[rtmpdump] %s bytes' % fsize)
self.try_rename(tmpfilename, filename)
self._hook_progress({
'downloaded_bytes': fsize,
'total_bytes': fsize,
'filename': filename,
'status': 'finished',
})
return True
else:
self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval)
@@ -592,6 +621,10 @@ class FileDownloader(object):
# Check file already present
if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False):
self.report_file_already_downloaded(filename)
self._hook_progress({
'filename': filename,
'status': 'finished',
})
return True
# Attempt to download using rtmpdump
@@ -605,6 +638,8 @@ class FileDownloader(object):
# Do not include the Accept-Encoding header
headers = {'Youtubedl-no-compression': 'True'}
if 'user_agent' in info_dict:
headers['Youtubedl-user-agent'] = info_dict['user_agent']
basic_request = compat_urllib_request.Request(url, None, headers)
request = compat_urllib_request.Request(url, None, headers)
@@ -661,6 +696,10 @@ class FileDownloader(object):
# the one in the hard drive.
self.report_file_already_downloaded(filename)
self.try_rename(tmpfilename, filename)
self._hook_progress({
'filename': filename,
'status': 'finished',
})
return True
else:
# The length does not match, we start the download over
@@ -679,6 +718,15 @@ class FileDownloader(object):
data_len = data.info().get('Content-length', None)
if data_len is not None:
data_len = int(data_len) + resume_len
min_data_len = self.params.get("min_filesize", None)
max_data_len = self.params.get("max_filesize", None)
if min_data_len is not None and data_len < min_data_len:
self.to_screen(u'\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
return False
if max_data_len is not None and data_len > max_data_len:
self.to_screen(u'\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
return False
data_len_str = self.format_bytes(data_len)
byte_counter = 0 + resume_len
block_size = self.params.get('buffersize', 1024)
@@ -719,6 +767,14 @@ class FileDownloader(object):
eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
self.report_progress(percent_str, data_len_str, speed_str, eta_str)
self._hook_progress({
'downloaded_bytes': byte_counter,
'total_bytes': data_len,
'tmpfilename': tmpfilename,
'filename': filename,
'status': 'downloading',
})
# Apply rate limit
self.slow_down(start, byte_counter - resume_len)
@@ -735,4 +791,31 @@ class FileDownloader(object):
if self.params.get('updatetime', True):
info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))
self._hook_progress({
'downloaded_bytes': byte_counter,
'total_bytes': byte_counter,
'filename': filename,
'status': 'finished',
})
return True
def _hook_progress(self, status):
for ph in self._progress_hooks:
ph(status)
def add_progress_hook(self, ph):
    """ Register ph as a download progress callback.

    ph is called with one dictionary argument that has the entries
    * filename: The final filename
    * status: One of "downloading" and "finished"

    The dictionary can also carry some of the following entries:
    * downloaded_bytes: Bytes on disk
    * total_bytes: Total bytes, None if unknown
    * tmpfilename: The filename we're currently writing to

    A successful download calls every hook at least once, with status
    "finished".
    """
    hooks = self._progress_hooks
    hooks.append(ph)

View File

@@ -5,6 +5,7 @@ from __future__ import absolute_import
import base64
import datetime
import itertools
import netrc
import os
import re
@@ -35,15 +36,16 @@ class InfoExtractor(object):
url: Final video URL.
title: Video title, unescaped.
ext: Video filename extension.
uploader: Full name of the video uploader.
upload_date: Video upload date (YYYYMMDD).
The following fields are optional:
format: The video format, defaults to ext (used for --get-format)
thumbnail: Full URL to a video thumbnail image.
description: One-line video description.
uploader: Full name of the video uploader.
upload_date: Video upload date (YYYYMMDD).
uploader_id: Nickname or id of the video uploader.
location: Physical location of the video.
player_url: SWF Player URL (used for rtmpdump).
subtitles: The .srt file contents.
urlhandle: [internal] The urlHandle to be used to download the file,
@@ -106,19 +108,24 @@ class InfoExtractor(object):
def IE_NAME(self):
return type(self).__name__[:-2]
def _download_webpage(self, url_or_request, video_id, note=None, errnote=None):
def _request_webpage(self, url_or_request, video_id, note=None, errnote=None):
""" Returns the response handle """
if note is None:
note = u'Downloading video webpage'
self._downloader.to_screen(u'[%s] %s: %s' % (self.IE_NAME, video_id, note))
try:
urlh = compat_urllib_request.urlopen(url_or_request)
webpage_bytes = urlh.read()
return webpage_bytes.decode('utf-8', 'replace')
return compat_urllib_request.urlopen(url_or_request)
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
if errnote is None:
errnote = u'Unable to download webpage'
raise ExtractorError(u'%s: %s' % (errnote, compat_str(err)), sys.exc_info()[2])
def _download_webpage(self, url_or_request, video_id, note=None, errnote=None):
""" Returns the data of the page as a string """
urlh = self._request_webpage(url_or_request, video_id, note, errnote)
webpage_bytes = urlh.read()
return webpage_bytes.decode('utf-8', 'replace')
class YoutubeIE(InfoExtractor):
"""Information extractor for youtube.com."""
@@ -144,7 +151,7 @@ class YoutubeIE(InfoExtractor):
(?(1).+)? # if we found the ID, everything can follow
$"""
_LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
_LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en'
_LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
_AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
_NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
_NETRC_MACHINE = 'youtube'
@@ -257,13 +264,18 @@ class YoutubeIE(InfoExtractor):
srt_lang = list(srt_lang_list.keys())[0]
if not srt_lang in srt_lang_list:
return (u'WARNING: no closed captions found in the specified language', None)
request = compat_urllib_request.Request('http://www.youtube.com/api/timedtext?lang=%s&name=%s&v=%s' % (srt_lang, srt_lang_list[srt_lang], video_id))
params = compat_urllib_parse.urlencode({
'lang': srt_lang,
'name': srt_lang_list[srt_lang].encode('utf-8'),
'v': video_id,
})
url = 'http://www.youtube.com/api/timedtext?' + params
try:
srt_xml = compat_urllib_request.urlopen(request).read().decode('utf-8')
srt_xml = compat_urllib_request.urlopen(url).read().decode('utf-8')
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
return (u'WARNING: unable to download video subtitles: %s' % compat_str(err), None)
if not srt_xml:
return (u'WARNING: unable to download video subtitles', None)
return (u'WARNING: Did not fetch video subtitles', None)
return (None, self._closed_captions_xml_to_srt(srt_xml))
def _print_formats(self, formats):
@@ -308,19 +320,54 @@ class YoutubeIE(InfoExtractor):
if username is None:
return
request = compat_urllib_request.Request(self._LOGIN_URL)
try:
login_page = compat_urllib_request.urlopen(request).read().decode('utf-8')
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.to_stderr(u'WARNING: unable to fetch login page: %s' % compat_str(err))
return
galx = None
dsh = None
match = re.search(re.compile(r'<input.+?name="GALX".+?value="(.+?)"', re.DOTALL), login_page)
if match:
galx = match.group(1)
match = re.search(re.compile(r'<input.+?name="dsh".+?value="(.+?)"', re.DOTALL), login_page)
if match:
dsh = match.group(1)
# Log in
login_form = {
'current_form': 'loginForm',
'next': '/',
'action_login': 'Log In',
'username': username,
'password': password,
}
request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
login_form_strs = {
u'continue': u'http://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
u'Email': username,
u'GALX': galx,
u'Passwd': password,
u'PersistentCookie': u'yes',
u'_utf8': u'',
u'bgresponse': u'js_disabled',
u'checkConnection': u'',
u'checkedDomains': u'youtube',
u'dnConn': u'',
u'dsh': dsh,
u'pstMsg': u'0',
u'rmShown': u'1',
u'secTok': u'',
u'signIn': u'Sign in',
u'timeStmp': u'',
u'service': u'youtube',
u'uilel': u'3',
u'hl': u'en_US',
}
# Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
# chokes on unicode
login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
request = compat_urllib_request.Request(self._LOGIN_URL, login_data)
try:
self.report_login()
login_results = compat_urllib_request.urlopen(request).read().decode('utf-8')
if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
return
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
@@ -671,6 +718,7 @@ class DailymotionIE(InfoExtractor):
_VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^/]+)'
IE_NAME = u'dailymotion'
_WORKING = False
def __init__(self, downloader=None):
InfoExtractor.__init__(self, downloader)
@@ -966,7 +1014,7 @@ class VimeoIE(InfoExtractor):
"""Information extractor for vimeo.com."""
# _VALID_URL matches Vimeo URLs
_VALID_URL = r'(?:https?://)?(?:(?:www|player).)?vimeo\.com/(?:(?:groups|album)/[^/]+/)?(?:videos?/)?([0-9]+)'
_VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo\.com/(?:(?:groups|album)/[^/]+/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)'
IE_NAME = u'vimeo'
def __init__(self, downloader=None):
@@ -987,7 +1035,11 @@ class VimeoIE(InfoExtractor):
self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
return
video_id = mobj.group(1)
video_id = mobj.group('id')
if not mobj.group('proto'):
url = 'https://' + url
if mobj.group('direct_link'):
url = 'https://vimeo.com/' + video_id
# Retrieve video webpage to extract further information
request = compat_urllib_request.Request(url, None, std_headers)
@@ -1974,62 +2026,14 @@ class DepositFilesIE(InfoExtractor):
class FacebookIE(InfoExtractor):
"""Information Extractor for Facebook"""
_WORKING = False
_VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook\.com/(?:video/video|photo)\.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)'
_LOGIN_URL = 'https://login.facebook.com/login.php?m&next=http%3A%2F%2Fm.facebook.com%2Fhome.php&'
_NETRC_MACHINE = 'facebook'
_available_formats = ['video', 'highqual', 'lowqual']
_video_extensions = {
'video': 'mp4',
'highqual': 'mp4',
'lowqual': 'mp4',
}
IE_NAME = u'facebook'
def __init__(self, downloader=None):
InfoExtractor.__init__(self, downloader)
def _reporter(self, message):
"""Add header and report message."""
self._downloader.to_screen(u'[facebook] %s' % message)
def report_login(self):
"""Report attempt to log in."""
self._reporter(u'Logging in')
def report_video_webpage_download(self, video_id):
"""Report attempt to download video webpage."""
self._reporter(u'%s: Downloading video webpage' % video_id)
def report_information_extraction(self, video_id):
"""Report attempt to extract video information."""
self._reporter(u'%s: Extracting video information' % video_id)
def _parse_page(self, video_webpage):
"""Extract video information from page"""
# General data
data = {'title': r'\("video_title", "(.*?)"\)',
'description': r'<div class="datawrap">(.*?)</div>',
'owner': r'\("video_owner_name", "(.*?)"\)',
'thumbnail': r'\("thumb_url", "(?P<THUMB>.*?)"\)',
}
video_info = {}
for piece in data.keys():
mobj = re.search(data[piece], video_webpage)
if mobj is not None:
video_info[piece] = compat_urllib_parse.unquote_plus(mobj.group(1).decode("unicode_escape"))
# Video urls
video_urls = {}
for fmt in self._available_formats:
mobj = re.search(r'\("%s_src\", "(.+?)"\)' % fmt, video_webpage)
if mobj is not None:
# URL is in a Javascript segment inside an escaped Unicode format within
# the generally utf-8 page
video_urls[fmt] = compat_urllib_parse.unquote_plus(mobj.group(1).decode("unicode_escape"))
video_info['video_urls'] = video_urls
return video_info
self._downloader.to_screen(u'[%s] Logging in' % self.IE_NAME)
def _real_initialize(self):
if self._downloader is None:
@@ -2082,100 +2086,39 @@ class FacebookIE(InfoExtractor):
return
video_id = mobj.group('ID')
# Get video webpage
self.report_video_webpage_download(video_id)
request = compat_urllib_request.Request('https://www.facebook.com/video/video.php?v=%s' % video_id)
try:
page = compat_urllib_request.urlopen(request)
video_webpage = page.read()
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err))
return
url = 'https://www.facebook.com/video/video.php?v=%s' % video_id
webpage = self._download_webpage(url, video_id)
# Start extracting information
self.report_information_extraction(video_id)
BEFORE = '[["allowFullScreen","true"],["allowScriptAccess","always"],["salign","tl"],["scale","noscale"],["wmode","opaque"]].forEach(function(param) {swf.addParam(param[0], param[1]);});\n'
AFTER = '.forEach(function(variable) {swf.addVariable(variable[0], variable[1]);});'
m = re.search(re.escape(BEFORE) + '(.*?)' + re.escape(AFTER), webpage)
if not m:
raise ExtractorError(u'Cannot parse data')
data = dict(json.loads(m.group(1)))
params_raw = compat_urllib_parse.unquote(data['params'])
params = json.loads(params_raw)
video_url = params['hd_src']
if not video_url:
video_url = params['sd_src']
if not video_url:
raise ExtractorError(u'Cannot find video URL')
video_duration = int(params['video_duration'])
# Extract information
video_info = self._parse_page(video_webpage)
m = re.search('<h2 class="uiHeaderTitle">([^<]+)</h2>', webpage)
if not m:
raise ExtractorError(u'Cannot find title in webpage')
video_title = unescapeHTML(m.group(1))
# uploader
if 'owner' not in video_info:
self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
return
video_uploader = video_info['owner']
info = {
'id': video_id,
'title': video_title,
'url': video_url,
'ext': 'mp4',
'duration': video_duration,
'thumbnail': params['thumbnail_src'],
}
return [info]
# title
if 'title' not in video_info:
self._downloader.trouble(u'ERROR: unable to extract video title')
return
video_title = video_info['title']
video_title = video_title.decode('utf-8')
# thumbnail image
if 'thumbnail' not in video_info:
self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
video_thumbnail = ''
else:
video_thumbnail = video_info['thumbnail']
# upload date
upload_date = None
if 'upload_date' in video_info:
upload_time = video_info['upload_date']
timetuple = email.utils.parsedate_tz(upload_time)
if timetuple is not None:
try:
upload_date = time.strftime('%Y%m%d', timetuple[0:9])
except:
pass
# description
video_description = video_info.get('description', 'No description available.')
url_map = video_info['video_urls']
if url_map:
# Decide which formats to download
req_format = self._downloader.params.get('format', None)
format_limit = self._downloader.params.get('format_limit', None)
if format_limit is not None and format_limit in self._available_formats:
format_list = self._available_formats[self._available_formats.index(format_limit):]
else:
format_list = self._available_formats
existing_formats = [x for x in format_list if x in url_map]
if len(existing_formats) == 0:
self._downloader.trouble(u'ERROR: no known formats available for video')
return
if req_format is None:
video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
elif req_format == 'worst':
video_url_list = [(existing_formats[len(existing_formats)-1], url_map[existing_formats[len(existing_formats)-1]])] # worst quality
elif req_format == '-1':
video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
else:
# Specific format
if req_format not in url_map:
self._downloader.trouble(u'ERROR: requested format not available')
return
video_url_list = [(req_format, url_map[req_format])] # Specific format
results = []
for format_param, video_real_url in video_url_list:
# Extension
video_extension = self._video_extensions.get(format_param, 'mp4')
results.append({
'id': video_id.decode('utf-8'),
'url': video_real_url.decode('utf-8'),
'uploader': video_uploader.decode('utf-8'),
'upload_date': upload_date,
'title': video_title,
'ext': video_extension.decode('utf-8'),
'format': (format_param is None and u'NA' or format_param.decode('utf-8')),
'thumbnail': video_thumbnail.decode('utf-8'),
'description': video_description.decode('utf-8'),
})
return results
class BlipTVIE(InfoExtractor):
"""Information extractor for blip.tv"""
@@ -2204,6 +2147,7 @@ class BlipTVIE(InfoExtractor):
cchar = '?'
json_url = url + cchar + 'skin=json&version=2&no_wrap=1'
request = compat_urllib_request.Request(json_url)
request.add_header('User-Agent', 'iTunes/10.6.1')
self.report_extraction(mobj.group(1))
info = None
try:
@@ -2224,8 +2168,7 @@ class BlipTVIE(InfoExtractor):
'urlhandle': urlh
}
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % compat_str(err))
return
raise ExtractorError(u'ERROR: unable to download video info webpage: %s' % compat_str(err))
if info is None: # Regular URL
try:
json_code_bytes = urlh.read()
@@ -2258,13 +2201,13 @@ class BlipTVIE(InfoExtractor):
'format': data['media']['mimeType'],
'thumbnail': data['thumbnailUrl'],
'description': data['description'],
'player_url': data['embedUrl']
'player_url': data['embedUrl'],
'user_agent': 'iTunes/10.6.1',
}
except (ValueError,KeyError) as err:
self._downloader.trouble(u'ERROR: unable to parse video information: %s' % repr(err))
return
std_headers['User-Agent'] = 'iTunes/10.6.1'
return [info]
@@ -2294,7 +2237,7 @@ class MyVideoIE(InfoExtractor):
webpage = self._download_webpage(webpage_url, video_id)
self.report_extraction(video_id)
mobj = re.search(r'<link rel=\'image_src\' href=\'(http://is[0-9].myvideo\.de/de/movie[0-9]+/[a-f0-9]+)/thumbs/[^.]+\.jpg\' />',
mobj = re.search(r'<link rel=\'image_src\' href=\'(http://is[0-9].myvideo\.de/de/movie[0-9]+/[a-f0-9]+)/thumbs/.*?\.jpg\' />',
webpage)
if mobj is None:
self._downloader.trouble(u'ERROR: unable to extract media URL')
@@ -2977,8 +2920,7 @@ class StanfordOpenClassroomIE(InfoExtractor):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
if mobj is None:
self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
return
raise ExtractorError(u'Invalid URL: %s' % url)
if mobj.group('course') and mobj.group('video'): # A specific video
course = mobj.group('course')
@@ -3015,12 +2957,9 @@ class StanfordOpenClassroomIE(InfoExtractor):
'upload_date': None,
}
self.report_download_webpage(info['id'])
try:
coursepage = compat_urllib_request.urlopen(url).read()
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.trouble(u'ERROR: unable to download course info page: ' + compat_str(err))
return
coursepage = self._download_webpage(url, info['id'],
note='Downloading course info page',
errnote='Unable to download course info page')
m = re.search('<h1>([^<]+)</h1>', coursepage)
if m:
@@ -3044,7 +2983,6 @@ class StanfordOpenClassroomIE(InfoExtractor):
assert entry['type'] == 'reference'
results += self.extract(entry['url'])
return results
else: # Root page
info = {
'id': 'Stanford OpenClassroom',
@@ -3272,7 +3210,7 @@ class YoukuIE(InfoExtractor):
class XNXXIE(InfoExtractor):
"""Information extractor for xnxx.com"""
_VALID_URL = r'^http://video\.xnxx\.com/video([0-9]+)/(.*)'
_VALID_URL = r'^(?:https?://)?video\.xnxx\.com/video([0-9]+)/(.*)'
IE_NAME = u'xnxx'
VIDEO_URL_RE = r'flv_url=(.*?)&amp;'
VIDEO_TITLE_RE = r'<title>(.*?)\s+-\s+XNXX.COM'
@@ -3535,10 +3473,12 @@ class JustinTVIE(InfoExtractor):
video_extension = os.path.splitext(video_url)[1][1:]
video_date = re.sub('-', '', clip['start_time'][:10])
video_uploader_id = clip.get('user_id', clip.get('channel_id'))
video_id = clip['id']
video_title = clip.get('title', video_id)
info.append({
'id': clip['id'],
'id': video_id,
'url': video_url,
'title': clip['title'],
'title': video_title,
'uploader': clip.get('channel_name', video_uploader_id),
'uploader_id': video_uploader_id,
'upload_date': video_date,
@@ -3699,11 +3639,11 @@ class SteamIE(InfoExtractor):
}
videos.append(info)
return videos
class UstreamIE(InfoExtractor):
_VALID_URL = r'http://www.ustream.tv/recorded/(?P<videoID>\d+)'
_VALID_URL = r'https?://www\.ustream\.tv/recorded/(?P<videoID>\d+)'
IE_NAME = u'ustream'
def _real_extract(self, url):
m = re.match(self._VALID_URL, url)
video_id = m.group('videoID')
@@ -3722,6 +3662,444 @@ class UstreamIE(InfoExtractor):
}
return [info]
class RBMARadioIE(InfoExtractor):
    """Information extractor for rbmaradio.com shows."""

    _VALID_URL = r'https?://(?:www\.)?rbmaradio\.com/shows/(?P<videoID>[^/]+)$'

    def _real_extract(self, url):
        """Return a one-element list holding the info dictionary of the show.

        Raises ExtractorError when the embedded metadata cannot be located
        in the page or is not valid JSON.
        """
        video_id = re.match(self._VALID_URL, url).group('videoID')

        webpage = self._download_webpage(url, video_id)
        # The metadata is a JSON literal assigned to gon.show in an inline script
        metadata_match = re.search(r'<script>window.gon = {.*?};gon\.show=(.+?);</script>', webpage)
        if not metadata_match:
            raise ExtractorError(u'Cannot find metadata')

        try:
            data = json.loads(metadata_match.group(1))
        except ValueError as e:
            raise ExtractorError(u'Invalid JSON: ' + str(e))

        video_url = data['akamai_url'] + '&cbr=256'
        # The extension is whatever follows the last dot of the URL path
        video_ext = compat_urllib_parse_urlparse(video_url).path.rpartition('.')[2]

        host = data.get('host', {})
        image = data.get('image', {})
        return [{
            'id': video_id,
            'url': video_url,
            'ext': video_ext,
            'title': data['title'],
            'description': data.get('teaser_text'),
            'location': data.get('country_of_origin'),
            'uploader': host.get('name'),
            'uploader_id': host.get('slug'),
            'thumbnail': image.get('large_url_2x'),
            'duration': data.get('duration'),
        }]
class YouPornIE(InfoExtractor):
    """Information extractor for youporn.com."""
    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+)'

    def _print_formats(self, formats):
        """Print all available formats as an "ext / format" table."""
        print(u'Available formats:')
        print(u'ext\t\tformat')
        print(u'---------------------------------')
        for fmt in formats:
            print(u'%s\t\t%s' % (fmt['ext'], fmt['format']))

    def _specific(self, req_format, formats):
        """Return the first entry whose 'format' equals req_format, or None."""
        for candidate in formats:
            if candidate["format"] == req_format:
                return candidate
        return None

    def _real_extract(self, url):
        """Extract the downloadable formats of the video at url.

        Returns a list of info dictionaries selected according to the
        'format' downloader parameter: the first link for None/'best', the
        last link for 'worst', every link for '-1'/'all', otherwise the one
        whose format name matches. Returns None after reporting trouble for
        an invalid URL or an unavailable requested format, and after printing
        the format table when 'listformats' is set.

        Raises ExtractorError when the title, the download list or its links
        cannot be found in the page.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
            return

        video_id = mobj.group('videoid')

        # The age_verified cookie is sent along with the page request
        req = compat_urllib_request.Request(url)
        req.add_header('Cookie', 'age_verified=1')
        webpage = self._download_webpage(req, video_id)

        # Get the video title
        result = re.search(r'<h1.*?>(?P<title>.*)</h1>', webpage)
        if result is None:
            raise ExtractorError(u'Unable to extract video title')
        video_title = result.group('title').strip()

        # Get the video date
        result = re.search(r'Date:</label>(?P<date>.*) </li>', webpage)
        if result is None:
            self._downloader.to_stderr(u'WARNING: unable to extract video date')
            upload_date = None
        else:
            upload_date = result.group('date').strip()

        # Get the video uploader
        result = re.search(r'Submitted:</label>(?P<uploader>.*)</li>', webpage)
        if result is None:
            self._downloader.to_stderr(u'WARNING: unable to extract uploader')
            video_uploader = None
        else:
            video_uploader = result.group('uploader').strip()
            video_uploader = clean_html(video_uploader)

        # Get all of the formats available
        DOWNLOAD_LIST_RE = r'(?s)<ul class="downloadList">(?P<download_list>.*?)</ul>'
        result = re.search(DOWNLOAD_LIST_RE, webpage)
        if result is None:
            raise ExtractorError(u'Unable to extract download list')
        download_list_html = result.group('download_list').strip()

        # Get all of the links from the page
        LINK_RE = r'(?s)<a href="(?P<url>[^"]+)">'
        links = re.findall(LINK_RE, download_list_html)
        if not links:
            raise ExtractorError(u'ERROR: no known formats available for video')

        self._downloader.to_screen(u'[youporn] Links found: %d' % len(links))

        formats = []
        for link in links:
            # A link looks like this:
            # http://cdn1.download.youporn.phncdn.com/201210/31/8004515/480p_370k_8004515/YouPorn%20-%20Nubile%20Films%20The%20Pillow%20Fight.mp4?nvb=20121113051249&nva=20121114051249&ir=1200&sr=1200&hash=014b882080310e95fb6a0
            # A path looks like this:
            # /201210/31/8004515/480p_370k_8004515/YouPorn%20-%20Nubile%20Films%20The%20Pillow%20Fight.mp4
            video_url = unescapeHTML(link)
            path = compat_urllib_parse_urlparse(video_url).path
            extension = os.path.splitext(path)[1][1:]
            # Fifth path component is "<size>_<bitrate>_<id>"; keep size and bitrate
            size, bitrate = path.split('/')[4].split('_')[:2]
            format_name = "-".join((size, bitrate))
            title = u'%s-%s-%s' % (video_title, size, bitrate)

            formats.append({
                'id': video_id,
                'url': video_url,
                'uploader': video_uploader,
                'upload_date': upload_date,
                'title': title,
                'ext': extension,
                'format': format_name,
                'thumbnail': None,
                'description': None,
                'player_url': None
            })

        if self._downloader.params.get('listformats', None):
            self._print_formats(formats)
            return

        req_format = self._downloader.params.get('format', None)
        self._downloader.to_screen(u'[youporn] Format: %s' % req_format)

        if req_format is None or req_format == 'best':
            return [formats[0]]
        elif req_format == 'worst':
            return [formats[-1]]
        elif req_format in ('-1', 'all'):
            return formats
        else:
            selected = self._specific(req_format, formats)
            # Check the lookup result itself: the earlier regex match object is
            # never None at this point, so testing it would let [None] escape.
            if selected is None:
                self._downloader.trouble(u'ERROR: requested format not available')
                return
            return [selected]
class PornotubeIE(InfoExtractor):
    """Information extractor for pornotube.com."""
    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?pornotube\.com(/c/(?P<channel>[0-9]+))?(/m/(?P<videoid>[0-9]+))(/(?P<title>.+))$'

    def _real_extract(self, url):
        """Return a one-element list with the info dictionary of the video.

        The id and the title are taken from the URL itself; the media URL and
        the upload date are scraped from the page. Returns None after
        reporting trouble when the URL is invalid or either scraped value is
        missing.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
            return

        video_id = mobj.group('videoid')
        video_title = mobj.group('title')

        # Get webpage content
        webpage = self._download_webpage(url, video_id)

        # Get the video URL
        VIDEO_URL_RE = r'url: "(?P<url>http://video[0-9].pornotube.com/.+\.flv)",'
        result = re.search(VIDEO_URL_RE, webpage)
        if result is None:
            self._downloader.trouble(u'ERROR: unable to extract video url')
            return
        video_url = compat_urllib_parse.unquote(result.group('url'))

        # Get the uploaded date
        VIDEO_UPLOADED_RE = r'<div class="video_added_by">Added (?P<date>[0-9\/]+) by'
        result = re.search(VIDEO_UPLOADED_RE, webpage)
        if result is None:
            # It is the upload date that is missing here, not the title
            self._downloader.trouble(u'ERROR: unable to extract upload date')
            return
        upload_date = result.group('date')

        info = {'id': video_id,
                'url': video_url,
                'uploader': None,
                'upload_date': upload_date,
                'title': video_title,
                'ext': 'flv',
                'format': 'flv'}

        return [info]
class YouJizzIE(InfoExtractor):
    """Information extractor for youjizz.com."""

    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?youjizz\.com/videos/(?P<videoid>[^.]+).html$'

    def _real_extract(self, url):
        """Return a one-element list with the info dictionary of the video.

        The title comes from the video page, the media URL from the embed
        page linked there. Returns None after reporting trouble on an invalid
        URL; raises ExtractorError when the title, the embed page link or the
        media URL cannot be found.
        """
        url_match = re.match(self._VALID_URL, url)
        if url_match is None:
            self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
            return

        # First pass: the regular video page, fetched under the id from the URL
        webpage = self._download_webpage(url, url_match.group('videoid'))

        title_match = re.search(r'<title>(?P<title>.*)</title>', webpage)
        if title_match is None:
            raise ExtractorError(u'ERROR: unable to extract video title')
        video_title = title_match.group('title').strip()

        embed_match = re.search(r'https?://www.youjizz.com/videos/embed/(?P<videoid>[0-9]+)', webpage)
        if embed_match is None:
            raise ExtractorError(u'ERROR: unable to extract embed page')
        embed_page_url = embed_match.group(0).strip()
        # From here on the numeric id found in the embed link is the video id
        video_id = embed_match.group('videoid')

        # Second pass: the embed page, which carries the media URL
        embed_page = self._download_webpage(embed_page_url, video_id)

        source_match = re.search(r'so.addVariable\("file",encodeURIComponent\("(?P<source>[^"]+)"\)\);', embed_page)
        if source_match is None:
            raise ExtractorError(u'ERROR: unable to extract video url')

        return [{
            'id': video_id,
            'url': source_match.group('source'),
            'title': video_title,
            'ext': 'flv',
            'format': 'flv',
            'player_url': embed_page_url,
        }]
class EightTracksIE(InfoExtractor):
    """Information extractor for 8tracks.com mixes."""

    IE_NAME = '8tracks'
    _VALID_URL = r'https?://8tracks.com/(?P<user>[^/]+)/(?P<id>[^/#]+)(?:#.*)?$'

    def _real_extract(self, url):
        """Return the info dictionaries of all tracks of the mix at url.

        Tracks are requested one by one from the play/next endpoints until a
        response flags the last track. Raises ExtractorError when the URL
        does not match or the mix data is missing from the page.
        """
        url_match = re.match(self._VALID_URL, url)
        if url_match is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        playlist_id = url_match.group('id')

        webpage = self._download_webpage(url, playlist_id)

        mix_match = re.search(r"PAGE.mix = (.*?);\n", webpage, flags=re.DOTALL)
        if not mix_match:
            raise ExtractorError(u'Cannot find trax information')
        data = json.loads(mix_match.group(1))

        # A random number serves as the session identifier in the API URLs
        session = str(random.randint(0, 1000000000))
        mix_id = data['id']
        track_count = data['tracks_count']

        tracks = []
        next_url = 'http://8tracks.com/sets/%s/play?player=sm&mix_id=%s&format=jsonh' % (session, mix_id)
        track_number = 0
        while True:
            track_number += 1
            api_json = self._download_webpage(
                next_url, playlist_id,
                note=u'Downloading song information %s/%s' % (str(track_number), track_count),
                errnote=u'Failed to download song information')
            api_data = json.loads(api_json)
            track_data = api_data[u'set']['track']
            tracks.append({
                'id': track_data['id'],
                'url': track_data['track_file_stream_url'],
                'title': track_data['performer'] + u' - ' + track_data['name'],
                'raw_title': track_data['name'],
                'uploader_id': data['user']['login'],
                'ext': 'm4a',
            })
            if api_data['set']['at_last_track']:
                return tracks
            # The following request names the track that was just received
            next_url = 'http://8tracks.com/sets/%s/next?player=sm&mix_id=%s&format=jsonh&track_id=%s' % (session, mix_id, track_data['id'])
class KeekIE(InfoExtractor):
    """Information extractor for keek.com."""
    _VALID_URL = r'http://(?:www\.)?keek\.com/(?:!|\w+/keeks/)(?P<videoID>\w+)'
    IE_NAME = u'keek'

    def _real_extract(self, url):
        """Return a one-element list with the info dictionary of the keek.

        The media and thumbnail URLs are built from the id found in url; the
        title and the uploader are scraped from the page. A missing uploader
        only produces a warning (the field is then None).

        Raises ExtractorError when the URL does not match or the title cannot
        be found, instead of failing with an AttributeError on a None match.
        """
        m = re.match(self._VALID_URL, url)
        if m is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = m.group('videoID')
        video_url = u'http://cdn.keek.com/keek/video/%s' % video_id
        thumbnail = u'http://cdn.keek.com/keek/thumbnail/%s/w100/h75' % video_id
        webpage = self._download_webpage(url, video_id)

        m = re.search(r'<meta property="og:title" content="(?P<title>.+)"', webpage)
        if m is None:
            raise ExtractorError(u'Unable to extract video title')
        title = unescapeHTML(m.group('title'))

        m = re.search(r'<div class="bio-names-and-report">[\s\n]+<h4>(?P<uploader>\w+)</h4>', webpage)
        if m is None:
            self._downloader.to_stderr(u'WARNING: unable to extract uploader')
            uploader = None
        else:
            uploader = unescapeHTML(m.group('uploader'))

        info = {
            'id': video_id,
            'url': video_url,
            'ext': 'mp4',
            'title': title,
            'thumbnail': thumbnail,
            'uploader': uploader
        }
        return [info]
class TEDIE(InfoExtractor):
    """Information extractor for ted.com talks and playlists."""

    # Written for re.VERBOSE: whitespace and "#" comments inside the pattern
    # are ignored, so it must always be matched with that flag.
    _VALID_URL=r'''http://www.ted.com/
                   (
                        ((?P<type_playlist>playlists)/(?P<playlist_id>\d+)) # We have a playlist
                        |
                        ((?P<type_talk>talks)) # We have a simple talk
                   )
                   /(?P<name>\w+) # Here goes the name and then ".html"
                   '''

    def suitable(self, url):
        """Receives a URL and returns True if suitable for this IE."""
        return re.match(self._VALID_URL, url, re.VERBOSE) is not None

    def _real_extract(self, url):
        """Return a list of info dictionaries: one for a talk URL, one per
        listed talk for a playlist URL."""
        m=re.match(self._VALID_URL, url, re.VERBOSE)
        if m.group('type_talk'):
            return [self._talk_info(url)]
        else:
            playlist_id=m.group('playlist_id')
            name=m.group('name')
            self._downloader.to_screen(u'[%s] Getting info of playlist %s: "%s"' % (self.IE_NAME,playlist_id,name))
            return self._playlist_videos_info(url,name,playlist_id)

    def _talk_video_link(self,mediaSlug):
        '''Returns the video link for that mediaSlug'''
        return 'http://download.ted.com/talks/%s.mp4' % mediaSlug

    def _playlist_videos_info(self,url,name,playlist_id=0):
        '''Returns the videos of the playlist

        Talk entries and talk titles are matched separately in the playlist
        page and paired up in page order.
        '''
        video_RE=r'''
                     <li\ id="talk_(\d+)"([.\s]*?)data-id="(?P<video_id>\d+)"
                     ([.\s]*?)data-playlist_item_id="(\d+)"
                     ([.\s]*?)data-mediaslug="(?P<mediaSlug>.+?)"
                     '''
        video_name_RE=r'<p\ class="talk-title"><a href="/talks/(.+).html">(?P<fullname>.+?)</a></p>'
        webpage=self._download_webpage(url, playlist_id, 'Downloading playlist webpage')
        m_videos=re.finditer(video_RE,webpage,re.VERBOSE)
        m_names=re.finditer(video_name_RE,webpage)
        info=[]
        for m_video, m_name in zip(m_videos,m_names):
            video_dic={
                       'id': m_video.group('video_id'),
                       'url': self._talk_video_link(m_video.group('mediaSlug')),
                       'ext': 'mp4',
                       'title': m_name.group('fullname')
                       }
            info.append(video_dic)
        return info

    def _talk_info(self, url, video_id=0):
        """Return the video for the talk in the url

        video_id is only used to label the page download; the id put in the
        result is the one found in the page.

        Raises ExtractorError when the title or the talk details cannot be
        found in the page, instead of failing with an AttributeError on a
        None match.
        """
        m=re.match(self._VALID_URL, url,re.VERBOSE)
        videoName=m.group('name')
        webpage=self._download_webpage(url, video_id, 'Downloading \"%s\" page' % videoName)
        # If the url includes the language we get the title translated
        title_RE=r'<h1><span id="altHeadline" >(?P<title>[\s\w:/\.\?=\+-\\\']*)</span></h1>'
        title_match=re.search(title_RE, webpage)
        if title_match is None:
            raise ExtractorError(u'Unable to extract talk title')
        title=title_match.group('title')
        info_RE=r'''<script\ type="text/javascript">var\ talkDetails\ =(.*?)
                        "id":(?P<videoID>[\d]+).*?
                        "mediaSlug":"(?P<mediaSlug>[\w\d]+?)"'''
        info_match=re.search(info_RE,webpage,re.VERBOSE)
        if info_match is None:
            raise ExtractorError(u'Unable to extract talk information')
        video_id=info_match.group('videoID')
        mediaSlug=info_match.group('mediaSlug')
        video_url=self._talk_video_link(mediaSlug)
        info = {
                'id': video_id,
                'url': video_url,
                'ext': 'mp4',
                'title': title
                }
        return info
class MySpassIE(InfoExtractor):
    """Information extractor for myspass.de."""
    _VALID_URL = r'http://www.myspass.de/.*'

    def _real_extract(self, url):
        """Return a one-element list with the info dictionary of the video.

        All values come from the metadata XML document requested for the
        video id found at the end of the URL path. Returns None after
        reporting trouble when the download URL or the title is missing from
        that document. The format falls back to the file extension when the
        document has no format_id element.
        """
        META_DATA_URL_TEMPLATE = 'http://www.myspass.de/myspass/includes/apps/video/getvideometadataxml.php?id=%s'

        # video id is the last path element of the URL
        # usually there is a trailing slash, so also try the second but last
        url_path = compat_urllib_parse_urlparse(url).path
        url_parent_path, video_id = os.path.split(url_path)
        if not video_id:
            _, video_id = os.path.split(url_parent_path)

        # get metadata
        metadata_url = META_DATA_URL_TEMPLATE % video_id
        metadata_text = self._download_webpage(metadata_url, video_id)
        metadata = xml.etree.ElementTree.fromstring(metadata_text.encode('utf-8'))

        # extract values from metadata
        url_flv_el = metadata.find('url_flv')
        if url_flv_el is None:
            self._downloader.trouble(u'ERROR: unable to extract download url')
            return
        video_url = url_flv_el.text
        extension = os.path.splitext(video_url)[1][1:]

        title_el = metadata.find('title')
        if title_el is None:
            self._downloader.trouble(u'ERROR: unable to extract title')
            return
        title = title_el.text

        format_id_el = metadata.find('format_id')
        if format_id_el is None:
            # Fall back to the extension computed above
            video_format = extension
        else:
            video_format = format_id_el.text

        description_el = metadata.find('description')
        if description_el is not None:
            description = description_el.text
        else:
            description = None

        imagePreview_el = metadata.find('imagePreview')
        if imagePreview_el is not None:
            thumbnail = imagePreview_el.text
        else:
            thumbnail = None

        info = {
            'id': video_id,
            'url': video_url,
            'title': title,
            'ext': extension,
            'format': video_format,
            'thumbnail': thumbnail,
            'description': description
        }
        return [info]
def gen_extractors():
""" Return a list of an instance of every supported extractor.
@@ -3756,6 +4134,9 @@ def gen_extractors():
MTVIE(),
YoukuIE(),
XNXXIE(),
YouJizzIE(),
PornotubeIE(),
YouPornIE(),
GooglePlusIE(),
ArteTvIE(),
NBAIE(),
@@ -3764,6 +4145,11 @@ def gen_extractors():
TweetReelIE(),
SteamIE(),
UstreamIE(),
RBMARadioIE(),
EightTracksIE(),
KeekIE(),
TEDIE(),
MySpassIE(),
GenericIE()
]

View File

@@ -45,31 +45,24 @@ class PostProcessor(object):
one has an extra field called "filepath" that points to the
downloaded file.
When this method returns None, the postprocessing chain is
stopped. However, this method may return an information
dictionary that will be passed to the next postprocessing
object in the chain. It can be the one it received after
changing some fields.
This method returns a tuple, the first element of which describes
whether the original file should be kept (i.e. not deleted - None for
no preference), and the second of which is the updated information.
In addition, this method may raise a PostProcessingError
exception that will be taken into account by the downloader
it was called from.
exception if post processing fails.
"""
return information # by default, do nothing
return None, information # by default, keep file and do nothing
# Error raised when an audio conversion fails; the failure text is stored
# on the instance as .message.
# NOTE(review): this derives from BaseException, so handlers written as
# `except Exception` will not catch it - confirm that is intended.
class AudioConversionError(BaseException):
    def __init__(self, message):
        self.message = message
class FFmpegPostProcessorError(PostProcessingError):
    """Raised by run_ffmpeg when no converter is found or the conversion exits with an error."""
    pass
class FFmpegExtractAudioPP(PostProcessor):
def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, keepvideo=False, nopostoverwrites=False):
class AudioConversionError(PostProcessingError):
    """Raised in place of FFmpegPostProcessorError when an audio conversion run fails."""
    pass
class FFmpegPostProcessor(PostProcessor):
def __init__(self,downloader=None):
PostProcessor.__init__(self, downloader)
if preferredcodec is None:
preferredcodec = 'best'
self._preferredcodec = preferredcodec
self._preferredquality = preferredquality
self._keepvideo = keepvideo
self._nopostoverwrites = nopostoverwrites
self._exes = self.detect_executables()
@staticmethod
@@ -83,6 +76,33 @@ class FFmpegExtractAudioPP(PostProcessor):
programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe']
return dict((program, executable(program)) for program in programs)
def run_ffmpeg(self, path, out_path, opts):
    """Run avconv (preferred) or ffmpeg to convert *path* into *out_path*.

    opts is a list of extra command-line arguments inserted between the
    input and the output file name. An existing output file is
    overwritten ('-y').

    Raises FFmpegPostProcessorError if neither program was detected, or
    if the program exits with a non-zero status; in the latter case the
    message is the last line the program wrote to stderr.
    """
    converter = self._exes['avconv'] or self._exes['ffmpeg']
    if not converter:
        raise FFmpegPostProcessorError(u'ffmpeg or avconv not found. Please install one.')

    cmd = ([converter, '-y', '-i', encodeFilename(path)]
           + opts +
           [encodeFilename(self._ffmpeg_filename_argument(out_path))])
    p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout, stderr = p.communicate()
    if p.returncode != 0:
        # communicate() returns bytes; decode before splitting on a text
        # newline, otherwise Python 3 raises TypeError here and hides the
        # real error message.
        stderr = stderr.decode('utf-8', 'replace')
        msg = stderr.strip().split('\n')[-1]
        raise FFmpegPostProcessorError(msg)
def _ffmpeg_filename_argument(self, fn):
# ffmpeg broke --, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details
if fn.startswith(u'-'):
return u'./' + fn
return fn
class FFmpegExtractAudioPP(FFmpegPostProcessor):
def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, nopostoverwrites=False):
    """Set up the audio extractor.

    downloader is handed to the FFmpegPostProcessor initialiser.
    preferredcodec defaults to 'best' when None is given.
    preferredquality and nopostoverwrites are stored unchanged.
    """
    FFmpegPostProcessor.__init__(self, downloader)
    self._preferredcodec = 'best' if preferredcodec is None else preferredcodec
    self._preferredquality = preferredquality
    self._nopostoverwrites = nopostoverwrites
def get_audio_codec(self, path):
if not self._exes['ffprobe'] and not self._exes['avprobe']: return None
try:
@@ -108,29 +128,25 @@ class FFmpegExtractAudioPP(PostProcessor):
acodec_opts = []
else:
acodec_opts = ['-acodec', codec]
cmd = ([self._exes['avconv'] or self._exes['ffmpeg'], '-y', '-i', encodeFilename(path), '-vn']
+ acodec_opts + more_opts +
[encodeFilename(self._ffmpeg_filename_argument(out_path))])
p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
stdout,stderr = p.communicate()
if p.returncode != 0:
msg = stderr.strip().split('\n')[-1]
raise AudioConversionError(msg)
opts = ['-vn'] + acodec_opts + more_opts
try:
FFmpegPostProcessor.run_ffmpeg(self, path, out_path, opts)
except FFmpegPostProcessorError as err:
raise AudioConversionError(err.message)
def run(self, information):
path = information['filepath']
filecodec = self.get_audio_codec(path)
if filecodec is None:
self._downloader.to_stderr(u'WARNING: unable to obtain file audio codec with ffprobe')
return None
raise PostProcessingError(u'WARNING: unable to obtain file audio codec with ffprobe')
more_opts = []
if self._preferredcodec == 'best' or self._preferredcodec == filecodec or (self._preferredcodec == 'm4a' and filecodec == 'aac'):
if self._preferredcodec == 'm4a' and filecodec == 'aac':
if filecodec == 'aac' and self._preferredcodec in ['m4a', 'best']:
# Lossless, but in another container
acodec = 'copy'
extension = self._preferredcodec
extension = 'm4a'
more_opts = [self._exes['avconv'] and '-bsf:a' or '-absf', 'aac_adtstoasc']
elif filecodec in ['aac', 'mp3', 'vorbis', 'opus']:
# Lossless if possible
@@ -181,10 +197,10 @@ class FFmpegExtractAudioPP(PostProcessor):
except:
etype,e,tb = sys.exc_info()
if isinstance(e, AudioConversionError):
self._downloader.to_stderr(u'ERROR: audio conversion failed: ' + e.message)
msg = u'audio conversion failed: ' + e.message
else:
self._downloader.to_stderr(u'ERROR: error running ' + (self._exes['avconv'] and 'avconv' or 'ffmpeg'))
return None
msg = u'error running ' + (self._exes['avconv'] and 'avconv' or 'ffmpeg')
raise PostProcessingError(msg)
# Try to update the date time for extracted audio file.
if information.get('filetime') is not None:
@@ -193,19 +209,24 @@ class FFmpegExtractAudioPP(PostProcessor):
except:
self._downloader.to_stderr(u'WARNING: Cannot update utime of audio file')
if not self._keepvideo:
try:
os.remove(encodeFilename(path))
except (IOError, OSError):
self._downloader.to_stderr(u'WARNING: Unable to remove downloaded video file')
return None
information['filepath'] = new_path
return information
return False,information
def _ffmpeg_filename_argument(self, fn):
# ffmpeg broke --, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details
if fn.startswith(u'-'):
return u'./' + fn
return fn
class FFmpegVideoConvertor(FFmpegPostProcessor):
    """Post-processor that converts the downloaded video to another format."""

    def __init__(self, downloader=None, preferedformat=None):
        """Remember the target format (used as the new file extension)."""
        super(FFmpegVideoConvertor, self).__init__(downloader)
        self._preferedformat = preferedformat

    def run(self, information):
        """Convert information['filepath'] to the preferred format.

        Returns a (keep_original, information) tuple. When the file
        already has the target extension nothing is converted and
        (True, information) is returned. Otherwise the file is converted
        with run_ffmpeg, 'filepath', 'format' and 'ext' are updated to
        describe the new file, and (False, information) is returned.

        Errors from run_ffmpeg propagate to the caller.
        """
        # Local import keeps this block self-contained.
        import os

        path = information['filepath']
        target = self._preferedformat

        if information['ext'] == target:
            self._downloader.to_screen(u'[ffmpeg] Not converting video file %s - already is in target format %s' % (path, target))
            return True, information

        # splitext only treats a dot in the last path component as the
        # extension separator and leaves extension-less names intact.
        # Splitting at the last dot anywhere in the path turned 'video'
        # into 'mp4' and 'dir.v1/video' into 'dir.mp4'.
        root, _ = os.path.splitext(path)
        outpath = root + u'.' + target

        self._downloader.to_screen(u'[ffmpeg] Converting video from %s to %s, Destination: ' % (information['ext'], target) + outpath)
        self.run_ffmpeg(path, outpath, [])

        information['filepath'] = outpath
        information['format'] = target
        information['ext'] = target
        return False, information

View File

@@ -22,6 +22,8 @@ __authors__ = (
'Christian Albrecht',
'Dave Vasilevsky',
'Jaime Marquínez Ferrándiz',
'Jeff Crouse',
'Osama Khalid',
)
__license__ = 'Public Domain'
@@ -149,6 +151,9 @@ def parseOpts():
selection.add_option('--match-title', dest='matchtitle', metavar='REGEX',help='download only matching titles (regex or caseless sub-string)')
selection.add_option('--reject-title', dest='rejecttitle', metavar='REGEX',help='skip download for matching titles (regex or caseless sub-string)')
selection.add_option('--max-downloads', metavar='NUMBER', dest='max_downloads', help='Abort after downloading NUMBER files', default=None)
selection.add_option('--min-filesize', metavar='SIZE', dest='min_filesize', help="Do not download any videos smaller than SIZE (e.g. 50k or 44.6m)", default=None)
selection.add_option('--max-filesize', metavar='SIZE', dest='max_filesize', help="Do not download any videos larger than SIZE (e.g. 50k or 44.6m)", default=None)
authentication.add_option('-u', '--username',
dest='username', metavar='USERNAME', help='account username')
@@ -175,7 +180,6 @@ def parseOpts():
action='store', dest='subtitleslang', metavar='LANG',
help='language of the closed captions to download (optional) use IETF language tags like \'en\'')
verbosity.add_option('-q', '--quiet',
action='store_true', dest='quiet', help='activates quiet mode', default=False)
verbosity.add_option('-s', '--simulate',
@@ -198,6 +202,8 @@ def parseOpts():
verbosity.add_option('--get-format',
action='store_true', dest='getformat',
help='simulate, quiet but print output format', default=False)
verbosity.add_option('--newline',
action='store_true', dest='progress_with_newline', help='output progress bar as new lines', default=False)
verbosity.add_option('--no-progress',
action='store_true', dest='noprogress', help='do not print progress bar', default=False)
verbosity.add_option('--console-title',
@@ -206,7 +212,6 @@ def parseOpts():
verbosity.add_option('-v', '--verbose',
action='store_true', dest='verbose', help='print various debugging information', default=False)
filesystem.add_option('-t', '--title',
action='store_true', dest='usetitle', help='use title in file name', default=False)
filesystem.add_option('--id',
@@ -251,6 +256,8 @@ def parseOpts():
help='"best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; best by default')
postproc.add_option('--audio-quality', metavar='QUALITY', dest='audioquality', default='5',
help='ffmpeg/avconv audio quality specification, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default 5)')
postproc.add_option('--recode-video', metavar='FORMAT', dest='recodevideo', default=None,
help='Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm)')
postproc.add_option('-k', '--keep-video', action='store_true', dest='keepvideo', default=False,
help='keeps the video file on disk after the post-processing; the video is erased by default')
postproc.add_option('--no-post-overwrites', action='store_true', dest='nopostoverwrites', default=False,
@@ -284,10 +291,13 @@ def _real_main():
else:
try:
jar = compat_cookiejar.MozillaCookieJar(opts.cookiefile)
if os.path.isfile(opts.cookiefile) and os.access(opts.cookiefile, os.R_OK):
if os.access(opts.cookiefile, os.R_OK):
jar.load()
except (IOError, OSError) as err:
sys.exit(u'ERROR: unable to open cookie file')
if opts.verbose:
traceback.print_exc()
sys.stderr.write(u'ERROR: unable to open cookie file\n')
sys.exit(101)
# Set user agent
if opts.user_agent is not None:
std_headers['User-Agent'] = opts.user_agent
@@ -347,6 +357,16 @@ def _real_main():
if numeric_limit is None:
parser.error(u'invalid rate limit specified')
opts.ratelimit = numeric_limit
if opts.min_filesize is not None:
numeric_limit = FileDownloader.parse_bytes(opts.min_filesize)
if numeric_limit is None:
parser.error(u'invalid min_filesize specified')
opts.min_filesize = numeric_limit
if opts.max_filesize is not None:
numeric_limit = FileDownloader.parse_bytes(opts.max_filesize)
if numeric_limit is None:
parser.error(u'invalid max_filesize specified')
opts.max_filesize = numeric_limit
if opts.retries is not None:
try:
opts.retries = int(opts.retries)
@@ -376,6 +396,9 @@ def _real_main():
opts.audioquality = opts.audioquality.strip('k').strip('K')
if not opts.audioquality.isdigit():
parser.error(u'invalid audio quality specified')
if opts.recodevideo is not None:
if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg']:
parser.error(u'invalid video recode format specified')
if sys.version_info < (3,):
# In Python 2, sys.argv is a bytestring (also note http://bugs.python.org/issue2128 for Windows systems)
@@ -416,6 +439,7 @@ def _real_main():
'noresizebuffer': opts.noresizebuffer,
'continuedl': opts.continue_dl,
'noprogress': opts.noprogress,
'progress_with_newline': opts.progress_with_newline,
'playliststart': opts.playliststart,
'playlistend': opts.playlistend,
'logtostderr': opts.outtmpl == '-',
@@ -432,6 +456,9 @@ def _real_main():
'prefer_free_formats': opts.prefer_free_formats,
'verbose': opts.verbose,
'test': opts.test,
'keepvideo': opts.keepvideo,
'min_filesize': opts.min_filesize,
'max_filesize': opts.max_filesize
})
if opts.verbose:
@@ -453,7 +480,9 @@ def _real_main():
# PostProcessors
if opts.extractaudio:
fd.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, keepvideo=opts.keepvideo, nopostoverwrites=opts.nopostoverwrites))
fd.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, nopostoverwrites=opts.nopostoverwrites))
if opts.recodevideo:
fd.add_post_processor(FFmpegVideoConvertor(preferedformat=opts.recodevideo))
# Update version
if opts.update_self:

View File

@@ -280,6 +280,12 @@ class AttrParser(compat_html_parser.HTMLParser):
lines[-1] = lines[-1][:self.result[2][1]-self.result[1][1]]
lines[-1] = lines[-1][:self.result[2][1]]
return '\n'.join(lines).strip()
# Hack for https://github.com/rg3/youtube-dl/issues/662
# On interpreters older than 2.7.3, replace AttrParser.parse_endtag.
# When the unparsed data at index i starts with the literal text
# </scr'+'ipt> (presumably a script end tag split up inside inline
# JavaScript - see the issue above), the override returns the index just
# past that text. Any other input is handed to the stock
# HTMLParser.parse_endtag.
if sys.version_info < (2, 7, 3):
    AttrParser.parse_endtag = (lambda self, i:
        i + len("</scr'+'ipt>")
        if self.rawdata[i:].startswith("</scr'+'ipt>")
        else compat_html_parser.HTMLParser.parse_endtag(self, i))
def get_element_by_id(id, html):
"""Return the content of the tag with the specified ID in the passed HTML document"""
@@ -409,7 +415,10 @@ def encodeFilename(s):
# match Windows 9x series as well. Besides, NT 4 is obsolete.)
return s
else:
return s.encode(sys.getfilesystemencoding(), 'ignore')
encoding = sys.getfilesystemencoding()
if encoding is None:
encoding = 'utf-8'
return s.encode(encoding, 'ignore')
class ExtractorError(Exception):
@@ -450,7 +459,8 @@ class PostProcessingError(Exception):
This exception may be raised by PostProcessor's .run() method to
indicate an error in the postprocessing task.
"""
pass
def __init__(self, msg):
    """Create the error with the human-readable description *msg*.

    The text is available as both .msg and .message. The post-processor
    code in this project reads .message from these errors; without it
    the text is empty on Python 2 and access raises AttributeError on
    Python 3.
    """
    # Initialise the base class so that args and str() carry the text too.
    Exception.__init__(self, msg)
    self.msg = msg
    self.message = msg
class MaxDownloadsReached(Exception):
""" --max-downloads limit has been reached. """
@@ -515,14 +525,19 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
return ret
def http_request(self, req):
for h in std_headers:
for h,v in std_headers.items():
if h in req.headers:
del req.headers[h]
req.add_header(h, std_headers[h])
req.add_header(h, v)
if 'Youtubedl-no-compression' in req.headers:
if 'Accept-encoding' in req.headers:
del req.headers['Accept-encoding']
del req.headers['Youtubedl-no-compression']
if 'Youtubedl-user-agent' in req.headers:
if 'User-agent' in req.headers:
del req.headers['User-agent']
req.headers['User-agent'] = req.headers['Youtubedl-user-agent']
del req.headers['Youtubedl-user-agent']
return req
def http_response(self, req, resp):

View File

@@ -1,2 +1,2 @@
__version__ = '2013.01.11'
__version__ = '2013.02.19'