Compare commits


23 Commits

Author SHA1 Message Date
Philipp Hagemeister
74fdba620d release 2013.01.08 2013-01-08 10:29:53 +01:00
Philipp Hagemeister
dc1c479a6f Merge pull request #621 from atomizer/master
justin.tv tweaks
2013-01-08 00:57:46 -08:00
atomizer
119d536e07 Merge branch 'my-origin/master' 2013-01-07 17:03:58 +04:00
atomizer
fa1bf9c653 justin.tv tweaks
- download all parts of a broadcast, fixes #614
- set "uploader" variable to channel_name if available
- catch api errors even if http status is 200
2013-01-07 16:59:39 +04:00
Philipp Hagemeister
814eed0ea1 Fix tar target (--exclude-vcs is not supported everywhere, and reading . while writing to it can fail randomly) 2013-01-07 12:48:07 +01:00
Philipp Hagemeister
0aa3068e9e Do not check in test_coverage 2013-01-06 23:38:36 +01:00
Philipp Hagemeister
db2d6124b1 correct quoting 2013-01-06 23:14:56 +01:00
Philipp Hagemeister
039dc61bd2 Simplify Makefile 2013-01-06 23:02:31 +01:00
Philipp Hagemeister
4b879984ea release 2013.01.06 2013-01-06 22:52:04 +01:00
Philipp Hagemeister
55e286ba55 read -n is bash-specific 2013-01-06 22:50:20 +01:00
Philipp Hagemeister
9314810243 fix ComedyCentral IE in Python3 2013-01-06 21:36:01 +01:00
Philipp Hagemeister
7717ae19fa Add tests for ComedyCentral IE 2013-01-06 21:35:20 +01:00
Philipp Hagemeister
32635ec685 Switch comedycentral IE to http downloads 2013-01-06 21:26:31 +01:00
Philipp Hagemeister
20759b340a Disable travis irc notifications
travis is much too verbose for that, with random IEs constantly failing
2013-01-04 00:34:02 +01:00
Philipp Hagemeister
8e5f761870 Merge pull request #617 from jaimeMF/steamIE
[steamIE]Allow downloading videos with other characters in their titles
2013-01-03 15:16:27 -08:00
Jaime Marquínez Ferrándiz
26714799c9 steamIE remove the HTMLparser object 2013-01-03 23:56:02 +01:00
Jaime Marquínez Ferrándiz
5e9d042d8f steamIE follow @phihag suggestions 2013-01-03 23:51:48 +01:00
Jaime Marquínez Ferrándiz
9cf98a2bcc Allow downloading videos with other characters in their titles
Especially html entities
2013-01-03 21:17:35 +01:00
Philipp Hagemeister
f5ebb61495 Support page URL in RTMP downloads 2013-01-03 20:26:38 +01:00
Philipp Hagemeister
431d88dd31 Also generate SHA2-256 2013-01-03 19:49:06 +01:00
Philipp Hagemeister
876f1a86af Also publish hashsums 2013-01-03 19:18:55 +01:00
Philipp Hagemeister
01951dda7a Make ExtractorError usable for other causes 2013-01-03 15:39:55 +01:00
Filippo Valsorda
6e3dba168b release.sh edits based on 2013.01.02 experience 2013-01-02 23:40:24 +01:00
9 changed files with 122 additions and 83 deletions

View File

@@ -1,17 +0,0 @@
updates_key.pem
*.pyc
*.pyo
youtube-dl.exe
wine-py2exe/
py2exe.log
*.kate-swp
build/
dist/
MANIFEST
*.DS_Store
youtube-dl.tar.gz
.coverage
cover/
__pycache__/
.git/
*~

View File

@@ -8,7 +8,7 @@ notifications:
email:
- filippo.valsorda@gmail.com
- phihag@phihag.de
irc:
channels:
- "irc.freenode.org#youtube-dl"
skip_join: true
# irc:
# channels:
# - "irc.freenode.org#youtube-dl"
# skip_join: true

View File

@@ -20,7 +20,9 @@ test:
#nosetests --with-coverage --cover-package=youtube_dl --cover-html --verbose --processes 4 test
nosetests --verbose test
.PHONY: all clean install test
tar: youtube-dl.tar.gz
.PHONY: all clean install test tar
youtube-dl: youtube_dl/*.py
zip --quiet youtube-dl youtube_dl/*.py
@@ -43,5 +45,16 @@ youtube-dl.bash-completion: youtube_dl/*.py devscripts/bash-completion.in
python devscripts/bash-completion.py
youtube-dl.tar.gz: all
tar -cvzf youtube-dl.tar.gz -s "|^./|./youtube-dl/|" \
--exclude-from=".tarignore" -- .
@tar -czf youtube-dl.tar.gz --transform "s|^|youtube-dl/|" --owner 0 --group 0 \
--exclude '*.DS_Store' \
--exclude '*.kate-swp' \
--exclude '*.pyc' \
--exclude '*.pyo' \
--exclude '*~' \
--exclude '__pycache' \
--exclude '.git' \
-- \
bin devscripts test youtube_dl \
CHANGELOG LICENSE README.md README.txt \
MANIFEST.in youtube-dl.1 youtube-dl.bash-completion setup.py \
youtube-dl
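
The rewritten tar rule replaces --exclude-vcs and the .tarignore file with explicit --exclude patterns, a --transform prefix and a fixed file list, which sidesteps both the portability problem and the risk of tar reading the directory it is writing into. Purely as an illustration (this script is not part of the repository), the same packaging logic could be expressed with Python's tarfile module:

    import tarfile

    # Suffixes and path components excluded by the Makefile rule above.
    EXCLUDED_SUFFIXES = ('.DS_Store', '.kate-swp', '.pyc', '.pyo', '~')
    EXCLUDED_PARTS = ('__pycache__', '.git')

    def exclude_filter(tarinfo):
        """Return None to drop a member, mirroring the --exclude options."""
        if tarinfo.name.endswith(EXCLUDED_SUFFIXES):
            return None
        if any(part in EXCLUDED_PARTS for part in tarinfo.name.split('/')):
            return None
        tarinfo.uid = tarinfo.gid = 0        # --owner 0 --group 0
        tarinfo.uname = tarinfo.gname = ''
        return tarinfo

    members = ['bin', 'devscripts', 'test', 'youtube_dl',
               'CHANGELOG', 'LICENSE', 'README.md', 'README.txt',
               'MANIFEST.in', 'youtube-dl.1', 'youtube-dl.bash-completion',
               'setup.py', 'youtube-dl']

    with tarfile.open('youtube-dl.tar.gz', 'w:gz') as tar:
        for path in members:
            # --transform "s|^|youtube-dl/|": prefix every member with youtube-dl/
            tar.add(path, arcname='youtube-dl/' + path, filter=exclude_filter)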

View File

@@ -1,13 +1,17 @@
#!/bin/sh
#!/bin/bash
# IMPORTANT: the following assumptions are made
# * you did --set-upstream
# * the GH repo is on the origin remote
# * the gh-pages branch is named so locally
# * the git config user.signingkey is properly set
# You will need
# pip install coverage nose rsa
# TODO
# release notes
# make hash on local files
set -e
if [ -z "$1" ]; then echo "ERROR: specify version number like this: $0 1994.09.06"; exit 1; fi
@@ -34,7 +38,9 @@ git show "$version"
read -p "Is it good, can I push? (y/n) " -n 1
if [[ ! $REPLY =~ ^[Yy]$ ]]; then exit 1; fi
echo
git push
MASTER=$(git rev-parse --abbrev-ref HEAD)
git push origin $MASTER:master
git push origin "$version"
echo "\n### OK, now it is time to build the binaries..."
REV=$(git rev-parse HEAD)
@@ -44,15 +50,19 @@ wget "http://jeromelaheurte.net:8142/download/rg3/youtube-dl/youtube-dl.exe?rev=
mkdir -p "update_staging/$version"
mv youtube-dl youtube-dl.exe "update_staging/$version"
mv youtube-dl.tar.gz "update_staging/$version/youtube-dl-$version.tar.gz"
RELEASE_FILES="youtube-dl youtube-dl.exe youtube-dl-$version.tar.gz"
(cd update_staging/$version/ && md5sum $RELEASE_FILES > MD5SUMS)
(cd update_staging/$version/ && sha1sum $RELEASE_FILES > SHA1SUMS)
(cd update_staging/$version/ && sha256sum $RELEASE_FILES > SHA2-256SUMS)
(cd update_staging/$version/ && sha512sum $RELEASE_FILES > SHA2-512SUMS)
git checkout HEAD -- youtube-dl youtube-dl.exe
echo "\n### Signing and uploading the new binaries to youtube-dl.org..."
for f in update_staging/$version/*; do gpg --detach-sig "$f"; done
for f in $RELEASE_FILES; do gpg --detach-sig "update_staging/$version/$f"; done
scp -r "update_staging/$version" ytdl@youtube-dl.org:html/downloads/
rm -r update_staging
echo "\n### Now switching to gh-pages..."
MASTER=$(git rev-parse --abbrev-ref HEAD)
git checkout gh-pages
git checkout "$MASTER" -- devscripts/gh-pages/
git reset devscripts/gh-pages/
@@ -60,15 +70,14 @@ devscripts/gh-pages/add-version.py $version
devscripts/gh-pages/sign-versions.py < updates_key.pem
devscripts/gh-pages/generate-download.py
devscripts/gh-pages/update-copyright.py
rm -r test_coverage
mv cover test_coverage
git add *.html *.html.in update test_coverage
git add *.html *.html.in update
git commit -m "release $version"
git show HEAD
read -p "Is it good, can I push? (y/n) " -n 1
if [[ ! $REPLY =~ ^[Yy]$ ]]; then exit 1; fi
echo
git push
git push origin gh-pages
echo "\n### DONE!"
rm -r devscripts
git checkout $MASTER
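
release.sh now publishes MD5SUMS, SHA1SUMS, SHA2-256SUMS and SHA2-512SUMS alongside the binaries and signs each release file individually. A minimal, hypothetical Python equivalent of the checksum step (the staging directory and version are invented for the example):

    import hashlib
    import os

    version = '2013.01.08'
    staging = 'update_staging/%s' % version   # hypothetical staging directory
    release_files = ['youtube-dl', 'youtube-dl.exe',
                     'youtube-dl-%s.tar.gz' % version]

    # One SUMS file per digest, matching the sha*sum calls in the script above.
    digests = {'MD5SUMS': hashlib.md5, 'SHA1SUMS': hashlib.sha1,
               'SHA2-256SUMS': hashlib.sha256, 'SHA2-512SUMS': hashlib.sha512}

    for sums_name, make_hash in digests.items():
        lines = []
        for fname in release_files:
            h = make_hash()
            with open(os.path.join(staging, fname), 'rb') as f:
                for chunk in iter(lambda: f.read(65536), b''):
                    h.update(chunk)
            lines.append('%s  %s' % (h.hexdigest(), fname))  # "<digest>  <file>"
        with open(os.path.join(staging, sums_name), 'w') as out:
            out.write('\n'.join(lines) + '\n')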

View File

@@ -160,5 +160,39 @@
"params": {
"skip_download": true
}
},
{
"name": "ComedyCentral",
"url": "http://www.thedailyshow.com/full-episodes/thu-december-13-2012-kristen-stewart",
"playlist": [
{
"file": "422204.mp4",
"md5": "7a7abe068b31ff03e7b8a37596e72380",
"info_dict": {
"title": "thedailyshow-thu-december-13-2012-kristen-stewart part 1"
}
},
{
"file": "422205.mp4",
"md5": "30552b7274c94dbb933f64600eadddd2",
"info_dict": {
"title": "thedailyshow-thu-december-13-2012-kristen-stewart part 2"
}
},
{
"file": "422206.mp4",
"md5": "1f4c0664b352cb8e8fe85d5da4fbee91",
"info_dict": {
"title": "thedailyshow-thu-december-13-2012-kristen-stewart part 3"
}
},
{
"file": "422207.mp4",
"md5": "f61ee8a4e6bd1308438e03badad78554",
"info_dict": {
"title": "thedailyshow-thu-december-13-2012-kristen-stewart part 4"
}
}
]
}
]
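
The new ComedyCentral entry is a multi-part test: instead of a single file/md5 pair it carries a "playlist" whose items each name an expected file, checksum and title. A hedged sketch of how such an entry can be verified once the parts are downloaded (the helper below is illustrative, not the project's actual test harness, and the fixture path is assumed):

    import hashlib
    import json

    def file_md5(path):
        """Full-file MD5, for comparison against the fixture's "md5" field."""
        h = hashlib.md5()
        with open(path, 'rb') as f:
            for chunk in iter(lambda: f.read(65536), b''):
                h.update(chunk)
        return h.hexdigest()

    with open('test/tests.json') as f:        # assumed fixture location
        tests = json.load(f)

    entry = next(t for t in tests if t.get('name') == 'ComedyCentral')
    for part in entry['playlist']:
        assert file_md5(part['file']) == part['md5'], part['file']
        print('%s ok: %s' % (part['file'], part['info_dict']['title']))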

View File

@@ -216,12 +216,15 @@ class FileDownloader(object):
Depending on if the downloader has been configured to ignore
download errors or not, this method may throw an exception or
not when errors are found, after printing the message.
tb, if given, is additional traceback information.
"""
if message is not None:
self.to_stderr(message)
if self.params.get('verbose'):
if tb is None:
tb = u''.join(traceback.format_list(traceback.extract_stack()))
tb_data = traceback.format_list(traceback.extract_stack())
tb = u''.join(tb_data)
self.to_stderr(tb)
if not self.params.get('ignoreerrors', False):
raise DownloadError(message)
@@ -497,7 +500,7 @@ class FileDownloader(object):
try:
videos = ie.extract(url)
except ExtractorError as de: # An error we somewhat expected
self.trouble(u'ERROR: ' + compat_str(de), compat_str(u''.join(traceback.format_tb(de.traceback))))
self.trouble(u'ERROR: ' + compat_str(de), de.format_traceback())
break
except Exception as e:
if self.params.get('ignoreerrors', False):
@@ -534,7 +537,7 @@ class FileDownloader(object):
if info is None:
break
def _download_with_rtmpdump(self, filename, url, player_url):
def _download_with_rtmpdump(self, filename, url, player_url, page_url):
self.report_destination(filename)
tmpfilename = self.temp_name(filename)
@@ -548,7 +551,11 @@ class FileDownloader(object):
# Download using rtmpdump. rtmpdump returns exit code 2 when
# the connection was interrumpted and resuming appears to be
# possible. This is part of rtmpdump's normal usage, AFAIK.
basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename]
basic_args = ['rtmpdump', '-q', '-r', url, '-o', tmpfilename]
if player_url is not None:
basic_args += ['-W', player_url]
if page_url is not None:
basic_args += ['--pageUrl', page_url]
args = basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)]
if self.params.get('verbose', False):
try:
@@ -581,7 +588,6 @@ class FileDownloader(object):
def _do_download(self, filename, info_dict):
url = info_dict['url']
player_url = info_dict.get('player_url', None)
# Check file already present
if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False):
@@ -590,7 +596,9 @@ class FileDownloader(object):
# Attempt to download using rtmpdump
if url.startswith('rtmp'):
return self._download_with_rtmpdump(filename, url, player_url)
return self._download_with_rtmpdump(filename, url,
info_dict.get('player_url', None),
info_dict.get('page_url', None))
tmpfilename = self.temp_name(filename)
stream = None
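
_download_with_rtmpdump now takes both player_url and page_url (pulled from the info dict with .get, so either may be None) and only appends the matching rtmpdump options when a value is present. A standalone sketch of that argument-building pattern, with invented example values:

    def build_rtmpdump_args(url, tmpfilename, player_url=None, page_url=None,
                            continuedl=False):
        """Mirror the option handling above: optional flags only when known."""
        args = ['rtmpdump', '-q', '-r', url, '-o', tmpfilename]
        if player_url is not None:
            args += ['-W', player_url]        # SWF player URL
        if page_url is not None:
            args += ['--pageUrl', page_url]   # page embedding the stream
        if continuedl:
            args += ['-e', '-k', '1']         # resume flags used above
        return args

    # Invented example values:
    print(build_rtmpdump_args('rtmp://example.com/app/stream', 'clip.flv.part',
                              page_url='http://example.com/watch/123'))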

View File

@@ -117,7 +117,7 @@ class InfoExtractor(object):
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
if errnote is None:
errnote = u'Unable to download webpage'
raise ExtractorError(u'%s: %s' % (errnote, compat_str(err)))
raise ExtractorError(u'%s: %s' % (errnote, compat_str(err)), sys.exc_info()[2])
class YoutubeIE(InfoExtractor):
@@ -2333,7 +2333,6 @@ class ComedyCentralIE(InfoExtractor):
(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
|(watch/(?P<date>[^/]*)/(?P<tdstitle>.*)))))
$"""
IE_NAME = u'comedycentral'
_available_formats = ['3500', '2200', '1700', '1200', '750', '400']
@@ -2361,16 +2360,12 @@ class ComedyCentralIE(InfoExtractor):
def report_extraction(self, episode_id):
self._downloader.to_screen(u'[comedycentral] %s: Extracting information' % episode_id)
def report_config_download(self, episode_id):
self._downloader.to_screen(u'[comedycentral] %s: Downloading configuration' % episode_id)
def report_config_download(self, episode_id, media_id):
self._downloader.to_screen(u'[comedycentral] %s: Downloading configuration for %s' % (episode_id, media_id))
def report_index_download(self, episode_id):
self._downloader.to_screen(u'[comedycentral] %s: Downloading show index' % episode_id)
def report_player_url(self, episode_id):
self._downloader.to_screen(u'[comedycentral] %s: Determining player URL' % episode_id)
def _print_formats(self, formats):
print('Available formats:')
for x in formats:
@@ -2409,6 +2404,7 @@ class ComedyCentralIE(InfoExtractor):
try:
htmlHandle = compat_urllib_request.urlopen(req)
html = htmlHandle.read()
webpage = html.decode('utf-8')
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err))
return
@@ -2423,29 +2419,20 @@ class ComedyCentralIE(InfoExtractor):
return
epTitle = mobj.group('episode')
mMovieParams = re.findall('(?:<param name="movie" value="|var url = ")(http://media.mtvnservices.com/([^"]*(?:episode|video).*?:.*?))"', html)
mMovieParams = re.findall('(?:<param name="movie" value="|var url = ")(http://media.mtvnservices.com/([^"]*(?:episode|video).*?:.*?))"', webpage)
if len(mMovieParams) == 0:
# The Colbert Report embeds the information in a without
# a URL prefix; so extract the alternate reference
# and then add the URL prefix manually.
altMovieParams = re.findall('data-mgid="([^"]*(?:episode|video).*?:.*?)"', html)
altMovieParams = re.findall('data-mgid="([^"]*(?:episode|video).*?:.*?)"', webpage)
if len(altMovieParams) == 0:
self._downloader.trouble(u'ERROR: unable to find Flash URL in webpage ' + url)
return
else:
mMovieParams = [("http://media.mtvnservices.com/" + altMovieParams[0], altMovieParams[0])]
playerUrl_raw = mMovieParams[0][0]
self.report_player_url(epTitle)
try:
urlHandle = compat_urllib_request.urlopen(playerUrl_raw)
playerUrl = urlHandle.geturl()
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.trouble(u'ERROR: unable to find out player URL: ' + compat_str(err))
return
uri = mMovieParams[0][1]
indexUrl = 'http://shadow.comedycentral.com/feeds/video_player/mrss/?' + compat_urllib_parse.urlencode({'uri': uri})
self.report_index_download(epTitle)
@@ -2459,7 +2446,7 @@ class ComedyCentralIE(InfoExtractor):
idoc = xml.etree.ElementTree.fromstring(indexXml)
itemEls = idoc.findall('.//item')
for itemEl in itemEls:
for partNum,itemEl in enumerate(itemEls):
mediaId = itemEl.findall('./guid')[0].text
shortMediaId = mediaId.split(':')[-1]
showId = mediaId.split(':')[-2].replace('.com', '')
@@ -2469,7 +2456,7 @@ class ComedyCentralIE(InfoExtractor):
configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' +
compat_urllib_parse.urlencode({'uri': mediaId}))
configReq = compat_urllib_request.Request(configUrl)
self.report_config_download(epTitle)
self.report_config_download(epTitle, shortMediaId)
try:
configXml = compat_urllib_request.urlopen(configReq).read()
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
@@ -2491,7 +2478,7 @@ class ComedyCentralIE(InfoExtractor):
return
# For now, just pick the highest bitrate
format,video_url = turls[-1]
format,rtmp_video_url = turls[-1]
# Get the format arg from the arg stream
req_format = self._downloader.params.get('format', None)
@@ -2499,18 +2486,16 @@ class ComedyCentralIE(InfoExtractor):
# Select format if we can find one
for f,v in turls:
if f == req_format:
format, video_url = f, v
format, rtmp_video_url = f, v
break
# Patch to download from alternative CDN, which does not
# break on current RTMPDump builds
broken_cdn = "rtmpe://viacomccstrmfs.fplive.net/viacomccstrm/gsp.comedystor/"
better_cdn = "rtmpe://cp10740.edgefcs.net/ondemand/mtvnorigin/gsp.comedystor/"
m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp.comedystor/.*)$', rtmp_video_url)
if not m:
raise ExtractorError(u'Cannot transform RTMP url')
base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/'
video_url = base + m.group('finalid')
if video_url.startswith(broken_cdn):
video_url = video_url.replace(broken_cdn, better_cdn)
effTitle = showId + u'-' + epTitle
effTitle = showId + u'-' + epTitle + u' part ' + compat_str(partNum+1)
info = {
'id': shortMediaId,
'url': video_url,
@@ -2521,9 +2506,7 @@ class ComedyCentralIE(InfoExtractor):
'format': format,
'thumbnail': None,
'description': officialTitle,
'player_url': None #playerUrl
}
results.append(info)
return results
@@ -2603,7 +2586,6 @@ class EscapistIE(InfoExtractor):
return [info]
class CollegeHumorIE(InfoExtractor):
"""Information extractor for collegehumor.com"""
@@ -3542,17 +3524,23 @@ class JustinTVIE(InfoExtractor):
return
response = json.loads(webpage)
if type(response) != list:
error_text = response.get('error', 'unknown error')
self._downloader.trouble(u'ERROR: Justin.tv API: %s' % error_text)
return
info = []
for clip in response:
video_url = clip['video_file_url']
if video_url:
video_extension = os.path.splitext(video_url)[1][1:]
video_date = re.sub('-', '', clip['created_on'][:10])
video_date = re.sub('-', '', clip['start_time'][:10])
video_uploader_id = clip.get('user_id', clip.get('channel_id'))
info.append({
'id': clip['id'],
'url': video_url,
'title': clip['title'],
'uploader': clip.get('user_id', clip.get('channel_id')),
'uploader': clip.get('channel_name', video_uploader_id),
'uploader_id': video_uploader_id,
'upload_date': video_date,
'ext': video_extension,
})
@@ -3571,7 +3559,7 @@ class JustinTVIE(InfoExtractor):
paged = True
api += '/channel/archives/%s.json'
else:
api += '/clip/show/%s.json'
api += '/broadcast/by_archive/%s.json'
api = api % (video_id,)
self.report_extraction(video_id)
@@ -3694,8 +3682,8 @@ class SteamIE(InfoExtractor):
videourl = 'http://store.steampowered.com/video/%s/' % gameID
webpage = self._download_webpage(videourl, gameID)
mweb = re.finditer(urlRE, webpage)
namesRE = r'<span class=\"title\">(?P<videoName>[\w:/\.\?=\+\s-]+)</span>'
titles = list(re.finditer(namesRE, webpage))
namesRE = r'<span class="title">(?P<videoName>.+?)</span>'
titles = re.finditer(namesRE, webpage)
videos = []
for vid,vtitle in zip(mweb,titles):
video_id = vid.group('videoID')
@@ -3707,7 +3695,7 @@ class SteamIE(InfoExtractor):
'id':video_id,
'url':video_url,
'ext': 'flv',
'title': title
'title': unescapeHTML(title)
}
videos.append(info)
return videos
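
Among the extractor changes above, ComedyCentral now rewrites the RTMP URL onto an HTTP base instead of swapping between RTMP CDNs, and appends a part number to each playlist item's title. A small sketch of the URL rewrite in isolation (the sample input URL is invented):

    import re

    def rtmp_to_http(rtmp_video_url):
        """Graft the gsp.comedystor/... path onto the HTTP base used above."""
        m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp\.comedystor/.*)$',
                     rtmp_video_url)
        if not m:
            raise ValueError('Cannot transform RTMP url')
        base = ('http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264'
                '+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/')
        return base + m.group('finalid')

    # Invented example input:
    print(rtmp_to_http('rtmpe://viacomccstrmfs.fplive.net/viacomccstrm/'
                       'gsp.comedystor/com/dailyshow/example_part1.mp4'))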

View File

@@ -8,6 +8,7 @@ import locale
import os
import re
import sys
import traceback
import zlib
import email.utils
import json
@@ -414,12 +415,15 @@ def encodeFilename(s):
class ExtractorError(Exception):
"""Error during info extraction."""
def __init__(self, msg, tb=None):
""" tb is the original traceback (so that it can be printed out) """
""" tb, if given, is the original traceback (so that it can be printed out). """
super(ExtractorError, self).__init__(msg)
if tb is None:
tb = sys.exc_info()[2]
self.traceback = tb
def format_traceback(self):
if self.traceback is None:
return None
return u''.join(traceback.format_tb(self.traceback))
class DownloadError(Exception):
"""Download Error exception.

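ExtractorError now captures sys.exc_info()[2] when no traceback is passed and exposes format_traceback(), which FileDownloader.trouble() prints in verbose mode. A minimal usage sketch (the failing helper is hypothetical):

    from youtube_dl.utils import ExtractorError

    def fetch_configuration():
        # Hypothetical helper standing in for a network call that fails.
        raise IOError('connection reset')

    try:
        try:
            fetch_configuration()
        except Exception as err:
            # No tb argument: __init__ falls back to sys.exc_info()[2], so the
            # IOError's traceback travels with the ExtractorError.
            raise ExtractorError(u'Unable to download configuration: %s' % err)
    except ExtractorError as e:
        print(e.format_traceback() or u'(no traceback recorded)')
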
View File

@@ -1,2 +1,2 @@
__version__ = '2013.01.02'
__version__ = '2013.01.08'