Compare commits

...

9 Commits

7 changed files with 116 additions and 71 deletions

View File

@ -20,7 +20,7 @@ which means you can modify it, redistribute it or use it however you like.
sure that you have sufficient permissions sure that you have sufficient permissions
(run with sudo if needed) (run with sudo if needed)
-i, --ignore-errors continue on download errors, for example to -i, --ignore-errors continue on download errors, for example to
to skip unavailable videos in a playlist skip unavailable videos in a playlist
--abort-on-error Abort downloading of further videos (in the --abort-on-error Abort downloading of further videos (in the
playlist or the command line) if an error playlist or the command line) if an error
occurs occurs
@ -246,7 +246,7 @@ which means you can modify it, redistribute it or use it however you like.
# CONFIGURATION # CONFIGURATION
You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.config/youtube-dl.conf`. On Windows, the configuration file locations are `%APPDATA%\youtube-dl\config.txt` and `C:\Users\<Yourname>\youtube-dl.conf`. You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.config/youtube-dl/config`. On Windows, the configuration file locations are `%APPDATA%\youtube-dl\config.txt` and `C:\Users\<Yourname>\youtube-dl.conf`.
# OUTPUT TEMPLATE # OUTPUT TEMPLATE
@ -357,7 +357,7 @@ If you want to create a build of youtube-dl yourself, you'll need
### Adding support for a new site ### Adding support for a new site
If you want to add support for a new site, copy *any* [recently modified](https://github.com/rg3/youtube-dl/commits/master/youtube_dl/extractor) file in `youtube_dl/extractor`, add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py). Have a look at [`youtube_dl/common/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L38). Don't forget to run the tests with `python test/test_download.py Test_Download.test_YourExtractor`! For a detailed tutorial, refer to [this blog post](http://filippo.io/add-support-for-a-new-video-site-to-youtube-dl/). If you want to add support for a new site, copy *any* [recently modified](https://github.com/rg3/youtube-dl/commits/master/youtube_dl/extractor) file in `youtube_dl/extractor`, add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py). Have a look at [`youtube_dl/common/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L38). Don't forget to run the tests with `python test/test_download.py TestDownload.test_YourExtractor`! For a detailed tutorial, refer to [this blog post](http://filippo.io/add-support-for-a-new-video-site-to-youtube-dl/).
# BUGS # BUGS

View File

@ -250,5 +250,14 @@ class TestPlaylists(unittest.TestCase):
self.assertEqual(result['title'], 'python language') self.assertEqual(result['title'], 'python language')
self.assertTrue(len(result['entries']) == 15) self.assertTrue(len(result['entries']) == 15)
def test_generic_rss_feed(self):
dl = FakeYDL()
ie = GenericIE(dl)
result = ie.extract('http://www.escapistmagazine.com/rss/videos/list/1.xml')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], 'http://www.escapistmagazine.com/rss/videos/list/1.xml')
self.assertEqual(result['title'], 'Zero Punctuation')
self.assertTrue(len(result['entries']) > 10)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View File

@ -208,7 +208,7 @@ def parseOpts(overrideArguments=None):
general.add_option('-U', '--update', general.add_option('-U', '--update',
action='store_true', dest='update_self', help='update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)') action='store_true', dest='update_self', help='update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)')
general.add_option('-i', '--ignore-errors', general.add_option('-i', '--ignore-errors',
action='store_true', dest='ignoreerrors', help='continue on download errors, for example to to skip unavailable videos in a playlist', default=False) action='store_true', dest='ignoreerrors', help='continue on download errors, for example to skip unavailable videos in a playlist', default=False)
general.add_option('--abort-on-error', general.add_option('--abort-on-error',
action='store_false', dest='ignoreerrors', action='store_false', dest='ignoreerrors',
help='Abort downloading of further videos (in the playlist or the command line) if an error occurs') help='Abort downloading of further videos (in the playlist or the command line) if an error occurs')

View File

@ -162,6 +162,11 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
group_id = mobj.group('id') group_id = mobj.group('id')
webpage = self._download_webpage(url, group_id, 'Downloading video page')
if re.search(r'id="emp-error" class="notinuk">', webpage):
raise ExtractorError('Currently BBC iPlayer TV programmes are available to play in the UK only',
expected=True)
playlist = self._download_xml('http://www.bbc.co.uk/iplayer/playlist/%s' % group_id, group_id, playlist = self._download_xml('http://www.bbc.co.uk/iplayer/playlist/%s' % group_id, group_id,
'Downloading playlist XML') 'Downloading playlist XML')

View File

@ -4,6 +4,7 @@ from __future__ import unicode_literals
import os import os
import re import re
import xml.etree.ElementTree
from .common import InfoExtractor from .common import InfoExtractor
from .youtube import YoutubeIE from .youtube import YoutubeIE
@ -159,6 +160,25 @@ class GenericIE(InfoExtractor):
raise ExtractorError('Invalid URL protocol') raise ExtractorError('Invalid URL protocol')
return response return response
def _extract_rss(self, url, video_id, doc):
playlist_title = doc.find('./channel/title').text
playlist_desc_el = doc.find('./channel/description')
playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
entries = [{
'_type': 'url',
'url': e.find('link').text,
'title': e.find('title').text,
} for e in doc.findall('./channel/item')]
return {
'_type': 'playlist',
'id': url,
'title': playlist_title,
'description': playlist_desc,
'entries': entries,
}
def _real_extract(self, url): def _real_extract(self, url):
parsed_url = compat_urlparse.urlparse(url) parsed_url = compat_urlparse.urlparse(url)
if not parsed_url.scheme: if not parsed_url.scheme:
@ -219,6 +239,14 @@ class GenericIE(InfoExtractor):
self.report_extraction(video_id) self.report_extraction(video_id)
# Is it an RSS feed?
try:
doc = xml.etree.ElementTree.fromstring(webpage)
if doc.tag == 'rss':
return self._extract_rss(url, video_id, doc)
except xml.etree.ElementTree.ParseError:
pass
# it's tempting to parse this further, but you would # it's tempting to parse this further, but you would
# have to take into account all the variations like # have to take into account all the variations like
# Video Title - Site Name # Video Title - Site Name

View File

@ -4,51 +4,51 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
compat_urllib_parse,
ExtractorError, ExtractorError,
unified_strdate,
str_to_int,
int_or_none,
parse_duration,
) )
class XHamsterIE(InfoExtractor): class XHamsterIE(InfoExtractor):
"""Information Extractor for xHamster""" """Information Extractor for xHamster"""
_VALID_URL = r'(?:http://)?(?:www\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.+?)\.html(?:\?.*)?' _VALID_URL = r'http://(?:www\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.+?)\.html(?:\?.*)?'
_TESTS = [{ _TESTS = [
'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html', {
'file': '1509445.mp4', 'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html',
'md5': '8281348b8d3c53d39fffb377d24eac4e', 'md5': '8281348b8d3c53d39fffb377d24eac4e',
'info_dict': { 'info_dict': {
"upload_date": "20121014", 'id': '1509445',
"uploader_id": "Ruseful2011", 'ext': 'mp4',
"title": "FemaleAgent Shy beauty takes the bait", 'title': 'FemaleAgent Shy beauty takes the bait',
"age_limit": 18, 'upload_date': '20121014',
'uploader_id': 'Ruseful2011',
'duration': 893,
'age_limit': 18,
}
},
{
'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd',
'md5': '4cbd8d56708ecb4fb4124c23e4acb81a',
'info_dict': {
'id': '2221348',
'ext': 'mp4',
'title': 'Britney Spears Sexy Booty',
'upload_date': '20130914',
'uploader_id': 'jojo747400',
'duration': 200,
'age_limit': 18,
}
} }
}, ]
{
'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd',
'file': '2221348.flv',
'md5': 'e767b9475de189320f691f49c679c4c7',
'info_dict': {
"upload_date": "20130914",
"uploader_id": "jojo747400",
"title": "Britney Spears Sexy Booty",
"age_limit": 18,
}
}]
def _real_extract(self,url): def _real_extract(self,url):
def extract_video_url(webpage): def extract_video_url(webpage):
mobj = re.search(r'\'srv\': \'(?P<server>[^\']*)\',\s*\'file\': \'(?P<file>[^\']+)\',', webpage) mp4 = re.search(r'<video\s+.*?file="([^"]+)".*?>', webpage)
if mobj is None:
raise ExtractorError('Unable to extract media URL')
if len(mobj.group('server')) == 0:
return compat_urllib_parse.unquote(mobj.group('file'))
else:
return mobj.group('server')+'/key='+mobj.group('file')
def extract_mp4_video_url(webpage):
mp4 = re.search(r'<a href=\"(.+?)\" class=\"mp4Play\"',webpage)
if mp4 is None: if mp4 is None:
return None raise ExtractorError('Unable to extract media URL')
else: else:
return mp4.group(1) return mp4.group(1)
@ -62,50 +62,48 @@ class XHamsterIE(InfoExtractor):
mrss_url = 'http://xhamster.com/movies/%s/%s.html' % (video_id, seo) mrss_url = 'http://xhamster.com/movies/%s/%s.html' % (video_id, seo)
webpage = self._download_webpage(mrss_url, video_id) webpage = self._download_webpage(mrss_url, video_id)
video_title = self._html_search_regex( title = self._html_search_regex(r'<title>(?P<title>.+?) - xHamster\.com</title>', webpage, 'title')
r'<title>(?P<title>.+?) - xHamster\.com</title>', webpage, 'title')
# Only a few videos have an description # Only a few videos have an description
mobj = re.search(r'<span>Description: </span>([^<]+)', webpage) mobj = re.search(r'<span>Description: </span>([^<]+)', webpage)
video_description = mobj.group(1) if mobj else None description = mobj.group(1) if mobj else None
mobj = re.search(r'hint=\'(?P<upload_date_Y>[0-9]{4})-(?P<upload_date_m>[0-9]{2})-(?P<upload_date_d>[0-9]{2}) [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{3,4}\'', webpage) upload_date = self._html_search_regex(r'hint=\'(\d{4}-\d{2}-\d{2}) \d{2}:\d{2}:\d{2} [A-Z]{3,4}\'',
if mobj: webpage, 'upload date', fatal=False)
video_upload_date = mobj.group('upload_date_Y')+mobj.group('upload_date_m')+mobj.group('upload_date_d') if upload_date:
else: upload_date = unified_strdate(upload_date)
video_upload_date = None
self._downloader.report_warning('Unable to extract upload date')
video_uploader_id = self._html_search_regex( uploader_id = self._html_search_regex(r'<a href=\'/user/[^>]+>(?P<uploader_id>[^<]+)',
r'<a href=\'/user/[^>]+>(?P<uploader_id>[^<]+)',
webpage, 'uploader id', default='anonymous') webpage, 'uploader id', default='anonymous')
video_thumbnail = self._search_regex( thumbnail = self._html_search_regex(r'<video\s+.*?poster="([^"]+)".*?>', webpage, 'thumbnail', fatal=False)
r'\'image\':\'(?P<thumbnail>[^\']+)\'',
webpage, 'thumbnail', fatal=False) duration = parse_duration(self._html_search_regex(r'<span>Runtime:</span> (\d+:\d+)</div>',
webpage, 'duration', fatal=False))
view_count = self._html_search_regex(r'<span>Views:</span> ([^<]+)</div>', webpage, 'view count', fatal=False)
if view_count:
view_count = str_to_int(view_count)
mobj = re.search(r"hint='(?P<likecount>\d+) Likes / (?P<dislikecount>\d+) Dislikes'", webpage)
(like_count, dislike_count) = (mobj.group('likecount'), mobj.group('dislikecount')) if mobj else (None, None)
mobj = re.search(r'</label>Comments \((?P<commentcount>\d+)\)</div>', webpage)
comment_count = mobj.group('commentcount') if mobj else 0
age_limit = self._rta_search(webpage) age_limit = self._rta_search(webpage)
hd = is_hd(webpage) hd = is_hd(webpage)
video_url = extract_video_url(webpage) video_url = extract_video_url(webpage)
formats = [{ formats = [{
'url': video_url, 'url': video_url,
'format_id': 'hd' if hd else 'sd', 'format_id': 'hd' if hd else 'sd',
'preference': 0, 'preference': 1,
}] }]
video_mp4_url = extract_mp4_video_url(webpage)
if video_mp4_url is not None:
formats.append({
'url': video_mp4_url,
'ext': 'mp4',
'format_id': 'mp4-hd' if hd else 'mp4-sd',
'preference': 1,
})
if not hd: if not hd:
webpage = self._download_webpage( webpage = self._download_webpage(mrss_url + '?hd', video_id, note='Downloading HD webpage')
mrss_url + '?hd', video_id, note='Downloading HD webpage')
if is_hd(webpage): if is_hd(webpage):
video_url = extract_video_url(webpage) video_url = extract_video_url(webpage)
formats.append({ formats.append({
@ -118,11 +116,16 @@ class XHamsterIE(InfoExtractor):
return { return {
'id': video_id, 'id': video_id,
'title': video_title, 'title': title,
'formats': formats, 'description': description,
'description': video_description, 'upload_date': upload_date,
'upload_date': video_upload_date, 'uploader_id': uploader_id,
'uploader_id': video_uploader_id, 'thumbnail': thumbnail,
'thumbnail': video_thumbnail, 'duration': duration,
'view_count': view_count,
'like_count': int_or_none(like_count),
'dislike_count': int_or_none(dislike_count),
'comment_count': int_or_none(comment_count),
'age_limit': age_limit, 'age_limit': age_limit,
'formats': formats,
} }

View File

@ -1,2 +1,2 @@
__version__ = '2014.02.19.1' __version__ = '2014.02.20'