Compare commits

..

6 Commits

Author SHA1 Message Date
df872ec4e7 release 2014.02.06.1 2014-02-06 11:30:00 +01:00
5de90176d9 [elpais] Add extractor 2014-02-06 11:29:46 +01:00
dcf3eec47a [test_download] Skip over BadStatusLine errors
An error like https://travis-ci.org/rg3/youtube-dl/jobs/18317799#L449 is almost certainly the server's fault.
2014-02-06 04:19:57 +01:00
e9e4f30d26 [pbs] Remove unused import 2014-02-06 04:19:43 +01:00
83cebd73d4 [collegehumor] We only get shortened descriptions now 2014-02-06 04:16:22 +01:00
1df4229bd7 [mtv/gametrailers] Change order of title preference
It looks like the plain title is better again
2014-02-06 04:15:12 +01:00
8 changed files with 82 additions and 11 deletions

View File

@ -22,6 +22,7 @@ import socket
import youtube_dl.YoutubeDL import youtube_dl.YoutubeDL
from youtube_dl.utils import ( from youtube_dl.utils import (
compat_http_client,
compat_str, compat_str,
compat_urllib_error, compat_urllib_error,
compat_HTTPError, compat_HTTPError,
@ -110,7 +111,7 @@ def generator(test_case):
ydl.download([test_case['url']]) ydl.download([test_case['url']])
except (DownloadError, ExtractorError) as err: except (DownloadError, ExtractorError) as err:
# Check if the exception is not a network related one # Check if the exception is not a network related one
if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError) or (err.exc_info[0] == compat_HTTPError and err.exc_info[1].code == 503): if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError, compat_http_client.BadStatusLine) or (err.exc_info[0] == compat_HTTPError and err.exc_info[1].code == 503):
raise raise
if try_num == RETRIES: if try_num == RETRIES:

View File

@ -54,6 +54,7 @@ from .ebaumsworld import EbaumsWorldIE
from .ehow import EHowIE from .ehow import EHowIE
from .eighttracks import EightTracksIE from .eighttracks import EightTracksIE
from .eitb import EitbIE from .eitb import EitbIE
from .elpais import ElPaisIE
from .escapist import EscapistIE from .escapist import EscapistIE
from .everyonesmixtape import EveryonesMixtapeIE from .everyonesmixtape import EveryonesMixtapeIE
from .exfm import ExfmIE from .exfm import ExfmIE

View File

@ -4,6 +4,7 @@ import json
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import int_or_none
class CollegeHumorIE(InfoExtractor): class CollegeHumorIE(InfoExtractor):
@ -11,22 +12,25 @@ class CollegeHumorIE(InfoExtractor):
_TESTS = [{ _TESTS = [{
'url': 'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe', 'url': 'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe',
'file': '6902724.mp4',
'md5': 'dcc0f5c1c8be98dc33889a191f4c26bd', 'md5': 'dcc0f5c1c8be98dc33889a191f4c26bd',
'info_dict': { 'info_dict': {
'id': '6902724',
'ext': 'mp4',
'title': 'Comic-Con Cosplay Catastrophe', 'title': 'Comic-Con Cosplay Catastrophe',
'description': 'Fans get creative this year at San Diego. Too', 'description': 'Fans get creative this year',
'age_limit': 13, 'age_limit': 13,
}, },
}, },
{ {
'url': 'http://www.collegehumor.com/video/3505939/font-conference', 'url': 'http://www.collegehumor.com/video/3505939/font-conference',
'file': '3505939.mp4',
'md5': '72fa701d8ef38664a4dbb9e2ab721816', 'md5': '72fa701d8ef38664a4dbb9e2ab721816',
'info_dict': { 'info_dict': {
'id': '3505939',
'ext': 'mp4',
'title': 'Font Conference', 'title': 'Font Conference',
'description': 'This video wasn\'t long enough, so we made it double-spaced.', 'description': 'This video wasn\'t long enough,',
'age_limit': 10, 'age_limit': 10,
'duration': 179,
}, },
}, },
# embedded youtube video # embedded youtube video
@ -82,6 +86,8 @@ class CollegeHumorIE(InfoExtractor):
}) })
self._sort_formats(formats) self._sort_formats(formats)
duration = int_or_none(vdata.get('duration'), 1000)
return { return {
'id': video_id, 'id': video_id,
'title': vdata['title'], 'title': vdata['title'],
@ -89,4 +95,5 @@ class CollegeHumorIE(InfoExtractor):
'thumbnail': vdata.get('thumbnail'), 'thumbnail': vdata.get('thumbnail'),
'formats': formats, 'formats': formats,
'age_limit': age_limit, 'age_limit': age_limit,
'duration': duration,
} }

View File

@ -0,0 +1,58 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import unified_strdate
class ElPaisIE(InfoExtractor):
    # Extractor for pages on elpais.com (optionally under a subdomain).
    # The video id is the last path component of the URL with the
    # trailing ".html" removed.
    _VALID_URL = r'https?://(?:[^.]+\.)?elpais\.com/.*/(?P<id>[^/#?]+)\.html(?:$|[?#])'
    IE_DESCR = 'El País'

    _TEST = {
        'url': 'http://blogs.elpais.com/la-voz-de-inaki/2014/02/tiempo-nuevo-recetas-viejas.html',
        'md5': '98406f301f19562170ec071b83433d55',
        'info_dict': {
            'id': 'tiempo-nuevo-recetas-viejas',
            'ext': 'mp4',
            'title': 'Tiempo nuevo, recetas viejas',
            'description': 'De lunes a viernes, a partir de las ocho de la mañana, Iñaki Gabilondo nos cuenta su visión de la actualidad nacional e internacional.',
            'upload_date': '20140206',
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the video embedded in the page at *url*.

        The media and thumbnail locations are assembled from a common
        ``url_cache`` prefix found in the page's inline JavaScript plus a
        per-resource suffix. Thumbnail and upload date are looked up with
        ``fatal=False`` and are reported as None when the lookup yields
        None.
        """
        video_id = re.match(self._VALID_URL, url).group('id')
        page = self._download_webpage(url, video_id)

        # Shared prefix that the page's script prepends to every media path.
        url_base = self._html_search_regex(
            r'var url_cache = "([^"]+)";', page, 'URL prefix')

        media_path = self._search_regex(
            r"URLMediaFile = url_cache \+ '([^']+)'", page, 'video URL')
        media_url = url_base + media_path

        # Optional still image; only joined with the prefix when present.
        still_path = self._search_regex(
            r"URLMediaStill = url_cache \+ '([^']+)'", page, 'thumbnail URL',
            fatal=False)
        if still_path is None:
            still_url = None
        else:
            still_url = url_base + still_path

        headline = self._html_search_regex(
            '<h2 class="entry-header entry-title.*?>(.*?)</h2>',
            page, 'title')

        # Optional publication date, taken from the date header's title
        # attribute and normalised through unified_strdate.
        raw_date = self._search_regex(
            r'<p class="date-header date-int updated"\s+title="([^"]+)">',
            page, 'upload date', fatal=False)
        if raw_date is None:
            normalised_date = None
        else:
            normalised_date = unified_strdate(raw_date)

        summary = self._og_search_description(page)

        return {
            'id': video_id,
            'url': media_url,
            'title': headline,
            'description': summary,
            'thumbnail': still_url,
            'upload_date': normalised_date,
        }

View File

@ -82,10 +82,13 @@ class MTVServicesInfoExtractor(InfoExtractor):
title_el = find_xpath_attr( title_el = find_xpath_attr(
itemdoc, './/{http://search.yahoo.com/mrss/}category', itemdoc, './/{http://search.yahoo.com/mrss/}category',
'scheme', 'urn:mtvn:video_title') 'scheme', 'urn:mtvn:video_title')
if title_el is None:
title_el = itemdoc.find('.//{http://search.yahoo.com/mrss/}title')
if title_el is None: if title_el is None:
title_el = itemdoc.find('.//title') title_el = itemdoc.find('.//title')
if title_el.text is None:
title_el = None
if title_el is None:
title_el = itemdoc.find('.//{http://search.yahoo.com/mrss/}title')
title = title_el.text title = title_el.text
if title is None: if title is None:
raise ExtractorError('Could not find video title') raise ExtractorError('Could not find video title')

View File

@ -1,7 +1,6 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re import re
import json
from .common import InfoExtractor from .common import InfoExtractor

View File

@ -751,13 +751,14 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
https_request = http_request https_request = http_request
https_response = http_response https_response = http_response
def unified_strdate(date_str): def unified_strdate(date_str):
"""Return a string with the date in the format YYYYMMDD""" """Return a string with the date in the format YYYYMMDD"""
upload_date = None upload_date = None
#Replace commas #Replace commas
date_str = date_str.replace(',',' ') date_str = date_str.replace(',',' ')
# %z (UTC offset) is only supported in python>=3.2 # %z (UTC offset) is only supported in python>=3.2
date_str = re.sub(r' (\+|-)[\d]*$', '', date_str) date_str = re.sub(r' ?(\+|-)[0-9:]*$', '', date_str)
format_expressions = [ format_expressions = [
'%d %B %Y', '%d %B %Y',
'%B %d %Y', '%B %d %Y',
@ -771,11 +772,12 @@ def unified_strdate(date_str):
'%Y-%m-%dT%H:%M:%S.%fZ', '%Y-%m-%dT%H:%M:%S.%fZ',
'%Y-%m-%dT%H:%M:%S.%f0Z', '%Y-%m-%dT%H:%M:%S.%f0Z',
'%Y-%m-%dT%H:%M:%S', '%Y-%m-%dT%H:%M:%S',
'%Y-%m-%dT%H:%M',
] ]
for expression in format_expressions: for expression in format_expressions:
try: try:
upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d') upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
except: except ValueError:
pass pass
if upload_date is None: if upload_date is None:
timetuple = email.utils.parsedate_tz(date_str) timetuple = email.utils.parsedate_tz(date_str)

View File

@ -1,2 +1,2 @@
__version__ = '2014.02.06' __version__ = '2014.02.06.1'