Compare commits

...

11 Commits

Author SHA1 Message Date
882907a818 release 2014.02.19.1 2014-02-19 01:27:22 +01:00
572a89cc4e [liveleak] Add support for prochan embeds (Fixes #2406) 2014-02-19 01:27:12 +01:00
c377110539 release 2014.02.19 2014-02-19 01:08:16 +01:00
a9c7198a0b [testurl] Add extractor
This is a pseudo extractor that can be used to quickly look up test URLs, or test without the test harness.
2014-02-19 01:06:16 +01:00
f6f01ea17b [space] modernize 2014-02-19 01:04:24 +01:00
f2d0fc6823 [bbccouk] Replace test
This older episode is from 1994 and hopefully won't get deleted.
2014-02-19 06:46:14 +07:00
f7000f3a1b [youtube] Add support for yourepeat.com URLs (Closes #2397) 2014-02-19 02:00:54 +07:00
c7f0177fa7 [bbccouk] Skip test 2014-02-18 00:26:12 +07:00
09c4d50944 Fix indenting in README 2014-02-17 14:58:39 +01:00
2eb5d315d4 [youtube] Match more truncated URLs (Closes #2402) 2014-02-17 14:56:21 +01:00
ad5976b4d9 [vimeo] Modernize test definition 2014-02-17 11:44:24 +01:00
10 changed files with 150 additions and 38 deletions

View File

@ -281,12 +281,14 @@ Videos can be filtered by their upload date using the options `--date`, `--dateb
Examples:
$ # Download only the videos uploaded in the last 6 months
$ youtube-dl --dateafter now-6months
$ # Download only the videos uploaded on January 1, 1970
$ youtube-dl --date 19700101
$ # will only download the videos uploaded in the 200x decade
$ youtube-dl --dateafter 20000101 --datebefore 20091231
# Download only the videos uploaded in the last 6 months
$ youtube-dl --dateafter now-6months
# Download only the videos uploaded on January 1, 1970
$ youtube-dl --date 19700101
$ # will only download the videos uploaded in the 200x decade
$ youtube-dl --dateafter 20000101 --datebefore 20091231
# FAQ

View File

@ -68,6 +68,9 @@ class TestAllURLsMatching(unittest.TestCase):
def test_youtube_show_matching(self):
self.assertMatch('http://www.youtube.com/show/airdisasters', ['youtube:show'])
def test_youtube_truncated(self):
self.assertMatch('http://www.youtube.com/watch?', ['youtube:truncated_url'])
def test_justin_tv_channelid_matching(self):
self.assertTrue(JustinTVIE.suitable(u"justin.tv/vanillatv"))
self.assertTrue(JustinTVIE.suitable(u"twitch.tv/vanillatv"))

View File

@ -216,6 +216,7 @@ from .sztvhu import SztvHuIE
from .teamcoco import TeamcocoIE
from .techtalks import TechTalksIE
from .ted import TEDIE
from .testurl import TestURLIE
from .tf1 import TF1IE
from .theplatform import ThePlatformIE
from .thisav import ThisAVIE

View File

@ -13,13 +13,13 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
_TESTS = [
{
'url': 'http://www.bbc.co.uk/programmes/p01q7wz1',
'url': 'http://www.bbc.co.uk/programmes/b039g8p7',
'info_dict': {
'id': 'p01q7wz4',
'id': 'b039d07m',
'ext': 'flv',
'title': 'Friction: Blu Mar Ten guest mix: Blu Mar Ten - Guest Mix',
'description': 'Blu Mar Ten deliver a Guest Mix for Friction.',
'duration': 1936,
'title': 'Kaleidoscope: Leonard Cohen',
'description': 'md5:db4755d7a665ae72343779f7dacb402c',
'duration': 1740,
},
'params': {
# rtmp download
@ -38,7 +38,8 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
'params': {
# rtmp download
'skip_download': True,
}
},
'skip': 'Episode is no longer available on BBC iPlayer Radio',
},
{
'url': 'http://www.bbc.co.uk/iplayer/episode/b03vhd1f/The_Voice_UK_Series_3_Blind_Auditions_5/',

View File

@ -4,15 +4,17 @@ import json
import re
from .common import InfoExtractor
from ..utils import int_or_none
class LiveLeakIE(InfoExtractor):
_VALID_URL = r'^(?:http://)?(?:\w+\.)?liveleak\.com/view\?(?:.*?)i=(?P<video_id>[\w_]+)(?:.*)'
_TESTS = [{
'url': 'http://www.liveleak.com/view?i=757_1364311680',
'file': '757_1364311680.mp4',
'md5': '0813c2430bea7a46bf13acf3406992f4',
'info_dict': {
'id': '757_1364311680',
'ext': 'mp4',
'description': 'extremely bad day for this guy..!',
'uploader': 'ljfriel2',
'title': 'Most unlucky car accident'
@ -20,25 +22,62 @@ class LiveLeakIE(InfoExtractor):
},
{
'url': 'http://www.liveleak.com/view?i=f93_1390833151',
'file': 'f93_1390833151.mp4',
'md5': 'd3f1367d14cc3c15bf24fbfbe04b9abf',
'info_dict': {
'id': 'f93_1390833151',
'ext': 'mp4',
'description': 'German Television Channel NDR does an exclusive interview with Edward Snowden.\r\nUploaded on LiveLeak cause German Television thinks the rest of the world isn\'t intereseted in Edward Snowden.',
'uploader': 'ARD_Stinkt',
'title': 'German Television does first Edward Snowden Interview (ENGLISH)',
}
},
{
'url': 'http://www.liveleak.com/view?i=4f7_1392687779',
'md5': '42c6d97d54f1db107958760788c5f48f',
'info_dict': {
'id': '4f7_1392687779',
'ext': 'mp4',
'description': "The guy with the cigarette seems amazingly nonchalant about the whole thing... I really hope my friends' reactions would be a bit stronger.\r\n\r\nAction-go to 0:55.",
'uploader': 'CapObveus',
'title': 'Man is Fatally Struck by Reckless Car While Packing up a Moving Truck',
'age_limit': 18,
}
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('video_id')
webpage = self._download_webpage(url, video_id)
video_title = self._og_search_title(webpage).replace('LiveLeak.com -', '').strip()
video_description = self._og_search_description(webpage)
video_uploader = self._html_search_regex(
r'By:.*?(\w+)</a>', webpage, 'uploader', fatal=False)
age_limit = int_or_none(self._search_regex(
r'you confirm that you are ([0-9]+) years and over.',
webpage, 'age limit', default=None))
sources_raw = self._search_regex(
r'(?s)sources:\s*(\[.*?\]),', webpage, 'video URLs', default=None)
if sources_raw is None:
sources_raw = '[{ %s}]' % (
self._search_regex(r'(file: ".*?"),', webpage, 'video URL'))
alt_source = self._search_regex(
r'(file: ".*?"),', webpage, 'video URL', default=None)
if alt_source:
sources_raw = '[{ %s}]' % alt_source
else:
# Maybe an embed?
embed_url = self._search_regex(
r'<iframe[^>]+src="(http://www.prochan.com/embed\?[^"]+)"',
webpage, 'embed URL')
return {
'_type': 'url_transparent',
'url': embed_url,
'id': video_id,
'title': video_title,
'description': video_description,
'uploader': video_uploader,
'age_limit': age_limit,
}
sources_json = re.sub(r'\s([a-z]+):\s', r'"\1": ', sources_raw)
sources = json.loads(sources_json)
@ -49,15 +88,11 @@ class LiveLeakIE(InfoExtractor):
} for s in sources]
self._sort_formats(formats)
video_title = self._og_search_title(webpage).replace('LiveLeak.com -', '').strip()
video_description = self._og_search_description(webpage)
video_uploader = self._html_search_regex(
r'By:.*?(\w+)</a>', webpage, 'uploader', fatal=False)
return {
'id': video_id,
'title': video_title,
'description': video_description,
'uploader': video_uploader,
'formats': formats,
'age_limit': age_limit,
}

View File

@ -1,3 +1,5 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
@ -8,14 +10,14 @@ from ..utils import RegexNotFoundError, ExtractorError
class SpaceIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:www|m)\.)?space\.com/\d+-(?P<title>[^/\.\?]*?)-video\.html'
_TEST = {
u'add_ie': ['Brightcove'],
u'url': u'http://www.space.com/23373-huge-martian-landforms-detail-revealed-by-european-probe-video.html',
u'info_dict': {
u'id': u'2780937028001',
u'ext': u'mp4',
u'title': u'Huge Martian Landforms\' Detail Revealed By European Probe | Video',
u'description': u'md5:db81cf7f3122f95ed234b631a6ea1e61',
u'uploader': u'TechMedia Networks',
'add_ie': ['Brightcove'],
'url': 'http://www.space.com/23373-huge-martian-landforms-detail-revealed-by-european-probe-video.html',
'info_dict': {
'id': '2780937028001',
'ext': 'mp4',
'title': 'Huge Martian Landforms\' Detail Revealed By European Probe | Video',
'description': 'md5:db81cf7f3122f95ed234b631a6ea1e61',
'uploader': 'TechMedia Networks',
},
}

View File

@ -0,0 +1,66 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import ExtractorError
class TestURLIE(InfoExtractor):
""" Allows adressing of the test cases as test:yout.*be_1 """
IE_DESC = False # Do not list
_VALID_URL = r'test(?:url)?:(?P<id>(?P<extractor>.+?)(?:_(?P<num>[0-9]+))?)$'
def _real_extract(self, url):
from ..extractor import gen_extractors
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
extractor_id = mobj.group('extractor')
all_extractors = gen_extractors()
rex = re.compile(extractor_id, flags=re.IGNORECASE)
matching_extractors = [
e for e in all_extractors if rex.search(e.IE_NAME)]
if len(matching_extractors) == 0:
raise ExtractorError(
'No extractors matching %r found' % extractor_id,
expected=True)
elif len(matching_extractors) > 1:
# Is it obvious which one to pick?
try:
extractor = next(
ie for ie in matching_extractors
if ie.IE_NAME.lower() == extractor_id.lower())
except StopIteration:
raise ExtractorError(
('Found multiple matching extractors: %s' %
' '.join(ie.IE_NAME for ie in matching_extractors)),
expected=True)
num_str = mobj.group('num')
num = int(num_str) if num_str else 0
testcases = []
t = getattr(extractor, '_TEST', None)
if t:
testcases.append(t)
testcases.extend(getattr(extractor, '_TESTS', []))
try:
tc = testcases[num]
except IndexError:
raise ExtractorError(
('Test case %d not found, got only %d tests' %
(num, len(testcases))),
expected=True)
self.to_screen('Test URL: %s' % tc['url'])
return {
'_type': 'url',
'url': tc['url'],
'id': video_id,
}

View File

@ -37,13 +37,14 @@ class VimeoIE(SubtitlesInfoExtractor):
_TESTS = [
{
'url': 'http://vimeo.com/56015672#at=0',
'file': '56015672.mp4',
'md5': '8879b6cc097e987f02484baf890129e5',
'info_dict': {
"upload_date": "20121220",
"description": "This is a test case for youtube-dl.\nFor more information, see github.com/rg3/youtube-dl\nTest chars: \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550",
"uploader_id": "user7108434",
"uploader": "Filippo Valsorda",
'id': '56015672',
'ext': 'mp4',
"upload_date": "20121220",
"description": "This is a test case for youtube-dl.\nFor more information, see github.com/rg3/youtube-dl\nTest chars: \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550",
"uploader_id": "user7108434",
"uploader": "Filippo Valsorda",
"title": "youtube-dl test video - \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550",
},
},

View File

@ -138,13 +138,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
(?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
(?:www\.)?deturl\.com/www\.youtube\.com/|
(?:www\.)?pwnyoutube\.com/|
(?:www\.)?yourepeat\.com/|
tube\.majestyc\.net/|
youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
(?:.*?\#/)? # handle anchor (#/) redirect urls
(?: # the various things that can precede the ID:
(?:(?:v|embed|e)/) # v/ or embed/ or e/
|(?: # or the v= param in all its forms
(?:(?:watch|movie)(?:_popup)?(?:\.php)?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
(?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
(?:\?|\#!?) # the params delimiter ? or # or #!
(?:.*?&)? # any other preceding param (like /?s=tuff&v=xxxx)
v=
@ -1815,7 +1816,7 @@ class YoutubeTruncatedURLIE(InfoExtractor):
IE_NAME = 'youtube:truncated_url'
IE_DESC = False # Do not list
_VALID_URL = r'''(?x)
(?:https?://)?[^/]+/watch\?feature=[a-z_]+$|
(?:https?://)?[^/]+/watch\?(?:feature=[a-z_]+)?$|
(?:https?://)?(?:www\.)?youtube\.com/attribution_link\?a=[^&]+$
'''

View File

@ -1,2 +1,2 @@
__version__ = '2014.02.17'
__version__ = '2014.02.19.1'