Compare commits

..

54 Commits

Author SHA1 Message Date
Philipp Hagemeister
34ca5d9ba0 release 2014.03.11 2014-03-11 16:51:50 +01:00
Philipp Hagemeister
60cc4dc4b4 [generic/funnyordie] Add support for funnyordie embeds (Fixes #2546) 2014-03-11 16:51:36 +01:00
Philipp Hagemeister
db95dc13a1 [playvid] Simplify (#2539) 2014-03-10 20:55:47 +01:00
Philipp Hagemeister
777ac90791 Merge remote-tracking branch 'MikeCol/playvid_extract' 2014-03-10 20:45:45 +01:00
Philipp Hagemeister
04f9bebbcb Merge remote-tracking branch 'jaimeMF/remove_global_opener' 2014-03-10 20:42:54 +01:00
MikeCol
4ea3137e41 Playvid extractor 2014-03-10 20:16:49 +01:00
Jaime Marquínez Ferrándiz
a0792b738e Don't install the global url opener
All the code uses now the urlopen method of YoutubeDL
2014-03-10 19:04:51 +01:00
Jaime Marquínez Ferrándiz
19a41fc613 Don't set the global socket timeout
Use the timeout argument of the `OpenerDirector.open` method instead
2014-03-10 19:03:37 +01:00
Sergey M․
3ee52157fb [vgtrk] Rename vesti extractor 2014-03-11 00:58:05 +07:00
Sergey M․
c4d197ee2d [vesti] Fix _VALID_URL regex 2014-03-11 00:49:41 +07:00
Philipp Hagemeister
a33932cfe3 [vevo] Correct test value
The date is now interpreted as UTC for consistency.
2014-03-10 17:56:54 +01:00
Philipp Hagemeister
bcf89ce62c [generic] Suppress warning about doctypes in RSS parser 2014-03-10 17:31:32 +01:00
Philipp Hagemeister
e3899d0e00 Merge branch 'master' of github.com:rg3/youtube-dl 2014-03-10 16:42:22 +01:00
Philipp Hagemeister
dcb00da49c [depositfiles] Remove extractor
This site requires a CAPTCHA to download, supports arbitrary files and not only audio/video, and I can't find a single uncopyrighted video with a quick google search.
Closes #1255
2014-03-10 16:41:08 +01:00
Sergey M․
aa51d20d19 [vesti] Skip geo restricted test 2014-03-10 22:31:22 +07:00
Philipp Hagemeister
ae7ed92057 [youtube] Fix up invalid JSON 2014-03-10 13:35:45 +01:00
Philipp Hagemeister
e45b31d9bd [vevo] Interpret date as UTC instead of local time 2014-03-10 13:12:57 +01:00
Philipp Hagemeister
5a25f39653 Correct extractor documentation 2014-03-10 13:09:55 +01:00
Philipp Hagemeister
963d7ec412 release 2014.03.10 2014-03-10 13:04:20 +01:00
Philipp Hagemeister
e712d94adf Merge branch 'master' of github.com:rg3/youtube-dl 2014-03-10 13:03:52 +01:00
Philipp Hagemeister
6a72423955 [generic] Use a different URL for the generic RSS test (Closes #2532) 2014-03-10 13:03:39 +01:00
Jaime Marquínez Ferrándiz
4126826b10 [photobucket] More unicode literals 2014-03-10 12:59:19 +01:00
Sergey M․
b773ead7fd [vesti] Add support for more sites (Closes #2534) 2014-03-10 18:52:00 +07:00
Philipp Hagemeister
855e2750bc Credit @mharrys for aftonbladet 2014-03-10 10:30:17 +01:00
Philipp Hagemeister
805ef3c60b Correct automatic resolution determination 2014-03-10 10:29:25 +01:00
Philipp Hagemeister
fbc2dcb40b [aftonbladet] Modernize 2014-03-10 10:28:56 +01:00
Philipp Hagemeister
5375d7ad84 Merge remote-tracking branch 'mharrys/aftonbladet' 2014-03-10 10:23:45 +01:00
Jaime Marquínez Ferrándiz
90f3476180 [photobucket] Modernize and remove the old extraction code 2014-03-09 19:36:46 +01:00
Jaime Marquínez Ferrándiz
ee95c09333 [pornhub] Use compat_urllib_parse.unquote_plus (#2531) 2014-03-09 19:16:25 +01:00
Jaime Marquínez Ferrándiz
75d06db9fc Merge branch 'pornhub_unquote_password' of github.com:MikeCol/youtube-dl 2014-03-09 19:15:33 +01:00
Jaime Marquínez Ferrándiz
439a1fffcb [myvideo] Modernize 2014-03-09 18:58:34 +01:00
Jaime Marquínez Ferrándiz
9d9d70c462 [facebook] Modernize 2014-03-09 18:42:44 +01:00
Jaime Marquínez Ferrándiz
b4a186b7be [jukebox] Modernize and add a test 2014-03-09 18:33:17 +01:00
Jaime Marquínez Ferrándiz
bdebf51c8f [xnxx] Modernize 2014-03-09 18:31:39 +01:00
MikeCol
264b86f9b4 Unquote password 2014-03-09 18:26:18 +01:00
Philipp Hagemeister
9e55e37a2e Merge remote-tracking branch 'origin/master' 2014-03-09 18:08:16 +01:00
Jaime Marquínez Ferrándiz
1471956573 Add a basic test suite for the InfoExtractor class 2014-03-09 17:05:29 +01:00
Mattias Harrysson
27865b2169 [aftonbladet] add extractor for aftonbladet.se 2014-03-09 16:59:18 +01:00
Jaime Marquínez Ferrándiz
6d07ce0162 YoutubeDL: If the logger is set call its warning method in report_warning 2014-03-09 15:16:54 +01:00
Sergey M․
edb7fc5435 [videodetective] Modernize 2014-03-09 18:39:39 +07:00
Jaime Marquínez Ferrándiz
31f77343f2 [vube] Update the test's checksum 2014-03-09 12:27:38 +01:00
Jaime Marquínez Ferrándiz
63ad031583 [soundcloud] Add the description field to the second test 2014-03-09 12:26:58 +01:00
Jaime Marquínez Ferrándiz
957688cee6 [ustream:channel] Update test's number of entries 2014-03-09 12:03:49 +01:00
Jaime Marquínez Ferrándiz
806d6c2e8c [gamekings] Modernize and update the test's description field 2014-03-09 11:57:30 +01:00
Jaime Marquínez Ferrándiz
0ef68e04d9 [mtv] Transform the urls from the mobile version to get the best quality
And don't report a warning, just log a message, it allows to pass the test from Europe.
2014-03-08 22:09:42 +01:00
Sergey M․
a496524db2 [collegehumor] Replace youtube test 2014-03-09 03:21:26 +07:00
Jaime Marquínez Ferrándiz
935c7360cc [spike] Add support for mobile urls 2014-03-08 21:10:21 +01:00
Jaime Marquínez Ferrándiz
340b046876 [spike] Add support for downloading the mobile version if the normal version is geoblocked 2014-03-08 20:59:11 +01:00
Jaime Marquínez Ferrándiz
cc1db7f9b7 [mtv] Improve detection of geoblocked videos 2014-03-08 19:46:34 +01:00
Philipp Hagemeister
a4ff6c4762 [arte] Raise a proper error when no video is found 2014-03-08 16:04:03 +01:00
Philipp Hagemeister
1060425cbb [vimeo] Add a better error message for embed-only videos (#2527) 2014-03-08 12:25:09 +01:00
Jaime Marquínez Ferrándiz
e9c092f125 YoutubeDL: Use its urlopen method for downloading the thumbnail. 2014-03-07 16:43:34 +01:00
Jaime Marquínez Ferrándiz
22ff5d2105 [http] Use the YoutubeDL.urlopen method 2014-03-07 16:41:42 +01:00
Sergey M․
136db7881b [lynda] Modernize 2014-03-07 22:11:01 +07:00
33 changed files with 568 additions and 344 deletions

View File

@@ -0,0 +1,44 @@
#!/usr/bin/env python
from __future__ import unicode_literals
# Allow direct execution
import os
import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import FakeYDL
from youtube_dl.extractor.common import InfoExtractor
from youtube_dl.extractor import YoutubeIE, get_info_extractor
class TestIE(InfoExtractor):
pass
class TestInfoExtractor(unittest.TestCase):
def setUp(self):
self.ie = TestIE(FakeYDL())
def test_ie_key(self):
self.assertEqual(get_info_extractor(YoutubeIE.ie_key()), YoutubeIE)
def test_html_search_regex(self):
html = '<p id="foo">Watch this <a href="http://www.youtube.com/watch?v=BaW_jenozKc">video</a></p>'
search = lambda re, *args: self.ie._html_search_regex(re, html, *args)
self.assertEqual(search(r'<p id="foo">(.+?)</p>', 'foo'), 'Watch this video')
def test_opengraph(self):
ie = self.ie
html = '''
<meta name="og:title" content='Foo'/>
<meta content="Some video's description " name="og:description"/>
<meta property='og:image' content='http://domain.com/pic.jpg?key1=val1&amp;key2=val2'/>
'''
self.assertEqual(ie._og_search_title(html), 'Foo')
self.assertEqual(ie._og_search_description(html), 'Some video\'s description ')
self.assertEqual(ie._og_search_thumbnail(html), 'http://domain.com/pic.jpg?key1=val1&key2=val2')
if __name__ == '__main__':
unittest.main()

View File

@@ -99,7 +99,7 @@ class TestPlaylists(unittest.TestCase):
result = ie.extract('http://www.ustream.tv/channel/young-americans-for-liberty')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], '5124905')
self.assertTrue(len(result['entries']) >= 11)
self.assertTrue(len(result['entries']) >= 6)
def test_soundcloud_set(self):
dl = FakeYDL()
@@ -254,9 +254,9 @@ class TestPlaylists(unittest.TestCase):
def test_generic_rss_feed(self):
dl = FakeYDL()
ie = GenericIE(dl)
result = ie.extract('http://www.escapistmagazine.com/rss/videos/list/1.xml')
result = ie.extract('http://phihag.de/2014/youtube-dl/rss.xml')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], 'http://www.escapistmagazine.com/rss/videos/list/1.xml')
self.assertEqual(result['id'], 'http://phihag.de/2014/youtube-dl/rss.xml')
self.assertEqual(result['title'], 'Zero Punctuation')
self.assertTrue(len(result['entries']) > 10)

View File

@@ -370,12 +370,15 @@ class YoutubeDL(object):
Print the message to stderr, it will be prefixed with 'WARNING:'
If stderr is a tty file the 'WARNING:' will be colored
'''
if self._err_file.isatty() and os.name != 'nt':
_msg_header = '\033[0;33mWARNING:\033[0m'
if self.params.get('logger') is not None:
self.params['logger'].warning(message)
else:
_msg_header = 'WARNING:'
warning_message = '%s %s' % (_msg_header, message)
self.to_stderr(warning_message)
if self._err_file.isatty() and os.name != 'nt':
_msg_header = '\033[0;33mWARNING:\033[0m'
else:
_msg_header = 'WARNING:'
warning_message = '%s %s' % (_msg_header, message)
self.to_stderr(warning_message)
def report_error(self, message, tb=None):
'''
@@ -413,9 +416,9 @@ class YoutubeDL(object):
if template_dict.get('width') and template_dict.get('height'):
template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
elif template_dict.get('height'):
res = '%sp' % template_dict['height']
template_dict['resolution'] = '%sp' % template_dict['height']
elif template_dict.get('width'):
res = '?x%d' % template_dict['width']
template_dict['resolution'] = '?x%d' % template_dict['width']
sanitize = lambda k, v: sanitize_filename(
compat_str(v),
@@ -698,8 +701,11 @@ class YoutubeDL(object):
else:
formats = info_dict['formats']
if not formats:
raise ExtractorError('No video formats found!')
# We check that all the formats have the format and format_id fields
for (i, format) in enumerate(formats):
for i, format in enumerate(formats):
if format.get('format_id') is None:
format['format_id'] = compat_str(i)
if format.get('format') is None:
@@ -918,7 +924,7 @@ class YoutubeDL(object):
self.to_screen('[%s] %s: Downloading thumbnail ...' %
(info_dict['extractor'], info_dict['id']))
try:
uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
uf = self.urlopen(info_dict['thumbnail'])
with open(thumb_filename, 'wb') as thumbf:
shutil.copyfileobj(uf, thumbf)
self.to_screen('[%s] %s: Writing thumbnail to: %s' %
@@ -1164,7 +1170,7 @@ class YoutubeDL(object):
def urlopen(self, req):
""" Start an HTTP download """
return self._opener.open(req)
return self._opener.open(req, timeout=self._socket_timeout)
def print_debug_header(self):
if not self.params.get('verbose'):
@@ -1195,7 +1201,7 @@ class YoutubeDL(object):
def _setup_opener(self):
timeout_val = self.params.get('socket_timeout')
timeout = 600 if timeout_val is None else float(timeout_val)
self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
opts_cookiefile = self.params.get('cookiefile')
opts_proxy = self.params.get('proxy')
@@ -1233,7 +1239,3 @@ class YoutubeDL(object):
# (See https://github.com/rg3/youtube-dl/issues/1309 for details)
opener.addheaders = []
self._opener = opener
# TODO remove this global modification
compat_urllib_request.install_opener(opener)
socket.setdefaulttimeout(timeout)

View File

@@ -50,6 +50,7 @@ __authors__ = (
'Anthony Weems',
'David Wagner',
'Juan C. Olivares',
'Mattias Harrysson',
)
__license__ = 'Public Domain'

View File

@@ -49,7 +49,7 @@ class HttpFD(FileDownloader):
while count <= retries:
# Establish connection
try:
data = compat_urllib_request.urlopen(request)
data = self.ydl.urlopen(request)
break
except (compat_urllib_error.HTTPError, ) as err:
if (err.code < 500 or err.code >= 600) and err.code != 416:
@@ -59,7 +59,7 @@ class HttpFD(FileDownloader):
# Unable to resume (requested range not satisfiable)
try:
# Open the connection again without the range header
data = compat_urllib_request.urlopen(basic_request)
data = self.ydl.urlopen(basic_request)
content_length = data.info()['Content-Length']
except (compat_urllib_error.HTTPError, ) as err:
if err.code < 500 or err.code >= 600:

View File

@@ -1,5 +1,6 @@
from .academicearth import AcademicEarthCourseIE
from .addanime import AddAnimeIE
from .aftonbladet import AftonbladetIE
from .anitube import AnitubeIE
from .aparat import AparatIE
from .appletrailers import AppleTrailersIE
@@ -52,7 +53,6 @@ from .dailymotion import (
DailymotionUserIE,
)
from .daum import DaumIE
from .depositfiles import DepositFilesIE
from .dotsub import DotsubIE
from .dreisat import DreiSatIE
from .defense import DefenseGouvFrIE
@@ -175,6 +175,7 @@ from .ooyala import OoyalaIE
from .orf import ORFIE
from .pbs import PBSIE
from .photobucket import PhotobucketIE
from .playvid import PlayvidIE
from .podomatic import PodomaticIE
from .pornhd import PornHdIE
from .pornhub import PornHubIE
@@ -246,8 +247,8 @@ from .ustream import UstreamIE, UstreamChannelIE
from .vbox7 import Vbox7IE
from .veehd import VeeHDIE
from .veoh import VeohIE
from .vesti import VestiIE
from .vevo import VevoIE
from .vgtrk import VGTRKIE
from .vice import ViceIE
from .viddler import ViddlerIE
from .videobam import VideoBamIE

View File

@@ -0,0 +1,69 @@
# encoding: utf-8
from __future__ import unicode_literals
import datetime
import re
from .common import InfoExtractor
class AftonbladetIE(InfoExtractor):
_VALID_URL = r'^http://tv\.aftonbladet\.se/webbtv.+?(?P<video_id>article[0-9]+)\.ab(?:$|[?#])'
_TEST = {
'url': 'http://tv.aftonbladet.se/webbtv/nyheter/vetenskap/rymden/article36015.ab',
'info_dict': {
'id': 'article36015',
'ext': 'mp4',
'title': 'Vulkanutbrott i rymden - nu släpper NASA bilderna',
'description': 'Jupiters måne mest aktiv av alla himlakroppar',
'upload_date': '20140306',
},
}
def _real_extract(self, url):
mobj = re.search(self._VALID_URL, url)
video_id = mobj.group('video_id')
webpage = self._download_webpage(url, video_id)
# find internal video meta data
META_URL = 'http://aftonbladet-play.drlib.aptoma.no/video/%s.json'
internal_meta_id = self._html_search_regex(
r'data-aptomaId="([\w\d]+)"', webpage, 'internal_meta_id')
internal_meta_url = META_URL % internal_meta_id
internal_meta_json = self._download_json(
internal_meta_url, video_id, 'Downloading video meta data')
# find internal video formats
FORMATS_URL = 'http://aftonbladet-play.videodata.drvideo.aptoma.no/actions/video/?id=%s'
internal_video_id = internal_meta_json['videoId']
internal_formats_url = FORMATS_URL % internal_video_id
internal_formats_json = self._download_json(
internal_formats_url, video_id, 'Downloading video formats')
formats = []
for fmt in internal_formats_json['formats']['http']['pseudostreaming']['mp4']:
p = fmt['paths'][0]
formats.append({
'url': 'http://%s:%d/%s/%s' % (p['address'], p['port'], p['path'], p['filename']),
'ext': 'mp4',
'width': fmt['width'],
'height': fmt['height'],
'tbr': fmt['bitrate'],
'protocol': 'http',
})
self._sort_formats(formats)
timestamp = datetime.datetime.fromtimestamp(internal_meta_json['timePublished'])
upload_date = timestamp.strftime('%Y%m%d')
return {
'id': video_id,
'title': internal_meta_json['title'],
'formats': formats,
'thumbnail': internal_meta_json['imageUrl'],
'description': internal_meta_json['shortPreamble'],
'upload_date': upload_date,
'duration': internal_meta_json['duration'],
'view_count': internal_meta_json['views'],
}

View File

@@ -72,18 +72,22 @@ class ArteTvIE(InfoExtractor):
return self._extract_liveweb(url, name, lang)
if re.search(self._LIVE_URL, url) is not None:
raise ExtractorError(u'Arte live streams are not yet supported, sorry')
raise ExtractorError('Arte live streams are not yet supported, sorry')
# self.extractLiveStream(url)
# return
raise ExtractorError('No video found')
def _extract_video(self, url, video_id, lang):
"""Extract from videos.arte.tv"""
ref_xml_url = url.replace('/videos/', '/do_delegate/videos/')
ref_xml_url = ref_xml_url.replace('.html', ',view,asPlayerXml.xml')
ref_xml_doc = self._download_xml(ref_xml_url, video_id, note=u'Downloading metadata')
ref_xml_doc = self._download_xml(
ref_xml_url, video_id, note='Downloading metadata')
config_node = find_xpath_attr(ref_xml_doc, './/video', 'lang', lang)
config_xml_url = config_node.attrib['ref']
config_xml = self._download_webpage(config_xml_url, video_id, note=u'Downloading configuration')
config_xml = self._download_webpage(
config_xml_url, video_id, note='Downloading configuration')
video_urls = list(re.finditer(r'<url quality="(?P<quality>.*?)">(?P<url>.*?)</url>', config_xml))
def _key(m):

View File

@@ -35,15 +35,15 @@ class CollegeHumorIE(InfoExtractor):
},
# embedded youtube video
{
'url': 'http://www.collegehumor.com/embed/6950457',
'url': 'http://www.collegehumor.com/embed/6950306',
'info_dict': {
'id': 'W5gMp3ZjYg4',
'id': 'Z-bao9fg6Yc',
'ext': 'mp4',
'title': 'Funny Dogs Protecting Babies Compilation 2014 [NEW HD]',
'uploader': 'FunnyPlox TV',
'uploader_id': 'funnyploxtv',
'description': 'md5:7ded37421526d54afdf005e25bc2b7a3',
'upload_date': '20140128',
'title': 'Young Americans Think President John F. Kennedy Died THIS MORNING IN A CAR ACCIDENT!!!',
'uploader': 'Mark Dice',
'uploader_id': 'MarkDice',
'description': 'md5:62c3dab9351fac7bb44b53b69511d87f',
'upload_date': '20140127',
},
'params': {
'skip_download': True,

View File

@@ -118,9 +118,6 @@ class InfoExtractor(object):
_real_extract() methods and define a _VALID_URL regexp.
Probably, they should also be added to the list of extractors.
_real_extract() must return a *list* of information dictionaries as
described above.
Finally, the _WORKING attribute should be set to False for broken IEs
in order to warn the users and skip the tests.
"""

View File

@@ -1,60 +0,0 @@
import re
import os
import socket
from .common import InfoExtractor
from ..utils import (
compat_http_client,
compat_str,
compat_urllib_error,
compat_urllib_parse,
compat_urllib_request,
ExtractorError,
)
class DepositFilesIE(InfoExtractor):
"""Information extractor for depositfiles.com"""
_VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles\.com/(?:../(?#locale))?files/(.+)'
def _real_extract(self, url):
file_id = url.split('/')[-1]
# Rebuild url in english locale
url = 'http://depositfiles.com/en/files/' + file_id
# Retrieve file webpage with 'Free download' button pressed
free_download_indication = {'gateway_result' : '1'}
request = compat_urllib_request.Request(url, compat_urllib_parse.urlencode(free_download_indication))
try:
self.report_download_webpage(file_id)
webpage = compat_urllib_request.urlopen(request).read()
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
raise ExtractorError(u'Unable to retrieve file webpage: %s' % compat_str(err))
# Search for the real file URL
mobj = re.search(r'<form action="(http://fileshare.+?)"', webpage)
if (mobj is None) or (mobj.group(1) is None):
# Try to figure out reason of the error.
mobj = re.search(r'<strong>(Attention.*?)</strong>', webpage, re.DOTALL)
if (mobj is not None) and (mobj.group(1) is not None):
restriction_message = re.sub('\s+', ' ', mobj.group(1)).strip()
raise ExtractorError(u'%s' % restriction_message)
else:
raise ExtractorError(u'Unable to extract download URL from: %s' % url)
file_url = mobj.group(1)
file_extension = os.path.splitext(file_url)[1][1:]
# Search for file title
file_title = self._search_regex(r'<b title="(.*?)">', webpage, u'title')
return [{
'id': file_id.decode('utf-8'),
'url': file_url.decode('utf-8'),
'uploader': None,
'upload_date': None,
'title': file_title,
'ext': file_extension.decode('utf-8'),
}]

View File

@@ -18,10 +18,8 @@ from ..utils import (
class FacebookIE(InfoExtractor):
"""Information Extractor for Facebook"""
_VALID_URL = r'''(?x)
(?:https?://)?(?:\w+\.)?facebook\.com/
https?://(?:\w+\.)?facebook\.com/
(?:[^#?]*\#!/)?
(?:video/video\.php|photo\.php|video/embed)\?(?:.*?)
(?:v|video_id)=(?P<id>[0-9]+)
@@ -37,14 +35,10 @@ class FacebookIE(InfoExtractor):
'id': '120708114770723',
'ext': 'mp4',
'duration': 279,
'title': 'PEOPLE ARE AWESOME 2013'
'title': 'PEOPLE ARE AWESOME 2013',
}
}
def report_login(self):
"""Report attempt to log in."""
self.to_screen('Logging in')
def _login(self):
(useremail, password) = self._get_login_info()
if useremail is None:
@@ -101,8 +95,6 @@ class FacebookIE(InfoExtractor):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
if mobj is None:
raise ExtractorError('Invalid URL: %s' % url)
video_id = mobj.group('id')
url = 'https://www.facebook.com/video/video.php?v=%s' % video_id
@@ -128,18 +120,14 @@ class FacebookIE(InfoExtractor):
video_url = video_data['sd_src']
if not video_url:
raise ExtractorError('Cannot find video URL')
video_duration = int(video_data['video_duration'])
thumbnail = video_data['thumbnail_src']
video_title = self._html_search_regex(
r'<h2 class="uiHeaderTitle">([^<]*)</h2>', webpage, 'title')
info = {
return {
'id': video_id,
'title': video_title,
'url': video_url,
'ext': 'mp4',
'duration': video_duration,
'thumbnail': thumbnail,
'duration': int(video_data['video_duration']),
'thumbnail': video_data['thumbnail_src'],
}
return [info]

View File

@@ -1,12 +1,13 @@
from __future__ import unicode_literals
import json
import re
from .common import InfoExtractor
class FunnyOrDieIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:www\.)?funnyordie\.com/videos/(?P<id>[0-9a-f]+)/.*$'
_VALID_URL = r'https?://(?:www\.)?funnyordie\.com/(?P<type>embed|videos)/(?P<id>[0-9a-f]+)(?:$|[?#/])'
_TEST = {
'url': 'http://www.funnyordie.com/videos/0732f586d7/heart-shaped-box-literal-video-version',
'file': '0732f586d7.mp4',
@@ -30,10 +31,20 @@ class FunnyOrDieIE(InfoExtractor):
[r'type="video/mp4" src="(.*?)"', r'src="([^>]*?)" type=\'video/mp4\''],
webpage, 'video URL', flags=re.DOTALL)
if mobj.group('type') == 'embed':
post_json = self._search_regex(
r'fb_post\s*=\s*(\{.*?\});', webpage, 'post details')
post = json.loads(post_json)['attachment']
title = post['name']
description = post.get('description')
else:
title = self._og_search_title(webpage)
description = self._og_search_description(webpage)
return {
'id': video_id,
'url': video_url,
'ext': 'mp4',
'title': self._og_search_title(webpage),
'description': self._og_search_description(webpage),
'title': title,
'description': description,
}

View File

@@ -1,3 +1,5 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
@@ -6,13 +8,14 @@ from .common import InfoExtractor
class GamekingsIE(InfoExtractor):
_VALID_URL = r'http://www\.gamekings\.tv/videos/(?P<name>[0-9a-z\-]+)'
_TEST = {
u"url": u"http://www.gamekings.tv/videos/phoenix-wright-ace-attorney-dual-destinies-review/",
u'file': u'20130811.mp4',
'url': 'http://www.gamekings.tv/videos/phoenix-wright-ace-attorney-dual-destinies-review/',
# MD5 is flaky, seems to change regularly
#u'md5': u'2f32b1f7b80fdc5cb616efb4f387f8a3',
# 'md5': '2f32b1f7b80fdc5cb616efb4f387f8a3',
u'info_dict': {
u"title": u"Phoenix Wright: Ace Attorney \u2013 Dual Destinies Review",
u"description": u"Melle en Steven hebben voor de review een week in de rechtbank doorbracht met Phoenix Wright: Ace Attorney - Dual Destinies.",
'id': '20130811',
'ext': 'mp4',
'title': 'Phoenix Wright: Ace Attorney \u2013 Dual Destinies Review',
'description': 'md5:632e61a9f97d700e83f43d77ddafb6a4',
}
}

View File

@@ -4,7 +4,6 @@ from __future__ import unicode_literals
import os
import re
import xml.etree.ElementTree
from .common import InfoExtractor
from .youtube import YoutubeIE
@@ -17,6 +16,7 @@ from ..utils import (
ExtractorError,
HEADRequest,
parse_xml,
smuggle_url,
unescapeHTML,
unified_strdate,
@@ -134,6 +134,17 @@ class GenericIE(InfoExtractor):
'skip_download': True,
},
},
# funnyordie embed
{
'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
'md5': '7cf780be104d40fea7bae52eed4a470e',
'info_dict': {
'id': '18e820ec3f',
'ext': 'mp4',
'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
}
},
]
def report_download_webpage(self, video_id):
@@ -274,7 +285,7 @@ class GenericIE(InfoExtractor):
# Is it an RSS feed?
try:
doc = xml.etree.ElementTree.fromstring(webpage.encode('utf-8'))
doc = parse_xml(webpage)
if doc.tag == 'rss':
return self._extract_rss(url, video_id, doc)
except compat_xml_parse_error:
@@ -432,6 +443,14 @@ class GenericIE(InfoExtractor):
if mobj is not None:
return self.url_result(compat_urllib_parse.unquote(mobj.group('url')))
# Look for funnyordie embed
matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
if matches:
urlrs = [self.url_result(unescapeHTML(eurl), 'FunnyOrDie')
for eurl in matches]
return self.playlist_result(
urlrs, playlist_id=video_id, playlist_title=video_title)
# Start with something easy: JW Player in SWFObject
mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
if mobj is None:

View File

@@ -1,56 +1,61 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
RegexNotFoundError,
unescapeHTML,
)
class JukeboxIE(InfoExtractor):
_VALID_URL = r'^http://www\.jukebox?\..+?\/.+[,](?P<video_id>[a-z0-9\-]+)\.html'
_IFRAME = r'<iframe .*src="(?P<iframe>[^"]*)".*>'
_VIDEO_URL = r'"config":{"file":"(?P<video_url>http:[^"]+[.](?P<video_ext>[^.?]+)[?]mdtk=[0-9]+)"'
_TITLE = r'<h1 class="inline">(?P<title>[^<]+)</h1>.*<span id="infos_article_artist">(?P<artist>[^<]+)</span>'
_IS_YOUTUBE = r'config":{"file":"(?P<youtube_url>http:[\\][/][\\][/]www[.]youtube[.]com[\\][/]watch[?]v=[^"]+)"'
_TEST = {
'url': 'http://www.jukebox.es/kosheen/videoclip,pride,r303r.html',
'md5': '5dc6477e74b1e37042ac5acedd8413e5',
'info_dict': {
'id': 'r303r',
'ext': 'flv',
'title': 'Kosheen-En Vivo Pride',
'uploader': 'Kosheen',
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('video_id')
html = self._download_webpage(url, video_id)
mobj = re.search(self._IFRAME, html)
if mobj is None:
raise ExtractorError(u'Cannot extract iframe url')
iframe_url = unescapeHTML(mobj.group('iframe'))
iframe_url = unescapeHTML(self._search_regex(r'<iframe .*src="([^"]*)"', html, 'iframe url'))
iframe_html = self._download_webpage(iframe_url, video_id, 'Downloading iframe')
mobj = re.search(r'class="jkb_waiting"', iframe_html)
if mobj is not None:
raise ExtractorError(u'Video is not available(in your country?)!')
if re.search(r'class="jkb_waiting"', iframe_html) is not None:
raise ExtractorError('Video is not available(in your country?)!')
self.report_extraction(video_id)
mobj = re.search(self._VIDEO_URL, iframe_html)
if mobj is None:
mobj = re.search(self._IS_YOUTUBE, iframe_html)
if mobj is None:
raise ExtractorError(u'Cannot extract video url')
youtube_url = unescapeHTML(mobj.group('youtube_url')).replace('\/','/')
self.to_screen(u'Youtube video detected')
return self.url_result(youtube_url,ie='Youtube')
video_url = unescapeHTML(mobj.group('video_url')).replace('\/','/')
video_ext = unescapeHTML(mobj.group('video_ext'))
try:
video_url = self._search_regex(r'"config":{"file":"(?P<video_url>http:[^"]+\?mdtk=[0-9]+)"',
iframe_html, 'video url')
video_url = unescapeHTML(video_url).replace('\/', '/')
except RegexNotFoundError:
youtube_url = self._search_regex(
r'config":{"file":"(http:\\/\\/www\.youtube\.com\\/watch\?v=[^"]+)"',
iframe_html, 'youtube url')
youtube_url = unescapeHTML(youtube_url).replace('\/', '/')
self.to_screen('Youtube video detected')
return self.url_result(youtube_url, ie='Youtube')
mobj = re.search(self._TITLE, html)
if mobj is None:
raise ExtractorError(u'Cannot extract title')
title = unescapeHTML(mobj.group('title'))
artist = unescapeHTML(mobj.group('artist'))
title = self._html_search_regex(r'<h1 class="inline">([^<]+)</h1>',
html, 'title')
artist = self._html_search_regex(r'<span id="infos_article_artist">([^<]+)</span>',
html, 'artist')
return [{'id': video_id,
'url': video_url,
'title': artist + '-' + title,
'ext': video_ext
}]
return {
'id': video_id,
'url': video_url,
'title': artist + '-' + title,
'uploader': artist,
}

View File

@@ -10,6 +10,7 @@ from ..utils import (
compat_urllib_request,
ExtractorError,
int_or_none,
compat_str,
)
@@ -27,9 +28,10 @@ class LyndaIE(SubtitlesInfoExtractor):
_TEST = {
'url': 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html',
'file': '114408.mp4',
'md5': 'ecfc6862da89489161fb9cd5f5a6fac1',
'info_dict': {
'id': '114408',
'ext': 'mp4',
'title': 'Using the exercise files',
'duration': 68
}
@@ -42,17 +44,18 @@ class LyndaIE(SubtitlesInfoExtractor):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group(1)
page = self._download_webpage('http://www.lynda.com/ajax/player?videoId=%s&type=video' % video_id,
video_id, 'Downloading video JSON')
page = self._download_webpage('http://www.lynda.com/ajax/player?videoId=%s&type=video' % video_id, video_id,
'Downloading video JSON')
video_json = json.loads(page)
if 'Status' in video_json:
raise ExtractorError('lynda returned error: %s' % video_json['Message'], expected=True)
if video_json['HasAccess'] is False:
raise ExtractorError('Video %s is only available for members. ' % video_id + self.ACCOUNT_CREDENTIALS_HINT, expected=True)
raise ExtractorError(
'Video %s is only available for members. ' % video_id + self.ACCOUNT_CREDENTIALS_HINT, expected=True)
video_id = video_json['ID']
video_id = compat_str(video_json['ID'])
duration = video_json['DurationInSeconds']
title = video_json['Title']
@@ -108,7 +111,7 @@ class LyndaIE(SubtitlesInfoExtractor):
'stayPut': 'false'
}
request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
login_page = self._download_webpage(request, None, note='Logging in as %s' % username)
login_page = self._download_webpage(request, None, 'Logging in as %s' % username)
# Not (yet) logged in
m = re.search(r'loginResultJson = \'(?P<json>[^\']+)\';', login_page)
@@ -133,7 +136,7 @@ class LyndaIE(SubtitlesInfoExtractor):
'stayPut': 'false',
}
request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(confirm_form))
login_page = self._download_webpage(request, None, note='Confirming log in and log out from another device')
login_page = self._download_webpage(request, None, 'Confirming log in and log out from another device')
if re.search(self._SUCCESSFUL_LOGIN_REGEX, login_page) is None:
raise ExtractorError('Unable to log in')
@@ -167,7 +170,7 @@ class LyndaIE(SubtitlesInfoExtractor):
def _get_available_subtitles(self, video_id, webpage):
url = 'http://www.lynda.com/ajax/player?videoId=%s&type=transcript' % video_id
sub = self._download_webpage(url, None, note=False)
sub = self._download_webpage(url, None, False)
sub_json = json.loads(sub)
return {'en': url} if len(sub_json) > 0 else {}

View File

@@ -5,9 +5,12 @@ import re
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse,
compat_urllib_request,
ExtractorError,
find_xpath_attr,
fix_xml_ampersands,
HEADRequest,
unescapeHTML,
url_basename,
RegexNotFoundError,
)
@@ -18,6 +21,7 @@ def _media_xml_tag(tag):
class MTVServicesInfoExtractor(InfoExtractor):
_MOBILE_TEMPLATE = None
@staticmethod
def _id_from_uri(uri):
return uri.split(':')[-1]
@@ -39,9 +43,29 @@ class MTVServicesInfoExtractor(InfoExtractor):
else:
return thumb_node.attrib['url']
def _extract_video_formats(self, mdoc):
if re.match(r'.*/error_country_block\.swf$', mdoc.find('.//src').text) is not None:
raise ExtractorError('This video is not available from your country.', expected=True)
def _extract_mobile_video_formats(self, mtvn_id):
webpage_url = self._MOBILE_TEMPLATE % mtvn_id
req = compat_urllib_request.Request(webpage_url)
# Otherwise we get a webpage that would execute some javascript
req.add_header('Youtubedl-user-agent', 'curl/7')
webpage = self._download_webpage(req, mtvn_id,
'Downloading mobile page')
metrics_url = unescapeHTML(self._search_regex(r'<a href="(http://metrics.+?)"', webpage, 'url'))
req = HEADRequest(metrics_url)
response = self._request_webpage(req, mtvn_id, 'Resolving url')
url = response.geturl()
# Transform the url to get the best quality:
url = re.sub(r'.+pxE=mp4', 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=0+_pxK=18639+_pxE=mp4', url, 1)
return [{'url': url,'ext': 'mp4'}]
def _extract_video_formats(self, mdoc, mtvn_id):
if re.match(r'.*/(error_country_block\.swf|geoblock\.mp4)$', mdoc.find('.//src').text) is not None:
if mtvn_id is not None and self._MOBILE_TEMPLATE is not None:
self.to_screen('The normal version is not available from your '
'country, trying with the mobile version')
return self._extract_mobile_video_formats(mtvn_id)
raise ExtractorError('This video is not available from your country.',
expected=True)
formats = []
for rendition in mdoc.findall('.//rendition'):
@@ -94,9 +118,16 @@ class MTVServicesInfoExtractor(InfoExtractor):
raise ExtractorError('Could not find video title')
title = title.strip()
# This a short id that's used in the webpage urls
mtvn_id = None
mtvn_id_node = find_xpath_attr(itemdoc, './/{http://search.yahoo.com/mrss/}category',
'scheme', 'urn:mtvn:id')
if mtvn_id_node is not None:
mtvn_id = mtvn_id_node.text
return {
'title': title,
'formats': self._extract_video_formats(mediagen_doc),
'formats': self._extract_video_formats(mediagen_doc, mtvn_id),
'id': video_id,
'thumbnail': self._get_thumbnail_url(uri, itemdoc),
'description': description,

View File

@@ -1,3 +1,5 @@
from __future__ import unicode_literals
import binascii
import base64
import hashlib
@@ -14,18 +16,16 @@ from ..utils import (
)
class MyVideoIE(InfoExtractor):
"""Information Extractor for myvideo.de."""
_VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/(?:[^/]+/)?watch/([0-9]+)/([^?/]+).*'
IE_NAME = u'myvideo'
_VALID_URL = r'http://(?:www\.)?myvideo\.de/(?:[^/]+/)?watch/(?P<id>[0-9]+)/[^?/]+.*'
IE_NAME = 'myvideo'
_TEST = {
u'url': u'http://www.myvideo.de/watch/8229274/bowling_fail_or_win',
u'file': u'8229274.flv',
u'md5': u'2d2753e8130479ba2cb7e0a37002053e',
u'info_dict': {
u"title": u"bowling-fail-or-win"
'url': 'http://www.myvideo.de/watch/8229274/bowling_fail_or_win',
'md5': '2d2753e8130479ba2cb7e0a37002053e',
'info_dict': {
'id': '8229274',
'ext': 'flv',
'title': 'bowling-fail-or-win',
}
}
@@ -53,10 +53,7 @@ class MyVideoIE(InfoExtractor):
def _real_extract(self,url):
mobj = re.match(self._VALID_URL, url)
if mobj is None:
raise ExtractorError(u'invalid URL: %s' % url)
video_id = mobj.group(1)
video_id = mobj.group('id')
GK = (
b'WXpnME1EZGhNRGhpTTJNM01XVmhOREU0WldNNVpHTTJOakpt'
@@ -74,37 +71,33 @@ class MyVideoIE(InfoExtractor):
video_url = mobj.group(1) + '.flv'
video_title = self._html_search_regex('<title>([^<]+)</title>',
webpage, u'title')
webpage, 'title')
video_ext = self._search_regex('[.](.+?)$', video_url, u'extension')
return [{
'id': video_id,
'url': video_url,
'uploader': None,
'upload_date': None,
'title': video_title,
'ext': video_ext,
}]
return {
'id': video_id,
'url': video_url,
'title': video_title,
}
mobj = re.search(r'data-video-service="/service/data/video/%s/config' % video_id, webpage)
if mobj is not None:
request = compat_urllib_request.Request('http://www.myvideo.de/service/data/video/%s/config' % video_id, '')
response = self._download_webpage(request, video_id,
u'Downloading video info')
'Downloading video info')
info = json.loads(base64.b64decode(response).decode('utf-8'))
return {'id': video_id,
'title': info['title'],
'url': info['streaming_url'].replace('rtmpe', 'rtmpt'),
'play_path': info['filename'],
'ext': 'flv',
'thumbnail': info['thumbnail'][0]['url'],
}
return {
'id': video_id,
'title': info['title'],
'url': info['streaming_url'].replace('rtmpe', 'rtmpt'),
'play_path': info['filename'],
'ext': 'flv',
'thumbnail': info['thumbnail'][0]['url'],
}
# try encxml
mobj = re.search('var flashvars={(.+?)}', webpage)
if mobj is None:
raise ExtractorError(u'Unable to extract video')
raise ExtractorError('Unable to extract video')
params = {}
encxml = ''
@@ -118,7 +111,7 @@ class MyVideoIE(InfoExtractor):
params['domain'] = 'www.myvideo.de'
xmldata_url = '%s?%s' % (encxml, compat_urllib_parse.urlencode(params))
if 'flash_playertype=MTV' in xmldata_url:
self._downloader.report_warning(u'avoiding MTV player')
self._downloader.report_warning('avoiding MTV player')
xmldata_url = (
'http://www.myvideo.de/dynamic/get_player_video_xml.php'
'?flash_playertype=D&ID=%s&_countlimit=4&autorun=yes'
@@ -144,7 +137,7 @@ class MyVideoIE(InfoExtractor):
video_url = compat_urllib_parse.unquote(mobj.group(1))
if 'myvideo2flash' in video_url:
self.report_warning(
u'Rewriting URL to use unencrypted rtmp:// ...',
'Rewriting URL to use unencrypted rtmp:// ...',
video_id)
video_url = video_url.replace('rtmpe://', 'rtmp://')
@@ -152,39 +145,31 @@ class MyVideoIE(InfoExtractor):
# extract non rtmp videos
mobj = re.search('path=\'(http.*?)\' source=\'(.*?)\'', dec_data)
if mobj is None:
raise ExtractorError(u'unable to extract url')
raise ExtractorError('unable to extract url')
video_url = compat_urllib_parse.unquote(mobj.group(1)) + compat_urllib_parse.unquote(mobj.group(2))
video_file = self._search_regex('source=\'(.*?)\'', dec_data, u'video file')
video_file = self._search_regex('source=\'(.*?)\'', dec_data, 'video file')
video_file = compat_urllib_parse.unquote(video_file)
if not video_file.endswith('f4m'):
ppath, prefix = video_file.split('.')
video_playpath = '%s:%s' % (prefix, ppath)
video_hls_playlist = ''
else:
video_playpath = ''
video_hls_playlist = (
video_file
).replace('.f4m', '.m3u8')
video_swfobj = self._search_regex('swfobject.embedSWF\(\'(.+?)\'', webpage, u'swfobj')
video_swfobj = self._search_regex('swfobject.embedSWF\(\'(.+?)\'', webpage, 'swfobj')
video_swfobj = compat_urllib_parse.unquote(video_swfobj)
video_title = self._html_search_regex("<h1(?: class='globalHd')?>(.*?)</h1>",
webpage, u'title')
webpage, 'title')
return [{
'id': video_id,
'url': video_url,
'tc_url': video_url,
'uploader': None,
'upload_date': None,
'title': video_title,
'ext': u'flv',
'play_path': video_playpath,
'video_file': video_file,
'video_hls_playlist': video_hls_playlist,
'player_url': video_swfobj,
}]
return {
'id': video_id,
'url': video_url,
'tc_url': video_url,
'title': video_title,
'ext': 'flv',
'play_path': video_playpath,
'player_url': video_swfobj,
}

View File

@@ -1,76 +1,43 @@
from __future__ import unicode_literals
import datetime
import json
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
)
class PhotobucketIE(InfoExtractor):
"""Information extractor for photobucket.com."""
# TODO: the original _VALID_URL was:
# r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)'
# Check if it's necessary to keep the old extracion process
_VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*(([\?\&]current=)|_)(?P<id>.*)\.(?P<ext>(flv)|(mp4))'
IE_NAME = u'photobucket'
_VALID_URL = r'http://(?:[a-z0-9]+\.)?photobucket\.com/.*(([\?\&]current=)|_)(?P<id>.*)\.(?P<ext>(flv)|(mp4))'
_TEST = {
u'url': u'http://media.photobucket.com/user/rachaneronas/media/TiredofLinkBuildingTryBacklinkMyDomaincom_zpsc0c3b9fa.mp4.html?filters[term]=search&filters[primary]=videos&filters[secondary]=images&sort=1&o=0',
u'file': u'zpsc0c3b9fa.mp4',
u'md5': u'7dabfb92b0a31f6c16cebc0f8e60ff99',
u'info_dict': {
u"upload_date": u"20130504",
u"uploader": u"rachaneronas",
u"title": u"Tired of Link Building? Try BacklinkMyDomain.com!"
'url': 'http://media.photobucket.com/user/rachaneronas/media/TiredofLinkBuildingTryBacklinkMyDomaincom_zpsc0c3b9fa.mp4.html?filters[term]=search&filters[primary]=videos&filters[secondary]=images&sort=1&o=0',
'file': 'zpsc0c3b9fa.mp4',
'md5': '7dabfb92b0a31f6c16cebc0f8e60ff99',
'info_dict': {
'upload_date': '20130504',
'uploader': 'rachaneronas',
'title': 'Tired of Link Building? Try BacklinkMyDomain.com!',
}
}
def _real_extract(self, url):
# Extract id from URL
mobj = re.match(self._VALID_URL, url)
if mobj is None:
raise ExtractorError(u'Invalid URL: %s' % url)
video_id = mobj.group('id')
video_extension = mobj.group('ext')
# Retrieve video webpage to extract further information
webpage = self._download_webpage(url, video_id)
# Extract URL, uploader, and title from webpage
self.report_extraction(video_id)
# We try first by looking the javascript code:
mobj = re.search(r'Pb\.Data\.Shared\.put\(Pb\.Data\.Shared\.MEDIA, (?P<json>.*?)\);', webpage)
if mobj is not None:
info = json.loads(mobj.group('json'))
return [{
'id': video_id,
'url': info[u'downloadUrl'],
'uploader': info[u'username'],
'upload_date': datetime.date.fromtimestamp(info[u'creationDate']).strftime('%Y%m%d'),
'title': info[u'title'],
'ext': video_extension,
'thumbnail': info[u'thumbUrl'],
}]
# We try looking in other parts of the webpage
video_url = self._search_regex(r'<link rel="video_src" href=".*\?file=([^"]+)" />',
webpage, u'video URL')
mobj = re.search(r'<title>(.*) video by (.*) - Photobucket</title>', webpage)
if mobj is None:
raise ExtractorError(u'Unable to extract title')
video_title = mobj.group(1).decode('utf-8')
video_uploader = mobj.group(2).decode('utf-8')
return [{
'id': video_id.decode('utf-8'),
'url': video_url.decode('utf-8'),
'uploader': video_uploader,
'upload_date': None,
'title': video_title,
'ext': video_extension.decode('utf-8'),
}]
info_json = self._search_regex(r'Pb\.Data\.Shared\.put\(Pb\.Data\.Shared\.MEDIA, (.*?)\);',
webpage, 'info json')
info = json.loads(info_json)
return {
'id': video_id,
'url': info['downloadUrl'],
'uploader': info['username'],
'upload_date': datetime.date.fromtimestamp(info['creationDate']).strftime('%Y%m%d'),
'title': info['title'],
'ext': video_extension,
'thumbnail': info['thumbUrl'],
}

View File

@@ -0,0 +1,80 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse,
)
class PlayvidIE(InfoExtractor):
_VALID_URL = r'^https?://www\.playvid\.com/watch(\?v=|/)(?P<id>.+?)(?:#|$)'
_TEST = {
'url': 'http://www.playvid.com/watch/agbDDi7WZTV',
'md5': '44930f8afa616efdf9482daf4fe53e1e',
'info_dict': {
'id': 'agbDDi7WZTV',
'ext': 'mp4',
'title': 'Michelle Lewin in Miami Beach',
'duration': 240,
'age_limit': 18,
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
video_title = None
duration = None
video_thumbnail = None
formats = []
# most of the information is stored in the flashvars
flashvars = self._html_search_regex(
r'flashvars="(.+?)"', webpage, 'flashvars')
infos = compat_urllib_parse.unquote(flashvars).split(r'&')
for info in infos:
videovars_match = re.match(r'^video_vars\[(.+?)\]=(.+?)$', info)
if videovars_match:
key = videovars_match.group(1)
val = videovars_match.group(2)
if key == 'title':
video_title = compat_urllib_parse.unquote_plus(val)
if key == 'duration':
try:
duration = int(val)
except ValueError:
pass
if key == 'big_thumb':
video_thumbnail = val
videourl_match = re.match(
r'^video_urls\]\[(?P<resolution>[0-9]+)p', key)
if videourl_match:
height = int(videourl_match.group('resolution'))
formats.append({
'height': height,
'url': val,
})
self._sort_formats(formats)
# Extract title - should be in the flashvars; if not, look elsewhere
if video_title is None:
video_title = self._html_search_regex(
r'<title>(.*?)</title', webpage, 'title')
return {
'id': video_id,
'formats': formats,
'title': video_title,
'thumbnail': video_thumbnail,
'duration': duration,
'description': None,
'age_limit': 18
}

View File

@@ -44,7 +44,7 @@ class PornHubIE(InfoExtractor):
video_urls = list(map(compat_urllib_parse.unquote , re.findall(r'"quality_[0-9]{3}p":"([^"]+)', webpage)))
if webpage.find('"encrypted":true') != -1:
password = self._html_search_regex(r'"video_title":"([^"]+)', webpage, 'password').replace('+', ' ')
password = compat_urllib_parse.unquote_plus(self._html_search_regex(r'"video_title":"([^"]+)', webpage, 'password'))
video_urls = list(map(lambda s: aes_decrypt_text(s, password, 32).decode('utf-8'), video_urls))
formats = []

View File

@@ -54,6 +54,7 @@ class SoundcloudIE(InfoExtractor):
'id': '47127627',
'ext': 'mp3',
'title': 'Goldrushed',
'description': 'From Stockholm Sweden\r\nPovel / Magnus / Filip / David\r\nwww.theroyalconcept.com',
'uploader': 'The Royal Concept',
'upload_date': '20120521',
},

View File

@@ -1,10 +1,15 @@
from __future__ import unicode_literals
import re
from .mtv import MTVServicesInfoExtractor
class SpikeIE(MTVServicesInfoExtractor):
_VALID_URL = r'https?://www\.spike\.com/(video-clips|episodes)/.+'
_VALID_URL = r'''(?x)https?://
(www\.spike\.com/(video-clips|episodes)/.+|
m\.spike\.com/videos/video.rbml\?id=(?P<mobile_id>[^&]+))
'''
_TEST = {
'url': 'http://www.spike.com/video-clips/lhtu8m/auction-hunters-can-allen-ride-a-hundred-year-old-motorcycle',
'md5': '1a9265f32b0c375793d6c4ce45255256',
@@ -17,3 +22,11 @@ class SpikeIE(MTVServicesInfoExtractor):
}
_FEED_URL = 'http://www.spike.com/feeds/mrss/'
_MOBILE_TEMPLATE = 'http://m.spike.com/videos/video.rbml?id=%s'
def _real_extract(self, url):
mobj = re.search(self._VALID_URL, url)
mobile_id = mobj.group('mobile_id')
if mobile_id is not None:
url = 'http://www.spike.com/video-clips/%s' % mobile_id
return super(SpikeIE, self)._real_extract(url)

View File

@@ -57,7 +57,7 @@ class VevoIE(InfoExtractor):
'age_limit': 18,
'title': 'Tunnel Vision (Explicit)',
'uploader': 'Justin Timberlake',
'upload_date': '20130704',
'upload_date': '20130703',
},
'params': {
'skip_download': 'true',
@@ -169,7 +169,7 @@ class VevoIE(InfoExtractor):
timestamp_ms = int(self._search_regex(
r'/Date\((\d+)\)/', video_info['launchDate'], 'launch date'))
upload_date = datetime.datetime.fromtimestamp(timestamp_ms // 1000)
upload_date = datetime.datetime.utcfromtimestamp(timestamp_ms // 1000)
return {
'id': video_id,
'title': video_info['title'],

View File

@@ -10,10 +10,9 @@ from ..utils import (
)
class VestiIE(InfoExtractor):
IE_NAME = 'vesti'
IE_DESC = 'Вести.Ru'
_VALID_URL = r'http://(?:.+?\.)?(?:vesti\.ru|russia\.tv)/(?P<id>.+)'
class VGTRKIE(InfoExtractor):
IE_DESC = 'ВГТРК'
_VALID_URL = r'http://(?:.+?\.)?(?:vesti\.ru|russia2?\.tv|tvkultura\.ru|rutv\.ru)/(?P<id>.+)'
_TESTS = [
{
@@ -72,6 +71,35 @@ class VestiIE(InfoExtractor):
'skip_download': True,
},
},
{
'url': 'http://sochi2014.vesti.ru/video/index/video_id/766403',
'info_dict': {
'id': '766403',
'ext': 'mp4',
'title': 'XXII зимние Олимпийские игры. Российские хоккеисты стартовали на Олимпиаде с победы',
'description': 'md5:55805dfd35763a890ff50fa9e35e31b3',
'duration': 271,
},
'params': {
# m3u8 download
'skip_download': True,
},
'skip': 'Blocked outside Russia',
},
{
'url': 'http://sochi2014.vesti.ru/live/play/live_id/301',
'info_dict': {
'id': '51499',
'ext': 'flv',
'title': 'Сочи-2014. Биатлон. Индивидуальная гонка. Мужчины ',
'description': 'md5:9e0ed5c9d2fa1efbfdfed90c9a6d179c',
},
'params': {
# rtmp download
'skip_download': True,
},
'skip': 'Translation has finished'
},
{
'url': 'http://russia.tv/video/show/brand_id/5169/episode_id/970443/video_id/975648',
'info_dict': {
@@ -101,34 +129,48 @@ class VestiIE(InfoExtractor):
},
},
{
'url': 'http://sochi2014.vesti.ru/video/index/video_id/766403',
'url': 'http://2.russia.tv/video/show/brand_id/48863/episode_id/972920/video_id/978667/viewtype/picture',
'info_dict': {
'id': '766403',
'id': '775081',
'ext': 'mp4',
'title': 'XXII зимние Олимпийские игры. Российские хоккеисты стартовали на Олимпиаде с победы',
'description': 'md5:55805dfd35763a890ff50fa9e35e31b3',
'duration': 271,
'title': 'XXII зимние Олимпийские игры. Россияне заняли весь пьедестал в лыжных гонках',
'description': 'md5:15d3741dd8d04b203fbc031c6a47fb0f',
'duration': 101,
},
'params': {
# m3u8 download
'skip_download': True,
},
'skip': 'Blocked outside Russia'
'skip': 'Blocked outside Russia',
},
{
'url': 'http://sochi2014.vesti.ru/live/play/live_id/301',
'url': 'http://tvkultura.ru/video/show/brand_id/31724/episode_id/972347/video_id/978186',
'info_dict': {
'id': '51499',
'ext': 'flv',
'title': 'Сочи-2014. Биатлон. Индивидуальная гонка. Мужчины ',
'description': 'md5:9e0ed5c9d2fa1efbfdfed90c9a6d179c',
'id': '774471',
'ext': 'mp4',
'title': 'Монологи на все времена',
'description': 'md5:18d8b5e6a41fb1faa53819471852d5d5',
'duration': 2906,
},
'params': {
# rtmp download
# m3u8 download
'skip_download': True,
},
'skip': 'Translation has finished'
}
},
{
'url': 'http://rutv.ru/brand/show/id/6792/channel/75',
'info_dict': {
'id': '125521',
'ext': 'mp4',
'title': 'Грустная дама червей. Х',
'description': '',
'duration': 4882,
},
'params': {
# m3u8 download
'skip_download': True,
},
},
]
def _real_extract(self, url):

View File

@@ -1,22 +1,23 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from .internetvideoarchive import InternetVideoArchiveIE
from ..utils import (
compat_urlparse,
)
from ..utils import compat_urlparse
class VideoDetectiveIE(InfoExtractor):
_VALID_URL = r'https?://www\.videodetective\.com/[^/]+/[^/]+/(?P<id>\d+)'
_TEST = {
u'url': u'http://www.videodetective.com/movies/kick-ass-2/194487',
u'file': u'194487.mp4',
u'info_dict': {
u'title': u'KICK-ASS 2',
u'description': u'md5:65ba37ad619165afac7d432eaded6013',
u'duration': 135,
'url': 'http://www.videodetective.com/movies/kick-ass-2/194487',
'info_dict': {
'id': '194487',
'ext': 'mp4',
'title': 'KICK-ASS 2',
'description': 'md5:65ba37ad619165afac7d432eaded6013',
'duration': 135,
},
}
@@ -26,5 +27,4 @@ class VideoDetectiveIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
og_video = self._og_search_video_url(webpage)
query = compat_urlparse.urlparse(og_video).query
return self.url_result(InternetVideoArchiveIE._build_url(query),
ie=InternetVideoArchiveIE.ie_key())
return self.url_result(InternetVideoArchiveIE._build_url(query), ie=InternetVideoArchiveIE.ie_key())

View File

@@ -8,6 +8,7 @@ import itertools
from .common import InfoExtractor
from .subtitles import SubtitlesInfoExtractor
from ..utils import (
compat_HTTPError,
compat_urllib_parse,
compat_urllib_request,
clean_html,
@@ -172,7 +173,18 @@ class VimeoIE(SubtitlesInfoExtractor):
# Retrieve video webpage to extract further information
request = compat_urllib_request.Request(url, None, headers)
webpage = self._download_webpage(request, video_id)
try:
webpage = self._download_webpage(request, video_id)
except ExtractorError as ee:
if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
errmsg = ee.cause.read()
if b'Because of its privacy settings, this video cannot be played here' in errmsg:
raise ExtractorError(
'Cannot download embed-only video without embedding '
'URL. Please call youtube-dl with the URL of the page '
'that embeds this video.',
expected=True)
raise
# Now we begin extracting as much information as we can from what we
# retrieved. First we extract the information common to all extractors,

View File

@@ -13,7 +13,7 @@ class VubeIE(InfoExtractor):
_TEST = {
'url': 'http://vube.com/Chiara+Grispo+Video+Channel/YL2qNPkqon',
'md5': 'f81dcf6d0448e3291f54380181695821',
'md5': 'db7aba89d4603dadd627e9d1973946fe',
'info_dict': {
'id': 'YL2qNPkqon',
'ext': 'mp4',
@@ -77,4 +77,4 @@ class VubeIE(InfoExtractor):
'like_count': like_count,
'dislike_count': dislike_count,
'comment_count': comment_count,
}
}

View File

@@ -1,55 +1,49 @@
# encoding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse,
ExtractorError,
)
class XNXXIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:video|www)\.xnxx\.com/video([0-9]+)/(.*)'
VIDEO_URL_RE = r'flv_url=(.*?)&amp;'
VIDEO_TITLE_RE = r'<title>(.*?)\s+-\s+XNXX.COM'
VIDEO_THUMB_RE = r'url_bigthumb=(.*?)&amp;'
_VALID_URL = r'^https?://(?:video|www)\.xnxx\.com/video(?P<id>[0-9]+)/(.*)'
_TEST = {
u'url': u'http://video.xnxx.com/video1135332/lida_naked_funny_actress_5_',
u'file': u'1135332.flv',
u'md5': u'0831677e2b4761795f68d417e0b7b445',
u'info_dict': {
u"title": u"lida \u00bb Naked Funny Actress (5)",
u"age_limit": 18,
'url': 'http://video.xnxx.com/video1135332/lida_naked_funny_actress_5_',
'md5': '0831677e2b4761795f68d417e0b7b445',
'info_dict': {
'id': '1135332',
'ext': 'flv',
'title': 'lida » Naked Funny Actress (5)',
'age_limit': 18,
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
if mobj is None:
raise ExtractorError(u'Invalid URL: %s' % url)
video_id = mobj.group(1)
video_id = mobj.group('id')
# Get webpage content
webpage = self._download_webpage(url, video_id)
video_url = self._search_regex(self.VIDEO_URL_RE,
webpage, u'video URL')
video_url = self._search_regex(r'flv_url=(.*?)&amp;',
webpage, 'video URL')
video_url = compat_urllib_parse.unquote(video_url)
video_title = self._html_search_regex(self.VIDEO_TITLE_RE,
webpage, u'title')
video_title = self._html_search_regex(r'<title>(.*?)\s+-\s+XNXX.COM',
webpage, 'title')
video_thumbnail = self._search_regex(self.VIDEO_THUMB_RE,
webpage, u'thumbnail', fatal=False)
video_thumbnail = self._search_regex(r'url_bigthumb=(.*?)&amp;',
webpage, 'thumbnail', fatal=False)
return [{
return {
'id': video_id,
'url': video_url,
'uploader': None,
'upload_date': None,
'title': video_title,
'ext': 'flv',
'thumbnail': video_thumbnail,
'description': None,
'age_limit': 18,
}]
}

View File

@@ -1285,10 +1285,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
# Decide which formats to download
try:
mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage)
mobj = re.search(r';ytplayer\.config\s*=\s*({.*?});', video_webpage)
if not mobj:
raise ValueError('Could not find vevo ID')
ytplayer_config = json.loads(mobj.group(1))
json_code = uppercase_escape(mobj.group(1))
ytplayer_config = json.loads(json_code)
args = ytplayer_config['args']
# Easy way to know if the 's' value is in url_encoded_fmt_stream_map
# this signatures are encrypted

View File

@@ -22,6 +22,7 @@ import struct
import subprocess
import sys
import traceback
import xml.etree.ElementTree
import zlib
try:
@@ -1267,3 +1268,13 @@ def read_batch_urls(batch_fd):
def urlencode_postdata(*args, **kargs):
return compat_urllib_parse.urlencode(*args, **kargs).encode('ascii')
def parse_xml(s):
class TreeBuilder(xml.etree.ElementTree.TreeBuilder):
def doctype(self, name, pubid, system):
pass # Ignore doctypes
parser = xml.etree.ElementTree.XMLParser(target=TreeBuilder())
kwargs = {'parser': parser} if sys.version_info >= (2, 7) else {}
return xml.etree.ElementTree.XML(s.encode('utf-8'), **kwargs)

View File

@@ -1,2 +1,2 @@
__version__ = '2014.03.07.1'
__version__ = '2014.03.11'