Compare commits
44 Commits
2014.09.15
...
2014.09.18
Author | SHA1 | Date | |
---|---|---|---|
|
fd78a4d3e6 | ||
|
1de33fafd9 | ||
|
e2e5dae64d | ||
|
09b23c902b | ||
|
109a540e7a | ||
|
2914e5f00f | ||
|
2f834e9381 | ||
|
9296738f20 | ||
|
0e59b9fffb | ||
|
67abbe9527 | ||
|
944a3de278 | ||
|
5a13fe9ed2 | ||
|
6b6096d0b7 | ||
|
d0246d07f1 | ||
|
727a98c3ee | ||
|
997987d568 | ||
|
c001f939e4 | ||
|
e825c38082 | ||
|
a04aa7a9e6 | ||
|
7cdd5339b3 | ||
|
38349518f1 | ||
|
64892c0b79 | ||
|
dc9f356846 | ||
|
ed86ee3b4a | ||
|
7bb5df1cda | ||
|
37a81dff04 | ||
|
fc96eb4e21 | ||
|
ae369738b0 | ||
|
e2037b3f7d | ||
|
5419033935 | ||
|
2eebf060af | ||
|
acd9db5902 | ||
|
d0e8b3d59b | ||
|
c15dd15388 | ||
|
0003a5c416 | ||
|
21f2927f70 | ||
|
e5a79071a5 | ||
|
6d1f2431bd | ||
|
fdea3abdf8 | ||
|
98703c7fbf | ||
|
2bca84e345 | ||
|
984e8e14ea | ||
|
d05cfe0600 | ||
|
37419b4f99 |
15
README.md
15
README.md
@@ -227,12 +227,15 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
|
||||
## Video Format Options:
|
||||
-f, --format FORMAT video format code, specify the order of
|
||||
preference using slashes: "-f 22/17/18".
|
||||
"-f mp4" and "-f flv" are also supported.
|
||||
You can also use the special names "best",
|
||||
"bestvideo", "bestaudio", "worst",
|
||||
"worstvideo" and "worstaudio". By default,
|
||||
youtube-dl will pick the best quality.
|
||||
preference using slashes: -f 22/17/18 . -f
|
||||
mp4 , -f m4a and -f flv are also
|
||||
supported. You can also use the special
|
||||
names "best", "bestvideo", "bestaudio",
|
||||
"worst", "worstvideo" and "worstaudio". By
|
||||
default, youtube-dl will pick the best
|
||||
quality. Use commas to download multiple
|
||||
audio formats, such as -f
|
||||
136/137/mp4/bestvideo,140/m4a/bestaudio
|
||||
--all-formats download all available video formats
|
||||
--prefer-free-formats prefer free video formats unless a specific
|
||||
one is requested
|
||||
|
@@ -41,6 +41,8 @@ from youtube_dl.utils import (
|
||||
strip_jsonp,
|
||||
uppercase_escape,
|
||||
limit_length,
|
||||
escape_rfc3986,
|
||||
escape_url,
|
||||
)
|
||||
|
||||
|
||||
@@ -294,5 +296,34 @@ class TestUtil(unittest.TestCase):
|
||||
limit_length('foo bar baz asd', 12).startswith('foo bar'))
|
||||
self.assertTrue('...' in limit_length('foo bar baz asd', 12))
|
||||
|
||||
def test_escape_rfc3986(self):
|
||||
reserved = "!*'();:@&=+$,/?#[]"
|
||||
unreserved = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_.~'
|
||||
self.assertEqual(escape_rfc3986(reserved), reserved)
|
||||
self.assertEqual(escape_rfc3986(unreserved), unreserved)
|
||||
self.assertEqual(escape_rfc3986('тест'), '%D1%82%D0%B5%D1%81%D1%82')
|
||||
self.assertEqual(escape_rfc3986('%D1%82%D0%B5%D1%81%D1%82'), '%D1%82%D0%B5%D1%81%D1%82')
|
||||
self.assertEqual(escape_rfc3986('foo bar'), 'foo%20bar')
|
||||
self.assertEqual(escape_rfc3986('foo%20bar'), 'foo%20bar')
|
||||
|
||||
def test_escape_url(self):
|
||||
self.assertEqual(
|
||||
escape_url('http://wowza.imust.org/srv/vod/telemb/new/UPLOAD/UPLOAD/20224_IncendieHavré_FD.mp4'),
|
||||
'http://wowza.imust.org/srv/vod/telemb/new/UPLOAD/UPLOAD/20224_IncendieHavre%CC%81_FD.mp4'
|
||||
)
|
||||
self.assertEqual(
|
||||
escape_url('http://www.ardmediathek.de/tv/Sturm-der-Liebe/Folge-2036-Zu-Mann-und-Frau-erklärt/Das-Erste/Video?documentId=22673108&bcastId=5290'),
|
||||
'http://www.ardmediathek.de/tv/Sturm-der-Liebe/Folge-2036-Zu-Mann-und-Frau-erkl%C3%A4rt/Das-Erste/Video?documentId=22673108&bcastId=5290'
|
||||
)
|
||||
self.assertEqual(
|
||||
escape_url('http://тест.рф/фрагмент'),
|
||||
'http://тест.рф/%D1%84%D1%80%D0%B0%D0%B3%D0%BC%D0%B5%D0%BD%D1%82'
|
||||
)
|
||||
self.assertEqual(
|
||||
escape_url('http://тест.рф/абв?абв=абв#абв'),
|
||||
'http://тест.рф/%D0%B0%D0%B1%D0%B2?%D0%B0%D0%B1%D0%B2=%D0%B0%D0%B1%D0%B2#%D0%B0%D0%B1%D0%B2'
|
||||
)
|
||||
self.assertEqual(escape_url('http://vimeo.com/56015672#at=0'), 'http://vimeo.com/56015672#at=0')
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -28,6 +28,7 @@ from .utils import (
|
||||
compat_str,
|
||||
compat_urllib_error,
|
||||
compat_urllib_request,
|
||||
escape_url,
|
||||
ContentTooShortError,
|
||||
date_from_str,
|
||||
DateRange,
|
||||
@@ -707,7 +708,7 @@ class YoutubeDL(object):
|
||||
if video_formats:
|
||||
return video_formats[0]
|
||||
else:
|
||||
extensions = ['mp4', 'flv', 'webm', '3gp']
|
||||
extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a']
|
||||
if format_spec in extensions:
|
||||
filter_f = lambda f: f['ext'] == format_spec
|
||||
else:
|
||||
@@ -808,28 +809,29 @@ class YoutubeDL(object):
|
||||
if req_format in ('-1', 'all'):
|
||||
formats_to_download = formats
|
||||
else:
|
||||
# We can accept formats requested in the format: 34/5/best, we pick
|
||||
# the first that is available, starting from left
|
||||
req_formats = req_format.split('/')
|
||||
for rf in req_formats:
|
||||
if re.match(r'.+?\+.+?', rf) is not None:
|
||||
# Two formats have been requested like '137+139'
|
||||
format_1, format_2 = rf.split('+')
|
||||
formats_info = (self.select_format(format_1, formats),
|
||||
self.select_format(format_2, formats))
|
||||
if all(formats_info):
|
||||
selected_format = {
|
||||
'requested_formats': formats_info,
|
||||
'format': rf,
|
||||
'ext': formats_info[0]['ext'],
|
||||
}
|
||||
for rfstr in req_format.split(','):
|
||||
# We can accept formats requested in the format: 34/5/best, we pick
|
||||
# the first that is available, starting from left
|
||||
req_formats = rfstr.split('/')
|
||||
for rf in req_formats:
|
||||
if re.match(r'.+?\+.+?', rf) is not None:
|
||||
# Two formats have been requested like '137+139'
|
||||
format_1, format_2 = rf.split('+')
|
||||
formats_info = (self.select_format(format_1, formats),
|
||||
self.select_format(format_2, formats))
|
||||
if all(formats_info):
|
||||
selected_format = {
|
||||
'requested_formats': formats_info,
|
||||
'format': rf,
|
||||
'ext': formats_info[0]['ext'],
|
||||
}
|
||||
else:
|
||||
selected_format = None
|
||||
else:
|
||||
selected_format = None
|
||||
else:
|
||||
selected_format = self.select_format(rf, formats)
|
||||
if selected_format is not None:
|
||||
formats_to_download = [selected_format]
|
||||
break
|
||||
selected_format = self.select_format(rf, formats)
|
||||
if selected_format is not None:
|
||||
formats_to_download.append(selected_format)
|
||||
break
|
||||
if not formats_to_download:
|
||||
raise ExtractorError('requested format not available',
|
||||
expected=True)
|
||||
@@ -1241,6 +1243,25 @@ class YoutubeDL(object):
|
||||
|
||||
def urlopen(self, req):
|
||||
""" Start an HTTP download """
|
||||
|
||||
# According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
|
||||
# always respected by websites, some tend to give out URLs with non percent-encoded
|
||||
# non-ASCII characters (see telemb.py, ard.py [#3412])
|
||||
# urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
|
||||
# To work around aforementioned issue we will replace request's original URL with
|
||||
# percent-encoded one
|
||||
url = req if isinstance(req, compat_str) else req.get_full_url()
|
||||
url_escaped = escape_url(url)
|
||||
|
||||
# Substitute URL if any change after escaping
|
||||
if url != url_escaped:
|
||||
if isinstance(req, compat_str):
|
||||
req = url_escaped
|
||||
else:
|
||||
req = compat_urllib_request.Request(
|
||||
url_escaped, data=req.data, headers=req.headers,
|
||||
origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
|
||||
|
||||
return self._opener.open(req, timeout=self._socket_timeout)
|
||||
|
||||
def print_debug_header(self):
|
||||
|
@@ -75,6 +75,9 @@ __authors__ = (
|
||||
'Ole Ernst',
|
||||
'Aaron McDaniel (mcd1992)',
|
||||
'Magnus Kolstad',
|
||||
'Hari Padmanaban',
|
||||
'Carlos Ramos',
|
||||
'5moufl',
|
||||
)
|
||||
|
||||
__license__ = 'Public Domain'
|
||||
|
@@ -6,6 +6,7 @@ from .aftonbladet import AftonbladetIE
|
||||
from .anitube import AnitubeIE
|
||||
from .anysex import AnySexIE
|
||||
from .aol import AolIE
|
||||
from .allmyvideos import AllmyvideosIE
|
||||
from .allocine import AllocineIE
|
||||
from .aparat import AparatIE
|
||||
from .appletrailers import AppleTrailersIE
|
||||
@@ -25,6 +26,7 @@ from .bambuser import BambuserIE, BambuserChannelIE
|
||||
from .bandcamp import BandcampIE, BandcampAlbumIE
|
||||
from .bbccouk import BBCCoUkIE
|
||||
from .beeg import BeegIE
|
||||
from .behindkink import BehindKinkIE
|
||||
from .bilibili import BiliBiliIE
|
||||
from .blinkx import BlinkxIE
|
||||
from .bliptv import BlipTVIE, BlipTVUserIE
|
||||
@@ -83,6 +85,7 @@ from .dropbox import DropboxIE
|
||||
from .ebaumsworld import EbaumsWorldIE
|
||||
from .ehow import EHowIE
|
||||
from .eighttracks import EightTracksIE
|
||||
from .einthusan import EinthusanIE
|
||||
from .eitb import EitbIE
|
||||
from .ellentv import (
|
||||
EllenTVIE,
|
||||
@@ -365,6 +368,7 @@ from .trutube import TruTubeIE
|
||||
from .tube8 import Tube8IE
|
||||
from .tudou import TudouIE
|
||||
from .tumblr import TumblrIE
|
||||
from .turbo import TurboIE
|
||||
from .tutv import TutvIE
|
||||
from .tvigle import TvigleIE
|
||||
from .tvp import TvpIE
|
||||
@@ -389,6 +393,7 @@ from .videobam import VideoBamIE
|
||||
from .videodetective import VideoDetectiveIE
|
||||
from .videolecturesnet import VideoLecturesNetIE
|
||||
from .videofyme import VideofyMeIE
|
||||
from .videomega import VideoMegaIE
|
||||
from .videopremium import VideoPremiumIE
|
||||
from .videott import VideoTtIE
|
||||
from .videoweed import VideoWeedIE
|
||||
|
59
youtube_dl/extractor/allmyvideos.py
Normal file
59
youtube_dl/extractor/allmyvideos.py
Normal file
@@ -0,0 +1,59 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import os.path
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
|
||||
|
||||
class AllmyvideosIE(InfoExtractor):
|
||||
IE_NAME = 'allmyvideos.net'
|
||||
_VALID_URL = r'https?://allmyvideos\.net/(?P<id>[a-zA-Z0-9_-]+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://allmyvideos.net/jih3nce3x6wn',
|
||||
'md5': '710883dee1bfc370ecf9fa6a89307c88',
|
||||
'info_dict': {
|
||||
'id': 'jih3nce3x6wn',
|
||||
'ext': 'mp4',
|
||||
'title': 'youtube-dl test video',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
orig_webpage = self._download_webpage(url, video_id)
|
||||
fields = re.findall(r'type="hidden" name="(.+?)"\s* value="?(.+?)">', orig_webpage)
|
||||
data = dict(fields)
|
||||
|
||||
post = compat_urllib_parse.urlencode(data)
|
||||
headers = {
|
||||
b'Content-Type': b'application/x-www-form-urlencoded',
|
||||
}
|
||||
req = compat_urllib_request.Request(url, post, headers)
|
||||
webpage = self._download_webpage(
|
||||
req, video_id, note='Downloading video page ...')
|
||||
|
||||
title = os.path.splitext(data['fname'])[0]
|
||||
|
||||
#Could be several links with different quality
|
||||
links = re.findall(r'"file" : "?(.+?)",', webpage)
|
||||
# Assume the links are ordered in quality
|
||||
formats = [{
|
||||
'url': l,
|
||||
'quality': i,
|
||||
} for i, l in enumerate(links)]
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
}
|
@@ -51,9 +51,6 @@ class ARDMediathekIE(InfoExtractor):
|
||||
else:
|
||||
video_id = m.group('video_id')
|
||||
|
||||
urlp = compat_urllib_parse_urlparse(url)
|
||||
url = urlp._replace(path=compat_urllib_parse.quote(urlp.path.encode('utf-8'))).geturl()
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
if '>Der gewünschte Beitrag ist nicht mehr verfügbar.<' in webpage:
|
||||
|
53
youtube_dl/extractor/behindkink.py
Normal file
53
youtube_dl/extractor/behindkink.py
Normal file
@@ -0,0 +1,53 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import url_basename
|
||||
|
||||
|
||||
class BehindKinkIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?behindkink\.com/(?P<year>[0-9]{4})/(?P<month>[0-9]{2})/(?P<day>[0-9]{2})/(?P<id>[^/#?_]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.behindkink.com/2014/08/14/ab1576-performers-voice-finally-heard-the-bill-is-killed/',
|
||||
'md5': '41ad01222b8442089a55528fec43ec01',
|
||||
'info_dict': {
|
||||
'id': '36370',
|
||||
'ext': 'mp4',
|
||||
'title': 'AB1576 - PERFORMERS VOICE FINALLY HEARD - THE BILL IS KILLED!',
|
||||
'description': 'The adult industry voice was finally heard as Assembly Bill 1576 remained\xa0 in suspense today at the Senate Appropriations Hearing. AB1576 was, among other industry damaging issues, a condom mandate...',
|
||||
'upload_date': '20140814',
|
||||
'thumbnail': 'http://www.behindkink.com/wp-content/uploads/2014/08/36370_AB1576_Win.jpg',
|
||||
'age_limit': 18,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
display_id = mobj.group('id')
|
||||
year = mobj.group('year')
|
||||
month = mobj.group('month')
|
||||
day = mobj.group('day')
|
||||
upload_date = year + month + day
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
video_url = self._search_regex(
|
||||
r"'file':\s*'([^']+)'",
|
||||
webpage, 'URL base')
|
||||
|
||||
video_id = url_basename(video_url)
|
||||
video_id = video_id.split('_')[0]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'ext': 'mp4',
|
||||
'title': self._og_search_title(webpage),
|
||||
'display_id': display_id,
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'description': self._og_search_description(webpage),
|
||||
'upload_date': upload_date,
|
||||
'age_limit': 18,
|
||||
}
|
@@ -11,10 +11,10 @@ from ..utils import (
|
||||
|
||||
|
||||
class DaumIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/.*?clipid=(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/(?:v/|.*?clipid=)(?P<id>[^?#&]+)'
|
||||
IE_NAME = 'daum.net'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://tvpot.daum.net/clip/ClipView.do?clipid=52554690',
|
||||
'info_dict': {
|
||||
'id': '52554690',
|
||||
@@ -24,11 +24,17 @@ class DaumIE(InfoExtractor):
|
||||
'upload_date': '20130831',
|
||||
'duration': 3868,
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'http://tvpot.daum.net/v/vab4dyeDBysyBssyukBUjBz',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://tvpot.daum.net/v/07dXWRka62Y%24',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group(1)
|
||||
video_id = mobj.group('id')
|
||||
canonical_url = 'http://tvpot.daum.net/v/%s' % video_id
|
||||
webpage = self._download_webpage(canonical_url, video_id)
|
||||
full_id = self._search_regex(
|
||||
@@ -42,7 +48,6 @@ class DaumIE(InfoExtractor):
|
||||
'http://videofarm.daum.net/controller/api/open/v1_2/MovieData.apixml?' + query,
|
||||
video_id, 'Downloading video formats info')
|
||||
|
||||
self.to_screen(u'%s: Getting video urls' % video_id)
|
||||
formats = []
|
||||
for format_el in urls.findall('result/output_list/output_list'):
|
||||
profile = format_el.attrib['profile']
|
||||
@@ -52,7 +57,7 @@ class DaumIE(InfoExtractor):
|
||||
})
|
||||
url_doc = self._download_xml(
|
||||
'http://videofarm.daum.net/controller/api/open/v1_2/MovieLocation.apixml?' + format_query,
|
||||
video_id, note=False)
|
||||
video_id, note='Downloading video data for %s format' % profile)
|
||||
format_url = url_doc.find('result/url').text
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
|
@@ -19,7 +19,7 @@ class DrTuberIE(InfoExtractor):
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'comment_count': int,
|
||||
'categories': list, # NSFW
|
||||
'categories': ['Babe', 'Blonde', 'Erotic', 'Outdoor', 'Softcore', 'Solo'],
|
||||
'thumbnail': 're:https?://.*\.jpg$',
|
||||
'age_limit': 18,
|
||||
}
|
||||
@@ -52,9 +52,9 @@ class DrTuberIE(InfoExtractor):
|
||||
r'<span class="comments_count">([\d,\.]+)</span>',
|
||||
webpage, 'comment count', fatal=False))
|
||||
|
||||
cats_str = self._html_search_regex(
|
||||
r'<meta name="keywords" content="([^"]+)"', webpage, 'categories', fatal=False)
|
||||
categories = None if cats_str is None else cats_str.split(' ')
|
||||
cats_str = self._search_regex(
|
||||
r'<span>Categories:</span><div>(.+?)</div>', webpage, 'categories', fatal=False)
|
||||
categories = [] if not cats_str else re.findall(r'<a title="([^"]+)"', cats_str)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
61
youtube_dl/extractor/einthusan.py
Normal file
61
youtube_dl/extractor/einthusan.py
Normal file
@@ -0,0 +1,61 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class EinthusanIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?einthusan\.com/movies/watch.php\?([^#]*?)id=(?P<id>[0-9]+)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.einthusan.com/movies/watch.php?id=2447',
|
||||
'md5': 'af244f4458cd667205e513d75da5b8b1',
|
||||
'info_dict': {
|
||||
'id': '2447',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ek Villain',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'description': 'md5:9d29fc91a7abadd4591fb862fa560d93',
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://www.einthusan.com/movies/watch.php?id=1671',
|
||||
'md5': 'ef63c7a803e22315880ed182c10d1c5c',
|
||||
'info_dict': {
|
||||
'id': '1671',
|
||||
'ext': 'mp4',
|
||||
'title': 'Soodhu Kavvuum',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'description': 'md5:05d8a0c0281a4240d86d76e14f2f4d51',
|
||||
}
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_title = self._html_search_regex(
|
||||
r'<h1><a class="movie-title".*?>(.*?)</a></h1>', webpage, 'title')
|
||||
|
||||
video_url = self._html_search_regex(
|
||||
r'''(?s)jwplayer\("mediaplayer"\)\.setup\({.*?'file': '([^']+)'.*?}\);''',
|
||||
webpage, 'video url')
|
||||
|
||||
description = self._html_search_meta('description', webpage)
|
||||
thumbnail = self._html_search_regex(
|
||||
r'''<a class="movie-cover-wrapper".*?><img src=["'](.*?)["'].*?/></a>''',
|
||||
webpage, "thumbnail url", fatal=False)
|
||||
if thumbnail is not None:
|
||||
thumbnail = thumbnail.replace('..', 'http://www.einthusan.com')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
'url': video_url,
|
||||
'thumbnail': thumbnail,
|
||||
'description': description,
|
||||
}
|
@@ -8,45 +8,68 @@ import json
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urlparse,
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
parse_duration,
|
||||
compat_urllib_parse_urlparse,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class FranceTVBaseInfoExtractor(InfoExtractor):
|
||||
def _extract_video(self, video_id):
|
||||
info = self._download_xml(
|
||||
'http://www.francetvinfo.fr/appftv/webservices/video/'
|
||||
'getInfosOeuvre.php?id-diffusion='
|
||||
+ video_id, video_id, 'Downloading XML config')
|
||||
def _extract_video(self, video_id, catalogue):
|
||||
info = self._download_json(
|
||||
'http://webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/?idDiffusion=%s&catalogue=%s'
|
||||
% (video_id, catalogue),
|
||||
video_id, 'Downloading video JSON')
|
||||
|
||||
manifest_url = info.find('videos/video/url').text
|
||||
manifest_url = manifest_url.replace('/z/', '/i/')
|
||||
|
||||
if manifest_url.startswith('rtmp'):
|
||||
formats = [{'url': manifest_url, 'ext': 'flv'}]
|
||||
else:
|
||||
formats = []
|
||||
available_formats = self._search_regex(r'/[^,]*,(.*?),k\.mp4', manifest_url, 'available formats')
|
||||
for index, format_descr in enumerate(available_formats.split(',')):
|
||||
format_info = {
|
||||
'url': manifest_url.replace('manifest.f4m', 'index_%d_av.m3u8' % index),
|
||||
'ext': 'mp4',
|
||||
}
|
||||
m_resolution = re.search(r'(?P<width>\d+)x(?P<height>\d+)', format_descr)
|
||||
if m_resolution is not None:
|
||||
format_info.update({
|
||||
'width': int(m_resolution.group('width')),
|
||||
'height': int(m_resolution.group('height')),
|
||||
})
|
||||
formats.append(format_info)
|
||||
if info.get('status') == 'NOK':
|
||||
raise ExtractorError(
|
||||
'%s returned error: %s' % (self.IE_NAME, info['message']), expected=True)
|
||||
|
||||
thumbnail_path = info.find('image').text
|
||||
formats = []
|
||||
for video in info['videos']:
|
||||
if video['statut'] != 'ONLINE':
|
||||
continue
|
||||
video_url = video['url']
|
||||
if not video_url:
|
||||
continue
|
||||
format_id = video['format']
|
||||
if video_url.endswith('.f4m'):
|
||||
video_url_parsed = compat_urllib_parse_urlparse(video_url)
|
||||
f4m_url = self._download_webpage(
|
||||
'http://hdfauth.francetv.fr/esi/urltokengen2.html?url=%s' % video_url_parsed.path,
|
||||
video_id, 'Downloading f4m manifest token', fatal=False)
|
||||
if f4m_url:
|
||||
f4m_formats = self._extract_f4m_formats(f4m_url, video_id)
|
||||
for f4m_format in f4m_formats:
|
||||
f4m_format['preference'] = 1
|
||||
formats.extend(f4m_formats)
|
||||
elif video_url.endswith('.m3u8'):
|
||||
formats.extend(self._extract_m3u8_formats(video_url, video_id))
|
||||
elif video_url.startswith('rtmp'):
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'format_id': 'rtmp-%s' % format_id,
|
||||
'ext': 'flv',
|
||||
'preference': 1,
|
||||
})
|
||||
else:
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'format_id': format_id,
|
||||
'preference': 2,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': info.find('titre').text,
|
||||
'title': info['titre'],
|
||||
'description': clean_html(info['synopsis']),
|
||||
'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', info['image']),
|
||||
'duration': parse_duration(info['duree']),
|
||||
'timestamp': int_or_none(info['diffusion']['timestamp']),
|
||||
'formats': formats,
|
||||
'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', thumbnail_path),
|
||||
'description': info.find('synopsis').text,
|
||||
}
|
||||
|
||||
|
||||
@@ -61,7 +84,7 @@ class PluzzIE(FranceTVBaseInfoExtractor):
|
||||
webpage = self._download_webpage(url, title)
|
||||
video_id = self._search_regex(
|
||||
r'data-diffusion="(\d+)"', webpage, 'ID')
|
||||
return self._extract_video(video_id)
|
||||
return self._extract_video(video_id, 'Pluzz')
|
||||
|
||||
|
||||
class FranceTvInfoIE(FranceTVBaseInfoExtractor):
|
||||
@@ -70,13 +93,13 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor):
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html',
|
||||
'md5': '9cecf35f99c4079c199e9817882a9a1c',
|
||||
'info_dict': {
|
||||
'id': '84981923',
|
||||
'ext': 'mp4',
|
||||
'ext': 'flv',
|
||||
'title': 'Soir 3',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
'upload_date': '20130826',
|
||||
'timestamp': 1377548400,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.francetvinfo.fr/elections/europeennes/direct-europeennes-regardez-le-debat-entre-les-candidats-a-la-presidence-de-la-commission_600639.html',
|
||||
@@ -88,15 +111,17 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor):
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'HLS (reqires ffmpeg)'
|
||||
}
|
||||
},
|
||||
'skip': 'Ce direct est terminé et sera disponible en rattrapage dans quelques minutes.',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
page_title = mobj.group('title')
|
||||
webpage = self._download_webpage(url, page_title)
|
||||
video_id = self._search_regex(r'id-video=((?:[^0-9]*?_)?[0-9]+)[@"]', webpage, 'video id')
|
||||
return self._extract_video(video_id)
|
||||
video_id, catalogue = self._search_regex(
|
||||
r'id-video=([^@]+@[^"]+)', webpage, 'video id').split('@')
|
||||
return self._extract_video(video_id, catalogue)
|
||||
|
||||
|
||||
class FranceTVIE(FranceTVBaseInfoExtractor):
|
||||
@@ -112,91 +137,77 @@ class FranceTVIE(FranceTVBaseInfoExtractor):
|
||||
# france2
|
||||
{
|
||||
'url': 'http://www.france2.fr/emissions/13h15-le-samedi-le-dimanche/videos/75540104',
|
||||
'file': '75540104.mp4',
|
||||
'md5': 'c03fc87cb85429ffd55df32b9fc05523',
|
||||
'info_dict': {
|
||||
'title': '13h15, le samedi...',
|
||||
'description': 'md5:2e5b58ba7a2d3692b35c792be081a03d',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
'id': '109169362',
|
||||
'ext': 'flv',
|
||||
'title': '13h15, le dimanche...',
|
||||
'description': 'md5:9a0932bb465f22d377a449be9d1a0ff7',
|
||||
'upload_date': '20140914',
|
||||
'timestamp': 1410693600,
|
||||
},
|
||||
},
|
||||
# france3
|
||||
{
|
||||
'url': 'http://www.france3.fr/emissions/pieces-a-conviction/diffusions/13-11-2013_145575',
|
||||
'md5': '679bb8f8921f8623bd658fa2f8364da0',
|
||||
'info_dict': {
|
||||
'id': '000702326_CAPP_PicesconvictionExtrait313022013_120220131722_Au',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Le scandale du prix des médicaments',
|
||||
'description': 'md5:1384089fbee2f04fc6c9de025ee2e9ce',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
'upload_date': '20131113',
|
||||
'timestamp': 1384380000,
|
||||
},
|
||||
},
|
||||
# france4
|
||||
{
|
||||
'url': 'http://www.france4.fr/emissions/hero-corp/videos/rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4',
|
||||
'md5': 'a182bf8d2c43d88d46ec48fbdd260c1c',
|
||||
'info_dict': {
|
||||
'id': 'rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Hero Corp Making of - Extrait 1',
|
||||
'description': 'md5:c87d54871b1790679aec1197e73d650a',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
'upload_date': '20131106',
|
||||
'timestamp': 1383766500,
|
||||
},
|
||||
},
|
||||
# france5
|
||||
{
|
||||
'url': 'http://www.france5.fr/emissions/c-a-dire/videos/92837968',
|
||||
'md5': '78f0f4064f9074438e660785bbf2c5d9',
|
||||
'info_dict': {
|
||||
'id': '92837968',
|
||||
'ext': 'mp4',
|
||||
'id': '108961659',
|
||||
'ext': 'flv',
|
||||
'title': 'C à dire ?!',
|
||||
'description': 'md5:fb1db1cbad784dcce7c7a7bd177c8e2f',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
'description': 'md5:1a4aeab476eb657bf57c4ff122129f81',
|
||||
'upload_date': '20140915',
|
||||
'timestamp': 1410795000,
|
||||
},
|
||||
},
|
||||
# franceo
|
||||
{
|
||||
'url': 'http://www.franceo.fr/jt/info-afrique/04-12-2013',
|
||||
'md5': '52f0bfe202848b15915a2f39aaa8981b',
|
||||
'info_dict': {
|
||||
'id': '92327925',
|
||||
'ext': 'mp4',
|
||||
'title': 'Infô-Afrique',
|
||||
'id': '108634970',
|
||||
'ext': 'flv',
|
||||
'title': 'Infô Afrique',
|
||||
'description': 'md5:ebf346da789428841bee0fd2a935ea55',
|
||||
'upload_date': '20140915',
|
||||
'timestamp': 1410822000,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'The id changes frequently',
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
if mobj.group('key'):
|
||||
webpage = self._download_webpage(url, mobj.group('key'))
|
||||
id_res = [
|
||||
(r'''(?x)<div\s+class="video-player">\s*
|
||||
<a\s+href="http://videos.francetv.fr/video/([0-9]+)"\s+
|
||||
class="francetv-video-player">'''),
|
||||
(r'<a id="player_direct" href="http://info\.francetelevisions'
|
||||
'\.fr/\?id-video=([^"/&]+)'),
|
||||
(r'<a class="video" id="ftv_player_(.+?)"'),
|
||||
]
|
||||
video_id = self._html_search_regex(id_res, webpage, 'video ID')
|
||||
else:
|
||||
video_id = mobj.group('id')
|
||||
return self._extract_video(video_id)
|
||||
webpage = self._download_webpage(url, mobj.group('key') or mobj.group('id'))
|
||||
video_id, catalogue = self._html_search_regex(
|
||||
r'href="http://videos\.francetv\.fr/video/([^@]+@[^"]+)"',
|
||||
webpage, 'video ID').split('@')
|
||||
return self._extract_video(video_id, catalogue)
|
||||
|
||||
|
||||
class GenerationQuoiIE(InfoExtractor):
|
||||
@@ -232,16 +243,15 @@ class CultureboxIE(FranceTVBaseInfoExtractor):
|
||||
_VALID_URL = r'https?://(?:m\.)?culturebox\.francetvinfo\.fr/(?P<name>.*?)(\?|$)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://culturebox.francetvinfo.fr/einstein-on-the-beach-au-theatre-du-chatelet-146813',
|
||||
'url': 'http://culturebox.francetvinfo.fr/festivals/dans-les-jardins-de-william-christie/dans-les-jardins-de-william-christie-le-camus-162553',
|
||||
'md5': '5ad6dec1ffb2a3fbcb20cc4b744be8d6',
|
||||
'info_dict': {
|
||||
'id': 'EV_6785',
|
||||
'ext': 'mp4',
|
||||
'title': 'Einstein on the beach au Théâtre du Châtelet',
|
||||
'description': 'md5:9ce2888b1efefc617b5e58b3f6200eeb',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
'id': 'EV_22853',
|
||||
'ext': 'flv',
|
||||
'title': 'Dans les jardins de William Christie - Le Camus',
|
||||
'description': 'md5:4710c82315c40f0c865ca8b9a68b5299',
|
||||
'upload_date': '20140829',
|
||||
'timestamp': 1409317200,
|
||||
},
|
||||
}
|
||||
|
||||
@@ -249,5 +259,7 @@ class CultureboxIE(FranceTVBaseInfoExtractor):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
name = mobj.group('name')
|
||||
webpage = self._download_webpage(url, name)
|
||||
video_id = self._search_regex(r'"http://videos\.francetv\.fr/video/(.*?)"', webpage, 'video id')
|
||||
return self._extract_video(video_id)
|
||||
video_id, catalogue = self._search_regex(
|
||||
r'"http://videos\.francetv\.fr/video/([^@]+@[^"]+)"', webpage, 'video id').split('@')
|
||||
|
||||
return self._extract_video(video_id, catalogue)
|
||||
|
@@ -877,7 +877,7 @@ class GenericIE(InfoExtractor):
|
||||
if not found:
|
||||
found = re.search(
|
||||
r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
|
||||
r'(?:[a-z-]+="[^"]+"\s+)*?content="[0-9]{,2};url=\'([^\']+)\'"',
|
||||
r'(?:[a-z-]+="[^"]+"\s+)*?content="[0-9]{,2};url=\'?([^\'"]+)',
|
||||
webpage)
|
||||
if found:
|
||||
new_url = found.group(1)
|
||||
|
@@ -71,6 +71,7 @@ class IGNIE(InfoExtractor):
|
||||
|
||||
def _find_video_id(self, webpage):
|
||||
res_id = [
|
||||
r'"video_id"\s*:\s*"(.*?)"',
|
||||
r'data-video-id="(.+?)"',
|
||||
r'<object id="vid_(.+?)"',
|
||||
r'<meta name="og:image" content=".*/(.+?)-(.+?)/.+.jpg"',
|
||||
@@ -85,7 +86,7 @@ class IGNIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, name_or_id)
|
||||
if page_type != 'video':
|
||||
multiple_urls = re.findall(
|
||||
'<param name="flashvars" value="[^"]*?url=(https?://www\.ign\.com/videos/.*?)["&]',
|
||||
'<param name="flashvars"[^>]*value="[^"]*?url=(https?://www\.ign\.com/videos/.*?)["&]',
|
||||
webpage)
|
||||
if multiple_urls:
|
||||
return [self.url_result(u, ie='IGN') for u in multiple_urls]
|
||||
@@ -111,13 +112,13 @@ class IGNIE(InfoExtractor):
|
||||
|
||||
|
||||
class OneUPIE(IGNIE):
|
||||
_VALID_URL = r'https?://gamevideos\.1up\.com/(?P<type>video)/id/(?P<name_or_id>.+)'
|
||||
_VALID_URL = r'https?://gamevideos\.1up\.com/(?P<type>video)/id/(?P<name_or_id>.+)\.html'
|
||||
IE_NAME = '1up.com'
|
||||
|
||||
_DESCRIPTION_RE = r'<div id="vid_summary">(.+?)</div>'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://gamevideos.1up.com/video/id/34976',
|
||||
'url': 'http://gamevideos.1up.com/video/id/34976.html',
|
||||
'md5': '68a54ce4ebc772e4b71e3123d413163d',
|
||||
'info_dict': {
|
||||
'id': '34976',
|
||||
|
@@ -46,9 +46,9 @@ class NHLBaseInfoExtractor(InfoExtractor):
|
||||
|
||||
class NHLIE(NHLBaseInfoExtractor):
|
||||
IE_NAME = 'nhl.com'
|
||||
_VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/console\?.*?(?:[?&])id=(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/console(?:\?(?:.*?[?&])?)id=(?P<id>[0-9]+)'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://video.canucks.nhl.com/videocenter/console?catid=6?id=453614',
|
||||
'info_dict': {
|
||||
'id': '453614',
|
||||
@@ -58,7 +58,10 @@ class NHLIE(NHLBaseInfoExtractor):
|
||||
'duration': 18,
|
||||
'upload_date': '20131006',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'http://video.flames.nhl.com/videocenter/console?id=630616',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
@@ -8,11 +8,11 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
compat_urllib_request,
|
||||
urlencode_postdata,
|
||||
xpath_text,
|
||||
xpath_with_ns,
|
||||
)
|
||||
|
||||
_x = lambda p: xpath_with_ns(p, {'xspf': 'http://xspf.org/ns/0/'})
|
||||
_find = lambda el, p: el.find(_x(p)).text.strip()
|
||||
|
||||
|
||||
class NosVideoIE(InfoExtractor):
|
||||
@@ -53,9 +53,15 @@ class NosVideoIE(InfoExtractor):
|
||||
playlist = self._download_xml(playlist_url, video_id)
|
||||
|
||||
track = playlist.find(_x('.//xspf:track'))
|
||||
title = _find(track, './xspf:title')
|
||||
url = _find(track, './xspf:file')
|
||||
thumbnail = _find(track, './xspf:image')
|
||||
if track is None:
|
||||
raise ExtractorError(
|
||||
'XML playlist is missing the \'track\' element',
|
||||
expected=True)
|
||||
title = xpath_text(track, _x('./xspf:title'), 'title')
|
||||
url = xpath_text(track, _x('./xspf:file'), 'URL', fatal=True)
|
||||
thumbnail = xpath_text(track, _x('./xspf:image'), 'thumbnail')
|
||||
if title is not None:
|
||||
title = title.strip()
|
||||
|
||||
formats = [{
|
||||
'format_id': 'sd',
|
||||
|
@@ -5,6 +5,7 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
unified_strdate,
|
||||
parse_duration,
|
||||
qualities,
|
||||
)
|
||||
|
||||
@@ -13,17 +14,43 @@ class NPOIE(InfoExtractor):
|
||||
IE_NAME = 'npo.nl'
|
||||
_VALID_URL = r'https?://www\.npo\.nl/[^/]+/[^/]+/(?P<id>[^/?]+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.npo.nl/nieuwsuur/22-06-2014/VPWON_1220719',
|
||||
'md5': '4b3f9c429157ec4775f2c9cb7b911016',
|
||||
'info_dict': {
|
||||
'id': 'VPWON_1220719',
|
||||
'ext': 'm4v',
|
||||
'title': 'Nieuwsuur',
|
||||
'description': 'Dagelijks tussen tien en elf: nieuws, sport en achtergronden.',
|
||||
'upload_date': '20140622',
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.npo.nl/nieuwsuur/22-06-2014/VPWON_1220719',
|
||||
'md5': '4b3f9c429157ec4775f2c9cb7b911016',
|
||||
'info_dict': {
|
||||
'id': 'VPWON_1220719',
|
||||
'ext': 'm4v',
|
||||
'title': 'Nieuwsuur',
|
||||
'description': 'Dagelijks tussen tien en elf: nieuws, sport en achtergronden.',
|
||||
'upload_date': '20140622',
|
||||
},
|
||||
},
|
||||
}
|
||||
{
|
||||
'url': 'http://www.npo.nl/de-mega-mike-mega-thomas-show/27-02-2009/VARA_101191800',
|
||||
'md5': 'da50a5787dbfc1603c4ad80f31c5120b',
|
||||
'info_dict': {
|
||||
'id': 'VARA_101191800',
|
||||
'ext': 'm4v',
|
||||
'title': 'De Mega Mike & Mega Thomas show',
|
||||
'description': 'md5:3b74c97fc9d6901d5a665aac0e5400f4',
|
||||
'upload_date': '20090227',
|
||||
'duration': 2400,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.npo.nl/tegenlicht/25-02-2013/VPWON_1169289',
|
||||
'md5': 'f8065e4e5a7824068ed3c7e783178f2c',
|
||||
'info_dict': {
|
||||
'id': 'VPWON_1169289',
|
||||
'ext': 'm4v',
|
||||
'title': 'Tegenlicht',
|
||||
'description': 'md5:d6476bceb17a8c103c76c3b708f05dd1',
|
||||
'upload_date': '20130225',
|
||||
'duration': 3000,
|
||||
},
|
||||
}
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
@@ -43,19 +70,28 @@ class NPOIE(InfoExtractor):
|
||||
token = self._search_regex(r'npoplayer\.token = "(.+?)"', token_page, 'token')
|
||||
|
||||
formats = []
|
||||
quality = qualities(['adaptive', 'h264_sb', 'h264_bb', 'h264_std'])
|
||||
quality = qualities(['adaptive', 'wmv_sb', 'h264_sb', 'wmv_bb', 'h264_bb', 'wvc1_std', 'h264_std'])
|
||||
for format_id in metadata['pubopties']:
|
||||
streams_info = self._download_json(
|
||||
format_info = self._download_json(
|
||||
'http://ida.omroep.nl/odi/?prid=%s&puboptions=%s&adaptive=yes&token=%s' % (video_id, format_id, token),
|
||||
video_id, 'Downloading %s streams info' % format_id)
|
||||
stream_info = self._download_json(
|
||||
streams_info['streams'][0] + '&type=json',
|
||||
video_id, 'Downloading %s stream info' % format_id)
|
||||
video_id, 'Downloading %s JSON' % format_id)
|
||||
if format_info.get('error_code', 0) or format_info.get('errorcode', 0):
|
||||
continue
|
||||
streams = format_info.get('streams')
|
||||
if streams:
|
||||
video_info = self._download_json(
|
||||
streams[0] + '&type=json',
|
||||
video_id, 'Downloading %s stream JSON' % format_id)
|
||||
else:
|
||||
video_info = format_info
|
||||
video_url = video_info.get('url')
|
||||
if not video_url:
|
||||
continue
|
||||
if format_id == 'adaptive':
|
||||
formats.extend(self._extract_m3u8_formats(stream_info['url'], video_id))
|
||||
formats.extend(self._extract_m3u8_formats(video_url, video_id))
|
||||
else:
|
||||
formats.append({
|
||||
'url': stream_info['url'],
|
||||
'url': video_url,
|
||||
'format_id': format_id,
|
||||
'quality': quality(format_id),
|
||||
})
|
||||
@@ -65,7 +101,8 @@ class NPOIE(InfoExtractor):
|
||||
'id': video_id,
|
||||
'title': metadata['titel'],
|
||||
'description': metadata['info'],
|
||||
'thumbnail': metadata['images'][-1]['url'],
|
||||
'upload_date': unified_strdate(metadata['gidsdatum']),
|
||||
'thumbnail': metadata.get('images', [{'url': None}])[-1]['url'],
|
||||
'upload_date': unified_strdate(metadata.get('gidsdatum')),
|
||||
'duration': parse_duration(metadata.get('tijdsduur')),
|
||||
'formats': formats,
|
||||
}
|
||||
|
@@ -31,7 +31,8 @@ class SoundcloudIE(InfoExtractor):
|
||||
(?!sets/|likes/?(?:$|[?#]))
|
||||
(?P<title>[\w\d-]+)/?
|
||||
(?P<token>[^?]+?)?(?:[?].*)?$)
|
||||
|(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+))
|
||||
|(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+)
|
||||
(?:/?\?secret_token=(?P<secret_token>[^&]+?))?$)
|
||||
|(?P<player>(?:w|player|p.)\.soundcloud\.com/player/?.*?url=.*)
|
||||
)
|
||||
'''
|
||||
@@ -80,6 +81,20 @@ class SoundcloudIE(InfoExtractor):
|
||||
'duration': 9,
|
||||
},
|
||||
},
|
||||
# private link (alt format)
|
||||
{
|
||||
'url': 'https://api.soundcloud.com/tracks/123998367?secret_token=s-8Pjrp',
|
||||
'md5': 'aa0dd32bfea9b0c5ef4f02aacd080604',
|
||||
'info_dict': {
|
||||
'id': '123998367',
|
||||
'ext': 'mp3',
|
||||
'title': 'Youtube - Dl Test Video \'\' Ä↭',
|
||||
'uploader': 'jaimeMF',
|
||||
'description': 'test chars: \"\'/\\ä↭',
|
||||
'upload_date': '20131209',
|
||||
'duration': 9,
|
||||
},
|
||||
},
|
||||
# downloadable song
|
||||
{
|
||||
'url': 'https://soundcloud.com/oddsamples/bus-brakes',
|
||||
@@ -197,6 +212,9 @@ class SoundcloudIE(InfoExtractor):
|
||||
if track_id is not None:
|
||||
info_json_url = 'http://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID
|
||||
full_title = track_id
|
||||
token = mobj.group('secret_token')
|
||||
if token:
|
||||
info_json_url += "&secret_token=" + token
|
||||
elif mobj.group('player'):
|
||||
query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
|
||||
return self.url_result(query['url'][0])
|
||||
@@ -220,7 +238,7 @@ class SoundcloudIE(InfoExtractor):
|
||||
|
||||
|
||||
class SoundcloudSetIE(SoundcloudIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?soundcloud\.com/(?P<uploader>[\w\d-]+)/sets/(?P<slug_title>[\w\d-]+)(?:/(?P<token>[^?/]+))?'
|
||||
IE_NAME = 'soundcloud:set'
|
||||
_TESTS = [{
|
||||
'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep',
|
||||
@@ -234,14 +252,19 @@ class SoundcloudSetIE(SoundcloudIE):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
||||
# extract uploader (which is in the url)
|
||||
uploader = mobj.group(1)
|
||||
uploader = mobj.group('uploader')
|
||||
# extract simple title (uploader + slug of song title)
|
||||
slug_title = mobj.group(2)
|
||||
slug_title = mobj.group('slug_title')
|
||||
full_title = '%s/sets/%s' % (uploader, slug_title)
|
||||
url = 'http://soundcloud.com/%s/sets/%s' % (uploader, slug_title)
|
||||
|
||||
token = mobj.group('token')
|
||||
if token:
|
||||
full_title += '/' + token
|
||||
url += '/' + token
|
||||
|
||||
self.report_resolve(full_title)
|
||||
|
||||
url = 'http://soundcloud.com/%s/sets/%s' % (uploader, slug_title)
|
||||
resolv_url = self._resolv_url(url)
|
||||
info = self._download_json(resolv_url, full_title)
|
||||
|
||||
@@ -252,7 +275,7 @@ class SoundcloudSetIE(SoundcloudIE):
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'entries': [self._extract_info_dict(track) for track in info['tracks']],
|
||||
'entries': [self._extract_info_dict(track, secret_token=token) for track in info['tracks']],
|
||||
'id': info['id'],
|
||||
'title': info['title'],
|
||||
}
|
||||
@@ -315,7 +338,7 @@ class SoundcloudUserIE(SoundcloudIE):
|
||||
|
||||
|
||||
class SoundcloudPlaylistIE(SoundcloudIE):
|
||||
_VALID_URL = r'https?://api\.soundcloud\.com/playlists/(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://api\.soundcloud\.com/playlists/(?P<id>[0-9]+)(?:/?\?secret_token=(?P<token>[^&]+?))$'
|
||||
IE_NAME = 'soundcloud:playlist'
|
||||
_TESTS = [
|
||||
|
||||
@@ -335,14 +358,21 @@ class SoundcloudPlaylistIE(SoundcloudIE):
|
||||
playlist_id = mobj.group('id')
|
||||
base_url = '%s//api.soundcloud.com/playlists/%s.json?' % (self.http_scheme(), playlist_id)
|
||||
|
||||
data = compat_urllib_parse.urlencode({
|
||||
data_dict = {
|
||||
'client_id': self._CLIENT_ID,
|
||||
})
|
||||
}
|
||||
token = mobj.group('token')
|
||||
|
||||
if token:
|
||||
data_dict['secret_token'] = token
|
||||
|
||||
data = compat_urllib_parse.urlencode(data_dict)
|
||||
data = self._download_json(
|
||||
base_url + data, playlist_id, 'Downloading playlist')
|
||||
|
||||
entries = [
|
||||
self._extract_info_dict(t, quiet=True) for t in data['tracks']]
|
||||
self._extract_info_dict(t, quiet=True, secret_token=token)
|
||||
for t in data['tracks']]
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
|
67
youtube_dl/extractor/turbo.py
Normal file
67
youtube_dl/extractor/turbo.py
Normal file
@@ -0,0 +1,67 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
qualities,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
|
||||
class TurboIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?turbo\.fr/videos-voiture/(?P<id>[0-9]+)-'
|
||||
_API_URL = 'http://www.turbo.fr/api/tv/xml.php?player_generique=player_generique&id={0:}'
|
||||
_TEST = {
|
||||
'url': 'http://www.turbo.fr/videos-voiture/454443-turbo-du-07-09-2014-renault-twingo-3-bentley-continental-gt-speed-ces-guide-achat-dacia.html',
|
||||
'md5': '33f4b91099b36b5d5a91f84b5bcba600',
|
||||
'info_dict': {
|
||||
'id': '454443',
|
||||
'ext': 'mp4',
|
||||
'duration': 3715,
|
||||
'title': 'Turbo du 07/09/2014 : Renault Twingo 3, Bentley Continental GT Speed, CES, Guide Achat Dacia... ',
|
||||
'description': 'Retrouvez dans cette rubrique toutes les vidéos de l\'Turbo du 07/09/2014 : Renault Twingo 3, Bentley Continental GT Speed, CES, Guide Achat Dacia... ',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
playlist = self._download_xml(self._API_URL.format(video_id), video_id)
|
||||
item = playlist.find('./channel/item')
|
||||
if item is None:
|
||||
raise ExtractorError('Playlist item was not found', expected=True)
|
||||
|
||||
title = xpath_text(item, './title', 'title')
|
||||
duration = int_or_none(xpath_text(item, './durate', 'duration'))
|
||||
thumbnail = xpath_text(item, './visuel_clip', 'thumbnail')
|
||||
description = self._og_search_description(webpage)
|
||||
|
||||
formats = []
|
||||
get_quality = qualities(['3g', 'sd', 'hq'])
|
||||
for child in item:
|
||||
m = re.search(r'url_video_(?P<quality>.+)', child.tag)
|
||||
if m:
|
||||
quality = m.group('quality')
|
||||
formats.append({
|
||||
'format_id': quality,
|
||||
'url': child.text,
|
||||
'quality': get_quality(quality),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'duration': duration,
|
||||
'thumbnail': thumbnail,
|
||||
'description': description,
|
||||
'formats': formats,
|
||||
}
|
57
youtube_dl/extractor/videomega.py
Normal file
57
youtube_dl/extractor/videomega.py
Normal file
@@ -0,0 +1,57 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse,
|
||||
remove_start,
|
||||
)
|
||||
|
||||
|
||||
class VideoMegaIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)https?://
|
||||
(?:www\.)?videomega\.tv/
|
||||
(?:iframe\.php)?\?ref=(?P<id>[A-Za-z0-9]+)
|
||||
'''
|
||||
_TEST = {
|
||||
'url': 'http://videomega.tv/?ref=GKeGPVedBe',
|
||||
'md5': '240fb5bcf9199961f48eb17839b084d6',
|
||||
'info_dict': {
|
||||
'id': 'GKeGPVedBe',
|
||||
'ext': 'mp4',
|
||||
'title': 'XXL - All Sports United',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
url = 'http://videomega.tv/iframe.php?ref={0:}'.format(video_id)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
escaped_data = self._search_regex(
|
||||
r'unescape\("([^"]+)"\)', webpage, 'escaped data')
|
||||
playlist = compat_urllib_parse.unquote(escaped_data)
|
||||
|
||||
thumbnail = self._search_regex(
|
||||
r'image:\s*"([^"]+)"', playlist, 'thumbnail', fatal=False)
|
||||
url = self._search_regex(r'file:\s*"([^"]+)"', playlist, 'URL')
|
||||
title = remove_start(self._html_search_regex(
|
||||
r'<title>(.*?)</title>', webpage, 'title'), 'VideoMega.tv - ')
|
||||
|
||||
formats = [{
|
||||
'format_id': 'sd',
|
||||
'url': url,
|
||||
}]
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
@@ -218,7 +218,7 @@ def parseOpts(overrideArguments=None):
|
||||
|
||||
video_format.add_option('-f', '--format',
|
||||
action='store', dest='format', metavar='FORMAT', default=None,
|
||||
help='video format code, specify the order of preference using slashes: "-f 22/17/18". "-f mp4" and "-f flv" are also supported. You can also use the special names "best", "bestvideo", "bestaudio", "worst", "worstvideo" and "worstaudio". By default, youtube-dl will pick the best quality.')
|
||||
help='video format code, specify the order of preference using slashes: -f 22/17/18 . -f mp4 , -f m4a and -f flv are also supported. You can also use the special names "best", "bestvideo", "bestaudio", "worst", "worstvideo" and "worstaudio". By default, youtube-dl will pick the best quality. Use commas to download multiple audio formats, such as -f 136/137/mp4/bestvideo,140/m4a/bestaudio')
|
||||
video_format.add_option('--all-formats',
|
||||
action='store_const', dest='format', help='download all available video formats', const='all')
|
||||
video_format.add_option('--prefer-free-formats',
|
||||
|
@@ -1437,6 +1437,24 @@ def uppercase_escape(s):
|
||||
lambda m: unicode_escape(m.group(0))[0],
|
||||
s)
|
||||
|
||||
|
||||
def escape_rfc3986(s):
|
||||
"""Escape non-ASCII characters as suggested by RFC 3986"""
|
||||
if sys.version_info < (3, 0) and isinstance(s, unicode):
|
||||
s = s.encode('utf-8')
|
||||
return compat_urllib_parse.quote(s, "%/;:@&=+$,!~*'()?#[]")
|
||||
|
||||
|
||||
def escape_url(url):
|
||||
"""Escape URL as suggested by RFC 3986"""
|
||||
url_parsed = compat_urllib_parse_urlparse(url)
|
||||
return url_parsed._replace(
|
||||
path=escape_rfc3986(url_parsed.path),
|
||||
params=escape_rfc3986(url_parsed.params),
|
||||
query=escape_rfc3986(url_parsed.query),
|
||||
fragment=escape_rfc3986(url_parsed.fragment)
|
||||
).geturl()
|
||||
|
||||
try:
|
||||
struct.pack(u'!I', 0)
|
||||
except TypeError:
|
||||
|
@@ -1,2 +1,2 @@
|
||||
|
||||
__version__ = '2014.09.15'
|
||||
__version__ = '2014.09.18'
|
||||
|
Reference in New Issue
Block a user