Compare commits
8 Commits
2015.02.10
...
2015.02.10
Author | SHA1 | Date | |
---|---|---|---|
04bbe41330 | |||
8f84f57183 | |||
6a78740211 | |||
c0e1a415fd | |||
bf8f082a90 | |||
2f543a2142 | |||
7e5db8c930 | |||
f7a211dcc8 |
@ -77,6 +77,7 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
on Windows)
|
on Windows)
|
||||||
--flat-playlist Do not extract the videos of a playlist,
|
--flat-playlist Do not extract the videos of a playlist,
|
||||||
only list them.
|
only list them.
|
||||||
|
--no-color Do not emit color codes in output.
|
||||||
|
|
||||||
## Network Options:
|
## Network Options:
|
||||||
--proxy URL Use the specified HTTP/HTTPS proxy. Pass in
|
--proxy URL Use the specified HTTP/HTTPS proxy. Pass in
|
||||||
|
@ -225,6 +225,7 @@
|
|||||||
- **mailru**: Видео@Mail.Ru
|
- **mailru**: Видео@Mail.Ru
|
||||||
- **Malemotion**
|
- **Malemotion**
|
||||||
- **MDR**
|
- **MDR**
|
||||||
|
- **media.ccc.de**
|
||||||
- **metacafe**
|
- **metacafe**
|
||||||
- **Metacritic**
|
- **Metacritic**
|
||||||
- **Mgoon**
|
- **Mgoon**
|
||||||
|
@ -8,11 +8,11 @@ import sys
|
|||||||
import unittest
|
import unittest
|
||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
|
|
||||||
import io
|
import io
|
||||||
import re
|
import re
|
||||||
import string
|
import string
|
||||||
|
|
||||||
|
from test.helper import FakeYDL
|
||||||
from youtube_dl.extractor import YoutubeIE
|
from youtube_dl.extractor import YoutubeIE
|
||||||
from youtube_dl.compat import compat_str, compat_urlretrieve
|
from youtube_dl.compat import compat_str, compat_urlretrieve
|
||||||
|
|
||||||
@ -88,7 +88,8 @@ def make_tfunc(url, stype, sig_input, expected_sig):
|
|||||||
if not os.path.exists(fn):
|
if not os.path.exists(fn):
|
||||||
compat_urlretrieve(url, fn)
|
compat_urlretrieve(url, fn)
|
||||||
|
|
||||||
ie = YoutubeIE()
|
ydl = FakeYDL()
|
||||||
|
ie = YoutubeIE(ydl)
|
||||||
if stype == 'js':
|
if stype == 'js':
|
||||||
with io.open(fn, encoding='utf-8') as testf:
|
with io.open(fn, encoding='utf-8') as testf:
|
||||||
jscode = testf.read()
|
jscode = testf.read()
|
||||||
|
@ -233,6 +233,7 @@ class YoutubeDL(object):
|
|||||||
If it returns a message, the video is ignored.
|
If it returns a message, the video is ignored.
|
||||||
If it returns None, the video is downloaded.
|
If it returns None, the video is downloaded.
|
||||||
match_filter_func in utils.py is one example for this.
|
match_filter_func in utils.py is one example for this.
|
||||||
|
no_color: Do not emit color codes in output.
|
||||||
|
|
||||||
|
|
||||||
The following parameters are not used by YoutubeDL itself, they are used by
|
The following parameters are not used by YoutubeDL itself, they are used by
|
||||||
@ -490,7 +491,7 @@ class YoutubeDL(object):
|
|||||||
else:
|
else:
|
||||||
if self.params.get('no_warnings'):
|
if self.params.get('no_warnings'):
|
||||||
return
|
return
|
||||||
if self._err_file.isatty() and os.name != 'nt':
|
if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
|
||||||
_msg_header = '\033[0;33mWARNING:\033[0m'
|
_msg_header = '\033[0;33mWARNING:\033[0m'
|
||||||
else:
|
else:
|
||||||
_msg_header = 'WARNING:'
|
_msg_header = 'WARNING:'
|
||||||
@ -502,7 +503,7 @@ class YoutubeDL(object):
|
|||||||
Do the same as trouble, but prefixes the message with 'ERROR:', colored
|
Do the same as trouble, but prefixes the message with 'ERROR:', colored
|
||||||
in red if stderr is a tty file.
|
in red if stderr is a tty file.
|
||||||
'''
|
'''
|
||||||
if self._err_file.isatty() and os.name != 'nt':
|
if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
|
||||||
_msg_header = '\033[0;31mERROR:\033[0m'
|
_msg_header = '\033[0;31mERROR:\033[0m'
|
||||||
else:
|
else:
|
||||||
_msg_header = 'ERROR:'
|
_msg_header = 'ERROR:'
|
||||||
|
@ -349,6 +349,7 @@ def _real_main(argv=None):
|
|||||||
'playlist_items': opts.playlist_items,
|
'playlist_items': opts.playlist_items,
|
||||||
'xattr_set_filesize': opts.xattr_set_filesize,
|
'xattr_set_filesize': opts.xattr_set_filesize,
|
||||||
'match_filter': match_filter,
|
'match_filter': match_filter,
|
||||||
|
'no_color': opts.no_color,
|
||||||
}
|
}
|
||||||
|
|
||||||
with YoutubeDL(ydl_opts) as ydl:
|
with YoutubeDL(ydl_opts) as ydl:
|
||||||
|
@ -54,6 +54,7 @@ from .canalplus import CanalplusIE
|
|||||||
from .canalc2 import Canalc2IE
|
from .canalc2 import Canalc2IE
|
||||||
from .cbs import CBSIE
|
from .cbs import CBSIE
|
||||||
from .cbsnews import CBSNewsIE
|
from .cbsnews import CBSNewsIE
|
||||||
|
from .ccc import CCCIE
|
||||||
from .ceskatelevize import CeskaTelevizeIE
|
from .ceskatelevize import CeskaTelevizeIE
|
||||||
from .channel9 import Channel9IE
|
from .channel9 import Channel9IE
|
||||||
from .chilloutzone import ChilloutzoneIE
|
from .chilloutzone import ChilloutzoneIE
|
||||||
|
99
youtube_dl/extractor/ccc.py
Normal file
99
youtube_dl/extractor/ccc.py
Normal file
@ -0,0 +1,99 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
qualities,
|
||||||
|
unified_strdate,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class CCCIE(InfoExtractor):
|
||||||
|
IE_NAME = 'media.ccc.de'
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?media\.ccc\.de/[^?#]+/[^?#/]*?_(?P<id>[0-9]{8,})._[^?#/]*\.html'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://media.ccc.de/browse/congress/2013/30C3_-_5443_-_en_-_saal_g_-_201312281830_-_introduction_to_processor_design_-_byterazor.html#video',
|
||||||
|
'md5': '205a365d0d57c0b1e43a12c9ffe8f9be',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '20131228183',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Introduction to Processor Design',
|
||||||
|
'description': 'md5:5ddbf8c734800267f2cee4eab187bc1b',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
'view_count': int,
|
||||||
|
'upload_date': '20131229',
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
if self._downloader.params.get('prefer_free_formats'):
|
||||||
|
preference = qualities(['mp3', 'opus', 'mp4-lq', 'webm-lq', 'h264-sd', 'mp4-sd', 'webm-sd', 'mp4', 'webm', 'mp4-hd', 'h264-hd', 'webm-hd'])
|
||||||
|
else:
|
||||||
|
preference = qualities(['opus', 'mp3', 'webm-lq', 'mp4-lq', 'webm-sd', 'h264-sd', 'mp4-sd', 'webm', 'mp4', 'webm-hd', 'mp4-hd', 'h264-hd'])
|
||||||
|
|
||||||
|
title = self._html_search_regex(
|
||||||
|
r'(?s)<h1>(.*?)</h1>', webpage, 'title')
|
||||||
|
description = self._html_search_regex(
|
||||||
|
r"(?s)<p class='description'>(.*?)</p>",
|
||||||
|
webpage, 'description', fatal=False)
|
||||||
|
upload_date = unified_strdate(self._html_search_regex(
|
||||||
|
r"(?s)<span class='[^']*fa-calendar-o'></span>(.*?)</li>",
|
||||||
|
webpage, 'upload date', fatal=False))
|
||||||
|
view_count = int_or_none(self._html_search_regex(
|
||||||
|
r"(?s)<span class='[^']*fa-eye'></span>(.*?)</li>",
|
||||||
|
webpage, 'view count', fatal=False))
|
||||||
|
|
||||||
|
matches = re.finditer(r'''(?xs)
|
||||||
|
<(?:span|div)\s+class='label\s+filetype'>(?P<format>.*?)</(?:span|div)>\s*
|
||||||
|
<a\s+href='(?P<http_url>[^']+)'>\s*
|
||||||
|
(?:
|
||||||
|
.*?
|
||||||
|
<a\s+href='(?P<torrent_url>[^']+\.torrent)'
|
||||||
|
)?''', webpage)
|
||||||
|
formats = []
|
||||||
|
for m in matches:
|
||||||
|
format = m.group('format')
|
||||||
|
format_id = self._search_regex(
|
||||||
|
r'.*/([a-z0-9_-]+)/[^/]*$',
|
||||||
|
m.group('http_url'), 'format id', default=None)
|
||||||
|
vcodec = 'h264' if 'h264' in format_id else (
|
||||||
|
'none' if format_id in ('mp3', 'opus') else None
|
||||||
|
)
|
||||||
|
formats.append({
|
||||||
|
'format_id': format_id,
|
||||||
|
'format': format,
|
||||||
|
'url': m.group('http_url'),
|
||||||
|
'vcodec': vcodec,
|
||||||
|
'preference': preference(format_id),
|
||||||
|
})
|
||||||
|
|
||||||
|
if m.group('torrent_url'):
|
||||||
|
formats.append({
|
||||||
|
'format_id': 'torrent-%s' % (format if format_id is None else format_id),
|
||||||
|
'format': '%s (torrent)' % format,
|
||||||
|
'proto': 'torrent',
|
||||||
|
'format_note': '(unsupported; will just download the .torrent file)',
|
||||||
|
'vcodec': vcodec,
|
||||||
|
'preference': -100 + preference(format_id),
|
||||||
|
'url': m.group('torrent_url'),
|
||||||
|
})
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
thumbnail = self._html_search_regex(
|
||||||
|
r"<video.*?poster='([^']+)'", webpage, 'thumbnail', fatal=False)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'view_count': view_count,
|
||||||
|
'upload_date': upload_date,
|
||||||
|
'formats': formats,
|
||||||
|
}
|
@ -514,7 +514,7 @@ class InfoExtractor(object):
|
|||||||
if mobj:
|
if mobj:
|
||||||
break
|
break
|
||||||
|
|
||||||
if os.name != 'nt' and sys.stderr.isatty():
|
if not self._downloader.params.get('no_color') and os.name != 'nt' and sys.stderr.isatty():
|
||||||
_name = '\033[0;34m%s\033[0m' % name
|
_name = '\033[0;34m%s\033[0m' % name
|
||||||
else:
|
else:
|
||||||
_name = name
|
_name = name
|
||||||
|
@ -1,7 +1,5 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
@ -20,11 +18,10 @@ class FirstpostIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group('id')
|
|
||||||
|
|
||||||
page = self._download_webpage(url, video_id)
|
page = self._download_webpage(url, video_id)
|
||||||
title = self._html_search_meta('twitter:title', page, 'title')
|
|
||||||
|
title = self._html_search_meta('twitter:title', page, 'title', fatal=True)
|
||||||
description = self._html_search_meta('twitter:description', page, 'title')
|
description = self._html_search_meta('twitter:description', page, 'title')
|
||||||
|
|
||||||
data = self._download_xml(
|
data = self._download_xml(
|
||||||
@ -42,6 +39,7 @@ class FirstpostIE(InfoExtractor):
|
|||||||
'height': int(details.find('./height').text.strip()),
|
'height': int(details.find('./height').text.strip()),
|
||||||
} for details in item.findall('./source/file_details') if details.find('./file').text
|
} for details in item.findall('./source/file_details') if details.find('./file').text
|
||||||
]
|
]
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
@ -46,16 +46,17 @@ class PornHdIE(InfoExtractor):
|
|||||||
|
|
||||||
quality = qualities(['sd', 'hd'])
|
quality = qualities(['sd', 'hd'])
|
||||||
sources = json.loads(js_to_json(self._search_regex(
|
sources = json.loads(js_to_json(self._search_regex(
|
||||||
r"(?s)'sources'\s*:\s*(\{.+?\})\s*\}\);", webpage, 'sources')))
|
r"(?s)'sources'\s*:\s*(\{.+?\})\s*\}[;,)]",
|
||||||
|
webpage, 'sources')))
|
||||||
formats = []
|
formats = []
|
||||||
for container, s in sources.items():
|
for qname, video_url in sources.items():
|
||||||
for qname, video_url in s.items():
|
if not video_url:
|
||||||
formats.append({
|
continue
|
||||||
'url': video_url,
|
formats.append({
|
||||||
'container': container,
|
'url': video_url,
|
||||||
'format_id': '%s-%s' % (container, qname),
|
'format_id': qname,
|
||||||
'quality': quality(qname),
|
'quality': quality(qname),
|
||||||
})
|
})
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
@ -188,9 +188,9 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
password_request = compat_urllib_request.Request(pass_url + '/password', data)
|
password_request = compat_urllib_request.Request(pass_url + '/password', data)
|
||||||
password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||||
password_request.add_header('Cookie', 'xsrft=%s' % token)
|
password_request.add_header('Cookie', 'xsrft=%s' % token)
|
||||||
self._download_webpage(password_request, video_id,
|
return self._download_webpage(
|
||||||
'Verifying the password',
|
password_request, video_id,
|
||||||
'Wrong password')
|
'Verifying the password', 'Wrong password')
|
||||||
|
|
||||||
def _verify_player_video_password(self, url, video_id):
|
def _verify_player_video_password(self, url, video_id):
|
||||||
password = self._downloader.params.get('videopassword', None)
|
password = self._downloader.params.get('videopassword', None)
|
||||||
@ -266,7 +266,7 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage):
|
if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage):
|
||||||
raise ExtractorError('The author has restricted the access to this video, try with the "--referer" option')
|
raise ExtractorError('The author has restricted the access to this video, try with the "--referer" option')
|
||||||
|
|
||||||
if re.search('<form[^>]+?id="pw_form"', webpage) is not None:
|
if re.search(r'<form[^>]+?id="pw_form"', webpage) is not None:
|
||||||
self._verify_video_password(url, video_id, webpage)
|
self._verify_video_password(url, video_id, webpage)
|
||||||
return self._real_extract(url)
|
return self._real_extract(url)
|
||||||
else:
|
else:
|
||||||
@ -412,12 +412,47 @@ class VimeoChannelIE(InfoExtractor):
|
|||||||
def _extract_list_title(self, webpage):
|
def _extract_list_title(self, webpage):
|
||||||
return self._html_search_regex(self._TITLE_RE, webpage, 'list title')
|
return self._html_search_regex(self._TITLE_RE, webpage, 'list title')
|
||||||
|
|
||||||
|
def _login_list_password(self, page_url, list_id, webpage):
|
||||||
|
login_form = self._search_regex(
|
||||||
|
r'(?s)<form[^>]+?id="pw_form"(.*?)</form>',
|
||||||
|
webpage, 'login form', default=None)
|
||||||
|
if not login_form:
|
||||||
|
return webpage
|
||||||
|
|
||||||
|
password = self._downloader.params.get('videopassword', None)
|
||||||
|
if password is None:
|
||||||
|
raise ExtractorError('This album is protected by a password, use the --video-password option', expected=True)
|
||||||
|
fields = dict(re.findall(r'''(?x)<input\s+
|
||||||
|
type="hidden"\s+
|
||||||
|
name="([^"]+)"\s+
|
||||||
|
value="([^"]*)"
|
||||||
|
''', login_form))
|
||||||
|
token = self._search_regex(r'xsrft: \'(.*?)\'', webpage, 'login token')
|
||||||
|
fields['token'] = token
|
||||||
|
fields['password'] = password
|
||||||
|
post = compat_urllib_parse.urlencode(fields)
|
||||||
|
password_path = self._search_regex(
|
||||||
|
r'action="([^"]+)"', login_form, 'password URL')
|
||||||
|
password_url = compat_urlparse.urljoin(page_url, password_path)
|
||||||
|
password_request = compat_urllib_request.Request(password_url, post)
|
||||||
|
password_request.add_header('Content-type', 'application/x-www-form-urlencoded')
|
||||||
|
self._set_cookie('vimeo.com', 'xsrft', token)
|
||||||
|
|
||||||
|
return self._download_webpage(
|
||||||
|
password_request, list_id,
|
||||||
|
'Verifying the password', 'Wrong password')
|
||||||
|
|
||||||
def _extract_videos(self, list_id, base_url):
|
def _extract_videos(self, list_id, base_url):
|
||||||
video_ids = []
|
video_ids = []
|
||||||
for pagenum in itertools.count(1):
|
for pagenum in itertools.count(1):
|
||||||
|
page_url = self._page_url(base_url, pagenum)
|
||||||
webpage = self._download_webpage(
|
webpage = self._download_webpage(
|
||||||
self._page_url(base_url, pagenum), list_id,
|
page_url, list_id,
|
||||||
'Downloading page %s' % pagenum)
|
'Downloading page %s' % pagenum)
|
||||||
|
|
||||||
|
if pagenum == 1:
|
||||||
|
webpage = self._login_list_password(page_url, list_id, webpage)
|
||||||
|
|
||||||
video_ids.extend(re.findall(r'id="clip_(\d+?)"', webpage))
|
video_ids.extend(re.findall(r'id="clip_(\d+?)"', webpage))
|
||||||
if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
|
if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
|
||||||
break
|
break
|
||||||
@ -464,14 +499,24 @@ class VimeoAlbumIE(VimeoChannelIE):
|
|||||||
'title': 'Staff Favorites: November 2013',
|
'title': 'Staff Favorites: November 2013',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 13,
|
'playlist_mincount': 13,
|
||||||
|
}, {
|
||||||
|
'note': 'Password-protected album',
|
||||||
|
'url': 'https://vimeo.com/album/3253534',
|
||||||
|
'info_dict': {
|
||||||
|
'title': 'test',
|
||||||
|
'id': '3253534',
|
||||||
|
},
|
||||||
|
'playlist_count': 1,
|
||||||
|
'params': {
|
||||||
|
'videopassword': 'youtube-dl',
|
||||||
|
}
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _page_url(self, base_url, pagenum):
|
def _page_url(self, base_url, pagenum):
|
||||||
return '%s/page:%d/' % (base_url, pagenum)
|
return '%s/page:%d/' % (base_url, pagenum)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
album_id = self._match_id(url)
|
||||||
album_id = mobj.group('id')
|
|
||||||
return self._extract_videos(album_id, 'http://vimeo.com/album/%s' % album_id)
|
return self._extract_videos(album_id, 'http://vimeo.com/album/%s' % album_id)
|
||||||
|
|
||||||
|
|
||||||
|
@ -165,6 +165,11 @@ def parseOpts(overrideArguments=None):
|
|||||||
action='store_const', dest='extract_flat', const='in_playlist',
|
action='store_const', dest='extract_flat', const='in_playlist',
|
||||||
default=False,
|
default=False,
|
||||||
help='Do not extract the videos of a playlist, only list them.')
|
help='Do not extract the videos of a playlist, only list them.')
|
||||||
|
general.add_option(
|
||||||
|
'--no-color', '--no-colors',
|
||||||
|
action='store_true', dest='no_color',
|
||||||
|
default=False,
|
||||||
|
help='Do not emit color codes in output.')
|
||||||
|
|
||||||
network = optparse.OptionGroup(parser, 'Network Options')
|
network = optparse.OptionGroup(parser, 'Network Options')
|
||||||
network.add_option(
|
network.add_option(
|
||||||
@ -552,7 +557,7 @@ def parseOpts(overrideArguments=None):
|
|||||||
action='store_true', dest='youtube_print_sig_code', default=False,
|
action='store_true', dest='youtube_print_sig_code', default=False,
|
||||||
help=optparse.SUPPRESS_HELP)
|
help=optparse.SUPPRESS_HELP)
|
||||||
verbosity.add_option(
|
verbosity.add_option(
|
||||||
'--print-traffic',
|
'--print-traffic', '--dump-headers',
|
||||||
dest='debug_printtraffic', action='store_true', default=False,
|
dest='debug_printtraffic', action='store_true', default=False,
|
||||||
help='Display sent and read HTTP traffic')
|
help='Display sent and read HTTP traffic')
|
||||||
verbosity.add_option(
|
verbosity.add_option(
|
||||||
|
@ -1,3 +1,3 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
__version__ = '2015.02.10.2'
|
__version__ = '2015.02.10.3'
|
||||||
|
Reference in New Issue
Block a user