Compare commits

...

8 Commits

13 changed files with 183 additions and 29 deletions

View File

@ -77,6 +77,7 @@ which means you can modify it, redistribute it or use it however you like.
on Windows) on Windows)
--flat-playlist Do not extract the videos of a playlist, --flat-playlist Do not extract the videos of a playlist,
only list them. only list them.
--no-color Do not emit color codes in output.
## Network Options: ## Network Options:
--proxy URL Use the specified HTTP/HTTPS proxy. Pass in --proxy URL Use the specified HTTP/HTTPS proxy. Pass in

View File

@ -225,6 +225,7 @@
- **mailru**: Видео@Mail.Ru - **mailru**: Видео@Mail.Ru
- **Malemotion** - **Malemotion**
- **MDR** - **MDR**
- **media.ccc.de**
- **metacafe** - **metacafe**
- **Metacritic** - **Metacritic**
- **Mgoon** - **Mgoon**

View File

@ -8,11 +8,11 @@ import sys
import unittest import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import io import io
import re import re
import string import string
from test.helper import FakeYDL
from youtube_dl.extractor import YoutubeIE from youtube_dl.extractor import YoutubeIE
from youtube_dl.compat import compat_str, compat_urlretrieve from youtube_dl.compat import compat_str, compat_urlretrieve
@ -88,7 +88,8 @@ def make_tfunc(url, stype, sig_input, expected_sig):
if not os.path.exists(fn): if not os.path.exists(fn):
compat_urlretrieve(url, fn) compat_urlretrieve(url, fn)
ie = YoutubeIE() ydl = FakeYDL()
ie = YoutubeIE(ydl)
if stype == 'js': if stype == 'js':
with io.open(fn, encoding='utf-8') as testf: with io.open(fn, encoding='utf-8') as testf:
jscode = testf.read() jscode = testf.read()

View File

@ -233,6 +233,7 @@ class YoutubeDL(object):
If it returns a message, the video is ignored. If it returns a message, the video is ignored.
If it returns None, the video is downloaded. If it returns None, the video is downloaded.
match_filter_func in utils.py is one example for this. match_filter_func in utils.py is one example for this.
no_color: Do not emit color codes in output.
The following parameters are not used by YoutubeDL itself, they are used by The following parameters are not used by YoutubeDL itself, they are used by
@ -490,7 +491,7 @@ class YoutubeDL(object):
else: else:
if self.params.get('no_warnings'): if self.params.get('no_warnings'):
return return
if self._err_file.isatty() and os.name != 'nt': if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
_msg_header = '\033[0;33mWARNING:\033[0m' _msg_header = '\033[0;33mWARNING:\033[0m'
else: else:
_msg_header = 'WARNING:' _msg_header = 'WARNING:'
@ -502,7 +503,7 @@ class YoutubeDL(object):
Do the same as trouble, but prefixes the message with 'ERROR:', colored Do the same as trouble, but prefixes the message with 'ERROR:', colored
in red if stderr is a tty file. in red if stderr is a tty file.
''' '''
if self._err_file.isatty() and os.name != 'nt': if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
_msg_header = '\033[0;31mERROR:\033[0m' _msg_header = '\033[0;31mERROR:\033[0m'
else: else:
_msg_header = 'ERROR:' _msg_header = 'ERROR:'

View File

@ -349,6 +349,7 @@ def _real_main(argv=None):
'playlist_items': opts.playlist_items, 'playlist_items': opts.playlist_items,
'xattr_set_filesize': opts.xattr_set_filesize, 'xattr_set_filesize': opts.xattr_set_filesize,
'match_filter': match_filter, 'match_filter': match_filter,
'no_color': opts.no_color,
} }
with YoutubeDL(ydl_opts) as ydl: with YoutubeDL(ydl_opts) as ydl:

View File

@ -54,6 +54,7 @@ from .canalplus import CanalplusIE
from .canalc2 import Canalc2IE from .canalc2 import Canalc2IE
from .cbs import CBSIE from .cbs import CBSIE
from .cbsnews import CBSNewsIE from .cbsnews import CBSNewsIE
from .ccc import CCCIE
from .ceskatelevize import CeskaTelevizeIE from .ceskatelevize import CeskaTelevizeIE
from .channel9 import Channel9IE from .channel9 import Channel9IE
from .chilloutzone import ChilloutzoneIE from .chilloutzone import ChilloutzoneIE

View File

@ -0,0 +1,99 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
qualities,
unified_strdate,
)
class CCCIE(InfoExtractor):
IE_NAME = 'media.ccc.de'
_VALID_URL = r'https?://(?:www\.)?media\.ccc\.de/[^?#]+/[^?#/]*?_(?P<id>[0-9]{8,})._[^?#/]*\.html'
_TEST = {
'url': 'http://media.ccc.de/browse/congress/2013/30C3_-_5443_-_en_-_saal_g_-_201312281830_-_introduction_to_processor_design_-_byterazor.html#video',
'md5': '205a365d0d57c0b1e43a12c9ffe8f9be',
'info_dict': {
'id': '20131228183',
'ext': 'mp4',
'title': 'Introduction to Processor Design',
'description': 'md5:5ddbf8c734800267f2cee4eab187bc1b',
'thumbnail': 're:^https?://.*\.jpg$',
'view_count': int,
'upload_date': '20131229',
}
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
if self._downloader.params.get('prefer_free_formats'):
preference = qualities(['mp3', 'opus', 'mp4-lq', 'webm-lq', 'h264-sd', 'mp4-sd', 'webm-sd', 'mp4', 'webm', 'mp4-hd', 'h264-hd', 'webm-hd'])
else:
preference = qualities(['opus', 'mp3', 'webm-lq', 'mp4-lq', 'webm-sd', 'h264-sd', 'mp4-sd', 'webm', 'mp4', 'webm-hd', 'mp4-hd', 'h264-hd'])
title = self._html_search_regex(
r'(?s)<h1>(.*?)</h1>', webpage, 'title')
description = self._html_search_regex(
r"(?s)<p class='description'>(.*?)</p>",
webpage, 'description', fatal=False)
upload_date = unified_strdate(self._html_search_regex(
r"(?s)<span class='[^']*fa-calendar-o'></span>(.*?)</li>",
webpage, 'upload date', fatal=False))
view_count = int_or_none(self._html_search_regex(
r"(?s)<span class='[^']*fa-eye'></span>(.*?)</li>",
webpage, 'view count', fatal=False))
matches = re.finditer(r'''(?xs)
<(?:span|div)\s+class='label\s+filetype'>(?P<format>.*?)</(?:span|div)>\s*
<a\s+href='(?P<http_url>[^']+)'>\s*
(?:
.*?
<a\s+href='(?P<torrent_url>[^']+\.torrent)'
)?''', webpage)
formats = []
for m in matches:
format = m.group('format')
format_id = self._search_regex(
r'.*/([a-z0-9_-]+)/[^/]*$',
m.group('http_url'), 'format id', default=None)
vcodec = 'h264' if 'h264' in format_id else (
'none' if format_id in ('mp3', 'opus') else None
)
formats.append({
'format_id': format_id,
'format': format,
'url': m.group('http_url'),
'vcodec': vcodec,
'preference': preference(format_id),
})
if m.group('torrent_url'):
formats.append({
'format_id': 'torrent-%s' % (format if format_id is None else format_id),
'format': '%s (torrent)' % format,
'proto': 'torrent',
'format_note': '(unsupported; will just download the .torrent file)',
'vcodec': vcodec,
'preference': -100 + preference(format_id),
'url': m.group('torrent_url'),
})
self._sort_formats(formats)
thumbnail = self._html_search_regex(
r"<video.*?poster='([^']+)'", webpage, 'thumbnail', fatal=False)
return {
'id': video_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'view_count': view_count,
'upload_date': upload_date,
'formats': formats,
}

View File

@ -514,7 +514,7 @@ class InfoExtractor(object):
if mobj: if mobj:
break break
if os.name != 'nt' and sys.stderr.isatty(): if not self._downloader.params.get('no_color') and os.name != 'nt' and sys.stderr.isatty():
_name = '\033[0;34m%s\033[0m' % name _name = '\033[0;34m%s\033[0m' % name
else: else:
_name = name _name = name

View File

@ -1,7 +1,5 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
@ -20,11 +18,10 @@ class FirstpostIE(InfoExtractor):
} }
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) video_id = self._match_id(url)
video_id = mobj.group('id')
page = self._download_webpage(url, video_id) page = self._download_webpage(url, video_id)
title = self._html_search_meta('twitter:title', page, 'title')
title = self._html_search_meta('twitter:title', page, 'title', fatal=True)
description = self._html_search_meta('twitter:description', page, 'title') description = self._html_search_meta('twitter:description', page, 'title')
data = self._download_xml( data = self._download_xml(
@ -42,6 +39,7 @@ class FirstpostIE(InfoExtractor):
'height': int(details.find('./height').text.strip()), 'height': int(details.find('./height').text.strip()),
} for details in item.findall('./source/file_details') if details.find('./file').text } for details in item.findall('./source/file_details') if details.find('./file').text
] ]
self._sort_formats(formats)
return { return {
'id': video_id, 'id': video_id,

View File

@ -46,16 +46,17 @@ class PornHdIE(InfoExtractor):
quality = qualities(['sd', 'hd']) quality = qualities(['sd', 'hd'])
sources = json.loads(js_to_json(self._search_regex( sources = json.loads(js_to_json(self._search_regex(
r"(?s)'sources'\s*:\s*(\{.+?\})\s*\}\);", webpage, 'sources'))) r"(?s)'sources'\s*:\s*(\{.+?\})\s*\}[;,)]",
webpage, 'sources')))
formats = [] formats = []
for container, s in sources.items(): for qname, video_url in sources.items():
for qname, video_url in s.items(): if not video_url:
formats.append({ continue
'url': video_url, formats.append({
'container': container, 'url': video_url,
'format_id': '%s-%s' % (container, qname), 'format_id': qname,
'quality': quality(qname), 'quality': quality(qname),
}) })
self._sort_formats(formats) self._sort_formats(formats)
return { return {

View File

@ -188,9 +188,9 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
password_request = compat_urllib_request.Request(pass_url + '/password', data) password_request = compat_urllib_request.Request(pass_url + '/password', data)
password_request.add_header('Content-Type', 'application/x-www-form-urlencoded') password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
password_request.add_header('Cookie', 'xsrft=%s' % token) password_request.add_header('Cookie', 'xsrft=%s' % token)
self._download_webpage(password_request, video_id, return self._download_webpage(
'Verifying the password', password_request, video_id,
'Wrong password') 'Verifying the password', 'Wrong password')
def _verify_player_video_password(self, url, video_id): def _verify_player_video_password(self, url, video_id):
password = self._downloader.params.get('videopassword', None) password = self._downloader.params.get('videopassword', None)
@ -266,7 +266,7 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage): if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage):
raise ExtractorError('The author has restricted the access to this video, try with the "--referer" option') raise ExtractorError('The author has restricted the access to this video, try with the "--referer" option')
if re.search('<form[^>]+?id="pw_form"', webpage) is not None: if re.search(r'<form[^>]+?id="pw_form"', webpage) is not None:
self._verify_video_password(url, video_id, webpage) self._verify_video_password(url, video_id, webpage)
return self._real_extract(url) return self._real_extract(url)
else: else:
@ -412,12 +412,47 @@ class VimeoChannelIE(InfoExtractor):
def _extract_list_title(self, webpage): def _extract_list_title(self, webpage):
return self._html_search_regex(self._TITLE_RE, webpage, 'list title') return self._html_search_regex(self._TITLE_RE, webpage, 'list title')
def _login_list_password(self, page_url, list_id, webpage):
login_form = self._search_regex(
r'(?s)<form[^>]+?id="pw_form"(.*?)</form>',
webpage, 'login form', default=None)
if not login_form:
return webpage
password = self._downloader.params.get('videopassword', None)
if password is None:
raise ExtractorError('This album is protected by a password, use the --video-password option', expected=True)
fields = dict(re.findall(r'''(?x)<input\s+
type="hidden"\s+
name="([^"]+)"\s+
value="([^"]*)"
''', login_form))
token = self._search_regex(r'xsrft: \'(.*?)\'', webpage, 'login token')
fields['token'] = token
fields['password'] = password
post = compat_urllib_parse.urlencode(fields)
password_path = self._search_regex(
r'action="([^"]+)"', login_form, 'password URL')
password_url = compat_urlparse.urljoin(page_url, password_path)
password_request = compat_urllib_request.Request(password_url, post)
password_request.add_header('Content-type', 'application/x-www-form-urlencoded')
self._set_cookie('vimeo.com', 'xsrft', token)
return self._download_webpage(
password_request, list_id,
'Verifying the password', 'Wrong password')
def _extract_videos(self, list_id, base_url): def _extract_videos(self, list_id, base_url):
video_ids = [] video_ids = []
for pagenum in itertools.count(1): for pagenum in itertools.count(1):
page_url = self._page_url(base_url, pagenum)
webpage = self._download_webpage( webpage = self._download_webpage(
self._page_url(base_url, pagenum), list_id, page_url, list_id,
'Downloading page %s' % pagenum) 'Downloading page %s' % pagenum)
if pagenum == 1:
webpage = self._login_list_password(page_url, list_id, webpage)
video_ids.extend(re.findall(r'id="clip_(\d+?)"', webpage)) video_ids.extend(re.findall(r'id="clip_(\d+?)"', webpage))
if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None: if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
break break
@ -464,14 +499,24 @@ class VimeoAlbumIE(VimeoChannelIE):
'title': 'Staff Favorites: November 2013', 'title': 'Staff Favorites: November 2013',
}, },
'playlist_mincount': 13, 'playlist_mincount': 13,
}, {
'note': 'Password-protected album',
'url': 'https://vimeo.com/album/3253534',
'info_dict': {
'title': 'test',
'id': '3253534',
},
'playlist_count': 1,
'params': {
'videopassword': 'youtube-dl',
}
}] }]
def _page_url(self, base_url, pagenum): def _page_url(self, base_url, pagenum):
return '%s/page:%d/' % (base_url, pagenum) return '%s/page:%d/' % (base_url, pagenum)
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) album_id = self._match_id(url)
album_id = mobj.group('id')
return self._extract_videos(album_id, 'http://vimeo.com/album/%s' % album_id) return self._extract_videos(album_id, 'http://vimeo.com/album/%s' % album_id)

View File

@ -165,6 +165,11 @@ def parseOpts(overrideArguments=None):
action='store_const', dest='extract_flat', const='in_playlist', action='store_const', dest='extract_flat', const='in_playlist',
default=False, default=False,
help='Do not extract the videos of a playlist, only list them.') help='Do not extract the videos of a playlist, only list them.')
general.add_option(
'--no-color', '--no-colors',
action='store_true', dest='no_color',
default=False,
help='Do not emit color codes in output.')
network = optparse.OptionGroup(parser, 'Network Options') network = optparse.OptionGroup(parser, 'Network Options')
network.add_option( network.add_option(
@ -552,7 +557,7 @@ def parseOpts(overrideArguments=None):
action='store_true', dest='youtube_print_sig_code', default=False, action='store_true', dest='youtube_print_sig_code', default=False,
help=optparse.SUPPRESS_HELP) help=optparse.SUPPRESS_HELP)
verbosity.add_option( verbosity.add_option(
'--print-traffic', '--print-traffic', '--dump-headers',
dest='debug_printtraffic', action='store_true', default=False, dest='debug_printtraffic', action='store_true', default=False,
help='Display sent and read HTTP traffic') help='Display sent and read HTTP traffic')
verbosity.add_option( verbosity.add_option(

View File

@ -1,3 +1,3 @@
from __future__ import unicode_literals from __future__ import unicode_literals
__version__ = '2015.02.10.2' __version__ = '2015.02.10.3'