Compare commits

...

25 Commits

Author SHA1 Message Date
5277f09dfc release 2015.02.11 2015-02-11 19:02:39 +01:00
2d30521ab9 [youtube] Extract average rating (closes #2362) 2015-02-11 18:39:31 +01:00
050fa43561 flake8: Ignore some error added in pep8 1.6
* E402: we execute code between imports, like modifying 'sys.path' in the tests
* E731: we assign to lambdas in a lot of places, we may want to consider defining functions in a single line instead (what pep8 recommends)
2015-02-11 18:15:15 +01:00
f36f92f4da [aes] style: Put __all__ variable at the end of the file 2015-02-11 18:15:15 +01:00
124f3bc67d [dotsub] Fix extraction and modernize 2015-02-11 22:33:03 +06:00
d304209a85 [test/parameters.json] Set 'fixup' to 'never'
The fixed audio files for Youtube have a size lower than the minimum required.
2015-02-11 17:25:04 +01:00
c56d7d899d [dctptv] Skip rtmp download 2015-02-11 22:10:33 +06:00
ea5db8469e [canalplus] Add support for itele.fr URLs (Closes #4931) 2015-02-11 16:21:52 +02:00
3811c567e7 [teamcoco] Fix video id extraction 2015-02-11 15:47:19 +02:00
054fe3cc40 [ntvru] Adapt to new direct delivery and modernize (Closes #4918) 2015-02-10 21:35:34 +06:00
af0d11f244 release 2015.02.10.5 2015-02-10 15:56:04 +01:00
9650885be9 [escapist] Filter video differently (Fixes #4919) 2015-02-10 15:55:51 +01:00
596ac6e31f [escapist] Modernize 2015-02-10 15:45:36 +01:00
612ee37365 release 2015.02.10.4 2015-02-10 11:28:34 +01:00
442c37b7a9 [YoutubeDL] Do not perform filter matching on partial results (Fixes #4921) 2015-02-10 11:28:28 +01:00
04bbe41330 release 2015.02.10.3 2015-02-10 05:42:47 +01:00
8f84f57183 [ccc] Add new extractor (Fixes #4890) 2015-02-10 05:42:41 +01:00
6a78740211 [test/test_youtube_signature] Use fake YDL 2015-02-10 05:28:59 +01:00
c0e1a415fd [firstpost] Modernize 2015-02-10 05:28:48 +01:00
bf8f082a90 [vimeo:album] Add support for album passwords (Fixes #4917) 2015-02-10 04:53:21 +01:00
2f543a2142 [options] Add alias --dump-header for --print-traffic 2015-02-10 04:52:33 +01:00
7e5db8c930 [options] Add --no-color 2015-02-10 04:22:10 +01:00
f7a211dcc8 [pornhd] Fix extraction (fixes #4915) 2015-02-10 03:41:31 +01:00
845734773d release 2015.02.10.2 2015-02-10 03:32:55 +01:00
347de4931c [YoutubeDL] Add generic video filtering (Fixes #4916)
This functionality is intended to eventually encompass the current format filtering.
2015-02-10 03:32:24 +01:00
25 changed files with 478 additions and 143 deletions

View File

@ -77,6 +77,7 @@ which means you can modify it, redistribute it or use it however you like.
on Windows) on Windows)
--flat-playlist Do not extract the videos of a playlist, --flat-playlist Do not extract the videos of a playlist,
only list them. only list them.
--no-color Do not emit color codes in output.
## Network Options: ## Network Options:
--proxy URL Use the specified HTTP/HTTPS proxy. Pass in --proxy URL Use the specified HTTP/HTTPS proxy. Pass in
@ -119,6 +120,23 @@ which means you can modify it, redistribute it or use it however you like.
COUNT views COUNT views
--max-views COUNT Do not download any videos with more than --max-views COUNT Do not download any videos with more than
COUNT views COUNT views
--match-filter FILTER (Experimental) Generic video filter.
Specify any key (see help for -o for a list
of available keys) to match if the key is
present, !key to check if the key is not
present, key > NUMBER (like "comment_count >
12", also works with >=, <, <=, !=, =) to
compare against a number, and & to require
multiple matches. Values which are not
known are excluded unless you put a
question mark (?) after the operator. For
example, to only match videos that have
been liked more than 100 times and disliked
less than 50 times (or the dislike
functionality is not available at the given
service), but who also have a description,
use --match-filter "like_count > 100 &
dislike_count <? 50 & description" .
--no-playlist If the URL refers to a video and a --no-playlist If the URL refers to a video and a
playlist, download only the video. playlist, download only the video.
--age-limit YEARS download only videos suitable for the given --age-limit YEARS download only videos suitable for the given

View File

@ -225,6 +225,7 @@
- **mailru**: Видео@Mail.Ru - **mailru**: Видео@Mail.Ru
- **Malemotion** - **Malemotion**
- **MDR** - **MDR**
- **media.ccc.de**
- **metacafe** - **metacafe**
- **Metacritic** - **Metacritic**
- **Mgoon** - **Mgoon**

View File

@ -3,4 +3,4 @@ universal = True
[flake8] [flake8]
exclude = youtube_dl/extractor/__init__.py,devscripts/buildserver.py,setup.py,build,.git exclude = youtube_dl/extractor/__init__.py,devscripts/buildserver.py,setup.py,build,.git
ignore = E501 ignore = E402,E501,E731

View File

@ -39,5 +39,6 @@
"writesubtitles": false, "writesubtitles": false,
"allsubtitles": false, "allsubtitles": false,
"listssubtitles": false, "listssubtitles": false,
"socket_timeout": 20 "socket_timeout": 20,
"fixup": "never"
} }

View File

@ -53,6 +53,7 @@ from youtube_dl.utils import (
version_tuple, version_tuple,
xpath_with_ns, xpath_with_ns,
render_table, render_table,
match_str,
) )
@ -459,6 +460,37 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4')
'123 4\n' '123 4\n'
'9999 51') '9999 51')
def test_match_str(self):
    """Check match_str against a table of filter strings and info dicts."""
    # This filter/dict combination is expected to be rejected outright.
    self.assertRaises(ValueError, match_str, 'xy>foobar', {})

    combined = 'like_count > 100 & dislike_count <? 50 & description'
    # (filter string, info dict, expected truthiness), checked in order.
    cases = [
        # Presence / absence of a key
        ('xy', {'x': 1200}, False),
        ('!xy', {'x': 1200}, True),
        ('x', {'x': 1200}, True),
        ('!x', {'x': 1200}, False),
        ('x', {'x': 0}, True),
        # Numeric comparisons, including the "?" suffix on the operator
        ('x>0', {'x': 0}, False),
        ('x>0', {}, False),
        ('x>?0', {}, True),
        ('x>1K', {'x': 1200}, True),
        ('x>2K', {'x': 1200}, False),
        ('x>=1200 & x < 1300', {'x': 1200}, True),
        ('x>=1100 & x < 1200', {'x': 1200}, False),
        # String equality / inequality
        ('y=a212', {'y': 'foobar42'}, False),
        ('y=foobar42', {'y': 'foobar42'}, True),
        ('y!=foobar42', {'y': 'foobar42'}, False),
        ('y!=foobar2', {'y': 'foobar42'}, True),
        # The combined example from the --match-filter help text
        (combined, {'like_count': 90, 'description': 'foo'}, False),
        (combined, {'like_count': 190, 'description': 'foo'}, True),
        (combined,
         {'like_count': 190, 'dislike_count': 60, 'description': 'foo'},
         False),
        (combined, {'like_count': 190, 'dislike_count': 10}, False),
    ]
    for filter_str, info, expected in cases:
        outcome = match_str(filter_str, info)
        if expected:
            self.assertTrue(outcome)
        else:
            self.assertFalse(outcome)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View File

@ -8,11 +8,11 @@ import sys
import unittest import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import io import io
import re import re
import string import string
from test.helper import FakeYDL
from youtube_dl.extractor import YoutubeIE from youtube_dl.extractor import YoutubeIE
from youtube_dl.compat import compat_str, compat_urlretrieve from youtube_dl.compat import compat_str, compat_urlretrieve
@ -88,7 +88,8 @@ def make_tfunc(url, stype, sig_input, expected_sig):
if not os.path.exists(fn): if not os.path.exists(fn):
compat_urlretrieve(url, fn) compat_urlretrieve(url, fn)
ie = YoutubeIE() ydl = FakeYDL()
ie = YoutubeIE(ydl)
if stype == 'js': if stype == 'js':
with io.open(fn, encoding='utf-8') as testf: with io.open(fn, encoding='utf-8') as testf:
jscode = testf.read() jscode = testf.read()

View File

@ -228,6 +228,12 @@ class YoutubeDL(object):
external_downloader: Executable of the external downloader to call. external_downloader: Executable of the external downloader to call.
listformats: Print an overview of available video formats and exit. listformats: Print an overview of available video formats and exit.
list_thumbnails: Print a table of all thumbnails and exit. list_thumbnails: Print a table of all thumbnails and exit.
match_filter: A function that gets called with the info_dict of
every video.
If it returns a message, the video is ignored.
If it returns None, the video is downloaded.
match_filter_func in utils.py is one example for this.
no_color: Do not emit color codes in output.
The following parameters are not used by YoutubeDL itself, they are used by The following parameters are not used by YoutubeDL itself, they are used by
@ -485,7 +491,7 @@ class YoutubeDL(object):
else: else:
if self.params.get('no_warnings'): if self.params.get('no_warnings'):
return return
if self._err_file.isatty() and os.name != 'nt': if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
_msg_header = '\033[0;33mWARNING:\033[0m' _msg_header = '\033[0;33mWARNING:\033[0m'
else: else:
_msg_header = 'WARNING:' _msg_header = 'WARNING:'
@ -497,7 +503,7 @@ class YoutubeDL(object):
Do the same as trouble, but prefixes the message with 'ERROR:', colored Do the same as trouble, but prefixes the message with 'ERROR:', colored
in red if stderr is a tty file. in red if stderr is a tty file.
''' '''
if self._err_file.isatty() and os.name != 'nt': if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
_msg_header = '\033[0;31mERROR:\033[0m' _msg_header = '\033[0;31mERROR:\033[0m'
else: else:
_msg_header = 'ERROR:' _msg_header = 'ERROR:'
@ -554,7 +560,7 @@ class YoutubeDL(object):
self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')') self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
return None return None
def _match_entry(self, info_dict): def _match_entry(self, info_dict, incomplete):
""" Returns None iff the file should be downloaded """ """ Returns None iff the file should be downloaded """
video_title = info_dict.get('title', info_dict.get('id', 'video')) video_title = info_dict.get('title', info_dict.get('id', 'video'))
@ -583,9 +589,17 @@ class YoutubeDL(object):
if max_views is not None and view_count > max_views: if max_views is not None and view_count > max_views:
return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views) return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')): if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
return 'Skipping "%s" because it is age restricted' % title return 'Skipping "%s" because it is age restricted' % video_title
if self.in_download_archive(info_dict): if self.in_download_archive(info_dict):
return '%s has already been recorded in archive' % video_title return '%s has already been recorded in archive' % video_title
if not incomplete:
match_filter = self.params.get('match_filter')
if match_filter is not None:
ret = match_filter(info_dict)
if ret is not None:
return ret
return None return None
@staticmethod @staticmethod
@ -779,7 +793,7 @@ class YoutubeDL(object):
'extractor_key': ie_result['extractor_key'], 'extractor_key': ie_result['extractor_key'],
} }
reason = self._match_entry(entry) reason = self._match_entry(entry, incomplete=True)
if reason is not None: if reason is not None:
self.to_screen('[download] ' + reason) self.to_screen('[download] ' + reason)
continue continue
@ -1153,7 +1167,7 @@ class YoutubeDL(object):
if 'format' not in info_dict: if 'format' not in info_dict:
info_dict['format'] = info_dict['ext'] info_dict['format'] = info_dict['ext']
reason = self._match_entry(info_dict) reason = self._match_entry(info_dict, incomplete=False)
if reason is not None: if reason is not None:
self.to_screen('[download] ' + reason) self.to_screen('[download] ' + reason)
return return

View File

@ -23,9 +23,10 @@ from .compat import (
) )
from .utils import ( from .utils import (
DateRange, DateRange,
DEFAULT_OUTTMPL,
decodeOption, decodeOption,
DEFAULT_OUTTMPL,
DownloadError, DownloadError,
match_filter_func,
MaxDownloadsReached, MaxDownloadsReached,
preferredencoding, preferredencoding,
read_batch_urls, read_batch_urls,
@ -247,6 +248,9 @@ def _real_main(argv=None):
xattr # Confuse flake8 xattr # Confuse flake8
except ImportError: except ImportError:
parser.error('setting filesize xattr requested but python-xattr is not available') parser.error('setting filesize xattr requested but python-xattr is not available')
match_filter = (
None if opts.match_filter is None
else match_filter_func(opts.match_filter))
ydl_opts = { ydl_opts = {
'usenetrc': opts.usenetrc, 'usenetrc': opts.usenetrc,
@ -344,6 +348,8 @@ def _real_main(argv=None):
'list_thumbnails': opts.list_thumbnails, 'list_thumbnails': opts.list_thumbnails,
'playlist_items': opts.playlist_items, 'playlist_items': opts.playlist_items,
'xattr_set_filesize': opts.xattr_set_filesize, 'xattr_set_filesize': opts.xattr_set_filesize,
'match_filter': match_filter,
'no_color': opts.no_color,
} }
with YoutubeDL(ydl_opts) as ydl: with YoutubeDL(ydl_opts) as ydl:

View File

@ -1,7 +1,5 @@
from __future__ import unicode_literals from __future__ import unicode_literals
__all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_cbc_decrypt', 'aes_decrypt_text']
import base64 import base64
from math import ceil from math import ceil
@ -329,3 +327,5 @@ def inc(data):
data[i] = data[i] + 1 data[i] = data[i] + 1
break break
return data return data
__all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_cbc_decrypt', 'aes_decrypt_text']

View File

@ -54,6 +54,7 @@ from .canalplus import CanalplusIE
from .canalc2 import Canalc2IE from .canalc2 import Canalc2IE
from .cbs import CBSIE from .cbs import CBSIE
from .cbsnews import CBSNewsIE from .cbsnews import CBSNewsIE
from .ccc import CCCIE
from .ceskatelevize import CeskaTelevizeIE from .ceskatelevize import CeskaTelevizeIE
from .channel9 import Channel9IE from .channel9 import Channel9IE
from .chilloutzone import ChilloutzoneIE from .chilloutzone import ChilloutzoneIE

View File

@ -15,12 +15,13 @@ from ..utils import (
class CanalplusIE(InfoExtractor): class CanalplusIE(InfoExtractor):
IE_DESC = 'canalplus.fr, piwiplus.fr and d8.tv' IE_DESC = 'canalplus.fr, piwiplus.fr and d8.tv'
_VALID_URL = r'https?://(?:www\.(?P<site>canalplus\.fr|piwiplus\.fr|d8\.tv)/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>[0-9]+))' _VALID_URL = r'https?://(?:www\.(?P<site>canalplus\.fr|piwiplus\.fr|d8\.tv|itele\.fr)/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>[0-9]+))'
_VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/%s/%s' _VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/%s/%s'
_SITE_ID_MAP = { _SITE_ID_MAP = {
'canalplus.fr': 'cplus', 'canalplus.fr': 'cplus',
'piwiplus.fr': 'teletoon', 'piwiplus.fr': 'teletoon',
'd8.tv': 'd8', 'd8.tv': 'd8',
'itele.fr': 'itele',
} }
_TESTS = [{ _TESTS = [{
@ -53,6 +54,16 @@ class CanalplusIE(InfoExtractor):
'upload_date': '20131108', 'upload_date': '20131108',
}, },
'skip': 'videos get deleted after a while', 'skip': 'videos get deleted after a while',
}, {
'url': 'http://www.itele.fr/france/video/aubervilliers-un-lycee-en-colere-111559',
'md5': '65aa83ad62fe107ce29e564bb8712580',
'info_dict': {
'id': '1213714',
'ext': 'flv',
'title': 'Aubervilliers : un lycée en colère - Le 11/02/2015 à 06h45',
'description': 'md5:8216206ec53426ea6321321f3b3c16db',
'upload_date': '20150211',
},
}] }]
def _real_extract(self, url): def _real_extract(self, url):

View File

@ -0,0 +1,99 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
qualities,
unified_strdate,
)
class CCCIE(InfoExtractor):
    """Information extractor for recording pages on media.ccc.de.

    The recording page is scraped for its metadata and for every download
    link. A link that also advertises a .torrent file yields an additional
    torrent format with a strongly reduced preference.
    """

    IE_NAME = 'media.ccc.de'
    _VALID_URL = r'https?://(?:www\.)?media\.ccc\.de/[^?#]+/[^?#/]*?_(?P<id>[0-9]{8,})._[^?#/]*\.html'

    _TEST = {
        'url': 'http://media.ccc.de/browse/congress/2013/30C3_-_5443_-_en_-_saal_g_-_201312281830_-_introduction_to_processor_design_-_byterazor.html#video',
        'md5': '205a365d0d57c0b1e43a12c9ffe8f9be',
        'info_dict': {
            'id': '20131228183',
            'ext': 'mp4',
            'title': 'Introduction to Processor Design',
            'description': 'md5:5ddbf8c734800267f2cee4eab187bc1b',
            # Raw string: "\." is not a valid escape in a normal literal.
            'thumbnail': r're:^https?://.*\.jpg$',
            'view_count': int,
            'upload_date': '20131229',
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the recording page at ``url``.

        The title is mandatory; description, upload date, view count and
        thumbnail are looked up with ``fatal=False`` and may be None.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The two orderings differ only in which of two otherwise
        # comparable formats (e.g. webm vs. mp4) is ranked higher.
        if self._downloader.params.get('prefer_free_formats'):
            preference = qualities(['mp3', 'opus', 'mp4-lq', 'webm-lq', 'h264-sd', 'mp4-sd', 'webm-sd', 'mp4', 'webm', 'mp4-hd', 'h264-hd', 'webm-hd'])
        else:
            preference = qualities(['opus', 'mp3', 'webm-lq', 'mp4-lq', 'webm-sd', 'h264-sd', 'mp4-sd', 'webm', 'mp4', 'webm-hd', 'mp4-hd', 'h264-hd'])

        title = self._html_search_regex(
            r'(?s)<h1>(.*?)</h1>', webpage, 'title')
        description = self._html_search_regex(
            r"(?s)<p class='description'>(.*?)</p>",
            webpage, 'description', fatal=False)
        # The date is optional (fatal=False), so only parse it when the
        # page actually provided one.
        upload_date_str = self._html_search_regex(
            r"(?s)<span class='[^']*fa-calendar-o'></span>(.*?)</li>",
            webpage, 'upload date', fatal=False)
        upload_date = (
            unified_strdate(upload_date_str) if upload_date_str else None)
        view_count = int_or_none(self._html_search_regex(
            r"(?s)<span class='[^']*fa-eye'></span>(.*?)</li>",
            webpage, 'view count', fatal=False))

        # Verbose-mode pattern: one file type label, its HTTP link and,
        # optionally, a following torrent link.
        matches = re.finditer(r'''(?xs)
            <(?:span|div)\s+class='label\s+filetype'>(?P<format>.*?)</(?:span|div)>\s*
            <a\s+href='(?P<http_url>[^']+)'>\s*
            (?:
                .*?
                <a\s+href='(?P<torrent_url>[^']+\.torrent)'
            )?''', webpage)
        formats = []
        for m in matches:
            format_name = m.group('format')
            http_url = m.group('http_url')
            torrent_url = m.group('torrent_url')
            # The format id is the name of the directory holding the file.
            format_id = self._search_regex(
                r'.*/([a-z0-9_-]+)/[^/]*$',
                http_url, 'format id', default=None)

            if format_id is None:
                # No id could be derived from the URL; a substring test
                # on None would raise TypeError, so leave the codec unset.
                vcodec = None
            elif 'h264' in format_id:
                vcodec = 'h264'
            elif format_id in ('mp3', 'opus'):
                vcodec = 'none'
            else:
                vcodec = None

            formats.append({
                'format_id': format_id,
                'format': format_name,
                'url': http_url,
                'vcodec': vcodec,
                'preference': preference(format_id),
            })

            if torrent_url:
                formats.append({
                    'format_id': 'torrent-%s' % (format_name if format_id is None else format_id),
                    'format': '%s (torrent)' % format_name,
                    'proto': 'torrent',
                    'format_note': '(unsupported; will just download the .torrent file)',
                    'vcodec': vcodec,
                    # Rank every torrent below every direct download.
                    'preference': -100 + preference(format_id),
                    'url': torrent_url,
                })
        self._sort_formats(formats)

        thumbnail = self._html_search_regex(
            r"<video.*?poster='([^']+)'", webpage, 'thumbnail', fatal=False)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'view_count': view_count,
            'upload_date': upload_date,
            'formats': formats,
        }

View File

@ -157,6 +157,7 @@ class InfoExtractor(object):
view_count: How many users have watched the video on the platform. view_count: How many users have watched the video on the platform.
like_count: Number of positive ratings of the video like_count: Number of positive ratings of the video
dislike_count: Number of negative ratings of the video dislike_count: Number of negative ratings of the video
average_rating: Average rating given by users, the scale used depends on the webpage
comment_count: Number of comments on the video comment_count: Number of comments on the video
comments: A list of comments, each with one or more of the following comments: A list of comments, each with one or more of the following
properties (all but one of text or html optional): properties (all but one of text or html optional):
@ -271,7 +272,7 @@ class InfoExtractor(object):
raise raise
except compat_http_client.IncompleteRead as e: except compat_http_client.IncompleteRead as e:
raise ExtractorError('A network error has occured.', cause=e, expected=True) raise ExtractorError('A network error has occured.', cause=e, expected=True)
except (KeyError,) as e: except (KeyError, StopIteration) as e:
raise ExtractorError('An extractor error has occured.', cause=e) raise ExtractorError('An extractor error has occured.', cause=e)
def set_downloader(self, downloader): def set_downloader(self, downloader):
@ -514,7 +515,7 @@ class InfoExtractor(object):
if mobj: if mobj:
break break
if os.name != 'nt' and sys.stderr.isatty(): if not self._downloader.params.get('no_color') and os.name != 'nt' and sys.stderr.isatty():
_name = '\033[0;34m%s\033[0m' % name _name = '\033[0;34m%s\033[0m' % name
else: else:
_name = name _name = name

View File

@ -14,6 +14,10 @@ class DctpTvIE(InfoExtractor):
'display_id': 'videoinstallation-fuer-eine-kaufhausfassade', 'display_id': 'videoinstallation-fuer-eine-kaufhausfassade',
'ext': 'flv', 'ext': 'flv',
'title': 'Videoinstallation für eine Kaufhausfassade' 'title': 'Videoinstallation für eine Kaufhausfassade'
},
'params': {
# rtmp download
'skip_download': True,
} }
} }

View File

@ -1,13 +1,14 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re
import time
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import (
float_or_none,
int_or_none,
)
class DotsubIE(InfoExtractor): class DotsubIE(InfoExtractor):
_VALID_URL = r'http://(?:www\.)?dotsub\.com/view/(?P<id>[^/]+)' _VALID_URL = r'https?://(?:www\.)?dotsub\.com/view/(?P<id>[^/]+)'
_TEST = { _TEST = {
'url': 'http://dotsub.com/view/aed3b8b2-1889-4df5-ae63-ad85f5572f27', 'url': 'http://dotsub.com/view/aed3b8b2-1889-4df5-ae63-ad85f5572f27',
'md5': '0914d4d69605090f623b7ac329fea66e', 'md5': '0914d4d69605090f623b7ac329fea66e',
@ -15,28 +16,37 @@ class DotsubIE(InfoExtractor):
'id': 'aed3b8b2-1889-4df5-ae63-ad85f5572f27', 'id': 'aed3b8b2-1889-4df5-ae63-ad85f5572f27',
'ext': 'flv', 'ext': 'flv',
'title': 'Pyramids of Waste (2010), AKA The Lightbulb Conspiracy - Planned obsolescence documentary', 'title': 'Pyramids of Waste (2010), AKA The Lightbulb Conspiracy - Planned obsolescence documentary',
'description': 'md5:699a0f7f50aeec6042cb3b1db2d0d074',
'thumbnail': 're:^https?://dotsub.com/media/aed3b8b2-1889-4df5-ae63-ad85f5572f27/p',
'duration': 3169,
'uploader': '4v4l0n42', 'uploader': '4v4l0n42',
'description': 'Pyramids of Waste (2010) also known as "The lightbulb conspiracy" is a documentary about how our economic system based on consumerism and planned obsolescence is breaking our planet down.\r\n\r\nSolutions to this can be found at:\r\nhttp://robotswillstealyourjob.com\r\nhttp://www.federicopistono.org\r\n\r\nhttp://opensourceecology.org\r\nhttp://thezeitgeistmovement.com', 'timestamp': 1292248482.625,
'thumbnail': 'http://dotsub.com/media/aed3b8b2-1889-4df5-ae63-ad85f5572f27/p',
'upload_date': '20101213', 'upload_date': '20101213',
'view_count': int,
} }
} }
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) video_id = self._match_id(url)
video_id = mobj.group('id')
info_url = "https://dotsub.com/api/media/%s/metadata" % video_id info = self._download_json(
info = self._download_json(info_url, video_id) 'https://dotsub.com/api/media/%s/metadata' % video_id, video_id)
date = time.gmtime(info['dateCreated'] / 1000) # The timestamp is in miliseconds video_url = info.get('mediaURI')
if not video_url:
webpage = self._download_webpage(url, video_id)
video_url = self._search_regex(
r'"file"\s*:\s*\'([^\']+)', webpage, 'video url')
return { return {
'id': video_id, 'id': video_id,
'url': info['mediaURI'], 'url': video_url,
'ext': 'flv', 'ext': 'flv',
'title': info['title'], 'title': info['title'],
'thumbnail': info['screenshotURI'], 'description': info.get('description'),
'description': info['description'], 'thumbnail': info.get('screenshotURI'),
'uploader': info['user'], 'duration': int_or_none(info.get('duration'), 1000),
'view_count': info['numberOfViews'], 'uploader': info.get('user'),
'upload_date': '%04i%02i%02i' % (date.tm_year, date.tm_mon, date.tm_mday), 'timestamp': float_or_none(info.get('dateCreated'), 1000),
'view_count': int_or_none(info.get('numberOfViews')),
} }

View File

@ -1,18 +1,17 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import (
compat_urllib_parse, compat_urllib_parse,
) )
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
js_to_json,
) )
class EscapistIE(InfoExtractor): class EscapistIE(InfoExtractor):
_VALID_URL = r'^https?://?(www\.)?escapistmagazine\.com/videos/view/(?P<showname>[^/]+)/(?P<id>[0-9]+)-' _VALID_URL = r'https?://?(www\.)?escapistmagazine\.com/videos/view/[^/?#]+/(?P<id>[0-9]+)-[^/?#]*(?:$|[?#])'
_TEST = { _TEST = {
'url': 'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate', 'url': 'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate',
'md5': 'ab3a706c681efca53f0a35f1415cf0d1', 'md5': 'ab3a706c681efca53f0a35f1415cf0d1',
@ -20,31 +19,30 @@ class EscapistIE(InfoExtractor):
'id': '6618', 'id': '6618',
'ext': 'mp4', 'ext': 'mp4',
'description': "Baldur's Gate: Original, Modded or Enhanced Edition? I'll break down what you can expect from the new Baldur's Gate: Enhanced Edition.", 'description': "Baldur's Gate: Original, Modded or Enhanced Edition? I'll break down what you can expect from the new Baldur's Gate: Enhanced Edition.",
'uploader': 'the-escapist-presents', 'uploader_id': 'the-escapist-presents',
'uploader': 'The Escapist Presents',
'title': "Breaking Down Baldur's Gate", 'title': "Breaking Down Baldur's Gate",
} }
} }
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) video_id = self._match_id(url)
showName = mobj.group('showname')
video_id = mobj.group('id')
self.report_extraction(video_id)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
videoDesc = self._html_search_regex( uploader_id = self._html_search_regex(
r'<meta name="description" content="([^"]*)"', r"<h1 class='headline'><a href='/videos/view/(.*?)'",
webpage, 'description', fatal=False) webpage, 'uploader ID', fatal=False)
uploader = self._html_search_regex(
r"<h1 class='headline'>(.*?)</a>",
webpage, 'uploader', fatal=False)
description = self._html_search_meta('description', webpage)
playerUrl = self._og_search_video_url(webpage, name='player URL') raw_title = self._html_search_meta('title', webpage, fatal=True)
title = raw_title.partition(' : ')[2]
title = self._html_search_regex( player_url = self._og_search_video_url(webpage, name='player URL')
r'<meta name="title" content="([^"]*)"', config_url = compat_urllib_parse.unquote(self._search_regex(
webpage, 'title').split(' : ')[-1] r'config=(.*)$', player_url, 'config URL'))
configUrl = self._search_regex('config=(.*)$', playerUrl, 'config URL')
configUrl = compat_urllib_parse.unquote(configUrl)
formats = [] formats = []
@ -53,18 +51,21 @@ class EscapistIE(InfoExtractor):
cfgurl, video_id, cfgurl, video_id,
'Downloading ' + name + ' configuration', 'Downloading ' + name + ' configuration',
'Unable to download ' + name + ' configuration', 'Unable to download ' + name + ' configuration',
transform_source=lambda s: s.replace("'", '"')) transform_source=js_to_json)
playlist = config['playlist'] playlist = config['playlist']
video_url = next(
p['url'] for p in playlist
if p.get('eventCategory') == 'Video')
formats.append({ formats.append({
'url': playlist[1]['url'], 'url': video_url,
'format_id': name, 'format_id': name,
'quality': quality, 'quality': quality,
}) })
_add_format('normal', configUrl, quality=0) _add_format('normal', config_url, quality=0)
hq_url = (configUrl + hq_url = (config_url +
('&hq=1' if '?' in configUrl else configUrl + '?hq=1')) ('&hq=1' if '?' in config_url else config_url + '?hq=1'))
try: try:
_add_format('hq', hq_url, quality=1) _add_format('hq', hq_url, quality=1)
except ExtractorError: except ExtractorError:
@ -75,9 +76,10 @@ class EscapistIE(InfoExtractor):
return { return {
'id': video_id, 'id': video_id,
'formats': formats, 'formats': formats,
'uploader': showName, 'uploader': uploader,
'uploader_id': uploader_id,
'title': title, 'title': title,
'thumbnail': self._og_search_thumbnail(webpage), 'thumbnail': self._og_search_thumbnail(webpage),
'description': videoDesc, 'description': description,
'player_url': playerUrl, 'player_url': player_url,
} }

View File

@ -1,7 +1,5 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
@ -20,11 +18,10 @@ class FirstpostIE(InfoExtractor):
} }
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) video_id = self._match_id(url)
video_id = mobj.group('id')
page = self._download_webpage(url, video_id) page = self._download_webpage(url, video_id)
title = self._html_search_meta('twitter:title', page, 'title')
title = self._html_search_meta('twitter:title', page, 'title', fatal=True)
description = self._html_search_meta('twitter:description', page, 'title') description = self._html_search_meta('twitter:description', page, 'title')
data = self._download_xml( data = self._download_xml(
@ -42,6 +39,7 @@ class FirstpostIE(InfoExtractor):
'height': int(details.find('./height').text.strip()), 'height': int(details.find('./height').text.strip()),
} for details in item.findall('./source/file_details') if details.find('./file').text } for details in item.findall('./source/file_details') if details.find('./file').text
] ]
self._sort_formats(formats)
return { return {
'id': video_id, 'id': video_id,

View File

@ -3,7 +3,9 @@ from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
unescapeHTML clean_html,
xpath_text,
int_or_none,
) )
@ -14,73 +16,63 @@ class NTVRuIE(InfoExtractor):
_TESTS = [ _TESTS = [
{ {
'url': 'http://www.ntv.ru/novosti/863142/', 'url': 'http://www.ntv.ru/novosti/863142/',
'md5': 'ba7ea172a91cb83eb734cad18c10e723',
'info_dict': { 'info_dict': {
'id': '746000', 'id': '746000',
'ext': 'flv', 'ext': 'mp4',
'title': 'Командующий Черноморским флотом провел переговоры в штабе ВМС Украины', 'title': 'Командующий Черноморским флотом провел переговоры в штабе ВМС Украины',
'description': 'Командующий Черноморским флотом провел переговоры в штабе ВМС Украины', 'description': 'Командующий Черноморским флотом провел переговоры в штабе ВМС Украины',
'thumbnail': 're:^http://.*\.jpg',
'duration': 136, 'duration': 136,
}, },
'params': {
# rtmp download
'skip_download': True,
},
}, },
{ {
'url': 'http://www.ntv.ru/video/novosti/750370/', 'url': 'http://www.ntv.ru/video/novosti/750370/',
'md5': 'adecff79691b4d71e25220a191477124',
'info_dict': { 'info_dict': {
'id': '750370', 'id': '750370',
'ext': 'flv', 'ext': 'mp4',
'title': 'Родные пассажиров пропавшего Boeing не верят в трагический исход', 'title': 'Родные пассажиров пропавшего Boeing не верят в трагический исход',
'description': 'Родные пассажиров пропавшего Boeing не верят в трагический исход', 'description': 'Родные пассажиров пропавшего Boeing не верят в трагический исход',
'thumbnail': 're:^http://.*\.jpg',
'duration': 172, 'duration': 172,
}, },
'params': {
# rtmp download
'skip_download': True,
},
}, },
{ {
'url': 'http://www.ntv.ru/peredacha/segodnya/m23700/o232416', 'url': 'http://www.ntv.ru/peredacha/segodnya/m23700/o232416',
'md5': '82dbd49b38e3af1d00df16acbeab260c',
'info_dict': { 'info_dict': {
'id': '747480', 'id': '747480',
'ext': 'flv', 'ext': 'mp4',
'title': '«Сегодня». 21 марта 2014 года. 16:00 ', 'title': '«Сегодня». 21 марта 2014 года. 16:00',
'description': '«Сегодня». 21 марта 2014 года. 16:00 ', 'description': '«Сегодня». 21 марта 2014 года. 16:00',
'thumbnail': 're:^http://.*\.jpg',
'duration': 1496, 'duration': 1496,
}, },
'params': {
# rtmp download
'skip_download': True,
},
}, },
{ {
'url': 'http://www.ntv.ru/kino/Koma_film', 'url': 'http://www.ntv.ru/kino/Koma_film',
'md5': 'f825770930937aa7e5aca0dc0d29319a',
'info_dict': { 'info_dict': {
'id': '758100', 'id': '1007609',
'ext': 'flv', 'ext': 'mp4',
'title': 'Остросюжетный фильм «Кома»', 'title': 'Остросюжетный фильм «Кома»',
'description': 'Остросюжетный фильм «Кома»', 'description': 'Остросюжетный фильм «Кома»',
'thumbnail': 're:^http://.*\.jpg',
'duration': 5592, 'duration': 5592,
}, },
'params': {
# rtmp download
'skip_download': True,
},
}, },
{ {
'url': 'http://www.ntv.ru/serial/Delo_vrachey/m31760/o233916/', 'url': 'http://www.ntv.ru/serial/Delo_vrachey/m31760/o233916/',
'md5': '9320cd0e23f3ea59c330dc744e06ff3b',
'info_dict': { 'info_dict': {
'id': '751482', 'id': '751482',
'ext': 'flv', 'ext': 'mp4',
'title': '«Дело врачей»: «Деревце жизни»', 'title': '«Дело врачей»: «Деревце жизни»',
'description': '«Дело врачей»: «Деревце жизни»', 'description': '«Дело врачей»: «Деревце жизни»',
'thumbnail': 're:^http://.*\.jpg',
'duration': 2590, 'duration': 2590,
}, },
'params': {
# rtmp download
'skip_download': True,
},
}, },
] ]
@ -92,45 +84,36 @@ class NTVRuIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
page = self._download_webpage(url, video_id)
video_id = self._html_search_regex(self._VIDEO_ID_REGEXES, page, 'video id') webpage = self._download_webpage(url, video_id)
player = self._download_xml('http://www.ntv.ru/vi%s/' % video_id, video_id, 'Downloading video XML') video_id = self._html_search_regex(self._VIDEO_ID_REGEXES, webpage, 'video id')
title = unescapeHTML(player.find('./data/title').text)
description = unescapeHTML(player.find('./data/description').text) player = self._download_xml(
'http://www.ntv.ru/vi%s/' % video_id,
video_id, 'Downloading video XML')
title = clean_html(xpath_text(player, './data/title', 'title', fatal=True))
description = clean_html(xpath_text(player, './data/description', 'description'))
video = player.find('./data/video') video = player.find('./data/video')
video_id = video.find('./id').text video_id = xpath_text(video, './id', 'video id')
thumbnail = video.find('./splash').text thumbnail = xpath_text(video, './splash', 'thumbnail')
duration = int(video.find('./totaltime').text) duration = int_or_none(xpath_text(video, './totaltime', 'duration'))
view_count = int(video.find('./views').text) view_count = int_or_none(xpath_text(video, './views', 'view count'))
puid22 = video.find('./puid22').text
apps = { token = self._download_webpage(
'4': 'video1', 'http://stat.ntv.ru/services/access/token',
'7': 'video2', video_id, 'Downloading access token')
}
app = apps.get(puid22, apps['4'])
formats = [] formats = []
for format_id in ['', 'hi', 'webm']: for format_id in ['', 'hi', 'webm']:
file = video.find('./%sfile' % format_id) file_ = video.find('./%sfile' % format_id)
if file is None: if file_ is None:
continue continue
size = video.find('./%ssize' % format_id) size = video.find('./%ssize' % format_id)
formats.append({ formats.append({
'url': 'rtmp://media.ntv.ru/%s' % app, 'url': 'http://media2.ntv.ru/vod/%s&tok=%s' % (file_.text, token),
'app': app, 'filesize': int_or_none(size.text if size is not None else None),
'play_path': file.text,
'rtmp_conn': 'B:1',
'player_url': 'http://www.ntv.ru/swf/vps1.swf?update=20131128',
'page_url': 'http://www.ntv.ru',
'flash_version': 'LNX 11,2,202,341',
'rtmp_live': True,
'ext': 'flv',
'filesize': int(size.text),
}) })
self._sort_formats(formats) self._sort_formats(formats)

View File

@ -46,16 +46,17 @@ class PornHdIE(InfoExtractor):
quality = qualities(['sd', 'hd']) quality = qualities(['sd', 'hd'])
sources = json.loads(js_to_json(self._search_regex( sources = json.loads(js_to_json(self._search_regex(
r"(?s)'sources'\s*:\s*(\{.+?\})\s*\}\);", webpage, 'sources'))) r"(?s)'sources'\s*:\s*(\{.+?\})\s*\}[;,)]",
webpage, 'sources')))
formats = [] formats = []
for container, s in sources.items(): for qname, video_url in sources.items():
for qname, video_url in s.items(): if not video_url:
formats.append({ continue
'url': video_url, formats.append({
'container': container, 'url': video_url,
'format_id': '%s-%s' % (container, qname), 'format_id': qname,
'quality': quality(qname), 'quality': quality(qname),
}) })
self._sort_formats(formats) self._sort_formats(formats)
return { return {

View File

@ -30,6 +30,11 @@ class TeamcocoIE(InfoExtractor):
} }
} }
] ]
_VIDEO_ID_REGEXES = (
r'"eVar42"\s*:\s*(\d+)',
r'Ginger\.TeamCoco\.openInApp\("video",\s*"([^"]+)"',
r'"id_not"\s*:\s*(\d+)'
)
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
@ -40,8 +45,7 @@ class TeamcocoIE(InfoExtractor):
video_id = mobj.group("video_id") video_id = mobj.group("video_id")
if not video_id: if not video_id:
video_id = self._html_search_regex( video_id = self._html_search_regex(
r'<div\s+class="player".*?data-id="(\d+?)"', self._VIDEO_ID_REGEXES, webpage, 'video id')
webpage, 'video id')
data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
data = self._download_xml( data = self._download_xml(

View File

@ -188,9 +188,9 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
password_request = compat_urllib_request.Request(pass_url + '/password', data) password_request = compat_urllib_request.Request(pass_url + '/password', data)
password_request.add_header('Content-Type', 'application/x-www-form-urlencoded') password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
password_request.add_header('Cookie', 'xsrft=%s' % token) password_request.add_header('Cookie', 'xsrft=%s' % token)
self._download_webpage(password_request, video_id, return self._download_webpage(
'Verifying the password', password_request, video_id,
'Wrong password') 'Verifying the password', 'Wrong password')
def _verify_player_video_password(self, url, video_id): def _verify_player_video_password(self, url, video_id):
password = self._downloader.params.get('videopassword', None) password = self._downloader.params.get('videopassword', None)
@ -266,7 +266,7 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage): if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage):
raise ExtractorError('The author has restricted the access to this video, try with the "--referer" option') raise ExtractorError('The author has restricted the access to this video, try with the "--referer" option')
if re.search('<form[^>]+?id="pw_form"', webpage) is not None: if re.search(r'<form[^>]+?id="pw_form"', webpage) is not None:
self._verify_video_password(url, video_id, webpage) self._verify_video_password(url, video_id, webpage)
return self._real_extract(url) return self._real_extract(url)
else: else:
@ -412,12 +412,47 @@ class VimeoChannelIE(InfoExtractor):
def _extract_list_title(self, webpage): def _extract_list_title(self, webpage):
return self._html_search_regex(self._TITLE_RE, webpage, 'list title') return self._html_search_regex(self._TITLE_RE, webpage, 'list title')
def _login_list_password(self, page_url, list_id, webpage):
    """Submit the list password form if the page contains one.

    Returns ``webpage`` unchanged when no ``pw_form`` form is found in it;
    otherwise returns the page downloaded by posting the password form.
    Raises ExtractorError when a form is present but no 'videopassword'
    parameter was supplied.
    """
    pw_form = self._search_regex(
        r'(?s)<form[^>]+?id="pw_form"(.*?)</form>',
        webpage, 'login form', default=None)
    if not pw_form:
        # No password form on the page: nothing to do
        return webpage

    video_password = self._downloader.params.get('videopassword', None)
    if video_password is None:
        raise ExtractorError('This album is protected by a password, use the --video-password option', expected=True)

    # Start from the hidden inputs of the form, then add our own fields
    form_data = dict(re.findall(r'''(?x)<input\s+
        type="hidden"\s+
        name="([^"]+)"\s+
        value="([^"]*)"
        ''', pw_form))
    xsrf_token = self._search_regex(r'xsrft: \'(.*?)\'', webpage, 'login token')
    form_data['token'] = xsrf_token
    form_data['password'] = video_password
    encoded_form = compat_urllib_parse.urlencode(form_data)

    # The form action may be relative, so resolve it against the page URL
    action_path = self._search_regex(
        r'action="([^"]+)"', pw_form, 'password URL')
    request = compat_urllib_request.Request(
        compat_urlparse.urljoin(page_url, action_path), encoded_form)
    request.add_header('Content-type', 'application/x-www-form-urlencoded')
    # The same token is also sent as the 'xsrft' cookie
    self._set_cookie('vimeo.com', 'xsrft', xsrf_token)

    return self._download_webpage(
        request, list_id,
        'Verifying the password', 'Wrong password')
def _extract_videos(self, list_id, base_url): def _extract_videos(self, list_id, base_url):
video_ids = [] video_ids = []
for pagenum in itertools.count(1): for pagenum in itertools.count(1):
page_url = self._page_url(base_url, pagenum)
webpage = self._download_webpage( webpage = self._download_webpage(
self._page_url(base_url, pagenum), list_id, page_url, list_id,
'Downloading page %s' % pagenum) 'Downloading page %s' % pagenum)
if pagenum == 1:
webpage = self._login_list_password(page_url, list_id, webpage)
video_ids.extend(re.findall(r'id="clip_(\d+?)"', webpage)) video_ids.extend(re.findall(r'id="clip_(\d+?)"', webpage))
if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None: if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
break break
@ -464,14 +499,24 @@ class VimeoAlbumIE(VimeoChannelIE):
'title': 'Staff Favorites: November 2013', 'title': 'Staff Favorites: November 2013',
}, },
'playlist_mincount': 13, 'playlist_mincount': 13,
}, {
'note': 'Password-protected album',
'url': 'https://vimeo.com/album/3253534',
'info_dict': {
'title': 'test',
'id': '3253534',
},
'playlist_count': 1,
'params': {
'videopassword': 'youtube-dl',
}
}] }]
def _page_url(self, base_url, pagenum): def _page_url(self, base_url, pagenum):
return '%s/page:%d/' % (base_url, pagenum) return '%s/page:%d/' % (base_url, pagenum)
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) album_id = self._match_id(url)
album_id = mobj.group('id')
return self._extract_videos(album_id, 'http://vimeo.com/album/%s' % album_id) return self._extract_videos(album_id, 'http://vimeo.com/album/%s' % album_id)

View File

@ -25,6 +25,7 @@ from ..compat import (
from ..utils import ( from ..utils import (
clean_html, clean_html,
ExtractorError, ExtractorError,
float_or_none,
get_element_by_attribute, get_element_by_attribute,
get_element_by_id, get_element_by_id,
int_or_none, int_or_none,
@ -1124,6 +1125,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
'view_count': view_count, 'view_count': view_count,
'like_count': like_count, 'like_count': like_count,
'dislike_count': dislike_count, 'dislike_count': dislike_count,
'average_rating': float_or_none(video_info.get('avg_rating', [None])[0]),
'formats': formats, 'formats': formats,
} }

View File

@ -165,6 +165,11 @@ def parseOpts(overrideArguments=None):
action='store_const', dest='extract_flat', const='in_playlist', action='store_const', dest='extract_flat', const='in_playlist',
default=False, default=False,
help='Do not extract the videos of a playlist, only list them.') help='Do not extract the videos of a playlist, only list them.')
general.add_option(
'--no-color', '--no-colors',
action='store_true', dest='no_color',
default=False,
help='Do not emit color codes in output.')
network = optparse.OptionGroup(parser, 'Network Options') network = optparse.OptionGroup(parser, 'Network Options')
network.add_option( network.add_option(
@ -244,6 +249,25 @@ def parseOpts(overrideArguments=None):
'--max-views', '--max-views',
metavar='COUNT', dest='max_views', default=None, type=int, metavar='COUNT', dest='max_views', default=None, type=int,
help='Do not download any videos with more than COUNT views') help='Do not download any videos with more than COUNT views')
selection.add_option(
'--match-filter',
metavar='FILTER', dest='match_filter', default=None,
help=(
'(Experimental) Generic video filter. '
'Specify any key (see help for -o for a list of available keys) to'
' match if the key is present, '
'!key to check if the key is not present,'
'key > NUMBER (like "comment_count > 12", also works with '
'>=, <, <=, !=, =) to compare against a number, and '
'& to require multiple matches. '
'Values which are not known are excluded unless you'
' put a question mark (?) after the operator.'
'For example, to only match videos that have been liked more than '
'100 times and disliked less than 50 times (or the dislike '
'functionality is not available at the given service), but who '
'also have a description, use --match-filter '
'"like_count > 100 & dislike_count <? 50 & description" .'
))
selection.add_option( selection.add_option(
'--no-playlist', '--no-playlist',
action='store_true', dest='noplaylist', default=False, action='store_true', dest='noplaylist', default=False,
@ -533,7 +557,7 @@ def parseOpts(overrideArguments=None):
action='store_true', dest='youtube_print_sig_code', default=False, action='store_true', dest='youtube_print_sig_code', default=False,
help=optparse.SUPPRESS_HELP) help=optparse.SUPPRESS_HELP)
verbosity.add_option( verbosity.add_option(
'--print-traffic', '--print-traffic', '--dump-headers',
dest='debug_printtraffic', action='store_true', default=False, dest='debug_printtraffic', action='store_true', default=False,
help='Display sent and read HTTP traffic') help='Display sent and read HTTP traffic')
verbosity.add_option( verbosity.add_option(

View File

@ -17,6 +17,7 @@ import io
import json import json
import locale import locale
import math import math
import operator
import os import os
import pipes import pipes
import platform import platform
@ -1678,3 +1679,79 @@ def render_table(header_row, data):
max_lens = [max(len(compat_str(v)) for v in col) for col in zip(*table)] max_lens = [max(len(compat_str(v)) for v in col) for col in zip(*table)]
format_str = ' '.join('%-' + compat_str(ml + 1) + 's' for ml in max_lens[:-1]) + '%s' format_str = ' '.join('%-' + compat_str(ml + 1) + 's' for ml in max_lens[:-1]) + '%s'
return '\n'.join(format_str % tuple(row) for row in table) return '\n'.join(format_str % tuple(row) for row in table)
def _match_one(filter_part, dct):
COMPARISON_OPERATORS = {
'<': operator.lt,
'<=': operator.le,
'>': operator.gt,
'>=': operator.ge,
'=': operator.eq,
'!=': operator.ne,
}
operator_rex = re.compile(r'''(?x)\s*
(?P<key>[a-z_]+)
\s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
(?:
(?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
(?P<strval>(?![0-9.])[a-z0-9A-Z]*)
)
\s*$
''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
m = operator_rex.search(filter_part)
if m:
op = COMPARISON_OPERATORS[m.group('op')]
if m.group('strval') is not None:
if m.group('op') not in ('=', '!='):
raise ValueError(
'Operator %s does not support string values!' % m.group('op'))
comparison_value = m.group('strval')
else:
try:
comparison_value = int(m.group('intval'))
except ValueError:
comparison_value = parse_filesize(m.group('intval'))
if comparison_value is None:
comparison_value = parse_filesize(m.group('intval') + 'B')
if comparison_value is None:
raise ValueError(
'Invalid integer value %r in filter part %r' % (
m.group('intval'), filter_part))
actual_value = dct.get(m.group('key'))
if actual_value is None:
return m.group('none_inclusive')
return op(actual_value, comparison_value)
UNARY_OPERATORS = {
'': lambda v: v is not None,
'!': lambda v: v is None,
}
operator_rex = re.compile(r'''(?x)\s*
(?P<op>%s)\s*(?P<key>[a-z_]+)
\s*$
''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
m = operator_rex.search(filter_part)
if m:
op = UNARY_OPERATORS[m.group('op')]
actual_value = dct.get(m.group('key'))
return op(actual_value)
raise ValueError('Invalid filter part %r' % filter_part)
def match_str(filter_str, dct):
    """Check a dictionary against a '&'-separated filter string.

    Returns True if every clause passes, and False as soon as one fails
    (later clauses are then not evaluated).
    """
    for clause in filter_str.split('&'):
        if not _match_one(clause, dct):
            return False
    return True
def match_filter_func(filter_str):
    """Build a checker function for the given filter string.

    The returned function takes an info dict and returns None when the
    dict passes the filter, or a message saying it is being skipped.
    """
    def _match_func(info_dict):
        if not match_str(filter_str, info_dict):
            # Name the video by its title, else its id, else a placeholder
            fallback_name = info_dict.get('id', 'video')
            shown_name = info_dict.get('title', fallback_name)
            return '%s does not pass filter %s, skipping ..' % (shown_name, filter_str)
        return None
    return _match_func

View File

@ -1,3 +1,3 @@
from __future__ import unicode_literals from __future__ import unicode_literals
__version__ = '2015.02.10.1' __version__ = '2015.02.11'