def __init__(self, audit_type, confdir=None, show_content=False,
             dirsizes=False, depth=2, to_check=None, ignore_also=None,
             maxfiles=None):
    '''
    Set up a local-files audit.

    audit_type: kind of audit to run, e.g. 'logs' or 'homes'
    confdir: directory where the yaml config files live
    show_content: display a sample line or so from each problematic file
    dirsizes: report only directories that have too many files to audit
              properly; don't report on individual files at all
    depth: how many directory levels below the top dir of the audit to
           descend before starting to count files (keeps the auditor
           from dying on someone's 25gb homedir); e.g. do we count in
           /home/ariel, or separately in /home/ariel/*, /home/ariel/*/*, etc.
    to_check: comma-separated list of dirs (must end in '/') and/or files
              to restrict the audit to; None means check everything
    ignore_also: comma-separated list of dirs (must end in '/') and/or
                 files to skip on top of those from the config, rules, etc.
    maxfiles: how many files in a directory tree is too many to audit
              (past that we warn about the tree and move on)
    '''
    self.audit_type = audit_type
    self.confdir = confdir
    # config key under which this audit type's locations are listed
    self.locations = audit_type + "_locations"
    self.show_sample_content = show_content
    self.dirsizes = dirsizes
    # kept internally as a count of path separators in a dirname
    self.depth = depth + 1
    self.filenames_to_check = None
    self.dirs_to_check = None
    self.set_up_to_check(to_check)
    clouseau.retention.utils.config.set_up_conf(self.confdir)
    # split the CLI-style string into a list before converting
    extra = ignore_also.split(',') if ignore_also is not None else None
    extra_ignores = convert_ignore_also_to_ignores(extra)
    self.ignores = Ignores(self.confdir)
    exported_ignores = get_ignored_from_exported_rules(self.confdir)
    fqdn = socket.getfqdn()
    # fold the extra and exported ignores into the configured ones
    # for this host
    self.ignored = self.ignores.merge([extra_ignores, exported_ignores], fqdn)
    self.max_files = maxfiles
    self.set_up_max_files()
    self.warnings = []
def __init__(self, audit_type, confdir=None, show_content=False,
             dirsizes=False, depth=2, to_check=None, ignore_also=None,
             maxfiles=None):
    '''
    audit_type: type of audit e.g. 'logs', 'homes'
    confdir: dir path where yaml config files are kept
    show_content: show the first line or so from problematic files
    dirsizes: show only directories which have too many files to audit
              properly, don't report on files at all
    depth: the auditor will give up if a directory has too many files
           in it (saves it from dying on someone's 25gb homedir).
           this option tells it how far down the tree to go from the
           top dir of the audit, before starting to count.
           e.g. do we count in /home/ariel or separately in
           /home/ariel/* or in /home/ariel/*/*, etc.
    to_check: comma-separated list of dirs (must end in '/') and/or
              files that will be checked; if this is None then all
              dirs/files will be checked
    ignore_also: comma-separated list of dirs (must end in '/') and/or
                 files that will be skipped in addition to the ones in
                 the config, rules, etc.
    maxfiles: how many files in a directory tree is too many to audit
              (at which point we warn about that and move on)
    '''
    self.audit_type = audit_type
    self.confdir = confdir
    # config key under which this audit type's locations are listed
    self.locations = audit_type + "_locations"
    self.show_sample_content = show_content
    self.dirsizes = dirsizes
    self.depth = depth + 1  # actually count of path separators in dirname
    self.filenames_to_check = None
    self.dirs_to_check = None
    self.set_up_to_check(to_check)
    clouseau.retention.utils.config.set_up_conf(self.confdir)
    # NOTE(review): the parameter is rebound from a string to a list here
    if ignore_also is not None:
        ignore_also = ignore_also.split(',')
    ignore_also_ignoreds = convert_ignore_also_to_ignores(ignore_also)
    self.ignores = Ignores(self.confdir)
    # presumably per-host ignores exported from the rule store --
    # verify against the ignores module
    ignored_from_export = get_ignored_from_exported_rules(self.confdir)
    hostname = socket.getfqdn()
    # merge the extra and exported ignores into the configured ones
    # for this host
    self.ignored = self.ignores.merge(
        [ignore_also_ignoreds, ignored_from_export], hostname)
    self.max_files = maxfiles
    self.set_up_max_files()
    self.warnings = []
def __init__(self, confdir, store_filepath, timeout, audit_type,
             ignore_also=None, hosts_expr=None):
    '''
    Set up an interactive auditing session.

    confdir: dir path where yaml config files are kept
    store_filepath: path to the rule store database file
    timeout: timeout passed through to directory-content retrieval
    audit_type: type of audit e.g. 'logs', 'homes'
    ignore_also: extra dirs/files to skip, in addition to the configured
                 ones (passed to convert_ignore_also_to_ignores)
    hosts_expr: expression selecting the hosts to operate on --
                presumably evaluated elsewhere; verify against callers
    '''
    self.confdir = confdir
    # open (and initialize) the rule store database
    self.cdb = RuleStore(store_filepath)
    self.cdb.store_db_init(None)
    self.timeout = timeout
    self.audit_type = audit_type
    # config key under which this audit type's locations are listed
    self.locations = audit_type + "_locations"
    self.hosts_expr = hosts_expr
    self.basedir = None
    # load readline history for the interactive prompt
    clouseau.retention.utils.cliutils.init_readline_hist()
    # this is arbitrary, can tweak it later:
    # how many levels down we keep in our list of top-level dirs from
    # which the user can start their interactive session
    self.max_depth_top_level = 3
    self.filtertype = 'all'
    # fixme completely wrong
    self.batchno = 1
    clouseau.retention.utils.config.set_up_conf(self.confdir)
    # duplicate all the ignores except for the ones specific to a host;
    # those will be done at host choice time.  this includes rules, we
    # will do those at host choice time too.
    # we want: global, perhost, ignore_also (if there were any)
    self.local_ignored = None
    self.ignores = Ignores(self.confdir)
    self.ignored_from_rulestore = {}
    self.ignored_also = clouseau.retention.utils.ignores.convert_ignore_also_to_ignores(
        ignore_also)
    self.dircontents = CurrentDirContents(self.timeout)
    self.cenv = CurrentEnv()
    self.cmpl = Completion(self.dircontents, self.cenv,
                           self.max_depth_top_level)
class LocalFilesAuditor(object):
    '''
    audit files on the local host in a specified set of directories,
    collecting warnings about trees too large to audit and reporting
    problematic files
    '''

    def __init__(self, audit_type, confdir=None, show_content=False,
                 dirsizes=False, depth=2, to_check=None, ignore_also=None,
                 maxfiles=None):
        '''
        audit_type: type of audit e.g. 'logs', 'homes'
        confdir: dir path where yaml config files are kept
        show_content: show the first line or so from problematic files
        dirsizes: show only directories which have too many files to audit
                  properly, don't report on files at all
        depth: how far down the tree to go from the top dir of the audit
               before starting to count files (saves the auditor from
               dying on someone's 25gb homedir); e.g. do we count in
               /home/ariel or separately in /home/ariel/* or in
               /home/ariel/*/*, etc.
        to_check: comma-separated list of dirs (must end in '/') and/or
                  files that will be checked; if this is None then all
                  dirs/files will be checked
        ignore_also: comma-separated list of dirs (must end in '/') and/or
                     files that will be skipped in addition to the ones in
                     the config, rules, etc.
        maxfiles: how many files in a directory tree is too many to audit
                  (at which point we warn about that and move on)
        '''
        self.audit_type = audit_type
        self.confdir = confdir
        # config key under which this audit type's locations are listed
        self.locations = audit_type + "_locations"
        self.show_sample_content = show_content
        self.dirsizes = dirsizes
        # stored as a count of path separators in a dirname, hence the +1
        self.depth = depth + 1
        self.filenames_to_check = None
        self.dirs_to_check = None
        self.set_up_to_check(to_check)
        clouseau.retention.utils.config.set_up_conf(self.confdir)
        if ignore_also is not None:
            ignore_also = ignore_also.split(',')
        ignore_also_ignoreds = convert_ignore_also_to_ignores(ignore_also)
        self.ignores = Ignores(self.confdir)
        ignored_from_export = get_ignored_from_exported_rules(self.confdir)
        hostname = socket.getfqdn()
        # merge the extra and exported ignores into the configured ones
        # for this host
        self.ignored = self.ignores.merge(
            [ignore_also_ignoreds, ignored_from_export], hostname)
        self.max_files = maxfiles
        self.set_up_max_files()
        self.warnings = []

    def set_up_max_files(self):
        '''
        more than this many files in a subdir we won't process,
        we'll just try to name top offenders

        if we've been asked only to report dir trees that are too large
        in this manner, we can set defaults much higher, since we don't
        stat files, open them to guess their filetype, etc; processing
        then goes much quicker
        '''
        if self.max_files is None:
            self.max_files = 1000 if self.dirsizes else 100

    def set_up_to_check(self, to_check):
        '''
        turn the comma-separated to_check arg into the list of absolute
        dirs (entries starting with the path separator, trailing
        separator stripped) and the list of plain filenames to restrict
        the audit to
        '''
        if to_check is None:
            return
        check_list = to_check.split(',')
        self.filenames_to_check = [fname for fname in check_list
                                   if not fname.startswith(os.sep)]
        # an empty list means "no filename restriction", not "match nothing"
        if not self.filenames_to_check:
            self.filenames_to_check = None
        self.dirs_to_check = [d.rstrip(os.path.sep) for d in check_list
                              if d.startswith(os.sep)]

    def normalize(self, fname):
        '''
        subclasses may want to do something different, see
        LogsAuditor for an example
        '''
        return fname

    def file_is_wanted(self, fname, basedir):
        '''
        decide if we want to audit the specific file or not
        (is it ignored, or in an ignored directory, or of a type we skip)
        args: fname - the abs path to the file / dir
        returns True if wanted or False if not
        '''
        fname = self.normalize(fname)
        if clouseau.retention.utils.ignores.file_is_ignored(
                fname, basedir, self.ignored):
            return False
        if (self.filenames_to_check is not None and
                fname not in self.filenames_to_check):
            return False
        return True

    def get_subdirs_to_do(self, dirname, dirname_depth, todo):
        '''
        recursively collect into the todo dict (depth -> list of dirs)
        the subdirectories of dirname that should be audited, going no
        further than self.depth levels of path separators
        '''
        locale.setlocale(locale.LC_ALL, '')
        if clouseau.retention.utils.ignores.dir_is_ignored(
                dirname, self.ignored):
            return todo
        if clouseau.retention.utils.fileutils.dir_is_wrong_type(dirname):
            return todo
        if self.depth < dirname_depth:
            return todo
        if dirname_depth not in todo:
            todo[dirname_depth] = []
        if self.dirs_to_check is not None:
            # restricted run: only record dirs that match the restriction
            if clouseau.retention.utils.fileutils.subdir_check(
                    dirname, self.dirs_to_check):
                todo[dirname_depth].append(dirname)
        else:
            todo[dirname_depth].append(dirname)
        if self.depth == dirname_depth:
            # don't read below the depth level
            return todo
        dirs = [os.path.join(dirname, d) for d in os.listdir(dirname)]
        if self.dirs_to_check is not None:
            dirs = [d for d in dirs if dirtree_check(d, self.dirs_to_check)]
        for dname in dirs:
            todo = self.get_subdirs_to_do(dname, dirname_depth + 1, todo)
        return todo

    def get_dirs_to_do(self, dirname):
        '''
        return the dict (depth -> list of dirs) of directories to audit
        under dirname, empty if dirname falls outside the restriction
        '''
        if (self.dirs_to_check is not None and
                not dirtree_check(dirname, self.dirs_to_check)):
            return {}
        todo = {}
        depth_of_dirname = dirname.count(os.path.sep)
        todo = self.get_subdirs_to_do(dirname, depth_of_dirname, todo)
        return todo

    def process_files_from_path(self, location, base, files, count, results):
        '''
        arguments:
            location: the location being checked
            base: directory containing the files to be checked
            files: files to be checked, as (name, stat) pairs
            count: number of files in result set so far for this location
            results: the result set
        returns the updated count; stops early (after warning) once
        count exceeds self.max_files
        '''
        for fname, fstat in files:
            path = os.path.join(base, fname)
            if self.file_is_wanted(path, location):
                count += 1
                if count > self.max_files:
                    if self.dirsizes:
                        self.warn_dirsize(base)
                    else:
                        self.warn_too_many_files(base)
                    return count
                # for dirsizes option we don't collect or report files
                if not self.dirsizes:
                    results.append((path, fstat))
        return count

    def walk_nolinks(self, top):
        '''replaces (and is stolen from) os.walk, checks for and
        skips links, returns base, paths, files
        but it's guaranteed that files really are regular files
        and base/paths are not symlinks
        the files list is a list of filename, stat of that filename,
        because we have to do the stat on it anyways to ensure it's
        a file and not a dir, so the caller might as well get that info'''
        try:
            names = os.listdir(top)
        except OSError:
            return
        dirs, files = [], []
        for name in names:
            try:
                filestat = os.lstat(os.path.join(top, name))
            except OSError:
                # entry vanished or is unreadable; skip it
                continue
            if stat.S_ISLNK(filestat.st_mode):
                continue
            if stat.S_ISDIR(filestat.st_mode):
                dirs.append(name)
            elif stat.S_ISREG(filestat.st_mode):
                files.append((name, filestat))
            else:
                # sockets, fifos, devices etc are skipped
                continue
        yield top, dirs, files
        for name in dirs:
            new_path = os.path.join(top, name)
            for result in self.walk_nolinks(new_path):
                yield result

    def process_one_dir(self, location, subdirpath, depth, results):
        '''
        arguments:
            location: the location being checked
            subdirpath: the path to the subdirectory being checked
            depth: the depth of the directory being checked (starting at 1)
            results: the result set
        '''
        if self.dirs_to_check is not None:
            if not dirtree_check(subdirpath, self.dirs_to_check):
                return
        if clouseau.retention.utils.ignores.dir_is_ignored(
                subdirpath, self.ignored):
            return
        count = 0
        # doing a directory higher up in the tree than our depth cutoff,
        # only do the files in it, because we have the full list of dirs
        # up to our cutoff we do them one by one
        if depth < self.depth:
            filenames = os.listdir(subdirpath)
            files = []
            for fname in filenames:
                try:
                    # lstat (not stat) so the symlink check below actually
                    # works and links are skipped, as in walk_nolinks
                    filestat = os.lstat(os.path.join(subdirpath, fname))
                except OSError:
                    continue
                if (not stat.S_ISLNK(filestat.st_mode) and
                        stat.S_ISREG(filestat.st_mode)):
                    files.append((fname, filestat))
            self.process_files_from_path(location, subdirpath, files,
                                         count, results)
            return
        # doing a directory at our cutoff depth, walk it,
        # because anything below the depth
        # cutoff won't be in our list
        temp_results = []
        for base, paths, files in self.walk_nolinks(subdirpath):
            expanded_dirs, wildcard_dirs = expand_ignored_dirs(
                base, self.ignored)
            if self.dirs_to_check is not None:
                paths[:] = [p for p in paths
                            if dirtree_check(os.path.join(base, p),
                                             self.dirs_to_check)]
            # drop ignored subdirs (exact or wildcard matches) in place
            # so walk_nolinks never descends into them
            paths[:] = [p for p in paths
                        if (not clouseau.retention.utils.fileutils.startswithpath(
                            os.path.join(base, p), expanded_dirs) and
                            not clouseau.retention.utils.fileutils.wildcard_matches(
                                os.path.join(base, p), wildcard_dirs,
                                exact=False))]
            count = self.process_files_from_path(location, base, files,
                                                 count, temp_results)
            if count > self.max_files:
                # too many files: the warning was already recorded, and
                # we discard any partial results for this tree
                return
        results.extend(temp_results)

    def find_all_files(self):
        '''
        audit every configured location for this audit type and return
        the combined list of (path, stat) results
        '''
        results = []
        for location in clouseau.retention.utils.config.conf[self.locations]:
            dirs_to_do = self.get_dirs_to_do(location)
            if location.count(os.path.sep) >= self.depth + 1:
                # do the run at least once
                upper_end = location.count(os.path.sep) + 1
            else:
                upper_end = self.depth + 1
            for depth in range(location.count(os.path.sep), upper_end):
                if depth in dirs_to_do:
                    for dname in dirs_to_do[depth]:
                        self.process_one_dir(location, dname, depth, results)
        return results

    def warn_too_many_files(self, path=None):
        '''record a warning that a tree (truncated to the audit depth)
        has too many files to audit'''
        warning = "WARNING: too many files to audit"
        if path is not None:
            fields = path.split(os.path.sep)
            warning += " in directory %s" % os.path.sep.join(
                fields[:self.depth + 1])
        self.warnings.append(warning)

    def warn_dirsize(self, path):
        '''record a warning that a tree (truncated to the audit depth)
        has more than self.max_files files'''
        fields = path.split(os.path.sep)
        self.warnings.append(
            "WARNING: directory %s has more than %d files" % (
                os.path.sep.join(fields[:self.depth + 1]), self.max_files))

    def do_local_audit(self):
        '''
        run the full audit: find all candidate files, load their file
        info (filetype via libmagic, age, open-ness), and return the
        formatted report (warnings first) as a single string
        '''
        open_files = clouseau.retention.utils.fileutils.get_open_files()
        all_files = {}
        files = self.find_all_files()
        magic = clouseau.retention.utils.magic.magic_open(
            clouseau.retention.utils.magic.MAGIC_NONE)
        magic.load()
        today = time.time()
        for (fname, fstat) in files:
            all_files[fname] = FileInfo(fname, magic, fstat)
            all_files[fname].load_file_info()
            all_files[fname].load_extra_file_info(
                today, clouseau.retention.utils.config.conf['cutoff'],
                open_files)
        all_files_sorted = sorted(all_files, key=lambda f: all_files[f].path)
        result = []
        if all_files:
            # pad names so the report columns line up
            max_name_length = max(len(all_files[fname].path)
                                  for fname in all_files) + 2
            for fname in all_files_sorted:
                if (not clouseau.retention.utils.fileutils.contains(
                        all_files[fname].filetype,
                        clouseau.retention.utils.config.conf['ignored_types'])
                        and not all_files[fname].is_empty):
                    result.append(all_files[fname].format_output(
                        self.show_sample_content, False, max_name_length))
        output = "\n".join(self.warnings + result) + "\n"
        return output
class LocalFilesAuditor(object):
    '''
    audit files on the local host in a specified set of directories
    '''
    def __init__(self, audit_type, confdir=None, show_content=False,
                 dirsizes=False, depth=2, to_check=None, ignore_also=None,
                 maxfiles=None):
        '''
        audit_type: type of audit e.g. 'logs', 'homes'
        confdir: dir path where yaml config files are kept
        show_content: show the first line or so from problematic files
        dirsizes: show only directories which have too many files to audit
                  properly, don't report on files at all
        depth: the auditor will give up if a directory has too many files
               in it (saves it from dying on someone's 25gb homedir).
               this option tells it how far down the tree to go from the
               top dir of the audit, before starting to count.
               e.g. do we count in /home/ariel or separately in
               /home/ariel/* or in /home/ariel/*/*, etc.
        to_check: comma-separated list of dirs (must end in '/') and/or
                  files that will be checked; if this is None then all
                  dirs/files will be checked
        ignore_also: comma-separated list of dirs (must end in '/') and/or
                     files that will be skipped in addition to the ones in
                     the config, rules, etc.
        maxfiles: how many files in a directory tree is too many to audit
                  (at which point we warn about that and move on)
        '''
        self.audit_type = audit_type
        self.confdir = confdir
        # config key under which this audit type's locations are listed
        self.locations = audit_type + "_locations"
        self.show_sample_content = show_content
        self.dirsizes = dirsizes
        self.depth = depth + 1  # actually count of path separators in dirname
        self.filenames_to_check = None
        self.dirs_to_check = None
        self.set_up_to_check(to_check)
        clouseau.retention.utils.config.set_up_conf(self.confdir)
        # NOTE(review): the parameter is rebound from a string to a list here
        if ignore_also is not None:
            ignore_also = ignore_also.split(',')
        ignore_also_ignoreds = convert_ignore_also_to_ignores(ignore_also)
        self.ignores = Ignores(self.confdir)
        # presumably per-host ignores exported from the rule store --
        # verify against the ignores module
        ignored_from_export = get_ignored_from_exported_rules(self.confdir)
        hostname = socket.getfqdn()
        # merge the extra and exported ignores into the configured ones
        # for this host
        self.ignored = self.ignores.merge(
            [ignore_also_ignoreds, ignored_from_export], hostname)
        self.max_files = maxfiles
        self.set_up_max_files()
        self.warnings = []

    def set_up_max_files(self):
        '''
        more than this many files in a subdir we won't process,
        we'll just try to name top offenders

        if we've been asked only to report dir trees that are too large
        in this manner, we can set defaults much higher, since we don't
        stat files, open them to guess their filetype, etc; processing
        then goes much quicker
        '''
        if self.max_files is None:
            if self.dirsizes:
                self.max_files = 1000
            else:
                self.max_files = 100

    def set_up_to_check(self, to_check):
        '''
        turn the to_check arg into lists of dirs and files to check
        '''
        if to_check is not None:
            check_list = to_check.split(',')
            self.filenames_to_check = [
                fname for fname in check_list
                if not fname.startswith(os.sep)
            ]
            # an empty list means "no filename restriction"
            if not len(self.filenames_to_check):
                self.filenames_to_check = None
            self.dirs_to_check = [
                d.rstrip(os.path.sep) for d in check_list
                if d.startswith(os.sep)
            ]

    def normalize(self, fname):
        '''
        subclasses may want to do something different, see
        LogsAuditor for an example
        '''
        return fname

    def file_is_wanted(self, fname, basedir):
        '''
        decide if we want to audit the specific file or not
        (is it ignored, or in an ignored directory, or of a type we skip)
        args: fname - the abs path to the file / dir
        returns True if wanted or False if not
        '''
        fname = self.normalize(fname)
        if clouseau.retention.utils.ignores.file_is_ignored(
                fname, basedir, self.ignored):
            return False
        if (self.filenames_to_check is not None
                and fname not in self.filenames_to_check):
            return False
        return True

    def get_subdirs_to_do(self, dirname, dirname_depth, todo):
        # recursively collect into the todo dict (depth -> list of dirs)
        # the subdirectories of dirname that should be audited, going no
        # further than self.depth levels of path separators
        locale.setlocale(locale.LC_ALL, '')
        if clouseau.retention.utils.ignores.dir_is_ignored(
                dirname, self.ignored):
            return todo
        if clouseau.retention.utils.fileutils.dir_is_wrong_type(dirname):
            return todo
        if self.depth < dirname_depth:
            return todo
        if dirname_depth not in todo:
            todo[dirname_depth] = []
        if self.dirs_to_check is not None:
            # restricted run: only record dirs that match the restriction
            if clouseau.retention.utils.fileutils.subdir_check(
                    dirname, self.dirs_to_check):
                todo[dirname_depth].append(dirname)
        else:
            todo[dirname_depth].append(dirname)
        if self.depth == dirname_depth:
            # don't read below the depth level
            return todo
        dirs = [os.path.join(dirname, d) for d in os.listdir(dirname)]
        if self.dirs_to_check is not None:
            dirs = [d for d in dirs if dirtree_check(d, self.dirs_to_check)]
        for dname in dirs:
            todo = self.get_subdirs_to_do(dname, dirname_depth + 1, todo)
        return todo

    def get_dirs_to_do(self, dirname):
        # return the dict (depth -> list of dirs) of directories to audit
        # under dirname, empty if dirname falls outside the restriction
        if (self.dirs_to_check is not None
                and not dirtree_check(dirname, self.dirs_to_check)):
            return {}
        todo = {}
        depth_of_dirname = dirname.count(os.path.sep)
        todo = self.get_subdirs_to_do(dirname, depth_of_dirname, todo)
        return todo

    def process_files_from_path(self, location, base, files, count, results):
        '''
        arguments:
            location: the location being checked
            base: directory containing the files to be checked
            files: files to be checked
            count: number of files in result set so far for this location
            results: the result set
        '''
        for fname, fstat in files:
            path = os.path.join(base, fname)
            if self.file_is_wanted(path, location):
                count += 1
                if count > self.max_files:
                    if self.dirsizes:
                        self.warn_dirsize(base)
                    else:
                        self.warn_too_many_files(base)
                    return count
                # for dirsizes option we don't collect or report files
                if not self.dirsizes:
                    results.append((path, fstat))
        return count

    def walk_nolinks(self, top):
        '''replaces (and is stolen from) os.walk, checks for and
        skips links, returns base, paths, files
        but it's guaranteed that files really are regular files
        and base/paths are not symlinks
        the files list is a list of filename, stat of that filename,
        because we have to do the stat on it anyways to ensure it's
        a file and not a dir, so the caller might as well get that info'''
        try:
            names = os.listdir(top)
        except os.error:
            return
        dirs, files = [], []
        for name in names:
            try:
                filestat = os.lstat(os.path.join(top, name))
            # NOTE(review): bare except also swallows KeyboardInterrupt /
            # SystemExit; should probably be `except OSError:`
            except:
                continue
            if stat.S_ISLNK(filestat.st_mode):
                continue
            if stat.S_ISDIR(filestat.st_mode):
                dirs.append(name)
            elif stat.S_ISREG(filestat.st_mode):
                files.append((name, filestat))
            else:
                # sockets, fifos, devices etc are skipped
                continue
        yield top, dirs, files
        for name in dirs:
            new_path = os.path.join(top, name)
            for result in self.walk_nolinks(new_path):
                yield result

    def process_one_dir(self, location, subdirpath, depth, results):
        '''
        arguments:
            location: the location being checked
            subdirpath: the path to the subdirectory being checked
            depth: the depth of the directory being checked (starting at 1)
            results: the result set
        '''
        if self.dirs_to_check is not None:
            if not dirtree_check(subdirpath, self.dirs_to_check):
                return
        # NOTE(review): this early exit returns True while the others
        # return None; no visible caller uses the return value
        if clouseau.retention.utils.ignores.dir_is_ignored(
                subdirpath, self.ignored):
            return True
        count = 0
        # doing a directory higher up in the tree than our depth cutoff,
        # only do the files in it, because we have the full list of dirs
        # up to our cutoff we do them one by one
        if depth < self.depth:
            filenames = os.listdir(subdirpath)
            files = []
            for fname in filenames:
                try:
                    # NOTE(review): os.stat follows symlinks, so the
                    # S_ISLNK check below can never be true; os.lstat
                    # (as in walk_nolinks) is presumably what was meant
                    filestat = os.stat(os.path.join(subdirpath, fname))
                # NOTE(review): bare except; should probably be OSError
                except:
                    continue
                if (not stat.S_ISLNK(filestat.st_mode)
                        and stat.S_ISREG(filestat.st_mode)):
                    files.append((fname, filestat))
            self.process_files_from_path(location, subdirpath, files,
                                         count, results)
            return
        # doing a directory at our cutoff depth, walk it,
        # because anything below the depth
        # cutoff won't be in our list
        temp_results = []
        for base, paths, files in self.walk_nolinks(subdirpath):
            expanded_dirs, wildcard_dirs = expand_ignored_dirs(
                base, self.ignored)
            if self.dirs_to_check is not None:
                paths[:] = [
                    p for p in paths
                    if dirtree_check(os.path.join(base, p),
                                     self.dirs_to_check)
                ]
            # drop ignored subdirs (exact or wildcard matches) in place
            # so walk_nolinks never descends into them
            paths[:] = [
                p for p in paths
                if (not clouseau.retention.utils.fileutils.startswithpath(
                    os.path.join(base, p), expanded_dirs)
                    and not clouseau.retention.utils.fileutils.wildcard_matches(
                        os.path.join(base, p), wildcard_dirs, exact=False))
            ]
            count = self.process_files_from_path(location, base, files,
                                                 count, temp_results)
            if count > self.max_files:
                # too many files: warning already recorded, and partial
                # results for this tree are discarded
                return
        results.extend(temp_results)

    def find_all_files(self):
        # audit every configured location for this audit type and return
        # the combined list of (path, stat) results
        results = []
        for location in clouseau.retention.utils.config.conf[self.locations]:
            dirs_to_do = self.get_dirs_to_do(location)
            if location.count(os.path.sep) >= self.depth + 1:
                # do the run at least once
                upper_end = location.count(os.path.sep) + 1
            else:
                upper_end = self.depth + 1
            for depth in range(location.count(os.path.sep), upper_end):
                if depth in dirs_to_do:
                    for dname in dirs_to_do[depth]:
                        self.process_one_dir(location, dname, depth, results)
        return results

    def warn_too_many_files(self, path=None):
        # record a warning that a tree (truncated to the audit depth)
        # has too many files to audit
        warning = "WARNING: too many files to audit"
        if path is not None:
            fields = path.split(os.path.sep)
            warning += " in directory %s" % os.path.sep.join(
                fields[:self.depth + 1])
        self.warnings.append(warning)

    def warn_dirsize(self, path):
        # record a warning that a tree (truncated to the audit depth)
        # has more than self.max_files files
        fields = path.split(os.path.sep)
        self.warnings.append(
            "WARNING: directory %s has more than %d files" %
            (os.path.sep.join(fields[:self.depth + 1]), self.max_files))

    def do_local_audit(self):
        # run the full audit: find all candidate files, load their file
        # info (filetype via libmagic, age, open-ness), and return the
        # formatted report (warnings first) as a single string
        open_files = clouseau.retention.utils.fileutils.get_open_files()
        all_files = {}
        files = self.find_all_files()
        magic = clouseau.retention.utils.magic.magic_open(
            clouseau.retention.utils.magic.MAGIC_NONE)
        magic.load()
        today = time.time()
        for (fname, fstat) in files:
            all_files[fname] = FileInfo(fname, magic, fstat)
            all_files[fname].load_file_info()
            all_files[fname].load_extra_file_info(
                today, clouseau.retention.utils.config.conf['cutoff'],
                open_files)
        all_files_sorted = sorted(all_files, key=lambda f: all_files[f].path)
        result = []
        if all_files:
            # pad names so the report columns line up
            max_name_length = max(
                [len(all_files[fname].path) for fname in all_files]) + 2
            for fname in all_files_sorted:
                if (not clouseau.retention.utils.fileutils.contains(
                        all_files[fname].filetype,
                        clouseau.retention.utils.config.conf['ignored_types'])
                        and not all_files[fname].is_empty):
                    result.append(all_files[fname].format_output(
                        self.show_sample_content, False, max_name_length))
        output = "\n".join(self.warnings + result) + "\n"
        return output