def set_up_runner(self):
    '''
    build the Runner that will drive the remote audit, wiring in
    the audit configuration gathered at construction time
    '''
    runner_args = (self.confdir,
                   self.store_filepath,
                   self.hosts_expr,
                   self.expanded_hosts,
                   self.audit_type,
                   self.get_audit_args(),
                   self.show_sample_content,
                   self.to_check,
                   self.timeout,
                   self.verbose)
    self.runner = Runner(*runner_args)
def __init__(self, hosts_expr, audit_type, confdir=None,
             prettyprint=False, show_content=False, dirsizes=False,
             summary_report=False, depth=2, to_check=None,
             ignore_also=None, timeout=60, maxfiles=None,
             store_filepath=None, verbose=False):
    '''
    hosts_expr:   list or grain-based or wildcard expr for hosts
                  to be audited
    audit_type:   type of audit e.g. 'logs', 'homes'
    confdir:      directory where the yaml config files are stored
    prettyprint:  nicely format the output display
    show_content: show the first line or so from problematic files
    dirsizes:     show only directories which have too many files to
                  audit properly, don't report on files at all
    summary_report: do a summary of results instead of detailed;
                  this means different things depending on the audit type
    depth:        the auditor will give up if a directory has too many
                  files in it (saves it from dying on someone's 25gb
                  homedir).  this option tells it how far down the tree
                  to go from the top dir of the audit, before starting
                  to count.  e.g. do we count in /home/ariel or
                  separately in /home/ariel/* or in /home/ariel/*/*, etc.
    to_check:     comma-separated list of dirs (must end in '/') and/or
                  files that will be checked; if this is None then all
                  dirs/files will be checked
    ignore_also:  comma-separated list of dirs (must end in '/') and/or
                  files that will be skipped in addition to the ones in
                  the config, rules, etc.
    timeout:      salt timeout for running remote commands
    maxfiles:     how many files in a directory tree is too many to
                  audit (at which point we warn about that and move on)
    store_filepath: full path to rule store (sqlite3 db)
    verbose:      show informative messages during processing
    '''
    self.hosts_expr = hosts_expr
    self.audit_type = audit_type
    self.confdir = confdir
    self.locations = audit_type + "_locations"
    self.prettyprint = prettyprint
    self.show_sample_content = show_content
    self.dirsizes = dirsizes
    self.show_summary = summary_report
    self.depth = depth + 1  # actually count of path separators in dirname
    self.to_check = to_check
    self.ignore_also = ignore_also
    self.timeout = timeout
    self.store_filepath = store_filepath
    self.verbose = verbose
    # FIXED: dropped the redundant 'self.max_files = maxfiles' here;
    # set_up_max_files() unconditionally assigns self.max_files
    self.set_up_max_files(maxfiles)
    self.magic = clouseau.retention.utils.magic.magic_open(
        clouseau.retention.utils.magic.MAGIC_NONE)
    self.magic.load()
    self.summary = None
    self.display_from_dict = FileInfo.display_from_dict
    self.runner = None
    if self.hosts_expr == "127.0.0.1" or self.hosts_expr == "localhost":
        # run locally, no salt / rule store involved
        self.localaudit = True
        self.expanded_hosts = []
        self.cdb = None
    else:
        self.localaudit = False
        clouseau.retention.utils.config.set_up_conf(confdir)
        client = LocalClientPlus()
        hosts, expr_type = Runner.get_hosts_expr_type(self.hosts_expr)
        self.expanded_hosts = client.cmd_expandminions(
            hosts, "test.ping", expr_form=expr_type)
        self.cdb = RuleStore(self.store_filepath)
        self.cdb.store_db_init(self.expanded_hosts)
        self.set_up_and_export_rule_store()
class RemoteFilesAuditor(object): ''' audit files across a set of remote hosts, in a specified set of directories ''' def __init__(self, hosts_expr, audit_type, confdir=None, prettyprint=False, show_content=False, dirsizes=False, summary_report=False, depth=2, to_check=None, ignore_also=None, timeout=60, maxfiles=None, store_filepath=None, verbose=False): ''' hosts_expr: list or grain-based or wildcard expr for hosts to be audited audit_type: type of audit e.g. 'logs', 'homes' confdir: directory where the yaml config files are stored prettyprint: nicely format the output display show_content: show the first line or so from problematic files dirsizes: show only directories which have too many files to audit properly, don't report on files at all summary_report: do a summary of results instead of detailed this means different thiings depending on the audit type depth: the auditor will give up if a directory has too any files it (saves it form dying on someone's 25gb homedir). this option tells it how far down the tree to go from the top dir of the audit, before starting to count. e.g. do we count in /home/ariel or separately in /home/ariel/* or in /home/ariel/*/*, etc. to_check: comma-separated list of dirs (must end in '/') and/or files that will be checked; if this is None then all dirs/files will be checked ignore_also: comma-separated list of dirs (must end in '/') and/or files that will be skipped in addition to the ones in the config, rules, etc. 
timeout: salt timeout for running remote commands maxfiles: how many files in a directory tree is too many to audit (at which point we warn about that and move on) store_filepath: full path to rule store (sqlite3 db) verbose: show informative messages during processing ''' self.hosts_expr = hosts_expr self.audit_type = audit_type self.confdir = confdir self.locations = audit_type + "_locations" self.prettyprint = prettyprint self.show_sample_content = show_content self.dirsizes = dirsizes self.show_summary = summary_report self.depth = depth + 1 # actually count of path separators in dirname self.to_check = to_check self.ignore_also = ignore_also self.timeout = timeout self.store_filepath = store_filepath self.verbose = verbose self.max_files = maxfiles self.set_up_max_files(maxfiles) self.magic = clouseau.retention.utils.magic.magic_open( clouseau.retention.utils.magic.MAGIC_NONE) self.magic.load() self.summary = None self.display_from_dict = FileInfo.display_from_dict self.runner = None if self.hosts_expr == "127.0.0.1" or self.hosts_expr == "localhost": # run locally self.localaudit = True self.expanded_hosts = [] self.cdb = None else: self.localaudit = False clouseau.retention.utils.config.set_up_conf(confdir) client = LocalClientPlus() hosts, expr_type = Runner.get_hosts_expr_type(self.hosts_expr) self.expanded_hosts = client.cmd_expandminions(hosts, "test.ping", expr_form=expr_type) self.cdb = RuleStore(self.store_filepath) self.cdb.store_db_init(self.expanded_hosts) self.set_up_and_export_rule_store() def get_audit_args(self): audit_args = [ self.confdir, self.show_sample_content, self.dirsizes, self.depth - 1, self.to_check, self.ignore_also, self.max_files ] return audit_args def set_up_runner(self): self.runner = Runner(self.confdir, self.store_filepath, self.hosts_expr, self.expanded_hosts, self.audit_type, self.get_audit_args(), self.show_sample_content, self.to_check, self.timeout, self.verbose) def set_up_max_files(self, maxfiles): ''' more than this 
many files in a subdir we won't process, we'll just try to name top offenders if we've been asked only to report dir trees that are too large in this manner, we can set defaults mich higher, since we don't stat files, open them to guess their filetype, etc; processing then goes much quicker ''' if maxfiles is None: if self.dirsizes: self.max_files = 1000 else: self.max_files = 100 else: self.max_files = maxfiles def set_up_and_export_rule_store(self): hosts = self.cdb.store_db_list_all_hosts() destdir = os.path.join(os.path.dirname(self.store_filepath), "data_retention.d") if not os.path.isdir(destdir): os.makedirs(destdir, 0755) for host in hosts: all_destpath = os.path.join(destdir, host + "_store.yaml") clouseau.retention.utils.ruleutils.export_rules( self.cdb, all_destpath, host) good_destpath = os.path.join(destdir, host + "_store_good.yaml") clouseau.retention.utils.ruleutils.export_rules( self.cdb, good_destpath, host, Status.text_to_status('good')) def normalize(self, fname): ''' subclasses may want to do something different, see LogsAuditor for an example ''' return fname @staticmethod def get_dirname_from_warning(warning): ''' some audit output lines warn about directory trees having too many files to audit; grab the dirname out of such a line and return it ''' start = "WARNING: directory " if warning.startswith(start): # WARNING: directory %s has more than %d files rindex = warning.rfind(" has more than") if not rindex: return None else: return warning[len(start):rindex] start = "WARNING: too many files to audit in directory " if warning.startswith(start): return warning[len(start):] return None def add_stats(self, item, summary): ''' gather stats on how many files/dirs may be problematic; summary is where the results are collected, item is the item to include in the summary if needed ''' dirname = os.path.dirname(item['path']) if dirname not in summary: summary[dirname] = { 'binary': { 'old': 0, 'maybe_old': 0, 'nonroot': 0 }, 'text': { 'old': 0, 
'maybe_old': 0, 'nonroot': 0 } } if item['binary'] is True: group = 'binary' else: group = 'text' if item['old'] == 'T': summary[dirname][group]['old'] += 1 elif item['old'] == '-': summary[dirname][group]['maybe_old'] += 1 if item['owner'] != 0: summary[dirname][group]['nonroot'] += 1 return summary def display_host_summary(self): if self.summary is not None: paths = sorted(self.summary.keys()) for path in paths: for group in self.summary[path]: if (self.summary[path][group]['old'] > 0 or self.summary[path][group]['maybe_old'] > 0 or self.summary[path][group]['nonroot'] > 0): print( "in directory %s, (%s), %d old," " %d maybe old, %d with non root owner" % (path, group, self.summary[path][group]['old'], self.summary[path][group]['maybe_old'], self.summary[path][group]['nonroot'])) def display_summary(self, result): for host in result: self.summary = {} print "host:", host if result[host]: try: lines = result[host].split('\n') for line in lines: if display_summary_line(line): continue else: try: item = json.loads( line, object_hook=JsonHelper.decode_dict) if item['empty'] is not True: self.add_stats(item, self.summary) except: print "WARNING: failed to json load from host", print host, "this line:", line self.display_host_summary() except: print "WARNING: failed to process output from host" else: if self.verbose: print "WARNING: no output from host", host def display_remote_host(self, result): try: lines = result.split('\n') files = [] for line in lines: if line == "": continue elif line.startswith("WARNING:") or line.startswith("INFO:"): print line else: files.append( json.loads(line, object_hook=JsonHelper.decode_dict)) if files == []: return path_justify = max([len(finfo['path']) for finfo in files]) + 2 for finfo in files: self.display_from_dict(finfo, self.show_sample_content, path_justify) except: print "WARNING: failed to load json from host" def get_local_auditor(self): return LocalFilesAuditor(self.audit_type, self.confdir, self.show_sample_content, 
self.dirsizes, self.depth, self.to_check, self.ignore_also, self.max_files) def audit_hosts(self): # do local audit instead if self.localaudit: localauditor = self.get_local_auditor() result = localauditor.do_local_audit() self.display_remote_host(result) return # proceed to regular remote audit self.set_up_runner() result = self.runner.run_remotely() if result is None: print "WARNING: failed to get output from audit script on any host" elif self.show_summary: self.display_summary(result) else: for host in result: print "host:", host if result[host]: self.display_remote_host(result[host]) else: if self.verbose: print "no output from host", host # add some results to rule store self.update_status_rules_from_report(result) return result def update_status_rules_from_report(self, report): hostlist = report.keys() for host in hostlist: try: problem_rules = clouseau.retention.utils.ruleutils.get_rules( self.cdb, host, Status.text_to_status('problem')) except: print 'WARNING: problem retrieving problem rules for host', host problem_rules = None if problem_rules is not None: existing_problems = [rule['path'] for rule in problem_rules] else: existing_problems = [] dirs_problem, dirs_skipped = get_dirs_toexamine(report[host]) if dirs_problem is not None: dirs_problem = list(set(dirs_problem)) for dirname in dirs_problem: clouseau.retention.utils.ruleutils.do_add_rule( self.cdb, dirname, clouseau.retention.utils.ruleutils.text_to_entrytype( 'dir'), Status.text_to_status('problem'), host) if dirs_skipped is not None: dirs_skipped = list(set(dirs_skipped)) for dirname in dirs_skipped: if dirname in dirs_problem or dirname in existing_problems: # problem report overrides 'too many to audit' continue clouseau.retention.utils.ruleutils.do_add_rule( self.cdb, dirname, clouseau.retention.utils.ruleutils.text_to_entrytype( 'dir'), Status.text_to_status('unreviewed'), host)
def __init__(self, hosts_expr, audit_type, confdir=None,
             prettyprint=False, show_content=False, dirsizes=False,
             summary_report=False, depth=2, to_check=None,
             ignore_also=None, timeout=60, maxfiles=None,
             store_filepath=None, verbose=False):
    '''
    hosts_expr:   list or grain-based or wildcard expr for hosts
                  to be audited
    audit_type:   type of audit e.g. 'logs', 'homes'
    confdir:      directory where the yaml config files are stored
    prettyprint:  nicely format the output display
    show_content: show the first line or so from problematic files
    dirsizes:     show only directories which have too many files to
                  audit properly, don't report on files at all
    summary_report: do a summary of results instead of detailed;
                  this means different things depending on the audit type
    depth:        the auditor will give up if a directory has too many
                  files in it (saves it from dying on someone's 25gb
                  homedir).  this option tells it how far down the tree
                  to go from the top dir of the audit, before starting
                  to count.  e.g. do we count in /home/ariel or
                  separately in /home/ariel/* or in /home/ariel/*/*, etc.
    to_check:     comma-separated list of dirs (must end in '/') and/or
                  files that will be checked; if this is None then all
                  dirs/files will be checked
    ignore_also:  comma-separated list of dirs (must end in '/') and/or
                  files that will be skipped in addition to the ones in
                  the config, rules, etc.
    timeout:      salt timeout for running remote commands
    maxfiles:     how many files in a directory tree is too many to
                  audit (at which point we warn about that and move on)
    store_filepath: full path to rule store (sqlite3 db)
    verbose:      show informative messages during processing
    '''
    self.hosts_expr = hosts_expr
    self.audit_type = audit_type
    self.confdir = confdir
    self.locations = audit_type + "_locations"
    self.prettyprint = prettyprint
    self.show_sample_content = show_content
    self.dirsizes = dirsizes
    self.show_summary = summary_report
    self.depth = depth + 1  # actually count of path separators in dirname
    self.to_check = to_check
    self.ignore_also = ignore_also
    self.timeout = timeout
    self.store_filepath = store_filepath
    self.verbose = verbose
    # FIXED: dropped the redundant 'self.max_files = maxfiles' here;
    # set_up_max_files() unconditionally assigns self.max_files
    self.set_up_max_files(maxfiles)
    self.magic = clouseau.retention.utils.magic.magic_open(
        clouseau.retention.utils.magic.MAGIC_NONE)
    self.magic.load()
    self.summary = None
    self.display_from_dict = FileInfo.display_from_dict
    self.runner = None
    if self.hosts_expr == "127.0.0.1" or self.hosts_expr == "localhost":
        # run locally, no salt / rule store involved
        self.localaudit = True
        self.expanded_hosts = []
        self.cdb = None
    else:
        self.localaudit = False
        clouseau.retention.utils.config.set_up_conf(confdir)
        client = LocalClientPlus()
        hosts, expr_type = Runner.get_hosts_expr_type(self.hosts_expr)
        self.expanded_hosts = client.cmd_expandminions(
            hosts, "test.ping", expr_form=expr_type)
        self.cdb = RuleStore(self.store_filepath)
        self.cdb.store_db_init(self.expanded_hosts)
        self.set_up_and_export_rule_store()
class RemoteFilesAuditor(object): ''' audit files across a set of remote hosts, in a specified set of directories ''' def __init__(self, hosts_expr, audit_type, confdir=None, prettyprint=False, show_content=False, dirsizes=False, summary_report=False, depth=2, to_check=None, ignore_also=None, timeout=60, maxfiles=None, store_filepath=None, verbose=False): ''' hosts_expr: list or grain-based or wildcard expr for hosts to be audited audit_type: type of audit e.g. 'logs', 'homes' confdir: directory where the yaml config files are stored prettyprint: nicely format the output display show_content: show the first line or so from problematic files dirsizes: show only directories which have too many files to audit properly, don't report on files at all summary_report: do a summary of results instead of detailed this means different thiings depending on the audit type depth: the auditor will give up if a directory has too any files it (saves it form dying on someone's 25gb homedir). this option tells it how far down the tree to go from the top dir of the audit, before starting to count. e.g. do we count in /home/ariel or separately in /home/ariel/* or in /home/ariel/*/*, etc. to_check: comma-separated list of dirs (must end in '/') and/or files that will be checked; if this is None then all dirs/files will be checked ignore_also: comma-separated list of dirs (must end in '/') and/or files that will be skipped in addition to the ones in the config, rules, etc. 
timeout: salt timeout for running remote commands maxfiles: how many files in a directory tree is too many to audit (at which point we warn about that and move on) store_filepath: full path to rule store (sqlite3 db) verbose: show informative messages during processing ''' self.hosts_expr = hosts_expr self.audit_type = audit_type self.confdir = confdir self.locations = audit_type + "_locations" self.prettyprint = prettyprint self.show_sample_content = show_content self.dirsizes = dirsizes self.show_summary = summary_report self.depth = depth + 1 # actually count of path separators in dirname self.to_check = to_check self.ignore_also = ignore_also self.timeout = timeout self.store_filepath = store_filepath self.verbose = verbose self.max_files = maxfiles self.set_up_max_files(maxfiles) self.magic = clouseau.retention.utils.magic.magic_open( clouseau.retention.utils.magic.MAGIC_NONE) self.magic.load() self.summary = None self.display_from_dict = FileInfo.display_from_dict self.runner = None if self.hosts_expr == "127.0.0.1" or self.hosts_expr == "localhost": # run locally self.localaudit = True self.expanded_hosts = [] self.cdb = None else: self.localaudit = False clouseau.retention.utils.config.set_up_conf(confdir) client = LocalClientPlus() hosts, expr_type = Runner.get_hosts_expr_type(self.hosts_expr) self.expanded_hosts = client.cmd_expandminions( hosts, "test.ping", expr_form=expr_type) self.cdb = RuleStore(self.store_filepath) self.cdb.store_db_init(self.expanded_hosts) self.set_up_and_export_rule_store() def get_audit_args(self): audit_args = [self.confdir, self.show_sample_content, self.dirsizes, self.depth - 1, self.to_check, self.ignore_also, self.max_files] return audit_args def set_up_runner(self): self.runner = Runner(self.confdir, self.store_filepath, self.hosts_expr, self.expanded_hosts, self.audit_type, self.get_audit_args(), self.show_sample_content, self.to_check, self.timeout, self.verbose) def set_up_max_files(self, maxfiles): ''' more than this 
many files in a subdir we won't process, we'll just try to name top offenders if we've been asked only to report dir trees that are too large in this manner, we can set defaults mich higher, since we don't stat files, open them to guess their filetype, etc; processing then goes much quicker ''' if maxfiles is None: if self.dirsizes: self.max_files = 1000 else: self.max_files = 100 else: self.max_files = maxfiles def set_up_and_export_rule_store(self): hosts = self.cdb.store_db_list_all_hosts() destdir = os.path.join(os.path.dirname(self.store_filepath), "data_retention.d") if not os.path.isdir(destdir): os.makedirs(destdir, 0755) for host in hosts: all_destpath = os.path.join(destdir, host + "_store.yaml") clouseau.retention.utils.ruleutils.export_rules(self.cdb, all_destpath, host) good_destpath = os.path.join(destdir, host + "_store_good.yaml") clouseau.retention.utils.ruleutils.export_rules(self.cdb, good_destpath, host, Status.text_to_status('good')) def normalize(self, fname): ''' subclasses may want to do something different, see LogsAuditor for an example ''' return fname @staticmethod def get_dirname_from_warning(warning): ''' some audit output lines warn about directory trees having too many files to audit; grab the dirname out of such a line and return it ''' start = "WARNING: directory " if warning.startswith(start): # WARNING: directory %s has more than %d files rindex = warning.rfind(" has more than") if not rindex: return None else: return warning[len(start):rindex] start = "WARNING: too many files to audit in directory " if warning.startswith(start): return warning[len(start):] return None def add_stats(self, item, summary): ''' gather stats on how many files/dirs may be problematic; summary is where the results are collected, item is the item to include in the summary if needed ''' dirname = os.path.dirname(item['path']) if dirname not in summary: summary[dirname] = { 'binary': {'old': 0, 'maybe_old': 0, 'nonroot': 0}, 'text': {'old': 0, 
'maybe_old': 0, 'nonroot': 0} } if item['binary'] is True: group = 'binary' else: group = 'text' if item['old'] == 'T': summary[dirname][group]['old'] += 1 elif item['old'] == '-': summary[dirname][group]['maybe_old'] += 1 if item['owner'] != 0: summary[dirname][group]['nonroot'] += 1 return summary def display_host_summary(self): if self.summary is not None: paths = sorted(self.summary.keys()) for path in paths: for group in self.summary[path]: if (self.summary[path][group]['old'] > 0 or self.summary[path][group]['maybe_old'] > 0 or self.summary[path][group]['nonroot'] > 0): print ("in directory %s, (%s), %d old," " %d maybe old, %d with non root owner" % (path, group, self.summary[path][group]['old'], self.summary[path][group]['maybe_old'], self.summary[path][group]['nonroot'])) def display_summary(self, result): for host in result: self.summary = {} print "host:", host if result[host]: try: lines = result[host].split('\n') for line in lines: if display_summary_line(line): continue else: try: item = json.loads( line, object_hook=JsonHelper.decode_dict) if item['empty'] is not True: self.add_stats(item, self.summary) except: print "WARNING: failed to json load from host", print host, "this line:", line self.display_host_summary() except: print "WARNING: failed to process output from host" else: if self.verbose: print "WARNING: no output from host", host def display_remote_host(self, result): try: lines = result.split('\n') files = [] for line in lines: if line == "": continue elif line.startswith("WARNING:") or line.startswith("INFO:"): print line else: files.append(json.loads(line, object_hook=JsonHelper.decode_dict)) if files == []: return path_justify = max([len(finfo['path']) for finfo in files]) + 2 for finfo in files: self.display_from_dict(finfo, self.show_sample_content, path_justify) except: print "WARNING: failed to load json from host" def get_local_auditor(self): return LocalFilesAuditor(self.audit_type, self.confdir, self.show_sample_content, 
self.dirsizes, self.depth, self.to_check, self.ignore_also, self.max_files) def audit_hosts(self): # do local audit instead if self.localaudit: localauditor = self.get_local_auditor() result = localauditor.do_local_audit() self.display_remote_host(result) return # proceed to regular remote audit self.set_up_runner() result = self.runner.run_remotely() if result is None: print "WARNING: failed to get output from audit script on any host" elif self.show_summary: self.display_summary(result) else: for host in result: print "host:", host if result[host]: self.display_remote_host(result[host]) else: if self.verbose: print "no output from host", host # add some results to rule store self.update_status_rules_from_report(result) return result def update_status_rules_from_report(self, report): hostlist = report.keys() for host in hostlist: try: problem_rules = clouseau.retention.utils.ruleutils.get_rules( self.cdb, host, Status.text_to_status('problem')) except: print 'WARNING: problem retrieving problem rules for host', host problem_rules = None if problem_rules is not None: existing_problems = [rule['path'] for rule in problem_rules] else: existing_problems = [] dirs_problem, dirs_skipped = get_dirs_toexamine(report[host]) if dirs_problem is not None: dirs_problem = list(set(dirs_problem)) for dirname in dirs_problem: clouseau.retention.utils.ruleutils.do_add_rule( self.cdb, dirname, clouseau.retention.utils.ruleutils.text_to_entrytype('dir'), Status.text_to_status('problem'), host) if dirs_skipped is not None: dirs_skipped = list(set(dirs_skipped)) for dirname in dirs_skipped: if dirname in dirs_problem or dirname in existing_problems: # problem report overrides 'too many to audit' continue clouseau.retention.utils.ruleutils.do_add_rule( self.cdb, dirname, clouseau.retention.utils.ruleutils.text_to_entrytype('dir'), Status.text_to_status('unreviewed'), host)