def main():
    loglines = preprocess_step(
        log_file,
        transforms_file,
        *read_lines_args,
        **read_lines_kwargs)
    if write_to_pickle_file:
        write_pickle_file(loglines, loglines_file)
    # loglines = read_pickle_file(loglines_file)

    # count cardinality; print unique lines if verbose and there are actually
    # transforms to apply
    log_cardinality(loglines,
                    get_item=operator.attrgetter('processed'),
                    item_title='Transform',
                    verbose=False)

    gen_templates = template_step(loglines, "logcluster", **logcluster_kwargs)
    # gen_templates = template_step(loglines, "stringmatch")  # WIP
    if write_to_pickle_file:
        write_pickle_file(gen_templates, gen_templates_file)
    # gen_templates = read_pickle_file(gen_templates_file)

    eval_loglines = genapply_step(loglines, gen_templates, **genapply_kwargs)
    if write_to_pickle_file:
        write_pickle_file(eval_loglines, eval_loglines_file)
    # eval_loglines = read_pickle_file(eval_loglines_file)

    gen_windows = genwindow_step(eval_loglines, **gen_windows_kwargs)
    if write_to_pickle_file:
        write_pickle_file(gen_windows, gen_windows_file)
    # gen_windows = read_pickle_file(gen_windows_file)

    # gen_events = event_step(gen_windows, "fp_growth", **fp_growth_kwargs)
    # gen_events = event_step(gen_windows, "paris", **paris_kwargs)
    gen_events = event_step(gen_windows, "glove", **glove_kwargs)
    if write_to_pickle_file:
        write_pickle_file(gen_events, gen_events_file)
    # gen_events = read_pickle_file(gen_events_file)

    """
    # pretty-print discovered events and their templates
    template_d = {template.id: template for template in gen_templates}
    e = []
    for event in gen_events:
        ts = []
        for template_id in event.template_ids:
            ts.append("%s: %s" % (template_id, template_d[template_id]))
        e.append(ts)
    from pprint import pformat
    logger.info("Discovered events:")
    logger.info("\n" + pformat(e))
    """

    timed_events = evalapply_step(
        gen_events, eval_loglines, **eval_apply_kwargs)
    # final results are always persisted, independent of write_to_pickle_file
    write_pickle_file(timed_events, timed_events_file)
    # timed_events = read_pickle_file(timed_events_file)

    logger.info("Done!")
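# ---------------------------------------------------------------------------
# main() above reads its configuration from module-level globals that are not
# defined in this file. The assignments below are a minimal, hypothetical
# sketch of that configuration -- every path and value is an illustrative
# assumption, not the project's documented defaults.
# ---------------------------------------------------------------------------
log_file = "data/system.log"             # hypothetical input log
transforms_file = "data/transforms.cfg"  # hypothetical transform definitions
read_lines_args = []                     # extra positional args for preprocess_step
read_lines_kwargs = {}                   # extra keyword args for preprocess_step
write_to_pickle_file = True              # checkpoint each step's output to disk
loglines_file = "cache/loglines.pickle"
gen_templates_file = "cache/gen_templates.pickle"
eval_loglines_file = "cache/eval_loglines.pickle"
gen_windows_file = "cache/gen_windows.pickle"
gen_events_file = "cache/gen_events.pickle"
timed_events_file = "cache/timed_events.pickle"
logcluster_kwargs = {"support": "50"}    # support passed as a string, as in run_pipeline below
genapply_kwargs = {}
gen_windows_kwargs = {}
glove_kwargs = {"num_components": 16, "glove_window": 10, "epochs": 20}  # keys as in run_pipeline below
eval_apply_kwargs = {}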
def main():
    loglines = preprocess_step(
        log_file,
        transforms_file,
        *read_lines_args,
        **read_lines_kwargs)
    write_pickle_file(loglines, transformed_lines_file)
    # loglines = read_pickle_file(transformed_lines_file)

    gen_templates = template_step(loglines, "logcluster", **logcluster_kwargs)
    # gen_templates = template_step(loglines, "stringmatch")  # WIP
    write_pickle_file(gen_templates, templates_file)
    # gen_templates = read_pickle_file(templates_file)

    timed_templates = genapply_step(loglines, gen_templates)
    write_pickle_file(timed_templates, timed_templates_file)
    # timed_templates = read_pickle_file(timed_templates_file)

    modelgen_windows = genwindow_step(
        timed_templates, **modelgen_windows_kwargs)
    write_pickle_file(modelgen_windows, modelgen_windows_file)
    # modelgen_windows = read_pickle_file(modelgen_windows_file)

    gen_events = event_step(modelgen_windows, "fp_growth", **fp_growth_kwargs)
    # gen_events = event_step(modelgen_windows, "paris", **paris_kwargs)  # WIP
    write_pickle_file(gen_events, events_file)
    # gen_events = read_pickle_file(events_file)

    """
    # pretty-print discovered events and their templates
    template_d = {template.id: template for template in gen_templates}
    e = []
    for event in gen_events:
        ts = []
        for template_id in event.template_ids:
            ts.append("%s: %s" % (template_id, template_d[template_id].str))
        e.append(ts)
    from pprint import pformat
    logger.info("Discovered events:")
    logger.info("\n" + pformat(e))
    """

    """
    modeleval_windows = evalwindow_step(timed_templates, window_size)
    write_pickle_file(modeleval_windows, modeleval_windows_file)
    # modeleval_windows = read_pickle_file(modeleval_windows_file)
    """

    timed_events = evalapply_step(gen_events, timed_templates, loglines)
    write_pickle_file(timed_events, timed_events_file)
    # timed_events = read_pickle_file(timed_events_file)

    logger.info("Done!")
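# ---------------------------------------------------------------------------
# Both versions of main() checkpoint every stage through write_pickle_file /
# read_pickle_file so that any step can be replayed from cache (see the
# commented-out read_pickle_file lines). Below is a minimal sketch of such
# helpers, assuming they are plain pickle wrappers; the project's real
# implementations may differ.
# ---------------------------------------------------------------------------
import pickle


def write_pickle_file_sketch(obj, path):
    """Serialize obj to path (hypothetical stand-in for write_pickle_file)."""
    with open(path, "wb") as f:
        pickle.dump(obj, f, protocol=pickle.HIGHEST_PROTOCOL)


def read_pickle_file_sketch(path):
    """Load a cached object (hypothetical stand-in for read_pickle_file)."""
    with open(path, "rb") as f:
        return pickle.load(f)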
def run_pipeline(options):
    read_lines_kwargs = {
        'transforms_file': options.transforms_file,
        'gettime_auditd': options.auditd,
        'type_template_auditd': options.auditd_templates_file,
        'ts_start_index': options.ts_start_index,
        'ts_end_index': options.ts_end_index,
        'ts_format': options.ts_format,
        'skip_num_chars': options.skip_num_chars,
        'mp': options.mp,
    }

    loglines = []
    log_files = []
    if options.data_file:
        log_files.append(options.data_file)
    if options.data_dir:
        log_files.extend(glob.glob(os.path.join(options.data_dir, '*')))
    if not log_files:
        raise RuntimeError('No input specified/available')
    for log_file in log_files:
        loglines.extend(preprocess_step(log_file, **read_lines_kwargs))

    # count cardinality; print unique lines if verbose and there are actually
    # transforms to apply
    log_cardinality(loglines,
                    get_item=operator.attrgetter('processed'),
                    item_title='Transform',
                    verbose=options.verbose and options.transforms_file)

    if options.save_intermediate:
        transformed_lines_file = os.path.join(
            options.pickle_cache_dir, "transformed_lines.pickle")
        write_pickle_file(loglines, transformed_lines_file)

    if read_lines_kwargs.get('type_template_auditd'):
        # Read in auditd template definitions
        templates = get_auditd_templates(options.auditd_templates_file)
    else:
        # Generate templates
        if options.template_gen == 'logcluster':
            logcluster_kwargs = {"support": str(options.template_support)}
            templates = template_step(
                loglines, "logcluster", **logcluster_kwargs)
        elif options.template_gen == 'stringmatch':
            templates = template_step(loglines, "stringmatch")  # WIP
        else:
            raise NotImplementedError(
                '%s template generation method not implemented' %
                options.template_gen)

    if options.save_intermediate:
        templates_file = os.path.join(
            options.pickle_cache_dir, "templates.pickle")
        write_pickle_file(templates, templates_file)

    log_cardinality(templates,
                    item_key=operator.attrgetter('id'),
                    item_title='Template',
                    verbose=options.verbose)

    timed_templates = genapply_step(loglines, templates, **read_lines_kwargs)
    if options.save_intermediate:
        timed_templates_file = os.path.join(
            options.pickle_cache_dir, "timed_templates.pickle")
        write_pickle_file(timed_templates, timed_templates_file)

    modelgen_windows = genwindow_step(timed_templates,
                                      window_size=options.gwindow_time,
                                      tfidf_threshold=options.gtfidf_threshold)
    if options.save_intermediate:
        modelgen_windows_file = os.path.join(
            options.pickle_cache_dir, "modelgen_windows.pickle")
        write_pickle_file(modelgen_windows, modelgen_windows_file)

    if options.event_gen == 'fp-growth':
        fp_growth_kwargs = {
            "min_support": options.min_support,
            "iterations": options.iterations,
            "tfidf_threshold": options.tfidf_threshold,
        }
        gen_events = event_step(
            modelgen_windows, "fp_growth", **fp_growth_kwargs)
    elif options.event_gen == 'paris':
        paris_kwargs = {
            "r_slack": options.r_slack,
            "num_iterations": options.num_iterations,
            "tau": options.tau,
        }
        gen_events = event_step(
            modelgen_windows, "paris", **paris_kwargs)  # WIP
    elif options.event_gen == 'glove':
        glove_kwargs = {
            'num_components': options.num_components,
            'glove_window': options.glove_window,
            'epochs': options.epochs,
        }
        gen_events = event_step(
            modelgen_windows, "glove", verbose=options.verbose,
            **glove_kwargs)
    elif options.event_gen == 'auditd':
        # ignore timed_templates and modelgen_windows; pass templates straight
        # to the auditd-specific event generator
        gen_events = auditd.event_gen(templates)
    else:
        raise NotImplementedError(
            '%s event generation method not implemented' % options.event_gen)

    if options.save_intermediate:
        events_file = os.path.join(options.pickle_cache_dir, "events.pickle")
        write_pickle_file(gen_events, events_file)

    logger.info("Discovered events: %d" % len(gen_events))
    if options.verbose:
        # Print events and their templates
        if read_lines_kwargs.get('type_template_auditd'):
            template_list = [(templates[template], template)
                             for template in templates]
        else:
            template_list = [(template.id, template)
                             for template in templates]
        template_d = {template_id: template
                      for (template_id, template) in template_list}
        e = []
        for event in sorted(gen_events, key=lambda event: event.id):
            ts = ["event_id: %s" % event.id]
            for template_id in sorted(event.template_ids):
                ts.append("%s: %s" % (template_id, template_d[template_id]))
            e.append(ts)
        from pprint import pformat
        logger.info("\n" + pformat(e))

        # compute how many times each template was used (i.e. how many events
        # each template appears in)
        event_templates = (template_d[template_id]
                           for event in gen_events
                           for template_id in event.template_ids)
        log_cardinality(event_templates,
                        item_title='EventTemplate',
                        item_key=operator.attrgetter('id'),
                        verbose=options.verbose)

    timed_events = evalapply_step(gen_events, timed_templates,
                                  window_time=options.awindow_time,
                                  mp=options.mp)
    if options.save_intermediate:
        timed_events_file = os.path.join(
            options.pickle_cache_dir, "timed_events.pickle")
        write_pickle_file(timed_events, timed_events_file)

    logger.info("Timed events: %d" % len(timed_events))
    log_cardinality(timed_events,
                    item_title='TimedEvent',
                    get_item=operator.attrgetter('event_id'),
                    verbose=options.verbose)

    if options.verbose > 1:
        # Print timed event summary for -vv:
        # sort timed_templates in ascending time order
        for te in timed_events:
            te.timed_templates.sort(key=lambda tt: tt.ts)
        if options.sort_events_key == 'time':
            # sort timed events in ascending time order (of their first
            # occurring timed_template)
            def timed_event_key(te):
                return te.timed_templates[0].ts
        else:
            # sort timed events by event id, then by time order
            def timed_event_key(te):
                return (te.event_id, te.timed_templates[0].ts)
        timed_events.sort(key=timed_event_key)
        e = [strTimedEvent(event) for event in timed_events]
        logger.info("\n" + pformat(e))

    logger.info("Done!")
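# ---------------------------------------------------------------------------
# run_pipeline() takes a single options object, normally built by the CLI's
# argument parser. Below is a hypothetical invocation via
# types.SimpleNamespace, listing the attributes the function actually reads;
# the values are illustrative assumptions, not recommended settings.
# ---------------------------------------------------------------------------
from types import SimpleNamespace

example_options = SimpleNamespace(
    data_file="data/system.log", data_dir=None,   # input selection
    transforms_file=None, skip_num_chars=0,       # preprocessing
    ts_start_index=0, ts_end_index=19, ts_format="%Y-%m-%d %H:%M:%S",
    auditd=False, auditd_templates_file=None,     # auditd-specific handling
    template_gen="logcluster", template_support=50,
    gwindow_time=60, gtfidf_threshold=1.0,        # model-gen windowing
    event_gen="fp-growth",
    min_support=0.1, iterations=-1, tfidf_threshold=1.0,  # fp-growth params
    awindow_time=60,                              # event application window
    sort_events_key="time",
    verbose=1, save_intermediate=False, pickle_cache_dir="/tmp",
    mp=False,                                     # multiprocessing toggle (assumed)
)
# run_pipeline(example_options)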