#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Demo: run the PCA anomaly-detection pipeline on the HDFS data set.
__author__ = 'Shilin He'

from models import PCA
from utils import data_loader

# Settings consumed by the data loader and by PCA.get_threshold.
para = {
    # Directory that holds the input data.
    'path': '../../Data/SOSP_data/',
    # Name of the log sequence data file.
    'log_seq_file_name': 'rm_repeat_rawTFVector.txt',
    # Name of the label data file.
    'label_file_name': 'rm_repeat_mlabel.txt',
    # NOTE(review): meaning not documented here; presumably the fraction of
    # variance kept by PCA -- confirm against models/PCA.
    'fraction': 0.95,
}


def _run_hdfs_demo(settings):
    """Run the whole demo: load, weight, compute threshold, detect.

    ``settings`` is the parameter dictionary forwarded unchanged to
    ``data_loader.hdfs_data_loader`` and ``PCA.get_threshold``.
    """
    sequences, truth = data_loader.hdfs_data_loader(settings)
    # NOTE(review): the BGL demo scripts call PCA.weighting(para, matrix) with
    # two arguments; confirm which signature models.PCA actually exposes.
    weighted = PCA.weighting(sequences)
    threshold, C = PCA.get_threshold(settings, weighted)
    PCA.anomaly_detection(weighted, truth, C, threshold)


if __name__ == '__main__':
    _run_hdfs_demo(para)
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
# Demo: run the PCA anomaly-detection pipeline on the BGL data set.
__author__ = 'Shilin He'

# NOTE(review): these imports only resolve when the directory containing
# `models` and `utils` is already on sys.path -- confirm how this is launched.
from models import PCA
from utils import data_loader

# Settings consumed by the data loader, the preprocessing step and the PCA model.
para = {
    # Directory that holds the input data.
    'path': '../../Data/BGL_data/',
    # Name of the raw log data file.
    'log_file_name': 'BGL_MERGED.log',
    # Name of the log-event mapping file: a list of event indexes, one row
    # per log.
    'log_event_mapping': 'logTemplateMap.csv',
    # Directory where sliding-window data files are saved so the splitting
    # does not have to be redone.
    'save_path': '../time_windows/',
    # Columns to pick from the raw log file (label and time).
    'select_column': [0, 4],
    # Length of the time window, in hours.
    'window_size': 3,
    # Step between windows, in hours.
    'step_size': 1,
    # Keep tf-idf off for BGL: according to the original author this gives
    # better accuracy on this data set.
    'tf-idf': False,
    # NOTE(review): meaning not documented here; presumably the fraction of
    # variance kept by PCA -- confirm against models/PCA.
    'fraction': 0.95,
}


def _run_bgl_demo(settings):
    """Run the whole demo: load, preprocess, weight, compute threshold, detect.

    ``settings`` is the parameter dictionary forwarded unchanged to every
    data_loader / PCA call that takes it.
    """
    logs, event_mapping = data_loader.bgl_data_loader(settings)
    counts, truth = data_loader.bgl_preprocess_data(
        settings, logs, event_mapping)
    weighted = PCA.weighting(settings, counts)
    threshold, C = PCA.get_threshold(settings, weighted)
    PCA.anomaly_detection(weighted, truth, C, threshold)


if __name__ == '__main__':
    _run_bgl_demo(para)
# Demo: run the PCA anomaly-detection pipeline on the BGL data set.
import sys

# Put the parent directory on the import path before the project imports
# below are resolved.
sys.path.append('../')

from models import PCA
from utils import data_loader

# Settings consumed by the data loader, the preprocessing step and the PCA model.
para = {
    # Directory that holds the input data.
    'path': '../../Data/BGL_data/',
    # Name of the raw log data file.
    'log_file_name': 'BGL_MERGED.log',
    # Name of the log-event mapping file: a list of event indexes, one row
    # per log.
    'log_event_mapping': 'logTemplateMap.csv',
    # Directory where sliding-window data files are saved so the splitting
    # does not have to be redone.
    'save_path': '../time_windows/',
    # Columns to pick from the raw log file (label and time).
    'select_column': [0, 4],
    # Length of the time window, in hours.
    'window_size': 3,
    # Step between windows, in hours.
    'step_size': 1,
    # Keep tf-idf off for BGL: according to the original author this gives
    # better accuracy on this data set.
    'tf-idf': False,
    # NOTE(review): meaning not documented here; presumably the fraction of
    # variance kept by PCA -- confirm against models/PCA.
    'fraction': 0.95,
}


def _main(settings):
    """Execute the BGL demo pipeline end to end.

    ``settings`` is the parameter dictionary handed as-is to the loader,
    the preprocessing step, ``PCA.weighting`` and ``PCA.get_threshold``.
    """
    loaded = data_loader.bgl_data_loader(settings)
    raw_logs, mapping = loaded
    matrix, truth = data_loader.bgl_preprocess_data(settings, raw_logs, mapping)
    weighted = PCA.weighting(settings, matrix)
    threshold, C = PCA.get_threshold(settings, weighted)
    PCA.anomaly_detection(weighted, truth, C, threshold)


if __name__ == '__main__':
    _main(para)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Demo: run the PCA anomaly-detection pipeline on the HDFS data set.
__author__ = 'Shilin He'

from models import PCA
from utils import data_loader

# Settings consumed by the data loader and by PCA.get_threshold.
para = {
    # Input data directory.
    'path': '../../Data/SOSP_data/',
    # File with the log sequence data.
    'log_seq_file_name': 'rm_repeat_rawTFVector.txt',
    # File with the label data.
    'label_file_name': 'rm_repeat_mlabel.txt',
    # NOTE(review): meaning not documented here; presumably the fraction of
    # variance kept by PCA -- confirm against models/PCA.
    'fraction': 0.95,
}

if __name__ == '__main__':
    # Step 1: read the log sequences together with their labels.
    loaded = data_loader.hdfs_data_loader(para)
    sequences, truth = loaded

    # Step 2: weight the raw data.
    # NOTE(review): the BGL demo scripts call PCA.weighting(para, matrix) with
    # two arguments; confirm which signature models.PCA actually exposes.
    weighted = PCA.weighting(sequences)

    # Step 3: compute the detection threshold and the accompanying C value.
    threshold_and_c = PCA.get_threshold(para, weighted)
    threshold, C = threshold_and_c

    # Step 4: run detection against the labels.
    PCA.anomaly_detection(weighted, truth, C, threshold)