# Invariant-mining data setup for the current `dataset` (expects `dataset`,
# `config`, `dataloader`, and `load_BGL` to be in scope from the enclosing
# script). Reconstructed from a whitespace-mangled paste: the original source
# had every statement collapsed onto one line, which is a SyntaxError in
# Python; statement boundaries below follow the only parse the tokens admit.
print('########### Start Invariant Mining on Dataset ' + dataset + ' ###########')
config.init('InvariantsMiner_' + dataset)

# Each dataset has its own loader and split conventions; the trailing
# (_, _) discards the validation split, which invariant mining does not use.
if dataset == 'BGL':
    data_instances = config.BGL_data
    # 0.3 train / 0.6 test ratios -- presumably the remainder is validation;
    # TODO confirm against load_BGL's signature.
    (x_train, y_train), (x_test, y_test), (_, _) = load_BGL(data_instances, 0.3, 0.6)

if dataset == 'HDFS':
    data_instances = config.HDFS_data
    (x_train, y_train), (x_test, y_test), (_, _) = dataloader.load_HDFS(
        data_instances, train_ratio=0.3, split_type='uniform', test_ratio=0.6,
        is_data_instance=True)

if dataset == 'OpenStack':
    # OpenStack ships pre-split train/test files, so load each file fully:
    # train_ratio=1 puts everything in the train slot, train_ratio=0 puts
    # everything in the test slot.
    data_train = config.OpenStack_train_data
    data_test = config.OpenStack_test_data
    (x_train, y_train), (_, _), (_, _) = dataloader.load_OpenStack(
        data_train, train_ratio=1, split_type='uniform', is_data_instance=True)
    (_, _), (x_test, y_test), (_, _) = dataloader.load_OpenStack(
        data_test, train_ratio=0, split_type='uniform', is_data_instance=True)
# LSTM training data setup for the current `dataset` (expects `dataset`,
# `config`, `FLAGS`, `dataloader`, `load_BGL`, `Collector`, and
# `preprocessing` in scope from the enclosing script). Reconstructed from a
# whitespace-mangled paste: on the original single collapsed line, the
# `# pad x_train` comment swallowed every statement after it, so the padding
# and train-input filtering below never executed at all.
config.init('LSTM_' + dataset)
checkpoint_name = config.path + FLAGS.checkpoint_name

# Placeholders so the names exist even if `dataset` matches no branch below.
(x_train, y_train), (x_test, y_test), (x_validate, y_validate) = (None, None), (None, None), (None, None)
collector = None
result_folder = config.path + FLAGS.result_folder

if dataset == 'BGL':
    data_instances = config.BGL_data
    # 0.35 train / 0.6 test -- remainder presumably validation; TODO confirm.
    (x_train, y_train), (x_test, y_test), (x_validate, y_validate) = load_BGL(data_instances, 0.35, 0.6)
    collector = Collector(result_folder, (1, 1, 1, 1), False, config.BGL_col_header, 100)

if dataset == 'HDFS':
    data_instances = config.HDFS_data
    (x_train, y_train), (x_test, y_test), (x_validate, y_validate) = dataloader.load_HDFS(
        data_instances, train_ratio=0.35, is_data_instance=True, test_ratio=0.6)
    collector = Collector(result_folder, (1, 1, 1, 1), False, config.HDFS_col_header, 100)

# History window must be shorter than the pad lower bound, otherwise padded
# sequences cannot supply a full window. NOTE(review): `assert` is stripped
# under `python -O`; consider raising ValueError if this must always hold.
assert FLAGS.h < FLAGS.plb

lstm_preprocessor = preprocessing.LstmPreprocessor(x_train, x_test, x_validate)
# vectors includes one padding/sentinel entry, hence the -1 -- TODO confirm
# against LstmPreprocessor.
sym_count = len(lstm_preprocessor.vectors) - 1
print('Total symbols: %d' % sym_count)
print(lstm_preprocessor.syms)

# Pad every training sequence shorter than FLAGS.plb up to that length.
x_train = [lstm_preprocessor.pad(t, FLAGS.plb) if len(t) < FLAGS.plb else t for t in x_train]
# Throw away anomalies & (optionally) repeated event series in x_train.
x_train = lstm_preprocessor.process_train_inputs(x_train, y_train, FLAGS.h, True, FLAGS.no_repeat_series)
# NOTE(review): this line is a whitespace-mangled paste of a CNN training
# script (dataset loop -> config.init -> dataloader.load_HDFS ->
# CNNPreprocessor -> tf.Session -> checkpoint restore) collapsed onto a
# single physical line, and it is TRUNCATED mid-call at
# `model.saver.restore(sess,` -- the rest of the script is not visible here,
# so the statement structure cannot be safely reconstructed without guessing.
# Left byte-identical pending recovery of the full source file.
datasets = ['HDFS'] if __name__ == '__main__': for dataset in datasets: print('########### Start CNN on Dataset ' + dataset + ' ###########') config.init('CNN_' + dataset) train_dir = config.path + FLAGS.train_dir if dataset == 'HDFS': data_instances = config.HDFS_data (x_train, y_train), (x_test, y_test), (x_validate, y_validate) = dataloader.load_HDFS( data_instances, train_ratio=0.3, is_data_instance=True, test_ratio=0.6, CNN_option=True) cnn_preprocessor = preprocessing.CNNPreprocessor( FLAGS.log_len, x_train, x_test, x_validate) sym_count = len(cnn_preprocessor.syms) - 1 print('Total symbols: %d' % sym_count) with tf.Session() as sess: model = CNN.CNN(sym_count, FLAGS.log_len) if tf.train.get_checkpoint_state(train_dir): print('== Reading model parameters from %s ==' % train_dir) model.saver.restore(sess,