示例#1
0
        # NOTE(review): this excerpt switches from an InvariantsMiner config to
        # an LSTM config midway and discards the splits loaded for the miner —
        # it looks like two driver scripts were concatenated; confirm against
        # the original sources before refactoring.
        print('########### Start Invariant Mining on Dataset ' + dataset +
              ' ###########')
        # Point the global config at the InvariantsMiner paths for this dataset.
        config.init('InvariantsMiner_' + dataset)

        if dataset == 'BGL':
            data_instances = config.BGL_data
            # 30% train / 60% test; the third (validation) split is discarded.
            (x_train,
             y_train), (x_test,
                        y_test), (_, _) = load_BGL(data_instances, 0.3, 0.6)

        if dataset == 'HDFS':
            data_instances = config.HDFS_data
            # Same 0.3/0.6 split as BGL; validation split discarded here too.
            (x_train, y_train), (x_test, y_test), (_,
                                                   _) = dataloader.load_HDFS(
                                                       data_instances,
                                                       train_ratio=0.3,
                                                       split_type='uniform',
                                                       test_ratio=0.6,
                                                       is_data_instance=True)
        if dataset == 'OpenStack':
            # OpenStack ships pre-split train/test files, so each file is
            # loaded separately: train_ratio=1 keeps everything in the train
            # split, train_ratio=0 pushes everything into the test split.
            data_train = config.OpenStack_train_data
            data_test = config.OpenStack_test_data
            (x_train, y_train), (_, _), (_, _) = dataloader.load_OpenStack(
                data_train,
                train_ratio=1,
                split_type='uniform',
                is_data_instance=True)
            (_, _), (x_test, y_test), (_, _) = dataloader.load_OpenStack(
                data_test,
                train_ratio=0,
                split_type='uniform',
                is_data_instance=True)
        # Re-initialise the global config for the LSTM experiment; paths below
        # (checkpoint_name, result_folder) come from this second init.
        config.init('LSTM_' + dataset)
        checkpoint_name = config.path + FLAGS.checkpoint_name

        # The splits loaded above are thrown away and re-loaded below with
        # LSTM-specific ratios (0.35/0.6, validation split kept this time).
        (x_train, y_train), (x_test, y_test), (x_validate, y_validate) = (None, None), (None, None), (None, None)
        collector = None
        result_folder = config.path + FLAGS.result_folder
        if dataset == 'BGL':
            data_instances = config.BGL_data

            (x_train, y_train), (x_test, y_test), (x_validate, y_validate) = load_BGL(data_instances, 0.35, 0.6)
            collector = Collector(result_folder, (1, 1, 1, 1), False, config.BGL_col_header, 100)

        if dataset == 'HDFS':
            data_instances = config.HDFS_data
            (x_train, y_train), (x_test, y_test), (x_validate, y_validate) = dataloader.load_HDFS(data_instances,
                                                                                                  train_ratio=0.35,
                                                                                                  is_data_instance=True,
                                                                                                  test_ratio=0.6)
            collector = Collector(result_folder, (1, 1, 1, 1), False, config.HDFS_col_header, 100)

        # History window must be strictly shorter than the pad lower bound,
        # otherwise the padding below cannot guarantee enough context per
        # sample. (NOTE(review): `assert` is stripped under `python -O`; an
        # explicit raise would be sturdier — confirm intent.)
        assert FLAGS.h < FLAGS.plb
        # Vocabulary/vectors are built from all three splits together.
        lstm_preprocessor = preprocessing.LstmPreprocessor(x_train, x_test, x_validate)
        # NOTE(review): the -1 presumably excludes a padding/sentinel entry in
        # `vectors` — confirm in LstmPreprocessor.
        sym_count = len(lstm_preprocessor.vectors) - 1
        print('Total symbols: %d' % sym_count)
        print(lstm_preprocessor.syms)

        # pad x_train: sequences shorter than FLAGS.plb are padded (presumably
        # up to length FLAGS.plb — confirm in LstmPreprocessor.pad); longer
        # sequences pass through unchanged.
        x_train = [lstm_preprocessor.pad(t, FLAGS.plb) if len(t) < FLAGS.plb else t for t in x_train]

        # throw away anomalies & same event series in x_train
        x_train = lstm_preprocessor.process_train_inputs(x_train, y_train, FLAGS.h, True,
                                                         FLAGS.no_repeat_series)
示例#3
0
# Datasets to run the CNN pipeline over; only HDFS is enabled here.
datasets = ['HDFS']

if __name__ == '__main__':
    for dataset in datasets:
        print('########### Start CNN on Dataset ' + dataset + ' ###########')
        # Point the global config at this experiment's paths.
        config.init('CNN_' + dataset)
        train_dir = config.path + FLAGS.train_dir

        if dataset == 'HDFS':
            data_instances = config.HDFS_data
            # 30% train / 60% test / remainder validation. NOTE(review):
            # CNN_option presumably makes the loader emit CNN-shaped samples —
            # confirm in dataloader.load_HDFS.
            (x_train, y_train), (x_test,
                                 y_test), (x_validate,
                                           y_validate) = dataloader.load_HDFS(
                                               data_instances,
                                               train_ratio=0.3,
                                               is_data_instance=True,
                                               test_ratio=0.6,
                                               CNN_option=True)

        # Preprocessor is built from FLAGS.log_len and all three splits.
        cnn_preprocessor = preprocessing.CNNPreprocessor(
            FLAGS.log_len, x_train, x_test, x_validate)
        # NOTE(review): the -1 presumably excludes a padding/sentinel symbol —
        # confirm in CNNPreprocessor.
        sym_count = len(cnn_preprocessor.syms) - 1
        print('Total symbols: %d' % sym_count)

        # tf.Session is the TF1-style session API; the model graph is built
        # inside the session context.
        with tf.Session() as sess:

            model = CNN.CNN(sym_count, FLAGS.log_len)

            # Resume from an existing checkpoint when one is present in
            # train_dir. (Excerpt is truncated mid-call below.)
            if tf.train.get_checkpoint_state(train_dir):
                print('== Reading model parameters from %s ==' % train_dir)
                model.saver.restore(sess,