Exemplo n.º 1
0
label_file = '../data/HDFS/anomaly_label.csv'  # The anomaly label file
epsilon = 0.5  # threshold for estimating invariant space

if __name__ == '__main__':
    # Load structured log without label info
    train_test_tuple = dataloader.load_HDFS(struct_log,
                                            window='session',
                                            train_ratio=0.5,
                                            split_type='sequential')
    (x_train, _), (x_test, _) = train_test_tuple[0], train_test_tuple[1]
    # Feature extraction
    feature_extractor = preprocessing.FeatureExtractor()
    x_train = feature_extractor.fit_transform(x_train)

    # Model initialization and training
    model = InvariantsMiner(epsilon=epsilon)
    model.fit(x_train)

    # Predict anomalies on the training set offline, and manually check for correctness
    y_train = model.predict(x_train)

    # Predict anomalies on the test set to simulate the online mode
    # x_test may be loaded from another log file
    x_test = feature_extractor.transform(x_test)
    y_test = model.predict(x_test)

    # If you have labeled data, you can evaluate the accuracy of the model as well.
    # Load structured log with label info
    train_test_tuple = dataloader.load_HDFS(struct_log,
                                            label_file=label_file,
                                            window='session',
Exemplo n.º 2
0
struct_log = '../data/August.csv'  # The structured log file
label_file = None  # The anomaly label file
epsilon = 0.5  # threshold for estimating invariant space

if __name__ == '__main__':
    # Load structured log without label info
    (x_train, _), (x_test, _) = dataloader.load_HDFS(struct_log,
                                                     window='session',
                                                     train_ratio=1.0,
                                                     split_type='sequential')
    # Feature extraction
    feature_extractor = preprocessing.FeatureExtractor()
    x_train = feature_extractor.fit_transform(x_train)

    # Model initialization and training
    model = InvariantsMiner(epsilon=epsilon)
    model.fit(x_train)

    # Predict anomalies on the training set offline, and manually check for correctness
    y_train = model.predict(x_train)

    # Predict anomalies on the test set to simulate the online mode
    # x_test may be loaded from another log file
    x_test = feature_extractor.transform(x_test)
    #y_test = model.predict(x_test)

    # If you have labeled data, you can evaluate the accuracy of the model as well.
    # Load structured log with label info

    num_train = x_train.shape[0]
    num_test = x_test.shape[0]