def train_autoencoder(x_train: Dict, x_test: Dict, y_train: np.ndarray,
                      y_test: np.ndarray) -> Dict:
    """Run a random hyperparameter search for an AutoEncoder anomaly detector.

    Extracts TF-IDF (mean-pooled) features from the raw log dicts, resolves
    the label arrays against the dict keys, and evaluates ``n_experiments``
    randomly sampled hyperparameter configurations.

    :param x_train: mapping of block/sample id -> raw training data
    :param x_test: mapping of block/sample id -> raw test data
    :param y_train: training labels (aligned via ``get_labels_from_csv``)
    :param y_test: test labels (aligned via ``get_labels_from_csv``)
    :return: dict with the evaluated hyperparameter configurations and scores
    """
    # NOTE(review): annotations fixed from ``np.array`` (a function, not a
    # type) to ``np.ndarray``; runtime behavior is unchanged.
    fe = FeatureExtractor(method='tf-idf', preprocessing='mean')
    y_train = get_labels_from_csv(y_train, x_train.keys())
    y_test = get_labels_from_csv(y_test, x_test.keys())
    x_train = fe.fit_transform(x_train)
    x_test = fe.transform(x_test)

    model = AutoEncoder()
    n_experiments = 100
    params = {
        # 1-9 training epochs per trial
        'epochs':
        np.random.choice(np.arange(1, 10), size=n_experiments).tolist(),
        # sampled from 50 points spaced log-uniformly in [1e-4, 10**-0.1]
        'learning_rate':
        np.random.choice(10**np.linspace(-4, -0.1),
                         size=n_experiments).tolist(),
        # powers of two: 8, 16, 32, 64, 128
        'batch_size':
        np.random.choice([2**i for i in range(3, 8)],
                         size=n_experiments).tolist(),
        'input_dim': [48] * n_experiments,
        'layers':
        generate_layer_settings(n_experiments),
        'dropout':
        np.random.uniform(0, 0.5, size=n_experiments).tolist()
    }
    # Train only on normal samples (label == 0); evaluate on the full test set.
    evaluated_hyperparams = random_search(
        (x_train[y_train == 0], x_test, None, y_test), model, params)
    return evaluated_hyperparams
# Exemplo n.º 2 (0)  -- scraper artifact, commented out so the file parses
def train_autoencoder(x_train: Dict, x_test: Dict, y_train: pd.DataFrame, y_test: pd.DataFrame) -> Dict:
    """Evaluate pre-saved AutoEncoder hyperparameter configurations.

    Builds mean-pooled TF-IDF features, aligns the label frames with the
    data-dict keys, and replays the experiment grid stored in the
    AE-hyperparameters JSON file via ``random_search``.

    :param x_train: mapping of sample id -> raw training data
    :param x_test: mapping of sample id -> raw test data
    :param y_train: training labels as a DataFrame
    :param y_test: test labels as a DataFrame
    :return: evaluated hyperparameter configurations with their scores
    """
    extractor = FeatureExtractor(method='tf-idf', preprocessing='mean')
    y_train = get_labels_from_csv(y_train, x_train.keys())
    y_test = get_labels_from_csv(y_test, x_test.keys())
    x_train = extractor.fit_transform(x_train)
    x_test = extractor.transform(x_test)

    autoencoder = AutoEncoder()

    # Previously generated experiment settings are loaded rather than sampled.
    experiments = load_experiment('../../models/AE-hyperparameters-Drain3-HDFS1.json')
    # Fit only on the normal (label == 0) portion of the training features.
    normal_train = x_train[y_train == 0]
    return random_search((normal_train, x_test, None, y_test), autoencoder, experiments)
def evaluate_lof(x_train: Dict, x_test: Dict, y_test: np.ndarray) -> Dict:
    """Score a Local Outlier Factor baseline using saved training stats.

    Fits the TF-IDF feature extractor on the training data (the transformed
    training features themselves are intentionally discarded — only the
    fitted vocabulary/IDF state is needed), transforms the test set, and
    evaluates it against the experiment settings stored on disk.

    :param x_train: mapping of sample id -> raw training data (used only to
        fit the feature extractor)
    :param x_test: mapping of sample id -> raw test data
    :param y_test: test labels (aligned via ``get_labels_from_csv``)
    :return: evaluation score(s) produced by ``evaluate_unsupervised``
    """
    # NOTE(review): annotation fixed from ``np.array`` (a function, not a
    # type) to ``np.ndarray``; runtime behavior is unchanged.
    fe = FeatureExtractor(method='tf-idf', preprocessing='mean')
    y_test = get_labels_from_csv(y_test, x_test.keys())
    fe.fit_transform(x_train)  # fit only; transformed result unused
    x_test = fe.transform(x_test)

    training_stats = load_experiment(
        '../../models/lof_baseline/experiments.json')
    score = evaluate_unsupervised(x_test, y_test,
                                  training_stats['experiments'])
    return score
# Exemplo n.º 4 (0)  -- scraper artifact, commented out so the file parses
def train_iso_forest(x_train: Dict, x_test: Dict,  y_train: pd.DataFrame, y_test: pd.DataFrame) -> Dict:
    """Evaluate pre-saved Isolation Forest hyperparameter configurations.

    Builds mean-pooled TF-IDF features, aligns the test labels with the
    test-dict keys, and replays the experiment grid from the IF
    hyperparameters JSON via ``random_search_unsupervised``.

    :param x_train: mapping of sample id -> raw training data
    :param x_test: mapping of sample id -> raw test data
    :param y_train: training labels (unused by this routine)
    :param y_test: test labels as a DataFrame
    :return: evaluated hyperparameter configurations with their scores
    """
    extractor = FeatureExtractor(method='tf-idf', preprocessing='mean')
    y_test = get_labels_from_csv(y_test, x_test.keys())
    x_train = extractor.fit_transform(x_train)
    x_test = extractor.transform(x_test)

    forest = IsolationForest(bootstrap=True, n_jobs=os.cpu_count(), random_state=SEED)

    experiments = load_experiment('../../models/IF-hyperparameters-Drain3-HDFS1.json')
    # Unsupervised search: no training labels are passed (None in the tuple).
    return random_search_unsupervised((x_train, x_test, None, y_test), forest, experiments)
# Exemplo n.º 5 (0)  -- scraper artifact, commented out so the file parses
def train_lof(x_train: Dict, x_test: Dict,  y_train: pd.DataFrame, y_test: pd.DataFrame) -> Dict:
    """Evaluate pre-saved Local Outlier Factor hyperparameter configurations.

    Fits the TF-IDF feature extractor on the training data (only the fitted
    state is kept; the transformed training features are discarded),
    transforms the test set, and replays the experiment grid from the LOF
    hyperparameters JSON via ``random_search_unsupervised``.

    :param x_train: mapping of sample id -> raw training data (used only to
        fit the feature extractor)
    :param x_test: mapping of sample id -> raw test data
    :param y_train: training labels (unused by this routine)
    :param y_test: test labels as a DataFrame
    :return: evaluated hyperparameter configurations with their scores
    """
    extractor = FeatureExtractor(method='tf-idf', preprocessing='mean')
    y_test = get_labels_from_csv(y_test, x_test.keys())
    extractor.fit_transform(x_train)  # fit only; result intentionally unused
    x_test = extractor.transform(x_test)

    detector = LocalOutlierFactor(n_jobs=os.cpu_count())

    experiments = load_experiment('../../models/LOF-hyperparameters-Drain3-HDFS1.json')
    # LOF here receives no training split at all (first tuple slot is None).
    return random_search_unsupervised((None, x_test, None, y_test), detector, experiments)