示例#1
0
def random_search_unsupervised(data_and_labels: tuple, model: Union[LocalOutlierFactor, IsolationForest],
                               params: Dict) -> Dict:
    """Run every listed hyperparameter set through an unsupervised detector.

    Args:
        data_and_labels: 4-tuple unpacked as ``(x_train, x_test, _, y_test)``;
            the third element is ignored.
        model: Detector that is reconfigured in place with ``set_params`` for
            each experiment.
        params: Mapping whose ``'experiments'`` entry is an iterable of
            mappings, each carrying a ``'hyperparameters'`` mapping.

    Returns:
        ``{'experiments': [...]}`` with one report per experiment, in order.

    Side effects:
        Prints the hyperparameters, saves the model with ``torch.save`` under
        a fresh UUID-based path, and rewrites the checkpoint at
        ``EXPERIMENT_PATH`` after every experiment.
    """
    x_train, x_test, _, y_test = data_and_labels

    reports = []
    for trial in params['experiments']:
        hyperparameters = trial['hyperparameters']
        model.set_params(**hyperparameters)

        print(f'Model current hyperparameters are: {hyperparameters}.')

        if isinstance(model, LocalOutlierFactor):
            # LOF is fitted and scored on the test split in a single call.
            raw_labels = model.fit_predict(x_test)
        else:
            model.fit(x_train)
            raw_labels = model.predict(x_test)

        labels = convert_predictions(raw_labels)
        metrics_report(y_test, labels)

        # Persist the fitted model so the report can point at it.
        saved_to = create_model_path(DIR_TO_EXPERIMENTS, str(uuid.uuid4()))
        torch.save(model, saved_to)

        reports.append(
            create_experiment_report(get_metrics(y_test, labels), hyperparameters, file_path=saved_to))
        # Checkpoint the accumulated reports after each experiment.
        create_checkpoint({'experiments': reports}, EXPERIMENT_PATH)
    return {'experiments': reports}
示例#2
0
def random_search(data_and_labels: tuple, model: TransformerAutoEncoder,
                  params: Dict) -> Dict:
    """Evaluate the model on position-wise combinations of hyperparameters.

    The i-th configuration is built from the i-th value of every entry in
    ``params`` (``zip`` semantics, not a Cartesian product), so iteration
    stops at the shortest value list.

    Args:
        data_and_labels: 4-tuple unpacked as ``(x_train, x_test, _, y_test)``;
            the third element is ignored.
        model: Model reconfigured in place with ``set_params`` for each
            configuration, then fitted on ``x_train``.
        params: Mapping from hyperparameter name to a sequence of values.

    Returns:
        ``{'experiments': [...]}`` with one report per configuration.

    Side effects:
        Prints the configuration and the confusion matrix, and rewrites the
        checkpoint at ``EXPERIMENT_PATH`` after every configuration.
    """
    # Imported once per call rather than on every loop iteration.
    from sklearn.metrics import confusion_matrix

    x_train, x_test, _, y_test = data_and_labels

    scores = []
    for conf in zip(*params.values()):
        kwargs = dict(zip(params.keys(), conf))

        model.set_params(**kwargs)

        print(f'Model current hyperparameters are: {kwargs}.')

        model.fit(x_train)
        errors = model.predict(x_test)  # reconstruction errors, not labels

        # Only the threshold is needed; the accompanying F1 value is unused.
        theta, _ = find_optimal_threshold(y_test, errors)
        y_pred = classify(errors, theta)
        metrics_report(y_test, y_pred)
        scores.append(
            create_experiment_report(get_metrics(y_test, y_pred), kwargs))
        print(confusion_matrix(y_test, y_pred))
        # Checkpoint the accumulated reports after each configuration.
        create_checkpoint({'experiments': scores}, EXPERIMENT_PATH)
    return {'experiments': scores}
def grid_search(data_and_labels: tuple, model: Union[LocalOutlierFactor,
                                                     IsolationForest],
                params: Dict) -> Dict:
    """Evaluate the detector on the Cartesian product of hyperparameter values.

    Args:
        data_and_labels: 4-tuple unpacked as ``(x_train, x_test, _, y_test)``;
            the third element is ignored.
        model: Detector reconfigured in place with ``set_params`` for each
            combination.
        params: Mapping from hyperparameter name to an iterable of values.

    Returns:
        ``{'experiments': [...]}`` with one report per combination; each
        report is built from the metrics and ``model.get_params()``.
    """
    x_train, x_test, _, y_test = data_and_labels

    reports = []
    for combo in itertools.product(*params.values()):
        model.set_params(**dict(zip(params.keys(), combo)))

        print(f'Model (hyper)parameters are: {model.get_params()}.')

        if isinstance(model, LocalOutlierFactor):
            # LOF is fitted and scored on the test split in a single call.
            raw_labels = model.fit_predict(x_test)
        else:
            model.fit(x_train)
            raw_labels = model.predict(x_test)

        labels = convert_predictions(raw_labels)
        metrics_report(y_test, labels)
        report = create_experiment_report(get_metrics(y_test, labels),
                                          model.get_params())
        reports.append(report)
    return {'experiments': reports}
示例#4
0
def random_search(data_and_labels: tuple, model: Union[AutoEncoder, VanillaTCN, AETCN, AECNN1D, CNN1D, CNN2D, TCNCNN1D,
                                                       SACNN1D, SACNN2D], params: Dict) -> Dict:
    """Fit and score the model once per listed hyperparameter set.

    Args:
        data_and_labels: 4-tuple unpacked as ``(x_train, x_test, _, y_test)``;
            the third element is ignored.
        model: Model reconfigured in place with ``set_params`` for each
            experiment, then fitted on ``x_train``.
        params: Mapping whose ``'experiments'`` entry is an iterable of
            mappings, each carrying a ``'hyperparameters'`` mapping.

    Returns:
        ``{'experiments': [...]}`` with one report per experiment; each report
        is built from the metrics, the hyperparameters, the chosen threshold
        and the saved model path.

    Side effects:
        Prints the hyperparameters, saves the model with ``torch.save`` under
        a fresh UUID-based path, and rewrites the checkpoint at
        ``EXPERIMENT_PATH`` after every experiment.
    """
    x_train, x_test, _, y_test = data_and_labels

    reports = []
    for trial in params['experiments']:
        hyperparameters = trial['hyperparameters']
        model.set_params(**hyperparameters)

        print(f'Model current hyperparameters are: {hyperparameters}.')

        model.fit(x_train)
        errors = model.predict(x_test)  # reconstruction errors, not labels

        # Choose the threshold on the test split, then binarise the errors.
        threshold, _ = find_optimal_threshold(y_test, errors)
        labels = classify(errors, threshold)
        metrics_report(y_test, labels)

        saved_to = create_model_path(DIR_TO_EXPERIMENTS, str(uuid.uuid4()))
        torch.save(model, saved_to)

        reports.append(
            create_experiment_report(get_metrics(y_test, labels), hyperparameters, threshold, saved_to))
        # Checkpoint the accumulated reports after each experiment.
        create_checkpoint({'experiments': reports}, EXPERIMENT_PATH)
    return {'experiments': reports}
示例#5
0
def find_optimal_threshold(y_true: np.array, y_pred: np.array) -> tuple:
    """Pick the threshold that yields the highest F1 score.

    Candidate thresholds are the distinct values of ``y_pred`` at positions
    where ``y_true == 1``. Each candidate is applied with ``classify`` and
    scored through the ``'f1_score'`` entry of ``get_metrics``.

    Args:
        y_true: Ground-truth labels; positives are the entries equal to 1.
        y_pred: Per-sample scores, indexable by a boolean mask of ``y_true``.

    Returns:
        A ``(threshold, f1_score)`` pair for the best-scoring candidate.

    Raises:
        ValueError: If ``y_true`` contains no positive samples, so there are
            no candidate thresholds to evaluate.
    """
    candidates = set(y_pred[y_true == 1])
    if not candidates:
        # Fail with an explicit message instead of the opaque
        # "max() arg is an empty sequence" raised further down.
        raise ValueError(
            'find_optimal_threshold: y_true contains no positive (== 1) '
            'samples, so no candidate threshold can be evaluated')

    ret = {
        th: get_metrics(y_true, classify(y_pred, th))['f1_score']
        for th in candidates
    }
    return max(ret.items(), key=lambda x: x[1])
def evaluate_unsupervised(x_test: np.ndarray, y_test: np.array,
                          experiments: Dict) -> Dict:
    """Score the best stored unsupervised model on the test split.

    The configuration comes from ``find_best_model(experiments)`` and the
    model is loaded with ``torch.load`` from its ``'model_path'`` entry.

    Args:
        x_test: Test samples passed to the model.
        y_test: Ground-truth labels for ``x_test``.
        experiments: Experiment results handed to ``find_best_model``.

    Returns:
        The result of ``create_report`` for the chosen configuration and the
        metrics extended with an ``'auc_score'`` entry.
    """
    best = find_best_model(experiments)
    detector = torch.load(best['model_path'])

    # LOF is fitted and scored on the test split in one call; every other
    # model only predicts.
    predict = detector.fit_predict if isinstance(detector, LocalOutlierFactor) else detector.predict
    labels = convert_predictions(predict(x_test))

    # AUC is computed on the converted labels here, not on raw scores.
    auc = roc_auc_score(y_test, labels)
    metrics_report(y_test, labels)

    summary = dict(get_metrics(y_test, labels))
    summary['auc_score'] = float(auc)
    return create_report(best, summary)
def evaluate(x_test: np.ndarray, y_test: np.array, experiments: Dict) -> Dict:
    """Score the best stored model on the test split.

    The configuration comes from ``find_best_model(experiments)``; the model
    is loaded with ``torch.load`` from its ``'model_path'`` entry and the
    decision threshold is read from its ``'threshold'`` entry.

    Args:
        x_test: Test samples passed to ``model.predict``.
        y_test: Ground-truth labels for ``x_test``.
        experiments: Experiment results handed to ``find_best_model``.

    Returns:
        The result of ``create_report`` for the chosen configuration and the
        metrics extended with an ``'auc_score'`` entry.

    Side effects:
        Stores the raw predictions and the labels via ``np.savez('preds', ...)``.
    """
    best = find_best_model(experiments)

    detector = torch.load(best['model_path'])
    threshold = best['threshold']

    errors = detector.predict(x_test)  # reconstruction errors, not labels

    # Dump raw scores and labels, then compute AUC on the un-thresholded scores.
    np.savez('preds', y_pred=errors, y_test=y_test)
    auc = roc_auc_score(y_test, errors)

    labels = classify(errors, threshold)
    metrics_report(y_test, labels)

    summary = dict(get_metrics(y_test, labels))
    summary['auc_score'] = float(auc)
    return create_report(best, summary)
def random_search(data_and_labels: tuple, model: Union[AutoEncoder,
                                                       IsolationForest],
                  params: Dict) -> Dict:
    """Evaluate the model on position-wise combinations of hyperparameters.

    The i-th configuration takes the i-th value of every entry in ``params``
    (``zip`` semantics, not a Cartesian product).

    Args:
        data_and_labels: 4-tuple unpacked as ``(x_train, x_test, _, y_test)``;
            the third element is ignored.
        model: Model reconfigured in place with ``set_params`` for each
            configuration, then fitted on ``x_train``.
        params: Mapping from hyperparameter name to a sequence of values.

    Returns:
        ``{'experiments': [...]}`` with one report per configuration.

    Side effects:
        Prints each configuration and rewrites the checkpoint at
        ``EXPERIMENT_PATH`` after every configuration.
    """
    x_train, x_test, _, y_test = data_and_labels

    reports = []
    for values in zip(*params.values()):
        config = dict(zip(params.keys(), values))
        model.set_params(**config)

        print(f'Model current hyperparameters are: {config}.')

        model.fit(x_train)
        errors = model.predict(x_test)  # reconstruction errors, not labels

        # Choose the threshold on the test split, then binarise the errors.
        threshold, _ = find_optimal_threshold(y_test, errors)
        labels = classify(errors, threshold)
        metrics_report(y_test, labels)

        report = create_experiment_report(get_metrics(y_test, labels), config)
        reports.append(report)
        create_checkpoint({'experiments': reports}, EXPERIMENT_PATH)
    return {'experiments': reports}
示例#9
0
def train_window(x_train: List, x_test: List, y_train: np.array,
                 y_test: np.array) -> Dict:
    """Sweep the ``window`` setting of ``VanillaTCN`` over odd values 1..49.

    Both splits are scaled with a ``CustomMinMaxScaler`` fitted on the
    training split. For each window size a new one-epoch model is fitted on
    the training rows labelled 0 and scored on the test split.

    Args:
        x_train: Training samples.
        x_test: Test samples.
        y_train: Training labels; only rows equal to 0 are used for fitting.
        y_test: Test labels used for threshold selection and metrics.

    Returns:
        ``{'experiments': [...]}`` with one report per window size.

    Side effects:
        Prints each window size and rewrites the JSON checkpoint after every
        window size.
    """
    scaler = CustomMinMaxScaler()
    x_train = scaler.fit_transform(x_train)
    x_test = scaler.transform(x_test)

    reports = []
    for window in range(1, 50, 2):
        print('Window:', window)
        tcn = VanillaTCN(epochs=1, window=window)

        # Fit only on the training rows labelled 0.
        tcn.fit(x_train[y_train == 0])
        errors = tcn.predict(x_test)  # reconstruction errors, not labels

        threshold, _ = find_optimal_threshold(y_test, errors)
        labels = classify(errors, threshold)
        metrics_report(y_test, labels)

        report = create_experiment_report(get_metrics(y_test, labels),
                                          {'window': window})
        reports.append(report)
        create_checkpoint(
            {'experiments': reports},
            '../../models/TCN-cropped-window-embeddings-HDFS1.json')
    return {'experiments': reports}
    #     noise = np.random.normal(0, 1, 2000).reshape((20, 100))
    #     x[i, ...] = org + noise
    #
    # org = np.random.randn(20, 100)
    # for i in range(49000, 50000):
    #     noise = np.random.exponential(1, 2000).reshape((20, 100))
    #     x[i, ...] = org + noise
    #     y[i] = 1
    #
    # x_train, x_test, y_train, y_test = train_test_split(x, y, stratify=y, random_state=1)

    # NOTE(review): these statements follow a `return` at the same indentation,
    # so they are unreachable as written. They look like the tail of a separate
    # experiment whose header is not part of this listing -- confirm before reuse.

    # Fit the scaler on the training split, apply it to both splits, and cast
    # the results to float32.
    sc = CustomMinMaxScaler()
    x_train = sc.fit_transform(x_train).astype(np.float32)
    x_test = sc.transform(x_test).astype(np.float32)

    model = VanillaTCN(epochs=1, learning_rate=0.0001)
    # Calls a private initialiser directly; the meaning of the positional
    # arguments is not visible here -- TODO confirm against VanillaTCN.
    model._initialize_model(20, [20], 7, 0.0)
    # Fit only on the training rows labelled 0.
    model.fit(x_train[y_train == 0])

    y_pred = model.predict(x_test)

    # Try each score of a test sample labelled 1 as a threshold: everything
    # strictly above it is marked 1, and the best F1 score seen is kept.
    f1_max = 0
    for th in sorted(y_pred[y_test == 1]):
        tmp = np.zeros(shape=y_pred.shape)
        tmp[y_pred > th] = 1
        f1_max = max(f1_max, get_metrics(y_test, tmp)['f1_score'])
    print(f1_max)

    visualize_distribution_with_labels(y_pred, y_test)
    # Range of the scores assigned to test samples labelled 1.
    print(min(y_pred[y_test == 1]), max(y_pred[y_test == 1]))