示例#1
0
    def validation_step(self, batch, batch_idx, dataloader_idx):
        """Run one evaluation step and return the loss plus step metrics.

        Metric keys are prefixed with "train" when ``dataloader_idx`` is
        falsy (dataloader 0) and "val" otherwise. Raw model outputs and
        targets are returned as well so they can be aggregated at epoch end.
        """
        x, y, lengths = batch
        output, _ = self(x)
        prefix = "val" if dataloader_idx else "train"

        loss = self.loss(output, y)

        num_classes = self.hparams.num_classes
        y_np = y.cpu().detach().numpy()

        # Ranking metrics are computed from sigmoid probabilities.
        probs_np = torch.sigmoid(output.cpu()).detach().numpy()
        auroc, auprc = compute_auc(y_np, probs_np, num_classes)

        # Thresholded predictions feed the beta-score family of metrics.
        hard_prediction = probs_to_hard_predictions(
            output.cpu().detach().numpy(), num_classes)
        accuracy, f_measure, f_beta, g_beta = compute_beta_score(
            y_np, hard_prediction, 1, num_classes)

        scalar_metrics = [
            ("loss", loss),
            ("auroc", torch.tensor(auroc)),
            ("auprc", torch.tensor(auprc)),
            ("accuracy", torch.tensor(accuracy)),
            ("F_measure", torch.tensor(f_measure)),
            ("F_beta", torch.tensor(f_beta)),
            ("G_beta", torch.tensor(g_beta)),
        ]
        result = {"{}_{}".format(prefix, name): value
                  for name, value in scalar_metrics}
        result["output"] = output.detach()
        result["y"] = y.detach()
        return result
示例#2
0
 def test_auc(self):
     """compute_auc should match sklearn's trapezoidal AUC on known data."""
     from sklearn.metrics import auc
     ranks, labels = self._gen_metric_data()
     res = metrics.compute_auc(ranks, labels)
     TPRs = [1 / 3, 2 / 3, 1]
     FPRs = [1 / 3, 2 / 3, 1]
     # The `reorder` keyword was removed from sklearn.metrics.auc in
     # scikit-learn 0.22, so passing it raises TypeError on modern
     # versions. The FPRs above are already sorted ascending, so the
     # plain call is equivalent.
     self.assertAllCloseAccordingToType(res['AUC'], auc(FPRs, TPRs))
示例#3
0
def xgboost_test(extractor, opt):
    """Leave-one-study-out evaluation of an XGBoost classifier.

    For each of the 7 studies, trains on the merged remaining studies and
    evaluates on the held-out one, then prints per-metric averages for
    both the train and test splits.

    Args:
        extractor: callable (presumably a torch module) mapping raw data
            to a feature tensor; `.detach().numpy()` is called on its
            output, so it must return a tensor.
        opt: options object forwarded to get_merged_common_dataset.
    """
    import xgboost as xgb
    res = defaultdict(list)
    res_train = defaultdict(list)
    for study_num in range(7):
        train_set, test_set = get_merged_common_dataset(opt,
                                                        skip_study=study_num)
        train_data, train_labels = get_data(train_set)
        val_data, val_labels = get_data(test_set)
        # Embed raw inputs through the feature extractor before fitting.
        # (A previous dead `if True/else` branch that bypassed the
        # extractor has been removed.)
        train_features = extractor(train_data).detach().numpy()
        val_features = extractor(val_data).detach().numpy()
        # Train the model with early stopping on the held-out study.
        model = xgb.XGBClassifier()
        clf = model.fit(train_features,
                        train_labels.astype(int),
                        eval_set=[(val_features, val_labels)],
                        early_stopping_rounds=50,
                        verbose=True,
                        eval_metric='auc')

        print(val_data.shape)
        # Class balance of the held-out study (fraction of positives).
        res['bias'].append(val_labels.sum() / len(val_labels))
        print(res['bias'][-1])
        y_pred = clf.predict_proba(val_features)[:, 1]
        x_pred = clf.predict_proba(train_features)[:, 1]
        compute_metrics(res, val_labels.flatten() > 0.5, y_pred > 0.5)
        compute_auc(res, val_labels.flatten() > 0.5, y_pred)
        compute_metrics(res_train, train_labels.flatten() > 0.5, x_pred > 0.5)
        compute_auc(res_train, train_labels.flatten() > 0.5, x_pred)
    for key in res_train:
        ave = numpy.asarray(res_train[key]).mean(axis=0)
        print('Train {0}: {1}'.format(key, ave))
    for key in res:
        ave = numpy.asarray(res[key]).mean(axis=0)
        print('Test {0}: {1}'.format(key, ave))
示例#4
0
def _eval_dfm_variant(linear_feature_columns, dnn_feature_columns,
                      train_model_input, train_y, test_model_input, true_y,
                      results, **dfm_kwargs):
    """Build, train, and score one DeepFM variant.

    Appends auc / logloss / rmse for the trained model to *results*
    (a dict mapping metric name -> list). Extra keyword arguments
    (e.g. dnn_activation, dnn_dropout, dnn_hidden_units) are forwarded
    to the DeepFM constructor.
    """
    model = DeepFM(linear_feature_columns,
                   dnn_feature_columns,
                   task='binary',
                   **dfm_kwargs)
    model.compile(
        "adam",
        "binary_crossentropy",
        metrics=['binary_crossentropy'],
    )
    model.fit(
        train_model_input,
        train_y,
        batch_size=256,
        epochs=10,
        verbose=0,
        validation_split=TEST_PROPORTION,
    )
    pred_y = model.predict(test_model_input, batch_size=256)

    results["auc"].append(compute_auc(true_y, pred_y))
    results["logloss"].append(compute_log_loss(true_y, pred_y))
    results["rmse"].append(compute_rmse(true_y, pred_y))


def test_DFM_avazu(data, train, test):
    """Sweep DeepFM hyper-parameters on the avazu dataset.

    Runs three sweeps (activation function, dropout rate, hidden-unit
    configuration), collecting auc / logloss / rmse for each setting,
    and optionally plots the results when PLOT is set.

    Args:
        data: full dataframe used to size the categorical vocabularies.
        train: training split dataframe (label in column 0).
        test: test split dataframe.
    """
    print("\nTesting DFM on avazu dataset...\n")

    results_activation_function = {"auc": [], "logloss": [], "rmse": []}
    results_dropout = {"auc": [], "logloss": [], "rmse": []}
    results_number_of_neurons = {"auc": [], "logloss": [], "rmse": []}

    features_labels = train.columns

    # Column 0 is the click label; columns 1..22 are sparse categorical
    # features — TODO confirm against the avazu preprocessing.
    sparse_features_labels = features_labels[1:23]
    target_label = features_labels[0]

    dnn_feature_columns = [
        SparseFeat(
            feat,
            vocabulary_size=data[feat].nunique(),
            embedding_dim=4,
        ) for feat in sparse_features_labels
    ]
    linear_feature_columns = [
        SparseFeat(
            feat,
            vocabulary_size=data[feat].nunique(),
            embedding_dim=4,
        ) for feat in sparse_features_labels
    ]

    feature_names = get_feature_names(linear_feature_columns +
                                      dnn_feature_columns)

    train_model_input = {name: train[name] for name in feature_names}
    test_model_input = {name: test[name] for name in feature_names}

    train_y = train[target_label].values
    true_y = test[target_label].values

    print("\t\t-- ACTIVATION FUNCTIONS --\t\t")
    for dnn_activation in dnn_activation_list:
        print("\nTesting {dnn_activation}...".format(
            dnn_activation=dnn_activation))
        _eval_dfm_variant(linear_feature_columns, dnn_feature_columns,
                          train_model_input, train_y, test_model_input,
                          true_y, results_activation_function,
                          dnn_activation=dnn_activation)

    print("\t\t-- DROPOUT RATES --\t\t")
    for dnn_dropout in dnn_dropout_list:
        print("\nTesting {dnn_dropout}...".format(dnn_dropout=dnn_dropout))
        _eval_dfm_variant(linear_feature_columns, dnn_feature_columns,
                          train_model_input, train_y, test_model_input,
                          true_y, results_dropout,
                          dnn_dropout=dnn_dropout)

    print("\t\t-- HIDDEN UNITS --\t\t")
    for dnn_hidden_units in dnn_hidden_units_list:
        print("\nTesting {dnn_hidden_units}...".format(
            dnn_hidden_units=dnn_hidden_units))
        _eval_dfm_variant(linear_feature_columns, dnn_feature_columns,
                          train_model_input, train_y, test_model_input,
                          true_y, results_number_of_neurons,
                          dnn_hidden_units=dnn_hidden_units)

    if PLOT:
        create_plots("DFM", "avazu", results_activation_function,
                     "Activation Function", "activation_func",
                     dnn_activation_list)
        create_plots("DFM", "avazu", results_dropout, "Dropout Rate",
                     "dropout", dnn_dropout_list)
        create_plots("DFM", "avazu", results_number_of_neurons,
                     "Number of Neurons per layer", "nr_neurons",
                     dnn_hidden_units_list)