Example #1
def get_scores_full(labels, predictions, validation_test, total_training_loss,
                    total_validation_test_loss, epoch, comp_tar_pair_dataset,
                    fold_epoch_results):
    deep_dta_rm2 = get_rm2(np.asarray(labels), np.asarray(predictions))
    # deep_dta_aupr = get_aupr(np.asarray(labels), np.asarray(
    #    predictions))
    deep_dta_cindex = get_cindex(np.asarray(labels), np.asarray(predictions))
    deep_dta_mse = mse(np.asarray(labels), np.asarray(predictions))

    rmse_score = rmse(np.asarray(labels), np.asarray(predictions))
    pearson_score = pearson(np.asarray(labels), np.asarray(predictions))
    spearman_score = spearman(np.asarray(labels), np.asarray(predictions))
    ci_score = ci(np.asarray(labels), np.asarray(predictions))
    f1_score = f1(np.asarray(labels), np.asarray(predictions))
    ave_auc_score = average_AUC(np.asarray(labels), np.asarray(predictions))
    fold_epoch_results.append([
        deep_dta_rm2, deep_dta_cindex, deep_dta_mse, pearson_score,
        spearman_score, ci_score, f1_score, ave_auc_score
    ])
    print("Epoch:{}\tTraining Loss:{}\t{} Loss:{}".format(
        epoch, total_training_loss, validation_test,
        total_validation_test_loss))
    print("{} DeepDTA RM2:\t{}".format(validation_test, deep_dta_rm2))
    print("{} DeepDTA MSE\t{}".format(validation_test, deep_dta_mse))
    print("{} RMSE\t{}".format(validation_test, rmse_score))
    print("{} DeepDTA c-index\t{}".format(validation_test, deep_dta_cindex))
    print("{} Pearson:\t{}".format(validation_test, pearson_score))
    print("{} Spearman:\t{}".format(validation_test, spearman_score))
    print("{} Ci:\t{}".format(validation_test, ci_score))
    print("{} F1-Score:\t{}".format(validation_test, f1_score))
    print("{} Average_AUC:\t{}".format(validation_test, ave_auc_score))
Example #2
    def test():
        model.eval()
        regression_classifier = "r"
        total_validation_loss = 0.0
        total_validation_count = 0
        validation_predictions = []
        validation_labels = []

        # h = model.init_hidden(args.batch_size)

        with torch.no_grad():  # torch.set_grad_enabled(False):
            for i, data in enumerate(test_loader):
                # print("Validation")
                val_comp_feature_vectors, val_target_feature_vectors, val_labels, val_compound_ids, val_target_ids, val_number_of_comp_features, val_number_of_target_features = data
                val_comp_feature_vectors, val_target_feature_vectors, val_labels = Variable(
                    val_comp_feature_vectors).to(device), Variable(
                        val_target_feature_vectors).to(device), Variable(
                            val_labels).to(device)
                total_validation_count += val_comp_feature_vectors.shape[0]
                # print(val_labels)
                # if val_comp_feature_vectors.shape[0] == args.batch_size:
                val_inputs = None
                val_y_pred = None

                # val_y_pred, h = model(val_comp_feature_vectors, val_target_feature_vectors, h)
                val_y_pred, h = model(val_comp_feature_vectors,
                                      val_target_feature_vectors)
                loss_val = criterion(val_y_pred.squeeze(), val_labels)
                total_validation_loss += float(loss_val.item())
                #print(len(val_y_pred))

                for item in val_labels:
                    validation_labels.append(float(item.item()))
                for item in val_y_pred:
                    validation_predictions.append(float(item.item()))

        # print( len(validation_predictions), len(validation_labels))
        if regression_classifier == "r":
            rmse_score = rmse(np.asarray(validation_labels),
                              np.asarray(validation_predictions))
            pearson_score = pearson(np.asarray(validation_labels),
                                    np.asarray(validation_predictions))
            f1_score = f1(np.asarray(validation_labels),
                          np.asarray(validation_predictions))
            ave_auc_score = average_AUC(np.asarray(validation_labels),
                                        np.asarray(validation_predictions))

            print(
                "Test RMSE:{}\tF1-Score:{}\tAverage_AUC:{}\tValidation Loss:{}"
                .format(rmse_score, f1_score, ave_auc_score,
                        total_validation_loss))

        reporter(mean_loss=total_validation_loss, mean_accuracy=f1_score)
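The nested test() above reports its metrics through a reporter callback, which resembles the legacy function-based trainable API of Ray Tune. A minimal sketch under that assumption, with placeholder values standing in for the real training and evaluation code:

# Sketch only: assumes the legacy Ray Tune function-based trainable API,
# where the framework passes a reporter callback into the trainable.
from ray import tune


def trainable(config, reporter):
    # In the example above, model, criterion and test_loader would be built here
    # from config; the placeholder loop below only shows how reporter is called.
    for step in range(config["steps"]):
        placeholder_loss = 1.0 / (step + 1)         # stands in for total_validation_loss
        placeholder_score = 1.0 - placeholder_loss  # stands in for f1_score
        reporter(mean_loss=placeholder_loss, mean_accuracy=placeholder_score)


# tune.run(trainable, config={"steps": 10})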
Example #3
    def _test(self):
        self.model.eval()
        regression_classifier = "r"
        total_validation_loss = 0.0
        total_validation_count = 0
        validation_predictions = []
        validation_labels = []

        with torch.no_grad():  # torch.set_grad_enabled(False):
            for i, data in enumerate(self.test_loader):
                # print("Validation")
                val_comp_feature_vectors, val_target_feature_vectors, val_labels, val_compound_ids, val_target_ids, val_number_of_comp_features, val_number_of_target_features = data
                val_comp_feature_vectors, val_target_feature_vectors, val_labels = Variable(
                    val_comp_feature_vectors).to(
                        self.device), Variable(val_target_feature_vectors).to(
                            self.device), Variable(val_labels).to(self.device)
                # val_inputs = torch.cat((val_comp_feature_vectors, val_target_feature_vectors), 1)
                total_validation_count += val_comp_feature_vectors.shape[0]
                # print(val_comp_feature_vectors)
                # print(val_labels)
                val_inputs = None
                val_y_pred = None
                concat_models = [""]
                # print(self.model.parameters)
                modeltype = None
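                # NOTE: modeltype is None here, so the concat branch below is never taken
                # and the two-input forward pass is always used.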
                if modeltype in concat_models:
                    val_inputs = torch.cat(
                        (val_comp_feature_vectors, val_target_feature_vectors),
                        1)
                    val_y_pred = self.model(val_inputs)
                else:
                    # Forward pass: Compute predicted y by passing x to the model
                    # print("girdi")
                    val_y_pred = self.model(val_comp_feature_vectors,
                                            val_target_feature_vectors)

                # print(val_y_pred)
                criterion = torch.nn.MSELoss()
                loss_val = criterion(val_y_pred.squeeze(), val_labels)
                total_validation_loss += float(loss_val.item())

                for item in val_y_pred:
                    # for regression
                    # validation_predictions.append(float(item.data[0]))
                    # for classification
                    if regression_classifier == "r":
                        validation_predictions.append(float(item.data[0]))
                    else:
                        validation_predictions.append(
                            int(float(item.data[0]) >= 0.5))
                    # print(item.data[0], int(float(item.data[0])>=0.5))
                    # print("real pred", float(item.data[0]))
                    # print("loggedpred", -math.log10(10e-10*float(item.data[0])))
                    # validation_predictions.append(-math.log10(10e-10*float(item.data[0])))
                for item in val_labels:
                    # for regression
                    # validation_labels.append(float(item.data[0]))
                    # for classification
                    if regression_classifier == "r":
                        validation_labels.append(float(item.item()))
                    else:
                        validation_labels.append(int(item.item()))

                    # validation_labels.append(-math.log10(10e-10*float(item.data[0])))
        # print("validation predictions", validation_predictions)
        # print("validation labels", validation_labels)
        if regression_classifier == "r":
            rmse_score = rmse(np.asarray(validation_labels),
                              np.asarray(validation_predictions))
            pearson_score = pearson(np.asarray(validation_labels),
                                    np.asarray(validation_predictions))
            # spearman_score = spearman(np.asarray(validation_labels), np.asarray(validation_predictions))
            # ci_score = ci(np.asarray(validation_labels), np.asarray(validation_predictions))
            f1_score = f1(np.asarray(validation_labels),
                          np.asarray(validation_predictions))
            ave_auc_score = average_AUC(np.asarray(validation_labels),
                                        np.asarray(validation_predictions))
            print(
                "================================================================================"
            )
            print("Fold:{}\tEpoch:{}\tTest RMSE:{}\tValidation Loss:{}".format(
                0 + 1, 0, rmse_score, total_validation_loss))
            print("RMSE:\t{}".format(
                rmse_score))  # rmse, pearson, spearman, ci, f1, average_AUC
            #print("Pearson:\t{}".format(pearson_score))
            #print("Spearman:\t{}".format(spearman_score))
            #print("Ci:\t{}".format(ci_score))
            print("F1-Score:\t{}".format(f1_score))
            print("Average_AUC:\t{}".format(ave_auc_score))
            # print("IDG File:\t{}".format(comp_tar_pair_dataset))
            #print("Number of training samples:\t{}".format(total_training_count))
            # print("Number of validation samples:\t{}".format(total_validation_count))

            return {"RMSE": rmse_score, "F1-Score": f1_score}
        else:
            f1_score = sklearn.metrics.f1_score(validation_labels,
                                                validation_predictions)
            accuracy_score = sklearn.metrics.accuracy_score(
                validation_labels, validation_predictions)
            print(
                "================================================================================"
            )
            print("Fold:{}\tEpoch:{}\tTest F1:{}\tValidation Loss:{}".format(
                0 + 1, 0, f1_score, total_validation_loss))

            print("F1 Score:\t{}".format(f1_score))
            print("Accuracy:\t{}.".format(accuracy_score))

        return {
            "neg_mean_loss": -1 * total_validation_loss,
            "mean_accuracy": accuracy_score
        }
Example #4
def train_networks(mod, comp_feat, tar_feat, comp_hidden_lst, tar_hidden_lst,
                   fc1, fc2, lr, comp_tar_pair_dataset, regression_classifier):
    torch.manual_seed(1)
    modeltype = mod
    comp_feature_list = comp_feat.split("_")
    tar_feature_list = tar_feat.split("_")
    comp_hidden_lst = [int(neuron) for neuron in comp_hidden_lst.split("_")]
    tar_hidden_lst = [int(neuron) for neuron in tar_hidden_lst.split("_")]
    fc1 = int(fc1)
    fc2 = int(fc2)
    learn_rate = float(lr)
    print(modeltype, comp_feature_list, tar_feature_list, fc1, fc2, learn_rate)
    #learn_rate = sys.argv[2]
    n_epoch = 10
    num_of_folds = 1
    batch_size = 64

    # comp_tar_pair_dataset = "idg_comp_targ_uniq_inter_filtered.csv"
    comp_tar_pair_test_dataset = "comp_targ_affinity.csv"

    use_gpu = torch.cuda.is_available()

    device = "cpu"

    if use_gpu:
        print("GPU is available on this device!")
        device = "cuda"
    else:
        print("CPU is available on this device!")
    device = "cpu"
    # comp_tar_pair_dataset = "dummy_Dtc_comp_targ_uniq_inter_filtered_onlykinase.txt"

    # comp_feature_list = ["comp_dummy_feat_1", "comp_dummy_feat_2"]
    # tar_feature_list = ["prot_dummy_feat_1", "prot_dummy_feat_2"]
    # comp_feature_list = ["comp_dummy_feat_1"]
    # tar_feature_list = ["prot_dummy_feat_1"]
    # comp_feature_list = ["ecfp4", "fcfp4", "rdk5"]
    # tar_feature_list = ["k-sep-bigrams", "APAAC", "DDE", "pfam", "spmap_final"]
    # comp_feature_list = ["ecfp4"]
    # tar_feature_list = ["k-sep-bigrams"]

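    # NOTE: final_training is assumed to be defined at module scope; it is not set inside this function.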
    if final_training:
        train_loader, number_of_comp_features, number_of_target_features = get_full_training_data_loader(
            batch_size, comp_feature_list, tar_feature_list,
            comp_tar_pair_dataset, regression_classifier)

        test_loader = get_test_loader_challenge(comp_feature_list,
                                                tar_feature_list)
        test_predictions = []
        original_number_of_comp_features = int(number_of_comp_features)
        original_number_of_target_features = int(number_of_target_features)

        print(original_number_of_comp_features,
              original_number_of_target_features)

        total_number_of_features = number_of_comp_features + number_of_target_features
        concat_models = ["FC1", "FC1M", "FC2", "FC3", "FC3M"]

        number_of_comp_features = original_number_of_comp_features
        number_of_target_features = original_number_of_target_features
        model = None
        if modeltype == "FC1":
            model = FCModel1(total_number_of_features).to(device)
        elif modeltype == "FC1M":
            model = FCModel1_M(total_number_of_features).to(device)
        elif modeltype == "FC2":
            model = FCModel2(total_number_of_features).to(device)
        elif modeltype == "FC3":
            model = FCModel_3_Hidden(total_number_of_features, 1024, 400, 200,
                                     0.5).to(device)
        elif modeltype == "FC3M":
            model = FCModel_3_Hidden_with_Modules(total_number_of_features,
                                                  1024, 400, 200,
                                                  0.5).to(device)
        else:
            model = FC_PINNModel_2_2_2_Modules(
                number_of_comp_features, comp_hidden_lst[0],
                comp_hidden_lst[1], number_of_target_features,
                tar_hidden_lst[0], tar_hidden_lst[1], fc1, fc2,
                regression_classifier).to(device)
        # print(model.parameters)
        #optimizer = torch.optim.Adam(model.parameters(), lr=learn_rate)
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=learn_rate,
                                    momentum=0.507344802825)
        criterion = torch.nn.MSELoss()
        optimizer.zero_grad()

        for epoch in range(n_epoch):

            total_training_loss = 0.0
            total_training_count = 0
            batch_number = 0
            model.train()
            for i, data in enumerate(train_loader):

                batch_number += 1
                # get the inputs
                comp_feature_vectors, target_feature_vectors, labels, compound_ids, target_ids, number_of_comp_features, number_of_target_features = data
                # wrap them in Variable
                comp_feature_vectors, target_feature_vectors, labels = Variable(
                    comp_feature_vectors).to(device), Variable(
                        target_feature_vectors).to(device), Variable(
                            labels).to(device)
                inputs = None
                y_pred = None

                total_training_count += comp_feature_vectors.shape[0]
                if modeltype in concat_models:
                    inputs = torch.cat(
                        (comp_feature_vectors, target_feature_vectors), 1)
                    y_pred = model(inputs)
                else:
                    # Forward pass: Compute predicted y by passing x to the model
                    y_pred = model(comp_feature_vectors,
                                   target_feature_vectors)
                # Compute and print loss
                loss = criterion(y_pred.squeeze(), labels)
                total_training_loss += float(loss.item())

                loss.backward()
                optimizer.step()
                # clear gradient DO NOT forget you fool!
                optimizer.zero_grad()
            print("Epoch: {}, Loss: {}".format(epoch, total_training_loss))

        model.eval()
        with torch.no_grad():  # torch.set_grad_enabled(False):
            for i, data in enumerate(test_loader):
                # print("Validation")
                test_comp_feature_vectors, test_target_feature_vectors, test_compound_ids, test_target_ids, test_number_of_comp_features, test_number_of_target_features = data
                test_comp_feature_vectors, test_target_feature_vectors = Variable(
                    test_comp_feature_vectors).to(device), Variable(
                        test_target_feature_vectors).to(device)
                # print(test_compound_ids)

                test_inputs = None
                test_y_pred = None

                if modeltype in concat_models:
                    test_inputs = torch.cat((test_comp_feature_vectors,
                                             test_target_feature_vectors), 1)
                    test_y_pred = model(test_inputs)
                else:
                    # Forward pass: Compute predicted y by passing x to the model
                    test_y_pred = model(test_comp_feature_vectors,
                                        test_target_feature_vectors)

                for item in test_y_pred:
                    test_predictions.append([
                        test_compound_ids[0], test_target_ids[0],
                        float(item.data[0])
                    ])
            for pred in test_predictions:
                print("{}\t{}\t{}".format(pred[0], pred[1], pred[2]))
            # print(test_predictions)

    else:
        loader_fold_dict, number_of_comp_features, number_of_target_features = get_nfold_data_loader_dict(
            num_of_folds, batch_size, comp_feature_list, tar_feature_list,
            comp_tar_pair_dataset, regression_classifier)
        test_loader = get_test_loader(comp_feature_list, tar_feature_list,
                                      comp_tar_pair_test_dataset)

        original_number_of_comp_features = int(number_of_comp_features)
        original_number_of_target_features = int(number_of_target_features)

        print(original_number_of_comp_features,
              original_number_of_target_features)

        total_number_of_features = number_of_comp_features + number_of_target_features
        # feature_lst = ["tri_gram", "spmap", "pfam", "k_sep_bigrams", "DDE", "APAAC"]
        # feature_lst = ["k_sep_bigrams", "APAAC"]

        concat_models = ["FC1", "FC1M", "FC2", "FC3", "FC3M"]
        rmse_fold_lst = [-100000.0 for i in range(num_of_folds)]
        pearson_fold_lst = [-100000.0 for i in range(num_of_folds)]
        spearman_fold_lst = [-100000.0 for i in range(num_of_folds)]
        ci_fold_lst = [-100000.0 for i in range(num_of_folds)]
        f1_fold_lst = [-100000.0 for i in range(num_of_folds)]
        auc_fold_lst = [-100000.0 for i in range(num_of_folds)]

        for fold in range(num_of_folds):
            train_loader, valid_loader = loader_fold_dict[fold]
            # Just to check if everything is OK.
            # Remove this when you finish testing.
            print("FOLD : {}".format(fold + 1))
            #print(len(train_loader), len(valid_loader))
            number_of_comp_features = original_number_of_comp_features
            number_of_target_features = original_number_of_target_features
            model = None
            if modeltype == "FC1":
                model = FCModel1(total_number_of_features).to(device)
            elif modeltype == "FC1M":
                model = FCModel1_M(total_number_of_features).to(device)
            elif modeltype == "FC2":
                model = FCModel2(total_number_of_features).to(device)
            elif modeltype == "FC3":
                model = FCModel_3_Hidden(total_number_of_features, 1024, 400,
                                         200, 0.5).to(device)
            elif modeltype == "FC3M":
                model = FCModel_3_Hidden_with_Modules(total_number_of_features,
                                                      1024, 400, 200,
                                                      0.5).to(device)
            else:
                model = FC_PINNModel_2_2_2(
                    number_of_comp_features, comp_hidden_lst[0],
                    comp_hidden_lst[1], number_of_target_features,
                    tar_hidden_lst[0], tar_hidden_lst[1], fc1, fc2,
                    regression_classifier).to(device)
            # print(model.parameters)
            #optimizer = torch.optim.Adam(model.parameters(), lr=learn_rate)
            optimizer = torch.optim.SGD(model.parameters(),
                                        lr=learn_rate,
                                        momentum=0.507344802825)
            criterion = torch.nn.MSELoss()
            optimizer.zero_grad()

            for epoch in range(n_epoch):

                total_training_loss = 0.0
                total_validation_loss = 0.0
                total_training_count = 0
                total_validation_count = 0
                validation_predictions = []
                test_predictions = []
                validation_labels = []
                batch_number = 0
                model.train()
                for i, data in enumerate(train_loader):

                    batch_number += 1
                    # get the inputs
                    comp_feature_vectors, target_feature_vectors, labels, compound_ids, target_ids, number_of_comp_features, number_of_target_features = data
                    # wrap them in Variable
                    comp_feature_vectors, target_feature_vectors, labels = Variable(
                        comp_feature_vectors).to(device), Variable(
                            target_feature_vectors).to(device), Variable(
                                labels).to(device)
                    inputs = None
                    y_pred = None

                    total_training_count += comp_feature_vectors.shape[0]
                    if modeltype in concat_models:
                        inputs = torch.cat(
                            (comp_feature_vectors, target_feature_vectors), 1)
                        y_pred = model(inputs)
                    else:
                        # Forward pass: Compute predicted y by passing x to the model
                        y_pred = model(comp_feature_vectors,
                                       target_feature_vectors)
                    # Compute and print loss
                    loss = criterion(y_pred.squeeze(), labels)
                    total_training_loss += float(loss.item())

                    loss.backward()
                    optimizer.step()
                    # clear gradient DO NOT forget you fool!
                    optimizer.zero_grad()

                model.eval()
                with torch.no_grad():  # torch.set_grad_enabled(False):
                    for i, data in enumerate(valid_loader):
                        #print("Validation")
                        val_comp_feature_vectors, val_target_feature_vectors, val_labels, val_compound_ids, val_target_ids, val_number_of_comp_features, val_number_of_target_features = data
                        val_comp_feature_vectors, val_target_feature_vectors, val_labels = Variable(
                            val_comp_feature_vectors).to(device), Variable(
                                val_target_feature_vectors).to(
                                    device), Variable(val_labels).to(device)
                        # val_inputs = torch.cat((val_comp_feature_vectors, val_target_feature_vectors), 1)
                        total_validation_count += val_comp_feature_vectors.shape[
                            0]

                        val_inputs = None
                        val_y_pred = None

                        if modeltype in concat_models:
                            val_inputs = torch.cat(
                                (val_comp_feature_vectors,
                                 val_target_feature_vectors), 1)
                            val_y_pred = model(val_inputs)
                        else:
                            # Forward pass: Compute predicted y by passing x to the model
                            val_y_pred = model(val_comp_feature_vectors,
                                               val_target_feature_vectors)

                        # print(val_y_pred)
                        loss_val = criterion(val_y_pred.squeeze(), val_labels)
                        total_validation_loss += float(loss_val.item())

                        for item in val_y_pred:
                            validation_predictions.append(float(item.data[0]))
                        for item in val_labels:
                            validation_labels.append(float(item.item()))

                    for i, data in enumerate(test_loader):
                        #print("Validation")
                        test_comp_feature_vectors, test_target_feature_vectors, test_compound_ids, test_target_ids, test_number_of_comp_features, test_number_of_target_features = data
                        test_comp_feature_vectors, test_target_feature_vectors = Variable(
                            test_comp_feature_vectors).to(device), Variable(
                                test_target_feature_vectors).to(device)
                        #print(test_compound_ids)

                        test_inputs = None
                        test_y_pred = None

                        if modeltype in concat_models:
                            test_inputs = torch.cat(
                                (test_comp_feature_vectors,
                                 test_target_feature_vectors), 1)
                            test_y_pred = model(test_inputs)
                        else:
                            # Forward pass: Compute predicted y by passing x to the model
                            test_y_pred = model(test_comp_feature_vectors,
                                                test_target_feature_vectors)

                        for item in test_y_pred:
                            test_predictions.append([
                                test_compound_ids[0], test_target_ids[0],
                                float(item.data[0])
                            ])

                    print(test_predictions)
                rmse_score = rmse(np.asarray(validation_labels),
                                  np.asarray(validation_predictions))
                pearson_score = pearson(np.asarray(validation_labels),
                                        np.asarray(validation_predictions))
                spearman_score = spearman(np.asarray(validation_labels),
                                          np.asarray(validation_predictions))
                ci_score = ci(np.asarray(validation_labels),
                              np.asarray(validation_predictions))
                f1_score = f1(np.asarray(validation_labels),
                              np.asarray(validation_predictions))
                ave_auc_score = average_AUC(np.asarray(validation_labels),
                                            np.asarray(validation_predictions))
                print(
                    "================================================================================"
                )
                print(
                    "Fold:{}, Epoch:{}, Training Loss:{}, Validation Loss:{}".
                    format(fold + 1, epoch, total_training_loss,
                           total_validation_loss))
                print("RMSE:\t{}".format(rmse_score)
                      )  # rmse, pearson, spearman, ci, f1, average_AUC
                print("Pearson:\t{}".format(pearson_score))
                print("Spearman:\t{}".format(spearman_score))
                print("Ci:\t{}".format(ci_score))
                print("F1-Score:\t{}".format(f1_score))
                print("Average_AUC:\t{}".format(ave_auc_score))
                print("Number of training samples:\t{}".format(
                    total_training_count))
                print("Number of validation samples:\t{}".format(
                    total_validation_count))

                rmse_fold_lst[fold] = rmse_score
                pearson_fold_lst[fold] = pearson_score
                spearman_fold_lst[fold] = spearman_score
                ci_fold_lst[fold] = ci_score
                f1_fold_lst[fold] = f1_score
                auc_fold_lst[fold] = ave_auc_score

        average_rmse_fold = sum(rmse_fold_lst) / num_of_folds
        average_pearson_fold = sum(pearson_fold_lst) / num_of_folds
        average_spearman_fold = sum(spearman_fold_lst) / num_of_folds
        average_ci_fold = sum(ci_fold_lst) / num_of_folds
        average_f1_fold = sum(f1_fold_lst) / num_of_folds
        average_auc_fold = sum(auc_fold_lst) / num_of_folds

        print(
            "-----------------------------------------------------------------"
        )
        print(
            "ave_result\tmodeltype\tcomp_feature_list\ttar_feature_list\tcomp_hidden_lst\ttar_hidden_lst\tfc1\tfc2\tlearn_rate\tave_rmse\tave_pearson\tave_spearman\taverage_cie\taverage_f1score\tave_ave_auc"
        )
        print(
            "average_results\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}"
            .format(modeltype, comp_feature_list, tar_feature_list,
                    comp_hidden_lst, tar_hidden_lst, fc1, fc2, learn_rate,
                    average_rmse_fold, average_pearson_fold,
                    average_spearman_fold, average_ci_fold, average_f1_fold,
                    average_auc_fold))
        """
Example #5
def train_networks(mod, comp_feat, tar_feat, comp_hidden_lst, tar_hidden_lst,
                   fc1, fc2, lr, comp_tar_pair_dataset, regression_classifier):

    modeltype = mod
    torch.manual_seed(1)
    comp_feature_list = comp_feat.split("_")
    tar_feature_list = tar_feat.split("_")
    comp_hidden_lst = [int(neuron) for neuron in comp_hidden_lst.split("_")]
    tar_hidden_lst = [int(neuron) for neuron in tar_hidden_lst.split("_")]
    fc1 = int(fc1)
    fc2 = int(fc2)
    learn_rate = float(lr)
    print(modeltype, comp_feature_list, tar_feature_list, fc1, fc2, learn_rate)
    #learn_rate = sys.argv[2]
    n_epoch = 100
    num_of_folds = 5
    batch_size = 64

    #comp_tar_pair_dataset = "idg_comp_targ_uniq_inter_filtered.csv"

    use_gpu = torch.cuda.is_available()

    device = "cpu"

    if use_gpu:
        print("GPU is available on this device!")
        device = "cuda"
    else:
        print("CPU is available on this device!")

    # comp_tar_pair_dataset = "dummy_Dtc_comp_targ_uniq_inter_filtered_onlykinase.txt"

    # comp_feature_list = ["comp_dummy_feat_1", "comp_dummy_feat_2"]
    # tar_feature_list = ["prot_dummy_feat_1", "prot_dummy_feat_2"]
    # comp_feature_list = ["comp_dummy_feat_1"]
    # tar_feature_list = ["prot_dummy_feat_1"]
    # comp_feature_list = ["ecfp4", "fcfp4", "rdk5"]
    # tar_feature_list = ["k-sep-bigrams", "APAAC", "DDE", "pfam", "spmap_final"]
    # comp_feature_list = ["ecfp4"]
    # tar_feature_list = ["k-sep-bigrams"]

    loader_fold_dict, number_of_comp_features, number_of_target_features = get_nfold_data_loader_dict(
        num_of_folds, batch_size, comp_feature_list, tar_feature_list,
        comp_tar_pair_dataset, regression_classifier)

    original_number_of_comp_features = int(number_of_comp_features)
    original_number_of_target_features = int(number_of_target_features)

    print(original_number_of_comp_features, original_number_of_target_features)

    total_number_of_features = number_of_comp_features + number_of_target_features
    # feature_lst = ["tri_gram", "spmap", "pfam", "k_sep_bigrams", "DDE", "APAAC"]
    # feature_lst = ["k_sep_bigrams", "APAAC"]

    concat_models = ["FC1", "FC1M", "FC2", "FC3", "FC3M"]
    rmse_fold_lst = [-100000.0 for i in range(num_of_folds)]
    pearson_fold_lst = [-100000.0 for i in range(num_of_folds)]
    spearman_fold_lst = [-100000.0 for i in range(num_of_folds)]
    ci_fold_lst = [-100000.0 for i in range(num_of_folds)]
    f1_fold_lst = [-100000.0 for i in range(num_of_folds)]
    auc_fold_lst = [-100000.0 for i in range(num_of_folds)]

    for fold in range(num_of_folds):
        train_loader, valid_loader = loader_fold_dict[fold]
        # Just to check if everything is OK.
        # Remove this when you finish testing.
        print("FOLD : {}".format(fold + 1))
        #print(len(train_loader), len(valid_loader))
        number_of_comp_features = original_number_of_comp_features
        number_of_target_features = original_number_of_target_features
        model = None
        if modeltype == "FC1":
            model = FCModel1(total_number_of_features).to(device)
        elif modeltype == "FC1M":
            model = FCModel1_M(total_number_of_features).to(device)
        elif modeltype == "FC2":
            model = FCModel2(total_number_of_features).to(device)
        elif modeltype == "FC3":
            model = FCModel_3_Hidden(total_number_of_features, 1024, 400, 200,
                                     0.5).to(device)
        elif modeltype == "FC3M":
            model = FCModel_3_Hidden_with_Modules(total_number_of_features,
                                                  1024, 400, 200,
                                                  0.5).to(device)
        elif modeltype == "PINN_2_2":
            model = FC_PINNModel_2_2_2_Modules(
                number_of_comp_features, comp_hidden_lst[0],
                comp_hidden_lst[1], number_of_target_features,
                tar_hidden_lst[0], tar_hidden_lst[1], fc1, fc2,
                regression_classifier).to(device)
        elif modeltype == "PINN_2_3":
            model = FC_PINNModel_2_3_2_Modules(
                number_of_comp_features, comp_hidden_lst[0],
                comp_hidden_lst[1], number_of_target_features,
                tar_hidden_lst[0], tar_hidden_lst[1], tar_hidden_lst[2], fc1,
                fc2).to(device)
        # print(model.parameters)
        # optimizer = torch.optim.Adam(model.parameters(), lr=learn_rate)
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=learn_rate,
                                    momentum=0.507344802825)
        criterion = None
        if regression_classifier == "r":
            criterion = torch.nn.MSELoss()
        else:
            criterion = torch.nn.BCELoss()
        optimizer.zero_grad()

        for epoch in range(n_epoch):

            total_training_loss = 0.0
            total_validation_loss = 0.0
            total_training_count = 0
            total_validation_count = 0
            validation_predictions = []
            validation_labels = []
            batch_number = 0
            model.train()
            for i, data in enumerate(train_loader):

                batch_number += 1
                # get the inputs
                comp_feature_vectors, target_feature_vectors, labels, compound_ids, target_ids, number_of_comp_features, number_of_target_features = data
                # wrap them in Variable
                comp_feature_vectors, target_feature_vectors, labels = Variable(
                    comp_feature_vectors).to(device), Variable(
                        target_feature_vectors).to(device), Variable(
                            labels).to(device)
                # print(labels)
                inputs = None
                y_pred = None

                total_training_count += comp_feature_vectors.shape[0]
                if modeltype in concat_models:
                    inputs = torch.cat(
                        (comp_feature_vectors, target_feature_vectors), 1)
                    y_pred = model(inputs)
                else:
                    # Forward pass: Compute predicted y by passing x to the model
                    y_pred = model(comp_feature_vectors,
                                   target_feature_vectors)

                # Compute and print loss
                # loss = criterion(y_pred.squeeze(), labels)
                # print(y_pred, labels)

                # print(len(weights), len(labels))

                weights = []

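                # Labels are thresholded at 7.0 here (copy=False requests an in-place binarize)
                # before the per-sample weights are built below.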
                binary_labels = preprocessing.binarize(labels.reshape(1, -1),
                                                       threshold=7.0,
                                                       copy=False)[0]

                if regression_classifier == "c":
                    for lbl in labels:
                        weights.append([3, 1])
                if regression_classifier == "r":
                    for lbl in labels:
                        if int(lbl) == 1:
                            weights.append(1000)
                        else:
                            weights.append(1)

                weights = torch.FloatTensor(weights).to(device)
                # print(labels)
                # print(weights)
                # print(binary_labels)
                loss = None
                if regression_classifier == "r":
                    if len(weights) == 64:
                        criterion.weight = weights
                    else:
                        criterion.weight = None

                    loss = criterion(y_pred.squeeze(), labels)
                else:
                    """
                    if len(weights)==64:
                        criterion.weight = weights
                    else:
                        criterion.weight=None
                    """
                    loss = criterion(y_pred.squeeze(), labels)
                # print(loss)
                total_training_loss += float(loss.item())
                # print(y_pred)
                loss.backward()
                optimizer.step()
                # clear gradient DO NOT forget you fool!
                optimizer.zero_grad()

            model.eval()
            with torch.no_grad():  # torch.set_grad_enabled(False):
                for i, data in enumerate(valid_loader):
                    #print("Validation")
                    val_comp_feature_vectors, val_target_feature_vectors, val_labels, val_compound_ids, val_target_ids, val_number_of_comp_features, val_number_of_target_features = data
                    val_comp_feature_vectors, val_target_feature_vectors, val_labels = Variable(
                        val_comp_feature_vectors).to(device), Variable(
                            val_target_feature_vectors).to(device), Variable(
                                val_labels).to(device)
                    # val_inputs = torch.cat((val_comp_feature_vectors, val_target_feature_vectors), 1)
                    total_validation_count += val_comp_feature_vectors.shape[0]

                    val_inputs = None
                    val_y_pred = None

                    if modeltype in concat_models:
                        val_inputs = torch.cat((val_comp_feature_vectors,
                                                val_target_feature_vectors), 1)
                        val_y_pred = model(val_inputs)
                    else:
                        # Forward pass: Compute predicted y by passing x to the model
                        val_y_pred = model(val_comp_feature_vectors,
                                           val_target_feature_vectors)

                    # print(val_y_pred)
                    loss_val = criterion(val_y_pred.squeeze(), val_labels)
                    total_validation_loss += float(loss_val.item())

                    for item in val_y_pred:
                        # for regression
                        #validation_predictions.append(float(item.data[0]))
                        # for classification
                        if regression_classifier == "r":
                            validation_predictions.append(float(item.data[0]))
                        else:
                            validation_predictions.append(
                                int(float(item.data[0]) >= 0.5))
                        #print(item.data[0], int(float(item.data[0])>=0.5))
                        # print("real pred", float(item.data[0]))
                        # print("loggedpred", -math.log10(10e-10*float(item.data[0])))
                        # validation_predictions.append(-math.log10(10e-10*float(item.data[0])))
                    for item in val_labels:
                        # for regression
                        #validation_labels.append(float(item.data[0]))
                        # for classification
                        if regression_classifier == "r":
                            validation_labels.append(float(item.item()))
                        else:
                            validation_labels.append(int(item.item()))

                        # validation_labels.append(-math.log10(10e-10*float(item.data[0])))
            # print("validation predictions", validation_predictions)
            # print("validation labels", validation_labels)
            if regression_classifier == "r":
                rmse_score = rmse(np.asarray(validation_labels),
                                  np.asarray(validation_predictions))
                pearson_score = pearson(np.asarray(validation_labels),
                                        np.asarray(validation_predictions))
                spearman_score = spearman(np.asarray(validation_labels),
                                          np.asarray(validation_predictions))
                ci_score = ci(np.asarray(validation_labels),
                              np.asarray(validation_predictions))
                f1_score = f1(np.asarray(validation_labels),
                              np.asarray(validation_predictions))
                ave_auc_score = average_AUC(np.asarray(validation_labels),
                                            np.asarray(validation_predictions))
                print(
                    "================================================================================"
                )
                print(
                    "Fold:{}\tEpoch:{}\tTest RMSE:{}\tTraining Loss:{}\tValidation Loss:{}"
                    .format(fold + 1, epoch, rmse_score, total_training_loss,
                            total_validation_loss))
                print("RMSE:\t{}".format(rmse_score)
                      )  # rmse, pearson, spearman, ci, f1, average_AUC
                print("Pearson:\t{}".format(pearson_score))
                print("Spearman:\t{}".format(spearman_score))
                print("Ci:\t{}".format(ci_score))
                print("F1-Score:\t{}".format(f1_score))
                print("Average_AUC:\t{}".format(ave_auc_score))
                print("IDG File:\t{}".format(comp_tar_pair_dataset))
                print("Number of training samples:\t{}".format(
                    total_training_count))
                print("Number of validation samples:\t{}".format(
                    total_validation_count))

                rmse_fold_lst[fold] = rmse_score
                pearson_fold_lst[fold] = pearson_score
                spearman_fold_lst[fold] = spearman_score
                ci_fold_lst[fold] = ci_score
                f1_fold_lst[fold] = f1_score
                auc_fold_lst[fold] = ave_auc_score
            else:
                f1_score = sklearn.metrics.f1_score(validation_labels,
                                                    validation_predictions)
                accuracy_score = sklearn.metrics.accuracy_score(
                    validation_labels, validation_predictions)
                print(
                    "================================================================================"
                )
                print(
                    "Fold:{}\tEpoch:{}\tTest F1:{}\tTraining Loss:{}\tValidation Loss:{}"
                    .format(fold + 1, epoch, f1_score, total_training_loss,
                            total_validation_loss))

                print("F1 Score:\t{}".format(f1_score))
                print("Accuracy:\t{}.".format(accuracy_score))
    if regression_classifier == "r":
        average_rmse_fold = sum(rmse_fold_lst) / num_of_folds
        average_pearson_fold = sum(pearson_fold_lst) / num_of_folds
        average_spearman_fold = sum(spearman_fold_lst) / num_of_folds
        average_ci_fold = sum(ci_fold_lst) / num_of_folds
        average_f1_fold = sum(f1_fold_lst) / num_of_folds
        average_auc_fold = sum(auc_fold_lst) / num_of_folds

        print(
            "-----------------------------------------------------------------"
        )
        print(
            "ave_result\tmodeltype\tcomp_feature_list\ttar_feature_list\tcomp_hidden_lst\ttar_hidden_lst\tfc1\tfc2\tlearn_rate\tave_rmse\tave_pearson\tave_spearman\taverage_cie\taverage_f1score\tave_ave_auc"
        )
        print(
            "average_results\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}"
            .format(comp_tar_pair_dataset, modeltype, comp_feature_list,
                    tar_feature_list, comp_hidden_lst, tar_hidden_lst, fc1,
                    fc2, learn_rate, average_rmse_fold, average_pearson_fold,
                    average_spearman_fold, average_ci_fold, average_f1_fold,
                    average_auc_fold))
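A hypothetical invocation of the function above, assuming it is called with underscore-joined feature and layer-size strings as the parsing logic at the top of the function suggests. The feature names come from the commented-out lists in the snippet; all sizes, the learning rate, and the dataset file name are illustrative assumptions, not values from the original project.

# Illustrative call only; every concrete value here is an assumption.
train_networks(mod="PINN_2_2",
               comp_feat="ecfp4",
               tar_feat="k-sep-bigrams",
               comp_hidden_lst="1024_512",
               tar_hidden_lst="1024_512",
               fc1="256",
               fc2="128",
               lr="0.001",
               comp_tar_pair_dataset="idg_comp_targ_uniq_inter_filtered.csv",
               regression_classifier="r")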
    """
Example #6
    def test():
        """
        model.eval()
        test_loss = 0
        correct = 0
        with torch.no_grad():
            for data, target in test_loader:
                if args.cuda:
                    data, target = data.cuda(), target.cuda()
                output = model(data)
                # sum up batch loss
                test_loss += F.nll_loss(output, target, reduction='sum').item()
                # get the index of the max log-probability
                pred = output.argmax(dim=1, keepdim=True)
                correct += pred.eq(
                    target.data.view_as(pred)).long().cpu().sum()

        test_loss = test_loss / len(test_loader.dataset)
        accuracy = float(correct.item()) / len(test_loader.dataset)
        print(test_loss, accuracy)
        reporter(mean_loss=test_loss, mean_accuracy=accuracy)
        """
        model.eval()
        regression_classifier = "r"
        total_validation_loss = 0.0
        total_validation_count = 0
        validation_predictions = []
        validation_labels = []
        criterion = torch.nn.MSELoss()
        with torch.no_grad():  # torch.set_grad_enabled(False):
            for i, data in enumerate(test_loader):
                # print("Validation")
                val_comp_feature_vectors, val_target_feature_vectors, val_labels, val_compound_ids, val_target_ids, val_number_of_comp_features, val_number_of_target_features = data
                val_comp_feature_vectors, val_target_feature_vectors, val_labels = Variable(
                    val_comp_feature_vectors).to(device), Variable(
                        val_target_feature_vectors).to(device), Variable(
                            val_labels).to(device)
                # val_inputs = torch.cat((val_comp_feature_vectors, val_target_feature_vectors), 1)
                total_validation_count += val_comp_feature_vectors.shape[0]
                val_inputs = None
                val_y_pred = None
                concat_models = [""]
                # print(self.model.parameters)
                modeltype = None
                if modeltype in concat_models:
                    val_inputs = torch.cat(
                        (val_comp_feature_vectors, val_target_feature_vectors),
                        1)
                    val_y_pred = model(val_inputs)
                else:
                    val_y_pred = model(val_comp_feature_vectors,
                                       val_target_feature_vectors)

                # print(val_y_pred)

                loss_val = criterion(val_y_pred.squeeze(), val_labels)
                total_validation_loss += float(loss_val.item())

                for item in val_y_pred:
                    if regression_classifier == "r":
                        validation_predictions.append(float(item.data[0]))
                    else:
                        validation_predictions.append(
                            int(float(item.data[0]) >= 0.5))
                for item in val_labels:
                    if regression_classifier == "r":
                        validation_labels.append(float(item.item()))
                    else:
                        validation_labels.append(int(item.item()))

        if regression_classifier == "r":
            rmse_score = rmse(np.asarray(validation_labels),
                              np.asarray(validation_predictions))
            pearson_score = pearson(np.asarray(validation_labels),
                                    np.asarray(validation_predictions))
            # spearman_score = spearman(np.asarray(validation_labels), np.asarray(validation_predictions))
            # ci_score = ci(np.asarray(validation_labels), np.asarray(validation_predictions))
            f1_score = f1(np.asarray(validation_labels),
                          np.asarray(validation_predictions))
            ave_auc_score = average_AUC(np.asarray(validation_labels),
                                        np.asarray(validation_predictions))

            print(
                "Test RMSE:{}\tF1-Score:{}\tAverage_AUC:{}\tValidation Loss:{}"
                .format(rmse_score, f1_score, ave_auc_score,
                        total_validation_loss))
            # print("F1-Score:\t{}".format(f1_score))
            #print("Average_AUC:\t{}".format(ave_auc_score))

            #return {"RMSE": rmse_score, "F1-Score": f1_score}
        else:
            f1_score = sklearn.metrics.f1_score(validation_labels,
                                                validation_predictions)
            accuracy_score = sklearn.metrics.accuracy_score(
                validation_labels, validation_predictions)
            print(
                "================================================================================"
            )
            print("Fold:{}\tEpoch:{}\tTest F1:{}\tValidation Loss:{}".format(
                0 + 1, 0, f1_score, total_validation_loss))

            print("F1 Score:\t{}".format(f1_score))
            print("Accuracy:\t{}.".format(accuracy_score))
        reporter(mean_loss=total_validation_loss, mean_accuracy=f1_score)
Example #7
def train_networks(comp_feature_list, tar_feature_list, comp_hidden_lst,
                   vocab_size, output_size, embedding_dim, hidden_dim,
                   n_rnn_layers, fc1, fc2, learn_rate, comp_tar_pair_dataset,
                   regression_classifier, batch_size):
    print("PARAMETERS:", comp_feature_list, tar_feature_list, comp_hidden_lst,
          vocab_size, output_size, embedding_dim, hidden_dim, n_rnn_layers,
          fc1, fc2, learn_rate, comp_tar_pair_dataset, regression_classifier,
          batch_size)
    torch.manual_seed(1)
    use_gpu = torch.cuda.is_available()

    device = "cpu"

    if use_gpu:
        print("GPU is available on this device!")
        device = "cuda"
    else:
        print("CPU is available on this device!")

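    # NOTE: num_of_folds and n_epoch are assumed to be defined at module scope; they are not set inside this function.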
    loader_fold_dict, number_of_comp_features, number_of_target_features = get_nfold_data_loader_dict(
        num_of_folds, batch_size, comp_feature_list, tar_feature_list,
        comp_tar_pair_dataset, regression_classifier)

    original_number_of_comp_features = int(number_of_comp_features)
    original_number_of_target_features = int(number_of_target_features)

    print(original_number_of_comp_features, original_number_of_target_features)

    for fold in range(num_of_folds):
        train_loader, valid_loader = loader_fold_dict[fold]
        print("FOLD : {}".format(fold + 1))
        number_of_comp_features = original_number_of_comp_features
        number_of_target_features = original_number_of_target_features
        print(int(number_of_comp_features), int(comp_hidden_lst[0]),
              int(comp_hidden_lst[1]), vocab_size, output_size, embedding_dim,
              hidden_dim, n_rnn_layers, fc1, fc2)
        model = CompFCNNTarRNN(int(number_of_comp_features),
                               int(comp_hidden_lst[0]),
                               int(comp_hidden_lst[1]), vocab_size,
                               output_size, embedding_dim, hidden_dim,
                               n_rnn_layers, fc1, fc2).to(device)
        # print(model)

        #optimizer = torch.optim.SGD(
        #    model.parameters(), lr=learn_rate, momentum=0.507344802825)
        optimizer = torch.optim.Adam(model.parameters(), lr=learn_rate)
        criterion = torch.nn.MSELoss()
        optimizer.zero_grad()

        for epoch in range(n_epoch):
            total_training_loss, total_validation_loss = 0.0, 0.0
            total_training_count, total_validation_count = 0, 0
            validation_predictions, validation_labels = [], []
            batch_number = 0

            h = model.init_hidden(batch_size)

            model.train()
            for i, data in enumerate(train_loader):
                batch_number += 1
                h = tuple([each.data for each in h])
                # clear gradient DO NOT forget you fool!
                optimizer.zero_grad()

                # get the inputs
                comp_feature_vectors, target_feature_vectors, labels, compound_ids, target_ids, number_of_comp_features, number_of_target_features = data
                # wrap them in Variable
                comp_feature_vectors, target_feature_vectors, labels = Variable(
                    comp_feature_vectors).to(device), Variable(
                        target_feature_vectors).to(device), Variable(
                            labels).to(device)
                if comp_feature_vectors.shape[0] == batch_size:
                    inputs = None
                    y_pred = None

                    total_training_count += comp_feature_vectors.shape[0]

                    y_pred, h = model(comp_feature_vectors,
                                      target_feature_vectors, h)

                    loss = criterion(y_pred.squeeze(), labels)

                    total_training_loss += float(loss.item())
                    loss.backward()
                    optimizer.step()

            print("Epoch {} training loss:".format(epoch), total_training_loss)

            h = model.init_hidden(batch_size)
            model.eval()
            with torch.no_grad():  # torch.set_grad_enabled(False):
                for i, data in enumerate(valid_loader):

                    val_comp_feature_vectors, val_target_feature_vectors, val_labels, val_compound_ids, val_target_ids, val_number_of_comp_features, val_number_of_target_features = data
                    val_comp_feature_vectors, val_target_feature_vectors, val_labels = Variable(
                        val_comp_feature_vectors).to(device), Variable(
                            val_target_feature_vectors).to(device), Variable(
                                val_labels).to(device)
                    total_validation_count += val_comp_feature_vectors.shape[0]

                    if val_comp_feature_vectors.shape[0] == batch_size:
                        val_inputs = None
                        val_y_pred = None

                        val_y_pred, h = model(val_comp_feature_vectors,
                                              val_target_feature_vectors, h)
                        loss_val = criterion(val_y_pred.squeeze(), val_labels)
                        total_validation_loss += float(loss_val.item())
                        for item in val_labels:
                            validation_labels.append(float(item.item()))

                        for item in val_y_pred:
                            validation_predictions.append(float(item.item()))

            if regression_classifier == "r":
                rmse_score = rmse(np.asarray(validation_labels),
                                  np.asarray(validation_predictions))
                pearson_score = pearson(np.asarray(validation_labels),
                                        np.asarray(validation_predictions))
                spearman_score = spearman(np.asarray(validation_labels),
                                          np.asarray(validation_predictions))
                ci_score = ci(np.asarray(validation_labels),
                              np.asarray(validation_predictions))
                f1_score = f1(np.asarray(validation_labels),
                              np.asarray(validation_predictions))
                ave_auc_score = average_AUC(np.asarray(validation_labels),
                                            np.asarray(validation_predictions))
                print(
                    "================================================================================"
                )
                print(
                    "Fold:{}\tEpoch:{}\tTest RMSE:{}\tTraining Loss:{}\tValidation Loss:{}"
                    .format(fold + 1, epoch, rmse_score, total_training_loss,
                            total_validation_loss))
                print("RMSE:\t{}".format(rmse_score)
                      )  # rmse, pearson, spearman, ci, f1, average_AUC
                print("Pearson:\t{}".format(pearson_score))
                print("Spearman:\t{}".format(spearman_score))
                print("Ci:\t{}".format(ci_score))
                print("F1-Score:\t{}".format(f1_score))
                print("Average_AUC:\t{}".format(ave_auc_score))
                print("IDG File:\t{}".format(comp_tar_pair_dataset))
                print("Number of training samples:\t{}".format(
                    total_training_count))
                print("Number of validation samples:\t{}".format(
                    total_validation_count))