# Assumed imports for this module. numpy / torch / scikit-learn are external;
# the metric helpers (get_rm2, get_cindex, mse, rmse, pearson, spearman, ci,
# f1, average_AUC), the model classes and the data-loader factories are
# assumed to come from this project's own modules.
import numpy as np
import sklearn.metrics
import torch
from sklearn import preprocessing
from torch.autograd import Variable


def get_scores_full(labels, predictions, validation_test, total_training_loss,
                    total_validation_test_loss, epoch, comp_tar_pair_dataset,
                    fold_epoch_results):
    # Convert once instead of re-wrapping the lists for every metric call.
    labels = np.asarray(labels)
    predictions = np.asarray(predictions)
    deep_dta_rm2 = get_rm2(labels, predictions)
    # deep_dta_aupr = get_aupr(labels, predictions)
    deep_dta_cindex = get_cindex(labels, predictions)
    deep_dta_mse = mse(labels, predictions)
    rmse_score = rmse(labels, predictions)
    pearson_score = pearson(labels, predictions)
    spearman_score = spearman(labels, predictions)
    ci_score = ci(labels, predictions)
    f1_score = f1(labels, predictions)
    ave_auc_score = average_AUC(labels, predictions)
    fold_epoch_results.append([
        deep_dta_rm2, deep_dta_cindex, deep_dta_mse, pearson_score,
        spearman_score, ci_score, f1_score, ave_auc_score
    ])
    print("Epoch:{}\tTraining Loss:{}\t{} Loss:{}".format(
        epoch, total_training_loss, validation_test,
        total_validation_test_loss))
    print("{} DeepDTA RM2:\t{}".format(validation_test, deep_dta_rm2))
    print("{} DeepDTA MSE:\t{}".format(validation_test, deep_dta_mse))
    print("{} RMSE:\t{}".format(validation_test, rmse_score))
    print("{} DeepDTA c-index:\t{}".format(validation_test, deep_dta_cindex))
    print("{} Pearson:\t{}".format(validation_test, pearson_score))
    print("{} Spearman:\t{}".format(validation_test, spearman_score))
    print("{} Ci:\t{}".format(validation_test, ci_score))
    print("{} F1-Score:\t{}".format(validation_test, f1_score))
    print("{} Average_AUC:\t{}".format(validation_test, ave_auc_score))
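# A minimal smoke test for get_scores_full on synthetic data. This only
# illustrates the calling convention; the metric helpers themselves must come
# from this project's modules, and every argument value below is illustrative
# only. The helper is never called at import time.
def _demo_get_scores_full():
    rng = np.random.RandomState(0)
    labels = rng.uniform(5.0, 9.0, size=100)
    predictions = labels + rng.normal(0.0, 0.5, size=100)
    fold_epoch_results = []
    get_scores_full(labels, predictions, "Validation",
                    total_training_loss=12.3,
                    total_validation_test_loss=4.5, epoch=0,
                    comp_tar_pair_dataset="comp_targ_affinity.csv",
                    fold_epoch_results=fold_epoch_results)
    # fold_epoch_results now holds one row:
    # [rm2, cindex, mse, pearson, spearman, ci, f1, average_AUC]
    return fold_epoch_results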
def test():
    # model, test_loader, criterion, device and reporter are module-level
    # globals supplied by the surrounding tuning harness.
    model.eval()
    regression_classifier = "r"
    total_validation_loss = 0.0
    total_validation_count = 0
    validation_predictions = []
    validation_labels = []
    # h = model.init_hidden(args.batch_size)
    with torch.no_grad():  # torch.set_grad_enabled(False):
        for i, data in enumerate(test_loader):
            val_comp_feature_vectors, val_target_feature_vectors, val_labels, \
                val_compound_ids, val_target_ids, val_number_of_comp_features, \
                val_number_of_target_features = data
            val_comp_feature_vectors = Variable(val_comp_feature_vectors).to(device)
            val_target_feature_vectors = Variable(val_target_feature_vectors).to(device)
            val_labels = Variable(val_labels).to(device)
            total_validation_count += val_comp_feature_vectors.shape[0]
            # val_y_pred, h = model(val_comp_feature_vectors, val_target_feature_vectors, h)
            # The stateless forward returns a single tensor; the original line
            # unpacked a hidden state h that this call does not return.
            val_y_pred = model(val_comp_feature_vectors,
                               val_target_feature_vectors)
            loss_val = criterion(val_y_pred.squeeze(), val_labels)
            total_validation_loss += float(loss_val.item())
            for item in val_labels:
                validation_labels.append(float(item.item()))
            for item in val_y_pred:
                validation_predictions.append(float(item.item()))
    if regression_classifier == "r":
        rmse_score = rmse(np.asarray(validation_labels),
                          np.asarray(validation_predictions))
        pearson_score = pearson(np.asarray(validation_labels),
                                np.asarray(validation_predictions))
        f1_score = f1(np.asarray(validation_labels),
                      np.asarray(validation_predictions))
        ave_auc_score = average_AUC(np.asarray(validation_labels),
                                    np.asarray(validation_predictions))
        print("Test RMSE:{}\tF1-Score:{}\tAverage_AUC:{}\tValidation Loss:{}"
              .format(rmse_score, f1_score, ave_auc_score,
                      total_validation_loss))
    reporter(mean_loss=total_validation_loss, mean_accuracy=f1_score)
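# The loop above pairs model.eval() with torch.no_grad(): eval() switches
# layers such as dropout and batch-norm to inference behaviour, while
# no_grad() turns off autograd bookkeeping so no graph is built. A minimal,
# self-contained sketch of the same pattern; the toy model, loader and all
# names are illustrative, not part of this project:
def _demo_eval_loop():
    toy_model = torch.nn.Linear(8, 1)
    toy_loader = [(torch.randn(4, 8), torch.randn(4)) for _ in range(3)]
    toy_criterion = torch.nn.MSELoss()
    toy_model.eval()
    total_loss, labels, preds = 0.0, [], []
    with torch.no_grad():
        for x, y in toy_loader:
            out = toy_model(x)
            total_loss += float(toy_criterion(out.squeeze(), y).item())
            labels.extend(float(t) for t in y)
            preds.extend(float(p) for p in out.view(-1))
    return total_loss, labels, preds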
def _test(self):
    self.model.eval()
    regression_classifier = "r"
    total_validation_loss = 0.0
    total_validation_count = 0
    validation_predictions = []
    validation_labels = []
    criterion = torch.nn.MSELoss()  # hoisted out of the batch loop
    # The concat path below is effectively disabled: modeltype is None and can
    # never be in concat_models, so the two-tower forward is always taken.
    concat_models = [""]
    modeltype = None
    with torch.no_grad():  # torch.set_grad_enabled(False):
        for i, data in enumerate(self.test_loader):
            val_comp_feature_vectors, val_target_feature_vectors, val_labels, \
                val_compound_ids, val_target_ids, val_number_of_comp_features, \
                val_number_of_target_features = data
            val_comp_feature_vectors = Variable(
                val_comp_feature_vectors).to(self.device)
            val_target_feature_vectors = Variable(
                val_target_feature_vectors).to(self.device)
            val_labels = Variable(val_labels).to(self.device)
            total_validation_count += val_comp_feature_vectors.shape[0]
            if modeltype in concat_models:
                val_inputs = torch.cat(
                    (val_comp_feature_vectors, val_target_feature_vectors), 1)
                val_y_pred = self.model(val_inputs)
            else:
                # Forward pass: predict y from the two feature blocks.
                val_y_pred = self.model(val_comp_feature_vectors,
                                        val_target_feature_vectors)
            loss_val = criterion(val_y_pred.squeeze(), val_labels)
            total_validation_loss += float(loss_val.item())
            for item in val_y_pred:
                # for regression / for classification
                if regression_classifier == "r":
                    validation_predictions.append(float(item.item()))
                else:
                    validation_predictions.append(
                        int(float(item.item()) >= 0.5))
            for item in val_labels:
                if regression_classifier == "r":
                    validation_labels.append(float(item.item()))
                else:
                    validation_labels.append(int(item.item()))
    if regression_classifier == "r":
        rmse_score = rmse(np.asarray(validation_labels),
                          np.asarray(validation_predictions))
        pearson_score = pearson(np.asarray(validation_labels),
                                np.asarray(validation_predictions))
        # spearman_score = spearman(np.asarray(validation_labels), np.asarray(validation_predictions))
        # ci_score = ci(np.asarray(validation_labels), np.asarray(validation_predictions))
        f1_score = f1(np.asarray(validation_labels),
                      np.asarray(validation_predictions))
        ave_auc_score = average_AUC(np.asarray(validation_labels),
                                    np.asarray(validation_predictions))
        print("=" * 80)
        print("Fold:{}\tEpoch:{}\tTest RMSE:{}\tValidation Loss:{}".format(
            0 + 1, 0, rmse_score, total_validation_loss))
        print("RMSE:\t{}".format(rmse_score))
        # print("Pearson:\t{}".format(pearson_score))
        print("F1-Score:\t{}".format(f1_score))
        print("Average_AUC:\t{}".format(ave_auc_score))
        # print("IDG File:\t{}".format(comp_tar_pair_dataset))
        # print("Number of training samples:\t{}".format(total_training_count))
        print("Number of validation samples:\t{}".format(
            total_validation_count))
        return {"RMSE": rmse_score, "F1-Score": f1_score}
    else:
        f1_score = sklearn.metrics.f1_score(validation_labels,
                                            validation_predictions)
        accuracy_score = sklearn.metrics.accuracy_score(
            validation_labels, validation_predictions)
        print("=" * 80)
        print("Fold:{}\tEpoch:{}\tTest F1:{}\tValidation Loss:{}".format(
            0 + 1, 0, f1_score, total_validation_loss))
        print("F1 Score:\t{}".format(f1_score))
        print("Accuracy:\t{}".format(accuracy_score))
        return {
            "neg_mean_loss": -1 * total_validation_loss,
            "mean_accuracy": accuracy_score,
        }
def train_networks(mod, comp_feat, tar_feat, comp_hidden_lst, tar_hidden_lst,
                   fc1, fc2, lr, comp_tar_pair_dataset, regression_classifier):
    torch.manual_seed(1)
    modeltype = mod
    comp_feature_list = comp_feat.split("_")
    tar_feature_list = tar_feat.split("_")
    comp_hidden_lst = [int(neuron) for neuron in comp_hidden_lst.split("_")]
    tar_hidden_lst = [int(neuron) for neuron in tar_hidden_lst.split("_")]
    fc1 = int(fc1)
    fc2 = int(fc2)
    learn_rate = float(lr)
    print(modeltype, comp_feature_list, tar_feature_list, fc1, fc2, learn_rate)
    # learn_rate = sys.argv[2]
    n_epoch = 10
    num_of_folds = 1
    batch_size = 64
    # comp_tar_pair_dataset = "idg_comp_targ_uniq_inter_filtered.csv"
    comp_tar_pair_test_dataset = "comp_targ_affinity.csv"
    use_gpu = torch.cuda.is_available()
    if use_gpu:
        print("GPU is available on this device!")
        device = "cuda"
    else:
        print("CPU is available on this device!")
        device = "cpu"
    # Alternative datasets and feature sets, kept for reference:
    # comp_tar_pair_dataset = "dummy_Dtc_comp_targ_uniq_inter_filtered_onlykinase.txt"
    # comp_feature_list = ["comp_dummy_feat_1", "comp_dummy_feat_2"]
    # tar_feature_list = ["prot_dummy_feat_1", "prot_dummy_feat_2"]
    # comp_feature_list = ["ecfp4", "fcfp4", "rdk5"]
    # tar_feature_list = ["k-sep-bigrams", "APAAC", "DDE", "pfam", "spmap_final"]
    # final_training is a module-level flag.
    if final_training:
        train_loader, number_of_comp_features, number_of_target_features = \
            get_full_training_data_loader(batch_size, comp_feature_list,
                                          tar_feature_list,
                                          comp_tar_pair_dataset,
                                          regression_classifier)
        test_loader = get_test_loader_challenge(comp_feature_list,
                                                tar_feature_list)
        test_predictions = []
        original_number_of_comp_features = int(number_of_comp_features)
        original_number_of_target_features = int(number_of_target_features)
        print(original_number_of_comp_features,
              original_number_of_target_features)
        total_number_of_features = (number_of_comp_features +
                                    number_of_target_features)
        concat_models = ["FC1", "FC1M", "FC2", "FC3", "FC3M"]
        number_of_comp_features = original_number_of_comp_features
        number_of_target_features = original_number_of_target_features
        if modeltype == "FC1":
            model = FCModel1(total_number_of_features).to(device)
        elif modeltype == "FC1M":
            model = FCModel1_M(total_number_of_features).to(device)
        elif modeltype == "FC2":
            model = FCModel2(total_number_of_features).to(device)
        elif modeltype == "FC3":
            model = FCModel_3_Hidden(total_number_of_features, 1024, 400, 200,
                                     0.5).to(device)
        elif modeltype == "FC3M":
            model = FCModel_3_Hidden_with_Modules(total_number_of_features,
                                                  1024, 400, 200,
                                                  0.5).to(device)
        else:
            model = FC_PINNModel_2_2_2_Modules(
                number_of_comp_features, comp_hidden_lst[0],
                comp_hidden_lst[1], number_of_target_features,
                tar_hidden_lst[0], tar_hidden_lst[1], fc1, fc2,
                regression_classifier).to(device)
        # optimizer = torch.optim.Adam(model.parameters(), lr=learn_rate)
        optimizer = torch.optim.SGD(model.parameters(), lr=learn_rate,
                                    momentum=0.507344802825)
        criterion = torch.nn.MSELoss()
        optimizer.zero_grad()
        for epoch in range(n_epoch):
            total_training_loss = 0.0
            total_training_count = 0
            batch_number = 0
            model.train()
            for i, data in enumerate(train_loader):
                batch_number += 1
                # get the inputs
                comp_feature_vectors, target_feature_vectors, labels, \
                    compound_ids, target_ids, number_of_comp_features, \
                    number_of_target_features = data
                # wrap them in Variable and move them to the active device
                comp_feature_vectors = Variable(comp_feature_vectors).to(device)
                target_feature_vectors = Variable(target_feature_vectors).to(device)
                labels = Variable(labels).to(device)
                total_training_count += comp_feature_vectors.shape[0]
                if modeltype in concat_models:
                    inputs = torch.cat(
                        (comp_feature_vectors, target_feature_vectors), 1)
                    y_pred = model(inputs)
                else:
                    # Forward pass: predict y from the two feature blocks
                    y_pred = model(comp_feature_vectors,
                                   target_feature_vectors)
                # loss.item() replaces the long-deprecated loss.data[0],
                # which raises on 0-dim tensors in modern PyTorch.
                loss = criterion(y_pred.squeeze(), labels)
                total_training_loss += float(loss.item())
                loss.backward()
                optimizer.step()
                # clear the gradients before the next batch
                optimizer.zero_grad()
            print("Epoch: {}, Loss: {}".format(epoch, total_training_loss))
        model.eval()
        with torch.no_grad():  # torch.set_grad_enabled(False):
            for i, data in enumerate(test_loader):
                test_comp_feature_vectors, test_target_feature_vectors, \
                    test_compound_ids, test_target_ids, \
                    test_number_of_comp_features, \
                    test_number_of_target_features = data
                test_comp_feature_vectors = Variable(
                    test_comp_feature_vectors).to(device)
                test_target_feature_vectors = Variable(
                    test_target_feature_vectors).to(device)
                if modeltype in concat_models:
                    test_inputs = torch.cat((test_comp_feature_vectors,
                                             test_target_feature_vectors), 1)
                    test_y_pred = model(test_inputs)
                else:
                    test_y_pred = model(test_comp_feature_vectors,
                                        test_target_feature_vectors)
                for item in test_y_pred:
                    test_predictions.append([
                        test_compound_ids[0], test_target_ids[0],
                        float(item.item())
                    ])
        for pred in test_predictions:
            print("{}\t{}\t{}".format(pred[0], pred[1], pred[2]))
    else:
        loader_fold_dict, number_of_comp_features, number_of_target_features = \
            get_nfold_data_loader_dict(num_of_folds, batch_size,
                                       comp_feature_list, tar_feature_list,
                                       comp_tar_pair_dataset,
                                       regression_classifier)
        test_loader = get_test_loader(comp_feature_list, tar_feature_list,
                                      comp_tar_pair_test_dataset)
        original_number_of_comp_features = int(number_of_comp_features)
        original_number_of_target_features = int(number_of_target_features)
        print(original_number_of_comp_features,
              original_number_of_target_features)
        total_number_of_features = (number_of_comp_features +
                                    number_of_target_features)
        # feature_lst = ["tri_gram", "spmap", "pfam", "k_sep_bigrams", "DDE", "APAAC"]
        # feature_lst = ["k_sep_bigrams", "APAAC"]
        concat_models = ["FC1", "FC1M", "FC2", "FC3", "FC3M"]
        # Sentinel scores make an unfinished fold obvious in the averages.
        rmse_fold_lst = [-100000.0 for i in range(num_of_folds)]
        pearson_fold_lst = [-100000.0 for i in range(num_of_folds)]
        spearman_fold_lst = [-100000.0 for i in range(num_of_folds)]
        ci_fold_lst = [-100000.0 for i in range(num_of_folds)]
        f1_fold_lst = [-100000.0 for i in range(num_of_folds)]
        auc_fold_lst = [-100000.0 for i in range(num_of_folds)]
        for fold in range(num_of_folds):
            train_loader, valid_loader = loader_fold_dict[fold]
            print("FOLD : {}".format(fold + 1))
            # print(len(train_loader), len(valid_loader))
            number_of_comp_features = original_number_of_comp_features
            number_of_target_features = original_number_of_target_features
            if modeltype == "FC1":
                model = FCModel1(total_number_of_features).to(device)
            elif modeltype == "FC1M":
                model = FCModel1_M(total_number_of_features).to(device)
            elif modeltype == "FC2":
                model = FCModel2(total_number_of_features).to(device)
            elif modeltype == "FC3":
                model = FCModel_3_Hidden(total_number_of_features, 1024, 400,
                                         200, 0.5).to(device)
            elif modeltype == "FC3M":
                model = FCModel_3_Hidden_with_Modules(
                    total_number_of_features, 1024, 400, 200, 0.5).to(device)
            else:
                model = FC_PINNModel_2_2_2(
                    number_of_comp_features, comp_hidden_lst[0],
                    comp_hidden_lst[1], number_of_target_features,
                    tar_hidden_lst[0], tar_hidden_lst[1], fc1, fc2,
                    regression_classifier).to(device)
            # optimizer = torch.optim.Adam(model.parameters(), lr=learn_rate)
            optimizer = torch.optim.SGD(model.parameters(), lr=learn_rate,
                                        momentum=0.507344802825)
            criterion = torch.nn.MSELoss()
            optimizer.zero_grad()
            for epoch in range(n_epoch):
                total_training_loss = 0.0
                total_validation_loss = 0.0
                total_training_count = 0
                total_validation_count = 0
                validation_predictions = []
                test_predictions = []
                validation_labels = []
                batch_number = 0
                model.train()
                for i, data in enumerate(train_loader):
                    batch_number += 1
                    # get the inputs
                    comp_feature_vectors, target_feature_vectors, labels, \
                        compound_ids, target_ids, number_of_comp_features, \
                        number_of_target_features = data
                    # wrap them in Variable and move them to the device
                    comp_feature_vectors = Variable(comp_feature_vectors).to(device)
                    target_feature_vectors = Variable(target_feature_vectors).to(device)
                    labels = Variable(labels).to(device)
                    total_training_count += comp_feature_vectors.shape[0]
                    if modeltype in concat_models:
                        inputs = torch.cat(
                            (comp_feature_vectors, target_feature_vectors), 1)
                        y_pred = model(inputs)
                    else:
                        y_pred = model(comp_feature_vectors,
                                       target_feature_vectors)
                    loss = criterion(y_pred.squeeze(), labels)
                    total_training_loss += float(loss.item())
                    loss.backward()
                    optimizer.step()
                    # clear the gradients before the next batch
                    optimizer.zero_grad()
                model.eval()
                with torch.no_grad():  # torch.set_grad_enabled(False):
                    for i, data in enumerate(valid_loader):
                        val_comp_feature_vectors, val_target_feature_vectors, \
                            val_labels, val_compound_ids, val_target_ids, \
                            val_number_of_comp_features, \
                            val_number_of_target_features = data
                        val_comp_feature_vectors = Variable(
                            val_comp_feature_vectors).to(device)
                        val_target_feature_vectors = Variable(
                            val_target_feature_vectors).to(device)
                        val_labels = Variable(val_labels).to(device)
                        total_validation_count += \
                            val_comp_feature_vectors.shape[0]
                        if modeltype in concat_models:
                            val_inputs = torch.cat(
                                (val_comp_feature_vectors,
                                 val_target_feature_vectors), 1)
                            val_y_pred = model(val_inputs)
                        else:
                            val_y_pred = model(val_comp_feature_vectors,
                                               val_target_feature_vectors)
                        loss_val = criterion(val_y_pred.squeeze(), val_labels)
                        total_validation_loss += float(loss_val.item())
                        for item in val_y_pred:
                            validation_predictions.append(float(item.item()))
                        for item in val_labels:
                            validation_labels.append(float(item.item()))
                    for i, data in enumerate(test_loader):
                        test_comp_feature_vectors, \
                            test_target_feature_vectors, test_compound_ids, \
                            test_target_ids, test_number_of_comp_features, \
                            test_number_of_target_features = data
                        test_comp_feature_vectors = Variable(
                            test_comp_feature_vectors).to(device)
                        test_target_feature_vectors = Variable(
                            test_target_feature_vectors).to(device)
                        if modeltype in concat_models:
                            test_inputs = torch.cat(
                                (test_comp_feature_vectors,
                                 test_target_feature_vectors), 1)
                            test_y_pred = model(test_inputs)
                        else:
                            test_y_pred = model(test_comp_feature_vectors,
                                                test_target_feature_vectors)
                        for item in test_y_pred:
                            test_predictions.append([
                                test_compound_ids[0], test_target_ids[0],
                                float(item.item())
                            ])
                print(test_predictions)
                rmse_score = rmse(np.asarray(validation_labels),
                                  np.asarray(validation_predictions))
                pearson_score = pearson(np.asarray(validation_labels),
                                        np.asarray(validation_predictions))
                spearman_score = spearman(np.asarray(validation_labels),
                                          np.asarray(validation_predictions))
                ci_score = ci(np.asarray(validation_labels),
                              np.asarray(validation_predictions))
                f1_score = f1(np.asarray(validation_labels),
                              np.asarray(validation_predictions))
                ave_auc_score = average_AUC(
                    np.asarray(validation_labels),
                    np.asarray(validation_predictions))
                print("=" * 80)
                print("Fold:{}, Epoch:{}, Training Loss:{}, Validation Loss:{}"
                      .format(fold + 1, epoch, total_training_loss,
                              total_validation_loss))
                print("RMSE:\t{}".format(rmse_score))
                print("Pearson:\t{}".format(pearson_score))
                print("Spearman:\t{}".format(spearman_score))
                print("Ci:\t{}".format(ci_score))
                print("F1-Score:\t{}".format(f1_score))
                print("Average_AUC:\t{}".format(ave_auc_score))
                print("Number of training samples:\t{}".format(
                    total_training_count))
                print("Number of validation samples:\t{}".format(
                    total_validation_count))
                rmse_fold_lst[fold] = rmse_score
                pearson_fold_lst[fold] = pearson_score
                spearman_fold_lst[fold] = spearman_score
                ci_fold_lst[fold] = ci_score
                f1_fold_lst[fold] = f1_score
                auc_fold_lst[fold] = ave_auc_score
        average_rmse_fold = sum(rmse_fold_lst) / num_of_folds
        average_pearson_fold = sum(pearson_fold_lst) / num_of_folds
        average_spearman_fold = sum(spearman_fold_lst) / num_of_folds
        average_ci_fold = sum(ci_fold_lst) / num_of_folds
        average_f1_fold = sum(f1_fold_lst) / num_of_folds
        average_auc_fold = sum(auc_fold_lst) / num_of_folds
        print("-" * 65)
        print("ave_result\tmodeltype\tcomp_feature_list\ttar_feature_list\t"
              "comp_hidden_lst\ttar_hidden_lst\tfc1\tfc2\tlearn_rate\t"
              "ave_rmse\tave_pearson\tave_spearman\taverage_ci\t"
              "average_f1score\tave_ave_auc")
        print("average_results\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}"
              .format(modeltype, comp_feature_list, tar_feature_list,
                      comp_hidden_lst, tar_hidden_lst, fc1, fc2, learn_rate,
                      average_rmse_fold, average_pearson_fold,
                      average_spearman_fold, average_ci_fold,
                      average_f1_fold, average_auc_fold))


# --- Disabled variant below, kept for reference inside a string literal. ---
"""
def train_networks(mod, comp_feat, tar_feat, comp_hidden_lst, tar_hidden_lst,
                   fc1, fc2, lr, comp_tar_pair_dataset, regression_classifier):
    modeltype = mod
    torch.manual_seed(1)
    comp_feature_list = comp_feat.split("_")
    tar_feature_list = tar_feat.split("_")
    comp_hidden_lst = [int(neuron) for neuron in comp_hidden_lst.split("_")]
    tar_hidden_lst = [int(neuron) for neuron in tar_hidden_lst.split("_")]
    fc1 = int(fc1)
    fc2 = int(fc2)
    learn_rate = float(lr)
    print(modeltype, comp_feature_list, tar_feature_list, fc1, fc2, learn_rate)
    # learn_rate = sys.argv[2]
    n_epoch = 100
    num_of_folds = 5
    batch_size = 64
    # comp_tar_pair_dataset = "idg_comp_targ_uniq_inter_filtered.csv"
    use_gpu = torch.cuda.is_available()
    device = "cpu"
    if use_gpu:
        print("GPU is available on this device!")
        device = "cuda"
    else:
        print("CPU is available on this device!")
    # comp_tar_pair_dataset = "dummy_Dtc_comp_targ_uniq_inter_filtered_onlykinase.txt"
    # comp_feature_list = ["ecfp4", "fcfp4", "rdk5"]
    # tar_feature_list = ["k-sep-bigrams", "APAAC", "DDE", "pfam", "spmap_final"]
    loader_fold_dict, number_of_comp_features, number_of_target_features = (
        get_nfold_data_loader_dict(num_of_folds, batch_size,
                                   comp_feature_list, tar_feature_list,
                                   comp_tar_pair_dataset,
                                   regression_classifier))
    original_number_of_comp_features = int(number_of_comp_features)
    original_number_of_target_features = int(number_of_target_features)
    print(original_number_of_comp_features,
          original_number_of_target_features)
    total_number_of_features = (number_of_comp_features +
                                number_of_target_features)
    concat_models = ["FC1", "FC1M", "FC2", "FC3", "FC3M"]
    rmse_fold_lst = [-100000.0 for i in range(num_of_folds)]
    pearson_fold_lst = [-100000.0 for i in range(num_of_folds)]
    spearman_fold_lst = [-100000.0 for i in range(num_of_folds)]
    ci_fold_lst = [-100000.0 for i in range(num_of_folds)]
    f1_fold_lst = [-100000.0 for i in range(num_of_folds)]
    auc_fold_lst = [-100000.0 for i in range(num_of_folds)]
    for fold in range(num_of_folds):
        train_loader, valid_loader = loader_fold_dict[fold]
        print("FOLD : {}".format(fold + 1))
        number_of_comp_features = original_number_of_comp_features
        number_of_target_features = original_number_of_target_features
        model = None
        if modeltype == "FC1":
            model = FCModel1(total_number_of_features).to(device)
        elif modeltype == "FC1M":
            model = FCModel1_M(total_number_of_features).to(device)
        elif modeltype == "FC2":
            model = FCModel2(total_number_of_features).to(device)
        elif modeltype == "FC3":
            model = FCModel_3_Hidden(total_number_of_features, 1024, 400,
                                     200, 0.5).to(device)
        elif modeltype == "FC3M":
            model = FCModel_3_Hidden_with_Modules(total_number_of_features,
                                                  1024, 400, 200,
                                                  0.5).to(device)
        elif modeltype == "PINN_2_2":
            model = FC_PINNModel_2_2_2_Modules(
                number_of_comp_features, comp_hidden_lst[0],
                comp_hidden_lst[1], number_of_target_features,
                tar_hidden_lst[0], tar_hidden_lst[1], fc1, fc2,
                regression_classifier).to(device)
        elif modeltype == "PINN_2_3":
            model = FC_PINNModel_2_3_2_Modules(
                number_of_comp_features, comp_hidden_lst[0],
                comp_hidden_lst[1], number_of_target_features,
                tar_hidden_lst[0], tar_hidden_lst[1], tar_hidden_lst[2],
                fc1, fc2).to(device)
        # optimizer = torch.optim.Adam(model.parameters(), lr=learn_rate)
        optimizer = torch.optim.SGD(model.parameters(), lr=learn_rate,
                                    momentum=0.507344802825)
        if regression_classifier == "r":
            criterion = torch.nn.MSELoss()
        else:
            criterion = torch.nn.BCELoss()
        optimizer.zero_grad()
        for epoch in range(n_epoch):
            total_training_loss = 0.0
            total_validation_loss = 0.0
            total_training_count = 0
            total_validation_count = 0
            validation_predictions = []
            validation_labels = []
            batch_number = 0
            model.train()
            for i, data in enumerate(train_loader):
                batch_number += 1
                # get the inputs
                comp_feature_vectors, target_feature_vectors, labels, (
                    compound_ids), target_ids, number_of_comp_features, (
                    number_of_target_features) = data
                # wrap them in Variable and move them to the device
                comp_feature_vectors = Variable(comp_feature_vectors).to(device)
                target_feature_vectors = Variable(target_feature_vectors).to(device)
                labels = Variable(labels).to(device)
                total_training_count += comp_feature_vectors.shape[0]
                if modeltype in concat_models:
                    inputs = torch.cat(
                        (comp_feature_vectors, target_feature_vectors), 1)
                    y_pred = model(inputs)
                else:
                    y_pred = model(comp_feature_vectors,
                                   target_feature_vectors)
                # Per-sample weights: up-weight actives heavily for
                # regression, use fixed class weights for classification.
                weights = []
                binary_labels = preprocessing.binarize(
                    labels.reshape(1, -1), threshold=7.0, copy=False)[0]
                if regression_classifier == "c":
                    for lbl in labels:
                        weights.append([3, 1])
                if regression_classifier == "r":
                    for lbl in labels:
                        if int(lbl) == 1:
                            weights.append(1000)
                        else:
                            weights.append(1)
                weights = torch.FloatTensor(weights).to(device)
                loss = None
                if regression_classifier == "r":
                    if len(weights) == 64:
                        criterion.weight = weights
                    else:
                        criterion.weight = None
                    loss = criterion(y_pred.squeeze(), labels)
                else:
                    # if len(weights) == 64:
                    #     criterion.weight = weights
                    # else:
                    #     criterion.weight = None
                    loss = criterion(y_pred.squeeze(), labels)
                total_training_loss += float(loss.item())
                loss.backward()
                optimizer.step()
                # clear the gradients before the next batch
                optimizer.zero_grad()
            model.eval()
            with torch.no_grad():  # torch.set_grad_enabled(False):
                for i, data in enumerate(valid_loader):
                    (val_comp_feature_vectors, val_target_feature_vectors,
                     val_labels, val_compound_ids, val_target_ids,
                     val_number_of_comp_features,
                     val_number_of_target_features) = data
                    val_comp_feature_vectors = Variable(
                        val_comp_feature_vectors).to(device)
                    val_target_feature_vectors = Variable(
                        val_target_feature_vectors).to(device)
                    val_labels = Variable(val_labels).to(device)
                    total_validation_count += val_comp_feature_vectors.shape[0]
                    if modeltype in concat_models:
                        val_inputs = torch.cat(
                            (val_comp_feature_vectors,
                             val_target_feature_vectors), 1)
                        val_y_pred = model(val_inputs)
                    else:
                        val_y_pred = model(val_comp_feature_vectors,
                                           val_target_feature_vectors)
                    loss_val = criterion(val_y_pred.squeeze(), val_labels)
                    total_validation_loss += float(loss_val.item())
                    for item in val_y_pred:
                        # for regression / for classification
                        if regression_classifier == "r":
                            validation_predictions.append(float(item.item()))
                        else:
                            validation_predictions.append(
                                int(float(item.item()) >= 0.5))
                    for item in val_labels:
                        if regression_classifier == "r":
                            validation_labels.append(float(item.item()))
                        else:
                            validation_labels.append(int(item.item()))
            if regression_classifier == "r":
                rmse_score = rmse(np.asarray(validation_labels),
                                  np.asarray(validation_predictions))
                pearson_score = pearson(np.asarray(validation_labels),
                                        np.asarray(validation_predictions))
                spearman_score = spearman(np.asarray(validation_labels),
                                          np.asarray(validation_predictions))
                ci_score = ci(np.asarray(validation_labels),
                              np.asarray(validation_predictions))
                f1_score = f1(np.asarray(validation_labels),
                              np.asarray(validation_predictions))
                ave_auc_score = average_AUC(
                    np.asarray(validation_labels),
                    np.asarray(validation_predictions))
                print("=" * 80)
                print("Fold:{}\tEpoch:{}\tTest RMSE:{}\tTraining Loss:{}\t"
                      "Validation Loss:{}".format(
                          fold + 1, epoch, rmse_score, total_training_loss,
                          total_validation_loss))
                print("RMSE:\t{}".format(rmse_score))
                print("Pearson:\t{}".format(pearson_score))
                print("Spearman:\t{}".format(spearman_score))
                print("Ci:\t{}".format(ci_score))
                print("F1-Score:\t{}".format(f1_score))
                print("Average_AUC:\t{}".format(ave_auc_score))
                print("IDG File:\t{}".format(comp_tar_pair_dataset))
                print("Number of training samples:\t{}".format(
                    total_training_count))
                print("Number of validation samples:\t{}".format(
                    total_validation_count))
                rmse_fold_lst[fold] = rmse_score
                pearson_fold_lst[fold] = pearson_score
                spearman_fold_lst[fold] = spearman_score
                ci_fold_lst[fold] = ci_score
                f1_fold_lst[fold] = f1_score
                auc_fold_lst[fold] = ave_auc_score
            else:
                f1_score = sklearn.metrics.f1_score(validation_labels,
                                                    validation_predictions)
                accuracy_score = sklearn.metrics.accuracy_score(
                    validation_labels, validation_predictions)
                print("=" * 80)
                print("Fold:{}\tEpoch:{}\tTest F1:{}\tTraining Loss:{}\t"
                      "Validation Loss:{}".format(
                          fold + 1, epoch, f1_score, total_training_loss,
                          total_validation_loss))
                print("F1 Score:\t{}".format(f1_score))
                print("Accuracy:\t{}".format(accuracy_score))
    if regression_classifier == "r":
        average_rmse_fold = sum(rmse_fold_lst) / num_of_folds
        average_pearson_fold = sum(pearson_fold_lst) / num_of_folds
        average_spearman_fold = sum(spearman_fold_lst) / num_of_folds
        average_ci_fold = sum(ci_fold_lst) / num_of_folds
        average_f1_fold = sum(f1_fold_lst) / num_of_folds
        average_auc_fold = sum(auc_fold_lst) / num_of_folds
        print("-" * 65)
        print("ave_result\tdataset\tmodeltype\tcomp_feature_list\t"
              "tar_feature_list\tcomp_hidden_lst\ttar_hidden_lst\tfc1\tfc2\t"
              "learn_rate\tave_rmse\tave_pearson\tave_spearman\taverage_ci\t"
              "average_f1score\tave_ave_auc")
        print("average_results\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}"
              .format(comp_tar_pair_dataset, modeltype, comp_feature_list,
                      tar_feature_list, comp_hidden_lst, tar_hidden_lst,
                      fc1, fc2, learn_rate, average_rmse_fold,
                      average_pearson_fold, average_spearman_fold,
                      average_ci_fold, average_f1_fold, average_auc_fold))
"""
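# Note on the disabled variant above: torch.nn.MSELoss takes no `weight`
# argument, so assigning `criterion.weight` there has no effect on the
# computed loss. If per-sample weights are actually wanted (e.g. the
# 1000x up-weighting of actives attempted above), the weighted MSE has to be
# written out by hand. A minimal sketch, assuming `weights` is a 1-D tensor
# aligned with the batch:
def weighted_mse(y_pred, y_true, weights):
    # mean over the batch of w_i * (y_i - yhat_i)^2
    return (weights * (y_pred - y_true) ** 2).mean()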
# NOTE: this redefinition shadows the earlier test() above.
def test():
    """
    Earlier classification-style evaluation loop, kept for reference:

    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            if args.cuda:
                data, target = data.cuda(), target.cuda()
            output = model(data)
            # sum up batch loss
            test_loss += F.nll_loss(output, target, reduction='sum').item()
            # get the index of the max log-probability
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.data.view_as(pred)).long().cpu().sum()
    test_loss = test_loss / len(test_loader.dataset)
    accuracy = float(correct.item()) / len(test_loader.dataset)
    print(test_loss, accuracy)
    reporter(mean_loss=test_loss, mean_accuracy=accuracy)
    """
    model.eval()
    regression_classifier = "r"
    total_validation_loss = 0.0
    total_validation_count = 0
    validation_predictions = []
    validation_labels = []
    criterion = torch.nn.MSELoss()
    # The concat path is effectively disabled here (modeltype is None), so
    # the two-tower forward is always taken.
    concat_models = [""]
    modeltype = None
    with torch.no_grad():  # torch.set_grad_enabled(False):
        for i, data in enumerate(test_loader):
            val_comp_feature_vectors, val_target_feature_vectors, val_labels, \
                val_compound_ids, val_target_ids, val_number_of_comp_features, \
                val_number_of_target_features = data
            val_comp_feature_vectors = Variable(val_comp_feature_vectors).to(device)
            val_target_feature_vectors = Variable(val_target_feature_vectors).to(device)
            val_labels = Variable(val_labels).to(device)
            total_validation_count += val_comp_feature_vectors.shape[0]
            if modeltype in concat_models:
                val_inputs = torch.cat(
                    (val_comp_feature_vectors, val_target_feature_vectors), 1)
                val_y_pred = model(val_inputs)
            else:
                val_y_pred = model(val_comp_feature_vectors,
                                   val_target_feature_vectors)
            loss_val = criterion(val_y_pred.squeeze(), val_labels)
            total_validation_loss += float(loss_val.item())
            for item in val_y_pred:
                if regression_classifier == "r":
                    validation_predictions.append(float(item.item()))
                else:
                    validation_predictions.append(
                        int(float(item.item()) >= 0.5))
            for item in val_labels:
                if regression_classifier == "r":
                    validation_labels.append(float(item.item()))
                else:
                    validation_labels.append(int(item.item()))
    if regression_classifier == "r":
        rmse_score = rmse(np.asarray(validation_labels),
                          np.asarray(validation_predictions))
        pearson_score = pearson(np.asarray(validation_labels),
                                np.asarray(validation_predictions))
        # spearman_score = spearman(np.asarray(validation_labels), np.asarray(validation_predictions))
        # ci_score = ci(np.asarray(validation_labels), np.asarray(validation_predictions))
        f1_score = f1(np.asarray(validation_labels),
                      np.asarray(validation_predictions))
        ave_auc_score = average_AUC(np.asarray(validation_labels),
                                    np.asarray(validation_predictions))
        print("Test RMSE:{}\tF1-Score:{}\tAverage_AUC:{}\tValidation Loss:{}"
              .format(rmse_score, f1_score, ave_auc_score,
                      total_validation_loss))
        # return {"RMSE": rmse_score, "F1-Score": f1_score}
    else:
        f1_score = sklearn.metrics.f1_score(validation_labels,
                                            validation_predictions)
        accuracy_score = sklearn.metrics.accuracy_score(
            validation_labels, validation_predictions)
        print("=" * 80)
        print("Fold:{}\tEpoch:{}\tTest F1:{}\tValidation Loss:{}".format(
            0 + 1, 0, f1_score, total_validation_loss))
        print("F1 Score:\t{}".format(f1_score))
        print("Accuracy:\t{}".format(accuracy_score))
    reporter(mean_loss=total_validation_loss, mean_accuracy=f1_score)
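# `reporter` in test() follows Ray Tune's legacy function-based trainable
# API, in which the framework hands the training function a callback that is
# invoked once per reporting step with keyword metrics. A toy sketch of that
# contract (this assumes the legacy reporter-style API; the objective below
# is fabricated purely for illustration):
def _demo_tune_trainable(config, reporter):
    for step in range(5):
        loss = config.get("lr", 0.01) / (step + 1)
        reporter(mean_loss=loss, mean_accuracy=1.0 - loss)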
# NOTE: this redefinition shadows the earlier train_networks above.
# n_epoch and num_of_folds are module-level globals here.
def train_networks(comp_feature_list, tar_feature_list, comp_hidden_lst,
                   vocab_size, output_size, embedding_dim, hidden_dim,
                   n_rnn_layers, fc1, fc2, learn_rate, comp_tar_pair_dataset,
                   regression_classifier, batch_size):
    print("PARAMETERS:", comp_feature_list, tar_feature_list, comp_hidden_lst,
          vocab_size, output_size, embedding_dim, hidden_dim, n_rnn_layers,
          fc1, fc2, learn_rate, comp_tar_pair_dataset, regression_classifier,
          batch_size)
    torch.manual_seed(1)
    use_gpu = torch.cuda.is_available()
    if use_gpu:
        print("GPU is available on this device!")
        device = "cuda"
    else:
        print("CPU is available on this device!")
        device = "cpu"
    loader_fold_dict, number_of_comp_features, number_of_target_features = \
        get_nfold_data_loader_dict(num_of_folds, batch_size,
                                   comp_feature_list, tar_feature_list,
                                   comp_tar_pair_dataset,
                                   regression_classifier)
    original_number_of_comp_features = int(number_of_comp_features)
    original_number_of_target_features = int(number_of_target_features)
    print(original_number_of_comp_features,
          original_number_of_target_features)
    for fold in range(num_of_folds):
        train_loader, valid_loader = loader_fold_dict[fold]
        print("FOLD : {}".format(fold + 1))
        number_of_comp_features = original_number_of_comp_features
        number_of_target_features = original_number_of_target_features
        print(int(number_of_comp_features), int(comp_hidden_lst[0]),
              int(comp_hidden_lst[1]), vocab_size, output_size,
              embedding_dim, hidden_dim, n_rnn_layers, fc1, fc2)
        model = CompFCNNTarRNN(int(number_of_comp_features),
                               int(comp_hidden_lst[0]),
                               int(comp_hidden_lst[1]), vocab_size,
                               output_size, embedding_dim, hidden_dim,
                               n_rnn_layers, fc1, fc2).to(device)
        # optimizer = torch.optim.SGD(model.parameters(), lr=learn_rate,
        #                             momentum=0.507344802825)
        optimizer = torch.optim.Adam(model.parameters(), lr=learn_rate)
        criterion = torch.nn.MSELoss()
        optimizer.zero_grad()
        for epoch in range(n_epoch):
            total_training_loss, total_validation_loss = 0.0, 0.0
            total_training_count, total_validation_count = 0, 0
            validation_predictions, validation_labels = [], []
            batch_number = 0
            h = model.init_hidden(batch_size)
            model.train()
            for i, data in enumerate(train_loader):
                batch_number += 1
                # Detach the hidden state from the previous batch's graph so
                # backpropagation is truncated at batch boundaries.
                h = tuple([each.data for each in h])
                # clear the gradients before the next batch
                optimizer.zero_grad()
                # get the inputs
                comp_feature_vectors, target_feature_vectors, labels, \
                    compound_ids, target_ids, number_of_comp_features, \
                    number_of_target_features = data
                # wrap them in Variable and move them to the active device
                comp_feature_vectors = Variable(comp_feature_vectors).to(device)
                target_feature_vectors = Variable(target_feature_vectors).to(device)
                labels = Variable(labels).to(device)
                # Skip ragged final batches: the RNN hidden state is sized
                # for full batches.
                if comp_feature_vectors.shape[0] == batch_size:
                    total_training_count += comp_feature_vectors.shape[0]
                    y_pred, h = model(comp_feature_vectors,
                                      target_feature_vectors, h)
                    loss = criterion(y_pred.squeeze(), labels)
                    total_training_loss += float(loss.item())
                    loss.backward()
                    optimizer.step()
            print("Epoch {} training loss:".format(epoch),
                  total_training_loss)
            h = model.init_hidden(batch_size)
            model.eval()
            with torch.no_grad():  # torch.set_grad_enabled(False):
                for i, data in enumerate(valid_loader):
                    val_comp_feature_vectors, val_target_feature_vectors, \
                        val_labels, val_compound_ids, val_target_ids, \
                        val_number_of_comp_features, \
                        val_number_of_target_features = data
                    val_comp_feature_vectors = Variable(
                        val_comp_feature_vectors).to(device)
                    val_target_feature_vectors = Variable(
                        val_target_feature_vectors).to(device)
                    val_labels = Variable(val_labels).to(device)
                    total_validation_count += val_comp_feature_vectors.shape[0]
                    if val_comp_feature_vectors.shape[0] == batch_size:
                        val_y_pred, h = model(val_comp_feature_vectors,
                                              val_target_feature_vectors, h)
                        loss_val = criterion(val_y_pred.squeeze(), val_labels)
                        total_validation_loss += float(loss_val.item())
                        for item in val_labels:
                            validation_labels.append(float(item.item()))
                        for item in val_y_pred:
                            validation_predictions.append(float(item.item()))
            if regression_classifier == "r":
                rmse_score = rmse(np.asarray(validation_labels),
                                  np.asarray(validation_predictions))
                pearson_score = pearson(np.asarray(validation_labels),
                                        np.asarray(validation_predictions))
                spearman_score = spearman(np.asarray(validation_labels),
                                          np.asarray(validation_predictions))
                ci_score = ci(np.asarray(validation_labels),
                              np.asarray(validation_predictions))
                f1_score = f1(np.asarray(validation_labels),
                              np.asarray(validation_predictions))
                ave_auc_score = average_AUC(
                    np.asarray(validation_labels),
                    np.asarray(validation_predictions))
                print("=" * 80)
                print("Fold:{}\tEpoch:{}\tTest RMSE:{}\tTraining Loss:{}\t"
                      "Validation Loss:{}".format(
                          fold + 1, epoch, rmse_score, total_training_loss,
                          total_validation_loss))
                print("RMSE:\t{}".format(rmse_score))
                print("Pearson:\t{}".format(pearson_score))
                print("Spearman:\t{}".format(spearman_score))
                print("Ci:\t{}".format(ci_score))
                print("F1-Score:\t{}".format(f1_score))
                print("Average_AUC:\t{}".format(ave_auc_score))
                print("IDG File:\t{}".format(comp_tar_pair_dataset))
                print("Number of training samples:\t{}".format(
                    total_training_count))
                print("Number of validation samples:\t{}".format(
                    total_validation_count))
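# A hypothetical invocation of the RNN trainer above, guarded so importing
# this module stays side-effect free. All hyperparameter and feature-name
# values are illustrative only; "comp_targ_affinity.csv" is the pair file
# already referenced in this module, and n_epoch / num_of_folds must be
# defined at module level before this runs.
if __name__ == "__main__":
    train_networks(comp_feature_list=["ecfp4"],
                   tar_feature_list=["sequence"],
                   comp_hidden_lst=[1024, 256],
                   vocab_size=26, output_size=1, embedding_dim=64,
                   hidden_dim=128, n_rnn_layers=2, fc1=256, fc2=64,
                   learn_rate=0.0001,
                   comp_tar_pair_dataset="comp_targ_affinity.csv",
                   regression_classifier="r", batch_size=64)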