# Imports assumed by the snippets in this section (scikit-learn, Keras-1-era API, NumPy).
import random
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from keras.wrappers.scikit_learn import KerasClassifier


def start_fit(dataSet):
    # Shuffle the rows before splitting.
    index = [i for i in range(len(dataSet))]
    random.shuffle(index)
    dataSet = dataSet[index]
    X = dataSet[:, 0:148]
    Y = dataSet[:, 148]
    X_train, X_test, y_train, y_test = train_test_split(
        X, Y, test_size=0.2, random_state=0)
    # Normalization: fit the scaler on the training split only.
    scaler = StandardScaler().fit(X_train)
    X_train = scaler.transform(X_train)
    X_test = scaler.transform(X_test)
    dbn_model = KerasClassifier(model_init, epochs=500, batch_size=64, verbose=0)
    dbn_model.fit(X_train, y_train)
    y_ped = dbn_model.predict(X_test)
    acc, precision, npv, sensitivity, specificity, mcc, f1 = calculate_performace(
        len(y_ped), y_ped, y_test)
    # Print the seven metrics actually returned above (the last is f1).
    print('DBN:acc=%f,precision=%f,npv=%f,sensitivity=%f,specificity=%f,'
          'mcc=%f,f1=%f'
          % (acc, precision, npv, sensitivity, specificity, mcc, f1))
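# ---------------------------------------------------------------------------
# The snippets in this section lean on two helpers, categorical_probas_to_classes
# and calculate_performace (sic), that are not defined in this excerpt. The
# sketch below is an assumption inferred from the call sites (seven return
# values: acc, precision, npv, sensitivity, specificity, mcc, f1), not the
# repository's actual implementation.

def categorical_probas_to_classes(probas):
    # Collapse an (n_samples, n_classes) probability matrix to class indices.
    return np.argmax(probas, axis=1)


def calculate_performace(test_num, pred_y, labels):
    # Binary confusion-matrix counts.
    tp = fp = tn = fn = 0
    for i in range(test_num):
        if labels[i] == 1:
            if pred_y[i] == 1:
                tp += 1
            else:
                fn += 1
        else:
            if pred_y[i] == 1:
                fp += 1
            else:
                tn += 1
    acc = (tp + tn) / test_num
    precision = tp / (tp + fp) if (tp + fp) else 0.0   # positive predictive value
    npv = tn / (tn + fn) if (tn + fn) else 0.0         # negative predictive value
    sensitivity = tp / (tp + fn) if (tp + fn) else 0.0
    specificity = tn / (tn + fp) if (tn + fp) else 0.0
    denom = ((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn)) ** 0.5
    mcc = (tp * tn - fp * fn) / denom if denom else 0.0
    f1 = 2 * tp / (2 * tp + fp + fn) if (2 * tp + fp + fn) else 0.0
    return acc, precision, npv, sensitivity, specificity, mcc, f1
# ---------------------------------------------------------------------------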
def pred_sep_model(model, X_test, y_test):
    # Predict class probabilities from the two-branch (separate) model.
    y_score = model.predict([X_test, X_test])
    y_test_tmp = utils.to_categorical(y_test)
    fpr, tpr, _ = roc_curve(y_test_tmp[:, 0], y_score[:, 0])
    roc_auc = auc(fpr, tpr)
    y_class = utils.categorical_probas_to_classes(y_score)
    y_test_tmp = y_test
    acc, precision, npv, sensitivity, specificity, mcc, f1 = utils.calculate_performace(
        len(y_class), y_class, y_test_tmp)
    print('DeepPPI-sep:acc=%f,precision=%f,npv=%f,sensitivity=%f,specificity=%f,'
          'mcc=%f,f1=%f,roc_auc=%f'
          % (acc, precision, npv, sensitivity, specificity, mcc, f1, roc_auc))
def start_fit(dataset, label, title):
    # Pre-processing: shuffle, then split into train and test sets.
    dataset = np.array(dataset)
    label = np.array(label)
    dataset, label = get_shuffle(dataset, label, random_state=1)
    X_train, X_test, y_train, y_test = train_test_split(dataset, label, random_state=0)
    # Normalization: fit the scaler on the training split only.
    scaler = StandardScaler().fit(X_train)
    X_train = scaler.transform(X_train)
    X_test = scaler.transform(X_test)
    # The 2328-dim pair feature holds both proteins; split it into the two
    # 1164-dim halves fed to the two network branches.
    X1_train = X_train[:, 0:1164]
    X2_train = X_train[:, 1164:2328]
    X1_test = X_test[:, 0:1164]
    X2_test = X_test[:, 1164:2328]
    model = get_model(dropout_value=0.2)
    y_train = np_utils.to_categorical(y_train)
    # Fit (nb_epoch is the Keras 1 spelling of epochs).
    model.fit([X1_train, X2_train], y_train, nb_epoch=30, batch_size=64, verbose=0)
    # Prediction probabilities and evaluation.
    y_probas = model.predict([X1_test, X2_test])
    y_test = np_utils.to_categorical(y_test)
    fpr, tpr, _ = roc_curve(y_test[:, 0], y_probas[:, 0])
    roc_auc = auc(fpr, tpr)
    draw_roc(y_test, y_probas)
    draw_pr(y_test, y_probas)
    y_class = categorical_probas_to_classes(y_probas)
    y_test = categorical_probas_to_classes(y_test)
    acc, precision, npv, sensitivity, specificity, mcc, f1 = calculate_performace(
        len(y_class), y_class, y_test)
    print(title)
    print('DeepPPI:acc=%f,precision=%f,npv=%f,sensitivity=%f,specificity=%f,'
          'mcc=%f,roc_auc=%f'
          % (acc, precision, npv, sensitivity, specificity, mcc, roc_auc))
def calu_Accuracy(self):
    model = self.res_Net50(self.x)
    if os.path.exists(self.log_dir + '/' + 'train_weights.h5'):
        model.load_weights(self.log_dir + '/' + 'train_weights.h5')
    else:
        raise RuntimeError('load weights Error!')
    test_images = self.mnist.test.images
    test_labels = self.mnist.test.labels
    print("Predicting test-set samples")
    result = model.predict(test_images)
    with open('temp', 'w+') as file:
        for i in range(len(result)):
            file.write(str(result[i]) + '\n')
    # Per-sample correctness: 1 where the predicted class matches the label.
    equal = np.equal(np.argmax(test_labels, axis=1), np.argmax(result, axis=1))
    equal = np.where(equal == True, 1, 0)
    correct_rate = np.mean(equal)
    print("Test-set accuracy: {}".format(correct_rate))
    fw_perf = open(self.save_path + '/index2.txt', 'w')
    # Column order matches the values written below (f1 at the seventh slot).
    fw_perf.write('acc' + ',' + 'precision' + ',' + 'npv' + ',' +
                  'sensitivity' + ',' + 'specificity' + ',' + 'mcc' + ',' +
                  'f1' + ',' + 'auc' + ',' + 'pr' + '\n')
    auc_ = roc_auc_score(test_labels, result)
    pr = average_precision_score(test_labels, result)
    y_class = utils.categorical_probas_to_classes(result)
    true_y = utils.categorical_probas_to_classes(test_labels)
    acc, precision, npv, sensitivity, specificity, mcc, f1 = utils.calculate_performace(
        len(y_class), y_class, true_y)
    print("======================")
    print("======================")
    print('\tacc=%0.4f,pre=%0.4f,npv=%0.4f,sn=%0.4f,sp=%0.4f,mcc=%0.4f,f1=%0.4f'
          % (acc, precision, npv, sensitivity, specificity, mcc, f1))
    print('\tauc=%0.4f,pr=%0.4f' % (auc_, pr))
    fw_perf.write(str(acc) + ',' + str(precision) + ',' + str(npv) + ',' +
                  str(sensitivity) + ',' + str(specificity) + ',' + str(mcc) + ',' +
                  str(f1) + ',' + str(auc_) + ',' + str(pr) + '\n')
    end = datetime.now().strftime('%Y-%m-%d-%H:%M:%S')
    print('start: %s' % self.start)
    print('end: %s' % end)
    fw_perf.write('start: %s' % self.start + '\n')
    fw_perf.write('end: %s' % end + '\n')
skf = StratifiedKFold(n_splits=10)
for train, test in skf.split(X, y):
    y_train = utils.to_categorical(y[train])  # one-hot targets for this fold
    # Note: the same model instance is reused across folds (no re-initialization).
    cv_clf = model
    hist = cv_clf.fit(X[train], y_train, epochs=19)
    y_score = cv_clf.predict(X[test])  # predicted class probabilities
    y_class = utils.categorical_probas_to_classes(y_score)
    y_test = utils.to_categorical(y[test])  # one-hot labels for this fold
    ytest = np.vstack((ytest, y_test))
    y_test_tmp = y[test]
    yscore = np.vstack((yscore, y_score))
    acc, precision, npv, sensitivity, specificity, mcc, f1 = utils.calculate_performace(
        len(y_class), y_class, y_test_tmp)
    # fpr, tpr, _ = roc_curve(y_test[:, 0], y_score[:, 0])
    fpr, tpr, _ = roc_curve(y_test[:, 1], y_score[:, 1])
    roc_auc = auc(fpr, tpr)
    sepscores.append(
        [acc, precision, npv, sensitivity, specificity, mcc, f1, roc_auc])

# Append the per-metric means as a final summary row.
scores = np.array(sepscores)
result1 = np.mean(scores, axis=0)
H1 = result1.tolist()
sepscores.append(H1)
result = sepscores

# Drop the seed row that yscore was stacked onto before the loop.
row = yscore.shape[0]
yscore = yscore[np.array(range(1, row)), :]
yscore_sum = pd.DataFrame(data=yscore)
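# The fold loop above vstacks onto ytest and yscore, which this excerpt never
# initializes; the slice dropping row 0 at the end removes a seed row. A
# plausible seeding (an assumption, not from the source):
#     sepscores = []
#     ytest = np.ones((1, 2)) * 0.5   # dummy seed row, dropped after the loop
#     yscore = np.ones((1, 2)) * 0.5  # dummy seed row, dropped after the loop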
# (Call opening reconstructed; the excerpt begins mid-call.)
hist = model.fit(
    [X1_train, X2_train],
    y_train,
    nb_epoch=30,
    # validation_split=0.1,
    batch_size=64,
    verbose=2)
# plothistory(hist)
# Prediction probabilities for the separate-branch model.
y_score = model.predict([X1_test, X2_test])
y_test = np_utils.to_categorical(y_test)
fpr, tpr, _ = roc_curve(y_test[:, 0], y_score[:, 0])
roc_auc = auc(fpr, tpr)
y_score = categorical_probas_to_classes(y_score)
y_test = categorical_probas_to_classes(y_test)
acc, precision, npv, sensitivity, specificity, mcc, f1 = calculate_performace(
    len(y_score), y_score, y_test)
print('DeepPPI-sep:acc=%f,precision=%f,npv=%f,sensitivity=%f,specificity=%f,'
      'mcc=%f,roc_auc=%f'
      % (acc, precision, npv, sensitivity, specificity, mcc, roc_auc))

#%% Concatenated-input variant.
model = get_con_model()
hist = model.fit(
    X_train,
    y_train,
    nb_epoch=30,
    batch_size=64,
    # validation_split=0.1,
    verbose=0)
plothistory(hist)
# Prediction probability
# (Fragment from a CV fold loop; model, predictions, y_test, fw_perf and
# scores are defined earlier in the source file.)
predictions_prob = model.predict(dataset[test_index])[:, 1]
auc_ = roc_auc_score(label[test_index], predictions_prob)
pr = average_precision_score(label[test_index], predictions_prob)
y_class = utils.categorical_probas_to_classes(predictions)
# true_y_C_C = utils.categorical_probas_to_classes(true_y_C)
true_y = utils.categorical_probas_to_classes(y_test)
acc, precision, npv, sensitivity, specificity, mcc, f1 = utils.calculate_performace(
    len(y_class), y_class, true_y)
print("======================")
print("======================")
print("\tacc=%0.4f,pre=%0.4f,npv=%0.4f,sn=%0.4f,sp=%0.4f,mcc=%0.4f,f1=%0.4f"
      % (acc, precision, npv, sensitivity, specificity, mcc, f1))
print("\tauc=%0.4f,pr=%0.4f" % (auc_, pr))
fw_perf.write(str(acc) + "," + str(precision) + "," + str(npv) + "," +
              str(sensitivity) + "," + str(specificity) + "," + str(mcc) + "," +
              str(f1) + "," + str(auc_) + "," + str(pr) + "\n")
# List completed to match the CSV row above (the source excerpt truncates here).
scores.append([acc, precision, npv, sensitivity, specificity, mcc, f1, auc_, pr])
def print_result(predictions_test, y_test):
    auc_test = roc_auc_score(y_test[:, 1], predictions_test[:, 1])
    pr_test = average_precision_score(y_test[:, 1], predictions_test[:, 1])
    label_predict_test = utils.categorical_probas_to_classes(predictions_test)
    (tp_test, fp_test, tn_test, fn_test, accuracy_test, precision_test,
     sensitivity_test, recall_test, specificity_test, MCC_test, f1_score_test,
     _, _, _) = utils.calculate_performace(
        len(label_predict_test), label_predict_test, y_test[:, 1])
    print('\ttp=%0.0f,fp=%0.0f,tn=%0.0f,fn=%0.0f'
          % (tp_test, fp_test, tn_test, fn_test))
    print('\tacc=%0.4f,pre=%0.4f,rec=%0.4f,sp=%0.4f,mcc=%0.4f,f1=%0.4f'
          % (accuracy_test, precision_test, recall_test, specificity_test,
             MCC_test, f1_score_test))
    print('\tauc=%0.4f,pr=%0.4f' % (auc_test, pr_test))
    print('========================')
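# ---------------------------------------------------------------------------
# Unlike the seven-value helper used earlier, the utils.calculate_performace
# called in print_result (and in classify below) unpacks fourteen values. A
# compatible sketch, again an assumption from the call sites rather than the
# repository's code; the last three values are discarded by every caller here,
# so placeholders are returned in those slots.
def calculate_performace_extended(test_num, pred_y, labels):
    tp = sum(1 for i in range(test_num) if labels[i] == 1 and pred_y[i] == 1)
    fn = sum(1 for i in range(test_num) if labels[i] == 1 and pred_y[i] != 1)
    fp = sum(1 for i in range(test_num) if labels[i] != 1 and pred_y[i] == 1)
    tn = test_num - tp - fn - fp
    acc = (tp + tn) / test_num
    precision = tp / (tp + fp) if (tp + fp) else 0.0
    sensitivity = recall = tp / (tp + fn) if (tp + fn) else 0.0  # same quantity under both names
    specificity = tn / (tn + fp) if (tn + fp) else 0.0
    denom = ((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn)) ** 0.5
    mcc = (tp * tn - fp * fn) / denom if denom else 0.0
    f1 = 2 * tp / (2 * tp + fp + fn) if (2 * tp + fp + fn) else 0.0
    return (tp, fp, tn, fn, acc, precision, sensitivity, recall,
            specificity, mcc, f1, None, None, None)
# ---------------------------------------------------------------------------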
def classify(size, window, maxlen, train_fea_protein_AB, train_label):
    time_start_classify = time()
    sg = 'swissProt_size_' + str(size) + '_window_' + str(window)
    db = sg + '_maxlen_' + str(maxlen)
    # db_dir = 'dataset/11188/different size represented data/size_' + str(size)
    plot_dir = "plot/11188/"
    result_dir = "result/11188/performance/"
    model_dir = "model/dl/11188/"
    mkdir(plot_dir + db)
    # mkdir(result_dir + db)
    mkdir(model_dir + db)
    sequence_len = size * maxlen
    Y = utils.to_categorical(train_label)
    skf = StratifiedKFold(n_splits=5, random_state=20181031, shuffle=True)
    scores = []
    i = 0
    mem_cv = []
    for (train_index, test_index) in skf.split(train_fea_protein_AB, train_label):
        print("================")
        print(test_index)
        print(train_index)
        # Hold out 10% of the training fold for validation.
        X_train, X_val, y_train, y_val = train_test_split(
            train_fea_protein_AB[train_index], Y[train_index],
            random_state=20181031, test_size=0.1, shuffle=True)
        # The first sequence_len columns encode protein A, the rest protein B.
        X_train_left = X_train[:, 0:sequence_len]
        X_train_right = X_train[:, sequence_len:sequence_len * 2]
        X_validation_left = X_val[:, 0:sequence_len]
        X_validation_right = X_val[:, sequence_len:sequence_len * 2]
        X_test_left = train_fea_protein_AB[:, 0:sequence_len][test_index]
        X_test_right = train_fea_protein_AB[:, sequence_len:sequence_len * 2][test_index]
        # Ensure np.array inputs.
        X_train_left = np.array(X_train_left)
        X_train_right = np.array(X_train_right)
        X_test_left = np.array(X_test_left)
        X_test_right = np.array(X_test_right)
        X_validation_left = np.array(X_validation_left)
        X_validation_right = np.array(X_validation_right)
        y_test = Y[test_index]
        # Build and train the two-branch DBN for this fold.
        model = merged_DBN(sequence_len)
        sgd = SGD(lr=0.01, momentum=0.9, decay=0.001)
        model.compile(loss='categorical_crossentropy',
                      optimizer=sgd,
                      metrics=['precision'])
        hist = model.fit(
            [X_train_left, X_train_right], y_train,
            validation_data=([X_validation_left, X_validation_right], y_val),
            batch_size=128, nb_epoch=45, verbose=1)
        mem_cv.append('round ' + str(i) + ' ' + getMemorystate())
        train_validation__vis(hist, i, plot_dir, db)
        print('****** model created! ******')
        model.save(model_dir + db + '/round_' + str(i) + '.h5')
        predictions_test = model.predict([X_test_left, X_test_right])
        auc_test = roc_auc_score(y_test[:, 1], predictions_test[:, 1])
        pr_test = average_precision_score(y_test[:, 1], predictions_test[:, 1])
        label_predict_test = utils.categorical_probas_to_classes(predictions_test)
        (tp_test, fp_test, tn_test, fn_test, accuracy_test, precision_test,
         sensitivity_test, recall_test, specificity_test, MCC_test,
         f1_score_test, _, _, _) = utils.calculate_performace(
            len(label_predict_test), label_predict_test, y_test[:, 1])
        print(db + ' test:' + str(i))
        print('\ttp=%0.0f,fp=%0.0f,tn=%0.0f,fn=%0.0f'
              % (tp_test, fp_test, tn_test, fn_test))
        print('\tacc=%0.4f,pre=%0.4f,rec=%0.4f,sp=%0.4f,mcc=%0.4f,f1=%0.4f'
              % (accuracy_test, precision_test, recall_test, specificity_test,
                 MCC_test, f1_score_test))
        print('\tauc=%0.4f,pr=%0.4f' % (auc_test, pr_test))
        scores.append([accuracy_test, precision_test, recall_test,
                       specificity_test, MCC_test, f1_score_test,
                       auc_test, pr_test])
        i = i + 1
        # Discard the fold's graph so five model builds do not accumulate.
        K.clear_session()
        tf.reset_default_graph()

    sc = pd.DataFrame(scores)
    sc.to_csv(result_dir + '5cv_' + db + '_scores.csv')
    scores_array = np.array(scores)

    # Mean +/- std for each metric, in the order appended above.
    metric_names = ['accuracy', 'precision', 'recall', 'specificity',
                    'MCC', 'f1_score', 'roc_auc', 'roc_pr']
    print(db + '_5cv:')
    for k, name in enumerate(metric_names):
        print("%s=%.2f%% (+/- %.2f%%)"
              % (name, np.mean(scores_array, axis=0)[k] * 100,
                 np.std(scores_array, axis=0)[k] * 100))

    time_end_classify = time()
    # Memory and time bookkeeping for this run.
    print('Time of create db(' + db + '):', time_end_classify - time_start_classify)
    with open('runInfo/11188_val/cv_mem_time.txt', 'a') as f:
        f.write('Time of cv(' + db + '):' + str(time_end_classify - time_start_classify))
        f.write('\n')
        for m in mem_cv:  # one memory snapshot per fold
            f.write(m)
            f.write('\n')
    with open(result_dir + '5cv_' + db + '.txt', 'w') as f:
        for k, name in enumerate(metric_names):
            f.write("%s=%.2f%% (+/- %.2f%%)"
                    % (name, np.mean(scores_array, axis=0)[k] * 100,
                       np.std(scores_array, axis=0)[k] * 100))
            f.write('\n')
        f.write('\n')
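# Porting note: K.clear_session() and tf.reset_default_graph() inside the fold
# loop are the TF1-era idiom for discarding the global graph between folds so
# successive merged_DBN builds do not accumulate nodes. Under TensorFlow 2 the
# rough equivalent (an assumption for readers updating this code) would be:
#     from tensorflow.keras import backend as K
#     K.clear_session()
#     tf.compat.v1.reset_default_graph()  # only if running TF1-style graphs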
hist = model.fit(
    [X_train_left, X_train_right], y_train,  # reconstructed from context (excerpt begins mid-call)
    nb_epoch=nb_epoch,
    verbose=1)
print('****** model created! ******')
# mkdir(model_dir + swm + be + '/')
# mkdir(plot_dir + swm + be + '/')
# training_vis(hist, i, plot_dir, swm, be)
# model.save(model_dir + swm + be + '/round_' + str(i) + '.h5')
predictions_test = model.predict([X_test_left, X_test_right])
auc_test = roc_auc_score(y_test[:, 1], predictions_test[:, 1])
pr_test = average_precision_score(y_test[:, 1], predictions_test[:, 1])
label_predict_test = utils.categorical_probas_to_classes(predictions_test)
(tp_test, fp_test, tn_test, fn_test, accuracy_test, precision_test,
 sensitivity_test, recall_test, specificity_test, MCC_test, f1_score_test,
 _, _, _) = utils.calculate_performace(
    len(label_predict_test), label_predict_test, y_test[:, 1])
print(' =========== test:' + str(i))
print('\ttp=%0.0f,fp=%0.0f,tn=%0.0f,fn=%0.0f'
      % (tp_test, fp_test, tn_test, fn_test))
print('\tacc=%0.4f,pre=%0.4f,rec=%0.4f,sp=%0.4f,mcc=%0.4f,f1=%0.4f'
      % (accuracy_test, precision_test, recall_test, specificity_test,
         MCC_test, f1_score_test))
print('\tauc=%0.4f,pr=%0.4f' % (auc_test, pr_test))
scores.append([accuracy_test, precision_test, recall_test, specificity_test,
               MCC_test, f1_score_test, auc_test, pr_test])
i = i + 1
K.clear_session()
tf.reset_default_graph()

sc = pd.DataFrame(scores)
# sc.to_csv(result_dir + swm + be + '.csv')
scores_array = np.array(scores)
print("accuracy=%.2f%% (+/- %.2f%%)"
      % (np.mean(scores_array, axis=0)[0] * 100,
         np.std(scores_array, axis=0)[0] * 100))
def start_fit(dataset, label, title):
    dataset = np.array(dataset)
    label = np.array(label)
    dataset, label = get_shuffle(dataset, label, random_state=1)
    # Normalization. Note the scaler is fit on the full dataset, so the CV
    # test folds influence the scaling statistics.
    scaler = StandardScaler().fit(dataset)
    dataset = scaler.transform(dataset)
    # Split the 2328-dim pair features into the two 1164-dim protein halves.
    X1_train = dataset[:, 0:1164]
    X2_train = dataset[:, 1164:2328]
    label = label.reshape(len(label), )
    y_train = np_utils.to_categorical(label)
    # Define the 5-fold cross-validation test harness.
    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=1)
    cvscores = []
    mean_tpr = 0.0
    mean_fpr = np.linspace(0, 1, 100)
    lw = 2
    i = 0
    for train, test in skf.split(dataset, label):
        model = get_model(dropout_value=0.2)
        model.fit([X1_train[train], X2_train[train]], y_train[train],
                  epochs=30, batch_size=64, verbose=0)
        # Prediction probabilities and per-fold ROC.
        y_probas = model.predict([X1_train[test], X2_train[test]])
        fpr, tpr, _ = roc_curve(y_train[test][:, 0], y_probas[:, 0])
        roc_auc = auc(fpr, tpr)
        mean_tpr += interp(mean_fpr, fpr, tpr)  # scipy's interp (np.interp in newer code)
        mean_tpr[0] = 0.0
        plt.plot(fpr, tpr, lw=lw, color=plt.cm.Set1(i / 10.),
                 label='ROC fold %d (area = %0.2f%%)' % (i, roc_auc * 100))
        i += 1
        y_class = categorical_probas_to_classes(y_probas)
        y_test = categorical_probas_to_classes(y_train[test])
        acc, precision, npv, sensitivity, specificity, mcc, f1 = calculate_performace(
            len(y_class), y_class, y_test)
        cvscores.append(
            [acc, precision, npv, sensitivity, specificity, mcc, roc_auc])
    plt.plot([0, 1], [0, 1], linestyle='--', lw=lw, color='k', label='Luck')
    mean_tpr /= skf.get_n_splits(dataset, label)
    mean_tpr[-1] = 1.0
    mean_auc = auc(mean_fpr, mean_tpr)
    plt.plot(mean_fpr, mean_tpr, color='g', linestyle='--',
             label='Mean ROC (area = %0.2f%%)' % (mean_auc * 100), lw=lw)
    plt.xlim([-0.05, 1.05])
    plt.ylim([-0.05, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver Operating Characteristic')
    plt.legend(loc="lower right")
    plt.show()
    print(title)
    scores = np.array(cvscores)
    # Mean +/- std for each cross-validation metric, in cvscores order.
    for k, name in enumerate(['acc', 'precision', 'npv', 'sensitivity',
                              'specificity', 'mcc', 'roc_auc']):
        print("%s=%.2f%% (+/- %.2f%%)"
              % (name, np.mean(scores, axis=0)[k] * 100,
                 np.std(scores, axis=0)[k] * 100))
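# Caveat on start_fit above: StandardScaler is fit on the full dataset before
# cross-validation, so each test fold leaks into the scaling statistics. A
# leakage-free per-fold variant would look like this sketch (scale_per_fold is
# a hypothetical helper, not from the source):
from sklearn.preprocessing import StandardScaler


def scale_per_fold(dataset, train_idx, test_idx):
    # Fit the scaler on the training fold only, then apply it to both folds.
    scaler = StandardScaler().fit(dataset[train_idx])
    return scaler.transform(dataset[train_idx]), scaler.transform(dataset[test_idx])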