# Forward feature accumulation: for every (k, v) pair in order_dict[2:], append
# feature k as a new column of `feature`, rebuild the cosine gram matrix and
# score it with a 5-fold stratified cross-validated SVC on a precomputed kernel.
# The feature matrix with the highest mean accuracy is kept in best_feature.
print(feature)
for k, v in order_dict[2:]:
    # Collect feature k from every sample and append it as a new column.
    column = np.zeros(rows)
    for row_idx in range(rows):
        column[row_idx] = sample[row_idx][k]
    feature = np.c_[feature, column]
    print(feature)

    # gram[a, b] = cosine of rows a and b of the current feature matrix,
    # rounded to 6 decimals.
    gram = np.zeros((rows, rows))
    for a, left in enumerate(feature):
        for b, right in enumerate(feature):
            gram[a, b] = round(metrics_function.cosine(left, right), 6)
    print(gram)

    clf = svm.SVC(kernel='precomputed', probability=False)
    # NOTE(review): this fit uses y_train while the cross-validation below
    # uses label -- confirm both hold the same labels.
    clf.fit(gram, y_train)

    cv = model_selection.StratifiedKFold(
        n_splits=5, shuffle=True, random_state=0)
    five_fold = model_selection.cross_validate(
        clf, gram, label, cv=cv, scoring='accuracy', n_jobs=-1)
    fold_scores = five_fold['test_score']
    ACC = np.mean(fold_scores)
    print('ACC =', ACC)

    # Remember the best-scoring feature matrix seen so far (copied, because
    # `feature` keeps growing on later iterations).
    if ACC > best_ACC:
        best_ACC = ACC
        best_feature = np.copy(feature)
# Build the train-vs-train kernel matrices for dataset `name`: the cosine
# kernel K1 is printed and saved as CSV, then the Tanimoto kernel K3 is computed.
file = np.loadtxt(
    'D:/Study/Bioinformatics/QSP_new/' + name + '/train_' + name + '.csv',
    delimiter=',', skiprows=1)
m = np.shape(file)[0]
n = np.shape(file)[1]

# Keep columns 1..n-1 as an independent array (column 0 is dropped); a slice
# copy replaces the former row-by-row copy into a zero matrix.
data = file[:, 1:].copy()

np.set_printoptions(suppress=True)

# K1[i][j] = cosine of rows i and j, rounded to 6 decimals.
K1 = np.zeros((m, m))
for i in range(m):
    for j in range(m):
        K1[i][j] = round(metrics_function.cosine(data[i], data[j]), 6)
print(K1)

# The `with` block closes the file on exit, so no explicit close() is needed.
with open(
        'D:/Study/Bioinformatics/QSP_new/kernel_matrix/KM_train_cosine/KM_cosine_'
        + name + '_train.csv', 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerows(K1)

# K3[i][j] = Tanimoto value of rows i and j, rounded to 6 decimals.
K3 = np.zeros((m, m))
for i in range(m):
    for j in range(m):
        K3[i][j] = round(metrics_function.tanimoto(data[i], data[j]), 6)
# Per-fold kernels for dataset `name`, fold `k`: the train-vs-train cosine
# kernel K1 is printed and saved as CSV, then the test-vs-train cosine kernel
# K2 is computed. `m` and `X_test` are defined before this fragment.
file1 = np.loadtxt(
    "D:/study/Bioinformatics/QSP/200p_200n/10_fold/" + name
    + "/train/train_" + name + "_" + str(k) + ".csv",
    delimiter=',')
p = np.shape(file1)[0]
q = np.shape(file1)[1]

# Keep columns 1..q-1 as an independent array (column 0 is dropped); a slice
# copy replaces the former row-by-row copy into a zero matrix.
X_train = file1[:, 1:].copy()

# K1[i][j] = cosine of training rows i and j, rounded to 6 decimals.
K1 = np.zeros((p, p))
for i in range(p):
    for j in range(p):
        K1[i][j] = round(
            metrics_function.cosine(X_train[i], X_train[j]), 6)
print(K1)

# The `with` block closes the file on exit, so no explicit close() is needed.
with open('D:/study/Bioinformatics/QSP/200p_200n/10_fold/' + name
          + '/km_train/KM_cosine_' + name + '_train_' + str(k) + '.csv',
          'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerows(K1)

# K2[i][j] = cosine of test row i and training row j, rounded to 6 decimals.
K2 = np.zeros((m, p))
for i in range(m):
    for j in range(p):
        K2[i][j] = round(
            metrics_function.cosine(X_test[i], X_train[j]), 6)
# Fragment of a test-kernel script; it is cut off mid-statement at the end, and
# its first statement is presumably the body of a `for index in ...` loop that
# starts above the visible code (TODO confirm).
# Visible steps: copy f1 rows (minus element 0) into X_test; load the training
# CSV for `name` (one header row skipped) and copy its rows (minus column 0)
# into X_train; fill the m x p matrix K1 with cosine(X_test[i], X_train[j])
# rounded to 6 decimals, print it and write it row by row to
# KM_cosine_<name>_test.csv; then begin the same fill with
# metrics_function.tanimoto into K3.
# NOTE(review): csvfile.close() is redundant, the `with` statement already
# closes the file.
X_test[index] = f1[index][1:] f2 = np.loadtxt('D:/study/Bioinformatics/AMP/' + name + '/train_' + name + '.csv', delimiter=',', skiprows=1) p = np.shape(f2)[0] q = np.shape(f2)[1] X_train = np.zeros((p, q - 1)) for index in range(p): X_train[index] = f2[index][1:] K1 = np.zeros((m, p)) for i in range(m): for j in range(p): K1[i][j] = round(metrics_function.cosine(X_test[i], X_train[j]), 6) print(K1) with open( 'D:/study/Bioinformatics/AMP/kernel_matrix/KM_test_cosine/KM_cosine_' + name + '_test.csv', 'w', newline='') as csvfile: writer = csv.writer(csvfile) for row in K1: writer.writerow(row) csvfile.close() K3 = np.zeros((m, p)) for i in range(m): for j in range(p): K3[i][j] = round(metrics_function.tanimoto(X_test[i], X_train[j]),
# Threshold sweep: for each value in `thresholds`, reduce X_train / X_test with
# a prefit SelectFromModel, build the train-vs-train gram matrix with
# metrics_function.cosine and measure 5-fold stratified cross-validated
# accuracy of an SVC on that precomputed kernel.
print(thresholds)
best_ACC = 0
best_train, best_test = X_train, X_test

for thres in thresholds:
    print('threshold =', thres)
    selector = SelectFromModel(model, threshold=thres, prefit=True)

    X_train_selected = selector.transform(X_train)
    print(np.shape(X_train_selected))
    X_test_selected = selector.transform(X_test)
    print(np.shape(X_test_selected))

    gram_train = metrics_function.cosine(X_train_selected, X_train_selected)
    clf = svm.SVC(kernel='precomputed', probability=False)

    # A ValueError from the trial fit skips this threshold instead of
    # aborting the whole sweep.
    try:
        clf.fit(gram_train, y_train)
    except ValueError as err:
        print("ValueError Details : " + str(err))
        continue

    cv = model_selection.StratifiedKFold(n_splits=5, shuffle=False)
    five_fold = model_selection.cross_validate(
        clf, gram_train, y_train, cv=cv, scoring='accuracy', n_jobs=-1)
    ACC = np.mean(five_fold['test_score'])
    # NOTE(review): the visible fragment ends here; best_ACC, best_train and
    # best_test are not updated in the code shown.