def kmeans():
    """Project the training data onto its first two principal components and
    visualize a 2-cluster KMeans assignment as a scatter plot.

    Side effects: opens a matplotlib window (plt.show()); no return value.
    """
    train_x, test_x, train_y, test_y = data_scan.data_split()
    pca = PCA(n_components=2)
    # BUG FIX: the original called pca.fit(...) then discarded the result of
    # pca.transform(...), so clustering and plotting ran on the raw features.
    # Capture the projection so both actually operate in PCA space.
    train_x = pca.fit_transform(train_x)
    y_pred = KMeans(n_clusters=2, random_state=2).fit_predict(train_x)
    plt.scatter(train_x[:, 0], train_x[:, 1], c=y_pred)
    plt.show()
def train_and_test():
    """Train a BayesClassifier on the training split and print its accuracy
    on the test split.

    Side effects: prints each test sample with its expected and predicted
    label, then the overall accuracy percentage; no return value.
    """
    train_x, test_x, train_y, test_y = data_scan.data_split()
    classifier = BayesClassifier()
    classifier.train(train_x, train_y)
    # Evaluate on the held-out test set
    print("验证测试集", test_x.shape)
    correct_size = 0
    for sample, expected in zip(test_x, test_y):
        # reshape(1, -1) generalizes the original hard-coded reshape(1, 21)
        # so the function works for any feature width, not only 21 columns.
        result = classifier.classify(sample.reshape(1, -1))
        print(sample, expected, result[0])
        if result[0] == expected:
            correct_size += 1
    print("正确率:%f%%" % (correct_size * 100 / test_x.shape[0]))
def _labels_to_one_hot(labels):
    """Map integer labels in {0, 1} to 2-dim one-hot float rows.

    Equivalent to the original per-label np.zeros/assign loop (the original's
    temp.T was a no-op on a 1-D array).
    """
    return np.eye(2)[np.asarray(labels, dtype=int)]


def load_total_data():
    """Load both splits with features scaled into [0, 1] and one-hot labels.

    Returns:
        (train_x / 26, test_x / 26, train_y_one_hot, test_y_one_hot)
    """
    train_x, test_x, train_y, test_y = data_split(
        path='../../data/data_preceded.csv')
    # Presumably 26 is the maximum raw feature value, so dividing scales
    # features into [0, 1] — TODO confirm against the dataset.
    threshold = 26
    # Replaces two duplicated manual one-hot loops with one helper.
    train_y = _labels_to_one_hot(train_y)
    test_y = _labels_to_one_hot(test_y)
    return (train_x / threshold), (test_x / threshold), train_y, test_y
def load_data(is_load_train_data):
    """Load one split with features scaled into [0, 1] and one-hot labels.

    Args:
        is_load_train_data: truthy for the training split, falsy for test.

    Returns:
        (data / 26, one-hot label array of shape (n, 2))
    """
    # NOTE(review): this path has one more '../' than the loader elsewhere in
    # this project ('../../data/data_preceded.csv') — confirm which is right.
    train_x, test_x, train_y, test_y = data_split(
        path='../../../data/data_preceded.csv')
    # Normalization is essential!
    # Presumably 26 is the maximum raw feature value — TODO confirm.
    threshold = 26
    if is_load_train_data:
        data, labels = train_x, train_y
    else:
        data, labels = test_x, test_y
    # One-hot encode via identity-matrix indexing; equivalent to the original
    # per-label np.zeros/assign loop (temp.T was a no-op on a 1-D array).
    labels = np.eye(2)[np.asarray(labels, dtype=int)]
    return (data / threshold), labels
# @Author : Equator
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import KFold, cross_val_score, GridSearchCV

from code.preprocessing.data_scan import data_split

num_folds = 10
seed = 7
scoring = 'neg_mean_squared_error'


def knn_improve(train_x, train_y):
    """Grid-search the best n_neighbors for a KNN regressor.

    Standardizes the features, runs a 10-fold cross-validated grid search
    over odd neighbor counts 1..21, prints every (mean, std, params) result,
    and finally prints the best score and parameters.
    """
    rescaled = StandardScaler().fit(train_x).transform(train_x)
    param_grid = {'n_neighbors': [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21]}
    cv = KFold(n_splits=num_folds, random_state=seed, shuffle=True)
    search = GridSearchCV(KNeighborsRegressor(), param_grid,
                          scoring=scoring, cv=cv)
    result = search.fit(X=rescaled, y=train_y)
    stats = zip(result.cv_results_['mean_test_score'],
                result.cv_results_['std_test_score'],
                result.cv_results_['params'])
    for mean, std, params in stats:
        print("%f (%f) with %r" % (mean, std, params))
    print('最优:%s 使用 %s' % (result.best_score_, result.best_params_))


if __name__ == '__main__':
    train_x, test_x, train_y, test_y = data_split()
    knn_improve(train_x, train_y)