def cost_vs_iterations_plotting(learning_rates_list):
    """Plot the training-cost curve of LogisticRegression for several
    learning rates on each benchmark dataset.

    Parameters
    ----------
    learning_rates_list : list of float
        Learning rates to compare; falls back to [.1, .5, 1] when empty.

    Side effects: prints progress, shows one matplotlib figure per dataset,
    and reports total wall-clock time at the end.
    """
    start = time.time()
    if not learning_rates_list:
        learning_rates_list = [.1, .5, 1]

    # Dataset list
    datasets = [
        Datasets.IONOSPHERE,
        Datasets.ADULT,
        Datasets.WINE_QUALITY,
        Datasets.BREAST_CANCER_DIAGNOSIS
    ]

    # Initialize model (reused across datasets; lr is reset per run below)
    classifier = LogisticRegression()

    for dataset_name in datasets:
        print("dataset: ", dataset_name)

        # Load the dataset
        X, y = get_dataset(dataset_name)

        # Feature scaling
        X = feature_scaling(X)

        # Split the dataset
        X_train, X_test, y_train, y_test = train_test_split(X, y, 0.8,
                                                            shuffle=True)

        for lr in learning_rates_list:
            classifier.lr = lr
            # Fit the model to the dataset
            classifier.fit(X_train, y_train)
            # Plot the evolution of the cost during training
            plt.plot(range(len(classifier.cost_history)),
                     classifier.cost_history)

        # One legend entry per learning rate, in plotting order.
        legends = ['lr = ' + str(l) for l in learning_rates_list]
        plt.legend(legends, loc='upper right')
        plt.title(dataset_name)
        plt.xlim((0, 100))
        plt.ylabel('Cost')
        plt.xlabel('Iterations')
        plt.show()

    print('\n\nDONE!')
    print('It took', time.time() - start, 'seconds.')
assert dW.shape == self.W.shape self.W -= self.lr * dW self.b -= self.lr * db def predict(self, X): # 将矩阵压缩成向量,与原始输入Y保持一致 return np.squeeze(np.dot(X, self.W) + self.b) def RMSE(y_true, y_pred): return sum((y_true - y_pred) ** 2) ** 0.5 / len(y_true) if __name__ == "__main__": from datasets.dataset import load_boston from model_selection.train_test_split import train_test_split data = load_boston() X = data.data Y = data.target X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2) line_reg = LinearRegression(max_iter=2000) line_reg.fit(X_train, Y_train) Y_pred = line_reg.predict(X_test) rmse = RMSE(Y_test, Y_pred) print(rmse)
#### E-step,计算概率 #### return np.argmax(P_mat, axis=1) if __name__ == '__main__': from sklearn.datasets.samples_generator import make_blobs from model_selection.train_test_split import train_test_split X, _ = make_blobs(cluster_std=1.5, random_state=42, n_samples=1000, centers=3) X = np.dot(X, np.random.RandomState(0).randn(2, 2)) # 生成斜形类簇 import matplotlib.pyplot as plt plt.clf() plt.scatter(X[:, 0], X[:, 1], alpha=0.3) plt.show() X_train, X_test = train_test_split(X, test_size=0.2) n_samples, n_feature = X_train.shape gmm = GaussianMixture(n_components=6) gmm.fit(X_train) Y_pred = gmm.predict(X_test) plt.clf() plt.scatter(X_test[:, 0], X_test[:, 1], c=Y_pred, alpha=0.3) plt.show()
clus_pred = np.argmin(dist_test, axis=1) return clus_pred if __name__ == '__main__': import numpy as np data_1 = np.random.randn(200, 2) + [1, 1] data_2 = np.random.randn(200, 2) + [4, 4] data_3 = np.random.randn(200, 2) + [7, 1] data = np.concatenate((data_1, data_2, data_3), axis=0) from model_selection.train_test_split import train_test_split X_train, X_test = train_test_split(data, test_size=0.2) kmeans = KMeans(n_clusters=3) kmeans.fit(X_train) import matplotlib.pyplot as plt plt.clf() plt.scatter(X_train[:, 0], X_train[:, 1], alpha=0.5, c=kmeans.labels_) plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1], marker='*', c='k') plt.show() clus_pred = kmeans.predict(X_test)
# NOTE(review): fragment — X, y, ds and the helpers used below are defined
# outside this view; presumably this sits inside a loop over datasets
# (ds). Confirm against the full file before moving it.
# Feature scaling
X = feature_scaling(X)

# Create the classifiers
lr_classifier = LogisticRegression()
nb_classifier = GaussianNaiveBayes()

# Sweep the train fraction from 5% to 95% in 5% steps.
train_sizes = np.arange(0.05, 1, 0.05)
lr_accuracy = []
nb_accuracy = []
for frac in train_sizes:
    # Split into train and test at this train fraction.
    X_train, X_test, y_train, y_test = train_test_split(X, y, frac,
                                                        shuffle=True)

    # Train and score logistic regression on this split.
    lr_classifier.fit(X_train, y_train)
    lr_accuracy.append(evaluate_acc(y_test, lr_classifier.predict(X_test)))

    # Train and score naive Bayes on the same split.
    nb_classifier.fit(X_train, y_train)
    nb_accuracy.append(evaluate_acc(y_test, nb_classifier.predict(X_test)))

printAccuracyComparison(ds, lr_accuracy, nb_accuracy, train_sizes)
print('\n\nDONE!')