def logistic_regression(data=None, eta=0.002, iterations=7000):
    """Fit a logistic-regression model on *data* and report its results.

    :param data: dataset handed straight to the ``LoR`` constructor
    :param eta: learning rate for gradient descent
    :param iterations: number of training iterations
    """
    print("\nPerforming Logistic Regression...")
    model = LoR(data=data, eta=eta, iterations=iterations)
    # Report the fitted parameters followed by the training accuracy.
    for label, value in (("Weights: ", model.weights()), ("Accuracy: ", model.accuracy())):
        print(label, value)
filename = 'LogisticRegressionData1.txt'


def load_data(filename):
    """Read a CSV dataset from *filename* and return it as a NumPy array.

    :param filename: path to a CSV file readable by ``pandas.read_csv``
    :return: 2-D ``numpy.ndarray`` of the file's values (last column is
        assumed to be the label; verify against the data file)
    """
    data = pd.read_csv(filename)
    data = data.values
    return data


data = load_data(filename)
dimensions = data.shape
# First 50 rows are the training split; every column but the last is a feature.
x_train = data[0:50, 0:dimensions[1] - 1]
y_train = data[0:50, dimensions[1] - 1:]
m = x_train.shape[0]  # number of training samples
n = x_train.shape[1]  # number of features
# FIX: the original sliced data[51:], silently dropping row 50 from both the
# training and the test split; the test set now starts right after training.
x_test = data[50:, 0:dimensions[1] - 1]
y_test = data[50:, dimensions[1] - 1:]
tester = LogisticRegression()
tester.logistic_regression(x_train, y_train)
pred_probability = tester.predict_probability(x_test)
# FIX: Python 2 `print` statements converted to print() calls so this chunk is
# valid Python 3, consistent with the rest of the file.
print("Probability of y being 1 for given testing samples:" + str(pred_probability))
pred_y = tester.predict_y(x_test)
print("Predicted value of y for given testing samples : " + str(pred_y))
plot_train = tester.plot_train(x_train, y_train)
plot_test = tester.plot_test(x_test, y_test)
acc = tester.accuracy(x_test, y_test)
print("Accuracy of regression algorithm = " + str(acc))
print('Data sorted ...') # Split data into training and test data - ignoring the critical data for now X = np.concatenate((X_ordered, X_disordered)) Y = np.concatenate((Y_ordered, Y_disordered)) print(np.shape(X)) X_train, X_test, Y_train, Y_test = train_test_split(X, Y, train_size=0.20) saveData = np.c_[X_train, Y_train] np.save('test_set', saveData) print('saved test set') # Illustrate ordered vs disordered data # L = 40 # fig = plt.figure() # plt.subplot(121) # plt.imshow(X_ordered[20000].reshape(L, L), cmap='plasma_r') # plt.title('Ordered') # # plt.subplot(122) # plt.imshow(X_disordered[20000].reshape(L, L), cmap='plasma_r') # plt.title('Disordered') # plt.show() # Train on the training data logreg = LogisticRegression(X_train, Y_train.reshape(len(Y_train), 1), X_test, Y_test.reshape(len(Y_test), 1)) logreg.fit_standard() print(logreg.accuracy()) weight = logreg.getWeights() np.save('weights_logreg.npy', weight)
def train(all_data):
    """
    Run k-fold cross-validated training and report metrics.

    For each fold, one fold is used for validation, the next fold (cyclically)
    for testing, and the remaining folds for training. Depending on module
    flags, trains either a LogisticRegression or SoftmaxRegression model, with
    stochastic or batch gradient descent, applies early stopping on the
    validation error, plots per-epoch curves, tracks the best model by
    validation score, and finally reports averaged test accuracy.

    Relies on module-level configuration: STOCHASTIC_VS_BATCH, LOGISTIC,
    LEARNING_RATE, PRINCIPAL_COMPONENTS, CATEGORIES, EPOCHS,
    STOCHASTIC_GRADIENT, EARLY_STOPPING_THRESHOLD, FOLDS,
    SHOW_CONFUSION_MATRIX, SHOW_PRINCIPAL_COMPONENTS.

    :param all_data: input data (passed to kfold for splitting)
    """
    best_model = None
    stochastic_data = None  # holds the stochastic run's curve when comparing vs batch
    folds = kfold(all_data)
    avg_epoch_data_train = EpochData()
    avg_epoch_data_val = EpochData()
    test_acc = []  # per-fold test accuracies, averaged at the end
    k = len(folds)
    for fold in range(k):
        # When STOCHASTIC_VS_BATCH is set, run each fold twice:
        # iteration 0 = stochastic GD, iteration 1 = batch GD.
        for stochastic in range(2 if STOCHASTIC_VS_BATCH else 1):
            # define the model
            if LOGISTIC:
                model = LogisticRegression(LEARNING_RATE, PRINCIPAL_COMPONENTS)
            else:
                model = SoftmaxRegression(LEARNING_RATE, PRINCIPAL_COMPONENTS, len(CATEGORIES))
            # split data: fold -> validation, next fold (cyclic) -> test,
            # everything else -> training
            val_data, test_data = split_x_y(folds[fold]), split_x_y(folds[(fold + 1) % k])
            train_data = None
            for i in range(k):
                if i != fold and i != ((fold + 1) % k):
                    if train_data is None:
                        train_data = folds[i]
                    else:
                        train_data = np.concatenate((train_data, folds[i]))
            train_data = split_x_y(train_data)
            # Fit PCA on the training features only, then project all splits.
            pca = PCA(train_data[0], PRINCIPAL_COMPONENTS)
            # PCA and one_hot
            train_data, test_data, val_data = transform(pca, train_data), transform(pca, test_data), transform(pca, val_data)
            validation_performance = EpochData()
            training_performance = EpochData()
            # Sanity check: no validation sample may leak into the training set.
            assert not (any([val_img in train_data for val_img in val_data]))
            for epoch in range(EPOCHS):
                if STOCHASTIC_GRADIENT or (STOCHASTIC_VS_BATCH and stochastic == 0):
                    model.stochastic_gradient_descent(train_data[0], train_data[1])
                else:
                    model.batch_gradient_descent(train_data[0], train_data[1])
                # Evaluate loss/accuracy on both splits after this epoch's update.
                train_prob = model.probabilities(train_data[0])
                val_prob = model.probabilities(val_data[0])
                training_error = model.loss(train_data[1], train_prob)
                validation_error = model.loss(val_data[1], val_prob)
                traning_acc = model.accuracy(train_prob, train_data[1])
                validation_acc = model.accuracy(val_prob, val_data[1])
                if epoch % 10 == 0:
                    print("Training error: {}, validation error: {}, accuracy: {}".format(training_error, validation_error, traning_acc))
                # save
                validation_performance.save(validation_error, validation_acc)
                training_performance.save(training_error, traning_acc)
                # early stopping: abort once validation error has worsened for
                # more than EARLY_STOPPING_THRESHOLD consecutive epochs
                # (NOTE(review): exact meaning of `.increments` defined in EpochData — confirm)
                if validation_performance.increments > EARLY_STOPPING_THRESHOLD:
                    break
            # plot the graphs
            data_to_plot = [training_performance.error, validation_performance.error]
            legends = ["Training error", "Validation error"]
            visualize_data(data_to_plot, legends, "Epoch", "Cross entropy error")
            data_to_plot = [training_performance.acc, validation_performance.acc]
            legends = ["Training accuracy", "Validation accuracy"]
            visualize_data(data_to_plot, legends, "Epoch", "Accuracy")
            # save the validation data to the model
            model.epoch_data = validation_performance
            # save the test data to the model
            model.test_data = test_data
            # save the pca
            model.pca = pca
            # save the epoch data (accumulated across folds for averaged curves)
            avg_epoch_data_train.add(training_performance)
            avg_epoch_data_val.add(validation_performance)
            # save test accuracy
            test_acc.append(model.accuracy(model.probabilities(test_data[0]), test_data[1]))
            print("Test accuracy: {} ".format(test_acc[-1]))
            # save the best model (lower epoch-data score wins — see EpochData.score)
            if best_model is None:
                best_model = model
            elif best_model.epoch_data.score() > model.epoch_data.score():
                best_model = model
            if STOCHASTIC_VS_BATCH:
                # display graph comparing the stochastic run (saved on the first
                # pass) against the batch run (second pass)
                if stochastic == 1:
                    data_to_visualize = [stochastic_data.error, training_performance.error]
                    visualize_data(data_to_visualize, ["Stochastic - train error", "Batch - train error"], "Epoch", "Loss")
                else:
                    stochastic_data = training_performance
    # Final reporting: averaged test accuracy with its standard deviation.
    avg_test_acc = np.average(np.array(test_acc))
    avg_test_acc_std = np.std(np.array(test_acc))
    print("Avg test accuracy: {} ({})".format(avg_test_acc, avg_test_acc_std))
    avg_epoch_data_train.align(FOLDS)
    avg_epoch_data_val.align(FOLDS)
    visualize_data_avg(avg_epoch_data_train, avg_epoch_data_val)
    if not LOGISTIC:
        best_model.visualize_weights(model.pca)
    if SHOW_CONFUSION_MATRIX:
        confusion_matrix(best_model)
    if SHOW_PRINCIPAL_COMPONENTS:
        show_principal_components(pca)