# Example #1
# 0
def logistic_regression(data=None, eta=0.002, iterations=7000):
    """Train a logistic-regression model via the LoR helper and print
    the learned weights and the resulting accuracy.

    :param data: dataset forwarded to LoR (default None)
    :param eta: learning rate forwarded to LoR
    :param iterations: number of training iterations forwarded to LoR
    """
    print("\nPerforming Logistic Regression...")
    model = LoR(data=data, eta=eta, iterations=iterations)
    print("Weights: ", model.weights())
    print("Accuracy: ", model.accuracy())
# Path of the CSV file holding the regression dataset.
filename = 'LogisticRegressionData1.txt'


def load_data(filename):
    """Read *filename* as CSV (header row consumed by pandas) and
    return the remaining rows as a NumPy array."""
    frame = pd.read_csv(filename)
    return frame.values


# Driver script for example #1: load the data, split it, train a
# LogisticRegression model and report predictions / accuracy.
#
# BUG FIX: the original used Python-2 `print` statements (syntax errors
# under Python 3) and split rows as train=0:50, test=51: — silently
# dropping row 50.  The test slice now starts at 50.
data = load_data(filename)
dimensions = data.shape

# First 50 rows -> training set; all columns but the last are features,
# the last column is the label.
x_train = data[0:50, 0:dimensions[1] - 1]
y_train = data[0:50, dimensions[1] - 1:]
m = x_train.shape[0]  # number of training samples
n = x_train.shape[1]  # number of features

# Remaining rows -> test set (starts at 50 so no row is skipped).
x_test = data[50:, 0:dimensions[1] - 1]
y_test = data[50:, dimensions[1] - 1:]

tester = LogisticRegression()
tester.logistic_regression(x_train, y_train)

pred_probability = tester.predict_probability(x_test)
print("Probability of y being 1 for given testing samples:" + str(pred_probability))

pred_y = tester.predict_y(x_test)
print("Predicted value of y for given testing samples : " + str(pred_y))

plot_train = tester.plot_train(x_train, y_train)
plot_test = tester.plot_test(x_test, y_test)

acc = tester.accuracy(x_test, y_test)
print("Accuracy of regression algorithm = " + str(acc))



# Example #3
# 0
print('Data sorted ...')

# Split data into training and test data - ignoring the critical data for now
X = np.concatenate((X_ordered, X_disordered))
Y = np.concatenate((Y_ordered, Y_disordered))
print(np.shape(X))

# NOTE(review): train_size=0.20 keeps only 20% of the samples in X_train,
# which is then persisted under the name 'test_set' — confirm intended.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, train_size=0.20)

# Persist the selected samples alongside their labels in one array.
saveData = np.c_[X_train, Y_train]
np.save('test_set', saveData)
print('saved test set')


# Illustrate ordered vs disordered data
# L = 40
# fig = plt.figure()
# plt.subplot(121)
# plt.imshow(X_ordered[20000].reshape(L, L), cmap='plasma_r')
# plt.title('Ordered')
#
# plt.subplot(122)
# plt.imshow(X_disordered[20000].reshape(L, L), cmap='plasma_r')
# plt.title('Disordered')
# plt.show()

# Train on the training data; labels are reshaped to column vectors
# as the LogisticRegression constructor consumes them that way.
logreg = LogisticRegression(X_train, Y_train.reshape(-1, 1),
                            X_test, Y_test.reshape(-1, 1))
logreg.fit_standard()
print(logreg.accuracy())

# Persist the learned weights for later reuse.
learned_weights = logreg.getWeights()
np.save('weights_logreg.npy', learned_weights)
# Example #4
# 0
def train(all_data):
    """
    Run k-fold training on the data and report metrics.

    For every fold (and, when STOCHASTIC_VS_BATCH is set, once per descent
    strategy) a fresh model is trained with early stopping, its learning
    curves are plotted, and its test accuracy is recorded.  The model with
    the best (lowest) validation score is kept for the final plots.

    :param all_data: input data
    """
    best_model = None
    stochastic_data = None  # training curve of the stochastic run, for the comparison plot

    folds = kfold(all_data)
    avg_epoch_data_train = EpochData()
    avg_epoch_data_val = EpochData()
    test_acc = []

    k = len(folds)
    for fold in range(k):
        # When comparing stochastic vs batch descent, run each fold twice.
        for stochastic in range(2 if STOCHASTIC_VS_BATCH else 1):

            # define the model
            if LOGISTIC:
                model = LogisticRegression(LEARNING_RATE, PRINCIPAL_COMPONENTS)
            else:
                model = SoftmaxRegression(LEARNING_RATE, PRINCIPAL_COMPONENTS, len(CATEGORIES))

            # split data: current fold -> validation, next fold -> test,
            # every remaining fold -> training
            val_data, test_data = split_x_y(folds[fold]), split_x_y(folds[(fold + 1) % k])
            train_data = None
            for i in range(k):
                if i != fold and i != ((fold + 1) % k):
                    if train_data is None:
                        train_data = folds[i]
                    else:
                        train_data = np.concatenate((train_data, folds[i]))
            train_data = split_x_y(train_data)

            # PCA is fitted on the training split only, avoiding leakage
            # into validation/test.
            pca = PCA(train_data[0], PRINCIPAL_COMPONENTS)

            # PCA and one_hot
            train_data, test_data, val_data = transform(pca, train_data), transform(pca, test_data), transform(pca,
                                                                                                               val_data)
            validation_performance = EpochData()
            training_performance = EpochData()

            # Sanity check that no validation sample leaked into training.
            # NOTE(review): membership tests on NumPy arrays compare
            # element-wise — confirm transform()'s return type makes this valid.
            assert not (any([val_img in train_data for val_img in val_data]))

            for epoch in range(EPOCHS):
                if STOCHASTIC_GRADIENT or (STOCHASTIC_VS_BATCH and stochastic == 0):
                    model.stochastic_gradient_descent(train_data[0], train_data[1])
                else:
                    model.batch_gradient_descent(train_data[0], train_data[1])

                train_prob = model.probabilities(train_data[0])
                val_prob = model.probabilities(val_data[0])

                training_error = model.loss(train_data[1], train_prob)
                validation_error = model.loss(val_data[1], val_prob)

                training_acc = model.accuracy(train_prob, train_data[1])
                validation_acc = model.accuracy(val_prob, val_data[1])

                if epoch % 10 == 0:
                    print("Training error: {}, validation error: {}, accuracy: {}".format(training_error,
                                                                                          validation_error,
                                                                                          training_acc))

                # save
                validation_performance.save(validation_error, validation_acc)
                training_performance.save(training_error, training_acc)

                # early stopping: bail out once validation error has risen
                # more than EARLY_STOPPING_THRESHOLD times
                if validation_performance.increments > EARLY_STOPPING_THRESHOLD:
                    break

            # plot the graphs
            data_to_plot = [training_performance.error, validation_performance.error]
            legends = ["Training error", "Validation error"]
            visualize_data(data_to_plot, legends, "Epoch", "Cross entropy error")

            data_to_plot = [training_performance.acc, validation_performance.acc]
            legends = ["Training accuracy", "Validation accuracy"]
            visualize_data(data_to_plot, legends, "Epoch", "Accuracy")

            # save the validation data to the model
            model.epoch_data = validation_performance

            # save the test data to the model
            model.test_data = test_data

            # save the pca
            model.pca = pca

            # save the epoch data
            avg_epoch_data_train.add(training_performance)
            avg_epoch_data_val.add(validation_performance)

            # save test accuracy
            test_acc.append(model.accuracy(model.probabilities(test_data[0]),
                                           test_data[1]))
            print("Test accuracy: {} ".format(test_acc[-1]))

            # keep the model with the lowest validation score
            if best_model is None:
                best_model = model
            elif best_model.epoch_data.score() > model.epoch_data.score():
                best_model = model

            if STOCHASTIC_VS_BATCH:
                # display graph
                if stochastic == 1:
                    data_to_visualize = [stochastic_data.error,
                                         training_performance.error]
                    visualize_data(data_to_visualize, ["Stochastic - train error", "Batch - train error"], "Epoch", "Loss")
                else:
                    stochastic_data = training_performance

    avg_test_acc = np.average(np.array(test_acc))
    avg_test_acc_std = np.std(np.array(test_acc))
    print("Avg test accuracy: {} ({})".format(avg_test_acc, avg_test_acc_std))

    avg_epoch_data_train.align(FOLDS)
    avg_epoch_data_val.align(FOLDS)

    visualize_data_avg(avg_epoch_data_train, avg_epoch_data_val)
    if not LOGISTIC:
        # BUG FIX: use the best model's own stored PCA, not the PCA of
        # whatever model the loop variable happened to end on.
        best_model.visualize_weights(best_model.pca)

    if SHOW_CONFUSION_MATRIX:
        confusion_matrix(best_model)

    if SHOW_PRINCIPAL_COMPONENTS:
        # NOTE(review): `pca` here is the last fold's PCA; consider
        # best_model.pca if the best model's components are wanted.
        show_principal_components(pca)