Example #1
def predict_digit_nn(theta_one, theta_two, X):
    # Hidden layer: X is assumed to already carry its bias column.
    first = lg.sigmoid(np.matmul(X, theta_one))
    # Prepend a bias column of ones before feeding the output layer.
    X_sec = np.ones((first.shape[0], first.shape[1] + 1))
    X_sec[:, 1:] = first
    second = lg.sigmoid(np.matmul(X_sec, theta_two))
    # Pick the most probable class per row.
    predict = np.zeros((second.shape[0], 1))
    predict[:, 0] = np.argmax(second, axis=1)
    return predict + 1  # Octave/MATLAB convention: labels are 1-based
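A minimal usage sketch. The lg module isn't shown here, so a stand-in sigmoid is stubbed below, and the shapes follow the classic 400-pixel digit setup; all names and sizes are illustrative, not the repo's actual wiring:

import numpy as np

class lg:  # stand-in for the imported helper module; assumption
    @staticmethod
    def sigmoid(z):
        return 1.0 / (1.0 + np.exp(-z))

m = 5
X = np.hstack([np.ones((m, 1)), np.random.rand(m, 400)])  # bias + 20x20 pixels
theta_one = np.random.randn(401, 25) * 0.1  # input layer -> 25 hidden units
theta_two = np.random.randn(26, 10) * 0.1   # hidden (+bias) -> 10 digit classes
print(predict_digit_nn(theta_one, theta_two, X))  # values in 1..10, 1-based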
Example #2
def costRegu(theta, X, y, learningRate):
    """
    Regularized logistic-regression cost.

    :param theta: parameter row vector (1 x n)
    :param X: feature matrix (m x n), first column all ones
    :param y: label column vector (m x 1)
    :param learningRate: regularization strength (lambda)
    :return: scalar cost
    """
    # np.matrix so that * below is a matrix product
    # (np.ndarray(...) would misread the input as a shape tuple).
    theta = np.matrix(theta)
    X = np.matrix(X)
    y = np.matrix(y)
    firstpart = np.multiply(-y, np.log(lr.sigmoid(X * theta.T)))
    secondpart = np.multiply(1 - y,
                             np.log(1 - lr.sigmoid(X * theta.T)))
    # Penalize every parameter except the bias term theta[0].
    regu = (learningRate / (2 * len(X)) *
            np.sum(np.power(theta[:, 1:theta.shape[1]], 2)))
    return np.sum(firstpart - secondpart) / len(X) + regu
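For reference, this computes the standard regularized cross-entropy cost, with learningRate in the code playing the role of lambda and the bias term excluded from the penalty (the sum over j starts at 1):

J(\theta) = \frac{1}{m}\sum_{i=1}^{m}\Big[-y^{(i)}\log h_\theta(x^{(i)}) - \big(1 - y^{(i)}\big)\log\big(1 - h_\theta(x^{(i)})\big)\Big] + \frac{\lambda}{2m}\sum_{j=1}^{n}\theta_j^2, \qquad h_\theta(x) = \frac{1}{1 + e^{-\theta^{T}x}}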
Example #3
def testLogisticRegression():
    print("\n\n Testing Logistic Regression:")
    df = pd.read_csv("testData/ex2data1.txt", delimiter=',', header=None)
    df_copy = df.copy()
    df.columns = [0, 1, 2]
    print(df)
    print(df.describe())
    # Separate the label column, then standardize the features.
    df_y = df[2]
    df, mu, sigma = featureScaling(df)
    X = df.values
    y = df_y.values.reshape(-1, 1)
    theta, cost, X = lg.logisticRegression(400, 0.1, X[:, 0:2], y[:, 0:1])
    plotLogisticRegression(df_copy.values, y, theta, cost)
    print("\nTest successful")
    print("<-------------------->")
    print("Theta:")
    print(theta)
    print("<-------------------->")
    print("Testing for:")
    print("Student with a score of 45 in Exam 1 and a score of 85 in Exam 2")
    # Scale the query point with the same mu/sigma used on the training data.
    test = np.array([1, (45 - mu[0]) / sigma[0], (85 - mu[1]) / sigma[1]])
    predict_prob = lg.sigmoid(np.matmul(test, theta))
    print("Probability that the Student passes: " + str(predict_prob) +
          " Expected Value: 0.775 +/- 0.002")
    print("<-------------------->")
    # Spot-check a few training examples (row index 12 is the 13th example).
    predict_prob_one = lg.sigmoid(np.matmul(X[12:13, :], theta))
    predict_prob_two = lg.sigmoid(np.matmul(X[43:44, :], theta))
    predict_prob_three = lg.sigmoid(np.matmul(X[21:22, :], theta))
    print(str(predict_prob_one.item()) + " <-- Example 13 --> " +
          str(y[12:13, 0]))
    print(str(predict_prob_two.item()) + " <-- Example 44 --> " +
          str(y[43:44, 0]))
    print(str(predict_prob_three.item()) + " <-- Example 22 --> " +
          str(y[21:22, 0]))
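featureScaling isn't part of this snippet; a minimal version consistent with how mu and sigma are used above would be standardization per column (an assumption, not necessarily the repo's implementation):

def featureScaling(df):
    # Standardize every column: subtract its mean, divide by its std-dev.
    mu = df.mean(axis=0)
    sigma = df.std(axis=0)
    return (df - mu) / sigma, mu, sigma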
Example #4
def testRegularizedLogisticRegression():
    print("\n\n Testing Regularized Logistic Regression:")
    df = pd.read_csv("testData/ex2data2.txt", delimiter=',', header=None)
    df_copy = df.copy()
    df.columns = [0, 1, 2]
    print(df)
    print(df.describe())
    # Separate the label column, then standardize the features.
    df_y = df[2]
    df, mu, sigma = featureScaling(df)
    X = df.values
    y = df_y.values.reshape(-1, 1)
    theta, X, cost = lg.regularized_logisticRegression(
        400, 0.01, X[:, :2], y[:, 0:1], 1)
    print("\nTest successful")
    print("<-------------------->")
    print("Theta:")
    print(theta)
    print("<-------------------->")
    print("Testing Train Accuracy - Polynomial Features were not used - lambda = 1")
    p = lg.sigmoid(np.matmul(X, theta))
    p[p >= 0.5] = 1
    p[p < 0.5] = 0
    print("Train Accuracy " + str(np.mean(p == y) * 100))
    print("<-------------------->")
    # Repeat with polynomial features.
    X, y = mapFeatures(df_copy.values)
    df, mu, sigma = featureScaling(X)
    X_copy = X.copy()
    X = df.values
    theta, X, cost_1 = lg.regularized_logisticRegression(
        400, 0.01, X[:, :], y[:, 0:1], 1)
    print("Testing Train Accuracy - Polynomial Features were used - lambda = 1")
    p = lg.predict(theta, X)
    print("Train Accuracy " + str(np.mean(p == y) * 100))
    print("<-------------------->")
    # Same features, but with regularization switched almost off.
    X = X_copy.values
    theta, X, cost_2 = lg.regularized_logisticRegression(
        900, 0.01, X[:, :], y[:, 0:1], 0.00001)
    print("Testing Train Accuracy - Polynomial Features were used - lambda = 0.00001")
    p = lg.predict(theta, X)
    print("Train Accuracy " + str(np.mean(p == y) * 100))
    print("<-------------------->")
    plotRegLogisticRegression(df_copy.values, y, cost, cost_1, cost_2)
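mapFeatures is likewise external. A sketch in the spirit of the classic degree-6 expansion from the Octave exercise, adapted so the trainer can still add its own bias column; the degree, the skipped constant term, and the DataFrame return type are all assumptions:

import numpy as np
import pandas as pd

def mapFeatures(data, degree=6):
    # data columns: [x1, x2, label]; emit every monomial x1**i * x2**j
    # with 1 <= i + j <= degree (the constant term is left to the trainer).
    x1, x2, y = data[:, 0], data[:, 1], data[:, 2]
    cols = []
    for i in range(degree + 1):
        for j in range(degree + 1 - i):
            if i == 0 and j == 0:
                continue
            cols.append((x1 ** i) * (x2 ** j))
    X = pd.DataFrame(np.column_stack(cols))
    return X, y.reshape(-1, 1)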
Example #5
def CFlogisticregression(theta, X, Y):
    """
    CF means cost function: unregularized logistic-regression cost.

    :param theta: parameter row vector (1 x n)
    :param X: feature matrix (m x n)
    :param Y: label column vector (m x 1)
    :return: scalar cost
    """
    # np.matrix so that * below is a matrix product.
    theta = np.matrix(theta)
    X = np.matrix(X)
    Y = np.matrix(Y)

    # Cost term for y = 1:
    firstpart = np.multiply(-Y, np.log(lr.sigmoid(X * theta.T)))
    # firstpart = -Y * np.log(sigmoid(X * theta.T))

    # Cost term for y = 0:
    secondpart = np.multiply(1 - Y,
                             np.log(1 - lr.sigmoid(X * theta.T)))
    # secondpart = (1 - Y) * np.log(1 - sigmoid(X * theta.T))
    return np.sum(firstpart - secondpart) / len(X)
Example #6
def predictDigitFromImage(theta):
    # Load the hand-drawn digit as a flat grayscale vector.
    im = imageio.imread(
        r'C:\Study\DataSets\MNIST_Handwritten_Digit_Recognizer\3.png',
        as_gray=True)
    # Prepend the bias feature, matching the training-data layout.
    data_im = np.ones([im.flatten().shape[0] + 1])
    data_im[1:] = im.flatten()
    data_im = data_im.reshape(1, data_im.shape[0])
    print("Predict Digit: Input data =", im.shape, data_im.shape, theta)

    # One score per class from the one-vs-all thetas; pick the best.
    Z = logisticRegression.sigmoid(np.dot(data_im, theta.T))
    prediction = np.argmax(Z, axis=1)
    prob_max = np.max(Z, axis=1)
    print("Predict Digit: Prediction Result and Probability =", Z, prediction,
          prob_max)
Example #7
def test_OneVsAll():
    # This dataset is downloaded from Kaggle
    train_data = loadTrainingData(
        r'C:\Study\DataSets\MNIST_Handwritten_Digit_Recognizer\train.csv')

    # Total number of records
    m = len(train_data)

    # Populate y data into an m-dim vector
    # and then drop that column from the feature list
    num_labels = len(train_data.label.unique())
    data_y = train_data.label.values.reshape(m, 1)
    train_data = train_data.drop('label', axis=1)

    # Set the first feature to 1; this is the bias/y-intercept, theta0
    train_data.insert(0, 'first_dummy_feature', 1)

    # Populate X (features) data into an m x n matrix
    data_X = train_data.values

    # Reduce the training data set
    data_X_1 = data_X[15000:20000, :]
    data_y_1 = data_y[15000:20000, :]

    # Call one-vs-all calculation
    lambda_reg = 1
    all_theta = logisticRegression.oneVsAll(data_X_1, data_y_1, num_labels,
                                            lambda_reg)
    print("OneVsAll: Theta after Advanced Optimization =", all_theta.shape)

    # Predict results of test data (on test data from the Kaggle dataset):
    # test_data = loadTrainingData(r'C:\Study\DataSets\MNIST_Handwritten_Digit_Recognizer\test.csv')
    # test_data_m = len(test_data)
    # test_data.insert(0, 'first_dummy_feature', 1)
    # test_data_X = test_data.values

    # Predict results on a held-out subset of the input training data
    test_data_X = data_X[25000:30000, :]
    test_data_y = data_y[25000:30000, :]

    Z = logisticRegression.sigmoid(np.dot(test_data_X, all_theta.T))
    prediction = np.argmax(Z, axis=1)
    prob_max = np.max(Z, axis=1)
    print("OneVsAll: Prediction Result =", prediction.shape)
    accuracy = np.mean(
        prediction.reshape(test_data_y.shape) == test_data_y) * 100
    print("OneVsAll: Prediction Accuracy % =", accuracy)

    # Predict the digit I have written from the PNG file supplied
    predictDigitFromImage(all_theta)
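oneVsAll itself isn't shown here; the print above says the real version uses an advanced optimizer. As a stand-in for intuition only, a plain gradient-descent version of one-vs-all (one regularized binary classifier per digit, thetas stacked row-wise) could look like this; oneVsAll_sketch, iters, and alpha are hypothetical names:

import numpy as np

def oneVsAll_sketch(X, y, num_labels, lambda_reg, iters=300, alpha=0.1):
    # Hypothetical stand-in: one regularized logistic classifier per digit,
    # trained by plain gradient descent and stacked row-wise into all_theta.
    m, n = X.shape
    all_theta = np.zeros((num_labels, n))
    for c in range(num_labels):
        yc = (y[:, 0] == c).astype(float)  # one-vs-all targets for class c
        theta = np.zeros(n)
        for _ in range(iters):
            h = 1.0 / (1.0 + np.exp(-(X @ theta)))
            grad = (X.T @ (h - yc)) / m
            grad[1:] += (lambda_reg / m) * theta[1:]  # don't penalize the bias
            theta -= alpha * grad
        all_theta[c] = theta
    return all_theta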
Example #8
def classifyVector(dataIn, weights):
    # Threshold the sigmoid activation at 0.5 to get a hard 0/1 label.
    h = LR.sigmoid(np.sum(dataIn * weights))
    return 1.0 if h > 0.5 else 0.0
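A quick usage sketch; the LR module isn't shown, so a standard sigmoid is stubbed in, and the weights and sample values are made up:

import numpy as np

class LR:  # stand-in for the imported module; assumption
    @staticmethod
    def sigmoid(z):
        return 1.0 / (1.0 + np.exp(-z))

weights = np.array([0.5, -1.2, 0.8])    # hypothetical trained weights
sample = np.array([1.0, 0.3, 2.0])      # bias input + two features
print(classifyVector(sample, weights))  # sum = 1.74, sigmoid > 0.5 -> 1.0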
Example #9
import numpy as np
import matplotlib.pyplot as plt
from costFunction import crossEntropyVectorized
from Data_Process import get_binaryData, get_data
from logisticRegression import classification_rate, sigmoid, forward

N = 50
D = 50

# Fat data: 50 samples, 50 features, only the first three weights matter.
X = (np.random.random((N, D)) - 0.5) * 10
W = np.array([1, 0.5, -0.5] + [0] * (D - 3))
Y = np.round(sigmoid(X.dot(W) + np.random.randn(N) * 0.5))

costs = []
W_hat = np.random.randn(D)
b = 0  # bias stays 0: the data were generated without an intercept
lr = 0.001
lam = 5  # L1 penalty strength
for i in range(1000):
    Y_hat = forward(X, W_hat, b)
    delta = Y_hat - Y
    # Gradient step plus the L1 subgradient lam * sign(W_hat).
    W_hat -= lr * (X.T.dot(delta) + lam * np.sign(W_hat))
    cost = crossEntropyVectorized(Y, Y_hat) + lam * np.mean(np.abs(W_hat))
    costs.append(cost)
plt.plot(costs)
plt.show()

# L1 drives the irrelevant weights toward zero; compare against the truth.
plt.plot(W, label="Original W")
plt.plot(W_hat, label="L1 W")
plt.legend()
plt.savefig("images/L1 regularization logistic")
Example #10
# Setup assumed from the surrounding script, which this snippet omits
# (values are illustrative; uses the imports from Example #9):
N = 1000                     # total number of points
D = 2                        # raw input dimensionality
R_inner, R_outer = 5, 10     # radii of the two rings
X1 = R_inner + np.random.randn(N // 2)  # noisy radii, inner ring
X2 = R_outer + np.random.randn(N // 2)  # noisy radii, outer ring

# Spread each ring's radii around the circle at random angles.
theta = 2 * np.pi * np.random.random(N // 2)
X1 = np.concatenate([[X1 * np.cos(theta)], [X1 * np.sin(theta)]]).T
theta = 2 * np.pi * np.random.random(N // 2)
X2 = np.concatenate([[X2 * np.cos(theta)], [X2 * np.sin(theta)]]).T
X = np.concatenate([X1, X2])
T = np.array([0] * (N // 2) + [1] * (N // 2))
plt.scatter(X[:, 0], X[:, 1], c=T)
plt.savefig("images/donus")  # save before show(), or the figure is blank
plt.show()

# Add a bias column and the radius as an extra feature;
# the radius is what makes the doughnut linearly separable.
bias = np.ones((N, 1))
r = np.sqrt((X * X).sum(axis=1)).reshape(-1, 1)
X = np.concatenate((bias, r, X), axis=1)
W = np.random.randn(D + 2)
Y = sigmoid(X.dot(W))
lr = 0.001
costs = []
for i in range(3000):
    cost = crossEntropyVectorized(T, Y)
    costs.append(cost)
    if i % 500 == 0:
        print(cost)
    # Gradient step on the log-likelihood with an L2 penalty of 0.1 * W.
    W += lr * (X.T.dot(T - Y) - 0.1 * W)
    Y = sigmoid(X.dot(W))

plt.plot(costs)
plt.title("DoughNut logistic")
plt.savefig("images/DoughNut logistic")
plt.show()
print(classification_rate(T, np.round(Y)))