Example #1
def one_versus_all_train(data, lambda_):
    # One-vs-all training: fit one regularized logistic model per class
    # and keep, for every sample, the class with the highest probability.
    cat_unique = np.unique(data[2])
    val = np.ones(data[0].shape[0])
    prob = np.zeros(data[0].shape[0])
    y_train = data[2]
    x_train = add_polynomial_features_extend(data[0], 3)
    theta_list = []
    for planete in cat_unique:
        # Binary target: 1.0 for the current class, 0.0 for every other class.
        y_zero_train = (y_train == planete).astype(float)
        mlr = MLR(np.ones(x_train.shape[1] + 1))
        mlr.fit_(x_train,
                 y_zero_train,
                 alpha=5e-15,
                 n_cycle=100000,
                 lambda_=lambda_)
        y_hat = mlr.predict_(x_train)
        # Track the running maximum probability and its class for each sample.
        for i, planete_prob in enumerate(y_hat.tolist()):
            if prob[i] < planete_prob[0]:
                val[i] = planete
                prob[i] = planete_prob[0]
        theta_list.append(mlr.theta)
    return [val, theta_list]
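
The inner `if prob[i] < planete_prob[0]` loop keeps a running maximum across classes; a minimal vectorized sketch of the same idea, assuming each `predict_` call returns an (m, 1) column:

def argmax_one_vs_all(probs_per_class, classes):
    # probs_per_class: list of (m, 1) probability columns, one per class;
    # classes: the matching class labels (e.g. np.unique(y_train)).
    stacked = np.hstack(probs_per_class)   # shape (m, n_classes)
    winners = np.argmax(stacked, axis=1)   # index of the best class per row
    return np.array(classes)[winners].reshape(-1, 1)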
Example #2
def test_logistic_regression():
    X = np.array([[1., 1., 2., 3.], [5., 8., 13., 21.], [3., 5., 9., 14.]])
    Y = np.array([[1], [0], [1]])
    # mylr = MyLR([2, 0.5, 7.1, -4.3, 2.09], 5e-5, 500000)
    mylr = MyLR([2, 0.5, 7.1, -4.3, 2.09], alpha=0.001, n_cycle=22000)

    # Example 0:
    print(mylr.predict_(X), end="\n\n")
    # Output:
    # array([[0.99930437],
    #        [1.],
    #        [1.]])

    # Example 1:
    print(mylr.cost_(X, Y), end="\n\n")
    # Output:
    # 11.513157421577004

    # Example 2:
    mylr.fit_(X, Y)
    print(mylr.theta, end="\n\n")
    # Output:
    # array([[1.04565272],
    #        [0.62555148],
    #        [0.38387466],
    #        [0.15622435],
    #        [-0.45990099]])

    # Example 3:
    print(mylr.predict_(X), end="\n\n")
    # Output:
    # array([[0.72865802],
    #        [0.40550072],
    #        [0.45241588]])

    # Example 4:
    print(mylr.cost_(X, Y), end="\n\n")
Example #3
def one_versus_all_test(data, result):
    # One-vs-all prediction: reuse the per-class thetas in `result` and keep,
    # for every test sample, the class with the highest probability.
    cat_unique = np.unique(data[2])
    val = np.ones(data[1].shape[0])
    prob = np.zeros(data[1].shape[0])
    x_test = data[1]
    for j, planete in enumerate(cat_unique):
        mlr = MLR(result[j])
        y_hat = mlr.predict_(x_test)
        for i, planete_prob in enumerate(y_hat.tolist()):
            if prob[i] < planete_prob[0]:
                val[i] = planete
                prob[i] = planete_prob[0]
    return val
Example #4
def ofa(x: np.ndarray, y: np.ndarray, zipcode: float, x_test: np.ndarray,
        theta) -> np.ndarray:
    """ One For All
    Args:
        x (np.ndarray): shape(m * n)
        y (np.ndarray): shape(m * 1)
        zipcode (float): the one zipcode pitted against all the others
        x_test (np.ndarray): shape(m_2 * n), citizens to score
        theta: initial value for theta
    Returns:
        np.ndarray: shape(m_2 * 1),
            probability for each citizen of belonging to the $zipcode planet
        None: if the shapes don't match
    """
    if x.shape[0] != y.shape[0] or y.shape[1] != 1:
        return None
    # Binary target: 1.0 where the origin matches zipcode, 0.0 elsewhere.
    y = (y == zipcode).astype(float)
    alpha = 3e-4
    max_iter = int(3e+5)
    lr_model = MyLR(theta, alpha, max_iter)
    lr_model.fit_(x, y)
    y_test_hat = lr_model.predict_(x_test)
    return y_test_hat
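
A minimal driver sketch for ofa, assuming zipcodes 0 through 3 and a zero-initialized theta (both assumptions, not taken from the source):

zipcodes = [0., 1., 2., 3.]                 # assumed class labels
theta0 = np.zeros((x.shape[1] + 1, 1))      # assumed initial theta
# Run ofa once per zipcode, stack the probability columns, then pick,
# for every citizen, the zipcode with the highest probability.
probas = [ofa(x, y, z, x_test, theta0.copy()) for z in zipcodes]
predicted = np.argmax(np.hstack(probas), axis=1).reshape(-1, 1)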
Example #5
mylr3 = MyLR([[-4.41035678], [4.24667587], [-3.76787019],
              [-5.23183696]])  # The Asteroids’ Belt colonies (3).
mylr3.fit_(x_train, y_train3)
mylr3.alpha = 0.03
mylr3.fit_(x_train, y_train3)
mylr3.alpha = 0.3
mylr3.fit_(x_train, y_train3)

print(mylr0.thetas)
print(mylr1.thetas)
print(mylr2.thetas)
print(mylr3.thetas)
# models trained
# predict on the full dataset
y_hat0 = mylr0.predict_(x)
y_hat1 = mylr1.predict_(x)
y_hat2 = mylr2.predict_(x)
y_hat3 = mylr3.predict_(x)

y_hat_total = np.append(y_hat0, y_hat1, axis=1)
y_hat_total = np.append(y_hat_total, y_hat2, axis=1)
y_hat_total = np.append(y_hat_total, y_hat3, axis=1)

y_hat_pre_all = np.array([])
# assign each sample to the class with the highest predicted probability
for i in range(len(y_hat_total)):
    y_hat_pre_all = np.append(y_hat_pre_all, np.argmax(y_hat_total[i]))

y_hat_pre_all = y_hat_pre_all.reshape(-1, 1)
print(accuracy_score_(y, y_hat_pre_all))
Example #6
thetas = np.zeros(X.shape[1] + 1)

mlogr = MLogR(thetas, alpha=0.02, n_cycle=2000, penalty='l2')
#for each category (0, 1, 2, 3) carry out logistic regression
#and concatenate the 4 predicted probability vectors, giving a matrix
#with the probability that each element belongs to each category
f1_scores = []
for l in range(0, 100, 10):
    for i in range(4):
        mlogr.thetas = thetas
        mlogr.lambda_ = float(l / 10)
        Y_train_label = select_label(Y_train, i)
        mlogr.fit_(X_train, Y_train_label)
        if i == 0:
            results = mlogr.predict_(X_test).reshape(-1, 1)
        if i > 0:
            results = np.concatenate(
                (results, mlogr.predict_(X_test).reshape(-1, 1)), axis=1)

    #choose the most probable category for each element and condense into a vector
    final_labels = np.array([get_biggest_index(i)
                             for i in results]).reshape(-1, 1)

    #plot the test results and predicted results together, one graph for each feature
    #for i in range(4):
    #    plt.plot(X_test[:, i:i + 1], Y_test, 'o')
    #    plt.plot(X_test[:, i:i + 1], final_labels, 'ro', markersize=3)
    #    plt.title((people.columns[i + 1] + ' -- lambda = ' + str(l / 10)))
    #    plt.show()
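
f1_scores is declared above but never filled; a minimal sketch of how one entry per lambda_ could be recorded, assuming Y_test holds the true test labels and that an f1_score_(y, y_hat) helper exists (both assumptions):

# hypothetical: record (lambda_, score) once per iteration of the outer loop
f1_scores.append((l / 10, f1_score_(Y_test, final_labels)))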
Example #7
#the two data sets, training and test, each split into X and Y
xtr_set = data[0][0]
ytr_set = data[1][0]
xtest_set = data[0][1]
ytest_set = data[1][1]

#the zipcodes converted to binary, i.e. whether they match the current class or not
xtr_set = regularize(xtr_set)
#xtest_set = regularize(xtest_set)
for i in range(4):
    mylr = MyLogisticRegression([0, 0, 0, 0], alpha=0.0001, n_cycle=10000)
    mylr_t = MyLogisticRegression([0, 0, 0, 0], alpha=0.001, n_cycle=10000)
    # train both models (two learning rates) on the binarized training labels
    mylr.fit_(xtr_set, np.array(ytr_set == i))
    mylr_t.fit_(xtr_set, np.array(ytr_set == i))
    if i == 0:
        pred = np.array(mylr.predict_(xtest_set).reshape(-1, 1))
        pred_t = np.array(mylr_t.predict_(xtest_set).reshape(-1, 1))
    else:
        pred = np.c_[pred, mylr.predict_(xtest_set).reshape(-1, 1)]
        pred_t = np.c_[pred_t, mylr_t.predict_(xtest_set).reshape(-1, 1)]

result = []
for val in pred:
    result.append(val.argmax())
result_t = []
for val in pred_t:
    result_t.append(val.argmax())
answer = np.array(result).reshape(-1, 1)
answer_t = np.array(result_t).reshape(-1, 1)
dif = 0
dif_t = 0
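
dif and dif_t are initialized but never updated; a minimal continuation sketch, assuming ytest_set holds the true zipcodes:

# hypothetical continuation: count the predictions that disagree with the
# true labels for each of the two learning rates
dif = int(np.sum(answer != np.array(ytest_set).reshape(-1, 1)))
dif_t = int(np.sum(answer_t != np.array(ytest_set).reshape(-1, 1)))
print(dif, dif_t)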
Example #8
#split data into training and testing sets
X_train, X_test, origins_train, origins_test = data_splitter(X, origins, 0.5)

thetas = np.zeros(4)

mlogr = MLogR(thetas, alpha=0.9, n_cycle=2000)
for i in range(4):
    mlogr.thetas = thetas
    origins_train_label = select_label(origins_train, i)
    origins_test_label = select_label(origins_test, i)
    #mlogr.fit_(X_train, origins_train_label)
    plt.title(('label: ' + str(i)))
    mlogr.plot_convergence(X_train, origins_train_label)
    if i == 0:
        results = mlogr.predict_(X_test).reshape(-1, 1)
    if i > 0:
        results = np.concatenate((results, mlogr.predict_(X_test).reshape(-1, 1)), axis=1)

def get_biggest_index(row):
    # return the index of the largest value in row (row-wise argmax)
    biggest = -1
    index = -1
    i = 0
    for elem in row:
        if elem > biggest:
            biggest = elem
            index = i
        i += 1
    return index

labels = np.array([get_biggest_index(i) for i in results]).reshape(-1, 1)
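
get_biggest_index is a hand-rolled row-wise argmax; the same labels can be obtained directly with NumPy:

# equivalent to the comprehension above
labels = np.argmax(results, axis=1).reshape(-1, 1)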
Example #9
import numpy as np
from my_logistic_regression import MyLogisticRegression as MyLR

X = np.array([[1., 1., 2., 3.], [5., 8., 13., 21.], [3., 5., 9., 14.]])
Y = np.array([[1], [0], [1]])

# mylr = MyLR([2, 0.5, 7.1, -4.3, 2.09])
mylr = MyLR([0., 0., 0., 0., 0.])

# Example 0:
print(mylr.predict_(X))
# Output:
# array([[0.99930437], [1. ],[1. ]])

# Example 1:
print(mylr.cost_(X, Y))
# Output: 11.513157421577004

# Example 2:
mylr.fit_(X, Y, alpha=1e-4, n_cycle=50000)
print(mylr.thetas)
# exit()
# Output:
# array([[ 1.04565272], [ 0.62555148], [ 0.38387466], [ 0.15622435], [-0.45990099]])

# Example 3:
print(mylr.predict_(X))
# Output:
# array([[0.72865802], [0.40550072], [0.45241588]])

# Example 4:
Example #10
    mylr3.fit_(x_train_add_poly, y_train3)
    mylr3.alpha = 0.00003
    mylr3.fit_(x_train_add_poly, y_train3)
    mylr3.alpha = 0.00007
    mylr3.fit_(x_train_add_poly, y_train3)
    mylr3.alpha = 0.0001
    mylr3.fit_(x_train_add_poly, y_train3)
    mylr3.lambda_ += 0.1

    print(mylr0.thetas)
    print(mylr1.thetas)
    print(mylr2.thetas)
    print(mylr3.thetas)
    # models trained
    # predict on the test data
    y_hat0 = mylr0.predict_(x_test_add_poly)
    y_hat1 = mylr1.predict_(x_test_add_poly)
    y_hat2 = mylr2.predict_(x_test_add_poly)
    y_hat3 = mylr3.predict_(x_test_add_poly)

    y_hat_total = np.append(y_hat0, y_hat1, axis=1)
    y_hat_total = np.append(y_hat_total, y_hat2, axis=1)
    y_hat_total = np.append(y_hat_total, y_hat3, axis=1)

    y_hat_pre_all = np.array([])
    # assign each sample to the class with the highest predicted probability
    for i in range(len(y_hat_total)):
        y_hat_pre_all = np.append(y_hat_pre_all, np.argmax(y_hat_total[i]))

    y_hat_pre_all = y_hat_pre_all.reshape(-1, 1)
Example #11
import pandas as pd
from my_logistic_regression import MyLogisticRegression as MyLR
import numpy as np
import matplotlib.pyplot as plt

X = pd.read_csv("solar_system_census.csv")
Y = pd.read_csv("solar_system_census_planets.csv")

X = np.array(X[['height', 'weight', 'bone_density']]).reshape(-1, 3)
Y = np.array(Y['Origin']).reshape(-1, 1)

mylr = MyLR([1, 1, 1, 1])
X = mylr.zscore(X)
Y = mylr.label(Y, 2)

x_train, x_test, y_train, y_test = mylr.data_spliter(X, Y, 0.6)

print(mylr.fit_(x_train, y_train))
#print(mylr.predict_(x_train))
print(mylr.cost_(x_train, y_train))

plt.plot(x_train[:, 0], y_train, 'b.')
plt.plot(x_train[:, 0], mylr.predict_(x_train), 'g.')

plt.show()
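
The held-out split (x_test, y_test) is created but never used; a minimal evaluation sketch, reusing only methods already called above:

# hypothetical follow-up: score the fitted model on the held-out split
print(mylr.cost_(x_test, y_test))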
Example #12

import numpy as np
from my_logistic_regression import MyLogisticRegression as MyLR

if __name__ == "__main__":
    X = np.array([[1., 1., 2., 3.], [5., 8., 13., 21.], [3., 5., 9., 14.]])
    Y = np.array([[1], [0], [1]])
    mylr = MyLR([2, 0.5, 7.1, -4.3, 2.09], max_iter=int(2e4))

    # Example 0:
    Y_HAT = mylr.predict_(X)
    print(Y_HAT)
    # Output:
    """
    array([[0.99930437],
           [1.        ],
           [1.        ]])
    """

    # Example 1:
    print(mylr.cost_(Y, Y_HAT))
    # Output:
    """
    11.513157421577004
    """