Exemplo n.º 1
0
def one_versus_all_train(data, lambda_):
    """Fit one one-vs-all model per category and label the training rows.

    For every distinct value in ``data[2]`` a fresh ``MLR`` model is fitted
    on a binary target (1.0 where the label equals that value, else 0.0).
    Each training row is then assigned the category whose model produced the
    highest score for it.

    Args:
        data: Indexable container; ``data[0]`` holds the training features
            (must expose ``.shape``) and ``data[2]`` the training labels.
        lambda_: Regularisation value forwarded unchanged to ``MLR.fit_``.

    Returns:
        list: ``[val, theta_list]``. ``val`` is a float array with one
        predicted category per training row; rows for which no model scores
        above 0 keep the initial value 1.0. ``theta_list`` holds each
        model's ``theta`` in the order of ``np.unique(data[2])``.
    """
    y_train = data[2]
    cat_unique = np.unique(y_train)
    # The second argument (3) is presumably the polynomial degree -- TODO
    # confirm against add_polynomial_features_extend.
    x_train = add_polynomial_features_extend(data[0], 3)

    n_samples = data[0].shape[0]
    val = np.ones(n_samples)    # best category found so far, per row
    prob = np.zeros(n_samples)  # score of that best category, per row

    theta_list = []
    for planete in cat_unique:
        y_zero_train = (y_train == planete).astype(float)
        mlr = MLR(np.ones(x_train.shape[1] + 1))
        mlr.fit_(x_train,
                 y_zero_train,
                 alpha=5e-15,
                 n_cycle=100000,
                 lambda_=lambda_)
        # Only the first column of the prediction is used as the score.
        scores = np.asarray(mlr.predict_(x_train))[:, 0]
        # Strict comparison: on a tie the earlier category is kept.
        better = scores > prob
        val[better] = planete
        prob[better] = scores[better]
        theta_list.append(mlr.theta)
    return [val, theta_list]
Exemplo n.º 2
0
def train_cat(x, y, cat, theta=None, alpha=0.001, n_cycle=1000):
    """Train a ``MyLogR`` model that separates category ``cat`` from the rest.

    Args:
        x: Feature matrix; ``x.shape[1]`` sizes the default ``theta``.
        y: Labels, converted to a per-category target by ``select_category``.
        cat: Category passed to ``select_category``.
        theta: Initial parameters; defaults to a vector of ones with
            ``x.shape[1] + 1`` entries.
        alpha: Passed positionally to ``MyLogR`` as its second argument.
        n_cycle: Passed positionally to ``MyLogR`` as its third argument.

    Returns:
        The ``MyLogR`` instance after ``fit_`` has been called on it.
    """
    initial_theta = np.ones(x.shape[1] + 1) if theta is None else theta
    category_target = select_category(y, cat)

    model = MyLogR(initial_theta, alpha, n_cycle)
    model.fit_(x, category_target)
    return model
Exemplo n.º 3
0
    def solve_multiclass_classification(self, iterations=500, learning_rate=0.01):
        """Run the multi-class experiment with the custom model and with sklearn.

        The custom ``MyLogisticRegression`` (``multi_class=True``) is fitted on
        the training split, its test predictions are printed next to the true
        test outputs and handed to ``self.eval_classification``. A sklearn
        ``LogisticRegression(multi_class="ovr")`` is then fitted on the same
        data and its classification report is printed.

        Args:
            iterations: First positional argument of ``MyLogisticRegression``.
            learning_rate: Second positional argument of ``MyLogisticRegression``.
        """
        # Custom implementation.
        own_model = MyLogisticRegression(iterations, learning_rate, multi_class=True)
        own_model.fit(self.train_inputs, self.train_outputs)
        own_predictions = own_model.predict(self.test_inputs)

        print(self.test_outputs)
        print(own_predictions)
        self.eval_classification(own_predictions)

        # sklearn baseline on the same split.
        sk_model = linear_model.LogisticRegression(multi_class="ovr")
        sk_model.fit(self.train_inputs, self.train_outputs)
        sk_predictions = sk_model.predict(self.test_inputs)
        sk_report = sklearn.metrics.classification_report(
            self.test_outputs, sk_predictions, target_names=self.output_names)
        print(sk_report)
Exemplo n.º 4
0
    def solve_binary_classification(self, iterations=100, learning_rate=0.01):
        """Run the binary experiment with the custom model and with sklearn.

        Fits ``MyLogisticRegression`` on the training split, prints the learnt
        model as a formula, evaluates its test predictions through
        ``self.eval_classification``, then repeats fit/print/report with
        sklearn's ``LogisticRegression``. When the first training sample has
        exactly two entries, the data and the predictions are also plotted.

        Args:
            iterations: First positional argument of ``MyLogisticRegression``.
            learning_rate: Second positional argument of ``MyLogisticRegression``.
        """

        def _formula(bias, weights):
            # Renders "f(x) = <bias> + <w1>*x1 + <w2>*x2 ...".
            terms = "".join(" + " + str(weight) + "*x" + str(position)
                            for position, weight in enumerate(weights, start=1))
            return "f(x) = " + str(bias) + terms

        first_sample = self.train_inputs[0]
        has_two_features = len(first_sample) == 2
        if has_two_features:
            plot_classification_data(self.train_inputs, self.train_outputs, self.input_features, self.output_names)

        # Custom implementation.
        own_model = MyLogisticRegression(iterations, learning_rate)
        own_model.fit(self.train_inputs, self.train_outputs)
        own_weights = own_model.get_coef()
        own_bias = own_model.get_intercept()
        print("model: " + _formula(own_bias, own_weights))

        own_predictions = own_model.predict(self.test_inputs)
        if has_two_features:
            plot_predictions(self.test_inputs, self.test_outputs, own_predictions, self.input_features,
                             self.output_names)
        self.eval_classification(own_predictions)

        # sklearn baseline on the same split.
        sk_model = linear_model.LogisticRegression()
        sk_model.fit(self.train_inputs, self.train_outputs)
        sk_weights = sk_model.coef_.tolist()[0]
        sk_bias = sk_model.intercept_[0]
        print("model sk: " + _formula(sk_bias, sk_weights))

        sk_predictions = sk_model.predict(self.test_inputs)
        sk_report = sklearn.metrics.classification_report(
            self.test_outputs, sk_predictions, target_names=self.output_names)
        print(sk_report)
Exemplo n.º 5
0
def one_versus_all_test(data, result):
    """Label the test rows using previously trained one-vs-all parameters.

    For each distinct value in ``data[2]`` an ``MLR`` model is built from the
    matching entry of ``result`` (no fitting happens here) and scored on
    ``data[1]``. Every test row is assigned the category whose model gave the
    highest score.

    Args:
        data: Indexable container; ``data[1]`` holds the test features
            (must expose ``.shape``) and ``data[2]`` the training labels,
            used only to enumerate the categories.
        result: Sequence of parameter vectors, one per category, in the
            order of ``np.unique(data[2])``.

    Returns:
        numpy.ndarray: Float array with one predicted category per test row;
        rows for which no model scores above 0 keep the initial value 1.0.
    """
    cat_unique = np.unique(data[2])
    # NOTE(review): one_versus_all_train expands data[0] with
    # add_polynomial_features_extend(..., 3) before fitting, whereas data[1]
    # is used as-is here, so it must already carry matching features -- confirm.
    x_test = data[1]

    n_samples = x_test.shape[0]
    val = np.ones(n_samples)    # best category found so far, per row
    prob = np.zeros(n_samples)  # score of that best category, per row

    for j, planete in enumerate(cat_unique):
        mlr = MLR(result[j])
        # Only the first column of the prediction is used as the score.
        scores = np.asarray(mlr.predict_(x_test))[:, 0]
        # Strict comparison: on a tie the earlier category is kept.
        better = scores > prob
        val[better] = planete
        prob[better] = scores[better]
    return val
Exemplo n.º 6
0
def ofa(x: np.ndarray, y: np.ndarray, zipcode: float, x_test: np.ndarray,
        theta, alpha: float = 3e-4, max_iter: int = int(3e+5)) -> np.ndarray:
    """One For All: train ``zipcode``-versus-rest and score ``x_test``.

    Args:
        x (np.ndarray): training features, shape(m * n)
        y (np.ndarray): training labels, shape(m * 1)
        zipcode (float): the one zipcode against the all
        x_test (np.ndarray): features to predict on; passed unchanged to
            ``MyLR.predict_``
        theta: init value for theta
        alpha (float): second positional argument of ``MyLR``
            (default 3e-4, the value previously hard-coded)
        max_iter (int): third positional argument of ``MyLR``
            (default 300000, the value previously hard-coded)
    Returns:
        np.ndarray: the output of ``MyLR.predict_(x_test)``,
            percentage for each citizen of belonging to $zipcode planet
        None: if shapes don't match (``y`` is not a single column with as
            many rows as ``x``)
    """
    # Check ``ndim`` first so a 1-D ``y`` yields the documented ``None``
    # instead of an IndexError on ``y.shape[1]``.
    if y.ndim != 2 or y.shape[1] != 1 or x.shape[0] != y.shape[0]:
        return None
    # Boolean one-vs-rest target: True where the label equals ``zipcode``.
    is_zipcode = (y == zipcode)
    lr_model = MyLR(theta, alpha, max_iter)
    lr_model.fit_(x, is_zipcode)
    return lr_model.predict_(x_test)
Exemplo n.º 7
0
def test_logistic_regression():
    """Print predictions, cost and parameters of ``MyLR`` before and after fitting.

    Nothing is asserted; the values quoted in the comments are the reference
    outputs that accompanied the original example.
    """
    features = np.array([[1., 1., 2., 3.],
                         [5., 8., 13., 21.],
                         [3., 5., 9., 14.]])
    targets = np.array([[1], [0], [1]])
    model = MyLR([2, 0.5, 7.1, -4.3, 2.09], alpha=0.001, n_cycle=22000)

    # Example 0: predictions with the initial parameters.
    # Reference output: [[0.99930437], [1.], [1.]]
    initial_predictions = model.predict_(features)
    print(initial_predictions, end="\n\n")

    # Example 1: cost with the initial parameters.
    # Reference output: 11.513157421577004
    initial_cost = model.cost_(features, targets)
    print(initial_cost, end="\n\n")

    # Example 2: parameters after fitting.
    # Reference output:
    # [[1.04565272], [0.62555148], [0.38387466], [0.15622435], [-0.45990099]]
    model.fit_(features, targets)
    print(model.theta, end="\n\n")

    # Example 3: predictions after fitting.
    # Reference output: [[0.72865802], [0.40550072], [0.45241588]]
    fitted_predictions = model.predict_(features)
    print(fitted_predictions, end="\n\n")

    # Example 4: cost after fitting.
    fitted_cost = model.cost_(features, targets)
    print(fitted_cost, end="\n\n")
Exemplo n.º 8
0
#    test.py                                            :+:      :+:    :+:    #
#                                                     +:+ +:+         +:+      #
#    By: mli <*****@*****.**>                    +#+  +:+       +#+         #
#                                                 +#+#+#+#+#+   +#+            #
#    Created: 2020/12/26 18:14:12 by mli               #+#    #+#              #
#    Updated: 2020/12/26 18:49:06 by mli              ###   ########.fr        #
#                                                                              #
# **************************************************************************** #

import numpy as np
from my_logistic_regression import MyLogisticRegression as MyLR

# Manual demo: prints the model's predictions and cost on a tiny fixed data
# set so they can be compared by eye with the reference outputs quoted below.
if __name__ == "__main__":
    # Three samples with four features each, and one binary label per sample.
    X = np.array([[1., 1., 2., 3.], [5., 8., 13., 21.], [3., 5., 9., 14.]])
    Y = np.array([[1], [0], [1]])
    # Five initial parameters for four features (presumably one intercept
    # plus one weight per feature -- confirm against MyLogisticRegression).
    mylr = MyLR([2, 0.5, 7.1, -4.3, 2.09], max_iter=int(2e4))

    # Example 0: predictions with the initial parameters.
    Y_HAT = mylr.predict_(X)
    print(Y_HAT)
    # Output:
    """
    array([[0.99930437],
           [1.        ],
           [1.        ]])
    """

    # Example 1: cost of those predictions; note that here cost_ receives
    # the labels and the predictions, not the features.
    print(mylr.cost_(Y, Y_HAT))
    # Output:
    """
Exemplo n.º 9
0
# One-vs-all classification script: one MyLR model per class 0..3, with the
# per-class predictions collected column-wise in `pred`.
census = np.array(dcensus[["height", "weight", "bone_density"]]).reshape(-1, 3)
planets = np.array(dplanets[["Origin"]]).reshape(-1, 1)
data = data_spliter(census, planets, 0.7)

# The two data sets, test and training, each split into X and Y.
# adp(..., 3) is applied to both feature sets (presumably a degree-3
# polynomial expansion -- confirm against adp).
xtr_set = adp(data[0][0], 3)
ytr_set = data[1][0]
xtest_set = adp(data[0][1], 3)
ytest_set = data[1][1]
# NOTE(review): this single array object is handed to every MyLR below; if
# MyLR updates it in place, later models start from the previous model's
# parameters -- confirm.
theta = np.zeros(xtr_set.shape[1] + 1)
# Original note: zipcodes converted to binary, i.e. whether they equal 0 or
# not (the binary targets are actually built inside the loop below).
xtr_set = regularize(xtr_set)
# NOTE(review): only the training features go through regularize(); the
# test-set call below is commented out -- confirm this is intended.
#xtest_set =regularize(xtest_set)
for i in range(4):
    mylr = MyLR(theta, alpha=0.0001, n_cycle=100, penalty=1)
    # Binary target: True where the training label equals class i.
    mylr.fit_(xtr_set, np.array(ytr_set == i))
    # Append this model's test predictions as column i of `pred`.
    if i == 0:
        pred = np.array(mylr.predict_(xtest_set).reshape(-1, 1))
    else:
        pred = np.c_[pred, mylr.predict_(xtest_set).reshape(-1, 1)]

result = []
n_answer = []
# Predicted class per test row = index of the highest-scoring column.
for val in pred:
    result.append(val.argmax())
answer = np.array(result).reshape(-1, 1)
# Walk `pred` column by column and record a 1 for every entry above 0.01.
for j in range(pred.shape[1]):
    for i in range(pred.shape[0]):
        if pred[i][j] > 0.01:
            n_answer.append(1)
Exemplo n.º 10
0
# One-vs-all logistic regression over a sweep of lambda_ values.
Y = np.array(planets[["Origin"]]).reshape(-1, 1)
X = np.array(people[["height", "weight", "bone_density"]]).reshape(-1, 3)

# Add polynomial features (second argument 3; the original note says
# "to a degree of 3").
X = add_polynomial_features(X, 3)

# Normalise the data one column at a time, in place.
for i in range(X.shape[1]):
    X[:, i:i + 1] = minmax(X[:, i:i + 1])

# Split the data into training and testing sets.
X_train, X_test, Y_train, Y_test = data_splitter(X, Y, 0.5)

thetas = np.zeros(X.shape[1] + 1)

mlogr = MLogR(thetas, alpha=0.02, n_cycle=2000, penalty='l2')
# For each category (0, 1, 2, 3) carry out logistic regression and
# concatenate the 4 predicted probability vectors, giving a matrix with the
# probability that each element belongs to each category.
f1_scores = []
# l takes 0, 10, ..., 90, so lambda_ takes 0.0, 1.0, ..., 9.0.
for l in range(0, 100, 10):
    for i in range(4):
        # NOTE(review): this re-assigns the same `thetas` array object each
        # time; if fit_ updates it in place the parameters are not actually
        # reset between categories -- confirm.
        mlogr.thetas = thetas
        mlogr.lambda_ = float(l / 10)
        Y_train_label = select_label(Y_train, i)
        mlogr.fit_(X_train, Y_train_label)
        # Column i of `results` holds the predictions for category i.
        if i == 0:
            results = mlogr.predict_(X_test).reshape(-1, 1)
        if i > 0:
            results = np.concatenate(
                (results, mlogr.predict_(X_test).reshape(-1, 1)), axis=1)
                     for i in y_train]).reshape(-1, 1)  #각각의 분류모델 데이터 전처리
# Binary (one-vs-rest) test target for class 0: 1 where the label is 0.
y_test0 = np.array([1 if i == 0 else 0 for i in y_test]).reshape(-1, 1)

y_train1 = np.array([1 if i == 1 else 0
                     for i in y_train]).reshape(-1, 1)  # label preprocessing for each per-class model
y_test1 = np.array([1 if i == 1 else 0 for i in y_test]).reshape(-1, 1)

y_train2 = np.array([1 if i == 2 else 0
                     for i in y_train]).reshape(-1, 1)  # label preprocessing for each per-class model
y_test2 = np.array([1 if i == 2 else 0 for i in y_test]).reshape(-1, 1)

y_train3 = np.array([1 if i == 3 else 0
                     for i in y_train]).reshape(-1, 1)  # label preprocessing for each per-class model
y_test3 = np.array([1 if i == 3 else 0 for i in y_test]).reshape(-1, 1)

# Each model starts from hard-coded parameters and is fitted three times in
# a row: first with the alpha MyLR was constructed with, then with
# alpha = 0.03, then with alpha = 0.3.
mylr0 = MyLR([[-1.32069828], [-1.02177506], [-0.64913889],
              [-0.06329356]])  # The flying cities of Venus (0)
mylr0.fit_(x_train, y_train0)
mylr0.alpha = 0.03
mylr0.fit_(x_train, y_train0)
mylr0.alpha = 0.3
mylr0.fit_(x_train, y_train0)

mylr1 = MyLR([[-1.56373886], [-0.58824757], [0.28303058],
              [2.20809316]])  #  United Nations of Earth (1)
mylr1.fit_(x_train, y_train1)
mylr1.alpha = 0.03
mylr1.fit_(x_train, y_train1)
mylr1.alpha = 0.3
mylr1.fit_(x_train, y_train1)

mylr2 = MyLR([[-2.58616195], [0.60780971], [2.8277886],
Exemplo n.º 12
0
# One-vs-all classification script comparing two learning rates: for every
# class 0..3 two models are trained (alpha=0.0001 and alpha=0.001) and their
# test predictions are collected column-wise in `pred` and `pred_t`.
census = np.array(dcensus[["height", "weight", "bone_density"]]).reshape(-1, 3)
planets = np.array(dplanets[["Origin"]]).reshape(-1, 1)
data = data_spliter(census, planets, 0.5)

# The two data sets, test and training, each split into X and Y.
xtr_set = data[0][0]
ytr_set = data[1][0]
xtest_set = data[0][1]
ytest_set = data[1][1]

xtr_set = regularize(xtr_set)
# NOTE(review): only the training features go through regularize(); the
# test-set call below is commented out -- confirm this is intended.
#xtest_set =regularize(xtest_set)
for i in range(4):
    mylr = MyLogisticRegression([0, 0, 0, 0], alpha=0.0001, n_cycle=10000)
    mylr_t = MyLogisticRegression([0, 0, 0, 0], alpha=0.001, n_cycle=10000)
    # Binary target built from the TRAINING labels. The original used
    # ytest_set here, i.e. it fitted training features against test labels.
    train_target = np.array(ytr_set == i)
    mylr.fit_(xtr_set, train_target)
    # The original never fitted mylr_t, so its predictions came from the
    # untouched initial parameters.
    mylr_t.fit_(xtr_set, train_target)
    if i == 0:
        pred = np.array(mylr.predict_(xtest_set).reshape(-1, 1))
        pred_t = np.array(mylr_t.predict_(xtest_set).reshape(-1, 1))
    else:
        pred = np.c_[pred, mylr.predict_(xtest_set).reshape(-1, 1)]
        # Extend pred_t with its own previous columns (the original
        # concatenated onto `pred`, mixing the two models' outputs).
        pred_t = np.c_[pred_t, mylr_t.predict_(xtest_set).reshape(-1, 1)]

# Predicted class per test row = index of the highest-scoring column.
result = [row.argmax() for row in pred]
result_t = [row.argmax() for row in pred_t]
Exemplo n.º 13
0
import pandas as pd
from my_logistic_regression import MyLogisticRegression as MyLR
import numpy as np
import matplotlib.pyplot as plt

# Load the census measurements and the matching planet of origin.
census_frame = pd.read_csv("solar_system_census.csv")
planets_frame = pd.read_csv("solar_system_census_planets.csv")

raw_features = np.array(
    census_frame[['height', 'weight', 'bone_density']]).reshape(-1, 3)
raw_origins = np.array(planets_frame['Origin']).reshape(-1, 1)

# Preprocess with the model's own helpers: zscore() on the features and
# label(..., 2) on the origins.
mylr = MyLR([1, 1, 1, 1])
X = mylr.zscore(raw_features)
Y = mylr.label(raw_origins, 2)

x_train, x_test, y_train, y_test = mylr.data_spliter(X, Y, 0.6)

# Fit on the training split, printing whatever fit_ returns, then the cost.
fit_output = mylr.fit_(x_train, y_train)
print(fit_output)
training_cost = mylr.cost_(x_train, y_train)
print(training_cost)

# Targets (blue) and predictions (green) against the first feature column.
first_feature = x_train[:, 0]
plt.plot(first_feature, y_train, 'b.')
plt.plot(first_feature, mylr.predict_(x_train), 'g.')

plt.show()
Exemplo n.º 14
0
# Binary (one-vs-rest) test target for class 0: 1 where the label is 0.
y_test0 = np.array([1 if i == 0 else 0 for i in y_test]).reshape(-1, 1)

y_train1 = np.array([1 if i == 1 else 0
                     for i in y_train]).reshape(-1, 1)  # label preprocessing for each per-class model
y_test1 = np.array([1 if i == 1 else 0 for i in y_test]).reshape(-1, 1)

y_train2 = np.array([1 if i == 2 else 0
                     for i in y_train]).reshape(-1, 1)  # label preprocessing for each per-class model
y_test2 = np.array([1 if i == 2 else 0 for i in y_test]).reshape(-1, 1)

y_train3 = np.array([1 if i == 3 else 0
                     for i in y_train]).reshape(-1, 1)  # label preprocessing for each per-class model
y_test3 = np.array([1 if i == 3 else 0 for i in y_test]).reshape(-1, 1)
# Ten initial parameters, all 1.0.
# NOTE(review): the same array object is handed to all four models; if MyLR
# keeps a reference and updates it in place, the models share state -- confirm.
theta = np.array([[1], [1], [1], [1], [1], [1], [1], [1], [1], [1]],
                 dtype=float)
mylr0 = MyLR(theta, lambda_=0)  # The flying cities of Venus (0)
mylr1 = MyLR(theta, lambda_=0)  #  United Nations of Earth (1)
mylr2 = MyLR(theta, lambda_=0)  # Mars Republic (2)
mylr3 = MyLR(theta, lambda_=0)  # The Asteroids’ Belt colonies (3).
y_n = []
y_n2 = []
for i in range(10):
    # Every iteration restarts model 0 from the same hard-coded parameters.
    mylr0.thetas = np.array([[-0.38004857], [0.12257596], [-1.13496089],
                             [0.64144711], [0.13721429], [-0.46771826],
                             [-1.18485222], [-0.46742162], [0.03928006],
                             [-0.1718098]])
    # Three consecutive fits. alpha is not reset at the top of the loop, so
    # from the second iteration on this first fit runs with the 0.00007
    # left over from the previous iteration.
    mylr0.fit_(x_train_add_poly, y_train0)
    mylr0.alpha = 0.00003
    mylr0.fit_(x_train_add_poly, y_train0)
    mylr0.alpha = 0.00007
    mylr0.fit_(x_train_add_poly, y_train0)
Exemplo n.º 15
0
import numpy as np
from my_logistic_regression import MyLogisticRegression as MyLR

# Manual demo of MyLogisticRegression on a tiny fixed data set: three
# samples with four features each, and one binary label per sample.
X = np.array([[1., 1., 2., 3.], [5., 8., 13., 21.], [3., 5., 9., 14.]])
Y = np.array([[1], [0], [1]])
# Initial parameters given as a 5x1 nested list.
mylr = MyLR([[2], [0.5], [7.1], [-4.3], [2.09]])

# Example 0: predictions with the initial parameters.
print(mylr.predict_(X))
# Output:
# array([[0.99930437],
#  [1.        ],
#  [1.        ]])

# Example 1: cost with the initial parameters.
print(mylr.cost_(X, Y))
# Output:
# 11.513157421577004

# Example 2: parameters after fitting with the model's default settings.
mylr.fit_(X, Y)
print(mylr.thetas)
# Output:
# array([[ 1.04565272],
#  [ 0.62555148],
#  [ 0.38387466],
#  [ 0.15622435],
#  [-0.45990099]])

# Example 3: predictions after fitting.
print(mylr.predict_(X))
Exemplo n.º 16
0
x_test = add_polynomial_features(x_test, 3)

# Training: one l2-penalised model per zipcode. lambda_ grows by 0.1 before
# each model is built, so every zipcode is trained with a different value.
print("Train models")

thetas = [0.] * 10
alpha = 1e-4
n_cycle = 100000
lambda_ = 0

models = []
for i, zipcode in enumerate(zipcodes):
    lambda_ += 0.1

    print("For zipcode = {}".format(zipcode))
    # Column vector with 1 where the training label equals this zipcode.
    y_train_z = np.array(
        [[1 if y_train[j] == zipcode else 0 for j in range(len(x_train))]]
    ).T

    mn = MyLR(thetas=thetas, alpha=alpha, n_cycle=n_cycle, penalty='l2',
              lambda_=lambda_)
    mn.fit_(x_train, y_train_z)
    print("new thetas = {}".format(mn.thetas))

    models.append(mn)


# Evaluation: print the f1 score of every trained model on the test set.
for i, model in enumerate(models):
    l = i * 0.1

    y_hat = model.predict_(x_test)
    print(f1_score_(y_test, y_hat))


Exemplo n.º 17
0
    return np.array(new).reshape(-1, 1)

# One-vs-all script: for each label 0..3, plot convergence on the training
# split and collect the test predictions column-wise in `results`.
planets = pd.read_csv("../subjects/day03/resources/solar_system_census_planets.csv")
people = pd.read_csv("../subjects/day03/resources/solar_system_census.csv")
origins = np.array(planets[["Origin"]]).reshape(-1,1)
X = np.array(people[["height", "weight", "bone_density"]]).reshape(-1, 3)

# Normalise the data: minmax() is applied to each of the three columns
# separately and the results are joined back side by side.
X = np.concatenate((minmax(X[:, :1]), minmax(X[:, 1:2]), minmax(X[:, 2:3])), axis=1)

# Split the data into training and testing sets.
X_train, X_test, origins_train, origins_test = data_splitter(X, origins, 0.5)

thetas = np.zeros(4)

mlogr = MLogR(thetas, alpha=0.9, n_cycle=2000)
for i in range(4):
    # NOTE(review): this re-assigns the same `thetas` array object each
    # time; if the model updates it in place the parameters are not actually
    # reset between labels -- confirm.
    mlogr.thetas = thetas
    origins_train_label = select_label(origins_train, i)
    # Computed but not used in the visible part of the script.
    origins_test_label = select_label(origins_test, i)
    # The explicit fit_ call is commented out; plot_convergence presumably
    # performs the training itself -- confirm against MLogR.
    #mlogr.fit_(X_train, origins_train_label)
    plt.title(('label: ' + str(i)))
    mlogr.plot_convergence(X_train, origins_train_label)
    # Column i of `results` holds the test predictions for label i.
    if i == 0:
        results = mlogr.predict_(X_test).reshape(-1, 1)
    if i > 0:
        results = np.concatenate((results, mlogr.predict_(X_test).reshape(-1, 1)), axis=1)

def get_biggest_index(row):
    biggest = -1
    index = -1
Exemplo n.º 18
0
import numpy as np
from my_logistic_regression import MyLogisticRegression as MyLR

# Manual demo of MyLogisticRegression on a tiny fixed data set: three
# samples with four features each, and one binary label per sample.
X = np.array([[1., 1., 2., 3.], [5., 8., 13., 21.], [3., 5., 9., 14.]])
Y = np.array([[1], [0], [1]])

# NOTE(review): the "Output" values quoted below are the ones other copies
# of this demo list for the commented-out initial thetas
# [2, 0.5, 7.1, -4.3, 2.09]; with the all-zero initialisation actually used
# here the printed values will presumably differ -- confirm.
# mylr = MyLR([2, 0.5, 7.1, -4.3, 2.09])
mylr = MyLR([0., 0., 0., 0., 0.])

# Example 0: predictions with the initial parameters.
print(mylr.predict_(X))
# Output:
# array([[0.99930437], [1. ],[1. ]])

# Example 1: cost with the initial parameters.
print(mylr.cost_(X, Y))
# Output: 11.513157421577004

# Example 2: parameters after fitting; in this version alpha and n_cycle
# are passed to fit_ rather than to the constructor.
mylr.fit_(X, Y, alpha=1e-4, n_cycle=50000)
print(mylr.thetas)
# exit()
# Output:
# array([[ 1.04565272], [ 0.62555148], [ 0.38387466], [ 0.15622435], [-0.45990099]])

# Example 3: predictions after fitting.
print(mylr.predict_(X))
# Output:
# array([[0.72865802], [0.40550072], [0.45241588]])

# Example 4: