Exemplo n.º 1
0
def main():
    """Fit a logistic regressor and report its training and test error rates.

    Steps, in order: load the preprocessed data, split it with a ratio of
    0.8, fit the model, save a plot of the values returned by ``fit``, and
    print the error rate on each split.
    """
    # The first value returned by preprocess_data() is not needed here.
    _, features, labels = preprocess_data()
    train_X, test_X, train_y, test_y = train_test_splitter(
        X=features, y=labels, ratio=0.8)

    hyperparameters = dict(alpha=0.05,
                           c=0.01,
                           T=1000,
                           random_seed=0,
                           intercept=True)
    model = LogisticRegressor(**hyperparameters)

    loss_history = model.fit(train_X, train_y)
    plot_losses(losses=loss_history, savefig=True)

    # Both error rates are computed before either one is printed.
    reports = (
        ('Training Error Rate: %f',
         error_rate(train_y, model.predict(train_X))),
        ('Test Error Rate: %f',
         error_rate(test_y, model.predict(test_X))),
    )
    for template, value in reports:
        print(template % value)
Exemplo n.º 2
0
# Build one regressor per pair of replacement values passed to
# change_1s_0s_to (presumably the stand-ins for the 0 and 1 labels —
# verify against that helper).
df2 = df.apply('y', lambda label: change_1s_0s_to(label, 0.01, 0.99))
regressor2 = LogisticRegressor(df2, 'y', 1)

df3 = df.apply('y', lambda label: change_1s_0s_to(label, 0.001, 0.999))
regressor3 = LogisticRegressor(df3, 'y', 1)

df4 = df.apply('y', lambda label: change_1s_0s_to(label, 0.0001, 0.9999))
regressor4 = LogisticRegressor(df4, 'y', 1)

plt.clf()
plt.style.use('bmh')

# Raw data first, then one fitted curve per regressor.
plt.plot([point[0] for point in points], [point[1] for point in points])

# 5001 evenly spaced x positions from 0 to 5 in steps of 0.001.
x_values = [step / 1000 for step in range(5001)]
labelled_regressors = (
    (regressor1, '0.1'),
    (regressor2, '0.01'),
    (regressor3, '0.001'),
    (regressor4, '0.0001'),
)
for fitted, curve_label in labelled_regressors:
    plt.plot(x_values,
             [fitted.predict({'x': x_value}) for x_value in x_values],
             label=curve_label)

plt.legend()
Exemplo n.º 3
0
    #     if pair[1] == 0:
    #         new_list.append([pair[0],delta])
    #     else:
    #         new_list.append([pair[0],1-delta])

    # Wrap list_data in a DataFrame whose columns are named 'x' and 'y'.
    df = DataFrame.from_array(list_data, columns=['x', 'y'])

    # Fit on the 'y' column; delta_low is defined outside this excerpt.
    regressor = LogisticRegressor(df,
                                  prediction_column='y',
                                  max_value=1,
                                  delta=delta_low)

    # coords[0] collects the plotted x positions, coords[1] the predictions.
    coords = [[], []]
    for x in range(20):
        # NOTE(review): the plotted position is x / 100 but the prediction
        # is requested at x itself — confirm this mismatch is intended.
        coords[0].append(x / 100)
        coords[1].append(regressor.predict({'constant': 1, 'x': x}))
    all_coords.append(coords)
print(all_coords)
plt.style.use('bmh')
# Draw one line per collected [xs, predictions] pair.
for coords in all_coords:
    plt.plot(coords[0], coords[1], linewidth=2.5)
# The legend entries are hard-coded; presumably one per value tried by the
# enclosing loop — verify they match the order of all_coords.
plt.legend(['0.1', '0.01', '0.001', '0.0001'])
plt.savefig('logistic_regressor_109.png')

# dfgd = DataFrame.from_array(
#     [[1,0],
#     [2,0],
#     [3,0],
#     [2,1],
#     [3,1],
#     [4,1]],
Exemplo n.º 4
0
import sys
sys.path.append('src')
from matrix import Matrix
from dataframe import DataFrame
from linear_regressor import LinearRegressor
from logistic_regressor import LogisticRegressor

 df = DataFrame.from_array(
[[0, 0, 1, 0], 
[1, 0, 2, 0], 
[2, 0, 4, 0], 
[4, 0, 8, 0], 
[6, 0, 9, 0], 
[0, 2, 2, 0], 
[0, 4, 5, 0], 
[0, 6, 7, 0], 
[0, 8, 6, 0],
[2, 2, 0.1, 4],
[3, 4, 0.1, 12]],
columns = ['beef', 'pb', 'rating', 'interactive']
)
log_reg = LogisticRegressor(df,10, dependent_variable = 'rating')
print(log_reg.predict({'beef': 5, 'pb': 0 , 'interactive':0}))
print(log_reg.predict({'beef': 12, 'pb': 0 , 'interactive':0}))
print(log_reg.predict({'beef': 5, 'pb': 5 , 'interactive':25}))
Exemplo n.º 5
0
     [0, 0, ['mayo', 'jelly'], 0], [5, 0, [], 4], [5, 0, ['mayo'], 8],
     [5, 0, ['jelly'], 1], [5, 0, ['mayo', 'jelly'], 0], [0, 5, [], 5],
     [0, 5, ['mayo'], 0], [0, 5, ['jelly'], 9], [0, 5, ['mayo', 'jelly'], 0],
     [5, 5, [], 0], [5, 5, ['mayo'], 0], [5, 5, ['jelly'], 0],
     [5, 5, ['mayo', 'jelly'], 0]],
    columns=['beef', 'pb', 'condiments', 'rating'])
# Expand the 'condiments' column into dummy variables, then add an
# interaction term for every pair among beef, pb, mayo and jelly.
df = df.create_dummy_variables('condiments')

interaction_pairs = (
    ('beef', 'pb'),
    ('beef', 'mayo'),
    ('beef', 'jelly'),
    ('pb', 'mayo'),
    ('pb', 'jelly'),
    ('mayo', 'jelly'),
)
for first, second in interaction_pairs:
    df = df.create_interaction_terms(first, second)

# Rebuild a DataFrame from the expanded data before fitting.
log_df = DataFrame(df.data_dict, df.columns)

logistic_regressor = LogisticRegressor(log_df, 10, dependent_variable='rating')

# Each case pairs an observation with its expected rating rounded to two
# decimal places:
#   1. 8 slices of beef + mayo
#   2. 4 tbsp of pb + 8 slices of beef + mayo
#   3. 8 slices of beef + mayo + jelly
expected_ratings = (
    ({'beef': 8, 'mayo': 1}, 9.72),
    ({'beef': 8, 'pb': 4, 'mayo': 1}, 0.77),
    ({'beef': 8, 'mayo': 1, 'jelly': 1}, 0.79),
)
for observation, expected in expected_ratings:
    assert round(logistic_regressor.predict(observation), 2) == expected
    'constant': [1 for _ in range(len(data_dict['rating']))],
    'rating': data_dict['rating']
})

# Replace every rating of exactly 0 with 0.1 (presumably because the
# logistic transform cannot handle a rating of 0 — confirm).
df = df.apply('rating', lambda x: 0.1 if x == 0 else x)

regressor = LogisticRegressor(df, prediction_column='rating', max_val=10)

# NOTE(review): the checks below use exact == on floats, so they presumably
# rely on the regressor rounding its multipliers and predictions — confirm.
assert regressor.multipliers == [
    -0.039, -0.0205, 1.7483, -0.3978, 0.1497, -0.7485, 0.4682, 0.3296, -0.5288,
    2.6441, 1.0125
], 'Wrong multipliers: ' + str(regressor.multipliers)

# (observation, expected prediction) pairs; every failure now reports the
# observation together with the value actually returned.
expected_predictions = [
    ({'beef': 5, 'pb': 5, 'mayo': 1, 'jelly': 1}, 0.02342),
    ({'beef': 0, 'pb': 3, 'mayo': 0, 'jelly': 1}, 7.37536),
    ({'beef': 1, 'pb': 1, 'mayo': 1, 'jelly': 0}, 0.80757),
]
for observation, expected in expected_predictions:
    prediction = regressor.predict(observation)
    assert prediction == expected, (
        'Wrong prediction for %s: expected %s, got %s' %
        (observation, expected, prediction))
Exemplo n.º 7
0
print 'Theta found by fmin_bfgs: ',theta_opt

log_reg1.theta = theta_opt
print "Final loss = ", log_reg1.loss(theta_opt,XX,y)

# make a prediction on a student with exam 1 score of 45 and exam2 score of 85

# TODO: calculate the probability of a student being admitted with score of 45,85
#       replace pred_prob = 0 with pred_prob = expression for that probability

pred_prob = theta_opt.dot(np.array([1, 45, 85]))
print "For a student with 45 on exam 1 and 85 on exam 2, the probability of admission = ", pred_prob

# compute accuracy on the training set

predy = log_reg1.predict(XX)

# TODO: calculate the accuracy of predictions on training set (hint: compare predy and y)

accuracy = 1. * sum([predy[i] == y[i] for i in xrange(len(y))]) / len(y)
print "Accuracy on the training set = ", accuracy

# plot the decision surface

plot_utils.plot_decision_boundary(X,y,theta_opt,'Exam 1 score', 'Exam 2 score',['Not Admitted','Admitted'])
plt.savefig('fig2.pdf')

# Compare with sklearn logistic regression
# note the parameters fed into the LogisticRegression call

from sklearn import linear_model
Exemplo n.º 8
0
import sys
sys.path.append('src')
from logistic_regressor import LogisticRegressor
from matrix import Matrix
from dataframe import DataFrame

# Single-feature check: fit on three (x, y) rows and compare the prediction
# at x = 5, rounded to three decimal places, with 0.777.
df = DataFrame.from_array(
    [[1, 0.2], [2, 0.25], [3, 0.5]],
    columns=['x', 'y'],
)

log_reg = LogisticRegressor(df, 'y', 1)
print('Testing method predict...')
rounded_prediction = round(log_reg.predict({'x': 5}), 3)
assert rounded_prediction == 0.777
print('PASSED')

# Sandwich data: rows are [beef, pb, condiments, rating].
df = DataFrame.from_array(
    [
        [0, 0, [], 1],
        [0, 0, ['mayo'], 1],
        [0, 0, ['jelly'], 4],
        [0, 0, ['mayo', 'jelly'], 0.1],
        [5, 0, [], 4],
        [5, 0, ['mayo'], 8],
        [5, 0, ['jelly'], 1],
        [5, 0, ['mayo', 'jelly'], 0.1],
        [0, 5, [], 5],
        [0, 5, ['mayo'], 0.1],
        [0, 5, ['jelly'], 9],
        [0, 5, ['mayo', 'jelly'], 0.1],
        [5, 5, [], 0.1],
        [5, 5, ['mayo'], 0.1],
        [5, 5, ['jelly'], 0.1],
        [5, 5, ['mayo', 'jelly'], 0.1],
    ],
    columns=['beef', 'pb', 'condiments', 'rating'])

df = df.create_dummy_variables('condiments')

# Add an interaction term for every pair among beef, pb, mayo and jelly.
interaction_pairs = (
    ('beef', 'pb'),
    ('beef', 'mayo'),
    ('beef', 'jelly'),
    ('pb', 'mayo'),
    ('pb', 'jelly'),
    ('mayo', 'jelly'),
)
for first, second in interaction_pairs:
    df = df.create_interaction_terms(first, second)
Exemplo n.º 9
0
        'jelly': 0,
        'beef * pb': 0,
        'beef * mayo': 8,
        'beef * jelly': 0,
        'pb * mayo': 0,
        'pb * jelly': 0,
        'mayo * jelly': 0
    }))
# Logistic prediction for 8 beef with mayo; the only non-zero interaction
# term is 'beef * mayo' = 8 * 1.
beef_mayo_observation = {
    'beef': 8,
    'pb': 0,
    'mayo': 1,
    'jelly': 0,
    'beef * pb': 0,
    'beef * mayo': 8,
    'beef * jelly': 0,
    'pb * mayo': 0,
    'pb * jelly': 0,
    'mayo * jelly': 0,
}
print('Logistic', logistic_regressor.predict(beef_mayo_observation))

print('4 tbsp of pb + jelly')
print(
    'Linear',
    linear_regressor.predict({
        'beef': 0,
        'pb': 4,
        'mayo': 0,
        'jelly': 1,
        'beef * pb': 0,
Exemplo n.º 10
0
# Train on (XX, y) with num_iters=400 and keep the returned parameters.
theta_opt = log_reg1.train(XX,y,num_iters=400)

# Print the parameters found.
# NOTE(review): the message names fmin_bfgs, but the optimiser used inside
# train() is not visible here — confirm the wording is still accurate.

print 'Theta found by fmin_bfgs: ',theta_opt

# Store the parameters on the model, then report the loss at those parameters.
log_reg1.theta = theta_opt
print "Final loss = ", log_reg1.loss(theta_opt,XX,y)

# Make a prediction for a student with exam 1 score of 45 and exam 2 score of 85.

# The feature vector starts with a 1, presumably the intercept term.
# NOTE(review): predict() output is rounded with np.around further down, so it
# presumably returns probabilities rather than 0/1 labels — confirm.

pred_prob = log_reg1.predict(np.asarray([1, 45, 85]))
print "For a student with 45 on exam 1 and 85 on exam 2, the probability of admission = ", pred_prob

# Compute accuracy on the training set.

predy = log_reg1.predict(XX)

# Round the predictions, then count the rows where y - predy is non-zero;
# accuracy is one minus that fraction of y.shape[0].
predy = np.around(predy)

accuracy = 1 - float(np.count_nonzero(y-predy)) / y.shape[0]
print "Accuracy on the training set = ", accuracy

# Plot the decision surface.

plot_utils.plot_decision_boundary(X,y,theta_opt,'Exam 1 score', 'Exam 2 score',['Not Admitted','Admitted'])
#     'mayo': 1.74825378,
#     'jelly': -0.39777219,
#     'beef_pb': 0.14970983,
#     'beef_mayo': -0.74854916,
#     'beef_jelly': 0.46821312,
#     'pb_mayo': 0.32958369,
#     'pb_jelly': -0.5288267,
#     'mayo_jelly': 2.64413352,
#     'constant': 1.01248436
# }, 'Incorrect multipliers an is instead:'+str(regressor.multipliers)
# print("     passed")

print("\n Testing prediction #1")
# Check the prediction for one observation against an exact expected value.
# Each predict() call receives its own copy of the observation, and the
# second call (for the failure message) only runs if the assertion fails.
first_observation = {
    'beef': 5,
    'pb': 5,
    'mayo': 1,
    'jelly': 1,
}
assert regressor.predict(dict(first_observation)) == 0.023417480134512895, (
    "Incorrect prediction #1, is instead " +
    str(regressor.predict(dict(first_observation))))
print("     passed")

print("\n Testing prediction #2")
assert regressor.predict({
    'beef': 0,
    'pb': 3,
    'mayo': 0,
reg = LogisticRegressor(df, dependent_variable='y', upper_bound=1)
reg.set_coefficients({'constant': 0.5, 'x': 0.5})

alpha = 0.01
delta = 0.01
num_steps = 20000
reg.gradient_descent(alpha, delta, num_steps)

print("\nreg.coefficients:",
      reg.coefficients)  # should be {'constant': 2.7911, 'x': -1.1165}

x = [pair[0] for pair in arr]
y = [pair[1] for pair in arr]

lots_of_xs = [x / 100 for x in range(100, 401)]
prediction = [reg.predict({'x': x}) for x in lots_of_xs]

plt.scatter(x, y, label="Actual", color="red")
plt.plot(lots_of_xs, prediction, label='Gradient descent')

plt.legend(loc='best')
plt.savefig('logistic_regressor_gradient_descent.png')
"""

# Split the pairs in arr into separate lists: element 0 of each pair goes to
# x_points and element 1 to y_points.
x_points = [pair[0] for pair in arr]
y_points = [pair[1] for pair in arr]
# Draw those points as red markers labelled "Actual".
plt.scatter(x_points, y_points, label="Actual", color="red")

def num_into_approximation(this_df, this_dv, zero_val):
    
    new_data_dict = {}
Exemplo n.º 13
0
# Append a constant column of ones (one per 'percentile' entry) and an
# 'acceptance' column of ten values alternating 0.999, 0.001.
df = df.append_columns({
    'constant': [1] * len(data_dict['percentile']),
    'acceptance': [0.999, 0.001] * 5,
})

# NOTE(review): none of the acceptance values above is 0, so this
# replacement looks like a no-op — confirm before removing.
df = df.apply('acceptance', lambda value: 0.1 if value == 0 else value)

regressor = LogisticRegressor(df, prediction_column='acceptance', max_value=1)
print(regressor.coefficients)

# Print one "<name>: <prediction>" line per applicant.
applicants = (
    ("Martha", {'percentile': 95, 'ACT': 33, 'extracurricular': 1}),
    ("Jeremy", {'percentile': 95, 'ACT': 34, 'extracurricular': 0}),
)
for applicant, profile in applicants:
    print(applicant + ": " + str(regressor.predict(profile)))
print(
    "Alphie: " +
    str(regressor.predict({
        'percentile': 92,
        'ACT': 35,
        'extracurricular': 1
Exemplo n.º 14
0
import sys

sys.path.append('src')
from logistic_regressor import LogisticRegressor
from dataframe import DataFrame

# Three [x, y] rows used to fit the regressor.
data = [
    [10, 0.05],
    [100, 0.35],
    [1000, 0.95],
]

df = DataFrame.from_array(data, ['x', 'y'])
regressor = LogisticRegressor(df, 'y', 1)

# Show the fitted coefficients, then the prediction at x = 500.
fitted_coefficients = regressor.coefficients
print(fitted_coefficients)
prediction_at_500 = regressor.predict({'x': 500})
print(prediction_at_500)
Exemplo n.º 15
0
sys.path.append('src')
from matrix import Matrix
from dataframe import DataFrame
from linear_regressor import LinearRegressor
from logistic_regressor import LogisticRegressor
# Test 1: fit on three (x, y) rows and check the prediction at x = 5,
# rounded to three decimal places. Set Test_1 to True to run it.
Test_1 = False
if Test_1:  # truthiness check instead of comparing to True with ==
    df = DataFrame.from_array([[1, 0.2], [2, 0.25], [3, 0.5]],
                              columns=['x', 'y'])

    log_reg = LogisticRegressor(df, dependent_variable='y')
    assert round(log_reg.predict({'x': 5}), 3) == 0.777