def main():
    """Fit a logistic regressor and report its train/test error rates.

    Data comes from ``preprocess_data`` and is split with
    ``train_test_splitter(ratio=0.8)``. The loss values returned by
    ``fit`` are handed to ``plot_losses`` with ``savefig=True``, and the
    error rate on each split is printed.
    """
    _df, features, targets = preprocess_data()
    X_train, X_test, y_train, y_test = train_test_splitter(
        X=features, y=targets, ratio=0.8
    )

    model = LogisticRegressor(
        alpha=0.05, c=0.01, T=1000, random_seed=0, intercept=True
    )
    loss_history = model.fit(X_train, y_train)
    plot_losses(losses=loss_history, savefig=True)

    # Evaluate the training split first, then the held-out split.
    train_predictions = model.predict(X_train)
    train_error = error_rate(y_train, train_predictions)
    test_predictions = model.predict(X_test)
    test_error = error_rate(y_test, test_predictions)

    print('Training Error Rate: %f' % train_error)
    print('Test Error Rate: %f' % test_error)
# Refit the model on three re-labelled copies of `df`.
# presumably change_1s_0s_to swaps 0/1 labels for the given low/high values
# -- confirm against its definition.
df2 = df.apply('y', lambda label: change_1s_0s_to(label, 0.01, 0.99))
regressor2 = LogisticRegressor(df2, 'y', 1)
df3 = df.apply('y', lambda label: change_1s_0s_to(label, 0.001, 0.999))
regressor3 = LogisticRegressor(df3, 'y', 1)
df4 = df.apply('y', lambda label: change_1s_0s_to(label, 0.0001, 0.9999))
regressor4 = LogisticRegressor(df4, 'y', 1)

# Start from a clean figure and draw the raw points first.
plt.clf()
plt.style.use('bmh')
plt.plot([point[0] for point in points], [point[1] for point in points])

# One curve per fitted model, sampled at x = 0, 0.001, ..., 5.
for curve_label, fitted in (
    ('0.1', regressor1),
    ('0.01', regressor2),
    ('0.001', regressor3),
    ('0.0001', regressor4),
):
    sample_xs = [step / 1000 for step in range(5001)]
    sample_ys = [fitted.predict({'x': step / 1000}) for step in range(5001)]
    plt.plot(sample_xs, sample_ys, label=curve_label)
plt.legend()
# NOTE(review): excerpt whose line breaks and indentation were lost. It opens
# and closes with commented-out code. The live statements build a DataFrame
# with columns ['x', 'y'] from `list_data`, fit
# LogisticRegressor(df, prediction_column='y', max_value=1, delta=delta_low),
# collect 20 sampled points into `coords`, append them to `all_coords`, then
# plot every entry of `all_coords` and save 'logistic_regressor_109.png'.
# NOTE(review): the stored x-coordinate is x / 100 but predict() receives the
# unscaled x -- confirm this mismatch is intended.
# NOTE(review): loop nesting cannot be recovered from this view; presumably
# the fit-and-sample part runs once per delta value -- confirm.
# if pair[1] == 0: # new_list.append([pair[0],delta]) # else: # new_list.append([pair[0],1-delta]) df = DataFrame.from_array(list_data, columns=['x', 'y']) regressor = LogisticRegressor(df, prediction_column='y', max_value=1, delta=delta_low) coords = [[], []] for x in range(20): coords[0].append(x / 100) coords[1].append(regressor.predict({'constant': 1, 'x': x})) all_coords.append(coords) print(all_coords) plt.style.use('bmh') for coords in all_coords: plt.plot(coords[0], coords[1], linewidth=2.5) plt.legend(['0.1', '0.01', '0.001', '0.0001']) plt.savefig('logistic_regressor_109.png') # dfgd = DataFrame.from_array( # [[1,0], # [2,0], # [3,0], # [2,1], # [3,1], # [4,1]],
import sys
sys.path.append('src')
from matrix import Matrix
from dataframe import DataFrame
from linear_regressor import LinearRegressor
from logistic_regressor import LogisticRegressor

# One row per observation, in the column order given to from_array below.
sandwich_rows = [
    [0, 0, 1, 0],
    [1, 0, 2, 0],
    [2, 0, 4, 0],
    [4, 0, 8, 0],
    [6, 0, 9, 0],
    [0, 2, 2, 0],
    [0, 4, 5, 0],
    [0, 6, 7, 0],
    [0, 8, 6, 0],
    [2, 2, 0.1, 4],
    [3, 4, 0.1, 12],
]
df = DataFrame.from_array(
    sandwich_rows,
    columns=['beef', 'pb', 'rating', 'interactive'],
)

# 10 is the regressor's second positional argument
# (presumably the top of the rating scale -- confirm against the class).
log_reg = LogisticRegressor(df, 10, dependent_variable='rating')

# Print the predicted rating for three hand-picked observations.
for observation in (
    {'beef': 5, 'pb': 0, 'interactive': 0},
    {'beef': 12, 'pb': 0, 'interactive': 0},
    {'beef': 5, 'pb': 5, 'interactive': 25},
):
    print(log_reg.predict(observation))
# NOTE(review): excerpt starts partway through the row list of a call
# (presumably DataFrame.from_array -- its opening is not visible). Each row is
# [beef, pb, condiments, rating] per the columns= argument. The later
# statements create dummy variables for 'condiments', add six pairwise
# interaction terms, fit LogisticRegressor(log_df, 10,
# dependent_variable='rating'), and assert three predictions rounded to
# 2 decimals.
[0, 0, ['mayo', 'jelly'], 0], [5, 0, [], 4], [5, 0, ['mayo'], 8], [5, 0, ['jelly'], 1], [5, 0, ['mayo', 'jelly'], 0], [0, 5, [], 5], [0, 5, ['mayo'], 0], [0, 5, ['jelly'], 9], [0, 5, ['mayo', 'jelly'], 0], [5, 5, [], 0], [5, 5, ['mayo'], 0], [5, 5, ['jelly'], 0], [5, 5, ['mayo', 'jelly'], 0]], columns=['beef', 'pb', 'condiments', 'rating']) df = df.create_dummy_variables('condiments') df = df.create_interaction_terms('beef', 'pb') df = df.create_interaction_terms('beef', 'mayo') df = df.create_interaction_terms('beef', 'jelly') df = df.create_interaction_terms('pb', 'mayo') df = df.create_interaction_terms('pb', 'jelly') df = df.create_interaction_terms('mayo', 'jelly') log_df = DataFrame(df.data_dict, df.columns) logistic_regressor = LogisticRegressor(log_df, 10, dependent_variable='rating') # test 8 slices of beef + mayo observation = {'beef': 8, 'mayo': 1} assert round(logistic_regressor.predict(observation), 2) == 9.72 # test 4 tbsp of pb + 8 slices of beef + mayo observation = {'beef': 8, 'pb': 4, 'mayo': 1} assert round(logistic_regressor.predict(observation), 2) == 0.77 # test 8 slices of beef + mayo + jelly observation = {'beef': 8, 'mayo': 1, 'jelly': 1} assert round(logistic_regressor.predict(observation), 2) == 0.79
# NOTE(review): excerpt starts inside a dict literal whose opening is not
# visible. The visible statements replace a 'rating' of 0 with 0.1, fit
# LogisticRegressor(df, prediction_column='rating', max_val=10), and assert
# the multipliers plus three predictions.
# NOTE(review): the asserts compare floats with `==`; that only holds if
# `multipliers` and predict() are already rounded -- confirm.
# NOTE(review): the message 'Wong multipliers' looks like a typo for
# 'Wrong multipliers'.
'constant': [1 for _ in range(len(data_dict['rating']))], 'rating': data_dict['rating'] }) df = df.apply('rating', lambda x: 0.1 if x == 0 else x) regressor = LogisticRegressor(df, prediction_column='rating', max_val=10) assert regressor.multipliers == [ -0.039, -0.0205, 1.7483, -0.3978, 0.1497, -0.7485, 0.4682, 0.3296, -0.5288, 2.6441, 1.0125 ], 'Wong multipliers' assert regressor.predict({ 'beef': 5, 'pb': 5, 'mayo': 1, 'jelly': 1, }) == 0.02342, 'Nah bruh' assert regressor.predict({ 'beef': 0, 'pb': 3, 'mayo': 0, 'jelly': 1, }) == 7.37536 assert regressor.predict({ 'beef': 1, 'pb': 1, 'mayo': 1, 'jelly': 0, }) == 0.80757, 'Nah'
print 'Theta found by fmin_bfgs: ',theta_opt log_reg1.theta = theta_opt print "Final loss = ", log_reg1.loss(theta_opt,XX,y) # make a prediction on a student with exam 1 score of 45 and exam2 score of 85 # TODO: calculate the probability of a student being admitted with score of 45,85 # replace pred_prob = 0 with pred_prob = expression for that probability pred_prob = theta_opt.dot(np.array([1, 45, 85])) print "For a student with 45 on exam 1 and 85 on exam 2, the probability of admission = ", pred_prob # compute accuracy on the training set predy = log_reg1.predict(XX) # TODO: calculate the accuracy of predictions on training set (hint: compare predy and y) accuracy = 1. * sum([predy[i] == y[i] for i in xrange(len(y))]) / len(y) print "Accuracy on the training set = ", accuracy # plot the decision surface plot_utils.plot_decision_boundary(X,y,theta_opt,'Exam 1 score', 'Exam 2 score',['Not Admitted','Admitted']) plt.savefig('fig2.pdf') # Compare with sklearn logistic regression # note the parameters fed into the LogisticRegression call from sklearn import linear_model
import sys
sys.path.append('src')
from logistic_regressor import LogisticRegressor
from matrix import Matrix
from dataframe import DataFrame

# Smoke test: fit on three (x, y) points and check one prediction.
df = DataFrame.from_array(
    [[1, 0.2], [2, 0.25], [3, 0.5]],
    columns=['x', 'y'],
)
log_reg = LogisticRegressor(df, 'y', 1)

print('Testing method predict...')
prediction_at_5 = log_reg.predict({'x': 5})
assert round(prediction_at_5, 3) == 0.777
print('PASSED')

# Sandwich data set: each row is [beef, pb, condiments, rating].
sandwich_rows = [
    [0, 0, [], 1],
    [0, 0, ['mayo'], 1],
    [0, 0, ['jelly'], 4],
    [0, 0, ['mayo', 'jelly'], 0.1],
    [5, 0, [], 4],
    [5, 0, ['mayo'], 8],
    [5, 0, ['jelly'], 1],
    [5, 0, ['mayo', 'jelly'], 0.1],
    [0, 5, [], 5],
    [0, 5, ['mayo'], 0.1],
    [0, 5, ['jelly'], 9],
    [0, 5, ['mayo', 'jelly'], 0.1],
    [5, 5, [], 0.1],
    [5, 5, ['mayo'], 0.1],
    [5, 5, ['jelly'], 0.1],
    [5, 5, ['mayo', 'jelly'], 0.1],
]
df = DataFrame.from_array(
    sandwich_rows,
    columns=['beef', 'pb', 'condiments', 'rating'],
)

# Expand the condiment lists into dummy columns, then add every pairwise
# interaction term in the same order as before.
df = df.create_dummy_variables('condiments')
for left_column, right_column in (
    ('beef', 'pb'),
    ('beef', 'mayo'),
    ('beef', 'jelly'),
    ('pb', 'mayo'),
    ('pb', 'jelly'),
    ('mayo', 'jelly'),
):
    df = df.create_interaction_terms(left_column, right_column)
# NOTE(review): excerpt from the middle of a run of print(...) comparisons.
# The first dict and the final call are cut off by the excerpt boundaries.
# Each visible observation spells out the base features and every
# interaction column explicitly (e.g. 'beef * mayo': 8 alongside 'beef': 8,
# 'mayo': 1) before being passed to linear_regressor.predict or
# logistic_regressor.predict.
'jelly': 0, 'beef * pb': 0, 'beef * mayo': 8, 'beef * jelly': 0, 'pb * mayo': 0, 'pb * jelly': 0, 'mayo * jelly': 0 })) print( 'Logistic', logistic_regressor.predict({ 'beef': 8, 'pb': 0, 'mayo': 1, 'jelly': 0, 'beef * pb': 0, 'beef * mayo': 8, 'beef * jelly': 0, 'pb * mayo': 0, 'pb * jelly': 0, 'mayo * jelly': 0 })) print('4 tbsp of pb + jelly') print( 'Linear', linear_regressor.predict({ 'beef': 0, 'pb': 4, 'mayo': 0, 'jelly': 1, 'beef * pb': 0,
theta_opt = log_reg1.train(XX,y,num_iters=400) # print the theta found print 'Theta found by fmin_bfgs: ',theta_opt log_reg1.theta = theta_opt print "Final loss = ", log_reg1.loss(theta_opt,XX,y) # make a prediction on a student with exam 1 score of 45 and exam2 score of 85 # TODO: calculate the probability of a student being admitted with score of 45,85 # replace pred_prob = 0 with pred_prob = expression for that probability pred_prob = log_reg1.predict(np.asarray([1, 45, 85])) print "For a student with 45 on exam 1 and 85 on exam 2, the probability of admission = ", pred_prob # compute accuracy on the training set predy = log_reg1.predict(XX) # TODO: calculate the accuracy of predictions on training set (hint: compare predy and y) predy = np.around(predy) accuracy = 1 - float(np.count_nonzero(y-predy)) / y.shape[0] print "Accuracy on the training set = ", accuracy # plot the decision surface plot_utils.plot_decision_boundary(X,y,theta_opt,'Exam 1 score', 'Exam 2 score',['Not Admitted','Admitted'])
# NOTE(review): excerpt of a test script. It opens with the tail of a
# commented-out multipliers assertion, then runs prediction checks. The last
# assert is cut off by the excerpt boundary.
# NOTE(review): prediction #1 compares a float to a 17-significant-digit
# literal with `==`; a tolerance-based comparison (e.g. math.isclose) would be
# less brittle -- consider once the full statement is in view.
# 'mayo': 1.74825378, # 'jelly': -0.39777219, # 'beef_pb': 0.14970983, # 'beef_mayo': -0.74854916, # 'beef_jelly': 0.46821312, # 'pb_mayo': 0.32958369, # 'pb_jelly': -0.5288267, # 'mayo_jelly': 2.64413352, # 'constant': 1.01248436 # }, 'Incorrect multipliers an is instead:'+str(regressor.multipliers) # print(" passed") print("\n Testing prediction #1") assert regressor.predict({ 'beef': 5, 'pb': 5, 'mayo': 1, 'jelly': 1, }) == 0.023417480134512895, "Incorrect prediction #1, is instead " + str( regressor.predict({ 'beef': 5, 'pb': 5, 'mayo': 1, 'jelly': 1, })) print(" passed") print("\n Testing prediction #2") assert regressor.predict({ 'beef': 0, 'pb': 3, 'mayo': 0,
# NOTE(review): fits LogisticRegressor(df, dependent_variable='y',
# upper_bound=1) by gradient descent (alpha=0.01, delta=0.01, 20000 steps)
# starting from coefficients {'constant': 0.5, 'x': 0.5}. It then plots the
# points in `arr` against predictions at x = 1.00 .. 4.00 (step 0.01) and
# saves 'logistic_regressor_gradient_descent.png'.
# NOTE(review): the comprehensions reuse the name `x`, which also holds the
# list of x-values passed to plt.scatter. This is harmless under Python 3
# comprehension scoping, but would clobber `x` under Python 2.
# NOTE(review): the trailing triple-quoted string holds disabled code and is
# cut off by the excerpt boundary.
reg = LogisticRegressor(df, dependent_variable='y', upper_bound=1) reg.set_coefficients({'constant': 0.5, 'x': 0.5}) alpha = 0.01 delta = 0.01 num_steps = 20000 reg.gradient_descent(alpha, delta, num_steps) print("\nreg.coefficients:", reg.coefficients) # should be {'constant': 2.7911, 'x': -1.1165} x = [pair[0] for pair in arr] y = [pair[1] for pair in arr] lots_of_xs = [x / 100 for x in range(100, 401)] prediction = [reg.predict({'x': x}) for x in lots_of_xs] plt.scatter(x, y, label="Actual", color="red") plt.plot(lots_of_xs, prediction, label='Gradient descent') plt.legend(loc='best') plt.savefig('logistic_regressor_gradient_descent.png') """ x_points = [pair[0] for pair in arr] y_points = [pair[1] for pair in arr] plt.scatter(x_points, y_points, label="Actual", color="red") def num_into_approximation(this_df, this_dv, zero_val): new_data_dict = {}
# NOTE(review): appends a 'constant' column of ones and an 'acceptance'
# column to `df`, fits LogisticRegressor(df, prediction_column='acceptance',
# max_value=1), prints the coefficients, then prints predictions for named
# applicants. The final print is cut off by the excerpt boundary.
# NOTE(review): the apply() mapping 0 -> 0.1 is a no-op on the data shown,
# because 'acceptance' only contains 0.999 and 0.001.
df = df.append_columns({ 'constant': [1 for _ in range(len(data_dict['percentile']))], 'acceptance': [0.999, 0.001, 0.999, 0.001, 0.999, 0.001, 0.999, 0.001, 0.999, 0.001] }) # print(df.ordered_dict) df = df.apply('acceptance', lambda x: 0.1 if x == 0 else x) # print(df.ordered_dict) regressor = LogisticRegressor(df, prediction_column='acceptance', max_value=1) print(regressor.coefficients) print( "Martha: " + str(regressor.predict({ 'percentile': 95, 'ACT': 33, 'extracurricular': 1 }))) print( "Jeremy: " + str(regressor.predict({ 'percentile': 95, 'ACT': 34, 'extracurricular': 0 }))) print( "Alphie: " + str(regressor.predict({ 'percentile': 92, 'ACT': 35, 'extracurricular': 1
import sys
sys.path.append('src')
from logistic_regressor import LogisticRegressor
from dataframe import DataFrame

# Three (x, y) observations used to fit the model.
data = [
    [10, 0.05],
    [100, 0.35],
    [1000, 0.95],
]
df = DataFrame.from_array(data, ['x', 'y'])
regressor = LogisticRegressor(df, 'y', 1)

# Show the fitted coefficients, then the prediction at x = 500.
fitted_coefficients = regressor.coefficients
print(fitted_coefficients)
prediction_at_500 = regressor.predict({'x': 500})
print(prediction_at_500)
sys.path.append('src')
from matrix import Matrix
from dataframe import DataFrame
from linear_regressor import LinearRegressor
from logistic_regressor import LogisticRegressor

# test 1 -- only runs when the flag below is switched on.
Test_1 = False
if Test_1:
    df = DataFrame.from_array(
        [[1, 0.2], [2, 0.25], [3, 0.5]],
        columns=['x', 'y'],
    )
    log_reg = LogisticRegressor(df, dependent_variable='y')
    prediction_at_5 = log_reg.predict({'x': 5})
    assert round(prediction_at_5, 3) == 0.777