import random

import numpy as np  # np.log is used in logistic_log_likelihood_i below

# dot, standard_deviation, bootstrap_statistic, estimate_sample_beta,
# estimate_beta_ridge, multiple_r_squared, maximize_stochastic, logistic,
# p_value, train_test_split, rescaled_x, x, y, and daily_minutes_good are
# assumed to be defined elsewhere; this file uses them without defining them.


def predict(x_i, beta):
    """assumes that the first element of each x_i is 1 (the constant term)"""
    return dot(x_i, beta)
def ridge_penalty(beta, alpha):
    """alpha times the sum of squared coefficients,
    skipping beta[0] so the constant term is not penalized"""
    return alpha * dot(beta[1:], beta[1:])
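# How ridge_penalty typically plugs into the objective: a minimal sketch of
# the penalized per-point error and the gradient of the penalty term. The
# names squared_error_ridge and ridge_penalty_gradient are assumptions, not
# definitions from this file; estimate_beta_ridge (called below) would
# minimize squared_error_ridge by (stochastic) gradient descent.

def squared_error_ridge(x_i, y_i, beta, alpha):
    """squared prediction error for one point, plus the ridge penalty"""
    return (y_i - predict(x_i, beta)) ** 2 + ridge_penalty(beta, alpha)

def ridge_penalty_gradient(beta, alpha):
    """gradient of just the ridge penalty; the constant term is untouched"""
    return [0] + [2 * alpha * beta_j for beta_j in beta[1:]]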
print()

random.seed(0)  # so that you get the same results as me

bootstrap_betas = bootstrap_statistic(list(zip(x, daily_minutes_good)),
                                      estimate_sample_beta,
                                      100)

bootstrap_standard_errors = [
    standard_deviation([beta[i] for beta in bootstrap_betas])
    for i in range(4)]

print("bootstrap standard errors", bootstrap_standard_errors)
print()

print("p_value(30.63, 1.174)", p_value(30.63, 1.174))
print("p_value(0.972, 0.079)", p_value(0.972, 0.079))
print("p_value(-1.868, 0.131)", p_value(-1.868, 0.131))
print("p_value(0.911, 0.990)", p_value(0.911, 0.990))
print()

print("regularization")

random.seed(0)
for alpha in [0.0, 0.01, 0.1, 1, 10]:
    beta = estimate_beta_ridge(x, daily_minutes_good, alpha=alpha)
    print("alpha", alpha)
    print("beta", beta)
    print("dot(beta[1:], beta[1:])", dot(beta[1:], beta[1:]))
    print("r-squared", multiple_r_squared(x, daily_minutes_good, beta))
    print()
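# The p_value calls above take an estimated coefficient and its bootstrap
# standard error. A minimal two-sided sketch, assuming the coefficient is
# approximately normally distributed; the file's actual p_value (defined
# elsewhere) may differ, so this version gets its own name.

import math

def normal_cdf(x, mu=0, sigma=1):
    return (1 + math.erf((x - mu) / math.sqrt(2) / sigma)) / 2

def p_value_sketch(beta_hat_j, sigma_hat_j):
    """two-sided p-value for the hypothesis that the coefficient is zero"""
    if beta_hat_j > 0:
        # twice the probability of seeing an even larger value
        return 2 * (1 - normal_cdf(beta_hat_j / sigma_hat_j))
    else:
        # twice the probability of seeing an even smaller value
        return 2 * normal_cdf(beta_hat_j / sigma_hat_j)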
def logistic_log_partial_ij(x_i, y_i, beta, j):
    """here i is the index of the data point, j the index of the derivative"""
    return (y_i - logistic(dot(x_i, beta))) * x_i[j]
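# The partial derivative above assumes the standard logistic function, which
# this file never defines. A minimal sketch of that standard definition:

def logistic(x):
    """the standard logistic (sigmoid) function; note that math.exp(-x)
    can overflow for large negative x"""
    return 1.0 / (1 + math.exp(-x))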
def logistic_log_likelihood_i(x_i, y_i, beta):
    """the log likelihood of the ith data point:
    log p if y_i == 1, log(1 - p) otherwise"""
    if y_i == 1:
        return np.log(logistic(dot(x_i, beta)))
    else:
        return np.log(1 - logistic(dot(x_i, beta)))
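# maximize_stochastic (called below) also needs the per-point gradient.
# A sketch of how the partial derivatives above assemble into it; the name
# matches the call below, and the composition follows directly from
# logistic_log_partial_ij, though the file's own definition lives elsewhere.

def logistic_log_gradient_i(x_i, y_i, beta):
    """the gradient of the log likelihood for the ith data point"""
    return [logistic_log_partial_ij(x_i, y_i, beta, j)
            for j, _ in enumerate(beta)]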
print("\nlogistic regression:") random.seed(0) x_train, x_test, y_train, y_test = train_test_split(rescaled_x, y, 0.1) beta_0 = [random.random() for _ in range(3)] beta_hat = maximize_stochastic(logistic_log_likelihood_i, logistic_log_gradient_i, x_train, y_train, beta_0) print("Beta Stochastic Gradient Value : ", beta_hat) true_positives = false_positives = true_negatives = false_negatives = 0 for x_i, y_i in zip(x_test, y_test): predict = logistic(dot(beta_hat, x_i)) if y_i == 1 and predict >= 0.5: # TP: paid and we predict paid true_positives += 1 elif y_i == 1: # FN: paid and we predict unpaid false_negatives += 1 elif predict >= 0.5: # FP: unpaid and we predict paid false_positives += 1 else: # TN: unpaid and we predict unpaid true_negatives += 1 precision = true_positives / (true_positives + false_positives) # Given a positive output from the LR, what is the probability that it is correct? recall = true_positives / (true_positives + false_negatives) # Given a positive sample, will LR correctly identify it as Positive. print("True Positive : ", true_positives, " False Positive : ", false_positives, " True Negative : ", true_negatives, " False Negative : ", false_negatives)