Exemplo n.º 1
0
def predict(x_i, beta):
    """Return the model output for one input row.

    The result is ``dot(x_i, beta)``: the row ``x_i`` combined with the
    coefficient vector ``beta`` by the module's ``dot`` helper.
    """
    prediction = dot(x_i, beta)
    return prediction
Exemplo n.º 2
0
def ridge_penalty(beta, alpha):
    """Return ``alpha`` times ``dot`` of ``beta[1:]`` with itself.

    The first entry, ``beta[0]``, is left out of the penalty
    (presumably the constant term -- confirm against the caller).
    """
    penalized = beta[1:]
    return alpha * dot(penalized, penalized)
Exemplo n.º 3
0
def predict(x_i, beta):
    """Compute the prediction for the single observation ``x_i``.

    Delegates to ``dot(x_i, beta)`` and returns its value unchanged.
    """
    y_hat = dot(x_i, beta)
    return y_hat
Exemplo n.º 4
0
    # Report section: bootstrap standard errors for the fitted coefficients,
    # a few p-values, and the effect of the ridge penalty strength.
    print()

    # Fixed seed so repeated runs print the same numbers.
    random.seed(0)  # so that you get the same results as me

    # Run estimate_sample_beta over (x_i, y_i) pairs via bootstrap_statistic;
    # the trailing 100 is presumably the number of resamples -- confirm
    # against bootstrap_statistic's signature.
    bootstrap_betas = bootstrap_statistic(list(zip(x, daily_minutes_good)),
                                          estimate_sample_beta, 100)

    # For each of the 4 coefficient positions, take the standard deviation
    # of that position across all bootstrap estimates.
    bootstrap_standard_errors = [
        standard_deviation([beta[i] for beta in bootstrap_betas])
        for i in range(4)
    ]

    print("bootstrap standard errors", bootstrap_standard_errors)
    print()

    # Each call passes two hard-coded literals; presumably
    # (coefficient estimate, standard error) -- verify against p_value.
    print("p_value(30.63, 1.174)", p_value(30.63, 1.174))
    print("p_value(0.972, 0.079)", p_value(0.972, 0.079))
    print("p_value(-1.868, 0.131)", p_value(-1.868, 0.131))
    print("p_value(0.911, 0.990)", p_value(0.911, 0.990))
    print()

    print("regularization")

    # Re-seed before the ridge fits, then refit once per alpha and print the
    # coefficients, dot(beta[1:], beta[1:]), and the r-squared for that fit.
    random.seed(0)
    for alpha in [0.0, 0.01, 0.1, 1, 10]:
        beta = estimate_beta_ridge(x, daily_minutes_good, alpha=alpha)
        print("alpha", alpha)
        print("beta", beta)
        print("dot(beta[1:],beta[1:])", dot(beta[1:], beta[1:]))
        print("r-squared", multiple_r_squared(x, daily_minutes_good, beta))
        print()
Exemplo n.º 5
0
def ridge_penalty(beta, alpha):
    """Return the penalty ``alpha * dot(beta[1:], beta[1:])``.

    Only the entries after ``beta[0]`` contribute; the first entry is
    skipped by the slice.
    """
    tail = beta[1:]
    squared_size = dot(tail, tail)
    return alpha * squared_size
Exemplo n.º 6
0
    # Report section: bootstrap standard errors, sample p-values, and a sweep
    # over ridge penalty strengths.
    print()

    # Fixed seed so repeated runs print the same numbers.
    random.seed(0) # so that you get the same results as me

    # bootstrap_statistic receives the (x_i, y_i) pairs, the estimator
    # function, and 100 (presumably the resample count -- TODO confirm).
    bootstrap_betas = bootstrap_statistic(list(zip(x, daily_minutes_good)),
                                          estimate_sample_beta,
                                          100)

    # One standard deviation per coefficient position (4 positions), taken
    # across the bootstrap estimates.
    bootstrap_standard_errors = [
        standard_deviation([beta[i] for beta in bootstrap_betas])
        for i in range(4)]

    print("bootstrap standard errors", bootstrap_standard_errors)
    print()

    # The literal pairs are hard-coded; presumably (estimate, standard error)
    # for each coefficient -- verify against p_value's definition.
    print("p_value(30.63, 1.174)", p_value(30.63, 1.174))
    print("p_value(0.972, 0.079)", p_value(0.972, 0.079))
    print("p_value(-1.868, 0.131)", p_value(-1.868, 0.131))
    print("p_value(0.911, 0.990)", p_value(0.911, 0.990))
    print()

    print("regularization")

    # Re-seed, then fit with estimate_beta_ridge for each alpha and print the
    # resulting beta, dot(beta[1:], beta[1:]), and r-squared.
    random.seed(0)
    for alpha in [0.0, 0.01, 0.1, 1, 10]:
        beta = estimate_beta_ridge(x, daily_minutes_good, alpha=alpha)
        print("alpha", alpha)
        print("beta", beta)
        print("dot(beta[1:],beta[1:])", dot(beta[1:], beta[1:]))
        print("r-squared", multiple_r_squared(x, daily_minutes_good, beta))
        print()
Exemplo n.º 7
0
def logistic_log_partial_ij(x_i, y_i, beta, j):
    """Return the j-th derivative term contributed by data point i.

    ``i`` identifies the data point (``x_i``, ``y_i``) and ``j`` selects
    which component of ``x_i`` scales the result. The value returned is
    ``(y_i - logistic(dot(x_i, beta))) * x_i[j]``.
    """
    residual = y_i - logistic(dot(x_i, beta))
    return residual * x_i[j]
Exemplo n.º 8
0
def logistic_log_likelihood_i(x_i, y_i, beta):
    """Return the log-likelihood term for one labelled data point.

    With ``p = logistic(dot(x_i, beta))``, the result is ``np.log(p)``
    when ``y_i == 1`` and ``np.log(1 - p)`` for any other label.
    """
    probability = logistic(dot(x_i, beta))
    likelihood = probability if y_i == 1 else 1 - probability
    return np.log(likelihood)
Exemplo n.º 9
0
    # Fit a logistic regression by stochastic maximisation on a train split,
    # then tally a confusion matrix on the held-out test split.
    print("\nlogistic regression:")
    # Fixed seed: makes random.random() below (and any use of the random
    # module inside the helpers) repeatable from run to run.
    random.seed(0)
    x_train, x_test, y_train, y_test = train_test_split(rescaled_x, y, 0.1)

    # Random starting point with three coefficients.
    beta_0 = [random.random() for _ in range(3)]
    beta_hat = maximize_stochastic(logistic_log_likelihood_i,
                                   logistic_log_gradient_i,
                                   x_train, y_train, beta_0)

    print("Beta Stochastic Gradient Value : ", beta_hat)

    true_positives = false_positives = true_negatives = false_negatives = 0

    # A test point is classified as positive when its logistic output
    # is at least 0.5.
    for x_i, y_i in zip(x_test, y_test):
        predict = logistic(dot(beta_hat, x_i))

        if y_i == 1 and predict >= 0.5:  # TP: paid and we predict paid
            true_positives += 1
        elif y_i == 1:                   # FN: paid and we predict unpaid
            false_negatives += 1
        elif predict >= 0.5:             # FP: unpaid and we predict paid
            false_positives += 1
        else:                            # TN: unpaid and we predict unpaid
            true_negatives += 1

    # Guard both ratios: a test split with no predicted positives (or no
    # actual positives) would otherwise raise ZeroDivisionError. An
    # undefined ratio is reported as 0.0.
    predicted_positives = true_positives + false_positives
    actual_positives = true_positives + false_negatives

    # Precision: of the points predicted positive, the fraction that truly are.
    precision = (true_positives / predicted_positives
                 if predicted_positives else 0.0)
    # Recall: of the truly positive points, the fraction predicted positive.
    recall = (true_positives / actual_positives
              if actual_positives else 0.0)

    print("True Positive : ", true_positives,
          " False Positive : ", false_positives,
          " True Negative : ", true_negatives,
          " False Negative : ", false_negatives)