def least_squares_fit(x: Vector, y: Vector) -> Tuple[float, float]:
    """Fit y ~ alpha + beta * x by ordinary least squares.

    Returns the (alpha, beta) pair minimizing the sum of squared errors.
    """
    # Slope: correlation scaled by the ratio of spreads.
    slope = correlation(x, y) * standard_deviation(y) / standard_deviation(x)
    # Intercept: makes the fitted line pass through the point of means.
    intercept = mean(y) - slope * mean(x)
    return intercept, slope
def least_squares_fit(x: Vector, y: Vector) -> Tuple[float, float]:
    """Given training vectors x and y, find the least-squares
    values (alpha, beta) for the model y_i = alpha + beta * x_i."""
    b = correlation(x, y) * standard_deviation(y) / standard_deviation(x)
    a = mean(y) - b * mean(x)
    return (a, b)
def least_squares_fit(x: Vector, y: Vector) -> Tuple[float, float]:
    """Compute the least-squares intercept (alpha) and slope (beta)
    for the vectors x and y."""
    sx, sy = standard_deviation(x), standard_deviation(y)
    beta = correlation(x, y) * sy / sx
    return mean(y) - beta * mean(x), beta
def scale(data: List[Vector]) -> Tuple[Vector, Vector]:
    """Return the per-position means and standard deviations of data."""
    n_positions = len(data[0])
    means = vector_mean(data)
    stdevs = []
    for position in range(n_positions):
        column = [row[position] for row in data]
        stdevs.append(standard_deviation(column))
    return means, stdevs
def scale(data: List[Vector]) -> Tuple[Vector, Vector]:
    """Return the means and standard deviations for each position."""
    dim = len(data[0])

    # Mean of each position across all vectors.
    means = vector_mean(data)
    # Standard deviation of each position (column i) across all vectors.
    stdevs = [
        standard_deviation([vector[i] for vector in data]) for i in range(dim)
    ]

    return means, stdevs
def scale(data: List[Vector]) -> Tuple[Vector, Vector]:
    """For each coordinate, compute the mean and the standard
    deviation over all vectors in data."""
    dims = range(len(data[0]))
    means = vector_mean(data)
    stdevs = [standard_deviation([point[d] for point in data])
              for d in dims]
    return means, stdevs
def least_squares_fit(x: Vector, y: Vector) -> Tuple[float, float]:
    """Given training vectors x and y, return the least-squares
    values (alpha, beta) for the model y_i = alpha + beta * x_i."""
    # Slope is the correlation scaled by the ratio of spreads.
    beta = correlation(x, y) * standard_deviation(y) / standard_deviation(x)
    # Intercept makes the fitted line pass through (mean(x), mean(y)).
    alpha = mean(y) - beta * mean(x)
    return alpha, beta
def least_squares_fit(x, y):
    """given training values for x and y, find the least-squares values of alpha and beta"""
    beta = correlation(x, y) * standard_deviation(y) / standard_deviation(x)
    alpha = mean(y) - beta * mean(x)
    return alpha, beta
# Central-tendency summaries of the num_friends data set.
print(stat.mean(num_friends))
print(stat.median(num_friends))

# Quantiles of the friend counts.
assert stat.quantile(num_friends, 0.10) == 1
assert stat.quantile(num_friends, 0.25) == 3
assert stat.quantile(num_friends, 0.75) == 9
assert stat.quantile(num_friends, 0.90) == 13

# The most common values are 1 and 6.
assert set(stat.mode(num_friends)) == {1, 6}

assert stat.data_range(num_friends) == 99

# Dispersion measures (open ranges allow for floating-point imprecision).
assert 81.54 < stat.variance(num_friends) < 81.55

assert 9.02 < stat.standard_deviation(num_friends) < 9.04

assert stat.interquartile_range(num_friends) == 6

# Covariance scales with the units: minutes -> hours divides it by 60.
assert 22.42 < stat.covariance(num_friends, daily_minutes) < 22.43
assert 22.42 / 60 < stat.covariance(num_friends, daily_hours) < 22.43 / 60

# Correlation is unitless, so minutes and hours give the same value.
assert 0.24 < stat.correlation(num_friends, daily_minutes) < 0.25
assert 0.24 < stat.correlation(num_friends, daily_hours) < 0.25

# Drop the single outlier (the entry whose friend count is 100)
# from both parallel lists, keeping them index-aligned.
outlier = num_friends.index(100)

num_friends_good = [x for i, x in enumerate(num_friends) if i != outlier]

daily_minutes_good = [x for i, x in enumerate(daily_minutes) if i != outlier]
# Example #10
def main():
    """Exercise multiple regression: fit the model, estimate bootstrap
    standard errors of the coefficients, and compare ridge fits with
    increasing regularization strength."""
    from scratch.statistics import daily_minutes_good
    from scratch.gradient_descent import gradient_step

    random.seed(0)
    # I used trial and error to choose num_iters and step_size.
    # This may take a while.
    learning_rate = 0.001

    beta = least_squares_fit(inputs, daily_minutes_good, learning_rate, 5000,
                             25)
    assert 30.50 < beta[0] < 30.70  # constant
    assert 0.96 < beta[1] < 1.00  # number of friends
    assert -1.89 < beta[2] < -1.85  # work hours per day
    assert 0.91 < beta[3] < 0.94  # has PhD

    assert 0.67 < multiple_r_squared(inputs, daily_minutes_good, beta) < 0.68

    from typing import Tuple

    import datetime

    def estimate_sample_beta(pairs: List[Tuple[Vector, float]]):
        # Refit the model on one bootstrap resample of (x, y) pairs.
        x_sample = [x for x, _ in pairs]
        y_sample = [y for _, y in pairs]
        beta = least_squares_fit(x_sample, y_sample, learning_rate, 5000, 25)
        print("bootstrap sample", beta)
        return beta

    random.seed(0)  # So that you get the same results as I did.

    # This may take a while.
    bootstrap_betas = bootstrap_statistic(
        list(zip(inputs, daily_minutes_good)), estimate_sample_beta, 100)

    # Standard error of each coefficient = standard deviation of that
    # coefficient across the bootstrap fits.
    bootstrap_standard_errors = [
        standard_deviation([beta[i] for beta in bootstrap_betas])
        for i in range(4)
    ]

    print(bootstrap_standard_errors)

    # [1.272,    # constant term, actual error = 1.19
    #  0.103,    # num_friends,   actual error = 0.080
    #  0.155,    # work_hours,    actual error = 0.127
    #  1.249]    # phd,           actual error = 0.998

    # Ridge regression: as the penalty alpha grows, the squared norm of
    # the non-constant coefficients, dot(beta[1:], beta[1:]), shrinks.
    random.seed(0)
    beta_0 = least_squares_fit_ridge(
        inputs,
        daily_minutes_good,
        0.0,  # alpha
        learning_rate,
        5000,
        25)
    # [30.51, 0.97, -1.85, 0.91]
    assert 5 < dot(beta_0[1:], beta_0[1:]) < 6
    assert 0.67 < multiple_r_squared(inputs, daily_minutes_good, beta_0) < 0.69

    beta_0_1 = least_squares_fit_ridge(
        inputs,
        daily_minutes_good,
        0.1,  # alpha
        learning_rate,
        5000,
        25)
    # [30.8, 0.95, -1.83, 0.54]
    assert 4 < dot(beta_0_1[1:], beta_0_1[1:]) < 5
    assert 0.67 < multiple_r_squared(inputs, daily_minutes_good,
                                     beta_0_1) < 0.69

    beta_1 = least_squares_fit_ridge(
        inputs,
        daily_minutes_good,
        1,  # alpha
        learning_rate,
        5000,
        25)
    # [30.6, 0.90, -1.68, 0.10]
    assert 3 < dot(beta_1[1:], beta_1[1:]) < 4
    assert 0.67 < multiple_r_squared(inputs, daily_minutes_good, beta_1) < 0.69

    beta_10 = least_squares_fit_ridge(
        inputs,
        daily_minutes_good,
        10,  # alpha
        learning_rate,
        5000,
        25)
    # [28.3, 0.67, -0.90, -0.01]
    assert 1 < dot(beta_10[1:], beta_10[1:]) < 2
    assert 0.5 < multiple_r_squared(inputs, daily_minutes_good, beta_10) < 0.6
# Example #11
# 101 values, all close to 100.
close_to_100 = [99.5 + random.random() for _ in range(101)]

# 101 values: one near 100, 50 of them close to 0, and 50 close to 200.
far_from_100 = ([99.5 + random.random()] +
                [random.random() for _ in range(50)] +
                [200 + random.random() for _ in range(50)])

from scratch.statistics import median, standard_deviation

# Bootstrap the median of each data set 100 times.
medians_close = bootstrap_statistic(close_to_100, median, 100)

medians_far = bootstrap_statistic(far_from_100, median, 100)

# The median of the tightly clustered data barely moves across resamples,
# while the median of the two-cluster data jumps between ~0 and ~200.
assert standard_deviation(medians_close) < 1
assert standard_deviation(medians_far) > 90

from scratch.probability import normal_cdf


def p_value(beta_hat_j: float, sigma_hat_j: float) -> float:
    """Two-sided p-value for the coefficient estimate beta_hat_j with
    estimated standard error sigma_hat_j, using a normal approximation."""
    z = beta_hat_j / sigma_hat_j
    if beta_hat_j > 0:
        # Positive coefficient: twice the probability of seeing an
        # even *larger* value.
        return 2 * (1 - normal_cdf(z))
    # Otherwise twice the probability of seeing an even *smaller* value.
    return 2 * normal_cdf(z)

# Example #12
def main():
    """Exercise multiple regression: fit the model, estimate bootstrap
    standard errors of the coefficients, and compare ridge fits with
    increasing regularization strength."""
    from scratch.statistics import daily_minutes_good
    from scratch.gradient_descent import gradient_step

    random.seed(0)
    # I used trial and error to choose num_iters and step_size.
    # This will run for a while.
    learning_rate = 0.001

    beta = least_squares_fit(inputs, daily_minutes_good, learning_rate, 5000,
                             25)
    assert 30.50 < beta[0] < 30.70  # constant
    assert 0.96 < beta[1] < 1.00  # num friends
    assert -1.89 < beta[2] < -1.85  # work hours per day
    assert 0.91 < beta[3] < 0.94  # has PhD

    assert 0.67 < multiple_r_squared(inputs, daily_minutes_good, beta) < 0.68

    from typing import Tuple

    import datetime

    def estimate_sample_beta(pairs: List[Tuple[Vector, float]]):
        # Refit the model on one bootstrap resample of (x, y) pairs.
        x_sample = [x for x, _ in pairs]
        y_sample = [y for _, y in pairs]
        beta = least_squares_fit(x_sample, y_sample, learning_rate, 5000, 25)
        print("bootstrap sample", beta)
        return beta

    random.seed(0)  # so that you get the same results as me

    # This will take a couple of minutes!
    bootstrap_betas = bootstrap_statistic(
        list(zip(inputs, daily_minutes_good)), estimate_sample_beta, 100)

    # Standard error of each coefficient = standard deviation of that
    # coefficient across the bootstrap fits.
    bootstrap_standard_errors = [
        standard_deviation([beta[i] for beta in bootstrap_betas])
        for i in range(4)
    ]

    print(bootstrap_standard_errors)

    # [1.272,    # constant term, actual error = 1.19
    #  0.103,    # num_friends,   actual error = 0.080
    #  0.155,    # work_hours,    actual error = 0.127
    #  1.249]    # phd,           actual error = 0.998

    # Ridge regression: as the penalty alpha grows, the squared norm of
    # the non-constant coefficients, dot(beta[1:], beta[1:]), shrinks.
    random.seed(0)
    beta_0 = least_squares_fit_ridge(
        inputs,
        daily_minutes_good,
        0.0,  # alpha
        learning_rate,
        5000,
        25)
    # [30.51, 0.97, -1.85, 0.91]
    assert 5 < dot(beta_0[1:], beta_0[1:]) < 6
    assert 0.67 < multiple_r_squared(inputs, daily_minutes_good, beta_0) < 0.69

    beta_0_1 = least_squares_fit_ridge(
        inputs,
        daily_minutes_good,
        0.1,  # alpha
        learning_rate,
        5000,
        25)
    # [30.8, 0.95, -1.83, 0.54]
    assert 4 < dot(beta_0_1[1:], beta_0_1[1:]) < 5
    assert 0.67 < multiple_r_squared(inputs, daily_minutes_good,
                                     beta_0_1) < 0.69

    beta_1 = least_squares_fit_ridge(
        inputs,
        daily_minutes_good,
        1,  # alpha
        learning_rate,
        5000,
        25)
    # [30.6, 0.90, -1.68, 0.10]
    assert 3 < dot(beta_1[1:], beta_1[1:]) < 4
    assert 0.67 < multiple_r_squared(inputs, daily_minutes_good, beta_1) < 0.69

    beta_10 = least_squares_fit_ridge(
        inputs,
        daily_minutes_good,
        10,  # alpha
        learning_rate,
        5000,
        25)
    # [28.3, 0.67, -0.90, -0.01]
    assert 1 < dot(beta_10[1:], beta_10[1:]) < 2
    assert 0.5 < multiple_r_squared(inputs, daily_minutes_good, beta_10) < 0.6