def least_squares_fit(x: Vector, y: Vector) -> Tuple[float, float]: """ Given two vectors x and y, find the least-squares values of alpha and beta """ beta = correlation(x, y) * standard_deviation(y) / standard_deviation(x) alpha = mean(y) - beta * mean(x) return alpha, beta
def least_squares_fit(x: Vector, y: Vector) -> Tuple[float, float]: """ Na podstawie przekazanych wartości treningowych x i y znajdź za pomocą metody najmniejszych kwadratów optymalne wartości alpha i beta. """ beta = correlation(x, y) * standard_deviation(y) / standard_deviation(x) alpha = mean(y) - beta * mean(x) return alpha, beta
def least_squares_fit(x: Vector, y: Vector) -> Tuple[float, float]: """ Given two vectors x and y, find the least-squares values of alpha and beta """ beta = correlation(x, y) * standard_deviation(y) / standard_deviation(x) alpha = mean(y) - beta * mean(x) return alpha, beta
def scale(data: List[Vector]) -> Tuple[Vector, Vector]:
    """Compute the per-position mean and standard deviation of a dataset."""
    dim = len(data[0])
    means = vector_mean(data)
    # Lazily extract each column, then take its standard deviation.
    columns = ([row[j] for row in data] for j in range(dim))
    stdevs = [standard_deviation(col) for col in columns]
    return means, stdevs
def scale(data: List[Vector]) -> Tuple[Vector, Vector]:
    """Return the means and standard deviations for each position.

    Args:
        data: a list of equal-length vectors (rows of a dataset).

    Returns:
        A (means, stdevs) pair where means[i] and stdevs[i] are the
        mean and standard deviation of position i across all rows.
        An empty dataset yields ([], []) rather than raising.
    """
    # Robustness: without this guard, data[0] raises IndexError on [].
    if not data:
        return [], []
    dim = len(data[0])
    means = vector_mean(data)
    stdevs = [standard_deviation([vector[i] for vector in data])
              for i in range(dim)]
    return means, stdevs
def scale(data: List[Vector]) -> Tuple[Vector, Vector]:
    """Return (means, stdevs): the columnwise mean and standard
    deviation of the given list of vectors."""
    num_dims = len(data[0])
    means = vector_mean(data)

    def column(j: int) -> Vector:
        # The j-th coordinate of every data point.
        return [point[j] for point in data]

    stdevs = [standard_deviation(column(j)) for j in range(num_dims)]
    return means, stdevs
def least_squares_fit(x, y): beta = correlation(x, y) * standard_deviation(y) / standard_deviation(x) alpha = mean(y) - beta * mean(x) return alpha, beta
def least_squares_fit(x, y): """given training values for x and y, find the least-squares values of alpha and beta""" beta = correlation(x, y) * standard_deviation(y) / standard_deviation(x) alpha = mean(y) - beta * mean(x) return alpha, beta
# Sanity checks for the statistics module against the num_friends /
# daily_minutes data sets (defined elsewhere in the file).
print(stat.mean(num_friends))
print(stat.median(num_friends))
# Quantiles of the friend-count distribution.
assert stat.quantile(num_friends, 0.10) == 1
assert stat.quantile(num_friends, 0.25) == 3
assert stat.quantile(num_friends, 0.75) == 9
assert stat.quantile(num_friends, 0.90) == 13
assert set(stat.mode(num_friends)) == {1, 6}
assert stat.data_range(num_friends) == 99
# Dispersion measures.
assert 81.54 < stat.variance(num_friends) < 81.55
assert 9.02 < stat.standard_deviation(num_friends) < 9.04
assert stat.interquartile_range(num_friends) == 6
# Covariance scales with the units (minutes vs. hours differ by a
# factor of 60), while correlation is unitless and therefore falls in
# the same interval for both.
assert 22.42 < stat.covariance(num_friends, daily_minutes) < 22.43
assert 22.42 / 60 < stat.covariance(num_friends, daily_hours) < 22.43 / 60
assert 0.24 < stat.correlation(num_friends, daily_minutes) < 0.25
assert 0.24 < stat.correlation(num_friends, daily_hours) < 0.25
# Drop the entry with 100 friends (an outlier) from both parallel
# lists, keeping the remaining entries aligned by index.
outlier = num_friends.index(100)
num_friends_good = [x for i, x in enumerate(num_friends) if i != outlier]
daily_minutes_good = [x for i, x in enumerate(daily_minutes) if i != outlier]
def main():
    """Fit a multiple regression by gradient descent, estimate bootstrap
    standard errors for its coefficients, and compare ridge fits for
    several penalty strengths."""
    from scratch.statistics import daily_minutes_good
    from scratch.gradient_descent import gradient_step

    random.seed(0)
    # I used trial and error to choose num_iters and step_size.
    # This will take a while.
    learning_rate = 0.001

    beta = least_squares_fit(inputs, daily_minutes_good, learning_rate, 5000, 25)
    assert 30.50 < beta[0] < 30.70  # constant
    assert 0.96 < beta[1] < 1.00    # number of friends
    assert -1.89 < beta[2] < -1.85  # work hours per day
    assert 0.91 < beta[3] < 0.94    # has PhD

    assert 0.67 < multiple_r_squared(inputs, daily_minutes_good, beta) < 0.68

    from typing import Tuple

    import datetime

    def estimate_sample_beta(pairs: List[Tuple[Vector, float]]):
        # Refit the regression on one bootstrap sample of (x, y) pairs.
        x_sample = [x for x, _ in pairs]
        y_sample = [y for _, y in pairs]
        beta = least_squares_fit(x_sample, y_sample, learning_rate, 5000, 25)
        print("bootstrap sample", beta)
        return beta

    random.seed(0)  # so that you get the same results as me
    # This will take a while!
    bootstrap_betas = bootstrap_statistic(
        list(zip(inputs, daily_minutes_good)),
        estimate_sample_beta,
        100)

    # Standard error of each coefficient = spread of its bootstrap estimates.
    bootstrap_standard_errors = [
        standard_deviation([beta[i] for beta in bootstrap_betas])
        for i in range(4)
    ]

    print(bootstrap_standard_errors)

    # [1.272,   # constant term, actual error = 1.19
    #  0.103,   # num_friends,   actual error = 0.080
    #  0.155,   # work_hours,    actual error = 0.127
    #  1.249]   # phd,           actual error = 0.998

    # Ridge fits: as alpha (the penalty) grows, the coefficient vector
    # shrinks (dot(beta[1:], beta[1:]) drops) and the fit degrades slightly.
    random.seed(0)
    beta_0 = least_squares_fit_ridge(inputs, daily_minutes_good, 0.0,  # alpha
                                     learning_rate, 5000, 25)
    # [30.51, 0.97, -1.85, 0.91]
    assert 5 < dot(beta_0[1:], beta_0[1:]) < 6
    assert 0.67 < multiple_r_squared(inputs, daily_minutes_good, beta_0) < 0.69

    beta_0_1 = least_squares_fit_ridge(inputs, daily_minutes_good, 0.1,  # alpha
                                       learning_rate, 5000, 25)
    # [30.8, 0.95, -1.83, 0.54]
    assert 4 < dot(beta_0_1[1:], beta_0_1[1:]) < 5
    assert 0.67 < multiple_r_squared(inputs, daily_minutes_good, beta_0_1) < 0.69

    beta_1 = least_squares_fit_ridge(inputs, daily_minutes_good, 1,  # alpha
                                     learning_rate, 5000, 25)
    # [30.6, 0.90, -1.68, 0.10]
    assert 3 < dot(beta_1[1:], beta_1[1:]) < 4
    assert 0.67 < multiple_r_squared(inputs, daily_minutes_good, beta_1) < 0.69

    beta_10 = least_squares_fit_ridge(inputs, daily_minutes_good, 10,  # alpha
                                      learning_rate, 5000, 25)
    # [28.3, 0.67, -0.90, -0.01]
    assert 1 < dot(beta_10[1:], beta_10[1:]) < 2
    assert 0.5 < multiple_r_squared(inputs, daily_minutes_good, beta_10) < 0.6
# 101 wartości zbliżonych do 100. close_to_100 = [99.5 + random.random() for _ in range(101)] # 101 wartości, 50 z nich jest bliskich 0, a kolejne 50 bliskich 200. far_from_100 = ([99.5 + random.random()] + [random.random() for _ in range(50)] + [200 + random.random() for _ in range(50)]) from scratch.statistics import median, standard_deviation medians_close = bootstrap_statistic(close_to_100, median, 100) medians_far = bootstrap_statistic(far_from_100, median, 100) assert standard_deviation(medians_close) < 1 assert standard_deviation(medians_far) > 90 from scratch.probability import normal_cdf def p_value(beta_hat_j: float, sigma_hat_j: float) -> float: if beta_hat_j > 0: # Jeżeli współczynnik ma wartość dodatnią, musimy obliczyć dwukrotność # prawdopodobieństwa spotkania „większej” wartości. return 2 * (1 - normal_cdf(beta_hat_j / sigma_hat_j)) else: # W przeciwnym wypadku obliczamy dwukrotność prawdopodobieństwa spotkania „mniejszej” wartości. return 2 * normal_cdf(beta_hat_j / sigma_hat_j)
def main():
    """Fit a multiple regression by gradient descent, estimate bootstrap
    standard errors for its coefficients, and compare ridge fits for
    several penalty strengths."""
    from scratch.statistics import daily_minutes_good
    from scratch.gradient_descent import gradient_step

    random.seed(0)
    # I used trial and error to choose num_iters and step_size.
    # This will run for a while.
    learning_rate = 0.001

    beta = least_squares_fit(inputs, daily_minutes_good, learning_rate, 5000, 25)
    assert 30.50 < beta[0] < 30.70  # constant
    assert 0.96 < beta[1] < 1.00    # num friends
    assert -1.89 < beta[2] < -1.85  # work hours per day
    assert 0.91 < beta[3] < 0.94    # has PhD

    assert 0.67 < multiple_r_squared(inputs, daily_minutes_good, beta) < 0.68

    from typing import Tuple

    import datetime

    def estimate_sample_beta(pairs: List[Tuple[Vector, float]]):
        # Refit the regression on one bootstrap sample of (x, y) pairs.
        x_sample = [x for x, _ in pairs]
        y_sample = [y for _, y in pairs]
        beta = least_squares_fit(x_sample, y_sample, learning_rate, 5000, 25)
        print("bootstrap sample", beta)
        return beta

    random.seed(0)  # so that you get the same results as me
    # This will take a couple of minutes!
    bootstrap_betas = bootstrap_statistic(
        list(zip(inputs, daily_minutes_good)),
        estimate_sample_beta,
        100)

    # Standard error of each coefficient = spread of its bootstrap estimates.
    bootstrap_standard_errors = [
        standard_deviation([beta[i] for beta in bootstrap_betas])
        for i in range(4)
    ]

    print(bootstrap_standard_errors)

    # [1.272,   # constant term, actual error = 1.19
    #  0.103,   # num_friends,   actual error = 0.080
    #  0.155,   # work_hours,    actual error = 0.127
    #  1.249]   # phd,           actual error = 0.998

    # Ridge fits: as alpha (the penalty) grows, the coefficient vector
    # shrinks (dot(beta[1:], beta[1:]) drops) and the fit degrades slightly.
    random.seed(0)
    beta_0 = least_squares_fit_ridge(inputs, daily_minutes_good, 0.0,  # alpha
                                     learning_rate, 5000, 25)
    # [30.51, 0.97, -1.85, 0.91]
    assert 5 < dot(beta_0[1:], beta_0[1:]) < 6
    assert 0.67 < multiple_r_squared(inputs, daily_minutes_good, beta_0) < 0.69

    beta_0_1 = least_squares_fit_ridge(inputs, daily_minutes_good, 0.1,  # alpha
                                       learning_rate, 5000, 25)
    # [30.8, 0.95, -1.83, 0.54]
    assert 4 < dot(beta_0_1[1:], beta_0_1[1:]) < 5
    assert 0.67 < multiple_r_squared(inputs, daily_minutes_good, beta_0_1) < 0.69

    beta_1 = least_squares_fit_ridge(inputs, daily_minutes_good, 1,  # alpha
                                     learning_rate, 5000, 25)
    # [30.6, 0.90, -1.68, 0.10]
    assert 3 < dot(beta_1[1:], beta_1[1:]) < 4
    assert 0.67 < multiple_r_squared(inputs, daily_minutes_good, beta_1) < 0.69

    beta_10 = least_squares_fit_ridge(inputs, daily_minutes_good, 10,  # alpha
                                      learning_rate, 5000, 25)
    # [28.3, 0.67, -0.90, -0.01]
    assert 1 < dot(beta_10[1:], beta_10[1:]) < 2
    assert 0.5 < multiple_r_squared(inputs, daily_minutes_good, beta_10) < 0.6