def scale(data_matrix):
    """Compute the mean and standard deviation of every column.

    Example input: [[63, 67, 70], [160, 170.2, 177.8], [150, 160, 171]]

    The column count is the second value returned by ``shape(data_matrix)``.
    Returns a ``(means, stdevs)`` pair of lists with one entry per column.
    """
    _, column_count = shape(data_matrix)

    means = []
    for col in range(column_count):
        means.append(mean(get_column(data_matrix, col)))

    stdevs = []
    for col in range(column_count):
        stdevs.append(standard_deviation(get_column(data_matrix, col)))

    return means, stdevs
One way to see this is by randomly generating pairs of points in the d-dimensional "unit cube" in a variety of dimensions, and calculating the distances between them. """ import sys sys.path.insert(0, "../statistics") from central_tendencies import mean def random_point(dim): return [random.random() for _ in range(dim)] def random_distances(dim, num_pairs): return [distance(random_point(dim), random_point(dim)) for _ in range(num_pairs)] dimensions = range(1, 101) avg_distances = [] min_distances = [] random.seed(0) for dim in dimensions: distances = random_distances(dim, 10000) # 10,000 random pairs avg_distances.append(mean(distances)) # track the average min_distances.append(min(distances)) # track the minimum # ratio between the closest distance and the average distance min_avg_ratio = [min_dist / avg_dist for min_dist, avg_dist in zip(min_distances, avg_distances)]
def mean_deviations(nums):
    """Return a list of each value's signed offset from ``mean(nums)``."""
    center = mean(nums)
    offsets = []
    for value in nums:
        offsets.append(value - center)
    return offsets
def de_mean(x):
    """Shift x so that it is centred on its own mean.

    Returns a new list holding ``x_i - mean(x)`` for every element of x,
    in the original order.
    """
    center = mean(x)
    return list(value - center for value in x)
from central_tendencies import mean


def random_point(dim):
    """Build one point: a list of `dim` values drawn with random.random()."""
    coordinates = []
    for _ in range(dim):
        coordinates.append(random.random())
    return coordinates


def random_distances(dim, num_pairs):
    """Collect `num_pairs` distances, each between two newly drawn points."""
    sampled = []
    for _ in range(num_pairs):
        # Draw the first point before the second so the sequence of random
        # numbers consumed matches left-to-right argument evaluation.
        first = random_point(dim)
        second = random_point(dim)
        sampled.append(distance(first, second))
    return sampled


dimensions = range(1, 101)
avg_distances, min_distances = [], []

random.seed(0)
for dim in dimensions:
    # 10,000 random pairs for this dimension
    distances = random_distances(dim, 10000)
    # track the average and the minimum
    avg_distances.append(mean(distances))
    min_distances.append(min(distances))

# ratio between the closest distance and the average distance
min_avg_ratio = [
    min_dist / avg_dist
    for min_dist, avg_dist in zip(min_distances, avg_distances)
]
def squared_mean_deviations(a_vector):
    """Return the squared deviation of each element from the mean.

    Args:
        a_vector: the values to process. It is passed to ``mean`` and then
            iterated again here, so it should be a re-iterable sequence
            such as a list.

    Returns:
        A list holding ``(num - mean(a_vector)) ** 2`` for every element,
        in the original order.
    """
    list_mean = mean(a_vector)
    # Build a real list instead of returning ``map(...)``: on Python 3 ``map``
    # yields a lazy one-shot iterator, which has no len(), cannot be indexed
    # and is silently empty on a second pass.
    return [(num - list_mean) ** 2 for num in a_vector]
def least_squares_fit(xs, ys):
    """Return ``(alpha, beta)`` computed from xs and ys.

    beta  = correlation(xs, ys) * standard_deviation(ys) / standard_deviation(xs)
    alpha = mean(ys) - beta * mean(xs)
    """
    r = correlation(xs, ys)
    spread_y = standard_deviation(ys)
    spread_x = standard_deviation(xs)

    slope = r * spread_y / spread_x
    intercept = mean(ys) - slope * mean(xs)
    return intercept, slope