Пример #1
0
def scale(data_matrix):
    """Return the mean and standard deviation of every column.

    Example input:
    [[63, 67, 70], [160, 170.2, 177.8], [150, 160, 171]].

    The result is a pair ``(means, stdevs)``: two lists with one entry per
    column, in column order.
    """
    # shape() yields (rows, columns); only the column count is used here.
    _, column_count = shape(data_matrix)

    means = []
    for j in range(column_count):
        means.append(mean(get_column(data_matrix, j)))

    stdevs = []
    for j in range(column_count):
        stdevs.append(standard_deviation(get_column(data_matrix, j)))

    return means, stdevs
One way to see this is by randomly generating pairs of points in the d-dimensional 
"unit cube" in a variety of dimensions, and calculating the distances between 
them.
"""
import sys
sys.path.insert(0, "../statistics")
from central_tendencies import mean

def random_point(dim):
    """Return a list of ``dim`` values, each drawn with ``random.random()``."""
    coordinates = []
    for _ in range(dim):
        coordinates.append(random.random())
    return coordinates

def random_distances(dim, num_pairs):
    """Return ``num_pairs`` distances between freshly drawn random points.

    Each entry is ``distance(p, q)`` where ``p`` and ``q`` are two separate
    ``random_point(dim)`` draws, made in that order.
    """
    pair_distances = []
    for _ in range(num_pairs):
        first = random_point(dim)
        second = random_point(dim)
        pair_distances.append(distance(first, second))
    return pair_distances

# Sweep every dimension from 1 to 100 and record, per dimension, the average
# and the minimum distance over 10,000 random point pairs.
dimensions = range(1, 101)

avg_distances, min_distances = [], []

random.seed(0)  # fixed seed so repeated runs draw the same points
for dim in dimensions:
    distances = random_distances(dim, 10000)
    min_distances.append(min(distances))      # closest pair in this dimension
    avg_distances.append(mean(distances))     # typical pair in this dimension

# Per-dimension ratio of the closest distance to the average distance.
min_avg_ratio = [
    closest / average
    for closest, average in zip(min_distances, avg_distances)
]

def mean_deviations(nums):
    """Return a list with each element of ``nums`` minus ``mean(nums)``."""
    center = mean(nums)
    deviations = []
    for value in nums:
        deviations.append(value - center)
    return deviations
def de_mean(x):
    """Shift ``x`` by its mean so that the result has mean 0.

    Returns a new list holding ``x_i - mean(x)`` for every ``x_i`` in ``x``.
    """
    average = mean(x)
    shifted = []
    for x_i in x:
        shifted.append(x_i - average)
    return shifted
def de_mean(x):
    """Subtract ``mean(x)`` from every element of ``x`` (result has mean 0).

    The translated values are returned as a new list, in the original order.
    """
    offset = mean(x)
    centered = []
    for element in x:
        centered.append(element - offset)
    return centered
from central_tendencies import mean


def random_point(dim):
    """Build a point as a list of ``dim`` draws from ``random.random()``."""
    point = []
    for _ in range(dim):
        point.append(random.random())
    return point


def random_distances(dim, num_pairs):
    """Return a list of ``num_pairs`` distances between random point pairs.

    For every pair, two points are drawn with ``random_point(dim)`` and the
    value of ``distance(left, right)`` is recorded.
    """
    results = []
    for _ in range(num_pairs):
        left = random_point(dim)
        right = random_point(dim)
        results.append(distance(left, right))
    return results


# For each dimension 1..100, sample 10,000 random point pairs and keep the
# average and the minimum of their distances.
dimensions = range(1, 101)

avg_distances, min_distances = [], []

random.seed(0)  # fixed seed keeps the sampled points the same on every run
for dim in dimensions:
    distances = random_distances(dim, 10000)
    min_distances.append(min(distances))  # smallest distance seen
    avg_distances.append(mean(distances))  # average distance seen

# Ratio of the closest distance to the average distance, one per dimension.
min_avg_ratio = [
    nearest / typical
    for nearest, typical in zip(min_distances, avg_distances)
]
def squared_mean_deviations(a_vector):
    """Return the squared deviation of each element from the vector's mean.

    The result is a list holding ``(num - mean(a_vector)) ** 2`` for every
    ``num`` in ``a_vector``, in the original order.

    A list is returned rather than a ``map`` object: on Python 3 ``map`` is a
    one-shot iterator that cannot be indexed, measured with ``len()`` or
    iterated twice. A list still works wherever the result is iterated or
    summed.
    """
    vector_mean = mean(a_vector)
    return [(num - vector_mean) ** 2 for num in a_vector]
def least_squares_fit(xs, ys):
    """Return the pair ``(alpha, beta)`` computed from ``xs`` and ``ys``.

    ``beta`` is ``correlation(xs, ys) * standard_deviation(ys) /
    standard_deviation(xs)`` and ``alpha`` is ``mean(ys) - beta * mean(xs)``.
    Going by the function name, these are presumably the intercept and slope
    of a fitted line ``y = alpha + beta * x``.
    """
    corr = correlation(xs, ys)
    spread_y = standard_deviation(ys)
    spread_x = standard_deviation(xs)
    beta = corr * spread_y / spread_x
    alpha = mean(ys) - beta * mean(xs)
    return alpha, beta