def scale(data_matrix):
    num_rows, num_cols = shape(data_matrix)
    means = [mean(get_column(data_matrix, j)) for j in range(num_cols)]
    stdevs = [
        standard_deviation(get_column(data_matrix, j)) for j in range(num_cols)
    ]
    return means, stdevs
Пример #2
0
def least_squares_fit(x, y):
    """given training values for x and y,
    find the least-squares values of alpha and beta"""
    beta = correlation(x, y) * standard_deviation(y) / standard_deviation(x)
    alpha = mean(y) - beta * mean(x)
    return alpha, beta
Пример #3
0
    plot_cities()

    # try several different values for k
    for k in range(1,76):
        num_correct = 0

        for location, actual_language in cities:

            other_cities = [other_city 
                            for other_city in cities
                            if other_city != (location, actual_language)]

            predicted_language = knn_classify(k, other_cities, location)

            if predicted_language == actual_language: 
                num_correct += 1

        print k, "neighbor[s]:", num_correct, "correct out of", len(cities)

    dimensions = range(1, 101, 5)

    avg_distances = []
    min_distances = []

    random.seed(0)
    for dim in dimensions:
        distances = random_distances(dim, 10000)  # 10,000 random pairs
        avg_distances.append(mean(distances))     # track the average
        min_distances.append(min(distances))      # track the minimum
        print dim, min(distances), mean(distances), min(distances) / mean(distances)