def generate_distribution_plot(clusters, output_path):
    """Plot a normal curve of point-to-centroid distances for every cluster.

    The current figure is cleared, then for each cluster the distance from
    its centroid to each of its points is computed with
    ``KMeans.calculate_distance``. The sorted distances are plotted against
    the normal probability density whose mean and standard deviation are
    taken from those same distances. All curves share one figure, which is
    saved once at the end.

    Args:
        clusters: Mapping of centroid -> iterable of points belonging to
            that centroid.
        output_path: String prefix for the output file. It is concatenated
            directly with ``'distribution.png'``, so it must already end
            with a path separator when it names a directory.
    """
    pyplot.clf()
    # .items() instead of .iteritems(): the latter was removed from dict in
    # Python 3, while .items() is available on both Python 2 and Python 3.
    for centroid, points in clusters.items():
        # Sorted so the curve is drawn left to right rather than zig-zagging.
        distances = sorted(
            KMeans.calculate_distance(centroid, p) for p in points)
        pdf = stats.norm.pdf(distances, np.mean(distances), np.std(distances))
        pl.plot(distances, pdf)
    pl.savefig(output_path + 'distribution.png')
def test_calculate_distance(self):
    # Two fixed 4-component points whose component-wise differences are
    # 1, 2, 4 and 10. The expected value 11 equals sqrt(1 + 4 + 16 + 100),
    # so calculate_distance is presumably Euclidean -- confirm against KMeans.
    first_point = (3, 5, 8, 15)
    second_point = (2, 3, 4, 5)
    self.assertEqual(
        KMeans.calculate_distance(first_point, second_point), 11)