Exemplo n.º 1
0
def assignment_q3():
    """
    Cluster the DATA_3108_URL data table with k-means and plot the result.

    Loads the table with load_data_table, wraps every row in a single-member
    alg_cluster.Cluster, runs project.kmeans_clustering on those clusters
    with the arguments (15, 5), prints how many clusters were returned and
    passes the table and the clusters to alg_clusters_matplotlib.plot_clusters.

    Takes no arguments and returns nothing; its effects are the printed line
    and the plot.
    """
    data_table = load_data_table(DATA_3108_URL)

    # One cluster per row: column 0 becomes the only member of the cluster's
    # set, columns 1-4 are forwarded positionally.
    # NOTE(review): presumably (code, x, y, population, risk) -- confirm
    # against alg_cluster.Cluster.
    singleton_list = [
        alg_cluster.Cluster({row[0]}, row[1], row[2], row[3], row[4])
        for row in data_table
    ]

    # NOTE(review): 15 and 5 are presumably the number of clusters and the
    # number of iterations -- confirm against project.kmeans_clustering.
    cluster_list = project.kmeans_clustering(singleton_list, 15, 5)

    # A single pre-formatted argument prints the same text whether print is
    # a statement (Python 2) or a function (Python 3).
    print("Displaying %d k-means clusters" % len(cluster_list))

    # NOTE(review): the meaning of the trailing True is not visible here;
    # check plot_clusters before changing it.
    alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True)
Exemplo n.º 2
0
def run_example():
    """
    Load a data table, compute a list of clusters and plot them.

    The table comes from load_data_table(DATA_3108_URL). Every row is wrapped
    in a single-member alg_cluster.Cluster, the clusters are grouped with
    project.kmeans_clustering using the arguments (15, 5), the number of
    resulting clusters is printed, and the result is drawn.

    This function always draws with alg_clusters_matplotlib; it does not
    consult any DESKTOP switch. Alternative clustering calls are kept below
    as comments so they can be swapped in by hand.

    Takes no arguments and returns nothing.
    """
    data_table = load_data_table(DATA_3108_URL)

    # One cluster per row: column 0 becomes the only member of the cluster's
    # set, columns 1-4 are forwarded positionally.
    # NOTE(review): presumably (code, x, y, population, risk) -- confirm
    # against alg_cluster.Cluster.
    singleton_list = [
        alg_cluster.Cluster({row[0]}, row[1], row[2], row[3], row[4])
        for row in data_table
    ]

    # Alternative 1 (disabled): sequential clustering.
    # cluster_list = sequential_clustering(singleton_list, 50)
    # print("Displaying %d sequential clusters" % len(cluster_list))

    # Alternative 2 (disabled): hierarchical clustering.
    # cluster_list = project.hierarchical_clustering(singleton_list, 15)
    # print("Displaying %d hierarchical clusters" % len(cluster_list))

    cluster_list = project.kmeans_clustering(singleton_list, 15, 5)

    # A single pre-formatted argument prints the same text whether print is
    # a statement (Python 2) or a function (Python 3).
    print("Displaying %d k-means clusters" % len(cluster_list))

    # Draw the clusters with matplotlib.
    # NOTE(review): the meaning of the trailing True is not visible here;
    # check plot_clusters before changing it.
    alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True)
Exemplo n.º 3
0
def test_kmeans():
    """
    Check student.kmeans_clustering against known results on the 24 county set.

    Each case is (num_clusters, num_iterations, expected set of county
    tuples). For every case the student's clustering is converted with
    set_of_county_tuples and compared to the expected set through
    simpletest.TestSuite.run_test; a summary is reported at the end.

    A fresh list of single-member clusters is built for every case, so an
    implementation of kmeans_clustering that mutates its input cannot
    influence later cases.

    Takes no arguments and returns nothing.
    """

    # load small data table
    # print("") emits the same blank line on Python 2 and Python 3, whereas a
    # bare `print` is a no-op expression on Python 3.
    print("")
    print("Testing kmeans_clustering on 24 county set")
    data_24_table = load_data_table(DATA_24_URL)

    kmeansdata_24 = [
        [15, 1, {
            ('34017', '36061'), ('06037',), ('06059',), ('36047',),
            ('36081',), ('06071', '08031'), ('36059',), ('36005',),
            ('55079',), ('34013', '34039'), ('06075',), ('01073',),
            ('06029',), ('41051', '41067'),
            ('11001', '24510', '51013', '51760', '51840', '54009'),
        }],
        [15, 3, {
            ('34017', '36061'), ('06037', '06059'), ('06071',), ('36047',),
            ('36081',), ('08031',), ('36059',), ('36005',), ('55079',),
            ('34013', '34039'), ('06075',), ('01073',), ('06029',),
            ('41051', '41067'),
            ('11001', '24510', '51013', '51760', '51840', '54009'),
        }],
        [15, 5, {
            ('34017', '36061'), ('06037', '06059'), ('06071',), ('36047',),
            ('36081',), ('08031',), ('36059',), ('36005',), ('55079',),
            ('34013', '34039'), ('06075',), ('01073',), ('06029',),
            ('41051', '41067'),
            ('11001', '24510', '51013', '51760', '51840', '54009'),
        }],
        [10, 1, {
            ('34017', '36061'), ('06029', '06037', '06075'),
            ('11001', '24510', '34013', '34039', '51013', '51760', '51840',
             '54009'),
            ('06059',), ('36047',), ('36081',),
            ('06071', '08031', '41051', '41067'), ('36059',), ('36005',),
            ('01073', '55079'),
        }],
        [10, 3, {
            ('34013', '34017', '36061'), ('06029', '06037', '06075'),
            ('08031', '41051', '41067'), ('06059', '06071'),
            ('34039', '36047'), ('36081',), ('36059',), ('36005',),
            ('01073', '55079'),
            ('11001', '24510', '51013', '51760', '51840', '54009'),
        }],
        [10, 5, {
            ('34013', '34017', '36061'), ('06029', '06037', '06075'),
            ('08031', '41051', '41067'), ('06059', '06071'),
            ('34039', '36047'), ('36081',), ('36059',), ('36005',),
            ('01073', '55079'),
            ('11001', '24510', '51013', '51760', '51840', '54009'),
        }],
        [5, 1, {
            ('06029', '06037', '06075'),
            ('01073', '11001', '24510', '34013', '34017', '34039', '36047',
             '51013', '51760', '51840', '54009', '55079'),
            ('06059',), ('36005', '36059', '36061', '36081'),
            ('06071', '08031', '41051', '41067'),
        }],
        [5, 3, {
            ('06029', '06037', '06075'),
            ('11001', '24510', '34013', '34017', '34039', '36005', '36047',
             '36059', '36061', '36081', '51013'),
            ('08031', '41051', '41067'), ('06059', '06071'),
            ('01073', '51760', '51840', '54009', '55079'),
        }],
        [5, 5, {
            ('06029', '06037', '06075'), ('08031', '41051', '41067'),
            ('06059', '06071'), ('01073', '55079'),
            ('11001', '24510', '34013', '34017', '34039', '36005', '36047',
             '36059', '36061', '36081', '51013', '51760', '51840', '54009'),
        }],
    ]

    suite = simpletest.TestSuite()

    for num_clusters, num_iterations, expected_county_tuple in kmeansdata_24:

        # Rebuild the input for every case so that a mutating implementation
        # cannot leak state from one case into the next.
        cluster_list = [
            alg_cluster.Cluster({line[0]}, line[1], line[2], line[3], line[4])
            for line in data_24_table
        ]

        # compute student answer
        student_clustering = student.kmeans_clustering(cluster_list, num_clusters, num_iterations)
        student_county_tuple = set_of_county_tuples(student_clustering)

        # Prepare test
        error_message = "Testing kmeans_custering on 24 county table, num_clusters = " + str(num_clusters)
        error_message += " num_iterations = " + str(num_iterations)
        error_message += "\nStudent county tuples:\t" + str(student_county_tuple)
        error_message += "\nExpected county tuples:\t" + str(expected_county_tuple)

        # The comparison is done here; run_test only checks that it is True.
        suite.run_test(student_county_tuple == expected_county_tuple, True, error_message)

    suite.report_results()