for cluster in cluster_list:
        ans += cluster.cluster_error(datatable)
    return ans

# Quality analysis for the data set behind DATA_896_URL: compare the
# distortion of hierarchical clustering against k-means clustering for
# cluster counts 20 down to 6, then plot both curves.
data_table1 = load_data_table(DATA_896_URL)
data_table2 = load_data_table(DATA_896_URL)

# Each method gets its own singleton clusters (one per table row), because
# the hierarchical side merges cluster objects in place further down.
singleton_list1 = [alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4])
                   for line in data_table1]
singleton_list2 = [alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4])
                   for line in data_table2]

data1 = []  # (number of clusters, distortion) samples for hierarchical clustering
data2 = []  # (number of clusters, distortion) samples for k-means clustering

# Ask for 21 clusters once, then merge one closest pair per iteration instead
# of re-running hierarchical clustering from scratch for every cluster count.
cluster_list1 = cl.hierarchical_clustering(singleton_list1, 21)

for i in range(20, 5, -1):
    # new_pair[1] and new_pair[2] are used as indices into cluster_list1.
    new_pair = cl.fast_closest_pair(cluster_list1)
    cluster_list1[new_pair[1]].merge_clusters(cluster_list1[new_pair[2]])
    # Delete by index: list.remove() searches by equality, which costs an
    # extra scan and could drop a different element that compares equal.
    del cluster_list1[new_pair[2]]

    # NOTE(review): the last argument (1) is presumably the number of
    # k-means iterations -- confirm against cl.kmeans_clustering.
    cluster_list2 = cl.kmeans_clustering(singleton_list2, i, 1)
    data1.append((i, compute_distortion(cluster_list1, data_table1)))
    data2.append((i, compute_distortion(cluster_list2, data_table2)))

simpleplot.plot_lines('Quality Analysis - 896', 800, 600, 'num of clusters', 'Total Error', [data1, data2],
                  False, ['hierarchical_clustering', 'kmeans_clustering'])
import simpleplot
import time
import codeskulptor

# Adjust the CodeSkulptor timeout before running the timing experiment below.
# NOTE(review): the argument (100) is presumably a limit in seconds -- confirm
# against the CodeSkulptor documentation.
codeskulptor.set_timeout(100)

#########################################
def gen_random_clusters(num_clusters):
    """
    Generate a list of num_clusters clusters with random coordinates.

    Each cluster is built from an empty set, two independently drawn
    floating-point values in [-1, 1) (presumably its horizontal and
    vertical position -- confirm against alg_cluster.Cluster) and zeros
    for the remaining two constructor arguments.

    The coordinates are drawn with random.random() rather than
    random.randrange(-1, 1): randrange only yields the integers -1 and 0,
    which would put every cluster on one of four points.

    Returns a list of alg_cluster.Cluster objects; the list is empty when
    num_clusters is 0 or negative.
    """
    return [alg_cluster.Cluster(set([]),
                                random.random() * 2 - 1,
                                random.random() * 2 - 1,
                                0, 0)
            for _ in range(num_clusters)]

# Running-time experiment: time slow_closest_pair and fast_closest_pair on
# the same randomly generated cluster list for every size from 2 through 200.
data1 = []  # (size, elapsed time) samples for slow_closest_pair
data2 = []  # (size, elapsed time) samples for fast_closest_pair
for size in range(2, 201):
    clusters = gen_random_clusters(size)

    # Three timestamps bracket the two calls back to back.
    started = time.time()
    cl.slow_closest_pair(clusters)
    slow_done = time.time()
    cl.fast_closest_pair(clusters)
    fast_done = time.time()

    slow_elapsed = slow_done - started
    fast_elapsed = fast_done - slow_done
    data1.append((size, slow_elapsed))
    data2.append((size, fast_elapsed))

simpleplot.plot_lines('Running Time Analysis', 400, 300, 'num of clusters', 'time usage',
                      [data1, data2], False, ['slow_closest_pair', 'fast_closest_pair'])