forked from mgbarsky/clustering_lab
-
Notifications
You must be signed in to change notification settings - Fork 0
/
hierarchical_cluster.py
39 lines (31 loc) · 1.65 KB
/
hierarchical_cluster.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import clusters, data_processing
# Hierarchical clustering of the country dataset under several distance
# measures. For each measure the script prints the cluster tree and asks
# clusters.drawdendrogram to render it to a JPEG file.

# Load the raw dataset.
data = data_processing.open_csv_file('dataset.csv')

# Country names, used as the labels for the rows of the attribute matrix.
countries_list = data_processing.get_country_names(data)

# Numerical attribute matrix. The return value of str_to_float is ignored,
# so it presumably converts the matrix entries in place -- TODO confirm
# against data_processing.
attr_matrix = data_processing.create_attribute_matrix(data)
data_processing.str_to_float(attr_matrix)

# NOTE(review): num_cluster is never read in this script (hcluster is not
# given a cluster count). Kept so the module-level name does not disappear;
# remove it if nothing else relies on it.
num_cluster = 3

# One entry per clustering run:
# (header printed before the tree, distance function, dendrogram file name).
distance_runs = (
    ('clusters by euclidean distance', clusters.euclidean,
     'Euclidean Cluster.jpg'),
    ('clusters by tanimoto coefficient', clusters.tanimoto,
     'Tanimoto Cluster.jpg'),
    ('clusters by pearson correlation', clusters.pearson,
     'Pearson Cluster.jpg'),
    ('clusters by cosine similarity', clusters.cosine,
     'Cosine Cluster.jpg'),
)

for run_index, (header, distance_fn, jpeg_name) in enumerate(distance_runs):
    # Separate consecutive reports with exactly one blank line. The original
    # script omitted the separator after the first (euclidean) report only.
    if run_index:
        print()
    resulting_clusters = clusters.hcluster(attr_matrix, distance=distance_fn)
    print(header)
    clusters.printhclust(resulting_clusters, labels=countries_list)
    clusters.drawdendrogram(resulting_clusters, countries_list, jpeg=jpeg_name)