def kmedoid_clustering(g, r, measure, num_cluster):
    """Cluster the nodes of ``g`` with k-medoids over their pairwise RDD values.

    For every ordered pair of nodes, ``RDD.realworld_distance_compare`` is
    evaluated and the results are arranged in a square table (one column per
    node). Each row of that table is then used as the feature vector of a
    sample for ``KMedoids``.

    Args:
        g: Graph-like object; it must be iterable over its nodes and provide
            ``degree(node)``.
        r: Forwarded unchanged to ``RDD.realworld_distance_compare``
            (presumably a radius -- confirm against the RDD module).
        measure: Forwarded unchanged to ``RDD.realworld_distance_compare``.
        num_cluster: Number of clusters requested from ``KMedoids``.

    Returns:
        A ``pandas.DataFrame`` with one row per node and the columns
        ``node_name``, ``radius``, ``degree`` and ``cluster``.
    """
    # Column ``column_node`` holds the comparison of that node against every
    # node of the graph, in iteration order.
    rdd_frame = pd.DataFrame()
    for column_node in g:
        rdd_frame[column_node] = [
            RDD.realworld_distance_compare(g, column_node, row_node, measure, r)
            for row_node in g
        ]

    rdd_matrix = np.array(rdd_frame)

    # random_state is pinned so repeated runs yield the same labelling.
    fitted = KMedoids(n_clusters=num_cluster, random_state=0).fit(rdd_matrix)
    labels = fitted.labels_

    # Basic per-node information reported next to the cluster label.
    names = list(g)
    degrees = [g.degree(node) for node in g]
    # TODO: Broken -- the radius is hard-coded to 1 for every node.
    radii = [1] * len(names)

    return pd.DataFrame({
        'node_name': names,
        'radius': radii,
        'degree': degrees,
        'cluster': labels,
    })
def agglomerative_hierarchical_clustering(g, r, measure, num_cluster):
    """Cluster the nodes of ``g`` with Ward agglomerative clustering.

    For every ordered pair of nodes, ``RDD.realworld_distance_compare`` is
    evaluated and the results are arranged in a square table (one column per
    node). Each row of that table is used as the feature vector of a sample
    for ``AgglomerativeClustering`` with Ward linkage.

    Args:
        g: Graph-like object; it must be iterable over its nodes and provide
            ``degree(node)``.
        r: Forwarded unchanged to ``RDD.realworld_distance_compare``
            (presumably a radius -- confirm against the RDD module).
        measure: Forwarded unchanged to ``RDD.realworld_distance_compare``.
        num_cluster: Number of clusters to produce.

    Returns:
        A ``pandas.DataFrame`` with one row per node and the columns
        ``node_name``, ``radius``, ``degree`` and ``cluster``.
    """
    # Column ``target_one`` holds the comparison of that node against every
    # node of the graph, in iteration order.
    all_rdds_df = pd.DataFrame()
    for target_one in g:
        all_rdds_df[target_one] = [
            RDD.realworld_distance_compare(g, target_one, target_two, measure, r)
            for target_two in g
        ]

    # Hand scikit-learn a plain array (as the k-medoids variant does) so the
    # node labels used as column names are not validated as feature names.
    np_of_rdds = np.array(all_rdds_df)

    # Ward linkage only works with Euclidean distance, which is also the
    # default, so the distance keyword is omitted: ``affinity`` was removed
    # from newer scikit-learn releases and passing it raises TypeError there.
    cluster = AgglomerativeClustering(n_clusters=num_cluster, linkage='ward')
    cluster_data = cluster.fit_predict(np_of_rdds)

    # Basic per-node information reported next to the cluster label.
    node_list = list(g)
    degree_list = [g.degree(node) for node in g]
    # TODO: Broken -- the radius is hard-coded to 1 for every node.
    rad_list = [1] * len(node_list)

    return pd.DataFrame({
        'node_name': node_list,
        'radius': rad_list,
        'degree': degree_list,
        'cluster': cluster_data,
    })