示例#1
0
def kmedoid_clustering(g, r, measure, num_cluster):
    """Cluster the nodes of ``g`` with k-medoids over their pairwise RDD values.

    For every ordered pair of nodes ``RDD.realworld_distance_compare`` is
    called once; the resulting square matrix is handed to ``KMedoids`` as the
    sample matrix, one row per node.

    Args:
        g: Graph-like object. It must be iterable over its nodes and provide
            ``degree(node)``.
        r: Forwarded unchanged to ``RDD.realworld_distance_compare``.
        measure: Forwarded unchanged to ``RDD.realworld_distance_compare``.
        num_cluster: Number of clusters requested from ``KMedoids``.

    Returns:
        A ``pandas.DataFrame`` with one row per node and the columns
        ``node_name``, ``radius``, ``degree`` and ``cluster``.
    """
    nodes = list(g)

    # distance_columns[j][i] holds the value for (nodes[j], nodes[i]).
    # Collecting plain lists avoids growing a DataFrame one column at a time,
    # which pandas reports as a fragmented frame for larger graphs.
    distance_columns = [
        [
            RDD.realworld_distance_compare(g, target_one, target_two,
                                           measure, r)
            for target_two in nodes
        ]
        for target_one in nodes
    ]

    # Transpose so that column j belongs to nodes[j] as first argument; this
    # is the same layout the column-wise DataFrame produced.
    # NOTE(review): assumes realworld_distance_compare returns a scalar.
    np_of_rdds = np.array(distance_columns).T

    kmedoids = KMedoids(n_clusters=num_cluster, random_state=0)
    cluster_data = kmedoids.fit(np_of_rdds).labels_

    # Basic per-node information reported next to the cluster label.
    return pd.DataFrame({
        'node_name': nodes,
        # TODO: Broken - the radius is a constant placeholder of 1.
        'radius': [1] * len(nodes),
        'degree': [g.degree(node) for node in nodes],
        'cluster': cluster_data,
    })
示例#2
0
def agglomerative_hierarchical_clustering(g, r, measure, num_cluster):
    """Cluster the nodes of ``g`` with Ward agglomerative clustering.

    For every ordered pair of nodes ``RDD.realworld_distance_compare`` is
    called once; the resulting square matrix is handed to
    ``AgglomerativeClustering`` as the sample matrix, one row per node.

    Args:
        g: Graph-like object. It must be iterable over its nodes and provide
            ``degree(node)``.
        r: Forwarded unchanged to ``RDD.realworld_distance_compare``.
        measure: Forwarded unchanged to ``RDD.realworld_distance_compare``.
        num_cluster: Number of clusters requested from
            ``AgglomerativeClustering``.

    Returns:
        A ``pandas.DataFrame`` with one row per node and the columns
        ``node_name``, ``radius``, ``degree`` and ``cluster``.
    """
    nodes = list(g)

    # distance_columns[j][i] holds the value for (nodes[j], nodes[i]).
    distance_columns = [
        [
            RDD.realworld_distance_compare(g, target_one, target_two,
                                           measure, r)
            for target_two in nodes
        ]
        for target_one in nodes
    ]

    # Transpose into rows so that column j belongs to nodes[j] as first
    # argument - the layout the former column-wise DataFrame had. Plain rows
    # are passed on so that node labels never act as feature names.
    data = [list(row) for row in zip(*distance_columns)]

    # Ward linkage only works with Euclidean distances, which is the
    # estimator's default. The former ``affinity='euclidean'`` keyword is
    # left out because scikit-learn 1.4 removed that parameter.
    cluster = AgglomerativeClustering(n_clusters=num_cluster,
                                      linkage='ward')
    cluster_data = cluster.fit_predict(data)

    # Basic per-node information reported next to the cluster label.
    return pd.DataFrame({
        'node_name': nodes,
        # TODO: Broken - the radius is a constant placeholder of 1.
        'radius': [1] * len(nodes),
        'degree': [g.degree(node) for node in nodes],
        'cluster': cluster_data,
    })