Пример #1
0
def _write_scores_csv(path, scores):
    """Write a ``{node: score}`` mapping to *path* as two-column CSV rows."""
    # csv.writer needs a text-mode file opened with newline='' on Python 3;
    # the previous 'wb' mode makes writerow() raise TypeError there.
    with open(path, 'w', newline='') as csv_file:
        csv.writer(csv_file).writerows(scores.items())


def main():
    """Compute centrality scores per interaction type and dump them to CSV.

    For each of 'mentions', 'replies' and 'retweets', the edge list is loaded
    through ``util.get_edge_list``, turned into a graph with
    ``gc.create_graph_edge_weights`` and restricted with
    ``gc.create_graph_subset`` to the nodes returned by ``util.get_lists``.
    PageRank, betweenness centrality and closeness centrality are then
    computed with networkx and written to ``<type>_pagerank.csv``,
    ``<type>_betweenness.csv`` and ``<type>_closeness.csv`` in the current
    working directory, one ``node,score`` row per node.
    """
    interaction_types = ['mentions', 'replies', 'retweets']

    # (file suffix, networkx function) pairs, in the order they are written.
    measures = (
        ('pagerank', nx.pagerank),
        ('betweenness', nx.betweenness_centrality),
        ('closeness', nx.closeness_centrality),
    )

    # The node lists do not depend on the interaction type, so build the
    # combined list once instead of once per iteration.
    c_list, m_list, p_list = util.get_lists()
    cmp_list = util.append_arrays(c_list, m_list, p_list)

    for interaction_type in interaction_types:
        edge_list = util.get_edge_list(interaction_type)
        G = gc.create_graph_edge_weights(edge_list)
        G = gc.create_graph_subset(G, cmp_list)

        for suffix, measure in measures:
            _write_scores_csv(
                '{}_{}.csv'.format(interaction_type, suffix), measure(G))
Пример #2
0
def attribute_info():
    """Print 'type' assortativity and mixing for each interaction graph.

    For 'mentions', 'replies' and 'retweets' in turn, the edge-weighted graph
    is built, restricted to the combined node list from ``util.get_lists``,
    and passed to ``add_types`` (presumably what sets the 'type' node
    attribute read below -- confirm against its definition). The attribute
    assortativity coefficient and the normalized attribute mixing dict for
    'type' are then printed.
    """
    c_nodes, m_nodes, p_nodes = util.get_lists()
    tracked_nodes = util.append_arrays(c_nodes, m_nodes, p_nodes)

    for kind in ('mentions', 'replies', 'retweets'):
        full_graph = create_graph_edge_weights(util.get_edge_list(kind))
        typed_graph = create_graph_subset(full_graph, tracked_nodes)
        add_types(typed_graph)

        # Each statistic is computed right before it is printed, so the first
        # line is still emitted if the second computation fails.
        assortativity = nx.attribute_assortativity_coefficient(
            typed_graph, 'type')
        print('{} Assortativity: '.format(kind), assortativity)

        mixing = nx.attribute_mixing_dict(typed_graph, 'type', normalized=True)
        print('{} Mixing: '.format(kind), mixing)
Пример #3
0
def main():
    """Export the node-list subgraph of each interaction type to a JSON file.

    For each of 'mentions', 'replies' and 'retweets', the edge-weighted graph
    is built from ``util.get_edge_list``, restricted to the combined node
    list from ``util.get_lists``, passed to ``add_colors``, and written to
    ``<interaction_type>.json`` in the current working directory via
    ``write_graph_to_json``. The third argument to ``write_graph_to_json`` is
    the first three characters of the interaction type. The output filename
    is printed for every file.
    """
    interaction_types = ['mentions', 'replies', 'retweets']

    # The node lists do not depend on the interaction type; build them once.
    c_list, m_list, p_list = util.get_lists()
    cmp_list = util.append_arrays(c_list, m_list, p_list)

    for interaction_type in interaction_types:
        edge_list = util.get_edge_list(interaction_type)
        g = gc.create_graph_edge_weights(edge_list)
        cmp_g = gc.create_graph_subset(g, cmp_list)
        add_colors(cmp_g)

        filename = "{}.json".format(interaction_type)
        # The context manager closes the file even if serialisation raises.
        with open(filename, 'w') as json_file:
            # Single-argument call form prints identically on Python 2 and 3;
            # the old print statement is a SyntaxError on Python 3.
            print("filename: %s" % filename)
            write_graph_to_json(json_file, cmp_g, interaction_type[:3])
Пример #4
0
def test_conductance():
    """Print the conductance score of each spectral cluster of the replies graph.

    Builds the edge-weighted 'replies' graph, restricts it to the combined
    node list from ``util.get_lists``, clusters its nodes into three groups
    with ``clustering.spectral_clustering`` and prints
    ``conductance_score`` for the cluster labels '0', '1' and '2'.
    """
    num_clusters = 3

    edge_list = util.get_edge_list('replies')
    c_list, m_list, p_list = util.get_lists()
    cmp_list = util.append_arrays(c_list, m_list, p_list)
    g = gc.create_graph_edge_weights(edge_list)
    cmp_g = gc.create_graph_subset(g, cmp_list)
    nodes = np.asarray(cmp_g.nodes())

    # weight=None is the documented way to request an unweighted matrix
    # (every edge counts as 1). The former string 'None' was looked up as an
    # edge-attribute name and only fell back to 1 because no such attribute
    # exists.
    adjacency = nx.adjacency_matrix(cmp_g, weight=None)
    # Vectors of in and out degrees, unweighted
    am = clustering.am_to_vectors(nodes, adjacency)
    clusters_df = clustering.spectral_clustering(nodes, am, num_clusters)

    for i in range(num_clusters):
        # Cluster labels are passed to conductance_score as strings.
        conductance = conductance_score(cmp_g, clusters_df, str(i))
        print(conductance)
Пример #5
0
def main():
    """Run spectral clustering over several adjacency-matrix variants.

    As far as this portion of the function shows:

    1. A header row is appended to ``Clustering/Cluster_Stats.csv`` and to
       ``Clustering/Cluster_Info.csv``, and both files are closed again.
    2. For each interaction type ('mentions', 'replies', 'retweets') the
       edge-weighted graph is built and restricted to the combined node list
       from ``util.get_lists``.
    3. Ten adjacency matrices are derived from that graph (undirected and
       directed, each with ``weight='weight'`` and ``weight='None'``, plus the
       outputs of ``am_to_sum``, ``am_to_prod``, ``am_bib`` and
       ``am_deg_discounted``).
    4. For every cluster count in ``[2, 3, 4]`` and every matrix,
       ``spectral_clustering`` is run, the result is passed through
       ``add_types`` and ``add_labels``, and the graph is drawn with
       ``gc.draw_color_and_shapenodes_df``.
    """
    interaction_types = ['mentions', 'replies', 'retweets']
    # Both files are opened in append mode, so every run of main() adds
    # another header row to whatever the files already contain.
    f = open("Clustering/Cluster_Info.csv", 'a')
    f2 = open("Clustering/Cluster_Stats.csv", 'a')
    # Header for the per-method summary file (f2 / Cluster_Stats.csv).
    writer = csv.writer(f2, lineterminator='\n')
    writer.writerow([
        'Cluster Method', 'Num Clusters', 'Avg Max Percent', 'Avg Min Percent',
        'Avg Conductance', 'Homogeneity Score', 'Completeness Score', 'V Score'
    ])
    # `writer` is rebound: header for the per-cluster file (f / Cluster_Info.csv).
    writer = csv.writer(f, lineterminator='\n')
    writer.writerow([
        'Cluster Method', 'Cluster Num', 'Conductance',
        'Clustering Coefficient', 'Percent Celebrities', 'Percent Media',
        'Percent Politicians', 'Number Celebrities', 'Number Media',
        'Number Politicians'
    ])
    # NOTE(review): the handles are not closed if a writerow() call raises;
    # a `with` block would guarantee it.
    f.close()
    f2.close()

    for interaction_type in interaction_types:
        edge_list = util.get_edge_list(interaction_type)
        # NOTE(review): get_lists()/get_list_dfs() take no arguments, so they
        # look loop-invariant -- confirm before hoisting them out of the loop.
        c_list, m_list, p_list = util.get_lists()
        c_df, m_df, p_df = util.get_list_dfs()
        node_lists = [c_list, m_list, p_list]
        # Four labels for three node lists.
        node_labels = ["celebrities", "media", "politicians", "others"]
        cmp_list = util.append_arrays(c_list, m_list, p_list)
        g = gc.create_graph_edge_weights(edge_list)
        cmp_g = gc.create_graph_subset(g, cmp_list)

        # Spectral Clustering
        clusters_nums = [2, 3, 4]
        nodes = np.asarray(cmp_g.nodes())

        # NOTE(review): weight='None' below is the *string* 'None' (an
        # edge-attribute name), not the None literal. The "unweighted" result
        # presumably relies on no edge having an attribute with that name --
        # confirm against the networkx documentation.
        am_undir_weight = nx.adjacency_matrix(
            cmp_g.to_undirected(), weight='weight')  # Undirected, weighted
        am_undir_unweight = nx.adjacency_matrix(
            cmp_g.to_undirected(), weight='None')  # Undirected, unweighted
        am_dir_weight = nx.adjacency_matrix(
            cmp_g, weight='weight')  # Outgoing, weighted
        am_dir_unweight = nx.adjacency_matrix(
            cmp_g, weight='None')  # Outgoing, unweighted
        # Matrices derived from the two directed matrices by helpers that are
        # not defined in this block.
        am_sum_weight = am_to_sum(am_dir_weight)
        am_sum_unweight = am_to_sum(am_dir_unweight)
        am_prod_weight = am_to_prod(am_dir_weight)
        am_prod_unweight = am_to_prod(am_dir_unweight)
        am_bib_weight = am_bib(am_dir_weight)
        am_bib_unweight = am_bib(am_dir_unweight)
        am_degdiscount_weight = am_deg_discounted(am_dir_weight)
        am_degdiscount_unweight = am_deg_discounted(am_dir_unweight)

        # `names` and `ams` are parallel lists: names[i] labels ams[i].
        names = [
            'undir_weight', 'undir_unweight', 'sum_weighted', 'sum_unweighted',
            'prod_weight', 'prod_unweighted', 'bib_weight', 'bib_unweight',
            'deg_discount_weight', 'deg_discount_unweight'
        ]
        ams = [
            am_undir_weight, am_undir_unweight, am_sum_weight, am_sum_unweight,
            am_prod_weight, am_prod_unweight, am_bib_weight, am_bib_unweight,
            am_degdiscount_weight, am_degdiscount_unweight
        ]
        for clusters_num in clusters_nums:
            for i in range(len(names)):
                # Rebuilt with the same value as in the outer loop.
                node_lists = [c_list, m_list, p_list]
                name = names[i]
                am = ams[i]
                # Prints the variant name and the full matrix for every run.
                print(name, '\n')
                print(am, '\n')
                df = spectral_clustering(nodes, am, clusters_num)
                # NOTE(review): this result is overwritten by add_labels()
                # just below, so add_types() is presumably called for a side
                # effect keyed by the name string -- confirm.
                labeled_df = add_types(
                    df, c_list, m_list, p_list,
                    "{}_{}clusters_{}".format(interaction_type, clusters_num,
                                              name))
                labeled_df = add_labels(
                    df, c_df, m_df, p_df,
                    "{}_{}clusters_{}_labels".format(interaction_type,
                                                     clusters_num, name))
                part_labels = df['Partition']
                # Number of distinct partitions actually present in the
                # result; this, not clusters_num, is used for the output name.
                k = len(part_labels.unique())
                clusters_matrix = labeled_df
                # The output path is built with literal backslash separators.
                gc.draw_color_and_shapenodes_df(
                    cmp_g,
                    "Clustering\\Graphs\\spectral_{}_{}clusters_{}".format(
                        interaction_type, k, name),
                    interaction_type,
                    node_lists,
                    node_labels,
                    k,
                    clusters_matrix,
                    weight='weight')