def main():
    interaction_types = ['mentions', 'replies', 'retweets']
    for interaction_type in interaction_types:
        edge_list = util.get_edge_list(interaction_type)
        c_list, m_list, p_list = util.get_lists()
        c_df, m_df, p_df = util.get_list_dfs()
        node_lists = [c_list, m_list, p_list]
        cmp_list = util.append_arrays(c_list, m_list, p_list)
        G = gc.create_graph_edge_weights(edge_list)
        G = gc.create_graph_subset(G, cmp_list)

        # Page Rank
        pr = nx.pagerank(G)
        with open('{}_pagerank.csv'.format(interaction_type), 'w', newline='') as csv_file:
            writer = csv.writer(csv_file)
            for key, value in pr.items():
                writer.writerow([key, value])

        # Betweenness centrality
        bc = nx.betweenness_centrality(G)
        with open('{}_betweenness.csv'.format(interaction_type), 'w', newline='') as csv_file:
            writer = csv.writer(csv_file)
            for key, value in bc.items():
                writer.writerow([key, value])

        # Closeness centrality
        cc = nx.closeness_centrality(G)
        with open('{}_closeness.csv'.format(interaction_type), 'w', newline='') as csv_file:
            writer = csv.writer(csv_file)
            for key, value in cc.items():
                writer.writerow([key, value])
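
# The three centrality blocks above repeat one dict-to-CSV pattern; a small
# helper like this hypothetical write_scores_csv (a sketch, not part of the
# original module) would collapse each block to a single call:
def write_scores_csv(scores, filename):
    """Write a {node: score} dict out as a two-column CSV file."""
    with open(filename, 'w', newline='') as csv_file:
        writer = csv.writer(csv_file)
        for key, value in scores.items():
            writer.writerow([key, value])

# e.g. write_scores_csv(nx.pagerank(G), '{}_pagerank.csv'.format(interaction_type))
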
def attribute_info():
    c_list, m_list, p_list = util.get_lists()
    cmp_list = util.append_arrays(c_list, m_list, p_list)
    interaction_types = ['mentions', 'replies', 'retweets']
    for interaction_type in interaction_types:
        edge_list = util.get_edge_list(interaction_type)
        g = create_graph_edge_weights(edge_list)
        cmp_g = create_graph_subset(g, cmp_list)
        add_types(cmp_g)
        print('{} Assortativity: '.format(interaction_type),
              nx.attribute_assortativity_coefficient(cmp_g, 'type'))
        print('{} Mixing: '.format(interaction_type),
              nx.attribute_mixing_dict(cmp_g, 'type', normalized=True))
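
# add_types is defined elsewhere in this module; the assortativity and mixing
# calls above only require each node to carry a 'type' attribute. A plausible
# sketch of that helper (an assumption, not the repository's actual code):
def add_types_sketch(g):
    """Tag every known node with 'type' in {'celebrity', 'media', 'politician'}."""
    c_list, m_list, p_list = util.get_lists()
    for node_list, label in [(c_list, 'celebrity'), (m_list, 'media'),
                             (p_list, 'politician')]:
        for node in node_list:
            if node in g:
                g.nodes[node]['type'] = label
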
def main():
    interaction_types = ['mentions', 'replies', 'retweets']
    for interaction_type in interaction_types:
        edge_list = util.get_edge_list(interaction_type)
        c_list, m_list, p_list = util.get_lists()
        c_df, m_df, p_df = util.get_list_dfs()
        node_lists = [c_list, m_list, p_list]
        node_labels = ["celebrities", "media", "politicians", "others"]
        cmp_list = util.append_arrays(c_list, m_list, p_list)
        g = gc.create_graph_edge_weights(edge_list)
        cmp_g = gc.create_graph_subset(g, cmp_list)
        add_colors(cmp_g)
        filename = "{}.json".format(interaction_type)
        print("filename: {}".format(filename))
        with open(filename, 'w') as json_file:
            write_graph_to_json(json_file, cmp_g, interaction_type[:3])
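
# write_graph_to_json is defined elsewhere; if the front end expects the usual
# d3-style node-link format, a minimal version could look like this sketch
# (an assumption about the helper's contract, not its actual source):
def write_graph_to_json_sketch(json_file, g, name):
    """Dump g as node-link JSON, tagging the payload with a short name."""
    import json
    from networkx.readwrite import json_graph
    data = json_graph.node_link_data(g)  # {'nodes': [...], 'links': [...]}
    data['name'] = name
    json.dump(data, json_file)
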
def test_conductance():
    edge_list = util.get_edge_list('replies')
    c_list, m_list, p_list = util.get_lists()
    c_df, m_df, p_df = util.get_list_dfs()
    node_lists = [c_list, m_list, p_list]
    node_labels = ["celebrities", "media", "politicians", "others"]
    cmp_list = util.append_arrays(c_list, m_list, p_list)
    g = gc.create_graph_edge_weights(edge_list)
    cmp_g = gc.create_graph_subset(g, cmp_list)
    nodes = np.asarray(list(cmp_g.nodes()))
    # weight=None (not the string 'None') yields an unweighted matrix
    am = clustering.am_to_vectors(
        nodes, nx.adjacency_matrix(
            cmp_g, weight=None))  # Vectors of in and out degrees, unweighted
    clusters_df = clustering.spectral_clustering(nodes, am, 3)
    for i in range(3):
        conductance = conductance_score(cmp_g, clusters_df, str(i))
        print(conductance)
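
# conductance_score is imported from elsewhere. Conductance compares the weight
# of edges leaving a cluster to the smaller of the two sides' volumes; values
# near 0 indicate a well-separated cluster. A sketch of what the helper might
# do, assuming clusters_df carries 'Node' and 'Partition' columns (both column
# names are assumptions, not confirmed by this file):
def conductance_score_sketch(g, clusters_df, partition_label):
    """Conductance of one cluster: cut(S, V-S) / min(vol(S), vol(V-S))."""
    cluster_nodes = set(
        clusters_df.loc[clusters_df['Partition'] == partition_label, 'Node'])
    return nx.conductance(g, cluster_nodes, weight='weight')  # networkx >= 2.1
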
def main():
    interaction_types = ['mentions', 'replies', 'retweets']

    # Write the CSV headers once, before the per-cluster rows are appended
    with open("Clustering/Cluster_Stats.csv", 'a', newline='') as f2:
        writer = csv.writer(f2, lineterminator='\n')
        writer.writerow([
            'Cluster Method', 'Num Clusters', 'Avg Max Percent',
            'Avg Min Percent', 'Avg Conductance', 'Homogeneity Score',
            'Completeness Score', 'V Score'
        ])
    with open("Clustering/Cluster_Info.csv", 'a', newline='') as f:
        writer = csv.writer(f, lineterminator='\n')
        writer.writerow([
            'Cluster Method', 'Cluster Num', 'Conductance',
            'Clustering Coefficient', 'Percent Celebrities', 'Percent Media',
            'Percent Politicians', 'Number Celebrities', 'Number Media',
            'Number Politicians'
        ])

    for interaction_type in interaction_types:
        edge_list = util.get_edge_list(interaction_type)
        c_list, m_list, p_list = util.get_lists()
        c_df, m_df, p_df = util.get_list_dfs()
        node_lists = [c_list, m_list, p_list]
        node_labels = ["celebrities", "media", "politicians", "others"]
        cmp_list = util.append_arrays(c_list, m_list, p_list)
        g = gc.create_graph_edge_weights(edge_list)
        cmp_g = gc.create_graph_subset(g, cmp_list)

        # Spectral clustering over several symmetrizations of the adjacency matrix
        clusters_nums = [2, 3, 4]
        nodes = np.asarray(list(cmp_g.nodes()))
        am_undir_weight = nx.adjacency_matrix(
            cmp_g.to_undirected(), weight='weight')  # Undirected, weighted
        am_undir_unweight = nx.adjacency_matrix(
            cmp_g.to_undirected(), weight=None)  # Undirected, unweighted
        am_dir_weight = nx.adjacency_matrix(
            cmp_g, weight='weight')  # Outgoing, weighted
        am_dir_unweight = nx.adjacency_matrix(
            cmp_g, weight=None)  # Outgoing, unweighted
        am_sum_weight = am_to_sum(am_dir_weight)
        am_sum_unweight = am_to_sum(am_dir_unweight)
        am_prod_weight = am_to_prod(am_dir_weight)
        am_prod_unweight = am_to_prod(am_dir_unweight)
        am_bib_weight = am_bib(am_dir_weight)
        am_bib_unweight = am_bib(am_dir_unweight)
        am_degdiscount_weight = am_deg_discounted(am_dir_weight)
        am_degdiscount_unweight = am_deg_discounted(am_dir_unweight)
        names = [
            'undir_weight', 'undir_unweight', 'sum_weighted', 'sum_unweighted',
            'prod_weight', 'prod_unweighted', 'bib_weight', 'bib_unweight',
            'deg_discount_weight', 'deg_discount_unweight'
        ]
        ams = [
            am_undir_weight, am_undir_unweight, am_sum_weight, am_sum_unweight,
            am_prod_weight, am_prod_unweight, am_bib_weight, am_bib_unweight,
            am_degdiscount_weight, am_degdiscount_unweight
        ]
        for clusters_num in clusters_nums:
            for name, am in zip(names, ams):
                print(name, '\n')
                print(am, '\n')
                df = spectral_clustering(nodes, am, clusters_num)
                labeled_df = add_types(
                    df, c_list, m_list, p_list,
                    "{}_{}clusters_{}".format(interaction_type, clusters_num,
                                              name))
                labeled_df = add_labels(
                    df, c_df, m_df, p_df,
                    "{}_{}clusters_{}_labels".format(interaction_type,
                                                     clusters_num, name))
                part_labels = df['Partition']
                k = len(part_labels.unique())
                clusters_matrix = labeled_df
                gc.draw_color_and_shapenodes_df(
                    cmp_g,
                    "Clustering\\Graphs\\spectral_{}_{}clusters_{}".format(
                        interaction_type, k, name),
                    interaction_type, node_lists, node_labels, k,
                    clusters_matrix, weight='weight')
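
# The am_to_* helpers above turn the directed adjacency matrix into a symmetric
# one so spectral clustering has a valid similarity matrix. Plausible versions
# of two of them, assuming scipy.sparse input (sketches of intent, not the
# repository's actual implementations):
def am_to_sum_sketch(am):
    """Symmetrize by summing weights in both directions: A + A^T."""
    return am + am.T

def am_bib_sketch(am):
    """Bibliographic coupling: two nodes are similar when they point to the
    same targets (A A^T)."""
    return am.dot(am.T)
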