def create_ARFF_network_metrics_file(g, node_to_score, seeds, arff_file_name, calculate_topological_values=False):
    """Write one ARFF data row of network metrics for every node of g.

    The columns, in order, are: id, score, degree, linker_degree, ld_ratio,
    clustering_coefficient, betweenness_centrality, degree2, linker_degree2,
    ld_ratio2, class.

    NOTE: this file contains a second, later definition with the same name;
    at import time that later definition replaces this one.

    Parameters:
        g: networkx graph whose nodes are written out.
        node_to_score: mapping from node to score, looked up for seed nodes
            only. If None, every seed gets the score 1.
        seeds: iterable of seed nodes. Seeds are labelled "involved"; every
            other node is labelled "not-involved" and gets "?" as its score
            (the ARFF marker for a missing value).
        arff_file_name: path of the file to write (opened in 'w' mode, so an
            existing file is overwritten).
        calculate_topological_values: if True, betweenness centrality and
            clustering coefficients are computed with networkx; otherwise
            both columns are written as 0.0.

    Returns:
        None.

    Raises:
        KeyError: if node_to_score is given but lacks an entry for a seed
            node that is present in g.
    """
    delim = ","
    header = (
        "@RELATION aneurysm\n@ATTRIBUTE id STRING\n@ATTRIBUTE score NUMERIC\n"
        "@ATTRIBUTE degree INTEGER\n@ATTRIBUTE linker_degree INTEGER\n"
        "@ATTRIBUTE ld_ratio NUMERIC\n@ATTRIBUTE clustering_coefficient NUMERIC\n"
        "@ATTRIBUTE betweenness_centrality NUMERIC\n"
        "@ATTRIBUTE degree2 INTEGER\n@ATTRIBUTE linker_degree2 INTEGER\n"
        "@ATTRIBUTE ld_ratio2 NUMERIC\n"
        "@ATTRIBUTE class {involved,not-involved}\n@DATA\n"
    )
    # A set makes the per-node membership test below O(1).
    seeds = set(seeds)

    mapB = None
    mapC = None
    if calculate_topological_values:
        # Single-argument print(...) emits the same text under Python 2 and 3.
        print("Calculating betweenness centrality..")
        mapB = networkx.betweenness_centrality(g)
        print("Calculating clustering coefficients..")
        mapC = networkx.clustering(g, with_labels=True)

    # Per-node tuple unpacked below as (degree, linker_degree, degree2,
    # linker_degree2) to match the header order; the exact meaning of each
    # value is defined by get_node_degree_related_values (not shown here).
    node_to_values = get_node_degree_related_values(g, seeds)

    # The with-block closes the file even if writing a row raises.
    with open(arff_file_name, 'w') as f:
        f.write(header)
        for v in g.nodes_iter():
            d, ld, d2, ld2 = node_to_values[v]

            if calculate_topological_values:
                cc = mapC[v]
                bc = mapB[v]
            else:
                cc = 0.0
                bc = 0.0

            # Guard the ratios against division by zero.
            r1 = 0.0 if d == 0 else float(ld) / d
            r2 = 0.0 if d2 == 0 else float(ld2) / d2

            if v in seeds:
                s = node_to_score[v] if node_to_score is not None else 1
                c = "involved"
            else:
                s = "?"
                c = "not-involved"

            # id score degree linker_degree ld_ratio clustering_coeff
            # betweenness_cent degree2 linker_degree2 ld_ratio2 class
            row = [v, s, d, ld, r1, cc, bc, d2, ld2, r2, c]
            f.write(delim.join(map(str, row)) + "\n")
def create_ARFF_network_metrics_file(g, node_to_score, seeds, arff_file_name, calculate_topological_values=False):
    """Write one ARFF data row of network metrics for every node of g.

    The columns, in order, are: id, score, degree, linker_degree, ld_ratio,
    clustering_coefficient, betweenness_centrality, degree2, linker_degree2,
    ld_ratio2, class.

    NOTE: an earlier definition with the same name exists in this file; this
    later definition is the one bound to the name after import.

    Parameters:
        g: networkx graph whose nodes are written out.
        node_to_score: mapping from node to score, looked up for seed nodes
            only. If None, every seed gets the score 1.
        seeds: iterable of seed nodes. Seeds are labelled "involved"; every
            other node is labelled "not-involved" and gets "?" as its score
            (the ARFF marker for a missing value).
        arff_file_name: path of the file to write (opened in 'w' mode, so an
            existing file is overwritten).
        calculate_topological_values: if True, betweenness centrality and
            clustering coefficients are computed with networkx; otherwise
            both columns are written as 0.0.

    Returns:
        None.

    Raises:
        KeyError: if node_to_score is given but lacks an entry for a seed
            node that is present in g.
    """
    delim = ","
    header = (
        "@RELATION aneurysm\n@ATTRIBUTE id STRING\n@ATTRIBUTE score NUMERIC\n"
        "@ATTRIBUTE degree INTEGER\n@ATTRIBUTE linker_degree INTEGER\n"
        "@ATTRIBUTE ld_ratio NUMERIC\n@ATTRIBUTE clustering_coefficient NUMERIC\n"
        "@ATTRIBUTE betweenness_centrality NUMERIC\n"
        "@ATTRIBUTE degree2 INTEGER\n@ATTRIBUTE linker_degree2 INTEGER\n"
        "@ATTRIBUTE ld_ratio2 NUMERIC\n"
        "@ATTRIBUTE class {involved,not-involved}\n@DATA\n"
    )
    # A set makes the per-node membership test below O(1).
    seeds = set(seeds)

    mapB = None
    mapC = None
    if calculate_topological_values:
        # Single-argument print(...) emits the same text under Python 2 and 3.
        print("Calculating betweenness centrality..")
        mapB = networkx.betweenness_centrality(g)
        print("Calculating clustering coefficients..")
        mapC = networkx.clustering(g, with_labels=True)

    # Per-node tuple unpacked below as (degree, linker_degree, degree2,
    # linker_degree2) to match the header order; the exact meaning of each
    # value is defined by get_node_degree_related_values (not shown here).
    node_to_values = get_node_degree_related_values(g, seeds)

    # The with-block closes the file even if writing a row raises.
    with open(arff_file_name, 'w') as f:
        f.write(header)
        for v in g.nodes_iter():
            d, ld, d2, ld2 = node_to_values[v]

            if calculate_topological_values:
                cc = mapC[v]
                bc = mapB[v]
            else:
                cc = 0.0
                bc = 0.0

            # Guard the ratios against division by zero.
            r1 = 0.0 if d == 0 else float(ld) / d
            r2 = 0.0 if d2 == 0 else float(ld2) / d2

            if v in seeds:
                s = node_to_score[v] if node_to_score is not None else 1
                c = "involved"
            else:
                s = "?"
                c = "not-involved"

            # id score degree linker_degree ld_ratio clustering_coeff
            # betweenness_cent degree2 linker_degree2 ld_ratio2 class
            row = [v, s, d, ld, r1, cc, bc, d2, ld2, r2, c]
            f.write(delim.join(map(str, row)) + "\n")
def get_clustering_coefficient_map(g):
    """Return the clustering coefficients of graph g.

    Thin wrapper around networkx.clustering called with with_labels=True.
    Presumably the result is a mapping from node to coefficient (as the
    function name suggests) -- confirm against the networkx version in use.
    """
    coefficient_map = networkx.clustering(g, with_labels=True)
    return coefficient_map