def write_graph(G, name):
    """Export ``G`` as GraphML and DOT files inside the ``results`` directory.

    The directory is created relative to the current working directory.
    When it already exists a warning is printed and any files with the
    same names are overwritten.

    :param G: graph handed to the networkx writers
    :param name: base file name; ``results/<name>.xml`` and
        ``results/<name>.dot`` are written
    """
    out_dir = 'results'
    try:
        os.mkdir(out_dir)
    except FileExistsError:
        print(
            'W: results directory already exists. Files will be overwritten.')

    graphml_path = "results/{}.xml".format(name)
    nx.write_graphml_xml(G, graphml_path)

    dot_path = "results/{}.dot".format(name)
    nx.drawing.nx_pydot.write_dot(G, dot_path)
def main():
    """Collapse the Cytoscape network into a cluster-level graph and save it.

    Steps:
      1. Read the ``shared name`` column of the Cytoscape edge table and
         split every value on its first comma into ``source`` / ``target``.
      2. Read ``data/node_table.csv`` and attach the ``clusterONE`` value of
         the source node and of the target node to every edge (inner merges
         on ``shared name``).
      3. Write the resulting cluster pairs to ``results/test.csv``.
      4. Drop self loops and rows with a missing cluster, build a graph from
         the remaining pairs and write it to ``results/pivot.xml``.

    All paths are resolved below ``os.getcwd()/<root_dir>``.
    """
    root_dir = get_root_dir()
    base_dir = os.path.join(os.getcwd(), root_dir)
    results_dir = os.path.join(base_dir, 'results')

    # Read edge table from cytoscape for full network
    edge_table = cytoscape.table.getTable(columns=['shared name'],
                                          table='edge')
    # Read node table
    node_table = pd.read_csv(os.path.join(base_dir, 'data', 'node_table.csv'))

    # Split the shared name column into source and target columns.
    # ``n`` must be passed by keyword: pandas >= 2.0 no longer accepts it
    # positionally.
    source_target_list = pd.DataFrame(
        edge_table['shared name'].str.split(',', n=1).to_list(),
        columns=['source', 'target'])

    # Get only shared name and clusterONE columns from node_table
    reduced_node_table = node_table[['shared name', 'clusterONE']]

    # Add cluster numbers to corresponding source nodes
    # (copy=True is the pandas default, so it is not spelled out).
    df = pd.merge(source_target_list, reduced_node_table,
                  left_on=['source'], right_on=['shared name'])
    df = df.rename(columns={'clusterONE': 'clusterONE_source'})

    # Add cluster numbers to corresponding target nodes
    df = pd.merge(df, reduced_node_table,
                  left_on=['target'], right_on=['shared name'])
    df = df.rename(columns={'clusterONE': 'clusterONE_target'})

    # Create a new edge list using cluster numbers as source and target
    source_target_df = df[['clusterONE_source', 'clusterONE_target']]
    print(source_target_df)

    # Neither to_csv nor the GraphML writer creates missing directories.
    os.makedirs(results_dir, exist_ok=True)
    source_target_df.to_csv(os.path.join(results_dir, 'test.csv'))

    # Get rid of self loops and of edges with a missing cluster. Filtering
    # into a new frame avoids in-place mutation of a slice of ``df``.
    is_self_loop = (source_target_df['clusterONE_source']
                    == source_target_df['clusterONE_target'])
    cluster_edges = source_target_df[~is_self_loop].dropna()

    # Build the cluster-level graph
    g = nx.from_pandas_edgelist(cluster_edges,
                                source='clusterONE_source',
                                target='clusterONE_target')

    # Save graph (path passed positionally, which every networkx version
    # accepts)
    nx.write_graphml_xml(g, os.path.join(results_dir, 'pivot.xml'))
def convert2GraphML(self):
    """Load ``self.Graph`` from the edge list, caching it as GraphML on disk.

    Output names are derived from ``self.edgelist_file`` with its extension
    removed: ``<stem>_graph.csv`` and ``<stem>_graph.graphml`` (the latter
    is stored in ``self.gml_file``).

    * If the GraphML file already exists it is read with
      ``nx.read_graphml`` and nothing is parsed or written.
    * Otherwise the edge list is parsed into ``parent`` / ``child`` rows,
      saved as CSV, turned into a ``nx.DiGraph`` and written as GraphML.

    Parsing depends on ``self.delim``:

    * ``'#'``: the file is read as UTF-8; each line is lower-cased, spaces
      are replaced by ``-`` and the line is split on ``#``.
    * anything else: each line is split on single spaces and the first two
      fields are used.

    Empty lines are skipped in both modes.

    :raises IndexError: if a non-empty line of a space-delimited file has
        fewer than two fields
    """
    stem, _ = os.path.splitext(self.edgelist_file)
    csv_file = "{}_graph.csv".format(stem)
    self.gml_file = "{}_graph.graphml".format(stem)

    if os.path.isfile(self.gml_file):
        self.Graph = nx.read_graphml(self.gml_file)
    else:
        # 1. convert to pandas df
        rows = []
        if self.delim == '#':
            # Read bytes and decode explicitly so the result does not
            # depend on the locale's default encoding.
            with open(self.edgelist_file, "rb") as fmain:
                for raw_line in fmain:
                    text = raw_line.decode('utf-8').lower()
                    text = text.replace(' ', '-').strip()
                    if not text:
                        # An empty line would otherwise become a [''] row.
                        continue
                    rows.append(text.split('#'))
        else:
            with open(self.edgelist_file, "r") as fmain:
                for raw_line in fmain:
                    text = raw_line.strip()
                    if not text:
                        # An empty line has no second field to index.
                        continue
                    fields = text.split(' ')
                    rows.append([fields[0], fields[1]])

        cat = pd.DataFrame(rows, columns=["parent", "child"])
        cat.to_csv(csv_file, index=False)

        # 2. use df object as edgelist to create graphml object
        self.Graph = nx.from_pandas_edgelist(cat, 'parent', 'child',
                                             create_using=nx.DiGraph)
        nx.write_graphml_xml(self.Graph, self.gml_file)

    logging.info("Converted to graphml!")
def build_graph(nodes, property_edges, compose_edges, explain_edges, field_nodes=None, field_edges=None,
                property_edge_weight=1, compose_edge_weight=0.5, explain_edge_weight=0.1, export_file=None):
    """
    Assemble a directed, weighted networkx graph from node and edge lists.

    Property edges are inserted in both directions; compose and explain
    edges are inserted only as given. Each edge family carries its own
    ``weight`` attribute.

    :param nodes: nodes for ``add_nodes_from``, e.g. tuples of a long node
        name and a dict holding the short name (long=funder.budget,
        short=budget)
    :param property_edges: list of tuples of long form nodes, for standard
        schema relationships
    :param compose_edges: edges for references to another property
    :param explain_edges: like property_edges, but for "explains"
        cross-references between nodes
    :param field_nodes: optional extra nodes, appended to ``nodes``
    :param field_edges: optional extra edges, appended to
        ``property_edges`` and therefore also bidirectional
    :param property_edge_weight: weight of property (and field) edges
    :param compose_edge_weight: weight of compose edges
    :param explain_edge_weight: weight of explain edges
    :param export_file: path without extension; ``.graphml`` is appended.
        Only used when the CONTEXT rule configuration does not request an
        export itself.
    :return: a networkx graph
    """
    # Source-field nodes/edges are optional (the alternative case is for
    # testing). When present they are handled exactly like property edges.
    if field_nodes:
        nodes = nodes + field_nodes
    if field_edges:
        property_edges = property_edges + field_edges

    graph = nx.DiGraph()
    graph.add_nodes_from(nodes)

    # Forward property edges first, then their mirrored counterparts.
    graph.add_edges_from(property_edges, weight=property_edge_weight)
    mirrored_edges = [(edge[1], edge[0]) for edge in property_edges]
    graph.add_edges_from(mirrored_edges, weight=property_edge_weight)

    graph.add_edges_from(compose_edges, weight=compose_edge_weight)
    graph.add_edges_from(explain_edges, weight=explain_edge_weight)

    # The configured export takes precedence over the ``export_file`` argument.
    rule_settings = CONTEXT.rule_config.rule_01_causal_relationships
    if rule_settings.export_graphml:
        graph_file = os.path.join(CONTEXT.directory, f'{CONTEXT.id}.graphml')
        logging.info(f'writing graph to {graph_file}')
        nx.write_graphml_xml(graph, graph_file)
    elif export_file:
        nx.write_graphml_xml(graph, export_file + '.graphml')

    return graph
def main():
    """Endlessly fetch a graph with its sequences, dump both and plot them.

    Every iteration writes the graph to ``/tmp/graph`` (GraphML) and the
    sequences to ``/tmp/sequences``, stores a Kamada-Kawai layout position
    on each node as ``pos`` and draws the graph with a title that shows the
    seed, the iteration counter and the min | mean | max out-degree.
    The loop has no exit condition.
    """
    seed = 0  # alternative: int(time.time())
    random.seed(seed)

    iteration = 0
    while True:
        # alternative source: generate_graph_sequences_2(20, 8, 8, seed)
        g, sqs = get_graph_sequences()

        nx.write_graphml_xml(g, '/tmp/graph')
        save_sequences(sqs, '/tmp/sequences')

        layout = nx.kamada_kawai_layout(g)
        for node, position in layout.items():
            g.nodes[node]['pos'] = position

        out_degrees = [deg for _, deg in g.out_degree]
        title = (
            f'seed: {seed} k: {iteration} out deg: '
            f'{min(out_degrees)} | '
            f'{sum(out_degrees) / len(g)} | '
            f'{max(out_degrees)}')
        draw(g, sqs, title)

        plt.show()
        iteration += 1
def dumpDataset(dataset_name, dataset, lxml=False):
    """Write every graph of ``dataset`` into a new ``dataset_name`` folder.

    The folder is created relative to the current working directory and the
    graphs are stored as ``<dataset_name>/<dataset_name>_<i>.graphml`` with
    ``i`` starting at 1.

    If the folder cannot be created (for example because it already
    exists) the function returns without writing anything, so an existing
    dump is never overwritten.

    :param dataset_name: folder name and file name prefix
    :param dataset: iterable of graphs accepted by the networkx writers
    :param lxml: use ``nx.write_graphml_lxml`` instead of
        ``nx.write_graphml_xml``
    :return: None
    """
    # add a folder for the dataset
    try:
        os.mkdir(dataset_name)
    except OSError:
        # Deliberate best-effort: only file system failures are tolerated,
        # programming errors (e.g. a non-string name) still surface.
        return

    # dump all graphml files; the writers open the target path themselves,
    # so no extra file handle is opened here.
    for index, graph in enumerate(dataset, start=1):
        target = f"{dataset_name}/{dataset_name}_{index}.graphml"
        if lxml:
            nx.write_graphml_lxml(graph, target)
        else:
            nx.write_graphml_xml(graph, target)
def main():
    """Compare two ``.clustering`` result files and export the difference.

    Command line arguments are parsed with docopt from the module
    docstring. The function

    1. exits with status 1 if the output file already exists or if one of
       the two result files is missing,
    2. loads the spectrum-to-cluster mapping of both files and builds a
       network from them,
    3. adds node properties, removes the nodes reported by
       ``remove_identical_clusters`` and prints how many spectra remain,
    4. writes the network to the output file as GraphML and hands it to
       Cytoscape for display.
    """
    arguments = docopt(__doc__, version='cluster_result_comparator.py 0.1')

    output_name = arguments["--output"]
    result_file_1 = arguments["<result1.clustering>"]
    result_file_2 = arguments["<result2.clustering>"]

    # sanity check
    if os.path.isfile(output_name):
        print("Error: Result file exists")
        sys.exit(1)
    for result_file in (result_file_1, result_file_2):
        if not os.path.isfile(result_file):
            print("Error: Failed to find " + result_file)
            sys.exit(1)

    # Cluster ids are prefixed with a per-file label.
    label1 = os.path.basename(result_file_1).replace(".clustering", "_")
    label2 = os.path.basename(result_file_2).replace(".clustering", "_")

    # load the spectra to cluster information
    print("Loading data from " + result_file_1 + "...")
    spectra_to_cluster_1 = load_spectra_to_cluster(result_file_1,
                                                   before_cluster_id=label1)
    print("  " + str(len(spectra_to_cluster_1)) + " spectra loaded")

    print("Loading data from " + result_file_2 + "...")
    spectra_to_cluster_2 = load_spectra_to_cluster(result_file_2,
                                                   before_cluster_id=label2)
    print("  " + str(len(spectra_to_cluster_2)) + " spectra loaded")

    # build the network
    network = build_network(spectra_to_cluster_1, spectra_to_cluster_2,
                            source1=os.path.basename(result_file_1),
                            source2=os.path.basename(result_file_2))
    print("Created network with " + str(len(network.nodes)) + " nodes and "
          + str(len(network.edges)) + " edges")

    # add node properties
    print("Adding node properties...")
    add_node_properties(network, result_file_1, label1)
    add_node_properties(network, result_file_2, label2)

    # count the total number of spectra
    total_spectra = get_number_of_spectra(network)

    # remove all nodes that are only connected to a single other node
    # (ie. identical clusters)
    removed_nodes = remove_identical_clusters(network)
    print("Removed " + str(removed_nodes)
          + " identical nodes from the graph. \n"
          + str(len(network.nodes)) + " nodes remaining.")

    remaining_spectra = get_number_of_spectra(network)
    # An empty network has no spectra; report 0% instead of dividing by zero.
    if total_spectra:
        affected_percent = round(remaining_spectra / total_spectra * 100,
                                 ndigits=2)
    else:
        affected_percent = 0.0
    print("  Affecting " + str(remaining_spectra) + "/" + str(total_spectra)
          + " (" + str(affected_percent) + "%) spectra")

    # save the GraphML file
    nx.write_graphml_xml(network, output_name)
    print("Network (GraphML format) written to " + output_name)

    print("Displaying comparison network in Cytoscape (must be open)...")
    display_in_cytoscape(network, network_name=label1 + " vs. " + label2)
Darmstadt.add_edge("70_E1", "146_D31", weight=1) Darmstadt.add_edge("70_E1", "146_D32", weight=1) Darmstadt.add_edge("70_D11", "70_E4", weight=1) Darmstadt.add_edge("70_D12", "70_E3", weight=1) Darmstadt.add_edge("70_D13", "70_E2", weight=1) Darmstadt.add_edge("70_D21", "70_E1", weight=1) Darmstadt.add_edge("70_D21", "70_E4", weight=1) Darmstadt.add_edge("70_D22", "70_E3", weight=1) Darmstadt.add_edge("70_D31", "70_E2", weight=1) Darmstadt.add_edge("70_D32", "70_E1", weight=1) Darmstadt.add_edge("70_D41", "70_E2", weight=1) Darmstadt.add_edge("70_D41", "70_E3", weight=1) Darmstadt.add_edge("70_D42", "70_E1", weight=1) #nx.write_edgelist(Darmstadt,".\\TestDarmstadt.edgelist",data=['weight','signal']) nx.write_graphml_xml(Darmstadt, ".\\TestDarmstadt.graphml", prettyprint=True) #nx.write_gml(Darmstadt,".\\TestDarmstadt.gml") """ # 37 Darmstadt.add_node("37_D111",pos=[],signal=16) Darmstadt.add_node("37_D112",pos=[],signal=17) Darmstadt.add_node("37_D21",pos=[],signal=9) Darmstadt.add_node("37_D22.1",pos=[],signal=10) Darmstadt.add_node("37_D51",pos=[],signal=20) Darmstadt.add_node("37_D52",pos=[],signal=21) Darmstadt.add_node("37_D81",pos=[],signal=13) Darmstadt.add_node("37_D82.1",pos=[],signal=14) Darmstadt.add_node("37_E1",pos=[]) Darmstadt.add_node("37_E2",pos=[]) Darmstadt.add_node("37_E3",pos=[])
graph.nodes[n_j]['supportPSOE'] += int( (int(df_j['Support_PSOE'].iloc[0]) + int(df_k['Support_PSOE'].iloc[0])) * norm_list[i][edge_id]) graph.nodes[n_j]['supportCs'] += int( (int(df_j['Support_Cs'].iloc[0]) + int(df_k['Support_Cs'].iloc[0])) * norm_list[i][edge_id]) graph.nodes[n_j]['supportPodemos'] += int( (int(df_j['Support_Podemos'].iloc[0]) + int(df_k['Support_Podemos'].iloc[0])) * norm_list[i][edge_id]) graph.nodes[n_j]['supportVox'] += int( (int(df_j['Support_VOX'].iloc[0]) + int(df_k['Support_VOX'].iloc[0])) * norm_list[i][edge_id]) # statal political support graph.graph['supportPP'] += graph.nodes[n_j]['supportPP'] graph.graph['supportPSOE'] += graph.nodes[n_j]['supportPSOE'] graph.graph['supportCs'] += graph.nodes[n_j]['supportCs'] graph.graph['supportPodemos'] += graph.nodes[n_j]['supportPodemos'] graph.graph['supportVox'] += graph.nodes[n_j]['supportVox'] #write result print('Writing results ...') result_path = os.path.join(data_path, 'graphs', word_list) if not os.path.exists(result_path): os.makedirs(result_path) for graph in graph_list: file_name = f"graph_{graph.graph['date']}.graphml" nx.write_graphml_xml(graph, os.path.join(result_path, file_name)) print(f"Finished, data stored in: {result_path}")
def exportGraphGraphml(G, filename):
    """Print the nodes of ``G`` and write the graph to ``filename`` as GraphML.

    The writer is called with ``infer_numeric_types=True``.

    :param G: graph to export
    :param filename: destination passed to ``nx.write_graphml_xml``
    """
    node_view = G.nodes
    print(node_view)
    nx.write_graphml_xml(G, filename, infer_numeric_types=True)
def write_xml(self):
    """Write the graph built by ``self.createGraph()`` as GraphML.

    The output file is named ``<self.model.name>.xml`` and is created
    relative to the current working directory.
    """
    graph = self.createGraph()
    target_path = "%s.xml" % (self.model.name)
    nx.write_graphml_xml(graph, target_path)
# path for the data file in_file = os.path.join(wd, 'data', 'musae_ENGB_edges.csv') # read file G = nx.read_edgelist( in_file, # edgelist file to read delimiter=',', # delimiter between 'from' and 'to' comments='#') # the first line doesn't contain data # %% write dataset in GraphML """ We see two different options: - the GraphML is popular among network scientists; it's based on the XML format; this file format contains metadata on the network - the adjacency list turns to be useful for graphs without data associated with nodes or edges and for nodes that can be meaningfully represented as strings """ # GraphML # --+ path for the data file out_file = os.path.join(wd, 'data', 'to_graphml.xml') # --+ write data nx.write_graphml_xml(G, out_file) # adjacency list # --+ path for the data file out_file = os.path.join(os.getcwd(), 'data', 'to_adjlist.adjlist') # --+ write data nx.write_adjlist(G, out_file)