def copy_layout(from_fname, to_fname):
    if from_fname[-4:] != '.gml':
        from_fname += '.gml'
    if to_fname[-4:] != '.gml':
        to_fname += '.gml'

    print 'reading A=', from_fname, '..',
    g1 = NX.read_gml(from_fname)
    labels1 = NX.get_node_attributes(g1, 'label')
    n1 = set(labels1.values())
    print len(n1), 'nodes'

    print 'reading B=', to_fname, '..',
    g2 = NX.read_gml(to_fname)
    labels2 = NX.get_node_attributes(g2, 'label')
    n2 = set(labels2.values())
    print len(n2), 'nodes'

    intersection = len(n2.intersection(n1))
    percent = 100. * intersection / len(n2)
    print 'B.intersect(A)=', intersection, '(%.1f%%)' % percent

    print 'copying layout..',
    # map node ids in A to node ids in B via their shared labels
    mapping = {}
    for L1 in labels1:
        for L2 in labels2:
            if labels1[L1] == labels2[L2]:
                mapping[L1] = L2
                break
    layout = NX.get_node_attributes(g1, 'graphics')
    attr = dict([(mapping[ID], {'x': layout[ID]['x'], 'y': layout[ID]['y']}) for ID in mapping])
    NX.set_node_attributes(g2, 'graphics', attr)
    NX.write_gml(g2, to_fname)
    print 'done.'
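# A minimal usage sketch (hypothetical file names; assumes 'layouted.gml' already
# carries x/y coordinates in its node 'graphics' attributes and that both files
# share node labels):
#
#     copy_layout('layouted', 'unlayouted')   # the '.gml' suffix is appended if missing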
def create_conjunction_graph(self):
    fallacy_map = {
        unidecode(key): value
        for (key, value) in get_fallacy_types()
    }
    for contention in Contention.objects.all():
        for premise in contention.premises.all():
            fallacies = filter(None, premise.reports.values_list(
                'fallacy_type', flat=True))
            fallacies = [
                fallacy_map[unidecode(_f)]
                for _f in fallacies
            ]
            fallacies_set = set(fallacies)
            for fallacy in fallacies_set:
                graph.add_edges_from([
                    (unidecode(self.normalize(fallacy)),
                     unidecode(self.normalize(_f)))
                    for _f in fallacies_set
                    if _f != fallacy
                ])
    nx.write_gml(graph, 'conjunction.gml')
def main():
    baseDir = "/Users/francis/Documents/cpe480_texts/basicmaterials/chemicals"
    os.chdir(baseDir)
    articles = glob.glob('*.txt')
    graph = nx.Graph()
    print "Let's get to work!"
    for index in range(0, len(articles)):
        start = time.time()
        article = articles[index]
        print article, "- (", index + 1, "of", len(articles), ")"
        significant = blib.filterWords(article)
        blib.handleDocumentNouns(significant["NN"], graph)
        print article, "done."
        print "Time Elapsed:", time.time() - start, "seconds"
        print
        print
        # sentiment = determineSemtiment(significant[0])
    # Export graph data to a file
    edgeListFile = "chemicals.graph"
    print "Writing graph to file...."
    nx.write_gml(graph, edgeListFile)
    print "done"
def dag(recipe_folder, config, packages="*", format='gml', hide_singletons=False):
    """
    Export the DAG of packages to a graph format file for visualization
    """
    dag, name2recipes = graph.build(utils.get_recipes(recipe_folder, "*"), config)
    if packages != "*":
        dag = graph.filter(dag, packages)
    if hide_singletons:
        # iterate over a copy of the node list so removal is safe during the loop
        for node in list(nx.nodes(dag)):
            if dag.degree(node) == 0:
                dag.remove_node(node)
    if format == 'gml':
        nx.write_gml(dag, sys.stdout.buffer)
    elif format == 'dot':
        write_dot(dag, sys.stdout)
    elif format == 'txt':
        subdags = sorted(map(sorted, nx.connected_components(dag.to_undirected())))
        subdags = sorted(subdags, key=len, reverse=True)
        singletons = []
        for i, s in enumerate(subdags):
            if len(s) == 1:
                singletons += s
                continue
            print("# subdag {0}".format(i))
            subdag = dag.subgraph(s)
            recipes = [
                recipe for package in nx.topological_sort(subdag)
                for recipe in name2recipes[package]]
            print('\n'.join(recipes) + '\n')
        if not hide_singletons:
            print('# singletons')
            recipes = [recipe for package in singletons
                       for recipe in name2recipes[package]]
            print('\n'.join(recipes) + '\n')
def generate_overlap_gml(overlap_data, contained_reads, gml_filename):
    containment_tolerance = 50
    permitted_error_pct = 2
    import networkx as nx
    G = nx.DiGraph()
    node_in_graph = set()
    for q_name in overlap_data:
        if q_name in contained_reads:
            continue
        if q_name not in node_in_graph:
            G.add_node(q_name)
            node_in_graph.add(q_name)  # track added nodes so the membership check is meaningful
        targets = overlap_data[q_name].hits
        targets_3prime = [h for h in targets
                          if h[4][1] < containment_tolerance and h[0] not in contained_reads]
        targets_5prime = [h for h in targets
                          if h[3][1] < containment_tolerance and h[0] not in contained_reads]
        targets_3prime.sort(key=lambda k: k[1])
        targets_5prime.sort(key=lambda k: k[1])
        if len(targets_3prime) > 0:
            t = targets_3prime[0]
            t_name = t[0]
            if t_name not in node_in_graph:
                G.add_node(t_name)
                node_in_graph.add(t_name)
            G.add_edge(q_name, t_name)
        if len(targets_5prime) > 0:
            t = targets_5prime[0]
            t_name = t[0]
            if t_name not in node_in_graph:
                G.add_node(t_name)
                node_in_graph.add(t_name)
            G.add_edge(q_name, t_name)
    nx.write_gml(G, gml_filename)
def export_graph(G, write_filename): write_dir = "./output/" + write_filename + "/" if not os.path.isdir(write_dir): os.mkdir(write_dir) # Remove pho edge weights for n1 in G.edge: for n2 in G.edge[n1]: G.edge[n1][n2]={} print("\twriting gml") for node in G.nodes_iter(): for key, val in list(G.node[node].items()): G.node[node][key]=int(val) nx.write_gml(G, write_dir + write_filename + ".gml") print("\twriting graphml") nx.write_graphml(G, write_dir + write_filename + ".graphml") print("\twriting edgelist") f = open(write_dir + write_filename + ".edgelist","w") for edge in G.edges_iter(): f.write("\t".join([str(end) for end in list(edge)[:2]])+"\n") f.close() f = open(write_dir + write_filename + ".nodelist","w") print("\twriting nodelist") f.write("\t".join(["node_id"] + node_attributes) + "\n") for node in G.nodes_iter(): f.write("\t".join([str(node)] + [str(G.node[node][attribute]) for attribute in node_attributes]) + "\n")
def write_graph_files(output_path, data_filename, G): if not os.path.exists(output_path): os.makedirs(output_path) # write to GML file gml_filename = os.path.join(output_path, data_filename + "-graph.gml") print("Writing GML file: {}".format(gml_filename)) nx.write_gml(G, gml_filename) # write assignments into a file with a single column assignments_filename = os.path.join(output_path, data_filename + "-assignments.txt") print("Writing assignments: {}".format(assignments_filename)) with open(assignments_filename, "w") as outf: for n in G.nodes_iter(data=True): outf.write("{}\n".format(n[1]["partition"])) # write edge list in a format for MaxPerm, tab delimited edges_maxperm_filename = os.path.join(output_path, data_filename + "-edges-maxperm.txt") print("Writing edge list (for MaxPerm): {}".format(edges_maxperm_filename)) with open(edges_maxperm_filename, "w") as outf: outf.write("{}\t{}\n".format(G.number_of_nodes(), G.number_of_edges())) for e in G.edges_iter(): outf.write("{}\t{}\n".format(*e)) # write edge list in a format for OSLOM, tab delimited edges_oslom_filename = os.path.join(output_path, data_filename + "-edges-oslom.txt") print("Writing edge list (for OSLOM): {}".format(edges_oslom_filename)) with open(edges_oslom_filename, "w") as outf: for e in G.edges_iter(data=True): outf.write("{}\t{}\t{}\n".format(e[0], e[1], e[2]["weight"])) return (edges_maxperm_filename, edges_oslom_filename)
def lei_vs_lei(nedges=None):
    """
    Graph connecting every law to every other law
    """
    # Flávio's original version, commented out:
    # curgrafo.execute('select lei_id_1,esfera_1,lei_1,lei_id_2,esfera_2, lei_2, peso from vw_gr_lei_lei where peso >300 and lei_id_2>2')
    # curgrafo.execute('select lei_id_1,lei_tipo_1,lei_nome_1,lei_id_2,lei_tipo_2, lei_nome_2, peso from vw_gr_lei_lei where lei_count <= 20 and lei_id_1 = 1 and lei_id_2 <= 20 limit 0,1000')
    # curgrafo.execute('select lei_id_1,lei_tipo_1,lei_nome_1,lei_id_2,lei_tipo_2, lei_nome_2, peso from vw_gr_lei_lei where lei_count <= 8 and lei_id_1 <= 20 and lei_id_2 <= 20 limit 0,1000')
    curgrafo.execute('select lei_id_1,esfera_1,lei_1,lei_id_2,esfera_2, lei_2, peso from vw_gr_lei_lei where lei_count <= 10 and lei_id_1 <= 50 and lei_id_2 <= 200 limit 0,10000')
    if not nedges:
        res = curgrafo.fetchall()
        nedges = len(res)
    else:
        res = curgrafo.fetchmany(nedges)
    eds = [(i[0], i[3], i[6]) for i in res]
    G = nx.Graph()
    # eds = [i[:3] for i in res]
    G.add_weighted_edges_from(eds)
    print "== Grafo Lei_Lei =="
    print "==> Order: ", G.order()
    print "==> # Edges: ", len(G.edges())
    # Adding attributes to nodes
    for i in res:
        G.node[i[0]]['esfera'] = i[1]
        G.node[i[0]]['lei'] = i[2]
        G.node[i[3]]['esfera'] = i[4]
        G.node[i[3]]['lei'] = i[5]
    nx.write_graphml(G, 'lei_lei.graphml')
    nx.write_gml(G, 'lei_lei.gml')
    nx.write_pajek(G, 'lei_lei.pajek')
    nx.write_dot(G, 'lei_lei.dot')
    return G, res
def main():
    graph = nx.MultiDiGraph()
    with open('partiler.csv') as f:
        reader = csv.DictReader(f)
        data = list(reader)
        for row in data:
            graph.add_node(row['Abbreviation'],
                           label=row['Abbreviation'],
                           type="Party",
                           political_position=row['PoliticalPosition'])
            for descendant in string_to_list(row['DescendantOf']):
                graph.add_edge(row['Abbreviation'], descendant,
                               label="Descendant Of", weight=20)
            for ancestor in string_to_list(row['AncestorOf']):
                graph.add_edge(row['Abbreviation'], ancestor,
                               label="Ancestor Of", weight=20)
            for leader in string_to_list(row['Leader']):
                graph.add_edge(row['Abbreviation'], leader,
                               label="Leader", weight=10)
            for ideology in string_to_list(row['Ideology']):
                graph.add_node(ideology, type="Ideology")
                graph.add_edge(row['Abbreviation'], ideology,
                               label="Ideology", weight=40)
    nx.write_gml(graph, "data.gml")
def do_graph_from_file_random(f, sep, outname, maxnbline, pos1, pos2):
    nb_bites = os.path.getsize(f.name)
    G = nx.DiGraph()
    i = 0
    while i < maxnbline:
        i += 1
        f.seek(0)
        f.seek(randint(0, nb_bites))
        f.readline()
        line = f.readline()
        if not line:
            continue
        lines = line.split(sep)
        if len(lines) < 2:
            continue
        try:
            node1 = lines[pos1].encode('utf8').strip('\\/\r\n ')
            node2 = lines[pos2].encode('utf8').strip('\\/\r\n ')
            node1 = re.sub('www\.', '', re.sub('http://', '', node1))
            node2 = re.sub('www\.', '', re.sub('http://', '', node2))
            G.add_edge(node1, node2)
        except Exception as e:
            print 'error e:{0} at line:\n\t{1}'.format(e, line)
            continue
    nx.write_gml(G, outname.rsplit('.', 1)[0] + '.gml')
    return G
def betweenness_fracture(infile, outfile, fraction, recalculate=False):
    """
    Removes given fraction of nodes from infile network in reverse order of
    betweenness centrality (with or without recalculation of centrality values
    after each node removal) and saves the network in outfile.
    """
    g = networkx.read_gml(infile)
    m = networkx.betweenness_centrality(g)
    l = sorted(m.items(), key=operator.itemgetter(1), reverse=True)
    largest_component = max(networkx.connected_components(g), key=len)
    n = len(g.nodes())
    for i in range(1, n):
        g.remove_node(l.pop(0)[0])
        if recalculate:
            m = networkx.betweenness_centrality(g)
            l = sorted(m.items(), key=operator.itemgetter(1), reverse=True)
        largest_component = max(networkx.connected_components(g), key=len)
        if i * 1. / n >= fraction:
            break
    components = networkx.connected_components(g)
    component_id = 1
    for component in components:
        for node in component:
            g.node[node]["component"] = component_id
        component_id += 1
    networkx.write_gml(g, outfile)
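# A minimal usage sketch (hypothetical file names): remove the 10% most central
# nodes, recomputing betweenness after every removal, then tag the surviving
# components before writing the result:
#
#     betweenness_fracture('net.gml', 'net_fractured.gml', 0.1, recalculate=True)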
def draw_lineage(self, recs, nodecolor="mediumseagreen",
                 edgecolor="lightslateblue", dpi=96,
                 lineage_img="GO_lineage.png", engine="pygraphviz",
                 gml=False, draw_parents=True, draw_children=True):
    assert engine in GraphEngines
    if engine == "pygraphviz":
        G = self.make_graph_pygraphviz(recs, nodecolor, edgecolor, dpi,
                                       draw_parents=draw_parents,
                                       draw_children=draw_children)
    else:
        G = self.make_graph_pydot(recs, nodecolor, edgecolor, dpi,
                                  draw_parents=draw_parents,
                                  draw_children=draw_children)
    if gml:
        import networkx as nx  # use networkx to do the conversion
        pf = lineage_img.rsplit(".", 1)[0]
        NG = nx.from_agraph(G) if engine == "pygraphviz" else nx.from_pydot(G)
        del NG.graph['node']
        del NG.graph['edge']
        gmlfile = pf + ".gml"
        nx.write_gml(NG, gmlfile)
        print("GML graph written to {0}".format(gmlfile), file=sys.stderr)
    print(("lineage info for terms %s written to %s" %
           ([rec.id for rec in recs], lineage_img)), file=sys.stderr)
    if engine == "pygraphviz":
        G.draw(lineage_img, prog="dot")
    else:
        G.write_png(lineage_img)
def save_commuting_graph(G): print G.nodes() # print G.edges(data=True) nx.write_gml(G, "/home/sscepano/Project7s/D4D/CI/COMMUTINGNEW/total_commuting_G.gml") # # print GA.nodes() # print GA.edges(data=True) # # nx.write_gml(G, "/home/sscepano/D4D res/allstuff/User movements graphs/communting patterns/1/total_commuting_GA.gml") #v.map_commuting_all(G) #map_communities_and_commutes(G) # G = nx.read_gml("/home/sscepano/D4D res/allstuff/User movements graphs/commuting patterns/1/total_commuting_G.gml") # # G1 = process_weights(G) # nx.write_gml(G1, "/home/sscepano/D4D res/allstuff/User movements graphs/commuting patterns/1/total_commuting_G_scaled_weights.gml") # # print G1.edges(data=True) # G1 = nx.read_gml("/home/sscepano/D4D res/allstuff/User movements graphs/commuting patterns/1/total_commuting_G_scaled_weights.gml") # print G1.nodes(data=True) # # print G1.nodes(data=True)[1][1]['label'] # map_communities_and_commutes(G1) return
def save_celltype_graph(self, filename="celltype_conn.gml", format="gml"):
    """
    Save the celltype-to-celltype connectivity information in a file.

    filename -- path of the file to be saved.

    format -- format to save in. Using GML as GraphML support is not complete
    in NetworkX.
    """
    start = datetime.now()
    if format == "gml":
        nx.write_gml(self.__celltype_graph, filename)
    elif format == "yaml":
        nx.write_yaml(self.__celltype_graph, filename)
    elif format == "graphml":
        nx.write_graphml(self.__celltype_graph, filename)
    elif format == "edgelist":
        nx.write_edgelist(self.__celltype_graph, filename)
    elif format == "pickle":
        nx.write_gpickle(self.__celltype_graph, filename)
    else:
        raise Exception("Supported formats: gml, graphml, yaml, edgelist, pickle. Received: %s" % (format))
    end = datetime.now()
    delta = end - start
    config.BENCHMARK_LOGGER.info(
        "Saved celltype_graph in file %s of format %s in %g s"
        % (filename, format, delta.seconds + delta.microseconds * 1e-6))
    print "Saved celltype connectivity graph in", filename
def information_diffusion(self, num_nodes, beta): # patients_zero = [random.randint(0,num_nodes) for r in xrange(beta)] # for i in patients_zero: # self.network.node[i]['color'] = 1 # print patients_zero # for i in patients_zero: # for j in self.network.neighbors(i): root_node = random.randint(0, num_nodes - 1) self.network.node[root_node]["color"] = 1 ordered_edges = list(nx.bfs_edges(self.network, root_node)) print ordered_edges t_name = "plots/file_name" count = 0 for i in ordered_edges: count = count + 1 # print self.network.node[i[0]]['color']==1, self.network.node[i[1]]['color']==0 if self.network.node[i[0]]["color"] == 1 and self.network.node[i[1]]["color"] == 0: # probability =100* self.network.node[i[1]]['mew_final']*self.network.edge[i[0]][i[1]]['gossip'] probability = random.random() print i, probability if probability > beta: # print "hello from other side" self.network.node[i[1]]["color"] = 1 if count % 100 == 0: name = t_name + str(count) + ".gml" nx.write_gml(self.network, name)
def usufyToGmlExport(d, fPath):
    '''
    Workaround to export to a gml file.
    :param d: Data to export.
    :param fPath: File path.
    '''
    # Reading the previous gml file
    try:
        oldData = nx.read_gml(fPath)
    except UnicodeDecodeError as e:
        print "UnicodeDecodeError:\t" + str(e)
        print "Something went wrong when reading the .gml file relating to the decoding of UNICODE."
        import time as time
        fPath += "_" + str(time.time())
        print "To avoid losing data, the output file will be renamed to use the timestamp as:\n" + fPath
        print
        # No information has been recovered
        oldData = nx.Graph()
    except Exception as e:
        # No information has been recovered
        oldData = nx.Graph()
    newGraph = _generateGraphData(d, oldData)
    # Writing the gml file
    nx.write_gml(newGraph, fPath)
def main(): args = parse_args() network = json.loads(urllib2.unquote(args.json.encode('utf8'))) G = as_network(network) if args.type == 'gml': nx.write_gml(G, sys.stdout) elif args.type == 'png' or args.type == 'pdf': ax = plt.axes(frameon=False) ax.get_yaxis().set_visible(False) ax.get_xaxis().set_visible(False) nodepos = get_positions(G) if 'edges' in network: nx.draw_networkx_edges(G, pos=nodepos, edge_color='0.6', width=get_edge_widths(G)) for (shape, color), nodes in get_node_types(G).iteritems(): nx.draw_networkx_nodes(G, pos=nodepos, nodelist=nodes, node_color=color, ax=ax, linewidths=0.5, node_size=100, node_shape=shape) nx.draw_networkx_labels(G, pos={n: (x, y + 17) for n, (x, y) in nodepos.iteritems()}, labels=nx.get_node_attributes(G, 'label'), font_size=6) bbox = None if G.number_of_nodes() == 1 else 'tight' plt.savefig(sys.stdout, dpi=300, bbox_inches=bbox, format=args.type)
def main2():
    number = 19
    species, rules, parameterDict, observableList = readBNGXML.parseXML('output{0}.xml'.format(number))
    graph = nx.Graph()
    simpleGraph(graph, species, observableList, number)
    nx.write_gml(graph, 'graph_{0}.gml'.format(number))
def generate_blast_graph(self): evalue_filter = lambda hsp: hsp.evalue < self.evalue file_name = "{}/blast_graph.txt".format(self.blast_output_path) for blast_file in glob.glob(self.blast_data_path): print("working on " + blast_file) # Parse the Blast file qresults = SearchIO.parse(blast_file, 'blast-tab', comments=True) for qresult in qresults: write_line = "" write_line += qresult.id + ":" # Go to the Hit section of query for hit in qresult[:]: if not self.blast_graph.has_node(qresult.id): self.blast_graph.add_node(qresult.id) # Check if Hit has min value filtered_hit = hit.filter(evalue_filter) if filtered_hit is not None: if not self.blast_graph.has_node(filtered_hit.id): self.blast_graph.add_node(filtered_hit.id) # Add Edge between graph nodes self.blast_graph.add_edge(qresult.id, filtered_hit.id) write_line += filtered_hit.id + "," if write_line != "": with open(file_name, "a") as f_handle: f_handle.write(write_line + '\n') # Write GML files if self.generate_gml_files: file_name = "{}/blast_graph.gml".format(self.blast_output_path) with open(file_name, "a") as f_handle: nx.write_gml(self.blast_graph, f_handle)
def __init__(self, inOrOut='none', makeNetwork=False, logName=False, citesIn='../Cites_in', citesOut='../Cites_out'): # Get the absolute directory paths self.dirName = os.path.dirname(__file__) self.citesInDir = os.path.abspath(os.path.join(self.dirName, citesIn)) self.citesOutDir = os.path.abspath(os.path.join(self.dirName, citesOut)) if(logName): self.log = "Starting...\n" self.logName = logName + ' ' else: self.logName = False # Assume all citations will be parsed succesfully self.encounteredUnknownPattern = False self.makeNetwork = makeNetwork if(makeNetwork): # Get lists of file names. self.citeInFiles = self.getCiteFiles(self.citesInDir) self.citeOutFiles = self.getCiteFiles(self.citesOutDir) self.G = nx.DiGraph() self.workDictionary = {} self.humanDescription = {} self.bwbTitles = {} self.sparql = SparqlHelper.SparqlHelper() # Parse the citations for incoming or outgoing or both or none if inOrOut == 'both': self.parseCitations('out') self.parseCitations('in') elif inOrOut == 'in' or inOrOut == 'out': self.parseCitations(inOrOut) # Write the log to disk if logging is enabled if(logName): self.writeLog() # Save network to disk if a network was made if(makeNetwork): t = time.strftime("%Y-%m-%d_%H_%M_%S") fileName = os.path.normpath("./{}/Graph_{}".format(self.dirName, t)) nx.write_gml(self.G, fileName + '.gml') pickle.dump(self.G, open(fileName + '.pickle', 'w')) print '\nDumped graph at: "' + fileName + '", (.pickle and .gml)' fileName = os.path.normpath("./{}/Work_URIs_{}.pickle".format(self.dirName, t)) pickle.dump(self.workDictionary, open(fileName, 'w')) print '\nDumped work URIs at: "' + fileName + '"' fileName = os.path.normpath("./{}/bwb_titles_{}.pickle".format(self.dirName, t)) pickle.dump(self.bwbTitles, open(fileName, 'w')) print '\nDumped BWB titles at: "' + fileName + '"' fileName = os.path.normpath("./{}/human_descriptions_{}.pickle".format(self.dirName, t)) pickle.dump(self.humanDescription, open(fileName, 'w')) print '\nDumped human entity descriptions at: "' + fileName + '"'
def getGraph(fileRef):
    data = getFiles(fileRef)
    nodes = getNodes(data)
    edges = getEdges(data)
    graph = createNetwork(edges, nodes)
    gml_output_path = os.path.join('output', 'network',
                                   fileRef.split('.')[0].split('/')[1] + '.gml')
    print "Writing GML file to %s" % gml_output_path
    nx.write_gml(graph, gml_output_path)
    net_output_path = os.path.join('output', 'network',
                                   fileRef.split('.')[0].split('/')[1] + '.net')
    print "Writing net file to %s" % net_output_path
    nx.write_pajek(graph, net_output_path)
    params = (graph.number_of_nodes(), graph.number_of_edges())
    print "Graph has %s nodes, %s edges" % params
    print
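# A minimal usage sketch (hypothetical path). The function derives the output
# name from the component after the '/', so it expects a reference like
# 'data/network.csv':
#
#     getGraph('data/network.csv')   # writes output/network/network.gml and .net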
def draw_graph(nodes_degree, edges_weight):
    """
    Draws and also exports a graph according to the node sizes and edge weights
    that are provided.

    nodes_degree: list of node degrees
    edges_weight: list of edge weights
    """
    N = len(nodes_degree)  # get graph size
    G = nx.Graph()  # define an empty graph
    # add edges
    link_id = 1
    for i in xrange(1, N):
        for j in xrange(0, i):
            if edges_weight[link_id] > .01:
                G.add_edge(i, j, weight=edges_weight[link_id])
            link_id += 1
    node_size = [n * 100 for n in nodes_degree]  # set nodes size
    edges_weight, weights = zip(*nx.get_edge_attributes(G, 'weight').items())  # set edges size
    pos = nx.spring_layout(G)  # set position
    # draw the graph
    nx.draw(G, pos, node_color='#A0CBE2', edgelist=edges_weight, edge_color=weights,
            width=4, edge_cmap=PL.cm.Reds, with_labels=True, node_size=node_size)
    PL.savefig("../data/graphs/text_graph_%d.png" % N)  # save as png
    nx.write_gml(G, "../data/graphs/test_graph_%d.gml" % N)
    PL.show()  # display
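# A minimal usage sketch (hypothetical toy data): with three nodes the edge
# weights are indexed 1..3, so position 0 is an unused placeholder:
#
#     draw_graph(nodes_degree=[1, 2, 1], edges_weight=[0.0, 0.5, 0.02, 0.9])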
def grafo_usuarios_relacionados(usuarios_comunidades):  # utf-8
    meetups = {}  # maps each meetup to its users
    for usuario_meetups in usuarios_comunidades:
        usuario = usuario_meetups['usuario']
        meetups_usuario = usuario_meetups['meetups']
        for meetup in meetups_usuario:
            if meetups.get(meetup):
                temp = meetups.get(meetup)
                temp.append(usuario)
                meetups[meetup] = temp
            else:
                meetups[meetup] = [usuario]
    G = nx.Graph()
    for meetup, usuarios in meetups.items():
        permutacoes = itertools.permutations(usuarios, 2)
        for aresta in permutacoes:
            no_anterior = aresta[0]
            no_sucessor = aresta[1]
            if G.get_edge_data(no_anterior, no_sucessor, default=0) == 0:  # add label
                G.add_edge(no_anterior, no_sucessor, weight=1)
            else:
                G[no_anterior][no_sucessor]['weight'] = G[no_anterior][no_sucessor]['weight'] + 1
    # exibe_arestas_peso(G)
    nx.write_gml(G, "resultados/grafos/grafo_usuarios_relacionadas.gml")
def write_edge2cid(e2c,filename, outFile, delimiter="\t"): # write edge2cid three-column file f = open(filename+".edge2comm.txt",'w') c2c = dict( (c,i+1) for i,c in enumerate(sorted(list(set(e2c.values())))) ) # ugly... #print c2c for e,c in sorted(e2c.iteritems(), key=itemgetter(1)): f.write( "%s%s%s%s%s\n" % (str(e[0]),delimiter,str(e[1]),delimiter,str(c2c[c])) ) f.close() cid2edges,cid2nodes = defaultdict(set),defaultdict(set) # faster to recreate here than for edge,cid in e2c.iteritems(): # to keep copying all dicts cid2edges[cid].add( edge ) # during the linkage... cid2nodes[cid] |= set(edge) cid2edges,cid2nodes = dict(cid2edges),dict(cid2nodes) # write list of edges for each comm, each comm on its own line f,g = open(filename+".comm2edges.txt", 'w'),open(filename+".comm2nodes.txt", 'w') for cid in sorted(cid2edges.keys()): nodes,edges = map(str,cid2nodes[cid]), ["%s,%s" % (ni,nj) for ni,nj in cid2edges[cid]] f.write( "\t".join(edges) ); f.write("\n") g.write( "\t".join([str(cid)] + nodes) ); g.write("\n") f.close(); g.close() #print e2c e2cRelabel=dict((edge,c2c.get(id)) for (edge, id) in e2c.items()) g=nx.Graph() g.add_edges_from(e2cRelabel.keys()) nx.set_edge_attributes(g,"label",e2cRelabel) nx.write_gml(g,outFile)
def export_singleSTG(self, pSid, Type, Isomorphy, localIGs, NNF, filename, initialRules):
    # TODO: localIGs
    # TODO: filename = subfolder with files, or filename + suffix?
    # TODO: GML nested network?
    filename = str(filename)
    if not filename[-4:] == ".gml":
        filename += ".gml"
    self.set_initialRules(initialRules)
    self.set_initialStates()
    found = False
    for i, pSet in enumerate(self._psc.get_parameterSets()):
        if i == pSid:
            found = True
            break
    if not found:
        self.message("MC: STG with index %i does not exist. Choose index between 0 and %i." % (pSid, i), "error")
        return
    ts = TS.TransitionSystem(self, pSet)
    ts.compute_destination()
    if Type == "SCC":
        ts.computeSCCDAG(Isomorphy, NNF)
        nx.write_gml(ts._sccdag._nxSCCDAG, filename)
        if NNF:
            filename = filename[:-4] + ".nnf"  # replace the ".gml" suffix
            han = open(filename, "w")
            han.write(ts._sccdag._nnfNN)
            han.close()
    elif Type == "allStates":
        nx.write_gml(ts.getDestinySTG(), filename)
    else:
        self.message("Choose either SCC Graph or Full State Graph.", "error")
def main():
    G = nx.Graph()
    reader = csv.reader(open('similarity2.csv', 'rb'), delimiter=',')
    next(reader)
    for edge in reader:
        my_list = [edge[0], edge[1], float(edge[2])]
        G.add_edge(my_list[0], my_list[1], weight=my_list[2])
    # FIXME: k unused
    k = math.sqrt(1.0 / len(G.nodes()))
    partition = community.best_partition(G)
    # pos = fa2.forceatlas2_layout(G, iterations=ITERATIONS, nohubs=nohubs, linlog=True)
    pos = nx.spring_layout(G, iterations=ITERATIONS)
    colors = [community_colors.get(partition[node], '#000000') for node in G.nodes()]
    nx.draw_networkx_nodes(G, pos, node_color=colors, node_size=NODE_SIZE)
    nx.draw_networkx_edges(G, pos, width=EDGE_WIDTH, alpha=EDGE_ALPHA)
    nx.draw_networkx_labels(G, pos, font_size=NODE_LABEL_FONT_SIZE, alpha=NODE_ALPHA)
    # nx.draw_networkx(G, pos=pos, node_color=colors)
    nx.write_gml(G, 'graph.gml')
    fig = plt.gcf()
    fig.set_size_inches(OUT_WIDTH / dpi, OUT_HEIGHT / dpi)
    plt.savefig('fa2.png', dpi=dpi)
def export_topology(topology, filename='topology.gml'):
    """Write the topology to a file in Graph Modelling Language (GML) format

    Filenames ending in .bz2 or .gz will be compressed
    """
    nx.write_gml(topology, filename)
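# A minimal usage sketch (any networkx graph works; compression is handled by
# networkx itself based on the file extension):
#
#     import networkx as nx
#     export_topology(nx.karate_club_graph(), 'karate.gml.gz')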
def network_layout(gmt_fn, outfn=None): ## make a Graph object and write to gml for Gephi to ## do the layout d_gmt = read_gmt(gmt_fn) d_gmt_filt = {} for term, genes in d_gmt.items(): if len(genes) >= 5: d_gmt_filt[term] = genes d_gmt = d_gmt_filt print 'number of terms:', len(d_gmt) umls_ids_kept = d_gmt.keys() adj_matrix = jaccard_matrix(d_gmt) m = adj_matrix > 0.2 # degrees = adj_matrix.sum(axis=0) adj_matrix = adj_matrix * m.astype(int) G = nx.from_numpy_matrix(adj_matrix) print 'G: ',G.number_of_edges(), G.number_of_nodes() for i in range(adj_matrix.shape[0]): # G.node[i]['size'] = degrees[i] # G.node[i]['size'] = len(d_gmt[umls_ids_kept[i]]) G.node[i]['size'] = G.degree(i) G.node[i]['id'] = umls_ids_kept[i] if outfn is not None: nx.write_gml(G, outfn) return G
def crawl(self, n=None, dump=None): '''Launch crawler on n pages if n != None, otherwise, it stops when all webpages have been explored''' if n != None: self.nPages = n print "Start crawling ", self.startURL while (self.nPages == None and len(self.crawling) > 0) or (self.nPages != None and len(self.crawled) <= self.nPages): self.currentId += 1 if dump != None and (self.currentId)%dump == 0: # Dump intermediary graph in case of crash or interrupt nx.write_gml(self.G, 'graph_%06d.gml' % self.currentId) self.dumpToFile(self.crawled, 'crawled_%06d.p' % self.currentId) self.dumpToFile(self.crawling, 'crawling_%06d.p'% self.currentId) currentURL = self.crawling.pop(0) print "Crawling page %d of %d:"%(self.currentId, len(self.crawling + self.crawled)), currentURL.encode('ascii','xmlcharrefreplace') self.crawled.append(currentURL) # Get a list of new links from the current page dirtyLinks = self.getLinks(currentURL) cleanLinks = self.cleanLinks(dirtyLinks, currentURL) newLinks = list(set(cleanLinks) - set(self.crawling + self.crawled)) self.crawling += newLinks print '%d of %d new links found on the current page'%(len(newLinks), len(cleanLinks)) # Build network self.G = self.updateNetwork(self.G, currentURL, cleanLinks) self.dumpGraph() return
def ministro_ministro(G): """ Cria um grafo de ministros conectados de acordo com a sobreposição de seu uso da legislação Construido a partir to grafo ministro_lei """ GM = nx.Graph() for m in G: try: int(m) except ValueError:# Add only if node is a minister if m != "None": GM.add_node(m.decode('utf-8')) # Add edges for n in GM: for m in GM: if n == m: continue if GM.has_edge(n,m) or GM.has_edge(m,n): continue # Edge weight is the cardinality of the intersection each node neighbor set. w = len(set(nx.neighbors(G,n.encode('utf-8'))) & set(nx.neighbors(G,m.encode('utf-8')))) #encode again to allow for matches if w > 5: GM.add_edge(n,m,{'weight':w}) # abreviate node names GMA = nx.Graph() GMA.add_weighted_edges_from([(o.replace('MIN.','').strip(),d.replace('MIN.','').strip(),di['weight']) for o,d,di in GM.edges_iter(data=True)]) P.figure() nx.draw_spectral(GMA) nx.write_graphml(GMA,'ministro_ministro.graphml') nx.write_gml(GMA,'ministro_ministro.gml') nx.write_pajek(GMA,'ministro_ministro.pajek') nx.write_dot(GMA,'ministro_ministro.dot') return GMA
def main(): #Takes a single GFF input, generates a graph and merges with a pre-existing graph args = get_options() # create directory if it isn't present already if not os.path.exists(args.output_dir): os.mkdir(args.output_dir) args.input_dir = os.path.join(args.input_dir, "") args.output_dir = os.path.join(args.output_dir, "") # Create temporary directory temp_dir = os.path.join(tempfile.mkdtemp(dir=args.output_dir), "") directories = [args.input_dir, temp_dir] gff_file = [args.input_gff] filename = os.path.basename(args.input_gff).split(".")[0] if not args.quiet: print("Processing input") process_prokka_input(gff_list=gff_file, output_dir=temp_dir, filter_seqs=args.filter_invalid, quiet=args.quiet, n_cpu=args.n_cpu, table=args.table) cd_hit_out = temp_dir + "combined_protein_cdhit_out.txt" run_cdhit(input_file=temp_dir + "combined_protein_CDS.fasta", output_file=cd_hit_out, id=args.id, quiet=args.quiet, n_cpu=args.n_cpu) if not args.quiet: print("Generating network") single_gml, centroid_contexts_single, seqid_to_centroid_single = generate_network( cluster_file=cd_hit_out + ".clstr", data_file=temp_dir + "gene_data.csv", prot_seq_file=temp_dir + "combined_protein_CDS.fasta", all_dna=args.all_seq_in_graph) if not args.quiet: print("Reformatting network") reformat_network(single_gml=single_gml, output_dir=temp_dir, isolateName=filename) merge_graphs(directories=directories, temp_dir=temp_dir, len_dif_percent=args.len_dif_percent, pid=args.id, family_threshold=args.family_threshold, length_outlier_support_proportion=args. length_outlier_support_proportion, merge_para=args.merge_paralogs, output_dir=args.output_dir, min_edge_support_sv=args.min_edge_support_sv, aln=args.aln, alr=args.alr, core=args.core, merge_single=True, depths=[1], n_cpu=args.n_cpu, quiet=args.quiet) G = nx.read_gml(args.output_dir + "final_graph.gml") for index, name in enumerate( G.graph['isolateNames'] ): #Corrects isolate name for single gff being returned as list if name == 'x': G.graph['isolateNames'][index] = filename nx.write_gml(G, args.output_dir + "final_graph.gml") #remove temporary directory if dirty = True if not args.dirty: shutil.rmtree(temp_dir) sys.exit(0)
normal_node = n for h in phis: for w in wires: quality_temp = [] bad_meme_num_temp = [] for iter_ in range(run_times): fname = nodescreen_times.format(iter_, n, m, p, percent_bots, w, h, alpha, mu) fp = open(fname) temp = pickle.load(fp) fp.close() quality = np.mean(temp, axis=1).tolist() bad_meme_num = np.sum(temp == 0, axis=1).tolist() quality_temp.append(quality) bad_meme_num_temp.append(bad_meme_num) avg_quality = np.mean(quality_temp, axis=0).tolist() avg_bad_meme_num = np.mean(bad_meme_num_temp, axis=0).tolist() # add attribute to network fname = coupled_gml.format(n, m, p, percent_bots, w, int(h), alpha, str(mu).lower()) g = nx.read_gml(os.path.join(coupled_gml_dir, fname)) for node_ in g.nodes(): isbot = 0 if int(node_) <= normal_node else 1 bad_num = avg_bad_meme_num[int(node_)] avg_qua = avg_quality[int(node_)] g.node[node_]['bot'] = isbot g.node[node_]['badnum'] = bad_num g.node[node_]['avgqua'] = avg_qua nx.write_gml(g, save_dir + '/' + fname[:-4]+'.gml')
# -*- coding: utf-8 -*-
"""
Created on Thu May 14 10:28:45 2020

@author: 98kes
"""
import networkx as nx

G = nx.DiGraph()
Gene = [1, 2, 3, 4]
G.add_nodes_from(Gene)
G.add_edges_from([(1, 3)], signal='i')
G.add_edges_from([(2, 3), (3, 4)], signal='a')
nx.write_gml(G, "test.gml")

GeneDict = {
    1: None,
    2: ["3P", "6P", "2P", "3P-6P-2P"],
    3: ["3P", "5P", "3P-4P"],
    4: None
}

removeNodeList = []
for key in GeneDict.keys():
    if GeneDict[key] != None:
        removeNodeList.append(key)

# for rm in removeNodeList:
#     for value in GeneDict[rm]:
add(from_user, from_id, to_user, to_id, "retweet") if options.min_subgraph_size or options.max_subgraph_size: g_copy = G.copy() for g in networkx.connected_component_subgraphs(G): if options.min_subgraph_size and len(g) < options.min_subgraph_size: g_copy.remove_nodes_from(g.nodes()) elif options.max_subgraph_size and len(g) > options.max_subgraph_size: g_copy.remove_nodes_from(g.nodes()) G = g_copy if output.endswith(".gexf"): networkx.write_gexf(G, output) elif output.endswith(".gml"): networkx.write_gml(G, output) elif output.endswith(".dot"): nx_pydot.write_dot(G, output) elif output.endswith(".json"): json.dump(to_json(G), open(output, "w"), indent=2) elif output.endswith(".html"): graph_data = json.dumps(to_json(G), indent=2) html = """<!DOCTYPE html> <meta charset="utf-8"> <script src="https://platform.twitter.com/widgets.js"></script> <script src="https://d3js.org/d3.v4.min.js"></script> <script src="https://code.jquery.com/jquery-3.1.1.min.js"></script> <style>
def main(): args = get_options(sys.argv[1:]) # Check cd-hit is installed check_cdhit_version() # make sure trailing forward slash is present args.output_dir = os.path.join(args.output_dir, "") # Create temporary directory temp_dir = os.path.join(tempfile.mkdtemp(dir=args.output_dir), "") # check if input is a file containing filenames if len(args.input_files) == 1: files = [] for line in args.input_files[0]: files.append(open(line.strip(), 'rU')) args.input_files = files if args.verbose: print("pre-processing gff3 files...") # convert input GFF3 files into summary files process_prokka_input(args.input_files, args.output_dir, args.n_cpu) # Cluster protein sequences using cdhit cd_hit_out = args.output_dir + "combined_protein_cdhit_out.txt" run_cdhit(input_file=args.output_dir + "combined_protein_CDS.fasta", output_file=cd_hit_out, id=args.id, s=args.len_dif_percent, n_cpu=args.n_cpu) if args.verbose: print("generating initial network...") # generate network from clusters and adjacency information G, centroid_contexts = generate_network( cluster_file=cd_hit_out + ".clstr", data_file=args.output_dir + "gene_data.csv", prot_seq_file=args.output_dir + "combined_protein_CDS.fasta", all_dna=args.all_seq_in_graph) # merge paralogs if args.verbose: print("Processing paralogs...") G = collapse_paralogs(G, centroid_contexts) # write out pre-filter graph in GML format for node in G.nodes(): G.node[node]['size'] = len(set(G.node[node]['members'])) G.node[node]['genomeIDs'] = ";".join(conv_list( G.node[node]['members'])) G.node[node]['geneIDs'] = ";".join(conv_list(G.node[node]['seqIDs'])) G.node[node]['degrees'] = G.degree[node] for edge in G.edges(): G.edges[edge[0], edge[1]]['genomeIDs'] = ";".join( conv_list(G.edges[edge[0], edge[1]]['members'])) nx.write_gml(G, args.output_dir + "pre_filt_graph.gml") if args.verbose: print("collapse mistranslations...") # clean up translation errors G = collapse_families(G, outdir=temp_dir, dna_error_threshold=0.98, correct_mistranslations=True, n_cpu=args.n_cpu, quiet=(not args.verbose))[0] if args.verbose: print("collapse gene families...") # collapse gene families G, distances_bwtn_centroids, centroid_to_index = collapse_families( G, outdir=temp_dir, family_threshold=args.family_threshold, correct_mistranslations=False, n_cpu=args.n_cpu, quiet=(not args.verbose)) if args.verbose: print("triming contig ends...") # re-trim low support trailing ends G = trim_low_support_trailing_ends(G, min_support=args.min_trailing_support, max_recursive=args.trailing_recursive) # identify possible family level paralogs if args.verbose: print("identifying potentialy highly variable genes...") G = identify_possible_highly_variable( G, cycle_threshold_max=20, cycle_threshold_min=args.cycle_threshold_min, size_diff_threshold=0.5) if args.verbose: print("refinding genes...") # find genes that Prokka has missed G = find_missing(G, args.input_files, dna_seq_file=args.output_dir + "combined_DNA_CDS.fasta", prot_seq_file=args.output_dir + "combined_protein_CDS.fasta", gene_data_file=args.output_dir + "gene_data.csv", remove_by_consensus=args.remove_by_consensus, search_radius=args.search_radius, prop_match=args.refind_prop_match, pairwise_id_thresh=args.id, merge_id_thresh=max(0.8, args.family_threshold), n_cpu=args.n_cpu) # remove edges that are likely due to misassemblies (by consensus) # merge again in case refinding has resolved issues G = collapse_families(G, outdir=temp_dir, family_threshold=args.family_threshold, correct_mistranslations=False, n_cpu=args.n_cpu, quiet=(not 
args.verbose), distances_bwtn_centroids=distances_bwtn_centroids, centroid_to_index=centroid_to_index)[0] if args.clean_edges: G = clean_misassembly_edges( G, edge_support_threshold=args.edge_support_threshold) # if requested merge paralogs if args.merge_paralogs: G = merge_paralogs(G) isolate_names = [ os.path.splitext(os.path.basename(x.name))[0] for x in args.input_files ] G.graph['isolateNames'] = isolate_names mems_to_isolates = {} for i, iso in enumerate(isolate_names): mems_to_isolates[str(i)] = iso if args.verbose: print("writing output...") # write out roary like gene_presence_absence.csv # get original annotaiton IDs, lengts and whether or # not an internal stop codon is present orig_ids = {} ids_len_stop = {} with open(args.output_dir + "gene_data.csv", 'r') as infile: next(infile) for line in infile: line = line.split(",") orig_ids[line[2]] = line[3] ids_len_stop[line[2]] = (len(line[4]), "*" in line[4][1:-3]) G = generate_roary_gene_presence_absence(G, mems_to_isolates=mems_to_isolates, orig_ids=orig_ids, ids_len_stop=ids_len_stop, output_dir=args.output_dir) #Write out presence_absence summary generate_summary_stats(output_dir=args.output_dir) # add helpful attributes and write out graph in GML format for node in G.nodes(): G.node[node]['size'] = len(set(G.node[node]['members'])) G.node[node]['genomeIDs'] = ";".join(conv_list( G.node[node]['members'])) G.node[node]['geneIDs'] = ";".join(conv_list(G.node[node]['seqIDs'])) G.node[node]['degrees'] = G.degree[node] for edge in G.edges(): G.edges[edge[0], edge[1]]['genomeIDs'] = ";".join( conv_list(G.edges[edge[0], edge[1]]['members'])) nx.write_gml(G, args.output_dir + "final_graph.gml") # write pan genome reference fasta file generate_pan_genome_reference(G, output_dir=args.output_dir, split_paralogs=False) # write out csv indicating the mobility of each gene # generate_gene_mobility(G, output_dir=args.output_dir) # write out common structural differences in a matrix format generate_common_struct_presence_absence( G, output_dir=args.output_dir, mems_to_isolates=mems_to_isolates, min_variant_support=args.min_edge_support_sv) #Write out core/pan-genome alignments if args.aln == "pan": if args.verbose: print("generating pan genome MSAs...") generate_pan_genome_alignment(G, temp_dir, args.output_dir, args.n_cpu, args.alr, isolate_names) core_nodes = get_core_gene_nodes(G, args.core, len(args.input_files)) concatenate_core_genome_alignments(core_nodes, args.output_dir) elif args.aln == "core": if args.verbose: print("generating core genome MSAs...") generate_core_genome_alignment(G, temp_dir, args.output_dir, args.n_cpu, args.alr, isolate_names, args.core, len(args.input_files)) # remove temporary directory shutil.rmtree(temp_dir) return
add = random.choice(range(size_layers[i])) edges[layers[i][add]].extend([j]) for i in range(0, juming_neurons): layer = random.choice(range(2, number_of_layers - 1)) from_layer = random.choice(range(0, layer - 1)) first_neuron = random.choice(layers[layer]) second_layer = random.choice(layers[from_layer]) edges[first_neuron].extend([second_layer]) #count layers for i in range(number_of_nodes): model.add_node(i) if node_layers[i] == 0: continue for node_edge in edges[i]: model.add_edge(i, node_edge) f = open("./generated input/genr" + str(n * 50) + "nor.txt", "w") for i in range(200): inp = numpy.random.randint(1, 4, size=inputsize) out = numpy.random.randint(1, 2, size=outputsize) f.write(numpy.array2string(inp, max_line_width=(2 * inputsize + 1))) f.write(" ") f.write(numpy.array2string(out, max_line_width=(2 * outputsize + 1))) f.write("\n") f.close() nx.write_gml(model, "./generated graphs/genr" + str(n * 50) + "nor.gml")
def writeGML(graph, fname):
    nx.write_gml(graph, fname)
# Convert it into an undirected graph
g = nx.DiGraph()
cx = adj_matrix.tocoo()
for i, j, val in zip(cx.row, cx.col, cx.data):
    if val > 0:
        g.add_edge(str(i), str(j))
assert g.number_of_nodes() == N, "There is an error, the number of nodes mismatched, {} != {}".format(N, g.number_of_nodes())
assert g.number_of_edges() == E, "There is an error, the number of edges mismatched, {} != {}".format(E, g.number_of_edges())
if directed is False:
    g = g.to_undirected()
    print("The graph was converted into undirected!")
print("Number of nodes: {}".format(g.number_of_nodes()))
print("Number of edges: {}".format(g.number_of_edges()))

# Read clusters
values = {node: [] for node in g.nodes()}
cx = cluster_matrix.tocoo()
for i, k, val in zip(cx.row, cx.col, cx.data):
    if val > 0:
        values[str(i)].append(str(k))
nx.set_node_attributes(g, name="clusters", values=values)

# Finally, save the file
nx.write_gml(g, output_file)
# For the given node, add the domain to the correct list and increase
# the correct counter
def update_list(C, index, list_name, counter_name, domain):
    try:
        C.node[index][list_name].append(domain)
        C.node[index][counter_name] += 1
    except KeyError:
        C.node[index][list_name] = [domain]
        C.node[index][counter_name] = 1


# Store list of sites, cookies and scripts in each community node
def add_labels(C, partitions):
    for domain in partitions:
        update_list(C, partitions[domain], 'domains', 'domaincount', domain)


if __name__ == '__main__':
    print("Reading in graph file.")
    [G, types] = pickle.load(open('output/undirectional_hyperlink.graph', 'rb'))
    dendrogram = community.generate_dendrogram(G)
    partitions = community.partition_at_level(dendrogram, 0)
    print(community.modularity(partitions, G))
    NX = community.induced_graph(partitions, G)
    add_labels(NX, partitions)
    NX.remove_edges_from(NX.selfloop_edges())
    nx.write_gml(NX, 'smallest_communities.gml')
def writeGraph(G, output_file, graph_type, file_format='autodetect'): """Write a graph to a file Parameters ----------- G : networkx.Graph (or similar) output_file: file object the output file to which the graph is written. If it is a string then the graph is written to a file with that string as filename. Otherwise if ``output_file`` is a file object (or a text stream), the graph is written there. graph_type: string in {"simple","digraph","dag","bipartite"} see also :py:func:`cnfformula.graph.supported_formats` file_format: string, optional The file format that the parser should expect to receive. See also :py:func:`cnfformula.graph.supported_formats`. By default it tries to autodetect it from the file name extension (when applicable). Returns ------- None Raises ------ ValueError raised when either ``output_file`` is neither a file object nor a string, or when ``graph_type`` and ``file_format`` are invalid choices. IOError it is impossible to write on the ``output_file`` See Also -------- readGraph """ # file name instead of file object if isinstance(output_file, (str, unicode)): with open(output_file, 'w') as file_handle: return writeGraph(G, file_handle, graph_type, file_format) _, file_format = _process_graph_io_arguments(output_file, graph_type, file_format, False) if file_format == 'dot': find_write_dot()(G, output_file) elif file_format == 'gml': networkx.write_gml(G, output_file) elif file_format == 'kthlist': _write_graph_kthlist_format(G, output_file, bipartition=(graph_type == 'bipartite')) elif file_format == 'dimacs': _write_graph_dimacs_format(G, output_file) elif file_format == 'matrix': _write_graph_matrix_format(G, output_file) else: raise RuntimeError( "[Internal error] Format {} not implemented".format(file_format))
network_data = np.genfromtxt(network_file, delimiter=',', dtype=('U100', 'U100', float))
weights = []
G = nx.DiGraph()
for line in network_data:
    left_node = line[0]
    right_node = line[1]
    weight = float(line[2])
    weights.append(weight)
    G.add_edge(left_node, right_node, weight=weight)

pos = nx.spring_layout(G)
colors = np.array(weights)
cmap = plt.get_cmap('hsv')
vmin = min(colors)
vmax = max(colors)
nx.draw(G, pos, node_color='grey', edge_color=colors, width=2, edge_cmap=cmap,
        node_size=10, with_labels=False, font_size=8, vmin=vmin, vmax=vmax)
sm = plt.cm.ScalarMappable(cmap=cmap, norm=plt.Normalize(vmin=vmin, vmax=vmax))
sm._A = []
plt.colorbar(sm)
for key in pos.keys():
    x, y = pos[key]
    plt.text(x, y + 0.1, s=key, fontsize=6,
             bbox=dict(facecolor='red', alpha=0.5), horizontalalignment='center')
nx.write_gml(G, "weighted-network-all.gml")
plt.show()
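# Expected layout of the (assumed) network_file CSV read above: one directed,
# weighted edge per row, three columns, no header; the node names below are
# hypothetical:
#
#     nodeA,nodeB,0.82
#     nodeB,nodeC,1.40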
for val, num_id in sorted(G.get_symtab().items(), key=operator.itemgetter(1)): GML.node[num_id]['label'] = remove_chars(val) GML.node[num_id]['gen_id'] = num_id if labels: for edge, val in sorted(G.get_edge_labels().items(), key=operator.itemgetter(0)): for k, v in val.iteritems(): GML.edge[edge[0]][edge[1]][k] = remove_chars(v) for vertex_id, val in sorted(G.get_node_labels().items(), key=operator.itemgetter(0)): for k, v in val.iteritems(): GML.node[vertex_id][k] = remove_chars(v) nx.write_gml(GML.to_undirected(), output) return output def write_dimacs_like(G, output, symtab=True, labels=True, gtfs_filename='', descriptor='', edge_char='e ', isolated=False): output.write( 'c Contains a public transit graph extracted from GTFS file\n') output.write('c original_input_file: %s\n' % gtfs_filename) if symtab:
def write_to_gml(Graph, filename):
    nx.write_gml(Graph, filename)
def layout(graph, ref_g_id, cut_edges_out, ignore_high_var, add_reference_edges): g = nx.read_gml(graph) #look up table for name vs node id mapping = create_mapping(g, ref_g_id) gene_order = [ int(mapping.loc[n, "gene_id"].split("_")[2]) for n in mapping.index ] max_dist = max(gene_order) if ignore_high_var: g = remove_var_edges(g) if add_reference_edges: g = add_ref_edges(g, mapping) #write gml with reference edges to disk to be used in cytoscape instead of the original final_graph.gml nx.write_gml(g, "with_ref_" + graph) name_dict = dict([(G.nodes[n]['name'], n) for n in list(g)]) #set capacity for edges for the min cut algorithm as the weight of that edge for e in g.edges: try: g.edges[e]["capacity"] = g.edges[e]["weight"] except: g.edges[e]["capacity"] = 1 #store edges to be taken out of the graph cut_edges = [] i = 0 cur_try = 0 #iterate over all reference nodes in mapping table while i < len(mapping.index): n = mapping.index[i] print(i) if n not in name_dict: i += 1 continue nid = name_dict[n] visited = set([nid]) sink = {"sink": None} queue = add_to_queue(g, nid, g.neighbors(nid), visited, sink, mapping, ref_g_id, max_dist) #depth first search last_target = None while len(queue) != 0: target = queue.pop(0) visited.add(target) neighbors = g.neighbors(target) #for each reference node explore all edges that lead to non-reference nodes queue = queue + add_to_queue(g, nid, neighbors, visited, sink, mapping, ref_g_id, max_dist) last_target = None #did we find a long-range connection? if sink["sink"] is not None: print("found path") visited.add(sink["sink"]) s_t_graph = function.induced_subgraph(g, visited) s_t_graph = nx.Graph(s_t_graph) #the induced graph could contain reference edges which need to be removed remove = [] for e in s_t_graph.edges: if ref_g_id in G.nodes[e[0]]['genomeIDs'].split(";") \ and ref_g_id in G.nodes[e[1]]['genomeIDs'].split(";"): g2g1 = dict([(j.split("_")[0], j.split("_")[1]) for j in G.nodes[e[0]]['geneIDs'].split(";")]) g2g2 = dict([(j.split("_")[0], j.split("_")[1]) for j in G.nodes[e[1]]['geneIDs'].split(";")]) if g2g1[ref_g_id] == "refound" or g2g2[ ref_g_id] == "refound": continue else: n1 = mapping.loc[G.nodes[e[0]]["name"]][0] n2 = mapping.loc[G.nodes[e[1]]["name"]][0] if abs(int(n1.split("_")[2]) - int(n2.split("_")[2])) < 100: remove.append(e) s_t_graph.remove_edges_from(remove) #print some info about that long-range connection #print(n) #print(nid, sink["sink"]) #min cut between the two reference nodes cut = [] cut_weight, partitions = nx.algorithms.flow.minimum_cut( s_t_graph, nid, sink["sink"]) for p1_node in partitions[0]: for p2_node in partitions[1]: if s_t_graph.has_edge(p1_node, p2_node): cut.append((p1_node, p2_node)) #cardinality cut TODO make this an option #cut = cuts.minimum_edge_cut(s_t_graph, nid, sink["sink"]) for e in cut: print(G.nodes[e[0]]['name'], G.nodes[e[1]]['name']) cut_edges.append(e) #delete cut edges from the graph if len(cut) == 0: #something happened as no min cut can be found i += 1 sys.exit( "no min cut could be found; sorry this shouldn't happen") g.remove_edges_from(cut) sink["sink"] = None #there may be more paths from that node -> apply again on the same node else: #all nodes explored; move on i += 1 sink["sink"] = None #write cut edges to disk with open(cut_edges_out, "w") as f: f.write("shared name\tis_cut_edge\n") for e in cut_edges: f.write("%s (interacts with) %s\t1\n" % (e[0], e[1])) f.write("%s (interacts with) %s\t1\n" % (e[1], e[0]))
def construct_flow_graph_2(state: State, query: IQueryObjects, filt: Union[str, dict], format: str = "visjs"): """ Prepare a graph from which conclusions about factors can be extracted Example: 1) Obtain "s", the serialized state from Redis or from a test file 2) state = deserialize_state(s) 3) query = BasicQuery(state) # Create a Query and execute a query 4) construct_solve_graph(state, query, None) :param state: State :param query: A IQueryObjects instance (which has been already injected the state) :param filt: A filter to be passed to the query instance :param format: VisJS, GML, ... :return: """ include_processors = False # For completeness (not clarity...), include processors nodes, as a way to visualize grouped factors will_write = True # For debugging purposes, affects how the properties attached to nodes and edges are elaborated expand_factors_graph = False # Expand transformation between FactorTypes into instances of Factors # Format for different node types stated_factor_no_observation = dict(graphics={'fill': "#999900"}) # Golden stated_factor_some_observation = dict(graphics={'fill': "#ffff00"}) # Yellow qq_attached_to_factor = dict(graphics={ 'fill': "#eeee00", "type": "ellipse" }) # Less bright Yellow non_stated_factor = dict(graphics={'fill': "#999999"}) a_processor = dict(graphics={"type": "hexagon", "color": "#aa2211"}) # Format for different edge types edge_from_factor_type = dict(graphics={ "fill": "#ff0000", "width": 1, "targetArrow": "standard" }) edge_processor_to_factor = dict(graphics={ "fill": "#ff00ff", "width": 3, "targetArrow": "standard" }) edge_factors_flow = dict(graphics={ "fill": "#000000", "width": 5, "targetArrow": "standard" }) edge_factors_scale = dict(graphics={ "fill": "#333333", "width": 3, "targetArrow": "standard" }) edge_factors_relative_to = dict(graphics={ "fill": "#00ffff", "width": 3, "targetArrow": "standard" }) edge_factor_value = dict(graphics={ "fill": "#aaaaaa", "width": 1, "targetArrow": "standard" }) glb_idx, p_sets, hh, datasets, mappings = get_case_study_registry_objects( state) # Obtain the information needed to elaborate the graph objs = query.execute([ Processor, Factor, FactorType, FactorTypesRelationUnidirectionalLinearTransformObservation, FactorsRelationScaleObservation, FactorsRelationDirectedFlowObservation ], filt) # 1) Graphical Representation: BOX -- BOX # # 2) Internal (not for end-users), pseudo-code: # # Processor1 <- Factor1 -> FactorType0 # Processor2 <- Factor2 -> FactorType0 # Processor3 <- Factor3 -> FactorType1 # Processor3 <- Factor4 -> FactorType0 # Factor1 <- FactorsRelationDirectedFlowObservation(0.4) -> Factor2 # Factor1 <- FactorsRelationDirectedFlowObservation(0.6) -> Factor4 # Factor1 <- FactorQuantitativeObservation(5.3 m²) # FactorType0 -> FactorTypesRelationUnidirectionalLinearTransformObservation(ctx) -> FactorType1 # Factor4 -> w1 -> Factor3 # Factor5 -> w2 -> Factor3 # # Index quantitative observations. # Also, mark Factors having QQs (later this will serve to color differently these nodes) qqs = {} qq_cont = 0 factors_with_some_observation = set() for o in find_quantitative_observations(glb_idx): # Index quantitative observations. 
if "relative_to" in o.attributes and o.attributes["relative_to"]: continue # Do not index intensive quantities, because they are translated as edges in the graph if o.factor in qqs: lst = qqs[o.factor] else: lst = [] qqs[o.factor] = lst lst.append(o) # Mark Factors having QQs (later this will serve to color differently these nodes) factors_with_some_observation.add(o.factor) # ---- MAIN GRAPH: Factors and relations between them -------------------------------------------------------------- the_node_names_set = set() # -- Nodes: "Factor"s passing the filter, and QQs associated to some of the Factors n = [] e = [] f_types = {} # Contains a list of Factors for each FactorType p_factors = {} # Contains a list of Factors per Processor7 rel_to_observations = set( ) # Set of FactorObservation having "relative_to" property defined factors = create_dictionary() # Factor_ID -> Factor for f in objs[Factor]: f_id = get_factor_id(f, prd=glb_idx) factors[f_id] = f # Dictionary Factor_ID -> Factor # f_types if f.taxon in f_types: lst = f_types[f.taxon] else: lst = [] f_types[f.taxon] = lst lst.append(f) # p_factors if f.processor in p_factors: lst = p_factors[f.processor] else: lst = [] p_factors[f.processor] = lst lst.append(f) # Add Node to graph the_node_names_set.add(f_id) if will_write: n.append((f_id, stated_factor_some_observation if f in factors_with_some_observation else stated_factor_no_observation)) if f in qqs: for qq in qqs[f]: if not ("relative_to" in qq.attributes and qq.attributes["relative_to"]): # value = str(qq.value) # str(qq_cont) + ": " + str(qq.value) value_node_name = f_id + " " + str(qq.value) n.append((value_node_name, qq_attached_to_factor)) e.append((value_node_name, f_id, { "w": "", "label": "", **edge_factor_value })) qq_cont += 1 else: rel_to_observations.add(qq) else: qqs2 = [ qq for qq in qqs if not ("relative_to" in qq.attributes and qq.attributes["relative_to"]) ] d = dict(factor=factor_to_dict(f), observations=qqs[f_id] if f_id in qqs2 else []) n.append((f_id, d)) # -- Edges # "Relative to" relation (internal to the Processor) -> Intensive to Extensive for o in rel_to_observations: if "relative_to" in o.attributes and o.attributes["relative_to"]: # Parse "defining_factor", it can be composed of the factor name AND the unit defining_factor = o.attributes["relative_to"] ast = parser_field_parsers.string_to_ast( parser_field_parsers.factor_unit, defining_factor) factor_type = ast_to_string(ast["factor"]) unit_name = ast["unparsed_unit"] ureg(unit_name) f_id = get_factor_id(o.factor, prd=glb_idx) # Check that "f_id" exists in the nodes list (using "factors") factors[f_id] # If "defining_factor" exists in the processor, ok. If not, create it. 
# Find factor_type in the processor factor_name = get_processor_id( o.factor.processor) + ":" + factor_type factors[factor_name] e.append((factor_name, f_id, { "w": o.value.expression, "label": o.value.expression, **edge_factors_relative_to })) # Directed Flows between Factors for df in objs[FactorsRelationDirectedFlowObservation]: sf = get_factor_id(df.source_factor, prd=glb_idx) tf = get_factor_id(df.target_factor, prd=glb_idx) # Check that both "sf" and "tf" exist in the nodes list (using "factors") factors[sf] factors[tf] weight = df.weight if df.weight else "1" e.append((sf, tf, {"w": weight, "label": weight, **edge_factors_flow})) # Scale Flows between Factors for df in objs[FactorsRelationScaleObservation]: sf = get_factor_id(df.origin, prd=glb_idx) tf = get_factor_id(df.destination, prd=glb_idx) # Check that both "sf" and "tf" exist in the nodes list (using "factors") factors[sf] factors[tf] weight = str(df.quantity) if df.quantity else "1" e.append((sf, tf, { "w": weight, "label": weight, **edge_factors_scale })) # TODO Consider Upscale relations # e.append((..., ..., {"w": upscale_weight, "label": upscale_weight, **edge_factors_upscale})) # -- Create the graph factors_graph = nx.DiGraph() factors_graph.add_nodes_from(n) factors_graph.add_edges_from(e) # nx.write_gml(factors_graph, "/home/rnebot/IntermediateGraph.gml") # ---- AUXILIARY GRAPH: FACTOR TYPES AND THEIR INTERRELATIONS ---- n = [] e = [] # -- Nodes: "FactorType"s passing the filter for ft in objs[FactorType]: n.append((get_factor_type_id(ft), dict(factor_type=ft))) # -- Edges # Hierarchy and expressions stated in the hierarchy ft_in = { } # Because FactorTypes cannot be both in hierarchy AND expression, marks if it has been specified one was, to raise an error if it is specified also the other way for ft in objs[FactorType]: ft_id = get_factor_type_id(ft) if ft.expression: if ft not in ft_in: # TODO Create one or more relations, from other FactorTypes (same Hierarchy) to this one # TODO The expression can only be a sum of FactorTypes (same Hierarchy) ft_in[ft] = "expression" # TODO Check that both "ft-id" and "..." exist in the nodes list (keep a temporary set) # weight = ... # e.append((ft_id, ..., {"w": weight, "label": weight, "origin": ft, "destination": ...})) if ft.parent: if ft.parent not in ft_in or (ft.parent in ft_in and ft_in[ft.parent] == "hierarchy"): # Create an edge from this FactorType ft_in[ft.parent] = "hierarchy" parent_ft_id = get_factor_type_id(ft.parent) # TODO Check that both "ft-id" and "parent_ft_id" exist in the nodes list (keep a temporary set) # Add the edge e.append((ft_id, parent_ft_id, { "w": "1", "origin": ft, "destination": ft.parent })) else: raise Exception( "The FactorType '" + ft_id + "' has been specified by an expression, it cannot be parent." 
) # Linear transformations for f_rel in objs[ FactorTypesRelationUnidirectionalLinearTransformObservation]: origin = get_factor_type_id(f_rel.origin) destination = get_factor_type_id(f_rel.destination) e.append((origin, destination, { "w": f_rel.weight, "label": f_rel.weight, "origin": f_rel.origin, "destination": f_rel.destination })) # ---- Create FACTOR TYPES graph ---- factor_types_graph = nx.DiGraph() factor_types_graph.add_nodes_from(n) factor_types_graph.add_edges_from(e) # ---- EXPAND "FACTORS GRAPH" with "FACTOR TYPE" RELATIONS ---- sg_list = [] # List of modified (augmented) subgraphs if expand_factors_graph: # The idea is: clone a FactorTypes subgraph if a Factor instances some of its member nodes # This cloning process can imply creating NEW Factors the_new_node_names_set = set() # Obtain weak components of the main graph. Each can be considered separately # for sg in nx.weakly_connected_component_subgraphs(factors_graph): # For each subgraph # print("--------------------------------") # for n in sg.nodes(): # print(n) # ---- Weakly connected components of "factor_types_graph" ---- factor_types_subgraphs = list( nx.weakly_connected_component_subgraphs(factor_types_graph)) for sg in nx.weakly_connected_component_subgraphs( factors_graph): # For each subgraph sg_list.append(sg) # Consider each Factor of the subgraph unprocessed_factors = set(sg.nodes()) while unprocessed_factors: # For each UNPROCESSED Factor tmp = unprocessed_factors.pop( ) # Get next unprocessed "factor name" if tmp not in factors: # QQ Observations are in the graph and not in "factors". The same with Processors continue f_ = factors[tmp] # Obtain Factor from "factor name" ft_id = get_factor_type_id( f_) # Obtain FactorType name from Factor # Iterate through FactorTypes and check if the Factor appears for sg2 in factor_types_subgraphs: # Each FactorTypes subgraph if ft_id in sg2: # If the current Factor is in the subgraph if len( sg2.nodes() ) > 1: # If the FactorType subgraph has at least two nodes # CLONE FACTOR TYPES SUBGRAPH # Nodes. Create if not present already n = [] e = [] for n2, attrs in sg2.nodes().items( ): # Each node in the FactorTypes subgraph ft_ = attrs["factor_type"] f_id = get_factor_id(f_.processor, ft_, prd=glb_idx) if f_id not in sg: # If the FactorType is not # Create Factor, from processor and ft_ -> f_new _, _, f_new = find_or_create_observable( state, name=f_id, source="solver") factors[f_id] = f_new if f_id not in the_node_names_set: if will_write: n.append((f_id, non_stated_factor)) else: d = dict( factor=factor_to_dict(f_new), observations=[]) n.append((f_id, d)) if f_id not in the_node_names_set: the_new_node_names_set.add(f_id) the_node_names_set.add(f_id) else: unprocessed_factors.discard(f_id) # Edges. Create relations between factors for r2, w_ in sg2.edges().items(): # Find origin and destination nodes. Copy weight. Adapt weight? If it refers to a FactorType, instance it? 
origin = get_factor_id(f_.processor, w_["origin"], prd=glb_idx) destination = get_factor_id(f_.processor, w_["destination"], prd=glb_idx) if origin in the_new_node_names_set or destination in the_new_node_names_set: graphics = edge_from_factor_type else: graphics = {} e.append((origin, destination, { "w": w_["w"], "label": w_["w"], **graphics })) sg.add_nodes_from(n) sg.add_edges_from(e) break # for sg in sg_list: # print("--------------------------------") # for n in sg.nodes(): # print(n) # Recompose the original graph if sg_list: factors_graph = nx.compose_all(sg_list) else: pass ##factors_graph = nx.DiGraph() # ---- # Add "Processor"s just as a way to visualize grouping of factors (they do not influence relations between factors) # - if include_processors: n = [] e = [] for p in objs[Processor]: p_id = get_processor_id(p) if will_write: n.append((p_id, a_processor)) else: n.append((p_id, processor_to_dict(p))) # Edges between Processors and Factors for f in p_factors[p]: f_id = get_factor_id(f, prd=glb_idx) e.append((p_id, f_id, edge_processor_to_factor)) factors_graph.add_nodes_from(n) factors_graph.add_edges_from(e) # # for ft in objs[FactorType]: # if ft.parent: # # Check which Factors are instances of this FactorType # if ft in f_types: # for f in f_types[ft]: # # Check if the processor contains the parent Factor # processor_factors = p_factors[f.processor] # if ft.parent not in processor_factors: # factor_data = (f.processor, ft) # else: # factor_data = None # create_factor = f in qqs # If there is some Observation # create_factor = True # Force creation # # # # Consider the creation of a relation # # Consider also the creation of a new Factor (a new Node for now): if the child has some observation for sure (maybe a child of the child had an observation, so it is the same) # ft_id = # ft_id = # Plot graph to file # import matplotlib.pyplot as plt # ax = plt.subplot(111) # ax.set_title('Soslaires Graph', fontsize=10) # nx.draw(factors_graph, with_labels=True) # plt.savefig("/home/rnebot/Graph.png", format="PNG") # GML File # nx.write_gml(factors_graph, "/home/rnebot/Graph.gml") ret = None if format == "visjs": # Assign IDs to nodes. 
Change edges "from" and "to" accordingly ids_map = create_dictionary() id_count = 0 for node in factors_graph.nodes(data=True): sid = str(id_count) node[1]["id"] = sid ids_map[node[0]] = sid id_count += 1 vis_nodes = [] vis_edges = [] for node in factors_graph.nodes(data=True): d = dict(id=node[1]["id"], label=node[0]) if "shape" in node[1]: # circle, ellipse, database, box, diamond, dot, square, triangle, triangleDown, text, star d["shape"] = node[1]["shape"] else: d["shape"] = "box" if "color" in node[1]: d["color"] = node[1]["color"] vis_nodes.append(d) for edge in factors_graph.edges(data=True): f = ids_map[edge[0]] t = ids_map[edge[1]] d = {"from": f, "to": t, "arrows": "to"} data = edge[2] if "w" in data: d["label"] = data["w"] d["font"] = {"align": "horizontal"} vis_edges.append(d) ret = str({"nodes": vis_nodes, "edges": vis_edges}) elif format == "gml": ret1 = io.BytesIO() nx.write_gml(factors_graph, ret1) ret = ret1.getvalue() ret1.close() return ret # #########################################################################3 # GEXF File # nx.write_gexf(factors_graph, "/home/rnebot/Graph.gexf") # Legend graph n = [] e = [] n.append(("Factor with Observation", stated_factor_some_observation)) n.append(("Factor with No Observation", stated_factor_no_observation)) if include_processors: n.append(("Processor", a_processor)) n.append(("Factor from FactorType", non_stated_factor)) n.append(("QQ Observation", qq_attached_to_factor)) n.append(("QQ Intensive Observation", qq_attached_to_factor)) e.append(("A Factor", "Another Factor", { "label": "Flow between Factors, attaching the weight", **edge_factors_flow })) e.append(("Factor #1", "Factor #2", { "label": "Relation from a FactorType", **edge_from_factor_type })) if include_processors: e.append(("Processor", "A Factor", { "label": "Link from Processor to Factor", **edge_processor_to_factor })) e.append(("A Factor", "Same Factor in another processor", { "label": "Upscale a Factor in two processors", **edge_factors_upscale })) e.append(("Factor with Observation", "QQ Intensive Observation", { "label": "Observation proportional to extensive value of factor same processor", **edge_factors_relative_to })) e.append(("QQ Observation", "A Factor", { "label": "A QQ Observation", **edge_factor_value })) factors_graph = nx.DiGraph() factors_graph.add_nodes_from(n) factors_graph.add_edges_from(e)
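The "gml" branch above serializes the graph into an in-memory buffer instead of a file path. A minimal standalone sketch of that idiom, assuming only NetworkX and the standard library (the graph contents and variable names below are illustrative, not from the original code):

import io
import networkx as nx

g = nx.DiGraph()
g.add_edge("a", "b", w="1")
buf = io.BytesIO()
nx.write_gml(g, buf)        # write_gml also accepts a file-like object opened for bytes
gml_bytes = buf.getvalue()  # raw GML content as bytes, as returned by the "gml" branch above
buf.close()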
    print("The network was partitioned into", len(rs), "communities")
    i = 1
    # Clustering finished; update the label of every node
    for g in rs:
        for n in g.node:
            net.nodes[n + 1]['class'] = i
            print(n + 1, end=" ")
        print("\n")
        i += 1
    color = [net.nodes[i + 1]['class'] for i in range(len(net.node))]  # set the node colors
    networkx.draw(net, with_labels=True, node_color=color)
    plt.show()
    networkx.write_gml(net, 'karate_group.gml')
    print("")

    """ Simulated data """
    testGraph()
    for k2 in [2, 4, 6, 8]:
        x = []
        y = []
        for I in range(50):
            Ggraph, Ggroup = generateNetwork(k2)
            rs = BDA(Ggraph, 0.15 + I * 0.5 / 50)
            nmia = []
            for g in range(len(rs)):
                for i in rs[g].node:
import networkx as nx
import os
import matplotlib.pyplot as plt
import snowball_sampling


# If the file already exists, remove it
def remove_file(filename):
    if os.path.exists(filename):
        os.remove(filename)


dump_file_base = 'dumped_graph'
G = nx.krackhardt_kite_graph()

# Write and read back in GML format
GML_file = dump_file_base + '.gml'
remove_file(GML_file)

nx.write_gml(G, GML_file)
G2 = nx.read_gml(GML_file)

# Make sure the two graphs are identical
# assert(G.edges() == G2.edges())

nx.draw_networkx(G2)
plt.show()
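One caveat hinted at by the commented-out assert above: read_gml returns node labels as strings, so a graph built with integer nodes (like the kite graph) does not compare equal after a round trip. A hedged sketch of one way to restore the integer labels, assuming NetworkX 2.x or later:

# Convert labels back to ints on read; the node sets then match again.
G3 = nx.read_gml(GML_file, destringizer=int)
assert set(G.nodes()) == set(G3.nodes())
assert G.number_of_edges() == G3.number_of_edges()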
def exportGraph():
    global graphs
    for graph in graphs:
        nx.write_gml(graphs[graph], "parseGraphs/{0}.gml".format(unTokenName(graph)))
for j in range(0, 30):
    SquareX = (Grid.node[i]['xcoord'] - Grid.node[j]['xcoord'])**2
    SquareY = (Grid.node[i]['ycoord'] - Grid.node[j]['ycoord'])**2
    DistanceDF.loc[j + (i * 30)] = [i, j, math.sqrt(SquareX + SquareY)]

# k connections
for i in range(0, 30):
    Subframe = DistanceDF.loc[DistanceDF['From'] == i]
    Subframe = Subframe[Subframe.To != i]
    Subframe = Subframe[Subframe.To <= 29]
    Connect = Subframe.nsmallest(NWS_k, 'Distance')
    for index, row in Connect.iterrows():
        if (int(row['To']) != i) and (Grid.has_edge(i, int(row['To'])) == False):
            Grid.add_edge(i, int(row['To']), key=1, length=float(row['Distance']), Type="Road")

# p connections
for i in range(0, 30):
    j = i
    while j < 30:
        rand = np.random.uniform(0, 1)
        if (rand <= NWS_p) and (i != j) and (Grid.has_edge(i, j) == False):
            Grid.add_edge(i, j, key=1,
                          length=float(math.sqrt((Grid.node[i]['xcoord'] - Grid.node[j]['xcoord'])**2
                                                 + (Grid.node[i]['ycoord'] - Grid.node[j]['ycoord'])**2)),
                          Type="Road")
        j = j + 1

# connecting generators
for i in range(0, len(Grid.nodes)):
    if 'gen' in Grid.node[i]['name']:
        Grid.add_edge(i, list(Grid.neighbors(i))[0], key=1, length=0, Type="Road")

nx.draw(Grid)
selected_edges = [(u, v) for u, v, e in Grid.edges(data=True) if e['Type'] == 'Road']
H = nx.Graph(selected_edges)
nx.draw(H)
nx.write_gml(Grid, "Bus30WithDataMK2.gml")
    seed_species=exp_data.species.sig.id_list,   # seed species (genes)
    all_measured_list=exp_data.species.id_list,  # all measured species
    use_biogrid=True,        # expand with biogrid
    use_hmdb=True,           # expand with hmdb
    use_reactome=True,       # expand with reactome
    use_signor=True,         # expand with signor
    trim_source_sink=True,   # remove all source and sink nodes not measured
    save_name='Data/cisplatin_based_network_new'
)

# Load the network. Note that it is also returned above, but for future use
# we load it from disk.
network = nx.read_gpickle('Data/cisplatin_based_network.p')
utils.add_data_to_graph(network, exp_data)

print("Saving network")
# write to GML for Cytoscape or other programs
nx.write_gml(
    network,
    os.path.join(os.path.dirname(__file__), 'Data',
                 'cisplatin_network_w_attributes.gml')
)
# write to gpickle for fast loading in python
nx.write_gpickle(
    network,
    os.path.join(os.path.dirname(__file__), 'Data',
                 'cisplatin_based_network.p'),
)
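A hedged sketch of how the two saved artifacts might be reloaded later (same paths as above; assumes the installed NetworkX version still provides read_gpickle, which newer releases have dropped):

reloaded_gml = nx.read_gml(
    os.path.join(os.path.dirname(__file__), 'Data',
                 'cisplatin_network_w_attributes.gml'))
reloaded_pickle = nx.read_gpickle(
    os.path.join(os.path.dirname(__file__), 'Data',
                 'cisplatin_based_network.p'))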
def _write_gml(G, path):
    """Wrapper around nx.write_gml."""
    import networkx as nx
    return nx.write_gml(G, path, stringizer=str)
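A hedged usage sketch of the wrapper above: stringizer=str lets write_gml serialize attribute values that are not natively representable in GML (for example None or arbitrary objects), which would otherwise raise NetworkXError. The graph and filename below are made up for illustration:

import networkx as nx

G = nx.Graph()
G.add_node("a", created=None)   # None is not GML-serializable without a stringizer
G.add_edge("a", "b")
_write_gml(G, "example_with_extras.gml")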
import os

import networkx as nx

# polblogs
# add "multigraph 1" to polblogs.gml in advance.
if not os.path.exists('polblogs/giant_component.gml'):
    g = nx.read_gml('polblogs/polblogs.gml', label='id')
    g = g.to_undirected()
    components = nx.connected_components(g)
    giant_component = nx.Graph(g.subgraph(max(components, key=len)))
    nx.write_gml(giant_component, 'polblogs/giant_component.gml')

# polbooks
if not os.path.exists('polbooks/giant_component.gml'):
    g = nx.read_gml('polbooks/polbooks.gml', label='id')
    mapping = dict(l=0, n=1, c=2)
    for n in g:
        g.nodes[n]['value'] = mapping[g.nodes[n]['value']]
    components = nx.connected_components(g)
    giant_component = nx.Graph(g.subgraph(max(components, key=len)))
    nx.write_gml(giant_component, 'polbooks/giant_component.gml')

# cora
if not os.path.exists('cora/giant_component.gml'):
    g = nx.Graph()
    vertices = []
    with open('cora/cora.node_labels', 'r') as f:
        for line in f.readlines():
            line = line.strip()
            vertex, label = line.split(',')
            vertices.append((int(vertex) - 1, {'value': int(label)}))
    delimiter=',')
communities, graph, Q = phenograph.cluster(data)
numpy.savetxt(
    '/home/developer/Data/Bendall/CUR/BHT.SNE.2D/PhenoGraph/Communities/Bendall.fcs.csv',
    communities,
    delimiter=",")
mmwrite(
    '/home/developer/Data/Bendall/CUR/BHT.SNE.2D/PhenoGraph/MTX/Bendall.fcs.graph.mtx',
    graph)
text_file = open(
    '/home/developer/Data/Bendall/CUR/BHT.SNE.2D/PhenoGraph/Q/Bendall.fcs.Q.txt',
    "w")
text_file.write(str(Q))
text_file.close()
G = networkx.Graph(graph)
networkx.write_pajek(
    G,
    path='/home/developer/Data/Bendall/CUR/BHT.SNE.2D/PhenoGraph/NET/Bendall.fcs.graph.net',
    encoding='UTF-8')
networkx.write_gml(
    G,
    path='/home/developer/Data/Bendall/CUR/BHT.SNE.2D/PhenoGraph/GML/Bendall.fcs.graph.gml')
networkx.write_edgelist(
    G,
    path='/home/developer/Data/Bendall/CUR/BHT.SNE.2D/PhenoGraph/Edgelist/Bendall.fcs.graph.edgelist',
    delimiter='\t')
get_ipython().run_cell_magic('javascript', '', '// We load the d3.js library from the Web.\nrequire.config({paths:\n {d3: "http://d3js.org/d3.v3.min"}});\nrequire(["d3"], function(d3) {\n // The code in this block is executed when the\n // d3.js library has been loaded.\n\n // First, we specify the size of the canvas\n // containing the visualization (size of the\n // <div> element).\n var width = 300, height = 300;\n\n // We create a color scale.\n var color = d3.scale.category10();\n\n // We create a force-directed dynamic graph layout.\n var force = d3.layout.force()\n .charge(-120)\n .linkDistance(30)\n .size([width, height]);\n\n // In the <div> element, we create a <svg> graphic\n // that will contain our interactive visualization.\n var svg = d3.select("#d3-example").select("svg")\n if (svg.empty()) {\n svg = d3.select("#d3-example").append("svg")\n .attr("width", width)\n .attr("height", height);\n }\n\n // We load the JSON file.\n d3.json("graph.json", function(error, graph) {\n // In this block, the file has been loaded\n // and the \'graph\' object contains our graph.\n\n // We load the nodes and links in the\n // force-directed graph.\n force.nodes(graph.nodes)\n .links(graph.links)\n .start();\n\n // We create a <line> SVG element for each link\n // in the graph.\n var link = svg.selectAll(".link")\n .data(graph.links)\n .enter().append("line")\n .attr("class", "link");\n\n // We create a <circle> SVG element for each node\n // in the graph, and we specify a few attributes.\n var node = svg.selectAll(".node")\n .data(graph.nodes)\n .enter().append("circle")\n .attr("class", "node")\n .attr("r", 5) // radius\n .style("fill", function(d) {\n // The node color depends on the club.\n return color(d.club);\n })\n .call(force.drag);\n\n // The name of each node is the node number.\n node.append("title")\n .text(function(d) { return d.name; });\n\n // We bind the positions of the SVG elements\n // to the positions of the dynamic force-directed\n // graph, at each time step.\n force.on("tick", function() {\n link.attr("x1", function(d){return d.source.x})\n .attr("y1", function(d){return d.source.y})\n .attr("x2", function(d){return d.target.x})\n .attr("y2", function(d){return d.target.y});\n\n node.attr("cx", function(d){return d.x})\n .attr("cy", function(d){return d.y});\n });\n });\n});') # In[109]: G=Gvid G.graph G.add_node(1, score='151') G.add_nodes_from([3], score='28') G.add_edge(1, 2, weight=4.7 ) G.add_edges_from([(3,4),(4,5)], color='red') G.add_edges_from([(1,2,{'color':'blue'}), (2,3,{'weight':4})]) G[1][2]['weight'] = 4.7 G.edge[1][2]['weight'] = 4 nx.write_gml(G,"path.to.file") mygraph=nx.read_gml("path.to.file") fig, ax = plt.subplots(1, 1, figsize=(8, 6)); nx.draw_spectral(G, ax=ax) nx.draw_networkx(G, ax=ax) plt.show() nx.write_gexf(G, 'Gvid.gexf') # In[110]: title_sig_dict = {} score_dict = {} id_dict = {} url_dict = {} comms_num_dict = {}
def output(self, filename):
    networkx.write_gml(self.DG, filename)
import networkx as nx
import matplotlib.pyplot as plt

G = nx.Graph()
# G.add_node_from("1")
G.add_nodes_from([str(i) for i in range(8)])
G.add_edge("1", "2")
G.add_edge("1", "3")
G.add_edge("1", "0")
G.add_edge("2", "3")
G.add_edge("0", "2")
G.add_edge("3", "0")
G.add_edge("4", "5")
G.add_edge("4", "6")
G.add_edge("4", "7")
G.add_edge("5", "6")
G.add_edge("5", "7")
G.add_edge("6", "7")

nx.write_gml(G, "easy_graph.gml")

# print(G.number_of_nodes())
# print(G.number_of_edges())
# for each in list(G.nodes()):
#     print(each)
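A hedged follow-up sketch: because the nodes above were added as strings, reading easy_graph.gml back reproduces the same node labels without any destringizer (variable name below is illustrative):

H = nx.read_gml("easy_graph.gml")
assert set(H.nodes()) == set(G.nodes())
assert H.number_of_edges() == G.number_of_edges()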
def main():
    parser = optparse.OptionParser(
        description='Fits a hierarchical random graph (HRG) model to a network. Saves the model to a file in graph markup language (GML) format.',
        prog='hrg-fit.py',
        usage='%prog [options] GRAPH_EDGELIST_FILE')
    parser.add_option(
        '-s', '--num-steps',
        action='store', type=int, default=100000,
        help='The number of MCMC steps to take (default=100000).')
    parser.add_option(
        '-t', '--nodetype',
        action='store', type='choice', choices=['int', 'str'], default='int',
        help='The type of the nodes in the edgelist file; "int" or "str" (default="int")')
    (options, args) = parser.parse_args()

    if len(args) != 1:
        parser.print_help()
        return 1

    # optparse "choice" values are strings, so map the name to the actual type here
    nodetype = int if options.nodetype == 'int' else str
    G = nx.read_edgelist(args[0], nodetype=nodetype)
    name = os.path.splitext(args[0])[0]
    hrg_file = name + '-hrg.gml'
    print("HRG model will be saved as " + hrg_file + ".")

    D = Dendrogram.from_graph(G)
    bestL = initL = D.graph['L']
    prevL = bestL
    bestI = 0
    print_status("step", "L", "best L", "MC step", "deltaL")
    for i in range(1, options.num_steps):
        taken = D.monte_carlo_move()
        t = ''
        if taken:
            t = '*'
        if D.graph['L'] > bestL:
            bestL = D.graph['L']
            bestI = i
            nx.write_gml(D, hrg_file)
            print_status("[" + str(i) + "]", "%.3f" % bestL, "%.3f" % bestL, t, "%.3f" % D.deltaL)
        elif i % 4096 == 0:
            print_status("[" + str(i) + "]", "%.3f" % D.graph['L'], "%.3f" % bestL, t, "%.3f" % D.deltaL)
        prevL = D.graph['L']
        if i % 10 == 0:
            sys.stdout.flush()

    print("Step number of last best fit " + str(bestI) + ".")
    print("HRG model was saved as " + hrg_file + ".")
    return 0
def main(filename_residents, filename_AttgFellow): G = nx.Graph() ####################### #####Adding the nodes:# ####################### #adding nodes for the Residents: list_residents = [] result_residents = csv.reader(open(filename_residents, 'r'), delimiter=',') reader = csv.DictReader(open(filename_residents)) csv_resident_headers = reader.fieldnames particular_time = timedelta(0) list_lists_rows_start_stop = [[1, 2, 3], [4, 5, 6], [7, 8], [9, 10], [11, 12], [13, 14], [15, 16], [17, 18], [19, 20]] for row in result_residents: if row != csv_resident_headers: # i exclude the first row (how to do it more efficintly???) resident = str(row[0]) list_time_weekdays = [] if resident not in list_residents: list_residents.append(resident) G.add_node(resident) G.node[resident]["type"] = "R" G.node[resident]["working_times"] = [] # Assigning start-end working times and teams as attributes for Residents: for shift in list_lists_rows_start_stop: list1 = [] start = row[shift[0]] stop = row[shift[1]] # CLOSED interval to describe the shifts if len(shift) == 3: team = row[shift[2]] #rotation start-stop else: team = [1, 2] #float start-stop if len(start) > 0: part_times = start.split("/") mm = int(part_times[0]) dd = int(part_times[1]) yy = int(part_times[2]) date_start_week = datetime(yy, mm, dd) part_times = stop.split("/") mm = int(part_times[0]) dd = int(part_times[1]) yy = int(part_times[2]) date_end_week = datetime(yy, mm, dd) list1.append(date_start_week) list1.append(date_end_week) list1.append(team) G.node[resident]["working_times"].append(list1) #adding nodes for the Attendings and Fellows: result_att_fellows = csv.reader(open(filename_AttgFellow, 'rb'), delimiter=',') reader = csv.DictReader(open(filename_AttgFellow)) csv_att_fellow_headers = reader.fieldnames list_colums_att = [1, 3, 6, 7] # colums with Attending last_names list_colums_fellow = [2, 4, 8] # colums with Fellow last_names list_atts = [] list_fellows = [] for row in result_att_fellows: if row != csv_att_fellow_headers: # i exclude the first row (how to do it more efficintly???) 
for i in list_colums_att: att = str(row[i]) if len(att) > 0: #just in case the field is empty if "," in att: # in some fields we have: att1 mm/dd-dd, att2 mm/dd-dd parts = att.split(" ") atts = [] atts.append(parts[0]) atts.append(parts[2]) for a in atts: if a not in list_atts: list_atts.append(a) G.add_node(a) G.node[a]["type"] = "A" G.node[a]["working_times"] = [] if a in list_fellows: print "attending:", a, "listed before as fellow" else: if att not in list_atts: list_atts.append(att) G.add_node(att) G.node[att]["type"] = "A" G.node[att]["working_times"] = [] if att in list_fellows: print "attending:", att, "listed before as fellow" for i in list_colums_fellow: fellow = str(row[i]) if len(fellow) > 0: #just in case the field is empty if fellow not in list_fellows: list_fellows.append(fellow) G.add_node(fellow) G.node[fellow]["type"] = "F" G.node[fellow]["working_times"] = [] if fellow in list_atts: print "fellows:", fellow, "listed before as attending" # Assigning start-end working times and teams as attributes for Atts & Fellows: if len(row[0]) > 0: att1 = str(row[1]) # weekday personnel fellow1 = str(row[2]) att2 = str(row[3]) fellow2 = str(row[4]) w_att1 = str(row[6]) # weekend personnel w_att2 = str(row[7]) w_fellow = str(row[8]) #print att1,fellow1,att2,fellow2, w_att1, w_att2,w_fellow list_time_weekdays = [] list_time_weekend = [] start_week = row[0] part_times = start_week.split("/") mm = int(part_times[0]) dd = int(part_times[1]) yy = 2000 + int(part_times[2]) date_start_week = datetime(yy, mm, dd) end_week = row[5] # CLOSED interval to describe the shifts part_times = end_week.split("/") mm = int(part_times[0]) dd = int(part_times[1]) yy = 2000 + int(part_times[2]) date_end_week = datetime(yy, mm, dd) - timedelta(days=1) date_start_weeked = datetime(yy, mm, dd) date_end_weeked = datetime(yy, mm, dd) + timedelta( days=1) # CLOSED interval to describe the shifts list_time_weekdays.append(date_start_week) list_time_weekdays.append(date_end_week) list_time_weekend.append(date_start_weeked) list_time_weekend.append(date_end_weeked) list1 = list( list_time_weekdays) #OJO!!! forma correcta de copiar lista list1.append(1) # para que sean INDEPENDIENTES!! 
G.node[att1]["working_times"].append(list1) G.node[fellow1]["working_times"].append(list1) list2 = list(list_time_weekdays) list2.append(2) G.node[fellow2]["working_times"].append(list2) list3 = list(list_time_weekend) list3.append(1) G.node[w_att1]["working_times"].append(list3) list4 = list(list_time_weekend) list4.append(2) G.node[w_att2]["working_times"].append(list4) list5 = list(list_time_weekend) list5.append([1, 2]) G.node[w_fellow]["working_times"].append(list5) if "/" not in att2: # (for most of the rows, the normal ones) G.node[att2]["working_times"].append(list2) else: # (for the few special rows: att2 mm/dd-dd, aat2 mm/dd-dd) if "," in att2: parts = att2.split(" ") att2a = parts[0] att2b = parts[2] date1 = parts[1] date2 = parts[3] list_time_weekdays_special_att2 = [] part_times = date1.split("/") mm = int(part_times[0]) part_times[1] = part_times[1].strip(",") dd_start = int(part_times[1].split("-")[0]) dd_end = int(part_times[1].split("-")[1]) yy = 2010 date_start_week_special_att2 = datetime( yy, mm, dd_start) date_end_week_special_att2 = datetime( yy, mm, dd_end) # CLOSED interval to describe the shifts list_time_weekdays_special_att2.append( date_start_week_special_att2) list_time_weekdays_special_att2.append( date_end_week_special_att2) list_time_weekdays_special_att2.append(2) G.node[att2a]["working_times"].append( list_time_weekdays_special_att2) list_time_weekdays_special_att2 = [] part_times = date2.split("/") mm = int(part_times[0]) dd_start = int(part_times[1].split("-")[0]) dd_end = int(part_times[1].split("-")[1]) date_start_week_special_att2 = datetime( yy, mm, dd_start) date_end_week_special_att2 = datetime( yy, mm, dd_end) # CLOSED interval to describe the shifts list_time_weekdays_special_att2.append( date_start_week_special_att2) list_time_weekdays_special_att2.append( date_end_week_special_att2) list_time_weekdays_special_att2.append(2) G.node[att2b]["working_times"].append( list_time_weekdays_special_att2) elif "/" in att2: parts = att2.split("/") att2a = parts[0] att2b = parts[1] G.node[att2a]["working_times"].append(list2) G.node[att2b]["working_times"].append(list2) else: #for the first row, that only has weekend info: w_att1 = str(row[6]) # weekend personnel w_att2 = str(row[7]) w_fellow = str(row[8]) list_time_weekend = [] start_weeked = row[5] # CLOSED interval to describe the shifts part_times = start_weeked.split("/") mm = int(part_times[0]) dd = int(part_times[1]) yy = 2000 + int(part_times[2]) date_start_weeked = datetime(yy, mm, dd) date_end_weeked = datetime(yy, mm, dd) + timedelta( days=1) # CLOSED interval to describe the shifts list_time_weekend.append(date_start_weeked) list_time_weekend.append(date_end_weeked) list3 = list(list_time_weekend) list3.append(1) G.node[w_att1]["working_times"].append(list3) list4 = list(list_time_weekend) list4.append(2) G.node[w_att2]["working_times"].append(list4) list5 = list(list_time_weekend) list5.append([1, 2]) G.node[w_fellow]["working_times"].append(list5) # for n in G.nodes(): # print n,G.node[n]["working_times"],"\n" print "total # of residents:", len(list_residents) print "total # of fellows:", len(list_fellows) print "total # of attendings:", len(list_atts) print "total # doctors:", len(G.nodes()) ###################### ## Adding the links: # ###################### # Between Attendings and Fellows: result_att_fellows = csv.reader(open(filename_AttgFellow, 'rb'), delimiter=',') reader = csv.DictReader(open(filename_AttgFellow)) csv_att_fellow_headers = reader.fieldnames for row in 
result_att_fellows: if row != csv_att_fellow_headers: if len(row[1]) > 0: att1 = row[1] fellow1 = row[2] G.add_edge(att1, fellow1) att2 = row[3] fellow2 = row[4] if "," in att2: #in some fields we have: att1 mm/dd-dd, att2 mm/dd-dd parts = att2.split(" ") att2a = parts[0] att2b = parts[2] G.add_edge(att2a, fellow2) G.add_edge(att2b, fellow2) elif "/" in att2: # in some fields we have: att1/att2 parts = att2.split("/") att2a = parts[0] att2b = parts[1] G.add_edge(att2a, fellow2) G.add_edge(att2b, fellow2) else: G.add_edge(att2, fellow2) w_att1 = row[6] w_att2 = row[7] w_fellow = row[8] G.add_edge(w_att1, w_fellow) G.add_edge(w_att2, w_fellow) #Links Between Fellows and Residents: for resident in list_residents: for shift_r in G.node[resident][ "working_times"]: # i go over the list of lists start_r = shift_r[0] stop_r = shift_r[1] if type(shift_r[2]) == list: team_r = shift_r[2] else: try: team_r = int(shift_r[2]) except ValueError: # SOME RESIDENTS DO NOT HAVE A TEAM!!!!Marinelli, Mathur, Pirotte and Patel S team_r = [1, 2] #print "\nr:",resident, start_r, stop_r, team_r for fellow in list_fellows: for shift_f in G.node[fellow][ "working_times"]: # i go over the list of lists start_f = shift_f[0] stop_f = shift_f[1] if type(shift_f[2]) == list: team_f = shift_f[2] else: team_f = int(shift_f[2]) # print "f",fellow,"'s shift:",shift_f#,type(team_f),type(team_r)#,"and resident",resident,"'s shift:",shift_r if ((start_f >= start_r) and (start_f <= stop_r)): if ((team_r == team_f) or (type(team_r) == list) or (type(team_f) == list)): # print " f:",fellow, start_f, stop_f, team_f G.add_edge(resident, fellow) if G.node[resident]["type"] == G.node[fellow][ "type"]: print G.node[resident]["type"], "--", G.node[ fellow]["type"], "line 461" elif ((stop_f >= start_r) and (stop_f <= stop_r)): if ((team_r == team_f) or (type(team_r) == list) or (type(team_f) == list)): #print " f:",fellow, start_f, stop_f, team_f G.add_edge(resident, fellow) if G.node[resident]["type"] == G.node[fellow][ "type"]: print G.node[resident]["type"], "--", G.node[ fellow]["type"], "line 470" ################################## #writing the network into a file:# ################################## network_name = filename_residents.split("/")[-1] network_name = network_name.split(".csv")[0] nx.write_gml(G, "../Results/Doctors_network.gml" ) # you run the code from Idea-Spread-Hospital/Code ######################## # plotting the network:# ######################## for n in G.nodes( ): # i remove the attributes that are list, cos pygraphviz doesnt like it # and i remove the residents without any working times. if len(G.node[n]["working_times"]) > 0: G.node[n]["working_times"] = None else: # print n,G.node[n]["type"], "is going down cos doesnt have working times!" G.remove_node(n) nx.write_gml(G, "../Results/Doctors_network_without_working_times.gml" ) # you run the code from Idea-Spread-Hospital/Code print "\n# links:", len(G.edges()), "# nodes:", len(G.nodes()) setA = set(list_atts) setF = set(list_fellows) setR = set(list_residents) interceptAF = setA & setF print "interception A-F", interceptAF interceptAR = setA & setR print "interception A-R", interceptAR interceptFR = setF & setR print "interception F-R", interceptFR
def main(argv=None):
    if argv is None:
        argv = sys.argv

    # parse arguments
    parser = argparse.ArgumentParser(
        prog="genER",
        description='generate random graphs from G(n,p)')
    parser.add_argument('n', type=int, help='number of nodes')
    parser.add_argument('p', type=float, help='probability of an edge')
    parser.add_argument('output', help='output filename')
    parser.add_argument('--deg', action='store_true',
                        help='interpret p as expected degree, i.e. p <- p/(n-1)')
    parser.add_argument('-pI', type=float, default=0.0,
                        help='probability of an inhibitory neuron')
    parser.add_argument('-gE', type=float, default=2.5,
                        help='conductance of excitatory (E) synapses, nS')
    parser.add_argument('-gI', type=float, default=2.5,
                        help='conductance of inhibitory (I) synapses, nS')
    parser.add_argument('--balanced', action='store_true',
                        help='set gI for a balanced network, given gE and pI')
    parser.add_argument('-pCS', type=float, default=0.00,
                        help='probability of CS neuron (default: %(default)s)')
    parser.add_argument('-pCI', type=float, default=0.25,
                        help='probability of CI neuron (default: %(default)s)')
    parser.add_argument('-pTS', type=float, default=0.45,
                        help='probability of TS neuron (default: %(default)s)')
    args = parser.parse_args(argv[1:])

    n = args.n
    if args.deg:
        p = 1.0 * args.p / (n - 1)  # compute p for kavg = args.p
    else:
        p = args.p
    pI = args.pI
    assert 0 <= pI and pI <= 1, 'pI out of range'
    gE = args.gE
    gI = args.gI
    if args.balanced and pI != 0:
        gI = (pI - 1) * gE / gI
    assert args.pCS >= 0 and args.pCS <= 1, 'pCS out of range'
    assert args.pCI >= 0 and args.pCI <= 1, 'pCI out of range'
    assert args.pTS >= 0 and args.pTS <= 1, 'pTS out of range'
    sumPTypes = args.pCS + args.pCI + args.pTS
    assert sumPTypes <= 1, 'probabilities do not sum to <= 1'
    pTypes = [args.pCS, args.pCI, args.pTS, 1 - sumPTypes]

    graph = er_prebot(n, p, pTypes, pI, gE, gI)
    nx.write_gml(graph, args.output)
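The script above builds its graph with er_prebot, which also assigns neuron-type and conductance attributes. A minimal sketch of just the underlying G(n, p) generation and GML export, using plain NetworkX calls (parameters and filename are illustrative):

import networkx as nx

g = nx.gnp_random_graph(100, 0.05, seed=42)   # Erdos-Renyi G(n, p)
nx.write_gml(g, "er_n100_p005.gml")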
    while True:
        used.append(node)
        next = getStrongestEdge(node, direction, used)
        if next is None:
            break
        path.append(next)
        if next.qstrand == '-':
            if direction == SUPPORTFLAGS.left:
                direction = SUPPORTFLAGS.right
            elif direction == SUPPORTFLAGS.right:
                direction = SUPPORTFLAGS.left
        node = next.qname
    paths.append(path)

for p in paths:
    if len(p) == 0:
        continue
    for i in p:
        if not i.tname.startswith("ref"):
            print i.tname.split('/')[1], i.tstrand, "\t",
        else:
            print i.tname, i.tstrand, "\t",
    print


if __name__ == '__main__':
    reads = sys.argv[1]
    fasta = FastaFile(reads)
    #blasr(reads, reads, 4)
    ovl = m5ToOvlGraph(fasta.keys(), "out.m5")
    ovlSimplify(ovl)
    nx.write_gml(ovl, "ovl.gml")