def test_rdflib_to_graphtool():
    """Round-trip an rdflib Graph through rdflib_to_graphtool and check
    vertex/edge counts and the 'term' properties on the resulting graph.

    Skipped when graph_tool is not installed.
    """
    try:
        from graph_tool import util as gt_util
    except ImportError:
        raise SkipTest("couldn't find graph_tool")
    from rdflib.extras.external_graph_libs import rdflib_to_graphtool
    g = Graph()
    a, b, l = URIRef('a'), URIRef('b'), Literal('l')
    p, q = URIRef('p'), URIRef('q')
    edges = [(a, p, b), (a, q, b), (b, p, a), (b, p, l)]
    for t in edges:
        g.add(t)
    mdg = rdflib_to_graphtool(g)
    # four triples -> four edges in the multi-digraph
    assert len(list(mdg.edges())) == 4
    vpterm = mdg.vertex_properties['term']
    va = gt_util.find_vertex(mdg, vpterm, a)[0]
    vb = gt_util.find_vertex(mdg, vpterm, b)[0]
    # vl is unused below, but the [0] indexing implicitly asserts that the
    # literal 'l' became a vertex (IndexError otherwise)
    vl = gt_util.find_vertex(mdg, vpterm, l)[0]
    assert (va, vb) in [(e.source(), e.target()) for e in list(mdg.edges())]
    epterm = mdg.edge_properties['term']
    # predicate p appears on 3 triples, q on 1
    assert len(list(gt_util.find_edge(mdg, epterm, p))) == 3
    assert len(list(gt_util.find_edge(mdg, epterm, q))) == 1
    # second conversion: custom edge property 'name' built via transform_p
    mdg = rdflib_to_graphtool(
        g,
        e_prop_names=[text_type('name')],
        transform_p=lambda s, p, o: {text_type('name'): text_type(p)})
    epterm = mdg.edge_properties['name']
    assert len(list(gt_util.find_edge(mdg, epterm, text_type(p)))) == 3
    assert len(list(gt_util.find_edge(mdg, epterm, text_type(q)))) == 1
def test_rdflib_to_graphtool():
    """Round-trip an rdflib Graph through rdflib_to_graphtool and check
    vertex/edge counts and the 'term' properties on the resulting graph.

    Skipped when graph_tool is not installed.
    """
    try:
        from graph_tool import util as gt_util
    except ImportError:
        raise SkipTest("couldn't find graph_tool")
    from rdflib.extras.external_graph_libs import rdflib_to_graphtool
    g = Graph()
    a, b, l = URIRef("a"), URIRef("b"), Literal("l")
    p, q = URIRef("p"), URIRef("q")
    edges = [(a, p, b), (a, q, b), (b, p, a), (b, p, l)]
    for t in edges:
        g.add(t)
    mdg = rdflib_to_graphtool(g)
    # four triples -> four edges in the multi-digraph
    assert len(list(mdg.edges())) == 4
    vpterm = mdg.vertex_properties["term"]
    va = gt_util.find_vertex(mdg, vpterm, a)[0]
    vb = gt_util.find_vertex(mdg, vpterm, b)[0]
    # vl is unused below, but the [0] indexing implicitly asserts that the
    # literal 'l' became a vertex (IndexError otherwise)
    vl = gt_util.find_vertex(mdg, vpterm, l)[0]
    assert (va, vb) in [(e.source(), e.target()) for e in list(mdg.edges())]
    epterm = mdg.edge_properties["term"]
    # predicate p appears on 3 triples, q on 1
    assert len(list(gt_util.find_edge(mdg, epterm, p))) == 3
    assert len(list(gt_util.find_edge(mdg, epterm, q))) == 1
    # second conversion: custom edge property 'name' built via transform_p.
    # NOTE: the str("name") wrappers were leftovers from a six/text_type
    # conversion; plain literals are identical in Python 3.  str(p) on the
    # URIRef is kept — that conversion is meaningful.
    mdg = rdflib_to_graphtool(
        g,
        e_prop_names=["name"],
        transform_p=lambda s, p, o: {"name": str(p)})
    epterm = mdg.edge_properties["name"]
    assert len(list(gt_util.find_edge(mdg, epterm, str(p)))) == 3
    assert len(list(gt_util.find_edge(mdg, epterm, str(q)))) == 1
def run(filename, output, header_bool, sub, obj, pred, props, undirected, strong):
    """Compute connected components of a TSV edge file with graph_tool.

    Writes KGTK-style rows (node1, label='connected_component', node2=component
    id) either to `output` or to stdout.  `sub`, `pred`, `obj` are the column
    indices of subject, predicate and object; `props` optionally restricts the
    graph to a comma-separated list of predicate values before labelling.

    Raises KGTKException (carrying the original error message) on any failure.
    """
    # import modules locally
    import csv
    import sys
    from graph_tool import load_graph_from_csv
    from graph_tool.util import find_edge
    from graph_tool.topology import label_components
    from kgtk.exceptions import KGTKException
    from kgtk.cli_argparse import KGTKArgumentParser

    def find_pred_position(sub, pred, obj):
        # Index of the predicate among the non-endpoint columns: graph_tool
        # removes the sub/obj columns (ecols), so the predicate's index drops
        # by one for each endpoint column that precedes it.
        if pred < sub and pred < obj:
            return pred
        elif (pred > sub and pred < obj) or (pred < sub and pred > obj):
            return pred - 1
        else:
            return pred - 2

    try:
        header = ['node1', 'label', 'node2']
        # graph_tool names the remaining columns c0, c1, ...
        label = 'c' + str(find_pred_position(sub, pred, obj))
        g = load_graph_from_csv(filename,
                                not (undirected),
                                skip_first=not (header_bool),
                                hashed=True,
                                csv_options={'delimiter': '\t'},
                                ecols=(sub, obj))
        es = []
        if props:
            # keep only edges whose predicate is in the requested set
            properties = props.split(',')
            for e in properties:
                es += (find_edge(g, g.edge_properties[label], e))
            g.clear_edges()
            g.add_edge_list(list(set(es)))
        comp, hist = label_components(g, directed=strong)
        if output:
            # 'with' guarantees the file is closed even if a write fails
            with open(output, 'w') as f:
                wr = csv.writer(f, quoting=csv.QUOTE_NONE, delimiter="\t",
                                escapechar="\n", quotechar='')
                wr.writerow(header)
                for v, c in enumerate(comp):
                    wr.writerow([g.vertex_properties['name'][v],
                                 'connected_component', c])
        else:
            sys.stdout.write('%s\t%s\t%s\n' % ('node1', 'label', 'node2'))
            for v, c in enumerate(comp):
                sys.stdout.write('%s\t%s\t%s\n'
                                 % (g.vertex_properties['name'][v],
                                    'connected_component', str(c)))
    except Exception as e:
        # Previously a bare `except: raise KGTKException` which discarded the
        # original error (and swallowed KeyboardInterrupt); preserve the cause.
        raise KGTKException(str(e))
def _get_edges(self, flows):
    """Return the graph edge for each flow, looked up via the 'id' edge
    property.  Raises if any flow has no matching edge."""
    id_prop = self.graph.ep['id']
    matched = []
    for flow in flows:
        hits = util.find_edge(self.graph, id_prop, flow.id)
        if not hits:
            # shouldn't happen if graph is up to date
            raise Exception(f'graph is missing flow {flow.id}')
        matched.append(hits[0])
    return matched
def translate_to_db(self):
    """Persist edges marked as 'changed' in the graph back to the database.

    For each changed edge: if its FractionFlow was created by this strategy,
    the flow row itself is updated; otherwise the modification is stored in a
    (new or existing) StrategyFractionFlow row, leaving the base flow intact.
    New StrategyFractionFlow rows are bulk-created at the end.
    """
    # ToDo: filter for changes
    # store edges (flows) to database
    strat_flows = []
    # edges whose 'changed' edge property is True
    changed_edges = util.find_edge(self.graph, self.graph.ep['changed'],
                                   True)
    for edge in changed_edges:
        new_amount = self.graph.ep.amount[edge]
        # get the related FractionFlow
        flow = FractionFlow.objects.get(id=self.graph.ep.id[edge])
        material = self.graph.ep.material[edge]
        process = self.graph.ep.process[edge]
        # -1 encodes "no process" in the graph; map it back to NULL
        if process == -1:
            process = None
        waste = self.graph.ep.waste[edge]
        hazardous = self.graph.ep.hazardous[edge]
        # new flow is marked with strategy relation
        # (no seperate strategy fraction flow needed)
        if flow.strategy is not None:
            flow.amount = new_amount
            flow.hazardous = hazardous
            flow.material_id = material
            flow.waste = waste
            flow.process_id = process
            flow.save()
        # changed flow gets a related strategy fraction flow holding changes
        else:
            ex = StrategyFractionFlow.objects.filter(
                fractionflow=flow, strategy=self.strategy)
            # if there already was a modification, overwrite it
            if len(ex) == 1:
                strat_flow = ex[0]
                strat_flow.amount = new_amount
                strat_flow.material_id = material
                strat_flow.waste = waste
                strat_flow.hazardous = hazardous
                strat_flow.process_id = process
                strat_flow.save()
            elif len(ex) > 1:
                # data-integrity guard: the (flow, strategy) pair must be unique
                raise Exception('more than StrategyFractionFlow '
                                'found per flow. This should not happen.')
            else:
                strat_flow = StrategyFractionFlow(strategy=self.strategy,
                                                  amount=new_amount,
                                                  fractionflow=flow,
                                                  material_id=material,
                                                  waste=waste,
                                                  hazardous=hazardous,
                                                  process_id=process)
                strat_flows.append(strat_flow)
    # single INSERT for all newly created modification rows
    StrategyFractionFlow.objects.bulk_create(strat_flows)
def process(self):
    """Read a KGTK edge file, compute connected components with graph_tool,
    and write one (node1, 'connected_component', component-id) row per vertex.
    """
    input_kr: KgtkReader = KgtkReader.open(
        self.input_file_path,
        error_file=self.error_file,
        who="input",
        options=self.input_reader_options,
        value_options=self.value_options,
        verbose=self.verbose,
        very_verbose=self.very_verbose,
    )
    # indices of (node1, label, node2) in the input file
    input_key_columns: typing.List[int] = self.get_key_columns(
        input_kr, "input")
    label_col_idx = input_key_columns[1]
    # load_graph_from_csv names non-endpoint columns c0, c1, ...
    label = '{}{}'.format('c', label_col_idx)
    g = load_graph_from_csv(str(input_kr.file_path),
                            not (self.undirected),
                            skip_first=not (self.no_header),
                            hashed=True,
                            csv_options={'delimiter': '\t'},
                            ecols=(input_key_columns[0],
                                   input_key_columns[2]))
    es = []
    header = ['node1', 'label', 'node2']
    if self.properties:
        # restrict the graph to edges whose label is in the requested set
        properties = self.properties.split(',')
        for e in properties:
            es += (find_edge(g, g.edge_properties[label], e))
        g.clear_edges()
        g.add_edge_list(list(set(es)))
    # strong=True labels strongly connected components on directed graphs
    comp, hist = label_components(g, directed=self.strong)
    ew: KgtkWriter = KgtkWriter.open(header,
                                     self.output_file_path,
                                     mode=input_kr.mode,
                                     require_all_columns=False,
                                     prohibit_extra_columns=True,
                                     fill_missing_columns=True,
                                     gzip_in_parallel=False,
                                     verbose=self.verbose,
                                     very_verbose=self.very_verbose)
    for v, c in enumerate(comp):
        ew.write([
            g.vertex_properties['name'][v], 'connected_component', str(c)
        ])
def _chain_flows(self, referenced_flows, possible_new_targets, formula,
                 new_material=None, new_process=None, prepend=True,
                 new_waste=-1, new_hazardous=-1):
    '''
    creates new flows based on given referenced flows and prepends
    (prepend==True) or appends (prepend==False) them
    if new flows already exist, changes existing ones instead
    returns new/changed flows and deltas in same order as flows
    ToDo: almost the same as shift_flows(), generalize!
    '''
    if formula.is_absolute:
        raise ValueError(
            'Formula for PrependFlow and AppendFlow must be relative')
    new_flows = []
    deltas = []
    # ids of the actors on the side of each flow that is kept
    ids = referenced_flows.values_list('destination') if prepend\
        else referenced_flows.values_list('origin')
    actors_kept = Actor.objects.filter(id__in=ids)
    # maps kept-actor id -> id of the closest possible new target actor
    closest_dict = self.find_closest_actor(actors_kept,
                                           possible_new_targets)
    # create new flows and add corresponding edges
    for flow in referenced_flows:
        kept_id = flow.destination_id if prepend \
            else flow.origin_id
        # no target actor found within range
        if kept_id not in closest_dict:
            continue
        # get new target out of dictionary
        new_id = closest_dict[kept_id]
        new_vertex = self._get_vertex(new_id)
        delta = formula.calculate_delta(flow.strategy_amount)
        # the edge corresponding to the referenced flow
        edges = util.find_edge(self.graph, self.graph.ep['id'], flow.id)
        if len(edges) > 1:
            raise ValueError("FractionFlow.id ", flow.id,
                             " is not unique in the graph")
        elif len(edges) == 0:
            print("Cannot find FractionFlow.id ", flow.id, " in the graph")
            continue
        edge = edges[0]
        # prepend: new edge feeds INTO the flow's source;
        # append: new edge leads OUT of the flow's target
        new_edge_args = [new_vertex, edge.source()] if prepend \
            else [edge.target(), new_vertex]
        # NOTE(review): this lookup result is overwritten by add_edge below
        # without being used — presumably a leftover; verify intent
        new_edge = self.graph.edge(*new_edge_args)
        # create a new fractionflow for the implementation flow in db,
        # setting id to None creates new one when saving
        # while keeping attributes of original model;
        # the new flow is added with zero amount and to be changed
        # by calculated delta
        new_flow = copy_django_model(flow)
        new_flow.id = None
        new_flow.amount = 0
        if prepend:
            new_flow.destination_id = new_flow.origin_id
            new_flow.origin_id = new_id
        else:
            new_flow.origin_id = new_flow.destination_id
            new_flow.destination_id = new_id
        if new_material:
            new_flow.material = new_material
        if new_process:
            new_flow.process = new_process
        # new_waste / new_hazardous use -1 as "unset"; 1 means True
        if new_waste >= 0:
            new_flow.waste = new_waste == 1
        if new_hazardous >= 0:
            new_flow.hazardous = new_hazardous == 1
        # strategy marks flow as new flow
        new_flow.strategy = self.strategy
        new_flow.save()
        # create the edge in the graph
        new_edge = self.graph.add_edge(*new_edge_args)
        self.graph.ep.id[new_edge] = new_flow.id
        self.graph.ep.amount[new_edge] = 0
        self.graph.ep.material[new_edge] = new_flow.material.id
        # process doesn't have to be set, missing attributes
        # are marked with -1 in graph (if i remember correctly?)
        self.graph.ep.process[new_edge] = \
            new_flow.process.id if new_flow.process is not None else - 1
        self.graph.ep.waste[new_edge] = new_flow.waste
        self.graph.ep.hazardous[new_edge] = new_flow.hazardous
        new_flows.append(new_flow)
        deltas.append(delta)
    return new_flows, deltas
def _shift_flows(self, referenced_flows, possible_new_targets, formula,
                 new_material=None, new_process=None, shift_origin=True,
                 reduce_reference=True, new_waste=-1, new_hazardous=-1):
    '''
    creates new flows based on given referenced flows and redirects them
    to target actor (either origin or destinations are changing)
    referenced_flows are reduced by amout of new flows if reduce_reference
    is True, otherwise they stay untouched
    returns flows to be changed in order of change and the deltas added to
    be to each flow in walker algorithm in same order as flows
    '''
    changed_ref_flows = []
    new_flows = []
    changed_ref_deltas = []
    new_deltas = []
    # the actors to keep (not shifted)
    ids = referenced_flows.values_list('destination') if shift_origin\
        else referenced_flows.values_list('origin')
    actors_kept = Actor.objects.filter(id__in=ids)
    # actors in possible new targets that are closest
    closest_dict = self.find_closest_actor(actors_kept,
                                           possible_new_targets)
    if formula.is_absolute:
        formula.set_total(referenced_flows)
    # create new flows and add corresponding edges
    for flow in referenced_flows:
        kept_id = flow.destination_id if shift_origin \
            else flow.origin_id
        # no target actor found within range
        if kept_id not in closest_dict:
            continue
        # get new target out of dictionary
        new_id = closest_dict[kept_id]
        new_vertex = self._get_vertex(new_id)
        delta = formula.calculate_delta(flow.strategy_amount)
        # never shift more than the flow currently carries
        delta = min(delta, flow.strategy_amount)
        # the edge corresponding to the referenced flow
        # (the one to be shifted)
        edges = util.find_edge(self.graph, self.graph.ep['id'], flow.id)
        if len(edges) > 1:
            raise ValueError("FractionFlow.id ", flow.id,
                             " is not unique in the graph")
        elif len(edges) == 0:
            print("Cannot find FractionFlow.id ", flow.id, " in the graph")
            continue
        edge = edges[0]
        new_edge_args = [new_vertex, edge.target()] if shift_origin \
            else [edge.source(), new_vertex]
        # NOTE(review): this lookup result is overwritten by add_edge below
        # without being used — presumably a leftover; verify intent
        new_edge = self.graph.edge(*new_edge_args)
        # create a new fractionflow for the implementation flow in db,
        # setting id to None creates new one when saving
        # while keeping attributes of original model;
        # the new flow is added with zero amount and to be changed
        # by calculated delta
        new_flow = copy_django_model(flow)
        new_flow.id = None
        new_flow.amount = 0
        if shift_origin:
            new_flow.origin_id = new_id
        else:
            new_flow.destination_id = new_id
        if new_material:
            new_flow.material = new_material
        if new_process:
            new_flow.process = new_process
        # new_waste / new_hazardous use -1 as "unset"; 1 means True
        if new_waste >= 0:
            new_flow.waste = new_waste == 1
        if new_hazardous >= 0:
            new_flow.hazardous = new_hazardous == 1
        # strategy marks flow as new flow
        new_flow.strategy = self.strategy
        new_flow.save()
        # create the edge in the graph
        new_edge = self.graph.add_edge(*new_edge_args)
        self.graph.ep.id[new_edge] = new_flow.id
        self.graph.ep.amount[new_edge] = 0
        self.graph.ep.material[new_edge] = new_flow.material.id
        # process doesn't have to be set, missing attributes
        # are marked with -1 in graph (if i remember correctly?)
        self.graph.ep.process[new_edge] = \
            new_flow.process.id if new_flow.process is not None else - 1
        self.graph.ep.waste[new_edge] = new_flow.waste
        self.graph.ep.hazardous[new_edge] = new_flow.hazardous
        new_flows.append(new_flow)
        new_deltas.append(delta)
        # reduce (resp. increase) the referenced flow by the same amount
        if reduce_reference:
            changed_ref_flows.append(flow)
            changed_ref_deltas.append(-delta)
    # new flows shall be created before modifying the existing ones
    return new_flows + changed_ref_flows, new_deltas + changed_ref_deltas
def get_edges_by_edge_prop(g, p, v):
    """Return all edges of g whose edge property named p has value v."""
    edge_prop = g.properties[('e', p)]
    return find_edge(g, prop=edge_prop, match=v)
def makeGraph(self,img,dia,xScale,yScale): print 'Building Graph Data Structure' start=time.time() G = Graph(directed=False) vprop=G.new_vertex_property('object') eprop=G.new_edge_property('object') epropW=G.new_edge_property("int32_t") avgScale=(xScale+yScale)/2 test=np.where(img==True) ss = np.shape(test) cccc=0 percentOld=0.0 print str(np.round(percentOld,1))+'%' for (i,j) in zip(test[1],test[0]): cccc+=1 percent=(float(cccc)/float(ss[1]))*100 if percentOld+10< percent: print str(np.round(percent,1))+'%' percentOld=percent nodeNumber1 = (float(i)*yScale,float(j)*xScale) if gu.find_vertex(G, vprop, {'imgIdx':(j,i),'coord':nodeNumber1, 'nrOfPaths':0, 'diameter':float(dia[j][i])*avgScale}): v1=gu.find_vertex(G, vprop, {'imgIdx':(j,i),'coord':nodeNumber1, 'nrOfPaths':0, 'diameter':float(dia[j][i])*avgScale})[0] else: v1=G.add_vertex() vprop[G.vertex(v1)]={'imgIdx':(j,i),'coord':nodeNumber1, 'nrOfPaths':0, 'diameter':float(dia[j][i])*avgScale} try: if img[j,i+1] == True: nodeNumber2 = (float(i+1)*yScale,float(j)*xScale) if gu.find_vertex(G, vprop, {'imgIdx':(j,i+1),'coord':nodeNumber2, 'nrOfPaths':0, 'diameter':float(dia[j][i+1])*avgScale}): v2=gu.find_vertex(G, vprop, {'imgIdx':(j,i+1),'coord':nodeNumber2, 'nrOfPaths':0, 'diameter':float(dia[j][i+1])*avgScale})[0] if gu.find_edge(G, eprop, {'coord1':vprop[v2]['coord'], 'coord2':vprop[v1]['coord'],'weight':((vprop[v1]['diameter']+vprop[v2]['diameter'])/2)**4,'RTP':False}): pass else: e = G.add_edge(v1, v2) epropW[e]=(((vprop[v1]['diameter']+vprop[v2]['diameter'])/2)/avgScale)**4 eprop[e]={'coord1':vprop[v1]['coord'], 'coord2':vprop[v2]['coord'],'weight':((vprop[v1]['diameter']+vprop[v2]['diameter'])/2)**4,'RTP':False} else: v2=G.add_vertex() vprop[G.vertex(v2)]={'imgIdx':(j,i+1),'coord':nodeNumber2, 'nrOfPaths':0, 'diameter':float(dia[j][i+1])*avgScale} e = G.add_edge(v1, v2) epropW[e]=(((vprop[v1]['diameter']+vprop[v2]['diameter'])/2)/avgScale)**4 eprop[e]={'coord1':vprop[v1]['coord'], 
'coord2':vprop[v2]['coord'],'weight':((vprop[v1]['diameter']+vprop[v2]['diameter'])/2)**4,'RTP':False} except: pass try: if img[j,i-1] == True: nodeNumber2 = (float(i-1)*yScale,float(j)*xScale) if gu.find_vertex(G, vprop, {'imgIdx':(j,i-1),'coord':nodeNumber2, 'nrOfPaths':0, 'diameter':float(dia[j][i-1])*avgScale}): v2=gu.find_vertex(G, vprop, {'imgIdx':(j,i-1),'coord':nodeNumber2, 'nrOfPaths':0, 'diameter':float(dia[j][i-1])*avgScale})[0] if gu.find_edge(G, eprop, {'coord1':vprop[v2]['coord'], 'coord2':vprop[v1]['coord'],'weight':((vprop[v1]['diameter']+vprop[v2]['diameter'])/2)**4,'RTP':False}): pass else: e = G.add_edge(v1, v2) epropW[e]=(((vprop[v1]['diameter']+vprop[v2]['diameter'])/2)/avgScale)**4 eprop[e]={'coord1':vprop[v1]['coord'], 'coord2':vprop[v2]['coord'],'weight':((vprop[v1]['diameter']+vprop[v2]['diameter'])/2)**4,'RTP':False} else: v2=G.add_vertex() vprop[G.vertex(v2)]={'imgIdx':(j,i-1),'coord':nodeNumber2, 'nrOfPaths':0, 'diameter':float(dia[j][i-1])*avgScale} e = G.add_edge(v1, v2) epropW[e]=(((vprop[v1]['diameter']+vprop[v2]['diameter'])/2)/avgScale)**4 eprop[e]={'coord1':vprop[v1]['coord'], 'coord2':vprop[v2]['coord'],'weight':((vprop[v1]['diameter']+vprop[v2]['diameter'])/2)**4,'RTP':False} except:pass try: if img[j + 1,i] == True: nodeNumber2 = (float(i)*yScale,float(j+1)*xScale) if gu.find_vertex(G, vprop, {'imgIdx':(j+1,i),'coord':nodeNumber2, 'nrOfPaths':0, 'diameter':float(dia[j+1][i])*avgScale}): v2=gu.find_vertex(G, vprop, {'imgIdx':(j+1,i),'coord':nodeNumber2, 'nrOfPaths':0, 'diameter':float(dia[j+1][i])*avgScale})[0] if gu.find_edge(G, eprop, {'coord1':vprop[v2]['coord'], 'coord2':vprop[v1]['coord'],'weight':((vprop[v1]['diameter']+vprop[v2]['diameter'])/2)**4,'RTP':False}): pass else: e = G.add_edge(v1, v2) epropW[e]=(((vprop[v1]['diameter']+vprop[v2]['diameter'])/2)/avgScale)**4 eprop[e]={'coord1':vprop[v1]['coord'], 'coord2':vprop[v2]['coord'],'weight':((vprop[v1]['diameter']+vprop[v2]['diameter'])/2)**4,'RTP':False} else: 
v2=G.add_vertex() vprop[G.vertex(v2)]={'imgIdx':(j+1,i),'coord':nodeNumber2, 'nrOfPaths':0, 'diameter':float(dia[j+1][i])*avgScale} e = G.add_edge(v1, v2) epropW[e]=(((vprop[v1]['diameter']+vprop[v2]['diameter'])/2)/avgScale)**4 eprop[e]={'coord1':vprop[v1]['coord'], 'coord2':vprop[v2]['coord'],'weight':((vprop[v1]['diameter']+vprop[v2]['diameter'])/2)**4,'RTP':False} except:pass try: if img[j - 1,i] == True: nodeNumber2 = (float(i)*yScale,float(j-1)*xScale) if gu.find_vertex(G, vprop, {'imgIdx':(j-1,i),'coord':nodeNumber2, 'nrOfPaths':0, 'diameter':float(dia[j-1][i])*avgScale}): v2=gu.find_vertex(G, vprop, {'imgIdx':(j-1,i),'coord':nodeNumber2, 'nrOfPaths':0, 'diameter':float(dia[j-1][i])*avgScale})[0] if gu.find_edge(G, eprop, {'coord1':vprop[v2]['coord'], 'coord2':vprop[v1]['coord'],'weight':((vprop[v1]['diameter']+vprop[v2]['diameter'])/2)**4,'RTP':False}): pass else: e = G.add_edge(v1, v2) epropW[e]=(((vprop[v1]['diameter']+vprop[v2]['diameter'])/2)/avgScale)**4 eprop[e]={'coord1':vprop[v1]['coord'], 'coord2':vprop[v2]['coord'],'weight':((vprop[v1]['diameter']+vprop[v2]['diameter'])/2)**4,'RTP':False} else: v2=G.add_vertex() vprop[G.vertex(v2)]={'imgIdx':(j-1,i),'coord':nodeNumber2, 'nrOfPaths':0, 'diameter':float(dia[j-1][i])*avgScale} e = G.add_edge(v1, v2) epropW[e]=(((vprop[v1]['diameter']+vprop[v2]['diameter'])/2)/avgScale)**4 eprop[e]={'coord1':vprop[v1]['coord'], 'coord2':vprop[v2]['coord'],'weight':((vprop[v1]['diameter']+vprop[v2]['diameter'])/2)**4,'RTP':False} except: pass # print '100.0%' print 'selecting largest connected component' G.edge_properties["ep"] = eprop G.edge_properties["w"] = epropW G.vertex_properties["vp"] = vprop l = gt.label_largest_component(G) print(l.a) u = gt.GraphView(G, vfilt=l) print '# vertices' print(u.num_vertices()) print(G.num_vertices()) print '# edges' print(u.num_edges()) print 'building graph finished in: '+str(time.time()-start)+'s' return u
def gn(g, odir, focus=None):
    """
    Takes graph and uses Girvan Newman to slowly break graph down.
    Can operate in faster mode that constrains view to single graph.
    Creates new output_directory "odir" in which JSON graphs are placed,
    as well as names of clusters, and index mapping tests to clusters.
    -----------------------------------------
    g: graph_tool graph
    odir: output_directory
    focus: if True, only looks at clusters with vertex named "focus"
    """
    total_edges = g.num_edges()
    vprint(INFO, 'Applying Girvan Newman algorithm on %i edges...'
           % (g.num_edges()))
    if focus:
        vprint(INFO, 'Focused on %s' % (focus))
    ### Pull some properties of the graph out
    weight = g.ep['weight']
    name = g.vp['name']
    ### If focus, make sure focus actually there!
    if focus:
        if focus not in name:
            raise KeyboardInterrupt
    ### Initialize output
    vprint(INFO, 'Output: %s' % (os.path.abspath(odir)))
    if not os.path.exists(odir):
        vprint(INFO, '\tDirectory did not exist! Created.')
        os.mkdir(odir)
    json_name = os.path.join(odir, "%i_%i.json")
    text_name = os.path.join(odir, "%i_%i.txt")
    idx_name = os.path.join(odir, "index.csv")
    ### Create new property for graph & configure for fast
    g.ep["ebc"] = g.new_edge_property("float")
    g.set_fast_edge_removal(True)
    ### Initialize variables for tracking connected components
    ### and indexing
    if focus:
        cc_cts = [g.num_vertices()]
    else:
        cc_cts = [0]
    index = dict([(name[v], [(0, 0)]) for v in g.vertices()])
    ### Begin Girvan Newman algorithm
    _, _ = gt_bt(g, eprop=g.ep["ebc"], weight=weight, norm=False)
    while g.num_edges() != 0:
        ### Get & remove edge of max() betweenness; recalc betweenness
        maxedge = find_edge(g, g.ep["ebc"], g.ep["ebc"].a.max())[0]
        g.ep["ebc"] = g.new_edge_property("float")
        g.remove_edge(maxedge)
        _, _ = gt_bt(g, eprop=g.ep["ebc"], weight=weight, norm=False)
        ### If we're in a focused situation, find relevant cc
        if focus:
            cc_lbl = gt_cc_out(g, focus)
            cc_ct = cc_lbl.a.sum()
            g.set_vertex_filter(cc_lbl)  # Mask all other edge/verts
            ### If edge removed creates new clusters
            if cc_ct != cc_cts[-1]:
                iter_num = len(cc_cts)
                cc_cts.append(cc_ct)
                ### Write sections
                gt_to_json(g, json_name % (iter_num, 0))
                # FIX: original wrote text_name % (i - iter_num, 0) where
                # 'i' is undefined in this branch (NameError at runtime);
                # 'with' also guarantees the file gets closed
                with open(text_name % (iter_num, 0), 'w') as out:
                    for v in g.vertices():
                        if cc_lbl[v]:
                            out.write(name[v] + '\n')
                            index[name[v]].append((iter_num, 0))
        ### Otherwise...
        else:
            ### First, gather connected components
            cc_lbl, _ = gt_cc(g, directed=False)
            cc_ct = cc_lbl.a.max()
            ### If we've generated new clusters...
            if cc_ct != cc_cts[-1]:
                iter_num = len(cc_cts)
                cc_cts.append(cc_ct)
                ### Create a filter for each cc label
                filters = dict([(i, g.new_vertex_property("bool"))
                                for i in range(cc_lbl.a.max() + 1)])
                for v in g.vertices():
                    lbl = cc_lbl[v]
                    filters[lbl][v] = True
                for (i, f) in filters.iteritems():
                    if f.a.sum() == 1:
                        # FIX: original indexed a stale loop variable 'v';
                        # find the actual singleton vertex of this filter
                        for v in g.vertices():
                            if f[v]:
                                index[name[v]].append((iter_num, i))
                                break
                        continue
                    g.set_vertex_filter(f)
                    gt_to_json(g, json_name % (iter_num, i))
                    with open(text_name % (iter_num, i), 'w') as out:
                        for v in g.vertices():
                            if f[v]:
                                out.write(name[v] + '\n')
                                index[name[v]].append((iter_num, i))
                    g.set_vertex_filter(None)
        # Progress bar
        progress_bar(total_edges - g.num_edges(), total_edges, 100)
    ### Final step: break down index into legible file
    progress_bar_complete(total_edges)
    vprint(INFO, 'Making index file.')
    m = np.ones((len(index), len(cc_cts))) * -1
    ordered_keys = sorted(index.keys())
    for (key, i) in zip(ordered_keys, range(len(index))):
        for (iter_num, clust_num) in index[key]:
            m[i, iter_num] = clust_num
    write_mtx(idx_name, m, ordered_keys, range(len(cc_cts)))
    vprint(INFO, 'Girvan Newman Complete!')
    return None
def process(self):
    """Read a KGTK edge file, compute connected components, group vertices
    into clusters, drop clusters below minimum_cluster_size, name the rest
    via self.name_clusters, and write (node, 'connected_component',
    cluster-id) rows sorted by cluster then node.
    """
    input_kr: KgtkReader = KgtkReader.open(
        self.input_file_path,
        error_file=self.error_file,
        who="input",
        options=self.input_reader_options,
        value_options=self.value_options,
        verbose=self.verbose,
        very_verbose=self.very_verbose,
    )
    # indices of (node1, label, node2) in the input file
    input_key_columns: typing.List[int] = self.get_key_columns(
        input_kr, "input")
    label_col_idx = input_key_columns[1]
    label = input_kr.column_names[label_col_idx]
    g = load_graph_from_kgtk(input_kr, directed=not self.undirected)
    es = []
    header = ['node1', 'label', 'node2']
    if self.properties:
        # restrict the graph to edges whose label is in the requested set
        properties = self.properties.split(',')
        for e in properties:
            es += (find_edge(g, g.edge_properties[label], e))
        g.clear_edges()
        g.add_edge_list(list(set(es)))
    # strong=True labels strongly connected components on directed graphs
    comp, hist = label_components(g, directed=self.strong)
    ew: KgtkWriter = KgtkWriter.open(header,
                                     self.output_file_path,
                                     mode=input_kr.mode,
                                     require_all_columns=False,
                                     prohibit_extra_columns=True,
                                     fill_missing_columns=True,
                                     gzip_in_parallel=False,
                                     verbose=self.verbose,
                                     very_verbose=self.very_verbose)
    # group vertex names by their component id
    clusters: typing.MutableMapping[str, typing.List[str]] = dict()
    cluster_id: str
    name: str
    v: int
    for v, c in enumerate(comp):
        name = g.vertex_properties['name'][v]
        cluster_id = str(c)
        if cluster_id not in clusters:
            clusters[cluster_id] = [name]
        else:
            clusters[cluster_id].append(name)
    # keep only clusters meeting the size threshold
    trimmed_clusters: typing.MutableMapping[str, typing.List[str]] = dict()
    for cluster_id in clusters.keys():
        if len(clusters[cluster_id]) >= self.minimum_cluster_size:
            trimmed_clusters[cluster_id] = clusters[cluster_id]
    named_clusters: typing.MutableMapping[
        str, typing.List[str]] = self.name_clusters(trimmed_clusters)
    # deterministic output order: sorted by cluster id, then node name
    for cluster_id in sorted(named_clusters.keys()):
        for name in sorted(named_clusters[cluster_id]):
            ew.write([name, 'connected_component', cluster_id])
    ew.close()
def makeGraph(self, img, dia, xScale, yScale): print 'Building Graph Data Structure' start = time.time() G = Graph(directed=False) vprop = G.new_vertex_property('object') eprop = G.new_edge_property('object') epropW = G.new_edge_property("int32_t") avgScale = (xScale + yScale) / 2 test = np.where(img == True) ss = np.shape(test) cccc = 0 percentOld = 0.0 print str(np.round(percentOld, 1)) + '%' for (i, j) in zip(test[1], test[0]): cccc += 1 percent = (float(cccc) / float(ss[1])) * 100 if percentOld + 10 < percent: print str(np.round(percent, 1)) + '%' percentOld = percent nodeNumber1 = (float(i) * yScale, float(j) * xScale) if gu.find_vertex( G, vprop, { 'imgIdx': (j, i), 'coord': nodeNumber1, 'nrOfPaths': 0, 'diameter': float(dia[j][i]) * avgScale }): v1 = gu.find_vertex( G, vprop, { 'imgIdx': (j, i), 'coord': nodeNumber1, 'nrOfPaths': 0, 'diameter': float(dia[j][i]) * avgScale })[0] else: v1 = G.add_vertex() vprop[G.vertex(v1)] = { 'imgIdx': (j, i), 'coord': nodeNumber1, 'nrOfPaths': 0, 'diameter': float(dia[j][i]) * avgScale } try: if img[j, i + 1] == True: nodeNumber2 = (float(i + 1) * yScale, float(j) * xScale) if gu.find_vertex( G, vprop, { 'imgIdx': (j, i + 1), 'coord': nodeNumber2, 'nrOfPaths': 0, 'diameter': float(dia[j][i + 1]) * avgScale }): v2 = gu.find_vertex( G, vprop, { 'imgIdx': (j, i + 1), 'coord': nodeNumber2, 'nrOfPaths': 0, 'diameter': float(dia[j][i + 1]) * avgScale })[0] if gu.find_edge( G, eprop, { 'coord1': vprop[v2]['coord'], 'coord2': vprop[v1]['coord'], 'weight': ((vprop[v1]['diameter'] + vprop[v2]['diameter']) / 2)**4, 'RTP': False }): pass else: e = G.add_edge(v1, v2) epropW[e] = (((vprop[v1]['diameter'] + vprop[v2]['diameter']) / 2) / avgScale)**4 eprop[e] = { 'coord1': vprop[v1]['coord'], 'coord2': vprop[v2]['coord'], 'weight': ((vprop[v1]['diameter'] + vprop[v2]['diameter']) / 2)**4, 'RTP': False } else: v2 = G.add_vertex() vprop[G.vertex(v2)] = { 'imgIdx': (j, i + 1), 'coord': nodeNumber2, 'nrOfPaths': 0, 'diameter': float(dia[j][i 
+ 1]) * avgScale } e = G.add_edge(v1, v2) epropW[e] = ( ((vprop[v1]['diameter'] + vprop[v2]['diameter']) / 2) / avgScale)**4 eprop[e] = { 'coord1': vprop[v1]['coord'], 'coord2': vprop[v2]['coord'], 'weight': ((vprop[v1]['diameter'] + vprop[v2]['diameter']) / 2)**4, 'RTP': False } except: pass try: if img[j, i - 1] == True: nodeNumber2 = (float(i - 1) * yScale, float(j) * xScale) if gu.find_vertex( G, vprop, { 'imgIdx': (j, i - 1), 'coord': nodeNumber2, 'nrOfPaths': 0, 'diameter': float(dia[j][i - 1]) * avgScale }): v2 = gu.find_vertex( G, vprop, { 'imgIdx': (j, i - 1), 'coord': nodeNumber2, 'nrOfPaths': 0, 'diameter': float(dia[j][i - 1]) * avgScale })[0] if gu.find_edge( G, eprop, { 'coord1': vprop[v2]['coord'], 'coord2': vprop[v1]['coord'], 'weight': ((vprop[v1]['diameter'] + vprop[v2]['diameter']) / 2)**4, 'RTP': False }): pass else: e = G.add_edge(v1, v2) epropW[e] = (((vprop[v1]['diameter'] + vprop[v2]['diameter']) / 2) / avgScale)**4 eprop[e] = { 'coord1': vprop[v1]['coord'], 'coord2': vprop[v2]['coord'], 'weight': ((vprop[v1]['diameter'] + vprop[v2]['diameter']) / 2)**4, 'RTP': False } else: v2 = G.add_vertex() vprop[G.vertex(v2)] = { 'imgIdx': (j, i - 1), 'coord': nodeNumber2, 'nrOfPaths': 0, 'diameter': float(dia[j][i - 1]) * avgScale } e = G.add_edge(v1, v2) epropW[e] = ( ((vprop[v1]['diameter'] + vprop[v2]['diameter']) / 2) / avgScale)**4 eprop[e] = { 'coord1': vprop[v1]['coord'], 'coord2': vprop[v2]['coord'], 'weight': ((vprop[v1]['diameter'] + vprop[v2]['diameter']) / 2)**4, 'RTP': False } except: pass try: if img[j + 1, i] == True: nodeNumber2 = (float(i) * yScale, float(j + 1) * xScale) if gu.find_vertex( G, vprop, { 'imgIdx': (j + 1, i), 'coord': nodeNumber2, 'nrOfPaths': 0, 'diameter': float(dia[j + 1][i]) * avgScale }): v2 = gu.find_vertex( G, vprop, { 'imgIdx': (j + 1, i), 'coord': nodeNumber2, 'nrOfPaths': 0, 'diameter': float(dia[j + 1][i]) * avgScale })[0] if gu.find_edge( G, eprop, { 'coord1': vprop[v2]['coord'], 'coord2': vprop[v1]['coord'], 
'weight': ((vprop[v1]['diameter'] + vprop[v2]['diameter']) / 2)**4, 'RTP': False }): pass else: e = G.add_edge(v1, v2) epropW[e] = (((vprop[v1]['diameter'] + vprop[v2]['diameter']) / 2) / avgScale)**4 eprop[e] = { 'coord1': vprop[v1]['coord'], 'coord2': vprop[v2]['coord'], 'weight': ((vprop[v1]['diameter'] + vprop[v2]['diameter']) / 2)**4, 'RTP': False } else: v2 = G.add_vertex() vprop[G.vertex(v2)] = { 'imgIdx': (j + 1, i), 'coord': nodeNumber2, 'nrOfPaths': 0, 'diameter': float(dia[j + 1][i]) * avgScale } e = G.add_edge(v1, v2) epropW[e] = ( ((vprop[v1]['diameter'] + vprop[v2]['diameter']) / 2) / avgScale)**4 eprop[e] = { 'coord1': vprop[v1]['coord'], 'coord2': vprop[v2]['coord'], 'weight': ((vprop[v1]['diameter'] + vprop[v2]['diameter']) / 2)**4, 'RTP': False } except: pass try: if img[j - 1, i] == True: nodeNumber2 = (float(i) * yScale, float(j - 1) * xScale) if gu.find_vertex( G, vprop, { 'imgIdx': (j - 1, i), 'coord': nodeNumber2, 'nrOfPaths': 0, 'diameter': float(dia[j - 1][i]) * avgScale }): v2 = gu.find_vertex( G, vprop, { 'imgIdx': (j - 1, i), 'coord': nodeNumber2, 'nrOfPaths': 0, 'diameter': float(dia[j - 1][i]) * avgScale })[0] if gu.find_edge( G, eprop, { 'coord1': vprop[v2]['coord'], 'coord2': vprop[v1]['coord'], 'weight': ((vprop[v1]['diameter'] + vprop[v2]['diameter']) / 2)**4, 'RTP': False }): pass else: e = G.add_edge(v1, v2) epropW[e] = (((vprop[v1]['diameter'] + vprop[v2]['diameter']) / 2) / avgScale)**4 eprop[e] = { 'coord1': vprop[v1]['coord'], 'coord2': vprop[v2]['coord'], 'weight': ((vprop[v1]['diameter'] + vprop[v2]['diameter']) / 2)**4, 'RTP': False } else: v2 = G.add_vertex() vprop[G.vertex(v2)] = { 'imgIdx': (j - 1, i), 'coord': nodeNumber2, 'nrOfPaths': 0, 'diameter': float(dia[j - 1][i]) * avgScale } e = G.add_edge(v1, v2) epropW[e] = ( ((vprop[v1]['diameter'] + vprop[v2]['diameter']) / 2) / avgScale)**4 eprop[e] = { 'coord1': vprop[v1]['coord'], 'coord2': vprop[v2]['coord'], 'weight': ((vprop[v1]['diameter'] + vprop[v2]['diameter']) / 
2)**4, 'RTP': False } except: pass # print '100.0%' print 'selecting largest connected component' G.edge_properties["ep"] = eprop G.edge_properties["w"] = epropW G.vertex_properties["vp"] = vprop l = gt.label_largest_component(G) print(l.a) u = gt.GraphView(G, vfilt=l) print '# vertices' print(u.num_vertices()) print(G.num_vertices()) print '# edges' print(u.num_edges()) print 'building graph finished in: ' + str(time.time() - start) + 's' return u
def gn(g, odir, focus=None):
    """
    Takes graph and uses Girvan Newman to slowly break graph down.
    Can operate in faster mode that constrains view to single graph.
    Creates new output_directory "odir" in which JSON graphs are placed,
    as well as names of clusters, and index mapping tests to clusters.
    -----------------------------------------
    g: graph_tool graph
    odir: output_directory
    focus: if True, only looks at clusters with vertex named "focus"
    """
    total_edges = g.num_edges()
    vprint(INFO, 'Applying Girvan Newman algorithm on %i edges...'
           % (g.num_edges()))
    if focus:
        vprint(INFO, 'Focused on %s' % (focus))
    ### Pull some properties of the graph out
    weight = g.ep['weight']
    name = g.vp['name']
    ### If focus, make sure focus actually there!
    if focus:
        if focus not in name:
            raise KeyboardInterrupt
    ### Initialize output
    vprint(INFO, 'Output: %s' % (os.path.abspath(odir)))
    if not os.path.exists(odir):
        vprint(INFO, '\tDirectory did not exist! Created.')
        os.mkdir(odir)
    json_name = os.path.join(odir, "%i_%i.json")
    text_name = os.path.join(odir, "%i_%i.txt")
    idx_name = os.path.join(odir, "index.csv")
    ### Create new property for graph & configure for fast
    g.ep["ebc"] = g.new_edge_property("float")
    g.set_fast_edge_removal(True)
    ### Initialize variables for tracking connected components
    ### and indexing
    if focus:
        cc_cts = [g.num_vertices()]
    else:
        cc_cts = [0]
    index = dict([(name[v], [(0, 0)]) for v in g.vertices()])
    ### Begin Girvan Newman algorithm
    _, _ = gt_bt(g, eprop=g.ep["ebc"], weight=weight, norm=False)
    while g.num_edges() != 0:
        ### Get & remove edge of max() betweenness; recalc betweenness
        maxedge = find_edge(g, g.ep["ebc"], g.ep["ebc"].a.max())[0]
        g.ep["ebc"] = g.new_edge_property("float")
        g.remove_edge(maxedge)
        _, _ = gt_bt(g, eprop=g.ep["ebc"], weight=weight, norm=False)
        ### If we're in a focused situation, find relevant cc
        if focus:
            cc_lbl = gt_cc_out(g, focus)
            cc_ct = cc_lbl.a.sum()
            g.set_vertex_filter(cc_lbl)  # Mask all other edge/verts
            ### If edge removed creates new clusters
            if cc_ct != cc_cts[-1]:
                iter_num = len(cc_cts)
                cc_cts.append(cc_ct)
                ### Write sections
                gt_to_json(g, json_name % (iter_num, 0))
                # FIX: original wrote text_name % (i - iter_num, 0) where
                # 'i' is undefined in this branch (NameError at runtime);
                # 'with' also guarantees the file gets closed
                with open(text_name % (iter_num, 0), 'w') as out:
                    for v in g.vertices():
                        if cc_lbl[v]:
                            out.write(name[v] + '\n')
                            index[name[v]].append((iter_num, 0))
        ### Otherwise...
        else:
            ### First, gather connected components
            cc_lbl, _ = gt_cc(g, directed=False)
            cc_ct = cc_lbl.a.max()
            ### If we've generated new clusters...
            if cc_ct != cc_cts[-1]:
                iter_num = len(cc_cts)
                cc_cts.append(cc_ct)
                ### Create a filter for each cc label
                filters = dict([(i, g.new_vertex_property("bool"))
                                for i in range(cc_lbl.a.max() + 1)])
                for v in g.vertices():
                    lbl = cc_lbl[v]
                    filters[lbl][v] = True
                for (i, f) in filters.iteritems():
                    if f.a.sum() == 1:
                        # FIX: original indexed a stale loop variable 'v';
                        # find the actual singleton vertex of this filter
                        for v in g.vertices():
                            if f[v]:
                                index[name[v]].append((iter_num, i))
                                break
                        continue
                    g.set_vertex_filter(f)
                    gt_to_json(g, json_name % (iter_num, i))
                    with open(text_name % (iter_num, i), 'w') as out:
                        for v in g.vertices():
                            if f[v]:
                                out.write(name[v] + '\n')
                                index[name[v]].append((iter_num, i))
                    g.set_vertex_filter(None)
        # Progress bar
        progress_bar(total_edges - g.num_edges(), total_edges, 100)
    ### Final step: break down index into legible file
    progress_bar_complete(total_edges)
    vprint(INFO, 'Making index file.')
    m = np.ones((len(index), len(cc_cts))) * -1
    ordered_keys = sorted(index.keys())
    for (key, i) in zip(ordered_keys, range(len(index))):
        for (iter_num, clust_num) in index[key]:
            m[i, iter_num] = clust_num
    write_mtx(idx_name, m, ordered_keys, range(len(cc_cts)))
    vprint(INFO, 'Girvan Newman Complete!')
    return None