def QueryCollapsedGraphs(dataset, request_type, fuzzy):
    """
    Find motifs in the graphs with collapsed node sequences. Saves the motifs to file.

    Every frequent subgraph is searched for in the collapsed graph of every
    trace. Each occurrence is expanded back to the trace nodes behind its
    collapsed vertices and the resulting motifs are written to
    'motifs/subgraphs/<dataset>/'. Traces whose output file already exists
    are skipped.

    @params dataset: dataset to find motifs in the collapsed sequences
    @params request_type: request for this particular set of traces
    @params fuzzy: can this motif be fuzzy?
    """
    # create the directory structure (all levels at once, no check-then-create race)
    os.makedirs('motifs/subgraphs/{}'.format(dataset), exist_ok=True)

    # read the frequent subgraphs for this dataset/request type
    subgraphs = IdentifyFrequentSubgraphs(dataset, request_type, True, fuzzy)

    # read all of the traces
    traces = ReadTraces(dataset, request_type, None)
    for trace in traces:
        # start statistics
        start_time = time.time()

        # the motifs for this trace are written to this file
        if fuzzy:
            output_filename = 'motifs/subgraphs/{}/{}-motifs-fuzzy-collapsed-complete.motifs'.format(dataset, trace.base_id)
        else:
            output_filename = 'motifs/subgraphs/{}/{}-motifs-collapsed-complete.motifs'.format(dataset, trace.base_id)

        # skip over the trace if its motifs were already mined
        if os.path.exists(output_filename):
            continue

        # reduced nodes to nodes is a function to go from the reduced node
        # space to the original graph
        # NOTE(review): `graph` was never bound in this function although it is
        # used by the isomorphism search below; this assumes the helper returns
        # (graph, reduced_nodes_to_nodes) -- confirm against
        # ConvertCollapsedGraph2GraphTool
        graph, reduced_nodes_to_nodes = ConvertCollapsedGraph2GraphTool(trace, fuzzy)

        motifs = []

        for motif_index, subgraph in enumerate(subgraphs):
            motif = ConvertSubGraph2GraphTool(subgraph)

            # use graph tool to find all motif occurrences
            vertex_maps = gt.subgraph_isomorphism(motif, graph, vertex_label=(motif.vp.label, graph.vp.label))

            # go through all of the found motif patterns
            for vertex_map in vertex_maps:
                nodes = []
                # add all of the nodes that belong to each vertex (collapsed nodes)
                for reduced_node in vertex_map:
                    for node in reduced_nodes_to_nodes[reduced_node]:
                        # sanity check: a trace node may appear only once per motif
                        assert trace.nodes[node] not in nodes
                        nodes.append(trace.nodes[node])

                motifs.append(Motif(trace, nodes, motif_index))

        # write the motifs to disk
        WriteMotifs(output_filename, motifs)

        # print statistics
        print ('Mined traces for {} in {:0.2f} seconds.'.format(trace.base_id, time.time() - start_time))
def QueryTraces(dataset, request_type):
    """
    Find all occurrences for each motif for this dataset/request_type combination.

    Every frequent subgraph is searched for in the graph of every trace and
    the occurrences are written to
    'motifs/subgraphs/<dataset>/<base_id>-motifs-complete.motifs'. Traces
    whose output file already exists are skipped.

    @params dataset: the dataset to mine for frequent sub graphs
    @params request_type: the request type for this set of traces
    """
    # create the directory structure (all levels at once, no check-then-create race)
    os.makedirs('motifs/subgraphs/{}'.format(dataset), exist_ok=True)

    # read the frequent subgraphs for this dataset/request type
    subgraphs = IdentifyFrequentSubgraphs(dataset, request_type, False, False)

    # read all of the traces and mine the graph
    traces = ReadTraces(dataset, request_type, None)
    for trace in traces:
        # start statistics (timed per trace)
        start_time = time.time()

        # skip over the file if it already exists
        output_filename = 'motifs/subgraphs/{}/{}-motifs-complete.motifs'.format(dataset, trace.base_id)
        if os.path.exists(output_filename):
            continue

        graph = ConvertTrace2GraphTool(dataset, trace)

        motifs = []

        for motif_index, subgraph in enumerate(subgraphs):
            motif = ConvertSubGraph2GraphTool(subgraph)

            # use graph tool to find all motif occurrences
            vertex_maps = gt.subgraph_isomorphism(motif, graph, vertex_label=(motif.vp.label, graph.vp.label))

            # go through all of the found motif patterns
            for vertex_map in vertex_maps:
                nodes = [trace.nodes[node] for node in vertex_map]
                motifs.append(Motif(trace, nodes, motif_index))

        # write the motifs to disk
        WriteMotifs(output_filename, motifs)

        # print statistics
        print ('Mined traces for {} in {:0.2f} seconds.'.format(trace.base_id, time.time() - start_time))
def assert_graph_equal(expect, other):
    """
    Assert that ``other`` is equal to the ``expect`` graph.

    The graphs must have the same number of vertices and at least one
    isomorphism between them under which every vertex passes
    ``assert_vertex_equal`` and every out-edge passes ``assert_edge_equal``.
    When no isomorphism qualifies, the test is failed through ``pytest.fail``
    with the failure recorded for each isomorphism and a dump of both graphs.

    This is intended mostly for testing purposes.
    """
    __tracebackhide__ = True

    def _dump_both():
        # vertex/edge dumps of both graphs, expected graph first
        return (dump_vertices(expect), dump_edges(expect),
                dump_vertices(other), dump_edges(other))

    assert expect.num_vertices() == other.num_vertices(), (
        "Number of vertices differ expected %d, found %d\n%s\n%s\n\n%s\n%s"
        % ((expect.num_vertices(), other.num_vertices()) + _dump_both()))

    n_vertices = expect.num_vertices()

    # The topologies have to be isomorphic, and among all isomorphisms there
    # must be one under which the vertex data matches, so each is tried in turn.
    isomaps = subgraph_isomorphism(expect, other, subgraph=False)
    assert len(isomaps) > 0, (
        "Graph topology differ:\n%s\n%s\n\n%s\n%s" % _dump_both())

    errors = []
    for isomap in isomaps:
        try:
            for v in expect.vertices():
                # the vertex itself has to match its image ...
                assert_vertex_equal(expect, v, other, isomap[v])
                # ... and so does every edge leaving it
                for v_out in v.out_neighbours():
                    expected_edge = expect.edge(v, v_out)
                    found_edge = other.edge(isomap[v], isomap[v_out])
                    assert_edge_equal(expect, expected_edge, other, found_edge)
        except AssertionError as ex:
            # remember the vertex being checked when this isomorphism failed
            errors.append((v, ex))
        else:
            # this isomorphism matches completely: the graphs are equal
            return

    # all isomaps raised AssertionErrors
    report = [
        "Graphs do not have an isomorphism for which vertices match.\n",
        "Failures for each isomorphism:\n",
    ]
    for failure, iso in zip(errors, isomaps):
        report.append("Isomap: %s\n" % [iso[i] for i in range(n_vertices)])
        report.append("vertex %d: %s\n\n" % failure)
    report.append("Expected graph:\n%s\n%s\n\n"
                  % (dump_vertices(expect), dump_edges(expect)))
    report.append("Found graph:\n%s\n%s\n\n"
                  % (dump_vertices(other), dump_edges(other)))
    pytest.fail("".join(report))
def index_query_graphs(q_gs, f_sgs):
    """
    Build the feature matrix of the query graphs.

    The returned array has one row per graph in ``q_gs`` and one column per
    graph in ``f_sgs``. An entry is 1 when the column's graph has at least
    one subgraph isomorphism into the row's graph (vertices matched on the
    "molecule" vertex property, edges on the "bond" edge property) and 0
    otherwise.
    """
    fvectors = np.zeros((len(q_gs), len(f_sgs)))

    for column, feature in enumerate(f_sgs):
        for row, query in enumerate(q_gs):
            # One mapping is enough to decide containment (max_n=1); the call
            # returns a list of maps that is empty when `feature` is not a
            # subgraph of `query`.
            found = gt.subgraph_isomorphism(
                feature,
                query,
                max_n=1,
                vertex_label=(feature.vertex_properties["molecule"],
                              query.vertex_properties["molecule"]),
                edge_label=(feature.edge_properties["bond"],
                            query.edge_properties["bond"]),
                induced=False,
                subgraph=True,
                generator=False)
            if len(found) > 0:
                fvectors[row, column] = 1

    return fvectors
# Feature vector of every query graph over the frequent subgraphs.
query_fvectors = index_query_graphs(qgs, fgs)
database_graphs = get_database_graph_objects(outfile)
mapping = get_map(mapfile)

# result[i] collects the mapped ids of the database graphs matching query i.
result = {}
for i in range(query_fvectors.shape[0]):
    result[i] = []
    for j in range(fvectors.shape[0]):
        # Filter step: keep graph j only when every feature set in the query
        # vector is also set in fvectors[j] (presumably the feature vectors of
        # the database graphs, built elsewhere -- confirm).
        if not np.all(query_fvectors[i] == np.logical_and(
                fvectors[j], query_fvectors[i])):
            continue

        # Verification step: the filter is only a necessary condition, so the
        # candidate has to be confirmed with a real subgraph isomorphism test.
        x = gt.subgraph_isomorphism(
            qgs[i],
            database_graphs[j],
            max_n=1,
            vertex_label=(
                qgs[i].vertex_properties['molecule'],
                database_graphs[j].vertex_properties['molecule']),
            edge_label=(qgs[i].edge_properties['bond'],
                        database_graphs[j].edge_properties['bond']),
            induced=False,
            subgraph=True,
            generator=False)

        # An empty list of maps means the query is not a subgraph of graph j;
        # only confirmed matches are reported.
        if len(x) > 0:
            result[i].append(str(mapping[str(j)]))

# One line per query, in query order: tab-separated ids of its matches.
qs = len(result)
with open(output_filename, 'w') as f:
    for i in range(qs):
        f.write('\t'.join(result[i]) + '\n')