Пример #1
0
def QueryCollapsedGraphs(dataset, request_type, fuzzy):
    """
    Find motifs in the graphs with collapsed node sequences. Saves the motifs to
    file, one file per trace under motifs/subgraphs/<dataset>/. Traces whose
    output file already exists are skipped.
    @params dataset: dataset to find motifs in the collapsed sequences
    @params request_type: request for this particular set of traces
    @params fuzzy: can this motif be fuzzy?
    """
    # create the directory structure; tolerate the directory appearing
    # concurrently instead of racing between an exists() check and mkdir()
    output_directory = 'motifs/subgraphs/{}'.format(dataset)
    try:
        os.makedirs(output_directory)
    except OSError:
        if not os.path.isdir(output_directory):
            raise

    # read the frequent subgraphs for this dataset/request type
    subgraphs = IdentifyFrequentSubgraphs(dataset, request_type, True, fuzzy)
    # read all of the traces
    traces = ReadTraces(dataset, request_type, None)

    # the filename pattern depends only on fuzzy, so pick it once
    if fuzzy:
        filename_pattern = 'motifs/subgraphs/{}/{}-motifs-fuzzy-collapsed-complete.motifs'
    else:
        filename_pattern = 'motifs/subgraphs/{}/{}-motifs-collapsed-complete.motifs'

    # graph-tool versions of the subgraphs; they do not depend on the trace, so
    # they are built once (and only if at least one trace still needs mining)
    motif_graphs = None

    for trace in traces:
        # start statistics
        start_time = time.time()

        # skip over the trace if its motifs were already written
        output_filename = filename_pattern.format(dataset, trace.base_id)
        if os.path.exists(output_filename):
            continue

        # reduced nodes to nodes is a function to go from the reduced node space to the original
        graph, reduced_nodes_to_nodes = ConvertCollapsedGraph2GraphTool(trace, fuzzy)

        if motif_graphs is None:
            motif_graphs = [ConvertSubGraph2GraphTool(subgraph) for subgraph in subgraphs]

        motifs = []
        for motif_index, motif in enumerate(motif_graphs):
            # use graph tool to find all motif occurrences
            vertex_maps = gt.subgraph_isomorphism(motif, graph, vertex_label=(motif.vp.label, graph.vp.label))

            # go through all of the found motif patterns
            for vertex_map in vertex_maps:
                nodes = []
                # add all of the nodes that belong to each vertex (collapsed nodes)
                for reduced_node in vertex_map:
                    for node in reduced_nodes_to_nodes[reduced_node]:
                        # sanity check: a trace node may appear only once per occurrence
                        assert trace.nodes[node] not in nodes
                        nodes.append(trace.nodes[node])

                motifs.append(Motif(trace, nodes, motif_index))

        # write the motifs to disk
        WriteMotifs(output_filename, motifs)

        # print statistics
        print('Mined traces for {} in {:0.2f} seconds.'.format(trace.base_id, time.time() - start_time))
Пример #2
0
def QueryTraces(dataset, request_type):
    """
    Find all occurrences for each motif for this dataset/request_type combination.
    Writes one motif file per trace under motifs/subgraphs/<dataset>/. Traces
    whose output file already exists are skipped.
    @params dataset: the dataset to mine for frequent sub graphs
    @params request_type: the request type for this set of traces
    """
    # create the directory structure; tolerate the directory appearing
    # concurrently instead of racing between an exists() check and mkdir()
    output_directory = 'motifs/subgraphs/{}'.format(dataset)
    try:
        os.makedirs(output_directory)
    except OSError:
        if not os.path.isdir(output_directory):
            raise

    # read the frequent subgraphs for this dataset/request type
    subgraphs = IdentifyFrequentSubgraphs(dataset, request_type, False, False)

    # read all of the traces and mine the graph
    traces = ReadTraces(dataset, request_type, None)

    # graph-tool versions of the subgraphs; they do not depend on the trace, so
    # they are built once (and only if at least one trace still needs mining)
    motif_graphs = None

    for trace in traces:
        # start statistics (timed per trace)
        start_time = time.time()

        # skip over the file if it already exists
        output_filename = 'motifs/subgraphs/{}/{}-motifs-complete.motifs'.format(dataset, trace.base_id)
        if os.path.exists(output_filename):
            continue

        graph = ConvertTrace2GraphTool(dataset, trace)

        if motif_graphs is None:
            motif_graphs = [ConvertSubGraph2GraphTool(subgraph) for subgraph in subgraphs]

        motifs = []
        for motif_index, motif in enumerate(motif_graphs):
            # use graph tool to find all motif occurrences
            vertex_maps = gt.subgraph_isomorphism(motif, graph, vertex_label=(motif.vp.label, graph.vp.label))

            # go through all of the found motif patterns
            for vertex_map in vertex_maps:
                nodes = [trace.nodes[node] for node in vertex_map]
                motifs.append(Motif(trace, nodes, motif_index))

        # write the motifs to disk
        WriteMotifs(output_filename, motifs)

        # print statistics
        print('Mined traces for {} in {:0.2f} seconds.'.format(trace.base_id, time.time() - start_time))
Пример #3
0
def assert_graph_equal(expect, other):
    """
    Assert that ``other`` is equal to the graph ``expect``; meant for tests.

    The graphs must have the same number of vertices, and at least one of
    the mappings returned by ``subgraph_isomorphism`` must pair every vertex
    and every out-edge of ``expect`` with a counterpart in ``other`` that
    passes ``assert_vertex_equal`` / ``assert_edge_equal``.  When every
    mapping is rejected, the test is failed through ``pytest.fail`` with a
    report of the failure recorded for each mapping and a dump of both graphs.
    """
    __tracebackhide__ = True

    n_vertices = expect.num_vertices()
    assert n_vertices == other.num_vertices(), \
        "Number of vertices differ expected %d, found %d\n%s\n%s\n\n%s\n%s" % (
            n_vertices, other.num_vertices(),
            dump_vertices(expect), dump_edges(expect),
            dump_vertices(other), dump_edges(other))

    # Topology first: some isomorphism has to exist at all.  The vertex and
    # edge data only needs to agree under one of them, so each is tried in turn.
    candidates = subgraph_isomorphism(expect, other, subgraph=False)
    assert len(candidates) > 0, "Graph topology differ:\n%s\n%s\n\n%s\n%s" % (
        dump_vertices(expect), dump_edges(expect),
        dump_vertices(other), dump_edges(other))

    failures = []
    matched = False
    for mapping in candidates:
        try:
            for vertex in expect.vertices():
                assert_vertex_equal(expect, vertex, other, mapping[vertex])
                for neighbour in vertex.out_neighbours():
                    assert_edge_equal(
                        expect, expect.edge(vertex, neighbour),
                        other, other.edge(mapping[vertex], mapping[neighbour]))
        except AssertionError as err:
            # remember the vertex being checked when this mapping was rejected
            failures.append((vertex, err))
        else:
            matched = True
            break

    if matched:
        return

    # every candidate mapping raised an AssertionError: build the report
    report = [
        "Graphs do not have an isomorphism for which vertices match.\n",
        "Failures for each isomorphism:\n",
    ]
    for failure, mapping in zip(failures, candidates):
        report.append("Isomap: %s\n" % [mapping[i] for i in range(n_vertices)])
        report.append("vertex %d: %s\n\n" % failure)
    report.append("Expected graph:\n%s\n%s\n\n" % (dump_vertices(expect),
                                                   dump_edges(expect)))
    report.append("Found graph:\n%s\n%s\n\n" % (dump_vertices(other),
                                                dump_edges(other)))
    pytest.fail("".join(report))
def index_query_graphs(q_gs, f_sgs):
    """
    Build a feature-presence matrix for the query graphs.

    Returns an array of shape ``(len(q_gs), len(f_sgs))`` in which entry
    ``[q, f]`` is 1 when ``gt.subgraph_isomorphism`` reports at least one
    match of feature subgraph ``f_sgs[f]`` in query graph ``q_gs[q]``
    (matching on the "molecule" vertex property and the "bond" edge
    property), and 0 otherwise.
    """
    fvectors = np.zeros((len(q_gs), len(f_sgs)))
    for column, feature in enumerate(f_sgs):
        for row, query in enumerate(q_gs):
            # max_n=1: a single match is enough to know the feature is present
            matches = gt.subgraph_isomorphism(
                feature,
                query,
                max_n=1,
                vertex_label=(feature.vertex_properties["molecule"],
                              query.vertex_properties["molecule"]),
                edge_label=(feature.edge_properties["bond"],
                            query.edge_properties["bond"]),
                induced=False,
                subgraph=True,
                generator=False)
            # an empty list of maps means the feature is not a subgraph
            if len(matches) > 0:
                fvectors[row, column] = 1
    return fvectors
    # NOTE(review): this is the body of a function whose header is not visible
    # here; qgs, fgs, fvectors, outfile, mapfile and output_filename are all
    # defined outside this fragment.

    # Feature-presence vectors of the query graphs: one row per query graph,
    # one column per feature subgraph.
    query_fvectors = index_query_graphs(qgs, fgs)

    database_graphs = get_database_graph_objects(outfile)
    mapping = get_map(mapfile)

    # result[i] collects the mapped identifiers (as strings) selected for query i
    result = {}
    for i in range(query_fvectors.shape[0]):
        result[i] = []
        # presumably fvectors holds one feature vector per database graph,
        # row-aligned with database_graphs -- TODO confirm
        for j in range(fvectors.shape[0]):
            # Filter step: passes only when every feature set in the query
            # vector is also non-zero in fvectors[j].
            if (np.all(query_fvectors[i] == np.logical_and(
                    fvectors[j], query_fvectors[i]))):
                # NOTE(review): x is never inspected, so every candidate that
                # passes the filter is appended even if no isomorphism was
                # found -- looks like a missing `len(x) > 0` check; confirm.
                x = gt.subgraph_isomorphism(
                    qgs[i],
                    database_graphs[j],
                    max_n=1,
                    vertex_label=(
                        qgs[i].vertex_properties['molecule'],
                        database_graphs[j].vertex_properties['molecule']),
                    edge_label=(qgs[i].edge_properties['bond'],
                                database_graphs[j].edge_properties['bond']),
                    induced=False,
                    subgraph=True,
                    generator=False)

                result[i].append(str(mapping[str(j)]))

    # Write one tab-separated line per query, in query order.
    qs = len(result)
    with open(output_filename, 'w') as f:
        for i in range(qs):
            f.write('\t'.join(result[i]) + '\n')