Пример #1
0
def perform_rpq(graph,
                regex_automaton,
                start_lst,
                end_lst,
                use_tc_method_adj=False):
    """Evaluate a regular path query over a labelled graph.

    For every label of the query automaton the graph matrix and the
    automaton matrix are combined with a Kronecker product.  The combined
    matrix is closed transitively, and every closure entry that links a
    (graph vertex, automaton start state) pair to a
    (graph vertex, automaton final state) pair is projected back onto the
    graph vertices.

    Args:
        graph: object exposing ``graph_dict`` (label -> matrix) and
            ``num_vert``.
        regex_automaton: object exposing ``to_GrB_matrix()`` (a label-indexed
            mapping of matrices), ``num_vert``, ``start_states`` and
            ``final_states``.
        start_lst: accepted for interface compatibility.
            NOTE(review): not consulted by this function; every graph vertex
            is treated as a possible start vertex.
        end_lst: accepted for interface compatibility.
            NOTE(review): not consulted by this function; every graph vertex
            is treated as a possible end vertex.
        use_tc_method_adj: when truthy, the closure is computed with
            ``get_transitive_closure_adj`` instead of
            ``get_transitive_closure``.

    Returns:
        A tuple ``(reachability_matrix, nvals)`` where ``reachability_matrix``
        is a ``graph.num_vert`` x ``graph.num_vert`` boolean matrix of
        reachable vertex pairs and ``nvals`` is the number of stored entries
        of the filtered closure of the intersection.
    """
    query_dict = regex_automaton.to_GrB_matrix()
    graph_dict = graph.graph_dict
    tmp_graph_dict = {}
    num_vert = 0

    # Getting intersection with kronecker product
    for label in query_dict:
        tmp_graph_dict[label] = graph_dict[label].kronecker(query_dict[label])
        if num_vert == 0:
            num_vert = tmp_graph_dict[label].ncols

    # To GrB matrix
    tmp = LabelGraph()
    tmp.graph_dict = tmp_graph_dict
    tmp.num_vert = num_vert
    result = tmp.to_GrB_matrix()

    # A (graph vertex, automaton state) pair is stored in the product matrix
    # under the single index graph_vertex * regex_size + automaton_state.
    regex_size = regex_automaton.num_vert
    graph_vertices = range(graph.num_vert)
    start_states = {
        v_graph * regex_size + v_regex
        for v_graph, v_regex in product(graph_vertices,
                                        regex_automaton.start_states)
    }
    final_states = {
        v_graph * regex_size + v_regex
        for v_graph, v_regex in product(graph_vertices,
                                        regex_automaton.final_states)
    }

    if use_tc_method_adj:
        closure = get_transitive_closure_adj(result)
    else:
        closure = get_transitive_closure(result)
    reachability_matrix_ = closure.select(lib.GxB_NONZERO)
    reachability_matrix = Matrix.sparse(BOOL, graph.num_vert, graph.num_vert)
    print("Started r_m\n")

    # reachability_matrix_ is already restricted to non-zero entries, so it
    # is iterated directly instead of being filtered a second time.
    for v_i, v_j, _ in zip(*reachability_matrix_.to_lists()):
        if v_i in start_states and v_j in final_states:
            # Getting initial graph vertex from index in result matrix
            reachability_matrix[v_i // regex_size, v_j // regex_size] = True
    return (reachability_matrix, reachability_matrix_.nvals)
Пример #2
0
def test_rpq():
    """Compare ``perform_rpq`` results with the stored reachability files.

    For every test index the regex and the graph are loaded from
    ``DATA_DIR``, the query is evaluated with the adjacency-based closure
    method (``use_tc_method_adj=True``), and the resulting matrix is checked
    against the vertex pairs listed in ``reachability_{i}.txt``.
    """
    for i in range(n_tests):
        # read Regex from file; the context manager closes the handle
        with open(os.path.join(DATA_DIR, f'regex_{i}.txt'), 'r') as regex_f:
            regex_string = regex_f.read()
        regex_automaton = RegexAutomaton(regex_string)

        # read GrB matrix from file
        graph = LabelGraph().from_txt(os.path.join(DATA_DIR, f'graph_{i}.txt'))
        start_lst = list(range(graph.num_vert))
        end_lst = list(range(graph.num_vert))

        res_matrix = perform_rpq(graph, regex_automaton, start_lst, end_lst,
                                 True)[0]

        # each line of the expectation file holds one "from to" pair
        reachability = set()
        with open(os.path.join(DATA_DIR, f'reachability_{i}.txt'), 'r') as f:
            for line in f:
                v, to = line.split(' ')
                reachability.add((int(v), int(to)))

        assert (res_matrix.nvals == len(reachability))
        for v, to, _ in zip(*res_matrix.to_lists()):
            if res_matrix[v, to] is True:
                assert ((v, to) in reachability)
            else:
                assert ((v, to) not in reachability)
Пример #3
0
def test_cfpq_grammar_2():
    """Check ``cfpq_matrix_mult`` with ``TEST_GRAMMARS[1]`` on every graph.

    The pairs reported for each ``graph_{i}.txt`` must equal the pairs
    listed in ``expected_1_{i}.txt``.
    """
    # The grammar does not depend on the graph, so it is parsed once.  The
    # graphs are visited in a single pass: a second, enclosing loop over the
    # same index would only repeat identical checks.
    g_2 = GrammarCNF.from_text(TEST_GRAMMARS[1])

    for i in range(NUM_GRAPHS):
        graph = LabelGraph().from_txt(
            os.path.join(DATA_DIR, f'graph_{i}.txt'))
        result = cfpq_matrix_mult(graph, g_2)

        # each line of the expectation file holds one "from to" pair
        expected = set()
        with open(os.path.join(DATA_DIR, f'expected_1_{i}.txt'), 'r') as f:
            for line in f:
                v, to = line.split(' ')
                expected.add((int(v), int(to)))

        edges = set(LabelGraph.get_reachable(result))
        assert edges == expected
def test_cfpq_brackets():
    """Check ``cfpq_tensor_product`` with ``TEST_GRAMMARS[0]`` on every graph.

    The pairs reported for each ``graph_{i}.txt`` must equal the pairs
    listed in ``expected_0_{i}.txt``.
    """
    # The grammar does not depend on the graph, so it is parsed once.  The
    # graphs are visited in a single pass: a second, enclosing loop over the
    # same index would only repeat identical checks.
    brackets = CFGWrapper.from_text(TEST_GRAMMARS[0])

    for i in range(NUM_GRAPHS):
        graph = LabelGraph().from_txt(
            os.path.join(DATA_DIR, f'graph_{i}.txt'))
        result = cfpq_tensor_product(graph, brackets)

        # each line of the expectation file holds one "from to" pair
        expected = set()
        with open(os.path.join(DATA_DIR, f'expected_0_{i}.txt'), 'r') as f:
            for line in f:
                v, to = line.split(' ')
                expected.add((int(v), int(to)))

        edges = set(LabelGraph.get_reachable(result))
        assert edges == expected
Пример #5
0
def cfpq_matrix_mult(g: LabelGraph, cfg: GrammarCNF):
    """Answer a context-free path query by repeated matrix multiplication.

    One boolean matrix is kept per grammar variable.  The matrices are seeded
    from the graph edges through single-terminal productions (and from the
    diagonal when the grammar generates epsilon), then every pair production
    ``head -> body[0] body[1]`` adds ``M[body[0]] @ M[body[1]]`` to
    ``M[head]`` until no matrix gains a stored entry.

    Args:
        g: graph exposing ``num_vert``, ``graph_dict``, ``vertices`` and
            ``get_edges(label)``.
        cfg: grammar exposing ``start_symbol``, ``variables``,
            ``productions``, ``pair_productions`` and ``generate_epsilon()``.

    Returns:
        The boolean matrix stored for the start symbol; an empty 0 x 0 matrix
        when the graph has no vertices.
    """
    num_vert = g.num_vert
    if num_vert == 0:
        return Matrix.sparse(BOOL, num_vert, num_vert)
    result = LabelGraph()
    start_sym = cfg.start_symbol
    result.num_vert = num_vert
    for variable in cfg.variables:
        result.graph_dict[variable] = Matrix.sparse(BOOL, num_vert, num_vert)

    for label in g.graph_dict:
        term = Terminal(label)

        result.graph_dict[term] = g.graph_dict[label].dup()
        # The heads deriving this terminal depend only on the label, so they
        # are collected once per label instead of once per edge.
        heads = [
            production.head for production in cfg.productions
            if len(production.body) == 1 and production.body[0] == term
        ]
        if not heads:
            continue
        for v_from, v_to in g.get_edges(label):
            for head in heads:
                result.graph_dict[head][v_from, v_to] = True

    if cfg.generate_epsilon():
        for v in g.vertices:
            result.graph_dict[start_sym][v, v] = True

    matrix_changing = True
    with semiring.LOR_LAND_BOOL:
        # Iterate to a fixed point: stop after a full pass over the pair
        # productions in which no head matrix changed its entry count.
        while matrix_changing:
            matrix_changing = False
            for production in cfg.pair_productions:
                head = production.head
                body = production.body
                prev_nvals = result.graph_dict[head].nvals
                tmp = result.graph_dict[body[0]] @ result.graph_dict[body[1]]
                result.graph_dict[head] = result.graph_dict[head] + tmp
                if prev_nvals != result.graph_dict[head].nvals:
                    matrix_changing = True

    return result.graph_dict[start_sym]
Пример #6
0
def _read_vertex_ids(path):
    """Return the integers listed one per line in *path*, skipping blanks."""
    with open(path, 'r') as f:
        # A blank (for example trailing) line would make int() raise.
        return [int(line) for line in f if line.strip()]


def main():
    """Command-line entry point: run a regular path query on a graph file.

    ``--graph`` and ``--regex`` are required paths.  ``--start`` and ``--end``
    are optional paths to files with one vertex number per line; when either
    is omitted, all vertices of the graph are used for it.
    """
    parser = argparse.ArgumentParser(description='Basic graph DB')
    parser.add_argument(
        '--graph', required=True, type=str,
        help='path to graph file'
    )
    parser.add_argument(
        '--regex', required=True, type=str,
        help='path to regex file'
    )
    parser.add_argument(
        '--start', required=False, type=str,
        help='path to given starting vertices'
    )
    parser.add_argument(
        '--end', required=False, type=str,
        help='path to given end vertices'
    )
    args = parser.parse_args()

    # read Regex from file; the context manager closes the handle
    with open(args.regex, 'r') as regex_file:
        regex_string = regex_file.read()
    regex_automaton = RegexAutomaton(regex_string)

    # read GrB matrix from file
    graph = LabelGraph().from_txt(args.graph)

    # read start and end vertices
    if args.start is not None:
        start = _read_vertex_ids(args.start)
    else:
        start = list(range(graph.num_vert))

    if args.end is not None:
        end = _read_vertex_ids(args.end)
    else:
        end = list(range(graph.num_vert))

    perform_rpq(graph, regex_automaton, start, end)
Пример #7
0
def test_cfpq_empty_graph():
    """``cfpq_matrix_mult`` on an empty graph must report no vertex pairs."""
    grammar = GrammarCNF.from_text(TEST_GRAMMARS[0])
    answer = cfpq_matrix_mult(LabelGraph(), grammar)
    found_pairs = set(LabelGraph.get_reachable(answer))
    assert found_pairs == set()
Пример #8
0
def cfpq_hellings(g: LabelGraph, cfg: GrammarCNF):
    """Answer a context-free path query with a worklist over labelled edges.

    One boolean matrix is kept per grammar variable (and per terminal found
    in the graph).  Every known triple ``(symbol, from, to)`` is put on a
    queue; each popped triple is combined, through the grammar's pair
    productions, with the edges ending at its source and with the edges
    starting at its target.  Newly derived triples are stored and queued
    until the queue is empty.

    Args:
        g: graph exposing ``num_vert``, ``graph_dict`` and
            ``get_edges(label)``.
        cfg: grammar exposing ``start_symbol``, ``variables``,
            ``productions``, ``pair_productions`` and ``generate_epsilon()``.

    Returns:
        The boolean matrix stored for the grammar's start symbol.
    """
    num_vert = g.num_vert
    start_sym = cfg.start_symbol
    result = LabelGraph()
    result.num_vert = num_vert
    # Worklist of (symbol, from, to) triples that still have to be processed.
    m = deque()

    # 1st step: one empty matrix per grammar variable.
    for variable in cfg.variables:
        result.graph_dict[variable] = Matrix.sparse(BOOL, num_vert, num_vert)

    # When the grammar generates epsilon, every vertex reaches itself via the
    # start symbol.
    if cfg.generate_epsilon():
        for v in range(num_vert):
            result.graph_dict[start_sym][v, v] = True

    # 2nd step: copy the terminal matrices and apply every production whose
    # body is exactly that terminal to each edge carrying the label.
    for label in g.graph_dict:
        term = Terminal(label)
        result.graph_dict[term] = g.graph_dict[label].dup()
        for v_from, v_to in g.get_edges(label):
            for production in cfg.productions:
                if (len(production.body) == 1 and production.body[0] == term):
                    head = production.head
                    result.graph_dict[head][v_from, v_to] = True

    # Seed the worklist with every edge known so far.
    for label in result.graph_dict:
        for i, j in result.get_edges(label):
            m.append((label, i, j))

    # 3rd step: cfpq on modified matrix
    while m:
        var, v, u = m.popleft()
        # Extend to the left: an edge (v_new -> v) labelled var_left followed
        # by the popped edge (v -> u) yields (v_new -> u) for every production
        # head -> var_left var.
        # NOTE(review): matrices are written while edge listings of the same
        # graph are being iterated; whether that is safe depends on how
        # get_edges produces its result - confirm.
        for var_left in result.graph_dict:
            for v_new, v_ in result.get_edges(var_left):
                if (v_ == v):
                    for production in cfg.pair_productions:
                        if (production.body[1] == var
                                and production.body[0] == var_left):
                            # NOTE(review): the cost of this membership test
                            # depends on what get_edges returns - confirm.
                            if (v_new, u) not in result.get_edges(
                                    production.head):
                                result.graph_dict[production.head][v_new,
                                                                   u] = True
                                m.append((production.head, v_new, u))
        # Extend to the right: the popped edge (v -> u) followed by an edge
        # (u -> u_new) labelled var_right yields (v -> u_new) for every
        # production head -> var var_right.
        for var_right in result.graph_dict:
            for u_, u_new in result.get_edges(var_right):
                if (u_ == u):
                    for production in cfg.pair_productions:
                        if (production.body[1] == var_right
                                and production.body[0] == var):
                            if (v, u_new) not in result.get_edges(
                                    production.head):
                                result.graph_dict[production.head][
                                    v, u_new] = True
                                m.append((production.head, v, u_new))
    return result.graph_dict[start_sym]
Пример #9
0
def cfpq_tensor_product(g: LabelGraph, cfg: GrammarCNF):
    """Answer a context-free path query via Kronecker products with an RFA.

    The grammar is converted to an RFA.  In every round the per-label
    matrices of the working graph are combined with the RFA matrices by a
    Kronecker product, the combined matrix is closed transitively, and each
    closure entry leading from an RFA start state to an RFA final state adds
    an edge labelled with the corresponding variable to the working graph.
    Rounds repeat until the closure's entry count stops changing.

    Args:
        g: graph exposing ``num_vert``, ``graph_dict`` and ``vertices``.
        cfg: grammar exposing ``terminals``, ``productions`` and
            ``start_symbol``.

    Returns:
        The boolean matrix stored under the start symbol's value; an empty
        0 x 0 matrix when the graph has no vertices.
    """
    rfa = RFA().from_cfg(cfg)

    # Working graph that accumulates the derived edges.
    result = LabelGraph()
    result.num_vert = g.num_vert

    # Nothing to compute for a graph without vertices.
    if g.num_vert == 0:
        return Matrix.sparse(BOOL, g.num_vert, g.num_vert)

    copied = {}
    for label in g.graph_dict:
        copied[label] = g.graph_dict[label].dup()
    result.graph_dict = copied

    # Make sure every RFA label and every terminal has a matrix.
    for label in rfa.graph_dict:
        if label in result.graph_dict:
            continue
        result.graph_dict[label] = Matrix.sparse(BOOL, g.num_vert, g.num_vert)
    for term in cfg.terminals:
        if term.value in result.graph_dict:
            continue
        result.graph_dict[term.value] = Matrix.sparse(BOOL, g.num_vert,
                                                      g.num_vert)

    # Epsilon productions put a self-loop on every vertex.
    for p in cfg.productions:
        if p.body != []:
            continue
        for v in g.vertices:
            result.graph_dict[p.head.value][v, v] = True

    prev_nvals = 0
    while True:
        # Intersection of the working graph with the RFA, label by label.
        products = {}
        product_size = 0
        for label in rfa.graph_dict:
            products[label] = result.graph_dict[label].kronecker(
                rfa.graph_dict[label])
            if product_size == 0:
                product_size = products[label].ncols

        wrapper = LabelGraph()
        wrapper.graph_dict = products
        wrapper.num_vert = product_size
        intersection = wrapper.to_GrB_matrix()

        tc = get_transitive_closure(intersection)

        for src, dst in LabelGraph.get_reachable(tc):
            # Split each product index into (graph vertex, RFA state).
            src_graph, src_rfa = divmod(src, rfa.num_vert)
            dst_graph, dst_rfa = divmod(dst, rfa.num_vert)

            if src_rfa in rfa.start_states and dst_rfa in rfa.final_states:
                variable = rfa.var_by_vertices[(src_rfa, dst_rfa)]
                result.graph_dict[variable][src_graph, dst_graph] = True

        # Stop once a round leaves the closure's entry count unchanged.
        current_nvals = tc.nvals
        if current_nvals == prev_nvals:
            break
        prev_nvals = current_nvals

    return result.graph_dict[cfg.start_symbol.value]
def test_cfpq_empty_graph():
    """``cfpq_tensor_product`` on an empty graph must report no vertex pairs."""
    grammar = CFGWrapper.from_text(TEST_GRAMMARS[0])
    answer = cfpq_tensor_product(LabelGraph(), grammar)
    found_pairs = set(LabelGraph.get_reachable(answer))
    assert found_pairs == set()
Пример #11
0
for d_name in GRAPH_DIRS:
    with open(f'query_benchmarks/{d_name}_bench.csv', 'w') as res_f:
        graph_filename = glob.glob(f"{bench_prefix}/{d_name}/*.txt")[0]
        for regex_filename in os.listdir(f'{bench_prefix}/{d_name}/regexes/'):
            if regex_filename in collected:
                print(f'{regex_filename} already done')
            else:
                print(f'Running {d_name} -- {regex_filename}...')
                regex_ = os.path.join(f'{bench_prefix}/{d_name}/regexes/', regex_filename)
                # read Regex from file
                regex_string = open(regex_, 'r').read()
                regex_automaton = RegexAutomaton(regex_string)

                # read GrB matrix from file
                graph = LabelGraph().from_txt(graph_filename)

                # benchmarking 2 methods of transitive closure
                for tc_method in range(2):
                    # read start and end vertices
                    start = list(range(graph.num_vert))
                    end = list(range(graph.num_vert))
                    times = []
                    nvals = 0
                    print(f'Running method {tc_method}...\n')
                    for i in range(5):
                        print(f'Running {i} time...\n')
                        start = time.time_ns()
                        nvals = perform_rpq(graph, regex_automaton,
                                            start, end, bool(tc_method))[1]