Exemplo n.º 1
0
 def partition_map(partition: List[List[int]]) -> Tuple[RDFGraph, ...]:
     """ Convert a partition expressed as groups of indices into a tuple of graphs

     ``t_list`` and ``t_list_len`` are taken from the enclosing scope (not visible in this block).

     :param partition: groups of indices into ``t_list``
     :return: one RDFGraph per group, in the order the groups were supplied
     """
     # A group consisting of a single index at or beyond t_list_len becomes an empty graph;
     # in any other group, indices at or beyond t_list_len are skipped
     return tuple(
         RDFGraph()
         if len(indices) == 1 and indices[0] >= t_list_len
         else RDFGraph([t_list[idx] for idx in indices if idx < t_list_len])
         for indices in partition
     )
 def test_rdf_graph(self):
     """ Equal triples collapse to a single entry and a graph built from rdf_1 renders as rdf_out """

     def issue_count():
         # Build a fresh (but equal) tuple on every call so that de-duplication is exercised by value
         return EX.issue1, EX.count, Literal(17)

     single = RDFGraph([issue_count()])
     self.assertEqual(1, len(single))

     # The same triple supplied twice as plain tuples
     repeated = RDFGraph([issue_count(), issue_count()])
     self.assertEqual(1, len(repeated))

     # The same triple supplied once as a plain tuple and once as an RDFTriple
     mixed = RDFGraph([issue_count(), RDFTriple(issue_count())])
     self.assertEqual(1, len(mixed))

     _, g = setup_test(None, rdf_1)
     self.assertEqual(rdf_out, str(RDFGraph(g)))
Exemplo n.º 3
0
 def test_large_partition(self):
     """ partition_t must stay lazy: only the partitions actually requested may be computed """
     # The reason for this test is to be certain that we get generators all the way through.  This test
     # will take forever if, somewhere in the process, we actually realize the whole partition
     rdflib_graph = Graph()
     rdflib_graph.parse(data=rdf_header, format="turtle")
     for idx in range(25):
         rdflib_graph.add((EX['s' + str(idx)], RDF.type, EX.thing))
     rdfg = RDFGraph(rdflib_graph)

     partitions = partition_t(rdfg, 20)
     # Consume and discard the first 100 partitions; the next one is the one that gets checked
     for _ in range(100):
         next(partitions)

     expected = [
         {
             'http://schema.example/s0', 'http://schema.example/s1',
             'http://schema.example/s10', 'http://schema.example/s11',
             'http://schema.example/s12', 'http://schema.example/s13'
         },
         {'http://schema.example/s14'},
         {'http://schema.example/s15'},
         {'http://schema.example/s16'},
         {'http://schema.example/s17'},
         {'http://schema.example/s18'},
         {'http://schema.example/s19'},
         {'http://schema.example/s2'},
         {'http://schema.example/s20'},
         {'http://schema.example/s21'},
         {'http://schema.example/s22'},
         {'http://schema.example/s23'},
         {'http://schema.example/s24'},
         {'http://schema.example/s3'},
         {'http://schema.example/s4'},
         {'http://schema.example/s9'},
         {'http://schema.example/s5'},
         {'http://schema.example/s8'},
         {'http://schema.example/s6'},
         {'http://schema.example/s7'},
     ]
     # Reduce every graph in the partition to the set of the first elements (subjects) of its triples
     actual = [{str(list(triple)[0]) for triple in part}
               for part in next(partitions)]
     self.assertEqual(expected, actual)

     # Splitting into 1 part or into 25 parts is expected to yield exactly one partition each
     self.assertEqual(1, sum(1 for _ in partition_t(rdfg, 1)))
     self.assertEqual(1, sum(1 for _ in partition_t(rdfg, 25)))
Exemplo n.º 4
0
    def __init__(self, cntxt: Context, T: RDFGraph, expr: ShExJ.EachOf) -> None:
        """ Index the sub-expressions of expr and the triples of T by predicate

        :param cntxt: evaluation context
        :param T: triples to evaluate
        :param expr: EachOf expression whose sub-expressions are to be evaluated
        """
        # tripleExpr = Union["EachOf", "OneOf", "TripleConstraint", tripleExprLabel]
        #
        # For each tripleExpr in expressions determine the set of applicable predicates and their
        # corresponding triples.
        #
        #       Case 1: predicate occurs in exactly one expression and expression references exactly one predicate
        #                   Evaluate and return false if fail
        #       Case 2: predicate occurs two or more expressions and all expressions reference exactly one predicate
        #                   Permute predicate over expressions until a passing condition is found
        #       Case 3: expression references two or more predicates and all referenced predicates occur only once
        #                   Evaluate with set of all predicates and return false if fail
        #       Case 4: predicate occurs in two or more expressions and at least one of the referenced expressions
        #                   references two or more predicates
        #                   NOTE(review): the original note was cut off mid-sentence; this ending is inferred - confirm

        # Sub-expressions in their original order; the position in this list is the "expression number"
        self.expressions: List[ShExJ.tripleExpr] = []
        # predicate -> numbers of the expressions that reference it
        self.predicate_to_expression_nums: Dict[IRIREF, List[int]] = {}
        # expression number -> predicates referenced by that expression
        self.expression_num_predicates: List[Set[IRIREF]] = []
        # predicate -> the triples of T carrying that predicate
        self.predicate_graph: Dict[IRIREF, RDFGraph] = {}

        for expr_num, sub_expr in enumerate(expr.expressions):
            self.expressions.append(sub_expr)
            sub_expr_predicates = predicates_in_tripleexpr(sub_expr, cntxt)
            self.expression_num_predicates.append(sub_expr_predicates)
            for p in sub_expr_predicates:
                self.predicate_to_expression_nums.setdefault(p, []).append(expr_num)
                if p in self.predicate_graph:
                    continue
                # First sighting of this predicate: select its triples by comparing string forms
                wanted = str(p)
                self.predicate_graph[p] = RDFGraph([t for t in T if str(t.p) == wanted])
def matchesCardinality(cntxt: Context,
                       T: RDFGraph,
                       expr: Union[ShExJ.tripleExpr, ShExJ.tripleExprLabel],
                       c: DebugContext,
                       extras: Optional[Set[URIRef]] = None) -> bool:
    """ Evaluate cardinality expression

    expr has a cardinality of min and/or max not equal to 1, where a max of -1 is treated as unbounded, and
    T can be partitioned into k subsets T1, T2,…Tk such that min ≤ k ≤ max and for each Tn,
    matches(Tn, expr, m) by the remaining rules in this list.

    :param cntxt: evaluation context; receives ``fail_reason`` when the match fails
    :param T: triples to be matched
    :param expr: expression whose ``min``/``max`` are read (each defaults to 1 when None)
    :param c: debug context; a summary line is printed when ``c.debug`` is set
    :param extras: predicates whose triples are left out of the maximum check for a TripleConstraint
    :return: True if T satisfies expr under its cardinality
    """
    # TODO: Cardinality defaults into spec
    min_ = 1 if expr.min is None else expr.min
    max_ = 1 if expr.max is None else expr.max

    cardinality_text = f"{{{min_},{'*' if max_ == -1 else max_}}}"
    # An optional expression with nothing to match passes trivially (and silently)
    nothing_to_do = min_ == 0 and len(T) == 0
    if c.debug and not nothing_to_do:
        print(f"{cardinality_text} matching {len(T)} triples")
    if nothing_to_do:
        return True

    if not isinstance(expr, ShExJ.TripleConstraint):
        # Composite expression: look for a partition of T in which every part matches expr
        if any(all(matchesExpr(cntxt, part, expr) for part in partition)
               for partition in _partitions(T, min_, max_)):
            return True
        # A failure reason is only recorded for a non-default cardinality
        if not (min_ == 1 and max_ == 1):
            _fail_triples(cntxt, T)
            cntxt.fail_reason = f"   {len(T)} triples cannot be partitioned into {cardinality_text} passing groups"
        return False

    # Triple constraint: too few triples fails immediately
    if len(T) < min_:
        if len(T) > 0:
            _fail_triples(cntxt, T)
            cntxt.fail_reason = f"   {len(T)} triples less than {cardinality_text}"
        else:
            cntxt.fail_reason = f"   No matching triples found for predicate {cntxt.n3_mapper.n3(expr.predicate)}"
        return False

    # Don't include extras in the cardinality check: must_match is the set of things NOT consumed in extra
    must_match = RDFGraph([t for t in T if t.p not in extras]) if extras else T

    # A negative max means unbounded, so only a non-negative max can be exceeded
    if 0 <= max_ < len(must_match):
        _fail_triples(cntxt, T)
        cntxt.fail_reason = f"   {len(T)} triples exceeds max {cardinality_text}"
        return False

    # Every must-match triple has to satisfy the constraint; when there are none, one triple of T suffices
    candidates, quantifier = (must_match, all) if len(must_match) else (T, any)
    return quantifier(matchesTripleConstraint(cntxt, t, expr) for t in candidates)
Exemplo n.º 6
0
    def evaluate(self, cntxt: Context) -> bool:
        """ Evaluate the indexed expressions against the triples gathered for their predicates

        Predicates and expressions are handled in groups:

        * a predicate used by exactly one single-predicate expression is matched directly
        * a predicate shared by several single-predicate expressions has its triples split across them
        * an expression with several predicates is matched against the combined triples of those of
          its predicates that no other expression references
        * predicates of a multi-predicate expression that are shared with other expressions are
          evaluated together with their predicate closure, splitting the triples across the closure's
          expressions

        :param cntxt: evaluation context
        :return: False as soon as one group cannot be matched, otherwise True
        """
        from pyshex.shape_expressions_language.p5_5_shapes_and_triple_expressions import matches

        def some_partition_matches(graph, e_nums) -> bool:
            # True if graph can be split into len(e_nums) parts such that each part matches the
            # expression paired with it.  Partitions are generated lazily and the search stops at the
            # first success.
            return any(
                all(matches(cntxt, part, self.expressions[e_num]) for part, e_num in zip(partition, e_nums))
                for partition in partition_t(graph, len(e_nums)))

        for p, expr_nums in self.predicate_to_expression_nums.items():
            if all(len(self.expression_num_predicates[expr_num]) == 1 for expr_num in expr_nums):
                if len(expr_nums) == 1:
                    # Case 1: unique predicate/expression combo
                    if not matches(cntxt, self.predicate_graph[p], self.expressions[expr_nums[0]]):
                        return False
                # Case 2: several expressions match exactly one predicate -- split the triples
                elif not some_partition_matches(self.predicate_graph[p], expr_nums):
                    return False

        for expr_num, predicates in enumerate(self.expression_num_predicates):
            if len(predicates) <= 1:
                continue

            # Case 3: Expression matches multiple predicates but each predicate referenced only once
            # Build a composite graph of all triples and evaluate it
            target = RDFGraph()
            for p in predicates:
                if len(self.predicate_to_expression_nums[p]) == 1:
                    target.update(self.predicate_graph[p])
            if target and not matches(cntxt, target, self.expressions[expr_num]):
                return False

            # Remaining predicates are shared with other expressions: evaluate the whole closure of
            # predicates/expressions reachable from each of them.
            for p in predicates:
                if len(self.predicate_to_expression_nums[p]) > 1:
                    closure_predicates, closure_expressions = self._predicate_closure(p)
                    target = RDFGraph()
                    for predicate in closure_predicates:
                        target.update(self.predicate_graph[predicate])
                    # The search must start out as "not found": the previous code initialised its
                    # success flag to True, which made this failure check unreachable.
                    if not some_partition_matches(target, closure_expressions):
                        return False
        return True
def valid_remainder(cntxt: Context, n: Node, matchables: RDFGraph,
                    S: ShExJ.Shape) -> bool:
    """ Check the triples that were left unmatched against the remainder rules of a shape

    Let **outs** be the arcsOut in remainder: `outs = remainder ∩ arcsOut(G, n)`.

    Let **matchables** be the triples in outs whose predicate appears in a TripleConstraint in `expression`. If
    `expression` is absent, matchables = Ø (the empty set).

    The remainder is valid when all of the following hold:

    * There is no triple in **matchables** which matches a TripleConstraint in expression

    * There is no triple in **matchables** whose predicate does not appear in extra.

    * closed is false or unmatchables is empty

    :param cntxt: evaluation context
    :param n: focus node
    :param matchables: non-matched triples
    :param S: Shape being evaluated
    :return: True if remainder is valid
    """
    # TODO: Update this and satisfies to address the new algorithm
    # outs: the non-matched triples that leave the focus node
    outs = arcsOut(cntxt.graph, n).intersection(matchables)

    # Predicates referenced by the shape's expression
    shape_predicates = predicates_in_expression(S, cntxt)

    # Narrow to the triples in outs whose predicate (compared in string form) is one of those predicates
    matchables = RDFGraph(t for t in outs if str(t.p) in shape_predicates)

    # Rule 1: no triple in matchables may match a TripleConstraint in the expression
    if matchables and S.expression is not None:
        constraints = triple_constraints_in_expression(S.expression, cntxt)
        if any(matchesTripleConstraint(cntxt, triple, constraint)
               for triple in matchables for constraint in constraints):
            return False

    # Rule 2: every triple in matchables must have a predicate that appears in extra
    extras = set() if S.extra is None else {iriref_to_uriref(e) for e in S.extra}
    for triple in matchables:
        if triple.p not in extras:
            return False

    # Rule 3: closed is false or unmatchables (outs minus matchables) is empty
    return not (S.closed.val and bool(outs - matchables))
Exemplo n.º 8
0
 def test_partition_t(self):
     """ partition_t(graph, 2) enumerates every way of splitting 0, 1 and 2 triples across two graphs """
     shoe_size = RDFTriple((EX.Alice, EX.shoeSize, Literal(30, datatype=XSD.integer)))
     teacher = RDFTriple((EX.Alice, RDF.type, EX.Teacher))
     # The three triples below are constructed but not referenced by any assertion in this test
     person = RDFTriple((EX.Alice, RDF.type, EX.Person))
     hat = RDFTriple((EX.SomeHat, EX.owner, EX.Alice))
     moon = RDFTriple((EX.TheMoon, EX.madeOf, EX.GreenCheese))

     rdflib_graph = Graph()

     # No triples: a single partition consisting of two empty graphs
     empty = RDFGraph(rdflib_graph)
     self.assertEqual([(RDFGraph(), RDFGraph())], list(partition_t(empty, 2)))

     # One triple: it lands on the left or on the right
     rdflib_graph.add(shoe_size)
     one = RDFGraph(rdflib_graph)
     self.assertEqual([(one, empty), (empty, one)], list(partition_t(one, 2)))

     # Two triples: one on each side (both ways round), then both together on either side
     rdflib_graph.add(teacher)
     two = RDFGraph(rdflib_graph)
     expected = [
         (one, RDFGraph((teacher, ))),
         (RDFGraph((teacher, )), one),
         (two, empty),
         (empty, two),
     ]
     self.assertEqual(expected, list(partition_t(two, 2)))
Exemplo n.º 9
0
    def test_partition_2(self):
        """ partition_2 yields 2**len(graph) partitions and agrees with partition_t(graph, 2) """

        def rdf_graph_from(turtle_body):
            # Wrap the fragment with gen_rdf, parse it as turtle and hand back the resulting RDFGraph
            rdflib_graph = Graph()
            rdflib_graph.parse(data=gen_rdf(turtle_body), format="turtle")
            return RDFGraph(rdflib_graph)

        # Len(partition) == 2**len(graph)
        empty = RDFGraph(Graph())
        # partition_2 is a generator - you can only do it once
        empty_split = list(partition_2(empty))
        self.assertEqual(1, len(empty_split))
        self.assertEqual([(RDFGraph(), RDFGraph())], empty_split)
        self.assertEqual(empty_split, list(partition_t(empty, 2)))

        single = rdf_graph_from("""<Alice> ex:shoeSize "30"^^xsd:integer .""")
        single_split = list(partition_2(single))
        self.assertEqual(2, len(single_split))
        self.assertEqual(single_split, list(partition_t(single, 2)))

        # Two elements give 4 partitions ((e1, e2), ()), ((e1), (e2)), ((e2), (e1)), ((), (e1, e2))
        pair = rdf_graph_from("""<Alice> ex:shoeSize "30"^^xsd:integer .
                <Alice> a ex:Teacher .""")
        self.assertEqual(4, len(list(partition_2(pair))))

        # Three elements give 8 partitions
        triple = rdf_graph_from("""<Alice> ex:shoeSize "30"^^xsd:integer .
                        <Alice> a ex:Teacher .
                        <Alice> a ex:Person .""")
        self.assertEqual(8, len(list(partition_2(triple))))

        # Four elements give 16 partitions
        quad = rdf_graph_from("""<Alice> ex:shoeSize "30"^^xsd:integer .
                        <Alice> a ex:Teacher .
                        <Alice> a ex:Person .
                        <Alice> a ex:Fool .""")
        self.assertEqual(16, len(list(partition_2(quad))))
Exemplo n.º 10
0
def arcsIn(G: Graph, n: Node) -> RDFGraph:
    """ Return arcsIn(G, n): the triples of graph G that have n as their object.

    :param G: graph to search
    :param n: node to look for in the object position
    :return: the matching triples as an RDFGraph
    """
    incoming = G.triples((None, None, n))
    return RDFGraph(incoming)
Exemplo n.º 11
0
def arcsOut(G: Graph, n: Node) -> RDFGraph:
    """ Return arcsOut(G, n): the triples of graph G that have n as their subject.

    :param G: graph to search
    :param n: node to look for in the subject position
    :return: the matching triples as an RDFGraph
    """
    outgoing = G.triples((n, None, None))
    return RDFGraph(outgoing)
def satisfiesShape(cntxt: Context, n: Node, S: ShExJ.Shape,
                   c: DebugContext) -> bool:
    """ `5.5.2 Semantics <http://shex.io/shex-semantics/#triple-expressions-semantics>`_

    For a node `n`, shape `S`, graph `G`, and shapeMap `m`, `satisfies(n, S, G, m)` if and only if:

    * `neigh(G, n)` can be partitioned into two sets matched and remainder such that
      `matches(matched, expression, m)`. If expression is absent, remainder = `neigh(G, n)`.

    :param cntxt: Evaluation context
    :param n: focus node
    :param S: Shape to be satisfied
    :param c: Debug context
    :return: true iff `satisfies(n, S, cntxt)`
    """

    # Recursion detection.  If start_evaluating returns a boolean value, this is the assumed result of the shape
    # evaluation.  If it returns None, then an initial evaluation is needed
    rslt = cntxt.start_evaluating(n, S)

    if rslt is None:
        cntxt.evaluate_stack.append((n, S.id))
        predicates = directed_predicates_in_expression(S, cntxt)
        matchables = RDFGraph()

        # Note: The code below does an "over-slurp" for the sake of expediency.  If you are interested in
        #       getting EXACTLY the needed triples, set cntxt.over_slurp to false
        if isinstance(cntxt.graph, SlurpyGraph) and cntxt.over_slurp:
            with slurper(cntxt, n, S) as g:
                _ = g.triples((n, None, None))

        # Collect the triples for every predicate in the expression, with n in the subject position for
        # forward references and in the object position for reverse references
        for predicate, direction in predicates.items():
            with slurper(cntxt, n, S) as g:
                matchables.add_triples(
                    g.triples((n if direction.is_fwd else None,
                               iriref_to_uriref(predicate),
                               n if direction.is_rev else None)))

        if c.debug:
            print(
                c.i(1, "predicates:",
                    sorted(cntxt.n3_mapper.n3(p) for p in predicates.keys())))
            print(
                c.i(1, "matchables:",
                    sorted(cntxt.n3_mapper.n3(m) for m in matchables)))
            print()

        if S.closed:
            # TODO: Is this working correctly on reverse items?
            non_matchables = RDFGraph(
                [t for t in arcsOut(cntxt.graph, n) if t not in matchables])
            if len(non_matchables):
                # NOTE(review): the two consecutive assignments are kept from the original; whether the
                # second replaces or extends the first depends on Context.fail_reason - confirm
                cntxt.fail_reason = "Unmatched triples in CLOSED shape:"
                # Format each triple instead of concatenating it to a string, which raises TypeError
                # unless the triple is itself a str
                cntxt.fail_reason = '\n'.join(f"\t{t}" for t in non_matchables)
                if c.debug:
                    # The count is formatted rather than added to a string (int + str raises TypeError)
                    print(
                        c.i(
                            0, f"<--- Satisfies shape {c.d()} FAIL - ",
                            f"{len(non_matchables)} non-matching triples on a closed shape"))
                    print(c.i(1, "", list(non_matchables)))
                    print()
                # Balance the push above so that this early exit does not leave a stale entry behind.
                # NOTE(review): done_evaluating is not called on this path (unchanged) - confirm intended
                cntxt.evaluate_stack.pop()
                return False

        # Evaluate the actual expression.  Start assuming everything matches...
        if S.expression:
            if matches(cntxt, matchables, S.expression):
                rslt = True
            else:
                extras = {iriref_to_uriref(e)
                          for e in S.extra} if S.extra is not None else set()
                if len(extras):
                    # Triples whose predicate is listed in extra may be left out of the match; every
                    # other matchable triple has to take part in it
                    permutable_matchables = RDFGraph(
                        [t for t in matchables if t.p in extras])
                    non_permutable_matchables = RDFGraph([
                        t for t in matchables if t not in permutable_matchables
                    ])
                    if c.debug:
                        print(
                            c.i(1,
                                "Complete match failed -- evaluating extras",
                                list(extras)))
                    # Try each two-way split of the extra triples, matching only the first half
                    for matched, remainder in partition_2(
                            permutable_matchables):
                        permutation = non_permutable_matchables.union(matched)
                        if matches(cntxt, permutation, S.expression):
                            rslt = True
                            break
                # rslt is still None when nothing matched - normalise it to False
                rslt = rslt or False
        else:
            rslt = True  # Empty shape

        # If an assumption was made and the result doesn't match the assumption, switch directions and try again
        done, consistent = cntxt.done_evaluating(n, S, rslt)
        if not done:
            # NOTE(review): called without `c`; presumably a decorator outside this view supplies the
            # debug context (matches() is called the same way above) - confirm
            rslt = satisfiesShape(cntxt, n, S)
        rslt = rslt and consistent

        cntxt.evaluate_stack.pop()
    return rslt