Exemplo n.º 1
0
def relaxed_compare(graph1, graph2):
    """Compare two RDF graphs for equality, treating untyped literals as
    equal to their string forms (via ``Literal.eq``).

    :param graph1: first rdflib Graph
    :param graph2: second rdflib Graph
    :return: True if the graphs are equal under the relaxed comparison
    """
    # Identical graphs are trivially equal.
    if graph1 == graph2:
        return True
    # Differing triple counts can never be reconciled.
    if len(graph1) != len(graph2):
        return False
    # Otherwise compare only the triples that are not identical in both.
    in_both, in_first, in_second = graph_diff(graph1, graph2)
    # Each extra triple in one graph must have a relaxed-equal counterpart
    # (same subject/predicate, eq-comparable object) in the other.  The
    # `v is None` guard handles the case where no matching (s, p) triple
    # exists at all, instead of passing None into o.eq().
    for s, p, o in in_first:
        v = in_second.value(subject=s, predicate=p)
        if v is None or not o.eq(v):
            return False
    for s, p, o in in_second:
        v = in_first.value(subject=s, predicate=p)
        if v is None or not o.eq(v):
            return False
    # No check failed, so the graphs are (relaxed-)equal.
    return True
Exemplo n.º 2
0
def load_single(orcid_id, person_uri, person_id, person_class, data_path, endpoint, username, password,
                namespace=None, skip_person=False, confirmed_orcid_id=False):
    """Load a single ORCID profile: crosswalk it to RDF, diff against the
    previously stored graph, and push the changes to a SPARQL endpoint.

    Returns a (graph, add_graph, delete_graph) tuple.
    """
    with Store(data_path) as store:
        #Crosswalk: map the ORCID profile into an RDF graph
        (graph, profile, person_uri) = default_execute(orcid_id, namespace=namespace, person_uri=person_uri,
                                                       person_id=person_id, skip_person=skip_person,
                                                       person_class=person_class, confirmed_orcid_id=confirmed_orcid_id)

        graph_filepath = os.path.join(data_path, "%s.ttl" % orcid_id.lower())
        previous_graph = Graph(namespace_manager=ns_manager)
        #Load last graph, if one was saved by a previous run
        if os.path.exists(graph_filepath):
            log.debug("Loading previous graph %s", graph_filepath)
            previous_graph.parse(graph_filepath, format="turtle")

        #Diff against last graph: triples only in the previous graph are
        #deletions; triples only in the new graph are additions
        (both_graph, delete_graph, add_graph) = graph_diff(previous_graph, graph)

        #SPARQL Update: apply deletes before inserts
        log.info("Adding %s, deleting %s triples for %s", len(add_graph), len(delete_graph), orcid_id)
        sparql_delete(delete_graph, endpoint, username, password)
        sparql_insert(add_graph, endpoint, username, password)

        #Save new last graph so the next run diffs against it
        log.debug("Saving new graph %s", graph_filepath)
        with codecs.open(graph_filepath, "w") as out:
            graph.serialize(format="turtle", destination=out)

        #Touch: record that this ORCID id was processed
        store.touch(orcid_id)

        return graph, add_graph, delete_graph
Exemplo n.º 3
0
def update_graph(q):
    """Update global GRAPHS dictionary with the current data for the given Q number.

    Downloads the entity data from wikidata in Turtle form, parses it and then
    does a diff (to stdout) with any current version of that entity.

    FIXME -- THIS IS NOT THE RIGHT WAY TO GET THE GRAPH DATA! DON'T KNOW HOW TO GET
    CURRENT RDF.
    """
    global GRAPHS
    try:
        r = requests.get(url=WIKIDATA_ENTITY_BASE + q, headers={'Accept': 'text/turtle'})
        g_new = Graph()
        g_new.parse(data=r.text, format='turtle')
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit are
        # no longer swallowed; any fetch or parse failure is reported.
        print("Update for %s failed" % (q))
        return
    if q in GRAPHS:
        # Show a three-way diff against the previously cached graph.
        g_old = GRAPHS[q]
        print("%s: old, %d -> new, %d triples" % (q, len(g_old), len(g_new)))
        in_both, in_old, in_new = graph_diff(g_old, g_new)
        print("%s: < %d, == %d, > %d" % (q, len(in_old), len(in_both), len(in_new)))
        for s, p, o in in_old:
            print("< %s %s %s" % (str(s), str(p), str(o)))
        for s, p, o in in_new:
            print("> %s %s %s" % (str(s), str(p), str(o)))
    else:
        print("%s: new, %d triples" % (q, len(g_new)))
    GRAPHS[q] = g_new
Exemplo n.º 4
0
def test_compare_triples():
    # Exercise every advertised MIME type: fetch each URL from the live
    # endpoint and compare its triples against the serialized dump on disk.
    for mime, fext in MIME_TYPES.items():
        dump_path = path.join(DUMP_DIR, path.basename(mime))

        for url in URLs:
            # urlparse moved between Python 2 and 3; pick the right call.
            if six.PY2:
                fname = '%s.%s' % (path.basename(urlparse.urlparse(url).path), fext)
            else:
                fname = '%s.%s' % (path.basename(urlparse(url).path), fext)

            fname = path.join(dump_path, fname)

            # Request with an explicit Accept header so the server
            # serializes in the format under test.
            req = Request(url)
            req.add_header('Accept', mime)
            res = urlopen(req)

            # NOTE(review): g_fdp and g_dump appear to be module-level
            # graphs; parsing inside the loop accumulates triples across
            # iterations -- confirm that is intended.
            g_fdp.parse(data=res.read(), format=mime)
            g_dump.parse(fname, format=mime)

            # Any triple present in the live response but not in the dump
            # fails the test; the message embeds both serialized diffs.
            both, first, second = graph_diff(g_fdp, g_dump)
            n_first = len(first)
            # n_second = len(second)
            # n_both = len(both)

            assert_equals(
               n_first, 0, '{} triple(s) different from reference:\n\n{}===\n{}\n'.format(
                  n_first, first.serialize(format='turtle'), second.serialize(format='turtle')))
Exemplo n.º 5
0
def merge_graphs(g: Graph,
                 g2: Graph,
                 map_uri_from=None,
                 map_uri_to=None,
                 args: Optional[AttribDict] = None):
    """Merge g2 into g. Properties known to take a single value replace any
    existing value in g, and URIs are remapped from map_uri_from to
    map_uri_to when both are given. Relative URIs are resolved against
    args.baseuri. Progress is reported on stderr."""
    merged = 0
    remapped = 0
    removed = 0
    _, _, only_in_second = graph_diff(g, g2)
    for subj, pred, obj in only_in_second:
        # Resolve any relative URIs before merging.
        subj = handle_rel_uri(subj, args.baseuri)
        pred = handle_rel_uri(pred, args.baseuri)
        obj = handle_rel_uri(obj, args.baseuri, prop=pred)
        if map_uri_from and map_uri_to:
            # Remap the configured URI wherever it occurs.
            if subj == URIRef(map_uri_from):
                subj = URIRef(map_uri_to)
                remapped += 1
            if obj == URIRef(map_uri_from):
                remapped += 1
                obj = URIRef(map_uri_to)
        if pred in SINGULAR_PROPERTIES:
            # Singular properties supersede any existing values in g.
            for old_s, old_p, old_o in g.triples((subj, pred, None)):
                g.remove((old_s, old_p, old_o))
                removed += 1
        g.add((subj, pred, obj))
        merged += 1
    i, l = merged, len(g2)
    print(
        f"    Merged {i} of {l} triples, removed {removed} superseded values, remapped {remapped} uris",
        file=sys.stderr)
Exemplo n.º 6
0
    def testB(self):
        """Curiously, this one passes, even before the fix in issue 151"""

        graph = rdflib.Graph()
        triple = (rdflib.URIRef("urn:a"),
                  rdflib.URIRef("urn:p"),
                  rdflib.Literal(u'\xe9'))
        graph.add(triple)

        diff = graph_diff(graph, graph)
Exemplo n.º 7
0
    def testA(self):
        """with bnode"""
        graph = rdflib.Graph()
        triple = (rdflib.BNode(),
                  rdflib.URIRef("urn:p"),
                  rdflib.Literal(u"\xe9"))
        graph.add(triple)

        diff = graph_diff(graph, graph)
Exemplo n.º 8
0
def test_compare_triples():
    # For each advertised MIME type, fetch every URL from the live endpoint
    # and compare its triples against the serialized dump on disk.
    for mime, fext in MIME_TYPES.items():
        dump_path = path.join(DUMP_DIR, path.basename(mime))

        for url in URLs:
            # urlparse moved between Python 2 and 3; pick the right call.
            if six.PY2:
                fname = '%s.%s' % (path.basename(
                    urlparse.urlparse(url).path), fext)
            else:
                fname = '%s.%s' % (path.basename(urlparse(url).path), fext)

            fname = path.join(dump_path, fname)

            # Request with an explicit Accept header so the server
            # serializes in the format under test.
            req = Request(url)
            req.add_header('Accept', mime)
            res = urlopen(req)

            # NOTE(review): g_fdp and g_dump appear to be module-level
            # graphs; parsing inside the loop accumulates triples across
            # iterations -- confirm that is intended.
            g_fdp.parse(data=res.read(), format=mime)
            g_dump.parse(fname, format=mime)

            # Any triple present in the live response but not in the dump
            # fails the test; the message embeds both serialized diffs.
            both, first, second = graph_diff(g_fdp, g_dump)
            n_first = len(first)
            # n_second = len(second)
            # n_both = len(both)

            assert_equals(
                n_first, 0,
                '{} triple(s) different from reference:\n\n{}===\n{}\n'.format(
                    n_first, first.serialize(format='turtle'),
                    second.serialize(format='turtle')))
Exemplo n.º 9
0
 def rdf_comparator(self, old_data: str, new_data: str) -> bool:
     """Compare two Turtle documents for isomorphic equality, ignoring
     MMNS.generation_date metadata triples.

     :param old_data: expected RDF in Turtle form
     :param new_data: actual RDF in Turtle form
     :return: True if the graphs match; otherwise the assertion fails
     """
     old_graph = Graph()
     new_graph = Graph()
     old_graph.parse(data=old_data, format="turtle")
     new_graph.parse(data=new_data, format="turtle")
     old_iso = to_isomorphic(old_graph)
     # Remove the metadata specific triples that legitimately differ per run
     for t in list(old_iso.triples((None, MMNS.generation_date, None))):
         old_iso.remove(t)
     new_iso = to_isomorphic(new_graph)
     for t in list(new_iso.triples((None, MMNS.generation_date, None))):
         new_iso.remove(t)
     # Graph compare takes a Looong time
     in_both, in_old, in_new = graph_diff(old_iso, new_iso)
     # Graphs support len() directly; no need to materialize lists.
     old_len = len(in_old)
     new_len = len(in_new)
     if old_len or new_len:
         if old_len:
             print("----- Old graph only -----")
             self._print_triples(in_old)
         if new_len:
             # Fixed typo in the message ("Grapn" -> "Graph").
             print("----- New Graph Only -----")
             self._print_triples(in_new)
         # assertTrue(False, ...) raises, so the return below is never
         # reached; kept for interface clarity.
         self.assertTrue(False, "RDF file mismatch")
         return False
     return True
Exemplo n.º 10
0
 def check_graph(graph,type):
     """Check whether each changed resource is a create, update or delete by
     comparing the changeset graph with the DBpedia live graph.

     type is the changeset kind (e.g. "added"); returns a list of
     ResourceChange events, or False when graph is empty.
     """
     if not graph:
         return False
     result=graph.query("""SELECT DISTINCT ?subject WHERE {?subject ?b ?c}""")
     """for every subject of changeset graph try to find other triples in DBpedia live to differ between add/update/delete"""
     events=[]
     for subject in result:
         resource=subject[0]
         if(resource.find(DBpedia.DBPEDIAURL)==0): #apply only for resources on server with DBPEDIA URL
             live_resource=DBpedia.liveize(resource) #online version of dbpedia live have different URIs as changeset URIs
             onl_graph=rdflib.Graph()
         #try:
             onl_graph.parse(live_resource)
             # Canonicalize both graphs so blank-node labels do not produce
             # spurious differences in the diff below.
             onl_iso = to_isomorphic(onl_graph)
             loc_iso = to_isomorphic(graph)
             in_both, in_onl, in_loc = graph_diff(onl_iso,loc_iso)
             event_type="notupdated"
             event=None
             for res_of_diff, b, c in in_onl:
                 # if live graph has more triples about resource it should be an update
                 if(str(live_resource)==str(res_of_diff)):
                     event_type="update"
                     break;
             # Classify: brand-new subject -> CREATE, known subject with
             # extra live triples -> UPDATE, everything else -> DELETE.
             if(event_type=="notupdated" and type=="added"):
                 event = ResourceChange(uri=str(live_resource), changetype="CREATE")
             elif(event_type=="update" and type=="added"):
                 event = ResourceChange(uri=str(live_resource), changetype="UPDATE")
             else:
                 event = ResourceChange(uri=str(live_resource), changetype="DELETE")
             events.append(event)
         #except Exception as e:
             #print "Error parsing %s: %s" % (live_resource,e)
             #self.notify_observers(event)
     return events
def turtle(test):
    """Run a single W3C Turtle test case.

    Parses test.action as Turtle; negative-syntax tests must fail to parse.
    For eval tests the parsed graph is compared against the expected
    N-Triples result, with an optional verbose diff dump.
    """
    g = Graph()

    try:
        base = 'http://www.w3.org/2013/TurtleTests/'+split_uri(test.action)[1]

        g.parse(test.action, publicID=base, format='turtle')
        if not test.syntax:
            raise AssertionError("Input shouldn't have parsed!")

        if test.result:  # eval test
            res = Graph()
            res.parse(test.result, format='nt')

            if verbose:
                # Dump a readable diff before failing.
                both, first, second = graph_diff(g, res)
                if not first and not second:
                    return
                print("Diff:")
                print("Turtle Only:")
                for t in first:
                    print(t)

                print("--------------------")
                print("NT Only")
                for t in second:
                    print(t)
                raise Exception('Graphs do not match!')

            assert isomorphic(g, res), 'graphs must be the same'

    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # propagate. Parse errors are expected for negative-syntax tests
        # and are re-raised only when the input should have parsed.
        if test.syntax:
            raise
Exemplo n.º 12
0
def calcPrecisionRecall2(cano_dbp, cano_lift, size_bgp_dbp, size_bgp_lift):
    """Compute (precision, recall) for a deduced BGP against the ground truth.

    cano_dbp / cano_lift are the canonicalized ground-truth and deduced
    graphs; size_bgp_dbp / size_bgp_lift are the triple-pattern counts of
    the corresponding BGPs.
    """
    if cano_dbp == cano_lift:  # If ground truth and deduction is equal then precision and recall are 1
        precision = 1
        recall = 1
    else:
        in_both, in_first, in_second = graph_diff(cano_dbp, cano_lift)
        b = len(in_both)  # b has the number of well deduced triple patterns
        for s, p, o in in_first:
            for ss, pp, oo in in_second:
                # NOTE(review): the and/or grouping below is hard to follow
                # and may not pair the (s,p)/(p,o)/(s,o) match cases as the
                # trailing comment describes -- verify the parenthesization.
                if (
                    (isinstance(s, Variable) and isinstance(ss, Variable))
                        and p == pp and o == oo
                ) or (s == ss and p == pp and
                      (isinstance(o, Variable) and isinstance(oo, Variable)) or
                      (s == ss and
                       (isinstance(p, Variable) and isinstance(pp, Variable))
                       and o == oo)):
                    b += 1  # b is incremented with triple patterns whose variables were canonized differently because the size of the BGP and that have two things in common (subject, predicate, or object);
                    break
        try:
            precision = b / size_bgp_lift  #How many deduced triple patterns are relevant
        except ZeroDivisionError:
            precision = 0
            print("Division by zero in precision with size_bgp_lift")
        try:
            recall = b / size_bgp_dbp  # How many relevant triple patterns are deduced
        except ZeroDivisionError:
            recall = 0
            print("Division by zero in recall with size_bgp_dbp")
    return (precision, recall)
Exemplo n.º 13
0
def turtle(test):
    # Run a single W3C Turtle test case: parse test.action as Turtle and,
    # for eval tests, compare against the expected N-Triples result.
    # (Python 2 code: print statements.)
    g = Graph()

    try:
        base = 'http://www.w3.org/2013/TurtleTests/'+split_uri(test.action)[1]

        g.parse(test.action, publicID=base, format='turtle')
        # Negative-syntax tests must fail to parse; reaching here is an error.
        if not test.syntax:
            raise AssertionError("Input shouldn't have parsed!")

        if test.result: # eval test
            res = Graph()
            res.parse(test.result, format='nt')

            if verbose:
                # Dump a readable diff before failing.
                both, first, second = graph_diff(g,res)
                if not first and not second: return
                print "Diff:"
                #print "%d triples in both"%len(both)
                print "Turtle Only:"
                for t in first:
                    print t

                print "--------------------"
                print "NT Only"
                for t in second:
                    print t
                raise Exception('Graphs do not match!')

            assert isomorphic(g, res), 'graphs must be the same'


    except:
        # Errors are expected for negative-syntax tests; re-raise only when
        # the input should have parsed cleanly.
        if test.syntax:
            raise
Exemplo n.º 14
0
    def test_subsets(self) -> None:
        """
        Verify `graph_diff` on two graphs where one is a strict subset of
        the other.

        `graph_diff(smaller, larger)` must report no triples unique to the
        smaller graph, some triples shared by both, and some triples unique
        to the larger graph.
        """
        shared_bnode = BNode()
        smaller_triples: _TripleSetT = {
            (shared_bnode, FOAF.name, Literal("Golan Trevize")),
            (shared_bnode, RDF.type, FOAF.Person),
        }
        smaller = Graph()
        smaller += smaller_triples

        extra_bnode = BNode()
        larger_triples: _TripleSetT = {
            *smaller_triples,
            (extra_bnode, FOAF.name, Literal("Janov Pelorat")),
            (extra_bnode, RDF.type, FOAF.Person),
        }
        larger = Graph()
        larger += larger_triples

        outcome = graph_diff(smaller, larger)
        in_both, in_first, in_second = GraphHelper.triple_sets(outcome)
        self.assertFalse(in_first)
        self.assertTrue(in_second)
        self.assertTrue(in_both)
Exemplo n.º 15
0
def load_single(orcid_id, person_uri, person_id, person_class, data_path, endpoint, username, password,
                namespace=None, skip_person=False, confirmed_orcid_id=False):
    """Load a single ORCID profile: crosswalk it to RDF, diff against the
    previously stored graph, and push the changes to a SPARQL endpoint.

    Returns a (graph, add_graph, delete_graph) tuple.
    """
    with Store(data_path) as store:
        #Crosswalk: map the ORCID profile into an RDF graph
        (graph, profile, person_uri) = default_execute(orcid_id, namespace=namespace, person_uri=person_uri,
                                                       person_id=person_id, skip_person=skip_person,
                                                       person_class=person_class, confirmed_orcid_id=confirmed_orcid_id)

        graph_filepath = os.path.join(data_path, "%s.ttl" % orcid_id.lower())
        previous_graph = Graph(namespace_manager=ns_manager)
        #Load last graph, if one was saved by a previous run
        if os.path.exists(graph_filepath):
            log.debug("Loading previous graph %s", graph_filepath)
            previous_graph.parse(graph_filepath, format="turtle")

        #Diff against last graph: triples only in the previous graph are
        #deletions; triples only in the new graph are additions
        (both_graph, delete_graph, add_graph) = graph_diff(previous_graph, graph)

        #SPARQL Update: apply deletes before inserts
        log.info("Adding %s, deleting %s triples for %s", len(add_graph), len(delete_graph), orcid_id)
        sparql_delete(delete_graph, endpoint, username, password)
        sparql_insert(add_graph, endpoint, username, password)

        #Save new last graph so the next run diffs against it
        log.debug("Saving new graph %s", graph_filepath)
        with codecs.open(graph_filepath, "w") as out:
            graph.serialize(format="turtle", destination=out)

        #Touch: record that this ORCID id was processed
        store.touch(orcid_id)

        return graph, add_graph, delete_graph
Exemplo n.º 16
0
    def assertEqualGraphs(self, want, got, exact=True):
        """Assert that two RDF graphs are identical (isomorphic).

        :param want: The graph as expected, as an
                     :py:class:`~rdflib.graph.Graph` object or the filename
                     of a serialized graph
        :param got: The actual graph, as an :py:class:`~rdflib.graph.Graph`
                    object or the filename of a serialized graph
        :param exact: Whether to require that the graphs are exactly alike
                      (True) or only if all triples in `want` exists in `got`
                      (False)
        :type  exact: bool
        """

        def _loadgraph(filename):
            # Load a serialized graph from disk, guessing the format from
            # the file extension.
            g = rdflib.Graph()
            # we must read the data ourself, providing a non-ascii
            # filename to Graph.parse fails deep in rdflib internals
            format = guess_format(filename)
            if format == "nt":
                data = util.readfile(filename, "r", encoding="utf-8")
            else:
                data = util.readfile(filename, "rb")

            g.parse(data=data, format=format)
            return g

        # Accept either Graph objects or filenames for both arguments.
        if not isinstance(want, rdflib.Graph):
            want = _loadgraph(want)
        if not isinstance(got, rdflib.Graph):
            got = _loadgraph(got)

        (in_both, in_first, in_second) = graph_diff(want, got)
        msg = ""
        # Build a diff-style message: '-' lines for expected-but-missing
        # triples, '+' lines for unexpected extras.
        if in_first:
            for (s, p, o) in sorted(in_first, key=lambda t: (t[0], t[1], t[2])):
                msg += "- %s %s %s\n" % (s.n3(), p.n3(), o.n3())
        if (exact and in_second) or in_first:
            for (s, p, o) in sorted(in_second, key=lambda t: (t[0], t[1], t[2])):
                msg += "+ %s %s %s\n" % (s.n3(), p.n3(), o.n3())
        if ((len(in_first) > 0) or (len(in_second) > 0 and exact)):
            if len(in_first) > 0:
                msg = "%s expected triples were not found\n" % len(in_first) + msg
            if len(in_second) > 0:
                msg = "%s unexpected triples were found\n" % len(in_second) + msg
            # NOTE(review): ntdiff is always True here, so the difflib
            # branch below is currently dead code.
            ntdiff = True
            if ntdiff:
                msg = "%r != %r\n" % (want, got) + msg
            else:
                import difflib
                d = difflib.unified_diff(want.serialize(format="turtle").decode("utf-8").split("\n"),
                                         got.serialize(format="turtle").decode("utf-8").split("\n"), n=10000)
                msg = msg + "\n".join(d)
#            print("=======WANT=======")
#            print(want.serialize(format="n3"))
#            print("=======GOT========")
#            print(got.serialize(format="n3"))
#            sys.exit(0)
            return self.fail(msg)
Exemplo n.º 17
0
    def testB(self):
        """Curiously, this one passes, even before the fix in issue 151"""

        graph = rdflib.Graph()
        accented = rdflib.Literal("\xe9")
        graph.add((rdflib.URIRef("urn:a"), rdflib.URIRef("urn:p"), accented))

        diff = graph_diff(graph, graph)
Exemplo n.º 18
0
def _dump_diff(g1, g2):
    """Print the three-way diff of g1 and g2 as sorted Turtle."""
    diff_parts = graph_diff(g1, g2)
    for label, part in zip(("both", "first", "second"), diff_parts):
        print("\nin %s:" % label)
        _dump_turtle_sorted(part)
Exemplo n.º 19
0
def _dump_diff(g1: Graph, g2: Graph) -> None:
    """Print the three-way diff of g1 and g2 in Turtle."""
    shared, first_only, second_only = graph_diff(g1, g2)
    sections = (("both", shared), ("first", first_only), ("second", second_only))
    for heading, part in sections:
        print(f"\nin {heading}:")
        _dump_turtle(part)
Exemplo n.º 20
0
    def compare_full_graphs(self,
                            gt_graph,
                            other_graph,
                            owl,
                            include=False,
                            raise_now=False,
                            reconcile=True,
                            to_ignore=None):
        ''' Compare gt_graph and other_graph.

        :param gt_graph: ground-truth graph
        :param other_graph: graph to compare against the ground truth
        :param owl: ontology used to produce readable term names
        :param include: if True, only report triples missing from other_graph
        :param raise_now: raise immediately instead of returning the message
        :param reconcile: reconcile the two graphs before diffing
        :param to_ignore: predicates to skip when reporting differences
        :return: a (possibly empty) description of the differences
        '''
        my_exception = ""

        # Guard: `p not in to_ignore` below raises TypeError when to_ignore
        # is left as its default None.
        if to_ignore is None:
            to_ignore = ()

        # We reconcile gt_graph with other_graph
        if reconcile:
            gt_graph, other_graph = self._reconcile_graphs(
                gt_graph, other_graph)

        in_both, in_gt, in_other = graph_diff(gt_graph, other_graph)

        exc_missing = list()

        for s, p, o in in_gt:
            # If there is a corresponding s,p check if
            # there is an equivalent o
            for o_other in in_other.objects(s, p):
                same_json_array, close_float, same_str = \
                            self._same_json_or_float(o, o_other)
                if same_json_array or close_float or same_str:
                    # Remove equivalent o from other as well
                    in_other.remove((s, p, o_other))
                    break
            else:
                if (p not in to_ignore):
                    exc_missing.append(
                        "\nMissing :\t '%s %s %s'" %
                        (self.get_readable_name(owl, gt_graph, s),
                         self.get_readable_name(owl, gt_graph, p),
                         self.get_readable_name(owl, gt_graph, o)))

        exc_added = list()
        if not include:
            for s, p, o in in_other:
                if p not in to_ignore:
                    exc_added.append(
                        "\nAdded :\t '%s %s %s'" %
                        (self.get_readable_name(owl, other_graph, s),
                         self.get_readable_name(owl, other_graph, p),
                         self.get_readable_name(owl, other_graph, o)))

        my_exception += "".join(sorted(exc_missing) + sorted(exc_added))

        if raise_now and my_exception:
            raise Exception(my_exception)

        return my_exception
Exemplo n.º 21
0
def test_graph_diff(g1, g2):
    """Assert that the isomorphic forms of g1 and g2 have no triples unique
    to g2, printing both sides of the diff on failure."""
    shared, first_only, second_only = graph_diff(to_isomorphic(g1), to_isomorphic(g2))
    first_only.namespace_manager = g1.namespace_manager
    second_only.namespace_manager = g2.namespace_manager
    ok_(len(second_only) == 0, f"""
<<<
{first_only.serialize(format='n3').decode('utf-8')}
===
{second_only.serialize(format='n3').decode('utf-8')}
>>>
""")
Exemplo n.º 22
0
 def compare_graphs(self):
     """
     Name: compare_graphs
     Description: Parse the custom gloss and the tool output into RDF
                  graphs and compute their three-way graph diff.
     Parameters: None.
     Return: None.
     """
     print("Loading graphs for comparison...")
     # Parse both inputs with the shared helper, using their configured formats.
     graph = self.__graph_parse__(self.custom_gloss, self.gloss_format)
     graph_tool = self.__graph_parse__(self.tool_output, self.tool_format)
     # NOTE(review): the diff results are computed but neither stored nor
     # returned here -- confirm downstream use.
     in_both , in_graph, in_graph_tool = compare.graph_diff(graph, graph_tool)
Exemplo n.º 23
0
    def complete_new_graph(cls, service, uri, parameters, new_graph,
                           resource=None):
        """I implement :meth:`ILocalResource.complete_new_graph`.

        If new_graph contains only a wikitext property, then all corresponding
        triples are generated.

        If new_graph contains other triples and either
        no wikitext *or* the same wikitext as previously,
        then the wikitext is updated to reflect the triples.

        If new_graph contains other triples and a wikitext different from
        the previous one, then the wikitext and the triples *have* to be
        consistent, or a InvalidDataError will be raised.
        """
        assert resource is not None # topics can only be created by PUT
        wikitexts = list(new_graph.objects(uri, SW.wikitext))
        if len(wikitexts) > 1:
            # leave it to WithCardinalityMixin to raise an error
            return

        if len(wikitexts) == 0:
            new_wikitext = None
        else:
            # Python 2 code: unicode() coerces the Literal to text.
            new_wikitext = unicode(wikitexts[0])

        if new_wikitext is not None  and  len(new_graph) == 1:
            # wikitext only: parse other triples from it
            wikitext_to_triples(resource, new_wikitext, into=new_graph)
            return

        if new_wikitext is not None  and  new_wikitext != resource.wikitext:
            # wikitext *and* triples were changed: they must be consistent
            from_text = wikitext_to_triples(resource, new_wikitext)
            from_text.add((uri, SW.wikitext, wikitexts[0]))
            if not isomorphic(from_text, new_graph):
                raise InvalidDataError("wikitext and triples are inconsistent")
            else:
                return

        # new_wikitext is either None or equal to old wikitext,
        # so we focus on the triples of new_graph
        if new_wikitext is None:
            old_wikitext = resource.get_state().value(uri, SW.wikitext)
            new_graph.add((uri, SW.wikitext, old_wikitext))
            new_wikitext = unicode(old_wikitext)
        # Reflect any added/removed triples back into the wikitext, then
        # store the regenerated wikitext on the new graph.
        _, added, removed = graph_diff(new_graph, resource.get_state())
        if added:
            new_wikitext = add_triples(resource, new_wikitext, added)
        if removed:
            new_wikitext = ban_triples(resource, new_wikitext, removed)
        if added or removed:
            new_graph.set((uri, SW.wikitext, Literal(new_wikitext)))
Exemplo n.º 24
0
    def do_algorithm(self, source_content: str,
                     target_content: str) -> List[SyncOperation]:
        """Compute the sync operations (removals then additions) that
        transform the source Turtle document into the target one."""
        source_iso = to_isomorphic(Graph().parse(format='turtle', data=source_content))
        target_iso = to_isomorphic(Graph().parse(format='turtle', data=target_content))
        _, removals_graph, additions_graph = graph_diff(source_iso, target_iso)

        add_ops = self._create_add_ops_from(additions_graph)
        remove_ops = self._create_remove_ops_from(removals_graph)
        return remove_ops + add_ops
Exemplo n.º 25
0
    def compare_full_graphs(self, gt_graph, other_graph, owl, include=False,
                            raise_now=False, reconcile=True, to_ignore=None):
        ''' Compare gt_graph and other_graph.

        :param gt_graph: ground-truth graph
        :param other_graph: graph to compare against the ground truth
        :param owl: ontology used to produce readable term names
        :param include: if True, only report triples missing from other_graph
        :param raise_now: raise immediately instead of returning the message
        :param reconcile: reconcile the two graphs before diffing
        :param to_ignore: predicates to skip when reporting differences
        :return: a (possibly empty) description of the differences
        '''
        my_exception = ""

        # Guard: `p not in to_ignore` below raises TypeError when to_ignore
        # is left as its default None.
        if to_ignore is None:
            to_ignore = ()

        # We reconcile gt_graph with other_graph
        if reconcile:
            gt_graph, other_graph = self._reconcile_graphs(
                gt_graph, other_graph)

        in_both, in_gt, in_other = graph_diff(gt_graph, other_graph)

        exc_missing = list()

        for s, p, o in in_gt:
            # If there is a corresponding s,p check if
            # there is an equivalent o
            for o_other in in_other.objects(s,  p):
                same_json_array, close_float, same_str = \
                            self._same_json_or_float(o, o_other)
                if same_json_array or close_float or same_str:
                    # Remove equivalent o from other as well
                    in_other.remove((s, p, o_other))
                    break
            else:
                if (p not in to_ignore):
                    exc_missing.append(
                        "\nMissing :\t '%s %s %s'"
                        % (
                            self.get_readable_name(owl, gt_graph, s),
                            self.get_readable_name(owl, gt_graph, p),
                            self.get_readable_name(owl, gt_graph, o)
                        ))

        exc_added = list()
        if not include:
            for s, p, o in in_other:
                if p not in to_ignore:
                    exc_added.append(
                        "\nAdded :\t '%s %s %s'"
                        % (
                            self.get_readable_name(owl, other_graph, s),
                            self.get_readable_name(owl, other_graph, p),
                            self.get_readable_name(owl, other_graph, o)
                        ))

        my_exception += "".join(sorted(exc_missing) + sorted(exc_added))

        if raise_now and my_exception:
            raise Exception(my_exception)

        return my_exception
def process_file(file):
    # Transform an XML file to RDF (N-Triples), writing changeset graphs for
    # any differences against a previously generated version.
    # (Python 2 code: print statements and `except Exception, e`.)
    root, filename = os.path.split(file)
    rdf_file = os.path.splitext(filename)[0]+'.nt'
    rdf_path = root + '/' + rdf_file

    clean_file(file)

    try:
        xml = etree.parse(file)
        rdf = transform(xml)

        g = Graph()
        g.parse(StringInputSource(rdf),"xml")


        # If the graph already exists then we want to generate some diffs before overwriting it: these can be used generating changesets when uploading to a datastore
        existing = False
        if os.path.exists(rdf_path):
            print "Comparing graphs"
            go = Graph()
            go.parse(rdf_path,format='nt')
            existing = True
        elif os.path.exists(root+'/archive/'+rdf_file):
            print "Comparing with archived graph"
            go = Graph()
            go.parse(root+'/archive/'+rdf_file,format='nt')
            existing = True

        if existing:
            both, old, new = graph_diff(go,g)
            if(len(old)):
                # old.serialize(rdf_path+'_old',format='nt') #Uncomment if you want a non-reified version of the statements
                cs = BatchChangeSet()
                cs.setCreatorName('IATI Update Scripts')
                cs.setChangeReason('Statements to remove from'+file)
                for (s,p,o) in old.triples((None, None, None)):
                    cs.remove(s,p,o)
                cs.getGraph().serialize(rdf_path+'_csremove',format='nt')
            if(len(new)):
                # new.serialize(rdf_path+'_new',format='nt') #Uncomment if you want a non-reified version of the statements
                cs = BatchChangeSet()
                cs.setCreatorName('IATI Update Scripts')
                cs.setChangeReason('Statements to add from '+file)
                # NOTE(review): cs.remove() is called here although these are
                # statements to *add* -- looks like it should be cs.add().
                for (s,p,o) in new.triples((None, None, None)):
                    cs.remove(s,p,o)
                cs.getGraph().serialize(rdf_path+'_csadd',format='nt')

        g.serialize(rdf_path,format='nt')

    except Exception, e:
        print "Error processing file "+ file
        print e
Exemplo n.º 27
0
def compare_graphs(actual, expected):
    """Assert that the actual and expected graphs are isomorphic, dumping
    both sides of the difference as sorted Turtle on mismatch."""
    iso_actual = to_isomorphic(actual)
    iso_expected = to_isomorphic(expected)

    if iso_actual != iso_expected:
        _, only_actual, only_expected = graph_diff(iso_actual, iso_expected)
        print("The actual and expected graphs differ")
        print("----- Contents of actual graph not in expected graph -----")
        dump_ttl_sorted(only_actual)
        print("----- Contents of expected graph not in actual graph -----")
        dump_ttl_sorted(only_expected)

    assert iso_actual == iso_expected
Exemplo n.º 28
0
 def sync_named_graph(self, name, incoming, size=BATCH_SIZE):
     """
     Sync incoming data with the existing data in the named graph.
     Returns an (added, removed) pair of triple counts.
     """
     current = self.get_existing(name)
     unchanged, to_add, to_delete = graph_diff(incoming, current)
     # The shared triples are not needed; free them immediately.
     del unchanged
     added = self.bulk_add(name, to_add, size=size)
     logger.info("Adding {} triples to {}.".format(added, name))
     removed = self.bulk_remove(name, to_delete, size=size)
     logger.info("Removed {} triples from {}.".format(removed, name))
     return added, removed
Exemplo n.º 29
0
def compare_rdf(expected: Union[Graph, str],
                actual: Union[Graph, str],
                fmt: Optional[str] = "turtle") -> Optional[str]:
    """
    Compare expected to actual, returning a string if there is a difference
    :param expected: expected RDF. Can be Graph, file name, uri or text
    :param actual: actual RDF. Can be Graph, file name, uri or text
    :param fmt: RDF format
    :return: None if they match else summary of difference
    """
    def rem_metadata(g: Graph) -> IsomorphicGraph:
        # Remove list declarations from target
        for s in g.subjects(RDF.type, RDF.List):
            g.remove((s, RDF.type, RDF.List))
        # Canonicalize so blank-node labelling differences are ignored.
        g_iso = to_isomorphic(g)
        return g_iso

    expected_graph = to_graph(expected, fmt)
    expected_isomorphic = rem_metadata(expected_graph)
    actual_graph = to_graph(actual, fmt)
    actual_isomorphic = rem_metadata(actual_graph)

    # Graph compare takes a Looong time
    # in_old: triples only in expected; in_new: triples only in actual.
    in_both, in_old, in_new = graph_diff(expected_isomorphic,
                                         actual_isomorphic)
    # if old_iso != new_iso:
    #     in_both, in_old, in_new = graph_diff(old_iso, new_iso)

    old_len = len(list(in_old))
    if old_len:
        # Drop triples explicitly whitelisted as allowed to be missing.
        for t in triples_to_ignore:
            if t in in_old:
                print(f"WARNING: {t} removed from expected graph")
                in_old.remove(t)
        old_len = len(in_old)
    new_len = len(list(in_new))
    if old_len and new_len:
        # presumably re-aligns blank-node subjects between the two diff
        # graphs to shrink the reported difference -- TODO confirm
        fix_subject_bnodes(in_old, in_new)
        old_len = len(in_old)
        new_len = len(in_new)
    if old_len or new_len:
        # Capture the human-readable report instead of printing to stdout.
        txt = StringIO()
        with redirect_stdout(txt):
            print("----- Missing Triples -----")
            if old_len:
                print_triples(in_old)
            print("----- Added Triples -----")
            if new_len:
                print_triples(in_new)
        return txt.getvalue()
    return None
Exemplo n.º 30
0
def trig(test):
    """Run one W3C TriG test case: parse ``test.action``; for eval tests,
    check the result is isomorphic to the expected NQuads graph."""
    g = ConjunctiveGraph()

    try:
        base = "http://www.w3.org/2013/TriGTests/" + split_uri(test.action)[1]

        g.parse(test.action, publicID=base, format="trig")
        if not test.syntax:
            # Negative syntax test: parsing should have raised before here.
            raise AssertionError("Input shouldn't have parsed!")

        if test.result:  # eval test
            res = ConjunctiveGraph()
            res.parse(test.result, format="nquads")

            if verbose:

                # Verbose mode: dump both serializations plus a diff
                # before failing.
                both, first, second = graph_diff(g, res)
                if not first and not second:
                    return

                print("===============================")
                print("TriG")
                print(g.serialize(format="nquads"))
                print("===============================")
                print("NQuads")
                print(res.serialize(format="nquads"))
                print("===============================")

                print("Diff:")
                # print "%d triples in both"%len(both)
                print("TriG Only:")
                for t in first:
                    print(t)

                print("--------------------")
                print("NQuads Only")
                for t in second:
                    print(t)
                raise Exception("Graphs do not match!")

            assert isomorphic(
                g, res
            ), "graphs must be the same, expected\n%s\n, got\n%s" % (
                g.serialize(),
                res.serialize(),
            )

    except:
        # Negative-syntax tests are expected to raise; re-raise otherwise.
        # NOTE(review): bare except also catches KeyboardInterrupt/SystemExit.
        if test.syntax:
            raise
Exemplo n.º 31
0
def compare_rdf(expected: Union[Graph, str],
                actual: Union[Graph, str],
                fmt: Optional[str] = "turtle") -> Optional[str]:
    """
    Compare expected to actual, returning a string if there is a difference
    :param expected: expected RDF. Can be Graph, file name, uri or text
    :param actual: actual RDF. Can be Graph, file name, uri or text
    :param fmt: RDF format
    :return: None if they match else summary of difference
    """
    def rem_metadata(g: Graph) -> IsomorphicGraph:
        # Remove list declarations from target
        for s in g.subjects(RDF.type, RDF.List):
            g.remove((s, RDF.type, RDF.List))
        # Strip volatile generation metadata (dates/sizes) that would make
        # otherwise-identical outputs differ between runs.
        # NOTE(review): mutates g while iterating it; rdflib appears to
        # tolerate this but consider iterating over list(g) -- confirm.
        for t in g:
            if t[1] in (LINKML.generation_date, LINKML.source_file_date,
                        LINKML.source_file_size, TYPE.generation_date,
                        TYPE.source_file_date, TYPE.source_file_size):
                g.remove(t)
        g_iso = to_isomorphic(g)
        return g_iso

    # Bypass compare if settings have turned it off
    if SKIP_RDF_COMPARE:
        print(f"tests/utils/compare_rdf.py: {SKIP_RDF_COMPARE_REASON}")
        return None

    expected_graph = to_graph(expected, fmt)
    expected_isomorphic = rem_metadata(expected_graph)
    actual_graph = to_graph(actual, fmt)
    actual_isomorphic = rem_metadata(actual_graph)

    # Graph compare takes a Looong time
    # in_old: triples only in expected; in_new: triples only in actual.
    in_both, in_old, in_new = graph_diff(expected_isomorphic,
                                         actual_isomorphic)
    # if old_iso != new_iso:
    #     in_both, in_old, in_new = graph_diff(old_iso, new_iso)
    old_len = len(list(in_old))
    new_len = len(list(in_new))
    if old_len or new_len:
        # Capture the report instead of printing it to stdout.
        txt = StringIO()
        with redirect_stdout(txt):
            print("----- Missing Triples -----")
            if old_len:
                print_triples(in_old)
            print("----- Added Triples -----")
            if new_len:
                print_triples(in_new)
        return txt.getvalue()
    return None
Exemplo n.º 32
0
def main():
    """Parse CLI arguments, load both files, and print the triples that are
    unique to each one."""
    opts = ap.parse_args()
    fmt_one = guess_format(opts.file1)
    fmt_two = guess_format(opts.file2)
    graph_one: Graph = Graph().parse(opts.file1, format=fmt_one)
    graph_two: Graph = Graph().parse(opts.file2, format=fmt_two)
    # Canonicalize blank nodes before diffing.
    iso_one: IsomorphicGraph = to_isomorphic(graph_one)
    iso_two: IsomorphicGraph = to_isomorphic(graph_two)
    _shared, only_one, only_two = graph_diff(iso_one, iso_two)
    print(f"Only in {opts.file1}")
    dump_nt_sorted(only_one)

    print(f"Only in {opts.file2}")
    dump_nt_sorted(only_two)
Exemplo n.º 33
0
def post_updates(named_graph, graph):
    """
    Function for posting the data.
    """

    # VIVO store endpoints, configured through the environment.
    query_endpoint = os.environ['VIVO_URL'] + '/api/sparqlQuery'
    update_endpoint = os.environ['VIVO_URL'] + '/api/sparqlUpdate'
    vstore = SyncVStore(
                os.environ['VIVO_EMAIL'],
                os.environ['VIVO_PASSWORD']
            )
    vstore.open((query_endpoint, update_endpoint))

    existing = vstore.get_existing(named_graph)

    # URIs whose statements are being (re)asserted by this update.
    changed_uris = {subj for subj in graph.subjects()}

    # Gather every existing statement about those URIs; these are the
    # candidate deletions for this incremental update.
    remove_graph = Graph()
    for changed_uri in changed_uris:
        for pred, obj in existing.predicate_objects(subject=changed_uri):
            remove_graph.add((changed_uri, pred, obj))

    # Diff incoming statements against the candidates for removal.
    _in_both, adds, deletes = graph_diff(graph, remove_graph)

    num_additions = len(adds)
    num_remove = len(deletes)

    if num_additions == 0 and num_remove == 0:
        logger.info("No updates to {}.".format(named_graph))
    else:
        if num_additions > 0:
            logger.info("Will add {} triples to {}.".format(num_additions, named_graph))
            vstore.bulk_add(named_graph, adds)

        if num_remove > 0:
            logger.info("Will remove {} triples from {}.".format(num_remove, named_graph))
            vstore.bulk_remove(named_graph, deletes)

    return True
Exemplo n.º 34
0
    def recheck(self) -> bool:
        """ Recompute the differences between the graphs

        :returns: True if changes were detected
        """
        # Only worth re-diffing when the previous comparison failed and at
        # least one of the graphs has been modified since.
        if not self.passed and self.changed:
            self.expected_graph.changed = False
            self.actual_graph.changed = False
            if not self.known_match:
                # Remember previous counts so we can report whether the
                # diff actually moved.
                cur_lens = (self.both_len, self.old_len, self.new_len)
                self.in_both, self.in_old, self.in_new = graph_diff(
                    self.expected_graph, self.actual_graph)
                self._upd_lens()
                self.passed = not self.diffs_exist
                return cur_lens != (self.both_len, self.old_len, self.new_len)
        return False
Exemplo n.º 35
0
def graphdiff(first, second):
    """
    Diff between graph instances, should be replaced/included in quit diff

    :param first: "old" graph collection (InMemoryAggregatedGraph or None)
    :param second: "new" graph collection (InMemoryAggregatedGraph or None)
    :return: OrderedDict mapping each graph IRI to a list of
        ('additions'|'removals', triple-generator) change tuples
    """
    from rdflib.compare import to_isomorphic, graph_diff

    diffs = OrderedDict()
    iris = set()

    # Pre-initialize so the membership tests below cannot raise NameError
    # when an argument is None or not an InMemoryAggregatedGraph.
    first_identifiers = []
    second_identifiers = []

    if first is not None and isinstance(first, InMemoryAggregatedGraph):
        first_identifiers = list((g.identifier for g in first.graphs()))
        iris = iris.union(first_identifiers)
    if second is not None and isinstance(second, InMemoryAggregatedGraph):
        second_identifiers = list((g.identifier for g in second.graphs()))
        iris = iris.union(second_identifiers)

    for iri in sorted(list(iris)):
        changes = diffs.get(iri, [])

        if (first is not None and iri in first_identifiers) and (
                second is not None and iri in second_identifiers):
            # Present on both sides: diff isomorphic forms so blank-node
            # relabelling is not reported as a change.
            g1 = first.get_context(iri)
            g2 = second.get_context(iri)
            in_both, in_first, in_second = graph_diff(to_isomorphic(g1),
                                                      to_isomorphic(g2))

            if len(in_second) > 0:
                changes.append(
                    ('additions', ((s, p, o) for s, p, o in in_second)))
            if len(in_first) > 0:
                changes.append(
                    ('removals', ((s, p, o) for s, p, o in in_first)))
        elif first is not None and iri in first_identifiers:
            # Whole graph removed.
            changes.append(
                ('removals', ((s, p, o)
                              for s, p, o in first.get_context(iri))))
        elif second is not None and iri in second_identifiers:
            # Whole graph added.
            changes.append(
                ('additions', ((s, p, o)
                               for s, p, o in second.get_context(iri))))
        else:
            continue

        diffs[iri] = changes
    return diffs
Exemplo n.º 36
0
def trig(test):
    """Run one W3C TriG test case: parse ``test.action``; for eval tests,
    check the result is isomorphic to the expected NQuads graph."""
    g = ConjunctiveGraph()

    try:
        base = 'http://www.w3.org/2013/TriGTests/'+split_uri(test.action)[1]

        g.parse(test.action, publicID=base, format='trig')
        if not test.syntax:
            # Negative syntax test: parsing should have raised before here.
            raise AssertionError("Input shouldn't have parsed!")

        if test.result: # eval test
            res = ConjunctiveGraph()
            res.parse(test.result, format='nquads')

            if verbose:


                # Verbose mode: dump both serializations plus a diff
                # before failing.
                both, first, second = graph_diff(g,res)
                if not first and not second: return

                print('===============================')
                print('TriG')
                print(g.serialize(format='nquads'))
                print('===============================')
                print('NQuads')
                print(res.serialize(format='nquads'))
                print('===============================')

                print("Diff:")
                #print "%d triples in both"%len(both)
                print("TriG Only:")
                for t in first:
                    print(t)

                print("--------------------")
                print("NQuads Only")
                for t in second:
                    print(t)
                raise Exception('Graphs do not match!')

            assert isomorphic(g, res), 'graphs must be the same'

    except:
        # Negative-syntax tests are expected to raise; re-raise otherwise.
        # NOTE(review): bare except also catches KeyboardInterrupt/SystemExit.
        if test.syntax:
            raise
Exemplo n.º 37
0
def trig(test):
    # Python 2 variant of the TriG test runner: parse test.action; for eval
    # tests, check isomorphism against the expected NQuads result.
    g = ConjunctiveGraph()

    try:
        base = 'http://www.w3.org/2013/TriGTests/' + split_uri(test.action)[1]

        g.parse(test.action, publicID=base, format='trig')
        if not test.syntax:
            # Negative syntax test: parsing should have raised before here.
            raise AssertionError("Input shouldn't have parsed!")

        if test.result:  # eval test
            res = ConjunctiveGraph()
            res.parse(test.result, format='nquads')

            if verbose:

                # Verbose mode: dump both serializations plus a diff
                # before failing.
                both, first, second = graph_diff(g, res)
                if not first and not second: return

                print '==============================='
                print 'TriG'
                print g.serialize(format='nquads')
                print '==============================='
                print 'NQuads'
                print res.serialize(format='nquads')
                print '==============================='

                print "Diff:"
                #print "%d triples in both"%len(both)
                print "TriG Only:"
                for t in first:
                    print t

                print "--------------------"
                print "NQuads Only"
                for t in second:
                    print t
                raise Exception('Graphs do not match!')

            assert isomorphic(g, res), 'graphs must be the same'

    except:
        # Negative-syntax tests are expected to raise; re-raise otherwise.
        if test.syntax:
            raise
Exemplo n.º 38
0
def turtle_equal(a, b):
    """
    Given two strings representing turtle-encoded RDF,
    check whether they represent the same graph.

    :param a: a Turtle document (str)
    :param b: a Turtle document (str)
    :return: True if the two documents encode isomorphic graphs
    """
    ga = Graph().parse(format='turtle', data=a)
    gb = Graph().parse(format='turtle', data=b)
    eq = isomorphic(ga, gb)
    if not eq:
        # On mismatch, show where the graphs diverge to aid debugging.
        # (A leftover debug loop that printed every triple of `ga` on every
        # call -- including successful comparisons -- has been removed.)
        both, first, second = graph_diff(ga, gb)
        print("Present in both:")
        print(both)
        print("Present in first:")
        print(first)
        print("Present in second:")
        print(second)
    return eq
Exemplo n.º 39
0
def find_diff(g_rdf, g0_rdf):
    """Diff two graphs, treating triples as equal when their terms compare
    equal via ``.eq`` (so e.g. typed and plain literals can match).

    Returns (graphs_equal, in_both, in_first_only, in_second_only).
    """
    graphs_equal = True
    in_both, in_first, in_second = graph_diff(g_rdf, g0_rdf)
    # Serialize each side's extra triples as sorted N-Triples lines;
    # [1:] skips the leading line of the serialization.
    g1 = sorted(in_first.serialize(format='nt').splitlines())[1:]
    g2 = sorted(in_second.serialize(format='nt').splitlines())[1:]
    # Compare literals
    if len(g1) != len(g2):
        graphs_equal = False
    # Greedy pairwise matching; the two sublists record which line indices
    # of g1 / g2 have been paired up.
    matching_indices = [[], []]
    for idx in range(len(g1)):
        # Re-parse the single N-Triples line back into a statement.
        g1_stmt = list(rl.ConjunctiveGraph().parse(BytesIO(g1[idx]),
                                                   format='nt'))[0]
        match_found = False
        for idx2 in range(len(g2)):
            if idx2 in matching_indices[1]:
                continue
            g2_stmt = list(rl.ConjunctiveGraph().parse(BytesIO(g2[idx2]),
                                                       format='nt'))[0]
            try:
                # .eq compares term values rather than exact lexical forms.
                all_match = all([g1_stmt[i].eq(g2_stmt[i]) for i in range(3)])
            except TypeError as e:
                #logger.info(e, g1_stmt, g2_stmt)
                all_match = False
            if all_match:
                matching_indices[0].append(idx)
                matching_indices[1].append(idx2)
                match_found = True
                break
        if not match_found:
            graphs_equal = False
    # Rebuild the "only in first/second" graphs without the matched lines;
    # matched lines are folded back into in_both.
    in_first2 = rl.ConjunctiveGraph()
    for idx in range(len(g1)):
        if idx in matching_indices[0]:
            in_both.parse(BytesIO(g1[idx]), format='nt')
        else:
            in_first2.parse(BytesIO(g1[idx]), format='nt')
    in_second2 = rl.ConjunctiveGraph()
    for idx in range(len(g2)):
        if not idx in matching_indices[1]:
            in_second2.parse(BytesIO(g2[idx]), format='nt')
    #logger.info(in_first2)
    #logger.info(in_second2)
    return graphs_equal, in_both, in_first2, in_second2
Exemplo n.º 40
0
Arquivo: test.py Projeto: niklasl/oort
def run_grit_test(rdfxml_fpath, grit_fpath):
    """Check that the GRIT transform of ``rdfxml_fpath`` matches the spec
    file, and (when ``isograph`` is available) that GRDDL-ing the spec
    round-trips to the original RDF."""
    rdfxml = etree.parse(rdfxml_fpath)
    speced_grit = etree.parse(grit_fpath)
    actual_grit = GRIT_XSLT(rdfxml)
    assert canonical_str(actual_grit) == canonical_str(speced_grit), \
            "Grit from <%s> doesn't equal specified result in <%s>" % (
                    rdfxml_fpath, grit_fpath)
    if not isograph:
        # No isomorphic-graph support available; skip the round-trip check.
        return
    gleaned_rdf = GRDDL_XSLT(speced_grit)
    gleaned_graph = isograph(gleaned_rdf)
    ref_graph = isograph(rdfxml)
    # Lazily build a report of the triples unique to each side.
    diff = lambda: "\nOnly in gleaned:%s\nOnly in spec:%s\n" % tuple(
                 "\n".join(
                    sorted(g.serialize(format='nt').splitlines()) )
                for g in graph_diff(gleaned_graph, ref_graph)[1:] )
    assert gleaned_graph == ref_graph, \
            "RDF from GRDDL:ed <%s> doesn't equal original RDF. Diff: %s" % (
                    grit_fpath, diff())
Exemplo n.º 41
0
def find_diff(g_rdf, g0_rdf):
    """Diff two graphs, treating triples as equal when their terms compare
    equal via ``.eq`` (so e.g. typed and plain literals can match).

    Returns (graphs_equal, in_both, in_first_only, in_second_only).
    """
    graphs_equal = True
    in_both, in_first, in_second = graph_diff(g_rdf, g0_rdf)
    # Serialize each side's extra triples as sorted N-Triples lines;
    # [1:] skips the leading line of the serialization.
    g1 = sorted(in_first.serialize(format='nt').splitlines())[1:]
    g2 = sorted(in_second.serialize(format='nt').splitlines())[1:]
    # Compare literals
    if len(g1) != len(g2):
        graphs_equal = False
    # Greedy pairwise matching; the two sublists record which line indices
    # of g1 / g2 have been paired up.
    matching_indices = [[], []]
    for idx in range(len(g1)):
        # Re-parse the single N-Triples line back into a statement.
        g1_stmt = list(rl.ConjunctiveGraph().parse(BytesIO(g1[idx]),
                                                   format='nt'))[0]
        match_found = False
        for idx2 in range(len(g2)):
            if idx2 in matching_indices[1]:
                continue
            g2_stmt = list(rl.ConjunctiveGraph().parse(BytesIO(g2[idx2]),
                                                       format='nt'))[0]
            try:
                # .eq compares term values rather than exact lexical forms.
                all_match = all([g1_stmt[i].eq(g2_stmt[i]) for i in range(3)])
            except TypeError as e:
                #logger.info(e, g1_stmt, g2_stmt)
                all_match = False
            if all_match:
                matching_indices[0].append(idx)
                matching_indices[1].append(idx2)
                match_found = True
                break
        if not match_found:
            graphs_equal = False
    # Rebuild the "only in first/second" graphs without the matched lines;
    # matched lines are folded back into in_both.
    in_first2 = rl.ConjunctiveGraph()
    for idx in range(len(g1)):
        if idx in matching_indices[0]:
            in_both.parse(BytesIO(g1[idx]), format='nt')
        else:
            in_first2.parse(BytesIO(g1[idx]), format='nt')
    in_second2 = rl.ConjunctiveGraph()
    for idx in range(len(g2)):
        if not idx in matching_indices[1]:
            in_second2.parse(BytesIO(g2[idx]), format='nt')
    #logger.info(in_first2)
    #logger.info(in_second2)
    return graphs_equal, in_both, in_first2, in_second2
Exemplo n.º 42
0
def graphdiff(first, second):
    """
    Diff between graph instances, should be replaced/included in quit diff

    :param first: "old" graph collection (InMemoryAggregatedGraph or None)
    :param second: "new" graph collection (InMemoryAggregatedGraph or None)
    :return: OrderedDict mapping each graph IRI to a list of
        ('additions'|'removals', triple-generator) change tuples
    """
    from rdflib.compare import to_isomorphic, graph_diff

    diffs = OrderedDict()
    iris = set()

    # Pre-initialize so the membership tests below cannot raise NameError
    # when an argument is None or not an InMemoryAggregatedGraph.
    first_identifiers = []
    second_identifiers = []

    if first is not None and isinstance(first, InMemoryAggregatedGraph):
        first_identifiers = list((g.identifier for g in first.graphs()))
        iris = iris.union(first_identifiers)
    if second is not None and isinstance(second, InMemoryAggregatedGraph):
        second_identifiers = list((g.identifier for g in second.graphs()))
        iris = iris.union(second_identifiers)

    for iri in sorted(list(iris)):
        changes = diffs.get(iri, [])

        if (
            first is not None and iri in first_identifiers
        ) and (
            second is not None and iri in second_identifiers
        ):
            # Present on both sides: diff isomorphic forms so blank-node
            # relabelling is not reported as a change.
            g1 = first.get_context(iri)
            g2 = second.get_context(iri)
            in_both, in_first, in_second = graph_diff(to_isomorphic(g1), to_isomorphic(g2))

            if len(in_second) > 0:
                changes.append(('additions', ((s, p, o) for s, p, o in in_second)))
            if len(in_first) > 0:
                changes.append(('removals', ((s, p, o) for s, p, o in in_first)))
        elif first is not None and iri in first_identifiers:
            # Whole graph removed.
            changes.append(('removals', ((s, p, o) for s, p, o in first.get_context(iri))))
        elif second is not None and iri in second_identifiers:
            # Whole graph added.
            changes.append(('additions', ((s, p, o) for s, p, o in second.get_context(iri))))
        else:
            continue

        diffs[iri] = changes
    return diffs
Exemplo n.º 43
0
    def difftool(self, local, remote, merged, base, diffFormat='sparql'):
        """Read the given graph stores, diff local against remote, and print
        the serialized diff.

        :param local: path of the "local" graph store (falsy to skip loading)
        :param remote: path of the "remote" graph store (falsy to skip loading)
        :param merged: path of the merged graph store (falsy to skip loading)
        :param base: path of the base graph store (falsy to skip loading)
        :param diffFormat: serializer name, e.g. 'sparql' -> SparqlDiff
        """
        if local:
            self.local = self.readIsomorphicGraph(local)

        if remote:
            self.remote = self.readIsomorphicGraph(remote)

        if merged:
            self.merged = self.readIsomorphicGraph(merged)

        if base:
            self.base = self.readIsomorphicGraph(base)

        add = {}
        remove = {}

        # Consider every graph URI present on either side.
        graphUris = set(self.local.keys()) | set(self.remote.keys())

        for uri in graphUris:
            if uri in self.local.keys() and uri in self.remote.keys():
                # Present on both sides: keep only the differing triples.
                localGraph = self.local[uri]
                remoteGraph = self.remote[uri]
                in_both, in_first, in_second = compare.graph_diff(localGraph, remoteGraph)
                add[uri] = in_second
                remove[uri] = in_first
            elif uri in self.local.keys():
                # Only local: the whole graph was removed.
                remove[uri] = self.local[uri]
            elif uri in self.remote.keys():
                # Only remote: the whole graph was added.
                add[uri] = self.remote[uri]
            else:
                # Unreachable: graphUris is the union of both key sets.
                # (Was a no-op bare `True` expression statement; `pass`
                # states the intent explicitly.)
                pass

        # Resolve the serializer class for the requested format.
        module = diffFormat.title() + "Diff"
        diff = getattr(import_module('quit_diff.serializer.' + module), module)

        diffSerializer = diff()
        print(diffSerializer.serialize(add, remove))
Exemplo n.º 44
0
def compute_added_and_removed(new_graph, old_graph, added=None, removed=None):
    """I compute the graphs of added triples and of removed triples.

    If both `added` and `removed` are already provided, they are returned
    unchanged, avoiding the overhead of computing the diff again.

    For overridden versions of `check_new_graph` that require these values,
    I should be called as::

        added, removed = self._compute_added_and_removed(
            new_graph, old_graph, added, removed)

    and *before* delegating to ``super(...).check_new_graph``, because the
    result is not transmitted to the calling function -- so the computation
    must be performed once at the highest level that needs it.
    """
    if added is not None:
        # Precomputed values must be supplied together; reuse them.
        assert removed is not None
        return added, removed
    assert removed is None
    # Triples only in new_graph were added; only in old_graph, removed.
    _common, added, removed = graph_diff(new_graph, old_graph)
    return added, removed
Exemplo n.º 45
0
    def assertEqualGraphs(self, want, got, exact=True):
        """Assert that two RDF graphs are identical (isomorphic).

        :param want: The graph as expected, as an :py:class:`~rdflib.graph.Graph` object or the filename of a serialized graph
        :param got: The actual graph, as an :py:class:`~rdflib.graph.Graph` object or the filename of a serialized graph
        :param exact: Whether to require that the graphs are exactly alike (True) or only if all triples in want exists in got (False)
        :type  exact: bool
        """

        def _loadgraph(filename):
            # Load a serialized graph, guessing its format from the name.
            g = rdflib.Graph()
            # we must read the data ourself, providing a non-ascii
            # filename to Graph.parse fails deep in rdflib internals
            g.parse(data=util.readfile(filename, "rb"),
                    format=guess_format(filename))
            return g

        if not isinstance(want, rdflib.Graph):
            want = _loadgraph(want)
        if not isinstance(got, rdflib.Graph):
            got = _loadgraph(got)

        (in_both, in_first, in_second) = graph_diff(want, got)
        msg = ""
        # '-' lines: expected triples missing from the actual graph.
        if in_first:
            for (s, p, o) in sorted(in_first, key=lambda t: (t[0], t[1], t[2])):
                msg += "- %s %s %s\n" % (s.n3(), p.n3(), o.n3())
        # '+' lines: unexpected extras, shown when exact, or alongside
        # missing triples for context.
        if (exact and in_second) or in_first:
            for (s, p, o) in sorted(in_second, key=lambda t: (t[0], t[1], t[2])):
                msg += "+ %s %s %s\n" % (s.n3(), p.n3(), o.n3())
        if ((len(in_first) > 0) or (len(in_second) > 0 and exact)):
            # Prepend summary counts, then the header, then the triples.
            if len(in_first) > 0:
                msg = "%s expected triples were not found\n" % len(in_first) + msg
            if len(in_second) > 0:
                msg = "%s unexpected triples were found\n" % len(in_second) + msg
            msg = "%r != %r\n" % (want, got) + msg
            return self.fail(msg)
Exemplo n.º 46
0
    if not args:
        print "USAGE: %s FILE [rdf...]" % p.basename(cmd)
        print "Where FILE is a local copy of <https://lagen.nu/1976:725>. Get it by doing e.g.:"
        print "  $ /usr/bin/curl -sk 'https://lagen.nu/1976:725' > /tmp/sfs-1976_725.xhtml"
        print
        print "If additional local rdf files are supplied, a diff of the " \
            "extracted data and the supplied data is output (instead of just the " \
            "extracted data)."
        exit()
    docpath = args[0]

    graph = fsdoc_to_graph(docpath)

    from rdfextras.tools.pathutils import guess_format
    cmp_graph = Graph()
    for fpath in args[1:]:
        cmp_graph.load(fpath, format=guess_format(fpath))

    if cmp_graph:
        from rdflib.compare import graph_diff
        in_both, in_first, in_second = graph_diff(graph, cmp_graph)
        print "# %s new statements:" % len(in_first)
        for pfx, uri in graph.namespaces():
            in_first.bind(pfx, uri)
        print in_first.serialize(format='n3')

    else:
        print "# Nothing to compare against. New RDF is:"
        print graph.serialize(format='n3')

Exemplo n.º 47
0
#!/usr/bin/env python
# Smoke test (Python 2): run run.sh, parse its N3 output, and assert it
# matches the expected graph in expected.ttl.
from os import path as p, popen
from rdflib import Graph, compare

# Resolve a path relative to this script's directory.
scriptpath = lambda lpath: p.join(p.dirname(__file__), lpath)

real = Graph().parse(popen("sh %s" % scriptpath("run.sh")), format='n3')
expected = Graph().parse(scriptpath("expected.ttl"), format='n3')

# graph_diff returns (in_both, only_in_expected, only_in_real); content in
# either "only" graph means the outputs differ.
diff = compare.graph_diff(expected, real)
assert not diff[1] and not diff[2], "Expected: %s Got: %s" % tuple(
        g.serialize(format='n3') for g in diff[1:])
print "Ok."

        #If the old data has been uploaded then it will be in /archive/
        #If it has not yet been uploaded it will be in the main path. Prefer the main path version...
        rdf_path = exec_path + '/online.nt'

        #Now check if either really does exist, and make the changesets
        if os.path.exists(rdf_path):
            print "Comparing with old data"
            og = Graph()
            try:
                og.parse(rdf_path,format='nt')
            except Exception, e:
                print "Failed reading archived online data"
                print e

            print "Running graph diff - new data against archived data"
            both, old, new = graph_diff(og,ng)
            if(len(old)):
                cs = BatchChangeSet()
                cs.setCreatorName('R4D Update Scripts')
                cs.setChangeReason('Statements to remove from'+dirList[0])
                for (s,p,o) in old.triples((None, None, None)):
                    cs.remove(s,p,o)
                print "Saving triples for removal to changeset"
                cs.getGraph().serialize(rdf_path+'_csremove',format='nt')
            if(len(new)):
                cs = BatchChangeSet()
                cs.setCreatorName('IATI Update Scripts')
                cs.setChangeReason('Statements to add from '+dirList[0])
                for (s,p,o) in new.triples((None, None, None)):
                    cs.remove(s,p,o)
                print "Saving new triples to changeset"
Exemplo n.º 49
0
def my_graph_diff(graph1, graph2):
    """Compares graph2 to graph1 and highlights everything that changed.
    Colored if pygments available"""

    # quick fix for wrong type: unwrap single-context conjunctive graphs
    # into their one plain Graph.
    if not type(graph1) == type(graph2) == rdflib.Graph:
        if type(graph1) == rdflib.ConjunctiveGraph:
            g1contexts = list(graph1.contexts())
            assert len(g1contexts) == 1
            graph1 = g1contexts[0]
        if type(graph2) == rdflib.ConjunctiveGraph:
            g2contexts = list(graph2.contexts())
            assert len(g2contexts) == 1
            graph2 = g2contexts[0]

    # Return if both graphs are isomorphic
    iso1 = compare.to_isomorphic(graph1)
    iso2 = compare.to_isomorphic(graph2)

    if graph1.identifier == graph2.identifier:
        str_bit = u"The 2 '%s' Graphs" % graph1.identifier
    else:
        str_bit = (u"Graphs '%s' and '%s'"
                   % (graph1.identifier, graph2.identifier))

    if iso1 == iso2:
        logger.debug(u"%s are isomorphic" % str_bit)
        return

    print(u"Differences between %s." % str_bit)

    in_both, in_first, in_second = compare.graph_diff(iso1, iso2)

    def dump_nt_sorted(g):
        # Sorted N-Triples lines give a stable ordering for the diff.
        return sorted(g.serialize(format='nt').splitlines())

    sorted_first = dump_nt_sorted(in_first)
    sorted_second = dump_nt_sorted(in_second)

    import difflib

    # Render a unified diff of the two triple listings.
    diff = difflib.unified_diff(
        sorted_first,
        sorted_second,
        u'Original',
        u'Current',
        lineterm=''
    )

    try:
        from pygments import highlight
        from pygments.formatters import terminal
        from pygments.lexers import web

        lexer = web.XmlLexer()
        formatter = terminal.TerminalFormatter()
        print(highlight(u'\n'.join(diff), lexer, formatter))
    except ImportError:
        logger.info("Install pygments for colored diffs")
        print(u'\n'.join(diff))
    except UnicodeDecodeError:
        # NOTE(review): `unicode` only exists on Python 2; this fallback
        # would raise NameError on Python 3.
        print(u"Only in first", unicode(sorted_first))
        print(u"Only in second", unicode(sorted_second))
Exemplo n.º 50
0
# Command-line diff of two Turtle files (with hard-coded defaults below).
# With a third CLI argument, triples unique to the first file are printed;
# otherwise those unique to the second file.
from rdflib import Graph
from rdflib.compare import to_isomorphic, graph_diff
import sys

if len(sys.argv)>=3:
   F1 = sys.argv[1]
   F2= sys.argv[2] 
else:
   F1 = "/home/barry/Downloads/instance.ttl"
   F2 = "/home/barry/Downloads/t2.ttl"
g1 = Graph()
g1.parse(F1, format="turtle")

g2 = Graph()
g2.parse(F2, format="turtle")

# Canonicalize blank nodes so the diff reflects real differences only.
iso1 = to_isomorphic(g1)
iso2 = to_isomorphic(g2)

in_both, in_first, in_second = graph_diff(iso1, iso2)

if len(sys.argv)==4:
   print(in_first.serialize(format="n3").decode('utf-8'))
else:
   print(in_second.serialize(format="n3").decode('utf-8'))

Exemplo n.º 51
0
    def testA(self):
        """with bnode"""
        # Regression check: diffing a graph containing a blank node and a
        # non-ASCII literal against itself must not raise.
        g = rdflib.Graph()
        g.add((rdflib.BNode(), rdflib.URIRef("urn:p"), rdflib.Literal(u'\xe9')))

        # The result is intentionally unused; we only care that it succeeds.
        diff = graph_diff(g, g)
Exemplo n.º 52
0
# Compare two releases of schema.org
#
# NOTE: the original used an inconsistent mix of tabs and spaces for
# indentation, which is a hard SyntaxError (TabError) under Python 3; the
# script is normalized to 4-space indents and a py2/py3-compatible
# single-argument print(...) call.

from rdflib import Graph
from rdflib.compare import to_isomorphic, graph_diff

if __name__ == '__main__':

    sdons = 'http://schema.org/'
    g1 = Graph()
    g2 = Graph()
    p = Graph()

    # first = str(sys.argv[1])
    # second = str(sys.argv[2])
    first = 'data/releases/2.2/schema.rdfa'
    second = 'data/releases/3.0/schema.rdfa'

    # Parse both releases; `p` presumably collects RDFa processor-graph
    # triples separately from the data -- TODO confirm against rdflib docs.
    g1.parse(first, format='rdfa', pgraph=p)  # , charset="utf8")
    g2.parse(second, format='rdfa', pgraph=p)  # , charset="utf8")

    in_both, in_first, in_second = graph_diff(g1, g2)

    # Bind the schema: prefix so the serialization is readable.
    in_both.bind('schema', sdons)
    in_first.bind('schema', sdons)
    in_second.bind('schema', sdons)

    # Print the triples common to both releases.
    print(in_both.serialize(format="n3"))

# print(in_first.serialize(format="n3"))
# print(in_second.serialize(format="n3"))