예제 #1
0
    def __do_three_step_update(self, row, column_name, uri, path, data_update):
        """
        Given the current state in the update, and a path length three column_def, add, change or delete intermediate
        and end objects as necessary to perform the requested update
        :param row: row number of the update.  For printing
        :param column_name: column_name of the update.  For printing
        :param uri: uri of the entity at the head of the path
        :param path: the column definition
        :param data_update: the data provided for the update
        :return: Changes in the update_graph
        """
        from rdflib import RDF, RDFS, Literal, URIRef
        from vivopump import new_uri, get_step_triples

        step_def = path[0]
        step_uris = [
            o for s, p, o in
            get_step_triples(self.update_graph, uri, column_name, step_def,
                             self.query_parms, self.verbose)
        ]

        if len(step_uris) == 0:

            # VIVO has no values for first intermediate, so add new intermediate and do a two step update on it

            step_uri = URIRef(new_uri(self.query_parms))
            self.update_graph.add(
                (uri, step_def['predicate']['ref'], step_uri))
            self.update_graph.add(
                (step_uri, RDF.type, step_def['object']['type']))
            if 'label' in step_def['object']:
                self.update_graph.add(
                    (step_uri, RDFS.label,
                     Literal(step_def['object']['label'],
                             datatype=step_def['object'].get('datatype', None),
                             lang=step_def['object'].get('lang', None))))
            self.__do_two_step_update(row, column_name, step_uri, path[1:],
                                      data_update)

        elif step_def['predicate']['single'] == True:

            #   VIVO has 1 or more values for first intermediate, so we need to see if the predicate
            #   is expected to be single

            step_uri = step_uris[0]
            if len(step_uris) > 1:
                print "WARNING: Single predicate", path[0]['object']['name'], "has", len(step_uris), "values: ", \
                    step_uris, "using", step_uri
            self.__do_two_step_update(row, column_name, step_uri, path[1:],
                                      data_update)
        return None
예제 #2
0
파일: pump.py 프로젝트: atloiaco/vivo-pump
    def __do_three_step_update(self, row, column_name, uri, path, data_update):
        """
        Given the current state in the update, and a path length three column_def, add, change or delete intermediate
        and end objects as necessary to perform the requested update
        :param row: row number of the update.  For printing
        :param column_name: column_name of the update.  For printing
        :param uri: uri of the entity at the head of the path
        :param path: the column definition
        :param data_update: the data provided for the update
        :return: Changes in the update_graph
        """
        from rdflib import RDF, RDFS, Literal, URIRef
        from vivopump import new_uri, get_step_triples

        step_def = path[0]
        step_uris = [o for s, p, o in get_step_triples(self.update_graph, uri, column_name, step_def, self.query_parms,
                                                       self.verbose)]

        if len(step_uris) == 0:

            # VIVO has no values for first intermediate, so add new intermediate and do a two step update on it

            step_uri = URIRef(new_uri(self.query_parms))
            self.update_graph.add((uri, step_def['predicate']['ref'], step_uri))
            self.update_graph.add((step_uri, RDF.type, step_def['object']['type']))
            if 'label' in step_def['object']:
                self.update_graph.add((step_uri, RDFS.label, Literal(step_def['object']['label'],
                                                                     datatype=step_def['object'].get('datatype', None),
                                                                     lang=step_def['object'].get('lang', None))))
            self.__do_two_step_update(row, column_name, step_uri, path[1:], data_update)

        elif step_def['predicate']['single']  == True:

            #   VIVO has 1 or more values for first intermediate, so we need to see if the predicate
            #   is expected to be single

            step_uri = step_uris[0]
            if len(step_uris) > 1:
                print "WARNING: Single predicate", path[0]['object']['name'], "has", len(step_uris), "values: ", \
                    step_uris, "using", step_uri
            self.__do_two_step_update(row, column_name, step_uri, path[1:], data_update)
        return None
예제 #3
0
    def __do_two_step_update(self, row, column_name, uri, column_def,
                             data_update):
        """
        In a two step update, identify intermediate entity that might need to be created, and end path objects that
        might not yet exist or might need to be created.  Cases are:

                              Predicate Single   Predicate Multiple
        VIVO has 0 values     Add, do_the        Add intermediate, do_the
        VIVO has 1 value         do_the          Set compare through intermediate
        VIVO has >1 value     WARNING, do_the    Set compare through intermediate

        :param: row: current row in spreadsheet
        :param: column_name: name of current column in spreadsheet
        :param: uri: uri in VIVO of the current entity
        :param: column_def: the column def for the current column
        :param: data_update: the column_value
        :return: alterations in update graph
        """
        from rdflib import RDF, RDFS, Literal, URIRef
        from vivopump import new_uri, get_step_triples, prepare_column_values

        step_def = column_def[0]

        #   Determine the add set (which intermediates point to column values that are not yet in VIVO
        #       For each element in the add set, construct the intermediate and call __do_the_update to
        #       construct the leaf
        #   Determine the sub set (which intermediates point to column values that are in VIVO and are
        #   not in the column values
        #       For each element in the sub set, remove the leaf and the intermediate
        #
        #   This framework should also handle single valued predicates, and cases where there are no step_uris.
        #   That is, it should handle everything.  All the code below should be replaced.

        step_uris = [
            o for s, p, o in
            get_step_triples(self.update_graph, uri, column_name,
                             column_def[0], self.query_parms, self.verbose)
        ]
        vivo_objs = {}
        for step_uri in step_uris:
            for s, p, o in get_step_triples(self.update_graph, step_uri,
                                            column_name, column_def[1],
                                            self.query_parms, self.verbose):
                vivo_objs[unicode(o)] = [o, step_uri]

        #   Nasty hack below.  The predicate property "single" appears to have two meanings.  One has to do
        #   with the semantic graph and one has to do with the cardinality of the data column.  These are not
        #   the same.  When the first step is multiple and the second single, the "second single" is not the
        #   cardinality of the data column.  The cardinality of the data column is the multiple if any of the
        #   predicates in the path are multiple.  Here we set the cardinality of the leaf to be used by
        #   prepare_column_values and then set it back.  Nasty.  Create a property for the leaf cardinality.

        predicate2_cardinality = column_def[1]['predicate']['single']
        if column_def[0]['predicate']['single'] == False:
            column_def[1]['predicate']['single'] = False
        column_values = prepare_column_values(data_update[column_name],
                                              self.intra, column_def[1],
                                              self.enum, row, column_name)
        column_def[1]['predicate']['single'] = predicate2_cardinality

        vivo_values = [vivo_objs[x][0] for x in vivo_objs.keys()]
        if unicode(column_values[0]).lower() == 'none':
            add_values = set()
            sub_values = set(vivo_values)
        else:
            add_values = set(column_values) - set(vivo_values)
            sub_values = set(vivo_values) - set(column_values)
        if self.verbose:
            print 'Two step SET COMPARE', '\n\tRow', row, '\n\tColumn', column_name, '\n\tSource', column_values, \
                '\n\tVIVO', vivo_values, '\n\tAdd:', add_values, '\n\tSub:', sub_values, '\n\tStep_uris', step_uris

        #   Process the adds

        if len(add_values) > 0:
            if column_def[0]['predicate']['single'] == False:

                #   Multiple intermediaries, single valued-leaves

                for leaf_value in add_values:
                    step_uri = URIRef(new_uri(self.query_parms))
                    self.update_graph.add(
                        (uri, step_def['predicate']['ref'], step_uri))
                    if 'type' in step_def['object']:
                        self.update_graph.add(
                            (step_uri, RDF.type, step_def['object']['type']))
                    if 'label' in step_def['object']:
                        self.update_graph.add(
                            (step_uri, RDFS.label,
                             Literal(step_def['object']['label'],
                                     datatype=step_def['object'].get(
                                         'datatype', None),
                                     lang=step_def['object'].get('lang',
                                                                 None))))
                    self.__do_the_update(row, column_name, step_uri,
                                         column_def[1], [leaf_value], {})
            else:

                #   Multiple values on the single leaf

                if len(step_uris) == 0:
                    step_uri = URIRef(new_uri(self.query_parms))
                    self.update_graph.add(
                        (uri, step_def['predicate']['ref'], step_uri))
                    if 'type' in step_def['object']:
                        self.update_graph.add(
                            (step_uri, RDF.type, step_def['object']['type']))
                    if 'label' in step_def['object']:
                        self.update_graph.add(
                            (step_uri, RDFS.label,
                             Literal(step_def['object']['label'],
                                     datatype=step_def['object'].get(
                                         'datatype', None),
                                     lang=step_def['object'].get('lang',
                                                                 None))))
                else:
                    step_uri = step_uris[0]
                self.__do_the_update(row, column_name, step_uri, column_def[1],
                                     column_values, {})

        #   Process the subs

        if len(sub_values) > 0:
            if column_def[0]['predicate']['single'] == False:

                #   Handle multiple intermediaries, single leaves, by removing each intermediary and all its
                #   assertions

                for leaf_value in sub_values:
                    step_uri = vivo_objs[unicode(leaf_value)][1]
                    self.update_graph.remove(
                        (uri, step_def['predicate']['ref'], step_uri))
                    self.update_graph.remove((step_uri, None, None))
            else:

                #   Handle single intermediary, possibly multiple leaves, by removing each leaf from the intermediary
                #   Then check to see if the intermediary has any remaining leaf assertions and remove if empty

                step_uri = vivo_objs[unicode(next(iter(sub_values)))][1]
                for leaf_value in sub_values:
                    self.update_graph.remove((step_uri, None, leaf_value))
                g = self.update_graph.triples(
                    (step_uri, column_def[1]['predicate']['ref'], None))
                if g == set():
                    self.update_graph.remove(
                        (uri, step_def['predicate']['ref'], step_uri))
                    self.update_graph.remove((step_uri, None, None))

        # if len(step_uris) == 0:
        #
        #     # VIVO has no values for intermediate, so add a new intermediate and __do_the_update on the leaf
        #
        #     step_uri = URIRef(new_uri(self.query_parms))
        #     self.update_graph.add((uri, step_def['predicate']['ref'], step_uri))
        #     self.update_graph.add((step_uri, RDF.type, step_def['object']['type']))
        #     if 'label' in step_def['object']:
        #         self.update_graph.add((step_uri, RDFS.label, Literal(step_def['object']['label'],
        #                                                      datatype=step_def['object'].get('datatype', None),
        #                                                      lang=step_def['object'].get('lang', None))))
        #     uri = step_uri
        #     step_def = column_def[1]
        #     vivo_objs = {unicode(o): o for s, p, o in
        #                  get_step_triples(self.update_graph, uri, column_name, step_def, self.query_parms,
        #                                   self.verbose)}
        #     column_values = prepare_column_values(data_update[column_name], self.intra, step_def, self.enum, row,
        #                                           column_name)
        #     self.__do_the_update(row, column_name, uri, step_def, column_values, vivo_objs)
        #
        # elif step_def['predicate']['single'] == True:
        #
        #     # VIVO has 1 or more values, so we need to see if the predicate is expected to be single
        #
        #     step_uri = step_uris[0]
        #     if len(step_uris) > 1:
        #         print "WARNING: Single predicate", column_name, "has", len(step_uris), "values: ", \
        #             step_uris, "using", step_uri
        #     uri = step_uri
        #     step_def = column_def[1]
        #     vivo_objs = {unicode(o): o for s, p, o in
        #                  get_step_triples(self.update_graph, uri, column_name, step_def, self.query_parms,
        #                                   self.verbose)}
        #     column_values = prepare_column_values(data_update[column_name], self.intra, step_def, self.enum, row,
        #                                           column_name)
        #     self.__do_the_update(row, column_name, uri, step_def, column_values, vivo_objs)
        #
        # else:
        #     print "WARNING: Updating multi-valued multi-step predicates such as ", column_name, " not yet implemented"
        return None
예제 #4
0
    def __do_update(self):
        """
        For each row, process each column.  Compare to data in VIVO.  Generate add and sub
        rdf as necessary to process requested add, change, delete
        """
        from rdflib import URIRef, RDF
        from vivopump import new_uri, prepare_column_values, get_step_triples, PathLengthException

        merges = {}

        for row, data_update in self.update_data.items():

            # Create a URI if empty

            if data_update['uri'].strip() == '':

                # If the source uri is empty, create one.  Remaining processing is unchanged.
                # Since the new uri does not have triples for the columns in the spreadsheet, each will be added

                uri_string = new_uri(self.query_parms)
                if self.verbose:
                    print "Adding an entity for row", row, ".  Will be added at", uri_string
                uri = URIRef(uri_string)
                self.update_graph.add(
                    (uri, RDF.type, self.update_def['entity_def']['type']))

            #   Create a URI entity if not found

            else:
                uri = URIRef(data_update['uri'].strip())
                if (uri, None, None) not in self.update_graph:
                    if self.verbose:
                        print "Adding an entity for row", row, ".  Will be added at", str(
                            uri)
                    self.update_graph.add(
                        (uri, RDF.type, self.update_def['entity_def']['type']))

            entity_uri = uri
            action = data_update.get('action', '').lower()

            # Process remove action if any

            if action == 'remove':
                self.__do_remove(row, uri)
                continue

            # Collect merge info if any

            if action != '':
                k = action.find('1')
                if k > -1:
                    key = action[0:k]
                    if key not in merges:
                        merges[key] = {}
                        merges[key]['primary'] = None
                        merges[key]['secondary'] = [uri]
                    else:
                        merges[key]['secondary'].append(uri)
                else:
                    if action not in merges:
                        merges[action] = {}
                    merges[action]['primary'] = uri
                    if 'secondary' not in merges[action]:
                        merges[action]['secondary'] = []

            #   For this row, process all the column_defs and then process closure defs if any.  Closures allow
            #   columns to be "reused" providing additional paths from the row entity to entities in the paths.

            for column_name, column_def in self.update_def['column_defs'].items() + \
                    self.update_def.get('closure_defs', {}).items():
                if column_name not in data_update:
                    continue  # extra column names are allowed in the spreadsheet for annotation
                uri = entity_uri

                if data_update[column_name] == '':
                    continue

                if len(column_def) > 3:
                    raise PathLengthException(
                        "ERROR: Path lengths > 3 not supported.  Path length for "
                        + column_name + " is " + str(len(column_def)))
                elif len(column_def) == 3:
                    self.__do_three_step_update(row, column_name, uri,
                                                column_def, data_update)
                elif len(column_def) == 2:
                    self.__do_two_step_update(row, column_name, uri,
                                              column_def, data_update)
                elif len(column_def) == 1:
                    step_def = column_def[0]
                    vivo_objs = {
                        unicode(o): o
                        for s, p, o in get_step_triples(
                            self.update_graph, uri, column_name, step_def,
                            self.query_parms, self.verbose)
                    }
                    column_values = prepare_column_values(
                        data_update[column_name], self.intra, step_def,
                        self.enum, row, column_name)
                    if self.verbose:
                        print row, column_name, column_values, uri, vivo_objs
                    self.__do_the_update(row, column_name, uri, step_def,
                                         column_values, vivo_objs)

        if any(merges):
            self.__do_merges(merges)

        # Return the add and sub graphs representing the changes that need to be made to the original

        add = self.update_graph - self.original_graph  # Triples in update that are not in original
        if self.verbose:
            print "Triples to add"
            print add.serialize(format='nt')
        sub = self.original_graph - self.update_graph  # Triples in original that are not in update
        if self.verbose:
            print "Triples to sub"
            print sub.serialize(format='nt')
        return [add, sub]
예제 #5
0
파일: pump.py 프로젝트: ctsit/vivo-pump
    def __do_two_step_update(self, row, column_name, uri, column_def, data_update):
        """
        Perform the update for a column whose path has two steps: entity -> intermediate -> leaf.

        The leaf values currently reachable from uri through the intermediates are compared, as sets, with the
        values supplied for the column.  Values supplied but not reachable are added.  Values reachable but not
        supplied are removed.  If the first supplied value is 'none' (any case), nothing is added and every
        reachable value is removed.

                                    First step not single                First step single
        Add a value                 New intermediate per value,          Reuse the first intermediate, or create
                                    leaf added via __do_the_update       one if none exists, then __do_the_update
        Remove a value              Remove the intermediate holding      Remove the leaf triples from one
                                    the value and all its assertions     intermediate.  Remove the intermediate
                                                                         too if no leaf assertions remain

        :param: row: current row in spreadsheet
        :param: column_name: name of current column in spreadsheet
        :param: uri: uri in VIVO of the current entity
        :param: column_def: the column def for the current column.  column_def[0] is the step to the
            intermediate, column_def[1] the step to the leaf
        :param: data_update: the row data.  data_update[column_name] is the column value
        :return: None.  Alterations are made in place in self.update_graph
        """
        from rdflib import RDF, RDFS, Literal, URIRef
        from vivopump import new_uri, get_step_triples, prepare_column_values

        step_def = column_def[0]
        leaf_def = column_def[1]

        def add_intermediate():
            """Create a new intermediate, link it to uri, assert its type and label if defined. Return its uri."""
            new_step_uri = URIRef(new_uri(self.query_parms))
            self.update_graph.add((uri, step_def['predicate']['ref'], new_step_uri))
            if 'type' in step_def['object']:
                self.update_graph.add((new_step_uri, RDF.type, step_def['object']['type']))
            if 'label' in step_def['object']:
                self.update_graph.add((new_step_uri, RDFS.label, Literal(step_def['object']['label'],
                                                                         datatype=step_def['object'].get('datatype',
                                                                                                         None),
                                                                         lang=step_def['object'].get('lang', None))))
            return new_step_uri

        #   Collect the intermediates, and for each leaf value found behind them remember the leaf object and
        #   the intermediate that holds it.  Keyed by the unicode form of the leaf.

        step_uris = [o for s, p, o in
                     get_step_triples(self.update_graph, uri, step_def, self.query_parms)]
        vivo_objs = {}
        for step_uri in step_uris:
            for s, p, o in get_step_triples(self.update_graph, step_uri, leaf_def, self.query_parms):
                vivo_objs[unicode(o)] = [o, step_uri]

        #   Equality with False is kept on purpose: only an explicit False selects the multi-valued handling

        intermediates_are_multiple = step_def['predicate']['single'] == False

        #   Nasty hack below.  The predicate property "single" appears to have two meanings.  One has to do
        #   with the semantic graph and one has to do with the cardinality of the data column.  These are not
        #   the same.  When the first step is multiple and the second single, the "second single" is not the
        #   cardinality of the data column.  The cardinality of the data column is multiple if any of the
        #   predicates in the path are multiple.  Here we set the cardinality of the leaf to be used by
        #   prepare_column_values and then set it back.  The try/finally guarantees the column definition is
        #   restored even if prepare_column_values raises.  Create a property for leaf cardinality.

        predicate2_cardinality = leaf_def['predicate']['single']
        if intermediates_are_multiple:
            leaf_def['predicate']['single'] = False
        try:
            column_values = prepare_column_values(data_update[column_name], self.intra, leaf_def, self.enum, row,
                                                  column_name)
        finally:
            leaf_def['predicate']['single'] = predicate2_cardinality

        #   Determine the add set (column values not yet in VIVO) and the sub set (values in VIVO that are
        #   not in the column values)

        vivo_values = [entry[0] for entry in vivo_objs.values()]
        if unicode(column_values[0]).lower() == 'none':
            add_values = set()
            sub_values = set(vivo_values)
        else:
            add_values = set(column_values) - set(vivo_values)
            sub_values = set(vivo_values) - set(column_values)

        #   The two literals are parenthesized so that format applies to the whole message.  Without the
        #   parentheses format binds to the second literal only.  Logged for delete requests as well.

        logger.debug((u"Two step SET COMPARE\n\tRow {}\n\tColumn {}\n\tSource values {}\n\tVIVO values {}" +
                      u"\n\tAdd values {}\n\tSub values {}\n\tStep_uris {}").
                     format(row, column_name, column_values, vivo_values, add_values, sub_values, step_uris))

        #   Process the adds

        if len(add_values) > 0:
            if intermediates_are_multiple:

                #   Multiple intermediaries, single valued-leaves.  Each new value gets its own intermediate

                for leaf_value in add_values:
                    step_uri = add_intermediate()
                    self.__do_the_update(row, column_name, step_uri, leaf_def, [leaf_value], {})
            else:

                #   Multiple values on the single leaf.  Reuse the first intermediate if there is one

                if len(step_uris) == 0:
                    step_uri = add_intermediate()
                else:
                    step_uri = step_uris[0]
                self.__do_the_update(row, column_name, step_uri, leaf_def, column_values, {})

        #   Process the subs

        if len(sub_values) > 0:
            if intermediates_are_multiple:

                #   Handle multiple intermediaries, single leaves, by removing each intermediary and all its
                #   assertions

                for leaf_value in sub_values:
                    step_uri = vivo_objs[unicode(leaf_value)][1]
                    self.update_graph.remove((uri, step_def['predicate']['ref'], step_uri))
                    self.update_graph.remove((step_uri, None, None))
            else:

                #   Handle single intermediary, possibly multiple leaves, by removing each leaf from the intermediary
                #   Then check to see if the intermediary has any remaining leaf assertions and remove if empty

                step_uri = vivo_objs[unicode(next(iter(sub_values)))][1]
                for leaf_value in sub_values:
                    self.update_graph.remove((step_uri, None, leaf_value))

                #   Graph.triples returns an iterator, which never compares equal to set().  Test for remaining
                #   leaf assertions with a pattern membership test instead.

                if (step_uri, leaf_def['predicate']['ref'], None) not in self.update_graph:
                    self.update_graph.remove((uri, step_def['predicate']['ref'], step_uri))
                    self.update_graph.remove((step_uri, None, None))

        return None
예제 #6
0
파일: pump.py 프로젝트: ctsit/vivo-pump
    def __do_update(self):
        """
        For each row, process each column.  Compare to data in VIVO.  Generate add and sub
        rdf as necessary to process requested add, change, delete

        For each row of self.update_data:
            1.  Skip the row if its uri is blank and every value in the row has length zero
            2.  Resolve the row entity.  An empty uri gets a new uri.  An entity with no triples in the
                update graph is given the type from the entity_def
            3.  Handle the optional 'action' value.  'remove' calls __do_remove and ends processing of the
                row.  Any other non-empty action is collected as merge information: an action containing
                the character '1' registers the row as a secondary of the merge named by the text before
                the first '1', any other action registers the row as the primary of the merge with that name
            4.  Process every column_def, then every closure_def, whose name appears in the row and whose
                value is not the empty string.  The update is dispatched on the path length (1, 2 or 3)

        After all rows, collected merges are passed to __do_merges.

        :return: list [add, sub].  add is the update graph minus the original graph, sub is the original
            graph minus the update graph
        :raises PathLengthException: if a column definition has more than three steps
        """
        from rdflib import URIRef, RDF
        from vivopump import new_uri, prepare_column_values, get_step_triples, PathLengthException

        merges = {}

        #   Column defs followed by closure defs, if any.  Closures allow columns to be "reused" providing
        #   additional paths from the row entity to entities in the paths.  The list does not depend on the
        #   row, so build it once

        path_defs = list(self.update_def['column_defs'].items()) + \
            list(self.update_def.get('closure_defs', {}).items())

        for row, data_update in self.update_data.items():

            #   A unicode literal, like the other log messages here, so a non-ASCII uri can be formatted

            logger.debug(u"data_update[uri] = {}".format(data_update['uri']))

            # Create a URI if empty

            if data_update['uri'].strip() == '':

                #   Skip blank lines in the input file

                if all(len(item) == 0 for item in data_update.values()):
                    continue

                #   If the source uri is empty, create one.  Remaining processing is unchanged.
                #   Since the new uri does not have triples for the columns in the spreadsheet, each will be added

                uri_string = new_uri(self.query_parms)
                logger.debug(u"Adding an entity for row {}. Will be added at {}".format(row, uri_string))
                uri = URIRef(uri_string)
                self.update_graph.add((uri, RDF.type, self.update_def['entity_def']['type']))

            #   Create a URI entity if not found

            else:
                uri = URIRef(data_update['uri'].strip())
                if (uri, None, None) not in self.update_graph:
                    logger.debug(u"Adding an entity for row {}. Will be added at {}".format(row, str(uri)))
                    self.update_graph.add((uri, RDF.type, self.update_def['entity_def']['type']))

            action = data_update.get('action', '').lower()

            #   Process remove action if any

            if action == 'remove':
                self.__do_remove(row, uri)
                continue

            #   Collect merge info if any

            if action != '':
                k = action.find('1')
                if k > -1:

                    #   Secondary of the merge named by the text before the first '1'

                    merge = merges.setdefault(action[0:k], {'primary': None, 'secondary': []})
                    merge['secondary'].append(uri)
                else:

                    #   Primary of the merge named by the action

                    merge = merges.setdefault(action, {'secondary': []})
                    merge['primary'] = uri

            #   For this row, process all the column_defs and then the closure defs if any

            for column_name, column_def in path_defs:
                if column_name not in data_update:
                    continue  # extra column names are allowed in the spreadsheet for annotation

                if data_update[column_name] == '':
                    logger.debug(u"Skipping blank value. row {} column {}".format(row, column_name))
                    continue

                column_def_len = len(column_def)
                logger.debug("column_def length is: {}".format(column_def_len))

                if column_def_len > 3:
                    raise PathLengthException(
                        "ERROR: Path lengths > 3 not supported.  Path length for {} is {}"
                        .format(column_name, column_def_len))
                elif column_def_len == 3:
                    self.__do_three_step_update(row, column_name, uri, column_def, data_update)
                elif column_def_len == 2:
                    self.__do_two_step_update(row, column_name, uri, column_def, data_update)
                elif column_def_len == 1:

                    #   One step path.  Compare the column values directly with the objects on the entity

                    step_def = column_def[0]
                    vivo_objs = {unicode(o): o for s, p, o in
                                 get_step_triples(self.update_graph, uri, step_def, self.query_parms)}
                    column_values = prepare_column_values(data_update[column_name], self.intra, step_def, self.enum,
                                                          row, column_name)
                    logger.debug(u"{} {} {} {} {}".format(row, column_name, column_values, uri, vivo_objs))
                    self.__do_the_update(row, column_name, uri, step_def, column_values, vivo_objs)

        #   Test the dict itself.  any(merges) tests the truthiness of the keys and would skip a merge whose
        #   key is the empty string, which an action starting with '1' produces

        if merges:
            self.__do_merges(merges)

        #   Return the add and sub graphs representing the changes that need to be made to the original

        add = self.update_graph - self.original_graph  # Triples in update that are not in original
        logger.info(u"Triples to add\n{}".format(add.serialize(format='nt')))
        sub = self.original_graph - self.update_graph  # Triples in original that are not in update
        logger.info(u"Triples to sub\n{}".format(sub.serialize(format='nt')))
        return [add, sub]