def standardLinkGenerator(mappings: dict, link_predicate: str, result_batch, offset=0):

    """
    :param mappings         : dictionary of namespaces as keys and prefixes ad values.
    :param offset           : an integer to increment the counting of tghe links
    :param link_predicate   : a turtle representation of a URI (e.i: owl:sameAs).
    :param result_batch     : an iterable object with link results.
    :param clusters         : a dictionary proving the size of the clusters links.
    :return                 : Yields a string as set of triples.
    """

    buffer = Buffer()
    errors = ""

    def ns_modification(uri):

        for ns in mappings:
            if uri.startswith(ns):
                uri = uri.replace(ns, F"{mappings[ns]}:")
                break

        if "://" in uri:
            uri = F"<{uri}>"

        return uri

    for count, link in enumerate(result_batch):

        try:

            # GET THE SOURCE AND TARGET URIS
            # src_data, trg_data = link['source'], link['target']
            src_data, trg_data = ns_modification(link['source']), ns_modification(link['target'])

            # GENERATION OF THE LINK
            if src_data and trg_data:

                # THE ASSERTED LINK TRIPLE
                buffer.write(F"\n{space}### LINK Nbr: {count + offset}\n"
                             F"{space}{src_data}    {Rsc.ga_resource_ttl(link_predicate)}    {trg_data} .\n")

                # STANDARD REIFICATION
                reification = F"{space}{src_data}    {Rsc.ga_resource_ttl(link_predicate)}    {trg_data} .\n"
                code = Rsc.ga_resource_ttl(F"Reification-{Grl.deterministicHash(reification)}")
                buffer.write(F"\n{space}### STANDARD REIFICATION Nbr: {count}" 
                             F"\n{space}{code}\n" 
                             F"{space}{preVal('a', 'rdf:Statement')}" 
                             F"{space}{preVal('rdf:predicate', link_predicate)}" 
                             F"{space}{preVal('rdf:subject', F'{src_data}')}" 
                             F"{space}{preVal('rdf:object', F'{trg_data}')}")

                # ANNOTATION OF THE LINK USING THE REIFIED CODE
                for counter, (feature, value) in enumerate(link.items()):

                    end = ".\n" if counter == len(link) - 1 else ";"

                    cur_predicate = JSON_HEADERS.get(feature, None)

                    if cur_predicate:

                        # APPENDING THE VALIDATION FLAG RESOURCE
                        if cur_predicate == VoidPlus.has_validation_ttl:
                            small = link['source'] if link['source'] < link['target'] else link['target']
                            big = link['target'] if small == link['source'] else link['source']
                            # print(F"{small} {big} {link_predicate}")
                            key = Grl.deterministicHash(F"{small}{big}{link_predicate}")
                            triple_value = Rsc.validation_ttl(key) if key is not None else key

                        # APPENDING THE CLUSTER ID AS A RESOURCE
                        elif cur_predicate == VoidPlus.cluster_ID_ttl:
                            triple_value = Rsc.cluster_ttl(value) if value is not None else value
                            # triple_value = None

                        elif cur_predicate == VoidPlus.network_ID_ttl:
                            triple_value = Literal(value).n3(MANAGER) if value is not None else value

                        # APPENDING ANYTHING ELSE
                        else:
                            if cur_predicate == VoidPlus.cluster_Int_ID_ttl:
                                triple_value = None

                            elif value is not None:
                                triple_value = Literal(round(float(value), 5)).n3(MANAGER) \
                                    if Grl.isDecimalLike(value) \
                                    else Literal(value).n3(MANAGER)
                            else:
                                triple_value = None

                        if triple_value is not None:
                            buffer.write(F"{space * 2}{cur_predicate:{Vars.PRED_SIZE}}{triple_value} {end}\n")

                yield buffer.getvalue()
                clearBuffer(buffer)

        except Exception as err:
            errors += F">>>> [ERROR FROM standardLinkGenerator] {link}, {err}"
            print(errors)
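
# A minimal, self-contained sketch of the prefix-shortening idea implemented by
# ns_modification above (the function name and the sample mapping are illustrative):
def shorten(uri: str, mappings: dict) -> str:
    for ns, prefix in mappings.items():
        if uri.startswith(ns):
            return f"{prefix}:{uri[len(ns):]}"
    return f"<{uri}>"  # no known prefix: keep the full IRI in angle brackets

# shorten("http://www.w3.org/2002/07/owl#sameAs",
#         {"http://www.w3.org/2002/07/owl#": "owl"})  ->  "owl:sameAs"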
Example #2
g.bind("qkg", QKG)
g.bind("rdf", RDF)
g.bind("rdfs", RDFS)
g.bind("owl", OWL)
g.bind("void", VOID)
g.bind("foaf", FOAF)
g.bind("foaf", FOAF)
g.bind("dcterms", DCTERMS)

quotekg_uri = URIRef(QKG) + "QuoteKG"

g.add((quotekg_uri, RDF.type, VOID.Dataset))
g.add((quotekg_uri, FOAF.homepage, URIRef("https://quotekg.l3s.uni-hannover.de/")))
g.add((quotekg_uri, VOID.sparqlEndpoint, URIRef("https://quotekg.l3s.uni-hannover.de/sparql")))

g.add((quotekg_uri, DCTERMS.title, Literal("QuoteKG")))
g.add((quotekg_uri, DCTERMS.description, Literal("QuoteKG is a multilingual knowledge graph of quotations from famous "
                                                 "people.", "en")))

g.add((quotekg_uri, DCTERMS.publisher, URIRef("https://www.l3s.de/home")))
g.add((quotekg_uri, DCTERMS.creator, URIRef("https://www.l3s.de/~kuculo/")))
g.add((quotekg_uri, DCTERMS.creator, URIRef("https://www.l3s.de/~gottschalk/")))

g.add((quotekg_uri, DCTERMS.created, Literal("2020-04-20", datatype=XSD.date)))
g.add((quotekg_uri, DCTERMS.modified, Literal("2020-12-07", datatype=XSD.date)))

g.add((quotekg_uri, DCTERMS.license, URIRef("https://creativecommons.org/licenses/by-sa/4.0/")))

languages = "it", "en", "pl", "ru", "cs", "fa", "de", "et", "pt", "fr", "uk", "es", "he", "sk", "tr", "bs", "ca", "eo", "fi", "az", "sl", "lt", "zh", "ar", "bg", "hy", "hr", "el", "su", "nn", "id", "sv", "li", "hu", "ko", "nl", "ja", "la", "ta", "sah", "sr", "gu", "gl", "ur", "te", "be", "cy", "no", "ml", "sq", "vi", "kn", "ro", "eu", "ku", "uz", "hi", "th", "ka", "da", "sa", "is"
for language in languages:
    g.add((quotekg_uri, DCTERMS.source, URIRef("https://" + language + ".wikiquote.org/")))
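
# Persisting the VoID description built above (a sketch; the output filename is illustrative):
g.serialize(destination='quotekg_void.ttl', format='turtle')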
Example #3
		person_uri = basis_uri + row['entityID']
		graph.add((URIRef(person_uri), RDF['type'], djo['Person']))
		if row['entityID'] in entities_links:
			if len(entities_links[row['entityID']]['VIAF']) > 0:
				graph.add((URIRef(person_uri), skos['closeMatch'], URIRef(entities_links[row['entityID']]['VIAF'][0])))
			if len(entities_links[row['entityID']]['wikidata']) > 0:
				graph.add((URIRef(person_uri), skos['closeMatch'], URIRef(entities_links[row['entityID']]['wikidata'][0])))
			if len(entities_links[row['entityID']]['Yiddish Leksikon']) > 0:
				graph.add((URIRef(person_uri), rdfs['seeAlso'], URIRef(entities_links[row['entityID']]['Yiddish Leksikon'][0])))
		if len(row['gender']) > 0:
			if row['gender'] == 'male':
				graph.add((URIRef(person_uri), schema['gender'], URIRef('https://schema.org/Male')))
			if row['gender'] == 'female':
				graph.add((URIRef(person_uri), schema['gender'], URIRef('https://schema.org/Female')))
		if len(row['birthDate']) > 0: 
			graph.add((URIRef(person_uri), schema['birthDate'], Literal(row['birthDate'])))
		if len(row['deathDate']) > 0: 
			graph.add((URIRef(person_uri), schema['deathDate'], Literal(row['deathDate'])))
		if len(row['activeDate']) > 0: 
			graph.add((URIRef(person_uri), dbo['activeYears'], Literal(row['activeDate'])))
		if len(row['language']) > 0: 
			languages = row['language'].split('; ')
			for language in languages:
				graph.add((URIRef(person_uri), rdaad['P50102'], Literal(language)))

		if len(row['birthPlaceKima']) > 0:
			graph.add((URIRef(person_uri), schema['birthPlace'], URIRef(row['birthPlaceKima'])))
		else:
			if len(row['birthPlace']) > 0:
				graph.add((URIRef(person_uri), schema['birthPlace'], Literal(row['birthPlace'])))
Example #4
g = Graph()
nm = NamespaceManager(g)
schema = Namespace("http://schema.org/")
nm.bind("schema", schema)
wd = Namespace("http://www.wikidata.org/entity/")
nm.bind("wd", wd)
owl = Namespace("http://www.w3.org/2002/07/owl#")
nm.bind("owl", owl)
skos = Namespace("http://www.w3.org/2004/02/skos/core#")
nm.bind("skos", skos)

KGC = URIRef("http://www.knowledgegraph.tech")
KGC2019 = URIRef("http://www.knowledgegraph.tech/conference-2019")
KGC2020 = URIRef("http://www.knowledgegraph.tech/conference-2020")
g.add((KGC, RDF.type, schema.EventSeries))
g.add((KGC, schema.name, Literal("Knowledge Graph Conference")))
g.add((KGC, OWL.sameAs, wd.Q86935657))
g.add((KGC2019, OWL.sameAs, wd.Q87486633))
g.add((KGC2020, OWL.sameAs, wd.Q76451254))
g.add((KGC2019, schema.name, Literal("Knowledge Graph Conference 2019")))
g.add((KGC2020, schema.name, Literal("Knowledge Graph Conference 2020")))
g.add((KGC2019, RDF.type, schema.Event))
g.add((KGC2020, RDF.type, schema.Event))
g.add((KGC2019, schema.superEvent, KGC))
g.add((KGC2020, schema.superEvent, KGC))
g.add((KGC2019, schema.eventAttendanceMode, schema.OfflineEventAttendanceMode))
g.add((KGC2020, schema.eventAttendanceMode, schema.OnlineEventAttendanceMode))

speaker_file = csv.DictReader(open('KGC-2020-Speakers-Recon.csv'))
for row in speaker_file:
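    # The loop body is truncated in the source. A hedged sketch of what a
    # reconciliation loop like this typically adds (the column names 'name'
    # and 'wikidata' are assumptions, not from the source):
    # speaker = URIRef(row['wikidata'])
    # g.add((speaker, RDF.type, schema.Person))
    # g.add((speaker, schema.name, Literal(row['name'])))
    pass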
Example #5
    def visit_class(self, cls: ClassDefinition) -> bool:
        cls_uri = self._class_uri(cls.name)
        self.add_metadata(cls, cls_uri)
        self.graph.add((cls_uri, RDF.type, OWL.Class))
        self.graph.add((cls_uri, RDF.type,
                        self.metamodel.namespaces[METAMODEL_NAMESPACE_NAME][
                            camelcase('class definition')]))
        self._add_element_properties(cls_uri, cls)

        # Parent classes
        # TODO: reintroduce this
        # if not cls.defining_slots:
        if True:
            if cls.is_a:
                self.graph.add(
                    (cls_uri, RDFS.subClassOf, self._class_uri(cls.is_a)))
            if cls.mixin:
                self.graph.add(
                    (cls_uri, RDFS.subClassOf, METAMODEL_NAMESPACE.mixin))
            for mixin in sorted(cls.mixins):
                self.graph.add(
                    (cls_uri, RDFS.subClassOf, self._class_uri(mixin)))
            if cls.name in self.synopsis.applytorefs:
                for appl in sorted(
                        self.synopsis.applytorefs[cls.name].classrefs):
                    self.graph.add(
                        (cls_uri, RDFS.subClassOf, self._class_uri(appl)))
        else:
            raise NotImplementedError("Defining slots need to be implemented")
            # If defining slots, we generate an equivalentClass entry
            # equ_node = BNode()
            # self.graph.add((cls_uri, OWL.equivalentClass, equ_node))
            # self.graph.add((equ_node, RDF.type, OWL.Class))
            #
            # elts = []
            # if cls.is_a:
            #     elts.append(self._class_uri(cls.is_a))
            # if cls.mixin:
            #     self.graph.add((cls_uri, RDFS.subClassOf, META_NS.mixin))
            # for mixin in cls.mixins:
            #     self.graph.add((cls_uri, RDFS.subClassOf, self._class_uri(mixin)))
            # if cls.name in self.synopsis.applytorefs:
            #     for appl in self.synopsis.applytorefs[cls.name].classrefs:
            #         self.graph.add((cls_uri, RDFS.subClassOf, self._class_uri(appl)))
            #
            # for slotname in cls.defining_slots:
            #     restr_node = BNode()
            #     slot = self.schema.slots[slotname]
            #
            #     self.graph.add((restr_node, RDF.type, OWL.Restriction))
            #     self.graph.add((restr_node, OWL.onProperty, self._prop_uri(slotname)))
            #     self._add_cardinality(restr_node, slot)
            #     # TODO: fix this
            #     # self.graph.add((restr_node, OWL.someValuesFrom, self._build_range(slot)))
            #     elts.append(restr_node)
            #
            # coll_bnode = BNode()
            # Collection(self.graph, coll_bnode, elts)
            # self.graph.add((equ_node, OWL.intersectionOf, coll_bnode))

        # TODO: see whether unions belong
        # if cls.union_of:
        #     union_node = BNode()
        #     Collection(self.graph, union_coll, [self.class_uri(union_node) for union_node in cls.union_of])
        #     self.graph.add((union_node, OWL.unionOf, union_coll))
        #     self.graph.add((cls_uri, RDFS.subClassOf, union_node))

        for sn in sorted(self.own_slot_names(cls)):
            # Defining_slots are covered above
            if sn not in cls.defining_slots:
                slot = self.schema.slots[sn]
                # Non-inherited slots are annotation properties
                if self.is_slot_object_property(slot):
                    slot_node = BNode()
                    self.graph.add((cls_uri, RDFS.subClassOf, slot_node))

                    #         required multivalued
                    if slot.required:
                        if slot.multivalued:
                            #    y         y     intersectionOf(restriction(slot only type) restriction(slot some type)
                            restr1 = BNode()
                            self.graph.add((restr1, RDF.type, OWL.Restriction))
                            self.graph.add((restr1, OWL.allValuesFrom,
                                            self._range_uri(slot)))
                            self.graph.add(
                                (restr1, OWL.onProperty,
                                 self._prop_uri(self.aliased_slot_name(slot))))

                            restr2 = BNode()
                            self.graph.add((restr2, RDF.type, OWL.Restriction))
                            self.graph.add((restr2, OWL.someValuesFrom,
                                            self._range_uri(slot)))
                            self.graph.add(
                                (restr2, OWL.onProperty,
                                 self._prop_uri(self.aliased_slot_name(slot))))

                            coll_bnode = BNode()
                            Collection(self.graph, coll_bnode,
                                       [restr1, restr2])
                            self.graph.add(
                                (slot_node, OWL.intersectionOf, coll_bnode))
                            self.graph.add((slot_node, RDF.type, OWL.Class))
                        else:
                            #    y         n      restriction(slot exactly 1 type)
                            self.graph.add(
                                (slot_node, RDF.type, OWL.Restriction))
                            self.graph.add(
                                (slot_node, OWL.qualifiedCardinality,
                                 Literal(1)))
                            self.graph.add(
                                (slot_node, OWL.onProperty,
                                 self._prop_uri(self.aliased_slot_name(slot))))
                            self.graph.add((slot_node, OWL.onClass,
                                            self._range_uri(slot)))
                    else:
                        if slot.multivalued:
                            #    n         y      restriction(slot only type)
                            self.graph.add(
                                (slot_node, RDF.type, OWL.Restriction))
                            self.graph.add((slot_node, OWL.allValuesFrom,
                                            self._range_uri(slot)))
                            self.graph.add(
                                (slot_node, OWL.onProperty,
                                 self._prop_uri(self.aliased_slot_name(slot))))
                        else:
                            #    n         n      intersectionOf(restriction(slot only type) restriction(slot max 1 type))
                            self.graph.add(
                                (slot_node, RDF.type, OWL.Restriction))
                            self.graph.add((slot_node, OWL.onClass,
                                            self._range_uri(slot)))
                            self.graph.add(
                                (slot_node, OWL.maxQualifiedCardinality,
                                 Literal(1)))
                            self.graph.add(
                                (slot_node, OWL.onProperty,
                                 self._prop_uri(self.aliased_slot_name(slot))))

        return True
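
# A self-contained rdflib sketch of the required + multivalued case above: the
# class becomes a subClassOf an intersectionOf(allValuesFrom, someValuesFrom)
# pair of restrictions. EX and the property/class names are illustrative.
from rdflib import Graph, BNode, Namespace
from rdflib.namespace import RDF, RDFS, OWL
from rdflib.collection import Collection

EX = Namespace("http://example.org/")
g = Graph()
slot_node = BNode()
restrictions = []
for quantifier in (OWL.allValuesFrom, OWL.someValuesFrom):
    restr = BNode()
    g.add((restr, RDF.type, OWL.Restriction))
    g.add((restr, OWL.onProperty, EX.hasPart))
    g.add((restr, quantifier, EX.Part))
    restrictions.append(restr)
coll = BNode()
Collection(g, coll, restrictions)
g.add((slot_node, RDF.type, OWL.Class))
g.add((slot_node, OWL.intersectionOf, coll))
g.add((EX.Whole, RDFS.subClassOf, slot_node))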
Example #6
def main():
    parser = argparse.ArgumentParser(description='Feature extraction')
    parser.add_argument('-im',
                        '--im',
                        metavar='image',
                        nargs='+',
                        dest='im',
                        type=str,
                        required=True,
                        help='Images to calculate features on')
    parser.add_argument('-md',
                        '--md',
                        metavar='metadata',
                        dest='md',
                        type=str,
                        required=False,
                        nargs='+',
                        help='Clinical data on patient (DICOM)')
    parser.add_argument('-sem',
                        '--sem',
                        metavar='semantics',
                        dest='sem',
                        type=str,
                        required=False,
                        nargs='+',
                        help='Semantic Features')
    parser.add_argument('-seg',
                        '--seg',
                        metavar='segmentation',
                        dest='seg',
                        type=str,
                        required=True,
                        nargs='+',
                        help='Segmentation to calculate features on')
    parser.add_argument('-para',
                        '--para',
                        metavar='Parameters',
                        nargs='+',
                        dest='para',
                        type=str,
                        required=False,
                        help='Parameters')
    parser.add_argument('-out',
                        '--out',
                        metavar='Features',
                        dest='out',
                        type=str,
                        required=False,
                        help='Patient features output (HDF)')
    args = parser.parse_args()

    if type(args.im) is list:
        args.im = ''.join(args.im)

    if type(args.seg) is list:
        args.seg = ''.join(args.seg)

    if type(args.out) is list:
        args.out = ''.join(args.out)

    featureVector = CalcFeatures(image=args.im, mask=args.seg)

    if 'rdf' in args.out:
        # Write output to rdf
        # import rdflib and some namespace
        from rdflib import Graph, URIRef, BNode, Literal, Namespace
        from rdflib.namespace import RDF, FOAF

        # convert python object to RDF
        print "-----------------------------------------------------------"
        print "			RDF Output:"
        print ""
        Img = Graph()
        lung1_image = URIRef("http://example.org/CT-Image")
        Img.add((lung1_image, RDF.type, FOAF.Image))

        # The predicate must be looked up dynamically: FOAF[tmp_name] uses the
        # feature name, whereas FOAF.tmp_name would always add the literal name 'tmp_name'
        for tmp_name, tmp_value in featureVector.items():
            Img.add((lung1_image, FOAF[tmp_name], Literal(tmp_value)))

        print(Img.serialize(format='turtle'))
        # Create a rdf file for storing output
        Img.serialize(args.out, format="pretty-xml")

    elif 'hdf5' in args.out:
        # Write output to hdf5
        import numpy as np
        import pandas as pd

        # Assign features to corresponding groups
        shape_labels = list()
        shape_features = list()
        histogram_labels = list()
        histogram_features = list()
        GLCM_labels = list()
        GLCM_features = list()
        GLRLM_labels = list()
        GLRLM_features = list()
        GLSZM_labels = list()
        GLSZM_features = list()

        for featureName in featureVector.keys():
            if 'shape' in featureName:
                shape_labels.append(featureName)
                shape_features.append(featureVector[featureName])
            if 'firstorder' in featureName:
                histogram_labels.append(featureName)
                histogram_features.append(featureVector[featureName])
            if 'glcm' in featureName:
                GLCM_labels.append(featureName)
                GLCM_features.append(featureVector[featureName])
            if 'glrlm' in featureName:
                GLRLM_labels.append(featureName)
                GLRLM_features.append(featureVector[featureName])
            if 'glszm' in featureName:
                GLSZM_labels.append(featureName)
                GLSZM_features.append(featureVector[featureName])

        # Convert feature to single dictionary containing PD series
        features = dict()
        pandas_dict = dict(zip(shape_labels, shape_features))
        shape_dict = dict()
        shape_dict['all'] = pd.Series(pandas_dict)
        shape_features = pd.Series(shape_dict)
        features['shape_features'] = shape_features

        pandas_dict = dict(zip(histogram_labels, histogram_features))
        histogram_dict = dict()
        histogram_dict['all'] = pd.Series(pandas_dict)
        histogram_features = pd.Series(histogram_dict)
        features['histogram_features'] = histogram_features

        GLCM_dict = dict(zip(GLCM_labels, GLCM_features))
        GLRLM_dict = dict(zip(GLRLM_labels, GLRLM_features))
        GLSZM_dict = dict(zip(GLSZM_labels, GLSZM_features))

        texture_features = dict()
        texture_features['GLCM'] = pd.Series(GLCM_dict)
        texture_features['GLRLM'] = pd.Series(GLRLM_dict)
        texture_features['GLSZM'] = pd.Series(GLSZM_dict)

        texture_features = pd.Series(texture_features)
        features['texture_features'] = texture_features

        # We also return just the array
        image_feature_array = list()

        for _, feattype in features.items():
            for _, imfeatures in feattype.items():
                image_feature_array.extend(imfeatures.values)

        image_feature_array = np.asarray(image_feature_array)
        image_feature_array = image_feature_array.ravel()

        panda_labels = ['image_features', 'image_features_array']
        panda_data = pd.Series([features, image_feature_array],
                               index=panda_labels,
                               name='Image features')

        panda_data.to_hdf(args.out, 'image_features')
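
        # Reading the stored features back (a sketch; the key matches the
        # to_hdf call above):
        #   panda_data = pd.read_hdf(args.out, 'image_features')
        #   features = panda_data['image_features']
        #   image_feature_array = panda_data['image_features_array']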
Example #7
    def _build_model(self):
        inputNode = self.MODELS['inputRequirement1']
        self.graph.add((inputNode, self.RDF.type, self.PRIVVULNV2.Constraint))
        self.graph.add((inputNode, self.PRIVVULNV2.spatialRequirement,
                        self.__DOMAINNAMESPACE__.Room))
        self.graph.add((inputNode, self.PRIVVULNV2.TemporalResolution,
                        Literal("300", datatype=self.XSD.double)))
        self.graph.add((inputNode, self.PRIVVULN.feeds,
                        self.__DOMAINNAMESPACE__.DoorOpen))

        inputRequirement2 = self.MODELS['inputRequirement2']
        self.graph.add(
            (inputRequirement2, self.RDF.type, self.PRIVVULNV2.Require))
        self.graph.add((inputRequirement2, self.PRIVVULN.feeds,
                        self.__DOMAINNAMESPACE__.PIR))
        self.graph.add((inputRequirement2, self.PRIVVULNV2.spatialRequirement,
                        self.__DOMAINNAMESPACE__.Room))
        self.graph.add((inputRequirement2, self.PRIVVULNV2.TemporalResolution,
                        Literal("300", datatype=self.XSD.double)))

        doorOpenToPressens = self.MODELS['DoorOpenAndPIRToPresence']
        self.graph.add(
            (doorOpenToPressens, self.RDF.type, self.PRIVVULN.Transformation))
        self.graph.add((inputNode, self.PRIVVULN['feeds'], doorOpenToPressens))
        self.graph.add(
            (inputRequirement2, self.PRIVVULN['feeds'], doorOpenToPressens))

        timeResolutionLinear1 = self.MODELS['timeResolutionLinear1']
        self.graph.add((timeResolutionLinear1, self.RDF.type,
                        self.PRIVVULNV2.TimeResolutionLinear))
        self.graph.add((timeResolutionLinear1, self.PRIVVULNV2.TimeInput,
                        Literal("1", datatype=self.XSD.double)))
        self.graph.add((timeResolutionLinear1, self.PRIVVULNV2.TimeOutput,
                        Literal("1", datatype=self.XSD.double)))
        self.graph.add((inputNode, self.PRIVVULN.feeds, timeResolutionLinear1))

        timeResolutionLinear2 = self.MODELS['timeResolutionLinear2']
        self.graph.add((timeResolutionLinear2, self.RDF.type,
                        self.PRIVVULNV2.TimeResolutionLinear))
        self.graph.add((doorOpenToPressens, self.PRIVVULN.TimeFactor,
                        timeResolutionLinear2))
        self.graph.add((timeResolutionLinear2, self.PRIVVULNV2.TimeInput,
                        Literal("1", datatype=self.XSD.double)))
        self.graph.add((timeResolutionLinear2, self.PRIVVULNV2.TimeOutput,
                        Literal("1", datatype=self.XSD.double)))
        self.graph.add(
            (inputRequirement2, self.PRIVVULN.feeds, timeResolutionLinear2))

        spatialResolution1 = self.MODELS['SpatialResolution1']
        self.graph.add((spatialResolution1, self.RDF.type,
                        self.PRIVVULNV2.SpatialResolution))
        self.graph.add((spatialResolution1, self.PRIVVULNV2.spatialInput,
                        self.__DOMAINNAMESPACE__.Room))
        self.graph.add((spatialResolution1, self.PRIVVULNV2.spatialOutput,
                        self.__DOMAINNAMESPACE__.Floor))
        self.graph.add((inputNode, self.PRIVVULN.feeds, spatialResolution1))

        spatialResolution2 = self.MODELS['spatialResolution2']
        self.graph.add((spatialResolution2, self.RDF.type,
                        self.PRIVVULNV2.SpatialResolution))
        self.graph.add((spatialResolution2, self.PRIVVULNV2.spatialInput,
                        self.__DOMAINNAMESPACE__.Room))
        self.graph.add((spatialResolution2, self.PRIVVULNV2.spatialOutput,
                        self.__DOMAINNAMESPACE__.Floor))
        self.graph.add(
            (inputRequirement2, self.PRIVVULN.feeds, spatialResolution2))

        presenceStream = self.MODELS['PresenceStream']
        self.graph.add(
            (presenceStream, self.RDF.type, self.__DOMAINNAMESPACE__.Presence))
        self.graph.add(
            (doorOpenToPressens, self.PRIVVULN['feeds'], presenceStream))
        self.graph.add(
            (presenceStream, self.RDF.type, self.PRIVVULN.TimeSeries))
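
# The model above wires two constrained inputs into one transformation:
#   inputRequirement1 --feeds--> DoorOpenAndPIRToPresence --feeds--> PresenceStream
#   inputRequirement2 --feeds--> DoorOpenAndPIRToPresence
# with TimeResolutionLinear and SpatialResolution nodes attached to each input.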
Example #8
def encode(headache_csv, output_path='data/headache_KG.ttl'):
    # Initialize empty triple graph
    g = Graph()
    g.bind('chron', chronicals, override=True)

    for symptom in mappings.symptom_to_URI:
        query = mappings.symptom_to_URI[symptom].split('/')[-1].replace(
            '_', ' ')
        g.add((mappings.symptom_to_URI[symptom], RDF.type,
               concepts.symptom_type))
        #g.add( (mappings.symptom_to_URI[symptom], OWL.sameAs, URIRef(getDescriptionsByString(query, semTag='finding'))) )

    # For each diagnosis, we create an entity
    for diagnose in mappings.diagnoses_to_URI:
        g.add((mappings.diagnoses_to_URI[diagnose], RDF.type,
               concepts.diagnose_type))
        #g.add( (mappings.diagnoses_to_URI[diagnose], OWL.sameAs, URIRef(getDescriptionsByString(mappings.diagnoses_to_URI[diagnose].split('/')[-1], semTag='disorder'))) )

    # Entities for pain characterisations
    for characterisation in mappings.characterisation_to_URI:
        g.add((mappings.characterisation_to_URI[characterisation], RDF.type,
               concepts.characterisation_type))

    # Entities for the location of the pain
    for location in mappings.location_to_URI:
        g.add((mappings.location_to_URI[location], RDF.type,
               concepts.location_type))

    # Entities for the pain severity
    for severity in mappings.severity_to_URI:
        g.add((mappings.severity_to_URI[severity], RDF.type,
               concepts.severity_type))

    duration_bounds = {
        'A': (0, 4),
        'B': (5, 119),
        'C': (120, 239),
        'D': (240, 899),
        'E': (900, 1799),
        'F': (1800, 10799),
        'G': (10800, 14399),
        'H': (14400, 259199),
        'I': (259200, 604799),
        'J': (604800, 'INF'),
    }
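
    # Bounds are expressed in seconds (e.g. 604800 s = 7 days); 'INF' marks an
    # open-ended upper bound for the longest duration group.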

    # For each durationGroup, we add a node to our KG with edges to
    # the corresponding upper and lower bounds
    for duration in mappings.duration_to_URI:
        g.add((mappings.duration_to_URI[duration], concepts.lb_predicate,
               Literal(duration_bounds[duration][0])))
        g.add((mappings.duration_to_URI[duration], concepts.ub_predicate,
               Literal(duration_bounds[duration][1])))
        g.add((mappings.duration_to_URI[duration], RDF.type,
               concepts.duration_group_type))

    # Defining our predicates/properties
    g.add((concepts.diagnose_predicate, RDF.type, RDF.Property))
    g.add((concepts.prev_attacks_predicate, RDF.type, RDF.Property))
    g.add((concepts.characterisation_predicate, RDF.type, RDF.Property))
    g.add((concepts.intensity_predicate, RDF.type, RDF.Property))
    g.add((concepts.location_predicate, RDF.type, RDF.Property))
    g.add((concepts.duration_predicate, RDF.type, RDF.Property))

    # Read the CSV file and iterate over the rows
    for i, row in pd.read_csv(headache_csv).iterrows():

        symptoms = []
        for symptom in mappings.symptoms:
            if symptom in mappings.wrongly_written_symptoms:
                if row[mappings.wrongly_written_symptoms[symptom]] == 'yes':
                    symptoms.append(symptom)
            else:
                if row[symptom] == 'yes':
                    symptoms.append(symptom)

        add_sample(g, i, symptoms, row['CLASS'], row['previous_attacks'],
                   row['characterisation'], row['severity'], row['location'],
                   row['durationGroup'])

    g.serialize(destination=output_path, format='turtle')

    return g
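
# Typical call (a sketch; the CSV path is illustrative):
# kg = encode('data/headache_data.csv', output_path='data/headache_KG.ttl')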
Example #9
## Setup an RDFLib graph

g = Graph()
g.namespace_manager = namespace_manager

## We need to store maintainer, uploader and translator details for later

maintainers = []

## Generate Debian triples

# TODO: There should be translations for the description here.

debian = URIRef('http://rdf.debian.net/debian')
g.add((debian, RDF.type, ADMSSW.SoftwareProject))
g.add((debian, DOAP.name, Literal('The Debian Project')))
g.add((
    debian, DOAP.description,
    Literal(
        'Debian is a free operating system ' +
        '(OS) for your computer. An operating system is the set of basic programs '
        +
        'and utilities that make your computer run. Debian provides more than a '
        +
        'pure OS: it comes with over 37500 packages, precompiled software bundled '
        + 'up in a nice format for easy installation on your machine.',
        lang="en")))
g.add((debian, DOAP.homepage, URIRef('http://www.debian.org/')))

## Generate all release triples
Example #10
def validationGraphGenerator(validationset, linksetStats, auto_prefixes, setGraph, set_id, created, isLinkset: bool):

    # THE LAST STATUS MUST ALWAYS HAVE A VALUE SO THAT IT DETERMINES THE LAST TRIPLE
    predicate_map = {
        "Motivation": VoidPlus.motivation_ttl,
        "Status": VoidPlus.has_validation_status_ttl
    }

    if isLinkset is False:
        auto_prefixes[Rsc.lens] = "lens"

    if validationset:

        validationset_graph = F"{Rsc.validationset_ttl(Grl.deterministicHash(validationset))}-{set_id}"
        writer = Buffer()

        # ADDING THE CLUSTER NAMESPACE
        # auto_prefixes[Rsc.validationset] = "validationset"

        # APPENDING ALL NAMESPACES
        writer.write(
            linksetNamespaces(
                auto_prefixes,
                # isValidated=validationset and len(validationset['items']) > 0,
                isValidated=True,
                isClustered=Vars.clusters in linksetStats and linksetStats[Vars.clusters] > 0
            ))

        # VALIDATION METADATA
        writer.write(F'{header("LINK VALIDATION METADATA")}\n\n')
        writer.write(F"{validationset_graph}\n")
        writer.write(preVal('a', VoidPlus.Validationset_ttl))
        writer.write(preVal(VoidPlus.hasTarget_ttl, setGraph))
        if "creator" in validationset and len(validationset["creator"].strip()) > 0:
            writer.write(preVal(Sns.DCterms.creator_ttl, Literal(validationset["creator"]).n3()))
        if "publisher" in validationset and len(validationset["publisher"].strip()) > 0:
            writer.write(preVal(Sns.DCterms.publisher_ttl, Literal(validationset["publisher"]).n3()))

        # CREATED
        writer.write(preVal(Sns.DCterms.created_ttl, Literal(created, datatype=XSD.dateTime).n3(MANAGER)))

        # EXPORT TIMESTAMP
        writer.write(preVal(VoidPlus.exportDate_ttl, Grl.getXSDTimestamp()))

        # VALIDATION STATS
        # THE TOTAL AMOUNT OF LINKS ACCEPTED
        writer.write(F"\n{space}### VOID+ VALIDATION STATS\n")
        if Vars.accepted in linksetStats and linksetStats[Vars.accepted] > -1:
            writer.write(preVal(VoidPlus.accepted_ttl, Rsc.literal_resource(linksetStats[Vars.accepted])))

        # THE TOTAL AMOUNT OF LINKS REJECTED
        if Vars.rejected in linksetStats and linksetStats[Vars.rejected] > -1:
            writer.write(preVal(VoidPlus.rejected_ttl, Rsc.literal_resource(linksetStats[Vars.rejected])))

        # THE TOTAL AMOUNT OF LINKS WITH AN UNCERTAIN VALIDATION FLAG
        if Vars.not_sure in linksetStats and linksetStats[Vars.not_sure] > -1:
            writer.write(preVal(VoidPlus.uncertain_ttl, Rsc.literal_resource(linksetStats[Vars.not_sure])))

        # THE TOTAL AMOUNT OF LINKS NOT VALIDATED
        if Vars.notValidated in linksetStats and linksetStats[Vars.notValidated] > -1:
            writer.write(
                preVal(VoidPlus.unchecked_ttl, Rsc.literal_resource(linksetStats[Vars.notValidated])))

        writer.write("\n")
        writer.write(preVal(Sns.DCterms.description_ttl, Rsc.literal_resource(validate.generic_desc), end=True))

        # VALIDATION TERMS
        writer.write(validate.terminology())

        # VALIDATIONSET
        writer.write(F'{header("VALIDATIONSET")}\n\n')
        writer.write(F"{validationset_graph}\n{{")

        # VALIDATIONS
        for key, validation in validationset['items'].items():
            # print(validation)
            writer.write(F'\n\t{Rsc.validation_ttl(key)}\n')
            writer.write(preVal('a', VoidPlus.Validation_ttl, position=2))

            for index, (val_header, value) in enumerate(predicate_map.items()):

                end = index == len(predicate_map) - 1
                curr_feature = predicate_map.get(val_header, None)

                if curr_feature:

                    # ACCEPTED | REJECTED | NOT-VALIDATED | UNSURE | MIXED
                    if curr_feature == VoidPlus.has_validation_status_ttl:
                        writer.write(preVal(VoidPlus.has_validation_status_ttl, validate.get_resource[validation[val_header]], end=end, position=2))

                    elif validation[val_header]:
                        writer.write(preVal(curr_feature, Literal(validation[val_header]).n3(MANAGER), end=end, position=2))

        writer.write("}")
        # print(writer.getvalue())
        return writer.getvalue()
Example #11
pizza_doc = OntologyDocument()
pizza_doc.prefixes(PIZZA, dc=DC, terms=TERMS)
pizza = Ontology("http://www.co-ode.org/ontologies/pizza",
                 "http://www.co-ode.org/ontologies/pizza/2.0.0")
pizza_doc.ontology = pizza
pizza.annotation(DC.title, "pizza")
pizza.annotation(TERMS.contributor, "Alan Rector")
pizza.annotation(TERMS.contributor, "Matthew Horridge")
pizza.annotation(TERMS.contributor, "Chris Wroe")
pizza.annotation(TERMS.contributor, "Robert Stevens")
pizza.annotation(
    DC.description,
    AnnotationValue(
        Literal("""An ontology about pizzas and their toppings.

This is an example ontology that contains all constructs required for the various versions of the Pizza Tutorial
 run by Manchester University 
 (see http://owl.cs.manchester.ac.uk/publications/talks-and-tutorials/protg-owl-tutorial).""",
                lang="en")))
pizza.axioms.append(SubObjectPropertyOf(PIZZA.hasBase, PIZZA.hasIngredient))
pizza.axioms.append(InverseObjectProperties(PIZZA.hasBase, PIZZA.isBaseOf))
pizza.axioms.append(FunctionalObjectProperty(PIZZA.hasBase))
pizza.axioms.append(InverseFunctionalObjectProperty(PIZZA.hasBase))
pizza.axioms.append(ObjectPropertyDomain(PIZZA.hasBase, PIZZA.Pizza))
pizza.axioms.append(ObjectPropertyRange(PIZZA.hasBase, PIZZA.PizzaBase))

print(pizza_doc.to_functional().getvalue())

#%% md

## Ontologies in functional format can be read from files or URLs
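
# A sketch of that read path, assuming funowl's to_python converter (it accepts
# a file path, URL, or functional-syntax string; the URL here is illustrative):
from funowl.converters.functional_converter import to_python

pizza_doc = to_python("https://example.org/ontologies/pizza.ofn")
print(pizza_doc.ontology.iri)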
Example #12
def rdfStarLinkGenerator_fromCSV(link_predicate: str, result_batch, offset=0):

    errors = ""
    vars_size = 0
    buffer = Buffer()
    vars_dic = defaultdict(int)

    for count, row in enumerate(result_batch):

        try:

            # THE FIRST LINE IS ASSUMED TO BE THE HEADER
            if count > 0 and len(row) > 1:

                # GET THE SOURCE AND TARGET URIS
                src_data, trg_data = row[0], row[1]

                # GENERATION OF THE LINK
                if src_data and trg_data:

                    # The RDFStar subject
                    buffer.write(F"{space}### LINK Nbr: {count + offset}\n"
                                 F"{space}<<<{src_data}>    {link_predicate}    <{trg_data}>>>\n"
                                 if len(vars_dic) > 0
                                 else F"{space}<{src_data}>    {link_predicate}    <{trg_data}> .\n")

                    # ANNOTATION OF THE LINK
                    # ll_val:has-link-validation               "not_validated" .
                    for counter, (predicate, index) in enumerate(vars_dic.items()):
                        end = ".\n" if counter == vars_size - 1 else ";"

                        # APPENDING THE CLUSTER SIZE
                        # if clusters and predicate == VoidPlus.cluster_ID_ttl and int(row[index]) in clusters:
                        #     buffer.write(F"{space * 2}{VoidPlus.cluster_size_ttl:{Vars.PRED_SIZE}}"
                        #                  F"{Literal(clusters[int(row[index])]['size']).n3(MANAGER)} ;\n")

                        # APPENDING THE VALIDATION FLAG
                        # if predicate == VoidPlus.has_validation_flag_ttl:
                        #     triple_value = validate.get_resource[row[index]]

                        # APPENDING THE VALIDATION FLAG RESOURCE
                        if predicate == VoidPlus.has_validation_ttl:
                            small = src_data if src_data < trg_data else trg_data
                            big = trg_data if small == src_data else src_data
                            key = Grl.deterministicHash(F"{small}{big}{link_predicate}")
                            triple_value = Rsc.validation_ttl(key)
                            # buffer.write(F"{space * 2}{VoidPlus.has_validation_ttl:{Vars.PRED_SIZE}}{triple_value} {end}\n")

                        # APPENDING THE CLUSTER ID AS A RESOURCE
                        elif predicate == VoidPlus.cluster_ID_ttl:
                            cluster_id = int(row[index])
                            triple_value = Rsc.cluster_ttl(cluster_id)
                            # clusters[cluster_id]['item'].extend([src_data, trg_data])

                        # APPENDING ANYTHING ELSE
                        else:
                            triple_value = Literal(round(float(row[index]), 5)).n3(MANAGER) \
                                if Grl.isDecimalLike(row[index]) \
                                else Literal(row[index]).n3(MANAGER)

                        buffer.write(F"{space * 2}{predicate:{Vars.PRED_SIZE}}{triple_value} {end}\n")

                        # buffer.write(F"{space * 2}{predicate:{Vars.PRED_SIZE}}"
                        #              F"{validate.get_resource[row[index]] if not Grl.isDecimalLike(row[index]) else round(float(row[index]), 5)} {end}\n")

                    yield buffer.getvalue()
                    clearBuffer(buffer)

            else:

                # THE CSV HEADER
                # Start at column 2: columns 0 and 1 hold the source and target URIs
                for column in range(2, len(row)):

                    if row[column] in CSV_HEADERS:
                        vars_dic[CSV_HEADERS[row[column]]] = column
                        vars_size += 1

        except Exception as err:
            errors += F">>>> [ERROR FROM rdfStarLinkGenerator_fromCSV] {row}, {err}"
            print(errors)
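
# Shape of an annotated link as emitted above (values and prefixes are illustrative):
#
#     ### LINK Nbr: 7
#     << <http://ex.org/a>    owl:sameAs    <http://ex.org/b> >>
#         voidPlus:matchingStrength    0.92 ;
#         voidPlus:hasValidation       validation:1a2b3c .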
Example #13
def clusterGraphGenerator(clusters, stats, auto_prefixes, linksetGraph, created, linkset_id):

    node_count = 0
    validated = 0
    clusterset_graph = F"{Rsc.clusterset_ttl(Grl.deterministicHash(clusters))}-{linkset_id}"

    # ADDING THE CLUSTER NAMESPACE
    # auto_prefixes[Rsc.clusterset] = "clusterset"

    # writer AND predicate_map ARE USED UNCONDITIONALLY BELOW, SO THEY ARE
    # DEFINED AT FUNCTION LEVEL EVEN FOR AN EMPTY SET OF CLUSTERS
    writer = Buffer()
    predicate_map = {
        # SET OF NODES
        # "nodes": VoidPlus.size_ttl,
        "extended": VoidPlus.extended_ttl,
        "id": VoidPlus.intID_ttl,
        "hash_id": VoidPlus.hashID_ttl,
        # VALIDATIONS
        # "links": VoidPlus.links_ttl,
        "reconciled": VoidPlus.reconciled_ttl,
        "size": VoidPlus.size_ttl,
        "accepted": VoidPlus.accepted_ttl,
        "rejected": VoidPlus.rejected_ttl,
        "not_sure": VoidPlus.uncertain_ttl,
        "mixed": VoidPlus.contradictions_ttl,
        "not_validated": VoidPlus.unchecked_ttl,
        'network_id': VoidPlus.network_ID_ttl
    }

    # APPENDING ALL NAMESPACES
    writer.write(
        linksetNamespaces(
            auto_prefixes, isClustered=clusters and len(clusters) > 0,
            isValidated=Vars.notValidated in stats and stats[Vars.notValidated] < stats[Vars.triples]
        ))

    # THE CLUSTER METADATA
    writer.write(F'{header("RESOURCE PARTITIONING METADATA")}\n\n')
    writer.write(F"{clusterset_graph}\n")
    writer.write(preVal('a', VoidPlus.Clusterset_ttl))
    writer.write(preVal(VoidPlus.clusters_ttl, Literal(len(clusters)).n3(MANAGER)))
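    # TWO PLACEHOLDER TOKENS; BOTH ARE REPLACED WITH THE FINAL COUNTS AT THE END OF THIS FUNCTION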
    writer.write(preVal(Sns.VoID.entities_ttl, "###NodeCounts"))
    writer.write(preVal(VoidPlus.validations_ttl, "###VALIDATED"))

    writer.write(preVal(VoidPlus.largestNodeCount_ttl, Rsc.literal_resource(stats['largest_size'])))
    writer.write(preVal(VoidPlus.largestLinkCount_ttl, Rsc.literal_resource(stats['largest_count'])))

    writer.write(preVal(VoidPlus.hasTarget_ttl, linksetGraph))
    writer.write(preVal(VoidPlus.method_ttl, Algorithm.simple_clustering_ttl))
    # EXPORT TIMESTAMP
    writer.write(preVal(VoidPlus.exportDate_ttl, Grl.getXSDTimestamp()))
    # CREATED TIMESTAMP
    writer.write(preVal(Sns.DCterms.created_ttl, Literal(created, datatype=XSD.dateTime).n3(MANAGER), end=True))

    # DESCRIPTION OF THE CLUSTERING ALGORITHM
    writer.write(F'\n\n{Algorithm.simple_clustering_ttl}\n')
    writer.write(preVal('a', VoidPlus.ClusteringAlgorithm_ttl))
    writer.write(preVal(Sns.DCterms.description_ttl, Literal(Algorithm.simple_clustering_short_description).n3(MANAGER)))
    writer.write(preVal(Sns.RDFS.seeAlso_ttl, Rsc.ga_resource_ttl("https://doi.org/10.3233/SW-200410"), end=True))

    # THE PARTITION OF CO-REFERENT MATCHED RESOURCES
    writer.write(F'{header("ANNOTATED CO-REFERENT RESOURCES")}\n\n')
    writer.write(F"{clusterset_graph}\n{{\n")
    for cid, cluster_data in clusters.items():
        # print(cluster_data.keys())
        # exit()
        temp = Buffer()

        # A CLUSTER RESOURCE
        writer.write(F"\n\t{Rsc.cluster_ttl(cid)}\n")
        writer.write(preVal('a', VoidPlus.Cluster_ttl, position=2))

        for feature, value in cluster_data.items():

            # CLUSTERED RESOURCES
            if feature == 'nodes':

                if value:
                    nodes = set(value)
                    # temp.write(preVal(predicate_map[feature], Literal(len(nodes)).n3(MANAGER), position=2))
                    node_count += len(nodes)
                    temp.write(
                        preVal(
                            VoidPlus.hasItem_ttl,
                            F" ,\n{space*2}{' ' * Vars.PRED_SIZE}".join(Rsc.ga_resource_ttl(elt) for elt in nodes),
                            position=2
                        )
                    )

            # VALIDATION FLAGS
            elif feature == "links":

                if value and value['not_validated'] == 0:
                    validated += 1

                for flag, integer in value.items():
                    temp.write(
                        preVal(
                            predicate_map[flag],
                            Literal(integer).n3(MANAGER),
                            position=2
                        )
                    )

            elif feature in ["values"]:
                pass

            # ABOUT THE CLUSTER'S SIZE, Extension, Reconciliation, intID
            else:
                temp.write(preVal(predicate_map[feature], Literal(value).n3(MANAGER), position=2))

        writer.write(F"{temp.getvalue()[:-2]}.\n")

    # print(triples.getvalue())
    result = writer.getvalue().replace('###NodeCounts', Literal(node_count).n3(MANAGER))
    return F"{result.replace('###VALIDATED', Literal(validated).n3(MANAGER))}}}"
Example #14
def standardLinkGenerator2(link_predicate: str, result_batch, namespace, clusters=None, offset=0):

    """
    :param offset           : an integer to increment the counting of tghe links
    :param link_predicate   : a turtle representation of a URI (e.i: owl:sameAs).
    :param namespace        : a dictionary for namespace
    :param result_batch     : an iterable object with link results.
    :param clusters         : a dictionary proving the size of the clusters links.
    :return                 : Yields a string as set of triples.
    """
    errors = ""
    vars_size = 0
    buffer = Buffer()
    vars_dic = defaultdict(int)

    for count, row in enumerate(result_batch):

        try:

            # THE FIRST LINE IS ASSUMED TO BE THE HEADER
            if count > 0 and len(row) > 1:

                # GET THE SOURCE AND TARGET URIS
                src_data, trg_data, predicate = uri2ttl(row[0], namespace)["short"], \
                                                uri2ttl(row[1], namespace)["short"], \
                                                uri2ttl(link_predicate, namespace)["short"]

                # GENERATION OF THE LINK
                if src_data and trg_data:

                    # THE ASSERTED LINK TRIPLE
                    buffer.write(F"\n{space}### LINK Nbr: {count + offset}\n"
                                 F"{space}{src_data}    {Rsc.ga_resource_ttl(predicate)}    {trg_data} .\n")

                    # STANDARD REIFICATION
                    link = F"{space}{src_data}    {Rsc.ga_resource_ttl(predicate)}    {trg_data} .\n"
                    code = Rsc.ga_resource_ttl(F"Reification-{Grl.deterministicHash(link)}")
                    buffer.write(F"\n{space}### STANDARD REIFICATION Nbr: {count}" 
                                 F"\n{space}{code}\n" 
                                 F"{space}{preVal('a', 'rdf:Statement')}" 
                                 F"{space}{preVal('rdf:predicate', predicate)}" 
                                 F"{space}{preVal('rdf:subject', F'{src_data}')}" 
                                 F"{space}{preVal('rdf:object', F'{trg_data}')}")

                    # ANNOTATION OF THE LINK USING THE REIFIED CODE
                    for counter, (predicate, index) in enumerate(vars_dic.items()):
                        end = ".\n" if counter == vars_size - 1 else ";"

                        # APPENDING THE CLUSTER SIZE
                        if clusters and predicate == VoidPlus.cluster_ID_ttl and row[index] in clusters:
                            buffer.write(F"{space * 2}{VoidPlus.cluster_size_ttl:{Vars.PRED_SIZE}}"
                                         F"{Literal(clusters[row[index]]).n3(MANAGER)} ;\n")

                        # APPENDING THE VALIDATION FLAG
                        if predicate == VoidPlus.has_validation_status_ttl:
                            triple_value = validate.get_resource[row[index]]

                        # APPENDING ANYTHING ELSE
                        else:
                            triple_value = Literal(round(float(row[index]), 5)).n3(MANAGER) \
                                if Grl.isDecimalLike(row[index]) \
                                else Literal(row[index]).n3(MANAGER)

                        buffer.write(F"{space * 2}{predicate:{Vars.PRED_SIZE}}{triple_value} {end}\n")

                    yield buffer.getvalue()
                    clearBuffer(buffer)

            else:

                # THE CSV HEADER
                # MAPPING THE CSV HEADERS, STARTING AT COLUMN 2
                # (COLUMNS 0 AND 1 HOLD THE SOURCE AND TARGET URIS)
                for column in range(2, len(row)):
                    if row[column] in CSV_HEADERS:
                        vars_dic[CSV_HEADERS[row[column]]] = column
                        vars_size += 1

        except Exception as err:
            errors += F">>>> [ERROR FROM csv_2_linkset] {row}, {err}"
            print(errors)
Example #15
def liftingtabs(mydb, cursor, cursorupdate):

    graph = Graph()  # graph for the dataset
    docgraph1 = Graph()  # graph for the documentation drawing
    docgraph2 = Graph()  # graph for the documentation drawing
    # add namespaces
    graph = apply_namespaces(graph)
    docgraph1 = apply_namespaces(docgraph1)
    docgraph2 = apply_namespaces(docgraph2)
    # get the ones we need here
    STCATH = get_namespace(graph, 'stcath')
    CRM = get_namespace(graph, 'crm')

    doci1 = 2  # msid with no liftingtabs
    doci2 = 1307  # msid with liftingtabs, pagemarker id: 23

    # deal with thesaurus concepts
    graph.add((URIRef("http://w3id.org/lob/concept/2833"), RDF.type,
               CRM["E55_Type"]))
    graph.add((URIRef("http://w3id.org/lob/concept/2833"), RDFS.label,
               Literal("board strap markers", lang="en")))
    graph.add((URIRef("http://w3id.org/lob/concept/1658"), RDF.type,
               CRM["E57_Material"]))
    graph.add((URIRef("http://w3id.org/lob/concept/1658"), RDFS.label,
               Literal("tanned skin", lang="en")))
    graph.add((URIRef("http://w3id.org/lob/concept/1197"), RDF.type,
               CRM["E57_Material"]))
    graph.add((URIRef("http://w3id.org/lob/concept/1197"), RDFS.label,
               Literal("tawed skin", lang="en")))
    graph.add((URIRef("http://w3id.org/lob/concept/5429"), RDF.type,
               CRM["E55_Type"]))
    graph.add((URIRef("http://w3id.org/lob/concept/5429"), RDFS.label,
               Literal("adhering", lang="en")))
    graph.add((URIRef("http://w3id.org/lob/concept/4045"), RDF.type,
               CRM["E55_Type"]))
    graph.add((URIRef("http://w3id.org/lob/concept/4045"), RDFS.label,
               Literal("nailing", lang="en")))
    graph.add((URIRef("http://stcath.overturnin"), RDF.type, CRM["E55_Type"]))
    graph.add((URIRef("http://stcath.overturnin"), RDFS.label,
               Literal("over turn-in attaching", lang="en")))
    graph.add((URIRef("http://stcath.underturnin"), RDF.type, CRM["E55_Type"]))
    graph.add((URIRef("http://stcath.underturnin"), RDFS.label,
               Literal("under turn-in attaching", lang="en")))
    graph.add((URIRef("http://stcath.brokenoff"), RDF.type, CRM["E55_Type"]))
    graph.add((URIRef("http://stcath.brokenoff"), RDFS.label,
               Literal("broken off", lang="en")))
    graph.add(
        (URIRef("http://stcath.brokenandsewn"), RDF.type, CRM["E55_Type"]))
    graph.add((URIRef("http://stcath.brokenandsewn"), RDFS.label,
               Literal("broken and sewn", lang="en")))
    graph.add((URIRef("http://stcath.missing"), RDF.type, CRM["E55_Type"]))
    graph.add((URIRef("http://stcath.missing"), RDFS.label,
               Literal("missing", lang="en")))
    graph.add((URIRef("http://stcath.sound"), RDF.type, CRM["E55_Type"]))
    graph.add(
        (URIRef("http://stcath.sound"), RDFS.label, Literal("sound",
                                                            lang="en")))
    graph.add((URIRef("http://stcath.worn"), RDF.type, CRM["E55_Type"]))
    graph.add(
        (URIRef("http://stcath.worn"), RDFS.label, Literal("worn", lang="en")))
    graph.add((URIRef("http://stcath.detached"), RDF.type, CRM["E55_Type"]))
    graph.add((URIRef("http://stcath.detached"), RDFS.label,
               Literal("detached", lang="en")))
    graph.add((URIRef("http://stcath.dangling"), RDF.type, CRM["E55_Type"]))
    graph.add((URIRef("http://stcath.dangling"), RDFS.label,
               Literal("dangling", lang="en")))

    docgraph1.add((URIRef("http://w3id.org/lob/concept/2833"), RDF.type,
                   CRM["E55_Type"]))
    docgraph1.add((URIRef("http://w3id.org/lob/concept/2833"), RDFS.label,
                   Literal("board strap markers", lang="en")))

    docgraph2.add((URIRef("http://w3id.org/lob/concept/2833"), RDF.type,
                   CRM["E55_Type"]))
    docgraph2.add((URIRef("http://w3id.org/lob/concept/2833"), RDFS.label,
                   Literal("board strap markers", lang="en")))
    docgraph2.add((URIRef("http://w3id.org/lob/concept/1658"), RDF.type,
                   CRM["E55_Type"]))
    docgraph2.add((URIRef("http://w3id.org/lob/concept/1658"), RDFS.label,
                   Literal("tanned skin", lang="en")))
    docgraph2.add((URIRef("http://w3id.org/lob/concept/5429"), RDF.type,
                   CRM["E55_Type"]))
    docgraph2.add((URIRef("http://w3id.org/lob/concept/5429"), RDFS.label,
                   Literal("adhering", lang="en")))
    docgraph2.add(
        (URIRef("http://stcath.underturnin"), RDF.type, CRM["E55_Type"]))
    docgraph2.add((URIRef("http://stcath.underturnin"), RDFS.label,
                   Literal("under turn-in attaching", lang="en")))
    docgraph2.add(
        (URIRef("http://stcath.brokenoff"), RDF.type, CRM["E55_Type"]))
    docgraph2.add((URIRef("http://stcath.brokenoff"), RDFS.label,
                   Literal("broken off", lang="en")))

    # 1_3_LiftingTabs
    cursor.execute(
        "SELECT mss.msuuid, mss.cataloguename, lt.msid, lt.yesnonk FROM MSs mss INNER JOIN `1_3_LiftingTabs` lt on mss.id=lt.msid"
    )
    rows = cursor.fetchall()

    for row in rows:
        msuuid = URIRef(row["msuuid"], str(STCATH))
        if row["yesnonk"] == "no":
            graph.add((msuuid,
                       CRM["NTP46_is_not_composed_of_physical_thing_of_type"],
                       URIRef("http://w3id.org/lob/concept/2833")))

        if row["msid"] == doci1:
            docgraph1.add(
                (msuuid,
                 CRM["NTP46_is_not_composed_of_physical_thing_of_type"],
                 URIRef("http://w3id.org/lob/concept/2833")))
            docgraph1.add((msuuid, RDF.type, CRM["E22_Man-Made_Object"]))
            docgraph1.add(
                (msuuid, RDFS.label, Literal(row["cataloguename"], lang="en")))
        if row["msid"] == doci2:
            docgraph2.add((msuuid, RDF.type, CRM["E22_Man-Made_Object"]))
            docgraph2.add(
                (msuuid, RDFS.label, Literal(row["cataloguename"], lang="en")))

    # LiftingTabs
    cursor.execute(
        "SELECT lt.id, mss.msuuid, mss.cataloguename, lt.msid, lt.partadditionuuid, lt.leftliftingtabuuid, lt.rightliftingtabuuid, lt.location, lt.material, lt.attachment, lt.turnin FROM MSs mss INNER JOIN `LiftingTabs` lt on mss.id=lt.msid"
    )
    rows = cursor.fetchall()
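
    # Each physical feature keeps a stable UUID: the stored one is reused when
    # present; otherwise a fresh uuid4 is minted and written back to the database
    # row (see the leftliftingtabuuid / rightliftingtabuuid / partadditionuuid branches below).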

    for row in rows:
        shelfmark = row["cataloguename"]
        msuuid = URIRef(row["msuuid"], str(STCATH))
        # lifting tabs
        locations = row["location"].split(",")
        # TODO: deal with option "Centre, board"
        for location in locations:
            location = location.strip()
            if location == "Left board" or location == "Both boards" or location == "Right board":
                if location == "Left board" or location == "Both boards":
                    if row["leftliftingtabuuid"] is None:
                        newuuid = str(uuid.uuid4())
                        leftliftingtabuuid = URIRef(newuuid, str(STCATH))
                        # update the database
                        sql = "UPDATE LiftingTabs SET leftliftingtabuuid=%s WHERE id=%s"
                        val = (newuuid, row["id"])
                        cursorupdate.execute(sql, val)
                        mydb.commit()
                    else:
                        leftliftingtabuuid = URIRef(row["leftliftingtabuuid"],
                                                    str(STCATH))

                    graph.add((leftliftingtabuuid, RDF.type,
                               CRM["E22_Man-Made_Object"]))
                    graph.add((leftliftingtabuuid, CRM["P2_has_type"],
                               URIRef("http://w3id.org/lob/concept/2833")))
                    graph.add((leftliftingtabuuid, RDFS.label,
                               Literal("Left board marker of " + shelfmark,
                                       lang="en")))

                    if row["material"] == "Tanned leather":
                        graph.add((leftliftingtabuuid, CRM["P45_consists_of"],
                                   URIRef("http://w3id.org/lob/concept/1658")))
                    elif row["material"] == "Tawed leather":
                        graph.add((leftliftingtabuuid, CRM["P45_consists_of"],
                                   URIRef("http://w3id.org/lob/concept/1197")))

                if location == "Right board" or location == "Both boards":
                    if row["rightliftingtabuuid"] is None:
                        newuuid = str(uuid.uuid4())
                        rightliftingtabuuid = URIRef(newuuid, str(STCATH))
                        # update the database
                        sql = "UPDATE LiftingTabs SET rightliftingtabuuid=%s WHERE id=%s"
                        val = (newuuid, row["id"])
                        cursorupdate.execute(sql, val)
                        mydb.commit()
                    else:
                        rightliftingtabuuid = URIRef(
                            row["rightliftingtabuuid"], str(STCATH))

                    graph.add((rightliftingtabuuid, RDF.type,
                               CRM["E22_Man-Made_Object"]))
                    graph.add((rightliftingtabuuid, CRM["P2_has_type"],
                               URIRef("http://w3id.org/lob/concept/2833")))
                    graph.add((rightliftingtabuuid, RDFS.label,
                               Literal("Right board marker of " + shelfmark,
                                       lang="en")))

                    if row["material"] == "Tanned leather":
                        graph.add((rightliftingtabuuid, CRM["P45_consists_of"],
                                   URIRef("http://w3id.org/lob/concept/1658")))
                    elif row["material"] == "Tawed leather":
                        graph.add((rightliftingtabuuid, CRM["P45_consists_of"],
                                   URIRef("http://w3id.org/lob/concept/1197")))

                if row["partadditionuuid"] is None:
                    newuuid = str(uuid.uuid4())
                    partadditionuuid = URIRef(newuuid, str(STCATH))
                    # update the database
                    sql = "UPDATE LiftingTabs SET partadditionuuid=%s WHERE id=%s"
                    val = (newuuid, row["id"])
                    cursorupdate.execute(sql, val)
                    mydb.commit()
                else:
                    partadditionuuid = URIRef(row["partadditionuuid"],
                                              str(STCATH))

                graph.add(
                    (partadditionuuid, RDF.type, CRM["E79_Part_Addition"]))
                graph.add((partadditionuuid, RDFS.label,
                           Literal("Addition of board markers to " + shelfmark,
                                   lang="en")))
                graph.add((partadditionuuid, CRM["P110_augmented"], msuuid))

                if location == "Both boards" or location == "Left board":
                    graph.add((partadditionuuid, CRM["P111_added"],
                               leftliftingtabuuid))
                if location == "Both boards" or location == "Right board":
                    graph.add((partadditionuuid, CRM["P111_added"],
                               rightliftingtabuuid))
                if row['attachment'] == "Glued":
                    graph.add(
                        (partadditionuuid, CRM["P32_used_general_technique"],
                         URIRef("http://w3id.org/lob/concept/5429")))
                elif row['attachment'] == "Nailed":
                    graph.add(
                        (partadditionuuid, CRM["P32_used_general_technique"],
                         URIRef("http://w3id.org/lob/concept/4045")))
                if row['turnin'] == "Under turn-in":
                    graph.add(
                        (partadditionuuid, CRM["P32_used_general_technique"],
                         URIRef("http://stcath.underturnin")))
                elif row['turnin'] == "Over turn-in":
                    graph.add(
                        (partadditionuuid, CRM["P32_used_general_technique"],
                         URIRef("http://stcath.overturnin")))

        if location == "Both boards":
            # TODO: Mark the location of the lifting tabs when the board foredge place is available
            #graph.add((leftliftingtabuuid, CRM["P55_has_current_location"], ...))
            #graph.add((rightliftingtabuuid, CRM["P55_has_current_location"], ...))
            pass
        elif location == "Right board":
            # TODO: Mark the location of the lifting tabs when the board foredge place is available
            #graph.add((rightliftingtabuuid, CRM["P55_has_current_location"], ...))
            pass
        elif location == "Left board":
            # TODO: Mark the location of the lifting tabs when the board foredge place is available
            #graph.add((leftliftingtabuuid, CRM["P55_has_current_location"], ...))
            pass

        if row["msid"] == doci2:
            docgraph2.add(
                (leftliftingtabuuid, RDF.type, CRM["E22_Man-Made_Object"]))
            docgraph2.add((leftliftingtabuuid, RDFS.label,
                           Literal("Left board marker of " + shelfmark,
                                   lang="en")))
            docgraph2.add((leftliftingtabuuid, CRM["P2_has_type"],
                           URIRef("http://w3id.org/lob/concept/2833")))
            docgraph2.add(
                (rightliftingtabuuid, RDF.type, CRM["E22_Man-Made_Object"]))
            docgraph2.add((rightliftingtabuuid, RDFS.label,
                           Literal("Right board marker of " + shelfmark,
                                   lang="en")))
            docgraph2.add((rightliftingtabuuid, CRM["P2_has_type"],
                           URIRef("http://w3id.org/lob/concept/2833")))
            docgraph2.add(
                (partadditionuuid, RDF.type, CRM["E79_Part_Addition"]))
            docgraph2.add((partadditionuuid, RDFS.label,
                           Literal("Addition of board markers to " + shelfmark,
                                   lang="en")))
            docgraph2.add(
                (partadditionuuid, CRM["P111_added"], leftliftingtabuuid))
            docgraph2.add(
                (partadditionuuid, CRM["P111_added"], rightliftingtabuuid))
            docgraph2.add((partadditionuuid, CRM["P110_augmented"], msuuid))
            docgraph2.add((leftliftingtabuuid, CRM["P45_consists_of"],
                           URIRef("http://w3id.org/lob/concept/1658")))
            docgraph2.add((rightliftingtabuuid, CRM["P45_consists_of"],
                           URIRef("http://w3id.org/lob/concept/1658")))
            docgraph2.add((partadditionuuid, CRM["P32_used_general_technique"],
                           URIRef("http://w3id.org/lob/concept/5429")))
            docgraph2.add((partadditionuuid, CRM["P32_used_general_technique"],
                           URIRef("http://stcath.underturnin")))

    # LiftingTabsCondition
    cursor.execute(
        "SELECT ltc.id, lt.msid, mss.msuuid, mss.cataloguename, lt.leftliftingtabuuid, lt.rightliftingtabuuid, ltc.condition, ltc.conditionuuid, ltc.leftboard FROM `LiftingTabsCondition` ltc LEFT JOIN `LiftingTabs` lt ON ltc.liftingtabid=lt.id INNER JOIN MSs mss ON mss.id=lt.msid"
    )
    rows = cursor.fetchall()

    for row in rows:
        shelfmark = row["cataloguename"]
        msuuid = URIRef(row["msuuid"], str(STCATH))
        if row['leftliftingtabuuid'] is not None:
            leftliftingtabuuid = URIRef(row["leftliftingtabuuid"], str(STCATH))
        # elif row['leftliftingtabuuid'] is None:
        #     if row["leftboard"] == 1: # we have a left lifting tab condition but no left lifting tab
        #         print(str(row["msid"]) + ", ")
        if row['rightliftingtabuuid'] is not None:
            rightliftingtabuuid = URIRef(row["rightliftingtabuuid"],
                                         str(STCATH))
        # elif row['rightliftingtabuuid'] is None:
        #     if row["leftboard"] == 0: # we have a right lifting tab condition but no right lifting tab
        #         print(str(row["msid"]) + ", ")
        if row["leftboard"] == 1:  # this is the left board
            if row["conditionuuid"] is None:
                newuuid = str(uuid.uuid4())
                conditionuuid = URIRef(newuuid, str(STCATH))
                # update the database
                sql = "UPDATE LiftingTabsCondition SET conditionuuid=%s WHERE id=%s"
                val = (newuuid, row["id"])
                cursorupdate.execute(sql, val)
                mydb.commit()
            else:
                conditionuuid = URIRef(row["conditionuuid"], str(STCATH))

            graph.add((conditionuuid, RDF.type, CRM["E3_Condition_State"]))
            graph.add(
                (conditionuuid, RDFS.label,
                 Literal("Condition of left board marker of " + shelfmark,
                         lang="en")))
            graph.add(
                (leftliftingtabuuid, CRM["P44_has_condition"], conditionuuid))

            if row["msid"] == doci2:
                docgraph2.add(
                    (conditionuuid, RDF.type, CRM["E3_Condition_State"]))
                docgraph2.add(
                    (conditionuuid, RDFS.label,
                     Literal("Condition of left board marker of " + shelfmark,
                             lang="en")))
                docgraph2.add((leftliftingtabuuid, CRM["P44_has_condition"],
                               conditionuuid))
                docgraph2.add((conditionuuid, CRM["P2_has_type"],
                               URIRef("http://stcath.brokenoff")))

        elif row["leftboard"] == 0:  # this is the right board
            if row["conditionuuid"] is None:
                newuuid = str(uuid.uuid4())
                conditionuuid = URIRef(newuuid, str(STCATH))
                # update the database
                sql = "UPDATE LiftingTabsCondition SET conditionuuid=%s WHERE id=%s"
                val = (newuuid, row["id"])
                cursorupdate.execute(sql, val)
                mydb.commit()
            else:
                conditionuuid = URIRef(row["conditionuuid"], str(STCATH))

            graph.add((conditionuuid, RDF.type, CRM["E3_Condition_State"]))
            graph.add(
                (conditionuuid, RDFS.label,
                 Literal("Condition of right board marker of " + shelfmark,
                         lang="en")))
            graph.add(
                (rightliftingtabuuid, CRM["P44_has_condition"], conditionuuid))

            if row["msid"] == doci2:
                docgraph2.add(
                    (conditionuuid, RDF.type, CRM["E3_Condition_State"]))
                docgraph2.add(
                    (conditionuuid, RDFS.label,
                     Literal("Condition of right board marker of " + shelfmark,
                             lang="en")))
                docgraph2.add((rightliftingtabuuid, CRM["P44_has_condition"],
                               conditionuuid))
                docgraph2.add((conditionuuid, CRM["P2_has_type"],
                               URIRef("http://stcath.brokenoff")))

        if row["condition"] == "Broken off":
            graph.add((conditionuuid, CRM["P2_has_type"],
                       URIRef("http://stcath.brokenoff")))
        elif row["condition"] == "Broken and Sewn":
            graph.add((conditionuuid, CRM["P2_has_type"],
                       URIRef("http://stcath.brokenandsewn")))
        elif row["condition"] == "Missing":
            graph.add((conditionuuid, CRM["P2_has_type"],
                       URIRef("http://stcath.missing")))
        elif row["condition"] == "Sound":
            graph.add((conditionuuid, CRM["P2_has_type"],
                       URIRef("http://stcath.sound")))
        elif row["condition"] == "Worn":
            graph.add((conditionuuid, CRM["P2_has_type"],
                       URIRef("http://stcath.worn")))
        elif row["condition"] == "Detached":
            graph.add((conditionuuid, CRM["P2_has_type"],
                       URIRef("http://stcath.detached")))
        elif row["condition"] == "Dangling":
            graph.add((conditionuuid, CRM["P2_has_type"],
                       URIRef("http://stcath.dangling")))

    # documentation drawing
    dot1 = visualise_graph(docgraph1, 'MS without board strap markers',
                           "forth")
    dot2 = visualise_graph(docgraph2, 'MS with board strap markers', "forth")
    dot1.render('liftingtabs/liftingtabs-1.gv', format='svg')
    dot2.render('liftingtabs/liftingtabs-2.gv', format='svg')

    # serialise the graph
    graph.serialize(destination='liftingtabs/liftingtabs.ttl',
                    format='turtle',
                    encoding="utf-8")
    docgraph1.serialize(destination='liftingtabs/liftingtabs-doc-1.n3',
                        format='n3',
                        encoding="utf-8")
    docgraph2.serialize(destination='liftingtabs/liftingtabs-doc-2.n3',
                        format='n3',
                        encoding="utf-8")
Example #16
def populate_graph(articles, persons, relationships):
    """
    Adds triples to an RDF graph with the following structure:

        Person triples: we store only the wikidata id and the surface string(s)
        by which a person is referred to in the news articles;
            <wiki_URI, SKOS.altLabel, name>

        Article triples:
            <url, DC.title, title>
            <url, DC.date, date>

        Relationships as blank nodes:
            <_rel, ns1.type, rel_type>
            <_rel, ns1.score, rel_score>
            <_rel, ns1.url, url>
            <_rel, ns1.ent1, wd:ent1>
            <_rel, ns1.ent2, wd:ent2>
            <_rel, ns1.ent1_str, ent1_str>
            <_rel, ns1.ent2_str, ent2_str>

    NOTE: linked-data vocabularies can be seen here: https://lov.linkeddata.es/dataset/lov/
    """
    g = Graph()
    ns1 = Namespace("http://www.politiquices.pt/")
    g.bind("politiquices", ns1)
    wiki_prop = Namespace("http://www.wikidata.org/prop/direct/")
    wiki_item = Namespace("http://www.wikidata.org/entity/")
    g.bind("wd", wiki_item)
    g.bind("wdt", wiki_prop)

    print("\nadding Persons")
    for wikidata_id, person in persons.items():

        # state that in 'politiquices' this is a human, following the same as wikidata.org
        g.add((URIRef(f"http://www.wikidata.org/entity/{wikidata_id}"),
               wiki_prop.P31, wiki_item.Q5))
        for name in person.known_as:
            g.add((
                URIRef(f"http://www.wikidata.org/entity/{wikidata_id}"),
                SKOS.altLabel,
                Literal(name, lang="pt"),
            ))

    print("adding Articles")
    for article in articles:
        g.add((URIRef(article.url), DC.title, Literal(article.title,
                                                      lang="pt")))
        g.add((URIRef(article.url), DC.date,
               Literal(article.crawled_date, datatype=XSD.date)))

    print("adding Relationships")
    for rel in relationships:
        _rel = BNode()
        g.add((_rel, ns1.type, Literal(rel.rel_type)))
        g.add((_rel, ns1.score, Literal(rel.rel_score, datatype=XSD.float)))
        g.add((_rel, ns1.url, URIRef(rel.url)))
        g.add((_rel, ns1.ent1,
               URIRef(f"http://www.wikidata.org/entity/{rel.ent1}")))
        g.add((_rel, ns1.ent2,
               URIRef(f"http://www.wikidata.org/entity/{rel.ent2}")))
        g.add((_rel, ns1.ent1_str, Literal(rel.ent1_str)))
        g.add((_rel, ns1.ent2_str, Literal(rel.ent2_str)))

    date_time = datetime.now().strftime("%Y-%m-%d_%H%M")
    f_name = f"politiquices_{date_time}.ttl"
    g.serialize(destination=f_name, format="turtle")
    print("graph has {} statements.".format(len(g)))
    print()
    print("persons      : ", len(persons))
    print("articles     : ", len(articles))
    print("relationships: ", len(relationships))
    print()
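
# A minimal usage sketch with hypothetical records; the field names mirror the
# attribute accesses inside populate_graph, and the rdflib/datetime imports are
# assumed to be in scope at module level.
from collections import namedtuple

Article = namedtuple("Article", "url title crawled_date")
Person = namedtuple("Person", "known_as")
Rel = namedtuple("Rel", "rel_type rel_score url ent1 ent2 ent1_str ent2_str")

populate_graph(
    articles=[Article("http://example.org/news/1", "Example headline", "2021-01-01")],
    persons={"Q1": Person(known_as=["Name A"])},
    relationships=[Rel("opposes", 0.87, "http://example.org/news/1",
                       "Q1", "Q2", "Name A", "Name B")],
)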
Example #17
def replaced_by(uri, title):
    new_uri = lookup(title)
    if new_uri != '':
        yse_skos.remove((None, None, URIRef(uri)))
        yse_skos.add((URIRef(uri), dct.isReplacedBy, URIRef(new_uri)))
        yse_skos.add((URIRef(uri), OWL.deprecated, Literal('true', datatype=XSD.boolean)))
Example #18
# Fragment: the source snippet begins mid-loop. The loop header and the first
# four column bindings below are reconstructed assumptions; `g`, `rows` and
# the `rdf`, `foaf`, `v`, `focu` namespaces (plus URIRef, BNode, Literal and
# urllib.parse.quote) are bound elsewhere in the source.
for row in rows:
    student_lastname = str(row[0])   # assumed column positions; the original
    student_firstname = str(row[1])  # fragment starts at row[4]
    student_email = str(row[2])
    student_id = str(row[3])
    program = str(row[4])
    student_enrolled_subject = row[5]
    student_enrolled_snumber = row[6]
    course_name = URIRef("http://focu.io/data#" +
                         str(row[7]).replace(" ", "%"))
    student_grade = row[8]
    student_enrolled_term = row[9]
    b_node = BNode()
    student_node = URIRef("http://focu.io/data#" +
                          quote(str(student_lastname + student_firstname)))
    print(student_node)
    g.add((student_node, rdf.type, focu.Student))
    g.add((student_node, foaf.givenName, Literal(student_firstname)))
    g.add((student_node, foaf.familyName, Literal(student_lastname)))
    g.add((student_node, v.email, Literal(student_email)))
    g.add((student_node, focu.student_id, Literal(student_id)))
    g.add((b_node, foaf.name, course_name))
    g.add((b_node, focu.Course_number, Literal(student_enrolled_snumber)))
    g.add((b_node, focu.Course_Subject, Literal(student_enrolled_subject)))
    g.add((b_node, focu.grades_achieved, Literal(student_grade)))
    g.add((b_node, focu.enrolled_term, Literal(student_enrolled_term)))
    g.add((student_node, focu.student_enrolled, b_node))

g.serialize('NewBase.ttl', format='turtle')
for items in g:
    print(items)
Example #19
def update_rdf_for_conversion(prefix, vocab_properties, rdf_vocab_properties):

    #(id, base, prefix) = get_vocab_base(vocabfile)
    html_vocab_properties = {}
    html_vocab_properties['format'] = 'text/html'
    html_vocab_properties['name'] = "%s.html" % os.path.splitext(
        rdf_vocab_properties['name'])[0]
    html_vocab_properties['path'] = rdf_vocab_properties['path'].replace(
        rdf_vocab_properties['name'], html_vocab_properties['name'])
    html_vocab_properties['uri'] = rdf_vocab_properties['uri'].replace(
        rdf_vocab_properties['name'], html_vocab_properties['name'])

    newrdf_vocab_properties = {}
    newrdf_vocab_properties['format'] = 'application/rdf+xml'
    newrdf_vocab_properties['name'] = "%s_modified.rdf" % os.path.splitext(
        rdf_vocab_properties['name'])[0]
    newrdf_vocab_properties['path'] = rdf_vocab_properties['path'].replace(
        rdf_vocab_properties['name'], newrdf_vocab_properties['name'])
    newrdf_vocab_properties['uri'] = rdf_vocab_properties['uri'].replace(
        rdf_vocab_properties['name'], newrdf_vocab_properties['name'])

    graph = Graph()
    graph.parse(rdf_vocab_properties['path'])

    subject = None
    for s in graph.subjects(namespaces['rdf']['type'],
                            URIRef(namespaces['owl']['Ontology'])):
        subject = s

    #graph2 = Graph()
    graph_ns = []
    for nsprefix, nsurl in graph.namespaces():
        graph_ns.append(str(nsurl))
    # Bind any default namespaces the parsed graph does not already declare
    # (dict.iteritems() is Python 2 only, and the loop variable must not
    # shadow the `prefix` parameter).
    for pfx, url in namespaces.items():
        if str(url) not in graph_ns:
            graph.bind(pfx, URIRef(url))

    #properties = get_vocab_properties(prefix)
    #subject = None
    #for s in graph.subjects(namespaces['dc']['title'], None):
    #    subject = s
    #if not subject:
    #    for s in graph.subjects(namespaces['dcterms']['title'], None):
    #        subject = s
    #if not subject:
    #    for s in graph.subjects(namespaces['dc']['creator'], None):
    #        subject = s
    #if not subject:
    #    for s in graph.subjects(namespaces['dcterms']['creator'], None):
    #        subject = s

    formatNode1 = BNode()
    formatNode2 = BNode()

    #Add vocabulary properties identifier and format
    graph.add((subject, namespaces['dc']['identifier'],
               URIRef(rdf_vocab_properties['uri'])))
    graph.add((subject, namespaces['dcterms']['isVersionOf'],
               URIRef(vocab_properties['preferredNamespaceUri'])))
    graph.add((subject, namespaces['dcterms']['hasFormat'],
               URIRef(rdf_vocab_properties['uri'])))
    graph.add((subject, namespaces['dcterms']['hasFormat'],
               URIRef(html_vocab_properties['uri'])))
    graph.add((subject, namespaces['vann']['preferredNamespaceUri'],
               URIRef(vocab_properties['preferredNamespaceUri'])))
    # the preferred prefix is a plain string, so store it as a Literal
    graph.add((subject, namespaces['vann']['preferredNamespacePrefix'],
               Literal(vocab_properties['preferredNamespacePrefix'])))

    graph.add((URIRef(html_vocab_properties['uri']), namespaces['rdf']['type'],
               URIRef(namespaces['dctype']['Text'])))
    graph.add((URIRef(html_vocab_properties['uri']),
               namespaces['dc']['format'], formatNode1))
    graph.add((formatNode1, namespaces['rdf']['value'], Literal('text/html')))
    graph.add((formatNode1, namespaces['rdfs']['label'], Literal('HTML')))
    graph.add((formatNode1, namespaces['rdf']['type'],
               URIRef(namespaces['dcterms']['IMT'])))

    graph.add((URIRef(rdf_vocab_properties['uri']), namespaces['rdf']['type'],
               URIRef(namespaces['dctype']['Text'])))
    graph.add((URIRef(rdf_vocab_properties['uri']), namespaces['dc']['format'],
               formatNode2))
    graph.add((formatNode2, namespaces['rdf']['value'],
               Literal('application/rdf+xml')))
    graph.add((formatNode2, namespaces['rdfs']['label'], Literal('RDF')))
    graph.add((formatNode2, namespaces['rdf']['type'],
               URIRef(namespaces['dcterms']['IMT'])))

    #Add rdfs:isDefinedBy for each class / property / term of the vocabulary
    #Find if schema is rdfs / owl. This defines the possible types (rdf:type) for each class / property / term
    #testo = vocab_type_definitions_test['rdfs']
    #subjects = []
    #subs = graph.subjects(namespaces['rdf']['type'], URIRef(testo))
    #for s in subs:
    #    subjects.append(s)
    #if subjects:
    #    objects = vocab_type_definitions_rdfs
    #else:
    #    objects = vocab_type_definitions_owl

    #For all subjects that are of the type found above, add rdfs:isDefinedBy
    #for o in objects:
    #    subs = graph.subjects(namespaces['rdf']['type'], o)
    #    for s in subs:
    #        graph.add((s, namespaces['rdfs']['isDefinedBy'], URIRef(vocab_properties['preferredNamespaceUri'])))

    list_of_terms = get_terms(rdf_vocab_properties['path'])
    for s in list_of_terms:
        graph.add((URIRef(s), namespaces['rdfs']['isDefinedBy'],
                   URIRef(vocab_properties['preferredNamespaceUri'])))

    rdf_str = graph.serialize(format="pretty-xml")
    f = codecs.open(newrdf_vocab_properties['path'], 'w', 'utf-8')
    f.write(rdf_str)
    f.close()
    return (newrdf_vocab_properties, html_vocab_properties)
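
# A hedged usage sketch: the dict keys mirror what update_rdf_for_conversion
# reads above; the prefix, paths and URIs are hypothetical.
rdf_props = {
    'format': 'application/rdf+xml',
    'name': 'myvocab.rdf',
    'path': '/tmp/vocabs/myvocab.rdf',
    'uri': 'http://example.org/vocabs/myvocab.rdf',
}
vocab_props = {
    'preferredNamespaceUri': 'http://example.org/vocabs/myvocab#',
    'preferredNamespacePrefix': 'myvocab',
}
new_rdf_props, html_props = update_rdf_for_conversion('myvocab', vocab_props, rdf_props)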
Example #20
import json
import os

# rdflib names used below; these imports are elided in the source snippet.
from rdflib import Graph, Literal, Namespace, URIRef
from rdflib.namespace import RDF, RDFS, XSD

path = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
data = json.load(open(path + "/DatiPerElaborazione/Pub.geojson", "r"))
g = Graph()

cmo = Namespace("http://www.comune.milano.it/ontology/")
schema = Namespace("https://schema.org/")
g.bind("cmo", cmo)
g.bind("schema", schema)

for element in data:
    uri = element["URI"]
    g.add([URIRef(uri), RDF.type, cmo.Pub])
    g.add([URIRef(uri), RDFS.label, Literal(element["nome"])])
    g.add([URIRef(uri), cmo.localBusinessWebsite, Literal(element["website"])])
    g.add([URIRef(uri), schema.email, Literal(element["email"])])
    g.add([URIRef(uri), cmo.localBusinessPostalCode, Literal(element["cap"])])
    g.add([URIRef(uri), schema.address, Literal(element["indirizzo"])])
    g.add([
        URIRef(uri), cmo.latitude,
        Literal(element["lat"], datatype=XSD.float)
    ])
    g.add([
        URIRef(uri), cmo.longitude,
        Literal(element["long"], datatype=XSD.float)
    ])

g.serialize(destination=path + '/Turtles/pub.ttl', format='turtle')
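
# For reference, each element of Pub.geojson is assumed to expose the keys read
# above (values are illustrative):
#
#     {"URI": "http://www.comune.milano.it/resource/pub/1", "nome": "Pub Milano",
#      "website": "http://example.org", "email": "info@example.org",
#      "cap": "20121", "indirizzo": "Via Example 1",
#      "lat": 45.4642, "long": 9.1900}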
Example #21
import re
import sys

# Assumed imports for the names used below; elided in the source snippet.
from rdflib import Graph, Literal, URIRef
from rdflib.namespace import SKOS

inputfile = sys.argv[1]

g = Graph()
g.parse(inputfile, format='turtle')

URIRE = re.compile(r'\[(http://www.yso.fi/onto/[a-z]+/p[0-9]+)\]')


def uri_to_link(lang, matchobj):
    uri = matchobj.group(1)
    concept = URIRef(uri)
    # Graph.preferredLabel is available in rdflib < 6 (removed in 6.0).
    labels = g.preferredLabel(concept, lang)
    try:
        label = labels[0][1]
    except IndexError:
        return matchobj.group(0)  # don't change if we can't find a label
    return "<a href='%s'>%s</a>" % (uri, label)


NOTEPROPS = (SKOS.note, SKOS.scopeNote, SKOS.definition)
for prop in NOTEPROPS:
    for s, o in g.subject_objects(prop):
        lang = o.language
        if URIRE.search(o) is not None:
            new = URIRE.sub(lambda m: uri_to_link(lang, m), o)
            g.remove((s, prop, o))
            g.add((s, prop, Literal(new, lang)))

g.serialize(destination=sys.stdout, format='turtle')
Example #22
def browser_cerca():
    """
    Permite la comunicacion con el agente via un navegador
    via un formulario
    """

    global product_list
    if request.method == 'GET':
        return render_template('cerca.html', products=None)
    elif request.method == 'POST':
        # Search request
        if request.form['submit'] == 'Cerca':
            logger.info("Sending the search request")

            # Content of the message
            contentResult = ECSDI['Cerca_productes_' + str(get_count())]

            # Graph creation
            gr = Graph()
            gr.add((contentResult, RDF.type, ECSDI.Cerca_productes))

            # Add the name restriction
            nom = request.form['nom']
            if nom:
                # Subject for the name restriction
                subject_nom = ECSDI['RestriccioNom' + str(get_count())]
                gr.add((subject_nom, RDF.type, ECSDI.RestriccioNom))
                gr.add(
                    (subject_nom, ECSDI.Nom, Literal(nom,
                                                     datatype=XSD.string)))
                # Attach the restriction to the search content
                gr.add((contentResult, ECSDI.Restringe, URIRef(subject_nom)))
            marca = request.form['marca']
            if marca:
                subject_marca = ECSDI['Restriccion_Marca_' + str(get_count())]
                gr.add((subject_marca, RDF.type, ECSDI.Restriccion_Marca))
                gr.add((subject_marca, ECSDI.Marca,
                        Literal(marca, datatype=XSD.string)))
                gr.add((contentResult, ECSDI.Restringe, URIRef(subject_marca)))
            min_price = request.form['min_price']
            max_price = request.form['max_price']

            if min_price or max_price:
                subject_preus = ECSDI['Restriccion_Preus_' + str(get_count())]
                gr.add((subject_preus, RDF.type, ECSDI.Rango_precio))
                if min_price:
                    gr.add(
                        (subject_preus, ECSDI.Precio_min, Literal(min_price)))
                if max_price:
                    gr.add(
                        (subject_preus, ECSDI.Precio_max, Literal(max_price)))
                gr.add((contentResult, ECSDI.Restringe, URIRef(subject_preus)))

            seller = get_agent_info(agn.SellerAgent, DirectoryAgent,
                                    UserPersonalAgent, get_count())

            gr2 = send_message(
                build_message(gr,
                              perf=ACL.request,
                              sender=UserPersonalAgent.uri,
                              receiver=seller.uri,
                              msgcnt=get_count(),
                              content=contentResult), seller.address)

            index = 0
            subject_pos = {}
            product_list = []
            for s, p, o in gr2:
                if s not in subject_pos:
                    subject_pos[s] = index
                    product_list.append({})
                    index += 1
                if s in subject_pos:
                    subject_dict = product_list[subject_pos[s]]
                    if p == RDF.type:
                        subject_dict['url'] = s
                    elif p == ECSDI.Marca:
                        subject_dict['marca'] = o
                    elif p == ECSDI.Modelo:
                        subject_dict['modelo'] = o
                    elif p == ECSDI.Precio:
                        subject_dict['precio'] = o
                    elif p == ECSDI.Nombre:
                        subject_dict['nombre'] = o
                    elif p == ECSDI.Peso:
                        subject_dict['peso'] = o
                    product_list[subject_pos[s]] = subject_dict

            return render_template('cerca.html', products=product_list)

        # --------------------------------------------------------------------------------------------------------------

        # Purchase request
        elif request.form['submit'] == 'Comprar':
            products_checked = []
            for item in request.form.getlist("checkbox"):
                item_checked = []
                item_map = product_list[int(item)]
                item_checked.append(item_map['marca'])
                item_checked.append(item_map['modelo'])
                item_checked.append(item_map['nombre'])
                item_checked.append(item_map['precio'])
                item_checked.append(item_map['url'])
                item_checked.append(item_map['peso'])
                products_checked.append(item_checked)

            logger.info("Creando la peticion de compra")

            # Content of the message
            content = ECSDI['Peticion_compra_' + str(get_count())]

            # Graph creation
            gr = Graph()
            gr.add((content, RDF.type, ECSDI.Peticion_compra))

            # Assign a priority to the request (we use the message counter)
            gr.add((content, ECSDI.Prioridad,
                    Literal(get_count(), datatype=XSD.integer)))

            # Create the city (Barcelona for now) -----------------------------------------------------------------
            subject_ciudad = ECSDI['Ciudad_' +
                                   str(random.randint(1, sys.maxsize))]  # randint needs integer bounds

            gr.add((subject_ciudad, RDF.type, ECSDI.Ciudad))
            gr.add((subject_ciudad, ECSDI.Nombre,
                    Literal('Barcelona', datatype=XSD.string)))
            gr.add((subject_ciudad, ECSDI.Latitud,
                    Literal(41.398373, datatype=XSD.float)))
            gr.add((subject_ciudad, ECSDI.Longitud,
                    Literal(2.188247, datatype=XSD.float)))

            # Create the envelope (purchase) -----------------------------------------------------------------------
            subject_sobre = ECSDI['Compra_' +
                                  str(random.randint(1, sys.maxsize))]
            gr.add((subject_sobre, RDF.type, ECSDI.Compra))

            gr.add((subject_sobre, ECSDI.Pagat, Literal(0,
                                                        datatype=XSD.integer)))
            gr.add((subject_sobre, ECSDI.Enviar_a, URIRef(subject_ciudad)))

            total_price = 0.0

            for item in products_checked:
                total_price += float(item[3])
                # Create the product --------------------------------------------------------------------------------
                subject_producto = item[4]
                gr.add((subject_producto, RDF.type, ECSDI.Producto))
                gr.add((subject_producto, ECSDI.Marca,
                        Literal(item[0], datatype=XSD.string)))
                gr.add((subject_producto, ECSDI.Modelo,
                        Literal(item[1], datatype=XSD.string)))
                gr.add((subject_producto, ECSDI.Nombre,
                        Literal(item[2], datatype=XSD.string)))
                gr.add((subject_producto, ECSDI.Precio,
                        Literal(item[3], datatype=XSD.float)))
                gr.add((subject_producto, ECSDI.Peso,
                        Literal(item[5], datatype=XSD.float)))
                gr.add(
                    (subject_sobre, ECSDI.Productos, URIRef(subject_producto)))

            gr.add((subject_sobre, ECSDI.Precio_total,
                    Literal(total_price, datatype=XSD.float)))

            gr.add((content, ECSDI.Sobre, URIRef(subject_sobre)))

            seller = get_agent_info(agn.SellerAgent, DirectoryAgent,
                                    UserPersonalAgent, get_count())

            answer = send_message(
                build_message(gr,
                              perf=ACL.request,
                              sender=UserPersonalAgent.uri,
                              receiver=seller.uri,
                              msgcnt=get_count(),
                              content=content), seller.address)

            products_matrix = []
            for item in answer.subjects(RDF.type, ECSDI.Producto):
                product = [
                    answer.value(subject=item, predicate=ECSDI.Marca),
                    answer.value(subject=item, predicate=ECSDI.Modelo),
                    answer.value(subject=item, predicate=ECSDI.Nombre),
                    answer.value(subject=item, predicate=ECSDI.Precio)
                ]
                products_matrix.append(product)

            return render_template('endSell.html', products=products_matrix)
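
# A hedged way to exercise this view from a test (the Flask app object and the
# route path are not shown in this snippet, so `app` and '/cerca' are
# assumptions):
#
#     with app.test_client() as client:
#         client.get('/cerca')  # renders the empty form
#         client.post('/cerca', data={'submit': 'Cerca', 'nom': 'portatil',
#                                     'marca': '', 'min_price': '',
#                                     'max_price': ''})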
Example #23
def main(argv):
    workdir="/data/"
    inputfile='input2.nq'
    outputfile="output2.nq"

    try:
      opts, args = getopt.getopt(argv,"hi:o:",["inputfile="])
    except getopt.GetoptError:
      print ('test.py --inputfile <inputfile>')
      sys.exit(2)
    for opt, arg in opts:
      if opt in ("-i", "--inputfile"):
         inputfile = arg
    # print ('Input file is "', inputfile)
    inputdata = inputfile.split('.')
    # TODO: change all this to take the absolute full path as arg (e.g.: /data/input.nq)
    input_full_path = workdir + inputfile

    data = inputdata[0]
    datatype = inputdata[1]
    if datatype == "nq":
        g = ConjunctiveGraph(identifier="http://kraken/graph/data/" + data)
        g.default_context.parse(input_full_path, format='nquads')
    else:
        g = Graph()  # plain Graph for nt/ttl (it has no default_context)
        if datatype == "nt":
            g.parse(input_full_path, format='nt')
        elif datatype == "ttl":
            g.parse(input_full_path, format='n3')

    patternstring1 = re.compile(r"^([A-Z]|[a-z]+)+$")
    patternstring = re.compile(r"\w+")
    patterndatey = re.compile(r"^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$")
    patterndatem = re.compile(r"^(0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])[- /.](19|20)\d\d$")
    patterndated = re.compile(r"^(0[1-9]|[12][0-9]|3[01])[- /.](0[1-9]|1[012])[- /.](19|20)\d\d$")
    patternfloat = re.compile(r"^[-+]?[0-9]*\.[0-9]+$")
    for s, p, o in g:
        if patternstring.match(o) is not None and len(o) >= 2 and "symbol" not in str(p):  # gene symbols are detected as lang
            # print(s,p,o)
            # print(detect(o))#works but string needs to be at least 3 characters- choice between string and lang
            if "name" in str(p):
                g.remove((s, p, o))
                # g.add((s, p, Literal(o, lang=detect(o)))) #works but string needs to be at least 3 charachters- choice between string and lang
                g.add((s, p, Literal(o, datatype=XSD.string)))
            elif patterndatey.match(o) is not None or patterndated.match(o) is not None or patterndatem.match(o) is not None:
                # print(o)
                g.remove((s, p, o))
                g.add((s, p,Literal(o, datatype=XSD.date)))
            elif re.search('true', o, re.IGNORECASE) or re.search('false', o, re.IGNORECASE):
                # print(o)
                g.remove((s, p, o))
                g.add((s, p, Literal(o, datatype=XSD.boolean)))
            elif patternfloat.match(o) is not None:
                g.remove((s, p, o))
                g.add((s, p, Literal(o, datatype=XSD.float)))
            elif patternstring1.match(o) is not None:
                g.remove((s, p, o))
                g.add((s, p, Literal(o, datatype=XSD.string)))
    if(datatype == "nq"):
        g.serialize(destination=workdir + outputfile, format='nquads')
    elif datatype == "nt":
        g.serialize(destination=workdir + outputfile, format='nt')
    elif datatype == "ttl":
        g.default_context.parse(workdir + outputfile,format='n3')
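
# Conventional entry point for a getopt-style script like this one (elided in
# the snippet); getopt receives the argument list without the program name.
if __name__ == "__main__":
    main(sys.argv[1:])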
Example #24
    def write_skos(self, directory):

        # parse the original v3 graph
        v3graph = RDFGraph()
        v3graph.parse(data=self.v3_skos)

        # create the namespace manager
        namespaces = (
            ("arches", ARCHES),
            ("skos", SKOS),
            ("dcterms", DCTERMS)
        )
        nsmanager = NamespaceManager(RDFGraph())
        for ns in namespaces:
            nsmanager.bind(ns[0], ns[1])

        # create the output graphs with the new namespace manager
        v4thesaurus = RDFGraph(namespace_manager=nsmanager)
        v4collections = RDFGraph(namespace_manager=nsmanager)

        # add the concept schemes to the thesaurus
        concept_schemes = [i for i in v3graph.triples((None, RDF.type, SKOS['ConceptScheme']))]
        for cs in concept_schemes:
            v4thesaurus.add(cs)

        # iterate the concepts and make collections for them.
        topconcepts = [i for i in v3graph.triples((None, SKOS['hasTopConcept'], None))]
        for tc in topconcepts:

            # get the top concept's name (its prefLabel Literal)
            tc_name_literal = v3graph.value(subject=tc[2], predicate=SKOS['prefLabel'])

            # get the value from the JSON formatted Literal content
            # if the Literal content is NOT JSON, then this reference data was
            # exported from v3 with the wrong command and will not work.
            try:
                tc_name = json.loads(tc_name_literal.value)['value']
                collection_id = self.new_or_existing_uuid(tc_name)
            except ValueError:
                docs = "https://arches.readthedocs.io/en/stable/v3-to-v4-migration/"
                print("ERROR: Incompatible SKOS. See {} for more information.".format(docs))
                exit()

            if self.verbose:
                children = [i for i in v3graph.triples((tc[2], SKOS['narrower'], None))]
                print("{}: {} immediate child concepts".format(tc_name, len(children)))
                print("    collection uuid: "+collection_id)

            # create a new collection for each top concept
            v4thesaurus.add(tc)
            v4collections.add((ARCHES[collection_id], RDF.type, SKOS['Collection']))

            # add the preflabel for the collection, if it's not the r2r types collection
            # which already has a label in Arches by default.
            if tc_name != "Resource To Resource Relationship Types":
                simple_tc_name = Literal(tc_name, lang="en-US")
                v4collections.add((ARCHES[collection_id], SKOS['prefLabel'], simple_tc_name))

            # recursively add all of the concept children to the collection for this
            # top concept.
            v4collections = self.add_children_to_collection(v3graph, v4collections,
                                                            collection_id, tc[2])

        # add ALL concepts from the v3 graph to the thesaurus. this pulls along all
        # child/parent relationships into the thesaurus, as well as all extra info
        # for each concept, like sortorder, prefLabel, etc.
        for concept in v3graph.triples((None, RDF.type, SKOS['Concept'])):
            v4thesaurus.add(concept)

            # this is the extra info related to each concept, like prefLabel, sortorder, etc.
            for s, p, o in v3graph.triples((concept[0], None, None)):
                # skip the label of the resource to resource relationship type concept
                # as it's already in Arches and this would duplicate it.
                if s.endswith("000004") and p == SKOS['prefLabel']:
                    continue
                v4thesaurus.add((s, p, o))

        # export the thesaurus and collections to predetermined locations within the
        # package file structure.
        thesaurus_file = os.path.join(directory, 'concepts', 'thesaurus.xml')
        if self.verbose:
            print("writing thesaurus to: "+thesaurus_file)
        v4thesaurus.serialize(destination=thesaurus_file, format="pretty-xml")

        collections_file = os.path.join(directory, 'collections', 'collections.xml')
        if self.verbose:
            print("writing collections to: "+collections_file)
        v4collections.serialize(destination=collections_file, format="pretty-xml")
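
        # For reference, the v3 prefLabel literals parsed above are expected to
        # hold JSON content, e.g. (illustrative values):
        #     skos:prefLabel """{"id": "...", "value": "Heritage Type"}"""
        # json.loads(literal.value)["value"] then yields the plain label text.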
Example #25
def check_ro(base_path, nested=False):
    manifest_file = os.path.join(base_path, "metadata", "manifest.json")
    assert os.path.isfile(manifest_file), "Can't find " + manifest_file
    arcp_root = find_arcp(base_path)
    base = urllib.parse.urljoin(arcp_root, "metadata/manifest.json")
    g = Graph()

    # Avoid resolving JSON-LD context https://w3id.org/bundle/context
    # so this test works offline
    context = Path(get_data("tests/bundle-context.jsonld")).as_uri()
    with open(manifest_file, "r", encoding="UTF-8") as f:
        jsonld = f.read()
        # replace with file:/// URI
        jsonld = jsonld.replace("https://w3id.org/bundle/context", context)
    g.parse(data=jsonld, format="json-ld", publicID=base)
    if os.environ.get("DEBUG"):
        print("Parsed manifest:\n\n")
        g.serialize(sys.stdout, format="ttl")
    ro = None

    for ro in g.subjects(ORE.isDescribedBy, URIRef(base)):
        break
    assert ro is not None, "Can't find RO with ore:isDescribedBy"

    profile = None
    for dc in g.objects(ro, DCTERMS.conformsTo):
        profile = dc
        break
    assert profile is not None, "Can't find profile with dct:conformsTo"
    assert profile == URIRef(provenance.CWLPROV_VERSION),\
        "Unexpected cwlprov version " + profile

    paths = []
    externals = []
    for aggregate in g.objects(ro, ORE.aggregates):
        if not arcp.is_arcp_uri(aggregate):
            externals.append(aggregate)
            # Won't check external URIs existence here
            # TODO: Check they are not relative!
            continue
        lfile = _arcp2file(base_path, aggregate)
        paths.append(os.path.relpath(lfile, base_path))
        assert os.path.isfile(lfile), "Can't find aggregated " + lfile

    assert paths, "Didn't find any arcp aggregates"
    assert externals, "Didn't find any data URIs"

    for ext in ["provn", "xml", "json", "jsonld", "nt", "ttl"]:
        f = "metadata/provenance/primary.cwlprov.%s" % ext
        assert f in paths, "provenance file missing " + f

    for f in [
            "workflow/primary-job.json", "workflow/packed.cwl",
            "workflow/primary-output.json"
    ]:
        assert f in paths, "workflow file missing " + f
    # Can't test snapshot/ files directly as their name varies

    # TODO: check urn:hash::sha1 thingies
    # TODO: Check OA annotations

    packed = urllib.parse.urljoin(arcp_root, "/workflow/packed.cwl")
    primary_job = urllib.parse.urljoin(arcp_root, "/workflow/primary-job.json")
    primary_prov_nt = urllib.parse.urljoin(
        arcp_root, "/metadata/provenance/primary.cwlprov.nt")
    uuid = arcp.parse_arcp(arcp_root).uuid

    highlights = set(g.subjects(OA.motivatedBy, OA.highlighting))
    assert highlights, "Didn't find highlights"
    for h in highlights:
        assert (h, OA.hasTarget, URIRef(packed)) in g

    describes = set(g.subjects(OA.motivatedBy, OA.describing))
    for d in describes:
        assert (d, OA.hasBody, URIRef(arcp_root)) in g
        assert (d, OA.hasTarget, URIRef(uuid.urn)) in g

    linked = set(g.subjects(OA.motivatedBy, OA.linking))
    for l in linked:
        assert (l, OA.hasBody, URIRef(packed)) in g
        assert (l, OA.hasBody, URIRef(primary_job)) in g
        assert (l, OA.hasTarget, URIRef(uuid.urn)) in g

    has_provenance = set(g.subjects(OA.hasBody, URIRef(primary_prov_nt)))
    for p in has_provenance:
        assert (p, OA.hasTarget, URIRef(uuid.urn)) in g
        assert (p, OA.motivatedBy, PROV.has_provenance) in g
        # Check all prov elements are listed
        formats = set()
        for prov in g.objects(p, OA.hasBody):
            assert (prov, DCTERMS.conformsTo,
                    URIRef(provenance.CWLPROV_VERSION)) in g
            # NOTE: DC.format is a Namespace method and does not resolve like other terms
            formats.update(set(g.objects(prov, DC["format"])))
        assert formats, "Could not find media types"
        expected = set(
            Literal(f)
            for f in ("application/json", "application/ld+json",
                      "application/n-triples",
                      'text/provenance-notation; charset="UTF-8"',
                      'text/turtle; charset="UTF-8"', "application/xml"))
        assert formats == expected, "Did not match expected PROV media types"

    if nested:
        # Check for additional PROVs
        # Let's try to find the other wf run ID
        otherRuns = set()
        for p in g.subjects(OA.motivatedBy, PROV.has_provenance):
            if (p, OA.hasTarget, URIRef(uuid.urn)) in g:
                continue
            otherRuns.update(set(g.objects(p, OA.hasTarget)))
        assert otherRuns, "Could not find nested workflow run prov annotations"
Example #26
def date_lit(value):
    return Literal(value, datatype=XSD.dateTime)
Example #27
    # Fragment: `person` (a TEI XML element), `person_uri`, `g` and the `tei`
    # namespace map are bound earlier in the source.
    same_as = person.get('sameAs')
    if same_as is not None:
        for uri in same_as.split():
            g.add((person_uri, OWL.sameAs, URIRef(uri)))

    # person name
    persname = person.find('./tei:persName', tei)
    if persname is not None:
        label = persname.text
        label_lang = persname.get('{http://www.w3.org/XML/1998/namespace}lang')
        if label_lang is not None:
            g.add((person_uri, RDFS.label, Literal(label, lang=label_lang)))
        else:
            g.add((person_uri, RDFS.label, Literal(label)))

    # person type
    listperson = person.find('./...', tei)
    perstype = listperson.get('type')
    perscorr = listperson.get('corresp')
    if perstype is not None:
        g.add((person_uri, DCTERMS.description, Literal(perstype)))
    if perscorr is not None and perscorr.startswith('http'):
        g.add((person_uri, DCTERMS.subject, URIRef(perscorr)))

    # value

    value = etree.tostring(person, pretty_print=True, method="xml")
Example #28
# Fragment: this snippet begins inside an elided `for community in ...:` loop,
# mid if/elif chain; `g`, `typePLZ`, `typeCounty`, `isInCounty`, `hasVEK` and
# get_data() are bound earlier in the source.
        regCode = str(community[2]) + str(community[3]) + str(community[4])
        county = URIRef("countyCode:" + regCode)
        if community[3] is not None:
            # if the entry contains a community, add its zipcode
            if community[14] is not None:
                zipCode = URIRef("zipCode:" + str(community[14]))
                g.add((zipCode, RDF.type, typePLZ))
                g.add((zipCode, isInCounty, county))
    # if the entry contains the name of the county
    elif len(community) == 8:
        if community[5] is None and community[4] is not None:
            regCode = str(community[2]) + str(community[3]) + str(community[4])
            county = URIRef("countyCode:" + regCode)
            countyName = community[7]
            g.add((county, RDF.type, typeCounty))
            g.add((county, RDFS.label, Literal(countyName)))

# Parsing the "VEK je Einwohner" sheet (per-capita income figures)
data = get_data("VGR_KreisergebnisseBand3.xlsx")
vek = data["VEK je Einwohner"]
for countyEntry in vek:
    # there are some descriptive entries, that we can filter out easily
    if len(countyEntry) > 5:
        if countyEntry[6] == '3':
            if len(str(countyEntry[2])) == 5:
                county = URIRef("countyCode:" + str(countyEntry[2]))
                g.add((county, hasVEK, Literal(countyEntry[27])))
                # getting the zipcodes in the current county and adding the vek of the county to the zipcodes
                for zipCode, p, o in g.triples((None, isInCounty, county)):
                    g.add((zipCode, hasVEK, Literal(countyEntry[27])))
Example #29
    def _convert_element(self, element, type):
        return Literal(element, datatype=URIRef(type))
Example #30
def rdfStarLinkGenerator(mappings: dict, link_predicate: str, result_batch, offset=0):

    errors = ""
    buffer = Buffer()

    def ns_modification(uri):

        for ns in mappings:
            if uri.startswith(ns):
                uri = uri.replace(ns, F"{mappings[ns]}:")
                break

        if uri.__contains__("://"):
            uri = F"<{uri}>"

        return uri

    for count, link in enumerate(result_batch):

        try:

            # GET THE SOURCE AND TARGET URIS
            src_data, trg_data = ns_modification(link['source']), ns_modification(link['target'])

            # GENERATION OF THE LINK
            if src_data and trg_data:

                # The RDFStar subject
                buffer.write(F"{space}### LINK Nbr: {count + offset}\n"
                             F"{space}<<{src_data}    {link_predicate}    {trg_data}>>\n")

                # ANNOTATION OF THE LINK
                # ll_val:has-link-validation               "not_validated" .
                for counter, (feature, value) in enumerate(link.items()):
                    end = ".\n" if counter == len(link) - 1 else ";"

                    current_property = JSON_HEADERS.get(feature, None)

                    if current_property:

                        # APPENDING THE VALIDATION FLAG RESOURCE
                        if current_property == VoidPlus.has_validation_ttl:
                            small = link['source'] if link['source'] < link['target'] else link['target']
                            big = link['target'] if small == link['source'] else link['source']
                            key = Grl.deterministicHash(F"{small}{big}{link_predicate}")
                            triple_value = Rsc.validation_ttl(key) if key is not None else key

                        # NOT APPENDING THE CLUSTER INT ID
                        elif current_property == VoidPlus.cluster_ID_ttl:
                            triple_value = Rsc.cluster_ttl(value) if value is not None else value

                        # APPENDING ANYTHING ELSE
                        else:
                            if current_property == VoidPlus.cluster_Int_ID_ttl:
                                triple_value = None

                            elif value is not None:
                                triple_value = Literal(round(float(value), 5)).n3(MANAGER) \
                                    if Grl.isDecimalLike(value) \
                                    else Literal(value).n3(MANAGER)
                            else:
                                triple_value = value

                        if triple_value is not None:
                            buffer.write(F"{space * 2}{current_property:{Vars.PRED_SIZE}}{triple_value} {end}\n")

                yield buffer.getvalue()
                clearBuffer(buffer)

        except Exception as err:
            errors += F">>>> [ERROR FROM AnnotatedLinkset_Generic/rdfStarLinkGenerator] {link}, {err}"