Пример #1
0
def parse_identity_key(legacy_id: Any):
    """Derive the key string used to construct a non-obfuscated KGIRI.

    How the key is built depends on the type of ``legacy_id``:

    * ``int``: formatted as text.
    * ``str``: translated through ``special_char_map``, underscored, made
      human readable, dasherized, has the first character of every word
      capitalized and is finally parameterized with ``-`` as separator.
    * ``Timestamp``: wrapped in a ``Literal`` and lower-cased.

    Any other type is reported on stderr and ``None`` is returned.

    Before returning, doubled dashes are collapsed (two passes) and the key
    is handed to ``strip_end(key, '-')``.
    """
    if isinstance(legacy_id, int):
        key = f"{legacy_id}"
    elif isinstance(legacy_id, str):
        translated = legacy_id.translate(special_char_map)
        underscored = inflection.underscore(translated)
        readable = _translate_to_human_readable(underscored)
        dashed = inflection.dasherize(readable)

        def _capitalize_word_start(match):
            # Group 1 is the first word character, optionally preceded by
            # an apostrophe.
            return match.group(1).capitalize()

        capitalized = re.sub(r"\b('?\w)", _capitalize_word_start, dashed)
        key = inflection.parameterize(capitalized, separator='-')
    elif isinstance(legacy_id, Timestamp):
        key = Literal(legacy_id).lower()
    else:
        # Keep this as a plain print to stderr: calling log.error here
        # causes a circular dependency (TODO: fix that).
        print(
            f"ERROR: While parsing an identity key: encountered unknown type {type(legacy_id)} for value {legacy_id}",
            file=sys.stderr
        )
        return None

    # Two passes of pairwise collapsing.
    for _ in range(2):
        key = key.replace('--', '-')
    return strip_end(key, '-')
Пример #2
0
def createBikeGraph(arg, g):
    """Add the triples describing one TfL bike-hire docking point to ``g``.

    Args:
        arg: Indexable record for one bike point. The fields read here are
            ``arg[0]`` (an id; its second ``_``-separated piece is used),
            ``arg[2]`` (comma-separated address text), ``arg[5]`` (creation
            date), ``arg[7]`` (total number of docks) and ``arg[8]`` /
            ``arg[9]`` (latitude / longitude, parsed with ``float``).
        g: Graph-like object exposing ``add((subject, predicate, object))``.

    Returns:
        The same ``g`` after the place, geometry and address triples have
        been added.
    """
    nspaces = readDict()

    schema = Namespace(nspaces.get('schema'))
    naptan = Namespace(nspaces.get('naptan'))
    xsd = Namespace(nspaces.get('xsd'))
    vcard = Namespace(nspaces.get('vcard'))
    locationOnt = Namespace(nspaces.get('locationOnt'))
    geom = Namespace(nspaces.get('geom'))
    geo = Namespace(nspaces.get('geo'))
    geosparql = Namespace(nspaces.get('geosparql'))
    rdf = Namespace(nspaces.get('rdf'))
    dcterms = Namespace(nspaces.get('dcterms'))
    dul = Namespace(nspaces.get('dul'))
    locn = Namespace(nspaces.get('locn'))
    dc = Namespace(nspaces.get('dc'))

    # NOTE(review): on Python 3 ``.encode`` returns bytes, so the ``str(...)``
    # calls on bikeid / nTotalDocks render as "b'...'". Left unchanged because
    # the input type getUid expects is not visible here - confirm.
    bikeid = arg[0].split('_')[1].encode('utf-8')
    bikeGUID = getUid(bikeid, naptan)

    bikeLat, bikeLong = float(arg[8]), float(arg[9])
    bikeLats = '{:f}'.format(bikeLat)
    bikeLongs = '{:f}'.format(bikeLong)
    nTotalDocks = str(arg[7].encode('utf-8'))

    # Label = last comma-separated piece of the address plus the bike id.
    address = arg[2].split(',')
    bikeLabel = address[-1].lstrip() + ' ' + str(bikeid)

    bikeGeometry = "POINT (" + str(bikeLat) + " " + str(bikeLong) + ")"
    # Replace every bare '&' (one that does not start an entity such as
    # '&amp;' or '&#38;') with the word 'and'.
    bikeAddress = Literal(re.sub(r'&(?![A-Za-z]+[0-9]*;|#[0-9]+;|#x[0-9a-fA-F]+;)', r'and', arg[2]))
    # Locality = everything after the first comma, minus the first space.
    bikeAddressSplit = Literal(bikeAddress.split(',', 1)[-1])
    bikeAddressLocality = Literal(bikeAddressSplit.replace(' ', '', 1))
    bikeCreatedDate = arg[5]

    singleBike = createBikeParkID(bikeGUID)
    singleAddress = createAddress(bikeGUID)
    singleGeometry = createGeometry(bikeGUID)
    bikePublisher = URIRef('https://api.tfl.gov.uk/#BikePoint')
    bikeBusinessType = URIRef('http://data.linkedevents.org/kos/3cixty/bikestation')

    triples = (
        # The bike point itself.
        (singleBike, rdf.type, dul.Place),
        (singleBike, rdf.type, locationOnt.bikePark),
        (singleBike, dcterms.identifier, Literal(bikeLabel)),
        (singleBike, dcterms.description, Literal("London TFL Bike hire docks")),
        (singleBike, schema.dateCreated, Literal(bikeCreatedDate, datatype=xsd.dateTime)),
        (singleBike, locationOnt.nTotalDocks, Literal(nTotalDocks, datatype=xsd.int)),
        (singleBike, dc.publisher, bikePublisher),
        (singleBike, locationOnt.businessType, bikeBusinessType),

        # Links from the bike point to its geometry node.
        (singleBike, geom.geometry, singleGeometry),
        (singleBike, schema.geo, singleGeometry),
        (singleBike, geosparql.hasGeometry, singleGeometry),
        (singleBike, locn.geometry, singleGeometry),

        # Links from the bike point to its address node.
        (singleBike, vcard.hasAddress, singleAddress),
        # Was ``locn.addresss`` (typo); every other use in this function
        # spells the term ``locn.address``.
        (singleBike, locn.address, singleAddress),
        (singleBike, schema.location, singleAddress),

        # The geometry node.
        (singleGeometry, rdf.type, geosparql.hasGeometry),
        (singleGeometry, rdf.type, geom.geometry),
        (singleGeometry, rdf.type, locn.geometry),
        (singleGeometry, rdf.type, schema.geo),
        (singleGeometry, geo.geometry, Literal(bikeGeometry, datatype=geosparql.wktLiteral)),
        (singleGeometry, geo.lat, Literal(bikeLats, datatype=xsd.double)),
        (singleGeometry, geo.long, Literal(bikeLongs, datatype=xsd.double)),
        (singleGeometry, schema.latitude, Literal(bikeLats, datatype=xsd.double)),
        (singleGeometry, schema.longitude, Literal(bikeLongs, datatype=xsd.double)),

        # The address node.
        (singleAddress, rdf.type, locn.address),
        (singleAddress, rdf.type, schema.location),
        (singleAddress, rdf.type, vcard.hasAddress),
        (singleAddress, dcterms.title, bikeAddress),
        (singleAddress, schema.streetAddress, bikeAddress),
        (singleAddress, locn.address, bikeAddress),
        (singleAddress, vcard.street_address, bikeAddress),
        (singleAddress, schema.addressLocality, bikeAddressLocality),
    )
    for triple in triples:
        g.add(triple)

    return g
Пример #3
0
    def literal_match(self, literal: Literal, surface: str):
        """Yield at most one ``LiteralMatchResult`` scoring ``surface`` against ``literal``.

        Both values are compared as stripped strings. The yielded result
        carries the score, the stripped literal text and the literal's
        datatype (``None`` when it has none). Nothing is yielded when the
        score is zero.

        Typed literals (truthy ``datatype``):

        * Datatype equal to ``self.DATETIME``: both sides are converted to
          timestamps and the score is ``1 - distance_in_365_day_years``,
          floored at 0. Any non-zero score is yielded.
        * Any other datatype: both sides are parsed as floats (commas
          removed) and the score is ``1 - relative_difference``, floored
          at 0. It is yielded only when above 0.95.
        * If neither produced a result, the score is 1.0 when the two texts
          are equal ignoring case, else 0.0.

        Untyped literals (both texts non-empty), depending on
        ``self.stringmatch``:

        * ``"jaccard"``: overlap of the lower-cased whitespace tokens.
        * ``"levenshtein"``: ``(m - edit_distance) / m`` floored at 0,
          where ``m`` is the length of the shorter text.
        """
        dtype = getattr(literal, "datatype", None)
        literal_text, surface_text = str(literal).strip(), str(surface).strip()

        score = 0.0
        if dtype:
            # Typed literals should match well
            if str(dtype) == str(self.DATETIME):
                seconds_per_year = 60 * 60 * 24 * 365
                try:
                    literal_ts = datetime.datetime.fromisoformat(
                        literal_text).timestamp()

                    year_match = YEAR_PATTERN.match(surface_text)
                    if year_match:
                        year = int(year_match.groups()[0])
                        surface_ts = datetime.datetime(year, 1, 1).timestamp()
                    else:
                        try:
                            surface_ts = datetime.datetime.fromisoformat(
                                surface_text).timestamp()
                        except ValueError:
                            # Not ISO formatted; try the configured parser.
                            surface_ts = self._dateparse(
                                surface_text).timestamp()
                except Exception:
                    # Best effort: an unparseable date on either side falls
                    # through to the exact string comparison below.
                    pass
                else:
                    # A timestamp of 0.0 (the epoch) is a valid date, so the
                    # score is computed without testing its truthiness.
                    score = max(
                        0, 1 - abs(surface_ts - literal_ts) / seconds_per_year)
                    if score:
                        yield LiteralMatchResult(score, literal_text, dtype)
                        return
            else:
                try:
                    surface_num = float(surface_text.replace(",", ""))
                    literal_num = float(literal_text.replace(",", ""))
                except ValueError:
                    # Not numeric; fall through to the string comparison.
                    pass
                else:
                    largest = max(abs(surface_num), abs(literal_num))
                    if largest == 0:
                        # Both values are zero: identical, and the relative
                        # difference would otherwise divide by zero.
                        score = 1.0
                    else:
                        score = max(
                            0, 1 - abs(surface_num - literal_num) / largest)
                    if score > 0.95:
                        yield LiteralMatchResult(score, literal_text, dtype)
                        return

            # Fallback for typed literals: exact match ignoring case.
            score = float(surface_text.lower() == literal_text.lower())

        elif surface_text and literal_text:
            # Strings may match approximately
            surface_low, literal_low = surface_text.lower(), literal_text.lower()
            if self.stringmatch == "jaccard":
                surface_tokens = set(surface_low.split())
                literal_tokens = set(literal_low.split())
                if surface_tokens and literal_tokens:
                    shared = surface_tokens & literal_tokens
                    combined = surface_tokens | literal_tokens
                    score = len(shared) / len(combined)
            elif self.stringmatch == "levenshtein":
                import Levenshtein

                # Both texts are non-empty here, so the divisor is >= 1.
                shorter = min(len(surface_low), len(literal_low))
                distance = Levenshtein.distance(surface_low, literal_low)
                score = max(0, (shorter - distance) / shorter)

        if score:
            yield LiteralMatchResult(score, literal_text, dtype)
Пример #4
0
def kgiri_replace_iri_in_literal(value: Literal):
    """Return ``value`` with ``kgiri_base_replace`` swapped for ``kgiri_base``.

    When ``kgiri_replace_enabled`` is falsy the given value is returned
    untouched. Otherwise a new ``Literal`` is built from the replaced text.
    """
    if kgiri_replace_enabled:
        replaced_text = value.replace(kgiri_base_replace, kgiri_base)
        return Literal(replaced_text)
    return value
Пример #5
0
def commentStatements(user, commentUri, realComment):
    """Build the statement list that attaches a comment's text to its URI.

    Returns a single-element list holding the triple
    ``(commentUri, CONTENT.encoded, <comment literal>)``. The literal is
    ``realComment`` with every carriage return removed and its datatype
    kept. ``user`` is not used here.
    """
    # Further processing of the comment text can be added here.
    # Carriage returns are removed because rdflib n3 can't read them back.
    text_without_cr = realComment.replace("\r", "")
    cleaned_comment = Literal(text_without_cr, datatype=realComment.datatype)
    return [(commentUri, CONTENT.encoded, cleaned_comment)]