def _layer_to_graph(g, gdb, layer_name, area, mapper):
    """Translate the features of one layer into triples added to ``g``.

    The class for the layer is first defined through ``_layer_to_class``.
    Then every feature returned by ``gdb.features_of(layer_name)`` is
    translated, provided that

    * its FID is a positive number,
    * ``gdb.within`` accepts its geometry for ``area``, and
    * ``gdb.geometryWKT_of`` yields a WKT string for it.

    For each such feature the following is added through the ``add_*``
    helpers: a typed and labelled feature node, one literal per mapped
    attribute whose value is neither ``None`` nor in ``EXCL_VALUES``, and a
    geometry node (linked with ``hasGeometry``) carrying the WKT literal
    (linked with ``asWKT``).

    Nothing beyond the class definition is produced when the layer mapping
    has no identifier.

    Args:
        g: graph that receives the triples; its namespace manager supplies
            the prefix -> namespace lookup.
        gdb: geodatabase wrapper providing ``features_of``, ``geometry_of``,
            ``within`` and ``geometryWKT_of``.
        layer_name: key of the layer in ``mapper.schema['schema']``.
        area: area passed to ``gdb.within`` to filter features.
        mapper: schema mapper providing ``schema`` and ``attributes``.
    """
    ns = dict(g.namespace_manager.namespaces())

    logger.info("Processing layer {}".format(layer_name))
    mapping = mapper.schema['schema'][layer_name]

    # define class
    class_node = _layer_to_class(g, mapping)

    # This test does not depend on the feature, so it is done once up front
    # instead of once per feature: without an identifier no feature is
    # translated and there is no point in walking the layer at all.
    if mapping['identifier'] is None:
        return

    # A set gives a constant-time membership test; it is consulted for every
    # field of every feature.
    attributes = set(mapper.attributes(layer_name))

    # translate features
    for feat in gdb.features_of(layer_name):
        fid = feat.GetFID()
        if fid is None or fid <= 0:
            continue

        # continue if out of scope
        if not gdb.within(gdb.geometry_of(feat), area):
            continue

        # geometry
        geom_wkt, gtype = gdb.geometryWKT_of(feat)
        if geom_wkt is None:
            continue

        # node for this feature
        feat_node = URIRef(ns[DEFAULT_PREFIX]
                           + gen_hash(layer_name + mapping['classname'], fid))
        add_type(g, feat_node, class_node)
        add_label(g, feat_node,
                  "{} {} ({})".format(mapping['classname'], fid, gtype))

        for k, v in feat.items().items():
            if v is None or v in EXCL_VALUES:
                continue
            if k not in attributes:
                continue

            # create node
            attr = mapping['attributes'][k]
            try:
                if type(v) is str:
                    v = v.strip()
                attr_node = Literal(v, datatype=URIRef(attr['datatype']))
            except UnicodeDecodeError:
                # retry with undecodable bytes dropped
                if type(v) is bytes:
                    v = v.decode('utf-8', 'ignore')
                attr_node = Literal(v, datatype=URIRef(attr['datatype']))

            # link to node
            attr_link = URIRef(ns[DEFAULT_SCHEMA_PREFIX]
                               + "{}_{}".format(mapping['classname'].lower(),
                                                attr['property']))
            add_property(g, feat_node, attr_node, attr_link)

        # add geometry node
        geom_node = URIRef(ns[DEFAULT_PREFIX] + gen_hash(geom_wkt, gtype))
        add_type(g, geom_node, URIRef(ns['sf'] + _geo_type(gtype)))
        add_label(g, geom_node, "{} Geometry".format(_geo_type(gtype)))
        add_property(g, feat_node, geom_node,
                     URIRef(ns['geo'] + 'hasGeometry'))

        # include WKT
        geom_wkt_node = Literal(geom_wkt,
                                datatype=URIRef(ns['geo'] + 'wktLiteral'))
        add_property(g, geom_node, geom_wkt_node,
                     URIRef(ns['geo'] + 'asWKT'))
def enrich(schema, database, graph, time):
    """Build and return a new graph holding the OTL enrichments.

    A fresh ``Graph`` is created whose identifier is
    ``gen_hash("OTL", time)``. ``_enrich`` is called to fill it from
    ``database``, ``schema`` and ``graph``, after which ``add_metadata`` is
    called on it with the schema prefix, ``time`` and the label "OTL".
    """
    enriched = Graph(identifier=gen_hash("OTL", time))

    # content first, meta-data afterwards
    _enrich(enriched, database, schema, graph)
    add_metadata(enriched, DEFAULT_SCHEMA_PREFIX, time, "OTL")

    return enriched
def _generate_branch(schema, ns_abox, tail, parent, values):
    """Recursively translate a nested ``tail`` description into a graph.

    ``tail`` is a mapping with the keys 'head', 'property' and 'tail' (and
    'type' when 'tail' is a string). Two shapes are handled:

    * 'tail' is a string and 'type' is not "URIRef": a literal is attached
      to ``parent``; its value comes from ``schema.compile_value`` and its
      datatype from 'head'.
    * otherwise: a resource node is created (typed with 'head', named by
      hashing 'head' with the parent), attached to ``parent``, and, when
      'tail' is not a string, used as the parent for the nested 'tail'.

    Returns a new ``Graph`` with the triples of this branch and everything
    below it.
    """
    branch = Graph()
    is_leaf = type(tail['tail']) is str

    if is_leaf and tail['type'] != "URIRef":
        # terminal literal
        leaf = Literal(schema.compile_value(tail, values),
                       datatype=URIRef(tail['head']))
        add_property(branch, parent, leaf, URIRef(tail['property']))
        return branch

    # resource node, shared by the URIRef leaf and the intermediate case
    head_class = URIRef(tail['head'])
    resource = URIRef(ns_abox + gen_hash(tail['head'], parent.toPython()))
    add_type(branch, resource, head_class)
    add_property(branch, parent, resource, URIRef(tail['property']))

    if not is_leaf:
        # descend with the new node as parent
        branch += _generate_branch(schema, ns_abox, tail['tail'],
                                   resource, values)

    return branch
def link(schema, source_database, target_database, source_graph, target_graph, time):
    """Build and return a new graph holding the interlinks.

    The graph is identified by ``gen_hash("xref", time)``. ``_interlink``
    fills it for the source -> target direction, after which
    ``add_metadata`` is called on it with the schema prefix, ``time`` and
    the label "XREF".
    """
    xref = Graph(identifier=gen_hash("xref", time))

    # links first, meta-data afterwards
    _interlink(xref,
               schema,
               source_database,
               target_database,
               source_graph,
               target_graph)
    add_metadata(xref, DEFAULT_SCHEMA_PREFIX, time, "XREF")

    return xref
def translate(server, mapper, references, visited, time):
    """Translate all referenced records of the mapper's database.

    The module-level ``DEFAULT_*`` namespace and prefix globals are
    overwritten for the selected database, and a new graph identified by
    ``gen_hash(<DATABASE NAME IN UPPER CASE>, time)`` is created.

    Tables are then translated in passes. In each pass every table with at
    least one referenced record is handed to ``_table_to_graph`` and its
    records are registered in ``visited``; afterwards the visited records
    are removed from ``references``. Passes stop once ``references`` is
    empty or ``MAX_ITERATIONS`` passes have run (at least one pass always
    runs).

    Returns the populated graph.
    """
    global DEFAULT_NAMESPACE, DEFAULT_PREFIX, DEFAULT_SCHEMA_NAMESPACE, DEFAULT_SCHEMA_PREFIX

    # selected database
    database = mapper.database_name()

    # override variables
    DEFAULT_NAMESPACE = "http://www.rijkswaterstaat.nl/linked_data/{}/".format(database)
    DEFAULT_PREFIX = "rws.{}".format(database)
    DEFAULT_SCHEMA_NAMESPACE = "http://www.rijkswaterstaat.nl/linked_data/schema/{}/".format(database)
    DEFAULT_SCHEMA_PREFIX = "rws.schema.{}".format(database)

    # init graph instance and register namespaces
    output = Graph(identifier=gen_hash(database.upper(), time))
    _update_namespaces(output.namespace_manager)

    passes = 0
    while True:
        pending = references.references(sync=True)
        for tbl, tbl_records in pending:
            if len(tbl_records) > 0:
                # translate table, then remember what has been handled
                _table_to_graph(output, server, references, tbl, mapper)
                visited.add_references(tbl, tbl_records)

        # drop everything already visited from the work list
        references.difference_update(visited)
        passes += 1

        if references.is_empty():
            logger.info("No more references")
            break
        elif passes >= MAX_ITERATIONS:
            logger.info("Maximum number of iterations reached: {}".format(passes))
            break

    return output
def link(schema, source_database, target_database, source_graph, target_graph, include_backlinks, time):
    """Build and return a new graph holding the schema interlinks.

    The graph is identified by ``gen_hash("xref.schema", time)``.
    ``_interlink`` fills it for the source -> target direction and, when
    ``include_backlinks`` is truthy, once more with source and target
    swapped. Finally ``add_metadata`` is called on it with the schema
    prefix, ``time``, the label "EXTRA" and ``is_ontology=True``.
    """
    xref = Graph(identifier=gen_hash("xref.schema", time))

    # forward direction is always added; the reverse one only on request
    directions = [(source_database, target_database, source_graph, target_graph)]
    if include_backlinks:
        directions.append(
            (target_database, source_database, target_graph, source_graph))

    for from_db, to_db, from_graph, to_graph in directions:
        _interlink(xref, schema, from_db, to_db, from_graph, to_graph)

    # add meta-data
    add_metadata(xref, DEFAULT_SCHEMA_PREFIX, time, "EXTRA", is_ontology=True)

    return xref
def translate(database, mapper, time):
    """Translate the classes known to ``mapper`` into an ontology graph.

    The module-level ``DEFAULT_NAMESPACE`` and ``DEFAULT_PREFIX`` are
    extended with the database name, a new graph identified by
    ``gen_hash(DEFAULT_PREFIX, time)`` is created, every table returned by
    ``mapper.classes()`` is passed to ``_table_to_class``, and
    ``add_metadata`` is called with ``is_ontology=True``.

    Returns the populated graph.
    """
    global DEFAULT_NAMESPACE, DEFAULT_PREFIX

    # NOTE: the globals are extended, not reset, so every call appends one
    # more database segment to whatever value they currently hold.
    DEFAULT_NAMESPACE = DEFAULT_NAMESPACE + "{}/".format(database)
    DEFAULT_PREFIX = DEFAULT_PREFIX + ".{}".format(database)

    # init graph instance and register namespaces
    ontology = Graph(identifier=gen_hash(DEFAULT_PREFIX, time))
    _update_namespaces(ontology.namespace_manager)

    for table_name in mapper.classes():
        logger.info("Processing table {}".format(table_name))
        _table_to_class(ontology, table_name, mapper)

    # add meta-data
    add_metadata(ontology, DEFAULT_PREFIX, time, database, is_ontology=True)

    return ontology
def translate(gdb, mapper, area, time):
    """Translate every layer known to ``mapper`` into one graph.

    The module-level ``DEFAULT_*`` namespace and prefix globals are
    overwritten for the mapper's database, a new graph identified by
    ``gen_hash(<DATABASE NAME IN UPPER CASE>, time)`` is created, and each
    layer returned by ``mapper.classes()`` is handed to
    ``_layer_to_graph`` together with ``gdb`` and ``area``.

    Returns the populated graph.
    """
    global DEFAULT_NAMESPACE, DEFAULT_PREFIX, DEFAULT_SCHEMA_NAMESPACE, DEFAULT_SCHEMA_PREFIX

    # selected database
    db_name = mapper.database_name()

    # override variables
    DEFAULT_NAMESPACE = "http://www.rijkswaterstaat.nl/linked_data/{}/".format(db_name)
    DEFAULT_PREFIX = "rws.{}".format(db_name)
    DEFAULT_SCHEMA_NAMESPACE = "http://www.rijkswaterstaat.nl/linked_data/schema/{}/".format(db_name)
    DEFAULT_SCHEMA_PREFIX = "rws.schema.{}".format(db_name)

    # init graph instance and register namespaces
    output = Graph(identifier=gen_hash(db_name.upper(), time))
    _update_namespaces(output.namespace_manager)

    for layer_name in mapper.classes():
        _layer_to_graph(output, gdb, layer_name, area, mapper)

    return output
def _table_to_graph(g, server, references, table, mapper):
    """Translate the referenced records of one table into triples in ``g``.

    The class for the table is first defined through ``_table_to_class``.
    Then every record from ``server.records(table)`` whose identifier value
    is among the references registered for ``table`` is translated. For
    each such record the following is added through the ``add_*`` helpers:

    * a typed and labelled record node;
    * one literal per mapped attribute with a non-``None`` value;
    * for each mapped relation with a non-``None`` value, a link to the
      referenced node and an inverse link back. The referenced value is
      also registered in ``references`` under the relation's target table.

    Nothing beyond the class definition is produced when the table mapping
    has no identifier.

    Args:
        g: graph that receives the triples; its namespace manager supplies
            the prefix -> namespace lookup.
        server: data source providing ``records(table)``; records are
            indexed by column name and expose ``items()``.
        references: reference registry; read for ``table`` and extended via
            ``add_reference`` for relation targets.
        table: key of the table in ``mapper.schema['schema']``.
        mapper: schema mapper providing ``schema``, ``attributes`` and
            ``relations``.
    """
    ns = dict(g.namespace_manager.namespaces())

    logger.info("Processing table {}".format(table))
    mapping = mapper.schema['schema'][table]

    # define class
    class_node = _table_to_class(g, mapping)

    # This test does not depend on the record, so it is done once up front
    # instead of once per record: without an identifier no record can be
    # translated and the table does not need to be fetched.
    identifier = mapping['identifier']
    if identifier is None:
        return

    # Sets give constant-time membership tests. The referenced ids are
    # checked once per record, so a list here made the loop quadratic.
    # This is a snapshot: references added in the loop below do not change it.
    referenced_nodes = set(references.references(table=table))
    attributes = set(mapper.attributes(table))
    relations = set(mapper.relations(table))

    # translate records
    for rec in server.records(table):
        rec_id = rec[identifier]
        if rec_id not in referenced_nodes:
            continue

        # node for this record
        rec_node = URIRef(ns[DEFAULT_PREFIX]
                          + gen_hash(mapping['classname'], rec_id))
        add_type(g, rec_node, class_node)
        add_label(g, rec_node, "{} {}".format(mapping['classname'], rec_id))

        for k, v in rec.items():
            if v is None:
                continue

            if k in attributes:
                # create node
                attr = mapping['attributes'][k]
                try:
                    if type(v) is str:
                        v = v.strip()
                    attr_node = Literal(v, datatype=URIRef(attr['datatype']))
                except UnicodeDecodeError:
                    # retry with undecodable bytes dropped
                    if type(v) is bytes:
                        v = v.decode('utf-8', 'ignore')
                    attr_node = Literal(v, datatype=URIRef(attr['datatype']))

                # link to node
                attr_link = URIRef(ns[DEFAULT_SCHEMA_PREFIX]
                                   + "{}_{}".format(
                                       mapping['classname'].lower(),
                                       attr['property']))
                add_property(g, rec_node, attr_node, attr_link)

            if k in relations:
                rel = mapping['relations'][k]

                # create node
                referenced_node = URIRef(ns[DEFAULT_PREFIX]
                                         + gen_hash(rel['targetclassname'], v))

                # link to node
                rel_link = URIRef(ns[DEFAULT_SCHEMA_PREFIX]
                                  + "{}_{}".format(
                                      mapping['classname'].lower(),
                                      rel['property']))
                add_property(g, rec_node, referenced_node, rel_link)

                # add back link
                inverse_rel_link = URIRef(ns[DEFAULT_SCHEMA_PREFIX]
                                          + "{}_inv_{}".format(
                                              rel['targetclassname'].lower(),
                                              mapping['classname'].lower()))
                add_property(g, referenced_node, rec_node, inverse_rel_link)

                # store referenced nodes for further processing
                references.add_reference(rel['targettable'], v)