def spa_subset_insert(specs):
    """Build the SPARQL update that materialises a subset linkset.

    For every ``?subject`` typed with the source entity datatype that carries
    the source's ``St.link_old`` predicate, the query inserts
    ``?subject ?singPre ?object`` into the linkset graph, where ``?singPre`` is
    a singleton predicate minted from a UUID, and describes that singleton in
    the ``singleton:<linkset name>`` graph.

    :param specs: specification mapping. Keys read here: ``St.source`` (with
        ``St.link_old``, ``St.graph_name``, ``St.graph`` and
        ``St.entity_datatype``), ``St.linkset``, ``St.linkset_name``,
        ``St.sameAsCount`` and ``St.mechanism``.
    :return: the query text. Nothing is sent to the endpoint here.
    """
    source = specs[St.source]
    old_link = source[St.link_old]

    # THE PREDICATE IS USED AS-IS WHEN Ls.nt_format ACCEPTS IT, OTHERWISE IT IS WRAPPED IN <>
    if Ls.nt_format(old_link):
        src_aligns = old_link
    else:
        src_aligns = "<{}>".format(old_link)

    template = """
    ###### INSERT SUBSET LINKSET
    PREFIX rdf: <{}>
    PREFIX singleton: <{}>
    PREFIX alivocab: <{}>
    INSERT
    {{
        GRAPH <{}>
        {{
            ?subject ?singPre ?object .
        }}
        GRAPH singleton:{}
        {{
            ?singPre rdf:singletonPropertyOf alivocab:exactStrSim{} .
            ?singPre alivocab:hasStrength 1 .
            ?singPre alivocab:hasEvidence "Aligned by {} ." .
        }}
    }}
    WHERE
    {{
        GRAPH <{}>
        {{
            ?subject a <{}> ;
                {} ?object .
        }}
        ### Create A SINGLETON URI
        BIND( replace("{}{}{}_#", "#", STRAFTER(str(UUID()),"uuid:")) as ?pre )
        BIND(iri(?pre) as ?singPre)
    }}
    """

    # POSITIONAL VALUES, IN THE ORDER THE PLACEHOLDERS APPEAR IN THE TEMPLATE
    values = (
        Ns.rdf, Ns.singletons, Ns.alivocab,
        specs[St.linkset], specs[St.linkset_name], specs[St.sameAsCount],
        source[St.graph_name], source[St.graph], source[St.entity_datatype],
        src_aligns,
        Ns.alivocab, specs[St.mechanism], specs[St.sameAsCount],
    )
    return template.format(*values)
def linkset_metadata(specs, display=False):
    """Build the SPARQL INSERT that records the metadata of a linkset.

    Besides building the query, this function fills in several derived
    entries of ``specs`` (``St.singleton``, ``St.link``,
    ``St.assertion_method``, ``St.justification``, ``St.link_comment`` and,
    for the mechanisms handled below, ``St.link_name``,
    ``St.link_subpropertyof``, ``St.justification_comment`` and
    ``St.linkset_comment``). It also stores in ``St.triples`` the size of the
    linkset graph as reported by ``Qry.get_namedgraph_size`` and prints that
    size.

    :param specs: specification mapping; must already hold ``St.source``,
        ``St.target``, ``St.linkset``, ``St.linkset_name``,
        ``St.sameAsCount``, ``St.mechanism`` and ``St.insert_query``.
    :param display: when ``True`` the generated query is printed.
    :return: the metadata query text.
    """
    source = specs[St.source]
    target = specs[St.target]

    # OPTIONAL TRIPLES, EMITTED ONLY WHEN THE SETTING IS PRESENT AND NON-EMPTY
    extra = ""
    if St.reducer in source and len(source[St.reducer]) > 0:
        extra += "\n alivocab:subjectsReducer <{}> ;".format(source[St.reducer])
    if St.reducer in target and len(target[St.reducer]) > 0:
        extra += "\n alivocab:objectsReducer <{}> ;".format(target[St.reducer])
    if St.intermediate_graph in specs and len(specs[St.intermediate_graph]) > 0:
        extra += "\n alivocab:intermediate <{}> ;".format(specs[St.intermediate_graph])
    if St.threshold in specs and len(str(specs[St.threshold])) > 0:
        extra += "\n alivocab:threshold {} ;".format(str(specs[St.threshold]))
    if St.delta in specs and len(str(specs[St.delta])) > 0:
        extra += "\n alivocab:delta {} ;".format(str(specs[St.delta]))

    src_aligns = Ls.format_aligns(source[St.aligns])
    trg_aligns = Ls.format_aligns(target[St.aligns])

    # CROSS CHECK INFORMATION IS USED IN CASE THE ALIGN PROPERTY APPEARS MEANINGLESS
    src_cross_check = Ls.format_aligns(source[St.crossCheck]) if St.crossCheck in source else None
    trg_cross_check = Ls.format_aligns(target[St.crossCheck]) if St.crossCheck in target else None

    # CROSS CHECK FRAGMENTS FOR THE WHERE CLAUSE AND FOR THE INSERT CLAUSE
    cross_check_where = ''
    cross_check_insert = ''
    if src_cross_check is not None:
        cross_check_where += "\n BIND(iri({}) AS ?src_crossCheck)".format(src_cross_check)
        cross_check_insert += "\n alivocab:crossCheckSubject ?src_crossCheck ;"
    if trg_cross_check is not None:
        cross_check_where += "\n BIND(iri({}) AS ?trg_crossCheck)".format(trg_cross_check)
        cross_check_insert += "\n alivocab:crossCheckObject ?trg_crossCheck ;"

    # RESOURCES DERIVED FROM THE LINKSET NAME
    specs[St.singleton] = "{}{}".format(Ns.singletons, specs[St.linkset_name])
    specs[St.link] = "{}{}{}".format(Ns.alivocab, "exactStrSim", specs[St.sameAsCount])
    specs[St.assertion_method] = "{}{}".format(Ns.method, specs[St.linkset_name])
    specs[St.justification] = "{}{}".format(Ns.justification, specs[St.linkset_name])
    specs[St.link_comment] = "The predicate <{}> used in this linkset is a property that reflects an entity " \
                             "linking approach based on the <{}{}> mechanism.". \
        format(specs[St.link], Ns.mechanism, specs[St.mechanism])

    # MECHANISM SPECIFIC LABELS AND COMMENTS (THE MECHANISM NAMES ARE MUTUALLY EXCLUSIVE)
    mechanism = str(specs[St.mechanism]).lower()
    if mechanism in ("intermediate", "exactstrsim", "identity", "approxstrsim", "nearbygeosim"):
        specs[St.link_subpropertyof] = "http://risis.eu/linkset/predicate/{}".format(specs[St.mechanism])

    if mechanism == "intermediate":
        specs[St.link_name] = "Exact String Similarity via intermediate dataset"
        specs[St.justification_comment] = "The method MATCH VIA INTERMEDIATE DATASET is used to align the" \
                                          " source and the target by using properties that present different " \
                                          "descriptions of a same entity, such as country name and country code. " \
                                          "This is possible by providing an intermediate dataset that binds the " \
                                          "two alternative descriptions to the very same identifier."
        specs[St.linkset_comment] = "Linking <{}> to <{}> by aligning {} with {} using the mechanism: {}". \
            format(source[St.graph], target[St.graph], src_aligns, trg_aligns, specs[St.mechanism])

    elif mechanism == "exactstrsim":
        specs[St.link_name] = "Exact String Similarity"
        specs[St.justification_comment] = "We assume that entities with the aligned predicates sharing the " \
                                          "exact same content are the same. This assumption applies when dealing " \
                                          "with entities such as Organisation."
        specs[St.linkset_comment] = "Linking <{}> to <{}> by aligning {} with {} using the mechanism: {}". \
            format(source[St.graph], target[St.graph], src_aligns, trg_aligns, specs[St.mechanism])

    elif mechanism == "identity":
        specs[St.link_name] = "Same URI"
        specs[St.justification_comment] = "We assume that entities with the same URI are identical."
        specs[St.linkset_comment] = "Linking <{}> to <{}> based on their identical URI using the mechanism: {}". \
            format(source[St.graph], target[St.graph], specs[St.mechanism])

    elif mechanism == "approxstrsim":
        specs[St.link_name] = "Approximate String Similarity"
        specs[St.justification_comment] = "This includes entities with a string similarity in the interval [{} 1[.".\
            format(specs[St.threshold])
        specs[St.linkset_comment] = "Linking <{}> to <{}> based on their approximate string similarity" \
                                    " using the mechanism: {}". \
            format(source[St.graph], target[St.graph], specs[St.mechanism])

    elif mechanism == "nearbygeosim":
        specs[St.link_name] = "Near by Geo-Similarity"
        # FIX: THE SECOND VALUE IS THE UNIT, NOT THE UNIT VALUE A SECOND TIME
        # (THIS NOW AGREES WITH lens_refine_geo_metadata)
        specs[St.justification_comment] = "This includes entities near each other by at most {} <{}>.". \
            format(specs[St.unit_value], specs[St.unit])
        specs[St.linkset_comment] = "Linking <{}> to <{}> based on their nearby Geo-Similarity" \
                                    " using the mechanism: {}". \
            format(source[St.graph], target[St.graph], specs[St.mechanism])

    # SIZE OF THE LINKSET AS CURRENTLY STORED AT THE ENDPOINT
    specs[St.triples] = Qry.get_namedgraph_size(specs[St.linkset], isdistinct=False)
    print("\t>>> {} CORRESPONDENCES INSERTED".format(specs[St.triples]))

    # ONE ENTRY PER LINE OF THE QUERY; EVERY LINE IS PRECEDED BY A NEWLINE
    lines = [
        "##################################################################",
        "### METADATA FOR {}".format(specs[St.linkset]),
        "##################################################################",
        "PREFIX prov: <{}>".format(Ns.prov),
        "PREFIX alivocab: <{}>".format(Ns.alivocab),
        "PREFIX rdfs: <{}>".format(Ns.rdfs),
        "PREFIX void: <{}>".format(Ns.void),
        "PREFIX bdb: <{}>".format(Ns.bdb),
        "INSERT",
        "{",
        " <{}>".format(specs[St.linkset]),
        ' rdfs:label "{}" ; '.format(specs[St.linkset_name]),
        " a void:Linkset ;",
        " void:triples {} ;".format(specs[St.triples]),
        " alivocab:sameAsCount {} ;".format(specs[St.sameAsCount]),
        " alivocab:alignsMechanism <{}{}> ;".format(Ns.mechanism, specs[St.mechanism]),
        " void:subjectsTarget <{}> ;".format(source[St.graph]),
        " void:objectsTarget <{}> ;".format(target[St.graph]),
        " void:linkPredicate <{}> ;".format(specs[St.link]),
        " bdb:subjectsDatatype <{}> ;".format(source[St.entity_datatype]),
        " bdb:objectsDatatype <{}> ;".format(target[St.entity_datatype]),
        " alivocab:singletonGraph <{}> ;".format(specs[St.singleton]),
        " bdb:assertionMethod <{}> ;".format(specs[St.assertion_method]),
        " bdb:linksetJustification <{}> ;{}".format(specs[St.justification], extra),
        " alivocab:alignsSubjects ?src_aligns ;",
        " alivocab:alignsObjects ?trg_aligns ;{}".format(cross_check_insert),
        ' rdfs:comment """{}""" .'.format(specs[St.linkset_comment]),

        "\n ### METADATA ABOUT THE LINKTYPE",
        " <{}>".format(specs[St.link]),
        ' rdfs:comment """{}""" ;'.format(specs[St.link_comment]),
        ' rdfs:label "{} {}" ;'.format(specs[St.link_name], specs[St.sameAsCount]),
        " rdfs:subPropertyOf <{}> .".format(specs[St.link_subpropertyof]),

        "\n ### METADATA ABOUT THE LINKSET JUSTIFICATION",
        " <{}>".format(specs[St.justification]),
        ' rdfs:comment """{}""" .'.format(specs[St.justification_comment]),

        "\n ### ASSERTION METHOD",
        " <{}>".format(specs[St.assertion_method]),
        ' alivocab:sparql """{}""" .'.format(specs[St.insert_query]),
        "}",

        "WHERE",
        "{",
        " BIND(iri({}) AS ?src_aligns)".format(src_aligns),
        " BIND(iri({}) AS ?trg_aligns){}".format(trg_aligns, cross_check_where),
        "}",
    ]
    query = "\n" + "\n".join(lines)

    if display is True:
        print(query)
    return query
def spa_subset_metadata(specs):
    """Build the SPARQL INSERT that records the metadata of a subset linkset.

    The query describes the linkset itself, its justification, its link type
    and its assertion method. Every generated line is prefixed with a tab.
    Nothing is executed and ``specs`` is not modified.

    :param specs: specification mapping. Besides ``St.source`` and
        ``St.target`` it must already hold the linkset entries used below
        (``St.linkset``, ``St.linkset_name``, ``St.mechanism``,
        ``St.sameAsCount``, ``St.triples``, ``St.link``,
        ``St.assertion_method``, ``St.justification``, the three comment
        entries, ``St.link_name``, ``St.link_subpropertyof`` and
        ``St.insert_query``).
    :return: the metadata query text.
    """
    source = specs[St.source]
    target = specs[St.target]
    src_aligns = Ls.format_aligns(source[St.link_old])

    # CROSS CHECK INFORMATION IS USED IN CASE THE ALIGN PROPERTY APPEARS MEANINGLESS
    src_cross_check = None
    if St.crossCheck in source:
        src_cross_check = Ls.format_aligns(source[St.crossCheck])
    trg_cross_check = None
    if St.crossCheck in target:
        trg_cross_check = Ls.format_aligns(target[St.crossCheck])

    # FRAGMENTS FOR THE WHERE CLAUSE (BINDINGS) AND THE INSERT CLAUSE (TRIPLES)
    where_parts = []
    insert_parts = []
    if src_cross_check is not None:
        where_parts.append("\n BIND(iri({}) AS ?src_crossCheck)".format(src_cross_check))
        insert_parts.append("\n alivocab:crossCheckSubject ?src_crossCheck ;")
    if trg_cross_check is not None:
        where_parts.append("\n BIND(iri({}) AS ?trg_crossCheck)".format(trg_cross_check))
        insert_parts.append("\n alivocab:crossCheckObject ?trg_crossCheck ;")
    cross_check_where = ''.join(where_parts)
    cross_check_insert = ''.join(insert_parts)

    # THE LINK PREDICATE OF A SUBSET IS THE LINK FOLLOWED BY THE SAME-AS COUNT
    link_uri = "{}{}".format(specs[St.link], specs[St.sameAsCount])

    rows = [
        "\t###### METADATA",
        "\tPREFIX prov: <{}>".format(Ns.prov),
        "\tPREFIX rdfs: <{}>".format(Ns.rdfs),
        "\tPREFIX void: <{}>".format(Ns.void),
        "\tPREFIX alivocab: <{}>".format(Ns.alivocab),
        "\tPREFIX bdb: <{}>".format(Ns.bdb),
        "\tINSERT",
        "\t{",

        "\t ### [SUBSET of {}]".format(source[St.graph]),
        "\t ### METADATA ABOUT THE SUBSET LINKSET",
        "\t <{}>".format(specs[St.linkset]),
        "\t a void:Linkset ;",

        '\t rdfs:label "{}" ; '.format(specs[St.linkset_name]),
        "\t alivocab:alignsMechanism <{}{}> ;".format(Ns.mechanism, specs[St.mechanism]),
        "\t alivocab:sameAsCount {} ;".format(specs[St.sameAsCount]),
        "\t void:subset <{}> ;".format(source[St.graph]),
        "\t void:subjectsTarget <{}> ;".format(source[St.graph]),
        "\t void:objectsTarget <{}> ;".format(target[St.graph]),
        "\t void:triples {} ;".format(specs[St.triples]),
        "\t void:linkPredicate <{}> ;".format(link_uri),
        "\t bdb:subjectsDatatype <{}> ;".format(source[St.entity_datatype]),
        "\t bdb:objectsDatatype <{}> ;".format(target[St.entity_datatype]),
        "\t alivocab:singletonGraph <{}{}> ;".format(Ns.singletons, specs[St.linkset_name]),
        "\t bdb:assertionMethod <{}> ;".format(specs[St.assertion_method]),
        "\t bdb:linksetJustification <{}> ;".format(specs[St.justification]),
        "\t alivocab:alignsSubjects ?src_aligns ;",
        "\t alivocab:alignsObjects <{}> ;{}".format(Ns.rsrId, cross_check_insert),
        '\t rdfs:comment """{}""" .'.format(specs[St.linkset_comment]),

        "\n\t ### METADATA ABOUT THE LINKSET JUSTIFICATION",
        "\t <{}>".format(specs[St.justification]),
        '\t rdfs:comment """{}""" .'.format(specs[St.justification_comment]),

        "\n\t ### METADATA ABOUT THE LINKTYPE",
        "\t <{}>".format(link_uri),
        '\t rdfs:comment """{}""" ;'.format(specs[St.link_comment]),
        '\t rdfs:label "{} {}" ;'.format(specs[St.link_name], specs[St.sameAsCount]),
        "\t rdfs:subPropertyOf <{}> .".format(specs[St.link_subpropertyof]),

        "\n\t ### ASSERTION METHOD",
        "\t <{}>".format(specs[St.assertion_method]),
        '\t alivocab:sparql """{}""" .'.format(specs[St.insert_query]),
        "\t}",

        "\tWHERE",
        "\t{",
        "\t BIND(iri({}) AS ?src_aligns){}".format(src_aligns, cross_check_where),
        "\t}",
    ]

    # EVERY ROW IS PRECEDED BY A NEWLINE, INCLUDING THE FIRST ONE
    metadata = "".join("\n" + row for row in rows)
    return metadata
def lens_refine_geo_metadata(specs, display=False):
    """Build the SPARQL INSERT that records the metadata of a geo-refined lens.

    Derived entries are written back into ``specs`` (``St.singleton``,
    ``St.link``, ``St.assertion_method``, ``St.justification``,
    ``St.link_comment`` and, for the ``nearbyGeoSim`` mechanism,
    ``St.link_name``, ``St.link_subpropertyof``, ``St.justification_comment``
    and ``St.lens_comment``). ``St.triples`` is set to the size of the lens
    graph reported by ``Qry.get_namedgraph_size`` and that size is printed.

    :param specs: specification mapping; source and target must each provide
        ``St.crossCheck``, ``St.longitude`` and ``St.latitude``.
    :param display: when ``True`` the generated query is printed.
    :return: the metadata query text.
    """
    source = specs[St.source]
    target = specs[St.target]

    # OPTIONAL TRIPLES: (CONTAINER, KEY, TEMPLATE) FOR SETTINGS MEASURED DIRECTLY WITH len()
    extra = ""
    for holder, key, pattern in (
            (source, St.reducer, "\n ll:subjectsReducer <{}> ;"),
            (target, St.reducer, "\n ll:objectsReducer <{}> ;"),
            (specs, St.intermediate_graph, "\n ll:intermediate <{}> ;")):
        if key in holder and len(holder[key]) > 0:
            extra += pattern.format(holder[key])

    # THRESHOLD AND DELTA ARE COMPARED AND WRITTEN THROUGH THEIR STRING FORM
    for key, pattern in ((St.threshold, "\n ll:threshold {} ;"), (St.delta, "\n ll:delta {} ;")):
        if key in specs and len(str(specs[key])) > 0:
            extra += pattern.format(str(specs[key]))

    src_cross_check = Ls.format_aligns(source[St.crossCheck])
    src_long = Ls.format_aligns(source[St.longitude])
    src_lat = Ls.format_aligns(source[St.latitude])

    trg_cross_check = Ls.format_aligns(target[St.crossCheck])
    trg_long = Ls.format_aligns(target[St.longitude])
    trg_lat = Ls.format_aligns(target[St.latitude])

    # RESOURCES DERIVED FROM THE LENS NAME
    lens_name = specs[St.lens_name]
    specs[St.singleton] = "{}{}".format(Ns.singletons, lens_name)
    specs[St.link] = "{}{}{}".format(Ns.alivocab, "nearbyGeoSim", specs[St.sameAsCount])
    specs[St.assertion_method] = "{}{}".format(Ns.method, lens_name)
    specs[St.justification] = "{}{}".format(Ns.justification, lens_name)
    specs[St.link_comment] = "The predicate <{}> used in this linkset is a property that reflects an entity " \
                             "linking approach based on the <{}{}> mechanism.". \
        format(specs[St.link], Ns.mechanism, specs[St.mechanism])

    if str(specs[St.mechanism]).lower() == "nearbygeosim":
        specs[St.link_name] = "Near by Geo-Similarity"
        specs[St.link_subpropertyof] = "http://risis.eu/linkset/predicate/{}".format(specs[St.mechanism])
        specs[St.justification_comment] = "This includes entities near each other by at most {} <{}>.". \
            format(specs[St.unit_value], specs[St.unit])
        specs[St.lens_comment] = "Linking <{}> to <{}> based on their nearby Geo-Similarity" \
                                 " using the mechanism: {}". \
            format(source[St.graph], target[St.graph], specs[St.mechanism])

    # SIZE OF THE LENS AS CURRENTLY STORED AT THE ENDPOINT
    specs[St.triples] = Qry.get_namedgraph_size(specs[St.lens], isdistinct=False)
    print("\t>>> {} CORRESPONDENCES INSERTED".format(specs[St.triples]))

    banner = "##################################################################"
    rows = [
        banner,
        "### METADATA FOR {}".format(specs[St.lens]),
        banner,
        "PREFIX prov: <{}>".format(Ns.prov),
        "PREFIX ll: <{}>".format(Ns.alivocab),
        "PREFIX rdfs: <{}>".format(Ns.rdfs),
        "PREFIX void: <{}>".format(Ns.void),
        "PREFIX bdb: <{}>".format(Ns.bdb),
        "INSERT",
        "{",
        " <{}>".format(specs[St.lens]),
        ' rdfs:label "{}" ; '.format(lens_name),
        " a bdb:Lens ;",

        " void:triples {} ;".format(specs[St.triples]),
        " ll:sameAsCount {} ;".format(specs[St.sameAsCount]),
        " ll:alignsMechanism <{}{}> ;".format(Ns.mechanism, specs[St.mechanism]),
        " void:subjectsTarget <{}> ;".format(source[St.graph]),
        " void:objectsTarget <{}> ;".format(target[St.graph]),
        " void:linkPredicate <{}> ;".format(specs[St.link]),
        " bdb:subjectsDatatype <{}> ;".format(source[St.entity_datatype]),
        " bdb:objectsDatatype <{}> ;".format(target[St.entity_datatype]),
        " ll:singletonGraph <{}> ;".format(specs[St.singleton]),
        " bdb:assertionMethod <{}> ;".format(specs[St.assertion_method]),
        " bdb:linksetJustification <{}> ;{}".format(specs[St.justification], extra),
        " ll:crossCheckSubject ?src_crossCheck ;",
        " ll:crossCheckObject ?trg_crossCheck ;",
        " ll:unit <{}> ;".format(specs[St.unit]),
        " ll:unitValue {} ;".format(specs[St.unit_value]),
        " ll:alignsSubjects ( ?src_long ?src_lat ) ;",
        " ll:alignsObjects ( ?trg_long ?trg_lat ) ;",

        ' rdfs:comment """{}""" .'.format(specs[St.lens_comment]),
        "\n ### METADATA ABOUT THE LINKTYPE",
        " <{}>".format(specs[St.link]),
        ' rdfs:comment """{}""" ;'.format(specs[St.link_comment]),
        ' rdfs:label "{} {}" ;'.format(specs[St.link_name], specs[St.sameAsCount]),

        " rdfs:subPropertyOf <{}> .".format(specs[St.link_subpropertyof]),
        "\n ### METADATA ABOUT THE LINKSET JUSTIFICATION",
        " <{}>".format(specs[St.justification]),

        ' rdfs:comment """{}""" .'.format(specs[St.justification_comment]),
        "\n ### ASSERTION METHOD",
        " <{}>".format(specs[St.assertion_method]),
        ' ll:sparql """{}""" .'.format(specs[St.insert_query]),
        "}",

        "WHERE",
        "{",
        " BIND(iri({}) AS ?src_crossCheck)".format(src_cross_check),
        " BIND(iri({}) AS ?trg_crossCheck)".format(trg_cross_check),
        " BIND(iri({}) AS ?src_long)".format(src_long),
        " BIND(iri({}) AS ?src_lat)".format(src_lat),
        " BIND(iri({}) AS ?trg_long)".format(trg_long),
        " BIND(iri({}) AS ?trg_lat)".format(trg_lat),
        "}",
    ]

    # EVERY ROW IS PRECEDED BY A NEWLINE, INCLUDING THE FIRST ONE
    query = "\n" + "\n".join(rows)

    if display is True:
        print(query)
    return query
def linkset_refined_metadata(specs, display=False):
    """Build the SPARQL INSERT that records the metadata of a refined linkset.

    Derived entries are written back into ``specs`` (``St.singleton``,
    ``St.link``, ``St.assertion_method``, ``St.justification``,
    ``St.link_comment`` and the mechanism specific name and comments).
    ``St.triples`` is set to the size of the refined graph; the size of the
    graph being refined (``St.linkset``) is fetched as well and both sizes,
    plus their difference, are printed and summarised in the returned message.

    :param specs: specification mapping of the refinement.
    :param display: when ``True`` the generated query is printed.
    :return: ``{"query": <text or None>, "message": <html summary>}``. The
        query is ``None`` when the refined graph holds no triples.
    """
    source = specs[St.source]
    target = specs[St.target]

    # CONDITIONAL METADATA TO APPEND TO THE REFINED LINKSET
    extra = ""
    for holder, key, pattern in (
            (source, St.extended_graph, "\n alivocab:subjectsExtended <{}> ;"),
            (target, St.extended_graph, "\n alivocab:objectsExtended <{}> ;"),
            (source, St.reducer, "\n alivocab:subjectsReducer <{}> ;"),
            (target, St.reducer, "\n alivocab:objectsReducer <{}> ;"),
            (specs, St.intermediate_graph, "\n alivocab:intermediatesTarget <{}> ;")):
        if key in holder and len(holder[key]) > 0:
            extra += pattern.format(holder[key])

    if St.threshold in specs and len(str(specs[St.threshold])) > 0:
        extra += "\n alivocab:threshold {} ;".format(str(specs[St.threshold]))

    # THE DELTA IS ONLY REPORTED WHEN IT IS NOT "0" AND CONVERTS TO A REAL NUMBER
    if St.delta in specs and str(specs[St.delta]) != "0":
        converted = convert_to_float(str(specs[St.delta]))
        if not math.isnan(converted):
            extra += "\n alivocab:delta {} ;".format(converted)

    src_aligns = Ls.format_aligns(source[St.aligns])
    trg_aligns = Ls.format_aligns(target[St.aligns])

    # RESOURCES DERIVED FROM THE NAME OF THE REFINED LINKSET
    refined_name = specs[St.refined_name]
    specs[St.singleton] = "{}{}".format(Ns.singletons, refined_name)
    specs[St.link] = "{}{}{}".format(Ns.alivocab, "exactStrSim", specs[St.sameAsCount])
    specs[St.assertion_method] = "{}{}".format(Ns.method, refined_name)
    specs[St.justification] = "{}{}".format(Ns.justification, refined_name)
    specs[St.link_comment] = "The predicate <{}> used in this linkset is a property that reflects an entity " \
                             "linking approach based on the <{}{}> mechanism.". \
        format(specs[St.link], Ns.mechanism, specs[St.mechanism])

    # MECHANISM SPECIFIC NAME AND COMMENTS
    mechanism = str(specs[St.mechanism]).lower()

    if mechanism == "exactstrsim":
        specs[St.link_name] = "Exact String Similarity"
        specs[St.link_subpropertyof] = "http://risis.eu/linkset/predicate/{}".format(specs[St.mechanism])
        specs[St.justification_comment] = "We assume that entities with the aligned predicates sharing the " \
                                          "exact same content are same. This assumption applies when dealing " \
                                          "with entities such as Organisation."
        specs[St.linkset_comment] = "Linking <{}> to <{}> by aligning {} with {} using the mechanism: {}". \
            format(source[St.graph], target[St.graph], src_aligns, trg_aligns, specs[St.mechanism])

    elif mechanism == "identity":
        specs[St.link_name] = "Same URI"
        specs[St.link_subpropertyof] = "http://risis.eu/linkset/predicate/{}".format(specs[St.mechanism])
        specs[St.justification_comment] = "We assume that entities with the same URI are identical."
        specs[St.linkset_comment] = "Linking <{}> to <{}> based on their identical URI using the mechanism: {}". \
            format(source[St.graph], target[St.graph], specs[St.mechanism])

    elif mechanism == "approxnbrsim":
        specs[St.link_name] = "Approximate Number Similarity"
        specs[St.link_subpropertyof] = "http://risis.eu/linkset/predicate/{}".format(specs[St.mechanism])
        specs[St.justification_comment] = "This includes entities with an approximate number similarity" \
                                          " in the interval [0 {}].".format(specs[St.delta])
        specs[St.linkset_comment] = "Linking <{}> to <{}> based on their approximate number similarity" \
                                    " using the mechanism: {}". \
            format(source[St.graph], target[St.graph], specs[St.mechanism])

    elif mechanism == "approxstrsim":
        specs[St.link_name] = "Approximate String Similarity"
        specs[St.link_subpropertyof] = "http://risis.eu/linkset/predicate/{}".format(specs[St.mechanism])
        specs[St.justification_comment] = "This includes entities with a string similarity in the interval [{} 1[.".\
            format(specs[St.threshold])
        specs[St.linkset_comment] = "Linking <{}> to <{}> based on their approximate string similarity" \
                                    " using the mechanism: {}". \
            format(source[St.graph], target[St.graph], specs[St.mechanism])

    elif mechanism == "intermediate":
        specs[St.link_name] = "Exact String Similarity"
        specs[St.link_subpropertyof] = "http://risis.eu/linkset/predicate/{}".format(specs[St.mechanism])
        specs[St.justification_comment] = "This is an implementation of the Exact String Similarity Mechanism over " \
                                          "the aligned predicates."
        specs[St.linkset_comment] = "Linking <{}> to <{}> by aligning {} with {} using the mechanism: {}". \
            format(source[St.graph], target[St.graph], src_aligns, trg_aligns, specs[St.mechanism])

    # CHECKING WHETHER THE REFINED HAS SOME TRIPLES INSERTED
    specs[St.triples] = Qry.get_namedgraph_size(specs[St.refined], isdistinct=False)
    triples = Qry.get_namedgraph_size(specs[St.linkset], isdistinct=False)

    in_source = "{} CORRESPONDENCES IN THE SOURCE".format(triples)
    inserted = "{} CORRESPONDENCES INSERTED".format(specs[St.triples])
    print("\t>>> " + in_source)
    print("\t>>> " + inserted)
    rejected = "{} CORRESPONDENCES DO NOT COMPLY WITH THE NEW CONDITION".format(
        str(int(triples) - int(specs[St.triples])))
    print("\t>>> " + rejected)

    message = "{}<br/>{}<br/>{}".format(in_source, inserted, rejected)

    # NOTHING WAS INSERTED: THERE IS NO METADATA TO WRITE
    if not int(specs[St.triples]) > 0:
        return {"query": None, "message": message}

    derived_from = specs[St.derivedfrom] if St.derivedfrom in specs else ""

    if mechanism == "intermediate":
        intermediate = "\n alivocab:intermediatesTarget <{}> ;".format(specs[St.intermediate_graph])
    else:
        intermediate = ""

    banner = "##################################################################"
    rows = [
        banner,
        "### METADATA FOR {}".format(specs[St.refined]),
        banner,
        "PREFIX prov: <{}>".format(Ns.prov),
        "PREFIX alivocab: <{}>".format(Ns.alivocab),
        "PREFIX rdfs: <{}>".format(Ns.rdfs),
        "PREFIX void: <{}>".format(Ns.void),
        "PREFIX bdb: <{}>".format(Ns.bdb),
        "INSERT",
        "{",
        " <{}>".format(specs[St.refined]),
        " a void:Linkset ;\n{}".format(derived_from),
        ' rdfs:label "{}" ; '.format(refined_name),

        " void:triples {} ;".format(specs[St.triples]),
        " alivocab:sameAsCount {} ;".format(specs[St.sameAsCount]),
        " alivocab:alignsMechanism <{}{}> ;".format(Ns.mechanism, specs[St.mechanism]),
        " void:subjectsTarget <{}> ;{}".format(source[St.graph], intermediate),
        " void:objectsTarget <{}> ;".format(target[St.graph]),
        " void:linkPredicate <{}> ;".format(specs[St.link]),
        " bdb:subjectsDatatype <{}> ;".format(source[St.entity_datatype]),
        " bdb:objectsDatatype <{}> ;".format(target[St.entity_datatype]),
        " alivocab:singletonGraph <{}> ;".format(specs[St.singleton]),
        " bdb:assertionMethod <{}> ;".format(specs[St.assertion_method]),
        " bdb:linksetJustification <{}> ;{}".format(specs[St.justification], extra),
        " alivocab:alignsSubjects ?src_aligns ;",
        " alivocab:alignsObjects ?trg_aligns ;",

        ' rdfs:comment """{}""" .'.format(specs[St.linkset_comment]),
        "\n ### METADATA ABOUT THE LINKTYPE",
        " <{}>".format(specs[St.link]),
        ' rdfs:comment """{}""" ;'.format(specs[St.link_comment]),
        ' rdfs:label "{} {}" ;'.format(specs[St.link_name], specs[St.sameAsCount]),

        " rdfs:subPropertyOf <{}> .".format(specs[St.link_subpropertyof]),
        "\n ### METADATA ABOUT THE LINKSET JUSTIFICATION",
        " <{}>".format(specs[St.justification]),

        ' rdfs:comment """{}""" .'.format(specs[St.justification_comment]),
        "\n ### ASSERTION METHOD",
        " <{}>".format(specs[St.assertion_method]),
        ' alivocab:sparql """{}""" .'.format(specs[St.insert_query]),
        "}",

        "WHERE",
        "{",
        " BIND(iri({}) AS ?src_aligns)".format(src_aligns),
        " BIND(iri({}) AS ?trg_aligns)".format(trg_aligns),
        "}",
    ]

    # EVERY ROW IS PRECEDED BY A NEWLINE, INCLUDING THE FIRST ONE
    query = "\n" + "\n".join(rows)

    if display is True:
        print(query)
    print("\t>>> Done generating the metadata")
    return {"query": query, "message": message}
def spa_linkset_subset(specs, activated=False):
    """Create a subset linkset at the endpoint, store its metadata and export it.

    Steps, in order: run the "subset" checks, build and execute the insert
    query, read back the size of the linkset graph, fetch the inserted
    correspondences and their singleton metadata with CONSTRUCT queries,
    insert the linkset metadata and finally write everything to file.

    ``specs`` is updated along the way: ``St.triples`` receives the size of the
    linkset graph and ``St.insert_query`` the text of the executed insert query.

    :param specs: specification mapping of the subset linkset.
    :param activated: the work is only carried out when this is ``True``.
    :return: the result of ``Ls.run_checks`` when it is not "GOOD TO GO";
        a dictionary with ``St.error_code`` 1 when nothing was inserted and the
        ASK query on the old link predicate answers anything but "true";
        otherwise a dictionary with ``St.error_code`` 0 and the linkset URI
        as ``St.result``.

    NOTE(review): the nesting below was reconstructed from a flattened source.
    As laid out, a call with ``activated`` other than ``True`` does nothing and
    returns ``None`` -- confirm against the original layout.
    """

    if activated is True:

        # ANYTHING OTHER THAN "GOOD TO GO" IS HANDED BACK TO THE CALLER UNCHANGED
        check = Ls.run_checks(specs, check_type="subset")
        if check[St.result] != "GOOD TO GO":
            return check

        # THE LINKSET DOES NOT EXIST, LET'S CREATE IT NOW
        print Ls.linkset_info(specs, specs[St.sameAsCount])

        ##########################################################
        """ 1. GENERATE SUBSET LINKSET INSERT QUERY """
        ##########################################################
        insert_query = spa_subset_insert(specs)
        # print insert_query

        #############################################################
        """ 2. EXECUTING INSERT SUBSET LINKSET QUERY AT ENDPOINT """
        #############################################################
        Qry.endpoint(insert_query)

        #############################################################
        """ 3. LINKSET SIZE (NUMBER OF TRIPLES) """
        #############################################################
        # LINKSET SIZE (NUMBER OF TRIPLES); COMPARED BELOW AGAINST THE STRING "0"
        specs[St.triples] = Qry.get_namedgraph_size(specs[St.linkset])
        print "\t>>> {} TRIPLES INSERTED".format(specs[St.triples])

        # NO MATCH FOUND
        if specs[St.triples] == "0":

            # logger.warning("WE DID NOT INSERT A METADATA AS NO TRIPLE WAS INSERTED.")
            print "WE DID NOT INSERT A METADATA AS NO TRIPLE WAS INSERTED."
            specs[St.insert_query] = insert_query
            # metadata = spa_subset_metadata(source, target, data, size)

            # ASK WHETHER THE LINKSET GRAPH HOLDS ANY TRIPLE USING THE OLD LINK PREDICATE
            explain_q = "ask {{ GRAPH <{}> {{ ?s <{}> ?o }} }}".format(
                specs[St.linkset], specs[St.source][St.link_old])
            response = Qry.boolean_endpoint_response(explain_q)
            explain = True if response == "true" else False
            # print explain

            if explain is False:
                # logger.warning("{} DOES NOT EXIST IS {}.".format(data[St.link_old], source[St.graph]))
                print "{} DOES NOT EXIST IS {}.".format(
                    specs[St.source][St.link_old], specs[St.source][St.graph])

                message = "{} DOES NOT EXIST IS {}.".format(
                    specs[St.source][St.link_old], specs[St.source][St.graph])

                return {St.message: message, St.error_code: 1, St.result: None}

            # NOTE(review): WHEN THE ASK ANSWERS "true" THE FUNCTION CARRIES ON WITH THE
            # METADATA AND EXPORT STEPS DESPITE THE "0" SIZE -- CONFIRM THIS IS INTENDED

        # SOME MATCHES WERE FOUND: FETCH THE CORRESPONDENCES OF THE LINKSET GRAPH
        construct_query = "\n{}\n{}\n{}\n".format(
            "PREFIX predicate: <{}>".format(Ns.alivocab),
            "construct { ?x ?y ?z }",
            "where {{ graph <{}> {{ ?x ?y ?z }} }}".format(
                specs[St.linkset]), )
        # print construct_query
        construct_response = Qry.endpointconstruct(construct_query)

        # PREFIX THE FIRST "{" OF THE RESPONSE WITH THE LINKSET URI
        if construct_response is not None:
            construct_response = construct_response.replace(
                '{', "<{}>\n{{".format(specs[St.linkset]), 1)

        # GENERATE LINKSET SINGLETON METADATA QUERY
        singleton_metadata_query = "\n{}\n{}\n{}\n{}\n{}\n{}\n\n".format(
            "PREFIX singMetadata: <{}>".format(Ns.singletons),
            "PREFIX predicate: <{}>".format(Ns.alivocab),
            "PREFIX prov: <{}>".format(Ns.prov),
            "PREFIX rdf: <{}>".format(Ns.rdf),
            "construct { ?x ?y ?z }",
            "where {{ graph <{}{}> {{ ?x ?y ?z }} }}".format(
                Ns.singletons, specs[St.linkset_name]), )

        # GET THE SINGLETON METADATA USING THE CONSTRUCT QUERY
        singleton_construct = Qry.endpointconstruct(singleton_metadata_query)

        # PREFIX THE FIRST "{" OF THE RESPONSE WITH THE PREFIXED SINGLETON GRAPH NAME
        if singleton_construct is not None:
            singleton_construct = singleton_construct.replace(
                '{', "singMetadata:{}\n{{".format(specs[St.linkset_name]), 1)

        #############################################################
        """ 4. LINKSET METADATA """
        #############################################################
        # METADATA (THE METADATA QUERY EMBEDS THE INSERT QUERY AS THE ASSERTION METHOD)
        specs[St.insert_query] = insert_query
        metadata = Gn.spa_subset_metadata(specs)

        ###############################################################
        """ 5. EXECUTING INSERT LINKSET METADATA QUERY AT ENDPOINT """
        ###############################################################
        # EXECUTING METADATA QUERY AT ENDPOINT
        Qry.endpoint(metadata)

        print "\t>>> WRITING TO FILE"
        write_to_file(graph_name=specs[St.linkset_name],
                      metadata=metadata.replace("INSERT DATA", ""),
                      correspondences=construct_response,
                      singletons=singleton_construct,
                      directory=DIRECTORY)

        print "\tLinkset created as [SUBSET]: ", specs[St.linkset]
        print "\t*** JOB DONE! ***"

        message = "The linkset was created as [{}] with {} triples found!".format(
            specs[St.linkset], specs[St.triples])

        return {
            St.message: message,
            St.error_code: 0,
            St.result: specs[St.linkset]
        }
def specification_2_linkset_subset(specs, activated=False):
    """Prepare a subset-linkset specification, create the linkset and register it.

    The function fills ``specs`` with the generic metadata of the linkset
    (name, link predicate, assertion method, justification and the three
    descriptive comments), delegates the creation to ``spa_linkset_subset``
    and, when triples were inserted, registers the alignment mapping.

    :param specs: specification mapping with at least ``St.mechanism``,
        ``St.source`` and ``St.target``.
    :param activated: when not ``True`` nothing is executed.
    :return: a dictionary with ``St.message``, ``St.error_code`` and
        ``St.result``: ``Ec.ERROR_CODE_0`` when not activated,
        ``Ec.ERROR_CODE_1`` (error code 5) when no same-as count could be
        obtained, otherwise whatever ``spa_linkset_subset`` returned.
    """
    if activated is True:
        print(Ut.headings("EXECUTING LINKSET SUBSET SPECS..."))
    else:
        print(Ut.headings("THE FUNCTION [specification_2_linkset_subset] IS NOT ACTIVATED"))
        return {St.message: Ec.ERROR_CODE_0, St.error_code: 0, St.result: None}

    # ACCESS THE TASK SPECIFIC PREDICATE COUNT
    specs[St.sameAsCount] = Qry.get_same_as_count(specs[St.mechanism])

    # WITHOUT A PREDICATE COUNT THE QUERY CANNOT BE BUILT
    if not specs[St.sameAsCount]:
        print(Ec.ERROR_CODE_1)
        return {St.message: Ec.ERROR_CODE_1, St.error_code: 5, St.result: None}

    source = specs[St.source]
    target = specs[St.target]

    # UPDATE THE SPECS OF SOURCE AND TARGETS
    update_specification(source)
    update_specification(target)

    # GENERATE THE NAME OF THE LINKSET
    Ls.set_subset_name(specs)

    # SETTING SOME GENERIC METADATA INFO
    specs[St.link_name] = "same"
    specs[St.link] = "http://risis.eu/linkset/predicate/{}".format(specs[St.link_name])
    specs[St.link_subpropertyof] = "http://risis.eu/linkset/predicate/{}".format(specs[St.link_name])
    specs[St.linkset] = "{}{}".format(Ns.linkset, specs[St.linkset_name])
    specs[St.assertion_method] = "{}{}".format(Ns.method, specs[St.linkset_name])
    specs[St.justification] = "{}{}".format(Ns.justification, specs[St.linkset_name])

    # COMMENT ON THE LINK PREDICATE
    specs[St.link_comment] = "The predicate <{}> is used in replacement of the linktype <{}> used in the " \
                             "original <{}> dataset.".format(
        specs[St.link], source[St.link_old], source[St.graph])

    # COMMENT ON THE JUSTIFICATION FOR THIS LINKSET
    # FIX: NO TRAILING COMMA HERE. IT USED TO TURN THE COMMENT INTO A 1-TUPLE,
    # WHICH WAS THEN WRITTEN INTO THE METADATA AS "('...',)".
    specs[St.justification_comment] = "In OrgRef's a set of entities are linked to GRID. The linking method " \
                                      "used by OrgRef is unknown. Here we assume that it is a curated work " \
                                      "and extracted it as a linkset."

    # COMMENT ON THE LINKSET ITSELF
    specs[St.linkset_comment] = "The current linkset is a subset of the <{0}> dataset that links <{0}> to " \
                                "<{1}>. The methodology used by <{0}> to generate this builtin linkset in " \
                                "unknown.".format(source[St.graph], target[St.graph])

    # THE ENTITY NAMESPACE IS THE DATATYPE URI WITHOUT THE ENTITY NAME
    source[St.entity_ns] = str(source[St.entity_datatype]).replace(source[St.entity_name], '')
    target[St.entity_ns] = str(target[St.entity_datatype]).replace(target[St.entity_name], '')

    # GENERATE THE LINKSET
    inserted_linkset = spa_linkset_subset(specs, activated)
    # print "LINKSET SUBSET RESULT:", inserted_linkset

    # AN EXISTING LINKSET IS REPORTED AS-IS AND NOT REGISTERED AGAIN
    if "ALREADY EXISTS" in inserted_linkset[St.message]:
        return inserted_linkset

    # REGISTER THE ALIGNMENT. THE "ALREADY EXISTS" CASE RETURNED ABOVE, SO THE LINKSET IS
    # ALWAYS NEWLY CREATED HERE. St.triples IS ABSENT WHEN THE CREATION STOPPED AT ITS
    # PRELIMINARY CHECKS, HENCE THE MEMBERSHIP TEST.
    if St.triples in specs and specs[St.triples] > "0":
        Urq.register_alignment_mapping(specs, created=True)

    # NOTE(review): ASSUMES THE RESULT IS RETURNED WHETHER OR NOT TRIPLES WERE INSERTED -- CONFIRM
    return inserted_linkset
def load_temp_query(specs, is_source, is_expand=True):
    """Build the SPARQL INSERT that loads aligned values into a temporary graph.

    The generated query reads the aligned property of every resource of the
    requested entity type, lower-cases and trims the value, and inserts it
    as ``alivocab:hasProperty`` in the graph ``<Ns.tmpgraph>load_<name>_<n>``
    (n is 1 for the source and 2 for the target).

    :param specs: specification dictionary; St.source / St.target,
        St.linkset_name and St.expanded_name are read from it.
    :param is_source: exactly True selects the source dataset, anything else
        selects the target dataset.
    :param is_expand: exactly False comments out the "linkset to expand"
        graph pattern of the query.
    :return: the query as a string.
    """
    # THE LINKSET PATTERN IS COMMENTED OUT WHEN THE LINKSET IS NOT EXPANDED
    comment_exp = "# " if is_expand is False else ""

    if is_source is True:
        info = specs[St.source]
        load = "_{}_1".format(specs[St.linkset_name])
        linkset_triple = "\t\t\t?{} ?predicate ?target".format(info[St.graph_name])
    else:
        info = specs[St.target]
        load = "_{}_2".format(specs[St.linkset_name])
        linkset_triple = "\t\t\t?source ?predicate ?{}".format(info[St.graph_name])

    # REPLACE RDF TYPE "a" IN CASE ANOTHER TYPE IS PROVIDED
    if St.rdf_predicate in info and info[St.rdf_predicate] is not None:
        rdf_pred = info[St.rdf_predicate] \
            if Ls.nt_format(info[St.rdf_predicate]) else "<{}>".format(info[St.rdf_predicate])
    else:
        rdf_pred = "a"

    # FORMATTING THE ALIGNS PROPERTY
    aligns = info[St.aligns] \
        if Ls.nt_format(info[St.aligns]) else "<{}>".format(info[St.aligns])

    name = info[St.graph_name]
    uri = info[St.graph]

    # ADD THE REDUCER IF SET. AN EMPTY OR None REDUCER COUNTS AS "NOT SET"
    # (AS IN linkset_metadata), OTHERWISE THE FILTER WOULD TARGET <> OR <None>
    reducer = info[St.reducer] if St.reducer in info else None
    if not reducer:
        reducer_comment = "#"
        reducer = ""
    else:
        reducer_comment = ""

    # EXTRACTION QUERY
    # NOTE(review): the alivocab: prefix is not declared in this template;
    # presumably it is added by the caller or known to the endpoint -- confirm.
    query = """
    INSERT
    {{
        GRAPH <{0}load{8}>
        {{
            ?{5} alivocab:hasProperty ?trimmed .
        }}
    }}
    WHERE
    {{
        # THE LINKSET TO EXPAND
        {12}GRAPH <{9}{10}>
        {12}{{
        {12}    {11} .
        {12}}}

        GRAPH <{1}>
        {{
            # RESOURCE IS OF A CERTAIN TYPE
            ?{5} {2} <{7}> .

            # EXTRACT THE PROPERTY-VALUE TO ALIGN
            ?{5} {3} ?object .

            # LOWER CASE OF THE VALUE
            BIND(lcase(str(?object)) as ?label)

            # VALUE TRIMMING
            BIND('^\\\\s+(.*?)\\\\s*$|^(.*?)\\\\s+$' AS ?regexp)
            BIND(REPLACE(?label, ?regexp, '$1$2') AS ?trimmed)
        }}

        {6}FILTER NOT EXISTS
        {6}{{
        {6}    GRAPH <{4}>
        {6}    {{
        {6}        {{ ?{5} ?pred ?obj . }}
        {6}        UNION
        {6}        {{ ?obj ?pred ?{5}. }}
        {6}    }}
        {6}}}
    }}
    """.format(
        # 0          1    2         3       4        5     6                7                         8
        Ns.tmpgraph, uri, rdf_pred, aligns, reducer, name, reducer_comment, info[St.entity_datatype], load,
        # 9         10                       11              12
        Ns.linkset, specs[St.expanded_name], linkset_triple, comment_exp
    )
    return query
def refining(specs, insert_query, activated=False): refined = {St.message: Ec.ERROR_CODE_1, St.error_code: 5, St.result: None} diff = {St.message: Ec.ERROR_CODE_4, St.error_code: 1, St.result: None} # UPDATE THE SPECS VARIABLE # print "UPDATE THE SPECS VARIABLE" update_specification(specs) update_specification(specs[St.source]) update_specification(specs[St.target]) # ACCESS THE TASK SPECIFIC PREDICATE COUNT specs[St.sameAsCount] = Qry.get_same_as_count(specs[St.mechanism]) # print "sameAsCount:", specs[St.sameAsCount] if specs[St.sameAsCount] is None: return {'refined': refined, 'difference': diff} # GENERATE THE NAME OF THE LINKSET Ls.set_refined_name(specs) # print "\nREFINED NAME:", specs[St.refined] # print "LINKSET TO REFINE BEFORE CHECK:", specs[St.linkset] # CHECK WHETHER OR NOT THE LINKSET WAS ALREADY CREATED check = Ls.run_checks(specs, check_type="refine") # print "\nREFINED NAME:", specs[St.refined] # print "LINKSET TO REFINE:", specs[St.linkset] if check[St.message] == "NOT GOOD TO GO": # refined = check[St.refined] # difference = check["difference"] return check # print "\nREFINED:", specs[St.refined] # print "LINKSET TO REFINE:", specs[St.linkset] # print "CHECK:", check # THE LINKSET DOES NOT EXIT, LETS CREATE IT NOW print Ls.refined_info(specs, specs[St.sameAsCount]) # POINT TO THE LINKSET THE CURRENT LINKSET WAS DERIVED FROM print "1. wasDerivedFrom {}".format(specs[St.linkset]) specs[St.derivedfrom] = "\t\tprov:wasDerivedFrom\t\t\t<{}> ;".format( specs[St.linkset]) # print "REFINED NAME:", specs[St.refined_name] # print "REFINED:", specs[St.refined] # print "LINKSET TO BE REFINED:", specs[St.linkset] print "\n2. RETRIEVING THE METADATA ABOUT THE GRAPH TO REFINE" # metadata_q = Qry.q_linkset_metadata(specs[St.linkset]) metadata_q = """ prefix ll: <{}> SELECT DISTINCT ?type ?singletonGraph {{ # LINKSET METADATA <{}> a ?type ; ll:singletonGraph ?singletonGraph . 
}} """.format(Ns.alivocab, specs[St.linkset]) print "QUERY:", metadata_q matrix = Qry.sparql_xml_to_matrix(metadata_q) # print "\nMETA DATA: ", matrix if matrix: if matrix[St.message] == "NO RESPONSE": print Ec.ERROR_CODE_1 print matrix[St.message] return {'refined': refined, 'difference': diff} elif matrix[St.result] is None: print matrix[St.message] returned = { St.message: matrix[St.message], St.error_code: 666, St.result: None } return {'refined': returned, 'difference': diff} else: print Ec.ERROR_CODE_1 return {'refined': refined, 'difference': diff} # GET THE SINGLETON GRAPH OF THE LINKSET TO BE REFINED print "\n3. GETTING THE SINGLETON GRAPH OF THE GRAPH TO REFINE" specs[St.singletonGraph] = matrix[St.result][1][1] # print matrix[St.result][1][0] specs[St.insert_query] = insert_query(specs) print specs[St.insert_query] if type(specs[St.insert_query]) == str: is_run = Qry.boolean_endpoint_response(specs[St.insert_query]) else: print "\n4. RUNNING THE EXTRACTION QUERY" print specs[St.insert_query][0] # is_run = Qry.boolean_endpoint_response(specs[St.insert_query][0]) Qry.boolean_endpoint_response(specs[St.insert_query][0]) print "\n5. RUNNING THE FINDING QUERY" print specs[St.insert_query][1] is_run = Qry.boolean_endpoint_response(specs[St.insert_query][1]) print "\n>>> RUN SUCCESSFULLY:", is_run.upper() # NO INSERTION HAPPENED if is_run == "true" or is_run == Ec.ERROR_STARDOG_1: # GENERATE THE # (1) LINKSET METADATA # (2) LINKSET OF CORRESPONDENCES # (3) SINGLETON METADATA # AND WRITE THEM ALL TO FILE print "GENERATING THE METADATA" pro_message = refine_metadata(specs) # SET THE RESULT ASSUMING IT WENT WRONG refined = { St.message: Ec.ERROR_CODE_4, St.error_code: 4, St.result: None } diff = {St.message: Ec.ERROR_CODE_4, St.error_code: 4, St.result: None} server_message = "Linksets created as: [{}]".format(specs[St.refined]) message = "The linkset was created as [{}]. 
<br/>{}".format( specs[St.refined], pro_message) # MESSAGE ABOUT THE INSERTION STATISTICS print "\t", server_message if int(specs[St.triples]) > 0: # UPDATE THE REFINED VARIABLE AS THE INSERTION WAS SUCCESSFUL refined = { St.message: message, St.error_code: 0, St.result: specs[St.linkset] } print "REGISTERING THE ALIGNMENT" if refined[St.message].__contains__("ALREADY EXISTS"): register_alignment_mapping(specs, created=False) else: register_alignment_mapping(specs, created=True) try: print "\nCOMPUTE THE DIFFERENCE AND DOCUMENT IT" diff_lens_specs = { St.researchQ_URI: specs[St.researchQ_URI], St.subjectsTarget: specs[St.linkset], St.objectsTarget: specs[St.refined] } diff = Df.difference(diff_lens_specs, activated=activated) message_2 = "\t>>> {} CORRESPONDENCES INSERTED AS THE DIFFERENCE".format( diff_lens_specs[St.triples]) print message_2 except Exception as err: print "THE DIFFERENCE FAILED: ", str(err.message) print "\tLinkset created as: ", specs[St.refined] print "\t*** JOB DONE! ***" return {'refined': refined, 'difference': diff} else: print ">>> NO TRIPLE WAS INSERTED BECAUSE NO MATCH COULD BE FOUND" return {'refined': refined, 'difference': diff} else: print "NO MATCH COULD BE FOUND."
def refine_numeric_query(specs):
    """Build the two SPARQL updates that refine a linkset on a numeric delta.

    The first query drops the temporary and refined graphs, then loads the
    lower-cased, trimmed aligned values of linked source/target resources
    into ``tempG:load01``. The second query inserts, in the refined linkset,
    the links whose value difference (?DELTA) is within ``specs[St.delta]``.

    :param specs: specification dictionary, read through the St.* keys
        (numeric_approx_type, source, target, linkset, refined, refined_name,
        delta, mechanism, sameAsCount).
    :return: the list ``[extract, find]`` of the two query strings.
    """
    # PLAIN NUMBER CHECK: DIFFERENCE BETWEEN THE SOURCE VALUE (?x) AND THE
    # TARGET VALUE (?y). SUBTRACTING ?x FROM ITSELF WOULD ALWAYS GIVE 0 AND
    # LET EVERY PAIR PASS THE THRESHOLD.
    delta_check = "BIND(ABS(xsd:decimal(?x) - xsd:decimal(?y)) AS ?DELTA)"

    # DATE CHECK
    # NOTE(review): the XSD cast is spelled xsd:dateTime in the SPARQL
    # specification; confirm that the endpoint accepts xsd:datetime.
    if specs[St.numeric_approx_type].lower() == "date":
        delta_check = "BIND( (YEAR(xsd:datetime(STR(?x))) - YEAR(xsd:datetime(STR(?y))) ) as ?DELTA )"

    source = specs[St.source]
    target = specs[St.target]

    # FORMATTING THE ALIGNS PROPERTY
    src_aligns = source[St.aligns] \
        if Ls.nt_format(source[St.aligns]) else "<{}>".format(source[St.aligns])
    trg_aligns = target[St.aligns] \
        if Ls.nt_format(target[St.aligns]) else "<{}>".format(target[St.aligns])

    # THE EXTENDED GRAPH, WHEN PROVIDED, REPLACES THE DATASET GRAPH
    src_name = specs[St.source][St.graph_name]
    src_uri = source[St.graph] if St.extended_graph not in source else source[
        St.extended_graph]

    trg_name = specs[St.target][St.graph_name]
    trg_uri = target[St.graph] if St.extended_graph not in target else target[
        St.extended_graph]

    extract = """
    PREFIX ll:    <{0}>
    PREFIX prov:  <{1}>
    PREFIX tempG: <{2}>

    DROP SILENT GRAPH tempG:load01 ;
    DROP SILENT GRAPH tempG:load02 ;
    DROP SILENT GRAPH <{3}> ;
    DROP SILENT GRAPH <{4}{5}> ;

    ### 1. LOADING SOURCE AND TARGET TO A TEMPORARY GRAPH
    INSERT
    {{
        GRAPH tempG:load01
        {{
            ### SOURCE DATASET AND ITS ALIGNED PREDICATE
            ?{8}_1 ll:relatesTo1 ?srcTrimmed .
            ### TARGET DATASET AND ITS ALIGNED PREDICATE
            ?{9}_2 ll:relatesTo3 ?trgTrimmed .
        }}
    }}
    WHERE
    {{
        ### LINKSET TO REFINE
        graph <{7}>
        {{
            ?{8}_1 ?pred ?{9}_2 .
        }}

        ### SOURCE DATASET
        graph <{10}>
        {{
            ### SOURCE DATASET AND ITS ALIGNED PREDICATE
            ?{8}_1 {12} ?value_1 .
            bind (lcase(str(?value_1)) as ?src_value)

            # VALUE TRIMMING
            BIND('^\\\\s+(.*?)\\\\s*$|^(.*?)\\\\s+$' AS ?regexp)
            BIND(REPLACE(?src_value, ?regexp, '$1$2') AS ?srcTrimmed)
        }}

        ### TARGET DATASET
        graph <{11}>
        {{
            ### TARGET DATASET AND ITS ALIGNED PREDICATE
            ?{9}_2 {13} ?value_2 .
            bind (lcase(str(?value_2)) as ?trg_value)

            # VALUE TRIMMING
            BIND('^\\\\s+(.*?)\\\\s*$|^(.*?)\\\\s+$' AS ?regexp)
            BIND(REPLACE(?trg_value, ?regexp, '$1$2') AS ?trgTrimmed)
        }}
    }}
    """.format(
        # 0          1        2            3                   4              5
        Ns.alivocab, Ns.prov, Ns.tmpgraph, specs[St.refined], Ns.singletons, specs[St.refined_name],
        # 6          7                  8         9         10       11       12          13
        Ns.tmpvocab, specs[St.linkset], src_name, trg_name, src_uri, trg_uri, src_aligns, trg_aligns)

    find = """
    ### 2. FINDING CANDIDATE MATCH BETWEEN THE SOURCE AND TARGET
    PREFIX ll:    <{0}>
    PREFIX prov:  <{1}>
    PREFIX tempG: <{2}>

    INSERT
    {{
        ### MATCH FOUND
        GRAPH <{10}>
        {{
            ?{3}_1 ?newSingletons ?{4}_2 .
        }}

        # METADATA OF MATCH FOUND
        GRAPH <{11}{12}>
        {{
            ?newSingletons
                rdf:singletonPropertyOf     ll:{8}{9} ;
                prov:wasDerivedFrom         ?pred ;
                ll:hasEvidence              ?evidence .
        }}
    }}
    WHERE
    {{
        ### LINKSET TO REFINE
        graph <{5}>
        {{
            ?{3}_1 ?pred ?{4}_2 .
            bind( iri(replace("{0}{8}{9}_#", "#", strafter(str(uuid()), "uuid:") )) as ?newSingletons )
        }}

        ### SOURCE AND TARGET LOADED TO A TEMPORARY GRAPH
        GRAPH tempG:load01
        {{
            ?{3}_1 ll:relatesTo1 ?x .
            ?{4}_2 ll:relatesTo3 ?y .
        }}

        # DELTA APPROX CHECK
        {6}
        FILTER( ABS(?DELTA) <= {7} )
        BIND(concat("The DELTA of [", ?x, "] and [", ?y, "] is [", STR(ABS(?DELTA)), "] which passed the threshold of [", STR({7}), "]" ) AS ?evidence)
    }}""".format(
        # 0          1        2            3         4         5                  6            7
        Ns.alivocab, Ns.prov, Ns.tmpgraph, src_name, trg_name, specs[St.linkset], delta_check, specs[St.delta],
        # 8                  9                      10                 11             12
        specs[St.mechanism], specs[St.sameAsCount], specs[St.refined], Ns.singletons, specs[St.refined_name])

    return [extract, find]
def refine_intermediate_query(specs):
    """Build the SPARQL update that refines a linkset through an intermediate graph.

    A link of the linkset to refine is kept when the lower-cased, trimmed
    aligned value of its source and of its target both appear as values of
    one and the same resource of the intermediate graph.

    :param specs: specification dictionary, read through the St.* keys
        (source, target, linkset, intermediate_graph, refined, refined_name,
        mechanism, sameAsCount).
    :return: the update request as a single string.
    """
    source = specs[St.source]
    target = specs[St.target]

    # FORMATTING THE ALIGNS PROPERTY
    src_aligns = source[St.aligns] \
        if Ls.nt_format(source[St.aligns]) else "<{}>".format(source[St.aligns])
    trg_aligns = target[St.aligns] \
        if Ls.nt_format(target[St.aligns]) else "<{}>".format(target[St.aligns])

    # THE EXTENDED GRAPH, WHEN PROVIDED, REPLACES THE DATASET GRAPH
    src_name = specs[St.source][St.graph_name]
    src_uri = source[St.graph] if St.extended_graph not in source else source[
        St.extended_graph]

    trg_name = specs[St.target][St.graph_name]
    trg_uri = target[St.graph] if St.extended_graph not in target else target[
        St.extended_graph]

    # THE WHERE CLAUSE IS CLOSED BEFORE THE TRAILING DROP OPERATIONS AND EVERY
    # OPERATION IS SEPARATED BY ";". THE TEMPLATE PREVIOUSLY HAD "}} ; DROP ..."
    # IN THE MIDDLE OF AN UNTERMINATED BIND, WHICH IS NOT VALID SPARQL.
    insert = """
    PREFIX alivocab:    <{16}>
    PREFIX prov:        <{17}>

    DROP SILENT GRAPH <{0}load01> ;
    DROP SILENT GRAPH <{0}load02> ;
    DROP SILENT GRAPH <{10}> ;
    DROP SILENT GRAPH <{14}{15}> ;

    INSERT
    {{
        GRAPH <{10}>
        {{
            ?{1} ?newSingletons ?{3} .
        }}

        ### SINGLETONS' METADATA
        GRAPH <{14}{15}>
        {{
            ?newSingletons
                rdf:singletonPropertyOf     alivocab:{12}{13} ;
                prov:wasDerivedFrom         ?pred ;
                alivocab:hasEvidence        ?evidence .
        }}
    }}
    WHERE
    {{
        ### LINKSET TO REFINE
        graph <{5}>
        {{
            ?{1} ?pred ?{3} .
            bind( iri(replace("{11}{12}{13}_#", "#", strafter(str(uuid()), "uuid:") )) as ?newSingletons )
        }}

        ### SOURCE DATASET
        graph <{6}>
        {{
            ### SOURCE DATASET AND ITS ALIGNED PREDICATE
            ?{1} {2} ?value_1 .
            bind (lcase(str(?value_1)) as ?src_value)

            # VALUE TRIMMING
            BIND('^\\\\s+(.*?)\\\\s*$|^(.*?)\\\\s+$' AS ?regexp)
            BIND(REPLACE(?src_value, ?regexp, '$1$2') AS ?src_trimmed)
        }}

        ### TARGET DATASET
        graph <{7}>
        {{
            ### TARGET DATASET AND ITS ALIGNED PREDICATE
            ?{3} {4} ?value_2 .
            bind (lcase(str(?value_2)) as ?trg_value)

            # VALUE TRIMMING
            BIND('^\\\\s+(.*?)\\\\s*$|^(.*?)\\\\s+$' AS ?regexp)
            BIND(REPLACE(?trg_value, ?regexp, '$1$2') AS ?trg_trimmed)
        }}

        ### INTERMEDIATE DATASET
        graph <{9}>
        {{
            ?intermediate_uri
                ?intPred_1 ?value_3 ;
                ?intPred_2 ?value_4 .

            ### VALUES TO LOWER CASE
            bind (lcase(str(?value_3)) as ?src_val)
            bind (lcase(str(?value_4)) as ?trg_val)

            # VALUE TRIMMING
            BIND('^\\\\s+(.*?)\\\\s*$|^(.*?)\\\\s+$' AS ?regexp)
            BIND(REPLACE(?src_val, ?regexp, '$1$2') AS ?src_trimmed)
            BIND(REPLACE(?trg_val, ?regexp, '$1$2') AS ?trg_trimmed)
            BIND(concat("[", ?src_trimmed, "] aligns with [", ?trg_trimmed, "]") AS ?evidence)
        }}
    }} ;

    DROP SILENT GRAPH <{0}load01> ;
    DROP SILENT GRAPH <{0}load02>
    """.format(
        # 0          1         2           3         4
        Ns.tmpgraph, src_name, src_aligns, trg_name, trg_aligns,
        # 5                6        7        8            9
        specs[St.linkset], src_uri, trg_uri, Ns.tmpvocab, specs[St.intermediate_graph],
        # 10               11           12                   13
        specs[St.refined], Ns.alivocab, specs[St.mechanism], specs[St.sameAsCount],
        # 14           15                      16           17
        Ns.singletons, specs[St.refined_name], Ns.alivocab, Ns.prov)

    return insert
def refine_exact_query(specs): source = specs[St.source] target = specs[St.target] src_graph = source[ St.graph] if St.extended_graph not in source else source[ St.extended_graph] trg_graph = target[ St.graph] if St.extended_graph not in target else target[ St.extended_graph] print "src_graph:", src_graph print "trg_graph:", trg_graph # FORMATTING THE ALIGNS PROPERTY src_aligns = source[St.aligns] \ if Ls.nt_format(source[St.aligns]) else "<{}>".format(source[St.aligns]) trg_aligns = target[St.aligns] \ if Ls.nt_format(target[St.aligns]) else "<{}>".format(target[St.aligns]) # GENERATE THE INSERT QUERY insert_query = """ PREFIX prov: <{}> PREFIX rdf: <{}> PREFIX alivocab: <{}> INSERT {{ ### REFINED LINKSET GRAPH <{}> {{ ?subject ?newSingletons ?object . }} ### SINGLETONS' METADATA GRAPH <{}{}> {{ ?newSingletons rdf:singletonPropertyOf alivocab:{}{} ; ## THIS IS THE TRAIL prov:wasDerivedFrom ?singleton ; ## BUT THIS IS ADDED FOR QUERY SIMPLICITY AND EFFICIENCY ?sP ?sO ; ## THIS IS ITS OWN EVIDENCE alivocab:hasEvidence ?trimmed . }} }} WHERE {{ ### LINKSET GRAPH <{}> {{ ?subject ?singleton ?object . bind( iri(replace("{}{}{}_#", "#", strafter(str(uuid()), "uuid:") )) as ?newSingletons ) }} ### METADATA graph <{}> {{ ?singleton ?sP ?sO . }} ### SOURCE DATASET GRAPH <{}> {{ ?subject a <{}> ; {} ?s_label . BIND(lcase(str(?s_label)) as ?label1) # VALUE TRIMMING BIND('^\\\\s+(.*?)\\\\s*$|^(.*?)\\\\s+$' AS ?regexp) BIND(REPLACE(?label1, ?regexp, '$1$2') AS ?trimmed) }} ### TARGET DATASET GRAPH <{}> {{ ?object a <{}> ; {} ?o_label . 
BIND(lcase(str(?o_label)) as ?label2) # VALUE TRIMMING BIND('^\\\\s+(.*?)\\\\s*$|^(.*?)\\\\s+$' AS ?regexp) BIND(REPLACE(?label2, ?regexp, '$1$2') AS ?trimmed) }} }} """.format(Ns.prov, Ns.rdf, Ns.alivocab, specs[St.refined], Ns.singletons, specs[St.refined_name], specs[St.mechanism], specs[St.sameAsCount], specs[St.linkset], Ns.alivocab, specs[St.mechanism], specs[St.sameAsCount], specs[St.singletonGraph], src_graph, source[St.entity_datatype], src_aligns, trg_graph, target[St.entity_datatype], trg_aligns) # print insert_query return insert_query
def geo_load_query(specs, is_source):
    """Build the SPARQL INSERT that loads resource coordinates into a temporary graph.

    For every resource of the requested entity type that takes part in a
    link of the ``specs[St.refined]`` graph, the longitude and latitude are
    read, converted to xsd:float (a decimal comma becomes a dot) and written
    as wgs:long / wgs:lat in ``<Ns.tmpgraph>load_<lens name>_<n>``
    (n is 1 for the source and 2 for the target).

    :param specs: specification dictionary, read through the St.* keys
        (source or target, lens_name, refined).
    :param is_source: exactly True selects the source dataset, anything else
        selects the target dataset.
    :return: the query as a string.
    """
    def as_sparql_term(prop):
        # KEEP THE PROPERTY AS IS WHEN Ls.nt_format ACCEPTS IT, ELSE WRAP IT IN <>
        if Ls.nt_format(prop):
            return prop
        return "<{}>".format(prop)

    from_source = is_source is True

    # PICK THE DATASET, THE LOAD GRAPH SUFFIX AND THE SIDE OF THE LINK
    if from_source:
        info = specs[St.source]
        load = "_{}_1".format(specs[St.lens_name])
        links = "?resource ?singPre ?target ."
    else:
        info = specs[St.target]
        load = "_{}_2".format(specs[St.lens_name])
        links = "?source ?singPre ?resource ."

    # REPLACE RDF TYPE "rdf:type" IN CASE ANOTHER TYPE IS PROVIDED
    rdf_pred = "a"
    if St.rdf_predicate in info and info[St.rdf_predicate] is not None:
        rdf_pred = as_sparql_term(info[St.rdf_predicate])

    # FORMATTING THE LONGITUDE AND LATITUDE PROPERTIES
    longitude = as_sparql_term(info[St.longitude])
    latitude = as_sparql_term(info[St.latitude])

    # EXTRACTING THE RESOURCE GRAPH URI
    uri = info[St.graph]

    # HEADER COMMENT PLACED AT THE TOP OF THE GENERATED QUERY
    if from_source:
        message = """######################################################################
    ### INSERTING DATA FROM THE SOURCE
    ######################################################################"""
    else:
        message = """######################################################################
    ### INSERTING MESSAGE FROM THE TARGET
    ######################################################################"""

    template = """
    {5}
    PREFIX geof: <http://www.opengis.net/def/function/geosparql/>
    PREFIX wgs: <http://www.w3.org/2003/01/geo/wgs84_pos#>

    INSERT
    {{
        GRAPH <{0}load{1}>
        {{
            ?resource wgs:long ?longitude .
            ?resource wgs:lat ?latitude .
        }}
    }}
    WHERE
    {{
        GRAPH <{8}>
        {{
            {9}
        }}

        GRAPH <{2}>
        {{
            ### LOCATION COORDINATES
            ?resource {6} <{7}> .
            ?resource {3} ?long .
            ?resource {4} ?lat .

            ### MAKING SURE THE COORDINATES ARE WELL FORMATTED
            BIND( STRDT(REPLACE(STR(?long), ",", "."), xsd:float) as ?longitude )
            BIND( STRDT(REPLACE(STR(?lat), ",", "."), xsd:float) as ?latitude )

            ### MAKING SURE THE COORDINATES AT DIGITS AND NOT LITERALS
            Filter (?longitude >= 0 || ?longitude <= 0 )
            Filter (?latitude >= 0 || ?latitude <= 0 )

            ### GENERATE A LOCATION URI
            BIND( replace("http://risis.eu/#","#", STRAFTER(str(UUID()),"uuid:")) as ?name )
            BIND(iri(?name) as ?location)
        }}
    }}
    """
    return template.format(
        # 0          1     2    3          4         5        6         7
        Ns.tmpgraph, load, uri, longitude, latitude, message, rdf_pred, info[St.entity_datatype],
        # 8                9
        specs[St.refined], links)