def spa_subset_insert(specs):
    """Return the SPARQL INSERT query that materialises a subset linkset.

    The generated query reads, from the source graph, every ``?subject`` of
    the source entity type together with the ``?object`` reached through the
    ``St.link_old`` property. It writes ``?subject ?singPre ?object`` into
    the linkset graph and describes each minted ``?singPre`` in the
    singleton graph.

    :param specs: mapping indexed with the ``St`` keys used below
        (``St.source``, ``St.linkset``, ``St.linkset_name``,
        ``St.sameAsCount`` and ``St.mechanism``).
    :return: the query text.
    """
    source = specs[St.source]

    # THE LINKING PROPERTY IS USED AS IS WHEN Ls.nt_format ACCEPTS IT,
    # OTHERWISE IT IS WRAPPED IN ANGLE BRACKETS
    link_property = source[St.link_old]
    if Ls.nt_format(link_property):
        src_aligns = link_property
    else:
        src_aligns = "<{}>".format(link_property)

    template = """
    ###### INSERT SUBSET LINKSET
    PREFIX rdf: <{}>
    PREFIX singleton: <{}>
    PREFIX alivocab: <{}>
    INSERT
    {{
        GRAPH <{}>
        {{
            ?subject ?singPre ?object .
        }}
        GRAPH singleton:{}
        {{
            ?singPre rdf:singletonPropertyOf alivocab:exactStrSim{} .
            ?singPre alivocab:hasStrength 1 .
            ?singPre alivocab:hasEvidence "Aligned by {} ." .
        }}
    }}
    WHERE
    {{
        GRAPH <{}>
        {{
            ?subject
                a <{}> ;
                {} ?object .
        }}
        ### Create A SINGLETON URI
        BIND( replace("{}{}{}_#", "#", STRAFTER(str(UUID()),"uuid:")) as ?pre )
        BIND(iri(?pre) as ?singPre)
    }}
    """

    # VALUES LISTED IN THE ORDER OF THE POSITIONAL PLACEHOLDERS ABOVE
    values = (
        # PREFIXES
        Ns.rdf, Ns.singletons, Ns.alivocab,
        # INSERT CLAUSE
        specs[St.linkset], specs[St.linkset_name], specs[St.sameAsCount],
        source[St.graph_name],
        # WHERE CLAUSE
        source[St.graph], source[St.entity_datatype], src_aligns,
        # SINGLETON URI PATTERN
        Ns.alivocab, specs[St.mechanism], specs[St.sameAsCount],
    )
    return template.format(*values)
def load_temp_query(specs, is_source, is_expand=True):
    """Return the SPARQL INSERT that loads one side of an alignment into a temporary graph.

    For every resource of the configured type, the query lower-cases and
    trims the value of the aligned property and stores it as
    ``alivocab:hasProperty`` in ``<Ns.tmpgraph>load_<linkset name>_1``
    (source) or ``..._2`` (target).

    :param specs: mapping indexed with the ``St`` keys used below.
    :param is_source: exactly ``True`` selects ``specs[St.source]``; any
        other value selects ``specs[St.target]``.
    :param is_expand: exactly ``False`` comments out the "linkset to expand"
        graph pattern; any other value keeps it active.
    :return: the query text.
    """

    def as_term(value):
        # KEEP THE VALUE AS IS WHEN Ls.nt_format ACCEPTS IT, ELSE WRAP IT IN <>
        return value if Ls.nt_format(value) else "<{}>".format(value)

    # PREFIX PUT IN FRONT OF EVERY LINE OF THE LINKSET-TO-EXPAND PATTERN
    comment_exp = "# " if is_expand is False else ""

    # SELECT THE SIDE TO LOAD AND THE LINKSET TRIPLE THAT MENTIONS IT
    side = St.source if is_source is True else St.target
    info = specs[side]
    if is_source is True:
        load = "_{}_1".format(specs[St.linkset_name])
        linkset_triple = "\t\t\t?{} ?predicate ?target".format(info[St.graph_name])
    else:
        load = "_{}_2".format(specs[St.linkset_name])
        linkset_triple = "\t\t\t?source ?predicate ?{}".format(info[St.graph_name])

    # THE TYPING PREDICATE DEFAULTS TO "a" UNLESS ANOTHER ONE IS PROVIDED
    rdf_pred = "a"
    if St.rdf_predicate in info and info[St.rdf_predicate] is not None:
        rdf_pred = as_term(info[St.rdf_predicate])

    # THE ALIGNED PROPERTY, THE RESOURCE VARIABLE NAME AND THE DATASET GRAPH
    aligns = as_term(info[St.aligns])
    name = info[St.graph_name]
    uri = info[St.graph]

    # WITHOUT A REDUCER, THE FILTER NOT EXISTS PATTERN IS COMMENTED OUT
    if St.reducer in info:
        reducer_comment = ""
        reducer = info[St.reducer]
    else:
        reducer_comment = "#"
        reducer = ""

    # EXTRACTION QUERY
    template = """
    INSERT
    {{
        GRAPH <{0}load{8}>
        {{
            ?{5} alivocab:hasProperty ?trimmed .
        }}
    }}
    WHERE
    {{
        # THE LINKSET TO EXPAND
        {12}GRAPH <{9}{10}>
        {12}{{
        {12} {11} .
        {12}}}

        GRAPH <{1}>
        {{
            # RESOURCE IS OF A CERTAIN TYPE
            ?{5} {2} <{7}> .

            # EXTRACT THE PROPERTY-VALUE TO ALIGN
            ?{5} {3} ?object .

            # LOWER CASE OF THE VALUE
            BIND(lcase(str(?object)) as ?label)

            # VALUE TRIMMING
            BIND('^\\\\s+(.*?)\\\\s*$|^(.*?)\\\\s+$' AS ?regexp)
            BIND(REPLACE(?label, ?regexp, '$1$2') AS ?trimmed)
        }}

        {6}FILTER NOT EXISTS
        {6}{{
        {6}    GRAPH <{4}>
        {6}    {{
        {6}        {{ ?{5} ?pred ?obj . }}
        {6}        UNION
        {6}        {{ ?obj ?pred ?{5}. }}
        {6}    }}
        {6}}}
    }}
    """

    return template.format(
        Ns.tmpgraph,                # 0
        uri,                        # 1
        rdf_pred,                   # 2
        aligns,                     # 3
        reducer,                    # 4
        name,                       # 5
        reducer_comment,            # 6
        info[St.entity_datatype],   # 7
        load,                       # 8
        Ns.linkset,                 # 9
        specs[St.expanded_name],    # 10
        linkset_triple,             # 11
        comment_exp,                # 12
    )
def refine_numeric_query(specs):
    """Return the two SPARQL updates that refine a linkset on a numeric or date delta.

    The first query (``extract``) drops the temporary and refined graphs, then
    loads into ``tempG:load01`` the lower-cased, trimmed aligned value of the
    source and target resource of every link in the linkset to refine.

    The second query (``find``) re-reads those values as ``?x`` (source) and
    ``?y`` (target), computes ``?DELTA`` and keeps the links for which
    ``ABS(?DELTA) <= specs[St.delta]``. It writes them to the refined graph
    with a new singleton predicate and an evidence string.

    :param specs: mapping indexed with the ``St`` keys used below. When
        ``specs[St.numeric_approx_type]`` equals ``"date"`` (case-insensitive)
        the delta is a difference of years, otherwise a decimal difference.
    :return: the list ``[extract, find]``.
    """

    def as_term(value):
        # KEEP THE VALUE AS IS WHEN Ls.nt_format ACCEPTS IT, ELSE WRAP IT IN <>
        return value if Ls.nt_format(value) else "<{}>".format(value)

    if specs[St.numeric_approx_type].lower() == "date":
        # DATE CHECK: DIFFERENCE IN YEARS. THE XSD CONSTRUCTOR IS THE
        # CASE-SENSITIVE xsd:dateTime (xsd:datetime IS NOT A CAST FUNCTION)
        delta_check = "BIND( (YEAR(xsd:dateTime(STR(?x))) - YEAR(xsd:dateTime(STR(?y))) ) as ?DELTA )"
    else:
        # PLAIN NUMBER CHECK: SOURCE VALUE (?x) AGAINST TARGET VALUE (?y).
        # SUBTRACTING ?x FROM ITSELF WOULD ALWAYS YIELD 0 AND ACCEPT EVERY LINK
        delta_check = "BIND(ABS(xsd:decimal(?x) - xsd:decimal(?y)) AS ?DELTA)"

    source = specs[St.source]
    target = specs[St.target]

    # FORMATTING THE ALIGNS PROPERTY
    src_aligns = as_term(source[St.aligns])
    trg_aligns = as_term(target[St.aligns])

    # VARIABLE NAMES AND DATASET GRAPHS (THE EXTENDED GRAPH WINS WHEN PROVIDED)
    src_name = source[St.graph_name]
    src_uri = source[St.graph] if St.extended_graph not in source else source[St.extended_graph]
    trg_name = target[St.graph_name]
    trg_uri = target[St.graph] if St.extended_graph not in target else target[St.extended_graph]

    extract = """
    PREFIX ll: <{0}>
    PREFIX prov: <{1}>
    PREFIX tempG: <{2}>

    DROP SILENT GRAPH tempG:load01 ;
    DROP SILENT GRAPH tempG:load02 ;
    DROP SILENT GRAPH <{3}> ;
    DROP SILENT GRAPH <{4}{5}> ;

    ### 1. LOADING SOURCE AND TARGET TO A TEMPORARY GRAPH
    INSERT
    {{
        GRAPH tempG:load01
        {{
            ### SOURCE DATASET AND ITS ALIGNED PREDICATE
            ?{8}_1 ll:relatesTo1 ?srcTrimmed .
            ### TARGET DATASET AND ITS ALIGNED PREDICATE
            ?{9}_2 ll:relatesTo3 ?trgTrimmed .
        }}
    }}
    WHERE
    {{
        ### LINKSET TO REFINE
        graph <{7}>
        {{
            ?{8}_1 ?pred ?{9}_2 .
        }}

        ### SOURCE DATASET
        graph <{10}>
        {{
            ### SOURCE DATASET AND ITS ALIGNED PREDICATE
            ?{8}_1 {12} ?value_1 .

            bind (lcase(str(?value_1)) as ?src_value)

            # VALUE TRIMMING
            BIND('^\\\\s+(.*?)\\\\s*$|^(.*?)\\\\s+$' AS ?regexp)
            BIND(REPLACE(?src_value, ?regexp, '$1$2') AS ?srcTrimmed)
        }}

        ### TARGET DATASET
        graph <{11}>
        {{
            ### TARGET DATASET AND ITS ALIGNED PREDICATE
            ?{9}_2 {13} ?value_2 .

            bind (lcase(str(?value_2)) as ?trg_value)

            # VALUE TRIMMING
            BIND('^\\\\s+(.*?)\\\\s*$|^(.*?)\\\\s+$' AS ?regexp)
            BIND(REPLACE(?trg_value, ?regexp, '$1$2') AS ?trgTrimmed)
        }}
    }}
    """.format(
        # 0          1        2            3                   4              5
        Ns.alivocab, Ns.prov, Ns.tmpgraph, specs[St.refined], Ns.singletons, specs[St.refined_name],
        # 6 (NOT REFERENCED BY THE TEMPLATE)  7
        Ns.tmpvocab, specs[St.linkset],
        # 8       9         10       11       12          13
        src_name, trg_name, src_uri, trg_uri, src_aligns, trg_aligns)

    find = """
    ### 2. FINDING CANDIDATE MATCH BETWEEN THE SOURCE AND TARGET
    PREFIX ll: <{0}>
    PREFIX prov: <{1}>
    PREFIX tempG: <{2}>

    INSERT
    {{
        ### MATCH FOUND
        GRAPH <{10}>
        {{
            ?{3}_1 ?newSingletons ?{4}_2 .
        }}
        # METADATA OF MATCH FOUND
        GRAPH <{11}{12}>
        {{
            ?newSingletons
                rdf:singletonPropertyOf ll:{8}{9} ;
                prov:wasDerivedFrom ?pred ;
                ll:hasEvidence ?evidence .
        }}
    }}
    WHERE
    {{
        ### LINKSET TO REFINE
        graph <{5}>
        {{
            ?{3}_1 ?pred ?{4}_2 .
            bind( iri(replace("{0}{8}{9}_#", "#", strafter(str(uuid()), "uuid:") )) as ?newSingletons )
        }}

        ### SOURCE AND TARGET LOADED TO A TEMPORARY GRAPH
        GRAPH tempG:load01
        {{
            ?{3}_1 ll:relatesTo1 ?x .
            ?{4}_2 ll:relatesTo3 ?y .
        }}

        # DELTA APPROX CHECK
        {6}
        FILTER( ABS(?DELTA) <= {7} )

        BIND(concat("The DELTA of [", ?x, "] and [", ?y, "] is [", STR(ABS(?DELTA)), "] which passed the threshold of [", STR({7}), "]" ) AS ?evidence)
    }}""".format(
        # 0          1        2            3         4         5
        Ns.alivocab, Ns.prov, Ns.tmpgraph, src_name, trg_name, specs[St.linkset],
        # 6          7
        delta_check, specs[St.delta],
        # 8                  9                      10                 11             12
        specs[St.mechanism], specs[St.sameAsCount], specs[St.refined], Ns.singletons, specs[St.refined_name])

    return [extract, find]
def refine_intermediate_query(specs):
    """Return the SPARQL update that refines a linkset through an intermediate dataset.

    The request drops the temporary and refined graphs, then inserts into the
    refined graph every link of the linkset to refine that also matches the
    intermediate graph. A link matches when the lower-cased, trimmed source
    and target values (``?src_trimmed`` / ``?trg_trimmed``) are both found on
    a single resource of the intermediate graph. The same variable names are
    bound in the dataset patterns and in the intermediate pattern, which is
    what ties them together. Each kept link gets a new singleton predicate
    and an evidence string, and the temporary graphs are dropped again at
    the end.

    :param specs: mapping indexed with the ``St`` keys used below.
    :return: the query text.
    """

    def as_term(value):
        # KEEP THE VALUE AS IS WHEN Ls.nt_format ACCEPTS IT, ELSE WRAP IT IN <>
        return value if Ls.nt_format(value) else "<{}>".format(value)

    source = specs[St.source]
    target = specs[St.target]

    # FORMATTING THE ALIGNS PROPERTY
    src_aligns = as_term(source[St.aligns])
    trg_aligns = as_term(target[St.aligns])

    # VARIABLE NAMES AND DATASET GRAPHS (THE EXTENDED GRAPH WINS WHEN PROVIDED)
    src_name = source[St.graph_name]
    src_uri = source[St.graph] if St.extended_graph not in source else source[St.extended_graph]
    trg_name = target[St.graph_name]
    trg_uri = target[St.graph] if St.extended_graph not in target else target[St.extended_graph]

    # NOTE: THE WHERE CLAUSE MUST BE CLOSED AFTER THE INTERMEDIATE PATTERN AND
    # THE TWO TRAILING DROPS MUST FOLLOW IT AS SEPARATE ";"-SEPARATED OPERATIONS
    insert = """
    PREFIX alivocab: <{16}>
    PREFIX prov: <{17}>

    DROP SILENT GRAPH <{0}load01> ;
    DROP SILENT GRAPH <{0}load02> ;
    DROP SILENT GRAPH <{10}> ;
    DROP SILENT GRAPH <{14}{15}> ;

    INSERT
    {{
        GRAPH <{10}>
        {{
            ?{1} ?newSingletons ?{3} .
        }}
        ### SINGLETONS' METADATA
        GRAPH <{14}{15}>
        {{
            ?newSingletons
                rdf:singletonPropertyOf alivocab:{12}{13} ;
                prov:wasDerivedFrom ?pred ;
                alivocab:hasEvidence ?evidence .
        }}
    }}
    WHERE
    {{
        ### LINKSET TO REFINE
        graph <{5}>
        {{
            ?{1} ?pred ?{3} .
            bind( iri(replace("{11}{12}{13}_#", "#", strafter(str(uuid()), "uuid:") )) as ?newSingletons )
        }}

        ### SOURCE DATASET
        graph <{6}>
        {{
            ### SOURCE DATASET AND ITS ALIGNED PREDICATE
            ?{1} {2} ?value_1 .
            bind (lcase(str(?value_1)) as ?src_value)

            # VALUE TRIMMING
            BIND('^\\\\s+(.*?)\\\\s*$|^(.*?)\\\\s+$' AS ?regexp)
            BIND(REPLACE(?src_value, ?regexp, '$1$2') AS ?src_trimmed)
        }}

        ### TARGET DATASET
        graph <{7}>
        {{
            ### TARGET DATASET AND ITS ALIGNED PREDICATE
            ?{3} {4} ?value_2 .
            bind (lcase(str(?value_2)) as ?trg_value)

            # VALUE TRIMMING
            BIND('^\\\\s+(.*?)\\\\s*$|^(.*?)\\\\s+$' AS ?regexp)
            BIND(REPLACE(?trg_value, ?regexp, '$1$2') AS ?trg_trimmed)
        }}

        ### INTERMEDIATE DATASET
        graph <{9}>
        {{
            ?intermediate_uri
                ?intPred_1 ?value_3 ;
                ?intPred_2 ?value_4 .

            ### VALUES TO LOWER CASE
            bind (lcase(str(?value_3)) as ?src_val)
            bind (lcase(str(?value_4)) as ?trg_val)

            # VALUE TRIMMING
            BIND('^\\\\s+(.*?)\\\\s*$|^(.*?)\\\\s+$' AS ?regexp)
            BIND(REPLACE(?src_val, ?regexp, '$1$2') AS ?src_trimmed)
            BIND(REPLACE(?trg_val, ?regexp, '$1$2') AS ?trg_trimmed)

            BIND(concat("[", ?src_trimmed, "] aligns with [", ?trg_trimmed, "]") AS ?evidence)
        }}
    }} ;

    DROP SILENT GRAPH <{0}load01> ;
    DROP SILENT GRAPH <{0}load02>
    """.format(
        # 0          1         2           3         4
        Ns.tmpgraph, src_name, src_aligns, trg_name, trg_aligns,
        # 5                6        7        8 (NOT REFERENCED)  9
        specs[St.linkset], src_uri, trg_uri, Ns.tmpvocab, specs[St.intermediate_graph],
        # 10               11           12                   13
        specs[St.refined], Ns.alivocab, specs[St.mechanism], specs[St.sameAsCount],
        # 14           15                      16           17
        Ns.singletons, specs[St.refined_name], Ns.alivocab, Ns.prov)

    return insert
def refine_exact_query(specs): source = specs[St.source] target = specs[St.target] src_graph = source[ St.graph] if St.extended_graph not in source else source[ St.extended_graph] trg_graph = target[ St.graph] if St.extended_graph not in target else target[ St.extended_graph] print "src_graph:", src_graph print "trg_graph:", trg_graph # FORMATTING THE ALIGNS PROPERTY src_aligns = source[St.aligns] \ if Ls.nt_format(source[St.aligns]) else "<{}>".format(source[St.aligns]) trg_aligns = target[St.aligns] \ if Ls.nt_format(target[St.aligns]) else "<{}>".format(target[St.aligns]) # GENERATE THE INSERT QUERY insert_query = """ PREFIX prov: <{}> PREFIX rdf: <{}> PREFIX alivocab: <{}> INSERT {{ ### REFINED LINKSET GRAPH <{}> {{ ?subject ?newSingletons ?object . }} ### SINGLETONS' METADATA GRAPH <{}{}> {{ ?newSingletons rdf:singletonPropertyOf alivocab:{}{} ; ## THIS IS THE TRAIL prov:wasDerivedFrom ?singleton ; ## BUT THIS IS ADDED FOR QUERY SIMPLICITY AND EFFICIENCY ?sP ?sO ; ## THIS IS ITS OWN EVIDENCE alivocab:hasEvidence ?trimmed . }} }} WHERE {{ ### LINKSET GRAPH <{}> {{ ?subject ?singleton ?object . bind( iri(replace("{}{}{}_#", "#", strafter(str(uuid()), "uuid:") )) as ?newSingletons ) }} ### METADATA graph <{}> {{ ?singleton ?sP ?sO . }} ### SOURCE DATASET GRAPH <{}> {{ ?subject a <{}> ; {} ?s_label . BIND(lcase(str(?s_label)) as ?label1) # VALUE TRIMMING BIND('^\\\\s+(.*?)\\\\s*$|^(.*?)\\\\s+$' AS ?regexp) BIND(REPLACE(?label1, ?regexp, '$1$2') AS ?trimmed) }} ### TARGET DATASET GRAPH <{}> {{ ?object a <{}> ; {} ?o_label . 
BIND(lcase(str(?o_label)) as ?label2) # VALUE TRIMMING BIND('^\\\\s+(.*?)\\\\s*$|^(.*?)\\\\s+$' AS ?regexp) BIND(REPLACE(?label2, ?regexp, '$1$2') AS ?trimmed) }} }} """.format(Ns.prov, Ns.rdf, Ns.alivocab, specs[St.refined], Ns.singletons, specs[St.refined_name], specs[St.mechanism], specs[St.sameAsCount], specs[St.linkset], Ns.alivocab, specs[St.mechanism], specs[St.sameAsCount], specs[St.singletonGraph], src_graph, source[St.entity_datatype], src_aligns, trg_graph, target[St.entity_datatype], trg_aligns) # print insert_query return insert_query
def geo_load_query(specs, is_source):
    """Return the SPARQL INSERT that loads the coordinates of linked resources into a temporary graph.

    For each ``?resource`` that appears on the selected side of a link in
    ``specs[St.refined]`` and is of the configured type in the dataset graph,
    the query normalises its longitude and latitude (a comma is replaced by a
    dot and the value is typed as ``xsd:float``). It stores them as
    ``wgs:long`` / ``wgs:lat`` in ``<Ns.tmpgraph>load_<lens name>_1`` (source)
    or ``..._2`` (target).

    :param specs: mapping indexed with the ``St`` keys used below.
    :param is_source: exactly ``True`` selects ``specs[St.source]``; any
        other value selects ``specs[St.target]``.
    :return: the query text.
    """

    def as_term(value):
        # KEEP THE VALUE AS IS WHEN Ls.nt_format ACCEPTS IT, ELSE WRAP IT IN <>
        return value if Ls.nt_format(value) else "<{}>".format(value)

    # SIDE-DEPENDENT SETTINGS: DATASET INFO, GRAPH SUFFIX, LINK PATTERN, BANNER
    if is_source is True:
        info = specs[St.source]
        load = "_{}_1".format(specs[St.lens_name])
        links = "?resource ?singPre ?target ."
        message = """######################################################################
    ### INSERTING DATA FROM THE SOURCE
    ######################################################################"""
    else:
        info = specs[St.target]
        load = "_{}_2".format(specs[St.lens_name])
        links = "?source ?singPre ?resource ."
        message = """######################################################################
    ### INSERTING MESSAGE FROM THE TARGET
    ######################################################################"""

    # THE TYPING PREDICATE DEFAULTS TO "a" UNLESS ANOTHER ONE IS PROVIDED
    rdf_pred = "a"
    if St.rdf_predicate in info and info[St.rdf_predicate] is not None:
        rdf_pred = as_term(info[St.rdf_predicate])

    # FORMATTING THE LONGITUDE AND LATITUDE PROPERTIES
    longitude = as_term(info[St.longitude])
    latitude = as_term(info[St.latitude])

    # THE RESOURCE GRAPH URI
    uri = info[St.graph]

    template = """
    {5}
    PREFIX geof: <http://www.opengis.net/def/function/geosparql/>
    PREFIX wgs: <http://www.w3.org/2003/01/geo/wgs84_pos#>
    INSERT
    {{
        GRAPH <{0}load{1}>
        {{
            ?resource wgs:long ?longitude .
            ?resource wgs:lat ?latitude .
        }}
    }}
    WHERE
    {{
        GRAPH <{8}>
        {{
            {9}
        }}
        GRAPH <{2}>
        {{
            ### LOCATION COORDINATES
            ?resource {6} <{7}> .
            ?resource {3} ?long .
            ?resource {4} ?lat .

            ### MAKING SURE THE COORDINATES ARE WELL FORMATTED
            BIND( STRDT(REPLACE(STR(?long), ",", "."), xsd:float) as ?longitude )
            BIND( STRDT(REPLACE(STR(?lat), ",", "."), xsd:float) as ?latitude )

            ### MAKING SURE THE COORDINATES AT DIGITS AND NOT LITERALS
            Filter (?longitude >= 0 || ?longitude <= 0 )
            Filter (?latitude >= 0 || ?latitude <= 0 )

            ### GENERATE A LOCATION URI
            BIND( replace("http://risis.eu/#","#", STRAFTER(str(UUID()),"uuid:")) as ?name )
            BIND(iri(?name) as ?location)
        }}
    }}
    """

    return template.format(
        Ns.tmpgraph,                # 0
        load,                       # 1
        uri,                        # 2
        longitude,                  # 3
        latitude,                   # 4
        message,                    # 5
        rdf_pred,                   # 6
        info[St.entity_datatype],   # 7
        specs[St.refined],          # 8
        links,                      # 9
    )