def refine_metadata(specs): # GENERATE GENERIC METADATA metadata = Gn.linkset_refined_metadata(specs) if int(specs[St.triples]) > 0: # print metadata is_inserted = Qry.boolean_endpoint_response(metadata["query"]) print ">>> THE METADATA IS SUCCESSFULLY INSERTED:", is_inserted # GENERATE LINKSET CONSTRUCT QUERY construct_query = "\n{}\n{}\n{}\n{}\n{}\n".format( "PREFIX predicate: <{}>".format(Ns.alivocab), "PREFIX src{}: <{}>".format(specs[St.source][St.graph_name], specs[St.source][St.graph_ns]), "PREFIX trg{}: <{}>".format(specs[St.target][St.graph_name], specs[St.target][St.graph_ns]), "construct { ?x ?y ?z }", "where {{ graph <{}> {{ ?x ?y ?z }} }}".format( specs[St.refined]), ) # GENERATE LINKSET SINGLETON METADATA QUERY singleton_metadata_query = "\n{}\n{}\n{}\n{}\n{}\n{}\n\n".format( "PREFIX singMetadata: <{}>".format(Ns.singletons), "PREFIX predicate: <{}>".format(Ns.alivocab), "PREFIX prov: <{}>".format(Ns.prov), "PREFIX rdf: <{}>".format(Ns.rdf), "construct { ?x ?y ?z }", "where {{ graph <{}> {{ ?x ?y ?z }} }}".format( specs[St.singleton]), ) # GET THE CORRESPONDENCES INSERTED USING A THE CONSTRUCT QUERY singleton_construct = Qry.endpointconstruct(singleton_metadata_query) if singleton_construct is not None: singleton_construct = \ singleton_construct.replace('{', "singMetadata:{}\n{{".format(specs[St.refined_name]), 1) # GET THE SINGLETON METADATA USING THE CONSTRUCT QUERY construct_response = Qry.endpointconstruct(construct_query) if construct_response is not None: construct_response = construct_response.replace( '{', "<{}>\n{{".format(specs[St.refined]), 1) # WRITE TO FILE print "\t>>> WRITING THE METADATA YO FILE TO FILE" write_to_file(graph_name=specs[St.refined_name], metadata=metadata["query"].replace("INSERT DATA", ""), correspondences=construct_response, singletons=singleton_construct, directory=DIRECTORY) return metadata["message"]
def expand_approx(specs, theta, stop_words_string, stop_symbols_string, linkset2expand, reorder=True): data = None inserted_1 = 0 inserted_2 = 0 total = 0 count= 0 abort = False for is_source in [True, False]: count += 1 print Ut.headings("********* PASS {} *********").format(count) # if is_source is False: # specs[St.corr_reducer] = data[St.result] # print data[St.result] data = prefixed_inverted_index( specs, theta=theta, reorder=reorder, stop_words_string=stop_words_string, stop_symbols_string=stop_symbols_string, expands=True, is_source=is_source, linkset2expand=linkset2expand, check_file=False) if count == 1: inserted_1 += data['inserted'] total += inserted_1 else: inserted_2 += data['inserted'] total += inserted_2 if data[St.message].__contains__('ALREADY EXISTS'): abort = True print "\n>>> THE PROCESS IS BEING ABORTED AS THE FIRST " \ "PASS REVEALS THE EXISTENCE OF AN EXPANSION OF THE GRAPH." break if abort is False: # REMOVE DUPLICATES print "REMOVING REPETITION" if data is not None and data[St.result] is not None: print "\t", Qry.remove_repetition_same_direction(data[St.result]) # PRINT THE FINAL TRIPLE COUNT final_inserted = Qry.get_triples_count(data[St.result]) if final_inserted is None: final_inserted = 0 else: final_inserted = int(final_inserted) print "\nOVERALL STATS:\n\tCORRESPONDENCES DISCOVERED AT PASS 1 : {}\n\tCORRESPONDENCES DISCOVERED AT PASS 2 : {}".format( inserted_1, inserted_2) print "\tOVERALL CORRESPONDENCES DISCOVERED : {}".format(total) print "\tTOTAL REPEATED CORRESPONDENCES REMOVED : {}".format(total - final_inserted) print "\tTOTAL CORRESPONDENCES INSERTED : {}".format(final_inserted) # print data return data
def cluster_2_linkset_metadata(specs):
    """
    Build and run the INSERT query holding the generic metadata of a linkset
    of mixed resources.

    :param specs: dictionary providing St.targets and St.linkset_name. The
        generated query is stored back under the key "metadata".
    :return: None. The query is sent through Qry.boolean_endpoint_response.
    """

    linkset_name = specs[St.linkset_name]

    # A TARGET COMBINES A DATATYPE AND A LIST OF PROPERTIES
    alignment_targets = target_datatype_properties(specs[St.targets], "alignmentTarget", linkset_name)

    template = """
    # CREATION OF A LINKSET OF MIXED-RESOURCES
    PREFIX ll: <{0}>
    PREFIX void: <{1}>
    PREFIX rdfs: <{2}>
    PREFIX bdb: <{3}>
    PREFIX prov: <{4}>
    PREFIX singleton: <{5}>
    prefix linkset: <{6}>
    PREFIX llTarget: <{7}>
    prefix stardog: <tag:stardog:api:context:>

    INSERT
    {{
        # GENERIC METADATA
        linkset:{8}
            rdfs:label "{8}" ;
            a void:Linkset ;
            ll:alignsMechanism <{9}exact> .
        {10}
    }}
    WHERE
    {{
        {11}
    }}
    """

    # POSITIONS 0-7 ARE NAMESPACES, 8-11 ARE SPECIFIC TO THIS LINKSET
    namespaces = (Ns.alivocab, Ns.void, Ns.rdfs, Ns.bdb, Ns.prov,
                  Ns.singletons, Ns.linkset, Ns.alignmentTarget)
    specifics = (linkset_name, Ns.mechanism, alignment_targets["list"], alignment_targets["binds"])
    query = template.format(*(namespaces + specifics))

    specs["metadata"] = query
    Qry.boolean_endpoint_response(query)
def main_alignment(alignment): # **************************************************************************** # GIVEN AN ALIGNMENT, RETURN THE MAIN ONE # **************************************************************************** try: # LOCAL NAME OF THE GRAPH name = Ut.get_uri_local_name_plus(alignment) print "{:12} : {}".format("LOCAL NAME", name) query_search = std_queries["graphs_search"].format(name) response = Qry.sparql_xml_to_matrix(query_search) results = response["result"] if results is not None: for i in range(1, len(results)): if results[i][0].__contains__("singletons") is False: return results[i][0] if str(alignment).__contains__(Ns.singletons): return str(alignment).replace(Ns.singletons, Ns.linkset) else: return alignment except ValueError: traceback.print_exc() return alignment
def register_evolution(research_question_uri, alignment_uri, evolution_str): if alignment_uri.__contains__("<<"): alignment_uri = str(alignment_uri).replace("<<", "<").replace(">>", ">") bind = "BIND(iri(\"{}\") AS ?LINK)".format(alignment_uri) query = """ PREFIX alivocab: <http://risis.eu/alignment/predicate/> INSERT DATA {{ {0} GRAPH <{1}> {{ ?LINK alivocab:evolution ""\"{2}\""" . }} }} """.format(bind, research_question_uri, evolution_str) else: query = """ PREFIX alivocab: <http://risis.eu/alignment/predicate/> INSERT DATA {{ GRAPH <{0}> {{ <{1}> alivocab:evolution ""\"{2}\""" . }} }} """.format(research_question_uri, alignment_uri, evolution_str) # print query registered = Qry.boolean_endpoint_response(query) print "\t>>> IS EVOLUTION REGISTERED FOR {}?: {}".format( alignment_uri, registered)
def export_flat_alignment_service(alignment):
    """
    Export an alignment as flat ll:mySameAs triples together with its metadata.

    :param alignment: the URI of the alignment, either plain or already wrapped
        in angle brackets.
    :return: a dictionary with 'result' (a two-line header followed by the
        constructed triples, empty lines removed) and 'message' (a
        human-readable summary).
    """

    alignment = str(alignment).strip()
    row_alignment = alignment
    if Ut.is_nt_format(alignment) is not True:
        alignment = "<{}>".format(alignment)

    # CONSTRUCT QUERY
    # NOTE(review): two CONSTRUCT forms separated by ';' are sent as a single
    # request - confirm that the endpoint accepts this.
    query = """
    PREFIX ll: <{0}>
    PREFIX linkset: <{1}>
    PREFIX lens: <{2}>
    PREFIX singletons: <{3}>
    CONSTRUCT
    {{
        ?srcCorr ll:mySameAs ?trgCorr .
        ?trgCorr ll:mySameAs ?srcCorr .
    }}
    WHERE
    {{
        BIND( {4} as ?alignment )
        # THE ALIGNMENT GRAPH WITH EXPLICIT SYMMETRY
        GRAPH ?alignment
        {{
            ?srcCorr ?singleton ?trgCorr .
        }}
    }} ;

    CONSTRUCT
    {{
        ?alignment ?pred ?obj .
        ?obj ?predicate ?object .
    }}
    WHERE
    {{
        # THE METADATA
        BIND( {4} as ?alignment )
        ?alignment ?pred ?obj .
        OPTIONAL {{ ?obj ?predicate ?object . }}
    }}
    """.format(Ns.alivocab, Ns.linkset, Ns.lens, Ns.singletons, alignment)

    # FIRE THE CONSTRUCT AGAINST THE TRIPLE STORE
    # (THE FORMER DEBUG "print query" + "exit(0)" STOPPED THE PROCESS HERE)
    alignment_construct = Qry.endpointconstruct(query)

    # A MISSING RESPONSE IS EXPORTED AS AN EMPTY ALIGNMENT
    if alignment_construct is None:
        alignment_construct = ""

    triples = len(regex.findall('ll:mySameAs', alignment_construct))

    # REMOVE EMPTY LINES
    alignment_construct = "\n".join(
        [line for line in alignment_construct.splitlines() if line.strip()])

    result = "### TRIPLE COUNT: {}\n### LINKSET: {}\n".format(triples, alignment) + alignment_construct
    message = "You have just downloaded the graph [{}] which contains [{}] correspondences. ".format(
        row_alignment, triples)

    return {'result': result, 'message': message}
def check_constraint():
    """
    Tell whether the linked resources satisfy the constraint.

    The names constraint_text, constraint_targets, resources and linkset are
    not defined in this function; they are read from the surrounding scope.

    :return: False when the query returns no result, True otherwise.
    """

    accepted_values = constraint_text.lower().split(",")

    # CONSTRAINT BUILDER
    c_builder = Buffer.StringIO()

    if constraint_targets is not None:

        for target in constraint_targets:

            graph = target[St.graph]
            first_property = target[St.data][0][St.properties][0]
            if Ut.is_nt_format(first_property):
                prop = first_property
            else:
                prop = "<{}>".format(first_property)

            # WRITING THE CONSTRAINT ON THE GRAPH
            graph_q = """
    {{
        GRAPH <{0}>
        {{
            ?lookup {1} ?constraint .
        }}
    }}
    """.format(graph, prop)

            # EVERY GRAPH PATTERN BUT THE FIRST IS CHAINED WITH A UNION
            if len(c_builder.getvalue()) == 0:
                c_builder.write(graph_q)
            else:
                c_builder.write("UNION {}".format(graph_q))

        # WRITING THE FILTER
        if len(c_builder.getvalue()) > 0:
            for position, value in enumerate(accepted_values):
                if position == 0:
                    condition = """
    FILTER (LCASE(STR(?constraint)) = "{}" """
                else:
                    condition = """
    || LCASE(STR(?constraint)) = "{}" """
                c_builder.write(condition.format(value.strip()))
            c_builder.write(")")

    # THE RESULT OF THE QUERY ABOUT THE LINKED RESOURCES
    query = Qry.cluster_rsc_strengths_query(resources, linkset)
    query = query.replace("# CONSTRAINTS IF ANY", c_builder.getvalue())
    # print query
    response = Qry.sparql_xml_to_matrix(query)

    return response[St.result] is not None
def linkset_stats(linkset):
    """
    Display, per dataset and datatype targeted by a linkset, the number of
    distinct resources involved in the linkset and the percentage they
    represent of all resources of that datatype in the dataset.

    :param linkset: the URI of the linkset (without angle brackets).
    :return: None. The table is displayed through Qry.display_result.
    """

    template = """
    PREFIX void: <http://rdfs.org/ns/void#>
    PREFIX bdb: <http://vocabularies.bridgedb.org/ops#>
    PREFIX ll: <http://risis.eu/alignment/predicate/>

    SELECT DISTINCT ?dataset ?datatype ?alignsMechanism ?total (COUNT (DISTINCT ?RESOURCE) as ?subtotal)
    (ROUND((COUNT(DISTINCT ?RESOURCE) / ?total)*10000) /100 as ?percentage)
    {{
        <{0}> ll:alignsMechanism ?alignsMechanism .
        {{
            <{0}>
                bdb:subjectsDatatype ?datatype ;
                void:subjectsTarget ?dataset .
            graph <{0}>
            {{
                ?RESOURCE ?p ?o .
            }}
        }}
        UNION
        {{
            <{0}>
                bdb:objectsDatatype ?datatype ;
                void:objectsTarget ?dataset .
            graph <{0}>
            {{
                ?o ?p ?RESOURCE .
            }}
        }}
        {{
            SELECT (COUNT(DISTINCT ?RESOURCE) as ?total) ?dataset ?datatype
            {{
                graph ?dataset
                {{
                    ?RESOURCE a ?datatype .
                }}
            }} GROUP BY ?dataset ?datatype
        }}
    }} GROUP BY ?dataset ?datatype ?total ?alignsMechanism
    """

    stats_query = template.format(linkset)
    Qry.display_result(query=stats_query, spacing=60, is_activated=True)
def properties(graph, datatype=None): comment = "# " if datatype is None else "" datatype = datatype if Ut.is_nt_format(datatype) is True else "<{}>".format(datatype) graph = graph if Ut.is_nt_format(graph) is True else "<{}>".format(graph) properties = """ # <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> SELECT DISTINCT ?predicate WHERE {{ GRAPH {} {{ {}?subj {} ?type . ?subj ?predicate ?obj . }} }} """.format(graph, comment, datatype) print properties Qr.display_result(query=properties, spacing=50, limit=0, is_activated=True)
def diff_meta(specs):
    """
    Generate the INSERT DATA query describing a lens created with the
    difference operator.

    :param specs: dictionary. Keys read here:
        lens: the URI of the lens
        lens_name: the name of the lens
        subjectsTarget / objectsTarget: the graphs the lens is built from
        insert_query: the insert query that led to the creation of the lens
        The key "triples" is (re)computed here from the size of the lens graph.
    :return: the metadata query as a string.
    """

    lens_uri = specs[St.lens]
    specs[St.triples] = Qry.get_namedgraph_size(lens_uri, isdistinct=False)

    template = """
    ##################################################################
    ### METADATA
    ### for the lens: {0}
    ##################################################################
    PREFIX rdfs: <{1}>
    PREFIX alivocab: <{2}>
    PREFIX void: <{3}>
    PREFIX bdb: <{4}>
    PREFIX lensOp: <{5}>
    PREFIX specific: <{12}>

    INSERT DATA
    {{
        ### RESOURCE
        <{0}>
            a bdb:Lens ;
            rdfs:label "{10}" ;
            alivocab:operator lensOp:difference ;
            void:triples {6} ;
            void:subjectsTarget <{7}> ;
            void:objectsTarget <{8}> ;
            alivocab:singletonGraph specific:{10} ;
            bdb:assertionMethod <{9}{10}> .

        ### ASSERTION METHOD"
        <{9}{10}>
            alivocab:sparql \"\"\"{11}\"\"\" .
    }}"""

    # THE ORDER BELOW MATCHES THE NUMBERED PLACEHOLDERS 0 TO 12
    values = [
        lens_uri, Ns.rdfs, Ns.alivocab, Ns.void, Ns.bdb, Ns.lensOp,
        specs[St.triples], specs[St.subjectsTarget], specs[St.objectsTarget],
        Ns.method, specs[St.lens_name], specs[St.insert_query], Ns.singletons,
    ]
    metadata = template.format(*values)

    # print metadata
    return metadata
def register_dataset_mapping(question_uri, mapping, activated=True): if activated: print "\nREGISTERING A [DATASET-MAPPING]" \ "\n======================================================" \ "========================================================" ds_mapping_query = ds_mapping(question_uri, mapping) inserted = Qry.boolean_endpoint_response(ds_mapping_query) message = "THE DATASET MAPPING WAS SUCCESSFULLY INSERTED." if inserted \ else "DUE TO A SYSTEM FAILURE, THE MAPPING COULD NOT BE INSERTED." print message return {St.message: message, St.result: message}
def linkset_wasderivedfrom(refined_linkset_uri):
    """
    Fetch the prov:wasDerivedFrom value of a refined linkset.

    :param refined_linkset_uri: the URI of the refined linkset.
    :return: the first cell of the second row of the result matrix, or None
        when the query yields nothing.
    """

    query = """
    select *
    {{
        <{}> <http://www.w3.org/ns/prov#wasDerivedFrom> ?wasDerivedFrom .
    }}
    """.format(refined_linkset_uri)
    # print query

    response = Qry.sparql_xml_to_matrix(query)
    # print response

    if not response:
        return None

    rows = response[St.result]
    if not rows:
        return None

    return rows[1][0]
def added(early_version, late_version, stat=False, display=True, activated=False): if activated is False: print "\nTHE FUNCTION [added] IS NOT ACTIVATED" return {St.subject: None, St.predicate: None, St.triples: None} if stat is False: subj_added = subject(late_version, early_version, count=stat) prop_added = predicate(late_version, early_version, count=stat) # RESPONSE FOR TRIPLES ADDED resp_subj_added = Qr.sparql_xml_to_matrix(subj_added) resp_prop_added = Qr.sparql_xml_to_matrix(prop_added) status = (resp_subj_added[St.result] is not None and len(resp_subj_added[St.result]) > 1) or \ (resp_prop_added[St.result] is not None and len(resp_prop_added[St.result]) > 1) if display is True: # DISPLAY THE RESULTS FOR SUBJECT ADDED print "\n>>> DISPLAY THE RESULTS FOR SUBJECT ADDED" Qr.display_matrix(resp_subj_added, limit=10, is_activated=True) # DISPLAY THE RESULTS FOR PREDICATE ADDED print "\n>>> DISPLAY THE RESULTS FOR PREDICATE ADDED" Qr.display_matrix(resp_prop_added, limit=10, is_activated=True) return {"status": status, St.subject: resp_subj_added[St.result], St.predicate: resp_prop_added[St.result]} else: subj_added = subject(late_version, early_version, count=stat) prop_added = predicate(late_version, early_version, count=stat) resp_subj_added = Qr.sparql_xml_to_matrix(subj_added) resp_prop_added = Qr.sparql_xml_to_matrix(prop_added) status = (resp_subj_added[St.result] is not None and int(resp_subj_added[St.result][1][0]) > 0)\ or (resp_prop_added[St.result] is not None and int(resp_prop_added[St.result][1][0]) > 0) return {"status": status, St.subject: resp_subj_added[St.result][1][0], St.predicate: resp_prop_added[St.result][1][0]}
def main_alignment(alignment): # LOCAL NAME OF THE GRAPH name = Ut.get_uri_local_name_plus(alignment) print "{:12} : {}".format("LOCAL NAME", name) query = std_queries["graphs_search"].format(name) response = Qry.sparql_xml_to_matrix(query) results = response["result"] if results is not None: for i in range(1, len(results)): if results[i][0].__contains__("singletons") is False: return results[i][0] if str(alignment).__contains__(Ns.singletons): return str(alignment).replace(Ns.singletons, Ns.linkset) else: return alignment
def export_flat_alignment_and_metadata(alignment): flat = export_flat_alignment(alignment) alignment = str(alignment).strip() row_alignment = alignment alignment = alignment if Ut.is_nt_format( alignment) is True else "<{}>".format(alignment) # CONSTRUCT QUERY query = """ PREFIX ll: <{0}> PREFIX linkset: <{1}> PREFIX lens: <{2}> PREFIX singletons: <{3}> CONSTRUCT {{ ?alignment ?pred ?obj . ?obj ?predicate ?object . }} WHERE {{ BIND ({4} AS ?alignment) # THE METADATA ?alignment ?pred ?obj . OPTIONAL {{ ?obj ?predicate ?object . }} }} #LIMIT 10 """.format(Ns.alivocab, Ns.linkset, Ns.lens, Ns.singletons, alignment) # print query # FIRE THE CONSTRUCT AGAINST THE TRIPLE STORE alignment_construct = Qry.endpointconstruct(query, clean=False) # REMOVE EMPTY LINES triples = flat["triples"] # triples = len(re.findall('ll:mySameAs', alignment_construct)) alignment_construct = "\n".join([line for line in alignment_construct.splitlines() if line.strip()]) + "\n\n" + \ flat['result'] result = "### GENERIC METADATA FOR \n### LINKSET: {}\n\n{}".format( alignment, alignment_construct) message = "You have just downloaded the graph [{}] which contains [{}] correspondences. ".format( row_alignment, triples) print result return {'result': result, 'message': message}
def get_corr_reducer(graph):
    """
    Load the correspondences of a graph as a dictionary of resource pairs.

    :param graph: the URI of the graph holding the correspondences.
    :return: a dictionary mapping each (subject, object) pair, stripped of
        surrounding white space, to 1. The dictionary is empty when the query
        returns no result.
    """

    query = """
    SELECT ?uri1 ?uri2
    {{
        GRAPH <{}>
        {{
            ?uri1 ?p ?uri2 .
        }}
    }}""".format(graph)

    alignment = Qry.sparql_xml_to_matrix(query)
    table_matrix = alignment[St.result]

    reducer_dict = {}

    # THE RESULT CAN BE MISSING (None) AS WELL AS EMPTY
    if not table_matrix:
        return reducer_dict

    # THE FIRST ROW OF THE MATRIX IS SKIPPED
    for row in table_matrix[1:]:

        # THE ROW SIZE IS CHECKED BEFORE THE CELLS ARE READ
        if len(row) != 2:
            continue

        reducer_dict[(row[0].strip(), row[1].strip())] = 1

    return reducer_dict
def export_flat_alignment(alignment): print Ut.headings("EXPORTING THE ALIGNMENT WITH NO METADATA") print "Export for: {}".format(alignment) alignment = str(alignment).strip() row_alignment = alignment alignment = alignment if Ut.is_nt_format( alignment) is True else "<{}>".format(alignment) # CONSTRUCT QUERY query = """ PREFIX ll: <{}> CONSTRUCT {{ ?x ll:mySameAs ?z }} WHERE {{ GRAPH {} {{ ?x ?y ?z }} }} order by ?x """.format(Ns.alivocab, alignment) # print query # FIRE THE CONSTRUCT AGAINST THE TRIPLE STORE alignment_construct = Qry.endpointconstruct(query) # REMOVE EMPTY LINES # COMMA IS COUNTED WHENEVER THERE ARE MORE OBJECTS FOR THE SUBJECT triples = len(regex.findall('ll:mySameAs', alignment_construct)) + len( regex.findall(',', alignment_construct)) alignment_construct = "\n".join( [line for line in alignment_construct.splitlines() if line.strip()]) alignment_construct = alignment_construct.replace( "{", "{}\n{{".format(alignment)) # RESULTS result = "### TRIPLE COUNT: {0}\n### LINKSET: {1}\n".format( triples, alignment) + alignment_construct message = "You have just downloaded the graph [{}] which contains [{}] correspondences. ".format( row_alignment, triples) return {'result': result, 'message': message, "triples": triples}
def get_table(dataset_specs, reducer=None): # ADD THE REDUCER IF SET. THE REDUCER OR (DATASET REDUCER) HELPS ELIMINATING # THE COMPUTATION OF SIMILARITY FOR INSTANCES THAT WHERE ALREADY MATCHED print "\nLOADING: {} {}".format(dataset_specs[St.graph], dataset_specs[St.entity_datatype]) if reducer is None: reducer_comment = "#" reducer = "" else: reducer_comment = "" reducer = reducer aligns = dataset_specs[St.aligns] if Ut.is_nt_format(dataset_specs[St.aligns]) \ else "<{}>".format(dataset_specs[St.aligns]) query = """ SELECT DISTINCT * {{ GRAPH <{0}> {{ ?subject a <{1}> ; {2} ?object . }} {4}FILTER NOT EXISTS {4}{{ {4} GRAPH <{3}> {4} {{ {4} {{ ?subject ?pred ?obj . }} {4} UNION {4} {{ ?obj ?pred ?subject. }} {4} }} {4}}} }} {5} """.format(dataset_specs[St.graph], dataset_specs[St.entity_datatype], aligns, reducer, reducer_comment, LIMIT) table_matrix = Qry.sparql_xml_to_matrix(query) # Qry.display_matrix(table_matrix, is_activated=True) # print table_matrix # print query if table_matrix[St.result]: print "\tINPUT SIZE: {}".format(str(len(table_matrix[St.result]) - 1)) return table_matrix[St.result]
def get_table(dataset_specs, reducer=None):
    """
    Load the (subject, aligned value) pairs of a dataset.

    :param dataset_specs: dictionary providing St.graph, St.entity_datatype
        and St.aligns.
    :param reducer: optional graph whose resources are excluded from the
        table. When None, the exclusion filter is commented out of the query.
    :return: the St.result entry of the response matrix.
    """

    # ADD THE REDUCER IF SET, OTHERWISE COMMENT THE FILTER OUT
    if reducer is None:
        reducer_comment, reducer = "#", ""
    else:
        reducer_comment = ""

    aligns = dataset_specs[St.aligns]
    if not Ut.is_nt_format(aligns):
        aligns = "<{}>".format(aligns)

    # EVERY LINE OF THE FILTER STARTS WITH {4}, THE COMMENT SWITCH
    template = """
    SELECT DISTINCT *
    {{
        GRAPH <{0}>
        {{
            ?subject
                a <{1}> ;
                {2} ?object .
        }}
        {4}FILTER NOT EXISTS
        {4}{{
        {4}    GRAPH <{3}>
        {4}    {{
        {4}        {{ ?subject ?pred ?obj . }}
        {4}        UNION
        {4}        {{ ?obj ?pred ?subject. }}
        {4}    }}
        {4}}}
    }} {5}
    """
    values = (dataset_specs[St.graph], dataset_specs[St.entity_datatype],
              aligns, reducer, reducer_comment, LIMIT)

    response = Qry.sparql_xml_to_matrix(template.format(*values))
    return response[St.result]
def linkset_evolution_composition(alignment_mapping):
    """
    Extract the composition (aligned subjects, aligned objects and mechanism)
    of a linkset from the response of a construct query.

    :param alignment_mapping: dictionary providing St.researchQ_URI and either
        St.refined (used when present) or St.linkset.
    :return: the text captured by the regular expression in the constructed
        description, or None when nothing matches.
    """

    question_uri = alignment_mapping[St.researchQ_URI]

    # A REFINED LINKSET TAKES PRECEDENCE OVER THE LINKSET
    if St.refined in alignment_mapping:
        linkset_uri = alignment_mapping[St.refined]
    else:
        linkset_uri = alignment_mapping[St.linkset]

    # 1.1 GET THE LINKSET ALIGNMENT
    # ONLY THE TEMPLATE IS FORMATTED, THE PREFIX BLOCK IS PREPENDED AS IS
    template = """
    construct
    {{
        <{1}>
            a <http://risis.eu/class/AlignmentMapping> ;
            alivocab:alignsSubjects ?srcAligns ;
            alivocab:alignsObjects ?trgAligns ;
            alivocab:alignsMechanism ?mechanism .
    }}
    where
    {{
        #BIND(iri(replace('http://risis.eu/activity/idea_algmt_#','#',SUBSTR(str(uuid()), 40))) as ?alignmentMapping)
        <{1}>
            alivocab:alignsSubjects ?alignsSubjects ;
            alivocab:alignsObjects ?alignsObjects ;
            alivocab:alignsMechanism ?alignsMechanism .
        bind( str( ?alignsSubjects) as ?srcAligns )
        bind( str( ?alignsObjects ) as ?trgAligns )
        bind( str( ?alignsMechanism) as ?mechanism )
    }}
    """
    alignment_query = PREFIX + template.format(question_uri, linkset_uri)

    construct = Qry.endpointconstruct(alignment_query)
    # print construct

    matches = re.findall('{.*a <.*?> ;(.*)}', construct, re.S)
    return matches[0] if matches else None
def register_lens(specs, is_created=True): # inverse = "" if is_created is True: created = "alivocab:created" inverse = "prov:used" print "REGISTERING [{}] AS CREATED".format(specs[St.lens]) else: created = "prov:used\t\t" inverse = "alivocab:created" print "REGISTERING [{}] AS IMPORTED".format(specs[St.lens]) query = PREFIX + """ INSERT {{ GRAPH <{0}> {{ <{0}> {1} <{2}> . <{2}> a bdb:Lens . }} }} WHERE {{ GRAPH <{0}> {{ FILTER NOT EXISTS {{ <{0}> {3} <{2}> . }} }} }}""".format(specs[St.researchQ_URI], created, specs[St.lens], inverse) # print query registered = Qry.boolean_endpoint_response(query) print "\t>>> IS THE LENS REGISTERED?:", registered
def geo_match(specs): # geo_query(ls_specs_1, True) # geo_query(ls_specs_1, False) # geo_match_query(ls_specs_1) drop_1 = """ PREFIX tmp: <{0}> DROP SILENT GRAPH tmp:load_{1}_1 ; drop silent graph tmp:load_{1}_2 """.format(Ns.tmpgraph, specs[St.lens_name], Ns.lens, Ns.singletons) drop_2 = """ PREFIX lens: <{0}> PREFIX singletons: <{1}> drop silent graph lens:{2} ; drop silent graph singletons:{2} """.format(Ns.lens, Ns.singletons, specs[St.lens_name]) print "\n\t4.1 >>> DROPPING GRAPH LOAD_1 & LOAD_2 IF THEY EXIST" # print drop_1 print "\t", Qry.boolean_endpoint_response(drop_1) # print drop_2 print "\t", Qry.boolean_endpoint_response(drop_2) print "\n\t4.2 >>> LOADING SOURCE INTO GRAPH LOAD-1" # print geo_load_query(specs, True) print "\t", Qry.boolean_endpoint_response(geo_load_query(specs, True)) print "\n\t4.3 >>> LOADING SOURCE INTO GRAPH LOAD-2" # print geo_load_query(specs, False) print "\t", Qry.boolean_endpoint_response(geo_load_query(specs, False)) print "\n\t4.4 >>> LOOKING FOR GEO-SIM BETWEEN SOURCE AND TARGET" print geo_match_query(specs) print "\t", Qry.boolean_endpoint_response(geo_match_query(specs)) print "\n\t4.5 >>> DROPPING GRAPH LOAD_1 & LOAD_2" print "\t", Qry.boolean_endpoint_response(drop_1)
def register_research_question(question): print "REGISTERING A RESEARCH QUESTION." \ "\n======================================================" \ "========================================================" if True: # CHECK WHETHER THE RESEARCH QUESTION ALREADY EXISTS question = to_bytes(to_unicode(question, "utf-8")) existence_query = check_rq_existence(question) check = Qry.boolean_endpoint_response(existence_query) # LOOK FOR A RESEARCH QUESTION OF THE SAME NAMES GRAPH find_query = find_rq(question) # AN INTERNAL PROBLEM OCCURRED if check is None: return check # THE RESEARCH QUESTION WAS ALREADY REGISTERED elif check == "true": find = Qry.sparql_xml_to_matrix(find_query) # print find if find: if find[St.result]: message = MESSAGE_1.replace("@", find[St.result][1][0]) print message return { St.message: message.replace("@", "<br/>"), St.result: find[St.result][1][0] } return find else: return find # REGISTERING YOUR RESEARCH QUESTION else: print "REGISTERING THE RESEARCH QUESTION" ins_rq = research_question(question) # print ins_rq inserted = Qry.boolean_endpoint_response(ins_rq) print "INSERTED RESULT:", inserted # THE REGISTRATION WAS NOT SUCCESSFUL if inserted is None: print "THE RESEARCH QUESTION WAS REGISTERED" print MESSAGE_3 # THE REGISTRATION WAS SUCCESSFUL. RETRIEVE THE URI if inserted == "true" or inserted == STARDOG_BOOLEAN_BUG_MESSAGE: print "THE RESEARCH QUESTION IS REGISTERED" find = Qry.sparql_xml_to_matrix(find_query) if find: if find[St.result]: message = MESSAGE_2.replace("@", find[St.result][1][0]) print message return { St.message: message.replace("@", "<br/>"), St.result: find[St.result][1][0] } return { St.message: MESSAGE_4.replace("@", "<br/>"), St.result: None } else: return find print { St.message: MESSAGE_3.replace("@", "<br/>"), St.result: None }
def linkset_metadata(specs, display=False): extra = "" if St.reducer in specs[St.source] and len( specs[St.source][St.reducer]) > 0: extra += "\n alivocab:subjectsReducer <{}> ;".format( specs[St.source][St.reducer]) if St.reducer in specs[St.target] and len( specs[St.target][St.reducer]) > 0: extra += "\n alivocab:objectsReducer <{}> ;".format( specs[St.target][St.reducer]) if St.intermediate_graph in specs and len( specs[St.intermediate_graph]) > 0: extra += "\n alivocab:intermediate <{}> ;".format( specs[St.intermediate_graph]) if St.threshold in specs and len(str(specs[St.threshold])) > 0: extra += "\n alivocab:threshold {} ;".format( str(specs[St.threshold])) if St.delta in specs and len(str(specs[St.delta])) > 0: extra += "\n alivocab:delta {} ;".format( str(specs[St.delta])) source = specs[St.source] target = specs[St.target] src_aligns = Ls.format_aligns(source[St.aligns]) trg_aligns = Ls.format_aligns(target[St.aligns]) # cCROSS CHECK INFORMATION IS USED IN CASE THE ALIGN PROPERTY APPEARS MEANINGLESS src_cross_check = Ls.format_aligns( source[St.crossCheck]) if St.crossCheck in source else None trg_cross_check = Ls.format_aligns( target[St.crossCheck]) if St.crossCheck in target else None # CROSS CHECK FOR THE WHERE CLAUSE cross_check_where = '' cross_check_where += "\n BIND(iri({}) AS ?src_crossCheck)".format( src_cross_check) if src_cross_check is not None else '' cross_check_where += "\n BIND(iri({}) AS ?trg_crossCheck)".format( trg_cross_check) if trg_cross_check is not None else '' # CROSS CHECK FOR THE INSERT CLAUSE cross_check_insert = '' cross_check_insert += "\n alivocab:crossCheckSubject ?src_crossCheck ;" \ if src_cross_check is not None else '' cross_check_insert += "\n alivocab:crossCheckObject ?trg_crossCheck ;" \ if trg_cross_check is not None else '' # specs[St.linkset] = "{}{}".format(Ns.linkset, specs[St.linkset_name]) specs[St.singleton] = "{}{}".format(Ns.singletons, specs[St.linkset_name]) specs[St.link] = "{}{}{}".format(Ns.alivocab, 
"exactStrSim", specs[St.sameAsCount]) specs[St.assertion_method] = "{}{}".format(Ns.method, specs[St.linkset_name]) specs[St.justification] = "{}{}".format(Ns.justification, specs[St.linkset_name]) specs[St.link_comment] = "The predicate <{}> used in this linkset is a property that reflects an entity " \ "linking approach based on the <{}{}> mechanism.". \ format(specs[St.link], Ns.mechanism, specs[St.mechanism]) if str(specs[St.mechanism]).lower() == "intermediate": specs[ St.link_name] = "Exact String Similarity via intermediate dataset" specs[ St. link_subpropertyof] = "http://risis.eu/linkset/predicate/{}".format( specs[St.mechanism]) specs[St.justification_comment] = "The method MATCH VIA INTERMEDIATE DATASET is used to align the" \ " source and the target by using properties that present different " \ "descriptions of a same entity, such as country name and country code. " \ "This is possible by providing an intermediate dataset that binds the " \ "two alternative descriptions to the very same identifier." specs[St.linkset_comment] = "Linking <{}> to <{}> by aligning {} with {} using the mechanism: {}". \ format(source[St.graph], target[St.graph], src_aligns, trg_aligns, specs[St.mechanism]) if str(specs[St.mechanism]).lower() == "exactstrsim": specs[St.link_name] = "Exact String Similarity" specs[ St. link_subpropertyof] = "http://risis.eu/linkset/predicate/{}".format( specs[St.mechanism]) specs[St.justification_comment] = "We assume that entities with the aligned predicates sharing the " \ "exact same content are the same. This assumption applies when dealing " \ "with entities such as Organisation." specs[St.linkset_comment] = "Linking <{}> to <{}> by aligning {} with {} using the mechanism: {}". \ format(source[St.graph], target[St.graph], src_aligns, trg_aligns, specs[St.mechanism]) elif str(specs[St.mechanism]).lower() == "identity": specs[St.link_name] = "Same URI" specs[ St. 
link_subpropertyof] = "http://risis.eu/linkset/predicate/{}".format( specs[St.mechanism]) specs[ St. justification_comment] = "We assume that entities with the same URI are identical." specs[St.linkset_comment] = "Linking <{}> to <{}> based on their identical URI using the mechanism: {}". \ format(source[St.graph], target[St.graph], specs[St.mechanism]) elif str(specs[St.mechanism]).lower() == "approxstrsim": specs[St.link_name] = "Approximate String Similarity" specs[ St. link_subpropertyof] = "http://risis.eu/linkset/predicate/{}".format( specs[St.mechanism]) specs[St.justification_comment] = "This includes entities with a string similarity in the interval [{} 1[.".\ format(specs[St.threshold]) specs[St.linkset_comment] = "Linking <{}> to <{}> based on their approximate string similarity" \ " using the mechanism: {}". \ format(source[St.graph], target[St.graph], specs[St.mechanism]) elif str(specs[St.mechanism]).lower() == "nearbygeosim": specs[St.link_name] = "Near by Geo-Similarity" specs[ St. link_subpropertyof] = "http://risis.eu/linkset/predicate/{}".format( specs[St.mechanism]) specs[St.justification_comment] = "This includes entities near each other by at most {} <{}>.". \ format(specs[St.unit_value], specs[St.unit_value]) specs[St.linkset_comment] = "Linking <{}> to <{}> based on their nearby Geo-Similarity" \ " using the mechanism: {}". \ format(source[St.graph], target[St.graph], specs[St.mechanism]) specs[St.triples] = Qry.get_namedgraph_size(specs[St.linkset], isdistinct=False) print "\t>>> {} CORRESPONDENCES INSERTED".format(specs[St.triples]) query = "\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}" \ "\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}" \ "\n{}\n{}\n{}\n{}\n{}" \ "\n{}\n{}\n{}" \ "\n{}\n{}\n{}\n{}\n{}" \ "\n{}\n{}\n{}\n{}\n{}". 
\ format("##################################################################", "### METADATA FOR {}".format(specs[St.linkset]), "##################################################################", "PREFIX prov: <{}>".format(Ns.prov), "PREFIX alivocab: <{}>".format(Ns.alivocab), "PREFIX rdfs: <{}>".format(Ns.rdfs), "PREFIX void: <{}>".format(Ns.void), "PREFIX bdb: <{}>".format(Ns.bdb), "INSERT", "{", " <{}>".format(specs[St.linkset]), " rdfs:label \"{}\" ; ".format(specs[St.linkset_name]), " a void:Linkset ;", " void:triples {} ;".format(specs[St.triples]), " alivocab:sameAsCount {} ;".format(specs[St.sameAsCount]), " alivocab:alignsMechanism <{}{}> ;".format(Ns.mechanism, specs[St.mechanism]), " void:subjectsTarget <{}> ;".format(source[St.graph]), " void:objectsTarget <{}> ;".format(target[St.graph]), " void:linkPredicate <{}> ;".format(specs[St.link]), " bdb:subjectsDatatype <{}> ;".format(source[St.entity_datatype]), " bdb:objectsDatatype <{}> ;".format(target[St.entity_datatype]), " alivocab:singletonGraph <{}> ;".format(specs[St.singleton]), " bdb:assertionMethod <{}> ;".format(specs[St.assertion_method]), " bdb:linksetJustification <{}> ;{}".format(specs[St.justification], extra), " alivocab:alignsSubjects ?src_aligns ;", " alivocab:alignsObjects ?trg_aligns ;{}".format(cross_check_insert), " rdfs:comment \"\"\"{}\"\"\" .".format(specs[St.linkset_comment]), "\n ### METADATA ABOUT THE LINKTYPE", " <{}>".format(specs[St.link]), " rdfs:comment \"\"\"{}\"\"\" ;".format(specs[St.link_comment]), " rdfs:label \"{} {}\" ;".format(specs[St.link_name], specs[St.sameAsCount]), " rdfs:subPropertyOf <{}> .".format(specs[St.link_subpropertyof]), "\n ### METADATA ABOUT THE LINKSET JUSTIFICATION", " <{}>".format(specs[St.justification]), " rdfs:comment \"\"\"{}\"\"\" .".format(specs[St.justification_comment]), "\n ### ASSERTION METHOD", " <{}>".format(specs[St.assertion_method]), " alivocab:sparql \"\"\"{}\"\"\" .".format(specs[St.insert_query]), "}", "WHERE", "{", " 
BIND(iri({}) AS ?src_aligns)".format(src_aligns), " BIND(iri({}) AS ?trg_aligns){}".format(trg_aligns, cross_check_where), "}") # print query if display is True: print query return query
def lens_refine_geo_metadata(specs, display=False):
    """Build the SPARQL INSERT query holding the metadata of a geo-similarity lens.

    Besides producing the query, the function writes several derived entries
    back into ``specs``: the singleton graph, link predicate, assertion method,
    justification and link comment are always set; the link name, the
    sub-property, the justification comment and the lens comment are only set
    when the mechanism is "nearbygeosim" (case-insensitive); the triple count
    is read from the endpoint via ``Qry.get_namedgraph_size``.

    :param specs: specification mapping; read with ``St.*`` keys
        (source, target, lens, lens_name, mechanism, sameAsCount, unit,
        unit_value, insert_query, ...).
    :param display: when exactly ``True`` the generated query is printed.
    :return: the query text.
    """
    source = specs[St.source]
    target = specs[St.target]

    # OPTIONAL PROPERTIES: (holder, key, triple template, compare/emit as text)
    optional_properties = (
        (source, St.reducer, "\n ll:subjectsReducer <{}> ;", False),
        (target, St.reducer, "\n ll:objectsReducer <{}> ;", False),
        (specs, St.intermediate_graph, "\n ll:intermediate <{}> ;", False),
        (specs, St.threshold, "\n ll:threshold {} ;", True),
        (specs, St.delta, "\n ll:delta {} ;", True),
    )
    extra = ""
    for holder, key, template, as_text in optional_properties:
        if key in holder:
            value = str(holder[key]) if as_text else holder[key]
            if len(value) > 0:
                extra += template.format(value)

    # PROPERTY PATHS USED FOR THE CROSS CHECK AND FOR THE COORDINATES
    src_cross_check = Ls.format_aligns(source[St.crossCheck])
    src_long = Ls.format_aligns(source[St.longitude])
    src_lat = Ls.format_aligns(source[St.latitude])
    trg_cross_check = Ls.format_aligns(target[St.crossCheck])
    trg_long = Ls.format_aligns(target[St.longitude])
    trg_lat = Ls.format_aligns(target[St.latitude])

    # RESOURCES DERIVED FROM THE NAME OF THE LENS
    specs[St.singleton] = "{}{}".format(Ns.singletons, specs[St.lens_name])
    specs[St.link] = "{}{}{}".format(Ns.alivocab, "nearbyGeoSim", specs[St.sameAsCount])
    specs[St.assertion_method] = "{}{}".format(Ns.method, specs[St.lens_name])
    specs[St.justification] = "{}{}".format(Ns.justification, specs[St.lens_name])
    link_comment_template = (
        "The predicate <{}> used in this linkset is a property that reflects an entity "
        "linking approach based on the <{}{}> mechanism.")
    specs[St.link_comment] = link_comment_template.format(
        specs[St.link], Ns.mechanism, specs[St.mechanism])

    # MECHANISM SPECIFIC DESCRIPTIONS
    # NOTE(review): these four entries are read below; for any other mechanism
    # they must already be present in specs -- confirm with the callers.
    if str(specs[St.mechanism]).lower() == "nearbygeosim":
        specs[St.link_name] = "Near by Geo-Similarity"
        specs[St.link_subpropertyof] = "http://risis.eu/linkset/predicate/{}".format(
            specs[St.mechanism])
        specs[St.justification_comment] = \
            "This includes entities near each other by at most {} <{}>.".format(
                specs[St.unit_value], specs[St.unit])
        lens_comment_template = (
            "Linking <{}> to <{}> based on their nearby Geo-Similarity"
            " using the mechanism: {}")
        specs[St.lens_comment] = lens_comment_template.format(
            source[St.graph], target[St.graph], specs[St.mechanism])

    # SIZE OF THE LENS AS CURRENTLY STORED AT THE ENDPOINT
    specs[St.triples] = Qry.get_namedgraph_size(specs[St.lens], isdistinct=False)
    print("\t>>> {} CORRESPONDENCES INSERTED".format(specs[St.triples]))

    # THE QUERY IS ASSEMBLED LINE BY LINE; EVERY LINE IS PRECEDED BY A NEW LINE
    rule = "##################################################################"
    lines = [
        rule,
        "### METADATA FOR {}".format(specs[St.lens]),
        rule,
        "PREFIX prov: <{}>".format(Ns.prov),
        "PREFIX ll: <{}>".format(Ns.alivocab),
        "PREFIX rdfs: <{}>".format(Ns.rdfs),
        "PREFIX void: <{}>".format(Ns.void),
        "PREFIX bdb: <{}>".format(Ns.bdb),
        "INSERT",
        "{",
        " <{}>".format(specs[St.lens]),
        ' rdfs:label "{}" ; '.format(specs[St.lens_name]),
        " a bdb:Lens ;",
        " void:triples {} ;".format(specs[St.triples]),
        " ll:sameAsCount {} ;".format(specs[St.sameAsCount]),
        " ll:alignsMechanism <{}{}> ;".format(Ns.mechanism, specs[St.mechanism]),
        " void:subjectsTarget <{}> ;".format(source[St.graph]),
        " void:objectsTarget <{}> ;".format(target[St.graph]),
        " void:linkPredicate <{}> ;".format(specs[St.link]),
        " bdb:subjectsDatatype <{}> ;".format(source[St.entity_datatype]),
        " bdb:objectsDatatype <{}> ;".format(target[St.entity_datatype]),
        " ll:singletonGraph <{}> ;".format(specs[St.singleton]),
        " bdb:assertionMethod <{}> ;".format(specs[St.assertion_method]),
        " bdb:linksetJustification <{}> ;{}".format(specs[St.justification], extra),
        " ll:crossCheckSubject ?src_crossCheck ;",
        " ll:crossCheckObject ?trg_crossCheck ;",
        " ll:unit <{}> ;".format(specs[St.unit]),
        " ll:unitValue {} ;".format(specs[St.unit_value]),
        " ll:alignsSubjects ( ?src_long ?src_lat ) ;",
        " ll:alignsObjects ( ?trg_long ?trg_lat ) ;",
        ' rdfs:comment """{}""" .'.format(specs[St.lens_comment]),
        "\n ### METADATA ABOUT THE LINKTYPE",
        " <{}>".format(specs[St.link]),
        ' rdfs:comment """{}""" ;'.format(specs[St.link_comment]),
        ' rdfs:label "{} {}" ;'.format(specs[St.link_name], specs[St.sameAsCount]),
        " rdfs:subPropertyOf <{}> .".format(specs[St.link_subpropertyof]),
        "\n ### METADATA ABOUT THE LINKSET JUSTIFICATION",
        " <{}>".format(specs[St.justification]),
        ' rdfs:comment """{}""" .'.format(specs[St.justification_comment]),
        "\n ### ASSERTION METHOD",
        " <{}>".format(specs[St.assertion_method]),
        ' ll:sparql """{}""" .'.format(specs[St.insert_query]),
        "}",
        "WHERE",
        "{",
        " BIND(iri({}) AS ?src_crossCheck)".format(src_cross_check),
        " BIND(iri({}) AS ?trg_crossCheck)".format(trg_cross_check),
        " BIND(iri({}) AS ?src_long)".format(src_long),
        " BIND(iri({}) AS ?src_lat)".format(src_lat),
        " BIND(iri({}) AS ?trg_long)".format(trg_long),
        " BIND(iri({}) AS ?trg_lat)".format(trg_lat),
        "}",
    ]
    query = "\n" + "\n".join(lines)

    if display is True:
        print(query)
    return query
def linkset_refined_metadata(specs, display=False):
    """Build the SPARQL INSERT query holding the metadata of a refined linkset.

    The function also writes derived entries into ``specs`` (singleton graph,
    link predicate, assertion method, justification, the various comments and
    the triple count of the refined graph) and prints how many correspondences
    of the source linkset survived the refinement.

    :param specs: specification mapping; read with ``St.*`` keys
        (source, target, refined, refined_name, linkset, mechanism,
        sameAsCount, insert_query, ...).
    :param display: when exactly ``True`` the generated query is printed.
    :return: ``{"query": <text>, "message": <html summary>}``; ``"query"`` is
        ``None`` when the refined graph holds no triple.
    """
    source = specs[St.source]
    target = specs[St.target]

    # CONDITIONAL METADATA TO APPEND TO THE REFINED LINKSET
    # (holder, key, triple template, compare/emit as text)
    optional_properties = (
        (source, St.extended_graph, "\n alivocab:subjectsExtended <{}> ;", False),
        (target, St.extended_graph, "\n alivocab:objectsExtended <{}> ;", False),
        (source, St.reducer, "\n alivocab:subjectsReducer <{}> ;", False),
        (target, St.reducer, "\n alivocab:objectsReducer <{}> ;", False),
        (specs, St.intermediate_graph, "\n alivocab:intermediatesTarget <{}> ;", False),
        (specs, St.threshold, "\n alivocab:threshold {} ;", True),
    )
    extra = ""
    for holder, key, template, as_text in optional_properties:
        if key in holder:
            value = str(holder[key]) if as_text else holder[key]
            if len(value) > 0:
                extra += template.format(value)

    # THE DELTA IS ONLY REPORTED WHEN IT IS NOT "0" AND CONVERTS TO A NUMBER
    if St.delta in specs and str(specs[St.delta]) != "0":
        delta = convert_to_float(str(specs[St.delta]))
        if not math.isnan(delta):
            extra += "\n alivocab:delta {} ;".format(delta)

    src_aligns = Ls.format_aligns(source[St.aligns])
    trg_aligns = Ls.format_aligns(target[St.aligns])

    # RESOURCES DERIVED FROM THE NAME OF THE REFINED LINKSET
    specs[St.singleton] = "{}{}".format(Ns.singletons, specs[St.refined_name])
    specs[St.link] = "{}{}{}".format(Ns.alivocab, "exactStrSim", specs[St.sameAsCount])
    specs[St.assertion_method] = "{}{}".format(Ns.method, specs[St.refined_name])
    specs[St.justification] = "{}{}".format(Ns.justification, specs[St.refined_name])
    link_comment_template = (
        "The predicate <{}> used in this linkset is a property that reflects an entity "
        "linking approach based on the <{}{}> mechanism.")
    specs[St.link_comment] = link_comment_template.format(
        specs[St.link], Ns.mechanism, specs[St.mechanism])

    # MECHANISM SPECIFIC DESCRIPTIONS
    mechanism = str(specs[St.mechanism]).lower()
    link_names = {
        "exactstrsim": "Exact String Similarity",
        "identity": "Same URI",
        "approxnbrsim": "Approximate Number Similarity",
        "approxstrsim": "Approximate String Similarity",
        "intermediate": "Exact String Similarity",
    }
    aligned_comment = "Linking <{}> to <{}> by aligning {} with {} using the mechanism: {}"

    if mechanism in link_names:
        specs[St.link_name] = link_names[mechanism]
        specs[St.link_subpropertyof] = "http://risis.eu/linkset/predicate/{}".format(
            specs[St.mechanism])

        if mechanism == "exactstrsim":
            specs[St.justification_comment] = (
                "We assume that entities with the aligned predicates sharing the "
                "exact same content are same. This assumption applies when dealing "
                "with entities such as Organisation.")
            specs[St.linkset_comment] = aligned_comment.format(
                source[St.graph], target[St.graph], src_aligns, trg_aligns, specs[St.mechanism])

        elif mechanism == "identity":
            specs[St.justification_comment] = \
                "We assume that entities with the same URI are identical."
            specs[St.linkset_comment] = \
                "Linking <{}> to <{}> based on their identical URI using the mechanism: {}".format(
                    source[St.graph], target[St.graph], specs[St.mechanism])

        elif mechanism == "approxnbrsim":
            specs[St.justification_comment] = (
                "This includes entities with an approximate number similarity"
                " in the interval [0 {}].").format(specs[St.delta])
            specs[St.linkset_comment] = (
                "Linking <{}> to <{}> based on their approximate number similarity"
                " using the mechanism: {}").format(
                    source[St.graph], target[St.graph], specs[St.mechanism])

        elif mechanism == "approxstrsim":
            specs[St.justification_comment] = \
                "This includes entities with a string similarity in the interval [{} 1[.".format(
                    specs[St.threshold])
            specs[St.linkset_comment] = (
                "Linking <{}> to <{}> based on their approximate string similarity"
                " using the mechanism: {}").format(
                    source[St.graph], target[St.graph], specs[St.mechanism])

        else:
            # "intermediate"
            specs[St.justification_comment] = (
                "This is an implementation of the Exact String Similarity Mechanism over "
                "the aligned predicates.")
            specs[St.linkset_comment] = aligned_comment.format(
                source[St.graph], target[St.graph], src_aligns, trg_aligns, specs[St.mechanism])

    # CHECKING WHETHER THE REFINED HAS SOME TRIPLES INSERTED
    specs[St.triples] = Qry.get_namedgraph_size(specs[St.refined], isdistinct=False)
    triples = Qry.get_namedgraph_size(specs[St.linkset], isdistinct=False)

    in_source = "{} CORRESPONDENCES IN THE SOURCE".format(triples)
    print("\t>>> " + in_source)
    inserted = "{} CORRESPONDENCES INSERTED".format(specs[St.triples])
    print("\t>>> " + inserted)
    rejected = "{} CORRESPONDENCES DO NOT COMPLY WITH THE NEW CONDITION".format(
        int(triples) - int(specs[St.triples]))
    print("\t>>> " + rejected)

    message = "<br/>".join([in_source, inserted, rejected])

    # NOTHING TO DESCRIBE WHEN THE REFINED GRAPH IS EMPTY
    if int(specs[St.triples]) <= 0:
        return {"query": None, "message": message}

    derived_from = specs[St.derivedfrom] if St.derivedfrom in specs else ""
    intermediate = ""
    if mechanism == "intermediate":
        intermediate = "\n alivocab:intermediatesTarget <{}> ;".format(
            specs[St.intermediate_graph])

    # THE QUERY IS ASSEMBLED LINE BY LINE; EVERY LINE IS PRECEDED BY A NEW LINE
    rule = "##################################################################"
    lines = [
        rule,
        "### METADATA FOR {}".format(specs[St.refined]),
        rule,
        "PREFIX prov: <{}>".format(Ns.prov),
        "PREFIX alivocab: <{}>".format(Ns.alivocab),
        "PREFIX rdfs: <{}>".format(Ns.rdfs),
        "PREFIX void: <{}>".format(Ns.void),
        "PREFIX bdb: <{}>".format(Ns.bdb),
        "INSERT",
        "{",
        " <{}>".format(specs[St.refined]),
        " a void:Linkset ;\n{}".format(derived_from),
        ' rdfs:label "{}" ; '.format(specs[St.refined_name]),
        " void:triples {} ;".format(specs[St.triples]),
        " alivocab:sameAsCount {} ;".format(specs[St.sameAsCount]),
        " alivocab:alignsMechanism <{}{}> ;".format(Ns.mechanism, specs[St.mechanism]),
        " void:subjectsTarget <{}> ;{}".format(source[St.graph], intermediate),
        " void:objectsTarget <{}> ;".format(target[St.graph]),
        " void:linkPredicate <{}> ;".format(specs[St.link]),
        " bdb:subjectsDatatype <{}> ;".format(source[St.entity_datatype]),
        " bdb:objectsDatatype <{}> ;".format(target[St.entity_datatype]),
        " alivocab:singletonGraph <{}> ;".format(specs[St.singleton]),
        " bdb:assertionMethod <{}> ;".format(specs[St.assertion_method]),
        " bdb:linksetJustification <{}> ;{}".format(specs[St.justification], extra),
        " alivocab:alignsSubjects ?src_aligns ;",
        " alivocab:alignsObjects ?trg_aligns ;",
        ' rdfs:comment """{}""" .'.format(specs[St.linkset_comment]),
        "\n ### METADATA ABOUT THE LINKTYPE",
        " <{}>".format(specs[St.link]),
        ' rdfs:comment """{}""" ;'.format(specs[St.link_comment]),
        ' rdfs:label "{} {}" ;'.format(specs[St.link_name], specs[St.sameAsCount]),
        " rdfs:subPropertyOf <{}> .".format(specs[St.link_subpropertyof]),
        "\n ### METADATA ABOUT THE LINKSET JUSTIFICATION",
        " <{}>".format(specs[St.justification]),
        ' rdfs:comment """{}""" .'.format(specs[St.justification_comment]),
        "\n ### ASSERTION METHOD",
        " <{}>".format(specs[St.assertion_method]),
        ' alivocab:sparql """{}""" .'.format(specs[St.insert_query]),
        "}",
        "WHERE",
        "{",
        " BIND(iri({}) AS ?src_aligns)".format(src_aligns),
        " BIND(iri({}) AS ?trg_aligns)".format(trg_aligns),
        "}",
    ]
    query = "\n" + "\n".join(lines)

    if display is True:
        print(query)
    print("\t>>> Done generating the metadata")
    return {"query": query, "message": message}
def spa_linkset_subset(specs, activated=False):
    """Create a subset linkset at the endpoint, describe it and dump it to file.

    Steps, in order: run the "subset" checks, execute the insert query built
    by ``spa_subset_insert``, read the size of the new graph, fetch the
    correspondences and the singleton graph with CONSTRUCT queries, insert the
    metadata produced by ``Gn.spa_subset_metadata`` and hand everything to
    ``write_to_file``.

    :param specs: specification mapping; ``St.triples`` and ``St.insert_query``
        are written into it.
    :param activated: nothing is done (and ``None`` is returned) unless this
        is exactly ``True``.
    :return: the result of ``Ls.run_checks`` when it is not "GOOD TO GO";
        a dictionary with error code 1 when no triple was inserted and the old
        link predicate is not found; otherwise a dictionary with error code 0
        and the linkset URI as result.
    """
    if activated is not True:
        return None

    precheck = Ls.run_checks(specs, check_type="subset")
    if precheck[St.result] != "GOOD TO GO":
        return precheck

    # THE LINKSET DOES NOT EXIT, LETS CREATE IT NOW
    print(Ls.linkset_info(specs, specs[St.sameAsCount]))

    # 1. GENERATE SUBSET LINKSET INSERT QUERY
    insert_query = spa_subset_insert(specs)

    # 2. EXECUTING INSERT SUBSET LINKSET QUERY AT ENDPOINT
    Qry.endpoint(insert_query)

    # 3. LINKSET SIZE (NUMBER OF TRIPLES)
    specs[St.triples] = Qry.get_namedgraph_size(specs[St.linkset])
    print("\t>>> {} TRIPLES INSERTED".format(specs[St.triples]))

    # NO MATCH FOUND
    if specs[St.triples] == "0":
        print("WE DID NOT INSERT A METADATA AS NO TRIPLE WAS INSERTED.")
        specs[St.insert_query] = insert_query

        # IS THE OLD LINK PREDICATE PRESENT AT ALL?
        probe = "ask {{ GRAPH <{}> {{ ?s <{}> ?o }} }}".format(
            specs[St.linkset], specs[St.source][St.link_old])
        if Qry.boolean_endpoint_response(probe) != "true":
            message = "{} DOES NOT EXIST IS {}.".format(
                specs[St.source][St.link_old], specs[St.source][St.graph])
            print(message)
            return {St.message: message, St.error_code: 1, St.result: None}

    def _dump_graph(construct_text, header):
        # Run the CONSTRUCT query and label its first "{" with the graph header.
        dump = Qry.endpointconstruct(construct_text)
        if dump is None:
            return None
        return dump.replace('{', header + "\n{", 1)

    # SOME MATCHES WHERE FOUND: FETCH THE CORRESPONDENCES
    correspondences_query = "\n" + "\n".join([
        "PREFIX predicate: <{}>".format(Ns.alivocab),
        "construct { ?x ?y ?z }",
        "where {{ graph <{}> {{ ?x ?y ?z }} }}".format(specs[St.linkset]),
    ]) + "\n"
    correspondences = _dump_graph(
        correspondences_query, "<{}>".format(specs[St.linkset]))

    # FETCH THE SINGLETON METADATA
    singletons_query = "\n" + "\n".join([
        "PREFIX singMetadata: <{}>".format(Ns.singletons),
        "PREFIX predicate: <{}>".format(Ns.alivocab),
        "PREFIX prov: <{}>".format(Ns.prov),
        "PREFIX rdf: <{}>".format(Ns.rdf),
        "construct { ?x ?y ?z }",
        "where {{ graph <{}{}> {{ ?x ?y ?z }} }}".format(Ns.singletons, specs[St.linkset_name]),
    ]) + "\n\n"
    singletons = _dump_graph(
        singletons_query, "singMetadata:{}".format(specs[St.linkset_name]))

    # 4. LINKSET METADATA
    specs[St.insert_query] = insert_query
    metadata = Gn.spa_subset_metadata(specs)

    # 5. EXECUTING INSERT LINKSET METADATA QUERY AT ENDPOINT
    Qry.endpoint(metadata)

    print("\t>>> WRITING TO FILE")
    write_to_file(
        graph_name=specs[St.linkset_name],
        metadata=metadata.replace("INSERT DATA", ""),
        correspondences=correspondences,
        singletons=singletons,
        directory=DIRECTORY)

    print("\tLinkset created as [SUBSET]:  %s" % (specs[St.linkset],))
    print("\t*** JOB DONE! ***")

    return {
        St.message: "The linkset was created as [{}] with {} triples found!".format(
            specs[St.linkset], specs[St.triples]),
        St.error_code: 0,
        St.result: specs[St.linkset],
    }
def specification_2_linkset_subset(specs, activated=False):
    """Prepare the specification of a subset linkset, create it and register it.

    The function fills ``specs`` with the generic metadata of the linkset
    (names, URIs and comments), delegates the creation to
    ``spa_linkset_subset`` and, when triples were inserted, registers the
    alignment mapping through ``Urq.register_alignment_mapping``.

    :param specs: specification mapping, updated in place.
    :param activated: the function only prints a notice and returns
        ``Ec.ERROR_CODE_0`` unless this is exactly ``True``.
    :return: the dictionary produced by ``spa_linkset_subset``; a dictionary
        carrying ``Ec.ERROR_CODE_0`` when not activated; a dictionary carrying
        ``Ec.ERROR_CODE_1`` when no sameAs count could be obtained.
    """
    if activated is not True:
        print(Ut.headings("THE FUNCTION [specification_2_linkset_subset] IS NOT ACTIVATED"))
        return {St.message: Ec.ERROR_CODE_0, St.error_code: 0, St.result: None}

    print(Ut.headings("EXECUTING LINKSET SUBSET SPECS..."))

    # ACCESS THE TASK SPECIFIC PREDICATE COUNT
    specs[St.sameAsCount] = Qry.get_same_as_count(specs[St.mechanism])

    if not specs[St.sameAsCount]:
        print(Ec.ERROR_CODE_1)
        return {St.message: Ec.ERROR_CODE_1, St.error_code: 5, St.result: None}

    source = specs[St.source]
    target = specs[St.target]

    # UPDATE THE SPECS OF SOURCE AND TARGETS
    update_specification(source)
    update_specification(target)

    # GENERATE THE NAME OF THE LINKSET
    Ls.set_subset_name(specs)

    # SETTING SOME GENERIC METADATA INFO
    specs[St.link_name] = "same"
    specs[St.link] = "http://risis.eu/linkset/predicate/{}".format(specs[St.link_name])
    specs[St.link_subpropertyof] = "http://risis.eu/linkset/predicate/{}".format(
        specs[St.link_name])
    specs[St.linkset] = "{}{}".format(Ns.linkset, specs[St.linkset_name])
    specs[St.assertion_method] = "{}{}".format(Ns.method, specs[St.linkset_name])
    specs[St.justification] = "{}{}".format(Ns.justification, specs[St.linkset_name])

    # COMMENT ON THE LINK PREDICATE
    specs[St.link_comment] = (
        "The predicate <{}> is used in replacement of the linktype <{}> used in the "
        "original <{}> dataset.").format(
            specs[St.link], specs[St.source][St.link_old], specs[St.source][St.graph])

    # COMMENT ON THE JUSTIFICATION FOR THIS LINKSET
    # (must be a plain string: a trailing comma here would store a tuple and
    # the metadata would then contain the tuple's repr)
    specs[St.justification_comment] = (
        "In OrgRef's a set of entities are linked to GRID. The linking method "
        "used by OrgRef is unknown. Here we assume that it is a curated work "
        "and extracted it as a linkset.")

    # COMMENT ON THE LINKSET ITSELF
    specs[St.linkset_comment] = (
        "The current linkset is a subset of the <{0}> dataset that links <{0}> to "
        "<{1}>. The methodology used by <{0}> to generate this builtin linkset in "
        "unknown.").format(specs[St.source][St.graph], specs[St.target][St.graph])

    # NAMESPACE = DATATYPE URI WITHOUT THE ENTITY NAME
    source[St.entity_ns] = str(source[St.entity_datatype]).replace(source[St.entity_name], '')
    target[St.entity_ns] = str(target[St.entity_datatype]).replace(target[St.entity_name], '')

    # GENERATE THE LINKSET
    inserted_linkset = spa_linkset_subset(specs, activated)

    # AN EXISTING LINKSET IS REPORTED AS IS, WITHOUT ANY NEW REGISTRATION
    if "ALREADY EXISTS" in inserted_linkset[St.message]:
        return inserted_linkset

    # THE TRIPLE COUNT IS COMPARED AS A NUMBER; A MISSING OR NON NUMERIC
    # COUNT (E.G. WHEN THE CHECKS STOPPED THE CREATION) MEANS "NOTHING INSERTED"
    triples = specs[St.triples] if St.triples in specs else None
    try:
        has_triples = int(triples) > 0
    except (TypeError, ValueError):
        has_triples = False

    if has_triples:
        # REGISTER THE ALIGNMENT: AT THIS POINT THE LINKSET WAS NEWLY CREATED
        Urq.register_alignment_mapping(specs, created=True)

    return inserted_linkset
def linkset_composition(alignment_mapping, request_ask_select_or_insert="ask", get_composition=False):
    """Build the query that checks for, or inserts, an alignment mapping.

    The description of the linkset alignment is fetched with a CONSTRUCT
    query (``get_linkset_alignment``) and its predicate-object part, the
    "composition", is extracted with regular expressions. That composition is
    then embedded in an ASK / SELECT / INSERT query over the research question
    graph. The query carries a ``###@SLOT`` marker after the composition.

    :param alignment_mapping: mapping read with ``St.researchQ_URI`` and
        ``St.refined`` (preferred when present) or ``St.linkset``.
    :param request_ask_select_or_insert: "ask" (default), "select *" or
        "insert", case-insensitive; any other value behaves like "ask".
    :param get_composition: when true, return the extracted composition text
        instead of a query.
    :return: the query text (or the composition text); ``None`` when no
        composition could be extracted, including when the endpoint returned
        no CONSTRUCT result.
    """
    question_uri = alignment_mapping[St.researchQ_URI]
    if St.refined in alignment_mapping:
        linkset_uri = alignment_mapping[St.refined]
    else:
        linkset_uri = alignment_mapping[St.linkset]

    # 1.1 GET THE LINKSET ALIGNMENT
    linkset_alignment_query = get_linkset_alignment(question_uri, linkset_uri)
    construct = Qry.endpointconstruct(linkset_alignment_query)
    if construct is None:
        # NO ANSWER FROM THE ENDPOINT: HANDLED BELOW AS "NOTHING FOUND"
        construct = ""

    # BINDINGS THAT PRECEDE THE DESCRIPTION (EVERYTHING UP TO THE LAST ")")
    bindings = re.findall(r'{(.*\)).*<.*> a <.*?> ;.*}', construct, re.S)
    composition_init = bindings[0] if bindings else ""

    # PREDICATE-OBJECT LIST THAT FOLLOWS THE "a <type> ;" STATEMENT
    composition = re.findall(r'{.*a <.*?> ;(.*)}', construct, re.S)

    # CHECKED BEFORE ANY USE OF composition[0], WHATEVER THE REQUESTED OUTPUT
    if not composition:
        print("\tcomposition: %s %s %s" % (type(composition), len(composition), composition))
        print("\tTHE LINKSET <{}> DOES NOT EXIST".format(linkset_uri))
        print(linkset_alignment_query)
        return None

    if get_composition:
        return composition[0]

    composition_str = composition[0].replace("\t\t", "\t\t\t\t")

    ask = "ASK"
    where = ""
    mode = request_ask_select_or_insert.upper()
    if mode == "SELECT *":
        # NOTE(review): the keyword is emitted without a projection ("SELECT ")
        # -- confirm whether "SELECT *" was intended.
        ask = "SELECT "
    elif mode == "INSERT":
        ask = "INSERT"
        where = (
            " WHERE {{ {} BIND(iri(replace('http://risis.eu/activity/idea_algmt_#','#',"
            "SUBSTR(str(uuid()), 40))) as ?alignmentMapping) }}"
        ).format(composition_init)
        # SO THAT IT IS NOT INSERTED MORE THAN ONES
        composition_init = ""

    # 1.2 CHECK WHETHER THE ALIGNMENT WAS REGISTERED
    # THE "\n" AFTER ###@SLOT ENDS THE SPARQL COMMENT THAT THE MARKER STARTS
    body = (
        " {0} {{ {4} GRAPH <{1}> {{ <{1}> alivocab:created ?alignmentMapping . "
        "?alignmentMapping a <http://risis.eu/class/AlignmentMapping> ;{2}"
        "\t\t\t\t###@SLOT\n\t\t}} }} {3}"
    ).format(ask, question_uri, composition_str, where, composition_init)

    return PREFIX + body
def register_alignment_mapping(alignment_mapping, created):
    """Register a linkset under an alignment mapping of a research question.

    The function asks the endpoint whether an alignment mapping with the
    composition of this linkset exists (query built by
    ``linkset_composition``). If not, the mapping is inserted first. The
    linkset is then attached to the mapping with the query built by
    ``linkset_createdorused``; for a refined linkset (``St.refined`` present
    in ``alignment_mapping``) its evolution is registered beforehand.

    :param alignment_mapping: mapping read with ``St.researchQ_URI`` and
        ``St.refined`` (preferred when present) or ``St.linkset``.
    :param created: forwarded as ``is_created`` to ``linkset_createdorused``.
    :return: ``None`` in every case; progress is reported with ``print``.
    """

    print "\nREGISTERING AN [ALIGNMENT-MAPPING]"
    question_uri = alignment_mapping[St.researchQ_URI]

    # MAKE SURE THE RIGHT URI IS USED WHEN REGISTERING A REFINED LINKSET
    linkset_uri = alignment_mapping[St.refined] if St.refined in alignment_mapping else alignment_mapping[St.linkset]
    print "\tLINKSET TO REGISTER:", linkset_uri

    # LINKSET EXISTS
    if linkset_uri:

        # 1 CHECK WHETHER THE ALIGNMENT WAS REGISTERED
        ask_query = linkset_composition(alignment_mapping, request_ask_select_or_insert="ask")

        # NO QUERY COULD BE BUILT FOR THIS LINKSET: NOTHING TO REGISTER
        if ask_query is None:
            return

        # THE ENDPOINT ANSWER IS COMPARED BELOW AS THE TEXT "false"
        ask = Qry.boolean_endpoint_response(ask_query)
        print "\t>>> ASK WHETHER THE [ALIGNMENT] WAS REGISTERED:", ask

        # 2 THE ALIGNMENT WAS NOT REGISTERED
        if ask == "false":

            # REGISTER THE ALIGNMENT-MAPPING
            insert_alignment_query = linkset_composition(alignment_mapping, request_ask_select_or_insert="insert")
            insert_alignment = Qry.boolean_endpoint_response(insert_alignment_query)
            print "\t>>> IS THE [ALIGNMENT] NOW INSERTED?:", insert_alignment

            # 2.1 RETRIEVE THE ALIGNMENT-MAPPING URI
            # THE ASK QUERY IS REUSED AS A SELECT ON ?alignmentMapping
            alignment_uri = None
            alignment_uri_query = ask_query.replace("ASK", "SELECT ?alignmentMapping")
            alignment_uri_resp = Qry.sparql_xml_to_matrix(alignment_uri_query)
            if alignment_uri_resp:
                if alignment_uri_resp[St.result]:
                    # presumably row 0 of the matrix is a header row -- verify
                    alignment_uri = alignment_uri_resp[St.result][1][0]
                    print "\t>>> ALIGNMENT REGISTERED AS:", alignment_uri

            if alignment_uri:

                # IF WE ARE DEALING WITH A REFINED LINKSET, REGISTER ITS EVOLUTION
                if St.refined in alignment_mapping:
                    print "REGISTERING THE EVOLUTION OF THIS REFINED LINKSET TO\n\t{}".format(alignment_uri)
                    evolution_str = linkset_evolution(question_uri, linkset_uri)
                    register_evolution(question_uri, alignment_uri, evolution_str)

                # 2.2 ADD THE LINKSET TO THE ALIGNMENT
                assign_ls_query = linkset_createdorused(question_uri, alignment_uri, alignment_mapping, is_created=created)
                is_linkset_registered = Qry.boolean_endpoint_response(assign_ls_query)
                print ">>> IS THE [LINKSET] REGISTERED?:", is_linkset_registered

        # 3 THE ALIGNMENT WAS REGISTERED
        else:

            # CHECK IF THE LINKSET WAS REGISTERED: THE ###@SLOT MARKER OF THE ASK
            # QUERY IS REPLACED BY A PATTERN LINKING THE MAPPING TO THE LINKSET
            is_linkset_registered_query = ask_query.replace(
                "###@SLOT", "\n\t\t\t?alignmentMapping ?pred\t<{}> .".format(linkset_uri))
            is_linkset_registered = Qry.boolean_endpoint_response(is_linkset_registered_query)
            print "\t>>> ASK WHETHER [LINKSET] WAS REGISTERED?:", is_linkset_registered

            if is_linkset_registered == "false":

                # RETRIEVE THE ALIGNMENT-MAPPING URI
                alignment_uri = None
                alignment_uri_query = ask_query.replace("ASK", "SELECT ?alignmentMapping")
                alignment_uri_resp = Qry.sparql_xml_to_matrix(alignment_uri_query)
                if alignment_uri_resp:
                    if alignment_uri_resp[St.result]:
                        # presumably row 0 of the matrix is a header row -- verify
                        alignment_uri = alignment_uri_resp[St.result][1][0]

                if alignment_uri:

                    # IF WE ARE DEALING WITH A REFINED LINKSET,
                    # REGISTER ITS EVOLUTION IF NOT REGISTERED YET
                    if St.refined in alignment_mapping:
                        print "REGISTERING THE EVOLUTION OF THIS REFINED LINKSET"
                        evolution_str = linkset_evolution(question_uri, linkset_uri)
                        register_evolution(question_uri, alignment_uri, evolution_str)

                    # 2.3 ADD THE LINKSET TO THE ALIGNMENT
                    assign_ls_query = linkset_createdorused(question_uri, alignment_uri, alignment_mapping, is_created=created)
                    is_linkset_registered = Qry.boolean_endpoint_response(assign_ls_query)
                    print "\t>>> IS LINKSET NOW REGISTERED?:", is_linkset_registered