def diff_meta(specs):
    """
    Generate the metadata INSERT query for a DIFFERENCE lens that has
    already been materialised at the SPARQL endpoint.

    :param specs: is of type dictionary. For this, it needs the following keys:
        lens_name: the name of this lens
        lens: the URI of the lens about to be created
        lens_target_triples: predicate object for each graph directly involved in the lens
        triples: The number of triples in this graph
        expectedTriples: Because of possible triple removal, this provides the sum of all
            correspondences from all direct target graphs
        removedDuplicates: The number of removed triples in case of duplicates
        insert_query: the insert query that let to the creation of the current lens.
    :return: the metadata INSERT DATA query as a string.
    """
    # ACTUAL SIZE OF THE LENS GRAPH (COUNTED AT THE ENDPOINT, NOT DISTINCT)
    specs[St.triples] = Qry.get_namedgraph_size(specs[St.lens], isdistinct=False)

    # NOTE(review): positional slots — {0} lens URI, {1}-{5}+{12} namespaces,
    # {6} triple count, {7}/{8} targets, {9}+{10} assertion-method URI parts,
    # {11} the originating insert query embedded as a literal.
    metadata = """
    ##################################################################
    ### METADATA
    ### for the lens: {0}
    ##################################################################

    PREFIX rdfs: <{1}>
    PREFIX alivocab: <{2}>
    PREFIX void: <{3}>
    PREFIX bdb: <{4}>
    PREFIX lensOp: <{5}>
    PREFIX specific: <{12}>

    INSERT DATA
    {{
        ### RESOURCE
        <{0}>
            a bdb:Lens ;
            rdfs:label "{10}" ;
            alivocab:operator lensOp:difference ;
            void:triples {6} ;
            void:subjectsTarget <{7}> ;
            void:objectsTarget <{8}> ;
            alivocab:singletonGraph specific:{10} ;
            bdb:assertionMethod <{9}{10}> .

        ### ASSERTION METHOD"
        <{9}{10}> alivocab:sparql \"\"\"{11}\"\"\" .
    }}""".format(specs[St.lens], Ns.rdfs, Ns.alivocab, Ns.void, Ns.bdb, Ns.lensOp,
                 specs[St.triples], specs[St.subjectsTarget], specs[St.objectsTarget],
                 Ns.method, specs[St.lens_name], specs[St.insert_query], Ns.singletons)

    # print metadata
    return metadata
def linkset_metadata(specs, display=False): extra = "" if St.reducer in specs[St.source] and len( specs[St.source][St.reducer]) > 0: extra += "\n alivocab:subjectsReducer <{}> ;".format( specs[St.source][St.reducer]) if St.reducer in specs[St.target] and len( specs[St.target][St.reducer]) > 0: extra += "\n alivocab:objectsReducer <{}> ;".format( specs[St.target][St.reducer]) if St.intermediate_graph in specs and len( specs[St.intermediate_graph]) > 0: extra += "\n alivocab:intermediate <{}> ;".format( specs[St.intermediate_graph]) if St.threshold in specs and len(str(specs[St.threshold])) > 0: extra += "\n alivocab:threshold {} ;".format( str(specs[St.threshold])) if St.delta in specs and len(str(specs[St.delta])) > 0: extra += "\n alivocab:delta {} ;".format( str(specs[St.delta])) source = specs[St.source] target = specs[St.target] src_aligns = Ls.format_aligns(source[St.aligns]) trg_aligns = Ls.format_aligns(target[St.aligns]) # cCROSS CHECK INFORMATION IS USED IN CASE THE ALIGN PROPERTY APPEARS MEANINGLESS src_cross_check = Ls.format_aligns( source[St.crossCheck]) if St.crossCheck in source else None trg_cross_check = Ls.format_aligns( target[St.crossCheck]) if St.crossCheck in target else None # CROSS CHECK FOR THE WHERE CLAUSE cross_check_where = '' cross_check_where += "\n BIND(iri({}) AS ?src_crossCheck)".format( src_cross_check) if src_cross_check is not None else '' cross_check_where += "\n BIND(iri({}) AS ?trg_crossCheck)".format( trg_cross_check) if trg_cross_check is not None else '' # CROSS CHECK FOR THE INSERT CLAUSE cross_check_insert = '' cross_check_insert += "\n alivocab:crossCheckSubject ?src_crossCheck ;" \ if src_cross_check is not None else '' cross_check_insert += "\n alivocab:crossCheckObject ?trg_crossCheck ;" \ if trg_cross_check is not None else '' # specs[St.linkset] = "{}{}".format(Ns.linkset, specs[St.linkset_name]) specs[St.singleton] = "{}{}".format(Ns.singletons, specs[St.linkset_name]) specs[St.link] = "{}{}{}".format(Ns.alivocab, 
"exactStrSim", specs[St.sameAsCount]) specs[St.assertion_method] = "{}{}".format(Ns.method, specs[St.linkset_name]) specs[St.justification] = "{}{}".format(Ns.justification, specs[St.linkset_name]) specs[St.link_comment] = "The predicate <{}> used in this linkset is a property that reflects an entity " \ "linking approach based on the <{}{}> mechanism.". \ format(specs[St.link], Ns.mechanism, specs[St.mechanism]) if str(specs[St.mechanism]).lower() == "intermediate": specs[ St.link_name] = "Exact String Similarity via intermediate dataset" specs[ St. link_subpropertyof] = "http://risis.eu/linkset/predicate/{}".format( specs[St.mechanism]) specs[St.justification_comment] = "The method MATCH VIA INTERMEDIATE DATASET is used to align the" \ " source and the target by using properties that present different " \ "descriptions of a same entity, such as country name and country code. " \ "This is possible by providing an intermediate dataset that binds the " \ "two alternative descriptions to the very same identifier." specs[St.linkset_comment] = "Linking <{}> to <{}> by aligning {} with {} using the mechanism: {}". \ format(source[St.graph], target[St.graph], src_aligns, trg_aligns, specs[St.mechanism]) if str(specs[St.mechanism]).lower() == "exactstrsim": specs[St.link_name] = "Exact String Similarity" specs[ St. link_subpropertyof] = "http://risis.eu/linkset/predicate/{}".format( specs[St.mechanism]) specs[St.justification_comment] = "We assume that entities with the aligned predicates sharing the " \ "exact same content are the same. This assumption applies when dealing " \ "with entities such as Organisation." specs[St.linkset_comment] = "Linking <{}> to <{}> by aligning {} with {} using the mechanism: {}". \ format(source[St.graph], target[St.graph], src_aligns, trg_aligns, specs[St.mechanism]) elif str(specs[St.mechanism]).lower() == "identity": specs[St.link_name] = "Same URI" specs[ St. 
link_subpropertyof] = "http://risis.eu/linkset/predicate/{}".format( specs[St.mechanism]) specs[ St. justification_comment] = "We assume that entities with the same URI are identical." specs[St.linkset_comment] = "Linking <{}> to <{}> based on their identical URI using the mechanism: {}". \ format(source[St.graph], target[St.graph], specs[St.mechanism]) elif str(specs[St.mechanism]).lower() == "approxstrsim": specs[St.link_name] = "Approximate String Similarity" specs[ St. link_subpropertyof] = "http://risis.eu/linkset/predicate/{}".format( specs[St.mechanism]) specs[St.justification_comment] = "This includes entities with a string similarity in the interval [{} 1[.".\ format(specs[St.threshold]) specs[St.linkset_comment] = "Linking <{}> to <{}> based on their approximate string similarity" \ " using the mechanism: {}". \ format(source[St.graph], target[St.graph], specs[St.mechanism]) elif str(specs[St.mechanism]).lower() == "nearbygeosim": specs[St.link_name] = "Near by Geo-Similarity" specs[ St. link_subpropertyof] = "http://risis.eu/linkset/predicate/{}".format( specs[St.mechanism]) specs[St.justification_comment] = "This includes entities near each other by at most {} <{}>.". \ format(specs[St.unit_value], specs[St.unit_value]) specs[St.linkset_comment] = "Linking <{}> to <{}> based on their nearby Geo-Similarity" \ " using the mechanism: {}". \ format(source[St.graph], target[St.graph], specs[St.mechanism]) specs[St.triples] = Qry.get_namedgraph_size(specs[St.linkset], isdistinct=False) print "\t>>> {} CORRESPONDENCES INSERTED".format(specs[St.triples]) query = "\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}" \ "\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}" \ "\n{}\n{}\n{}\n{}\n{}" \ "\n{}\n{}\n{}" \ "\n{}\n{}\n{}\n{}\n{}" \ "\n{}\n{}\n{}\n{}\n{}". 
\ format("##################################################################", "### METADATA FOR {}".format(specs[St.linkset]), "##################################################################", "PREFIX prov: <{}>".format(Ns.prov), "PREFIX alivocab: <{}>".format(Ns.alivocab), "PREFIX rdfs: <{}>".format(Ns.rdfs), "PREFIX void: <{}>".format(Ns.void), "PREFIX bdb: <{}>".format(Ns.bdb), "INSERT", "{", " <{}>".format(specs[St.linkset]), " rdfs:label \"{}\" ; ".format(specs[St.linkset_name]), " a void:Linkset ;", " void:triples {} ;".format(specs[St.triples]), " alivocab:sameAsCount {} ;".format(specs[St.sameAsCount]), " alivocab:alignsMechanism <{}{}> ;".format(Ns.mechanism, specs[St.mechanism]), " void:subjectsTarget <{}> ;".format(source[St.graph]), " void:objectsTarget <{}> ;".format(target[St.graph]), " void:linkPredicate <{}> ;".format(specs[St.link]), " bdb:subjectsDatatype <{}> ;".format(source[St.entity_datatype]), " bdb:objectsDatatype <{}> ;".format(target[St.entity_datatype]), " alivocab:singletonGraph <{}> ;".format(specs[St.singleton]), " bdb:assertionMethod <{}> ;".format(specs[St.assertion_method]), " bdb:linksetJustification <{}> ;{}".format(specs[St.justification], extra), " alivocab:alignsSubjects ?src_aligns ;", " alivocab:alignsObjects ?trg_aligns ;{}".format(cross_check_insert), " rdfs:comment \"\"\"{}\"\"\" .".format(specs[St.linkset_comment]), "\n ### METADATA ABOUT THE LINKTYPE", " <{}>".format(specs[St.link]), " rdfs:comment \"\"\"{}\"\"\" ;".format(specs[St.link_comment]), " rdfs:label \"{} {}\" ;".format(specs[St.link_name], specs[St.sameAsCount]), " rdfs:subPropertyOf <{}> .".format(specs[St.link_subpropertyof]), "\n ### METADATA ABOUT THE LINKSET JUSTIFICATION", " <{}>".format(specs[St.justification]), " rdfs:comment \"\"\"{}\"\"\" .".format(specs[St.justification_comment]), "\n ### ASSERTION METHOD", " <{}>".format(specs[St.assertion_method]), " alivocab:sparql \"\"\"{}\"\"\" .".format(specs[St.insert_query]), "}", "WHERE", "{", " 
BIND(iri({}) AS ?src_aligns)".format(src_aligns), " BIND(iri({}) AS ?trg_aligns){}".format(trg_aligns, cross_check_where), "}") # print query if display is True: print query return query
def lens_refine_geo_metadata(specs, display=False):
    """
    Generate the metadata INSERT query for a geo-refined lens.

    Same overall shape as linkset_metadata() but for a lens produced with
    the "nearbyGeoSim" mechanism: the alignment is expressed with
    longitude/latitude property paths plus a cross-check property on each
    side, and the unit / unit-value of the distance threshold is recorded.

    :param specs: dictionary describing the lens. Expected keys include
        source / target (each with graph, crossCheck, longitude, latitude,
        entity_datatype, optionally reducer), mechanism, lens, lens_name,
        sameAsCount, unit, unit_value and insert_query. Derived keys
        (singleton, link, assertion_method, justification, triples,
        link_comment, lens_comment...) are SET by this function.
    :param display: when True, the generated query is printed to the console.
    :return: the metadata INSERT query as a string.
    """
    extra = ""

    # OPTIONAL METADATA: each statement is appended only when the
    # corresponding key is present and non-empty in the specs.
    if St.reducer in specs[St.source] and len(specs[St.source][St.reducer]) > 0:
        extra += "\n        ll:subjectsReducer <{}> ;".format(
            specs[St.source][St.reducer])

    if St.reducer in specs[St.target] and len(specs[St.target][St.reducer]) > 0:
        extra += "\n        ll:objectsReducer <{}> ;".format(
            specs[St.target][St.reducer])

    if St.intermediate_graph in specs and len(specs[St.intermediate_graph]) > 0:
        extra += "\n        ll:intermediate <{}> ;".format(
            specs[St.intermediate_graph])

    if St.threshold in specs and len(str(specs[St.threshold])) > 0:
        extra += "\n        ll:threshold {} ;".format(str(specs[St.threshold]))

    if St.delta in specs and len(str(specs[St.delta])) > 0:
        extra += "\n        ll:delta {} ;".format(str(specs[St.delta]))

    source = specs[St.source]
    target = specs[St.target]

    # PROPERTY PATHS BOUND IN THE WHERE CLAUSE (cross-check + coordinates).
    # NOTE(review): unlike linkset_metadata(), crossCheck is accessed
    # unconditionally here — a KeyError would occur if absent; presumably
    # the geo workflow guarantees it.
    src_cross_check = Ls.format_aligns(source[St.crossCheck])
    src_long = Ls.format_aligns(source[St.longitude])
    src_lat = Ls.format_aligns(source[St.latitude])
    trg_cross_check = Ls.format_aligns(target[St.crossCheck])
    trg_long = Ls.format_aligns(target[St.longitude])
    trg_lat = Ls.format_aligns(target[St.latitude])

    # DERIVED URIS FOR THIS LENS
    # specs[St.linkset] = "{}{}".format(Ns.linkset, specs[St.linkset_name])
    specs[St.singleton] = "{}{}".format(Ns.singletons, specs[St.lens_name])
    specs[St.link] = "{}{}{}".format(Ns.alivocab, "nearbyGeoSim", specs[St.sameAsCount])
    specs[St.assertion_method] = "{}{}".format(Ns.method, specs[St.lens_name])
    specs[St.justification] = "{}{}".format(Ns.justification, specs[St.lens_name])
    specs[St.link_comment] = "The predicate <{}> used in this linkset is a property that reflects an entity " \
                             "linking approach based on the <{}{}> mechanism.". \
        format(specs[St.link], Ns.mechanism, specs[St.mechanism])

    # MECHANISM-SPECIFIC NAME, JUSTIFICATION AND COMMENT (geo only).
    if str(specs[St.mechanism]).lower() == "nearbygeosim":
        specs[St.link_name] = "Near by Geo-Similarity"
        specs[St.link_subpropertyof] = "http://risis.eu/linkset/predicate/{}".format(
            specs[St.mechanism])
        specs[St.justification_comment] = "This includes entities near each other by at most {} <{}>.". \
            format(specs[St.unit_value], specs[St.unit])
        specs[St.lens_comment] = "Linking <{}> to <{}> based on their nearby Geo-Similarity" \
                                 " using the mechanism: {}". \
            format(source[St.graph], target[St.graph], specs[St.mechanism])

    # ACTUAL NUMBER OF CORRESPONDENCES MATERIALISED IN THE LENS GRAPH
    specs[St.triples] = Qry.get_namedgraph_size(specs[St.lens], isdistinct=False)
    print "\t>>> {} CORRESPONDENCES INSERTED".format(specs[St.triples])

    # THE METADATA QUERY: 52 positional slots, one per line of the query.
    query = "\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}" \
            "\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}" \
            "\n{}\n{}\n{}\n{}\n{}" \
            "\n{}\n{}\n{}" \
            "\n{}\n{}\n{}\n{}\n{}" \
            "\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}". \
        format("##################################################################",
               "### METADATA FOR {}".format(specs[St.lens]),
               "##################################################################",
               "PREFIX prov: <{}>".format(Ns.prov),
               "PREFIX ll: <{}>".format(Ns.alivocab),
               "PREFIX rdfs: <{}>".format(Ns.rdfs),
               "PREFIX void: <{}>".format(Ns.void),
               "PREFIX bdb: <{}>".format(Ns.bdb),
               "INSERT",
               "{",
               "    <{}>".format(specs[St.lens]),
               "        rdfs:label \"{}\" ; ".format(specs[St.lens_name]),
               "        a bdb:Lens ;",
               "        void:triples {} ;".format(specs[St.triples]),
               "        ll:sameAsCount {} ;".format(specs[St.sameAsCount]),
               "        ll:alignsMechanism <{}{}> ;".format(Ns.mechanism, specs[St.mechanism]),
               "        void:subjectsTarget <{}> ;".format(source[St.graph]),
               "        void:objectsTarget <{}> ;".format(target[St.graph]),
               "        void:linkPredicate <{}> ;".format(specs[St.link]),
               "        bdb:subjectsDatatype <{}> ;".format(source[St.entity_datatype]),
               "        bdb:objectsDatatype <{}> ;".format(target[St.entity_datatype]),
               "        ll:singletonGraph <{}> ;".format(specs[St.singleton]),
               "        bdb:assertionMethod <{}> ;".format(specs[St.assertion_method]),
               "        bdb:linksetJustification <{}> ;{}".format(specs[St.justification], extra),
               "        ll:crossCheckSubject ?src_crossCheck ;",
               "        ll:crossCheckObject ?trg_crossCheck ;",
               "        ll:unit <{}> ;".format(specs[St.unit]),
               "        ll:unitValue {} ;".format(specs[St.unit_value]),
               "        ll:alignsSubjects ( ?src_long ?src_lat ) ;",
               "        ll:alignsObjects ( ?trg_long ?trg_lat ) ;",
               "        rdfs:comment \"\"\"{}\"\"\" .".format(specs[St.lens_comment]),
               "\n    ### METADATA ABOUT THE LINKTYPE",
               "    <{}>".format(specs[St.link]),
               "        rdfs:comment \"\"\"{}\"\"\" ;".format(specs[St.link_comment]),
               "        rdfs:label \"{} {}\" ;".format(specs[St.link_name], specs[St.sameAsCount]),
               "        rdfs:subPropertyOf <{}> .".format(specs[St.link_subpropertyof]),
               "\n    ### METADATA ABOUT THE LINKSET JUSTIFICATION",
               "    <{}>".format(specs[St.justification]),
               "        rdfs:comment \"\"\"{}\"\"\" .".format(specs[St.justification_comment]),
               "\n    ### ASSERTION METHOD",
               "    <{}>".format(specs[St.assertion_method]),
               "        ll:sparql \"\"\"{}\"\"\" .".format(specs[St.insert_query]),
               "}",
               "WHERE",
               "{",
               "    BIND(iri({}) AS ?src_crossCheck)".format(src_cross_check),
               "    BIND(iri({}) AS ?trg_crossCheck)".format(trg_cross_check),
               "    BIND(iri({}) AS ?src_long)".format(src_long),
               "    BIND(iri({}) AS ?src_lat)".format(src_lat),
               "    BIND(iri({}) AS ?trg_long)".format(trg_long),
               "    BIND(iri({}) AS ?trg_lat)".format(trg_lat),
               "}")

    # print query
    if display is True:
        print query
    return query
def spa_linkset_subset(specs, activated=False):
    """
    Create a linkset as a [SUBSET] of an existing one: run the subset insert
    query at the endpoint, generate and insert its metadata, and dump the
    correspondences + singleton graphs to file.

    :param specs: dictionary describing the subset linkset (linkset,
        linkset_name, sameAsCount, source with link_old/graph...).
    :param activated: the function only runs when this is True; otherwise
        it implicitly returns None.
    :return: a dict with St.message, St.error_code (0 on success, 1 on
        failure) and St.result (the linkset URI or None).
    """
    if activated is True:

        # PRE-FLIGHT CHECKS (e.g. does the subset already exist?)
        check = Ls.run_checks(specs, check_type="subset")
        if check[St.result] != "GOOD TO GO":
            return check

        # THE LINKSET DOES NOT EXIST, LETS CREATE IT NOW
        print Ls.linkset_info(specs, specs[St.sameAsCount])

        ##########################################################
        """ 1. GENERATE SUBSET LINKSET INSERT QUERY            """
        ##########################################################
        insert_query = spa_subset_insert(specs)
        # print insert_query

        #############################################################
        """ 2. EXECUTING INSERT SUBSET LINKSET QUERY AT ENDPOINT  """
        #############################################################
        Qry.endpoint(insert_query)

        #############################################################
        """ 3. LINKSET SIZE (NUMBER OF TRIPLES)                   """
        #############################################################
        # LINKSET SIZE (NUMBER OF TRIPLES)
        # NOTE(review): get_namedgraph_size returns a string here — the
        # zero test below compares against "0".
        specs[St.triples] = Qry.get_namedgraph_size(specs[St.linkset])
        print "\t>>> {} TRIPLES INSERTED".format(specs[St.triples])

        # NO MATCH FOUND
        if specs[St.triples] == "0":
            # logger.warning("WE DID NOT INSERT A METADATA AS NO TRIPLE WAS INSERTED.")
            print "WE DID NOT INSERT A METADATA AS NO TRIPLE WAS INSERTED."
            specs[St.insert_query] = insert_query
            # metadata = spa_subset_metadata(source, target, data, size)

            # DIAGNOSE: does the old link predicate exist in the source at all?
            explain_q = "ask {{ GRAPH <{}> {{ ?s <{}> ?o }} }}".format(
                specs[St.linkset], specs[St.source][St.link_old])
            response = Qry.boolean_endpoint_response(explain_q)
            explain = True if response == "true" else False
            # print explain
            if explain is False:
                # logger.warning("{} DOES NOT EXIST IS {}.".format(data[St.link_old], source[St.graph]))
                print "{} DOES NOT EXIST IS {}.".format(
                    specs[St.source][St.link_old], specs[St.source][St.graph])
                message = "{} DOES NOT EXIST IS {}.".format(
                    specs[St.source][St.link_old], specs[St.source][St.graph])
                return {St.message: message, St.error_code: 1, St.result: None}

        # SOME MATCHES WHERE FOUND
        # DUMP THE CORRESPONDENCES FOR THE FILE BACKUP
        construct_query = "\n{}\n{}\n{}\n".format(
            "PREFIX predicate: <{}>".format(Ns.alivocab),
            "construct { ?x ?y ?z }",
            "where {{ graph <{}> {{ ?x ?y ?z }} }}".format(specs[St.linkset]),
        )
        # print construct_query
        construct_response = Qry.endpointconstruct(construct_query)
        if construct_response is not None:
            # PREFIX THE TURTLE BODY WITH THE NAMED-GRAPH URI (TriG style)
            construct_response = construct_response.replace(
                '{', "<{}>\n{{".format(specs[St.linkset]), 1)

        # GENERATE LINKSET SINGLETON METADATA QUERY
        singleton_metadata_query = "\n{}\n{}\n{}\n{}\n{}\n{}\n\n".format(
            "PREFIX singMetadata: <{}>".format(Ns.singletons),
            "PREFIX predicate: <{}>".format(Ns.alivocab),
            "PREFIX prov: <{}>".format(Ns.prov),
            "PREFIX rdf: <{}>".format(Ns.rdf),
            "construct { ?x ?y ?z }",
            "where {{ graph <{}{}> {{ ?x ?y ?z }} }}".format(
                Ns.singletons, specs[St.linkset_name]),
        )

        # GET THE SINGLETON METADATA USING THE CONSTRUCT QUERY
        singleton_construct = Qry.endpointconstruct(singleton_metadata_query)
        if singleton_construct is not None:
            singleton_construct = singleton_construct.replace(
                '{', "singMetadata:{}\n{{".format(specs[St.linkset_name]), 1)

        #############################################################
        """ 4. LINKSET METADATA                                   """
        #############################################################
        # METADATA
        specs[St.insert_query] = insert_query
        metadata = Gn.spa_subset_metadata(specs)

        ###############################################################
        """ 5. EXECUTING INSERT LINKSET METADATA QUERY AT ENDPOINT  """
        ###############################################################
        # EXECUTING METADATA QUERY AT ENDPOINT
        Qry.endpoint(metadata)

        print "\t>>> WRITING TO FILE"
        write_to_file(graph_name=specs[St.linkset_name],
                      metadata=metadata.replace("INSERT DATA", ""),
                      correspondences=construct_response,
                      singletons=singleton_construct,
                      directory=DIRECTORY)

        print "\tLinkset created as [SUBSET]: ", specs[St.linkset]
        print "\t*** JOB DONE! ***"

        message = "The linkset was created as [{}] with {} triples found!".format(
            specs[St.linkset], specs[St.triples])
        return {
            St.message: message,
            St.error_code: 0,
            St.result: specs[St.linkset]
        }
def linkset_refined_metadata(specs, display=False):
    """
    Generate the metadata INSERT query for a REFINED linkset (a linkset
    derived from another one by applying an extra alignment condition).

    Also reports, via console and the returned message, how many of the
    source linkset's correspondences survived the refinement.

    :param specs: dictionary describing the refined linkset. Expected keys
        include source / target (graph, aligns, entity_datatype, optionally
        extended_graph and reducer), mechanism, linkset (the source),
        refined, refined_name, sameAsCount, insert_query and, per
        mechanism, threshold / delta / intermediate_graph / derivedfrom.
        Derived keys (singleton, link, triples...) are SET here.
    :param display: when True, print the generated query.
    :return: dict with "query" (the INSERT query, or None when the refined
        graph is empty) and "message" (an HTML-ish progress summary).
    """
    # CONDITIONAL METADATA TO APPEND TO THE REFINED LINKSET
    extra = ""

    if St.extended_graph in specs[St.source] and len(specs[St.source][St.extended_graph]) > 0:
        extra += "\n        alivocab:subjectsExtended <{}> ;".format(
            specs[St.source][St.extended_graph])

    if St.extended_graph in specs[St.target] and len(specs[St.target][St.extended_graph]) > 0:
        extra += "\n        alivocab:objectsExtended <{}> ;".format(
            specs[St.target][St.extended_graph])

    if St.reducer in specs[St.source] and len(specs[St.source][St.reducer]) > 0:
        extra += "\n        alivocab:subjectsReducer <{}> ;".format(
            specs[St.source][St.reducer])

    if St.reducer in specs[St.target] and len(specs[St.target][St.reducer]) > 0:
        extra += "\n        alivocab:objectsReducer <{}> ;".format(
            specs[St.target][St.reducer])

    if St.intermediate_graph in specs and len(specs[St.intermediate_graph]) > 0:
        extra += "\n        alivocab:intermediatesTarget <{}> ;".format(
            specs[St.intermediate_graph])

    if St.threshold in specs and len(str(specs[St.threshold])) > 0:
        extra += "\n        alivocab:threshold {} ;".format(str(specs[St.threshold]))

    # DELTA IS ONLY SERIALISED WHEN NON-ZERO AND NUMERIC
    if St.delta in specs and str(specs[St.delta]) != "0":
        converted = convert_to_float(str(specs[St.delta]))
        if math.isnan(converted) is False:
            extra += "\n        alivocab:delta {} ;".format(converted)

    source = specs[St.source]
    target = specs[St.target]
    src_aligns = Ls.format_aligns(source[St.aligns])
    trg_aligns = Ls.format_aligns(target[St.aligns])

    # DERIVED URIS FOR THE REFINED LINKSET
    specs[St.singleton] = "{}{}".format(Ns.singletons, specs[St.refined_name])
    specs[St.link] = "{}{}{}".format(Ns.alivocab, "exactStrSim", specs[St.sameAsCount])
    specs[St.assertion_method] = "{}{}".format(Ns.method, specs[St.refined_name])
    specs[St.justification] = "{}{}".format(Ns.justification, specs[St.refined_name])
    specs[St.link_comment] = "The predicate <{}> used in this linkset is a property that reflects an entity " \
                             "linking approach based on the <{}{}> mechanism.". \
        format(specs[St.link], Ns.mechanism, specs[St.mechanism])

    # MECHANISM-SPECIFIC NAME, JUSTIFICATION AND COMMENT
    if str(specs[St.mechanism]).lower() == "exactstrsim":
        specs[St.link_name] = "Exact String Similarity"
        specs[St.link_subpropertyof] = "http://risis.eu/linkset/predicate/{}".format(
            specs[St.mechanism])
        specs[St.justification_comment] = "We assume that entities with the aligned predicates sharing the " \
                                          "exact same content are same. This assumption applies when dealing " \
                                          "with entities such as Organisation."
        specs[St.linkset_comment] = "Linking <{}> to <{}> by aligning {} with {} using the mechanism: {}". \
            format(source[St.graph], target[St.graph], src_aligns, trg_aligns, specs[St.mechanism])

    elif str(specs[St.mechanism]).lower() == "identity":
        specs[St.link_name] = "Same URI"
        specs[St.link_subpropertyof] = "http://risis.eu/linkset/predicate/{}".format(
            specs[St.mechanism])
        specs[St.justification_comment] = "We assume that entities with the same URI are identical."
        specs[St.linkset_comment] = "Linking <{}> to <{}> based on their identical URI using the mechanism: {}". \
            format(source[St.graph], target[St.graph], specs[St.mechanism])

    elif str(specs[St.mechanism]).lower() == "approxnbrsim":
        specs[St.link_name] = "Approximate Number Similarity"
        specs[St.link_subpropertyof] = "http://risis.eu/linkset/predicate/{}".format(
            specs[St.mechanism])
        specs[St.justification_comment] = "This includes entities with an approximate number similarity" \
                                          " in the interval [0 {}].".format(specs[St.delta])
        specs[St.linkset_comment] = "Linking <{}> to <{}> based on their approximate number similarity" \
                                    " using the mechanism: {}". \
            format(source[St.graph], target[St.graph], specs[St.mechanism])

    elif str(specs[St.mechanism]).lower() == "approxstrsim":
        specs[St.link_name] = "Approximate String Similarity"
        specs[St.link_subpropertyof] = "http://risis.eu/linkset/predicate/{}".format(
            specs[St.mechanism])
        specs[St.justification_comment] = "This includes entities with a string similarity in the interval [{} 1[.".\
            format(specs[St.threshold])
        specs[St.linkset_comment] = "Linking <{}> to <{}> based on their approximate string similarity" \
                                    " using the mechanism: {}". \
            format(source[St.graph], target[St.graph], specs[St.mechanism])

    elif str(specs[St.mechanism]).lower() == "intermediate":
        specs[St.link_name] = "Exact String Similarity"
        specs[St.link_subpropertyof] = "http://risis.eu/linkset/predicate/{}".format(
            specs[St.mechanism])
        specs[St.justification_comment] = "This is an implementation of the Exact String Similarity Mechanism over " \
                                          "the aligned predicates."
        specs[St.linkset_comment] = "Linking <{}> to <{}> by aligning {} with {} using the mechanism: {}". \
            format(source[St.graph], target[St.graph], src_aligns, trg_aligns, specs[St.mechanism])

    # CHECKING WHETHER THE REFINED HAS SOME TRIPLES INSERTED
    specs[St.triples] = Qry.get_namedgraph_size(specs[St.refined], isdistinct=False)
    triples = Qry.get_namedgraph_size(specs[St.linkset], isdistinct=False)
    print "\t>>> {} CORRESPONDENCES IN THE SOURCE".format(triples)
    print "\t>>> {} CORRESPONDENCES INSERTED".format(specs[St.triples])
    print "\t>>> {} CORRESPONDENCES DO NOT COMPLY WITH THE NEW CONDITION".format(
        str(int(triples) - int(specs[St.triples])))

    message = "{}<br/>{}<br/>{}".format(
        "{} CORRESPONDENCES IN THE SOURCE".format(triples),
        "{} CORRESPONDENCES INSERTED".format(specs[St.triples]),
        "{} CORRESPONDENCES DO NOT COMPLY WITH THE NEW CONDITION".format(
            str(int(triples) - int(specs[St.triples]))))

    if int(specs[St.triples]) > 0:

        # PROVENANCE LINK TO THE PARENT LINKSET, WHEN AVAILABLE
        derived_from = specs[St.derivedfrom] if St.derivedfrom in specs else ""
        intermediate = "\n        alivocab:intermediatesTarget <{}> ;".format(specs[St.intermediate_graph]) \
            if str(specs[St.mechanism]).lower() == "intermediate" else ""

        # THE METADATA QUERY: 44 positional slots, one per line of the query.
        query = "\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}" \
                "\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}" \
                "\n{}\n{}\n{}\n{}\n{}" \
                "\n{}\n{}\n{}" \
                "\n{}\n{}\n{}\n{}\n{}" \
                "\n{}\n{}\n{}\n{}\n{}". \
            format("##################################################################",
                   "### METADATA FOR {}".format(specs[St.refined]),
                   "##################################################################",
                   "PREFIX prov: <{}>".format(Ns.prov),
                   "PREFIX alivocab: <{}>".format(Ns.alivocab),
                   "PREFIX rdfs: <{}>".format(Ns.rdfs),
                   "PREFIX void: <{}>".format(Ns.void),
                   "PREFIX bdb: <{}>".format(Ns.bdb),
                   "INSERT",
                   "{",
                   "    <{}>".format(specs[St.refined]),
                   "        a void:Linkset ;\n{}".format(derived_from),
                   "        rdfs:label \"{}\" ; ".format(specs[St.refined_name]),
                   "        void:triples {} ;".format(specs[St.triples]),
                   "        alivocab:sameAsCount {} ;".format(specs[St.sameAsCount]),
                   "        alivocab:alignsMechanism <{}{}> ;".format(Ns.mechanism, specs[St.mechanism]),
                   "        void:subjectsTarget <{}> ;{}".format(source[St.graph], intermediate),
                   "        void:objectsTarget <{}> ;".format(target[St.graph]),
                   "        void:linkPredicate <{}> ;".format(specs[St.link]),
                   "        bdb:subjectsDatatype <{}> ;".format(source[St.entity_datatype]),
                   "        bdb:objectsDatatype <{}> ;".format(target[St.entity_datatype]),
                   "        alivocab:singletonGraph <{}> ;".format(specs[St.singleton]),
                   "        bdb:assertionMethod <{}> ;".format(specs[St.assertion_method]),
                   "        bdb:linksetJustification <{}> ;{}".format(specs[St.justification], extra),
                   "        alivocab:alignsSubjects ?src_aligns ;",
                   "        alivocab:alignsObjects ?trg_aligns ;",
                   "        rdfs:comment \"\"\"{}\"\"\" .".format(specs[St.linkset_comment]),
                   "\n    ### METADATA ABOUT THE LINKTYPE",
                   "    <{}>".format(specs[St.link]),
                   "        rdfs:comment \"\"\"{}\"\"\" ;".format(specs[St.link_comment]),
                   "        rdfs:label \"{} {}\" ;".format(specs[St.link_name], specs[St.sameAsCount]),
                   "        rdfs:subPropertyOf <{}> .".format(specs[St.link_subpropertyof]),
                   "\n    ### METADATA ABOUT THE LINKSET JUSTIFICATION",
                   "    <{}>".format(specs[St.justification]),
                   "        rdfs:comment \"\"\"{}\"\"\" .".format(specs[St.justification_comment]),
                   "\n    ### ASSERTION METHOD",
                   "    <{}>".format(specs[St.assertion_method]),
                   "        alivocab:sparql \"\"\"{}\"\"\" .".format(specs[St.insert_query]),
                   "}",
                   "WHERE",
                   "{",
                   "    BIND(iri({}) AS ?src_aligns)".format(src_aligns),
                   "    BIND(iri({}) AS ?trg_aligns)".format(trg_aligns),
                   "}")

        if display is True:
            print query
        print "\t>>> Done generating the metadata"
        return {"query": query, "message": message}

    else:
        # NOTHING SURVIVED THE REFINEMENT: no metadata to insert
        return {"query": None, "message": message}
def union(specs, activated=False):
    """
    Create a UNION lens over the graphs listed in specs[St.datasets]:
    copy every linkset/lens (and its singleton graph) into a temporary
    load graph, materialise the union lens from it, de-duplicate, insert
    the lens metadata, dump to file and register the lens.

    :param specs: dictionary with at least St.datasets (list of graph
        URIs), St.lens_name, and whatever Lu/Gn/Urq helpers expect.
        Several keys (lens, insert_query, triples, expectedTriples,
        removedDuplicates...) are SET here.
    :param activated: when False the function refuses to run.
    :return: dict with St.message, St.error_code (0 success / non-zero
        failure) and St.result (the lens URI or None).
    """
    if activated is False:
        # logger.warning("THE FUNCTION IS NOT ACTIVATED")
        print("THE FUNCTION IS NOT ACTIVATED")
        return {
            St.message: "THE FUNCTION IS NOT ACTIVATED.",
            St.error_code: 1,
            St.result: None
        }

    print "\nEXECUTING UNION SPECS" \
          "\n======================================================" \
          "========================================================"

    """ THE generate_lens_name FUNCTION RETURNS THE NAME OF THE UNION
        AND A QUERY THAT ALLOWS TO ASk WHETHER THE LENS TO BE CREATED
        EXIST BY CHECKING WHETHER THERE EXISTS A LENS WITH THE SAME
        COMPOSITION IN TERMS GRAPHS USED FOR THE UNION """

    # SET THE NAME OF THE UNION-LENS
    print "1. DATASETS:", len(specs[St.datasets])
    for ds in specs[St.datasets]:
        print "\t- {}".format(ds)
    info = Lu.generate_lens_name(specs[St.datasets])
    specs[St.lens] = "{}{}".format(Ns.lens, info["name"])
    print "\n2. LENS: ", info["name"]

    # CHECK WHETHER THE LENS EXISTS
    check = run_checks(specs, info["query"])
    if check[St.result] != "GOOD TO GO":
        # AN EXISTING, IDENTICALLY-COMPOSED LENS IS STILL (RE-)REGISTERED
        if check[St.message].__contains__("ALREADY EXISTS"):
            Urq.register_lens(specs, is_created=False)
        return check
    # print "AFTER CHECK"

    # PREPARATION FOR THE CREATION OF THE LENS
    specs[St.lens_target_triples] = ""
    specs[St.expectedTriples] = 0
    specs[St.insert_query] = ""
    lens = specs[St.lens]
    # TEMPORARY GRAPH THAT ACCUMULATES ALL TARGET GRAPHS BEFORE THE UNION
    source = "{}{}".format(Ns.tmpgraph, "load00")
    message_2 = Ec.ERROR_CODE_8.replace("#", specs[St.lens])
    count = -1
    insert_ans = False

    try:
        # GO THROUGH THE LINKSETS/LENSES IN THE LENS
        # 1-SUM UP THE EXPECTED NUMBER OF TRIPLES
        # 2-GENERATE THE TRIPLES REPRESENTATION OF GHE GRAPHS COMPOSING THIS LENS
        # 3-GENERATE THE INSERT QUERY FOR MOVING BOTH LINKSET AND SINGLETON GRAPHS TO THE UNION GRAPH
        total_size = 0

        # LOAD ALL GRAPHS IN LOAD00
        specs[St.insert_query] += "DROP SILENT GRAPH <{}{}> ;\n".format(
            Ns.tmpgraph, "load00")

        # ITERATE THROUGH THE PROVIDED GRAPHS
        for linkset in specs[St.datasets]:
            # print "TARGET: ", linkset
            count += 1

            # GET THE TOTAL NUMBER OF CORRESPONDENCE TRIPLES INSERTED
            curr_triples = Qry.get_triples(linkset)
            # PROBABLY THE LINKSET HAS NO SUCH PROPERTY " void:triples ?triples ."
            if curr_triples is None:
                curr_triples = Qry.get_triples_count(linkset)
            total_size += int(curr_triples)
            print "{} Contains {} triples".format(linkset, curr_triples)

            if curr_triples is not None:
                specs[St.expectedTriples] += int(curr_triples)
            else:
                # THE IS A PROBLEM WITH THE GRAPH FOR SEVERAL POSSIBLE REASONS
                # NOTE(review): this branch is unreachable — curr_triples
                # was already used in int() above and cannot be None here.
                return {
                    St.message: message_2.replace("\n", "<br/>"),
                    St.error_code: 1,
                    St.result: None
                }

            # GENERATE TRIPLES OUT OF THE TARGETS
            specs[St.lens_target_triples] += "\n\t void:target <{}> ;".format(linkset)

            # GET THE INSERT QUERY
            # BOTH THE LINKSET AND THE SINGLETONS ARE MOVED TO A SINGLE GRAPH
            partial_query = Qry.q_copy_graph(source, source, linkset)
            if count == 0:
                specs[St.insert_query] += partial_query
            else:
                specs[St.insert_query] += " ;\n{}".format(partial_query)

        # INTERSECTION MANIPULATION OVER THE UNION (SOURCE)
        insert_query = union_insert_q(lens, source, specs[St.lens_name])
        # print "manipulation:", manipulation
        specs[St.insert_query] += " ;\n{}".format(insert_query)

        # GENERATE THE LENS UNION
        if activated is True:
            # print specs[St.insert_query]
            insert_ans = Qry.boolean_endpoint_response(specs[St.insert_query])
            specs[St.triples] = Qry.get_namedgraph_size(lens, isdistinct=False)
            if specs[St.triples] == "0":
                message = Ec.ERROR_CODE_9
                print message
                # return None
                return {
                    St.message: message.replace("\n", "<br/>"),
                    St.error_code: 1,
                    St.result: None
                }

        # CHECK WHETHER THE RESULT CONTAINS DUPLICATES
        contains_duplicated = Qry.contains_duplicates(lens)
        print "Contains Opposite Direction Duplicated:", contains_duplicated

        # IF IT DOES, REMOVE THE DUPLICATES
        if contains_duplicated is True:
            # logger.warning("THE LENS CONTAINS DUPLICATES.")
            print "THE LENS CONTAINS DUPLICATES."
            Qry.remove_duplicates(lens)
            # logger.warning("THE DUPLICATES ARE NOW REMOVED.")
            print "THE DUPLICATES ARE NOW REMOVED."

        print "Number of triples loaded : {}".format(total_size)
        # RE-COUNT AFTER DE-DUPLICATION
        specs[St.triples] = Qry.get_namedgraph_size(lens, isdistinct=False)
        print "\t>>> INSERTED: {}\n\t>>> INSERTED TRIPLES: {}".format(
            insert_ans, specs[St.triples])
        print "Inserted : {}".format(specs[St.triples])
        print "Removed : {}".format(total_size - int(specs[St.triples]))

        # LOAD THE METADATA
        # NOT GOOD AS THE LENS ALSO HAS A SINGLETON GRAPH
        # inserted_correspondences = int(Qry.get_union_triples(lens))
        inserted_correspondences = int(specs[St.triples])
        # print "inserted_correspondences:", inserted_correspondences
        specs[St.removedDuplicates] = specs[St.expectedTriples] - inserted_correspondences
        metadata = Gn.union_meta(specs)
        # print "METADATA:", metadata
        meta_ans = Qry.boolean_endpoint_response(metadata)
        print "\t>>> IS THE METADATA GENERATED AND INSERTED? {}".format(meta_ans)

        # DUMP THE LENS TO FILE (TriG-style: prefix the body with the graph URI)
        construct_response = Qry.get_constructed_graph(specs[St.lens])
        if construct_response is not None:
            print "\t>>> WRITING TO FILE"
            construct_response = construct_response.replace(
                '{', "<{}>\n{{".format(specs[St.lens]), 1)
            write_to_file(graph_name=specs[St.lens_name],
                          metadata=None,
                          correspondences=construct_response,
                          directory=DIRECTORY)

        print "\tLens created as : ", specs[St.lens]

        # REGISTER THE LINKSET
        Urq.register_lens(specs, is_created=True)

        # return specs[St.lens]
        message = "THE LENS WAS CREATED as {}. " \
                  "With initially {} triples loaded, {} duplicated triples were found and removed.". \
            format(specs[St.lens], total_size, total_size - int(specs[St.triples]))
        print "\t*** JOB DONE! ***"
        return {
            St.message: message,
            St.error_code: 0,
            St.result: specs[St.lens]
        }

    except Exception as err:
        # logger.warning(err)
        # ROLLBACK: drop the partially-inserted union if the insert ran
        if insert_ans == "true":
            "DROP THE INSERTED UNION"
            drop_linkset(lens, activated=True)
        print "ERROR IN UNION LENS CREATION:", err
        # NOTE(review): ERROR_CODE_11 is used without the Ec. prefix seen
        # elsewhere — presumably imported at module level; verify.
        return {St.message: ERROR_CODE_11, St.error_code: 11, St.result: None}
def lens_transitive(specs, activated=False): # CHECK BOTH DATASETS FOR SAME MECHANISM print "GENERATE THE LENS NAME" Lu.composition_lens_name(specs) print "GET THE SAME AS COUNT" specs[St.sameAsCount] = Qry.get_same_as_count(specs[St.lens_operation]) # print same_as_count # GENERATE THE INSERT QUERY FOR TRANSITIVITY # transitive_analyses = lens_transitive_query(specs) # if transitive_analyses is None: # return # specs[St.insert_query] = transitive_analyses[1] # print insert_query # exit(0) # specs['is_transitive_by'] = transitive_analyses[0] ln = get_uri_local_name(specs[St.lens]) sg = specs[St.subjectsTarget] tg = specs[St.objectsTarget] ssg = "{}{}".format(Ns.singletons, get_uri_local_name(sg)) tsg = "{}{}".format(Ns.singletons, get_uri_local_name(tg)) print "SOURCE: {}".format(sg) print "TARGET: {}".format(tg) print "1. GENERATING THE INSERT QUERY" specs[St.insert_query] = transitive_insert_query(ln, sg, tg, ssg, tsg) if activated is True: # RUN THE QUERY AT THE END POINT print "2. RUNNING THE INSERT QUERY" Qry.boolean_endpoint_response(specs[St.insert_query]) # GET THE SIZE OF THE LENS JUST CREATED ABOVE print "3. ETTING THE SIZE OF THE LENS JUST INSERTED" size = Qry.get_namedgraph_size(specs[St.lens], isdistinct=False) # IF ACTIVATED, INSERT THE METADATA if size > 0: # GENERATE THE METADATA ABOUT THE LENS JUST CREATED print "4. SOME {} TRANSITIVE TRIPLES WERE FOUND".format(size) metadata = transitive_metadata(specs, size) # print metadata print "5. INSERTING THE METADATA" Qry.boolean_endpoint_response(metadata) print "6. REGISTER THE LENS" Urq.register_lens(specs, is_created=True) # RUN A CORRESPONDENCE CONSTRUCT QUERY FOR BACKING UP THE DATA TO DISC print "7. 
GENERATE THE CONSTRUCT FOR FILE DUMP" construct_correspondence = Qry.endpointconstruct( Qry.construct_namedgraph(specs[St.lens])) if construct_correspondence is not None: construct_correspondence = construct_correspondence.replace( '{', "<{}>\n{{".format(specs[St.lens]), 1) # RUN A SINGLETON METADATA CONSTRUCT QUERY FOR BACKING UP THE DATA TO DISC construct_singletons = Qry.endpointconstruct( Qry.construct_namedgraph("{}{}".format(Ns.singletons, specs[St.lens_name]))) if construct_singletons is not None: construct_singletons = construct_singletons. \ replace('{', "<{}{}>\n{{".format(Ns.singletons, specs[St.lens_name]), 1) # WRITE TO FILE print "WRITING TO FILE" write_to_file(graph_name=ln, metadata=metadata, directory=DIRECTORY, correspondences=construct_correspondence, singletons=construct_singletons) # return specs[St.lens] message = "THE LENS WAS CREATED!<br/>URI = {}".format( specs[St.lens]) print message print "\t*** JOB DONE! ***" return { St.message: message, St.error_code: 0, St.result: specs[St.lens] } if activated is False: logger.warning( "THE FUNCTION IS NOT ACTIVATED BUT THE METADATA THAT IS " "SUPPOSED TO BE ENTERED IS WRITEN TO THE CONSOLE.")
def enrich(specs, directory, endpoint): # TODO RUN IT IF THERE IS NOT GRAPH ENRICHED WITH THE SAME NAME # specs[St.graph] = "http://grid.ac/20170712" print "ENRICHING DATA/GRAPH FROM EXPORT-ALIGNMENT" print "GRAPH:", specs[St.graph] print "ENTITY TYPE:", specs[St.entity_datatype] print "LAT PREDICATE:", specs[St.long_predicate] print "LONG PREDICATE:", specs[St.lat_predicate] print "FILE DIRECTORY:", directory name = Ut.get_uri_local_name(specs[St.graph]) print endpoint data_1 = Qry.virtuoso_request( "ask {{ GRAPH <{}> {{ ?x ?y ?z . }} }}".format(specs[St.graph]), endpoint) data_1 = regex.findall("rs:boolean[ ]*(.*)[ ]*\.", data_1["result"]) if len(data_1) > 0: data_1 = data_1[0].strip() == "true" if data_1 is False: print "GRAPH: {} {}".format( specs[St.graph], "DOES NOT EXIST AT THE REMOTE VIRTUOSO SITE.") # CHECKING WHETHER BOTH DATASETS ARE AT THE VIRTUOSO TRIPLE STORE data_2 = Qry.virtuoso_request( "ask {GRAPH <http://geo.risis.eu/gadm>{ ?x ?y ?z . }}", endpoint) data_2 = regex.findall("rs:boolean[ ]*(.*)[ ]*\.", data_2["result"]) if len(data_2) > 0: data_2 = data_2[0].strip() == "true" if data_2 is False: print "GRAPH: {} {}".format( specs[St.graph], "DOES NOT EXIST AT THE REMOTE VIRTUOSO SITE.") if data_1 is False or data_2 is False: message = "BECAUSE BOTH DATASETS NEED TO BE PRESENT AT OUR TRIPLES STORE, WE ARE UNABLE TO EXECUTE THE REQUEST." return { St.message: message, St.result: 'The dataset {} ' 'cannot be enriched with GADM boundary at the moment.'.format( specs[St.graph]) } total = 0 limit = 20000 date = datetime.date.isoformat(datetime.date.today()).replace('-', '') f_path = "{0}{1}{1}{2}_enriched_{3}.ttl".format(directory, os.path.sep, name, date) b_path = "{0}{1}{1}{2}_enriched_{3}{4}".format(directory, os.path.sep, name, date, Ut.batch_extension()) # MAKE SURE THE FOLDER EXISTS try: if not os.path.exists(directory): os.makedirs(directory) except OSError as err: print "\n\t[utility_LOAD_TRIPLE_STORE:]", err return print "\n1. 
GETTING THE TOTAL NUMBER OF TRIPLES." count_query = enrich_query(specs, limit=0, offset=0, is_count=True) print count_query count_res = Qry.virtuoso_request(count_query, endpoint) result = count_res['result'] # GET THE TOTAL NUMBER OF TRIPLES if result is None: print "NO RESULT FOR THIS ENRICHMENT." return count_res g = rdflib.Graph() g.parse(data=result, format="turtle") attribute = rdflib.URIRef("http://www.w3.org/2005/sparql-results#value") for subject, predicate, obj in g.triples((None, attribute, None)): total = int(obj) # NUMBER OF REQUEST NEEDED iterations = total / limit if total % limit == 0 else total / limit + 1 print "\n2. TOTAL TRIPLES TO RETREIVE : {} \n\tTOTAL NUMBER OF ITERATIONS : {}\n".format( total, iterations) writer = codecs.open(f_path, "wb", "utf-8") batch_writer = codecs.open(b_path, "wb", "utf-8") print "3. GENERATING THE BATCH FILE TEXT" enriched_graph = "{}_enriched".format(specs[St.graph]) stardog_path = '' if Ut.OPE_SYS == "windows" else Svr.settings[ St.stardog_path] load_text = """echo "Loading data" {}stardog data add {} -g {} "{}" """.format(stardog_path, Svr.settings[St.database], enriched_graph, f_path) batch_writer.write(to_unicode(load_text)) batch_writer.close() # RUN THE ITERATIONS for i in range(0, iterations): offset = i * 20000 + 1 print "\tROUND: {} OFFSET: {}".format(i + 1, offset) # print "\t\t1. GENERATING THE ENRICHMENT QUERY" virtuoso = enrich_query(specs, limit=limit, offset=offset, is_count=False) # print virtuoso # exit(0) # print Qry.virtuoso(virtuoso)["result"] # print "\t\t2. RUNNING THE QUERY + WRITE THE RESULT TO FILE" writer.write(Qry.virtuoso_request(virtuoso, endpoint)["result"]) writer.close() print "\n4. RUNNING THE BATCH FILE" print "\tTHE DATA IS BEING LOADED OVER HTTP POST." if Svr.settings[St.split_sys] is True \ else "\tTHE DATA IS BEING LOADED AT THE STARDOG LOCAL HOST FROM BATCH." 
# os.system(b_path) # RUN THE BATCH FILE print "\tFILE: {}".format(f_path) print "\tBATCH: {}\n".format(b_path) os.chmod(b_path, 0o777) Ut.batch_load(b_path) if os.path.exists(b_path) is True: os.remove(b_path) # TODO 1. REGISTER THE DATASET TO BE ENRICHED IF NOT YET REGISTER # TODO 2. ADD THE ENRICHED DATASET TO THE RESEARCH QUESTION (REGISTER). # TODO 3. MAYBE, CREATE THE LINKSET BETWEEN THE SOURCE AND THE RESULTING size = Qry.get_namedgraph_size(enriched_graph) print "JOB DONE...!!!!!!" return { St.message: "The select dataset was enriched with the GADM boundary as {}. " "{} triples were created.".format(enriched_graph, size), St.result: enriched_graph }
def intersecting(specs, activated=False):
    """
    Create an intersection lens over the linksets/lenses in specs['datasets'].

    Generates a lens name and URI, checks with Lens_Union.run_checks whether
    the lens already exists, and otherwise runs the insert query at the
    endpoint, records the resulting triple count, inserts the generic
    metadata and registers the lens.

    :param specs: dictionary of lens specifications; mutated in place
        (St.lens, St.lens_target_triples, St.insert_query, St.triples).
        NOTE(review): St.lens_name is read below but not assigned here —
        presumably set by Ut.update_specification; verify.
    :param activated: when False, nothing is executed and an error dict
        is returned immediately.
    :return: a {St.message, St.error_code, St.result} dictionary
        (error_code 0 on success, 71 if the lens already exists,
        4 when no match is found, 5 on exception, 1 when not activated).
    """
    if activated is False:
        print("THE FUNCTION [intersecting] IS NOT ACTIVATED")
        return {
            St.message: "THE FUNCTION [intersecting] IS NOT ACTIVATED.",
            St.error_code: 1,
            St.result: None
        }

    print Ut.headings("EXECUTING INTERSECTION SPECS...")

    # 1. GENERATE THE LENS NAME
    lens_name = generate_lens_name(specs['datasets'], operator="intersection")
    specs[St.lens] = "{}{}".format(Ns.lens, lens_name['name'])
    Ut.update_specification(specs)

    # **********************************
    # 3. GOOD TO GO CHECK
    # **********************************
    query = """
    SELECT *
    {{
        <{}> ?predicate ?object .
    }}
    """.format(specs[St.lens])
    check = Lens_Union.run_checks(specs, query, operator="intersection")

    # NOT GOOD TO GO, IT ALREADY EXISTS
    if check[St.message].__contains__("ALREADY EXISTS"):
        return {
            St.message: check[St.message],
            St.error_code: 71,
            St.result: specs[St.lens]
        }

    # **********************************
    # GOOD TO GO
    # **********************************
    else:
        try:
            specs[St.lens_target_triples] = ""

            # DOCUMENTING START TIME
            lens_start = time.time()

            print "\n4. GENERATE THE INSERT QUERY"
            specs[St.insert_query] = intersection_extended(
                specs, lens_name=specs[St.lens_name])
            print specs[St.insert_query]

            # EXECUTE THE INSERT QUERY AT THE ENDPOINT
            print "\n5. >>> LOOKING FOR INTERSECTING LINKS"
            print "\t", Qry.boolean_endpoint_response(specs[St.insert_query])

            print "\n6. EXTRACTING THE NUMBER OF TRIPLES"
            specs[St.triples] = Qry.get_namedgraph_size("{0}{1}".format(
                Ns.lens, specs[St.lens_name]))

            lens_end = time.time()
            diff = lens_end - lens_start
            print " \n>>> Executed so far in : {:<14}".format(
                str(datetime.timedelta(seconds=diff)))

            if int(specs[St.triples]) > 0:

                # LIST EVERY INPUT DATASET AS A void:target OF THE LENS
                for linkset in specs[St.datasets]:
                    specs[St.lens_target_triples] += \
                        "\n\t void:target <{}> ;".format(linkset)

                print "\n7. INSERTING THE GENERIC METADATA"
                metadata = Gn.intersection_meta(specs)
                # print metadata
                Qry.boolean_endpoint_response(metadata)

                # print "\n8. WRITING TO FILE"
                server_message = "Linksets created as: {}".format(
                    specs[St.lens])
                message = "The linkset was created as [{}] with {} triples found!".format(
                    specs[St.lens], specs[St.triples])
                print "\n\t", server_message
                Urq.register_lens(specs, is_created=True)

                ls_end_2 = time.time()
                diff = ls_end_2 - lens_end
                print ">>> Executed in : {:<14}".format(
                    str(datetime.timedelta(seconds=diff)))
                print "\t*** JOB DONE! ***"
                return {
                    St.message: message,
                    St.error_code: 0,
                    St.result: specs[St.lens]
                }

            else:
                print "The linkset was not generated as no match could be found"
                print "\t*** JOB DONE! ***"
                return {
                    St.message: "The linkset was not generated as no match could be found",
                    St.error_code: 4,
                    St.result: None
                }

        except Exception as err:
            traceback.print_exc()
            return {
                St.message: Ec.ERROR_CODE_1,
                St.error_code: 5,
                St.result: None
            }

# specs = {
#     'lens_operation': u'intersection',
#     'datasets': [u'http://risis.eu/linkset/grid_20170712_eter_2014_approxStrSim_Organization_altLabel_P1079405301',
#                  u'http://risis.eu/linkset/grid_20170712_eter_2014_approxStrSim_Organization_altLabel_P1661430032',
#                  u'http://risis.eu/linkset/grid_20170712_eter_2014_approxStrSim_Organization_label_N1860664105'],
#     'researchQ_URI': u'http://risis.eu/activity/idea_67a6ce'}

# specs_2 = {'lens_operation': u'intersection',
#            'datasets': [u'http://risis.eu/lens/union_Grid_20170712_Eter_2014_N291690309',
#                         u'http://risis.eu/lens/union_Orgreg_20170718_Eter_2014_P1061032980',
#                         u'http://risis.eu/lens/union_Orgreg_20170718_Grid_20170712_N1966224323'],
#            'researchQ_URI': u'http://risis.eu/activity/idea_67a6ce'}
#
# specs_3 = {'lens_operation': u'intersection',
#            'datasets': [
#                u'http://risis.eu/linkset/orgreg_20170718_grid_20170712_approxStrSim_University_Entity_current_name_English_N682223883',
#                u'http://risis.eu/linkset/orgreg_20170718_grid_20170712_approxStrSim_University_Entity_current_name_English_P2117262605',
#                u'http://risis.eu/lens/union_Grid_20170712_Eter_2014_N291690309',
#                u'http://risis.eu/lens/union_Orgreg_20170718_Eter_2014_P1061032980',
#                u'http://risis.eu/lens/union_Orgreg_20170718_Grid_20170712_N1966224323'],
#            'researchQ_URI': u'http://risis.eu/activity/idea_67a6ce'}
#
#
#
# print intersection_extended(specs_3, "lens_name", display=False)
# import Alignments.Manage.AdminGraphs as adm
# adm.drop_a_lens("http://risis.eu/lens/intersection_Grid_20170712_Eter_2014_P1326988364", display=True, activated=True)
# print intersecting(specs, activated=True)
def refine_lens(specs, activated=False, check_file=False):
    """
    Refine an existing linkset/lens with a geographic match.

    Rewrites the incoming 'linkset' key into St.refined, names the refined
    lens, checks for prior existence, then runs geo_match, records the
    triple count, inserts metadata, writes the result to file and registers
    the lens.

    :param specs: lens specification dictionary; MUTATED in place — the
        'linkset' key is popped and stored under St.refined.
    :param activated: when False, nothing is executed and an error dict is
        returned.
    :param check_file: forwarded to writelinkset.
    :return: a {St.message, St.error_code, St.result} dictionary
        (error_code 0 on success, 71 if the lens already exists, 4 when no
        match is found or the function is not activated, 5 on exception).
        NOTE(review): when specs[St.sameAsCount] is falsy the function
        falls through and implicitly returns None — confirm callers
        handle that.
    """
    try:
        message = Ec.ERROR_CODE_0.replace('\n', "<br/>")
        if activated is False:
            print Ut.headings("THE FUNCTION [refine_lens] IS NOT ACTIVATED")
            return {St.message: message, St.error_code: 4, St.result: None}

        # 1. UPDATING THE SPECS BY CHANGING LINKSET TO TENS
        specs[St.refined] = specs['linkset']
        specs.pop('linkset')
        Ut.update_specification(specs)

        # CHECKING WHETHER THE LENS IS REFINENABLE
        # Refine.is_refinable(specs[St.refined])

        # PRINTING THE SPECIFICATIONS
        # lensUt.print_specs(specs)

        # ASSIGN THE SAME AS COUNT
        specs[St.sameAsCount] = Qry.get_same_as_count(specs[St.mechanism])
        # DEFAULT MESSAGE, RETURNED WHEN NO MATCH IS FOUND BELOW
        message = Ec.ERROR_CODE_4.replace('\n', "<br/>")

        if specs[St.sameAsCount]:

            source = specs[St.source]
            target = specs[St.target]

            # 2. SET THE LENS NAME
            # *******************************
            print "\n2. SET THE LENS NAME"
            # *******************************
            lensUt.lens_refine_name(specs, 'refine')

            # *******************************
            # GOOD TO GO CHECK
            # *******************************
            query = """
            SELECT *
            {{
                <{}> ?predicate ?object .
            }}
            """.format(specs[St.lens])
            check = Lens_Union.run_checks(specs, query, operator="refine")

            # NOT GOOD TO GO, IT ALREADY EXISTS
            if check[St.message].__contains__("ALREADY EXISTS"):
                return {
                    St.message: check[St.message],
                    St.error_code: 71,
                    St.result: specs[St.lens]
                }

            # *******************************
            # GOOD TO GO
            # *******************************
            else:

                lens_start = time.time()

                # UPDATE THE SPECIFICATION
                Ut.update_specification(specs[St.source])
                Ut.update_specification(specs[St.target])

                # PRINTING THE SPECIFICATIONS
                lensUt.print_specs(specs)

                ########################################################################
                print """\n4. EXECUTING THE GEO-MATCH """
                ########################################################################
                geo_match(specs)

                ########################################################################
                print """\n5. EXTRACT THE NUMBER OF TRIPLES """
                ########################################################################
                specs[St.triples] = Qry.get_namedgraph_size("{0}{1}".format(
                    Ns.lens, specs[St.lens_name]))

                ########################################################################
                print """\n6. ASSIGN THE SPARQL INSERT QUERY """
                ########################################################################
                specs[St.insert_query] = "{} ;\n{};\n{}".format(
                    geo_load_query(specs, True),
                    geo_load_query(specs, False),
                    geo_match_query(specs))

                lens_end = time.time()
                diff = lens_end - lens_start
                print "\n\t>>> Executed so far in : {:<14}".format(
                    str(datetime.timedelta(seconds=diff)))

                if int(specs[St.triples]) > 0:

                    ########################################################################
                    print """\n4. INSERTING THE GENERIC METADATA """
                    ########################################################################
                    metadata = Gn.lens_refine_geo_metadata(specs)
                    Qry.boolean_endpoint_response(metadata)

                    ########################################################################
                    print """\n5. WRITING TO FILE """
                    ########################################################################
                    src = [source[St.graph_name], "", source[St.entity_ns]]
                    trg = [target[St.graph_name], "", target[St.entity_ns]]

                    # linkset_path = "D:\datasets\Linksets\ExactName"
                    linkset_path = DIRECTORY
                    writelinkset(src, trg, specs[St.lens_name], linkset_path,
                                 metadata, check_file=check_file)
                    server_message = "Linksets created as: {}".format(
                        specs[St.lens])
                    message = "The linkset was created as [{}] with {} triples found!".format(
                        specs[St.lens], specs[St.triples])

                    print "\n\t", server_message
                    Urq.register_lens(specs, is_created=True)

                    ls_end_2 = time.time()
                    diff = ls_end_2 - lens_end
                    print ">>> Executed in : {:<14}".format(
                        str(datetime.timedelta(seconds=diff)))
                    print "\t*** JOB DONE! ***"

                    return {
                        St.message: message,
                        St.error_code: 0,
                        St.result: specs[St.lens]
                    }

                else:
                    # NO MATCH: returns the ERROR_CODE_4 message set above
                    print "\tThe linkset was not generated as no match could be found"
                    print "\t*** JOB DONE! ***"
                    return {
                        St.message: message,
                        St.error_code: 4,
                        St.result: None
                    }

    except Exception as err:
        traceback.print_exc()
        return {St.message: Ec.ERROR_CODE_1, St.error_code: 5, St.result: None}

# print geo_load_query(specs, is_source=True)
# print geo_load_query(specs, is_source=False)
# geo_match_query(specs)
# traceback.print_exception()

# import Alignments.Manage.AdminGraphs as adm
# adm.drop_a_lens("http://risis.eu/lens/refine_union_Grid_20170712_Eter_2014_N291690309", display=True, activated=True)
# refine_lens(specs_example, activated=True, check_file=False)
#
# adm.drop_a_lens("http://risis.eu/lens/refine_union_Orgreg_20170718_Eter_2014_P1061032980", display=True, activated=True)
# refine_lens(specs_example_2, activated=True, check_file=False)
#
# adm.drop_a_lens("http://risis.eu/lens/refine_union_Orgreg_20170718_Grid_20170712_N1966224323", display=True, activated=True)
# refine_lens(specs_example_3, activated=True, check_file=False)