def insert_metadatas(self, accessL):
    """
    Insert the metadatas into the parent graph.

    :param accessL: truthy for a public graph, falsy for a private one
    :raises ValueError: if the askomics.endpoint setting is not defined
    """
    self.log.debug('--- insert_metadatas ---')

    sqb = SparqlQueryBuilder(self.settings, self.session)
    query_laucher = QueryLauncher(self.settings, self.session)

    # Fail early, before building or inserting anything, if the endpoint
    # is not configured (the original built the whole ttl first).
    if not self.is_defined("askomics.endpoint"):
        # FIX: message previously read "does not exit."
        raise ValueError("askomics.endpoint does not exist.")

    valAcces = 'public' if accessL else 'private'

    subject = '<' + self.graph + '> '
    ttl = subject + 'prov:generatedAtTime "' + self.timestamp + '"^^xsd:dateTime .\n'
    ttl += subject + 'dc:creator "' + self.session['username'] + '" .\n'
    ttl += subject + ':accessLevel "' + valAcces + '" .\n'
    ttl += subject + 'foaf:Group "' + self.session['group'] + '" .\n'
    ttl += subject + 'prov:wasDerivedFrom "' + self.name + '" .\n'
    ttl += subject + 'dc:hasVersion "' + get_distribution('Askomics').version + '" .\n'
    # os.uname()[1] is the hostname of the machine running askomics
    ttl += subject + 'prov:describesService "' + os.uname()[1] + '" .\n'
    ttl += subject + 'prov:atLocation "' + self.get_param("askomics.endpoint") + '" .\n'

    sparql_header = sqb.header_sparql_config('')
    query_laucher.insert_data(ttl, self.graph, sparql_header)
def create_user_graph(self):
    """
    Create a subgraph for the user. All his data will be inserted in this subgraph
    """
    launcher = QueryLauncher(self.settings, self.session)
    auth_builder = SparqlQueryAuth(self.settings, self.session)

    main_graph = self.settings['askomics.graph']
    # Declare <main_graph:username> as a subgraph of the main askomics graph
    ttl = '<{0}:{1}> rdfg:subGraphOf <{0}>'.format(main_graph, self.username)

    header_ttl = auth_builder.header_sparql_config(ttl)
    launcher.insert_data(ttl, main_graph, header_ttl)
def create_user_graph(self):
    """
    Create a subgraph for the user. All his data will be inserted in this subgraph
    """
    query_laucher = QueryLauncher(self.settings, self.session)
    sqa = SparqlQueryAuth(self.settings, self.session)

    # <main_graph:username> rdfg:subGraphOf <main_graph>
    ttl = '<' + self.settings['askomics.graph'] + ':' + self.username + \
          '> rdfg:subGraphOf <' + self.settings['askomics.graph'] + '>'

    header_ttl = sqa.header_sparql_config(ttl)
    query_laucher.insert_data(ttl, self.settings["askomics.graph"], header_ttl)
def get_metadatas(self):
    """
    Create metadatas and insert them into AskOmics main graph.
    """
    self.log.debug("====== INSERT METADATAS ======")
    sqb = SparqlQueryBuilder(self.settings, self.session)
    ql = QueryLauncher(self.settings, self.session)

    graph = self.metadatas['graphName']
    # (predicate, value, datatype) triples describing the loaded graph
    facts = [
        ('prov:generatedAtTime', self.metadatas['loadDate'], 'xsd:dateTime'),
        ('dc:creator', self.metadatas['username'], 'xsd:string'),
        ('prov:wasDerivedFrom', self.metadatas['fileName'], 'xsd:string'),
        ('dc:hasVersion', self.metadatas['version'], 'xsd:string'),
        ('prov:describesService', self.metadatas['server'], 'xsd:string'),
    ]
    ttlMetadatas = ' .\n'.join(
        '<' + graph + '> ' + pred + ' "' + value + '"^^' + dtype
        for pred, value, dtype in facts
    ) + ' .'

    sparqlHeader = sqb.header_sparql_config("")
    ql.insert_data(ttlMetadatas, self.get_param("askomics.graph"), sparqlHeader)
def get_metadatas(self):
    """
    Create metadatas and insert them into AskOmics main graph.

    Reads the values previously collected in self.metadatas
    (graphName, loadDate, username, fileName, version, server).
    """
    self.log.debug("====== INSERT METADATAS ======")
    sqb = SparqlQueryBuilder(self.settings, self.session)
    ql = QueryLauncher(self.settings, self.session)

    # One provenance triple per metadata entry, all about the named graph
    ttlMetadatas = "<" + self.metadatas['graphName'] + "> " + "prov:generatedAtTime " + '"' + self.metadatas['loadDate'] + '"^^xsd:dateTime .\n'
    ttlMetadatas += "<" + self.metadatas['graphName'] + "> " + "dc:creator " + '"' + self.metadatas['username'] + '"^^xsd:string .\n'
    ttlMetadatas += "<" + self.metadatas['graphName'] + "> " + "prov:wasDerivedFrom " + '"' + self.metadatas['fileName'] + '"^^xsd:string .\n'
    ttlMetadatas += "<" + self.metadatas['graphName'] + "> " + "dc:hasVersion " + '"' + self.metadatas['version'] + '"^^xsd:string .\n'
    ttlMetadatas += "<" + self.metadatas['graphName'] + "> " + "prov:describesService " + '"' + self.metadatas['server'] + '"^^xsd:string .'

    sparqlHeader = sqb.header_sparql_config("")
    ql.insert_data(ttlMetadatas, self.get_param("askomics.graph"), sparqlHeader)
def persist_user(self, host_url):
    """
    Persist all user infos in the TS

    :param host_url: public URL of this askomics instance, used in the
                     notification mail sent to the admins
    """
    launcher = QueryLauncher(self.settings, self.session)
    auth_query = SparqlQueryAuth(self.settings, self.session)

    # The very first registered user becomes an (unblocked) admin;
    # everyone else starts non-admin and blocked.
    first_user = self.get_number_of_users() == 0
    admin = 'true' if first_user else 'false'
    blocked = 'false' if first_user else 'true'
    self.set_admin(first_user)
    self.set_blocked(not first_user)

    # Predicate lines are aligned under the subject in the ttl output
    indent = len(self.username) * ' ' + ' '
    props = [
        'foaf:name "%s" ;' % self.username,
        ':password "%s" ;' % self.sha256_pw,
        'foaf:mbox <mailto:%s> ;' % self.email,
        ':isadmin "%s"^^xsd:boolean ;' % admin,
        ':isblocked "%s"^^xsd:boolean ;' % blocked,
        ':randomsalt "%s" .' % self.randomsalt,
    ]
    chunk = ':%s rdf:type foaf:Person ;\n' % self.username
    for prop in props:
        chunk += indent + prop + '\n'

    header_ttl = auth_query.header_sparql_config(chunk)
    launcher.insert_data(chunk, self.settings["askomics.users_graph"], header_ttl)

    emails = self.get_admins_emails()

    # Send a mail to all admins
    body = ('Hello,\n'
            'User \'' + self.username + '\' just created an account on Askomics.\n'
            'Log into the admin interface in order to unblock this user, or contact him '
            'at ' + self.email + '.\n\n\n'
            + host_url + '\n\n')

    self.send_mails(host_url, emails, '[AskOmics@' + host_url + '] New account created', body)
def load_data_from_file(self, fp, urlbase):
    """
    Load a locally created ttl file in the triplestore using http (with load_data(url)) or with the filename for Fuseki (with fuseki_load_data(fp.name)).

    :param fp: a file handle for the file to load
    :param urlbase:the base URL of current askomics instance. It is used to let triple stores access some askomics temporary ttl files using http.
    :return: a dictionnary with information on the success or failure of the operation
    """
    if not fp.closed:
        fp.flush()  # This is required as otherwise, data might not be really written to the file before being sent to triplestore

    sqb = SparqlQueryBuilder(self.settings, self.session)
    ql = QueryLauncher(self.settings, self.session)

    # Each load gets its own named graph, registered as a subgraph of the
    # main askomics graph so it can be found (and deleted) later.
    graphName = "askomics:graph:" + self.name + '_' + self.timestamp
    self.metadatas['graphName'] = graphName
    ttlNamedGraph = "<" + graphName + "> " + "rdfg:subGraphOf" + " <" + self.get_param("askomics.graph") + "> ."
    sparqlHeader = sqb.header_sparql_config("")
    ql.insert_data(ttlNamedGraph, self.get_param("askomics.graph"), sparqlHeader)

    url = urlbase + "/ttl/" + os.path.basename(fp.name)
    self.log.debug(url)
    data = {}
    try:
        if self.is_defined("askomics.file_upload_url"):
            # Fuseki-style: upload the file content directly
            queryResults = ql.upload_data(fp.name, graphName)
            self.metadatas['server'] = queryResults.headers['Server']
            self.metadatas['loadDate'] = self.timestamp
        else:
            # Generic triplestore: let it fetch the ttl file over http
            queryResults = ql.load_data(url, graphName)
            self.metadatas['server'] = queryResults.info()['server']
            self.metadatas['loadDate'] = self.timestamp
        data['status'] = 'ok'
    except Exception as e:
        # _format_exception fills `data` with the failure details
        self._format_exception(e, data=data)
    finally:
        if self.settings["askomics.debug"]:
            # Keep the temp file around for debugging and expose its URL
            data['url'] = url
        else:
            os.remove(fp.name)  # Everything ok, remove temp file

    self.get_metadatas()

    return data
def importMoSate(self, mo, state):
    '''
    Import in the TPS all triplet necessary to defined an askomics module

    :param mo: module description dict (keys used: module, comment, version, graph)
    :param state: module state string; 'ok' links the module to its graph
    '''
    esc_entity = self.escape['entity']
    esc_text = self.escape['text']

    # Subject line plus one property line per module attribute
    parts = [
        ":" + esc_entity(mo['module']) + " rdfs:label " + esc_text(mo['module']),
        " rdfs:comment " + esc_text(mo['comment']),
        " :module_version " + esc_text(mo['version']),
        " :module_state " + esc_text(state),
    ]
    rdf = ";\n".join(parts)

    if state == 'ok':
        # Only a working module is linked to its named graph
        rdf += ";\n :module_graph " + '<' + mo['graph'] + '>.\n'
    else:
        rdf += ".\n"

    sqb = SparqlQueryBuilder(self.settings, self.session)
    ql = QueryLauncher(self.settings, self.session)
    sh = sqb.header_sparql_config('')
    ql.insert_data(rdf, self.graph_modules, sh)
def load_data_from_file(self, fp, urlbase):
    """
    Load a locally created ttl file in the triplestore using http (with load_data(url)) or with the filename for Fuseki (with fuseki_load_data(fp.name)).

    :param fp: a file handle for the file to load
    :param urlbase:the base URL of current askomics instance. It is used to let triple stores access some askomics temporary ttl files using http.
    :return: a dictionnary with information on the success or failure of the operation
    """
    if not fp.closed:
        fp.flush()  # This is required as otherwise, data might not be really written to the file before being sent to triplestore

    sqb = SparqlQueryBuilder(self.settings, self.session)
    ql = QueryLauncher(self.settings, self.session)

    # Each load gets its own named graph, registered as a subgraph of the
    # main askomics graph so it can be found (and deleted) later.
    graphName = "urn:sparql:" + self.name + '_' + self.timestamp
    self.metadatas['graphName'] = graphName
    ttlNamedGraph = "<" + graphName + "> " + "rdfg:subGraphOf" + " <" + self.get_param("askomics.graph") + "> ."
    sparqlHeader = sqb.header_sparql_config("")
    ql.insert_data(ttlNamedGraph, self.get_param("askomics.graph"), sparqlHeader)

    url = urlbase + "/ttl/" + os.path.basename(fp.name)
    self.log.debug(url)
    data = {}
    try:
        if self.is_defined("askomics.file_upload_url"):
            # Fuseki-style: upload the file content directly
            queryResults = ql.upload_data(fp.name, graphName)
            self.metadatas['server'] = queryResults.headers['Server']
            self.metadatas['loadDate'] = self.timestamp
        else:
            # Generic triplestore: let it fetch the ttl file over http
            queryResults = ql.load_data(url, graphName)
            self.metadatas['server'] = queryResults.info()['server']
            self.metadatas['loadDate'] = self.timestamp
        data['status'] = 'ok'
    except Exception as e:
        # _format_exception fills `data` with the failure details
        self._format_exception(e, data=data)
    finally:
        if self.settings["askomics.debug"]:
            # Keep the temp file around for debugging and expose its URL
            data['url'] = url
        else:
            os.remove(fp.name)  # Everything ok, remove temp file

    self.get_metadatas()

    return data
def test_statistics(self): #load files self.it.empty() self.it.load_test2() ql = QueryLauncher(self.settings, self.request.session) queryResults = ql.insert_data(':sujet :predicat :objet .', 'test', 'prefix :<test>') server = queryResults.info()['server'] self.request.json_body = {'namedGraphs': ['test']} self.askview.delete_graph() data = self.askview.statistics() assert data['ntriples'] == 279 assert data['nclasses'] == '6' assert data['nentities'] == '19' assert data['ngraphs'] == '5' assert data['class'] == { 'Personne': { 'count': '7' }, 'Sexe': { 'count': '2' }, 'Instrument': { 'count': '2' } } for key in data['metadata'].keys(): self.assertRegexpMatches( key, r'^urn:sparql:(instrument|enseigne|connait|joue|personne)\.tsv_[0-9]+\.[0-9]+$' ) for key2 in data['metadata'][key]: self.assertRegexpMatches( key2, r'^(version|username|filename|loadDate|server)$') if key2 == 'version': assert data['metadata'][key][key2] == '2.0' elif key2 == 'username': assert data['metadata'][key][key2] == getpass.getuser() elif key2 == 'filename': self.assertRegexpMatches( data['metadata'][key][key2], r'^(instrument|enseigne|connait|joue|personne)\.tsv$') elif key2 == 'loadDate': self.assertRegexpMatches(data['metadata'][key][key2], r'^[0-9]+\.[0-9]+$') elif key2 == 'server': assert data['metadata'][key][key2] == server
def persist_user(self,host_url):
    """
    Persist all user infos in the TS

    :param host_url: public URL of this askomics instance, used in the
                     notification mail sent to the admins
    """
    query_laucher = QueryLauncher(self.settings, self.session)
    sqa = SparqlQueryAuth(self.settings, self.session)

    #check if user is the first. if yes, set him admin
    if self.get_number_of_users() == 0:
        admin = 'true'
        blocked = 'false'
        self.set_admin(True)
        self.set_blocked(False)
    else:
        # Subsequent users start blocked until an admin unblocks them
        admin = 'false'
        blocked = 'true'
        self.set_admin(False)
        self.set_blocked(True)

    chunk = ':' + self.username + ' rdf:type foaf:Person ;\n'
    # Align the predicate lines under the subject in the ttl output
    indent = len(self.username) * ' ' + ' '
    chunk += indent + 'foaf:name \"' + self.username + '\" ;\n'
    chunk += indent + ':password \"' + self.sha256_pw + '\" ;\n'
    chunk += indent + 'foaf:mbox <mailto:' + self.email + '> ;\n'
    chunk += indent + ':isadmin \"' + admin + '\"^^xsd:boolean ;\n'
    chunk += indent + ':isblocked \"' + blocked + '\"^^xsd:boolean ;\n'
    chunk += indent + ':randomsalt \"' + self.randomsalt + '\" .\n'

    header_ttl = sqa.header_sparql_config(chunk)
    query_laucher.insert_data(chunk, self.settings["askomics.users_graph"], header_ttl)

    emails = self.get_admins_emails()

    # Send a mail to all admins
    body = 'Hello,\n'
    body += 'User \'' + self.username + '\' just created an account on Askomics.\n'
    body += 'Log into the admin interface in order to unblock this user, or contact him '
    body += 'at ' + self.email + '.\n\n\n'
    body += host_url + '\n\n'

    self.send_mails(host_url, emails, '[AskOmics@'+ host_url + '] New account created', body)
def importMoSate(self, mo, state):
    '''
    Import in the TPS all triplet necessary to defined an askomics module

    :param mo: module description dict (keys used: module, comment, version, graph)
    :param state: module state string; 'ok' links the module to its graph
    '''
    rdf = ":" + self.escape['entity'](
        mo['module']) + " rdfs:label " + self.escape['text'](
            mo['module']) + ";\n"
    rdf += " rdfs:comment " + self.escape['text'](mo['comment']) + ";\n"
    rdf += " :module_version " + self.escape['text'](mo['version']) + ";\n"
    rdf += " :module_state " + self.escape['text'](state) + ""

    if (state == 'ok'):
        # Only a working module is linked to its named graph
        rdf += ";\n :module_graph " + '<' + mo['graph'] + '>.\n'
    else:
        rdf += ".\n"

    sqb = SparqlQueryBuilder(self.settings, self.session)
    ql = QueryLauncher(self.settings, self.session)
    sh = sqb.header_sparql_config('')
    ql.insert_data(rdf, self.graph_modules, sh)
def add_jdoe_in_users(self):
    """Insert a John Doe User

    username is jdoe
    mail is [email protected]
    password is iamjohndoe
    not admin and not blocked
    """
    launcher = QueryLauncher(self.settings, self.request.session)
    auth_builder = SparqlQueryAuth(self.settings, self.request.session)

    indent = len('jdoe') * ' ' + ' '
    properties = [
        'foaf:name \"jdoe\" ;',
        # sha256 hash of "iamjohndoe"
        ':password \"23df582b51c3482b677c8eac54872b8bd0a49bfadc853628b8b8bd4806147b54\" ;',
        'foaf:mbox <mailto:[email protected]> ;',
        ':isadmin \"false\"^^xsd:boolean ;',
        ':isblocked \"false\"^^xsd:boolean ;',
        ':randomsalt \"00000000000000000000\" .',
    ]
    chunk = ':jdoe rdf:type foaf:Person ;\n'
    for prop in properties:
        chunk += indent + prop + '\n'

    header_ttl = auth_builder.header_sparql_config(chunk)
    launcher.insert_data(chunk, 'urn:sparql:test_askomics:users', header_ttl)
def add_jsmith_in_users(self):
    """Insert a Jane Smith User

    username is jsmith
    mail is [email protected]
    password is iamjanesmith
    not admin and not blocked
    """
    launcher = QueryLauncher(self.settings, self.request.session)
    auth_builder = SparqlQueryAuth(self.settings, self.request.session)

    indent = len('jsmith') * ' ' + ' '
    properties = [
        'foaf:name \"jsmith\" ;',
        # sha256 hash of "iamjanesmith"
        ':password \"db64872417dcc1488a72b034cbe75268f52eb2486807af096dd2f4c620694efc\" ;',
        'foaf:mbox <mailto:[email protected]> ;',
        ':isadmin \"false\"^^xsd:boolean ;',
        ':isblocked \"false\"^^xsd:boolean ;',
        ':randomsalt \"00000000000000000000\" .',
    ]
    chunk = ':jsmith rdf:type foaf:Person ;\n'
    for prop in properties:
        chunk += indent + prop + '\n'

    header_ttl = auth_builder.header_sparql_config(chunk)
    launcher.insert_data(chunk, 'urn:sparql:test_askomics:users', header_ttl)
def add_another_admin_in_users(self):
    """Insert an admin User

    username is otheradmin
    mail is [email protected]
    password is iamadmin
    admin and not blocked
    """
    launcher = QueryLauncher(self.settings, self.request.session)
    auth_builder = SparqlQueryAuth(self.settings, self.request.session)

    indent = len('otheradmin') * ' ' + ' '
    properties = [
        'foaf:name \"otheradmin\" ;',
        # sha256 hash of "iamadmin"
        ':password \"682cf6a90d94758bdedcf854e8d784e3d5d360a36cd65a2c49eaff214998c23a\" ;',
        'foaf:mbox <mailto:[email protected]> ;',
        ':isadmin \"true\"^^xsd:boolean ;',
        ':isblocked \"false\"^^xsd:boolean ;',
        ':randomsalt \"00000000000000000000\" .',
    ]
    chunk = ':otheradmin rdf:type foaf:Person ;\n'
    for prop in properties:
        chunk += indent + prop + '\n'

    header_ttl = auth_builder.header_sparql_config(chunk)
    launcher.insert_data(chunk, 'urn:sparql:test_askomics:users', header_ttl)
def test_statistics(self):
    """Check the statistics computed after loading the test2 dataset.

    Also inserts (then deletes) a throwaway 'test' graph to capture the
    server identifier returned by the triplestore.
    """
    #load files
    self.it.empty()
    self.it.load_test2()

    ql = QueryLauncher(self.settings, self.request.session)
    queryResults = ql.insert_data(':sujet :predicat :objet .', 'test', 'prefix :<test>')
    # Remember which server answered; asserted against the metadata below
    server = queryResults.info()['server']

    self.request.json_body = {'namedGraphs': ['test']}
    self.askview.delete_graph()

    data = self.askview.statistics()

    assert data['ntriples'] == 279
    assert data['nclasses'] == '6'
    assert data['nentities'] == '19'
    assert data['ngraphs'] == '5'
    assert data['class'] == {
        'Personne': {'count': '7'},
        'Sexe': {'count': '2'},
        'Instrument': {'count': '2'}
    }

    for key in data['metadata'].keys():
        self.assertRegexpMatches(key, r'^urn:sparql:(instrument|enseigne|connait|joue|personne)\.tsv_[0-9]+\.[0-9]+$')
        for key2 in data['metadata'][key]:
            self.assertRegexpMatches(key2, r'^(version|username|filename|loadDate|server)$')
            if key2 == 'version':
                assert data['metadata'][key][key2] == '2.0'
            elif key2 == 'username':
                assert data['metadata'][key][key2] == getpass.getuser()
            elif key2 == 'filename':
                self.assertRegexpMatches(data['metadata'][key][key2], r'^(instrument|enseigne|connait|joue|personne)\.tsv$')
            elif key2 == 'loadDate':
                self.assertRegexpMatches(data['metadata'][key][key2], r'^[0-9]+\.[0-9]+$')
            elif key2 == 'server':
                assert data['metadata'][key][key2] == server
def persist(self, urlbase, public):
    """
    insert the ttl sourcefile in the TS

    :param urlbase: base URL of the askomics instance, forwarded to
                    load_data_from_file so the triplestore can fetch ttl files over http
    :param public: forwarded to insert_metadatas (public/private access level)
    :return: result of the load or insert operation
    """
    pathttl = self.get_rdf_user_directory()

    shutil.copy(self.path, pathttl)

    data = None

    method = 'load'
    if self.get_param("askomics.upload_user_data_method"):
        method = self.get_param("askomics.upload_user_data_method")

    src = pathttl + '/' + os.path.basename(self.path)

    if method == 'load':
        # FIX: the file handle was opened and never closed (leak);
        # use a context manager so it is closed after the load.
        with open(src) as fil_open:
            data = self.load_data_from_file(fil_open, urlbase)
    else:
        chunk = self.file_get_contents(src)
        query_lauch = QueryLauncher(self.settings, self.session)
        data = query_lauch.insert_data(chunk, self.graph, '')

    self.insert_metadatas(public)

    return data
def persist(self, urlbase, public):
    """
    insert the ttl sourcefile in the TS

    :param urlbase: base URL of the askomics instance, forwarded to
                    load_data_from_file so the triplestore can fetch ttl files over http
    :param public: forwarded to insert_metadatas (public/private access level)
    :return: result of the load or insert operation
    """
    pathttl = self.get_rdf_user_directory()

    shutil.copy(self.path, pathttl)

    data = None

    method = 'load'
    if self.get_param("askomics.upload_user_data_method"):
        method = self.get_param("askomics.upload_user_data_method")

    src = pathttl + '/' + os.path.basename(self.path)

    if method == 'load':
        # FIX: the file handle was opened and never closed (leak);
        # use a context manager so it is closed after the load.
        with open(src) as fil_open:
            data = self.load_data_from_file(fil_open, urlbase)
    else:
        chunk = self.file_get_contents(src)
        query_lauch = QueryLauncher(self.settings, self.session)
        data = query_lauch.insert_data(chunk, self.graph, '')

    self.insert_metadatas(public)

    return data
def persist(self, urlbase, public):
    """
    Store the current source file in the triple store

    :param urlbase: the base URL of current askomics instance. It is used to let triple stores access some askomics temporary ttl files using http.
    :return: a dictionnary with information on the success or failure of the operation
    :rtype: Dict
    """
    self.insert_metadatas(public)

    content_ttl = self.get_turtle()

    ql = QueryLauncher(self.settings, self.session)

    # use insert data instead of load sparql procedure when the dataset is small
    total_triple_count = 0
    chunk_count = 1
    chunk = ""
    pathttl = self.get_rdf_user_directory()

    method = 'load'
    if self.get_param("askomics.upload_user_data_method"):
        method = self.get_param("askomics.upload_user_data_method")

    if method == 'load':
        # --- load path: write ttl chunks to temp files the triplestore fetches ---
        fp = None
        triple_count = 0
        for triple in content_ttl:
            chunk += triple + '\n'
            triple_count += 1
            # with open('/tmp/DEBUGTTL' + str(triple_count), 'w') as debug_file:
            #     debug_file.write(chunk)
            if triple_count > int(self.settings['askomics.max_content_size_to_update_database']):
                # Temp file must be accessed by http so we place it in askomics/ttl/ dir
                fp = tempfile.NamedTemporaryFile(dir=pathttl, prefix="tmp_" + self.alphanum_name, suffix=".ttl", mode="w", delete=False)
                # We have reached the maximum chunk size, load it and then we will start a new chunk
                self.log.debug("Loading ttl chunk %s file %s" % (chunk_count, fp.name))
                header_ttl = self.get_turtle_template(chunk)
                fp.write(header_ttl + '\n')
                fp.write(chunk)
                fp.close()
                data = self.load_data_from_file(fp, urlbase)
                if data['status'] == 'failed':
                    return data
                chunk = ""
                total_triple_count += triple_count
                triple_count = 0
                chunk_count += 1

        # Load the last chunk
        if triple_count > 0:
            self.log.debug("Loading ttl chunk %s (last)" % (chunk_count))
            fp = tempfile.NamedTemporaryFile(dir=pathttl, prefix="tmp_" + self.alphanum_name, suffix=".ttl", mode="w", delete=False)
            header_ttl = self.get_turtle_template(chunk)
            fp.write(header_ttl + '\n')
            fp.write(chunk)
            fp.close()
            data = self.load_data_from_file(fp, urlbase)
            if data['status'] == 'failed':
                return data

        total_triple_count += triple_count

        # Data is inserted, now insert the abstraction

        # We get the abstraction now as we need first to parse the whole file to have category_values
        abstraction_ttl = self.get_abstraction()
        domain_knowledge_ttl = self.get_domain_knowledge()

        header_ttl = self.get_turtle_template(abstraction_ttl + "\n" + domain_knowledge_ttl)

        fp = tempfile.NamedTemporaryFile(dir=pathttl, prefix="tmp_" + self.alphanum_name, suffix=".ttl", mode="w", delete=False)
        fp.write(header_ttl + '\n')
        fp.write(abstraction_ttl + '\n')
        fp.write(domain_knowledge_ttl + '\n')
        self.log.debug("Loading ttl abstraction file %s" % (fp.name))
        fp.close()
        data = self.load_data_from_file(fp, urlbase)
        if data['status'] == 'failed':
            return data
        data['total_triple_count'] = total_triple_count
    else:
        # --- insert path: send the triples through SPARQL INSERT DATA ---
        sqb = SparqlQueryBuilder(self.settings, self.session)
        triple_count = 0
        chunk = ""
        for triple in content_ttl:
            chunk += triple + '\n'
            triple_count += 1
            if triple_count > int(self.settings['askomics.max_content_size_to_update_database']) / 10:  # FIXME the limit is much lower than for load
                # We have reached the maximum chunk size, load it and then we will start a new chunk
                self.log.debug("Inserting ttl chunk %s" % (chunk_count))
                try:
                    header_ttl = sqb.header_sparql_config(chunk)
                    queryResults = ql.insert_data(chunk, self.graph, header_ttl)
                except Exception as e:
                    return self._format_exception(e)
                chunk = ""
                total_triple_count += triple_count
                triple_count = 0
                chunk_count += 1

        # Load the last chunk
        if triple_count > 0:
            self.log.debug("Inserting ttl chunk %s (last)" % (chunk_count))
            try:
                header_ttl = sqb.header_sparql_config(chunk)
                queryResults = ql.insert_data(chunk, self.graph, header_ttl)
            except Exception as e:
                return self._format_exception(e)

        total_triple_count += triple_count

        # Data is inserted, now insert the abstraction
        # We get the abstraction now as we need first to parse the whole file to have category_values
        abstraction_ttl = self.get_abstraction()
        domain_knowledge_ttl = self.get_domain_knowledge()

        chunk += abstraction_ttl + '\n'
        chunk += domain_knowledge_ttl + '\n'

        self.log.debug("Inserting ttl abstraction")
        try:
            header_ttl = sqb.header_sparql_config(chunk)
            ql.insert_data(chunk, self.graph, header_ttl)
        except Exception as e:
            return self._format_exception(e)

        self.metadatas['graphName'] = self.graph
        sparqlHeader = sqb.header_sparql_config("")

        data = {}
        # NOTE(review): queryResults comes from the last chunked insert above;
        # presumably at least one insert always ran before this point — confirm.
        if 'server' in queryResults.info():
            self.metadatas['server'] = queryResults.info()['server']
        else:
            self.metadatas['server'] = 'unknown'
        data['status'] = 'ok'
        data['total_triple_count'] = total_triple_count

    data['expected_lines_number'] = self.get_number_of_lines()

    return data
def test_generateAbstractAskomicsRDF(self):
    """Exercise generateAbstractAskomicsRDF against three hand-built graphs.

    Graph 1: a plain ObjectProperty between two unlabelled classes.
    Graph 2: same, plus class labels and a DatatypeProperty attribute.
    Graph 3: same as graph 2 but with non-URL ('http=...') subject IRIs.
    """
    import os
    from askomics.libaskomics.rdfdb.SparqlQueryBuilder import SparqlQueryBuilder
    from askomics.libaskomics.rdfdb.QueryLauncher import QueryLauncher

    m = ModulesManager(self.settings, self.request.session)
    sqb = SparqlQueryBuilder(self.settings, self.request.session)
    ql = QueryLauncher(self.settings, self.request.session)
    sh = sqb.header_sparql_config('')

    rdf = """
    <http://bidon/relationTest> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2002/07/owl#ObjectProperty> ;
    <http://www.w3.org/2000/01/rdf-schema#label> "relationBidon" ;
    <http://www.w3.org/2000/01/rdf-schema#domain> <http://bidon/Type1> ;
    <http://www.w3.org/2000/01/rdf-schema#range> <http://bidon/Type2>.
    <http://bidon/Type1> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2002/07/owl#Class>.
    <http://bidon/Type2> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2002/07/owl#Class>.
    """
    ql.insert_data(rdf, "urn:test:askomics", sh)
    m.generateAbstractAskomicsRDF("urn:test:askomics")

    rdf = """
    <http://bidon/relationTest> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2002/07/owl#ObjectProperty> ;
    <http://www.w3.org/2000/01/rdf-schema#label> "relationBidon" ;
    <http://www.w3.org/2000/01/rdf-schema#domain> <http://bidon/Type1> ;
    <http://www.w3.org/2000/01/rdf-schema#range> <http://bidon/Type2>.
    <http://bidon/Type1> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2002/07/owl#Class>.
    <http://bidon/Type2> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2002/07/owl#Class>.
    <http://bidon/Type1> <http://www.w3.org/2000/01/rdf-schema#label> "Type1".
    <http://bidon/Type2> <http://www.w3.org/2000/01/rdf-schema#label> "Type2".
    <http://bidon/Attribute1> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2002/07/owl#DatatypeProperty> ;
    <http://www.w3.org/2000/01/rdf-schema#label> "Attribute1";
    <http://www.w3.org/2000/01/rdf-schema#domain> <http://bidon/Type1> ;
    <http://www.w3.org/2000/01/rdf-schema#range> <http://www.w3.org/2001/XMLSchema#int>.
    """
    ql.insert_data(rdf, "urn:test:askomics2", sh)
    m.generateAbstractAskomicsRDF("urn:test:askomics2")

    rdf = """
    <http://bidon/relationTest> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2002/07/owl#ObjectProperty> ;
    <http://www.w3.org/2000/01/rdf-schema#label> "relationBidon" ;
    <http://www.w3.org/2000/01/rdf-schema#domain> <http=bidon=Type1> ;
    <http://www.w3.org/2000/01/rdf-schema#range> <http=bidon=Type2>.
    <http=bidon=Type1> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2002/07/owl#Class>.
    <http=bidon=Type2> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2002/07/owl#Class>.
    <http=bidon=Type1> <http://www.w3.org/2000/01/rdf-schema#label> "Type1".
    <http=bidon=Type2> <http://www.w3.org/2000/01/rdf-schema#label> "Type2".
    <http://bidon/Attribute1> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2002/07/owl#DatatypeProperty> ;
    <http://www.w3.org/2000/01/rdf-schema#label> "Attribute1";
    <http://www.w3.org/2000/01/rdf-schema#domain> <http=bidon=Type1> ;
    <http://www.w3.org/2000/01/rdf-schema#range> <http://www.w3.org/2001/XMLSchema#int>.
    """
    ql.insert_data(rdf, "urn:test:askomics3", sh)
    m.generateAbstractAskomicsRDF("urn:test:askomics3")
def test_generateAbstractAskomicsRDF(self):
    """Exercise generateAbstractAskomicsRDF against three hand-built graphs.

    Graph 1: a plain ObjectProperty between two unlabelled classes.
    Graph 2: same, plus class labels and a DatatypeProperty attribute.
    Graph 3: same as graph 2 but with non-URL ('http=...') subject IRIs.
    """
    import os
    from askomics.libaskomics.rdfdb.SparqlQueryBuilder import SparqlQueryBuilder
    from askomics.libaskomics.rdfdb.QueryLauncher import QueryLauncher

    m = ModulesManager(self.settings, self.request.session)
    sqb = SparqlQueryBuilder(self.settings, self.request.session)
    ql = QueryLauncher(self.settings, self.request.session)
    sh = sqb.header_sparql_config('')

    rdf = """
    <http://bidon/relationTest> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2002/07/owl#ObjectProperty> ;
    <http://www.w3.org/2000/01/rdf-schema#label> "relationBidon" ;
    <http://www.w3.org/2000/01/rdf-schema#domain> <http://bidon/Type1> ;
    <http://www.w3.org/2000/01/rdf-schema#range> <http://bidon/Type2>.
    <http://bidon/Type1> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2002/07/owl#Class>.
    <http://bidon/Type2> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2002/07/owl#Class>.
    """
    ql.insert_data(rdf, "urn:test:askomics", sh)
    m.generateAbstractAskomicsRDF("urn:test:askomics")

    rdf = """
    <http://bidon/relationTest> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2002/07/owl#ObjectProperty> ;
    <http://www.w3.org/2000/01/rdf-schema#label> "relationBidon" ;
    <http://www.w3.org/2000/01/rdf-schema#domain> <http://bidon/Type1> ;
    <http://www.w3.org/2000/01/rdf-schema#range> <http://bidon/Type2>.
    <http://bidon/Type1> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2002/07/owl#Class>.
    <http://bidon/Type2> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2002/07/owl#Class>.
    <http://bidon/Type1> <http://www.w3.org/2000/01/rdf-schema#label> "Type1".
    <http://bidon/Type2> <http://www.w3.org/2000/01/rdf-schema#label> "Type2".
    <http://bidon/Attribute1> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2002/07/owl#DatatypeProperty> ;
    <http://www.w3.org/2000/01/rdf-schema#label> "Attribute1";
    <http://www.w3.org/2000/01/rdf-schema#domain> <http://bidon/Type1> ;
    <http://www.w3.org/2000/01/rdf-schema#range> <http://www.w3.org/2001/XMLSchema#int>.
    """
    ql.insert_data(rdf, "urn:test:askomics2", sh)
    m.generateAbstractAskomicsRDF("urn:test:askomics2")

    rdf = """
    <http://bidon/relationTest> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2002/07/owl#ObjectProperty> ;
    <http://www.w3.org/2000/01/rdf-schema#label> "relationBidon" ;
    <http://www.w3.org/2000/01/rdf-schema#domain> <http=bidon=Type1> ;
    <http://www.w3.org/2000/01/rdf-schema#range> <http=bidon=Type2>.
    <http=bidon=Type1> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2002/07/owl#Class>.
    <http=bidon=Type2> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2002/07/owl#Class>.
    <http=bidon=Type1> <http://www.w3.org/2000/01/rdf-schema#label> "Type1".
    <http=bidon=Type2> <http://www.w3.org/2000/01/rdf-schema#label> "Type2".
    <http://bidon/Attribute1> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2002/07/owl#DatatypeProperty> ;
    <http://www.w3.org/2000/01/rdf-schema#label> "Attribute1";
    <http://www.w3.org/2000/01/rdf-schema#domain> <http=bidon=Type1> ;
    <http://www.w3.org/2000/01/rdf-schema#range> <http://www.w3.org/2001/XMLSchema#int>.
    """
    ql.insert_data(rdf, "urn:test:askomics3", sh)
    m.generateAbstractAskomicsRDF("urn:test:askomics3")
def persist(self, urlbase, method):
    """
    Store the current source file in the triple store

    :param urlbase: the base URL of current askomics instance. It is used to let triple stores access some askomics temporary ttl files using http.
    :return: a dictionnary with information on the success or failure of the operation
    :rtype: Dict
    """
    content_ttl = self.get_turtle()

    ql = QueryLauncher(self.settings, self.session)

    # use insert data instead of load sparql procedure when the dataset is small
    total_triple_count = 0
    chunk_count = 1
    chunk = ""
    pathttl = self.get_ttl_directory()

    if method == 'load':
        # --- load path: write ttl chunks to temp files the triplestore fetches ---
        fp = None
        triple_count = 0
        for triple in content_ttl:
            chunk += triple + '\n'
            triple_count += 1
            if triple_count > int(self.settings['askomics.max_content_size_to_update_database']):
                # Temp file must be accessed by http so we place it in askomics/ttl/ dir
                fp = tempfile.NamedTemporaryFile(dir=pathttl, prefix="tmp_"+self.metadatas['fileName'], suffix=".ttl", mode="w", delete=False)
                # We have reached the maximum chunk size, load it and then we will start a new chunk
                self.log.debug("Loading ttl chunk %s file %s" % (chunk_count, fp.name))
                header_ttl = self.get_turtle_template(chunk)
                fp.write(header_ttl + '\n')
                fp.write(chunk)
                fp.close()
                data = self.load_data_from_file(fp, urlbase)
                if data['status'] == 'failed':
                    return data
                chunk = ""
                total_triple_count += triple_count
                triple_count = 0
                chunk_count += 1

        # Load the last chunk
        if triple_count > 0:
            self.log.debug("Loading ttl chunk %s (last)" % (chunk_count))
            fp = tempfile.NamedTemporaryFile(dir=pathttl, prefix="tmp_"+self.metadatas['fileName'], suffix=".ttl", mode="w", delete=False)
            header_ttl = self.get_turtle_template(chunk)
            fp.write(header_ttl + '\n')
            fp.write(chunk)
            fp.close()
            data = self.load_data_from_file(fp, urlbase)
            if data['status'] == 'failed':
                return data
            os.remove(fp.name) # Everything ok, remove previous temp file

        total_triple_count += triple_count

        # Data is inserted, now insert the abstraction

        # We get the abstraction now as we need first to parse the whole file to have category_values
        abstraction_ttl = self.get_abstraction()
        domain_knowledge_ttl = self.get_domain_knowledge()

        header_ttl = self.get_turtle_template(abstraction_ttl+"\n"+domain_knowledge_ttl)

        fp = tempfile.NamedTemporaryFile(dir=pathttl, prefix="tmp_"+self.metadatas['fileName'], suffix=".ttl", mode="w", delete=False)
        fp.write(header_ttl + '\n')
        fp.write(abstraction_ttl + '\n')
        fp.write(domain_knowledge_ttl + '\n')
        self.log.debug("Loading ttl abstraction file %s" % (fp.name))
        fp.close()
        data = self.load_data_from_file(fp, urlbase)
        if data['status'] == 'failed':
            return data
        data['total_triple_count'] = total_triple_count
        os.remove(fp.name)
    else:
        # --- insert path: send the triples through SPARQL INSERT DATA ---
        sqb = SparqlQueryBuilder(self.settings, self.session)
        graphName = "askomics:graph:" + self.name + '_' + self.timestamp
        triple_count = 0
        chunk = ""
        for triple in content_ttl:
            chunk += triple + '\n'
            triple_count += 1
            if triple_count > int(self.settings['askomics.max_content_size_to_update_database']) / 10:  # FIXME the limit is much lower than for load
                # We have reached the maximum chunk size, load it and then we will start a new chunk
                self.log.debug("Inserting ttl chunk %s" % (chunk_count))
                try:
                    header_ttl = sqb.header_sparql_config(chunk)
                    queryResults = ql.insert_data(chunk, graphName, header_ttl)
                except Exception as e:
                    return self._format_exception(e)
                chunk = ""
                total_triple_count += triple_count
                triple_count = 0
                chunk_count += 1

        # Load the last chunk
        if triple_count > 0:
            self.log.debug("Inserting ttl chunk %s (last)" % (chunk_count))
            try:
                header_ttl = sqb.header_sparql_config(chunk)
                queryResults = ql.insert_data(chunk, graphName, header_ttl)
            except Exception as e:
                return self._format_exception(e)

        total_triple_count += triple_count

        # Data is inserted, now insert the abstraction
        # We get the abstraction now as we need first to parse the whole file to have category_values
        abstraction_ttl = self.get_abstraction()
        domain_knowledge_ttl = self.get_domain_knowledge()

        chunk += abstraction_ttl + '\n'
        chunk += domain_knowledge_ttl + '\n'

        self.log.debug("Inserting ttl abstraction")
        try:
            header_ttl = sqb.header_sparql_config(chunk)
            ql.insert_data(chunk, graphName, header_ttl)
        except Exception as e:
            return self._format_exception(e)

        # Register the new named graph as a subgraph of the main askomics graph
        ttlNamedGraph = "<" + graphName + "> " + "rdfg:subGraphOf" + " <" + self.get_param("askomics.graph") + "> ."
        self.metadatas['graphName'] = graphName
        sparqlHeader = sqb.header_sparql_config("")
        ql.insert_data(ttlNamedGraph, self.get_param("askomics.graph"), sparqlHeader)

        data = {}
        # NOTE(review): queryResults comes from the last chunked insert above;
        # presumably at least one insert always ran before this point — confirm.
        self.metadatas['server'] = queryResults.info()['server']
        self.metadatas['loadDate'] = self.timestamp
        data['status'] = 'ok'
        data['total_triple_count'] = total_triple_count
        self.get_metadatas()

    data['expected_lines_number'] = self.get_number_of_lines()

    return data