def enrich(specs, directory, endpoint): # TODO RUN IT IF THERE IS NOT GRAPH ENRICHED WITH THE SAME NAME # specs[St.graph] = "http://grid.ac/20170712" print "ENRICHING DATA/GRAPH FROM EXPORT-ALIGNMENT" print "GRAPH:", specs[St.graph] print "ENTITY TYPE:", specs[St.entity_datatype] print "LAT PREDICATE:", specs[St.long_predicate] print "LONG PREDICATE:", specs[St.lat_predicate] print "FILE DIRECTORY:", directory name = Ut.get_uri_local_name(specs[St.graph]) print endpoint data_1 = Qry.virtuoso_request( "ask {{ GRAPH <{}> {{ ?x ?y ?z . }} }}".format(specs[St.graph]), endpoint) data_1 = regex.findall("rs:boolean[ ]*(.*)[ ]*\.", data_1["result"]) if len(data_1) > 0: data_1 = data_1[0].strip() == "true" if data_1 is False: print "GRAPH: {} {}".format( specs[St.graph], "DOES NOT EXIST AT THE REMOTE VIRTUOSO SITE.") # CHECKING WHETHER BOTH DATASETS ARE AT THE VIRTUOSO TRIPLE STORE data_2 = Qry.virtuoso_request( "ask {GRAPH <http://geo.risis.eu/gadm>{ ?x ?y ?z . }}", endpoint) data_2 = regex.findall("rs:boolean[ ]*(.*)[ ]*\.", data_2["result"]) if len(data_2) > 0: data_2 = data_2[0].strip() == "true" if data_2 is False: print "GRAPH: {} {}".format( specs[St.graph], "DOES NOT EXIST AT THE REMOTE VIRTUOSO SITE.") if data_1 is False or data_2 is False: message = "BECAUSE BOTH DATASETS NEED TO BE PRESENT AT OUR TRIPLES STORE, WE ARE UNABLE TO EXECUTE THE REQUEST." return { St.message: message, St.result: 'The dataset {} ' 'cannot be enriched with GADM boundary at the moment.'.format( specs[St.graph]) } total = 0 limit = 20000 date = datetime.date.isoformat(datetime.date.today()).replace('-', '') f_path = "{0}{1}{1}{2}_enriched_{3}.ttl".format(directory, os.path.sep, name, date) b_path = "{0}{1}{1}{2}_enriched_{3}{4}".format(directory, os.path.sep, name, date, Ut.batch_extension()) # MAKE SURE THE FOLDER EXISTS try: if not os.path.exists(directory): os.makedirs(directory) except OSError as err: print "\n\t[utility_LOAD_TRIPLE_STORE:]", err return print "\n1. 
GETTING THE TOTAL NUMBER OF TRIPLES." count_query = enrich_query(specs, limit=0, offset=0, is_count=True) print count_query count_res = Qry.virtuoso_request(count_query, endpoint) result = count_res['result'] # GET THE TOTAL NUMBER OF TRIPLES if result is None: print "NO RESULT FOR THIS ENRICHMENT." return count_res g = rdflib.Graph() g.parse(data=result, format="turtle") attribute = rdflib.URIRef("http://www.w3.org/2005/sparql-results#value") for subject, predicate, obj in g.triples((None, attribute, None)): total = int(obj) # NUMBER OF REQUEST NEEDED iterations = total / limit if total % limit == 0 else total / limit + 1 print "\n2. TOTAL TRIPLES TO RETREIVE : {} \n\tTOTAL NUMBER OF ITERATIONS : {}\n".format( total, iterations) writer = codecs.open(f_path, "wb", "utf-8") batch_writer = codecs.open(b_path, "wb", "utf-8") print "3. GENERATING THE BATCH FILE TEXT" enriched_graph = "{}_enriched".format(specs[St.graph]) stardog_path = '' if Ut.OPE_SYS == "windows" else Svr.settings[ St.stardog_path] load_text = """echo "Loading data" {}stardog data add {} -g {} "{}" """.format(stardog_path, Svr.settings[St.database], enriched_graph, f_path) batch_writer.write(to_unicode(load_text)) batch_writer.close() # RUN THE ITERATIONS for i in range(0, iterations): offset = i * 20000 + 1 print "\tROUND: {} OFFSET: {}".format(i + 1, offset) # print "\t\t1. GENERATING THE ENRICHMENT QUERY" virtuoso = enrich_query(specs, limit=limit, offset=offset, is_count=False) # print virtuoso # exit(0) # print Qry.virtuoso(virtuoso)["result"] # print "\t\t2. RUNNING THE QUERY + WRITE THE RESULT TO FILE" writer.write(Qry.virtuoso_request(virtuoso, endpoint)["result"]) writer.close() print "\n4. RUNNING THE BATCH FILE" print "\tTHE DATA IS BEING LOADED OVER HTTP POST." if Svr.settings[St.split_sys] is True \ else "\tTHE DATA IS BEING LOADED AT THE STARDOG LOCAL HOST FROM BATCH." 
# os.system(b_path) # RUN THE BATCH FILE print "\tFILE: {}".format(f_path) print "\tBATCH: {}\n".format(b_path) os.chmod(b_path, 0o777) Ut.batch_load(b_path) if os.path.exists(b_path) is True: os.remove(b_path) # TODO 1. REGISTER THE DATASET TO BE ENRICHED IF NOT YET REGISTER # TODO 2. ADD THE ENRICHED DATASET TO THE RESEARCH QUESTION (REGISTER). # TODO 3. MAYBE, CREATE THE LINKSET BETWEEN THE SOURCE AND THE RESULTING size = Qry.get_namedgraph_size(enriched_graph) print "JOB DONE...!!!!!!" return { St.message: "The select dataset was enriched with the GADM boundary as {}. " "{} triples were created.".format(enriched_graph, size), St.result: enriched_graph }
# NOTE(review): this span is the tail of a larger routine whose head is not
# visible in this chunk; `writers`, `count`, `specs` and (in the commented
# code) `check` come from that enclosing scope -- confirm against the full file.

# On non-Windows hosts the batch file must be made executable before it can run.
if Ut.OPE_SYS != 'windows':
    print "MAC BATCH: {}".format(writers[St.batch_output_path])
    os.chmod(writers[St.batch_output_path], 0o777)

if count > 0:
    print "\n{} INSTANCES FOUND.".format(count)
    # NOTE(review): the "{}" placeholder below is never substituted -- the
    # comma makes this print a tuple rather than call .format().
    print "6. RUNNING THE BATCH FILE FOR LOADING THE CORRESPONDENCES INTO THE TRIPLE STORE\n\t\t{}", writers[
        St.batch_output_path]
    if Svr.settings[St.split_sys] is True:
        print "THE DATA IS BEING LOADED OVER HTTP POST."
    else:
        print "THE DATA IS BEING LOADED AT THE STARDOG LOCAL HOST FROM BATCH."
    # os.system(writers[St.batch_output_path])
    Ut.batch_load(writers[St.batch_output_path])

# inserted = Qry.insert_size(specs[St.linkset], isdistinct=False)
# Drop the "INSERT DATA" keyword from the generated metadata text before
# writing it out; the insert itself only runs when triples were produced.
metadata = Gn.linkset_metadata(specs, display=False).replace("INSERT DATA", "")
writers[St.meta_writer].write(to_unicode(metadata))
if int(specs[St.triples]) > 0:
    Qry.boolean_endpoint_response(metadata)
writers[St.meta_writer].close()

# REGISTER THE ALIGNMENT
# if check[St.result].__contains__("ALREADY EXISTS"):
#     Urq.register_alignment_mapping(specs, created=False)
# else:
#     Urq.register_alignment_mapping(specs, created=True)
Urq.register_alignment_mapping(specs, created=False)
import Alignments.Utility as Ut import os import Alignments.Settings as St import Alignments.Server_Settings as Srv # https://docupub.com/pdfmerge/ # directory = "C:\Program Files\stardog-4.1.3\data" # Ut.listening(directory) print "STOPPING THE STARDOG SERVER" lock_file = [ name for name in os.listdir(Srv.settings[St.stardog_data_path]) if name.endswith('.lock') ] if len(lock_file) > 0: off = Ut.batch_load("C:\stardogStop.bat") print "RESPONSE: {}".format(off["result"]) else: print "THE SERVER WAS NOT ON."