Example no. 1
def importRDFfiles():
    global WORKER_INSTANCES
    inst_str = "number of log rows: " + str(nr_of_log_rows) + "\n"
    inst_str += "number of processed rows: " + str(line_counter) + "\n"
    inst_str += "length of POST list: " + str(postListSize) + "\n"

    list_len = len(WORKER_INSTANCES)
    inst_str += "number of worker instaces: " + str(list_len) + "\n"
    #print("WORKERS " + str(WORKER_INSTANCES))
    if list_len > 0:
        for instance in WORKER_INSTANCES:
            wwwname = instance['name']
            inst_str += "\nworkername: " + wwwname + "\nmachineType: " + instance[
                'machineType'] + "\n"
            wwwip = instance['networkInterfaces'][0]['accessConfigs'][0][
                'natIP']
            www_data = dict()
            www_data["ip"] = wwwip
            www_data["name"] = wwwname
            www_data["statfile"] = mylargefile
            p = subprocess.Popen(
                ["python3", "download_rdf_files.py",
                 json.dumps(www_data)])
            # Wait for the child process to terminate; stdout/stderr
            # are not piped, so out and err are both None here.
            out, err = p.communicate()
        #add info about instances
        comm.saveStatistics(mylargefile, inst_str + "\n\n")
    else:
        comm.printException(comm.pathToSaveDownloadErrors,
                            errString='No instances to list.')
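
For context, the parent hands each worker script a single command-line argument: a JSON-encoded dict with the worker's IP, its name, and the statistics file path. A minimal sketch of the receiving side (the full script appears in Example no. 2 below):

import json
import sys

# Decode the JSON payload passed by the parent via subprocess.Popen.
www_data = json.loads(sys.argv[1])
print(www_data["ip"], www_data["name"], www_data["statfile"])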
Example no. 2
#!/usr/local/bin/python3
# -*- coding: utf-8 -*-

# enable debugging
import cgitb
cgitb.enable()

import os, sys
import json
from rdflib import Graph
import commonVariables as comm

if __name__ == '__main__':
    www_data = json.loads(sys.argv[1])
    mylargefile = www_data["statfile"]
    # save info about the number of triples in each RDF file
    for fname in comm.rdfFnames:
        g_path = comm.pathToRDFdir + fname + ".rdf"
        if os.path.exists(g_path):
            g_old = Graph()
            g_old.parse(g_path)
            if mylargefile != "":
                note = fname + " nr of triples: " + str(len(g_old)) + "\n"
                comm.saveStatistics(mylargefile, note)
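
The same rdflib API can also aggregate the per-worker files into a single graph, which is what the "aggregate all rdf files" step in the next example refers to. A minimal sketch, with illustrative file paths:

from rdflib import Graph

# Parsing several RDF/XML files into one Graph merges their triples.
merged = Graph()
for path in ["worker_a.rdf", "worker_b.rdf"]:  # illustrative paths
    merged.parse(path)
print("merged graph has", len(merged), "triples")
# Write the aggregate back out as RDF/XML.
merged.serialize(destination="merged.rdf", format="xml")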
Example no. 3
                            worker_counter += 1
                            if (worker_counter > (len(ipList) - 1)):
                                #start over from first worker in list
                                worker_counter = 0
                # don't let memory usage grow too large
                if (len(distinct_urls) > 1000):
                    distinct_urls = set()
        # post the remaining URLs
        if (len(urlsList) > 0):
            #send list of urls to worker
            worker_counter = postToWorker.detectConnection(
                ipList, worker_counter, urlsList)

        #start of statistics!
        comm.saveStatistics(
            mylargefile, "###########################\n chunksize: " +
            str(comm.chunksize) + "\n")

        #the time spent
        end = datetime.datetime.now()
        span = end - start
        #save statistics!
        note = "creating RDFs: \n" + "time spent (h:m:s.mm): " + str(
            span) + " \n\n"
        comm.saveStatistics(mylargefile, note)
        #print("totalseconds: ", span.total_seconds())

        ###
        ###
        #List instances, aggregate all rdf files into 3 files in master
        start = datetime.datetime.now()
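
The counter-and-reset logic above implements round-robin assignment across workers; an equivalent, more compact form (not taken from the original code) uses the modulo operator:

# Advance to the next worker, wrapping back to 0 past the last index.
worker_counter = (worker_counter + 1) % len(ipList)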
Example no. 4
                            del urlsList[:]  # empty list of urls
                            # prepare next worker
                            worker_counter += 1
                            if (worker_counter > (len(ipList) - 1)):
                                # start over from first worker in list
                                worker_counter = 0
                # don't let memory usage grow too large
                if (len(distinct_urls) > 1000):
                    distinct_urls = set()
        # post the remaining URLs
        if (len(urlsList) > 0):
            # send list of urls to worker
            worker_counter = postToWorker.detectConnection(ipList, worker_counter, urlsList)

        # start of statistics!
        comm.saveStatistics(mylargefile, "###########################\n chunksize: " + str(comm.chunksize) + "\n")

        # the time spent
        end = datetime.datetime.now()
        span = end - start
        # save statistics!
        note = "creating RDFs: \n" + "time spent (h:m:s.mm): " + str(span) + " \n\n"
        comm.saveStatistics(mylargefile, note)
        #print("totalseconds: ", span.total_seconds())

        ###
        ###
        # List instances, aggregate all rdf files into 3 files in master
        start = datetime.datetime.now()
        importRDFfiles()
        ## delete folder where RDF files were collected (in master VM)
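
The elapsed-time bookkeeping in these snippets follows the standard datetime pattern; a self-contained sketch:

import datetime

start = datetime.datetime.now()
# ... the work being measured ...
end = datetime.datetime.now()
span = end - start  # a datetime.timedelta
print("time spent (h:m:s.mm):", span)
print("total seconds:", span.total_seconds())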