def parse_gdata_file(filename):
    """Parse a gdata storage-usage dump file into the per-project databases.

    The storage point (e.g. 'gdata1') is derived from the filename itself.
    Supports the old dump format where one file contains multiple report
    sections, each introduced by a '%%%...' banner line.

    Relies on module-level globals: databases, dbfileprefix, verbose,
    ProjectDataset, datetoyearquarter, parse_size.

    Raises:
        ValueError: if the storage point (e.g. 'gdata1') cannot be found
            in the filename.
    """
    db = None
    storageptstring = 'gdata'
    start = filename.find(storageptstring)
    if start < 0:
        # Original code printed a message and issued a bare `raise` with no
        # active exception (a TypeError/RuntimeError at runtime); raise a
        # meaningful error instead.
        raise ValueError('Could not find storage point (e.g gdata1) in filename {}'.format(filename))
    # Storage point is 'gdata' plus one trailing character, e.g. 'gdata1'
    storagept = filename[start:start + len(storageptstring) + 1]
    print('Storage Point: {}'.format(storagept))
    with open(filename) as f:
        # Need this loop to support old method of having multiple dumps per file
        while True:
            # Need this try block to gracefully exit the above loop at end of file
            try:
                for line in f:
                    if line.startswith("%%%%%%%%%%%%%%%%%"):
                        # Grab date string from the following line
                        date = datetime.datetime.strptime(next(f).strip(os.linesep),
                                                          "%a %b %d %H:%M:%S %Z %Y")
                        year, quarter = datetoyearquarter(date)
                        # Gobble another line
                        line = next(f)
                        break
                # Assume a certain structure ....
                line = next(f)
                project = line.split()[4].strip(':')
                if project not in databases:
                    dbfile = 'sqlite:///' + os.path.join(
                        dbfileprefix, "usage_{}_{}.db".format(project, date.year))
                    databases[project] = ProjectDataset(project, dbfile)
                db = databases[project]
                # Gobble the three header lines
                line = next(f); line = next(f); line = next(f)
                for line in f:
                    try:
                        (folder, user, size, inodes, scandate) = line.strip(os.linesep).split()
                    except ValueError:
                        # A line that does not split into five fields marks the
                        # end of the usage table
                        break
                    db.adduser(user)
                    if verbose:
                        print('Adding gdata ', folder, user, size, inodes, scandate)
                    db.addgdatausage(storagept, folder, user,
                                     parse_size(size.upper()), inodes, scandate)
            except Exception:
                # Typically StopIteration when the file is exhausted mid-section
                break
def parse_short_file(filename):
    """Parse a /short filesystem usage dump file into the per-project databases.

    Supports the old dump format where one file contains multiple report
    sections, each introduced by a '%%%...' banner line.

    Relies on module-level globals: databases, dbfileprefix, verbose,
    ProjectDataset, datetoyearquarter, parse_size.
    """
    db = None
    with open(filename) as f:
        # Need this loop to support old method of having multiple dumps per file
        while True:
            # Need this try block to gracefully exit the above loop at end of file
            try:
                for line in f:
                    if line.startswith("%%%%%%%%%%%%%%%%%"):
                        # Grab date string from the following line
                        date = datetime.datetime.strptime(next(f).strip(os.linesep),
                                                          "%a %b %d %H:%M:%S %Z %Y")
                        year, quarter = datetoyearquarter(date)
                        # Gobble another line
                        line = next(f)
                        break
                # Assume a certain structure ....
                line = next(f)
                project = line.split()[4].strip(':')
                if project not in databases:
                    dbfile = 'sqlite:///' + os.path.join(
                        dbfileprefix, "usage_{}_{}.db".format(project, date.year))
                    databases[project] = ProjectDataset(project, dbfile)
                db = databases[project]
                # Gobble the three header lines
                line = next(f); line = next(f); line = next(f)
                for line in f:
                    try:
                        (folder, user, size, inodes, scandate) = line.strip(os.linesep).split()
                    except ValueError:
                        # A line that does not split into five fields marks the
                        # end of the usage table
                        break
                    db.adduser(user)
                    if verbose:
                        # Converted from a Python-2-only print statement to
                        # match the print() calls used elsewhere in this file
                        print('Adding short ', folder, user, size, inodes, scandate)
                    db.addshortusage(folder, user, parse_size(size.upper()), inodes, scandate)
            except Exception:
                # Typically StopIteration when the file is exhausted mid-section
                break
def parse_qstat_json_dump(filename, dbfile, verbose=False):
    """Parse a qstat JSON dump and add each job record to a JobsDataset.

    Parameters:
        filename: path to the qstat JSON dump file.
        dbfile: path to the sqlite database file backing the JobsDataset.
        verbose: if True, print each record as it is added.

    All times are stored as offsets (in seconds) from the job creation
    time (ctime). A record that fails to parse is reported and the
    exception re-raised.
    """
    db = JobsDataset("sqlite:///{}".format(dbfile))
    numrecords = db.getnumrecords()
    nentries = 0
    with open(filename) as f:
        data = json.load(f)
    if 'Jobs' in data:
        data = data['Jobs']
    for jobid, info in data.items():
        if jobid == '_default':
            continue
        try:
            # Strip off '.r-man2' suffix if it exists
            jobid = jobid.split('.')[0]
            # Must have
            ctime = maybe_get_time(info, 'ctime', must=True)
            qtime = maybe_get_time(info, 'qtime', must=True)
            mtime = maybe_get_time(info, 'mtime', must=True)
            # Store all times as offset from creation time in seconds
            qtime = (qtime - ctime).total_seconds()
            mtime = (mtime - ctime).total_seconds()
            # PBS job_state codes for reference:
            #   B  Array job: at least one subjob has started.
            #   E  Job is exiting after having run.
            #   F  Job is finished.
            #   H  Job is held.
            #   M  Job was moved to another server.
            #   Q  Job is queued.
            #   R  Job is running.
            #   S  Job is suspended.
            #   T  Job is being moved to new location.
            #   U  Cycle-harvesting job is suspended due to keyboard activity.
            #   W  Job is waiting for its submitter-assigned start time to be reached.
            #   X  Subjob has completed execution or has been deleted.
            # Put in some logic checking for job_state?
            stime = maybe_get_time(info, 'stime')
            # Needed to calculate time in the queue
            if stime is None:
                start = datetime.datetime.now()
                stime = -1.
            else:
                start = stime
                stime = (stime - ctime).total_seconds()
            # Create a derived field which is the total time spent queuing
            # before the job started
            waitime = (start - ctime).total_seconds()
            year = ctime.year
            username = info['Job_Owner'].split('@')[0]
            resources = info['Resource_List']
            resources_used = info.get('resources_used', {})
            maxwalltime = walltime_to_seconds(resources['walltime'])
            walltime = walltime_to_seconds(resources_used.get('walltime', None))
            maxmem = int(parse_size(resources.get('mem', '0b').upper()))
            ncpus = resources.get('ncpus', None)
            mem = int(parse_size(resources_used.get('mem', '0b').upper()))
            cputime = walltime_to_seconds(resources_used.get('cput', None))
            try:
                cpuutil = cputime / (walltime * ncpus)
            except (ZeroDivisionError, TypeError):
                # TypeError covers a missing ncpus/walltime/cput (None);
                # previously only ZeroDivisionError was caught, so one
                # incomplete record aborted the whole parse.
                cpuutil = -1.
            exe = strip_ml(info.get('executable', ''))
            arglist = strip_ml(info.get('argument_list', ''))
            subarglist = info.get('Submit_arguments', '')
            # Use -999 to signify no exit status
            exit_status = info.get('Exit_status', -999)
            if verbose:
                print(year, info['queue'], jobid, info['project'], username,
                      info['job_state'], info['Job_Name'], resources['jobprio'],
                      exe, arglist + subarglist, ctime, mtime, qtime, stime,
                      waitime, maxwalltime, maxmem, ncpus, walltime, mem,
                      cputime, cpuutil, exit_status)
            db.addjob(year, info['queue'], jobid, info['project'], username,
                      info['job_state'], info['Job_Name'], resources['jobprio'],
                      exe, arglist + subarglist, ctime, mtime, qtime, stime,
                      waitime, maxwalltime, maxmem, ncpus, walltime, mem,
                      cputime, cpuutil, exit_status)
            nentries += 1
        except Exception:
            # Report the offending record, then re-raise so the failure is
            # not silently swallowed
            print("Error parsing {}".format(jobid))
            print(info)
            raise
    newrecords = db.getnumrecords() - numrecords
    print("Found {} entries. Added {} new records, {} records updated or unchanged".format(
        nentries, newrecords, nentries - newrecords))
def parse_SU_file(filename):
    """Parse a service-unit (SU) usage report into the per-project databases.

    The report is a line-oriented format with several sections (compute
    usage, per-user batch queue usage, storage grants); section membership
    is tracked with the insystem/inuser/instorage flags and a section ends
    when a line no longer splits into the expected number of fields.

    Relies on module-level globals: databases, dbfileprefix, verbose,
    ProjectDataset, parse_size, parse_inodenum.
    """
    insystem = False
    instorage = False
    inuser = False
    with open(filename) as f:
        year = ''
        quarter = ''
        for line in f:
            if line.startswith("%%%%%%%%%%%%%%%%%"):
                # Grab date string from the following line
                date = datetime.datetime.strptime(next(f).strip(os.linesep),
                                                  "%a %b %d %H:%M:%S %Z %Y").date()
            elif line.startswith("Usage Report:") and "Compute" in line:
                words = line.split()
                project = words[2].split('=')[1]
                year, quarter = words[4].split('=')[1].split('.')
                # Converted from a Python-2-only print statement to match the
                # print(year, quarter) call later in this function
                print(year, quarter)
                startdate, enddate = words[5].split('-')
                startdate = datetime.datetime.strptime(startdate.strip('('), "%d/%m/%Y").date()
                enddate = datetime.datetime.strptime(enddate.strip(')'), "%d/%m/%Y").date()
                if project not in databases:
                    dbfile = 'sqlite:///' + os.path.join(
                        dbfileprefix, "usage_{}_{}.db".format(project, year))
                    databases[project] = ProjectDataset(project, dbfile)
                db = databases[project]
                db.addquarter(year, quarter, startdate, enddate)
            elif line.startswith("Total Grant:"):
                total = line.split()[2]
                db.addgrant(year, quarter, total)
            elif line.startswith("System Queue"):
                insystem = True
                next(f)  # gobble the header underline
            elif insystem:
                try:
                    (system, queue, weight, usecpu, usewall, usesu,
                     tmp, tmp, tmp) = line.strip(os.linesep).split()
                except ValueError:
                    # Wrong field count marks the end of the section
                    insystem = False
                    continue
                db.addsystemqueue(system, queue, weight)
                db.addprojectusage(date, system, queue, usecpu, usewall, usesu)
            elif line.startswith("Batch Queue Usage per User"):
                inuser = True
                # Gobble three lines
                next(f); next(f); next(f)
            elif inuser:
                try:
                    (user, usecpu, usewall, usesu, tmp) = line.strip(os.linesep).split()
                except ValueError:
                    # Wrong field count marks the end of the section
                    inuser = False
                    continue
                db.adduser(user)
                if verbose:
                    print('Add usage ', date, user, usecpu, usewall, usesu)
                db.adduserusage(date, user, usecpu, usewall, usesu)
            elif line.startswith("System StoragePt"):
                instorage = True
                next(f)  # gobble the header underline
            elif instorage:
                try:
                    (systemname, storagept, grant, tmp, tmp,
                     igrant, tmp, tmp) = line.strip(os.linesep).split()
                except ValueError:
                    # Wrong field count marks the end of the section
                    instorage = False
                    continue
                print(year, quarter)
                db.addsystemstorage(systemname, storagept, year, quarter,
                                    parse_size(grant.upper()), parse_inodenum(igrant))