def fixExitStatusLogLine(line, lineno):
    """One-time helper: back-fill the exit_status field of existing jobs
    from a single accounting-log line.

    line   -- one raw accounting-log line, format:
              'date;event;fulljobid;key=val key=val ...'
    lineno -- 1-based line number, used only in log messages
    """
    try:
        date, event, fulljobid, attrs = line.split(';')
    except ValueError:
        log(LOG_WARNING, "skipping invalid line %d: '%s'" % (lineno, line))
        return
    log(LOG_DEBUG, "processing accounting line: %s:%s:%s ..." % (date, event, fulljobid))

    attrdir = {}
    try:
        # attrs is a space-separated list of key=value pairs; values may
        # themselves contain '=', so split each pair only once.
        for key, val in (pair.split('=', 1) for pair in attrs.split()):
            attrdir[key] = val
    except ValueError:
        # a pair without '=' aborts parsing; keep whatever pairs were
        # collected before the malformed one (original behavior)
        log(LOG_WARNING, "skipping attributes parsing (line no %d has invalid attributes): '%s'" % (lineno, attrs))

    match = JOBID_REGEX.search(fulljobid)
    if match is None:
        # previously an unparsable job id raised AttributeError and aborted
        # the whole run; skip the line instead, like other malformed input
        log(LOG_WARNING, "skipping line %d: unparsable job id '%s'" % (lineno, fulljobid))
        return
    jobid_name, server_name = match.groups()
    server, created = getBatchServer(server_name)
    job, created = Job.objects.get_or_create(jobid=jobid_name, server=server)
    # has_key() is deprecated (and gone in Python 3); 'in' is equivalent
    if 'Exit_status' in attrdir:
        job.exit_status = int(attrdir['Exit_status'])
    job.save()
def _hms_to_seconds(hms):
    """Convert an 'HH:MM:SS' resource string to total seconds."""
    h, m, s = hms.split(":")
    return (int(h) * 60 + int(m)) * 60 + int(s)


def _expand_exec_host(exec_host):
    """Expand a PBS exec_host string into a list of 'node/slot' entries.

    PBSPro may compress consecutive slots as 'node1/0*2'; that is expanded
    to the generic form ['node1/0', 'node1/1'].
    """
    expanded = []
    for entry in exec_host.split('+'):
        if entry.find('*') >= 0:
            slot0, numslots = entry.split('*')
            node_name = slot0.split('/')[0]
            new_entries = ["%s/%d" % (node_name, i) for i in range(0, int(numslots))]
            log(LOG_DEBUG, "Exec_host %s converted to %s" % (entry, new_entries))
            expanded.extend(new_entries)
        else:
            expanded.append(entry)
    return expanded


def parseOneLogLine(line, lineno):
    """Parse one line from accounting log and insert the data into DB.

    line   -- one raw accounting-log line, format:
              'date;event;fulljobid;key=val key=val ...'
    lineno -- 1-based line number, used only in log messages
    """
    try:
        date, event, fulljobid, attrs = line.split(';')
    except ValueError:
        log(LOG_WARNING, "skipping invalid line %d: '%s'" % (lineno, line))
        return
    log(LOG_DEBUG, "processing accounting line: %s:%s:%s ..." % (date, event, fulljobid))

    # We ignore PBSPro Licensing lines (it is not job related)
    if event == 'L':
        log(LOG_DEBUG, "ignored licensing line")
        return

    attrdir = {}
    try:
        # key=value pairs; values may contain '=', so split only once
        for key, val in (pair.split('=', 1) for pair in attrs.split()):
            attrdir[key] = val
    except ValueError:
        # keep pairs parsed before the malformed one (original behavior)
        log(LOG_WARNING, "skipping line with invalid attribues %d: '%s'" % (lineno, attrs))

    match = JOBID_REGEX.search(fulljobid)
    if match is None:
        # previously an unparsable job id raised AttributeError and aborted
        # the whole run; skip the line instead, like other malformed input
        log(LOG_WARNING, "skipping line %d: unparsable job id '%s'" % (lineno, fulljobid))
        return
    jobid_name, server_name = match.groups()
    server, created = getBatchServer(server_name)
    if created:
        log(LOG_INFO, "new server will be created: %s" % server_name)

    job = SQLJob()
    job.jobid = jobid_name
    job.server_id = server.id
    job.refresh_id_jobstate_id()

    # 'owner' and 'requestor' both carry 'name@host'; when both are present
    # the requestor's submit host wins (original behavior)
    if 'owner' in attrdir:
        shname = attrdir['owner'].split('@')[1]
        submithost, created = getSubmitHost(shname)
        if created:
            log(LOG_INFO, "new submit host will be created: %s" % shname)
        job.submithost_id = submithost.id  # was .pk; unified with the requestor branch
    if 'requestor' in attrdir:
        shname = attrdir['requestor'].split('@')[1]
        submithost, created = getSubmitHost(shname)
        if created:
            log(LOG_INFO, "new submit host will be created: %s" % shname)
        job.submithost_id = submithost.id

    # group must be bound before the 'user' branch: the original code raised
    # NameError when a record carried 'user' without 'group'
    group = None
    if 'group' in attrdir:
        group, created = getGroup(attrdir['group'], server)
        if created:
            log(LOG_INFO, "new group will be created: %s" % attrdir['group'])
    if 'user' in attrdir:
        # NOTE(review): getUser now receives group=None when 'group' is
        # absent (previously a NameError) — confirm getUser copes with None
        user, created = getUser(attrdir['user'], server, group)
        if created:
            log(LOG_INFO, "new user will be created: %s" % attrdir['user'])
        job.job_owner_id = user.id
        # TODO: convert this to SQL as well
        user.group = group

    if 'resources_used.cput' in attrdir:
        job.cput = _hms_to_seconds(attrdir['resources_used.cput'])
    if 'resources_used.walltime' in attrdir:
        job.walltime = _hms_to_seconds(attrdir['resources_used.walltime'])
    if 'resources_used.cput' in attrdir and 'resources_used.walltime' in attrdir:
        if job.walltime != 0:
            job.efficiency = 100 * job.cput / job.walltime
        else:
            job.efficiency = 0
    if 'Exit_status' in attrdir:
        job.exit_status = int(attrdir['Exit_status'])

    # map the accounting event type onto an internal job state
    if event == 'Q':
        new_state = getJobState('Q')
    elif event in ('S', 'R', 'C', 'T'):
        new_state = getJobState('R')
    elif event == 'E':
        new_state = getJobState('C')
    elif event in ('D', 'G'):
        new_state = getJobState('D')
    elif event == 'A':
        new_state = getJobState('A')
    else:
        log(LOG_ERROR, "Unknown event type in accounting log file: %s" % line)
        return

    # never move a job out of the finished ('C') state
    if job.job_state_id != getJobState('C').id:
        job.job_state_id = new_state.id
    else:
        log(LOG_INFO, "Job %s.%s is already finished, not changing the state." % (job.jobid, server.name))

    # running job cache update
    if 'queue' in attrdir:
        queue, created = getQueue(attrdir['queue'], server)
        if created:
            log(LOG_INFO, "new queue will be created: %s" % attrdir['queue'])
        job.queue_id = queue.id

    # time attributes arrive as unix epoch seconds
    if 'ctime' in attrdir:
        job.ctime = datetime.datetime.fromtimestamp(int(attrdir['ctime']))
    if 'mtime' in attrdir:
        job.mtime = datetime.datetime.fromtimestamp(int(attrdir['mtime']))
    if 'qtime' in attrdir:
        job.qtime = datetime.datetime.fromtimestamp(int(attrdir['qtime']))
    if 'etime' in attrdir:
        job.etime = datetime.datetime.fromtimestamp(int(attrdir['etime']))
    if 'start' in attrdir:
        job.start_time = datetime.datetime.fromtimestamp(int(attrdir['start']))
    if 'end' in attrdir:
        job.comp_time = datetime.datetime.fromtimestamp(int(attrdir['end']))

    if 'exec_host' in attrdir:
        job.jobslots = []
        for entry in _expand_exec_host(attrdir['exec_host']):
            name, slotstr = entry.split('/')
            slot = int(slotstr)
            node, created = getNode(name, server)
            if created:
                log(LOG_INFO, "new node will be created: node name: %s" % (name))
            node.save()
            js, created = getJobSlot(slot=slot, node=node)
            if created:
                log(LOG_INFO, "new jobslot will be created: slot: %d, node name: %s" % (slot, name))
            js.save()
            job.jobslots.append(js.id)

    job.save()
    if job.id == -1:
        job.refresh_id_jobstate_id()

    # record the accounting event itself; accounting dates look like
    # 'MM/DD/YYYY HH:MM:SS' and are rewritten to 'YYYY-MM-DD HH:MM:SS'.
    # INSERT IGNORE (MySQL-specific) keeps re-parsing the same log idempotent.
    d, t = date.split(' ')
    m, d, y = d.split('/')
    timestamp = '%s-%s-%s %s' % (y, m, d, t)
    # cursor is created here (not at function entry) so early returns do not
    # waste one, and it is always closed
    cursor = connection.cursor()
    try:
        cursor.execute("INSERT IGNORE INTO trqacc_accountingevent (timestamp, type, job_id) VALUES (%s,%s,%s)",
                       [timestamp, event, job.id])
    finally:
        cursor.close()