def parse(self, line):
    '''
    Parses single line from PBS log file.

    Please notice, that we use two different separators: ';' and ' '

    The line is split on ';' into exactly four parts (date, status, job
    name and the remainder); the remainder is then split on whitespace
    into key=value items.

    Returns an EventRecord for lines whose status is 'E'.  For any other
    status None is returned; this is a valid outcome, not an error.

    Raises ValueError if the line does not split into four parts, if an
    item has no '=', or if the parsed CpuDuration or WallDuration is
    negative.  Raises KeyError if an expected key is missing.
    '''
    data = {}
    unused_date, status, jobName, rest = line.split(';')

    # we accept only 'E' status
    # be careful!: this parse can return None, and this is _valid_ situation
    if status != 'E':
        return None

    for item in rest.split():
        # Split on the first '=' only, so values may themselves contain '='.
        key, value = item.split('=', 1)
        data[key] = value

    if self._mpi:
        nodes, cores = _parse_mpi(data['exec_host'])
    else:
        nodes, cores = 0, 0

    # map each field to functions which will extract them
    mapping = {
        'Site': lambda x: self.site_name,
        'JobName': lambda x: jobName,
        'LocalUserID': lambda x: x['user'],
        'LocalUserGroup': lambda x: x['group'],
        'WallDuration': lambda x: parse_time(x['resources_used.walltime']),
        'CpuDuration': lambda x: parse_time(x['resources_used.cput']),
        'StartTime': lambda x: int(x['start']),
        'StopTime': lambda x: int(x['end']),
        'Infrastructure': lambda x: "APEL-CREAM-PBS",
        'MachineName': lambda x: self.machine_name,
        # remove 'kb' string from the end
        'MemoryReal': lambda x: int(x['resources_used.mem'][:-2]),
        'MemoryVirtual': lambda x: int(x['resources_used.vmem'][:-2]),
        'NodeCount': lambda x: nodes,
        'Processors': lambda x: cores,
    }

    rc = {}
    for key in mapping:
        rc[key] = mapping[key](data)

    # Input checking.  Explicit raises are used instead of assert
    # statements because asserts are removed when Python runs with -O,
    # which would let negative durations through silently.
    if rc['CpuDuration'] < 0:
        raise ValueError('Negative CpuDuration value')
    if rc['WallDuration'] < 0:
        raise ValueError('Negative WallDuration value')

    record = EventRecord()
    record.set_all(rc)
    return record
def parse(self, line):
    '''
    Parses single line from accounting log file.

    The line is expected to be '|'-separated sacct output in this order:
    JobID,JobName,User,Group,Start,End,Elapsed,CPUTimeRAW,Partition,
    NCPUS,NNodes,NodeList,MaxRSS,MaxVMSize,State

    Example:
    1007|cream_612883006|dteam005|dteam|2013-03-27T17:13:41|2013-03-27T17:13:44|00:00:03|3|prod|1|1|cert-40|||COMPLETED

    Returns an EventRecord for lines whose State is 'COMPLETED' and None
    for every other state.

    Raises ValueError if the parsed CpuDuration or WallDuration is
    negative, and IndexError if the line has too few fields.
    '''
    values = line.strip().split('|')

    if values[14] != 'COMPLETED':
        return None

    # MaxRSS and MaxVMSize may be empty; they are then recorded as None.
    rmem = None
    if values[12]:
        # remove 'K' string from the end
        rmem = int(values[12][:-1])

    vmem = None
    if values[13]:
        # remove 'K' string from the end
        vmem = int(values[13][:-1])

    mapping = {
        'Site': lambda x: self.site_name,
        'MachineName': lambda x: self.machine_name,
        'Infrastructure': lambda x: "APEL-CREAM-SLURM",
        'JobName': lambda x: x[0],
        'LocalUserID': lambda x: x[2],
        'LocalUserGroup': lambda x: x[3],
        'WallDuration': lambda x: parse_time(x[6]),
        'CpuDuration': lambda x: int(x[7]),
        # SLURM gives timestamps which are in system time.
        'StartTime': lambda x: parse_local_timestamp(x[4]),
        'StopTime': lambda x: parse_local_timestamp(x[5]),
        # Partition is field 8; field 9 is NCPUS (used for Processors).
        'Queue': lambda x: x[8],
        'MemoryReal': lambda x: rmem,  # KB
        'MemoryVirtual': lambda x: vmem,  # KB
        'Processors': lambda x: int(x[9]),
        'NodeCount': lambda x: int(x[10]),
    }

    rc = {}
    for key in mapping:
        rc[key] = mapping[key](values)

    # Input checking.  Explicit raises are used instead of assert
    # statements because asserts are removed when Python runs with -O.
    if rc['CpuDuration'] < 0:
        raise ValueError('Negative CpuDuration value')
    if rc['WallDuration'] < 0:
        raise ValueError('Negative WallDuration value')

    record = EventRecord()
    record.set_all(rc)
    return record
def parse(self, line):
    '''
    Parses single line from accounting log file.

    The line is expected to be '|'-separated sacct output in this order:
    JobID,JobName,User,Group,Start,End,Elapsed,CPUTimeRAW,Partition,
    NCPUS,NNodes,NodeList,MaxRSS,MaxVMSize

    Example:
    667|sleep|root|root|2013-03-11T12:47:37|2013-03-11T12:47:40|00:00:03|12|debug|4|2|cloud-vm-[03-04]|560K|100904K

    There is no State column in this format, so every line is turned
    into an EventRecord, which is returned.

    Raises ValueError if the parsed CpuDuration or WallDuration is
    negative, and IndexError if the line has too few fields.
    '''
    values = line.strip().split('|')

    # MaxRSS and MaxVMSize may be empty; they are then recorded as 0.
    rmem = 0
    if values[12]:
        # remove 'K' string from the end
        rmem = float(values[12][:-1])

    vmem = 0
    if values[13]:
        # remove 'K' string from the end
        vmem = float(values[13][:-1])

    mapping = {
        'Site': lambda x: self.site_name,
        'MachineName': lambda x: self.machine_name,
        'Infrastructure': lambda x: "APEL-CREAM-SLURM",
        'JobName': lambda x: x[0],
        'LocalUserID': lambda x: x[2],
        'LocalUserGroup': lambda x: x[3],
        'WallDuration': lambda x: parse_time(x[6]),
        # Go through float first so values such as '12.0' are accepted.
        'CpuDuration': lambda x: int(float(x[7])),
        # need to check timezones
        'StartTime': lambda x: parse_timestamp(x[4]),
        'StopTime': lambda x: parse_timestamp(x[5]),
        # Partition is field 8; field 9 is NCPUS (used for Processors).
        'Queue': lambda x: x[8],
        'MemoryReal': lambda x: int(rmem),  # KB
        'MemoryVirtual': lambda x: int(vmem),  # KB
        'Processors': lambda x: int(x[9]),
        'NodeCount': lambda x: int(x[10]),
    }

    rc = {}
    for key in mapping:
        rc[key] = mapping[key](values)

    # Input checking.  Explicit raises are used instead of assert
    # statements because asserts are removed when Python runs with -O.
    if rc['CpuDuration'] < 0:
        raise ValueError('Negative CpuDuration value')
    if rc['WallDuration'] < 0:
        raise ValueError('Negative WallDuration value')

    record = EventRecord()
    record.set_all(rc)
    return record
def parse(self, line):
    """Parse single line from accounting log file.

    The line is expected to be '|'-separated sacct output in this order:
    JobID,JobName,User,Group,Start,End,Elapsed,CPUTimeRAW,Partition,
    NCPUS,NNodes,NodeList,MaxRSS,MaxVMSize,State
    Some sites will use TotalCPU rather than CPUTimeRAW.

    Example:
    1007|cream_612883006|dteam005|dteam|2013-03-27T17:13:41|2013-03-27T17:13:44|00:00:03|3|prod|1|1|cert-40|||COMPLETED

    Returns an EventRecord, or None when the State field is not one of
    the accepted end states.  Raises ValueError for a negative
    CpuDuration or a StopTime earlier than the StartTime.
    """
    fields = line.strip().split('|')

    # These statuses indicate the job has stopped and resources were used.
    finished_states = ('CANCELLED', 'COMPLETED', 'FAILED',
                       'NODE_FAIL', 'PREEMPTED', 'TIMEOUT')
    if fields[14] not in finished_states:
        return None

    # Select CPU time parsing function based on field used.
    if ':' in fields[7]:
        # TotalCPU used which has the form d-h:m:s, h:m:s or m:s.s.
        cput_function = parse_time
    else:
        # CPUTimeRAW used which is a plain integer (as a string).
        cput_function = int

    rmem = self._normalise_memory(fields[12])
    vmem = self._normalise_memory(fields[13])

    rc = {
        'Site': self.site_name,
        'MachineName': self.machine_name,
        'Infrastructure': "APEL-CREAM-SLURM",
        'JobName': fields[0],
        'LocalUserID': fields[2],
        'LocalUserGroup': fields[3],
        'WallDuration': parse_time(fields[6]),
        'CpuDuration': cput_function(fields[7]),
        # SLURM gives timestamps which are in system time.
        'StartTime': parse_local_timestamp(fields[4]),
        'StopTime': parse_local_timestamp(fields[5]),
        'Queue': fields[8],
        'MemoryReal': rmem,  # KB
        'MemoryVirtual': vmem,  # KB
        'Processors': int(fields[9]),
        'NodeCount': int(fields[10]),
    }

    # Delete the Queue key if empty and let the Record class handle it
    # (usually by inserting the string 'None').
    if rc['Queue'] == '':
        del rc['Queue']

    # Input checking
    if rc['CpuDuration'] < 0:
        raise ValueError('Negative CpuDuration value')
    # No negative WallDuration test as parse_time prevents that.
    if rc['StopTime'] < rc['StartTime']:
        raise ValueError('StopTime less than StartTime')

    record = EventRecord()
    record.set_all(rc)
    return record
def parse(self, line):
    '''
    Parses single line from accounting log file.

    The line is expected to be '|'-separated sacct output in this order:
    JobID,JobName,User,Group,Start,End,Elapsed,CPUTimeRAW,Partition,
    NCPUS,NNodes,NodeList,MaxRSS,MaxVMSize,State

    Example:
    1007|cream_612883006|dteam005|dteam|2013-03-27T17:13:41|2013-03-27T17:13:44|00:00:03|3|prod|1|1|cert-40|||COMPLETED

    Returns an EventRecord, or None when the State field is anything
    other than 'COMPLETED'.  Raises ValueError for a negative
    CpuDuration, a negative WallDuration, or a StopTime earlier than
    the StartTime.
    '''
    columns = line.strip().split('|')

    state = columns[14]
    if state != 'COMPLETED':
        return None

    rmem = self._normalise_memory(columns[12])
    vmem = self._normalise_memory(columns[13])

    # Each record field is paired with a function that extracts it from
    # the list of columns.
    extractors = {
        'Site': lambda x: self.site_name,
        'MachineName': lambda x: self.machine_name,
        'Infrastructure': lambda x: "APEL-CREAM-SLURM",
        'JobName': lambda x: x[0],
        'LocalUserID': lambda x: x[2],
        'LocalUserGroup': lambda x: x[3],
        'WallDuration': lambda x: parse_time(x[6]),
        'CpuDuration': lambda x: int(x[7]),
        # SLURM gives timestamps which are in system time.
        'StartTime': lambda x: parse_local_timestamp(x[4]),
        'StopTime': lambda x: parse_local_timestamp(x[5]),
        'Queue': lambda x: x[8],
        'MemoryReal': lambda x: rmem,  # KB
        'MemoryVirtual': lambda x: vmem,  # KB
        'Processors': lambda x: int(x[9]),
        'NodeCount': lambda x: int(x[10]),
    }

    rc = {}
    for field_name, extract in extractors.items():
        rc[field_name] = extract(columns)

    # Delete the Queue key if empty and let the Record class handle it
    # (usually by inserting the string 'None').
    if rc['Queue'] == '':
        del rc['Queue']

    # Input checking
    if rc['CpuDuration'] < 0:
        raise ValueError('Negative CpuDuration value')
    if rc['WallDuration'] < 0:
        raise ValueError('Negative WallDuration value')
    if rc['StopTime'] < rc['StartTime']:
        raise ValueError('StopTime less than StartTime')

    record = EventRecord()
    record.set_all(rc)
    return record
def parse(self, line):
    """Parse single line from accounting log file.

    The line is expected to be '|'-separated sacct output in this order:
    JobID,JobName,User,Group,Start,End,Elapsed,CPUTimeRAW,Partition,
    NCPUS,NNodes,NodeList,MaxRSS,MaxVMSize,State
    Some sites will use TotalCPU rather than CPUTimeRAW.

    Example:
    1007|cream_612883006|dteam005|dteam|2013-03-27T17:13:41|2013-03-27T17:13:44|00:00:03|3|prod|1|1|cert-40|||COMPLETED

    Returns an EventRecord, or None when the State field is not one of
    the accepted end states.  Raises ValueError for a negative
    CpuDuration or a StopTime earlier than the StartTime.
    """
    parts = line.strip().split('|')

    # These statuses indicate the job has stopped and resources were used.
    if parts[14] not in ('CANCELLED', 'COMPLETED', 'FAILED',
                         'NODE_FAIL', 'PREEMPTED', 'TIMEOUT'):
        return None

    # Select CPU time parsing function based on field used:
    # CPUTimeRAW is a plain integer (as a string), while TotalCPU has the
    # form d-h:m:s, h:m:s or m:s.s and therefore contains a ':'.
    cput_function = int
    if ':' in parts[7]:
        cput_function = parse_time

    rmem = self._normalise_memory(parts[12])
    vmem = self._normalise_memory(parts[13])

    mapping = {
        'Site': lambda x: self.site_name,
        'MachineName': lambda x: self.machine_name,
        'Infrastructure': lambda x: "APEL-CREAM-SLURM",
        'JobName': lambda x: x[0],
        'LocalUserID': lambda x: x[2],
        'LocalUserGroup': lambda x: x[3],
        'WallDuration': lambda x: parse_time(x[6]),
        'CpuDuration': lambda x: cput_function(x[7]),
        # SLURM gives timestamps which are in system time.
        'StartTime': lambda x: parse_local_timestamp(x[4]),
        'StopTime': lambda x: parse_local_timestamp(x[5]),
        'Queue': lambda x: x[8],
        'MemoryReal': lambda x: rmem,  # KB
        'MemoryVirtual': lambda x: vmem,  # KB
        'Processors': lambda x: int(x[9]),
        'NodeCount': lambda x: int(x[10]),
    }

    # Run every extractor against the split line.
    rc = dict((key, getter(parts)) for key, getter in mapping.items())

    # Drop the Queue key if empty and let the Record class handle it
    # (usually by inserting the string 'None').
    if rc['Queue'] == '':
        rc.pop('Queue')

    # Input checking
    if rc['CpuDuration'] < 0:
        raise ValueError('Negative CpuDuration value')
    # No negative WallDuration test as parse_time prevents that.
    if rc['StopTime'] < rc['StartTime']:
        raise ValueError('StopTime less than StartTime')

    record = EventRecord()
    record.set_all(rc)
    return record