Exemplo n.º 1
0
Arquivo: pbs.py Projeto: nasiaa/apel
 def parse(self, line):
     '''
     Parse a single line from a PBS log file.

     The line is expected to look like
     ``<date>;<status>;<job name>;<key>=<value> <key>=<value> ...``.
     Two separators are in play: ';' between the four leading fields and
     whitespace between the key=value attributes of the last field.

     Returns an EventRecord for lines whose status is 'E'.  For any other
     status the method returns None; this is a valid outcome, not an
     error, and callers have to cope with it.

     Raises ValueError if the line has fewer than four ';'-separated
     fields, if an attribute has no '=', if a numeric attribute cannot be
     converted, or if a parsed duration is negative.  Raises KeyError if
     one of the attributes read below is missing from the line.
     '''
     # Split on the first three ';' only, so that a ';' inside the
     # attribute text cannot break the four-way unpacking.
     unused_date, status, jobName, rest = line.split(';', 3)

     # Only the 'E' status is accepted; every other record is skipped.
     if status != 'E':
         return None

     # Attributes are whitespace separated; only the first '=' splits key
     # from value, so values may themselves contain '='.
     data = {}
     for item in rest.split():
         key, value = item.split('=', 1)
         data[key] = value

     if self._mpi:
         nodes, cores = _parse_mpi(data['exec_host'])
     else:
         nodes, cores = 0, 0

     rc = {'Site': self.site_name,
           'JobName': jobName,
           'LocalUserID': data['user'],
           'LocalUserGroup': data['group'],
           'WallDuration': parse_time(data['resources_used.walltime']),
           'CpuDuration': parse_time(data['resources_used.cput']),
           'StartTime': int(data['start']),
           'StopTime': int(data['end']),
           'Infrastructure': "APEL-CREAM-PBS",
           'MachineName': self.machine_name,
           # Drop the two-character 'kb' suffix from the memory values.
           'MemoryReal': int(data['resources_used.mem'][:-2]),
           'MemoryVirtual': int(data['resources_used.vmem'][:-2]),
           'NodeCount': nodes,
           'Processors': cores}

     # Explicit raises rather than assert statements: asserts are removed
     # when Python runs with -O, which would silently disable the checks.
     if rc['CpuDuration'] < 0:
         raise ValueError('Negative CpuDuration value')
     if rc['WallDuration'] < 0:
         raise ValueError('Negative WallDuration value')

     record = EventRecord()
     record.set_all(rc)
     return record
Exemplo n.º 2
0
    def parse(self, line):
        '''
        Parse a single line from the SLURM accounting log file.

        The line is '|'-separated sacct output in this column order:
        JobID, JobName, User, Group, Start, End, Elapsed, CPUTimeRAW,
        Partition, NCPUS, NNodes, NodeList, MaxRSS, MaxVMSize, State.

        Returns an EventRecord for jobs whose State is 'COMPLETED' and
        None for every other state.

        Raises IndexError if the line has too few columns and ValueError
        if a numeric column cannot be converted or a duration is negative.
        '''
        # /usr/local/bin/sacct -P -n --format=JobID,JobName,User,Group,Start,End,Elapsed,CPUTimeRAW,Partition,NCPUS,NNodes,NodeList,MaxRSS,MaxVMSize,State -j $JOBID >> /var/log/apel/slurm_acc.20130311
        # 1007|cream_612883006|dteam005|dteam|2013-03-27T17:13:41|2013-03-27T17:13:44|00:00:03|3|prod|1|1|cert-40|||COMPLETED

        values = line.strip().split('|')

        if values[14] != 'COMPLETED':
            return None

        # The memory columns may be empty; when present the last character
        # (the 'K' unit) is dropped before converting.
        # NOTE(review): assumes the unit is always 'K' - confirm.
        rmem = None
        if values[12]:
            rmem = int(values[12][:-1])

        vmem = None
        if values[13]:
            vmem = int(values[13][:-1])

        rc = {
            'Site': self.site_name,
            'MachineName': self.machine_name,
            'Infrastructure': "APEL-CREAM-SLURM",
            'JobName': values[0],
            'LocalUserID': values[2],
            'LocalUserGroup': values[3],
            'WallDuration': parse_time(values[6]),
            'CpuDuration': int(values[7]),
            # SLURM gives timestamps which are in system time.
            'StartTime': parse_local_timestamp(values[4]),
            'StopTime': parse_local_timestamp(values[5]),
            # Partition is column 8; column 9 is NCPUS (see 'Processors').
            'Queue': values[8],
            'MemoryReal': rmem,  # KB
            'MemoryVirtual': vmem,  # KB
            'Processors': int(values[9]),
            'NodeCount': int(values[10]),
        }

        # Explicit raises rather than assert statements: asserts are removed
        # when Python runs with -O, which would silently disable the checks.
        if rc['CpuDuration'] < 0:
            raise ValueError('Negative CpuDuration value')
        if rc['WallDuration'] < 0:
            raise ValueError('Negative WallDuration value')

        record = EventRecord()
        record.set_all(rc)
        return record
Exemplo n.º 3
0
    def parse(self, line):
        '''
        Parse a single line from the SLURM accounting log file.

        The line is '|'-separated sacct output in this column order:
        JobID, JobName, User, Group, Start, End, Elapsed, CPUTimeRAW,
        Partition, NCPUS, NNodes, NodeList, MaxRSS, MaxVMSize.

        No job-state filtering is done here: every line is turned into an
        EventRecord, which is returned.

        Raises IndexError if the line has too few columns and ValueError
        if a numeric column cannot be converted or a duration is negative.
        '''
        # /usr/local/bin/sacct -P -n --format=JobID,JobName,User,Group,Start,End,Elapsed,CPUTimeRAW,Partition,NCPUS,NNodes,NodeList,MaxRSS,MaxVMSize -j $JOBID >> /var/log/apel/slurm_acc.20130311
        # 667|sleep|root|root|2013-03-11T12:47:37|2013-03-11T12:47:40|00:00:03|12|debug|4|2|cloud-vm-[03-04]|560K|100904K

        values = line.strip().split('|')

        # The memory columns may be empty (treated as 0); when present the
        # last character (the 'K' unit) is dropped and the rest is read as
        # a float, then truncated to int when stored.
        # NOTE(review): assumes the unit is always 'K' - confirm.
        rmem = 0
        if values[12]:
            rmem = float(values[12][:-1])

        vmem = 0
        if values[13]:
            vmem = float(values[13][:-1])

        rc = {
            'Site': self.site_name,
            'MachineName': self.machine_name,
            'Infrastructure': "APEL-CREAM-SLURM",
            'JobName': values[0],
            'LocalUserID': values[2],
            'LocalUserGroup': values[3],
            'WallDuration': parse_time(values[6]),
            # Going through float() accepts values written with a decimal
            # point.
            'CpuDuration': int(float(values[7])),
            # NOTE: time zone handling of these timestamps still needs
            # checking.
            'StartTime': parse_timestamp(values[4]),
            'StopTime': parse_timestamp(values[5]),
            # Partition is column 8; column 9 is NCPUS (see 'Processors').
            'Queue': values[8],
            'MemoryReal': int(rmem),  # KB
            'MemoryVirtual': int(vmem),  # KB
            'Processors': int(values[9]),
            'NodeCount': int(values[10]),
        }

        # Explicit raises rather than assert statements: asserts are removed
        # when Python runs with -O, which would silently disable the checks.
        if rc['CpuDuration'] < 0:
            raise ValueError('Negative CpuDuration value')
        if rc['WallDuration'] < 0:
            raise ValueError('Negative WallDuration value')

        record = EventRecord()
        record.set_all(rc)
        return record
Exemplo n.º 4
0
Arquivo: slurm.py Projeto: apel/apel
    def parse(self, line):
        """Turn one line of the SLURM accounting log into an EventRecord.

        The line is '|'-separated sacct output in this column order:
        JobID, JobName, User, Group, Start, End, Elapsed, CPUTimeRAW (some
        sites use TotalCPU instead), Partition, NCPUS, NNodes, NodeList,
        MaxRSS, MaxVMSize, State.

        Returns None when the State column is not one of the states listed
        below, otherwise the populated EventRecord.

        Raises ValueError for a negative CPU duration or a stop time that
        precedes the start time.
        """
        # Example input:
        # 1007|cream_612883006|dteam005|dteam|2013-03-27T17:13:41|2013-03-27T17:13:44|00:00:03|3|prod|1|1|cert-40|||COMPLETED
        fields = line.strip().split('|')

        # Only these states mean the job has stopped and used resources.
        finished_states = ('CANCELLED', 'COMPLETED', 'FAILED',
                           'NODE_FAIL', 'PREEMPTED', 'TIMEOUT')
        if fields[14] not in finished_states:
            return None

        # A ':' means TotalCPU was logged (d-h:m:s, h:m:s or m:s.s);
        # without one the column is CPUTimeRAW, a plain integer string.
        if ':' in fields[7]:
            to_cpu_seconds = parse_time
        else:
            to_cpu_seconds = int

        real_mem = self._normalise_memory(fields[12])
        virt_mem = self._normalise_memory(fields[13])

        details = {
            'Site': self.site_name,
            'MachineName': self.machine_name,
            'Infrastructure': "APEL-CREAM-SLURM",
            'JobName': fields[0],
            'LocalUserID': fields[2],
            'LocalUserGroup': fields[3],
            'WallDuration': parse_time(fields[6]),
            'CpuDuration': to_cpu_seconds(fields[7]),
            # SLURM reports timestamps in system (local) time.
            'StartTime': parse_local_timestamp(fields[4]),
            'StopTime': parse_local_timestamp(fields[5]),
            'Queue': fields[8],
            'MemoryReal': real_mem,  # KB
            'MemoryVirtual': virt_mem,  # KB
            'Processors': int(fields[9]),
            'NodeCount': int(fields[10]),
        }

        # An empty queue is left out entirely so the Record class can apply
        # its own handling (usually inserting the string 'None').
        if details['Queue'] == '':
            del details['Queue']

        # Sanity checks. WallDuration is not tested because parse_time
        # already prevents a negative result.
        if details['CpuDuration'] < 0:
            raise ValueError('Negative CpuDuration value')

        if details['StopTime'] < details['StartTime']:
            raise ValueError('StopTime less than StartTime')

        event = EventRecord()
        event.set_all(details)
        return event
Exemplo n.º 5
0
    def parse(self, line):
        '''
        Turn one line of the SLURM accounting log into an EventRecord.

        The line is '|'-separated sacct output in this column order:
        JobID, JobName, User, Group, Start, End, Elapsed, CPUTimeRAW,
        Partition, NCPUS, NNodes, NodeList, MaxRSS, MaxVMSize, State.

        Returns None unless the State column is 'COMPLETED', otherwise the
        populated EventRecord.

        Raises ValueError for a negative CPU or wall duration, or a stop
        time that precedes the start time.
        '''
        # Example input:
        # 1007|cream_612883006|dteam005|dteam|2013-03-27T17:13:41|2013-03-27T17:13:44|00:00:03|3|prod|1|1|cert-40|||COMPLETED
        columns = line.strip().split('|')

        job_state = columns[14]
        if job_state != 'COMPLETED':
            return None

        real_mem = self._normalise_memory(columns[12])
        virt_mem = self._normalise_memory(columns[13])

        job = {
            'Site': self.site_name,
            'MachineName': self.machine_name,
            'Infrastructure': "APEL-CREAM-SLURM",
            'JobName': columns[0],
            'LocalUserID': columns[2],
            'LocalUserGroup': columns[3],
            'WallDuration': parse_time(columns[6]),
            'CpuDuration': int(columns[7]),
            # SLURM reports timestamps in system (local) time.
            'StartTime': parse_local_timestamp(columns[4]),
            'StopTime': parse_local_timestamp(columns[5]),
            'Queue': columns[8],
            'MemoryReal': real_mem,  # KB
            'MemoryVirtual': virt_mem,  # KB
            'Processors': int(columns[9]),
            'NodeCount': int(columns[10]),
        }

        # An empty queue is left out entirely so the Record class can apply
        # its own handling (usually inserting the string 'None').
        if job['Queue'] == '':
            del job['Queue']

        # Sanity checks on the parsed values.
        if job['CpuDuration'] < 0:
            raise ValueError('Negative CpuDuration value')

        if job['WallDuration'] < 0:
            raise ValueError('Negative WallDuration value')

        if job['StopTime'] < job['StartTime']:
            raise ValueError('StopTime less than StartTime')

        result = EventRecord()
        result.set_all(job)
        return result
Exemplo n.º 6
0
    def parse(self, line):
        """Build an EventRecord from one line of the SLURM accounting log.

        Expected input is '|'-separated sacct output with the columns
        JobID, JobName, User, Group, Start, End, Elapsed, CPUTimeRAW (or
        TotalCPU at some sites), Partition, NCPUS, NNodes, NodeList,
        MaxRSS, MaxVMSize, State.

        Lines whose State is not in the accepted set yield None. Otherwise
        the populated EventRecord is returned.

        Raises ValueError when the CPU duration is negative or the stop
        time is earlier than the start time.
        """
        # Example input:
        # 1007|cream_612883006|dteam005|dteam|2013-03-27T17:13:41|2013-03-27T17:13:44|00:00:03|3|prod|1|1|cert-40|||COMPLETED
        parts = line.strip().split('|')

        # States showing the job has stopped and resources were used.
        accepted = ('CANCELLED', 'COMPLETED', 'FAILED', 'NODE_FAIL',
                    'PREEMPTED', 'TIMEOUT')
        if parts[14] not in accepted:
            return None

        # TotalCPU (d-h:m:s, h:m:s or m:s.s) always contains a ':' and
        # goes through parse_time; CPUTimeRAW is a plain integer string.
        convert_cpu = parse_time if ':' in parts[7] else int

        mem_real = self._normalise_memory(parts[12])
        mem_virtual = self._normalise_memory(parts[13])

        parsed = {}
        parsed['Site'] = self.site_name
        parsed['MachineName'] = self.machine_name
        parsed['Infrastructure'] = "APEL-CREAM-SLURM"
        parsed['JobName'] = parts[0]
        parsed['LocalUserID'] = parts[2]
        parsed['LocalUserGroup'] = parts[3]
        parsed['WallDuration'] = parse_time(parts[6])
        parsed['CpuDuration'] = convert_cpu(parts[7])
        # SLURM reports timestamps in system (local) time.
        parsed['StartTime'] = parse_local_timestamp(parts[4])
        parsed['StopTime'] = parse_local_timestamp(parts[5])
        parsed['Queue'] = parts[8]
        parsed['MemoryReal'] = mem_real  # KB
        parsed['MemoryVirtual'] = mem_virtual  # KB
        parsed['Processors'] = int(parts[9])
        parsed['NodeCount'] = int(parts[10])

        # Drop an empty queue so the Record class can apply its own
        # handling (usually inserting the string 'None').
        if parsed['Queue'] == '':
            del parsed['Queue']

        # Sanity checks. WallDuration is not tested because parse_time
        # already prevents a negative result.
        if parsed['CpuDuration'] < 0:
            raise ValueError('Negative CpuDuration value')

        if parsed['StopTime'] < parsed['StartTime']:
            raise ValueError('StopTime less than StartTime')

        rec = EventRecord()
        rec.set_all(parsed)
        return rec