Example #1
0
    def priv_ports_info(hostname=None):
        """
        Return a list of privileged ports in use on a given host

        :param hostname: The host on which to query privileged ports
                         usage. Defaults to the local host
        :type hostname: str or None
        :returns: list of report lines describing privileged (< 1024)
                  ports in use, or False if the netstat command failed
        """
        from ptl.utils.pbs_dshutils import DshUtils

        # Raw strings: "\s"/"\d" inside a plain literal are invalid
        # escape sequences and warn on modern Python.
        netstat_tag = re.compile(r"tcp[\s]+[\d]+[\s]+[\d]+[\s]+"
                                 r"(?P<srchost>[\w\*\.]+):(?P<srcport>[\d]+)"
                                 r"[\s]+(?P<desthost>[\.\w\*:]+):"
                                 r"(?P<destport>[\d]+)"
                                 r"[\s]+(?P<state>[\w]+).*")
        du = DshUtils()
        ret = du.run_cmd(hostname, ['netstat', '-at', '--numeric-ports'])
        if ret['rc'] != 0:
            return False

        msg = []
        lines = ret['out']
        resv_ports = {}
        source_hosts = []
        for line in lines:
            m = netstat_tag.match(line)
            if m:
                srcport = int(m.group('srcport'))
                srchost = m.group('srchost')
                destport = int(m.group('destport'))
                desthost = m.group('desthost')
                if srcport < 1024:
                    if srchost not in source_hosts:
                        source_hosts.append(srchost)
                    msg.append(line)
                    if srchost not in resv_ports:
                        resv_ports[srchost] = [srcport]
                    elif srcport not in resv_ports[srchost]:
                        resv_ports[srchost].append(srcport)
                if destport < 1024:
                    msg.append(line)
                    if desthost not in resv_ports:
                        resv_ports[desthost] = [destport]
                    elif destport not in resv_ports[desthost]:
                        resv_ports[desthost].append(destport)

        if len(resv_ports) > 0:
            msg.append('\nPrivilege ports in use: ')
            for k, v in resv_ports.items():
                # ports were stored as ints; stringify for join()
                msg.append('\t' + k + ': ' + ",".join(str(p) for p in v))
            for sh in source_hosts:
                msg.append('\nTotal on ' + sh + ': ' +
                           str(len(resv_ports[sh])))
        else:
            msg.append('No privileged ports currently allocated')

        return msg
Example #2
0
    def priv_ports_info(hostname=None):
        """
        Return a list of privileged ports in use on a given host

        :param hostname: The host on which to query privileged ports
                         usage. Defaults to the local host
        :type hostname: str or None
        :returns: list of report lines describing privileged (< 1024)
                  ports in use, or False if the netstat command failed
        """
        from ptl.utils.pbs_dshutils import DshUtils

        # Raw strings: "\s"/"\d" inside a plain literal are invalid
        # escape sequences and warn on modern Python.
        netstat_tag = re.compile(r"tcp[\s]+[\d]+[\s]+[\d]+[\s]+"
                                 r"(?P<srchost>[\w\*\.]+):(?P<srcport>[\d]+)"
                                 r"[\s]+(?P<desthost>[\.\w\*:]+):"
                                 r"(?P<destport>[\d]+)"
                                 r"[\s]+(?P<state>[\w]+).*")
        du = DshUtils()
        ret = du.run_cmd(hostname, ['netstat', '-at', '--numeric-ports'])
        if ret['rc'] != 0:
            return False

        msg = []
        lines = ret['out']
        resv_ports = {}
        source_hosts = []
        for line in lines:
            m = netstat_tag.match(line)
            if m:
                srcport = int(m.group('srcport'))
                srchost = m.group('srchost')
                destport = int(m.group('destport'))
                desthost = m.group('desthost')
                if srcport < 1024:
                    if srchost not in source_hosts:
                        source_hosts.append(srchost)
                    msg.append(line)
                    if srchost not in resv_ports:
                        resv_ports[srchost] = [srcport]
                    elif srcport not in resv_ports[srchost]:
                        resv_ports[srchost].append(srcport)
                if destport < 1024:
                    msg.append(line)
                    if desthost not in resv_ports:
                        resv_ports[desthost] = [destport]
                    elif destport not in resv_ports[desthost]:
                        resv_ports[desthost].append(destport)

        if len(resv_ports) > 0:
            msg.append('\nPrivilege ports in use: ')
            for k, v in resv_ports.items():
                # ports were stored as ints; stringify for join()
                msg.append('\t' + k + ': ' + ",".join(str(p) for p in v))
            for sh in source_hosts:
                msg.append('\nTotal on ' + sh + ': ' +
                           str(len(resv_ports[sh])))
        else:
            msg.append('No privileged ports currently allocated')

        return msg
Example #3
0
class SyncData(threading.Thread):

    """
    Worker thread that copies per-test PBS data directories to a
    shared path.

    Each queue item is a tuple of (host, datadir, build_info,
    suite_name, hostname, test_name, log_prefix); for every item a
    small shell script is generated and executed (via sudo) on
    ``host`` to copy the collected data under ``sharedpath``.
    """

    def __init__(self, sharedpath, queue):
        threading.Thread.__init__(self)
        self.sharedpath = sharedpath
        self.queue = queue
        self._go = True          # loop flag; cleared by stop()
        self.du = DshUtils()

    def run(self):
        while self._go:
            try:
                # Block for up to one second rather than spinning:
                # get(False, 1.0) ignores the timeout and returns (or
                # raises Empty) immediately, busy-waiting the CPU.
                host, datadir, bi, sn, hostname, tn, lp = self.queue.get(
                    True, 1.0)
            except Queue.Empty:
                continue
            destdatadir = os.path.join(self.sharedpath, bi, sn, hostname, tn,
                                       lp)
            homedir = os.path.join(datadir, 'PBS_' + host)
            _s = ['#!/bin/bash']
            _s += ['mkdir -p %s' % (destdatadir)]
            _s += ['chmod -R 0755 %s' % (destdatadir)]
            _s += ['cp -rp %s %s' % (homedir, destdatadir)]
            _s += ['cp %s/qstat_tf %s' % (datadir, destdatadir)]
            _s += ['cp %s/pbsnodes %s' % (datadir, destdatadir)]
            _s += ['cp %s/print_server %s' % (datadir, destdatadir)]
            _s += ['cp %s/logfile_* %s' % (datadir, destdatadir)]
            _s += ['cat %s/uptime >> %s/uptime' % (datadir, destdatadir)]
            _s += ['cat %s/vmstat >> %s/vmstat' % (datadir, destdatadir)]
            _s += ['cat %s/netstat >> %s/netstat' % (datadir, destdatadir)]
            _s += ['cat %s/ps >> %s/ps' % (datadir, destdatadir)]
            _s += ['cat %s/df >> %s/df' % (datadir, destdatadir)]
            # 0o755 is valid on Python 2.6+ and 3; the bare 0755
            # literal is a syntax error under Python 3.
            fd, fn = self.du.mkstemp(host, mode=0o755, body='\n'.join(_s))
            os.close(fd)
            self.du.run_cmd(host, cmd=fn, sudo=True)
            # Remove the generated helper script (previously leaked).
            self.du.rm(host, fn, force=True, sudo=True)

    def stop(self):
        """Ask the worker loop to exit after the current iteration."""
        self._go = False
Example #4
0
class SyncData(threading.Thread):

    """
    Worker thread that copies per-test PBS data directories to a
    shared path.

    Each queue item is a tuple of (host, datadir, build_info,
    suite_name, hostname, test_name, log_prefix); for every item a
    small shell script is generated and executed (via sudo) on
    ``host`` to copy the collected data under ``sharedpath``.
    """

    def __init__(self, sharedpath, queue):
        threading.Thread.__init__(self)
        self.sharedpath = sharedpath
        self.queue = queue
        self._go = True          # loop flag; cleared by stop()
        self.du = DshUtils()

    def run(self):
        while self._go:
            try:
                # Block for up to one second rather than spinning:
                # get(False, 1.0) ignores the timeout and returns (or
                # raises Empty) immediately, busy-waiting the CPU.
                host, datadir, bi, sn, hostname, tn, lp = self.queue.get(
                    True, 1.0)
            except Queue.Empty:
                continue
            destdatadir = os.path.join(self.sharedpath, bi, sn, hostname, tn,
                                       lp)
            homedir = os.path.join(datadir, 'PBS_' + host)
            _s = ['#!/bin/bash']
            _s += ['mkdir -p %s' % (destdatadir)]
            _s += ['chmod -R 0755 %s' % (destdatadir)]
            _s += ['cp -rp %s %s' % (homedir, destdatadir)]
            _s += ['cp %s/qstat_tf %s' % (datadir, destdatadir)]
            _s += ['cp %s/pbsnodes %s' % (datadir, destdatadir)]
            _s += ['cp %s/print_server %s' % (datadir, destdatadir)]
            _s += ['cp %s/logfile_* %s' % (datadir, destdatadir)]
            _s += ['cat %s/uptime >> %s/uptime' % (datadir, destdatadir)]
            _s += ['cat %s/vmstat >> %s/vmstat' % (datadir, destdatadir)]
            _s += ['cat %s/netstat >> %s/netstat' % (datadir, destdatadir)]
            _s += ['cat %s/ps >> %s/ps' % (datadir, destdatadir)]
            _s += ['cat %s/df >> %s/df' % (datadir, destdatadir)]
            # 0o755 is valid on Python 2.6+ and 3; the bare 0755
            # literal is a syntax error under Python 3.
            fd, fn = self.du.mkstemp(host, mode=0o755, body='\n'.join(_s))
            os.close(fd)
            self.du.run_cmd(host, cmd=fn, sudo=True)
            # Remove the generated helper script (previously leaked).
            self.du.rm(host, fn, force=True, sudo=True)

    def stop(self):
        """Ask the worker loop to exit after the current iteration."""
        self._go = False
Example #5
0
 def get_system_info(self, hostname=None):
     """
     Collect RAM and disk usage of ``hostname`` (local host when None)
     and store the results on ``self``.

     Sets ``self.system_total_ram`` and ``self.system_ram`` (GB),
     ``self.system_disk`` (GB) and ``self.system_disk_used_percent``.
     """
     du = DshUtils()
     # getting RAM size in gb
     mem_info = du.cat(hostname, "/proc/meminfo")
     if mem_info['rc'] != 0:
         _msg = 'failed to get content of /proc/meminfo of host: '
         # hostname may be None (local host); avoid concatenating None
         self.logger.error(_msg + str(hostname))
     else:
         got_mem_available = False
         # Defaults guard against a NameError below if any of these
         # fields is absent from /proc/meminfo on this kernel.
         mem_free = buffers = cached = 0.0
         for i in mem_info['out']:
             if "MemTotal" in i:
                 self.system_total_ram = float(i.split()[1]) / (2**20)
             elif "MemAvailable" in i:
                 mem_available = float(i.split()[1]) / (2**20)
                 got_mem_available = True
                 break
             elif "MemFree" in i:
                 mem_free = float(i.split()[1]) / (2**20)
             elif "Buffers" in i:
                 buffers = float(i.split()[1]) / (2**20)
             elif i.startswith("Cached"):
                 # startswith() so "SwapCached" does not also match
                 cached = float(i.split()[1]) / (2**20)
         if got_mem_available:
             self.system_ram = mem_available
         else:
             # Kernels without MemAvailable: approximate it
             self.system_ram = mem_free + buffers + cached
     # getting disk size in gb
     pbs_conf = du.parse_pbs_config(hostname)
     pbs_home_info = du.run_cmd(hostname,
                                cmd=['df', '-k', pbs_conf['PBS_HOME']])
     if pbs_home_info['rc'] != 0:
         _msg = 'failed to get output of df -k command of host: '
         self.logger.error(_msg + str(hostname))
     else:
         disk_info = pbs_home_info['out']
         # df output: line 1 holds the data row; field 3 = available
         # KB, field 4 = "use%"
         disk_size = disk_info[1].split()
         self.system_disk = float(disk_size[3]) / (2**20)
         self.system_disk_used_percent = float(disk_size[4].rstrip('%'))
Example #6
0
 def get_system_info(self, hostname=None):
     """
     Collect RAM and disk usage of ``hostname`` (local host when None)
     and store the results on ``self``.

     Sets ``self.system_ram`` and ``self.system_disk`` (both in GB).
     """
     du = DshUtils()
     # getting RAM size in gb
     mem_info = du.cat(hostname, "/proc/meminfo")
     if mem_info['rc'] != 0:
         _msg = 'failed to get content of /proc/meminfo of host: '
         # hostname may be None (local host); avoid concatenating None
         self.logger.error(_msg + str(hostname))
     else:
         got_mem_available = False
         # Defaults for the fallback path below
         mem_free = buffers = cached = 0.0
         for i in mem_info['out']:
             if "MemAvailable" in i:
                 self.system_ram = float(i.split()[1]) / (2**20)
                 got_mem_available = True
                 break
             elif "MemFree" in i:
                 mem_free = float(i.split()[1]) / (2**20)
             elif "Buffers" in i:
                 buffers = float(i.split()[1]) / (2**20)
             elif i.startswith("Cached"):
                 # startswith() so "SwapCached" does not also match
                 cached = float(i.split()[1]) / (2**20)
         if not got_mem_available:
             # Kernels lacking MemAvailable: approximate it so that
             # self.system_ram is always set (was left unset before).
             self.system_ram = mem_free + buffers + cached
     # getting disk size in gb
     pbs_conf = du.parse_pbs_config(hostname)
     pbs_home_info = du.run_cmd(hostname,
                                cmd=['df', '-k', pbs_conf['PBS_HOME']])
     if pbs_home_info['rc'] != 0:
         _msg = 'failed to get output of df -k command of host: '
         self.logger.error(_msg + str(hostname))
     else:
         disk_info = pbs_home_info['out']
         # df output: line 1 holds the data row; field 3 = available KB
         disk_size = disk_info[1].split()
         self.system_disk = float(disk_size[3]) / (2**20)
Example #7
0
class CrayUtils(object):

    """
    Cray specific utility class

    Runs ``apstat`` at construction time and caches the parsed
    per-node status and the compute-node summary.
    """
    node_status = []
    node_summary = {}
    cmd_output = []
    du = None

    def __init__(self):
        self.du = DshUtils()
        (self.node_status, self.node_summary) = self.parse_apstat_rn()

    def call_apstat(self, options):
        """
        Build the apstat command and run it.  Return the output of the command.

        :param options: options to pass to apstat command
        :type options: str
        :returns: the command output, or None on a non-Cray platform
        """
        hostname = socket.gethostname()
        platform = self.du.get_platform(hostname)
        # Copy the environment: assigning into os.environ directly
        # would mutate the environment of the whole process, not just
        # the apstat child.
        apstat_env = os.environ.copy()
        apstat_cmd = "apstat"
        if 'cray' not in platform:
            return None
        if 'craysim' in platform:
            # Simulator needs its own ALPS libraries and config
            lib_path = '$LD_LIBRARY_PATH:/opt/alps/tester/usr/lib/'
            apstat_env['LD_LIBRARY_PATH'] = lib_path
            apstat_env['ALPS_CONFIG_FILE'] = '/opt/alps/tester/alps.conf'
            apstat_env['apsched_sharedDir'] = '/opt/alps/tester/'
            apstat_cmd = "/opt/alps/tester/usr/bin/apstat -d ."
        cmd_run = self.du.run_cmd(hostname, [apstat_cmd, options],
                                  as_script=True, wait_on_script=True,
                                  env=apstat_env)
        return cmd_run

    def parse_apstat_rn(self):
        """
        Parse the apstat command output for node status and summary

        :returns: tuple of (node status, node summary); both empty on
                  a non-Cray platform
        """
        status = []
        summary = {}
        count = 0
        options = '-rn'
        cmd_run = self.call_apstat(options)
        if cmd_run is None:
            return (status, summary)
        cmd_result = cmd_run['out']
        keys = cmd_result[0].split()
        # Add a key 'Mode' because 'State' is composed of two list items, e.g:
        # State = 'UP  B', where Mode = 'B'
        k2 = ['Mode']
        keys = keys[0:3] + k2 + keys[3:]
        cmd_iter = iter(cmd_result)
        for line in cmd_iter:
            # Skip the first (header) line; it was parsed into keys
            if count == 0:
                count = 1
                continue
            if "Compute node summary" in line:
                # The two lines after the marker hold the summary
                # header and its values
                summary_line = next(cmd_iter)
                summary_keys = summary_line.split()
                summary_data = next(cmd_iter).split()
                sum_index = 0
                for a in summary_keys:
                    summary[a] = summary_data[sum_index]
                    sum_index += 1
                break
            obj = {}
            line = line.split()
            for i, value in enumerate(line):
                obj[keys[i]] = value
                if keys[i] == 'State':
                    # State spans two whitespace-separated tokens
                    obj[keys[i]] = value + "  " + line[i + 1]
            # If there is no Apids in the apstat then use 'None' as the value
            if "Apids" in obj:
                pass
            else:
                obj["Apids"] = None
            status.append(obj)
        return (status, summary)

    def count_node_summ(self, cnsumm='up'):
        """
        Return the value of any one of the following parameters as shown in
        the 'Compute Node Summary' section of 'apstat -rn' output:
        arch, config, up, resv, use, avail, down

        :param cnsumm: parameter which is being queried, defaults to 'up'
        :type cnsumm: str
        :returns: value of parameter being queried
        """
        return int(self.node_summary[cnsumm])

    def count_node_state(self, state='UP  B'):
        """
        Return how many nodes have a certain 'State' value.

        :param state: parameter which is being queried, defaults to 'UP  B'
        :type state: str
        :returns: count of how many nodes have the state
        """
        count = 0
        status = self.node_status
        for stat in status:
            if stat['State'] == state:
                count += 1
        return count

    def get_numthreads(self, nid):
        """
        Returns the number of hyperthreads for the given node, or None
        when not on a Cray platform.
        """
        options = '-N %d -n -f "nid,c/cu"' % int(nid)
        cmd_run = self.call_apstat(options)
        if cmd_run is None:
            return None
        cmd_result = cmd_run['out']
        cmd_iter = iter(cmd_result)
        numthreads = 0
        for line in cmd_iter:
            if "Compute node summary" in line:
                break
            elif "NID" in line:
                # header row
                continue
            else:
                # second column of the data row is c/cu (threads)
                key = line.split()
                numthreads = int(key[1])
        return numthreads

    def num_compute_vnodes(self, server):
        """
        Count the Cray compute nodes and return the value.
        """
        vnl = server.filter(MGR_OBJ_NODE,
                            {'resources_available.vntype': 'cray_compute'})
        return len(vnl["resources_available.vntype=cray_compute"])
Example #8
0
class CrayUtils(object):

    """
    Cray specific utility class

    Runs ``apstat`` at construction time and caches the parsed
    per-node status and the compute-node summary.
    """
    node_status = []
    node_summary = {}
    cmd_output = []
    du = None

    def __init__(self):
        self.du = DshUtils()
        (self.node_status, self.node_summary) = self.parse_apstat_rn()

    def call_apstat(self, options):
        """
        Build the apstat command and run it.  Return the output of the command.

        :param options: options to pass to apstat command
        :type options: str
        :returns: the command output, or None on a non-Cray platform
        """
        hostname = socket.gethostname()
        platform = self.du.get_platform(hostname)
        # Copy the environment: assigning into os.environ directly
        # would mutate the environment of the whole process, not just
        # the apstat child.
        apstat_env = os.environ.copy()
        apstat_cmd = "apstat"
        if 'cray' not in platform:
            return None
        if 'craysim' in platform:
            # Simulator needs its own ALPS libraries and config
            lib_path = '$LD_LIBRARY_PATH:/opt/alps/tester/usr/lib/'
            apstat_env['LD_LIBRARY_PATH'] = lib_path
            apstat_env['ALPS_CONFIG_FILE'] = '/opt/alps/tester/alps.conf'
            apstat_env['apsched_sharedDir'] = '/opt/alps/tester/'
            apstat_cmd = "/opt/alps/tester/usr/bin/apstat -d ."
        cmd_run = self.du.run_cmd(hostname, [apstat_cmd, options],
                                  as_script=True, wait_on_script=True,
                                  env=apstat_env)
        return cmd_run

    def parse_apstat_rn(self):
        """
        Parse the apstat command output for node status and summary

        :returns: tuple of (node status, node summary); both empty on
                  a non-Cray platform
        """
        status = []
        summary = {}
        count = 0
        options = '-rn'
        cmd_run = self.call_apstat(options)
        if cmd_run is None:
            return (status, summary)
        cmd_result = cmd_run['out']
        keys = cmd_result[0].split()
        # Add a key 'Mode' because 'State' is composed of two list items, e.g:
        # State = 'UP  B', where Mode = 'B'
        k2 = ['Mode']
        keys = keys[0:3] + k2 + keys[3:]
        cmd_iter = iter(cmd_result)
        for line in cmd_iter:
            # Skip the first (header) line; it was parsed into keys
            if count == 0:
                count = 1
                continue
            if "Compute node summary" in line:
                # The two lines after the marker hold the summary
                # header and its values
                summary_line = next(cmd_iter)
                summary_keys = summary_line.split()
                summary_data = next(cmd_iter).split()
                sum_index = 0
                for a in summary_keys:
                    summary[a] = summary_data[sum_index]
                    sum_index += 1
                break
            obj = {}
            line = line.split()
            for i, value in enumerate(line):
                obj[keys[i]] = value
                if keys[i] == 'State':
                    # State spans two whitespace-separated tokens
                    obj[keys[i]] = value + "  " + line[i + 1]
            # If there is no Apids in the apstat then use 'None' as the value
            if "Apids" in obj:
                pass
            else:
                obj["Apids"] = None
            status.append(obj)
        return (status, summary)

    def count_node_summ(self, cnsumm='up'):
        """
        Return the value of any one of the following parameters as shown in
        the 'Compute Node Summary' section of 'apstat -rn' output:
        arch, config, up, resv, use, avail, down

        :param cnsumm: parameter which is being queried, defaults to 'up'
        :type cnsumm: str
        :returns: value of parameter being queried
        """
        return int(self.node_summary[cnsumm])

    def count_node_state(self, state='UP  B'):
        """
        Return how many nodes have a certain 'State' value.

        :param state: parameter which is being queried, defaults to 'UP  B'
        :type state: str
        :returns: count of how many nodes have the state
        """
        count = 0
        status = self.node_status
        for stat in status:
            if stat['State'] == state:
                count += 1
        return count

    def get_numthreads(self, nid):
        """
        Returns the number of hyperthreads for the given node, or None
        when not on a Cray platform.
        """
        options = '-N %d -n -f "nid,c/cu"' % int(nid)
        cmd_run = self.call_apstat(options)
        if cmd_run is None:
            return None
        cmd_result = cmd_run['out']
        cmd_iter = iter(cmd_result)
        numthreads = 0
        for line in cmd_iter:
            if "Compute node summary" in line:
                break
            elif "NID" in line:
                # header row
                continue
            else:
                # second column of the data row is c/cu (threads)
                key = line.split()
                numthreads = int(key[1])
        return numthreads
Example #9
0
class PTLTestData(Plugin):
    """
    Save post analysis data on test cases failure or error
    """
    name = 'PTLTestData'
    score = sys.maxsize - 6
    logger = logging.getLogger(__name__)

    def __init__(self):
        Plugin.__init__(self)
        self.post_data_dir = None
        self.max_postdata_threshold = None
        self.__save_data_count = 0
        self.__priv_sn = ''
        self.du = DshUtils()

    def options(self, parser, env):
        """
        Register command line options
        """
        pass

    def set_data(self, post_data_dir, max_postdata_threshold):
        self.post_data_dir = post_data_dir
        self.max_postdata_threshold = max_postdata_threshold

    def configure(self, options, config):
        """
        Configure the plugin and system, based on selected options
        """
        self.config = config
        if self.post_data_dir is not None:
            self.enabled = True
        else:
            self.enabled = False

    def __save_home(self, test, status, err=None):
        if hasattr(test, 'test'):
            _test = test.test
            sn = _test.__class__.__name__
        elif hasattr(test, 'context'):
            _test = test.context
            sn = _test.__name__
        else:
            # test does not have any PBS Objects, so just return
            return
        if self.__priv_sn != sn:
            self.__save_data_count = 0
            self.__priv_sn = sn
        # Saving home might take time so disable timeout
        # handler set by runner
        tn = getattr(_test, '_testMethodName', 'unknown')
        testlogs = getattr(test, 'captured_logs', '')
        datadir = os.path.join(self.post_data_dir, sn, tn)
        if os.path.exists(datadir):
            _msg = 'Old post analysis data exists at %s' % datadir
            _msg += ', skipping saving data for this test case'
            self.logger.warn(_msg)
            _msg = 'Please remove old directory or'
            _msg += ' provide different directory'
            self.logger.warn(_msg)
            return
        if getattr(test, 'old_sigalrm_handler', None) is not None:
            _h = getattr(test, 'old_sigalrm_handler')
            signal.signal(signal.SIGALRM, _h)
            signal.alarm(0)
        self.logger.log(logging.DEBUG2, 'Saving post analysis data...')
        current_host = socket.gethostname().split('.')[0]
        self.du.mkdir(current_host,
                      path=datadir,
                      mode=0o755,
                      parents=True,
                      logerr=False,
                      level=logging.DEBUG2)
        if err is not None:
            if isclass(err[0]) and issubclass(err[0], SkipTest):
                status = 'SKIP'
                status_data = 'Reason = %s' % (err[1])
            else:
                if isclass(err[0]) and issubclass(err[0], TimeOut):
                    status = 'TIMEDOUT'
                status_data = getattr(test, 'err_in_string', '')
        else:
            status_data = ''
        logfile = os.path.join(datadir, 'logfile_' + status)
        f = open(logfile, 'w+')
        f.write(testlogs + '\n')
        f.write(status_data + '\n')
        f.write('test duration: %s\n' % str(getattr(test, 'duration', '0')))
        if status in ('PASS', 'SKIP'):
            # Test case passed or skipped, no need to save post analysis data
            f.close()
            return
        if ((self.max_postdata_threshold != 0)
                and (self.__save_data_count >= self.max_postdata_threshold)):
            _msg = 'Total number of saved post analysis data for this'
            _msg += ' testsuite is exceeded max postdata threshold'
            _msg += ' (%d)' % self.max_postdata_threshold
            f.write(_msg + '\n')
            self.logger.error(_msg)
            f.close()
            return

        servers = getattr(_test, 'servers', None)
        if servers is not None:
            server_host = servers.values()[0].shortname
        else:
            _msg = 'Could not find Server Object in given test object'
            _msg += ', skipping saving post analysis data'
            f.write(_msg + '\n')
            self.logger.warning(_msg)
            f.close()
            return
        moms = getattr(_test, 'moms', None)
        comms = getattr(_test, 'comms', None)
        client = getattr(_test.servers.values()[0], 'client', None)
        server = servers.values()[0]
        add_hosts = []
        if len(servers) > 1:
            for param in servers.values()[1:]:
                add_hosts.append(param.shortname)
        if moms is not None:
            for param in moms.values():
                add_hosts.append(param.shortname)
        if comms is not None:
            for param in comms.values():
                add_hosts.append(param.shortname)
        if client is not None:
            add_hosts.append(client.split('.')[0])

        add_hosts = list(set(add_hosts) - set([server_host]))

        pbs_snapshot_path = os.path.join(server.pbs_conf["PBS_EXEC"], "sbin",
                                         "pbs_snapshot")
        cur_user = self.du.get_current_user()
        cur_user_dir = pwd.getpwnam(cur_user).pw_dir
        cmd = [
            pbs_snapshot_path, '-H', server_host, '--daemon-logs', '2',
            '--accounting-logs', '2', '--with-sudo'
        ]
        if len(add_hosts) > 0:
            cmd += ['--additional-hosts=' + ','.join(add_hosts)]
        cmd += ['-o', cur_user_dir]
        ret = self.du.run_cmd(current_host,
                              cmd,
                              level=logging.DEBUG2,
                              logerr=False)
        if ret['rc'] != 0:
            _msg = 'Failed to get analysis information '
            _msg += 'on %s:' % server_host
            _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n'
            f.write(_msg + '\n')
            self.logger.error(_msg)
            f.close()
            return
        else:
            if len(ret['out']) == 0:
                self.logger.error('Snapshot command failed')
                f.close()
                return

        snap_out = ret['out'][0]
        snap_out_dest = (snap_out.split(":")[1]).strip()

        dest = os.path.join(datadir, 'PBS_' + server_host + '.tar.gz')
        ret = self.du.run_copy(current_host,
                               snap_out_dest,
                               dest,
                               sudo=True,
                               level=logging.DEBUG2)
        self.du.rm(current_host,
                   path=snap_out_dest,
                   recursive=True,
                   force=True,
                   level=logging.DEBUG2)

        f.close()
        self.__save_data_count += 1
        _msg = 'Saved post analysis data'
        self.logger.info(_msg)

    def addError(self, test, err):
        self.__save_home(test, 'ERROR', err)

    def addFailure(self, test, err):
        self.__save_home(test, 'FAIL', err)

    def addSuccess(self, test):
        self.__save_home(test, 'PASS')
Example #10
0
class PTLTestData(Plugin):
    """
    Save post analysis data on test cases failure or error

    On failure/error a shell script is generated and run (via sudo)
    on every involved host to collect PBS_HOME, system state
    (uptime/netstat/ps/df/vmstat) and daemon logs under a temp
    directory; when a shared path is configured, a SyncData thread
    copies each collected directory there.

    NOTE(review): Python 2 specific code (``sys.maxint``,
    ``Queue.Queue``, the ``0755`` octal literal).
    """
    name = 'PTLTestData'
    score = sys.maxint - 3
    logger = logging.getLogger(__name__)

    def __init__(self):
        # Optional shared destination for collected data
        self.sharedpath = None
        self.du = DshUtils()
        # Background sync thread and its work queue (started in begin())
        self.__syncth = None
        self.__queue = Queue.Queue()

    def options(self, parser, env):
        """
        Register command line options
        """
        pass

    def set_data(self, sharedpath):
        """Set the shared path the sync thread copies data to."""
        self.sharedpath = sharedpath

    def configure(self, options, config):
        """
        Configure the plugin and system, based on selected options
        """
        self.config = config
        self.enabled = True

    def __get_sntnbi_name(self, test):
        """
        Return (suite name, test name, build info) for *test*;
        'unknown' for anything that cannot be determined.
        """
        if hasattr(test, 'test'):
            _test = test.test
            sn = _test.__class__.__name__
        elif hasattr(test, 'context'):
            _test = test.context
            sn = _test.__name__
        else:
            return ('unknown', 'unknown', 'unknown')
        tn = getattr(_test, '_testMethodName', 'unknown')
        # Build info is the server's pbs_version attribute when a
        # server object is attached to the test
        if (hasattr(_test, 'server')
                and (getattr(_test, 'server', None) is not None)):
            bi = _test.server.attributes['pbs_version']
        else:
            bi = 'unknown'
        return (sn, tn, bi)

    def __save_home(self, test, status):
        """
        Collect PBS_HOME, system state and daemon logs from every host
        involved in *test* and, when a shared path is configured,
        queue the result for the sync thread.

        :param test: nose test (or context) object
        :param status: result label used in the logfile name,
                       e.g. 'FAIL' or 'ERROR'
        """
        if hasattr(test, 'test'):
            _test = test.test
        elif hasattr(test, 'context'):
            _test = test.context
        else:
            # test does not have any PBS Objects, so just return
            return
        if not hasattr(_test, 'server'):
            # test does not have any PBS Objects, so just return
            return
        # Log-collection window: 3 minutes before start until end time
        st = getattr(test, 'start_time', None)
        if st is not None:
            st = time.mktime(st.timetuple())
        else:
            st = time.time()
        st -= 180  # starttime - 3 min
        et = getattr(test, 'end_time', None)
        if et is not None:
            et = time.mktime(et.timetuple())
        else:
            et = time.time()
        hostname = socket.gethostname().split('.')[0]
        # Per-run suffix: job id when running under PBS, else timestamp
        lp = os.environ.get('PBS_JOBID',
                            time.strftime("%Y%b%d_%H_%m_%S", time.localtime()))
        sn, tn, bi = self.__get_sntnbi_name(test)
        # Gather the short names of all hosts running PBS daemons
        if getattr(_test, 'servers', None) is not None:
            shosts = map(lambda x: x.split('.')[0], _test.servers.host_keys())
        else:
            shosts = []
        if getattr(_test, 'schedulers', None) is not None:
            schosts = map(lambda x: x.split('.')[0],
                          _test.schedulers.host_keys())
        else:
            schosts = []
        if getattr(_test, 'moms', None) is not None:
            mhosts = map(lambda x: x.split('.')[0], _test.moms.host_keys())
        else:
            mhosts = []
        hosts = []
        hosts.extend(shosts)
        hosts.extend(schosts)
        hosts.extend(mhosts)
        hosts.append(hostname)
        hosts = sorted(set(hosts))
        for host in hosts:
            confpath = self.du.get_pbs_conf_file(host)
            tmpdir = self.du.get_tempdir(host)
            datadir = os.path.join(tmpdir, bi, sn, hostname, tn, lp)
            # Build a collection script to run on this host
            _s = ['#!/bin/bash']
            _s += ['. %s' % (confpath)]
            _s += ['mkdir -p %s' % (datadir)]
            _s += ['chmod -R 0755 %s' % (datadir)]
            if host == _test.server.shortname:
                # Server host only: dump queue/node/server state in
                # the background ('&'; reaped by the final 'wait')
                _l = '${PBS_EXEC}/bin/qstat -tf > %s/qstat_tf &' % (datadir)
                _s += [_l]
                _l = '${PBS_EXEC}/bin/pbsnodes -av > %s/pbsnodes &' % (datadir)
                _s += [_l]
                _l = '${PBS_EXEC}/bin/qmgr -c "p s"'
                _l += ' > %s/print_server &' % (datadir)
                _s += [_l]
            _s += ['echo "%s" >> %s/uptime' % ('*' * 80, datadir)]
            _s += ['echo "On host : %s" >> %s/uptime' % (host, datadir)]
            _s += ['uptime >> %s/uptime' % (datadir)]
            _s += ['echo "" >> %s/uptime' % (datadir)]
            _s += ['echo "%s" >> %s/netstat' % ('*' * 80, datadir)]
            _s += ['echo "On host : %s" >> %s/netstat' % (host, datadir)]
            # Fall back to 'ss' when netstat is not installed (which()
            # returns its argument unchanged when nothing is found)
            _cmd = self.du.which(host, 'netstat')
            if _cmd == 'netstat':
                _cmd = 'ss'
            if sys.platform.startswith('linux'):
                _cmd += ' -ap'
            else:
                _cmd += ' -an'
            _s += ['%s >> %s/netstat' % (_cmd, datadir)]
            _s += ['echo "" >> %s/netstat' % (datadir)]
            _s += ['echo "%s" >> %s/ps' % ('*' * 80, datadir)]
            _s += ['echo "On host : %s" >> %s/ps' % (host, datadir)]
            _s += ['ps -ef | grep pbs_ >> %s/ps' % (datadir)]
            _s += ['echo "" >> %s/ps' % (datadir)]
            _s += ['echo "%s" >> %s/df' % ('*' * 80, datadir)]
            _s += ['echo "On host : %s" >> %s/df' % (host, datadir)]
            _s += ['df -h >> %s/df' % (datadir)]
            _s += ['echo "" >> %s/df' % (datadir)]
            _s += ['echo "%s" >> %s/vmstat' % ('*' * 80, datadir)]
            _s += ['echo "On host : %s" >> %s/vmstat' % (host, datadir)]
            _s += ['vmstat >> %s/vmstat' % (datadir)]
            _s += ['echo "" >> %s/vmstat' % (datadir)]
            # Copy PBS_HOME, then prune bulky/duplicated content:
            # datastore is tarred+gzipped; logs and accounting are
            # saved separately below via __save_logs()
            _dst = os.path.join(datadir, 'PBS_' + host)
            _s += ['cp -rp ${PBS_HOME} %s' % (_dst)]
            _s += ['tar -cf %s/datastore.tar %s/datastore' % (_dst, _dst)]
            _s += ['gzip -rf %s/datastore.tar' % (_dst)]
            _s += ['rm -rf %s/datastore' % (_dst)]
            _s += ['rm -rf %s/*_logs' % (_dst)]
            _s += ['rm -rf %s/server_priv/accounting' % (_dst)]
            _s += ['cp %s %s/pbs.conf.%s' % (confpath, _dst, host)]
            if host == hostname:
                # Local host also records the captured error text
                _s += ['cat > %s/logfile_%s <<EOF' % (datadir, status)]
                _s += ['%s' % (getattr(test, 'err_in_string', ''))]
                _s += ['']
                _s += ['EOF']
            _s += ['wait']
            fd, fn = self.du.mkstemp(hostname, mode=0755, body='\n'.join(_s))
            os.close(fd)
            self.du.run_cmd(hostname, cmd=fn, sudo=True, logerr=False)
            self.du.rm(hostname, fn, force=True, sudo=True)
            svr = _test.servers[host]
            if svr is not None:
                self.__save_logs(svr, _dst, 'server_logs', st, et)
                _adst = os.path.join(_dst, 'server_priv')
                self.__save_logs(svr, _adst, 'accounting', st, et)
            if getattr(_test, 'moms', None) is not None:
                self.__save_logs(_test.moms[host], _dst, 'mom_logs', st, et)
            if getattr(_test, 'schedulers', None) is not None:
                self.__save_logs(_test.schedulers[host], _dst, 'sched_logs',
                                 st, et)
            # Hand the collected directory to the sync thread, if any
            if ((self.sharedpath is not None) and (self.__syncth is not None)):
                self.__queue.put((host, datadir, bi, sn, hostname, tn, lp))

    def __save_logs(self, obj, dst, name, st, et, jid=None):
        """
        Write log lines from PBS object *obj* for window [st, et] to
        file *name* under directory *dst*.

        :param name: 'accounting', 'tracejob' (requires jid) or a
                     daemon log name such as 'server_logs'
        """
        if name == 'accounting':
            logs = obj.log_lines('accounting',
                                 n='ALL',
                                 starttime=st,
                                 endtime=et)
            logs = map(lambda x: x + '\n', logs)
        elif name == 'tracejob':
            logs = obj.log_lines('tracejob', id=jid, n='ALL')
            name += '_' + jid
        else:
            # For daemon logs the PBS object itself is passed as the
            # logtype argument
            logs = obj.log_lines(obj, n='ALL', starttime=st, endtime=et)
        f = open(os.path.join(dst, name), 'w+')
        f.writelines(logs)
        f.close()

    def begin(self):
        """nose hook: start the sync thread when a shared path is set."""
        if self.sharedpath is not None:
            self.__syncth = SyncData(self.sharedpath, self.__queue)
            self.__syncth.daemon = True
            self.__syncth.start()

    def addError(self, test, err):
        """nose hook: test raised an unexpected exception."""
        self.__save_home(test, 'ERROR')

    def addFailure(self, test, err):
        """nose hook: a test assertion failed."""
        self.__save_home(test, 'FAIL')

    def finalize(self, result):
        """nose hook: drain the queue, then stop the sync thread."""
        if ((self.sharedpath is not None) and (self.__syncth is not None)):
            # Busy-wait until the sync thread has consumed all items
            while not self.__queue.empty():
                pass
            self.__syncth.stop()
            self.__syncth.join()
Example #11
0
class CrayUtils(object):

    """
    Cray specific utility class
    """
    # Class-level defaults; each instance overwrites these in __init__.
    node_status = []
    node_summary = {}
    du = None

    def __init__(self):
        self.du = DshUtils()
        (self.node_status, self.node_summary) = self.parse_apstat_rn()

    def parse_apstat_rn(self):
        """
        Run 'apstat -rn' on cray/craysim and parse its output.

        :returns: tuple of (node status, node summary) where node status
                  is a list of per-node dicts keyed by the apstat header
                  columns and node summary is a dict built from the
                  'Compute node summary' section
        """
        status = []
        summary = {}
        options = '-rn'
        hostname = socket.gethostname()
        platform = self.du.get_platform(hostname)
        # Copy the environment so the simulator-specific settings below
        # do not leak into this process (the original assigned os.environ
        # directly and mutated the live environment).
        apstat_env = os.environ.copy()
        apstat_cmd = "apstat"
        if 'cray' not in platform:
            # not a cray system: nothing to parse
            return (status, summary)
        if 'craysim' in platform:
            lib_path = '$LD_LIBRARY_PATH:/opt/alps/tester/usr/lib/'
            apstat_env['LD_LIBRARY_PATH'] = lib_path
            apstat_env['ALPS_CONFIG_FILE'] = '/opt/alps/tester/alps.conf'
            apstat_env['apsched_sharedDir'] = '/opt/alps/tester/'
            apstat_cmd = "/opt/alps/tester/usr/bin/apstat -d ."
        cmd_run = self.du.run_cmd(hostname, [apstat_cmd, options],
                                  as_script=True, wait_on_script=True,
                                  env=apstat_env)
        cmd_result = cmd_run['out']
        keys = cmd_result[0].split()
        # Add a key 'Mode' because 'State' is composed of two list items,
        # e.g: State = 'UP  B', where Mode = 'B'
        keys = keys[0:3] + ['Mode'] + keys[3:]
        cmd_iter = iter(cmd_result)
        next(cmd_iter)  # skip the header line already consumed into keys
        for line in cmd_iter:
            if "Compute node summary" in line:
                # the summary section is two lines: keys then values
                summary_keys = next(cmd_iter).split()
                summary_data = next(cmd_iter).split()
                summary = dict(zip(summary_keys, summary_data))
                break
            obj = {}
            fields = line.split()
            for i, value in enumerate(fields):
                obj[keys[i]] = value
                if keys[i] == 'State':
                    # 'State' spans two whitespace-separated fields
                    obj[keys[i]] = value + "  " + fields[i + 1]
            # If there is no Apids in the apstat then use None as the value
            obj.setdefault("Apids", None)
            status.append(obj)
        return (status, summary)

    def count_node_summ(self, cnsumm='up'):
        """
        Return the value of any one of the following parameters as shown in
        the 'Compute Node Summary' section of 'apstat -rn' output:
        arch, config, up, resv, use, avail, down

        :param cnsumm: parameter which is being queried, defaults to 'up'
        :type cnsumm: str
        :returns: value of parameter being queried
        """
        return int(self.node_summary[cnsumm])

    def count_node_state(self, state='UP  B'):
        """
        Return how many nodes have a certain 'State' value.

        :param state: parameter which is being queried, defaults to 'UP  B'
        :type state: str
        :returns: count of how many nodes have the state
        """
        return sum(1 for stat in self.node_status if stat['State'] == state)
Пример #12
0
class PTLTestData(Plugin):

    """
    Save post analysis data on test cases failure or error
    """
    name = 'PTLTestData'
    # sys.maxsize exists on both Python 2.6+ and 3 (sys.maxint is 2-only)
    score = sys.maxsize - 6
    logger = logging.getLogger(__name__)

    def __init__(self):
        Plugin.__init__(self)
        self.post_data_dir = None
        self.max_postdata_threshold = None
        self.__save_data_count = 0
        self.__priv_sn = ''
        self.du = DshUtils()

    def options(self, parser, env):
        """
        Register command line options
        """
        pass

    def set_data(self, post_data_dir, max_postdata_threshold):
        """
        Set the destination directory and per-suite save threshold.

        :param post_data_dir: directory under which data is saved
        :param max_postdata_threshold: max saves per testsuite (0 = no limit)
        """
        self.post_data_dir = post_data_dir
        self.max_postdata_threshold = max_postdata_threshold

    def configure(self, options, config):
        """
        Configure the plugin and system, based on selected options
        """
        self.config = config
        # plugin is active only when a data directory was provided
        self.enabled = self.post_data_dir is not None

    @staticmethod
    def _strip_suffix(path, suffix):
        """
        Return ``path`` with a trailing ``suffix`` removed (if present).

        str.rstrip() strips a *set of characters*, not a suffix, so the
        original ``rstrip('.tar.gz')`` could eat legitimate trailing
        characters from the name; this removes exactly the suffix.
        """
        if path.endswith(suffix):
            return path[:-len(suffix)]
        return path

    def __save_home(self, test, status, err=None):
        """
        Collect pbs_diag output (plus core-file analysis, if any cores
        are found) for a finished test case and store it under
        ``post_data_dir/<suite>/<testname>``.

        :param test: the nose test wrapper object
        :param status: result tag ('PASS', 'FAIL', 'ERROR', ...)
        :param err: optional (exc_type, exc_value, traceback) tuple
        """
        if hasattr(test, 'test'):
            _test = test.test
            sn = _test.__class__.__name__
        elif hasattr(test, 'context'):
            _test = test.context
            sn = _test.__name__
        else:
            # test does not have any PBS Objects, so just return
            return
        if self.__priv_sn != sn:
            # new testsuite: reset the per-suite save counter
            self.__save_data_count = 0
            self.__priv_sn = sn
        # Saving home might take time so disable timeout
        # handler set by runner
        tn = getattr(_test, '_testMethodName', 'unknown')
        testlogs = getattr(test, 'captured_logs', '')
        datadir = os.path.join(self.post_data_dir, sn, tn)
        if os.path.exists(datadir):
            _msg = 'Old post analysis data exists at %s' % datadir
            _msg += ', skipping saving data for this test case'
            self.logger.warning(_msg)
            _msg = 'Please remove old directory or'
            _msg += ' provide different directory'
            self.logger.warning(_msg)
            return
        if getattr(test, 'old_sigalrm_handler', None) is not None:
            # restore the runner's SIGALRM handler and cancel any alarm
            _h = getattr(test, 'old_sigalrm_handler')
            signal.signal(signal.SIGALRM, _h)
            signal.alarm(0)
        self.logger.log(logging.DEBUG2, 'Saving post analysis data...')
        current_host = socket.gethostname().split('.')[0]
        self.du.mkdir(current_host, path=datadir, mode=0o755,
                      parents=True, logerr=False, level=logging.DEBUG2)
        if err is not None:
            if isclass(err[0]) and issubclass(err[0], SkipTest):
                status = 'SKIP'
                status_data = 'Reason = %s' % (err[1])
            else:
                if isclass(err[0]) and issubclass(err[0], TimeOut):
                    status = 'TIMEDOUT'
                status_data = getattr(test, 'err_in_string', '')
        else:
            status_data = ''
        logfile = os.path.join(datadir, 'logfile_' + status)
        f = open(logfile, 'w+')
        f.write(testlogs + '\n')
        f.write(status_data + '\n')
        f.write('test duration: %s\n' % str(getattr(test, 'duration', '0')))
        if status in ('PASS', 'SKIP'):
            # Test case passed or skipped, no need to save post analysis data
            f.close()
            return
        if ((self.max_postdata_threshold != 0) and
                (self.__save_data_count >= self.max_postdata_threshold)):
            _msg = 'Total number of saved post analysis data for this'
            _msg += ' testsuite is exceeded max postdata threshold'
            _msg += ' (%d)' % self.max_postdata_threshold
            f.write(_msg + '\n')
            self.logger.error(_msg)
            f.close()
            return
        svr = getattr(_test, 'server', None)
        if svr is not None:
            svr_host = svr.hostname
        else:
            _msg = 'Could not find Server Object in given test object'
            _msg += ', skipping saving post analysis data'
            f.write(_msg + '\n')
            self.logger.warning(_msg)
            f.close()
            return
        pbs_diag = os.path.join(svr.pbs_conf['PBS_EXEC'],
                                'unsupported', 'pbs_diag')
        cur_user = self.du.get_current_user()
        cmd = [pbs_diag, '-f', '-d', '2']
        cmd += ['-u', cur_user]
        # write the tarball into the invoking user's home directory
        cmd += ['-o', pwd.getpwnam(cur_user).pw_dir]
        if len(svr.jobs) > 0:
            cmd += ['-j', ','.join(svr.jobs.keys())]
        ret = self.du.run_cmd(svr_host, cmd, sudo=True, level=logging.DEBUG2)
        if ret['rc'] != 0:
            _msg = 'Failed to get diag information for '
            _msg += 'on %s:' % svr_host
            _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n'
            f.write(_msg + '\n')
            self.logger.error(_msg)
            f.close()
            return
        else:
            # pbs_diag prints the path of the generated tarball; grab it
            diag_re = r"(?P<path>\/.*\/pbs_diag_[\d]+_[\d]+\.tar\.gz).*"
            m = re.search(diag_re, '\n'.join(ret['out']))
            if m is not None:
                diag_out = m.group('path')
            else:
                _msg = 'Failed to find generated diag path in below output:'
                _msg += '\n\n' + '-' * 80 + '\n'
                _msg += '\n'.join(ret['out']) + '\n'
                _msg += '-' * 80 + '\n\n'
                f.write(_msg)
                self.logger.error(_msg)
                f.close()
                return
        diag_out_dest = os.path.join(datadir, os.path.basename(diag_out))
        if not self.du.is_localhost(svr_host):
            diag_out_r = svr_host + ':' + diag_out
        else:
            diag_out_r = diag_out
        ret = self.du.run_copy(current_host, diag_out_r, diag_out_dest,
                               sudo=True, level=logging.DEBUG2)
        if ret['rc'] != 0:
            _msg = 'Failed to copy generated diag from'
            _msg += ' %s to %s' % (diag_out_r, diag_out_dest)
            f.write(_msg + '\n')
            self.logger.error(_msg)
            f.close()
            return
        else:
            # remove the original now that it is safely copied
            self.du.rm(svr_host, path=diag_out, sudo=True, force=True,
                       level=logging.DEBUG2)
        # look for core files left behind by any PBS daemon
        cores = []
        dir_list = ['server_priv', 'sched_priv', 'mom_priv']
        for d in dir_list:
            path = os.path.join(svr.pbs_conf['PBS_HOME'], d)
            files = self.du.listdir(hostname=svr_host, path=path, sudo=True,
                                    level=logging.DEBUG2)
            for _f in files:
                if os.path.basename(_f).startswith('core'):
                    cores.append(_f)
        cores = list(set(cores))
        if len(cores) > 0:
            # unpack the diag tarball so core analysis can be added to it
            cmd = ['gunzip', diag_out_dest]
            ret = self.du.run_cmd(current_host, cmd, sudo=True,
                                  level=logging.DEBUG2)
            if ret['rc'] != 0:
                _msg = 'Failed unzip generated diag at %s:' % diag_out_dest
                _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n'
                f.write(_msg + '\n')
                self.logger.error(_msg)
                f.close()
                return
            diag_out_dest = self._strip_suffix(diag_out_dest, '.gz')
            cmd = ['tar', '-xf', diag_out_dest, '-C', datadir]
            ret = self.du.run_cmd(current_host, cmd, sudo=True,
                                  level=logging.DEBUG2)
            if ret['rc'] != 0:
                _msg = 'Failed extract generated diag %s' % diag_out_dest
                _msg += ' to %s:' % datadir
                _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n'
                f.write(_msg + '\n')
                self.logger.error(_msg)
                f.close()
                return
            self.du.rm(hostname=current_host, path=diag_out_dest,
                       force=True, sudo=True, level=logging.DEBUG2)
            diag_out_dest = self._strip_suffix(diag_out_dest, '.tar')
            for c in cores:
                # pbs_diag -g prints a stack analysis of the core file
                cmd = [pbs_diag, '-g', c]
                ret = self.du.run_cmd(svr_host, cmd, sudo=True,
                                      level=logging.DEBUG2)
                if ret['rc'] != 0:
                    _msg = 'Failed to get core file information for '
                    _msg += '%s on %s:' % (c, svr_host)
                    _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n'
                    f.write(_msg + '\n')
                    self.logger.error(_msg)
                else:
                    of = os.path.join(diag_out_dest,
                                      os.path.basename(c) + '.out')
                    _f = open(of, 'w+')
                    _f.write('\n'.join(ret['out']) + '\n')
                    _f.close()
                    self.du.rm(hostname=svr_host, path=c, force=True,
                               sudo=True, level=logging.DEBUG2)
            # re-pack the diag directory with the core analysis included
            cmd = ['tar', '-cf', diag_out_dest + '.tar']
            cmd += [os.path.basename(diag_out_dest)]
            ret = self.du.run_cmd(current_host, cmd, sudo=True, cwd=datadir,
                                  level=logging.DEBUG2)
            if ret['rc'] != 0:
                _msg = 'Failed generate tarball of diag directory'
                _msg += ' %s' % diag_out_dest
                _msg += ' after adding core(s) information in it:'
                _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n'
                f.write(_msg + '\n')
                self.logger.error(_msg)
                f.close()
                return
            cmd = ['gzip', diag_out_dest + '.tar']
            ret = self.du.run_cmd(current_host, cmd, sudo=True,
                                  level=logging.DEBUG2)
            if ret['rc'] != 0:
                _msg = 'Failed compress tarball of diag %s' % diag_out_dest
                _msg += '.tar after adding core(s) information in it:'
                _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n'
                f.write(_msg + '\n')
                self.logger.error(_msg)
                f.close()
                return
            self.du.rm(current_host, diag_out_dest, sudo=True,
                       recursive=True, force=True, level=logging.DEBUG2)
        else:
            diag_out_dest = self._strip_suffix(diag_out_dest, '.tar.gz')
        dest = os.path.join(datadir,
                            'PBS_' + current_host.split('.')[0] + '.tar.gz')
        ret = self.du.run_copy(current_host, diag_out_dest + '.tar.gz',
                               dest, sudo=True, level=logging.DEBUG2)
        if ret['rc'] != 0:
            _msg = 'Failed rename tarball of diag from %s' % diag_out_dest
            _msg += '.tar.gz to %s:' % dest
            _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n'
            f.write(_msg + '\n')
            self.logger.error(_msg)
            f.close()
            return
        self.du.rm(current_host, path=diag_out_dest + '.tar.gz',
                   force=True, sudo=True, level=logging.DEBUG2)
        f.close()
        self.__save_data_count += 1
        _msg = 'Successfully saved post analysis data'
        self.logger.log(logging.DEBUG2, _msg)

    def addError(self, test, err):
        # nose result hook: unexpected exception -> archive as ERROR
        self.__save_home(test, 'ERROR', err)

    def addFailure(self, test, err):
        # nose result hook: assertion failure -> archive as FAIL
        self.__save_home(test, 'FAIL', err)

    def addSuccess(self, test):
        # nose result hook: success -> only the logfile is written
        self.__save_home(test, 'PASS')
Пример #13
0
class PTLTestData(Plugin):
    """
    Save post analysis data on test cases failure or error
    """
    name = 'PTLTestData'
    # sys.maxsize exists on both Python 2.6+ and 3 (sys.maxint is 2-only)
    score = sys.maxsize - 6
    logger = logging.getLogger(__name__)

    def __init__(self):
        Plugin.__init__(self)
        self.post_data_dir = None
        self.max_postdata_threshold = None
        self.__save_data_count = 0
        self.__priv_sn = ''
        self.du = DshUtils()

    def options(self, parser, env):
        """
        Register command line options
        """
        pass

    def set_data(self, post_data_dir, max_postdata_threshold):
        """
        Set the destination directory and per-suite save threshold.

        :param post_data_dir: directory under which data is saved
        :param max_postdata_threshold: max saves per testsuite (0 = no limit)
        """
        self.post_data_dir = post_data_dir
        self.max_postdata_threshold = max_postdata_threshold

    def configure(self, options, config):
        """
        Configure the plugin and system, based on selected options
        """
        self.config = config
        # plugin is active only when a data directory was provided
        self.enabled = self.post_data_dir is not None

    @staticmethod
    def _strip_suffix(path, suffix):
        """
        Return ``path`` with a trailing ``suffix`` removed (if present).

        str.rstrip() strips a *set of characters*, not a suffix, so the
        original ``rstrip('.tar.gz')`` could eat legitimate trailing
        characters from the name; this removes exactly the suffix.
        """
        if path.endswith(suffix):
            return path[:-len(suffix)]
        return path

    def __save_home(self, test, status, err=None):
        """
        Collect pbs_diag output (plus core-file analysis, if any cores
        are found) for a finished test case and store it under
        ``post_data_dir/<suite>/<testname>``.

        :param test: the nose test wrapper object
        :param status: result tag ('PASS', 'FAIL', 'ERROR', ...)
        :param err: optional (exc_type, exc_value, traceback) tuple
        """
        if hasattr(test, 'test'):
            _test = test.test
            sn = _test.__class__.__name__
        elif hasattr(test, 'context'):
            _test = test.context
            sn = _test.__name__
        else:
            # test does not have any PBS Objects, so just return
            return
        if self.__priv_sn != sn:
            # new testsuite: reset the per-suite save counter
            self.__save_data_count = 0
            self.__priv_sn = sn
        # Saving home might take time so disable timeout
        # handler set by runner
        tn = getattr(_test, '_testMethodName', 'unknown')
        testlogs = getattr(test, 'captured_logs', '')
        datadir = os.path.join(self.post_data_dir, sn, tn)
        if os.path.exists(datadir):
            _msg = 'Old post analysis data exists at %s' % datadir
            _msg += ', skipping saving data for this test case'
            self.logger.warning(_msg)
            _msg = 'Please remove old directory or'
            _msg += ' provide different directory'
            self.logger.warning(_msg)
            return
        if getattr(test, 'old_sigalrm_handler', None) is not None:
            # restore the runner's SIGALRM handler and cancel any alarm
            _h = getattr(test, 'old_sigalrm_handler')
            signal.signal(signal.SIGALRM, _h)
            signal.alarm(0)
        self.logger.log(logging.DEBUG2, 'Saving post analysis data...')
        current_host = socket.gethostname().split('.')[0]
        self.du.mkdir(current_host, path=datadir, mode=0o755,
                      parents=True, logerr=False, level=logging.DEBUG2)
        if err is not None:
            if isclass(err[0]) and issubclass(err[0], SkipTest):
                status = 'SKIP'
                status_data = 'Reason = %s' % (err[1])
            else:
                if isclass(err[0]) and issubclass(err[0], TimeOut):
                    status = 'TIMEDOUT'
                status_data = getattr(test, 'err_in_string', '')
        else:
            status_data = ''
        logfile = os.path.join(datadir, 'logfile_' + status)
        f = open(logfile, 'w+')
        f.write(testlogs + '\n')
        f.write(status_data + '\n')
        f.write('test duration: %s\n' % str(getattr(test, 'duration', '0')))
        if status in ('PASS', 'SKIP'):
            # Test case passed or skipped, no need to save post analysis data
            f.close()
            return
        if ((self.max_postdata_threshold != 0) and
                (self.__save_data_count >= self.max_postdata_threshold)):
            _msg = 'Total number of saved post analysis data for this'
            _msg += ' testsuite is exceeded max postdata threshold'
            _msg += ' (%d)' % self.max_postdata_threshold
            f.write(_msg + '\n')
            self.logger.error(_msg)
            f.close()
            return
        svr = getattr(_test, 'server', None)
        if svr is not None:
            svr_host = svr.hostname
        else:
            _msg = 'Could not find Server Object in given test object'
            _msg += ', skipping saving post analysis data'
            f.write(_msg + '\n')
            self.logger.warning(_msg)
            f.close()
            return
        pbs_diag = os.path.join(svr.pbs_conf['PBS_EXEC'], 'unsupported',
                                'pbs_diag')
        cmd = [pbs_diag, '-f', '-d', '2']
        cmd += ['-u', self.du.get_current_user()]
        if len(svr.jobs) > 0:
            cmd += ['-j', ','.join(svr.jobs.keys())]
        ret = self.du.run_cmd(svr_host, cmd, sudo=True, level=logging.DEBUG2)
        if ret['rc'] != 0:
            _msg = 'Failed to get diag information for '
            _msg += 'on %s:' % svr_host
            _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n'
            f.write(_msg + '\n')
            self.logger.error(_msg)
            f.close()
            return
        else:
            # pbs_diag prints the path of the generated tarball; grab it
            diag_re = r"(?P<path>\/.*\/pbs_diag_[\d]+_[\d]+\.tar\.gz).*"
            m = re.search(diag_re, '\n'.join(ret['out']))
            if m is not None:
                diag_out = m.group('path')
            else:
                _msg = 'Failed to find generated diag path in below output:'
                _msg += '\n\n' + '-' * 80 + '\n'
                _msg += '\n'.join(ret['out']) + '\n'
                _msg += '-' * 80 + '\n\n'
                f.write(_msg)
                self.logger.error(_msg)
                f.close()
                return
        diag_out_dest = os.path.join(datadir, os.path.basename(diag_out))
        if not self.du.is_localhost(svr_host):
            diag_out_r = svr_host + ':' + diag_out
        else:
            diag_out_r = diag_out
        ret = self.du.run_copy(current_host, diag_out_r, diag_out_dest,
                               sudo=True, level=logging.DEBUG2)
        if ret['rc'] != 0:
            _msg = 'Failed to copy generated diag from'
            _msg += ' %s to %s' % (diag_out_r, diag_out_dest)
            f.write(_msg + '\n')
            self.logger.error(_msg)
            f.close()
            return
        else:
            # remove the original now that it is safely copied
            self.du.rm(svr_host, path=diag_out, sudo=True, force=True,
                       level=logging.DEBUG2)
        # look for core files left behind by any PBS daemon
        cores = []
        dir_list = ['server_priv', 'sched_priv', 'mom_priv']
        for d in dir_list:
            path = os.path.join(svr.pbs_conf['PBS_HOME'], d)
            files = self.du.listdir(hostname=svr_host, path=path, sudo=True,
                                    level=logging.DEBUG2)
            for _f in files:
                if os.path.basename(_f).startswith('core'):
                    cores.append(_f)
        cores = list(set(cores))
        if len(cores) > 0:
            # unpack the diag tarball so core analysis can be added to it
            cmd = ['gunzip', diag_out_dest]
            ret = self.du.run_cmd(current_host, cmd, sudo=True,
                                  level=logging.DEBUG2)
            if ret['rc'] != 0:
                _msg = 'Failed unzip generated diag at %s:' % diag_out_dest
                _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n'
                f.write(_msg + '\n')
                self.logger.error(_msg)
                f.close()
                return
            diag_out_dest = self._strip_suffix(diag_out_dest, '.gz')
            cmd = ['tar', '-xf', diag_out_dest, '-C', datadir]
            ret = self.du.run_cmd(current_host, cmd, sudo=True,
                                  level=logging.DEBUG2)
            if ret['rc'] != 0:
                _msg = 'Failed extract generated diag %s' % diag_out_dest
                _msg += ' to %s:' % datadir
                _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n'
                f.write(_msg + '\n')
                self.logger.error(_msg)
                f.close()
                return
            self.du.rm(hostname=current_host, path=diag_out_dest,
                       force=True, sudo=True, level=logging.DEBUG2)
            diag_out_dest = self._strip_suffix(diag_out_dest, '.tar')
            for c in cores:
                # pbs_diag -g prints a stack analysis of the core file
                cmd = [pbs_diag, '-g', c]
                ret = self.du.run_cmd(svr_host, cmd, sudo=True,
                                      level=logging.DEBUG2)
                if ret['rc'] != 0:
                    _msg = 'Failed to get core file information for '
                    _msg += '%s on %s:' % (c, svr_host)
                    _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n'
                    f.write(_msg + '\n')
                    self.logger.error(_msg)
                else:
                    of = os.path.join(diag_out_dest,
                                      os.path.basename(c) + '.out')
                    _f = open(of, 'w+')
                    _f.write('\n'.join(ret['out']) + '\n')
                    _f.close()
                    self.du.rm(hostname=svr_host, path=c, force=True,
                               sudo=True, level=logging.DEBUG2)
            # re-pack the diag directory with the core analysis included
            cmd = ['tar', '-cf', diag_out_dest + '.tar']
            cmd += [os.path.basename(diag_out_dest)]
            ret = self.du.run_cmd(current_host, cmd, sudo=True, cwd=datadir,
                                  level=logging.DEBUG2)
            if ret['rc'] != 0:
                _msg = 'Failed generate tarball of diag directory'
                _msg += ' %s' % diag_out_dest
                _msg += ' after adding core(s) information in it:'
                _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n'
                f.write(_msg + '\n')
                self.logger.error(_msg)
                f.close()
                return
            cmd = ['gzip', diag_out_dest + '.tar']
            ret = self.du.run_cmd(current_host, cmd, sudo=True,
                                  level=logging.DEBUG2)
            if ret['rc'] != 0:
                _msg = 'Failed compress tarball of diag %s' % diag_out_dest
                _msg += '.tar after adding core(s) information in it:'
                _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n'
                f.write(_msg + '\n')
                self.logger.error(_msg)
                f.close()
                return
            self.du.rm(current_host, diag_out_dest, sudo=True,
                       recursive=True, force=True, level=logging.DEBUG2)
        else:
            diag_out_dest = self._strip_suffix(diag_out_dest, '.tar.gz')
        dest = os.path.join(datadir,
                            'PBS_' + current_host.split('.')[0] + '.tar.gz')
        ret = self.du.run_copy(current_host, diag_out_dest + '.tar.gz',
                               dest, sudo=True, level=logging.DEBUG2)
        if ret['rc'] != 0:
            _msg = 'Failed rename tarball of diag from %s' % diag_out_dest
            _msg += '.tar.gz to %s:' % dest
            _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n'
            f.write(_msg + '\n')
            self.logger.error(_msg)
            f.close()
            return
        self.du.rm(current_host, path=diag_out_dest + '.tar.gz',
                   force=True, sudo=True, level=logging.DEBUG2)
        f.close()
        self.__save_data_count += 1
        _msg = 'Successfully saved post analysis data'
        self.logger.log(logging.DEBUG2, _msg)

    def addError(self, test, err):
        # nose result hook: unexpected exception -> archive as ERROR
        self.__save_home(test, 'ERROR', err)

    def addFailure(self, test, err):
        # nose result hook: assertion failure -> archive as FAIL
        self.__save_home(test, 'FAIL', err)

    def addSuccess(self, test):
        # nose result hook: success -> only the logfile is written
        self.__save_home(test, 'PASS')
Пример #14
0
class PTLTestData(Plugin):

    """
    Save post analysis data on test cases failure or error.

    On failure/error (but not pass/skip), runs ``pbs_snapshot`` against
    the test's server and copies the resulting tarball under
    ``post_data_dir/<suite>/<testname>``.
    """
    name = 'PTLTestData'
    score = sys.maxsize - 6
    logger = logging.getLogger(__name__)

    def __init__(self):
        Plugin.__init__(self)
        self.post_data_dir = None
        self.max_postdata_threshold = None
        # number of data sets saved for the current test suite
        self.__save_data_count = 0
        # name of the suite the counter above refers to
        self.__priv_sn = ''
        self.du = DshUtils()

    def options(self, parser, env):
        """
        Register command line options
        """
        pass

    def set_data(self, post_data_dir, max_postdata_threshold):
        """
        Set destination directory and the per-suite save limit
        (0 means unlimited).
        """
        self.post_data_dir = post_data_dir
        self.max_postdata_threshold = max_postdata_threshold

    def configure(self, options, config):
        """
        Configure the plugin and system, based on selected options
        """
        self.config = config
        # enabled only when a destination directory was provided
        self.enabled = self.post_data_dir is not None

    def __save_home(self, test, status, err=None):
        """
        Save post analysis data (a pbs_snapshot tarball plus captured
        logs) for *test* under ``post_data_dir/<suite>/<testname>``.

        :param test: nose test (or context) object being reported
        :param status: result status string ('PASS', 'FAIL', 'ERROR', ...)
        :param err: optional (exc_type, exc_value, traceback) tuple
        """
        if hasattr(test, 'test'):
            _test = test.test
            sn = _test.__class__.__name__
        elif hasattr(test, 'context'):
            _test = test.context
            sn = _test.__name__
        else:
            # test does not have any PBS Objects, so just return
            return
        if self.__priv_sn != sn:
            # new test suite: reset the per-suite saved-data counter
            self.__save_data_count = 0
            self.__priv_sn = sn
        # Saving home might take time so disable timeout
        # handler set by runner
        tn = getattr(_test, '_testMethodName', 'unknown')
        testlogs = getattr(test, 'captured_logs', '')
        datadir = os.path.join(self.post_data_dir, sn, tn)
        if os.path.exists(datadir):
            _msg = 'Old post analysis data exists at %s' % datadir
            _msg += ', skipping saving data for this test case'
            self.logger.warning(_msg)
            _msg = 'Please remove old directory or'
            _msg += ' provide different directory'
            self.logger.warning(_msg)
            return
        if getattr(test, 'old_sigalrm_handler', None) is not None:
            # restore the runner's SIGALRM handler and cancel the alarm
            _h = getattr(test, 'old_sigalrm_handler')
            signal.signal(signal.SIGALRM, _h)
            signal.alarm(0)
        self.logger.log(logging.DEBUG2, 'Saving post analysis data...')
        current_host = socket.gethostname().split('.')[0]
        self.du.mkdir(current_host, path=datadir, mode=0o755,
                      parents=True, logerr=False, level=logging.DEBUG2)
        if err is not None:
            if isclass(err[0]) and issubclass(err[0], SkipTest):
                status = 'SKIP'
                status_data = 'Reason = %s' % (err[1])
            else:
                if isclass(err[0]) and issubclass(err[0], TimeOut):
                    status = 'TIMEDOUT'
                status_data = getattr(test, 'err_in_string', '')
        else:
            status_data = ''
        logfile = os.path.join(datadir, 'logfile_' + status)
        # 'with' guarantees the logfile is closed on every exit path
        # (the original leaked the handle if anything below raised)
        with open(logfile, 'w+') as f:
            f.write(testlogs + '\n')
            f.write(status_data + '\n')
            f.write('test duration: %s\n' %
                    str(getattr(test, 'duration', '0')))
            if status in ('PASS', 'SKIP'):
                # Test case passed or skipped, no need to save post
                # analysis data
                return
            if ((self.max_postdata_threshold != 0) and
                    (self.__save_data_count >= self.max_postdata_threshold)):
                _msg = 'Total number of saved post analysis data for this'
                _msg += ' testsuite is exceeded max postdata threshold'
                _msg += ' (%d)' % self.max_postdata_threshold
                f.write(_msg + '\n')
                self.logger.error(_msg)
                return

            servers = getattr(_test, 'servers', None)
            if servers is None:
                _msg = 'Could not find Server Object in given test object'
                _msg += ', skipping saving post analysis data'
                f.write(_msg + '\n')
                self.logger.warning(_msg)
                return
            # dict views are not indexable on Python 3; materialize once
            server_list = list(servers.values())
            server = server_list[0]
            server_host = server.shortname
            moms = getattr(_test, 'moms', None)
            comms = getattr(_test, 'comms', None)
            client = getattr(server, 'client', None)
            # every other involved host becomes an "additional host"
            add_hosts = []
            for param in server_list[1:]:
                add_hosts.append(param.shortname)
            if moms is not None:
                for param in moms.values():
                    add_hosts.append(param.shortname)
            if comms is not None:
                for param in comms.values():
                    add_hosts.append(param.shortname)
            if client is not None:
                add_hosts.append(client.split('.')[0])

            add_hosts = list(set(add_hosts) - set([server_host]))

            pbs_snapshot_path = os.path.join(
                server.pbs_conf["PBS_EXEC"], "sbin", "pbs_snapshot")
            cur_user = self.du.get_current_user()
            cur_user_dir = pwd.getpwnam(cur_user).pw_dir
            cmd = [
                pbs_snapshot_path,
                '-H', server_host,
                '--daemon-logs',
                '2',
                '--accounting-logs',
                '2',
                '--with-sudo'
                ]
            if len(add_hosts) > 0:
                cmd += ['--additional-hosts=' + ','.join(add_hosts)]
            cmd += ['-o', cur_user_dir]
            ret = self.du.run_cmd(current_host, cmd, level=logging.DEBUG2,
                                  logerr=False)
            if ret['rc'] != 0:
                # was 'servers_host' (undefined name) in the original,
                # which raised NameError on this error path
                _msg = 'Failed to get analysis information for '
                _msg += 'on %s:' % server_host
                _msg += '\n\n' + '\n'.join(ret['err']) + '\n\n'
                f.write(_msg + '\n')
                self.logger.error(_msg)
                return
            if len(ret['out']) == 0:
                self.logger.error('Snapshot command failed')
                return

            # first output line is of the form "<label>: <tarball path>"
            snap_out = ret['out'][0]
            snap_out_dest = (snap_out.split(":")[1]).strip()

            dest = os.path.join(datadir,
                                'PBS_' + server_host + '.tar.gz')
            ret = self.du.run_copy(current_host, snap_out_dest,
                                   dest, sudo=True, level=logging.DEBUG2)
            self.du.rm(current_host, path=snap_out_dest,
                       recursive=True, force=True, level=logging.DEBUG2)

        self.__save_data_count += 1
        _msg = 'Saved post analysis data'
        self.logger.info(_msg)

    def addError(self, test, err):
        """Save post analysis data for a test that raised an error."""
        self.__save_home(test, 'ERROR', err)

    def addFailure(self, test, err):
        """Save post analysis data for a test that failed an assertion."""
        self.__save_home(test, 'FAIL', err)

    def addSuccess(self, test):
        """Record a passing test (no err tuple is passed for a pass)."""
        self.__save_home(test, 'PASS')
# ---------------------------------------------------------------------------
# Example 15
# ---------------------------------------------------------------------------
    def _cleanup(self):
        """
        Remove temporary files left behind by test runs: PtlPbs/STDIN
        leftovers in each PBS user's home directory on every server and
        mom host, stale PTL/PBS files in the temp directories, files
        registered in DshUtils' temp-file list, and the dejagnu temp dir.
        """
        self.logger.info('Cleaning up temporary files')
        du = DshUtils()
        hosts = set(self.param_dict['moms']).union(
            set(self.param_dict['servers']))
        for user in PBS_USERS:
            self.logger.debug('Cleaning %s\'s home directory' % (str(user)))
            runas = PbsUser.get_user(user)
            for host in hosts:
                # resolve the user's home dir on the remote host
                ret = du.run_cmd(host, cmd=['echo', '$HOME'], sudo=True,
                                 runas=runas, logerr=False, as_script=True,
                                 level=logging.DEBUG)
                if ret['rc'] == 0:
                    path = ret['out'][0].strip()
                else:
                    # cannot determine home dir; abort home-dir cleanup
                    return None
                files = du.listdir(host, path=path, runas=user,
                                   level=logging.DEBUG)
                if not files:
                    # nothing listed (or listing failed); next host
                    continue
                bn = os.path.basename
                ftd = [f for f in files
                       if bn(f).startswith(('PtlPbs', 'STDIN'))]
                # remove in batches of 1000 to keep each command short.
                # (The original only removed when MORE than 1000 files
                # matched, so smaller sets were never cleaned up.)
                for i in range(0, len(ftd), 1000):
                    du.rm(host, path=ftd[i:i + 1000], runas=user,
                          force=True, level=logging.DEBUG)

        root_dir = os.sep
        dirlist = set([os.path.join(root_dir, 'tmp'),
                       os.path.join(root_dir, 'var', 'tmp')])
        # also honor tmp dirs configured in the environment
        for envname in ('TMPDIR', 'TEMP', 'TMP'):
            dirname = os.getenv(envname)
            if dirname:
                dirlist.add(dirname)

        p = re.compile(r'^pbs\.\d+')
        prefixes = ('PtlPbs', 'STDIN', 'pbsscrpt', 'pbs.conf.')
        for tmpdir in dirlist:
            # list the contents of each tmp dir and
            # get the file list to be deleted
            self.logger.info('Cleaning up ' + tmpdir + ' dir')
            files = du.listdir(path=tmpdir) or []
            bn = os.path.basename
            ftd = [f for f in files
                   if bn(f).startswith(prefixes) or p.match(bn(f))]
            for f in ftd:
                du.rm(path=f, sudo=True, recursive=True, force=True,
                      level=logging.DEBUG)
        # files DshUtils itself created during the run
        for f in du.tmpfilelist:
            du.rm(path=f, sudo=True, force=True, level=logging.DEBUG)
        del du.tmpfilelist[:]
        tmpdir = tempfile.gettempdir()
        os.chdir(tmpdir)
        tmppath = os.path.join(tmpdir, 'dejagnutemp%s' % os.getpid())
        if du.isdir(path=tmppath):
            du.rm(path=tmppath, recursive=True, sudo=True, force=True,
                  level=logging.DEBUG)
# ---------------------------------------------------------------------------
# Example 16
# ---------------------------------------------------------------------------
class PTLTestData(Plugin):

    """
    Save post analysis data on test cases failure or error
    """
    # NOTE(review): this variant is Python-2-only code: sys.maxint, the
    # bare 0755 octal literal below, and the Queue module were all
    # removed or renamed in Python 3.
    name = 'PTLTestData'
    score = sys.maxint - 3
    logger = logging.getLogger(__name__)

    def __init__(self):
        # NOTE(review): Plugin.__init__ is not called here — confirm the
        # base class does not require initialization.
        self.sharedpath = None
        self.du = DshUtils()
        # background sync thread (created in begin() when a shared path
        # is configured) and its work queue
        self.__syncth = None
        self.__queue = Queue.Queue()

    def options(self, parser, env):
        """
        Register command line options
        """
        pass

    def set_data(self, sharedpath):
        """Set the shared directory that saved data is synced to."""
        self.sharedpath = sharedpath

    def configure(self, options, config):
        """
        Configure the plugin and system, based on selected options
        """
        self.config = config
        self.enabled = True

    def __get_sntnbi_name(self, test):
        """
        Return a (suite name, test name, build info) tuple for *test*,
        using 'unknown' for anything that cannot be determined.
        """
        if hasattr(test, 'test'):
            _test = test.test
            sn = _test.__class__.__name__
        elif hasattr(test, 'context'):
            _test = test.context
            sn = _test.__name__
        else:
            return ('unknown', 'unknown', 'unknown')
        tn = getattr(_test, '_testMethodName', 'unknown')
        if (hasattr(_test, 'server') and
                (getattr(_test, 'server', None) is not None)):
            # use the server's reported PBS version as the build info
            bi = _test.server.attributes['pbs_version']
        else:
            bi = 'unknown'
        return (sn, tn, bi)

    def __save_home(self, test, status):
        """
        Collect diagnostic state from every host involved in *test*
        (qstat/pbsnodes/qmgr output, uptime/netstat/ps/df/vmstat, a copy
        of PBS_HOME, and daemon logs) into each host's temp directory,
        then queue the data for syncing to the shared path if one is
        configured.

        :param test: the test (or context) object being reported
        :param status: result status string used in the logfile name
        """
        if hasattr(test, 'test'):
            _test = test.test
        elif hasattr(test, 'context'):
            _test = test.context
        else:
            # test does not have any PBS Objects, so just return
            return
        if not hasattr(_test, 'server'):
            # test does not have any PBS Objects, so just return
            return
        # log window: 3 minutes before the test start through its end
        st = getattr(test, 'start_time', None)
        if st is not None:
            st = time.mktime(st.timetuple())
        else:
            st = time.time()
        st -= 180  # starttime - 3 min
        et = getattr(test, 'end_time', None)
        if et is not None:
            et = time.mktime(et.timetuple())
        else:
            et = time.time()
        hostname = socket.gethostname().split('.')[0]
        # unique leaf dir name: PBS_JOBID when running under PBS,
        # otherwise a timestamp
        lp = os.environ.get('PBS_JOBID', time.strftime("%Y%b%d_%H_%m_%S",
                                                       time.localtime()))
        sn, tn, bi = self.__get_sntnbi_name(test)
        # collect the short hostnames of all servers/schedulers/moms
        if getattr(_test, 'servers', None) is not None:
            shosts = map(lambda x: x.split('.')[0], _test.servers.host_keys())
        else:
            shosts = []
        if getattr(_test, 'schedulers', None) is not None:
            schosts = map(lambda x: x.split('.')[0],
                          _test.schedulers.host_keys())
        else:
            schosts = []
        if getattr(_test, 'moms', None) is not None:
            mhosts = map(lambda x: x.split('.')[0], _test.moms.host_keys())
        else:
            mhosts = []
        hosts = []
        hosts.extend(shosts)
        hosts.extend(schosts)
        hosts.extend(mhosts)
        hosts.append(hostname)
        hosts = sorted(set(hosts))
        for host in hosts:
            confpath = self.du.get_pbs_conf_file(host)
            tmpdir = self.du.get_tempdir(host)
            datadir = os.path.join(tmpdir, bi, sn, hostname, tn, lp)
            # build a shell script that snapshots this host's state;
            # background commands ('&') run concurrently until 'wait'
            _s = ['#!/bin/bash']
            _s += ['. %s' % (confpath)]
            _s += ['mkdir -p %s' % (datadir)]
            _s += ['chmod -R 0755 %s' % (datadir)]
            if host == _test.server.shortname:
                # server-only data: job list, node list, server config
                _l = '${PBS_EXEC}/bin/qstat -tf > %s/qstat_tf &' % (datadir)
                _s += [_l]
                _l = '${PBS_EXEC}/bin/pbsnodes -av > %s/pbsnodes &' % (datadir)
                _s += [_l]
                _l = '${PBS_EXEC}/bin/qmgr -c "p s"'
                _l += ' > %s/print_server &' % (datadir)
                _s += [_l]
            _s += ['echo "%s" >> %s/uptime' % ('*' * 80, datadir)]
            _s += ['echo "On host : %s" >> %s/uptime' % (host, datadir)]
            _s += ['uptime >> %s/uptime' % (datadir)]
            _s += ['echo "" >> %s/uptime' % (datadir)]
            _s += ['echo "%s" >> %s/netstat' % ('*' * 80, datadir)]
            _s += ['echo "On host : %s" >> %s/netstat' % (host, datadir)]
            # fall back to 'ss' when netstat is not installed
            _cmd = self.du.which(host, 'netstat')
            if _cmd == 'netstat':
                _cmd = 'ss'
            if sys.platform.startswith('linux'):
                _cmd += ' -ap'
            else:
                _cmd += ' -an'
            _s += ['%s >> %s/netstat' % (_cmd, datadir)]
            _s += ['echo "" >> %s/netstat' % (datadir)]
            _s += ['echo "%s" >> %s/ps' % ('*' * 80, datadir)]
            _s += ['echo "On host : %s" >> %s/ps' % (host, datadir)]
            _s += ['ps -ef | grep pbs_ >> %s/ps' % (datadir)]
            _s += ['echo "" >> %s/ps' % (datadir)]
            _s += ['echo "%s" >> %s/df' % ('*' * 80, datadir)]
            _s += ['echo "On host : %s" >> %s/df' % (host, datadir)]
            _s += ['df -h >> %s/df' % (datadir)]
            _s += ['echo "" >> %s/df' % (datadir)]
            _s += ['echo "%s" >> %s/vmstat' % ('*' * 80, datadir)]
            _s += ['echo "On host : %s" >> %s/vmstat' % (host, datadir)]
            _s += ['vmstat >> %s/vmstat' % (datadir)]
            _s += ['echo "" >> %s/vmstat' % (datadir)]
            # copy PBS_HOME, compress the datastore, and drop logs and
            # accounting (they are saved separately via __save_logs)
            _dst = os.path.join(datadir, 'PBS_' + host)
            _s += ['cp -rp ${PBS_HOME} %s' % (_dst)]
            _s += ['tar -cf %s/datastore.tar %s/datastore' % (_dst, _dst)]
            _s += ['gzip -rf %s/datastore.tar' % (_dst)]
            _s += ['rm -rf %s/datastore' % (_dst)]
            _s += ['rm -rf %s/*_logs' % (_dst)]
            _s += ['rm -rf %s/server_priv/accounting' % (_dst)]
            _s += ['cp %s %s/pbs.conf.%s' % (confpath, _dst, host)]
            if host == hostname:
                # embed the captured error output on the local host
                _s += ['cat > %s/logfile_%s <<EOF' % (datadir, status)]
                _s += ['%s' % (getattr(test, 'err_in_string', ''))]
                _s += ['']
                _s += ['EOF']
            _s += ['wait']
            # write the script to a temp file, run it with sudo, remove it
            fd, fn = self.du.mkstemp(hostname, mode=0755, body='\n'.join(_s))
            os.close(fd)
            self.du.run_cmd(hostname, cmd=fn, sudo=True, logerr=False)
            self.du.rm(hostname, fn, force=True, sudo=True)
            svr = _test.servers[host]
            if svr is not None:
                self.__save_logs(svr, _dst, 'server_logs', st, et)
                _adst = os.path.join(_dst, 'server_priv')
                self.__save_logs(svr, _adst, 'accounting', st, et)
            if getattr(_test, 'moms', None) is not None:
                self.__save_logs(_test.moms[host], _dst, 'mom_logs', st, et)
            if getattr(_test, 'schedulers', None) is not None:
                self.__save_logs(_test.schedulers[host], _dst, 'sched_logs',
                                 st, et)
            if ((self.sharedpath is not None) and (self.__syncth is not None)):
                # hand off to the background SyncData thread
                self.__queue.put((host, datadir, bi, sn, hostname, tn, lp))

    def __save_logs(self, obj, dst, name, st, et, jid=None):
        """
        Write *obj*'s log lines of type *name* (restricted to the
        [st, et] window where supported) into the file dst/name.

        :param jid: job id, required only for 'tracejob' logs
        """
        if name == 'accounting':
            logs = obj.log_lines('accounting', n='ALL', starttime=st,
                                 endtime=et)
            logs = map(lambda x: x + '\n', logs)
        elif name == 'tracejob':
            logs = obj.log_lines('tracejob', id=jid, n='ALL')
            name += '_' + jid
        else:
            # NOTE(review): passes obj itself where the other branches
            # pass a log-name string — looks suspicious; confirm the
            # log_lines signature accepts an object here.
            logs = obj.log_lines(obj, n='ALL', starttime=st, endtime=et)
        f = open(os.path.join(dst, name), 'w+')
        f.writelines(logs)
        f.close()

    def begin(self):
        """Start the background sync thread if a shared path is set."""
        if self.sharedpath is not None:
            self.__syncth = SyncData(self.sharedpath, self.__queue)
            self.__syncth.daemon = True
            self.__syncth.start()

    def addError(self, test, err):
        """Save post analysis data for a test that raised an error."""
        self.__save_home(test, 'ERROR')

    def addFailure(self, test, err):
        """Save post analysis data for a test that failed an assertion."""
        self.__save_home(test, 'FAIL')

    def finalize(self, result):
        """Drain the sync queue and stop the background sync thread."""
        if ((self.sharedpath is not None) and (self.__syncth is not None)):
            # NOTE(review): busy-wait until the queue empties — burns CPU;
            # queue.join() with task_done() would block instead.
            while not self.__queue.empty():
                pass
            self.__syncth.stop()
            self.__syncth.join()