예제 #1
0
 def __init__(self):
     """Run the ``Config`` initialiser, build the helpers and read the OSD state.

     Sets ``client`` (an ``SshUtil`` for the Ceph admin node), ``utl`` (a
     ``Utils`` instance carrying the wait label) and the three OSD lists
     ``down_osds``, ``out_osds`` and ``osds``.
     """
     Config.__init__(self)
     self.client = SshUtil(
         self.ceph_adm_node, self.ceph_adm_user, self.ceph_adm_password)
     # Label text: the expected health prefix plus the configured wait time.
     wait_label = "Waiting for {} ({}s)".format(
         self.ceph_health_is_ok_with, self.wait_for_health_ok_t)
     self.utl = Utils(label=wait_label)
     # Same query order as before: down OSDs, out OSDs, then all OSD ids.
     self.down_osds = self.get_down_osds()
     self.out_osds = self.get_out_osds()
     self.osds = self.get_osd_ids()
예제 #2
0
 def copy_gateway_logs(self):
     """Copy the listed log files from every gateway to a path under ``logs/``.

     For each host in ``self.gateways`` an ``SshUtil`` is created with the
     shared gateway credentials and every file in ``files`` is fetched with
     ``copy_file_from_host``. The destination name is
     ``logs/<remote path with '/' replaced by '_'>_<host>_<timestamp>`` so
     that copies from different hosts do not share a name.
     """
     # Refactor to one method
     # move that to the config
     files = ['/var/log/messages']
     # time.time() returns a float; convert it once so it can be joined
     # into the file name (str + float raises TypeError).
     timestamp = str(time.time())
     for host in self.gateways:
         log.info("Collecting logs from {}".format(host))
         ssh = SshUtil(host, self.gateway_user, self.gateway_password)
         for fn in files:
             log.info("Collecting {}".format(fn))
             # Flatten the remote path so no sub-directories are needed
             # below logs/.
             save_name = fn.replace('/', '_')
             ssh.copy_file_from_host(
                 fn,
                 'logs/{}'.format("_".join((save_name, str(host), timestamp))))
예제 #3
0
 def copy_vmware_logs(self):
     """Copy the listed log files from every ESXi host to a path under ``logs/``.

     For each host in ``self.esxi_hosts`` an ``SshUtil`` is created with
     ``self.user`` / ``self.password`` and every file in ``files`` is fetched
     with ``copy_file_from_host``. The destination name is
     ``logs/<remote path with '/' replaced by '_'>_<host>_<timestamp>`` so
     that copies from different hosts do not share a name.
     """
     # Refactor to one method
     # move that to the config
     files = ['/var/log/vmkernel.log']
     # time.time() returns a float; convert it once so it can be joined
     # into the file name (str + float raises TypeError).
     timestamp = str(time.time())
     for host in self.esxi_hosts:
         log.info("Collecting logs from {}".format(host))
         ssh = SshUtil(host, self.user, self.password)
         for fn in files:
             log.info("Collecting {}".format(fn))
             # Flatten the remote path so no sub-directories are needed
             # below logs/.
             save_name = fn.replace('/', '_')
             ssh.copy_file_from_host(
                 fn,
                 'logs/{}'.format("_".join((save_name, str(host), timestamp))))
예제 #4
0
 def copy_gateway_logs(self):
     """Fetch the listed log files from every gateway into ``logs/``.

     ``self.gateways`` is iterated as a mapping of host to a credentials
     sequence whose first two items are passed to ``SshUtil``. Each file is
     stored as ``logs/<remote path with '/' replaced by '_'>_<host>_<timestamp>``.
     """
     # Refactor to one method
     # move that to the config
     remote_paths = ['/var/log/messages']
     started = time.time()
     for gateway, credentials in self.gateways.items():
         log.info("Collecting logs from {}".format(gateway))
         session = SshUtil(gateway, credentials[0], credentials[1])
         for remote_path in remote_paths:
             log.info("Collecting {}".format(remote_path))
             flat_name = remote_path.replace('/', '_')
             suffix = "_" + gateway + "_" + str(started)
             target = 'logs/{}'.format(flat_name + suffix)
             session.copy_file_from_host(remote_path, target)
예제 #5
0
 def hosts_up(self):
     """
     Check that every configured gateway answers a trivial SSH command.

     Only the hosts in ``self.gateways`` are probed here; each one is asked
     to run ``ls``. A failure is logged and remembered, and the remaining
     gateways are still checked.

     Returns True when the command ran on all gateways without raising,
     False as soon as at least one of them failed.
     """
     log.info("Perfoming intial availability check for hosts.")
     state = True
     for gateway in self.gateways:
         ssh_ = SshUtil(gateway, self.gateway_user, self.gateway_password)
         try:
             log.debug(
                 "Checking gateway {} for availability".format(gateway))
             ssh_.run_cmd('ls')
         except Exception as exc:
             # Catch Exception rather than everything so that Ctrl-C
             # (KeyboardInterrupt) and SystemExit still stop the program.
             log.critical("Unexpected error: {}".format(type(exc)))
             log.critical(Fore.RED +
                          "The gateway {} is not reachable".format(gateway))
             state = False
     return state
예제 #6
0
    def stress_test(self):
        """Run random operations in a loop until a health check fails.

        After ``startup()`` each iteration waits for a finished task, draws a
        seed in ``[0, 500)`` and performs exactly one action chosen by the
        seed's bucket:

        * 0-99 and 200-299: clone a VM
        * 100-199: destroy one VM
        * 300-320: reboot a random gateway
        * 321-399: mark a random OSD ``out``
        * 400-499: placeholder, only logs

        The buckets are contiguous, so every seed maps to an action. The loop
        ends when either the Ceph or the VMware health check reports a
        problem; ``teardown()`` is called afterwards.
        """
        self.startup()
        abort = False
        while not abort:
            self.ops.queue.wait_for_any_finished_task()
            self.print_header()
            self.check_thresholds()
            seed = random.randrange(0, 500)
            log.debug("seed: {}".format(seed))

            # One chain with exclusive upper bounds: no seed falls between
            # two buckets and at most one action runs per iteration.
            if seed < 100:
                self.ops.clone_vm()
            elif seed < 200:
                self.destroy_vms(count=1)
            elif seed < 300:
                self.ops.clone_vm()
            elif seed < 321:
                gateway = self.ops.random_gateway()
                SshUtil(gateway, self.gateway_user, self.gateway_password,
                        self.reboot_allowed).reboot()
            elif seed < 400:
                random_osd = self.cephops.random_osd()
                self.cephops.mark_osd(random_osd, 'out')
            else:
                log.info(Fore.RED + "Placeholder .. What to do more?")

            # Both health checks always run so both failures get reported.
            if not self.cephops.wait_for_health_ok(silent=False):
                log.info(Fore.RED + "Health of Ceph is not ok, aborting")
                abort = True

            if not self.vmwareops.health_ok():
                log.info(Fore.RED + "Health of VMWARE is not ok, aborting")
                abort = True
            self.print_footer()

        # The loop only ends once abort has been set.
        self.teardown()
예제 #7
0
 def clients(self):
     """Yield one ``SshUtil`` per entry in ``self.esxi_hosts``.

     Each client is created for the host currently being iterated, using
     the shared ``self.user`` / ``self.password`` credentials.
     """
     for node in self.esxi_hosts:
         # Use the loop's host, not a fixed self.host, so that every ESXi
         # host gets its own client.
         yield SshUtil(node, self.user, self.password)
예제 #8
0
 def clients(self):
     """Yield one ``SshUtil`` per ``self.esxi_hosts`` item.

     ``self.esxi_hosts`` is iterated as a mapping of host to a credentials
     sequence; the first two items of that sequence are passed to
     ``SshUtil`` after the host.
     """
     yield from (
         SshUtil(esxi_host, login[0], login[1])
         for esxi_host, login in self.esxi_hosts.items()
     )
예제 #9
0
class CephOps(Config):
    """Query and manipulate a Ceph cluster through ``ceph`` CLI commands.

    Every command is run via ``self.client.run_cmd``, which is used here as
    returning a ``(stdout, stderr)`` pair. The OSD lists (``down_osds``,
    ``out_osds``, ``osds``) are read once in ``__init__``; no other method of
    this class refreshes them.
    """

    def __init__(self):
        """Run the ``Config`` initialiser, build the helpers and read the OSD state."""
        Config.__init__(self)
        self.client = SshUtil(self.ceph_adm_node, self.ceph_adm_user,
                              self.ceph_adm_password)
        self.utl = Utils(label="Waiting for {} ({}s)".format(
            self.ceph_health_is_ok_with, self.wait_for_health_ok_t))
        self.down_osds = self.get_down_osds()
        self.out_osds = self.get_out_osds()
        self.osds = self.get_osd_ids()

    @staticmethod
    def _parse_json(text):
        """Decode the JSON text produced by a ``ceph ... -f json`` command."""
        # Imported locally so this class does not depend on a module-level
        # import being present.
        import json
        return json.loads(text)

    def _osd_tree_nodes(self):
        """Return the ``ceph osd tree`` nodes that describe OSDs."""
        data, _ = self.client.run_cmd('ceph osd tree -f json')
        return [
            node for node in self._parse_json(data)['nodes']
            # "or ''" keeps the membership test safe when a node has no name.
            if node.get('type') == 'osd' and 'osd' in (node.get('name') or '')
        ]

    def random_osd(self):
        """Return a random element of ``self.osds``."""
        log.debug("Selecting random OSD")
        return random.choice(self.osds)

    def health_ok(self, silent=True):
        """Return True when ``ceph health`` starts with the expected prefix.

        The expected prefix is ``self.ceph_health_is_ok_with``. The result is
        logged at debug level when ``silent`` is true, at info level otherwise.
        """
        stdout, _ = self.client.run_cmd('ceph health')
        healthy = stdout.startswith(self.ceph_health_is_ok_with)
        report = log.debug if silent else log.info
        report("Health is ok" if healthy else "Health is not ok")
        return healthy

    def set_noup(self):
        """Run ``ceph osd set noup``."""
        log.debug("Setting NoUp")
        self.client.run_cmd('ceph osd set noup')

    def unset_noup(self):
        """Run ``ceph osd unset noup``."""
        log.debug("Unsetting NoUp")
        self.client.run_cmd('ceph osd unset noup')

    def get_down_osds(self):
        """Return the ids of all OSDs whose tree status is ``down``."""
        return [node.get('id') for node in self._osd_tree_nodes()
                if node.get('status') == 'down']

    def get_out_osds(self):
        """Return the ids of all OSDs whose ``reweight`` is not 1.0."""
        return [node.get('id') for node in self._osd_tree_nodes()
                if node.get('reweight') != 1.0]

    @property
    def max_down_osds(self):
        """Number of OSDs allowed to be down, truncated to an int."""
        # The allowed share is the configured max_down_osds_ratio.
        return int(len(self.osds) * self.max_down_osds_ratio)

    @property
    def osd_down_count(self):
        """Number of entries in ``self.down_osds``."""
        return len(self.down_osds)

    @property
    def osd_out_count(self):
        """Number of entries in ``self.out_osds``."""
        return len(self.out_osds)

    def wait_for_health_ok(self, silent=True):
        """Poll ``health_ok()`` until it succeeds or the timeout expires.

        The cluster is checked every 5 seconds for at most
        ``self.wait_for_health_ok_t`` seconds. When ``silent`` is false the
        spinner of ``self.utl`` is advanced on every poll.

        Returns True as soon as a check succeeds, False once the timeout has
        passed without a successful check.
        """
        start = time.time()
        deadline = start + self.wait_for_health_ok_t
        while True:
            if not silent:
                self.utl.spinner()
            if self.health_ok():
                log.info("Ceph's health is okay after {} seconds".format(
                    round(time.time() - start, 4)))
                return True
            # Give up instead of polling forever once the deadline passed.
            if time.time() > deadline:
                return False
            time.sleep(5)

    def get_osd_ids(self):
        """Return the decoded output of ``ceph osd ls -f json``."""
        log.debug("Scraping OSDs")
        ids, _ = self.client.run_cmd('ceph osd ls -f json')
        return self._parse_json(ids)

    def mark_osd(self, osd_id, state):
        """Run ``ceph osd <state> <osd_id>`` and log anything unexpected.

        Any stderr output is logged. Stdout is logged only when stderr is
        empty and stdout differs from the ``marked <state> osd.<id>.``
        message. The cached OSD lists are not updated here.
        """
        cmd = "ceph osd {} {}".format(state, osd_id)
        stdout, stderr = self.client.run_cmd(cmd)
        if stderr:
            log.info(stderr)
        elif stdout != 'marked {} osd.{}.'.format(state, osd_id):
            log.info(stdout)