def stx_ansible(node, localhost_config):
    """Run the ansible bootstrap playbook on ``node`` and check its result.

    ``localhost_config`` is copied to the node's home directory, then
    ``bootstrap.yml`` is located under the stx-ansible playbooks folder and
    executed through ``node.ssh``. The command output is scanned for lines
    containing ``failed=<n>``.

    Returns:
        0 when the ssh command returns 0 and every ``failed=`` counter found
        in the output is ``0``; -1 otherwise.
    """
    # Function-scope import: the block must not depend on a file-level one.
    import re

    LOG.Info("# Config Controller with config file %s" % localhost_config)
    CK_RET(node.copy_to_node(localhost_config, "~/"))

    # Using ansible
    cmd = [
        "export PATH=/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin",
        "export HOSTNAME=localhost",
        "BOOTSTRAP_YML=`find /usr/share/ansible/stx-ansible/playbooks -name bootstrap.yml`",
        "ansible-playbook "
        "$BOOTSTRAP_YML "
        "-e \"ansible_become_pass=%s\"" % node.get_password(),
    ]

    retval, retlog = node.ssh(
        ";".join(cmd), logfile=getlogfile("controller-0.00_ansible.log"))
    LOG.print_log(retlog)

    if retval != 0:
        LOG.Error("##### ERROR: ansible script exited abnormally.")
        return -1

    # Raw string: "\d" in a normal literal is an invalid escape sequence.
    # Compiled once, outside the loop. The greedy ".*" means the LAST
    # "failed=" on a line is the one that is inspected.
    failed_pattern = re.compile(r".*failed=(\d*)")
    for l in retlog:
        found = failed_pattern.match(l)
        # An empty counter ("failed=" followed by no digits) is treated as a
        # failure, exactly like any value other than "0".
        if found and found.group(1) != "0":
            LOG.Error("##### ERROR: ansible doesn't return good result.")
            return -1
    return 0
def normalize_testnodes_config():
    """Apply command-line overrides to ``dplmnt`` and normalize node counts.

    Reads the module-level ``args`` and updates the module-level ``dplmnt``
    dictionary in place:

    * ``system_mode`` is overridden from ``args`` when given and must be one
      of "multi", "simplex", "duplex" or "multi_aio".
    * For kvm deployments the compute/storage counts, the VM image location
      and the VM name prefix may be overridden.
    * For bare-metal deployments the controller/storage/worker machine files
      are validated with ``UTILS.check_file_exist`` and recorded, and the
      storage/compute counts are taken from the provided lists.
    * Finally ``controller_num``/``compute_num``/``storage_num`` are forced
      to values consistent with the selected system mode.

    Failures are reported through ``CK_RET`` or ``exit_with_failure``.

    NOTE(review): the nesting of the validation calls and of the
    ``storage_num``/``compute_num`` assignments was reconstructed from a
    whitespace-mangled source -- confirm against the original file.
    """
    ALL_POSIBLE_SYSTEM_MODE = ["multi", "simplex", "duplex", "multi_aio"]

    if args.system_mode:
        dplmnt["system_mode"] = args.system_mode
    CK_RET(dplmnt["system_mode"] in ALL_POSIBLE_SYSTEM_MODE,
           log="Wrong System Mode.")

    if iskvm():
        # "is not None" so that an explicit 0 on the command line is honored.
        if args.compute_num is not None:
            dplmnt["compute_num"] = args.compute_num
        if args.storage_num is not None:
            dplmnt["storage_num"] = args.storage_num
        if args.virtimg_dir:
            dplmnt['vm_img_location'] = os.path.abspath(args.virtimg_dir)
        CK_RET(os.path.exists(dplmnt["vm_img_location"]),
               log="%s not exist." % dplmnt['vm_img_location'])
        if args.prefix:
            dplmnt['vm_prefix_name'] = args.prefix

    if isbm():
        if args.bm_controller0:
            CK_RET(UTILS.check_file_exist(args.bm_controller0),
                   log="bm machine not exist.")
            dplmnt["bm_controller0"] = args.bm_controller0
        if args.bm_controller1:
            CK_RET(UTILS.check_file_exist(args.bm_controller1),
                   log="bm machine not exist.")
            dplmnt["bm_controller1"] = args.bm_controller1

        dplmnt["bm_storage_nodes"] = []
        dplmnt["bm_worker_nodes"] = []
        if args.bm_storages:
            for n in args.bm_storages:
                CK_RET(UTILS.check_file_exist(n), log="bm machine not exist.")
            dplmnt["bm_storage_nodes"] = args.bm_storages
        dplmnt["storage_num"] = len(dplmnt["bm_storage_nodes"])
        if args.bm_workers:
            for n in args.bm_workers:
                CK_RET(UTILS.check_file_exist(n), log="bm machine not exist.")
            dplmnt["bm_worker_nodes"] = args.bm_workers
        dplmnt["compute_num"] = len(dplmnt["bm_worker_nodes"])

    ## Normalize Test Config
    if dplmnt["system_mode"] == "duplex":
        dplmnt["controller_num"] = 2
        dplmnt["compute_num"] = 0
        dplmnt["storage_num"] = 0
        # .get(): a controller that was never configured must produce the
        # error below, not a KeyError.
        if isbm() and not (dplmnt.get("bm_controller0")
                           and dplmnt.get("bm_controller1")):
            LOG.Error("BM: no bm machine provided.")
            exit_with_failure()
    elif dplmnt["system_mode"] == "simplex":
        dplmnt["controller_num"] = 1
        dplmnt["compute_num"] = 0
        dplmnt["storage_num"] = 0
        if isbm() and not dplmnt.get("bm_controller0"):
            LOG.Error("BM: no bm machine provided.")
            exit_with_failure()
    else:
        # For the containerized multi-node version there must be
        # 2 controllers.
        dplmnt["controller_num"] = 2
        if dplmnt['system_mode'] == 'multi_aio':
            dplmnt["storage_num"] = 0
        if iskvm() and dplmnt["compute_num"] < 1:
            dplmnt["compute_num"] = 1
        if isbm():
            CK_RET(dplmnt["compute_num"] > 0,
                   log="BM: no bm machine provided.")
def load_json_config(json_config):
    """Parse the JSON file at ``json_config`` and return its content.

    Returns ``None`` when ``json_config`` is empty or ``None``. If the file is
    missing or is not valid JSON, an error is logged and ``exit(1)`` is
    called.
    """
    if not json_config:
        return None

    parsed = {}
    try:
        with open(json_config, "r") as handle:
            parsed = json.load(handle)
    except json.decoder.JSONDecodeError:
        LOG.Error("%s is not in json format." % json_config)
        exit(1)
    except FileNotFoundError:
        LOG.Error("%s not found." % json_config)
        exit(1)

    return parsed
def __wait_for_new_host(node0, node, minutes):
    """Reboot ``node`` and register it once it shows up in the host list.

    ``node`` is powered off and booted, then ``system host-list`` is polled
    through ``node0`` until a line matching the node id appears or the wait
    (``minutes``) gives up. On success the host is updated with its
    personality, hostname, boot/rootfs devices and console.

    Returns:
        True when the host was found and updated, False otherwise.
    """
    host_name = node.get_hostname()
    node_id = node.get_nodeid()
    role = node.get_personality()

    node.power_off()
    node.boot()

    def check():
        # grep exits with 0 only when the node id is present in the listing.
        status, _ = node0.stx_cmd("system host-list | grep %d" % node_id,
                                  silent=True)
        return status == 0

    waiter = UTILS.wait_for("New Host %d occur." % node_id,
                            check,
                            slowdown=STX_SLOWDOWN)
    if waiter.run(minutes) != 0:
        LOG.Error("Failed to found new host node %s." % host_name)
        return False

    LOG.Info("Found host node (#%d) as %s, set personality as %s." %
             (node_id, host_name, role))
    update_cmd = ("system host-update %d personality=%s hostname=%s "
                  "boot_device=%s rootfs_device=%s "
                  "console=tty0" % (node_id, role, host_name,
                                    node.get_boot_device(),
                                    node.get_rootfs_device()))
    CK_RET(node0.stx_cmd(update_cmd))
    return True
def add_config(target, config_name):
    """Append the ansible config snippet ``config_name`` to ``target``.

    The snippet is read from ``<CurrPath>/ansibleconfig/<config_name>.yml``
    and concatenated onto ``target`` with a shell ``cat >>``. When the
    command does not report 0, an error is logged and ``exit(-1)`` is called.
    """
    src = os.path.join(CurrPath, "ansibleconfig", config_name + ".yml")
    if cmdhost("cat %s >> %s" % (src, target), silent=True) == 0:
        return
    LOG.Error("Failed to add ansible config %s to %s" % (src, target))
    exit(-1)
def cmdhost(cmd, cwd=None, logfile=None, silent=False):
    """Run ``cmd`` through the shell on the local host.

    Args:
        cmd: shell command line to execute.
        cwd: when not ``None``, the process changes into this directory
            before running the command.
        logfile: not used by the visible part of this function.
        silent: not used by the visible part of this function.

    The command output is split into lines and stored in ``result``; the exit
    status is stored in ``retval`` (the command's return code on
    ``CalledProcessError``, -1 on any other exception).

    NOTE(review): the block as visible ends without a ``return`` statement,
    so it returns ``None``; ``result``/``retval`` are presumably consumed by
    code that is not shown here -- confirm against the full file.
    """
    realcmd = cmd
    result = ""
    retval = 0
    if cwd is not None:
        os.chdir(cwd)
    try:
        result = check_output(realcmd, shell=True).splitlines()
    except CalledProcessError as ecp:
        LOG.Error("ERROR: failed to run \"%s\": returned %s %s" %
                  (cmd, ecp.returncode, ecp.output))
        retval = ecp.returncode
    # "except X as name" replaces the Python 2-only "except X, name" form,
    # which is a SyntaxError on Python 3.
    except Exception as error:
        LOG.Error("ERROR: failed to run \"%s\": %s" % (cmd, error))
        LOG.Error("")
        retval = -1
def check_ret(result, func_exit_failure=None, log=None):
    """Interpret ``result`` as a success/failure status.

    A result counts as a failure when it is:

    * a ``bool`` that is ``False``,
    * an ``int`` other than 0,
    * a ``tuple`` whose first element is not 0.

    Any other value is treated as success.

    Args:
        result: the value to check.
        func_exit_failure: optional callable invoked on failure.
        log: optional message logged with ``LOG.Error`` on failure.

    Returns:
        0 on success, -1 on failure (if ``func_exit_failure`` returns).
    """
    # bool is tested first because bool is a subclass of int.
    if isinstance(result, bool):
        failed = not result
    elif isinstance(result, int):
        failed = result != 0
    elif isinstance(result, tuple):
        failed = result[0] != 0
    else:
        failed = False

    if not failed:
        return 0

    # Log before calling the failure handler: the handler may terminate the
    # process, in which case the message would otherwise never be emitted.
    if log is not None:
        LOG.Error(log)
    if func_exit_failure:
        func_exit_failure()
    return -1
def exit_with_failure():
    """Record the deployment as failed, save the config and exit with 1.

    Sets ``dplmnt['status']`` to "Fail" and ``dplmnt['endtime']`` to the
    current time, logs the failure, collects system logs from ``node_0`` when
    the orchestration has progressed beyond ``STEP_CONTROLLER0``, calls
    ``save_config()`` and finally ``exit(1)``.
    """
    global dplmnt

    end_time = datetime.now().strftime('%H:%M:%S')
    dplmnt['status'] = "Fail"
    dplmnt['endtime'] = end_time
    LOG.Error("#### ERROR: failed at %s ####" % dplmnt['endtime'])

    if dplorch and dplorch["current_step"] > STEP_CONTROLLER0:
        STX.get_system_logs(node_0)

    save_config()
    exit(1)
def shell(cmd, cwd=None, logfile=None, silent=False, DEBUG=False):
    """Run ``cmd`` through the shell and return ``(retval, output_lines)``.

    Args:
        cmd: shell command line to execute.
        cwd: when not ``None``, change into this directory before running
            the command and back to ``CurrPath`` afterwards.
        logfile: when not ``None``, the output lines are written to this
            file, one per line.
        silent: suppress error logging when the command fails.
        DEBUG: when true the command is only echoed, not executed.

    Returns:
        A tuple ``(retval, lines)``. ``retval`` is 0 on success, the
        command's return code when it exits non-zero, or -1 when it could not
        be run. ``lines`` is the captured output as a list of ``str``; it
        includes the output of a command that exited non-zero.
    """
    realcmd = "echo \"%s\"" % cmd if DEBUG else cmd
    result = None
    retval = 0
    if cwd is not None:
        os.chdir(cwd)

    try:
        result = SP.check_output(realcmd, shell=True).splitlines()
    except SP.CalledProcessError as ecp:
        # Keep the output of the failed command so that callers (and the
        # logfile) can see it; it used to be stored in an unused variable
        # and dropped.
        result = ecp.output.splitlines()
        if not silent:
            LOG.Error("ERROR: failed to run \"%s\": returned %s" %
                      (cmd, ecp.returncode))
            LOG.print_error(result)
        retval = ecp.returncode
    except Exception as error:
        if not silent:
            LOG.Error("ERROR: failed to run \"%s\": %s" % (cmd, error))
        retval = -1

    # check_output yields bytes lines; anything else is passed through as is.
    result_str = [
        l.decode(encoding="utf-8", errors="strict")
        if isinstance(l, bytes) else l
        for l in (result or [])
    ]

    if logfile is not None:
        with open(logfile, "w") as f:
            for l in result_str:
                f.write(l)
                f.write("\n")

    if cwd is not None:
        os.chdir(CurrPath)
    return retval, result_str
def stx_get_mgmt_ip(node0, node):
    """Look up the management IP of ``node`` and store it on the node.

    Runs ``system host-show <hostname>`` through ``node0`` and takes the
    fourth whitespace-separated field of the first output line containing
    ``mgmt_ip``, which is passed to ``node.set_mgmt_ip``.

    Returns:
        True when an address was found and stored, False otherwise.
    """
    retval, retlog = node0.stx_cmd("system host-show %s" % node.get_hostname())
    LOG.print_log(retlog)

    if retval == 0:
        for l in retlog:
            if "mgmt_ip" not in l:
                continue
            fields = l.split()
            # Skip lines that mention mgmt_ip but carry no value column
            # instead of failing with an IndexError.
            if len(fields) > 3:
                node.set_mgmt_ip(fields[3])
                return True

    # Report the node that was queried (not node0, which only ran the query).
    LOG.Error("MGMT ip not found for %s." % node.get_hostname())
    return False
def get_node_step(node):
    """Map ``node`` to the deployment step it belongs to.

    The two controller hostnames are checked first, then the storage and
    compute personalities.

    Returns:
        The matching ``STEP_*`` constant, or -1 (after logging an error) when
        the node matches none of them.
    """
    name = node.get_hostname()
    role = node.get_personality()

    # Hostname takes precedence over personality.
    if name == NODE.HOSTNAME_CONTROLLER0:
        return STEP_CONTROLLER0
    if name == NODE.HOSTNAME_CONTROLLER1:
        return STEP_CONTROLLER1
    if role == NODE.PERSONALITY_STORAGE:
        return STEP_STORAGE
    if role == NODE.PERSONALITY_COMPUTE:
        return STEP_COMPUTE

    LOG.Error("No valid test step for the node %s." % name)
    return -1
def __umount_point(self, mnt_point):
    """Lazily unmount ``mnt_point``, retrying up to three times.

    Runs ``sudo umount -l <mnt_point>`` through ``CMD.shell``. Returns as
    soon as one attempt succeeds. When every attempt fails, the error and the
    last command output are logged and ``exit(1)`` is called.
    """
    max_attempts = 3
    log = []
    for attempt in range(1, max_attempts + 1):
        ret, log = CMD.shell('sudo umount -l {}'.format(mnt_point))
        if ret == 0:
            return
        if attempt < max_attempts:
            LOG.Info('Failed to umount {}, retrying...'.format(
                mnt_point))

    # Reached only when all attempts failed. Previously this branch was
    # unreachable (the counter was always > 0 inside the loop), so a
    # persistent failure was silently ignored.
    LOG.Error('Max umounting attempts reached, leaving '
              'installation')
    # Must be called through self: inside a class body a bare
    # __print_errorlog(...) is name-mangled into an undefined global.
    self.__print_errorlog("UMOUNT", log)
    exit(1)
def __stx_app_op(node0, op, helm_charts=None, appname=None, silent=False):
    """Run the ``system <op>`` application command through ``node0``.

    The command target is ``helm_charts`` when given, otherwise ``appname``.
    Without a target only ``APP_OP_LIST`` is accepted.

    Returns:
        ``(retval, retlog)`` from ``node0.stx_cmd``, or ``(-1, None)`` when
        no target was given for an operation other than ``APP_OP_LIST``.
    """
    target = helm_charts or appname

    if target:
        command = "set -ex; system %s %s" % (op, target)
    elif op == APP_OP_LIST:
        command = "system %s" % op
    else:
        LOG.Error(
            "No valid helm charts or app name provided, or unsupported operation."
        )
        return -1, None

    retval, retlog = node0.stx_cmd(command, silent=silent)

    # A non-zero status goes to the error log, a zero status to the normal log.
    emit = LOG.print_error if retval else LOG.print_log
    emit(retlog)
    return retval, retlog
def wait_for_app_status_finish(node0, appname, minutes, current_stat):
    """Wait until application ``appname`` leaves the status ``current_stat``.

    The status is polled with ``__stx_app_stat`` (60 second interval) for up
    to ``minutes``. A timeout is logged as an error.

    Returns:
        The application status as read after the wait has ended.
    """

    def exit_if_true():
        stat, progress = __stx_app_stat(node0, appname)
        LOG.Info("current status: %s, %s" % (stat, progress))
        # Keep waiting while the app is not found or its status is
        # unchanged; stop as soon as the status differs.
        return stat is not None and stat != current_stat

    description = "app %s to finish \"%s\"" % (appname, current_stat)
    waiter = UTILS.wait_for(description,
                            exit_if_true,
                            interval=60,
                            slowdown=STX_SLOWDOWN)
    if waiter.run(minutes) == -1:
        LOG.Error("Wait for %s %s timed out." % (appname, current_stat))

    final_stat = __stx_app_stat(node0, appname)[0]
    return final_stat
def stx_wait_for_platform_integ_app(node0,
                                    acceptable_status=[APP_APPLY_SUCCESS],
                                    exception_on_error=True):
    """Wait for "platform-integ-apps" to reach one of ``acceptable_status``.

    While the application is uploading or applying, the function waits for
    that status to finish. When the upload or apply has failed, an error is
    logged and either ``__exit_with_exception()`` is called
    (``exception_on_error`` true) or the wait is abandoned. For any other
    status the state is polled again after 10 seconds. The application list
    is printed at the end.
    """
    appname = "platform-integ-apps"
    in_progress = [APP_UPLOADING, APP_APPLYING]
    failed = [APP_UPLOAD_FAILED, APP_APPLY_FAILED]

    stat = __stx_app_stat(node0, appname)[0]
    while stat not in acceptable_status:
        if stat in in_progress:
            stat = wait_for_app_status_finish(node0, appname, 20, stat)
            continue

        if stat in failed:
            LOG.Error("%s failed to be applied, exit with error." % appname)
            if not exception_on_error:
                break
            __exit_with_exception()
            continue

        time.sleep(10)
        stat = __stx_app_stat(node0, appname)[0]

    __stx_app_op(node0, APP_OP_LIST)
def check_file_exist(f):
    """Return True when the path ``f`` exists; log an error and return False
    otherwise."""
    found = os.path.exists(f)
    if not found:
        LOG.Error("%s is not existing" % f)
    return found
def __print_errorlog(self, module, log):
    """Log every entry of ``log`` as an error, prefixed with ``module`` and
    a colon."""
    messages = (module + ": " + entry for entry in log)
    for message in messages:
        LOG.Error(message)
install_mode = BM_PXE.MODE_UEFI_AIO_LL else: install_mode = BM_PXE.MODE_UEFI_AIO pxe_agent = BM_PXE.PxeAgent( pxe_server_config_json=pxe_config, iso=newiso, default_install=install_mode) pxe_agent.mount_iso() pxe_agent.prepare_for_node(node_0) # pxe_agent.check_pxe_services() dplorch['pxe_agent'] = pxe_agent else: LOG.Error("===== ERROR: Non-supported method to deploy (only kvm supported).") exit_with_failure() FINISH_STEP(STEP_ENV_CREATE) ######################################## # Deployment Step STEP_CONTROLLER0_INSTALL: # Install controller-0 ######################################### if RUN_STEP(STEP_CONTROLLER0_INSTALL): for node in dplorch["nodes"].keys(): if node_n(node).is_power_on(): node_n(node).power_off() time.sleep(5)
def Error(self, log):
    """Forward ``log`` to ``LOG.Error``, prefixed with this object's
    ``name``."""
    tagged = "%s: %s" % (self.name, log)
    LOG.Error(tagged)
def stx_apply_application(node0, helm_charts):
    """Upload and apply the application packaged in ``helm_charts``.

    The charts file is copied to the home directory on ``node0``. The
    function then loops on the application status until it becomes
    ``APP_APPLY_SUCCESS``: it uploads the charts when no application exists,
    applies after a successful upload, and deletes/removes after a failed
    upload/apply so that the next iteration can retry. Every failure (and
    every pass that finds an operation still in progress) is counted and the
    pod logs are collected. ``__exit_with_exception()`` is called on a failed
    delete, on an unknown status, when the uploaded application's name cannot
    be determined, or once 5 failures have been counted.

    Returns:
        The application name; after an upload this is the name reported by
        the upload command rather than the one derived from the file name.
    """
    LOG.Info("Applying application with helm charts %s." % helm_charts)

    # copy the helm_charts to test folder and upload it to the target.
    charts_filename = os.path.basename(helm_charts)
    appname = os.path.splitext(charts_filename)[0]
    node0.copy_to_node(helm_charts, "~/")

    failures = 0
    appstat = __stx_app_stat(node0, appname)[0]

    def handle_failure(failures):
        # Count the failure and keep the pod logs for later analysis.
        failures += 1
        get_pod_logs(node0, logname="pod_logs_failed_%d" % failures)
        return failures

    while appstat != APP_APPLY_SUCCESS:
        LOG.Info("App %s current status: %s" % (appname, appstat))
        if appstat == APP_NONE:
            retval, retlog = __stx_app_op(node0,
                                          APP_OP_UPLOAD,
                                          helm_charts=charts_filename)
            # The upload output reports the name the application was
            # registered under; use it from here on.
            real_app_name = None
            for l in retlog:
                if l.find(" name ") >= 0:
                    real_app_name = l.split("|")[2].strip()
            if not real_app_name:
                __exit_with_exception()
            LOG.Info("App Real Name: %s" % real_app_name)
            appname = real_app_name
            appstat = wait_for_app_status_finish(node0, appname, 20,
                                                 APP_UPLOADING)
        elif appstat == APP_UPLOAD_SUCCESS:
            retval, retlog = __stx_app_op(node0,
                                          APP_OP_APPLY,
                                          appname=appname)
            appstat = wait_for_app_status_finish(node0, appname, 120,
                                                 APP_APPLYING)
        elif appstat == APP_UPLOAD_FAILED:
            failures = handle_failure(failures)
            retval, retlog = __stx_app_op(node0,
                                          APP_OP_DELETE,
                                          appname=appname)
            appstat = wait_for_app_status_finish(node0, appname, 20,
                                                 APP_DELETING)
        elif appstat == APP_APPLY_SUCCESS:
            # should not run here
            break
        elif appstat == APP_APPLY_FAILED:
            failures = handle_failure(failures)
            retval, retlog = __stx_app_op(node0,
                                          APP_OP_REMOVE,
                                          appname=appname)
            appstat = wait_for_app_status_finish(node0, appname, 30,
                                                 APP_REMOVING)
        elif appstat == APP_REMOVE_FAILED:
            failures = handle_failure(failures)
            retval, retlog = __stx_app_op(node0,
                                          APP_OP_REMOVE,
                                          appname=appname)
            appstat = wait_for_app_status_finish(node0, appname, 30,
                                                 APP_REMOVING)
        elif appstat == APP_DELETE_FAILED:
            # Fatal
            LOG.Error("Failed to delete app %s after %d failures" %
                      (appname, failures))
            __exit_with_exception()
        elif appstat in [
                APP_UPLOADING, APP_APPLYING, APP_REMOVING, APP_DELETING
        ]:
            failures = handle_failure(failures)
            appstat = wait_for_app_status_finish(node0, appname, 20, appstat)
        else:
            # Fatal
            LOG.Error("Unknown application status %s" % appstat)
            __exit_with_exception()

        if failures >= 5:
            # Fatal
            # The local is named "appname"; the previously used "app_name"
            # was undefined and raised NameError instead of logging.
            LOG.Error("Application %s apply failed!" % appname)
            __exit_with_exception()

    return appname