Exemplo n.º 1
0
def run(kubeconfig_path, scenarios_list, config, failed_post_scenarios,
        wait_duration):
    """Inject each pod scenario through PowerfulSeal and check recovery.

    For every entry in ``scenarios_list`` the optional pre-action
    (``entry[1]``) is run, the PowerfulSeal policy file (``entry[0]``) is
    executed, the function sleeps for ``wait_duration`` and the recovery
    check result is published through ``cerberus``.

    Args:
        kubeconfig_path: Passed to PowerfulSeal via ``--kubeconfig`` and to
            the ``post_actions`` helpers.
        scenarios_list: Iterable of sequences; index 0 is the policy file,
            optional index 1 is handed to ``post_actions.run``.
        config: Passed through to ``cerberus.publish_kraken_status``.
        failed_post_scenarios: Value threaded through
            ``post_actions.check_recovery``.
        wait_duration: Seconds to sleep after each injection.

    Returns:
        The latest ``failed_post_scenarios`` value. Any exception raised
        while processing the scenarios is logged and stops the loop; the
        value gathered so far is still returned.
    """
    # Tracked separately from the loop variable so the error handler can
    # always report something, even if the failure happens before the loop
    # binds ``pod_scenario`` or the entry itself is empty.
    scenario_name = None
    try:
        for pod_scenario in scenarios_list:
            scenario_name = pod_scenario[0] if pod_scenario else None

            if len(pod_scenario) > 1:
                pre_action_output = post_actions.run(kubeconfig_path,
                                                     pod_scenario[1])
            else:
                pre_action_output = ""

            scenario_logs = runcommand.invoke(
                "powerfulseal autonomous --use-pod-delete-instead-"
                "of-ssh-kill --policy-file %s --kubeconfig %s "
                "--no-cloud --inventory-kubernetes --headless" %
                (pod_scenario[0], kubeconfig_path))

            # Display pod scenario logs/actions
            print(scenario_logs)

            logging.info("Scenario: %s has been successfully injected!",
                         scenario_name)
            logging.info("Waiting for the specified duration: %s",
                         wait_duration)
            time.sleep(wait_duration)

            failed_post_scenarios = post_actions.check_recovery(
                kubeconfig_path, pod_scenario, failed_post_scenarios,
                pre_action_output)
            cerberus.publish_kraken_status(config, failed_post_scenarios)
    except Exception as e:
        # Deliberately best-effort: log and fall through to the return so the
        # caller still receives the failures collected up to this point.
        logging.error("Failed to run scenario: %s. Encountered the following "
                      "exception: %s", scenario_name, e)
    return failed_post_scenarios
Exemplo n.º 2
0
def container_run(kubeconfig_path, scenarios_list, config, failed_post_scenarios, wait_duration):
    """Run every container-kill scenario and publish the recovery status.

    Each entry of ``scenarios_list`` is a sequence whose index 0 is a YAML
    file with a top-level ``scenarios`` list and whose optional index 1 is a
    post-action handed to ``post_actions.run``.

    Args:
        kubeconfig_path: Passed to the ``post_actions`` helpers.
        scenarios_list: Iterable of scenario entries as described above.
        config: Passed through to ``cerberus.publish_kraken_status``.
        failed_post_scenarios: Value threaded through
            ``post_actions.check_recovery``.
        wait_duration: Seconds to sleep after each scenario.

    Returns:
        The latest ``failed_post_scenarios`` value, so the caller can observe
        the result (the name is rebound locally and would otherwise be lost).

    Raises:
        SystemExit: With status 1 when the post-action recovery check raises.
    """
    for container_scenario_config in scenarios_list:
        if len(container_scenario_config) > 1:
            pre_action_output = post_actions.run(kubeconfig_path, container_scenario_config[1])
        else:
            pre_action_output = ""

        # Parse the file completely first so the handle is not kept open
        # across the kills and sleeps below.
        # NOTE(review): yaml.full_load is not meant for untrusted input;
        # consider yaml.safe_load if scenario files can come from outside.
        with open(container_scenario_config[0], "r") as f:
            cont_scenario_config = yaml.full_load(f)

        for cont_scenario in cont_scenario_config["scenarios"]:
            # capture start time
            start_time = int(time.time())
            killed_containers = container_killing_in_pod(cont_scenario)

            if len(container_scenario_config) > 1:
                try:
                    failed_post_scenarios = post_actions.check_recovery(
                        kubeconfig_path, container_scenario_config, failed_post_scenarios, pre_action_output
                    )
                except Exception as e:
                    logging.error("Failed to run post action checks: %s" % e)
                    sys.exit(1)
            else:
                # NOTE(review): this replaces (rather than extends) whatever
                # was accumulated so far - confirm that is intended.
                failed_post_scenarios = check_failed_containers(
                    killed_containers, cont_scenario.get("retry_wait", 120)
                )

            logging.info("Waiting for the specified duration: %s" % (wait_duration))
            time.sleep(wait_duration)

            # capture end time
            end_time = int(time.time())

            # publish cerberus status
            cerberus.publish_kraken_status(config, failed_post_scenarios, start_time, end_time)
            logging.info("")
    return failed_post_scenarios
Exemplo n.º 3
0
def run(kubeconfig_path, scenarios_list, config, failed_post_scenarios,
        wait_duration):
    """Run each pod scenario through the pod plugin and check recovery.

    For every entry in ``scenarios_list`` the optional pre-action
    (``entry[1]``) is run, the scenario is loaded and executed via the
    ``pod`` step of ``pod_plugin``'s schema, the function sleeps for
    ``wait_duration``, runs the recovery check and publishes the status
    through ``cerberus``.

    Args:
        kubeconfig_path: When not ``None``, overrides the
            ``kubeconfig_path`` of the loaded plugin input; also passed to
            the ``post_actions`` helpers.
        scenarios_list: Iterable of sequences; optional index 1 is handed to
            ``post_actions.run``.
        config: Passed through to ``cerberus.publish_kraken_status``.
        failed_post_scenarios: Value threaded through
            ``post_actions.check_recovery``.
        wait_duration: Seconds to sleep after each scenario.

    Returns:
        The latest ``failed_post_scenarios`` value.

    Raises:
        SystemExit: With status 1 when running the scenario or the recovery
            check raises an exception.
    """
    # Loop to run the scenarios starts here
    for pod_scenario in scenarios_list:
        if len(pod_scenario) > 1:
            pre_action_output = post_actions.run(kubeconfig_path,
                                                 pod_scenario[1])
        else:
            pre_action_output = ""

        injected = False
        try:
            # capture start time
            start_time = int(time.time())

            # NOTE(review): the whole entry (not pod_scenario[0]) is passed
            # to load_from_file - confirm this is what the loader expects.
            scenario_input = serialization.load_from_file(pod_scenario)

            schema = pod_plugin.get_schema()
            input_data: pod_plugin.KillPodConfig = schema.unserialize_input(
                "pod", scenario_input)

            if kubeconfig_path is not None:
                input_data.kubeconfig_path = kubeconfig_path

            output_id, output_data = schema.call_step("pod", input_data)

            if output_id == "error":
                data: pod_plugin.PodErrorOutput = output_data
                logging.error("Failed to run pod scenario: {}".format(
                    data.error))
            else:
                data: pod_plugin.PodSuccessOutput = output_data
                for pod in data.pods:
                    print("Deleted pod {} in namespace {}\n".format(
                        pod.pod_name, pod.pod_namespace))
                injected = True
        except Exception as e:
            logging.error(
                "Failed to run scenario: %s. Encountered the following "
                "exception: %s" % (pod_scenario[0], e))
            sys.exit(1)

        # Only claim success when the plugin did not report an error; the
        # wait and recovery check below still run in both cases.
        if injected:
            logging.info("Scenario: %s has been successfully injected!" %
                         (pod_scenario[0]))
        logging.info("Waiting for the specified duration: %s" %
                     (wait_duration))
        time.sleep(wait_duration)

        try:
            failed_post_scenarios = post_actions.check_recovery(
                kubeconfig_path, pod_scenario, failed_post_scenarios,
                pre_action_output)
        except Exception as e:
            logging.error("Failed to run post action checks: %s" % e)
            sys.exit(1)

        # capture end time
        end_time = int(time.time())

        # publish cerberus status
        cerberus.publish_kraken_status(config, failed_post_scenarios,
                                       start_time, end_time)
    return failed_post_scenarios
def run(scenarios_list, config, wait_duration, failed_post_scenarios, kubeconfig_path):
    """Apply an ``oc`` project action to randomly chosen namespaces.

    Each entry of ``scenarios_list`` is a sequence whose index 0 is a YAML
    file with a top-level ``scenarios`` list and whose optional index 1 is a
    post-action handed to ``post_actions.run``. Per scenario the keys
    ``namespace`` (default ``"^.*$"``), ``label_selector`` (default
    ``None``), ``runs`` (1), ``action`` (``"delete"``), ``sleep`` (10) and
    ``wait_time`` (30) are read.

    Args:
        scenarios_list: Iterable of scenario entries as described above.
        config: Passed through to ``cerberus.publish_kraken_status``.
        wait_duration: Seconds to sleep after all runs of a scenario.
        failed_post_scenarios: Value threaded through
            ``post_actions.check_recovery``.
        kubeconfig_path: Passed to the ``post_actions`` helpers.

    Returns:
        The latest ``failed_post_scenarios`` value, so the caller can observe
        the result (the name is rebound locally and would otherwise be lost).

    Raises:
        SystemExit: With status 1 when no candidate namespace is left, the
            ``oc`` command raises, or the recovery check raises.
    """
    for scenario_config in scenarios_list:
        if len(scenario_config) > 1:
            pre_action_output = post_actions.run(kubeconfig_path, scenario_config[1])
        else:
            pre_action_output = ""

        # Parse the file completely first so the handle is not kept open
        # across the sleeps below.
        # NOTE(review): yaml.full_load is not meant for untrusted input;
        # consider yaml.safe_load if scenario files can come from outside.
        with open(scenario_config[0], "r") as f:
            scenario_config_yaml = yaml.full_load(f)

        for scenario in scenario_config_yaml["scenarios"]:
            scenario_namespace = scenario.get("namespace", "^.*$")
            scenario_label = scenario.get("label_selector", None)
            run_count = scenario.get("runs", 1)
            namespace_action = scenario.get("action", "delete")
            run_sleep = scenario.get("sleep", 10)
            wait_time = scenario.get("wait_time", 30)
            killed_namespaces = []
            namespaces = kubecli.check_namespaces([scenario_namespace], scenario_label)
            start_time = int(time.time())
            for _ in range(run_count):
                if not namespaces:
                    logging.error(
                        "Couldn't %s %s namespaces, not enough namespaces matching %s with label %s"
                        % (namespace_action, str(run_count), scenario_namespace, str(scenario_label))
                    )
                    sys.exit(1)
                selected_namespace = random.choice(namespaces)
                killed_namespaces.append(selected_namespace)
                try:
                    runcommand.invoke("oc %s project %s" % (namespace_action, selected_namespace))
                    logging.info(namespace_action + " on namespace " + str(selected_namespace) + " was successful")
                except Exception as e:
                    # A failed action aborts the process, so report it at
                    # error level rather than info.
                    logging.error(
                        namespace_action + " on namespace " + str(selected_namespace) + " was unsuccessful"
                    )
                    logging.error("Namespace action error: " + str(e))
                    sys.exit(1)
                # Never pick the same namespace twice within one scenario.
                namespaces.remove(selected_namespace)
                logging.info("Waiting %s seconds between namespace deletions" % str(run_sleep))
                time.sleep(run_sleep)

            logging.info("Waiting for the specified duration: %s" % wait_duration)
            time.sleep(wait_duration)
            if len(scenario_config) > 1:
                try:
                    failed_post_scenarios = post_actions.check_recovery(
                        kubeconfig_path, scenario_config, failed_post_scenarios, pre_action_output
                    )
                except Exception as e:
                    logging.error("Failed to run post action checks: %s" % e)
                    sys.exit(1)
            else:
                failed_post_scenarios = check_active_namespace(killed_namespaces, wait_time)
            end_time = int(time.time())
            cerberus.publish_kraken_status(config, failed_post_scenarios, start_time, end_time)
    return failed_post_scenarios
Exemplo n.º 5
0
def run(scenarios_list, config, wait_duration):
    """Execute every cluster shut-down scenario and publish its status.

    Each entry of ``scenarios_list`` is a sequence: index 0 is a YAML file
    containing a ``cluster_shut_down_scenario`` key, optional index 1 is a
    post-action handed to ``post_actions.run``. After the shut-down the
    function sleeps for ``wait_duration`` seconds, runs the recovery check
    and publishes the outcome through ``cerberus``. Nothing is returned.
    """
    failed_post_scenarios = []
    for shut_down_config in scenarios_list:
        # Only entries with a second element carry a post-action.
        pre_action_output = (
            post_actions.run("", shut_down_config[1])
            if len(shut_down_config) > 1
            else ""
        )
        with open(shut_down_config[0], "r") as config_file:
            parsed_yaml = yaml.full_load(config_file)
            scenario = parsed_yaml["cluster_shut_down_scenario"]
            cluster_shut_down(scenario)

            wait_message = "Waiting for the specified duration: %s" % (
                wait_duration)
            logging.info(wait_message)
            time.sleep(wait_duration)

            failed_post_scenarios = post_actions.check_recovery(
                "",
                shut_down_config,
                failed_post_scenarios,
                pre_action_output,
            )
            cerberus.publish_kraken_status(config, failed_post_scenarios)
def run(scenarios_list, config, wait_duration, failed_post_scenarios,
        kubeconfig_path):
    """Delete randomly chosen namespaces and check that they recover.

    Each entry of ``scenarios_list`` is a sequence whose index 0 is a YAML
    file with a top-level ``scenarios`` list and whose optional index 1 is a
    post-action handed to ``post_actions.run``. Per scenario the keys
    ``namespace`` and ``label_selector`` (both default ``""``; only one of
    them may be non-blank), ``delete_count`` (1), ``runs`` (1), ``sleep``
    (10) and ``wait_time`` (30) are read.

    Args:
        scenarios_list: Iterable of scenario entries as described above.
        config: Passed through to ``cerberus.publish_kraken_status``.
        wait_duration: Seconds to sleep before each recovery check.
        failed_post_scenarios: Value threaded through
            ``post_actions.check_recovery``.
        kubeconfig_path: Passed to the ``post_actions`` helpers.

    Returns:
        The latest ``failed_post_scenarios`` value, so the caller can observe
        the result (the name is rebound locally and would otherwise be lost).

    Raises:
        SystemExit: With status 1 when both namespace and label selector are
            set, no candidate namespace is left, the deletion raises, or the
            recovery check raises.
    """
    for scenario_config in scenarios_list:
        if len(scenario_config) > 1:
            pre_action_output = post_actions.run(kubeconfig_path,
                                                 scenario_config[1])
        else:
            pre_action_output = ""

        # Parse the file completely first so the handle is not kept open
        # across the sleeps below.
        # NOTE(review): yaml.full_load is not meant for untrusted input;
        # consider yaml.safe_load if scenario files can come from outside.
        with open(scenario_config[0], "r") as f:
            scenario_config_yaml = yaml.full_load(f)

        for scenario in scenario_config_yaml["scenarios"]:
            scenario_namespace = scenario.get("namespace", "")
            scenario_label = scenario.get("label_selector", "")
            namespace_set = (scenario_namespace is not None
                             and scenario_namespace.strip() != "")
            # The label is only inspected when a namespace is set, exactly
            # like the nested check it replaces.
            if (namespace_set and scenario_label is not None
                    and scenario_label.strip() != ""):
                logging.error(
                    "You can only have namespace or label set in your namespace scenario"
                )
                logging.error(
                    "Current scenario config has namespace '%s' and label selector '%s'"
                    % (scenario_namespace, scenario_label))
                logging.error(
                    "Please set either namespace to blank ('') or label_selector to blank ('') to continue"
                )
                sys.exit(1)
            delete_count = scenario.get("delete_count", 1)
            run_count = scenario.get("runs", 1)
            run_sleep = scenario.get("sleep", 10)
            wait_time = scenario.get("wait_time", 30)
            killed_namespaces = []
            start_time = int(time.time())
            for _ in range(run_count):
                # Candidates are re-queried at the start of every run.
                namespaces = kubecli.check_namespaces([scenario_namespace],
                                                      scenario_label)
                for _ in range(delete_count):
                    if not namespaces:
                        logging.error(
                            "Couldn't delete %s namespaces, not enough namespaces matching %s with label %s"
                            % (str(run_count), scenario_namespace,
                               str(scenario_label)))
                        sys.exit(1)
                    selected_namespace = random.choice(namespaces)
                    killed_namespaces.append(selected_namespace)
                    try:
                        kubecli.delete_namespace(selected_namespace)
                        logging.info(
                            "Delete on namespace %s was successful" %
                            str(selected_namespace))
                    except Exception as e:
                        # A failed deletion aborts the process, so report it
                        # at error level rather than info.
                        logging.error(
                            "Delete on namespace %s was unsuccessful" %
                            str(selected_namespace))
                        logging.error("Namespace action error: " + str(e))
                        sys.exit(1)
                    # Never pick the same namespace twice within one run.
                    namespaces.remove(selected_namespace)
                    logging.info(
                        "Waiting %s seconds between namespace deletions" %
                        str(run_sleep))
                    time.sleep(run_sleep)

                    # NOTE(review): the wait and recovery check run after
                    # every single deletion (inside the inner loop) - confirm
                    # that is intended rather than once per scenario.
                    logging.info("Waiting for the specified duration: %s" %
                                 wait_duration)
                    time.sleep(wait_duration)
                    if len(scenario_config) > 1:
                        try:
                            failed_post_scenarios = post_actions.check_recovery(
                                kubeconfig_path, scenario_config,
                                failed_post_scenarios, pre_action_output)
                        except Exception as e:
                            logging.error(
                                "Failed to run post action checks: %s" % e)
                            sys.exit(1)
                    else:
                        failed_post_scenarios = check_active_namespace(
                            killed_namespaces, wait_time)
            end_time = int(time.time())
            cerberus.publish_kraken_status(config, failed_post_scenarios,
                                           start_time, end_time)
    return failed_post_scenarios