def ensure_all_critical_processes_running(duthost, containers_in_namespaces):
    """Checks whether each critical process is running and starts it if it is not running.

    The critical group/process lists are fetched once per container name and then
    applied to every namespaced instance of that container.

    Args:
        duthost: DUT host object. It must provide
            `get_critical_group_and_process_lists(container_name)` and is passed
            through to `ensure_process_is_running` and `get_group_program_info`.
        containers_in_namespaces: A dictionary where keys are container names and
            values are lists which contain the ids of the namespaces this container
            should reside in. Ids other than `DEFAULT_ASIC_ID` are appended to the
            container name, so they must be strings.

    Returns:
        None.
    """
    for container_name, namespace_ids in containers_in_namespaces.items():
        # NOTE(review): the lists are read from the un-suffixed container name, whereas
        # stop_critical_processes reads them from a namespaced instance when the container
        # has two or more namespaces -- confirm which lookup is intended here.
        critical_group_list, critical_process_list, succeeded = \
            duthost.get_critical_group_and_process_lists(container_name)
        pytest_assert(succeeded,
                      "Failed to get critical group and process lists of container '{}'".format(container_name))

        for namespace_id in namespace_ids:
            # Instances outside the default namespace carry the namespace id as a name suffix.
            container_name_in_namespace = container_name
            if namespace_id != DEFAULT_ASIC_ID:
                container_name_in_namespace += namespace_id

            for critical_process in critical_process_list:
                # Skip 'dsserve' process since it was not managed by supervisord.
                # The substring test (rather than equality) also covers namespaced
                # instances such as 'syncd0', matching the other helpers in this file.
                # TODO: Should remove the following two lines once the issue was solved in the image.
                if "syncd" in container_name_in_namespace and critical_process == "dsserve":
                    continue

                ensure_process_is_running(duthost, container_name_in_namespace, critical_process)

            for critical_group in critical_group_list:
                group_program_info = get_group_program_info(duthost, container_name_in_namespace, critical_group)
                for program_name in group_program_info:
                    ensure_process_is_running(duthost, container_name_in_namespace, program_name)
def stop_critical_processes(duthost, containers_in_namespaces):
    """Looks up the critical processes of every container and kills each one found.

    For each container the critical group/process lists are fetched once, then
    every namespaced instance of the container is walked and each critical
    process (and each program of each critical group) is handed to
    `check_and_kill_process` together with its current status and pid.

    Args:
        duthost: DUT host object; must provide
            `get_critical_group_and_process_lists(container_name)`.
        containers_in_namespaces: A dictionary where keys are container names and
            values are lists which contain the ids of the namespaces this container
            should reside in.

    Returns:
        None.
    """
    for base_name, ns_ids in containers_in_namespaces.items():
        # Pick the container instance from which the critical lists are read:
        #   - only on host:               ns_ids is [None]          -> plain name
        #   - only on multi-ASIC:         ns_ids is [0, 1, ...]     -> suffixed with ns_ids[1]
        #   - on host and on multi-ASICs: ns_ids is [None, 0, ...]  -> suffixed with ns_ids[1]
        lookup_name = base_name + ns_ids[1] if len(ns_ids) >= 2 else base_name

        groups, processes, ok = duthost.get_critical_group_and_process_lists(lookup_name)
        pytest_assert(ok, "Failed to get critical group and process lists of container '{}'".format(lookup_name))

        for ns_id in ns_ids:
            # Instances outside the default namespace carry the namespace id as a name suffix.
            instance_name = base_name if ns_id == DEFAULT_ASIC_ID else base_name + ns_id
            is_syncd = "syncd" in instance_name

            for process_name in processes:
                # 'dsserve' is skipped since it was not managed by supervisord.
                # TODO: Should remove this guard once the issue was solved in the image.
                if not (is_syncd and process_name == "dsserve"):
                    status, pid = get_program_info(duthost, instance_name, process_name)
                    check_and_kill_process(duthost, instance_name, process_name, status, pid)

            for group_name in groups:
                programs = get_group_program_info(duthost, instance_name, group_name)
                for program_name, program_info in programs.items():
                    # Programs of a group are addressed as '<group>:<program>'.
                    qualified_name = ":".join((group_name, program_name))
                    check_and_kill_process(duthost, instance_name, qualified_name,
                                           program_info[0], program_info[1])
def get_expected_alerting_messages(duthost, containers_in_namespaces):
    """Generates the regex of expected alerting messages for the critical processes in each namespace.

    Args:
        duthost: DUT host object; must provide
            `get_critical_group_and_process_lists(container_name)`.
        containers_in_namespaces: A dictionary where keys are container names and
            values are lists which contain the ids of the namespaces this container
            should reside in. Ids other than `DEFAULT_ASIC_ID` are appended to
            `NAMESPACE_PREFIX`, so they must be strings.

    Returns:
        A list of regex strings. For every namespace of every container there is one
        entry per critical process (except 'dsserve' in 'syncd') and one entry per
        program of each critical group.
    """
    expected_alerting_messages = []

    for container_name, namespace_ids in containers_in_namespaces.items():
        logger.info("Generating the expected alerting messages for container '{}'...".format(container_name))
        critical_group_list, critical_process_list, succeeded = \
            duthost.get_critical_group_and_process_lists(container_name)
        pytest_assert(succeeded,
                      "Failed to get critical group and process lists of container '{}'".format(container_name))

        for namespace_id in namespace_ids:
            # The default namespace is reported as "host"; any other one as prefix + id.
            namespace_name = "host"
            if namespace_id != DEFAULT_ASIC_ID:
                namespace_name = NAMESPACE_PREFIX + namespace_id

            for critical_process in critical_process_list:
                # Skip 'dsserve' process since it was not managed by supervisord
                # TODO: Should remove the following two lines once the issue was solved in the image.
                if container_name == "syncd" and critical_process == "dsserve":
                    continue

                logger.info("Generating the expected alerting message for process '{}'".format(critical_process))
                expected_alerting_messages.append(
                    ".*Process '{}' is not running in namespace '{}'.*".format(critical_process, namespace_name))

            for critical_group in critical_group_list:
                # Group membership is read from the un-suffixed container name.
                group_program_info = get_group_program_info(duthost, container_name, critical_group)
                for program_name in group_program_info:
                    logger.info("Generating the expected alerting message for process '{}'".format(program_name))
                    expected_alerting_messages.append(
                        ".*Process '{}' is not running in namespace '{}'.*".format(program_name, namespace_name))

        logger.info("Generating the expected alerting messages for container '{}' was done!".format(container_name))

    return expected_alerting_messages
def get_expected_alerting_messages_supervisor(duthost, containers_in_namespaces):
    """Generates the regex of expected alerting messages for the critical processes in each container.

    These alerting messages will be matched against those in syslog generated by Supervisord.

    Args:
        duthost: DUT host object; must provide
            `get_critical_group_and_process_lists(container_name)`.
        containers_in_namespaces: A dictionary where keys are container names and
            values are lists which contain the ids of the namespaces this container
            should reside in. Ids other than `DEFAULT_ASIC_ID` are concatenated to
            the container name and to `NAMESPACE_PREFIX`, so they must be strings.

    Returns:
        A list which contains the regex of alerting messages.
    """
    expected_alerting_messages = []
    logger.info("Generating the regex of expected alerting messages ...")

    for container_name, namespace_ids in containers_in_namespaces.items():
        container_name_in_namespace = container_name
        # If a container is only running on host, then namespace_ids is [None]
        # If a container is running on multi-ASIC, then namespace_ids is [0, 1, ...]
        # If a container is running on host and multi-ASICs, then namespace_ids is [None, 0, 1, ...]
        # With two or more entries, index 1 is always a namespaced instance, so the
        # critical lists are read from it. The threshold must be '>= 2' (not '> 2'),
        # otherwise a two-entry list such as [0, 1] would query the un-suffixed
        # container, which is not running on the host.
        if len(namespace_ids) >= 2:
            container_name_in_namespace += namespace_ids[1]

        critical_group_list, critical_process_list, succeeded = \
            duthost.get_critical_group_and_process_lists(container_name_in_namespace)
        pytest_assert(succeeded,
                      "Failed to get critical group and process lists of container '{}'"
                      .format(container_name_in_namespace))

        for namespace_id in namespace_ids:
            # The default namespace is reported as "host" and keeps the plain container
            # name; any other namespace uses prefix + id and an id-suffixed container name.
            namespace_name = "host"
            container_name_in_namespace = container_name
            if namespace_id != DEFAULT_ASIC_ID:
                namespace_name = NAMESPACE_PREFIX + namespace_id
                container_name_in_namespace += namespace_id

            logger.info("Generating the regex of expected alerting messages for container '{}'..."
                        .format(container_name_in_namespace))

            for critical_process in critical_process_list:
                # Skip 'dsserve' process since it was not managed by supervisord
                # TODO: Should remove the following two lines once the issue was solved in the image.
                if "syncd" in container_name_in_namespace and critical_process == "dsserve":
                    continue

                logger.info("Generating the regex of expected alerting message for process '{}' in container '{}'"
                            .format(critical_process, container_name_in_namespace))
                expected_alerting_messages.append(
                    ".*Process '{}' is not running in namespace '{}'.*".format(critical_process, namespace_name))

            for critical_group in critical_group_list:
                group_program_info = get_group_program_info(duthost, container_name_in_namespace, critical_group)
                for program_name in group_program_info:
                    logger.info("Generating the regex of expected alerting message for process '{}' in container '{}'"
                                .format(program_name, container_name_in_namespace))
                    expected_alerting_messages.append(
                        ".*Process '{}' is not running in namespace '{}'.*".format(program_name, namespace_name))

            logger.info("Generating the regex of expected alerting messages for container '{}' was done!"
                        .format(container_name_in_namespace))

    return expected_alerting_messages