Exemplo n.º 1
0
def pods_in_phase(label_selector: str,
                  phase: str = "Running",
                  ns: str = "default",
                  secrets: Secrets = None) -> bool:
    """
    Check that every pod selected by `label_selector` in the namespace `ns`
    reports the expected `phase`.

    Returns `True` when all the selected pods are in that phase.

    Raises :exc:`chaoslib.exceptions.FailedActivity` when no pod matches the
    selector, or as soon as one matching pod is found in another phase.
    """
    core = client.CoreV1Api(create_k8s_api_client(secrets))
    found = core.list_namespaced_pod(ns, label_selector=label_selector).items

    logger.debug("Found {d} pods matching label '{n}'".format(
        d=len(found), n=label_selector))

    if not found:
        raise FailedActivity(
            "no pods '{name}' were found".format(name=label_selector))

    for pod in found:
        current = pod.status.phase
        if current != phase:
            # the message identifies the pod by the selector, not by its name
            raise FailedActivity(
                "pod '{name}' is in phase '{s}' but should be '{p}'".format(
                    name=label_selector, s=current, p=phase))

    return True
Exemplo n.º 2
0
def create_node(meta: Dict[str, Any] = None,
                spec: Dict[str, Any] = None,
                secrets: Secrets = None) -> client.V1Node:
    """
    Register a single new node in the cluster and return the created object.

    `meta` and `spec`, when given, are expanded as keyword arguments into
    the node's metadata and spec objects; when falsy the matching field is
    left as `None`.

    On some cloud providers this cannot be used meaningfully. For instance
    on GCE, this will likely fail.

    Raises :exc:`chaoslib.exceptions.FailedActivity` when the API rejects
    the creation.

    See also: https://github.com/kubernetes/community/blob/master/contributors/devel/api-conventions.md#idempotency
    """  # noqa: E501
    core = client.CoreV1Api(create_k8s_api_client(secrets))

    node = client.V1Node()
    node.metadata = client.V1ObjectMeta(**meta) if meta else None
    node.spec = client.V1NodeSpec(**spec) if spec else None

    try:
        created = core.create_node(node)
    except ApiException as x:
        raise FailedActivity("Creating new node failed: {}".format(x.body))
    else:
        logger.debug("Node '{}' created".format(created.metadata.name))
        return created
Exemplo n.º 3
0
def get_random_namespace(configuration: Configuration = None,
                         secrets: Secrets = None):
    """
    Pick one namespace of the cluster at random.

    Namespaces whose name appears in the `ns-ignore-list` entry of
    `configuration` (when a configuration is given) are never picked.

    :param configuration: chaostoolkit will inject this dictionary
    :param secrets: chaostoolkit will inject this dictionary
    :return: a randomly chosen namespace object as returned by the API, or
        `None` when no namespace is left after filtering
    """
    ignored = []
    if configuration is not None:
        ignored = configuration.get("ns-ignore-list", [])

    core = client.CoreV1Api(create_k8s_api_client(secrets))
    candidates = [
        item for item in core.list_namespace().items
        if item.metadata.name not in ignored
    ]

    if not candidates:
        return None
    return random.choice(candidates)
Exemplo n.º 4
0
def count_pods(label_selector: str,
               phase: str = None,
               ns: str = "default",
               secrets: Secrets = None) -> int:
    """
    Return how many pods in namespace `ns` match `label_selector`.

    When `phase` is given, only the pods currently in that phase are
    counted; otherwise every matching pod is counted.
    """
    core = client.CoreV1Api(create_k8s_api_client(secrets))
    pods = core.list_namespaced_pod(ns, label_selector=label_selector).items

    logger.debug("Found {d} pods matching label '{n}'".format(
        d=len(pods), n=label_selector))

    if not pods:
        return 0

    if not phase:
        return len(pods)

    return sum(1 for pod in pods if pod.status.phase == phase)
Exemplo n.º 5
0
def all_microservices_healthy(ns: str = "default",
                              secrets: Secrets = None) -> MicroservicesStatus:
    """
    Check that every pod of the namespace `ns` is in the `"Running"` phase.

    Pods in the `"Failed"` phase are counted as failed, pods in any other
    non-running phase as not ready. Returns `True` when there are none of
    either.

    Raises :exc:`chaoslib.exceptions.FailedActivity` when at least one pod
    is failed or not ready.
    """
    core = client.CoreV1Api(create_k8s_api_client(secrets))
    pods = core.list_namespaced_pod(namespace=ns).items

    failed = [pod for pod in pods if pod.status.phase == "Failed"]
    not_ready = [
        pod for pod in pods
        if pod.status.phase not in ("Failed", "Running")
    ]

    logger.debug("Found {d} failed and {n} not ready pods".format(
        d=len(failed), n=len(not_ready)))

    # we probably should list them in the message
    if failed or not_ready:
        raise FailedActivity("the system is unhealthy")

    return True
Exemplo n.º 6
0
def cordon_node(name: str = None,
                label_selector: str = None,
                secrets: Secrets = None):
    """
    Cordon nodes matching the given label or name, so that no pods
    are scheduled on them any longer.

    When `name` is provided it takes precedence and nodes are looked up with
    the field selector `metadata.name=<name>`; otherwise `label_selector` is
    used as given.

    Raises :exc:`chaoslib.exceptions.FailedActivity` when no node matches or
    when the API refuses to patch one of the matching nodes.
    """
    api = create_k8s_api_client(secrets)

    v1 = client.CoreV1Api(api)
    if name:
        # remember the selector actually used so the error below is accurate
        selector = "metadata.name={}".format(name)
        ret = v1.list_node(field_selector=selector)
        logger.debug("Found {d} node named '{s}'".format(d=len(ret.items),
                                                         s=name))
    else:
        selector = label_selector
        ret = v1.list_node(label_selector=label_selector)
        logger.debug("Found {d} node(s) labelled '{s}'".format(
            d=len(ret.items), s=label_selector))

    nodes = ret.items
    if not nodes:
        raise FailedActivity(
            "failed to find a node that matches selector {}".format(
                selector))

    body = {"spec": {"unschedulable": True}}

    for n in nodes:
        try:
            v1.patch_node(n.metadata.name, body)
        except ApiException as x:
            logger.debug("Unscheduling node '{}' failed: {}".format(
                n.metadata.name, x.body))
            raise FailedActivity("Failed to unschedule node '{}': {}".format(
                n.metadata.name, x.body)) from x
Exemplo n.º 7
0
def all_pods_in_all_ns_are_ok(configuration: Configuration = None,
                              secrets: Secrets = None):
    """
    Check the status of the pods of all namespaces against the active nodes.

    The `ns-ignore-list` and `taints-ignore-list` configuration entries are
    read; the taints are used to select the active nodes, the namespace list
    is handed to the status check.

    :param configuration: experiment configuration
    :param secrets: k8s credentials
    :return: whatever `check_pods_statuses` reports for the active node
        names, the ignored namespaces and the pods of all namespaces
        (presumably True when all the pods are fine -- confirm against that
        helper)
    """
    ignored_ns = get_value_from_configuration(
        configuration, "ns-ignore-list")
    if ignored_ns is None:
        ignored_ns = []

    raw_taints = get_value_from_configuration(
        configuration, "taints-ignore-list")
    ignored_taints = (
        load_taint_list_from_dict(raw_taints)
        if raw_taints is not None else []
    )

    # the API handle returned alongside the nodes is not needed here
    nodes, _ = get_active_nodes(None, ignored_taints, secrets)
    node_names = [node.metadata.name for node in nodes.items]

    core = client.CoreV1Api(create_k8s_api_client(secrets))
    all_pods = core.list_pod_for_all_namespaces(watch=False)

    return check_pods_statuses(node_names, ignored_ns, all_pods)
Exemplo n.º 8
0
def microservice_available_and_healthy(
        name: str, ns: str = "default",
        label_selector: str = "name in ({name})",
        secrets: Secrets = None) -> Union[bool, None]:
    """
    Check that the deployments of microservice `name` in the namespace `ns`
    have as many available replicas as their spec requests.

    Deployments are selected with `label_selector`, in which `{name}` is
    replaced by `name`. Returns `True` when all of them are healthy.

    Raises :exc:`chaoslib.exceptions.FailedActivity` when no deployment is
    found or when one has a replica count mismatch.
    """
    selector = label_selector.format(name=name)
    apps = client.AppsV1beta1Api(create_k8s_api_client(secrets))
    deployments = apps.list_namespaced_deployment(
        ns, label_selector=selector).items

    logger.debug("Found {d} deployments named '{n}'".format(
        d=len(deployments), n=name))

    if not deployments:
        raise FailedActivity(
            "microservice '{name}' was not found".format(name=name))

    for deployment in deployments:
        available = deployment.status.available_replicas
        logger.debug("Deployment has '{s}' available replicas".format(
            s=available))

        if available != deployment.spec.replicas:
            raise FailedActivity(
                "microservice '{name}' is not healthy".format(name=name))

    return True
Exemplo n.º 9
0
def delete_nodes(label_selector: str = None,
                 all: bool = False,
                 rand: bool = False,
                 count: int = None,
                 grace_period_seconds: int = None,
                 secrets: Secrets = None):
    """
    Delete nodes gracefully. Select the appropriate nodes by label.

    Nodes are not drained beforehand so we can see how cluster behaves. Nodes
    cannot be restarted, they are really deleted. Please be careful when using
    this action.

    On certain cloud providers, you also need to delete the underneath VM
    instance as well afterwards. This is the case on GCE for instance.

    If `rand` is set to `True`, one random node will be terminated.
    Otherwise, if `count` is set, up to `count` distinct nodes (randomly
    picked) will be terminated.
    Otherwise, if `all` is set to `True`, all nodes will be terminated.
    Otherwise, the first retrieved node will be terminated.

    Raises :exc:`chaoslib.exceptions.FailedActivity` when no node matches
    the selector. A deletion that does not report success is only logged.
    """
    api = create_k8s_api_client(secrets)

    v1 = client.CoreV1Api(api)
    ret = v1.list_node(label_selector=label_selector)

    logger.debug("Found {d} nodes labelled '{s}'".format(d=len(ret.items),
                                                         s=label_selector))

    nodes = ret.items
    if not nodes:
        raise FailedActivity(
            "failed to find a node that matches selector {}".format(
                label_selector))

    if rand:
        nodes = [random.choice(nodes)]
        logger.debug("Picked node '{p}' to be terminated".format(
            p=nodes[0].metadata.name))
    elif count is not None:
        # sample without replacement so the same node is never picked (and
        # deleted) twice; clamp so asking for more nodes than exist, or for
        # a negative amount, does not raise
        k = max(0, min(count, len(nodes)))
        nodes = random.sample(nodes, k)
        logger.debug("Picked {c} nodes '{p}' to be terminated".format(
            c=len(nodes), p=", ".join([n.metadata.name for n in nodes])))
    elif not all:
        nodes = [nodes[0]]
        logger.debug("Picked node '{p}' to be terminated".format(
            p=nodes[0].metadata.name))
    else:
        logger.debug("Picked all nodes '{p}' to be terminated".format(
            p=", ".join([n.metadata.name for n in nodes])))

    body = client.V1DeleteOptions()
    for n in nodes:
        res = v1.delete_node(n.metadata.name,
                             body,
                             grace_period_seconds=grace_period_seconds)

        if res.status != "Success":
            logger.debug("Terminating nodes failed: {}".format(res.message))
Exemplo n.º 10
0
def get_not_empty_ns(secret: Secrets = None,
                     ns_ignore_list: str = "",
                     label_selector: str = "com.wix.lifecycle=true"):
    """
    Pick at random the name of a namespace holding more than one pod that
    matches `label_selector`.

    Namespaces whose name is found in `ns_ignore_list` are not considered.
    Up to 100 random draws are made.

    NOTE(review): `ns_ignore_list` defaults to a string, in which case the
    `in` test is a substring test -- confirm what callers pass.

    :return: the name of the selected namespace, `'default'` when no
        namespace is left after filtering, or `None` when no draw found a
        suitable namespace
    """
    core = client.CoreV1Api(create_k8s_api_client(secret))

    candidates = [
        item.metadata.name for item in core.list_namespace().items
        if item.metadata.name not in ns_ignore_list
    ]
    if not candidates:
        return 'default'

    for _ in range(100):
        picked = random.choice(candidates)
        pods = core.list_namespaced_pod(picked,
                                        label_selector=label_selector)
        if len(pods.items) > 1:
            logger.debug("Found {} non-empty namespace".format(picked))
            return picked

    return None
Exemplo n.º 11
0
def microservice_is_not_available(name: str, ns: str = "default",
                                  label_selector: str = "name in ({name})",
                                  secrets: Secrets = None) -> bool:
    """
    Check that none of the pods of microservice `name` in the namespace `ns`
    is running.

    Pods are selected with `label_selector`, in which `{name}` is replaced
    by `name`. Returns `True` when no selected pod is in the `"Running"`
    phase, including when no pod was selected at all.

    Raises :exc:`chaoslib.exceptions.FailedActivity` when one of the
    selected pods is in the `"Running"` phase.
    """
    selector = label_selector.format(name=name)
    core = client.CoreV1Api(create_k8s_api_client(secrets))
    pods = core.list_namespaced_pod(ns, label_selector=selector).items

    logger.debug("Found {d} pod named '{n}'".format(
        d=len(pods), n=name))

    for pod in pods:
        current = pod.status.phase
        logger.debug("Pod '{p}' has status '{s}'".format(
            p=pod.metadata.name, s=current))
        if current == "Running":
            raise FailedActivity(
                "microservice '{name}' is actually running".format(name=name))

    return True
Exemplo n.º 12
0
def verify_pod_termination_reason(k8s_label_selector: str,
                                  reason: str = None,
                                  secrets: Secrets = None) -> bool:
    """
    Verify that at least one pod matching the label selector, in any
    namespace, has a container whose last terminated state carries the
    given reason.

    :param k8s_label_selector: label selector to limit our selection
    :param reason: expected termination reason, usually something bad such
        as OOMKilled
    :param secrets: chaostoolkit will inject this parameter
    :return: True when a container with the given termination reason was
        found
    :raises FailedActivity: when no such container was found
    """
    api = create_k8s_api_client(secrets)

    v1 = client.CoreV1Api(api)
    ret = v1.list_pod_for_all_namespaces(label_selector=k8s_label_selector)

    for item in ret.items:
        if item.status is None:
            continue
        for status in item.status.container_statuses or []:
            # a container that never restarted may carry no last state
            last_state = status.last_state
            terminated = (
                last_state.terminated if last_state is not None else None)
            if terminated is not None and terminated.reason == reason:
                # one match is enough: stop scanning the remaining pods
                return True

    raise FailedActivity(
        "No pods with proper termination reason found ({} {})".format(
            k8s_label_selector, reason))
Exemplo n.º 13
0
def get_active_nodes(label_selector: str = None,
                     taints_ignore_list=None,
                     secrets: Secrets = None):
    """
    List the nodes that are not excluded by their taints, optionally
    restricted to those matching `label_selector`.

    A node carrying taints is left out when
    `node_should_be_ignored_by_taints` says so for its taints and
    `taints_ignore_list`; nodes without taints are always kept.

    :return: a tuple made of a node list holding the kept nodes and the
        core API object used for the lookup
    """
    if taints_ignore_list is None:
        taints_ignore_list = []

    v1 = client.CoreV1Api(create_k8s_api_client(secrets))

    # the selector is only sent when one was actually provided
    list_args = dict(_preload_content=True, _return_http_data_only=True)
    if label_selector:
        list_args["label_selector"] = label_selector
    listing = v1.list_node_with_http_info(**list_args)

    candidates = listing.items
    active = client.V1NodeList(items=[])
    for node in candidates:
        taints = node.spec.taints
        if taints is not None and node_should_be_ignored_by_taints(
                taints, taints_ignore_list):
            continue
        active.items.append(node)

    return active, v1
Exemplo n.º 14
0
def kill_microservice_by_label(label_selector: str = "name in ({name})",
                               secrets: Secrets = None):
    """
    Kill the microservices matching `label_selector` in all namespaces.

    The matching deployments are deleted first, then the matching replica
    sets and finally the matching pods. When no deployment matches the
    selector, nothing is deleted at all.

    The selector is sent to the API as given: the `{name}` placeholder of
    the default value is not substituted by this function.

    Kubernetes API errors are not propagated (the action is best effort) but
    they are logged so that a failed kill does not go unnoticed.
    """
    api = create_k8s_api_client(secrets)

    apps = client.AppsV1beta1Api(api)
    try:
        deployments = apps.list_deployment_for_all_namespaces(
            label_selector=label_selector).items
        if not deployments:
            return

        logger.debug("Found {d} deployments labeled '{n}'".format(
            d=len(deployments), n=label_selector))

        body = client.V1DeleteOptions()
        for d in deployments:
            logger.debug("Delete deployment {}".format(d.metadata.name))
            apps.delete_namespaced_deployment(
                name=d.metadata.name,
                namespace=d.metadata.namespace,
                body=body)

        extensions = client.ExtensionsV1beta1Api(api)
        replica_sets = extensions.list_replica_set_for_all_namespaces(
            label_selector=label_selector).items
        logger.debug("Found {d} replica sets labeled '{n}'".format(
            d=len(replica_sets), n=label_selector))

        body = client.V1DeleteOptions()
        for r in replica_sets:
            logger.warning("Delete replicaset {}".format(r.metadata.name))
            extensions.delete_namespaced_replica_set(
                name=r.metadata.name,
                namespace=r.metadata.namespace,
                body=body)

        core = client.CoreV1Api(api)
        pods = core.list_pod_for_all_namespaces(
            label_selector=label_selector).items
        logger.debug("Found {d} pods labeled '{n}'".format(
            d=len(pods), n=label_selector))

        body = client.V1DeleteOptions()
        for p in pods:
            logger.warning("Delete pod {}".format(p.metadata.name))
            core.delete_namespaced_pod(name=p.metadata.name,
                                       namespace=p.metadata.namespace,
                                       body=body)
    except ApiException as e:
        # still best effort, but leave a trace instead of failing silently
        logger.warning(
            "Killing microservices labeled '{n}' failed: {e}".format(
                n=label_selector, e=e.body))
def Xtest_client_can_be_created_from_secrets(load_incluster_config, has_conf):
    """
    With `CHAOSTOOLKIT_IN_POD` set and no local configuration reported, the
    client creation must call the in-cluster configuration loader exactly
    once, without arguments.

    Both parameters are expected to be mocks (presumably injected by patch
    decorators that are not visible here).
    """
    os.environ["CHAOSTOOLKIT_IN_POD"] = "true"

    try:
        has_conf.return_value = False
        load_incluster_config.return_value = None

        create_k8s_api_client()

        load_incluster_config.assert_called_once_with()
    finally:
        # never leak the flag into the other tests
        os.environ.pop("CHAOSTOOLKIT_IN_POD", None)
Exemplo n.º 16
0
def remove_service_endpoint(name: str,
                            ns: str = "default",
                            secrets: Secrets = None):
    """
    Delete the service called `name` in the namespace `ns`, that is the
    endpoint sitting in front of the microservice's pods.
    """
    core = client.CoreV1Api(create_k8s_api_client(secrets))
    core.delete_namespaced_service(name, namespace=ns)
Exemplo n.º 17
0
def get_node_list(label_selector, secrets):
    """
    List the nodes of the cluster, optionally restricted to those matching
    `label_selector`.

    :return: a tuple made of the retrieved node items and the core API
        object used for the lookup
    """
    v1 = client.CoreV1Api(create_k8s_api_client(secrets))

    # the selector is only sent when one was actually provided
    list_args = dict(_preload_content=True, _return_http_data_only=True)
    if label_selector:
        list_args["label_selector"] = label_selector

    listing = v1.list_node_with_http_info(**list_args)
    return listing.items, v1
def Xtest_client_can_be_created_from_secrets(has_conf):
    """
    A client built from explicit secrets, while no local configuration is
    reported, must target the host given in those secrets.

    `has_conf` is expected to be a mock (presumably injected by a patch
    decorator that is not visible here).
    """
    has_conf.return_value = False

    api = create_k8s_api_client({
        "KUBERNETES_HOST": "http://someplace",
        "KUBERNETES_API_KEY": "6789",
        "KUBERNETES_API_KEY_PREFIX": "Boom"
    })
    conf = api.configuration

    assert conf.host == "http://someplace"
    # NOTE(review): these two checks pass the expected value as the default
    # of `.get()`, so they hold even when the key is missing -- confirm
    # whether an equality check was intended.
    assert conf.api_key.get("authorization", "6789")
    assert conf.api_key_prefix.get("authorization", "Boom")
Exemplo n.º 19
0
def read_pod_logs(name: str = None,
                  last: Union[str, None] = None,
                  ns: str = "default",
                  from_previous: bool = False,
                  label_selector: str = "name in ({name})",
                  container_name: str = None,
                  secrets: Secrets = None) -> Dict[str, str]:
    """
    Fetch the logs of the pods selected by `label_selector` in the
    namespace `ns` and return them as a dictionary mapping each pod's name
    to its logs. `{name}` in the selector is replaced by `name`.

    When a pod has several containers, set `container_name` to tell which
    container the logs must be read from.

    If `last` is provided, only the logs of that period until now are
    fetched. It is parsed as a date expression, so a fluent delta such as
    `10 minutes` can be used.

    Set `from_previous` to `True` to capture the logs of the previous
    incarnation of the pods, if any.
    """
    selector = label_selector.format(name=name)
    core = client.CoreV1Api(create_k8s_api_client(secrets))
    pods = core.list_namespaced_pod(ns, label_selector=selector).items

    logger.debug("Found {d} pods: [{p}]".format(
        d=len(pods), p=', '.join([p.metadata.name for p in pods])))

    since = None
    if last:
        now = datetime.now()
        since = int((now - dateparser.parse(last)).total_seconds())

    params = {
        "namespace": ns,
        "follow": False,
        "previous": from_previous,
        "timestamps": True,
        # None is not a valid value
        "container": container_name or "",
        "_preload_content": False,
    }
    if since:
        params["since_seconds"] = since

    logs = {}
    for pod in pods:
        pod_name = pod.metadata.name
        logger.debug("Fetching logs for pod '{n}'".format(n=pod_name))
        response = core.read_namespaced_pod_log(pod_name, **params)
        logs[pod_name] = response.read().decode('utf-8')

    return logs
def Xtest_client_can_provide_a_context(cfg, has_conf):
    """
    When a local configuration is reported and `KUBERNETES_CONTEXT` is set,
    the client must be built from the configuration with that context.

    Both parameters are expected to be mocks (presumably injected by patch
    decorators that are not visible here).
    """
    has_conf.return_value = True
    cfg.new_client_from_config = MagicMock()
    try:
        for variable in ("NASA_SECRETS_URL", "NASA_TOKEN"):
            os.environ.pop(variable, None)
        os.environ["KUBERNETES_CONTEXT"] = "minikube"

        create_k8s_api_client()

        cfg.new_client_from_config.assert_called_with(context="minikube")
    finally:
        # never leak the context into the other tests
        os.environ.pop("KUBERNETES_CONTEXT", None)
Exemplo n.º 21
0
def kill_microservice(name: str,
                      ns: str = "default",
                      label_selector: str = "name in ({name})",
                      secrets: Secrets = None):
    """
    Kill the microservice `name` in the namespace `ns`.

    The deployments matching `label_selector` (in which `{name}` is
    replaced by `name`) are deleted first, then the matching replica sets
    and finally the matching pods, each with default delete options.
    """
    selector = label_selector.format(name=name)
    api = create_k8s_api_client(secrets)

    # 1. deployments
    apps = client.AppsV1beta1Api(api)
    deployments = apps.list_namespaced_deployment(
        ns, label_selector=selector).items

    logger.debug("Found {d} deployments named '{n}'".format(
        d=len(deployments), n=name))

    options = client.V1DeleteOptions()
    for deployment in deployments:
        apps.delete_namespaced_deployment(name=deployment.metadata.name,
                                          namespace=ns,
                                          body=options)

    # 2. replica sets
    extensions = client.ExtensionsV1beta1Api(api)
    replica_sets = extensions.list_namespaced_replica_set(
        ns, label_selector=selector).items

    logger.debug("Found {d} replica sets named '{n}'".format(
        d=len(replica_sets), n=name))

    options = client.V1DeleteOptions()
    for replica_set in replica_sets:
        extensions.delete_namespaced_replica_set(
            name=replica_set.metadata.name,
            namespace=ns,
            body=options)

    # 3. pods
    core = client.CoreV1Api(api)
    pods = core.list_namespaced_pod(ns, label_selector=selector).items

    logger.debug("Found {d} pods named '{n}'".format(d=len(pods), n=name))

    options = client.V1DeleteOptions()
    for pod in pods:
        core.delete_namespaced_pod(name=pod.metadata.name,
                                   namespace=ns,
                                   body=options)
Exemplo n.º 22
0
def get_nodes(label_selector: str = None, secrets: Secrets = None):
    """
    Return the nodes of the cluster, optionally restricted to those
    matching `label_selector`, as the decoded JSON payload of the API
    response.
    """
    core = client.CoreV1Api(create_k8s_api_client(secrets))

    # ask for the raw response so the JSON payload can be decoded as is
    list_args = {"_preload_content": False}
    if label_selector:
        list_args["label_selector"] = label_selector

    response = core.list_node(**list_args)
    payload = response.read().decode('utf-8')
    return json.loads(payload)
Exemplo n.º 23
0
def terminate_pods(label_selector: str = None, name_pattern: str = None,
                   all: bool = False, rand: bool = False,
                   ns: str = "default", secrets: Secrets = None):
    """
    Terminate a pod gracefully. Select the appropriate pods by label and/or
    name patterns. Whenever a pattern is provided for the name, only the
    pods whose name matches the pattern from its start are kept.

    If neither `label_selector` nor `name_pattern` are provided, all pods
    in the namespace are candidates.

    If `rand` is set to `True`, one random pod will be terminated.
    Otherwise, if `all` is set to `True`, all matching pods will be
    terminated.
    Otherwise, the first retrieved pod will be terminated.

    Raises :exc:`chaoslib.exceptions.FailedActivity` when a single pod must
    be picked but no pod matched the selection.
    """
    api = create_k8s_api_client(secrets)

    v1 = client.CoreV1Api(api)
    ret = v1.list_namespaced_pod(ns, label_selector=label_selector)

    logger.debug("Found {d} pods labelled '{s}'".format(
        d=len(ret.items), s=label_selector))

    pods = []
    if name_pattern:
        pattern = re.compile(name_pattern)
        for p in ret.items:
            if pattern.match(p.metadata.name):
                pods.append(p)
                logger.debug("Pod '{p}' match pattern".format(
                    p=p.metadata.name))
    else:
        pods = ret.items

    # picking from an empty selection used to fail with a bare IndexError;
    # report the situation explicitly instead
    if (rand or not all) and not pods:
        raise FailedActivity(
            "no pods found in namespace '{n}' matching label '{s}' and "
            "name pattern '{p}'".format(
                n=ns, s=label_selector, p=name_pattern))

    if rand:
        pods = [random.choice(pods)]
        logger.debug("Picked pod '{p}' to be terminated".format(
            p=pods[0].metadata.name))
    elif not all:
        pods = [pods[0]]
        logger.debug("Picked pod '{p}' to be terminated".format(
            p=pods[0].metadata.name))

    body = client.V1DeleteOptions()
    for p in pods:
        v1.delete_namespaced_pod(p.metadata.name, ns, body)
Exemplo n.º 24
0
def deployment_is_not_fully_available(name: str,
                                      ns: str = "default",
                                      label_selector: str = "name in ({name})",
                                      timeout: int = 30,
                                      secrets: Secrets = None):
    """
    Watch the deployments selected by `label_selector` in the namespace
    `ns` until one of them has a number of ready replicas that differs from
    the number its spec requests, then return `True`.

    `{name}` in the selector is replaced by `name`. `timeout` is given to
    the watch as its request timeout, in seconds.

    Raises :exc:`chaoslib.exceptions.FailedActivity` when the watch times
    out before such a state is seen. If the event stream ends without a
    timeout, the function falls through and returns `None`.
    """
    selector = label_selector.format(name=name)
    apps = client.AppsV1beta1Api(create_k8s_api_client(secrets))
    watcher = watch.Watch()
    timeout = int(timeout)

    try:
        logger.debug("Watching events for {t}s".format(t=timeout))
        events = watcher.stream(apps.list_namespaced_deployment,
                                namespace=ns,
                                label_selector=selector,
                                _request_timeout=timeout)
        for event in events:
            observed = event['object']
            ready = observed.status.ready_replicas
            desired = observed.spec.replicas

            logger.debug("Deployment '{p}' {t}: "
                         "Ready Replicas {r} - "
                         "Unavailable Replicas {u} - "
                         "Desired Replicas {a}".format(
                             p=observed.metadata.name,
                             t=event["type"],
                             r=ready,
                             a=desired,
                             u=observed.status.unavailable_replicas))

            if ready != desired:
                watcher.stop()
                return True

    except urllib3.exceptions.ReadTimeoutError:
        logger.debug("Timed out!")
        raise FailedActivity(
            "microservice '{name}' failed to stop running within {t}s".format(
                name=name, t=timeout))
Exemplo n.º 25
0
def all_microservices_healthy(
        ns: str = "default",
        secrets: Secrets = None,
        configuration: Configuration = None) -> MicroservicesStatus:
    """
    Check that every pod considered is in the `"Running"` phase.

    The pods of the namespace `ns` are considered, or those of all
    namespaces when `ns` is the empty string. Pods living in a namespace
    listed in the `ns-ignore-list` configuration entry are skipped.

    Pods in the `"Failed"` or `"Unknown"` phase are counted as failed, pods
    in any other non-running phase as not ready. Returns `True` when there
    are none of either.

    Raises :exc:`chaoslib.exceptions.FailedActivity` when at least one pod
    is failed or not ready.
    """
    api = create_k8s_api_client(secrets)

    ignored_ns = []
    if configuration is not None:
        ignored_ns = configuration.get("ns-ignore-list", [])

    core = client.CoreV1Api(api)
    if ns == "":
        listing = core.list_pod_for_all_namespaces()
    else:
        listing = core.list_namespaced_pod(namespace=ns)

    failed, not_ready = [], []
    total = 0
    for pod in listing.items:
        phase = pod.status.phase
        if pod.metadata.namespace in ignored_ns:
            continue
        total += 1
        if phase in ("Failed", "Unknown"):
            failed.append(pod)
        elif phase != "Running":
            not_ready.append(pod)

    logger.debug("Total pods {t}. Found {d} failed and {n} not ready pods".format(
        d=len(failed), n=len(not_ready), t=total))
    for template, group in (("Failed service {} on {} {}", failed),
                            ("Not ready service {} on {} {}", not_ready)):
        for pod in group:
            logger.debug(template.format(
                pod.metadata.name, pod.spec.node_name, pod.status.phase))

    # we probably should list them in the message
    if failed or not_ready:
        raise FailedActivity("the system is unhealthy")

    return True
Exemplo n.º 26
0
def scale_microservice(name: str,
                       replicas: int,
                       ns: str = "default",
                       secrets: Secrets = None):
    """
    Set the number of replicas of the deployment `name` in the namespace
    `ns` to `replicas`, scaling it up or down.

    Raises :exc:`chaoslib.exceptions.FailedActivity` when the API rejects
    the change.
    """
    extensions = client.ExtensionsV1beta1Api(create_k8s_api_client(secrets))
    patch = {"spec": {"replicas": replicas}}

    try:
        extensions.patch_namespaced_deployment_scale(
            name, namespace=ns, body=patch)
    except ApiException as e:
        message = "failed to scale '{s}' to {r} replicas: {e}".format(
            s=name, r=replicas, e=str(e))
        raise FailedActivity(message)
Exemplo n.º 27
0
def deploy_single_obj(secrets: Secrets, ns: str, obj):
    """
    Create the Kubernetes object described by `obj` in the namespace `ns`.

    `obj` is a mapping; its `apiVersion` selects the API group to talk to
    (`apps/v1beta1` or `v1`) and its `kind` selects what is created
    (`Deployment` or `Pod`).

    :return: the API response of the creation, or `None` when the API
        version or the kind is not supported (a warning is logged then)
    """
    api = create_k8s_api_client(secrets)

    api_version = obj.get('apiVersion')
    if api_version == 'apps/v1beta1':
        api_specific = client.AppsV1beta1Api(api)
    elif api_version == 'v1':
        api_specific = client.CoreV1Api(api)
    else:
        logger.warning(
            "Unable to create api client for {}".format(api_version))
        return None

    kind = obj.get('kind')
    if kind == 'Deployment':
        return api_specific.create_namespaced_deployment(ns, body=obj)
    if kind == 'Pod':
        return api_specific.create_namespaced_pod(ns, body=obj)

    # the message used to lack a placeholder, so the kind was never shown
    logger.warning("Unable to create object of kind {}".format(kind))
    return None
Exemplo n.º 28
0
def service_endpoint_is_initialized(name: str, ns: str = "default",
                                    label_selector: str = "name in ({name})",
                                    secrets: Secrets = None):
    """
    Check that at least one service matching `label_selector` exists in
    the namespace `ns`. `{name}` in the selector is replaced by `name`.

    Returns `True` when such a service is found and raises
    :exc:`chaoslib.exceptions.FailedActivity` otherwise.
    """
    selector = label_selector.format(name=name)
    core = client.CoreV1Api(create_k8s_api_client(secrets))
    services = core.list_namespaced_service(
        ns, label_selector=selector).items

    logger.debug("Found {d} services named '{n}'".format(
        d=len(services), n=name))

    if services:
        return True

    raise FailedActivity(
        "service '{name}' is not initialized".format(name=name))
Exemplo n.º 29
0
def start_microservice(spec_path: str,
                       ns: str = "default",
                       secrets: Secrets = None):
    """
    Start a microservice described by the deployment config, which must be the
    path to the JSON or YAML representation of the deployment.

    The file format is chosen from the extension of `spec_path`: `.json`,
    `.yml` or `.yaml`. A YAML file must hold a single document.

    :return: the API response of the deployment creation
    :raises FailedActivity: when the file extension is not supported
    """
    api = create_k8s_api_client(secrets)

    with open(spec_path) as f:
        _, ext = os.path.splitext(spec_path)
        if ext == '.json':
            deployment = json.load(f)
        elif ext in ['.yml', '.yaml']:
            # `load_all` yields a generator of documents, which is not a
            # valid request body; `safe_load` returns the mapping itself and
            # does not build arbitrary Python objects
            deployment = yaml.safe_load(f)
        else:
            raise FailedActivity(
                "cannot process {path}".format(path=spec_path))

    v1 = client.AppsV1beta1Api(api)
    return v1.create_namespaced_deployment(ns, body=deployment)
Exemplo n.º 30
0
def all_nodes_are_ok(label_selector: str = None, secrets: Secrets = None):
    """
    Check that no node of the cluster reports a `Ready` condition with the
    status `"False"`. You may restrict the nodes checked by specifying a
    label selector.

    :return: `True` when no node is reported as not ready, `False` as soon
        as one is
    """
    api = create_k8s_api_client(secrets)

    v1 = client.CoreV1Api(api)
    # The deserialized node list is needed here: with
    # `_preload_content=False` the call returns the raw HTTP response,
    # which has no `items` attribute.
    if label_selector:
        ret = v1.list_node(label_selector=label_selector)
    else:
        ret = v1.list_node()

    for item in ret.items:
        for condition in item.status.conditions or []:
            if condition.type == "Ready" and condition.status == "False":
                return False

    return True