def verify_load_balancer_completed(core_api, stack_id, stack_config, role):
    """Raise ``errors.RetryOperation`` until the role's load balancer is usable.

    Three conditions are checked in order:

    1. ``services.get_load_balancer_hosts`` returns at least one hostname.
    2. Every returned hostname resolves via ``socket.gethostbyname``.
    3. For the standalone, deployer, search head and cluster master roles,
       ``instances.create_client`` succeeds without an SSL EOF error or a
       timeout.

    Returns ``None`` when all applicable checks pass. Exceptions other than
    the ones listed above propagate unchanged.
    """
    import socket

    hostnames = services.get_load_balancer_hosts(
        core_api, stack_id, role, stack_config["namespace"])
    if len(hostnames) == 0:
        raise errors.RetryOperation(
            "waiting for %s load balancer hostname" % role)

    # A hostname may be assigned before DNS can resolve it.
    for hostname in hostnames:
        try:
            socket.gethostbyname(hostname)
        except socket.error:
            raise errors.RetryOperation(
                "Waiting for %s ingress connection (cannot resolve hostname)" %
                role)

    # Only these roles are probed with a real client connection.
    needs_connection_probe = (
        role == services.standalone_role
        or role == services.deployer_role
        or role == services.search_head_role
        or role == services.cluster_master_role
    )
    if not needs_connection_probe:
        return

    try:
        instances.create_client(core_api, stack_id, stack_config, role)
    except ssl.SSLEOFError:
        raise errors.RetryOperation(
            "Waiting for %s ingress connection (SSL protocol error)" % role)
    except TimeoutError:
        raise errors.RetryOperation(
            "Waiting for %s ingress connection (timeout)" % role)
def push_deployer_bundle(core_api, stack_id, stack_config):
    """Ask the deployer to push its app bundle to the search heads.

    The first search head load balancer hostname is used as the deploy
    target (``https://<host>:8089``). The push is requested by posting to
    ``apps/deploy`` on a client created for the deployer role.

    Raises:
        errors.RetryOperation: if no search head hostname is available yet,
            or if ``is_sh_cluster_restart_in_progress`` reports a restart
            either before or after the push.
    """

    def retry_while_restarting():
        # Checked both before and after the push.
        if is_sh_cluster_restart_in_progress(core_api, stack_id, stack_config):
            raise errors.RetryOperation(
                "wait for SH cluster restart process completed ...")

    search_head_hosts = services.get_load_balancer_hosts(
        core_api, stack_id, services.search_head_role,
        stack_config["namespace"])
    if not len(search_head_hosts):
        raise errors.RetryOperation(
            "Waiting for hostname for search heads ...")
    deploy_target_host = search_head_hosts[0]

    retry_while_restarting()

    deployer = instances.create_client(
        core_api, stack_id, stack_config, services.deployer_role)
    deploy_arguments = {
        "target": "https://%s:8089" % deploy_target_host,
        "action": "all",
        "advertising": "true",
        "force": "true",
    }
    deployer.post("apps/deploy", **deploy_arguments)
    logging.info("pushed SH deployer bundle")

    retry_while_restarting()
def create_client(core_api, stack_id, stack_config, role):
    """Return a logged-in ``splunklib.client.Service`` for the given role.

    The connection goes to the first load balancer hostname reported for the
    role, over HTTPS on port 8089, using the password returned by
    ``get_admin_password``.

    Raises:
        Exception: if no load balancer hostname is available for the role.
    """
    lb_hosts = services.get_load_balancer_hosts(
        core_api, stack_id, role, stack_config["namespace"])
    if not len(lb_hosts):
        raise Exception(
            "could not get hostname for load balancer for role %s " % (role))

    admin_password = get_admin_password(core_api, stack_id, stack_config, role)
    # NOTE(review): the username literal looks masked in this copy of the
    # source; confirm the intended account name before relying on it.
    connection_settings = {
        "port": 8089,
        "scheme": "https",
        "host": lb_hosts[0],
        "username": "******",
        "password": admin_password,
    }
    client = splunklib.client.Service(**connection_settings)
    client.login()
    return client
Пример #4
0
    def handle_GET(self):
        """Send the stored settings and reachable endpoints of one stack.

        The stack id is the last component of the request path. The result
        always carries the stack's status, title (empty string when unset),
        deployment type, license master mode, cluster and namespace, plus
        the indexer and search head counts for distributed deployments.

        For each role that currently has load balancer hostnames, the result
        also gets an ``<role>_endpoint`` list and an ``<role>_password``
        entry. The license master is only considered when
        ``license_master_mode`` is ``"local"``.
        """
        stack_id = os.path.basename(self.request['path'])
        # NOTE(review): this reads self.splunk while the cluster client below
        # is created from self.service; confirm both attributes are intended.
        stack_config = get_stack_config(self.splunk, stack_id)

        result = {"status": stack_config["status"]}
        result["title"] = (
            stack_config["title"] if "title" in stack_config else "")
        for key in ("deployment_type", "license_master_mode", "cluster",
                    "namespace"):
            result[key] = stack_config[key]

        if stack_config["deployment_type"] == "distributed":
            for key in ("indexer_count", "search_head_count"):
                result[key] = stack_config[key]

        api_client = clusters.create_client(
            self.service, stack_config["cluster"])
        from kubernetes import client as kubernetes
        core_api = kubernetes.CoreV1Api(api_client)

        def publish(role, endpoint_key, password_key, endpoint_format):
            # Roles without load balancer hostnames are left out entirely.
            hostnames = services.get_load_balancer_hosts(
                core_api, stack_id, role, stack_config["namespace"])
            if not hostnames:
                return
            password = instances.get_admin_password(
                core_api, stack_id, stack_config, role)
            result[endpoint_key] = [
                endpoint_format % hostname for hostname in hostnames
            ]
            result[password_key] = password

        publish(services.search_head_role,
                "search_head_endpoint", "search_head_password", "http://%s")
        if stack_config["license_master_mode"] == "local":
            publish(services.license_master_role,
                    "license_master_endpoint", "license_master_password",
                    "http://%s")
        publish(services.cluster_master_role,
                "cluster_master_endpoint", "cluster_master_password",
                "http://%s")
        publish(services.deployer_role,
                "deployer_endpoint", "deployer_password", "http://%s")
        publish(services.standalone_role,
                "standalone_endpoint", "standalone_password", "http://%s")
        # Indexers are published as host:port rather than as a URL.
        publish(services.indexer_role,
                "indexer_endpoint", "indexer_password", "%s:9997")

        self.send_result(result)
def _parse_node_selector(node_selector):
    """Convert a ``key=value,key=value`` string into a dict.

    Empty items (from an empty string or a stray comma) are skipped.

    Raises:
        errors.ApplicationError: if an item does not split into exactly two
            parts on ``=``.
    """
    selector = {}
    for label in node_selector.split(","):
        if not label:
            continue
        key_value = label.split("=")
        if len(key_value) != 2:
            # Report the raw string that was parsed; the previous code read
            # it back through attribute access on the cluster config, which
            # fails on a plain mapping and hides this error.
            raise errors.ApplicationError(
                "invalid node selector format (%s)" % node_selector)
        selector[key_value[0]] = key_value[1]
    return selector


def _count_generators(rate_per_second, max_rate_per_generator):
    """Return how many generators are needed for the rate (at least one)."""
    return max(int(rate_per_second / max_rate_per_generator) + 1, 1)


def _hosts_for_deployment_type(core_api, stack_id, stack_config,
                               distributed_role):
    """Return load balancer hosts for the role matching the deployment type.

    Standalone stacks use ``services.standalone_role``; distributed stacks
    use ``distributed_role``. Any other deployment type raises ``Exception``.
    """
    deployment_type = stack_config["deployment_type"]
    if deployment_type == "standalone":
        role = services.standalone_role
    elif deployment_type == "distributed":
        role = distributed_role
    else:
        raise Exception("unexpected deployment type: %s" % deployment_type)
    return services.get_load_balancer_hosts(
        core_api, stack_id, role, stack_config["namespace"])


def _generator_deployment_exists(apps_api, name, namespace):
    """Return whether the named deployment can be read (404 means no)."""
    try:
        apps_api.read_namespaced_deployment(name, namespace=namespace)
    except kubernetes.rest.ApiException as e:
        if e.status != 404:
            raise
        return False
    return True


def _create_generator_deployment(apps_api, namespace, name, app, image,
                                 replicas, env, node_selector, test_id,
                                 case_id, stack_id):
    """Create the deployment that runs ``replicas`` generator pods.

    ``app`` is used as the container name and as the ``app`` label. ``name``
    is the deployment name and the ``name`` label the selector matches on.
    """
    container = kubernetes.V1Container(
        name=app,
        image=image,
        resources=kubernetes.V1ResourceRequirements(
            requests={
                "memory": "10Mi",
                "cpu": "500m",
            },
            limits={
                "memory": "50Mi",
                "cpu": "1",
            },
        ),
        env=env,
    )
    pod_template = kubernetes.V1PodTemplateSpec(
        metadata=kubernetes.V1ObjectMeta(labels={
            "name": name,
            "app": app,
            "test": test_id,
            "case": case_id,
            "stack": stack_id,
        }),
        spec=kubernetes.V1PodSpec(
            containers=[container],
            node_selector=node_selector,
        ),
    )
    deployment = kubernetes.V1Deployment(
        metadata=kubernetes.V1ObjectMeta(
            name=name,
            namespace=namespace,
            labels={
                "app": app,
                "test": test_id,
                "case": case_id,
            },
        ),
        spec=kubernetes.V1DeploymentSpec(
            replicas=replicas,
            selector=kubernetes.V1LabelSelector(match_labels={"name": name}),
            template=pod_template,
        ),
    )
    apps_api.create_namespaced_deployment(namespace=namespace, body=deployment)


def _start_generators_for_case(splunk, test_id, test, case_id, case):
    """Start data and search generators once the case's stack is created.

    Raises:
        errors.RetryOperation: while the stack status is still
            ``stacks.CREATING``.
        Exception: if the stack status is neither creating nor created, or
            the deployment type is unexpected.
        errors.ApplicationError: if the cluster's node selector is malformed.
    """
    stack_id = case["stack_id"]
    stack = splunk.get("saas/stack/%s" % stack_id)
    stack_status = json.loads(
        stack.body.read())["entry"][0]["content"]["status"]
    if stack_status == stacks.CREATING:
        raise errors.RetryOperation()
    if stack_status != stacks.CREATED:
        raise Exception("unexpected stack status: %s" % stack_status)
    logging.info("successfully created stack %s for case %s" %
                 (stack_id, case_id))

    stack_config = stacks.get_stack_config(splunk, stack_id)
    kube_client = clusters.create_client(splunk, stack_config["cluster"])
    cluster_config = clusters.get_cluster(splunk, test["cluster"])
    node_selector_for_generators = _parse_node_selector(
        cluster_config["node_selector"])
    apps_api = kubernetes.AppsV1Api(kube_client)
    core_api = kubernetes.CoreV1Api(kube_client)
    namespace = stack_config["namespace"]

    # --- data generators -------------------------------------------------
    indexer_hosts = _hosts_for_deployment_type(
        core_api, stack_id, stack_config, services.indexer_role)
    data_volume_in_gb_per_day = int(case["data_volume_in_gb_per_day"])
    logging.debug("data_volume_in_gb_per_day=%s" %
                  (data_volume_in_gb_per_day))
    data_volume_in_gb_per_second = data_volume_in_gb_per_day / 24 / 60 / 60
    logging.debug("data_volume_in_gb_per_second=%s" %
                  (data_volume_in_gb_per_second))
    data_volume_in_kb_per_second = data_volume_in_gb_per_second * 1024 * 1024
    logging.debug("data_volume_in_kb_per_second=%s" %
                  (data_volume_in_kb_per_second))
    max_kb_per_second_per_data_generator = 100
    logging.debug("max_kb_per_second_per_data_generator=%s" %
                  (max_kb_per_second_per_data_generator))
    number_of_data_generators = _count_generators(
        data_volume_in_kb_per_second, max_kb_per_second_per_data_generator)
    logging.debug("number_of_data_generators=%s" %
                  (number_of_data_generators))
    data_volume_in_kb_per_second_per_data_generator = (
        data_volume_in_kb_per_second / number_of_data_generators)
    logging.debug("data_volume_in_kb_per_second_per_data_generator=%s" %
                  (data_volume_in_kb_per_second_per_data_generator))

    data_gen_name = "datagen-%s" % (stack_id)
    # Creation is skipped when the deployment already exists, so a retried
    # run does not create it twice.
    if not _generator_deployment_exists(apps_api, data_gen_name, namespace):
        _create_generator_deployment(
            apps_api,
            namespace=namespace,
            name=data_gen_name,
            app="datagen",
            image="blackhypothesis/splunkeventgenerator:latest",
            replicas=number_of_data_generators,
            env=[
                kubernetes.V1EnvVar(
                    name="DSTHOST",
                    value=";".join(
                        host + ":9996" for host in indexer_hosts),
                ),
                kubernetes.V1EnvVar(
                    name="KB_S",
                    value="%s" %
                    data_volume_in_kb_per_second_per_data_generator,
                ),
            ],
            node_selector=node_selector_for_generators,
            test_id=test_id,
            case_id=case_id,
            stack_id=stack_id,
        )
        logging.info("created %s data generators for case %s" %
                     (number_of_data_generators, case_id))

    # --- search generators -----------------------------------------------
    # The first host is taken unconditionally, as before: an empty host list
    # raises IndexError even when no searches are configured.
    search_head_host = _hosts_for_deployment_type(
        core_api, stack_id, stack_config, services.search_head_role)[0]
    searches_per_day = int(case["searches_per_day"])
    logging.debug("searches_per_day=%s" % (searches_per_day))
    searches_per_second = searches_per_day / 24 / 60 / 60
    logging.debug("searches_per_second=%s" % (searches_per_second))
    max_searches_per_second_per_generator = 5
    logging.debug("max_searches_per_second_per_generator=%s" %
                  (max_searches_per_second_per_generator))
    number_of_search_generators = _count_generators(
        searches_per_second, max_searches_per_second_per_generator)
    logging.debug("number_of_search_generators=%s" %
                  (number_of_search_generators))
    searches_per_second_per_generator = (
        searches_per_second / number_of_search_generators)
    logging.debug("searches_per_second_per_generator=%s" %
                  (searches_per_second_per_generator))
    search_template = case["search_template"]

    if searches_per_day > 0 and search_template:
        search_gen_name = "searchgen-%s" % (stack_id)
        if not _generator_deployment_exists(apps_api, search_gen_name,
                                            namespace):
            # NOTE(review): the password is always fetched for the search
            # head role, even when the host above came from the standalone
            # role; confirm get_admin_password handles that case.
            admin_password = instances.get_admin_password(
                core_api, stack_id, stack_config, services.search_head_role)
            _create_generator_deployment(
                apps_api,
                namespace=namespace,
                name=search_gen_name,
                app="searchgen",
                image="hovu96/splunk-searchgen:latest",
                replicas=number_of_search_generators,
                env=[
                    kubernetes.V1EnvVar(
                        name="SEARCH_GEN_SPL",
                        value=search_template,
                    ),
                    kubernetes.V1EnvVar(
                        name="SEARCH_GEN_HOST",
                        value=search_head_host,
                    ),
                    kubernetes.V1EnvVar(
                        name="SEARCH_GEN_USER",
                        value="admin",
                    ),
                    kubernetes.V1EnvVar(
                        name="SEARCH_GEN_PASSWORD",
                        value=admin_password,
                    ),
                    kubernetes.V1EnvVar(
                        name="SEARCH_GEN_SPS",
                        value="%s" % searches_per_second_per_generator,
                    ),
                ],
                node_selector=node_selector_for_generators,
                test_id=test_id,
                case_id=case_id,
                stack_id=stack_id,
            )
            logging.info("created %s search generators for case %s" %
                         (number_of_search_generators, case_id))
    else:
        logging.info("no search generators started")


def run_cases(splunk, test_id, test):
    """Advance the cases of a performance test by one step.

    Cases are queried for ``test_id`` sorted by ``index`` and handled
    according to their status:

    * finished: skipped.
    * waiting: a stack is requested via ``saas/stacks``, the case is stored
      as starting, and ``errors.RetryOperation`` is raised.
    * starting: once the stack is created, data and (optionally) search
      generator deployments are created, the case is stored as running, and
      ``errors.RetryOperation`` is raised.
    * running: ``errors.RetryOperation`` is raised until
      ``test["run_duration"] * 60`` seconds have elapsed, then the case is
      stored as stopping and ``errors.RetryOperation`` is raised again.
    * stopping: ``stop_case`` is called, the case is stored as finished and
      processing continues with the next case.
    * anything else: logged as an error and ``errors.RetryOperation`` raised.

    Because every unfinished state except stopping raises, at most one case
    makes progress per call. The function returns ``None`` once every case
    is finished.
    """
    cases_collection = get_performance_test_cases_collection(splunk)
    cases = cases_collection.query(
        query=json.dumps({
            "test_id": test_id,
        }),
        sort="index:1",
    )
    for case in cases:
        case_id = case["_key"]
        status = case["status"]
        if status == CASE_FINISHED:
            continue
        if status == CASE_WAITING:
            # Stack settings copied from the case as-is, in request order.
            copied_fields = (
                "deployment_type",
                "indexer_count",
                "search_head_count",
                "cpu_per_instance",
                "etc_storage_in_gb",
                "other_var_storage_in_gb",
                "indexer_var_storage_in_gb",
                "memory_per_instance",
            )
            stack_params = {field: case[field] for field in copied_fields}
            stack_params["title"] = (
                "Performance Test %s and Case %s" % (test_id, case_id))
            stack_params["cluster"] = test["cluster"]
            result = splunk.post("saas/stacks", **stack_params)
            response = json.loads(result.body.read())["entry"][0]["content"]
            stack_id = response["stack_id"]
            logging.info("created stack %s for test case %s" %
                         (stack_id, case_id))
            case.update({
                "status": CASE_STARTING,
                "stack_id": stack_id,
            })
            cases_collection.update(case_id, json.dumps(case))
            raise errors.RetryOperation(
                "waiting for stack %s in test case %s starting up ..." %
                (stack_id, case_id))
        elif status == CASE_STARTING:
            _start_generators_for_case(splunk, test_id, test, case_id, case)
            case.update({
                "status": CASE_RUNNING,
                "time_started_running": time.time(),
            })
            cases_collection.update(case_id, json.dumps(case))
            raise errors.RetryOperation("running test case %s ..." % case_id)
        elif status == CASE_RUNNING:
            time_started_running = case["time_started_running"]
            time_now = time.time()
            elapsed_seconds = time_now - time_started_running
            # NOTE(review): run_duration is multiplied by 60 and compared to
            # seconds, so it is presumably minutes and must be numeric;
            # confirm against the code that stores it.
            target_run_duration = test["run_duration"]
            logging.debug(
                "time_started_running=%s time_now=%s seconds_running_to_far=%s"
                % (time_started_running, time_now, elapsed_seconds))
            if elapsed_seconds < (target_run_duration * 60):
                logging.debug("still waiting")
                raise errors.RetryOperation()
            logging.info("time elapsed for case %s" % (case_id))
            case.update({
                "status": CASE_STOPPING,
                "time_finished_running": time.time(),
            })
            cases_collection.update(case_id, json.dumps(case))
            raise errors.RetryOperation("stopping test case %s" % case_id)
        elif status == CASE_STOPPING:
            stop_case(splunk, test_id, case_id, case)
            case.update({
                "status": CASE_FINISHED,
            })
            cases_collection.update(case_id, json.dumps(case))
            logging.info("finished test case %s" % case_id)
        else:
            logging.error("run_cases: unexpected status for test case %s: %s" %
                          (case_id, status))
            raise errors.RetryOperation()