Example #1
def test_deployment(pvc_factory, pod_factory):
    deploy = config.RUN['cli_params'].get('deploy')
    teardown = config.RUN['cli_params'].get('teardown')
    if not teardown or deploy:
        log.info("Verifying OCP cluster is running")
        assert is_cluster_running(config.ENV_DATA['cluster_path'])
        if not config.ENV_DATA['skip_ocs_deployment']:
            ocs_registry_image = config.DEPLOYMENT.get(
                'ocs_registry_image'
            )
            ocs_install_verification(ocs_registry_image=ocs_registry_image)

            # Check basic cluster functionality by creating resources
            # (pools, storageclasses, PVCs, pods - both CephFS and RBD),
            # run IO and delete the resources
            if config.DEPLOYMENT['external_mode']:
                sanity_helpers = SanityExternalCluster()
            else:
                sanity_helpers = Sanity()
            sanity_helpers.health_check()
            sanity_helpers.create_resources(pvc_factory, pod_factory)
            sanity_helpers.delete_resources()

    if teardown:
        log.info(
            "Cluster will be destroyed during teardown part of this test."
        )
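
The "not teardown or deploy" guard decides whether the verification branch runs at all. A minimal standalone sketch (not ocs-ci code) enumerating the four flag combinations:

def should_verify(deploy: bool, teardown: bool) -> bool:
    # Verification is skipped only for a pure teardown run.
    return not teardown or deploy

assert should_verify(deploy=False, teardown=False)      # plain verification run
assert should_verify(deploy=True, teardown=False)       # deploy, then verify
assert should_verify(deploy=True, teardown=True)        # full deploy/teardown cycle
assert not should_verify(deploy=False, teardown=True)   # teardown only: skip
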
Example #2
    def deploy_cluster(self, log_cli_level='DEBUG'):
        """
        We are handling both OCP and OCS deployment here based on flags

        Args:
            log_cli_level (str): log level for installer (default: DEBUG)
        """
        if not config.ENV_DATA['skip_ocp_deployment']:
            if is_cluster_running(self.cluster_path):
                logger.warning(
                    "OCP cluster is already running, skipping installation")
            else:
                try:
                    self.deploy_ocp(log_cli_level)
                    self.post_ocp_deploy()
                except Exception as e:
                    logger.error(e)
                    if config.REPORTING['gather_on_deploy_failure']:
                        collect_ocs_logs('deployment', ocs=False)
                    raise

        if not config.ENV_DATA['skip_ocs_deployment']:
            try:
                self.deploy_ocs()
            except Exception as e:
                logger.error(e)
                if config.REPORTING['gather_on_deploy_failure']:
                    # Let's do the collections separately to guard against one
                    # of them failing
                    collect_ocs_logs('deployment', ocs=False)
                    collect_ocs_logs('deployment', ocp=False)
                raise
        else:
            logger.warning("OCS deployment will be skipped")
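
Both deployment phases above follow the same gather-on-failure pattern: run a step, collect diagnostics if it raises, and re-raise so the failure still propagates to the caller. A generic sketch of that pattern (the helper name and signature are hypothetical, not part of ocs-ci):

import logging

logger = logging.getLogger(__name__)

def run_with_log_collection(step, collect, should_collect=True):
    """Run step(); on failure, optionally collect diagnostics, then re-raise."""
    try:
        step()
    except Exception as e:
        logger.error(e)
        if should_collect:
            collect()
        raise
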
Example #3
def test_deployment():
    deploy = config.RUN['cli_params'].get('deploy')
    teardown = config.RUN['cli_params'].get('teardown')
    if not teardown or deploy:
        assert is_cluster_running(config.ENV_DATA['cluster_path'])

    if teardown:
        log.info(
            "Cluster will be destroyed during teardown part of this test.")
Example #4
def test_deployment():
    deploy = config.RUN['cli_params'].get('deploy')
    teardown = config.RUN['cli_params'].get('teardown')
    if not teardown or deploy:
        log.info("Verifying OCP cluster is running")
        assert is_cluster_running(config.ENV_DATA['cluster_path'])
        if not config.ENV_DATA['skip_ocs_deployment']:
            ocs_install_verification()

    if teardown:
        log.info(
            "Cluster will be destroyed during teardown part of this test.")
Example #5
def test_deployment(pvc_factory, pod_factory):
    deploy = config.RUN["cli_params"].get("deploy")
    teardown = config.RUN["cli_params"].get("teardown")
    if not teardown or deploy:
        log.info("Verifying OCP cluster is running")
        assert is_cluster_running(config.ENV_DATA["cluster_path"])
        if not config.ENV_DATA["skip_ocs_deployment"]:
            if config.multicluster:
                restore_ctx_index = config.cur_index
                for cluster in get_non_acm_cluster_config():
                    config.switch_ctx(
                        cluster.MULTICLUSTER["multicluster_index"])
                    log.info(
                        f"Sanity check for cluster: {cluster.ENV_DATA['cluster_name']}"
                    )
                    sanity_helpers = Sanity()
                    sanity_helpers.health_check()
                    sanity_helpers.delete_resources()
                config.switch_ctx(restore_ctx_index)
            else:
                ocs_registry_image = config.DEPLOYMENT.get(
                    "ocs_registry_image")
                if config.ENV_DATA["mcg_only_deployment"]:
                    mcg_only_install_verification(
                        ocs_registry_image=ocs_registry_image)
                    return
                else:
                    ocs_install_verification(
                        ocs_registry_image=ocs_registry_image)

                # Check basic cluster functionality by creating resources
                # (pools, storageclasses, PVCs, pods - both CephFS and RBD),
                # run IO and delete the resources
                if config.DEPLOYMENT["external_mode"]:
                    sanity_helpers = SanityExternalCluster()
                else:
                    sanity_helpers = Sanity()
                if (config.ENV_DATA["platform"].lower()
                        in constants.MANAGED_SERVICE_PLATFORMS):
                    try:
                        sanity_helpers.health_check()
                    except exceptions.ResourceWrongStatusException as err_msg:
                        log.warning(err_msg)
                else:
                    sanity_helpers.health_check()
                sanity_helpers.delete_resources()
                # Verify ceph health
                log.info("Verifying ceph health after deployment")
                assert ceph_health_check(tries=10, delay=30)

    if teardown:
        log.info(
            "Cluster will be destroyed during teardown part of this test.")
Example #6
    def deploy_cluster(self, log_cli_level='DEBUG'):
        """
        We are handling both OCP and OCS deployment here based on flags

        Args:
            log_cli_level (str): log level for installer (default: DEBUG)
        """
        if not config.ENV_DATA['skip_ocp_deployment']:
            if is_cluster_running(self.cluster_path):
                logger.warning(
                    "OCP cluster is already running, skipping installation")
            else:
                self.deploy_ocp(log_cli_level)

        if not config.ENV_DATA['skip_ocs_deployment']:
            self.deploy_ocs()
        else:
            logger.warning("OCS deployment will be skipped")
Example #7
def test_cluster_is_running():
    assert is_cluster_running(config.ENV_DATA['cluster_path'])
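
None of these examples show is_cluster_running itself. A plausible sketch of such a check (an assumption, not ocs-ci's actual implementation): treat the cluster as running if a kubeconfig exists under the cluster path and the API server answers a basic query.

import os
import subprocess

def is_cluster_running_sketch(cluster_path):
    # The "auth/kubeconfig" location is an assumption for this sketch.
    kubeconfig = os.path.join(cluster_path, "auth", "kubeconfig")
    if not os.path.isfile(kubeconfig):
        return False
    result = subprocess.run(
        ["oc", "cluster-info"],
        env={**os.environ, "KUBECONFIG": kubeconfig},
        capture_output=True,
    )
    return result.returncode == 0
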
Example #8
def cluster(request):
    log.info(f"All logs located at {log_path}")
    log.info("Running OCS basic installation")
    cluster_path = config.ENV_DATA['cluster_path']
    deploy = config.RUN['cli_params']['deploy']
    teardown = config.RUN['cli_params']['teardown']
    # Add a finalizer to teardown the cluster after test execution is finished
    if teardown:
        request.addfinalizer(cluster_teardown)
        log.info("Will teardown cluster because --teardown was provided")
    # Test cluster access; if the cluster is already running, skip the deployment.
    if is_cluster_running(cluster_path):
        log.info("The installation is skipped because the cluster is running")
        return
    elif teardown and not deploy:
        log.info("Attempting teardown of non-accessible cluster: %s",
                 cluster_path)
        return
    elif not deploy and not teardown:
        msg = "The given cluster can not be connected to: {}. ".format(
            cluster_path)
        msg += "Provide a valid --cluster-path or use --deploy to deploy a new cluster"
        pytest.fail(msg)
    elif not system.is_path_empty(cluster_path) and deploy:
        msg = "The given cluster path is not empty: {}. ".format(cluster_path)
        msg += "Provide an empty --cluster-path and --deploy to deploy a new cluster"
        pytest.fail(msg)
    else:
        log.info(
            "A testing cluster will be deployed and cluster information stored at: %s",
            cluster_path)

    # Generate install-config from template
    log.info("Generating install-config")
    pull_secret_path = os.path.join(constants.TOP_DIR, "data", "pull-secret")

    # TODO: check for supported platform and raise the exception if not
    # supported. Currently we support just AWS.

    _templating = templating.Templating()
    install_config_str = _templating.render_template("install-config.yaml.j2",
                                                     config.ENV_DATA)
    # Log the install config *before* adding the pull secret, so we don't leak
    # sensitive data.
    log.info(f"Install config: \n{install_config_str}")
    # Parse the rendered YAML so that we can manipulate the object directly
    install_config_obj = yaml.safe_load(install_config_str)
    with open(pull_secret_path, "r") as f:
        # Parse, then unparse, the JSON file.
        # We do this for two reasons: to ensure it is well-formatted, and
        # also to ensure it ends up as a single line.
        install_config_obj['pullSecret'] = json.dumps(json.loads(f.read()))
    install_config_str = yaml.safe_dump(install_config_obj)
    install_config = os.path.join(cluster_path, "install-config.yaml")
    with open(install_config, "w") as f:
        f.write(install_config_str)

    # Download installer
    installer = get_openshift_installer(config.DEPLOYMENT['installer_version'])
    # Download client
    get_openshift_client()

    # Deploy cluster
    log.info("Deploying cluster")
    run_cmd(f"{installer} create cluster "
            f"--dir {cluster_path} "
            f"--log-level debug")

    # Test cluster access
    if not OCP.set_kubeconfig(
            os.path.join(cluster_path, config.RUN.get('kubeconfig_location'))):
        pytest.fail("Cluster is not available!")

    # TODO: Create cluster object, add to config.ENV_DATA for other tests to
    # utilize.
    # Determine worker pattern and create ebs volumes
    with open(os.path.join(cluster_path, "terraform.tfvars")) as f:
        tfvars = json.load(f)

    cluster_id = tfvars['cluster_id']
    worker_pattern = f'{cluster_id}-worker*'
    log.info(f'Worker pattern: {worker_pattern}')
    create_ebs_volumes(worker_pattern, region_name=config.ENV_DATA['region'])

    # render templates and create resources
    create_oc_resource('common.yaml', cluster_path, _templating,
                       config.ENV_DATA)
    run_cmd(f'oc label namespace {config.ENV_DATA["cluster_namespace"]} '
            f'"openshift.io/cluster-monitoring=true"')
    run_cmd(f"oc policy add-role-to-user view "
            f"system:serviceaccount:openshift-monitoring:prometheus-k8s "
            f"-n {config.ENV_DATA['cluster_namespace']}")
    apply_oc_resource('csi-nodeplugin-rbac_rbd.yaml',
                      cluster_path,
                      _templating,
                      config.ENV_DATA,
                      template_dir="ocs-deployment/csi/rbd/")
    apply_oc_resource('csi-provisioner-rbac_rbd.yaml',
                      cluster_path,
                      _templating,
                      config.ENV_DATA,
                      template_dir="ocs-deployment/csi/rbd/")
    apply_oc_resource('csi-nodeplugin-rbac_cephfs.yaml',
                      cluster_path,
                      _templating,
                      config.ENV_DATA,
                      template_dir="ocs-deployment/csi/cephfs/")
    apply_oc_resource('csi-provisioner-rbac_cephfs.yaml',
                      cluster_path,
                      _templating,
                      config.ENV_DATA,
                      template_dir="ocs-deployment/csi/cephfs/")
    # Increased to 15 seconds as 10 is not enough
    # TODO: do the sampler function and check if resource exist
    wait_time = 15
    log.info(f"Waiting {wait_time} seconds...")
    time.sleep(wait_time)
    create_oc_resource('operator-openshift-with-csi.yaml', cluster_path,
                       _templating, config.ENV_DATA)
    log.info(f"Waiting {wait_time} seconds...")
    time.sleep(wait_time)
    run_cmd(f"oc wait --for condition=ready pod "
            f"-l app=rook-ceph-operator "
            f"-n {config.ENV_DATA['cluster_namespace']} "
            f"--timeout=120s")
    run_cmd(f"oc wait --for condition=ready pod "
            f"-l app=rook-discover "
            f"-n {config.ENV_DATA['cluster_namespace']} "
            f"--timeout=120s")
    create_oc_resource('cluster.yaml', cluster_path, _templating,
                       config.ENV_DATA)

    POD = ocp.OCP(kind=constants.POD,
                  namespace=config.ENV_DATA['cluster_namespace'])
    CFS = ocp.OCP(kind=constants.CEPHFILESYSTEM,
                  namespace=config.ENV_DATA['cluster_namespace'])

    # Check for the Running status of Ceph Pods
    run_cmd(f"oc wait --for condition=ready pod "
            f"-l app=rook-ceph-agent "
            f"-n {config.ENV_DATA['cluster_namespace']} "
            f"--timeout=120s")
    assert POD.wait_for_resource(condition='Running',
                                 selector='app=rook-ceph-mon',
                                 resource_count=3,
                                 timeout=600)
    assert POD.wait_for_resource(condition='Running',
                                 selector='app=rook-ceph-mgr',
                                 timeout=600)
    assert POD.wait_for_resource(condition='Running',
                                 selector='app=rook-ceph-osd',
                                 resource_count=3,
                                 timeout=600)

    create_oc_resource('toolbox.yaml', cluster_path, _templating,
                       config.ENV_DATA)
    log.info(f"Waiting {wait_time} seconds...")
    time.sleep(wait_time)
    create_oc_resource('storage-manifest.yaml', cluster_path, _templating,
                       config.ENV_DATA)
    create_oc_resource("service-monitor.yaml", cluster_path, _templating,
                       config.ENV_DATA)
    create_oc_resource("prometheus-rules.yaml", cluster_path, _templating,
                       config.ENV_DATA)
    log.info(f"Waiting {wait_time} seconds...")
    time.sleep(wait_time)

    # Create MDS pods for CephFileSystem
    fs_data = templating.load_yaml_to_dict(constants.CEPHFILESYSTEM_YAML)
    fs_data['metadata']['namespace'] = config.ENV_DATA['cluster_namespace']

    ceph_obj = OCS(**fs_data)
    ceph_obj.create()
    assert POD.wait_for_resource(condition=constants.STATUS_RUNNING,
                                 selector='app=rook-ceph-mds',
                                 resource_count=2,
                                 timeout=600)

    # Check for CephFilesystem creation in ocp
    cfs_data = CFS.get()
    cfs_name = cfs_data['items'][0]['metadata']['name']

    if helpers.validate_cephfilesystem(cfs_name):
        log.info("MDS deployment is successful!")
        defaults.CEPHFILESYSTEM_NAME = cfs_name
    else:
        log.error("MDS deployment failed! Please check logs!")

    # Verify health of ceph cluster
    # TODO: move destroy cluster logic to new CLI usage pattern?
    log.info("Done creating rook resources, waiting for HEALTH_OK")
    assert ceph_health_check(namespace=config.ENV_DATA['cluster_namespace'])
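
The fixed time.sleep(wait_time) calls above are acknowledged as a stopgap by the TODO ("do the sampler function and check if resource exist"). A minimal poll-until sketch that could replace them (a generic helper, not tied to ocs-ci):

import time

def wait_for(predicate, timeout=120, interval=5):
    """Poll predicate() until it returns True or the timeout expires."""
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        if predicate():
            return True
        time.sleep(interval)
    return False

# Usage sketch: wait_for(lambda: resource_exists("rook-ceph-operator")),
# where resource_exists is a hypothetical check against the cluster.
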
Example #9
    def deploy_cluster(self, log_cli_level="DEBUG"):
        """
        We are handling both OCP and OCS deployment here based on flags

        Args:
            log_cli_level (str): log level for installer (default: DEBUG)
        """
        if not config.ENV_DATA["skip_ocp_deployment"]:
            if is_cluster_running(self.cluster_path):
                logger.warning(
                    "OCP cluster is already running, skipping installation")
            else:
                try:
                    self.deploy_ocp(log_cli_level)
                    self.post_ocp_deploy()
                except Exception as e:
                    config.RUN["is_ocp_deployment_failed"] = True
                    logger.error(e)
                    if config.REPORTING["gather_on_deploy_failure"]:
                        collect_ocs_logs("deployment", ocs=False)
                    raise

        # Deployment of network split scripts via machineconfig API happens
        # before OCS deployment.
        if config.DEPLOYMENT.get("network_split_setup"):
            master_zones = config.ENV_DATA.get("master_availability_zones")
            worker_zones = config.ENV_DATA.get("worker_availability_zones")
            # special external zone, which is directly defined by ip addr list,
            # such zone could represent external services, which we could block
            # access to via ax-bx-cx network split
            if config.DEPLOYMENT.get("network_split_zonex_addrs") is not None:
                x_addr_list = config.DEPLOYMENT[
                    "network_split_zonex_addrs"].split(",")
            else:
                x_addr_list = None
            if config.DEPLOYMENT.get("arbiter_deployment"):
                arbiter_zone = self.get_arbiter_location()
                logger.debug("detected arbiter zone: %s", arbiter_zone)
            else:
                arbiter_zone = None
            # TODO: use temporary directory for all temporary files of
            # ocs-deployment, not just here in this particular case
            tmp_path = Path(tempfile.mkdtemp(prefix="ocs-ci-deployment-"))
            logger.debug("created temporary directory %s", tmp_path)
            setup_netsplit(tmp_path, master_zones, worker_zones, x_addr_list,
                           arbiter_zone)

        if not config.ENV_DATA["skip_ocs_deployment"]:
            try:
                self.deploy_ocs()

                if config.REPORTING["collect_logs_on_success_run"]:
                    collect_ocs_logs("deployment",
                                     ocp=False,
                                     status_failure=False)
            except Exception as e:
                logger.error(e)
                if config.REPORTING["gather_on_deploy_failure"]:
                    # Let's do the collections separately to guard against one
                    # of them failing
                    collect_ocs_logs("deployment", ocs=False)
                    collect_ocs_logs("deployment", ocp=False)
                raise
        else:
            logger.warning("OCS deployment will be skipped")
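
The TODO above notes that temporary files should live in one managed temporary directory. A sketch using tempfile.TemporaryDirectory, which, unlike the bare mkdtemp call in the example, also cleans up after itself (this may or may not be desirable if the files must outlive the setup step):

import tempfile
from pathlib import Path

# Hypothetical restructuring of the network-split setup step:
with tempfile.TemporaryDirectory(prefix="ocs-ci-deployment-") as tmp:
    tmp_path = Path(tmp)
    # ... write network split scripts and other artifacts under tmp_path ...
    # The directory and its contents are removed when the block exits.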