def run(args):
    import urllib3

    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

    # Mandatory arguments
    rhbuild = args["--rhbuild"]
    suite_files = args["--suite"]

    glb_file = args.get("--global-conf")
    if args.get("--cluster-conf"):
        glb_file = args["--cluster-conf"]

    # Deciders
    reuse = args.get("--reuse", None)
    cloud_type = args.get("--cloud", "openstack")

    # These are not mandatory options
    inventory_file = args.get("--inventory")
    osp_cred_file = args.get("--osp-cred")
    osp_cred = load_file(osp_cred_file) if osp_cred_file else dict()
    cleanup_name = args.get("--cleanup", None)
    version2 = args.get("--v2", False)
    ignore_latest_nightly_container = args.get("--ignore-latest-container", False)

    # Set log directory and get absolute path
    console_log_level = args.get("--log-level")
    log_directory = args.get("--log-dir")

    run_id = generate_unique_id(length=6)
    run_dir = create_run_dir(run_id, log_directory)

    metadata = TestMetaData(
        run_id=run_id,
        rhbuild=rhbuild,
        logstash=get_cephci_config().get("logstash", {}),
    )

    if log.config.get("logstash"):
        host = log.config["logstash"]["host"]
        port = log.config["logstash"]["port"]
        version = log.config["logstash"].get("version", 1)
        handler = logstash.TCPLogstashHandler(
            host=host,
            port=port,
            version=version,
        )
        handler.setLevel(log.log_level)
        root.addHandler(handler)

        server = f"tcp://{host}:{port}"
        log._logger.debug(f"Log events are also pushed to {server}")

    startup_log = os.path.join(run_dir, "startup.log")
    handler = logging.FileHandler(startup_log)
    handler.setLevel(logging.INFO)
    handler.setFormatter(formatter)
    root.addHandler(handler)

    if console_log_level:
        ch.setLevel(logging.getLevelName(console_log_level.upper()))

    log.info(f"Startup log location: {startup_log}")
    run_start_time = datetime.datetime.now()
    trigger_user = getuser()

    platform = None
    build = None
    base_url = None
    ubuntu_repo = None
    docker_registry = None
    docker_image = None
    docker_tag = None
    ceph_name = None
    compose_id = None

    if cleanup_name and not osp_cred:
        raise Exception("Need cloud credentials to perform cleanup.")

    if cleanup_name:
        if cloud_type == "openstack":
            cleanup_ceph_nodes(osp_cred, cleanup_name)
        elif cloud_type == "ibmc":
            cleanup_ibmc_ceph_nodes(osp_cred, cleanup_name)
        else:
            log.warning("Unknown cloud type.")

        return 0

    if glb_file is None and not reuse:
        raise Exception("Unable to gather information about cluster layout.")

    if osp_cred_file is None and not reuse and cloud_type in ["openstack", "ibmc"]:
        raise Exception("Require cloud credentials to create cluster.")

    if inventory_file is None and not reuse and cloud_type in ["openstack", "ibmc"]:
        raise Exception("Require system configuration information to provision.")

    if not version2:
        # Get ceph cluster version name
        with open("rhbuild.yaml") as fd:
            rhbuild_file = yaml.safe_load(fd)

        ceph = rhbuild_file["ceph"]
        rhbuild_ = None
        try:
            ceph_name, rhbuild_ = next(
                filter(
                    lambda x: x,
                    [(ceph[x]["name"], x) for x in ceph if x == rhbuild.split(".")[0]],
                )
            )
        except StopIteration:
            print("\nERROR: Please provide correct RH build version, run exited.")
            sys.exit(1)

        # Get base-url
        composes = ceph[rhbuild_]["composes"]
        if not base_url:
            if rhbuild in composes:
                base_url = composes[rhbuild or "latest"]["base_url"]

        # Get ubuntu-repo
        if not ubuntu_repo and rhbuild.startswith("3"):
            if rhbuild in composes:
                ubuntu_repo = composes[rhbuild or "latest"]["ubuntu_repo"]

        if os.environ.get("TOOL") is not None:
            ci_message = json.loads(os.environ["CI_MESSAGE"])
            compose_id = ci_message["compose_id"]
            compose_url = ci_message["compose_url"] + "/"
            product_name = ci_message.get("product_name", None)
            product_version = ci_message.get("product_version", None)
            log.info("COMPOSE_URL = %s ", compose_url)

            if os.environ["TOOL"] == "pungi":
                # is a rhel compose
                log.info("trigger on CI RHEL Compose")
            elif os.environ["TOOL"] == "rhcephcompose":
                # is a ubuntu compose
                log.info("trigger on CI Ubuntu Compose")
                ubuntu_repo = compose_url
                log.info("using ubuntu repo " + ubuntu_repo)
            elif os.environ["TOOL"] == "bucko":
                # is a docker compose
                log.info("Trigger on CI Docker Compose")
                docker_registry, docker_tag = ci_message["repository"].split(
                    "/rh-osbs/rhceph:"
                )
                docker_image = "rh-osbs/rhceph"
                log.info(
                    f"\nUsing docker registry from ci message: {docker_registry} \n"
                    f"Docker image: {docker_image}\nDocker tag: {docker_tag}"
                )
                log.warning("Using Docker insecure registry setting")
                docker_insecure_registry = True

            if product_name == "ceph":
                # is a rhceph compose
                base_url = compose_url
                log.info("using base url " + base_url)

        if not os.environ.get("TOOL") and not ignore_latest_nightly_container:
            try:
                latest_container = get_latest_container(rhbuild)
            except ValueError:
                print(
                    "ERROR: No latest nightly container UMB msg at "
                    "/ceph/cephci-jenkins/latest-rhceph-container-info/ "
                    "specify using the cli args or use --ignore-latest-container"
                )
                sys.exit(1)

            docker_registry = (
                latest_container.get("docker_registry")
                if not docker_registry
                else docker_registry
            )
            docker_image = (
                latest_container.get("docker_image")
                if not docker_image
                else docker_image
            )
            docker_tag = (
                latest_container.get("docker_tag") if not docker_tag else docker_tag
            )
            log.info(
                f"Using latest nightly docker image - {docker_registry}/{docker_image}:{docker_tag}"
            )
            docker_insecure_registry = True
            log.warning("Using Docker insecure registry setting")
    else:
        platform = args.get("--platform", "rhel-8")
        build = args.get("--build", "latest")

        if not platform:
            raise TestSetupFailure("please provide --platform [rhel-7|rhel-8]")

        if build != "released":
            base_url, docker_registry, docker_image, docker_tag = fetch_build_artifacts(
                build, rhbuild, platform
            )

    store = args.get("--store", False)

    base_url = args.get("--rhs-ceph-repo") or base_url
    ubuntu_repo = args.get("--ubuntu-repo") or ubuntu_repo
    docker_registry = args.get("--docker-registry") or docker_registry
    docker_image = args.get("--docker-image") or docker_image
    docker_tag = args.get("--docker-tag") or docker_tag
    kernel_repo = args.get("--kernel-repo", None)

    docker_insecure_registry = args.get("--insecure-registry", False)

    post_results = args.get("--post-results")
    skip_setup = args.get("--skip-cluster", False)
    skip_subscription = args.get("--skip-subscription", False)
    post_to_report_portal = args.get("--report-portal", False)
    rp_logger = ReportPortal()

    instances_name = args.get("--instances-name")
    if instances_name:
        instances_name = instances_name.replace(".", "-")

    osp_image = args.get("--osp-image")
    filestore = args.get("--filestore", False)
    ec_pool_vals = args.get("--use-ec-pool", None)
    skip_version_compare = args.get("--skip-version-compare", False)
    custom_config = args.get("--custom-config")
    custom_config_file = args.get("--custom-config-file")
    xunit_results = args.get("--xunit-results", False)

    enable_eus = args.get("--enable-eus", False)
    skip_enabling_rhel_rpms = args.get("--skip-enabling-rhel-rpms", False)

    # load config, suite and inventory yaml files
    conf = load_file(glb_file)
    suite = init_suite.load_suites(suite_files)

    cli_arguments = f"{sys.executable} {' '.join(sys.argv)}"
    log.info(f"The CLI for the current run :\n{cli_arguments}\n")
    log.info(f"RPM Compose source - {base_url}")
    log.info(
        f"Red Hat Ceph Image used - {docker_registry}/{docker_image}:{docker_tag}"
    )

    ceph_version = []
    ceph_ansible_version = []
    distro = []
    clients = []
    inventory = None
    image_name = None

    if inventory_file:
        inventory = load_file(inventory_file)

        if osp_image and inventory.get("instance", {}).get("create"):
            inventory.get("instance").get("create").update({"image-name": osp_image})

        image_name = inventory.get("instance", {}).get("create", {}).get("image-name")

        if inventory.get("instance", {}).get("create"):
            distro.append(inventory.get("instance").get("create").get("image-name"))

    for cluster in conf.get("globals"):
        if cluster.get("ceph-cluster").get("inventory"):
            cluster_inventory_path = os.path.abspath(
                cluster.get("ceph-cluster").get("inventory")
            )
            with open(cluster_inventory_path, "r") as inventory_stream:
                cluster_inventory = yaml.safe_load(inventory_stream)
            image_name = (
                cluster_inventory.get("instance").get("create").get("image-name")
            )
            distro.append(image_name.replace(".iso", ""))

        # get COMPOSE ID and ceph version
        if build not in ["released", "cvp"]:
            if cloud_type == "openstack" or cloud_type == "baremetal":
                resp = requests.get(base_url + "/COMPOSE_ID", verify=False)
                compose_id = resp.text
            elif cloud_type == "ibmc":
                compose_id = "UNKNOWN"

            if "rhel" == inventory.get("id"):
                if cloud_type == "ibmc":
                    ceph_pkgs = requests.get(
                        base_url + "/Tools/Packages/", verify=False
                    )
                elif cloud_type == "openstack" or cloud_type == "baremetal":
                    ceph_pkgs = requests.get(
                        base_url + "/compose/Tools/x86_64/os/Packages/", verify=False
                    )
                m = re.search(r"ceph-common-(.*?).x86", ceph_pkgs.text)
                ceph_version.append(m.group(1))
                m = re.search(r"ceph-ansible-(.*?).rpm", ceph_pkgs.text)
                ceph_ansible_version.append(m.group(1))
                log.info("Compose id is: " + compose_id)
            else:
                ubuntu_pkgs = requests.get(
                    ubuntu_repo + "/Tools/dists/xenial/main/binary-amd64/Packages"
                )
                m = re.search(r"ceph\nVersion: (.*)", ubuntu_pkgs.text)
                ceph_version.append(m.group(1))
                m = re.search(r"ceph-ansible\nVersion: (.*)", ubuntu_pkgs.text)
                ceph_ansible_version.append(m.group(1))

    distro = ",".join(list(set(distro)))
    ceph_version = ", ".join(list(set(ceph_version)))
    ceph_ansible_version = ", ".join(list(set(ceph_ansible_version)))
    metadata["rhcs"] = ceph_version
    log.info("Testing Ceph Version: " + ceph_version)
    log.info("Testing Ceph Ansible Version: " + ceph_ansible_version)

    service = None
    suite_name = "::".join(suite_files)

    if post_to_report_portal:
        log.info("Creating report portal session")

        # Only the first file is considered for launch description.
        suite_file_name = suite_name.split("::")[0].split("/")[-1]
        suite_file_name = suite_file_name.strip(".yaml")
        suite_file_name = " ".join(suite_file_name.split("_"))
        _log = run_dir.replace("/ceph/", "http://magna002.ceph.redhat.com/")
        launch_name = f"RHCS {rhbuild} - {suite_file_name}"
        launch_desc = textwrap.dedent(
            """
            ceph version: {ceph_version}
            ceph-ansible version: {ceph_ansible_version}
            compose-id: {compose_id}
            invoked-by: {user}
            log-location: {_log}
            """.format(
                ceph_version=ceph_version,
                ceph_ansible_version=ceph_ansible_version,
                user=getuser(),
                compose_id=compose_id,
                _log=_log,
            )
        )
        if docker_image and docker_registry and docker_tag:
            launch_desc = launch_desc + textwrap.dedent(
                """
                docker registry: {docker_registry}
                docker image: {docker_image}
                docker tag: {docker_tag}
                invoked-by: {user}
                """.format(
                    docker_registry=docker_registry,
                    docker_image=docker_image,
                    user=getuser(),
                    docker_tag=docker_tag,
                )
            )
        qe_tier = get_tier_level(suite_name)
        attributes = dict(
            {
                "rhcs": rhbuild,
                "tier": qe_tier,
                "ceph_version": ceph_version,
                "os": platform if platform else "-".join(rhbuild.split("-")[1:]),
            }
        )
        rp_logger.start_launch(
            name=launch_name, description=launch_desc, attributes=attributes
        )

    def fetch_test_details(var) -> dict:
        """
        Accepts the test and then provides the parameters of that test as a dictionary.

        :param var: the test collected from the suite file
        :return: Returns a dictionary of the various test params
        """
        details = dict()
        details["docker-containers-list"] = []
        details["name"] = var.get("name")
        details["desc"] = var.get("desc")
        details["file"] = var.get("module")
        details["cli_arguments"] = cli_arguments
        details["polarion-id"] = var.get("polarion-id")
        polarion_default_url = "https://polarion.engineering.redhat.com/polarion/#/project/CEPH/workitem?id="
        details["polarion-id-link"] = "{}{}".format(
            polarion_default_url, details["polarion-id"]
        )
        details["rhbuild"] = rhbuild
        details["cloud-type"] = cloud_type
        details["ceph-version"] = ceph_version
        details["ceph-ansible-version"] = ceph_ansible_version
        details["compose-id"] = compose_id
        details["distro"] = distro
        details["suite-name"] = suite_name
        details["suite-file"] = suite_files
        details["conf-file"] = glb_file
        details["ceph-version-name"] = ceph_name
        details["duration"] = "0s"
        details["status"] = "Not Executed"
        details["comments"] = var.get("comments", str())
        return details

    # Provision the clusters under test, or reload a stored cluster state when --reuse is set.
    if reuse is None:
        try:
            ceph_cluster_dict, clients = create_nodes(
                conf,
                inventory,
                osp_cred,
                run_id,
                cloud_type,
                service,
                instances_name,
                enable_eus=enable_eus,
                rp_logger=rp_logger,
            )
        except Exception as err:
            log.error(err)
            tests = suite.get("tests")
            res = []
            for test in tests:
                test = test.get("test")
                tmp = fetch_test_details(test)
                res.append(tmp)

            run_end_time = datetime.datetime.now()
            duration = divmod((run_end_time - run_start_time).total_seconds(), 60)
            total_time = {
                "start": run_start_time.strftime("%d %B %Y , %I:%M:%S %p"),
                "end": run_end_time.strftime("%d %B %Y , %I:%M:%S %p"),
                "total": f"{int(duration[0])} mins, {int(duration[1])} secs",
            }
            send_to_cephci = post_results or post_to_report_portal
            info = {
                "status": "Fail",
                "trace": (traceback.format_exc(limit=2)).split("\n"),
            }
            test_res = {
                "result": res,
                "run_id": run_id,
                "trigger_user": trigger_user,
                "run_directory": run_dir,
                "total_time": total_time,
                "info": info,
                "send_to_cephci": send_to_cephci,
            }
            email_results(test_result=test_res)
            return 1
    else:
        ceph_store_nodes = open(reuse, "rb")
        ceph_cluster_dict = pickle.load(ceph_store_nodes)
        ceph_store_nodes.close()
        for cluster_name, cluster in ceph_cluster_dict.items():
            for node in cluster:
                node.reconnect()

    if store:
        ceph_clusters_file = f"rerun/{instances_name}-{run_id}"
        if not os.path.exists(os.path.dirname(ceph_clusters_file)):
            os.makedirs(os.path.dirname(ceph_clusters_file))
        store_cluster_state(ceph_cluster_dict, ceph_clusters_file)

    sys.path.append(os.path.abspath("tests"))
    sys.path.append(os.path.abspath("tests/rados"))
    sys.path.append(os.path.abspath("tests/cephadm"))
    sys.path.append(os.path.abspath("tests/rbd"))
    sys.path.append(os.path.abspath("tests/rbd_mirror"))
    sys.path.append(os.path.abspath("tests/cephfs"))
    sys.path.append(os.path.abspath("tests/iscsi"))
    sys.path.append(os.path.abspath("tests/rgw"))
    sys.path.append(os.path.abspath("tests/ceph_ansible"))
    sys.path.append(os.path.abspath("tests/ceph_installer"))
    sys.path.append(os.path.abspath("tests/mgr"))
    sys.path.append(os.path.abspath("tests/dashboard"))
    sys.path.append(os.path.abspath("tests/misc_env"))
    sys.path.append(os.path.abspath("tests/parallel"))
    sys.path.append(os.path.abspath("tests/upgrades"))

    tests = suite.get("tests")
    tcs = []
    jenkins_rc = 0

    # use ceph_test_data to pass around dynamic data between tests
    ceph_test_data = dict()
    ceph_test_data["custom-config"] = custom_config
    ceph_test_data["custom-config-file"] = custom_config_file

    # Initialize test return code
    rc = 0

    # Execute each test module listed in the suite, one after the other.
    for test in tests:
        test = test.get("test")
        parallel = test.get("parallel")
        tc = fetch_test_details(test)
        test_file = tc["file"]
        report_portal_description = tc["desc"] or ""
        unique_test_name = create_unique_test_name(tc["name"], test_names)
        test_names.append(unique_test_name)

        tc["log-link"] = configure_logger(unique_test_name, run_dir)
        mod_file_name = os.path.splitext(test_file)[0]
        test_mod = importlib.import_module(mod_file_name)
        print("\nRunning test: {test_name}".format(test_name=tc["name"]))

        if tc.get("log-link"):
            print("Test logfile location: {log_url}".format(log_url=tc["log-link"]))

        log.info(f"Running test {test_file}")
        start = datetime.datetime.now()

        for cluster_name in test.get("clusters", ceph_cluster_dict):
            if test.get("clusters"):
                config = test.get("clusters").get(cluster_name).get("config", {})
            else:
                config = test.get("config", {})

            if not config.get("base_url"):
                config["base_url"] = base_url

            config["rhbuild"] = f"{rhbuild}-{platform}" if version2 else rhbuild
            config["cloud-type"] = cloud_type

            if "ubuntu_repo" in locals():
                config["ubuntu_repo"] = ubuntu_repo

            if skip_setup is True:
                config["skip_setup"] = True

            if skip_subscription is True:
                config["skip_subscription"] = True

            if args.get("--add-repo"):
                repo = args.get("--add-repo")
                if repo.startswith("http"):
                    config["add-repo"] = repo

            config["build_type"] = build
            config["enable_eus"] = enable_eus
            config["skip_enabling_rhel_rpms"] = skip_enabling_rhel_rpms
            config["docker-insecure-registry"] = docker_insecure_registry
            config["skip_version_compare"] = skip_version_compare
            config["container_image"] = "%s/%s:%s" % (
                docker_registry,
                docker_image,
                docker_tag,
            )

            config["ceph_docker_registry"] = docker_registry
            report_portal_description += f"docker registry: {docker_registry}"
            config["ceph_docker_image"] = docker_image
            report_portal_description += f"docker image: {docker_image}"
            config["ceph_docker_image_tag"] = docker_tag
            report_portal_description += f"docker tag: {docker_tag}"

            if filestore:
                config["filestore"] = filestore

            if ec_pool_vals:
                config["ec-pool-k-m"] = ec_pool_vals

            if args.get("--hotfix-repo"):
                hotfix_repo = args.get("--hotfix-repo")
                if hotfix_repo.startswith("http"):
                    config["hotfix_repo"] = hotfix_repo

            if kernel_repo is not None:
                config["kernel-repo"] = kernel_repo

            if osp_cred:
                config["osp_cred"] = osp_cred

            # if Kernel Repo is defined in ENV then set the value in config
            if os.environ.get("KERNEL-REPO-URL") is not None:
                config["kernel-repo"] = os.environ.get("KERNEL-REPO-URL")

            try:
                if post_to_report_portal:
                    rp_logger.start_test_item(
                        name=unique_test_name,
                        description=report_portal_description,
                        item_type="STEP",
                    )
                    rp_logger.log(message=f"Logfile location - {tc['log-link']}")
                    rp_logger.log(message=f"Polarion ID: {tc['polarion-id']}")

                # Initialize the cluster with the expected rhcs_version hence the
                # precedence would be from test suite.
                # rhbuild would start with the version for example 5.0 or 4.2-rhel-7
                _rhcs_version = test.get("ceph_rhcs_version", rhbuild[:3])
                ceph_cluster_dict[cluster_name].rhcs_version = _rhcs_version

                rc = test_mod.run(
                    ceph_cluster=ceph_cluster_dict[cluster_name],
                    ceph_nodes=ceph_cluster_dict[cluster_name],
                    config=config,
                    parallel=parallel,
                    test_data=ceph_test_data,
                    ceph_cluster_dict=ceph_cluster_dict,
                    clients=clients,
                )
            except BaseException:  # noqa
                if post_to_report_portal:
                    rp_logger.log(message=traceback.format_exc(), level="ERROR")

                log.error(traceback.format_exc())
                rc = 1
            finally:
                collect_recipe(ceph_cluster_dict[cluster_name])
                if store:
                    store_cluster_state(ceph_cluster_dict, ceph_clusters_file)

            if rc != 0:
                break

        elapsed = datetime.datetime.now() - start
        tc["duration"] = elapsed

        # Write to report portal
        if post_to_report_portal:
            rp_logger.finish_test_item(status="PASSED" if rc == 0 else "FAILED")

        if rc == 0:
            tc["status"] = "Pass"
            msg = "Test {} passed".format(test_mod)
            log.info(msg)
            print(msg)

            if post_results:
                post_to_polarion(tc=tc)
        else:
            tc["status"] = "Failed"
            msg = "Test {} failed".format(test_mod)
            log.info(msg)
            print(msg)
            jenkins_rc = 1

            if post_results:
                post_to_polarion(tc=tc)

            if test.get("abort-on-fail", False):
                log.info("Aborting on test failure")
                tcs.append(tc)
                break

        if test.get("destroy-cluster") is True:
            if cloud_type == "openstack":
                cleanup_ceph_nodes(osp_cred, instances_name)
            elif cloud_type == "ibmc":
                cleanup_ibmc_ceph_nodes(osp_cred, instances_name)

        if test.get("recreate-cluster") is True:
            ceph_cluster_dict, clients = create_nodes(
                conf,
                inventory,
                osp_cred,
                run_id,
                cloud_type,
                service,
                instances_name,
                enable_eus=enable_eus,
            )

        tcs.append(tc)

    url_base = (
        magna_url + run_dir.split("/")[-1]
        if "/ceph/cephci-jenkins" in run_dir
        else run_dir
    )
    log.info("\nAll test logs located here: {base}".format(base=url_base))
    close_and_remove_filehandlers()

    test_run_metadata = {
        "build": rhbuild,
        "polarion-project-id": "CEPH",
        "suite-name": suite_name,
        "distro": distro,
        "ceph-version": ceph_version,
        "ceph-ansible-version": ceph_ansible_version,
        "base_url": base_url,
        "container-registry": docker_registry,
        "container-image": docker_image,
        "container-tag": docker_tag,
        "compose-id": compose_id,
        "log-dir": run_dir,
        "run-id": run_id,
    }

    if post_to_report_portal:
        rp_logger.finish_launch()

    if xunit_results:
        create_xunit_results(suite_name, tcs, test_run_metadata)

    print("\nAll test logs located here: {base}".format(base=url_base))
    print_results(tcs)
    send_to_cephci = post_results or post_to_report_portal

    run_end_time = datetime.datetime.now()
    duration = divmod((run_end_time - run_start_time).total_seconds(), 60)
    total_time = {
        "start": run_start_time.strftime("%d %B %Y , %I:%M:%S %p"),
        "end": run_end_time.strftime("%d %B %Y , %I:%M:%S %p"),
        "total": f"{int(duration[0])} mins, {int(duration[1])} secs",
    }
    info = {"status": "Pass"}
    test_res = {
        "result": tcs,
        "run_id": run_id,
"trigger_user": trigger_user, "run_directory": run_dir, "total_time": total_time, "info": info, "send_to_cephci": send_to_cephci, } email_results(test_result=test_res) return jenkins_rc
def create_nodes(
    conf,
    inventory,
    osp_cred,
    run_id,
    cloud_type="openstack",
    report_portal_session=None,
    instances_name=None,
    enable_eus=False,
    rp_logger: Optional[ReportPortal] = None,
):
    """Creates the system under test environment."""
    if report_portal_session:
        name = create_unique_test_name("ceph node creation", test_names)
        test_names.append(name)
        desc = "Ceph cluster preparation"
        rp_logger.start_test_item(name=name, description=desc, item_type="STEP")

    log.info("Destroying existing osp instances..")
    if cloud_type == "openstack":
        cleanup_ceph_nodes(osp_cred, instances_name)
    elif cloud_type == "ibmc":
        cleanup_ibmc_ceph_nodes(osp_cred, instances_name)

    ceph_cluster_dict = {}

    log.info("Creating osp instances")
    clients = []
    for cluster in conf.get("globals"):
        if cloud_type == "openstack":
            ceph_vmnodes = create_ceph_nodes(
                cluster,
                inventory,
                osp_cred,
                run_id,
                instances_name,
                enable_eus=enable_eus,
            )
        elif cloud_type == "ibmc":
            ceph_vmnodes = create_ibmc_ceph_nodes(
                cluster, inventory, osp_cred, run_id, instances_name
            )
        elif cloud_type == "baremetal":
            ceph_vmnodes = create_baremetal_ceph_nodes(cluster)

        ceph_nodes = []
        root_password = None
        for node in ceph_vmnodes.values():
            look_for_key = False
            private_key_path = ""

            if cloud_type == "openstack":
                private_ip = node.get_private_ip()
            elif cloud_type == "baremetal":
                private_key_path = node.private_key if node.private_key else ""
                private_ip = node.ip_address
                look_for_key = True if node.private_key else False
                root_password = node.root_password
            elif cloud_type == "ibmc":
                glbs = osp_cred.get("globals")
                ibmc = glbs.get("ibm-credentials")
                private_key_path = ibmc.get("private_key_path")
                private_ip = node.ip_address
                look_for_key = True

            if node.role == "win-iscsi-clients":
                clients.append(
                    WinNode(ip_address=node.ip_address, private_ip=private_ip)
                )
            else:
                ceph = CephNode(
                    username="******",
                    password="******",
                    root_password="******" if not root_password else root_password,
                    look_for_key=look_for_key,
                    private_key_path=private_key_path,
                    root_login=node.root_login,
                    role=node.role,
                    no_of_volumes=node.no_of_volumes,
                    ip_address=node.ip_address,
                    subnet=node.subnet,
                    private_ip=private_ip,
                    hostname=node.hostname,
                    ceph_vmnode=node,
                )
                ceph_nodes.append(ceph)

        cluster_name = cluster.get("ceph-cluster").get("name", "ceph")
        ceph_cluster_dict[cluster_name] = Ceph(cluster_name, ceph_nodes)

    # TODO: refactor cluster dict to cluster list
    log.info("Done creating osp instances")
    log.info("Waiting for Floating IPs to be available")
    log.info("Sleeping 15 Seconds")
    time.sleep(15)

    for cluster_name, cluster in ceph_cluster_dict.items():
        for instance in cluster:
            try:
                instance.connect()
            except BaseException:
                # rp_logger may be None (e.g. the recreate-cluster call path does not pass one)
                if rp_logger:
                    rp_logger.finish_test_item(status="FAILED")
                raise

    if rp_logger:
        rp_logger.finish_test_item(status="PASSED")

    return ceph_cluster_dict, clients
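
# Shape of the ``conf`` argument consumed by create_nodes and run (illustrative sketch;
# key names are taken from the lookups in this module, values are hypothetical):
#
#     conf = {
#         "globals": [
#             {
#                 "ceph-cluster": {
#                     "name": "ceph",
#                     "inventory": "conf/inventory/example-inventory.yaml",
#                 }
#             },
#         ]
#     }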