def _detect_filesystem(self):
        """Detect a pre-existing Lustre filesystem when no targets are known yet.

        Does nothing if ``/api/target/`` already lists any targets.  Otherwise:

        1. runs ``mount -a -t lustre`` on every server in
           ``config["lustre_servers"]``;
        2. adds those servers as hosts and asserts that each one reports a
           truthy ``immutable_state`` and advertises both the
           ``ForceRemoveHostJob`` job and the ``removed`` state;
        3. posts a ``DetectTargetsJob`` command and waits for it to finish;
        4. asserts that the detected filesystem has the configured name, is
           immutable, and can be ``forgotten`` but not ``removed``;
        5. waits until the number of targets with a non-``None``
           ``active_host`` equals the number of configured targets.
        """
        if self.get_list("/api/target/") != []:
            # Targets are already known to the manager; nothing to detect.
            return

        servers = config["lustre_servers"]

        # Attempt to ensure all the targets are mounted for the filesystem.
        # NOTE(review): expected_return_code=None presumably means the exit
        # status of the mount is not checked -- confirm against remote_command.
        for server in servers:
            self.remote_command(server["address"], "mount -a -t lustre", expected_return_code=None)

        self.add_hosts([server["address"] for server in servers])

        # Verify hosts are immutable
        response = self.chroma_manager.get("/api/host/")
        self.assertEqual(response.successful, True, response.text)
        hosts = response.json["objects"]
        self.assertEqual(len(servers), len(hosts))

        for host in hosts:
            self.assertTrue(host["immutable_state"], host)

            host_actions = get_actions(self.chroma_manager, [host]).json["objects"]

            job_classes = [action["class_name"] for action in host_actions]
            self.assertIn("ForceRemoveHostJob", job_classes)

            transition_states = [action["state"] for action in host_actions]
            self.assertIn("removed", transition_states)

        # Issue command to detect existing filesystem
        detect_request = {
            "message": "Detecting filesystems",
            "jobs": [{"class_name": "DetectTargetsJob", "args": {}}],
        }
        response = self.chroma_manager.post("/api/command/", body=detect_request)
        self.assertEqual(response.successful, True, response.text)
        command = response.json

        self.wait_for_command(self.chroma_manager, command["id"], timeout=LONG_TEST_TIMEOUT)

        # Verify it detected the filesystem
        filesystem = self._filesystem
        self.assertEqual(config["filesystem"]["name"], filesystem["name"])
        self.assertTrue(filesystem["immutable_state"])
        filesystem_actions = get_actions(self.chroma_manager, [filesystem]).json["objects"]
        available_states = [action["state"] for action in filesystem_actions]
        self.assertIn("forgotten", available_states)
        self.assertNotIn("removed", available_states)

        def all_targets_have_active_host():
            """Return True once every configured target reports an active_host."""
            with_active_host = [
                target for target in self.get_list("/api/target/") if target["active_host"] is not None
            ]
            return len(with_active_host) == len(config["filesystem"]["targets"])

        # Wait for active_host to get set on all of the targets
        self.wait_until_true(all_targets_have_active_host)
    def test_available_actions(self):
        """Test that host actions can be looked up on the JobScheduler over RPC.

        'Actions' is the total list of jobs offered for an object, i.e. the
        union of:
          1. jobs (StateChangeJob) that move the object into an available
             transitional state, and
          2. jobs (AdvertisedJob) that the object conditionally makes
             available to be applied to itself.

        Here the first configured server is added, its LNet configuration is
        driven to ``lnet_up``, and the verbs of the actions then offered for
        that configuration must be exactly "Stop LNet" followed by
        "Unload LNet".
        """
        first_server = config["lustre_servers"][0]
        added_host = self.add_hosts([first_server["address"]])[0]

        self.set_state(added_host["lnet_configuration"], "lnet_up")

        # Re-read the configuration so the state assertion sees the current value.
        lnet_config = self.get_by_uri(added_host["lnet_configuration"]).json
        self.assertEqual(lnet_config["state"], "lnet_up")

        action_response = get_actions(self.chroma_manager, [lnet_config])
        offered = action_response.json["objects"]

        # Entries with a falsy verb are skipped; order is significant.
        verbs = [entry["verb"] for entry in offered if entry["verb"]]
        self.assertEqual(verbs, ["Stop LNet", "Unload LNet"])
        def check_expected_jobs(server, expected_jobs):
            """Assert that the host for ``server`` advertises exactly ``expected_jobs``.

            The host is looked up by ``server["fqdn"]``; the class names of its
            actions (ignoring falsy ones) are compared as a set against
            ``expected_jobs``.  The failure message reports the host state from
            the listing, the state from a fresh fetch, and the types of the
            active alerts attached to the host.
            """
            matching_hosts = self.get_list("/api/host/", args={"fqdn": server["fqdn"]})
            host = matching_hosts[0]

            action_objects = get_actions(self.chroma_manager, [host]).json["objects"]
            advertised = {entry["class_name"] for entry in action_objects if entry["class_name"]}
            wanted = set(expected_jobs)

            # The diagnostic text is built up front, as it is on every call of
            # the assertion, whether or not the comparison fails.
            listed_state = host["state"]
            fetched_state = self.get_json_by_uri(host["resource_uri"])["state"]
            active_alerts = self.get_list("/api/alert/", {"active": True, "alert_item_id": host["id"]})
            alert_types = ", ".join([alert["alert_type"] for alert in active_alerts])
            failure_message = "Host state %s (%s)\n Host Alerts [%s]" % (
                listed_state,
                fetched_state,
                alert_types,
            )

            self.assertEqual(advertised, wanted, failure_message)
Exemplo n.º 4
0
        def saw_expected_transitions(test_job_classes):
            """Return True when exactly the jobs in ``test_job_classes`` are advertised.

            Re-fetches ``test_server`` (taken from the enclosing scope), keeps
            the advertised class names that appear in ``test_job_classes``, and
            requires that list to match both as a set and in length.
            """
            current = self.get_json_by_uri(test_server["resource_uri"])
            advertised = get_actions(self.chroma_manager, [current]).json["objects"]
            available_job_classes = [
                entry["class_name"] for entry in advertised if entry["class_name"] in test_job_classes
            ]

            logger.debug("Found these available jobs: '%s'" % available_job_classes)

            if set(available_job_classes) != set(test_job_classes):
                return False
            # Equal lengths rule out a job class being advertised more than once.
            return len(available_job_classes) == len(test_job_classes)
    def _get_mount_job(self, job_class):
        """Return the worker's advertised action whose class is ``job_class``.

        ``self.worker`` is first replaced by a fresh copy fetched from its
        ``resource_uri``.  The first action whose ``class_name`` equals
        ``job_class`` is returned, or ``None`` when no action matches.
        """
        refreshed_worker = self.get_json_by_uri(self.worker["resource_uri"])
        self.worker = refreshed_worker

        candidates = get_actions(self.chroma_manager, [self.worker]).json["objects"]

        return next(
            (candidate for candidate in candidates if candidate.get("class_name") == job_class),
            None,
        )
Exemplo n.º 6
0
    def test_jobs_advertisement(self):
        """Check that reboot/shutdown jobs follow the reachability of a server.

        The jobs must be advertised once the server has been added, must
        disappear while a HostOfflineAlert/HostContactAlert is active for it,
        and must come back after the server has booted again and the alerts
        are gone.
        """
        added_servers = self.add_hosts([s["address"] for s in self.TEST_SERVERS[:2]])
        test_server = added_servers[0]

        test_job_classes = ["RebootHostJob", "ShutdownHostJob"]

        def advertised_subset(job_classes):
            """Return the currently advertised class names found in ``job_classes``."""
            current = self.get_json_by_uri(test_server["resource_uri"])
            advertised = get_actions(self.chroma_manager, [current]).json["objects"]
            return [entry["class_name"] for entry in advertised if entry["class_name"] in job_classes]

        def saw_expected_transitions(test_job_classes):
            """Return True when exactly ``test_job_classes`` are advertised, once each."""
            available_job_classes = advertised_subset(test_job_classes)

            logger.debug("Found these available jobs: '%s'" % available_job_classes)

            if set(available_job_classes) != set(test_job_classes):
                return False
            return len(available_job_classes) == len(test_job_classes)

        def assert_jobs_advertised():
            self.assertTrue(saw_expected_transitions(test_job_classes))

        # Step 1: after the server has been added and set up, the
        # reboot/shutdown jobs must be on offer.
        self.wait_for_assert(assert_jobs_advertised)

        # Step 2: kill the server so that a HostOfflineAlert gets raised.
        self.remote_operations.kill_server(test_server["fqdn"])

        def get_host_unavailable_alerts(host):
            """Return the active offline/contact alerts attached to ``host``."""
            alert_query = {
                "active": True,
                "alert_item_content_type_id": host["content_type_id"],
                "alert_item_id": host["id"],
                "alert_type__in": ["HostOfflineAlert", "HostContactAlert"],
            }
            host_alerts = self.get_list("/api/alert/", alert_query)
            logger.debug("Found these host related alerts: '%s'" % host_alerts)
            return host_alerts

        def server_has_alerts():
            return get_host_unavailable_alerts(test_server)

        self.wait_until_true(server_has_alerts)

        # Step 3: while the alert is active none of the jobs may be advertised.
        self.assertListEqual(advertised_subset(test_job_classes), [])

        # Step 4: boot the server again so the HostOfflineAlert is lowered,
        self.remote_operations.await_server_boot(test_server["fqdn"], restart=True)

        def server_alerts_cleared():
            return not get_host_unavailable_alerts(test_server)

        def server_is_set_up():
            current_state = self.get_json_by_uri(test_server["resource_uri"])["state"]
            return current_state not in ["removed", "undeployed", "unconfigured"]

        self.wait_until_true(server_alerts_cleared)
        self.wait_until_true(server_is_set_up)

        # and then the reboot/shutdown jobs must show up again.
        self.wait_until_true(lambda: saw_expected_transitions(test_job_classes))
Exemplo n.º 7
0
    def wait_for_action(self, victim, timeout=TEST_TIMEOUT, **filters):
        """
        Check victim's available actions until the desired action is available
        or the timeout is reached, filtering on action keys: class_name, state.

        On every iteration of ``util.wait(timeout)`` the victim is re-fetched
        from its ``resource_uri``.  Its ``available_actions`` entry is used when
        present (not ``None``); otherwise the actions are requested through
        ``get_actions``.

        :param victim: object dict with a ``resource_uri`` key.
        :param timeout: passed to ``util.wait``.
            NOTE(review): presumably a number of seconds -- confirm in util.
        :param filters: key/value pairs that an action must match exactly
            (compared with ``action.get(key)``).  With no filters, the first
            action seen is returned.
        :return: the first matching action.
        :raises AssertionError: if no action matched before ``util.wait``
            stopped yielding.
        """
        # Pre-bind so that the failure report below still works when
        # util.wait() yields nothing; otherwise the name would be unbound and
        # an UnboundLocalError would mask the intended AssertionError.
        actions = []

        for _ in util.wait(timeout):
            obj = self.get_json_by_uri(victim["resource_uri"])

            actions = obj.get("available_actions")

            if actions is None:
                actions = get_actions(self.chroma_manager, [obj]).json["objects"]

            for action in actions:
                if all(action.get(key) == wanted for key, wanted in filters.items()):
                    return action

        # Report only the filtered keys of the last-seen actions to keep the
        # failure message readable.
        seen = [{key: action.get(key) for key in filters} for action in actions]
        raise AssertionError("{0} not found in {1}".format(filters, seen))
    def test_available_transitions(self):
        """Test that host states can be looked up on the JobScheduler over RPC.

        Adds the first configured server, fetches the actions offered for it
        and checks that every non-empty state among them is ``removed``.
        """
        first_server = config["lustre_servers"][0]

        # Register the first configured server as a host.
        self.add_hosts([first_server["address"]])

        matching_hosts = self.get_list("/api/host/", args={"fqdn": first_server["fqdn"]})
        host1 = matching_hosts[0]

        offered = get_actions(self.chroma_manager, [host1]).json["objects"]

        # Whether any jobs are still incomplete is not checked here.
        # Falsy states are dropped; note that an empty result passes trivially.
        transition_states = [entry["state"] for entry in offered if entry["state"]]

        for transition_state in transition_states:
            self.assertIn(transition_state, ["removed"])