Example #1
0
class TestAgent(unittest.TestCase, AgentCommonTests):
    def shortDescription(self):
        return None

    def configure_host(self):
        config_req = Host.GetConfigRequest()
        host_config = self.host_client.get_host_config(config_req).hostConfig

        leaf_scheduler = SchedulerRole(stable_uuid("leaf scheduler"))
        leaf_scheduler.parent_id = stable_uuid("parent scheduler")
        leaf_scheduler.hosts = [host_config.agent_id]

        config_request = ConfigureRequest(
            stable_uuid("leaf scheduler"),
            Roles([leaf_scheduler]))

        self.host_client.configure(config_request)

    def test_bootstrap(self):
        # Verify the negative case first as it has no side effects.
        self._update_agent_invalid_config()
        self.runtime.stop_agent(self.proc)
        new_config = self.config.copy()
        # Don't set availability zone since
        # self._update_agent_config() sets it.
        del new_config["--availability-zone"]
        res = self.runtime.start_agent(new_config)
        self.proc, self.host_client, self.control_client = res
        req = self._update_agent_config()

        # Restart the agent and verify that the config it sees is
        # the same one we requested.
        res = self.runtime.start_agent(new_config)
        self.proc, self.host_client, self.control_client = res
        self._validate_post_boostrap_config(req)

    @property
    def agent_in_uwsim(self):
        return False

    def setUp(self):
        self.runtime = RuntimeUtils(self.id())
        self.config = self.runtime.get_default_agent_config()
        self._datastores = ["datastore1", "datastore2"]
        self.config["--datastores"] = ",".join(self._datastores)
        res = self.runtime.start_agent(self.config)
        self.proc, self.host_client, self.control_client = res
        self.configure_host()
        self.set_host_mode(HostMode.NORMAL)
        self.clear_datastore_tags()

    def tearDown(self):
        self.runtime.cleanup()

    def test_agent_with_invalid_vsi(self):
        self.runtime.stop_agent(self.proc)
        # Since the config file takes precedence over the command line
        # options, we need to remove the directory that was created
        # by start_agent in the setUp method.
        shutil.rmtree(self.config["--config-path"])
        new_config = self.config.copy()
        new_config["--hypervisor"] = "esx"
        res = self.runtime.start_agent(new_config)
        self.proc, self.host_client, self.control_client = res
        time_waited = 0
        while self._agent_running() and time_waited < 5:
            time.sleep(0.2)
            time_waited += 0.2
        self.assertFalse(self._agent_running())

    def test_datastore_parse(self):
        """Test that the agent parses datastore args"""
        self.runtime.stop_agent(self.proc)
        new_config = self.config.copy()
        new_config["--datastores"] = " ds1,ds2, ds3, ds4 "
        res = self.runtime.start_agent(new_config)
        self.proc, self.host_client, self.control_client = res

        request = Host.GetConfigRequest()
        response = self.host_client.get_host_config(request)

        datastore_ids = [ds.id for ds in response.hostConfig.datastores]
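        # The agent is expected to trim the whitespace around each name and
        # derive the datastore id as a name-based (uuid5) UUID of the name,
        # which is what we recompute here.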
        expected_ids = [str(uuid.uuid5(uuid.NAMESPACE_DNS, name))
                        for name in ["ds1", "ds2", "ds3", "ds4"]]
        assert_that(datastore_ids, equal_to(expected_ids))

    def test_management_only_parse(self):
        # Default option for management_only is False
        request = Host.GetConfigRequest()
        response = self.host_client.get_host_config(request)

        assert_that(response.result, equal_to(Host.GetConfigResultCode.OK))
        assert_that(response.hostConfig.management_only, equal_to(False))

        # Restart the agent with the --management-only option and verify
        # that the flag is set to True
        self.runtime.stop_agent(self.proc)
        new_config = self.config.copy()
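        # A value of None presumably makes RuntimeUtils pass the bare
        # --management-only flag with no argument.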
        new_config["--management-only"] = None
        res = self.runtime.start_agent(new_config)
        self.proc, self.host_client, self.control_client = res

        request = Host.GetConfigRequest()
        response = self.host_client.get_host_config(request)

        assert_that(response.result, equal_to(Host.GetConfigResultCode.OK))
        assert_that(response.hostConfig.management_only, equal_to(True))

    def test_create_vm_with_ephemeral_disks_ttylinux(self):
        self._test_create_vm_with_ephemeral_disks("ttylinux")

    def test_create_vm_with_ephemeral_disks(self):
        image_dir = os.path.join(
            "/tmp/images",
            FakeHypervisor.datastore_id(self.get_image_datastore()))

        try:
            mkdir_p(image_dir)
            with tempfile.NamedTemporaryFile(dir=image_dir,
                                             suffix=".vmdk") as f:
                # The temp file created is named
                # "/tmp/images/<ds>/<uniquepart>.vmdk".
                # This simulates an image being present on the agent.
                # The file is deleted on leaving the context.
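                # Strip the directory prefix and the ".vmdk" suffix to
                # recover the image id.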
                image_id = f.name[f.name.rfind("/") + 1:-5]
                self._test_create_vm_with_ephemeral_disks(image_id)
        finally:
            rm_rf(image_dir)

    def _agent_running(self):
        if not self.proc:
            return False

        try:
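            # Signal 0 doesn't deliver a signal; it only checks whether the
            # process exists (EPERM below means it exists but we lack
            # permission to signal it).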
            os.kill(self.proc.pid, 0)
            return True
        except OSError as e:
            if e.errno == errno.ESRCH:
                return False
            elif e.errno == errno.EPERM:
                return True
            else:
                raise e
Example #2
0
class TestAgent(unittest.TestCase, AgentCommonTests):
    def shortDescription(self):
        return None

    def configure_host(self):
        config_req = Host.GetConfigRequest()
        host_config = self.host_client.get_host_config(config_req).hostConfig

        leaf_scheduler = SchedulerRole(stable_uuid("leaf scheduler"))
        leaf_scheduler.parent_id = stable_uuid("parent scheduler")
        leaf_scheduler.hosts = [host_config.agent_id]

        config_request = ConfigureRequest(stable_uuid("leaf scheduler"),
                                          Roles([leaf_scheduler]))

        self.host_client.configure(config_request)

    def test_bootstrap(self):
        # Verify the negative case first as it has no side effects.
        self._update_agent_invalid_config()
        self.runtime.stop_agent(self.proc)
        new_config = self.config.copy()
        # Don't set availability zone since
        # self._update_agent_config() sets it.
        del new_config["--availability-zone"]
        res = self.runtime.start_agent(new_config)
        self.proc, self.host_client, self.control_client = res
        req = self._update_agent_config()

        # Restart the agent and verify that the config it sees is
        # the same one we requested.
        res = self.runtime.start_agent(new_config)
        self.proc, self.host_client, self.control_client = res
        self._validate_post_boostrap_config(req)

    @property
    def agent_in_uwsim(self):
        return False

    def setUp(self):
        self.runtime = RuntimeUtils(self.id())
        self.config = self.runtime.get_default_agent_config()
        self._datastores = ["datastore1", "datastore2"]
        self.config["--datastores"] = ",".join(self._datastores)
        res = self.runtime.start_agent(self.config)
        self.proc, self.host_client, self.control_client = res
        self.configure_host()
        self.set_host_mode(HostMode.NORMAL)
        self.clear_datastore_tags()

    def tearDown(self):
        self.runtime.cleanup()

    def test_agent_with_invalid_vsi(self):
        self.runtime.stop_agent(self.proc)
        # Since the config file takes precedence over the command line
        # options, we need to remove the directory that was created
        # by start_agent in the setUp method.
        shutil.rmtree(self.config["--config-path"])
        new_config = self.config.copy()
        new_config["--hypervisor"] = "esx"
        res = self.runtime.start_agent(new_config)
        self.proc, self.host_client, self.control_client = res
        time_waited = 0
        while self._agent_running() and time_waited < 5:
            time.sleep(0.2)
            time_waited += 0.2
        self.assertFalse(self._agent_running())

    def test_datastore_parse(self):
        """Test that the agent parses datastore args"""
        self.runtime.stop_agent(self.proc)
        new_config = self.config.copy()
        new_config["--datastores"] = " ds1,ds2, ds3, ds4 "
        res = self.runtime.start_agent(new_config)
        self.proc, self.host_client, self.control_client = res

        request = Host.GetConfigRequest()
        response = self.host_client.get_host_config(request)

        datastore_ids = [ds.id for ds in response.hostConfig.datastores]
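        # The agent is expected to trim the whitespace around each name and
        # derive the datastore id as a name-based (uuid5) UUID of the name,
        # which is what we recompute here.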
        expected_ids = [
            str(uuid.uuid5(uuid.NAMESPACE_DNS, name))
            for name in ["ds1", "ds2", "ds3", "ds4"]
        ]
        assert_that(datastore_ids, equal_to(expected_ids))

    def test_management_only_parse(self):
        # Default option for management_only is False
        request = Host.GetConfigRequest()
        response = self.host_client.get_host_config(request)

        assert_that(response.result, equal_to(Host.GetConfigResultCode.OK))
        assert_that(response.hostConfig.management_only, equal_to(False))

        # Restart the agent with the --management-only option and verify
        # that the flag is set to True
        self.runtime.stop_agent(self.proc)
        new_config = self.config.copy()
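        # A value of None presumably makes RuntimeUtils pass the bare
        # --management-only flag with no argument.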
        new_config["--management-only"] = None
        res = self.runtime.start_agent(new_config)
        self.proc, self.host_client, self.control_client = res

        request = Host.GetConfigRequest()
        response = self.host_client.get_host_config(request)

        assert_that(response.result, equal_to(Host.GetConfigResultCode.OK))
        assert_that(response.hostConfig.management_only, equal_to(True))

    def test_create_vm_with_ephemeral_disks_ttylinux(self):
        self._test_create_vm_with_ephemeral_disks("ttylinux")

    def test_create_vm_with_ephemeral_disks(self):
        image_dir = os.path.join(
            "/tmp/images",
            FakeHypervisor.datastore_id(self.get_image_datastore()))

        try:
            mkdir_p(image_dir)
            with tempfile.NamedTemporaryFile(dir=image_dir,
                                             suffix=".vmdk") as f:
                # The temp file created is named
                # "/tmp/images/<ds>/<uniquepart>.vmdk".
                # This simulates an image being present on the agent.
                # The file is deleted on leaving the context.
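                # Strip the directory prefix and the ".vmdk" suffix to
                # recover the image id.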
                image_id = f.name[f.name.rfind("/") + 1:-5]
                self._test_create_vm_with_ephemeral_disks(image_id)
        finally:
            rm_rf(image_dir)

    def _agent_running(self):
        if not self.proc:
            return False

        try:
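            # Signal 0 doesn't deliver a signal; it only checks whether the
            # process exists (EPERM below means it exists but we lack
            # permission to signal it).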
            os.kill(self.proc.pid, 0)
            return True
        except OSError as e:
            if e.errno == errno.ESRCH:
                return False
            elif e.errno == errno.EPERM:
                return True
            else:
                raise e
Example #3
0
class TestChairman(BaseKazooTestCase):
    """
    These tests start a ZooKeeper server and a chairman process.
    """
    def get_register_host_request(self, port=8080):
        """
        Generates a random register host request. All generated requests
        share the same image datastore and availability zone.
        """
        host_id = str(uuid.uuid4())
        if not hasattr(self, "image_datastore"):
            self.image_datastore = str(uuid.uuid4())

        datastores = [Datastore(self.image_datastore)]
        networks = [Network("nw1", [NetworkType.VM])]
        host_config = HostConfig(agent_id=host_id,
                                 datastores=datastores,
                                 address=ServerAddress("127.0.0.1", port=port),
                                 networks=networks)
        host_config.availability_zone = "foo"
        host_config.image_datastore_ids = set([self.image_datastore])
        return RegisterHostRequest(host_id, host_config)

    def report_missing(self):
        """
        Generates a random missing request with two hosts and two schedulers.
        """
        scheduler_id = str(uuid.uuid4())
        schedulers = [str(uuid.uuid4()), str(uuid.uuid4())]
        hosts = [str(uuid.uuid4()), str(uuid.uuid4())]
        return ReportMissingRequest(scheduler_id, schedulers, hosts)

    def report_missing_hosts(self):
        scheduler_id = str(uuid.uuid4())
        hosts = [str(uuid.uuid4()), str(uuid.uuid4())]
        return ReportMissingRequest(scheduler_id, None, hosts)

    def disconnect(self):
        """ Disconnect from the chairman instance and close the transport """
        for transport in self.transports:
            transport.close()

    def setUp(self):
        self.set_up_kazoo_base()
        self.procs = []
        self.thrift_server = None
        self.transports = []
        self.runtime = RuntimeUtils(self.id())

        self.runtime.start_cloud_store()
        host, port = '127.0.0.1', 13000
        self.runtime.start_chairman(host, port)

        (transport, self.chairman_client) = create_chairman_client(host, port)
        self.transports.append(transport)

        # Wait for the chairman to finish its leader election
        _wait_on_code(self.chairman_client.get_schedulers,
                      GetSchedulersResultCode.OK, GetSchedulersRequest)

    def tearDown(self):
        if self.thrift_server:
            self.thrift_server.stop_server()

        self.disconnect()

        for proc in self.procs:
            stop_service(proc)

        self.runtime.cleanup()
        self.tear_down_kazoo_base()

    def test_get_status(self):
        chairman_client_leader = self.chairman_client

        # Start two more chairman instances
        host = '127.0.0.1'
        port_1 = 13001
        port_2 = 13002
        self.runtime.start_chairman(host, port_1)
        self.runtime.start_chairman(host, port_2)
        (transport_1, chairman_client_non_leader_1) = \
            create_chairman_client(host, port_1)
        (transport_2, chairman_client_non_leader_2) = \
            create_chairman_client(host, port_2)
        self.transports.append(transport_1)
        self.transports.append(transport_2)

        h_id_config = {}

        # Register two hosts with the chairman leader.
        reg_host_req_1 = self.get_register_host_request()
        server_address = reg_host_req_1.config.address.host
        server_port = reg_host_req_1.config.address.port
        h_id_config[reg_host_req_1.id] = reg_host_req_1.config

        host_handler = AgentHandler(2)
        self.thrift_server = ThriftServer(server_address, server_port,
                                          host_handler)
        self.thrift_server.start_server()

        rc = chairman_client_leader.register_host(reg_host_req_1)
        self.assertEqual(rc.result, RegisterHostResultCode.OK)

        reg_host_req_2 = self.get_register_host_request()
        h_id_config[reg_host_req_2.id] = reg_host_req_2.config

        rc = self.chairman_client.register_host(reg_host_req_2)
        self.assertEqual(rc.result, RegisterHostResultCode.OK)

        # verify that all the agents received configurations
        host_handler.received_all.wait(20)
        assert_that(len(host_handler.configs), is_(len(h_id_config)))

        # verify chairman leader is in READY status
        get_status_request = GetStatusRequest()
        rc = chairman_client_leader.get_status(get_status_request)
        self.assertEqual(rc.type, StatusType.READY)

        # verify the second chairman, which is not the leader, is in READY
        # status because it notices that a leader exists
        rc = chairman_client_non_leader_1.get_status(get_status_request)
        self.assertEqual(rc.type, StatusType.READY)

        # verify the third chairman, which is not the leader, is in READY
        # status because it notices that a leader exists
        rc = chairman_client_non_leader_2.get_status(get_status_request)
        self.assertEqual(rc.type, StatusType.READY)

        client = self._get_nonchroot_client()
        client.start()

        read_chairman_leader = client.get_children(CHAIRMAN_SERVICE)[0]

        # normalize from unicode to str
        read_chairman_leader = \
            normalize('NFKD', read_chairman_leader).encode('ascii', 'ignore')

        # expect the chairman leader znode to be deleted
        leader_deleted_event = async_wait_for(EventType.DELETED,
                                              CHAIRMAN_SERVICE,
                                              read_chairman_leader, client)

        # delete the chairman leader znode from the service
        client.delete(CHAIRMAN_SERVICE + "/" + read_chairman_leader)

        leader_deleted_event.wait(20)
        self.assertTrue(leader_deleted_event.isSet())

        def wait_for_status(chairman_client, status):
            retries = 0

            while (retries < 10):
                try:
                    rc = chairman_client.get_status(get_status_request)
                    if rc.type == status:
                        break
                except Exception:
                    logger.exception("get_status() failed")
                retries += 1
                time.sleep(1)
            return rc

    def test_register_host(self):
        """ Register against the chairman and verify it is persisted in zk """
        host_prefix = "/hosts"  # Const in java impl.

        # Register two hosts with the chairman.
        host = []
        h = self.get_register_host_request()
        host.append(h.config)

        retries = 0
        while (retries < 10):
            rc = self.chairman_client.register_host(h)

            if (rc.result == RegisterHostResultCode.NOT_IN_MAJORITY):
                # Possible because the chairman is yet to connect to zk
                retries += 1
                time.sleep(1)
                continue
            break

        self.assertEqual(rc.result, RegisterHostResultCode.OK)

        h = self.get_register_host_request()
        host.append(h.config)
        rc = self.chairman_client.register_host(h)
        self.assertEqual(rc.result, RegisterHostResultCode.OK)

        # Validate the state persisted in zk.
        client = self._get_nonchroot_client()
        client.start()
        self.assertTrue(client.exists(host_prefix))
        read_hosts = client.get_children(host_prefix)
        for h in read_hosts:
            path = host_prefix + "/" + h
            (value, stat) = client.get(path)
            host_config = HostConfig()
            deserialize(host_config, value)
            self.assertTrue(host_config in host)
        client.stop()

    def test_report_missing(self):
        """ Report a set of hosts are missing to the chairman and verify that
            it is persisted in zk.
        """
        missing_prefix = "/missing"
        # Report two hosts are missing.
        missing_req = self.report_missing()
        retries = 0
        while (retries < 10):
            rc = self.chairman_client.report_missing(missing_req)
            if (rc.result == ReportMissingResultCode.NOT_IN_MAJORITY):
                # Possible because the chairman is yet to connect to zk
                retries += 1
                time.sleep(1)
                continue
            break

        self.assertEqual(rc.result, ReportMissingResultCode.OK)
        nodes = missing_req.hosts
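        # The chairman records reported-missing schedulers under /missing as
        # well (the count check below relies on this), so include them in the
        # expected set of znodes.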
        nodes += missing_req.schedulers

        missing_req = self.report_missing_hosts()
        rc = self.chairman_client.report_missing(missing_req)
        self.assertEqual(rc.result, ReportMissingResultCode.OK)
        nodes += missing_req.hosts

        # Validate the state persisted in zk.
        client = self._get_nonchroot_client()
        client.start()
        self.assertTrue(client.exists(missing_prefix))
        missing_hosts = client.get_children(missing_prefix)
        self.assertEqual(len(missing_hosts), len(nodes))
        for host in missing_hosts:
            self.assertTrue(host in nodes)

        client.stop()

    def test_unregister_host(self):
        """unregister host after host being registered or missing
        """
        h_id_config = {}

        # Register two hosts with the chairman.
        reg_host_req_1 = self.get_register_host_request()
        server_address = reg_host_req_1.config.address.host
        server_port = reg_host_req_1.config.address.port
        h_id_config[reg_host_req_1.id] = reg_host_req_1.config

        host_handler = AgentHandler(2)
        self.thrift_server = ThriftServer(server_address, server_port,
                                          host_handler)
        self.thrift_server.start_server()

        rc = self.chairman_client.register_host(reg_host_req_1)
        self.assertEqual(rc.result, RegisterHostResultCode.OK)

        reg_host_req_2 = self.get_register_host_request()
        h_id_config[reg_host_req_2.id] = reg_host_req_2.config

        rc = self.chairman_client.register_host(reg_host_req_2)
        self.assertEqual(rc.result, RegisterHostResultCode.OK)

        # verify that all the agents received configurations
        host_handler.received_all.wait(30)
        assert_that(len(host_handler.configs), is_(len(h_id_config)))

        client = self._get_nonchroot_client()
        client.start()

        # Verify that the host has registered with chairman
        host_registered = wait_for(EventType.CREATED, HOSTS_PREFIX,
                                   h_id_config.keys()[0], client)
        self.assertTrue(host_registered)

        host_registered = wait_for(EventType.CREATED, HOSTS_PREFIX,
                                   h_id_config.keys()[1], client)
        self.assertTrue(host_registered)

        # validate /hosts
        read_hosts = client.get_children(HOSTS_PREFIX)
        self.assertEqual(len(read_hosts), len(h_id_config))

        for h in read_hosts:
            path = HOSTS_PREFIX + "/" + h
            host_config = extract_node_data(client, path, HostConfig)
            self.assertTrue(host_config in h_id_config.values())

        # validate that there is only one leaf scheduler for agents with the
        # same availability_zone and datastore
        roles_registered = wait_for(EventType.CREATED, ROLES_PREFIX, "",
                                    client, "get_children")
        roles_hosts = client.get_children(ROLES_PREFIX)
        self.assertTrue(roles_registered)
        self.assertEqual(len(roles_hosts), 1)

        # preserve the host id that owns the leaf scheduler
        leaf_scheduler_host_id = None

        # validate leaf scheduler has 2 hosts
        r_id = roles_hosts[0]
        leaf_scheduler_host_id = r_id

        path = ROLES_PREFIX + "/" + r_id
        roles = extract_node_data(client, path, Roles)
        scheduler_role = roles.schedulers[0]
        self.assertEqual(len(scheduler_role.hosts), 2)
        self.assertTrue(h_id_config.keys()[0] in scheduler_role.hosts)
        self.assertTrue(h_id_config.keys()[1] in scheduler_role.hosts)

        # normalize from unicode to str
        leaf_scheduler_host_id = \
            normalize('NFKD', leaf_scheduler_host_id).encode('ascii', 'ignore')

        # validate report missing
        del h_id_config[leaf_scheduler_host_id]
        missing_host_id = h_id_config.keys()[0]
        missing_host_list = h_id_config.keys()

        scheduler_id = host_handler.configs[missing_host_id].scheduler
        missing_req = ReportMissingRequest(scheduler_id, None,
                                           missing_host_list)
        m_hosts = missing_req.hosts
        rc = self.chairman_client.report_missing(missing_req)
        self.assertEqual(rc.result, ReportMissingResultCode.OK)

        # validate /missing
        host_missing = wait_for(EventType.CREATED, MISSING_PREFIX,
                                missing_host_id, client)
        self.assertTrue(host_missing)
        missing_hosts = client.get_children(MISSING_PREFIX)
        self.assertEqual(len(missing_hosts), len(m_hosts))

        for host in missing_hosts:
            self.assertTrue(host in m_hosts)

        # expect the leaf scheduler role to be changed
        role_changed_event = async_wait_for(EventType.CHANGED, ROLES_PREFIX,
                                            leaf_scheduler_host_id, client)

        # unregister missing host
        unreg_req = UnregisterHostRequest(missing_host_id)
        rc = self.chairman_client.unregister_host(unreg_req)
        self.assertEqual(rc.result, UnregisterHostResultCode.OK)

        role_changed_event.wait(20)
        self.assertTrue(role_changed_event.isSet())

        # Validate /missing after unregister host
        missing_hosts = client.get_children(MISSING_PREFIX)
        self.assertEqual(len(missing_hosts), 0)

        # validate leaf scheduler's host number after unregistered a host
        roles_hosts = client.get_children(ROLES_PREFIX)

        r_id = roles_hosts[0]
        path = ROLES_PREFIX + "/" + r_id
        roles = extract_node_data(client, path, Roles)
        scheduler_role = roles.schedulers[0]
        self.assertEqual(len(scheduler_role.hosts), 1)
        self.assertTrue(r_id in scheduler_role.hosts)
        self.assertTrue(missing_host_id not in scheduler_role.hosts)

        # expect the leaf scheduler role to be deleted
        role_changed_event = async_wait_for(EventType.DELETED, ROLES_PREFIX,
                                            leaf_scheduler_host_id, client)

        # unregister host that owns leaf scheduler
        unreg_req = UnregisterHostRequest(leaf_scheduler_host_id)
        rc = self.chairman_client.unregister_host(unreg_req)
        self.assertEqual(rc.result, UnregisterHostResultCode.OK)

        role_changed_event.wait(20)
        self.assertTrue(role_changed_event.isSet())

        # Validate the state persisted in zk.
        read_hosts = client.get_children(HOSTS_PREFIX)
        self.assertEqual(len(read_hosts), 0)

        client.stop()
class TestTreeIntrospection(BaseKazooTestCase):
    def setUp(self):
        self.set_up_kazoo_base()
        self.zk_client = self._get_nonchroot_client()
        self.zk_client.start()

        self.runtime = RuntimeUtils(self.id())

        # Create zk paths
        self.zk_client.create(MISSING_PREFIX)
        self.zk_client.create(HOSTS_PREFIX)
        self.zk_client.create(ROLES_PREFIX)

        self.root_conf = {}
        self.root_conf["healthcheck"] = {}
        self.root_conf["zookeeper"] = {}
        self.root_conf["zookeeper"]["quorum"] = "localhost:%i" % (DEFAULT_ZK_PORT,)
        self.root_conf["healthcheck"]["timeout_ms"] = ROOT_SCHEDULER_TIME_OUT
        self.root_conf["healthcheck"]["period_ms"] = ROOT_SCHEDULER_PERIOD

        # start root scheduler
        self.root_host = "localhost"
        self.root_port = 15000
        self.root_conf["bind"] = self.root_host
        self.root_conf["port"] = self.root_port
        self.runtime.start_root_scheduler(self.root_conf)

        (self.root_transport, self.root_sch_client) = create_root_client(self.root_port, self.root_host)

        # start chairman
        self.chairman_host = "localhost"
        self.chairman_port = 13000
        self.leaf_fanout = 2
        self.runtime.start_chairman(self.chairman_host, self.chairman_port, self.leaf_fanout)
        (self.chairman_transport, self.chairman_client) = create_chairman_client(self.chairman_host, self.chairman_port)
        # Wait for chairman and root scheduler to finish their elections
        _wait_on_code(self.root_sch_client.get_schedulers, GetSchedulersResultCode.OK)
        _wait_on_code(self.chairman_client.get_schedulers, GetSchedulersResultCode.OK, GetSchedulersRequest)

    def tearDown(self):
        self.runtime.cleanup()
        self.zk_client.stop()
        self.zk_client.close()
        self.tear_down_kazoo_base()

    def test_get_service_leader(self):
        """Test get service leader"""
        # Check the chairman leader
        (address, port) = get_service_leader(self.zk_client, CHAIRMAN_SERVICE)
        assert_that(address, is_(self.chairman_host))
        assert_that(port, is_(self.chairman_port))

        deleted = threading.Event()

        def _deleted(children):
            if not children:
                deleted.set()

        self.zk_client.ChildrenWatch(CHAIRMAN_SERVICE, _deleted)
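        # kazoo invokes _deleted with the current child list on registration
        # and again on every change; an empty list means the leader znode is
        # gone.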
        # Stop chairman
        stop_service(self.runtime.chairman_procs[0])
        # Wait for the leader to leave
        deleted.wait(30)
        res = get_service_leader(self.zk_client, CHAIRMAN_SERVICE)
        assert_that(res, is_(None))

    def test_get_root_scheduler(self):
        """Test root scheduler introspection"""
        (root_host, root_port) = get_service_leader(self.zk_client, ROOT_SCHEDULER_SERVICE)
        # Verify that an empty root scheduler is constructed
        # correctly
        root_sch = get_root_scheduler(root_host, root_port)

        assert_that(root_sch.id, is_(ROOT_SCHEDULER_ID))
        assert_that(root_sch.type, is_(ROOT_SCHEDULER_TYPE))
        assert_that(len(root_sch.children), is_(0))
        assert_that(root_sch.owner, not_none())
        root_owner = root_sch.owner
        assert_that(root_owner.id, is_(ROOT_SCHEDULER_ID))
        assert_that(root_owner.address, is_(root_host))
        assert_that(root_owner.port, is_(root_port))
        assert_that(root_owner.parent, is_(None))

        # Start an agent
        agent_host = "localhost"
        agent_port = 20000
        config = self.runtime.get_agent_config(agent_host, agent_port, self.chairman_host, self.chairman_port)
        res = self.runtime.start_agent(config)
        agent_client = res[1]

        # Wait for the root scheduler to be configured
        _wait_for_configuration(self.root_sch_client, 1)

        new_root_sch = get_root_scheduler(root_host, root_port)
        assert_that(len(new_root_sch.children), is_(1))

        req = THost.GetConfigRequest()
        agent_id = agent_client.get_host_config(req).hostConfig.agent_id

        leaf = new_root_sch.children.values()[0]
        assert_that(leaf.type, is_(LEAF_SCHEDULER_TYPE))
        assert_that(leaf.parent, is_(new_root_sch))
        assert_that(len(leaf.children), is_(0))
        assert_that(leaf.owner.id, is_(agent_id))
        assert_that(leaf.owner.address, is_(agent_host))
        assert_that(leaf.owner.port, is_(agent_port))
        assert_that(leaf.owner.parent, is_(leaf))

        deleted = threading.Event()

        def _deleted(children):
            if not children:
                deleted.set()

        self.zk_client.ChildrenWatch(ROOT_SCHEDULER_SERVICE, _deleted)
        stop_service(self.runtime.root_procs[0])
        # Wait for the leader to leave
        deleted.wait(30)

        empty_root = get_root_scheduler(root_host, root_port)
        assert_that(empty_root, is_(None))

    def test_get_leaf_scheduler(self):
        """Test agent introspection"""

        agent_host = "localhost"
        agent_port = 20000

        # Agent not online
        leaf = get_leaf_scheduler(agent_host, agent_port)
        assert_that(leaf, is_(None))

        # Start an agent with an invalid chairman, so that it doesn't
        # get configured, because we want to configure it manually
        config = self.runtime.get_agent_config(agent_host, agent_port, "localhost", 24234)
        res = self.runtime.start_agent(config)
        agent_client = res[1]

        # Agent is online but not a leaf scheduler
        leaf = get_leaf_scheduler(agent_host, agent_port)
        assert_that(leaf, is_(None))

        leafId1 = stable_uuid("leaf scheduler")
        config_req = THost.GetConfigRequest()
        host_config = agent_client.get_host_config(config_req).hostConfig

        leaf_scheduler = SchedulerRole(leafId1)
        leaf_scheduler.parent_id = stable_uuid("parent scheduler")
        leaf_scheduler.hosts = [host_config.agent_id]
        leaf_scheduler.host_children = [ChildInfo(id=host_config.agent_id, address=agent_host, port=agent_port)]
        config_request = ConfigureRequest(leafId1, Roles([leaf_scheduler]))

        resp = agent_client.configure(config_request)
        assert_that(resp.result, is_(ConfigureResultCode.OK))

        leaf = get_leaf_scheduler(agent_host, agent_port)

        assert_that(leaf.id, not_none())
        assert_that(leaf.type, is_(LEAF_SCHEDULER_TYPE))
        assert_that(len(leaf.children), is_(1))
        # Verify the owner host
        owner_host = leaf.owner
        assert_that(owner_host, not_none())
        assert_that(owner_host.id, is_(host_config.agent_id))
        assert_that(owner_host.address, is_(agent_host))
        assert_that(owner_host.port, is_(agent_port))
        assert_that(owner_host.parent, is_(leaf))

    def _check_tree(self, root_sch, root_address, root_port, _fanout, agents_list):
        """
        This method checks if a hierarchy is correctly constructed, assuming
        the agents were sequentially added to the hierarchy. The check fails
        by raising an assertion error.
        root_address: a string, the root scheduler's address
        root_port: an int, the root scheduler's port
        _fanout: an integer that specifies the max fanout
        agents_list: a list of tuples (id, address, port), where every tuple
                     represents an agent
        """

        # This method will split a list into multiple lists, where the
        # inner lists represent leaf schedulers i.e.
        # [[leaf1_owner, leaf1_child2 ... ],[leaf2_owner, leaf2_child2 ... ]]
        # leafX_owner is a tuple of (id, address, port)
        def split_list_by_fanout(_list, fanout):
            for i in xrange(0, len(_list), fanout):
                yield _list[i : i + fanout]

        leaves = list(split_list_by_fanout(agents_list, _fanout))

        # check root
        assert_that(root_sch.id, is_(ROOT_SCHEDULER_ID))
        assert_that(root_sch.type, is_(ROOT_SCHEDULER_TYPE))
        assert_that(root_sch.owner, not_none())
        assert_that(root_sch.owner.address, is_(root_address))
        assert_that(root_sch.owner.port, is_(root_port))
        assert_that(len(root_sch.children), is_(len(leaves)))

        # Map scheduler hosts; the map will look like this:
        # {leaf_owner_host_id: [(leaf_owner_host_id, address, port), ...], ...}
        sch_hosts = {}
        for leaf in leaves:
            sch_hosts[leaf[0][0]] = leaf

        for child in root_sch.children.values():
            leaf_owner_id = child.owner.id
            assert_that(leaf_owner_id, is_(sch_hosts[leaf_owner_id][0][0]))
            assert_that(child.parent.owner.id, is_(ROOT_SCHEDULER_ID))
            assert_that(child.owner.address, is_(sch_hosts[leaf_owner_id][0][1]))
            assert_that(child.owner.port, is_(sch_hosts[leaf_owner_id][0][2]))
            assert_that(child.owner.parent, is_(child))
            assert_that(child.type, is_(LEAF_SCHEDULER_TYPE))

            # Verify the leaf's child hosts
            children = sch_hosts[leaf_owner_id]

            # map child hosts
            children_map = {}
            for c in children:
                children_map[c[0]] = c

            for child_host in child.children.values():
                assert_that(children_map.get(child_host.id, None), not_none())
                assert_that(children_map[child_host.id][0], is_(child_host.id))
                assert_that(children_map[child_host.id][1], is_(child_host.address))
                assert_that(children_map[child_host.id][2], is_(child_host.port))
                assert_that(child_host.parent, is_(child))

    def wait_for_registration(self, agent_id, timeout=10):
        """Waits for _id to be created in /hosts"""
        completed = threading.Event()

        def wait_created(data, stat, event):
            """Set the event once the node exists."""
            if stat:
                completed.set()

        self.zk_client.DataWatch(ROLES_PREFIX + "/" + agent_id, wait_created)
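        # kazoo's DataWatch fires wait_created immediately and again on node
        # changes; stat is None until the znode actually exists.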
        completed.wait(timeout)
        assert_that(completed.isSet(), is_(True))

    def _start_agents(self, agent_host, agent_ports):
        """Start agents on different ports.

        When agents register sequentially, the order of
        onHostAdded events is not guaranteed. For example, if agents
        A, B then C register, they won't necessarily be inserted into the
        hierarchy in that order; a possible order is B, A then C. Thus,
        we wait on hosts that we think will own a leaf scheduler before
        registering more agents that will go under the same leaf.
        """
        agent_ids = []
        for ind in xrange(len(agent_ports)):
            config = self.runtime.get_agent_config(agent_host, agent_ports[ind], self.chairman_host, self.chairman_port)
            res = self.runtime.start_agent(config)
            agent_client = res[1]
            config_req = THost.GetConfigRequest()
            config = agent_client.get_host_config(config_req).hostConfig
            agent_id = config.agent_id
            if ind % self.leaf_fanout == 0:
                self.wait_for_registration(agent_id)
            agent_ids.append(agent_id)
        return agent_ids

    def test_get_hierarchy_from_zk(self):
        agent_host = "localhost"
        agent_port1 = 20000
        agent_port2 = 20001
        agent_port3 = 20002

        agent_ids = self._start_agents(agent_host, [agent_port1, agent_port2, agent_port3])

        # The chairman will persist the schedulers then push the
        # configurations, thus after we detect that the root scheduler
        # has been configured we know that the leaf schedulers have already
        # been persisted to zk
        _wait_for_configuration(self.root_sch_client, 2)

        root = get_hierarchy_from_zk(self.zk_client)

        agent_list = [
            (agent_ids[0], agent_host, agent_port1),
            (agent_ids[1], agent_host, agent_port2),
            (agent_ids[2], agent_host, agent_port3),
        ]
        # verify the hierarchy structure
        self._check_tree(root, self.root_host, self.root_port, self.leaf_fanout, agent_list)

    def test_get_hierarchy_from_chairman(self):
        agent_host = "localhost"
        agent_port1 = 20000
        agent_port2 = 20001
        agent_port3 = 20002

        agent_ids = self._start_agents(agent_host, [agent_port1, agent_port2, agent_port3])
        _wait_for_configuration(self.root_sch_client, 2)

        root = get_hierarchy_from_chairman(self.chairman_host, self.chairman_port, self.root_host, self.root_port)
        agent_list = [
            (agent_ids[0], agent_host, agent_port1),
            (agent_ids[1], agent_host, agent_port2),
            (agent_ids[2], agent_host, agent_port3),
        ]
        # verify the hierarchy structure
        self._check_tree(root, self.root_host, self.root_port, self.leaf_fanout, agent_list)

    def test_update_status(self):
        agent_host = "localhost"
        agent_port1 = 20000
        config = self.runtime.get_agent_config(agent_host, agent_port1, self.chairman_host, self.chairman_port)
        res = self.runtime.start_agent(config)
        _wait_for_configuration(self.root_sch_client, 1)

        root = get_hierarchy_from_zk(self.zk_client)
        # Update the hierarchy status
        root.update_status()

        # verify that the root scheduler and leaf are online
        assert_that(root.owner.status, is_(STATUS_ONLINE))
        assert_that(len(root.children), is_(1))
        assert_that(root.children.values()[0].owner.status, is_(STATUS_ONLINE))
        # Kill both root scheduler and leaf host
        stop_service(self.runtime.root_procs[0])
        self.runtime.stop_agent(res[0])
        # Update the hierarchy status
        root.update_status()
        assert_that(root.owner.status, is_(STATUS_OFFLINE))
        assert_that(root.children.values()[0].owner.status, is_(STATUS_OFFLINE))

        # Start the root scheduler and leaf scheduler
        self.runtime.start_root_scheduler(self.root_conf)
        config = self.runtime.get_agent_config(agent_host, agent_port1, self.chairman_host, self.chairman_port)
        res = self.runtime.start_agent(config)
        (self.root_transport, self.root_sch_client) = create_root_client(self.root_port, self.root_host)
        # Wait for the root scheduler's leader election
        _wait_on_code(self.root_sch_client.get_schedulers, GetSchedulersResultCode.OK)

        # Check the status again
        root.update_status()
        # verify that the root scheduler and leaf are online
        assert_that(root.owner.status, is_(STATUS_ONLINE))
        assert_that(root.children.values()[0].owner.status, is_(STATUS_ONLINE))

    def test_get_hosts_from_zk(self):
        hosts = get_hosts_from_zk(self.zk_client)
        assert_that(len(hosts), is_(0))

        networks = [Network("nw1", [NetworkType.VM])]
        dsid = str(uuid.uuid4())
        datastores = [Datastore(dsid, "ds1", DatastoreType.SHARED_VMFS)]

        # Build register requests for two hosts
        agent_host = "localhost"
        agent1_port = 12345
        req1 = get_register_host_request(
            agent_host,
            agent1_port,
            agent_id="host1",
            networks=networks,
            datastores=datastores,
            image_datastore=dsid,
            availability_zone="az1",
        )
        agent2_port = 12346
        req2 = get_register_host_request(
            agent_host,
            agent2_port,
            agent_id="host2",
            networks=networks,
            datastores=datastores,
            image_datastore=dsid,
            availability_zone="az1",
        )
        # Register two hosts
        resp = self.chairman_client.register_host(req1)
        assert_that(resp.result, is_(RegisterHostResultCode.OK))
        resp = self.chairman_client.register_host(req2)
        assert_that(resp.result, is_(RegisterHostResultCode.OK))

        hosts = get_hosts_from_zk(self.zk_client)
        # map list to dict indexed by host id
        hosts = dict((h.id, h) for h in hosts)
        assert_that(len(hosts), is_(2))
        _h1 = hosts[req1.config.agent_id]
        _h2 = hosts[req2.config.agent_id]
        # Verify that the requests match the hosts that were
        # constructed by get_hosts_from_zk
        assert_that(_h1.id, is_(req1.config.agent_id))
        assert_that(_h2.id, is_(req2.config.agent_id))
        assert_that(_h1.address, is_(req1.config.address.host))
        assert_that(_h2.port, is_(req2.config.address.port))

    def test_get_missing_hosts_from_zk(self):
        missing = get_missing_hosts_from_zk(self.zk_client)
        assert_that(len(missing), is_(0))
        missing_hosts = ["h2", "h3"]
        req = ReportMissingRequest("host1", None, missing_hosts)
        resp = self.chairman_client.report_missing(req)
        assert_that(resp.result, is_(ReportMissingResultCode.OK))

        missing = get_missing_hosts_from_zk(self.zk_client)
        assert_that(missing[0] in missing_hosts, is_(True))
        assert_that(missing[1] in missing_hosts, is_(True))
Example #5
0
class TestTreeIntrospection(BaseKazooTestCase):
    def setUp(self):
        self.set_up_kazoo_base()
        self.zk_client = self._get_nonchroot_client()
        self.zk_client.start()

        self.runtime = RuntimeUtils(self.id())

        # Create zk paths
        self.zk_client.create(MISSING_PREFIX)
        self.zk_client.create(HOSTS_PREFIX)
        self.zk_client.create(ROLES_PREFIX)

        self.root_conf = {}
        self.root_conf['healthcheck'] = {}
        self.root_conf['zookeeper'] = {}
        self.root_conf['zookeeper']['quorum'] = ("localhost:%i" %
                                                 (DEFAULT_ZK_PORT, ))
        self.root_conf['healthcheck']['timeout_ms'] = ROOT_SCHEDULER_TIME_OUT
        self.root_conf['healthcheck']['period_ms'] = ROOT_SCHEDULER_PERIOD

        # start root scheduler
        self.root_host = "localhost"
        self.root_port = 15000
        self.root_conf['bind'] = self.root_host
        self.root_conf['port'] = self.root_port
        self.runtime.start_root_scheduler(self.root_conf)

        (self.root_transport,
         self.root_sch_client) = create_root_client(self.root_port,
                                                    self.root_host)

        # start chairman
        self.chairman_host = 'localhost'
        self.chairman_port = 13000
        self.leaf_fanout = 2
        self.runtime.start_chairman(self.chairman_host, self.chairman_port,
                                    self.leaf_fanout)
        (self.chairman_transport, self.chairman_client) = \
            create_chairman_client(self.chairman_host, self.chairman_port)
        # Wait for chairman and root scheduler to finish their elections
        _wait_on_code(self.root_sch_client.get_schedulers,
                      GetSchedulersResultCode.OK)
        _wait_on_code(self.chairman_client.get_schedulers,
                      GetSchedulersResultCode.OK, GetSchedulersRequest)

    def tearDown(self):
        self.runtime.cleanup()
        self.zk_client.stop()
        self.zk_client.close()
        self.tear_down_kazoo_base()

    def test_get_service_leader(self):
        """Test get service leader"""
        # Check the chairman leader
        (address, port) = get_service_leader(self.zk_client, CHAIRMAN_SERVICE)
        assert_that(address, is_(self.chairman_host))
        assert_that(port, is_(self.chairman_port))

        deleted = threading.Event()

        def _deleted(children):
            if not children:
                deleted.set()

        self.zk_client.ChildrenWatch(CHAIRMAN_SERVICE, _deleted)
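        # kazoo invokes _deleted with the current child list on registration
        # and again on every change; an empty list means the leader znode is
        # gone.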
        # Stop chairman
        stop_service(self.runtime.chairman_procs[0])
        # Wait for the leader to leave
        deleted.wait(30)
        res = get_service_leader(self.zk_client, CHAIRMAN_SERVICE)
        assert_that(res, is_(None))

    def test_get_root_scheduler(self):
        """Test root scheduler introspection"""
        (root_host, root_port) = get_service_leader(self.zk_client,
                                                    ROOT_SCHEDULER_SERVICE)
        # Verify that an empty root scheduler is constructed
        # correctly
        root_sch = get_root_scheduler(root_host, root_port)

        assert_that(root_sch.id, is_(ROOT_SCHEDULER_ID))
        assert_that(root_sch.type, is_(ROOT_SCHEDULER_TYPE))
        assert_that(len(root_sch.children), is_(0))
        assert_that(root_sch.owner, not_none())
        root_owner = root_sch.owner
        assert_that(root_owner.id, is_(ROOT_SCHEDULER_ID))
        assert_that(root_owner.address, is_(root_host))
        assert_that(root_owner.port, is_(root_port))
        assert_that(root_owner.parent, is_(None))

        # Start an agent
        agent_host = 'localhost'
        agent_port = 20000
        config = self.runtime.get_agent_config(agent_host, agent_port,
                                               self.chairman_host,
                                               self.chairman_port)
        res = self.runtime.start_agent(config)
        agent_client = res[1]

        # Wait for the root scheduler to be configured
        _wait_for_configuration(self.root_sch_client, 1)

        new_root_sch = get_root_scheduler(root_host, root_port)
        assert_that(len(new_root_sch.children), is_(1))

        req = THost.GetConfigRequest()
        agent_id = agent_client.get_host_config(req).hostConfig.agent_id

        leaf = new_root_sch.children.values()[0]
        assert_that(leaf.type, is_(LEAF_SCHEDULER_TYPE))
        assert_that(leaf.parent, is_(new_root_sch))
        assert_that(len(leaf.children), is_(0))
        assert_that(leaf.owner.id, is_(agent_id))
        assert_that(leaf.owner.address, is_(agent_host))
        assert_that(leaf.owner.port, is_(agent_port))
        assert_that(leaf.owner.parent, is_(leaf))

        deleted = threading.Event()

        def _deleted(children):
            if not children:
                deleted.set()

        self.zk_client.ChildrenWatch(ROOT_SCHEDULER_SERVICE, _deleted)
        stop_service(self.runtime.root_procs[0])
        # Wait for the leader to leave
        deleted.wait(30)

        empty_root = get_root_scheduler(root_host, root_port)
        assert_that(empty_root, is_(None))

    def test_get_leaf_scheduler(self):
        """Test agent introspection"""

        agent_host = 'localhost'
        agent_port = 20000

        # Agent not online
        leaf = get_leaf_scheduler(agent_host, agent_port)
        assert_that(leaf, is_(None))

        # Start an agent with an invalid chairman, so that it doesn't
        # get configured, because we want to configure it manually
        config = self.runtime.get_agent_config(agent_host, agent_port,
                                               "localhost", 24234)
        res = self.runtime.start_agent(config)
        agent_client = res[1]

        # Agent is online but not a leaf scheduler
        leaf = get_leaf_scheduler(agent_host, agent_port)
        assert_that(leaf, is_(None))

        leafId1 = stable_uuid("leaf scheduler")
        config_req = THost.GetConfigRequest()
        host_config = agent_client.get_host_config(config_req).hostConfig

        leaf_scheduler = SchedulerRole(leafId1)
        leaf_scheduler.parent_id = stable_uuid("parent scheduler")
        leaf_scheduler.hosts = [host_config.agent_id]
        leaf_scheduler.host_children = [
            ChildInfo(id=host_config.agent_id,
                      address=agent_host,
                      port=agent_port)
        ]
        config_request = ConfigureRequest(leafId1, Roles([leaf_scheduler]))

        resp = agent_client.configure(config_request)
        assert_that(resp.result, is_(ConfigureResultCode.OK))

        leaf = get_leaf_scheduler(agent_host, agent_port)

        assert_that(leaf.id, not_none())
        assert_that(leaf.type, is_(LEAF_SCHEDULER_TYPE))
        assert_that(len(leaf.children), is_(1))
        # Verify the owner host
        owner_host = leaf.owner
        assert_that(owner_host, not_none())
        assert_that(owner_host.id, is_(host_config.agent_id))
        assert_that(owner_host.address, is_(agent_host))
        assert_that(owner_host.port, is_(agent_port))
        assert_that(owner_host.parent, is_(leaf))

    def _check_tree(self, root_sch, root_address, root_port, _fanout,
                    agents_list):
        """
        This method checks if a hierarchy is correctly constructed, assuming
        the agents were sequentially added to the hierarchy. The check fails
        by raising an assertion error.
        root_address: a string, the root scheduler's address
        root_port: an int, the root scheduler's port
        _fanout: an integer that specifies the max fanout
        agents_list: a list of tuples (id, address, port), where every tuple
                     represents an agent
        """

        # This method will split a list into multiple lists, where the
        # inner lists represent leaf schedulers i.e.
        # [[leaf1_owner, leaf1_child2 ... ],[leaf2_owner, leaf2_child2 ... ]]
        # leafX_owner is a tuple of (id, address, port)
        def split_list_by_fanout(_list, fanout):
            for i in xrange(0, len(_list), fanout):
                yield _list[i:i + fanout]

        leaves = list(split_list_by_fanout(agents_list, _fanout))

        # check root
        assert_that(root_sch.id, is_(ROOT_SCHEDULER_ID))
        assert_that(root_sch.type, is_(ROOT_SCHEDULER_TYPE))
        assert_that(root_sch.owner, not_none())
        assert_that(root_sch.owner.address, is_(root_address))
        assert_that(root_sch.owner.port, is_(root_port))
        assert_that(len(root_sch.children), is_(len(leaves)))

        # Map scheduler hosts; the map will look like this:
        # {leaf_owner_host_id: [(leaf_owner_host_id, address, port), ...], ...}
        sch_hosts = {}
        for leaf in leaves:
            sch_hosts[leaf[0][0]] = leaf

        for child in root_sch.children.values():
            leaf_owner_id = child.owner.id
            assert_that(leaf_owner_id, is_(sch_hosts[leaf_owner_id][0][0]))
            assert_that(child.parent.owner.id, is_(ROOT_SCHEDULER_ID))
            assert_that(child.owner.address,
                        is_(sch_hosts[leaf_owner_id][0][1]))
            assert_that(child.owner.port, is_(sch_hosts[leaf_owner_id][0][2]))
            assert_that(child.owner.parent, is_(child))
            assert_that(child.type, is_(LEAF_SCHEDULER_TYPE))

            # Verify the leaf's child hosts
            children = sch_hosts[leaf_owner_id]

            # map child hosts
            children_map = {}
            for c in children:
                children_map[c[0]] = c

            for child_host in child.children.values():
                assert_that(children_map.get(child_host.id, None), not_none())
                assert_that(children_map[child_host.id][0], is_(child_host.id))
                assert_that(children_map[child_host.id][1],
                            is_(child_host.address))
                assert_that(children_map[child_host.id][2],
                            is_(child_host.port))
                assert_that(child_host.parent, is_(child))

    def wait_for_registration(self, agent_id, timeout=10):
        """Waits for _id to be created in /hosts"""
        completed = threading.Event()

        def wait_created(data, stat, event):
            """Set the event once the node exists."""
            if stat:
                completed.set()

        self.zk_client.DataWatch(ROLES_PREFIX + "/" + agent_id, wait_created)
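        # kazoo's DataWatch fires wait_created immediately and again on node
        # changes; stat is None until the znode actually exists.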
        completed.wait(timeout)
        assert_that(completed.isSet(), is_(True))

    def _start_agents(self, agent_host, agent_ports):
        """Start agents on different ports.

        When agents register sequentially, the order of
        onHostAdded events is not guaranteed. For example, if agents
        A, B then C register, they won't necessarily be inserted into the
        hierarchy in that order; a possible order is B, A then C. Thus,
        we wait on hosts that we think will own a leaf scheduler before
        registering more agents that will go under the same leaf.
        """
        agent_ids = []
        for ind in xrange(len(agent_ports)):
            config = self.runtime.get_agent_config(agent_host,
                                                   agent_ports[ind],
                                                   self.chairman_host,
                                                   self.chairman_port)
            res = self.runtime.start_agent(config)
            agent_client = res[1]
            config_req = THost.GetConfigRequest()
            config = agent_client.get_host_config(config_req).hostConfig
            agent_id = config.agent_id
            if ind % self.leaf_fanout == 0:
                self.wait_for_registration(agent_id)
            agent_ids.append(agent_id)
        return agent_ids

    def test_get_hierarchy_from_zk(self):
        agent_host = 'localhost'
        agent_port1 = 20000
        agent_port2 = 20001
        agent_port3 = 20002

        agent_ids = self._start_agents(agent_host,
                                       [agent_port1, agent_port2, agent_port3])

        # The chairman will persist the schedulers then push the
        # configurations, thus after we detect that the root scheduler
        # has been configured we know that the leaf schedulers have already
        # been persisted to zk
        _wait_for_configuration(self.root_sch_client, 2)

        root = get_hierarchy_from_zk(self.zk_client)

        agent_list = [(agent_ids[0], agent_host, agent_port1),
                      (agent_ids[1], agent_host, agent_port2),
                      (agent_ids[2], agent_host, agent_port3)]
        # verify the hierarchy structure
        self._check_tree(root, self.root_host, self.root_port,
                         self.leaf_fanout, agent_list)

    def test_get_hierarchy_from_chairman(self):
        agent_host = 'localhost'
        agent_port1 = 20000
        agent_port2 = 20001
        agent_port3 = 20002

        agent_ids = self._start_agents(agent_host,
                                       [agent_port1, agent_port2, agent_port3])
        _wait_for_configuration(self.root_sch_client, 2)

        root = get_hierarchy_from_chairman(self.chairman_host,
                                           self.chairman_port, self.root_host,
                                           self.root_port)
        agent_list = [(agent_ids[0], agent_host, agent_port1),
                      (agent_ids[1], agent_host, agent_port2),
                      (agent_ids[2], agent_host, agent_port3)]
        # verify the hierarchy structure
        self._check_tree(root, self.root_host, self.root_port,
                         self.leaf_fanout, agent_list)

    def test_update_status(self):
        agent_host = 'localhost'
        agent_port1 = 20000
        config = self.runtime.get_agent_config(agent_host, agent_port1,
                                               self.chairman_host,
                                               self.chairman_port)
        res = self.runtime.start_agent(config)
        _wait_for_configuration(self.root_sch_client, 1)

        root = get_hierarchy_from_zk(self.zk_client)
        # Update the hierarchy status
        root.update_status()

        # verify that the root scheduler and leaf are online
        assert_that(root.owner.status, is_(STATUS_ONLINE))
        assert_that(len(root.children), is_(1))
        assert_that(root.children.values()[0].owner.status, is_(STATUS_ONLINE))
        # Kill both root scheduler and leaf host
        stop_service(self.runtime.root_procs[0])
        self.runtime.stop_agent(res[0])
        # Update the hierarchy status
        root.update_status()
        assert_that(root.owner.status, is_(STATUS_OFFLINE))
        assert_that(root.children.values()[0].owner.status,
                    is_(STATUS_OFFLINE))

        # Start the root scheduler and leaf scheduler
        self.runtime.start_root_scheduler(self.root_conf)
        config = self.runtime.get_agent_config(agent_host, agent_port1,
                                               self.chairman_host,
                                               self.chairman_port)
        res = self.runtime.start_agent(config)
        (self.root_transport,
         self.root_sch_client) = create_root_client(self.root_port,
                                                    self.root_host)
        # Wait for the root scheduler's leader election
        _wait_on_code(self.root_sch_client.get_schedulers,
                      GetSchedulersResultCode.OK)

        # Check the status again
        root.update_status()
        # verify that the root scheduler and leaf are online
        assert_that(root.owner.status, is_(STATUS_ONLINE))
        assert_that(root.children.values()[0].owner.status, is_(STATUS_ONLINE))

    def test_get_hosts_from_zk(self):
        hosts = get_hosts_from_zk(self.zk_client)
        assert_that(len(hosts), is_(0))

        networks = [Network("nw1", [NetworkType.VM])]
        dsid = str(uuid.uuid4())
        datastores = [Datastore(dsid, "ds1", DatastoreType.SHARED_VMFS)]

        # Build register requests for two hosts
        agent_host = "localhost"
        agent1_port = 12345
        req1 = get_register_host_request(agent_host,
                                         agent1_port,
                                         agent_id="host1",
                                         networks=networks,
                                         datastores=datastores,
                                         image_datastore=dsid,
                                         availability_zone="az1")
        agent2_port = 12346
        req2 = get_register_host_request(agent_host,
                                         agent2_port,
                                         agent_id="host2",
                                         networks=networks,
                                         datastores=datastores,
                                         image_datastore=dsid,
                                         availability_zone="az1")
        # Register two hosts
        resp = self.chairman_client.register_host(req1)
        assert_that(resp.result, is_(RegisterHostResultCode.OK))
        resp = self.chairman_client.register_host(req2)
        assert_that(resp.result, is_(RegisterHostResultCode.OK))

        hosts = get_hosts_from_zk(self.zk_client)
        # map list to dict indexed by host id
        hosts = dict((h.id, h) for h in hosts)
        assert_that(len(hosts), is_(2))
        _h1 = hosts[req1.config.agent_id]
        _h2 = hosts[req2.config.agent_id]
        # Verify that the requests match the hosts that were
        # constructed by get_hosts_from_zk
        assert_that(_h1.id, is_(req1.config.agent_id))
        assert_that(_h2.id, is_(req2.config.agent_id))
        assert_that(_h1.address, is_(req1.config.address.host))
        assert_that(_h2.port, is_(req2.config.address.port))

    def test_get_missing_hosts_from_zk(self):
        missing = get_missing_hosts_from_zk(self.zk_client)
        assert_that(len(missing), is_(0))
        missing_hosts = ["h2", "h3"]
        req = ReportMissingRequest("host1", None, missing_hosts)
        resp = self.chairman_client.report_missing(req)
        assert_that(resp.result, is_(ReportMissingResultCode.OK))

        missing = get_missing_hosts_from_zk(self.zk_client)
        assert_that(missing[0] in missing_hosts, is_(True))
        assert_that(missing[1] in missing_hosts, is_(True))