Example #1
    def test_node_exclusive_bug(self):
        """test_node_exclusive_bug

        If two processes with the same node-exclusive attribute were scheduled
        in the same matchmaking cycle, they could be assigned to the same
        resource due to a caching issue. This test verifies the fix.
        """
        self.mm.initialize()

        n1 = NodeRecord.new("n1", "d1")
        self.store.add_node(n1)

        props = {"engine": "engine1"}
        r1 = ResourceRecord.new("r1", "n1", 2, properties=props)
        self.store.add_resource(r1)

        n2 = NodeRecord.new("n2", "d1")
        self.store.add_node(n2)

        props = {"engine": "engine1"}
        r2 = ResourceRecord.new("r2", "n2", 2, properties=props)
        self.store.add_resource(r2)

        xattr_1 = "port5000"
        constraints = {}
        p1 = ProcessRecord.new(None, "p1", get_process_definition(),
                               ProcessState.REQUESTED, constraints=constraints,
                               node_exclusive=xattr_1)
        p1key = p1.get_key()
        self.store.add_process(p1)
        self.store.enqueue_process(*p1key)

        p2 = ProcessRecord.new(None, "p2", get_process_definition(),
                               ProcessState.REQUESTED, constraints=constraints,
                               node_exclusive=xattr_1)
        p2key = p2.get_key()
        self.store.add_process(p2)
        self.store.enqueue_process(*p2key)

        # sneak into MM and force it to update this info from the store
        self.mm._get_queued_processes()
        self.mm._get_resource_set()

        self.mm.matchmake()

        # Ensure these processes are pending and scheduled to different nodes

        p1 = self.store.get_process(None, "p1")
        p2 = self.store.get_process(None, "p2")
        self.assertNotEqual(p1.assigned, p2.assigned)
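The bug this test guards against comes from per-cycle matchmaking state: once p1 claims the "port5000" node-exclusive attribute on a node, that claim has to be visible when p2 is considered later in the same cycle. A minimal sketch of that idea follows, using simplified stand-in records and a hypothetical matchmake_cycle() helper rather than the project's actual Matchmaker internals:

from dataclasses import dataclass
from typing import Optional

@dataclass
class Proc:
    upid: str
    node_exclusive: Optional[str] = None

@dataclass
class Res:
    resource_id: str
    node_id: str
    slots: int

def matchmake_cycle(queued, resources):
    claimed = {}      # node_id -> node_exclusive attrs claimed this cycle
    assignments = {}  # upid -> resource_id
    for proc in queued:
        for res in resources:
            attrs = claimed.setdefault(res.node_id, set())
            if proc.node_exclusive and proc.node_exclusive in attrs:
                continue  # attr already claimed on this node in this cycle
            if res.slots <= 0:
                continue
            assignments[proc.upid] = res.resource_id
            res.slots -= 1
            if proc.node_exclusive:
                attrs.add(proc.node_exclusive)
            break
    return assignments

# Two processes with the same exclusive attr end up on different nodes.
procs = [Proc("p1", "port5000"), Proc("p2", "port5000")]
rs = [Res("r1", "n1", 2), Res("r2", "n2", 2)]
assert set(matchmake_cycle(procs, rs).values()) == {"r1", "r2"}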
Example #2
    def test_node_filo(self):
        """test_node_filo

        We prioritize shutting down the newest VMs (FILO) as a workaround
        for the OOI testing strategy.
        """
        self.mm.initialize()

        n1 = NodeRecord.new("n1", "d1")
        self.store.add_node(n1)

        props = {"engine": "engine4"}
        r1 = ResourceRecord.new("r1", "n1", 2, properties=props)
        self.store.add_resource(r1)

        n2 = NodeRecord.new("n2", "d1")
        self.store.add_node(n2)

        props = {"engine": "engine4"}
        r2 = ResourceRecord.new("r2", "n2", 2, properties=props)
        self.store.add_resource(r2)

        constraints = {"engine": "engine4"}
        p1 = ProcessRecord.new(None, "p1", get_process_definition(),
                               ProcessState.REQUESTED, constraints=constraints)
        p1key = p1.get_key()
        self.store.add_process(p1)
        self.store.enqueue_process(*p1key)

        # sneak into MM and force it to update this info from the store
        self.mm._get_queued_processes()
        self.mm._get_resource_set()

        self.mm.register_needs()
        self.epum_client.clear()

        self.mm.queued_processes = []

        self.mm.register_needs()
        conf = self.epum_client.reconfigures['pd_domain_engine4'][0]
        retired_nodes = conf['engine_conf']['retirable_nodes']
        assert len(retired_nodes) == 1

        # This should be the second node we started
        assert retired_nodes[0] == "n2"
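The assertion expects the newest node, n2, to be the one reported back as retirable. A minimal sketch of that FILO selection, assuming a hypothetical launch-order field rather than the real register_needs() bookkeeping:

def pick_retirable_nodes(nodes, excess):
    """Return the `excess` most recently started nodes, newest first."""
    newest_first = sorted(nodes, key=lambda n: n["launch_order"], reverse=True)
    return [n["node_id"] for n in newest_first[:excess]]

nodes = [{"node_id": "n1", "launch_order": 1},
         {"node_id": "n2", "launch_order": 2}]
assert pick_retirable_nodes(nodes, 1) == ["n2"]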
Example #3
File: core.py  Project: oldpatricka/epu
    def node_state(self, node_id, domain_id, state, properties=None):
        """
        Handle updates about available domain nodes.

        @param node_id: unique instance identifier
        @param domain_id: domain of instance
        @param state: EPU state of instance
        @param properties: Optional properties about this instance
        @return:

        This operation is the recipient of a "subscription" the PD makes to
        domain state updates. Calls to this operation are NOT RPC-style.

        This information is used for two purposes:

            1. To correlate EE agent heartbeats with a node and various deploy
               information (site, allocation, security groups, etc).

            2. To detect EEs which have been killed due to underlying death
               of a resource (VM).
        """

        if state == InstanceState.RUNNING:
            node = self.store.get_node(node_id)
            if node is None:
                node = NodeRecord.new(node_id, domain_id, properties)

                try:
                    self.store.add_node(node)
                except WriteConflictError:
                    # if the node record was written by someone else,
                    # no big deal.
                    return

                log.info("Domain %s node %s is %s", domain_id, node_id, state)

        elif state in (InstanceState.TERMINATING, InstanceState.TERMINATED):
            # reschedule processes running on node

            node = self.store.get_node(node_id)
            if node is None:
                log.warn("got state for unknown node %s in state %s",
                         node_id, state)
                return
            self.evacuate_node(node)
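Since calls to node_state are described as non-RPC subscription updates, the caller side looks roughly like the sketch below; `core` is a placeholder for whatever object exposes node_state(), and the update payload shape is an assumption, not the project's actual message format:

def on_domain_state_update(core, update):
    # update is assumed to be a dict like:
    #   {"node_id": ..., "domain_id": ..., "state": ..., "properties": {...}}
    # A RUNNING update creates the node record; a later TERMINATING/TERMINATED
    # update for the same node triggers evacuate_node() so its processes are
    # rescheduled elsewhere.
    core.node_state(update["node_id"],
                    update["domain_id"],
                    update["state"],
                    properties=update.get("properties"))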
Example #4
    def test_node_exclusive(self):
        self._run_in_thread()

        n1 = NodeRecord.new("n1", "d1")
        self.store.add_node(n1)

        props = {"engine": "engine1"}
        n1_r1 = ResourceRecord.new("n1_r1", "n1", 2, properties=props)
        self.store.add_resource(n1_r1)

        n1_r2 = ResourceRecord.new("n1_r2", "n1", 2, properties=props)
        self.store.add_resource(n1_r2)

        xattr_1 = "port5000"
        constraints = {}
        p1 = ProcessRecord.new(None, "p1", get_process_definition(),
                               ProcessState.REQUESTED, constraints=constraints,
                               node_exclusive=xattr_1)
        p1key = p1.get_key()
        self.store.add_process(p1)
        self.store.enqueue_process(*p1key)

        # The first process should be assigned, since nothing else needs this
        # attr
        # TODO: it's possible that this could be assigned to n1_r2, but hopefully not
        self.wait_resource(n1_r1.resource_id, lambda r: list(p1key) in r.assigned)
        time.sleep(0.05)
        self.resource_client.check_process_launched(p1, n1_r1.resource_id)
        self.wait_process(p1.owner, p1.upid,
                          lambda p: p.assigned == n1_r1.resource_id and
                                    p.state == ProcessState.PENDING)

        p2 = ProcessRecord.new(None, "p2", get_process_definition(),
                               ProcessState.REQUESTED, constraints=constraints,
                               node_exclusive=xattr_1)
        p2key = p2.get_key()
        self.store.add_process(p2)
        self.store.enqueue_process(*p2key)

        # The second process should wait, since the first process already
        # holds this attr on n1
        self.wait_process(p2.owner, p2.upid,
                          lambda p: p.state == ProcessState.WAITING)

        # If we start another node, we should see that second process be
        # scheduled
        n2 = NodeRecord.new("n2", "d1")
        self.store.add_node(n2)

        props = {"engine": "engine1"}
        n2_r1 = ResourceRecord.new("n2_r1", "n2", 2, properties=props)
        self.store.add_resource(n2_r1)

        props = {"engine": "engine1"}
        n2_r2 = ResourceRecord.new("n2_r2", "n2", 2, properties=props)
        self.store.add_resource(n2_r2)

        # The second process should now be assigned
        self.wait_resource(n2_r1.resource_id, lambda r: list(p2key) in r.assigned)
        time.sleep(0.05)
        self.resource_client.check_process_launched(p2, n2_r1.resource_id)
        self.wait_process(p2.owner, p2.upid,
                          lambda p: p.assigned == n2_r1.resource_id and
                                    p.state == ProcessState.PENDING)

        # Now we submit another process with a different exclusive attribute
        # It should be assigned right away
        xattr_2 = "port5001"
        constraints = {}
        p3 = ProcessRecord.new(None, "p3", get_process_definition(),
                               ProcessState.REQUESTED, constraints=constraints,
                               node_exclusive=xattr_2)
        p3key = p3.get_key()
        self.store.add_process(p3)
        self.store.enqueue_process(*p3key)

        p3_resource = None
        for resource in [n1_r1, n1_r2, n2_r1, n2_r2]:
            try:
                self.wait_resource(resource.resource_id, lambda r: list(p3key) in r.assigned,
                    timeout=0.5)
            except Exception:
                continue
            time.sleep(0.05)
            self.resource_client.check_process_launched(p3, resource.resource_id)
            self.wait_process(p3.owner, p3.upid,
                              lambda p: p.assigned == resource.resource_id and
                                        p.state == ProcessState.PENDING)
            p3_resource = resource
            break  # stop polling once p3's resource is found

        self.assertIsNotNone(p3_resource)

        # Now submit a fourth process, which should be scheduled to a different
        # node from p3
        p4 = ProcessRecord.new(None, "p4", get_process_definition(),
                               ProcessState.REQUESTED, constraints=constraints,
                               node_exclusive=xattr_2)
        p4key = p4.get_key()
        self.store.add_process(p4)
        self.store.enqueue_process(*p4key)

        p4_resource = None
        for resource in [n1_r1, n1_r2, n2_r1, n2_r2]:
            try:
                self.wait_resource(resource.resource_id, lambda r: list(p4key) in r.assigned,
                    timeout=0.5)
            except Exception:
                continue
            time.sleep(0.05)
            self.resource_client.check_process_launched(p4, resource.resource_id)
            self.wait_process(p4.owner, p4.upid,
                              lambda p: p.assigned == resource.resource_id and
                                        p.state == ProcessState.PENDING)
            p4_resource = resource
            break  # stop polling once p4's resource is found

        self.assertIsNotNone(p4_resource)

        self.assertNotEqual(p3_resource.node_id, p4_resource.node_id)
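The wait_resource and wait_process calls used throughout this test come from the project's test harness. A generic poll-until-predicate helper in the same spirit might look like this (a sketch only, not the actual utilities):

import time

def wait_until(fetch, predicate, timeout=5.0, interval=0.05):
    """Poll fetch() until predicate(obj) is True, or raise after timeout."""
    deadline = time.time() + timeout
    while time.time() < deadline:
        obj = fetch()
        if predicate(obj):
            return obj
        time.sleep(interval)
    raise Exception("timed out waiting for condition")

# e.g. wait_until(lambda: store.get_process(None, "p3"),
#                 lambda p: p.state == ProcessState.PENDING)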