Example #1
    def test_static_waiting_handler(self):
        configfile = self.setup_config('static-2-nodes-multilabel.yaml')
        pool = self.useNodepool(configfile, watermark_sleep=1)
        pool.start()

        req = zk.NodeRequest()
        req.state = zk.REQUESTED
        req.node_types.append('fake-label')
        self.zk.storeNodeRequest(req)
        req = self.waitForNodeRequest(req, zk.FULFILLED)
        node = self.zk.getNode(req.nodes[0])
        self.zk.lockNode(node)
        node.state = zk.USED
        self.zk.storeNode(node)

        req_waiting = zk.NodeRequest()
        req_waiting.state = zk.REQUESTED
        req_waiting.node_types.append('fake-label')
        self.zk.storeNodeRequest(req_waiting)
        req_waiting = self.waitForNodeRequest(req_waiting, zk.PENDING)

        req = zk.NodeRequest()
        req.state = zk.REQUESTED
        req.node_types.append('fake-label2')
        self.zk.storeNodeRequest(req)
        req = self.waitForNodeRequest(req, zk.FULFILLED)

        req_waiting = self.zk.getNodeRequest(req_waiting.id)
        self.assertEqual(req_waiting.state, zk.PENDING)

        self.zk.unlockNode(node)
        self.waitForNodeDeletion(node)
        self.waitForNodeRequest(req_waiting, zk.FULFILLED)
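
The examples in this listing repeat the same submit-and-wait boilerplate: build a zk.NodeRequest, store it, and wait for a target state. A minimal helper sketch that could factor this out, assuming the test-class attributes used throughout (self.zk, self.waitForNodeRequest); the helper name itself is hypothetical:

    def _submit_request(self, *labels, expected=zk.FULFILLED):
        # Hypothetical helper: build, store, and wait on a node request.
        req = zk.NodeRequest()
        req.state = zk.REQUESTED
        for label in labels:
            req.node_types.append(label)
        self.zk.storeNodeRequest(req)
        return self.waitForNodeRequest(req, expected)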
Example #2
    def test_fail_minready_request_at_capacity(self):
        '''
        A min-ready request to a provider that is already at capacity should
        be declined.
        '''
        configfile = self.setup_config('node_min_ready_capacity.yaml')
        self.useBuilder(configfile)
        self.waitForImage('fake-provider', 'fake-image')
        pool = self.useNodepool(configfile, watermark_sleep=1)
        pool.start()

        # Get an initial node ready
        req = zk.NodeRequest()
        req.state = zk.REQUESTED
        req.node_types.append("fake-label")
        self.zk.storeNodeRequest(req)
        req = self.waitForNodeRequest(req)
        self.assertEqual(req.state, zk.FULFILLED)

        # Now simulate a min-ready request
        min_ready_req = zk.NodeRequest()
        min_ready_req.state = zk.REQUESTED
        min_ready_req.node_types.append("fake-label")
        min_ready_req.requestor = "NodePool:min-ready"
        self.zk.storeNodeRequest(min_ready_req)
        min_ready_req = self.waitForNodeRequest(min_ready_req)
        self.assertEqual(min_ready_req.state, zk.FAILED)
        self.assertNotEqual(min_ready_req.declined_by, [])
Example #3
    def test_static_multiprovider_handler(self):
        configfile = self.setup_config('multiproviders.yaml')
        pool = self.useNodepool(configfile, watermark_sleep=1)
        pool.start()

        self.wait_for_config(pool)
        manager = pool.getProviderManager('openstack-provider')
        manager._client.create_image(name="fake-image")

        req = zk.NodeRequest()
        req.state = zk.REQUESTED
        req.node_types.append('fake-static-label')
        self.zk.storeNodeRequest(req)

        self.log.debug("Waiting for request %s", req.id)
        req = self.waitForNodeRequest(req)
        self.assertEqual(req.state, zk.FULFILLED)
        self.assertEqual(len(req.nodes), 1)

        req = zk.NodeRequest()
        req.state = zk.REQUESTED
        req.node_types.append('fake-openstack-label')
        self.zk.storeNodeRequest(req)

        self.log.debug("Waiting for request %s", req.id)
        req = self.waitForNodeRequest(req)
        self.assertEqual(req.state, zk.FULFILLED)
        self.assertEqual(len(req.nodes), 1)
Example #4
    def test_node_assignment_order(self):
        """Test that nodes are assigned in the order requested"""
        configfile = self.setup_config('node_many_labels.yaml')
        self.useBuilder(configfile)
        self.waitForImage('fake-provider', 'fake-image')

        pool = self.useNodepool(configfile, watermark_sleep=1)
        pool.start()

        self.waitForNodes('fake-label1')
        self.waitForNodes('fake-label2')
        self.waitForNodes('fake-label3')
        self.waitForNodes('fake-label4')

        req = zk.NodeRequest()
        req.state = zk.REQUESTED
        req.node_types.append('fake-label3')
        req.node_types.append('fake-label1')
        req.node_types.append('fake-label4')
        req.node_types.append('fake-label2')
        self.zk.storeNodeRequest(req)

        req = self.waitForNodeRequest(req)
        self.assertEqual(req.state, zk.FULFILLED)
        self.assertEqual(4, len(req.nodes))
        nodes = []
        for node_id in req.nodes:
            nodes.append(self.zk.getNode(node_id))
        self.assertEqual(nodes[0].type, 'fake-label3')
        self.assertEqual(nodes[1].type, 'fake-label1')
        self.assertEqual(nodes[2].type, 'fake-label4')
        self.assertEqual(nodes[3].type, 'fake-label2')
Example #5
    def test_liveness_check(self):
        '''
        Test liveness check during request handling.
        '''
        configfile = self.setup_config('static-basic.yaml')
        pool = self.useNodepool(configfile, watermark_sleep=1)
        pool.start()
        nodes = self.waitForNodes('fake-label')
        self.assertEqual(len(nodes), 1)

        req = zk.NodeRequest()
        req.state = zk.REQUESTED
        req.node_types.append('fake-label')

        with mock.patch("nodepool.nodeutils.nodescan") as nodescan_mock:
            nodescan_mock.side_effect = OSError
            self.zk.storeNodeRequest(req)
            self.waitForNodeDeletion(nodes[0])

        self.log.debug("Waiting for request %s", req.id)
        req = self.waitForNodeRequest(req)

        self.assertEqual(req.state, zk.FULFILLED)
        self.assertEqual(len(req.nodes), 1)
        self.assertNotEqual(req.nodes[0], nodes[0].id)
Example #6
    def test_openshift_native(self):
        configfile = self.setup_config('openshift.yaml')
        pool = self.useNodepool(configfile, watermark_sleep=1)
        pool.start()
        req = zk.NodeRequest()
        req.state = zk.REQUESTED
        req.node_types.append('openshift-project')
        self.zk.storeNodeRequest(req)

        self.log.debug("Waiting for request %s", req.id)
        req = self.waitForNodeRequest(req)
        self.assertEqual(req.state, zk.FULFILLED)

        self.assertNotEqual(req.nodes, [])
        node = self.zk.getNode(req.nodes[0])
        self.assertEqual(node.allocated_to, req.id)
        self.assertEqual(node.state, zk.READY)
        self.assertIsNotNone(node.launcher)
        self.assertEqual(node.connection_type, 'project')
        self.assertEqual(node.connection_port.get('token'), 'fake-token')

        node.state = zk.DELETING
        self.zk.storeNode(node)

        self.waitForNodeDeletion(node)
Example #7
    def test_request_list_json(self):
        configfile = self.setup_config('node.yaml')
        pool = self.useNodepool(configfile, watermark_sleep=1)
        self.useBuilder(configfile)
        pool.start()
        webapp = self.useWebApp(pool, port=0)
        webapp.start()
        port = webapp.server.socket.getsockname()[1]

        self.waitForImage('fake-provider', 'fake-image')
        self.waitForNodes('fake-label')
        req = zk.NodeRequest()
        req.state = zk.PENDING   # so it will be ignored
        req.node_types = ['fake-label']
        req.requestor = 'test_request_list'
        self.zk.storeNodeRequest(req)

        http_req = request.Request(
            "http://localhost:%s/request-list.json" % port)
        f = request.urlopen(http_req)
        self.assertEqual(f.info().get('Content-Type'),
                         'application/json')
        data = f.read()
        objs = json.loads(data.decode('utf8'))
        self.assertDictContainsSubset({'node_types': ['fake-label'],
                                       'requestor': 'test_request_list', },
                                      objs[0])
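
Note that assertDictContainsSubset is deprecated since Python 3.2; an equivalent, deprecation-safe check with plain assertions, should the example need updating:

        # Same semantics as the assertDictContainsSubset call above.
        expected = {'node_types': ['fake-label'],
                    'requestor': 'test_request_list'}
        for key, value in expected.items():
            self.assertEqual(objs[0].get(key), value)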
Example #8
    def test_provider_wont_wedge(self):
        '''
        A provider should not wedge itself when (1) it is at maximum capacity
        (# registered nodes == max-servers), (2) none of its current nodes
        are being used, and (3) a request comes in with a label that it does
        not yet have available. Normally, situation (3) combined with (1)
        would cause the provider to pause until capacity becomes available,
        but because of (2), it never will, and the provider would wedge.
        '''
        configfile = self.setup_config('wedge_test.yaml')
        self.useBuilder(configfile)
        pool = self.useNodepool(configfile, watermark_sleep=1)
        pool.start()

        # Wait for fake-label1 min-ready request to be fulfilled, which will
        # put us at maximum capacity with max-servers of 1.
        label1_nodes = self.waitForNodes('fake-label1')
        self.assertEqual(1, len(label1_nodes))

        # Now we submit a request for fake-label2, which is not yet available.
        req = zk.NodeRequest()
        req.state = zk.REQUESTED
        req.node_types.append('fake-label2')
        self.zk.storeNodeRequest(req)

        # The provider should pause here to handle the fake-label2 request.
        # But because the fake-label1 node is not being used, and will never
        # be freed because we are paused and not handling additional requests,
        # the pool worker thread should recognize that and delete the unused
        # fake-label1 node for us. It can then fulfill the fake-label2 request.
        self.waitForNodeDeletion(label1_nodes[0])
        req = self.waitForNodeRequest(req)
        self.assertEqual(req.state, zk.FULFILLED)
Example #9
    def test_static_request_handled(self):
        '''
        Test that a node is reregistered after handling a request.
        '''
        configfile = self.setup_config('static-basic.yaml')
        pool = self.useNodepool(configfile, watermark_sleep=1)
        pool.start()
        nodes = self.waitForNodes('fake-label')
        self.assertEqual(len(nodes), 1)

        req = zk.NodeRequest()
        req.state = zk.REQUESTED
        req.node_types.append('fake-label')
        self.zk.storeNodeRequest(req)

        self.log.debug("Waiting for request %s", req.id)
        req = self.waitForNodeRequest(req)
        self.assertEqual(req.state, zk.FULFILLED)
        self.assertEqual(len(req.nodes), 1)
        self.assertEqual(req.nodes[0], nodes[0].id)

        # Mark node as used
        nodes[0].state = zk.USED
        self.zk.storeNode(nodes[0])

        # Our single node should have been used, deleted, then reregistered
        new_nodes = self.waitForNodes('fake-label')
        self.assertEqual(len(new_nodes), 1)
        self.assertEqual(nodes[0].hostname, new_nodes[0].hostname)
Example #10
    def _create_node_request(self):
        req = zk.NodeRequest()
        req.state = zk.REQUESTED
        req.node_types.append('label1')
        self.zk.storeNodeRequest(req)
        self.assertIsNotNone(
            self.zk.client.exists(self.zk._requestPath(req.id)))
        return req
Example #11
    def test_getNodeRequest(self):
        r = zk.NodeRequest("500-123")
        r.state = zk.REQUESTED
        path = self.zk._requestPath(r.id)
        self.zk.client.create(path, value=r.serialize(),
                              makepath=True, ephemeral=True)
        o = self.zk.getNodeRequest(r.id)
        self.assertIsInstance(o, zk.NodeRequest)
        self.assertEqual(r.id, o.id)
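
The _requestPath helper used above builds the znode path for a request id. A plausible sketch of it; the root constant is an assumption about nodepool's ZooKeeper layout, not something this listing confirms:

    def _requestPath(self, request_id):
        # Assumed layout: request znodes live under a fixed root path.
        return "%s/%s" % ("/nodepool/requests", request_id)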
Example #12
    # Helper that closes over self (presumably nested inside a test method,
    # hence no self parameter).
    def createRequest(label_name):
        req = zk.NodeRequest()
        req.state = zk.REQUESTED
        req.requestor = "NodePool:min-ready"
        req.node_types.append(label_name)
        req.reuse = False  # force new node launches
        self.zk.storeNodeRequest(req, priority="100")
        if label_name not in self._submittedRequests:
            self._submittedRequests[label_name] = []
        self._submittedRequests[label_name].append(req)
Example #13
    def test_paused_gets_declined(self):
        """Test that a paused request, that later gets declined, unpauses."""

        # First config has max-servers set to 2
        configfile = self.setup_config('pause_declined_1.yaml')
        self.useBuilder(configfile)
        self.waitForImage('fake-provider', 'fake-image')
        pool = self.useNodepool(configfile, watermark_sleep=1)
        pool.start()

        # Create a request that uses all capacity (2 servers)
        req = zk.NodeRequest()
        req.state = zk.REQUESTED
        req.node_types.append('fake-label')
        req.node_types.append('fake-label')
        self.zk.storeNodeRequest(req)
        req = self.waitForNodeRequest(req)
        self.assertEqual(req.state, zk.FULFILLED)
        self.assertEqual(len(req.nodes), 2)

        # Now that we have 2 nodes in use, create another request that
        # requests two nodes, which should cause the request to pause.
        req2 = zk.NodeRequest()
        req2.state = zk.REQUESTED
        req2.node_types.append('fake-label')
        req2.node_types.append('fake-label')
        self.zk.storeNodeRequest(req2)
        req2 = self.waitForNodeRequest(req2, (zk.PENDING, ))

        # Second config decreases max-servers to 1
        self.replace_config(configfile, 'pause_declined_2.yaml')

        # Because the second request asked for 2 nodes, but that now exceeds
        # max-servers, req2 should get declined now, and transition to FAILED
        req2 = self.waitForNodeRequest(req2, (zk.FAILED, ))
        self.assertNotEqual(req2.declined_by, [])
Example #14
    def test_static_multinode_handler(self):
        configfile = self.setup_config('static.yaml')
        pool = self.useNodepool(configfile, watermark_sleep=1)
        pool.start()

        req = zk.NodeRequest()
        req.state = zk.REQUESTED
        req.node_types.append('fake-label')
        req.node_types.append('fake-concurrent-label')
        self.zk.storeNodeRequest(req)

        self.log.debug("Waiting for request %s", req.id)
        req = self.waitForNodeRequest(req)
        self.assertEqual(req.state, zk.FULFILLED)
        self.assertEqual(len(req.nodes), 2)
Example #15
    def test_NodeRequest_toDict(self):
        o = zk.NodeRequest("500-123")
        o.declined_by.append("abc")
        o.node_types.append('trusty')
        o.nodes.append('100')
        o.reuse = False
        o.requestor = 'zuul'
        d = o.toDict()
        self.assertNotIn('id', d)
        self.assertIn('state', d)
        self.assertIn('state_time', d)
        self.assertEqual(d['declined_by'], o.declined_by)
        self.assertEqual(d['node_types'], o.node_types)
        self.assertEqual(d['nodes'], o.nodes)
        self.assertEqual(d['reuse'], o.reuse)
        self.assertEqual(d['requestor'], o.requestor)
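
Based on the assertions above, the serialized dict looks roughly like this sketch (the state and state_time values are illustrative, and other keys may be present):

    {
        'state': zk.REQUESTED,        # illustrative
        'state_time': 1500000000.0,   # illustrative timestamp
        'declined_by': ['abc'],
        'node_types': ['trusty'],
        'nodes': ['100'],
        'reuse': False,
        'requestor': 'zuul',
    }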
Example #16
    def test_invalid_image_fails(self):
        '''
        Test that an invalid image declines and fails the request.
        '''
        configfile = self.setup_config('node.yaml')
        pool = self.useNodepool(configfile, watermark_sleep=1)
        pool.start()

        req = zk.NodeRequest()
        req.state = zk.REQUESTED
        req.node_types.append("zorky-zumba")
        self.zk.storeNodeRequest(req)

        req = self.waitForNodeRequest(req)
        self.assertEqual(req.state, zk.FAILED)
        self.assertNotEqual(req.declined_by, [])
Example #17
    def test_request_list(self):
        configfile = self.setup_config('node.yaml')
        pool = self.useNodepool(configfile, watermark_sleep=1)
        self.useBuilder(configfile)
        pool.start()
        self.waitForImage('fake-provider', 'fake-image')
        nodes = self.waitForNodes('fake-label')
        self.assertEqual(len(nodes), 1)

        req = zk.NodeRequest()
        req.state = zk.PENDING  # so it will be ignored
        req.node_types = ['fake-label']
        req.requestor = 'test_request_list'
        self.zk.storeNodeRequest(req)

        self.assert_listed(configfile, ['request-list'], 0, req.id, 1)
Example #18
    def test_node_assignment(self):
        '''
        Successful node launch should have unlocked nodes in READY state
        and assigned to the request.
        '''
        configfile = self.setup_config('node_no_min_ready.yaml')
        self.useBuilder(configfile)
        image = self.waitForImage('fake-provider', 'fake-image')
        self.assertEqual(image.username, 'zuul')

        nodepool.launcher.LOCK_CLEANUP = 1
        pool = self.useNodepool(configfile, watermark_sleep=1)
        pool.start()

        req = zk.NodeRequest()
        req.state = zk.REQUESTED
        req.node_types.append('fake-label')
        self.zk.storeNodeRequest(req)

        req = self.waitForNodeRequest(req)
        self.assertEqual(req.state, zk.FULFILLED)

        self.assertNotEqual(req.nodes, [])
        for node_id in req.nodes:
            node = self.zk.getNode(node_id)
            self.assertEqual(node.allocated_to, req.id)
            self.assertEqual(node.state, zk.READY)
            self.assertIsNotNone(node.launcher)
            self.assertEqual(node.cloud, 'fake')
            self.assertEqual(node.region, 'fake-region')
            self.assertEqual(node.az, "az1")
            self.assertEqual(node.username, "zuul")
            self.assertEqual(node.connection_type, 'ssh')
            p = "{path}/{id}".format(path=self.zk._imageUploadPath(
                image.image_name, image.build_id, image.provider_name),
                                     id=image.id)
            self.assertEqual(node.image_id, p)
            self.zk.lockNode(node, blocking=False)
            self.zk.unlockNode(node)

        # Verify the cleanup thread removed the lock
        self.assertIsNotNone(
            self.zk.client.exists(self.zk._requestLockPath(req.id)))
        self.zk.deleteNodeRequest(req)
        self.waitForNodeRequestLockDeletion(req.id)
        self.assertReportedStat('nodepool.nodes.ready', '1|g')
        self.assertReportedStat('nodepool.nodes.building', '0|g')
Example #19
    def _create_pending_request(self):
        req = zk.NodeRequest()
        req.state = zk.PENDING
        req.requestor = 'test_nodepool'
        req.node_types.append('fake-label')
        self.zk.storeNodeRequest(req)

        # Create a node that is allocated to the request, but not yet assigned
        # within the NodeRequest object
        node = zk.Node()
        node.state = zk.READY
        node.type = 'fake-label'
        node.public_ipv4 = 'fake'
        node.provider = 'fake-provider'
        node.pool = 'main'
        node.allocated_to = req.id
        self.zk.storeNode(node)

        return (req, node)
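
A usage sketch for the fixture helper above; the assertion simply restates the allocation the helper sets up:

        req, node = self._create_pending_request()
        self.assertEqual(node.allocated_to, req.id)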
Example #20
    def test_disabled_provider(self):
        '''
        A request should fail even with a provider that is disabled by
        setting max-servers to 0. Because we determine that all providers
        have declined a request by comparing the request's declined_by
        attribute to the list of registered launchers, each provider must
        attempt to handle the request at least once, and thus decline it.
        '''
        configfile = self.setup_config('disabled_provider.yaml')
        self.useBuilder(configfile)
        pool = self.useNodepool(configfile, watermark_sleep=1)
        pool.start()

        req = zk.NodeRequest()
        req.state = zk.REQUESTED
        req.node_types.append('fake-label')
        self.zk.storeNodeRequest(req)

        req = self.waitForNodeRequest(req)
        self.assertEqual(req.state, zk.FAILED)
Example #21
    def test_node_launch_retries(self):
        configfile = self.setup_config('node_launch_retry.yaml')
        pool = self.useNodepool(configfile, watermark_sleep=1)
        self.useBuilder(configfile)
        pool.start()
        self.wait_for_config(pool)
        manager = pool.getProviderManager('fake-provider')
        manager.createServer_fails = 2
        self.waitForImage('fake-provider', 'fake-image')

        req = zk.NodeRequest()
        req.state = zk.REQUESTED
        req.node_types.append('fake-label')
        self.zk.storeNodeRequest(req)

        req = self.waitForNodeRequest(req)
        self.assertEqual(req.state, zk.FAILED)

        # retries in config is set to 2, so 2 attempts to create a server
        self.assertEqual(0, manager.createServer_fails)
Example #22
    def test_fail_request_on_launch_failure(self):
        '''
        Test that provider launch error fails the request.
        '''
        configfile = self.setup_config('node_launch_retry.yaml')
        self.useBuilder(configfile)
        self.waitForImage('fake-provider', 'fake-image')

        pool = self.useNodepool(configfile, watermark_sleep=1)
        pool.start()
        self.wait_for_config(pool)
        manager = pool.getProviderManager('fake-provider')
        manager.createServer_fails = 2

        req = zk.NodeRequest()
        req.state = zk.REQUESTED
        req.node_types.append('fake-label')
        self.zk.storeNodeRequest(req)

        req = self.waitForNodeRequest(req)
        self.assertEqual(0, manager.createServer_fails)
        self.assertEqual(req.state, zk.FAILED)
        self.assertNotEqual(req.declined_by, [])
Example #23
    def test_over_quota(self, config='node_quota_cloud.yaml'):
        '''
        This tests what happens when a cloud unexpectedly returns an
        over-quota error.
        '''
        # Start with an instance quota of 2
        max_cores = math.inf
        max_instances = 2
        max_ram = math.inf

        # patch the cloud with requested quota
        def fake_get_quota():
            return (max_cores, max_instances, max_ram)

        self.useFixture(
            fixtures.MockPatchObject(
                Drivers.get('fake')['provider'].fake_cloud, '_get_quota',
                fake_get_quota))

        configfile = self.setup_config(config)
        self.useBuilder(configfile)
        self.waitForImage('fake-provider', 'fake-image')

        nodepool.launcher.LOCK_CLEANUP = 1
        pool = self.useNodepool(configfile, watermark_sleep=1)
        pool.start()
        self.wait_for_config(pool)

        client = pool.getProviderManager('fake-provider')._getClient()

        # Wait for a single node to be created
        req1 = zk.NodeRequest()
        req1.state = zk.REQUESTED
        req1.node_types.append('fake-label')
        self.log.debug("Adding first request")
        self.zk.storeNodeRequest(req1)
        req1 = self.waitForNodeRequest(req1)
        self.assertEqual(req1.state, zk.FULFILLED)

        # Lock this node so it appears as used and not deleted
        req1_node = self.zk.getNode(req1.nodes[0])
        self.zk.lockNode(req1_node, blocking=False)

        # Now, reduce the quota so the next node unexpectedly
        # (according to nodepool's quota estimate) fails.
        client.max_instances = 1

        # Request a second node; this request should fail.
        req2 = zk.NodeRequest()
        req2.state = zk.REQUESTED
        req2.node_types.append('fake-label')
        self.log.debug("Adding second request")
        self.zk.storeNodeRequest(req2)
        req2 = self.waitForNodeRequest(req2)
        self.assertEqual(req2.state, zk.FAILED)

        # After the second request failed, the internal quota estimate
        # should be reset, so the next request should pause to wait
        # for more quota to become available.
        req3 = zk.NodeRequest()
        req3.state = zk.REQUESTED
        req3.node_types.append('fake-label')
        self.log.debug("Adding third request")
        self.zk.storeNodeRequest(req3)
        req3 = self.waitForNodeRequest(req3, (zk.PENDING, ))
        self.assertEqual(req3.state, zk.PENDING)

        # Wait until there is a paused request handler and verify that
        # there is still only one server built (from the first
        # request).
        pool_worker = pool.getPoolWorkers('fake-provider')
        while not pool_worker[0].paused_handler:
            time.sleep(0.1)
        self.assertEqual(len(client._server_list), 1)
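
The fake quota hook above returns a (max_cores, max_instances, max_ram) tuple. A minimal sketch of the kind of headroom check such an estimate implies; this is illustrative only, not nodepool's actual API:

    def has_headroom(quota, cores_used, instances_used, ram_used, node_size):
        # Illustrative: would one more node of node_size fit under quota?
        max_cores, max_instances, max_ram = quota
        return (cores_used + node_size['cores'] <= max_cores
                and instances_used + 1 <= max_instances
                and ram_used + node_size['ram'] <= max_ram)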
Example #24
    def _test_node_assignment_at_quota(self,
                                       config,
                                       max_cores=100,
                                       max_instances=20,
                                       max_ram=1000000):
        '''
        Successful node launch should have unlocked nodes in READY state
        and assigned to the request. This should be run with a quota that
        fits for two nodes.
        '''

        # patch the cloud with requested quota
        def fake_get_quota():
            return (max_cores, max_instances, max_ram)

        self.useFixture(
            fixtures.MockPatchObject(
                Drivers.get('fake')['provider'].fake_cloud, '_get_quota',
                fake_get_quota))

        configfile = self.setup_config(config)
        self.useBuilder(configfile)
        self.waitForImage('fake-provider', 'fake-image')

        nodepool.launcher.LOCK_CLEANUP = 1
        pool = self.useNodepool(configfile, watermark_sleep=1)
        pool.start()
        self.wait_for_config(pool)

        client = pool.getProviderManager('fake-provider')._getClient()

        req1 = zk.NodeRequest()
        req1.state = zk.REQUESTED
        req1.node_types.append('fake-label')
        req1.node_types.append('fake-label')
        self.zk.storeNodeRequest(req1)

        self.log.debug("Waiting for 1st request %s", req1.id)
        req1 = self.waitForNodeRequest(req1, (zk.FULFILLED, ))
        self.assertEqual(len(req1.nodes), 2)

        # Mark the first request's nodes as in use so they won't be deleted
        # when we pause. Locking them is enough.
        req1_node1 = self.zk.getNode(req1.nodes[0])
        req1_node2 = self.zk.getNode(req1.nodes[1])
        self.zk.lockNode(req1_node1, blocking=False)
        self.zk.lockNode(req1_node2, blocking=False)

        # One of the things we want to test is that if we spawn many
        # node launches at once, we do not deadlock while the request
        # handler pauses for quota.  To ensure we test that case,
        # pause server creation until we have accepted all of the node
        # requests we submit.  This will ensure that we hold locks on
        # all of the nodes before pausing so that we can validate they
        # are released.
        req2 = zk.NodeRequest()
        req2.state = zk.REQUESTED
        req2.node_types.append('fake-label')
        req2.node_types.append('fake-label')
        self.zk.storeNodeRequest(req2)
        req2 = self.waitForNodeRequest(req2, (zk.PENDING, ))

        # At this point, we should have already created two servers for the
        # first request, and the request handler has accepted the second node
        # request but paused waiting for the server count to go below quota.
        # Wait until there is a paused request handler and check if there
        # are exactly two servers
        pool_worker = pool.getPoolWorkers('fake-provider')
        while not pool_worker[0].paused_handler:
            time.sleep(0.1)
        self.assertEqual(len(client._server_list), 2)

        # Mark the first request's nodes as USED, which will get them deleted
        # and allow the second to proceed.
        self.log.debug("Marking first node as used %s", req1.id)
        req1_node1.state = zk.USED
        self.zk.storeNode(req1_node1)
        self.zk.unlockNode(req1_node1)
        self.waitForNodeDeletion(req1_node1)

        # To force the sequential nature of what we're testing, wait for
        # the 2nd request to get a node allocated to it now that we've
        # freed up a node.
        self.log.debug("Waiting for node allocation for 2nd request")
        done = False
        while not done:
            for n in self.zk.nodeIterator():
                if n.allocated_to == req2.id:
                    done = True
                    break

        self.log.debug("Marking second node as used %s", req1.id)
        req1_node2.state = zk.USED
        self.zk.storeNode(req1_node2)
        self.zk.unlockNode(req1_node2)
        self.waitForNodeDeletion(req1_node2)

        self.log.debug("Deleting 1st request %s", req1.id)
        self.zk.deleteNodeRequest(req1)
        self.waitForNodeRequestLockDeletion(req1.id)

        req2 = self.waitForNodeRequest(req2, (zk.FULFILLED, ))
        self.assertEqual(len(req2.nodes), 2)
Example #25
    def test_ec2_machine(self):
        aws_id = 'AK000000000000000000'
        aws_key = '0123456789abcdef0123456789abcdef0123456789abcdef'
        self.useFixture(
            fixtures.EnvironmentVariable('AWS_ACCESS_KEY_ID', aws_id))
        self.useFixture(
            fixtures.EnvironmentVariable('AWS_SECRET_ACCESS_KEY', aws_key))

        ec2 = boto3.client('ec2', region_name='us-west-2')

        # TEST-NET-3
        vpc = ec2.create_vpc(CidrBlock='203.0.113.0/24')

        subnet = ec2.create_subnet(
            CidrBlock='203.0.113.128/25', VpcId=vpc['Vpc']['VpcId'])
        subnet_id = subnet['Subnet']['SubnetId']
        sg = ec2.create_security_group(
            GroupName='zuul-nodes', VpcId=vpc['Vpc']['VpcId'],
            Description='Zuul Nodes')
        sg_id = sg['GroupId']

        ec2_template = os.path.join(
            os.path.dirname(__file__), '..', 'fixtures', 'aws.yaml')
        with open(ec2_template) as f:
            raw_config = yaml.safe_load(f)
        raw_config['zookeeper-servers'][0] = {
            'host': self.zookeeper_host,
            'port': self.zookeeper_port,
            'chroot': self.zookeeper_chroot,
        }
        raw_config['providers'][0]['pools'][0]['subnet-id'] = subnet_id
        raw_config['providers'][0]['pools'][0]['security-group-id'] = sg_id
        with tempfile.NamedTemporaryFile() as tf:
            tf.write(yaml.safe_dump(
                raw_config, default_flow_style=False).encode('utf-8'))
            tf.flush()
            configfile = self.setup_config(tf.name)
            pool = self.useNodepool(configfile, watermark_sleep=1)
            pool.start()
            req = zk.NodeRequest()
            req.state = zk.REQUESTED
            req.node_types.append('ubuntu1404')
            with patch('nodepool.driver.aws.handler.nodescan') as nodescan:
                nodescan.return_value = 'MOCK KEY'
                self.zk.storeNodeRequest(req)

                self.log.debug("Waiting for request %s", req.id)
                req = self.waitForNodeRequest(req)

                self.assertEqual(req.state, zk.FULFILLED)

                self.assertNotEqual(req.nodes, [])
                node = self.zk.getNode(req.nodes[0])
                self.assertEqual(node.allocated_to, req.id)
                self.assertEqual(node.state, zk.READY)
                self.assertIsNotNone(node.launcher)
                self.assertEqual(node.connection_type, 'ssh')
                nodescan.assert_called_with(
                    node.interface_ip,
                    port=22,
                    timeout=180,
                    gather_hostkeys=True)
                # A new request will be paused for lack of quota until this
                # one is deleted
                req2 = zk.NodeRequest()
                req2.state = zk.REQUESTED
                req2.node_types.append('ubuntu1404')
                self.zk.storeNodeRequest(req2)
                req2 = self.waitForNodeRequest(
                    req2, (zk.PENDING, zk.FAILED, zk.FULFILLED))
                self.assertEqual(req2.state, zk.PENDING)
                # It could flip from PENDING to one of the others, so sleep a
                # bit and be sure
                time.sleep(1)
                req2 = self.waitForNodeRequest(
                    req2, (zk.PENDING, zk.FAILED, zk.FULFILLED))
                self.assertEqual(req2.state, zk.PENDING)

                node.state = zk.DELETING
                self.zk.storeNode(node)

                self.waitForNodeDeletion(node)

                req2 = self.waitForNodeRequest(req2, (zk.FAILED, zk.FULFILLED))
                self.assertEqual(req2.state, zk.FULFILLED)
                node = self.zk.getNode(req2.nodes[0])
                node.state = zk.DELETING
                self.zk.storeNode(node)
                self.waitForNodeDeletion(node)
Example #26
    def test_failed_provider(self):
        """Test that broken provider doesn't fail node requests."""
        configfile = self.setup_config('launcher_two_provider_max_1.yaml')
        self.useBuilder(configfile)
        pool = self.useNodepool(configfile, watermark_sleep=.5)
        pool.start()
        self.wait_for_config(pool)

        # Steady state at images available.
        self.waitForImage('fake-provider', 'fake-image')
        self.waitForImage('fake-provider2', 'fake-image')
        # We have now reached steady state and can manipulate the system to
        # test failing cloud behavior.

        # Make two requests so that the next requests are paused.
        # Note we use different provider specific labels here to avoid
        # a race where a single provider fulfills both of these initial
        # requests.

        # fake-provider
        req = zk.NodeRequest()
        req.state = zk.REQUESTED
        req.node_types.append('fake-label2')
        self.zk.storeNodeRequest(req)
        req = self.waitForNodeRequest(req, zk.FULFILLED)

        # fake-provider2
        req = zk.NodeRequest()
        req.state = zk.REQUESTED
        req.node_types.append('fake-label3')
        self.zk.storeNodeRequest(req)
        req = self.waitForNodeRequest(req, zk.FULFILLED)

        nodes = map(pool.zk.getNode, pool.zk.getNodes())
        provider1_first = None
        provider2_first = None
        for node in nodes:
            if node.provider == 'fake-provider2':
                provider2_first = node
            elif node.provider == 'fake-provider':
                provider1_first = node

        # Mark the nodes as being used so they won't be deleted at pause.
        # Locking them is enough.
        self.zk.lockNode(provider1_first, blocking=False)
        self.zk.lockNode(provider2_first, blocking=False)

        # Next two requests will go pending one for each provider.
        req1 = zk.NodeRequest()
        req1.state = zk.REQUESTED
        req1.node_types.append('fake-label')
        self.zk.storeNodeRequest(req1)
        req1 = self.waitForNodeRequest(req1, zk.PENDING)

        req2 = zk.NodeRequest()
        req2.state = zk.REQUESTED
        req2.node_types.append('fake-label')
        self.zk.storeNodeRequest(req2)
        req2 = self.waitForNodeRequest(req2, zk.PENDING)

        # Delete node attached to provider2 this will cause provider2 to
        # fulfill the request it had pending.
        provider2_first.state = zk.DELETING
        self.zk.storeNode(provider2_first)
        self.zk.unlockNode(provider2_first)
        self.waitForNodeDeletion(provider2_first)

        while True:
            # Wait for provider2 node to be created. Also find the request
            # that was not fulfilled. This is the request that fake-provider
            # is pending on.
            req = self.zk.getNodeRequest(req1.id)
            if req.state == zk.FULFILLED:
                final_req = req2
                break
            req = self.zk.getNodeRequest(req2.id)
            if req.state == zk.FULFILLED:
                final_req = req1
                break

        provider2_second = None
        nodes = map(pool.zk.getNode, pool.zk.getNodes())
        for node in nodes:
            if (node and node.provider == 'fake-provider2'
                    and node.state == zk.READY):
                provider2_second = node
                break

        # Now delete the new node we had provider2 build. At this point,
        # the only provider with any requests is fake-provider.
        provider2_second.state = zk.DELETING
        self.zk.storeNode(provider2_second)

        # Set provider1 run_handler to throw exception to simulate a
        # broken cloud. Note the pool worker instantiates request handlers on
        # demand which is why we have a somewhat convoluted monkey patch here.
        # We must patch deep enough in the request handler that
        # despite being paused fake-provider will still trip over this code.
        pool_worker = pool.getPoolWorkers('fake-provider')[0]
        request_handler = pool_worker.request_handlers[0]

        def raise_KeyError(node):
            raise KeyError('fake-provider')

        request_handler.launch_manager.launch = raise_KeyError

        # Delete instance in fake-provider. This should cause provider2
        # to service the request that was held pending by fake-provider.
        provider1_first.state = zk.DELETING
        self.zk.storeNode(provider1_first)
        self.zk.unlockNode(provider1_first)

        # Request is fulfilled by provider 2
        req = self.waitForNodeRequest(final_req)
        self.assertEqual(req.state, zk.FULFILLED)
        self.assertEqual(1, len(req.declined_by))
        self.assertIn('fake-provider-main', req.declined_by[0])
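
An alternative sketch of the monkey patch above using the fixtures helper seen in earlier examples, which restores the original launch method automatically at test cleanup (assuming the same request_handler object captured above):

        self.useFixture(fixtures.MockPatchObject(
            request_handler.launch_manager, 'launch',
            side_effect=KeyError('fake-provider')))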