def test_static_waiting_handler(self):
    configfile = self.setup_config('static-2-nodes-multilabel.yaml')
    pool = self.useNodepool(configfile, watermark_sleep=1)
    pool.start()

    req = zk.NodeRequest()
    req.state = zk.REQUESTED
    req.node_types.append('fake-label')
    self.zk.storeNodeRequest(req)
    req = self.waitForNodeRequest(req, zk.FULFILLED)
    node = self.zk.getNode(req.nodes[0])
    self.zk.lockNode(node)
    node.state = zk.USED
    self.zk.storeNode(node)

    # A second request for the same label must wait because the static
    # node is still held by the first request.
    req_waiting = zk.NodeRequest()
    req_waiting.state = zk.REQUESTED
    req_waiting.node_types.append('fake-label')
    self.zk.storeNodeRequest(req_waiting)
    req_waiting = self.waitForNodeRequest(req_waiting, zk.PENDING)

    # A request for the other label can still be fulfilled.
    req = zk.NodeRequest()
    req.state = zk.REQUESTED
    req.node_types.append('fake-label2')
    self.zk.storeNodeRequest(req)
    req = self.waitForNodeRequest(req, zk.FULFILLED)

    req_waiting = self.zk.getNodeRequest(req_waiting.id)
    self.assertEqual(req_waiting.state, zk.PENDING)

    # Releasing the used node lets the waiting request proceed.
    self.zk.unlockNode(node)
    self.waitForNodeDeletion(node)
    self.waitForNodeRequest(req_waiting, zk.FULFILLED)

def test_fail_minready_request_at_capacity(self):
    '''
    A min-ready request to a provider that is already at capacity
    should be declined.
    '''
    configfile = self.setup_config('node_min_ready_capacity.yaml')
    self.useBuilder(configfile)
    self.waitForImage('fake-provider', 'fake-image')
    pool = self.useNodepool(configfile, watermark_sleep=1)
    pool.start()

    # Get an initial node ready
    req = zk.NodeRequest()
    req.state = zk.REQUESTED
    req.node_types.append("fake-label")
    self.zk.storeNodeRequest(req)
    req = self.waitForNodeRequest(req)
    self.assertEqual(req.state, zk.FULFILLED)

    # Now simulate a min-ready request
    min_ready_req = zk.NodeRequest()
    min_ready_req.state = zk.REQUESTED
    min_ready_req.node_types.append("fake-label")
    min_ready_req.requestor = "NodePool:min-ready"
    self.zk.storeNodeRequest(min_ready_req)
    min_ready_req = self.waitForNodeRequest(min_ready_req)
    self.assertEqual(min_ready_req.state, zk.FAILED)
    self.assertNotEqual(min_ready_req.declined_by, [])

def test_static_multiprovider_handler(self):
    configfile = self.setup_config('multiproviders.yaml')
    pool = self.useNodepool(configfile, watermark_sleep=1)
    pool.start()

    self.wait_for_config(pool)
    manager = pool.getProviderManager('openstack-provider')
    manager._client.create_image(name="fake-image")

    # Request a node with the static provider's label.
    req = zk.NodeRequest()
    req.state = zk.REQUESTED
    req.node_types.append('fake-static-label')
    self.zk.storeNodeRequest(req)

    self.log.debug("Waiting for request %s", req.id)
    req = self.waitForNodeRequest(req)
    self.assertEqual(req.state, zk.FULFILLED)
    self.assertEqual(len(req.nodes), 1)

    # Request a node with the OpenStack provider's label.
    req = zk.NodeRequest()
    req.state = zk.REQUESTED
    req.node_types.append('fake-openstack-label')
    self.zk.storeNodeRequest(req)

    self.log.debug("Waiting for request %s", req.id)
    req = self.waitForNodeRequest(req)
    self.assertEqual(req.state, zk.FULFILLED)
    self.assertEqual(len(req.nodes), 1)

def test_node_assignment_order(self):
    """Test that nodes are assigned in the order requested"""
    configfile = self.setup_config('node_many_labels.yaml')
    self.useBuilder(configfile)
    self.waitForImage('fake-provider', 'fake-image')

    pool = self.useNodepool(configfile, watermark_sleep=1)
    pool.start()
    self.waitForNodes('fake-label1')
    self.waitForNodes('fake-label2')
    self.waitForNodes('fake-label3')
    self.waitForNodes('fake-label4')

    req = zk.NodeRequest()
    req.state = zk.REQUESTED
    req.node_types.append('fake-label3')
    req.node_types.append('fake-label1')
    req.node_types.append('fake-label4')
    req.node_types.append('fake-label2')
    self.zk.storeNodeRequest(req)

    req = self.waitForNodeRequest(req)
    self.assertEqual(req.state, zk.FULFILLED)
    self.assertEqual(4, len(req.nodes))
    nodes = []
    for node_id in req.nodes:
        nodes.append(self.zk.getNode(node_id))
    self.assertEqual(nodes[0].type, 'fake-label3')
    self.assertEqual(nodes[1].type, 'fake-label1')
    self.assertEqual(nodes[2].type, 'fake-label4')
    self.assertEqual(nodes[3].type, 'fake-label2')

def test_liveness_check(self):
    '''
    Test liveness check during request handling.
    '''
    configfile = self.setup_config('static-basic.yaml')
    pool = self.useNodepool(configfile, watermark_sleep=1)
    pool.start()

    nodes = self.waitForNodes('fake-label')
    self.assertEqual(len(nodes), 1)

    req = zk.NodeRequest()
    req.state = zk.REQUESTED
    req.node_types.append('fake-label')

    # Fail the host key scan so the node is deregistered while the
    # request is being handled.
    with mock.patch("nodepool.nodeutils.nodescan") as nodescan_mock:
        nodescan_mock.side_effect = OSError
        self.zk.storeNodeRequest(req)
        self.waitForNodeDeletion(nodes[0])

    self.log.debug("Waiting for request %s", req.id)
    req = self.waitForNodeRequest(req)
    self.assertEqual(req.state, zk.FULFILLED)
    self.assertEqual(len(req.nodes), 1)
    self.assertNotEqual(req.nodes[0], nodes[0].id)

def test_openshift_native(self):
    configfile = self.setup_config('openshift.yaml')
    pool = self.useNodepool(configfile, watermark_sleep=1)
    pool.start()

    req = zk.NodeRequest()
    req.state = zk.REQUESTED
    req.node_types.append('openshift-project')
    self.zk.storeNodeRequest(req)

    self.log.debug("Waiting for request %s", req.id)
    req = self.waitForNodeRequest(req)
    self.assertEqual(req.state, zk.FULFILLED)
    self.assertNotEqual(req.nodes, [])
    node = self.zk.getNode(req.nodes[0])
    self.assertEqual(node.allocated_to, req.id)
    self.assertEqual(node.state, zk.READY)
    self.assertIsNotNone(node.launcher)
    self.assertEqual(node.connection_type, 'project')
    self.assertEqual(node.connection_port.get('token'), 'fake-token')

    node.state = zk.DELETING
    self.zk.storeNode(node)
    self.waitForNodeDeletion(node)

def test_request_list_json(self):
    configfile = self.setup_config('node.yaml')
    pool = self.useNodepool(configfile, watermark_sleep=1)
    self.useBuilder(configfile)
    pool.start()
    webapp = self.useWebApp(pool, port=0)
    webapp.start()
    port = webapp.server.socket.getsockname()[1]

    self.waitForImage('fake-provider', 'fake-image')
    self.waitForNodes('fake-label')

    req = zk.NodeRequest()
    req.state = zk.PENDING  # so it will be ignored
    req.node_types = ['fake-label']
    req.requestor = 'test_request_list'
    self.zk.storeNodeRequest(req)

    http_req = request.Request(
        "http://localhost:%s/request-list.json" % port)
    f = request.urlopen(http_req)
    self.assertEqual(f.info().get('Content-Type'),
                     'application/json')
    data = f.read()
    objs = json.loads(data.decode('utf8'))
    self.assertDictContainsSubset({'node_types': ['fake-label'],
                                   'requestor': 'test_request_list',
                                   }, objs[0])

def test_provider_wont_wedge(self):
    '''
    A provider should not wedge itself when it is at (1) maximum capacity
    (# registered nodes == max-servers), (2) all of its current nodes are
    not being used, and (3) a request comes in with a label that it does
    not yet have available. Normally, situation (3) combined with (1)
    would cause the provider to pause until capacity becomes available,
    but because of (2), it never will and we would wedge the provider.
    '''
    configfile = self.setup_config('wedge_test.yaml')
    self.useBuilder(configfile)
    pool = self.useNodepool(configfile, watermark_sleep=1)
    pool.start()

    # Wait for fake-label1 min-ready request to be fulfilled, which will
    # put us at maximum capacity with max-servers of 1.
    label1_nodes = self.waitForNodes('fake-label1')
    self.assertEqual(1, len(label1_nodes))

    # Now we submit a request for fake-label2, which is not yet available.
    req = zk.NodeRequest()
    req.state = zk.REQUESTED
    req.node_types.append('fake-label2')
    self.zk.storeNodeRequest(req)

    # The provider should pause here to handle the fake-label2 request.
    # But because the fake-label1 node is not being used, and will never
    # be freed because we are paused and not handling additional requests,
    # the pool worker thread should recognize that and delete the unused
    # fake-label1 node for us. It can then fulfill the fake-label2
    # request.
    self.waitForNodeDeletion(label1_nodes[0])
    req = self.waitForNodeRequest(req)
    self.assertEqual(req.state, zk.FULFILLED)

def test_static_request_handled(self):
    '''
    Test that a node is reregistered after handling a request.
    '''
    configfile = self.setup_config('static-basic.yaml')
    pool = self.useNodepool(configfile, watermark_sleep=1)
    pool.start()

    nodes = self.waitForNodes('fake-label')
    self.assertEqual(len(nodes), 1)

    req = zk.NodeRequest()
    req.state = zk.REQUESTED
    req.node_types.append('fake-label')
    self.zk.storeNodeRequest(req)

    self.log.debug("Waiting for request %s", req.id)
    req = self.waitForNodeRequest(req)
    self.assertEqual(req.state, zk.FULFILLED)
    self.assertEqual(len(req.nodes), 1)
    self.assertEqual(req.nodes[0], nodes[0].id)

    # Mark node as used
    nodes[0].state = zk.USED
    self.zk.storeNode(nodes[0])

    # Our single node should have been used, deleted, then reregistered
    new_nodes = self.waitForNodes('fake-label')
    self.assertEqual(len(new_nodes), 1)
    self.assertEqual(nodes[0].hostname, new_nodes[0].hostname)

def _create_node_request(self):
    req = zk.NodeRequest()
    req.state = zk.REQUESTED
    req.node_types.append('label1')
    self.zk.storeNodeRequest(req)
    self.assertIsNotNone(
        self.zk.client.exists(self.zk._requestPath(req.id)))
    return req

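# A minimal companion sketch (not part of the original excerpt) showing how
# the helper above might be used: create a request, delete it, and verify
# the request znode is gone. Every call used here appears elsewhere in this
# file; the test name itself is illustrative.
def test_deleteNodeRequest(self):
    req = self._create_node_request()
    self.zk.deleteNodeRequest(req)
    self.assertIsNone(
        self.zk.client.exists(self.zk._requestPath(req.id)))
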
def test_getNodeRequest(self):
    r = zk.NodeRequest("500-123")
    r.state = zk.REQUESTED
    path = self.zk._requestPath(r.id)
    self.zk.client.create(path, value=r.serialize(),
                          makepath=True, ephemeral=True)
    o = self.zk.getNodeRequest(r.id)
    self.assertIsInstance(o, zk.NodeRequest)
    self.assertEqual(r.id, o.id)

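# A hedged companion sketch (an assumption, not from the original excerpt):
# getNodeRequest() is expected to return None when no znode exists for the
# given id, so looking up an id that was never stored should yield None.
def test_getNodeRequest_not_found(self):
    self.assertIsNone(self.zk.getNodeRequest("500-000"))
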
def createRequest(label_name):
    req = zk.NodeRequest()
    req.state = zk.REQUESTED
    req.requestor = "NodePool:min-ready"
    req.node_types.append(label_name)
    req.reuse = False  # force new node launches
    self.zk.storeNodeRequest(req, priority="100")
    if label_name not in self._submittedRequests:
        self._submittedRequests[label_name] = []
    self._submittedRequests[label_name].append(req)

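# A minimal usage sketch, assuming a caller that has already worked out which
# labels are below their min-ready target. 'labels_below_min_ready' is a
# hypothetical iterable and not part of the original code; one min-ready
# request is submitted per label that needs topping up.
for label_name in labels_below_min_ready:
    createRequest(label_name)
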
def test_paused_gets_declined(self):
    """Test that a paused request which later gets declined unpauses."""

    # First config has max-servers set to 2
    configfile = self.setup_config('pause_declined_1.yaml')
    self.useBuilder(configfile)
    self.waitForImage('fake-provider', 'fake-image')
    pool = self.useNodepool(configfile, watermark_sleep=1)
    pool.start()

    # Create a request that uses all capacity (2 servers)
    req = zk.NodeRequest()
    req.state = zk.REQUESTED
    req.node_types.append('fake-label')
    req.node_types.append('fake-label')
    self.zk.storeNodeRequest(req)
    req = self.waitForNodeRequest(req)
    self.assertEqual(req.state, zk.FULFILLED)
    self.assertEqual(len(req.nodes), 2)

    # Now that we have 2 nodes in use, create another request that
    # requests two nodes, which should cause the request to pause.
    req2 = zk.NodeRequest()
    req2.state = zk.REQUESTED
    req2.node_types.append('fake-label')
    req2.node_types.append('fake-label')
    self.zk.storeNodeRequest(req2)
    req2 = self.waitForNodeRequest(req2, (zk.PENDING, ))

    # Second config decreases max-servers to 1
    self.replace_config(configfile, 'pause_declined_2.yaml')

    # Because the second request asked for 2 nodes, but that now exceeds
    # max-servers, req2 should get declined now, and transition to FAILED
    req2 = self.waitForNodeRequest(req2, (zk.FAILED, ))
    self.assertNotEqual(req2.declined_by, [])

def test_static_multinode_handler(self):
    configfile = self.setup_config('static.yaml')
    pool = self.useNodepool(configfile, watermark_sleep=1)
    pool.start()

    req = zk.NodeRequest()
    req.state = zk.REQUESTED
    req.node_types.append('fake-label')
    req.node_types.append('fake-concurrent-label')
    self.zk.storeNodeRequest(req)

    self.log.debug("Waiting for request %s", req.id)
    req = self.waitForNodeRequest(req)
    self.assertEqual(req.state, zk.FULFILLED)
    self.assertEqual(len(req.nodes), 2)

def test_NodeRequest_toDict(self):
    o = zk.NodeRequest("500-123")
    o.declined_by.append("abc")
    o.node_types.append('trusty')
    o.nodes.append('100')
    o.reuse = False
    o.requestor = 'zuul'
    d = o.toDict()
    self.assertNotIn('id', d)
    self.assertIn('state', d)
    self.assertIn('state_time', d)
    self.assertEqual(d['declined_by'], o.declined_by)
    self.assertEqual(d['node_types'], o.node_types)
    self.assertEqual(d['nodes'], o.nodes)
    self.assertEqual(d['reuse'], o.reuse)
    self.assertEqual(d['requestor'], o.requestor)

def test_invalid_image_fails(self):
    '''
    Test that a request for an invalid image/label is declined by every
    provider and fails.
    '''
    configfile = self.setup_config('node.yaml')
    pool = self.useNodepool(configfile, watermark_sleep=1)
    pool.start()

    req = zk.NodeRequest()
    req.state = zk.REQUESTED
    req.node_types.append("zorky-zumba")
    self.zk.storeNodeRequest(req)

    req = self.waitForNodeRequest(req)
    self.assertEqual(req.state, zk.FAILED)
    self.assertNotEqual(req.declined_by, [])

def test_request_list(self):
    configfile = self.setup_config('node.yaml')
    pool = self.useNodepool(configfile, watermark_sleep=1)
    self.useBuilder(configfile)
    pool.start()

    self.waitForImage('fake-provider', 'fake-image')
    nodes = self.waitForNodes('fake-label')
    self.assertEqual(len(nodes), 1)

    req = zk.NodeRequest()
    req.state = zk.PENDING  # so it will be ignored
    req.node_types = ['fake-label']
    req.requestor = 'test_request_list'
    self.zk.storeNodeRequest(req)

    self.assert_listed(configfile, ['request-list'], 0, req.id, 1)

def test_node_assignment(self):
    '''
    A successful node launch should leave unlocked nodes in the READY
    state, assigned to the request.
    '''
    configfile = self.setup_config('node_no_min_ready.yaml')
    self.useBuilder(configfile)
    image = self.waitForImage('fake-provider', 'fake-image')
    self.assertEqual(image.username, 'zuul')

    nodepool.launcher.LOCK_CLEANUP = 1
    pool = self.useNodepool(configfile, watermark_sleep=1)
    pool.start()

    req = zk.NodeRequest()
    req.state = zk.REQUESTED
    req.node_types.append('fake-label')
    self.zk.storeNodeRequest(req)

    req = self.waitForNodeRequest(req)
    self.assertEqual(req.state, zk.FULFILLED)
    self.assertNotEqual(req.nodes, [])

    for node_id in req.nodes:
        node = self.zk.getNode(node_id)
        self.assertEqual(node.allocated_to, req.id)
        self.assertEqual(node.state, zk.READY)
        self.assertIsNotNone(node.launcher)
        self.assertEqual(node.cloud, 'fake')
        self.assertEqual(node.region, 'fake-region')
        self.assertEqual(node.az, "az1")
        self.assertEqual(node.username, "zuul")
        self.assertEqual(node.connection_type, 'ssh')
        p = "{path}/{id}".format(
            path=self.zk._imageUploadPath(image.image_name,
                                          image.build_id,
                                          image.provider_name),
            id=image.id)
        self.assertEqual(node.image_id, p)
        self.zk.lockNode(node, blocking=False)
        self.zk.unlockNode(node)

    # The request lock should still exist while the request does; after
    # the request is deleted, the cleanup thread should remove the lock.
    self.assertIsNotNone(
        self.zk.client.exists(self.zk._requestLockPath(req.id)))
    self.zk.deleteNodeRequest(req)
    self.waitForNodeRequestLockDeletion(req.id)

    self.assertReportedStat('nodepool.nodes.ready', '1|g')
    self.assertReportedStat('nodepool.nodes.building', '0|g')

def _create_pending_request(self):
    req = zk.NodeRequest()
    req.state = zk.PENDING
    req.requestor = 'test_nodepool'
    req.node_types.append('fake-label')
    self.zk.storeNodeRequest(req)

    # Create a node that is allocated to the request, but not yet assigned
    # within the NodeRequest object
    node = zk.Node()
    node.state = zk.READY
    node.type = 'fake-label'
    node.public_ipv4 = 'fake'
    node.provider = 'fake-provider'
    node.pool = 'main'
    node.allocated_to = req.id
    self.zk.storeNode(node)

    return (req, node)

def test_disabled_provider(self):
    '''
    A request should fail even with a provider that is disabled by
    setting max-servers to 0. Because we look to see that all providers
    decline a request by comparing the declined_by request attribute to
    the list of registered launchers, this means that each must attempt
    to handle it at least once, and thus decline it.
    '''
    configfile = self.setup_config('disabled_provider.yaml')
    self.useBuilder(configfile)
    pool = self.useNodepool(configfile, watermark_sleep=1)
    pool.start()

    req = zk.NodeRequest()
    req.state = zk.REQUESTED
    req.node_types.append('fake-label')
    self.zk.storeNodeRequest(req)

    req = self.waitForNodeRequest(req)
    self.assertEqual(req.state, zk.FAILED)

def test_node_launch_retries(self):
    configfile = self.setup_config('node_launch_retry.yaml')
    pool = self.useNodepool(configfile, watermark_sleep=1)
    self.useBuilder(configfile)
    pool.start()
    self.wait_for_config(pool)
    manager = pool.getProviderManager('fake-provider')
    manager.createServer_fails = 2
    self.waitForImage('fake-provider', 'fake-image')

    req = zk.NodeRequest()
    req.state = zk.REQUESTED
    req.node_types.append('fake-label')
    self.zk.storeNodeRequest(req)

    req = self.waitForNodeRequest(req)
    self.assertEqual(req.state, zk.FAILED)

    # retries in the config is set to 2, so 2 attempts are made to create
    # a server, consuming both injected failures.
    self.assertEqual(0, manager.createServer_fails)

def test_fail_request_on_launch_failure(self):
    '''
    Test that a provider launch error fails the request.
    '''
    configfile = self.setup_config('node_launch_retry.yaml')
    self.useBuilder(configfile)
    self.waitForImage('fake-provider', 'fake-image')

    pool = self.useNodepool(configfile, watermark_sleep=1)
    pool.start()
    self.wait_for_config(pool)
    manager = pool.getProviderManager('fake-provider')
    manager.createServer_fails = 2

    req = zk.NodeRequest()
    req.state = zk.REQUESTED
    req.node_types.append('fake-label')
    self.zk.storeNodeRequest(req)

    req = self.waitForNodeRequest(req)
    self.assertEqual(0, manager.createServer_fails)
    self.assertEqual(req.state, zk.FAILED)
    self.assertNotEqual(req.declined_by, [])

def test_over_quota(self, config='node_quota_cloud.yaml'):
    '''
    This tests what happens when a cloud unexpectedly returns an
    over-quota error.
    '''
    # Start with an instance quota of 2
    max_cores = math.inf
    max_instances = 2
    max_ram = math.inf

    # patch the cloud with requested quota
    def fake_get_quota():
        return (max_cores, max_instances, max_ram)

    self.useFixture(fixtures.MockPatchObject(
        Drivers.get('fake')['provider'].fake_cloud, '_get_quota',
        fake_get_quota))

    configfile = self.setup_config(config)
    self.useBuilder(configfile)
    self.waitForImage('fake-provider', 'fake-image')
    nodepool.launcher.LOCK_CLEANUP = 1
    pool = self.useNodepool(configfile, watermark_sleep=1)
    pool.start()
    self.wait_for_config(pool)

    client = pool.getProviderManager('fake-provider')._getClient()

    # Wait for a single node to be created
    req1 = zk.NodeRequest()
    req1.state = zk.REQUESTED
    req1.node_types.append('fake-label')
    self.log.debug("Adding first request")
    self.zk.storeNodeRequest(req1)
    req1 = self.waitForNodeRequest(req1)
    self.assertEqual(req1.state, zk.FULFILLED)

    # Lock this node so it appears as used and not deleted
    req1_node = self.zk.getNode(req1.nodes[0])
    self.zk.lockNode(req1_node, blocking=False)

    # Now, reduce the quota so the next node unexpectedly
    # (according to nodepool's quota estimate) fails.
    client.max_instances = 1

    # Request a second node; this request should fail.
    req2 = zk.NodeRequest()
    req2.state = zk.REQUESTED
    req2.node_types.append('fake-label')
    self.log.debug("Adding second request")
    self.zk.storeNodeRequest(req2)
    req2 = self.waitForNodeRequest(req2)
    self.assertEqual(req2.state, zk.FAILED)

    # After the second request failed, the internal quota estimate
    # should be reset, so the next request should pause to wait
    # for more quota to become available.
    req3 = zk.NodeRequest()
    req3.state = zk.REQUESTED
    req3.node_types.append('fake-label')
    self.log.debug("Adding third request")
    self.zk.storeNodeRequest(req3)
    req3 = self.waitForNodeRequest(req3, (zk.PENDING, ))
    self.assertEqual(req3.state, zk.PENDING)

    # Wait until there is a paused request handler and verify that
    # there is still only one server built (from the first
    # request).
    pool_worker = pool.getPoolWorkers('fake-provider')
    while not pool_worker[0].paused_handler:
        time.sleep(0.1)
    self.assertEqual(len(client._server_list), 1)

def _test_node_assignment_at_quota(self, config, max_cores=100,
                                   max_instances=20, max_ram=1000000):
    '''
    A successful node launch should leave unlocked nodes in the READY
    state, assigned to the request. This should be run with a quota
    that fits two nodes.
    '''

    # patch the cloud with requested quota
    def fake_get_quota():
        return (max_cores, max_instances, max_ram)

    self.useFixture(fixtures.MockPatchObject(
        Drivers.get('fake')['provider'].fake_cloud, '_get_quota',
        fake_get_quota))

    configfile = self.setup_config(config)
    self.useBuilder(configfile)
    self.waitForImage('fake-provider', 'fake-image')

    nodepool.launcher.LOCK_CLEANUP = 1
    pool = self.useNodepool(configfile, watermark_sleep=1)
    pool.start()
    self.wait_for_config(pool)

    client = pool.getProviderManager('fake-provider')._getClient()

    req1 = zk.NodeRequest()
    req1.state = zk.REQUESTED
    req1.node_types.append('fake-label')
    req1.node_types.append('fake-label')
    self.zk.storeNodeRequest(req1)

    self.log.debug("Waiting for 1st request %s", req1.id)
    req1 = self.waitForNodeRequest(req1, (zk.FULFILLED, ))
    self.assertEqual(len(req1.nodes), 2)

    # Mark the first request's nodes as in use so they won't be deleted
    # when we pause. Locking them is enough.
    req1_node1 = self.zk.getNode(req1.nodes[0])
    req1_node2 = self.zk.getNode(req1.nodes[1])
    self.zk.lockNode(req1_node1, blocking=False)
    self.zk.lockNode(req1_node2, blocking=False)

    # One of the things we want to test is that if we spawn many
    # node launches at once, we do not deadlock while the request
    # handler pauses for quota. To ensure we test that case,
    # pause server creation until we have accepted all of the node
    # requests we submit. This will ensure that we hold locks on
    # all of the nodes before pausing so that we can validate they
    # are released.
    req2 = zk.NodeRequest()
    req2.state = zk.REQUESTED
    req2.node_types.append('fake-label')
    req2.node_types.append('fake-label')
    self.zk.storeNodeRequest(req2)
    req2 = self.waitForNodeRequest(req2, (zk.PENDING, ))

    # At this point, we should have already created two servers for the
    # first request, and the request handler has accepted the second node
    # request but paused waiting for the server count to go below quota.
    # Wait until there is a paused request handler and check if there
    # are exactly two servers
    pool_worker = pool.getPoolWorkers('fake-provider')
    while not pool_worker[0].paused_handler:
        time.sleep(0.1)
    self.assertEqual(len(client._server_list), 2)

    # Mark the first request's nodes as USED, which will get them deleted
    # and allow the second to proceed.
    self.log.debug("Marking first node as used %s", req1.id)
    req1_node1.state = zk.USED
    self.zk.storeNode(req1_node1)
    self.zk.unlockNode(req1_node1)
    self.waitForNodeDeletion(req1_node1)

    # To force the sequential nature of what we're testing, wait for
    # the 2nd request to get a node allocated to it now that we've
    # freed up a node.
    self.log.debug("Waiting for node allocation for 2nd request")
    done = False
    while not done:
        for n in self.zk.nodeIterator():
            if n.allocated_to == req2.id:
                done = True
                break

    self.log.debug("Marking second node as used %s", req1.id)
    req1_node2.state = zk.USED
    self.zk.storeNode(req1_node2)
    self.zk.unlockNode(req1_node2)
    self.waitForNodeDeletion(req1_node2)

    self.log.debug("Deleting 1st request %s", req1.id)
    self.zk.deleteNodeRequest(req1)
    self.waitForNodeRequestLockDeletion(req1.id)

    req2 = self.waitForNodeRequest(req2, (zk.FULFILLED, ))
    self.assertEqual(len(req2.nodes), 2)

def test_ec2_machine(self):
    aws_id = 'AK000000000000000000'
    aws_key = '0123456789abcdef0123456789abcdef0123456789abcdef'
    self.useFixture(
        fixtures.EnvironmentVariable('AWS_ACCESS_KEY_ID', aws_id))
    self.useFixture(
        fixtures.EnvironmentVariable('AWS_SECRET_ACCESS_KEY', aws_key))

    ec2 = boto3.client('ec2', region_name='us-west-2')

    # TEST-NET-3
    vpc = ec2.create_vpc(CidrBlock='203.0.113.0/24')
    subnet = ec2.create_subnet(
        CidrBlock='203.0.113.128/25', VpcId=vpc['Vpc']['VpcId'])
    subnet_id = subnet['Subnet']['SubnetId']
    sg = ec2.create_security_group(
        GroupName='zuul-nodes', VpcId=vpc['Vpc']['VpcId'],
        Description='Zuul Nodes')
    sg_id = sg['GroupId']

    ec2_template = os.path.join(
        os.path.dirname(__file__), '..', 'fixtures', 'aws.yaml')
    raw_config = yaml.safe_load(open(ec2_template))
    raw_config['zookeeper-servers'][0] = {
        'host': self.zookeeper_host,
        'port': self.zookeeper_port,
        'chroot': self.zookeeper_chroot,
    }
    raw_config['providers'][0]['pools'][0]['subnet-id'] = subnet_id
    raw_config['providers'][0]['pools'][0]['security-group-id'] = sg_id

    with tempfile.NamedTemporaryFile() as tf:
        tf.write(yaml.safe_dump(
            raw_config, default_flow_style=False).encode('utf-8'))
        tf.flush()
        configfile = self.setup_config(tf.name)
        pool = self.useNodepool(configfile, watermark_sleep=1)
        pool.start()

        req = zk.NodeRequest()
        req.state = zk.REQUESTED
        req.node_types.append('ubuntu1404')
        with patch('nodepool.driver.aws.handler.nodescan') as nodescan:
            nodescan.return_value = 'MOCK KEY'
            self.zk.storeNodeRequest(req)

            self.log.debug("Waiting for request %s", req.id)
            req = self.waitForNodeRequest(req)
            self.assertEqual(req.state, zk.FULFILLED)

            self.assertNotEqual(req.nodes, [])
            node = self.zk.getNode(req.nodes[0])
            self.assertEqual(node.allocated_to, req.id)
            self.assertEqual(node.state, zk.READY)
            self.assertIsNotNone(node.launcher)
            self.assertEqual(node.connection_type, 'ssh')
            nodescan.assert_called_with(
                node.interface_ip, port=22, timeout=180,
                gather_hostkeys=True)

            # A new request will be paused for lack of quota until this
            # one is deleted.
            req2 = zk.NodeRequest()
            req2.state = zk.REQUESTED
            req2.node_types.append('ubuntu1404')
            self.zk.storeNodeRequest(req2)
            req2 = self.waitForNodeRequest(
                req2, (zk.PENDING, zk.FAILED, zk.FULFILLED))
            self.assertEqual(req2.state, zk.PENDING)
            # It could flip from PENDING to one of the others, so sleep a
            # bit and be sure.
            time.sleep(1)
            req2 = self.waitForNodeRequest(
                req2, (zk.PENDING, zk.FAILED, zk.FULFILLED))
            self.assertEqual(req2.state, zk.PENDING)

            node.state = zk.DELETING
            self.zk.storeNode(node)
            self.waitForNodeDeletion(node)

            req2 = self.waitForNodeRequest(
                req2, (zk.FAILED, zk.FULFILLED))
            self.assertEqual(req2.state, zk.FULFILLED)
            node = self.zk.getNode(req2.nodes[0])
            node.state = zk.DELETING
            self.zk.storeNode(node)
            self.waitForNodeDeletion(node)

def test_failed_provider(self):
    """Test that a broken provider doesn't fail node requests."""
    configfile = self.setup_config('launcher_two_provider_max_1.yaml')
    self.useBuilder(configfile)
    pool = self.useNodepool(configfile, watermark_sleep=.5)
    pool.start()
    self.wait_for_config(pool)

    # Steady state at images available.
    self.waitForImage('fake-provider', 'fake-image')
    self.waitForImage('fake-provider2', 'fake-image')
    # We have now reached steady state and can manipulate the system to
    # test failing cloud behavior.

    # Make two requests so that the next requests are paused.
    # Note we use different provider-specific labels here to avoid
    # a race where a single provider fulfills both of these initial
    # requests.
    # fake-provider
    req = zk.NodeRequest()
    req.state = zk.REQUESTED
    req.node_types.append('fake-label2')
    self.zk.storeNodeRequest(req)
    req = self.waitForNodeRequest(req, zk.FULFILLED)
    # fake-provider2
    req = zk.NodeRequest()
    req.state = zk.REQUESTED
    req.node_types.append('fake-label3')
    self.zk.storeNodeRequest(req)
    req = self.waitForNodeRequest(req, zk.FULFILLED)

    nodes = map(pool.zk.getNode, pool.zk.getNodes())
    provider1_first = None
    provider2_first = None
    for node in nodes:
        if node.provider == 'fake-provider2':
            provider2_first = node
        elif node.provider == 'fake-provider':
            provider1_first = node

    # Mark the nodes as being used so they won't be deleted at pause.
    # Locking them is enough.
    self.zk.lockNode(provider1_first, blocking=False)
    self.zk.lockNode(provider2_first, blocking=False)

    # The next two requests will go pending, one for each provider.
    req1 = zk.NodeRequest()
    req1.state = zk.REQUESTED
    req1.node_types.append('fake-label')
    self.zk.storeNodeRequest(req1)
    req1 = self.waitForNodeRequest(req1, zk.PENDING)

    req2 = zk.NodeRequest()
    req2.state = zk.REQUESTED
    req2.node_types.append('fake-label')
    self.zk.storeNodeRequest(req2)
    req2 = self.waitForNodeRequest(req2, zk.PENDING)

    # Delete the node attached to provider2. This will cause provider2
    # to fulfill the request it had pending.
    provider2_first.state = zk.DELETING
    self.zk.storeNode(provider2_first)
    self.zk.unlockNode(provider2_first)
    self.waitForNodeDeletion(provider2_first)

    while True:
        # Wait for the provider2 node to be created. Also find the
        # request that was not fulfilled. This is the request that
        # fake-provider is pending on.
        req = self.zk.getNodeRequest(req1.id)
        if req.state == zk.FULFILLED:
            final_req = req2
            break
        req = self.zk.getNodeRequest(req2.id)
        if req.state == zk.FULFILLED:
            final_req = req1
            break

    provider2_second = None
    nodes = map(pool.zk.getNode, pool.zk.getNodes())
    for node in nodes:
        if (node and node.provider == 'fake-provider2' and
                node.state == zk.READY):
            provider2_second = node
            break

    # Now delete the new node we had provider2 build. At this point,
    # the only provider with any requests is fake-provider.
    provider2_second.state = zk.DELETING
    self.zk.storeNode(provider2_second)

    # Set provider1 run_handler to throw an exception to simulate a
    # broken cloud. Note the pool worker instantiates request handlers on
    # demand, which is why we have a somewhat convoluted monkey patch
    # here. We must patch deep enough in the request handler that,
    # despite being paused, fake-provider will still trip over this code.
    pool_worker = pool.getPoolWorkers('fake-provider')[0]
    request_handler = pool_worker.request_handlers[0]

    def raise_KeyError(node):
        raise KeyError('fake-provider')

    request_handler.launch_manager.launch = raise_KeyError

    # Delete the instance in fake-provider. This should cause provider2
    # to service the request that was held pending by fake-provider.
    provider1_first.state = zk.DELETING
    self.zk.storeNode(provider1_first)
    self.zk.unlockNode(provider1_first)

    # The request is fulfilled by provider 2.
    req = self.waitForNodeRequest(final_req)
    self.assertEqual(req.state, zk.FULFILLED)
    self.assertEqual(1, len(req.declined_by))
    self.assertIn('fake-provider-main', req.declined_by[0])