示例#1
0
文件: test_core.py 项目: timf/epu
 def setUp(self):
     """Create the fake collaborators and the ProvisionerCore under test.

     Each collaborator is stored on ``self`` so that tests can inspect it;
     the single fake node driver is registered under the site name 'fake'.
     """
     self.notifier = FakeProvisionerNotifier()
     self.store = ProvisionerStore()
     self.ctx = FakeContextClient()
     self.driver = FakeNodeDriver()
     self.dtrs = FakeDTRS()

     # Gather the constructor arguments first, then pass them in one go.
     core_kwargs = dict(store=self.store,
                        notifier=self.notifier,
                        dtrs=self.dtrs,
                        site_drivers=dict(fake=self.driver),
                        context=self.ctx)
     self.core = ProvisionerCore(**core_kwargs)
示例#2
0
class ProvisionerService(ServiceProcess):
    """Provisioner service interface

    Builds a ProvisionerCore from the process spawn arguments and runs the
    core's recovery step during service initialization.
    """

    # Declaration of service
    declare = ServiceProcess.service_declare(name='provisioner',
                                             version='0.1.0',
                                             dependencies=[])

    @defer.inlineCallbacks
    def slc_init(self):
        """Initialize the service from ``self.spawn_args``.

        Required spawn args: 'store', 'site_drivers' and 'context_client'.
        Optional spawn arg: 'notifier' (a ProvisionerNotifier bound to this
        process is created when it is absent or falsy).

        Raises:
            KeyError: if any required spawn arg is missing; the message
                names the missing key.
        """
        cei_events.event("provisioner", "init_begin")

        try:
            store = self.spawn_args['store']
            site_drivers = self.spawn_args['site_drivers']
            context_client = self.spawn_args['context_client']
        except KeyError as e:
            # Re-raise with a message that says which spawn arg is missing.
            raise KeyError("Missing provisioner spawn_arg: " + str(e))

        self.store = store

        notifier = self.spawn_args.get('notifier')
        self.notifier = notifier or ProvisionerNotifier(self)
        self.dtrs = DeployableTypeRegistryClient(self)

        self.core = ProvisionerCore(self.store, self.notifier, self.dtrs,
                                    site_drivers, context_client)
        # Let the core run its recovery step before reporting init as done.
        yield self.core.recover()
        cei_events.event("provisioner", "init_end")

        # operator can disable new launches
        self.enabled = True
        self.terminate_all_deferred = None
示例#3
0
    def setUp(self):
        """Create fakes, register fake sites/credentials and build the core.

        The DTRS fake gets a per-caller site table ("asterix" and the None
        key) plus one credentials dict that is shared by both of asterix's
        site entries.
        """
        self.notifier = FakeProvisionerNotifier()
        self.store = ProvisionerStore()
        self.ctx = FakeContextClient()
        self.dtrs = FakeDTRS()

        # Site table: "asterix" knows site1 only; the None key knows both.
        asterix_sites = {"site1": dict(type="fake")}
        default_sites = {
            "site1": dict(type="fake"),
            "site2": dict(type="fake"),
        }
        self.dtrs.sites = {"asterix": asterix_sites, None: default_sites}

        # A single dict object is stored under both keys, exactly as a
        # chained assignment would do (site1 first, then site2).
        shared_credentials = {
            "access_key": "mykey",
            "secret_key": "mysecret"
        }
        for site_name in ("site1", "site2"):
            self.dtrs.credentials['site'][("asterix", site_name)] = \
                shared_credentials

        # Construct both drivers before initializing either of them.
        self.site1_driver, self.site2_driver = FakeNodeDriver(), FakeNodeDriver()
        for node_driver in (self.site1_driver, self.site2_driver):
            node_driver.initialize()

        core_kwargs = dict(store=self.store, notifier=self.notifier,
                           dtrs=self.dtrs, context=self.ctx)
        self.core = ProvisionerCore(**core_kwargs)
示例#4
0
文件: test_core.py 项目: timf/epu
 def setUp(self):
     """Prepare a ProvisionerCore backed by fake collaborators.

     The notifier, store, context client, node driver and DTRS are kept on
     ``self``; the driver is exposed to the core as the 'fake' site.
     """
     self.notifier = FakeProvisionerNotifier()
     self.store = ProvisionerStore()
     self.ctx = FakeContextClient()
     self.driver = FakeNodeDriver()
     self.dtrs = FakeDTRS()

     # Map of site name -> driver handed to the core.
     site_driver_map = {}
     site_driver_map['fake'] = self.driver

     self.core = ProvisionerCore(
         store=self.store,
         notifier=self.notifier,
         dtrs=self.dtrs,
         site_drivers=site_driver_map,
         context=self.ctx,
     )
示例#5
0
文件: test_core.py 项目: timf/epu
class ProvisionerCoreRecoveryTests(unittest.TestCase):
    """Tests for ProvisionerCore.recover() and terminate_all().

    Each test preloads launch and node records into the store, runs the
    core operation and then inspects the fake driver and the store.
    """

    def setUp(self):
        """Create the fake collaborators and the core under test."""
        self.notifier = FakeProvisionerNotifier()
        self.store = ProvisionerStore()
        self.ctx = FakeContextClient()
        self.driver = FakeNodeDriver()
        self.dtrs = FakeDTRS()

        core_kwargs = dict(store=self.store,
                           notifier=self.notifier,
                           dtrs=self.dtrs,
                           site_drivers=dict(fake=self.driver),
                           context=self.ctx)
        self.core = ProvisionerCore(**core_kwargs)

    @defer.inlineCallbacks
    def test_recover_launch_incomplete(self):
        """Ensures that launches in REQUESTED state are completed
        """
        launch_id = _new_id()
        # Cluster document: 3 nodes named "node" and 1 named "running".
        doc = ("<cluster><workspace><name>node</name><image>fake</image>"
               "<quantity>3</quantity>"
               "</workspace><workspace><name>running</name><image>fake"
               "</image><quantity>1</quantity></workspace></cluster>")
        context = {
            'broker_uri': _new_id(),
            'context_id': _new_id(),
            'secret': _new_id(),
            'uri': _new_id()
        }

        requested_node_ids = [_new_id(), _new_id()]

        # Records are created in the same order they appear in the list.
        running_group_node = make_node(launch_id,
                                       states.RUNNING,
                                       site='fake',
                                       ctx_name='running')
        requested_nodes = [
            make_node(launch_id,
                      states.REQUESTED,
                      site='fake',
                      node_id=requested_id,
                      ctx_name='node')
            for requested_id in requested_node_ids
        ]
        running_node = make_node(launch_id, states.RUNNING, ctx_name='node')
        node_records = [running_group_node] + requested_nodes + [running_node]

        launch_record = make_launch(launch_id,
                                    states.REQUESTED,
                                    node_records,
                                    document=doc,
                                    context=context)

        yield self.store.put_launch(launch_record)
        yield self.store.put_nodes(node_records)

        # 2 nodes are in REQUESTED state, so those should be launched
        yield self.core.recover()

        # because we rely on IaaS idempotency, we get full Node responses
        # for all nodes in the group. What really would cause this scenario
        # is successfully launching the full group but failing before records
        # could be written for the two REQUESTED nodes.
        created_nodes = self.driver.created
        self.assertEqual(3, len(created_nodes))
        iaas_ids = set(created.id for created in self.driver.created)
        self.assertEqual(3, len(iaas_ids))

        for requested_id in requested_node_ids:
            stored_node = yield self.store.get_node(requested_id)
            self.assertEqual(states.PENDING, stored_node['state'])
            self.assertTrue(stored_node['iaas_id'] in iaas_ids)

        stored_launch = yield self.store.get_launch(launch_id)
        self.assertEqual(states.PENDING, stored_launch['state'])

    @defer.inlineCallbacks
    def test_recovery_nodes_terminating(self):
        """A node stored as TERMINATING is destroyed and ends up TERMINATED."""
        launch_id = _new_id()

        terminating_iaas_id = _new_id()

        terminating_node = make_node(launch_id,
                                     states.TERMINATING,
                                     iaas_id=terminating_iaas_id,
                                     site='fake')
        terminated_node = make_node(launch_id, states.TERMINATED)
        running_node = make_node(launch_id, states.RUNNING)
        node_records = [terminating_node, terminated_node, running_node]

        launch_record = make_launch(launch_id, states.RUNNING, node_records)

        yield self.store.put_launch(launch_record)
        yield self.store.put_nodes(node_records)

        yield self.core.recover()

        # Only the TERMINATING node should have reached the driver.
        self.assertEqual(1, len(self.driver.destroyed))
        self.assertEqual(self.driver.destroyed[0].id, terminating_iaas_id)

        stored_terminated = yield self.store.get_nodes(
            state=states.TERMINATED)
        self.assertEqual(2, len(stored_terminated))

    @defer.inlineCallbacks
    def test_recovery_launch_terminating(self):
        """A launch stored as TERMINATING has its live nodes destroyed and
        is itself recorded as TERMINATED afterwards."""
        launch_id = _new_id()

        terminating_iaas_ids = [_new_id(), _new_id()]

        terminating_node = make_node(launch_id,
                                     states.TERMINATING,
                                     iaas_id=terminating_iaas_ids[0],
                                     site='fake')
        terminated_node = make_node(launch_id, states.TERMINATED)
        running_node = make_node(launch_id,
                                 states.RUNNING,
                                 iaas_id=terminating_iaas_ids[1],
                                 site='fake')
        node_records = [terminating_node, terminated_node, running_node]

        launch_record = make_launch(launch_id, states.TERMINATING,
                                    node_records)

        yield self.store.put_launch(launch_record)
        yield self.store.put_nodes(node_records)

        yield self.core.recover()

        self.assertEqual(2, len(self.driver.destroyed))
        # Exactly two entries exist at this point; check each of them.
        for destroyed_node in self.driver.destroyed:
            self.assertTrue(destroyed_node.id in terminating_iaas_ids)

        stored_terminated = yield self.store.get_nodes(
            state=states.TERMINATED)
        self.assertEqual(3, len(stored_terminated))

        stored_launch = yield self.store.get_launch(launch_id)
        self.assertEqual(stored_launch['state'], states.TERMINATED)

    @defer.inlineCallbacks
    def test_terminate_all(self):
        """terminate_all() destroys the nodes of the RUNNING and PENDING
        launches and leaves every launch and node TERMINATED."""
        # One launch of three nodes for each starting state, stored in order.
        for initial_state in (states.RUNNING, states.PENDING,
                              states.TERMINATED):
            launch_id = _new_id()
            launch, nodes = make_launch_and_nodes(launch_id, 3, initial_state)
            yield self.store.put_launch(launch)
            yield self.store.put_nodes(nodes)

        yield self.core.terminate_all()

        self.assertEqual(6, len(self.driver.destroyed))

        all_launches = yield self.store.get_launches()
        self.assertEqual(3, len(all_launches))
        self.assertTrue(
            all(launch['state'] == states.TERMINATED
                for launch in all_launches))

        all_nodes = yield self.store.get_nodes()
        self.assertEqual(9, len(all_nodes))
        self.assertTrue(
            all(node['state'] == states.TERMINATED for node in all_nodes))

        all_terminated = yield self.core.check_terminate_all()
        self.assertTrue(all_terminated)
示例#6
0
文件: test_core.py 项目: timf/epu
class ProvisionerCoreTests(unittest.TestCase):
    """Testing the provisioner core functionality
    """
    def setUp(self):
        """Create the fake collaborators and a core with two site drivers."""
        self.notifier = FakeProvisionerNotifier()
        self.store = ProvisionerStore()
        self.ctx = FakeContextClient()
        self.dtrs = FakeDTRS()

        self.site1_driver = FakeNodeDriver()
        self.site2_driver = FakeNodeDriver()

        drivers = {'site1': self.site1_driver, 'site2': self.site2_driver}
        self.core = ProvisionerCore(store=self.store,
                                    notifier=self.notifier,
                                    dtrs=self.dtrs,
                                    context=self.ctx,
                                    site_drivers=drivers)

    @defer.inlineCallbacks
    def test_prepare_dtrs_error(self):
        """prepare_provision with the DTRS fake set to error expects FAILED."""
        self.dtrs.error = DeployableTypeLookupError()

        nodes = {
            "i1": dict(ids=[_new_id()], site="chicago", allocation="small")
        }
        request = dict(launch_id=_new_id(),
                       deployable_type="foo",
                       subscribers=('blah', ),
                       nodes=nodes)
        yield self.core.prepare_provision(request)
        self.assertTrue(self.notifier.assure_state(states.FAILED))

    @defer.inlineCallbacks
    def test_prepare_broker_error(self):
        """prepare_provision with a context-create BrokerError expects FAILED."""
        self.ctx.create_error = BrokerError("fake ctx create failed")
        self.dtrs.result = {
            'document': "<fake>document</fake>",
            "nodes": {
                "i1": {}
            }
        }
        nodes = {"i1": dict(ids=[_new_id()], site="site1", allocation="small")}
        request = dict(launch_id=_new_id(),
                       deployable_type="foo",
                       subscribers=('blah', ),
                       nodes=nodes)
        yield self.core.prepare_provision(request)
        self.assertTrue(self.notifier.assure_state(states.FAILED))

    @defer.inlineCallbacks
    def test_prepare_execute(self):
        """Prepare followed by execute expects the PENDING state."""
        yield self._prepare_execute()
        self.assertTrue(self.notifier.assure_state(states.PENDING))

    @defer.inlineCallbacks
    def test_prepare_execute_iaas_fail(self):
        """A driver create_node error during execute expects FAILED."""
        self.site1_driver.create_node_error = InvalidCredsError()
        yield self._prepare_execute()
        self.assertTrue(self.notifier.assure_state(states.FAILED))

    @defer.inlineCallbacks
    def _prepare_execute(self):
        """Shared helper: prepare a one-node launch on site1, check the
        prepared records, then execute the provision.

        The final state is asserted by the calling test, not here.
        """
        self.dtrs.result = {
            'document': _get_one_node_cluster_doc("node1", "image1"),
            "nodes": {
                "node1": {}
            }
        }
        request_node = dict(ids=[_new_id()], site="site1", allocation="small")
        request_nodes = {"node1": request_node}
        request = dict(launch_id=_new_id(),
                       deployable_type="foo",
                       subscribers=('blah', ),
                       nodes=request_nodes)

        launch, nodes = yield self.core.prepare_provision(request)

        self.assertEqual(len(nodes), 1)
        node = nodes[0]
        self.assertEqual(node['node_id'], request_node['ids'][0])
        self.assertEqual(launch['launch_id'], request['launch_id'])

        # The launch record must carry the context the fake client created.
        self.assertTrue(self.ctx.last_create)
        self.assertEqual(launch['context'], self.ctx.last_create)
        for key in ('uri', 'secret', 'context_id', 'broker_uri'):
            self.assertIn(key, launch['context'])
        self.assertTrue(self.notifier.assure_state(states.REQUESTED))

        yield self.core.execute_provision(launch, nodes)

    @defer.inlineCallbacks
    def test_execute_bad_doc(self):
        """execute_provision with a malformed document expects FAILED."""
        ctx = yield self.ctx.create()
        launch_record = {
            'launch_id': "thelaunchid",
            'document': "<this><isnt><a><real><doc>",
            'deployable_type': "dt",
            'context': ctx,
            'subscribers': [],
            'state': states.PENDING,
            'node_ids': ['node1']
        }
        nodes = [{
            'node_id': 'node1',
            'launch_id': "thelaunchid",
            'state': states.REQUESTED
        }]

        yield self.core.execute_provision(launch_record, nodes)
        self.assertTrue(self.notifier.assure_state(states.FAILED))

        # TODO this should be a better error coming from nimboss
        #self.assertEqual(self.notifier.nodes['node1']['state_desc'], "CONTEXT_DOC_INVALID")

    @defer.inlineCallbacks
    def test_execute_bad_doc_nodes(self):
        """A node whose ctx_name is not in the document expects FAILED."""
        ctx = yield self.ctx.create()
        launch_record = {
            'launch_id': "thelaunchid",
            'document': _get_one_node_cluster_doc("node1", "image1"),
            'deployable_type': "dt",
            'context': ctx,
            'subscribers': [],
            'state': states.PENDING,
            'node_ids': ['node1']
        }
        nodes = [{
            'node_id': 'node1',
            'launch_id': "thelaunchid",
            'state': states.REQUESTED,
            'ctx_name': "adifferentname"
        }]

        yield self.core.execute_provision(launch_record, nodes)
        self.assertTrue(self.notifier.assure_state(states.FAILED))

    @defer.inlineCallbacks
    def test_execute_bad_doc_node_count(self):
        """Two node records against a one-node document expects FAILED."""
        ctx = yield self.ctx.create()
        launch_record = {
            'launch_id': "thelaunchid",
            'document': _get_one_node_cluster_doc("node1", "image1"),
            'deployable_type': "dt",
            'context': ctx,
            'subscribers': [],
            'state': states.PENDING,
            'node_ids': ['node1']
        }

        # two nodes where doc expects 1
        nodes = [{
            'node_id': 'node1',
            'launch_id': "thelaunchid",
            'state': states.REQUESTED,
            'ctx_name': "node1"
        }, {
            'node_id': 'node1',
            'launch_id': "thelaunchid",
            'state': states.REQUESTED,
            'ctx_name': "node1"
        }]

        yield self.core.execute_provision(launch_record, nodes)
        self.assertTrue(self.notifier.assure_state(states.FAILED))

    @defer.inlineCallbacks
    def test_query_missing_node_within_window(self):
        """A PENDING node stamped 30s ago that the driver does not report
        produces no notification."""
        launch_id = _new_id()
        node_id = _new_id()
        ts = time.time() - 30.0
        launch = {
            'launch_id': launch_id,
            'node_ids': [node_id],
            'state': states.PENDING,
            'subscribers': 'fake-subscribers'
        }
        node = {
            'launch_id': launch_id,
            'node_id': node_id,
            'state': states.PENDING,
            'pending_timestamp': ts
        }
        yield self.store.put_launch(launch)
        yield self.store.put_node(node)

        yield self.core.query_one_site('fake-site', [node],
                                       driver=FakeEmptyNodeQueryDriver())
        self.assertEqual(len(self.notifier.nodes), 0)

    @defer.inlineCallbacks
    def test_query_missing_node_past_window(self):
        """A PENDING node stamped 120s ago that the driver does not report
        is notified once and expected to be FAILED."""
        launch_id = _new_id()
        node_id = _new_id()

        ts = time.time() - 120.0
        launch = {
            'launch_id': launch_id,
            'node_ids': [node_id],
            'state': states.PENDING,
            'subscribers': 'fake-subscribers'
        }
        node = {
            'launch_id': launch_id,
            'node_id': node_id,
            'state': states.PENDING,
            'pending_timestamp': ts
        }
        yield self.store.put_launch(launch)
        yield self.store.put_node(node)

        yield self.core.query_one_site('fake-site', [node],
                                       driver=FakeEmptyNodeQueryDriver())
        self.assertEqual(len(self.notifier.nodes), 1)
        self.assertTrue(self.notifier.assure_state(states.FAILED))

    @defer.inlineCallbacks
    def test_query(self):
        """Query a running IaaS node (expects STARTED with IP info), then
        terminate it and query again (expects TERMINATED)."""
        launch_id = _new_id()
        node_id = _new_id()

        iaas_node = self.site1_driver.create_node()[0]
        self.site1_driver.set_node_running(iaas_node.id)

        ts = time.time() - 120.0
        launch = {
            'launch_id': launch_id,
            'node_ids': [node_id],
            'state': states.PENDING,
            'subscribers': 'fake-subscribers'
        }
        node = {
            'launch_id': launch_id,
            'node_id': node_id,
            'state': states.PENDING,
            'pending_timestamp': ts,
            'iaas_id': iaas_node.id,
            'site': 'site1'
        }

        # A second record, still REQUESTED, is passed to the query as well.
        req_node = {
            'launch_id': launch_id,
            'node_id': _new_id(),
            'state': states.REQUESTED
        }
        nodes = [node, req_node]
        yield self.store.put_launch(launch)
        yield self.store.put_node(node)
        yield self.store.put_node(req_node)

        yield self.core.query_one_site('site1', nodes)

        node = yield self.store.get_node(node_id)
        self.assertEqual(node['public_ip'], iaas_node.public_ip)
        self.assertEqual(node['private_ip'], iaas_node.private_ip)
        self.assertEqual(node['state'], states.STARTED)

        # query again should detect no changes
        yield self.core.query_one_site('site1', nodes)

        # now destroy
        yield self.core.terminate_nodes([node_id])
        node = yield self.store.get_node(node_id)
        yield self.core.query_one_site('site1', [node])

        node = yield self.store.get_node(node_id)
        self.assertEqual(node['public_ip'], iaas_node.public_ip)
        self.assertEqual(node['private_ip'], iaas_node.private_ip)
        self.assertEqual(node['state'], states.TERMINATED)

    @defer.inlineCallbacks
    def test_query_ctx(self):
        """Context queries as nodes report in: none, all but one, then all."""
        node_count = 3
        launch_id = _new_id()
        node_records = [
            make_node(launch_id, states.STARTED) for i in range(node_count)
        ]
        launch_record = make_launch(launch_id, states.PENDING, node_records)

        yield self.store.put_launch(launch_record)
        yield self.store.put_nodes(node_records)

        self.ctx.expected_count = len(node_records)
        self.ctx.complete = False
        self.ctx.error = False

        # first query with no ctx nodes. zero records should be updated
        yield self.core.query_contexts()
        self.assertTrue(self.notifier.assure_record_count(0))

        # all but 1 node have reported ok
        self.ctx.nodes = [
            _one_fake_ctx_node_ok(node_records[i]['public_ip'], _new_id(),
                                  _new_id()) for i in range(node_count - 1)
        ]

        yield self.core.query_contexts()
        self.assertTrue(self.notifier.assure_state(states.RUNNING))
        self.assertEqual(len(self.notifier.nodes), node_count - 1)

        # last node reports ok
        self.ctx.nodes.append(
            _one_fake_ctx_node_ok(node_records[-1]['public_ip'], _new_id(),
                                  _new_id()))

        self.ctx.complete = True
        yield self.core.query_contexts()
        self.assertTrue(self.notifier.assure_state(states.RUNNING))
        self.assertTrue(self.notifier.assure_record_count(1))

    @defer.inlineCallbacks
    def test_query_ctx_error(self):
        """When one context node reports an error, that node is expected to
        be RUNNING_FAILED while the others are RUNNING."""
        node_count = 3
        launch_id = _new_id()
        node_records = [
            make_node(launch_id, states.STARTED) for i in range(node_count)
        ]
        launch_record = make_launch(launch_id, states.PENDING, node_records)

        yield self.store.put_launch(launch_record)
        yield self.store.put_nodes(node_records)

        self.ctx.expected_count = len(node_records)
        self.ctx.complete = False
        self.ctx.error = False

        # all but 1 node have reported ok
        self.ctx.nodes = [
            _one_fake_ctx_node_ok(node_records[i]['public_ip'], _new_id(),
                                  _new_id()) for i in range(node_count - 1)
        ]
        self.ctx.nodes.append(
            _one_fake_ctx_node_error(node_records[-1]['public_ip'], _new_id(),
                                     _new_id()))

        ok_ids = [node_records[i]['node_id'] for i in range(node_count - 1)]
        error_ids = [node_records[-1]['node_id']]

        self.ctx.complete = True
        self.ctx.error = True

        yield self.core.query_contexts()
        self.assertTrue(self.notifier.assure_state(states.RUNNING, ok_ids))
        self.assertTrue(
            self.notifier.assure_state(states.RUNNING_FAILED, error_ids))

    @defer.inlineCallbacks
    def test_query_ctx_nodes_not_started(self):
        """With three PENDING nodes and one STARTED node in the launch, no
        context URI is queried."""
        launch_id = _new_id()
        node_records = [make_node(launch_id, states.PENDING) for i in range(3)]
        node_records.append(make_node(launch_id, states.STARTED))
        launch_record = make_launch(launch_id, states.PENDING, node_records)
        yield self.store.put_launch(launch_record)
        yield self.store.put_nodes(node_records)

        yield self.core.query_contexts()

        # ensure that no context was actually queried. See the note in
        # _query_one_context for the reason why this is important.
        self.assertEqual(len(self.ctx.queried_uris), 0)

    @defer.inlineCallbacks
    def test_query_ctx_permanent_broker_error(self):
        """A ContextNotFoundError from the context query expects every node
        to be RUNNING_FAILED and the launch to be FAILED."""
        node_count = 3
        launch_id = _new_id()
        node_records = [
            make_node(launch_id, states.STARTED) for i in range(node_count)
        ]
        node_ids = [node['node_id'] for node in node_records]
        launch_record = make_launch(launch_id, states.PENDING, node_records)
        yield self.store.put_launch(launch_record)
        yield self.store.put_nodes(node_records)

        self.ctx.query_error = ContextNotFoundError()
        yield self.core.query_contexts()

        self.assertTrue(
            self.notifier.assure_state(states.RUNNING_FAILED, node_ids))
        launch = yield self.store.get_launch(launch_id)
        self.assertEqual(launch['state'], states.FAILED)

    def test_update_node_ip_info(self):
        """update_node_ip_info copies the first public/private IP and keeps
        the previous values when the IaaS node reports empty lists."""
        node = dict(public_ip=None)
        iaas_node = Mock(public_ip=None, private_ip=None)
        update_node_ip_info(node, iaas_node)
        self.assertEqual(node['public_ip'], None)
        self.assertEqual(node['private_ip'], None)

        iaas_node = Mock(public_ip=["pub1"], private_ip=["priv1"])
        update_node_ip_info(node, iaas_node)
        self.assertEqual(node['public_ip'], "pub1")
        self.assertEqual(node['private_ip'], "priv1")

        # Empty lists must not wipe out the values recorded above.
        iaas_node = Mock(public_ip=[], private_ip=[])
        update_node_ip_info(node, iaas_node)
        self.assertEqual(node['public_ip'], "pub1")
        self.assertEqual(node['private_ip'], "priv1")

    def test_update_nodes_from_ctx(self):
        """Context nodes built from each node's public_ip all match: the
        result of update_nodes_from_context has one entry per node."""
        launch_id = _new_id()
        nodes = [make_node(launch_id, states.STARTED) for i in range(5)]
        ctx_nodes = [
            _one_fake_ctx_node_ok(node['public_ip'], _new_id(), _new_id())
            for node in nodes
        ]

        self.assertEqual(len(nodes),
                         len(update_nodes_from_context(nodes, ctx_nodes)))

    def test_update_nodes_from_ctx_with_hostname(self):
        """Same as above, but the node's public_ip value is supplied as the
        context node's hostname rather than its ip."""
        launch_id = _new_id()
        nodes = [make_node(launch_id, states.STARTED) for i in range(5)]
        # libcloud puts the hostname in the public_ip field
        ctx_nodes = [
            _one_fake_ctx_node_ok(ip=_new_id(),
                                  hostname=node['public_ip'],
                                  pubkey=_new_id()) for node in nodes
        ]

        self.assertEqual(len(nodes),
                         len(update_nodes_from_context(nodes, ctx_nodes)))

    @defer.inlineCallbacks
    def test_query_broker_exception(self):
        """A BrokerError for one launch's context must not stop the other
        launch's context from being queried and updated."""
        for i in range(2):
            launch_id = _new_id()
            node_records = [make_node(launch_id, states.STARTED)]
            launch_record = make_launch(launch_id, states.PENDING,
                                        node_records)

            yield self.store.put_launch(launch_record)
            yield self.store.put_nodes(node_records)

        # no guaranteed order here so grabbing first launch from store
        # and making that one return a BrokerError during context query.
        # The goal is to ensure that one error doesn't prevent querying
        # for other contexts.

        launches = yield self.store.get_launches(state=states.PENDING)
        error_launch = launches[0]
        error_launch_ctx = error_launch['context']['uri']
        ok_node_id = launches[1]['node_ids'][0]
        ok_node = yield self.store.get_node(ok_node_id)

        self.ctx.uri_query_error[error_launch_ctx] = BrokerError("bad broker")
        self.ctx.nodes = [
            _one_fake_ctx_node_ok(ok_node['public_ip'], _new_id(), _new_id())
        ]
        self.ctx.complete = True
        yield self.core.query_contexts()

        launches = yield self.store.get_launches()
        for launch in launches:
            self.assertIn(launch['context']['uri'], self.ctx.queried_uris)

            if launch['launch_id'] == error_launch['launch_id']:
                self.assertEqual(launch['state'], states.PENDING)
                expected_node_state = states.STARTED
            else:
                self.assertEqual(launch['state'], states.RUNNING)
                expected_node_state = states.RUNNING

            node = yield self.store.get_node(launch['node_ids'][0])
            self.assertEqual(node['state'], expected_node_state)

    @defer.inlineCallbacks
    def test_query_ctx_without_valid_nodes(self):
        """A launch whose only node is TERMINATING is not context-queried
        and is expected to become FAILED; other launches stay PENDING."""

        # if there are no nodes < TERMINATING, no broker query should happen
        for i in range(3):
            launch_id = _new_id()
            node_records = [make_node(launch_id, states.STARTED)]
            launch_record = make_launch(launch_id, states.PENDING,
                                        node_records)

            yield self.store.put_launch(launch_record)
            yield self.store.put_nodes(node_records)

        launches = yield self.store.get_launches(state=states.PENDING)
        error_launch = launches[0]

        # mark first launch's node as TERMINATING, should prevent
        # context query and result in launch being marked FAILED
        error_launch_node = yield self.store.get_node(
            error_launch['node_ids'][0])
        error_launch_node['state'] = states.TERMINATING
        yield self.store.put_node(error_launch_node)

        yield self.core.query_contexts()
        self.assertNotIn(error_launch['context']['uri'], self.ctx.queried_uris)

        launches = yield self.store.get_launches()
        for launch in launches:
            if launch['launch_id'] == error_launch['launch_id']:
                self.assertEqual(launch['state'], states.FAILED)
                expected_node_state = states.TERMINATING
            else:
                self.assertEqual(launch['state'], states.PENDING)
                expected_node_state = states.STARTED

            node = yield self.store.get_node(launch['node_ids'][0])
            self.assertEqual(node['state'], expected_node_state)

    @defer.inlineCallbacks
    def test_query_unexpected_exception(self):
        """core.query() must not propagate an exception raised by one of the
        methods it calls."""
        launch_id = _new_id()
        node_records = [make_node(launch_id, states.STARTED)]
        launch_record = make_launch(launch_id, states.PENDING, node_records)
        yield self.store.put_launch(launch_record)
        yield self.store.put_nodes(node_records)
        self.ctx.query_error = ValueError("bad programmer")

        # digging into internals a bit: patching one of the methods query()
        # calls to raise an exception. This will let us ensure exceptions do
        # not bubble up
        #
        # The stub is installed as a plain instance attribute, so it is not
        # bound to the core; accepting any arguments guarantees the intended
        # KeyError is raised however query() happens to call it.
        def raiser(*args, **kwargs):
            raise KeyError("notreallyaproblem")

        self.patch(self.core, 'query_nodes', raiser)

        yield self.core.query()  # ensure that exception doesn't bubble up

    @defer.inlineCallbacks
    def test_dump_state(self):
        """dump_state for two of three nodes notifies exactly those two,
        once each, with their stored records."""
        node_ids = []
        node_records = []
        for i in range(3):
            launch_id = _new_id()
            nodes = [make_node(launch_id, states.PENDING)]
            node_ids.append(nodes[0]['node_id'])
            node_records.extend(nodes)
            launch = make_launch(launch_id, states.PENDING, nodes)
            yield self.store.put_launch(launch)
            yield self.store.put_nodes(nodes)

        yield self.core.dump_state(node_ids[:2])

        # should have gotten notifications about the 2 nodes
        self.assertEqual(self.notifier.nodes_rec_count[node_ids[0]], 1)
        self.assertEqual(node_records[0], self.notifier.nodes[node_ids[0]])
        self.assertEqual(node_records[1], self.notifier.nodes[node_ids[1]])
        self.assertEqual(self.notifier.nodes_rec_count[node_ids[1]], 1)
        self.assertNotIn(node_ids[2], self.notifier.nodes)

    @defer.inlineCallbacks
    def test_mark_nodes_terminating(self):
        """mark_nodes_terminating moves only the listed nodes to
        TERMINATING, both in notifications and in the store."""
        launch_id = _new_id()
        node_records = [make_node(launch_id, states.RUNNING) for i in range(3)]
        launch_record = make_launch(launch_id, states.PENDING, node_records)

        yield self.store.put_launch(launch_record)
        yield self.store.put_nodes(node_records)

        first_two_node_ids = [
            node_records[0]['node_id'], node_records[1]['node_id']
        ]
        yield self.core.mark_nodes_terminating(first_two_node_ids)

        self.assertTrue(
            self.notifier.assure_state(states.TERMINATING,
                                       nodes=first_two_node_ids))
        self.assertNotIn(node_records[2]['node_id'], self.notifier.nodes)

        for node_id in first_two_node_ids:
            terminating_node = yield self.store.get_node(node_id)
            self.assertEqual(terminating_node['state'], states.TERMINATING)
示例#7
0
class ProvisionerCoreTests(unittest.TestCase):
    """Testing the provisioner core functionality
    """
    def setUp(self):
        """Build a ProvisionerCore wired to in-memory fakes.

        Registers two fake sites ("site1", "site2") and credentials for the
        caller "asterix" on each of them in the fake DTRS, and creates one
        initialized fake driver per site.
        """
        self.notifier = FakeProvisionerNotifier()
        self.store = ProvisionerStore()
        self.ctx = FakeContextClient()
        self.dtrs = FakeDTRS()

        # both sites point at the same description dict object
        fake_site = {"type": "fake"}
        for site_name in ("site1", "site2"):
            self.dtrs.sites[site_name] = fake_site

        # likewise one credentials dict is shared by both (caller, site) keys
        fake_credentials = {"access_key": "mykey", "secret_key": "mysecret"}
        for site_name in ("site1", "site2"):
            self.dtrs.credentials[("asterix", site_name)] = fake_credentials

        self.site1_driver = FakeNodeDriver()
        self.site2_driver = FakeNodeDriver()
        for driver in (self.site1_driver, self.site2_driver):
            driver.initialize()

        self.core = ProvisionerCore(
            store=self.store,
            notifier=self.notifier,
            dtrs=self.dtrs,
            context=self.ctx)

    def test_terminate_all(self):
        """terminate_all leaves every stored node TERMINATING or TERMINATED.

        Seeds the store with three 3-node launches (RUNNING, PENDING and
        TERMINATED, in that order) before calling terminate_all.
        """
        caller = 'asterix'
        for initial_state in (states.RUNNING, states.PENDING, states.TERMINATED):
            seeded_launch, seeded_nodes = make_launch_and_nodes(
                _new_id(), 3, initial_state, caller=caller)
            self.store.add_launch(seeded_launch)
            for seeded_node in seeded_nodes:
                self.store.add_node(seeded_node)

        self.core.terminate_all()

        all_nodes = self.store.get_nodes()
        self.assertEqual(9, len(all_nodes))
        final_states = (states.TERMINATING, states.TERMINATED)
        self.assertTrue(all(n['state'] in final_states for n in all_nodes))

    def test_prepare_dtrs_error(self):
        """A DTRS lookup error during prepare_provision is notified as FAILED."""
        self.dtrs.error = DeployableTypeLookupError()

        launch_id = _new_id()
        instance_ids = [_new_id()]
        self.core.prepare_provision(
            launch_id=launch_id,
            deployable_type="foo",
            instance_ids=instance_ids,
            site="chicago")

        failed = self.notifier.assure_state(states.FAILED)
        self.assertTrue(failed)

    def test_prepare_broker_error(self):
        """A context broker error during prepare_provision is notified as FAILED."""
        self.ctx.create_error = BrokerError("fake ctx create failed")
        self.dtrs.result = {
            'document': "<fake>document</fake>",
            "node": {},
        }

        launch_id = _new_id()
        instance_ids = [_new_id()]
        self.core.prepare_provision(
            launch_id=launch_id,
            deployable_type="foo",
            instance_ids=instance_ids,
            site="chicago")

        failed = self.notifier.assure_state(states.FAILED)
        self.assertTrue(failed)

    def test_prepare_execute(self):
        """A default prepare + execute round trip ends with PENDING notifications."""
        self._prepare_execute()
        reached_pending = self.notifier.assure_state(states.PENDING)
        self.assertTrue(reached_pending)

    def test_prepare_execute_iaas_fail(self):
        """An IaaS credentials error raised by create_node is notified as FAILED."""
        with patch('epu.provisioner.test.util.FakeNodeDriver.create_node') as mock_method:
            # side_effect makes the mock *raise* the error when called;
            # return_value would only hand the exception object back to the
            # caller as if it were a successfully created node.
            mock_method.side_effect = InvalidCredsError()
            self._prepare_execute()
        self.assertTrue(self.notifier.assure_state(states.FAILED))

    def test_prepare_execute_no_ctx(self):
        """Prepare + execute still reaches PENDING with core.context set to None."""
        self.core.context = None

        # just in case: arm both error hooks on the fake context client
        # (presumably so that any accidental use of it fails -- see the fake)
        for hook in ('create_error', 'query_error'):
            setattr(self.ctx, hook, NotImplementedError())

        self._prepare_execute(context_enabled=False)
        reached_pending = self.notifier.assure_state(states.PENDING)
        self.assertTrue(reached_pending)

    def test_prepare_execute_existing_launch(self):
        """Submitting the same launch/instance IDs twice still ends in PENDING.

        The REQUESTED-state check inside the helper is only performed for the
        first submission.
        """
        self.core.context = None
        launch_id = _new_id()
        instance_id = _new_id()

        for check_requested in (True, False):
            self._prepare_execute(
                launch_id=launch_id,
                instance_ids=[instance_id],
                context_enabled=False,
                assure_state=check_requested)

        self.assertTrue(self.notifier.assure_state(states.PENDING))

    def _prepare_execute(self, launch_id=None, instance_ids=None,
                         context_enabled=True, assure_state=True):
        """Run prepare_provision followed by execute_provision for one node.

        launch_id / instance_ids -- IDs to use; fresh ones are generated when
            they are missing or empty.
        context_enabled -- when true, verify the launch carries the context
            created by the fake client; otherwise verify it carries None.
        assure_state -- when true, verify REQUESTED was notified before
            execute_provision is called.
        """
        self.dtrs.result = {
            'document': _get_one_node_cluster_doc("node1", "image1"),
            "node": {},
        }

        caller = "asterix"
        launch_id = launch_id or _new_id()
        instance_ids = instance_ids or [_new_id()]

        launch, nodes = self.core.prepare_provision(
            launch_id=launch_id, deployable_type="foo",
            instance_ids=instance_ids, site="site1", caller=caller)

        self.assertEqual(len(nodes), 1)
        self.assertEqual(nodes[0]['node_id'], instance_ids[0])
        self.assertEqual(launch['launch_id'], launch_id)
        self.assertEqual(launch['node_ids'], instance_ids)

        if not context_enabled:
            self.assertEqual(launch['context'], None)
        else:
            self.assertTrue(self.ctx.last_create)
            self.assertEqual(launch['context'], self.ctx.last_create)
            for required_key in ('uri', 'secret', 'context_id', 'broker_uri'):
                self.assertIn(required_key, launch['context'])

        if assure_state:
            self.assertTrue(self.notifier.assure_state(states.REQUESTED))

        self.core.execute_provision(launch, nodes, caller)

    def test_execute_bad_doc(self):
        """execute_provision with a malformed cluster document notifies FAILED."""
        caller = "asterix"
        ctx = self.ctx.create()
        bad_launch = {
            'launch_id': "thelaunchid",
            'document': "<this><isnt><a><real><doc>",
            'deployable_type': "dt",
            'context': ctx,
            'state': states.PENDING,
            'node_ids': ['node1'],
        }
        only_node = {
            'node_id': 'node1',
            'launch_id': "thelaunchid",
            'state': states.REQUESTED,
            'creator': caller,
        }
        nodes = [only_node]

        self.store.add_launch(bad_launch)
        self.store.add_node(nodes[0])

        self.core.execute_provision(bad_launch, nodes, caller)
        self.assertTrue(self.notifier.assure_state(states.FAILED))

        # TODO this should be a better error coming from nimboss
        # self.assertEqual(self.notifier.nodes['node1']['state_desc'], "CONTEXT_DOC_INVALID")

    def test_execute_bad_doc_nodes(self):
        """A node whose ctx_name differs from the document's node notifies FAILED."""
        caller = 'asterix'
        ctx = self.ctx.create()
        launch_record = dict(
            launch_id="thelaunchid",
            document=_get_one_node_cluster_doc("node1", "image1"),
            deployable_type="dt",
            context=ctx,
            state=states.PENDING,
            node_ids=['node1'])
        # the document describes "node1", but this record claims another name
        mismatched_node = dict(
            node_id='node1',
            launch_id="thelaunchid",
            state=states.REQUESTED,
            ctx_name="adifferentname",
            creator=caller)
        nodes = [mismatched_node]

        self.store.add_launch(launch_record)
        self.store.add_node(nodes[0])

        self.core.execute_provision(launch_record, nodes, caller)
        self.assertTrue(self.notifier.assure_state(states.FAILED))

    def test_execute_bad_doc_node_count(self):
        """Supplying two nodes for a one-node document notifies FAILED."""
        caller = "asterix"
        ctx = self.ctx.create()
        launch_record = dict(
            launch_id="thelaunchid",
            document=_get_one_node_cluster_doc("node1", "image1"),
            deployable_type="dt",
            context=ctx,
            state=states.PENDING,
            node_ids=['node1'])

        # two nodes where doc expects 1
        nodes = [
            {'node_id': node_id, 'launch_id': "thelaunchid",
             'state': states.REQUESTED, 'ctx_name': "node1",
             'creator': caller}
            for node_id in ('node1', 'node2')
        ]

        self.store.add_launch(launch_record)
        for extra_node in nodes:
            self.store.add_node(extra_node)

        self.core.execute_provision(launch_record, nodes, caller)
        self.assertTrue(self.notifier.assure_state(states.FAILED))

    def test_query_missing_node_within_window(self):
        """A node PENDING for only 30s that IaaS does not list is not reported."""
        launch_id = _new_id()
        node_id = _new_id()
        caller = 'asterix'
        pending_since = time.time() - 30.0
        launch = dict(launch_id=launch_id, node_ids=[node_id],
                      state=states.PENDING, creator=caller)
        node = dict(launch_id=launch_id, node_id=node_id,
                    state=states.PENDING, pending_timestamp=pending_since,
                    creator=caller)
        self.store.add_launch(launch)
        self.store.add_node(node)

        # the driver reports no nodes at all
        with patch.object(FakeNodeDriver, 'list_nodes', return_value=[]):
            self.core.query_one_site('site1', [node], caller=caller)

        notified_count = len(self.notifier.nodes)
        self.assertEqual(notified_count, 0)

    def test_query_missing_started_node_within_window(self):
        """A STARTED node missing from IaaS is FAILED even 30s after pending."""
        launch_id = _new_id()
        node_id = _new_id()
        caller = 'asterix'
        pending_since = time.time() - 30.0
        launch = dict(launch_id=launch_id, node_ids=[node_id],
                      state=states.PENDING, creator=caller)
        node = dict(launch_id=launch_id, node_id=node_id,
                    state=states.STARTED, pending_timestamp=pending_since,
                    creator=caller)
        self.store.add_launch(launch)
        self.store.add_node(node)

        # the driver reports no nodes at all
        with patch.object(FakeNodeDriver, 'list_nodes', return_value=[]):
            self.core.query_one_site('site1', [node], caller=caller)

        notified_count = len(self.notifier.nodes)
        self.assertEqual(notified_count, 1)
        self.assertTrue(self.notifier.assure_state(states.FAILED))

    def test_query_missing_node_past_window(self):
        """A node PENDING for 120s that IaaS does not list is notified FAILED."""
        launch_id = _new_id()
        node_id = _new_id()

        caller = 'asterix'
        pending_since = time.time() - 120.0
        launch = dict(launch_id=launch_id, node_ids=[node_id],
                      state=states.PENDING, creator=caller)
        node = dict(launch_id=launch_id, node_id=node_id,
                    state=states.PENDING, pending_timestamp=pending_since,
                    creator=caller)
        self.store.add_launch(launch)
        self.store.add_node(node)

        # the driver reports no nodes at all
        with patch.object(FakeNodeDriver, 'list_nodes', return_value=[]):
            self.core.query_one_site('site1', [node], caller=caller)

        notified_count = len(self.notifier.nodes)
        self.assertEqual(notified_count, 1)
        self.assertTrue(self.notifier.assure_state(states.FAILED))

    def test_query_missing_node_terminating(self):
        """A TERMINATING node that IaaS no longer lists is notified TERMINATED."""
        launch_id = _new_id()
        node_id = _new_id()

        caller = 'asterix'
        launch = dict(launch_id=launch_id, node_ids=[node_id],
                      state=states.RUNNING, creator=caller)
        node = dict(launch_id=launch_id, node_id=node_id,
                    state=states.TERMINATING, creator=caller)
        self.store.add_launch(launch)
        self.store.add_node(node)

        # the driver reports no nodes at all
        with patch.object(FakeNodeDriver, 'list_nodes', return_value=[]):
            self.core.query_one_site('site1', [node], caller=caller)

        notified_count = len(self.notifier.nodes)
        self.assertEqual(notified_count, 1)
        self.assertTrue(self.notifier.assure_state(states.TERMINATED))

    def test_query(self):
        """Walk one node through query -> STARTED -> terminate -> TERMINATED.

        Creates a running IaaS node on the site1 fake driver, stores a PENDING
        node record pointing at it (plus a REQUESTED node with no IaaS ID),
        and checks that query_one_site copies the IaaS IPs onto the stored
        node and moves it to STARTED. After terminate_nodes and another
        query, the node is expected to be TERMINATED with its IPs retained.
        """
        caller = "asterix"
        launch_id = _new_id()
        node_id = _new_id()

        # create the IaaS-side instance and flag it as running in the driver
        iaas_node = self.site1_driver.create_node()[0]
        self.site1_driver.set_node_running(iaas_node.id)

        # node has been pending for two minutes
        ts = time.time() - 120.0
        launch = {
            'launch_id': launch_id, 'node_ids': [node_id],
            'state': states.PENDING,
            'creator': caller}
        node = {'launch_id': launch_id,
                'node_id': node_id,
                'state': states.PENDING,
                'pending_timestamp': ts,
                'iaas_id': iaas_node.id,
                'creator': caller,
                'site': 'site1'}

        # a second node that is still only REQUESTED (no iaas_id yet)
        req_node = {'launch_id': launch_id,
                    'node_id': _new_id(),
                    'state': states.REQUESTED}
        nodes = [node, req_node]
        self.store.add_launch(launch)
        self.store.add_node(node)
        self.store.add_node(req_node)

        self.core.query_one_site('site1', nodes, caller=caller)

        # re-read the node from the store to see what the query persisted
        node = self.store.get_node(node_id)
        self.assertEqual(node.get('public_ip'), iaas_node.public_ip)
        self.assertEqual(node.get('private_ip'), iaas_node.private_ip)
        self.assertEqual(node.get('state'), states.STARTED)

        # query again should detect no changes
        self.core.query_one_site('site1', nodes, caller=caller)

        # now destroy
        self.core.terminate_nodes([node_id], remove_terminating=False)
        node = self.store.get_node(node_id)
        self.core.query_one_site('site1', [node], caller=caller)

        # IP information must survive termination
        node = self.store.get_node(node_id)
        self.assertEqual(node['public_ip'], iaas_node.public_ip)
        self.assertEqual(node['private_ip'], iaas_node.private_ip)
        self.assertEqual(node['state'], states.TERMINATED)

    def test_terminate_requested_node(self):
        """Terminating a node that is still REQUESTED stores it as TERMINATED."""
        caller = "asterix"
        launch_id = _new_id()
        node_id = _new_id()

        launch = dict(launch_id=launch_id, node_ids=[node_id],
                      state=states.PENDING, creator=caller)
        req_node = dict(launch_id=launch_id, node_id=node_id,
                        state=states.REQUESTED, site='site1')
        self.store.add_launch(launch)
        self.store.add_node(req_node)

        # destroy
        self.core.terminate_nodes([node_id], remove_terminating=False)

        stored = self.store.get_node(node_id)
        self.assertEqual(stored['state'], states.TERMINATED)

    def test_query_no_contextualization(self):
        """With core.context set to None a queried running node becomes RUNNING."""
        self.core.context = None

        launch_id = _new_id()
        node_id = _new_id()
        caller = 'asterix'

        iaas_node = self.site1_driver.create_node()[0]
        self.site1_driver.set_node_running(iaas_node.id)

        pending_since = time.time() - 120.0
        launch = dict(launch_id=launch_id, node_ids=[node_id],
                      state=states.PENDING, creator=caller)
        node = dict(launch_id=launch_id, node_id=node_id,
                    state=states.PENDING, pending_timestamp=pending_since,
                    iaas_id=iaas_node.id, site='site1', creator=caller)
        req_node = dict(launch_id=launch_id, node_id=_new_id(),
                        state=states.REQUESTED)
        nodes = [node, req_node]

        self.store.add_launch(launch)
        for record in nodes:
            self.store.add_node(record)

        self.core.query_one_site('site1', nodes, caller=caller)

        stored = self.store.get_node(node_id)
        self.assertEqual(stored.get('public_ip'), iaas_node.public_ip)
        self.assertEqual(stored.get('private_ip'), iaas_node.private_ip)

        # since contextualization is disabled we should jump straight
        # to RUNNING
        self.assertEqual(stored.get('state'), states.RUNNING)

    @raises(timeout)
    def test_query_iaas_timeout(self):
        """A timeout raised by the driver's list_nodes escapes query_one_site.

        The @raises decorator makes the test pass only if `timeout`
        propagates out of the test body.
        """
        launch_id = _new_id()
        node_id = _new_id()

        iaas_node = self.site1_driver.create_node()[0]
        self.site1_driver.set_node_running(iaas_node.id)

        caller = 'asterix'
        ts = time.time() - 120.0
        launch = {
            'launch_id': launch_id, 'node_ids': [node_id],
            'state': states.PENDING,
            'creator': caller}
        node = {'name': 'hello',
                'launch_id': launch_id,
                'node_id': node_id,
                'state': states.PENDING,
                'pending_timestamp': ts,
                'iaas_id': iaas_node.id,
                'site': 'site1',
                'creator': caller}

        req_node = {'launch_id': launch_id,
                    'node_id': _new_id(),
                    'state': states.REQUESTED}
        nodes = [node, req_node]
        self.store.add_launch(launch)
        self.store.add_node(node)
        self.store.add_node(req_node)

        def x(*args, **kwargs):
            # Accept whatever list_nodes is called with: the mock forwards
            # its call arguments to side_effect, and a zero-argument stub
            # would fail with TypeError instead of the timeout under test.
            raise timeout("Took too long to query iaas")
        self.core._IAAS_DEFAULT_TIMEOUT = 0.5

        with patch.object(FakeNodeDriver, 'list_nodes', side_effect=x):
            self.core.query_one_site('site1', nodes, caller=caller)

    def test_launch_one_iaas_full(self):
        """An "InstanceLimitExceeded" launch error is reported as IAAS_FULL.

        Both the node notification and the stored launch must end up FAILED.
        """
        def refuse_launch(**kwargs):
            raise Exception("InstanceLimitExceeded: too many vms :(")

        with patch.object(FakeNodeDriver, 'create_node', side_effect=refuse_launch):
            self.core._IAAS_DEFAULT_TIMEOUT = 0.5

            node_id = _new_id()
            launch_id = _new_id()
            self._prepare_execute(launch_id=launch_id, instance_ids=[node_id])

            self.assertTrue(self.notifier.assure_state(states.FAILED))
            reported_desc = self.notifier.nodes[node_id]['state_desc']
            self.assertIn('IAAS_FULL', reported_desc)
            stored_launch = self.store.get_launch(launch_id)
            self.assertEqual(stored_launch['state'], states.FAILED)

    def test_launch_one_iaas_timeout(self):
        """A timeout raised by create_node is reported as IAAS_TIMEOUT.

        Both the node notification and the stored launch must end up FAILED.
        """
        def stall_launch(**kwargs):
            raise timeout("Launch took too long")

        with patch.object(FakeNodeDriver, 'create_node', side_effect=stall_launch):
            self.core._IAAS_DEFAULT_TIMEOUT = 0.5

            node_id = _new_id()
            launch_id = _new_id()
            self._prepare_execute(launch_id=launch_id, instance_ids=[node_id])

            self.assertTrue(self.notifier.assure_state(states.FAILED))
            reported_desc = self.notifier.nodes[node_id]['state_desc']
            self.assertEqual(reported_desc, 'IAAS_TIMEOUT')
            stored_launch = self.store.get_launch(launch_id)
            self.assertEqual(stored_launch['state'], states.FAILED)

    def test_query_ctx(self):
        """Exercise query_contexts as context nodes report in over three polls.

        Phase 1: the fake context has no nodes -> no notifications expected.
        Phase 2: all but one node report ok -> those nodes notified RUNNING.
        Phase 3: the last node reports ok and the context is marked complete
        -> every node RUNNING, each notified exactly once.
        """
        node_count = 3
        launch_id = _new_id()
        node_records = [make_node(launch_id, states.STARTED)
                        for i in range(node_count)]
        launch_record = make_launch(launch_id, states.PENDING,
                                    node_records)

        self.store.add_launch(launch_record)
        for node in node_records:
            self.store.add_node(node)

        # configure what the fake context client will answer
        self.ctx.expected_count = len(node_records)
        self.ctx.complete = False
        self.ctx.error = False

        # first query with no ctx nodes. zero records should be updated
        self.core.query_contexts()
        self.assertTrue(self.notifier.assure_record_count(0))

        # all but 1 node have reported ok
        self.ctx.nodes = [_one_fake_ctx_node_ok(node_records[i]['public_ip'],
                          _new_id(), _new_id()) for i in range(node_count - 1)]

        self.core.query_contexts()
        self.assertTrue(self.notifier.assure_state(states.RUNNING))
        self.assertEqual(len(self.notifier.nodes), node_count - 1)

        # last node reports ok
        self.ctx.nodes.append(_one_fake_ctx_node_ok(node_records[-1]['public_ip'],
                              _new_id(), _new_id()))

        self.ctx.complete = True
        self.core.query_contexts()
        self.assertTrue(self.notifier.assure_state(states.RUNNING))
        # one record per node: earlier nodes must not have been re-notified
        self.assertTrue(self.notifier.assure_record_count(1))

    def test_query_ctx_error(self):
        """A context error on one node marks only that node RUNNING_FAILED."""
        node_count = 3
        launch_id = _new_id()
        node_records = []
        for _ in range(node_count):
            node_records.append(make_node(launch_id, states.STARTED))
        launch_record = make_launch(launch_id, states.PENDING, node_records)

        self.store.add_launch(launch_record)
        for record in node_records:
            self.store.add_node(record)

        self.ctx.expected_count = len(node_records)
        self.ctx.complete = False
        self.ctx.error = False

        healthy_records = node_records[:node_count - 1]
        broken_record = node_records[-1]

        # all but 1 node have reported ok
        self.ctx.nodes = [
            _one_fake_ctx_node_ok(record['public_ip'], _new_id(), _new_id())
            for record in healthy_records]
        # ... and the last one reports an error
        self.ctx.nodes.append(
            _one_fake_ctx_node_error(broken_record['public_ip'],
                                     _new_id(), _new_id()))

        ok_ids = [record['node_id'] for record in healthy_records]
        error_ids = [broken_record['node_id']]

        self.ctx.complete = True
        self.ctx.error = True

        self.core.query_contexts()
        self.assertTrue(self.notifier.assure_state(states.RUNNING, ok_ids))
        self.assertTrue(
            self.notifier.assure_state(states.RUNNING_FAILED, error_ids))

    def test_query_ctx_nodes_not_pending(self):
        """No context is queried while some of a launch's nodes are REQUESTED."""
        launch_id = _new_id()
        node_states = [states.REQUESTED] * 3 + [states.STARTED]
        node_records = [make_node(launch_id, node_state)
                        for node_state in node_states]
        launch_record = make_launch(launch_id, states.PENDING, node_records)

        self.store.add_launch(launch_record)
        for record in node_records:
            self.store.add_node(record)

        self.core.query_contexts()

        # ensure that no context was actually queried. See the note in
        # _query_one_context for the reason why this is important.
        queried_count = len(self.ctx.queried_uris)
        self.assertEqual(queried_count, 0)

    def test_query_ctx_nodes_pending_but_actually_running(self):
        """
        During large runs a few EC2 instances only get their status switched
        to "running" long after they were requested (up to 15 minutes,
        versus roughly 30 seconds normally).
        Those instances appear to have booted successfully well before that:
        they are reachable through SSH and the context broker has OK'ed them.
        Test that such "pending but actually running" instances are detected
        early.
        """
        launch_id = _new_id()
        node_states = [states.PENDING] * 3 + [states.STARTED]
        node_records = [make_node(launch_id, node_state)
                        for node_state in node_states]
        launch_record = make_launch(launch_id, states.PENDING, node_records)

        self.store.add_launch(launch_record)
        for record in node_records:
            self.store.add_node(record)

        # the broker reports every node as ok, including the PENDING ones
        self.ctx.nodes = [
            _one_fake_ctx_node_ok(record['public_ip'], _new_id(), _new_id())
            for record in node_records]
        self.ctx.complete = True

        self.core.query_contexts()

        launch = self.store.get_launch(launch_id)
        self.assertEqual(launch['state'], states.RUNNING)

        for node_id in launch['node_ids']:
            stored = self.store.get_node(node_id)
            self.assertEqual(states.RUNNING, stored['state'])

    def test_query_ctx_permanent_broker_error(self):
        """ContextNotFoundError fails the launch and all of its nodes."""
        node_count = 3
        launch_id = _new_id()
        node_records = []
        for _ in range(node_count):
            node_records.append(make_node(launch_id, states.STARTED))
        node_ids = [record['node_id'] for record in node_records]
        launch_record = make_launch(launch_id, states.PENDING, node_records)

        self.store.add_launch(launch_record)
        for record in node_records:
            self.store.add_node(record)

        self.ctx.query_error = ContextNotFoundError()
        self.core.query_contexts()

        nodes_failed = self.notifier.assure_state(
            states.RUNNING_FAILED, node_ids)
        self.assertTrue(nodes_failed)
        stored_launch = self.store.get_launch(launch_id)
        self.assertEqual(stored_launch['state'], states.FAILED)

    def test_query_ctx_with_one_node_timeout(self):
        """A node silent for longer than INSTANCE_READY_TIMEOUT is RUNNING_FAILED."""
        launch_id = _new_id()
        node_record = make_node(launch_id, states.STARTED)
        launch_record = make_launch(launch_id, states.PENDING, [node_record])

        # started running 10 seconds longer ago than the timeout allows
        now = time.time()
        node_record['running_timestamp'] = now - INSTANCE_READY_TIMEOUT - 10

        self.store.add_launch(launch_record)
        self.store.add_node(node_record)

        self.ctx.expected_count = 1
        self.ctx.complete = False
        self.ctx.error = False
        self.ctx.nodes = []  # nothing has checked in with the broker

        self.core.query_contexts()

        timed_out = self.notifier.assure_state(states.RUNNING_FAILED)
        self.assertTrue(timed_out)
        self.assertTrue(self.notifier.assure_record_count(1))

    def test_query_ctx_with_several_nodes_timeout(self):
        """Only the node past INSTANCE_READY_TIMEOUT is marked RUNNING_FAILED.

        Three STARTED nodes: the first two are 10 seconds inside the timeout
        and report ok to the broker; the last is 10 seconds past it and never
        reports. The first two must be RUNNING, the last RUNNING_FAILED with
        exactly one notification record.
        """
        node_count = 3
        launch_id = _new_id()
        node_records = [make_node(launch_id, states.STARTED)
                        for i in range(node_count)]
        launch_record = make_launch(launch_id, states.PENDING,
                                    node_records)
        # a real list (not a lazy map object) because it is sliced below
        node_ids = [record['node_id'] for record in node_records]

        ts = time.time()
        for i in range(node_count - 1):
            node_records[i]['running_timestamp'] = ts - INSTANCE_READY_TIMEOUT + 10
        node_records[-1]['running_timestamp'] = ts - INSTANCE_READY_TIMEOUT - 10

        self.store.add_launch(launch_record)
        for node in node_records:
            self.store.add_node(node)

        self.ctx.expected_count = len(node_records)
        self.ctx.complete = False
        self.ctx.error = False

        # all but 1 node have reported ok
        self.ctx.nodes = [_one_fake_ctx_node_ok(node_records[i]['public_ip'],
                          _new_id(), _new_id()) for i in range(node_count - 1)]

        self.core.query_contexts()

        ok_ids = node_ids[:node_count - 1]
        timed_out_ids = node_ids[node_count - 1:]
        self.assertTrue(self.notifier.assure_state(states.RUNNING, ok_ids))
        self.assertEqual(len(self.notifier.nodes), node_count)
        self.assertTrue(self.notifier.assure_state(states.RUNNING_FAILED, timed_out_ids))
        self.assertTrue(self.notifier.assure_record_count(1, timed_out_ids))

    def test_query_ctx_with_no_timeout(self):
        """A node past the timeout that has checked in (not done) is left alone."""
        caller = "asterix"
        launch_id = _new_id()
        node_record = make_node(launch_id, states.STARTED)
        launch_record = make_launch(
            launch_id, states.PENDING, [node_record], caller=caller)

        # started running 10 seconds longer ago than the timeout allows
        now = time.time()
        node_record['running_timestamp'] = now - INSTANCE_READY_TIMEOUT - 10

        self.store.add_launch(launch_record)
        self.store.add_node(node_record)

        self.ctx.expected_count = 1
        self.ctx.complete = False
        self.ctx.error = False

        # the broker knows the node, but it has not finished yet
        checked_in = _one_fake_ctx_node_not_done(
            node_record['public_ip'], _new_id(), _new_id())
        self.ctx.nodes = [checked_in]
        self.core.query_contexts()

        nothing_notified = self.notifier.assure_record_count(0)
        self.assertTrue(nothing_notified)

    def test_update_node_ip_info(self):
        """update_node_ip_info copies IaaS address info and keeps it once set.

        Covers three IaaS node shapes in sequence: all-None values, populated
        single-element lists, and empty lists (which must not overwrite the
        values recorded by the previous update).
        """
        node = {'public_ip': None}

        unset_iaas = Mock(public_ip=None, private_ip=None,
                          extra={'dns_name': None})
        update_node_ip_info(node, unset_iaas)
        self.assertEqual(node['public_ip'], None)
        self.assertEqual(node['private_ip'], None)

        expected_fields = (
            ('public_ip', "pub1"),
            ('private_ip', "priv1"),
            ('hostname', "host"),
        )

        populated_iaas = Mock(public_ip=["pub1"], private_ip=["priv1"],
                              extra={'dns_name': 'host'})
        update_node_ip_info(node, populated_iaas)
        for field, value in expected_fields:
            self.assertEqual(node[field], value)

        # empty values from IaaS must leave the recorded info untouched
        emptied_iaas = Mock(public_ip=[], private_ip=[],
                            extra={'dns_name': []})
        update_node_ip_info(node, emptied_iaas)
        for field, value in expected_fields:
            self.assertEqual(node[field], value)

    def test_update_nodes_from_ctx(self):
        """Every node is matched and updated when ctx nodes carry its public IP."""
        launch_id = _new_id()
        nodes = [make_node(launch_id, states.STARTED)
                 for i in range(5)]
        ctx_nodes = [_one_fake_ctx_node_ok(node['public_ip'], _new_id(),
                     _new_id()) for node in nodes]

        matched = match_nodes_from_context(nodes, ctx_nodes)
        updated = update_nodes_from_context(matched)
        # assertEqual rather than the deprecated assertEquals alias
        self.assertEqual(len(nodes), len(updated))

    def test_update_nodes_from_ctx_with_hostname(self):
        """Nodes are also matched when the ctx hostname equals their public_ip."""
        launch_id = _new_id()
        nodes = [make_node(launch_id, states.STARTED)
                 for i in range(5)]
        # libcloud puts the hostname in the public_ip field
        ctx_nodes = [_one_fake_ctx_node_ok(ip=_new_id(), hostname=node['public_ip'],
                     pubkey=_new_id()) for node in nodes]

        matched = match_nodes_from_context(nodes, ctx_nodes)
        updated = update_nodes_from_context(matched)
        # assertEqual rather than the deprecated assertEquals alias
        self.assertEqual(len(nodes), len(updated))

    def test_query_broker_exception(self):
        """A BrokerError for one launch must not block querying the other.

        Stores two single-node launches, makes the context query for one of
        them raise BrokerError, and checks that both context URIs were still
        queried: the failing launch stays PENDING/STARTED while the other
        one advances to RUNNING.
        """
        caller = "asterix"
        for i in range(2):
            launch_id = _new_id()
            node_records = [make_node(launch_id, states.STARTED)]
            launch_record = make_launch(launch_id, states.PENDING, node_records, caller=caller)

            self.store.add_launch(launch_record)
            for node in node_records:
                self.store.add_node(node)

        # no guaranteed order here so grabbing first launch from store
        # and making that one return a BrokerError during context query.
        # The goal is to ensure that one error doesn't prevent querying
        # for other contexts.

        launches = self.store.get_launches(state=states.PENDING)
        error_launch = launches[0]
        error_launch_ctx = error_launch['context']['uri']
        ok_node_id = launches[1]['node_ids'][0]
        ok_node = self.store.get_node(ok_node_id)

        # only the healthy launch's node reports ok to the fake broker
        self.ctx.uri_query_error[error_launch_ctx] = BrokerError("bad broker")
        self.ctx.nodes = [_one_fake_ctx_node_ok(ok_node['public_ip'],
                          _new_id(), _new_id())]
        self.ctx.complete = True
        self.core.query_contexts()

        launches = self.store.get_launches()
        for launch in launches:
            # both contexts must have been attempted, error or not
            self.assertIn(launch['context']['uri'], self.ctx.queried_uris)

            if launch['launch_id'] == error_launch['launch_id']:
                self.assertEqual(launch['state'], states.PENDING)
                expected_node_state = states.STARTED
            else:
                self.assertEqual(launch['state'], states.RUNNING)
                expected_node_state = states.RUNNING

            node = self.store.get_node(launch['node_ids'][0])
            self.assertEqual(node['state'], expected_node_state)

    def test_query_ctx_without_valid_nodes(self):
        """A launch whose only node is TERMINATING is failed without a query.

        Stores three single-node launches, flips one launch's node to
        TERMINATING, and checks that its context URI is never queried and the
        launch becomes FAILED, while the other launches stay PENDING with
        STARTED nodes.
        """

        caller = "asterix"
        # if there are no nodes < TERMINATING, no broker query should happen
        for i in range(3):
            launch_id = _new_id()
            node_records = [make_node(launch_id, states.STARTED)]
            launch_record = make_launch(launch_id, states.PENDING,
                                        node_records, caller=caller)

            self.store.add_launch(launch_record)
            for node in node_records:
                self.store.add_node(node)

        # store order is not controlled here; whichever launch comes back
        # first is used as the one to break
        launches = self.store.get_launches(state=states.PENDING)
        error_launch = launches[0]

        # mark first launch's node as TERMINATING, should prevent
        # context query and result in launch being marked FAILED
        error_launch_node = self.store.get_node(error_launch['node_ids'][0])
        error_launch_node['state'] = states.TERMINATING
        self.store.update_node(error_launch_node)

        self.core.query_contexts()
        self.assertNotIn(error_launch['context']['uri'], self.ctx.queried_uris)

        launches = self.store.get_launches()
        for launch in launches:
            if launch['launch_id'] == error_launch['launch_id']:
                self.assertEqual(launch['state'], states.FAILED)
                expected_node_state = states.TERMINATING
            else:
                self.assertEqual(launch['state'], states.PENDING)
                expected_node_state = states.STARTED

            node = self.store.get_node(launch['node_ids'][0])
            self.assertEqual(node['state'], expected_node_state)

    def test_dump_state(self):
        """dump_state notifies once per requested node and skips the rest.

        Stores three single-node PENDING launches and dumps state for the
        first two node IDs only.
        """
        caller = "asterix"
        node_ids = []
        node_records = []
        for _ in range(3):
            launch_id = _new_id()
            batch = [make_node(launch_id, states.PENDING)]
            node_ids.append(batch[0]['node_id'])
            node_records.extend(batch)
            launch = make_launch(launch_id, states.PENDING, batch,
                                 caller=caller)
            self.store.add_launch(launch)
            for record in batch:
                self.store.add_node(record)

        first_id, second_id, skipped_id = node_ids
        self.core.dump_state([first_id, second_id])

        # should have gotten notifications about the 2 nodes
        rec_counts = self.notifier.nodes_rec_count
        notified = self.notifier.nodes
        self.assertEqual(rec_counts[first_id], 1)
        self.assertEqual(node_records[0], notified[first_id])
        self.assertEqual(node_records[1], notified[second_id])
        self.assertEqual(rec_counts[second_id], 1)
        self.assertNotIn(skipped_id, notified)

    def test_mark_nodes_terminating(self):
        """mark_nodes_terminating moves only the named nodes to TERMINATING."""
        owner = "asterix"
        the_launch_id = _new_id()
        running_nodes = []
        for _ in range(3):
            running_nodes.append(make_node(the_launch_id, states.RUNNING))
        pending_launch = make_launch(the_launch_id, states.PENDING,
                                     running_nodes, caller=owner)

        self.store.add_launch(pending_launch)
        for record in running_nodes:
            self.store.add_node(record)

        # terminate the first two nodes and leave the third alone
        marked_ids = [record['node_id'] for record in running_nodes[:2]]
        self.core.mark_nodes_terminating(marked_ids)

        notified = self.notifier.assure_state(states.TERMINATING,
                                              nodes=marked_ids)
        self.assertTrue(notified)
        self.assertNotIn(running_nodes[2]['node_id'], self.notifier.nodes)

        # the stored records must reflect the new state as well
        for marked_id in marked_ids:
            stored = self.store.get_node(marked_id)
            self.assertEqual(stored['state'], states.TERMINATING)

    def test_describe(self):
        """describe_nodes returns all nodes or a requested subset, and raises KeyError for an unknown id."""
        owner = "asterix"
        known_ids = []
        for _ in range(3):
            new_launch_id = _new_id()
            running = [make_node(new_launch_id, states.RUNNING)]
            known_ids.append(running[0]['node_id'])
            pending_launch = make_launch(new_launch_id, states.PENDING,
                                         running, caller=owner)
            self.store.add_launch(pending_launch)
            for record in running:
                self.store.add_node(record)

        # no argument: every stored node comes back, without the version key
        described = self.core.describe_nodes()
        self.assertEqual(set(d['node_id'] for d in described), set(known_ids))
        self.assertFalse(any(VERSION_KEY in d for d in described))

        # explicit full list of ids
        described = self.core.describe_nodes(known_ids)
        self.assertEqual(set(d['node_id'] for d in described), set(known_ids))

        # everything but the first id
        tail_ids = known_ids[1:]
        partial = self.core.describe_nodes(tail_ids)
        self.assertEqual(set(d['node_id'] for d in partial),
                         set(known_ids[1:]))

        # a single id
        single = self.core.describe_nodes([known_ids[0]])
        self.assertEqual(len(single), 1)
        only = single[0]
        self.assertEqual(only['node_id'], known_ids[0])
        self.assertEqual(only['state'], states.RUNNING)

        self.assertNotIn(VERSION_KEY, single[0])

        # an unknown id anywhere in the request must raise KeyError
        try:
            self.core.describe_nodes([known_ids[0], "not-a-real-node"])
        except KeyError:
            pass
        else:
            self.fail("Expected exception for bad node_id")

    def test_maybe_update_node(self):
        """maybe_update_node applies a change that was made on an older copy of the record."""
        original = {'launch_id': "somelaunch", 'node_id': "anode",
                    'state': states.REQUESTED}
        self.store.add_node(original)

        # fetch the record before the store is advanced below
        older_copy = self.store.get_node("anode")

        original['state'] = states.PENDING
        self.store.update_node(original)

        # this should succeed even though we are basing off of an older copy
        older_copy['state'] = states.RUNNING
        result = self.core.maybe_update_node(older_copy)
        returned_node, was_updated = result
        self.assertTrue(was_updated)
        self.assertEqual(returned_node['state'], states.RUNNING)

        # the store must hold the new state too
        reloaded = self.store.get_node("anode")
        self.assertEqual(reloaded['state'], states.RUNNING)

    def test_out_of_order_launch_and_terminate(self):
        """A terminate that arrives before its launch leaves the node TERMINATED and nothing launched."""
        # test case where a node terminate request arrives before
        # the launch request.
        self.core.context = None
        early_launch_id = _new_id()
        early_node_id = _new_id()

        # terminate first: the unknown node is reported as TERMINATED
        self.core.mark_nodes_terminating([early_node_id])
        terminated_before = self.notifier.assure_state(
            states.TERMINATED, nodes=[early_node_id])
        self.assertTrue(terminated_before)

        # the late launch request must not revive the node
        self._prepare_execute(launch_id=early_launch_id,
                              instance_ids=[early_node_id],
                              context_enabled=False,
                              assure_state=False)
        terminated_after = self.notifier.assure_state(
            states.TERMINATED, nodes=[early_node_id])
        self.assertTrue(terminated_after)

        # make sure nothing was launched
        self.assertFalse(self.site1_driver.list_nodes())
示例#8
0
文件: test_core.py 项目: timf/epu
class ProvisionerCoreRecoveryTests(unittest.TestCase):
    """Exercise ProvisionerCore.recover() and terminate_all() against a fake driver."""

    def setUp(self):
        """Build a ProvisionerCore wired to fakes, with one site driver named 'fake'."""
        self.notifier = FakeProvisionerNotifier()
        self.store = ProvisionerStore()
        self.ctx = FakeContextClient()
        self.driver = FakeNodeDriver()
        self.dtrs = FakeDTRS()
        drivers = {'fake': self.driver}
        self.core = ProvisionerCore(store=self.store, notifier=self.notifier,
                                    dtrs=self.dtrs, site_drivers=drivers,
                                    context=self.ctx)

    @defer.inlineCallbacks
    def test_recover_launch_incomplete(self):
        """Ensure that launches in REQUESTED state are completed by recover()."""
        launch_id = _new_id()
        doc = ("<cluster><workspace><name>node</name><image>fake</image>"
               "<quantity>3</quantity>"
               "</workspace><workspace><name>running</name><image>fake"
               "</image><quantity>1</quantity></workspace></cluster>")
        context = {'broker_uri': _new_id(), 'context_id': _new_id(),
                   'secret': _new_id(), 'uri': _new_id()}

        requested_node_ids = [_new_id(), _new_id()]

        node_records = [make_node(launch_id, states.RUNNING,
                                  site='fake',
                                  ctx_name='running'),
                        make_node(launch_id, states.REQUESTED,
                                  site='fake',
                                  node_id=requested_node_ids[0],
                                  ctx_name='node'),
                        make_node(launch_id, states.REQUESTED,
                                  site='fake',
                                  node_id=requested_node_ids[1],
                                  ctx_name='node'),
                        make_node(launch_id, states.RUNNING,
                                  ctx_name='node')]
        launch_record = make_launch(launch_id, states.REQUESTED,
                                    node_records, document=doc,
                                    context=context)

        yield self.store.put_launch(launch_record)
        yield self.store.put_nodes(node_records)

        # 2 nodes are in REQUESTED state, so those should be launched
        yield self.core.recover()

        # because we rely on IaaS idempotency, we get full Node responses
        # for all nodes in the group. What really would cause this scenario
        # is successfully launching the full group but failing before records
        # could be written for the two REQUESTED nodes.
        self.assertEqual(3, len(self.driver.created))
        iaas_ids = set(node.id for node in self.driver.created)
        self.assertEqual(3, len(iaas_ids))

        for node_id in requested_node_ids:
            node = yield self.store.get_node(node_id)
            self.assertEqual(states.PENDING, node['state'])
            # assertIn reports both operands on failure
            self.assertIn(node['iaas_id'], iaas_ids)

        launch = yield self.store.get_launch(launch_id)
        self.assertEqual(states.PENDING, launch['state'])

    @defer.inlineCallbacks
    def test_recovery_nodes_terminating(self):
        """recover() destroys the one TERMINATING node and records it as TERMINATED."""
        launch_id = _new_id()

        terminating_iaas_id = _new_id()

        node_records = [make_node(launch_id, states.TERMINATING,
                                  iaas_id=terminating_iaas_id,
                                  site='fake'),
                        make_node(launch_id, states.TERMINATED),
                        make_node(launch_id, states.RUNNING)]

        launch_record = make_launch(launch_id, states.RUNNING,
                                    node_records)

        yield self.store.put_launch(launch_record)
        yield self.store.put_nodes(node_records)

        yield self.core.recover()

        # only the TERMINATING node is handed to the driver
        self.assertEqual(1, len(self.driver.destroyed))
        self.assertEqual(self.driver.destroyed[0].id, terminating_iaas_id)

        # the previously TERMINATED node plus the one just destroyed
        terminated = yield self.store.get_nodes(state=states.TERMINATED)
        self.assertEqual(2, len(terminated))

    @defer.inlineCallbacks
    def test_recovery_launch_terminating(self):
        """recover() finishes a TERMINATING launch: live nodes destroyed, launch TERMINATED."""
        launch_id = _new_id()

        terminating_iaas_ids = [_new_id(), _new_id()]

        node_records = [make_node(launch_id, states.TERMINATING,
                                  iaas_id=terminating_iaas_ids[0],
                                  site='fake'),
                        make_node(launch_id, states.TERMINATED),
                        make_node(launch_id, states.RUNNING,
                                  iaas_id=terminating_iaas_ids[1],
                                  site='fake')]

        launch_record = make_launch(launch_id, states.TERMINATING,
                                    node_records)

        yield self.store.put_launch(launch_record)
        yield self.store.put_nodes(node_records)

        yield self.core.recover()

        # both the TERMINATING and the RUNNING node are destroyed
        self.assertEqual(2, len(self.driver.destroyed))
        self.assertIn(self.driver.destroyed[0].id, terminating_iaas_ids)
        self.assertIn(self.driver.destroyed[1].id, terminating_iaas_ids)

        terminated = yield self.store.get_nodes(state=states.TERMINATED)
        self.assertEqual(3, len(terminated))

        launch_record = yield self.store.get_launch(launch_id)
        self.assertEqual(launch_record['state'], states.TERMINATED)

    @defer.inlineCallbacks
    def test_terminate_all(self):
        """terminate_all() destroys RUNNING and PENDING nodes and marks everything TERMINATED."""
        running_launch_id = _new_id()
        running_launch, running_nodes = make_launch_and_nodes(
            running_launch_id, 3, states.RUNNING)
        yield self.store.put_launch(running_launch)
        yield self.store.put_nodes(running_nodes)

        pending_launch_id = _new_id()
        pending_launch, pending_nodes = make_launch_and_nodes(
            pending_launch_id, 3, states.PENDING)
        yield self.store.put_launch(pending_launch)
        yield self.store.put_nodes(pending_nodes)

        terminated_launch_id = _new_id()
        terminated_launch, terminated_nodes = make_launch_and_nodes(
            terminated_launch_id, 3, states.TERMINATED)
        yield self.store.put_launch(terminated_launch)
        yield self.store.put_nodes(terminated_nodes)

        yield self.core.terminate_all()

        # 3 RUNNING + 3 PENDING; the already TERMINATED nodes are not destroyed
        self.assertEqual(6, len(self.driver.destroyed))

        all_launches = yield self.store.get_launches()
        self.assertEqual(3, len(all_launches))
        self.assertTrue(all(launch['state'] == states.TERMINATED
                            for launch in all_launches))

        all_nodes = yield self.store.get_nodes()
        self.assertEqual(9, len(all_nodes))
        self.assertTrue(all(node['state'] == states.TERMINATED
                            for node in all_nodes))

        state = yield self.core.check_terminate_all()
        self.assertTrue(state)
示例#9
0
文件: test_core.py 项目: timf/epu
class ProvisionerCoreTests(unittest.TestCase):
    """Testing the provisioner core functionality
    """
    def setUp(self):
        """Create the fakes, the store and two fake site drivers, then build the core under test."""
        self.notifier = FakeProvisionerNotifier()
        self.store = ProvisionerStore()
        self.ctx = FakeContextClient()
        self.dtrs = FakeDTRS()

        self.site1_driver = FakeNodeDriver()
        self.site2_driver = FakeNodeDriver()

        # drivers are looked up by site name
        site_drivers = dict(site1=self.site1_driver, site2=self.site2_driver)
        self.core = ProvisionerCore(
            store=self.store,
            notifier=self.notifier,
            dtrs=self.dtrs,
            context=self.ctx,
            site_drivers=site_drivers)

    @defer.inlineCallbacks
    def test_prepare_dtrs_error(self):
        """With a DeployableTypeLookupError set on the DTRS fake, prepare_provision reports FAILED."""
        self.dtrs.error = DeployableTypeLookupError()

        requested_nodes = {
            "i1": {'ids': [_new_id()], 'site': "chicago",
                   'allocation': "small"},
        }
        provision_request = {
            'launch_id': _new_id(),
            'deployable_type': "foo",
            'subscribers': ('blah',),
            'nodes': requested_nodes,
        }
        yield self.core.prepare_provision(provision_request)

        reached_failed = self.notifier.assure_state(states.FAILED)
        self.assertTrue(reached_failed)

    @defer.inlineCallbacks
    def test_prepare_broker_error(self):
        """With a BrokerError set as the context fake's create_error, prepare_provision reports FAILED."""
        self.ctx.create_error = BrokerError("fake ctx create failed")
        self.dtrs.result = dict(document="<fake>document</fake>",
                                nodes={"i1": {}})

        requested_nodes = {
            "i1": {'ids': [_new_id()], 'site': "site1",
                   'allocation': "small"},
        }
        provision_request = {
            'launch_id': _new_id(),
            'deployable_type': "foo",
            'subscribers': ('blah',),
            'nodes': requested_nodes,
        }
        yield self.core.prepare_provision(provision_request)

        reached_failed = self.notifier.assure_state(states.FAILED)
        self.assertTrue(reached_failed)

    @defer.inlineCallbacks
    def test_prepare_execute(self):
        """After the shared prepare/execute helper runs, the notifier reports PENDING."""
        yield self._prepare_execute()
        reached_pending = self.notifier.assure_state(states.PENDING)
        self.assertTrue(reached_pending)

    @defer.inlineCallbacks
    def test_prepare_execute_iaas_fail(self):
        """With create_node_error set on the site1 driver, the run ends in FAILED."""
        self.site1_driver.create_node_error = InvalidCredsError()
        yield self._prepare_execute()
        reached_failed = self.notifier.assure_state(states.FAILED)
        self.assertTrue(reached_failed)

    @defer.inlineCallbacks
    def _prepare_execute(self):
        """Run prepare_provision for a one-node request, check the result, then execute it.

        Shared by the tests above; they assert the final state themselves.
        """
        self.dtrs.result = dict(
            document=_get_one_node_cluster_doc("node1", "image1"),
            nodes={"node1": {}})
        single_node = {'ids': [_new_id()], 'site': "site1",
                       'allocation': "small"}
        provision_request = {
            'launch_id': _new_id(),
            'deployable_type': "foo",
            'subscribers': ('blah',),
            'nodes': {"node1": single_node},
        }

        prepared = yield self.core.prepare_provision(provision_request)
        launch, nodes = prepared

        # exactly the requested node, under the requested launch
        self.assertEqual(len(nodes), 1)
        prepared_node = nodes[0]
        self.assertEqual(prepared_node['node_id'], single_node['ids'][0])
        self.assertEqual(launch['launch_id'], provision_request['launch_id'])

        # a context must have been created and attached to the launch
        self.assertTrue(self.ctx.last_create)
        launch_context = launch['context']
        self.assertEqual(launch_context, self.ctx.last_create)
        for required_key in ('uri', 'secret', 'context_id', 'broker_uri'):
            self.assertIn(required_key, launch['context'])
        self.assertTrue(self.notifier.assure_state(states.REQUESTED))

        yield self.core.execute_provision(launch, nodes)

    @defer.inlineCallbacks
    def test_execute_bad_doc(self):
        """execute_provision with a malformed cluster document reports FAILED."""
        created_ctx = yield self.ctx.create()
        bad_launch = dict(
            launch_id="thelaunchid",
            document="<this><isnt><a><real><doc>",
            deployable_type="dt",
            context=created_ctx,
            subscribers=[],
            state=states.PENDING,
            node_ids=['node1'])
        launch_nodes = [dict(node_id='node1', launch_id="thelaunchid",
                             state=states.REQUESTED)]

        yield self.core.execute_provision(bad_launch, launch_nodes)
        reached_failed = self.notifier.assure_state(states.FAILED)
        self.assertTrue(reached_failed)

        # TODO this should be a better error coming from nimboss
        #self.assertEqual(self.notifier.nodes['node1']['state_desc'], "CONTEXT_DOC_INVALID")

    @defer.inlineCallbacks
    def test_execute_bad_doc_nodes(self):
        """A node whose ctx_name differs from the name in the cluster document ends in FAILED."""
        created_ctx = yield self.ctx.create()
        launch = dict(
            launch_id="thelaunchid",
            document=_get_one_node_cluster_doc("node1", "image1"),
            deployable_type="dt",
            context=created_ctx,
            subscribers=[],
            state=states.PENDING,
            node_ids=['node1'])

        # the document is built for "node1"; this record uses another name
        mismatched = dict(node_id='node1', launch_id="thelaunchid",
                          state=states.REQUESTED, ctx_name="adifferentname")

        yield self.core.execute_provision(launch, [mismatched])
        reached_failed = self.notifier.assure_state(states.FAILED)
        self.assertTrue(reached_failed)

    @defer.inlineCallbacks
    def test_execute_bad_doc_node_count(self):
        """Supplying two node records for a one-node cluster document ends in FAILED."""
        created_ctx = yield self.ctx.create()
        launch = dict(
            launch_id="thelaunchid",
            document=_get_one_node_cluster_doc("node1", "image1"),
            deployable_type="dt",
            context=created_ctx,
            subscribers=[],
            state=states.PENDING,
            node_ids=['node1'])

        # two nodes where doc expects 1
        too_many = []
        for _ in range(2):
            too_many.append(dict(node_id='node1', launch_id="thelaunchid",
                                 state=states.REQUESTED, ctx_name="node1"))

        yield self.core.execute_provision(launch, too_many)
        reached_failed = self.notifier.assure_state(states.FAILED)
        self.assertTrue(reached_failed)


    @defer.inlineCallbacks
    def test_query_missing_node_within_window(self):
        """A PENDING node missing from the site query yields no notification when pending for 30s."""
        the_launch_id = _new_id()
        the_node_id = _new_id()
        # pending timestamp 30 seconds in the past
        pending_since = time.time() - 30.0
        launch = dict(launch_id=the_launch_id,
                      node_ids=[the_node_id],
                      state=states.PENDING,
                      subscribers='fake-subscribers')
        pending_node = dict(launch_id=the_launch_id,
                            node_id=the_node_id,
                            state=states.PENDING,
                            pending_timestamp=pending_since)
        yield self.store.put_launch(launch)
        yield self.store.put_node(pending_node)

        # query with the FakeEmptyNodeQueryDriver stand-in as the site driver
        yield self.core.query_one_site(
            'fake-site', [pending_node], driver=FakeEmptyNodeQueryDriver())
        notified_count = len(self.notifier.nodes)
        self.assertEqual(notified_count, 0)
    
    @defer.inlineCallbacks
    def test_query_missing_node_past_window(self):
        """A PENDING node missing from the site query is reported FAILED when pending for 120s."""
        the_launch_id = _new_id()
        the_node_id = _new_id()

        # pending timestamp 120 seconds in the past
        pending_since = time.time() - 120.0
        launch = dict(launch_id=the_launch_id,
                      node_ids=[the_node_id],
                      state=states.PENDING,
                      subscribers='fake-subscribers')
        pending_node = dict(launch_id=the_launch_id,
                            node_id=the_node_id,
                            state=states.PENDING,
                            pending_timestamp=pending_since)
        yield self.store.put_launch(launch)
        yield self.store.put_node(pending_node)

        # query with the FakeEmptyNodeQueryDriver stand-in as the site driver
        yield self.core.query_one_site(
            'fake-site', [pending_node], driver=FakeEmptyNodeQueryDriver())
        notified_count = len(self.notifier.nodes)
        self.assertEqual(notified_count, 1)
        self.assertTrue(self.notifier.assure_state(states.FAILED))

    @defer.inlineCallbacks
    def test_query(self):
        """query_one_site copies IPs and state from the IaaS node; after terminate it reports TERMINATED."""
        the_launch_id = _new_id()
        the_node_id = _new_id()

        # create a node on site1 and mark it running
        iaas_node = self.site1_driver.create_node()[0]
        self.site1_driver.set_node_running(iaas_node.id)

        pending_since = time.time() - 120.0
        launch = dict(launch_id=the_launch_id,
                      node_ids=[the_node_id],
                      state=states.PENDING,
                      subscribers='fake-subscribers')
        pending_node = dict(launch_id=the_launch_id,
                            node_id=the_node_id,
                            state=states.PENDING,
                            pending_timestamp=pending_since,
                            iaas_id=iaas_node.id,
                            site='site1')

        # a second record that is still REQUESTED and has no iaas_id
        requested_node = dict(launch_id=the_launch_id,
                              node_id=_new_id(),
                              state=states.REQUESTED)
        site_nodes = [pending_node, requested_node]
        yield self.store.put_launch(launch)
        yield self.store.put_node(pending_node)
        yield self.store.put_node(requested_node)

        yield self.core.query_one_site('site1', site_nodes)

        stored = yield self.store.get_node(the_node_id)
        self.assertEqual(stored['public_ip'], iaas_node.public_ip)
        self.assertEqual(stored['private_ip'], iaas_node.private_ip)
        self.assertEqual(stored['state'], states.STARTED)

        # query again should detect no changes
        yield self.core.query_one_site('site1', site_nodes)

        # now destroy
        yield self.core.terminate_nodes([the_node_id])
        stored = yield self.store.get_node(the_node_id)
        yield self.core.query_one_site('site1', [stored])

        # IPs are retained while the state moves to TERMINATED
        stored = yield self.store.get_node(the_node_id)
        self.assertEqual(stored['public_ip'], iaas_node.public_ip)
        self.assertEqual(stored['private_ip'], iaas_node.private_ip)
        self.assertEqual(stored['state'], states.TERMINATED)


    @defer.inlineCallbacks
    def test_query_ctx(self):
        """query_contexts moves nodes to RUNNING as the context fake reports them ok."""
        node_count = 3
        the_launch_id = _new_id()
        started_nodes = []
        for _ in range(node_count):
            started_nodes.append(make_node(the_launch_id, states.STARTED))
        pending_launch = make_launch(the_launch_id, states.PENDING,
                                     started_nodes)

        yield self.store.put_launch(pending_launch)
        yield self.store.put_nodes(started_nodes)

        self.ctx.expected_count = len(started_nodes)
        self.ctx.complete = False
        self.ctx.error = False

        #first query with no ctx nodes. zero records should be updated
        yield self.core.query_contexts()
        self.assertTrue(self.notifier.assure_record_count(0))

        # all but 1 node have reported ok
        reported = []
        for record in started_nodes[:node_count - 1]:
            reported.append(_one_fake_ctx_node_ok(record['public_ip'],
                                                  _new_id(), _new_id()))
        self.ctx.nodes = reported

        yield self.core.query_contexts()
        self.assertTrue(self.notifier.assure_state(states.RUNNING))
        self.assertEqual(len(self.notifier.nodes), node_count - 1)

        #last node reports ok
        last_record = started_nodes[-1]
        self.ctx.nodes.append(_one_fake_ctx_node_ok(last_record['public_ip'],
                                                    _new_id(), _new_id()))

        self.ctx.complete = True
        yield self.core.query_contexts()
        self.assertTrue(self.notifier.assure_state(states.RUNNING))
        self.assertTrue(self.notifier.assure_record_count(1))
    
    @defer.inlineCallbacks
    def test_query_ctx_error(self):
        """Nodes reported ok become RUNNING; the node reported in error becomes RUNNING_FAILED."""
        node_count = 3
        the_launch_id = _new_id()
        started_nodes = []
        for _ in range(node_count):
            started_nodes.append(make_node(the_launch_id, states.STARTED))
        pending_launch = make_launch(the_launch_id, states.PENDING,
                                     started_nodes)

        yield self.store.put_launch(pending_launch)
        yield self.store.put_nodes(started_nodes)

        self.ctx.expected_count = len(started_nodes)
        self.ctx.complete = False
        self.ctx.error = False

        healthy = started_nodes[:node_count - 1]
        broken = started_nodes[-1]

        # all but 1 node have reported ok
        reported = []
        for record in healthy:
            reported.append(_one_fake_ctx_node_ok(record['public_ip'],
                                                  _new_id(), _new_id()))
        self.ctx.nodes = reported
        # the last node reports an error
        self.ctx.nodes.append(_one_fake_ctx_node_error(broken['public_ip'],
                                                       _new_id(), _new_id()))

        ok_ids = [record['node_id'] for record in healthy]
        error_ids = [broken['node_id']]

        self.ctx.complete = True
        self.ctx.error = True

        yield self.core.query_contexts()
        self.assertTrue(self.notifier.assure_state(states.RUNNING, ok_ids))
        self.assertTrue(
            self.notifier.assure_state(states.RUNNING_FAILED, error_ids))

    @defer.inlineCallbacks
    def test_query_ctx_nodes_not_started(self):
        """No context is queried for a launch that still has PENDING nodes."""
        the_launch_id = _new_id()
        # three PENDING nodes followed by a single STARTED one
        mixed_nodes = []
        for _ in range(3):
            mixed_nodes.append(make_node(the_launch_id, states.PENDING))
        mixed_nodes.append(make_node(the_launch_id, states.STARTED))
        pending_launch = make_launch(the_launch_id, states.PENDING,
                                     mixed_nodes)
        yield self.store.put_launch(pending_launch)
        yield self.store.put_nodes(mixed_nodes)

        yield self.core.query_contexts()

        # ensure that no context was actually queried. See the note in
        # _query_one_context for the reason why this is important.
        queried_count = len(self.ctx.queried_uris)
        self.assertEqual(queried_count, 0)

    @defer.inlineCallbacks
    def test_query_ctx_permanent_broker_error(self):
        """With query_error set to ContextNotFoundError, nodes go RUNNING_FAILED and the launch FAILED."""
        node_count = 3
        the_launch_id = _new_id()
        started_nodes = []
        for _ in range(node_count):
            started_nodes.append(make_node(the_launch_id, states.STARTED))
        started_ids = [record['node_id'] for record in started_nodes]
        pending_launch = make_launch(the_launch_id, states.PENDING,
                                     started_nodes)
        yield self.store.put_launch(pending_launch)
        yield self.store.put_nodes(started_nodes)

        self.ctx.query_error = ContextNotFoundError()
        yield self.core.query_contexts()

        nodes_failed = self.notifier.assure_state(states.RUNNING_FAILED,
                                                  started_ids)
        self.assertTrue(nodes_failed)
        stored_launch = yield self.store.get_launch(the_launch_id)
        self.assertEqual(stored_launch['state'], states.FAILED)

    def test_update_node_ip_info(self):
        """update_node_ip_info takes the first listed IP and keeps old values on empty lists."""
        record = {'public_ip': None}

        # IaaS node without any addresses: both fields end up None
        no_ips = Mock(public_ip=None, private_ip=None)
        update_node_ip_info(record, no_ips)
        self.assertEqual(record['public_ip'], None)
        self.assertEqual(record['private_ip'], None)

        # one address of each kind: the list element is stored
        with_ips = Mock(public_ip=["pub1"], private_ip=["priv1"])
        update_node_ip_info(record, with_ips)
        self.assertEqual(record['public_ip'], "pub1")
        self.assertEqual(record['private_ip'], "priv1")

        # empty lists must not wipe the previously stored addresses
        empty_ips = Mock(public_ip=[], private_ip=[])
        update_node_ip_info(record, empty_ips)
        self.assertEqual(record['public_ip'], "pub1")
        self.assertEqual(record['private_ip'], "priv1")

    def test_update_nodes_from_ctx(self):
        """update_nodes_from_context returns one entry per node when each has a matching context node."""
        launch_id = _new_id()
        nodes = [make_node(launch_id, states.STARTED)
                 for _ in range(5)]
        # one ok context node per record, built from the record's public_ip
        ctx_nodes = [_one_fake_ctx_node_ok(node['public_ip'], _new_id(),
                                           _new_id()) for node in nodes]

        updated = update_nodes_from_context(nodes, ctx_nodes)
        # assertEqual, as used elsewhere in this file, instead of the
        # deprecated assertEquals alias
        self.assertEqual(len(nodes), len(updated))
        
    def test_update_nodes_from_ctx_with_hostname(self):
        """Context nodes also match when the record's public_ip value appears as their hostname."""
        launch_id = _new_id()
        nodes = [make_node(launch_id, states.STARTED)
                 for _ in range(5)]
        #libcloud puts the hostname in the public_ip field
        ctx_nodes = [_one_fake_ctx_node_ok(ip=_new_id(),
                                           hostname=node['public_ip'],
                                           pubkey=_new_id())
                     for node in nodes]

        updated = update_nodes_from_context(nodes, ctx_nodes)
        # assertEqual, as used elsewhere in this file, instead of the
        # deprecated assertEquals alias
        self.assertEqual(len(nodes), len(updated))

    @defer.inlineCallbacks
    def test_query_broker_exception(self):
        """A BrokerError for one context does not stop the other context from being queried."""
        for _ in range(2):
            new_launch_id = _new_id()
            started_nodes = [make_node(new_launch_id, states.STARTED)]
            pending_launch = make_launch(new_launch_id, states.PENDING,
                                         started_nodes)

            yield self.store.put_launch(pending_launch)
            yield self.store.put_nodes(started_nodes)

        # no guaranteed order here so grabbing first launch from store
        # and making that one return a BrokerError during context query.
        # The goal is to ensure that one error doesn't prevent querying
        # for other contexts.

        pending_launches = yield self.store.get_launches(state=states.PENDING)
        failing_launch = pending_launches[0]
        failing_uri = failing_launch['context']['uri']
        healthy_node_id = pending_launches[1]['node_ids'][0]
        healthy_node = yield self.store.get_node(healthy_node_id)

        self.ctx.uri_query_error[failing_uri] = BrokerError("bad broker")
        self.ctx.nodes = [_one_fake_ctx_node_ok(healthy_node['public_ip'],
                                                _new_id(), _new_id())]
        self.ctx.complete = True
        yield self.core.query_contexts()

        stored_launches = yield self.store.get_launches()
        for stored_launch in stored_launches:
            # both contexts must have been attempted
            self.assertIn(stored_launch['context']['uri'],
                          self.ctx.queried_uris)

            if stored_launch['launch_id'] != failing_launch['launch_id']:
                self.assertEqual(stored_launch['state'], states.RUNNING)
                wanted_node_state = states.RUNNING
            else:
                # the failing launch is left exactly as it was
                self.assertEqual(stored_launch['state'], states.PENDING)
                wanted_node_state = states.STARTED

            stored_node = yield self.store.get_node(
                stored_launch['node_ids'][0])
            self.assertEqual(stored_node['state'], wanted_node_state)

    @defer.inlineCallbacks
    def test_query_ctx_without_valid_nodes(self):
        """A launch with no nodes below TERMINATING is failed without a broker query."""
        # if there are no nodes < TERMINATING, no broker query should happen.
        # Seed three PENDING launches, each holding one STARTED node.
        for _ in range(3):
            new_launch_id = _new_id()
            started_nodes = [make_node(new_launch_id, states.STARTED)]
            pending_launch = make_launch(new_launch_id, states.PENDING,
                                         started_nodes)

            yield self.store.put_launch(pending_launch)
            yield self.store.put_nodes(started_nodes)

        pending_launches = yield self.store.get_launches(state=states.PENDING)
        doomed_launch = pending_launches[0]

        # mark first launch's node as TERMINATING, should prevent
        # context query and result in launch being marked FAILED
        doomed_node = yield self.store.get_node(doomed_launch['node_ids'][0])
        doomed_node['state'] = states.TERMINATING
        yield self.store.put_node(doomed_node)

        yield self.core.query_contexts()
        self.assertNotIn(doomed_launch['context']['uri'],
                         self.ctx.queried_uris)

        stored_launches = yield self.store.get_launches()
        for stored_launch in stored_launches:
            if stored_launch['launch_id'] != doomed_launch['launch_id']:
                # untouched launches stay as they were seeded
                self.assertEqual(stored_launch['state'], states.PENDING)
                wanted_node_state = states.STARTED
            else:
                self.assertEqual(stored_launch['state'], states.FAILED)
                wanted_node_state = states.TERMINATING

            stored_node = yield self.store.get_node(
                stored_launch['node_ids'][0])
            self.assertEqual(stored_node['state'], wanted_node_state)


    @defer.inlineCallbacks
    def test_query_unexpected_exception(self):
        """query() must not let an unexpected exception from query_nodes bubble up."""
        launch_id = _new_id()
        node_records = [make_node(launch_id, states.STARTED)]
        launch_record = make_launch(launch_id, states.PENDING,
                                    node_records)
        yield self.store.put_launch(launch_record)
        yield self.store.put_nodes(node_records)
        self.ctx.query_error = ValueError("bad programmer")

        # digging into internals a bit: patching one of the methods query()
        # calls to raise an exception. This will let us ensure exceptions do
        # not bubble up.
        # The stub is set on the core instance, so it is invoked as a plain
        # function rather than a bound method. Accept any arguments so the
        # KeyError below is raised however query() calls query_nodes.
        def raiser(*args, **kwargs):
            raise KeyError("notreallyaproblem")
        self.patch(self.core, 'query_nodes', raiser)

        yield self.core.query()  # ensure that exception doesn't bubble up

    @defer.inlineCallbacks
    def test_dump_state(self):
        """dump_state sends one notification per requested node and none for others."""
        tracked_ids = []
        tracked_records = []
        for _ in range(3):
            new_launch_id = _new_id()
            launch_nodes = [make_node(new_launch_id, states.PENDING)]
            tracked_ids.append(launch_nodes[0]['node_id'])
            tracked_records.extend(launch_nodes)
            pending_launch = make_launch(new_launch_id, states.PENDING,
                                         launch_nodes)
            yield self.store.put_launch(pending_launch)
            yield self.store.put_nodes(launch_nodes)

        # only ask for the first two of the three nodes
        yield self.core.dump_state(tracked_ids[:2])

        first_id, second_id, third_id = tracked_ids
        seen_counts = self.notifier.nodes_rec_count
        seen_nodes = self.notifier.nodes

        # should have gotten notifications about the 2 nodes
        self.assertEqual(seen_counts[first_id], 1)
        self.assertEqual(tracked_records[0], seen_nodes[first_id])
        self.assertEqual(tracked_records[1], seen_nodes[second_id])
        self.assertEqual(seen_counts[second_id], 1)
        self.assertNotIn(third_id, seen_nodes)

    @defer.inlineCallbacks
    def test_mark_nodes_terminating(self):
        """mark_nodes_terminating moves only the named nodes to TERMINATING."""
        the_launch_id = _new_id()
        running_nodes = []
        for _ in range(3):
            running_nodes.append(make_node(the_launch_id, states.RUNNING))
        pending_launch = make_launch(the_launch_id, states.PENDING,
                                     running_nodes)

        yield self.store.put_launch(pending_launch)
        yield self.store.put_nodes(running_nodes)

        # terminate the first two nodes and leave the third alone
        marked_ids = [record['node_id'] for record in running_nodes[:2]]
        yield self.core.mark_nodes_terminating(marked_ids)

        notified = self.notifier.assure_state(states.TERMINATING,
                                              nodes=marked_ids)
        self.assertTrue(notified)
        self.assertNotIn(running_nodes[2]['node_id'], self.notifier.nodes)

        # the stored records must reflect the new state as well
        for marked_id in marked_ids:
            stored = yield self.store.get_node(marked_id)
            self.assertEqual(stored['state'], states.TERMINATING)