class HighAvailabilityServiceOnePDTests(HighAvailabilityServiceMixin, TestFixture):
    """Slow integration tests for the HA service, run against the
    ``deployment_one_pd_two_eea`` deployment.

    Every test is skipped unless the ``INT`` environment variable is set.
    """

    def setUp(self):
        """Start the harness, register a process definition with each process
        dispatcher named in the deployment, and spawn the HA service.
        """
        if not os.environ.get("INT"):
            raise SkipTest("Skipping Slow integration test")

        # Fresh random exchange and sysname for every test run.
        self.exchange = "hatestexchange-%s" % str(uuid.uuid4())
        self.sysname = "test-%s" % str(uuid.uuid4())

        deployment_doc = yaml.load(deployment_one_pd_two_eea)
        self.pd_names = deployment_doc['process-dispatchers'].keys()

        # Flatten the eeagent names of every node into one list.
        self.eea_names = []
        for node_conf in deployment_doc['nodes'].values():
            self.eea_names.extend(node_conf['eeagents'].keys())

        self.setup_harness(exchange=self.exchange, sysname=self.sysname)
        self.addCleanup(self.cleanup_harness)
        self.epuharness.start(deployment_str=deployment_one_pd_two_eea)
        self.block_until_ready(deployment_one_pd_two_eea, self.dashi)

        # The same definition id is registered with every process dispatcher.
        sleep_executable = {'exec': 'sleep', 'argv': ['1000']}
        self.process_definition_id = uuid.uuid4().hex
        for dispatcher in self.pd_names:
            ProcessDispatcherClient(self.dashi, dispatcher).create_definition(
                self.process_definition_id, None, sleep_executable, None, None)

        # The service starts with preserve_n == 0; tests raise it as needed.
        self.haservice = HighAvailabilityService(
            policy_parameters={'preserve_n': 0},
            process_dispatchers=self.pd_names,
            exchange=self.exchange,
            process_definition_id=self.process_definition_id,
            sysname=self.sysname)
        self.haservice_thread = tevent.spawn(self.haservice.start)

        # From here on, talk through the HA service's own dashi connection.
        self.dashi = self.haservice.dashi
        self.haservice_client = HighAvailabilityServiceClient(
            self.dashi, topic=self.haservice.topic)

    def tearDown(self):
        """Stop the HA service and wait for the spawned start() call to end."""
        self.haservice.stop()
        self.haservice_thread.join()

    def test_kill_an_eeagent(self):
        """test_kill_an_eeagent

        The HA service must leave its managed upids alone when an eeagent
        dies; handling that case is the Process Dispatcher's job.
        """
        wanted = 2
        self._update_policy_params_and_assert({'preserve_n': wanted})
        self._assert_n_processes(wanted)
        upids_before_kill = list(self.haservice.core.managed_upids)

        print("PD state %s" % self.dashi.call(self.pd_names[0], "dump"))

        # Stop the first eeagent that reports at least one process.
        for agent_name in self.eea_names:
            print("Calling Dump State for %s" % agent_name)
            agent_state = self.dashi.call(agent_name, "dump_state", rpc=True)
            if len(agent_state['processes']) == 0:
                continue
            self.epuharness.stop(services=[agent_name])
            break

        # Give the HA service time to (incorrectly) react before checking.
        time.sleep(10)
        msg = "HA shouldn't have touched those procs! Getting too big for its britches!"
        assert upids_before_kill == self.haservice.core.managed_upids, msg
class HighAvailabilityServiceTests(HighAvailabilityServiceMixin, TestFixture):
    """Slow integration tests for the HA service, run against the
    ``deployment`` deployment document.

    Every test is skipped unless the ``INT`` environment variable is set.
    """

    def setUp(self):
        """Start the harness, register a process definition with each process
        dispatcher named in the deployment, and spawn the HA service.

        Raises:
            SkipTest: when the ``INT`` environment variable is unset or empty.
        """
        if not os.environ.get("INT"):
            raise SkipTest("Skipping Slow integration test")

        # Fresh random exchange and sysname for every test run.
        self.exchange = "hatestexchange-%s" % str(uuid.uuid4())
        self.sysname = "test-%s" % str(uuid.uuid4())

        # NOTE(review): yaml.load is only reasonable on trusted input;
        # `deployment` is presumably a YAML string defined in this module --
        # confirm it never comes from an external source.
        parsed_deployment = yaml.load(deployment)
        self.pd_names = parsed_deployment['process-dispatchers'].keys()
        policy_params = {'preserve_n': 0}
        executable = {'exec': 'sleep', 'argv': ['1000']}

        print("ST: sysname %s" % self.sysname)
        self.setup_harness(exchange=self.exchange, sysname=self.sysname)
        self.addCleanup(self.cleanup_harness)
        self.epuharness.start(deployment_str=deployment)
        self.block_until_ready(deployment, self.dashi)

        # The same definition id is registered with every process dispatcher.
        self.process_definition_id = uuid.uuid4().hex
        for pd_name in self.pd_names:
            pd_client = ProcessDispatcherClient(self.dashi, pd_name)
            pd_client.create_definition(self.process_definition_id, None,
                                        executable, None, None)

        self.haservice = HighAvailabilityService(
            policy_parameters=policy_params,
            process_dispatchers=self.pd_names,
            exchange=self.exchange,
            process_definition_id=self.process_definition_id,
            sysname=self.sysname)
        self.haservice_thread = tevent.spawn(self.haservice.start)

        # From here on, talk through the HA service's own dashi connection.
        self.dashi = self.haservice.dashi
        self.haservice_client = HighAvailabilityServiceClient(
            self.dashi, topic=self.haservice.topic)

    def tearDown(self):
        """Stop the HA service and wait for the spawned start() call to end."""
        self.haservice.stop()
        self.haservice_thread.join()

    @attr('INT')
    def test_basic(self):
        """Scale preserve_n up and down and check the process count follows."""
        assert self.haservice.status() == HAState.PENDING

        for n in (2, 1, 3, 0):
            self._update_policy_params_and_assert({'preserve_n': n})
            self._assert_n_processes(n)

    @attr('INT')
    def test_balance(self):
        """With preserve_n == 2, every process dispatcher should hold one process."""
        n = 1
        self._update_policy_params_and_assert({'preserve_n': n})
        self._assert_n_processes(1)

        n = 2
        self._update_policy_params_and_assert({'preserve_n': n})
        for pd in self.pd_names:
            self._assert_n_processes(1, only_pd=pd)

        n = 0
        self._update_policy_params_and_assert({'preserve_n': n})
        self._assert_n_processes(n)

    @attr('INT')
    def test_kill_a_pd(self):
        """test_kill_a_pd

        Recover from a killed PD: balance processes across the process
        dispatchers, stop one of them, then check that the HA service
        changes its managed upids and still keeps n processes.
        """
        n = 1
        self._update_policy_params_and_assert({'preserve_n': n})
        self._assert_n_processes(n)

        n = 2
        self._update_policy_params_and_assert({'preserve_n': n})
        for pd in self.pd_names:
            self._assert_n_processes(1, only_pd=pd)

        upids_before_kill = list(self.haservice.core.managed_upids)

        # pop() also removes the killed PD from self.pd_names, the same list
        # object that was handed to the HA service in setUp.
        killed_pd = self.pd_names.pop()
        self.epuharness.stop(services=[killed_pd])

        # Poll once per second until the managed upids change or the
        # polling budget runs out.
        timeout = 30
        while timeout >= 0 and upids_before_kill == self.haservice.core.managed_upids:
            print("Managed UPIDs: %s" % self.haservice.core.managed_upids)
            time.sleep(1)
            timeout -= 1

        # The original asserted a bare string literal here, which is always
        # true; the timeout message now rides on the real check.
        assert upids_before_kill != self.haservice.core.managed_upids, \
            "Took too long for haservice to notice missing upid"

        n = 2
        self._assert_n_processes(n)

    @attr('INT')
    def test_missing_proc(self):
        """test_missing_proc

        Kill a proc, and ensure HA starts a replacement.
        """
        n = 2
        self._update_policy_params_and_assert({'preserve_n': n})
        self._assert_n_processes(n)

        upid_to_kill = self.haservice.core.managed_upids[0]
        pd = self._find_procs_pd(upid_to_kill)
        assert pd

        pd_client = ProcessDispatcherClient(self.dashi, pd)
        pd_client.terminate_process(upid_to_kill)

        # Debug output: the process list is dumped three times right after
        # the terminate call.
        print(self._get_all_procs())
        print(self._get_all_procs())
        print(self._get_all_procs())

        # Check twice, five seconds apart, that the count is back at n.
        time.sleep(5)
        self._assert_n_processes(n)
        time.sleep(5)
        self._assert_n_processes(n)
        print(self._get_all_procs())