def test_platform_status_terminate_and_restart_instrument_1_1(self):
    """
    Tests reaction of a platform upon termination and re-start of its
    associated instrument (OOIION-1077).

    Network: platform 'LJ01D' with one instrument, SBE37_SIM_01.

    Sequence:
      1. start the network and verify a ResourceAgentLifecycleEvent
         STARTED is published for the instrument;
      2. verify all root statuses are OK;
      3. terminate the instrument, verify STOPPED event and that the
         root's child_agg_status entries for it become STATUS_UNKNOWN;
      4. re-start the instrument and verify the statuses return to OK;
      5. drive the re-started instrument back to COMMAND so the root
         platform's shutdown sequence completes cleanly.
    """
    self._set_receive_timeout()

    # create the network:
    self.p_root = p_root = self._create_platform('LJ01D')

    # create and assign an instrument to LJ01D
    i_obj = self._create_instrument("SBE37_SIM_01")
    self._assign_instrument_to_platform(i_obj, p_root)
    log.debug("OOIION-1077 instrument assigned: %s", i_obj)

    #####################################################################
    # prepare to verify expected ResourceAgentLifecycleEvent is generated when
    # the instrument agent gets started for the very first time:
    async_event_result, events_received = self._start_ResourceAgentLifecycleEvent_subscriber(
        i_obj.instrument_device_id, InstrumentAgent.ORIGIN_TYPE, 'STARTED')

    #####################################################################
    # start up the network
    self._start_platform(p_root)
    self.addCleanup(self._stop_platform, p_root)
    self.addCleanup(self._done)

    self._initialize()
    self._go_active()
    self._run()

    log.debug("OOIION-1077 waiting for ResourceAgentLifecycleEvent STARTED")
    # NOTE(review): hardcoded 30s timeout; CFG.endpoint.receive.timeout was
    # presumably intended here — confirm before changing.
    async_event_result.get(timeout=30)  # CFG.endpoint.receive.timeout)
    self.assertEquals(len(events_received), 1)

    #####################################################################
    # get all root statuses
    aggstatus, child_agg_status, rollup_status = self._get_all_root_statuses()

    log.debug("OOIION-1077 publish_event_for_diagnostics")
    publish_event_for_diagnostics()
    sleep(3)
    # Expected status_manager INFO report: the instrument child, aggstatus
    # and rollup_status all STATUS_OK; invalidated_children == [].

    #####################################################################
    # before any updates in this test verify initial statuses are all OK:
    self._verify_all_statuses_OK(aggstatus, child_agg_status, rollup_status)

    #####################################################################
    # verify the root platform has set its aparam_child_agg_status with
    # the assigned instrument:
    all_origins = [i_obj.instrument_device_id]
    child_agg_status_keys = sorted(child_agg_status.keys())
    self.assertEquals(all_origins, child_agg_status_keys)

    # Now, the core of this test follows.

    #####################################################################
    # terminate instrument
    #####################################################################

    # before the termination of the instrument:
    # - prepare to verify the expected ResourceAgentLifecycleEvent is generated:
    async_event_result, events_received = self._start_ResourceAgentLifecycleEvent_subscriber(
        i_obj.instrument_device_id, InstrumentAgent.ORIGIN_TYPE, 'STOPPED')

    # now terminate the instrument:
    log.debug("OOIION-1077 terminating instrument: %s", i_obj)
    self._stop_instrument(i_obj, use_ims=False)

    log.debug("OOIION-1077 waiting for ResourceAgentLifecycleEvent STOPPED")
    async_event_result.get(timeout=CFG.endpoint.receive.timeout)
    self.assertEquals(len(events_received), 1)

    # verify the root's child_status are all UNKNOWN
    # Note: no event is going to be generated from the platform because
    # its rollup_status is *not* changing.
    # So, we have to wait for a bit to let the updates propagate:
    sleep(15)

    log.debug("OOIION-1077 publish_event_for_diagnostics after instrument termination")
    publish_event_for_diagnostics()
    sleep(3)
    # Expected status_manager INFO report: the instrument child all
    # STATUS_UNKNOWN while aggstatus/rollup_status remain STATUS_OK, and
    # the instrument's device id listed under invalidated_children.

    # and do verification that the child_agg_status are all UNKNOWN:
    _, child_agg_status, _ = self._get_all_root_statuses()
    self._verify_statuses(child_agg_status[i_obj.instrument_device_id],
                          [DeviceStatusType.STATUS_UNKNOWN])

    #####################################################################
    # re-start instrument
    #####################################################################

    # NOTE: platform agents rely on ResourceAgentLifecycleEvent STARTED events and
    # on regular status events to re-validate re-started children, see status_manager.
    log.debug("OOIION-1077 re-starting instrument: %s", i_obj)
    ia_client = self._start_instrument(i_obj, use_ims=False)
    log.debug("OOIION-1077 instrument re-started: rid=%r",
              i_obj.instrument_device_id)

    # again, have to wait for a bit to let the updates propagate:
    sleep(15)
    # log shows:
    # INFO Dummy-182 ion.agents.platform.platform_agent:1060 'LJ01D': OOIION-1077
    #   _child_running: revalidated child with resource_id='25be290ea0bb4ca9924b307db9779703'

    log.debug("OOIION-1077 publish_event_for_diagnostics after instrument re-start")
    publish_event_for_diagnostics()
    sleep(3)
    # Expected status_manager INFO report: everything back to STATUS_OK;
    # invalidated_children == [].

    # And do verification that the child_agg_status are all OK again:
    # NOTE: this assumes that, once running again, the instrument's
    # aggstatus are in turn back to OK.
    _, child_agg_status, _ = self._get_all_root_statuses()
    self._verify_statuses(child_agg_status[i_obj.instrument_device_id],
                          [DeviceStatusType.STATUS_OK])

    #####################################################################
    # move the instrument to the COMMAND state where it was when terminated
    # so the shutdown sequence in the root platform completes fine.
    # This also verifies that we are able to continue interacting with
    # the instrument after the re-start.
    #####################################################################

    cmd = AgentCommand(command=InstrumentAgentEvent.INITIALIZE)
    retval = ia_client.execute_agent(cmd, timeout=CFG.endpoint.receive.timeout)
    log.debug("OOIION-1077 INITIALIZE to instrument returned: %s", retval)

    cmd = AgentCommand(command=InstrumentAgentEvent.GO_ACTIVE)
    retval = ia_client.execute_agent(cmd, timeout=CFG.endpoint.receive.timeout)
    log.debug("OOIION-1077 GO_ACTIVE to instrument returned: %s", retval)

    cmd = AgentCommand(command=InstrumentAgentEvent.RUN)
    retval = ia_client.execute_agent(cmd, timeout=CFG.endpoint.receive.timeout)
    log.debug("OOIION-1077 RUN to instrument returned: %s", retval)

    # verify instrument is in COMMAND:
    instr_state = ia_client.get_agent_state()
    log.debug("instrument state: %s", instr_state)
    self.assertEquals(InstrumentAgentState.COMMAND, instr_state)
def test_platform_status_launch_instruments_first_2_3(self):
    """
    Test of status propagation in a small network of 2 platforms and
    3 instruments, with the instruments launched (including port
    agents) before the root platform.

        MJ01C (with 2 instruments)
          LJ01D (with 1 instrument)

    Right after the root platform is launched, it verifies that all its
    statuses are updated to OK. Note that this is a scenario in which
    the updates are not triggered by the event publications done by the
    instruments because those publications happen at a time when the
    platform have not been launched yet. Rather, during the launch of
    the platforms, they retrieve the statuses of their children to
    update the corresponding statuses. This capability was initially
    added to support UI testing with instruments whose port agents need
    to be manually launched.

    The test also includes some explicitly triggered updates via
    publication on behalf of the instruments.
    """
    self._set_receive_timeout()

    # create the network:
    p_objs = {}
    self.p_root = p_root = self._create_hierarchy("MJ01C", p_objs)

    self.assertEquals(2, len(p_objs))
    for platform_id in ["MJ01C", "LJ01D"]:
        self.assertIn(platform_id, p_objs)

    # the sub-platform:
    p_LJ01D = p_objs["LJ01D"]

    #####################################################################
    # create and launch instruments/port_agents:
    instrs = []
    for instr_key in ["SBE37_SIM_01", "SBE37_SIM_02", "SBE37_SIM_03"]:
        i_obj = self._create_instrument(instr_key, start_port_agent=False)
        ia_client = self._start_instrument(i_obj)
        self.addCleanup(self._stop_instrument, i_obj)
        instrs.append(i_obj)
        log.debug("started instrument %s", instr_key)

    #####################################################################
    # assign instruments to platforms:
    # 2 instruments to root:
    self._assign_instrument_to_platform(instrs[0], p_root)
    self._assign_instrument_to_platform(instrs[1], p_root)
    # 1 instrument to sub-platform LJ01D:
    self._assign_instrument_to_platform(instrs[2], p_LJ01D)

    #####################################################################
    # start up the root platform
    self._start_platform(p_root)
    self.addCleanup(self._stop_platform, p_root)
    self.addCleanup(self._shutdown)
    log.debug("started root platform")

    #####################################################################
    # get all root statuses
    aggstatus, child_agg_status, rollup_status = self._get_all_root_statuses()
    # (this logs a table with the 4 child device ids plus aggstatus and
    # rollup_status, all STATUS_OK across the 4 aggregate categories)

    publish_event_for_diagnostics()
    # (this makes the status managers of 'MJ01C' and 'LJ01D' log their
    # status reports; at this point everything is STATUS_OK)

    #####################################################################
    # verify the root platform has set its aparam_child_agg_status with
    # all its descendant nodes (including all instruments):
    all_origins = [p_obj.platform_device_id for p_obj in p_objs.values()]
    all_origins.remove(p_root.platform_device_id)
    all_origins.extend(i_obj.instrument_device_id for i_obj in instrs)
    all_origins = sorted(all_origins)
    child_agg_status_keys = sorted(child_agg_status.keys())
    self.assertEquals(all_origins, child_agg_status_keys)

    #####################################################################
    # all statuses must be OK (in particular for the instrument children)
    self._verify_all_statuses_OK(aggstatus, child_agg_status, rollup_status)

    #####################################################################
    # trigger some status updates from the instruments and do
    # corresponding verifications against the root platform.
    # Note that the sub-platform also should get properly updated but
    # this test doesn't do these verifications.

    # -------------------------------------------------------------------
    # instrs[0] publishes a STATUS_CRITICAL for AGGREGATE_COMMS
    self._expect_from_root(p_root)
    self._publish_for_child(instrs[0],
                            AggregateStatusType.AGGREGATE_COMMS,
                            DeviceStatusType.STATUS_CRITICAL)

    # confirm root gets updated to STATUS_CRITICAL
    self._wait_root_event()
    self._verify_with_get_agent(AggregateStatusType.AGGREGATE_COMMS,
                                DeviceStatusType.STATUS_CRITICAL)

    log.debug("after AGGREGATE_COMMS <- STATUS_CRITICAL on behalf of instr[0]")
    publish_event_for_diagnostics()
    # (MJ01C's report now shows AGGREGATE_COMMS STATUS_CRITICAL for
    # instrs[0] and for rollup_status)

    # -------------------------------------------------------------------
    # instrs[0] publishes a STATUS_WARNING for AGGREGATE_DATA
    self._expect_from_root(p_root)
    self._publish_for_child(instrs[0],
                            AggregateStatusType.AGGREGATE_DATA,
                            DeviceStatusType.STATUS_WARNING)

    # confirm root gets updated to STATUS_WARNING
    self._wait_root_event_and_verify(AggregateStatusType.AGGREGATE_DATA,
                                     DeviceStatusType.STATUS_WARNING)

    log.debug("after AGGREGATE_DATA <- STATUS_WARNING on behalf of instr[0]")
    publish_event_for_diagnostics()
    # (MJ01C's report now also shows AGGREGATE_DATA STATUS_WARNING for
    # instrs[0] and for rollup_status)

    # -------------------------------------------------------------------
    # instrs[1] publishes a STATUS_WARNING for AGGREGATE_POWER
    self._expect_from_root(p_root)
    self._publish_for_child(instrs[1],
                            AggregateStatusType.AGGREGATE_POWER,
                            DeviceStatusType.STATUS_WARNING)

    # confirm root gets updated to STATUS_WARNING
    self._wait_root_event_and_verify(AggregateStatusType.AGGREGATE_POWER,
                                     DeviceStatusType.STATUS_WARNING)

    log.debug("after AGGREGATE_POWER <- STATUS_WARNING on behalf of instr[1]")
    publish_event_for_diagnostics()
    # (MJ01C's report now also shows AGGREGATE_POWER STATUS_WARNING for
    # instrs[1] and for rollup_status)

    # -------------------------------------------------------------------
    # instrs[2] publishes a STATUS_CRITICAL for AGGREGATE_LOCATION
    self._expect_from_root(p_root)
    self._publish_for_child(instrs[2],
                            AggregateStatusType.AGGREGATE_LOCATION,
                            DeviceStatusType.STATUS_CRITICAL)

    # confirm root gets updated to STATUS_CRITICAL
    self._wait_root_event_and_verify(AggregateStatusType.AGGREGATE_LOCATION,
                                     DeviceStatusType.STATUS_CRITICAL)

    log.debug("after AGGREGATE_LOCATION <- STATUS_CRITICAL on behalf of instr[2]")
    publish_event_for_diagnostics()
def test_with_intermediate_subplatform_directly_stopped_then_restarted(self):
    """
    Similar to test_with_intermediate_subplatform_directly_stopped but the
    sub-platform is then relaunched to verify that it is "revalidated" for
    subsequent processing.
    We can visually verify this via the publish_event_for_diagnostics utility.
    The test should complete without any issues.
    """
    self._set_receive_timeout()

    recursion = True

    # launch the full hierarchy (no extra instruments):
    p_root = self._set_up_platform_hierarchy_with_some_instruments([])
    self._launch_network(p_root, recursion)
    log.info('platforms in the launched network (%d): %s',
             len(self._setup_platforms), self._setup_platforms.keys())

    # the intermediate sub-platform this test stops and restarts:
    p_obj = self._get_platform('LV01B')
    pa_client = self._create_resource_agent_client(p_obj.platform_device_id)

    self._ping_agent()
    self._initialize(recursion)
    self._go_active(recursion)
    self._run(recursion)

    self._assert_agent_client_state(pa_client, ResourceAgentState.COMMAND)

    # subscribe before stopping so the STOPPED event cannot be missed:
    async_event_result, events_received = self._start_ResourceAgentLifecycleEvent_subscriber(
        pa_client.resource_id, PlatformAgent.ORIGIN_TYPE, 'STOPPED')

    # directly stop sub-platform
    log.info("stopping sub-platform %r", p_obj.platform_device_id)
    self.IMS.stop_platform_agent_instance(p_obj.platform_agent_instance_id)

    # verify publication of lifecycle event from sub-platform when stopped
    async_event_result.get(timeout=self._receive_timeout)
    self.assertEquals(len(events_received), 1)
    event_received = events_received[0]
    log.info("ResourceAgentLifecycleEvent received: %s", event_received)

    gevent.sleep(3)
    publish_event_for_diagnostics()
    # should show the invalidated child for parent Node1B: the stopped
    # sub-platform's device id reported STATUS_UNKNOWN in all 4 aggregate
    # categories and listed under invalidated_children (while aggstatus
    # and rollup_status remain STATUS_OK).

    gevent.sleep(3)

    ############################################
    # relaunch the intermediate sub-platform:
    log.info("relaunching sub-platform 'LV01B': %r", p_obj.platform_device_id)
    pa_client = self._start_a_platform(p_obj)
    self._ping_agent(pa_client)

    # recursion=False because LV01B's children are already in COMMAND
    self._initialize(recursion=False, pa_client=pa_client)
    self._go_active(recursion=False, pa_client=pa_client)
    self._run(recursion=False, pa_client=pa_client)

    # wait for a bit to allow ancestors to re-validate the child, in
    # particular for the parent Node1B:
    gevent.sleep(10)
    publish_event_for_diagnostics()
    # should show the child re-validated:
def test_platform_status_terminate_and_restart_instrument_1_1(self):
    """
    Tests reaction of a platform upon termination and re-start of its
    associated instrument (OOIION-1077).

    Network: platform 'LJ01D' with one instrument, SBE37_SIM_01.

    NOTE(review): this appears to be a duplicate of an earlier method with
    the same name; if both are in the same class, this later definition
    shadows the earlier one — confirm which copy is intended to remain.
    """
    self._set_receive_timeout()

    # create the network:
    self.p_root = p_root = self._create_platform('LJ01D')

    # create and assign an instrument to LJ01D
    i_obj = self._create_instrument("SBE37_SIM_01")
    self._assign_instrument_to_platform(i_obj, p_root)
    log.debug("OOIION-1077 instrument assigned: %s", i_obj)

    #####################################################################
    # prepare to verify expected ResourceAgentLifecycleEvent is generated when
    # the instrument agent gets started for the very first time:
    async_event_result, events_received = self._start_ResourceAgentLifecycleEvent_subscriber(
        i_obj.instrument_device_id, InstrumentAgent.ORIGIN_TYPE, 'STARTED')

    #####################################################################
    # start up the network
    self._start_platform(p_root)
    self.addCleanup(self._stop_platform, p_root)
    self.addCleanup(self._done)

    self._initialize()
    self._go_active()
    self._run()

    log.debug(
        "OOIION-1077 waiting for ResourceAgentLifecycleEvent STARTED")
    # NOTE(review): hardcoded 30s timeout; CFG.endpoint.receive.timeout was
    # presumably intended here — confirm before changing.
    async_event_result.get(timeout=30)  # CFG.endpoint.receive.timeout)
    self.assertEquals(len(events_received), 1)

    #####################################################################
    # get all root statuses
    aggstatus, child_agg_status, rollup_status = self._get_all_root_statuses(
    )

    log.debug("OOIION-1077 publish_event_for_diagnostics")
    publish_event_for_diagnostics()
    sleep(3)
    # Expected status_manager INFO report: the instrument child, aggstatus
    # and rollup_status all STATUS_OK; invalidated_children == [].

    #####################################################################
    # before any updates in this test verify initial statuses are all OK:
    self._verify_all_statuses_OK(aggstatus, child_agg_status, rollup_status)

    #####################################################################
    # verify the root platform has set its aparam_child_agg_status with
    # the assigned instrument:
    all_origins = [i_obj.instrument_device_id]
    child_agg_status_keys = sorted(child_agg_status.keys())
    self.assertEquals(all_origins, child_agg_status_keys)

    # Now, the core of this test follows.

    #####################################################################
    # terminate instrument
    #####################################################################

    # before the termination of the instrument:
    # - prepare to verify the expected ResourceAgentLifecycleEvent is generated:
    async_event_result, events_received = self._start_ResourceAgentLifecycleEvent_subscriber(
        i_obj.instrument_device_id, InstrumentAgent.ORIGIN_TYPE, 'STOPPED')

    # now terminate the instrument:
    log.debug("OOIION-1077 terminating instrument: %s", i_obj)
    self._stop_instrument(i_obj, use_ims=False)

    log.debug(
        "OOIION-1077 waiting for ResourceAgentLifecycleEvent STOPPED")
    async_event_result.get(timeout=CFG.endpoint.receive.timeout)
    self.assertEquals(len(events_received), 1)

    # verify the root's child_status are all UNKNOWN
    # Note: no event is going to be generated from the platform because
    # its rollup_status is *not* changing.
    # So, we have to wait for a bit to let the updates propagate:
    sleep(15)

    log.debug(
        "OOIION-1077 publish_event_for_diagnostics after instrument termination"
    )
    publish_event_for_diagnostics()
    sleep(3)
    # Expected status_manager INFO report: the instrument child all
    # STATUS_UNKNOWN while aggstatus/rollup_status remain STATUS_OK, and
    # the instrument's device id listed under invalidated_children.

    # and do verification that the child_agg_status are all UNKNOWN:
    _, child_agg_status, _ = self._get_all_root_statuses()
    self._verify_statuses(child_agg_status[i_obj.instrument_device_id],
                          [DeviceStatusType.STATUS_UNKNOWN])

    #####################################################################
    # re-start instrument
    #####################################################################

    # NOTE: platform agents rely on ResourceAgentLifecycleEvent STARTED events and
    # on regular status events to re-validate re-started children, see status_manager.
    log.debug("OOIION-1077 re-starting instrument: %s", i_obj)
    ia_client = self._start_instrument(i_obj, use_ims=False)
    log.debug("OOIION-1077 instrument re-started: rid=%r",
              i_obj.instrument_device_id)

    # again, have to wait for a bit to let the updates propagate:
    sleep(15)
    # log shows:
    # INFO Dummy-182 ion.agents.platform.platform_agent:1060 'LJ01D': OOIION-1077
    #   _child_running: revalidated child with resource_id='25be290ea0bb4ca9924b307db9779703'

    log.debug(
        "OOIION-1077 publish_event_for_diagnostics after instrument re-start"
    )
    publish_event_for_diagnostics()
    sleep(3)
    # Expected status_manager INFO report: everything back to STATUS_OK;
    # invalidated_children == [].

    # And do verification that the child_agg_status are all OK again:
    # NOTE: this assumes that, once running again, the instrument's
    # aggstatus are in turn back to OK.
    _, child_agg_status, _ = self._get_all_root_statuses()
    self._verify_statuses(child_agg_status[i_obj.instrument_device_id],
                          [DeviceStatusType.STATUS_OK])

    #####################################################################
    # move the instrument to the COMMAND state where it was when terminated
    # so the shutdown sequence in the root platform completes fine.
    # This also verifies that we are able to continue interacting with
    # the instrument after the re-start.
    #####################################################################

    cmd = AgentCommand(command=InstrumentAgentEvent.INITIALIZE)
    retval = ia_client.execute_agent(cmd, timeout=CFG.endpoint.receive.timeout)
    log.debug("OOIION-1077 INITIALIZE to instrument returned: %s", retval)

    cmd = AgentCommand(command=InstrumentAgentEvent.GO_ACTIVE)
    retval = ia_client.execute_agent(cmd, timeout=CFG.endpoint.receive.timeout)
    log.debug("OOIION-1077 GO_ACTIVE to instrument returned: %s", retval)

    cmd = AgentCommand(command=InstrumentAgentEvent.RUN)
    retval = ia_client.execute_agent(cmd, timeout=CFG.endpoint.receive.timeout)
    log.debug("OOIION-1077 RUN to instrument returned: %s", retval)

    # verify instrument is in COMMAND:
    instr_state = ia_client.get_agent_state()
    log.debug("instrument state: %s", instr_state)
    self.assertEquals(InstrumentAgentState.COMMAND, instr_state)
def test_platform_status_launch_instruments_first_2_3(self):
    """
    Test of status propagation in a small network of 2 platforms and
    3 instruments, with the instruments launched (including port
    agents) before the root platform.

        MJ01C (with 2 instruments)
            LJ01D (with 1 instrument)

    Right after the root platform is launched, it verifies that all its
    statuses are updated to OK. Note that this is a scenario in which
    the updates are not triggered by the event publications done by the
    instruments, because those publications happen at a time when the
    platforms have not been launched yet. Rather, during the launch of
    the platforms, they retrieve the statuses of their children to
    update the corresponding statuses. This capability was initially
    added to support UI testing with instruments whose port agents need
    to be manually launched.

    The test also includes some explicitly triggered updates via
    publication on behalf of the instruments.
    """
    self._set_receive_timeout()

    # create the network:
    p_objs = {}
    self.p_root = p_root = self._create_hierarchy("MJ01C", p_objs)

    # _create_hierarchy registers both platforms in p_objs:
    self.assertEquals(2, len(p_objs))
    for platform_id in ["MJ01C", "LJ01D"]:
        self.assertIn(platform_id, p_objs)

    # the sub-platform:
    p_LJ01D = p_objs["LJ01D"]

    #####################################################################
    # create and launch instruments/port_agents (before the platforms):
    instrs = []
    for instr_key in ["SBE37_SIM_01", "SBE37_SIM_02", "SBE37_SIM_03"]:
        i_obj = self._create_instrument(instr_key, start_port_agent=False)
        ia_client = self._start_instrument(i_obj)
        self.addCleanup(self._stop_instrument, i_obj)
        instrs.append(i_obj)
        log.debug("started instrument %s", instr_key)

    #####################################################################
    # assign instruments to platforms:

    # 2 instruments to root:
    self._assign_instrument_to_platform(instrs[0], p_root)
    self._assign_instrument_to_platform(instrs[1], p_root)

    # 1 instrument to sub-platform LJ01D:
    self._assign_instrument_to_platform(instrs[2], p_LJ01D)

    #####################################################################
    # start up the root platform
    self._start_platform(p_root)
    self.addCleanup(self._stop_platform, p_root)
    self.addCleanup(self._shutdown)
    log.debug("started root platform")

    #####################################################################
    # get all root statuses
    aggstatus, child_agg_status, rollup_status = self._get_all_root_statuses(
    )
    # NOTE(review): at this point the retrieved status tables are expected
    # to show STATUS_OK for every child, for aggstatus and for
    # rollup_status (see the verification below).

    publish_event_for_diagnostics()
    # Asks each platform's status manager to log its current status
    # report (diagnostic aid only; no assertions depend on this).

    #####################################################################
    # verify the root platform has set its aparam_child_agg_status with
    # all its descendant nodes (including all instruments):

    # expected origins = all platforms except the root itself ...
    all_origins = [p_obj.platform_device_id for p_obj in p_objs.values()]
    all_origins.remove(p_root.platform_device_id)
    # ... plus all three instruments:
    all_origins.extend(i_obj.instrument_device_id for i_obj in instrs)
    all_origins = sorted(all_origins)
    child_agg_status_keys = sorted(child_agg_status.keys())
    self.assertEquals(all_origins, child_agg_status_keys)

    #####################################################################
    # all statuses must be OK (in particular for the instrument children)
    self._verify_all_statuses_OK(aggstatus, child_agg_status, rollup_status)

    #####################################################################
    # trigger some status updates from the instruments and do
    # corresponding verifications against the root platform.
    # Note that the sub-platform also should get properly updated but
    # this test doesn't do these verifications.

    # -------------------------------------------------------------------
    # instrs[0] publishes a STATUS_CRITICAL for AGGREGATE_COMMS
    self._expect_from_root(p_root)
    self._publish_for_child(instrs[0],
                            AggregateStatusType.AGGREGATE_COMMS,
                            DeviceStatusType.STATUS_CRITICAL)

    # confirm root gets updated to STATUS_CRITICAL
    self._wait_root_event()
    self._verify_with_get_agent(AggregateStatusType.AGGREGATE_COMMS,
                                DeviceStatusType.STATUS_CRITICAL)

    log.debug(
        "after AGGREGATE_COMMS <- STATUS_CRITICAL on behalf of instr[0]")
    publish_event_for_diagnostics()
    # Root's rollup_status for AGGREGATE_COMMS should now report
    # STATUS_CRITICAL in the diagnostic log.

    # -------------------------------------------------------------------
    # instrs[0] publishes a STATUS_WARNING for AGGREGATE_DATA
    self._expect_from_root(p_root)
    self._publish_for_child(instrs[0],
                            AggregateStatusType.AGGREGATE_DATA,
                            DeviceStatusType.STATUS_WARNING)

    # confirm root gets updated to STATUS_WARNING
    self._wait_root_event_and_verify(AggregateStatusType.AGGREGATE_DATA,
                                     DeviceStatusType.STATUS_WARNING)

    log.debug(
        "after AGGREGATE_DATA <- STATUS_WARNING on behalf of instr[0]")
    publish_event_for_diagnostics()
    # Root's rollup_status should now be CRITICAL for COMMS and
    # WARNING for DATA.

    # -------------------------------------------------------------------
    # instrs[1] publishes a STATUS_WARNING for AGGREGATE_POWER
    self._expect_from_root(p_root)
    self._publish_for_child(instrs[1],
                            AggregateStatusType.AGGREGATE_POWER,
                            DeviceStatusType.STATUS_WARNING)

    # confirm root gets updated to STATUS_WARNING
    self._wait_root_event_and_verify(AggregateStatusType.AGGREGATE_POWER,
                                     DeviceStatusType.STATUS_WARNING)

    log.debug(
        "after AGGREGATE_POWER <- STATUS_WARNING on behalf of instr[1]")
    publish_event_for_diagnostics()
    # Root's rollup_status should now also report WARNING for POWER.

    # -------------------------------------------------------------------
    # instrs[2] publishes a STATUS_CRITICAL for AGGREGATE_LOCATION
    self._expect_from_root(p_root)
    self._publish_for_child(instrs[2],
                            AggregateStatusType.AGGREGATE_LOCATION,
                            DeviceStatusType.STATUS_CRITICAL)

    # confirm root gets updated to STATUS_CRITICAL
    self._wait_root_event_and_verify(
        AggregateStatusType.AGGREGATE_LOCATION,
        DeviceStatusType.STATUS_CRITICAL)

    log.debug(
        "after AGGREGATE_LOCATION <- STATUS_CRITICAL on behalf of instr[2]"
    )
    publish_event_for_diagnostics()
def test_with_intermediate_subplatform_directly_stopped_then_restarted( self): # # Similar to test_with_intermediate_subplatform_directly_stopped but the sub-platform is then # relaunched to verify that it is "revalidated" for subsequent processing. # We can visually verify this via the publish_event_for_diagnostics utility. # The test should complete without any issues. # self._set_receive_timeout() recursion = True p_root = self._set_up_platform_hierarchy_with_some_instruments([]) self._launch_network(p_root, recursion) log.info('platforms in the launched network (%d): %s', len(self._setup_platforms), self._setup_platforms.keys()) p_obj = self._get_platform('LV01B') pa_client = self._create_resource_agent_client( p_obj.platform_device_id) self._ping_agent() self._initialize(recursion) self._go_active(recursion) self._run(recursion) self._assert_agent_client_state(pa_client, ResourceAgentState.COMMAND) async_event_result, events_received = self._start_ResourceAgentLifecycleEvent_subscriber( pa_client.resource_id, PlatformAgent.ORIGIN_TYPE, 'STOPPED') # directly stop sub-platform log.info("stopping sub-platform %r", p_obj.platform_device_id) self.IMS.stop_platform_agent_instance(p_obj.platform_agent_instance_id) # verify publication of lifecycle event from sub-platform when stopped async_event_result.get(timeout=self._receive_timeout) self.assertEquals(len(events_received), 1) event_received = events_received[0] log.info("ResourceAgentLifecycleEvent received: %s", event_received) gevent.sleep(3) publish_event_for_diagnostics( ) # should show the invalidated child for parent Node1B: # INFO ... 
ion.agents.platform.status_manager:1019 'Node1B'/RESOURCE_AGENT_STATE_COMMAND: (a7f865c34f534e60a14e5f0f8ef2fd53) status report triggered by diagnostic event: # AGGREGATE_COMMS AGGREGATE_DATA AGGREGATE_LOCATION AGGREGATE_POWER # 26215ffcf7c94260a99e9c9d103f22f9 : STATUS_OK STATUS_OK STATUS_OK STATUS_OK # 0e07bc623af64a3a8f61465329451de7 : STATUS_OK STATUS_OK STATUS_OK STATUS_OK # a914714894b844a8b42724fe9208fde4 : STATUS_OK STATUS_OK STATUS_OK STATUS_OK # 02d5a770fba8405c868cc8d55bbbb8d3 : STATUS_OK STATUS_OK STATUS_OK STATUS_OK # b19f89585e7c43789b60beac5ddec43c : STATUS_OK STATUS_OK STATUS_OK STATUS_OK # a2f81525ab1e425da808191f9bbe945d : STATUS_UNKNOWN STATUS_UNKNOWN STATUS_UNKNOWN STATUS_UNKNOWN # 6ff02a90e34643fe87ecf262a33437cd : STATUS_OK STATUS_OK STATUS_OK STATUS_OK # 83b4f74ab1db4c70ae63072336083ac3 : STATUS_OK STATUS_OK STATUS_OK STATUS_OK # 7639e530740a48a8b299d0d19dcf7abe : STATUS_OK STATUS_OK STATUS_OK STATUS_OK # 88b143e311514121adc544c5933f92a6 : STATUS_OK STATUS_OK STATUS_OK STATUS_OK # b1453122a5a64ac6868cfc39e12e4e50 : STATUS_OK STATUS_OK STATUS_OK STATUS_OK # c996ff0478a6449da62955859020ee50 : STATUS_OK STATUS_OK STATUS_OK STATUS_OK # aggstatus : STATUS_OK STATUS_OK STATUS_OK STATUS_OK # rollup_status : STATUS_OK STATUS_OK STATUS_OK STATUS_OK # # invalidated_children : ['a2f81525ab1e425da808191f9bbe945d'] gevent.sleep(3) ############################################ # relaunch the intermediate sub-platform: log.info("relaunching sub-platform 'LV01B': %r", p_obj.platform_device_id) pa_client = self._start_a_platform(p_obj) self._ping_agent(pa_client) # recursion=False because LV01B's children are already in COMMAND self._initialize(recursion=False, pa_client=pa_client) self._go_active(recursion=False, pa_client=pa_client) self._run(recursion=False, pa_client=pa_client) # wait for a bit to allow ancestors to re-validate the child, in particular for the parent Node1B: gevent.sleep(10) publish_event_for_diagnostics() # should show the child 
re-validated: