Пример #1
0
async def test_service_health_broadcast(hax_client, planner, status: str,
                                        health: ServiceHealth):
    """Post a Consul-style health report and check the resulting broadcast.

    A single-service report whose only check carries ``status`` is posted
    to ``/``. The request must succeed, and ``planner.add_command`` must
    have been called once with a ``BroadcastHAStates`` that reports
    ``health`` for fid ``0x7200000000000001:12``.
    """
    node = {
        'Node': 'localhost',
        'Address': '10.1.10.12',
    }
    service = {
        'ID': '12',
        'Service': 'ios',
        'Tags': [],
        'Port': 8000,
    }
    check = {
        'Node': '12',
        'CheckID': 'service:ios',
        'Name': "Service 'ios' check",
        'Status': status,
        'Notes': '',
        'Output': '',
        'ServiceID': '12',
        'ServiceName': 'ios',
    }
    payload = [{'Node': node, 'Service': service, 'Checks': [check]}]

    response = await hax_client.post('/', json=payload)

    assert response.status == 200
    assert planner.add_command.called
    expected_state = HAState(fid=Fid(0x7200000000000001, 12), status=health)
    expected_command = BroadcastHAStates(states=[expected_state],
                                         reply_to=None)
    planner.add_command.assert_called_once_with(expected_command)
Пример #2
0
 def fn():
     """Log the Consul service health payload and schedule its broadcast.

     ``data``, ``consul_util`` and ``planner`` are not defined here; they
     are resolved from the surrounding scope.
     """
     LOG.debug('Service health from Consul: %s', data)
     states = to_ha_states(data, consul_util)
     command = BroadcastHAStates(states=states, reply_to=None)
     planner.add_command(command)
Пример #3
0
 def fn():
     """Turn a process status update into HA state broadcasts.

     Every item of ``data`` carries a base64-encoded JSON document in
     ``'Value'`` and the process fid as the second ``/``-separated part of
     ``'Key'``. Items of type M0MKFS and items whose state is neither
     STARTED nor STOPPED are skipped. ``data`` and ``planner`` are resolved
     from the surrounding scope.
     """
     health_by_state = {
         'M0_CONF_HA_PROCESS_STARTING': ObjHealth.OFFLINE,
         'M0_CONF_HA_PROCESS_STARTED': ObjHealth.OK,
         'M0_CONF_HA_PROCESS_STOPPING': ObjHealth.OFFLINE,
         'M0_CONF_HA_PROCESS_STOPPED': ObjHealth.OFFLINE
     }
     reportable = ('M0_CONF_HA_PROCESS_STARTED',
                   'M0_CONF_HA_PROCESS_STOPPED')
     ha_states: List[HAState] = []
     LOG.debug('process status: %s', data)
     for entry in data:
         raw_value = base64.b64decode(entry['Value'])
         status_doc = json.loads(raw_value.decode('utf-8'))
         fid_text = entry['Key'].split('/')[1]
         LOG.debug('process update item key %s item val: %s',
                   fid_text, status_doc)
         fid = Fid.parse(fid_text)
         state = status_doc['state']
         kind = status_doc['type']
         if kind == 'M0_CONF_HA_PROCESS_M0MKFS':
             continue
         if state not in reportable:
             continue
         ha_states.append(HAState(fid=fid, status=health_by_state[state]))
         # NOTE(review): the broadcast is issued once per matching item and
         # always passes the same, still growing list, so earlier states
         # are sent again with every later item. Confirm whether a single
         # broadcast after the loop was intended.
         planner.add_command(
             BroadcastHAStates(states=ha_states, reply_to=None))
Пример #4
0
    async def _process(request):
        """Queue a broadcast built from the request's JSON body.

        Replies with an empty ``web.Response`` once the command is queued.
        """
        data = await request.json()

        def enqueue():
            # Both the conversion and the put happen inside the executor.
            command = BroadcastHAStates(states=to_ha_states(data),
                                        reply_to=None)
            queue.put(command)

        # Note that queue.put is potentially a blocking call, hence it is
        # handed over to an executor instead of being run in the coroutine.
        await asyncio.get_event_loop().run_in_executor(None, enqueue)
        return web.Response()
Пример #5
0
 def _process_event_cb(self, fid, chp_event, chp_type, chp_pid):
     """Queue a process event and, for event code 3, an 'offline' broadcast.

     The event is always put on ``self.queue`` as a ``ProcessEvent``. When
     ``chp_event`` equals 3, a ``BroadcastHAStates`` marking ``fid`` as
     'offline' is queued right after it.
     """
     logging.info('fid=%s, chp_event=%s', fid, chp_event)
     process = ConfHaProcess(chp_event=chp_event,
                             chp_type=chp_type,
                             chp_pid=chp_pid,
                             fid=fid)
     self.queue.put(ProcessEvent(process))
     # NOTE(review): 3 is an unnamed event code; confirm which process
     # event it stands for and consider a named constant.
     if chp_event != 3:
         return
     offline = HAState(fid=fid, status='offline')
     self.queue.put(BroadcastHAStates(states=[offline], reply_to=None))
Пример #6
0
    def handle_ioq_stob_error(self, payload: Dict[str, Any]) -> None:
        """Broadcast 'offline' for the device named in an IOQ stob error.

        The fid is parsed from ``payload['conf_sdev']``; a null fid is
        logged and ignored. Otherwise the broadcast is queued, the message
        ids are read back from a one-slot reply queue and handed to
        ``self.herald.wait_for_any``.
        """
        sdev_fid = Fid.parse(payload['conf_sdev'])
        if sdev_fid.is_null():
            logging.debug('Fid is 0:0. Skipping the message.')
        else:
            reply_queue: Queue = Queue(1)
            offline = HAState(sdev_fid, status='offline')
            self.queue.put(
                BroadcastHAStates(states=[offline], reply_to=reply_queue))
            # Blocks until the message ids are put on the reply queue.
            message_ids: List[MessageId] = reply_queue.get()
            self.herald.wait_for_any(HaLinkMessagePromise(message_ids))
Пример #7
0
    def handle_ioq_stob_error(self, payload: Dict[str, Any]) -> None:
        """Broadcast FAILED for the device named in an IOQ stob error.

        The fid is parsed from ``payload['conf_sdev']``; a null fid is
        logged and ignored. Otherwise the broadcast is handed to
        ``self.planner``, the message ids are read back from a one-slot
        reply queue and passed to ``self.herald.wait_for_any``.
        """
        sdev_fid = Fid.parse(payload['conf_sdev'])
        if sdev_fid.is_null():
            LOG.debug('Fid is 0:0. Skipping the message.')
        else:
            reply_queue: Queue = Queue(1)
            failed = HAState(sdev_fid, status=ObjHealth.FAILED)
            self.planner.add_command(
                BroadcastHAStates(states=[failed], reply_to=reply_queue))
            # Blocks until the message ids are put on the reply queue.
            message_ids: List[MessageId] = reply_queue.get()
            self.herald.wait_for_any(HaLinkMessagePromise(message_ids))
Пример #8
0
 def _process_event_cb(self, fid, chp_event, chp_type, chp_pid):
     """Queue a process event and, for event code 3, a FAILED broadcast.

     The event is always put on ``self.queue`` as a ``ProcessEvent``. When
     ``chp_event`` equals 3, a ``BroadcastHAStates`` reporting
     ``ServiceHealth.FAILED`` for ``fid`` is queued right after it.
     """
     LOG.info('fid=%s, chp_event=%s', fid, chp_event)
     process = ConfHaProcess(chp_event=chp_event,
                             chp_type=chp_type,
                             chp_pid=chp_pid,
                             fid=fid)
     self.queue.put(ProcessEvent(process))
     # NOTE(review): 3 is an unnamed event code; confirm which process
     # event it stands for and consider a named constant.
     if chp_event != 3:
         return
     failed = HAState(fid=fid, status=ServiceHealth.FAILED)
     self.queue.put(BroadcastHAStates(states=[failed], reply_to=None))
Пример #9
0
    def handle(self, msg: Event) -> None:
        """Broadcast the health carried by ``msg`` for the node it names.

        The node fid is looked up by ``msg.node_id``. When the lookup
        yields nothing, a warning is logged and the event is dropped.
        Otherwise ``msg.event_type`` is translated with
        ``self._get_status_by_text`` and a single-state broadcast is
        handed to ``self.planner``.
        """
        node_fid = self.cns.get_node_fid(msg.node_id)
        if not node_fid:
            # Logger.warn is a deprecated alias of Logger.warning and was
            # removed in Python 3.13.
            LOG.warning('Unknown [node_id=%s] provided. HA event is ignored',
                        msg.node_id)
            return

        health = self._get_status_by_text(msg.event_type)
        self.planner.add_command(
            BroadcastHAStates(states=[HAState(fid=node_fid, status=health)],
                              reply_to=None))
Пример #10
0
    def handle_device_state_set(self, payload: Dict[str, Any]) -> None:
        """Broadcast the device state described by ``payload``.

        ``self.to_ha_state`` converts the payload; a falsy result is
        logged and nothing is sent. Otherwise the state is queued for
        broadcast, the message ids are read back from a one-slot reply
        queue and handed to ``self.herald.wait_for_any``.
        """
        # TODO: handle payloads that carry several object entries
        # (i.e. iterate over the payload).
        ha_state: Optional[HAState] = self.to_ha_state(payload)
        if ha_state:
            reply_queue: Queue = Queue(1)
            LOG.debug('HA broadcast, node: %s device: %s state: %s',
                      payload['node'], payload['device'], payload['state'])
            command = BroadcastHAStates(states=[ha_state],
                                        reply_to=reply_queue)
            self.queue.put(command)
            # Blocks until the message ids are put on the reply queue.
            message_ids: List[MessageId] = reply_queue.get()
            self.herald.wait_for_any(HaLinkMessagePromise(message_ids))
        else:
            LOG.debug('No ha states to broadcast.')
Пример #11
0
 def _update_process_status(self, q: Queue, event: ConfHaProcess) -> None:
     """Store the process status in Consul and broadcast it for m0d.

     The status is always written through ``self.consul``. A broadcast of
     the matching service health is put on ``q`` only when the event type
     is ``M0_CONF_HA_PROCESS_M0D``.
     """
     # If a consul-related exception appears, it will
     # be processed by repeat_if_fails.
     #
     # This thread will become blocked until that
     # intermittent error gets resolved.
     self.consul.update_process_status(event)
     health = m0HaProcessEvent.event_to_svchealth(event.chp_event)
     if event.chp_type != m0HaProcessType.M0_CONF_HA_PROCESS_M0D:
         return
     # Broadcast the received motr process status to other motr
     # processes in the cluster.
     process_state = HAState(fid=event.fid, status=health)
     q.put(BroadcastHAStates(states=[process_state], reply_to=None))
Пример #12
0
    def _process_event_cb(self, fid, chp_event, chp_type, chp_pid):
        """Queue a process event and, for a started m0d, an OK broadcast.

        The event is always put on ``self.queue`` as a ``ProcessEvent``.
        When the type is ``M0_CONF_HA_PROCESS_M0D`` and the event is
        ``M0_CONF_HA_PROCESS_STARTED``, a ``BroadcastHAStates`` reporting
        ``ServiceHealth.OK`` for ``fid`` is queued right after it.
        """
        LOG.info('fid=%s, chp_event=%s', fid, chp_event)
        process = ConfHaProcess(chp_event=chp_event,
                                chp_type=chp_type,
                                chp_pid=chp_pid,
                                fid=fid)
        self.queue.put(ProcessEvent(process))

        is_m0d = chp_type == m0HaProcessType.M0_CONF_HA_PROCESS_M0D
        if (is_m0d
                and chp_event == m0HaProcessEvent.M0_CONF_HA_PROCESS_STARTED):
            started = HAState(fid=fid, status=ServiceHealth.OK)
            self.queue.put(
                BroadcastHAStates(states=[started], reply_to=None))
Пример #13
0
    def handle(self, msg: Event) -> None:
        """Broadcast the health carried by ``msg`` for the node it names.

        ``msg.resource_id`` is resolved to a node name (a missing name is
        tolerated via ``allow_null=True``) and the name to a node fid.
        When either lookup yields nothing, a warning is logged and the
        event is dropped. Otherwise ``msg.event_type`` is translated with
        ``self._get_status_by_text`` and a single-state broadcast is
        handed to ``self.planner``.
        """
        node_name = self.cns.get_node_name_by_machineid(msg.resource_id,
                                                        allow_null=True)
        if not node_name:
            # Logger.warn is a deprecated alias of Logger.warning and was
            # removed in Python 3.13.
            LOG.warning(
                'Unknown [resource_id=%s] provided. HA event is ignored',
                msg.resource_id)
            return

        node_fid = self.cns.get_node_fid(node_name)
        if not node_fid:
            LOG.warning(
                'Unknown [node_name=%s] provided. HA event is ignored',
                node_name)
            return

        health = self._get_status_by_text(msg.event_type)
        self.planner.add_command(
            BroadcastHAStates(states=[HAState(fid=node_fid, status=health)],
                              reply_to=None))
Пример #14
0
 def update_process_failure(self, q: Queue,
                            ha_states: List[HAState]) -> List[HAState]:
     """Re-check process states against Consul before they are broadcast.

     States whose fid is not a process are passed through unchanged. For
     process states the current status is requested from ``self.consul``:

     * FAILED is additionally recorded through
       ``service_health_to_m0dstatus_update``;
     * UNKNOWN is reported as OK for now, and a separate FAILED broadcast
       for the same fid is put on ``q``.

     Returns a new list; ``ha_states`` itself is not modified.
     """
     checked: List[HAState] = []
     for entry in ha_states:
         fid = entry.fid
         # We are only concerned with process statuses.
         if fid.container != ObjT.PROCESS.value:
             checked.append(entry)
             continue

         health = self.consul.get_process_current_status(entry.status, fid)
         if health == ServiceHealth.FAILED:
             self.consul.service_health_to_m0dstatus_update(fid, health)
         elif health == ServiceHealth.UNKNOWN:
             # UNKNOWN means hax was notified about a process failure but
             # could not confirm whether the process is still failed or
             # has failed and restarted. This typically happens when the
             # failure notification arrives late, while the process is
             # restarting or has already restarted. To avoid losing the
             # event, a broadcast specific to this process is enqueued so
             # that its real-time status is confirmed (as either failed or
             # passing) before a failure is broadcast; meanwhile the
             # process is reported as OK.
             health = ServiceHealth.OK
             recheck = BroadcastHAStates(
                 states=[HAState(fid=fid, status=ServiceHealth.FAILED)],
                 reply_to=None)
             q.put(recheck)
         checked.append(HAState(fid=fid, status=health))
     return checked
Пример #15
0
 def _broadcast(self, state_list: List[HAState]) -> None:
     """Queue a broadcast of ``state_list`` on ``self.q``.

     Nothing is logged or queued when ``state_list`` is falsy (e.g. empty).
     """
     if state_list:
         LOG.debug('Changes in statuses: %s', state_list)
         command = BroadcastHAStates(states=state_list, reply_to=None)
         self.q.put(command)
Пример #16
0
def broadcast():
    """Build a ``BroadcastHAStates`` with no states and ``reply_to=None``."""
    no_states = []
    return BroadcastHAStates(states=no_states, reply_to=None)
Пример #17
0
 def update_process_failure(self, planner: WorkPlanner,
                            ha_states: List[HAState]) -> List[HAState]:
     """Re-check process states against Consul before they are broadcast.

     States whose fid is not a process are passed through unchanged. For
     each process state the current status is requested from
     ``self.consul`` and then:

     * the state is dropped when the status is OK and the local status is
       already 'M0_CONF_HA_PROCESS_STARTED', or when the status is
       FAILED/STOPPED and the local status is already
       'M0_CONF_HA_PROCESS_STOPPED';
     * for UNKNOWN a separate FAILED broadcast for the same fid is handed
       to ``planner``;
     * for any status other than UNKNOWN and OFFLINE the process status is
       written through ``self.consul.update_process_status`` (STARTED
       for OK, STOPPED otherwise).

     Every state that was not dropped is returned with the current
     status, in a new list.
     """
     new_ha_states: List[HAState] = []
     for state in ha_states:
         # We are only concerned with process statuses.
         if state.fid.container == ObjT.PROCESS.value:
             current_status = self.consul.get_process_current_status(
                 state.status, state.fid)
             if current_status == ServiceHealth.OK:
                 # The process is already recorded as started locally,
                 # so this state is not reported again.
                 if (self.consul.get_process_local_status(
                         state.fid) == 'M0_CONF_HA_PROCESS_STARTED'):
                     continue
             if current_status in (ServiceHealth.FAILED,
                                   ServiceHealth.STOPPED):
                 if (self.consul.get_process_local_status(
                         state.fid) == 'M0_CONF_HA_PROCESS_STOPPED'):
                     # Consul may report the failure of a process multiple
                     # times. Duplicate failure notifications are not
                     # sent, as they may delay cleanup activities.
                     continue
             if current_status == ServiceHealth.UNKNOWN:
                 # UNKNOWN means hax was notified about a process failure
                 # but could not confirm whether the process is still
                 # failed or has failed and restarted. This typically
                 # happens when the failure notification arrives late,
                 # while the process is restarting or has already
                 # restarted. To avoid losing the event, a broadcast
                 # specific to this process is enqueued so that its
                 # real-time status is eventually confirmed as either
                 # failed or passing (OK) before a failure is broadcast.
                 #
                 # NOTE(review): the assignment below looks redundant,
                 # since this branch is only entered when the status
                 # already compares equal to UNKNOWN - confirm.
                 current_status = ServiceHealth.UNKNOWN
                 planner.add_command(
                     BroadcastHAStates(states=[
                         HAState(fid=state.fid, status=ServiceHealth.FAILED)
                     ],
                         reply_to=None))
             if current_status not in (ServiceHealth.UNKNOWN,
                                       ServiceHealth.OFFLINE):
                 # We also need to account for and report the failure of
                 # remote Motr processes to this node's hax and motr
                 # processes. When Consul reports a remote process
                 # failure, hax confirms its current status from Consul KV,
                 # updates the list of failed services and also adds it to
                 # the broadcast list.
                 if current_status != ServiceHealth.OK:
                     event = m0HaProcessEvent.M0_CONF_HA_PROCESS_STOPPED
                 else:
                     event = m0HaProcessEvent.M0_CONF_HA_PROCESS_STARTED
                 self.consul.update_process_status(
                     ConfHaProcess(
                         chp_event=event,
                         chp_type=int(
                             m0HaProcessType.M0_CONF_HA_PROCESS_M0D),
                         chp_pid=0,
                         fid=state.fid))
             new_ha_states.append(
                 HAState(fid=state.fid, status=current_status))
         else:
             new_ha_states.append(state)
     return new_ha_states