Пример #1
0
 def ncsa_resources_query(self, params, healthy_forwarders):
     job_num = str(params[JOB_NUM])
     raft_list = params[RAFTS]
     needed_workers = len(raft_list)
     LOGGER.info('Sufficient forwarders have been found. Checking NCSA')
     self._pairs_dict = {}
     forwarder_candidate_dict = {}
     for i in range(0, needed_workers):
         forwarder_candidate_dict[healthy_forwarders[i]] = raft_list[i]
         self.FWD_SCBD.set_forwarder_status(healthy_forwarders[i],
                                            NCSA_RESOURCES_QUERY)
         # Call this method for testing...
         # There should be a message sent to NCSA here asking for available resources
     timed_ack_id = self.get_next_timed_ack_id("NCSA_Ack")
     ncsa_params = {}
     ncsa_params[MSG_TYPE] = "NCSA_RESOURCES_QUERY"
     ncsa_params[JOB_NUM] = job_num
     #ncsa_params[RAFT_NUM] = needed_workers
     ncsa_params[ACK_ID] = timed_ack_id
     ncsa_params["FORWARDERS"] = forwarder_candidate_dict
     self.JOB_SCBD.set_value_for_job(job_num, "STATE",
                                     "NCSA_RESOURCES_QUERY_SENT")
     self.JOB_SCBD.set_value_for_job(job_num,
                                     "TIME_NCSA_RESOURCES_QUERY_SENT",
                                     get_timestamp())
     self._ncsa_publisher.publish_message(self.NCSA_CONSUME, ncsa_params)
     LOGGER.info(
         'The following forwarders have been sent to NCSA for pairing:')
     LOGGER.info(forwarder_candidate_dict)
     return timed_ack_id
Пример #2
0
 def ncsa_resources_query(self, params, healthy_forwarders):
     job_num = str(params[JOB_NUM])
     raft_list = params[RAFTS]
     needed_workers = len(raft_list)
     LOGGER.info('Sufficient forwarders have been found. Checking NCSA')
     self._pairs_dict = {}
     forwarder_candidate_dict = {}
     for i in range (0, needed_workers):
         forwarder_candidate_dict[healthy_forwarders[i]] = raft_list[i]
         self.FWD_SCBD.set_forwarder_status(healthy_forwarders[i], NCSA_RESOURCES_QUERY)
         # Call this method for testing...
         # There should be a message sent to NCSA here asking for available resources
     timed_ack_id = self.get_next_timed_ack_id("NCSA_Ack") 
     ncsa_params = {}
     ncsa_params[MSG_TYPE] = "NCSA_RESOURCES_QUERY"
     ncsa_params[JOB_NUM] = job_num
     #ncsa_params[RAFT_NUM] = needed_workers
     ncsa_params[ACK_ID] = timed_ack_id
     ncsa_params["FORWARDERS"] = forwarder_candidate_dict
     self.JOB_SCBD.set_value_for_job(job_num, "STATE", "NCSA_RESOURCES_QUERY_SENT")
     self.JOB_SCBD.set_value_for_job(job_num, "TIME_NCSA_RESOURCES_QUERY_SENT", get_timestamp())
     self._ncsa_publisher.publish_message(self.NCSA_CONSUME, ncsa_params) 
     LOGGER.info('The following forwarders have been sent to NCSA for pairing:')
     LOGGER.info(forwarder_candidate_dict)
     return timed_ack_id
Пример #3
0
    def insufficient_base_resources(self, params, healthy_forwarders):
        # send response msg to dmcs refusing job
        job_num = str(params[JOB_NUM])
        raft_list = params[RAFTS]
        ack_id = params['ACK_ID']
        needed_workers = len(raft_list)
        LOGGER.info('Reporting to DMCS that there are insufficient healthy forwarders for job #%s', job_num)
        dmcs_params = {}
        fail_dict = {}
        dmcs_params[MSG_TYPE] = NEW_JOB_ACK
        dmcs_params[JOB_NUM] = job_num
        dmcs_params[ACK_BOOL] = False
        dmcs_params[ACK_ID] = ack_id

        ### NOTE FOR DMCS ACK PROCESSING:
        ### if ACK_BOOL == True, there will NOT be a FAIL_DETAILS section
        ### If ACK_BOOL == False, there will always be a FAIL_DICT to examine AND there will always be a 
        ###   BASE_RESOURCES inside the FAIL_DICT
        ### If ACK_BOOL == False, and the BASE_RESOURCES inside FAIL_DETAILS == 0,
        ###   there will be only NEEDED and AVAILABLE Forwarder params - nothing more
        ### If ACK_BOOL == False and BASE_RESOURCES inside FAIL_DETAILS == 1, there will always be a 
        ###   NCSA_RESOURCES inside FAIL_DETAILS set to either 0 or 'NO_RESPONSE'
        ### if NCSA_RESPONSE == 0, there will be NEEDED and AVAILABLE Distributor params
        ### if NCSA_RESOURCES == 'NO_RESPONSE' there will be nothing else 
        fail_dict['BASE_RESOURCES'] = '0'
        fail_dict[NEEDED_FORWARDERS] = str(needed_workers)
        fail_dict[AVAILABLE_FORWARDERS] = str(len(healthy_forwarders))
        dmcs_params['FAIL_DETAILS'] = fail_dict
        self._base_publisher.publish_message("dmcs_consume", dmcs_params)
        # mark job refused, and leave Forwarders in Idle state
        self.JOB_SCBD.set_value_for_job(job_num, "STATE", "JOB_ABORTED")
        self.JOB_SCBD.set_value_for_job(job_num, "TIME_JOB_ABORTED_BASE_RESOURCES", get_timestamp())
        idle_state = {"STATE": "IDLE"}
        self.FWD_SCBD.set_forwarder_params(healthy_forwarders, idle_state)
        return False
Пример #4
0
 def accept_job(self, job_num):
     dmcs_message = {}
     dmcs_message[JOB_NUM] = job_num
     dmcs_message[MSG_TYPE] = NEW_JOB_ACK
     dmcs_message[ACK_BOOL] = True
     self.JOB_SCBD.set_value_for_job(job_num, STATE, "JOB_ACCEPTED")
     self.JOB_SCBD.set_value_for_job(job_num, "TIME_JOB_ACCEPTED", get_timestamp())
     self._base_publisher.publish_message("dmcs_consume", dmcs_message)
     return True
Пример #5
0
    def forwarder_health_check(self, params):
        job_num = str(params[JOB_NUM])
        raft_list = params['RAFTS']
        needed_workers = len(raft_list)

        self.JOB_SCBD.add_job(job_num, needed_workers)
        self.JOB_SCBD.set_value_for_job(job_num, "TIME_JOB_ADDED",
                                        get_timestamp())
        self.JOB_SCBD.set_value_for_job(job_num, "TIME_JOB_ADDED_E",
                                        get_epoch_timestamp())
        LOGGER.info('Received new job %s. Needed workers is %s', job_num,
                    needed_workers)

        # run forwarder health check
        # get timed_ack_id
        timed_ack = self.get_next_timed_ack_id("FORWARDER_HEALTH_CHECK_ACK")

        forwarders = self.FWD_SCBD.return_available_forwarders_list()
        # Mark all healthy Forwarders Unknown
        state_status = {"STATE": "HEALTH_CHECK", "STATUS": "UNKNOWN"}
        self.FWD_SCBD.set_forwarder_params(forwarders, state_status)
        # send health check messages
        ack_params = {}
        ack_params[MSG_TYPE] = FORWARDER_HEALTH_CHECK
        ack_params["ACK_ID"] = timed_ack
        ack_params[JOB_NUM] = job_num

        self.JOB_SCBD.set_value_for_job(job_num, "STATE",
                                        "BASE_RESOURCE_QUERY")
        self.JOB_SCBD.set_value_for_job(job_num, "TIME_BASE_RESOURCE_QUERY",
                                        get_timestamp())
        audit_params = {}
        audit_params['DATA_TYPE'] = 'FOREMAN_ACK_REQUEST'
        audit_params['SUB_TYPE'] = 'FORWARDER_HEALTH_CHECK_ACK'
        audit_params['ACK_ID'] = timed_ack
        audit_parsms['COMPONENT_NAME'] = 'BASE_FOREMAN'
        audit_params['TIME'] = get_epoch_timestamp()
        for forwarder in forwarders:
            self._base_publisher.publish_message(
                self.FWD_SCBD.get_value_for_forwarder(forwarder,
                                                      "CONSUME_QUEUE"),
                ack_params)

        return timed_ack
Пример #6
0
 def build_monitor_data(self, params):
     monitor_data = {}
     keez = list(params.keys())
     for kee in keez:
         monitor_data[kee] = params[kee]
     monitor_data['SESSION_ID'] = self.get_current_session()
     monitor_data['VISIT_ID'] = self.get_current_visit()
     monitor_data['TIME'] = get_timestamp()
     monitor_data['DATA_TYPE'] = self.DB_TYPE
     return monitor_data
Пример #7
0
 def accept_job(self, job_num):
     dmcs_message = {}
     dmcs_message[JOB_NUM] = job_num
     dmcs_message[MSG_TYPE] = NEW_JOB_ACK
     dmcs_message[ACK_BOOL] = True
     self.JOB_SCBD.set_value_for_job(job_num, STATE, "JOB_ACCEPTED")
     self.JOB_SCBD.set_value_for_job(job_num, "TIME_JOB_ACCEPTED",
                                     get_timestamp())
     self._base_publisher.publish_message("dmcs_consume", dmcs_message)
     return True
Пример #8
0
 def accept_job(self, ack_id, job_num):
     dmcs_message = {}
     dmcs_message[JOB_NUM] = job_num
     dmcs_message[MSG_TYPE] = self.PP_START_INTEGRATION_ACK
     dmcs_message['COMPONENT'] = self.COMPONENT_NAME
     dmcs_message[ACK_BOOL] = True
     dmcs_message['ACK_ID'] = ack_id
     self.JOB_SCBD.set_value_for_job(job_num, STATE, "JOB_ACCEPTED")
     self.JOB_SCBD.set_value_for_job(job_num, "TIME_JOB_ACCEPTED", get_timestamp())
     self._base_publisher.publish_message("dmcs_ack_consume", dmcs_message)
     return True
Пример #9
0
 def accept_job(self, ack_id, job_num):
     dmcs_message = {}
     dmcs_message[JOB_NUM] = job_num
     dmcs_message[MSG_TYPE] = self.PP_START_INTEGRATION_ACK
     dmcs_message['COMPONENT'] = self.COMPONENT_NAME
     dmcs_message[ACK_BOOL] = True
     dmcs_message['ACK_ID'] = ack_id
     self.JOB_SCBD.set_value_for_job(job_num, STATE, "JOB_ACCEPTED")
     self.JOB_SCBD.set_value_for_job(job_num, "TIME_JOB_ACCEPTED",
                                     get_timestamp())
     self._base_publisher.publish_message("dmcs_ack_consume", dmcs_message)
     return True
Пример #10
0
    def forwarder_health_check(self, params):
        job_num = str(params[JOB_NUM])
        raft_list = params['RAFTS']
        needed_workers = len(raft_list)

        self.JOB_SCBD.add_job(job_num, needed_workers)
        self.JOB_SCBD.set_value_for_job(job_num, "TIME_JOB_ADDED", get_timestamp())
        self.JOB_SCBD.set_value_for_job(job_num, "TIME_JOB_ADDED_E", get_epoch_timestamp())
        LOGGER.info('Received new job %s. Needed workers is %s', job_num, needed_workers)

        # run forwarder health check
        # get timed_ack_id
        timed_ack = self.get_next_timed_ack_id("FORWARDER_HEALTH_CHECK_ACK")

        forwarders = self.FWD_SCBD.return_available_forwarders_list()
        # Mark all healthy Forwarders Unknown
        state_status = {"STATE": "HEALTH_CHECK", "STATUS": "UNKNOWN"}
        self.FWD_SCBD.set_forwarder_params(forwarders, state_status)
        # send health check messages
        ack_params = {}
        ack_params[MSG_TYPE] = FORWARDER_HEALTH_CHECK
        ack_params["ACK_ID"] = timed_ack
        ack_params[JOB_NUM] = job_num
        
        self.JOB_SCBD.set_value_for_job(job_num, "STATE", "BASE_RESOURCE_QUERY")
        self.JOB_SCBD.set_value_for_job(job_num, "TIME_BASE_RESOURCE_QUERY", get_timestamp())
        audit_params = {}
        audit_params['DATA_TYPE'] = 'FOREMAN_ACK_REQUEST'
        audit_params['SUB_TYPE'] = 'FORWARDER_HEALTH_CHECK_ACK'
        audit_params['ACK_ID'] = timed_ack
        audit_parsms['COMPONENT_NAME'] = 'BASE_FOREMAN'
        audit_params['TIME'] = get_epoch_timestamp()
        for forwarder in forwarders:
            self._base_publisher.publish_message(self.FWD_SCBD.get_value_for_forwarder(forwarder,"CONSUME_QUEUE"),
                                            ack_params)

        return timed_ack
Пример #11
0
    def distribute_job_params(self, params, pairs):
        #ncsa has enough resources...
        job_num = str(params[JOB_NUM])
        self.JOB_SCBD.set_pairs_for_job(job_num, pairs)          
        self.JOB_SCBD.set_value_for_job(job_num, "TIME_PAIRS_ADDED", get_timestamp())
        LOGGER.info('The following pairs will be used for Job #%s: %s',
                     job_num, pairs)
        fwd_ack_id = self.get_next_timed_ack_id("FWD_PARAMS_ACK")
        fwders = list(pairs.keys())
        fwd_params = {}
        fwd_params[MSG_TYPE] = "FORWARDER_JOB_PARAMS"
        fwd_params[JOB_NUM] = job_num
        fwd_params[ACK_ID] = fwd_ack_id
        for fwder in fwders:
            fwd_params["TRANSFER_PARAMS"] = pairs[fwder]
            route_key = self.FWD_SCBD.get_value_for_forwarder(fwder, "CONSUME_QUEUE")
            self._base_publisher.publish_message(route_key, fwd_params)

        return fwd_ack_id
Пример #12
0
    def distribute_job_params(self, params, pairs):
        #ncsa has enough resources...
        job_num = str(params[JOB_NUM])
        self.JOB_SCBD.set_pairs_for_job(job_num, pairs)
        self.JOB_SCBD.set_value_for_job(job_num, "TIME_PAIRS_ADDED",
                                        get_timestamp())
        LOGGER.info('The following pairs will be used for Job #%s: %s',
                    job_num, pairs)
        fwd_ack_id = self.get_next_timed_ack_id("FWD_PARAMS_ACK")
        fwders = list(pairs.keys())
        fwd_params = {}
        fwd_params[MSG_TYPE] = "FORWARDER_JOB_PARAMS"
        fwd_params[JOB_NUM] = job_num
        fwd_params[ACK_ID] = fwd_ack_id
        for fwder in fwders:
            fwd_params["TRANSFER_PARAMS"] = pairs[fwder]
            route_key = self.FWD_SCBD.get_value_for_forwarder(
                fwder, "CONSUME_QUEUE")
            self._base_publisher.publish_message(route_key, fwd_params)

        return fwd_ack_id
Пример #13
0
    def insufficient_base_resources(self, params, healthy_forwarders):
        # send response msg to dmcs refusing job
        job_num = str(params[JOB_NUM])
        raft_list = params[RAFTS]
        ack_id = params['ACK_ID']
        needed_workers = len(raft_list)
        LOGGER.info(
            'Reporting to DMCS that there are insufficient healthy forwarders for job #%s',
            job_num)
        dmcs_params = {}
        fail_dict = {}
        dmcs_params[MSG_TYPE] = NEW_JOB_ACK
        dmcs_params[JOB_NUM] = job_num
        dmcs_params[ACK_BOOL] = False
        dmcs_params[ACK_ID] = ack_id

        ### NOTE FOR DMCS ACK PROCESSING:
        ### if ACK_BOOL == True, there will NOT be a FAIL_DETAILS section
        ### If ACK_BOOL == False, there will always be a FAIL_DICT to examine AND there will always be a
        ###   BASE_RESOURCES inside the FAIL_DICT
        ### If ACK_BOOL == False, and the BASE_RESOURCES inside FAIL_DETAILS == 0,
        ###   there will be only NEEDED and AVAILABLE Forwarder params - nothing more
        ### If ACK_BOOL == False and BASE_RESOURCES inside FAIL_DETAILS == 1, there will always be a
        ###   NCSA_RESOURCES inside FAIL_DETAILS set to either 0 or 'NO_RESPONSE'
        ### if NCSA_RESPONSE == 0, there will be NEEDED and AVAILABLE Distributor params
        ### if NCSA_RESOURCES == 'NO_RESPONSE' there will be nothing else
        fail_dict['BASE_RESOURCES'] = '0'
        fail_dict[NEEDED_FORWARDERS] = str(needed_workers)
        fail_dict[AVAILABLE_FORWARDERS] = str(len(healthy_forwarders))
        dmcs_params['FAIL_DETAILS'] = fail_dict
        self._base_publisher.publish_message("dmcs_consume", dmcs_params)
        # mark job refused, and leave Forwarders in Idle state
        self.JOB_SCBD.set_value_for_job(job_num, "STATE", "JOB_ABORTED")
        self.JOB_SCBD.set_value_for_job(job_num,
                                        "TIME_JOB_ABORTED_BASE_RESOURCES",
                                        get_timestamp())
        idle_state = {"STATE": "IDLE"}
        self.FWD_SCBD.set_forwarder_params(healthy_forwarders, idle_state)
        return False
Пример #14
0
    def process_dmcs_new_job(self, params):
        input_params = params
        needed_workers = len(input_params[RAFTS])
        ack_id = self.forwarder_health_check(input_params)

        self.ack_timer(
            7
        )  # This is a HUGE num seconds for now..final setting will be milliseconds
        healthy_forwarders = self.ACK_SCBD.get_components_for_timed_ack(
            timed_ack)

        num_healthy_forwarders = len(healthy_forwarders)
        if needed_workers > num_healthy_forwarders:
            result = self.insufficient_base_resources(input_params,
                                                      healthy_forwarders)
            return result
        else:
            healthy_status = {
                "STATUS": "HEALTHY",
                "STATE": "READY_WITHOUT_PARAMS"
            }
            self.FWD_SCBD.set_forwarder_params(healthy_forwarders,
                                               healthy_status)

            ack_id = self.ncsa_resources_query(input_params,
                                               healthy_forwarders)

            self.ack_timer(3)

            #Check ACK scoreboard for response from NCSA
            ncsa_response = self.ACK_SCBD.get_components_for_timed_ack(ack_id)
            if ncsa_response:
                pairs = {}
                ack_bool = None
                try:
                    ack_bool = ncsa_response[ACK_BOOL]
                    if ack_bool == True:
                        pairs = ncsa_response[PAIRS]
                except KeyError as e:
                    pass
                # Distribute job params and tell DMCS I'm ready.
                if ack_bool == TRUE:
                    fwd_ack_id = self.distribute_job_params(
                        input_params, pairs)
                    self.ack_timer(3)

                    fwd_params_response = self.ACK_SCBD.get_components_for_timed_ack(
                        fwd_ack_id)
                    if fwd_params_response and (len(fwd_params_response)
                                                == len(fwders)):
                        self.JOB_SCBD.set_value_for_job(
                            job_num, "STATE", "BASE_TASK_PARAMS_SENT")
                        self.JOB_SCBD.set_value_for_job(
                            job_num, "TIME_BASE_TASK_PARAMS_SENT",
                            get_timestamp())
                        in_ready_state = {'STATE': 'READY_WITH_PARAMS'}
                        self.FWD_SCBD.set_forwarder_params(
                            fwders, in_ready_state)
                        # Tell DMCS we are ready
                        result = self.accept_job(job_num)
                else:
                    #not enough ncsa resources to do job - Notify DMCS
                    idle_param = {'STATE': 'IDLE'}
                    self.FWD_SCBD.set_forwarder_params(healthy_forwarders,
                                                       idle_params)
                    result = self.insufficient_ncsa_resources(ncsa_response)
                    return result

            else:
                result = self.ncsa_no_response(input_params)
                idle_param = {'STATE': 'IDLE'}
                self.FWD_SCBD.set_forwarder_params(
                    list(forwarder_candidate_dict.keys()), idle_params)
                return result
Пример #15
0
    def process_dmcs_readout(self, params):
        job_number = params[JOB_NUM]
        pairs = self.JOB_SCBD.get_pairs_for_job(job_number)
        date - get_timestamp()
        self.JOB_SCBD.set_value_for_job(job_number, TIME_START_READOUT, date) 
        # The following line extracts the distributor FQNs from pairs dict using 
        # list comprehension values; faster than for loops
        distributors = [v['FQN'] for v in list(pairs.values())]
        forwarders = list(pairs.keys())

        ack_id = self.get_next_timed_ack_id('NCSA_READOUT')
### Send READOUT to NCSA with ACK_ID
        ncsa_params = {}
        ncsa_params[MSG_TYPE] = 'NCSA_READOUT'
        ncsa_params[ACK_ID] = ack_id
        self._ncsa_publisher.publish_message(NCSA_CONSUME, yaml.dump(ncsa_params))


        self.ack_timer(4)

        ncsa_response = self.ACK_SCBD.get_components_for_timed_ack(ack_id)
        if ncsa_response:
            if ncsa_response['ACK_BOOL'] == True:
                #inform forwarders
                fwd_ack_id = self.get_next_timed_ack_id('FORWARDER_READOUT')
                for forwarder in forwarders:
                    name = self.FWD_SCBD.get_value_for_forwarder(forwarder, NAME)
                    routing_key = self.FWD_SCBD.get_routing_key(forwarder)
                    msg_params = {}
                    msg_params[MSG_TYPE] = 'FORWARDER_READOUT'
                    msg_params[JOB_NUM] = job_number
                    msg_params['ACK_ID'] = fwd_ack_id
                    self.FWD_SCBD.set_forwarder_state(forwarder, START_READOUT)
                    self._publisher.publish_message(routing_key, yaml.dump(msg_params))
                self.ack_timer(4)
                forwarder_responses = self.ACK_SCBD.get_components_for_timed_ack(fwd_ack_id)
                if len(forwarder_responses) == len(forwarders):
                    dmcs_params = {}
                    dmcs_params[MSG_TYPE] = 'READOUT_ACK' 
                    dmcs_params[JOB_NUM] = job_number
                    dmcs_params['ACK_BOOL'] = True
                    dmcs_params['COMMENT'] = "Readout begun at %s" % get_timestamp()
                    self._publisher.publish_message('dmcs_consume', yaml.dump(dmcs_params))
                    
            else:
                #send problem with ncsa to DMCS
                dmcs_params = {}
                dmcs_params[MSG_TYPE] = 'READOUT_ACK' 
                dmcs_params[JOB_NUM] = job_number
                dmcs_params['ACK_BOOL'] = False
                dmcs_params['COMMENT'] = 'Readout Failed: Problem at NCSA - Expected Distributor Acks is %s, Number of Distributor Acks received is %s' % (ncsa_response['EXPECTED_DISTRIBUTOR_ACKS'], ncsa_response['RECEIVED_DISTRIBUTOR_ACKS'])
                self._base_publisher.publish_message('dmcs_consume', yaml.dump(dmcs_params))
                    
        else:
            #send 'no response from ncsa' to DMCS               )
            dmcs_params = {}
            dmcs_params[MSG_TYPE] = 'READOUT_ACK' 
            dmcs_params[JOB_NUM] = job_number
            dmcs_params['ACK_BOOL'] = False
            dmcs_params['COMMENT'] = "Readout Failed: No Response from NCSA"
            self._base_publisher.publish_message('dmcs_consume', yaml.dump(dmcs_params))
Пример #16
0
    def process_dmcs_readout(self, params):
        job_number = params[JOB_NUM]
        pairs = self.JOB_SCBD.get_pairs_for_job(job_number)
        date - get_timestamp()
        self.JOB_SCBD.set_value_for_job(job_number, TIME_START_READOUT, date)
        # The following line extracts the distributor FQNs from pairs dict using
        # list comprehension values; faster than for loops
        distributors = [v['FQN'] for v in list(pairs.values())]
        forwarders = list(pairs.keys())

        ack_id = self.get_next_timed_ack_id('NCSA_READOUT')
        ### Send READOUT to NCSA with ACK_ID
        ncsa_params = {}
        ncsa_params[MSG_TYPE] = 'NCSA_READOUT'
        ncsa_params[ACK_ID] = ack_id
        self._ncsa_publisher.publish_message(NCSA_CONSUME,
                                             yaml.dump(ncsa_params))

        self.ack_timer(4)

        ncsa_response = self.ACK_SCBD.get_components_for_timed_ack(ack_id)
        if ncsa_response:
            if ncsa_response['ACK_BOOL'] == True:
                #inform forwarders
                fwd_ack_id = self.get_next_timed_ack_id('FORWARDER_READOUT')
                for forwarder in forwarders:
                    name = self.FWD_SCBD.get_value_for_forwarder(
                        forwarder, NAME)
                    routing_key = self.FWD_SCBD.get_routing_key(forwarder)
                    msg_params = {}
                    msg_params[MSG_TYPE] = 'FORWARDER_READOUT'
                    msg_params[JOB_NUM] = job_number
                    msg_params['ACK_ID'] = fwd_ack_id
                    self.FWD_SCBD.set_forwarder_state(forwarder, START_READOUT)
                    self._publisher.publish_message(routing_key,
                                                    yaml.dump(msg_params))
                self.ack_timer(4)
                forwarder_responses = self.ACK_SCBD.get_components_for_timed_ack(
                    fwd_ack_id)
                if len(forwarder_responses) == len(forwarders):
                    dmcs_params = {}
                    dmcs_params[MSG_TYPE] = 'READOUT_ACK'
                    dmcs_params[JOB_NUM] = job_number
                    dmcs_params['ACK_BOOL'] = True
                    dmcs_params[
                        'COMMENT'] = "Readout begun at %s" % get_timestamp()
                    self._publisher.publish_message('dmcs_consume',
                                                    yaml.dump(dmcs_params))

            else:
                #send problem with ncsa to DMCS
                dmcs_params = {}
                dmcs_params[MSG_TYPE] = 'READOUT_ACK'
                dmcs_params[JOB_NUM] = job_number
                dmcs_params['ACK_BOOL'] = False
                dmcs_params[
                    'COMMENT'] = 'Readout Failed: Problem at NCSA - Expected Distributor Acks is %s, Number of Distributor Acks received is %s' % (
                        ncsa_response['EXPECTED_DISTRIBUTOR_ACKS'],
                        ncsa_response['RECEIVED_DISTRIBUTOR_ACKS'])
                self._base_publisher.publish_message('dmcs_consume',
                                                     yaml.dump(dmcs_params))

        else:
            #send 'no response from ncsa' to DMCS               )
            dmcs_params = {}
            dmcs_params[MSG_TYPE] = 'READOUT_ACK'
            dmcs_params[JOB_NUM] = job_number
            dmcs_params['ACK_BOOL'] = False
            dmcs_params['COMMENT'] = "Readout Failed: No Response from NCSA"
            self._base_publisher.publish_message('dmcs_consume',
                                                 yaml.dump(dmcs_params))
Пример #17
0
    def process_dmcs_new_job(self, params):
        input_params = params
        needed_workers = len(input_params[RAFTS])
        ack_id = self.forwarder_health_check(input_params)
        
        self.ack_timer(7)  # This is a HUGE num seconds for now..final setting will be milliseconds
        healthy_forwarders = self.ACK_SCBD.get_components_for_timed_ack(timed_ack)

        num_healthy_forwarders = len(healthy_forwarders)
        if needed_workers > num_healthy_forwarders:
            result = self.insufficient_base_resources(input_params, healthy_forwarders)
            return result
        else:
            healthy_status = {"STATUS": "HEALTHY", "STATE":"READY_WITHOUT_PARAMS"}
            self.FWD_SCBD.set_forwarder_params(healthy_forwarders, healthy_status)

            ack_id = self.ncsa_resources_query(input_params, healthy_forwarders)

            self.ack_timer(3)

            #Check ACK scoreboard for response from NCSA
            ncsa_response = self.ACK_SCBD.get_components_for_timed_ack(ack_id)
            if ncsa_response:
                pairs = {}
                ack_bool = None
                try:
                    ack_bool = ncsa_response[ACK_BOOL]
                    if ack_bool == True:
                        pairs = ncsa_response[PAIRS] 
                except KeyError as e:
                    pass 
                # Distribute job params and tell DMCS I'm ready.
                if ack_bool == TRUE:
                    fwd_ack_id = self.distribute_job_params(input_params, pairs)
                    self.ack_timer(3)

                    fwd_params_response = self.ACK_SCBD.get_components_for_timed_ack(fwd_ack_id)
                    if fwd_params_response and (len(fwd_params_response) == len(fwders)):
                        self.JOB_SCBD.set_value_for_job(job_num, "STATE", "BASE_TASK_PARAMS_SENT")
                        self.JOB_SCBD.set_value_for_job(job_num, "TIME_BASE_TASK_PARAMS_SENT", get_timestamp())
                        in_ready_state = {'STATE':'READY_WITH_PARAMS'}
                        self.FWD_SCBD.set_forwarder_params(fwders, in_ready_state) 
                        # Tell DMCS we are ready
                        result = self.accept_job(job_num)
                else:
                    #not enough ncsa resources to do job - Notify DMCS
                    idle_param = {'STATE': 'IDLE'}
                    self.FWD_SCBD.set_forwarder_params(healthy_forwarders, idle_params)
                    result = self.insufficient_ncsa_resources(ncsa_response)
                    return result

            else:
                result = self.ncsa_no_response(input_params)
                idle_param = {'STATE': 'IDLE'}
                self.FWD_SCBD.set_forwarder_params(list(forwarder_candidate_dict.keys()), idle_params)
                return result