def process(self, message):
    """
    Spot Request has completed: write completion info to the SpotRequestItem in DynamoDB,
    then let the master know this request has completed so the master can determine
    if the job has completed.

    The SQS message is deleted only after the row update and the master notification
    have both been issued. Any StandardError raised along the way is logged (message,
    exception text and traceback) and swallowed, and the SQS message is not deleted.

    :param message: SQS Message instance
    """
    # Bound before the try so the except handler can always reference it, even when
    # the failure happens while parsing the message or loading the item.
    spot_request_item = None
    try:
        spot_request_msg = SpotRequestMsg(raw_json=message.get_body())
        spot_request_item = get_spot_request_item(
            self.spot_request_table_name,
            spot_request_msg.spot_request_uuid,
            region_name=self.region_name,
            profile_name=self.profile_name,
        )

        pairs = spot_request_msg.name_value_pairs
        ts_cmd_complete = pairs[SpotRequestMsg.PAIR_NAME_BATCH_PROCESS_COMPLETE_TIMESTAMP]
        cmd_returncode = pairs[SpotRequestMsg.PAIR_NAME_BATCH_PROCESS_RETURNCODE]
        cmd_std_out = pairs[SpotRequestMsg.PAIR_NAME_BATCH_PROCESS_STD_OUT]
        cmd_std_err = pairs[SpotRequestMsg.PAIR_NAME_BATCH_PROCESS_STD_ERR]

        key_value_pairs = {
            TableSpotRequest.is_open: 0,
            TableSpotRequest.spot_request_state_code: SpotRequestStateCode.instance_complete,
            TableSpotRequest.ts_cmd_complete: ts_cmd_complete,
            TableSpotRequest.cmd_returncode: cmd_returncode,
        }
        # Only persist stdout/stderr when there is something to store
        if cmd_std_out:
            key_value_pairs[TableSpotRequest.cmd_std_out] = cmd_std_out
        if cmd_std_err:
            key_value_pairs[TableSpotRequest.cmd_std_err] = cmd_std_err

        spot_request_row_partial_save(
            self.spot_request_table_name,
            spot_request_item,
            key_value_pairs,
            region_name=self.region_name,
            profile_name=self.profile_name,
        )

        # let the Master increment the completion count to determine if the job is complete
        master_msg_incr_instance_success = SpotMasterMsg(
            spot_master_uuid=spot_request_msg.spot_master_uuid,
            spot_master_msg_type=SpotMasterMsg.TYPE_INCR_INSTANCE_SUCCESS_CNT,
        )
        message_attributes = create_microsvc_message_attributes(
            awsspotbatch.common.const.MICROSVC_MASTER_CLASSNAME_SpotMasterMessageIncrSuccessCnt
        )
        spot_master_sqs_message_durable = SqsMessageDurable(
            self.spot_master_queue_name, self.region_name, profile_name=self.profile_name
        )
        spot_master_sqs_message_durable.send_message(
            master_msg_incr_instance_success.to_json(), message_attributes=message_attributes
        )

        # Delete last, so a failure above leaves the request message on the queue
        self.spot_request_sqs_message_durable.delete_message(message)

    except StandardError as e:
        # fmt_request_item_msg_hdr is defined elsewhere and is not assumed to accept
        # None, so fall back to an empty header when the item was never loaded.
        if spot_request_item is not None:
            msg_hdr = fmt_request_item_msg_hdr(spot_request_item)
        else:
            msg_hdr = ""
        logger.error(msg_hdr + "Exiting SpotRequestDispatcher due to exception")
        logger.error(msg_hdr + str(e))
        logger.error(msg_hdr + traceback.format_exc())
def process(self, message):
    """
    AWS is going to terminate the request - update the status in SpotRequestItem.
    A SpotRequestCheckStatus message is processed at a regular interval and will detect
    the status change and process accordingly.

    The SQS message is deleted only after the row update has been issued. Any
    StandardError raised along the way is logged (message, exception text and
    traceback) and swallowed, and the SQS message is not deleted.

    :param message: SQS Message instance
    """
    # Bound before the try so the except handler can always reference it, even when
    # the failure happens while parsing the message or loading the item.
    spot_request_item = None
    try:
        spot_request_msg = SpotRequestMsg(raw_json=message.get_body())
        spot_request_item = get_spot_request_item(
            self.spot_request_table_name,
            spot_request_msg.spot_request_uuid,
            region_name=self.region_name,
            profile_name=self.profile_name,
        )
        logger.info(fmt_request_item_msg_hdr(spot_request_item) + 'process_pending_termination_detected')

        ts_pending_termination_detected = spot_request_msg.name_value_pairs[
            SpotRequestMsg.PAIR_NAME_INSTANCE_TS_PENDING_TERMINATION_DETECTED
        ]
        # Record the new state code together with the time the termination notice was seen
        spot_request_row_partial_save(
            self.spot_request_table_name,
            spot_request_item,
            {
                TableSpotRequest.spot_request_state_code: SpotRequestStateCode.instance_force_termination_pending,
                TableSpotRequest.ts_pending_termination_detected: ts_pending_termination_detected,
            },
            region_name=self.region_name,
            profile_name=self.profile_name,
        )

        # Delete last, so a failure above leaves the request message on the queue
        self.spot_request_sqs_message_durable.delete_message(message)

    except StandardError as e:
        # fmt_request_item_msg_hdr is defined elsewhere and is not assumed to accept
        # None, so fall back to an empty header when the item was never loaded.
        if spot_request_item is not None:
            msg_hdr = fmt_request_item_msg_hdr(spot_request_item)
        else:
            msg_hdr = ''
        logger.error(msg_hdr + 'Exiting SpotRequestDispatcher due to exception')
        logger.error(msg_hdr + str(e))
        logger.error(msg_hdr + traceback.format_exc())
def process(self, message):
    """
    Heartbeat daemon successfully started, update timestamp in SpotRequestItem.

    Both the "daemon started" timestamp and the regular heartbeat timestamp are set
    to the value carried in the message.

    TODO: need to add check to determine the heartbeat daemon has started within a
    reasonable time period after the instance has started. If the heartbeat daemon
    can't start, the users' script will never run.

    The SQS message is deleted only after the row update has been issued. Any
    StandardError raised along the way is logged (message, exception text and
    traceback) and swallowed, and the SQS message is not deleted.

    :param message: SQS Message instance
    """
    # Bound before the try so the except handler can always reference it, even when
    # the failure happens while parsing the message or loading the item.
    spot_request_item = None
    try:
        spot_request_msg = SpotRequestMsg(raw_json=message.get_body())
        spot_request_item = get_spot_request_item(
            self.spot_request_table_name,
            spot_request_msg.spot_request_uuid,
            region_name=self.region_name,
            profile_name=self.profile_name,
        )
        logger.info(fmt_request_item_msg_hdr(spot_request_item) + "process_heartbeat_daemon_started")

        ts_heartbeat_daemon_started = spot_request_msg.name_value_pairs[
            SpotRequestMsg.PAIR_NAME_INSTANCE_HEARTBEAT_DAEMON_STARTED_TIMESTAMP
        ]
        # The daemon start time also serves as the first heartbeat timestamp
        spot_request_row_partial_save(
            self.spot_request_table_name,
            spot_request_item,
            {
                TableSpotRequest.ts_heartbeat_daemon_started: ts_heartbeat_daemon_started,
                TableSpotRequest.ts_heartbeat: ts_heartbeat_daemon_started,
            },
            region_name=self.region_name,
            profile_name=self.profile_name,
        )

        # Delete last, so a failure above leaves the request message on the queue
        self.spot_request_sqs_message_durable.delete_message(message)

    except StandardError as e:
        # fmt_request_item_msg_hdr is defined elsewhere and is not assumed to accept
        # None, so fall back to an empty header when the item was never loaded.
        if spot_request_item is not None:
            msg_hdr = fmt_request_item_msg_hdr(spot_request_item)
        else:
            msg_hdr = ""
        logger.error(msg_hdr + "Exiting SpotRequestDispatcher due to exception")
        logger.error(msg_hdr + str(e))
        logger.error(msg_hdr + traceback.format_exc())
def process(self, message):
    """
    Write heartbeat timestamp to SpotRequestItem.

    When no item is found for the message's spot_request_uuid, a warning is logged
    instead of saving; in both cases the SQS message is then deleted. Any
    StandardError raised along the way is logged (message, exception text and
    traceback) and swallowed, and the SQS message is not deleted.

    :param message: SQS Message instance
    """
    # Bound before the try so the except handler can always reference it, even when
    # the failure happens while parsing the message or loading the item.
    spot_request_item = None
    try:
        spot_request_msg = SpotRequestMsg(raw_json=message.get_body())
        spot_request_item = get_spot_request_item(
            self.spot_request_table_name,
            spot_request_msg.spot_request_uuid,
            region_name=self.region_name,
            profile_name=self.profile_name,
        )
        # NOTE(review): this header is formatted before the None check below, so
        # fmt_request_item_msg_hdr presumably tolerates a missing item - confirm.
        logger.info(fmt_request_item_msg_hdr(spot_request_item) + 'process_instance_heartbeat')

        if spot_request_item is not None:
            ts_heartbeat = spot_request_msg.name_value_pairs[
                SpotRequestMsg.PAIR_NAME_INSTANCE_HEARTBEAT_TIMESTAMP
            ]
            spot_request_row_partial_save(
                self.spot_request_table_name,
                spot_request_item,
                {TableSpotRequest.ts_heartbeat: ts_heartbeat},
                region_name=self.region_name,
                profile_name=self.profile_name,
            )
        else:
            logger.warning('Heartbeat not saved, spot_request_uuid not found: ' + spot_request_msg.spot_request_uuid)

        # The message is removed whether or not a matching item was found
        self.spot_request_sqs_message_durable.delete_message(message)

    except StandardError as e:
        # fmt_request_item_msg_hdr is defined elsewhere and is not assumed to accept
        # None, so fall back to an empty header when the item was never loaded.
        if spot_request_item is not None:
            msg_hdr = fmt_request_item_msg_hdr(spot_request_item)
        else:
            msg_hdr = ''
        logger.error(msg_hdr + 'Exiting SpotRequestDispatcher due to exception')
        logger.error(msg_hdr + str(e))
        logger.error(msg_hdr + traceback.format_exc())