def handle_state_request_constraint_encountered( self, spot_request_msg, spot_request_item, spot_request_uuid, spot_master_uuid ):
        """ Close out a spot request that hit a constraint and ask the master to resubmit it

            The constraint was encountered after the spot request was initiated but
            before the request was fulfilled, i.e. the time limit expired.
            The request row is saved as closed, then a TYPE_RESUBMIT_FAILED_REQUEST
            message is sent to the spot master queue.

        :param spot_request_msg: message being handled; supplies the spot_master_uuid and
                                 spot_request_uuid placed in the resubmit message
        :param spot_request_item: spot request row that receives the partial save
        :param spot_request_uuid: used only to build the log message header
        :param spot_master_uuid: not read by this method (the value on spot_request_msg is used)

        """
        logger.info( fmt_request_uuid_msg_hdr( spot_request_uuid ) + 'handle_state_request_constraint_encountered' )

        # Close the row: state code, open flag and end timestamp (epoch seconds)
        closing_fields = {
            TableSpotRequest.spot_request_state_code: SpotRequestStateCode.instance_complete,
            TableSpotRequest.is_open: 0,
            TableSpotRequest.ts_end: int( time.time() ),
        }
        spot_request_row_partial_save( self.spot_request_table_name, spot_request_item, closing_fields,
                                       region_name=self.region_name, profile_name=self.profile_name )

        # Create a new spot request based on the spot request that just failed
        resubmit_msg = SpotMasterMsg(
            spot_master_uuid=spot_request_msg.spot_master_uuid,
            spot_master_msg_type=SpotMasterMsg.TYPE_RESUBMIT_FAILED_REQUEST,
            spot_request_uuid=spot_request_msg.spot_request_uuid,
        )
        resubmit_attributes = create_microsvc_message_attributes(
            awsspotbatch.common.const.MICROSVC_MASTER_CLASSNAME_SpotMasterMessageResubmitFailedRequest )
        master_queue = SqsMessageDurable( self.spot_master_queue_name, self.region_name, profile_name=self.profile_name )
        master_queue.send_message( resubmit_msg.to_json(), message_attributes=resubmit_attributes )
    def handle_state_instance_force_termination_pending( self, spot_request_msg, spot_request_item, spot_request_uuid, spot_master_uuid ):
        """ Close out a spot request whose instance is about to be force-terminated

            AWS has started the termination process for this instance, i.e. the price
            has increased; this is the beginning of the two minute warning pending
            forced termination.
            This method saves the request row as closed and sends a
            TYPE_RESUBMIT_FAILED_REQUEST message to the spot master queue so another
            spot request gets started.  It does not itself terminate the instance.

        :param spot_request_msg: message being handled; supplies the spot_master_uuid and
                                 spot_request_uuid placed in the resubmit message
        :param spot_request_item: spot request row that receives the partial save
        :param spot_request_uuid: used only to build the log message header
        :param spot_master_uuid: not read by this method (the value on spot_request_msg is used)

        """
        logger.info( fmt_request_uuid_msg_hdr( spot_request_uuid ) + 'handle_state_instance_force_termination_pending' )

        # Close the row: state code, open flag and end timestamp (epoch seconds)
        row_updates = {
            TableSpotRequest.spot_request_state_code: SpotRequestStateCode.instance_complete,
            TableSpotRequest.is_open: 0,
            TableSpotRequest.ts_end: int( time.time() ),
        }
        spot_request_row_partial_save( self.spot_request_table_name, spot_request_item, row_updates,
                                       region_name=self.region_name, profile_name=self.profile_name )

        # Create a new spot request based on the spot request that just failed
        master_msg = SpotMasterMsg(
            spot_master_uuid=spot_request_msg.spot_master_uuid,
            spot_master_msg_type=SpotMasterMsg.TYPE_RESUBMIT_FAILED_REQUEST,
            spot_request_uuid=spot_request_msg.spot_request_uuid,
        )
        master_msg_attributes = create_microsvc_message_attributes(
            awsspotbatch.common.const.MICROSVC_MASTER_CLASSNAME_SpotMasterMessageResubmitFailedRequest )
        master_sqs = SqsMessageDurable( self.spot_master_queue_name, self.region_name, profile_name=self.profile_name )
        master_sqs.send_message( master_msg.to_json(), message_attributes=master_msg_attributes )
    def process(self, message):
        """
            Spot Request has completed, write completion info to SpotRequestItem in DynamoDB,
            let master know this request has completed so the master can determine if the job has completed

            The incoming SQS message is deleted only after the row has been saved and
            the master has been notified.  Any StandardError is logged and swallowed,
            leaving the message undeleted.

        :param message: SQS Message instance whose body is the SpotRequestMsg JSON

        """
        # Bound before the try block: the except handler needs it for the log header,
        # and parsing the message or fetching the item may fail before it is assigned.
        spot_request_item = None
        try:
            spot_request_msg = SpotRequestMsg(raw_json=message.get_body())
            spot_request_item = get_spot_request_item(
                self.spot_request_table_name,
                spot_request_msg.spot_request_uuid,
                region_name=self.region_name,
                profile_name=self.profile_name,
            )
            pairs = spot_request_msg.name_value_pairs
            cmd_std_out = pairs[SpotRequestMsg.PAIR_NAME_BATCH_PROCESS_STD_OUT]
            cmd_std_err = pairs[SpotRequestMsg.PAIR_NAME_BATCH_PROCESS_STD_ERR]
            key_value_pairs = {
                TableSpotRequest.is_open: 0,
                TableSpotRequest.spot_request_state_code: SpotRequestStateCode.instance_complete,
                TableSpotRequest.ts_cmd_complete: pairs[SpotRequestMsg.PAIR_NAME_BATCH_PROCESS_COMPLETE_TIMESTAMP],
                TableSpotRequest.cmd_returncode: pairs[SpotRequestMsg.PAIR_NAME_BATCH_PROCESS_RETURNCODE],
            }
            # Captured output is saved only when it is present and non-empty
            if cmd_std_out:
                key_value_pairs[TableSpotRequest.cmd_std_out] = cmd_std_out
            if cmd_std_err:
                key_value_pairs[TableSpotRequest.cmd_std_err] = cmd_std_err
            spot_request_row_partial_save(
                self.spot_request_table_name,
                spot_request_item,
                key_value_pairs,
                region_name=self.region_name,
                profile_name=self.profile_name,
            )
            # let the Master increment the completion count to determine if the job is complete
            master_msg_incr_instance_success = SpotMasterMsg(
                spot_master_uuid=spot_request_msg.spot_master_uuid,
                spot_master_msg_type=SpotMasterMsg.TYPE_INCR_INSTANCE_SUCCESS_CNT,
            )
            message_attributes = create_microsvc_message_attributes(
                awsspotbatch.common.const.MICROSVC_MASTER_CLASSNAME_SpotMasterMessageIncrSuccessCnt
            )
            spot_master_sqs_message_durable = SqsMessageDurable(
                self.spot_master_queue_name, self.region_name, profile_name=self.profile_name
            )
            spot_master_sqs_message_durable.send_message(
                master_msg_incr_instance_success.to_json(), message_attributes=message_attributes
            )
            self.spot_request_sqs_message_durable.delete_message(message)

        except StandardError as e:
            # Without an item there is nothing to build the header from; log without one
            # rather than raising a second error that would hide the original.
            if spot_request_item is not None:
                msg_hdr = fmt_request_item_msg_hdr(spot_request_item)
            else:
                msg_hdr = ""
            logger.error(msg_hdr + "Exiting SpotRequestDispatcher due to exception")
            logger.error(msg_hdr + str(e))
            logger.error(msg_hdr + traceback.format_exc())
def submit_spot_batch_job( argv ):
    """ Submit a users' spot batch job
        Submit an SQS message containing the 2 parm files - Batch Job and User Parm

        Command line layout (read from sys.argv):
          sys.argv[1]  log configuration file (required)
          sys.argv[2]  Batch Job Parm file (required)
          sys.argv[3]  User Job Parm file (optional)
        Exits the process with status 8 on a missing argument or on any StandardError.

    :param argv: accepted for the caller's convenience but not read; all
                 arguments are taken from sys.argv

    """
    import logging.config
    if len(sys.argv) == 1:
        # Logging is not configured yet, so this goes to stdout.  The parenthesised
        # single-argument form prints the same text on Python 2 and is valid on Python 3.
        print( 'ERROR: Missing log configuration file, first argument must be path/name.ext of the log configuration file' )
        sys.exit(8)
    logging.config.fileConfig( sys.argv[1], disable_existing_loggers=False)
    logger = logging.getLogger(__name__)

    if len(sys.argv) == 2:
        logger.error( 'ERROR: Missing Batch Job Parm file, second argument must be path/name.ext of the log Batch Job Parm file' )
        sys.exit(8)

    try:
        logger.info("Starting")

        path_batch_job_parm_file = sys.argv[2]
        # NOTE: the user parm file is picked up only when there are exactly four
        # argv entries; with additional trailing arguments it is ignored.
        path_user_job_parm_file = sys.argv[3] if len(sys.argv) == 4 else None

        with open( path_batch_job_parm_file ) as parm_file:
            raw_batch_job_parm_item = parm_file.read()

        raw_user_job_parm_item = None
        if path_user_job_parm_file is not None:
            with open( path_user_job_parm_file ) as parm_file:
                raw_user_job_parm_item = parm_file.read()

        # The batch job parm supplies the region and profile used to reach the master queue
        batch_job_parm_item = BatchJobParmItem( stringParmFile=raw_batch_job_parm_item )

        spot_master_sqs_message_durable = SqsMessageDurable( awsspotbatch.common.const.SPOT_MASTER_QUEUE_NAME,
                                                             batch_job_parm_item.primary_region_name,
                                                             profile_name=batch_job_parm_item.profile_name )

        spot_master_uuid = str(uuid.uuid1())
        logger.info('Submitting test batch message, spot_master_uuid=' + spot_master_uuid )
        spot_master_msg = SpotMasterMsg( spot_master_uuid=spot_master_uuid, spot_master_msg_type=SpotMasterMsg.TYPE_SUBMIT_BATCH,
                                         raw_batch_job_parm_item=raw_batch_job_parm_item, raw_user_job_parm_item=raw_user_job_parm_item)
        message_attributes = create_microsvc_message_attributes( awsspotbatch.common.const.MICROSVC_MASTER_CLASSNAME_SpotMasterMessageSubmitBatch )
        spot_master_sqs_message_durable.send_message( spot_master_msg.to_json(),
                                                      message_attributes=message_attributes )
        logger.info( 'Completed Successfully' )

    except StandardError as e:
        logger.error( e )
        logger.error( traceback.format_exc() )
        sys.exit(8)
# Example #5
# 0
 def receive_test_data( self ):
     """ Drain the spot master queue, logging the type of every message received

         Each message is parsed as a SpotMasterMsg, its type is logged and the
         message is deleted.  Stops when receive_message returns None.
         On any StandardError the error and traceback are logged and the process
         exits with status 8.
     """
     try:
         master_queue = SqsMessageDurable( self.spot_master_queue_name, region_name=self.region_name,
                                           profile_name=self.profile_name )
         # iter() with a sentinel keeps calling receive_message until the result equals None
         for queued_message in iter( master_queue.receive_message, None ):
             parsed_msg = SpotMasterMsg( raw_json=queued_message.get_body() )
             logger.info( 'spot_master_msg: type=' + parsed_msg.spot_master_msg_type )
             master_queue.delete_message( queued_message )

     except StandardError as e:
         logger.error( e )
         logger.error( traceback.format_exc() )
         sys.exit(8)
    def process( self, message ) :
        """ Try to submit another Spot Request based on the one that just failed

            When a spot instance request is obtained, a TYPE_SPOT_REQUEST_INITIATED
            message (with attempt number incremented by one) is sent to the spot
            request queue.  Otherwise the incoming message body is re-sent to the
            master queue with a delay so it gets reprocessed later.  In both cases
            the incoming message is then deleted.
            Any StandardError is logged and swallowed, leaving the message undeleted.

        :param message: SQS Message instance whose body is the SpotMasterMsg JSON

        """
        # Bound before the try block: the except handler needs it for the log header,
        # and any statement ahead of the master item lookup may raise first.
        spot_master_item = None
        try:
            spot_master_msg = SpotMasterMsg( raw_json=message.get_body() )
            spot_master_uuid = spot_master_msg.spot_master_uuid
            logger.info( fmt_master_uuid_msg_hdr( spot_master_uuid ) + 'process_resubmit_failed_request')
            dynamodb_conn = boto.dynamodb2.connect_to_region( self.region_name, profile_name=self.profile_name )
            spot_master_table = Table( self.spot_master_table_name, connection=dynamodb_conn )
            spot_master_item = spot_master_table.get_item( spot_master_uuid=spot_master_uuid )
            spot_request_table = Table( self.spot_request_table_name, connection=dynamodb_conn )
            failed_spot_request_item = spot_request_table.get_item( spot_request_uuid=spot_master_msg.spot_request_uuid )

            # Request spot instance
            spot_instance_request = self.resubmit_failed_request_spot_instance( spot_master_item, failed_spot_request_item, dynamodb_conn )

            # Queue up a SpotRequestMsg
            if spot_instance_request is not None:
                spot_request_uuid = str(uuid.uuid1())
                spot_request_msg = SpotRequestMsg( spot_request_uuid=spot_request_uuid,
                                                   spot_master_uuid=spot_master_item[ TableSpotMaster.spot_master_uuid ],
                                                   spot_request_msg_type=SpotRequestMsg.TYPE_SPOT_REQUEST_INITIATED,
                                                   spot_request_id=spot_instance_request.id )
                spot_request_msg.name_value_pairs[ SpotRequestMsg.PAIR_NAME_SPOT_PRICE ] = str( spot_instance_request.price )
                spot_request_msg.name_value_pairs[ SpotRequestMsg.PAIR_NAME_INSTANCE_USERNAME ] = spot_master_item[ TableSpotMaster.instance_username ]
                spot_request_msg.name_value_pairs[ SpotRequestMsg.PAIR_NAME_ATTEMPT_NUMBER ] = int( failed_spot_request_item[ TableSpotRequest.attempt_number ] + 1 )

                spot_request_sqs_message_durable = SqsMessageDurable( self.spot_request_queue_name, self.region_name, profile_name=self.profile_name )
                spot_request_sqs_message_durable.send_message( spot_request_msg.to_json(), message_attributes=create_microsvc_message_attributes( awsspotbatch.common.const.MICROSVC_REQUEST_CLASSNAME_SpotRequestMessageSpotRequestInitiated ) )
            # No instances available - resubmit this message with a delay timer so it will get reprocessed in future
            else:
                logger.warning( fmt_master_uuid_msg_hdr( spot_master_uuid ) + 'No spot instances available, will try again in ' + str(awsspotbatch.common.const.NO_SPOT_INSTANCES_AVAILABLE_RECHECK_MINUTES) + ' minutes')
                delay_seconds = awsspotbatch.common.const.NO_SPOT_INSTANCES_AVAILABLE_RECHECK_MINUTES * 60
                self.spot_master_sqs_message_durable.send_message( message.get_body(),
                                                                   message_attributes=create_microsvc_message_attributes( awsspotbatch.common.const.MICROSVC_MASTER_CLASSNAME_SpotMasterMessageResubmitFailedRequest ),
                                                                   delay_seconds=delay_seconds )

            # Reached only when the send above succeeded, so the incoming message is
            # never deleted before its follow-up message has been queued.
            self.spot_master_sqs_message_durable.delete_message(message)

        except StandardError as e:
            # Without a master item there is nothing to build the header from; log
            # without one rather than raising a second error that hides the original.
            if spot_master_item is not None:
                msg_hdr = fmt_master_item_msg_hdr( spot_master_item )
            else:
                msg_hdr = ''
            logger.error( msg_hdr + str(e) )
            logger.error( msg_hdr + traceback.format_exc() )
# Example #7
# 0
    def send_test_data( self ):
        """ Send two test messages sharing one new spot_master_uuid to the spot master queue

            A TYPE_SUBMIT_BATCH message is sent first, then a TYPE_CHECK_STATUS message.
            On any StandardError the error and traceback are logged and the process
            exits with status 8.
        """
        try:
            shared_master_uuid = str( uuid.uuid1() )
            # Both messages are built before the queue is opened, and sent in this order
            test_msgs = [ SpotMasterMsg( shared_master_uuid, msg_type )
                          for msg_type in ( SpotMasterMsg.TYPE_SUBMIT_BATCH, SpotMasterMsg.TYPE_CHECK_STATUS ) ]

            master_queue = SqsMessageDurable( self.spot_master_queue_name, region_name=self.region_name,
                                              profile_name=self.profile_name )
            for test_msg in test_msgs:
                master_queue.send_message( test_msg.to_json() )

        except StandardError as e:
            logger.error( e )
            logger.error( traceback.format_exc() )
            sys.exit(8)
    def handle_state_master_role_policy_in_progress(self, spot_master_item, dynamodb_conn ):
        """ Verify the Policy is added to the Role, then submit the spot instance requests

            Returns without changing anything while the policy is not yet added.
            Otherwise one TYPE_SPOT_REQUEST_INITIATED message (attempt number 1) is
            queued per spot instance request, and the master row is saved with state
            waiting_for_instances_complete - or no_instances_available when the
            submit returned None.

        :param spot_master_item: master row; supplies role/policy names, master uuid,
                                 cheapest price and instance username
        :param dynamodb_conn: not read by this method

        """
        logger.info( fmt_master_item_msg_hdr( spot_master_item ) + 'handle_state_master_role_policy_in_progress')
        iam_conn = awsext.iam.connect_to_region( self.region_name, profile_name=self.profile_name )
        policy_is_added = iam_conn.is_role_policy_added( role_name=spot_master_item[ TableSpotMaster.role_name ],
                                                         policy_name=spot_master_item[ TableSpotMaster.policy_name ] )
        if not policy_is_added:
            return
        # For some bizarre timing reason, is_role_policy_added can return True but the spot request fails on IAM role not attached to instance profile
        #  - give it a few seconds to clear
        time.sleep(5)

        # Request spot instances
        spot_instance_requests = submit_request_spot_instances( spot_master_item, self.region_name, self.profile_name )

        if spot_instance_requests is None:
            next_state_code = SpotMasterStateCode.no_instances_available
        else:
            next_state_code = SpotMasterStateCode.waiting_for_instances_complete
            # Queue up a SpotRequestMsg for each spot request - this will manage all states for SpotRequest
            request_queue = SqsMessageDurable( self.spot_request_queue_name, self.region_name, profile_name=self.profile_name )
            for instance_request in spot_instance_requests:
                initiated_msg = SpotRequestMsg( spot_request_uuid=str( uuid.uuid1() ),
                                                spot_master_uuid=spot_master_item[ TableSpotMaster.spot_master_uuid ],
                                                spot_request_msg_type=SpotRequestMsg.TYPE_SPOT_REQUEST_INITIATED,
                                                spot_request_id=instance_request.id )
                pairs = initiated_msg.name_value_pairs
                pairs[ SpotRequestMsg.PAIR_NAME_SPOT_PRICE ] = str( spot_master_item[ TableSpotMaster.cheapest_price ] )
                pairs[ SpotRequestMsg.PAIR_NAME_INSTANCE_USERNAME ] = spot_master_item[ TableSpotMaster.instance_username ]
                pairs[ SpotRequestMsg.PAIR_NAME_ATTEMPT_NUMBER ] = 1
                initiated_attributes = create_microsvc_message_attributes(
                    awsspotbatch.common.const.MICROSVC_REQUEST_CLASSNAME_SpotRequestMessageSpotRequestInitiated )
                request_queue.send_message( initiated_msg.to_json(), message_attributes=initiated_attributes )

        spot_master_row_partial_save( self.spot_master_table_name, spot_master_item,
                                      { TableSpotMaster.spot_master_state_code: next_state_code },
                                      region_name=self.region_name, profile_name=self.profile_name )
    def run(self):
        """ Read Master messages, launch Master microservice based on service_class_name message attribute

            Loops until self.is_shutdown is true.  A StandardError raised while
            handling one message is logged and the loop then carries on with the
            next iteration (the log text says 'Exiting', but the loop is not left).
        """
        master_queue = SqsMessageDurable( self.spot_master_queue_name, self.region_name, profile_name=self.profile_name )
        while True:
            try:
                logger.info('SpotMasterDispatcher loop')
                if self.is_shutdown:
                    logger.info('Shutting down SpotMasterDispatcher' )
                    break

                incoming = master_queue.receive_message( message_attributes=['service_class_name'])
                if incoming is None:
                    continue

                # The attribute value names the microservice class to launch for this message
                service_class_name = incoming.message_attributes['service_class_name']['string_value']
                logger.info('Launching ' +  service_class_name )
                launcher = SpotMasterMicrosvcLauncher( service_class_name, incoming, self )
                launcher.start()

            except StandardError as e:
                logger.error('Exiting SpotMasterDispatcher due to exception'  )
                logger.error( e )
                logger.error( traceback.format_exc() )
def main():
    """ Development harness: purge both queues, start both dispatchers, submit one batch

        Arguments are read from sys.argv:
          sys.argv[1]  passed to MasterParmItem
          sys.argv[2], sys.argv[3]  passed to create_spot_master_msg_batch_submit
        Logging is configured from a fixed relative path.  Waits for the master
        dispatcher to finish; exits with status 8 on any StandardError.
    """
    import logging.config
    logging.config.fileConfig( '../../../../config/consoleandfile.conf', disable_existing_loggers=False)
    logger = logging.getLogger(__name__)

    try:
        logger.info( 'Starting' )
        master_parm_item = MasterParmItem( sys.argv[1] )
        region_name = master_parm_item.region_name
        profile_name = master_parm_item.profile_name

        master_queue = SqsMessageDurable( awsspotbatch.common.const.SPOT_MASTER_QUEUE_NAME, region_name, profile_name=profile_name )
        request_queue = SqsMessageDurable( awsspotbatch.common.const.SPOT_REQUEST_QUEUE_NAME, region_name, profile_name=profile_name )

        # TEST TEST TEST - only during development
        for queue in ( master_queue, request_queue ):
            queue.purge_queue()

        master_dispatcher = SpotMasterDispatcher( region_name=region_name, profile_name=profile_name )
        request_dispatcher = SpotRequestDispatcher( region_name=region_name, profile_name=profile_name )
        master_dispatcher.start()
        request_dispatcher.start()

        # Kick things off with a single batch submit message on the master queue
        batch_submit_msg = create_spot_master_msg_batch_submit( sys.argv[2], sys.argv[3] )
        batch_submit_attributes = create_microsvc_message_attributes(
            awsspotbatch.common.const.MICROSVC_MASTER_CLASSNAME_SpotMasterMessageSubmitBatch )
        master_queue.send_message( batch_submit_msg.to_json(), message_attributes=batch_submit_attributes )

        # Only the master dispatcher is waited on
        master_dispatcher.join()

        logger.info( 'Completed Successfully' )

    except StandardError as e:
        logger.error( e )
        logger.error( traceback.format_exc() )
        sys.exit(8)
def launch_spot_batch_service():
    """
        Launch dispatcher on t2.micro EC2 instance, in the future this will be in AWS Lambda
        1. Multiple instances can (and should) be launched concurrently, i.e. in different AZ's
        2. Install as a service that starts at system boot, this is detailed in the README

        Command line layout (read from sys.argv):
          sys.argv[1]  log configuration file (required)
          sys.argv[2]  master parm item json file (required)
          sys.argv[3]  the literal 'purge' to purge both queues before starting (optional)
        Starts the master and request dispatchers and waits on the master dispatcher.
        Exits the process with status 8 on a missing argument or on any StandardError.
    """
    if len(sys.argv) == 1:
        # Logging is not configured yet, so this goes to stdout.  The parenthesised
        # single-argument form prints the same text on Python 2 and is valid on Python 3.
        print( 'ERROR: Missing log configuration file, first argument must be path/name.ext of the log configuration file' )
        sys.exit(8)
    logging.config.fileConfig( sys.argv[1], disable_existing_loggers=False)
    logger = logging.getLogger(__name__)

    try:
        logger.info( '**********************************' )
        logger.info( 'Starting SpotBatchMgr Version: ' + awsspotbatch.Version )
        logger.info( '**********************************' )
        if len(sys.argv) == 2:
            logger.error('Missing master parm item file, second argument must be path/name.ext of master parm item json file')
            sys.exit(8)

        master_parm_item = MasterParmItem( sys.argv[2] )
        is_purge_queues = len(sys.argv) > 3 and sys.argv[3] == 'purge'

        spot_master_sqs_message_durable = SqsMessageDurable( awsspotbatch.common.const.SPOT_MASTER_QUEUE_NAME, master_parm_item.region_name, profile_name=master_parm_item.profile_name )
        spot_request_sqs_message_durable = SqsMessageDurable( awsspotbatch.common.const.SPOT_REQUEST_QUEUE_NAME, master_parm_item.region_name, profile_name=master_parm_item.profile_name )

        if is_purge_queues:
            spot_master_sqs_message_durable.purge_queue()
            spot_request_sqs_message_durable.purge_queue()

        spot_master_dispatcher = SpotMasterDispatcher( region_name=master_parm_item.region_name,
                                                       profile_name=master_parm_item.profile_name )
        spot_request_dispatcher = SpotRequestDispatcher( region_name=master_parm_item.region_name,
                                                       profile_name=master_parm_item.profile_name )

        spot_master_dispatcher.start()
        logger.info("Started: spot_master_dispatcher")
        spot_request_dispatcher.start()
        logger.info("Started: spot_request_dispatcher")

        # Only the master dispatcher is waited on
        spot_master_dispatcher.join()

    except StandardError as e:
        logger.error( e )
        logger.error( traceback.format_exc() )
        sys.exit(8)
# Example #12
# 0
def main():
    """ Run the user's batch script on a spot instance and report the outcome

        sys.argv[1] is the parm file handed to SpotClientParmItem.
        A SpotInstanceStatusThread is started, the script named by
        script_name_args is run to completion with stdout/stderr captured, and a
        TYPE_INSTANCE_BATCH_PROCESS_COMPLETE message carrying the return code and
        (trimmed) output is sent to the spot request queue.
        On any StandardError a TYPE_INSTANCE_BATCH_PROCESS_START_EXCEPTION message
        is sent instead (when the parm file was loaded, so the queue is known)
        and the process exits with status 8.
    """
    if( len(sys.argv) < 2 ):
        # The parenthesised single-argument form prints the same text on Python 2
        # and is valid on Python 3.
        print( 'Invalid format, execution cancelled' )
        print( 'Correct format: python awsspotbatch.spotclientlaunch <parmFile.json>' )
        sys.exit(8)

    logging.basicConfig( format='%(asctime)s [%(levelname)s] [%(module)s] [%(funcName)s] [%(message)s]', level=logging.INFO )
    logger = logging.getLogger(__name__)

    # Bound before the try block: the except handler reads both, and loading the
    # parm file or creating the thread may fail before they are assigned.
    spot_client_parm_item = None
    spot_instance_status_thread = None
    try:
        spot_client_parm_item = SpotClientParmItem( pathInParmFile=sys.argv[1] )
        logger.info( 'Starting, region_name=' + spot_client_parm_item.region_name )
        spot_instance_status_thread = SpotInstanceStatusThread(
                                                             spot_client_parm_item.spot_request_queue_name,
                                                             spot_client_parm_item.region_name,
                                                             spot_request_uuid=spot_client_parm_item.spot_request_uuid,
                                                             spot_master_uuid=spot_client_parm_item.spot_master_uuid,
                                                             spot_request_id=spot_client_parm_item.spot_request_id )
        spot_instance_status_thread.start()
        child_process = subprocess.Popen( spot_client_parm_item.script_name_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE )
        std_out, std_err = child_process.communicate( )
        returncode = child_process.returncode
        std_out, std_err = awsspotbatch.common.util.trimStdOutErrSqsPayload( std_out, std_err )
        sqs_message_send_durable = SqsMessageDurable( spot_client_parm_item.spot_request_queue_name,
                                                      spot_client_parm_item.region_name)
        message_attributes = create_microsvc_message_attributes( awsspotbatch.common.const.MICROSVC_REQUEST_CLASSNAME_SpotRequestMessageInstanceBatchProcessComplete )
        sqs_message_send_durable.send_message( SpotRequestMsg( spot_request_uuid=spot_client_parm_item.spot_request_uuid,
                                                               spot_master_uuid=spot_client_parm_item.spot_master_uuid,
                                                               spot_request_msg_type=SpotRequestMsg.TYPE_INSTANCE_BATCH_PROCESS_COMPLETE,
                                                               spot_request_id=spot_client_parm_item.spot_request_id,
                                                               name_value_pairs={
                                                                                 SpotRequestMsg.PAIR_NAME_BATCH_PROCESS_COMPLETE_TIMESTAMP:int(time.time()),
                                                                                 SpotRequestMsg.PAIR_NAME_BATCH_PROCESS_RETURNCODE:str(returncode),
                                                                                 SpotRequestMsg.PAIR_NAME_BATCH_PROCESS_STD_OUT:std_out,
                                                                                 SpotRequestMsg.PAIR_NAME_BATCH_PROCESS_STD_ERR:std_err
                                                                                 } ).to_json(),
                                              message_attributes=message_attributes )
        spot_instance_status_thread.shutdown()
        spot_instance_status_thread.join( 60 )
        logger.info( 'Completed Successfully, child_process returncode=' + str(returncode) )

    except StandardError as e:
        # Capture the traceback once, while the exception is still the one being handled
        traceback_text = traceback.format_exc()
        if spot_instance_status_thread is not None:
            spot_instance_status_thread.is_shutdown = True
        # Every exception arg followed by '|'; %s formatting also copes with non-string args
        message = ''.join( '%s|' % ( arg, ) for arg in e.args )
        logger.error( message )
        logger.error( traceback_text )
        # The queue name and ids come from the parm item; without it the failure
        # can only be logged, not reported over SQS.
        if spot_client_parm_item is not None:
            sqs_message_send_durable = SqsMessageDurable( spot_client_parm_item.spot_request_queue_name,
                                                          spot_client_parm_item.region_name)
            message_attributes = create_microsvc_message_attributes( awsspotbatch.common.const.MICROSVC_REQUEST_CLASSNAME_SpotRequestMessageInstanceBatchProcessStartException )
            sqs_message_send_durable.send_message( SpotRequestMsg( spot_request_uuid=spot_client_parm_item.spot_request_uuid,
                                                                   spot_master_uuid=spot_client_parm_item.spot_master_uuid,
                                                                   spot_request_msg_type=SpotRequestMsg.TYPE_INSTANCE_BATCH_PROCESS_START_EXCEPTION,
                                                                   spot_request_id=spot_client_parm_item.spot_request_id,
                                                                   name_value_pairs={
                                                                                     SpotRequestMsg.PAIR_NAME_BATCH_PROCESS_COMPLETE_TIMESTAMP:int(time.time()),
                                                                                     SpotRequestMsg.PAIR_NAME_INSTANCE_BATCH_PROCESS_START_EXCEPTION_MESSAGE:message,
                                                                                     SpotRequestMsg.PAIR_NAME_INSTANCE_BATCH_PROCESS_START_EXCEPTION_TRACEBACK:traceback_text,
                                                                                     } ).to_json(),
                                                  message_attributes=message_attributes  )
        if spot_instance_status_thread is not None:
            spot_instance_status_thread.join( 60 )

        sys.exit(8)