def handle_interrupt(interrupt_code, _):
    """React to an interrupt by asking the executor to stop with a non-zero exit.

    The interrupt is logged, the kill (with the configured shutdown grace
    period) is printed and logged, both ``stop_signal`` and
    ``non_zero_exit_signal`` are set, and memory usage is printed last.

    ``executor_id``, ``config``, ``stop_signal`` and ``non_zero_exit_signal``
    are not parameters; they are resolved from the surrounding scope.

    Parameters
    ----------
    interrupt_code:
        Code of the received interrupt; only used for logging.
    _:
        Ignored second argument.

    Returns
    -------
    Nothing
    """
    interrupt_message = 'Executor interrupted with code {}'.format(
        interrupt_code)
    logging.info(interrupt_message)

    kill_message = 'Received kill for task {} with grace period of {}'.format(
        executor_id, config.shutdown_grace_period)
    cio.print_and_log(kill_message)

    # request termination first, then flag that the exit must be non-zero
    stop_signal.set()
    non_zero_exit_signal.set()

    cu.print_memory_usage()
def killTask(self, driver, task_id):
    """Handle a kill request for a task by setting the executor's stop signal.

    Parameters
    ----------
    driver:
        Not used by this method.
    task_id:
        Identifier of the task to kill: either a container holding the id
        under the 'value' key, or the plain id itself.

    Returns
    -------
    Nothing
    """
    logging.info(
        'Mesos requested executor to kill task {}'.format(task_id))

    # A plain string id is used as-is. `'value' in task_id` on a string is a
    # substring search, so an id that merely contains "value" would otherwise
    # be indexed with a string key and raise TypeError.
    if isinstance(task_id, str):
        task_id_str = task_id
    else:
        task_id_str = task_id['value'] if 'value' in task_id else task_id

    grace_period = os.environ.get('MESOS_EXECUTOR_SHUTDOWN_GRACE_PERIOD', '')
    cio.print_and_log(
        'Received kill for task {} with grace period of {}'.format(
            task_id_str, grace_period))
    self.stop_signal.set()
def manage_task(driver, task, stop_signal, completed_signal, config):
    """Manages the execution of a task waiting for it to terminate normally or be killed.

    It also sends the task status updates, sandbox location and exit code back to the scheduler.
    Progress updates are tracked on a separate thread and are also sent to the scheduler.
    Setting the stop_signal will trigger termination of the task and associated cleanup.

    Parameters
    ----------
    driver:
        Handed to the status updater and to send_message for every message
        sent from here.
    task:
        The task to run; its id, process environment and launch command are
        derived from it through helper functions.
    stop_signal:
        Event checked to decide whether the executor is terminating early; it
        is also handed to the process waiter, the progress trackers and the
        OS error handler.
    completed_signal:
        Event that is always set when this function finishes, whether it
        succeeded, failed or returned early.
    config:
        Executor configuration; the sandbox directory, progress settings,
        output file locations and shutdown grace period are read from it.

    Returns
    -------
    Nothing
    """
    launched_process = None
    task_id = get_task_id(task)
    cio.print_and_log('Starting task {}'.format(task_id))
    status_updater = StatusUpdater(driver, task_id)

    inner_os_error_handler = functools.partial(os_error_handler, stop_signal,
                                               status_updater)
    try:
        # not yet started to run the task
        status_updater.update_status(cook.TASK_STARTING)

        # Use MESOS_DIRECTORY instead of MESOS_SANDBOX, to report the sandbox
        # location outside of the container
        sandbox_message = {
            'sandbox-directory': config.mesos_directory,
            'task-id': task_id,
            'type': 'directory'
        }
        send_message(driver, inner_os_error_handler, sandbox_message)

        environment = retrieve_process_environment(config, task, os.environ)
        launched_process = launch_task(task, environment)
        if launched_process:
            # task has begun running successfully
            status_updater.update_status(cook.TASK_RUNNING)
            cio.print_and_log('Forked command at {}'.format(
                launched_process.pid))
        else:
            # task launch failed, report an error
            logging.error('Error in launching task')
            status_updater.update_status(cook.TASK_ERROR,
                                         reason=cook.REASON_TASK_INVALID)
            # the finally clause below still sets completed_signal
            return

        # event to track task execution completion
        task_completed_signal = Event()
        sequence_counter = cp.ProgressSequenceCounter()

        send_progress_message = functools.partial(send_message, driver,
                                                  inner_os_error_handler)
        max_message_length = config.max_message_length
        sample_interval_ms = config.progress_sample_interval_ms
        progress_updater = cp.ProgressUpdater(task_id, max_message_length,
                                              sample_interval_ms,
                                              send_progress_message)
        progress_termination_signal = Event()

        def launch_progress_tracker(progress_location, location_tag):
            """Create and start a progress tracker for a single location."""
            logging.info('Location {} tagged as [tag={}]'.format(
                progress_location, location_tag))
            progress_tracker = cp.ProgressTracker(
                config, stop_signal, task_completed_signal, sequence_counter,
                progress_updater, progress_termination_signal,
                progress_location, location_tag, inner_os_error_handler)
            progress_tracker.start()
            return progress_tracker

        # keyed by location, so locations that coincide are tracked only once
        progress_locations = {
            config.progress_output_name: 'progress',
            config.stderr_file(): 'stderr',
            config.stdout_file(): 'stdout'
        }
        logging.info('Progress will be tracked from {} locations'.format(
            len(progress_locations)))
        progress_trackers = [
            launch_progress_tracker(location, location_tag)
            for location, location_tag in progress_locations.items()
        ]

        await_process_completion(launched_process, stop_signal,
                                 config.shutdown_grace_period_ms)
        task_completed_signal.set()

        # once the grace period has elapsed, set the termination signal that
        # was handed to every progress tracker above
        progress_termination_timer = Timer(
            config.shutdown_grace_period_ms / 1000.0,
            progress_termination_signal.set)
        progress_termination_timer.daemon = True
        progress_termination_timer.start()

        # propagate the exit code
        exit_code = launched_process.returncode
        cio.print_and_log('Command exited with status {} (pid: {})'.format(
            exit_code, launched_process.pid))

        exit_message = {'exit-code': exit_code, 'task-id': task_id}
        send_message(driver, inner_os_error_handler, exit_message)

        # await progress updater termination if executor is terminating normally
        if not stop_signal.is_set():
            logging.info('Awaiting completion of progress updaters')
            for progress_tracker in progress_trackers:
                progress_tracker.wait()
            logging.info('Progress updaters completed')

        # force send the latest progress state if available
        for progress_tracker in progress_trackers:
            progress_tracker.force_send_progress_update()

        # task either completed successfully or aborted with an error
        task_state = get_task_state(exit_code)
        output_task_completion(task_id, task_state)
        status_updater.update_status(task_state)

    except Exception as exception:
        if cu.is_out_of_memory_error(exception):
            inner_os_error_handler(exception)
        else:
            # task aborted with an error
            logging.exception('Error in executing task')
            output_task_completion(task_id, cook.TASK_FAILED)
            status_updater.update_status(
                cook.TASK_FAILED, reason=cook.REASON_EXECUTOR_TERMINATED)

    finally:
        # ensure completed_signal is set so driver can stop
        completed_signal.set()
        # do not leave the launched process behind if it is still running
        if launched_process and cs.is_process_running(launched_process):
            cs.send_signal(launched_process.pid, signal.SIGKILL)
def output_task_completion(task_id, task_state):
    """Print and log the message announcing the executor finished a task.

    Parameters
    ----------
    task_id:
        Identifier of the task, inserted into the message.
    task_state:
        Final state of the task, inserted into the message.

    Returns
    -------
    Nothing
    """
    completion_message = 'Executor completed execution of {} (state={})'.format(
        task_id, task_state)
    cio.print_and_log(completion_message)