def job_run(self, tmpdir, mock_event_bus):
    """Build a JobRun for a mocked job whose graph has three actions.

    The graph contains ``foo``, ``after_foo`` (which requires ``foo``) and
    ``bar`` (which has no required actions but requires the trigger
    ``'trigger'``). Output is rooted at ``tmpdir``.

    ``mock_event_bus`` is not referenced in the body; presumably it is
    requested only so that fixture is active -- confirm against the test
    module.
    """
    def make_action(name, **extra):
        # Every action shares the same placeholder command and no node pool.
        return action.Action(
            name,
            action.ActionCommandConfig('command'),
            None,
            **extra
        )

    graph = actiongraph.ActionGraph(
        action_map={
            'foo': make_action('foo'),
            'after_foo': make_action('after_foo'),
            'bar': make_action('bar', triggered_by={'trigger'}),
        },
        required_actions={
            'foo': set(),
            'after_foo': {'foo'},
            'bar': set(),
        },
        required_triggers={
            'foo': set(),
            'after_foo': set(),
            'bar': {'trigger'},
        },
    )

    job = mock.Mock(
        output_path=filehandler.OutputPath(tmpdir),
        action_graph=graph,
        action_runner=actioncommand.NoActionRunnerFactory(),
    )

    return jobrun.JobRun.for_job(
        job,
        run_num=1,
        run_time=datetime.datetime.now(),
        node=mock.Mock(),
        manual=False,
    )
def do_recover(self, delay):
    """Start recovering this action run through a separate recovery command.

    Builds a ``recover_batch.py`` command pointing at this run's status
    file, wraps it in a ``<name>-recovery`` SSHActionRun, watches the
    resulting action command, and moves this run into ``running``.

    Args:
        delay: Falsy to submit the recovery command immediately; otherwise
            passed to ``reactor.callLater`` as the delay before submitting.

    Returns:
        The result of ``submit_recovery_command`` when there is no delay,
        otherwise whatever ``reactor.callLater`` returns.
    """
    script = f"{self.action_runner.exec_path}/recover_batch.py"
    status_file = f"{self.action_runner.status_path}/{self.id}/status"
    recover_cmd = f"{script} {status_file}"

    recover_config = action.ActionCommandConfig(command=recover_cmd)
    recover_attempt = ActionRunAttempt(
        command_config=recover_config,
        rendered_command=self.render_command(recover_cmd),
    )

    # Keep the "recovery" output at the same directory level as this
    # action run's own output.
    self.output_path.parts = []

    # A separate action run may not be strictly necessary; it is used here
    # so the recovery gets its own name.
    recovery_run = SSHActionRun(
        job_run_id=self.job_run_id,
        name=f"{self.name}-recovery",
        node=self.node,
        command_config=recover_config,
        output_path=self.output_path,
    )
    recovery_cmd_obj = recovery_run.build_action_command(recover_attempt)
    recovery_cmd_obj.write_stdout(f"Recovering action run {self.id}")

    # Mark the command as "running" up front: if it fails to connect and
    # exits without an exit code, the real action run must not retry.
    recovery_cmd_obj.started()

    # Core of the recovery: this run watches the recovery action command
    # and updates its own state from that command's result.
    self.watch(recovery_cmd_obj)

    self.clear_end_state()
    self.machine.transition('running')

    # The delay is applied last so the action already appears running while
    # it waits for the recovery to be submitted.
    if delay:
        return reactor.callLater(
            delay,
            self.submit_recovery_command,
            recovery_run,
            recovery_cmd_obj,
        )
    return self.submit_recovery_command(recovery_run, recovery_cmd_obj)
def from_state(
    cls,
    state_data,
    parent_context,
    output_path,
    job_run_node,
    action_graph,
    cleanup=False,
):
    """Restore an ActionRun from its serialized state.

    Args:
        state_data: Mapping holding the serialized run. ``start_time``,
            ``end_time`` and ``state`` are read with direct indexing, as is
            either ``id`` or the ``job_run_id``/``action_name`` pair; all
            other fields are read with ``.get``.
        parent_context: Passed through to the constructor.
        output_path: Passed through to the constructor.
        job_run_node: Passed to the node pool repository's ``get_node``
            together with the stored ``node_name``.
        action_graph: Graph whose ``action_map`` supplies the command
            config for this action, when the action is present in it.
        cleanup: Passed through to the constructor.

    Returns:
        The restored run. A run that reports ``is_active`` after
        restoration is transitioned with ``'fail_unknown'``.
    """
    pool_repo = node.NodePoolRepository.get_instance()

    # Older state versions stored one combined 'id' ("<job_run_id>.<action>").
    if 'id' not in state_data:
        job_run_id = state_data['job_run_id']
        action_name = state_data['action_name']
    else:
        job_run_id, action_name = state_data['id'].rsplit('.', 1)

    job_run_node = pool_repo.get_node(state_data.get('node_name'), job_run_node)

    runner_kwargs = state_data.get('action_runner')
    action_runner = (
        SubprocessActionRunnerFactory(**runner_kwargs)
        if runner_kwargs
        else NoActionRunnerFactory()
    )

    # Use an empty command when the action is not in the graph's action map.
    action_config = action_graph.action_map.get(action_name)
    command_config = (
        action_config.command_config
        if action_config
        else action.ActionCommandConfig(command='')
    )

    attempts = cls.attempts_from_state(state_data, command_config)

    # Fields that are copied straight through and default to None if absent.
    passthrough = {
        key: state_data.get(key)
        for key in (
            'original_command',
            'exit_status',
            'retries_remaining',
            'retries_delay',
            'trigger_downstreams',
            'triggered_by',
            'on_upstream_rerun',
            'trigger_timeout_timestamp',
        )
    }

    run = cls(
        job_run_id=job_run_id,
        name=action_name,
        node=job_run_node,
        parent_context=parent_context,
        output_path=output_path,
        command_config=command_config,
        cleanup=cleanup,
        start_time=state_data['start_time'],
        end_time=state_data['end_time'],
        run_state=state_data['state'],
        attempts=attempts,
        action_runner=action_runner,
        executor=state_data.get('executor', ExecutorTypes.ssh.value),
        **passthrough
    )

    # A run restored as still active missed its exit status, so mark it
    # as an unknown failure; recovery looks for unknown runs.
    if run.is_active:
        run.transition_and_notify('fail_unknown')
    return run
def from_state(cls, state_data):
    """Rebuild an instance from its serialized state mapping.

    The ``command_config`` entry is expanded into an
    ``action.ActionCommandConfig`` and every entry of the mapping is then
    passed to ``cls`` as a keyword argument.

    Args:
        state_data: Mapping of constructor keyword arguments. Its
            ``command_config`` value must itself be a mapping of
            ``ActionCommandConfig`` keyword arguments.

    Returns:
        A new ``cls`` instance.

    Raises:
        KeyError: If ``state_data`` has no ``command_config`` entry.
    """
    # Work on a shallow copy so the caller's dict is left untouched.
    # Overwriting the entry in place would make a second restore from the
    # same state fail, since the value would no longer be a mapping.
    restored = dict(state_data)
    restored['command_config'] = action.ActionCommandConfig(
        **restored['command_config']
    )
    return cls(**restored)