def fake_action_runs(self):
    """Populate ``self.action_runs`` with three stubbed action runs.

    The list holds, in order: an SSH run whose machine reports
    ``ActionRun.UNKNOWN``, an SSH run whose machine reports
    ``ActionRun.SUCCEEDED``, and a Mesos run sharing that same succeeded
    machine. Every run gets its own ``Mock()`` node.
    """
    # NOTE(review): Mock(autospec=True) only sets an ``autospec`` attribute
    # on the mock; it does not constrain the mock to any spec. Kept as-is.
    unknown_machine = Mock(autospec=True)
    unknown_machine.state = ActionRun.UNKNOWN

    succeeded_machine = Mock(autospec=True)
    succeeded_machine.state = ActionRun.SUCCEEDED

    unknown_ssh_run = SSHActionRun(
        job_run_id="test.unknown",
        name="test.unknown",
        node=Mock(),
        machine=unknown_machine,
    )
    succeeded_ssh_run = SSHActionRun(
        job_run_id="test.succeeded",
        name="test.succeeded",
        node=Mock(),
        machine=succeeded_machine,
    )
    succeeded_mesos_run = MesosActionRun(
        job_run_id="test.succeeded",
        name="test.succeeded",
        node=Mock(),
        machine=succeeded_machine,
    )

    self.action_runs = [
        unknown_ssh_run,
        succeeded_ssh_run,
        succeeded_mesos_run,
    ]
def recover_action_run(action_run, action_runner):
    """Submit a recovery command for ``action_run`` and watch its outcome.

    A second ``SSHActionRun`` named ``recovery-<id>`` is built on the same
    node; its command runs ``recover_batch.py`` from the runner's
    ``exec_path`` against the status file under the runner's
    ``status_path``. The original ``action_run`` watches that command so its
    own state follows the recovery result.

    Args:
        action_run: the action run to recover. Its ``job_run_id``, ``id``,
            ``node``, ``output_path`` and ``machine`` are used.
        action_runner: runner factory providing ``exec_path`` and
            ``status_path``.

    Returns:
        ``None`` when ``action_runner`` is a ``NoActionRunnerFactory``;
        otherwise the object returned by ``node.submit_command`` (presumably
        a Twisted Deferred) with a completion-logging callback attached.
    """
    log.info("creating recovery run for actionrun %s" % action_run.id)
    # isinstance (rather than an exact type comparison) also treats
    # subclasses of NoActionRunnerFactory as "no runner available".
    if isinstance(action_runner, NoActionRunnerFactory):
        log.info(
            "unable to recover action_run %s: action_run has no action_runner"
            % action_run.id,
        )
        return None

    recovery_run = SSHActionRun(
        job_run_id=action_run.job_run_id,
        name="recovery-%s" % action_run.id,
        node=action_run.node,
        bare_command=build_recovery_command(
            recovery_binary="%s/recover_batch.py" % (action_runner.exec_path),
            path="%s/%s/status" % (
                action_runner.status_path,
                action_run.id,
            ),
        ),
        output_path=action_run.output_path,
    )
    recovery_action_command = recovery_run.build_action_command()
    recovery_action_command.write_stdout(
        "recovering action run %s" % action_run.id,
    )
    # Put action command in "running" state so if it fails to connect
    # and exits with no exit code, the real action run will not retry.
    recovery_action_command.started()

    # this line is where the magic happens.
    # the action run watches another actioncommand,
    # and updates its internal state according to its result.
    action_run.watch(recovery_action_command)

    if not action_run.machine.check('running'):
        # The recovery command is still submitted below even when the
        # transition is not allowed; only the state reset is skipped.
        log.error(
            'unable to transition action run %s from %s to start' %
            (action_run.id, action_run.machine.state)
        )
    else:
        # Clear the stale result before marking the run as running again.
        action_run.exit_status = None
        action_run.end_time = None
        action_run.machine.transition('running')

    log.info(
        "submitting recovery job with command %s to node %s" % (
            recovery_action_command.command,
            recovery_run.node,
        )
    )

    deferred = recovery_run.node.submit_command(recovery_action_command)
    deferred.addCallback(
        lambda x: log.info("completed recovery run %s" % recovery_run.id)
    )
    return deferred
def recover_action_run(action_run, action_runner):
    """Submit a recovery command for ``action_run`` and watch its outcome.

    A second ``SSHActionRun`` named ``recovery-<id>`` is built on the same
    node; its command runs ``recover_batch.py`` from the runner's
    ``exec_path`` against the status file under the runner's
    ``status_path``. The original ``action_run`` watches that command so its
    own state follows the recovery result.

    Args:
        action_run: the action run to recover. Its ``job_run_id``, ``id``,
            ``node``, ``output_path`` and ``machine`` are used.
        action_runner: runner factory providing ``exec_path`` and
            ``status_path``.

    Returns:
        ``None`` when ``action_runner`` is a ``NoActionRunnerFactory``;
        otherwise the object returned by ``node.submit_command`` (presumably
        a Twisted Deferred) with a completion-logging callback attached.
    """
    log.info("creating recovery run for actionrun %s" % action_run.id)
    # isinstance (rather than an exact type comparison) also treats
    # subclasses of NoActionRunnerFactory as "no runner available".
    if isinstance(action_runner, NoActionRunnerFactory):
        log.info(
            "unable to recover action_run %s: action_run has no action_runner"
            % action_run.id,
        )
        return None

    recovery_run = SSHActionRun(
        job_run_id=action_run.job_run_id,
        name="recovery-%s" % action_run.id,
        node=action_run.node,
        bare_command=build_recovery_command(
            recovery_binary="%s/recover_batch.py" % (action_runner.exec_path),
            path="%s/%s/status" % (
                action_runner.status_path,
                action_run.id,
            ),
        ),
        output_path=action_run.output_path,
    )
    recovery_action_command = recovery_run.build_action_command()
    recovery_action_command.write_stdout(
        "recovering action run %s" % action_run.id,
    )
    # Put action command in "running" state so if it fails to connect
    # and exits with no exit code, the real action run will not retry.
    # This happens before watch() so the watcher does not see the event.
    recovery_action_command.started()

    # this line is where the magic happens.
    # the action run watches another actioncommand,
    # and updates its internal state according to its result.
    action_run.watch(recovery_action_command)

    if not action_run.machine.check('running'):
        # The recovery command is still submitted below even when the
        # transition is not allowed; only the state reset is skipped.
        log.error(
            'unable to transition action run %s from %s to start' %
            (action_run.id, action_run.machine.state)
        )
    else:
        # Clear the stale result before marking the run as running again.
        action_run.exit_status = None
        action_run.end_time = None
        action_run.machine.transition('running')

    log.info(
        "submitting recovery job with command %s to node %s" % (
            recovery_action_command.command,
            recovery_run.node,
        )
    )

    deferred = recovery_run.node.submit_command(recovery_action_command)
    deferred.addCallback(
        lambda x: log.info("completed recovery run %s" % recovery_run.id)
    )
    return deferred
def setup_action_run(self):
    """Create the SSH action run fixture shared by the tests.

    Sets ``self.output_path`` (rooted in a fresh temporary directory),
    ``self.action_runner`` (an autospec of ``NoActionRunnerFactory``),
    ``self.command`` and ``self.action_run``.
    """
    self.command = "do command {actionname}"
    self.output_path = filehandler.OutputPath(tempfile.mkdtemp())
    self.action_runner = mock.create_autospec(
        actioncommand.NoActionRunnerFactory,
    )

    # Collect the constructor arguments first so the call itself stays short.
    run_kwargs = dict(
        job_run_id="id",
        name="action_name",
        node=mock.create_autospec(node.Node),
        bare_command=self.command,
        output_path=self.output_path,
        action_runner=self.action_runner,
    )
    self.action_run = SSHActionRun(**run_kwargs)
def recover_action_run(action_run, action_runner):
    """Submit a recovery command for ``action_run`` and watch its outcome.

    A second ``SSHActionRun`` named ``recovery-<id>`` is built on the same
    node; its command runs ``recover_batch.py`` from the runner's
    ``exec_path`` against the status file under the runner's
    ``status_path``. The original ``action_run`` watches that command so its
    own state follows the recovery result.

    Args:
        action_run: the action run to recover. Its ``job_run_id``, ``id``,
            ``node``, ``output_path`` and ``machine`` are used.
        action_runner: runner factory providing ``exec_path`` and
            ``status_path``.

    Returns:
        ``None`` when ``action_runner`` is a ``NoActionRunnerFactory``;
        otherwise the object returned by ``node.submit_command`` (presumably
        a Twisted Deferred) with a completion-logging callback attached.
    """
    log.info(f"Creating recovery run for actionrun {action_run.id}")
    # isinstance (rather than an exact type comparison) also treats
    # subclasses of NoActionRunnerFactory as "no runner available".
    if isinstance(action_runner, NoActionRunnerFactory):
        log.info(
            f"Unable to recover action_run {action_run.id}: "
            "action_run has no action_runner"
        )
        return None

    recovery_run = SSHActionRun(
        job_run_id=action_run.job_run_id,
        name=f"recovery-{action_run.id}",
        node=action_run.node,
        bare_command=build_recovery_command(
            recovery_binary=f"{action_runner.exec_path}/recover_batch.py",
            path=f"{action_runner.status_path}/{action_run.id}/status",
        ),
        output_path=action_run.output_path,
    )
    recovery_action_command = recovery_run.build_action_command()
    recovery_action_command.write_stdout(
        f"Recovering action run {action_run.id}",
    )
    # Put action command in "running" state so if it fails to connect
    # and exits with no exit code, the real action run will not retry.
    recovery_action_command.started()

    # this line is where the magic happens.
    # the action run watches another actioncommand,
    # and updates its internal state according to its result.
    action_run.watch(recovery_action_command)

    if not action_run.machine.check('running'):
        # The recovery command is still submitted below even when the
        # transition is not allowed; only the state reset is skipped.
        log.error(
            f'Unable to transition action run {action_run.id} '
            f'from {action_run.machine.state} to start'
        )
    else:
        # Clear the stale result before marking the run as running again.
        action_run.exit_status = None
        action_run.end_time = None
        action_run.machine.transition('running')

    log.info(
        f"Submitting recovery job with command {recovery_action_command.command} "
        f"to node {recovery_run.node}"
    )

    deferred = recovery_run.node.submit_command(recovery_action_command)
    deferred.addCallback(
        lambda x: log.info(f"Completed recovery run {recovery_run.id}")
    )
    return deferred
def test_recover_action_run_no_action_runner(self):
    """Recovery returns None for a run built without an action_runner.

    The run is constructed with no ``action_runner`` argument, so whatever
    default ``SSHActionRun`` assigns is what gets passed to the recovery
    helper (presumably a ``NoActionRunnerFactory`` -- confirm against the
    constructor).
    """
    action_run = SSHActionRun(
        job_run_id="test.succeeded",
        name="test.succeeded",
        node=Mock(),
    )
    result = recover_action_run(action_run, action_run.action_runner)
    assert result is None
def fake_action_runs(self):
    """Populate ``self.action_runs`` with five stubbed action runs.

    In order: an SSH run in ``UNKNOWN`` state with an end time, an SSH run
    in ``SUCCEEDED`` state, a Mesos run in ``SUCCEEDED`` state, a Mesos run
    in ``UNKNOWN`` state without an end time, and a Mesos run in ``UNKNOWN``
    state with an end time. Runs in the same state share one machine mock;
    each run has its own ``Mock()`` node and command config.
    """
    # NOTE(review): Mock(autospec=True) only sets an ``autospec`` attribute
    # on the mock; it does not constrain the mock to any spec. Kept as-is.
    unknown_machine = Mock(autospec=True)
    unknown_machine.state = ActionRun.UNKNOWN

    succeeded_machine = Mock(autospec=True)
    succeeded_machine.state = ActionRun.SUCCEEDED

    runs = []
    runs.append(
        SSHActionRun(
            job_run_id="test.unknown",
            name="test.unknown",
            node=Mock(),
            command_config=Mock(),
            machine=unknown_machine,
            end_time=timeutils.current_time(),
        )
    )
    runs.append(
        SSHActionRun(
            job_run_id="test.succeeded",
            name="test.succeeded",
            node=Mock(),
            command_config=Mock(),
            machine=succeeded_machine,
        )
    )
    runs.append(
        MesosActionRun(
            job_run_id="test.succeeded",
            name="test.succeeded",
            node=Mock(),
            command_config=Mock(),
            machine=succeeded_machine,
        )
    )
    runs.append(
        MesosActionRun(
            job_run_id="test.unknown-mesos",
            name="test.unknown-mesos",
            node=Mock(),
            command_config=Mock(),
            machine=unknown_machine,
        )
    )
    runs.append(
        MesosActionRun(
            job_run_id="test.unknown-mesos-done",
            name="test.unknown-mesos-done",
            node=Mock(),
            command_config=Mock(),
            machine=unknown_machine,
            end_time=timeutils.current_time(),
        )
    )
    self.action_runs = runs
def test_recover_action_run_action_runner(self):
    """Recovering an UNKNOWN run with a real runner factory restarts it.

    Checks that exactly one command is submitted to the node and that the
    run ends up RUNNING with its end time and exit status cleared.
    """
    runner_factory = SubprocessActionRunnerFactory(
        status_path='/tmp/foo',
        exec_path='/bin/foo',
    )
    node_stub = mock.Mock()
    finished_run = SSHActionRun(
        job_run_id="test.succeeded",
        name="test.succeeded",
        node=node_stub,
        action_runner=runner_factory,
        end_time=timeutils.current_time(),
        exit_status=0,
    )
    # Force the state the recovery path is meant to handle.
    finished_run.machine.state = ActionRun.UNKNOWN

    recover_action_run(finished_run, runner_factory)

    node_stub.submit_command.assert_called_once()
    assert finished_run.machine.state == ActionRun.RUNNING
    assert finished_run.end_time is None
    assert finished_run.exit_status is None
class TestSSHActionRun(TestCase):
    """Tests for ``SSHActionRun`` start, retry and command-event handling.

    Each test runs against a fresh ``SSHActionRun`` whose node and action
    runner are autospec mocks and whose output path lives in a temporary
    directory that is removed on teardown.
    """

    @setup
    def setup_action_run(self):
        """Build the action run fixture and its mocked collaborators."""
        self.output_path = filehandler.OutputPath(tempfile.mkdtemp())
        self.action_runner = mock.create_autospec(
            actioncommand.NoActionRunnerFactory,
        )
        self.command = "do command {actionname}"
        self.action_run = SSHActionRun(
            job_run_id="id",
            name="action_name",
            node=mock.create_autospec(node.Node),
            bare_command=self.command,
            output_path=self.output_path,
            action_runner=self.action_runner,
        )

    @teardown
    def teardown_action_run(self):
        """Remove the temporary output directory created in setup."""
        shutil.rmtree(self.output_path.base, ignore_errors=True)

    def test_start_node_error(self):
        """A node error on submit makes start() falsy and fails the run (-2)."""
        def raise_error(c):
            raise node.Error("The error")

        # Swap in a node whose submit_command always raises node.Error.
        self.action_run.node = mock.MagicMock()
        self.action_run.node.submit_command.side_effect = raise_error
        self.action_run.machine.transition('ready')
        assert not self.action_run.start()
        assert_equal(self.action_run.exit_status, -2)
        assert self.action_run.is_failed

    @mock.patch('tron.core.actionrun.filehandler', autospec=True)
    def test_build_action_command(self, mock_filehandler):
        """build_action_command creates, stores and watches the command.

        Asserts the command comes from ``action_runner.create`` called with
        the run's id, command and a serializer built from its output path.
        """
        autospec_method(self.action_run.watch)
        serializer = mock_filehandler.OutputStreamSerializer.return_value
        action_command = self.action_run.build_action_command()
        assert_equal(action_command, self.action_run.action_command)
        assert_equal(action_command, self.action_runner.create.return_value)
        self.action_runner.create.assert_called_with(
            self.action_run.id,
            self.action_run.command,
            serializer,
        )
        mock_filehandler.OutputStreamSerializer.assert_called_with(
            self.action_run.output_path,
        )
        self.action_run.watch.assert_called_with(action_command)

    def test_auto_retry(self):
        """Unsuccessful exits use up retries before the run is failed.

        With two retries remaining, the first two unsuccessful exits leave
        the run not failed; the third marks it failed. Only two exit
        statuses are expected to be recorded at the end.
        """
        self.action_run.retries_remaining = 2
        self.action_run.exit_statuses = []
        self.action_run.build_action_command()
        self.action_run.action_command.exit_status = -1
        self.action_run.machine.transition('start')

        self.action_run._exit_unsuccessful(-1)
        assert self.action_run.retries_remaining == 1
        assert not self.action_run.is_failed

        self.action_run._exit_unsuccessful(-1)
        assert self.action_run.retries_remaining == 0
        assert not self.action_run.is_failed

        self.action_run._exit_unsuccessful(-1)
        assert self.action_run.retries_remaining == 0
        assert self.action_run.is_failed

        assert_equal(self.action_run.exit_statuses, [-1, -1])

    def test_no_auto_retry_on_fail_not_running(self):
        """fail() on a run that was never started does not trigger retries.

        Asserts retries_remaining becomes -1, the run is failed, and no
        exit status is recorded.
        """
        self.action_run.retries_remaining = 2
        self.action_run.exit_statuses = []
        self.action_run.build_action_command()
        self.action_run.fail()
        assert self.action_run.retries_remaining == -1
        assert self.action_run.is_failed
        assert_equal(self.action_run.exit_statuses, [])
        assert_equal(self.action_run.exit_status, None)

    def test_no_auto_retry_on_fail_running(self):
        """fail() after the 'start' transition does not trigger retries.

        Same expectations as the not-running case: retries_remaining is
        -1, the run is failed, and no exit status is recorded.
        """
        self.action_run.retries_remaining = 2
        self.action_run.exit_statuses = []
        self.action_run.build_action_command()
        self.action_run.machine.transition('start')
        self.action_run.fail()
        assert self.action_run.retries_remaining == -1
        assert self.action_run.is_failed
        assert_equal(self.action_run.exit_statuses, [])
        assert_equal(self.action_run.exit_status, None)

    def test_manual_retry(self):
        """retry() after fail(-1) records the exit status and zero retries."""
        self.action_run.retries_remaining = None
        self.action_run.exit_statuses = []
        self.action_run.build_action_command()
        self.action_run.action_command.exit_status = -1
        self.action_run.machine.transition('start')
        self.action_run.fail(-1)
        self.action_run.retry()
        # NOTE(review): the next line is a bare expression, not an assert,
        # so it checks nothing. Confirm which state is expected after
        # retry() before turning it into an assertion.
        self.action_run.is_running
        assert_equal(self.action_run.exit_statuses, [-1])
        assert_equal(self.action_run.retries_remaining, 0)

    @mock.patch('twisted.internet.reactor.callLater', autospec=True)
    def test_retries_delay(self, callLater):
        """With retries_delay set, the callLater result is kept in in_delay."""
        self.action_run.retries_delay = datetime.timedelta()
        self.action_run.retries_remaining = 2
        self.action_run.build_action_command()
        self.action_run.action_command.exit_status = -1
        self.action_run.machine.transition('start')
        # Sentinel return value so the stored delayed call can be identified.
        callLater.return_value = "delayed call"
        self.action_run._exit_unsuccessful(-1)
        assert self.action_run.in_delay == "delayed call"

    def test_handler_running(self):
        """A RUNNING event after 'start' is handled and the run is running."""
        self.action_run.build_action_command()
        self.action_run.machine.transition('start')
        assert self.action_run.handler(
            self.action_run.action_command,
            ActionCommand.RUNNING,
        )
        assert self.action_run.is_running

    def test_handler_failstart(self):
        """A FAILSTART event is handled and fails the run."""
        self.action_run.build_action_command()
        assert self.action_run.handler(
            self.action_run.action_command,
            ActionCommand.FAILSTART,
        )
        assert self.action_run.is_failed

    def test_handler_exiting_fail(self):
        """An EXITING event with exit status -1 fails the run with -1."""
        self.action_run.build_action_command()
        self.action_run.action_command.exit_status = -1
        self.action_run.machine.transition('start')
        assert self.action_run.handler(
            self.action_run.action_command,
            ActionCommand.EXITING,
        )
        assert self.action_run.is_failed
        assert_equal(self.action_run.exit_status, -1)

    def test_handler_exiting_success(self):
        """An EXITING event with exit status 0 marks the run succeeded."""
        self.action_run.build_action_command()
        self.action_run.action_command.exit_status = 0
        self.action_run.machine.transition('start')
        self.action_run.machine.transition('started')
        assert self.action_run.handler(
            self.action_run.action_command,
            ActionCommand.EXITING,
        )
        assert self.action_run.is_succeeded
        assert_equal(self.action_run.exit_status, 0)

    def test_handler_exiting_failunknown(self):
        """An EXITING event with no exit status leaves the run unknown."""
        # Use an autospec command so exit_status can be pinned to None.
        self.action_run.action_command = mock.create_autospec(
            actioncommand.ActionCommand,
            exit_status=None,
        )
        self.action_run.machine.transition('start')
        self.action_run.machine.transition('started')
        assert self.action_run.handler(
            self.action_run.action_command,
            ActionCommand.EXITING,
        )
        assert self.action_run.is_unknown
        assert_equal(self.action_run.exit_status, None)

    def test_handler_unhandled(self):
        """A PENDING event returns None and the run stays scheduled."""
        self.action_run.build_action_command()
        assert self.action_run.handler(
            self.action_run.action_command,
            ActionCommand.PENDING,
        ) is None
        assert self.action_run.is_scheduled