Пример #1
0
    def fake_action_runs(self):
        """Populate ``self.action_runs`` with runs backed by mock machines.

        The list holds, in order: an SSH run whose machine state is
        ``ActionRun.UNKNOWN``, an SSH run whose machine state is
        ``ActionRun.SUCCEEDED``, and a Mesos run sharing that same
        succeeded machine.
        """
        # NOTE(review): ``autospec`` is not a ``Mock`` constructor option, so
        # ``autospec=True`` merely sets an attribute named ``autospec`` on the
        # mock -- presumably a real spec was intended; confirm before changing.
        unknown_machine = Mock(autospec=True, state=ActionRun.UNKNOWN)
        succeeded_machine = Mock(autospec=True, state=ActionRun.SUCCEEDED)

        ssh_unknown = SSHActionRun(
            job_run_id="test.unknown",
            name="test.unknown",
            node=Mock(),
            machine=unknown_machine,
        )
        ssh_succeeded = SSHActionRun(
            job_run_id="test.succeeded",
            name="test.succeeded",
            node=Mock(),
            machine=succeeded_machine,
        )
        mesos_succeeded = MesosActionRun(
            job_run_id="test.succeeded",
            name="test.succeeded",
            node=Mock(),
            machine=succeeded_machine,
        )
        self.action_runs = [ssh_unknown, ssh_succeeded, mesos_succeeded]
Пример #2
0
def recover_action_run(action_run, action_runner):
    """Submit a recovery command for ``action_run`` and watch its outcome.

    A temporary ``SSHActionRun`` is built on the same node and output path
    as ``action_run``. Its command is produced by ``build_recovery_command``
    from the runner's ``exec_path`` and ``status_path``. ``action_run`` then
    watches that command, and is moved to the ``running`` state when the
    state machine allows it.

    Args:
        action_run: the action run to recover. Its ``id``, ``job_run_id``,
            ``node``, ``output_path`` and ``machine`` are used.
        action_runner: the runner factory providing ``exec_path`` and
            ``status_path``.

    Returns:
        ``None`` when ``action_runner`` is a ``NoActionRunnerFactory``
        (nothing can be recovered without a runner); otherwise the deferred
        returned by ``node.submit_command`` with a completion-logging
        callback attached.
    """
    log.info("creating recovery run for actionrun %s" % action_run.id)
    # isinstance instead of an exact ``type(...) ==`` comparison, so that
    # subclasses and spec'd stand-ins of NoActionRunnerFactory are also
    # recognised as "no runner available".
    if isinstance(action_runner, NoActionRunnerFactory):
        log.info(
            "unable to recover action_run %s: action_run has no action_runner"
            % action_run.id,
        )
        return None

    recovery_run = SSHActionRun(
        job_run_id=action_run.job_run_id,
        name="recovery-%s" % action_run.id,
        node=action_run.node,
        bare_command=build_recovery_command(
            recovery_binary="%s/recover_batch.py" % (action_runner.exec_path),
            path="%s/%s/status" % (
                action_runner.status_path,
                action_run.id,
            ),
        ),
        output_path=action_run.output_path,
    )
    recovery_action_command = recovery_run.build_action_command()
    recovery_action_command.write_stdout(
        "recovering action run %s" % action_run.id,
    )
    # Put action command in "running" state so if it fails to connect
    # and exits with no exit code, the real action run will not retry.
    recovery_action_command.started()

    # This is the key step: the original action run watches the recovery
    # command and updates its own state according to that command's result.
    action_run.watch(recovery_action_command)

    if not action_run.machine.check('running'):
        # NOTE(review): the message says "to start" although the transition
        # being checked is 'running'; wording kept as-is for log stability.
        log.error(
            'unable to transition action run %s from %s to start' %
            (action_run.id, action_run.machine.state)
        )
    else:
        # Clear the previous outcome before marking the run as running again.
        action_run.exit_status = None
        action_run.end_time = None
        action_run.machine.transition('running')

    log.info(
        "submitting recovery job with command %s to node %s" % (
            recovery_action_command.command,
            recovery_run.node,
        )
    )
    deferred = recovery_run.node.submit_command(recovery_action_command)
    deferred.addCallback(
        lambda _: log.info("completed recovery run %s" % recovery_run.id)
    )
    return deferred
Пример #3
0
def recover_action_run(action_run, action_runner):
    """Submit a recovery command for ``action_run`` and watch its outcome.

    A temporary ``SSHActionRun`` is built on the same node and output path
    as ``action_run``. Its command is produced by ``build_recovery_command``
    from the runner's ``exec_path`` and ``status_path``. ``action_run`` then
    watches that command, and is moved to the ``running`` state when the
    state machine allows it.

    Args:
        action_run: the action run to recover. Its ``id``, ``job_run_id``,
            ``node``, ``output_path`` and ``machine`` are used.
        action_runner: the runner factory providing ``exec_path`` and
            ``status_path``.

    Returns:
        ``None`` when ``action_runner`` is a ``NoActionRunnerFactory``
        (nothing can be recovered without a runner); otherwise the deferred
        returned by ``node.submit_command`` with a completion-logging
        callback attached.
    """
    log.info("creating recovery run for actionrun %s" % action_run.id)
    # isinstance instead of an exact ``type(...) ==`` comparison, so that
    # subclasses and spec'd stand-ins of NoActionRunnerFactory are also
    # recognised as "no runner available".
    if isinstance(action_runner, NoActionRunnerFactory):
        log.info(
            "unable to recover action_run %s: action_run has no action_runner"
            % action_run.id,
        )
        return None

    recovery_run = SSHActionRun(
        job_run_id=action_run.job_run_id,
        name="recovery-%s" % action_run.id,
        node=action_run.node,
        bare_command=build_recovery_command(
            recovery_binary="%s/recover_batch.py" % (action_runner.exec_path),
            path="%s/%s/status" % (
                action_runner.status_path,
                action_run.id,
            ),
        ),
        output_path=action_run.output_path,
    )
    recovery_action_command = recovery_run.build_action_command()
    recovery_action_command.write_stdout(
        "recovering action run %s" % action_run.id,
    )

    # This is the key step: the original action run watches the recovery
    # command and updates its own state according to that command's result.
    action_run.watch(recovery_action_command)

    if not action_run.machine.check('running'):
        # NOTE(review): the message says "to start" although the transition
        # being checked is 'running'; wording kept as-is for log stability.
        log.error(
            'unable to transition action run %s from %s to start' %
            (action_run.id, action_run.machine.state)
        )
    else:
        # Clear the previous outcome before marking the run as running again.
        action_run.exit_status = None
        action_run.end_time = None
        action_run.machine.transition('running')

    log.info(
        "submitting recovery job with command %s to node %s" % (
            recovery_action_command.command,
            recovery_run.node,
        )
    )
    deferred = recovery_run.node.submit_command(recovery_action_command)
    deferred.addCallback(
        lambda _: log.info("completed recovery run %s" % recovery_run.id)
    )
    return deferred
Пример #4
0
 def setup_action_run(self):
     """Build the ``SSHActionRun`` fixture used by the tests.

     The run writes to an ``OutputPath`` rooted in a fresh temporary
     directory, uses an autospec of ``NoActionRunnerFactory`` as its action
     runner and an autospec of ``node.Node`` as its node.
     """
     scratch_dir = tempfile.mkdtemp()
     self.output_path = filehandler.OutputPath(scratch_dir)
     self.action_runner = mock.create_autospec(
         actioncommand.NoActionRunnerFactory,
     )
     self.command = "do command {actionname}"

     fake_node = mock.create_autospec(node.Node)
     self.action_run = SSHActionRun(
         job_run_id="id",
         name="action_name",
         node=fake_node,
         bare_command=self.command,
         output_path=self.output_path,
         action_runner=self.action_runner,
     )
Пример #5
0
def recover_action_run(action_run, action_runner):
    """Submit a recovery command for ``action_run`` and watch its outcome.

    A temporary ``SSHActionRun`` is built on the same node and output path
    as ``action_run``. Its command is produced by ``build_recovery_command``
    from the runner's ``exec_path`` and ``status_path``. ``action_run`` then
    watches that command, and is moved to the ``running`` state when the
    state machine allows it.

    Args:
        action_run: the action run to recover. Its ``id``, ``job_run_id``,
            ``node``, ``output_path`` and ``machine`` are used.
        action_runner: the runner factory providing ``exec_path`` and
            ``status_path``.

    Returns:
        ``None`` when ``action_runner`` is a ``NoActionRunnerFactory``
        (nothing can be recovered without a runner); otherwise the deferred
        returned by ``node.submit_command`` with a completion-logging
        callback attached.
    """
    log.info(f"Creating recovery run for actionrun {action_run.id}")
    # isinstance instead of an exact ``type(...) ==`` comparison, so that
    # subclasses and spec'd stand-ins of NoActionRunnerFactory are also
    # recognised as "no runner available".
    if isinstance(action_runner, NoActionRunnerFactory):
        log.info(
            f"Unable to recover action_run {action_run.id}: "
            "action_run has no action_runner"
        )
        return None

    recovery_run = SSHActionRun(
        job_run_id=action_run.job_run_id,
        name=f"recovery-{action_run.id}",
        node=action_run.node,
        bare_command=build_recovery_command(
            recovery_binary=f"{action_runner.exec_path}/recover_batch.py",
            path=f"{action_runner.status_path}/{action_run.id}/status",
        ),
        output_path=action_run.output_path,
    )
    recovery_action_command = recovery_run.build_action_command()
    recovery_action_command.write_stdout(
        f"Recovering action run {action_run.id}",
    )
    # Put action command in "running" state so if it fails to connect
    # and exits with no exit code, the real action run will not retry.
    recovery_action_command.started()

    # This is the key step: the original action run watches the recovery
    # command and updates its own state according to that command's result.
    action_run.watch(recovery_action_command)

    if not action_run.machine.check('running'):
        # NOTE(review): the message says "to start" although the transition
        # being checked is 'running'; wording kept as-is for log stability.
        log.error(
            f'Unable to transition action run {action_run.id} '
            f'from {action_run.machine.state} to start'
        )
    else:
        # Clear the previous outcome before marking the run as running again.
        action_run.exit_status = None
        action_run.end_time = None
        action_run.machine.transition('running')

    log.info(
        f"Submitting recovery job with command {recovery_action_command.command} "
        f"to node {recovery_run.node}"
    )
    deferred = recovery_run.node.submit_command(recovery_action_command)
    deferred.addCallback(
        lambda _: log.info(f"Completed recovery run {recovery_run.id}")
    )
    return deferred
Пример #6
0
 def test_recover_action_run_no_action_runner(self):
     """Recovery returns ``None`` for a run built without an action runner."""
     # No ``action_runner`` argument is passed, so the run carries whatever
     # default ``SSHActionRun`` assigns.
     run_without_runner = SSHActionRun(
         job_run_id="test.succeeded",
         name="test.succeeded",
         node=Mock(),
     )
     outcome = recover_action_run(
         run_without_runner,
         run_without_runner.action_runner,
     )
     assert outcome is None
Пример #7
0
    def fake_action_runs(self):
        """Populate ``self.action_runs`` with SSH and Mesos runs on mock machines.

        The list holds, in order:
          1. an SSH run in ``ActionRun.UNKNOWN`` state with an ``end_time``,
          2. an SSH run in ``ActionRun.SUCCEEDED`` state,
          3. a Mesos run in ``ActionRun.SUCCEEDED`` state,
          4. a Mesos run in ``ActionRun.UNKNOWN`` state without ``end_time``,
          5. a Mesos run in ``ActionRun.UNKNOWN`` state with an ``end_time``.

        Runs in the same state share a single mock machine.
        """
        # NOTE(review): ``autospec`` is not a ``Mock`` constructor option, so
        # ``autospec=True`` merely sets an attribute named ``autospec`` on the
        # mock -- presumably a real spec was intended; confirm before changing.
        unknown_machine = Mock(autospec=True, state=ActionRun.UNKNOWN)
        succeeded_machine = Mock(autospec=True, state=ActionRun.SUCCEEDED)

        ssh_unknown_done = SSHActionRun(
            job_run_id="test.unknown",
            name="test.unknown",
            node=Mock(),
            command_config=Mock(),
            machine=unknown_machine,
            end_time=timeutils.current_time(),
        )
        ssh_succeeded = SSHActionRun(
            job_run_id="test.succeeded",
            name="test.succeeded",
            node=Mock(),
            command_config=Mock(),
            machine=succeeded_machine,
        )
        mesos_succeeded = MesosActionRun(
            job_run_id="test.succeeded",
            name="test.succeeded",
            node=Mock(),
            command_config=Mock(),
            machine=succeeded_machine,
        )
        mesos_unknown = MesosActionRun(
            job_run_id="test.unknown-mesos",
            name="test.unknown-mesos",
            node=Mock(),
            command_config=Mock(),
            machine=unknown_machine,
        )
        mesos_unknown_done = MesosActionRun(
            job_run_id="test.unknown-mesos-done",
            name="test.unknown-mesos-done",
            node=Mock(),
            command_config=Mock(),
            machine=unknown_machine,
            end_time=timeutils.current_time(),
        )
        self.action_runs = [
            ssh_unknown_done,
            ssh_succeeded,
            mesos_succeeded,
            mesos_unknown,
            mesos_unknown_done,
        ]
Пример #8
0
 def test_recover_action_run_action_runner(self):
     """Recovery with a subprocess runner submits a command and resets the run.

     Starting from an UNKNOWN run that already has ``end_time`` and
     ``exit_status`` set, recovery must submit exactly one command to the
     node, move the run to RUNNING, and clear both fields.
     """
     runner_factory = SubprocessActionRunnerFactory(
         status_path='/tmp/foo',
         exec_path='/bin/foo',
     )
     fake_node = mock.Mock()
     run = SSHActionRun(
         job_run_id="test.succeeded",
         name="test.succeeded",
         node=fake_node,
         action_runner=runner_factory,
         end_time=timeutils.current_time(),
         exit_status=0,
     )
     run.machine.state = ActionRun.UNKNOWN

     recover_action_run(run, runner_factory)

     fake_node.submit_command.assert_called_once()
     assert run.machine.state == ActionRun.RUNNING
     assert run.end_time is None
     assert run.exit_status is None
Пример #9
0
class TestSSHActionRun(TestCase):
    """Tests for ``SSHActionRun``: starting, retries, and command-event handling.

    Each test runs against a fresh ``SSHActionRun`` built in
    ``setup_action_run`` with an autospec'd node and action runner factory.
    """

    @setup
    def setup_action_run(self):
        """Create the action run under test, writing to a temp directory."""
        self.output_path = filehandler.OutputPath(tempfile.mkdtemp())
        self.action_runner = mock.create_autospec(
            actioncommand.NoActionRunnerFactory, )
        self.command = "do command {actionname}"
        self.action_run = SSHActionRun(
            job_run_id="id",
            name="action_name",
            node=mock.create_autospec(node.Node),
            bare_command=self.command,
            output_path=self.output_path,
            action_runner=self.action_runner,
        )

    @teardown
    def teardown_action_run(self):
        """Remove the temp output directory created by the setup."""
        shutil.rmtree(self.output_path.base, ignore_errors=True)

    def test_start_node_error(self):
        """A ``node.Error`` from submit_command fails the run with exit status -2."""
        def raise_error(c):
            raise node.Error("The error")

        self.action_run.node = mock.MagicMock()
        self.action_run.node.submit_command.side_effect = raise_error
        self.action_run.machine.transition('ready')
        assert not self.action_run.start()
        assert_equal(self.action_run.exit_status, -2)
        assert self.action_run.is_failed

    @mock.patch('tron.core.actionrun.filehandler', autospec=True)
    def test_build_action_command(self, mock_filehandler):
        """build_action_command creates, stores, and watches the runner's command."""
        autospec_method(self.action_run.watch)
        serializer = mock_filehandler.OutputStreamSerializer.return_value
        action_command = self.action_run.build_action_command()
        assert_equal(action_command, self.action_run.action_command)
        assert_equal(action_command, self.action_runner.create.return_value)
        self.action_runner.create.assert_called_with(
            self.action_run.id,
            self.action_run.command,
            serializer,
        )
        mock_filehandler.OutputStreamSerializer.assert_called_with(
            self.action_run.output_path, )
        self.action_run.watch.assert_called_with(action_command)

    def test_auto_retry(self):
        """Unsuccessful exits consume retries; the run fails once none remain."""
        self.action_run.retries_remaining = 2
        self.action_run.exit_statuses = []
        self.action_run.build_action_command()
        self.action_run.action_command.exit_status = -1
        self.action_run.machine.transition('start')

        # First unsuccessful exit: one retry consumed, run not failed.
        self.action_run._exit_unsuccessful(-1)
        assert self.action_run.retries_remaining == 1
        assert not self.action_run.is_failed

        # Second unsuccessful exit: last retry consumed, still not failed.
        self.action_run._exit_unsuccessful(-1)
        assert self.action_run.retries_remaining == 0
        assert not self.action_run.is_failed

        # Third unsuccessful exit: no retries left, so the run fails.
        self.action_run._exit_unsuccessful(-1)
        assert self.action_run.retries_remaining == 0
        assert self.action_run.is_failed

        # Only the two retried exits are expected in exit_statuses.
        assert_equal(self.action_run.exit_statuses, [-1, -1])

    def test_no_auto_retry_on_fail_not_running(self):
        """fail() on a run that was never started fails it without retrying."""
        self.action_run.retries_remaining = 2
        self.action_run.exit_statuses = []
        self.action_run.build_action_command()

        self.action_run.fail()
        assert self.action_run.retries_remaining == -1
        assert self.action_run.is_failed

        assert_equal(self.action_run.exit_statuses, [])
        assert_equal(self.action_run.exit_status, None)

    def test_no_auto_retry_on_fail_running(self):
        """fail() after the 'start' transition fails the run without retrying."""
        self.action_run.retries_remaining = 2
        self.action_run.exit_statuses = []
        self.action_run.build_action_command()
        self.action_run.machine.transition('start')

        self.action_run.fail()
        assert self.action_run.retries_remaining == -1
        assert self.action_run.is_failed

        assert_equal(self.action_run.exit_statuses, [])
        assert_equal(self.action_run.exit_status, None)

    def test_manual_retry(self):
        """retry() after fail(-1) records the exit status and leaves 0 retries."""
        self.action_run.retries_remaining = None
        self.action_run.exit_statuses = []
        self.action_run.build_action_command()
        self.action_run.action_command.exit_status = -1
        self.action_run.machine.transition('start')
        self.action_run.fail(-1)
        self.action_run.retry()
        # NOTE(review): bare attribute access with no ``assert`` -- this line
        # verifies nothing. Presumably an assertion was intended; confirm the
        # expected state after retry() before adding one.
        self.action_run.is_running
        assert_equal(self.action_run.exit_statuses, [-1])
        assert_equal(self.action_run.retries_remaining, 0)

    @mock.patch('twisted.internet.reactor.callLater', autospec=True)
    def test_retries_delay(self, callLater):
        """With retries_delay set, in_delay holds what reactor.callLater returned."""
        self.action_run.retries_delay = datetime.timedelta()
        self.action_run.retries_remaining = 2
        self.action_run.build_action_command()
        self.action_run.action_command.exit_status = -1
        self.action_run.machine.transition('start')
        callLater.return_value = "delayed call"
        self.action_run._exit_unsuccessful(-1)
        assert self.action_run.in_delay == "delayed call"

    def test_handler_running(self):
        """A RUNNING command event is handled and leaves the run running."""
        self.action_run.build_action_command()
        self.action_run.machine.transition('start')
        assert self.action_run.handler(
            self.action_run.action_command,
            ActionCommand.RUNNING,
        )
        assert self.action_run.is_running

    def test_handler_failstart(self):
        """A FAILSTART command event is handled and fails the run."""
        self.action_run.build_action_command()
        assert self.action_run.handler(
            self.action_run.action_command,
            ActionCommand.FAILSTART,
        )
        assert self.action_run.is_failed

    def test_handler_exiting_fail(self):
        """An EXITING event with exit status -1 fails the run with that status."""
        self.action_run.build_action_command()
        self.action_run.action_command.exit_status = -1
        self.action_run.machine.transition('start')
        assert self.action_run.handler(
            self.action_run.action_command,
            ActionCommand.EXITING,
        )
        assert self.action_run.is_failed
        assert_equal(self.action_run.exit_status, -1)

    def test_handler_exiting_success(self):
        """An EXITING event with exit status 0 marks the run succeeded."""
        self.action_run.build_action_command()
        self.action_run.action_command.exit_status = 0
        self.action_run.machine.transition('start')
        self.action_run.machine.transition('started')
        assert self.action_run.handler(
            self.action_run.action_command,
            ActionCommand.EXITING,
        )
        assert self.action_run.is_succeeded
        assert_equal(self.action_run.exit_status, 0)

    def test_handler_exiting_failunknown(self):
        """An EXITING event with no exit status leaves the run in unknown state."""
        self.action_run.action_command = mock.create_autospec(
            actioncommand.ActionCommand,
            exit_status=None,
        )
        self.action_run.machine.transition('start')
        self.action_run.machine.transition('started')
        assert self.action_run.handler(
            self.action_run.action_command,
            ActionCommand.EXITING,
        )
        assert self.action_run.is_unknown
        assert_equal(self.action_run.exit_status, None)

    def test_handler_unhandled(self):
        """A PENDING event is not handled: handler returns None, run stays scheduled."""
        self.action_run.build_action_command()
        assert self.action_run.handler(
            self.action_run.action_command,
            ActionCommand.PENDING,
        ) is None
        assert self.action_run.is_scheduled