示例#1
0
    def test_report_with_failed_exit_message_argument(self):
        msg = Exited(Job({"name": "job1"}, 0), 1).with_error("massive_failure")
        self.reporter.status_dict = self.reporter._init_job_status_dict(
            msg.timestamp, 0, [msg.job])

        self.reporter.report(msg)

        with open(self.reporter.STATUS_file, "r") as f:
            self.assertIn("EXIT: {}/{}".format(1, "massive_failure"),
                          f.readline())
        with open(self.reporter.ERROR_file, "r") as f:
            contents = "".join(f.readlines())
            self.assertIn("<job>job1</job>", contents,
                          "ERROR file missing job")
            self.assertIn(
                "<reason>massive_failure</reason>",
                contents,
                "ERROR file missing reason",
            )
            self.assertIn(
                "<stderr: Not redirected>",
                contents,
                "ERROR had invalid stderr information",
            )
        with open(self.reporter.STATUS_json, "r") as f:
            contents = "".join(f.readlines())
            self.assertIn('"status": "Failure"', contents,
                          "status.json missing Failure status")
            self.assertIn(
                '"error": "massive_failure"',
                contents,
                "status.json missing error message",
            )
        self.assertIsNotNone(self.reporter.status_dict["jobs"][0]["end_time"])
示例#2
0
    def test_exited_success_msg(self, post_mock):
        self.reporter.start_time = dt.now()

        self.reporter.report(Exited(None, 9))
        _, data = post_mock.call_args

        self.assertTrue(post_mock.called)
        self.assertIn('"status": "OK"', data["data"])
示例#3
0
    def test_report_with_successful_exit_message_argument(self):
        msg = Exited(Job({"name": "job1"}, 0), 0)
        self.reporter.status_dict = self.reporter._init_job_status_dict(
            msg.timestamp, 0, [msg.job])

        self.reporter.report(msg)

        with open(self.reporter.STATUS_json, "r") as f:
            contents = "".join(f.readlines())
            self.assertIn('"status": "Success"', contents,
                          "status.json missing Success status")
示例#4
0
def test_report_with_successful_exit_message_argument(tmpdir):
    reporter = Event(event_log=tmpdir / "event_log")
    job1 = Job({"name": "job1", "stdout": "stdout", "stderr": "stderr"}, 0)
    reporter.report(Init([job1], 1, 19, ee_id="ee_id", real_id=0, stage_id=0))
    reporter.report(Exited(job1, 0))

    with open(reporter._event_log, "r") as f:
        lines = f.readlines()
        assert len(lines) == 2
        event = json.loads(lines[1])
        assert event["type"] == _FM_JOB_SUCCESS
示例#5
0
    def test_status_file_is_correct(self):
        """The STATUS file is a file to which we append data about jobs as they
        are run. So this involves multiple reports, and should be tested as
        such.
        See https://github.com/equinor/libres/issues/764
        """
        j_1 = Job({"name": "j_1", "executable": "", "argList": []}, 0)
        j_2 = Job({"name": "j_2", "executable": "", "argList": []}, 0)
        init = Init([j_1, j_2], 1, 1)
        start_j_1 = Start(j_1)
        exited_j_1 = Exited(j_1, 0)
        start_j_2 = Start(j_2)
        exited_j_2 = Exited(j_2, 9).with_error("failed horribly")

        for msg in [init, start_j_1, exited_j_1, start_j_2, exited_j_2]:
            self.reporter.report(msg)

        expected_j1_line = (
            "{:32}: {start_ts:%H:%M:%S} .... {end_ts:%H:%M:%S}  \n".
            format(  # noqa
                j_1.name(),
                start_ts=start_j_1.timestamp,
                end_ts=exited_j_1.timestamp))
        expected_j2_line = "{:32}: {start_ts:%H:%M:%S} .... {end_ts:%H:%M:%S}   EXIT: {code}/{msg}\n".format(  # noqa
            j_2.name(),
            start_ts=start_j_2.timestamp,
            end_ts=exited_j_2.timestamp,
            code=exited_j_2.exit_code,
            msg=exited_j_2.error_message,
        )

        with open(self.reporter.STATUS_file, "r") as f:
            for expected in [
                    "Current host",
                    expected_j1_line,
                    expected_j2_line,
            ]:  # noqa
                self.assertIn(expected, f.readline())

            # EOF
            self.assertEqual("", f.readline())
示例#6
0
def test_report_with_failed_exit_message_argument(tmpdir):
    reporter = Event(event_log=tmpdir / "event_log")
    job1 = Job({"name": "job1", "stdout": "stdout", "stderr": "stderr"}, 0)
    reporter.report(Init([job1], 1, 19, ee_id="ee_id", real_id=0, stage_id=0))
    reporter.report(Exited(job1, 1).with_error("massive_failure"))

    with open(reporter._event_log, "r") as f:
        lines = f.readlines()
        assert len(lines) == 2
        event = json.loads(lines[1])
        assert event["type"] == _FM_JOB_FAILURE
        assert event["data"]["error_msg"] == "massive_failure"
示例#7
0
    def test_failed_job_is_reported(self, post_mock):
        self.reporter.start_time = dt.now()
        job = Job(
            {
                "name": "failing job",
                "executable": "/dev/null",
                "argList": []
            }, 0)

        self.reporter.report(Exited(job, 9).with_error("failed"))
        _, data = post_mock.call_args

        self.assertTrue(post_mock.called, "post not called for failed Exit")
        self.assertIn('"status": "exit"', data["data"], "no exit in data")
        self.assertIn('"error": true', data["data"], "no set err flag in data")
示例#8
0
def test_report_with_successful_exit_message_argument(unused_tcp_port):
    host = "localhost"
    url = f"ws://{host}:{unused_tcp_port}"
    reporter = Event(evaluator_url=url)
    job1 = Job({"name": "job1", "stdout": "stdout", "stderr": "stderr"}, 0)

    lines = []
    with _mock_ws_thread(host, unused_tcp_port, lines):
        reporter.report(Init([job1], 1, 19, ee_id="ee_id", real_id=0, step_id=0))
        reporter.report(Exited(job1, 0))
        reporter.report(Finish().with_error("failed"))

    assert len(lines) == 1
    event = json.loads(lines[0])
    assert event["type"] == _FM_JOB_SUCCESS
示例#9
0
    def test_successful_job_not_reported(self, post_mock):
        self.reporter.report(Exited(None, 9))

        self.assertFalse(post_mock.called, "post called on successful Exit")
示例#10
0
    def run(self):
        start_message = Start(self)

        errors = self._check_job_files()

        errors.extend(self._assert_arg_list())

        self._dump_exec_env()

        if errors:
            yield start_message.with_error("\n".join(errors))
            return

        yield start_message

        executable = self.job_data.get("executable")
        assert_file_executable(executable)

        arg_list = [executable]
        if self.job_data.get("argList"):
            arg_list += self.job_data["argList"]

        if self.job_data.get("stdin"):
            stdin = open(self.job_data.get("stdin"))
        else:
            stdin = None

        if self.std_err:
            stderr = open(self.std_err, "w")
        else:
            stderr = None

        if self.std_out:
            stdout = open(self.std_out, "w")
        else:
            stdout = None

        if self.job_data.get("target_file"):
            target_file_mtime = 0
            if os.path.exists(self.job_data["target_file"]):
                stat = os.stat(self.job_data["target_file"])
                target_file_mtime = stat.st_mtime

        exec_env = self.job_data.get("exec_env")
        if exec_env:
            exec_name, _ = os.path.splitext(
                os.path.basename(self.job_data.get("executable"))
            )
            with open("%s_exec_env.json" % exec_name, "w") as f:
                f.write(json.dumps(exec_env))

        max_running_minutes = self.job_data.get("max_running_minutes")
        run_start_time = dt.now()

        proc = Popen(
            arg_list,
            stdin=stdin,
            stdout=stdout,
            stderr=stderr,
            env=self.job_data.get("environment"),
        )

        exit_code = None

        process = Process(proc.pid)
        max_memory_usage = 0
        while exit_code is None:
            try:
                memory = process.memory_info().rss
            except (NoSuchProcess, AccessDenied, ZombieProcess):
                """In case of a process that has died and is in some
                transitional state, we ignore any failures. Only seen on OSX
                thus far.
                See https://github.com/giampaolo/psutil/issues/1044#issuecomment-298745532
                """
                memory = 0
            if memory > max_memory_usage:
                max_memory_usage = memory

            yield Running(self, max_memory_usage, memory)

            try:
                exit_code = process.wait(timeout=self.MEMORY_POLL_PERIOD)
            except TimeoutExpired:
                run_time = dt.now() - run_start_time
                if (
                    max_running_minutes is not None
                    and run_time.seconds > max_running_minutes * 60
                ):
                    """
                    If the spawned process is not in the same process group
                    as the callee (job_dispatch), we will kill the process
                    group explicitly.

                    Propagating the unsuccessful Exited message will kill the
                    callee group. See job_dispatch.py.
                    """
                    process_group_id = os.getpgid(proc.pid)
                    this_group_id = os.getpgid(os.getpid())
                    if process_group_id != this_group_id:
                        os.killpg(process_group_id, signal.SIGKILL)

                    yield Exited(self, exit_code).with_error(
                        "Job:{} has been running for more than {} minutes - explicitly killed.".format(
                            self.name(), max_running_minutes
                        )
                    )
                    return

        exited_message = Exited(self, exit_code)

        if exit_code != 0:
            yield exited_message.with_error(
                "Process exited with status code {}".format(exit_code)
            )
            return

        # exit_code is 0

        if self.job_data.get("error_file"):
            if os.path.exists(self.job_data["error_file"]):
                yield exited_message.with_error(
                    "Found the error file:{} - job failed.".format(
                        self.job_data["error_file"]
                    )
                )
                return

        if self.job_data.get("target_file"):
            target_file_error = self._check_target_file_is_written(target_file_mtime)
            if target_file_error:
                yield exited_message.with_error(target_file_error)
                return

        yield exited_message
示例#11
0
    def test_exited_failure_msg(self, post_mock):
        self.reporter.start_time = dt.now()

        self.reporter.report(Exited(None, 9))

        self.assertTrue(post_mock.called)