Пример #1
0
def test_report_with_failed_finish_message_argument(unused_tcp_port):
    host = "localhost"
    url = f"ws://{host}:{unused_tcp_port}"
    reporter = Event(evaluator_url=url)
    job1 = Job({"name": "job1", "stdout": "stdout", "stderr": "stderr"}, 0)

    lines = []
    with _mock_ws_thread(host, unused_tcp_port, lines):
        reporter.report(Init([job1], 1, 19, ee_id="ee_id", real_id=0, step_id=0))
        reporter.report(Running(job1, 100, 10))
        reporter.report(Finish().with_error("massive_failure"))

    assert len(lines) == 1
Пример #2
0
def test_report_only_job_running_for_successful_run(unused_tcp_port):
    host = "localhost"
    url = f"ws://{host}:{unused_tcp_port}"
    reporter = Event(evaluator_url=url)
    job1 = Job({"name": "job1", "stdout": "stdout", "stderr": "stderr"}, 0)

    lines = []
    with _mock_ws_thread(host, unused_tcp_port, lines):
        reporter.report(Init([job1], 1, 19, ee_id="ee_id", real_id=0, step_id=0))
        reporter.report(Running(job1, 100, 10))
        reporter.report(Finish())

    assert len(lines) == 1
Пример #3
0
def test_report_with_successful_finish_message_argument(tmpdir):
    reporter = Event(event_log=tmpdir / "event_log")
    job1 = Job({"name": "job1", "stdout": "stdout", "stderr": "stderr"}, 0)

    reporter.report(Init([job1], 1, 19, ee_id="ee_id", real_id=0, stage_id=0))
    reporter.report(Running(job1, 100, 10))
    reporter.report(Finish())

    with open(reporter._event_log, "r") as f:
        lines = f.readlines()
        assert len(lines) == 3
        event = json.loads(lines[2])
        assert event["type"] == _FM_STEP_SUCCESS
Пример #4
0
def test_report_with_failed_finish_message_argument(tmpdir):
    reporter = Event(event_log=tmpdir / "event_log")
    job1 = Job({"name": "job1", "stdout": "stdout", "stderr": "stderr"}, 0)

    reporter.report(Init([job1], 1, 19, ee_id="ee_id", real_id=0, stage_id=0))
    reporter.report(Running(job1, 100, 10))
    reporter.report(Finish().with_error("massive_failure"))

    with open(reporter._event_log, "r") as f:
        lines = f.readlines()
        assert len(lines) == 3
        event = json.loads(lines[2])
        assert event["type"] == _FM_STEP_FAILURE
        assert event["data"]["error_msg"] == "massive_failure"
Пример #5
0
def test_report_with_running_message_argument(tmpdir):
    reporter = Event(event_log=tmpdir / "event_log")
    job1 = Job({"name": "job1", "stdout": "stdout", "stderr": "stderr"}, 0)

    reporter.report(Init([job1], 1, 19, ee_id="ee_id", real_id=0, stage_id=0))
    reporter.report(Running(job1, 100, 10))

    with open(reporter._event_log, "r") as f:
        lines = f.readlines()
        assert len(lines) == 2
        event = json.loads(lines[1])
        assert event["type"] == _FM_JOB_RUNNING
        assert event["data"]["max_memory_usage"] == 100
        assert event["data"]["current_memory_usage"] == 10
Пример #6
0
    def test_report_with_running_message_argument(self):
        msg = Running(Job({"name": "job1"}, 0), 100, 10)
        self.reporter.status_dict = self.reporter._init_job_status_dict(
            msg.timestamp, 0, [msg.job])

        self.reporter.report(msg)

        with open(self.reporter.STATUS_json, "r") as f:
            contents = "".join(f.readlines())
            self.assertIn('"status": "Running"', contents,
                          "status.json missing status")
            self.assertIn('"max_memory_usage": 100', contents,
                          "status.json missing max_memory_usage")
            self.assertIn('"current_memory_usage": 10', contents,
                          "status.json missing current_memory_usage")
Пример #7
0
def test_report_with_running_message_argument(unused_tcp_port):
    host = "localhost"
    url = f"ws://{host}:{unused_tcp_port}"
    reporter = Event(evaluator_url=url)
    job1 = Job({"name": "job1", "stdout": "stdout", "stderr": "stderr"}, 0)

    lines = []
    with _mock_ws_thread(host, unused_tcp_port, lines):
        reporter.report(Init([job1], 1, 19, ee_id="ee_id", real_id=0, step_id=0))
        reporter.report(Running(job1, 100, 10))
        reporter.report(Finish())

    assert len(lines) == 1
    event = json.loads(lines[0])
    assert event["type"] == _FM_JOB_RUNNING
    assert event["data"]["max_memory_usage"] == 100
    assert event["data"]["current_memory_usage"] == 10
Пример #8
0
    def run(self):
        start_message = Start(self)

        errors = self._check_job_files()

        errors.extend(self._assert_arg_list())

        self._dump_exec_env()

        if errors:
            yield start_message.with_error("\n".join(errors))
            return

        yield start_message

        executable = self.job_data.get("executable")
        assert_file_executable(executable)

        arg_list = [executable]
        if self.job_data.get("argList"):
            arg_list += self.job_data["argList"]

        if self.job_data.get("stdin"):
            stdin = open(self.job_data.get("stdin"))
        else:
            stdin = None

        if self.std_err:
            stderr = open(self.std_err, "w")
        else:
            stderr = None

        if self.std_out:
            stdout = open(self.std_out, "w")
        else:
            stdout = None

        if self.job_data.get("target_file"):
            target_file_mtime = 0
            if os.path.exists(self.job_data["target_file"]):
                stat = os.stat(self.job_data["target_file"])
                target_file_mtime = stat.st_mtime

        exec_env = self.job_data.get("exec_env")
        if exec_env:
            exec_name, _ = os.path.splitext(
                os.path.basename(self.job_data.get("executable"))
            )
            with open("%s_exec_env.json" % exec_name, "w") as f:
                f.write(json.dumps(exec_env))

        max_running_minutes = self.job_data.get("max_running_minutes")
        run_start_time = dt.now()

        proc = Popen(
            arg_list,
            stdin=stdin,
            stdout=stdout,
            stderr=stderr,
            env=self.job_data.get("environment"),
        )

        exit_code = None

        process = Process(proc.pid)
        max_memory_usage = 0
        while exit_code is None:
            try:
                memory = process.memory_info().rss
            except (NoSuchProcess, AccessDenied, ZombieProcess):
                """In case of a process that has died and is in some
                transitional state, we ignore any failures. Only seen on OSX
                thus far.
                See https://github.com/giampaolo/psutil/issues/1044#issuecomment-298745532
                """
                memory = 0
            if memory > max_memory_usage:
                max_memory_usage = memory

            yield Running(self, max_memory_usage, memory)

            try:
                exit_code = process.wait(timeout=self.MEMORY_POLL_PERIOD)
            except TimeoutExpired:
                run_time = dt.now() - run_start_time
                if (
                    max_running_minutes is not None
                    and run_time.seconds > max_running_minutes * 60
                ):
                    """
                    If the spawned process is not in the same process group
                    as the callee (job_dispatch), we will kill the process
                    group explicitly.

                    Propagating the unsuccessful Exited message will kill the
                    callee group. See job_dispatch.py.
                    """
                    process_group_id = os.getpgid(proc.pid)
                    this_group_id = os.getpgid(os.getpid())
                    if process_group_id != this_group_id:
                        os.killpg(process_group_id, signal.SIGKILL)

                    yield Exited(self, exit_code).with_error(
                        "Job:{} has been running for more than {} minutes - explicitly killed.".format(
                            self.name(), max_running_minutes
                        )
                    )
                    return

        exited_message = Exited(self, exit_code)

        if exit_code != 0:
            yield exited_message.with_error(
                "Process exited with status code {}".format(exit_code)
            )
            return

        # exit_code is 0

        if self.job_data.get("error_file"):
            if os.path.exists(self.job_data["error_file"]):
                yield exited_message.with_error(
                    "Found the error file:{} - job failed.".format(
                        self.job_data["error_file"]
                    )
                )
                return

        if self.job_data.get("target_file"):
            target_file_error = self._check_target_file_is_written(target_file_mtime)
            if target_file_error:
                yield exited_message.with_error(target_file_error)
                return

        yield exited_message