Пример #1
0
    def test_kill_jobs(self):
        with TestAreaContext("job_queue_test_kill") as work_area:
            job_queue = create_queue(never_ending_script)

            assert job_queue.queue_size == 10
            assert job_queue.is_active()

            pool_sema = BoundedSemaphore(value=10)
            start_all(job_queue, pool_sema)

            # make sure never ending jobs are running
            wait_until(lambda: self.assertTrue(job_queue.is_active()))

            for job in job_queue.job_list:
                job.stop()

            wait_until(lambda: self.assertFalse(job_queue.is_active()))

            job_queue._differ.transition(job_queue.job_list)

            for q_index, job in enumerate(job_queue.job_list):
                assert job.status == JobStatusType.JOB_QUEUE_IS_KILLED
                iens = job_queue._differ.qindex_to_iens(q_index)
                assert job_queue.snapshot()[iens] == str(
                    JobStatusType.JOB_QUEUE_IS_KILLED)

            for job in job_queue.job_list:
                job.wait_for()
Пример #2
0
    def test_failing_jobs(self):
        with TestAreaContext("job_queue_test_add") as work_area:
            job_queue = create_queue(failing_script, max_submit=1)

            assert job_queue.queue_size == 10
            assert job_queue.is_active()

            pool_sema = BoundedSemaphore(value=10)
            start_all(job_queue, pool_sema)

            wait_until(
                func=(lambda: self.assertFalse(job_queue.is_active())), )

            for job in job_queue.job_list:
                job.wait_for()

            job_queue._differ.transition(job_queue.job_list)

            assert job_queue.fetch_next_waiting() is None

            for q_index, job in enumerate(job_queue.job_list):
                assert job.status == JobStatusType.JOB_QUEUE_FAILED
                iens = job_queue._differ.qindex_to_iens(q_index)
                assert job_queue.snapshot()[iens] == str(
                    JobStatusType.JOB_QUEUE_FAILED)
Пример #3
0
    def test_workflow_thread_cancel_external(self):
        with TestAreaContext(
                "python/job_queue/workflow_runner_external") as work_area:
            WorkflowCommon.createWaitJob()

            joblist = WorkflowJoblist()
            self.assertTrue(joblist.addJobFromFile("WAIT",
                                                   "external_wait_job"))
            self.assertTrue("WAIT" in joblist)

            workflow = Workflow("wait_workflow", joblist)

            self.assertEqual(len(workflow), 3)

            workflow_runner = WorkflowRunner(workflow,
                                             ert=None,
                                             context=SubstitutionList())

            self.assertFalse(workflow_runner.isRunning())

            with workflow_runner:
                wait_until(
                    lambda: self.assertTrue(workflow_runner.isRunning()))
                wait_until(lambda: self.assertFileExists("wait_started_0"))
                wait_until(lambda: self.assertFileExists("wait_finished_0"))
                wait_until(lambda: self.assertFileExists("wait_started_1"))
                workflow_runner.cancel()
                self.assertTrue(workflow_runner.isCancelled())

            self.assertFileDoesNotExist("wait_finished_1")
            self.assertFileDoesNotExist("wait_started_2")
            self.assertFileDoesNotExist("wait_cancelled_2")
            self.assertFileDoesNotExist("wait_finished_2")
Пример #4
0
    def test_add_jobs(self):
        with TestAreaContext("job_queue_test_add") as work_area:
            job_queue = create_queue(simple_script)

            assert job_queue.queue_size == 10
            assert job_queue.is_active()
            assert job_queue.fetch_next_waiting() is not None

            pool_sema = BoundedSemaphore(value=10)
            start_all(job_queue, pool_sema)

            for job in job_queue.job_list:
                job.stop()

            wait_until(lambda: self.assertFalse(job_queue.is_active()))

            for job in job_queue.job_list:
                job.wait_for()
Пример #5
0
    def test_timeout_jobs(self):
        with TestAreaContext("job_queue_test_kill") as work_area:
            job_numbers = set()

            def callback(arg):
                nonlocal job_numbers
                job_numbers.add(arg[0]["job_number"])

            job_queue = create_queue(
                never_ending_script,
                max_submit=1,
                max_runtime=5,
                callback_timeout=callback,
            )

            assert job_queue.queue_size == 10
            assert job_queue.is_active()

            pool_sema = BoundedSemaphore(value=10)
            start_all(job_queue, pool_sema)

            # make sure never ending jobs are running
            wait_until(lambda: self.assertTrue(job_queue.is_active()))

            wait_until(lambda: self.assertFalse(job_queue.is_active()))

            job_queue._differ.transition(job_queue.job_list)

            for q_index, job in enumerate(job_queue.job_list):
                assert job.status == JobStatusType.JOB_QUEUE_IS_KILLED
                iens = job_queue._differ.qindex_to_iens(q_index)
                assert job_queue.snapshot()[iens] == str(
                    JobStatusType.JOB_QUEUE_IS_KILLED)

            assert job_numbers == set(range(10))

            for job in job_queue.job_list:
                job.wait_for()
Пример #6
0
    def test_workflow_thread_cancel_ert_script(self):
        with TestAreaContext("python/job_queue/workflow_runner_ert_script"):
            WorkflowCommon.createWaitJob()

            joblist = WorkflowJoblist()
            self.assertTrue(joblist.addJobFromFile("WAIT", "wait_job"))
            self.assertTrue("WAIT" in joblist)

            workflow = Workflow("wait_workflow", joblist)

            self.assertEqual(len(workflow), 3)

            workflow_runner = WorkflowRunner(workflow)

            self.assertFalse(workflow_runner.isRunning())

            with workflow_runner:
                self.assertIsNone(workflow_runner.workflowResult())

                wait_until(lambda: self.assertTrue(workflow_runner.isRunning()))
                wait_until(lambda: self.assertFileExists("wait_started_0"))

                wait_until(lambda: self.assertFileExists("wait_finished_0"))

                wait_until(lambda: self.assertFileExists("wait_started_1"))

                workflow_runner.cancel()

                wait_until(lambda: self.assertFileExists("wait_cancelled_1"))

                self.assertTrue(workflow_runner.isCancelled())

            self.assertFileDoesNotExist("wait_finished_1")
            self.assertFileDoesNotExist("wait_started_2")
            self.assertFileDoesNotExist("wait_cancelled_2")
            self.assertFileDoesNotExist("wait_finished_2")
Пример #7
0
    def test_simulation_context(self):
        config_file = self.createTestPath("local/batch_sim/sleepy_time.ert")
        with ErtTestContext("res/sim/simulation_context",
                            config_file) as test_context:
            ert = test_context.getErt()

            size = 4
            even_mask = [True, False] * (size // 2)
            odd_mask = [False, True] * (size // 2)

            fs_manager = ert.getEnkfFsManager()
            even_half = fs_manager.getFileSystem("even_half")
            odd_half = fs_manager.getFileSystem("odd_half")

            # i represents geo_id
            case_data = [(i, {}) for i in range(size)]
            even_ctx = SimulationContext(ert, even_half, even_mask, 0,
                                         case_data)
            odd_ctx = SimulationContext(ert, odd_half, odd_mask, 0, case_data)

            for iens in range(size):
                # do we have the proper geo_id in run_args?
                if iens % 2 == 0:
                    self.assertFalse(even_ctx.isRealizationFinished(iens))
                    self.assertEqual(even_ctx.get_run_args(iens).geo_id, iens)
                else:
                    self.assertFalse(odd_ctx.isRealizationFinished(iens))
                    self.assertEqual(odd_ctx.get_run_args(iens).geo_id, iens)

            def any_is_running():
                return even_ctx.isRunning() or odd_ctx.isRunning()

            wait_until(func=(lambda: self.assertFalse(any_is_running())),
                       timeout=90)

            self.assertEqual(even_ctx.getNumFailed(), 0)
            self.assertEqual(even_ctx.getNumRunning(), 0)
            self.assertEqual(even_ctx.getNumSuccess(), size / 2)

            self.assertEqual(odd_ctx.getNumFailed(), 0)
            self.assertEqual(odd_ctx.getNumRunning(), 0)
            self.assertEqual(odd_ctx.getNumSuccess(), size / 2)

            even_state_map = even_half.getStateMap()
            odd_state_map = odd_half.getStateMap()

            for iens in range(size):
                if iens % 2 == 0:
                    self.assertTrue(even_ctx.didRealizationSucceed(iens))
                    self.assertFalse(even_ctx.didRealizationFail(iens))
                    self.assertTrue(even_ctx.isRealizationFinished(iens))

                    self.assertEqual(even_state_map[iens],
                                     RealizationStateEnum.STATE_HAS_DATA)
                else:
                    self.assertTrue(odd_ctx.didRealizationSucceed(iens))
                    self.assertFalse(odd_ctx.didRealizationFail(iens))
                    self.assertTrue(odd_ctx.isRealizationFinished(iens))

                    self.assertEqual(odd_state_map[iens],
                                     RealizationStateEnum.STATE_HAS_DATA)
Пример #8
0
    def test_terminate_jobs(self):

        # Executes it self recursively and sleeps for 100 seconds
        with open("dummy_executable", "w") as f:
            f.write(
                """#!/usr/bin/env python
import sys, os, time
counter = eval(sys.argv[1])
if counter > 0:
    os.fork()
    os.execv(sys.argv[0],[sys.argv[0], str(counter - 1) ])
else:
    time.sleep(100)"""
            )

        executable = os.path.realpath("dummy_executable")
        os.chmod("dummy_executable", stat.S_IRWXU | stat.S_IRWXO | stat.S_IRWXG)

        self.job_list = {
            "umask": "0002",
            "DATA_ROOT": "",
            "global_environment": {},
            "global_update_path": {},
            "jobList": [
                {
                    "name": "dummy_executable",
                    "executable": executable,
                    "target_file": None,
                    "error_file": None,
                    "start_file": None,
                    "stdout": "dummy.stdout",
                    "stderr": "dummy.stderr",
                    "stdin": None,
                    "argList": ["3"],
                    "environment": None,
                    "exec_env": None,
                    "license_path": None,
                    "max_running_minutes": None,
                    "max_running": None,
                    "min_arg": 1,
                    "arg_types": [],
                    "max_arg": None,
                }
            ],
            "run_id": "",
            "ert_pid": "",
        }

        with open("jobs.json", "w") as f:
            f.write(json.dumps(self.job_list))

        # macOS doesn't provide /usr/bin/setsid, so we roll our own
        with open("setsid", "w") as f:
            f.write(
                dedent(
                    """\
                #!/usr/bin/env python
                import os
                import sys
                os.setsid()
                os.execvp(sys.argv[1], sys.argv[1:])
                """
                )
            )
        os.chmod("setsid", 0o755)

        job_dispatch_script = importlib.util.find_spec("job_runner.job_dispatch").origin
        job_dispatch_process = Popen(
            [
                os.getcwd() + "/setsid",
                sys.executable,
                job_dispatch_script,
                os.getcwd(),
            ]
        )

        p = psutil.Process(job_dispatch_process.pid)

        # Three levels of processes should spawn 8 children in total
        wait_until(lambda: self.assertEqual(len(p.children(recursive=True)), 8))

        p.terminate()

        wait_until(lambda: self.assertEqual(len(p.children(recursive=True)), 0))

        os.wait()  # allow os to clean up zombie processes