Exemplo n.º 1
0
    def __init__(self, ert, sim_fs, mask, itr, case_data):
        """Build the run context, create the run paths and launch the run.

        A job queue is created from the queue configuration of ``ert`` and
        its max job duration is set to the max runtime reported by the
        analysis configuration. The ``geo_id`` of every realization that is
        enabled in ``mask`` is taken from ``case_data`` before the run paths
        are created.

        :param ert: provider of configuration and of the simulation runner
            (noted as res.enkf.EnKFMain in the original code)
        :param sim_fs: storage handed on to the run context
        :param mask: per-realization flags, indexed by realization position
        :param itr: iteration number handed on to the run context
        :param case_data: sequence of two-element entries; the first element
            of entry ``i`` becomes the geo_id of realization ``i``
        """
        self._ert = ert  # :type: res.enkf.EnKFMain
        runtime_limit = ert.analysisConfig().get_max_runtime()
        self._mask = mask

        queue = ert.get_queue_config().create_job_queue()
        queue.set_max_job_duration(runtime_limit)
        self._queue_manager = JobQueueManager(queue)

        substitutions = self._ert.getDataKW()
        runpath_format = self._ert.getModelConfig().getRunpathFormat()
        jobname_format = self._ert.getModelConfig().getJobnameFormat()

        self._run_context = ErtRunContext(
            EnkfRunType.ENSEMBLE_EXPERIMENT,
            sim_fs,
            None,
            mask,
            runpath_format,
            jobname_format,
            substitutions,
            itr,
        )

        # The run context does not know the geo_id values; copy them over
        # for every realization that is enabled in the mask.
        for position, (geo_id, _unused) in enumerate(case_data):
            if not mask[position]:
                continue
            self._run_context[position].geo_id = geo_id

        self._ert.getEnkfSimulationRunner().createRunPath(self._run_context)
        self._sim_thread = self._run_simulations_simple_step()

        # Do not return before the queue reports that it is running (or the
        # whole run is already over), so job status queries are sane.
        while True:
            if not self.isRunning() or self._queue_manager.isRunning():
                break
            sleep(0.1)
Exemplo n.º 2
0
    def test_simulation_model(self):
        """Check forward model arguments in memory and in the written jobs.json.

        The ``local/simulation_model`` test data is copied into a fresh test
        area, the ``sim_kw.ert`` configuration is loaded, the argument lists
        of the four forward model jobs are checked, run paths are created
        and finally the ``argList`` entries of the generated ``jobs.json``
        are compared with the expected, substituted values.
        """
        with TestAreaContext('enkf_test_sim_model_kw') as work_area:
            base_path = os.getcwd()
            source_path = self.createTestPath('local/simulation_model')

            work_area.copy_directory(source_path)
            dir_ert = os.path.join(base_path, 'simulation_model')
            # unittest assertions are used instead of bare ``assert`` so the
            # checks survive ``python -O`` and report like the other checks.
            self.assertTrue(os.path.isdir(dir_ert))

            file_ert = os.path.join(dir_ert, 'sim_kw.ert')
            self.assertTrue(os.path.isfile(file_ert))

            with ErtTestContext("sim_kw",
                                model_config=file_ert,
                                store_area=True) as ctx:
                ert = ctx.getErt()
                fs_manager = ert.getEnkfFsManager()
                result_fs = fs_manager.getCurrentFileSystem()

                model_config = ert.getModelConfig()
                forward_model = model_config.getForwardModel()
                self.assertEqual(forward_model.get_size(), 4)
                self.assertEqual(
                    forward_model.iget_job(3).get_arglist(), ['WORD_A'])
                self.assertEqual(
                    forward_model.iget_job(0).get_arglist(), ['<ARGUMENT>'])
                self.assertEqual(
                    forward_model.iget_job(1).get_arglist(),
                    ['Hello', 'True', '3.14', '4'])
                self.assertEqual(
                    forward_model.iget_job(2).get_arglist(),
                    ['word', '<ECLBASE>'])

                runpath_fmt = model_config.getRunpathFormat()
                jobname_fmt = model_config.getJobnameFormat()

                subst_list = ert.getDataKW()
                itr = 0
                mask = BoolVector(default_value=True, initial_size=1)

                run_context = ErtRunContext.ensemble_experiment(
                    result_fs, mask, runpath_fmt, jobname_fmt, subst_list, itr)
                ert.getEnkfSimulationRunner().createRunPath(run_context)
                queue_config = ert.get_queue_config()
                self.assertEqual(queue_config.num_cpu, 5)
                os.chdir('storage/sim_kw/runpath/realisation-0/iter-0')
                self.assertTrue(os.path.isfile('jobs.json'))
                # Read the file completely, then assert with the file closed.
                with open("jobs.json", "r") as f:
                    data = json.load(f)

                jobList = data["jobList"]
                old_job_A = jobList[3]
                self.assertEqual(old_job_A["argList"], ['WORD_A'])
                old_job_B = jobList[0]
                self.assertEqual(old_job_B["argList"], ['yy'])
                new_job_A = jobList[1]
                self.assertEqual(new_job_A["argList"],
                                 ['Hello', 'True', '3.14', '4'])
                new_job_B = jobList[2]
                self.assertEqual(new_job_B["argList"], ['word', 'SIM_KW'])
Exemplo n.º 3
0
    def run_ensemble_evaluator(self, run_context: ErtRunContext,
                               ee_config: EvaluatorServerConfig) -> int:
        """Evaluate the ensemble of ``run_context`` and report the successes.

        After the evaluation every still-active realization whose run status
        is JOB_LOAD_FAILURE or JOB_RUN_FAILURE is deactivated in
        ``run_context`` and the simulation storage is synced.

        :param run_context: the realizations to evaluate
        :param ee_config: configuration handed on to the evaluator
        :return: the value of ``run_and_get_successful_realizations()``
        """
        if run_context.get_step():
            self.ert().eclConfig().assert_restart()

        builder = EnsembleBuilder.from_legacy(
            run_context,
            self.get_forward_model(),
            self._queue_config,
            self.ert().analysisConfig(),
            self.ert().resConfig(),
        )
        ensemble = builder.build()

        self.ert().initRun(run_context)

        iteration = run_context.get_iter()
        # The evaluator id is the part of a fresh uuid1 before the first "-".
        evaluator_id = str(uuid.uuid1()).split("-", maxsplit=1)[0]
        evaluator = EnsembleEvaluator(
            ensemble,
            ee_config,
            iteration,
            ee_id=evaluator_id,
        )
        successful_count = evaluator.run_and_get_successful_realizations()

        failure_states = (
            RunStatusType.JOB_LOAD_FAILURE,
            RunStatusType.JOB_RUN_FAILURE,
        )
        for iens, run_arg in enumerate(run_context):
            if not run_context.is_active(iens):
                continue
            if run_arg.run_status in failure_states:
                run_context.deactivate_realization(iens)

        run_context.get_sim_fs().fsync()
        return successful_count
Exemplo n.º 4
0
    def __init__(self, ert, init_fs, result_fs, mask, itr, verbose=False):
        """Start a job queue and prepare the run context for submissions.

        :param ert: provider of the configuration objects (noted as
            res.enkf.EnKFMain in the original code)
        :param init_fs: first storage argument of the run context
        :param result_fs: second storage argument of the run context
        :param mask: realization mask; ``mask.count()`` is passed to the
            queue manager when the queue is started
        :param itr: iteration number handed on to the run context
        :param verbose: forwarded to ``startQueue``
        """
        self._ert = ert  # :type: res.enkf.EnKFMain
        # NOTE(review): the value is read but never applied to the queue in
        # this method - confirm whether that is intended.
        max_runtime = ert.analysisConfig().get_max_runtime()
        self._mask = mask

        queue = ert.get_queue_config().create_job_queue()
        self._queue_manager = JobQueueManager(queue)
        self._queue_manager.startQueue(mask.count(), verbose=verbose)
        self._run_args = {}  # :type: dict[int, RunArg]

        self._thread_pool = CThreadPool(8)
        self._thread_pool.addTaskFunction(
            "submitJob", ENKF_LIB, "enkf_main_isubmit_job__")

        substitutions = self._ert.getDataKW()
        runpath_format = self._ert.getModelConfig().getRunpathFormat()
        self._run_context = ErtRunContext(
            EnkfRunType.ENSEMBLE_EXPERIMENT,
            init_fs,
            result_fs,
            None,
            mask,
            runpath_format,
            substitutions,
            itr,
        )
Exemplo n.º 5
0
    def run_ensemble_evaluator(self, run_context: ErtRunContext,
                               ee_config: EvaluatorServerConfig) -> int:
        """Evaluate the ensemble of ``run_context`` and report the successes.

        Before the evaluation the mask of the run context is passed to
        ``deselectMatching`` of the state map together with the
        STATE_LOAD_FAILURE and STATE_PARENT_FAILURE flags. Afterwards every
        still-active realization whose run status is JOB_LOAD_FAILURE or
        JOB_RUN_FAILURE is deactivated and the simulation storage is synced.

        :param run_context: the realizations to evaluate
        :param ee_config: configuration handed on to the evaluator
        :return: the value of ``run_and_get_successful_realizations()``
        """
        if run_context.get_step():
            self.ert().eclConfig().assert_restart()

        active_mask = run_context.get_mask()
        state_map = run_context.get_sim_fs().getStateMap()
        state_map.deselectMatching(
            active_mask,
            RealizationStateEnum.STATE_LOAD_FAILURE
            | RealizationStateEnum.STATE_PARENT_FAILURE,
        )

        builder = create_ensemble_builder_from_legacy(
            run_context,
            self.get_forward_model(),
            self._queue_config,
            self.ert().analysisConfig(),
            self.ert().resConfig(),
        )
        ensemble = builder.build()

        self.ert().initRun(run_context)

        iteration = run_context.get_iter()
        # The evaluator id is the part of a fresh uuid1 before the first "-".
        evaluator_id = str(uuid.uuid1()).partition("-")[0]
        evaluator = EnsembleEvaluator(
            ensemble,
            ee_config,
            iteration,
            ee_id=evaluator_id,
        )
        successful_count = evaluator.run_and_get_successful_realizations()

        failure_states = (
            RunStatusType.JOB_LOAD_FAILURE,
            RunStatusType.JOB_RUN_FAILURE,
        )
        # Only active realizations are looked up in the run context.
        for iens in range(len(run_context)):
            if not run_context.is_active(iens):
                continue
            if run_context[iens].run_status in failure_states:
                run_context.deactivate_realization(iens)

        run_context.get_sim_fs().fsync()
        return successful_count
Exemplo n.º 6
0
    def from_legacy(
        run_context: ErtRunContext,
        forward_model: ForwardModel,
        queue_config: QueueConfig,
        analysis_config: AnalysisConfig,
        res_config: ResConfig,
    ) -> "_EnsembleBuilder":
        """Assemble an ensemble builder from the legacy configuration objects.

        One realization builder is added per entry of ``run_context``. An
        active realization gets a single step named "legacy step" holding
        one job builder per forward model job, plus the run settings taken
        from ``run_arg``, ``analysis_config`` and ``res_config``. An inactive
        realization is added without any step.

        :param run_context: the realizations and their run arguments
        :param forward_model: the jobs, read via ``len`` and ``iget_job``
        :param queue_config: handed on to ``set_legacy_dependencies``
        :param analysis_config: source of the max runtime
        :param res_config: source of the CPU count and the job script
        :return: the populated builder
        """
        ensemble_builder = _EnsembleBuilder().set_legacy_dependencies(
            queue_config,
            analysis_config,
        )

        # The queue configuration is asked first; the ecl configuration is
        # the fallback when the queue configuration reports zero.
        cpu_count = res_config.queue_config.num_cpu
        if cpu_count == 0:
            cpu_count = res_config.ecl_config.num_cpu

        for iens, run_arg in enumerate(run_context):
            is_active = run_context.is_active(iens)
            realization = _RealizationBuilder().set_iens(iens).active(is_active)
            legacy_step = (
                _StepBuilder().set_id("0").set_dummy_io().set_name("legacy step")
            )
            if is_active:
                realization.active(True).add_step(legacy_step)
                for job_index in range(len(forward_model)):
                    ext_job = forward_model.iget_job(job_index)
                    job_builder = (
                        _LegacyJobBuilder()
                        .set_id(str(job_index))
                        .set_name(ext_job.name())
                        .set_ext_job(ext_job)
                    )
                    legacy_step.add_job(job_builder)
                (
                    legacy_step.set_max_runtime(analysis_config.get_max_runtime())
                    .set_callback_arguments((run_arg, res_config))
                    .set_done_callback(EnKFState.forward_model_ok_callback)
                    .set_exit_callback(EnKFState.forward_model_exit_callback)
                    .set_num_cpu(cpu_count)
                    .set_run_path(run_arg.runpath)
                    .set_job_script(res_config.queue_config.job_script)
                    .set_job_name(run_arg.job_name)
                    .set_run_arg(run_arg)
                )
            ensemble_builder.add_realization(realization)
        return ensemble_builder
Exemplo n.º 7
0
    def addSimulation(self, iens, target_fs):
        """Create the run path for realization ``iens`` and submit it.

        :param iens: realization number; must satisfy
            ``0 <= iens < self._size`` and must not be queued already
        :param target_fs: storage handed on to the run argument
        :raises UserWarning: if ``iens`` is negative, not below
            ``self._size``, or already present in ``self._run_args``
        """
        # A negative number would otherwise slip past the upper-bound check
        # below and be handed on to the realization lookup.
        if iens < 0:
            raise UserWarning("Realization number out of range: %d < 0" % iens)

        if iens >= self._size:
            raise UserWarning("Realization number out of range: %d >= %d" %
                              (iens, self._size))

        if iens in self._run_args:
            raise UserWarning("Realization number: '%d' already queued" % iens)

        runpath_fmt = self._ert.getModelConfig().getRunpathFormat()
        member = self._ert.getRealisation(iens)
        runpath = ErtRunContext.createRunpath(iens, runpath_fmt,
                                              member.getDataKW())
        run_arg = RunArg.createEnsembleExperimentRunArg(
            target_fs, iens, runpath)

        self._ert.createRunPath(run_arg)

        # Remember the run argument so a second submission is rejected.
        self._run_args[iens] = run_arg
        self._thread_pool.submitJob(ArgPack(self._ert, run_arg))
Exemplo n.º 8
0
    def test_transfer_var(self):
        """Check that environment settings end up in the written jobs.json.

        The ``local/snake_oil_no_data`` test data is copied into a fresh
        test area, run paths are created for a single realization and the
        ``global_environment`` and ``global_update_path`` sections of the
        generated ``jobs.json`` are compared with the expected values.
        """
        with TestAreaContext('enkf_test_transfer_env') as work_area:
            base_path = os.getcwd()
            source_path = self.createTestPath('local/snake_oil_no_data')

            work_area.copy_directory(source_path)
            dir_ert = os.path.join(base_path, 'snake_oil_no_data')
            # unittest assertions are used instead of bare ``assert`` so the
            # checks survive ``python -O`` and report like the other checks.
            self.assertTrue(os.path.isdir(dir_ert))

            file_ert = os.path.join(dir_ert, 'snake_oil.ert')
            self.assertTrue(os.path.isfile(file_ert))

            with ErtTestContext("transfer_env_var",
                                model_config=file_ert,
                                store_area=True) as ctx:
                ert = ctx.getErt()
                fs_manager = ert.getEnkfFsManager()
                result_fs = fs_manager.getCurrentFileSystem()

                model_config = ert.getModelConfig()
                runpath_fmt = model_config.getRunpathFormat()
                jobname_fmt = model_config.getJobnameFormat()
                subst_list = ert.getDataKW()
                itr = 0
                mask = BoolVector(default_value=True, initial_size=1)
                run_context = ErtRunContext.ensemble_experiment(
                    result_fs, mask, runpath_fmt, jobname_fmt, subst_list, itr)
                ert.getEnkfSimulationRunner().createRunPath(run_context)
                os.chdir('storage/snake_oil/runpath/realisation-0/iter-0')
                self.assertTrue(os.path.isfile('jobs.json'))
                # Read the file completely, then assert with the file closed.
                with open("jobs.json", "r") as f:
                    data = json.load(f)

                env_data = data["global_environment"]
                self.assertEqual('TheFirstValue', env_data["FIRST"])
                self.assertEqual('TheSecondValue', env_data["SECOND"])

                path_data = data["global_update_path"]
                self.assertEqual('TheThirdValue', path_data["THIRD"])
                self.assertEqual('TheFourthValue', path_data["FOURTH"])
Exemplo n.º 9
0
def test_transfer_var(use_tmpdir):
    """Check that SETENV and UPDATE_PATH settings end up in jobs.json.

    A minimal configuration is written to the current directory, run paths
    are created for a single realization and the ``global_environment`` and
    ``global_update_path`` sections of the generated ``jobs.json`` are
    compared with the configured values.
    """
    # Write a minimal config file with env
    config_text = dedent("""
        NUM_REALIZATIONS 1
        JOBNAME a_name_%d
        SETENV FIRST TheFirstValue
        SETENV SECOND TheSecondValue
        UPDATE_PATH   THIRD  TheThirdValue
        UPDATE_PATH   FOURTH TheFourthValue
        """)
    with open("config_file.ert", "w") as config_file:
        config_file.write(config_text)

    ert = EnKFMain(ResConfig("config_file.ert"))
    storage = ert.getEnkfFsManager()
    model_config = ert.getModelConfig()

    run_context = ErtRunContext.ensemble_experiment(
        storage.getCurrentFileSystem(),
        [True],
        model_config.getRunpathFormat(),
        model_config.getJobnameFormat(),
        ert.getDataKW(),
        0,
    )
    ert.getEnkfSimulationRunner().createRunPath(run_context)

    os.chdir("simulations/realization0")
    with open("jobs.json", "r") as jobs_file:
        jobs = json.load(jobs_file)

    environment = jobs["global_environment"]
    assert environment["FIRST"] == "TheFirstValue"
    assert environment["SECOND"] == "TheSecondValue"

    update_path = jobs["global_update_path"]
    assert update_path["THIRD"] == "TheThirdValue"
    assert update_path["FOURTH"] == "TheFourthValue"
Exemplo n.º 10
0
 def count_active_realizations(self, run_context: ErtRunContext) -> int:
     """Return the sum over the entries of the mask of ``run_context``."""
     mask_entries = run_context.get_mask()
     return sum(mask_entries)
Exemplo n.º 11
0
class SimulationContext(object):
    """Submit single realizations to a job queue and monitor their state.

    The constructor starts the queue; realizations are then submitted one by
    one with :meth:`addSimulation`. The remaining methods query the queue
    manager for the state of the queue or of a submitted realization.
    """

    def __init__(self, ert, sim_fs, mask, itr, verbose=False):
        """Start the job queue and initialise the run.

        :param ert: provider of the configuration objects (noted as
            res.enkf.EnKFMain in the original code)
        :param sim_fs: storage handed on to the run context
        :param mask: realization mask; ``mask.count()`` is passed to the
            queue manager when the queue is started
        :param itr: iteration number handed on to the run context
        :param verbose: forwarded to ``startQueue``
        """
        self._ert = ert  # :type: res.enkf.EnKFMain
        max_runtime = ert.analysisConfig().get_max_runtime()
        self._mask = mask

        job_queue = ert.get_queue_config().create_job_queue()
        job_queue.set_max_job_duration(max_runtime)
        self._queue_manager = JobQueueManager(job_queue)
        self._queue_manager.startQueue(mask.count(), verbose=verbose)
        self._run_args = {}  # :type: dict[int, RunArg]

        self._thread_pool = CThreadPool(8)
        self._thread_pool.addTaskFunction("submitJob", RES_LIB,
                                          "enkf_main_isubmit_job__")

        subst_list = self._ert.getDataKW()
        path_fmt = self._ert.getModelConfig().getRunpathFormat()
        jobname_fmt = self._ert.getModelConfig().getJobnameFormat()
        self._run_context = ErtRunContext(EnkfRunType.ENSEMBLE_EXPERIMENT,
                                          sim_fs, None, mask, path_fmt,
                                          jobname_fmt, subst_list, itr)
        self._ert.initRun(self._run_context)

    def __len__(self):
        """Return ``count()`` of the mask given to the constructor."""
        return self._mask.count()

    def _queued_run_arg(self, iens):
        """Return the run argument queued for ``iens`` or raise KeyError."""
        if iens not in self._run_args:
            raise KeyError("No such simulation: %s" % iens)
        return self._run_args[iens]

    def addSimulation(self, iens, geo_id):
        """Create the run path for realization ``iens`` and submit it.

        :param iens: realization number inside the run context
        :param geo_id: value stored on the run argument before submission
        :raises UserWarning: if ``iens`` is out of range, not enabled in the
            mask, or already queued
        """
        if not (0 <= iens < len(self._run_context)):
            raise UserWarning("Realization number out of range: %d >= %d" %
                              (iens, len(self._run_context)))

        if not self._mask[iens]:
            raise UserWarning("Realization number: '%d' is not active" % iens)

        if iens in self._run_args:
            raise UserWarning("Realization number: '%d' already queued" % iens)

        run_arg = self._run_context[iens]
        run_arg.geo_id = geo_id
        self._run_args[iens] = run_arg

        self._ert.createRunpath(self._run_context, iens=iens)

        queue = self._queue_manager.get_job_queue()
        self._thread_pool.submitJob(ArgPack(self._ert, run_arg, queue))

    def isRunning(self):
        """Return whether the queue manager reports the queue as running."""
        return self._queue_manager.isRunning()

    def getNumPending(self):
        """Return the pending count reported by the queue manager."""
        return self._queue_manager.getNumPending()

    def getNumRunning(self):
        """Return the running count reported by the queue manager."""
        return self._queue_manager.getNumRunning()

    def getNumSuccess(self):
        """Return the success count reported by the queue manager."""
        return self._queue_manager.getNumSuccess()

    def getNumFailed(self):
        """Return the failure count reported by the queue manager."""
        return self._queue_manager.getNumFailed()

    def getNumWaiting(self):
        """Return the waiting count reported by the queue manager."""
        return self._queue_manager.getNumWaiting()

    def didRealizationSucceed(self, iens):
        """Return whether the queue reports the job of ``iens`` as succeeded."""
        queue_index = self._run_args[iens].getQueueIndex()
        return self._queue_manager.didJobSucceed(queue_index)

    def didRealizationFail(self, iens):
        """Return True unless the realization is an explicit success."""
        # For the purposes of this class, a failure should be anything (killed job, etc) that is not an explicit success.
        return not self.didRealizationSucceed(iens)

    def isRealizationQueued(self, iens):
        """Return whether ``iens`` has been added with ``addSimulation``."""
        return iens in self._run_args

    def isRealizationFinished(self, iens):
        """Return whether the job of ``iens`` is complete.

        A realization that has not been submitted yet is never finished.
        """
        run_arg = self._run_args[iens]

        if run_arg.isSubmitted():
            queue_index = run_arg.getQueueIndex()
            return self._queue_manager.isJobComplete(queue_index)
        else:
            return False

    def __repr__(self):
        running = 'running' if self.isRunning() else 'not running'
        numRunn = self.getNumRunning()
        numSucc = self.getNumSuccess()
        numFail = self.getNumFailed()
        numWait = self.getNumWaiting()
        fmt = '%s, #running = %d, #success = %d, #failed = %d, #waiting = %d'
        fmt = fmt % (running, numRunn, numSucc, numFail, numWait)
        return 'SimulationContext(%s)' % fmt

    def get_sim_fs(self):
        """Return the simulation storage of the run context."""
        return self._run_context.get_sim_fs()

    def get_run_context(self):
        """Return the run context created by the constructor."""
        return self._run_context

    def stop(self):
        """Ask the queue manager to stop the queue."""
        self._queue_manager.stop_queue()

    def job_progress(self, iens):
        """Will return a detailed progress of the job.

        The progress report is obtained by reading a file from the filesystem,
        that file is typically created by another process running on another
        machine, and reading might fail due to NFS issues, simultaneous write
        and so on. If loading valid json fails the function will sleep 0.10
        seconds and retry - eventually giving up and returning None. Also for
        jobs which have not yet started the method will return None.

        When the method succeeds in reading the progress file from the file
        system the return value will be an object with properties like this:

           progress.start_time
           progress.end_time
           progress.run_id
           progress.jobs =[ (job1.name, job1.start_time, job1.end_time, job1.status, job1.error_msg),
                             (job2.name, job2.start_time, job2.end_time, job2.status, job2.error_msg),
                              ....
                             (jobN.name, jobN.start_time, jobN.end_time, jobN.status, jobN.error_msg) ]

        :raises KeyError: if ``iens`` has not been queued
        """
        run_arg = self._queued_run_arg(iens)
        try:
            # will throw if not yet submitted (is in a limbo state)
            queue_index = run_arg.getQueueIndex()
        except ValueError:
            return None
        if self._queue_manager.isJobWaiting(queue_index):
            return None

        return ForwardModelStatus.load(run_arg.runpath)

    def run_path(self, iens):
        """
        Will return the path to the simulation.

        :raises KeyError: if ``iens`` has not been queued
        """
        return self._queued_run_arg(iens).runpath

    def job_status(self, iens):
        """Will query the queue system for the status of the job.

        Returns None while the realization has no queue index yet.

        :raises KeyError: if ``iens`` has not been queued
        """
        run_arg = self._queued_run_arg(iens)
        try:
            queue_index = run_arg.getQueueIndex()
        except ValueError:
            return None
        return self._queue_manager.getJobStatus(queue_index)

    def status_timestamp(self):
        """Will return a timestamp for the last status change of the simulations.

        The timestamp is related to status changes when a simulation has
        started, completed and failed.

        """
        return self._queue_manager.status_timestamp()

    def progress_timestamp(self, iens=None):
        """
        Will return a timestamp for when the simulation has progressed to a new forward model step.

        Without ``iens`` the queue-wide timestamp is returned.

        :raises KeyError: if ``iens`` is given but has not been queued
        """

        if iens is None:
            return self._queue_manager.progress_timestamp()

        queue_index = self._queued_run_arg(iens).getQueueIndex()
        return self._queue_manager.progress_timestamp(queue_index)
Exemplo n.º 12
0
class SimulationContext(object):
    """Submit single realizations to a job queue and monitor their state.

    The constructor starts the queue and creates the run paths; realizations
    are then submitted one by one with :meth:`addSimulation`.
    """

    def __init__(self, ert, sim_fs, mask, itr, verbose=False):
        """Start the job queue and create the run paths.

        :param ert: provider of the configuration objects (noted as
            res.enkf.EnKFMain in the original code)
        :param sim_fs: storage handed on to the run context
        :param mask: realization mask; ``mask.count()`` is passed to the
            queue manager when the queue is started
        :param itr: iteration number handed on to the run context
        :param verbose: forwarded to ``startQueue``
        """
        self._ert = ert  # :type: res.enkf.EnKFMain
        # NOTE(review): the value is read but never applied to the queue in
        # this method - confirm whether that is intended.
        max_runtime = ert.analysisConfig().get_max_runtime()
        self._mask = mask

        queue = ert.get_queue_config().create_job_queue()
        self._queue_manager = JobQueueManager(queue)
        self._queue_manager.startQueue(mask.count(), verbose=verbose)
        self._run_args = {}  # :type: dict[int, RunArg]

        self._thread_pool = CThreadPool(8)
        self._thread_pool.addTaskFunction(
            "submitJob", ENKF_LIB, "enkf_main_isubmit_job__")

        substitutions = self._ert.getDataKW()
        runpath_format = self._ert.getModelConfig().getRunpathFormat()
        jobname_format = self._ert.getModelConfig().getJobnameFormat()
        self._run_context = ErtRunContext(
            EnkfRunType.ENSEMBLE_EXPERIMENT,
            sim_fs,
            None,
            mask,
            runpath_format,
            jobname_format,
            substitutions,
            itr,
        )
        self._ert.createRunpath(self._run_context)

    def __len__(self):
        """Return ``count()`` of the mask given to the constructor."""
        return self._mask.count()

    def addSimulation(self, iens, geo_id):
        """Submit realization ``iens`` to the thread pool.

        :param iens: realization number inside the run context
        :param geo_id: value packed together with the run argument
        :raises UserWarning: if ``iens`` is out of range, not enabled in the
            mask, or already queued
        """
        context_size = len(self._run_context)
        if iens < 0 or iens >= context_size:
            raise UserWarning("Realization number out of range: %d >= %d" %
                              (iens, context_size))

        if not self._mask[iens]:
            raise UserWarning("Realization number: '%d' is not active" % iens)

        if iens in self._run_args:
            raise UserWarning("Realization number: '%d' already queued" % iens)

        run_arg = self._run_context[iens]
        job_queue = self._queue_manager.get_job_queue()
        # Remember the run argument so a second submission is rejected.
        self._run_args[iens] = run_arg
        packed = ArgPack(self._ert, run_arg, job_queue, geo_id)
        self._thread_pool.submitJob(packed)

    def isRunning(self):
        """Return whether the queue manager reports the queue as running."""
        return self._queue_manager.isRunning()

    def getNumPending(self):
        """Return the pending count reported by the queue manager."""
        return self._queue_manager.getNumPending()

    def getNumRunning(self):
        """Return the running count reported by the queue manager."""
        return self._queue_manager.getNumRunning()

    def getNumSuccess(self):
        """Return the success count reported by the queue manager."""
        return self._queue_manager.getNumSuccess()

    def getNumFailed(self):
        """Return the failure count reported by the queue manager."""
        return self._queue_manager.getNumFailed()

    def getNumWaiting(self):
        """Return the waiting count reported by the queue manager."""
        return self._queue_manager.getNumWaiting()

    def didRealizationSucceed(self, iens):
        """Return whether the queue reports the job of ``iens`` as succeeded."""
        run_arg = self._run_args[iens]
        return self._queue_manager.didJobSucceed(run_arg.getQueueIndex())

    def didRealizationFail(self, iens):
        """Return True unless the realization is an explicit success."""
        # For the purposes of this class, a failure should be anything
        # (killed job, etc) that is not an explicit success.
        succeeded = self.didRealizationSucceed(iens)
        return not succeeded

    def isRealizationQueued(self, iens):
        """Return whether ``iens`` has been added with ``addSimulation``."""
        return iens in self._run_args

    def isRealizationFinished(self, iens):
        """Return whether the job of ``iens`` is complete.

        A realization that has not been submitted yet is never finished.
        """
        run_arg = self._run_args[iens]
        if not run_arg.isSubmitted():
            return False
        return self._queue_manager.isJobComplete(run_arg.getQueueIndex())

    def __repr__(self):
        state = 'running' if self.isRunning() else 'not running'
        counts = (
            self.getNumRunning(),
            self.getNumSuccess(),
            self.getNumFailed(),
            self.getNumWaiting(),
        )
        template = '%s, #running = %d, #success = %d, #failed = %d, #waiting = %d'
        summary = template % ((state,) + counts)
        return 'SimulationContext(%s)' % summary

    def get_sim_fs(self):
        """Return the simulation storage of the run context."""
        return self._run_context.get_sim_fs()

    def get_run_context(self):
        """Return the run context created by the constructor."""
        return self._run_context
Exemplo n.º 13
0
    def test_simulation_model(self):
        """Check forward model arguments in memory and in the written jobs.json.

        The ``local/simulation_model`` test data is copied into a fresh test
        area, the ``sim_kw.ert`` configuration is loaded, the argument lists
        and argument values of the six forward model jobs are checked, run
        paths are created and finally the ``argList`` entries of the
        generated ``jobs.json`` are compared with the expected values.
        """
        with TestAreaContext("enkf_test_sim_model_kw") as work_area:
            base_path = os.getcwd()
            source_path = self.createTestPath("local/simulation_model")

            work_area.copy_directory(source_path)
            dir_ert = os.path.join(base_path, "simulation_model")
            # unittest assertions are used instead of bare ``assert`` so the
            # checks survive ``python -O`` and report like the other checks.
            self.assertTrue(os.path.isdir(dir_ert))

            file_ert = os.path.join(dir_ert, "sim_kw.ert")
            self.assertTrue(os.path.isfile(file_ert))

            with ErtTestContext("sim_kw",
                                model_config=file_ert,
                                store_area=True) as ctx:
                ert = ctx.getErt()
                fs_manager = ert.getEnkfFsManager()
                result_fs = fs_manager.getCurrentFileSystem()

                model_config = ert.getModelConfig()
                forward_model = model_config.getForwardModel()
                self.assertEqual(forward_model.get_size(), 6)

                self.assertEqual(
                    forward_model.iget_job(3).get_arglist(), ["WORD_A"])
                self.assertEqual(
                    forward_model.iget_job(0).get_arglist(), ["<ARGUMENT>"])
                self.assertEqual(
                    forward_model.iget_job(1).get_arglist(),
                    ["Hello", "True", "3.14", "4"],
                )
                self.assertEqual(
                    forward_model.iget_job(2).get_arglist(),
                    ["word", "<ECLBASE>"])

                self.assertEqual(
                    forward_model.iget_job(0).get_argvalues(), ["yy"])
                self.assertEqual(
                    forward_model.iget_job(1).get_argvalues(),
                    ["Hello", "True", "3.14", "4"],
                )
                self.assertEqual(
                    forward_model.iget_job(2).get_argvalues(),
                    ["word", "<ECLBASE>"])
                self.assertEqual(
                    forward_model.iget_job(3).get_argvalues(), ["WORD_A"])
                self.assertEqual(
                    list(forward_model.iget_job(4).get_argvalues()),
                    [
                        "configured_argumentA",
                        "configured_argumentB",
                        "DEFINED_ARGC_VALUE",
                    ],
                )
                self.assertEqual(
                    list(forward_model.iget_job(5).get_argvalues()),
                    [
                        "DEFAULT_ARGA_VALUE", "<ARGUMENTB>",
                        "DEFINED_ARGC_VALUE"
                    ],
                )

                runpath_fmt = model_config.getRunpathFormat()
                jobname_fmt = model_config.getJobnameFormat()

                subst_list = ert.getDataKW()
                itr = 0
                mask = BoolVector(default_value=True, initial_size=1)

                run_context = ErtRunContext.ensemble_experiment(
                    result_fs, mask, runpath_fmt, jobname_fmt, subst_list, itr)
                ert.getEnkfSimulationRunner().createRunPath(run_context)
                queue_config = ert.get_queue_config()
                self.assertEqual(queue_config.num_cpu, 5)
                os.chdir("storage/sim_kw/runpath/realisation-0/iter-0")
                self.assertTrue(os.path.isfile("jobs.json"))
                # Read the file completely, then assert with the file closed.
                with open("jobs.json", "r") as f:
                    data = json.load(f)

                jobList = data["jobList"]
                old_job_A = jobList[3]
                self.assertEqual(old_job_A["argList"], ["WORD_A"])
                old_job_B = jobList[0]
                self.assertEqual(old_job_B["argList"], ["yy"])
                new_job_A = jobList[1]
                self.assertEqual(new_job_A["argList"],
                                 ["Hello", "True", "3.14", "4"])
                new_job_B = jobList[2]
                self.assertEqual(new_job_B["argList"], ["word", "SIM_KW"])
Exemplo n.º 14
0
def _is_iens_active(iens: int, run_context: ErtRunContext) -> bool:
    """Return whether or not the iens is active."""
    try:
        return run_context.is_active(iens)
    except AttributeError:
        return False
Exemplo n.º 15
0
def _get_run_context_iter(run_context: ErtRunContext) -> int:
    """Return the iter from run_context."""
    try:
        return run_context.get_iter()
    except AttributeError:
        return -1
Exemplo n.º 16
0
class SimulationContext(object):
    """Run the realizations of an ensemble experiment in a background thread.

    The constructor creates the run paths and starts a thread that hands the
    job queue and the run context to the simulation runner. The remaining
    methods query the queue manager for the state of the queue or of a
    single realization.
    """

    def __init__(self, ert, sim_fs, mask, itr, case_data):
        """Build the run context, create the run paths and start the thread.

        :param ert: provider of configuration and of the simulation runner
            (noted as res.enkf.EnKFMain in the original code)
        :param sim_fs: storage handed on to the run context
        :param mask: per-realization flags, indexed by realization position
        :param itr: iteration number handed on to the run context
        :param case_data: sequence of two-element entries; the first element
            of entry ``i`` becomes the geo_id of realization ``i``
        """
        self._ert = ert  # :type: res.enkf.EnKFMain
        runtime_limit = ert.analysisConfig().get_max_runtime()
        self._mask = mask

        queue = ert.get_queue_config().create_job_queue()
        queue.set_max_job_duration(runtime_limit)
        self._queue_manager = JobQueueManager(queue)

        substitutions = self._ert.getDataKW()
        runpath_format = self._ert.getModelConfig().getRunpathFormat()
        jobname_format = self._ert.getModelConfig().getJobnameFormat()

        self._run_context = ErtRunContext(
            EnkfRunType.ENSEMBLE_EXPERIMENT,
            sim_fs,
            None,
            mask,
            runpath_format,
            jobname_format,
            substitutions,
            itr,
        )

        # The run context does not know the geo_id values; copy them over
        # for every realization that is enabled in the mask.
        for position, (geo_id, _unused) in enumerate(case_data):
            if not mask[position]:
                continue
            self._run_context[position].geo_id = geo_id

        self._ert.getEnkfSimulationRunner().createRunPath(self._run_context)
        self._sim_thread = self._run_simulations_simple_step()

        # Do not return before the queue reports that it is running (or the
        # whole run is already over), so job status queries are sane.
        while True:
            if not self.isRunning() or self._queue_manager.isRunning():
                break
            sleep(0.1)

    def get_run_args(self, iens):
        '''
        Look up the run argument of a realization.

        :param iens: realization number
        :return: the first non-None entry of the run context whose ``iens``
            attribute equals the given number
        :raises KeyError: if no such entry exists
        '''
        candidates = (
            entry for entry in self._run_context
            if entry is not None and entry.iens == iens
        )
        found = next(candidates, None)
        if found is None:
            raise KeyError("No such simulation: %s" % iens)
        return found

    def _run_simulations_simple_step(self):
        """Start and return the thread that runs the simulations."""

        def _run():
            # Looked up inside the thread, exactly when the thread runs.
            runner = self._ert.getEnkfSimulationRunner()
            runner.runSimpleStep(self._queue_manager.queue, self._run_context)

        worker = Thread(target=_run)
        worker.start()
        return worker

    def __len__(self):
        """Return ``count()`` of the mask given to the constructor."""
        return self._mask.count()

    def isRunning(self):
        """Return whether the thread is alive or the queue is running."""
        # TODO: Should separate between running jobs and having loaded all data
        if self._sim_thread.is_alive():
            return True
        return self._queue_manager.isRunning()

    def getNumPending(self):
        """Return the pending count reported by the queue manager."""
        return self._queue_manager.getNumPending()

    def getNumRunning(self):
        """Return the running count reported by the queue manager."""
        return self._queue_manager.getNumRunning()

    def getNumSuccess(self):
        """Return the success count reported by the queue manager."""
        return self._queue_manager.getNumSuccess()

    def getNumFailed(self):
        """Return the failure count reported by the queue manager."""
        return self._queue_manager.getNumFailed()

    def getNumWaiting(self):
        """Return the waiting count reported by the queue manager."""
        return self._queue_manager.getNumWaiting()

    def didRealizationSucceed(self, iens):
        """Return whether the queue reports the job of ``iens`` as succeeded."""
        run_arg = self.get_run_args(iens)
        return self._queue_manager.didJobSucceed(run_arg.getQueueIndex())

    def didRealizationFail(self, iens):
        """Return True unless the realization is an explicit success."""
        # For the purposes of this class, a failure should be anything
        # (killed job, etc) that is not an explicit success.
        succeeded = self.didRealizationSucceed(iens)
        return not succeeded

    def isRealizationQueued(self, iens):
        """Return True when ``iens`` is found; raise KeyError otherwise."""
        # The lookup raises when the realization is not queued, so reaching
        # the return statement means it was found.
        self.get_run_args(iens)
        return True

    def isRealizationFinished(self, iens):
        """Return whether the job of ``iens`` is complete.

        A realization that has not been submitted yet is never finished.
        """
        run_arg = self.get_run_args(iens)
        if not run_arg.isSubmitted():
            return False
        return self._queue_manager.isJobComplete(run_arg.getQueueIndex())

    def __repr__(self):
        state = 'running' if self.isRunning() else 'not running'
        counts = (
            self.getNumRunning(),
            self.getNumSuccess(),
            self.getNumFailed(),
            self.getNumWaiting(),
        )
        template = '%s, #running = %d, #success = %d, #failed = %d, #waiting = %d'
        summary = template % ((state,) + counts)
        return 'SimulationContext(%s)' % summary

    def get_sim_fs(self):
        """Return the simulation storage of the run context."""
        return self._run_context.get_sim_fs()

    def get_run_context(self):
        """Return the run context created by the constructor."""
        return self._run_context

    def stop(self):
        """Stop the queue, then wait for the simulation thread to end."""
        self._queue_manager.stop_queue()
        self._sim_thread.join()

    def job_progress(self, iens):
        """Return the detailed progress of a job, or None if unavailable.

        None is returned when the realization has no queue index yet or when
        the queue manager reports its job as waiting. Otherwise the result
        of ``ForwardModelStatus.load`` for the run path is returned; per the
        original documentation that is an object with properties like:

           progress.start_time
           progress.end_time
           progress.run_id
           progress.jobs =[ (job1.name, job1.start_time, job1.end_time, job1.status, job1.error_msg),
                             (job2.name, job2.start_time, job2.end_time, job2.status, job2.error_msg),
                              ....
                             (jobN.name, jobN.start_time, jobN.end_time, jobN.status, jobN.error_msg) ]

        The original documentation also notes that the progress file is
        typically written by another process on another machine, so reading
        may fail (NFS issues, simultaneous writes) and the load then retries
        after 0.10 seconds before eventually giving up and returning None.
        """
        run_arg = self.get_run_args(iens)

        try:
            # Raises while the realization is not yet submitted (limbo state).
            index_in_queue = run_arg.getQueueIndex()
        except ValueError:
            return None

        if self._queue_manager.isJobWaiting(index_in_queue):
            return None
        return ForwardModelStatus.load(run_arg.runpath)

    def run_path(self, iens):
        """
        Return the run path of the simulation ``iens``.
        """
        run_arg = self.get_run_args(iens)
        return run_arg.runpath

    def job_status(self, iens):
        """Query the queue system for the status of the job.

        Returns None while the realization has no queue index yet.
        """
        run_arg = self.get_run_args(iens)
        try:
            index_in_queue = run_arg.getQueueIndex()
        except ValueError:
            return None
        return self._queue_manager.getJobStatus(index_in_queue)