Пример #1
0
    def _add_snapshot(self, snapshot: Snapshot, iter_: int):
        """Build a node tree for ``snapshot`` and install it under the root.

        The tree has one ITER node (keyed by ``iter_``) holding one REAL node
        per realization, each holding its STEP nodes, each of which holds one
        JOB node per id listed in the snapshot metadata's sorted job ids.

        If ``iter_`` is already a child of the root, the existing tree is
        replaced inside a model reset. Otherwise the new tree is appended as a
        new row.

        Args:
            snapshot: Source of the data; its ``data()`` mapping is read for
                metadata, status and realizations.
            iter_: Id under which the tree is stored in ``self.root.children``.
        """
        # Parts of the metadata will be used in the underlying data model,
        # which is mutable, hence we thaw it here - once.
        metadata = pyrsistent.thaw(snapshot.data()[ids.METADATA])
        snapshot_tree = Node(
            iter_,
            {
                ids.STATUS: snapshot.data()[ids.STATUS],
                SORTED_REALIZATION_IDS: metadata[SORTED_REALIZATION_IDS],
                SORTED_JOB_IDS: metadata[SORTED_JOB_IDS],
            },
            NodeType.ITER,
        )
        for real_id in snapshot_tree.data[SORTED_REALIZATION_IDS]:
            real = snapshot.data()[ids.REALS][real_id]
            real_node = Node(
                real_id,
                {
                    ids.STATUS: real[ids.STATUS],
                    ids.ACTIVE: real[ids.ACTIVE],
                    REAL_JOB_STATUS_AGGREGATED:
                    metadata[REAL_JOB_STATUS_AGGREGATED][real_id],
                    REAL_STATUS_COLOR: metadata[REAL_STATUS_COLOR][real_id],
                },
                NodeType.REAL,
            )
            snapshot_tree.add_child(real_node)
            for step_id, step in real[ids.STEPS].items():
                step_node = Node(step_id, {ids.STATUS: step[ids.STATUS]},
                                 NodeType.STEP)
                real_node.add_child(step_node)
                # Jobs are added in the order given by the metadata rather
                # than in the iteration order of the step's job mapping.
                for job_id in metadata[SORTED_JOB_IDS]:
                    job = step[ids.JOBS][job_id]
                    job_dict = dict(job)
                    job_dict[ids.DATA] = job.data
                    job_node = Node(job_id, job_dict, NodeType.JOB)
                    step_node.add_child(job_node)

        if iter_ in self.root.children:
            # Replacing an existing iteration: use the begin/end pair so the
            # model itself emits modelAboutToBeReset/modelReset and performs
            # its own bookkeeping, instead of emitting the signals by hand.
            self.beginResetModel()
            self.root.children[iter_] = snapshot_tree
            snapshot_tree.parent = self.root
            self.endResetModel()
            return

        parent = QModelIndex()
        next_iter = len(self.root.children)
        self.beginInsertRows(parent, next_iter, next_iter)
        self.root.add_child(snapshot_tree)
        self.root.children[iter_] = snapshot_tree
        # Every beginInsertRows() must be closed by endInsertRows(), which
        # emits rowsInserted for the announced row range.
        self.endInsertRows()
Пример #2
0
    def test_print_progress(self):
        out = StringIO()
        monitor = Monitor(out=out)
        sd = SnapshotDict(status="")
        for i in range(0, 100):
            status = REALIZATION_STATE_FINISHED if i < 50 else REALIZATION_STATE_WAITING
            sd.reals[i] = Realization(status=status, active=True)
        monitor._snapshots[0] = Snapshot(sd.dict())
        monitor._start_time = datetime.now()
        general_event = _UpdateEvent(
            phase_name="Test Phase",
            current_phase=0,
            total_phases=2,
            progress=0.5,
            indeterminate=False,
            iteration=0,
        )

        monitor._print_progress(general_event)

        self.assertEqual(
            """\r
    --> Test Phase

    1/2 |███████████████               | 50% Running time: 0 seconds

    Waiting        50/100
    Pending         0/100
    Running         0/100
    Failed          0/100
    Finished       50/100
    Unknown         0/100
""",
            out.getvalue(),
        )
Пример #3
0
    def _create_snapshot(self):
        """Return the initial ``Snapshot`` for this ensemble.

        Every active realization is registered as active and waiting, each of
        its steps starts out with unknown status, and each job starts in the
        start state with empty data and its own name. Keys at every level are
        the stringified ids.
        """
        realizations = {}
        for real in self.get_active_reals():
            real_entry = Realization(
                active=True,
                status=state.REALIZATION_STATE_WAITING,
            )
            realizations[str(real.get_iens())] = real_entry

            for step in real.get_steps():
                step_entry = Step(status=state.STEP_STATE_UNKNOWN)
                real_entry.steps[str(step.get_id())] = step_entry

                for job in step.get_jobs():
                    step_entry.jobs[str(job.get_id())] = Job(
                        status=state.JOB_STATE_START,
                        data={},
                        name=job.get_name(),
                    )

        snapshot_dict = SnapshotDict(
            reals=realizations,
            status=state.ENSEMBLE_STATE_UNKNOWN,
            metadata=self.get_metadata(),
        )
        return Snapshot(snapshot_dict.dict())
Пример #4
0
    def _full_snapshot_event(self,
                             iter_) -> typing.Optional[FullSnapshotEvent]:
        """Create, store and wrap a full snapshot for iteration ``iter_``.

        Returns:
            A ``FullSnapshotEvent`` carrying the new snapshot, or ``None``
            when the model reports the empty detailed progress or when no
            snapshot dict could be created, meaning no event should be sent.
        """
        context = self._model.get_run_context()
        progress_details = self._model.getDetailedProgress()
        if progress_details == _THE_EMPTY_DETAILED_PROGRESS:
            return None

        as_dict = self._create_snapshot_dict(context, progress_details, iter_)
        if not as_dict:
            return None

        full_snapshot = Snapshot(as_dict.dict())
        # Remember the snapshot for this iteration before reporting it.
        self._set_iter_snapshot(iter_, full_snapshot)

        event_fields = dict(
            phase_name=self._model.getPhaseName(),
            current_phase=self._model.currentPhase(),
            total_phases=self._model.phaseCount(),
            indeterminate=self._model.isIndeterminate(),
            progress=self._progress(),
            iteration=iter_,
            snapshot=full_snapshot,
        )
        return FullSnapshotEvent(**event_fields)
Пример #5
0
    def track(self):
        """Yield tracker events for items taken from the work queue.

        String items are control messages: they end the generator, after
        yielding an ``EndEvent`` when the message is ``DONE`` or
        ``CONNECTION_ERROR``. Any other item is indexed by ``"type"``: a
        snapshot event stores a new ``Snapshot`` for its iteration and yields
        a ``FullSnapshotEvent``; a snapshot-update event is merged into the
        stored snapshot and yields a ``SnapshotUpdateEvent``.

        Raises:
            OutOfOrderSnapshotUpdateException: If an update arrives for an
                iteration that has no stored snapshot.
        """
        while True:
            item = self._work_queue.get()

            if isinstance(item, str):
                try:
                    if item == EvaluatorTracker.DONE:
                        yield EndEvent(
                            failed=self._model.hasRunFailed(),
                            failed_msg=self._model.getFailMessage(),
                        )
                    elif item == EvaluatorTracker.CONNECTION_ERROR:
                        yield EndEvent(
                            failed=True,
                            failed_msg="Connection error",
                        )
                except GeneratorExit:
                    # The consumer may stop right here; swallow the exit so
                    # the final queue item is still marked as done below.
                    pass
                self._work_queue.task_done()
                break

            event_type = item["type"]
            if event_type == ids.EVTYPE_EE_SNAPSHOT:
                iteration = item.data["iter"]
                full_snapshot = Snapshot(item.data)
                self._iter_snapshot[iteration] = full_snapshot
                full_event = FullSnapshotEvent(
                    phase_name=self._model.getPhaseName(),
                    current_phase=self._model.currentPhase(),
                    total_phases=self._model.phaseCount(),
                    indeterminate=self._model.isIndeterminate(),
                    progress=self._progress(),
                    iteration=iteration,
                    snapshot=full_snapshot,
                )
                yield full_event
            elif event_type == ids.EVTYPE_EE_SNAPSHOT_UPDATE:
                iteration = item.data["iter"]
                if iteration not in self._iter_snapshot:
                    raise OutOfOrderSnapshotUpdateException(
                        f"got {ids.EVTYPE_EE_SNAPSHOT_UPDATE} without having stored snapshot for iter {iteration}"
                    )
                stored_snapshot = self._iter_snapshot[iteration]
                delta = PartialSnapshot(stored_snapshot).from_cloudevent(item)
                self._iter_snapshot[iteration].merge_event(delta)
                update_event = SnapshotUpdateEvent(
                    phase_name=self._model.getPhaseName(),
                    current_phase=self._model.currentPhase(),
                    total_phases=self._model.phaseCount(),
                    indeterminate=self._model.isIndeterminate(),
                    progress=self._progress(),
                    iteration=iteration,
                    partial_snapshot=delta,
                )
                yield update_event

            self._work_queue.task_done()
Пример #6
0
def full_snapshot() -> Snapshot:
    """Return a started ensemble snapshot with 100 identical realizations.

    Each realization (keys "0" through "99") is a deep copy of one template:
    active, unknown status, with a single step "0" holding the jobs "0"
    (``poly_eval``) and "1" (``poly_postval``).
    """

    def _started_job(job_name):
        # Both jobs differ only by name; every call takes fresh timestamps.
        return Job(
            start_time=dt.now(),
            end_time=dt.now(),
            name=job_name,
            status=JOB_STATE_START,
            error="error",
            stdout="std_out_file",
            stderr="std_err_file",
            data={
                CURRENT_MEMORY_USAGE: "123",
                MAX_MEMORY_USAGE: "312",
            },
        )

    only_step = Step(
        status="",
        jobs={
            "0": _started_job("poly_eval"),
            "1": _started_job("poly_postval"),
        },
    )
    template = Realization(
        status=REALIZATION_STATE_UNKNOWN,
        active=True,
        steps={"0": only_step},
    )

    ensemble = SnapshotDict(status=ENSEMBLE_STATE_STARTED, reals={})
    for real_index in range(100):
        ensemble.reals[str(real_index)] = copy.deepcopy(template)

    return Snapshot(ensemble.dict())
Пример #7
0
def snapshot_to_tree(snapshot: Snapshot, iter_: int) -> Node:
    """Convert ``snapshot`` into an ITER -> REAL -> STEP -> JOB node tree.

    Realizations and jobs are added in ascending numeric order of their ids;
    steps are added in the order of the realization's step mapping.

    Args:
        snapshot: Snapshot providing the ensemble status and contents.
        iter_: Id given to the returned iteration node.

    Returns:
        The iteration node at the top of the tree.
    """
    root = Node(iter_, {ids.STATUS: snapshot.get_status()}, NodeType.ITER)
    parsed = SnapshotDict(**snapshot.to_dict())

    for real_key in sorted(parsed.reals, key=int):
        realization = parsed.reals[real_key]
        real_payload = {
            ids.STATUS: realization.status,
            ids.ACTIVE: realization.active,
        }
        real_node = Node(real_key, real_payload, NodeType.REAL)
        root.add_child(real_node)

        for step_key, step in realization.steps.items():
            step_node = Node(step_key, {ids.STATUS: step.status},
                             NodeType.STEP)
            real_node.add_child(step_node)

            for job_key in sorted(step.jobs, key=int):
                job = step.jobs[job_key]
                # Copy the job's fields, then store the job data under the
                # DATA key.
                job_payload = {**dict(job), ids.DATA: job.data}
                step_node.add_child(Node(job_key, job_payload, NodeType.JOB))

    return root
Пример #8
0
    def test_legends(self):
        monitor = Monitor(out=StringIO())
        sd = SnapshotDict(status="")
        for i in range(0, 100):
            status = REALIZATION_STATE_FINISHED if i < 10 else REALIZATION_STATE_RUNNING
            sd.reals[i] = Realization(status=status, active=True)
        monitor._snapshots[0] = Snapshot(sd.dict())
        legends = monitor._get_legends()

        self.assertEqual(
            """    Waiting         0/100
    Pending         0/100
    Running        90/100
    Failed          0/100
    Finished       10/100
    Unknown         0/100
""",
            legends,
        )
Пример #9
0
    def test_print_progress(self):
        out = StringIO()
        monitor = Monitor(out=out)
        sd = SnapshotDict(status="")
        for i in range(0, 100):
            status = REALIZATION_STATE_FINISHED if i < 50 else REALIZATION_STATE_WAITING
            sd.reals[i] = Realization(status=status, active=True)
        monitor._snapshots[0] = Snapshot(sd.dict())
        monitor._start_time = datetime.now()
        general_event = _UpdateEvent(
            phase_name="Test Phase",
            current_phase=0,
            total_phases=2,
            progress=0.5,
            indeterminate=False,
            iteration=0,
        )

        monitor._print_progress(general_event)

        # For some reason, `tqdm` adds an extra line containing a progress-bar,
        # even though this test only calls it once.
        # I suspect this has something to do with the way `tqdm` does refresh,
        # but do not know how to fix it.
        # Seems not be a an issue when used normally.
        expected = """    --> Test Phase


    |                                                                                      |   0% it
    1/2 |##############################5                              |  50% Running time: 0 seconds

    Waiting        50/100
    Pending         0/100
    Running         0/100
    Failed          0/100
    Finished       50/100
    Unknown         0/100

"""

        assert out.getvalue().replace("\r", "\n") == expected
Пример #10
0
    def create_snapshot(ensemble):
        """Return the initial ``Snapshot`` describing ``ensemble``.

        Every active realization is registered as active with status
        "Waiting"; its stages and their steps start as "Unknown"; every job
        starts as "Pending" with empty data and its own name. No start or end
        times are set. Keys at every level are the stringified ids.
        """
        realizations = {}
        for real in ensemble.get_active_reals():
            real_entry = _Realization(
                active=True,
                start_time=None,
                end_time=None,
                status="Waiting",
            )
            realizations[str(real.get_iens())] = real_entry

            for stage in real.get_stages():
                stage_entry = _Stage(
                    status="Unknown",
                    start_time=None,
                    end_time=None,
                )
                real_entry.stages[str(stage.get_id())] = stage_entry

                for step in stage.get_steps():
                    step_entry = _Step(
                        status="Unknown",
                        start_time=None,
                        end_time=None,
                    )
                    stage_entry.steps[str(step.get_id())] = step_entry

                    for job in step.get_jobs():
                        step_entry.jobs[str(job.get_id())] = _Job(
                            status="Pending",
                            data={},
                            start_time=None,
                            end_time=None,
                            name=job.get_name(),
                        )

        snapshot_dict = _SnapshotDict(
            reals=realizations,
            status="Unknown",
            forward_model=_ForwardModel(step_definitions={}),
            metadata=ensemble.get_metadata(),
        )
        return Snapshot(snapshot_dict.dict())
Пример #11
0
    def _batch(self, events):
        """Turn a sequence of cloud events into tracker events.

        Snapshot-update events are collected and passed to ``self._flush`` as
        one batch. A full snapshot event first flushes any collected updates,
        then stores the new snapshot for its iteration and yields a
        ``FullSnapshotEvent``. Updates still collected when ``events`` is
        exhausted are flushed last.

        Raises:
            OutOfOrderSnapshotUpdateException: If an update arrives for an
                iteration that has no stored snapshot.
            ValueError: If an event has any other type.
        """
        pending: List[CloudEvent] = []

        for event in events:
            event_type = event["type"]

            if event_type == ids.EVTYPE_EE_SNAPSHOT:
                # A new iteration starts here, so emit whatever updates were
                # gathered for the previous one before moving on.
                if pending:
                    yield self._flush(pending)
                pending = []

                iteration = event.data["iter"]
                full_snapshot = Snapshot(event.data)
                self._iter_snapshot[iteration] = full_snapshot
                full_event = FullSnapshotEvent(
                    phase_name=self._model.getPhaseName(),
                    current_phase=self._model.currentPhase(),
                    total_phases=self._model.phaseCount(),
                    indeterminate=self._model.isIndeterminate(),
                    progress=self._progress(),
                    iteration=iteration,
                    snapshot=full_snapshot,
                )
                yield full_event
                self._work_queue.task_done()
            elif event_type == ids.EVTYPE_EE_SNAPSHOT_UPDATE:
                iteration = event.data["iter"]
                if iteration not in self._iter_snapshot:
                    raise OutOfOrderSnapshotUpdateException(
                        f"got {ids.EVTYPE_EE_SNAPSHOT_UPDATE} without having stored snapshot for iter {iteration}"
                    )
                pending.append(event)
            else:
                raise ValueError("got unexpected event type", event_type)

        if pending:
            yield self._flush(pending)
Пример #12
0
def test_monitor_stop(evaluator):
    """Stop tracking after the first event and check the ensemble status.

    The first tracked event is expected to carry a snapshot whose status is
    the started ensemble state.
    """
    with evaluator.run() as monitor:
        for first_event in monitor.track():
            # Only the first event is needed; stop tracking right away.
            first_snapshot = Snapshot(first_event.data)
            break

    assert first_snapshot.get_status() == ENSEMBLE_STATE_STARTED
Пример #13
0
def test_dispatchers_can_connect_and_monitor_can_shut_down_evaluator(
        evaluator):
    """Drive an evaluator with two dispatchers and two monitors.

    Each dispatched job event must show up in the next tracked snapshot, a
    monitor connecting later must see the accumulated state, and a cancel
    signal from one monitor must deliver a terminated event to both, after
    which neither yields anything more.
    """

    def _next_snapshot(stream):
        # Wrap the data of the next tracked event in a Snapshot.
        return Snapshot(next(stream).data)

    with evaluator.run() as monitor:
        events = monitor.track()

        host = evaluator._config.host
        port = evaluator._config.port

        # The initial snapshot arrives before anything has been dispatched.
        assert _next_snapshot(events).get_status() == ENSEMBLE_STATE_STARTED

        # Two dispatchers connect.
        with Client(host, port, "/dispatch") as dispatch1, Client(
                host, port, "/dispatch") as dispatch2:

            # Dispatcher 1: job 0 of realization 0 is running.
            send_dispatch_event(
                dispatch1,
                identifiers.EVTYPE_FM_JOB_RUNNING,
                "/ert/ee/0/real/0/step/0/job/0",
                "event1",
                {"current_memory_usage": 1000},
            )
            current = _next_snapshot(events)
            assert current.get_job("0", "0", "0").status == JOB_STATE_RUNNING

            # Dispatcher 2: job 0 of realization 1 is running.
            send_dispatch_event(
                dispatch2,
                identifiers.EVTYPE_FM_JOB_RUNNING,
                "/ert/ee/0/real/1/step/0/job/0",
                "event1",
                {"current_memory_usage": 1000},
            )
            current = _next_snapshot(events)
            assert current.get_job("1", "0", "0").status == JOB_STATE_RUNNING

            # Dispatcher 2: job 0 of realization 1 is done.
            send_dispatch_event(
                dispatch2,
                identifiers.EVTYPE_FM_JOB_SUCCESS,
                "/ert/ee/0/real/1/step/0/job/0",
                "event1",
                {"current_memory_usage": 1000},
            )
            current = _next_snapshot(events)
            assert current.get_job("1", "0", "0").status == JOB_STATE_FINISHED

            # Dispatcher 2: job 1 of realization 1 has failed.
            send_dispatch_event(
                dispatch2,
                identifiers.EVTYPE_FM_JOB_FAILURE,
                "/ert/ee/0/real/1/step/0/job/1",
                "event_job_1_fail",
                {identifiers.ERROR_MSG: "error"},
            )
            current = _next_snapshot(events)
            assert current.get_job("1", "0", "1").status == JOB_STATE_FAILURE

            # A second monitor connects and sees the accumulated state.
            with ee_monitor.create(host, port) as monitor2:
                events2 = monitor2.track()
                current = _next_snapshot(events2)
                assert current.get_status() == ENSEMBLE_STATE_STARTED
                job_0_0 = current.get_job("0", "0", "0")
                assert job_0_0.status == JOB_STATE_RUNNING
                job_1_0 = current.get_job("1", "0", "0")
                assert job_1_0.status == JOB_STATE_FINISHED

                # One monitor asks the server to exit ...
                monitor.signal_cancel()

                # ... and both monitors should receive a terminated event.
                terminated = next(events)
                terminated2 = next(events2)
                assert terminated["type"] == identifiers.EVTYPE_EE_TERMINATED
                assert terminated2["type"] == identifiers.EVTYPE_EE_TERMINATED

                # Nothing more may be delivered on either stream.
                for stream in [events, events2]:
                    for _ in stream:
                        assert False, "got unexpected event from monitor"
Пример #14
0
def test_dispatchers_can_connect_and_monitor_can_shut_down_evaluator(
        evaluator):
    """Drive an evaluator with two dispatchers and two monitors.

    Each dispatched job event must show up in the next tracked snapshot, a
    monitor connecting later must see the accumulated state, and a cancel
    signal from the first monitor must deliver a terminated event to both,
    after which neither yields anything more.
    """

    def _next_snapshot(stream):
        # Wrap the data of the next tracked event in a Snapshot.
        return Snapshot(next(stream).data)

    monitor = evaluator.run()
    events = monitor.track()

    host = evaluator._config.host
    port = evaluator._config.port

    # The initial snapshot arrives before anything has been dispatched.
    assert _next_snapshot(events).get_status() == "Unknown"

    # Two dispatchers connect.
    with Client(host, port, "/dispatch") as dispatch1, Client(
            host, port, "/dispatch") as dispatch2:

        # Dispatcher 1: job 0 of realization 0 is running.
        send_dispatch_event(
            dispatch1,
            identifiers.EVTYPE_FM_JOB_RUNNING,
            "/ert/ee/0/real/0/stage/0/step/0/job/0",
            "event1",
            {"current_memory_usage": 1000},
        )
        current = _next_snapshot(events)
        assert current.get_job("0", "0", "0", "0")["status"] == "Running"

        # Dispatcher 2: job 0 of realization 1 is running.
        send_dispatch_event(
            dispatch2,
            identifiers.EVTYPE_FM_JOB_RUNNING,
            "/ert/ee/0/real/1/stage/0/step/0/job/0",
            "event1",
            {"current_memory_usage": 1000},
        )
        current = _next_snapshot(events)
        assert current.get_job("1", "0", "0", "0")["status"] == "Running"

        # Dispatcher 2: job 0 of realization 1 is done.
        send_dispatch_event(
            dispatch2,
            identifiers.EVTYPE_FM_JOB_SUCCESS,
            "/ert/ee/0/real/1/stage/0/step/0/job/0",
            "event1",
            {"current_memory_usage": 1000},
        )
        current = _next_snapshot(events)
        assert current.get_job("1", "0", "0", "0")["status"] == "Finished"

        # A second monitor connects and sees the accumulated state.
        monitor2 = ee_monitor.create(host, port)
        events2 = monitor2.track()
        current = _next_snapshot(events2)
        assert current.get_status() == "Unknown"
        assert current.get_job("0", "0", "0", "0")["status"] == "Running"
        assert current.get_job("1", "0", "0", "0")["status"] == "Finished"

    # One monitor asks the server to exit ...
    monitor.signal_cancel()

    # ... and both monitors should receive a terminated event.
    terminated = next(events)
    terminated2 = next(events2)
    assert terminated["type"] == identifiers.EVTYPE_EE_TERMINATED
    assert terminated2["type"] == identifiers.EVTYPE_EE_TERMINATED

    # Nothing more may be delivered on either stream.
    for stream in [events, events2]:
        for _ in stream:
            assert False, "got unexpected event from monitor"
Пример #15
0
def test_monitor_stop(evaluator):
    """Check that the first tracked snapshot reports the "Unknown" status."""
    monitor = evaluator.run()
    event_stream = monitor.track()

    first_event = next(event_stream)
    first_snapshot = Snapshot(first_event.data)

    assert first_snapshot.get_status() == "Unknown"
Пример #16
0
def test_dispatchers_can_connect_and_monitor_can_shut_down_evaluator(
        evaluator):
    """Drive an evaluator with two dispatchers and two monitors.

    Four job events are dispatched back to back and must all be reflected in
    the next tracked snapshot. A monitor connecting later must first receive
    a full snapshot with the accumulated state, and a cancel signal from the
    first monitor must deliver a terminated event to both, after which
    neither yields anything more.
    """
    with evaluator.run() as monitor:
        events = monitor.track()
        host = evaluator._config.host
        port = evaluator._config.port
        token = evaluator._config.token
        cert = evaluator._config.cert

        url = evaluator._config.url

        # The initial snapshot arrives before anything has been dispatched.
        initial_event = next(events)
        initial = Snapshot(initial_event.data)
        assert initial.get_status() == ENSEMBLE_STATE_UNKNOWN

        # Both dispatchers connect with the same settings.
        client_options = dict(
            cert=cert,
            token=token,
            max_retries=1,
            timeout_multiplier=1,
        )
        with Client(url + "/dispatch", **client_options) as dispatch1, Client(
                url + "/dispatch", **client_options) as dispatch2:

            dispatches = [
                # Dispatcher 1: job 0 of realization 0 is running.
                (
                    dispatch1,
                    identifiers.EVTYPE_FM_JOB_RUNNING,
                    f"/ert/ee/{evaluator._ee_id}/real/0/step/0/job/0",
                    "event1",
                    {"current_memory_usage": 1000},
                ),
                # Dispatcher 2: job 0 of realization 1 is running.
                (
                    dispatch2,
                    identifiers.EVTYPE_FM_JOB_RUNNING,
                    f"/ert/ee/{evaluator._ee_id}/real/1/step/0/job/0",
                    "event1",
                    {"current_memory_usage": 1000},
                ),
                # Dispatcher 2: job 0 of realization 1 is done.
                (
                    dispatch2,
                    identifiers.EVTYPE_FM_JOB_SUCCESS,
                    f"/ert/ee/{evaluator._ee_id}/real/1/step/0/job/0",
                    "event1",
                    {"current_memory_usage": 1000},
                ),
                # Dispatcher 2: job 1 of realization 1 has failed.
                (
                    dispatch2,
                    identifiers.EVTYPE_FM_JOB_FAILURE,
                    f"/ert/ee/{evaluator._ee_id}/real/1/step/0/job/1",
                    "event_job_1_fail",
                    {identifiers.ERROR_MSG: "error"},
                ),
            ]
            for dispatch_args in dispatches:
                send_dispatch_event(*dispatch_args)

            # All four events are expected in the next tracked snapshot.
            merged = Snapshot(next(events).data)
            assert merged.get_job("1", "0", "0").status == JOB_STATE_FINISHED
            assert merged.get_job("0", "0", "0").status == JOB_STATE_RUNNING
            assert merged.get_job("1", "0", "1").status == JOB_STATE_FAILURE

        # A second monitor connects and first receives a full snapshot.
        with ee_monitor.create(host, port, "wss", cert, token) as monitor2:
            events2 = monitor2.track()
            full_snapshot_event = next(events2)
            event_type = full_snapshot_event["type"]
            assert event_type == identifiers.EVTYPE_EE_SNAPSHOT
            current = Snapshot(full_snapshot_event.data)
            assert current.get_status() == ENSEMBLE_STATE_UNKNOWN
            assert current.get_job("0", "0", "0").status == JOB_STATE_RUNNING
            assert current.get_job("1", "0", "0").status == JOB_STATE_FINISHED

            # One monitor asks the server to exit ...
            monitor.signal_cancel()

            # ... and both monitors should receive a terminated event.
            terminated = next(events)
            terminated2 = next(events2)
            assert terminated["type"] == identifiers.EVTYPE_EE_TERMINATED
            assert terminated2["type"] == identifiers.EVTYPE_EE_TERMINATED

            # Nothing more may be delivered on either stream.
            for stream in [events, events2]:
                for unexpected_event in stream:
                    assert (
                        False
                    ), f"got unexpected event {unexpected_event} from monitor"