示例#1
0
def test_monitor_failing_ensemble(make_ee_config, unused_tcp_port):
    """Monitor an ensemble with a registered failing job until it reports STOPPED.

    A 2-realization / 2-step / 2-job ``TestEnsemble`` gets one fail job
    registered (realization 1, step 0, job 1).  The monitor connects through
    a ``NarrativeProxy`` and the ensemble is started on every snapshot event;
    the monitor signals done once an event carries the stopped status.

    Args:
        make_ee_config: factory called with ``use_token=False`` and
            ``generate_cert=False`` to obtain the evaluator config.
        unused_tcp_port: port number used to build the narrative's
            ``ws://localhost:<port>`` URI.
    """
    test_ensemble = TestEnsemble(iter=1, reals=2, steps=2, jobs=2)
    test_ensemble.addFailJob(real=1, step=0, job=1)
    config = make_ee_config(use_token=False, generate_cert=False)
    evaluator = EnsembleEvaluator(test_ensemble, config, 0, ee_id="ee-0")

    # The run() context is entered and left right away; only the proxied
    # monitor created below is used for tracking.
    with evaluator.run():
        pass

    narrative = monitor_failing_ensemble.on_uri(
        f"ws://localhost:{unused_tcp_port}"
    )
    with NarrativeProxy(narrative).proxy(
        config.url
    ) as proxy_port, ee_monitor.create(
        "localhost", proxy_port, "ws", None, None
    ) as tracker:
        for evt in tracker.track():
            if evt["type"] == identifiers.EVTYPE_EE_SNAPSHOT:
                test_ensemble.start()

            payload = evt.data
            if not payload:
                continue
            if payload.get(identifiers.STATUS) == ENSEMBLE_STATE_STOPPED:
                tracker.signal_done()

    test_ensemble.join()
示例#2
0
def test_monitor_failing_evaluation(make_ee_config):
    """Monitor an ensemble configured ``with_failure()`` until it reports FAILED.

    A single-realization / single-step / single-job ``TestEnsemble`` is
    evaluated.  The monitor is created from the config's connection info
    while a ``NarrativeProxy`` is active; the ensemble is started on every
    snapshot event and the monitor signals done once an event carries the
    failed status.

    Args:
        make_ee_config: factory called with ``use_token=False`` and
            ``generate_cert=False`` to obtain the evaluator config.
    """
    config = make_ee_config(use_token=False, generate_cert=False)
    test_ensemble = TestEnsemble(iter=1, reals=1, steps=1, jobs=1)
    test_ensemble.with_failure()
    evaluator = EnsembleEvaluator(test_ensemble, config, 0, ee_id="ee-0")
    evaluator.run()

    narrative = monitor_failing_evaluation().on_uri(
        f"ws://localhost:{config.port}"
    )
    with NarrativeProxy(narrative).proxy(config.url), ee_monitor.create(
        config.get_connection_info()
    ) as tracker:
        for evt in tracker.track():
            if evt["type"] == identifiers.EVTYPE_EE_SNAPSHOT:
                test_ensemble.start()

            payload = evt.data
            if not payload:
                continue
            if payload.get(identifiers.STATUS) == ENSEMBLE_STATE_FAILED:
                tracker.signal_done()

    test_ensemble.join()
示例#3
0
def test_monitor_successful_ensemble(make_ee_config):
    """Monitor an ensemble through a narrative proxy until it reports STOPPED.

    A 2-realization / 2-step / 2-job ``TestEnsemble`` is built and a fail
    job is registered on it (realization 1, step 0, job 1) before it is
    handed to the evaluator.  The ensemble is started on every snapshot
    event and the monitor signals done once an event carries the stopped
    status.

    Args:
        make_ee_config: factory called with ``use_token=False`` and
            ``generate_cert=False`` to obtain the evaluator config.
    """
    test_ensemble = TestEnsemble(iter=1, reals=2, steps=2, jobs=2)
    test_ensemble.addFailJob(real=1, step=0, job=1)
    config = make_ee_config(use_token=False, generate_cert=False)
    evaluator = EnsembleEvaluator(test_ensemble, config, 0, ee_id="ee-0")
    evaluator.run()

    narrative = monitor_successful_ensemble()
    with NarrativeProxy(narrative).proxy(config.url), ee_monitor.create(
        config.get_connection_info()
    ) as tracker:
        for evt in tracker.track():
            if evt["type"] == identifiers.EVTYPE_EE_SNAPSHOT:
                test_ensemble.start()

            payload = evt.data
            if not payload:
                continue
            if payload.get(identifiers.STATUS) == ENSEMBLE_STATE_STOPPED:
                tracker.signal_done()

    test_ensemble.join()
示例#4
0
def test_dispatchers_can_connect_and_monitor_can_shut_down_evaluator(
        evaluator):
    """Check dispatcher events, a late-joining monitor and a monitor-issued cancel.

    Sequence exercised:

    1. The first tracked event yields a snapshot with the started status.
    2. Two dispatch clients send job events; after each one the next tracked
       event is read and the affected job's status is asserted.
    3. A second monitor connects and its first snapshot must already contain
       the job states reported so far.
    4. The first monitor signals cancel; both event streams must yield a
       terminated event and nothing after it.

    Args:
        evaluator: evaluator under test (presumably a pytest fixture); its
            ``_config`` supplies the host and port used by the clients.
    """
    with evaluator.run() as monitor:
        events = monitor.track()

        host = evaluator._config.host
        port = evaluator._config.port

        # first snapshot before any event occurs
        snapshot_event = next(events)
        snapshot = Snapshot(snapshot_event.data)
        assert snapshot.get_status() == ENSEMBLE_STATE_STARTED
        # two dispatchers connect
        with Client(host, port, "/dispatch") as dispatch1, Client(
                host, port, "/dispatch") as dispatch2:

            # first dispatcher informs that job 0 is running
            send_dispatch_event(
                dispatch1,
                identifiers.EVTYPE_FM_JOB_RUNNING,
                "/ert/ee/0/real/0/step/0/job/0",
                "event1",
                {"current_memory_usage": 1000},
            )
            snapshot = Snapshot(next(events).data)
            assert snapshot.get_job("0", "0", "0").status == JOB_STATE_RUNNING

            # second dispatcher informs that job 0 is running
            send_dispatch_event(
                dispatch2,
                identifiers.EVTYPE_FM_JOB_RUNNING,
                "/ert/ee/0/real/1/step/0/job/0",
                "event1",
                {"current_memory_usage": 1000},
            )
            snapshot = Snapshot(next(events).data)
            assert snapshot.get_job("1", "0", "0").status == JOB_STATE_RUNNING

            # second dispatcher informs that job 0 is done
            send_dispatch_event(
                dispatch2,
                identifiers.EVTYPE_FM_JOB_SUCCESS,
                "/ert/ee/0/real/1/step/0/job/0",
                "event1",
                {"current_memory_usage": 1000},
            )
            snapshot = Snapshot(next(events).data)
            assert snapshot.get_job("1", "0", "0").status == JOB_STATE_FINISHED

            # second dispatcher informs that job 1 is failed
            send_dispatch_event(
                dispatch2,
                identifiers.EVTYPE_FM_JOB_FAILURE,
                "/ert/ee/0/real/1/step/0/job/1",
                "event_job_1_fail",
                {identifiers.ERROR_MSG: "error"},
            )
            snapshot = Snapshot(next(events).data)
            assert snapshot.get_job("1", "0", "1").status == JOB_STATE_FAILURE

            # a second monitor connects
            with ee_monitor.create(host, port) as monitor2:
                events2 = monitor2.track()
                snapshot = Snapshot(next(events2).data)
                assert snapshot.get_status() == ENSEMBLE_STATE_STARTED
                assert snapshot.get_job("0", "0",
                                        "0").status == JOB_STATE_RUNNING
                assert snapshot.get_job("1", "0",
                                        "0").status == JOB_STATE_FINISHED

                # one monitor requests that server exit
                monitor.signal_cancel()

                # both monitors should get a terminated event
                terminated = next(events)
                terminated2 = next(events2)
                assert terminated["type"] == identifiers.EVTYPE_EE_TERMINATED
                assert terminated2["type"] == identifiers.EVTYPE_EE_TERMINATED

                # Any further event is a failure.  Raise explicitly instead of
                # `assert False`, which is removed when Python runs with -O.
                for stream in (events, events2):
                    for _ in stream:
                        raise AssertionError(
                            "got unexpected event from monitor")
示例#5
0
def test_dispatchers_can_connect_and_monitor_can_shut_down_evaluator(
        evaluator):
    """Check dispatcher events, a late-joining monitor and a monitor-issued cancel.

    Sequence exercised:

    1. The first tracked event yields a snapshot with the unknown status.
    2. Two dispatch clients (built from the config's url, cert and token)
       send four job events; the next tracked event is then read and must
       reflect the resulting state of all three jobs.
    3. After the dispatch clients are closed, a second monitor connects; its
       first event must be a snapshot event containing the job states.
    4. The first monitor signals cancel; both event streams must yield a
       terminated event and nothing after it.

    Args:
        evaluator: evaluator under test (presumably a pytest fixture); its
            ``_config`` supplies url, token, cert and connection info, and
            its ``_ee_id`` is interpolated into the event sources.
    """
    with evaluator.run() as monitor:
        events = monitor.track()
        token = evaluator._config.token
        cert = evaluator._config.cert

        url = evaluator._config.url
        # first snapshot before any event occurs
        snapshot_event = next(events)
        print(snapshot_event)
        snapshot = Snapshot(snapshot_event.data)
        assert snapshot.status == ENSEMBLE_STATE_UNKNOWN
        # two dispatchers connect
        with Client(
                url + "/dispatch",
                cert=cert,
                token=token,
                max_retries=1,
                timeout_multiplier=1,
        ) as dispatch1, Client(
                url + "/dispatch",
                cert=cert,
                token=token,
                max_retries=1,
                timeout_multiplier=1,
        ) as dispatch2:

            # first dispatcher informs that job 0 is running
            send_dispatch_event(
                dispatch1,
                identifiers.EVTYPE_FM_JOB_RUNNING,
                f"/ert/ee/{evaluator._ee_id}/real/0/step/0/job/0",
                "event1",
                {"current_memory_usage": 1000},
            )

            # second dispatcher informs that job 0 is running
            send_dispatch_event(
                dispatch2,
                identifiers.EVTYPE_FM_JOB_RUNNING,
                f"/ert/ee/{evaluator._ee_id}/real/1/step/0/job/0",
                "event1",
                {"current_memory_usage": 1000},
            )

            # second dispatcher informs that job 0 is done
            send_dispatch_event(
                dispatch2,
                identifiers.EVTYPE_FM_JOB_SUCCESS,
                f"/ert/ee/{evaluator._ee_id}/real/1/step/0/job/0",
                "event1",
                {"current_memory_usage": 1000},
            )

            # second dispatcher informs that job 1 is failed
            send_dispatch_event(
                dispatch2,
                identifiers.EVTYPE_FM_JOB_FAILURE,
                f"/ert/ee/{evaluator._ee_id}/real/1/step/0/job/1",
                "event_job_1_fail",
                {identifiers.ERROR_MSG: "error"},
            )
            evt = next(events)
            print(evt)
            snapshot = Snapshot(evt.data)
            assert snapshot.get_job("1", "0", "0").status == JOB_STATE_FINISHED
            assert snapshot.get_job("0", "0", "0").status == JOB_STATE_RUNNING
            assert snapshot.get_job("1", "0", "1").status == JOB_STATE_FAILURE

        # a second monitor connects
        with ee_monitor.create(
                evaluator._config.get_connection_info()) as monitor2:
            events2 = monitor2.track()
            full_snapshot_event = next(events2)
            assert full_snapshot_event[
                "type"] == identifiers.EVTYPE_EE_SNAPSHOT
            snapshot = Snapshot(full_snapshot_event.data)
            assert snapshot.status == ENSEMBLE_STATE_UNKNOWN
            assert snapshot.get_job("0", "0", "0").status == JOB_STATE_RUNNING
            assert snapshot.get_job("1", "0", "0").status == JOB_STATE_FINISHED

            # one monitor requests that server exit
            monitor.signal_cancel()

            # both monitors should get a terminated event
            terminated = next(events)
            terminated2 = next(events2)
            assert terminated["type"] == identifiers.EVTYPE_EE_TERMINATED
            assert terminated2["type"] == identifiers.EVTYPE_EE_TERMINATED

            # Any further event is a failure.  Raise explicitly instead of
            # `assert False`, which is removed when Python runs with -O.
            for stream in (events, events2):
                for unexpected_event in stream:
                    raise AssertionError(
                        f"got unexpected event {unexpected_event} from monitor"
                    )
def test_dispatchers_can_connect_and_monitor_can_shut_down_evaluator(
        evaluator):
    """Check dispatcher events, a late-joining monitor and a monitor-issued cancel.

    Sequence exercised:

    1. The first tracked event yields a snapshot whose status is "Unknown".
    2. Two dispatch clients send job events (sources include a ``stage``
       segment); after each one the next tracked event is read and the
       affected job's status is asserted.
    3. A second monitor connects and its first snapshot must already contain
       the job states reported so far.
    4. After the dispatch clients are closed, the first monitor signals
       cancel; both event streams must yield a terminated event and nothing
       after it.

    Args:
        evaluator: evaluator under test (presumably a pytest fixture); its
            ``_config`` supplies the host and port used by the clients.
    """
    monitor = evaluator.run()
    events = monitor.track()

    host = evaluator._config.host
    port = evaluator._config.port

    # first snapshot before any event occurs
    snapshot_event = next(events)
    snapshot = Snapshot(snapshot_event.data)
    assert snapshot.get_status() == "Unknown"
    # two dispatchers connect
    with Client(host, port,
                "/dispatch") as dispatch1, Client(host, port,
                                                  "/dispatch") as dispatch2:

        # first dispatcher informs that job 0 is running
        send_dispatch_event(
            dispatch1,
            identifiers.EVTYPE_FM_JOB_RUNNING,
            "/ert/ee/0/real/0/stage/0/step/0/job/0",
            "event1",
            {"current_memory_usage": 1000},
        )
        snapshot = Snapshot(next(events).data)
        assert snapshot.get_job("0", "0", "0", "0")["status"] == "Running"

        # second dispatcher informs that job 0 is running
        send_dispatch_event(
            dispatch2,
            identifiers.EVTYPE_FM_JOB_RUNNING,
            "/ert/ee/0/real/1/stage/0/step/0/job/0",
            "event1",
            {"current_memory_usage": 1000},
        )
        snapshot = Snapshot(next(events).data)
        assert snapshot.get_job("1", "0", "0", "0")["status"] == "Running"

        # second dispatcher informs that job 0 is done
        send_dispatch_event(
            dispatch2,
            identifiers.EVTYPE_FM_JOB_SUCCESS,
            "/ert/ee/0/real/1/stage/0/step/0/job/0",
            "event1",
            {"current_memory_usage": 1000},
        )
        snapshot = Snapshot(next(events).data)
        assert snapshot.get_job("1", "0", "0", "0")["status"] == "Finished"

        # a second monitor connects
        monitor2 = ee_monitor.create(host, port)
        events2 = monitor2.track()
        snapshot = Snapshot(next(events2).data)
        assert snapshot.get_status() == "Unknown"
        assert snapshot.get_job("0", "0", "0", "0")["status"] == "Running"
        assert snapshot.get_job("1", "0", "0", "0")["status"] == "Finished"

    # one monitor requests that server exit
    monitor.signal_cancel()

    # both monitors should get a terminated event
    terminated = next(events)
    terminated2 = next(events2)
    assert terminated["type"] == identifiers.EVTYPE_EE_TERMINATED
    assert terminated2["type"] == identifiers.EVTYPE_EE_TERMINATED

    # Any further event is a failure.  Raise explicitly instead of
    # `assert False`, which is removed when Python runs with -O.
    for stream in (events, events2):
        for _ in stream:
            raise AssertionError("got unexpected event from monitor")