def test_update_partial_from_multiple_cloudevents(snapshot):
    """Feed several job events into one PartialSnapshot and check job statuses.

    Job 0 receives a RUNNING event followed by a FAILURE event (carrying an
    error message); job 1 receives a SUCCESS event. The dict produced by the
    partial snapshot is then checked for the resulting status of each job.
    """
    partial = PartialSnapshot(snapshot)
    partial.from_cloudevent(
        CloudEvent(
            {
                "id": "0",
                "type": ids.EVTYPE_FM_JOB_RUNNING,
                "source": "/real/0/step/0/job/0",
            }
        )
    )
    partial.from_cloudevent(
        CloudEvent(
            {
                "id": "0",
                "type": ids.EVTYPE_FM_JOB_FAILURE,
                "source": "/real/0/step/0/job/0",
            },
            {ids.ERROR_MSG: "failed"},
        )
    )
    partial.from_cloudevent(
        CloudEvent(
            {
                "id": "1",
                "type": ids.EVTYPE_FM_JOB_SUCCESS,
                "source": "/real/0/step/0/job/1",
            }
        )
    )

    jobs = partial.to_dict()["reals"]["0"]["steps"]["0"]["jobs"]
    # These comparisons used to be bare expressions, so the test could never
    # fail; they must be asserted to verify anything.
    assert jobs["0"]["status"] == state.JOB_STATE_FAILURE
    assert jobs["1"]["status"] == state.JOB_STATE_FINISHED
def send_dispatch_event(client, event_type, source, event_id, data):
    """Wrap the arguments in a CloudEvent and send its JSON form via ``client``.

    ``event_type``, ``source`` and ``event_id`` become the event's ``type``,
    ``source`` and ``id`` attributes; ``data`` is the event payload.
    """
    attributes = {
        "type": event_type,
        "source": source,
        "id": event_id,
    }
    serialized = to_json(CloudEvent(attributes, data))
    client.send(serialized)
async def test_run_and_cancel_legacy_ensemble(
    tmpdir, unused_tcp_port, make_ensemble_builder
):
    """Run a legacy ensemble in a thread, cancel it, and expect "Cancelled".

    A config file naming the unused TCP port is written, the evaluator is
    started in a background thread, and a user-cancel CloudEvent is sent over
    the client websocket. After the thread has joined, the evaluator snapshot
    status is checked.
    """
    num_reals = 10
    conf_file = Path(tmpdir / CONFIG_FILE)

    with tmpdir.as_cwd():
        with open(conf_file, "w") as config_handle:
            config_handle.write(f'port: "{unused_tcp_port}"\n')

        ensemble = make_ensemble_builder(tmpdir, num_reals, 2).build()
        config = load_config(conf_file)
        evaluator = EnsembleEvaluator(ensemble, config, ee_id="1")

        evaluator_thread = threading.Thread(
            name="test_eval",
            target=evaluator.run_and_get_successful_realizations,
            args=(),
        )
        evaluator_thread.start()

        # Block until the evaluator's websocket endpoint is up
        await wait_for_ws(config["url"], 10)

        # Ask the evaluator to terminate
        async with websockets.connect(config["client_url"]) as websocket:
            cancel_attributes = {
                "type": identifiers.EVTYPE_EE_USER_CANCEL,
                "source": "/ert/test/0",
                "id": "ID",
            }
            cancel_event = CloudEvent(cancel_attributes)
            await websocket.send(to_json(cancel_event))

        evaluator_thread.join()
        assert evaluator._snapshot.get_status() == "Cancelled"
def _cancel(self):
    """Kill all queued jobs, cancel the aggregate future and dispatch CANCELLED.

    Blocks until ``self._allow_cancel`` is set. If the aggregate future has
    already been cancelled, the method returns without dispatching an event.
    Otherwise an EVTYPE_ENSEMBLE_CANCELLED CloudEvent is sent to the
    configured dispatch URI using a temporary event loop.
    """
    logger.debug("cancelling, waiting for wakeup...")
    self._allow_cancel.wait()

    logger.debug("got wakeup, killing all jobs...")
    self._job_queue.kill_all_jobs()

    logger.debug("cancelling futures...")
    if self._aggregate_future.cancelled():
        logger.debug("future was already cancelled")
        return

    self._aggregate_future.cancel()
    logger.debug("cancelled")

    out_cloudevent = CloudEvent(
        {
            "type": identifiers.EVTYPE_ENSEMBLE_CANCELLED,
            "source": f"/ert/ee/{self._ee_id}/ensemble",
            "id": str(uuid.uuid1()),
        }
    )

    # A dedicated loop drives the send coroutine. Close it in ``finally`` so
    # the loop's resources are released even when sending raises.
    loop = asyncio.new_event_loop()
    try:
        loop.run_until_complete(
            self.send_cloudevent(self._config.dispatch_uri, out_cloudevent)
        )
    finally:
        loop.close()
async def _send_snapshot_update(self, snapshot_mutate_event):
    """Merge an update into the snapshot and send it to every client.

    ``snapshot_mutate_event`` is merged into ``self._snapshot`` and its dict
    form, extended with an ``"iter"`` entry set to ``self._iter``, becomes
    the payload of an EVTYPE_EE_SNAPSHOT_UPDATE CloudEvent. The serialized
    event is sent to each entry of ``self._clients``; nothing is sent when
    there are no clients or the message is empty.
    """
    self._snapshot.merge_event(snapshot_mutate_event)
    out_cloudevent = CloudEvent(
        {
            "type": identifiers.EVTYPE_EE_SNAPSHOT_UPDATE,
            "source": f"/ert/ee/{self._ee_id}",
            "id": self.event_index(),
        },
        snapshot_mutate_event.to_dict(),
    )
    out_cloudevent.data["iter"] = self._iter
    out_msg = to_json(
        out_cloudevent, data_marshaller=serialization.evaluator_marshaller
    ).decode()

    if out_msg and self._clients:
        # asyncio.wait() no longer accepts bare coroutines (deprecated in
        # Python 3.8, an error from 3.11), so schedule each send explicitly.
        pending_sends = [
            asyncio.ensure_future(client.send(out_msg))
            for client in self._clients
        ]
        await asyncio.wait(pending_sends)
def terminate_message(self):
    """Return the decoded JSON form of an EVTYPE_EE_TERMINATED CloudEvent.

    The event's source is this evaluator (``/ert/ee/<ee_id>``) and its id is
    taken from ``self.event_index()``.
    """
    attributes = {
        "type": identifiers.EVTYPE_EE_TERMINATED,
        "source": f"/ert/ee/{self._ee_id}",
        "id": self.event_index(),
    }
    return to_json(CloudEvent(attributes)).decode()
def on_timeout(callback_args: Sequence[Any]) -> None:
    """Queue a step-timeout CloudEvent for the run described by the callback.

    The first element of ``callback_args`` is treated as the ``RunArg``
    whose ``iens`` identifies the realization in the event source. The event
    is put on ``timeout_queue`` without waiting.
    """
    run_args: RunArg = callback_args[0]
    attributes = {
        "type": identifiers.EVTYPE_FM_STEP_TIMEOUT,
        "source": f"/ert/ee/{self._ee_id}/real/{run_args.iens}/step/0",
        "id": str(uuid.uuid1()),
    }
    timeout_queue.put_nowait(CloudEvent(attributes))
async def handle_result(self, websocket, path):
    """Send the stored result, or a not-ready event, over ``websocket``.

    When ``self._result`` is ``None`` an EVTYPE_EE_RESULT_NOT_READY event
    without payload is sent. Otherwise the result is serialized with
    ``cloudpickle`` and sent as an ``application/octet-stream``
    EVTYPE_EE_RESULT event. ``path`` is not used.
    """
    if self._result is not None:
        attributes = {
            "type": identifiers.EVTYPE_EE_RESULT,
            "source": f"/ert/ee/{self._ee_id}",
            "datacontenttype": "application/octet-stream",
        }
        event = CloudEvent(attributes, cloudpickle.dumps(self._result))
    else:
        attributes = {
            "type": identifiers.EVTYPE_EE_RESULT_NOT_READY,
            "source": f"/ert/ee/{self._ee_id}",
        }
        event = CloudEvent(attributes)

    await websocket.send(to_json(event))
def signal_cancel(self):
    """Send an EVTYPE_EE_USER_CANCEL CloudEvent from this monitor.

    The event is handed to ``self._send_event``; a debug line is logged
    before and after.
    """
    logger.debug(f"monitor-{self._id} asking server to cancel...")

    attributes = {
        "type": identifiers.EVTYPE_EE_USER_CANCEL,
        "source": f"/ert/monitor/{self._id}",
        "id": str(uuid.uuid1()),
    }
    self._send_event(CloudEvent(attributes))

    logger.debug(f"monitor-{self._id} asked server to cancel")
def signal_done(self):
    """Send an EVTYPE_EE_USER_DONE CloudEvent from this monitor.

    The event is handed to ``self._send_event``; a debug line is logged
    before and after.
    """
    logger.debug(f"monitor-{self._id} informing server monitor is done...")

    attributes = {
        "type": identifiers.EVTYPE_EE_USER_DONE,
        "source": f"/ert/monitor/{self._id}",
        "id": str(uuid.uuid1()),
    }
    self._send_event(CloudEvent(attributes))

    logger.debug(f"monitor-{self._id} informed server monitor is done")
def create_snapshot_msg(ee_id, snapshot, event_index):
    """Return the decoded JSON form of an EVTYPE_EE_SNAPSHOT CloudEvent.

    The payload is ``snapshot.to_dict()``; the source is ``/ert/ee/<ee_id>``
    and the id is ``event_index``.
    """
    payload = snapshot.to_dict()
    attributes = {
        "type": identifiers.EVTYPE_EE_SNAPSHOT,
        "source": f"/ert/ee/{ee_id}",
        "id": event_index,
    }
    snapshot_event = CloudEvent(attributes, payload)
    return to_json(snapshot_event).decode()
def terminate_message(self):
    """Return the decoded JSON form of an EVTYPE_EE_TERMINATED CloudEvent.

    The event id is ``self.event_index()`` converted to ``str``, and the
    event is serialized with the evaluator data marshaller.
    """
    attributes = {
        "type": identifiers.EVTYPE_EE_TERMINATED,
        "source": f"/ert/ee/{self._ee_id}",
        "id": str(self.event_index()),
    }
    serialized = to_json(
        CloudEvent(attributes),
        data_marshaller=serialization.evaluator_marshaller,
    )
    return serialized.decode()
async def mock_writer(filename, times=2):
    """Append ``times`` fake CloudEvents to ``filename``, one JSON per line.

    After each written line the coroutine sleeps for 0.2 seconds. Once all
    events are written, ``EVTYPE_FM_STEP_SUCCESS`` is appended to the file.
    """
    async with aiofiles.open(filename, mode="a") as out_file:
        for index in range(times):
            fake_event = CloudEvent(
                {
                    "source": "/mock",
                    "id": f"time-{index}",
                    "type": "fake",
                    "data": {"time": index},
                }
            )
            line = to_json(fake_event).decode() + "\n"
            await out_file.write(line)
            await asyncio.sleep(0.2)
        await out_file.write(EVTYPE_FM_STEP_SUCCESS)
def create_snapshot_msg(ee_id, iter_, snapshot, event_index):
    """Return the decoded JSON form of an EVTYPE_EE_SNAPSHOT CloudEvent.

    The payload is the dict returned by ``snapshot.to_dict()`` with its
    ``"iter"`` entry set to ``iter_``. The event is serialized with the
    evaluator data marshaller.
    """
    payload = snapshot.to_dict()
    payload["iter"] = iter_
    attributes = {
        "type": identifiers.EVTYPE_EE_SNAPSHOT,
        "source": f"/ert/ee/{ee_id}",
        "id": event_index,
    }
    serialized = to_json(
        CloudEvent(attributes, payload),
        data_marshaller=serialization.evaluator_marshaller,
    )
    return serialized.decode()
def test_multiple_cloud_events_trigger_non_communicated_change():
    """A realization finishes once its steps do, without an explicit event.

    Only a step-success event is sent; no event addresses the realization
    status directly. The realization status is nevertheless expected to be
    reported as finished.
    """
    builder = SnapshotBuilder().add_step(step_id="0", status="Unknown")
    snapshot = builder.build(["0"], status="Unknown")
    partial = PartialSnapshot(snapshot)

    step_success = CloudEvent(
        {
            "id": "0",
            "type": ids.EVTYPE_FM_STEP_SUCCESS,
            "source": "/real/0/step/0",
        }
    )
    partial.from_cloudevent(step_success)

    realization_status = partial.to_dict()["reals"]["0"]["status"]
    assert realization_status == state.REALIZATION_STATE_FINISHED
def test_data_marshaller_and_unmarshaller():
    """Round-trip a datetime payload through the evaluator (un)marshaller.

    The event is serialized with ``evaluator_marshaller`` and read back with
    ``evaluator_unmarshaller``; the restored ``start_time`` must be a
    ``datetime`` and the restored event must equal the original.
    """
    payload = {"start_time": datetime.datetime.now()}
    attributes = {
        "type": "com.equinor.ert.ee.snapshot",
        "source": f"/ert/ee/{0}",
        "id": 0,
    }
    out_cloudevent = CloudEvent(attributes, payload)

    serialized = to_json(
        out_cloudevent, data_marshaller=serialization.evaluator_marshaller
    )
    restored = from_json(
        serialized, data_unmarshaller=serialization.evaluator_unmarshaller
    )

    assert isinstance(restored.data["start_time"], datetime.datetime)
    assert out_cloudevent == restored
def from_http(
    data: typing.Union[str, bytes],
    headers: typing.Dict[str, str],
    data_unmarshaller: types.UnmarshallerType = None,
):
    """
    Build a CloudEvent from a binary or structured HTTP request.

    :param data: the HTTP request body
    :type data: typing.Union[str, bytes]
    :param headers: the HTTP headers
    :type headers: typing.Dict[str, str]
    :param data_unmarshaller: Callable mapping the raw data to a python
        object, e.g. lambda x: x or lambda x: json.loads(x). Defaults to
        ``_json_or_string`` when None.
    :type data_unmarshaller: types.UnmarshallerType
    :raises ValueError: if no specversion is found or it is not supported
    """
    if data_unmarshaller is None:
        data_unmarshaller = _json_or_string

    marshall = marshaller.NewDefaultHTTPMarshaller()

    # Binary requests carry the spec version in a header, structured ones in
    # the JSON body.
    if converters.is_binary(headers):
        specversion = headers.get("ce-specversion")
    else:
        specversion = json.loads(data).get("specversion")

    if specversion is None:
        raise ValueError("could not find specversion in HTTP request")

    event_handler = _obj_by_version.get(specversion)
    if event_handler is None:
        raise ValueError(f"found invalid specversion {specversion}")

    event = marshall.FromRequest(
        event_handler(), headers, data, data_unmarshaller=data_unmarshaller
    )

    # Flatten extensions into the attribute mapping and keep the payload out
    # of it; the payload is passed separately.
    attrs = event.Properties()
    for dropped_key in ("data", "extensions"):
        attrs.pop(dropped_key, None)
    attrs.update(**event.extensions)

    return CloudEvent(attrs, event.data)
def _create_cloud_event(
    self,
    event_type,
    data={},
    extra_attrs={},
    data_marshaller=serialization.evaluator_marshaller,
):
    """Return the decoded JSON form of a CloudEvent sourced at this evaluator.

    :param event_type: value of the event's ``type`` attribute
    :param data: event payload. When it is a dict, the payload is a shallow
        copy of it with an ``"iter"`` entry set to ``self._iter``; any other
        value is passed through unchanged.
    :param extra_attrs: additional attributes; entries here are applied
        after ``type`` and ``source`` and therefore override them.
    :param data_marshaller: marshaller handed to ``to_json``
    :return: the decoded output of ``to_json``
    """
    if isinstance(data, dict):
        # Build a copy instead of assigning into the argument: the previous
        # in-place write leaked "iter" into the caller's dict and into the
        # shared default ``{}``.
        data = {**data, "iter": self._iter}

    # The mutable defaults in the signature are kept for interface
    # compatibility; they are only read here, never modified.
    attrs = {
        "type": event_type,
        "source": f"/ert/ee/{self._ee_id}",
    }
    attrs.update(extra_attrs)

    out_cloudevent = CloudEvent(attrs, data)
    return to_json(out_cloudevent, data_marshaller=data_marshaller).decode()
def from_http(
    headers: typing.Dict[str, str],
    data: typing.Union[str, bytes, None],
    data_unmarshaller: types.UnmarshallerType = None,
):
    """
    Build a CloudEvent from a binary or structured HTTP request.

    :param headers: the HTTP headers; keys are lower-cased before use
    :type headers: typing.Dict[str, str]
    :param data: the HTTP request body. If set to None, "" or b'',
        the returned event's data field will be set to None
    :type data: typing.Union[str, bytes, None]
    :param data_unmarshaller: Callable mapping the raw data to a python
        object, e.g. lambda x: x or lambda x: json.loads(x). Defaults to
        ``_json_or_string`` when None.
    :type data_unmarshaller: types.UnmarshallerType
    :raises cloud_exceptions.InvalidStructuredJSON: if data is not
        str, bytes or bytearray
    :raises cloud_exceptions.MissingRequiredFields: if no specversion can be
        read from the headers or the body
    :raises cloud_exceptions.InvalidRequiredFields: if the specversion is not
        a supported one
    """
    if data is None or data == b"":
        # Empty string will cause data to be marshalled into None
        data = ""

    if not isinstance(data, (str, bytes, bytearray)):
        raise cloud_exceptions.InvalidStructuredJSON(
            "Expected json of type (str, bytes, bytearray), "
            f"but instead found type {type(data)}"
        )

    headers = {name.lower(): content for name, content in headers.items()}
    if data_unmarshaller is None:
        data_unmarshaller = _json_or_string

    marshall = marshaller.NewDefaultHTTPMarshaller()

    if is_binary(headers):
        specversion = headers.get("ce-specversion", None)
    else:
        try:
            raw_ce = json.loads(data)
        except json.decoder.JSONDecodeError:
            raise cloud_exceptions.MissingRequiredFields(
                "Failed to read specversion from both headers and data. "
                f"The following can not be parsed as json: {data}"
            )
        if not hasattr(raw_ce, "get"):
            raise cloud_exceptions.MissingRequiredFields(
                "Failed to read specversion from both headers and data. "
                f"The following deserialized data has no 'get' method: {raw_ce}"
            )
        specversion = raw_ce.get("specversion", None)

    if specversion is None:
        raise cloud_exceptions.MissingRequiredFields(
            "Failed to find specversion in HTTP request"
        )

    event_handler = _obj_by_version.get(specversion, None)
    if event_handler is None:
        raise cloud_exceptions.InvalidRequiredFields(
            f"Found invalid specversion {specversion}"
        )

    event = marshall.FromRequest(
        event_handler(), headers, data, data_unmarshaller=data_unmarshaller
    )

    attrs = event.Properties()
    for dropped_key in ("data", "extensions"):
        attrs.pop(dropped_key, None)
    attrs.update(**event.extensions)

    # TODO: Check binary unmarshallers to debug why setting data to ""
    # returns an event with data set to None, but structured will return ""
    payload = event.data
    if payload == "" or payload == b"":
        payload = None

    return CloudEvent(attrs, payload)
return MagicMock(track=MagicMock(side_effect=_track)) return _mock_ee_monitor @pytest.mark.timeout(60) @pytest.mark.parametrize( "run_model, monitor_events,brm_mutations,expected_progress", [ pytest.param( ERT3RunModel, [ CloudEvent( # zero realizations completed {"source": "/", "type": ids.EVTYPE_EE_SNAPSHOT}, data={ **(build_snapshot(["0", "1", "2"]).to_dict()), "iter": 0, }, ), CloudEvent( {"source": "/", "type": ids.EVTYPE_EE_SNAPSHOT_UPDATE}, data={ **( build_partial(["0"]) # Complete one realizations completed .update_step( "0", "0", Step(status=state.STEP_STATE_SUCCESS) ).to_dict() ), "iter": 0, },
return MagicMock(track=MagicMock(side_effect=_track)) return _mock_ee_monitor @pytest.mark.timeout(60) @pytest.mark.parametrize( "monitor_events,expected_types", [ pytest.param( [ CloudEvent( { "source": "/", "type": ids.EVTYPE_EE_SNAPSHOT }, data={ **(build_snapshot().to_dict()), "iter": 0 }, ), CloudEvent( { "source": "/", "type": ids.EVTYPE_EE_SNAPSHOT_UPDATE }, data={ **build_partial().to_dict(), "iter": 0 }, ), CloudEvent({ "source": "/",
async def _evaluate_inner() -> None:
    """
    Inner coroutine performing the actual evaluation.

    It sets up the timeout callback, dispatches a STARTED event, submits one
    stage per active realization to the JobQueue and runs the queue. Once
    the queue has been attempted, a final CloudEvent (STOPPED, CANCELLED or
    FAILED) describing the outcome is always dispatched.

    :raises ValueError: if the evaluator id or the config is missing
    """

    def _ensemble_event(event_type):
        # All ensemble-level events share the same source and get a fresh id.
        return CloudEvent(
            {
                "type": event_type,
                "source": f"/ert/ee/{self._ee_id}/ensemble",
                "id": str(uuid.uuid1()),
            }
        )

    # Set up the timeout-mechanism
    timeout_queue: "asyncio.Queue[CloudEvent]" = asyncio.Queue()
    on_timeout, send_timeout_future = self.setup_timeout_callback(
        timeout_queue
    )

    if not self._ee_id:
        raise ValueError(f"invalid ensemble evaluator id: {self._ee_id}")
    if not self._config:
        raise ValueError("no config")  # mypy

    # Outcome reported when the queue runs to completion
    result = _ensemble_event(identifiers.EVTYPE_ENSEMBLE_STOPPED)

    try:
        # Dispatch STARTED-event
        started_event = _ensemble_event(identifiers.EVTYPE_ENSEMBLE_STARTED)
        await self.send_cloudevent(
            self._config.dispatch_uri,
            started_event,
            token=self._config.token,
            cert=self._config.cert,
        )

        # Submit all jobs to queue and inform queue when done
        for real in self.active_reals:
            self._job_queue.add_ee_stage(  # type: ignore
                real.steps[0], callback_timeout=on_timeout
            )
        self._job_queue.submit_complete()  # type: ignore

        # TODO: this is sort of a callback being preemptively called.
        # It should be lifted out of the queue/evaluate, into the evaluator. If
        # something is long running, the evaluator will know and should send
        # commands to the task in order to have it killed/retried.
        # See https://github.com/equinor/ert/issues/1229
        stop_long_running = (
            self._analysis_config.get_stop_long_running()
            and self._analysis_config.minimum_required_realizations > 0
        )
        if stop_long_running:
            queue_evaluators = [
                partial(
                    self._job_queue.stop_long_running_jobs,
                    self._analysis_config.minimum_required_realizations,
                )
            ]
        else:
            queue_evaluators = None

        # Tell queue to pass info to the jobs-file
        # NOTE: This touches files on disk...
        self._job_queue.add_ensemble_evaluator_information_to_jobs_file(
            self._ee_id,
            self._config.dispatch_uri,
            self._config.cert,
            self._config.token,
        )

        # Finally, run the queue-loop until it finishes or raises
        await self._job_queue.execute_queue_async(
            self._config.dispatch_uri,
            self._ee_id,
            threading.BoundedSemaphore(value=CONCURRENT_INTERNALIZATION),
            queue_evaluators,  # type: ignore
            cert=self._config.cert,
            token=self._config.token,
        )

    except asyncio.CancelledError:
        logger.debug("ensemble was cancelled")
        result = _ensemble_event(identifiers.EVTYPE_ENSEMBLE_CANCELLED)

    except Exception:  # pylint: disable=broad-except
        logger.exception(
            "unexpected exception in ensemble",
            exc_info=True,
        )
        result = _ensemble_event(identifiers.EVTYPE_ENSEMBLE_FAILED)

    else:
        logger.debug("ensemble finished normally")

    finally:
        await timeout_queue.put(None)  # signal to exit timer
        await send_timeout_future

        # Dispatch final result from evaluator - FAILED, CANCEL or STOPPED
        assert self._config  # mypy
        await self.send_cloudevent(
            self._config.dispatch_uri,
            result,
            token=self._config.token,
            cert=self._config.cert,
        )
def _evaluate(self):
    """Run the ensemble through a JobQueue on a fresh event loop.

    A STARTED event is dispatched, a job queue is created and filled with
    the first step of each active realization, and the queue is executed.
    When the aggregate future finishes without being cancelled a STOPPED
    event is dispatched; any other exception is logged and reported with a
    FAILED event.
    """

    def _dispatch(event_type):
        # Build an ensemble-level event and send it synchronously on the
        # current event loop.
        event = CloudEvent(
            {
                "type": event_type,
                "source": f"/ert/ee/{self._ee_id}/ensemble",
                "id": str(uuid.uuid1()),
            }
        )
        asyncio.get_event_loop().run_until_complete(
            self.send_cloudevent(dispatch_url, event)
        )

    asyncio.set_event_loop(asyncio.new_event_loop())
    dispatch_url = self._config.dispatch_uri

    try:
        _dispatch(identifiers.EVTYPE_ENSEMBLE_STARTED)

        self._job_queue = self._queue_config.create_job_queue()
        for real in self.get_active_reals():
            self._job_queue.add_ee_stage(real.get_steps()[0])
        self._job_queue.submit_complete()

        # TODO: this is sort of a callback being preemptively called.
        # It should be lifted out of the queue/evaluate, into the evaluator. If
        # something is long running, the evaluator will know and should send
        # commands to the task in order to have it killed/retried.
        # See https://github.com/equinor/ert/issues/1229
        stop_long_running = (
            self._analysis_config.get_stop_long_running()
            and self._analysis_config.minimum_required_realizations > 0
        )
        if stop_long_running:
            queue_evaluators = [
                partial(
                    self._job_queue.stop_long_running_jobs,
                    self._analysis_config.minimum_required_realizations,
                )
            ]
        else:
            queue_evaluators = None

        self._job_queue.add_ensemble_evaluator_information_to_jobs_file(
            self._ee_id, dispatch_url
        )

        queue_run = self._job_queue.execute_queue_async(
            dispatch_url,
            threading.BoundedSemaphore(value=CONCURRENT_INTERNALIZATION),
            queue_evaluators,
        )
        self._aggregate_future = asyncio.gather(
            queue_run, return_exceptions=True
        )
        # From here on the aggregate future exists, so cancelling is allowed
        self._allow_cancel.set()

        try:
            asyncio.get_event_loop().run_until_complete(
                self._aggregate_future
            )
        except asyncio.CancelledError:
            logger.debug("cancelled aggregate future")
        else:
            _dispatch(identifiers.EVTYPE_ENSEMBLE_STOPPED)

    except Exception:
        logger.exception(
            "An exception occurred while starting the ensemble evaluation",
            exc_info=True,
        )
        _dispatch(identifiers.EVTYPE_ENSEMBLE_FAILED)
return MagicMock(track=MagicMock(side_effect=_track)) return _mock_ee_monitor @pytest.mark.timeout(60) @pytest.mark.parametrize( "monitor_events,brm_mutations,expected_progress", [ pytest.param( [ CloudEvent( {"source": "/", "type": ids.EVTYPE_EE_SNAPSHOT}, data={ **(build_snapshot(["0", "1"]).to_dict()), "iter": 0, }, ), CloudEvent( {"source": "/", "type": ids.EVTYPE_EE_SNAPSHOT_UPDATE}, data={ **( build_partial(["0", "1"]) .update_step( "0", "0", Step(status=state.STEP_STATE_SUCCESS) ) .to_dict() ), "iter": 0, },