def request_termination(self):
    """Signal cancellation to the monitor, then wait for the drainer thread.

    If the monitor websocket cannot be reached, a warning is logged and the
    method returns without signalling anything. Otherwise a cancel signal is
    sent through a monitor and the work queue is cleared once per second for
    as long as the drainer thread is alive.
    """
    log = logging.getLogger("ert_shared.ensemble_evaluator.tracker")

    # The monitor may not be running when this is called: the evaluation can
    # already be finished, or it may not have started yet. Rather than wait
    # for long, give up when the monitor is unreachable after 2 tries.
    #
    # See issue: https://github.com/equinor/ert/issues/1250
    try:
        wait_for_ws(self._monitor_url, 2)
    except ConnectionRefusedError as e:
        log.warning(f"{__name__} - exception {e}")
        return

    cancelling_monitor = create_ee_monitor(self._monitor_host, self._monitor_port)
    with cancelling_monitor as monitor:
        monitor.signal_cancel()

    # Keep clearing the work queue until the drainer thread has exited.
    while True:
        if not self._drainer_thread.is_alive():
            break
        self._clear_work_queue()
        time.sleep(1)
def request_termination(self):
    """Send a cancel signal to the monitor if its websocket is reachable.

    The URL to probe is read from the object returned by ``load_config()``
    under the key ``"url"``. When the connection is refused, a warning is
    logged and nothing is signalled.
    """
    tracker_log = logging.getLogger("ert_shared.ensemble_evaluator.tracker")
    settings = load_config()

    # The monitor may not be running when this is called: the evaluation can
    # already be finished, or it may not have started yet. Rather than wait
    # for long, give up when the monitor is unreachable after 2 tries.
    #
    # See issue: https://github.com/equinor/ert/issues/1250
    try:
        wait_for_ws(settings.get("url"), 2)
    except ConnectionRefusedError as e:
        tracker_log.warning(f"{__name__} - exception {e}")
        return

    create_ee_monitor(self._monitor_host, self._monitor_port).signal_cancel()
def track(self):
    """Yield incoming events until the terminated event has been yielded.

    After ``wait_for_ws`` returns for ``self._base_uri``, ``self._run`` is
    started in a separate thread and handed a future created on
    ``self._loop``. Events are then fetched one at a time from
    ``self._incoming`` (via the loop) and yielded to the caller. Iteration
    ends after an event of type ``identifiers.EVTYPE_EE_TERMINATED`` has been
    yielded, or when the caller closes the generator.

    In both cases the future is resolved with ``None`` and the thread is
    joined before the generator finishes. If the generator was closed early,
    ``self._receive_future.cancel`` is additionally scheduled on the loop.

    Yields:
        The events obtained from ``self._incoming``.
    """
    wait_for_ws(self._base_uri)

    done_future = asyncio.Future(loop=self._loop)

    def _resolve_done():
        # Executed on the loop thread, so the done() check and set_result()
        # cannot interleave with another callback touching the same future.
        if not done_future.done():
            done_future.set_result(None)

    thread = threading.Thread(
        name=f"ert_monitor-{self._id}_loop",
        target=self._run,
        args=(done_future,),
    )
    thread.start()

    event = None
    try:
        while event is None or event["type"] != identifiers.EVTYPE_EE_TERMINATED:
            event = asyncio.run_coroutine_threadsafe(
                self._incoming.get(), self._loop
            ).result()
            yield event
    except GeneratorExit:
        logger.debug(f"monitor-{self._id} generator exit")
        self._loop.call_soon_threadsafe(self._receive_future.cancel)

    # Schedule the resolution exactly once. Scheduling a bare set_result and
    # then re-checking done() from this thread would, in practice, queue a
    # second set_result before the first one has run, and that second call
    # raises asyncio.InvalidStateError inside the loop.
    if not done_future.done():
        self._loop.call_soon_threadsafe(_resolve_done)
    thread.join()
def _drain_monitor(self):
    """Consume monitor events and record them on this object until done.

    A monitor is created for ``self._monitor_host``/``self._monitor_port``
    and its ``track()`` events are handled by type:

    * snapshot: the realization progress for the snapshot's iteration is
      replaced and ``None`` is put on the work queue.
    * snapshot update: the event data is appended to ``self._updates`` and
      ``None`` is put on the work queue.
    * terminated: returns once the model reports it is finished, otherwise
      keeps trying to connect to a new monitor.

    The method returns when a cancelled status is observed, when the model
    is finished, or re-raises ``ConnectionRefusedError`` if the connection
    is refused while the model is not finished.
    """
    drainer_logger = logging.getLogger(
        "ert_shared.ensemble_evaluator.drainer")
    monitor = create_ee_monitor(self._monitor_host, self._monitor_port)
    # `monitor` is rebound below when a terminated event is handled, so each
    # pass of this loop may track a different monitor.
    while monitor:
        try:
            for event in monitor.track():
                if event["type"] == ids.EVTYPE_EE_SNAPSHOT:
                    iter_ = event.data["metadata"]["iter"]
                    with self._state_mutex:
                        # A full snapshot replaces whatever progress was
                        # stored for this iteration.
                        self._realization_progress[
                            iter_] = self._snapshot_to_realization_progress(
                                event.data)
                        # NOTE(review): None looks like a "state changed"
                        # token for whoever consumes the work queue - confirm.
                        self._work_queue.put(None)
                        if event.data.get(
                                "status") == _EVTYPE_SNAPSHOT_STOPPED:
                            drainer_logger.debug(
                                "observed evaluation stopped event, signal done"
                            )
                            monitor.signal_done()
                elif event["type"] == ids.EVTYPE_EE_SNAPSHOT_UPDATE:
                    with self._state_mutex:
                        self._updates.append(event.data)
                        self._work_queue.put(None)
                        # Cancellation ends draining immediately, without
                        # waiting for the work queue to be processed.
                        if event.data.get(
                                "status") == _EVTYPE_SNAPSHOT_CANCELLED:
                            drainer_logger.debug(
                                "observed evaluation cancelled event, return"
                            )
                            return
                        if event.data.get(
                                "status") == _EVTYPE_SNAPSHOT_STOPPED:
                            drainer_logger.debug(
                                "observed evaluation stopped event, signal done"
                            )
                            monitor.signal_done()
                elif event["type"] == ids.EVTYPE_EE_TERMINATED:
                    drainer_logger.debug("got terminator event")
                    # Either the model is finished (wait for queued tasks and
                    # return), or keep retrying until a new monitor accepts
                    # connections.
                    while True:
                        if self._model.isFinished():
                            drainer_logger.debug(
                                "observed that model was finished, waiting tasks completion..."
                            )
                            self._work_queue.join()
                            drainer_logger.debug("tasks complete")
                            return
                        try:
                            time.sleep(5)
                            drainer_logger.debug(
                                "connecting to new monitor...")
                            monitor = create_ee_monitor(
                                self._monitor_host, self._monitor_port)
                            wait_for_ws(monitor.get_base_uri(), max_retries=2)
                            drainer_logger.debug("connected")
                            # Leaves only this inner `while True`; the `for`
                            # loop above is still iterating the previous
                            # monitor's generator (presumably exhausted after
                            # the terminated event - verify against track()).
                            break
                        except ConnectionRefusedError as e:
                            # Not reachable yet: go around again, re-checking
                            # whether the model has finished in the meantime.
                            drainer_logger.debug(
                                f"connection refused: {e}")
                            pass
        except ConnectionRefusedError as e:
            # A refused connection is only an error while the model is still
            # expected to be running.
            if self._model.isFinished():
                return
            else:
                raise e
def evaluate(self, config, ee_id):
    """Store the arguments and start ``self._evaluate`` in a new thread.

    ``config`` and ``ee_id`` are saved on the instance, ``wait_for_ws`` is
    called with the config's ``url``, and only then is the thread created,
    stored as ``self._evaluate_thread`` and started.
    """
    self._config, self._ee_id = config, ee_id

    wait_for_ws(self._config.url)

    worker = threading.Thread(target=self._evaluate)
    self._evaluate_thread = worker
    worker.start()