def _evaluate_model(self, model_inst: BaseModel, proposal: Proposal) -> TrialResult:
    """Score ``model_inst`` on the monitor's validation dataset.

    The validation dataset path is read from ``self._monitor``. When
    ``proposal.to_eval`` is falsy, no evaluation is run and a
    ``TrialResult`` without a score is returned.

    Args:
        model_inst: Model whose ``evaluate()`` is called with the dataset path.
        proposal: Proposal of the current trial; its ``to_eval`` flag decides
            whether evaluation happens.

    Returns:
        ``TrialResult(proposal)`` when evaluation is skipped, otherwise
        ``TrialResult(proposal, score=...)`` carrying the value returned by
        ``model_inst.evaluate()``.
    """
    dataset_path = self._monitor.val_dataset_path

    if proposal.to_eval:
        logger.info('Evaluating model...')
        val_score = model_inst.evaluate(dataset_path)
        logger.info(f'Score on validation dataset: {val_score}')
        return TrialResult(proposal, score=val_score)

    # Evaluation not requested for this proposal: result carries no score
    return TrialResult(proposal)
def _evaluate_model(model_inst: BaseModel, proposal: Proposal, val_dataset_path: str) -> TrialResult:
    """Evaluate ``model_inst`` on ``val_dataset_path`` if the proposal asks for it.

    Args:
        model_inst: Model whose ``evaluate()`` is called with the dataset path.
        proposal: Proposal of the current trial; evaluation is skipped when
            its ``to_eval`` flag is falsy.
        val_dataset_path: Path handed to ``model_inst.evaluate()``.

    Returns:
        ``TrialResult(proposal)`` when evaluation is skipped, otherwise
        ``TrialResult(proposal, score=...)`` with the evaluated score.

    Raises:
        Exception: if ``evaluate()`` returns something that is not a ``float``.
    """
    if proposal.to_eval:
        print('Evaluating model...')
        val_score = model_inst.evaluate(val_dataset_path)

        # The score is validated before it is reported or wrapped in a result
        if not isinstance(val_score, float):
            raise Exception('`evaluate()` should return a float!')

        print('Score on validation dataset:', val_score)
        trial_result = TrialResult(proposal, score=val_score)
    else:
        trial_result = TrialResult(proposal)

    return trial_result
def take_result(self, worker_id) -> Union[TrialResult, None]:
    """Fetch and remove the stored result of a worker, if there is one.

    The result is looked up in Redis under ``workers:<worker_id>:result``.
    The lookup and the deletion are two separate Redis calls.

    Args:
        worker_id: ID of the worker whose result should be taken.

    Returns:
        The stored value converted with ``TrialResult.from_jsonable()``, or
        ``None`` when nothing is stored under the worker's key.
    """
    key = f'workers:{worker_id}:result'
    stored = self._redis.get(key)

    if stored is not None:
        # Clear result from Redis
        self._redis.delete(key)
        logger.info(f'Retrieved result "{stored}" for worker "{worker_id}"')
        return TrialResult.from_jsonable(stored)

    return None
def _perform_trial(self, proposal: Proposal) -> TrialResult:
    """Run one trial for ``proposal`` and return its result.

    Steps, in order: start forwarding logs to the trial, mark the trial as
    running, pull shared parameters, load, train, evaluate and save the
    model, destroy the model instance, then mark the trial as completed.

    Any exception raised along the way is logged and the trial is marked as
    errored. Such errors are tolerated (a score-less result is returned)
    until the number of consecutive errors exceeds
    ``MAX_CONSEC_TRIAL_ERRORS``, at which point the exception is re-raised.

    Args:
        proposal: Proposal describing the trial to run; its ``trial_id`` is
            bound to ``self._trial_id`` for the duration of the trial.

    Returns:
        The result produced by ``self._evaluate_model()`` on success, or
        ``TrialResult(proposal)`` when the trial failed but the failure was
        tolerated.

    Raises:
        Exception: the original error, once the consecutive error count
            exceeds ``MAX_CONSEC_TRIAL_ERRORS``.
    """
    self._trial_id = proposal.trial_id
    logger.info(
        f'Starting trial {self._trial_id} with proposal {proposal}...')

    # Track whether trial logging was actually started. If the setup call
    # itself fails, `finally` must not touch an unbound `logger_info`
    # (that would raise UnboundLocalError, hide the handled error and skip
    # the `_trial_id` reset).
    logger_info = None
    logging_started = False
    try:
        # Setup logging
        logger_info = self._start_logging_to_trial(
            lambda log_line, log_lvl: self._monitor.log_to_trial(
                self._trial_id, log_line, log_lvl))
        logging_started = True

        self._monitor.mark_trial_as_running(self._trial_id, proposal)

        shared_params = self._pull_shared_params(proposal)
        model_inst = self._load_model(proposal)
        self._train_model(model_inst, proposal, shared_params)
        result = self._evaluate_model(model_inst, proposal)
        store_params_id = self._save_model(model_inst, proposal, result)
        # NOTE(review): `destroy()` is only reached on the success path; a
        # failure in train/evaluate/save leaves the instance undestroyed.
        model_inst.destroy()

        self._monitor.mark_trial_as_completed(self._trial_id, result.score,
                                              store_params_id)
        # A successful trial resets the consecutive error counter
        self._trial_errors = 0
        return result
    except Exception:
        logger.error('Error while running trial:')
        logger.error(traceback.format_exc())
        self._monitor.mark_trial_as_errored(self._trial_id)

        # Ensure that trial doesn't error too many times consecutively
        self._trial_errors += 1
        if self._trial_errors > MAX_CONSEC_TRIAL_ERRORS:
            logger.error(
                f'Reached {MAX_CONSEC_TRIAL_ERRORS} consecutive errors - raising exception'
            )
            # Bare `raise` re-raises the active exception with its traceback
            raise

        return TrialResult(proposal)
    finally:
        if logging_started:
            self._stop_logging_to_trial(logger_info)

        # Untie from done trial
        self._trial_id = None
def create_result(self, worker_id: str, result: TrialResult):
    """Store ``result`` in Redis under ``workers:<worker_id>:result``.

    Args:
        worker_id: ID of the worker the result belongs to.
        result: Trial result to store; it is serialized with
            ``result.to_jsonable()``.

    Raises:
        AssertionError: if a value is already stored under the worker's
            result key.
    """
    name = f'workers:{worker_id}:result'

    # Raise explicitly rather than using `assert`: assert statements are
    # removed under `python -O`, which would let an existing result be
    # silently overwritten. The exception type is kept for existing callers.
    if self._redis.get(name) is not None:
        raise AssertionError(
            f'Result for worker "{worker_id}" already exists')

    logger.info(f'Creating result "{result}" for worker "{worker_id}"...')
    self._redis.set(name, result.to_jsonable())