def _checkout_response_json(response):
    """Decode a JSON response envelope and return its ``data`` member.

    The body is read from ``response.text`` and decoded with ``util.loads``.
    The envelope must carry a ``code`` key; a value of ``0`` means success.

    Raises:
        Exception: when ``code`` is not ``0``; the message embeds the raw
            response text.
    """
    envelope = util.loads(response.text)
    if envelope["code"] == 0:
        return envelope['data']
    raise Exception(f"Update failed, {response.text}")
def get(self, dataset_name, model_name, batch_predict_job_name, *args, **kwargs):
    """Respond with the steps recorded for a batch-predict job, oldest first.

    Messages whose ``author`` equals ``batch_predict_job_name`` are loaded in
    ascending ``create_datetime`` order and their ``content`` is decoded with
    ``util.loads``. ``dataset_name`` and ``model_name`` are accepted but not
    used in this method.
    """
    # 1. query all the messages of the job
    # todo move to service
    with db.open_session() as s:
        query = s.query(MessageEntity).filter(
            MessageEntity.author == batch_predict_job_name)
        ordered = query.order_by(MessageEntity.create_datetime.asc())
        steps = [util.loads(row.content) for row in ordered.all()]

    # 2. response
    self.response_json({
        "batch_predict_job_name": batch_predict_job_name,
        "steps": steps,
    })
def delete(self, dataset_name, *args, **kwargs):
    """Respond with the parsed ``meta.json`` of the named dataset.

    NOTE(review): despite the method name, nothing visible here removes any
    data; the method only reads the dataset's meta file and returns it.
    Confirm this is intended.

    Raises:
        IllegalParamException: when ``dataset_name`` is ``None``.
        EntityNotExistsException: when the dataset directory does not exist
            under ``consts.PATH_DATASET``.
        ValueError: when the directory exists but has no ``meta.json``.
    """
    # 1. validate param
    if dataset_name is None:
        raise IllegalParamException("dataset_name", None, "not empty")

    dataset_dir = P.join(consts.PATH_DATASET, dataset_name)
    if not P.exists(dataset_dir):
        raise EntityNotExistsException(
            EntityNotExistsException.Entities.Dataset, dataset_name)

    meta_path = P.join(dataset_dir, 'meta.json')
    if not P.exists(meta_path):
        raise ValueError(f"Dataset={dataset_name} is broken.")

    # 2. load the meta file and hand it back as the response payload
    with open(meta_path, 'r') as meta_fp:
        raw_meta = meta_fp.read()
        meta = util.loads(raw_meta)
    self.response_json(meta)
def get(self, dataset_name, analyze_job_name, *args, **kwargs):
    """Respond with the steps recorded for an analyze job, oldest first.

    Messages whose ``author`` equals ``analyze_job_name`` are loaded in
    ascending ``create_datetime`` order and their ``content`` is decoded with
    ``util.loads``. ``dataset_name`` is accepted but not used in this method.

    Raises:
        IllegalParamException: when ``analyze_job_name`` is ``None``.
    """
    # 1. validate param
    if analyze_job_name is None:
        raise IllegalParamException("analyze_job_name", None, "not empty")

    # 2. query all the messages of the job
    # todo move to service
    with db.open_session() as s:
        query = s.query(MessageEntity).filter(
            MessageEntity.author == analyze_job_name)
        ordered = query.order_by(MessageEntity.create_datetime.asc())
        steps = [util.loads(row.content) for row in ordered.all()]

    # 3. response
    self.response_json({
        "analyze_job_name": analyze_job_name,
        "steps": steps,
    })
def add_predict_process_step(self, model_name: str, job_name: str, step: JobStep):
    """Store one step of a predict job as a message authored by ``job_name``.

    Each step type may be recorded only once per job: the ``type`` field of
    every message already stored for ``job_name`` is compared with
    ``step.type`` before the new message is added.

    Raises:
        Exception: when a message with the same step type already exists.
        Whatever ``self.model_dao.require_by_name`` raises for ``model_name``.
    """
    wanted_type = step.type

    with db.open_session() as s:
        # 1. check the model exists; only the lookup matters, the result is unused
        self.model_dao.require_by_name(s, model_name)

        # 2. check event type, one type one record
        existing = s.query(MessageEntity).filter(
            MessageEntity.author == job_name).all()
        already_recorded = any(
            wanted_type == util.loads(row.content).get('type')
            for row in existing
        )
        if already_recorded:
            raise Exception(
                f"Event type = {wanted_type} already exists .")

        # 3. create a new message
        payload = util.dumps(step.to_dict())
        s.add(MessageEntity(id=util.short_uuid(),
                            author=job_name,
                            content=payload,
                            create_datetime=util.get_now_datetime()))
def add_train_process_step(self, train_job_name, req_dict):
    """Record one reported training step and update the model accordingly.

    ``req_dict`` must contain string ``type`` and ``status`` entries and may
    contain an ``extension`` dict. The request is stored as a message
    authored by ``train_job_name``, then the model found for that job is
    updated with fields that depend on the step type and status.

    Raises:
        ValueError: when the step type or status is not one of the known
            values.
        Exception: when a step of the same type was already recorded, unless
            the type is ``OptimizeStart`` or ``Optimize`` (those may repeat).
    """
    # [1]. read & check params
    step_type = util.require_in_dict(req_dict, 'type', str)
    step_status = util.require_in_dict(req_dict, 'status', str)
    step_extension = util.get_from_dict(req_dict, 'extension', dict)

    known_types = (
        TrainStep.Types.Load,
        TrainStep.Types.Optimize,
        TrainStep.Types.OptimizeStart,
        TrainStep.Types.Persist,
        TrainStep.Types.Evaluate,
        TrainStep.Types.FinalTrain,
        TrainStep.Types.Searched,
    )
    if step_type not in known_types:
        raise ValueError(f"Unknown step type = {step_type}")

    if step_status not in (JobStep.Status.Succeed, JobStep.Status.Failed):
        raise ValueError(f"Unknown status = {step_status}")

    def failed_fields():
        # Fields written when a step reports failure.
        return {"status": ModelStatusType.Failed,
                "finish_datetime": util.get_now_datetime()}

    # [2]. save message
    with db.open_session() as s:
        # [2.1]. look up the model that belongs to this train job
        model = self.model_dao.find_by_train_job_name(s, train_job_name)
        model_name = model.name

        # [2.2]. check event type, one type one record
        recorded = s.query(MessageEntity).filter(
            MessageEntity.author == train_job_name).all()
        for row in recorded:
            if step_type != util.loads(row.content).get('type'):
                continue
            # Optimize / OptimizeStart steps are allowed to repeat.
            if step_type not in (TrainStep.Types.OptimizeStart,
                                 TrainStep.Types.Optimize):
                raise Exception(f"Event type = {step_type} already exists .")

        # [2.3]. create a new message
        content = util.dumps(req_dict)
        s.add(MessageEntity(id=util.short_uuid(),
                            author=train_job_name,
                            content=content,
                            create_datetime=util.get_now_datetime()))

        # [2.4]. handle the event
        # todo check in code body
        self._check_progress_change(step_type, model.progress)

        succeeded = step_status == JobStep.Status.Succeed

        # Work out which model fields this step changes; None means "no update".
        if step_type == TrainStep.Types.Evaluate:
            if succeeded:
                fields = {"performance": step_extension['performance']}
            else:
                fields = failed_fields()
        elif step_type == TrainStep.Types.Load:
            if succeeded:
                fields = {"status": ModelStatusType.Running}
            else:
                fields = failed_fields()
        elif step_type == TrainStep.Types.OptimizeStart:
            # Nothing is written for this step; the trail-number upload that
            # used to live here is disabled.
            fields = None
        elif step_type == TrainStep.Types.Optimize:
            trail_no = step_extension.get('trail_no')
            # load current trails and append the new one
            trails = model.trails
            if trails is None:
                trails = []
            trails.append(step_extension)
            fields = {"train_trail_no": trail_no,
                      "score": step_extension.get('reward'),
                      "trails": trails}
        elif step_type == TrainStep.Types.Persist:
            fields = {"model_file_size": step_extension['model_file_size'],
                      "status": ModelStatusType.Succeed,
                      "finish_datetime": util.get_now_datetime()}
        else:
            fields = {}

        if fields is not None:
            self._update_model(s, model_name, step_type, fields)
def add_analyze_process_step(self, dataset_name, analyze_job_name, step: JobStep):
    """Record one step of an analyze job and apply its result to the dataset.

    The step is stored as a message authored by ``analyze_job_name``. An
    ``Analyzed`` step then writes the reported statistics (or a failed
    status) to the dataset; a ``PatchCorrelation`` step attaches the reported
    correlations to the dataset's features and stores them sorted by absolute
    correlation, largest first.

    Raises:
        EntityNotExistsException: when no dataset named ``dataset_name``
            exists.
        Exception: when a step of the same type was already recorded for the
            job.
        ValueError: for a ``PatchCorrelation`` step when the dataset is not
            in the analyzed status, or when the reported label column differs
            from the dataset's label column.
    """
    step_type = step.type

    with db.open_session() as s:
        # 1.1. check dataset exists
        d = s.query(DatasetEntity).filter(
            DatasetEntity.name == dataset_name).first()
        if d is None:
            raise EntityNotExistsException(DatasetEntity, dataset_name)

        # 1.2. check event type, one type one record
        messages = s.query(MessageEntity).filter(
            MessageEntity.author == analyze_job_name).all()
        for m in messages:
            if step_type == util.loads(m.content).get('type'):
                raise Exception(
                    f"Event type = {step_type} already exists .")

    # 2. handle event
    with db.open_session() as s:
        # 2.1. create a new message
        content = util.dumps(step.to_dict())
        message = MessageEntity(id=util.short_uuid(),
                                author=analyze_job_name,
                                content=content,
                                create_datetime=util.get_now_datetime())
        s.add(message)

        # 2.2. handle analyze event
        if step_type == AnalyzeStep.Types.Analyzed:
            # update temporary dataset
            # todo handle failed analyze
            if step.status == JobStep.Status.Succeed:
                # "hints" is removed from the extension before the rest is
                # parsed as dataset stats; the message above was serialized
                # earlier, so it still carries the hints.
                hints = step.extension.pop("hints")
                d_stats = DatasetStats.load_dict(step.extension)
                feature_dicts = [f.to_dict() for f in d_stats.features]
                update_fields = {
                    "has_header": d_stats.has_header,
                    "extension": step.extension,
                    "n_cols": d_stats.n_cols,
                    "n_rows": d_stats.n_rows,
                    "features": feature_dicts,
                    "hints": hints,
                    "feature_summary": d_stats.feature_summary.to_dict(),
                    "status": DatasetEntity.Status.Analyzed,
                }
            else:
                update_fields = {"status": DatasetEntity.Status.Failed}
            self.dataset_dao.update_by_name(s, dataset_name, update_fields)

        elif step_type == AnalyzeStep.Types.PatchCorrelation:
            # 1. check dataset status, only analyzed can calc relativity.
            # Compare against the dataset status constant (the value the
            # Analyzed branch above writes), not the step-type constant.
            dataset = self.dataset_dao.require_by_name(s, dataset_name)
            if dataset.status != DatasetEntity.Status.Analyzed:
                raise ValueError(
                    f"Dataset {dataset_name} status is not {DatasetEntity.Status.Analyzed} ."
                )

            request_label_col = step.extension.get("label_col")
            if request_label_col != dataset.label_col:
                raise ValueError(
                    f"Dataset {dataset_name} label col is {dataset.label_col} but received result is for {request_label_col}"
                )

            # 2. read extension
            corr_dict = step.extension.get('corr')

            # 3. load & update features
            # NOTE(review): a feature missing from corr_dict gets value None,
            # which the abs() in the sort below cannot handle - confirm the
            # reporter always sends a value for every feature.
            features = dataset.to_dataset_stats().features
            for f in features:
                correlation = corr_dict.get(f.name)
                f.correlation = FeatureCorrelation(
                    value=correlation,
                    status=FeatureCorrelation.calc_status(
                        correlation, request_label_col == f.name))

            # 4. sort features by abs correlation
            features = sorted(features,
                              key=lambda f: abs(f.correlation.value),
                              reverse=True)
            feature_dict_list = [f.to_dict() for f in features]

            # 5. push back database
            self.dataset_dao.update_by_name(
                s, dataset_name, {"features": feature_dict_list})
def assert_response_and_get(self, response):
    """Assert that ``response`` is a successful call and return its ``data``.

    Checks that the HTTP status in ``response.code`` is 200 and that the body
    decoded with ``util.loads`` carries ``code == 0``.

    Both checks use ``assertEqual`` so they still run when Python is started
    with ``-O`` (which strips bare ``assert`` statements) and so a failure
    reports the value actually received.

    Raises:
        AssertionError: when either check fails.
    """
    self.assertEqual(response.code, 200)
    response_body = util.loads(response.body)
    self.assertEqual(response_body['code'], 0)
    return response_body['data']