def create_evaluate_task_by_train_job_id(train_job_id, evaluate_task_name, evaluate_task_desc, mark_job_ids, doc_term_ids, doc_relation_ids, use_rule=0):
    """Create an evaluate task for the (single) train task under a train job and push it to the evaluate queue.

    NOTE: if re-training is ever added, this must be keyed on train_task_id instead of
    train_job_id; kept as-is for now because train_job and train_task are 1:1 today.
    """
    # Resolve job -> doc type (with its term list) -> nlp task -> train task.
    train_job = TrainJobModel().get_by_id(train_job_id)
    doc_type = DocTypeModel().get_by_id(train_job.doc_type_id)
    doc_type.doc_term_list = DocTermModel().get_by_filter(limit=99999, doc_type_id=doc_type.doc_type_id)
    nlp_task = NlpTaskEnum(doc_type.nlp_task_id)
    _, train_tasks = TrainTaskModel().get_by_filter(train_job_id=train_job_id)
    train_task = train_tasks[0]

    # Persist the evaluate task in "processing" state.
    evaluate_task = EvaluateTaskModel().create(
        evaluate_task_name=evaluate_task_name,
        evaluate_task_desc=evaluate_task_desc,
        train_task_id=train_task.train_task_id,
        evaluate_task_status=int(StatusEnum.processing))

    # Link every selected mark job to the new evaluate task.
    EvaluateM2mMarkModel().bulk_create([
        {"evaluate_task_id": evaluate_task.evaluate_task_id, "mark_job_id": _id}
        for _id in mark_job_ids])

    # NOTE(review): the incoming doc_term_ids argument is discarded here and rebuilt
    # from the selected relations — presumably intentional; confirm with callers.
    doc_term_ids = [
        str(term.doc_term_id)
        for term in RelationM2mTermModel().get_by_filter(
            limit=99999, doc_relation_ids=[int(rl) for rl in doc_relation_ids])]

    # Hand the task off to the evaluate worker via redis, then commit everything.
    push_evaluate_task_to_redis(nlp_task, evaluate_task, train_task, doc_type,
                                mark_job_ids, doc_term_ids, doc_relation_ids, use_rule)
    session.commit()
    return evaluate_task
def get_evaluate_task_list_by_train_job_id(train_job_id, order_by, order_by_desc, offset, limit):
    """Return (count, evaluate_task_list) for one train job, page-controlled by offset/limit.

    Each returned task gets its train_job_id and the list of linked mark_job_ids
    attached so the schema dump has them available.
    """
    count, tasks = EvaluateTaskModel().get_by_train_job_id(
        train_job_id=train_job_id, order_by=order_by, order_by_desc=order_by_desc,
        offset=offset, limit=limit)
    for task in tasks:
        task.train_job_id = train_job_id
        m2m_rows = EvaluateM2mMarkModel().get_by_filter(
            limit=99999, evaluate_task_id=task.evaluate_task_id)
        task.mark_job_ids = [row.mark_job_id for row in m2m_rows]
    return count, tasks
def get_doc_type_info_by_nlp_task_by_user(nlp_task_id, current_user: CurrentUser):
    """Build the admin-hall home page payload: one entry per doc_type visible to the user.

    Each entry carries the dumped doc_type, its mark-job progress stats, and the
    latest evaluation result when one exists.
    """
    # Rows come back as (doc_type, comma-joined term ids); parse the ids onto the model.
    _, raw_rows = DocTypeModel().get_by_nlp_task_id_by_user(
        nlp_task_id=nlp_task_id, current_user=current_user)
    for doc_type, terms in raw_rows:
        doc_type.doc_terms = [] if terms is None else [int(t) for t in terms.split(",")]
    entries = [{"doc_type": DocTypeSchema().dump(row[0])} for row in raw_rows]

    # Per-doc-type, per-mark-job task totals: all tasks vs. already-marked tasks.
    all_status, all_marked_status = MarkTaskModel().count_status_by_user(
        nlp_task_id=nlp_task_id, current_user=current_user)
    status_by_doc_type = Common().tuple_list2dict(all_status)
    marked_by_doc_type = Common().tuple_list2dict(all_marked_status)

    result = []
    for entry in entries:
        doc_type_id = entry["doc_type"]["doc_type_id"]
        job_totals = status_by_doc_type.get(doc_type_id, {})
        marked_totals = marked_by_doc_type.get(doc_type_id, {})
        job_count = len(job_totals)
        # A mark job counts as finished once every one of its tasks is marked.
        finished_count = sum(
            1 for _job_id, _total in job_totals.items()
            if _total == marked_totals.get(_job_id, 0))
        entry.update(progress_state={
            "job_num": job_count,
            "labeled_job_number": finished_count,
            "progress_rate": round(finished_count / job_count, 2) if job_count > 0 else 0})
        # Attach the latest evaluation result when one exists.
        latest_evaluate = EvaluateTaskModel().get_latest_evaluate_by_doc_type_id(
            nlp_task_id=nlp_task_id, doc_type_id=doc_type_id)
        if latest_evaluate:
            entry.update(evaluate=EvaluateTaskSchema().dump(latest_evaluate))
        result.append(entry)
    return result
def delete_evaluate_task_by_id(evaluate_task_id):
    """Delete one evaluate task (delegated to the model layer) and commit."""
    EvaluateTaskModel().delete(evaluate_task_id)
    session.commit()
def update_evaluate_task_by_id(evaluate_task_id, args):
    """Apply the field updates in `args` to one evaluate task, commit, and return the row."""
    updated_task = EvaluateTaskModel().update(evaluate_task_id, **args)
    session.commit()
    return updated_task
def get_evaluate_task_by_id(evaluate_task_id):
    """Fetch one evaluate task and attach its parent train_job_id (via the train task) for dumping."""
    task = EvaluateTaskModel().get_by_id(evaluate_task_id)
    parent_train_task = TrainTaskModel().get_by_id(task.train_task_id)
    task.train_job_id = parent_train_task.train_job_id
    return task
def get_by_nlp_task_id(nlp_task_id, search, current_user: CurrentUser, order_by="created_time", order_by_desc=True, limit=10, offset=0, **kwargs):
    """Get train jobs (each with its doc_type and train tasks) for an nlp task.

    Queries (TrainTask, TrainJob, DocType) rows, groups the tasks under their
    train job, attaches mark_job_ids to each task and the latest successful
    evaluation to each job.

    Returns:
        (count, page): total number of matching train jobs, and the slice
        train_job_list[offset:offset + limit].
    """
    # Define allowed filter keys
    accept_keys = ["train_job_status", "doc_type_id"]
    # Compose query, select 3 tables related to a train job
    q = session.query(TrainTask, TrainJob, DocType) \
        .outerjoin(TrainJob, TrainTask.train_job_id == TrainJob.train_job_id) \
        .outerjoin(DocType, DocType.doc_type_id == TrainJob.doc_type_id) \
        .filter(DocType.nlp_task_id == nlp_task_id,
                ~DocType.is_deleted, ~TrainJob.is_deleted, ~TrainTask.is_deleted)
    # auth: managers and guests only see doc types belonging to their own groups
    if current_user.user_role in [RoleEnum.manager.value, RoleEnum.guest.value]:
        q = q.filter(DocType.group_id.in_(current_user.user_groups))
    # Filter conditions (only whitelisted keys are honored)
    for key, val in kwargs.items():
        if key in accept_keys:
            q = q.filter(getattr(TrainJob, key) == val)
    if search:
        q = q.filter(TrainJob.train_job_name.like(f'%{search}%'))
    # Order by key
    if order_by_desc:
        q = q.order_by(getattr(TrainJob, order_by).desc())
    else:
        q = q.order_by(getattr(TrainJob, order_by))
    train_job_list = []
    job_id_list = []
    for train_task, train_job, doc_type in q.all():
        train_task.mark_job_ids = [
            m2m.mark_job_id for m2m in TrainM2mMarkbModel().get_by_filter(
                limit=99999, train_job_id=train_task.train_job_id)]
        # First task seen for this job: attach doc_type, latest successful
        # evaluation and start its train_list; otherwise append to the existing job.
        if train_task.train_job_id not in job_id_list:
            job_id_list.append(train_task.train_job_id)
            train_job.doc_type = doc_type
            _, model_evaluate_list = EvaluateTaskModel().get_by_train_job_id(
                train_job_id=train_job.train_job_id,
                evaluate_task_status=int(StatusEnum.success))
            if model_evaluate_list:
                train_job.model_evaluate = model_evaluate_list[0]
            train_job.model_version = train_task.model_version
            train_job.train_list = [train_task]
            train_job_list.append(train_job)
        else:
            train_job_list[job_id_list.index(train_task.train_job_id)].train_list.append(train_task)
    count = len(train_job_list)
    # BUG FIX: the return value had been severed from the `return` keyword, so the
    # function returned None; return the total count plus the requested page.
    return count, train_job_list[offset:offset + limit]