def update_status_to_unlabel_by_mark_task_id(mark_task_id):
    """Reset a non-deleted mark task back to the `unlabel` status.

    :param mark_task_id: primary key of the MarkTask to reset
    """
    task_query = session.query(MarkTask).filter(
        MarkTask.mark_task_id == mark_task_id,
        ~MarkTask.is_deleted,
    )
    task_query.update(
        {MarkTask.mark_task_status: int(StatusEnum.unlabel)},
        synchronize_session='fetch',
    )
    session.flush()
def delete_relation_mapping(doc_relation_id):
    """Soft-delete every relation-to-term mapping row of a doc relation.

    Marks the rows as deleted (is_deleted = 1) instead of removing them,
    then commits immediately.
    """
    mapping_query = session.query(RelationM2mTerm).filter(
        RelationM2mTerm.doc_relation_id == doc_relation_id,
        ~RelationM2mTerm.is_deleted,
    )
    mapping_query.update({RelationM2mTerm.is_deleted: 1},
                         synchronize_session='fetch')
    session.commit()
def get_doc_and_lable(task_id):
    """Return (doc_unique_name, doc_raw_name, labels) for exporting a mark task.

    Aborts with HTTP 400 unless the task has passed review
    (status success or approved).
    NOTE: "lable" in the name is a typo kept for caller compatibility.
    """
    item, doc_type_id, doc = session.query(MarkTask, MarkJob.doc_type_id, Doc) \
        .join(MarkJob, MarkJob.mark_job_id == MarkTask.mark_job_id) \
        .join(Doc, Doc.doc_id == MarkTask.doc_id) \
        .filter(MarkTask.mark_task_id == task_id, ~MarkTask.is_deleted).one()
    if item.mark_task_status not in (int(StatusEnum.success),
                                     int(StatusEnum.approved)):
        abort(400, message="审核未完成,不能进行导出操作")
    # Fix: materialize once — the original iterated the Query object in two
    # comprehensions, executing the same SELECT twice.
    doc_terms = session.query(DocTerm).filter(
        DocTerm.doc_type_id == doc_type_id, ~DocTerm.is_deleted).all()
    term_color_mapping = {t.doc_term_id: t.doc_term_color for t in doc_terms}
    term_name_mapping = {t.doc_term_id: t.doc_term_name for t in doc_terms}
    # Fix: distinct loop name — the original reused `item`, shadowing the
    # MarkTask bound above.
    labels = [{
        "index": mark["index"],
        "color": term_color_mapping.get(mark['doc_term_id'], '#ddd'),
        "word": mark['value'],
        "annotation": term_name_mapping.get(mark['doc_term_id'], ''),
    } for mark in item.mark_task_result]
    return doc.doc_unique_name, doc.doc_raw_name, labels
def get_classify_doc_rule(doc_type_id, offset, limit):
    """Page through classify rules whose terms belong to a doc type.

    :return: (items, total_count)
    """
    term_rows = session.query(DocTerm.doc_term_id).filter(
        DocTerm.doc_type_id == doc_type_id,
        ~DocTerm.is_deleted,
    ).all()
    term_ids = [row[0] for row in term_rows]
    rule_query = session.query(ClassifyDocRule).filter(
        ClassifyDocRule.doc_term_id.in_(term_ids),
        ~ClassifyDocRule.is_deleted,
    )
    total = rule_query.count()
    page = rule_query.offset(offset).limit(limit).all()
    return page, total
def check_user_task_and_update_mark_task(self, user_task_id):
    """Propagate a finished user task's status up to its parent mark task.

    If every sibling user task is labeled, the mark task moves to `reviewing`;
    two special cases then auto-approve it (single annotator without reviewer,
    and unanimous multi-annotator classification).
    Order matters: the `reviewing` update runs first, then may be overridden
    by an `approved` update in the special cases below.
    """
    # Locate the parent mark task plus the job's NLP task id and reviewer list.
    mark_task, nlp_task_id, reviewer_ids = session.query(MarkTask, DocType.nlp_task_id, MarkJob.reviewer_ids) \
        .join(MarkJob, MarkJob.mark_job_id == MarkTask.mark_job_id) \
        .join(DocType, DocType.doc_type_id == MarkJob.doc_type_id) \
        .join(UserTask, UserTask.mark_task_id == MarkTask.mark_task_id) \
        .filter(
            UserTask.user_task_id == user_task_id,
            ~MarkTask.is_deleted,
            ~UserTask.is_deleted,
        ).one()
    # All live user tasks under the same mark task (including this one).
    user_tasks = session.query(UserTask) \
        .join(MarkTask, MarkTask.mark_task_id == UserTask.mark_task_id) \
        .join(MarkJob, MarkJob.mark_job_id == MarkTask.mark_job_id) \
        .filter(
            MarkTask.mark_task_id == mark_task.mark_task_id,
            ~MarkTask.is_deleted,
            ~UserTask.is_deleted
        ).all()
    is_labeled = all([
        user_task.user_task_status == int(StatusEnum.labeled)
        for user_task in user_tasks
    ])
    if is_labeled:
        self.update(mark_task.mark_task_id,
                    mark_task_status=int(StatusEnum.reviewing))
        # Single annotator and no reviewers configured: labeling completes
        # the review, so approve directly with that annotator's result.
        if len(user_tasks) == 1 and not reviewer_ids:
            self.update(
                mark_task.mark_task_id, **{
                    'mark_task_status': int(StatusEnum.approved),
                    'mark_task_result': user_tasks[0].user_task_result
                })
        # Classification with several annotators who all produced the same
        # result (compared via JSON serialization): approve directly, keeping
        # only the entries flagged as marked.
        if len(user_tasks) > 1 and nlp_task_id == int(
                NlpTaskEnum.classify) and all([
                    json.dumps(user_task.user_task_result) == json.dumps(
                        user_tasks[0].user_task_result)
                    for user_task in user_tasks
                ]):
            mark_task_result = [
                mark for mark in user_tasks[0].user_task_result
                if mark['marked']
            ]
            self.update(
                mark_task.mark_task_id, **{
                    'mark_task_status': int(StatusEnum.approved),
                    'mark_task_result': mark_task_result
                })
def get_by_nlp_task_id(
        self, nlp_task_id, search, order_by="created_time",
        order_by_desc=True, limit=10, offset=0, user_role=None, **kwargs):
    """Page (MarkJob, DocType) pairs of one NLP task, filtered by user role.

    :param search: substring match on mark_job_name when truthy
    :param user_role: role name string; narrows visibility per role
    :return: (count, rows) where each row is a (MarkJob, DocType) tuple
    """
    # Define allowed filter keys
    accept_keys = ["assign_mode", "mark_job_status", "mark_job_type", "doc_type_id"]
    # Compose query
    q = session.query(MarkJob, DocType).join(
        DocType, MarkJob.doc_type_id == DocType.doc_type_id
    ).filter(DocType.nlp_task_id == nlp_task_id,
             ~DocType.is_deleted, ~MarkJob.is_deleted)
    # Role-based visibility: manager sees own groups, reviewer/annotator see
    # jobs whose reviewer/annotator JSON array contains their user id.
    if user_role == "管理员":
        q = q.filter(DocType.group_id.in_(g.user_groups))
    elif user_role == "审核员":
        q = q.filter(func.json_contains(MarkJob.reviewer_ids, str(g.user_id)))
    elif user_role == "标注员":
        q = q.filter(func.json_contains(MarkJob.annotator_ids, str(g.user_id)))
    # Filter conditions
    for key, val in kwargs.items():
        if key in accept_keys and val is not None:
            q = q.filter(getattr(MarkJob, key) == val)
    if search:
        q = q.filter(MarkJob.mark_job_name.like(f'%{search}%'))
    count = q.count()
    # Order by key. NOTE(review): relies on the "ORDER BY -column" trick to
    # get descending order — presumably MySQL-specific; confirm the backend.
    q = q.order_by(text(f"{'-' if order_by_desc else ''}mark_job.{order_by}"))
    q = q.offset(offset).limit(limit)
    return count, q.all()
def get_by_filter(self, search="", order_by="created_time", order_by_desc=True,
                  limit=10, offset=0, **kwargs):
    """List predict tasks matching the allowed kwargs filters.

    :param search: substring match on predict_task_name when truthy
    :return: (count, items)
    """
    allowed = ("predict_task_status", "predict_job_id")
    query = session.query(PredictTask).filter(~PredictTask.is_deleted)
    for field, value in kwargs.items():
        if field in allowed:
            query = query.filter(getattr(PredictTask, field) == value)
    if search:
        query = query.filter(PredictTask.predict_task_name.like(f'%{search}%'))
    total = query.count()
    # Sort ascending or descending on the requested column.
    sort_col = getattr(PredictTask, order_by)
    query = query.order_by(sort_col.desc() if order_by_desc else sort_col)
    return total, query.offset(offset).limit(limit).all()
def get_relation_with_terms(order_by="created_time", order_by_desc=True,
                            limit=10, offset=0, require_count=False, **kwargs):
    """Page doc relations with a group_concat of their distinct term ids.

    :return: (rows, count) — rows are (doc_relation_id, doc_relation_name,
        created_time, "id1,id2,...") tuples; count is 0 unless require_count.
    """
    # Define allowed filter keys
    accept_keys = ["doc_relation_name", "doc_type_id"]
    # Compose query
    q = session.query(DocRelation.doc_relation_id,
                      DocRelation.doc_relation_name,
                      DocRelation.created_time,
                      func.group_concat(RelationM2mTerm.doc_term_id.distinct()))\
        .join(RelationM2mTerm,
              RelationM2mTerm.doc_relation_id == DocRelation.doc_relation_id)\
        .filter(~DocRelation.is_deleted, ~RelationM2mTerm.is_deleted)
    # Filter conditions
    for key, val in kwargs.items():
        if key == "doc_relation_ids" and len(val) > 0:
            q = q.filter(DocRelation.doc_relation_id.in_(val))
        elif key in accept_keys:
            q = q.filter(getattr(DocRelation, key) == val)
    q = q.group_by(RelationM2mTerm.doc_relation_id,
                   DocRelation.doc_relation_name, DocRelation.created_time)
    count = 0
    if require_count:
        count = q.count()
    # Order by key. NOTE(review): ordering only triggers when the caller
    # passes order_by="order_by" — looks like it should compare against the
    # column name (e.g. "created_time"); confirm intent before changing.
    if order_by == "order_by" and order_by_desc:
        q = q.order_by(DocRelation.created_time.desc())
    q = q.offset(offset).limit(limit)
    return q.all(), count
def get_by_filter(self, order_by="created_time", order_by_desc=True, limit=10,
                  offset=0, require_count=False, **kwargs):
    """Page doc relations matching the allowed kwargs filters.

    :return: (items, count) — count is 0 unless require_count is True.
    """
    allowed = ("doc_relation_ids", "doc_relation_name", "doc_type_id")
    query = session.query(DocRelation).filter(~DocRelation.is_deleted)
    for field, value in kwargs.items():
        if field == "doc_relation_ids":
            # List-valued filter gets an IN clause rather than equality.
            query = query.filter(DocRelation.doc_relation_id.in_(value))
        elif field in allowed:
            query = query.filter(getattr(DocRelation, field) == value)
    total = query.count() if require_count else 0
    sort_col = getattr(DocRelation, order_by)
    query = query.order_by(sort_col.desc() if order_by_desc else sort_col)
    return query.offset(offset).limit(limit).all(), total
def get_by_filter(self, order_by="created_time", order_by_desc=True, limit=10,
                  offset=0, **kwargs) -> [DocType]:
    """Page doc types matching the allowed kwargs filters.

    "user_groups" filters group_id by membership; other keys use equality.
    """
    allowed = (
        "user_groups", "doc_type_name", "nlp_task_id", "doc_type_id",
        "group_id"
    )
    query = session.query(DocType).filter(~DocType.is_deleted)
    for field, value in kwargs.items():
        if field == "user_groups":
            query = query.filter(DocType.group_id.in_(value))
        elif field in allowed:
            query = query.filter(getattr(DocType, field) == value)
    sort_col = getattr(DocType, order_by)
    query = query.order_by(sort_col.desc() if order_by_desc else sort_col)
    return query.offset(offset).limit(limit).all()
def get_by_mark_job_ids(mark_job_ids, nlp_task_id, current_user: CurrentUser,
                        limit=10, offset=0) -> (int, List):
    """Page doc types of an NLP task that the current user may see.

    Uses an outer join so doc types without any mark job survive; the
    is_deleted IS NULL branch keeps those unmatched rows.
    :return: (count, items)
    """
    query = session.query(DocType) \
        .outerjoin(MarkJob, MarkJob.doc_type_id == DocType.doc_type_id) \
        .filter(DocType.nlp_task_id == nlp_task_id,
                ~DocType.is_deleted,
                or_(~MarkJob.is_deleted, MarkJob.is_deleted.is_(None)))
    # Role-based visibility filter.
    role = current_user.user_role
    if role in [RoleEnum.manager.value, RoleEnum.guest.value]:
        query = query.filter(DocType.group_id.in_(current_user.user_groups))
    elif role in [RoleEnum.reviewer.value, RoleEnum.annotator.value]:
        uid = str(current_user.user_id)
        query = query.filter(
            or_(func.json_contains(MarkJob.annotator_ids, uid),
                func.json_contains(MarkJob.reviewer_ids, uid)))
    if mark_job_ids:
        query = query.filter(MarkJob.mark_job_id.in_(mark_job_ids))
    total = query.count()
    page = query.offset(offset).limit(limit).all()
    return total, page
def count_mark_job_by_nlp_task_manager(user_id):
    """Count mark jobs per NLP task for doc types created by this manager.

    :return: (all_count, labeled_count, reviewed_count) — each a list of
        (nlp_task_id, job_count) rows; "labeled" covers labeled/reviewing/
        approved statuses, "reviewed" covers approved only.
    """
    def _base_query():
        # Shared base: live jobs joined to live doc types owned by user_id.
        return session.query(DocType.nlp_task_id,
                             func.count(MarkJob.mark_job_id)) \
            .join(DocType, MarkJob.doc_type_id == DocType.doc_type_id) \
            .filter(~MarkJob.is_deleted, ~DocType.is_deleted,
                    DocType.created_by == user_id)

    all_count = _base_query().group_by(DocType.nlp_task_id).all()
    # Fix: compare with int(StatusEnum.x) like the rest of this module —
    # raw enum members were inconsistent with every other status filter.
    labeled_count = _base_query() \
        .filter(MarkJob.mark_job_status.in_([int(StatusEnum.labeled),
                                             int(StatusEnum.reviewing),
                                             int(StatusEnum.approved)])) \
        .group_by(DocType.nlp_task_id).all()
    reviewed_count = _base_query() \
        .filter(MarkJob.mark_job_status.in_([int(StatusEnum.approved)])) \
        .group_by(DocType.nlp_task_id).all()
    return all_count, labeled_count, reviewed_count
def get_rule_with_term(doc_type_id):
    """Return all active (ClassifyDocRule, DocTerm) pairs under a doc type."""
    query = session.query(ClassifyDocRule, DocTerm) \
        .join(DocTerm, DocTerm.doc_term_id == ClassifyDocRule.doc_term_id) \
        .join(DocType, DocType.doc_type_id == DocTerm.doc_type_id) \
        .filter(DocType.doc_type_id == doc_type_id,
                ~DocType.is_deleted,
                ~DocTerm.is_deleted,
                ~ClassifyDocRule.is_deleted,
                ClassifyDocRule.is_active)
    return query.all()
def get_online_model_info_by_doc_type_id(doc_type_id,
                                         current_user: CurrentUser,
                                         order_by="created_time",
                                         order_by_desc=True):
    """Fetch the first online model row for a doc type, or None.

    :return: a (TrainTask, EvaluateTask, TrainJob, DocType) tuple for the
        first matching online model with a successful evaluation, or None.
    """
    # Compose query
    q = session.query(TrainTask, EvaluateTask, TrainJob, DocType) \
        .join(EvaluateTask, EvaluateTask.train_task_id == TrainTask.train_task_id) \
        .join(TrainJob, TrainTask.train_job_id == TrainJob.train_job_id) \
        .join(DocType, DocType.doc_type_id == TrainJob.doc_type_id) \
        .filter(DocType.doc_type_id == doc_type_id,
                TrainTask.train_status == int(StatusEnum.online),
                EvaluateTask.evaluate_task_status == int(StatusEnum.success),
                ~DocType.is_deleted, ~TrainJob.is_deleted,
                ~TrainTask.is_deleted, ~EvaluateTask.is_deleted)
    # auth: managers and guests are restricted to their own groups
    if current_user.user_role in [
            RoleEnum.manager.value, RoleEnum.guest.value
    ]:
        q = q.filter(DocType.group_id.in_(current_user.user_groups))
    # Order by key (column on TrainJob)
    if order_by_desc:
        q = q.order_by(getattr(TrainJob, order_by).desc())
    else:
        q = q.order_by(getattr(TrainJob, order_by))
    return q.first()
def count_status_by_user(nlp_task_id, current_user: CurrentUser):
    """Count mark tasks per (doc_type, mark_job), total and finished.

    The base query `q` is built once, grouped for the total counts, then
    extended with the >= labeled status filter for the finished counts —
    the statement order is significant.
    :return: (all_status, all_finish_marking_status) — both lists of
        (doc_type_id, mark_job_id, task_count) tuples.
    """
    # compose query
    q = session.query(DocType) \
        .join(MarkJob, DocType.doc_type_id == MarkJob.doc_type_id) \
        .join(MarkTask, MarkJob.mark_job_id == MarkTask.mark_job_id) \
        .filter(DocType.nlp_task_id == nlp_task_id,
                ~DocType.is_deleted, ~MarkJob.is_deleted,
                ~MarkTask.is_deleted)
    # filter by user role: managers/guests by group, reviewers/annotators by
    # membership in the job's reviewer/annotator JSON id arrays
    if current_user.user_role in [
            RoleEnum.manager.value, RoleEnum.guest.value
    ]:
        q = q.filter(DocType.group_id.in_(current_user.user_groups))
    elif current_user.user_role in [RoleEnum.reviewer.value]:
        q = q.filter(
            func.json_contains(MarkJob.reviewer_ids,
                               str(current_user.user_id)))
    elif current_user.user_role in [RoleEnum.annotator.value]:
        q = q.filter(
            func.json_contains(MarkJob.annotator_ids,
                               str(current_user.user_id)))
    # get grouped (doc_type_id, mark_job_id, count) list
    all_status = q.group_by(MarkJob.doc_type_id, MarkJob.mark_job_id) \
        .with_entities(MarkJob.doc_type_id, MarkJob.mark_job_id,
                       func.count(MarkTask.mark_task_id)).all()
    # filter >= labeled status
    q = q.filter(MarkTask.mark_task_status >= int(StatusEnum.labeled))
    # get grouped (doc_type_id, mark_job_id, >= labeled count) list
    all_finish_marking_status = q.group_by(MarkJob.doc_type_id, MarkJob.mark_job_id) \
        .with_entities(MarkJob.doc_type_id, MarkJob.mark_job_id,
                       func.count(MarkTask.mark_task_id)).all()
    return all_status, all_finish_marking_status
def update_by_annotator_id(self, current_user, _id, **kwargs) -> UserTask:
    """Update a user task; annotators may only touch their own tasks.

    :return: the updated UserTask
    :raises: NoResultFound via .one() if no matching row exists.
    """
    query = session.query(UserTask).filter(UserTask.user_task_id == _id)
    if current_user.user_role in [RoleEnum.annotator.value]:
        # Restrict to tasks assigned to the calling annotator.
        query = query.filter(UserTask.annotator_id == current_user.user_id)
    query.update(kwargs)
    session.flush()
    return query.one()
def count_mark_task_status(mark_job_ids) -> [Tuple[int]]:
    """Count mark tasks per (mark_job_id, mark_task_status) for given jobs.

    :return: list of (mark_job_id, mark_task_status, count) tuples.
    """
    grouped = session.query(MarkTask.mark_job_id,
                            MarkTask.mark_task_status,
                            func.count(MarkTask.mark_task_status)) \
        .join(MarkJob, MarkJob.mark_job_id == MarkTask.mark_job_id) \
        .filter(MarkJob.mark_job_id.in_(mark_job_ids),
                ~MarkTask.is_deleted,
                ~MarkJob.is_deleted) \
        .group_by(MarkTask.mark_task_status, MarkTask.mark_job_id)
    return grouped.all()
def count_doc_type_by_nlp_task(current_user: CurrentUser) -> [(int, int)]:
    """Count doc types visible to the user, grouped by NLP task.

    :return: list of (nlp_task_id, doc_type_count) tuples.
    """
    q = session.query(DocType.nlp_task_id, func.count(DocType.doc_type_id)) \
        .filter(~DocType.is_deleted)
    if current_user.user_role in [
            RoleEnum.manager.value, RoleEnum.guest.value
    ]:
        q = q.filter(DocType.group_id.in_(current_user.user_groups))
    elif current_user.user_role in [
            RoleEnum.reviewer.value, RoleEnum.annotator.value
    ]:
        # Reviewer and annotator joins mark_job to filter visible doc types.
        # Fix: the second json_contains checked annotator_ids twice; it must
        # check reviewer_ids so reviewers see the jobs they review.
        q = session.query(DocType.nlp_task_id, func.count(DocType.doc_type_id)) \
            .join(MarkJob, MarkJob.doc_type_id == DocType.doc_type_id) \
            .filter(~DocType.is_deleted, ~MarkJob.is_deleted,
                    or_(func.json_contains(MarkJob.annotator_ids, str(current_user.user_id)),
                        func.json_contains(MarkJob.reviewer_ids, str(current_user.user_id))))
    count = q.group_by(DocType.nlp_task_id).all()
    return count
def get_doc_term_alias_mapping(doc_type_id) -> typing.Dict:
    """Map each non-empty doc term alias to its term id for a doc type.

    Fix: truthiness guard instead of len() so a NULL alias column no longer
    raises TypeError; NULL and empty-string aliases are both skipped.
    NOTE(review): unlike sibling queries this one does not filter
    ~DocTerm.is_deleted — left unchanged, confirm whether that is intended.
    """
    q = session.query(DocTerm) \
        .filter(DocTerm.doc_type_id == doc_type_id) \
        .with_entities(DocTerm.doc_term_id, DocTerm.doc_term_alias)
    alias_id_mapping = {
        alias: term_id
        for term_id, alias in q.all() if alias
    }
    return alias_id_mapping
def get_mark_task_with_doc_and_user_task_list_by_id(task_id):
    """Load a mark task with its doc, doc type, and user task list attached.

    The doc, doc_type and user_task_list are stuffed onto the MarkTask
    instance as plain attributes for the caller's convenience.
    :return: the MarkTask with .doc, .doc_type and .user_task_list set.
    """
    mark_task, doc, doc_type = session.query(MarkTask, Doc, DocType) \
        .join(Doc, Doc.doc_id == MarkTask.doc_id) \
        .join(MarkJob, MarkJob.mark_job_id == MarkTask.mark_job_id) \
        .join(DocType, DocType.doc_type_id == MarkJob.doc_type_id) \
        .filter(
            MarkTask.mark_task_id == task_id,
            ~MarkTask.is_deleted,
            ~Doc.is_deleted
        ).one()
    mark_task.doc = doc
    mark_task.doc_type = doc_type
    # When no user tasks exist, fall back to a synthetic placeholder
    # (annotator_id=0) carrying the mark task's own result so downstream
    # code always sees at least one entry.
    UserTaskPlaceholder = UserTask(annotator_id=0, is_deleted=False,
                                   user_task_status=StatusEnum.labeled.value,
                                   user_task_result=mark_task.mark_task_result)
    mark_task.user_task_list = session.query(UserTask).filter(
        UserTask.mark_task_id == task_id,
        ~UserTask.is_deleted).all() or [UserTaskPlaceholder]
    return mark_task
def get_relation_term_mapping(doc_type_id):
    """Group the distinct term ids of each doc relation under a doc type.

    Fix: the joins previously passed bare columns as extra positional args
    (``.join(DocTerm, DocTerm.doc_term_id, RelationM2mTerm.doc_relation_id)``)
    instead of boolean onclauses; use explicit equality join conditions.
    :return: list of (doc_relation_id, "term_id1,term_id2,...") tuples.
    """
    q = session.query(RelationM2mTerm.doc_relation_id,
                      func.group_concat(RelationM2mTerm.doc_term_id.distinct()))\
        .join(DocTerm, DocTerm.doc_term_id == RelationM2mTerm.doc_term_id)\
        .join(DocRelation,
              DocRelation.doc_relation_id == RelationM2mTerm.doc_relation_id)\
        .filter(~DocTerm.is_deleted, ~DocRelation.is_deleted,
                ~RelationM2mTerm.is_deleted,
                DocTerm.doc_type_id == doc_type_id)\
        .group_by(RelationM2mTerm.doc_relation_id)
    return q.all()
def update_status_to_unlabel_by_mark_task_id(mark_task_id):
    """Reset every live user task under a mark task to `unlabel`.

    Aborts with HTTP 400 when the mark task has no annotator tasks.
    """
    q = session.query(UserTask).filter(
        UserTask.mark_task_id == mark_task_id, ~UserTask.is_deleted)
    # Fix: existence check via first() — the original called q.all(),
    # materializing every row just to test emptiness.
    if q.first() is None:
        abort(400, message="无法驳回无标注员任务")
    q.update({UserTask.user_task_status: int(StatusEnum.unlabel)},
             synchronize_session='fetch')
    session.flush()
def count_train_job_by_nlp_task(current_user: CurrentUser):
    """Count train jobs grouped by NLP task, scoped to the user's groups.

    :return: list of (nlp_task_id, train_job_count) tuples.
    """
    query = session.query(DocType.nlp_task_id,
                          func.count(TrainJob.train_job_id)) \
        .join(DocType, TrainJob.doc_type_id == DocType.doc_type_id) \
        .filter(~TrainJob.is_deleted, ~DocType.is_deleted)
    restricted_roles = [RoleEnum.manager.value, RoleEnum.guest.value]
    if current_user.user_role in restricted_roles:
        # Managers and guests only see doc types in their own groups.
        query = query.filter(DocType.group_id.in_(current_user.user_groups))
    return query.group_by(DocType.nlp_task_id).all()
def get_by_doc_type_id(self, doc_type_id, **kwargs):
    """List train tasks of a doc type, optionally filtered by train_status."""
    allowed = ("train_status",)
    query = session.query(TrainTask) \
        .join(TrainJob, TrainJob.train_job_id == TrainTask.train_job_id) \
        .filter(TrainJob.doc_type_id == doc_type_id,
                ~TrainJob.is_deleted,
                ~TrainTask.is_deleted)
    # Apply whitelisted equality filters from kwargs.
    for field, value in kwargs.items():
        if field in allowed:
            query = query.filter(getattr(TrainTask, field) == value)
    return query.all()
def get_export_history(current_user, offset, limit):
    """Page the current user's export jobs, newest first.

    Fix: the plain ``~DocType.is_deleted`` filter after an outer join silently
    discarded rows whose DocType is NULL (inner-join behavior), hiding export
    jobs without a doc type row; allow the NULL branch like the sibling
    get_by_mark_job_ids query does.
    :return: (rows, count)
    """
    q = session.query(ExportJob.export_job_id, ExportJob.created_time,
                      ExportJob.export_file_path, DocType.nlp_task_id,
                      ExportJob.doc_type_id, ExportJob.export_job_status,
                      DocType.doc_type_name, ExportJob.export_mark_job_ids) \
        .outerjoin(DocType, ExportJob.doc_type_id == DocType.doc_type_id) \
        .filter(ExportJob.created_by == current_user.user_id,
                ~ExportJob.is_deleted,
                or_(~DocType.is_deleted, DocType.is_deleted.is_(None)))
    count = q.count()
    q = q.order_by(ExportJob.export_job_id.desc())
    q = q.offset(offset).limit(limit)
    return q.all(), count
def get_doc_term_by_doctype(doc_type_id, offset=0, limit=10,
                            doc_term_ids=None):
    """Page the live doc terms of a doc type, optionally limited to ids.

    :return: (items, total_count)
    """
    query = session.query(DocTerm) \
        .join(DocType, DocType.doc_type_id == DocTerm.doc_type_id) \
        .filter(DocTerm.doc_type_id == doc_type_id,
                ~DocTerm.is_deleted,
                ~DocType.is_deleted)
    if doc_term_ids:
        query = query.filter(DocTerm.doc_term_id.in_(doc_term_ids))
    total = query.count()
    page = query.offset(offset).limit(limit).all()
    return page, total
def _get_user_task_map(mark_task_ids, select_keys):  # tuple):
    """Group user task rows by their mark_task_id (as a string key).

    select_keys must include mark_task_id.
    :return: dict mapping str(mark_task_id) -> list of rows.
    """
    rows = session.query(select_keys).filter(
        UserTask.mark_task_id.in_(mark_task_ids)).all()
    user_task_map = {}
    for row in rows:
        # setdefault replaces the original get-then-append branching.
        user_task_map.setdefault(str(row.mark_task_id), []).append(row)
    return user_task_map
def get_online_ids_by_ids(self, doc_type_ids) -> Set[int]:
    """Return the subset of doc_type_ids that have an online model."""
    rows = session.query(DocType.doc_type_id) \
        .join(TrainJob, DocType.doc_type_id == TrainJob.doc_type_id) \
        .join(TrainTask, TrainTask.train_job_id == TrainJob.train_job_id) \
        .filter(TrainTask.train_status == int(StatusEnum.online),
                TrainJob.doc_type_id.in_(doc_type_ids),
                ~TrainJob.is_deleted,
                ~TrainTask.is_deleted).all()
    return {row.doc_type_id for row in rows}
def get_predict_task_and_doc(**kwargs):
    """Return live (PredictTask, Doc) pairs filtered by whitelisted kwargs."""
    allowed = ("predict_task_id", "predict_job_id")
    query = session.query(PredictTask, Doc) \
        .join(Doc, Doc.doc_id == PredictTask.doc_id) \
        .filter(~Doc.is_deleted, ~PredictTask.is_deleted)
    for field, value in kwargs.items():
        if field in allowed:
            query = query.filter(getattr(PredictTask, field) == value)
    return query.all()
def update(self, doc_rule_id, **kwargs):
    """Update a classify rule's content/state and commit.

    :param doc_rule_id: classify_rule_id of the row to update
    :return: the updated ClassifyDocRule
    :raises: NoResultFound via .one() if the rule does not exist.
    NOTE(review): the "state" key writes classify_rule.is_deleted — this
    looks like a soft-delete toggle rather than an is_active flag; confirm
    the intended mapping against the callers.
    """
    accept_keys = ["rule_content", "state"]
    classify_rule = session.query(ClassifyDocRule).filter(
        ClassifyDocRule.classify_rule_id == doc_rule_id).one()
    for key, val in kwargs.items():
        if key == "state":
            classify_rule.is_deleted = val
        elif key in accept_keys:
            setattr(classify_rule, key, val)
    session.commit()
    return classify_rule