def get_detail_by_id(task_id: int): sess = SessionLocal() task = sess.query(Task_.id, Task_.name, Task_.desc, Task_.create_time).filter(Task_.id == task_id).first() if not task: sess.close() return None ls_list = sess.query(LabelSys_.id, LabelSys_.name, LabelSys_.multi).join(TaskRecords_)\ .filter(TaskRecords_.task_id == task_id).all() print(ls_list) label_sys_list = [] label_list = [] for ls in ls_list: label_sys_list.append({ "id": ls.id, "name": ls.name, "multi": ls.multi }) l_list = sess.query(Label_.id, Label_.name, Label_.desc).join(LabelSys_).filter( Label_.label_sys_id == ls.id).all() label_list.append([{ "id": l.id, "name": l.name, "desc": l.desc } for l in l_list]) res = { "name": task.name, "desc": task.desc, "create_time": task.create_time, "label_sys_list": label_sys_list, "label_list": label_list } sess.close() return res
def add_label_sys(label_sys: LabelSys, admin_id: int): """ 添加新的分类体系. label_sys的json格式如下: { 'name':str, 'desc':str, 'multi':int, 'labels':[{'name':str,'desc':str,'keywords':str}, {'name':str,'desc':str,'keywords':str},...] } """ if check_label_sys_name(label_sys.name): return None sess = SessionLocal() label_sys_id = int(my_snow.get_id()) db_label_sys = LabelSys_(id=label_sys_id, name=label_sys.name, desc=label_sys.desc, multi=label_sys.multi, admin_id=admin_id) # (当前默认admin_id=1) sess.add(db_label_sys) sess.commit() sess.refresh(db_label_sys) label_id_list = [] for label in label_sys.labels: label_id = int(my_snow.get_id()) label_id_list.append(label_id) db_label = Label_(id=label_id, name=label.name, desc=label.desc, keywords=label.keywords, label_sys_id=label_sys_id) sess.add(db_label) sess.commit() sess.refresh(db_label) res = {'label_sys_id': label_sys_id, 'label_id_list': label_id_list} sess.close() return res
def add_task_info(task: Task, task_id, admin_id): """ 添加新的打标任务的基础信息. (当前默认admin_id=1; 暂不考虑state,doc_type信息) 从前端接收json信息(暂定) json格式如下: { 'name':str, 'desc':str, 'label_sys_ids':[str] } """ sess = SessionLocal() # 插入task表: current_time_str = datetime.fromtimestamp(int(time.time())) db_task = Task_(id=task_id, name=task.name, desc=task.desc, admin_id=admin_id, create_time=current_time_str) # ?? sess.add(db_task) sess.commit() # 插入task_records表 for label_sys_id in task.label_sys_ids: db_task_record = TaskRecords_(task_id=int(task_id), label_sys_id=label_sys_id) sess.add(db_task_record) sess.commit() sess.close() return {'task_id': task_id}
def upload_to_db(df, task_id): """ df: 通过检查的dataframe task_id: task_id """ # 开始往数据库插入: sess = SessionLocal() num_uploaded_docs = len(df) num_success_docs = 0 for item in df.iterrows(): try: title = item[1]['title'] content = item[1]['content'] doc_id = int(my_snow.get_id()) db_doc = Document_(id=doc_id, task_id=task_id, title=title, content=content, state=0) #初次上传,state都为0 sess.add(db_doc) # sess.refresh(db_doc) num_success_docs += 1 except: sess.rollback() # 报错的话需要通过rollback来撤销当前session的操作 print(traceback.format_exc()) sess.commit() sess.close() return { "num_uploaded_docs": num_uploaded_docs, "num_success_docs": num_success_docs }
def label_counts(label_sys_id, task_id_list): """ 目前只统计了单标签的数量 """ sess = SessionLocal() if not task_id_list: res = sess.query(Document_.id, Label_.name).filter(Document_.state == 1).join(TaggingRecords_).join(Label_)\ .filter(Label_.label_sys_id == label_sys_id).all() else: res = sess.query(Document_.id, Label_.name).filter(Document_.state == 1).join(TaggingRecords_).join(Label_)\ .filter(Label_.label_sys_id == label_sys_id).filter(Document_.task_id.in_(task_id_list)).all() ## 特殊的in语句 doc_label = {} for each in res: if each.id in doc_label.keys(): doc_label[each.id] = doc_label[each.id] + ',' + each.name else: doc_label[each.id] = each.name labels = list(doc_label.values()) # if not task_id_list: # res = sess.query(Label_.name).join(TaggingRecords_).join(Document_).filter(Document_.state == 1) \ # .filter(Label_.label_sys_id == label_sys_id).all() # else: # res = sess.query(Label_.name).join(TaggingRecords_).join(Document_).filter(Document_.state == 1) \ # .filter(Label_.label_sys_id == label_sys_id).filter(Document_.task_id.in_(task_id_list)).all() # labels = [each.name for each in res] c =Counter(labels) sess.close() return c
def check_task_name(task_name: str): sess = SessionLocal() res = sess.query(Task_).filter(Task_.name == task_name).first() sess.close() if res == None: return 0 else: return 1
def get_related_tasks(label_sys_id: int): sess = SessionLocal() recs = sess.query(Task_).join(TaskRecords_).filter(TaskRecords_.label_sys_id == label_sys_id).all() tasks = [] for rec in recs: tasks.append({'id':rec.id, 'name': rec.name}) sess.close() return tasks
def check_label_sys_name(label_sys_name: str): sess = SessionLocal() res = sess.query(LabelSys_).filter(LabelSys_.name == label_sys_name).first() sess.close() if res is None: return 0 else: return 1
def get_label_sys_list(): sess = SessionLocal() res = sess.query(LabelSys_.id, LabelSys_.name).all() sess.close() if not res: return None label_sys_list = [] for ls in res: label_sys_list.append({'id': ls.id, 'name': ls.name}) return label_sys_list
def get_tagged_docs(task_id: int, user_id: int): sess = SessionLocal() if user_id == -1: docs = sess.query(Document_.id, Document_.title).filter( Document_.task_id == task_id).filter(Document_.state == 1).all() else: docs = sess.query(Document_.id,Document_.title).filter(Document_.task_id == task_id).filter(Document_.state == 1)\ .join(TaggingRecords_).filter(TaggingRecords_.user_id == user_id).all() sess.close() if not docs: return None return [{'doc_id': doc.id, 'title': doc.title} for doc in docs]
def count_docs(task_id: int): sess = SessionLocal() docs = sess.query(Document_.id).filter(Document_.task_id == task_id).all() tagged_docs = sess.query(Document_.id).filter(Document_.task_id == task_id) \ .filter(Document_.state == 1).all() # unsure_docs = sess.query(Document_.id).filter(Document_.task_id == task_id) \ # .filter(Document_.state == 2).all() counts = { "num_docs": len(docs), "num_tagged_docs": len(tagged_docs) } # , "num_unsure_docs": len(unsure_docs) sess.close() return counts
def get_detail_by_id(label_sys_id: int): sess = SessionLocal() label_sys = sess.query(LabelSys_).filter(LabelSys_.id == label_sys_id).first() if not label_sys: sess.close() return None labels = sess.query(Label_).filter(Label_.label_sys_id == label_sys_id).all() label_sys_detail = {'id': label_sys_id, 'name': label_sys.name, 'desc': label_sys.desc, 'multi': str(label_sys.multi), 'num_labels': len(labels), 'labels': []} for label in labels: label_sys_detail['labels'].append({'id': label.id, 'name': label.name, 'desc': label.desc, 'keywords': label.keywords}) sess.close() return label_sys_detail
def get_summary(): """ 获取所有打标任务的一个总览信息. 包括名称,描述,分类体系名称列表,创建时间,完成进度(待定) 返回:None或者summary summary格式: [{'name':str,'desc':str,'create_time':str, 'num_docs':int, 'num_tagged_docs':int, 'label_sys_list':[{'id':int, 'name':str}]}] """ sess = SessionLocal() tasks = sess.query(Task_.id, Task_.name, Task_.desc, Task_.create_time).all() if not tasks: sess.close() return None task_summary = [] for task in tasks: # 查询label system关联信息: label_sys_list = [] lss = sess.query(LabelSys_.id, LabelSys_.name).join(TaskRecords_)\ .filter(TaskRecords_.task_id == task.id).all() for ls in lss: label_sys_list.append({'id': ls.id, 'name': ls.name}) # 查询document关联信息: docs = sess.query( Document_.id).filter(Document_.task_id == task.id).all() tagged_docs = sess.query(Document_.id).filter(Document_.task_id == task.id)\ .filter(Document_.state == 1).all() task_summary.append({ 'id': task.id, 'name': task.name, 'desc': task.desc, 'create_time': task.create_time, 'num_docs': len(docs), 'num_tagged_docs': len(tagged_docs), 'label_sys_list': label_sys_list }) sess.close() return task_summary
def get_summary(): """ 获取所有分类体系的一个总览信息. 返回:None或者summary summary格式: [{'id':int,'name':str,'desc':str,'multi':str,'num_labels':str},...] """ sess = SessionLocal() res = sess.query(LabelSys_.id, LabelSys_.name, LabelSys_.desc, LabelSys_.multi, func.count(Label_.id).label('num_labels')).join(Label_) \ .group_by(LabelSys_.id).all() if not res: sess.close() return None summary = [] for each in res: summary.append({'id': each.id, 'name': each.name, 'desc': each.desc, 'multi': str(each.multi), 'num_labels': str(each.num_labels)}) sess.close() return summary
def update_label_sys(label_sys: LabelSys, admin_id: int): """ 可以直接在下面的update语句里面,调整允许更新的字段。 目前multi字段是不允许更新的 """ assert label_sys.id is not None, "label_sys的id没给我!" sess = SessionLocal() res = sess.query(LabelSys_).filter(LabelSys_.id == label_sys.id)\ .update({LabelSys_.name : label_sys.name, LabelSys_.desc : label_sys.desc}) print('ls_res:',res) labels = label_sys.labels if not labels: return 1 for label in labels: assert label.id is not None, "label的id没给我!" res = sess.query(Label_).filter(Label_.id == label.id)\ .update({Label_.name : label.name, Label_.desc : label.desc, Label_.keywords : label.keywords}) print('l_res:',res) sess.commit() sess.close() return 1
def get_detail_by_name(label_sys_name: str): """ 获取某分类体系详情. 返回:None或label_sys_detail label_sys_detail格式: {'id':str,'name':str,'desc':str,'multi':str,'num_labels':str, 'labels':[{'id':str,'name':str,'desc':str},...]} """ sess = SessionLocal() label_sys = sess.query(LabelSys_).filter(LabelSys_.name == label_sys_name).first() if not label_sys: sess.close() return None labels = sess.query(Label_).filter(Label_.label_sys_id == label_sys.id).all() label_sys_detail = {'id': label_sys.id, 'name': label_sys.name, 'desc': label_sys.desc, 'multi': str(label_sys.multi), 'num_labels': len(labels), 'labels': []} for label in labels: label_sys_detail['labels'].append({'id': label.id, 'name': label.name, 'desc': label.desc}) sess.close() return label_sys_detail
def label_sys_tagged_data_download(label_sys_id, task_id_list): current_time_str = str(datetime.fromtimestamp(int(time.time()))).replace(' ','-').replace(':','-') sess = SessionLocal() ls_info = sess.query(LabelSys_).filter(LabelSys_.id == label_sys_id).first() ls_name = ls_info.name file_name = ls_name+'__'+current_time_str if not task_id_list: res = sess.query(Document_.id, Document_.title, Document_.content, Label_.label_sys_id, Label_.name).filter(Document_.state == 1).join(TaggingRecords_).join(Label_)\ .filter(Label_.label_sys_id == label_sys_id).all() else: res = sess.query(Document_.id, Document_.title, Document_.content, Label_.label_sys_id, Label_.name).filter(Document_.state == 1).join(TaggingRecords_).join(Label_)\ .filter(Label_.label_sys_id == label_sys_id).filter(Document_.task_id.in_(task_id_list)).all() ## 特殊的in语句 d = {} for each in res: if each.id in d.keys(): d[each.id]['label'] = d[each.id]['label'] + ',' + each.name else: d[each.id] = {'title':each.title, 'content':each.content, 'label':each.name} df = pd.concat([pd.DataFrame(d.keys(),columns=['doc_id']),pd.DataFrame(d.values())],axis=1) df.to_csv('download_datasets/%s.csv'%file_name) sess.close() return file_name
def delete_label_sys(label_sys_id:int): sess = SessionLocal() res = sess.query(LabelSys_).filter(LabelSys_.id == label_sys_id).delete() sess.commit() sess.close() return res
def delete_task(task_id: int): sess = SessionLocal() res = sess.query(Task_).filter(Task_.id == task_id).delete() sess.commit() sess.close() return res