class WordsegMarkJobImportResource(Resource):
    """Import pre-labeled word-segmentation data as a new mark job."""

    @parse(
        {
            "mark_job_name": fields.String(required=True),
            "mark_job_type": fields.String(required=True),
            "mark_job_desc": fields.String(),
            "doc_type_id": fields.Integer(required=True),
            "files": fields.List(fields.File(), required=True),
            "task_type": fields.String(required=True, validate=lambda x: x in ['machine', 'manual']),
        },
        locations=('form', 'files'))
    def post(self: Resource, args: typing.Dict) -> typing.Tuple[typing.Dict, int]:
        """Upload already-labeled wordseg data; only ``.txt`` files are accepted.

        :param args: parsed form/file fields (see the ``@parse`` schema)
        :returns: ``({"message": ..., "result": ...}, 201)`` on success
        """
        uploads = args['files']
        # Reject the whole request if any upload is not a plain-text file.
        bad = [item for item in uploads if get_ext(item.filename) not in ["txt"]]
        if bad:
            abort(400, message="上传已标注分词数据仅支持txt格式。")
        result = MarkJobService().import_mark_job(
            uploads, args, nlp_task=NlpTaskEnum.wordseg)
        return {"message": "创建成功", "result": result}, 201
class ClassifyMarkJobImportResource(Resource):
    """Import pre-labeled classification data (csv) as a new mark job."""

    @parse(
        {
            "mark_job_name": fields.String(required=True),
            "mark_job_type": fields.String(required=True),
            "mark_job_desc": fields.String(),
            "doc_type_id": fields.Integer(required=True),
            "files": fields.List(fields.File(), required=True),
        },
        locations=('form', 'files'))
    def post(self: Resource, args: typing.Dict) -> typing.Tuple[typing.Dict, int]:
        """Upload already-labeled classification data; only ``.csv`` accepted.

        :param args: parsed form/file fields (see the ``@parse`` schema)
        :returns: ``({"message": ..., "result": ...}, 201)`` on success
        """
        uploads = args['files']
        # Every upload must be a csv; abort on the first offender.
        if any(get_ext(item.filename) not in ["csv"] for item in uploads):
            abort(400, message="已标注分类数据仅支持csv格式。")
        try:
            result = MarkJobService().import_mark_job(
                uploads, args, nlp_task=NlpTaskEnum.classify)
        except UnicodeDecodeError:
            # csv content was not utf-8 decodable
            abort(400, message="文件编码错误 请上传utf-8编码文件")
        except KeyError:
            # csv was missing an expected column/header
            abort(400, message="文件格式不合规 请查看csv文件模版")
        return {"message": "创建成功", "result": result}, 201
class WordsegMarkJobListResource(Resource):
    """List word-segmentation mark jobs and create new ones."""

    @parse({
        "is_superuser": fields.Boolean(missing=False),
        "query": fields.String(missing=''),
        "offset": fields.Integer(missing=0),
        "limit": fields.Integer(missing=10),
        "doc_type_id": fields.Integer(missing=None),
        'order_by': fields.String(missing='-created_time'),
    })
    def get(self: Resource, args: typing.Dict) -> typing.Tuple[typing.Dict, int]:
        """Return a paginated list of wordseg mark jobs."""
        count, rows = MarkJobService().get_mark_job_list_by_nlp_task(
            args, nlp_task=NlpTaskEnum.wordseg)
        return {
            "message": "请求成功",
            "result": rows,
            "count": count,
        }, 200

    @parse(
        {
            "mark_job_name": fields.String(required=True),
            "mark_job_type": fields.String(required=True),
            "mark_job_desc": fields.String(),
            "doc_type_id": fields.Integer(required=True),
            "files": fields.List(fields.File(), required=True),
            "assign_mode": fields.String(required=True, validate=lambda x: x in ['average', 'together']),
            "assessor_id": fields.Integer(missing=0),
            "labeler_ids": fields.List(fields.Integer(), required=True),
        },
        locations=('form', 'files'))
    def post(self: Resource, args: typing.Dict) -> typing.Tuple[typing.Dict, int]:
        """Create a wordseg mark job from uploaded plain-text files.

        :param args: parsed form/file fields (see the ``@parse`` schema)
        :returns: ``({"message": ..., "result": ...}, 201)`` on success
        """
        uploads = args["files"]
        # NOTE(review): args["assign_mode"] is a plain str from the form; this
        # compares it to an enum member — confirm AssignModeEnum subclasses str,
        # otherwise this branch can never fire.
        if args["assign_mode"] == AssignModeEnum.together:
            abort(400, message="不支持共同标注")
        detected_type = Common().check_job_type_by_files(uploads)
        # Wordseg jobs accept plain-text documents only.
        if detected_type != "text":
            abort(400, message="请上传纯文本文档(txt/csv)")
        args['mark_job_type'] = detected_type
        try:
            result = MarkJobService().create_mark_job(
                uploads, NlpTaskEnum.wordseg, args)
        except TypeError:
            abort(400, message="上传文件类型错误")
        return {"message": "创建成功", "result": result}, 201
class RelationMarkJobListResource(Resource):
    """List relation-extraction mark jobs and create new ones."""

    @parse({
        "is_superuser": fields.Boolean(missing=False),
        "query": fields.String(missing=''),
        "offset": fields.Integer(missing=0),
        "limit": fields.Integer(missing=10),
        "doc_type_id": fields.Integer(missing=None),
        'order_by': fields.String(missing='-created_time'),
    })
    def get(self: Resource, args: typing.Dict) -> typing.Tuple[typing.Dict, int]:
        """Return a paginated list of relation mark jobs."""
        count, rows = MarkJobService().get_mark_job_list_by_nlp_task(
            args, NlpTaskEnum.relation)
        return {
            "message": "请求成功",
            "result": rows,
            "count": count,
        }, 200

    @parse(
        {
            "mark_job_name": fields.String(required=True),
            "mark_job_type": fields.String(required=True),
            "mark_job_desc": fields.String(),
            "doc_type_id": fields.Integer(required=True),
            "files": fields.List(fields.File(), required=True),
            "assign_mode": fields.String(required=True, validate=lambda x: x in ['average', 'together']),
            "assessor_id": fields.Integer(),
            "labeler_ids": fields.List(fields.Integer(), required=True),
            "use_rule": fields.Integer(missing=1)  # rules enabled by default
        },
        locations=('form', 'files'))
    def post(self: Resource, args: typing.Dict) -> typing.Tuple[typing.Dict, int]:
        """Create a relation mark job from uploaded files.

        Uploads must be homogeneous: all plain text or all electronic docs.

        :param args: parsed form/file fields (see the ``@parse`` schema)
        :returns: ``({"message": ..., "result": ...}, 201)`` on success
        """
        uploads = args['files']
        detected_type = Common().check_job_type_by_files(uploads)
        # A falsy job type means the uploads mixed text and document formats.
        if not detected_type:
            abort(400, message='请上传全部纯文本文档(txt/csv)或者全部电子文档(pdf/word文档)')
        args['mark_job_type'] = detected_type
        result = MarkJobService().create_mark_job(
            uploads, NlpTaskEnum.relation, args)
        return {"message": "创建成功", "result": result}, 201
class ExtractMarkJobImportResource(Resource):
    """Import pre-labeled sequence-labeling (extract) data as a mark job."""

    @parse(
        {
            "mark_job_name": fields.String(required=True),
            "mark_job_type": fields.String(required=True),
            "mark_job_desc": fields.String(),
            "doc_type_id": fields.Integer(required=True),
            "files": fields.List(fields.File(), required=True),
        },
        locations=('form', 'files'))
    def post(self: Resource, args: typing.Dict):
        """Upload already-labeled extract data; only ``.txt`` files accepted.

        :param args: parsed form/file fields (see the ``@parse`` schema)
        :returns: ``({"message": ..., "result": ...}, 201)`` on success
        """
        uploads = args['files']
        # Imported jobs are always treated as manually labeled.
        args['task_type'] = 'manual'
        # Reject any non-txt upload before handing off to the service.
        if any(get_ext(item.filename) not in ["txt"] for item in uploads):
            abort(400, message="导入已标注序列标注数据仅支持txt格式。")
        result = MarkJobService().import_mark_job(
            uploads, args, nlp_task=NlpTaskEnum.extract)
        return {"message": "创建成功", "result": result}, 201
class ExtractJobListResource(Resource, CurrentUserMixin):
    """List information-extraction predict jobs and create new ones."""

    @parse({
        "offset": fields.Integer(missing=0),
        "limit": fields.Integer(missing=10),
        "query": fields.String(missing=''),
        "doc_type_id": fields.Integer(missing=0),
        "order_by": fields.String(missing='-created_time'),
    })
    def get(self: Resource, args: typing.Dict) -> typing.Tuple[typing.Dict, int]:
        """Return a paginated list of extract (predict) jobs.

        ``order_by`` may carry a leading ``-`` for descending order,
        e.g. ``-created_time``; without the prefix the order is ascending.

        :param args: parsed query fields (see the ``@parse`` schema)
        :returns: ``({"message": ..., "result": ..., "count": ...}, 200)``
        """
        nlp_task_id = Common().get_nlp_task_id_by_route()
        order_by = args["order_by"]
        # BUGFIX: previously the first character was stripped unconditionally
        # (args["order_by"][1:]), which mangled ascending keys such as
        # "created_time" -> "reated_time". Strip only an explicit "-" prefix.
        order_by_desc = order_by.startswith("-")
        if order_by_desc:
            order_by = order_by[1:]
        count, predict_job_list = PredictService().get_predict_job_list_by_nlp_task_id(
            nlp_task_id=nlp_task_id,
            doc_type_id=args['doc_type_id'],
            search=args['query'],
            order_by=order_by,
            order_by_desc=order_by_desc,
            offset=args['offset'],
            limit=args['limit'],
            current_user=self.get_current_user())
        # Serialize ORM rows for the JSON response.
        result = PredictJobSchema().dump(predict_job_list, many=True)
        return {
            "message": "请求成功",
            "result": result,
            "count": count,
        }, 200

    @parse(
        {
            "extract_job_name": fields.String(required=True),
            "extract_job_type": fields.String(required=True),
            "extract_job_desc": fields.String(missing=""),
            "doc_type_id": fields.Integer(required=True),
            "files": fields.List(fields.File(), required=True),
            "task_type": fields.String(required=True, validate=lambda x: x in ['machine', 'manual']),
            "use_rule": fields.Integer(missing=0)
        },
        locations=('form', 'files'))
    def post(self: Resource, args: typing.Dict) -> typing.Tuple[typing.Dict, int]:
        """Create an extract (predict) job from the uploaded files.

        :param args: parsed form/file fields (see the ``@parse`` schema)
        :returns: ``({"message": ..., "result": ...}, 201)`` on success
        """
        predict_job = PredictService().create_predict_job_by_doc_type_id(
            doc_type_id=args["doc_type_id"],
            predict_job_name=args["extract_job_name"],
            predict_job_desc=args["extract_job_desc"],
            predict_job_type=args["extract_job_type"],
            files=args["files"],
            use_rule=args["use_rule"])
        result = PredictJobSchema().dump(predict_job)
        return {"message": "创建成功", "result": result}, 201