def run_neuroner_predict_erks(project_id, document):
    """Run a pretrained NeuroNER model over *document* and return brat entities.

    Writes the document into a timestamped, per-run deploy folder under the
    bundled NeuroNER home, runs the model in prediction-only mode, and returns
    ``nn.brat_entities`` — or ``None`` if any step fails (the error is logged).
    """
    base_dir = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), 'erks/erks_bps/neuroner')
    model_dir = os.path.join(base_dir, 'trained_models/my_model_2')
    out_dir = os.path.join(base_dir, 'output')
    # Timestamp keeps concurrent/repeated runs from colliding on disk.
    run_stamp = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
    data_dir = os.path.join(base_dir, "data", project_id + "_" + run_stamp)
    deploy_dir = os.path.join(data_dir, 'deploy')

    result = None
    try:
        os.makedirs(deploy_dir)
        with open(os.path.join(deploy_dir, "document1.txt"), 'w') as handle:
            handle.writelines(document)

        cli_args = [
            '--train_model=False',
            '--use_pretrained_model=True',
            '--dataset_text_folder=' + data_dir,
            '--pretrained_model_folder=' + model_dir,
            '--output_folder=' + out_dir,
        ]
        nn = NeuroNER(**parse_arguments(cli_args))
        nn.fit()
        nn.close()
        result = nn.brat_entities
    except Exception as e:
        log_exception(e)
    return result
def get_type_system_diagram(project_id):
    """Return the project's type-system diagram as a JSON string.

    Response shape: ``{"resultOK": True, "result": ...}`` on success,
    ``{"resultOK": False, "message": <error text>}`` on failure.
    """
    result = {}
    try:
        type_system_diagram = models.get_type_system_diagram(project_id)
        result["resultOK"] = True
        result["result"] = type_system_diagram
    except Exception as e:
        result["resultOK"] = False
        # Bug fix: was str(Exception), which stringifies the Exception class
        # itself ("<class 'Exception'>"), not the caught error.
        result["message"] = str(e)
        log_exception(e)
    return dumps(result, ensure_ascii=False)
def create_ground_truth(self, documents, document_type):
    """Split each document's text into sentences and persist ground truth.

    Sentences end either at a character in ``self.sentence_breaker`` or at a
    "\\r\\r" pair (treated as a blank-line break). Each document's parsed
    ground truth is inserted into ``ground_truth_collection``.
    """
    for document in documents:
        ground_truth = self.get_base_ground_truth(document)
        text = ground_truth["text"]
        offset = 0
        begin = 0
        # Bug fix: reset per document (previously initialized once before the
        # loop), so a trailing partial sentence cannot leak into the next doc.
        str_buffer = []
        is_begin_set = False
        length = len(text)
        iter_data = iter(text)
        try:
            for char in iter_data:
                if char == "\r":
                    if offset + 1 < length and text[offset + 1] == '\r':
                        self.sentence_parser(ground_truth=ground_truth,
                                             begin=begin,
                                             sentence_buffer=str_buffer)
                        str_buffer = []
                        # Bug fix: iter_data.next() is Python 2 only and
                        # raises AttributeError on Python 3; use next()
                        # (document_parser already does so).
                        next(iter_data)
                        offset += 1
                        is_begin_set = False
                    else:
                        str_buffer.append(char)
                elif char in self.sentence_breaker:
                    str_buffer.append(char)
                    self.sentence_parser(ground_truth=ground_truth,
                                         begin=begin,
                                         sentence_buffer=str_buffer)
                    str_buffer = []
                    is_begin_set = False
                else:
                    str_buffer.append(char)
                    if not is_begin_set:
                        begin = offset
                        is_begin_set = True
                offset += 1
            # Flush whatever remains after the last explicit break.
            self.sentence_parser(ground_truth=ground_truth, begin=begin,
                                 sentence_buffer=str_buffer)
        except Exception as e:
            log_exception(e)
        ground_truth_collection.insert_one({
            "project_id": self.global_project_id,
            "global_document_id": self.global_document_id,
            "ground_truth": ground_truth,
            "document_type": document_type
        })
def online_text_ground_truth(project_id):
    """Return the project's online-text ground truth as a JSON string.

    Response shape: ``{"resultOK": True, "document": ...}`` on success,
    ``{"resultOK": False, "message": <error text>}`` on failure.
    """
    result = {}
    try:
        document = models.get_online_text_ground_truth(project_id)
        result["resultOK"] = True
        result["document"] = document
    except Exception as e:
        result["resultOK"] = False
        # Bug fix: was str(Exception) — the class repr, not the actual error.
        result["message"] = str(e)
        log_exception(e)
    return dumps(result, ensure_ascii=False)
def get_relationship_type_list(project_id):
    """Return the project's relationship-type list as a JSON string.

    Response shape: ``{"resultOK": True, "list": [...]}`` on success,
    ``{"resultOK": False, "message": <error text>}`` on failure.
    """
    result = {}
    try:
        relationship_type_list = models.get_relationship_type_list(project_id)
        result["resultOK"] = True
        result["list"] = relationship_type_list
    except Exception as e:
        result["resultOK"] = False
        # Bug fix: was str(Exception) — the class repr, not the actual error.
        result["message"] = str(e)
        log_exception(e)
    return dumps(result, ensure_ascii=False)
def ground_truth(project_id):
    """Return the ground truth for the document named in the request JSON.

    Reads ``document_id`` from the request body. Response shape:
    ``{"resultOK": True, "document": ...}`` on success,
    ``{"resultOK": False, "message": <error text>}`` on failure.
    """
    result = {}
    try:
        document_id = str(request.json['document_id'])
        document = models.get_ground_truth(project_id, document_id)
        result["resultOK"] = True
        result["document"] = document
    except Exception as e:
        result["resultOK"] = False
        # Bug fix: was str(Exception) — the class repr, not the actual error.
        result["message"] = str(e)
        log_exception(e)
    return dumps(result, ensure_ascii=False)
def sire_info(project_id):
    """Return SIRE info for the project as a JSON string.

    Response shape: ``{"resultOK": True, "sireInfo": ...}`` on success,
    ``{"resultOK": False, "message": <error text>}`` on failure.
    """
    result = {}
    try:
        # NOTE(review): document_id is read but never used — kept because
        # removing it would change the error path for requests missing the
        # key; confirm with callers whether it is actually required.
        document_id = str(request.json['document_id'])
        document = models.get_sire_info(project_id)
        result["resultOK"] = True
        result["sireInfo"] = document
    except Exception as e:
        result["resultOK"] = False
        # Bug fix: was str(Exception) — the class repr, not the actual error.
        result["message"] = str(e)
        log_exception(e)
    return dumps(result, ensure_ascii=False)
def save_all_typesystem(project_id):
    """Persist the full type system (diagram, entity and relation types).

    Reads ``typeSystemDiagram``, ``entityTypes`` and ``relationTypes`` from the
    request JSON. Response shape: ``{"resultOK": True, "result": ...}`` on
    success, ``{"resultOK": False, "message": <error text>}`` on failure.
    """
    result = {}
    try:
        type_system_diagram = request.json['typeSystemDiagram']
        entity_types = request.json['entityTypes']
        relation_types = request.json['relationTypes']
        save_result = models.save_all_typesystem(
            project_id=project_id,
            type_system_diagram=type_system_diagram,
            entity_types=entity_types,
            relation_types=relation_types)
        result["resultOK"] = True
        result["result"] = save_result
    except Exception as e:
        result["resultOK"] = False
        # Bug fix: was str(Exception) — the class repr, not the actual error.
        result["message"] = str(e)
        log_exception(e)
    return dumps(result, ensure_ascii=False)
def get_entity_type_list(project_id='asdf'):
    """Return the project's entity-type list as a JSON string.

    Response shape: ``{"resultOK": True, "list": [...]}`` on success,
    ``{"resultOK": False, "message": <error text>}`` on failure.
    """
    # NOTE(review): the 'asdf' default looks like a leftover test value; kept
    # for interface compatibility — confirm whether any caller relies on it.
    result = {}
    try:
        entity_type_list = models.get_entity_type_list(project_id)
        result["resultOK"] = True
        result["list"] = entity_type_list
    except Exception as e:
        result["resultOK"] = False
        # Bug fix: was str(Exception) — the class repr, not the actual error.
        result["message"] = str(e)
        log_exception(e)
    return dumps(result, ensure_ascii=False)
def save_all_annotation(project_id):
    """Persist all annotations for one ground-truth record.

    Reads ``ground_truth_id`` and ``saveData`` from the request JSON.
    Response shape: ``{"resultOK": True, "result": ...}`` on success,
    ``{"resultOK": False, "message": <error text>}`` on failure.
    """
    result = {}
    try:
        ground_truth_id = str(request.json['ground_truth_id'])
        save_data = request.json['saveData']
        save_result = models.save_all_annotation(
            project_id, ground_truth_id=ground_truth_id, save_data=save_data)
        result["resultOK"] = True
        result["result"] = save_result
    except Exception as e:
        result["resultOK"] = False
        # Bug fix: was str(Exception) — the class repr, not the actual error.
        result["message"] = str(e)
        log_exception(e)
    return dumps(result, ensure_ascii=False)
def document_parser(self, data):
    """Parse CSV-like ``name,"text"`` records into a list of document dicts.

    The text field is double-quote delimited; a doubled quote ("") inside the
    field is an escaped literal quote. Between records only spaces/tabs are
    allowed outside quotes — anything else raises MyException (caught and
    logged; the documents parsed so far are still returned).
    """
    offset = 0
    str_buffer = []
    is_name = True
    is_in_quote = False
    documents = []
    document_index = 1
    if self.global_modified_date is None:
        self.global_modified_date = self.get_epoch_time()
    tmp_document = self.get_base_document(
        document_index=document_index,
        modified_date=self.global_modified_date)
    # Hand-rolled state machine; a regex-based approach was tried and
    # abandoned (translated from the original Korean comment).
    try:
        length = len(data)
        iter_data = iter(data)
        for char in iter_data:
            if is_name:
                if char == ",":
                    tmp_document["name"] = ''.join(str_buffer).strip()
                    str_buffer = []
                    is_name = False
                else:
                    str_buffer.append(char)
            elif is_in_quote:
                if char == '"':
                    if offset + 1 < length and data[offset + 1] == '"':
                        # Escaped quote: keep one '"', consume the second.
                        str_buffer.append(char)
                        next(iter_data)
                        # Bug fix: the skipped character never reaches the
                        # loop's trailing offset += 1, so offset fell one
                        # behind the iterator and every later
                        # data[offset + 1] lookahead inspected the wrong
                        # character. Advance offset for it here.
                        offset += 1
                    else:
                        # Closing quote: finish this record, start the next.
                        tmp_document["text"] = ''.join(str_buffer).strip()
                        str_buffer = []
                        is_name = True
                        is_in_quote = False
                        documents.append(tmp_document)
                        document_index += 1
                        tmp_document = self.get_base_document(
                            document_index=document_index,
                            modified_date=self.global_modified_date)
                else:
                    str_buffer.append(char)
            else:
                if char == '"':
                    is_in_quote = True
                elif char not in (" ", "\t"):
                    raise MyException("wrong document format")
            offset += 1
    except MyException as e:
        log_exception(e)
    except Exception as e:
        log_exception(e)
    return documents