def evaluate_dataset(self, file_name, mode, do_inference=True, use_prior=True, use_context=True, size=-1):
    """Run the typing pipeline over every sentence in *file_name* and print performance.

    Builds a fresh InferenceProcessor for *mode*, reads up to *size* sentences
    (-1 = all), skips sentences that process_sentence rejects (it returns -1
    as an error sentinel), and finally prints aggregate performance.
    Returns None; prints an error and bails out if *file_name* does not exist.
    """
    # Guard clause: nothing to do without a readable input file.
    if not os.path.isfile(file_name):
        print("[ERROR] Invalid input data file.")
        return

    self.inference_processor = InferenceProcessor(mode, do_inference, use_prior, use_context)
    reader = DataReader(file_name, size)

    for sent in reader.sentences:
        outcome = self.process_sentence(sent)
        if outcome != -1:  # -1 marks a sentence the pipeline could not handle
            self.evaluated.append(outcome)
            outcome.print_self()

    Evaluator().print_performance(self.evaluated)
def handle_input(self):
    """Handle one HTTP typing request and return a JSON response string.

    Expects a JSON body with "tokens", "mention_starts", "mention_ends",
    "index", "mode" and, for mode == "custom", a "taxonomy" rule list.
    For each mention span it predicts types/candidates (serving from the
    in-memory cache when possible) and returns them keyed per mention.
    Malformed requests get an INVALID_INPUT payload instead of an error.
    """
    start_time = time.time()
    ret = {}
    r = request.get_json()

    # Reject structurally invalid requests up front with a sentinel payload.
    if "tokens" not in r or "mention_starts" not in r or "mention_ends" not in r or "index" not in r:
        ret["type"] = [["INVALID_INPUT"]]
        ret["index"] = -1
        ret["mentions"] = []
        ret["candidates"] = [[]]
        return json.dumps(ret)

    # One Sentence per requested mention span, all over the same token list.
    sentences = []
    for i in range(len(r["mention_starts"])):
        sentences.append(
            Sentence(r["tokens"], int(r["mention_starts"][i]), int(r["mention_ends"][i]), ""))

    mode = r["mode"]
    predicted_types = []
    predicted_candidates = []
    other_possible_types = []
    selected_candidates = []
    mentions = []

    # Choose the inference processor for the requested type system.
    # (Flattened from the original double-negative `if mode != ...` nesting.)
    if mode == "figer":
        selected_inference_processor = self.runner.inference_processor
    elif mode == "custom":
        rules = r["taxonomy"]
        mappings = self.parse_custom_rules(rules)
        selected_inference_processor = InferenceProcessor(mode, custom_mapping=mappings)
    else:
        selected_inference_processor = InferenceProcessor(
            mode, resource_loader=self.runner.inference_processor)

    for sentence in sentences:
        sentence.set_signature(selected_inference_processor.signature())
        cached = self.mem_cache.query_cache(sentence)
        if cached is not None:
            sentence = cached
        else:
            self.runner.process_sentence(sentence, selected_inference_processor)
            # BUGFIX: was a bare `except:`, which also swallows SystemExit and
            # KeyboardInterrupt. Narrowed to Exception; cache insertion stays
            # best-effort and never fails the request.
            try:
                self.mem_cache.insert_cache(sentence)
                self.surface_cache.insert_cache(sentence)
            except Exception:
                print("Cache insertion exception. Ignored.")
        predicted_types.append(list(sentence.predicted_types))
        predicted_candidates.append(sentence.elmo_candidate_titles)
        mentions.append(sentence.get_mention_surface_raw())
        selected_candidates.append(sentence.selected_title)
        other_possible_types.append(sentence.could_also_be_types)

    elapsed_time = time.time() - start_time
    print("Processed mention " + str([x.get_mention_surface() for x in sentences])
          + " in mode " + mode + ". TIME: " + str(elapsed_time) + " seconds.")

    ret["type"] = predicted_types
    ret["candidates"] = predicted_candidates
    ret["mentions"] = mentions
    ret["index"] = r["index"]
    ret["selected_candidates"] = selected_candidates
    ret["other_possible_type"] = other_possible_types
    return json.dumps(ret)
# NOTE(review): the two writes below are the tail of a loop whose beginning is
# outside this chunk — presumably one token/prediction/gold label per line,
# with a blank line separating sentences (CoNLL-style). Confirm against the
# full file; their original indentation is not recoverable here.
file.write(sen.tokens[i] + '\t' + p_labels[i] + '\t' + gold_labels[i] + '\n')
file.write('\n')

# Script entry: post-process a prediction dump against a title->Freebase map.
# file_name = 'CoNLL_dev'
file_name = 'On'
# NOTE(review): pickle.load on a local data file — fine for trusted local
# resources, but never feed untrusted input to pickle.
freebase_file = open('data/title2freebase.pickle', 'rb')
freebase = pickle.load(freebase_file)
prediction_data = ReadData('result_' + file_name + '.out')
outfile = open('fixed_result_' + file_name + '.out', 'w')
inference_processor = InferenceProcessor("ontonotes")
prior_threshold = 0.5
for sen in prediction_data:
    for idx, prediction in enumerate(sen.predictions):
        # Mention surface form: tokens of the [start, end) span joined by '_',
        # matching the Freebase title key format.
        surface = '_'.join(sen.tokens[prediction[0]:prediction[1]])
        # If the raw surface is not a known title, try casing variants in
        # order: Capitalized-first-token, ALL CAPS, then all lower case.
        if surface not in freebase:
            if surface[0] + surface[1:].lower() in freebase:
                surface = surface[0] + surface[1:].lower()
            else:
                if surface.upper() in freebase:
                    surface = surface.upper()
                else:
                    if surface.lower() in freebase:
                        surface = surface.lower()
                        # NOTE(review): loop body appears truncated here — the
                        # chunk ends before `surface` is consumed or `outfile`
                        # is written; see the full file for the remainder.
def __init__(self):
    """Assemble the OntoNotes typing pipeline components."""
    # Feature extractors.
    self.bert_processor = BertProcessor()
    self.esa_processor = EsaProcessor()
    # Inference over the OntoNotes type ontology.
    self.inference_processor = InferenceProcessor("ontonotes")
    # Scorer plus the running list of processed sentences it consumes.
    self.evaluator = Evaluator()
    self.evaluated = list()
def __init__(self, allow_tensorflow=True):
    """Assemble the FIGER typing pipeline components.

    allow_tensorflow is forwarded verbatim to ElmoProcessor (presumably it
    toggles the TensorFlow backend there — confirm in ElmoProcessor).
    """
    # Feature extractors.
    self.elmo_processor = ElmoProcessor(allow_tensorflow)
    self.esa_processor = EsaProcessor()
    # Inference over the FIGER type ontology.
    self.inference_processor = InferenceProcessor("figer")
    # Scorer plus the running list of processed sentences it consumes.
    self.evaluator = Evaluator()
    self.evaluated = list()