def handle_input(self):
    start_time = time.time()
    ret = {}
    r = request.get_json()
    # "mode" is also required below, so validate it along with the span keys.
    if "tokens" not in r or "mention_starts" not in r or "mention_ends" not in r \
            or "index" not in r or "mode" not in r:
        ret["type"] = [["INVALID_INPUT"]]
        ret["index"] = -1
        ret["mentions"] = []
        ret["candidates"] = [[]]
        return json.dumps(ret)
    # Build one Sentence per requested mention span.
    sentences = []
    for i in range(0, len(r["mention_starts"])):
        sentence = Sentence(r["tokens"], int(r["mention_starts"][i]),
                            int(r["mention_ends"][i]), "")
        sentences.append(sentence)
    mode = r["mode"]
    predicted_types = []
    predicted_candidates = []
    other_possible_types = []
    selected_candidates = []
    mentions = []
    # Pick the inference processor for the requested taxonomy.
    if mode != "figer":
        if mode != "custom":
            selected_inference_processor = InferenceProcessor(
                mode, resource_loader=self.runner.inference_processor)
        else:
            rules = r["taxonomy"]
            mappings = self.parse_custom_rules(rules)
            selected_inference_processor = InferenceProcessor(
                mode, custom_mapping=mappings)
    else:
        selected_inference_processor = self.runner.inference_processor
    for sentence in sentences:
        sentence.set_signature(selected_inference_processor.signature())
        cached = self.mem_cache.query_cache(sentence)
        if cached is not None:
            sentence = cached
        else:
            self.runner.process_sentence(sentence, selected_inference_processor)
            try:
                self.mem_cache.insert_cache(sentence)
                self.surface_cache.insert_cache(sentence)
            except Exception:
                print("Cache insertion exception. Ignored.")
        predicted_types.append(list(sentence.predicted_types))
        predicted_candidates.append(sentence.elmo_candidate_titles)
        mentions.append(sentence.get_mention_surface_raw())
        selected_candidates.append(sentence.selected_title)
        other_possible_types.append(sentence.could_also_be_types)
    elapsed_time = time.time() - start_time
    print("Processed mention " + str([x.get_mention_surface() for x in sentences])
          + " in mode " + mode + ". TIME: " + str(elapsed_time) + " seconds.")
    ret["type"] = predicted_types
    ret["candidates"] = predicted_candidates
    ret["mentions"] = mentions
    ret["index"] = r["index"]
    ret["selected_candidates"] = selected_candidates
    ret["other_possible_type"] = other_possible_types
    return json.dumps(ret)
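# --- Hedged usage sketch (not part of the original source) ---
# A minimal client for handle_input. The endpoint path "/annotate" and port
# are hypothetical; only the JSON keys come from the checks above. "tokens"
# is assumed to be a token list, matching Sentence's first argument.
import requests

def query_zoe_server(tokens, mention_starts, mention_ends, mode="figer",
                     url="http://localhost:5000/annotate"):
    payload = {
        "tokens": tokens,                  # token list for the sentence
        "mention_starts": mention_starts,  # span starts, one per mention
        "mention_ends": mention_ends,      # span ends (exclusive)
        "index": 0,                        # echoed back for client bookkeeping
        "mode": mode,                      # "figer", "custom", or another taxonomy
    }
    return requests.post(url, json=payload).json()

# Example: type the single mention "Paris" (token span [2, 3)).
# result = query_zoe_server(["Obama", "visited", "Paris"], [2], [3])
# result["type"] -> one list of predicted types per requested mention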
# Tail of the enclosing label-writing loop (the function head is truncated in the source):
        file.write(sen.tokens[i] + '\t' + p_labels[i] + '\t' + gold_labels[i] + '\n')
    file.write('\n')

# file_name = 'CoNLL_dev'
file_name = 'On'
freebase_file = open('data/title2freebase.pickle', 'rb')
freebase = pickle.load(freebase_file)
prediction_data = ReadData('result_' + file_name + '.out')
outfile = open('fixed_result_' + file_name + '.out', 'w')
inference_processor = InferenceProcessor("ontonotes")
prior_threshold = 0.5
for sen in prediction_data:
    for idx, prediction in enumerate(sen.predictions):
        surface = '_'.join(sen.tokens[prediction[0]:prediction[1]])
        # Match the surface against Freebase titles, falling back to
        # common capitalization variants when the exact form is missing.
        if surface not in freebase:
            if surface[0] + surface[1:].lower() in freebase:
                surface = surface[0] + surface[1:].lower()
            elif surface.upper() in freebase:
                surface = surface.upper()
            elif surface.lower() in freebase:
                surface = surface.lower()
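# --- Hedged illustration (not part of the original source) ---
# The fallback above tries capitalization variants of the underscored surface
# until one matches a Freebase title. A compact restatement of that logic,
# where `titles` stands in for the loaded freebase dict:
def normalize_surface(surface, titles):
    variants = (surface,
                surface[0] + surface[1:].lower(),  # "LOS_ANGELES" -> "Los_angeles"
                surface.upper(),
                surface.lower())
    for variant in variants:
        if variant in titles:
            return variant
    return surface  # no variant found; keep the original surface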
class ZoeRunner:

    """
    @allow_tensorflow sets whether the system will do run-time ELMo processing.
    It's set to False in experiments as ELMo results are cached,
    but please set it to default True when running on new sentences.
    """
    def __init__(self):
        self.bert_processor = BertProcessor()
        self.esa_processor = EsaProcessor()
        self.inference_processor = InferenceProcessor("ontonotes")
        self.evaluator = Evaluator()
        self.evaluated = []

    """
    Process a single sentence
    @sentence: a sentence in zoe_utils.Sentence structure
    @return: a sentence in zoe_utils that has predicted types set
    """
    def process_sentence(self, sentence, inference_processor=None):
        esa_candidates = self.esa_processor.get_candidates(sentence)
        bert_candidates = self.bert_processor.rank_candidates(sentence, esa_candidates)
        if inference_processor is None:
            inference_processor = self.inference_processor
        print("Ranking finished")
        inference_processor.inference(sentence, bert_candidates, esa_candidates)
        return sentence

    """
    Helper function to evaluate on a dataset that has multiple sentences
    @file_name: A string indicating the data file. Note the format needs to be
                the common json format, see examples
    @mode: A string indicating the mode. This adjusts the inference mode and sets caches etc.
    @return: None
    """
    def evaluate_dataset(self, file_name, mode, do_inference=True, use_prior=True,
                         use_context=True, size=-1):
        if not os.path.isfile(file_name):
            print("[ERROR] Invalid input data file.")
            return
        self.inference_processor = InferenceProcessor(mode, do_inference, use_prior, use_context)
        dataset = DataReader(file_name, size)
        for sentence in dataset.sentences:
            processed = self.process_sentence(sentence)
            if processed == -1:
                continue
            self.evaluated.append(processed)
            processed.print_self()
        evaluator = Evaluator()
        evaluator.print_performance(self.evaluated)

    """
    Helper function that saves the predicted sentences list to a file.
    @file_name: A string indicating the target file path.
    Note it will overwrite the content
    @return: None
    """
    def save(self, file_name):
        with open(file_name, "wb") as handle:
            pickle.dump(self.evaluated, handle, pickle.HIGHEST_PROTOCOL)

    @staticmethod
    def evaluate_saved_runlog(log_name):
        with open(log_name, "rb") as handle:
            sentences = pickle.load(handle)
        evaluator = Evaluator()
        evaluator.print_performance(sentences)

    def find_best_prediction_seq(self, i, choice):
        # Walk backwards through the choice table, collecting the mentions
        # on the best-scoring path.
        if i == -1:
            return []
        if i not in choice:
            return self.find_best_prediction_seq(i - 1, choice)
        result = self.find_best_prediction_seq(choice[i][0] - 1, choice)
        result.append(choice[i])
        return result

    def dp(self, sen, cands, scores):
        # f[i]: best total score of non-overlapping mentions ending at or
        # before token i; choice[i]: the mention chosen at position i.
        # cands must be sorted by mention end token for the break below.
        f = [0] * len(sen.tokens)
        choice = {}
        for i in range(0, len(sen.tokens)):
            for mention in cands:
                if mention[1] - 1 > i:
                    break
                # Guard mention[0] == 0: f[-1] would wrongly read the last cell.
                prev = f[mention[0] - 1] if mention[0] > 0 else 0
                if prev + scores[mention] > f[i]:
                    f[i] = prev + scores[mention]
                    choice[i] = mention
        final_prediction = self.find_best_prediction_seq(len(sen.tokens) - 1, choice)
        return final_prediction

    def print_prediction(self, outfile, sen, prediction, types):
        # Emit token-per-line BIO labels: token, predicted, gold, tab-separated.
        p_labels = ['O'] * len(sen.tokens)
        for mention in prediction:
            if types[mention] == 'O':
                continue
            p_labels[mention[0]] = 'B-' + types[mention]
            for i in range(mention[0] + 1, mention[1]):
                p_labels[i] = 'I-' + types[mention]
        gold_labels = ['O'] * len(sen.tokens)
        for mention in sen.gold:
            gold_labels[mention[0]] = 'B-' + mention[2]
            for i in range(mention[0] + 1, mention[1]):
                gold_labels[i] = 'I-' + mention[2]
        for i in range(0, len(sen.tokens)):
            outfile.write(sen.tokens[i] + '\t' + p_labels[i] + '\t' + gold_labels[i] + '\n')
        outfile.write('\n')

    def post_processing(self, sen):
        # If the mention surface has a high-confidence prior title, trust it.
        surface = ' '.join(sen.tokens[sen.mention_start:sen.mention_end]).lower()
        if surface in self.inference_processor.prior_prob_map:
            prior_prob = self.inference_processor.prior_prob_map[surface]
            if float(prior_prob[1]) > 0.7:
                coarse_types = self.inference_processor.get_coarse_types_of_title(prior_prob[0])
                if len(coarse_types) == 0:
                    return sen
                coarse_type = list(coarse_types)[0]  # renamed to avoid shadowing built-in `type`
                if coarse_type == '':
                    return sen
                print(surface, prior_prob[0], coarse_type)
                sen.set_predictions(coarse_type, 1)
        return sen
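# --- Hedged worked example (not part of the original source) ---
# A standalone restatement of the dp()/find_best_prediction_seq() logic on
# toy data, to show the recurrence outside the class. Spans are (start, end)
# token index pairs with end exclusive; names here are illustrative only.

def pick_mentions(num_tokens, cands, scores):
    # cands must be sorted by end token, mirroring the assumption in dp().
    f = [0.0] * num_tokens
    choice = {}
    for i in range(num_tokens):
        for mention in cands:
            if mention[1] - 1 > i:
                break
            prev = f[mention[0] - 1] if mention[0] > 0 else 0.0
            if prev + scores[mention] > f[i]:
                f[i] = prev + scores[mention]
                choice[i] = mention
    # Backtrack from the last token, as find_best_prediction_seq() does.
    result, i = [], num_tokens - 1
    while i >= 0:
        if i in choice:
            result.append(choice[i])
            i = choice[i][0] - 1
        else:
            i -= 1
    return list(reversed(result))

# pick_mentions(4, [(0, 2), (0, 3), (2, 3)],
#               {(0, 2): 0.6, (0, 3): 0.9, (2, 3): 0.5})
# -> [(0, 2), (2, 3)]  (combined score 1.1 beats the single span (0, 3) at 0.9)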
def __init__(self, allow_tensorflow=True):
    self.elmo_processor = ElmoProcessor(allow_tensorflow)
    self.esa_processor = EsaProcessor()
    self.inference_processor = InferenceProcessor("figer")
    self.evaluator = Evaluator()
    self.evaluated = []
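# --- Hedged usage sketch (not part of the original source) ---
# Minimal driver for ZoeRunner, assuming the ELMo-based __init__ variant
# directly above and the Sentence(tokens, start, end, gold) constructor used
# in handle_input; "data/figer_test.json" and "runlog.pickle" are hypothetical paths.

runner = ZoeRunner(allow_tensorflow=True)

# Type a single mention: a token list plus a [start, end) mention span.
sentence = Sentence(["Obama", "visited", "Paris"], 2, 3, "")
runner.process_sentence(sentence)
print(sentence.predicted_types)

# Or evaluate a whole json-format dataset and persist the run log.
runner.evaluate_dataset("data/figer_test.json", "figer")
runner.save("runlog.pickle")
ZoeRunner.evaluate_saved_runlog("runlog.pickle")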