Example #1
    def evaluate_dataset(self,
                         file_name,
                         mode,
                         do_inference=True,
                         use_prior=True,
                         use_context=True,
                         size=-1):
        if not os.path.isfile(file_name):
            print("[ERROR] Invalid input data file.")
            return
        self.inference_processor = InferenceProcessor(mode, do_inference,
                                                      use_prior, use_context)
        dataset = DataReader(file_name, size)
        for sentence in dataset.sentences:
            processed = self.process_sentence(sentence)
            if processed == -1:  # process_sentence signals failure with -1
                continue
            self.evaluated.append(processed)
            processed.print_self()
            # Report running performance over everything evaluated so far.
            evaluator = Evaluator()
            evaluator.print_performance(self.evaluated)
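A usage sketch for context, assuming this method sits on a runner class like those in Examples #4 and #5; the class name ZoeRunner, the import path, and the data path are all guesses:

from main import ZoeRunner  # hypothetical project-local import

runner = ZoeRunner()
# size=-1 (the default) presumably loads the whole file; a positive value caps it.
runner.evaluate_dataset("data/figer_test.json", mode="figer", size=100)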
Example #2
    def handle_input(self):
        start_time = time.time()
        ret = {}
        r = request.get_json()
        if "tokens" not in r or "mention_starts" not in r or "mention_ends" not in r or "index" not in r:
            ret["type"] = [["INVALID_INPUT"]]
            ret["index"] = -1
            ret["mentions"] = []
            ret["candidates"] = [[]]
            return json.dumps(ret)
        # Build one Sentence per mention span; all spans share the same tokens.
        sentences = []
        for i in range(len(r["mention_starts"])):
            sentence = Sentence(r["tokens"], int(r["mention_starts"][i]),
                                int(r["mention_ends"][i]), "")
            sentences.append(sentence)
        mode = r["mode"]
        predicted_types = []
        predicted_candidates = []
        other_possible_types = []
        selected_candidates = []
        mentions = []
        if mode != "figer":
            if mode != "custom":
                selected_inference_processor = InferenceProcessor(
                    mode, resource_loader=self.runner.inference_processor)
            else:
                rules = r["taxonomy"]
                mappings = self.parse_custom_rules(rules)
                selected_inference_processor = InferenceProcessor(
                    mode, custom_mapping=mappings)
        else:
            selected_inference_processor = self.runner.inference_processor

        for sentence in sentences:
            sentence.set_signature(selected_inference_processor.signature())
            cached = self.mem_cache.query_cache(sentence)
            if cached is not None:
                sentence = cached
            else:
                self.runner.process_sentence(sentence,
                                             selected_inference_processor)
                try:
                    self.mem_cache.insert_cache(sentence)
                    self.surface_cache.insert_cache(sentence)
                except Exception:
                    # A failed cache write should not fail the request.
                    print("Cache insertion exception. Ignored.")
            predicted_types.append(list(sentence.predicted_types))
            predicted_candidates.append(sentence.elmo_candidate_titles)
            mentions.append(sentence.get_mention_surface_raw())
            selected_candidates.append(sentence.selected_title)
            other_possible_types.append(sentence.could_also_be_types)

        elapsed_time = time.time() - start_time
        print("Processed mention " +
              str([x.get_mention_surface() for x in sentences]) + " in mode " +
              mode + ". TIME: " + str(elapsed_time) + " seconds.")
        ret["type"] = predicted_types
        ret["candidates"] = predicted_candidates
        ret["mentions"] = mentions
        ret["index"] = r["index"]
        ret["selected_candidates"] = selected_candidates
        ret["other_possible_type"] = other_possible_types
        return json.dumps(ret)
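To make the expected payload concrete, here is a hedged client-side sketch; the host, port, and endpoint path are assumptions, and the printed types are illustrative only:

import requests  # any HTTP client would do

payload = {
    "tokens": ["Barack", "Obama", "visited", "Chicago", "."],
    "mention_starts": [0],  # one mention covering tokens[0:2]
    "mention_ends": [2],
    "index": 0,
    "mode": "figer",
}
response = requests.post("http://localhost:5000/annotate", json=payload)
print(response.json()["type"])  # e.g. [["/person", "/person/politician"]]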
Example #3
# Assumed enclosing helper (hypothetical name); the source fragment begins mid-loop.
def write_sentence(file, sen, p_labels, gold_labels):
    for i in range(len(sen.tokens)):
        file.write(sen.tokens[i] + '\t' + p_labels[i] + '\t' + gold_labels[i] +
                   '\n')
    file.write('\n')


import pickle  # needed by pickle.load below; ReadData and InferenceProcessor are project-local

# file_name = 'CoNLL_dev'
file_name = 'On'

freebase_file = open('data/title2freebase.pickle', 'rb')

freebase = pickle.load(freebase_file)
prediction_data = ReadData('result_' + file_name + '.out')

outfile = open('fixed_result_' + file_name + '.out', 'w')

inference_processor = InferenceProcessor("ontonotes")

prior_threshold = 0.5

for sen in prediction_data:
    for idx, prediction in enumerate(sen.predictions):
        surface = '_'.join(sen.tokens[prediction[0]:prediction[1]])
        # Back off through casing variants until one matches a Freebase title.
        if surface not in freebase:
            if surface[0] + surface[1:].lower() in freebase:
                surface = surface[0] + surface[1:].lower()
            elif surface.upper() in freebase:
                surface = surface.upper()
            elif surface.lower() in freebase:
                surface = surface.lower()
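The back-off order above is easy to misread, so here is a self-contained toy run of the same logic; the dictionary contents are made up:

freebase = {"New_york": 1, "NASA": 2, "apple": 3}  # stand-in for the pickle
for raw in ("NEW_YORK", "nasa", "APPLE"):
    surface = raw
    if surface not in freebase:
        if surface[0] + surface[1:].lower() in freebase:
            surface = surface[0] + surface[1:].lower()
        elif surface.upper() in freebase:
            surface = surface.upper()
        elif surface.lower() in freebase:
            surface = surface.lower()
    print(raw, "->", surface)  # New_york, NASA, apple respectively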
Example #4
    def __init__(self):
        self.bert_processor = BertProcessor()
        self.esa_processor = EsaProcessor()
        self.inference_processor = InferenceProcessor("ontonotes")
        self.evaluator = Evaluator()
        self.evaluated = []
Example #5
    def __init__(self, allow_tensorflow=True):
        self.elmo_processor = ElmoProcessor(allow_tensorflow)
        self.esa_processor = EsaProcessor()
        self.inference_processor = InferenceProcessor("figer")
        self.evaluator = Evaluator()
        self.evaluated = []
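Examples #4 and #5 read as sibling configurations of one runner: a BERT pipeline preset to the OntoNotes taxonomy versus an ELMo pipeline preset to FIGER. A hedged sketch of choosing between them; the class names BertRunner and ZoeRunner are hypothetical wrappers for the constructors above:

taxonomy = "ontonotes"
if taxonomy == "ontonotes":
    runner = BertRunner()  # hypothetical wrapper for Example #4's __init__
else:
    # allow_tensorflow presumably gates TensorFlow-backed ELMo inference.
    runner = ZoeRunner(allow_tensorflow=False)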