import logging
from collections import Counter

import numpy as np

# Package-internal names used below (Metadata, taggers, utils, config,
# load_network, create_reader, POSTagger, POSReader, Suffix, prop_conll,
# join_2_steps, convert_iob_to_iobes, is_punctuation, sentence_recall,
# sentence_precision) are assumed to be importable from the surrounding
# nlpnet-style package; the exact module paths depend on the package layout
# and are not shown here.


def evaluate_srl_2_steps(no_repeat=False, find_preds_automatically=False,
                         gold_file=None):
    """
    Prints the output of a 2-step SRL system in CoNLL style for evaluating.
    """
    # load boundary identification network and reader
    md_boundary = Metadata.load_from_file('srl_boundary')
    nn_boundary = taggers.load_network(md_boundary)
    reader_boundary = taggers.create_reader(md_boundary, gold_file)
    itd_boundary = reader_boundary.get_inverse_tag_dictionary()

    # same for arg classification
    md_classify = Metadata.load_from_file('srl_classify')
    nn_classify = taggers.load_network(md_classify)
    reader_classify = taggers.create_reader(md_classify, gold_file)
    itd_classify = reader_classify.get_inverse_tag_dictionary()

    if find_preds_automatically:
        tagger = taggers.SRLTagger()
    else:
        iter_predicates = iter(reader_boundary.predicates)

    actual_sentences = [actual_sentence
                        for actual_sentence, _ in reader_boundary.sentences]

    for sent in actual_sentences:
        if find_preds_automatically:
            pred_pos = tagger.find_predicates(sent)
        else:
            pred_pos = next(iter_predicates)

        verbs = [(position, sent[position].word) for position in pred_pos]
        sent_bound_codified = np.array([reader_boundary.converter.convert(t)
                                        for t in sent])
        sent_class_codified = np.array([reader_classify.converter.convert(t)
                                        for t in sent])

        answers = nn_boundary.tag_sentence(sent_bound_codified, pred_pos)
        boundaries = [[itd_boundary[x] for x in pred_answer]
                      for pred_answer in answers]
        arg_limits = [utils.boundaries_to_arg_limits(pred_boundaries)
                      for pred_boundaries in boundaries]

        answers = nn_classify.tag_sentence(sent_class_codified, pred_pos,
                                           arg_limits,
                                           allow_repeats=not no_repeat)
        arguments = [[itd_classify[x] for x in pred_answer]
                     for pred_answer in answers]

        tags = join_2_steps(boundaries, arguments)
        print(prop_conll(verbs, tags, len(sent)))
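# Usage sketch (hypothetical call; the file name below is a placeholder, not a
# path from this package). The evaluator prints CoNLL-style proposition
# columns to stdout, so a typical run redirects the output into a file that
# the official CoNLL SRL scoring scripts can consume:
#
#     evaluate_srl_2_steps(no_repeat=False,
#                          find_preds_automatically=False,
#                          gold_file='gold_propositions.conll')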
def _load_data(self):
    """Loads data for Dependency Parsing"""
    md_udep = Metadata.load_from_file('unlabeled_dependency', paths=self.paths)
    self.unlabeled_nn = load_network(md_udep)
    self.unlabeled_reader = create_reader(md_udep)

    md_ldep = Metadata.load_from_file('labeled_dependency', paths=self.paths)
    self.labeled_nn = load_network(md_ldep)
    self.labeled_reader = create_reader(md_ldep)
    self.itd = self.labeled_reader.get_inverse_tag_dictionary()

    self.use_pos = md_udep.use_pos or md_ldep.use_pos
    if self.use_pos:
        self.pos_tagger = POSTagger(self.data_dir, language=self.language)
def evaluate_srl_classify(no_repeat=False, gold_file=None):
    """Evaluates the performance of the network on the SRL classifying task."""
    # load data
    md = Metadata.load_from_file('srl_classify')
    nn = taggers.load_network(md)
    r = taggers.create_reader(md, gold_file)
    r.create_converter()
    r.codify_sentences()

    hits = 0
    total_args = 0

    for sentence, tags, predicates, args in zip(r.sentences, r.tags,
                                                r.predicates, r.arg_limits):
        # the answer includes all predicates
        answer = nn.tag_sentence(sentence, predicates, args,
                                 allow_repeats=not no_repeat)

        for pred_answer, pred_gold in zip(answer, tags):
            for net_tag, gold_tag in zip(pred_answer, pred_gold):
                if net_tag == gold_tag:
                    hits += 1
            total_args += len(pred_gold)

    print('Accuracy: %f' % (float(hits) / total_args))
def _load_data(self):
    """Loads data for POS"""
    md = Metadata.load_from_file('pos', self.paths)
    self.nn = load_network(md)
    self.reader = create_reader(md)
    self.reader.create_converter()
    self.itd = self.reader.get_inverse_tag_dictionary()
def evaluate_srl_1step(find_preds_automatically=False, gold_file=None):
    """
    Evaluates the network on the SRL task performed with one step for
    id + class.
    """
    md = Metadata.load_from_file('srl')
    nn = taggers.load_network(md)
    r = taggers.create_reader(md, gold_file=gold_file)
    itd = r.get_inverse_tag_dictionary()

    if find_preds_automatically:
        tagger = taggers.SRLTagger()
    else:
        iter_predicates = iter(r.predicates)

    for sent in iter(r.sentences):
        # the first element is the sentence itself; the other elements in the
        # list are the tags for each proposition
        actual_sent = sent[0]

        if find_preds_automatically:
            pred_positions = tagger.find_predicates(sent)
        else:
            pred_positions = next(iter_predicates)

        verbs = [(position, actual_sent[position].word)
                 for position in pred_positions]
        sent_codified = np.array([r.converter.convert(token)
                                  for token in actual_sent])

        answers = nn.tag_sentence(sent_codified, pred_positions)
        tags = [convert_iob_to_iobes([itd[x] for x in pred_answer])
                for pred_answer in answers]
        print(prop_conll(verbs, tags, len(actual_sent)))
def evaluate_srl_identify(gold_file):
    """
    Evaluates the performance of the network on the SRL task for the
    argument boundary identification subtask.
    """
    md = Metadata.load_from_file('srl_boundary')
    nn = taggers.load_network(md)
    srl_reader = taggers.create_reader(md, gold_file=gold_file)

    net_itd = srl_reader.get_inverse_tag_dictionary()
    srl_reader.load_tag_dict(config.FILES['srl_tags'], iob=True)
    srl_reader.convert_tags('iob', update_tag_dict=False)
    gold_itd = srl_reader.get_inverse_tag_dictionary()

    # used for calculating precision
    counter_predicted_args = Counter()
    # used for calculating recall
    counter_existing_args = Counter()
    # used for calculating both
    counter_correct_args = Counter()

    srl_reader.codify_sentences()

    for sent, preds, sent_tags in zip(srl_reader.sentences,
                                      srl_reader.predicates,
                                      srl_reader.tags):
        # one answer for each predicate
        answers = nn.tag_sentence(sent, preds)

        for answer, tags in zip(answers, sent_tags):
            correct_args, existing_args = sentence_recall(answer, tags,
                                                          gold_itd, net_itd)
            counter_correct_args.update(correct_args)
            counter_existing_args.update(existing_args)

            _, predicted_args = sentence_precision(answer, tags,
                                                   gold_itd, net_itd)
            counter_predicted_args.update(predicted_args)

    correct_args = sum(counter_correct_args.values())
    total_args = sum(counter_existing_args.values())
    total_found_args = sum(counter_predicted_args.values())

    rec = correct_args / float(total_args)
    prec = correct_args / float(total_found_args)
    try:
        f1 = 2 * rec * prec / (rec + prec)
    except ZeroDivisionError:
        f1 = 0

    print('Recall: %f, Precision: %f, F-1: %f' % (rec, prec, f1))
    print()
    print('Argument\tRecall')

    for arg in counter_existing_args:
        rec = counter_correct_args[arg] / float(counter_existing_args[arg])
        # a couple of notes about precision per argument:
        # - we can't compute it if we are only interested in boundaries;
        #   hence, we can't compute F-1 either
        # - if the network never tagged a given argument, its precision is
        #   100% (it never made a mistake)
        print('%s\t\t%f' % (arg, rec))
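# Contract of the two helpers above, inferred from their call sites (hedged;
# their definitions live elsewhere in the package): sentence_recall(answer,
# tags, gold_itd, net_itd) returns (correct_args, existing_args) and
# sentence_precision(...) returns (correct_args, predicted_args), where each
# element is an iterable of argument labels suitable for Counter.update().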
def _load_data(self):
    """Loads data for SRL"""
    # load boundary identification network and reader
    md_boundary = Metadata.load_from_file('srl_boundary', self.paths)
    self.boundary_nn = load_network(md_boundary)
    self.boundary_reader = create_reader(md_boundary)
    self.boundary_reader.create_converter()
    self.boundary_itd = self.boundary_reader.get_inverse_tag_dictionary()

    # same for arg classification
    md_classify = Metadata.load_from_file('srl_classify', self.paths)
    self.classify_nn = load_network(md_classify)
    self.classify_reader = create_reader(md_classify)
    self.classify_reader.create_converter()
    self.classify_itd = self.classify_reader.get_inverse_tag_dictionary()

    # predicate detection
    md_pred = Metadata.load_from_file('srl_predicates', self.paths)
    self.pred_nn = load_network(md_pred)
    self.pred_reader = create_reader(md_pred)
    self.pred_reader.create_converter()
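# Pipeline sketch (descriptive only; mirrors the three networks loaded above):
# at tagging time, pred_nn first finds predicate positions, boundary_nn then
# delimits argument spans for each predicate, and classify_nn finally assigns
# a semantic role label to each delimited span.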
def _load_data(self):
    """Loads data for POS from a SENNA dump"""
    md = Metadata.load_from_file('pos')
    self.nn, word_dict, suff = load_network()
    self.reader = POSReader()
    self.reader.word_dict = word_dict
    self.reader.create_converter(md)
    self.itd = self.reader.get_inverse_tag_dictionary()

    self.nn.padding_left = self.reader.converter.get_padding_left()
    self.nn.padding_right = self.reader.converter.get_padding_right()
    self.nn.pre_padding = np.array([self.nn.padding_left] * 2)
    self.nn.pos_padding = np.array([self.nn.padding_right] * 2)

    # rebuild the suffix code table from the dump
    Suffix.codes = {}
    for i, s in enumerate(suff):
        Suffix.codes[s] = i
    Suffix.other = Suffix.codes['NOSUFFIX']
def evaluate_unlabeled_dependency(gold_file, punctuation):
    """
    Evaluate unlabeled attachment accuracy per token.
    """
    md = Metadata.load_from_file('unlabeled_dependency')
    nn = taggers.load_network(md)
    reader = taggers.create_reader(md, gold_file)

    logger = logging.getLogger("Logger")
    logger.debug('Loaded network')
    logger.debug(nn.description())
    logger.info('Starting test...')

    hits = 0
    num_tokens = 0
    sentence_hits = 0
    num_sentences = 0

    for sent, heads in zip(reader.sentences, reader.heads):
        sent_codified = reader.codify_sentence(sent)
        answer = nn.tag_sentence(sent_codified)
        correct_sentence = True

        for i, (net_tag, gold_tag) in enumerate(zip(answer, heads)):
            token = sent[i]

            # skip punctuation tokens if requested
            if punctuation and is_punctuation(token):
                continue

            # a head equal to the token's own position marks the root in the
            # gold data; the network encodes the root head as len(sent)
            if net_tag == gold_tag or (gold_tag == i and net_tag == len(sent)):
                hits += 1
            else:
                correct_sentence = False
            num_tokens += 1

        if correct_sentence:
            sentence_hits += 1
        num_sentences += 1

    accuracy = float(hits) / num_tokens
    sent_accuracy = 100 * float(sentence_hits) / num_sentences

    print('%d hits out of %d' % (hits, num_tokens))
    print('%d sentences completely correct (%f%%)' % (sentence_hits,
                                                      sent_accuracy))
    print('Accuracy: %f%%' % (100 * accuracy))
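# Hedged sketch of the is_punctuation predicate used above; the real helper is
# defined elsewhere in the package, and this stand-in (given a distinct name
# on purpose) only illustrates one plausible implementation: a token whose
# surface form consists entirely of punctuation characters.
def is_punctuation_sketch(token):
    """Return True if the token's word contains only punctuation marks."""
    import string
    word = getattr(token, 'word', token)
    return bool(word) and all(c in string.punctuation for c in word)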
def evaluate_pos(gold_file=None, oov=None):
    """
    Tests the network for tagging a given sequence.

    :param gold_file: file with gold data to evaluate against
    :param oov: either None or a collection of out-of-vocabulary words;
        when given, only those words are scored
    """
    md = Metadata.load_from_file('pos')
    nn = taggers.load_network(md)
    pos_reader = taggers.create_reader(md, gold_file=gold_file)
    itd = pos_reader.get_inverse_tag_dictionary()

    logger = logging.getLogger("Logger")
    logger.debug('Loaded network')
    logger.debug(nn.description())
    logger.info('Starting test...')

    hits = 0
    total = 0

    for sent in pos_reader.sentences:
        tokens, tags = zip(*sent)
        sent_codified = np.array([pos_reader.converter.convert(t)
                                  for t in tokens])
        answer = nn.tag_sentence(sent_codified)

        if oov is not None:
            iter_sent = iter(tokens)

        for net_tag, gold_tag in zip(answer, tags):
            if oov is not None:
                # only check oov words
                word = next(iter_sent)
                if word.lower() not in oov:
                    continue

            if itd[net_tag] == gold_tag:
                hits += 1
            total += 1

    print('%d hits out of %d' % (hits, total))
    accuracy = float(hits) / total

    logger.info('Done.')
    return accuracy
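# Usage sketch (hypothetical; 'oov_words.txt' is a placeholder). To score only
# out-of-vocabulary words, pass a set of lowercased word forms, matching the
# word.lower() lookup above:
#
#     with open('oov_words.txt') as f:
#         oov = set(line.strip().lower() for line in f)
#     accuracy = evaluate_pos(gold_file='gold_pos.txt', oov=oov)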
def evaluate_labeled_dependency(gold_file):
    """
    Evaluate the accuracy of dependency labels per token.
    """
    md = Metadata.load_from_file('labeled_dependency')
    nn = taggers.load_network(md)
    reader = taggers.create_reader(md, gold_file)
    reader.codify_sentences()

    logger = logging.getLogger("Logger")
    logger.debug('Loaded network')
    logger.debug(nn.description())
    logger.info('Starting test...')

    hits = 0
    num_tokens = 0
    sentence_hits = 0
    num_sentences = 0

    for sent, heads, labels in zip(reader.sentences, reader.heads,
                                   reader.labels):
        answer = nn.tag_sentence(sent, heads)
        correct_sentence = True

        for net_tag, gold_tag in zip(answer, labels):
            if net_tag == gold_tag:
                hits += 1
            else:
                correct_sentence = False
            num_tokens += 1

        if correct_sentence:
            sentence_hits += 1
        num_sentences += 1

    accuracy = float(hits) / num_tokens
    sent_accuracy = 100 * float(sentence_hits) / num_sentences

    print('%d hits out of %d' % (hits, num_tokens))
    print('%d sentences completely correct (%f%%)' % (sentence_hits,
                                                      sent_accuracy))
    print('Accuracy: %f' % accuracy)
def evaluate_srl_predicates(gold_file):
    """
    Evaluates the performance of the network on the SRL task for the
    predicate detection subtask.
    """
    md = Metadata.load_from_file('srl_predicates')
    nn = taggers.load_network(md)
    reader = taggers.create_reader(md, gold_file=gold_file)
    reader.codify_sentences()

    total_tokens = 0
    # true/false positives and negatives
    tp, fp, tn, fn = 0, 0, 0, 0

    # for each sentence, tags are 0 at non-predicates and 1 at predicates
    for sent, tags in zip(reader.sentences, reader.tags):
        answer = nn.tag_sentence(sent)

        for net_tag, gold_tag in zip(answer, tags):
            if gold_tag == 1:
                if net_tag == gold_tag:
                    tp += 1
                else:
                    fn += 1
            else:
                if net_tag == gold_tag:
                    tn += 1
                else:
                    fp += 1

        total_tokens += len(sent)

    precision = float(tp) / (tp + fp)
    recall = float(tp) / (tp + fn)

    print('True positives: %d, false positives: %d, '
          'true negatives: %d, false negatives: %d' % (tp, fp, tn, fn))
    print('Accuracy: %f' % (float(tp + tn) / total_tokens))
    print('Precision: %f' % precision)
    print('Recall: %f' % recall)
    print('F-1: %f' % (2 * precision * recall / (precision + recall)))
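# Note (hedged): the precision and recall computations above raise
# ZeroDivisionError when the network predicts no predicates (tp + fp == 0) or
# the gold data contains none (tp + fn == 0). A defensive variant, in the same
# spirit as the try/except guard in evaluate_srl_identify, would be:
#
#     precision = float(tp) / (tp + fp) if (tp + fp) else 0.0
#     recall = float(tp) / (tp + fn) if (tp + fn) else 0.0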