示例#1
0
def evaluate_srl_2_steps(no_repeat=False,
                         find_preds_automatically=False,
                         gold_file=None):
    """
    Prints the output of a 2-step SRL system in CoNLL style for evaluating.

    For every sentence, the boundary network tags argument boundaries for
    each predicate, the resulting argument limits are given to the
    classification network, and both outputs are merged with
    ``join_2_steps`` and printed through ``prop_conll``.

    :param no_repeat: if True, the classification network is called with
        ``allow_repeats=False``
    :param find_preds_automatically: if True, predicate positions are
        obtained from ``taggers.SRLTagger.find_predicates``; otherwise they
        are taken, in order, from the boundary reader's ``predicates``
    :param gold_file: forwarded to ``taggers.create_reader`` for both readers
    """
    # load boundary identification network and reader
    md_boundary = Metadata.load_from_file('srl_boundary')
    nn_boundary = taggers.load_network(md_boundary)
    reader_boundary = taggers.create_reader(md_boundary, gold_file)
    itd_boundary = reader_boundary.get_inverse_tag_dictionary()

    # same for arg classification
    md_classify = Metadata.load_from_file('srl_classify')
    nn_classify = taggers.load_network(md_classify)
    reader_classify = taggers.create_reader(md_classify, gold_file)
    itd_classify = reader_classify.get_inverse_tag_dictionary()

    if find_preds_automatically:
        tagger = taggers.SRLTagger()
    else:
        iter_predicates = iter(reader_boundary.predicates)

    # each reader entry is a pair; only its first element (the sentence
    # itself) is needed here
    for sent, _ in reader_boundary.sentences:

        if find_preds_automatically:
            pred_pos = tagger.find_predicates(sent)
        else:
            # builtin next() works on Python 2.6+ and 3, unlike the
            # Python 2-only iterator method .next()
            pred_pos = next(iter_predicates)

        verbs = [(position, sent[position].word) for position in pred_pos]
        sent_bound_codified = np.array(
            [reader_boundary.converter.convert(t) for t in sent])
        sent_class_codified = np.array(
            [reader_classify.converter.convert(t) for t in sent])

        # first step: one sequence of boundary tags per predicate
        answers = nn_boundary.tag_sentence(sent_bound_codified, pred_pos)
        boundaries = [[itd_boundary[x] for x in pred_answer]
                      for pred_answer in answers]

        arg_limits = [
            utils.boundaries_to_arg_limits(pred_boundaries)
            for pred_boundaries in boundaries
        ]

        # second step: classify the arguments delimited above
        answers = nn_classify.tag_sentence(sent_class_codified,
                                           pred_pos,
                                           arg_limits,
                                           allow_repeats=not no_repeat)

        arguments = [[itd_classify[x] for x in pred_answer]
                     for pred_answer in answers]
        tags = join_2_steps(boundaries, arguments)

        print(prop_conll(verbs, tags, len(sent)))
示例#2
0
def evaluate_srl_2_steps(no_repeat=False, find_preds_automatically=False,
                         gold_file=None):
    """
    Run the 2-step SRL system and print its output in CoNLL style.

    Step one tags argument boundaries per predicate; step two classifies
    the arguments delimited by those boundaries. The merged result of each
    sentence is printed so that it can be evaluated.
    """
    # step one: boundary identification network, reader and tag lookup
    boundary_md = Metadata.load_from_file('srl_boundary')
    boundary_net = taggers.load_network(boundary_md)
    boundary_reader = taggers.create_reader(boundary_md, gold_file)
    boundary_tag_of = boundary_reader.get_inverse_tag_dictionary()

    # step two: argument classification network, reader and tag lookup
    classify_md = Metadata.load_from_file('srl_classify')
    classify_net = taggers.load_network(classify_md)
    classify_reader = taggers.create_reader(classify_md, gold_file)
    classify_tag_of = classify_reader.get_inverse_tag_dictionary()

    # predicates are either detected by a tagger or read from the reader
    if find_preds_automatically:
        srl_tagger = taggers.SRLTagger()
    else:
        gold_predicates = iter(boundary_reader.predicates)

    # keep only the first element of each reader entry
    token_lists = [tokens for tokens, _ in boundary_reader.sentences]

    for tokens in token_lists:
        if not find_preds_automatically:
            positions = next(gold_predicates)
        else:
            positions = srl_tagger.find_predicates(tokens)

        verbs = [(index, tokens[index].word) for index in positions]
        boundary_input = np.array(
            [boundary_reader.converter.convert(token) for token in tokens]
        )
        classify_input = np.array(
            [classify_reader.converter.convert(token) for token in tokens]
        )

        boundary_output = boundary_net.tag_sentence(boundary_input, positions)
        boundaries = [[boundary_tag_of[code] for code in per_predicate]
                      for per_predicate in boundary_output]

        arg_limits = [utils.boundaries_to_arg_limits(per_predicate)
                      for per_predicate in boundaries]

        classify_output = classify_net.tag_sentence(
            classify_input, positions, arg_limits,
            allow_repeats=not no_repeat)

        arguments = [[classify_tag_of[code] for code in per_predicate]
                     for per_predicate in classify_output]
        merged_tags = join_2_steps(boundaries, arguments)

        print(prop_conll(verbs, merged_tags, len(tokens)))
示例#3
0
 def _load_data(self):
     """
     Set up the networks and readers needed for dependency parsing.

     Loads the unlabeled and the labeled dependency models and, when
     either model's metadata has ``use_pos`` set, a POS tagger as well.
     """
     # unlabeled stage
     unlabeled_md = Metadata.load_from_file('unlabeled_dependency',
                                            paths=self.paths)
     self.unlabeled_nn = load_network(unlabeled_md)
     self.unlabeled_reader = create_reader(unlabeled_md)

     # labeled stage, plus the lookup from tag codes back to tags
     labeled_md = Metadata.load_from_file('labeled_dependency',
                                          paths=self.paths)
     self.labeled_nn = load_network(labeled_md)
     self.labeled_reader = create_reader(labeled_md)
     self.itd = self.labeled_reader.get_inverse_tag_dictionary()

     # a POS tagger is only created when one of the models asks for it
     self.use_pos = unlabeled_md.use_pos or labeled_md.use_pos
     if not self.use_pos:
         return
     self.pos_tagger = POSTagger(self.data_dir, language=self.language)
示例#4
0
def evaluate_srl_classify(no_repeat=False, gold_file=None):
    """
    Measure the accuracy of the SRL argument classification network.

    Gold predicates and argument limits from the reader are given to the
    network; the fraction of argument tags equal to the gold tags is
    printed.
    """
    metadata = Metadata.load_from_file('srl_classify')
    network = taggers.load_network(metadata)
    reader = taggers.create_reader(metadata, gold_file)
    reader.create_converter()
    reader.codify_sentences()

    correct = 0
    num_args = 0
    gold_data = zip(reader.sentences, reader.tags,
                    reader.predicates, reader.arg_limits)

    for sentence, gold_tags, predicates, limits in gold_data:
        # one sequence of answers per predicate in the sentence
        predicted = network.tag_sentence(sentence, predicates, limits,
                                         allow_repeats=not no_repeat)

        for pred_answer, pred_gold in zip(predicted, gold_tags):
            correct += sum(1
                           for net_tag, gold_tag in zip(pred_answer, pred_gold)
                           if net_tag == gold_tag)
            num_args += len(pred_gold)

    print('Accuracy: %f' % (float(correct) / num_args))
示例#5
0
def evaluate_srl_classify(no_repeat=False, gold_file=None):
    """
    Evaluates the performance of the network on the SRL classifying task.

    The network receives the gold predicates and argument limits from the
    reader; the proportion of argument tags matching the gold tags is
    printed.

    :param no_repeat: if True, the network is called with
        ``allow_repeats=False``
    :param gold_file: forwarded to ``taggers.create_reader``
    :raises ZeroDivisionError: if the gold data contains no arguments
    """
    # load data
    md = Metadata.load_from_file('srl_classify')
    nn = taggers.load_network(md)
    r = taggers.create_reader(md, gold_file)
    r.create_converter()

    r.codify_sentences()
    hits = 0
    total_args = 0

    # builtin zip replaces itertools.izip, which does not exist on Python 3
    for sentence, tags, predicates, args in zip(r.sentences, r.tags,
                                                r.predicates, r.arg_limits):

        # the answer includes all predicates
        answer = nn.tag_sentence(sentence,
                                 predicates,
                                 args,
                                 allow_repeats=not no_repeat)

        for pred_answer, pred_gold in zip(answer, tags):

            for net_tag, gold_tag in zip(pred_answer, pred_gold):
                if net_tag == gold_tag:
                    hits += 1

            total_args += len(pred_gold)

    # parenthesised single-argument form prints the same on Python 2 and 3
    print('Accuracy: %f' % (float(hits) / total_args))
示例#6
0
 def _load_data(self):
     """
     Load the POS network and its reader.

     Also creates the reader's converter and stores the reader's inverse
     tag dictionary in ``self.itd``.
     """
     pos_metadata = Metadata.load_from_file('pos', self.paths)

     self.nn = load_network(pos_metadata)

     self.reader = create_reader(pos_metadata)
     self.reader.create_converter()
     self.itd = self.reader.get_inverse_tag_dictionary()
示例#7
0
def evaluate_srl_1step(find_preds_automatically=False, gold_file=None):
    """
    Evaluates the network on the SRL task performed with one step for
    id + class.

    The tagged propositions of every sentence are printed in the format
    produced by ``prop_conll``.

    :param find_preds_automatically: if True, predicate positions are
        obtained from ``taggers.SRLTagger.find_predicates``; otherwise they
        are taken, in order, from the reader's ``predicates``
    :param gold_file: forwarded to ``taggers.create_reader``
    """
    md = Metadata.load_from_file('srl')
    nn = taggers.load_network(md)
    r = taggers.create_reader(md, gold_file=gold_file)

    itd = r.get_inverse_tag_dictionary()

    if find_preds_automatically:
        tagger = taggers.SRLTagger()
    else:
        iter_predicates = iter(r.predicates)

    for sent in r.sentences:

        # the other elements in the list are the tags for each proposition
        actual_sent = sent[0]

        if find_preds_automatically:
            # the tagger must see the tokens only, not the whole reader
            # entry (which also carries the proposition tags)
            pred_positions = tagger.find_predicates(actual_sent)
        else:
            # builtin next() works on Python 2.6+ and 3
            pred_positions = next(iter_predicates)

        verbs = [(position, actual_sent[position].word)
                 for position in pred_positions]
        sent_codified = np.array([r.converter.convert(token)
                                  for token in actual_sent])

        answers = nn.tag_sentence(sent_codified, pred_positions)
        tags = [convert_iob_to_iobes([itd[x] for x in pred_answer])
                for pred_answer in answers]

        # parenthesised single-argument form prints the same on Python 2 and 3
        print(prop_conll(verbs, tags, len(actual_sent)))
示例#8
0
def evaluate_srl_identify(gold_file):
    """
    Evaluates the performance of the network on the SRL task for the
    argument boundaries identification subtask.

    Prints overall recall, precision and F-1, followed by the recall for
    each argument found in the gold data. A measure whose denominator is
    zero is reported as 0 instead of raising ``ZeroDivisionError``.

    :param gold_file: forwarded to ``taggers.create_reader``
    """
    md = Metadata.load_from_file('srl_boundary')
    nn = taggers.load_network(md)
    srl_reader = taggers.create_reader(md, gold_file=gold_file)

    # the inverse dictionary is taken both before and after the tag
    # dictionary is reloaded: the first one decodes the network output,
    # the second one decodes the gold tags
    net_itd = srl_reader.get_inverse_tag_dictionary()
    srl_reader.load_tag_dict(config.FILES['srl_tags'], iob=True)

    srl_reader.convert_tags('iob', update_tag_dict=False)
    gold_itd = srl_reader.get_inverse_tag_dictionary()

    # used for calculating precision
    counter_predicted_args = Counter()
    # used for calculating recall
    counter_existing_args = Counter()
    # used for calculating both
    counter_correct_args = Counter()

    srl_reader.codify_sentences()

    # builtin zip replaces itertools.izip, which does not exist on Python 3
    for sent, preds, sent_tags in zip(srl_reader.sentences,
                                      srl_reader.predicates,
                                      srl_reader.tags):

        # one answer for each predicate
        answers = nn.tag_sentence(sent, preds)

        for answer, tags in zip(answers, sent_tags):
            correct_args, existing_args = sentence_recall(answer, tags,
                                                          gold_itd, net_itd)
            counter_correct_args.update(correct_args)
            counter_existing_args.update(existing_args)

            _, predicted_args = sentence_precision(answer, tags,
                                                   gold_itd, net_itd)
            counter_predicted_args.update(predicted_args)

    num_correct = sum(counter_correct_args.values())
    num_existing = sum(counter_existing_args.values())
    num_predicted = sum(counter_predicted_args.values())

    # guard every division, not only the F-1 one: an empty gold file or a
    # network that predicts no argument would otherwise crash here
    rec = num_correct / float(num_existing) if num_existing else 0.0
    prec = num_correct / float(num_predicted) if num_predicted else 0.0
    f1 = 2 * rec * prec / (rec + prec) if (rec + prec) else 0

    # parenthesised single-argument form prints the same on Python 2 and 3
    print('Recall: %f, Precision: %f, F-1: %f' % (rec, prec, f1))
    # print('') emits a blank line on both Python 2 and 3 (a bare print()
    # would output "()" on Python 2)
    print('')
    print('Argument\tRecall')

    for arg in counter_existing_args:
        rec = counter_correct_args[arg] / float(counter_existing_args[arg])

        # a couple of notes about precision per argument:
        # - we can't compute it if we are only interested in boundaries.
        #   hence, we can't compute f-1
        # - if the network never tagged a given argument, its precision is
        #   100% (it never made a mistake)

        print('%s\t\t%f' % (arg, rec))
0
 def _load_data(self):
     """
     Load the three SRL models: boundary identification, argument
     classification and predicate detection.

     Each model gets a network and a reader with its converter created;
     the first two also get an inverse tag dictionary.
     """
     # argument boundary identification
     boundary_md = Metadata.load_from_file('srl_boundary', self.paths)
     self.boundary_nn = load_network(boundary_md)
     self.boundary_reader = create_reader(boundary_md)
     self.boundary_reader.create_converter()
     self.boundary_itd = self.boundary_reader.get_inverse_tag_dictionary()

     # argument classification
     classify_md = Metadata.load_from_file('srl_classify', self.paths)
     self.classify_nn = load_network(classify_md)
     self.classify_reader = create_reader(classify_md)
     self.classify_reader.create_converter()
     self.classify_itd = self.classify_reader.get_inverse_tag_dictionary()

     # predicate detection (no inverse tag dictionary is stored for it)
     predicate_md = Metadata.load_from_file('srl_predicates', self.paths)
     self.pred_nn = load_network(predicate_md)
     self.pred_reader = create_reader(predicate_md)
     self.pred_reader.create_converter()
示例#10
0
def evaluate_unlabeled_dependency(gold_file, punctuation):
    """
    Evaluate unlabeled accuracy per token.

    Prints the number of correct heads, the number (and percentage) of
    sentences whose counted tokens are all correct, and the token accuracy
    as a percentage.

    :param gold_file: forwarded to ``taggers.create_reader``
    :param punctuation: if true, tokens for which ``is_punctuation``
        returns true are left out of the counts
    :raises ZeroDivisionError: if no token or no sentence was evaluated
    """
    md = Metadata.load_from_file('unlabeled_dependency')
    nn = taggers.load_network(md)
    reader = taggers.create_reader(md, gold_file)

    logger = logging.getLogger("Logger")
    logger.debug('Loaded network')
    logger.debug(nn.description())
    logger.info('Starting test...')
    hits = 0
    num_tokens = 0
    sentence_hits = 0
    num_sentences = 0

    for sent, heads in zip(reader.sentences, reader.heads):

        sent_codified = reader.codify_sentence(sent)
        answer = nn.tag_sentence(sent_codified)
        correct_sentence = True

        for i, (net_tag, gold_tag) in enumerate(zip(answer, heads)):

            token = sent[i]
            # detect punctuation
            if punctuation and is_punctuation(token):
                continue

            # besides an exact match, a gold head equal to the token's own
            # index is accepted when the network answers len(sent)
            # NOTE(review): presumably two encodings of the root -- confirm
            if net_tag == gold_tag or (gold_tag == i and net_tag == len(sent)):
                hits += 1
            else:
                correct_sentence = False

            num_tokens += 1

        if correct_sentence:
            sentence_hits += 1
        num_sentences += 1

    accuracy = float(hits) / num_tokens
    sent_accuracy = 100 * float(sentence_hits) / num_sentences
    # parenthesised single-argument form prints the same on Python 2 and 3
    print('%d hits out of %d' % (hits, num_tokens))
    print('%d sentences completely correct (%f%%)' % (sentence_hits,
                                                      sent_accuracy))
    print('Accuracy: %f%%' % (100 * accuracy))
示例#11
0
 def _load_data(self):
     """
     Build the POS network and reader from a SENNA dump.

     The dump supplies the network, the word dictionary and the suffix
     list. Padding values come from the reader's converter, and the
     suffix codes of ``Suffix`` are rebuilt from the suffix list.
     """
     metadata = Metadata.load_from_file('pos')
     self.nn, word_dict, suffixes = load_network()

     self.reader = POSReader()
     self.reader.word_dict = word_dict
     self.reader.create_converter(metadata)
     self.itd = self.reader.get_inverse_tag_dictionary()

     # padding used by the network, taken from the converter
     converter = self.reader.converter
     self.nn.padding_left = converter.get_padding_left()
     self.nn.padding_right = converter.get_padding_right()
     self.nn.pre_padding = np.array([self.nn.padding_left] * 2)
     self.nn.pos_padding = np.array([self.nn.padding_right] * 2)

     # each suffix is coded by its position in the dumped list
     Suffix.codes = {suffix: code for code, suffix in enumerate(suffixes)}
     Suffix.other = Suffix.codes['NOSUFFIX']
示例#12
0
def evaluate_unlabeled_dependency(gold_file, punctuation):
    """
    Compute and print the per-token accuracy of unlabeled dependency heads.

    When ``punctuation`` is true, tokens recognised by ``is_punctuation``
    are not counted.
    """
    metadata = Metadata.load_from_file('unlabeled_dependency')
    network = taggers.load_network(metadata)
    reader = taggers.create_reader(metadata, gold_file)

    log = logging.getLogger("Logger")
    log.debug('Loaded network')
    log.debug(network.description())
    log.info('Starting test...')

    correct_tokens = 0
    counted_tokens = 0
    perfect_sentences = 0
    total_sentences = 0

    for sent, gold_heads in zip(reader.sentences, reader.heads):
        predicted_heads = network.tag_sentence(reader.codify_sentence(sent))
        all_correct = True

        pairs = zip(predicted_heads, gold_heads)
        for index, (predicted, gold) in enumerate(pairs):
            token = sent[index]

            # punctuation tokens are left out when requested
            if punctuation and is_punctuation(token):
                continue

            counted_tokens += 1
            if not (predicted == gold
                    or (gold == index and predicted == len(sent))):
                all_correct = False
            else:
                correct_tokens += 1

        total_sentences += 1
        perfect_sentences += int(all_correct)

    accuracy = float(correct_tokens) / counted_tokens
    sent_accuracy = 100 * float(perfect_sentences) / total_sentences
    print('%d hits out of %d' % (correct_tokens, counted_tokens))
    print('%d sentences completely correct (%f%%)' % (perfect_sentences,
                                                      sent_accuracy))
    print('Accuracy: %f%%' % (100 * accuracy))
示例#13
0
def evaluate_pos(gold_file=None, oov=None):
    """
    Tests the network for tagging a given sequence.

    Prints the number of hits and returns the accuracy over the evaluated
    tokens.

    :param gold_file: file with gold data to evaluate against
    :param oov: either None or a list of tokens, that should contain the
        oov words. When given, only tokens whose lowercased form is in it
        are evaluated.
    :returns: the proportion of evaluated tokens tagged correctly
    :raises ZeroDivisionError: if no token was evaluated
    """
    md = Metadata.load_from_file('pos')
    nn = taggers.load_network(md)
    pos_reader = taggers.create_reader(md, gold_file=gold_file)
    itd = pos_reader.get_inverse_tag_dictionary()

    logger = logging.getLogger("Logger")
    logger.debug('Loaded network')
    logger.debug(nn.description())
    logger.info('Starting test...')
    hits = 0
    total = 0

    for sent in pos_reader.sentences:

        # each sentence is a sequence of (token, tag) pairs
        tokens, tags = zip(*sent)
        sent_codified = np.array(
            [pos_reader.converter.convert(t) for t in tokens])
        answer = nn.tag_sentence(sent_codified)

        # walking the tokens in the same zip keeps them aligned with the
        # answers without the Python 2-only iterator method .next()
        for word, net_tag, gold_tag in zip(tokens, answer, tags):

            # only check oov words
            if oov is not None and word.lower() not in oov:
                continue

            if itd[net_tag] == gold_tag:
                hits += 1

            total += 1

    # parenthesised single-argument form prints the same on Python 2 and 3
    print('%d hits out of %d' % (hits, total))
    accuracy = float(hits) / total
    logger.info('Done.')
    return accuracy
示例#14
0
def evaluate_pos(gold_file=None, oov=None):
    """
    Tests the network for tagging a given sequence.

    The hit count is printed and the accuracy is returned.

    :param gold_file: file with gold data to evaluate against
    :param oov: either None or a list of tokens, that should contain the
        oov words; if given, tokens whose lowercased form is not in it are
        skipped
    :returns: hits divided by the number of evaluated tokens
    :raises ZeroDivisionError: if no token was evaluated
    """
    md = Metadata.load_from_file('pos')
    nn = taggers.load_network(md)
    pos_reader = taggers.create_reader(md, gold_file=gold_file)
    itd = pos_reader.get_inverse_tag_dictionary()

    logger = logging.getLogger("Logger")
    logger.debug('Loaded network')
    logger.debug(nn.description())
    logger.info('Starting test...')
    hits = 0
    total = 0
    only_oov = oov is not None

    for sent in pos_reader.sentences:

        # split the (token, tag) pairs of the sentence into two sequences
        tokens, tags = zip(*sent)
        sent_codified = np.array([pos_reader.converter.convert(t)
                                  for t in tokens])
        answer = nn.tag_sentence(sent_codified)

        # tokens are zipped together with answers and gold tags, replacing
        # the separate iterator advanced with the Python 2-only .next()
        for word, net_tag, gold_tag in zip(tokens, answer, tags):

            if only_oov and word.lower() not in oov:
                # only check oov words
                continue

            if itd[net_tag] == gold_tag:
                hits += 1

            total += 1

    # print() with one parenthesised argument is valid on Python 2 and 3
    print('%d hits out of %d' % (hits, total))
    accuracy = float(hits) / total
    logger.info('Done.')
    return accuracy
示例#15
0
def evaluate_labeled_dependency(gold_file, punctuation):
    """
    Evaluate the accuracy for dependency labels per token.

    The network is given the gold heads of each sentence; its labels are
    compared with the gold labels. Prints the number of hits, the number
    (and percentage) of fully correct sentences and the token accuracy.

    :param gold_file: forwarded to ``taggers.create_reader``
    :param punctuation: not used by this function; kept so that existing
        callers keep working
    :raises ZeroDivisionError: if no token or no sentence was evaluated
    """
    md = Metadata.load_from_file('labeled_dependency')
    nn = taggers.load_network(md)
    reader = taggers.create_reader(md, gold_file)
    reader.codify_sentences()

    logger = logging.getLogger("Logger")
    logger.debug('Loaded network')
    logger.debug(nn.description())
    logger.info('Starting test...')
    hits = 0
    num_tokens = 0
    sentence_hits = 0
    num_sentences = 0

    for sent, heads, labels in zip(reader.sentences, reader.heads,
                                   reader.labels):

        answer = nn.tag_sentence(sent, heads)
        correct_sentence = True

        for net_tag, gold_tag in zip(answer, labels):

            if net_tag == gold_tag:
                hits += 1
            else:
                correct_sentence = False

            num_tokens += 1

        if correct_sentence:
            sentence_hits += 1
        num_sentences += 1

    accuracy = float(hits) / num_tokens
    sent_accuracy = 100 * float(sentence_hits) / num_sentences
    # parenthesised single-argument form prints the same on Python 2 and 3
    print('%d hits out of %d' % (hits, num_tokens))
    print('%d sentences completely correct (%f%%)' % (sentence_hits,
                                                      sent_accuracy))
    print('Accuracy: %f' % accuracy)
示例#16
0
def evaluate_labeled_dependency(gold_file):
    """
    Compute and print the per-token accuracy of dependency labels.

    The network labels each sentence given its gold heads; the labels are
    then compared with the gold ones.
    """
    metadata = Metadata.load_from_file('labeled_dependency')
    network = taggers.load_network(metadata)
    reader = taggers.create_reader(metadata, gold_file)
    reader.codify_sentences()

    log = logging.getLogger("Logger")
    log.debug('Loaded network')
    log.debug(network.description())
    log.info('Starting test...')

    correct_tokens = 0
    counted_tokens = 0
    perfect_sentences = 0
    total_sentences = 0

    gold_data = zip(reader.sentences, reader.heads, reader.labels)
    for sent, gold_heads, gold_labels in gold_data:
        predicted_labels = network.tag_sentence(sent, gold_heads)
        all_correct = True

        for predicted, gold in zip(predicted_labels, gold_labels):
            counted_tokens += 1
            if predicted == gold:
                correct_tokens += 1
            else:
                all_correct = False

        total_sentences += 1
        perfect_sentences += int(all_correct)

    accuracy = float(correct_tokens) / counted_tokens
    sent_accuracy = 100 * float(perfect_sentences) / total_sentences
    print('%d hits out of %d' % (correct_tokens, counted_tokens))
    print('%d sentences completely correct (%f%%)' % (perfect_sentences,
                                                      sent_accuracy))
    print('Accuracy: %f' % accuracy)
示例#17
0
def evaluate_srl_predicates(gold_file):
    """
    Evaluates the performance of the network on the SRL task for the
    predicate detection subtask.

    Prints the confusion counts followed by accuracy, precision, recall
    and F-1. A measure whose denominator is zero (for instance, precision
    when the network predicts no predicate at all) is reported as 0
    instead of raising ``ZeroDivisionError``.

    :param gold_file: forwarded to ``taggers.create_reader``
    """
    md = Metadata.load_from_file('srl_predicates')
    nn = taggers.load_network(md)
    reader = taggers.create_reader(md, gold_file=gold_file)
    reader.codify_sentences()

    total_tokens = 0
    # true/false positives and negatives
    tp, fp, tn, fn = 0, 0, 0, 0

    # for each sentence, tags are 0 at non-predicates and 1 at predicates
    for sent, tags in zip(reader.sentences, reader.tags):
        answer = nn.tag_sentence(sent)

        for net_tag, gold_tag in zip(answer, tags):
            if gold_tag == 1:
                if net_tag == gold_tag:
                    tp += 1
                else:
                    fn += 1
            else:
                if net_tag == gold_tag:
                    tn += 1
                else:
                    fp += 1

        total_tokens += len(sent)

    # every ratio is guarded so that degenerate runs still produce a report
    precision = float(tp) / (tp + fp) if (tp + fp) else 0.0
    recall = float(tp) / (tp + fn) if (tp + fn) else 0.0
    accuracy = float(tp + tn) / total_tokens if total_tokens else 0.0
    if precision + recall:
        f1 = 2 * precision * recall / (precision + recall)
    else:
        f1 = 0.0

    print('True positives: %d, false positives: %d, \
true negatives: %d, false negatives: %d' % (tp, fp, tn, fn))
    print('Accuracy: %f' % accuracy)
    print('Precision: %f' % precision)
    print('Recall: %f' % recall)
    print('F-1: %f' % f1)
示例#18
0
def evaluate_srl_predicates(gold_file):
    """
    Score the predicate detection network against gold data.

    Prints the confusion counts, then accuracy, precision, recall and F-1.
    """
    metadata = Metadata.load_from_file('srl_predicates')
    network = taggers.load_network(metadata)
    reader = taggers.create_reader(metadata, gold_file=gold_file)
    reader.codify_sentences()

    # confusion counts: true/false positives and negatives
    tp = fp = tn = fn = 0
    total_tokens = 0

    # gold tags are 1 at predicates and 0 everywhere else
    for sentence, gold_tags in zip(reader.sentences, reader.tags):
        predicted_tags = network.tag_sentence(sentence)

        for predicted, gold in zip(predicted_tags, gold_tags):
            if gold == 1:
                if predicted == gold:
                    tp += 1
                else:
                    fn += 1
            elif predicted == gold:
                tn += 1
            else:
                fp += 1

        total_tokens += len(sentence)

    precision = float(tp) / (tp + fp)
    recall = float(tp) / (tp + fn)

    counts_report = ('True positives: %d, false positives: %d, '
                     'true negatives: %d, false negatives: %d')
    print(counts_report % (tp, fp, tn, fn))
    print('Accuracy: %f' % (float(tp + tn) / total_tokens))
    print('Precision: %f' % precision)
    print('Recall: %f' % recall)
    print('F-1: %f' % (2 * precision * recall / (precision + recall)))