Пример #1
0
def evaluate_srl_2_steps(no_repeat=False,
                         find_preds_automatically=False,
                         gold_file=None):
    """
    Prints the output of a 2-step SRL system in CoNLL style for evaluating.

    The first step tags argument boundaries with the 'srl_boundary' network;
    the second step labels the detected arguments with the 'srl_classify'
    network. One CoNLL-style block is printed per sentence.

    :param no_repeat: if True, the classification network is called with
        ``allow_repeats=False``.
    :param find_preds_automatically: if True, predicate positions are
        obtained from ``taggers.SRLTagger().find_predicates``; otherwise
        they are read, one entry per sentence, from the boundary reader's
        ``predicates``.
    :param gold_file: forwarded to ``taggers.create_reader`` for both the
        boundary and the classification readers.
    """
    # load boundary identification network and reader
    md_boundary = Metadata.load_from_file('srl_boundary')
    nn_boundary = taggers.load_network(md_boundary)
    reader_boundary = taggers.create_reader(md_boundary, gold_file)
    itd_boundary = reader_boundary.get_inverse_tag_dictionary()

    # same for arg classification
    md_classify = Metadata.load_from_file('srl_classify')
    nn_classify = taggers.load_network(md_classify)
    reader_classify = taggers.create_reader(md_classify, gold_file)
    itd_classify = reader_classify.get_inverse_tag_dictionary()

    if find_preds_automatically:
        tagger = taggers.SRLTagger()
    else:
        iter_predicates = iter(reader_boundary.predicates)

    # the reader stores (sentence, <other>) pairs; only the sentence is used
    actual_sentences = [
        actual_sentence for actual_sentence, _ in reader_boundary.sentences
    ]

    for sent in actual_sentences:

        if find_preds_automatically:
            pred_pos = tagger.find_predicates(sent)
        else:
            # the builtin next() works on both Python 2.6+ and Python 3,
            # unlike the Python 2-only iterator method .next()
            pred_pos = next(iter_predicates)

        verbs = [(position, sent[position].word) for position in pred_pos]

        # each network has its own reader, hence its own token conversion
        sent_bound_codified = np.array(
            [reader_boundary.converter.convert(t) for t in sent])
        sent_class_codified = np.array(
            [reader_classify.converter.convert(t) for t in sent])

        # step 1: boundary tags, one answer sequence per predicate
        answers = nn_boundary.tag_sentence(sent_bound_codified, pred_pos)
        boundaries = [[itd_boundary[x] for x in pred_answer]
                      for pred_answer in answers]

        arg_limits = [
            utils.boundaries_to_arg_limits(pred_boundaries)
            for pred_boundaries in boundaries
        ]

        # step 2: label the arguments delimited in step 1
        answers = nn_classify.tag_sentence(sent_class_codified,
                                           pred_pos,
                                           arg_limits,
                                           allow_repeats=not no_repeat)

        arguments = [[itd_classify[x] for x in pred_answer]
                     for pred_answer in answers]
        tags = join_2_steps(boundaries, arguments)

        print(prop_conll(verbs, tags, len(sent)))
Пример #2
0
def evaluate_srl_2_steps(no_repeat=False, find_preds_automatically=False,
                         gold_file=None):
    """
    Run the two-step SRL pipeline and print its output in CoNLL style.

    Boundaries are tagged with the 'srl_boundary' network, then the
    delimited arguments are labelled with the 'srl_classify' network.

    :param no_repeat: when True, the classifier is called with
        ``allow_repeats=False``.
    :param find_preds_automatically: when True, predicates come from
        ``taggers.SRLTagger().find_predicates``; otherwise they are taken in
        order from the boundary reader's ``predicates``.
    :param gold_file: passed on to ``taggers.create_reader`` for both readers.
    """
    def _to_labels(inverse_dictionary, network_answers):
        # map every code in every per-predicate answer through the dictionary
        return [[inverse_dictionary[code] for code in single_answer]
                for single_answer in network_answers]

    # boundary identification: metadata, network, reader, tag dictionary
    boundary_md = Metadata.load_from_file('srl_boundary')
    boundary_net = taggers.load_network(boundary_md)
    reader_boundary = taggers.create_reader(boundary_md, gold_file)
    boundary_labels = reader_boundary.get_inverse_tag_dictionary()

    # argument classification: same four pieces
    classify_md = Metadata.load_from_file('srl_classify')
    classify_net = taggers.load_network(classify_md)
    reader_classify = taggers.create_reader(classify_md, gold_file)
    classify_labels = reader_classify.get_inverse_tag_dictionary()

    # choose where predicate positions come from
    if find_preds_automatically:
        predicate_finder = taggers.SRLTagger()
    else:
        gold_predicates = iter(reader_boundary.predicates)

    sentences = [tokens for tokens, _ in reader_boundary.sentences]

    for tokens in sentences:
        if find_preds_automatically:
            positions = predicate_finder.find_predicates(tokens)
        else:
            positions = next(gold_predicates)

        verbs = [(index, tokens[index].word) for index in positions]

        boundary_input = np.array(
            [reader_boundary.converter.convert(token) for token in tokens])
        classify_input = np.array(
            [reader_classify.converter.convert(token) for token in tokens])

        # first step: argument boundaries
        boundaries = _to_labels(
            boundary_labels,
            boundary_net.tag_sentence(boundary_input, positions))
        limits = [utils.boundaries_to_arg_limits(per_predicate)
                  for per_predicate in boundaries]

        # second step: argument labels
        arguments = _to_labels(
            classify_labels,
            classify_net.tag_sentence(classify_input, positions, limits,
                                      allow_repeats=not no_repeat))

        print(prop_conll(verbs, join_2_steps(boundaries, arguments),
                         len(tokens)))
Пример #3
0
    def tag_tokens(self, tokens, no_repeats=False):
        """
        Runs the two-step SRL process on the given tokens.

        :param tokens: a list of tokens (as strings)
        :param no_repeats: when True, the classification network is called
            with ``allow_repeats=False``
        :returns: an ``SRLAnnotatedSentence`` built from the original
            ``tokens`` and the structures produced by ``_group_arguments``.
        """
        # for Portuguese, the text is cleaned before the Token is created
        clean_first = self.language == 'pt'
        token_objects = []
        for text in tokens:
            if clean_first:
                text = utils.clean_text(text, False)
            token_objects.append(attributes.Token(text))

        # each network has its own reader, hence its own conversion
        boundary_input = np.array(
            [self.boundary_reader.converter.convert(token)
             for token in token_objects])
        classify_input = np.array(
            [self.classify_reader.converter.convert(token)
             for token in token_objects])

        predicates = self.find_predicates(token_objects)

        # step 1: boundary detection (the answer covers all predicates)
        boundary_answers = self.boundary_nn.tag_sentence(boundary_input,
                                                         predicates)
        boundaries = []
        for answer in boundary_answers:
            boundaries.append([self.boundary_itd[code] for code in answer])
        limits = [utils.boundaries_to_arg_limits(per_predicate)
                  for per_predicate in boundaries]

        # step 2: classification of the delimited arguments
        class_answers = self.classify_nn.tag_sentence(
            classify_input, predicates, limits,
            allow_repeats=not no_repeats)
        arguments = []
        for answer in class_answers:
            arguments.append([self.classify_itd[code] for code in answer])

        # grouping uses the original token strings, not the Token objects
        structures = _group_arguments(tokens, predicates, boundaries,
                                      arguments)
        return SRLAnnotatedSentence(tokens, structures)