Пример #1
0
    def add_all_annotations(self, doc):
        """Attach every PropBank instance recorded for ``doc`` to the document.

        For each instance in ``self.propbank_annos[doc.docid]`` the predicate
        pointer and every argument pointer are resolved against the parse
        tree of the instance's sentence and converted to spans. Each argument
        whose span is non-empty is registered on ``doc`` as an entity mention
        and, if both the predicate and the entity mention were created, linked
        to the predicate (frame type ``'PROPBANK'``) under the lower-cased
        slot name. The predicate and the argument mention both receive a
        ``'node'`` meta entry of the form ``docid:sentnum:pointer``.

        ``self.stats['predicate_count']`` is incremented once per instance and
        ``self.stats['argument_count']`` once per argument with a non-empty
        span.

        Args:
            doc: Document to annotate. The method reads ``doc.docid`` and
                calls ``get_parsed_sents()``, ``get_token_spans()``,
                ``add_predicate()``, ``add_entity_mention()`` and
                ``add_argument_mention()`` on it.

        Returns:
            None. The document and ``self.stats`` are updated in place.
        """
        # Lazy %-style arguments: the message is only built when INFO is
        # enabled, and a non-string docid no longer breaks the concatenation.
        logging.info("Adding propbank annotations for %s", doc.docid)

        instances = self.propbank_annos[doc.docid]
        if not instances:
            # Nothing to annotate; skip fetching parses and token spans.
            return

        # Fetched once instead of once per instance / per argument.
        # NOTE(review): assumes adding mentions to the document does not
        # change its parsed sentences or token spans -- confirm.
        parsed_sents = doc.get_parsed_sents()
        token_spans = doc.get_token_spans()

        for inst in instances:
            tree = parsed_sents[inst.sentnum]

            p_word_idx = utils.make_words_from_pointer(tree, inst.predicate)
            pred_span = utils.get_nltk_span(token_spans, inst.sentnum,
                                            p_word_idx)

            pred_node_repr = "%s:%d:%s" % (doc.docid, inst.sentnum,
                                           inst.predicate)

            self.stats['predicate_count'] += 1

            for argloc, arg_slot in inst.arguments:
                a_word_idx = utils.make_words_from_pointer(tree, argloc)
                arg_span = utils.get_nltk_span(token_spans, inst.sentnum,
                                               a_word_idx)

                if len(arg_span) == 0:
                    continue

                self.stats['argument_count'] += 1

                # The predicate is requested per argument, so it is only
                # registered on the document when at least one argument has
                # a non-empty span.
                p = doc.add_predicate(None, pred_span, frame_type='PROPBANK')
                arg_em = doc.add_entity_mention(None, arg_span)

                if not (p and arg_em):
                    continue

                p.add_meta('node', pred_node_repr)

                arg_mention = doc.add_argument_mention(
                    p, arg_em.aid, arg_slot.lower())
                arg_mention.add_meta(
                    'node',
                    "%s:%d:%s" % (doc.docid, inst.sentnum, argloc))
Пример #2
0
    def add_predicate(self, doc, parsed_sents, predicate_node):
        """Register the NomBank predicate at ``predicate_node`` on ``doc``.

        The node's tree pointer is resolved against the parse tree of its
        sentence and converted to a span using ``doc.token_spans``. A
        predicate with frame type ``'NOMBANK'`` is then requested from
        ``doc``; when one is returned it is tagged with a ``'node'`` meta
        entry of the form ``docid:sent_num:pointer``.

        Args:
            doc: Document receiving the predicate.
            parsed_sents: Parse trees indexed by sentence number.
            predicate_node: Object exposing ``sent_num`` and ``pointer``.

        Returns:
            Whatever ``doc.add_predicate`` returned, or ``None`` when the
            predicate span is empty (a warning is logged in that case).
        """
        sent_idx = predicate_node.sent_num
        pointer = predicate_node.pointer
        node_label = "%s:%d:%s" % (doc.docid, sent_idx, pointer)

        word_indices = utils.make_words_from_pointer(parsed_sents[sent_idx],
                                                     pointer)
        span = utils.get_nltk_span(doc.token_spans, sent_idx, word_indices)

        if len(span) == 0:
            logging.warning("Zero length predicate found")
            return None

        predicate = doc.add_predicate(None, span, frame_type='NOMBANK')
        if predicate:
            predicate.add_meta('node', node_label)
        return predicate
Пример #3
0
    def add_nombank_arg(
        self,
        doc,
        parsed_sents,
        wsj_spans,
        arg_type,
        predicate,
        arg_node,
        implicit=False,
    ):
        """Attach the NomBank argument at ``arg_node`` to ``predicate``.

        The node's tree pointer is resolved against the parse tree of its
        sentence and converted to a span using ``wsj_spans``. An entity
        mention is requested from ``doc`` for that span and linked to
        ``predicate`` under the lower-cased ``arg_type``, prefixed with
        ``"i_"`` for implicit arguments. The argument mention receives a
        ``"node"`` meta entry (``docid:sent_num:pointer``); implicit ones
        additionally get ``"implicit"``, ``"sent_num"`` and ``"text"``.

        Args:
            doc: Document receiving the mentions.
            parsed_sents: Parse trees indexed by sentence number.
            wsj_spans: Token spans passed to ``utils.get_nltk_span``.
            arg_type: Argument role name; lower-cased before use.
            predicate: Predicate the argument is attached to.
            arg_node: Object exposing ``sent_num`` and ``pointer``.
            implicit: Whether to record the argument as implicit.

        Returns:
            The argument mention, or ``None`` when the span is empty or no
            entity mention was returned by ``doc``.
        """
        base_role = arg_type.lower()
        sent_idx = arg_node.sent_num
        pointer = arg_node.pointer

        word_indices = utils.make_words_from_pointer(parsed_sents[sent_idx],
                                                     pointer)
        node_label = "%s:%d:%s" % (doc.docid, sent_idx, pointer)
        span = utils.get_nltk_span(wsj_spans, sent_idx, word_indices)

        # Some arguments are empty nodes; those are skipped.
        if len(span) == 0:
            return None

        entity = doc.add_entity_mention(None, span)
        if not entity:
            return None

        role = "i_" + base_role if implicit else base_role
        mention = doc.add_argument_mention(predicate, entity.aid, role)
        mention.add_meta("node", node_label)

        if implicit:
            mention.add_meta("implicit", True)
            mention.add_meta("sent_num", sent_idx)
            mention.add_meta("text", entity.text)

        return mention