Exemplo n.º 1
0
    def read_sequence_list_conll(self,
                                 train_file,
                                 train_file_parsed=None,
                                 train_files_parsed_path=None,
                                 max_sent_len=100000,
                                 max_nr_sent=100000):
        """
        Build a SequenceListLabel from a conll2002/conll2003 file.

        All arguments are forwarded unchanged, in order, to
        ``self.read_conll_instances``; every instance it returns becomes one
        ``add_sequence`` call on the resulting list.

        When ``self.wordrep_dict`` is set, each instance is unpacked as a
        triple (words, tags, normalized) and the normalized part is handed
        over as well: as the fourth positional argument (with ``None`` in
        third position) if ``self.use_wordrep_tree`` is truthy, otherwise as
        the third positional argument. When ``self.wordrep_dict`` is None,
        each instance is unpacked as a (words, tags) pair.

        Returns the populated SequenceListLabel.
        """
        instances = self.read_conll_instances(
            train_file, train_file_parsed, train_files_parsed_path,
            max_sent_len, max_nr_sent)

        # Plain case first: no word representation dictionary configured.
        if self.wordrep_dict is None:
            sequences = SequenceListLabel(self.word_dict, self.tag_dict)
            for words, tags in instances:
                sequences.add_sequence(words, tags)
            return sequences

        sequences = SequenceListLabel(self.word_dict, self.tag_dict,
                                      self.wordrep_dict)
        for words, tags, normalized in instances:
            # Tree mode passes the normalized structure in the 4th slot,
            # chain mode passes it in the 3rd.
            extra = (None, normalized) if self.use_wordrep_tree else (normalized,)
            sequences.add_sequence(words, tags, *extra)
        return sequences
Exemplo n.º 2
0
def read_sequence_list_conll(infile,
                             word_dict,
                             tag_dict,
                             max_sent_len=100000,
                             max_nr_sent=100000):
    """
    Load instances from ``infile`` into a SequenceListLabel.

    The arguments are passed straight through to ``read_output_instances``,
    which returns the instances together with the word and tag dictionaries
    to use from then on. Each instance is unpacked as
    (words, gold tags, predicted tags) and added to the sequence list; the
    tag dictionary is given to SequenceListLabel as both its second and
    third argument.

    Returns a tuple ``(seq_list, word_dict, tag_dict)`` using the
    dictionaries returned by ``read_output_instances``.
    """
    loaded = read_output_instances(infile, word_dict, tag_dict,
                                   max_sent_len, max_nr_sent)
    instances, words_index, tags_index = loaded

    sequences = SequenceListLabel(words_index, tags_index, tags_index)
    for instance in instances:
        tokens, gold_tags, predicted_tags = instance
        sequences.add_sequence(tokens, gold_tags, predicted_tags)

    return sequences, words_index, tags_index
Exemplo n.º 3
0
def read_sequence_list_conll(infile, word_dict, tag_dict,
                             max_sent_len=100000, max_nr_sent=100000):
    """
    Read ``infile`` via ``read_output_instances`` and collect the result.

    ``read_output_instances`` receives all five arguments positionally and
    returns the instance list plus the word and tag dictionaries, which
    replace the ones passed in. Every instance is unpacked into three parts
    (sentence, gold labels, predicted labels) and appended to a
    SequenceListLabel constructed with the word dictionary and the tag
    dictionary twice.

    Returns ``(sequence list, word dictionary, tag dictionary)``.
    """
    parsed, vocab, labels = read_output_instances(
        infile, word_dict, tag_dict, max_sent_len, max_nr_sent)

    collected = SequenceListLabel(vocab, labels, labels)
    for sentence, gold, predicted in parsed:
        collected.add_sequence(sentence, gold, predicted)

    result = (collected, vocab, labels)
    return result
Exemplo n.º 4
0
    def read_sequence_list_conll(self, train_file, train_file_parsed=None, train_files_parsed_path=None,
                                 max_sent_len=100000, max_nr_sent=100000):
        """
        Read a conll2002 or conll2003 file and return it as a SequenceListLabel.

        The five arguments go positionally to ``self.read_conll_instances``.
        Without a ``self.wordrep_dict`` the instances are (x, y) pairs and are
        added as such. With one, instances are (x, y, normalized) triples and
        the normalized item is added either as a tree (fourth argument, third
        left as ``None``) when ``self.use_wordrep_tree`` is truthy, or as a
        chain (third argument) otherwise.
        """
        loaded = self.read_conll_instances(
            train_file,
            train_file_parsed,
            train_files_parsed_path,
            max_sent_len,
            max_nr_sent,
        )

        with_wordrep = self.wordrep_dict is not None
        if not with_wordrep:
            # Only word and tag dictionaries are needed for indexing.
            result = SequenceListLabel(self.word_dict, self.tag_dict)
            for xs, ys in loaded:
                result.add_sequence(xs, ys)
        else:
            result = SequenceListLabel(self.word_dict, self.tag_dict, self.wordrep_dict)
            for xs, ys, norm in loaded:
                if not self.use_wordrep_tree:
                    # norm is a normalized chain
                    result.add_sequence(xs, ys, norm)
                else:
                    # norm is a normalized tree
                    result.add_sequence(xs, ys, None, norm)

        return result