示例#1
0
 def __init__(self, chunked_sents, **kwargs):
     """Train the underlying classifier-based tagger on chunked sentences.

     Args:
         chunked_sents: Iterable of chunk trees; each one is converted
             to IOB form with ``tree2conlltags``.
         **kwargs: Forwarded unchanged to ``ClassifierBasedTagger``.
     """
     training_data = []
     for tree in chunked_sents:
         # IOB triplets (word, pos, chunk) regrouped as ((word, pos), chunk)
         # pairs, which is the shape handed to the tagger as training data.
         pairs = [((word, pos), chunk)
                  for word, pos, chunk in tree2conlltags(tree)]
         training_data.append(pairs)

     self.tagger = ClassifierBasedTagger(train=training_data,
                                         feature_detector=features,
                                         **kwargs)
    def __init__(self, chunked_sents, **kwargs):
        """Build and train the IOB tagger used by this chunker.

        Args:
            chunked_sents: Iterable of chunk trees used as training data.
            **kwargs: Extra keyword arguments passed straight through to
                ``ClassifierBasedTagger``.
        """
        assert isinstance(chunked_sents, Iterable)

        # Each tree becomes a list of ((word, pos), chunk) pairs: the
        # (word, pos, chunk) triplets from tree2conlltags are regrouped so
        # the (word, pos) part is the token and the chunk tag is its label.
        tagged_sents = [
            [((word, pos), chunk) for word, pos, chunk in tree2conlltags(tree)]
            for tree in chunked_sents
        ]

        self.feature_detector = features
        self.tagger = ClassifierBasedTagger(train=tagged_sents,
                                            feature_detector=features,
                                            **kwargs)
示例#3
0
    def __init__(self, chunked_sents, **kwargs):
        """Train a classifier-based tagger from chunk trees.

        Args:
            chunked_sents: Iterable of chunk trees to learn from.
            **kwargs: Passed through to ``ClassifierBasedTagger``.
        """
        # Trees -> IOB annotated sentences: [(word, pos, chunk), ...]
        iob_sents = map(tree2conlltags, chunked_sents)

        # Regroup into the tagger's training shape: [((word, pos), chunk), ...]
        training_pairs = [
            [((word, pos), chunk) for word, pos, chunk in iob_sent]
            for iob_sent in iob_sents
        ]

        self.feature_detector = features
        self.tagger = ClassifierBasedTagger(
            train=training_pairs,
            feature_detector=features,
            **kwargs)
示例#4
0
    def __init__(self, chunked_sents, **kwargs):
        """Prepare training data and fit the classifier-based tagger.

        Args:
            chunked_sents: Iterable of chunk trees.
            **kwargs: Additional options forwarded to
                ``ClassifierBasedTagger``.
        """
        assert isinstance(chunked_sents, Iterable)

        training_sents = []
        for tree in chunked_sents:
            # IOB annotated sentence: [(word, pos, chunk), ...]
            iob_sent = tree2conlltags(tree)
            # Tagger-compatible form: [((word, pos), chunk), ...]
            training_sents.append(
                [((word, pos), chunk) for word, pos, chunk in iob_sent])

        self.feature_detector = features
        self.tagger = ClassifierBasedTagger(train=training_sents,
                                            feature_detector=features,
                                            **kwargs)
示例#5
0
class ClassifierChunkParser(ChunkParserI):
    """Chunk parser that delegates IOB tagging to a ClassifierBasedTagger."""

    def __init__(self, chunked_sents, **kwargs):
        """Train the tagger on the given chunk trees.

        Args:
            chunked_sents: Iterable of chunk trees used for training.
            **kwargs: Forwarded unchanged to ``ClassifierBasedTagger``.
        """
        assert isinstance(chunked_sents, Iterable)

        # Trees -> [(word, pos, chunk), ...] -> [((word, pos), chunk), ...]
        # so that the (word, pos) pair is the token and chunk is its tag.
        training_sents = [
            [((word, pos), chunk) for word, pos, chunk in tree2conlltags(tree)]
            for tree in chunked_sents
        ]

        self.feature_detector = features
        self.tagger = ClassifierBasedTagger(train=training_sents,
                                            feature_detector=features,
                                            **kwargs)

    def parse(self, tagged_sent):
        """Chunk one sentence and return it as a tree.

        Args:
            tagged_sent: Sentence handed to the tagger's ``tag`` method.

        Returns:
            The result of ``conlltags2tree`` applied to the predicted
            (word, tag, iob) triplets.
        """
        # The tagger yields [((w1, t1), iob1), ...]; flatten each entry
        # into the (w1, t1, iob1) triplet that conlltags2tree consumes.
        triplets = []
        for (word, pos), iob_tag in self.tagger.tag(tagged_sent):
            triplets.append((word, pos, iob_tag))

        return conlltags2tree(triplets)
class ClassifierChunkParser(ChunkParserI):
    """Chunker built on top of a trained ClassifierBasedTagger."""

    def __init__(self, chunked_sents, **kwargs):
        """Convert chunk trees to tagger training data and train.

        Args:
            chunked_sents: Iterable of chunk trees.
            **kwargs: Extra keyword arguments for ``ClassifierBasedTagger``.
        """
        assert isinstance(chunked_sents, Iterable)

        training_data = []
        for tree in chunked_sents:
            # (word, pos, chunk) triplets regrouped as ((word, pos), chunk)
            training_data.append(
                [((word, pos), chunk)
                 for word, pos, chunk in tree2conlltags(tree)])

        self.feature_detector = features

        self.tagger = ClassifierBasedTagger(
            train=training_data,
            feature_detector=features,
            **kwargs)

    def parse(self, tagged_sent):
        """Tag ``tagged_sent`` and rebuild a tree from the predictions.

        Args:
            tagged_sent: Sentence passed to ``self.tagger.tag``.

        Returns:
            Whatever ``conlltags2tree`` builds from the flattened
            (word, tag, chunk) triplets.
        """
        predicted = self.tagger.tag(tagged_sent)
        # ((word, tag), chunk) -> (word, tag, chunk)
        flattened = [(word, tag, chunk) for (word, tag), chunk in predicted]
        return conlltags2tree(flattened)
示例#7
0
class ClassifierChunkParser(ChunkParserI):
    """Chunk parser that wraps a ClassifierBasedTagger."""

    def __init__(self, chunked_sents, **kwargs):
        """Train the tagger on chunked sentences.

        Args:
            chunked_sents: Iterable of chunk trees.
            **kwargs: Forwarded unchanged to ``ClassifierBasedTagger``.
        """
        training_sents = []
        for tree in chunked_sents:
            # Sentence in IOB form: (word, pos, chunk) triplets ...
            iob_sent = tree2conlltags(tree)
            # ... regrouped into ((word, pos), chunk) pairs for training.
            training_sents.append(
                [((word, pos), chunk) for (word, pos, chunk) in iob_sent])

        self.tagger = ClassifierBasedTagger(train=training_sents,
                                            feature_detector=features,
                                            **kwargs)

    def parse(self, tagged_sent):
        """Tag a sentence and return its chunks in IOB triplet form.

        Args:
            tagged_sent: Sentence passed to ``self.tagger.tag``.

        Returns:
            A list of (word, tag, chunk) triplets.
        """
        # NOTE(review): the triplets are returned as-is and are NOT
        # converted to a tree -- confirm callers expect this shape.
        triplets = []
        for (word, tag), chunk in self.tagger.tag(tagged_sent):
            triplets.append((word, tag, chunk))
        return triplets
示例#8
0
    def train_merger(self, train_file_path, test_split=0.1):
        """Train a classifier-based tagger from a CoNLL-style training file.

        The file content is split into sentences on blank lines, each
        sentence is parsed with ``conllstr2tree`` (``NP`` chunks, root
        label ``S``), and the non-empty trees are used as training data.

        Args:
            train_file_path: Path of the training file, read as UTF-8.
            test_split: Currently unused -- the train/test split is
                disabled and every sentence is used for training.

        Returns:
            The trained ``ClassifierBasedTagger``, which is also stored
            in ``self.tagger``.
        """
        print("Loading Data...")
        # Use a context manager so the file handle is always released,
        # even when reading or decoding fails.
        with open(train_file_path, "r", encoding='utf-8') as train_file:
            raw_sentences = train_file.read().split("\n\n")

        data_list = []
        for raw_sentence in raw_sentences:
            tree = nltk.chunk.util.conllstr2tree(raw_sentence,
                                                 chunk_types=('NP', ),
                                                 root_label='S')
            # Skip sentences that produced an empty tree.
            if len(tree) > 0:
                data_list.append(tree)

        # No hold-out set is created; all data goes to training.
        train_sents = data_list
        test_sents = []

        print("Training the model ...")

        # Transform the trees in IOB annotated sentences [(word, pos, chunk), ...]
        chunked_sents = [tree2conlltags(sent) for sent in train_sents]

        # Transform the triplets in pairs, make it compatible with the
        # tagger interface [((word, pos), chunk), ...]
        chunked_sents = [
            [((word, pos), chunk) for word, pos, chunk in iob_sent]
            for iob_sent in chunked_sents
        ]

        self.feature_detector = self.features
        self.tagger = ClassifierBasedTagger(train=chunked_sents,
                                            feature_detector=self.features)

        token_merger_model = self.tagger

        # test_sents is always empty above, so this branch does not run.
        if len(test_sents) > 0:
            print("evaluating...")
            print(token_merger_model.evaluate(test_sents))

        return token_merger_model
示例#9
0
class FooChunkParser(ChunkParserI):
    """Chunk parser driven by a ClassifierBasedTagger."""

    def __init__(self, chunked_sents, **kwargs):
        """Train the tagger from chunk trees.

        Args:
            chunked_sents: Iterable of chunk trees.
            **kwargs: Passed through to ``ClassifierBasedTagger``.
        """
        # Trees -> IOB annotated sentences [(word, pos, chunk)]
        iob_sents = [tree2conlltags(tree) for tree in chunked_sents]

        # IOB triplets -> tagger-compatible pairs [((word, pos), chunk)]
        training_pairs = []
        for iob_sent in iob_sents:
            training_pairs.append(
                [((word, pos), chunk) for word, pos, chunk in iob_sent])

        self.feature_detector = features
        self.tagger = ClassifierBasedTagger(
            train=training_pairs,
            feature_detector=features,
            **kwargs)

    def parse(self, tagged_sent):
        """Tag a sentence and convert the predictions to a tree.

        Args:
            tagged_sent: Sentence passed to ``self.tagger.tag``.

        Returns:
            The result of ``conlltags2tree`` on the predicted triplets.
        """
        triplets = []
        # Flatten ((word, token), chunk) into (word, token, chunk)
        for (word, token), chunk in self.tagger.tag(tagged_sent):
            triplets.append((word, token, chunk))
        return conlltags2tree(triplets)