Exemplo n.º 1
0
 def test_update(self):
     """Compare the result of ``update`` with the expected weights per sample.

     One ``TaggingPerceptron`` is built from ``vocabulary(self.dataset)`` and
     ``tags(self.dataset)`` and reused for every sample. The fixtures
     ``self.sents``, ``self.predicted_tags``, ``self.tags`` and
     ``self.expected_ws`` are consumed in lockstep; ``zip`` stops at the
     shortest of them.
     """
     perceptron = TaggingPerceptron(vocabulary(self.dataset),
                                    tags(self.dataset))
     for sentence, predicted, sent_tags, wanted_weights in zip(
             self.sents, self.predicted_tags, self.tags, self.expected_ws):
         weights = perceptron.update(sentence, predicted, sent_tags)
         # Both sides are materialised as lists before the comparison.
         self.assertSequenceEqual(list(weights), list(wanted_weights))
Exemplo n.º 2
0
    def test_update(self):
        """Check the value returned by ``update`` against ``self.expected_ws``.

        The same tagger instance handles every fixture sample in turn. The
        four fixture sequences are paired positionally with ``zip``, which
        ends at the shortest one.
        """
        tagger = TaggingPerceptron(vocabulary(self.dataset),
                                   tags(self.dataset))
        fixtures = zip(self.sents, self.predicted_tags,
                       self.tags, self.expected_ws)
        for sentence, predicted, sent_tags, wanted in fixtures:
            got = tagger.update(sentence, predicted, sent_tags)
            # Convert both sides to lists so they compare element by element.
            self.assertSequenceEqual(list(got), list(wanted))
Exemplo n.º 3
0
    def test_feature_vector(self):
        """Check ``feature_vector`` output for every (sentence, tags) fixture.

        The expected vector for each sample is looked up by position in
        ``self.expected_train_ftr_vectos``.
        """
        tagger = TaggingPerceptron(vocabulary(self.dataset),
                                   tags(self.dataset))
        samples = zip(self.sents, self.tags)
        for position, (sentence, sent_tags) in enumerate(samples):
            actual = list(tagger.feature_vector(sentence, sent_tags))
            # Expected vectors are indexed by the sample's position.
            expected = list(self.expected_train_ftr_vectos[position])
            self.assertSequenceEqual(actual, expected)
Exemplo n.º 4
0
    def test_feature_vector(self):
        """Compare each computed feature vector with its expected fixture.

        ``self.sents`` and ``self.tags`` are paired positionally; the pair's
        index selects the entry of ``self.expected_train_ftr_vectos`` to
        compare against.
        """
        vocab = vocabulary(self.dataset)
        tag_set = tags(self.dataset)
        tagger = TaggingPerceptron(vocab, tag_set)

        for idx, (sentence, sent_tags) in enumerate(zip(self.sents,
                                                        self.tags)):
            computed = tagger.feature_vector(sentence, sent_tags)
            self.assertSequenceEqual(
                list(computed),
                list(self.expected_train_ftr_vectos[idx]))
Exemplo n.º 5
0
    def test_decode(self):
        """Train on the fixture data, then check ``decode`` for each sentence.

        Results are compared with ``self.expected_tags_two_itr``.
        """
        perceptron = TaggingPerceptron(vocabulary(self.dataset),
                                       tags(self.dataset))

        # The first argument to train() is 2; the same fixture sentences and
        # tags are passed for both the second and the third argument pair.
        perceptron.train(2, self.sents, self.tags, self.sents, self.tags)

        pairs = zip(self.sents, self.expected_tags_two_itr)
        for sentence, wanted_tags in pairs:
            decoded = perceptron.decode(sentence)
            self.assertSequenceEqual(decoded, wanted_tags)
Exemplo n.º 6
0
    def test_decode(self):
        """Verify decoded tag sequences after training on the fixtures.

        Each sentence in ``self.sents`` is decoded and compared with the
        entry at the same position in ``self.expected_tags_two_itr``.
        """
        vocab = vocabulary(self.dataset)
        tag_set = tags(self.dataset)
        tagger = TaggingPerceptron(vocab, tag_set)

        # train() receives 2 as its first argument and the fixture
        # sentences/tags twice (the same data fills both argument pairs).
        tagger.train(2, self.sents, self.tags, self.sents, self.tags)

        for sentence, wanted in zip(self.sents, self.expected_tags_two_itr):
            self.assertSequenceEqual(tagger.decode(sentence), wanted)
Exemplo n.º 7
0
                if self._n % report_every == 1:
                    test_accuracy, confusion, word_errors = \
                        self.accuracy(test_sents, test_tags)
                    print("\t".join(ss))
                    print("\t".join(tt))
                    print("\t".join(pred))
                    print("After %i sents, accuracy is %f, nonzero feats %i" %
                          (self._n, test_accuracy,
                           sum(1 for x in self._w if x != 0.0)))

        self.finalize()
        test_accuracy, confusion, word_errors = \
            self.accuracy(test_sents, test_tags)
        print("---------------")
        print("Final accuracy: %f" % test_accuracy)


if __name__ == "__main__":
    # Script entry point: build a tagger from the training dataset and train
    # it, passing the validation sentences/tags as the second data pair.
    num_iterations = 5

    # Datasets are constructed in the same order as before: train, then valid.
    training_set = CoNLL2003_Train()
    validation_set = CoNLL2003_Valid()

    train_sents, train_tags = dataset_to_sents_and_tags(training_set)
    valid_sents, valid_tags = dataset_to_sents_and_tags(validation_set)

    # Vocabulary and tag inventory both come from the training dataset only.
    perceptron = TaggingPerceptron(vocabulary(training_set),
                                   tags(training_set))
    perceptron.train(num_iterations,
                     train_sents, train_tags,
                     valid_sents, valid_tags)