Пример #1
0
 def __init__(self,
              train=None,
              model=None,
              backoff=None,
              cutoff=0,
              verbose=False):
     """Initialize the tagger by delegating to ``NgramTagger.__init__``.

     All five parameters are forwarded positionally and unchanged, in the
     order ``train, model, backoff, cutoff, verbose``; this method adds no
     logic of its own beyond fixing the leading argument to ``1``.

     NOTE(review): the literal ``1`` is presumably the n-gram order (which
     would make this a unigram tagger) -- confirm against the signature of
     ``NgramTagger.__init__``, which is not visible here.
     """
     # Explicit base-class call: ``1`` takes the first slot after ``self``,
     # followed by the caller's arguments in declaration order.
     NgramTagger.__init__(self, 1, train, model, backoff, cutoff, verbose)
Пример #2
0
def getNgramTaggerAccuracy(n, trainingSet, testingSet):
    """Train an ``NgramTagger`` with the given ``n`` and score it on ``testingSet``.

    Args:
        n: Passed as the first argument to ``NgramTagger``.
        trainingSet: Passed as the second argument to ``NgramTagger``
            (the data the tagger is trained on).
        testingSet: Sentences whose items are indexable, with the word at
            index 0 and the gold POS tag at index 1. It is traversed twice.

    Returns:
        The result of ``calculateAccuracy(goldTags, predictedTags)``.
    """

    def pickField(sentences, position):
        # Project every item of every sentence onto a single index.
        return [[item[position] for item in sent] for sent in sentences]

    # Separate the test data into bare words and reference tags.
    words = pickField(testingSet, 0)
    goldTags = pickField(testingSet, 1)

    # Train on the training set, then tag the untagged test sentences.
    taggedSentences = NgramTagger(n, trainingSet).tag_sents(words)

    # Compare the reference tags with the tags the tagger produced.
    return calculateAccuracy(goldTags, pickField(taggedSentences, 1))
Пример #3
0
def getNgramTaggerAccuracy(n, trainingSet, testingSet):
    """Return the accuracy of an ``NgramTagger`` built with ``n``.

    The tagger is constructed from ``trainingSet``, applied to the words of
    ``testingSet`` via ``tag_sents``, and its predicted tags are compared
    against the gold tags with ``calculateAccuracy``.

    Args:
        n: First argument handed to ``NgramTagger``.
        trainingSet: Second argument handed to ``NgramTagger``.
        testingSet: Sentences made of indexable items; index 0 is read as
            the word and index 1 as the gold POS tag. Iterated twice.

    Returns:
        Whatever ``calculateAccuracy`` returns for (gold, predicted) tags.
    """

    def column(sentence, index):
        # One field (word or tag) from each item of a single sentence.
        return [entry[index] for entry in sentence]

    # Untagged input and the gold-standard answers.
    untagged = [column(sentence, 0) for sentence in testingSet]
    gold = [column(sentence, 1) for sentence in testingSet]

    # Train the tagger.
    tagger = NgramTagger(n, trainingSet)

    # Tag the test sentences and keep only the predicted tags.
    predicted = [column(sentence, 1) for sentence in tagger.tag_sents(untagged)]

    return calculateAccuracy(gold, predicted)
    def test_ngram_taggers(self):
        """Round-trip a chained n-gram tagger through the encoder and decoder.

        Builds unigram -> bigram -> trigram -> 4-gram taggers on
        ``self.corpus``, each backing off to the previous one (the unigram
        tagger backs off to ``self.default_tagger``), encodes then decodes
        the outermost tagger, and checks that the ``repr`` of every level of
        the decoded backoff chain matches the original.
        """
        # Build the chain innermost-first.
        unigram = UnigramTagger(self.corpus, backoff=self.default_tagger)
        bigram = BigramTagger(self.corpus, backoff=unigram)
        trigram = TrigramTagger(self.corpus, backoff=bigram)
        fourgram = NgramTagger(4, self.corpus, backoff=trigram)

        restored = self.decoder.decode(self.encoder.encode(fourgram))

        # Expected taggers, outermost first; position == number of
        # ``.backoff`` hops needed to reach the counterpart in ``restored``.
        expected_chain = (fourgram, trigram, bigram, unigram,
                          self.default_tagger)
        for hops, expected in enumerate(expected_chain):
            actual = restored
            for _ in range(hops):
                actual = actual.backoff
            self.assertEqual(repr(expected), repr(actual))
 def __init__(self, *args, **kwargs):
     """Initialize the tagger by delegating to ``NgramTagger.__init__``.

     The literal ``4`` is inserted as the first argument after ``self``;
     every positional and keyword argument supplied by the caller is then
     forwarded unchanged.

     NOTE(review): ``4`` is presumably the n-gram order -- confirm against
     the signature of ``NgramTagger.__init__``, which is not visible here.
     """
     # Explicit base-class call with the leading argument pinned to 4.
     NgramTagger.__init__(self, 4, *args, **kwargs)
	def __init__(self, *args, **kwargs):
		"""Initialize the tagger by delegating to ``NgramTagger.__init__``.

		The literal ``4`` is inserted as the first argument after ``self``;
		every positional and keyword argument supplied by the caller is then
		forwarded unchanged.

		NOTE(review): ``4`` is presumably the n-gram order -- confirm against
		the signature of ``NgramTagger.__init__``, which is not visible here.
		"""
		# Explicit base-class call with the leading argument pinned to 4.
		NgramTagger.__init__(self, 4, *args, **kwargs)
Пример #7
0
def ngram_tagger(n, train_data, backoff=None):
    """Construct and return an ``NgramTagger``.

    Args:
        n: Passed as the first positional argument to ``NgramTagger``.
        train_data: Passed as the second positional argument to
            ``NgramTagger``.
        backoff: Forwarded as the ``backoff`` keyword argument; defaults to
            ``None``.

    Returns:
        The newly created ``NgramTagger`` instance.
    """
    return NgramTagger(n, train_data, backoff=backoff)