def q5_calculate_perplexity_of_reuters_corpus():
    """Print the bigram-model perplexity of the training and test corpora.

    A ``BigramLanguageModel`` is constructed from the training corpus and
    then scored, via ``PerplexityCalculator.calculate_corpus_perplexity``,
    against the training corpus and the test corpus in that order. Both
    scores are computed before anything is printed. Returns ``None``.
    """
    train_corpus = ReutersTrainingCorpus()
    test_corpus = ReutersTestCorpus()
    bigram_model = BigramLanguageModel(train_corpus)
    calculator = PerplexityCalculator()

    # Evaluate in the fixed order (training, test) so the labels below line up.
    train_score, test_score = [
        calculator.calculate_corpus_perplexity(bigram_model, corpus)
        for corpus in (train_corpus, test_corpus)
    ]

    print("Training perplexity basic model: ", train_score)
    print("Test perplexity basic model: ", test_score)
Exemplo n.º 2
0
def q5_calculate_perplexity_of_reuters_corpus():
    """Report how well a bigram model fits the training and test corpora.

    The model is a ``BigramLanguageModel`` constructed from the training
    corpus. Its perplexity is computed on the training corpus first and on
    the test corpus second; the two values are then printed to stdout.
    Nothing is returned.
    """
    corpora = {
        "training": ReutersTrainingCorpus(),
        "test": ReutersTestCorpus(),
    }
    model = BigramLanguageModel(corpora["training"])
    scorer = PerplexityCalculator()

    def score(corpus_name):
        # Perplexity of the one shared model on the named corpus.
        return scorer.calculate_corpus_perplexity(model, corpora[corpus_name])

    on_training = score("training")
    on_test = score("test")

    print("Training perplexity basic model: ", on_training)
    print("Test perplexity basic model: ", on_test)
Exemplo n.º 3
0
    def find_lambdas_brute_force(self, training_corpus, holdout_corpus, n):
        """Grid-search three interpolation weights that minimise holdout perplexity.

        Candidate weight triples ``[l1, l2, l3]`` are enumerated on a 0.1 grid
        with ``l3 = 1 - l1 - l2`` (each value rounded to two decimals). For
        every candidate an ``InterpolatingNgramProbabilityCalculator`` and an
        ``NgramLanguageModel`` are built and the model's perplexity on
        ``holdout_corpus`` is computed; the first candidate with the strictly
        lowest perplexity wins.

        Args:
            training_corpus: corpus passed to ``NgramCalculatorContainer`` and
                to every ``NgramLanguageModel``.
            holdout_corpus: corpus the perplexity is measured on.
            n: n-gram order forwarded to the counter and the language model.
                The candidate list always has three weights regardless of
                ``n``.

        Returns:
            A ``(best_lambdas, minimum_perplexity)`` tuple. If no candidate
            scores below ``sys.maxsize`` the result is ``([], sys.maxsize)``.

        NOTE(review): the inner range stops at ``steps - a``, so ``l3`` is
        never below 0.1 and the outer pass with ``l1 == 1.0`` yields no
        candidates. This may be an off-by-one or a deliberate way to keep the
        third weight non-zero -- confirm before changing.
        """
        counts = NgramCalculatorContainer(training_corpus, n)
        lowest_perplexity = sys.maxsize
        winning_lambdas = []
        scorer = PerplexityCalculator()
        step = .1
        steps = round(1 / step)

        # Lazily enumerate (l1, l2) pairs in the same order as a nested loop.
        weight_pairs = (
            (round(step * a, 2), round(step * b, 2))
            for a in range(steps + 1)
            for b in range(steps - a)
        )

        for first, second in weight_pairs:
            candidate = [first, second, round(1 - first - second, 2)]
            model = NgramLanguageModel(
                training_corpus,
                n,
                ngram_probability_calculator=InterpolatingNgramProbabilityCalculator(
                    counts, candidate
                ),
            )
            score = scorer.calculate_corpus_perplexity(model, holdout_corpus)
            if not score < lowest_perplexity:
                continue
            # Strict improvement only: ties keep the earlier candidate.
            lowest_perplexity = score
            winning_lambdas = candidate

        return winning_lambdas, lowest_perplexity