def cache_different_limits(limits, cache_lambda=0.22):
    """Plot perplexity as a function of the unigram-cache limit.

    For every entry of ``limits`` a ``UnigramCachePredictor(limit=...)`` is
    run over a fixed set of prediction texts, its output is combined with the
    ``NgramPredictor(4)`` output through
    ``interpolated_with_cache_probability``, and the resulting perplexity is
    printed and collected. Once all limits are processed the perplexities are
    plotted against ``limits`` and the figure is shown.

    Args:
        limits: Iterable of values passed as ``limit`` to
            ``UnigramCachePredictor``; also used as the x axis of the plot.
        cache_lambda: First argument forwarded to
            ``interpolated_with_cache_probability`` (presumably the weight
            given to the cache predictions -- confirm against that helper).
            Defaults to 0.22, the value that was previously hard-coded.

    Returns:
        None. The function prints one "limit perplexity" line per limit and
        displays a matplotlib figure.
    """
    prediction_texts = PredictionTexts([1, 2, 3, 4, 5, 7, 8])
    # The n-gram predictions do not depend on the cache limit, so they are
    # computed a single time and reused for every limit.
    ngram_predictions = NgramPredictor(4).batch_predict(prediction_texts)

    perplexities = []
    for limit in limits:
        cache_predictions = UnigramCachePredictor(limit=limit).batch_predict(
            prediction_texts
        )
        perplexity = predictor_perplexity(
            interpolated_with_cache_probability(
                cache_lambda, cache_predictions, ngram_predictions
            )
        )
        perplexities.append(perplexity)
        # A single pre-formatted string prints identically under the Python 2
        # print statement and the Python 3 print function.
        print("%s %s" % (limit, perplexity))

    plt.ylabel("Perplejidad")
    plt.xlabel("Limite de Cache")
    plt.plot(limits, perplexities, ".-")
    plt.show()
def interpolate(cache_lambda, cache_probs, probs):
    """Interpolate two probability sequences, stretching a length-one input.

    If ``cache_probs`` holds exactly one value, that value is divided by
    ``len(probs)`` and repeated ``len(probs)`` times. Otherwise, if ``probs``
    holds exactly one value, the same is done to it using
    ``len(cache_probs)``. ``cache_probs`` is checked first, so when both have
    length one only the cache branch runs. The (possibly replaced) sequences
    are then handed to ``interpolated_with_cache_probability``.

    Args:
        cache_lambda: Forwarded unchanged as the first argument of
            ``interpolated_with_cache_probability``.
        cache_probs: Sequence of cache-based values.
        probs: Sequence of values from the other predictor.

    Returns:
        Whatever ``interpolated_with_cache_probability`` returns.
    """
    cache_count = len(cache_probs)
    prob_count = len(probs)

    if cache_count == 1:
        # Divide before building the list so an empty ``probs`` still fails
        # on the division rather than silently yielding an empty list.
        share = cache_probs[0] / prob_count
        cache_probs = [share] * prob_count
    elif prob_count == 1:
        share = probs[0] / cache_count
        probs = [share] * cache_count

    return interpolated_with_cache_probability(cache_lambda, cache_probs, probs)