def evaluate_fit(trained_model, test_sessions, test_queries):
    """
    Evaluate the model's fit to the observed data - i.e. whether C==0 or C==1
    for every item/session.

    Two measures are reported:
    - Log likelihood goes from negative infinity (bad) to 0 (good).
      - It measures the likelihood of observing the clicks in all the test
        sessions if the model is correct.
    - Perplexity goes from 1 (good) to 2 (bad).
      - It's a measure of how surprised we are about all clicks and non-clicks
        in all of the test sessions if the model is correct.
      - When comparing models you can use perplexity gain (pB - pA) / (pB - 1).
      - It can be computed at individual ranks, or averaged across all ranks.
        Perplexity is normally higher for higher ranks.
    """
    print("-------------------------------")
    print("Testing on %d search sessions (%d unique queries)."
          % (len(test_sessions), len(test_queries)))
    print("-------------------------------")

    def timed(fn):
        # Run fn() and return (result, elapsed wall-clock seconds).
        t0 = time.time()
        result = fn()
        return result, time.time() - t0

    # Build the metric objects up front so construction cost is not timed,
    # matching the order of operations in the original flow.
    ll_metric = LogLikelihood()
    perp_metric = Perplexity()

    ll_value, ll_secs = timed(
        lambda: ll_metric.evaluate(trained_model, test_sessions))
    print("\tlog-likelihood: %f; time: %i secs" % (ll_value, ll_secs))

    # Perplexity.evaluate returns a tuple; element 0 is the averaged value.
    perp_value, perp_secs = timed(
        lambda: perp_metric.evaluate(trained_model, test_sessions)[0])
    print("\tperplexity: %f; time: %i secs" % (perp_value, perp_secs))
# NOTE(review): this is a fragment of a larger train-and-evaluate routine —
# its enclosing `def` is outside this view. `train_sessions`, `train_queries`,
# `test_sessions`, and `click_model` are presumably bound in the enclosing
# scope (verify against the caller). The testing section below duplicates the
# body of `evaluate_fit` defined above — TODO: consider delegating to it,
# after confirming no downstream code reads the locals set here.

# Derive the set of distinct queries appearing in the test sessions.
test_queries = Utils.get_unique_queries(test_sessions)

print("===============================")
print("Training on %d search sessions (%d unique queries)."
      % (len(train_sessions), len(train_queries)))
print("===============================")

# Time the model-fitting step (wall-clock seconds, printed as an integer).
start = time.time()
click_model.train(train_sessions)
end = time.time()
print("\tTrained %s click model in %i secs:\n%r"
      % (click_model.__class__.__name__, end - start, click_model))

print("-------------------------------")
print("Testing on %d search sessions (%d unique queries)."
      % (len(test_sessions), len(test_queries)))
print("-------------------------------")

loglikelihood = LogLikelihood()
perplexity = Perplexity()

# Log-likelihood of the observed clicks under the trained model
# (-inf = bad, 0 = perfect), with its evaluation time.
start = time.time()
ll_value = loglikelihood.evaluate(click_model, test_sessions)
end = time.time()
print("\tlog-likelihood: %f; time: %i secs" % (ll_value, end - start))

# Perplexity averaged across ranks (1 = good, 2 = bad); evaluate() returns
# a tuple and element 0 is the averaged value.
start = time.time()
perp_value = perplexity.evaluate(click_model, test_sessions)[0]
end = time.time()
print("\tperplexity: %f; time: %i secs" % (perp_value, end - start))