Example #1
import time

# PyClick evaluation metrics; the import path is assumed from the PyClick examples.
from pyclick.click_models.Evaluation import LogLikelihood, Perplexity


def evaluate_fit(trained_model, test_sessions, test_queries):
    """
    Evaluate the model's fit to the observed data - i.e. whether C==0 or C==1 for every
    item/session.

    There are two measures:

    - Log likelihood goes from negative infinity (bad) to 0 (good)
    - It measures the likelihood of observing the clicks in all the test sessions if the model is correct
    - Perplexity goes from 1 (good) to 2 (bad).
    - It's a measure of how surprised we are about all clicks and non-clicks in all of the test sessions if the model is correct.
    - When comparing models you can use perplexity gain (pB - pA) / (pB - 1)
    - It can be computed at individual ranks, or averaged across all ranks. Perplexity is normally higher for higher ranks.
    """
    print("-------------------------------")
    print("Testing on %d search sessions (%d unique queries)." % (len(test_sessions), len(test_queries)))
    print("-------------------------------")

    loglikelihood = LogLikelihood()
    perplexity = Perplexity()

    start = time.time()
    ll_value = loglikelihood.evaluate(trained_model, test_sessions)
    end = time.time()
    print("\tlog-likelihood: %f; time: %i secs" % (ll_value, end - start))

    start = time.time()
    perp_value = perplexity.evaluate(trained_model, test_sessions)[0]
    end = time.time()
    print("\tperplexity: %f; time: %i secs" % (perp_value, end - start))
Example #2
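    # NB: a fragment of a train-and-evaluate routine. It assumes `click_model`,
    # `train_sessions` and `train_queries` are defined by the surrounding code,
    # and that `time`, `Utils`, `LogLikelihood` and `Perplexity` are imported.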
    test_queries = Utils.get_unique_queries(test_sessions)

    print("===============================")
    print("Training on %d search sessions (%d unique queries)." %
          (len(train_sessions), len(train_queries)))
    print("===============================")

    start = time.time()
    click_model.train(train_sessions)
    end = time.time()
    print("\tTrained %s click model in %i secs:\n%r" %
          (click_model.__class__.__name__, end - start, click_model))

    print("-------------------------------")
    print("Testing on %d search sessions (%d unique queries)." %
          (len(test_sessions), len(test_queries)))
    print("-------------------------------")

    loglikelihood = LogLikelihood()
    perplexity = Perplexity()

    start = time.time()
    ll_value = loglikelihood.evaluate(click_model, test_sessions)
    end = time.time()
    print("\tlog-likelihood: %f; time: %i secs" % (ll_value, end - start))

    start = time.time()
    perp_value = perplexity.evaluate(click_model, test_sessions)[0]
    end = time.time()
    print("\tperplexity: %f; time: %i secs" % (perp_value, end - start))