def testResolveQuestion(self):
  # An empty model (no priors, no matrices) should resolve to nothing:
  cm = confusion_matrices.ConfusionMatrices()
  cm.priors = {}
  cm.confusion_matrices = {}
  resolution_map = cm.ResolveQuestion(test_util.IPEIROTIS_RESPONSES[0])
  test_util.AssertMapsAlmostEqual(self, {}, resolution_map)
  # Use data from the solution to the Ipeirotis example:
  cm = confusion_matrices.ConfusionMatrices()
  cm.priors = IPEIROTIS_MLE_PRIORS
  cm.confusion_matrices = IPEIROTIS_MLE_CM
  for i, _ in enumerate(test_util.IPEIROTIS_DATA):
    resolution_map = cm.ResolveQuestion(test_util.IPEIROTIS_RESPONSES[i])
    test_util.AssertMapsAlmostEqual(self,
                                    test_util.IPEIROTIS_ALL_ANSWERS[i],
                                    resolution_map,
                                    label='question ' + str(i) + ', answer')
  # And again for the Dawid & Skene example:
  cm.priors = DS_MLE_PRIORS
  cm.confusion_matrices = DS_MLE_CM
  for i, _ in enumerate(test_util.DS_DATA):
    resolution_map = cm.ResolveQuestion(test_util.DS_RESPONSES[i])
    test_util.AssertMapsAlmostEqual(self,
                                    test_util.DS_EM_CM_RESOLUTIONS[i],
                                    resolution_map,
                                    label='question ' + str(i) + ', answer')
def testIntegrate(self):
  # Seed the random number generator so the sampling chain is deterministic:
  numpy.random.seed(0)
  # TODO(tpw): This is necessary but not sufficient.  Python's arbitrary
  #            ordering of dict iteration makes some deeper calls
  #            non-deterministic, and thus this test may exhibit flakiness.
  # Initialize a confusion matrices model:
  cm = confusion_matrices.ConfusionMatrices()
  # First check the estimated answers for the Ipeirotis example, using the
  # EM algorithm's results as a starting point for the sampling chain:
  data = test_util.IPEIROTIS_DATA_FINAL
  sampler = substitution_sampling.SubstitutionSampling()
  sampler.Integrate(data, cm,
                    golden_questions=['url1', 'url2'],
                    number_of_samples=20000)
  result = cm.ExtractResolutions(data)
  test_util.AssertResolutionsAlmostEqual(self,
                                         IPEIROTIS_SS_CM_RESOLUTIONS,
                                         result, places=1)
  # Now check the estimated answers for the Dawid & Skene example, again using
  # the EM algorithm's results as a starting point for the sampling chain:
  numpy.random.seed(0)
  data = test_util.DS_DATA_FINAL
  sampler = substitution_sampling.SubstitutionSampling()
  sampler.Integrate(data, cm, number_of_samples=20000)
  result = cm.ExtractResolutions(data)
  test_util.AssertResolutionsAlmostEqual(self,
                                         DS_SS_CM_RESOLUTIONS,
                                         result, places=2)
def testIterateUntilConvergence(self):
  # Initialize a confusion matrices model and a VB object:
  cm = confusion_matrices.ConfusionMatrices()
  vb = alternating_resolution.VariationalBayes()
  # First test with the Ipeirotis example:
  data = test_util.IPEIROTIS_DATA
  cm.InitializeResolutions(data)
  converged = vb.IterateUntilConvergence(
      data, cm, golden_questions=['url1', 'url2'])
  self.assertTrue(converged)
  result = cm.ExtractResolutions(data)
  test_util.AssertResolutionsAlmostEqual(self,
                                         IPEIROTIS_VB_CM_RESOLUTIONS,
                                         result)
  # Now test with the Dawid & Skene example:
  data = test_util.DS_DATA
  cm.InitializeResolutions(data)
  # VB is a little slower than EM, so we'll give the algorithm up to 50
  # iterations to converge:
  self.assertTrue(vb.IterateUntilConvergence(data, cm, max_iterations=50))
  result = cm.ExtractResolutions(data)
  test_util.AssertResolutionsAlmostEqual(self, DS_VB_CM_RESOLUTIONS, result)
def testMutualInformation(self):
  # Use data from the solution to the Ipeirotis example:
  cm = confusion_matrices.ConfusionMatrices()
  cm.priors = IPEIROTIS_MLE_PRIORS
  cm.confusion_matrices = IPEIROTIS_MLE_CM
  # First we'll test for a first judgment:
  expected = {'worker1': 0.0,
              'worker2': 0.419973,
              'worker3': 0.970951,
              'worker4': 0.970951,
              'worker5': 0.970951}
  result = {contributor: cm.MutualInformation(contributor)
            for contributor in expected}
  test_util.AssertMapsAlmostEqual(self, expected, result, label='contributor')
  # Now we'll test for a second judgment:
  previous_responses = [('worker2', 'p**n', {})]
  expected = {'worker1': 0.0,
              'worker2': 0.4581059,
              'worker3': 0.9182958,
              'worker4': 0.9182958,
              'worker5': 0.9182958}
  result = {
      contributor: cm.MutualInformation(
          contributor, previous_responses=previous_responses)
      for contributor in expected}
  test_util.AssertMapsAlmostEqual(self, expected, result, label='contributor')
  # However, if the first judgment was given by a perfect contributor (for
  # example, worker3), then no second judgment can give any more information:
  previous_responses = [('worker3', 'notporn', {})]
  self.assertAlmostEqual(0.0, cm.MutualInformation(
      'worker2', previous_responses=previous_responses))
def testSetMLEParameters(self):
  # Fit the model to the solved Ipeirotis example data:
  cm = confusion_matrices.ConfusionMatrices()
  cm.SetMLEParameters(test_util.IPEIROTIS_DATA_FINAL)
  # The fitted priors should match the known MLE priors:
  test_util.AssertMapsAlmostEqual(self,
                                  IPEIROTIS_MLE_PRIORS,
                                  cm.priors,
                                  label='answer')
  # And the fitted confusion matrices should match the known MLE matrices:
  test_util.AssertConfusionMatricesAlmostEqual(self,
                                               IPEIROTIS_MLE_CM,
                                               cm.confusion_matrices)
def testQuestionEntropy(self):
  # Use data from the solution to the Ipeirotis example:
  cm = confusion_matrices.ConfusionMatrices()
  cm.priors = IPEIROTIS_MLE_PRIORS
  # Test in binary:
  self.assertAlmostEqual(0.9709506, cm.QuestionEntropy())
  # And in digits:
  self.assertAlmostEqual(0.2922853, cm.QuestionEntropy(radix=10))
  # Ensure that we correctly normalize non-normalized priors (those produced
  # by VariationalParameters, for example).  Scaling all priors by the same
  # constant must leave the entropy unchanged.
  # (Use items() rather than the Python-2-only iteritems() so this test also
  # runs under Python 3.)
  cm.priors = {k: v * 3.0 for k, v in IPEIROTIS_MLE_PRIORS.items()}
  self.assertAlmostEqual(0.9709506, cm.QuestionEntropy())
  self.assertAlmostEqual(0.2922853, cm.QuestionEntropy(radix=10))
def testIterateUntilConvergence(self):
  # Initialize a confusion matrices model and an EM object:
  cm = confusion_matrices.ConfusionMatrices()
  em = alternating_resolution.ExpectationMaximization()
  # First with the Ipeirotis example:
  data = test_util.IPEIROTIS_DATA
  cm.InitializeResolutions(data)
  self.assertTrue(em.IterateUntilConvergence(data, cm))
  expected = cm.ExtractResolutions(test_util.IPEIROTIS_DATA_FINAL)
  result = cm.ExtractResolutions(data)
  test_util.AssertResolutionsAlmostEqual(self, expected, result)
  # Now with the Dawid & Skene example:
  data = test_util.DS_DATA
  cm.InitializeResolutions(data)
  # The algorithm takes more than 10 steps to converge, so we expect
  # IterateUntilConvergence to return False:
  self.assertFalse(em.IterateUntilConvergence(data, cm, max_iterations=10))
  # Nevertheless, its results are accurate to 3 places:
  expected = cm.ExtractResolutions(test_util.DS_DATA_FINAL)
  result = cm.ExtractResolutions(data)
  test_util.AssertResolutionsAlmostEqual(self, expected, result)
def testPointwiseMutualInformation(self):
  # Use data from the solution to the Ipeirotis example:
  cm = confusion_matrices.ConfusionMatrices()
  cm.priors = IPEIROTIS_MLE_PRIORS
  cm.confusion_matrices = IPEIROTIS_MLE_CM
  # First we'll test the information of a first judgment.
  # We're going to define four helper variables:
  # Two for the information content of the true resolutions:
  notporn_inf = -math.log(cm.priors['notporn'], 2)
  porn_inf = -math.log(cm.priors['p**n'], 2)
  # Two for the information given by contributor 'worker2', judgment 'p**n':
  norm = (cm.priors['notporn'] *
          cm.confusion_matrices['worker2']['notporn']['p**n'] +
          cm.priors['p**n'] *
          cm.confusion_matrices['worker2']['p**n']['p**n'])
  inf_01 = math.log(
      cm.confusion_matrices['worker2']['notporn']['p**n'] / norm, 2)
  inf_11 = math.log(
      cm.confusion_matrices['worker2']['p**n']['p**n'] / norm, 2)
  # Now, worker1 is a spammer which always gives us zero information.
  # worker2 gives us complete information when it gives judgment 'notporn',
  # but partial information when it gives judgment 'p**n'.
  # The other three contributors give us complete information for all of
  # their judgments, even though worker5 always lies.
  # Entries for impossible contingencies in the matrix below are filled in
  # with 0.0.
  expected = {'worker1': {'notporn': {'notporn': 0.0, 'p**n': 0.0},
                          'p**n': {'notporn': 0.0, 'p**n': 0.0}},
              'worker2': {'notporn': {'notporn': notporn_inf, 'p**n': inf_01},
                          'p**n': {'notporn': 0.0, 'p**n': inf_11}},
              'worker3': {'notporn': {'notporn': notporn_inf, 'p**n': 0.0},
                          'p**n': {'notporn': 0.0, 'p**n': porn_inf}},
              'worker4': {'notporn': {'notporn': notporn_inf, 'p**n': 0.0},
                          'p**n': {'notporn': 0.0, 'p**n': porn_inf}},
              'worker5': {'notporn': {'notporn': 0.0, 'p**n': notporn_inf},
                          'p**n': {'notporn': porn_inf, 'p**n': 0.0}}}
  # Pack the results into a structure having the same form as
  # cm.confusion_matrices:
  result = {}
  for contributor in expected:
    result[contributor] = {}
    for answer in expected[contributor]:
      result[contributor][answer] = {}
      for judgment in expected[contributor][answer]:
        result[contributor][answer][judgment] = cm.PointwiseMutualInformation(
            contributor, answer, judgment)
  # Thus, we can use the method test_util.AssertConfusionMatricesAlmostEqual:
  test_util.AssertConfusionMatricesAlmostEqual(self, expected, result)
  # Now we'll test the information of a second judgment.
  # Start by supposing that worker2 gave judgment 'p**n':
  previous_responses = [('worker2', 'p**n', {})]
  # Suppose the correct answer is 'notporn', and the next judgment is
  # worker2 giving another 'p**n' judgment.  After the first judgment, the
  # probability of the correct answer is 1/3.  After the second judgment, the
  # probability of the correct answer is 1/7.  So the change in information is
  # log(3/7), or about -1.222392 bits:
  self.assertAlmostEqual(math.log(3.0/7.0, 2), cm.PointwiseMutualInformation(
      'worker2', 'notporn', 'p**n', previous_responses=previous_responses))
  # Now suppose the correct answer is 'p**n', and the next judgment is
  # worker2 giving another 'p**n' judgment.  After the first judgment, the
  # probability of the correct answer is 2/3.  After the second judgment, the
  # probability of the correct answer is 6/7.  So the change in information is
  # log(9/7):
  self.assertAlmostEqual(math.log(9.0/7.0, 2), cm.PointwiseMutualInformation(
      'worker2', 'p**n', 'p**n', previous_responses=previous_responses))
  # Now suppose the correct answer is 'notporn', and the next judgment is
  # worker2 giving a 'notporn' judgment.  After the first judgment, the
  # probability of the correct answer is 1/3.  After the second judgment, the
  # probability of the correct answer is 1.  So the change in information is
  # log(3):
  self.assertAlmostEqual(math.log(3.0, 2), cm.PointwiseMutualInformation(
      'worker2', 'notporn', 'notporn', previous_responses=previous_responses))
  # Finally, suppose the correct answer is 'p**n', and the next judgment is
  # worker5 giving a 'notporn' judgment.  After the first judgment, the
  # probability of the correct answer is 2/3.  After the second judgment, the
  # probability of the correct answer is 1.  So the change in information is
  # log(3/2):
  self.assertAlmostEqual(math.log(1.5, 2), cm.PointwiseMutualInformation(
      'worker5', 'p**n', 'notporn', previous_responses=previous_responses))
def _CreateSubmodel(unused_path):
  """Returns a new ConfusionMatrices model; convenient for overriding."""
  return confusion_matrices.ConfusionMatrices()