def testIterateUntilConvergence(self):
  # Initialize a confusion matrices model and a VB object:
  cm = confusion_matrices.ConfusionMatrices()
  vb = alternating_resolution.VariationalBayes()

  # First test with the Ipeirotis example:
  data = test_util.IPEIROTIS_DATA
  cm.InitializeResolutions(data)
  self.assertTrue(
      vb.IterateUntilConvergence(data, cm,
                                 golden_questions=['url1', 'url2']))
  result = cm.ExtractResolutions(data)
  test_util.AssertResolutionsAlmostEqual(self, IPEIROTIS_VB_CM_RESOLUTIONS,
                                         result)

  # Now test with the Dawid & Skene example:
  data = test_util.DS_DATA
  cm.InitializeResolutions(data)
  # VB is a little slower than EM, so we'll give the algorithm up to 50
  # iterations to converge:
  self.assertTrue(vb.IterateUntilConvergence(data, cm, max_iterations=50))
  result = cm.ExtractResolutions(data)
  test_util.AssertResolutionsAlmostEqual(self, DS_VB_CM_RESOLUTIONS, result)

def testIntegrate(self):
  # Seed the random number generator to produce deterministic test results:
  numpy.random.seed(0)
  # TODO(tpw): This is necessary but not sufficient.  Python's arbitrary
  #            ordering of dict iteration makes some deeper calls
  #            non-deterministic, and thus this test may exhibit flakiness.

  # Initialize a confusion matrices model:
  cm = confusion_matrices.ConfusionMatrices()

  # First check the estimated answers for the Ipeirotis example, using the
  # EM algorithm's results as a starting point for the sampling chain:
  data = test_util.IPEIROTIS_DATA_FINAL
  sampler = substitution_sampling.SubstitutionSampling()
  sampler.Integrate(data, cm, golden_questions=['url1', 'url2'],
                    number_of_samples=20000)
  result = cm.ExtractResolutions(data)
  test_util.AssertResolutionsAlmostEqual(self, IPEIROTIS_SS_CM_RESOLUTIONS,
                                         result, places=1)

  # Now check the estimated answers for the Dawid & Skene example, again
  # using the EM algorithm's results as a starting point for the sampling
  # chain:
  numpy.random.seed(0)
  data = test_util.DS_DATA_FINAL
  sampler = substitution_sampling.SubstitutionSampling()
  sampler.Integrate(data, cm, number_of_samples=20000)
  result = cm.ExtractResolutions(data)
  test_util.AssertResolutionsAlmostEqual(self, DS_SS_CM_RESOLUTIONS, result,
                                         places=2)

def testIterateUntilConvergence(self):
  maximizer = alternating_resolution.ExpectationMaximization()

  # First test with the Ipeirotis example (expecting the same resolution as
  # we get with confusion matrices):
  data = test_util.IPEIROTIS_DATA
  pc = probability_correct.ProbabilityCorrect()
  pc.InitializeResolutions(data)
  self.assertTrue(maximizer.IterateUntilConvergence(data, pc))
  expected = pc.ExtractResolutions(test_util.IPEIROTIS_DATA_FINAL)
  result = pc.ExtractResolutions(data)
  test_util.AssertResolutionsAlmostEqual(self, expected, result)
  self.assertEqual(2, pc._answer_space_size)

  # Now with the Dawid & Skene example (with a resolution, above, differing
  # slightly from the confusion matrices case):
  data = test_util.DS_DATA
  pc.UnsetAnswerSpaceSize()  # to reset the model
  pc.InitializeResolutions(data)
  # The algorithm takes more than 10 steps to converge, so we expect
  # IterateUntilConvergence to return False:
  self.assertFalse(
      maximizer.IterateUntilConvergence(data, pc, max_iterations=10))
  # Nevertheless, its results are accurate to 3 places:
  result = pc.ExtractResolutions(data)
  test_util.AssertResolutionsAlmostEqual(self, DS_EM_PC_RESOLUTIONS, result)
  self.assertEqual(4, pc._answer_space_size)

def testIterateUntilConvergence(self):
  data = {1: ([], {}), 2: ([], {'C': 1.0})}
  mock_model = MockModel()
  resolution = MockResolution()

  # Our silly model should take far more than MAX_ITERATIONS to converge:
  self.assertFalse(
      resolution.IterateUntilConvergence(data, mock_model,
                                         golden_questions=[2]))
  # resolution should have called IterateOnce exactly MAX_ITERATIONS times:
  expected_parameter = float(alternating_resolution.MAX_ITERATIONS)
  self.assertEqual(expected_parameter, mock_model.dummy_parameter)
  # The resolution to question 1 should be as returned by
  # MockModel.ResolveQuestion, and the resolution to question 2 should be
  # left to what we set above because we marked it golden:
  test_util.AssertResolutionsAlmostEqual(
      self,
      {1: {'A': 1.0 / expected_parameter,
           'B': 1.0 - 1.0 / expected_parameter},
       2: {'C': 1.0}},
      mock_model.ExtractResolutions(data))

  # Now we'll force convergence by setting mock_model.dummy_parameter very
  # high...
  mock_model.dummy_parameter = 10.0 / alternating_resolution.EPSILON
  # ...and check that resolution understands that it has converged:
  self.assertTrue(resolution.IterateUntilConvergence(data, mock_model))

def testInitializeResolutions(self):
  data = copy.deepcopy(TEST_DATA)
  expected = {
      'q1': {gaussian_contributors.MEAN: 6.0,
             gaussian_contributors.VARIANCE: 1.0 / 6.0},
      'q2': {gaussian_contributors.MEAN: 6.0 + 1.0 / 3.0,
             gaussian_contributors.VARIANCE: 42.0 / 108.0},
      'q3': {gaussian_contributors.MEAN: 6.0 + 2.0 / 3.0,
             gaussian_contributors.VARIANCE: 114.0 / 108.0},
      'q4': {gaussian_contributors.MEAN: 7.0,
             gaussian_contributors.VARIANCE: 13.0 / 6.0}}
  gaussian_contributors.GaussianContributors.InitializeResolutions(
      data, overwrite_all_resolutions=True)
  result = model.ExtractResolutions(data)
  test_util.AssertResolutionsAlmostEqual(self, expected, result)

  # Check that non-numeric judgments cause a TypeError:
  self.assertRaises(
      TypeError,
      gaussian_contributors.GaussianContributors.InitializeResolutions,
      {'q1': ([('c1', 'WTF', {})], {})})

def testIntegrate(self):
  # Seed the random number generator to produce deterministic test results:
  numpy.random.seed(0)

  # Set up mock data, a mock model object, and a SubstitutionSampling object:
  data = {1: ([], {}), 2: ([], {'C': 1.0})}
  mock_model = MockModel()
  ss = substitution_sampling.SubstitutionSampling()

  # Call ss.Integrate and check that the result is close to the mock model's
  # resolution:
  ss.Integrate(data, mock_model, golden_questions=[2],
               number_of_samples=NUMBER_OF_SAMPLES)
  test_util.AssertResolutionsAlmostEqual(
      self,
      {1: MOCK_RESOLUTION, 2: {'C': 1.0}},
      mock_model.ExtractResolutions(data),
      places=2)
  # ss should have called SetSampleParameters NUMBER_OF_SAMPLES times:
  self.assertEqual(NUMBER_OF_SAMPLES, mock_model.times_called)

def testInitializeResolutions(self):
  data = copy.deepcopy(TEST_DATA)
  expected = {
      'q1': {('ABO',): 2.0 / 3.0,
             ('ABO', 'A'): 1.0 / 3.0,
             ('ABO', 'B'): 1.0 / 3.0,
             ('Rh',): 1.0 / 3.0,
             ('Rh', '+'): 1.0 / 3.0},
      'q2': {('ABO',): 1.0 / 3.0,
             ('ABO', 'O'): 1.0 / 3.0,
             ('Rh',): 2.0 / 3.0,
             ('Rh', '+'): 1.0 / 3.0,
             ('Rh', '-'): 1.0 / 3.0},
      'q3': {('Rh',): 2.0 / 3.0,
             ('Rh', '-'): 2.0 / 3.0,
             ('Other',): 1.0 / 3.0}}
  decision_tree.DecisionTree.InitializeResolutions(
      data, overwrite_all_resolutions=True)
  result = decision_tree.DecisionTree.ExtractResolutions(data)
  test_util.AssertResolutionsAlmostEqual(self, expected, result)

def testIterateUntilConvergence(self):
  # Test first with the Ipeirotis example (expecting a resolution, above,
  # differing slightly from the expectation-maximization case):
  data = test_util.IPEIROTIS_DATA
  pc = probability_correct.ProbabilityCorrect()
  pc.InitializeResolutions(data)
  maximizer = alternating_resolution.VariationalBayes()
  # Run the variational inference algorithm:
  self.assertTrue(maximizer.IterateUntilConvergence(
      data, pc, golden_questions=['url1', 'url2']))
  # Check the results:
  result = pc.ExtractResolutions(data)
  test_util.AssertResolutionsAlmostEqual(self, IPEIROTIS_VB_PC_RESOLUTIONS,
                                         result)
  self.assertEqual(2, pc._answer_space_size)

  # Run the same experiment without gold data, and check that having gold
  # data gives us more faith in the contributors who agree with it:
  gold_contributor_accuracy = pc.probability_correct.copy()
  self.assertTrue(maximizer.IterateUntilConvergence(data, pc))
  non_gold_contributor_accuracy = pc.probability_correct.copy()
  self.assertGreater(gold_contributor_accuracy['worker2'],
                     non_gold_contributor_accuracy['worker2'])
  self.assertGreater(gold_contributor_accuracy['worker3'],
                     non_gold_contributor_accuracy['worker3'])
  self.assertGreater(gold_contributor_accuracy['worker4'],
                     non_gold_contributor_accuracy['worker4'])

  # Test with the Dawid & Skene example (expecting a resolution, above,
  # differing slightly from the expectation-maximization case):
  data = test_util.DS_DATA
  pc = probability_correct.ProbabilityCorrect()
  pc.InitializeResolutions(data)
  maximizer = alternating_resolution.VariationalBayes()
  # Run the variational inference algorithm:
  self.assertTrue(maximizer.IterateUntilConvergence(data, pc))
  # Check the results:
  result = pc.ExtractResolutions(data)
  test_util.AssertResolutionsAlmostEqual(self, DS_VB_PC_RESOLUTIONS, result)
  self.assertEqual(4, pc._answer_space_size)

def testIterateUntilConvergence(self):
  # Initialize a confusion matrices model and an EM object:
  cm = confusion_matrices.ConfusionMatrices()
  maximizer = alternating_resolution.ExpectationMaximization()

  # First with the Ipeirotis example:
  data = test_util.IPEIROTIS_DATA
  cm.InitializeResolutions(data)
  self.assertTrue(maximizer.IterateUntilConvergence(data, cm))
  expected = cm.ExtractResolutions(test_util.IPEIROTIS_DATA_FINAL)
  result = cm.ExtractResolutions(data)
  test_util.AssertResolutionsAlmostEqual(self, expected, result)

  # Now with the Dawid & Skene example:
  data = test_util.DS_DATA
  cm.InitializeResolutions(data)
  # The algorithm takes more than 10 steps to converge, so we expect
  # IterateUntilConvergence to return False:
  self.assertFalse(
      maximizer.IterateUntilConvergence(data, cm, max_iterations=10))
  # Nevertheless, its results are accurate to 3 places:
  expected = cm.ExtractResolutions(test_util.DS_DATA_FINAL)
  result = cm.ExtractResolutions(data)
  test_util.AssertResolutionsAlmostEqual(self, expected, result)

def testIterateUntilConvergence(self):
  # Initialize a Gaussian model and an EM object:
  gc = gaussian_contributors.GaussianContributors()
  em = alternating_resolution.ExpectationMaximization()

  # Use the judgments from the Dawid & Skene example:
  data = test_util.DS_DATA
  gc.InitializeResolutions(data)
  # The algorithm converges slowly because EPSILON is small:
  self.assertFalse(em.IterateUntilConvergence(data, gc, max_iterations=1000))
  expected = {}
  for question in data:
    expected[question] = dict([
        (gaussian_contributors.MEAN, EXPECTED_MEAN[question]),
        (gaussian_contributors.VARIANCE, EXPECTED_VARIANCE)])
  result = gc.ExtractResolutions(data)
  test_util.AssertResolutionsAlmostEqual(self, expected, result)

def testIterateOnce(self):
  data = {1: ([], {}), 2: ([], {'C': 1.0})}
  mock_model = MockModel()
  resolution = MockResolution()
  resolution.IterateOnce(data, mock_model, golden_questions=[2])
  # resolution should have called ChangeParameters once and ResolveQuestion
  # once:
  self.assertEqual(1.0, mock_model.dummy_parameter)
  # The resolution to question 1 should be as set by
  # MockModel.ResolveQuestion, and the resolution to question 2 should be
  # left to what we set above because we marked it golden:
  test_util.AssertResolutionsAlmostEqual(
      self,
      {1: {'A': 1.0, 'B': 0.0},
       2: {'C': 1.0}},
      mock_model.ExtractResolutions(data))

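# Note: the MockModel and MockResolution classes exercised in the tests above
# are defined elsewhere in this module.  Purely as an illustrative sketch
# (inferred from the assertions above, not the canonical definitions), a mock
# model compatible with these tests could look like the class below.  The
# class name and the argument lists are hypothetical placeholders; only the
# behaviors asserted above (ChangeParameters bumping dummy_parameter by one,
# ResolveQuestion returning {'A': 1/p, 'B': 1 - 1/p}) are taken from the
# tests themselves.
class _SketchMockModel(object):
  """Hypothetical stand-in illustrating the behavior the tests rely on."""

  def __init__(self):
    self.dummy_parameter = 0.0

  def ChangeParameters(self, data):
    # Each iteration nudges the model's single parameter; the resolution
    # algorithm is assumed to call this before ResolveQuestion, so the
    # division below never sees a zero parameter:
    self.dummy_parameter += 1.0

  def ResolveQuestion(self, question_data):
    # Successive resolutions differ by 1/p - 1/(p + 1), so once
    # dummy_parameter is large enough the change drops below EPSILON and
    # IterateUntilConvergence reports convergence:
    return {'A': 1.0 / self.dummy_parameter,
            'B': 1.0 - 1.0 / self.dummy_parameter}

  def ExtractResolutions(self, data):
    # data maps each question to a (judgments, resolution_map) pair:
    return dict((question, resolution_map)
                for question, (_, resolution_map) in data.iteritems())
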
def testInitializeResolutions(self):
  # Test that the method behaves as expected, setting initial guesses:
  data = copy.deepcopy(test_util.DS_DATA)
  expected = {1: {1: 1.0},
              2: {3: 5.0 / 7.0, 4: 2.0 / 7.0},
              3: {1: 3.0 / 7.0, 2: 4.0 / 7.0},
              4: {1: 2.0 / 7.0, 2: 4.0 / 7.0, 3: 1.0 / 7.0},
              5: {2: 6.0 / 7.0, 3: 1.0 / 7.0},
              6: {2: 5.0 / 7.0, 3: 2.0 / 7.0},
              7: {1: 4.0 / 7.0, 2: 3.0 / 7.0},
              8: {3: 6.0 / 7.0, 4: 1.0 / 7.0},
              9: {2: 6.0 / 7.0, 3: 1.0 / 7.0},
              10: {2: 5.0 / 7.0, 3: 2.0 / 7.0},
              11: {4: 1.0},
              12: {2: 3.0 / 7.0, 3: 3.0 / 7.0, 4: 1.0 / 7.0},
              13: {1: 1.0},
              14: {1: 1.0 / 7.0, 2: 5.0 / 7.0, 3: 1.0 / 7.0},
              15: {1: 6.0 / 7.0, 2: 1.0 / 7.0},
              16: {1: 6.0 / 7.0, 2: 1.0 / 7.0},
              17: {1: 1.0},
              18: {1: 1.0},
              19: {1: 1.0 / 7.0, 2: 6.0 / 7.0},
              20: {1: 1.0 / 7.0, 2: 5.0 / 7.0, 3: 1.0 / 7.0},
              21: {2: 1.0},
              22: {1: 1.0 / 7.0, 2: 6.0 / 7.0},
              23: {2: 6.0 / 7.0, 3: 1.0 / 7.0},
              24: {1: 1.0 / 7.0, 2: 6.0 / 7.0},
              25: {1: 1.0},
              26: {1: 1.0},
              27: {2: 6.0 / 7.0, 3: 1.0 / 7.0},
              28: {1: 1.0},
              29: {1: 1.0},
              30: {1: 5.0 / 7.0, 2: 2.0 / 7.0},
              31: {1: 1.0},
              32: {2: 1.0 / 7.0, 3: 6.0 / 7.0},
              33: {1: 1.0},
              34: {2: 1.0},
              35: {2: 5.0 / 7.0, 3: 2.0 / 7.0},
              36: {3: 4.0 / 7.0, 4: 3.0 / 7.0},
              37: {1: 1.0 / 7.0, 2: 5.0 / 7.0, 3: 1.0 / 7.0},
              38: {2: 3.0 / 7.0, 3: 4.0 / 7.0},
              39: {2: 1.0 / 7.0, 3: 5.0 / 7.0, 4: 1.0 / 7.0},
              40: {1: 1.0},
              41: {1: 1.0},
              42: {1: 5.0 / 7.0, 2: 2.0 / 7.0},
              43: {2: 6.0 / 7.0, 3: 1.0 / 7.0},
              44: {1: 6.0 / 7.0, 2: 1.0 / 7.0},
              45: {2: 1.0}}
  model.StatisticalModel.InitializeResolutions(data)
  results = model.ExtractResolutions(data)
  test_util.AssertResolutionsAlmostEqual(self, expected, results)

  # Now write garbage resolutions and check that InitializeResolutions
  # overwrites them:
  for _, resolution_map in data.itervalues():
    resolution_map['None of the above'] = 1.0
  model.StatisticalModel.InitializeResolutions(data,
                                               overwrite_all_resolutions=True)
  results = model.ExtractResolutions(data)
  test_util.AssertResolutionsAlmostEqual(self, expected, results)

  # Finally, test that the judgments_to_answers mapping works:
  judgments_to_answers = {1: 'Good', 2: None, 3: 'Bad', 4: 'Bad'}
  model.StatisticalModel.InitializeResolutions(
      data, overwrite_all_resolutions=True,
      judgments_to_answers=judgments_to_answers)
  expected = {1: {'Good': 1.0}, 2: {'Bad': 1.0}, 3: {'Good': 1.0},
              4: {'Good': 2.0 / 3.0, 'Bad': 1.0 / 3.0},
              5: {'Bad': 1.0}, 6: {'Bad': 1.0}, 7: {'Good': 1.0},
              8: {'Bad': 1.0}, 9: {'Bad': 1.0}, 10: {'Bad': 1.0},
              11: {'Bad': 1.0}, 12: {'Bad': 1.0}, 13: {'Good': 1.0},
              14: {'Good': 1.0 / 2.0, 'Bad': 1.0 / 2.0},
              15: {'Good': 1.0}, 16: {'Good': 1.0}, 17: {'Good': 1.0},
              18: {'Good': 1.0}, 19: {'Good': 1.0},
              20: {'Good': 1.0 / 2.0, 'Bad': 1.0 / 2.0},
              21: {}, 22: {'Good': 1.0}, 23: {'Bad': 1.0},
              24: {'Good': 1.0}, 25: {'Good': 1.0}, 26: {'Good': 1.0},
              27: {'Bad': 1.0}, 28: {'Good': 1.0}, 29: {'Good': 1.0},
              30: {'Good': 1.0}, 31: {'Good': 1.0}, 32: {'Bad': 1.0},
              33: {'Good': 1.0}, 34: {}, 35: {'Bad': 1.0},
              36: {'Bad': 1.0},
              37: {'Good': 1.0 / 2.0, 'Bad': 1.0 / 2.0},
              38: {'Bad': 1.0}, 39: {'Bad': 1.0}, 40: {'Good': 1.0},
              41: {'Good': 1.0}, 42: {'Good': 1.0}, 43: {'Bad': 1.0},
              44: {'Good': 1.0}, 45: {}}
  results = model.ExtractResolutions(data)
  test_util.AssertResolutionsAlmostEqual(self, expected, results)