def testConfusionMatricesDirichletParameters(self):
  # Check the Dirichlet parameters for the Ipeirotis data:
  result = confusion_matrices.ConfusionMatricesDirichletParameters(
      test_util.IPEIROTIS_DATA_FINAL)
  test_util.AssertConfusionMatricesAlmostEqual(self, IPEIROTIS_CM_DIRICHLET,
                                               result, places=2)
  # And for the Dawid & Skene data:
  result = confusion_matrices.ConfusionMatricesDirichletParameters(
      test_util.DS_DATA_FINAL)
  test_util.AssertConfusionMatricesAlmostEqual(self, DS_CM_DIRICHLET,
                                               result, places=2)
  # Check that the weighted test data gives the same results as the original:
  result = confusion_matrices.ConfusionMatricesDirichletParameters(
      test_util.DS_DATA_EXTRA, question_weights=test_util.DS_EXTRA_WEIGHTS)
  test_util.AssertConfusionMatricesAlmostEqual(self, DS_CM_DIRICHLET,
                                               result, places=2)
  # Test that the Dirichlet vectors include judgments for questions with
  # empty resolutions (this was once not true due to a whitespace bug):
  result = confusion_matrices.ConfusionMatricesDirichletParameters(
      {'q1': ([('c1', 'B', None)], {'A': 1.0}),
       'q2': ([('c1', 'C', None)], {})})
  test_util.AssertConfusionMatricesAlmostEqual(
      self, {'c1': {'A': {'B': 2.0, 'C': 1.0}}}, result)
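
# The empty-resolution case above pins down the convention: each Dirichlet
# parameter is 1.0 plus the resolution-weighted count of the corresponding
# (answer, judgment) pair, and each row includes every judgment value the
# contributor ever gave.  A minimal sketch of that computation, assuming the
# add-one convention implied by the final assertion and omitting the
# question_weights support the real function has:
def _SketchDirichletParameters(data):
  """data: {question: (judgment list, resolution map)}, as in test_util."""
  # First pass: every judgment a contributor ever gave seeds each of that
  # contributor's rows at the base value 1.0, even if the judgment only
  # appeared on questions with empty resolutions:
  all_judgments = {}
  for judgments, _ in data.itervalues():
    for contributor, judgment, _ in judgments:
      all_judgments.setdefault(contributor, set()).add(judgment)
  # Second pass: add resolution-weighted counts from resolved questions:
  params = {}
  for judgments, resolution in data.itervalues():
    for contributor, judgment, _ in judgments:
      for answer, weight in resolution.iteritems():
        row = params.setdefault(contributor, {}).setdefault(
            answer, dict.fromkeys(all_judgments[contributor], 1.0))
        row[judgment] += weight
  return params
# On the two-question example above this yields exactly
# {'c1': {'A': {'B': 2.0, 'C': 1.0}}}.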
def testVariationalConfusionMatrices(self):
  # Test with the Ipeirotis data:
  test_util.AssertConfusionMatricesAlmostEqual(
      self, IPEIROTIS_VARIATIONAL_CM,
      confusion_matrices.VariationalConfusionMatrices(IPEIROTIS_CM_DIRICHLET))
  # And again with the Dawid & Skene data:
  test_util.AssertConfusionMatricesAlmostEqual(
      self, DS_VARIATIONAL_CM,
      confusion_matrices.VariationalConfusionMatrices(DS_CM_DIRICHLET))
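
# For intuition: a standard variational-Bayes point estimate for a
# Dirichlet-distributed row replaces each probability with the exponentiated
# expected log, exp(E[log p_j]) = exp(psi(alpha_j) - psi(sum(alpha))), where
# psi is the digamma function.  This sketch assumes that convention and that
# scipy is available; it is not necessarily the exact formula
# VariationalConfusionMatrices implements:
def _SketchVariationalRow(alpha_row):
  from scipy.special import psi  # Digamma function.
  total = sum(alpha_row.itervalues())
  return {judgment: math.exp(psi(alpha) - psi(total))
          for judgment, alpha in alpha_row.iteritems()}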
def testMLEConfusionMatrices(self):
  # Check that MLEConfusionMatrices returns agnostic matrices when there are
  # no resolutions:
  result = confusion_matrices.MLEConfusionMatrices(test_util.DS_DATA)
  test_util.AssertConfusionMatricesAlmostEqual(self, {}, result)
  # Check that MLEConfusionMatrices returns the correct matrices for the
  # Ipeirotis example:
  result = confusion_matrices.MLEConfusionMatrices(
      test_util.IPEIROTIS_DATA_FINAL)
  test_util.AssertConfusionMatricesAlmostEqual(self, IPEIROTIS_MLE_CM, result)
  # And for the Dawid & Skene example:
  result = confusion_matrices.MLEConfusionMatrices(test_util.DS_DATA_FINAL)
  test_util.AssertConfusionMatricesAlmostEqual(self, DS_MLE_CM, result)
  # Check that the weighted test data gives the same results as the original:
  result = confusion_matrices.MLEConfusionMatrices(
      test_util.DS_DATA_EXTRA, question_weights=test_util.DS_EXTRA_WEIGHTS)
  test_util.AssertConfusionMatricesAlmostEqual(self, DS_MLE_CM, result)
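
# Given the add-one Dirichlet convention sketched earlier, the
# maximum-likelihood estimate of a row is just the normalized counts,
# i.e. (alpha_j - 1) / sum_k (alpha_k - 1).  A sketch under that assumption,
# which also shows why the no-resolutions case comes out agnostic:
def _SketchMLERow(alpha_row):
  counts = {j: alpha - 1.0 for j, alpha in alpha_row.iteritems()}
  total = sum(counts.itervalues())
  if not total:
    return {}  # No judgments coincided with resolutions; stay agnostic.
  return {j: count / total for j, count in counts.iteritems()}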
def testSetMLEParameters(self):
  # Use data from the solution to the Ipeirotis example:
  cm = confusion_matrices.ConfusionMatrices()
  cm.SetMLEParameters(test_util.IPEIROTIS_DATA_FINAL)
  # Check that cm.priors was set correctly:
  test_util.AssertMapsAlmostEqual(self, IPEIROTIS_MLE_PRIORS, cm.priors,
                                  label='answer')
  # Check that cm.confusion_matrices was set correctly:
  test_util.AssertConfusionMatricesAlmostEqual(self, IPEIROTIS_MLE_CM,
                                               cm.confusion_matrices)
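
# For intuition: SetMLEParameters plausibly amounts to pairing the MLE
# confusion matrices with resolution-frequency priors.  The prior formula in
# this sketch is an assumption, not the library's code:
def _SketchMLEPriors(data):
  totals = collections.defaultdict(float)
  for _, resolution in data.itervalues():
    for answer, weight in resolution.iteritems():
      totals[answer] += weight
  grand_total = sum(totals.itervalues())
  return {answer: weight / grand_total
          for answer, weight in totals.iteritems()}
# Under these assumptions, cm.priors == _SketchMLEPriors(data) and
# cm.confusion_matrices == confusion_matrices.MLEConfusionMatrices(data).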
def testPointwiseMutualInformation(self):
  # Use data from the solution to the Ipeirotis example:
  cm = confusion_matrices.ConfusionMatrices()
  cm.priors = IPEIROTIS_MLE_PRIORS
  cm.confusion_matrices = IPEIROTIS_MLE_CM
  # First we'll test the information of a first judgment.
  # We're going to define four helper variables:
  # Two for the information content of the true resolutions:
  notporn_inf = -math.log(cm.priors['notporn'], 2)
  porn_inf = -math.log(cm.priors['p**n'], 2)
  # Two for the information given by contributor 'worker2', judgment 'p**n':
  norm = (
      cm.priors['notporn'] *
      cm.confusion_matrices['worker2']['notporn']['p**n'] +
      cm.priors['p**n'] * cm.confusion_matrices['worker2']['p**n']['p**n'])
  inf_01 = math.log(
      cm.confusion_matrices['worker2']['notporn']['p**n'] / norm, 2)
  inf_11 = math.log(
      cm.confusion_matrices['worker2']['p**n']['p**n'] / norm, 2)
  # Now, worker1 is a spammer who always gives us zero information.
  # worker2 gives us complete information when giving judgment 'notporn',
  # but partial information when giving judgment 'p**n'.
  # The other three contributors give us complete information for all of
  # their judgments, even though worker5 always lies.
  # Entries for impossible contingencies in the matrix below are filled in
  # with 0.0.
  expected = {'worker1': {'notporn': {'notporn': 0.0, 'p**n': 0.0},
                          'p**n': {'notporn': 0.0, 'p**n': 0.0}},
              'worker2': {'notporn': {'notporn': notporn_inf, 'p**n': inf_01},
                          'p**n': {'notporn': 0.0, 'p**n': inf_11}},
              'worker3': {'notporn': {'notporn': notporn_inf, 'p**n': 0.0},
                          'p**n': {'notporn': 0.0, 'p**n': porn_inf}},
              'worker4': {'notporn': {'notporn': notporn_inf, 'p**n': 0.0},
                          'p**n': {'notporn': 0.0, 'p**n': porn_inf}},
              'worker5': {'notporn': {'notporn': 0.0, 'p**n': notporn_inf},
                          'p**n': {'notporn': porn_inf, 'p**n': 0.0}}}
  # Pack the results into a structure having the same form as
  # cm.confusion_matrices:
  result = {}
  for contributor in expected:
    result[contributor] = {}
    for answer in expected[contributor]:
      result[contributor][answer] = {}
      for judgment in expected[contributor][answer]:
        result[contributor][answer][judgment] = cm.PointwiseMutualInformation(
            contributor, answer, judgment)
  # Thus, we can use the method test_util.AssertConfusionMatricesAlmostEqual:
  test_util.AssertConfusionMatricesAlmostEqual(self, expected, result)
  # Now we'll test the information of a second judgment.
  # Start by supposing that worker2 gave judgment 'p**n':
  previous_responses = [('worker2', 'p**n', {})]
  # Suppose the correct answer is 'notporn', and the next judgment is
  # worker2 giving another 'p**n' judgment.  After the first judgment, the
  # probability of the correct answer is 1/3.  After the second judgment,
  # the probability of the correct answer is 1/7.  So the change in
  # information is log(3/7), or about -1.222392 bits:
  self.assertAlmostEqual(
      math.log(3.0 / 7.0, 2),
      cm.PointwiseMutualInformation('worker2', 'notporn', 'p**n',
                                    previous_responses=previous_responses))
  # Now suppose the correct answer is 'p**n', and the next judgment is
  # worker2 giving another 'p**n' judgment.  After the first judgment, the
  # probability of the correct answer is 2/3.  After the second judgment,
  # the probability of the correct answer is 6/7.  So the change in
  # information is log(9/7):
  self.assertAlmostEqual(
      math.log(9.0 / 7.0, 2),
      cm.PointwiseMutualInformation('worker2', 'p**n', 'p**n',
                                    previous_responses=previous_responses))
  # Now suppose the correct answer is 'notporn', and the next judgment is
  # worker2 giving a 'notporn' judgment.  After the first judgment, the
  # probability of the correct answer is 1/3.  After the second judgment,
  # the probability of the correct answer is 1.  So the change in
  # information is log(3):
  self.assertAlmostEqual(
      math.log(3.0, 2),
      cm.PointwiseMutualInformation('worker2', 'notporn', 'notporn',
                                    previous_responses=previous_responses))
  # Finally, suppose the correct answer is 'p**n', and the next judgment is
  # worker5 giving a 'notporn' judgment.  After the first judgment, the
  # probability of the correct answer is 2/3.  After the second judgment,
  # the probability of the correct answer is 1.  So the change in
  # information is log(3/2):
  self.assertAlmostEqual(
      math.log(1.5, 2),
      cm.PointwiseMutualInformation('worker5', 'p**n', 'notporn',
                                    previous_responses=previous_responses))
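
# As a sanity check on the 1/3 -> 1/7 arithmetic above, here is the Bayes
# update spelled out.  The priors and worker2 rows below are derived from the
# comments in this test (they reproduce every posterior quoted above), not
# copied from test_util, so treat them as assumptions:
def _SketchPosterior(priors, matrix, judgments):
  """Posterior over answers after a sequence of independent judgments."""
  posterior = dict(priors)
  for judgment in judgments:
    for answer in posterior:
      posterior[answer] *= matrix[answer].get(judgment, 0.0)
    total = sum(posterior.itervalues())
    posterior = {a: p / total for a, p in posterior.iteritems()}
  return posterior

_SKETCH_PRIORS = {'notporn': 0.6, 'p**n': 0.4}
_SKETCH_WORKER2 = {'notporn': {'notporn': 2.0 / 3.0, 'p**n': 1.0 / 3.0},
                   'p**n': {'p**n': 1.0}}
# P(notporn) falls from 1/3 after one 'p**n' judgment to 1/7 after two, so
# the pointwise mutual information of the second judgment is log2(3/7):
assert abs(_SketchPosterior(_SKETCH_PRIORS, _SKETCH_WORKER2,
                            ['p**n'])['notporn'] - 1.0 / 3.0) < 1e-9
assert abs(_SketchPosterior(_SKETCH_PRIORS, _SKETCH_WORKER2,
                            ['p**n', 'p**n'])['notporn'] - 1.0 / 7.0) < 1e-9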
def testSampleConfusionMatrices(self):
  # Seed the random number generator to produce deterministic test results:
  numpy.random.seed(0)
  # We also need the Dirichlet parameter dicts to have fixed iteration order:
  ipeirotis_cm_dirichlet_ordered = collections.OrderedDict()
  for contributor in sorted(IPEIROTIS_CM_DIRICHLET):
    matrix = collections.OrderedDict()
    for answer in sorted(IPEIROTIS_CM_DIRICHLET[contributor]):
      row = collections.OrderedDict(
          sorted(IPEIROTIS_CM_DIRICHLET[contributor][answer].iteritems()))
      matrix[answer] = row
    ipeirotis_cm_dirichlet_ordered[contributor] = matrix
  # And also for the Dawid & Skene data:
  ds_cm_dirichlet_ordered = collections.OrderedDict()
  for contributor in sorted(DS_CM_DIRICHLET):
    matrix = collections.OrderedDict()
    for answer in sorted(DS_CM_DIRICHLET[contributor]):
      row = collections.OrderedDict(
          sorted(DS_CM_DIRICHLET[contributor][answer].iteritems()))
      matrix[answer] = row
    ds_cm_dirichlet_ordered[contributor] = matrix
  # Check that each row of a set of randomly-sampled matrices sums to unity:
  result = confusion_matrices.SampleConfusionMatrices(
      ipeirotis_cm_dirichlet_ordered)
  for contributor in result:
    for answer in result[contributor]:
      self.assertAlmostEqual(1.0,
                             sum(result[contributor][answer].itervalues()))
  # Check that the mean of 10000 samples is close to the actual mean of the
  # Dirichlet distribution for a set of confusion matrices, for a case with a
  # narrow distribution (the Dawid and Skene example):
  samples = [
      confusion_matrices.SampleConfusionMatrices(ds_cm_dirichlet_ordered)
      for _ in range(10000)]
  expected = {1: {1: {1: 0.845, 2: 0.120, 3: 0.017, 4: 0.017},
                  2: {1: 0.082, 2: 0.835, 3: 0.066, 4: 0.016},
                  3: {1: 0.052, 2: 0.320, 3: 0.575, 4: 0.052},
                  4: {1: 0.077, 2: 0.077, 3: 0.462, 4: 0.385}},
              2: {1: {1: 0.728, 2: 0.181, 3: 0.045, 4: 0.045},
                  2: {1: 0.087, 2: 0.566, 3: 0.303, 4: 0.043},
                  3: {1: 0.111, 2: 0.111, 3: 0.668, 4: 0.111},
                  4: {1: 0.143, 2: 0.143, 3: 0.143, 4: 0.571}},
              3: {1: {1: 0.864, 2: 0.045, 3: 0.045, 4: 0.045},
                  2: {1: 0.131, 2: 0.694, 3: 0.130, 4: 0.043},
                  3: {1: 0.111, 2: 0.336, 3: 0.221, 4: 0.332},
                  4: {1: 0.143, 2: 0.143, 3: 0.429, 4: 0.286}},
              4: {1: {1: 0.818, 2: 0.091, 3: 0.045, 4: 0.045},
                  2: {1: 0.088, 2: 0.740, 3: 0.129, 4: 0.043},
                  3: {1: 0.111, 2: 0.111, 3: 0.557, 4: 0.221},
                  4: {1: 0.143, 2: 0.143, 3: 0.286, 4: 0.429}},
              5: {1: {1: 0.864, 2: 0.045, 3: 0.045, 4: 0.045},
                  2: {1: 0.175, 2: 0.650, 3: 0.131, 4: 0.043},
                  3: {1: 0.111, 2: 0.227, 3: 0.551, 4: 0.111},
                  4: {1: 0.143, 2: 0.143, 3: 0.286, 4: 0.429}}}
  mean = collections.defaultdict(
      lambda: collections.defaultdict(
          lambda: collections.defaultdict(float)))
  for contributor in expected:
    for answer in expected[contributor]:
      for judgment in expected[contributor][answer]:
        mean[contributor][answer][judgment] = numpy.mean(
            [sample[contributor][answer].get(judgment, 0.0)
             for sample in samples])
  test_util.AssertConfusionMatricesAlmostEqual(self, expected, mean, places=2)
  # And again for a broad distribution (the Ipeirotis example), although now
  # we need 20000 samples to get a similar precision:
  samples = [
      confusion_matrices.SampleConfusionMatrices(
          ipeirotis_cm_dirichlet_ordered)
      for _ in range(20000)]
  expected = {'worker1': {'notporn': {'notporn': 0.2, 'p**n': 0.8},
                          'p**n': {'notporn': 0.25, 'p**n': 0.75}},
              'worker2': {'notporn': {'notporn': 0.6, 'p**n': 0.4},
                          'p**n': {'notporn': 0.25, 'p**n': 0.75}},
              'worker3': {'notporn': {'notporn': 0.8, 'p**n': 0.2},
                          'p**n': {'notporn': 0.25, 'p**n': 0.75}},
              'worker4': {'notporn': {'notporn': 0.8, 'p**n': 0.2},
                          'p**n': {'notporn': 0.25, 'p**n': 0.75}},
              'worker5': {'notporn': {'notporn': 0.2, 'p**n': 0.8},
                          'p**n': {'notporn': 0.75, 'p**n': 0.25}}}
  mean = collections.defaultdict(
      lambda: collections.defaultdict(
          lambda: collections.defaultdict(float)))
  for contributor in expected:
    for answer in expected[contributor]:
      for judgment in expected[contributor][answer]:
        mean[contributor][answer][judgment] = numpy.mean(
            [sample[contributor][answer].get(judgment, 0.0)
             for sample in samples])
  test_util.AssertConfusionMatricesAlmostEqual(self, expected, mean, places=2)
  # We'll also check the sample variance in this case.
  # Define three quantities for convenience in the matrix below:
  x = 4.0 / 150.0
  y = 6.0 / 150.0
  z = 3.0 / 80.0
  expected = {'worker1': {'notporn': {'notporn': x, 'p**n': x},
                          'p**n': {'notporn': z, 'p**n': z}},
              'worker2': {'notporn': {'notporn': y, 'p**n': y},
                          'p**n': {'notporn': z, 'p**n': z}},
              'worker3': {'notporn': {'notporn': x, 'p**n': x},
                          'p**n': {'notporn': z, 'p**n': z}},
              'worker4': {'notporn': {'notporn': x, 'p**n': x},
                          'p**n': {'notporn': z, 'p**n': z}},
              'worker5': {'notporn': {'notporn': x, 'p**n': x},
                          'p**n': {'notporn': z, 'p**n': z}}}
  variance = collections.defaultdict(
      lambda: collections.defaultdict(
          lambda: collections.defaultdict(float)))
  for contributor in expected:
    for answer in expected[contributor]:
      for judgment in expected[contributor][answer]:
        variance[contributor][answer][judgment] = numpy.var(
            [sample[contributor][answer].get(judgment, 0.0)
             for sample in samples])
  test_util.AssertConfusionMatricesAlmostEqual(self, expected, variance,
                                               places=2)
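
# The constants x, y, and z above match the closed-form moments of a
# Dirichlet marginal: mean_j = alpha_j / alpha_0 and
# var_j = alpha_j * (alpha_0 - alpha_j) / (alpha_0**2 * (alpha_0 + 1)),
# where alpha_0 = sum_k alpha_k.  For example, a two-category row with
# alpha = (1, 4) gives mean 0.2 and variance 1 * 4 / (25 * 6) = 4/150, which
# is the x used for worker1's 'notporn' row.  (The alpha values here are
# inferred from the expected moments, not copied from
# IPEIROTIS_CM_DIRICHLET.)  A sketch of both formulas:
def _SketchDirichletMoments(alpha_row):
  alpha_0 = float(sum(alpha_row.itervalues()))
  mean = {j: a / alpha_0 for j, a in alpha_row.iteritems()}
  variance = {j: a * (alpha_0 - a) / (alpha_0 ** 2 * (alpha_0 + 1))
              for j, a in alpha_row.iteritems()}
  return mean, variance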
def testSetMLEParameters(self):
  # The previous test checked that we call submodels using the correct paths;
  # this test is more end-to-end, checking that the correct submodels are
  # created and that we set the correct parameters for one of them.
  decision_tree_model = decision_tree.DecisionTree()
  decision_tree_model.SetMLEParameters(TEST_DATA)
  # Test that the correct submodels were created:
  self.assertEqual(set(((), ('ABO',), ('Rh',))),
                   set(decision_tree_model.model_tree.keys()))
  # Test the root confusion matrix parameters:
  expected_priors = {'ABO': 1.0 / 3.0, 'Rh': 1.4 / 3.0, 'Other': 0.6 / 3.0}
  test_util.AssertMapsAlmostEqual(
      self, expected_priors, decision_tree_model.model_tree[()].priors)
  expected_cm = {'c1': {'ABO': {'ABO': 0.8 / 1.0, 'Rh': 0.2 / 1.0},
                        'Rh': {'ABO': 0.2 / 1.4, 'Rh': 1.2 / 1.4},
                        'Other': {'Rh': 1.0}},
                 'c2': {'ABO': {'Rh': 1.0},
                        'Rh': {'Rh': 1.0 / 1.4, 'Other': 0.4 / 1.4},
                        'Other': {'Other': 1.0}},
                 'c3': {'ABO': {'ABO': 1.0},
                        'Rh': {'ABO': 1.0 / 1.4, 'Rh': 0.4 / 1.4},
                        'Other': {'Rh': 1.0}}}
  test_util.AssertConfusionMatricesAlmostEqual(
      self, expected_cm,
      decision_tree_model.model_tree[()].confusion_matrices)
  # Test the ('ABO',) confusion matrix parameters:
  expected_priors = {'A': 0.4, 'B': 0.4, 'O': 0.2}
  test_util.AssertMapsAlmostEqual(
      self, expected_priors, decision_tree_model.model_tree[('ABO',)].priors)
  expected = {'c1': {'A': {'A': 1.0},
                     'B': {'A': 1.0}},
              # c2 never said 'ABO', so it has no entry here.
              'c3': {'A': {'B': 1.0},
                     'B': {'B': 1.0},
                     'O': {'O': 1.0}}}
  test_util.AssertConfusionMatricesAlmostEqual(
      self, expected,
      decision_tree_model.model_tree[('ABO',)].confusion_matrices)
  # Test the ('Rh',) confusion matrix parameters:
  expected_priors = {'+': 0.6 / 1.4, '-': 0.8 / 1.4}
  test_util.AssertMapsAlmostEqual(
      self, expected_priors, decision_tree_model.model_tree[('Rh',)].priors)
  expected = {'c1': {'+': {'+': 1.0},
                     '-': {'+': 0.5, '-': 0.5}},
              'c2': {'+': {'+': 0.2 / 0.6, '-': 0.4 / 0.6},
                     '-': {'-': 1.0}},
              'c3': {'-': {'-': 1.0}}}
  test_util.AssertConfusionMatricesAlmostEqual(
      self, expected,
      decision_tree_model.model_tree[('Rh',)].confusion_matrices)
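
# The subtree priors above illustrate the conditioning step: the resolution
# weight of each full path is renormalized by the total weight under its
# prefix, so ('Rh',) weights 0.6 and 0.8 become priors 0.6/1.4 and 0.8/1.4.
# A sketch of that renormalization; the helper and its input format are
# hypothetical, not the DecisionTree API:
def _SketchSubtreePriors(path_weights, prefix):
  """path_weights: {full path tuple: weight}; returns priors under prefix."""
  depth = len(prefix)
  conditional = collections.defaultdict(float)
  for path, weight in path_weights.iteritems():
    if path[:depth] == prefix and len(path) > depth:
      conditional[path[depth]] += weight
  total = sum(conditional.itervalues())
  return {answer: weight / total
          for answer, weight in conditional.iteritems()}
# E.g. _SketchSubtreePriors({('Rh', '+'): 0.6, ('Rh', '-'): 0.8}, ('Rh',))
# yields {'+': 0.6 / 1.4, '-': 0.8 / 1.4}, matching expected_priors above.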