# NOTE(review): a class with this exact name is defined a second time later in
# this file; the later definition rebinds the name, so only one of the two is
# reachable through the module attribute.
class TolerantEvaluatorFitTransformTests(TestCase, TolerantFittingCases):
    """Tolerant-evaluator test cases driven through ``fit_transform``.

    We try to mimic most of the cases seen on the Fit tests, and check that
    they work equivalently with ``fit_transform``.
    """

    # Presumably read by the TolerantFittingCases mixin to pick the method
    # under test -- confirm against the mixin.
    fit_method_name = 'fit_transform'

    def test_sample_is_excluded_if_any_feature_fails_when_evaluating_it(self):
        self.ev = TolerantFeatureEvaluator(
            [DescriptionFeature, EntireSampleFeature])
        self.ev.FEATURE_STRICT_UNTIL = 0
        self.ev.FEATURE_MAX_ERRORS_ALLOWED = len(SAMPLES) + 1  # dont exclude
        samples = SAMPLES[:]
        nodescription = {'nodescription': u'this sample has no description'}
        samples.append(nodescription)
        # Materialize exactly once. If fit_transform hands back a single-pass
        # iterable, counting it and then iterating it again would leave the
        # second assertion looking at an empty sequence (always passing).
        result = list(self.ev.fit_transform(samples))
        self.assertLess(len(result), len(samples))
        # EntireSampleFeature is the last, so is the last value per tuple
        self.assertNotIn(nodescription, [r[-1] for r in result])

    def test_if_a_feature_is_excluded_all_results_doesnt_include_it(self):
        # This means: if a Feature evaluated fine for some samples until it
        # was excluded, once we decided to exclude it, we must make sure that
        # previous samples for which this feature was evaluated are now
        # stripped out of those evaluations.
        self.ev = TolerantFeatureEvaluator([DescriptionFeature, DumbFeatureA])
        self.ev.FEATURE_STRICT_UNTIL = 0
        self.ev.FEATURE_MAX_ERRORS_ALLOWED = 0  # No feature failure tolerated
        # Build a list so the emptiness check below is meaningful: a lazy
        # iterator object is truthy even when it yields nothing.
        result = list(
            self.ev.fit_transform(SAMPLES + [{'nodescription': u'tada!'}]))
        # Check that there are results. Otherwise, next loop is dumb
        self.assertTrue(result)
        for r in result:
            self.assertEqual(len(r), 1)  # only one value per sample
            self.assertEqual(r[0], 'a')  # Remember DumbFeatureA returns 'a'

    def test_when_feature_is_excluded_discarded_samples_are_reevaluated(self):
        self.ev = TolerantFeatureEvaluator(
            [DescriptionFeature, DumbFeatureA, EntireSampleFeature])
        self.ev.FEATURE_MAX_ERRORS_ALLOWED = 0  # No feature failure tolerated
        samples = SAMPLES[:]
        nodescription = {'nodescription': u'this sample has no description'}
        samples.append(nodescription)
        result = list(self.ev.fit_transform(samples))
        self.assertEqual(len(samples), len(result))
        # EntireSampleFeature is the last, so is the last value per tuple
        self.assertIn(nodescription, [r[-1] for r in result])

    def test_consumable_is_consumed_only_once(self):
        samples = (s for s in SAMPLES)  # can be consumed once only
        self.ev = TolerantFeatureEvaluator([EntireSampleFeature])
        result = list(self.ev.fit_transform(samples))
        self.assertEqual(len(SAMPLES), len(result))
# NOTE(review): this redefines a class of the same name that appears earlier
# in this file; this later definition is the one the module name ends up
# bound to.
class TolerantEvaluatorFitTransformTests(TestCase, TolerantFittingCases):
    """Tolerant-evaluator test cases driven through ``fit_transform``.

    We try to mimic most of the cases seen on the Fit tests, and check that
    they work equivalently with ``fit_transform``.
    """

    # Presumably read by the TolerantFittingCases mixin to pick the method
    # under test -- confirm against the mixin.
    fit_method_name = 'fit_transform'

    def test_sample_is_excluded_if_any_feature_fails_when_evaluating_it(self):
        self.ev = TolerantFeatureEvaluator(
            [DescriptionFeature, EntireSampleFeature])
        self.ev.FEATURE_STRICT_UNTIL = 0
        self.ev.FEATURE_MAX_ERRORS_ALLOWED = len(SAMPLES) + 1  # dont exclude
        samples = SAMPLES[:]
        nodescription = {'nodescription': u'this sample has no description'}
        samples.append(nodescription)
        # Materialize exactly once. If fit_transform hands back a single-pass
        # iterable, counting it and then iterating it again would leave the
        # second assertion looking at an empty sequence (always passing).
        result = list(self.ev.fit_transform(samples))
        self.assertLess(len(result), len(samples))
        # EntireSampleFeature is the last, so is the last value per tuple
        self.assertNotIn(nodescription, [r[-1] for r in result])

    def test_if_a_feature_is_excluded_all_results_doesnt_include_it(self):
        # This means: if a Feature evaluated fine for some samples until it
        # was excluded, once we decided to exclude it, we must make sure that
        # previous samples for which this feature was evaluated are now
        # stripped out of those evaluations.
        self.ev = TolerantFeatureEvaluator([DescriptionFeature, DumbFeatureA])
        self.ev.FEATURE_STRICT_UNTIL = 0
        self.ev.FEATURE_MAX_ERRORS_ALLOWED = 0  # No feature failure tolerated
        # Build a list so the emptiness check below is meaningful: a lazy
        # iterator object is truthy even when it yields nothing.
        result = list(
            self.ev.fit_transform(SAMPLES + [{'nodescription': u'tada!'}]))
        # Check that there are results. Otherwise, next loop is dumb
        self.assertTrue(result)
        for r in result:
            self.assertEqual(len(r), 1)  # only one value per sample
            self.assertEqual(r[0], 'a')  # Remember DumbFeatureA returns 'a'

    def test_when_feature_is_excluded_discarded_samples_are_reevaluated(self):
        self.ev = TolerantFeatureEvaluator(
            [DescriptionFeature, DumbFeatureA, EntireSampleFeature])
        self.ev.FEATURE_MAX_ERRORS_ALLOWED = 0  # No feature failure tolerated
        samples = SAMPLES[:]
        nodescription = {'nodescription': u'this sample has no description'}
        samples.append(nodescription)
        result = list(self.ev.fit_transform(samples))
        self.assertEqual(len(samples), len(result))
        # EntireSampleFeature is the last, so is the last value per tuple
        self.assertIn(nodescription, [r[-1] for r in result])

    def test_consumable_is_consumed_only_once(self):
        samples = (s for s in SAMPLES)  # can be consumed once only
        self.ev = TolerantFeatureEvaluator([EntireSampleFeature])
        result = list(self.ev.fit_transform(samples))
        self.assertEqual(len(SAMPLES), len(result))