def test_score_with_nans_ignore(self):
    """
    With ignore_unknown=True, an observation carrying a NaN feature value
    must get a non-NaN score equal to the score of the same observation
    with that feature left out. Checked both when the model is fit on the
    clean observations and when the training data itself contains a NaN.
    """
    # training data in which a known feature holds a NaN value
    # (the key must be spelled 'feature_a' so an existing feature is hit)
    obs = deepcopy(self.observations)
    obs[0]['feature_a'] = np.nan

    to_be_scored = {
        'feature_a': np.nan,
        'feature_b': 'b_val_1',
        'feature_c': 'c_val_1'
    }
    actually_scored = {'feature_b': 'b_val_1', 'feature_c': 'c_val_1'}

    # first pass: fit on clean observations, score observation with nan value
    # second pass: fit includes an observation with a nan value
    for training_observations in (self.observations, obs):
        self.cbrw = CBRW(ignore_unknown=True)
        self.cbrw.add_observations(training_observations)
        self.cbrw.fit()

        score = self.cbrw.score(to_be_scored)
        actual_score = self.cbrw.score(actually_scored)
        self.assertFalse(isnan(score[0]))
        self.assertEqual(score, actual_score)
def test_score_unknown_features_default(self):
    """
    With the default detector from the fixture, an observation containing
    an unseen value or an unseen feature is expected to score NaN, while a
    training observation scored in the same call gets a score in [0, 1].
    """
    self.cbrw.fit()

    unscorable_observations = (
        # all features known, but feature_a holds a value never observed
        {
            'feature_a': 'a_val_x',
            'feature_b': 'b_val_1',
            'feature_c': 'c_val_1'
        },
        # feature_x itself has never been observed
        {
            'feature_x': 'x_val_x',
            'feature_b': 'b_val_1',
            'feature_c': 'c_val_1'
        },
    )
    for observation in unscorable_observations:
        result = self.cbrw.score(observation)
        self.assertTrue(isnan(result[0]))

    # a valid and an invalid observation scored in a single call
    batch = [
        self.observations[0],
        {'feature_x': 'x_val_x', 'feature_b': 'b_val_1'}
    ]
    batch_scores = self.cbrw.score(batch)
    good = batch_scores[0]
    bad = batch_scores[1]

    self.assertFalse(isnan(good))
    self.assertGreaterEqual(good, 0)
    self.assertLessEqual(good, 1)
    self.assertTrue(isnan(bad))
def test_value_scores_unknown_features_ignore(self):
    """
    With ignore_unknown=True, unknown features/values are expected to get
    a value score of 0 while the known features keep the value scores they
    receive when scored on their own.
    """
    self.cbrw = CBRW(ignore_unknown=True)
    self.cbrw.add_observations(self.observations)
    self.cbrw.fit()

    # first pair: known feature with an unseen value
    # second pair: feature that has never been observed
    for unknown_name, unknown_value in (('feature_a', 'a_val_x'),
                                        ('feature_x', 'x_val_x')):
        candidate = {
            unknown_name: unknown_value,
            'feature_b': 'b_val_1',
            'feature_c': 'c_val_1'
        }
        known_only = {'feature_b': 'b_val_1', 'feature_c': 'c_val_1'}

        observed = self.cbrw.value_scores(candidate)[0]
        reference = self.cbrw.value_scores(known_only)[0]

        self.assertTrue(all(not isnan(vs) for vs in observed.values()))
        self.assertEqual(observed[unknown_name], 0)
        self.assertEqual(observed['feature_b'], reference['feature_b'])
        self.assertEqual(observed['feature_c'], reference['feature_c'])

    # observation in which no feature has previously been observed
    all_unknown = {
        'feature_x': 'x_val_x',
        'feature_y': 'y_val_1',
        'feature_z': 'z_val_1'
    }
    observed = self.cbrw.value_scores(all_unknown)[0]
    self.assertTrue(all(not isnan(vs) for vs in observed.values()))
    self.assertTrue(all(vs == 0 for vs in observed.values()))
def test_isnan(self):
    """
    Table-driven check of cnt.isnan: NaN inputs must yield True, while
    zero/non-zero numbers and the string 'nan' must yield False.
    """
    # (case label, input value, expected isnan result)
    cases = [
        ('numpy nan', np.nan, True),
        ('float nan', float('nan'), True),
        ('int zero', 0, False),
        ('float zero', 0.0, False),
        ('int nonzero', 456, False),
        ('float nonzero', 10.123, False),
        ('string', 'nan', False),
    ]
    for label, value, expected in cases:
        # the label is passed as the failure message to identify the case
        self.assertEqual(cnt.isnan(value), expected, label)
class TestObservationCounterWithMissingData(unittest.TestCase):
    """
    Unit tests for ObservationCounter when observations contain NaN values
    """

    observations = [{
        'feature_a': 'a_val_1',
        'feature_c': 'c_val_2',
        'feature_d': np.nan
    }, {
        'feature_b': 'b_val_1',
        'feature_a': 'a_val_1',
        'feature_c': np.nan
    }]

    # keep a set of all non-NaN (feature_name, feature_val) pairs for testing;
    # a comprehension is used so no loop variables leak into the class
    # namespace as stray attributes
    all_index_keys = {
        item
        for observation in observations
        for item in observation.items()
        if not cnt.isnan(cnt.get_feature_value(item))
    }

    def setUp(self):
        """Create a fresh counter and feed it the class-level observations."""
        self.oc = cnt.ObservationCounter()
        self.oc.update(self.observations)

    def test_update(self):
        """
        Verify n_obs, index, counts and joint_counts after update(); the
        expected values below exclude every NaN-valued item.
        """
        # test n_obs: compare the whole mapping so that an empty or
        # incomplete n_obs fails instead of passing vacuously
        expected_counts = {'feature_a': 2, 'feature_b': 1, 'feature_c': 1}
        self.assertDictEqual(dict(self.oc.n_obs), expected_counts)

        # test index
        self.assertSetEqual(set(self.oc.index.keys()), self.all_index_keys)

        # test counts
        table = {
            'feature_a': {
                'expected': [(('feature_a', 'a_val_1'), 2)]
            },
            'feature_b': {
                'expected': [(('feature_b', 'b_val_1'), 1)]
            },
            'feature_c': {
                'expected': [(('feature_c', 'c_val_2'), 1)]
            },
            # feature_d only ever appears with a NaN value
            'feature_d': {
                'expected': []
            }
        }
        for feature, test in table.items():
            counts = self.oc.counts.get(feature, {})
            self.assertCountEqual(counts.items(), test['expected'], feature)

        # test joint_counts
        expected_joint_counts = {
            (('feature_a', 'a_val_1'), ('feature_b', 'b_val_1')): 1,
            (('feature_a', 'a_val_1'), ('feature_c', 'c_val_2')): 1,
        }
        self.assertDictEqual(self.oc.joint_counts, expected_joint_counts)
def test_score_unknown_features_ignore(self):
    """
    With ignore_unknown=True, an observation with an unknown feature or
    value is expected to score the same as the observation restricted to
    its known features, and an entirely unknown observation scores 0.
    """
    self.cbrw = CBRW(ignore_unknown=True)
    self.cbrw.add_observations(self.observations)
    self.cbrw.fit()

    partially_unknown = (
        # all features known, but feature_a holds a value never observed
        {
            'feature_a': 'a_val_x',
            'feature_b': 'b_val_1',
            'feature_c': 'c_val_1'
        },
        # feature_x itself has never been observed
        {
            'feature_x': 'x_val_x',
            'feature_b': 'b_val_1',
            'feature_c': 'c_val_1'
        },
    )
    for candidate in partially_unknown:
        known_only = {
            'feature_b': 'b_val_1',
            'feature_c': 'c_val_1'
        }
        candidate_score = self.cbrw.score(candidate)
        reference_score = self.cbrw.score(known_only)
        self.assertFalse(isnan(candidate_score[0]))
        self.assertEqual(candidate_score, reference_score)

    # observation in which no feature has previously been observed
    all_unknown = {
        'feature_x': 'x_val_x',
        'feature_y': 'y_val_1',
        'feature_z': 'z_val_1'
    }
    candidate_score = self.cbrw.score(all_unknown)
    self.assertFalse(isnan(candidate_score[0]))
    self.assertEqual(candidate_score[0], 0)
def test_value_scores_with_nans_default(self):
    """
    With a default detector, the value score of a NaN feature value is
    expected to be NaN, both when the model is fit on the fixture's
    observations and when the training data itself contains a NaN.
    """
    # training data in which a known feature holds a NaN value
    # (the key must be spelled 'feature_a' so an existing feature is hit)
    obs = deepcopy(self.observations)
    obs[0]['feature_a'] = np.nan

    to_be_scored = {
        'feature_a': np.nan,
        'feature_b': 'b_val_1',
        'feature_c': 'c_val_1'
    }

    # score observation with nan value
    self.cbrw.fit()
    value_scores = self.cbrw.value_scores(to_be_scored)
    self.assertTrue(isnan(value_scores[0]['feature_a']))

    # fit includes observation with nan value
    self.cbrw = CBRW()
    self.cbrw.add_observations(obs)
    self.cbrw.fit()
    value_scores = self.cbrw.value_scores(to_be_scored)
    self.assertTrue(isnan(value_scores[0]['feature_a']))
def test_value_scores_with_nans_ignore(self):
    """
    With ignore_unknown=True, a NaN feature value is expected to get a
    value score of 0 while the remaining features keep the value scores
    they receive when scored without the NaN feature. Checked both when
    the model is fit on clean observations and when the training data
    itself contains a NaN.
    """
    # training data in which a known feature holds a NaN value
    # (the key must be spelled 'feature_a' so an existing feature is hit)
    obs = deepcopy(self.observations)
    obs[0]['feature_a'] = np.nan

    to_be_scored = {
        'feature_a': np.nan,
        'feature_b': 'b_val_1',
        'feature_c': 'c_val_1'
    }
    actually_scored = {'feature_b': 'b_val_1', 'feature_c': 'c_val_1'}

    # first pass: fit on clean observations, score observation with nan value
    # second pass: fit includes an observation with a nan value
    for training_observations in (self.observations, obs):
        self.cbrw = CBRW(ignore_unknown=True)
        self.cbrw.add_observations(training_observations)
        self.cbrw.fit()

        value_scores = self.cbrw.value_scores(to_be_scored)[0]
        actual_value_scores = self.cbrw.value_scores(actually_scored)[0]

        self.assertTrue(all(not isnan(vs) for vs in value_scores.values()))
        self.assertEqual(value_scores['feature_a'], 0)
        self.assertEqual(value_scores['feature_b'],
                         actual_value_scores['feature_b'])
        self.assertEqual(value_scores['feature_c'],
                         actual_value_scores['feature_c'])
def test_value_scores_unknown_features_default(self):
    """
    With the default detector from the fixture, the value score of an
    unknown feature/value is expected to be NaN while known features get
    non-NaN value scores, including when several observations are scored
    in one call.
    """
    self.cbrw.fit()

    # first pair: known feature with an unseen value
    # second pair: feature that has never been observed
    for unknown_name, unknown_value in (('feature_a', 'a_val_x'),
                                        ('feature_x', 'x_val_x')):
        candidate = {
            unknown_name: unknown_value,
            'feature_b': 'b_val_1',
            'feature_c': 'c_val_1'
        }
        per_feature = self.cbrw.value_scores(candidate)[0]
        self.assertTrue(isnan(per_feature[unknown_name]))
        self.assertFalse(isnan(per_feature['feature_b']))
        self.assertFalse(isnan(per_feature['feature_c']))

    # a valid and an invalid observation scored in a single call
    batch = [
        self.observations[0],
        {
            'feature_x': 'x_val_x',
            'feature_b': 'b_val_1'
        }
    ]
    batch_value_scores = self.cbrw.value_scores(batch)
    good_scores = batch_value_scores[0]
    bad_scores = batch_value_scores[1]

    self.assertTrue(all(not isnan(vs) for vs in good_scores.values()))
    self.assertTrue(any(isnan(vs) for vs in bad_scores.values()))