def test_score_with_nans_ignore(self):
        """
        Check that a NaN feature value is ignored when ignore_unknown=True.

        An observation whose ``feature_a`` is NaN must receive the same,
        non-NaN score as that observation with ``feature_a`` left out.
        This is checked for a model fit on ``self.observations`` as-is and
        for a model fit on a copy in which one ``feature_a`` value is NaN.
        """
        # work on a copy so self.observations stays untouched, and put a
        # NaN under 'feature_a' in the data used for the second fit
        obs = deepcopy(self.observations)
        obs[0]['feature_a'] = np.nan

        to_be_scored = {
            'feature_a': np.nan,
            'feature_b': 'b_val_1',
            'feature_c': 'c_val_1'
        }
        actually_scored = {'feature_b': 'b_val_1', 'feature_c': 'c_val_1'}

        # first pass:  only the scored observation has a nan value
        # second pass: the fitted observations include a nan value as well
        for training_observations in (self.observations, obs):
            self.cbrw = CBRW(ignore_unknown=True)
            self.cbrw.add_observations(training_observations)
            self.cbrw.fit()
            score = self.cbrw.score(to_be_scored)
            actual_score = self.cbrw.score(actually_scored)
            self.assertFalse(isnan(score[0]))
            self.assertEqual(score, actual_score)
    def test_score_unknown_features_default(self):
        """
        Check scoring of observations that contain unknown data, using the
        detector already stored on ``self.cbrw`` (presumably built in setUp
        with default settings -- not visible from this method).

        An observation with an unseen value or an unseen feature name must
        score NaN; a known observation scored in the same call must still
        get a score in [0, 1].
        """
        self.cbrw.fit()

        nan_expected = (
            # every feature name is known, but 'a_val_x' is a new value
            {
                'feature_a': 'a_val_x',
                'feature_b': 'b_val_1',
                'feature_c': 'c_val_1'
            },
            # 'feature_x' is a feature name that was never observed
            {
                'feature_x': 'x_val_x',
                'feature_b': 'b_val_1',
                'feature_c': 'c_val_1'
            },
        )
        for candidate in nan_expected:
            result = self.cbrw.score(candidate)
            self.assertTrue(isnan(result[0]))

        # a scoreable and an unscoreable observation in a single call
        mixed_batch = [
            self.observations[0],
            {'feature_x': 'x_val_x', 'feature_b': 'b_val_1'}
        ]
        batch_scores = self.cbrw.score(mixed_batch)
        good = batch_scores[0]
        bad = batch_scores[1]
        self.assertFalse(isnan(good))
        self.assertGreaterEqual(good, 0)
        self.assertLessEqual(good, 1)
        self.assertTrue(isnan(bad))
    def test_value_scores_unknown_features_ignore(self):
        """
        Check per-feature value scores when ``ignore_unknown=True``.

        Unknown values and unknown feature names must get a value score of
        0 (never NaN), while the known features must get the same value
        scores as when the observation is scored without the unknown entry.
        """
        self.cbrw = CBRW(ignore_unknown=True)
        self.cbrw.add_observations(self.observations)
        self.cbrw.fit()

        # (observation to score, name of the entry that is unknown)
        partially_unknown = (
            # known feature name carrying a value never seen before
            ({
                'feature_a': 'a_val_x',
                'feature_b': 'b_val_1',
                'feature_c': 'c_val_1'
            }, 'feature_a'),
            # feature name never seen before
            ({
                'feature_x': 'x_val_x',
                'feature_b': 'b_val_1',
                'feature_c': 'c_val_1'
            }, 'feature_x'),
        )
        for candidate, unknown_name in partially_unknown:
            reference = {'feature_b': 'b_val_1', 'feature_c': 'c_val_1'}
            got = self.cbrw.value_scores(candidate)[0]
            want = self.cbrw.value_scores(reference)[0]
            self.assertFalse(any(isnan(v) for v in got.values()))
            self.assertEqual(got[unknown_name], 0)
            for known_name in ('feature_b', 'feature_c'):
                self.assertEqual(got[known_name], want[known_name])

        # nothing in this observation has been seen before
        fully_unknown = {
            'feature_x': 'x_val_x',
            'feature_y': 'y_val_1',
            'feature_z': 'z_val_1'
        }
        got = self.cbrw.value_scores(fully_unknown)[0]
        self.assertFalse(any(isnan(v) for v in got.values()))
        self.assertTrue(all(v == 0 for v in got.values()))
# Example #4
# 0
 def test_isnan(self):
     """
     Check cnt.isnan against NaN and non-NaN inputs of several types.
     """
     # (case name, value passed to cnt.isnan, expected result)
     cases = (
         ('numpy nan', np.nan, True),
         ('float nan', float('nan'), True),
         ('int zero', 0, False),
         ('float zero', 0.0, False),
         ('int nonzero', 456, False),
         ('float nonzero', 10.123, False),
         # the string 'nan' is not a NaN value
         ('string', 'nan', False),
     )
     for label, value, expected in cases:
         outcome = cnt.isnan(value)
         # the case name is passed as the failure message
         self.assertEqual(outcome, expected, label)
# Example #5
# 0
class TestObservationCounterWithMissingData(unittest.TestCase):
    """
    Unit tests for ObservationCounter when observations contain NaN values
    """

    # two observations, each with exactly one NaN-valued feature
    observations = [
        {
            'feature_a': 'a_val_1',
            'feature_c': 'c_val_2',
            'feature_d': np.nan
        },
        {
            'feature_b': 'b_val_1',
            'feature_a': 'a_val_1',
            'feature_c': np.nan
        },
    ]

    # every (feature_name, feature_val) pair whose value is not NaN;
    # compared against the counter's index keys in test_update
    all_index_keys = set()
    for observation in observations:
        for item in observation.items():
            if cnt.isnan(cnt.get_feature_value(item)):
                continue
            all_index_keys.add(item)

    def setUp(self):
        """Build a fresh counter loaded with the class observations."""
        counter = cnt.ObservationCounter()
        counter.update(self.observations)
        self.oc = counter

    def test_update(self):
        """
        Check n_obs, index, counts and joint_counts after update().
        """
        counter = self.oc

        # n_obs: per-feature totals; 'feature_d' has no expected entry
        expected_n_obs = {'feature_a': 2, 'feature_b': 1, 'feature_c': 1}
        for name, n_seen in counter.n_obs.items():
            self.assertEqual(n_seen, expected_n_obs[name])

        # index: keyed by exactly the non-NaN (name, value) pairs
        self.assertSetEqual(set(counter.index.keys()), self.all_index_keys)

        # counts: per-feature (pair, count) items; a feature missing from
        # counts is treated as having no items
        expected_counts = {
            'feature_a': [(('feature_a', 'a_val_1'), 2)],
            'feature_b': [(('feature_b', 'b_val_1'), 1)],
            'feature_c': [(('feature_c', 'c_val_2'), 1)],
            'feature_d': [],
        }
        for feature, expected_items in expected_counts.items():
            observed = counter.counts.get(feature, {})
            self.assertCountEqual(observed.items(), expected_items, feature)

        # joint_counts: only pairs of non-NaN values from one observation
        self.assertDictEqual(
            counter.joint_counts,
            {
                (('feature_a', 'a_val_1'), ('feature_b', 'b_val_1')): 1,
                (('feature_a', 'a_val_1'), ('feature_c', 'c_val_2')): 1,
            },
        )
    def test_score_unknown_features_ignore(self):
        """
        Check observation scores when ``ignore_unknown=True``.

        An observation with one unknown value or unknown feature name must
        score the same as that observation without the unknown entry, and
        an observation made only of unknown features must score 0.
        No score may be NaN.
        """
        self.cbrw = CBRW(ignore_unknown=True)
        self.cbrw.add_observations(self.observations)
        self.cbrw.fit()

        partially_unknown = (
            # known feature name carrying a value never seen before
            {
                'feature_a': 'a_val_x',
                'feature_b': 'b_val_1',
                'feature_c': 'c_val_1'
            },
            # feature name never seen before
            {
                'feature_x': 'x_val_x',
                'feature_b': 'b_val_1',
                'feature_c': 'c_val_1'
            },
        )
        for candidate in partially_unknown:
            # the same observation with the unknown entry removed
            reference = {
                'feature_b': 'b_val_1',
                'feature_c': 'c_val_1'
            }
            got = self.cbrw.score(candidate)
            want = self.cbrw.score(reference)
            self.assertFalse(isnan(got[0]))
            self.assertEqual(got, want)

        # nothing in this observation has been seen before
        fully_unknown = {
            'feature_x': 'x_val_x',
            'feature_y': 'y_val_1',
            'feature_z': 'z_val_1'
        }
        got = self.cbrw.score(fully_unknown)
        self.assertFalse(isnan(got[0]))
        self.assertEqual(got[0], 0)
    def test_value_scores_with_nans_default(self):
        """
        Check that a NaN feature value receives a NaN value score when
        ``ignore_unknown`` is not passed to the detector.

        Verified first with the detector already stored on ``self.cbrw``
        (presumably built in setUp -- not visible from this method), then
        with a fresh ``CBRW()`` fit on data whose ``feature_a`` itself
        contains a NaN.
        """
        # work on a copy so self.observations stays untouched, and put a
        # NaN under 'feature_a' in the data used for the second fit
        obs = deepcopy(self.observations)
        obs[0]['feature_a'] = np.nan

        to_be_scored = {
            'feature_a': np.nan,
            'feature_b': 'b_val_1',
            'feature_c': 'c_val_1'
        }

        # score observation with nan value
        self.cbrw.fit()
        value_scores = self.cbrw.value_scores(to_be_scored)
        self.assertTrue(isnan(value_scores[0]['feature_a']))

        # fit includes observation with nan value
        self.cbrw = CBRW()
        self.cbrw.add_observations(obs)
        self.cbrw.fit()
        value_scores = self.cbrw.value_scores(to_be_scored)
        self.assertTrue(isnan(value_scores[0]['feature_a']))
    def test_value_scores_with_nans_ignore(self):
        """
        Check per-feature value scores for a NaN value when
        ``ignore_unknown=True``.

        The NaN-valued ``feature_a`` must get a value score of 0 (never
        NaN), and the other features must get the same value scores as
        when the observation is scored without ``feature_a``.  This is
        checked for a model fit on ``self.observations`` as-is and for a
        model fit on a copy in which one ``feature_a`` value is NaN.
        """
        # work on a copy so self.observations stays untouched, and put a
        # NaN under 'feature_a' in the data used for the second fit
        obs = deepcopy(self.observations)
        obs[0]['feature_a'] = np.nan

        to_be_scored = {
            'feature_a': np.nan,
            'feature_b': 'b_val_1',
            'feature_c': 'c_val_1'
        }
        actually_scored = {'feature_b': 'b_val_1', 'feature_c': 'c_val_1'}

        # first pass:  only the scored observation has a nan value
        # second pass: the fitted observations include a nan value as well
        for training_observations in (self.observations, obs):
            self.cbrw = CBRW(ignore_unknown=True)
            self.cbrw.add_observations(training_observations)
            self.cbrw.fit()
            value_scores = self.cbrw.value_scores(to_be_scored)[0]
            actual_value_scores = self.cbrw.value_scores(actually_scored)[0]
            self.assertTrue(
                all(not isnan(vs) for vs in value_scores.values()))
            self.assertEqual(value_scores['feature_a'], 0)
            self.assertEqual(value_scores['feature_b'],
                             actual_value_scores['feature_b'])
            self.assertEqual(value_scores['feature_c'],
                             actual_value_scores['feature_c'])
    def test_value_scores_unknown_features_default(self):
        """
        Check per-feature value scores for unknown data, using the detector
        already stored on ``self.cbrw`` (presumably built in setUp with
        default settings -- not visible from this method).

        The unknown value or feature must get a NaN value score while the
        known features of the same observation must not.
        """
        self.cbrw.fit()

        # (observation to score, name of the entry that is unknown)
        partially_unknown = (
            # known feature name carrying a value never seen before
            ({
                'feature_a': 'a_val_x',
                'feature_b': 'b_val_1',
                'feature_c': 'c_val_1'
            }, 'feature_a'),
            # feature name never seen before
            ({
                'feature_x': 'x_val_x',
                'feature_b': 'b_val_1',
                'feature_c': 'c_val_1'
            }, 'feature_x'),
        )
        for candidate, unknown_name in partially_unknown:
            per_feature = self.cbrw.value_scores(candidate)[0]
            self.assertTrue(isnan(per_feature[unknown_name]))
            self.assertFalse(isnan(per_feature['feature_b']))
            self.assertFalse(isnan(per_feature['feature_c']))

        # a scoreable and an unscoreable observation in a single call
        mixed_batch = [
            self.observations[0],
            {'feature_x': 'x_val_x', 'feature_b': 'b_val_1'}
        ]
        batch_scores = self.cbrw.value_scores(mixed_batch)
        good = batch_scores[0]
        bad = batch_scores[1]
        # the known observation has no NaN value score at all ...
        self.assertTrue(all(not isnan(v) for v in good.values()))
        # ... while the other one has at least one
        self.assertTrue(any(isnan(v) for v in bad.values()))