def test_normalize_dense_matrix_enum(self):
    """Check how ``normalize_dense_matrix`` encodes ENUM columns.

    The input matrix has one column per feature, laid out as f1, f2, f3.
    f1 and f3 are ENUM features and f2 is CONTINUOUS.  The expected output
    holds three indicator columns for f1, the f2 column unchanged, and two
    indicator columns for f3.  A row whose f3 entry is
    ``normalization.MISSING_VALUE`` is expected to have all-zero f3 columns.
    """
    normalization_parameters = {
        'f1': NormalizationParameters(
            identify_types.ENUM, None, None, None, None, [12.0, 4.2, 2.1]),
        'f2': NormalizationParameters(
            identify_types.CONTINUOUS, None, 0, 0, 1, None),
        'f3': NormalizationParameters(
            identify_types.ENUM, None, None, None, None, [15.1, -3.2]),
    }
    # The columns of `inputs` and of the expected matrix are written in
    # f1, f2, f3 order.  Sorting the names pins that order explicitly rather
    # than relying on dict iteration order, which is not guaranteed on every
    # interpreter this code may run on.
    features = sorted(normalization_parameters)
    norm_net = core.Net("net")
    blobname_template = '{}_blob'
    blob_map = prepare_normalization(
        norm_net, normalization_parameters, features, blobname_template,
        False)

    inputs = np.array(
        [
            [12.0, 1.0, 15.1],
            [4.2, 2.0, -3.2],
            [2.1, 3.0, 15.1],
            [2.1, 3.0, normalization.MISSING_VALUE],
        ],
        dtype=np.float32)
    normalized_outputs = normalize_dense_matrix(
        inputs, features, normalization_parameters, blob_map, norm_net,
        blobname_template)

    np.testing.assert_array_equal(
        np.array([
            [1, 0, 0, 1.0, 1, 0],
            [0, 1, 0, 2.0, 0, 1],
            [0, 0, 1, 3.0, 1, 0],
            [0, 0, 1, 3.0, 0, 0]  # Missing values should go to all 0
        ]),
        normalized_outputs)
def _prepare_state_normalization(self):
    """Set up the net and blobs used to normalize state features.

    Does nothing when ``self.skip_normalization`` is truthy.  Otherwise it
    stores a new ``core.Net`` named ``state_norm_net``, the blob name
    template ``'{}_input_state'``, and the result of
    ``prepare_normalization`` on ``self``.
    """
    if not self.skip_normalization:
        net = core.Net("state_norm_net")
        self.state_norm_net = net

        template = '{}_input_state'
        self.state_norm_blobname_template = template

        # The trailing True is forwarded as the last positional argument;
        # its meaning is defined by prepare_normalization.
        self.state_norm_blobs = prepare_normalization(
            net,
            self._state_normalization_parameters,
            self._state_features,
            template,
            True,
        )
def test_normalize_feature_map_enum(self):
    """Check ``normalize_feature_map`` on a mix of ENUM and CONTINUOUS data.

    Every normalized array must be finite.  Each ENUM feature is expected
    to come back as indicator rows (one column per possible value), with a
    ``normalization.MISSING_VALUE`` entry giving an all-zero row.  The
    CONTINUOUS feature is expected to come back with its input values.
    """
    first_enum, continuous, second_enum = 'f1', 'f2', 'f3'

    def enum_parameters(possible_values):
        # ENUM parameters only populate the last positional field.
        return NormalizationParameters(
            identify_types.ENUM, None, None, None, None, possible_values)

    normalization_parameters = {
        first_enum: enum_parameters([12.0, 4.2, 2.1]),
        continuous: NormalizationParameters(
            identify_types.CONTINUOUS, None, 0, 0, 1, None),
        second_enum: enum_parameters([15.1, -3.2]),
    }
    feature_value_map = {
        first_enum: np.array([2.1, 4.2, 12.0, 12.0], dtype=np.float32),
        continuous: np.array([1.9, 2.2, 5.0, 1.0], dtype=np.float32),
        second_enum: np.array(
            [-3.2, -3.2, 15.1, normalization.MISSING_VALUE],
            dtype=np.float32),
    }

    features = list(feature_value_map)
    net = core.Net("net")
    template = '{}_blob'
    blobs = prepare_normalization(
        net, normalization_parameters, features, template, False)
    normalized = normalize_feature_map(
        feature_value_map, net, features, blobs, template)

    for values in normalized.values():
        self.assertTrue(np.all(np.isfinite(values)))

    expected_by_feature = [
        (first_enum,
         np.array([[0, 0, 1], [0, 1, 0], [1, 0, 0], [1, 0, 0]])),
        (continuous,
         np.array([[1.9, 2.2, 5.0, 1.0]], dtype=np.float32)),
        (second_enum,
         np.array([
             [0, 1],
             [0, 1],
             [1, 0],
             [0, 0]  # Missing value should go to all 0
         ])),
    ]
    for name, expected in expected_by_feature:
        np.testing.assert_array_equal(expected, normalized[name])
def test_prepare_normalization_and_normalize(self):
    """Normalize the sample data end to end and check each feature type.

    Types and normalization parameters are identified from
    ``preprocessing_util.read_data()``.  After normalization:

    * every parameter's mean and stddev, and every output value, is finite;
    * PROBABILITY outputs map strictly inside (0, 1) under the sigmoid;
    * ENUM outputs have one row per input value and one column per
      possible value, with the 1 at the index of the original value;
    * any other feature has mean close to 0 and sample stddev close to 1
      or 0, unless its identified type is BINARY, and it has a box-cox
      lambda exactly when its identified type is CONTINUOUS.
    """
    feature_value_map = preprocessing_util.read_data()
    types = identify_types.identify_types(feature_value_map)
    types_dict = identify_types.identify_types_dict(feature_value_map)
    normalization_parameters = normalization.identify_parameters(
        feature_value_map, types_dict)

    features = list(feature_value_map)
    net = core.Net("net")
    template = '{}_blob'
    blobs = prepare_normalization(
        net, normalization_parameters, features, template, False)
    normalized_features = normalize_feature_map(
        feature_value_map, net, features, blobs, template)

    finite_flags = [
        np.isfinite(params.stddev) and np.isfinite(params.mean)
        for params in normalization_parameters.values()
    ]
    self.assertTrue(all(finite_flags))

    tolerance = 0.00001
    for name, normalized in six.iteritems(normalized_features):
        self.assertTrue(np.all(np.isfinite(normalized)))
        params = normalization_parameters[name]
        feature_type = params.feature_type

        if feature_type == identify_types.PROBABILITY:
            squashed = special.expit(normalized)
            strictly_inside = np.logical_and(
                np.greater(squashed, 0), np.less(squashed, 1))
            self.assertTrue(np.all(strictly_inside))
        elif feature_type == identify_types.ENUM:
            possible_values = params.possible_values
            originals = feature_value_map[name]
            self.assertEqual(normalized.shape[0], len(originals))
            self.assertEqual(normalized.shape[1], len(possible_values))
            index_of = {
                value: index for index, value in enumerate(possible_values)
            }
            # The row-count assertion above guarantees both sequences have
            # the same length, so pairing them visits every row.
            for original, row in zip(originals, normalized):
                self.assertEqual(
                    index_of[original], np.where(row == 1)[0][0])
        else:
            sample_std = np.std(normalized, ddof=1)
            one_stddev = np.isclose(sample_std, 1, atol=tolerance)
            zero_stddev = np.isclose(sample_std, 0, atol=tolerance)
            zero_mean = np.isclose(np.mean(normalized), 0, atol=tolerance)
            is_binary = types[name] == identify_types.BINARY

            self.assertTrue(np.all(np.logical_or(zero_mean, is_binary)))
            unit_or_zero_std = np.logical_or(one_stddev, zero_stddev)
            self.assertTrue(
                np.all(np.logical_or(unit_or_zero_std, is_binary)))

            has_boxcox = params.boxcox_lambda is not None
            is_ctd = types[name] == identify_types.CONTINUOUS
            # This should be true at the moment
            self.assertTrue(is_ctd == has_boxcox)
def test_prepare_normalization_and_normalize(self):
    """Identify parameters per feature, normalize, and check each type.

    Parameters are identified one feature at a time from
    ``preprocessing_util.read_data()``.  The features keyed ``'normal'``
    and ``'boxcox'`` must be CONTINUOUS (without and with box-cox
    settings respectively); every other key must equal the identified
    feature type.  After normalization:

    * every parameter's mean and stddev, and every output value, is finite;
    * PROBABILITY outputs map strictly inside (0, 1) under the sigmoid;
    * ENUM outputs have one row per input value and one column per
      possible value, with the 1 at the index of the original value;
    * QUANTILE outputs match, to 2 decimal places, the fraction of
      quantiles that the original value reaches or exceeds;
    * BINARY outputs get no further checks;
    * CONTINUOUS outputs have mean close to 0 and sample stddev close to
      1 or 0;
    * any other feature type raises ``NotImplementedError``.
    """
    feature_value_map = preprocessing_util.read_data()

    # The second argument (10) is forwarded as-is; its meaning is defined
    # by normalization.identify_parameter.
    normalization_parameters = {
        name: normalization.identify_parameter(values, 10)
        for name, values in feature_value_map.items()
    }

    for name, params in normalization_parameters.items():
        if name == 'normal':
            self.assertEqual(params.feature_type, 'CONTINUOUS')
            self.assertIs(params.boxcox_lambda, None)
            self.assertIs(params.boxcox_shift, None)
        elif name == 'boxcox':
            self.assertEqual(params.feature_type, 'CONTINUOUS')
            self.assertIsNot(params.boxcox_lambda, None)
            self.assertIsNot(params.boxcox_shift, None)
        else:
            self.assertEqual(params.feature_type, name)

    features = list(feature_value_map)
    net = core.Net("net")
    template = '{}_blob'
    blobs = prepare_normalization(
        net, normalization_parameters, features, template, False)
    normalized_features = normalize_feature_map(
        feature_value_map, net, features, blobs, template)

    finite_flags = [
        np.isfinite(params.stddev) and np.isfinite(params.mean)
        for params in normalization_parameters.values()
    ]
    self.assertTrue(all(finite_flags))

    tolerance = 0.01
    for name, normalized in six.iteritems(normalized_features):
        self.assertTrue(np.all(np.isfinite(normalized)))
        params = normalization_parameters[name]
        feature_type = params.feature_type

        if feature_type == identify_types.PROBABILITY:
            squashed = special.expit(normalized)
            strictly_inside = np.logical_and(
                np.greater(squashed, 0), np.less(squashed, 1))
            self.assertTrue(np.all(strictly_inside))
        elif feature_type == identify_types.ENUM:
            possible_values = params.possible_values
            originals = feature_value_map[name]
            self.assertEqual(normalized.shape[0], len(originals))
            self.assertEqual(normalized.shape[1], len(possible_values))
            index_of = {
                value: index for index, value in enumerate(possible_values)
            }
            # The row-count assertion above guarantees both sequences have
            # the same length, so pairing them visits every row.
            for original, row in zip(originals, normalized):
                self.assertEqual(
                    index_of[original], np.where(row == 1)[0][0])
        elif feature_type == identify_types.QUANTILE:
            quantiles = params.quantiles
            for position, value in enumerate(normalized[0]):
                original = feature_value_map[name][position]
                reached = sum(
                    1 for quantile in quantiles if original >= quantile)
                fraction = reached / float(len(quantiles))
                self.assertAlmostEqual(value, fraction, 2)
        elif feature_type == identify_types.BINARY:
            continue
        elif feature_type == identify_types.CONTINUOUS:
            sample_std = np.std(normalized, ddof=1)
            observed_mean = np.mean(normalized)
            one_stddev = np.isclose(sample_std, 1, atol=tolerance)
            zero_stddev = np.isclose(sample_std, 0, atol=tolerance)
            zero_mean = np.isclose(observed_mean, 0, atol=tolerance)
            self.assertTrue(
                np.all(zero_mean),
                'mean of feature {} is {}, not 0'.format(
                    name, observed_mean)
            )
            self.assertTrue(np.all(np.logical_or(one_stddev, zero_stddev)))
        else:
            raise NotImplementedError()