def test_filtering(self):
    """Learning-category filtering on a TensorDefinition.

    Checks the category partition (categorical/binary/continuous/label) and
    that each filtering call with inference=True raises once the definition
    is inference ready.
    """
    name_t = 'test-tensor'
    f1 = ft.FeatureSource('test-feature-1', ft.FEATURE_TYPE_STRING)
    f2 = ft.FeatureIndex('test-feature-2', ft.FEATURE_TYPE_INT_8, f1)
    f3 = ft.FeatureSource('test-feature-3', ft.FEATURE_TYPE_STRING)
    f4 = ft.FeatureOneHot('test-feature-4', ft.FEATURE_TYPE_INT_8, f3)
    f5 = ft.FeatureSource('test-feature-5', ft.FEATURE_TYPE_FLOAT)
    f6 = ft.FeatureNormalizeScale('test-feature-6', ft.FEATURE_TYPE_FLOAT, f5)
    f7 = ft.FeatureNormalizeStandard('test-feature-7', ft.FEATURE_TYPE_FLOAT, f5)
    f8 = ft.FeatureLabelBinary('test-feature-8', ft.FEATURE_TYPE_INT_8, f2)
    t = ft.TensorDefinition(name_t, [f1, f2, f3, f4, f5, f6, f7, f8])
    self.assertEqual(
        len(t.learning_categories), 4, f'Should be 4 categories. Got {len(t.learning_categories)}')
    self.assertListEqual(t.categorical_features(), [f2])
    self.assertListEqual(t.binary_features(), [f4])
    self.assertListEqual(t.continuous_features(), [f5, f6, f7])
    self.assertListEqual(t.label_features(), [f8])
    # Should fail because the Tensor Definition is ready for inference.
    # Each call gets its own assertRaises context: a single shared context
    # stops at the first raise and leaves the remaining calls unexecuted.
    with self.assertRaises(ft.TensorDefinitionException):
        t.categorical_features(True)
    with self.assertRaises(ft.TensorDefinitionException):
        t.binary_features(True)
    with self.assertRaises(ft.TensorDefinitionException):
        t.continuous_features(True)
    with self.assertRaises(ft.TensorDefinitionException):
        t.label_features(True)
    with self.assertRaises(ft.TensorDefinitionException):
        t.filter_features(ft.LEARNING_CATEGORY_CATEGORICAL, True)
def test_types(self):
    """FeatureHelper.is_feature_of_type recognises each feature type family."""
    # (source feature type, type class to test against, failure message)
    checks = [
        (ft.FEATURE_TYPE_STRING, ft.FeatureTypeString, f'Should have been a StringType'),
        (ft.FEATURE_TYPE_FLOAT, ft.FeatureTypeFloat, f'Should have been a FloatType'),
        (ft.FEATURE_TYPE_FLOAT, ft.FeatureTypeNumerical, f'Should have been a NumericalType'),
        (ft.FEATURE_TYPE_INTEGER, ft.FeatureTypeInteger, f'Should have been an IntegerType'),
        (ft.FEATURE_TYPE_INTEGER, ft.FeatureTypeNumerical, f'Should have been a NumericalType'),
        (ft.FEATURE_TYPE_BOOL, ft.FeatureTypeBool, f'Should have been a BoolType'),
        (ft.FEATURE_TYPE_DATE, ft.FeatureTypeTimeBased, f'Should have been a TimeBaseType'),
    ]
    for feature_type, type_class, message in checks:
        feature = ft.FeatureSource('Source', feature_type)
        self.assertEqual(
            ft.FeatureHelper.is_feature_of_type(feature, type_class), True, message)
def test_creation_base(self):
    """A FeatureGrouper stores every constructor argument and is continuous."""
    grouper_name = 'test'
    grouper_type = ft.FEATURE_TYPE_FLOAT
    base = ft.FeatureSource('Amount', ft.FEATURE_TYPE_FLOAT)
    group = ft.FeatureSource('Source', ft.FEATURE_TYPE_STRING)
    flt = ft.FeatureFilter('Filter', ft.FEATURE_TYPE_BOOL, feature_expression, [group])
    period = ft.TIME_PERIOD_DAY
    window = 3
    aggregator = ft.AGGREGATOR_COUNT
    grouper = ft.FeatureGrouper(
        grouper_name, grouper_type, base, group, flt, period, window, aggregator)
    self.assertIsInstance(grouper, ft.FeatureGrouper, f'Unexpected Type {type(grouper)}')
    self.assertEqual(grouper.name, grouper_name, f'Feature Name should be {grouper_name}')
    self.assertEqual(grouper.type, grouper_type, f'Feature Type should be {grouper_type}')
    self.assertEqual(grouper.group_feature, group, f'Group should have been {group}')
    self.assertEqual(grouper.filter_feature, flt, f'Filter should have been {flt}')
    self.assertEqual(grouper.time_period, period, f'TimePeriod should have been {period}')
    self.assertEqual(grouper.time_window, window, f'TimeWindow should have been {window}')
    self.assertEqual(grouper.aggregator, aggregator, f'Aggregator should have been {aggregator}')
    # Base, group and filter features should all be embedded.
    self.assertEqual(len(grouper.embedded_features), 3, 'Should have had 3 embedded features')
    self.assertEqual(
        grouper.learning_category, ft.LEARNING_CATEGORY_CONTINUOUS,
        f'String should have learning type cont')
def test_equality(self):
    """FeatureGrouper equality must consider every defining attribute.

    Each fg_* variant differs from fg_1 in exactly one attribute, so every
    assertion isolates one field of the equality contract.
    """
    name_1 = 'test_1'
    name_2 = 'test_2'
    f_type_1 = ft.FEATURE_TYPE_FLOAT_64
    f_type_2 = ft.FEATURE_TYPE_FLOAT_32
    fa_1 = ft.FeatureSource('Amount', ft.FEATURE_TYPE_FLOAT)
    fa_2 = ft.FeatureSource('Amount2', ft.FEATURE_TYPE_FLOAT)
    fs_1 = ft.FeatureSource('Source', ft.FEATURE_TYPE_STRING)
    fs_2 = ft.FeatureSource('Source2', ft.FEATURE_TYPE_STRING)
    ff_1 = ft.FeatureFilter('Filter', ft.FEATURE_TYPE_BOOL, feature_expression, [fs_1])
    ff_2 = ft.FeatureFilter('Filter2', ft.FEATURE_TYPE_BOOL, feature_expression, [fs_2])
    tp_1 = ft.TIME_PERIOD_DAY
    tp_2 = ft.TIME_PERIOD_WEEK
    tw_1 = 3
    tw_2 = 4
    ag_1 = ft.AGGREGATOR_COUNT
    ag_2 = ft.AGGREGATOR_STDDEV
    fg_1 = ft.FeatureGrouper(name_1, f_type_1, fa_1, fs_1, ff_1, tp_1, tw_1, ag_1)
    # Fixed: fg_9 previously also used ag_2, so the 'Different Name' assertion
    # changed two attributes and did not isolate the name.
    fg_9 = ft.FeatureGrouper(name_2, f_type_1, fa_1, fs_1, ff_1, tp_1, tw_1, ag_1)
    fg_2 = ft.FeatureGrouper(name_1, f_type_1, fa_1, fs_1, ff_1, tp_1, tw_1, ag_1)
    fg_3 = ft.FeatureGrouper(name_1, f_type_2, fa_1, fs_1, ff_1, tp_1, tw_1, ag_1)
    fg_4 = ft.FeatureGrouper(name_1, f_type_1, fa_1, fs_2, ff_1, tp_1, tw_1, ag_1)
    fg_5 = ft.FeatureGrouper(name_1, f_type_1, fa_1, fs_1, ff_2, tp_1, tw_1, ag_1)
    fg_6 = ft.FeatureGrouper(name_1, f_type_1, fa_1, fs_1, ff_1, tp_2, tw_1, ag_1)
    fg_7 = ft.FeatureGrouper(name_1, f_type_1, fa_1, fs_1, ff_1, tp_1, tw_2, ag_1)
    fg_8 = ft.FeatureGrouper(name_1, f_type_1, fa_1, fs_1, ff_1, tp_1, tw_1, ag_2)
    fg_10 = ft.FeatureGrouper(name_1, f_type_1, fa_2, fs_1, ff_1, tp_1, tw_1, ag_1)
    self.assertEqual(fg_1, fg_2, f'Should have been equal')
    self.assertNotEqual(fg_1, fg_9, f'Should not have been equal. Different Name')
    self.assertNotEqual(fg_1, fg_3, f'Should have been not equal. Different Type')
    self.assertNotEqual(
        fg_1, fg_10, f'Should have been not equal. Different Base Feature')
    self.assertNotEqual(
        fg_1, fg_4, f'Should not have been equal. Different Group Feature')
    self.assertNotEqual(
        fg_1, fg_5, f'Should not have been equal. Different Filter Feature')
    self.assertNotEqual(
        fg_1, fg_6, f'Should not have been equal. Different Time Period')
    self.assertNotEqual(
        fg_1, fg_7, f'Should not have been equal. Different Time Window')
    self.assertNotEqual(
        fg_1, fg_8, f'Should not have been equal. Different Aggregator')
def test_remove(self):
    """Removing a feature takes it out of the TensorDefinition's feature list."""
    first = ft.FeatureSource('test-feature-1', ft.FEATURE_TYPE_STRING)
    second = ft.FeatureSource('test-feature-2', ft.FEATURE_TYPE_STRING)
    definition = ft.TensorDefinition('test-tensor', [first, second])
    definition.remove(second)
    self.assertNotIn(second, definition.features, f'Tensor Definition Feature Removal failed')
def test_len(self):
    """len() of a TensorDefinition equals the number of features it holds."""
    features = [
        ft.FeatureSource('test-feature-1', ft.FEATURE_TYPE_STRING),
        ft.FeatureSource('test-feature-2', ft.FEATURE_TYPE_STRING),
    ]
    definition = ft.TensorDefinition('test-tensor', features)
    self.assertEqual(
        len(definition), len(features),
        f'Tensor definition length not working. Got {len(definition)}')
def test_creation_non_float(self):
    """FeatureNormalizeStandard rejects a non-float type and a non-float base."""
    flt_source = ft.FeatureSource('Source', ft.FEATURE_TYPE_FLOAT)
    str_source = ft.FeatureSource('Source', ft.FEATURE_TYPE_STRING)
    # The feature type of the normalizer itself must be a float type.
    with self.assertRaises(ft.FeatureDefinitionException):
        ft.FeatureNormalizeStandard('standard', ft.FEATURE_TYPE_STRING, flt_source)
    # The base feature must be a float as well.
    with self.assertRaises(ft.FeatureDefinitionException):
        ft.FeatureNormalizeStandard('standard', ft.FEATURE_TYPE_FLOAT, str_source)
def test_highest_precision(self):
    """highest_precision_feature picks the float feature; raises if none left."""
    src_str = ft.FeatureSource('test-feature-1', ft.FEATURE_TYPE_STRING)
    src_flt = ft.FeatureSource('test-feature-4', ft.FEATURE_TYPE_FLOAT)
    idx = ft.FeatureIndex('test-feature-2', ft.FEATURE_TYPE_INT_8, src_str)
    definition = ft.TensorDefinition('test-tensor', [src_str, src_flt, idx])
    self.assertEqual(
        definition.highest_precision_feature, src_flt,
        f'Wrong HP feature {definition.highest_precision_feature}')
    definition.remove(src_flt)
    definition.remove(idx)
    with self.assertRaises(ft.TensorDefinitionException):
        _ = definition.highest_precision_feature
def test_equality(self):
    """FeatureSource equality depends on both the name and the feature type."""
    type_str = ft.FEATURE_TYPE_STRING
    type_flt = ft.FEATURE_TYPE_FLOAT
    original = ft.FeatureSource('test_1', type_str)
    same = ft.FeatureSource('test_1', type_str)
    other_name = ft.FeatureSource('test_2', type_str)
    other_type = ft.FeatureSource('test_1', type_flt)
    self.assertEqual(original, same, f'Should have been equal')
    self.assertNotEqual(original, other_name, f'Should have been not equal')
    self.assertNotEqual(original, other_type, f'Should not have been equal. Different Type')
def test_creation_bad(self):
    """TensorDefinitionMulti must reject two definitions that both have labels."""
    base_1 = ft.FeatureSource('test-feature-1', ft.FEATURE_TYPE_FLOAT)
    label_1 = ft.FeatureLabelBinary('test-feature-3', ft.FEATURE_TYPE_INT_8, base_1)
    td_1 = ft.TensorDefinition('test-tensor-1', [base_1, label_1])
    base_2 = ft.FeatureSource('test-feature-1', ft.FEATURE_TYPE_FLOAT)
    extra = ft.FeatureSource('test-feature-2', ft.FEATURE_TYPE_STRING)
    label_2 = ft.FeatureLabelBinary('test-feature-3', ft.FEATURE_TYPE_INT_8, base_2)
    td_2 = ft.TensorDefinition('test-tensor-2', [base_2, extra, label_2])
    # 2 TensorDefinitions with labels
    with self.assertRaises(ft.TensorDefinitionException):
        _ = ft.TensorDefinitionMulti([td_1, td_2])
def test_creation_bad_lambda(self):
    """A FeatureExpressionSeries may not be built from a lambda expression."""
    source = ft.FeatureSource('Source', ft.FEATURE_TYPE_INT_16)
    with self.assertRaises(ft.FeatureDefinitionException):
        _ = ft.FeatureExpressionSeries(
            'expr', ft.FEATURE_TYPE_INT_16, lambda x: x + 1, [source])
def test_creation_not_float_bad(self):
    """FeatureGrouper creation fails for a non-float feature type (TypeError)
    and for a non-float base feature (FeatureDefinitionException).

    NOTE(review): renamed from `creation_not_float_bad` — without the `test_`
    prefix unittest discovery silently skipped this case.
    """
    name = 'test'
    f_type = ft.FEATURE_TYPE_STRING
    fa = ft.FeatureSource('Amount', ft.FEATURE_TYPE_FLOAT)
    fs = ft.FeatureSource('Source', ft.FEATURE_TYPE_STRING)
    ff = ft.FeatureFilter('Filter', ft.FEATURE_TYPE_BOOL, feature_expression, [fs])
    tp = ft.TIME_PERIOD_DAY
    tw = 3
    ag = ft.AGGREGATOR_COUNT
    # The grouper's own feature type is not a float type.
    with self.assertRaises(TypeError):
        _ = ft.FeatureGrouper(name, f_type, fa, fs, ff, tp, tw, ag)
    with self.assertRaises(ft.FeatureDefinitionException):
        # base is not a float
        _ = ft.FeatureGrouper(name, ft.FEATURE_TYPE_FLOAT, fs, fs, ff, tp, tw, ag)
def test_creation_base(self):
    """FeatureOneHot creation: base feature, embedded features, inference
    state, expand behaviour, feature type and learning category.
    """
    name = 'OneHot'
    sf = ft.FeatureSource('Source', ft.FEATURE_TYPE_STRING)
    oh = ft.FeatureOneHot(name, ft.FEATURE_TYPE_INT_8, sf)
    self.assertIsInstance(oh, ft.FeatureOneHot, f'Not expected type {type(oh)}')
    self.assertEqual(oh.name, name, f'Feature Name should be {name}')
    self.assertEqual(oh.base_feature, sf, f'Base Feature not set correctly')
    self.assertEqual(
        len(oh.embedded_features), 1,
        f'Should only have 1 emb feature {len(oh.embedded_features)}')
    self.assertIn(sf, oh.embedded_features, 'Base Feature should be in emb feature list')
    self.assertEqual(oh.inference_ready, False, 'Should be not inference ready upon creation')
    self.assertIsNone(oh.expand_names, f'Expand Names should be None {oh.expand_names}')
    # Fixed message grammar: 'should yields' -> 'should yield'.
    self.assertEqual(len(oh.expand()), 0, f'Expand should yield empty list')
    self.assertEqual(oh.type, ft.FEATURE_TYPE_INT_8, 'Must always be int-8 type. Smallest possible')
    self.assertEqual(
        oh.learning_category, ft.LEARNING_CATEGORY_BINARY,
        f'Must have learning category Binary')
    self.assertIsInstance(hash(oh), int, f'Hash function not working')
def test_bad_non_bool_type(self):
    """A FeatureFilter must have a bool feature type."""
    source = ft.FeatureSource('Source', ft.FEATURE_TYPE_INT_16)
    with self.assertRaises(ft.FeatureDefinitionException):
        _ = ft.FeatureFilter('filter', ft.FEATURE_TYPE_INT_8, feature_expression, [source])
def test_creation_bad_type(self):
    """A FeatureBin may not be created with a float feature type."""
    source = ft.FeatureSource('Source', ft.FEATURE_TYPE_FLOAT)
    with self.assertRaises(ft.FeatureDefinitionException):
        _ = ft.FeatureBin('Bin', ft.FEATURE_TYPE_FLOAT, source, 10)
def test_creation_base_lambda(self):
    """is_lambda is set for a FeatureExpression built from a lambda."""
    source = ft.FeatureSource('Source', ft.FEATURE_TYPE_INT_16)
    expression = ft.FeatureExpression(
        'expr', ft.FEATURE_TYPE_INT_16, lambda x: x + 1, [source])
    self.assertEqual(expression.is_lambda, True, f'Should been lambda')
def test_overlap_base_feature(self):
    """Two derived features sharing one base feature must be rejected."""
    # Should fail because the base feature is shared
    base = ft.FeatureSource('test-feature-1', ft.FEATURE_TYPE_STRING)
    index = ft.FeatureIndex('test-feature-2', ft.FEATURE_TYPE_INT_8, base)
    one_hot = ft.FeatureOneHot('test-feature-3', ft.FEATURE_TYPE_INT_8, base)
    with self.assertRaises(ft.TensorDefinitionException):
        _ = ft.TensorDefinition('test-tensor', [base, index, one_hot])
def test_creation(self):
    """Basic TensorDefinition creation: name, feature list, inference flag, rank."""
    tensor_name = 'test-tensor'
    feat_a = ft.FeatureSource('test-feature-1', ft.FEATURE_TYPE_STRING)
    feat_b = ft.FeatureSource('test-feature-2', ft.FEATURE_TYPE_STRING)
    definition = ft.TensorDefinition(tensor_name, [feat_a, feat_b])
    self.assertIsInstance(definition, ft.TensorDefinition, f'TensorDefinition creation failed')
    self.assertEqual(
        definition.name, tensor_name, f'Tensor Definition name not correct. Got {tensor_name}')
    self.assertListEqual(
        [feat_a, feat_b], definition.features,
        f'Tensor def feature list incorrect {definition.features}')
    self.assertEqual(
        definition.inference_ready, True,
        f'Tensor should ready for inference, feature have no inf attributes'
    )
    with self.assertRaises(ft.TensorDefinitionException):
        _ = definition.rank
def test_equality(self):
    """FeatureLabelBinary equality depends on the name and the base feature."""
    s_name_1 = 's_test_1'
    s_name_2 = 's_test_2'
    l_name_1 = 'l_test_1'
    l_name_2 = 'l_test_2'
    f_type_1 = ft.FEATURE_TYPE_INT_16
    f_type_2 = ft.FEATURE_TYPE_INT_8
    fs1 = ft.FeatureSource(s_name_1, f_type_1)
    fs2 = ft.FeatureSource(s_name_2, f_type_2)
    fl1 = ft.FeatureLabelBinary(l_name_1, ft.FEATURE_TYPE_INT_8, fs1)
    fl2 = ft.FeatureLabelBinary(l_name_1, ft.FEATURE_TYPE_INT_8, fs1)
    fl3 = ft.FeatureLabelBinary(l_name_2, ft.FEATURE_TYPE_INT_8, fs1)
    fl4 = ft.FeatureLabelBinary(l_name_1, ft.FEATURE_TYPE_INT_8, fs2)
    fl5 = ft.FeatureLabelBinary(l_name_1, ft.FEATURE_TYPE_INT_8, fs1)
    self.assertEqual(fl1, fl2, f'Should have been equal')
    self.assertNotEqual(fl1, fl3, f'Should have been not equal')
    # Fixed message: this asserts inequality on a different base feature; the
    # original message wrongly read 'Should have been equal'.
    self.assertNotEqual(fl1, fl4, f'Should not have been equal. Different Base Feature')
    self.assertEqual(fl1, fl5, f'Should have been equal')
def test_creation_bad_not_expression(self):
    """Passing a non-callable as the expression must raise a TypeError."""
    source = ft.FeatureSource('Source', ft.FEATURE_TYPE_INT_16)
    not_an_expression: Any = 'bad'  # Not an expression
    with self.assertRaises(TypeError):
        _ = ft.FeatureExpression('expr', ft.FEATURE_TYPE_INT_16, not_an_expression, [source])
class TestClassSampler(unittest.TestCase):
    """Class Sampler test cases.

    Builds a small feature set from a test CSV, converts it to a numpy list
    and checks that pt.ClassSampler produces a WeightedRandomSampler covering
    every row, and that a mismatched TensorDefinition is rejected.
    """
    # Label source feature; re-used both inside s_features and as the base of
    # the binary label in d_features.
    fraud = ft.FeatureSource('Fraud', ft.FEATURE_TYPE_INT_8)
    # Raw (source) features as they appear in the CSV file.
    s_features = [
        ft.FeatureSource('Amount', ft.FEATURE_TYPE_FLOAT),
        ft.FeatureSource('Card', ft.FEATURE_TYPE_STRING),
        ft.FeatureSource('MCC', ft.FEATURE_TYPE_CATEGORICAL),
        ft.FeatureSource('Country', ft.FEATURE_TYPE_CATEGORICAL),
        fraud
    ]
    # Derived features built from the source features.
    # NOTE(review): FeatureOneHot and FeatureLabelBinary are called here
    # without an explicit feature-type argument, unlike elsewhere in these
    # tests — presumably the constructors default the type; confirm.
    d_features = [
        ft.FeatureNormalizeScale('Amount_Scale', ft.FEATURE_TYPE_FLOAT_32, s_features[0]),
        ft.FeatureOneHot('MCC_OH', s_features[2]),
        ft.FeatureIndex('Country_Index', ft.FEATURE_TYPE_INT_16, s_features[3]),
        ft.FeatureLabelBinary('Fraud', fraud)
    ]

    def test_creation_base(self):
        """End-to-end: CSV -> base frame -> derived frame -> numpy list, then
        build an over-sampler and check it covers each row index once.
        """
        file = FILES_DIR + 'engine_test_base_comma.csv'
        tdb = ft.TensorDefinition('Base', self.s_features)
        tdd = ft.TensorDefinition('Derived', self.d_features)
        with en.EnginePandasNumpy() as e:
            df = e.from_csv(tdb, file, inference=False)
            df = e.from_df(tdd, df, inference=False)
            npl = e.to_numpy_list(tdd, df)
            cs = pt.ClassSampler(tdd, npl)
            self.assertIsInstance(cs, pt.ClassSampler, f'Was expecting ClassSampler type {type(cs)}')
            sm = cs.over_sampler(replacement=False)
            self.assertIsInstance(
                sm, data.WeightedRandomSampler,
                f'Was expecting Weighted Random Sampler {type(sm)}')
            self.assertEqual(len(sm), len(npl), f'Length not correct {len(sm)}')
            # Without replacement each row index should appear exactly once.
            self.assertListEqual(
                sorted(list(sm)), list(range(len(npl))),
                f'Each index should be in the weight list')

    def test_creation_bad(self):
        """ClassSampler must reject a TensorDefinition that does not match
        the numpy list it is given.
        """
        file = FILES_DIR + 'engine_test_base_comma.csv'
        tdb = ft.TensorDefinition('Base', self.s_features)
        tdd = ft.TensorDefinition('Derived', self.d_features)
        with en.EnginePandasNumpy() as e:
            df = e.from_csv(tdb, file, inference=False)
            df = e.from_df(tdd, df, inference=False)
            npl = e.to_numpy_list(tdd, df)
            # Should fail because wrong tensor definition. It does not match the numpy list
            with self.assertRaises(pt.PyTorchTrainException):
                _ = pt.ClassSampler(tdb, npl)
def test_equality(self):
    """FeatureIndex equality depends on name, base feature and type."""
    src_a = ft.FeatureSource('s_test_1', ft.FEATURE_TYPE_INT_16)
    src_b = ft.FeatureSource('s_test_2', ft.FEATURE_TYPE_INT_16)
    idx_ref = ft.FeatureIndex('i_test_1', ft.FEATURE_TYPE_INT_16, src_a)
    idx_same = ft.FeatureIndex('i_test_1', ft.FEATURE_TYPE_INT_16, src_a)
    idx_other_name = ft.FeatureIndex('i_test_2', ft.FEATURE_TYPE_INT_16, src_a)
    idx_other_base = ft.FeatureIndex('i_test_1', ft.FEATURE_TYPE_INT_16, src_b)
    idx_other_type = ft.FeatureIndex('i_test_1', ft.FEATURE_TYPE_INT_8, src_a)
    self.assertEqual(idx_ref, idx_same, f'Should have been equal')
    self.assertNotEqual(idx_ref, idx_other_name, f'Should have been not equal')
    self.assertNotEqual(
        idx_ref, idx_other_base, f'Should not have been equal. Different Base Feature')
    self.assertNotEqual(idx_ref, idx_other_type, f'Should not have been equal. Different Type')
def test_creation(self):
    """TensorDefinitionMulti keeps its definitions in order and identifies
    the definition that carries the label.
    """
    name_t1 = 'test-tensor-1'
    f1 = ft.FeatureSource('test-feature-1', ft.FEATURE_TYPE_STRING)
    f2 = ft.FeatureSource('test-feature-2', ft.FEATURE_TYPE_STRING)
    t1 = ft.TensorDefinition(name_t1, [f1, f2])
    name_t2 = 'test-tensor-2'
    f3 = ft.FeatureSource('test-feature-1', ft.FEATURE_TYPE_FLOAT)
    f4 = ft.FeatureSource('test-feature-2', ft.FEATURE_TYPE_STRING)
    f5 = ft.FeatureLabelBinary('test-feature-3', ft.FEATURE_TYPE_INT_8, f3)
    t2 = ft.TensorDefinition(name_t2, [f3, f4, f5])
    t3 = ft.TensorDefinitionMulti([t1, t2])
    self.assertIsInstance(t3, ft.TensorDefinitionMulti, f'Creation failed. Not correct type {type(t3)}')
    t4, t5 = t3.tensor_definitions
    # Fixed messages: 'don not' -> 'do not'; the second message now reports
    # t2.name rather than repeating t1.name.
    self.assertEqual(
        t1, t4, f'First Tensor Def do not match {t1.name} {t4.name}')
    self.assertEqual(
        t2, t5, f'Second Tensor Def do not match {t2.name} {t5.name}')
    self.assertEqual(t3.label_tensor_definition, t2, f'That is not the tensor def with the label')
def test_equality(self):
    """FeatureBin equality depends on name, base feature and type."""
    src_a = ft.FeatureSource('s_test_1', ft.FEATURE_TYPE_FLOAT)
    src_b = ft.FeatureSource('s_test_2', ft.FEATURE_TYPE_FLOAT)
    bin_ref = ft.FeatureBin('b_test_1', ft.FEATURE_TYPE_INT_8, src_a, 10)
    bin_same = ft.FeatureBin('b_test_1', ft.FEATURE_TYPE_INT_8, src_a, 10)
    bin_other_name = ft.FeatureBin('b_test_2', ft.FEATURE_TYPE_INT_8, src_a, 10)
    bin_other_base = ft.FeatureBin('b_test_1', ft.FEATURE_TYPE_INT_8, src_b, 10)
    bin_other_type = ft.FeatureBin('b_test_1', ft.FEATURE_TYPE_INT_16, src_a, 10)
    self.assertEqual(bin_ref, bin_same, f'Should have been equal')
    self.assertNotEqual(bin_ref, bin_other_name, f'Should have been not equal')
    self.assertNotEqual(
        bin_ref, bin_other_base, f'Should not have been equal. Different Base Feature')
    self.assertNotEqual(bin_ref, bin_other_type, f'Should not have been equal. Different Type')
def test_creation_w_default(self):
    """A FeatureSource created with a default keeps it; no format code."""
    src_name = 'test'
    src_type = ft.FEATURE_TYPE_STRING
    src_default = 'NA'
    feature = ft.FeatureSource(src_name, src_type, default=src_default)
    self.assertIsInstance(feature, ft.FeatureSource, f'Unexpected Type {type(feature)}')
    self.assertEqual(feature.name, src_name, f'Feature Name should be {src_name}')
    self.assertEqual(feature.type, src_type, f'Feature Type should be {src_type}')
    self.assertEqual(feature.default, src_default, f'Default should be {src_default}')
    self.assertIsNone(feature.format_code, 'Should not have format code')
    self.assertEqual(len(feature.embedded_features), 0, 'Should not have embedded features')
def test_filter(self):
    """NumpyList.filter_label keeps only rows with the requested label value.

    Filters the numpy list on label 0 (non-fraud) and checks the result
    against a pandas-side filter of the same frame.
    """
    file = FILES_DIR + 'engine_test_base_comma.csv'
    fa = ft.FeatureSource('Amount', ft.FEATURE_TYPE_FLOAT_32)
    ff = ft.FeatureSource('Fraud', ft.FEATURE_TYPE_FLOAT_32)
    fl = ft.FeatureLabelBinary('Fraud_Label', ft.FEATURE_TYPE_INT_8, ff)
    tb = ft.TensorDefinition('base-features', [fa, ff])
    td = ft.TensorDefinition('derived-features', [fa, fl])
    with en.EnginePandasNumpy() as e:
        df = e.from_csv(tb, file, inference=False)
        # NOTE(review): from_df receives an extra TensorDefinition argument
        # (tb) here, unlike other tests in this file — verify the signature.
        df = e.from_df(td, df, tb, inference=False)
        nl = e.to_numpy_list(td, df)
        # Expected values computed directly on the pandas frame.
        rows = df[df['Fraud_Label'] == 0].index
        amounts = df[df['Fraud_Label'] == 0]['Amount']
        r = nl.filter_label(td, 0)
        self.assertEqual(
            len(rows), len(r), f'Lengths do not match. Got {len(rows)}. Expected {len(r)}')
        # Fixed message: removed the duplicated word ('entries entries').
        self.assertNotIn(
            1, list(r.lists[1]), f'There should not have been "1"/Fraud entries')
        self.assertEqual(list(amounts), list(r.lists[0]), 'Amounts do not seem to be filtered')
def test_creation_w_format_code(self):
    """A FeatureSource created with a format code keeps it; no default."""
    src_name = 'test'
    src_type = ft.FEATURE_TYPE_STRING
    src_code = 'anything'
    feature = ft.FeatureSource(src_name, src_type, src_code)
    self.assertIsInstance(feature, ft.FeatureSource, f'Unexpected Type {type(feature)}')
    self.assertEqual(feature.name, src_name, f'Feature Name should be {src_name}')
    self.assertEqual(feature.type, src_type, f'Feature Type should be {src_type}')
    self.assertIsNone(feature.default, 'Should not have a default')
    self.assertEqual(feature.format_code, src_code, f'Format code should have been {src_code}')
    self.assertEqual(len(feature.embedded_features), 0, 'Should not have embedded features')
def test_creation_base(self):
    """A bare FeatureSource: no default, no format code, NONE learning category."""
    src_name = 'test'
    src_type = ft.FEATURE_TYPE_STRING
    feature = ft.FeatureSource(src_name, src_type)
    self.assertIsInstance(feature, ft.FeatureSource, f'Unexpected Type {type(feature)}')
    self.assertEqual(feature.name, src_name, f'Feature Name should be {src_name}')
    self.assertEqual(feature.type, src_type, f'Feature Type should be {src_type}')
    self.assertIsNone(feature.default, 'Should not have a default')
    self.assertIsNone(feature.format_code, 'Should not have format code')
    self.assertEqual(len(feature.embedded_features), 0, 'Should not have embedded features')
    self.assertEqual(
        feature.learning_category, ft.LEARNING_CATEGORY_NONE,
        f'String should have learning type NONE')
    self.assertIsInstance(hash(feature), int, f'Hash function not working')
def test_creation_bad_param(self):
    """FeatureExpression parameter validation: a list is required, entries
    must be Feature objects, and the count must match the expression.
    """
    source = ft.FeatureSource('Source', ft.FEATURE_TYPE_INT_16)
    not_a_list: Any = ''  # Param not a list
    with self.assertRaises(TypeError):
        _ = ft.FeatureExpression('expr', ft.FEATURE_TYPE_INT_16, feature_expression, not_a_list)
    not_features: Any = ['']  # Not list with Feature objects
    with self.assertRaises(TypeError):
        _ = ft.FeatureExpression('expr', ft.FEATURE_TYPE_INT_16, feature_expression, not_features)
    too_many = [source, source]  # Incorrect Number of Parameters
    with self.assertRaises(ft.FeatureDefinitionException):
        _ = ft.FeatureExpression('expr', ft.FEATURE_TYPE_INT_16, feature_expression, too_many)
def test_learning_features(self):
    """The learning category of a FeatureExpression follows its feature type."""
    source = ft.FeatureSource('Source', ft.FEATURE_TYPE_INT_16)
    params = [source]
    # (feature type, expected learning category)
    cases = [
        (ft.FEATURE_TYPE_INT_16, ft.LEARNING_CATEGORY_CATEGORICAL),
        (ft.FEATURE_TYPE_FLOAT, ft.LEARNING_CATEGORY_CONTINUOUS),
        (ft.FEATURE_TYPE_BOOL, ft.LEARNING_CATEGORY_BINARY),
        (ft.FEATURE_TYPE_STRING, ft.LEARNING_CATEGORY_NONE),
    ]
    for feature_type, expected_category in cases:
        expression = ft.FeatureExpression('expr', feature_type, feature_expression, params)
        self.assertEqual(expression.learning_category, expected_category)