Exemplo n.º 1
0
 def test_creation_bad(self):
     """ClassSampler creation must fail when the TensorDefinition does not match the numpy list."""
     csv_path = FILES_DIR + 'engine_test_base_comma.csv'
     base_td = ft.TensorDefinition('Base', self.s_features)
     derived_td = ft.TensorDefinition('Derived', self.d_features)
     with en.EnginePandasNumpy() as engine:
         base_df = engine.from_csv(base_td, csv_path, inference=False)
         derived_df = engine.from_df(derived_td, base_df, inference=False)
         np_list = engine.to_numpy_list(derived_td, derived_df)
         # The numpy list was built with the derived definition; handing in
         # the base definition is the mismatch that is expected to raise.
         with self.assertRaises(pt.PyTorchTrainException):
             pt.ClassSampler(base_td, np_list)
Exemplo n.º 2
0
 def test_creation_bad(self):
     """NumpyListDataSet creation must fail when built from the wrong TensorDefinition."""
     csv_path = FILES_DIR + 'engine_test_base_comma.csv'
     base_td = ft.TensorDefinition('Base', self.s_features)
     derived_td = ft.TensorDefinition('Derived', self.d_features)
     with en.EnginePandasNumpy() as engine:
         base_df = engine.from_csv(base_td, csv_path, inference=False)
         derived_df = engine.from_df(derived_td, base_df, inference=False)
         np_list = engine.to_numpy_list(derived_td, derived_df)
         # The numpy list comes from the derived definition, so the base
         # definition is the wrong one to build the data set from.
         with self.assertRaises(pt.PyTorchTrainException):
             pt.NumpyListDataSet(base_td, np_list)
Exemplo n.º 3
0
 def test_creation_bad(self):
     """TensorDefinitionMulti must reject two TensorDefinitions that both carry a label."""
     # First definition: a float source feature plus a binary label on it.
     first_source = ft.FeatureSource('test-feature-1', ft.FEATURE_TYPE_FLOAT)
     first_label = ft.FeatureLabelBinary('test-feature-3', ft.FEATURE_TYPE_INT_8, first_source)
     first_name = 'test-tensor-1'
     first_td = ft.TensorDefinition(first_name, [first_source, first_label])

     # Second definition: two source features plus its own binary label.
     second_float = ft.FeatureSource('test-feature-1', ft.FEATURE_TYPE_FLOAT)
     second_string = ft.FeatureSource('test-feature-2', ft.FEATURE_TYPE_STRING)
     second_label = ft.FeatureLabelBinary('test-feature-3', ft.FEATURE_TYPE_INT_8, second_float)
     second_name = 'test-tensor-2'
     second_td = ft.TensorDefinition(second_name, [second_float, second_string, second_label])

     # Both definitions contain a label feature, which is expected to raise.
     with self.assertRaises(ft.TensorDefinitionException):
         ft.TensorDefinitionMulti([first_td, second_td])
Exemplo n.º 4
0
 def test_creation_base(self):
     """Build a ClassSampler from a matching definition and check its over-sampler.

     The over-sampler is requested without replacement; the test checks its
     type, its length and that the sorted indices it yields equal
     ``range(len(npl))``.
     """
     csv_path = FILES_DIR + 'engine_test_base_comma.csv'
     base_td = ft.TensorDefinition('Base', self.s_features)
     derived_td = ft.TensorDefinition('Derived', self.d_features)
     with en.EnginePandasNumpy() as engine:
         frame = engine.from_csv(base_td, csv_path, inference=False)
         frame = engine.from_df(derived_td, frame, inference=False)
         np_list = engine.to_numpy_list(derived_td, frame)

         class_sampler = pt.ClassSampler(derived_td, np_list)
         self.assertIsInstance(
             class_sampler, pt.ClassSampler,
             f'Was expecting ClassSampler type {type(class_sampler)}')

         sampler = class_sampler.over_sampler(replacement=False)
         self.assertIsInstance(
             sampler, data.WeightedRandomSampler,
             f'Was expecting Weighted Random Sampler {type(sampler)}')
         self.assertEqual(len(sampler), len(np_list), f'Length not correct {len(sampler)}')

         # Iterate the sampler once and compare the ordered indices with the
         # full index range.
         drawn_indices = sorted(sampler)
         expected_indices = list(range(len(np_list)))
         self.assertListEqual(drawn_indices, expected_indices, f'Each index should be in the weight list')
Exemplo n.º 5
0
 def test_filtering(self):
     """Check the learning-category filters of a TensorDefinition.

     Builds a definition with index, one-hot, continuous and label features,
     verifies the number of learning categories and the content of each
     category filter, then verifies that every filter raises
     TensorDefinitionException when called with ``True``.
     """
     name_t = 'test-tensor'
     f1 = ft.FeatureSource('test-feature-1', ft.FEATURE_TYPE_STRING)
     f2 = ft.FeatureIndex('test-feature-2', ft.FEATURE_TYPE_INT_8, f1)
     f3 = ft.FeatureSource('test-feature-3', ft.FEATURE_TYPE_STRING)
     f4 = ft.FeatureOneHot('test-feature-4', ft.FEATURE_TYPE_INT_8, f3)
     f5 = ft.FeatureSource('test-feature-5', ft.FEATURE_TYPE_FLOAT)
     f6 = ft.FeatureNormalizeScale('test-feature-6', ft.FEATURE_TYPE_FLOAT,
                                   f5)
     f7 = ft.FeatureNormalizeStandard('test-feature-7',
                                      ft.FEATURE_TYPE_FLOAT, f5)
     f8 = ft.FeatureLabelBinary('test-feature-8', ft.FEATURE_TYPE_INT_8, f2)
     t = ft.TensorDefinition(name_t, [f1, f2, f3, f4, f5, f6, f7, f8])
     self.assertEqual(
         len(t.learning_categories), 4,
         f'Should be 4 categories. Got {len(t.learning_categories)}')
     self.assertListEqual(t.categorical_features(), [f2])
     self.assertListEqual(t.binary_features(), [f4])
     self.assertListEqual(t.continuous_features(), [f5, f6, f7])
     self.assertListEqual(t.label_features(), [f8])
     # Every filter is expected to raise when called with True.
     # NOTE(review): the original comment attributed the failure to the
     # definition being "ready for inference"; presumably it is the opposite
     # (not yet ready) - confirm against TensorDefinition.
     # Each call gets its own assertRaises context: in a shared context the
     # first raising call leaves the block and the remaining calls would
     # never be executed, so they would go untested.
     with self.assertRaises(ft.TensorDefinitionException):
         t.categorical_features(True)
     with self.assertRaises(ft.TensorDefinitionException):
         t.binary_features(True)
     with self.assertRaises(ft.TensorDefinitionException):
         t.continuous_features(True)
     with self.assertRaises(ft.TensorDefinitionException):
         t.label_features(True)
     with self.assertRaises(ft.TensorDefinitionException):
         t.filter_features(ft.LEARNING_CATEGORY_CATEGORICAL, True)
Exemplo n.º 6
0
 def test_len(self):
     """len() of a TensorDefinition must equal the number of features it was given."""
     features = (
         ft.FeatureSource('test-feature-1', ft.FEATURE_TYPE_STRING),
         ft.FeatureSource('test-feature-2', ft.FEATURE_TYPE_STRING),
     )
     tensor_def = ft.TensorDefinition('test-tensor', list(features))
     expected_length = len(features)
     self.assertEqual(
         len(tensor_def), expected_length,
         f'Tensor definition length not working. Got {len(tensor_def)}')
Exemplo n.º 7
0
 def test_remove(self):
     """A feature removed from a TensorDefinition must no longer appear in its features."""
     kept = ft.FeatureSource('test-feature-1', ft.FEATURE_TYPE_STRING)
     dropped = ft.FeatureSource('test-feature-2', ft.FEATURE_TYPE_STRING)
     tensor_def = ft.TensorDefinition('test-tensor', [kept, dropped])
     tensor_def.remove(dropped)
     remaining = tensor_def.features
     self.assertNotIn(dropped, remaining,
                      f'Tensor Definition Feature Removal failed')
Exemplo n.º 8
0
 def test_overlap_base_feature(self):
     """TensorDefinition creation must fail when two derived features share a base feature."""
     shared_base = ft.FeatureSource('test-feature-1', ft.FEATURE_TYPE_STRING)
     # Both derived features below are built on the same source feature.
     index_feature = ft.FeatureIndex('test-feature-2', ft.FEATURE_TYPE_INT_8, shared_base)
     one_hot_feature = ft.FeatureOneHot('test-feature-3', ft.FEATURE_TYPE_INT_8, shared_base)
     tensor_name = 'test-tensor'
     with self.assertRaises(ft.TensorDefinitionException):
         ft.TensorDefinition(tensor_name, [shared_base, index_feature, one_hot_feature])
Exemplo n.º 9
0
 def test_creation_data_loader(self):
     """Create a data loader from a NumpyListDataSet and inspect its first batch.

     Checks that the batch has one entry per learning category, that each
     entry has the dtype reported by ``_DTypeHelper.get_dtypes`` and that the
     first dimension of each entry equals the requested batch size.
     """
     batch_size = 3
     csv_path = FILES_DIR + 'engine_test_base_comma.csv'
     base_td = ft.TensorDefinition('Base', self.s_features)
     derived_td = ft.TensorDefinition('Derived', self.d_features)
     with en.EnginePandasNumpy() as engine:
         frame = engine.from_csv(base_td, csv_path, inference=False)
         frame = engine.from_df(derived_td, frame, inference=False)
         np_list = engine.to_numpy_list(derived_td, frame)
         dataset = pt.NumpyListDataSet(derived_td, np_list)
         loader = dataset.data_loader(torch.device('cpu'), batch_size)
         first_batch = next(iter(loader))
         self.assertEqual(len(first_batch), len(derived_td.learning_categories))

         # Data types of the batch entries versus the helper's dtypes.
         expected_dtypes = d373c7.pytorch.data._DTypeHelper.get_dtypes(derived_td)
         for position, dtype in enumerate(expected_dtypes):
             self.assertEqual(
                 first_batch[position].dtype, dtype,
                 f'Default data types don not match {position}, expected {dtype}')

         # First dimension of every entry must be the batch size.
         for position, tensor in enumerate(first_batch):
             self.assertEqual(
                 tensor.shape[0], batch_size,
                 f'Batch size does not match item {position}. Got {tensor.shape[0]}')
Exemplo n.º 10
0
 def test_creation(self):
     """Create a TensorDefinitionMulti from two definitions and check its content.

     Only the second definition contains a label feature. The test checks the
     type of the result, that ``tensor_definitions`` returns the two
     definitions in the order given, and that ``label_tensor_definition`` is
     the definition holding the label.
     """
     name_t1 = 'test-tensor-1'
     f1 = ft.FeatureSource('test-feature-1', ft.FEATURE_TYPE_STRING)
     f2 = ft.FeatureSource('test-feature-2', ft.FEATURE_TYPE_STRING)
     t1 = ft.TensorDefinition(name_t1, [f1, f2])
     name_t2 = 'test-tensor-2'
     f3 = ft.FeatureSource('test-feature-1', ft.FEATURE_TYPE_FLOAT)
     f4 = ft.FeatureSource('test-feature-2', ft.FEATURE_TYPE_STRING)
     f5 = ft.FeatureLabelBinary('test-feature-3', ft.FEATURE_TYPE_INT_8, f3)
     t2 = ft.TensorDefinition(name_t2, [f3, f4, f5])
     t3 = ft.TensorDefinitionMulti([t1, t2])
     self.assertIsInstance(t3, ft.TensorDefinitionMulti,
                           f'Creation failed. Not correct type {type(t3)}')
     t4, t5 = t3.tensor_definitions
     self.assertEqual(
         t1, t4, f'First Tensor Def don not match {t1.name} {t4.name}')
     # The message reports t2 (not t1): this assertion compares t2 with t5.
     self.assertEqual(
         t2, t5, f'Second Tensor Def don not match {t2.name} {t5.name}')
     self.assertEqual(t3.label_tensor_definition, t2,
                      f'That is not the tensor def with the label')
Exemplo n.º 11
0
 def test_highest_precision(self):
     """Check ``highest_precision_feature`` before and after removing features.

     With a string source, a float source and an index feature, the float
     source is expected. Once the float and index features are removed,
     reading the property is expected to raise TensorDefinitionException.
     """
     string_source = ft.FeatureSource('test-feature-1', ft.FEATURE_TYPE_STRING)
     float_source = ft.FeatureSource('test-feature-4', ft.FEATURE_TYPE_FLOAT)
     index_feature = ft.FeatureIndex('test-feature-2', ft.FEATURE_TYPE_INT_8, string_source)
     tensor_def = ft.TensorDefinition(
         'test-tensor', [string_source, float_source, index_feature])
     self.assertEqual(tensor_def.highest_precision_feature, float_source,
                      f'Wrong HP feature {tensor_def.highest_precision_feature}')

     # Leave only the string source feature in the definition.
     for feature in (float_source, index_feature):
         tensor_def.remove(feature)
     with self.assertRaises(ft.TensorDefinitionException):
         tensor_def.highest_precision_feature
Exemplo n.º 12
0
 def test_filter(self):
     """Check ``filter_label(td, 0)`` against rows selected directly from the DataFrame.

     The reference rows are those where the 'Fraud_Label' column equals 0.
     The filtered numpy list must have the same length, must not contain a 1
     in ``lists[1]`` and must hold the reference amounts in ``lists[0]``.
     """
     file = FILES_DIR + 'engine_test_base_comma.csv'
     fa = ft.FeatureSource('Amount', ft.FEATURE_TYPE_FLOAT_32)
     ff = ft.FeatureSource('Fraud', ft.FEATURE_TYPE_FLOAT_32)
     fl = ft.FeatureLabelBinary('Fraud_Label', ft.FEATURE_TYPE_INT_8, ff)
     tb = ft.TensorDefinition('base-features', [fa, ff])
     td = ft.TensorDefinition('derived-features', [fa, fl])
     with en.EnginePandasNumpy() as e:
         df = e.from_csv(tb, file, inference=False)
         df = e.from_df(td, df, tb, inference=False)
         nl = e.to_numpy_list(td, df)
     # Reference values computed straight from the DataFrame.
     rows = df[df['Fraud_Label'] == 0].index
     amounts = df[df['Fraud_Label'] == 0]['Amount']
     # Value under test.
     r = nl.filter_label(td, 0)
     # "Got" is the filtered result and "Expected" the DataFrame reference;
     # the original message had the two swapped.
     self.assertEqual(
         len(rows), len(r),
         f'Lengths do not match. Got {len(r)}. Expected {len(rows)}')
     self.assertNotIn(
         1, list(r.lists[1]),
         f'There should not have been "1"/Fraud entries entries')
     self.assertEqual(list(amounts), list(r.lists[0]),
                      'Amounts do not seem to be filtered')
Exemplo n.º 13
0
 def test_creation_base(self):
     """Build a NumpyListDataSet and check its length, item type, shapes and dtypes.

     The first item must be a list of torch Tensors with one entry per
     learning category. For every numpy array / tensor pair, the array's
     second dimension (0 for arrays of rank 1 or lower) must equal the
     tensor's first dimension (0 for tensors without dimensions).
     """
     csv_path = FILES_DIR + 'engine_test_base_comma.csv'
     base_td = ft.TensorDefinition('Base', self.s_features)
     derived_td = ft.TensorDefinition('Derived', self.d_features)
     with en.EnginePandasNumpy() as engine:
         frame = engine.from_csv(base_td, csv_path, inference=False)
         frame = engine.from_df(derived_td, frame, inference=False)
         np_list = engine.to_numpy_list(derived_td, frame)
         dataset = pt.NumpyListDataSet(derived_td, np_list)
         self.assertEqual(
             len(dataset), len(np_list),
             f'Length of DS is wrong. Got {len(dataset)}. Expected {len(np_list)}')

         first_item = dataset[0]
         self.assertIsInstance(first_item, list, f'__get_item__ should have returned a list')
         self.assertIsInstance(
             first_item[0], torch.Tensor,
             f'__get_item__ should have returned a list of Tensors')
         self.assertEqual(
             len(first_item), len(derived_td.learning_categories),
             f'Number of list must be number of Learning cats')

         # Test shapes.
         for np_array, tensor in zip(np_list.lists, dataset[0]):
             np_width = 0
             if len(np_array.shape) > 1:
                 np_width = np_array.shape[1]
             tensor_dims = list(tensor.shape)
             tensor_width = tensor_dims[0] if len(tensor_dims) != 0 else 0
             self.assertEqual(np_width, tensor_width)

         # Test data types.
         expected_dtypes = d373c7.pytorch.data._DTypeHelper.get_dtypes(derived_td)
         for position, dtype in enumerate(expected_dtypes):
             self.assertEqual(
                 dataset[0][position].dtype, dtype,
                 f'Default data types don not match {position}, expected {dtype}')
Exemplo n.º 14
0
 def test_creation(self):
     """Create a TensorDefinition from two source features and check its attributes.

     Verifies the type, the name, the feature list and that
     ``inference_ready`` is True, then verifies that reading ``rank`` raises
     TensorDefinitionException.
     """
     name_t = 'test-tensor'
     f1 = ft.FeatureSource('test-feature-1', ft.FEATURE_TYPE_STRING)
     f2 = ft.FeatureSource('test-feature-2', ft.FEATURE_TYPE_STRING)
     t = ft.TensorDefinition(name_t, [f1, f2])
     self.assertIsInstance(t, ft.TensorDefinition,
                           f'TensorDefinition creation failed')
     # Report the actual name (t.name) on failure; the original message
     # printed the expected value name_t as "Got".
     self.assertEqual(t.name, name_t,
                      f'Tensor Definition name not correct. Got {t.name}')
     self.assertListEqual([f1, f2], t.features,
                          f'Tensor def feature list incorrect {t.features}')
     self.assertEqual(
         t.inference_ready, True,
         f'Tensor should ready for inference, feature have no inf attributes'
     )
     # Reading rank on this freshly created definition is expected to raise.
     with self.assertRaises(ft.TensorDefinitionException):
         _ = t.rank
Exemplo n.º 15
0
 def test_duplicate_bad(self):
     """TensorDefinition creation must fail when the same feature is listed twice."""
     feature = ft.FeatureSource('test-feature-1', ft.FEATURE_TYPE_STRING)
     duplicated = [feature, feature]
     tensor_name = 'test-tensor'
     with self.assertRaises(ft.TensorDefinitionException):
         ft.TensorDefinition(tensor_name, duplicated)