Exemplo n.º 1
0
 def test_creation_bad(self):
     """Building a NumpyListDataSet with a mismatched TensorDefinition must raise.

     The numpy list is produced from the 'Derived' definition, after which the data set
     is constructed with the 'Base' definition instead.
     """
     csv_path = FILES_DIR + 'engine_test_base_comma.csv'
     base_def = ft.TensorDefinition('Base', self.s_features)
     derived_def = ft.TensorDefinition('Derived', self.d_features)
     with en.EnginePandasNumpy() as engine:
         frame = engine.from_csv(base_def, csv_path, inference=False)
         frame = engine.from_df(derived_def, frame, inference=False)
         numpy_list = engine.to_numpy_list(derived_def, frame)
         # The list was built from the derived definition; pairing it with the base
         # definition is the error condition this test expects to be rejected.
         self.assertRaises(pt.PyTorchTrainException, pt.NumpyListDataSet, base_def, numpy_list)
Exemplo n.º 2
0
 def test_creation_bad(self):
     """Building a ClassSampler with a mismatched TensorDefinition must raise.

     The numpy list is produced from the 'Derived' definition, after which the sampler
     is constructed with the 'Base' definition instead.
     """
     csv_path = FILES_DIR + 'engine_test_base_comma.csv'
     base_def = ft.TensorDefinition('Base', self.s_features)
     derived_def = ft.TensorDefinition('Derived', self.d_features)
     with en.EnginePandasNumpy() as engine:
         frame = engine.from_csv(base_def, csv_path, inference=False)
         frame = engine.from_df(derived_def, frame, inference=False)
         numpy_list = engine.to_numpy_list(derived_def, frame)
         # The base definition does not match the numpy list built from the derived one,
         # so construction is expected to fail.
         self.assertRaises(pt.PyTorchTrainException, pt.ClassSampler, base_def, numpy_list)
Exemplo n.º 3
0
 def test_creation_base(self):
     """Create a ClassSampler and verify the over-sampler it returns.

     Checks the types of both objects, that the sampler has as many entries as the
     numpy list, and that drawing without replacement yields every index once.
     """
     csv_path = FILES_DIR + 'engine_test_base_comma.csv'
     base_def = ft.TensorDefinition('Base', self.s_features)
     derived_def = ft.TensorDefinition('Derived', self.d_features)
     with en.EnginePandasNumpy() as engine:
         frame = engine.from_csv(base_def, csv_path, inference=False)
         frame = engine.from_df(derived_def, frame, inference=False)
         numpy_list = engine.to_numpy_list(derived_def, frame)
         class_sampler = pt.ClassSampler(derived_def, numpy_list)
         self.assertIsInstance(
             class_sampler, pt.ClassSampler, f'Was expecting ClassSampler type {type(class_sampler)}'
         )
         sampler = class_sampler.over_sampler(replacement=False)
         self.assertIsInstance(
             sampler, data.WeightedRandomSampler, f'Was expecting Weighted Random Sampler {type(sampler)}'
         )
         self.assertEqual(len(sampler), len(numpy_list), f'Length not correct {len(sampler)}')
         # Draw once from the sampler and order the result so it can be compared
         # against the full index range.
         drawn = list(sampler)
         drawn.sort()
         every_index = list(range(len(numpy_list)))
         self.assertListEqual(drawn, every_index, f'Each index should be in the weight list')
Exemplo n.º 4
0
 def test_creation_data_loader(self):
     """Build a data loader from a NumpyListDataSet and inspect its first batch.

     Verifies the number of tensors in the batch, their dtypes and that each tensor's
     first dimension equals the requested batch size.
     """
     csv_path = FILES_DIR + 'engine_test_base_comma.csv'
     batch_size = 3
     base_def = ft.TensorDefinition('Base', self.s_features)
     derived_def = ft.TensorDefinition('Derived', self.d_features)
     with en.EnginePandasNumpy() as engine:
         frame = engine.from_csv(base_def, csv_path, inference=False)
         frame = engine.from_df(derived_def, frame, inference=False)
         numpy_list = engine.to_numpy_list(derived_def, frame)
         data_set = pt.NumpyListDataSet(derived_def, numpy_list)
         loader = data_set.data_loader(torch.device('cpu'), batch_size)
         first_batch = next(iter(loader))
         # One tensor is expected per learning category of the tensor definition.
         self.assertEqual(len(first_batch), len(derived_def.learning_categories))
         # Each tensor must carry the dtype the helper reports for its position.
         expected_dtypes = d373c7.pytorch.data._DTypeHelper.get_dtypes(derived_def)
         for position, dtype in enumerate(expected_dtypes):
             self.assertEqual(
                 first_batch[position].dtype, dtype,
                 f'Default data types don not match {position}, expected {dtype}'
             )
         # The leading dimension of every tensor must be the batch size.
         for position, tensor in enumerate(first_batch):
             self.assertEqual(
                 tensor.shape[0], batch_size,
                 f'Batch size does not match item {position}. Got {tensor.shape[0]}'
             )
Exemplo n.º 5
0
 def test_filter(self):
     """Check that filter_label(td, 0) keeps only the rows labelled 0.

     Reference values (row count and amounts) are computed directly on the DataFrame
     and compared with the filtered numpy list.
     """
     file = FILES_DIR + 'engine_test_base_comma.csv'
     fa = ft.FeatureSource('Amount', ft.FEATURE_TYPE_FLOAT_32)
     ff = ft.FeatureSource('Fraud', ft.FEATURE_TYPE_FLOAT_32)
     fl = ft.FeatureLabelBinary('Fraud_Label', ft.FEATURE_TYPE_INT_8, ff)
     tb = ft.TensorDefinition('base-features', [fa, ff])
     td = ft.TensorDefinition('derived-features', [fa, fl])
     with en.EnginePandasNumpy() as e:
         df = e.from_csv(tb, file, inference=False)
         df = e.from_df(td, df, tb, inference=False)
         nl = e.to_numpy_list(td, df)
     # Reference data: select the label-0 rows once and derive both values from it.
     non_fraud = df[df['Fraud_Label'] == 0]
     rows = non_fraud.index
     amounts = non_fraud['Amount']
     r = nl.filter_label(td, 0)
     # 'Got' reports the filtered list under test, 'Expected' the DataFrame reference.
     self.assertEqual(
         len(rows), len(r),
         f'Lengths do not match. Got {len(r)}. Expected {len(rows)}')
     # The test treats lists[1] as the labels and lists[0] as the amounts.
     self.assertNotIn(
         1, list(r.lists[1]),
         f'There should not have been "1"/Fraud entries entries')
     self.assertEqual(list(amounts), list(r.lists[0]),
                      'Amounts do not seem to be filtered')
0
 def test_creation_base(self):
     """Create a NumpyListDataSet and verify length, item type, shapes and dtypes.

     The first item is expected to be a list of tensors, one per learning category,
     whose sizes and dtypes line up with the underlying numpy list.
     """
     csv_path = FILES_DIR + 'engine_test_base_comma.csv'
     base_def = ft.TensorDefinition('Base', self.s_features)
     derived_def = ft.TensorDefinition('Derived', self.d_features)
     with en.EnginePandasNumpy() as engine:
         frame = engine.from_csv(base_def, csv_path, inference=False)
         frame = engine.from_df(derived_def, frame, inference=False)
         numpy_list = engine.to_numpy_list(derived_def, frame)
         data_set = pt.NumpyListDataSet(derived_def, numpy_list)
         self.assertEqual(
             len(data_set), len(numpy_list),
             f'Length of DS is wrong. Got {len(data_set)}. Expected {len(numpy_list)}'
         )
         first_item = data_set[0]
         self.assertIsInstance(first_item, list, f'__get_item__ should have returned a list')
         self.assertIsInstance(
             first_item[0], torch.Tensor, f'__get_item__ should have returned a list of Tensors'
         )
         self.assertEqual(
             len(first_item), len(derived_def.learning_categories),
             f'Number of list must be number of Learning cats'
         )
         # Compare the second dimension of each numpy array (0 when it has a single
         # dimension) with the first dimension of the matching tensor (0 when it has none).
         for array, tensor in zip(numpy_list.lists, data_set[0]):
             if len(array.shape) > 1:
                 array_size = array.shape[1]
             else:
                 array_size = 0
             tensor_dims = list(tensor.shape)
             if len(tensor_dims) == 0:
                 tensor_size = 0
             else:
                 tensor_size = tensor_dims[0]
             self.assertEqual(array_size, tensor_size)
         # Each tensor of the first item must carry the dtype the helper reports.
         for position, dtype in enumerate(d373c7.pytorch.data._DTypeHelper.get_dtypes(derived_def)):
             self.assertEqual(
                 data_set[0][position].dtype, dtype,
                 f'Default data types don not match {position}, expected {dtype}'
             )