def test_mcmc(self):
    """Train a model, then check stored array shapes and row imputation.

    Builds a base dataset (1000 rows, 3 numeric and 4 categorical
    variables with 5 levels each), trains an ``AutoClean`` model for 100
    iterations with 7 clusters, and verifies that:

    * ``coefficients`` and ``residual_variance`` have a leading dimension
      equal to the iteration count and trailing dimensions derived from
      the shape of ``self._beta``;
    * each probability container has the expected number of entries, and
      its ``"X1"`` / ``"cat1"`` entry has the expected shape and sums to
      one along its last axis;
    * ``impute_rows`` returns one result per requested iteration, each
      with the same shape as the rows passed in.
    """
    n_levels = 5
    n_iterations = 100
    n_clusters = 7

    self.create_base_dataset(
        sample_size=1000,
        num_numeric=3,
        num_cat=4,
        num_levels=n_levels,
    )
    model = R.AutoClean()
    model.train_model(self._data, nclusters=n_clusters, niter=n_iterations)

    def check_probability_array(values, expected_shape):
        # The shape is asserted first, so the all-ones reference built
        # from expected_shape matches what the sum over the last axis
        # yields.
        self.assertEqual(values.shape, expected_shape)
        last_axis = len(expected_shape) - 1
        totals = values.sum(axis=last_axis)
        self.assertTrue(np.allclose(totals, np.ones(expected_shape[:-1])))

    beta_shape = self._beta.shape
    beta_rows = beta_shape[0]
    beta_cols = beta_shape[1]

    self.assertEqual(
        model.coefficients.shape, (n_iterations, beta_rows, beta_cols))
    self.assertEqual(
        model.residual_variance.shape, (n_iterations, beta_cols, beta_cols))

    # Containers with one entry per column of beta.
    self.assertEqual(len(model.atom_probs), beta_cols)
    check_probability_array(
        model.atom_probs["X1"], (n_iterations, n_clusters, 1))

    self.assertEqual(len(model.atom_error_probs), beta_cols)
    check_probability_array(
        model.atom_error_probs["X1"], (n_iterations, n_clusters, 1, 2))

    # Containers with one entry per categorical variable.
    self.assertEqual(len(model.level_probs), self._ncat)
    check_probability_array(
        model.level_probs["cat1"], (n_iterations, n_clusters, n_levels))

    self.assertEqual(len(model.level_observation_probs), self._ncat)
    # NOTE(review): the trailing dimension is n_levels + 1; presumably the
    # extra slot covers a missing/unobserved outcome -- confirm against
    # the AutoClean implementation.
    check_probability_array(
        model.level_observation_probs["cat1"],
        (n_iterations, n_clusters, n_levels, n_levels + 1),
    )

    # Impute a handful of rows at selected iterations.
    rows_to_impute = self._data.iloc[[3, 9, 24], :]
    selected_iterations = [50, 60, 70, 75, 90]
    imputed = model.impute_rows(rows_to_impute, selected_iterations)
    self.assertEqual(len(imputed), len(selected_iterations))
    self.assertEqual(imputed[0].shape, rows_to_impute.shape)
def test_autoclean_construction(self):
    """Smoke test: construct an ``AutoClean`` model and train it briefly.

    Makes no assertions; the test passes when construction and a
    3-cluster, 10-iteration ``train_model`` call finish without raising.
    ``self._data`` is not set in this method, so it is presumably prepared
    by fixture code elsewhere in the class -- confirm.
    """
    model = R.AutoClean()
    model.train_model(self._data, nclusters=3, niter=10)