Exemplo n.º 1
0
class TestData(unittest.TestCase):

    def setUp(self):
        self.y = np.array([1, 2, 3, 4, 5])
        self.X = pd.DataFrame({"a": [1, 2, 3, 4, 5], "b": [1, 1, 1, 1, 1], "c": [1, 2, 3, 3, 4]})
        self.data = Data(self.X, self.y, normalize=True)

    def test_unnormalization(self):
        self.assertListEqual(list(self.data.unnormalized_y), list(self.y))
        self.assertListEqual(list(self.data.unnormalize_y(np.array([0, 0.25, 0.5, 0.75]))), [3, 4, 5, 6])

    def test_unique_proportion_of_value_in_variable(self):
        self.assertEqual(self.data.proportion_of_value_in_variable(0, 1), 0.2)

    def test_non_unique_proportion_of_value_in_variable(self):
        self.assertEqual(self.data.proportion_of_value_in_variable(2, 1), 0.2)
        self.assertEqual(self.data.proportion_of_value_in_variable(2, 3), 0.4)

    def test_unique_columns(self):
        self.assertEqual(self.data.unique_columns, [0])

    def test_covariates_stored_as_matrix(self):
        self.assertEqual(type(self.data.X), np.ndarray)

    def test_is_not_constant(self):
        self.assertTrue(is_not_constant(np.array([1, 1, 2, 3])))
        self.assertFalse(is_not_constant(np.array([1, 1, 1, 1])))

    def test_n_obsv(self):
        self.assertEqual(self.data.n_obsv, 5)

    def test_normalization(self):
        self.assertEqual(-0.5, self.data.y.min())
        self.assertEqual(0.5, self.data.y.max())

    def test_splittable_variables(self):
        self.assertListEqual(list(self.data.splittable_variables()), [0, 2])

    def test_random_splittable_value(self):
        for a in range(10000):
            self.assertIn(self.data.random_splittable_value(0), [1, 2, 3, 4])
        self.assertIsNone(self.data.random_splittable_value(1))

    def test_random_splittable_variable(self):
        for a in range(100):
            self.assertIn(self.data.random_splittable_variable(), [0, 2])
        self.filtered_data = Data(self.data.X[:,[1]], self.data.y)
        with self.assertRaises(NoSplittableVariableException):
            self.filtered_data.random_splittable_variable()

    def test_n_splittable_variables(self):
        self.assertEqual(self.data.n_splittable_variables, 2)

    def test_variables(self):
        self.assertEqual(self.data.variables, [0, 1, 2])
Exemplo n.º 2
0
class TestDataNormalization(unittest.TestCase):
    def setUp(self):
        self.y = np.array([1, 2, 3, 4, 5])
        self.X = pd.DataFrame({
            "a": [1, 2, 3, 4, 5],
            "b": [1, 1, 1, 1, 1],
            "c": [1, 2, 3, 3, 4]
        })
        self.X = format_covariate_matrix(self.X)
        self.data = Data(self.X, self.y, normalize=True)

    def test_unnormalization(self):
        self.assertListEqual(list(self.data.unnormalized_y), list(self.y))
        self.assertListEqual(
            list(self.data.unnormalize_y(np.array([0, 0.25, 0.5, 0.75]))),
            [3, 4, 5, 6])

    def test_normalization(self):
        self.assertEqual(-0.5, self.data.y.min())
        self.assertEqual(0.5, self.data.y.max())