示例#1
0
    def test_subsample_regression_4k(self):
        """Check the part sizes ``_subsample`` returns for 4000 regression rows.

        Builds a random 4000x3 feature frame with a random float target,
        runs ``GoldenFeaturesTransformer._subsample`` in regression mode and
        expects each of the four returned parts to hold 2000 rows.
        """
        rows = 4000
        X = np.random.rand(rows, 3)
        X = pd.DataFrame(X, columns=[f"f{i}" for i in range(3)])
        y = pd.Series(np.random.rand(rows), name="target")

        gft3 = GoldenFeaturesTransformer(self.automl_dir, REGRESSION)
        X_train, X_test, y_train, y_test = gft3._subsample(X, y)

        # assertEqual, not assertTrue: assertTrue(value, 2000) treats 2000 as
        # the failure message and passes for any non-zero row count.
        # NOTE(review): the 2000 expectation is carried over from the earlier
        # assertTrue calls, which never enforced it -- confirm against
        # _subsample if one of these checks fails.
        self.assertEqual(X_train.shape[0], 2000)
        self.assertEqual(X_test.shape[0], 2000)
        self.assertEqual(y_train.shape[0], 2000)
        self.assertEqual(y_test.shape[0], 2000)
    def test_subsample_multiclass_4k(self):
        """Check ``_subsample`` sizes and label coverage for 4000 multiclass rows.

        Builds a random 4000x3 feature frame with integer labels drawn from
        0..3, runs ``GoldenFeaturesTransformer._subsample`` in multiclass
        mode, expects each of the four returned parts to hold 2000 rows, and
        expects every label 0..3 to appear in both target parts.
        """
        rows = 4000
        X = np.random.rand(rows, 3)
        X = pd.DataFrame(X, columns=[f"f{i}" for i in range(3)])
        y = pd.Series(np.random.randint(0, 4, rows), name="target")

        gft3 = GoldenFeaturesTransformer(self.automl_dir, MULTICLASS_CLASSIFICATION)
        X_train, X_test, y_train, y_test = gft3._subsample(X, y)

        # assertEqual, not assertTrue: assertTrue(value, 2000) treats 2000 as
        # the failure message and passes for any non-zero row count.
        # NOTE(review): the 2000 expectation is carried over from the earlier
        # assertTrue calls, which never enforced it -- confirm against
        # _subsample if one of these checks fails.
        self.assertEqual(X_train.shape[0], 2000)
        self.assertEqual(X_test.shape[0], 2000)
        self.assertEqual(y_train.shape[0], 2000)
        self.assertEqual(y_test.shape[0], 2000)

        # Each of the four labels must be present in both target parts.
        for uni in [np.unique(y_train), np.unique(y_test)]:
            for i in range(4):
                # assertIn reports the missing label and the container on failure.
                self.assertIn(i, uni)