Пример #1
0
 def setUp(self):
     # Fixture hook: builds labeled training data, unlabeled test data and
     # the list of steps used by the tests of the enclosing class (the class
     # header is outside this view).
     frame_shape = (100, 10)
     pca_options = {'n_components': 2}
     # The default rand_df call is unpacked into a pair; labeled=False is
     # stored as a single object.
     features, target = rand_df(shape=frame_shape)
     self.X = features
     self.y = target
     self.test_X = rand_df(shape=frame_shape, labeled=False)
     # Steps are constructed in list order: EmptyStep, PearsonCorrStep(4),
     # then PCAStep configured with n_components=2.
     self.steps = [
         EmptyStep(),
         PearsonCorrStep(4),
         PCAStep(kwargs=pca_options),
     ]
Пример #2
0
 def setUp(self):
     # Fixture hook: builds unlabeled training and test data plus the list
     # of steps used by the tests of the enclosing class (the class header
     # is outside this view). No y attribute is set here.
     frame_shape = (100, 10)
     pca_options = {'n_components': 2}
     self.X = rand_df(shape=frame_shape, labeled=False)
     self.test_X = rand_df(shape=frame_shape, labeled=False)
     # Steps are constructed in list order: EmptyStep, a StandardScalerStep
     # created with append_input=True, then PCAStep with n_components=2.
     self.steps = [
         EmptyStep(),
         StandardScalerStep(append_input=True),
         PCAStep(kwargs=pca_options),
     ]
class ListTests(unittest.TestCase, StepTest):
    """Fixture for ListSelectionStep selecting the features '11' and '22'.

    Declares the ``step``, ``X``, ``y`` and ``test_X`` class attributes;
    the StepTest mixin (defined outside this view) presumably reads them.
    """
    step = ListSelectionStep(features=['11', '22'])
    X, y = rand_df()
    test_X = rand_df(labeled=False)

    # Fitting with a feature name that does not exist must raise KeyError.
    def test_key_error(self):
        requested = ['11', 'not_a_feature']
        selector = ListSelectionStep(features=requested)
        frame, target = rand_df(shape=(10, 15))
        # Callable form of assertRaises: fit is invoked by the assertion.
        self.assertRaises(KeyError, selector.fit, frame, y=target)
Пример #4
0
 def test_pipeline_step(self):
     # Builds a pipeline that contains another pipeline as one of its steps,
     # fits it on unlabeled data, and checks that both fit_transform and
     # transform hand back a pandas DataFrame.
     tr_data_X = rand_df(shape=(100, 20), labeled=False)
     te_data = rand_df(shape=(100, 20), labeled=False)
     scale_step = StandardScalerStep()
     pca_step = PCAStep(append_input=False, kwargs={'n_components': 5})
     poly_step = PolyStep(kwargs={'degree': 3, 'include_bias': False})
     # The nested pipeline (PCA step followed by the polynomial step) is
     # created with append_input=True and sits between the scaler and an
     # EmptyStep.
     pipeline = Pipeline([
         scale_step,
         Pipeline([pca_step, poly_step], append_input=True),
         EmptyStep()
     ])
     r = pipeline.fit_transform(tr_data_X)
     # assertIsInstance replaces the type(r) equality check: it is the
     # idiomatic type assertion, gives a clearer failure message, and also
     # accepts DataFrame subclasses.
     self.assertIsInstance(r, pd.DataFrame)
     r = pipeline.transform(te_data)
     self.assertIsInstance(r, pd.DataFrame)
Пример #5
0
 def test_pipeline_step(self):
     # Builds a pipeline that contains another pipeline as one of its steps,
     # fits it on labeled 3-class data, and checks that both fit_transform
     # and transform hand back a pandas DataFrame.
     tr_data_X, tr_data_y = rand_df_classification(shape=(100, 20),
                                                   classes=3)
     te_data = rand_df(shape=(100, 20), labeled=False)
     scale_step = StandardScalerStep()
     chi_step = ChiSqSelectionStep(select_kwargs={'k': 20})
     # NOTE(review): a PearsonCorrStep(num_features=0.1) used to be built
     # here but was never added to the pipeline; the unused local was
     # removed. Confirm it was not meant to be one of the pipeline steps.
     pca_step = PCAStep(append_input=False, kwargs={'n_components': 5})
     poly_step = PolyStep(kwargs={'degree': 3, 'include_bias': False})
     # The nested pipeline (PCA step followed by the polynomial step) is
     # created with append_input=True; the chi-squared selection step runs
     # last.
     pipeline = Pipeline([
         scale_step,
         Pipeline([pca_step, poly_step], append_input=True), chi_step
     ])
     # With y supplied, fit_transform's result is unpacked as a pair; only
     # the first element is checked.
     r, _ = pipeline.fit_transform(tr_data_X, y=tr_data_y)
     # assertIsInstance replaces the type(r) equality check: it is the
     # idiomatic type assertion, gives a clearer failure message, and also
     # accepts DataFrame subclasses.
     self.assertIsInstance(r, pd.DataFrame)
     r = pipeline.transform(te_data)
     self.assertIsInstance(r, pd.DataFrame)
class ADASYNTests1(unittest.TestCase, StepTest):
    """Fixture for ADASYNStep configured with a per-class ``ratio`` dict.

    Declares the ``step``, ``X``, ``y`` and ``test_X`` class attributes;
    the StepTest mixin (defined outside this view) presumably reads them.
    """
    # The ratio dict maps the labels 0.0 and 1.0 to 100 each.
    step = ADASYNStep(kwargs={'ratio': {0.0: 100, 1.0: 100}})
    X, y = rand_df_classification(val_range=(0, 100))
    # labeled=False added so test_X is requested the same way as in every
    # other fixture in this file; without it the call uses the default
    # labeling, whose result is unpacked as a pair elsewhere.
    # NOTE(review): confirm StepTest expects unlabeled test data here.
    test_X = rand_df(val_range=(0, 100), labeled=False)
class SinTests3(unittest.TestCase, StepTest):
    """Fixture for SinStep limited to columns '1', '2', '3', append_input=True.

    Declares the ``step``, ``X``, ``y`` and ``test_X`` class attributes;
    the StepTest mixin (defined outside this view) presumably reads them.
    """
    step = SinStep(append_input=True, columns=['1', '2', '3'])
    # Default rand_df call is unpacked into a pair; labeled=False is stored
    # as a single object.
    X, y = rand_df()
    test_X = rand_df(labeled=False)
class SinTests1(unittest.TestCase, StepTest):
    """Fixture for a SinStep built with no arguments, using labeled data.

    Declares the ``step``, ``X``, ``y`` and ``test_X`` class attributes;
    the StepTest mixin (defined outside this view) presumably reads them.
    """
    step = SinStep()
    # Default rand_df call is unpacked into a pair; labeled=False is stored
    # as a single object.
    X, y = rand_df()
    test_X = rand_df(labeled=False)
class PolyTests1(unittest.TestCase, StepTest):
    """Fixture for a PolyStep built with no arguments, using labeled data.

    Declares the ``step``, ``X``, ``y`` and ``test_X`` class attributes;
    the StepTest mixin (defined outside this view) presumably reads them.
    """
    step = PolyStep()
    # Default rand_df call is unpacked into a pair; labeled=False is stored
    # as a single object.
    X, y = rand_df()
    test_X = rand_df(labeled=False)
class LogTests3(unittest.TestCase, StepTest):
    """Fixture for LogStep on columns '1' and '10' with np.log10.

    The step is created with append_input=True. Declares the ``step``,
    ``X``, ``y`` and ``test_X`` class attributes; the StepTest mixin
    (defined outside this view) presumably reads them.
    """
    step = LogStep(append_input=True, columns=['1', '10'], log_func=np.log10)
    # Both data sets are requested with val_range=(0, 100).
    X, y = rand_df(val_range=(0, 100))
    test_X = rand_df(val_range=(0, 100), labeled=False)
class LogTests1(unittest.TestCase, StepTest):
    """Fixture for a LogStep built with no arguments, using labeled data.

    Declares the ``step``, ``X``, ``y`` and ``test_X`` class attributes;
    the StepTest mixin (defined outside this view) presumably reads them.
    """
    step = LogStep()
    # Both data sets are requested with val_range=(0, 100).
    X, y = rand_df(val_range=(0, 100))
    test_X = rand_df(val_range=(0, 100), labeled=False)
class TreeTests2(unittest.TestCase, StepTest):
    """Fixture for TreeSelectionStep using ExtraTreesClassifier as tree_model.

    Declares the ``step``, ``X``, ``y`` and ``test_X`` class attributes;
    the StepTest mixin (defined outside this view) presumably reads them.
    """
    # The model class itself is passed, not an instance.
    step = TreeSelectionStep(tree_model=ExtraTreesClassifier)
    # Training data comes from rand_df_classification; test data from
    # rand_df with labeled=False.
    X, y = rand_df_classification()
    test_X = rand_df(labeled=False)
class TreeTests1(unittest.TestCase, StepTest):
    """Fixture for a TreeSelectionStep built with no arguments.

    Declares the ``step``, ``X``, ``y`` and ``test_X`` class attributes;
    the StepTest mixin (defined outside this view) presumably reads them.
    """
    step = TreeSelectionStep()
    # Default rand_df call is unpacked into a pair; labeled=False is stored
    # as a single object.
    X, y = rand_df()
    test_X = rand_df(labeled=False)
class PearsonCorrTests2(unittest.TestCase, StepTest):
    """Fixture for PearsonCorrStep created with num_features=50.

    Declares the ``step``, ``X``, ``y`` and ``test_X`` class attributes;
    the StepTest mixin (defined outside this view) presumably reads them.
    """
    step = PearsonCorrStep(num_features=50)
    # Default rand_df call is unpacked into a pair; labeled=False is stored
    # as a single object.
    X, y = rand_df()
    test_X = rand_df(labeled=False)
class ListTests2(unittest.TestCase, StepTest):
    """Fixture for ListSelectionStep ('11', '22') without a target.

    ``X`` and ``test_X`` both come from rand_df(labeled=False) and ``y`` is
    None. The StepTest mixin (defined outside this view) presumably reads
    these class attributes.
    """
    step = ListSelectionStep(features=['11', '22'])
    # Unlabeled training data paired with an explicit None target.
    X, y = rand_df(labeled=False), None
    test_X = rand_df(labeled=False)
class LDATransform1(unittest.TestCase, StepTest):
    """Fixture for an LDATransformStep built with no arguments.

    Declares the ``step``, ``X``, ``y`` and ``test_X`` class attributes;
    the StepTest mixin (defined outside this view) presumably reads them.
    """
    step = LDATransformStep()
    # Training data comes from rand_df_classification; test data from
    # rand_df with labeled=False.
    X, y = rand_df_classification()
    test_X = rand_df(labeled=False)
class LDATransform2(unittest.TestCase, StepTest):
    """Fixture for LDATransformStep created with append_input=True.

    Declares the ``step``, ``X``, ``y`` and ``test_X`` class attributes;
    the StepTest mixin (defined outside this view) presumably reads them.
    """
    step = LDATransformStep(append_input=True)
    # Training data comes from rand_df_classification; test data from
    # rand_df with labeled=False.
    X, y = rand_df_classification()
    test_X = rand_df(labeled=False)
Пример #18
0
class ABODTests(unittest.TestCase, StepTest):
    """Fixture for ABODStep created with num_remove=1.

    Declares the ``step``, ``X``, ``y`` and ``test_X`` class attributes;
    the StepTest mixin (defined outside this view) presumably reads them.
    """
    step = ABODStep(num_remove=1)
    # Both data sets are requested with outlier=True.
    X, y = rand_df_classification(outlier=True)
    test_X = rand_df(labeled=False, outlier=True)
class LogTests2(unittest.TestCase, StepTest):
    """Fixture for a LogStep built with no arguments and no target.

    ``X`` and ``test_X`` both come from rand_df(val_range=(0, 100),
    labeled=False) and ``y`` is None. The StepTest mixin (defined outside
    this view) presumably reads these class attributes.
    """
    step = LogStep()
    # Unlabeled training data paired with an explicit None target.
    X, y = rand_df(val_range=(0, 100), labeled=False), None
    test_X = rand_df(val_range=(0, 100), labeled=False)
Пример #20
0
class IsoForestDefault(unittest.TestCase, StepTest):
    """Fixture for an IsoForestStep built with no arguments.

    Declares the ``step``, ``X``, ``y`` and ``test_X`` class attributes;
    the StepTest mixin (defined outside this view) presumably reads them.
    """
    step = IsoForestStep()
    # Both data sets are requested with outlier=True.
    X, y = rand_df(outlier=True)
    test_X = rand_df(labeled=False, outlier=True)
class PCATests2(unittest.TestCase, StepTest):
    """Fixture for PCAStep created with append_input=True and no target.

    ``X`` and ``test_X`` both come from rand_df(labeled=False) and ``y`` is
    None. The StepTest mixin (defined outside this view) presumably reads
    these class attributes.
    """
    step = PCAStep(append_input=True)
    # Unlabeled training data paired with an explicit None target.
    X, y = rand_df(labeled=False), None
    test_X = rand_df(labeled=False)
Пример #22
0
class IsoForestIncludeY(unittest.TestCase, StepTest):
    """Fixture for IsoForestStep created with include_y=False.

    Declares the ``step``, ``X``, ``y`` and ``test_X`` class attributes;
    the StepTest mixin (defined outside this view) presumably reads them.
    """
    # NOTE(review): the class name says "IncludeY" while the flag is False;
    # presumably the name refers to the parameter being exercised - confirm.
    step = IsoForestStep(include_y=False)
    # Both data sets are requested with outlier=True.
    X, y = rand_df(outlier=True)
    test_X = rand_df(labeled=False, outlier=True)
class PolyTests3(unittest.TestCase, StepTest):
    """Fixture for PolyStep created with append_input=True and no target.

    Training data is requested with shape (100, 10) and test data with
    shape (50, 10), both with labeled=False; ``y`` is None. The StepTest
    mixin (defined outside this view) presumably reads these attributes.
    """
    step = PolyStep(append_input=True)
    # Unlabeled training data paired with an explicit None target.
    X, y = rand_df(shape=(100, 10), labeled=False), None
    # The test set is requested with half as many rows as the training set.
    test_X = rand_df(shape=(50, 10), labeled=False)
Пример #24
0
class LFODefault(unittest.TestCase, StepTest):
    """Fixture for a LOFStep built with no arguments.

    Declares the ``step``, ``X``, ``y`` and ``test_X`` class attributes;
    the StepTest mixin (defined outside this view) presumably reads them.
    """
    # NOTE(review): "LFO" in the class name looks like a transposition of
    # "LOF" (the step under test is LOFStep); the name is left unchanged.
    step = LOFStep()
    # Both data sets are requested with outlier=True.
    X, y = rand_df_classification(outlier=True)
    test_X = rand_df(labeled=False, outlier=True)
class SinTests2(unittest.TestCase, StepTest):
    """Fixture for a SinStep built with no arguments and no target.

    ``X`` and ``test_X`` both come from rand_df(labeled=False) and ``y`` is
    None. The StepTest mixin (defined outside this view) presumably reads
    these class attributes.
    """
    step = SinStep()
    # Unlabeled training data paired with an explicit None target.
    X, y = rand_df(labeled=False), None
    test_X = rand_df(labeled=False)
Пример #26
0
class LFOIncludeY(unittest.TestCase, StepTest):
    """Fixture for LOFStep created with include_y=False.

    Declares the ``step``, ``X``, ``y`` and ``test_X`` class attributes;
    the StepTest mixin (defined outside this view) presumably reads them.
    """
    # NOTE(review): "LFO" in the class name looks like a transposition of
    # "LOF", and the name says "IncludeY" while the flag is False - confirm.
    step = LOFStep(include_y=False)
    # Both data sets are requested with outlier=True.
    X, y = rand_df(outlier=True)
    test_X = rand_df(labeled=False, outlier=True)
class StandardScalerTests1(unittest.TestCase, StepTest):
    """Fixture for a StandardScalerStep built with no arguments.

    Declares the ``step``, ``X``, ``y`` and ``test_X`` class attributes;
    the StepTest mixin (defined outside this view) presumably reads them.
    """
    step = StandardScalerStep()
    # Default rand_df call is unpacked into a pair; labeled=False is stored
    # as a single object.
    X, y = rand_df()
    test_X = rand_df(labeled=False)
class StandardScalerTests2(unittest.TestCase, StepTest):
    """Fixture for a StandardScalerStep built with no arguments, no target.

    ``X`` and ``test_X`` both come from rand_df(labeled=False) and ``y`` is
    None. The StepTest mixin (defined outside this view) presumably reads
    these class attributes.
    """
    step = StandardScalerStep()
    # Unlabeled training data paired with an explicit None target.
    X, y = rand_df(labeled=False), None
    test_X = rand_df(labeled=False)
class SMOTETests1(unittest.TestCase, StepTest):
    """Fixture for a SMOTEStep built with no arguments.

    Declares the ``step``, ``X``, ``y`` and ``test_X`` class attributes;
    the StepTest mixin (defined outside this view) presumably reads them.
    """
    step = SMOTEStep()
    X, y = rand_df_classification(val_range=(0, 100))
    # labeled=False added so test_X is requested the same way as in every
    # other fixture in this file; without it the call uses the default
    # labeling, whose result is unpacked as a pair elsewhere.
    # NOTE(review): confirm StepTest expects unlabeled test data here.
    test_X = rand_df(val_range=(0, 100), labeled=False)
class StandardScalerTests3(unittest.TestCase, StepTest):
    """Fixture for StandardScalerStep created with append_input=True.

    Declares the ``step``, ``X``, ``y`` and ``test_X`` class attributes;
    the StepTest mixin (defined outside this view) presumably reads them.
    """
    step = StandardScalerStep(append_input=True)
    # Default rand_df call is unpacked into a pair; labeled=False is stored
    # as a single object.
    X, y = rand_df()
    test_X = rand_df(labeled=False)