Пример #1
0
 def test_add(self):
     """Check that ``Pipeline.add`` places a new step where requested.

     A frame with columns "A" and "B" (values 1..99) is written to CSV and
     processed with ``times_two`` and ``split``. ``squared`` is then
     inserted twice: first with ``before="times_two"``, then (after being
     removed) with ``after="read_file"``. In both cases row 42 of column
     "A" is expected to equal ``(original ** 2) * 2``.
     """
     csv_path = "./datasets/configs/dataset.csv"
     row, col = 42, "A"

     source = pd.DataFrame({name: np.arange(1, 100) for name in ("A", "B")})
     source.to_csv(csv_path, index=False)

     step_args = dict(col_1="A", test_size=0.2)
     pipe = Pipeline(df_path=csv_path, steps=[times_two, split], params=step_args)

     # Baseline run: only the doubling step touches column "A".
     pipe.process()
     doubled = pipe.params["df"].loc[row, col]
     assert doubled == source.loc[row, col] * 2

     # Insert `squared` ahead of `times_two`, anchored by the step's name.
     pipe.add(squared, {"col_2": "A"}, before="times_two")
     pipe.process()
     observed = pipe.params["df"].loc[row, col]
     original = source.loc[row, col]
     assert observed == (original**2) * 2

     # Re-insert `squared` relative to "read_file" instead.
     # NOTE(review): "read_file" is not defined in this test; presumably it
     # is the pipeline's own loading step — confirm against Pipeline.
     pipe.remove("squared")
     pipe.add(squared, {"col_2": "A"}, after="read_file")
     pipe.process()
     observed = pipe.params["df"].loc[row, col]
     original = source.loc[row, col]
     assert observed == (original**2) * 2
Пример #2
0
 def test_remove(self):
     """Check that ``Pipeline.remove`` drops a step by name.

     The pipeline first runs ``times_two``, ``squared`` and ``split`` and
     must expose an ``"X_train"`` entry of length 80. After ``"split"`` is
     removed and the pipeline is processed again, ``params["df"]`` must
     have as many rows as the frame originally written to disk.
     """
     csv_path = "./datasets/configs/dataset.csv"

     source = pd.DataFrame({name: np.arange(1, 100) for name in ("A", "B")})
     source.to_csv(csv_path, index=False)

     step_args = dict(col_1="A", col_2="B", test_size=0.2)
     pipe = Pipeline(
         df_path=csv_path,
         steps=[times_two, squared, split],
         params=step_args,
     )

     pipe.process()
     # NOTE(review): the fixture has 99 rows (np.arange(1, 100)); whether an
     # 80/20 split yields exactly 80 training rows depends on how `split`
     # rounds — confirm against its implementation.
     train_rows = len(pipe.params["X_train"])
     assert train_rows == 80

     pipe.remove("split")
     pipe.process()
     processed = pipe.params["df"]
     assert processed.shape[0] == source.shape[0]
Пример #3
0
def test_config():
    """Run the pipeline from a saved JSON config with a custom reader.

    The step parameters are stored with ``save_config`` and handed to
    ``Pipeline`` through ``config_file`` rather than ``params``. The test
    expects an ``"X_train"`` entry of length 80 after the first run, and,
    once ``"split"`` is removed and the pipeline re-run, equal row counts
    for ``params["df"]`` and ``params["df_copy"]``.
    """
    csv_path = "./datasets/configs/dataset.csv"
    config_path = "./datasets/configs/pipeline_config.json"

    source = pd.DataFrame({name: np.arange(1, 100) for name in ("A", "B")})
    source.to_csv(csv_path, index=False)

    settings = dict(df=csv_path, col_1="A", col_2="B", test_size=0.2)
    save_config(config_path, settings)

    pipe = Pipeline(
        df_path=csv_path,
        steps=[times_two, squared, split],
        config_file=config_path,
        custom_reader=custom_read,
    )

    pipe.process()
    # NOTE(review): the fixture has 99 rows (np.arange(1, 100)); whether an
    # 80/20 split yields exactly 80 training rows depends on how `split`
    # rounds — confirm against its implementation.
    train_rows = len(pipe.params["X_train"])
    assert train_rows == 80

    pipe.remove("split")
    pipe.process()
    # NOTE(review): "df_copy" is not set anywhere in this test; presumably
    # `custom_read` or a step stores it — confirm.
    current_rows = pipe.params["df"].shape[0]
    copied_rows = pipe.params["df_copy"].shape[0]
    assert current_rows == copied_rows