def test_add(self):
    """Check that ``Pipeline.add`` can place a step with ``before=`` and ``after=``.

    A 99-row frame is written to CSV and run through ``times_two`` and
    ``split``. ``squared`` is then added before ``times_two``, removed, and
    added again after ``"read_file"``; in both placements the value at row
    label 42 of column ``A`` must equal ``(original ** 2) * 2``.
    """
    csv_path = "./datasets/configs/dataset.csv"
    frame = pd.DataFrame({name: np.arange(1, 100) for name in ("A", "B")})
    frame.to_csv(csv_path, index=False)

    # Reference value from the untouched in-memory frame; the pipeline only
    # ever sees the CSV copy, so this stays constant across runs.
    baseline = frame.loc[42, "A"]

    pipeline = Pipeline(
        df_path=csv_path,
        steps=[times_two, split],
        params={"col_1": "A", "test_size": 0.2},
    )

    # Initial run: only the doubling step affects column A.
    pipeline.process()
    assert pipeline.params["df"].loc[42, "A"] == baseline * 2

    # Insert `squared` ahead of `times_two`.
    pipeline.add(squared, {"col_2": "A"}, before="times_two")
    pipeline.process()
    observed = pipeline.params["df"].loc[42, "A"]
    assert observed == (baseline**2) * 2

    # Re-insert the same step, this time positioned relative to "read_file"
    # (presumably the pipeline's built-in loading step -- confirm in Pipeline).
    pipeline.remove("squared")
    pipeline.add(squared, {"col_2": "A"}, after="read_file")
    pipeline.process()
    observed = pipeline.params["df"].loc[42, "A"]
    assert observed == (baseline**2) * 2
def test_remove(self):
    """Check that ``Pipeline.remove`` drops a step by name.

    With ``split`` present the run must yield 80 training rows; after
    removing ``"split"`` and re-running, the processed frame must have as
    many rows as the frame that was written to disk.
    """
    csv_path = "./datasets/configs/dataset.csv"
    frame = pd.DataFrame({name: np.arange(1, 100) for name in ("A", "B")})
    frame.to_csv(csv_path, index=False)
    expected_rows = frame.shape[0]

    pipeline = Pipeline(
        df_path=csv_path,
        steps=[times_two, squared, split],
        params={"col_1": "A", "col_2": "B", "test_size": 0.2},
    )

    # NOTE(review): the frame has 99 rows, so the exact train size depends on
    # how `split` rounds test_size=0.2 -- confirm against its implementation.
    pipeline.process()
    assert len(pipeline.params["X_train"]) == 80

    # Without the split step the row count of the processed frame is checked
    # against the original frame.
    pipeline.remove("split")
    pipeline.process()
    assert pipeline.params["df"].shape[0] == expected_rows
def test_config():
    """Check that a ``Pipeline`` can be driven by a saved config file.

    The parameters are persisted with ``save_config`` and handed to the
    pipeline through ``config_file`` together with ``custom_read`` as the
    reader. The run must yield 80 training rows; after removing ``"split"``
    and re-running, ``df`` and ``df_copy`` in the pipeline params must have
    the same number of rows.
    """
    csv_path = "./datasets/configs/dataset.csv"
    config_path = "./datasets/configs/pipeline_config.json"

    frame = pd.DataFrame({name: np.arange(1, 100) for name in ("A", "B")})
    frame.to_csv(csv_path, index=False)

    # Persist the parameters so the pipeline loads them from disk instead of
    # receiving them directly.
    save_config(
        config_path,
        {
            "df": "./datasets/configs/dataset.csv",
            "col_1": "A",
            "col_2": "B",
            "test_size": 0.2,
        },
    )

    pipeline = Pipeline(
        df_path=csv_path,
        steps=[times_two, squared, split],
        config_file=config_path,
        custom_reader=custom_read,
    )

    pipeline.process()
    assert len(pipeline.params["X_train"]) == 80

    pipeline.remove("split")
    pipeline.process()
    # `df_copy` is presumably populated by custom_read -- confirm in its
    # definition; here it only serves as the row-count reference.
    processed_rows = pipeline.params["df"].shape[0]
    reference_rows = pipeline.params["df_copy"].shape[0]
    assert processed_rows == reference_rows