def test_StandardScaler_output(test_input):
    """Check the first standardized value produced by ``Scaler.execute``.

    The return value of ``execute`` is ignored; the result is read back from
    ``test_input["train_df"]``. The "Distance" column is checked when it is
    present, otherwise the "Negatives" column is checked.
    """
    Scaler().execute(params=test_input)

    frame = test_input["train_df"]
    # Choose the column under test together with its expected first value.
    if "Distance" in frame.keys():
        column, expected = "Distance", 1.08006
    else:
        column, expected = "Negatives", 0.57771

    # Compare at five decimal places.
    assert round(frame[column][0], 5) == expected
def test_BinaryScaler_output(test_input):
    """Check that ``Scaler.execute`` leaves only 0/1 values in "Negatives".

    The return value of ``execute`` is ignored; the result is read back from
    ``test_input["df"]``.
    """
    scaler = Scaler()
    scaler.execute(params=test_input)

    negatives = test_input["df"]["Negatives"]

    # Every element must be exactly 0 or 1. The previous form compared the
    # single boolean returned by ``.any()`` with 1 or 0, which is always true
    # and therefore never inspected the data.
    assert ((negatives == 0) | (negatives == 1)).all()

    # No element may lie strictly between 0 and 1. Written with explicit
    # comparisons because the boolean ``inclusive=False`` argument of
    # ``Series.between`` is not accepted by newer pandas releases.
    assert not ((negatives > 0) & (negatives < 1)).any()

    assert negatives[0] == 1
def test_MinMaxScaler_output(test_input):
    """Check that every scaled value lies in the closed interval [0, 1].

    The return value of ``execute`` is ignored; the result is read back from
    ``test_input["train_df"]``. The "Distance" column is checked when it is
    present, otherwise the "Negatives" column is checked.
    """
    scaler = Scaler()
    scaler.execute(params=test_input)

    frame = test_input["train_df"]
    column = "Distance" if "Distance" in frame.keys() else "Negatives"
    scaled = frame[column].values

    # Compare element-wise first, then reduce. The previous form reduced with
    # ``.all()`` first and compared the resulting bool with 0 or 1, which is
    # always true regardless of the data.
    assert (scaled >= 0).all()
    assert (scaled <= 1).all()
def test_BinaryScaler_output(test_input):
    """Check that ``Scaler.execute`` leaves only 0/1 values in "Negatives".

    The return value of ``execute`` is ignored; the result is read back from
    ``test_input["train_df"]``. The expected first value depends on
    ``test_input["threshold"]["Negatives"]``: 1 when that entry is not -1,
    otherwise 0.
    """
    scaler = Scaler()
    scaler.execute(params=test_input)

    negatives = test_input["train_df"]["Negatives"]

    # Every element must be exactly 0 or 1. The previous form compared the
    # single boolean returned by ``.any()`` with 1 or 0, which is always true
    # and therefore never inspected the data.
    assert ((negatives == 0) | (negatives == 1)).all()

    # No element may lie strictly between 0 and 1.
    assert not negatives.between(0, 1, inclusive="neither").any()

    if test_input["threshold"]["Negatives"] != -1:
        assert negatives[0] == 1
    else:
        assert negatives[0] == 0
def __init__(
    self,
    train_df_path=None,
    test_df_path=None,
    steps=None,
    config_file=None,
    params=None,
    custom_reader=None,
):
    """Initialise the object with a fixed list of preprocessing steps.

    Every argument except ``steps`` is forwarded unchanged to the parent
    initialiser as a keyword argument.

    NOTE(review): the ``steps`` argument is replaced unconditionally by the
    list built below, so a caller-supplied value is never used. Confirm this
    is intended.
    """
    # Callables taken from freshly constructed helper objects, listed in the
    # order in which they are handed to the parent initialiser.
    pipeline = [
        Parser().parse_dataset,
        NullValuesHandler().execute,
        Encoder().encode,
        HandleOutlier().handle_outliers,
        Scaler().execute,
        SelectKBest().fit_transform,
        Split().train_test_split,
    ]

    super().__init__(
        train_df_path=train_df_path,
        test_df_path=test_df_path,
        steps=pipeline,
        config_file=config_file,
        params=params,
        custom_reader=custom_reader,
    )
def test_incorrect_input_type(test_input, error):
    """Check that ``Scaler.execute`` raises ``error`` for ``test_input``.

    Args:
        test_input: Parameters passed to ``Scaler.execute`` as ``params``.
        error: Exception type that the call is expected to raise.
    """
    # Construct the scaler outside the guarded region so that only the
    # ``execute`` call can satisfy the expectation. An exception raised by
    # the constructor now fails the test instead of passing it.
    scaler = Scaler()
    with pytest.raises(error):
        scaler.execute(params=test_input)
def test_MinMaxScaler_output(test_input):
    """Check that every scaled "Distance" value lies in the interval [0, 1].

    The return value of ``execute`` is ignored; the result is read back from
    ``test_input["df"]``.
    """
    scaler = Scaler()
    scaler.execute(params=test_input)

    distance = test_input["df"]["Distance"].values

    # Compare element-wise first, then reduce. The previous form reduced with
    # ``.all()`` first and compared the resulting bool with 0 or 1, which is
    # always true regardless of the data.
    assert (distance >= 0).all()
    assert (distance <= 1).all()