Python Imputer示例

编程语言: Python

命名空间/包名称: atom.data_cleaning

类/类型: Imputer

hotexamples.com的示例: 15

Python Imputer - 已找到15个示例。这些是从开源项目中提取的、评价最高的 atom.data_cleaning.Imputer 真实 Python 代码示例。您可以为示例评分，以帮助我们提高示例质量。

常用方法

显示/隐藏

Imputer(15)

fit_transform(11)

示例#1

显示文件

def test_imputing_all_missing_values_categorical(missing):
    """Check that no missing entries remain in categorical columns.

    `missing` is the placeholder value planted in the data; it is
    presumably supplied by a parametrized fixture (not visible here).
    """
    features = [
        [missing, "a", "a"],
        ["b", "c", missing],
        ["b", "a", "c"],
        ["c", "a", "a"],
    ]
    target = [1, 1, 0, 0]
    cat_imputer = Imputer(strat_cat="most_frequent")
    transformed, _ = cat_imputer.fit_transform(features, target)
    # Summing the NaN mask over both axes gives the total count of missing cells.
    assert transformed.isna().sum().sum() == 0

示例#2

显示文件

def test_imputing_all_missing_values_numeric(missing):
    """Check that no missing entries remain in numeric columns.

    `missing` is the placeholder value planted in the data; it is
    presumably supplied by a parametrized fixture (not visible here).
    """
    features = [
        [missing, 1, 1],
        [2, 5, 2],
        [4, missing, 1],
        [2, 1, 1],
    ]
    target = [1, 1, 0, 0]
    mean_imputer = Imputer(strat_num="mean")
    # Append 99 to the imputer's `missing` attribute (presumably its list
    # of values treated as missing; confirm against the Imputer class).
    mean_imputer.missing.append(99)
    transformed, _ = mean_imputer.fit_transform(features, target)
    assert transformed.isna().sum().sum() == 0

示例#3

显示文件

def test_cols_too_many_nans():
    """Assert that columns with too many missing values are dropped.

    Five all-NaN columns are appended to a copy of ``X_bin``. After
    imputing with ``min_frac_cols=0.5`` the frame must be back to its
    original column count and contain no missing values.
    """
    X = X_bin.copy()
    # Record the expected width up front instead of hard-coding it, so the
    # test does not silently depend on the fixture's exact shape.
    n_original_cols = X.shape[1]
    n_rows = X.shape[0]
    for i in range(5):  # Add 5 cols with all NaN values
        X[f"col {i}"] = [np.nan] * n_rows
    impute = Imputer(
        strat_num="mean",
        strat_cat="most_frequent",
        min_frac_cols=0.5,
    )
    X, y = impute.fit_transform(X, y_bin)
    assert len(X.columns) == n_original_cols
    assert X.isna().sum().sum() == 0

示例#4

显示文件

def test_rows_too_many_nans():
    """Assert that rows with too many missing values are dropped.

    Five all-NaN rows are appended to a copy of ``X_bin``. After
    imputing with ``min_frac_rows=0.5`` the frame must be back to its
    original length and contain no missing values.
    """
    X = X_bin.copy()
    # Record the expected length up front instead of hard-coding it, so the
    # test does not silently depend on the fixture's exact size.
    n_original_rows = len(X)
    nan_row = [np.nan] * X.shape[1]  # loop-invariant: the width never changes
    for _ in range(5):  # Add 5 rows with all NaN values
        # Using len(X) as the new label appends a row at the end
        # (assumes the default integer index; confirm against X_bin).
        X.loc[len(X)] = nan_row
    # Random binary labels, one per row including the added ones.
    y = [np.random.randint(2) for _ in range(len(X))]
    impute = Imputer(
        strat_num="mean",
        strat_cat="most_frequent",
        min_frac_rows=0.5,
    )
    X, y = impute.fit_transform(X, y)
    assert len(X) == n_original_rows
    assert X.isna().sum().sum() == 0

示例#5

显示文件

def test_imputing_non_numeric_most_frequent():
    """Verify the "most_frequent" strategy on non-numerical data."""
    cat_imputer = Imputer(strat_cat="most_frequent")
    result, _ = cat_imputer.fit_transform(X10_sn, y10)
    # The cell at row 0, column 2 is expected to hold "d" after imputation.
    filled_cell = result.iloc[0, 2]
    assert filled_cell == "d"
    assert result.isna().sum().sum() == 0

示例#6

显示文件

def test_imputing_non_numeric_drop():
    """Verify the "drop" strategy on non-numerical data."""
    dropper = Imputer(strat_cat="drop")
    result, _ = dropper.fit_transform(X10_sn, y10)
    # Nine rows are expected to remain once the drop strategy is applied.
    remaining_rows = len(result)
    assert remaining_rows == 9
    assert result.isna().sum().sum() == 0

示例#7

显示文件

def test_imputing_non_numeric_string():
    """Verify that a literal string can be imputed for non-numerical values."""
    string_imputer = Imputer(strat_cat="missing")
    result, _ = string_imputer.fit_transform(X10_sn, y10)
    # The cell at row 0, column 2 is expected to hold the fill string itself.
    filled_cell = result.iloc[0, 2]
    assert filled_cell == "missing"
    assert result.isna().sum().sum() == 0

示例#8

显示文件

def test_imputing_numeric_most_frequent():
    """Verify the "most_frequent" strategy on numerical data."""
    mode_imputer = Imputer(strat_num="most_frequent")
    result, _ = mode_imputer.fit_transform(X10_nan, y10)
    # The cell at row 0, column 0 is expected to be filled with 3.
    filled_cell = result.iloc[0, 0]
    assert filled_cell == 3
    assert result.isna().sum().sum() == 0

示例#9

显示文件

def test_imputing_numeric_mean():
    """Verify the "mean" strategy on numerical data."""
    # Tolerant comparison target for the imputed float value.
    expected_mean = pytest.approx(2.577778, rel=1e-6, abs=1e-12)
    mean_imputer = Imputer(strat_num="mean")
    result, _ = mean_imputer.fit_transform(X10_nan, y10)
    assert result.iloc[0, 0] == expected_mean
    assert result.isna().sum().sum() == 0

示例#10

显示文件

def test_imputing_numeric_number():
    """Verify that a fixed number can be imputed for numerical values."""
    constant_imputer = Imputer(strat_num=3.2)
    result, _ = constant_imputer.fit_transform(X10_nan, y10)
    # The cell at row 0, column 0 is expected to hold the constant itself.
    filled_cell = result.iloc[0, 0]
    assert filled_cell == 3.2
    assert result.isna().sum().sum() == 0

示例#11

显示文件

def test_imputing_numeric_drop():
    """Verify the "drop" strategy on numerical data."""
    dropper = Imputer(strat_num="drop")
    result, _ = dropper.fit_transform(X10_nan, y10)
    # Eight rows are expected to remain once the drop strategy is applied.
    remaining_rows = len(result)
    assert remaining_rows == 8
    assert result.isna().sum().sum() == 0

示例#12

显示文件

def test_imputer_is_fitted():
    """Verify that transforming with an unfitted Imputer raises NotFittedError."""
    # Build the instance and bind the method outside the raises block so
    # only the transform call itself is expected to raise.
    unfitted_transform = Imputer().transform
    with pytest.raises(NotFittedError):
        unfitted_transform(X_bin, y_bin)

示例#13

显示文件

def test_invalid_min_frac_cols():
    """Verify that fitting with min_frac_cols=5.2 raises ValueError."""
    # Construction succeeds; the error is expected only when fit is called.
    fit_with_bad_frac = Imputer(min_frac_cols=5.2).fit
    with pytest.raises(ValueError):
        fit_with_bad_frac(X_bin, y_bin)

示例#14

显示文件

def test_strat_num_parameter():
    """Verify that fitting with strat_num="invalid" raises ValueError."""
    # Construction succeeds; the error is expected only when fit is called.
    fit_with_bad_strategy = Imputer(strat_num="invalid").fit
    with pytest.raises(ValueError):
        fit_with_bad_strategy(X_bin, y_bin)

示例#15

显示文件

文件： test_api.py 项目： hado2020/ATOM

def test_load_data_with_no_trainer():
    """Verify that loading a saved Imputer together with data raises TypeError.

    An Imputer is saved to disk and then passed to ATOMLoader along with
    a data tuple; the loader is expected to reject that combination.
    """
    saved_path = FILE_DIR + "imputer"
    Imputer().save(saved_path)
    with pytest.raises(TypeError):
        ATOMLoader(saved_path, data=(X_bin,))