def test_iterative_imputer_train_test_with_low_rank_random_matrix():
    """Fit on the first 250 rows, impute the remaining rows, and bound the error.

    The imputer is fitted on the incomplete training rows only and then used
    to complete the held-out incomplete rows; the mean absolute error reported
    by ``reconstruction_error`` for the missing entries must stay below 0.1.
    """
    split_at = 250
    train_incomplete = XY_incomplete[:split_at]
    heldout_incomplete = XY_incomplete[split_at:]
    heldout_truth = XY[split_at:]

    imputer = IterativeImputer(n_iter=50, random_state=0)
    imputer.fit(train_incomplete)
    heldout_completed = imputer.transform(heldout_incomplete)

    # NOTE(review): missing_mask is passed as-is while the other two arrays
    # are the rows from 250 onward -- confirm reconstruction_error expects that.
    _unused, missing_mae = reconstruction_error(
        heldout_truth,
        heldout_completed,
        missing_mask,
        name="IterativeImputer Train/Test",
    )
    assert missing_mae < 0.1, "Error too high with IterativeImputer train/test method!"
# Example no. 2
0
class DFIterativeImputer(BaseEstimator, TransformerMixin):
    """IterativeImputer wrapper whose ``transform`` returns a pandas DataFrame.

    The wrapped imputer is created and fitted in ``fit``; ``transform`` then
    re-attaches the index and column labels of its input to the imputed values.
    """

    def __init__(self, max_iter=10):
        self.max_iter = max_iter
        # Populated by fit(); stays None until then.
        self.imputer = None

    def fit(self, X, y=None):
        """Create a new IterativeImputer with ``max_iter`` and fit it on ``X``.

        ``y`` is accepted but not used. Returns ``self``.
        """
        self.imputer = IterativeImputer(max_iter=self.max_iter)
        self.imputer.fit(X)
        return self

    def transform(self, X):
        """Impute ``X`` and return the result as a DataFrame labelled like ``X``.

        ``X`` must expose ``.index`` and ``.columns``; both are reused for the
        returned frame.
        """
        return pd.DataFrame(
            self.imputer.transform(X),
            index=X.index,
            columns=X.columns,
        )
def mice_imputation(train, test):
    """Impute masked entries of ``test`` with an IterativeImputer fitted on ``train``.

    Both inputs are indexed as ``[sample, channel, feature]``. Channel 0 holds
    the values; wherever channel 1 equals ``1.0`` the corresponding value is
    treated as missing and replaced by NaN before imputation. The inputs are
    copied first, so the caller's arrays are not modified.

    Args:
        train: array used to fit the imputer.
        test: array whose channel-0 values are imputed.

    Returns:
        Whatever ``IterativeImputer.transform`` returns for the channel-0
        slice of ``test`` (one row per test sample).
    """
    data_mice_train = np.copy(train)
    data_mice_test = np.copy(test)

    # Basic slicing yields views, so the assignments below write straight
    # into the copies made above.
    train_values = data_mice_train[:, 0, :]
    test_values = data_mice_test[:, 0, :]

    # Blank out every value whose mask entry is set, for all samples at once.
    train_values[data_mice_train[:, 1, :] == 1.0] = np.nan
    test_values[data_mice_test[:, 1, :] == 1.0] = np.nan

    # A feature with no observed training value at all gets a constant 0.0
    # (presumably so the imputer has something to fit on for that column
    # -- TODO confirm against the imputer in use).
    fully_missing = np.isnan(train_values).all(axis=0)
    train_values[:, fully_missing] = 0.0

    mice_impute = IterativeImputer()
    mice_impute.fit(train_values)
    return mice_impute.transform(test_values)
# Example no. 4
0
# NOTE(review): IterativeImputer presumably comes from the commented-out
# fancyimpute import below -- confirm where it is actually imported.
#from fancyimpute import KNN, NuclearNormMinimization, SoftImpute, IterativeImputer, BiScaler

## Iterative imputation: configure the imputer once, fit it on the training
## features only, then apply the same fitted imputer to train and test.
## [[?]] Probably works for continuous variables only...
mod_impute = IterativeImputer(
    imputation_order="ascending",
    n_iter=10,
    #predictor = sklearn.linear.RidgeCV(), ## default
    random_state=21)

## Fit on the training data:
mod_impute.fit(dat_train_x)

## Impute the training data:
dat_train_x_nparray = mod_impute.transform(dat_train_x)
## Per the original author's check, transform() returns a numpy.ndarray (!),
## so the index and column labels have to be re-attached below.

## Convert back into a pandas DataFrame, reusing the original labels.
## dat_train_x is rebound here, after it was used for fitting above.
dat_train_x = pd.DataFrame(data=dat_train_x_nparray,
                           index=dat_train_x.index,
                           columns=dat_train_x.columns)

## Impute the test data with the imputer fitted on the training data:
dat_test_x_nparray = mod_impute.transform(dat_test_x)
## As above, dat_test_x_nparray is a numpy.ndarray, not a DataFrame.

## Convert back into a pandas DataFrame, reusing the original labels:
dat_test_x = pd.DataFrame(data=dat_test_x_nparray,
                          index=dat_test_x.index,
                          columns=dat_test_x.columns)
# Example no. 5
0
##################################################################
# NOTE(review): bare expression -- its value is discarded when this runs as a
# script; looks like a notebook leftover.
df.columns
# Multivariate feature imputation
# Drop the columns listed below, then one-hot encode the remaining
# categorical columns so the imputer receives a purely numeric frame.
df_impute = df.drop([
    'Obs_ID', 'atty_firm_name', 'employ_status', 'how_injury_occur',
    'jurisdiction', 'detail_cause', 'handling_office', 'injury_postal',
    'length_how_injury', 'time_injury', 'Dependent', 'policy_yr'
],
                    axis=1)
df_impute = pd.get_dummies(df_impute)
import numpy as np
# enable_iterative_imputer must be imported before IterativeImputer: the
# estimator is experimental in scikit-learn and this import switches it on.
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
imp = IterativeImputer(max_iter=10, random_state=0)
imp.fit(df_impute)
imputed_df = imp.transform(df_impute)
# NOTE(review): only columns= is passed, so imputed_df gets a fresh default
# index instead of df_impute.index -- confirm that dropping the original row
# labels is intended before the CSV is written.
imputed_df = pd.DataFrame(imputed_df, columns=df_impute.columns)

# Writes the imputed frame (including its index column) to the working directory.
imputed_df.to_csv('combined_impute.csv')
# IterativeImputer(random_state=0)
# NOTE(review): X_test has two features, while imp was fitted on df_impute.
# transform() needs the same number of features as fit(), so this fails
# unless df_impute happens to have exactly two columns. The snippet and the
# comment below look copied from the scikit-learn documentation example and
# describe that example's data, not df_impute -- confirm or remove.
X_test = [[np.nan, 2], [6, np.nan], [np.nan, 6]]
# the model learns that the second feature is double the first
print(np.round(imp.transform(X_test)))

##################################################################
#############  CORRELATED MISSING VALUES IMPUTATION  #############
##################################################################

############################################################################################
################### ENCODING AND MISSING VALUE IMPUTATIONS #################################
############################################################################################