def test_output_file_exists(test_data, results_path):
    """Run ``impy.util.compare`` on two imputations and check the logged scores.

    The data built by the ``test_data`` fixture is imputed with mode and
    mean (each on its own copy), the comparison is logged to
    ``results_path``, and the first line of that log is parsed back as a
    Python literal and compared with the expected score mapping.
    """
    data = test_data(SHAPE)
    labels = np.array([1, 0, 1, 1, 0])

    # Each entry is [name, (imputed_data, labels)], as passed to compare().
    candidates = [
        ["mode", (impy.mode(np.copy(data)), labels)],
        ["mean", (impy.mean(np.copy(data)), labels)],
    ]
    impy.util.compare(candidates, log_path=results_path)

    expected = {'mode': [('SVC', 0.0)], 'mean': [('SVC', 0.0)]}
    with open(results_path, 'r') as fin:
        first_line = next(fin)
    assert ast.literal_eval(first_line) == expected
def setUp(self):
    """Build the fixtures shared by the tests in this case.

    self.data_m: Incomplete dataset/Has missing values. Built by
        ``impy.dataset.test_data`` from a 5x5 mask with only cell
        [0][0] set.
    self.imputed_mode: list of ``[name, (imputed_data, labels)]``
        entries, one for mode imputation and one for mean imputation.
    """
    mask = np.zeros((5, 5), dtype=bool)
    mask[0][0] = True
    data_m = impy.dataset.test_data(mask=mask)
    # Expose the incomplete dataset on the instance, as the docstring has
    # always advertised; the imputers below only ever receive copies of it.
    self.data_m = data_m
    labels = np.array([1, 0, 1, 1, 0])
    self.imputed_mode = [
        ["mode", (impy.mode(np.copy(data_m)), labels)],
        ["mean", (impy.mean(np.copy(data_m)), labels)],
    ]
def test_mode_impute_missing_values():
    """Mode imputation of ``data_m`` must leave no NaN in the result."""
    result = impy.mode(data_m)
    nan_cells = np.isnan(result)
    assert not nan_cells.any()
def test_median_return_type():
    """The imputed result must be an ``np.ndarray``.

    NOTE(review): the test name says "median" but the call below
    exercises ``impy.mode`` -- confirm which one was intended.
    """
    result = impy.mode(data_m)
    is_ndarray = isinstance(result, np.ndarray)
    assert is_ndarray
def test_mode_impute_missing_values(self):
    """Mode imputation of ``self.data_m`` must leave no NaN in the result."""
    result = impy.mode(self.data_m)
    has_nan = np.isnan(result).any()
    self.assertFalse(has_nan)
def test_median_return_type(self):
    """The imputed result must be an ``np.ndarray``.

    NOTE(review): the test name says "median" but the call below
    exercises ``impy.mode`` -- confirm which one was intended.
    """
    imputed = impy.mode(self.data_m)
    # assertIsInstance reports the actual type on failure, unlike
    # assertTrue(isinstance(...)), which only says "False is not true".
    self.assertIsInstance(imputed, np.ndarray)
"""test_compare.py""" import numpy as np import impyute as impy mask = np.zeros((5, 5), dtype=bool) mask[0][0] = True data_m = impy.dataset.test_data(mask=mask) labels = np.array([1, 0, 1, 1, 0]) imputed_mode = [] imputed_mode.append(["mode", (impy.mode(np.copy(data_m)), labels)]) imputed_mode.append(["mean", (impy.mean(np.copy(data_m)), labels)]) def test_output_file_exists(): """ Small test to just check that it runs without fialing""" path = "./results.txt" impy.util.compare(imputed_mode, log_path=path)
def __init__(self, T, mask, algo, miss_info, kf, notobj, obj, target):
    """Prepare masked copies of a complete table for imputation runs.

    Args:
        T: complete DataFrame; its columns label the mask as well.
        mask: array-like laid out like ``T``; cells equal to 1 are
            blanked out (set to NaN) in the working copies.
        algo: stored unchanged as ``self.algo``.
        miss_info: mapping read for ``"ord_col"`` and ``"num_col"``,
            plus ``"ce_encoder"`` when ``obj`` is given.
        kf: stored unchanged as ``self.kf``.
        notobj: column labels used to select ``self.X``, ``self.T``
            and ``self.mask`` (presumably the non-object columns).
        obj: column labels of the object/category columns; ``None`` or
            ``[]`` means there are none.
        target: column label in ``T`` kept as ``self.target_y``, or None.

    NOTE(review): every exception raised here is printed and then
    discarded, so a failed construction yields a partially populated
    instance instead of an error -- confirm callers rely on this.
    """
    try:
        self.miss_info = miss_info
        self.columns = notobj
        self.ord_num_col = miss_info["ord_col"] + miss_info["num_col"]

        self.rawT = T
        self.target = target
        self.target_y = None if target is None else T[target]

        # cv and MSE each get their own independent copy of the template.
        metric_template = {"rmse": {}, "nrmse": {}}
        self.cv = deepcopy(metric_template)
        self.kf = kf
        self.MSE = deepcopy(metric_template)
        self.result = {}
        self.time_ck = {}

        X = deepcopy(T)
        mask_df = pd.DataFrame(mask, columns=T.columns.tolist())
        self.rawmask = mask_df
        # Boolean array of the cells to blank out; reused for every
        # full-width frame below.
        missing_cells = (mask_df == 1).values
        X[missing_cells] = np.nan

        # Treat an empty column list the same as "no object columns".
        if obj in [None, []]:
            obj = None

        self.X = X[notobj]
        self.T = T[notobj]
        self.mask = mask_df[notobj]
        self.notobj = notobj

        if obj is None:
            # Numeric only.
            numeric_frame = deepcopy(self.X)
            numeric_frame[(self.mask == 1).values] = np.nan
            self.full_miss_data = deepcopy(numeric_frame)
            self.full_mice_data = deepcopy(numeric_frame)
        else:
            # Numeric + category: fill object columns with their most
            # frequent value before keeping them.
            most_frequent = SimpleImputer(strategy="most_frequent")
            X[obj] = most_frequent.fit_transform(X[obj])
            self.true_obj = T[obj]
            self.pd_obj = X[obj]

            # Encoded copy of T: replace each mapped column's values using
            # the encoder's mapping (index -> values), then re-blank.
            encoded = deepcopy(T)
            encoder = miss_info["ce_encoder"]
            for entry in encoder.category_mapping:
                col = entry["col"]
                mapping = entry["mapping"]
                encoded[col] = encoded[col].replace(
                    dict(zip(mapping.index, mapping.values)))
            self.full_miss_data = encoded
            self.full_miss_data[missing_cells] = np.nan

            # Copy of T with "Cols_" prefixed to every object-column value.
            prefixed = deepcopy(T)
            for obj_col in obj:
                prefixed[obj_col] = "Cols_" + prefixed[obj_col]
            self.full_mice_data = prefixed
            self.full_mice_data[missing_cells] = np.nan

        self.algo = algo
        # Name -> callable dispatch table; nothing is invoked here.
        self.method = {
            "MissForest": lambda x: MissForest(verbose=0, n_jobs=-1).fit(x),
            "mean": lambda x: impy.mean(x),
            "median": lambda x: impy.median(x),
            "mode": lambda x: impy.mode(x),
            "knn": lambda x: impy.fast_knn(x),
            "MICE": lambda x: impy.mice(x),
            "EM": lambda x: impy.em(x),
            "MultipleImputer": lambda x: MultipleImputer(
                n=1, return_list=True
            ).fit_transform(pd.DataFrame(x)).values,
        }
    except Exception as exc:
        print(exc)
def test_mode(test_data):
    """Impute fixture data with ``impy.mode`` and pass it to ``return_na_check``.

    NOTE(review): ``return_na_check`` is defined elsewhere; presumably it
    asserts on the type of the result and the absence of NaN -- confirm.
    """
    return_na_check(impy.mode(test_data(SHAPE)))