def imputeMethodMICE(result, originData, missData, missRate, missPattern, dataType='continuous'):
    """Impute ``missData`` with MICE and record its error metrics in ``result``.

    :param result: accumulator handed to ``addResult``; the value returned by
        ``addResult`` is passed back to the caller.
    :param originData: complete reference data the imputation is scored against.
    :param missData: data containing the missing entries to fill.
    :param missRate: missing rate label stored alongside the metrics.
    :param missPattern: missing pattern label stored alongside the metrics.
    :param dataType: anything other than ``'continuous'`` routes the imputed
        data through ``modifier`` before scoring.
    :return: ``(result, imputedData)``; ``imputedData`` is the string
        ``'none'`` when any step raised.
    """
    imputationMethod = "MICE"
    try:
        # The MICE completion is the same for every data type; only the
        # post-processing differs.
        imputedData = mice.MICE().complete(missData)
        if dataType != 'continuous':
            # Distinct non-NaN values present in the incomplete data.
            # NOTE(review): presumably modifier maps imputed values onto
            # these observed values -- confirm against modifier.
            observed = pd.DataFrame(np.unique(missData)).dropna(axis=0)
            mark = [row[0] for row in observed.values]
            imputedData = modifier(imputedData, mark)
        rmse = evaluate.RMSE(originData, imputedData)
        mae = MAE(originData, imputedData)
        mape = masked_mape_np(originData, imputedData)
        result = addResult(result, missRate, missPattern, imputationMethod,
                           rmse, mae, mape)
    except Exception as e:
        # Best effort: report the failure and record infinite error so the
        # benchmark run can continue with the remaining methods.
        print(e)
        imputedData = 'none'
        result = addResult(result, missRate, missPattern, imputationMethod,
                           np.inf, np.inf, np.inf)
    return result, imputedData
def imputeMethod(result, loss, firstImputedMethod, autoMethod, originData, missData, missRate, missPattern, dataType='continuous', firstImputedData="None"):
    """Impute ``missData`` with TAI and record its error metrics in ``result``.

    The method label stored with the metrics is
    ``"<firstImputedMethod>_<loss>_<autoMethod>"``.

    :param result: accumulator handed to ``addResult``; the value returned by
        ``addResult`` is passed back to the caller.
    :param loss: loss identifier forwarded to ``TAI``.
    :param firstImputedMethod: initial imputation method forwarded to ``TAI``.
    :param autoMethod: autoencoder variant forwarded to ``TAI``.
    :param originData: complete reference data the imputation is scored against.
    :param missData: data containing the missing entries to fill.
    :param missRate: missing rate label stored alongside the metrics.
    :param missPattern: missing pattern label stored alongside the metrics.
    :param dataType: anything other than ``'continuous'`` routes the imputed
        data through ``modifier`` before scoring.
    :param firstImputedData: forwarded to ``TAI``; defaults to the string
        ``"None"``.
    :return: ``(result, imputedData)`` on success. On failure the second
        element is ``firstImputedData`` instead (the value ``TAI`` returned if
        it completed before the error, otherwise the argument as passed in).
    """
    imputationMethod = "{}_{}_{}".format(firstImputedMethod, loss, autoMethod)
    try:
        sample_count = len(missData)
        # theta is one third of the length of the first row, truncated.
        theta = int(len(missData[0]) / 3)
        imputer = TAI(first_imputation_method=firstImputedMethod,
                      firstImputedData=firstImputedData,
                      batch_size=sample_count,
                      epochs=500,
                      theta=theta,
                      iterations=1000,
                      Autoencoder_method=autoMethod,
                      loss=loss,
                      use_cuda=False)
        imputedData, firstImputedData = imputer.complete(missData)
        if dataType != 'continuous':
            # Distinct non-NaN values present in the incomplete data.
            observed = pd.DataFrame(np.unique(missData)).dropna(axis=0)
            mark = [row[0] for row in observed.values]
            imputedData = modifier(imputedData, mark)
        rmse = evaluate.RMSE(originData, imputedData)
        mae = MAE(originData, imputedData)
        mape = masked_mape_np(originData, imputedData)
        result = addResult(result, missRate, missPattern, imputationMethod,
                           rmse, mae, mape)
        return result, imputedData
    except Exception as e:
        # Best effort: report the failure and record infinite error so the
        # benchmark run can continue.
        print(e)
        result = addResult(result, missRate, missPattern, imputationMethod,
                           np.inf, np.inf, np.inf)
        return result, firstImputedData
def imputeMethodMedain(result, originData, missData, missRate, missPattern, dataType='continuous'):
    """Impute ``missData`` with ``SimpleFill("median")`` and record its metrics.

    :param result: accumulator handed to ``addResult``; the value returned by
        ``addResult`` is passed back to the caller.
    :param originData: complete reference data the imputation is scored against.
    :param missData: data containing the missing entries to fill.
    :param missRate: missing rate label stored alongside the metrics.
    :param missPattern: missing pattern label stored alongside the metrics.
    :param dataType: anything other than ``'continuous'`` routes the imputed
        data through ``modifier`` before scoring.
    :return: ``(result, imputedData)``; ``imputedData`` is the string
        ``'none'`` when any step raised.
    """
    imputationMethod = "median"
    try:
        filler = SimpleFill("median")
        imputedData = filler.fit_transform(missData)
        if dataType != 'continuous':
            # Distinct non-NaN values present in the incomplete data.
            observed = pd.DataFrame(np.unique(missData)).dropna(axis=0)
            mark = [row[0] for row in observed.values]
            imputedData = modifier(imputedData, mark)
        rmse = evaluate.RMSE(originData, imputedData)
        mae = MAE(originData, imputedData)
        mape = masked_mape_np(originData, imputedData)
        result = addResult(result, missRate, missPattern, imputationMethod,
                           rmse, mae, mape)
    except Exception as e:
        # Best effort: report the failure and record infinite error so the
        # benchmark run can continue.
        print(e)
        imputedData = 'none'
        result = addResult(result, missRate, missPattern, imputationMethod,
                           np.inf, np.inf, np.inf)
    return result, imputedData
def evaluate(self, X_mis, X_full):
    """Return the RMSE of EM imputation, measured on the missing entries only.

    :param X_mis: data with NaN marking the missing entries; a shallow copy
        is handed to ``EM().complete`` rather than the object itself.
    :param X_full: complete reference data, indexed with the same missing
        index as the imputed result.
    :return: the value of ``evaluate.RMSE`` for reference vs. imputed entries.
    """
    nan_mask = np.isnan(X_mis)
    # NOTE(review): get_missing_index is assumed to turn the boolean mask
    # into an index usable on both arrays -- confirm against its definition.
    missing_index = evaluate.get_missing_index(nan_mask)
    reference_values = X_full[missing_index]
    completed = EM().complete(copy.copy(X_mis))
    imputed_values = completed[missing_index]
    return evaluate.RMSE(reference_values, imputed_values)
def imputeMethod2(result, originData, missData, missRate, missPattern):
    """Impute ``missData`` with ycimpute KNN and record its error metrics.

    The neighbour count is the truncated square root of ``len(missData)``.

    :param result: accumulator handed to ``addResult``; the value returned by
        ``addResult`` is passed back to the caller.
    :param originData: complete reference data the imputation is scored against.
    :param missData: data containing the missing entries to fill.
    :param missRate: missing rate label stored alongside the metrics.
    :param missPattern: missing pattern label stored alongside the metrics.
    :return: the updated ``result`` only (the imputed data is not returned).
    """
    imputationMethod = "ycimpute KNN"
    try:
        neighbours = int(math.sqrt(len(missData)))
        imputedData = yKNN(k=neighbours).complete(missData)
        rmse = evaluate.RMSE(originData, imputedData)
        mae = MAE(originData, imputedData)
        mape = masked_mape_np(originData, imputedData)
        result = addResult(result, missRate, missPattern, imputationMethod,
                           rmse, mae, mape)
    except Exception as e:
        # Best effort: report the failure and record infinite error so the
        # benchmark run can continue.
        print(e)
        result = addResult(result, missRate, missPattern, imputationMethod,
                           np.inf, np.inf, np.inf)
    return result
def imputeMethodMR(result, originData, missData, missRate, missPattern, dataType='continuous'):
    """Impute ``missData`` with a RandomForest ``PredictiveImputer``.

    The imputer is fitted on ``missData`` and then applied to a copy of it.

    :param result: accumulator handed to ``addResult``; the value returned by
        ``addResult`` is passed back to the caller.
    :param originData: complete reference data the imputation is scored against.
    :param missData: data containing the missing entries to fill.
    :param missRate: missing rate label stored alongside the metrics.
    :param missPattern: missing pattern label stored alongside the metrics.
    :param dataType: anything other than ``'continuous'`` routes the imputed
        data through ``modifier`` before scoring.
    :return: ``(result, imputedData)``; ``imputedData`` is the string
        ``'none'`` when any step raised.
    """
    imputationMethod = "RandomForest"
    try:
        imputer = predictive_imputer.PredictiveImputer(f_model='RandomForest')
        fitted = imputer.fit(missData)
        imputedData = fitted.transform(missData.copy())
        if dataType != 'continuous':
            # Distinct non-NaN values present in the incomplete data.
            observed = pd.DataFrame(np.unique(missData)).dropna(axis=0)
            mark = [row[0] for row in observed.values]
            imputedData = modifier(imputedData, mark)
        rmse = evaluate.RMSE(originData, imputedData)
        mae = MAE(originData, imputedData)
        mape = masked_mape_np(originData, imputedData)
        result = addResult(result, missRate, missPattern, imputationMethod,
                           rmse, mae, mape)
    except Exception as e:
        # Best effort: report the failure and record infinite error so the
        # benchmark run can continue.
        print(e)
        imputedData = 'none'
        result = addResult(result, missRate, missPattern, imputationMethod,
                           np.inf, np.inf, np.inf)
    return result, imputedData
def imputeMethodFixed(result, originData, missData, missRate, missPattern, dataType='continuous'):
    """Impute ``missData`` with a fixed default value and record its metrics.

    The filling itself is delegated to ``fixedImpute``; per the original
    note on this function the default value used is 0.

    :param result: accumulator handed to ``addResult``; the value returned by
        ``addResult`` is passed back to the caller.
    :param originData: complete reference data the imputation is scored against.
    :param missData: data containing the missing entries to fill.
    :param missRate: missing rate label stored alongside the metrics.
    :param missPattern: missing pattern label stored alongside the metrics.
    :param dataType: anything other than ``'continuous'`` routes the imputed
        data through ``modifier`` before scoring.
    :return: ``(result, imputedData)``; ``imputedData`` is the string
        ``'none'`` when any step raised.
    """
    imputationMethod = "Fixed"
    try:
        imputedData = fixedImpute(missData)
        if dataType != 'continuous':
            # Distinct non-NaN values present in the incomplete data.
            observed = pd.DataFrame(np.unique(missData)).dropna(axis=0)
            mark = [row[0] for row in observed.values]
            imputedData = modifier(imputedData, mark)
        rmse = evaluate.RMSE(originData, imputedData)
        mae = MAE(originData, imputedData)
        mape = masked_mape_np(originData, imputedData)
        result = addResult(result, missRate, missPattern, imputationMethod,
                           rmse, mae, mape)
    except Exception as e:
        # Best effort: report the failure and record infinite error so the
        # benchmark run can continue.
        print(e)
        imputedData = 'none'
        result = addResult(result, missRate, missPattern, imputationMethod,
                           np.inf, np.inf, np.inf)
    return result, imputedData
missData = gene_missingdata_chara_bias(rate=i, data=originData) elif missPattern == 'block': missData = gene_missingdata_block_bias(rate=i, data=originData) else: raise Exception( "缺失模式错误,请在'normal','taxa','chara','block'中选择对应模式") mark = [ temp[0] for temp in pd.DataFrame(np.unique(missData)).dropna( axis=0).values ] try: imputedData = mice.MICE().complete(missData) imputedData = modifier(imputedData, mark) score = evaluate.RMSE(originData, imputedData) mice_misc[0].append(score) mice_misc[1].append(MAE(originData, imputedData)) mice_misc[2].append(masked_mape_np(originData, imputedData)) mice_misc[3].append(TF(originData, imputedData)) logger.info("MICE missing rate:{},RMSE:{}".format(i, score)) except: mice_misc[0].append(np.inf) mice_misc[1].append(np.inf) mice_misc[2].append(np.inf) mice_misc[3].append(np.inf) try: imputedData = IterativeImputer().fit_transform(missData) imputedData = modifier(imputedData, mark) score = evaluate.RMSE(originData, imputedData) ii_misc[0].append(score)