def imputeMethod(result, loss, firstImputedMethod, autoMethod, originData,
                 missData, missRate, missPattern, dataType='continuous',
                 firstImputedData="None"):
    """Run one TAI imputation and record its error metrics in ``result``.

    The method label stored with the scores is
    ``<firstImputedMethod>_<loss>_<autoMethod>``.

    :param result: accumulator handed to ``addResult``; whatever
        ``addResult`` returns replaces it and is returned to the caller.
    :param loss: forwarded to ``TAI`` as ``loss``.
    :param firstImputedMethod: forwarded to ``TAI`` as
        ``first_imputation_method``.
    :param autoMethod: forwarded to ``TAI`` as ``Autoencoder_method``.
    :param originData: reference data the imputed output is scored against.
    :param missData: data containing missing entries. It is measured with
        ``len`` and indexed as ``missData[0]`` (presumably a 2-D array;
        confirm against callers).
    :param missRate: forwarded unchanged to ``addResult``.
    :param missPattern: forwarded unchanged to ``addResult``.
    :param dataType: any value other than ``'continuous'`` routes the imputed
        data through ``modifier`` together with the distinct non-NaN values
        found in ``missData``.
    :param firstImputedData: forwarded to ``TAI``; also serves as the second
        return value when the run fails.
    :return: ``(result, imputedData)`` on success. On any exception,
        ``(result, firstImputedData)`` where ``firstImputedData`` is whatever
        that name holds at the time of the failure (the argument, or the
        value produced by ``TAI`` if the failure happened afterwards); in
        that case ``np.inf`` is recorded for all three metrics.
    """
    label = "{}_{}_{}".format(firstImputedMethod, loss, autoMethod)
    try:
        model = TAI(
            first_imputation_method=firstImputedMethod,
            firstImputedData=firstImputedData,
            batch_size=len(missData),
            epochs=500,
            theta=int(len(missData[0]) / 3),
            iterations=1000,
            Autoencoder_method=autoMethod,
            loss=loss,
            use_cuda=False,
        )
        imputedData, firstImputedData = model.complete(missData)

        if dataType != 'continuous':
            # Distinct non-NaN values present in the incomplete data.
            distinct = pd.DataFrame(np.unique(missData)).dropna(axis=0)
            mark = [row[0] for row in distinct.values]
            imputedData = modifier(imputedData, mark)

        rmse = evaluate.RMSE(originData, imputedData)
        mae = MAE(originData, imputedData)
        mape = masked_mape_np(originData, imputedData)
        result = addResult(result, missRate, missPattern, label,
                           rmse, mae, mape)
        returned = imputedData
    except Exception as e:
        # Best-effort: any failure (imputation or scoring) is printed and
        # recorded as an infinite error instead of aborting the experiment.
        print(e)
        result = addResult(result, missRate, missPattern, label,
                           np.inf, np.inf, np.inf)
        returned = firstImputedData
    return result, returned
def imputeMethodMICE(result, originData, missData, missRate, missPattern,
                     dataType='continuous'):
    """Impute ``missData`` with ``mice.MICE`` and record the error metrics.

    :param result: accumulator handed to ``addResult``; whatever
        ``addResult`` returns replaces it and is returned to the caller.
    :param originData: reference data the imputed output is scored against.
    :param missData: data containing missing entries.
    :param missRate: forwarded unchanged to ``addResult``.
    :param missPattern: forwarded unchanged to ``addResult``.
    :param dataType: any value other than ``'continuous'`` routes the imputed
        data through ``modifier`` together with the distinct non-NaN values
        found in ``missData``.
    :return: ``(result, imputedData)``. On any exception ``imputedData`` is
        the string ``'none'`` and ``np.inf`` is recorded for every metric.
    """
    label = "MICE"
    try:
        not_continuous = dataType != 'continuous'
        # The imputation call is the same for every data type; only the
        # post-processing differs.
        imputedData = mice.MICE().complete(missData)
        if not_continuous:
            # Distinct non-NaN values present in the incomplete data.
            distinct = pd.DataFrame(np.unique(missData)).dropna(axis=0)
            mark = [row[0] for row in distinct.values]
            imputedData = modifier(imputedData, mark)

        rmse = evaluate.RMSE(originData, imputedData)
        mae = MAE(originData, imputedData)
        mape = masked_mape_np(originData, imputedData)
        result = addResult(result, missRate, missPattern, label,
                           rmse, mae, mape)
    except Exception as e:
        # Best-effort: print the failure and record an infinite error.
        print(e)
        imputedData = 'none'
        result = addResult(result, missRate, missPattern, label,
                           np.inf, np.inf, np.inf)
    return result, imputedData
def imputeMethodMedain(result, originData, missData, missRate, missPattern,
                       dataType='continuous'):
    """Impute ``missData`` with ``SimpleFill("median")`` and record metrics.

    The function name keeps its original spelling because callers refer to it.

    :param result: accumulator handed to ``addResult``; whatever
        ``addResult`` returns replaces it and is returned to the caller.
    :param originData: reference data the imputed output is scored against.
    :param missData: data containing missing entries.
    :param missRate: forwarded unchanged to ``addResult``.
    :param missPattern: forwarded unchanged to ``addResult``.
    :param dataType: any value other than ``'continuous'`` routes the imputed
        data through ``modifier`` together with the distinct non-NaN values
        found in ``missData``.
    :return: ``(result, imputedData)``. On any exception ``imputedData`` is
        the string ``'none'`` and ``np.inf`` is recorded for every metric.
    """
    label = "median"
    try:
        filler = SimpleFill("median")
        imputedData = filler.fit_transform(missData)
        if dataType != 'continuous':
            # Distinct non-NaN values present in the incomplete data.
            distinct = pd.DataFrame(np.unique(missData)).dropna(axis=0)
            mark = [row[0] for row in distinct.values]
            imputedData = modifier(imputedData, mark)

        rmse = evaluate.RMSE(originData, imputedData)
        mae = MAE(originData, imputedData)
        mape = masked_mape_np(originData, imputedData)
        result = addResult(result, missRate, missPattern, label,
                           rmse, mae, mape)
    except Exception as e:
        # Best-effort: print the failure and record an infinite error.
        print(e)
        imputedData = 'none'
        result = addResult(result, missRate, missPattern, label,
                           np.inf, np.inf, np.inf)
    return result, imputedData
def imputeMethod2(result, originData, missData, missRate, missPattern):
    """Impute ``missData`` with ``yKNN`` and record the error metrics.

    The neighbour count passed as ``k`` is the integer part of the square
    root of ``len(missData)``.

    :param result: accumulator handed to ``addResult``; whatever
        ``addResult`` returns replaces it and is returned to the caller.
    :param originData: reference data the imputed output is scored against.
    :param missData: data containing missing entries.
    :param missRate: forwarded unchanged to ``addResult``.
    :param missPattern: forwarded unchanged to ``addResult``.
    :return: the updated ``result`` only; the imputed data is not returned.
        On any exception ``np.inf`` is recorded for every metric.
    """
    label = "ycimpute KNN"
    try:
        neighbours = int(math.sqrt(len(missData)))
        imputedData = yKNN(k=neighbours).complete(missData)

        rmse = evaluate.RMSE(originData, imputedData)
        mae = MAE(originData, imputedData)
        mape = masked_mape_np(originData, imputedData)
        result = addResult(result, missRate, missPattern, label,
                           rmse, mae, mape)
    except Exception as e:
        # Best-effort: print the failure and record an infinite error.
        print(e)
        result = addResult(result, missRate, missPattern, label,
                           np.inf, np.inf, np.inf)
    return result
def imputeMethodMR(result, originData, missData, missRate, missPattern,
                   dataType='continuous'):
    """Impute with ``PredictiveImputer(f_model='RandomForest')`` and score it.

    The imputer is fitted on ``missData`` and then applied to a copy of it,
    so ``transform`` never receives the caller's own object.

    :param result: accumulator handed to ``addResult``; whatever
        ``addResult`` returns replaces it and is returned to the caller.
    :param originData: reference data the imputed output is scored against.
    :param missData: data containing missing entries; must provide ``copy()``.
    :param missRate: forwarded unchanged to ``addResult``.
    :param missPattern: forwarded unchanged to ``addResult``.
    :param dataType: any value other than ``'continuous'`` routes the imputed
        data through ``modifier`` together with the distinct non-NaN values
        found in ``missData``.
    :return: ``(result, imputedData)``. On any exception ``imputedData`` is
        the string ``'none'`` and ``np.inf`` is recorded for every metric.
    """
    label = "RandomForest"
    try:
        imputer = predictive_imputer.PredictiveImputer(f_model='RandomForest')
        fitted = imputer.fit(missData)
        imputedData = fitted.transform(missData.copy())
        if dataType != 'continuous':
            # Distinct non-NaN values present in the incomplete data.
            distinct = pd.DataFrame(np.unique(missData)).dropna(axis=0)
            mark = [row[0] for row in distinct.values]
            imputedData = modifier(imputedData, mark)

        rmse = evaluate.RMSE(originData, imputedData)
        mae = MAE(originData, imputedData)
        mape = masked_mape_np(originData, imputedData)
        result = addResult(result, missRate, missPattern, label,
                           rmse, mae, mape)
    except Exception as e:
        # Best-effort: print the failure and record an infinite error.
        print(e)
        imputedData = 'none'
        result = addResult(result, missRate, missPattern, label,
                           np.inf, np.inf, np.inf)
    return result, imputedData
def imputeMethodFixed(result, originData, missData, missRate, missPattern,
                      dataType='continuous'):
    """Default-value imputation via ``fixedImpute``, with metrics recorded.

    According to the original note the fill value is 0; ``fixedImpute`` is
    defined elsewhere, so confirm there.

    :param result: accumulator handed to ``addResult``; whatever
        ``addResult`` returns replaces it and is returned to the caller.
    :param originData: reference data the imputed output is scored against.
    :param missData: data containing missing entries.
    :param missRate: forwarded unchanged to ``addResult``.
    :param missPattern: forwarded unchanged to ``addResult``.
    :param dataType: any value other than ``'continuous'`` routes the imputed
        data through ``modifier`` together with the distinct non-NaN values
        found in ``missData``.
    :return: ``(result, imputedData)``. On any exception ``imputedData`` is
        the string ``'none'`` and ``np.inf`` is recorded for every metric.
    """
    label = "Fixed"
    try:
        imputedData = fixedImpute(missData)
        if dataType != 'continuous':
            # Distinct non-NaN values present in the incomplete data.
            distinct = pd.DataFrame(np.unique(missData)).dropna(axis=0)
            mark = [row[0] for row in distinct.values]
            imputedData = modifier(imputedData, mark)

        rmse = evaluate.RMSE(originData, imputedData)
        mae = MAE(originData, imputedData)
        mape = masked_mape_np(originData, imputedData)
        result = addResult(result, missRate, missPattern, label,
                           rmse, mae, mape)
    except Exception as e:
        # Best-effort: print the failure and record an infinite error.
        print(e)
        imputedData = 'none'
        result = addResult(result, missRate, missPattern, label,
                           np.inf, np.inf, np.inf)
    return result, imputedData
missData = gene_missingdata_block_bias(rate=i, data=originData) else: raise Exception( "缺失模式错误,请在'normal','taxa','chara','block'中选择对应模式") mark = [ temp[0] for temp in pd.DataFrame(np.unique(missData)).dropna( axis=0).values ] try: imputedData = mice.MICE().complete(missData) imputedData = modifier(imputedData, mark) score = evaluate.RMSE(originData, imputedData) mice_misc[0].append(score) mice_misc[1].append(MAE(originData, imputedData)) mice_misc[2].append(masked_mape_np(originData, imputedData)) mice_misc[3].append(TF(originData, imputedData)) logger.info("MICE missing rate:{},RMSE:{}".format(i, score)) except: mice_misc[0].append(np.inf) mice_misc[1].append(np.inf) mice_misc[2].append(np.inf) mice_misc[3].append(np.inf) try: imputedData = IterativeImputer().fit_transform(missData) imputedData = modifier(imputedData, mark) score = evaluate.RMSE(originData, imputedData) ii_misc[0].append(score) ii_misc[1].append(MAE(originData, imputedData)) ii_misc[2].append(masked_mape_np(originData, imputedData))
epochs=300, theta=int(len(missData[0]) / 2), iterations=30, Autoencoder_method=method, loss=loss, use_cuda=False ).complete(missData) logger.info("训练耗时:{}".format(time.time()-start)) score = RMSE(originData, imputedData) score1 = RMSE(originData, first_imputedData) logger.info("{}_{}_{}_{}_{} first missing rate:{},RMSE:{}".format(file,missPattern,first_imputed_method,loss,method,i, score1)) logger.info("{}_{}_{}_{}_{} missing rate:{},RMSE:{}".format(file,missPattern,first_imputed_method,loss,method,i, score)) globals()[varname][0].append(score) globals()[varname][1].append(MAE(originData, imputedData)) globals()[varname][2].append(masked_mape_np(originData, imputedData)) except Exception as e: logger.error(e) globals()[varname][0].append(np.nan) globals()[varname][1].append(np.nan) globals()[varname][2].append(np.nan) #将三个指标在各个缺失状态下的结果求和 logger.error("*" * 30) logger.error("file:{}".format(file)) logger.error("pattern :{}".format(missPattern)) for varname in methed_names_half: half.append([sum(globals()[varname][0][0:3]), sum(globals()[varname][1][0:3]), sum(globals()[varname][2][0:3])]) logger.error("half {} rmse:{} ,MAE:{},MAPE:{}".format(varname, sum(globals()[varname][0][0:3]),
missData = gene_missingdata_taxa_bias(rate=i, data=originData) elif missPattern == 'chara': missData = gene_missingdata_chara_bias(rate=i, data=originData) elif missPattern == 'block': missData = gene_missingdata_block_bias(rate=i, data=originData) else: raise Exception("缺失模式错误,请在'normal','taxa','chara','block'中选择对应模式") mark = [temp[0] for temp in pd.DataFrame(np.unique(missData)).dropna(axis=0).values] try: imputedData = mice.MICE().complete(missData) imputedData = modifier(imputedData, mark) score = evaluate.RMSE(originData, imputedData) mice_misc[0].append(score) mice_misc[1].append(MAE(originData, imputedData)) mice_misc[2].append(masked_mape_np(originData, imputedData)) mice_misc[3].append(TF(originData, imputedData)) logger.info("MICE missing rate:{},RMSE:{}".format(i, score)) except: mice_misc[0].append(np.inf) mice_misc[1].append(np.inf) mice_misc[2].append(np.inf) mice_misc[3].append(np.inf) try: imputedData = IterativeImputer().fit_transform(missData) imputedData = modifier(imputedData, mark) score = evaluate.RMSE(originData, imputedData) ii_misc[0].append(score) ii_misc[1].append(MAE(originData, imputedData)) ii_misc[2].append(masked_mape_np(originData, imputedData))