def mainWork(path, savePath):
    """Run the MICE + TAI imputation experiment for each spreadsheet in ``path``.

    Every directory entry whose name ends in ``xlsx`` or ``csv`` is loaded
    with ``readAllTypeFile``. For each missing pattern and missing rate a
    corrupted copy of the data is generated, imputed with ``MICE`` and then
    refined with ``TAI``. The accumulated ``result`` is written as one JSON
    file per (file, missing pattern) into ``savePath``.

    Args:
        path: Directory whose entries are scanned for input files.
        savePath: Directory receiving the JSON result files; it is created
            if it does not exist yet.

    Raises:
        Exception: If a missing pattern other than 'normal', 'taxa',
            'chara' or 'block' is requested.
    """
    pbar = tqdm.tqdm(os.listdir(path), desc='dirs')
    for file in pbar:
        pbar.set_description("Processing %s" % file)
        # Guard clause: skip everything that is not a spreadsheet-like file.
        if not file.endswith(('xlsx', 'csv')):
            continue

        originData = readAllTypeFile(os.path.join(path, file))
        for missPattern in ['normal']:
            # One result container per (file, pattern); it is threaded
            # through every imputation call below and saved at the end.
            result = {}
            for missRate in [0.05, 0.1, 0.2, 0.3, 0.4, 0.5]:
                # Kept as an if/elif chain so that only the generator for
                # the active pattern is ever looked up.
                if missPattern == 'normal':
                    missData = gene_missingdata(rate=missRate, data=originData)
                elif missPattern == 'taxa':
                    missData = gene_missingdata_taxa_bias(rate=missRate, data=originData)
                elif missPattern == 'chara':
                    missData = gene_missingdata_chara_bias(rate=missRate, data=originData)
                elif missPattern == 'block':
                    missData = gene_missingdata_block_bias(rate=missRate, data=originData)
                else:
                    raise Exception("缺失模式错误,请在'normal','taxa','chara','block'中选择对应模式")

                result, MICEImputedData = MICE(result, originData, missData, missRate, missPattern)

                for firstImputedMethod in ['mice']:
                    if firstImputedMethod == 'mice':
                        firstImputedData = MICEImputedData
                    for loss in ['MSELoss']:
                        # Only one autoencoder variant is enabled; earlier
                        # revisions also listed 'Autoencoder',
                        # 'ResAutoencoder' and 'StockedAutoencoder' here.
                        for autoMethod in ['StockedResAutoencoder']:
                            # perf_counter is monotonic, so the measured
                            # duration cannot be skewed by clock adjustments.
                            start = time.perf_counter()
                            result, _ = TAI(
                                result=result,
                                firstImputedMethod=firstImputedMethod,
                                firstImputedData=firstImputedData,
                                loss=loss,
                                autoMethod=autoMethod,
                                originData=originData,
                                missData=missData,
                                missRate=missRate,
                                missPattern=missPattern,
                            )
                            logger.info("{}-{}-{}训练耗时:{}".format(
                                firstImputedMethod, loss, autoMethod,
                                time.perf_counter() - start))

            # NOTE(review): the original formatting was lost; saving once per
            # missing pattern (after all rates) is the reconstructed placement
            # - confirm against the upstream file.
            # exist_ok avoids the check-then-create race of a separate
            # os.path.exists() test.
            os.makedirs(savePath, exist_ok=True)
            saveJson(
                result,
                os.path.join(
                    savePath,
                    "{}_{}_{}_{}.json".format(
                        "allMethod", missPattern, file,
                        datetime.datetime.now().strftime('%Y%m%d-%H%M%S'))))
varname = "{}_{}_{}".format(first_imputed_method, loss, method) globals()[varname] = [[] for _ in range(4)] methed_names_half.append(varname) methed_names_all.append(varname) # for i in [0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]: for i in [0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]: if missPattern == 'normal': missData = gene_missingdata(rate=i, data=originData) elif missPattern == 'taxa': missData = gene_missingdata_taxa_bias(rate=i, data=originData) elif missPattern == 'chara': missData = gene_missingdata_chara_bias(rate=i, data=originData) elif missPattern == 'block': missData = gene_missingdata_block_bias(rate=i, data=originData) else: raise Exception( "缺失模式错误,请在'normal','taxa','chara','block'中选择对应模式") mark = [ temp[0] for temp in pd.DataFrame(np.unique(missData)).dropna( axis=0).values ] try: imputedData = mice.MICE().complete(missData) imputedData = modifier(imputedData, mark) score = evaluate.RMSE(originData, imputedData) mice_misc[0].append(score) mice_misc[1].append(MAE(originData, imputedData))
def mainWork(path, savePath):
    """Run all baseline imputers plus TAI for every entry in ``path``.

    Each directory entry is loaded with ``readAllTypeFile`` (no extension
    filtering is applied). For each missing pattern and missing rate a
    corrupted copy of the data is generated and passed through ``Random``,
    ``Medain``, ``KNN``, ``EM``, ``II``, ``GAIN``, ``MIDA`` and ``MICE``.
    The KNN, II and MICE outputs are then each used as the first-stage input
    of ``TAI``. The accumulated ``result`` is written as one JSON file per
    (file, missing pattern) into ``savePath``.

    Args:
        path: Directory whose entries are used as input files.
        savePath: Directory receiving the JSON result files; it is created
            if it does not exist yet.

    Raises:
        Exception: If a missing pattern other than 'normal', 'taxa',
            'chara' or 'block' is requested.
    """
    for file in os.listdir(path):
        originData = readAllTypeFile(os.path.join(path, file))
        # Only 'normal' is enabled; 'block', 'taxa' and 'chara' are the other
        # patterns handled by the dispatch below.
        for missPattern in ['normal']:
            # One result container per (file, pattern); it is threaded
            # through every imputation call below and saved at the end.
            result = {}
            for missRate in [0.05, 0.1, 0.2, 0.3, 0.4, 0.5]:
                # Kept as an if/elif chain so that only the generator for
                # the active pattern is ever looked up.
                if missPattern == 'normal':
                    missData = gene_missingdata(rate=missRate, data=originData)
                elif missPattern == 'taxa':
                    missData = gene_missingdata_taxa_bias(rate=missRate, data=originData)
                elif missPattern == 'chara':
                    missData = gene_missingdata_chara_bias(rate=missRate, data=originData)
                elif missPattern == 'block':
                    missData = gene_missingdata_block_bias(rate=missRate, data=originData)
                else:
                    raise Exception(
                        "缺失模式错误,请在'normal','taxa','chara','block'中选择对应模式")

                # Baseline imputers: each call returns the updated result
                # together with its imputed data. Only the outputs reused as
                # TAI first-stage input are kept.
                result, _ = Random(result, originData, missData, missRate, missPattern, 'disperse')
                result, _ = Medain(result, originData, missData, missRate, missPattern, 'disperse')
                result, KNNImputedData = KNN(result, originData, missData, missRate, missPattern, 'disperse')
                result, _ = EM(result, originData, missData, missRate, missPattern, 'disperse')
                result, IIImputedData = II(result, originData, missData, missRate, missPattern, 'disperse')
                result, _ = GAIN(result, originData, missData, missRate, missPattern, 'disperse')
                result, _ = MIDA(result, originData, missData, missRate, missPattern, 'disperse')
                result, MICEImputedData = MICE(result, originData, missData, missRate, missPattern, 'disperse')

                # Ordered (method name, first-stage data) pairs fed to TAI.
                firstStageInputs = (
                    ('knn', KNNImputedData),
                    ('ii', IIImputedData),
                    ('mice', MICEImputedData),
                )
                for firstImputedMethod, firstImputedData in firstStageInputs:
                    for loss in ['MSELoss']:
                        # Only 'Autoencoder' is enabled; earlier revisions
                        # also listed 'ResAutoencoder', 'StockedAutoencoder'
                        # and 'StockedResAutoencoder' here.
                        for autoMethod in ['Autoencoder']:
                            # perf_counter is monotonic, so the measured
                            # duration cannot be skewed by clock adjustments.
                            start = time.perf_counter()
                            # NOTE(review): every helper above returns a
                            # (result, data) pair, while TAI's return value is
                            # bound to ``result`` directly - confirm that TAI
                            # returns only the result container here.
                            result = TAI(
                                result=result,
                                firstImputedMethod=firstImputedMethod,
                                # Pass a copy so TAI cannot alter the data
                                # kept from the first-stage imputer.
                                firstImputedData=firstImputedData.copy(),
                                loss=loss,
                                autoMethod=autoMethod,
                                originData=originData,
                                missData=missData,
                                missRate=missRate,
                                missPattern=missPattern)
                            logger.info("改后{}-{}-{}训练耗时:{}".format(
                                firstImputedMethod, loss, autoMethod,
                                time.perf_counter() - start))

            # NOTE(review): the original formatting was lost; saving once per
            # missing pattern (after all rates) is the reconstructed placement
            # - confirm against the upstream file.
            # Make sure the output directory exists before writing into it.
            os.makedirs(savePath, exist_ok=True)
            saveJson(
                result,
                os.path.join(
                    savePath,
                    "{}_{}_{}_{}.json".format(
                        "allmethod", missPattern, file,
                        datetime.datetime.now().strftime('%Y%m%d-%H%M%S'))))