示例#1
0
def imputeMethod(result, loss, firstImputedMethod, autoMethod, originData,
                 missData, missRate, missPattern, dataType='continuous',
                 firstImputedData="None"):
    """Run TAI imputation on ``missData`` and log its error metrics.

    The row recorded through ``addResult`` is labelled
    ``"<firstImputedMethod>_<loss>_<autoMethod>"``.

    :param result: accumulator handed to, and replaced by, ``addResult``.
    :param loss: forwarded to ``TAI`` as ``loss``; also part of the label.
    :param firstImputedMethod: forwarded as ``first_imputation_method``.
    :param autoMethod: forwarded as ``Autoencoder_method``.
    :param originData: reference data the metrics are computed against.
    :param missData: data to impute; must support ``len()`` and ``[0]``.
    :param missRate: recorded alongside the metrics.
    :param missPattern: recorded alongside the metrics.
    :param dataType: any value other than ``'continuous'`` routes the
        imputed output through ``modifier`` together with the distinct
        non-NaN values found in ``missData``.
    :param firstImputedData: forwarded to ``TAI``; defaults to the string
        ``"None"`` (not the ``None`` object).
    :return: ``(result, imputedData)`` on success. If anything raises an
        ``Exception`` the error is printed, ``np.inf`` is recorded for all
        three metrics and ``(result, firstImputedData)`` is returned, where
        ``firstImputedData`` is TAI's second output if TAI had already
        finished, otherwise the argument as passed in.
    """
    imputationMethod = "{}_{}_{}".format(firstImputedMethod, loss, autoMethod)
    try:
        imputer = TAI(
            first_imputation_method=firstImputedMethod,
            firstImputedData=firstImputedData,
            batch_size=len(missData),
            epochs=500,
            theta=int(len(missData[0]) / 3),
            iterations=1000,
            Autoencoder_method=autoMethod,
            loss=loss,
            use_cuda=False,
        )
        # Rebinding firstImputedData here is intentional: the except branch
        # below returns whatever this name holds at the time of failure.
        imputedData, firstImputedData = imputer.complete(missData)

        if dataType != 'continuous':
            # Distinct observed (non-NaN) values of the incomplete data.
            # NOTE(review): presumably modifier snaps imputed values onto
            # these observed levels - confirm against modifier.
            observedFrame = pd.DataFrame(np.unique(missData))
            mark = [row[0] for row in observedFrame.dropna(axis=0).values]
            imputedData = modifier(imputedData, mark)

        rmse = evaluate.RMSE(originData, imputedData)
        mae = MAE(originData, imputedData)
        mape = masked_mape_np(originData, imputedData)
        result = addResult(result, missRate, missPattern, imputationMethod,
                           rmse, mae, mape)
        return result, imputedData
    except Exception as err:
        print(err)
        worst = np.inf
        result = addResult(result, missRate, missPattern, imputationMethod,
                           worst, worst, worst)
        return result, firstImputedData
def imputeMethodMICE(result,
                     originData,
                     missData,
                     missRate,
                     missPattern,
                     dataType='continuous'):
    """Impute ``missData`` with ``mice.MICE`` and log its error metrics.

    :param result: accumulator handed to, and replaced by, ``addResult``.
    :param originData: reference data the metrics are computed against.
    :param missData: data containing missing entries.
    :param missRate: recorded alongside the metrics.
    :param missPattern: recorded alongside the metrics.
    :param dataType: any value other than ``'continuous'`` routes the
        imputed output through ``modifier`` together with the distinct
        non-NaN values found in ``missData``.
    :return: ``(result, imputedData)``. If anything raises an
        ``Exception`` the error is printed, ``np.inf`` is recorded for all
        three metrics and ``imputedData`` is the string ``'none'``.
    """
    imputationMethod = "MICE"
    try:
        # The imputation call is the same for every dataType; only the
        # post-processing differs.
        imputedData = mice.MICE().complete(missData)
        if dataType != 'continuous':
            observedFrame = pd.DataFrame(np.unique(missData))
            mark = [row[0] for row in observedFrame.dropna(axis=0).values]
            imputedData = modifier(imputedData, mark)

        rmse = evaluate.RMSE(originData, imputedData)
        mae = MAE(originData, imputedData)
        mape = masked_mape_np(originData, imputedData)
        result = addResult(result, missRate, missPattern, imputationMethod,
                           rmse, mae, mape)
        return result, imputedData
    except Exception as err:
        print(err)
        imputedData = 'none'
        worst = np.inf
        result = addResult(result, missRate, missPattern, imputationMethod,
                           worst, worst, worst)
        return result, imputedData
def imputeMethodMedain(result,
                       originData,
                       missData,
                       missRate,
                       missPattern,
                       dataType='continuous'):
    """Impute ``missData`` with ``SimpleFill("median")`` and log metrics.

    :param result: accumulator handed to, and replaced by, ``addResult``.
    :param originData: reference data the metrics are computed against.
    :param missData: data containing missing entries.
    :param missRate: recorded alongside the metrics.
    :param missPattern: recorded alongside the metrics.
    :param dataType: any value other than ``'continuous'`` routes the
        imputed output through ``modifier`` together with the distinct
        non-NaN values found in ``missData``.
    :return: ``(result, imputedData)``. If anything raises an
        ``Exception`` the error is printed, ``np.inf`` is recorded for all
        three metrics and ``imputedData`` is the string ``'none'``.
    """
    imputationMethod = "median"
    try:
        filler = SimpleFill("median")
        imputedData = filler.fit_transform(missData)
        if dataType != 'continuous':
            observedValues = pd.DataFrame(np.unique(missData)).dropna(axis=0)
            mark = [row[0] for row in observedValues.values]
            imputedData = modifier(imputedData, mark)

        rmse = evaluate.RMSE(originData, imputedData)
        mae = MAE(originData, imputedData)
        mape = masked_mape_np(originData, imputedData)
        result = addResult(result, missRate, missPattern, imputationMethod,
                           rmse, mae, mape)
    except Exception as err:
        print(err)
        imputedData = 'none'
        worst = np.inf
        result = addResult(result, missRate, missPattern, imputationMethod,
                           worst, worst, worst)
    return result, imputedData
示例#4
0
def imputeMethod2(result, originData, missData, missRate, missPattern):
    """Impute ``missData`` with ``yKNN`` and log its error metrics.

    The neighbour count ``k`` is the integer part of the square root of
    ``len(missData)``. The row is labelled ``"ycimpute KNN"``.

    :param result: accumulator handed to, and replaced by, ``addResult``.
    :param originData: reference data the metrics are computed against.
    :param missData: data containing missing entries; must support ``len()``.
    :param missRate: recorded alongside the metrics.
    :param missPattern: recorded alongside the metrics.
    :return: the updated ``result`` only (the imputed data is not
        returned). If anything raises an ``Exception`` the error is printed
        and ``np.inf`` is recorded for all three metrics.
    """
    imputationMethod = "ycimpute KNN"
    try:
        neighbours = int(math.sqrt(len(missData)))
        imputedData = yKNN(k=neighbours).complete(missData)

        rmse = evaluate.RMSE(originData, imputedData)
        mae = MAE(originData, imputedData)
        mape = masked_mape_np(originData, imputedData)
        return addResult(result, missRate, missPattern, imputationMethod,
                         rmse, mae, mape)
    except Exception as err:
        print(err)
        worst = np.inf
        return addResult(result, missRate, missPattern, imputationMethod,
                         worst, worst, worst)
def imputeMethodMR(result, originData, missData, missRate, missPattern,
                   dataType='continuous'):
    """Impute ``missData`` with a ``PredictiveImputer`` and log metrics.

    The imputer is built with ``f_model='RandomForest'``, fitted on
    ``missData`` and applied to a copy of it. The row is labelled
    ``"RandomForest"``.

    :param result: accumulator handed to, and replaced by, ``addResult``.
    :param originData: reference data the metrics are computed against.
    :param missData: data containing missing entries; must offer ``copy()``.
    :param missRate: recorded alongside the metrics.
    :param missPattern: recorded alongside the metrics.
    :param dataType: any value other than ``'continuous'`` routes the
        imputed output through ``modifier`` together with the distinct
        non-NaN values found in ``missData``.
    :return: ``(result, imputedData)``. If anything raises an
        ``Exception`` the error is printed, ``np.inf`` is recorded for all
        three metrics and ``imputedData`` is the string ``'none'``.
    """
    imputationMethod = "RandomForest"
    try:
        imputer = predictive_imputer.PredictiveImputer(f_model='RandomForest')
        fitted = imputer.fit(missData)
        # Transform a copy so the caller's missData object is not the one
        # handed to transform().
        imputedData = fitted.transform(missData.copy())
        if dataType != 'continuous':
            observedFrame = pd.DataFrame(np.unique(missData))
            mark = [row[0] for row in observedFrame.dropna(axis=0).values]
            imputedData = modifier(imputedData, mark)

        rmse = evaluate.RMSE(originData, imputedData)
        mae = MAE(originData, imputedData)
        mape = masked_mape_np(originData, imputedData)
        result = addResult(result, missRate, missPattern, imputationMethod,
                           rmse, mae, mape)
        return result, imputedData
    except Exception as err:
        print(err)
        imputedData = 'none'
        worst = np.inf
        result = addResult(result, missRate, missPattern, imputationMethod,
                           worst, worst, worst)
        return result, imputedData
def imputeMethodFixed(result,
                      originData,
                      missData,
                      missRate,
                      missPattern,
                      dataType='continuous'):
    """Default-value imputation via ``fixedImpute``, with metric logging.

    NOTE(review): the original note states the fill value is 0; that is
    decided inside ``fixedImpute`` and is not visible here - confirm.

    :param result: accumulator handed to, and replaced by, ``addResult``.
    :param originData: reference data the metrics are computed against.
    :param missData: data containing missing entries.
    :param missRate: recorded alongside the metrics.
    :param missPattern: recorded alongside the metrics.
    :param dataType: any value other than ``'continuous'`` routes the
        imputed output through ``modifier`` together with the distinct
        non-NaN values found in ``missData``.
    :return: ``(result, imputedData)``. If anything raises an
        ``Exception`` the error is printed, ``np.inf`` is recorded for all
        three metrics and ``imputedData`` is the string ``'none'``.
    """
    imputationMethod = "Fixed"

    try:
        imputedData = fixedImpute(missData)
        if dataType != 'continuous':
            observedValues = pd.DataFrame(np.unique(missData)).dropna(axis=0)
            mark = [row[0] for row in observedValues.values]
            imputedData = modifier(imputedData, mark)

        rmse = evaluate.RMSE(originData, imputedData)
        mae = MAE(originData, imputedData)
        mape = masked_mape_np(originData, imputedData)
        result = addResult(result, missRate, missPattern, imputationMethod,
                           rmse, mae, mape)
    except Exception as err:
        print(err)
        imputedData = 'none'
        worst = np.inf
        result = addResult(result, missRate, missPattern, imputationMethod,
                           worst, worst, worst)
    return result, imputedData
示例#7
0
                missData = gene_missingdata_block_bias(rate=i, data=originData)
            else:
                raise Exception(
                    "缺失模式错误,请在'normal','taxa','chara','block'中选择对应模式")

            mark = [
                temp[0] for temp in pd.DataFrame(np.unique(missData)).dropna(
                    axis=0).values
            ]

            try:
                imputedData = mice.MICE().complete(missData)
                imputedData = modifier(imputedData, mark)
                score = evaluate.RMSE(originData, imputedData)
                mice_misc[0].append(score)
                mice_misc[1].append(MAE(originData, imputedData))
                mice_misc[2].append(masked_mape_np(originData, imputedData))
                mice_misc[3].append(TF(originData, imputedData))
                logger.info("MICE missing rate:{},RMSE:{}".format(i, score))
            except:
                mice_misc[0].append(np.inf)
                mice_misc[1].append(np.inf)
                mice_misc[2].append(np.inf)
                mice_misc[3].append(np.inf)
            try:
                imputedData = IterativeImputer().fit_transform(missData)
                imputedData = modifier(imputedData, mark)
                score = evaluate.RMSE(originData, imputedData)
                ii_misc[0].append(score)
                ii_misc[1].append(MAE(originData, imputedData))
                ii_misc[2].append(masked_mape_np(originData, imputedData))
示例#8
0
                                                                   epochs=300,
                                                                   theta=int(len(missData[0]) / 2),
                                                                   iterations=30,
                                                                   Autoencoder_method=method,
                                                                   loss=loss,
                                                                   use_cuda=False
                                                                   ).complete(missData)
                            logger.info("训练耗时:{}".format(time.time()-start))
                            score = RMSE(originData, imputedData)
                            score1 = RMSE(originData, first_imputedData)
                            logger.info("{}_{}_{}_{}_{} first missing rate:{},RMSE:{}".format(file,missPattern,first_imputed_method,loss,method,i, score1))
                            logger.info("{}_{}_{}_{}_{} missing rate:{},RMSE:{}".format(file,missPattern,first_imputed_method,loss,method,i, score))


                            globals()[varname][0].append(score)
                            globals()[varname][1].append(MAE(originData, imputedData))
                            globals()[varname][2].append(masked_mape_np(originData, imputedData))
                        except Exception as e:
                            logger.error(e)
                            globals()[varname][0].append(np.nan)
                            globals()[varname][1].append(np.nan)
                            globals()[varname][2].append(np.nan)

        #将三个指标在各个缺失状态下的结果求和
        logger.error("*" * 30)
        logger.error("file:{}".format(file))
        logger.error("pattern :{}".format(missPattern))
        for varname in methed_names_half:
            half.append([sum(globals()[varname][0][0:3]), sum(globals()[varname][1][0:3]),
                         sum(globals()[varname][2][0:3])])
            logger.error("half {} rmse:{} ,MAE:{},MAPE:{}".format(varname, sum(globals()[varname][0][0:3]),
                missData = gene_missingdata_taxa_bias(rate=i, data=originData)
            elif missPattern == 'chara':
                missData = gene_missingdata_chara_bias(rate=i, data=originData)
            elif missPattern == 'block':
                missData = gene_missingdata_block_bias(rate=i, data=originData)
            else:
                raise Exception("缺失模式错误,请在'normal','taxa','chara','block'中选择对应模式")

            mark = [temp[0] for temp in pd.DataFrame(np.unique(missData)).dropna(axis=0).values]

            try:
                imputedData = mice.MICE().complete(missData)
                imputedData = modifier(imputedData, mark)
                score = evaluate.RMSE(originData, imputedData)
                mice_misc[0].append(score)
                mice_misc[1].append(MAE(originData, imputedData))
                mice_misc[2].append(masked_mape_np(originData, imputedData))
                mice_misc[3].append(TF(originData, imputedData))
                logger.info("MICE missing rate:{},RMSE:{}".format(i, score))
            except:
                mice_misc[0].append(np.inf)
                mice_misc[1].append(np.inf)
                mice_misc[2].append(np.inf)
                mice_misc[3].append(np.inf)
            try:
                imputedData = IterativeImputer().fit_transform(missData)
                imputedData = modifier(imputedData, mark)
                score = evaluate.RMSE(originData, imputedData)
                ii_misc[0].append(score)
                ii_misc[1].append(MAE(originData, imputedData))
                ii_misc[2].append(masked_mape_np(originData, imputedData))