Пример #1
0
def _mean_std_cell(values):
    """Format per-run scores as ``"<mean> ± <std>"``.

    The mean is rounded to 2 decimals and the standard deviation to 4.
    """
    return f"{round(np.mean(values), 2)} ± {round(np.std(values), 4)}"


def main(args):
    """Benchmark GAIN/EGAIN against classical imputers on several datasets.

    For every dataset named in ``args.data_name`` the experiment is repeated
    ``args.time`` times. Each run imputes the missing data with GAIN, EGAIN,
    mean imputation, KNN, an ExtraTrees-based IterativeImputer (reported as
    "M.FORE") and a BayesianRidge-based IterativeImputer (reported as "MICE"),
    scores every imputation with ``rmse_loss`` and ``rmspe_loss``, and then
    scores the classifiers ``clf_MLP``/``clf_DT`` (GAIN and EGAIN only) and
    ``clf_LR``/``clf_SVC``/``clf_SGD`` (all methods) on the normalized data.

    Side effects: each run writes ``data/<i>missing_data.csv``,
    ``data/<i>imputed_data_gain.csv`` and ``data/<i>imputed_data_egain.csv``;
    after each dataset the summary table is printed and written to
    ``result.csv`` and ``result.xls``.

    Args:
      args: namespace with the attributes
        - data_name: one or more dataset names joined with '+'
        - miss_rate: probability of missing components
        - time: number of repeated runs per dataset
        - batch_size: batch size
        - hint_rate: hint rate
        - alpha: hyperparameter
        - iterations: iterations

    Returns:
      - imputed_data_mf: normalized "M.FORE" imputation of the last run,
        or None when no run was executed
      - rmse_mf: RMSE of the "M.FORE" imputation of the last run,
        or None when no run was executed
    """
    miss_rate = args.miss_rate
    n_runs = args.time

    dataset_names = args.data_name.split("+")
    print(dataset_names)

    gain_parameters = {
        'batch_size': args.batch_size,
        'hint_rate': args.hint_rate,
        'alpha': args.alpha,
        'iterations': args.iterations,
        'time': args.time
    }

    known_datasets = {
        'obesity', 'hepatitisC', 'audit', 'letter', 'spam', 'breast',
        'credit', 'news', 'blood', 'vowel', 'ecoli', 'ionosphere',
        'parkinsons', 'seedst', 'vehicle', 'vertebral', 'wine', 'banknote',
        'balance', 'yeast', 'bean', 'shill', 'phishing', 'firewall',
        'iBeacon', 'steel'
    }

    # Order in which methods appear in the report table.
    methods = ('EGAIN', 'GAIN', 'MEAN', 'KNN', 'MICE', 'M.FORE')
    gan_methods = methods[:2]
    # Orders in which the classifiers are actually invoked; kept identical to
    # the historical call sequence so RNG-dependent results do not shift.
    gan_first = ('GAIN', 'EGAIN')
    baselines = ('MEAN', 'KNN', 'M.FORE', 'MICE')

    miss_rate_caption = "{}% Missing".format(int(miss_rate * 100))
    n_methods = len(methods)
    col1 = ([miss_rate_caption] + ['RMSE'] * n_methods
            + ['RMSPE'] * n_methods + [''] + ['MLP'] * 2 + ['D.Tree'] * 2
            + ['LogisticR'] * n_methods + ['SVC'] * n_methods
            + ['SGD'] * n_methods)
    col2 = (['Method'] + list(methods) * 2 + [''] + list(gan_methods) * 2
            + list(methods) * 3)
    result = [col1, col2]

    # Stay defined even when no dataset/run is processed.
    imputed_data_mf, rmse_mf = None, None

    for data_name in dataset_names:
        if data_name not in known_datasets:
            print("Wrong name: {} Dataset. Skip this datasets".format(
                data_name))
            # Skip only this name; the remaining datasets are still processed.
            continue

        print("****** {} Dataset ******".format(data_name))
        rmse_runs = {method: [] for method in methods}
        rmspe_runs = {method: [] for method in methods}
        mlp_runs = {method: [] for method in gan_methods}
        dt_runs = {method: [] for method in gan_methods}
        lr_runs = {method: [] for method in methods}
        svc_runs = {method: [] for method in methods}
        sgd_runs = {method: [] for method in methods}

        for i in range(n_runs):
            # Load data and introduce missingness. The loader's third argument
            # is fixed at 42 while the split uses random_state=i (presumably:
            # same missingness, different split per run -- confirm against
            # data_loader3).
            ori_data_x, miss_data_x, data_m, y = data_loader3(
                data_name, miss_rate, 42)
            train_idx, test_idx = train_test_split(
                range(len(y)), test_size=0.2, stratify=y, random_state=i)
            if i % 5 == 0:
                print('=== Working on {}/{} ==='.format(i, n_runs))

            # Impute missing data. Insertion order below is also the order in
            # which the imputations are scored and normalized.
            imputed = {
                'GAIN': gain(miss_data_x, gain_parameters),
                'EGAIN': egain(miss_data_x, gain_parameters),
            }
            imputed['MEAN'] = SimpleImputer(
                missing_values=np.nan,
                strategy='mean').fit_transform(miss_data_x).round()
            imputed['KNN'] = KNNImputer(
                missing_values=np.nan,
                n_neighbors=3).fit_transform(miss_data_x).round()
            # ExtraTreesRegressor: similar to missForest in R.
            imputed['M.FORE'] = IterativeImputer(
                estimator=ExtraTreesRegressor(),
                max_iter=1,
                initial_strategy="constant",
                n_nearest_features=1,
                imputation_order='descending').fit_transform(
                    miss_data_x).round()
            imputed['MICE'] = IterativeImputer(
                estimator=BayesianRidge(),
                max_iter=1,
                initial_strategy='constant',
                n_nearest_features=1,
                imputation_order='descending').fit_transform(
                    miss_data_x).round()

            # Imputation quality: RMSE first, then RMSPE.
            for method, data in imputed.items():
                rmse_runs[method].append(
                    rmse_loss(ori_data_x, data, data_m))
            for method, data in imputed.items():
                rmspe_runs[method].append(
                    rmspe_loss(ori_data_x, data, data_m))
            rmse_mf = rmse_runs['M.FORE'][-1]

            # Persist the raw inputs/outputs of this run.
            np.savetxt("data/{}missing_data.csv".format(i),
                       miss_data_x.astype(float),
                       delimiter=',',
                       fmt='%1.2f')
            np.savetxt("data/{}imputed_data_gain.csv".format(i),
                       imputed['GAIN'],
                       delimiter=',',
                       fmt='%d')
            np.savetxt("data/{}imputed_data_egain.csv".format(i),
                       imputed['EGAIN'],
                       delimiter=',',
                       fmt='%d')

            # Classifiers are scored on the normalized imputations.
            normalized = {}
            for method, data in imputed.items():
                normalized[method], _ = normalization(data)
            imputed_data_mf = normalized['M.FORE']

            schedule = (
                (clf_MLP, mlp_runs, gan_first),
                (clf_DT, dt_runs, gan_first),
                (clf_LR, lr_runs, gan_first + baselines),
                (clf_SVC, svc_runs, baselines + gan_first),
                (clf_SGD, sgd_runs, baselines + gan_first),
            )
            for classifier, runs, call_order in schedule:
                for method in call_order:
                    runs[method].append(
                        classifier(normalized[method], y, train_idx,
                                   test_idx))

        print()
        print("Datasets: ", data_name)

        # One report column per dataset, aligned with col1/col2.
        col3 = [data_name]
        col3.extend(_mean_std_cell(rmse_runs[m]) for m in methods)
        col3.extend(_mean_std_cell(rmspe_runs[m]) for m in methods)
        # Separator cell; must be a string like in col1/col2, otherwise the
        # table becomes a ragged nested list that NumPy cannot convert.
        col3.append('')
        col3.extend(_mean_std_cell(mlp_runs[m]) for m in gan_methods)
        col3.extend(_mean_std_cell(dt_runs[m]) for m in gan_methods)
        for runs in (lr_runs, svc_runs, sgd_runs):
            col3.extend(_mean_std_cell(runs[m]) for m in methods)

        # Re-write the cumulative table after every dataset so partial
        # results survive an interrupted experiment.
        result.append(col3)
        my_array = np.asarray(result)
        df_result = pd.DataFrame(my_array)
        df_result_tran = df_result.transpose()
        print(df_result_tran.to_string(index=False, header=False))
        df_result_tran.to_csv("result.csv", index=False, header=False)
        # NOTE(review): the `encoding` keyword and the .xls writer are
        # deprecated/removed in newer pandas releases -- confirm the pinned
        # pandas version before upgrading.
        df_result_tran.to_excel("result.xls",
                                encoding='utf-8',
                                index=False,
                                header=False)

    return imputed_data_mf, rmse_mf
Пример #2
0
def _format_mean_std(label, values):
    """Return ``"<label>: <mean> ± <std>"`` for a list of per-run scores.

    The mean is rounded to 2 decimals and the standard deviation to 4.
    """
    return '{}: {} ± {}'.format(label, round(np.mean(values), 2),
                                round(np.std(values), 4))


def main(args):
    """Benchmark GAIN/EGAIN against classical imputers on one dataset.

    The experiment is repeated ``args.time`` times. Each run loads the dataset
    with run index ``i`` as the loader's third argument, imputes the missing
    data with GAIN and EGAIN (on data scaled by 10000 and scaled back
    afterwards), mean imputation, KNN, a DecisionTree-based IterativeImputer
    (reported as "MFORE"/"MISS") and a BayesianRidge-based IterativeImputer
    (reported as "MICE"), scores every imputation with ``rmse_loss`` and then
    scores ``clf_LR``, ``clf_SVC`` and ``clf_SGD`` on the normalized data.
    Mean and standard deviation over all runs are printed at the end.

    Side effects: every run overwrites ``data/missing_data.csv``,
    ``data/imputed_data_gain.csv`` and ``data/imputed_data_egain.csv``.

    Args:
      args: namespace with the attributes
        - data_name: dataset name passed to the loader
        - miss_rate: probability of missing components
        - time: number of repeated runs
        - batch_size: batch size
        - hint_rate: hint rate
        - alpha: hyperparameter
        - iterations: iterations

    Returns:
      - imputed_data_mf: normalized tree-based imputation of the last run,
        or None when no run was executed
      - rmse_mf: RMSE of the tree-based imputation of the last run,
        or None when no run was executed
    """
    data_name = args.data_name
    miss_rate = args.miss_rate
    n_runs = args.time

    gain_parameters = {
        'batch_size': args.batch_size,
        'hint_rate': args.hint_rate,
        'alpha': args.alpha,
        'iterations': args.iterations,
        'time': args.time
    }

    # GAIN/EGAIN are fed data multiplied by this factor; their output is
    # divided by it again before scoring.
    scale = 10000

    # Order in which the methods are printed in the final report.
    report_order = ('GAIN', 'EGAIN', 'MEAN', 'KNN', 'MICE', 'MFORE')
    # Orders in which the classifiers are actually invoked; kept identical to
    # the historical call sequence so RNG-dependent results do not shift.
    gan_first = ('GAIN', 'EGAIN')
    baselines = ('MEAN', 'KNN', 'MFORE', 'MICE')

    rmse_runs = {method: [] for method in report_order}
    lr_runs = {method: [] for method in report_order}
    svc_runs = {method: [] for method in report_order}
    sgd_runs = {method: [] for method in report_order}

    # Stay defined even when args.time is 0.
    imputed_data_mf, rmse_mf = None, None

    for i in range(n_runs):
        # Load data and introduce missingness
        ori_data_x, miss_data_x, data_m, y = data_loader3(
            data_name, miss_rate, i)
        train_idx, test_idx = train_test_split(range(len(y)),
                                               test_size=0.3,
                                               stratify=y,
                                               random_state=42)
        scaled_miss_data_x = miss_data_x * scale
        if i % 5 == 0:
            print('=== Working on {}/{} ==='.format(i, n_runs))

        # Impute missing data. Insertion order below is also the order in
        # which the imputations are scored and normalized.
        gain_output = gain(scaled_miss_data_x, gain_parameters)
        egain_output = egain(scaled_miss_data_x, gain_parameters)
        imputed = {
            'GAIN': gain_output / scale,
            'EGAIN': egain_output / scale,
        }
        imputed['MEAN'] = SimpleImputer(
            missing_values=np.nan, strategy='mean').fit_transform(miss_data_x)
        imputed['KNN'] = KNNImputer(
            missing_values=np.nan).fit_transform(miss_data_x)
        imputed['MFORE'] = IterativeImputer(
            estimator=DecisionTreeRegressor(),
            max_iter=3).fit_transform(miss_data_x)
        imputed['MICE'] = IterativeImputer(
            estimator=BayesianRidge(), max_iter=3).fit_transform(miss_data_x)

        # Report the RMSE performance
        for method, data in imputed.items():
            rmse_runs[method].append(rmse_loss(ori_data_x, data, data_m))
        rmse_mf = rmse_runs['MFORE'][-1]

        # Persist the inputs/outputs of this run (same file names every run).
        np.savetxt("data/missing_data.csv",
                   miss_data_x.astype(float),
                   delimiter=',',
                   fmt='%1.2f')
        np.savetxt("data/imputed_data_gain.csv",
                   imputed['GAIN'],
                   delimiter=',',
                   fmt='%d')
        np.savetxt("data/imputed_data_egain.csv",
                   imputed['EGAIN'],
                   delimiter=',',
                   fmt='%d')

        # Classifiers are scored on the normalized imputations.
        normalized = {}
        for method, data in imputed.items():
            normalized[method], _ = normalization(data)
        imputed_data_mf = normalized['MFORE']

        schedule = (
            (clf_LR, lr_runs, gan_first + baselines),
            (clf_SVC, svc_runs, baselines + gan_first),
            (clf_SGD, sgd_runs, baselines + gan_first),
        )
        for classifier, runs, call_order in schedule:
            for method in call_order:
                runs[method].append(
                    classifier(normalized[method], y, train_idx, test_idx))

    # Labels are aligned with report_order.
    report = (
        (rmse_runs, ('RMSE  GAIN', 'RMSE EGAIN', 'RMSE  MEAN', 'RMSE   KNN',
                     'RMSE  MICE', 'RMSE MFORE')),
        (lr_runs, ('LR    GAIN', 'LR   EGAIN', 'LR    MEAN', 'LR     KNN',
                   'LR    MICE', 'LR MISSFOR')),
        (svc_runs, ('SVC   GAIN', 'SVC  EGAIN', 'SVC   MEAN', 'SVC    KNN',
                    'SVC   MICE', 'SVC   MISS')),
        (sgd_runs, ('SGD   GAIN', 'SGD  EGAIN', 'SGD   MEAN', 'SGD    KNN',
                    'SGD   MICE', 'SGD   MISS')),
    )

    print()
    print("Datasets: ", data_name)
    for section_index, (runs, labels) in enumerate(report):
        if section_index:
            # Blank line between metric sections.
            print()
        for method, label in zip(report_order, labels):
            print(_format_mean_std(label, runs[method]))

    return imputed_data_mf, rmse_mf