def verify_maldist_equivalence(dataset):
    """Check that the Mahalanobis distance and its variant rank samples alike.

    Both ``mahal_dist`` and ``mahal_dist_variant`` are evaluated on
    ``dataset``; each result is turned into the sample indices sorted by
    descending distance, and the two index orders are compared.

    Args:
        dataset: Data passed unchanged to ``mahal_dist`` and
            ``mahal_dist_variant``.

    Returns:
        bool: ``True`` when both distance definitions produce exactly the
        same descending index order, ``False`` otherwise.

    Raises:
        AssertionError: If the two distance arrays are numerically close,
            since the variant is expected to return different values.
    """
    # Original definition of the Mahalanobis distance.
    dist_original = mahal_dist(dataset)
    # Sample indices ordered from the largest distance to the smallest.
    indices_desc_original = np.argsort(-dist_original)

    # Variant of the Mahalanobis distance.
    dist_variant = mahal_dist_variant(dataset)
    # Sample indices ordered from the largest distance to the smallest.
    indices_desc_variant = np.argsort(-dist_variant)

    assert not np.allclose(dist_original, dist_variant), '马氏距离及其变体返回的数值一般不相等'
    # Indices are integers, so compare them exactly: np.allclose applies a
    # relative tolerance that grows with the index value and would accept
    # different orderings once indices get large.
    return bool(np.array_equal(indices_desc_original, indices_desc_variant))
def predict_anomaly_indices(X, contamination):
    """Gather the anomaly indices reported by six detectors fitted on ``X``.

    The number of samples Isolation Forest labels as ``-1`` is used as the
    cut-off for the Isolation Forest, LOF, RobustPCC and Mahalanobis index
    lists. The two PCA reconstruction detectors return whatever their own
    ``anomaly_idx()`` yields.

    Args:
        X: Samples handed to every detector.
        contamination: Contamination setting forwarded to Isolation Forest,
            LOF and both PCA reconstruction detectors.

    Returns:
        numpy.ndarray: ``np.array`` of the index collections in the order
        Isolation Forest, LOF, RobustPCC, Mahalanobis distance, KernelPCA
        reconstruction, LinearPCA reconstruction.
        NOTE(review): presumably all six collections have the same length;
        confirm against the project detectors.
    """
    # Isolation Forest
    # NOTE(review): `behaviour` appears to be deprecated/removed in newer
    # scikit-learn releases -- confirm against the pinned version.
    forest = IsolationForest(
        n_estimators=125,
        contamination=contamination,
        behaviour='new',
        random_state=2018,
        n_jobs=-1,
    )
    forest_labels = forest.fit_predict(X)
    # How many samples the forest labelled -1; reused as the cut-off below.
    anomaly_num = np.count_nonzero(forest_labels == -1)
    # The further a score falls below 0, the more likely the sample is an
    # outlier, so an ascending sort puts the most suspicious samples first.
    forest_scores = forest.decision_function(X)
    if_idx = np.argsort(forest_scores)[:anomaly_num]

    # LOF
    lof_model = LocalOutlierFactor(
        contamination=contamination,
        p=2,
        novelty=False,
        n_jobs=-1,
    )
    lof_model.fit(X)
    # Ascending order of the negated factor == descending order of the factor.
    lof_idx = np.argsort(lof_model.negative_outlier_factor_)[:anomaly_num]

    # RobustPCC
    robust_pcc = rp.RobustPCC(X, X, gamma=0.01, quantile=99)
    rpcc_idx = robust_pcc.test_anomaly_idx()[:anomaly_num]

    # Mahalanobis distance: largest distances first.
    mahal = md.mahal_dist(X)
    md_idx = np.argsort(-mahal)[:anomaly_num]

    # LinearPCA reconstruction
    linear_recon = rep.PCA_Recon_Error(X, contamination=contamination)
    pre_idx = linear_recon.anomaly_idx()

    # KernelPCA reconstruction
    kernel_recon = rek.KPCA_Recon_Error(X, contamination=contamination)
    kre_idx = kernel_recon.anomaly_idx()

    # Predicted anomaly indices, one row per detector.
    return np.array([if_idx, lof_idx, rpcc_idx, md_idx, kre_idx, pre_idx])
def predict_anomaly_indices(X, contamination):
    """Return per-sample anomaly labels produced by six detectors on ``X``.

    Despite the name, the Isolation Forest, LOF and Mahalanobis rows hold
    one 0/1 label per sample (1 = predicted anomaly), not index lists.

    Args:
        X: Samples handed to every detector; ``len(X)`` is taken as the
            number of samples.
        contamination: Contamination setting forwarded to the detectors and
            used to size the Mahalanobis cut-off as
            ``ceil(contamination * len(X))``.

    Returns:
        numpy.ndarray: ``np.array`` of the prediction rows in the order
        Isolation Forest, LOF, Mahalanobis distance, LinearPCA
        reconstruction, KernelPCA reconstruction, RobustPCC.
        NOTE(review): the last three rows are whatever the project
        detectors' ``predict()`` returns -- presumably 0/1 labels of length
        ``len(X)``; confirm.

    Side effects:
        Prints the start and the elapsed time of the KernelPCA prediction.
    """
    # Isolation Forest
    # NOTE(review): `behaviour` appears to be deprecated/removed in newer
    # scikit-learn releases -- confirm against the pinned version.
    iforest = IsolationForest(n_estimators=125, contamination=contamination,
                              behaviour='new', random_state=2018, n_jobs=-1)
    # fit_predict returns -1 for outliers and 1 for inliers; map to 1/0.
    iforest_pred = np.asarray(iforest.fit_predict(X))
    iforest_result = (iforest_pred == -1).astype(int)

    # LOF
    lof = LocalOutlierFactor(contamination=contamination, p=2, novelty=False, n_jobs=-1)
    # fit_predict returns -1 for outliers and 1 for inliers; map to 1/0.
    lof_pred = np.asarray(lof.fit_predict(X))
    lof_result = (lof_pred == -1).astype(int)

    # Mahalanobis distance: flag the `anomaly_num` largest distances.
    dist = md.mahal_dist(X)
    anomaly_num = int(np.ceil(contamination * len(X)))
    md_idx = np.argsort(-dist)[:anomaly_num]
    # Vectorised mask instead of testing `i in md_idx` for every sample,
    # which scanned the index array once per sample.
    mahal_result = np.zeros(len(X), dtype=int)
    mahal_result[md_idx] = 1

    # RobustPCC
    rpcc = rp.RobustPCC(X, X, gamma=0.01, quantile=99, contamination=contamination)
    rpcc_result = rpcc.predict()

    # LinearPCA reconstruction
    pre = rep.PCA_Recon_Error(X, contamination=contamination)
    pre_result = pre.predict()

    # KernelPCA reconstruction (timed)
    kre = rek.KPCA_Recon_Error(X, contamination=contamination, kernel='linear')
    print('KernelPCA starts.')
    # perf_counter is monotonic, so the elapsed time cannot be skewed by
    # wall-clock adjustments.
    start = time.perf_counter()
    kre_result = kre.predict()
    end = time.perf_counter()
    print("KernelPCA cost time: {:.2f}s".format(end - start))

    anomaly_pred = [iforest_result, lof_result, mahal_result, pre_result, kre_result, rpcc_result]
    return np.array(anomaly_pred)