示例#1
0
        data[3 * index2].append(mahalanobis[0])
        data[3 * index2 + 1].append(mean(mahalanobis[:3]))
        data[3 * index2 + 2].append(mean(mahalanobis[:5]))

# Column names for the header-less result CSVs. The list never changes, so it
# is built once instead of on every loop iteration.
headers = ["Distance", "file", "k", "m2", "time", "precision", "reported"]

# For each entry of last[2:] and each (outliers, true_outliers) pair, load the
# matching result file, derive recall and F1 per row, and record three
# summaries of the "mahalanobis" F1 scores in `data`:
#   data[3 * index2]     <- best F1
#   data[3 * index2 + 1] <- mean of the 3 best F1 scores
#   data[3 * index2 + 2] <- mean of the 5 best F1 scores
for m in last[2:]:
    for index2, (o, to) in enumerate(zip(outliers, true_outliers)):
        # The path is the concatenation of constant, o and m.
        filename = constant + o + m
        print(filename)
        df = pd.read_csv(filename, header=None, names=headers)

        precision = df["precision"]
        # NOTE(review): presumably precision * reported is the number of true
        # positives and `to` the number of real outliers -- confirm against
        # whatever writes these CSV files.
        df["recall"] = precision * df["reported"] / to
        recall = df["recall"]

        denominator = precision + recall
        f1 = 2 * precision * recall / denominator
        # If precision and recall are both 0 the ratio is 0/0 = NaN. A NaN
        # makes the ordering produced by sorted() unreliable and poisons the
        # means below, so such rows are scored 0 instead. NaNs that come from
        # missing inputs are left untouched (NaN != 0 evaluates to True).
        df["f1"] = f1.where(denominator != 0, 0.0)

        # get_group raises KeyError when the file has no "mahalanobis" rows.
        mahalanobis_df = df.groupby(df.Distance).get_group("mahalanobis")
        mahalanobis = sorted(mahalanobis_df["f1"], reverse=True)
        print(mahalanobis)

        # Slices shorter than 3 / 5 simply average the scores that exist.
        data[3 * index2].append(mahalanobis[0])
        data[3 * index2 + 1].append(mean(mahalanobis[:3]))
        data[3 * index2 + 2].append(mean(mahalanobis[:5]))

# Turn the collected scores into a table (rows labelled by `names`, columns
# by `methods`) and run autorank on it at significance level 0.01.
results = pd.DataFrame(data=data, index=names, columns=methods)
ranks = autorank(results, alpha=0.01)
create_report(ranks)

# Plot the ranking result (insignificant outcomes are allowed) and write the
# owning figure to disk as a tightly cropped EPS file.
stats_plot = plot_stats(ranks, allow_insignificant=True)
figure = stats_plot.get_figure()
figure.savefig(
    "output2/results/mahalanobis_small_insignificant.eps",
    format="eps",
    bbox_inches="tight",
)
示例#2
0
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from autorank import autorank, create_report, plot_stats, latex_table

# Demo: rank six synthetic populations with autorank.

# Fixed seed so the generated samples are reproducible between runs.
np.random.seed(42)
pd.set_option("display.max_columns", 7)

sample_size = 50
std = 0.3
means = [0.2, 0.3, 0.5, 0.8, 0.85, 0.9]

# One column per population: normally distributed samples around each mean,
# clipped to the interval [0, 1]. The dict is filled in the order of `means`,
# so the random draws and the column order follow that order.
data = pd.DataFrame(
    {
        "pop_%i" % idx: np.random.normal(center, std, sample_size).clip(0, 1)
        for idx, center in enumerate(means)
    }
)

# Run the analysis at alpha = 0.05, then print the raw result, the textual
# report, the plot and the LaTeX table.
res = autorank(data, alpha=0.05, verbose=False)
print(res)
create_report(res)
plot_stats(res)
plt.show()
latex_table(res)