from util.stats import cdf_fit
from math import log
# FIX: `Data` was used below but never imported -- the script raised
# NameError on `Data.load(...)`.
from util.data import Data

# algorithms = {}
algorithms = {"DelaunayP1", "NeuralNetwork"}
BOX_PLOTS = False
MAX_ERROR_LINE = True

if MAX_ERROR_LINE:
    # Aggregate the per-dimension analytic test results into one Data object,
    # adding summary-statistic columns for the per-point error lists.
    test_func_name = "oscillatory"
    test_size = 1000
    d = None
    for dimension in [2, 20]:
        file_name = f"final-{dimension}D-{test_size}-{test_func_name}-analytic.pkl"
        data = Data.load(file_name)
        # Tag every row with its dimension, placed just before the "Train" column.
        data.add_column([dimension] * len(data), "Dimension",
                        index=data.names.index("Train"))
        # Generate interesting extra columns.
        data["Abs Errors"] = ([float(abs(v)) for v in l] for l in data["Errors"])
        data["Mean Abs Error"] = (sum(l) / len(l) for l in data["Abs Errors"])
        data["Min Abs Error"] = (min(l) for l in data["Abs Errors"])
        data["Max Abs Error"] = (max(l) for l in data["Abs Errors"])
        # Concatenate the per-dimension tables into one.
        if (d is None):
            d = data
        else:
            d += data
    # Restrict to the algorithms of interest (empty set means "keep all").
    # NOTE(review): `== algorithms` compares each row value against a set --
    # presumably util.data treats this as membership; confirm against util.data.
    if (len(algorithms) > 0):
        d = d[d["Algorithm"] == algorithms]
    print(d)
    d.save("all-data.pkl")
    print()
from util.plot import Plot, multiplot
from util.stats import cdf_fit_func
import numpy as np
from util.data import Data

# Load the prediction results and massage them for analysis.
# d = Data.load("knn_results_[yelp-mnist].pkl")
d = Data.load("prediction_results.pkl")

# Move column 6 in between columns 0 and 1, keeping the rest in order.
names = d.names.copy()
names = [names[0], names[6], names[1]] + names[2:6] + names[7:]
d.reorder([names[0], names[6], names[1]])
d.sort()

# Clip each per-point error at 5.0, then derive summary-statistic columns.
d["Errors"] = ([float(v) if (v <= 5) else 5.0 for v in e] for e in d["Errors"])
d["Mean Error"] = (sum(e) / len(e) for e in d["Errors"])
d["Mean Squared Error"] = (sum(v**2 for v in e) / len(e) for e in d["Errors"])
d["Error Variance"] = (float(np.var(e)) for e in d["Errors"])
d._max_display = 1000
print(d)
all_data = d

# Get the unique dimensions and algorithms.
dims = sorted(set(all_data["Dimension"]))
algs = sorted(set(all_data["Algorithm"]))
data_sets = sorted(set(all_data["Data"]))

# For every (data set, algorithm) pair, find the method with the lowest
# mean error.
for ds in data_sets:
    for alg in algs:
        subset = all_data[all_data["Data"] == ds]
        subset = subset[subset["Algorithm"] == alg]
        best_row = int(np.argmin(subset["Mean Error"]))
        best_method = subset[best_row, "Method"]
        d = subset
        min_index = best_row
        m = best_method
from util.approximate import Voronoi
from util.data import Data
from util.system import save, load

# Compute (or load from cache) the number of Voronoi-cell contributors for
# every test point across the 10 train/test folds.
try:
    lengths = load()
# FIX: bare `except:` also swallowed KeyboardInterrupt/SystemExit; only a
# missing/unreadable cache should trigger recomputation.
except Exception:
    # NOTE(review): `d` is loaded but never used below -- kept for parity
    # with the original script; confirm whether it can be removed.
    d = Data.load("raw_data.csv")
    lengths = []
    for i in range(1, 11):
        print("i:", i)
        train = Data.load(f"{i:02d}-10_train.csv", sample=None)
        test = Data.load(f"{i:02d}-10_test.csv", sample=None)
        # Get the column ranges for inputs and outputs.
        in_idxs = list(range(len(
            train.names)))[:train.names.index("Recidivism Likelihood")]
        out_idxs = list(range(len(
            train.names)))[train.names.index("Recidivism Likelihood"):]
        # in_cols = train.names[:train.names.index("Recidivism Likelihood")]
        # out_cols = train.names[train.names.index("Recidivism Likelihood"):]
        # Get the matrices.
        train_mat = train.to_matrix()
        test_mat = test.to_matrix()
        train_x = train_mat[:, in_idxs]
        # FIX: test inputs were sliced with `out_idxs` (the output columns),
        # giving test points a different dimensionality than the fitted model.
        test_x = test_mat[:, in_idxs]
        # Fit the Voronoi model on the training inputs and record how many
        # cells contribute to each test-point prediction.
        m = Voronoi()
        m.fit(train_x)
        lengths += [len(ids) for ids, wts in m(test_x)]
    save(lengths)

print("len(lengths): ", len(lengths))