print("a") np.set_printoptions(suppress=True) print("b") xy, y_score = load_data(config["task"]) print("c1") #y_score, best_score = normalize(y_score) print("c2") model, i, modelhistory = best_feedforward_model(xy, y_score, True) print("d") print("We take:") print(i) print("Best score for normalization scale:") #print(best_score) WineData = BaseData('data/winequality-red.csv', ';', 11, 10, config["nr_base_columns"], rifs=True) WineData.load() _ = evaluation_wrapper(config["task"], model, 'data/winequality-red.csv', True, WineData)
def evaluation_wrapper(task, model, _path, _print, eval_dataset):
    if task == Task.multivariate_time_series:
        BirthDeaths3 = BaseData(_path, ';', 3, 1, base_size=config["nr_base_columns"])
        BirthDeaths3.load()
        # NOTE: add_eval_columns is not defined in this branch; it has to be
        # supplied before this call can run.
        evaluations = evaluation(BirthDeaths3.base_xy, add_eval_columns,
                                 eval_dataset.base_dataset[:, 11], model)

    if task == Task.regression:
        def get_wine_column():
            content = open('data/wineexport.csv').read().split("\n")
            values = list(map(lambda line: line.split(";")[1], content))
            return np.array(values).astype(float)

        wc = get_wine_column()  # loaded but not used further in this function

        # Candidate columns presented to NFS: either a fixed budget padded with
        # random noise columns, or just the first base column.
        if config["budget_join"]:
            add_eval_columns = [
                eval_dataset.base_dataset[:, 0],
                eval_dataset.base_dataset[:, 1],
                eval_dataset.base_dataset[:, 2],
                eval_dataset.base_dataset[:, 3],
                eval_dataset.base_dataset[:, 11],
            ]
            for i in np.arange(config["nr_add_columns_budget"] - len(add_eval_columns)):
                add_eval_columns.append(
                    np.random.normal(0, 1, eval_dataset.base_dataset[:, 0].shape))
                #add_eval_columns.append(np.ones(eval_dataset.base_dataset[:, 0].shape))
        else:
            add_eval_columns = [eval_dataset.base_dataset[:, 0]]

        evaluations = evaluation(
            eval_dataset.base_xy,              #5
            add_eval_columns,                  #1200
            eval_dataset.base_dataset[:, 11],  # target column (wine quality)
            model,                             #model
        )
        #plot_performance([evaluations[1]], [evaluations[3]])

        # Baseline: linear regression on the base features only.
        regr = linear_model.LinearRegression()
        regr.fit(eval_dataset.base_x, eval_dataset.base_y)
        regression_score_base_data = regr.score(eval_dataset.base_x, eval_dataset.base_y)

        # Keep only the candidate columns whose NFS output exceeds the threshold.
        indices = np.where(evaluations[0] > config["nfs_output_threshold"])
        add_columns = np.array(list(map(lambda c: c[0], eval_dataset.add_columns)))[indices[0], :]

        # Stack the selected columns onto the base features:
        # transpose to (features, samples), concatenate, transpose back.
        augmented_x = np.transpose(
            np.concatenate((np.transpose(eval_dataset.base_x, (1, 0)), add_columns), axis=0),
            (1, 0))
        regr = linear_model.LinearRegression()
        regr.fit(augmented_x, eval_dataset.base_y)
        regression_score_augmented = regr.score(augmented_x, eval_dataset.base_y)

        output = ""
        output += "Path: " + str(_path)
        output += "\nBase DataSet size: " + str(eval_dataset.base_x.shape)
        output += "\nScore for Base DataSet: " + str(regression_score_base_data)
        output += "\nColumns presented to NFS: " + str(config["nr_add_columns_budget"])
        output += "\nColumns chosen by NFS by threshold: " + str(len(indices[0]))
        output += "\nAugmented DataSet size: " + str(augmented_x.shape)
        output += "\nScore for Augmented DataSet: " + str(regression_score_augmented)
        output += "\nNFS Time: " + str(evaluations[1])
        output += "\nPearson Time: " + str(evaluations[3])
        output += "\nConfig: " + str(config)

        with open("output/" + str(time.time()).split(".")[0] + ".txt", "a") as f:
            f.write(output)

    # NOTE: evaluations and the two regression scores are only set by the branches above.
    return _path, evaluations, regression_score_base_data, regression_score_augmented
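# Minimal, self-contained sketch (not part of the original pipeline): it mimics the
# comparison evaluation_wrapper performs -- fit a LinearRegression on the base features,
# then on the base features augmented with a few selected candidate columns, and compare
# the R^2 scores. All data here is synthetic and the function name is hypothetical.
def _augmentation_score_sketch(n_samples=200, n_base=5, n_candidates=3, seed=0):
    rng = np.random.default_rng(seed)
    base_x = rng.normal(size=(n_samples, n_base))            # base feature table
    candidates = rng.normal(size=(n_candidates, n_samples))  # candidate columns, one per row
    # Synthetic target that depends on the base features and on the first candidate
    # column, so the augmented fit has something to gain.
    base_y = (base_x @ rng.normal(size=n_base)
              + 0.5 * candidates[0]
              + 0.1 * rng.normal(size=n_samples))

    regr = linear_model.LinearRegression()
    regr.fit(base_x, base_y)
    score_base = regr.score(base_x, base_y)

    # Same column-stacking step as in evaluation_wrapper: transpose base_x to
    # (features, samples), append the candidate rows, transpose back.
    augmented_x = np.transpose(
        np.concatenate((np.transpose(base_x, (1, 0)), candidates), axis=0), (1, 0))
    regr = linear_model.LinearRegression()
    regr.fit(augmented_x, base_y)
    score_augmented = regr.score(augmented_x, base_y)
    return score_base, score_augmented

# Example use of the sketch:
#   score_base, score_augmented = _augmentation_score_sketch()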