x_set = preprocess.matrixify(data, 160) y_set = preprocess.get_y(data) error_list_closed = [] error_list_gradient = [] time_list_closed = [] time_list_gradient = [] # Computing our results feature_range = np.arange(160) for x in feature_range: current_x_set = x_set[:, :3 + x] print("using " + str(x) + " features") time_closed, performance_close = Evaluator.evaluate_closed_form( current_x_set, y_set) time_grad, performance_grad = Evaluator.gradient_evaluator( current_x_set, y_set) error_list_closed.append(performance_close) error_list_gradient.append(performance_grad) time_list_closed.append(time_closed) time_list_gradient.append(time_grad) # Plotting our results size = 5 plt.subplot(211) plt.title('MSE vs number of features') plt.scatter(feature_range, error_list_gradient,
# Feature-selection experiment: build the feature matrix (word features plus
# two engineered features), prune it with backward elimination, then fit the
# closed-form solver on the surviving features and print its error and run time.
import json

import feature_selector
from Evaluator import Evaluator
from preprocess import Preprocess

# Third argument handed to feature_selector.backwardElimination.
# NOTE(review): presumably the significance level / cut-off used to drop
# features -- confirm against feature_selector.
ELIMINATION_THRESHOLD = 0.15

# The path is relative to the current working directory, not to this file.
# JSON text is UTF-8, so decode it explicitly instead of relying on the
# platform's locale encoding (which is not UTF-8 on every system).
with open("../src/proj1_data.json", encoding="utf-8") as fp:
    data = json.load(fp)

preprocess1 = Preprocess()

# The return value is discarded, so any effect must come from mutating `data`
# in place.
# NOTE(review): called on the class rather than on `preprocess1`; assumes
# `preprocess` (and `get_y` below) are static/class-level helpers -- confirm.
Preprocess.preprocess(data)

# Build the base feature matrix on the instance; it is read back through
# `preprocess1.feature_set` once the extra features have been appended.
num_words = 60
preprocess1.matrixify(data, num_words)
y_set = Preprocess.get_y(data)

# Engineered features, computed first and then appended in the same order as
# the original script (interaction term, then log-children).
children_length_inter = preprocess1.children_length_interaction(data)
log_children_list = preprocess1.log_children(data)
preprocess1.add_features(children_length_inter)
preprocess1.add_features(log_children_list)

x_set = preprocess1.feature_set

# Reduce the feature matrix before fitting.
x_optimal = feature_selector.backwardElimination(
    x_set, y_set, ELIMINATION_THRESHOLD
)

# evaluate_closed_form returns a (time, error) pair; the error is printed
# first, then the time.
elapsed, mse = Evaluator.evaluate_closed_form(x_optimal, y_set)
print(mse)
print(elapsed)