def run_nonlinear_transformation():
    '''
    Fit linear regression on the raw and on the transformed feature set.

    The training set is read with read_data_set() and fitted twice:

    1. on the features as they are, giving Ein;
    2. on the nonlinear feature vector
       (1; x1; x2; x1^2; x2^2; x1x2; |x1-x2|; |x1+x2|)
       produced by transform_t_set(), giving Ein for the transformed set
       and, through compute_Eout_nonlineartrans(), Eout for the
       transformed weights.

    The three errors are printed; nothing is returned.
    '''
    t_set = read_data_set()
    N_points = len(t_set)

    # Baseline fit on the untransformed features.
    wlin, X, y = linear_regression(N_points, t_set)
    Ein = compute_Ein(wlin, X, y)

    # Same fit on the nonlinearly transformed training data.
    t_set_trans = transform_t_set(t_set)
    wtrans, Xtrans, ytrans = linear_regression(N_points, t_set_trans)
    Eintrans = compute_Ein(wtrans, Xtrans, ytrans)

    Eout = compute_Eout_nonlineartrans(wtrans, N_points)

    # Single-argument print(...) produces the same output on Python 2 and 3.
    print('Ein: ' + str(Ein))
    print('Ein Transformed: ' + str(Eintrans))
    print('Eout: ' + str(Eout))
def cross_validation(x, y, partition, regularizer, degree=30):
    '''
    Average regularized regression results over the folds of a partition.

    Every element of partition takes one turn as the test fold; the
    indices of all remaining folds, concatenated in order, form the
    training set. For each split, linear_regression() is fitted on the
    training part with the given regularizer and RMSerror() is evaluated
    on both the training and the test part.

    Parameters:
        x, y        -- data; must support indexing with a list of indices
                       (presumably NumPy arrays -- confirm with callers).
        partition   -- sequence of index collections, one per fold.
        regularizer -- regularization parameter forwarded to
                       linear_regression().
        degree      -- maximal degree of the interpolation polynomial.

    Returns:
        (averaged coefficients, averaged training error,
         averaged test error), each averaged over all folds.

    Raises:
        ZeroDivisionError if partition is empty.
    '''
    n_folds = len(partition)

    # Running sums; they turn into means in the final in-place division.
    coeff_acc = 0
    train_err_acc = 0
    test_err_acc = 0

    for held_out, test_subset in enumerate(partition):
        # Training indices: every fold except the one currently held out.
        train_subset = [
            index
            for fold_no, fold in enumerate(partition)
            if fold_no != held_out
            for index in fold
        ]

        x_train, y_train = x[train_subset], y[train_subset]
        x_test, y_test = x[test_subset], y[test_subset]

        coefficients = linear_regression(x_train, y_train, degree,
                                         regularizer=regularizer)

        coeff_acc += coefficients
        train_err_acc += RMSerror(x_train, y_train, coefficients)
        test_err_acc += RMSerror(x_test, y_test, coefficients)

    coeff_acc /= n_folds
    train_err_acc /= n_folds
    test_err_acc /= n_folds

    return coeff_acc, train_err_acc, test_err_acc
def run_LR_validation(t_set, v_set, k):
    '''
    Fit linear regression on t_set and report Eval and Eout.

    The weights are learned from the training set t_set. They are then
    scored on the validation set v_set (Eval) and passed together with k
    to compute_Eout() (Eout). k, Eval and Eout are printed; nothing is
    returned.
    '''
    N_points = len(t_set)

    # Run LR on the training set; only the weights are used afterwards.
    w, _X_train, _y_train = linear_regression(N_points, t_set)

    # Using the weights, compute Eval on the validation set.
    Eval = compute_Eval(w, input_data_matrix(v_set), target_vector(v_set))

    # Out-of-sample error for the same weights.
    Eout = compute_Eout(w, k)

    # Single-argument print(...) produces the same output on Python 2 and 3.
    print('k:' + str(k))
    print('Eval: ' + str(Eval))
    print('Eout: ' + str(Eout))
def linear_time_invariant():
    """Fit and save linear time-invariant matrices from a simulated run.

    A closed track is built, a dynamic bicycle model ("ego") is driven
    around it by a PID tracking policy, and the recorded closed-loop
    states and inputs are passed to utils.linear_regression() to obtain
    matrix_A and matrix_B. The track specification and both matrices are
    written as CSV files below ``data/`` and the matrices are printed.
    Nothing is returned.
    """
    timestep = 0.1
    start_state = [0.3, 0, 0, 0, 0, 0]

    # Closed track: straight rows alternate with the same curved row.
    # NOTE(review): rows look like (segment length, curvature) -- confirm
    # against racing_env.ClosedTrack.
    curve_segment = [np.pi / 2 * 1.5, -1.5]
    track_spec = np.array([
        [3, 0],
        curve_segment,
        [2, 0],
        curve_segment,
        [6, 0],
        curve_segment,
        [2.0, 0],
        curve_segment,
    ])
    track_width = 1.0
    track = racing_env.ClosedTrack(track_spec, track_width)

    # Ego car; each setter receives its own fresh state array.
    ego = racing_car.DynamicBicycleModel(
        name="ego", param=racing_car.CarParam(edgecolor="black"))
    ego.set_state_curvilinear(np.array(start_state))
    ego.set_state_global(np.array(start_state))
    ego.set_ctrl_policy(policy.PIDTracking(vt=0.5))
    ego.ctrl_policy.set_timestep(timestep)

    # Simulation: same timestep as the control policy.
    simulator = racing_sim.CarRacingSim()
    simulator.set_timestep(timestep)
    simulator.set_track(track)
    simulator.add_vehicle(ego)
    simulator.sim(sim_time=500.0)

    # Linearized dynamics from the recorded closed-loop trajectory.
    simulated_ego = simulator.vehicles["ego"]
    xdata = np.stack(simulated_ego.closedloop_x, axis=0)
    udata = np.stack(simulated_ego.closedloop_u, axis=0)
    lamb = 1e-9  # presumably the regularization weight -- confirm in utils
    matrix_A, matrix_B, _fit_error = utils.linear_regression(
        xdata, udata, lamb)

    outputs = (
        ("data/track_spec/default.csv", track_spec),
        ("data/sys/LTI/matrix_A.csv", matrix_A),
        ("data/sys/LTI/matrix_B.csv", matrix_B),
    )
    for csv_path, table in outputs:
        np.savetxt(csv_path, table, delimiter=",")

    print(matrix_A)
    print(matrix_B)
def run_linear_regression(N_samples, N_points):
    '''
    Run linear regression N_samples times and print the averaged errors.

    The training set is read with read_data_set(). For every sample a
    regression is fitted and its Ein and Eout are computed; the values
    printed at the end are the averages over all N_samples runs.

    Parameters:
        N_samples -- number of regression runs to average over. When it
                     is 0 no run is made and both errors print as 0.
        N_points  -- kept for interface compatibility only: it is
                     overridden by the size of the data set that was read.

    Nothing is returned.
    '''
    t_set = read_data_set()
    # The data set that was read determines the number of points.
    N_points = len(t_set)

    # Single-argument print(...) produces the same output on Python 2 and 3.
    print('Running Linear Regression on %s samples' % str(N_samples))
    print('Each sample has %s data points' % str(N_points))

    Ein_total = 0
    Eout_total = 0
    for _ in range(N_samples):
        wlin, X, y = linear_regression(N_points, t_set)
        # Accumulate so that every run contributes to the reported value,
        # not just the last one.
        Ein_total += compute_Ein(wlin, X, y)
        Eout_total += compute_Eout(wlin, N_points)

    if N_samples > 0:
        # float() avoids Python 2 integer division on integer totals.
        Ein = Ein_total / float(N_samples)
        Eout = Eout_total / float(N_samples)
    else:
        Ein = 0
        Eout = 0

    print('Ein: ' + str(Ein))
    print('Eout: ' + str(Eout))
# Put the `features` and `tr` columns side by side in one frame.
df = pd.concat([features, tr], axis=1)
# NOTE(review): remove_outliers is defined elsewhere; q=10 is presumably a
# quantile/percentile cut-off -- confirm.
df = remove_outliers(df, q=10)

# Regression targets for the two languages.
y_fr = df["PERDAY FRENCH"]
y_sp = df["PERDAY SPANISH"]
# Predictors: every column except the last seven.
X = df.iloc[:, :-7]

""" French only """
y = y_fr
# linear_regression (defined elsewhere) hands back the fitted model, the
# scaler and the held-out test split.
ols, scaler, X_test, y_test = linear_regression(X, y, test_size=0.1, random_state=210, plots=False)
# The test features go through the returned scaler before predicting.
X_test_s = scaler.transform(X_test)
y_pred = ols.predict(X_test_s)
ols_residuals = y_test - y_pred
#print("Coefficients: \n{}".format(ols.coef_))
print("French only")
# Model score on the scaled test split (presumably R^2 if `ols` is a
# scikit-learn regressor -- confirm).
print(ols.score(X_test_s, y_test))

# True vs. predicted values, with the identity line from 100 to 4999 drawn
# in black for reference.
plt.figure()
plt.scatter(y_test, y_pred)
plt.plot(range(100, 5000), range(100, 5000), 'k-')
plt.xlabel('True values')