def sjlt_error_vs_iterations():
    '''
    Track the IHS error per iteration as the sketch column sparsity varies.

    A single Gaussian regression instance (n = 6000, d = 200) is generated
    and solved exactly.  For every column sparsity in ``[1, 4, 16]`` and
    every method in the module-level ``sketches``, the solver's
    ``ols_fit_new_sketch_track_errors`` is run ``NTRIALS`` times and the
    prediction error of each iterate is averaged over the trials, measured
    against (i) the least-squares solution and (ii) the ground truth.

    The two result dictionaries (column sparsity -> array of mean errors,
    one entry per iteration) are pretty-printed and written with
    ``np.save`` to ``../../output/ihs_baselines/``.  Nothing is returned.

    NOTE(review): results are keyed by column sparsity only, so when
    ``sketches`` contains more than one method only the last method's
    errors are kept for each sparsity -- confirm ``sketches`` holds a
    single (SJLT) entry when this experiment is run.
    '''
    n = 6_000
    d = 200
    gamma = 5  # sketch size expressed as a multiple of d
    sketch_size = int(gamma * d)
    col_sparsities = [1, 4, 16]
    number_iterations = 20

    # Output dictionaries, keyed by column sparsity.
    error_to_lsq = {s: [] for s in col_sparsities}
    error_to_truth = {s: [] for s in col_sparsities}
    print(error_to_lsq)
    print(error_to_truth)

    X, y, x_star = gaussian_design_unconstrained(n, d, variance=1.0)

    # Least squares estimator.  rcond=None matches the tests in this file
    # and avoids NumPy's FutureWarning about the changed default.
    x_opt = np.linalg.lstsq(X, y, rcond=None)[0]

    for col_sparsity in col_sparsities:
        print("Testing col sparsity: {}, num_iterations: {}".format(
            col_sparsity, number_iterations))
        for sketch_method in sketches:
            lsq_error = np.zeros((number_iterations, ))
            truth_error = np.zeros_like(lsq_error)
            my_ihs = ihs(X, y, sketch_method, sketch_size, col_sparsity)
            for trial in range(NTRIALS):
                print('*' * 80)
                print("{}, trial: {}".format(sketch_method, trial))
                _, x_iters = my_ihs.ols_fit_new_sketch_track_errors(
                    number_iterations)
                # Each column of x_iters is read as one iterate.
                for it in range(x_iters.shape[1]):
                    iterate = x_iters[:, it]
                    lsq_error[it] += prediction_error(X, iterate, x_opt)
                    truth_error[it] += prediction_error(X, iterate, x_star)
                print(lsq_error)
            mean_lsq_error = lsq_error / NTRIALS
            mean_truth_error = truth_error / NTRIALS
            print(mean_lsq_error)
            error_to_lsq[col_sparsity] = mean_lsq_error
            error_to_truth[col_sparsity] = mean_truth_error

    pretty = PrettyPrinter(indent=4)
    pretty.pprint(error_to_lsq)
    pretty.pprint(error_to_truth)

    # Save the dictionaries
    save_dir = '../../output/ihs_baselines//'
    np.save(save_dir + 'sjlt_error_sparsity_opt', error_to_lsq)
    np.save(save_dir + 'sjlt_error_sparsity_truth', error_to_truth)
def error_vs_dimensionality():
    '''
    Measure the error to the ground truth as the column dimension grows.

    For each d in ``[16, 32, 64, 128, 256]`` a fresh Gaussian instance with
    n = 100 * d rows is drawn for every (method, trial) pair, and the
    square root of the prediction error between the estimate and the
    ground truth is accumulated.  Methods are every entry of the
    module-level ``sketches`` (solved with IHS, ``num_iterations`` rounds
    and sketch size ``sampling_rate * d``), plus:

    * ``'Exact'``          -- ordinary least squares on the full data;
    * ``'Sketch & Solve'`` -- one countSketch of size
      ``sampling_rate * num_iterations * d`` followed by least squares on
      the sketched problem.

    The per-method, per-dimension means over ``NTRIALS`` trials are
    pretty-printed and saved with ``np.save`` to
    ``../../output/ihs_baselines/error_vs_dims``; the list of dimensions
    is stored under the extra key ``'Dimensions'``.  Nothing is returned.
    '''
    dimension = [2**i for i in range(4, 9)]
    methods = sketches + ['Exact', 'Sketch & Solve']
    sampling_rate = 10
    num_iterations = 5

    # Output dictionary: method -> dimension -> accumulated error.
    error_to_truth = {m: {d: 0 for d in dimension} for m in methods}
    print(error_to_truth)

    for d in dimension:
        n = 100 * d
        print(f'TESTING {n},{d}')
        for method in methods:
            col_sparsity = 4 if method == 'sjlt' else 1
            for _ in range(NTRIALS):
                # Generate the data
                X, y, x_star = gaussian_design_unconstrained(n, d, 1.0)
                # Compare strings by value: `is` tests object identity and
                # only worked by accident of constant sharing.
                if method == "Exact":
                    print('Exact method.')
                    x_hat = np.linalg.lstsq(X, y, rcond=None)[0]
                elif method == "Sketch & Solve":
                    sketch_size = sampling_rate * num_iterations * d
                    print(f"S&S with {sketch_size} sketch size")
                    _sketch = rp(X, sketch_size, 'countSketch', col_sparsity)
                    SA, Sb = _sketch.sketch_data_targets(y)
                    x_hat = np.linalg.lstsq(SA, Sb, rcond=None)[0]
                else:
                    sketch_size = sampling_rate * d
                    print(
                        f"Using {num_iterations} iterations, sketch_size {sketch_size} and {method}"
                    )
                    my_ihs = ihs(X, y, method, sketch_size, col_sparsity)
                    x_hat = my_ihs.ols_fit_new_sketch(num_iterations)
                error = (prediction_error(X, x_star, x_hat))**(0.5)
                error_to_truth[method][d] += error

    # Turn the accumulated sums into means over the trials.
    for method in methods:
        for d in dimension:
            error_to_truth[method][d] /= NTRIALS
    error_to_truth['Dimensions'] = dimension

    pretty = PrettyPrinter(indent=4)
    pretty.pprint(error_to_truth)
    save_dir = '../../output/ihs_baselines/'
    np.save(save_dir + 'error_vs_dims', error_to_truth)
def test_ols_new_sketch_per_iteration(all_sketch_methods):
    '''
    IHS with a new sketch drawn at every iteration must land close to the
    exact least-squares estimator, both through the plain fit and through
    the error-tracking variant.
    '''
    num_rows, num_cols = 2**13, 50
    X, y, _ = gaussian_design_unconstrained(num_rows, num_cols, variance=2.5)
    # rcond is passed only to suppress the NumPy warning, as per the docs.
    x_opt = np.linalg.lstsq(X, y, rcond=None)[0]

    for sketch_method in all_sketch_methods:
        solver = ihs(X, y, sketch_method, 500)
        x_ihs = solver.ols_fit_new_sketch(iterations=20)
        # The second return value (the tracked quantity) is not checked here.
        x_ihs_track, _ = solver.ols_fit_new_sketch_track_errors(iterations=20)

        print(sketch_method, np.linalg.norm(x_ihs - x_opt))
        print(f'Tracking {sketch_method}, error {np.linalg.norm(x_ihs_track - x_opt)}')

        assert np.allclose(x_opt, x_ihs)
        assert np.allclose(x_opt, x_ihs_track)
def test_ols_one_sketch_per_iteration(all_sketch_methods):
    '''
    IHS driven by *A SINGLE* sketch reused across iterations must still
    land close to the exact least-squares estimator.  A larger sketch is
    used here than in the test that draws a new sketch every iteration.
    '''
    num_rows, num_cols = 2**13, 50
    X, y, _ = gaussian_design_unconstrained(num_rows, num_cols, variance=2.5)
    # rcond is passed only to suppress the NumPy warning, as per the docs.
    x_opt = np.linalg.lstsq(X, y, rcond=None)[0]

    for sketch_method in all_sketch_methods:
        solver = ihs(X, y, sketch_method, 1000)
        x_ihs = solver.ols_fit_one_sketch(iterations=50)
        # The second return value (the tracked quantity) is not checked here.
        x_ihs_track, _ = solver.ols_fit_one_sketch_track_errors(iterations=20)

        print(sketch_method, np.linalg.norm(x_ihs - x_opt))
        print(f'Tracking {sketch_method}, error {np.linalg.norm(x_ihs_track - x_opt)}')

        np.testing.assert_array_almost_equal(x_ihs, x_opt)
        assert np.allclose(x_opt, x_ihs_track)
def test_lasso_solver_time(all_sketch_methods):
    '''
    Check that the iterative (IHS) lasso QP agrees with the global lasso
    QP solver for every sketch method.

    nb. The reference solution is deliberately *not* taken from sklearn:
    there is no clean matching between the regularising parameters, so
    only the global and the iterative QPs are compared with each other.
    '''
    X, y, _ = gaussian_design_unconstrained(2000, 10, 1.0)
    n, _ = X.shape
    ell_1_bound = 100.0

    # Reference solution from the global QP.
    x_opt = lasso_solver(X, y, ell_1_bound)

    for sketch_method in all_sketch_methods:
        my_ihs = ihs(X, y, sketch_method, 500)
        # NOTE(review): the second argument (1.5) is presumably a time
        # budget for the solver -- confirm against the ihs implementation.
        x_ihs_track, error_track = my_ihs.lasso_fit_new_sketch_timing(
            ell_1_bound, 1.5)

        # Scaled squared norm of the difference in fitted values.
        fitted_gap = X @ (x_ihs_track - x_opt)
        final_sol_error = (1 / n) * np.linalg.norm(fitted_gap)**2

        print(
            f'Tracking {sketch_method}, error {np.linalg.norm(x_ihs_track - x_opt)}'
        )
        print("log Error to opt: {}".format(np.log(final_sol_error)))
        print(f"{error_track.shape[1]} iterations completed")
        print(np.c_[x_opt, x_ihs_track])

        # Loose agreement only: relative tolerance of 0.1.
        assert np.allclose(x_opt, x_ihs_track, rtol=1E-1)
def error_vs_iterations():
    '''
    Plot the single-sketch IHS error per iteration for several step sizes.

    One Gaussian regression instance (n = 6000, d = 200) is generated and
    solved exactly.  For every gamma in ``gamma_vals`` (sketch size
    ``gamma * d``), every method in the module-level ``sketches`` and
    every step in the module-level ``STEPSIZE``, the solver's
    ``ols_fit_one_sketch_track_errors`` is run ``NTRIALS`` times and the
    prediction error of each iterate with respect to the least-squares
    solution is averaged over the trials.

    The nested result dictionary (method -> gamma -> step -> array of mean
    errors) is pretty-printed and then drawn on a log-scaled matplotlib
    axis, one curve per step size.  Nothing is returned or saved.
    '''
    n = 6_000
    d = 200
    gamma_vals = [5]
    number_iterations = 30

    # Output dictionary indexed by:
    # sketch method (sketches) --> sketch size (gamma_vals) --> STEPSIZE
    error_to_lsq = {
        sketch_name: {
            gamma: {step: [] for step in STEPSIZE}
            for gamma in gamma_vals
        }
        for sketch_name in sketches
    }

    X, y, x_star = gaussian_design_unconstrained(n, d, variance=1.0)

    # Least squares estimator.  rcond=None matches the tests in this file
    # and avoids NumPy's FutureWarning about the changed default.
    x_opt = np.linalg.lstsq(X, y, rcond=None)[0]
    print('-' * 80)
    print("Beginning test")
    lsq_vs_truth_errors = np.log(np.sqrt(prediction_error(X, x_opt, x_star)))
    print(lsq_vs_truth_errors)

    for gamma in gamma_vals:
        sketch_size = int(gamma * d)
        print("Testing gamma: {}, num_iterations: {}".format(
            gamma, number_iterations))
        for sketch_method in sketches:
            col_sparsity = 4 if sketch_method == 'sjlt' else 1
            my_ihs = ihs(X, y, sketch_method, sketch_size, col_sparsity)
            for step in STEPSIZE:
                lsq_error = np.zeros((number_iterations, ))
                for trial in range(NTRIALS):
                    print('*' * 80)
                    print("{}, trial: {}".format(sketch_method, trial))
                    print('Step size: ', step)
                    _, x_iters = my_ihs.ols_fit_one_sketch_track_errors(
                        number_iterations, step)
                    # Each column of x_iters is read as one iterate.
                    for it in range(x_iters.shape[1]):
                        residual = prediction_error(X, x_iters[:, it], x_opt)
                        print('Iteration {}, residual {}'.format(it, residual))
                        lsq_error[it] += residual

                # Sketching error for this step size.
                frob_error = my_ihs.frob_error
                spec_error = my_ihs.spectral_error
                print('Frobenius error: ', frob_error)
                print('Spectral error: ', spec_error)

                error_to_lsq[sketch_method][gamma][step] = lsq_error / NTRIALS

    pretty = PrettyPrinter(indent=4)
    pretty.pprint(error_to_lsq)

    ### PLOTTING ###
    my_markers = ['.', 's', '^', 'D', '*', 'h']
    my_colours = ['C0', 'C1', 'C2', 'C3', 'C4', 'C5']
    fig, ax = plt.subplots()
    x_vals = range(1, number_iterations + 1)
    for gamma in gamma_vals:
        for sketch_method in sketches:
            for i, step in enumerate(STEPSIZE):
                # Cycle through the styles so more than six step sizes do
                # not raise an IndexError.
                _marker = my_markers[i % len(my_markers)]
                _colour = my_colours[i % len(my_colours)]
                residual = error_to_lsq[sketch_method][gamma][step]
                ax.plot(x_vals,
                        residual,
                        label=step,
                        marker=_marker,
                        color=_colour)
    ax.set_yscale('log')
    ax.set_xticks(x_vals[1::2])
    ax.set_xlabel("Iterations")
    # Raw string: in a normal literal "\t" is a TAB and "\|" an invalid
    # escape, so the label never reached mathtext as intended.
    ax.set_ylabel(r'$\| x^t - x_{\mathrm{opt}}\|_A^2$')
    ax.legend(title='Step sizes')
    # nb this only makes sense for one sketch dimension
    ax.set_title('{}, m={}d, step size varied'.format(sketches[0],
                                                      gamma_vals[-1]))
    plt.show()
def solution_error_vs_row_dim():
    '''
    Increase `n`, the number of rows of the problem, and measure the
    solution error in both:
    (i) Euclidean norm (`mean_square_error`)
    (ii) Prediction norm (`prediction_error`).
    Error measurements are taken with respect to:
    (i) the optimal solution x_opt
    (ii) the ground truth

    Driven entirely by module-level settings: ``ROWDIMS`` (row counts),
    ``D`` (columns), ``ROUNDS`` and ``CLASSICAL_SKETCH_SIZE`` (indexed in
    step with ``ROWDIMS``), ``SKETCH_SIZE``, ``METHODS``, ``sketches`` and
    ``NTRIALS``.  For every row count and trial a fresh Gaussian instance
    is drawn and each entry of ``METHODS`` is evaluated:

    * ``'Sketch & Solve'`` -- one countSketch followed by least squares;
    * any entry of ``sketches`` -- IHS via
      ``ols_fit_new_sketch_track_errors``;
    * anything else -- treated as the exact solver, for which only the
      error to the ground truth is recorded.

    The four dictionaries of per-row-count means are pretty-printed and
    saved with ``np.save`` to ``../../output/baselines/``.  Nothing is
    returned.
    '''
    print('Experimental setup:')
    print(f'IHS sketch size {SKETCH_SIZE}')
    print(f'Sketch and solve sketch size {CLASSICAL_SKETCH_SIZE}')
    print(f'Number of rounds {ROUNDS}')

    num_experiments = len(ROWDIMS)

    def _zero_table(names):
        # One zero-initialised accumulator per method name.
        return {name: np.zeros(num_experiments) for name in names}

    # Output dictionaries
    sketched_methods = list(sketches) + ['Sketch & Solve']
    MSE_OPT = _zero_table(sketched_methods)
    PRED_ERROR_OPT = _zero_table(sketched_methods)
    MSE_TRUTH = _zero_table(sketched_methods + ['Exact'])
    PRED_ERROR_TRUTH = _zero_table(sketched_methods + ['Exact'])

    def _record(method, index, X, x_opt, x_true, x_hat):
        # Accumulate all four error measures for one estimate.
        MSE_OPT[method][index] += mean_square_error(x_opt, x_hat)
        PRED_ERROR_OPT[method][index] += prediction_error(X, x_opt, x_hat)
        MSE_TRUTH[method][index] += mean_square_error(x_true, x_hat)
        PRED_ERROR_TRUTH[method][index] += prediction_error(X, x_true, x_hat)

    ## Experiment
    # enumerate keeps the index right even if ROWDIMS repeats a value
    # (list.index would always return the first match).
    for experiment_index, n in enumerate(ROWDIMS):
        print(f'Testing {n} rows')
        _iters = ROUNDS[experiment_index]
        ihs_sketch_size = SKETCH_SIZE
        classic_sketch_size = CLASSICAL_SKETCH_SIZE[experiment_index]

        for trial in range(NTRIALS):
            print("TRIAL {}".format(trial))
            X, y, x_true = gaussian_design_unconstrained(n, D, variance=1.0)
            # rcond=None matches the tests in this file and avoids NumPy's
            # FutureWarning about the changed default.
            x_opt = np.linalg.lstsq(X, y, rcond=None)[0]

            for sketch_method in METHODS:
                print('*' * 80)
                col_sparsity = 4 if sketch_method == 'sjlt' else 1

                if sketch_method == 'Sketch & Solve':
                    _sketch = rp(X, classic_sketch_size, 'countSketch',
                                 col_sparsity)
                    SA, Sb = _sketch.sketch_data_targets(y)
                    x_ss = np.linalg.lstsq(SA, Sb, rcond=None)[0]
                    _record(sketch_method, experiment_index, X, x_opt,
                            x_true, x_ss)
                elif sketch_method in sketches:
                    print(f'{sketch_method} IHS')
                    my_ihs = ihs(X, y, sketch_method, ihs_sketch_size,
                                 col_sparsity)
                    x_ihs, x_iters = my_ihs.ols_fit_new_sketch_track_errors(
                        _iters)
                    x_errors = x_opt[:, None] - x_iters
                    print(x_errors.shape)
                    _record(sketch_method, experiment_index, X, x_opt,
                            x_true, x_ihs)
                else:
                    # Solve exactly: x_opt is already the exact estimate,
                    # so only its distance to the truth is meaningful.
                    MSE_TRUTH["Exact"][experiment_index] += mean_square_error(
                        x_opt, x_true)
                    PRED_ERROR_TRUTH["Exact"][
                        experiment_index] += prediction_error(
                            X, x_opt, x_true)

    # Turn the accumulated sums into means over the trials.
    for table in (MSE_OPT, PRED_ERROR_OPT, MSE_TRUTH, PRED_ERROR_TRUTH):
        for key in table:
            table[key] /= NTRIALS

    pretty = PrettyPrinter(indent=4)
    pretty.pprint(MSE_OPT)
    pretty.pprint(PRED_ERROR_OPT)
    pretty.pprint(MSE_TRUTH)
    pretty.pprint(PRED_ERROR_TRUTH)

    save_dir = '../../output/baselines/'
    np.save(save_dir + 'ihs_ols_mse_OPT', MSE_OPT)
    np.save(save_dir + 'ihs_ols_pred_error_OPT', PRED_ERROR_OPT)
    np.save(save_dir + 'ihs_ols_mse_TRUTH', MSE_TRUTH)
    np.save(save_dir + 'ihs_ols_pred_error_TRUTH', PRED_ERROR_TRUTH)