def example1():
    """Optimize an AllRange workload with a PIdentity template and print errors.

    Builds ``workload.AllRange(256)``, optimizes a ``templates.PIdentity(16, 256)``
    template against it, and prints two root-MSE values on one line: the first
    for the optimized template's strategy, the second for
    ``workload.Identity(256)`` used as the strategy on the same workload.
    """
    print('Example 1')
    W = workload.AllRange(256)
    pid = templates.PIdentity(16, 256)
    # optimize() is called for its effect on ``pid`` (its strategy is read
    # below); the return value is not needed, so it is not bound to a name.
    pid.optimize(W)
    err = error.rootmse(W, pid.strategy())
    err2 = error.rootmse(W, workload.Identity(256))
    print(err, err2)
def calculate_workload_error2(wk, strategy, eps):  # Deprecated
    """Summarize the HDMM error of ``strategy`` on workload ``wk`` at ``eps``.

    Returns a dict with these keys:
        'pdf', 'pdf_x': histogram densities and the left bin edges of the
            sampled per-query errors as lists, or ``False`` for both when the
            variance of the samples is below 1e-2.
        'method': always the string 'HDMM'.
        'expected_error': value of ``hdmm_error.rootmse(wk, strategy, eps=eps)``.
        'num_query': ``wk.shape[0]`` as a Python int.
    """
    rootmse = hdmm_error.rootmse(wk, strategy, eps=eps)
    sampled = hdmm_error.per_query_error_sampling(
        wk, strategy, 100000, eps, normalize=True
    )

    if np.var(sampled) < 1e-2:
        # Samples are nearly constant: report no histogram at all.
        pdf, pdf_x = False, False
    else:
        heights, edges = np.histogram(sampled, bins='auto', density=True)
        # Drop the final edge so that pdf_x has one entry per bin.
        pdf, pdf_x = heights.tolist(), edges[:-1].tolist()

    return {
        'pdf': pdf,
        'pdf_x': pdf_x,
        'method': 'HDMM',
        'expected_error': rootmse,
        'num_query': int(wk.shape[0]),
    }
# NOTE(review): the first four statements appear to be the tail of a plotting
# helper whose header lies outside this view (``mean`` and ``std`` are not
# defined here) — confirm the enclosing function and its indentation.
    ax = sns.scatterplot(x=mean, y=std)
    ax.set(xlabel='Average Error', ylabel='Stdev Error', title=f'Avg vs Stdev of Error over re-optimizations, SF1')
    # Save the figure to disk, then display it.
    plt.savefig('error_variation_std.png')
    plt.show()


if __name__ == '__main__':
    # Keys handed to build_workload to assemble the workload.
    full = [
        'P1', 'P3', 'P4', 'P5', 'P8', 'P9', 'P10', 'P11', 'P12', 'P12A_I',
        'PCT12', 'PCT12A_O'
    ]
    w_sf1_full = build_workload(full)
    print('Query count, full', w_sf1_full.shape[0])
    stats = {}
    trials = 10 #150
    # opt_strategy is called once per trial on the same workload.
    strategies = [opt_strategy(w_sf1_full) for i in range(trials)]
    # Element-wise square root of per_query_error, one array per strategy.
    errors = [
        np.sqrt(error.per_query_error(w_sf1_full, a)) for a in strategies
    ]
    # NOTE(review): the key 'rootmse ' carries a trailing space, and ``stats``
    # is not read again in the visible code — confirm both are intended.
    stats['rootmse '] = [error.rootmse(w_sf1_full, a) for a in strategies]
    stats['max_query_err'] = [np.max(e) for e in errors]
    stats['min_query_err'] = [np.min(e) for e in errors]
    stats['mean_query_err'] = [np.mean(e) for e in errors]
    # mean_vs_max_error(w_sf1_full)
    mean_vs_max_error(w_sf1_full, trials=100)
# NOTE(review): script fragment — W, domain, A_manual, A_marg, A_identity_full,
# A_identity, A_wkld and summarize_strategy are defined outside this view, and
# the enclosing scope/indentation is unknown — confirm before relying on it.

# Start a Marginals template from the weights of Marginals.approximate(A_manual),
# then optimize it against W.
temp = workload.Marginals.approximate(A_manual)
A_manual_opt = templates.Marginals(domain, seed=1003)
A_manual_opt._params = temp.weights
A_manual_opt.optimize(W)
# Clip the optimized parameters to the range [0, inf).
A_manual_opt._params = np.clip(A_manual_opt._params, 0, float('inf'))

print('Num queries:', W.shape[0])
print('Sensitivity:', W.sensitivity())
# Root-MSE on W for each candidate strategy, formatted to three decimals.
print('Marg', '\t\t', f'{error.rootmse(W, A_marg.strategy()):10.3f}')
print('Ident_full', '\t', f'{error.rootmse(W, A_identity_full.strategy()):10.3f}')
print('Identity', '\t', f'{error.rootmse(W, A_identity):10.3f}')
print('Workload', '\t', f'{error.rootmse(W, A_wkld):10.3f}')
print('Manual', '\t', f'{error.rootmse(W, A_manual):10.3f}')
print('Manual Opt', '\t', f'{error.rootmse(W, A_manual_opt.strategy()):10.3f}')
summarize_strategy(W, A_marg, domain)
print('')
print('')
# Per-matrix breakdown: for every entry of W.matrices print its base key, the
# first dimension of its base, and its root-MSE under one strategy. The three
# loops cover A_marg, A_manual and A_manual_opt in that order.
for m in W.matrices:
    print(m.base.key, m.base.shape[0], '\t', error.rootmse(m, A_marg.strategy()))
for m in W.matrices:
    print(m.base.key, m.base.shape[0], '\t', error.rootmse(m, A_manual))
for m in W.matrices:
    print(m.base.key, m.base.shape[0], '\t', error.rootmse(m, A_manual_opt.strategy()))
# NOTE(review): script fragment — domain, dim, W_scen1, W_scen2, A_scen1 and
# summarize_strategy are defined outside this view, and the enclosing
# scope/indentation is unknown — confirm before relying on it.

# Optimize a Marginals template against the scenario-2 workload.
A_scen2 = templates.Marginals(domain)
A_scen2.optimize(W_scen2)
# Here we analyze the error of scenario 2, which is what the user really wants
# But we consider the strategy from scenario1 and compare with strategy derived from scenario2
# NOTE(review): the comment above names scenario 2, but only the query count
# and sensitivity below use W_scen2; every error and summary is computed
# against W_scen1 — confirm which workload is intended.
print('Num queries:', W_scen2.shape[0])
print('Sensitivity:', W_scen2.sensitivity())
print('Per query RMSE, A_scen1', '\t\t', f'{error.rootmse(W_scen1, A_scen1.strategy()):10.3f}')
print('Per query RMSE, A_scen2', '\t\t', f'{error.rootmse(W_scen1, A_scen2.strategy()):10.3f}')
print('')
summarize_strategy(W_scen1, A_scen1, domain)
print('')
summarize_strategy(W_scen1, A_scen2, domain)
print('')
# Per-matrix comparison: base key, its marginal_index_repr rendering, the
# first dimension of the base, then root-MSE under A_scen1 and under A_scen2.
for m in W_scen1.matrices:
    print(
        m.base.key,
        util.marginal_index_repr(m.base.key, dim, " "),
        m.base.shape[0],
        '\t',
        error.rootmse(m, A_scen1.strategy()),
        error.rootmse(m, A_scen2.strategy()),
    )
return A def marginal_strategy(workload=None): template = templates.Marginals((2, 2, 63, 8)) A = template.restart_optimize(workload, 25)[0] return A if __name__ == '__main__': W = pl94_workload(with_full_id=True) print(W.shape) A = opt_p_identity(workload=W) err = error.rootmse(W, A) print('KroneckerPIdentity', err) #W = Marginals.approximate(W) A = marginal_strategy(workload=W) marg_err = error.rootmse(W, A) #marg_err = np.sqrt( error.expected_error(W, A) / (3*3*64*9) ) print('Marginals', marg_err) robert_err = error.rootmse(W, manual_strategy()) print('Robert', robert_err) print(A.weights / A.weights.sum())
# NOTE(review): the first three statements are the tail of a function whose
# header lies outside this view (``errors`` and ``W`` are bound there) —
# confirm the enclosing function and its indentation.
    # Square root of mean(errors) divided by the first dimension of W.
    rmse = np.sqrt(np.mean(errors) / W.shape[0])
    print(rmse)
    return rmse


if __name__ == '__main__':
    #x = synthetic_data(N=1000000)
    engine = 'wnnls'
    W = pl94_workload()
    # Three candidate strategies to compare.
    A1 = opt_p_identity(W)
    A2 = marginal_strategy(W)
    A3 = manual_strategy()
    # Root-MSE of each strategy on W as reported by error.rootmse.
    err1 = error.rootmse(W, A1)
    err2 = error.rootmse(W, A2)
    err3 = error.rootmse(W, A3)
    trials = 25
    # Dataset sizes 10**1 through 10**7.
    Ns = [10**k for k in range(1, 8)]
    errs1, errs2, errs3 = [], [], []
    for N in Ns:
        # The same dataset x is used for all three strategies at a given N.
        x = synthetic_data(N)
        errs1.append(run_test(W, A1, x, trials=trials, engine=engine))
        errs2.append(run_test(W, A2, x, trials=trials, engine=engine))
        errs3.append(run_test(W, A3, x, trials=trials, engine=engine))
    # Constant lines at err1 / err2 across all Ns.
    # NOTE(review): the script appears to continue past this view (err3 and
    # errs1..errs3 are not plotted in the visible code).
    plt.plot(Ns, [err1] * len(Ns), 'b', label='KronPIdentity+LS')
    plt.plot(Ns, [err2] * len(Ns), 'r', label='Marginals+LS')