print('alpha = ' + str(alpha))

# Step sizes: 1/L is the conservative choice; 2/(L + sigma) is the optimal
# fixed step for an L-smooth, sigma-strongly convex objective
eta_1 = 1 / p.L
eta_2 = 2 / (p.L + p.sigma)

n_inner_iters = int(m * 0.05)
n_svrg_iters = n_iters * 30
n_dgd_iters = n_iters * 30
batch_size = 5

distributed = [
    DGD_tracking(p, n_iters=n_dgd_iters, eta=eta_1 / 10, x_0=x_0, W=W),
    EXTRA(p, n_iters=n_dgd_iters, eta=eta_1 / 2, x_0=x_0, W=W),
    ADMM(p, n_iters=n_iters, rho=1, x_0=x_0.mean(axis=1)),
    DANE(p, n_iters=n_iters, mu=mu, x_0=x_0.mean(axis=1)),
]

network = [
    NetworkSVRG(p, n_iters=n_svrg_iters, n_inner_iters=n_inner_iters, eta=eta_1 / 10, x_0=x_0, W=W, opt=1),
    NetworkSARAH(p, n_iters=n_svrg_iters, n_inner_iters=n_inner_iters, eta=eta_1 / 10, x_0=x_0, W=W, opt=1),
    NetworkDANE(p, n_iters=n_iters, mu=mu, x_0=x_0, W=W),
]

exps = distributed + network

res = run_exp(exps, kappa=kappa, max_iter=n_iters, name='linear_regression', n_process=4, save=True)

plt.show()
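# generate_mixing_matrix(p) is assumed to return a symmetric, doubly stochastic
# gossip matrix W for p's communication graph together with alpha, its
# second-largest singular value (smaller alpha means faster mixing). A minimal
# Metropolis-Hastings sketch of one such construction; a hypothetical helper,
# not the repo's implementation:

import numpy as np

def metropolis_mixing_matrix(adj):
    """Sketch: build Metropolis-Hastings weights from a 0/1 adjacency matrix."""
    n = adj.shape[0]
    deg = adj.sum(axis=1)
    W = np.zeros((n, n))
    for i in range(n):
        for j in range(n):
            if adj[i, j]:
                W[i, j] = 1.0 / (max(deg[i], deg[j]) + 1)
        W[i, i] = 1.0 - W[i].sum()  # rows (and by symmetry, columns) sum to 1
    alpha = np.linalg.svd(W, compute_uv=False)[1]  # second-largest singular value
    return W, alpha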
n_iters = 1000

p = LinearRegression(n_agent, m, dim, noise_variance=1, kappa=kappa, prob=0.3)
W, alpha = generate_mixing_matrix(p)

x_0 = np.random.rand(dim, n_agent)
eta = 2 / (p.L + p.sigma)

inner_iters = [1, 2, 5, 10, 50, 100]
batch_size = [1]
params = [(k, 1, 0.05) for k in inner_iters]  # (n_inner_iters, batch_size, eta multiplier)

exps = [NetworkDANE(p, n_iters=n_iters, mu=mu, x_0=x_0, W=W)] \
    + [NetworkSVRG(p, n_iters=n_iters, n_inner_iters=x[0], batch_size=x[1], eta=eta * x[2], x_0=x_0, W=W) for x in params]

res = run_exp(exps, save=False, plot=False)

# Column 0 holds K/m; each batch size then gets a (#iters, #grads) column pair
table = np.zeros((len(inner_iters), len(batch_size) * 2 + 1))
table[:, 0] = inner_iters
table[:, 0] /= m

inner_iters_dict = {inner_iters[i]: i for i in range(len(inner_iters))}
batch_size_dict = {batch_size[i]: i for i in range(len(batch_size))}

for x in res[1:]:  # res[0] is the NetworkDANE baseline
    y = x.get_results()
    if len(y['func_error']) < n_iters and y['func_error'][-1] < 1:  # converged
        table[inner_iters_dict[x.n_inner_iters], batch_size_dict[x.batch_size] * 2 + 1] = len(y['func_error']) - 1
        table[inner_iters_dict[x.n_inner_iters], batch_size_dict[x.batch_size] * 2 + 2] = y['n_grad'][-1]
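# The convergence test above (reused in later experiments) assumes run_exp
# truncates 'func_error' once the optimality gap falls below its threshold, so
# a record shorter than n_iters with a small final value signals convergence.
# The same test as a named helper; hypothetical, for readability only:

def has_converged(func_error, n_iters, tol=1):
    # 'func_error' is assumed to be the per-iteration optimality gap that
    # run_exp records for each experiment
    return len(func_error) < n_iters and func_error[-1] < tol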
                 W=W, verbose=True),
    NetworkSARAH(p, n_iters=n_gd_iters, n_inner_iters=n_inner_iters, eta=0.1, x_0=x_0, W=W, verbose=True),
]

exps = distributed + network

begin = time.time()
res_list = run_exp(exps, max_iter=n_iters, name='nn', n_process=1, plot=False, save=True)
end = time.time()
print('Total {:.2f}s'.format(end - begin))

print("Initial accuracy = " + str(p.accuracy(x_0.mean(axis=1))))

# Tabulate per-iteration results for every experiment
max_iter = max(n_iters, n_gd_iters, n_dsgd_iters) + 1
table = np.zeros((max_iter, len(exps) * 3))
for k in range(len(res_list)):
    res = res_list[k].get_results()
    for i in range(len(res['history'])):
        x = res['history'][i]['x']
        if len(x.shape) == 2:  # per-agent iterates: average across agents before evaluating
params = [
    (50, 0.05),
    (100, 0.05),
    (300, 0.02),
    (500, 0.01),
    (700, 0.01),
    (900, 0.01),
]  # (n_inner_iters, eta multiplier): longer inner loops take smaller steps
inner_iters = [x[0] for x in params]

exps = [
    NetworkSVRG(p, n_iters, n_inner_iters=x[0], eta=eta * x[1], x_0=x_0, W=W)
    for x in reversed(params)
]

res = run_exp(exps, kappa=kappa, max_iter=n_iters, name='linear_regression')

table = np.zeros(len(inner_iters))
inner_iters_dict = {inner_iters[i]: i for i in range(len(inner_iters))}

for x in res:
    y = x.get_results()
    if len(y['func_error']) < n_iters and y['func_error'][-1] < 1:  # converged
        table[inner_iters_dict[x.n_inner_iters]] = len(y['func_error']) - 1
    else:  # didn't converge; np.nan marks it explicitly and leaves a gap in the plot
        table[inner_iters_dict[x.n_inner_iters]] = np.nan

plt.figure()
plt.semilogy([x / m for x in inner_iters], table)
plt.xlabel('K/m')
plt.ylabel('#iters until convergence')
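# The x-axis normalizes the inner-loop length K by the local sample size m.
# Under the usual SVRG cost accounting (one full local gradient pass per outer
# iteration plus K stochastic steps; an assumption about how n_grad is counted
# here), the per-agent gradient cost of one outer iteration is:

def svrg_grads_per_outer_iter(m, K, batch_size=1):
    # standard SVRG cost model, assumed rather than read from the repo
    return m + K * batch_size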
eta_2 = 2 / (p.L + p.sigma)
n_inner_iters = int(m * 0.05)

# Numbers of mixing (communication) rounds per outer iteration to sweep over
n_mix = list(range(1, 20)) + [20, 25, 30, 35, 50]

exps_dane = [
    NetworkDANE(p, n_iters=n_iters, n_mix=n, mu=mu, x_0=x_0, W=W)
    for n in n_mix
]
exps_svrg = [
    NetworkSVRG(p, n_iters=n_iters, n_mix=n, n_inner_iters=n_inner_iters, eta=eta_1 / 10, x_0=x_0, W=W)
    for n in n_mix
]
exps = exps_dane + exps_svrg

res = run_exp(exps, kappa=kappa, max_iter=n_iters, name='extra_comm_alpha_' + str(alpha), save=True)

plt.show()
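# n_mix is assumed to mean "apply the gossip matrix n times per iteration",
# i.e. mix with W**n instead of W; each extra round shrinks the consensus
# error by roughly a factor of alpha. A standalone numpy illustration with a
# synthetic ring matrix (not the experiment's W):

import numpy as np

n_demo = 5
W_demo = np.zeros((n_demo, n_demo))
for i in range(n_demo):
    W_demo[i, i] = 0.5
    W_demo[i, (i - 1) % n_demo] = 0.25
    W_demo[i, (i + 1) % n_demo] = 0.25  # lazy random walk on a 5-agent ring

x_demo = np.random.rand(3, n_demo)  # dim x n_agent, the layout used by the experiments
x_bar = x_demo.mean(axis=1, keepdims=True)
for rounds in [1, 5, 20]:
    x_mixed = x_demo @ np.linalg.matrix_power(W_demo, rounds)
    print(rounds, np.linalg.norm(x_mixed - x_bar))  # consensus error decays with rounds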
                verbose=True),
    NetworkDANE(p, n_iters=n_iters, mu=mu, x_0=x_0, W=W, verbose=True),
]

# exps = [
#     NetworkGD(p, n_iters=n_dgd_iters, eta=eta, x_0=x_0, W=W),
#     NetworkSVRG(p, n_iters=n_svrg_iters, n_inner_iters=n_inner_iters, eta=eta/20, mu=0, x_0=x_0, W=W, batch_size=batch_size, verbose=True),
#     NetworkSARAH(p, n_iters=n_svrg_iters, n_inner_iters=n_inner_iters, eta=eta/20, mu=0, x_0=x_0, W=W, batch_size=batch_size, verbose=True),
#     NetworkDANE(p, n_iters=n_iters, mu=1e-2, x_0=x_0, W=W, verbose=True),
# ]

exps = single_machine + distributed + network

res = run_exp(exps, kappa=kappa, max_iter=n_iters, name='gisette', n_process=1, save=True)

# Estimate NetworkDANE's geometric convergence rate: if func_error decays as
# e_t ~ e_0 * k**t over T iterations, then k = (e_T / e_0)**(1 / T)
tmp = [x.get_results() for x in res if x.get_name() == 'NetworkDANE'][0]
k = np.exp(np.log(tmp['func_error'][-1] / tmp['func_error'][0]) / len(tmp['func_error']))
print("NetworkDANE's convergence rate is: " + str(k))
print('1 - 1 / (2 kappa) = ' + str(1 - 1 / (2 * kappa)))


def accuracy(w):
    if len(w.shape) > 1:
        w = w.mean(axis=1)  # average per-agent iterates before evaluating
    Y_hat = p.X_val.dot(w)
# Star topology
p.generate_star_graph()
W, alpha = generate_mixing_matrix(p)
alpha_list.append(alpha)
exps += [
    NetworkDANE(p, n_iters=n_iters, mu=1, x_0=x_0, W=W),
    NetworkSVRG(p, n_iters=n_svrg_iters, n_inner_iters=n_inner_iters, eta=eta_1 / 40, x_0=x_0, W=W),
]

res = run_exp(exps, n_process=1, save=False, plot=False)

# Two experiments (NetworkDANE, NetworkSVRG) were appended per topology, so
# slice res in consecutive pairs, in the order the topologies were generated
save(res[0:2], 0, 'centered')
save(res[2:4], alpha_list[1], 'er')
save(res[4:6], alpha_list[2], 'ring')
save(res[6:8], alpha_list[3], 'grid')
save(res[8:10], alpha_list[4], 'star')

end = time.time()
print('Total running time is {:.2f}s'.format(end - start))

prop_cycle = plt.rcParams['axes.prop_cycle']
colors = prop_cycle.by_key()['color']

plt.figure(0)
plt.figure(1)
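# alpha quantifies how fast a topology mixes: it is 0 for uniform averaging
# over a complete graph and approaches 1 for poorly connected graphs, which is
# why each topology above is saved together with its alpha. A hypothetical
# comparison using the metropolis_mixing_matrix sketch from earlier (not the
# repo's graph generators):

def ring_adjacency(n):
    A = np.zeros((n, n), dtype=int)
    for i in range(n):
        A[i, (i + 1) % n] = A[(i + 1) % n, i] = 1
    return A

def star_adjacency(n):
    A = np.zeros((n, n), dtype=int)
    A[0, 1:] = A[1:, 0] = 1
    return A

for name, A in [('ring', ring_adjacency(20)), ('star', star_adjacency(20))]:
    _, a = metropolis_mixing_matrix(A)
    print(name, a)  # larger alpha means slower mixing and slower consensus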