def test_bounded(make_quadratic, make_random):

    random = make_random
    a, b, c, data, bounds = make_quadratic
    w0 = np.concatenate((random.randn(2), [1.5]))

    # Reference solution from bounded L-BFGS-B
    res = minimize(qobj, w0, args=(data,), jac=True, bounds=bounds,
                   method='L-BFGS-B')
    Ea_bfgs, Eb_bfgs, Ec_bfgs = res['x']

    # SGD under the same bounds should land close to the L-BFGS-B solution
    res = sgd(qobj, w0, data, bounds=bounds, eval_obj=True,
              random_state=random)
    Ea_sgd, Eb_sgd, Ec_sgd = res['x']

    assert np.allclose((Ea_bfgs, Eb_bfgs, Ec_bfgs),
                       (Ea_sgd, Eb_sgd, Ec_sgd),
                       atol=5e-2, rtol=0)
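# The tests in this module rely on fixtures and quadratic objectives that are
# defined elsewhere and are not shown in this section (make_quadratic,
# make_random, qobj, qfun, qgrad, q_seq), along with the usual imports
# (numpy as np plus the library's sgd, minimize and updater utilities). The
# sketch below illustrates one plausible shape for those helpers, assuming
# noisy samples from y = a*x**2 + b*x + c stored as data[:, 0] = y and
# data[:, 1] = x; the coefficients, noise level and bounds used here are
# illustrative assumptions, not the project's actual values.

import numpy as np
import pytest


@pytest.fixture
def make_random():
    # Hypothetical fixed random state so the stochastic tests are repeatable.
    return np.random.RandomState(100)


@pytest.fixture
def make_quadratic(make_random):
    # Hypothetical data generator: noisy quadratic, plus box bounds on (a, b, c).
    a, b, c = 3.0, 2.0, 1.0
    x = np.linspace(0.0, 1.0, 1000)
    y = a * x**2 + b * x + c + make_random.randn(x.size) * 1e-3
    data = np.stack((y, x), axis=1)
    bounds = [(0.0, 5.0)] * 3
    return a, b, c, data, bounds


def qfun(w, data):
    # Mean squared error of the quadratic fit.
    a, b, c = w
    y, x = data[:, 0], data[:, 1]
    return 0.5 * ((a * x**2 + b * x + c - y)**2).mean()


def qgrad(w, data):
    # Gradient of qfun with respect to (a, b, c).
    a, b, c = w
    y, x = data[:, 0], data[:, 1]
    r = a * x**2 + b * x + c - y
    return np.array([(r * x**2).mean(), (r * x).mean(), r.mean()])


def qobj(w, data, grad=True):
    # Objective and (optionally) gradient together, matching the jac=True calls
    # above and the qobj(w, data, False) calls in the derivative-free tests.
    return (qfun(w, data), qgrad(w, data)) if grad else qfun(w, data)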
def test_unbounded(make_quadratic, make_random):

    random = make_random
    a, b, c, data, _ = make_quadratic
    w0 = random.randn(3)

    def assert_opt(Ea, Eb, Ec):
        assert np.allclose((a, b, c), (Ea, Eb, Ec), atol=1e-3, rtol=0)

    # SGD should recover the true coefficients with every updater
    for updater in [SGDUpdater, AdaDelta, AdaGrad, Momentum, Adam]:
        res = sgd(qobj, w0, data, eval_obj=True, updater=updater(),
                  random_state=random)
        assert_opt(*res['x'])

    # Objective and gradient returned together
    res = minimize(qobj, w0, args=(data,), jac=True, method='L-BFGS-B')
    assert_opt(*res['x'])

    # Objective and gradient supplied separately
    res = minimize(qfun, w0, args=(data,), jac=qgrad, method='L-BFGS-B')
    assert_opt(*res['x'])

    # Derivative-free default method
    res = minimize(qfun, w0, args=(data,), jac=False, method=None)
    assert_opt(*res['x'])
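# The updater classes looped over above (SGDUpdater, AdaDelta, AdaGrad,
# Momentum, Adam) each encapsulate a per-parameter step rule that sgd
# presumably applies after each minibatch gradient. Purely as an illustration
# of the kind of rule involved, and not the library's actual interface or
# implementation, here is a textbook Adam step written against an assumed
# __call__(w, grad) convention.


class AdamSketch:
    """Illustrative Adam update rule (hypothetical interface)."""

    def __init__(self, alpha=0.01, beta1=0.9, beta2=0.999, epsilon=1e-8):
        self.alpha = alpha
        self.beta1 = beta1
        self.beta2 = beta2
        self.epsilon = epsilon
        self.m = None   # first-moment (mean) estimate of the gradient
        self.v = None   # second-moment (uncentred variance) estimate
        self.t = 0      # step counter for bias correction

    def __call__(self, w, grad):
        if self.m is None:
            self.m = np.zeros_like(w)
            self.v = np.zeros_like(w)
        self.t += 1
        self.m = self.beta1 * self.m + (1 - self.beta1) * grad
        self.v = self.beta2 * self.v + (1 - self.beta2) * grad**2
        mhat = self.m / (1 - self.beta1**self.t)    # bias-corrected mean
        vhat = self.v / (1 - self.beta2**self.t)    # bias-corrected variance
        return w - self.alpha * mhat / (np.sqrt(vhat) + self.epsilon)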
def test_sgd_seqdata(make_quadratic, make_random):

    random = make_random
    a, b, c, data, _ = make_quadratic
    y, x = data[:, 0], data[:, 1]
    w0 = random.randn(3)

    def assert_opt(Ea, Eb, Ec):
        assert np.allclose((a, b, c), (Ea, Eb, Ec), atol=1e-3, rtol=0)

    # Data passed as a tuple of aligned arrays rather than one stacked array
    res = sgd(q_seq, w0, data=(x, y), eval_obj=True, random_state=random)
    assert_opt(*res['x'])
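# q_seq is not shown in this section. A plausible sketch, assuming that when
# sgd is given data=(x, y) it slices the arrays into aligned minibatches and
# passes them to the objective as separate arguments (this calling convention,
# like the helper itself, is an assumption):


def q_seq(w, x, y):
    # Same quadratic least-squares objective as qobj, but with x and y taken
    # as separate arrays instead of a single stacked data matrix.
    a, b, c = w
    r = a * x**2 + b * x + c - y
    obj = 0.5 * (r**2).mean()
    grad = np.array([(r * x**2).mean(), (r * x).mean(), r.mean()])
    return obj, grad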
def test_bounded(make_quadratic):

    a, b, c, data, bounds = make_quadratic
    w0 = np.concatenate((np.random.randn(2), [1.5]))

    res = minimize(qobj, w0, args=(data,), jac=True, bounds=bounds,
                   method='L-BFGS-B')
    Ea_bfgs, Eb_bfgs, Ec_bfgs = res['x']

    res = sgd(qobj, w0, data, bounds=bounds, eval_obj=True, gtol=1e-4,
              passes=1000, rate=0.95, eta=1e-6)
    Ea_sgd, Eb_sgd, Ec_sgd = res['x']

    assert np.allclose((Ea_bfgs, Eb_bfgs, Ec_bfgs),
                       (Ea_sgd, Eb_sgd, Ec_sgd),
                       atol=1e-2, rtol=0)

    if nlopt_test:
        res = minimize(qobj, w0, args=(data, False), jac=False, bounds=bounds,
                       method='LN_BOBYQA', backend='nlopt')
        Ea_bq, Eb_bq, Ec_bq = res['x']

        assert np.allclose((Ea_bfgs, Eb_bfgs, Ec_bfgs),
                           (Ea_bq, Eb_bq, Ec_bq),
                           atol=1e-3, rtol=0)
def test_unbounded(make_quadratic):

    a, b, c, data, _ = make_quadratic
    w0 = np.random.randn(3)

    res = sgd(qobj, w0, data, eval_obj=True, gtol=1e-4, passes=1000,
              rate=0.95, eta=1e-7)
    Ea, Eb, Ec = res['x']
    assert np.allclose((a, b, c), (Ea, Eb, Ec), atol=1e-2, rtol=0)

    res = minimize(qobj, w0, args=(data,), jac=True, method='L-BFGS-B')
    Ea, Eb, Ec = res['x']
    assert np.allclose((a, b, c), (Ea, Eb, Ec), atol=1e-3, rtol=0)

    if nlopt_test:
        res = minimize(qobj, w0, args=(data, False), jac=False,
                       method='LN_BOBYQA', backend='nlopt')
        Ea, Eb, Ec = res['x']
        assert np.allclose((a, b, c), (Ea, Eb, Ec), atol=1e-3, rtol=0)

    res = minimize(qobj, w0, args=(data, False), jac=False, method=None)
    Ea, Eb, Ec = res['x']
    assert np.allclose((a, b, c), (Ea, Eb, Ec), atol=1e-3, rtol=0)
def sgd_demo():

    # Settings
    batchsize = 100
    var = 0.05
    nPoints = 1000
    nQueries = 500
    passes = 200
    min_grad_norm = 0.01
    rate = 0.9
    eta = 1e-5

    # Create dataset
    X = np.linspace(0.0, 1.0, nPoints)[:, np.newaxis]
    Y = np.sin(2 * np.pi * X.flatten()) + np.random.randn(nPoints) * var
    centres = np.linspace(0.0, 1.0, 20)[:, np.newaxis]
    Phi = RadialBasis(centres)(X, 0.1)
    train_dat = np.hstack((Y[:, np.newaxis], Phi))

    Xs = np.linspace(0.0, 1.0, nQueries)[:, np.newaxis]
    Yt = np.sin(2 * np.pi * Xs.flatten())
    Phi_s = RadialBasis(centres)(Xs, 0.1)

    w = np.linalg.solve(Phi.T.dot(Phi), Phi.T.dot(Y))
    Ys = Phi_s.dot(w)

    # L-BFGS approach to test objective
    w0 = np.random.randn(Phi.shape[1])
    results = minimize(f, w0, args=(train_dat,), jac=True, method='L-BFGS-B')
    w_grad = results['x']
    Ys_grad = Phi_s.dot(w_grad)

    # SGD for learning w
    w0 = np.random.randn(Phi.shape[1])
    results = sgd(f, w0, train_dat, passes=passes, batchsize=batchsize,
                  eval_obj=True, gtol=min_grad_norm, rate=rate, eta=eta)
    w_sgd, gnorms, costs = results['x'], results['norms'], results['objs']
    Ys_sgd = Phi_s.dot(w_sgd)

    # Visualise results
    fig = pl.figure()
    ax = fig.add_subplot(121)

    # truth
    pl.plot(X, Y, 'r.', Xs, Yt, 'k-')

    # exact weights
    pl.plot(Xs, Ys, 'c-')
    pl.plot(Xs, Ys_grad, 'b-')
    pl.plot(Xs, Ys_sgd, 'g-')
    pl.title('Function fitting')
    pl.xlabel('x')
    pl.ylabel('y')
    pl.legend(['Training', 'Truth', 'Analytic', 'LBFGS', 'SGD'])

    ax = fig.add_subplot(122)
    pl.xlabel('iteration')
    pl.title('SGD convergence')
    ax.plot(range(len(costs)), costs, 'b')
    ax.set_ylabel('cost', color='b')
    for t in ax.get_yticklabels():
        t.set_color('b')

    ax2 = ax.twinx()
    ax2.plot(range(len(gnorms)), gnorms, 'r')
    ax2.set_ylabel('gradient norms', color='r')
    for t in ax2.get_yticklabels():
        t.set_color('r')

    pl.show()
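# The demo minimises an objective f that is not shown in this section. A
# plausible sketch, assuming f(w, data) unpacks train_dat as assembled above
# (targets in column 0, radial-basis features in the remaining columns) and
# returns the mean squared error together with its gradient:


def f(w, data):
    # Hypothetical linear least-squares objective for the basis-function fit.
    y, Phi = data[:, 0], data[:, 1:]
    r = Phi.dot(w) - y
    obj = 0.5 * (r**2).mean()
    grad = Phi.T.dot(r) / len(y)
    return obj, grad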