def g(self, x):
    # compute g
    gx = np.zeros(self.n)
    tmp = x[1:] - np.sin(x[:-1])
    gx[1:] += (2 * self.c1) * tmp
    gx[:-1] -= (2 * self.c1) * tmp * np.cos(x[:-1])
    gx[:-1] += (2 * self.c2) * x[:-1]
    return gx * self.scale


n_list = [1000, 2000, 5000, 10000]
for n in n_list:
    x = -np.ones(n)
    x[0] = 4.712389
    scale = 1e-5
    eps = 1e-8 * scale
    fun = GENSIN(n, scale=scale)
    for method in ['fr', 'prp', 'prp+', 'hs', 'cd', 'dy', 'bb', 'sd']:
        if method == 'bb':
            res = bb.bb(fun, x, eps=eps)
        elif method == 'sd':
            res = bb.sd(fun, x, eps=eps)
        else:
            res = cg.cg(fun, x, method=method, eps=eps)
        print(res[0][:10])
        print('& %.3e & %.1f & %d & %d ' % (res[1] / scale - 2.455,
              -log(res[2] / scale) / log(10), res[3], res[4]))
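# For reference: the listing above only shows the gradient of GENSIN. Below is a
# minimal sketch of the objective that this gradient corresponds to, assuming the
# same self.c1, self.c2 and self.scale attributes set in the constructor. It is a
# reconstruction derived from g, not the original code.
def f(self, x):
    # f(x) = scale * (c1 * ||x[1:] - sin(x[:-1])||^2 + c2 * ||x[:-1]||^2)
    tmp = x[1:] - np.sin(x[:-1])
    fx = self.c1 * np.dot(tmp, tmp) + self.c2 * np.dot(x[:-1], x[:-1])
    return fx * self.scale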
def f(self, x):
    # compute f
    tmp = x[1:] - x[:-1] + 1 - x[:-1] ** 2
    fx = np.dot(tmp, tmp)
    return fx * self.scale

def g(self, x):
    # compute g
    gx = np.zeros(self.n)
    tmp = x[1:] - x[:-1] + 1 - x[:-1] ** 2
    gx[:-1] -= 2 * tmp * (1 + 2 * x[:-1])
    gx[1:] += 2 * tmp
    return gx * self.scale


n_list = [1000, 2000, 5000, 10000]
for n in n_list:
    x = np.zeros(n)
    scale = 1e-1
    kwargs = {'eps': 1e-7 * scale, 'maxiter': 50000}
    fun = FLETCHCR(n, scale)
    for method in ['fr', 'prp', 'prp+', 'hs', 'cd', 'dy', 'bb', 'sd']:
        if method == 'bb':
            res = bb.bb(fun, x, **kwargs)
        elif method == 'sd':
            res = bb.sd(fun, x, **kwargs)
        else:
            res = cg.cg(fun, x, method=method, **kwargs)
        print(res[0][:10])
        print('& %.3e & %.1f & %d & %d ' % (
            res[1] / scale, -log(res[2] / scale) / log(10), res[3], res[4]))
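# The method strings used in the experiment loops above ('fr', 'prp', 'prp+', 'hs',
# 'cd', 'dy') select standard nonlinear conjugate-gradient variants. For reference,
# a sketch of the usual beta update formulas these names refer to; the helper name
# cg_beta is hypothetical and the actual update inside cg.cg may differ in details.
def cg_beta(method, g_new, g_old, d_old):
    y = g_new - g_old
    if method == 'fr':    # Fletcher-Reeves
        return np.dot(g_new, g_new) / np.dot(g_old, g_old)
    if method == 'prp':   # Polak-Ribiere-Polyak
        return np.dot(g_new, y) / np.dot(g_old, g_old)
    if method == 'prp+':  # PRP restricted to nonnegative values
        return max(np.dot(g_new, y) / np.dot(g_old, g_old), 0.0)
    if method == 'hs':    # Hestenes-Stiefel
        return np.dot(g_new, y) / np.dot(d_old, y)
    if method == 'cd':    # Conjugate Descent (Fletcher)
        return -np.dot(g_new, g_new) / np.dot(d_old, g_old)
    if method == 'dy':    # Dai-Yuan
        return np.dot(g_new, g_new) / np.dot(d_old, y)
    raise ValueError('unknown CG variant: %s' % method)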
def train(agent, Train_epoch, max_iter, file_name='./res.dat'):
    output_file = open(file_name, 'w')
    for epoch in range(Train_epoch):
        global env_train, env_test
        env = env_train[(epoch // 20) % num_train]
        pre_state = env.reset()
        acc_reward = 0
        for step in range(max_iter):
            # print('pre:', pre_state)
            action = agent.action(pre_state)
            # print('action:', action)
            if action[0] != action[0]:  # NaN check: NaN compares unequal to itself
                raise ValueError('nan error!')
            next_state, reward, done, _ = env.step(action)
            reward *= step ** 0.2
            acc_reward += reward
            # print('next:', next_state)
            if step == max_iter - 1:
                done = True
            # agent.train(state_featurize.transfer(pre_state), action, reward,
            #             state_featurize.transfer(next_state), done)
            agent.train(pre_state, action, reward, next_state, done)
            if done and epoch % args.print_every == 0:
                # print('episode: ', epoch + 1, 'step: ', step + 1, ' reward is', acc_reward, file=output_file)
                # print('episode: ', epoch + 1, 'step: ', step + 1, ' reward is', acc_reward)
                print('episode: ', epoch + 1, 'step: ', step + 1,
                      ' final value: ', env.get_value())
                break
            pre_state = next_state
        if epoch % 100 == 0:
            # periodically evaluate the agent on a test problem and compare
            # against the classical baselines (CG, SD, BFGS)
            test_count = epoch // 100
            final_value = play(agent, 1, max_iter, test_count)
            print('--------------episode ', epoch, 'final_value: ', final_value,
                  '---------------', file=output_file)
            print('--------------episode ', epoch, 'test_id', test_count % num_test,
                  'final value: ', test_record[test_count % num_test], '---------------')
            env = env_test[test_count % num_test]
            if args.obj == 'logistic':
                obj = Logistic(args.dim, env.func.X, env.func.Y)
            elif args.obj == 'neural':
                obj = NeuralNet(dim, env.func.X, env.func.Y, **kwargs)
            cg_x, cg_y, _, cg_iter, _, _, _ = cg(obj, x0=init_point,
                                                 maxiter=max_iter, a_high=args.action_high)
            print('CG method: optimal value: {0}, iterations {1}'.format(cg_y, cg_iter))
            sd_x, sd_y, _, sd_iter, _, _, _ = sd(obj, x0=init_point,
                                                 maxiter=max_iter, a_high=args.action_high)
            print('SD method: optimal value: {0}, iterations {1}'.format(sd_y, sd_iter))
            bfgs_x, bfgs_y, _, bfgs_iter, _, _, _ = quasiNewton(
                obj, x0=init_point, maxiter=max_iter, a_high=args.action_high)
            print('BFGS method: optimal value: {0}, iterations {1}'.format(bfgs_y, bfgs_iter))
            # if np.mean(np.array(final_value)) < min(cg_y, sd_y, bfgs_y):
            #     print('----- using ', epoch, ' epochs')
            #     # agent.save_model()
            #     break
            time.sleep(1)
        if epoch % 500 == 0 and epoch > 0:
            path = save_path + str(epoch)
            agent.save(path)
    return agent
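# train() only relies on a small agent interface: action(state), train(s, a, r, s', done)
# and save(path). A minimal stand-in that satisfies this interface, useful for
# smoke-testing the training loop; the class name RandomAgent and its constructor
# arguments are illustrative only and are not one of the actual agents used below.
class RandomAgent:
    def __init__(self, action_dim, action_high=1.0):
        self.action_dim = action_dim
        self.action_high = action_high

    def action(self, state):
        # pick a random action inside the allowed range
        return np.random.uniform(-self.action_high, self.action_high, self.action_dim)

    def train(self, pre_state, action, reward, next_state, done):
        # a learning agent would update its parameters from this transition
        pass

    def save(self, path):
        # a learning agent would persist its parameters here
        pass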
                  trajectory_number=100, update_epoach=50)

if args.obj == 'quadratic':
    obj = Quadratic(dim)
elif args.obj == 'logistic':
    obj = Logistic(dim, X, Y)
elif args.obj == 'ackley':
    obj = Ackley(dim)
elif args.obj == 'neural':
    obj = NeuralNet(dim, X, Y, **kwargs)

cg_x, cg_y, _, cg_iter, _ = cg(obj, x0=init_point, maxiter=max_iter)
print('CG method:\n optimal point: {0}, optimal value: {1}, iterations {2}'.format(
    cg_x, cg_y, cg_iter))
sd_x, sd_y, _, sd_iter, _ = sd(obj, x0=init_point, maxiter=max_iter)
print('SD method:\n optimal point: {0}, optimal value: {1}, iterations {2}'.format(
    sd_x, sd_y, sd_iter))
bfgs_x, bfgs_y, _, bfgs_iter, _ = quasiNewton(obj, x0=init_point, maxiter=max_iter)
print('BFGS method:\n optimal point: {0}, optimal value: {1}, iterations {2}'.format(
    bfgs_x, bfgs_y, bfgs_iter))

if args.agent == 'naf':
    agent = train(naf, max_epoch, max_iter)
elif args.agent == 'ddpg':
    agent = train(ddpg, max_epoch, max_iter)
elif args.agent == 'cac':
    agent = train(cac, max_epoch, max_iter)
elif args.agent == 'ppo':