Example #1
    def g(self, x):  # compute g
        gx = np.zeros(self.n)
        tmp = x[1:] - np.sin(x[:-1])
        gx[1:] += (2 * self.c1) * tmp
        gx[:-1] -= (2 * self.c1) * tmp * np.cos(x[:-1])
        gx[:-1] += (2 * self.c2) * x[:-1]
        return gx * self.scale
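    # The objective itself is not part of this excerpt. Below is a minimal sketch
    # of an f consistent with the gradient above, assuming the constructor (not
    # shown) stores n, c1, c2 and scale:
    #   f(x) = scale * (c1 * sum((x[1:] - sin(x[:-1]))**2) + c2 * sum(x[:-1]**2))
    def f(self, x):  # compute f (reconstructed sketch, not from the original source)
        tmp = x[1:] - np.sin(x[:-1])
        fx = self.c1 * np.dot(tmp, tmp) + self.c2 * np.dot(x[:-1], x[:-1])
        return fx * self.scale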


n_list = [1000, 2000, 5000, 10000]
for n in n_list:
    x = -np.ones(n)
    x[0] = 4.712389
    scale = 1e-5
    eps = 1e-8 * scale
    fun = GENSIN(n, scale=scale)

    for method in ['fr', 'prp', 'prp+', 'hs', 'cd', 'dy', 'bb', 'sd']:
        if method == 'bb':
            res = bb.bb(fun, x, eps=eps)
        elif method == 'sd':
            res = bb.sd(fun, x, eps=eps)
        else:
            res = cg.cg(fun, x, method=method, eps=eps)

        print(res[0][:10])
        print('& %.3e & %.1f & %d & %d ' %
              (res[1] / scale - 2.455, -log(res[2] / scale) / log(10), res[3],
               res[4]))
Example #2
    def f(self, x):  # compute f
        tmp = x[1:] - x[:-1] + 1 - x[:-1] ** 2
        fx = np.dot(tmp, tmp)
        return fx * self.scale

    def g(self, x):  # compute g
        gx = np.zeros(self.n)
        tmp = x[1:] - x[:-1] + 1 - x[:-1] ** 2
        gx[:-1] -= 2 * tmp * (1 + 2 * x[:-1])
        gx[1:] += 2 * tmp
        return gx * self.scale
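    # The constructor is not shown in this excerpt; a minimal sketch consistent
    # with the call FLETCHCR(n, scale) below and the attributes used above:
    def __init__(self, n, scale=1.0):  # reconstructed sketch, not from the original source
        self.n = n
        self.scale = scale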


n_list = [1000, 2000, 5000, 10000]
for n in n_list:
    x = np.zeros(n)
    scale = 1e-1
    kwargs = {'eps': 1e-7 * scale, 'maxiter': 50000}
    fun = FLETCHCR(n, scale)

    for method in ['fr', 'prp', 'prp+', 'hs', 'cd', 'dy', 'bb', 'sd']:
        if method == 'bb':
            res = bb.bb(fun, x, **kwargs)
        elif method == 'sd':
            res = bb.sd(fun, x, **kwargs)
        else:
            res = cg.cg(fun, x, method=method, **kwargs)

        print(res[0][:10])
        print('& %.3e & %.1f & %d & %d ' %
              (res[1] / scale, -log(res[2] / scale) / log(10), res[3], res[4]))
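# Each print above emits one LaTeX table row. By analogy with the unpacking
# cg_x, cg_y, _, cg_iter, _ = cg(...) used elsewhere in this project, res[1] is
# presumably the final objective value and res[2] the final gradient norm, so a
# row reads (hypothetical numbers): & 1.234e-03 & 7.0 & 1523 & 3871
# i.e. unscaled objective, -log10 of the unscaled gradient norm, and the two
# counters stored in res[3] and res[4].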
Example #3
def train(agent, Train_epoch, max_iter, file_name='./res.dat'):
    output_file = open(file_name, 'w')
    for epoch in range(Train_epoch):

        global env_train, env_test
        env = env_train[(epoch // 20) % num_train]

        pre_state = env.reset()
        acc_reward = 0

        for step in range(max_iter):

            # print('pre:', pre_state)
            action = agent.action(pre_state)
            # print('action:', action)

            if action[0] != action[0]:  # NaN check: NaN compares unequal to itself
                raise ValueError('nan error!')

            next_state, reward, done, _ = env.step(action)
            reward *= step**0.2
            acc_reward += reward
            # print('next:', next_state)

            if step == max_iter - 1:
                done = True

            # agent.train(state_featurize.transfer(pre_state), action, reward, state_featurize.transfer(next_state), done)
            agent.train(pre_state, action, reward, next_state, done)

            if done and epoch % args.print_every == 0:
                #print('episode: ', epoch + 1, 'step: ', step + 1, ' reward is', acc_reward,  file = output_file)
                #print('episode: ', epoch + 1, 'step: ', step + 1, ' reward is', acc_reward, )
                print('episode: ', epoch + 1, 'step: ', step + 1,
                      ' final value: ', env.get_value())
                break

            pre_state = next_state

        if epoch % 100 == 0:
            test_count = epoch // 100
            final_value = play(agent, 1, max_iter, test_count)
            print('--------------episode ',
                  epoch,
                  'final_value: ',
                  final_value,
                  '---------------',
                  file=output_file)
            print('--------------episode ', epoch, 'test_id',
                  test_count % num_test, 'final value: ',
                  test_record[test_count % num_test], '---------------')

            env = env_test[test_count % num_test]

            if args.obj == 'logistic':
                obj = Logistic(args.dim, env.func.X, env.func.Y)
            elif args.obj == 'neural':
                obj = NeuralNet(dim, env.func.X, env.func.Y, **kwargs)

            cg_x, cg_y, _, cg_iter, _, _, _ = cg(obj,
                                                 x0=init_point,
                                                 maxiter=max_iter,
                                                 a_high=args.action_high)
            print('CG method: optimal value: {0}, iterations {1}'.format(
                cg_y, cg_iter))
            sd_x, sd_y, _, sd_iter, _, _, _ = sd(obj,
                                                 x0=init_point,
                                                 maxiter=max_iter,
                                                 a_high=args.action_high)
            print('SD method: optimal value: {0}, iterations {1}'.format(
                sd_y, sd_iter))
            bfgs_x, bfgs_y, _, bfgs_iter, _, _, _ = quasiNewton(
                obj, x0=init_point, maxiter=max_iter, a_high=args.action_high)
            print('BFGS method: optimal value: {0}, iterations {1}'.format(
                bfgs_y, bfgs_iter))

            # if np.mean(np.array(final_value)) < min(cg_y, sd_y, bfgs_y):
            #     print('----- using ', epoch, '  epochs')
            #     #agent.save_model()
            #     break
            time.sleep(1)

            if epoch % 500 == 0 and epoch > 0:
                path = save_path + str(epoch)
                agent.save(path)

    return agent
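# The args object referenced in train() and in the driver code below is assumed
# to come from argparse. A minimal, hypothetical sketch that only declares the
# attributes these excerpts actually read (defaults are made up):
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--obj', choices=['quadratic', 'logistic', 'ackley', 'neural'])
parser.add_argument('--agent', choices=['naf', 'ddpg', 'cac', 'ppo'])
parser.add_argument('--dim', type=int, default=100)            # problem dimension
parser.add_argument('--action_high', type=float, default=1.0)  # passed to cg/sd/quasiNewton as a_high
parser.add_argument('--print_every', type=int, default=10)     # logging frequency in episodes
args = parser.parse_args()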
Example #4
           trajectory_number=100,
           update_epoach=50)

if args.obj == 'quadratic':
    obj = Quadratic(dim)
elif args.obj == 'logistic':
    obj = Logistic(dim, X, Y)
elif args.obj == 'ackley':
    obj = Ackley(dim)
elif args.obj == 'neural':
    obj = NeuralNet(dim, X, Y, **kwargs)

cg_x, cg_y, _, cg_iter, _ = cg(obj, x0=init_point, maxiter=max_iter)
print('CG method:\n optimal point: {0}, optimal value: {1}, iterations {2}'.
      format(cg_x, cg_y, cg_iter))
sd_x, sd_y, _, sd_iter, _ = sd(obj, x0=init_point, maxiter=max_iter)
print('SD method:\n optimal point: {0}, optimal value: {1}, iterations {2}'.
      format(sd_x, sd_y, sd_iter))
bfgs_x, bfgs_y, _, bfgs_iter, _ = quasiNewton(obj,
                                              x0=init_point,
                                              maxiter=max_iter)
print('BFGS method:\n optimal point: {0}, optimal value: {1}, iterations {2}'.
      format(bfgs_x, bfgs_y, bfgs_iter))

if args.agent == 'naf':
    agent = train(naf, max_epoch, max_iter)
elif args.agent == 'ddpg':
    agent = train(ddpg, max_epoch, max_iter)
elif args.agent == 'cac':
    agent = train(cac, max_epoch, max_iter)
elif args.agent == 'ppo':