Example #1
        # one-step TD target: r + GAMMA * (1 - done) * max_a' Q(s', a');
        # detach so no gradient flows back through the target estimate
        q_target = b_r + GAMMA * (1. - b_d) * q_next
        q_target = q_target.detach()

        # loss
        loss = self.loss_function(q_eval, q_target)
        logger.store(loss=loss)
        # backprop loss
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        return loss

dqn = DQN()
logdir = './DQN/%s' % args.games + '/%i' % int(time.time())

logger_kwargs = setup_logger_kwargs(args.games, args.seed, data_dir=logdir)
logger = EpochLogger(**logger_kwargs)
kwargs = {
    'seed': args.seed,
    'learning_rate': args.lr,
}
logger.save_config(kwargs)
# model load with check
if LOAD and os.path.isfile(PRED_PATH) and os.path.isfile(TARGET_PATH):
    dqn.load_model()
    with open(RESULT_PATH, 'rb') as pkl_file:
        result = pickle.load(pkl_file)
    print('Load complete!')
else:
    result = []  # assumed: no saved checkpoint, start with an empty result history
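
For reference, the update at the top of this example builds the standard one-step TD target, r + GAMMA * (1 - done) * max_a' Q(s', a'), and detaches it so gradients flow only through q_eval. Below is a minimal self-contained sketch of that step; the toy networks, batch tensors, and hyperparameters are illustrative assumptions, not part of the original snippet.

import torch
import torch.nn as nn

GAMMA = 0.99  # assumed discount factor

# toy stand-ins for the snippet's evaluation and target networks
q_net = nn.Linear(4, 2)
q_target_net = nn.Linear(4, 2)
optimizer = torch.optim.Adam(q_net.parameters(), lr=1e-3)
loss_function = nn.MSELoss()

# illustrative batch: states, actions, rewards, next states, done flags
b_s = torch.randn(32, 4)
b_a = torch.randint(0, 2, (32, 1))
b_r = torch.randn(32, 1)
b_s_ = torch.randn(32, 4)
b_d = torch.zeros(32, 1)

q_eval = q_net(b_s).gather(1, b_a)                       # Q(s, a) for taken actions
q_next = q_target_net(b_s_).max(dim=1, keepdim=True)[0]  # max_a' Q_target(s', a')
q_target = (b_r + GAMMA * (1. - b_d) * q_next).detach()  # block target gradients

loss = loss_function(q_eval, q_target)
optimizer.zero_grad()
loss.backward()
optimizer.step()
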
Example #2
            # logger.log_tabular('VVals', with_min_and_max=True)
            # logger.log_tabular('LogPi', with_min_and_max=True)
            # logger.log_tabular('LossPi', average_only=True)
            # logger.log_tabular('LossQ1', average_only=True)
            # logger.log_tabular('LossQ2', average_only=True)
            # logger.log_tabular('LossV', average_only=True)
            # logger.log_tabular('Time', time.time()-start_time)
            # logger.dump_tabular()


if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--env', type=str, default='HalfCheetah-v2')
    parser.add_argument('--hid', type=int, default=300)
    parser.add_argument('--l', type=int, default=2)
    parser.add_argument('--gamma', type=float, default=0.99)
    parser.add_argument('--seed', '-s', type=int, default=0)
    parser.add_argument('--epochs', type=int, default=50)
    parser.add_argument('--exp_name', type=str, default='sac')
    args = parser.parse_args()

    from logx import setup_logger_kwargs
    logger_kwargs = setup_logger_kwargs(args.exp_name, args.seed)

    sac(lambda: gym.make(args.env),
        ac_kwargs=dict(hidden_sizes=[args.hid] * args.l),
        gamma=args.gamma,
        seed=args.seed,
        epochs=args.epochs,
        logger_kwargs=logger_kwargs)
Example #3
env = make_env(util.ENV_CONFIG_DIR + env_config)

obs = []
actions = []
action_sign = np.array([-1, -1])
for i in range(iterations):
    current_bound = initial_bound
    o = env.reset()
    real_action = env.action_space.default() * 0.5
    for t in range(max_ep_len):
        o, r, d, _ = env.step(real_action)
        obs.append(o)
        actions.append(real_action)

        # PID-style terms: proportional = current obs, integral = mean of the
        # last few obs, derivative = mean of their recent first differences
        vp = o
        vi = np.mean(obs[-5:])
        vd = np.mean(np.diff(obs, axis=0)[-5:])
        vd = 0 if np.isnan(vd) else vd  # no differences yet on the first step
        # exponentiate into a multiplicative correction, clipped to the current bound
        delta = np.exp((wp * vp + wi * vi + wd * vd) * action_sign)
        delta = np.clip(delta, 1. / current_bound, current_bound)
        #print(real_action, o, delta)
        real_action = env.action_space.clip(real_action * delta)
        current_bound = np.maximum(final_bound, current_bound * bound_decay)

# report the mean absolute value of the last 20 observations, scaled by 100
print(np.mean(np.abs(obs[-20:])) * 100)
logger_kwargs = setup_logger_kwargs(exp_name,
                                    seed,
                                    data_dir=util.LOG_DIR +
                                    os.path.splitext(env_config)[0])
logger = EpochLogger(**logger_kwargs)
#util.plot_seq_obs_and_actions(np.abs(obs), actions, env.action_space.high, logger.output_dir + '/actions.png')
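
This example is a PID-style controller applied multiplicatively: the proportional, integral, and derivative terms are combined, exponentiated into a scale factor, and used to multiply the previous action, while the clipping bound decays from initial_bound toward final_bound. Below is a condensed sketch of that update rule on a toy scalar system; the gains, bounds, and step dynamics are illustrative assumptions, not values from the original.

import numpy as np

# illustrative gains and bounds (not from the original snippet)
wp, wi, wd = 1.0, 0.1, 0.5
initial_bound, final_bound, bound_decay = 2.0, 1.05, 0.99

def step(action):
    # toy dynamics: the observation (error signal) shrinks as action -> 3.0
    return 0.1 * (3.0 - action)

obs = []
action = 1.0
bound = initial_bound
for t in range(200):
    o = step(action)
    obs.append(o)

    vp = o                                                    # proportional term
    vi = np.mean(obs[-5:])                                    # integral term
    vd = np.mean(np.diff(obs)[-5:]) if len(obs) > 1 else 0.0  # derivative term
    delta = np.exp(wp * vp + wi * vi + wd * vd)               # multiplicative correction
    delta = np.clip(delta, 1. / bound, bound)
    action *= delta
    bound = max(final_bound, bound * bound_decay)

print('final action: %.3f, final obs: %.4f' % (action, obs[-1]))
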
Example #4
        loss.backward()
        self.optimizer.step()

        # second network's update: regress q_eval2 toward the first network's target
        loss = self.loss_function(q_eval2, q_target1)
        self.optimizer1.zero_grad()
        loss.backward()
        self.optimizer1.step()
        return loss


dqn = Smoothing_DQN()
logdir = './DOUBLE_average_choose_DQN/%s' % args.games + '/%i' % int(
    time.time())

logger_kwargs = setup_logger_kwargs(args.games + "DOUBLE_average_choose",
                                    args.seed,
                                    data_dir=logdir)
logger = EpochLogger(**logger_kwargs)
kwargs = {
    'seed': args.seed,
    'delay_interval': delay_interval,
}
logger.save_config(kwargs)
# model load with check
if LOAD and os.path.isfile(PRED_PATH) and os.path.isfile(TARGET_PATH):
    dqn.load_model()
    with open(RESULT_PATH, 'rb') as pkl_file:
        result = pickle.load(pkl_file)
    print('Load complete!')
else:
    result = []  # assumed: no saved checkpoint, start with an empty result history
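
Example #4's update step trains two Q-networks with separate optimizers, each one regressing toward a target built from the other network (a double-DQN-style smoothing scheme). Here is a minimal sketch of that two-optimizer pattern; the toy networks, batch, and cross-network targets are assumptions based on the visible snippet.

import torch
import torch.nn as nn

# toy twin Q-networks with separate optimizers (illustrative)
q1 = nn.Linear(4, 2)
q2 = nn.Linear(4, 2)
optimizer = torch.optim.Adam(q1.parameters(), lr=1e-3)
optimizer1 = torch.optim.Adam(q2.parameters(), lr=1e-3)
loss_function = nn.MSELoss()

b_s = torch.randn(32, 4)  # illustrative batch of states
q_eval1, q_eval2 = q1(b_s), q2(b_s)
# detached cross-targets: each network learns from the other's estimate
q_target1, q_target2 = q_eval1.detach(), q_eval2.detach()

# first network's update
loss = loss_function(q_eval1, q_target2)
optimizer.zero_grad()
loss.backward()
optimizer.step()

# second network's update (mirrors the snippet's second block)
loss = loss_function(q_eval2, q_target1)
optimizer1.zero_grad()
loss.backward()
optimizer1.step()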