Example #1
def main():
    # Create the quadrotor environment
    env = make_env("Quadrotor", task="no_collision", seed=1)
    env.reset()
    obs_dim = env.observation_space.shape[0]
    act_dim = env.action_space.shape[0] + 1
    max_action = float(env.action_space.high[0])

    model = QuadrotorModel(act_dim, max_action)
    algorithm = parl.algorithms.TD3(model,
                                    max_action=max_action,
                                    gamma=GAMMA,
                                    tau=TAU,
                                    actor_lr=ACTOR_LR,
                                    critic_lr=CRITIC_LR)
    agent = QuadrotorAgent(algorithm, obs_dim, act_dim)
    rpm = ReplayMemory(int(MEMORY_SIZE), obs_dim, act_dim)

    agent.restore_critic('model_dir/critic.ckpt')
    agent.restore_actor('model_dir/actor.ckpt')
    for episode in range(1, 5):
        evaluate_reward = run_evaluate_episode(env,
                                               agent,
                                               max_action,
                                               is_render=True)
        print("evaluate_reward: ", evaluate_reward)
Example #2
    def test_load_valid_conf(self):
        env = make_env('Quadrotor',
                       task='no_collision',
                       obs_as_dict=True,
                       simulator_conf=self.valid_config)
        state = env.reset()
        self.assertTrue('x' in state)
Example #3
    def test_load_invalid_conf(self):
        try:
            env = make_env('Quadrotor',
                           task='no_collision',
                           simulator_conf=self.invalid_config)
            env.reset()
        except RuntimeError as e:
            self.assertTrue('quality' in str(e))
Example #4
    def test_hovering_control_task(self):
        env = make_env('Quadrotor', task='hovering_control')
        state = env.reset()
        act = env.action_space.sample()
        reset = False
        while not reset:
            state, reward, reset, info = env.step(act)
            act = env.action_space.sample()
Example #5
    def test_no_collision_task(self):
        env = make_env('Quadrotor', task='no_collision')
        state = env.reset()
        act = env.action_space.sample()
        reset = False
        while not reset:
            state, reward, reset, info = env.step(act)
            act = env.action_space.sample()
Example #6
    def test_velocity_control_task(self):
        env = make_env('Quadrotor', task='velocity_control')
        state = env.reset()
        reset = False
        step = 0
        while not reset:
            state, reward, reset, info = env.step([1., 1., 1., 1.])
            self.assertTrue('next_target_g_v_x' in info)
            step += 1

        self.assertEqual(step, env.nt)
Example #7
    def test_no_collision_task(self):
        env = make_env('Quadrotor',
                       task='no_collision',
                       obs_as_dict=True,
                       simulator_conf=self.valid_config)
        state = env.reset()
        init_x, init_y = state['x'], state['y']
        reset = False
        while not reset:
            state, reward, reset, info = env.step([1., 1., 1., 1.])
            msg = 'quadrotor has unexpected movement in the %s direction.'
            self.assertEqual(state['x'], init_x, msg % 'x')
            self.assertEqual(state['y'], init_y, msg % 'y')
Example #8
    def test_velocity_control_task(self):
        env = make_env('Quadrotor',
                       task='velocity_control',
                       obs_as_dict=True,
                       simulator_conf=self.valid_config)
        state = env.reset()
        reset = False
        step = 0
        while not reset:
            state, reward, reset, info = env.step([1., 1., 1., 1.])
            self.assertTrue('next_target_g_v_x' in state)
            step += 1

        self.assertEqual(step, env.nt)
Example #9
def main():
    # Create the quadrotor environment
    env = make_env("Quadrotor", task="velocity_control", seed=1)
    env.reset()
    obs_dim = env.observation_space.shape[0]
    act_dim = env.action_space.shape[0] + 1
    model = QuadrotorModel(act_dim)
    algorithm = DDPG(model,
                     gamma=GAMMA,
                     tau=TAU,
                     actor_lr=ACTOR_LR,
                     critic_lr=CRITIC_LR)
    agent = QuadrotorAgent(algorithm, obs_dim, act_dim)
    ckpt = 'steps_490883_reward_-20.52.ckpt'
    agent.restore(ckpt)
    evaluate_reward = evaluate(env, agent, True)
    logger.info('Evaluate reward: {}'.format(evaluate_reward))  # log the evaluation reward
Example #10
def main():
    # Create the quadrotor environment
    env = make_env("Quadrotor", task="no_collision", seed=1)
    env.reset()
    obs_dim = env.observation_space.shape[0]
    act_dim = env.action_space.shape[0] + 1
    model = QuadrotorModel(act_dim)
    algorithm = DDPG(model,
                     gamma=GAMMA,
                     tau=TAU,
                     actor_lr=ACTOR_LR,
                     critic_lr=CRITIC_LR)
    agent = QuadrotorAgent(algorithm, obs_dim, act_dim)
    ckpt = 'steps_970464_reward_467.17.ckpt'
    agent.restore(ckpt)
    evaluate_reward = evaluate(env, agent, render=True)
    logger.info('Evaluate reward: {}'.format(evaluate_reward))
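QuadrotorModel(act_dim) is constructed in several of these examples but never shown. A minimal actor-critic sketch in the PARL 1.x (fluid) style the snippets appear to use; the two-layer architecture and layer sizes are assumptions, not the original network. The TD3 variant in Example #1 additionally receives max_action; wiring that through is analogous.

import parl
from parl import layers


class ActorModel(parl.Model):
    def __init__(self, act_dim):
        self.fc1 = layers.fc(size=100, act='relu')
        self.fc2 = layers.fc(size=act_dim, act='tanh')  # outputs lie in [-1, 1]

    def policy(self, obs):
        return self.fc2(self.fc1(obs))


class CriticModel(parl.Model):
    def __init__(self):
        self.fc1 = layers.fc(size=100, act='relu')
        self.fc2 = layers.fc(size=1, act=None)

    def value(self, obs, act):
        # Q(s, a): concatenate state and action, regress a scalar
        concat = layers.concat([obs, act], axis=1)
        Q = self.fc2(self.fc1(concat))
        return layers.squeeze(Q, axes=[1])


class QuadrotorModel(parl.Model):
    def __init__(self, act_dim):
        self.actor_model = ActorModel(act_dim)
        self.critic_model = CriticModel()

    def policy(self, obs):
        return self.actor_model.policy(obs)

    def value(self, obs, act):
        return self.critic_model.value(obs, act)

    def get_actor_params(self):
        return self.actor_model.parameters()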
Example #11
def main():
    # Create the quadrotor environment
    env = make_env("Quadrotor", task="hovering_control")
    env.reset()
    obs_dim = env.observation_space.shape[0]
    act_dim = env.action_space.shape[0]
    print(obs_dim, act_dim)

    model = QuadrotorModel(act_dim + 1)
    algorithm = DDPG(model,
                     gamma=GAMMA,
                     tau=TAU,
                     actor_lr=ACTOR_LR,
                     critic_lr=CRITIC_LR)
    agent = QuadrotorAgent(algorithm, obs_dim, act_dim + 1)
    ckpt = 'steps_700176.ckpt'  # set ckpt to the best-performing checkpoint saved during your training
    agent.restore(ckpt)
    evaluate_reward = evaluate(env, agent)
    logger.info('Evaluate reward: {}'.format(evaluate_reward))  # log the evaluation reward
Example #12
def main():
    # Create the quadrotor environment
    env = make_env("Quadrotor", task="hovering_control")
    env.reset()
    obs_dim = env.observation_space.shape[0]
    act_dim = env.action_space.shape[0]
    act_dim = act_dim + 1

    model = QuadrotorModel(act_dim)
    algorithm = DDPG(model,
                     gamma=GAMMA,
                     tau=TAU,
                     actor_lr=ACTOR_LR,
                     critic_lr=CRITIC_LR)
    agent = QuadrotorAgent(algorithm, obs_dim, act_dim)

    # parl also ships a built-in ReplayMemory for DDPG; it can be imported directly from parl.utils
    rpm = ReplayMemory(int(MEMORY_SIZE), obs_dim, act_dim)

    # Start training
    test_flag = 0
    total_steps = 0
    while total_steps < TRAIN_TOTAL_STEPS:
        train_reward, steps = run_episode(env, agent, rpm)
        total_steps += steps
        logger.info('Steps: {} Reward: {}'.format(total_steps,
                                                  train_reward))  # log the training reward

        if total_steps // TEST_EVERY_STEPS >= test_flag:  # evaluate the model every TEST_EVERY_STEPS steps
            while total_steps // TEST_EVERY_STEPS >= test_flag:
                test_flag += 1

            evaluate_reward = evaluate(env, agent)
            logger.info('Steps {}, Test reward: {}'.format(
                total_steps, evaluate_reward))  # log the evaluation reward

            # Save a checkpoint after each evaluation, named by training step count
            ckpt = 'model_dir/steps_{}.ckpt'.format(total_steps)
            agent.save(ckpt)
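run_episode is called in the loop above but not shown. Below is a sketch of a typical DDPG collect-and-train step; it also illustrates the parl.utils.ReplayMemory mentioned in the comment (append a transition, then sample minibatches once the buffer is warm). MEMORY_WARMUP_SIZE, BATCH_SIZE, REWARD_SCALE, the noise scale, and the act_dim + 1 output convention are all assumptions here:

import numpy as np
from parl.utils import action_mapping

MEMORY_WARMUP_SIZE = 1e4  # assumed: transitions collected before learning starts
BATCH_SIZE = 256          # assumed minibatch size
REWARD_SCALE = 0.01       # assumed reward scaling


def run_episode(env, agent, rpm):
    # Hypothetical helper; the original implementation is not shown.
    obs = env.reset()
    total_reward, steps = 0., 0
    while True:
        steps += 1
        pred = agent.predict(obs.astype('float32'))
        # Gaussian exploration noise, clipped back to [-1, 1].
        pred = np.clip(np.random.normal(pred, 1.0), -1.0, 1.0)
        # Collapse the act_dim + 1 outputs into 4 rotor commands
        # (assumed convention, see the note after Example #1).
        action = np.clip(pred[0] + 0.2 * pred[1:], -1.0, 1.0)
        action = action_mapping(action, env.action_space.low[0],
                                env.action_space.high[0])
        next_obs, reward, done, info = env.step(action)

        # Store the raw (act_dim + 1)-dimensional output, matching the
        # act_dim that main() passed to ReplayMemory.
        rpm.append(obs, pred, REWARD_SCALE * reward, next_obs, done)

        if rpm.size() > MEMORY_WARMUP_SIZE:
            (batch_obs, batch_action, batch_reward, batch_next_obs,
             batch_done) = rpm.sample_batch(BATCH_SIZE)
            agent.learn(batch_obs, batch_action, batch_reward,
                        batch_next_obs, batch_done)

        obs = next_obs
        total_reward += reward
        if done:
            break
    return total_reward, steps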
Example #13
# @File : velocity.py
# @Software: PyCharm

import numpy as np
from parl.utils import action_mapping
from rlschool import make_env


def evaluate_episode(env, render=False):
    env_reward = []
    for j in range(5):
        env.reset()
        d_r = 0
        while True:
            action = np.array([-1, -1, -1, -1], dtype='float32')
            action = action_mapping(action, env.action_space.low[0],
                                    env.action_space.high[0])
            next_obs, reward, done, info = env.step(action)
            d_r += reward
            if render:
                env.render()
            if done:
                break
        env_reward.append(d_r)
    # Append the mean of the five episode returns as the last element
    env_reward.append(np.mean(env_reward))
    return env_reward


env = make_env("Quadrotor", task="velocity_control")
test_reward = evaluate_episode(env, render=True)
print(test_reward)
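parl.utils.action_mapping linearly rescales a value from [-1, 1] into [low, high], so the call above is equivalent to the sketch below. With the constant all-(-1.0) action, the script therefore measures the return of holding every rotor at its minimum command:

low, high = env.action_space.low[0], env.action_space.high[0]
action = low + (action + 1.0) * (high - low) / 2.0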
Example #14
        action = action_mapping(action, env.action_space.low[0],
                                env.action_space.high[0])

        next_obs, reward, done, _ = env.step(action)
        agent.step(obs, action, REWARD_SCALE * reward, next_obs, done)

        obs = next_obs
        total_reward += reward
        steps += 1

        if done:
            break

    return total_reward, steps


env = make_env("Quadrotor", task="hovering_control")
env.reset()
obs_dim = env.observation_space.shape[0]
act_dim = env.action_space.shape[0]
model = Model(state_size=obs_dim, action_size=act_dim)
target_model = Model(state_size=obs_dim, action_size=act_dim)

alg = DDPG(model,
           target_model,
           gamma=GAMMA,
           tau=TAU,
           actor_lr=ACTOR_LR,
           critic_lr=CRITIC_LR)
agent = Agent(alg, BUFFER_SIZE, BATCH_SIZE, seed=10)
Example #15
            env_action = np.clip(env_action, -1.0, 1.0)
            env_action = action_mapping(env_action, env.action_space.low[0],
                                        env.action_space.high[0])
            next_obs, reward, done, info = env.step(env_action)
            obs = next_obs
            total_reward += reward
            steps += 1

            if done:
                break
        eval_reward.append(total_reward)
    return np.mean(eval_reward)


# Create the quadrotor environment
env = make_env("Quadrotor", task="no_collision", seed=1)
env.reset()
obs_dim = env.observation_space.shape[0]
act_dim = env.action_space.shape[0] + 1
model = QuadrotorModel(act_dim)
algorithm = DDPG(model,
                 gamma=GAMMA,
                 tau=TAU,
                 actor_lr=ACTOR_LR,
                 critic_lr=CRITIC_LR)
agent = QuadrotorAgent(algorithm, obs_dim, act_dim)

rpm = ReplayMemory(int(MEMORY_SIZE), obs_dim, act_dim)

# Start training
test_flag = 0