Example #1
def collect_process(agent_index, queue_mem, actor_param):
    # Collector worker: roll out episodes with the latest actor
    # parameters and push transitions into the shared queue.
    env = NormalizedEnv(gym.make('Pendulum-v0'))
    agent = Action(state_dim=env.observation_space.shape[0],
                   action_dim=env.action_space.shape[0])
    try:
        while True:
            done = False
            state = env.reset()
            # Min-max normalize the observation into [0, 1].
            state = (state - env.observation_space.low) / (
                env.observation_space.high - env.observation_space.low)
            agent.load_param(actor_param)
            print("agent {} load param".format(agent_index))

            while not done:
                action = agent.chose_action(state, explort=True)
                next_state, reward, done, _ = env.step(action)
                # env.render()
                next_state = (next_state - env.observation_space.low) / (
                    env.observation_space.high - env.observation_space.low)
                mask = 0 if done else 1  # 1 while the episode continues, 0 at the terminal step
                queue_mem.put((state, action, next_state, reward, mask))
                state = next_state
    except Exception as e:
        print(e)
        print("agent {} exit".format(agent_index))
        env.close()
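
Note: NormalizedEnv, Action, DDPGAgent, ReplayMemory and to_tensor are defined elsewhere in the project and are not shown on this page. For reference, here is a minimal sketch of the usual DDPG-style NormalizedEnv action wrapper, which rescales agent actions from [-1, 1] into the env's bounds; this is an assumption, and the repo's own class may differ:

import gym


class NormalizedEnv(gym.ActionWrapper):
    # Assumed implementation: rescale actions from [-1, 1] to the
    # environment's action bounds (for Pendulum-v0: [-2, 2]).
    def action(self, action):
        low, high = self.action_space.low, self.action_space.high
        return low + (action + 1.0) * 0.5 * (high - low)

    def reverse_action(self, action):
        low, high = self.action_space.low, self.action_space.high
        return 2.0 * (action - low) / (high - low) - 1.0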
Example #2
def main():
    mp.set_start_method('spawn')
    config = Config()
    # 1. Initialize the environment
    env = NormalizedEnv(gym.make('Pendulum-v0'))

    # 2. Initialize the agent
    agent = DDPGAgent(env=env,
                      seed=config.seed,
                      batch_size=config.batch_size,
                      learning_rate_actor=config.learning_rate_actor,
                      learning_rate_critic=config.learning_rate_critic,
                      weight_decay=config.weight_decay)
    # Place the target actor's parameters in shared memory so the
    # collector and test processes always see the latest weights.
    agent.target_actor.share_memory()
    # 3. Initialize the replay memory
    memory = ReplayMemory(config.capacity)

    # Bounded queue through which collectors hand transitions to the trainer.
    q = mp.Queue(10)

    process_collect_list = []
    for i in range(config.agent_num):
        process_name = "collect_process_" + str(i)
        process = mp.Process(name=process_name,
                             target=collect_process,
                             args=(i, q, agent.target_actor))
        process.start()
        process_collect_list.append(process)

    steps = mp.Value('d', 0)  # shared step counter, read by the test process
    test_p = mp.Process(name="test_process",
                        target=test_process,
                        args=(config, steps, agent.target_actor))
    test_p.start()
    process_collect_list.append(test_p)

    try:
        while True:
            # Drain everything currently queued into replay memory.
            pending = q.qsize()
            while pending:
                state, action, next_state, reward, mask = q.get()
                memory.push(state, action, next_state, reward, mask)
                pending -= 1
            # 4.4 Learn from a sampled batch
            if memory.len > config.batch_size:
                agent.learning(memory)
            # save model
            if steps.value > 1 and steps.value % config.save_steps == 0:
                agent.save_models(steps.value / config.save_steps)
            steps.value += 1
    except Exception as e:
        print(e)
    finally:
        # Collector and test processes loop forever, so terminate them
        # before joining; a bare join() would block indefinitely.
        for process in process_collect_list:
            process.terminate()
            process.join()
            print(process.name + " stopped")
        env.close()
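
ReplayMemory is likewise defined elsewhere. Below is a minimal sketch consistent with how it is used here (push(state, action, next_state, reward, mask), a .len attribute checked in main(), uniform sampling for agent.learning); the repo's actual buffer may differ:

import random
from collections import deque


class ReplayMemory:
    # Assumed implementation: fixed-capacity uniform replay buffer.
    def __init__(self, capacity):
        self.buffer = deque(maxlen=capacity)
        self.len = 0  # exposed as an attribute to match `memory.len` above

    def push(self, state, action, next_state, reward, mask):
        self.buffer.append((state, action, next_state, reward, mask))
        self.len = len(self.buffer)

    def sample(self, batch_size):
        # Uniform random minibatch, transposed into per-field tuples.
        batch = random.sample(self.buffer, batch_size)
        return tuple(zip(*batch))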
Example #3
def test_process(config, steps, target_actor):
    env = NormalizedEnv(gym.make('Pendulum-v0'))
    agent = Action(state_dim=env.observation_space.shape[0],
                   action_dim=env.action_space.shape[0])
    reward_list = []
    last_tested = 0
    try:
        while True:
            # Evaluate every config.test_every_eposide trainer steps, but only
            # once per step value; without the guard this loop spins and would
            # re-trigger while steps.value sits on the same multiple.
            if steps.value != 0 and steps.value % config.test_every_eposide == 0 \
                    and steps.value != last_tested:
                last_tested = steps.value
                agent.load_param(target_actor)
                print("test agent load param ")
                total_reward = 0
                for index in range(config.num_eposide_test):
                    episode_reward = 0
                    state = env.reset()
                    state = (state - env.observation_space.low) / (
                        env.observation_space.high - env.observation_space.low)

                    while True:
                        action = agent.chose_action(state, explort=False)
                        next_state, reward, done, _ = env.step(action)
                        env.render()
                        next_state = (next_state - env.observation_space.low
                                      ) / (env.observation_space.high -
                                           env.observation_space.low)
                        episode_reward += reward
                        state = next_state
                        if done:
                            break
                    total_reward += episode_reward
                print("\033[93m [ test ] eposide average reward : {}\033[00m".
                      format(et_reward / config.num_eposide_test))
                reward_list.append(et_reward / config.num_eposide_test)

                x = np.arange(len(reward_list))
                y = np.array(reward_list)
                plt.plot(x, y)
                plt.savefig("./eposide_reward.png")

    except Exception as e:
        print(e)
        print("test process exit")
        env.close()
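
The Action helper used by the collector and test processes holds a process-local copy of the actor: load_param copies the weights of the shared target actor (placed in shared memory by share_memory() in main()), and chose_action runs inference, optionally with exploration noise. A hedged sketch that reuses the project's Actor class and keeps the original method and argument spellings; the Gaussian noise scale of 0.1 is an assumed value:

import numpy as np
import torch


class Action:
    # Assumed implementation of the process-local inference wrapper.
    def __init__(self, state_dim, action_dim):
        self.actor = Actor(state_dim, action_dim)  # Actor as used in test.py

    def load_param(self, shared_actor):
        # Tensors placed in shared memory by the trainer are visible here,
        # so this pulls in the latest target-actor weights.
        self.actor.load_state_dict(shared_actor.state_dict())

    def chose_action(self, state, explort=False):
        with torch.no_grad():
            state = torch.as_tensor(state, dtype=torch.float32)
            action = self.actor(state).numpy()
        if explort:
            action = action + np.random.normal(0, 0.1, size=action.shape)
        return np.clip(action, -1.0, 1.0)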
Example #4
File: test.py Project: YuanyeMa/RL
def main():
    env = NormalizedEnv(gym.make('Pendulum-v0'))

    state_dim = env.observation_space.shape[0]
    action_dim = env.action_space.shape[0]

    agent = Actor(state_dim, action_dim).to('cuda')

    agent.load_state_dict(torch.load('./Models/78.0_actor.pt'))
    agent.eval()  # evaluation mode for inference

    episode = 0
    done = False
    episode_list = []
    while episode < 100:
        episode_reward = 0
        state = env.reset()
        state = (state - env.observation_space.low) / (
            env.observation_space.high - env.observation_space.low)
        state = to_tensor(state)
        while not done:
            with torch.no_grad():
                action = agent(state).cpu().numpy()
            state_, reward, done, _ = env.step(action)
            state_ = (state_ - env.observation_space.low) / (
                env.observation_space.high - env.observation_space.low)
            env.render()
            state = to_tensor(state_)
            episode_reward += reward

        episode_list.append(episode_reward)
        episode += 1
        done = False
        print("{} : {}".format(episode, episode_reward))

    import matplotlib.pyplot as plt
    x = np.arange(100)
    y = np.array(episode_list)
    plt.plot(x, y)
    plt.savefig("./test_eposide_reward.png")

    env.close()
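
to_tensor in test.py is another project helper that is not shown. Since the Actor lives on 'cuda', it presumably produces a float32 tensor on that device; a minimal assumed version:

import numpy as np
import torch


def to_tensor(x, device='cuda'):
    # Assumed helper: NumPy observation -> float32 tensor on the model's device.
    return torch.as_tensor(np.asarray(x, dtype=np.float32), device=device)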