Python Agent.choose_action示例

编程语言: Python

命名空间/包名称: DDPG

类/类型: Agent

方法/功能: choose_action

hotexamples.com的示例: 3

Python Agent.choose_action - 共找到 3 个示例。以下是从开源项目中提取的、评价最高的 DDPG.Agent.choose_action 真实 Python 代码示例。您可以为示例评分，帮助我们提高示例质量。

常用方法

显示/隐藏

Agent(10)

choose_action(3)

save(2)

learn(2)

load(2)

act(2)

save_models(2)

train(1)

summary(1)

step(1)

remember(1)

restore(1)

reset(1)

optimize(1)

network_copy(1)

load_models(1)

update_target_net(1)

示例#1

显示文件

文件： profile.py 项目： KanishkNavale/DDPG-Algorithm-for-Continuous-Action-RL

def collect_trajectories(env: gym.Env,
                         agent: Agent,
                         n_games: int = 10) -> np.ndarray:
    """Roll out ``n_games`` episodes and stack every visited state.

    Each episode starts from ``env.reset()`` and is driven by
    ``agent.choose_action`` until ``env.step`` reports ``done``. The state
    observed *before* each step is recorded; the state returned by the
    terminating step is not.

    Args:
        env: Environment exposing ``reset()`` and a ``step(action)`` that
            returns a 4-tuple ``(next_state, reward, done, info)``.
        agent: Object exposing ``choose_action(state)``.
        n_games: Number of episodes to play.

    Returns:
        The recorded states of all episodes, in visiting order, stacked
        row-wise with ``np.vstack``.

    Raises:
        ValueError: If no state was recorded (``n_games <= 0``).
    """
    # Accumulate across all episodes so the result covers every game,
    # not only the last one played.
    state_history: list[np.ndarray] = []

    for _ in range(n_games):
        state = env.reset()
        done: bool = False

        while not done:
            state_history.append(state)
            action = agent.choose_action(state)
            state, _, done, _ = env.step(action)

    if not state_history:
        # np.vstack cannot stack an empty sequence; fail with a clear message.
        raise ValueError("no states collected; n_games must be at least 1")

    return np.vstack(state_history)

示例#2

显示文件

文件： test.py 项目： KanishkNavale/DDPG-Algorithm-for-Continuous-Action-RL

    # Build an agent with training=False and restore its saved models from
    # data_path before rolling out test episodes.
    agent = Agent(env=env, n_games=n_games, training=False)
    agent.load_models(data_path)

    # NOTE(review): the outer loop variable `i` is never used, and both loops
    # iterate n_games times, so n_games * n_games episodes are played overall.
    for i in tqdm(range(n_games), desc=f'Testing', total=n_games):
        # NOTE(review): `[] * n_games` always evaluates to an empty list; the
        # multiplication does not pre-allocate anything.
        score_history: List[np.float32] = [] * n_games

        for _ in tqdm(range(n_games), desc=f'Testing', total=n_games):
            score = 0
            done = False

            # Start a fresh episode.
            state = env.reset()

            while not done:
                action = agent.choose_action(state)
                next_state, reward, done, _ = env.step(action)

                # Transitions are still pushed into agent.memory during this
                # test run. NOTE(review): confirm this is intended.
                agent.memory.add(state, action, reward, next_state, done)

                # Deep copy so `state` does not alias `next_state`.
                state = copy.deepcopy(next_state)
                score += reward

            # One total reward per finished episode.
            score_history.append(score)

        # NOTE(review): the label reads "Variance" but the value printed is
        # np.std, i.e. the standard deviation.
        print(f'Test Analysis:\n'
              f'Mean:{np.mean(score_history)}\n'
              f'Variance:{np.std(score_history)}')

        # Record the scores of this inner batch; `test_data` is defined
        # outside the visible fragment.
        test_data.append({'Test Score': score_history})

示例#3

显示文件

              env=env,
              batch_size=64,
              layer1_size=256,
              layer2_size=128,
              n_actions=3)

#agent.load_models()
# np.random.seed(1)

# Run 50 episodes, calling agent.learn() after every environment step and
# keeping each episode's total reward in score_history.
score_history = []
for i in range(50):
    score = 0
    done = False
    obs = env.reset()
    while True:
        act = agent.choose_action(obs)
        print(act)
        new_state, reward, done, info = env.step(act)
        # Store the transition (done flag cast to int), then run one update.
        agent.remember(obs, act, reward, new_state, int(done))
        agent.learn()
        score += reward
        obs = new_state
        if done:
            break
    score_history.append(score)

    # Save the models and render on every tenth episode, starting with 0.
    if not i % 10:
        agent.save_models()
        env.render()

    # Progress line: this episode's score plus the mean of the last 25 scores.
    print('episode ', i, 'score %.2f' % score,
          'trailing 25 games avg %.3f' % np.mean(score_history[-25:]))