import gym

from agent import DDPG

# Evaluate a trained DDPG agent on Pendulum-v0 using raw state observations.
env = gym.make('Pendulum-v0')
agent = DDPG(env)
agent.load_model()

state = env.reset()
cumulative_reward = 0
for i in range(200):
    action = agent.get_action(state)
    env.render()
    # The actor outputs actions in [-1, 1]; scale by 2 to match
    # Pendulum-v0's action range of [-2, 2].
    state, reward, _, _ = env.step(action * 2)
    cumulative_reward += reward

print('Cumulative Reward: {}'.format(cumulative_reward))
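The agent class itself isn't shown above. For reference, here is a minimal sketch of what deterministic action selection at evaluation time typically looks like in DDPG, assuming a small torch actor network; the `actor` network and `get_action` function below are hypothetical stand-ins for whatever agent.py actually defines.

import torch
import torch.nn as nn

# Hypothetical sketch of evaluation-time action selection; the real
# agent lives in agent.py and is not shown in the original.
actor = nn.Sequential(
    nn.Linear(3, 64), nn.ReLU(),
    nn.Linear(64, 1), nn.Tanh(),  # Pendulum-v0: 3-dim state, 1-dim action
)

def get_action(state):
    # At evaluation time DDPG acts deterministically: the actor's output
    # is used directly, with no exploration noise added.
    with torch.no_grad():
        state = torch.as_tensor(state, dtype=torch.float32)
        return actor(state).numpy()  # in [-1, 1]; the caller scales by 2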
from collections import deque

import gym
import numpy as np

from agent import DDPG
from utils import get_screen

# Evaluate a trained DDPG agent on Pendulum-v0 using rendered frames
# instead of the raw state vector.
env = gym.make('Pendulum-v0')
agent = DDPG(env, memory=False)
agent.load_model()

env.reset()
# Build the initial observation by stacking three copies of the first frame.
pixel = env.render(mode='rgb_array')
state = deque([get_screen(pixel) for _ in range(3)], maxlen=3)

cumulative_reward = 0
for timestep in range(200):
    # Add a batch dimension before feeding the frame stack to the actor.
    action = agent.get_action(np.array(state)[np.newaxis])
    _, reward, _, _ = env.step(action * 2)
    # Render the next frame and push it onto the stack; the deque's
    # maxlen drops the oldest frame automatically.
    pixel = env.render(mode='rgb_array')
    state_ = state.copy()
    state_.append(get_screen(pixel))
    state = state_
    cumulative_reward += reward

print('Cumulative Reward: {}'.format(cumulative_reward))
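`get_screen` comes from utils.py and isn't shown. Below is a plausible sketch of such a frame-preprocessing helper, assuming it grayscales, downscales, and normalizes the rendered frame; the exact preprocessing (and the 64x64 size) is an assumption, not taken from the source.

import numpy as np
from PIL import Image

def get_screen(pixel, size=64):
    # Hypothetical sketch; the real get_screen lives in utils.py.
    # `pixel` is the (H, W, 3) uint8 array from env.render(mode='rgb_array').
    img = Image.fromarray(pixel).convert('L')         # grayscale
    img = img.resize((size, size))                    # downscale
    return np.asarray(img, dtype=np.float32) / 255.0  # normalize to [0, 1]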
import gym
import numpy as np
import torch
from torch.utils.tensorboard import SummaryWriter  # import path assumed

from agent import DDPG
from utils import OUActionNoise  # import path assumed; not shown in the original

# Train DDPG on Pendulum-v0 from raw state observations.
env = gym.make('Pendulum-v0')
epoch = 200  # number of training episodes; value assumed, not in the original

# Fix the seeds for reproducibility.
np.random.seed(42)
env.seed(42)
torch.manual_seed(42)
torch.cuda.manual_seed(42)

writer = SummaryWriter(log_dir='logs/')
agent = DDPG(env, writer)

all_timesteps = 0
for e in range(epoch):
    # Fresh Ornstein-Uhlenbeck noise process for each episode.
    noise = OUActionNoise(env.action_space.shape[0])
    state = env.reset()
    cumulative_reward = 0
    for timestep in range(200):
        action = agent.get_action(state, noise, timestep)
        # Scale the [-1, 1] actor output to the environment's action range.
        state_, reward, done, _ = env.step(action * env.action_space.high[0])
        # env.render()
        agent.store_transition(state, action, state_, reward, done)
        state = state_
        cumulative_reward += reward
        agent.update(all_timesteps)
        all_timesteps += 1
    print('Epoch : {} / {}, Cumulative Reward : {}'.format(
        e, epoch, cumulative_reward))
    writer.add_scalar("reward", cumulative_reward, e)

agent.save_model()
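`OUActionNoise` is also not shown. Here is a minimal sketch of a standard Ornstein-Uhlenbeck noise process matching the `OUActionNoise(env.action_space.shape[0])` call above; the theta, sigma, and dt defaults are the common values from the DDPG paper (Lillicrap et al., 2015) and are assumed here.

import numpy as np

class OUActionNoise:
    """Ornstein-Uhlenbeck process: temporally correlated exploration noise.

    Sketch matching the OUActionNoise(action_dim) call above; the
    theta/sigma/dt defaults are assumptions, not taken from the source.
    """

    def __init__(self, action_dim, mu=0.0, theta=0.15, sigma=0.2, dt=1e-2):
        self.mu = mu * np.ones(action_dim)
        self.theta = theta
        self.sigma = sigma
        self.dt = dt
        self.x = np.copy(self.mu)

    def __call__(self):
        # dx = theta * (mu - x) * dt + sigma * sqrt(dt) * N(0, 1)
        self.x = (self.x
                  + self.theta * (self.mu - self.x) * self.dt
                  + self.sigma * np.sqrt(self.dt)
                  * np.random.randn(*self.x.shape))
        return self.x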
from collections import deque

import gym
import numpy as np
import torch
from torch.utils.tensorboard import SummaryWriter  # import path assumed

from agent import DDPG
from utils import OUActionNoise, get_screen  # import path assumed

# Train DDPG on Pendulum-v0 from stacked rendered frames.
env = gym.make('Pendulum-v0')
epoch = 200  # number of training episodes; value assumed, not in the original

torch.manual_seed(42)
torch.cuda.manual_seed(42)

writer = SummaryWriter(log_dir='logs/')
agent = DDPG(env, writer)

all_timesteps = 0
for e in range(epoch):
    noise = OUActionNoise(env.action_space.shape[0])
    env.reset()
    # Stack three copies of the first frame as the initial observation.
    pixel = env.render(mode='rgb_array')
    state = deque([get_screen(pixel) for _ in range(3)], maxlen=3)
    cumulative_reward = 0
    for timestep in range(200):
        action = agent.get_action(np.array(state)[np.newaxis], noise, timestep)
        _, reward, done, _ = env.step(action * env.action_space.high[0])
        # Render the next frame and push it onto the stack; the deque's
        # maxlen drops the oldest frame automatically.
        pixel = env.render(mode='rgb_array')
        state_ = state.copy()
        state_.append(get_screen(pixel))
        agent.store_transition(np.array(state), action, np.array(state_),
                               reward, done)
        state = state_
        cumulative_reward += reward
        # Smaller batches than the state-based run, since frame stacks
        # are much larger than state vectors.
        agent.update(all_timesteps, batch_size=16)
        all_timesteps += 1
    print('Epoch : {} / {}, Cumulative Reward : {}'.format(
        e, epoch, cumulative_reward))
    writer.add_scalar("reward", cumulative_reward, e)
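The replay buffer behind `store_transition`, and the batch sampling inside `update`, are likewise internal to agent.py. Below is a minimal sketch under the assumption that it is a plain uniform-sampling buffer; the class and method names are hypothetical.

import random
from collections import deque

import numpy as np

class ReplayBuffer:
    # Hypothetical sketch of the buffer behind agent.store_transition()
    # and the sampling step inside agent.update(); the real implementation
    # is in agent.py and is not shown in the original.

    def __init__(self, capacity=100000):
        self.buffer = deque(maxlen=capacity)  # oldest transitions fall off

    def store(self, state, action, state_, reward, done):
        self.buffer.append((state, action, state_, reward, done))

    def sample(self, batch_size):
        # Uniformly sample a minibatch and stack each field into an array,
        # ready to be converted to tensors for the critic/actor update.
        batch = random.sample(self.buffer, batch_size)
        states, actions, states_, rewards, dones = map(np.array, zip(*batch))
        return states, actions, states_, rewards, dones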