Example #1
0
def interact(env: Env, agent: Agent,
             start_obs: Arrayable) -> Tuple[array, array, array]:
    """One step of interaction between env and agent.

    The agent picks an action for the given observation, the environment
    executes it, and the transition is fed back to the agent via
    ``observe`` before the result is returned.

    :args env: environment exposing ``step(action)`` which returns
        ``(next_obs, reward, done, info)``
    :args agent: agent exposing ``step(obs)`` (action selection) and
        ``observe(next_obs, reward, done, time_limit)``
    :args start_obs: observation the agent acts on

    :return: (next observation, reward, terminal?)
    """
    action = agent.step(start_obs)
    next_obs, reward, done, information = env.step(action)
    # dict.get replaces the `x[k] if k in x else None` double lookup and
    # yields None when the env reports no time limit.
    time_limit = information.get('time_limit')
    agent.observe(next_obs, reward, done, time_limit)
    return next_obs, reward, done
Example #2
0
# Train the agent on the hover/target task and record per-episode stats.
target_pos = np.array([0., 0., 150.])
task = Task(init_pose=init_pos, target_pos=target_pos, runtime=10.)
agent = Agent(task)

# save rewards for plotting
rewards = []            # episode scores
rotor_speeds_var = []   # variance of the final action's rotor speeds

for i_episode in range(1, num_episodes + 1):
    state = agent.reset_episode()  # start a new episode
    step = 0
    while True:
        step += 1
        action = agent.act(state)
        next_state, reward, done = task.step(action)
        agent.step(action, reward, next_state, done)
        state = next_state
        if done:
            rewards.append(agent.score)
            rotor_speeds_var.append(np.var(action))
            print(
                "\r\nEp={:4d}, score={:7.3f} (top={:7.3f}) pos={} {} {} {} {} {} {}"
                .format(i_episode, agent.score, agent.top_score,
                        # pose[:3] is x/y/z, pose[3:6] is phi/theta/psi;
                        # index directly instead of slicing first.
                        round(task.sim.pose[0], 2),
                        round(task.sim.pose[1], 2),
                        round(task.sim.pose[2], 2),
                        round(task.sim.pose[3], 2),
                        round(task.sim.pose[4], 2),
                        round(task.sim.pose[5], 2),
                        # BUG FIX: ndigits belongs inside round(); the
                        # original passed 2 as an extra, silently ignored
                        # .format() argument, so the sum was not rounded.
                        round(abs(task.sim.v).sum(), 2)),
                end="")  # [debug]
            # BUG FIX: without this break the loop kept stepping a
            # finished episode forever (cf. the parallel loop below,
            # which does break on done).
            break
Example #3
0
# Results with the conditions of the quadcopter: run episodes and log
# one time series per labelled simulator quantity for later plotting.
labels = [
    'time', 'x', 'y', 'z', 'phi', 'theta', 'psi', 'x_velocity', 'y_velocity',
    'z_velocity', 'phi_velocity', 'theta_velocity', 'psi_velocity',
    'rotor_speed1', 'rotor_speed2', 'rotor_speed3', 'rotor_speed4'
]
results = {x: [] for x in labels}

num_episodes = 1000

for i_episode in range(1, num_episodes + 1):
    state = agent.reset_episode()  # start a new episode
    while True:
        action = agent.act(state)
        next_state, reward, done = takeoff.step(action)
        agent.step(reward, done)
        state = next_state
        # One row per step: time, pose (6), velocities (3), angular
        # velocities (3), rotor speeds (4) — same order as `labels`.
        to_write = [takeoff.sim.time] + list(takeoff.sim.pose) + list(
            takeoff.sim.v) + list(takeoff.sim.angular_v) + list(action)
        # zip pairs each label with its value directly — clearer than
        # indexing via range(len(labels)).
        for label, value in zip(labels, to_write):
            results[label].append(value)
        if done:
            print(
                "\rEpisode = {:4d}, score = {:7.3f} (best = {:7.3f}), noise_scale = {}"
                .format(i_episode, agent.score, agent.best_score,
                        agent.noise_scale),
                end="")  # [debug]
            break
    sys.stdout.flush()
''' Shows the results of the control
plt.plot(results['time'], results['x'], label='x')