Example #1
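A minimal unit test for RandomAgent: the agent is built over a two-action Discrete space and the value returned by act() is checked.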
import unittest

from gym import spaces

# RandomAgent is assumed to be importable from the surrounding project.


class TestRandomAgent(unittest.TestCase):
    def setUp(self):
        number_of_actions = 2
        action_space = spaces.Discrete(number_of_actions)
        self.agent = RandomAgent(action_space)

    def testAction(self):
        action = self.agent.act(state=None, reward=None, done=None)
        # A random policy over Discrete(2) may legitimately return 0 or 1.
        self.assertIn(action, (0, 1))
Example #2
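This example assembles a PhysicalNetwork, an ActualOrderGenerator and a DirichletInventoryGenerator into a ShippingFacilityEnvironment, then runs a single episode with a RandomAgent and returns the actions taken and the rewards collected.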
def run_with_params(num_dcs, num_customers, dcs_per_customer, demand_mean,
                    demand_var, num_commodities, orders_per_day, num_steps):
    physical_network = PhysicalNetwork(
        num_dcs,
        num_customers,
        dcs_per_customer,
        demand_mean,
        demand_var,
        num_commodities,
    )
    # order_generator = NaiveOrderGenerator(num_dcs, num_customers, orders_per_day)
    order_generator = ActualOrderGenerator(physical_network, orders_per_day)
    generator = DirichletInventoryGenerator(physical_network)
    environment_parameters = EnvironmentParameters(physical_network,
                                                   order_generator, generator,
                                                   num_steps)

    env = ShippingFacilityEnvironment(environment_parameters)
    agent = RandomAgent(env)

    obs = env.reset()
    reward = 0
    done = False
    print("=========== starting episode loop ===========")
    print("Initial environment: ")
    env.render()
    actions = []
    episode_rewards = []
    # demands_per_k = np.zeros((num_commodities, num_steps))
    # inventory_at_t = np.zeros((num_commodities, num_steps))  # TODO: fill these in eventually
    while not done:
        action = agent.act(obs, reward, done)

        # print(f"Agent is taking action: {action}")
        # the agent observes the first state and chooses an action
        # environment steps with the agent's action and returns new state and reward
        obs, reward, done, info = env.step(action)
        # print(f"Got reward {reward} done {done}")

        # Render the current state of the environment
        env.render()
        actions.append(action)
        episode_rewards.append(reward)

        if done:
            print("===========Environment says we are DONE ===========")

    return actions, episode_rewards
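For reference, a call to run_with_params might look like the sketch below; every argument value is an illustrative assumption rather than a value taken from the original project.

# Illustrative invocation only: all values are assumed placeholders.
actions, episode_rewards = run_with_params(
    num_dcs=3,
    num_customers=5,
    dcs_per_customer=2,
    demand_mean=100,
    demand_var=20,
    num_commodities=2,
    orders_per_day=1,
    num_steps=10,
)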
Example #3
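An excerpt of a rollout-collection loop: on each step the agent acts, the environment is stepped, the transition is packaged by process_sample, appended to current_path and rendered.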
    return processed_observation


current_path = []
path_length = 0
path_return = 0.
num_episodes = 1

for episode in range(1, num_episodes + 1):

    current_ob = env.reset()
    rewards = []
    # act() expects the previous reward/done even before the first step.
    reward, done = 0, False

    while True:
        action = agent.act(current_ob, reward, done)
        ob, reward, done, info = env.step(action)

        processed_sample = process_sample(observation=current_ob,
                                          action=action,
                                          reward=reward,
                                          terminal=done,
                                          next_observation=ob,
                                          info=info)

        current_path.append(processed_sample)

        env.render_rollouts(current_path)
        path_length += 1

        if done or path_length > 50:
Example #4
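A command-line entry point: it creates a Gym environment (optionally wrapped in a Monitor), builds a random, REINFORCE or actor-critic agent from the parsed arguments, runs up to args.max_episodes episodes and periodically logs the episode returns.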
import logging
import time

import gym
import numpy as np
from gym import wrappers

# RandomAgent, ReinforceAgent and ActorCriticAgent are assumed to be
# importable from the surrounding project.
logger = logging.getLogger(__name__)


def main(args):
    # Make the environment.
    env = gym.make(args.env_id)

    # logging
    outdir = 'logs/secret_breakout'

    if args.monitoring:
        env = wrappers.Monitor(env,
                               directory=outdir,
                               video_callable=False,
                               force=True)

    env.seed(args.seed)
    state = env.reset()

    # Get the action and observation space from the environment.
    logger.debug('Action space vector length: {}'.format(env.action_space.n))
    # TODO: fix this so that it can be directly read from env.observation_space
    logger.debug('Observation space vector length: {}'.format(len(state)))
    logger.debug('Max episode steps: {}'.format(env.spec.max_episode_steps))

    # Build the agent
    if args.agent_id == 'random':
        agent = RandomAgent(env.action_space)
    elif args.agent_id == 'reinforce':
        agent = ReinforceAgent(input_size=len(state),
                               hidden_size=args.hidden_size,
                               output_size=env.action_space.n,
                               learning_rate=args.learning_rate,
                               gamma=args.gamma)
    elif args.agent_id == 'ac':
        agent = ActorCriticAgent(input_size=len(state),
                                 hidden_size=args.hidden_size,
                                 output_size=env.action_space.n,
                                 learning_rate=5e-3,
                                 gamma=args.gamma)
    else:
        raise ValueError('Unknown agent_id: {}'.format(args.agent_id))

    reward = 0
    done = False

    ep_rewards = []
    ep_start_time = time.time()

    # Add one to max_episodes so the final episode is still run.
    for i_episode in range(args.max_episodes + 1):
        state = env.reset()

        # keep track of the performance over the episode
        single_ep_cumulative_reward = 0
        for step in range(env.spec.max_episode_steps):
            # get the next action from the agent
            action = agent.act(state, reward, done)
            # perform the action in the environment
            state, reward, done, info = env.step(action)
            # track the episode performance
            single_ep_cumulative_reward += reward
            if done:
                break

        # add the accumulated reward to list of episode returns
        ep_rewards.append(single_ep_cumulative_reward)

        # update reporting times
        ep_report_time = round(time.time() - ep_start_time, 2)
        ep_start_time = time.time()

        if i_episode % args.log_interval == 0:
            logger.info(
                't(s): {}, ep: {}, R: {:.2f}, R_av_5: {:.2f}, i: {}'.format(
                    ep_report_time, i_episode, ep_rewards[-1],
                    np.mean(ep_rewards[-5:]), info))
    env.close()
    return True
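main() reads a fixed set of attributes from args; a minimal argparse wiring consistent with those names could look like the sketch below, where the environment id and all defaults are assumptions.

# Sketch only: the env id and every default value are assumed placeholders.
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--env-id', default='CartPole-v1')
    parser.add_argument('--agent-id', choices=['random', 'reinforce', 'ac'], default='random')
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--monitoring', action='store_true')
    parser.add_argument('--hidden-size', type=int, default=128)
    parser.add_argument('--learning-rate', type=float, default=1e-3)
    parser.add_argument('--gamma', type=float, default=0.99)
    parser.add_argument('--max-episodes', type=int, default=500)
    parser.add_argument('--log-interval', type=int, default=10)
    main(parser.parse_args())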