Example #1
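A minimal unit test for RandomAgent: the agent is built over a two-action Discrete space and the value returned by act() is checked.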
import unittest

from gym import spaces

# RandomAgent is assumed to be importable from the surrounding project.


class TestRandomAgent(unittest.TestCase):
    def setUp(self):
        number_of_actions = 2
        action_space = spaces.Discrete(number_of_actions)
        self.agent = RandomAgent(action_space)

    def testAction(self):
        action = self.agent.act(state=None, reward=None, done=None)
        # A random policy over Discrete(2) may legitimately return 0 or 1.
        self.assertIn(action, (0, 1))
Example #2
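This example assembles a PhysicalNetwork, an ActualOrderGenerator and a DirichletInventoryGenerator into a ShippingFacilityEnvironment, then runs a single episode with a RandomAgent and returns the actions taken and the rewards collected.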
def run_with_params(num_dcs, num_customers, dcs_per_customer, demand_mean,
                    demand_var, num_commodities, orders_per_day, num_steps):
    physical_network = PhysicalNetwork(
        num_dcs,
        num_customers,
        dcs_per_customer,
        demand_mean,
        demand_var,
        num_commodities,
    )
    # order_generator = NaiveOrderGenerator(num_dcs, num_customers, orders_per_day)
    order_generator = ActualOrderGenerator(physical_network, orders_per_day)
    generator = DirichletInventoryGenerator(physical_network)
    environment_parameters = EnvironmentParameters(physical_network,
                                                   order_generator, generator,
                                                   num_steps)

    env = ShippingFacilityEnvironment(environment_parameters)
    agent = RandomAgent(env)

    obs = env.reset()
    reward = 0
    done = False
    print("=========== starting episode loop ===========")
    print("Initial environment: ")
    env.render()
    actions = []
    episode_rewards = []
    # demands_per_k = np.zeros((num_commodities, num_steps))
    # inventory_at_t = np.zeros((num_commodities, num_steps))  # TODO: fill these in eventually
    while not done:
        action = agent.act(obs, reward, done)

        # print(f"Agent is taking action: {action}")
        # the agent observes the first state and chooses an action
        # environment steps with the agent's action and returns new state and reward
        obs, reward, done, info = env.step(action)
        # print(f"Got reward {reward} done {done}")

        # Render the current state of the environment
        env.render()
        actions.append(action)
        episode_rewards.append(reward)

        if done:
            print("===========Environment says we are DONE ===========")

    return actions, episode_rewards
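For reference, a call to run_with_params might look like the sketch below; every argument value is an illustrative assumption rather than a value taken from the original project.

# Illustrative invocation only: all values are assumed placeholders.
actions, episode_rewards = run_with_params(
    num_dcs=3,
    num_customers=5,
    dcs_per_customer=2,
    demand_mean=100,
    demand_var=20,
    num_commodities=2,
    orders_per_day=1,
    num_steps=10,
)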
Example #3
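An excerpt of a rollout-collection loop: on each step the agent acts, the environment is stepped, the transition is packaged by process_sample, appended to current_path and rendered.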
    return processed_observation


current_path = []
path_length = 0
path_return = 0.
num_episodes = 1

for episode in range(1, num_episodes + 1):

    current_ob = env.reset()
    rewards = []
    # act() expects the previous reward/done even before the first step.
    reward, done = 0, False

    while True:
        action = agent.act(current_ob, reward, done)
        ob, reward, done, info = env.step(action)

        processed_sample = process_sample(observation=current_ob,
                                          action=action,
                                          reward=reward,
                                          terminal=done,
                                          next_observation=ob,
                                          info=info)

        current_path.append(processed_sample)

        env.render_rollouts(current_path)
        path_length += 1

        if done or path_length > 50:
Example #4
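A command-line entry point: it creates a Gym environment (optionally wrapped in a Monitor), builds a random, REINFORCE or actor-critic agent from the parsed arguments, runs up to args.max_episodes episodes and periodically logs the episode returns.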
import logging
import time

import gym
import numpy as np
from gym import wrappers

# RandomAgent, ReinforceAgent and ActorCriticAgent are assumed to be
# importable from the surrounding project.
logger = logging.getLogger(__name__)


def main(args):
    # Make the environment.
    env = gym.make(args.env_id)

    # logging
    outdir = 'logs/secret_breakout'

    if args.monitoring:
        env = wrappers.Monitor(env,
                               directory=outdir,
                               video_callable=False,
                               force=True)

    env.seed(args.seed)
    state = env.reset()

    # Get the action and observation space from the environment.
    logger.debug('Action space vector length: {}'.format(env.action_space.n))
    # TODO: fix this so that it can be directly read from env.observation_space
    logger.debug('Observation space vector length: {}'.format(len(state)))
    logger.debug('Max episode steps: {}'.format(env.spec.max_episode_steps))

    # Build the agent
    if args.agent_id == 'random':
        agent = RandomAgent(env.action_space)
    elif args.agent_id == 'reinforce':
        agent = ReinforceAgent(input_size=len(state),
                               hidden_size=args.hidden_size,
                               output_size=env.action_space.n,
                               learning_rate=args.learning_rate,
                               gamma=args.gamma)
    elif args.agent_id == 'ac':
        agent = ActorCriticAgent(input_size=len(state),
                                 hidden_size=args.hidden_size,
                                 output_size=env.action_space.n,
                                 learning_rate=5e-3,
                                 gamma=args.gamma)
    else:
        raise ValueError('Unknown agent_id: {}'.format(args.agent_id))

    reward = 0
    done = False

    ep_rewards = []
    ep_start_time = time.time()

    # Add one to max_episodes so the final episode is still run.
    for i_episode in range(args.max_episodes + 1):
        state = env.reset()

        # keep track of the performance over the episode
        single_ep_cumulative_reward = 0
        for step in range(env.spec.max_episode_steps):
            # get the next action from the agent
            action = agent.act(state, reward, done)
            # perform the action in the environment
            state, reward, done, info = env.step(action)
            # track the episode performance
            single_ep_cumulative_reward += reward
            if done:
                break

        # add the accumulated reward to list of episode returns
        ep_rewards.append(single_ep_cumulative_reward)

        # update reporting times
        ep_report_time = round(time.time() - ep_start_time, 2)
        ep_start_time = time.time()

        if i_episode % args.log_interval == 0:
            logger.info(
                't(s): {}, ep: {}, R: {:.2f}, R_av_5: {:.2f}, i: {}'.format(
                    ep_report_time, i_episode, ep_rewards[-1],
                    np.mean(ep_rewards[-5:]), info))
    env.close()
    return True
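main() reads a fixed set of attributes from args; a minimal argparse wiring consistent with those names could look like the sketch below, where the environment id and all defaults are assumptions.

# Sketch only: the env id and every default value are assumed placeholders.
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--env-id', default='CartPole-v1')
    parser.add_argument('--agent-id', choices=['random', 'reinforce', 'ac'], default='random')
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--monitoring', action='store_true')
    parser.add_argument('--hidden-size', type=int, default=128)
    parser.add_argument('--learning-rate', type=float, default=1e-3)
    parser.add_argument('--gamma', type=float, default=0.99)
    parser.add_argument('--max-episodes', type=int, default=500)
    parser.add_argument('--log-interval', type=int, default=10)
    main(parser.parse_args())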