Exemplo n.º 1
0
def create_agent_66_experiment_runner(num_dcs, num_customers, dcs_per_customer,
                                      demand_mean, demand_var, num_commodities,
                                      orders_per_day, num_steps):
    """Assemble an ExperimentRunner driven by an AgentHighest policy.

    A PhysicalNetwork is built from the supplied topology/demand settings,
    wrapped in order and inventory generators, bundled into environment
    parameters, and finally handed to an ExperimentRunner tagged
    "randomvalid_validation".
    """
    network = PhysicalNetwork(
        num_dcs,
        num_customers,
        dcs_per_customer,
        demand_mean,
        demand_var,
        num_commodities,
    )
    orders = ActualOrderGenerator(network, orders_per_day)
    inventory = DirichletInventoryGenerator(network)

    params = EnvironmentParameters(network, orders, inventory, num_steps)
    environment = ShippingFacilityEnvironment(params)
    policy = AgentHighest(environment)

    return ExperimentRunner(
        orders,
        inventory,
        policy,
        environment,
        experiment_name="randomvalid_validation",
    )
Exemplo n.º 2
0
def run_with_params(num_dcs, num_customers, dcs_per_customer, demand_mean,
                    demand_var, num_commodities, orders_per_day, num_steps):
    """Run a single RandomAgent episode and collect its trajectory.

    Builds a PhysicalNetwork plus order/inventory generators, wraps them
    in a ShippingFacilityEnvironment, then steps a RandomAgent until the
    environment reports done, rendering after every step.

    Returns:
        A ``(actions, episode_rewards)`` pair of parallel lists, one entry
        per environment step.
    """
    network = PhysicalNetwork(
        num_dcs,
        num_customers,
        dcs_per_customer,
        demand_mean,
        demand_var,
        num_commodities,
    )
    order_gen = ActualOrderGenerator(network, orders_per_day)
    inventory_gen = DirichletInventoryGenerator(network)
    params = EnvironmentParameters(network, order_gen, inventory_gen,
                                   num_steps)

    environment = ShippingFacilityEnvironment(params)
    policy = RandomAgent(environment)

    observation = environment.reset()
    last_reward = 0
    finished = False
    print("=========== starting episode loop ===========")
    print("Initial environment: ")
    environment.render()

    taken_actions = []
    rewards = []
    while not finished:
        # Agent picks an action from the latest observation, then the
        # environment advances one step and reports the new state/reward.
        chosen = policy.act(observation, last_reward, finished)
        observation, last_reward, finished, _ = environment.step(chosen)

        environment.render()
        taken_actions.append(chosen)
        rewards.append(last_reward)

        if finished:
            print("===========Environment says we are DONE ===========")

    return taken_actions, rewards