    evaluation_epoch_len = 60
    env.set_iteration(1, 1)
    env.env_config.update({'episod_duration': evaluation_epoch_len, 'downsampling_rate': 1})
    print(f"Environment: Producer action space {env.action_space_producer}, Consumer action space {env.action_space_consumer}, Observation space {env.observation_space}")

    def load_policy(agent_id):
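        # Select a policy per agent: producers get the baseline producer policy;
        # the last echelon (stores) gets the trained PPO store-consumer policy;
        # upstream echelons included in training get the PPO warehouse policy;
        # everything else falls back to the baseline consumer policy.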
        agent_echelon = env.world.agent_echelon[Utils.agentid_to_fid(agent_id)]
        if Utils.is_producer_agent(agent_id):
            policy_name = 'baseline_producer'
        elif agent_echelon == total_echelon - 1:
            policy_name = 'ppo_store_consumer'
        elif agent_echelon >= total_echelon - echelon_to_train:
            policy_name = 'ppo_warehouse_consumer'
        else:
            policy_name = 'baseline_consumer'
        return ppo_trainer.get_policy(policy_name)

    policies = {}
    for agent_id in env.agent_ids():
        policies[agent_id] = load_policy(agent_id)
        
    # Simulation loop
    tracker = SimulationTracker(evaluation_epoch_len, 1, env, policies)
    
    if args.pt:
        loc_path = f"{os.environ['PT_OUTPUT_DIR']}/{policy_mode}/"
    else:
        loc_path = f'output/{policy_mode}/'
    tracker.run_and_render(loc_path)
    env_config_for_rendering = env_config.copy()
    episod_duration = args.episod
    env_config_for_rendering['episod_duration'] = episod_duration
    env = InventoryManageEnv(env_config_for_rendering)

    policy_mode = "eoq"
    # Create the environment
    env.set_iteration(1, 1)
    print(f"Environment: Producer action space {env.action_space_producer}, Consumer action space {env.action_space_consumer}, Observation space {env.observation_space}")

    def load_policy(agent_id):
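        # Producers keep the baseline producer policy; SKU store units (including
        # outer stores) reorder with the EOQ policy; all other consumers use the
        # baseline consumer policy.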
        if Utils.is_producer_agent(agent_id):
            return ProducerBaselinePolicy(env.observation_space, env.action_space_producer, BaselinePolicy.get_config_from_env(env))
        if agent_id.startswith('SKUStoreUnit') or agent_id.startswith('OuterSKUStoreUnit'):
            return ConsumerEOQPolicy(env.observation_space, env.action_space_consumer, BaselinePolicy.get_config_from_env(env))
        else:
            return ConsumerBaselinePolicy(env.observation_space, env.action_space_consumer, BaselinePolicy.get_config_from_env(env))

    policies = {}
    for agent_id in env.agent_ids():
        policies[agent_id] = load_policy(agent_id)

    # Simulation loop
    tracker = SimulationTracker(episod_duration, 1, env, policies)
    if args.pt:
        loc_path = f"{os.environ['PT_OUTPUT_DIR']}/{policy_mode}/"
    else:
        loc_path = f'output/{policy_mode}/'
    tracker.run_and_render(loc_path)
Example #3
    def eval(self, iter, eval_on_trainingset=False):
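        # Run one full evaluation episode with exploration disabled, recording
        # per-agent rewards and per-step facility balances in a SimulationTracker.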
        self.switch_mode(eval=True)

        print(f"  == eval iteration {iter} == ")

        obss = self.env.reset(eval=True,
                              eval_on_trainingset=eval_on_trainingset)
        _, infos = self.env.state_calculator.world_to_state(self.env.world)
        rnn_states = {}
        rewards_all = {}
        episode_reward_all = {}
        episode_reward = {}
        episode_steps = []
        episode_step = 0

        tracker = SimulationTracker(self.env.done_step, 1,
                                    self.env.agent_ids())

        for agent_id in obss.keys():
            # policies[agent_id] = load_policy(agent_id)
            rnn_states[agent_id] = self.policies[agent_id].get_initial_state()
            rewards_all[agent_id] = []
            episode_reward_all[agent_id] = []
            episode_reward[agent_id] = 0

        for i in range(100000):
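            # Step until the episode terminates; the large range only acts as a safety cap.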
            episode_step += 1
            actions = {}
            # print("timestep : ", self.step)
            # print("Start calculate action ....")
            for agent_id, obs in obss.items():
                policy = self.policies[agent_id]
                action, new_state, _ = policy.compute_single_action(
                    obs,
                    state=rnn_states[agent_id],
                    info=infos[agent_id],
                    explore=False)
                actions[agent_id] = action
                # print(agent_id, " :", policy.__class__, " : ", action)
            next_obss, rewards, dones, infos = self.env.step(actions)

            for agent_id, reward in rewards.items():
                rewards_all[agent_id].append(reward)
                episode_reward[agent_id] += reward

            step_balances = {}
            for agent_id in rewards.keys():
                step_balances[agent_id] = self.env.world.facilities[
                    Utils.agentid_to_fid(
                        agent_id)].economy.step_balance.total()
            # print(env.world.economy.global_balance().total(), step_balances, rewards)
            tracker.add_sample(0, episode_step - 1,
                               self.env.world.economy.global_balance().total(),
                               step_balances, rewards)

            done = any(dones.values())

            if done:
                obss = self.env.reset(eval=True)
                episode_steps.append(episode_step)
                episode_step = 0
                for agent_id, reward in episode_reward.items():
                    episode_reward_all[agent_id].append(reward)
                    episode_reward[agent_id] = 0
                break
            else:
                obss = next_obss
        infos = {
            "rewards_all": rewards_all,
            "episode_reward_all": episode_reward_all,
            "epsilon": self.policies[self.policies_to_train[0]].epsilon,
            "all_step": self.step,
            "episode_step": sum(episode_steps) / len(episode_steps),
            "profit": tracker.get_retailer_profit(),
        }
        return infos
Example #4
    def load_policy(agent_id):
        # NOTE: this snippet begins mid-function in the original source; the
        # enclosing `def` and the facility lookup below are assumed for completeness.
        _facility = env.world.facilities[Utils.agentid_to_fid(agent_id)]
        if Utils.is_producer_agent(agent_id):
            return ProducerBaselinePolicy(
                env.observation_space, env.action_space_producer,
                BaselinePolicy.get_config_from_env(env))
        elif isinstance(_facility, (SKUStoreUnit, SKUWarehouseUnit)):
            policy = ConsumerBaseStockPolicy(
                env.observation_space, env.action_space_consumer,
                BaselinePolicy.get_config_from_env(env))
            policy.base_stock = sku_base_stocks[Utils.agentid_to_fid(agent_id)]
            return policy
        else:
            return ConsumerBaselinePolicy(
                env.observation_space, env.action_space_consumer,
                BaselinePolicy.get_config_from_env(env))

    policies = {}
    for agent_id in env.agent_ids():
        policies[agent_id] = load_policy(agent_id)

    # Simulation loop
    if args.visualization:
        visualization(env, policies, 1, policy_mode)
    else:
        tracker = SimulationTracker(episod_duration, 1, env.agent_ids())
        if args.pt:
            loc_path = f"{os.environ['PT_OUTPUT_DIR']}/{policy_mode}/"
        else:
            loc_path = f'output/{policy_mode}/'
        tracker.run_and_render(loc_path)
Example #5
def visualization(env, policies, iteration, policy_mode, basestock=False):
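    # Roll the environment forward for `evaluation_len` steps with the given
    # policies, log step balances and SKU status into a SimulationTracker, and
    # render each step of the world as an ASCII frame.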

    policy_mode = policy_mode  # + f'_{iteration}'

    renderer = AsciiWorldRenderer()
    frame_seq = []

    evaluation_epoch_len = env.env_config['evaluation_len']
    starter_step = env.env_config['episod_duration'] + env.env_config['tail_timesteps']
    env.set_iteration(1, 1)
    # env.env_config.update({'episod_duration': evaluation_epoch_len, 'downsampling_rate': 1})
    print(
        f"Environment: Producer action space {env.action_space_producer}, Consumer action space {env.action_space_consumer}, Observation space {env.observation_space}",
        flush=True)
    obss = env.reset()
    if basestock:
        from scheduler.inventory_base_stock_policy import ConsumerBaseStockPolicy
        ConsumerBaseStockPolicy.facilities = env.world.facilities

    if Utils.get_demand_sampler() == 'ONLINE':
        env.set_retailer_step(starter_step)
    _, infos = env.state_calculator.world_to_state(env.world)


    # policies = {}
    rnn_states = {}
    rewards = {}
    for agent_id in obss.keys():
        # policies[agent_id] = load_policy(agent_id)
        rnn_states[agent_id] = policies[agent_id].get_initial_state()
        rewards[agent_id] = 0

    # Simulation loop
    tracker = SimulationTracker(evaluation_epoch_len, 1, env.agent_ids())
    print(f"  === evaluation length {evaluation_epoch_len}, it will take about 1 min ....", flush=True)

    for epoch in range(evaluation_epoch_len):
        action_dict = {}
        for agent_id, obs in obss.items():
            policy = policies[agent_id]
            action, new_state, _ = policy.compute_single_action(obs, state=rnn_states[agent_id], info=infos[agent_id],
                                                                explore=False)
            action_dict[agent_id] = action
            # if agent_id.startswith('SKUStoreUnit') and Utils.is_consumer_agent(agent_id):
            #     print(agent_id, action, rewards[agent_id])
            #     print(obs.tolist())
        obss, rewards, dones, infos = env.step(action_dict)
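        # Record per-facility step balances so the tracker can attribute profit per agent.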
        step_balances = {}
        for agent_id in rewards.keys():
            step_balances[agent_id] = env.world.facilities[Utils.agentid_to_fid(agent_id)].economy.step_balance.total()
        # print(env.world.economy.global_balance().total(), step_balances, rewards)
        tracker.add_sample(0, epoch, env.world.economy.global_balance().total(), step_balances, rewards)
        # some stats
        stock_status = env.get_stock_status()
        order_in_transit_status = env.get_order_in_transit_status()
        demand_status = env.get_demand_status()

        tracker.add_sku_status(0, epoch, stock_status, order_in_transit_status, demand_status)

        frame = renderer.render(env.world)
        frame_seq.append(np.asarray(frame))

    print(tracker.get_retailer_profit())

    # Create the nested output directory for this policy mode and iteration
    os.makedirs(f'output/{policy_mode}/iter_{iteration}', exist_ok=True)

    # tracker.render("output/%s/plot.png" % policy_mode)
    tracker.render(f'output/{policy_mode}/iter_{iteration}/plot.png')
    tracker.render_sku(policy_mode, iteration)
    print(f"  === evaluation length end ", flush=True)