Пример #1
0
 def load_policy(agent_id):
     _facility = env.world.facilities[Utils.agentid_to_fid(agent_id)]
     if Utils.is_producer_agent(agent_id):
         return ProducerBaselinePolicy(
             env.observation_space, env.action_space_producer,
             BaselinePolicy.get_config_from_env(env))
     elif isinstance(_facility, SKUStoreUnit) or isinstance(
             _facility, SKUWarehouseUnit):
         policy = ConsumerBaseStockPolicy(
             env.observation_space, env.action_space_consumer,
             BaselinePolicy.get_config_from_env(env))
         policy.base_stock = sku_base_stocks[Utils.agentid_to_fid(agent_id)]
         return policy
     else:
         return ConsumerBaselinePolicy(
             env.observation_space, env.action_space_consumer,
             BaselinePolicy.get_config_from_env(env))
    def action_dictionary_to_control(self, action_dict, world):
        actions_by_facility = defaultdict(list)
        for agent_id, action in action_dict.items():
            f_id = Utils.agentid_to_fid(agent_id)
            actions_by_facility[f_id].append((agent_id, action))

        controls = {}
        for f_id, actions in actions_by_facility.items():
            controls[f_id] = self._actions_to_control(world.facilities[f_id],
                                                      actions)
        return World.Control(facility_controls=controls)
 def load_policy(agent_id):
     agent_echelon = env.world.agent_echelon[Utils.agentid_to_fid(agent_id)]
     if Utils.is_producer_agent(agent_id):
         policy_name = 'baseline_producer'
     else:
         if agent_echelon == total_echelon - 1:
             policy_name = 'ppo_store_consumer'
         else:
             if agent_echelon >= total_echelon-echelon_to_train:
                 policy_name = 'ppo_warehouse_consumer'
             else:
                 policy_name = 'baseline_consumer'
     return ppo_trainer.get_policy(policy_name)
def echelon_policy_map_fn(echelon, agent_id):
    facility_id = Utils.agentid_to_fid(agent_id)
    if Utils.is_producer_agent(agent_id):
        return 'baseline_producer'
    else:
        agent_echelon = env.world.agent_echelon[facility_id]
        if  agent_echelon == 0: # supplier
            return 'baseline_consumer'
        elif agent_echelon == env.world.total_echelon - 1: # retailer
            return 'ppo_store_consumer'
        elif agent_echelon >= echelon: # warehouse and current layer is trainning or has been trained.
            return 'ppo_warehouse_consumer'
        else: # warehouse on layers that haven't been trained yet
            return 'baseline_consumer'
Пример #5
0
    def eval(self, iter, eval_on_trainingset=False):
        self.switch_mode(eval=True)

        print(f"  == eval iteration {iter} == ")

        obss = self.env.reset(eval=True,
                              eval_on_trainingset=eval_on_trainingset)
        _, infos = self.env.state_calculator.world_to_state(self.env.world)
        rnn_states = {}
        rewards_all = {}
        episode_reward_all = {}
        episode_reward = {}
        episode_steps = []
        episode_step = 0

        tracker = SimulationTracker(self.env.done_step, 1,
                                    self.env.agent_ids())

        for agent_id in obss.keys():
            # policies[agent_id] = load_policy(agent_id)
            rnn_states[agent_id] = self.policies[agent_id].get_initial_state()
            rewards_all[agent_id] = []
            episode_reward_all[agent_id] = []
            episode_reward[agent_id] = 0

        for i in range(100000):
            episode_step += 1
            actions = {}
            # print("timestep : ", self.step)
            # print("Start calculate action ....")
            for agent_id, obs in obss.items():
                policy = self.policies[agent_id]
                action, new_state, _ = policy.compute_single_action(
                    obs,
                    state=rnn_states[agent_id],
                    info=infos[agent_id],
                    explore=False)
                actions[agent_id] = action
                # print(agent_id, " :", policy.__class__, " : ", action)
            next_obss, rewards, dones, infos = self.env.step(actions)

            for agent_id, reward in rewards.items():
                rewards_all[agent_id].append(reward)
                episode_reward[agent_id] += reward

            step_balances = {}
            for agent_id in rewards.keys():
                step_balances[agent_id] = self.env.world.facilities[
                    Utils.agentid_to_fid(
                        agent_id)].economy.step_balance.total()
            # print(env.world.economy.global_balance().total(), step_balances, rewards)
            tracker.add_sample(0, episode_step - 1,
                               self.env.world.economy.global_balance().total(),
                               step_balances, rewards)

            done = any(dones.values())

            if done:
                obss = self.env.reset(eval=True)
                episode_steps.append(episode_step)
                episode_step = 0
                for agent_id, reward in episode_reward.items():
                    episode_reward_all[agent_id].append(reward)
                    episode_reward[agent_id] = 0
                break
            else:
                obss = next_obss
        infos = {
            "rewards_all": rewards_all,
            "episode_reward_all": episode_reward_all,
            "epsilon": self.policies[self.policies_to_train[0]].epsilon,
            "all_step": self.step,
            "episode_step": sum(episode_steps) / len(episode_steps),
            "profit": tracker.get_retailer_profit(),
        }
        return infos
Пример #6
0
def visualization(env, policies, iteration, policy_mode, basestock=False):

    policy_mode = policy_mode  # + f'_{iteration}'

    renderer = AsciiWorldRenderer()
    frame_seq = []

    evaluation_epoch_len = env.env_config['evaluation_len']
    starter_step = env.env_config['episod_duration']+env.env_config['tail_timesteps']
    env.set_iteration(1, 1)
    # env.env_config.update({'episod_duration': evaluation_epoch_len, 'downsampling_rate': 1})
    print(
        f"Environment: Producer action space {env.action_space_producer}, Consumer action space {env.action_space_consumer}, Observation space {env.observation_space}"
        , flush=True)
    obss = env.reset()
    if basestock:
        from scheduler.inventory_base_stock_policy import ConsumerBaseStockPolicy
        ConsumerBaseStockPolicy.facilities = env.world.facilities    

    if Utils.get_demand_sampler()=='ONLINE':
        env.set_retailer_step(starter_step)
    _, infos = env.state_calculator.world_to_state(env.world)


    # policies = {}
    rnn_states = {}
    rewards = {}
    for agent_id in obss.keys():
        # policies[agent_id] = load_policy(agent_id)
        rnn_states[agent_id] = policies[agent_id].get_initial_state()
        rewards[agent_id] = 0

    # Simulation loop
    tracker = SimulationTracker(evaluation_epoch_len, 1, env.agent_ids())
    print(f"  === evaluation length {evaluation_epoch_len}, it will take about 1 min ....", flush=True)

    for epoch in range(evaluation_epoch_len):
        action_dict = {}
        for agent_id, obs in obss.items():
            policy = policies[agent_id]
            action, new_state, _ = policy.compute_single_action(obs, state=rnn_states[agent_id], info=infos[agent_id],
                                                                explore=False)
            action_dict[agent_id] = action
            # if agent_id.startswith('SKUStoreUnit') and Utils.is_consumer_agent(agent_id):
            #     print(agent_id, action, rewards[agent_id])
            #     print(obs.tolist())
        obss, rewards, dones, infos = env.step(action_dict)
        step_balances = {}
        for agent_id in rewards.keys():
            step_balances[agent_id] = env.world.facilities[Utils.agentid_to_fid(agent_id)].economy.step_balance.total()
        # print(env.world.economy.global_balance().total(), step_balances, rewards)
        tracker.add_sample(0, epoch, env.world.economy.global_balance().total(), step_balances, rewards)
        # some stats
        stock_status = env.get_stock_status()
        order_in_transit_status = env.get_order_in_transit_status()
        demand_status = env.get_demand_status()

        tracker.add_sku_status(0, epoch, stock_status, order_in_transit_status, demand_status)

        frame = renderer.render(env.world)
        frame_seq.append(np.asarray(frame))

    print(tracker.get_retailer_profit())

    if not os.path.exists('output'):
        os.mkdir('output')

    if not os.path.exists('output/%s' % policy_mode):
        os.mkdir('output/%s' % policy_mode)

    if not os.path.exists(f'output/{policy_mode}/iter_{iteration}'):
        os.mkdir(f'output/{policy_mode}/iter_{iteration}')

    # tracker.render("output/%s/plot.png" % policy_mode)
    tracker.render(f'output/{policy_mode}/iter_{iteration}/plot.png')
    tracker.render_sku(policy_mode, iteration)
    print(f"  === evaluation length end ", flush=True)