Пример #1
0
 def agent_ids(self):
     """Return the ids of all agents, producers first and consumers second.

     One producer id and one consumer id is generated (via ``Utils``) for
     every key of ``self.world.facilities``.
     """
     producer_ids = [
         Utils.agentid_producer(facility_id)
         for facility_id in self.world.facilities.keys()
     ]
     consumer_ids = [
         Utils.agentid_consumer(facility_id)
         for facility_id in self.world.facilities.keys()
     ]
     return producer_ids + consumer_ids
    def _retailer_profit(self, env, step_outcome):
        """Build per-agent rewards from the facility balance sheets of a step.

        Producer agents receive their facility's own step balance. Consumer
        agents receive a blend of the parent facility's balance (weight
        ``wc``) and the facility's own balance (weight ``1 - wc``), where
        ``wc`` is ``self.env_config['global_reward_weight_consumer']``.
        Every reward is divided by 1,000,000 before being returned.

        Args:
            env: Object exposing ``world.facilities``, a mapping whose values
                are facility objects with an ``id`` attribute.
            step_outcome: Object exposing ``facility_step_balance_sheets``,
                a mapping indexed by facility id.

        Returns:
            dict mapping each producer and consumer agent id to its scaled
            reward.
        """
        # Retailer (terminal) turnover.
        consumer_weight = self.env_config['global_reward_weight_consumer']
        balance_sheets = step_outcome.facility_step_balance_sheets
        reward_scale = 1000000.0

        # When computing the reward of an SKU, the reward of the store it
        # belongs to is also taken into account (with weight
        # ``consumer_weight``): a ProductUnit uses the balance of its owning
        # facility, anything else uses its own balance.
        parent_facility_balance = {}
        for unit in env.world.facilities.values():
            owner = unit.facility if isinstance(unit, ProductUnit) else unit
            parent_facility_balance[unit.id] = balance_sheets[owner.id]

        # Blend parent and own balance for the consumer side of each facility.
        blended_balance = {}
        for f_id, own_balance in balance_sheets.items():
            blended_balance[f_id] = (
                consumer_weight * parent_facility_balance[f_id]
                + (1 - consumer_weight) * own_balance
            )

        # Producers are inserted first, then consumers, so the resulting
        # dict keeps the producer-before-consumer ordering.
        rewards_by_agent = {
            Utils.agentid_producer(f_id): own_balance / reward_scale
            for f_id, own_balance in balance_sheets.items()
        }
        rewards_by_agent.update(
            (Utils.agentid_consumer(f_id), blended / reward_scale)
            for f_id, blended in blended_balance.items()
        )
        return rewards_by_agent
    def world_to_state(self, world):
        """Convert ``world`` into per-agent state.

        For every facility in ``world.facilities`` a feature object is built
        with ``self._state`` and passed to ``self._add_global_features``
        (presumably updated in place -- its return value is not used). The
        same feature object is stored under both the producer and the
        consumer agent id of that facility.

        Returns:
            A 2-tuple ``(serialized, raw)`` where ``raw`` is the dict of
            agent id -> feature object and ``serialized`` is the result of
            ``self._serialize_state(raw)``.
        """
        per_agent_state = {}
        for fid, fac in world.facilities.items():
            features = self._state(fac)
            self._add_global_features(features, world)
            # Producer and consumer of one facility share the same object.
            producer_key = Utils.agentid_producer(fid)
            consumer_key = Utils.agentid_consumer(fid)
            per_agent_state[producer_key] = features
            per_agent_state[consumer_key] = features

        serialized = self._serialize_state(per_agent_state)
        return serialized, per_agent_state