Пример #1
0
    def day(self, actions, rewards):
        """
        Handle one day step: either the voting round or the execution round.

        When ``self.is_comm`` is truthy the rewards are returned untouched.
        Otherwise every entry of ``rewards`` first receives the 'day' penalty,
        then the agent returned by ``most_frequent(actions)`` is executed; if
        that agent is already dead, the alive agents get the 'execute_dead'
        penalty instead.

        :param actions: dict, map id_ to vote
        :param rewards: dict, maps agent id_ to curr reward
        :return: updated rewards
        """

        # voting round: nothing to resolve, only log on the configured step
        if self.is_comm:
            if Params.log_step == self.ep_step:
                logger.debug("Day Time| Voting")
            return rewards

        # execution round
        if Params.log_step == self.ep_step:
            logger.debug("Day Time| Executing")

        # charge the 'day' penalty to every entry
        rewards = {
            agent: current + self.penalties.get('day')
            for agent, current in rewards.items()
        }

        # update the suicide metric from the votes
        self.custom_metrics["suicide"] += suicide_num(actions)

        # agent chosen by the votes
        target = most_frequent(actions)

        # let target_accord adjust the rewards given the chosen target and the votes
        rewards = self.target_accord(target, rewards, actions)

        if not self.status_map[target]:
            # the chosen agent is already dead: penalize everyone still alive
            for agent in self.get_ids("all", alive=True):
                rewards[agent] += self.penalties.get('execute_dead')
            if Params.log_step == self.ep_step:
                logger.debug(f"Players tried to execute dead agent {target}")
        else:
            if Params.log_step == self.ep_step:
                logger.debug(f"Player {target} ({self.role_map[target]}) has been executed")

            # the target is still marked alive here, so it is part of this loop
            for agent in self.get_ids('all', alive=True):
                penalty_key = "death" if agent == target else "execution"
                rewards[agent] += self.penalties.get(penalty_key)

            # only now mark the target as dead
            self.status_map[target] = 0

        # one more day has gone by
        self.day_count += 1

        return rewards
Пример #2
0
        def execution(actions, rewards):
            """
            Resolve the execution phase and update the rewards.

            Uses ``self`` from the enclosing scope.

            :param actions: votes, forwarded to ``suicide_num`` and ``most_frequent``
            :param rewards: dict, maps agent id to current reward
            :return: updated rewards
            """

            # update the suicide metric from the votes
            self.custom_metrics["suicide"] += suicide_num(actions)

            # agent chosen by the votes
            target = most_frequent(actions)

            # let target_accord adjust the rewards given the target and the alive ids
            rewards = self.target_accord(
                target, rewards, self.get_ids("all", alive=True))

            if not self.status_map[target]:
                # the chosen agent is already dead: penalize everyone still alive
                for agent in self.get_ids("all", alive=True):
                    rewards[agent] += self.penalties.get('execute_dead')
                logger.debug(f"Players tried to execute dead agent {target}")

                # keep track of executions aimed at a dead agent
                self.custom_metrics["dead_man_execution"] += 1
            else:
                logger.debug(
                    f"Player {target} ({self.role_map[target]}) has been executed"
                )

                # reward keys whose status is still truthy; the target is one of
                # them because it has not been marked dead yet
                still_alive = [
                    agent for agent in rewards if self.status_map[agent]
                ]
                for agent in still_alive:
                    penalty_key = "death" if agent == target else "execution"
                    rewards[agent] += self.penalties.get(penalty_key)

                # only now mark the target as dead
                self.status_map[target] = 0

            # one more day has gone by
            self.day_count += 1

            return rewards
Пример #3
0
    def step(self, actions):
        """Advance the environment by one timestep.

        Runs the night phase when ``self.is_night`` is truthy, otherwise the
        day phase (after adding the 'day' penalty to every reward), then flips
        ``self.is_night`` for the next call.

        Args:
            actions (dict): actions provided by the agents, forwarded to
                ``night`` / ``day``

        Returns:
            The ``(obs, rewards, dones, info)`` tuple produced by
            ``self.convert``. ``dones["__all__"]`` is True when
            ``self.is_done`` is truthy and False otherwise.
        """

        # rewards start from zero for every id returned by get_ids
        rewards = dict.fromkeys(self.get_ids("all", alive=False), 0)

        if self.is_night:
            logger.debug("Night Time")
            rewards = self.night(actions, rewards)
        else:
            logger.debug("Day Time")
            # penalize since a day has passed
            for agent in rewards:
                rewards[agent] += self.penalties.get('day')
            rewards = self.day(actions, rewards)

        # alternate between night and day
        self.is_night = not self.is_night

        # update dones, then build observations and per-agent info
        dones, rewards = self.check_done(rewards)
        obs = self.observe()
        info = {agent: self.infos for agent in self.get_ids("all", alive=False)}

        obs, rewards, dones, info = self.convert(obs, rewards, dones, info)

        # on game over record how many days the episode lasted
        game_over = bool(self.is_done)
        if game_over:
            self.infos["tot_days"] = self.day_count
        dones["__all__"] = game_over

        return obs, rewards, dones, info
Пример #4
0
    def night(self, actions, rewards):
        """
        Run the night phase by delegating to ``wolf_action``.

        Logs whether this is the voting or the eating round (based on
        ``self.is_comm``) before delegating.

        :param actions: dict, map id_ to vote
        :param rewards: dict, maps agent id_ to curr reward
        :return: the rewards returned by ``wolf_action``
        """

        phase_msg = "Night Time| Voting" if self.is_comm else "Night Time| Eating"
        logger.debug(phase_msg)

        # the wolves' action decides the outcome of the night
        return self.wolf_action(actions, rewards)
Пример #5
0
        def kill(actions, rewards):
            """
            Resolve the wolves' kill and update the rewards.

            Uses ``self`` and ``wolves_ids`` from the enclosing scope.

            :param actions: votes, forwarded to ``suicide_num`` and ``most_frequent``
            :param rewards: dict, maps agent id to current reward
            :return: updated rewards
            :raises Exception: when ``wolves_ids`` is empty
            """

            # update the suicide metric from the votes
            self.custom_metrics["suicide"] += suicide_num(actions)

            if len(wolves_ids) == 0:
                raise Exception(
                    "Game not done but wolves are dead, have reset been called?"
                )

            # agent chosen by the votes
            target = most_frequent(actions)

            # let target_accord adjust the rewards given the target and the wolves
            rewards = self.target_accord(target, rewards, wolves_ids)

            if not self.status_map[target]:
                logger.debug(f"Wolves tried to kill dead agent {target}")
                # the chosen agent is already dead: penalize the wolves
                for wolf in wolves_ids:
                    rewards[wolf] += self.penalties.get('execute_dead')
                # keep track of kills aimed at a dead agent
                self.custom_metrics["dead_man_kill"] += 1
            else:
                # mark the target as dead and apply its death penalty
                self.status_map[target] = 0
                rewards[target] += self.penalties.get("death")
                # every wolf gets the 'kill' entry of the penalties
                for wolf in wolves_ids:
                    rewards[wolf] += self.penalties.get("kill")
                logger.debug(
                    f"Wolves killed {target} ({self.role_map[target]})")

            # applied in both branches above: the target was one of the wolves
            if target in wolves_ids:
                for wolf in wolves_ids:
                    rewards[wolf] += self.penalties.get('kill_wolf')
                # keep track of wolves targeting their own kind
                self.custom_metrics["cannibalism"] += 1

            return rewards
Пример #6
0
    def wolf_action(self, actions, rewards):
        """
        Resolve the wolves' vote: kill the chosen agent and update the rewards.

        :param actions: dict, map id to vote
        :param rewards: dict, maps agent id to curr reward
        :return: updated rewards
        :raises Exception: when no alive wolf is returned by ``get_ids``
        """

        # ids of the wolves that are still alive
        wolves_ids = self.get_ids(ww, alive=True)

        # keep only the votes cast by those wolves
        wolf_votes = {
            voter: vote for voter, vote in actions.items() if voter in wolves_ids
        }

        # update the suicide counter from the wolves' votes
        self.infos["suicide"] += suicide_num(wolf_votes)

        if len(wolves_ids) == 0:
            raise Exception("Game not done but wolves are dead")

        # one vote per wolf, in the order of wolves_ids
        ballots = [wolf_votes[wolf] for wolf in wolves_ids]

        logger.debug(f"wolves votes :{ballots}")

        # agent chosen by the votes
        target = most_frequent(ballots)

        if not self.status_map[target]:
            logger.debug(f"Wolves tried to kill dead agent {target}")
            # the chosen agent is already dead: penalize the wolves
            for wolf in wolves_ids:
                rewards[wolf] += self.penalties.get('execute_dead')
            # keep track of kills aimed at a dead agent
            self.infos["dead_man_kill"] += 1
        else:
            # mark the target as dead and apply its death penalty
            self.status_map[target] = 0
            rewards[target] += self.penalties.get("death")
            # every wolf gets the 'kill' entry of the penalties
            for wolf in wolves_ids:
                rewards[wolf] += self.penalties.get("kill")
            logger.debug(f"Wolves killed {target} ({self.role_map[target]})")

        # applied in both branches above: the target was one of the wolves
        if target in wolves_ids:
            for wolf in wolves_ids:
                rewards[wolf] += self.penalties.get('kill_wolf')
            # keep track of wolves targeting their own kind
            self.infos["cannibalism"] += 1

        return rewards
Пример #7
0
    def day(self, actions, rewards):
        """
        Run the day phase, that is execute target based on votes and reward accordingly.

        The votes are first copied into ``self.votes``; then the agent returned
        by ``most_frequent(actions)`` is executed. If that agent is already
        dead, every alive agent receives the 'execute_dead' penalty instead.

        :param actions: dict, map id to vote
        :param rewards: dict, maps agent id to curr reward
        :return: updated rewards
        """

        # update vote list; ids with no entry in actions get -1
        # (per the original note these are the dead agents)
        for idx in range(self.num_players):
            self.votes[idx] = actions.get(idx, -1)

        # update the suicide counter from the votes
        self.infos["suicide"] += suicide_num(actions)

        # get the agent to be executed
        target = most_frequent(actions)
        logger.debug(f"Villagers votes {list(actions.values())}")

        if self.status_map[target]:
            logger.debug(f"Player {target} ({self.role_map[target]}) has been executed")

            # Collect the alive agents BEFORE the target is marked dead.
            # Killing first would drop the target from this list, so it would
            # never receive the "death" penalty.
            alive_agents = [agent for agent in rewards if self.status_map[agent]]

            for agent in alive_agents:
                # the executed agent gets "death", every other one "execution"
                penalty_key = "death" if agent == target else "execution"
                rewards[agent] += self.penalties.get(penalty_key)

            # kill target only after the penalties have been handed out
            self.status_map[target] = 0
        else:
            # penalize agents for executing a dead one
            for agent in self.get_ids("all", alive=True):
                rewards[agent] += self.penalties.get('execute_dead')
            logger.debug(f"Players tried to execute dead agent {target}")

            # increase the number of dead_man_execution in info
            self.infos["dead_man_execution"] += 1

        # update day
        self.day_count += 1

        return rewards