def reset(self):
    """Restart the environment and hand back the first observation.

    Logs the call, re-runs ``self.initialize()`` and then asks
    ``self.observe`` for the phase-0 observation.

    Returns:
        observation (object): the value produced by ``self.observe(phase=0)``.
    """
    logger.info("Reset called")

    # the state must be re-initialized before it is observed
    self.initialize()

    first_observation = self.observe(phase=0)
    return first_observation
def __init__(self, num_players, roles=None, flex=0):
    """
    Validate the player count, assign the roles and create the attributes
    that ``initialize`` fills in.

    :param num_players: int, number of players, must be at least 5.
        An EnvContext holding a 'num_players' entry is accepted as well.
    :param roles: list of str, one role per agent. When None the roles are
        generated: floor(sqrt(num_players)) wolves, the rest villagers,
        in shuffled order.
    :param flex: float [0,1), percentage of targets to consider when voting,
        0 is just one, depends on the number of players.
        EG: if num_players=10 -> targets are list of 10 elements,
        10*0.5=5 -> first 5 players are considered when voting.
        The resulting count is never lower than 1.
    :raises AttributeError: if an EnvContext without 'num_players' is given
    :raises AssertionError: if there are fewer than 5 players or the length
        of ``roles`` differs from ``num_players``
    """
    if isinstance(num_players, EnvContext):
        try:
            num_players = num_players['num_players']
        except KeyError as err:
            # keep the original KeyError attached as the cause
            raise AttributeError(
                "Attribute 'num_players' should be present in the EnvContext"
            ) from err

    # AssertionError is raised explicitly instead of using `assert` so the
    # check is not stripped under `python -O`, while callers still see the
    # same exception type as before.
    if not (num_players >= 5):
        raise AssertionError("Number of player should be >= 5")

    if roles is None:
        # number of wolves should be less than villagers
        num_wolves = math.floor(math.sqrt(num_players))
        num_villagers = num_players - num_wolves
        roles = [ww] * num_wolves + [vil] * num_villagers
        random.shuffle(roles)
        logger.info(
            f"Starting game with {num_players} players: {num_villagers} {vil} and {num_wolves} {ww}"
        )
    elif len(roles) != num_players:
        raise AssertionError(
            f"Length of role list ({len(roles)}) should be equal to number of players ({num_players})"
        )

    self.num_players = num_players
    self.roles = roles
    self.penalties = CONFIGS['penalties']
    self.max_days = CONFIGS['max_days']

    # flex == 0 means a single target; small non-zero values used to floor
    # to 0 targets, so the count is clamped to at least one.
    self.flex = max(1, math.floor(num_players * flex))

    # define empty attributes, refer to initialize method for more info
    self.role_map = None
    self.status_map = None
    self.is_night = True
    self.is_comm = True
    self.day_count = 0
    self.is_done = False
    self.targets = None
    self.previous_target = None
    self.custom_metrics = None

    self.initialize()
def reset(self):
    """Restart the environment and return the converted initial observation.

    Returns:
        observation (object): the first of the four values returned by
        ``self.convert`` for the phase-0 observation.
    """
    # the log line is emitted only when ep_step matches Params.log_step
    if Params.log_step == self.ep_step:
        logger.info("Reset called")

    self.initialize()

    players = range(self.num_players)
    # placeholder inputs: every signal slot and every target is -1
    blank_signal = {pid: [-1] * self.signal_length for pid in players}
    blank_targets = {pid: -1 for pid in players}

    raw_obs = self.observe(phase=0, signal=blank_signal, targets=blank_targets)

    # convert() yields four values; only the observation is kept here
    converted_obs, _, _, _ = self.convert(raw_obs, {}, {}, {}, 0)
    return converted_obs
def check_done(self, rewards):
    """
    Decide whether the game is over and flag every dead agent as done.

    :param rewards: dict, maps agent id_ to its current reward; it is
        updated in place when one of the two sides wins
    :return:
        dones: dict mapping agent id_ to a done flag (True for dead agents)
        rewards: the same dict that was passed in, with the 'victory' and
            'lost' penalties added when a side has won
    """
    # start from a 0 flag for every id in rewards, then overwrite per player
    dones = dict.fromkeys(rewards.keys(), 0)
    for player in range(self.num_players):
        # a player is done as soon as its status_map entry is falsy
        dones[player] = not self.status_map[player]

    living = self.get_ids('all', alive=True)

    # wolves win once they are at least as numerous as the villagers
    wolves_win = len(self.get_ids(ww)) >= len(self.get_ids(vil))

    # villagers win when every living player is a villager
    villagers_win = all(
        role == vil
        for pid, role in self.role_map.items()
        if pid in living
    )

    # (flag, winning role, losing role, log banner, metric key)
    outcomes = (
        (wolves_win, ww, vil, f"\n{'#' * 10}\nWolves won\n{'#' * 10}\n", 'win_wolf'),
        (villagers_win, vil, ww, f"\n{'#' * 10}\nVillagers won\n{'#' * 10}\n", 'win_vil'),
    )

    for happened, winners, losers, banner, metric in outcomes:
        if not happened:
            continue

        # set flag to true (for reset)
        self.is_done = True

        # NOTE(review): alive=False presumably selects the role members
        # regardless of their status -- confirm against get_ids
        for pid in self.get_ids(winners, alive=False):
            rewards[pid] += self.penalties.get('victory')
        for pid in self.get_ids(losers, alive=False):
            rewards[pid] += self.penalties.get('lost')

        logger.info(banner)
        self.custom_metrics[metric] += 1

    # the game also stops once the day limit is reached
    if self.day_count >= self.max_days - 1:
        self.is_done = True

    return dones, rewards
def __init__(self, configs, roles=None, flex=0):
    """
    Validate the configuration, assign the roles and create the attributes
    that ``initialize`` fills in.

    :param configs: EnvContext or dict holding a 'num_players' entry, or
        (for backward compatibility) the number of players as a plain int.
        The number of players must be at least 5.
    :param roles: list of str, one role per agent. When None the roles are
        generated: floor(sqrt(num_players)) wolves followed by the villagers.
    :param flex: float [0,1), percentage of targets to consider when voting,
        0 is just one, depends on the number of players.
        EG: if num_players=10 -> targets are list of 10 elements,
        10*0.5=5 -> first 5 players are considered when voting.
        The resulting count is never lower than 1.
    :raises AttributeError: if 'num_players' is missing from the config or
        ``configs`` has an unsupported type
    :raises AssertionError: if there are fewer than 5 players or the length
        of ``roles`` differs from the number of players
    """
    if isinstance(configs, (EnvContext, dict)):
        # mapping-like config: read the number of players from it
        try:
            num_players = configs['num_players']
        except KeyError as err:
            # keep the original KeyError attached as the cause
            raise AttributeError(
                "Attribute 'num_players' should be present in the EnvContext"
            ) from err
    elif isinstance(configs, int):
        # used for back compatibility
        num_players = configs
    else:
        raise AttributeError(f"Type {type(configs)} is invalid for config")

    # AssertionError is raised explicitly instead of using `assert` so the
    # check is not stripped under `python -O`, while callers still see the
    # same exception type as before.
    if not (num_players >= 5):
        raise AssertionError("Number of player should be >= 5")

    if roles is None:
        # number of wolves should be less than villagers
        num_wolves = math.floor(math.sqrt(num_players))
        num_villagers = num_players - num_wolves
        # Roles stay in this fixed order (wolves first).
        # NOTE(review): shuffling appears to be delegated to
        # shuffle_map/unshuffle_map -- confirm in initialize().
        roles = [ww] * num_wolves + [vil] * num_villagers
        logger.info(f"Starting game with {num_players} players: {num_villagers} {vil} and {num_wolves} {ww}")
    else:
        if len(roles) != num_players:
            raise AssertionError(
                f"Length of role list ({len(roles)}) should be equal to number of players ({num_players})"
            )
        num_wolves = len([elem for elem in roles if elem == ww])

    self.num_players = num_players
    self.num_wolves = num_wolves
    self.roles = roles
    self.penalties = CONFIGS['penalties']
    self.max_days = CONFIGS['max_days']
    self.signal_length = CONFIGS['signal_length']
    self.signal_range = CONFIGS['signal_range']

    # used for logging game
    self.ep_step = 0

    # flex == 0 means a single target; small non-zero values used to floor
    # to 0 targets, so the count is clamped to at least one.
    self.flex = max(1, math.floor(num_players * flex))

    # define empty attributes, refer to initialize method for more info
    self.status_map = None
    self.shuffle_map = None
    self.unshuffle_map = None
    self.is_night = True
    self.is_comm = True
    self.day_count = 0
    self.phase = 0
    self.is_done = False
    self.custom_metrics = None
    self.role_map = None

    self.initialize()