def make_world(self, wall_prob=0, wall_seed=10, food_prob=0.1, food_seed=10):
    """
    Initialise the world: generate walls, then spawn predators and preys
    with randomised traits on empty cells.
    """
    self.gen_wall(wall_prob, wall_seed)
    predators = {}
    preys = {}
    agents = [Agent() for _ in range(self.predator_num + self.prey_num)]
    empty_cells_ind = np.where(self.map == 0)
    perm = np.random.permutation(range(len(empty_cells_ind[0])))
    for i, agent in enumerate(agents):
        agent.name = 'agent {:d}'.format(i + 1)
        health = np.random.uniform(self.min_health, self.max_health)
        agent.health = health
        agent.original_health = health
        agent.birth_time = self.timestep
        agent.life = np.random.normal(500, scale=100)
        agent.age = np.random.randint(150)
        agent.resilience = np.random.uniform(self.min_resilience, self.max_resilience)
        agent.gene_resilience = agent.resilience
        agent.attack = np.random.uniform(self.min_attack, self.max_attack)
        agent.gene_attack = agent.attack
        if i < self.predator_num:
            agent.predator = True
            agent.id = self.max_id
            agent.speed = 1
            agent.hunt_square = self.max_hunt_square
            agent.property = [self._gen_power(i + 1), [0, 0, 1]]
        else:
            agent.predator = False
            agent.id = self.max_id
            agent.property = [self._gen_power(i + 1), [1, 0, 0]]
            agent.speed = np.random.randint(self.min_speed, self.max_speed)
            agent.gene_speed = agent.speed
        new_embedding = np.random.normal(size=[self.agent_emb_dim])
        self.agent_embeddings[agent.id] = new_embedding
        x = empty_cells_ind[0][perm[i]]
        y = empty_cells_ind[1][perm[i]]
        self.map[x][y] = self.max_id
        agent.pos = (x, y)
        self.large_map[x:self.large_map.shape[0]:self.map.shape[0],
                       y:self.large_map.shape[1]:self.map.shape[1]] = self.max_id
        self.max_id += 1
        if agent.predator:
            predators[agent.id] = agent
        else:
            preys[agent.id] = agent
    self.predators = predators
    self.preys = preys

def make_world(self, wall_prob=0, seed=100):
    """
    Initialise an environment.
    """
    self.gen_wall(wall_prob)
    predators = {}
    preys = {}
    agents = [Agent() for _ in range(self.predator_num + self.prey_num)]
    empty_cells_ind = np.where(self.map == 0)
    perm = np.random.permutation(range(len(empty_cells_ind[0])))
    for i, agent in enumerate(agents):
        agent.name = 'agent {:d}'.format(i + 1)
        health = np.random.uniform(self.min_health, self.max_health)
        agent.health = health
        agent.original_health = health
        agent.birth_time = self.timestep
        agent.life = np.random.normal(500, scale=100)
        agent.age = np.random.randint(350)
        if i < self.predator_num:
            agent.predator = True
            agent.id = self.max_id
            agent.speed = 1
            agent.hunt_square = self.max_hunt_square
            agent.property = [self._gen_power(i + 1), [0, 0, 1]]
        else:
            agent.predator = False
            agent.id = i + 1
            agent.property = [self._gen_power(i + 1), [1, 0, 0]]
        x = empty_cells_ind[0][perm[i]]
        y = empty_cells_ind[1][perm[i]]
        self.map[x][y] = self.max_id
        self.large_map[x:self.large_map.shape[0]:self.map.shape[0],
                       y:self.large_map.shape[1]:self.map.shape[1]] = self.max_id
        agent.pos = (x, y)
        self.max_id += 1
        if agent.predator:
            predators[agent.id] = agent
        else:
            preys[agent.id] = agent
    self.predators = predators
    self.preys = preys

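# Illustrative sketch (standalone, not part of the environment): the strided
# assignment `large_map[x::H, y::W] = id` used above writes an agent id into
# every periodic copy of cell (x, y) in the tiled map, so a local crop taken
# around the middle tile wraps around the grid edges (toroidal neighbourhood).
# All sizes and ids below are made up for the demo.
import numpy as np

H, W = 4, 4                                   # hypothetical map size
small = np.zeros((H, W), dtype=int)
large = np.tile(small, (3, 3))                # 3x3 tiling, like large_map

# Agent 7 sits on the right edge, agent 9 on the left edge of the same row.
for (x, y), agent_id in {(0, 3): 7, (0, 0): 9}.items():
    small[x, y] = agent_id
    large[x:large.shape[0]:H, y:large.shape[1]:W] = agent_id

# A 3x3 crop centred on agent 7's middle copy shows agent 9 as its right-hand
# neighbour, because the map wraps around at the edge.
x, y = 0, 3
crop = large[H + x - 1:H + x + 2, W + y - 1:W + y + 2]
print(crop)
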
def increase_prey(self, prob):
    '''
    Generates new preys.

    Args:
        prob: Ratio against the current population which determines how many
            new agents are generated.
    '''
    num = max(1, int(self.prey_num * prob))
    self.increase_preys = num
    ind = np.where(self.map == 0)
    perm = np.random.permutation(np.arange(len(ind[0])))
    if self.experiment_type == 'variation':
        total = len(self.random_preys) + len(self.trained_preys) + len(self.training_preys)
        p = [
            len(self.random_preys) / total,
            len(self.trained_preys) / total,
            len(self.training_preys) / total
        ]
    for i in range(num):
        agent = Agent()
        agent.health = 1
        agent.original_health = 1
        agent.birth_time = self.timestep
        agent.predator = False
        agent.id = self.max_id
        self.max_id += 1
        agent.property = [self._gen_power(agent.id), [1, 0, 0]]
        x = ind[0][perm[i]]
        y = ind[1][perm[i]]
        if self.map[x][y] == 0:
            self.map[x][y] = agent.id
            self.large_map[x:self.large_map.shape[0]:self.map.shape[0],
                           y:self.large_map.shape[1]:self.map.shape[1]] = agent.id
            agent.pos = (x, y)
            if self.experiment_type == 'variation':
                # Assign the new prey's policy type in proportion to the
                # current sizes of the three subpopulations.
                exp_type = np.random.choice(3, p=p)
                if exp_type == 0:
                    agent.policy_type = 'random'
                    self.random_preys[agent.id] = agent
                elif exp_type == 1:
                    agent.policy_type = 'trained'
                    self.trained_preys[agent.id] = agent
                else:
                    agent.policy_type = 'training'
                    self.training_preys[agent.id] = agent
            else:
                self.preys[agent.id] = agent
            self.prey_num += 1

def crossover_prey(self, crossover_scope=3, crossover_rate=0.001):
    '''
    Mating function: generates new agents with a given probability when two
    preys are within a certain square of each other.

    Args:
        crossover_scope: Scope of the crossover. If two agents are within
            this square, they are candidates for crossover.
        crossover_rate: Probability that two candidate agents successfully
            reproduce new agents.
    '''
    ind = np.where(self.map == 0)
    perm = np.random.permutation(np.arange(len(ind[0])))
    index = 0
    for prey in list(self.prey_agents.values()):
        x, y = prey.pos
        local_map = self.large_map[
            (self.w + x - crossover_scope // 2):(self.w + x - crossover_scope // 2 + crossover_scope),
            (self.h + y - crossover_scope // 2):(self.h + y - crossover_scope // 2 + crossover_scope)]
        agent_indices = np.where(local_map > 0)
        flag = True
        if len(agent_indices[0]) == 0 or prey.crossover:
            continue
        for candidate_x, candidate_y in zip(agent_indices[0], agent_indices[1]):
            candidate_id = local_map[candidate_x, candidate_y]
            candidate_agent = self.agents[candidate_id]
            prey.checked.append(candidate_agent.id)
            if (not candidate_agent.predator and not candidate_agent.crossover
                    and candidate_agent.id != prey.id
                    and prey.id not in candidate_agent.checked
                    and prey.age > self.args.min_crossover_age
                    and candidate_agent.age > self.args.min_crossover_age
                    and len(self.prey_agents) <= self.args.prey_capacity):
                candidate_agent.get_closer = True
                if np.random.rand() < crossover_rate and flag:
                    for i in range(np.random.randint(self.args.max_prey_offsprings) + 1):
                        candidate_agent.crossover = True
                        prey.crossover = True
                        child = Agent()
                        child.id = self.max_id
                        self.max_id += 1
                        child.predator = False
                        child.life = np.random.normal(500, scale=100)
                        child.health = 1
                        new_embedding = np.random.normal(size=[self.agent_emb_dim])
                        self.agent_embeddings[child.id] = new_embedding
                        child.hunt_square = self.max_hunt_square
                        child.property = [self._gen_power(child.id), [1, 0, 0]]
                        x = ind[0][perm[index]]
                        y = ind[1][perm[index]]
                        index += 1
                        self.map[x][y] = child.id
                        self.large_map[x:self.large_map.shape[0]:self.map.shape[0],
                                       y:self.large_map.shape[1]:self.map.shape[1]] = child.id
                        child.pos = (x, y)
                        self.prey_num += 1
                        if self.experiment_type == 'variation':
                            # The child inherits one parent's policy type at random.
                            rand = np.random.rand()
                            if rand < 0.5:
                                child.policy_type = prey.policy_type
                            else:
                                child.policy_type = candidate_agent.policy_type
                            if child.policy_type == 'random':
                                self.random_preys[child.id] = child
                            elif child.policy_type == 'trained':
                                self.trained_preys[child.id] = child
                            else:
                                self.training_preys[child.id] = child
                        else:
                            self.preys[child.id] = child
                        # candidate_agent.health -= 0.1
                        # prey.health -= 0.1
                        self.increase_preys += 1
                    flag = False

def add_preys(self, num):
    '''
    Add preys.

    Args:
        num: Number of preys this function generates.
    '''
    self.increase_preys += num
    ind = np.where(self.map == 0)
    perm = np.random.permutation(np.arange(len(ind[0])))
    if self.experiment_type == 'variation':
        total = len(self.random_preys) + len(self.trained_preys) + len(self.training_preys)
        p = [
            len(self.random_preys) / total,
            len(self.trained_preys) / total,
            len(self.training_preys) / total
        ]
    for i in range(num):
        agent = Agent()
        agent.health = 1
        agent.original_health = 1
        agent.birth_time = self.timestep
        agent.predator = False
        agent.life = np.random.normal(500, 100)
        agent.id = self.max_id
        self.max_id += 1
        agent.property = [self._gen_power(agent.id), [1, 0, 0]]
        x = ind[0][perm[i]]
        y = ind[1][perm[i]]
        if self.map[x][y] == 0:
            self.map[x][y] = agent.id
            self.prey_num += 1
            self.large_map[x:self.large_map.shape[0]:self.map.shape[0],
                           y:self.large_map.shape[1]:self.map.shape[1]] = agent.id
            agent.pos = (x, y)
            if self.experiment_type == 'variation':
                exp_type = np.random.choice(3, p=p)
                if exp_type == 0:
                    agent.policy_type = 'random'
                    self.random_preys[agent.id] = agent
                elif exp_type == 1:
                    agent.policy_type = 'trained'
                    self.trained_preys[agent.id] = agent
                else:
                    agent.policy_type = 'training'
                    self.training_preys[agent.id] = agent
            else:
                self.preys[agent.id] = agent

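# Illustrative sketch (standalone): in the 'variation' experiment above, a new
# prey's policy type is drawn in proportion to the current sizes of the three
# subpopulations, so their relative shares are preserved in expectation. The
# counts below are made up.
import numpy as np

counts = {'random': 12, 'trained': 30, 'training': 18}   # hypothetical sizes
total = sum(counts.values())
p = [counts['random'] / total, counts['trained'] / total, counts['training'] / total]

exp_type = np.random.choice(3, p=p)        # 0 -> random, 1 -> trained, 2 -> training
policy_type = ('random', 'trained', 'training')[exp_type]
print(policy_type)
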
def variation_make_world(self, wall_prob=0):
    """
    Generates an environment for the experiment which verifies whether agents
    are trained or not. There are three types of agents (random policy,
    trained policy without any continual learning, trained policy with
    continual learning), and the initial population is split equally among
    the three types.

    Args:
        wall_prob: Probability of generating a wall block at a specific
            coordinate.
    """
    self.gen_wall(wall_prob)
    random_predators = {}
    trained_predators = {}
    training_predators = {}
    random_preys = {}
    trained_preys = {}
    training_preys = {}
    agents = [Agent() for _ in range(self.predator_num + self.prey_num)]
    empty_cells_ind = np.where(self.map == 0)
    perm = np.random.permutation(range(len(empty_cells_ind[0])))
    random_predator_num = int(self.predator_num / 3.)
    trained_predator_num = int(self.predator_num / 3.)
    training_predator_num = int(self.predator_num - random_predator_num - trained_predator_num)
    random_prey_num = int(self.prey_num / 3.)
    trained_prey_num = int(self.prey_num / 3.)
    training_prey_num = int(self.prey_num - random_prey_num - trained_prey_num)
    for i, agent in enumerate(agents):
        health = np.random.uniform(self.min_health, self.max_health)
        agent.health = health
        agent.original_health = health
        agent.birth_time = self.timestep
        agent.life = np.random.normal(500, scale=100)
        agent.age = np.random.randint(150)
        if i < self.predator_num:
            agent.predator = True
            agent.id = self.max_id
            agent.speed = 1
            agent.hunt_square = self.max_hunt_square
            agent.property = [self._gen_power(i + 1), [0, 0, 1]]
            if i < random_predator_num:
                agent.policy_type = 'random'
            elif random_predator_num <= i < random_predator_num + trained_predator_num:
                agent.policy_type = 'trained'
            else:
                agent.policy_type = 'training'
        else:
            agent.predator = False
            agent.id = i + 1
            agent.property = [self._gen_power(i + 1), [1, 0, 0]]
            if i < random_prey_num + self.predator_num:
                agent.policy_type = 'random'
            elif random_prey_num + self.predator_num <= i < random_prey_num + trained_prey_num + self.predator_num:
                agent.policy_type = 'trained'
            else:
                agent.policy_type = 'training'
        x = empty_cells_ind[0][perm[i]]
        y = empty_cells_ind[1][perm[i]]
        self.map[x][y] = self.max_id
        self.large_map[x:self.large_map.shape[0]:self.map.shape[0],
                       y:self.large_map.shape[1]:self.map.shape[1]] = self.max_id
        agent.pos = (x, y)
        self.max_id += 1
        if agent.predator:
            if agent.policy_type == 'random':
                random_predators[agent.id] = agent
            elif agent.policy_type == 'trained':
                trained_predators[agent.id] = agent
            else:
                training_predators[agent.id] = agent
        else:
            if agent.policy_type == 'random':
                random_preys[agent.id] = agent
            elif agent.policy_type == 'trained':
                trained_preys[agent.id] = agent
            else:
                training_preys[agent.id] = agent
    self.random_predators = random_predators
    self.trained_predators = trained_predators
    self.training_predators = training_predators
    self.random_preys = random_preys
    self.trained_preys = trained_preys
    self.training_preys = training_preys

def crossover_prey(self, crossover_scope=3, crossover_rate=0.001, mutation_prob=0.001):
    '''
    Mating function for preys with inheritable genes. Two preys within
    `crossover_scope` of each other reproduce with probability
    `crossover_rate`; each inherited gene (attack, resilience, speed) is a
    random blend of the parents' genes and mutates with probability
    `mutation_prob`.
    '''
    ind = np.where(self.map == 0)
    perm = np.random.permutation(np.arange(len(ind[0])))
    index = 0
    for prey in list(self.preys.values()):
        x, y = prey.pos
        local_map = self.large_map[
            (self.w + x - crossover_scope // 2):(self.w + x - crossover_scope // 2 + crossover_scope),
            (self.h + y - crossover_scope // 2):(self.h + y - crossover_scope // 2 + crossover_scope)]
        agent_indices = np.where(local_map > 0)
        flag = True
        if len(agent_indices[0]) == 0 or prey.crossover:
            continue
        for candidate_x, candidate_y in zip(agent_indices[0], agent_indices[1]):
            candidate_id = local_map[candidate_x, candidate_y]
            candidate_agent = self.agents[candidate_id]
            # prey.checked.append(candidate_agent.id)
            if (not candidate_agent.predator and not candidate_agent.crossover
                    and candidate_agent.id != prey.id
                    and prey.id not in candidate_agent.checked
                    and prey.age > self.args.min_crossover_age
                    and candidate_agent.age > self.args.min_crossover_age
                    and len(self.preys) <= self.args.prey_capacity):
                candidate_agent.get_closer = True
                if np.random.rand() < crossover_rate and flag:
                    # for i in range(np.random.randint(self.args.max_prey_offsprings) + 1):
                    child = Agent()
                    child.id = self.max_id
                    self.max_id += 1
                    child.predator = False
                    child.life = np.random.normal(500, scale=100)
                    child.health = 1
                    # Attack gene: blend of the parents' genes, with optional mutation.
                    rate = np.random.rand()
                    if np.random.rand() < mutation_prob:
                        child.attack = (rate * prey.gene_attack +
                                        (1 - rate) * candidate_agent.gene_attack) + np.random.normal()
                    else:
                        child.attack = (rate * prey.gene_attack +
                                        (1 - rate) * candidate_agent.gene_attack)
                    # Resilience gene.
                    rate = np.random.rand()
                    if np.random.rand() < mutation_prob:
                        child.resilience = (rate * prey.gene_resilience +
                                            (1 - rate) * candidate_agent.gene_resilience) + np.random.normal()
                    else:
                        child.resilience = (rate * prey.gene_resilience +
                                            (1 - rate) * candidate_agent.gene_resilience)
                    # Speed gene, kept within the valid range as an integer.
                    rate = np.random.rand()
                    if np.random.rand() < mutation_prob:
                        speed = (rate * prey.gene_speed +
                                 (1 - rate) * candidate_agent.gene_speed) + np.random.normal()
                        speed = np.clip(speed, self.min_speed, self.max_speed)
                        child.speed = int(speed)
                    else:
                        child.speed = int(np.round(rate * prey.gene_speed +
                                                   (1 - rate) * candidate_agent.gene_speed))
                    child.gene_attack = child.attack
                    child.gene_resilience = child.resilience
                    child.gene_speed = child.speed
                    prey.reward = child.gene_attack + child.gene_resilience
                    candidate_agent.reward = child.gene_attack + child.gene_resilience
                    new_embedding = np.random.normal(size=[self.agent_emb_dim])
                    self.agent_embeddings[child.id] = new_embedding
                    child.hunt_square = self.max_hunt_square
                    child.property = [self._gen_power(child.id), [1, 0, 0]]
                    x = ind[0][perm[index]]
                    y = ind[1][perm[index]]
                    index += 1
                    self.map[x][y] = child.id
                    self.large_map[x:self.large_map.shape[0]:self.map.shape[0],
                                   y:self.large_map.shape[1]:self.map.shape[1]] = child.id
                    child.pos = (x, y)
                    self.preys[child.id] = child
                    self.prey_num += 1
                    # candidate_agent.health -= 0.1
                    # prey.health -= 0.1
                    self.increase_preys += 1
                    flag = False
                    candidate_agent.crossover = True
                    prey.crossover = True

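# Illustrative sketch (standalone): the gene inheritance used in the
# crossover_prey variant above. Each child gene is a random convex blend of
# the two parents' genes, plus a rare Gaussian mutation; this simplified
# version always rounds and clips the speed gene, whereas the environment code
# does so only in the relevant branches. Bounds and values are made up.
import numpy as np

def blend_gene(parent_a, parent_b, mutation_prob=0.001):
    rate = np.random.rand()
    gene = rate * parent_a + (1 - rate) * parent_b
    if np.random.rand() < mutation_prob:
        gene += np.random.normal()            # rare Gaussian mutation
    return gene

min_speed, max_speed = 1, 4                   # hypothetical bounds
speed = int(np.clip(np.round(blend_gene(2, 3)), min_speed, max_speed))
print(speed)
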
def add_preys(self, num):
    '''
    Add `num` preys with randomised traits on empty cells.
    '''
    self.increase_preys += num
    ind = np.where(self.map == 0)
    perm = np.random.permutation(np.arange(len(ind[0])))
    for i in range(num):
        agent = Agent()
        health = np.random.uniform(self.min_health, self.max_health)
        agent.health = health
        agent.birth_time = self.timestep
        agent.predator = False
        agent.life = np.random.normal(500, 100)
        agent.attack = np.random.uniform(self.min_attack, self.max_attack)
        agent.resilience = np.random.uniform(self.min_resilience, self.max_resilience)
        agent.gene_attack = agent.attack
        agent.gene_resilience = agent.resilience
        agent.speed = np.random.randint(self.min_speed, self.max_speed)
        agent.gene_speed = agent.speed
        agent.id = self.max_id
        self.max_id += 1
        agent.property = [self._gen_power(agent.id), [1, 0, 0]]
        x = ind[0][perm[i]]
        y = ind[1][perm[i]]
        if self.map[x][y] == 0:
            self.map[x][y] = agent.id
            self.large_map[x:self.large_map.shape[0]:self.map.shape[0],
                           y:self.large_map.shape[1]:self.map.shape[1]] = agent.id
            agent.pos = (x, y)
            self.preys[agent.id] = agent

def crossover_prey(self, prob, mutation_prob=0.001):
    '''
    Crossover function for preys.

    Args:
        prob: Ratio against the population. This determines how many agents
            are chosen for the crossover.
        mutation_prob: Mutation probability.
    '''
    num = max(1, int(self.prey_num * prob))
    self.increase_preys = num
    ind = np.where(self.map == 0)
    perm = np.random.permutation(np.arange(len(ind[0])))
    index = 0
    preys = list(self.preys.values())
    np.random.shuffle(preys)
    for i in range(num):
        prey = preys[i * 2]
        candidate_agent = preys[i * 2 + 1]
        child = Agent()
        child.id = self.max_id
        self.max_id += 1
        child.predator = False
        child.life = np.random.normal(500, scale=100)
        child.health = 1
        child.attack = 1
        # Resilience gene: blend of the parents' genes, with optional mutation.
        rate = np.random.rand()
        if np.random.rand() < mutation_prob:
            child.resilience = (rate * prey.gene_resilience +
                                (1 - rate) * candidate_agent.gene_resilience) + np.random.normal()
        else:
            child.resilience = (rate * prey.gene_resilience +
                                (1 - rate) * candidate_agent.gene_resilience)
        # Speed gene, kept within the valid range as an integer.
        rate = np.random.rand()
        if np.random.rand() < mutation_prob:
            speed = (rate * prey.gene_speed +
                     (1 - rate) * candidate_agent.gene_speed) + np.random.normal()
            speed = np.clip(speed, self.min_speed, self.max_speed)
            child.speed = int(speed)
        else:
            child.speed = int(np.round(rate * prey.gene_speed +
                                       (1 - rate) * candidate_agent.gene_speed))
        child.gene_attack = child.attack
        child.gene_resilience = child.resilience
        child.gene_speed = child.speed
        prey.reward = child.gene_attack + child.gene_resilience
        candidate_agent.reward = child.gene_attack + child.gene_resilience
        new_embedding = np.random.normal(size=[self.agent_emb_dim])
        self.agent_embeddings[child.id] = new_embedding
        child.hunt_square = self.max_hunt_square
        child.property = [self._gen_power(child.id), [1, 0, 0]]
        x = ind[0][perm[i]]
        y = ind[1][perm[i]]
        index += 1
        self.map[x][y] = child.id
        self.large_map[x:self.large_map.shape[0]:self.map.shape[0],
                       y:self.large_map.shape[1]:self.map.shape[1]] = child.id
        child.pos = (x, y)
        self.preys[child.id] = child
        self.prey_num += 1

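# Illustrative sketch (standalone): the pairing scheme used by the last
# crossover_prey variant. The prey list is shuffled once and consecutive pairs
# (preys[2*i], preys[2*i + 1]) become parents, which implicitly assumes the
# population holds at least 2 * num preys. The ids below are made up.
import numpy as np

preys = list(range(10))                     # hypothetical prey ids
num = max(1, int(len(preys) * 0.2))         # prob = 0.2 -> 2 offspring
np.random.shuffle(preys)
pairs = [(preys[2 * i], preys[2 * i + 1]) for i in range(num)]
print(pairs)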