def load_rtneat(self, location, pop, team=constants.OBJECT_TYPE_TEAM_0):
    location = os.path.relpath("/") + location
    if os.path.exists(location):
        OpenNero.set_ai(
            "rtneat-%s" % team,
            OpenNero.RTNEAT(str(location),
                            "data/ai/neat-params.dat",
                            constants.pop_size,
                            OpenNero.get_environment().agent_info.reward))
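A small usage sketch for the loader above, assuming it is called on the module object returned by `module.getMod()` and that a previously saved population file exists at the given path; the method owner and the file name here are placeholders, not files shipped with OpenNERO.

# Hypothetical call: restore a saved rtNEAT population for team 0.
mod = module.getMod()
mod.load_rtneat('/Hw5/saved_team.gnm', constants.pop_size,
                team=constants.OBJECT_TYPE_TEAM_0)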
def deploy(self, ai='rtneat', team=constants.OBJECT_TYPE_TEAM_0):
    OpenNero.disable_ai()
    if ai == 'rtneat':
        OpenNero.set_ai('rtneat-%s' % team, None)
    self.environment.remove_all_agents(team)
    for _ in range(constants.pop_size):
        self.spawnAgent(ai=ai, team=team)
    OpenNero.enable_ai()
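A hedged usage sketch: assuming `deploy` is reached through the module singleton (as in the NeroModule class further below), fielding populations for both teams might look like this. That `'qlearning'` is an accepted value for `ai` is an assumption based on the `spawnAgent(ai='qlearning', ...)` calls in `load_team` below.

# Hypothetical call sites for the Deploy buttons (see NeroModule.deploy below).
mod = module.getMod()
# Start evolving a fresh rtNEAT team; the old AI handle is cleared first.
mod.deploy(ai='rtneat', team=constants.OBJECT_TYPE_TEAM_0)
# Field a Q-learning team on the opposing side (assumed to be supported).
mod.deploy(ai='qlearning', team=constants.OBJECT_TYPE_TEAM_1)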
def __init__(self):
    """
    Create the environment
    """
    OpenNero.Environment.__init__(self)

    self.curr_id = 0
    self.max_steps = 20
    self.MAX_DIST = math.hypot(constants.XDIM, constants.YDIM)
    self.states = {}
    self.teams = {}
    self.script = 'Hw5/menu.py'

    abound = OpenNero.FeatureVectorInfo() # actions
    sbound = OpenNero.FeatureVectorInfo() # sensors
    rbound = OpenNero.FeatureVectorInfo() # rewards

    # actions
    abound.add_continuous(-1, 1) # forward/backward speed (gets multiplied by constants.MAX_MOVEMENT_SPEED)
    abound.add_continuous(-constants.MAX_TURN_RADIANS, constants.MAX_TURN_RADIANS) # left/right turn (in radians)

    # sensor dimensions
    for a in range(constants.N_SENSORS):
        sbound.add_continuous(0, 1)

    # Rewards
    # The environment returns the raw fitness dimensions as they accrue each
    # step. The ScoreHelper then combines them (e.g. into a Z-score) to
    # calculate the final rtNEAT fitness.
    for f in constants.FITNESS_DIMENSIONS:
        # we don't care about the bounds of the individual dimensions
        rbound.add_continuous(-sys.float_info.max, sys.float_info.max) # range for reward

    # initialize the rtNEAT algorithm parameters
    # input layer has enough nodes for all the observations plus a bias
    # output layer has enough values for all the actions
    # population size matches ours
    # 1.0 is the weight initialization noise
    rtneat = OpenNero.RTNEAT("data/ai/neat-params.dat",
                             OpenNero.Population(),
                             constants.pop_size,
                             1)
    key = "rtneat-%s" % constants.OBJECT_TYPE_TEAM_0
    OpenNero.set_ai(key, rtneat)
    print "get_ai(%s): %s" % (key, OpenNero.get_ai(key))

    # set the initial lifetime
    lifetime = module.getMod().lt
    rtneat.set_lifetime(lifetime)
    print 'rtNEAT lifetime:', lifetime

    self.agent_info = OpenNero.AgentInitInfo(sbound, abound, rbound)
def __init__(self):
    """
    Create the environment
    """
    OpenNero.Environment.__init__(self)

    self.curr_id = 0
    self.max_steps = 20
    self.MAX_DIST = math.hypot(constants.XDIM, constants.YDIM)
    self.states = {}
    self.teams = {}
    self.script = 'Hw5/menu.py'

    abound = OpenNero.FeatureVectorInfo() # actions
    sbound = OpenNero.FeatureVectorInfo() # sensors
    rbound = OpenNero.FeatureVectorInfo() # rewards

    # actions
    abound.add_continuous(-1, 1) # forward/backward speed (gets multiplied by constants.MAX_MOVEMENT_SPEED)
    abound.add_continuous(-constants.MAX_TURN_RADIANS, constants.MAX_TURN_RADIANS) # left/right turn (in radians)

    # sensor dimensions
    for a in range(constants.N_SENSORS):
        sbound.add_continuous(0, 1)

    # Rewards
    # The environment returns the raw fitness dimensions as they accrue each
    # step. The ScoreHelper then combines them (e.g. into a Z-score) to
    # calculate the final rtNEAT fitness.
    for f in constants.FITNESS_DIMENSIONS:
        # we don't care about the bounds of the individual dimensions
        rbound.add_continuous(-sys.float_info.max, sys.float_info.max) # range for reward

    # initialize the rtNEAT algorithm parameters
    # input layer has enough nodes for all the observations plus a bias
    # output layer has enough values for all the actions
    # population size matches ours
    # 1.0 is the weight initialization noise
    rtneat = OpenNero.RTNEAT("data/ai/neat-params.dat",
                             constants.N_SENSORS,
                             constants.N_ACTIONS,
                             constants.pop_size,
                             1.0,
                             rbound,
                             False)
    key = "rtneat-%s" % constants.OBJECT_TYPE_TEAM_0
    OpenNero.set_ai(key, rtneat)
    print "get_ai(%s): %s" % (key, OpenNero.get_ai(key))

    # set the initial lifetime
    lifetime = module.getMod().lt
    rtneat.set_lifetime(lifetime)
    print 'rtNEAT lifetime:', lifetime

    self.agent_info = OpenNero.AgentInitInfo(sbound, abound, rbound)
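The reward comments above refer to a ScoreHelper that folds the per-dimension rewards into one scalar rtNEAT fitness. That helper is not shown in these snippets; the following is only a minimal sketch of the kind of Z-score weighting the comments describe, with `weights`, `means`, and `stds` as assumed inputs indexed in the same order as `constants.FITNESS_DIMENSIONS`.

# Sketch only: combine raw fitness dimensions into one scalar via Z-scores.
def combine_fitness(raw, weights, means, stds):
    total = 0.0
    for i, value in enumerate(raw):
        if stds[i] > 0:
            z = (value - means[i]) / stds[i]   # how unusual this agent's score is
        else:
            z = 0.0
        total += weights[i] * z                # per-dimension slider weight
    return total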
def start_rtneat(self, pop_size):
    " start the rtneat learning demo "
    OpenNero.disable_ai()
    #self.environment = RoombaEnvironment(constants.XDIM, constants.YDIM, self)
    #set_environment(self.environment)
    #self.reset_sandbox()
    # Create RTNEAT object
    rbound = OpenNero.FeatureVectorInfo()
    rbound.add_continuous(-sys.float_info.max, sys.float_info.max)
    rtneat = OpenNero.RTNEAT("data/ai/neat-params.dat", 2, 1, pop_size,
                             1.0, rbound, False)
    rtneat.set_weight(0, 1)
    OpenNero.set_ai("rtneat", rtneat)
    OpenNero.enable_ai()
    self.distribute_bots(pop_size, "data/shapes/roomba/RoombaRTNEAT.xml")
def start_rtneat(self, team=constants.OBJECT_TYPE_TEAM_0):
    # initialize the rtNEAT algorithm parameters
    # input layer has enough nodes for all the observations plus a bias
    # output layer has enough values for all the actions
    # population size matches ours
    # 1.0 is the weight initialization noise
    rtneat = OpenNero.RTNEAT("data/ai/neat-params.dat",
                             constants.N_SENSORS + 1,
                             constants.N_ACTIONS,
                             constants.pop_size,
                             1.0,
                             rtneat_rewards(),
                             False)
    key = "rtneat-%s" % team
    OpenNero.set_ai(key, rtneat)
    print "get_ai(%s): %s" % (key, OpenNero.get_ai(key))
    rtneat.set_lifetime(self.environment.lifetime)
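`rtneat_rewards()` is called above (and again in the snippets that follow) but is not defined in this collection. Based on how `rbound` is built in the environment constructors earlier, it presumably returns a `FeatureVectorInfo` with one effectively unbounded continuous entry per fitness dimension; the following is a sketch under that assumption.

def rtneat_rewards():
    """
    Sketch of the reward-bounds helper assumed by start_rtneat above:
    one unbounded continuous dimension per fitness component.
    """
    reward = OpenNero.FeatureVectorInfo()
    for dimension in constants.FITNESS_DIMENSIONS:
        reward.add_continuous(-sys.float_info.max, sys.float_info.max)
    return reward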
def start_rtneatq(self, team=constants.OBJECT_TYPE_TEAM_0):
    # initialize the rtNEAT+Q algorithm parameters
    # input layer has enough nodes for all the observations plus a bias
    # output layer has enough values for all the wires
    # population size matches ours
    # 1.0 is the weight initialization noise
    rtneatq = OpenNero.RTNEAT("data/ai/neat-params.dat",
                              constants.N_SENSORS + 1,
                              constants.N_ACTION_CANDIDATES * (constants.N_ACTIONS + 1),
                              constants.pop_size,
                              1.0,
                              rtneat_rewards(),
                              False)
    key = "rtneatq-%s" % team
    OpenNero.set_ai(key, rtneatq)
    print "get_ai(%s): %s" % (key, OpenNero.get_ai(key))
    rtneatq.set_lifetime(self.environment.lifetime)
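The rtNEAT+Q network above has `N_ACTION_CANDIDATES * (N_ACTIONS + 1)` outputs, i.e. each candidate contributes one value per action plus one Q-value estimate (the "wires" mentioned in the comment). A hedged sketch of how such an output vector could be decoded, assuming a per-candidate layout of `N_ACTIONS` action values followed by one Q value; the actual layout used by the agent code is not shown in these snippets.

def decode_rtneatq_output(outputs):
    """
    Sketch: split the flat network output into candidates and pick the
    candidate with the highest Q estimate (layout is an assumption).
    """
    stride = constants.N_ACTIONS + 1
    best_q, best_action = None, None
    for c in range(constants.N_ACTION_CANDIDATES):
        chunk = outputs[c * stride:(c + 1) * stride]
        actions, q = chunk[:-1], chunk[-1]
        if best_q is None or q > best_q:
            best_q, best_action = q, actions
    return best_action, best_q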
class NeroModule:
    def __init__(self):
        self.environment = None
        self.agent_id = None
        self.flag_loc = None
        self.flag_id = None

        self.set_speedup(constants.DEFAULT_SPEEDUP)

        x = constants.XDIM / 2.0
        y = constants.YDIM / 3.0
        self.spawn_x = {}
        self.spawn_y = {}
        self.set_spawn(x, y, constants.OBJECT_TYPE_TEAM_0)
        self.set_spawn(x, 2 * y, constants.OBJECT_TYPE_TEAM_1)

        # Bounds for sensors in neural network and advice language. These bounds are
        # used to convert sensor values between network and advice.
        self.sbounds_network = OpenNero.FeatureVectorInfo()
        self.sbounds_advice = OpenNero.FeatureVectorInfo()

        # Networks have better learning bias when cube sensors produce values in the
        # range [-1, 1], but the range [0, 1] is more intuitive in the advice
        # language. Wall sensors use the same range [0, 1] for both network and advice.
        # The sense() method in the ForageEnvironment class uses these network bounds
        # to scale the sensor values.
        for i in range(constants.N_SENSORS):
            self.sbounds_network.add_continuous(0, 1)
            self.sbounds_advice.add_continuous(0, 1)

        # The last sensor is the bias, which always takes the value 1 (upper bound).
        self.sbounds_network.add_continuous(0, 1)
        self.sbounds_advice.add_continuous(0, 1)

        print 'sbounds_network', self.sbounds_network

    def setup_map(self):
        """
        setup the test environment
        """
        OpenNero.disable_ai()

        if self.environment:
            error("Environment already created")
            return

        # create the environment - this also creates the rtNEAT object
        self.environment = self.create_environment()
        OpenNero.set_environment(self.environment)

        # world walls
        height = constants.HEIGHT + constants.OFFSET
        common.addObject("data/shapes/cube/Cube.xml",
                         OpenNero.Vector3f(constants.XDIM / 2, 0, height),
                         OpenNero.Vector3f(0, 0, 90),
                         scale=OpenNero.Vector3f(constants.WIDTH, constants.XDIM, constants.HEIGHT * 2),
                         label="World Wall0",
                         type=constants.OBJECT_TYPE_OBSTACLE)
        common.addObject("data/shapes/cube/Cube.xml",
                         OpenNero.Vector3f(0, constants.YDIM / 2, height),
                         OpenNero.Vector3f(0, 0, 0),
                         scale=OpenNero.Vector3f(constants.WIDTH, constants.YDIM, constants.HEIGHT * 2),
                         label="World Wall1",
                         type=constants.OBJECT_TYPE_OBSTACLE)
        common.addObject("data/shapes/cube/Cube.xml",
                         OpenNero.Vector3f(constants.XDIM, constants.YDIM / 2, height),
                         OpenNero.Vector3f(0, 0, 0),
                         scale=OpenNero.Vector3f(constants.WIDTH, constants.YDIM, constants.HEIGHT * 2),
                         label="World Wall2",
                         type=constants.OBJECT_TYPE_OBSTACLE)
        common.addObject("data/shapes/cube/Cube.xml",
                         OpenNero.Vector3f(constants.XDIM / 2, constants.YDIM, height),
                         OpenNero.Vector3f(0, 0, 90),
                         scale=OpenNero.Vector3f(constants.WIDTH, constants.XDIM, constants.HEIGHT * 2),
                         label="World Wall3",
                         type=constants.OBJECT_TYPE_OBSTACLE)

        # Add an obstacle wall in the middle
        common.addObject("data/shapes/cube/Cube.xml",
                         OpenNero.Vector3f(constants.XDIM / 2, constants.YDIM / 2, height),
                         OpenNero.Vector3f(0, 0, 90),
                         scale=OpenNero.Vector3f(constants.WIDTH, constants.YDIM / 4, constants.HEIGHT * 2),
                         label="World Wall4",
                         type=constants.OBJECT_TYPE_OBSTACLE)

        # Add some trees
        for i in (0.25, 0.75):
            for j in (0.25, 0.75):
                # don't collide with trees - they are over 500 triangles each
                common.addObject("data/shapes/tree/Tree.xml",
                                 OpenNero.Vector3f(i * constants.XDIM, j * constants.YDIM, constants.OFFSET),
                                 OpenNero.Vector3f(0, 0, 0),
                                 scale=OpenNero.Vector3f(1, 1, 1),
                                 label="Tree %d %d" % (10 * i, 10 * j),
                                 type=constants.OBJECT_TYPE_LEVEL_GEOM)
                # collide with their trunks represented with cubes instead
                common.addObject("data/shapes/cube/Cube.xml",
                                 OpenNero.Vector3f(i * constants.XDIM, j * constants.YDIM, constants.OFFSET),
                                 OpenNero.Vector3f(0, 0, 0),
                                 scale=OpenNero.Vector3f(1, 1, constants.HEIGHT),
                                 type=constants.OBJECT_TYPE_OBSTACLE)

        # Add the surrounding Environment
        common.addObject("data/terrain/NeroWorld.xml",
                         OpenNero.Vector3f(constants.XDIM / 2, constants.YDIM / 2, 0),
                         scale=OpenNero.Vector3f(1.2, 1.2, 1.2),
                         label="NeroWorld",
                         type=constants.OBJECT_TYPE_LEVEL_GEOM)

        return True

    def create_environment(self):
        return NeroEnvironment.NeroEnvironment()

    def remove_flag(self):
        if self.flag_id:
            common.removeObject(self.flag_id)

    def change_flag(self, loc):
        if self.flag_id:
            common.removeObject(self.flag_id)
        self.flag_loc = OpenNero.Vector3f(*loc)
        self.flag_id = common.addObject("data/shapes/cube/BlueCube.xml",
                                        self.flag_loc,
                                        label="Flag",
                                        scale=OpenNero.Vector3f(1, 1, 10),
                                        type=constants.OBJECT_TYPE_FLAG)

    def place_basic_turret(self, loc):
        return common.addObject("data/shapes/character/steve_basic_turret.xml",
                                OpenNero.Vector3f(*loc),
                                type=constants.OBJECT_TYPE_TEAM_1)

    # The following is run when one of the Deploy buttons is pressed
    def deploy(self, ai='rtneat', team=constants.OBJECT_TYPE_TEAM_0):
        OpenNero.disable_ai()
        if ai == 'rtneat':
            OpenNero.set_ai('rtneat-%s' % team, None)
        self.environment.remove_all_agents(team)
        for _ in range(constants.pop_size):
            self.spawnAgent(ai=ai, team=team)
        OpenNero.enable_ai()

    # The following is run when the Save button is pressed
    def save_team(self, location, team=constants.OBJECT_TYPE_TEAM_0):
        # if there are rtneat agents in the environment, save them as a group.
        rtneat = OpenNero.get_ai("rtneat-%s" % team)
        if rtneat:
            location = rtneat.save_population(str(location))
        # then, check whether there are any qlearning agents, and save them.
        with open(location, 'a') as handle:
            for agent in self.environment.teams[team]:
                if agent.group == 'Agent' and agent.ai == 'qlearning':
                    handle.write('\n\n%s' % agent.to_string())
                    if hasattr(agent, 'stats'):
                        handle.write('\n\n%s' % agent.stats())

    # The following is run when the Load button is pressed
    def load_team(self, location, team=constants.OBJECT_TYPE_TEAM_0):
        OpenNero.disable_ai()
        self.environment.remove_all_agents(team)

        if not os.path.exists(location):
            print location, 'does not exist, cannot load population'
            return

        # parse out different agents from the population file.
        contents = ''
        try:
            try:
                handle = gzip.open(location)
                contents = handle.read()
            finally:
                handle.close()
        except Exception, e:
            with open(location) as handle:
                contents = handle.read()
        if not contents:
            print 'cannot read', location, 'skipping'
            return

        rtneat, qlearning, stats = self._split_population(contents.splitlines(True))

        print 'qlearning agents:', qlearning.count('Approximator')

        # load any qlearning agents first, subtracting them from the population
        # size that rtneat will need to manage. since we cannot deserialize an
        # agent's state until after it's been added to the world, we put the
        # serialized chunk for the agent into a map, then NeroEnvironment#step
        # takes care of the deserialization.
        pop_size = constants.pop_size
        if qlearning.strip():
            for chunk in re.split(r'\n\n+', qlearning):
                if not chunk.strip():
                    continue
                id = self.spawnAgent(ai='qlearning', team=team)
                self.environment.agents_to_load[id] = chunk
                pop_size -= 1
                if pop_size == 0:
                    break

        print 'rtneat agents:', rtneat.count('genomeend')

        # load any rtneat agents from the file, as a group.
        if pop_size > 0 and rtneat.strip():
            tf = tempfile.NamedTemporaryFile(delete=False)
            tf.write(rtneat)
            tf.close()
            print tf.name
            OpenNero.set_ai(
                "rtneat-%s" % team,
                OpenNero.RTNEAT(tf.name,
                                "data/ai/neat-params.dat",
                                pop_size,
                                rtneat_rewards(),
                                False))
            os.unlink(tf.name)
            while pop_size > 0:
                self.spawnAgent(ai='rtneat', team=team)
                pop_size -= 1

        OpenNero.enable_ai()
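`load_team` relies on a `_split_population` helper that is not included in these snippets. Judging from how its three return values are used above (`genomeend` markers for rtNEAT genomes, `Approximator` chunks for Q-learning agents, plus trailing stats), it separates the saved team file into an rtNEAT section, a Q-learning section, and a stats section. The following is only a rough sketch under those assumptions; the markers and the real routing rules may differ.

def _split_population(self, lines):
    """
    Sketch: route each blank-line-separated chunk of the saved team file to
    the rtNEAT genome text, the Q-learning approximator text, or the stats.
    The 'genome' and 'Approximator' markers are inferred from load_team above.
    """
    rtneat, qlearning, stats = [], [], []
    for chunk in re.split(r'\n\n+', ''.join(lines)):
        if 'genome' in chunk:
            rtneat.append(chunk)
        elif 'Approximator' in chunk:
            qlearning.append(chunk)
        elif chunk.strip():
            stats.append(chunk)
    return '\n\n'.join(rtneat), '\n\n'.join(qlearning), '\n\n'.join(stats)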
def stop_training(self):
    OpenNero.set_ai('rtneat-%s' % self.team_type, None)
def start_training(self):
    OpenNero.set_ai('rtneat-%s' % self.team_type, self.rtneat)
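These two methods toggle whether rtNEAT keeps evolving the team: registering the stored `self.rtneat` object resumes evolution, while registering `None` freezes the current networks. A hedged usage sketch, assuming an object (here called `trainer`, a placeholder) that holds the `team_type` and `rtneat` attributes used above; `run_evaluation_match` is likewise a placeholder.

# Hypothetical pause/resume flow around an evaluation match.
trainer.stop_training()    # agents keep acting, but no further evolution
run_evaluation_match()     # placeholder for whatever evaluation is run
trainer.start_training()   # re-register the rtNEAT object to resume evolution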