def set_display_hint(self):
    """ set the display hint above the agent's head (toggled with F2) """
    display_hint = constants.getDisplayHint()
    if display_hint:
        if display_hint == 'fitness':
            self.state.label = '%.2f' % self.org.fitness
        elif display_hint == 'time alive':
            self.state.label = str(self.org.time_alive)
        elif display_hint == 'hit points':
            self.state.label = ''.join('.' for i in range(int(5 * OpenNero.get_environment().get_hitpoints(self))))
        elif display_hint == 'id':
            self.state.label = str(self.org.genome.id)
        elif display_hint == 'champion':
            if self.org.champion:
                self.state.label = 'champ!'
            else:
                self.state.label = ''
        elif display_hint == 'rank':
            self.state.label = str(self.org.rank)
        elif display_hint == 'debug':
            self.state.label = str(OpenNero.get_environment().get_state(self))
        else:
            self.state.label = '?'
    else:
        # clear the label the first time display hints are switched off
        if self.state.label:
            self.state.label = ""
def parseInputCommand(content):
    """ Parse commands from training window """
    mod = getMod()
    # each message carries a command plus one argument (a value or a filename)
    command, arg = content.attrib['command'], content.attrib['arg']
    # uppercase commands take an integer argument
    if command.isupper():
        vali = int(arg)
    if command == "LT":
        mod.ltChange(vali)
    elif command == "EE":
        mod.eeChange(vali)
    elif command == "HP":
        mod.hpChange(vali)
    elif command == "SP":
        mod.set_speedup(vali)
    elif command == "save1":
        mod.save_team(arg, constants.OBJECT_TYPE_TEAM_0)
    elif command == "load1":
        mod.load_team(arg, constants.OBJECT_TYPE_TEAM_0)
    elif command == "rtneat":
        mod.deploy('rtneat')
    elif command == "qlearning":
        mod.deploy('qlearning')
    elif command == "pause":
        OpenNero.disable_ai()
    elif command == "resume":
        OpenNero.enable_ai()
    elif command == "example":
        print 'command: example'
        if arg == "start":
            print 'command: example start'
            mod.start_demonstration()
        elif arg == "cancel":
            print 'command: example cancel'
            OpenNero.get_environment().cancel_demonstration()
        elif arg == "confirm":
            print 'command: example confirm'
            OpenNero.get_environment().use_demonstration()
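# A hedged usage sketch of the command protocol above (never called from this
# module): parseInputCommand expects an element with 'command' and 'arg'
# attributes. The XML payload below is hypothetical; real messages arrive
# from the NERO training window.
def _example_parse_input_command():
    import xml.etree.ElementTree as ET
    content = ET.fromstring('<message command="SP" arg="80"/>')
    parseInputCommand(content)  # dispatches to getMod().set_speedup(80)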
def set_display_hint(self):
    """ set the display hint above the agent's head (toggled with F2) """
    display_hint = constants.getDisplayHint()
    if display_hint:
        if display_hint == 'fitness':
            self.state.label = '%.2g' % self.fitness[0]
        elif display_hint == 'time alive':
            self.state.label = str(self.step)
        elif display_hint == 'hit points':
            self.state.label = ''.join('.' for i in range(int(5 * OpenNero.get_environment().get_hitpoints(self))))
        elif display_hint == 'id':
            self.state.label = str(self.state.id)
        elif display_hint == 'species id':
            self.state.label = 'q'
        elif display_hint == 'debug':
            self.state.label = str(OpenNero.get_environment().get_state(self))
        else:
            self.state.label = '?'
    else:
        # clear the label the first time display hints are switched off
        if self.state.label:
            self.state.label = ""
def set_display_hint(self):
    """ set the display hint above the agent's head (toggled with F2) """
    display_hint = constants.getDisplayHint()
    if display_hint:
        org = self.get_org()
        if display_hint == 'fitness':
            self.state.label = '%.2f' % org.fitness
        elif display_hint == 'time alive':
            self.state.label = str(org.time_alive)
        elif display_hint == 'hit points':
            self.state.label = ''.join('.' for i in range(int(5 * OpenNero.get_environment().get_hitpoints(self))))
        elif display_hint == 'id':
            self.state.label = str(org.id)
        elif display_hint == 'species id':
            self.state.label = str(org.species_id)
        elif display_hint == 'champion':
            if org.champion:
                self.state.label = 'champ!'
            else:
                self.state.label = ''
        elif display_hint == 'rank':
            self.state.label = str(org.rank)
        elif display_hint == 'debug':
            self.state.label = str(OpenNero.get_environment().get_state(self))
        else:
            self.state.label = '?'
    else:
        # clear the label the first time display hints are switched off
        if self.state.label:
            self.state.label = ""
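# A standalone sketch of the 'hit points' dot bar used by the set_display_hint
# variants above, assuming get_hitpoints returns a value in [0, 1] (as the 5x
# scaling suggests): hit points map to zero through five dots. The helper name
# is illustrative.
def _hitpoint_bar(hitpoints):
    return '.' * int(5 * hitpoints)

assert _hitpoint_bar(0.61) == '...'
assert _hitpoint_bar(1.0) == '.....'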
def load_rtneat(self, location, pop, team=constants.OBJECT_TYPE_TEAM_0):
    location = os.path.relpath("/") + location
    if os.path.exists(location):
        OpenNero.set_ai("rtneat-%s" % team,
                        OpenNero.RTNEAT(str(location),
                                        "data/ai/neat-params.dat",
                                        constants.pop_size,
                                        OpenNero.get_environment().agent_info.reward))
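# A hedged usage sketch (the population file name is hypothetical, and the
# `pop` argument, unused by this implementation, is passed as None): restore
# a previously saved rtNEAT population for team 0. load_rtneat prefixes the
# location with os.path.relpath("/") and silently does nothing if the file
# does not exist.
def _example_load_rtneat(mod):
    mod.load_rtneat("hw5/pop_team0.txt", None, constants.OBJECT_TYPE_TEAM_0)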
def evaluate_trace(self):
    """ evaluate agent and compute fitness based on trace information """
    # flush network from previous activations
    org = self.get_org()
    org.net.flush()
    environment = OpenNero.get_environment()
    trace = environment.trace
    # place the agent at the beginning of the trace
    self.state.position = OpenNero.Vector3f(trace.position[0].x,
                                            trace.position[0].y,
                                            trace.position[0].z)
    self.state.rotation = OpenNero.Vector3f(trace.rotation[0].x,
                                            trace.rotation[0].y,
                                            trace.rotation[0].z)
    current_step = trace.initial_step
    j = 0  # trace index at which error is calculated
    while j < len(trace.position) - 1 and current_step < environment.STEPS_PER_EPISODE:
        # read the agent's current pose to compare against the trace
        position = self.state.position
        rotation = self.state.rotation
        sensors = environment.sense(self)
        actions = self.network_action(sensors)
        # error based on position - find the index in the trace where the error
        # based on the current position starts to increase, i.e. starting with
        # the current trace index, find the position in the trace that is
        # closest to the current state position.
        error1 = trace.position[j].getDistanceFrom(position)
        error2 = trace.position[j + 1].getDistanceFrom(position)
        while error1 >= error2 and j < len(trace.position) - 2:
            j += 1
            error1 = error2
            error2 = trace.position[j + 1].getDistanceFrom(position)
        if error1 > self.ERROR_THRESHOLD:
            break
        # calculate new position, orientation, and velocity
        environment.act(self, actions)
        current_step += 1
    self.passed_steps = j
    return float(j) / len(trace.position)
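# A standalone sketch of the inner scan in evaluate_trace: starting at trace
# index `start`, advance while the distance to the agent's position keeps
# shrinking, returning the locally closest trace index and its error. Points
# are plain (x, y) tuples standing in for Vector3f; all names are illustrative.
import math

def _closest_trace_index(trace_positions, position, start=0):
    dist = lambda a, b: math.hypot(a[0] - b[0], a[1] - b[1])
    j = start
    error1 = dist(trace_positions[j], position)
    error2 = dist(trace_positions[j + 1], position)
    while error1 >= error2 and j < len(trace_positions) - 2:
        j += 1
        error1 = error2
        error2 = dist(trace_positions[j + 1], position)
    return j, error1

# e.g. on a straight-line trace, the point closest to (2.2, 0) is index 2
assert _closest_trace_index([(0, 0), (1, 0), (2, 0), (3, 0)], (2.2, 0))[0] == 2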
def normalize_reward(self, reward):
    """ Combine reward vector into a single value in the range [0,1] """
    weighted_sum = 0
    min_sum = 0
    max_sum = 0
    environment = OpenNero.get_environment()
    for i, f in enumerate(constants.FITNESS_DIMENSIONS):
        weight = environment.reward_weights[f]
        weighted_sum += weight * reward[i]
        min_sum += abs(weight) * -1.0
        max_sum += abs(weight)
    normalized_reward = weighted_sum
    if max_sum > min_sum:
        # normalize the weighted sum to [0, 1]
        d = max_sum - min_sum
        normalized_reward = (normalized_reward - min_sum) / d
    return normalized_reward
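# A standalone sketch of the affine rescaling in normalize_reward, with two
# hypothetical fitness dimensions weighted +1.0 and -0.5. Assuming each reward
# component lies in [-1, 1] (as normalize_reward's bounds imply), the weighted
# sum falls in [-sum(|w|), +sum(|w|)] and is rescaled to [0, 1].
def _normalize_reward_sketch(reward, weights):
    weighted_sum = sum(w * r for w, r in zip(weights, reward))
    max_sum = sum(abs(w) for w in weights)
    min_sum = -max_sum
    if max_sum > min_sum:
        return (weighted_sum - min_sum) / (max_sum - min_sum)
    return weighted_sum

# weighted sum 1.0 - 0.5 = 0.5 maps to (0.5 + 1.5) / 3.0
assert _normalize_reward_sketch([1.0, 1.0], [1.0, -0.5]) == (0.5 + 1.5) / 3.0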
def destroy(self):
    env = OpenNero.get_environment()
    if env is not None:
        env.remove_agent(self)
    return True