def play(self, obs):
    # The reinforcement hook (rewarding the network with the last reward,
    # self.network.update_network_with_reward(self.reward)) is disabled:
    # this is temporarily the part-1 model, kept to make sure all is fine.
    # We only use the linear information of the observation (not the images).
    with torch.no_grad():
        linear_obs = torch.tensor(obs.vector[0:8]).squeeze().float()  # .cuda()
        out = self.model(linear_obs)
    out = np.array(out)  # out = np.array(out.cpu()) when running on GPU
    act_vector = np.array([
        out[0] > 0.5,       # shoot flag
        out[1] > 0.5,       # thrust flag
        int(out[2] * 400),  # pointer x, scaled back to screen coordinates
        int(out[3] * 400),  # pointer y
    ])
    action = Action(vector=act_vector)
    return action
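# The decoding above deserves a standalone illustration: the first two model
# outputs are read as probabilities and thresholded into the boolean
# shoot/thrust flags, and the last two as normalized pointer coordinates.
# A minimal sketch, assuming outputs in [0, 1] and a 400-pixel playfield
# (the 400 is hardcoded above); decode_output is an illustrative helper,
# not part of the actual model.
def decode_output(out, dim=400):
    """Map a raw 4-value model output to (shoot, thrust, x, y)."""
    shoot = out[0] > 0.5   # sigmoid-style threshold on the first unit
    thrust = out[1] > 0.5  # same for the second unit
    x = int(out[2] * dim)  # last two units are normalized coordinates
    y = int(out[3] * dim)
    return shoot, thrust, x, y

# decode_output([0.9, 0.2, 0.5, 0.25]) -> (True, False, 200, 100)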
def crazy_runner(self, obs):
    """Ship is a fast boi. Ship can dance too."""
    shoot = False
    thrust = random() < 0.9  # thrust almost all the time
    if random() < 0.1:
        # occasionally pick a new random point to aim at
        pointing = Point(randint(0, obs.dim.x), randint(0, obs.dim.y))
    else:
        pointing = obs.pointing
    return Action(shoot=shoot, thrust=thrust, pointing=pointing)
def crazy_turret(self, obs):
    """Ship don't want to move. Ship only want to kill."""
    shoot = random() < 0.8  # shoot most of the time
    thrust = False
    if random() < 0.3:
        # occasionally aim at a new random point
        pointing = Point(randint(0, obs.dim.x), randint(0, obs.dim.y))
    else:
        pointing = obs.pointing
    return Action(shoot=shoot, thrust=thrust, pointing=pointing)
def read_keys(self):
    shoot = "shoot" in self.player.actions_set
    thrust = "thrust" in self.player.actions_set
    if "pointing" in self.player.actions_set:
        pointing = Point(self.player.cursor.x, self.player.cursor.y)
    else:
        pointing = self.pointing
    self.player.clear_keys()
    return Action(shoot=shoot, thrust=thrust, pointing=pointing)
def random_play(self, obs):
    """Ship is confused. Ship don't know how to play."""
    possibles = ["shoot", "thrust", "pointing"]
    action = choice(possibles)
    shoot = action == "shoot"
    thrust = action == "thrust"
    if action == "pointing":
        pointing = Point(randint(0, obs.dim.x), randint(0, obs.dim.y))
    else:
        pointing = obs.pointing
    return Action(shoot=shoot, thrust=thrust, pointing=pointing)
def play(self, obs):
    # play() is still called after the ship's death so the agent can keep
    # observing if it wants to; here we only remember the last (losing)
    # frame, then return None, since the action would not be played anyway.
    if self.done:
        return None
    if obs.done:
        if self.is_learning:
            l = self.trainer.replay(self.batch_size)
            self.losses.append(l.history['loss'][0])
            if self.episode % 1 == 0:  # always true; raise the modulus to log less often
                print("episode: {}, moves: {}, score: {}, epsilon: {}, loss: {}"
                      .format(self.episode, self.steps, self.score,
                              self.trainer.epsilon, self.losses[-1]))
        self.done = True
    if (self.previous_obs is not None and self.previous_action is not None
            and self.previous_pointer is not None):
        # TODO: maybe we could save only the obs? would that be better?
        self.trainer.remember(self.previous_obs, self.previous_action,
                              self.previous_pointer, obs.reward, obs, obs.done)

    # we start with a sequence of steps to fill the replay memory
    # (still with learning)
    if self.total_steps < self.collecting_steps:  # or (random.random() < self.exploration):
        # action = self.collecting_agent.bot_play(obs)
        # NOTE: random_play here must return an [action index, pointer] pair
        [iaction, ipointer] = random_play()
    else:
        [iaction, ipointer] = self.trainer.get_best_action(obs)

    # all bots share the same trainer, so only bot 1 decays epsilon
    if self.is_learning and self.id == 1:
        self.trainer.decay_epsilon()

    self.previous_obs = obs
    self.previous_action = iaction
    self.previous_pointer = ipointer

    # all bots share the same trainer (remember() is shared too), so only
    # bot 1 replays, once every 50 steps
    if self.is_learning and self.id == 1:
        if self.total_steps % 50 == 0:
            l = self.trainer.replay(self.batch_size)
            self.losses.append(l.history['loss'][0])
            if self.episode % 1 == 0:  # always true; raise the modulus to log less often
                print("episode: {}, moves: {}, score: {}, epsilon: {}, loss: {}"
                      .format(self.episode, self.steps, self.score,
                              self.trainer.epsilon, self.losses[-1]))
        if self.episode > 0 and self.episode % self.snapshot == 0 and self.steps < 2:
            self.trainer.save(id='iteration-%s' % self.episode)

    # the first two cells of the action vector hold the one-hot action,
    # the last two the pointer coordinates
    act_vector = np.zeros((Action.size, 1))
    act_vector[iaction] = 1
    act_vector[2] = ipointer[0]
    act_vector[3] = ipointer[1]
    action = Action(vector=act_vector)
    return action
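# A minimal sketch of the remember/replay pattern relied on above, assuming
# a DQN-style trainer with a bounded replay memory; ReplayMemory and its
# methods are illustrative, not the actual trainer implementation.
import random
from collections import deque

class ReplayMemory:
    def __init__(self, capacity=10000):
        # a deque with maxlen silently drops the oldest transitions
        self.memory = deque(maxlen=capacity)

    def remember(self, state, action, pointer, reward, next_state, done):
        self.memory.append((state, action, pointer, reward, next_state, done))

    def sample(self, batch_size):
        # train on a random minibatch to break temporal correlations
        return random.sample(self.memory, min(batch_size, len(self.memory)))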
def idlebot(self, obs):
    """Ship don't like life anymore. Ship don't like its taste."""
    shoot = False
    thrust = False
    pointing = obs.pointing
    return Action(shoot=shoot, thrust=thrust, pointing=pointing)
def mass_shooter(self, obs):
    """Who gave him that?"""
    shoot = True
    thrust = False
    pointing = obs.pointing
    return Action(shoot=shoot, thrust=thrust, pointing=pointing)
def never_back_down(self, obs):
    """Thrust, ship, thrust!"""
    shoot = False
    thrust = True
    pointing = obs.pointing
    return Action(shoot=shoot, thrust=thrust, pointing=pointing)
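# All the scripted policies above share the same obs -> Action signature,
# so a bot can swap between them at runtime. A hypothetical dispatch sketch
# (the `policies` table and the `bot_play` name are illustrative, not part
# of the existing code):
def bot_play(self, obs, style="runner"):
    policies = {
        "idle": self.idlebot,
        "shooter": self.mass_shooter,
        "runner": self.crazy_runner,
        "turret": self.crazy_turret,
        "random": self.random_play,
    }
    # fall back to the idle policy for unknown styles
    return policies.get(style, self.idlebot)(obs)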