Example #1
    def play(self, obs):
        # reward the network with the last reward received
        # self.network.update_network_with_reward(self.reward)
        # print(self.network.weights_layers[-1])

        # reinforcement method
        # print("layers", self.network.layers)
        # act_vector = self.network.take_action(obs.vector)

        # TMP: this is the part-1 model, just to make sure everything works for now;
        # we only use the linear part of the observation (not the images)
        with torch.no_grad():
            linear_obs = torch.tensor(
                obs.vector[0:8]).squeeze().float()  #.cuda()
            out = self.model(linear_obs)
            out = np.array(out)
            # out = np.array(out.cpu())
            act_vector = np.array([
                out[0] > 0.5,
                out[1] > 0.5,
                int(out[2] * 400),
                int(out[3] * 400),
            ])
            # print(act_vector)

        # print(self.act_vector)
        # print("act_vector shape\n", self.act_vector.shape)
        # print("act_vector\n", self.act_vector)
        action = Action(vector=act_vector)

        return action
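The block above only thresholds and de-normalizes the four model outputs. Below is a minimal standalone sketch of that mapping, assuming the outputs lie in [0, 1], that they correspond to shoot, thrust and the pointer coordinates (as the keyword-based examples further down suggest), and that the hard-coded 400 is the arena size in pixels; ARENA_SIZE and outputs_to_act_vector are illustrative names, not part of the original code.

import numpy as np

ARENA_SIZE = 400  # assumption: the play-field size hinted at by the hard-coded 400 above

def outputs_to_act_vector(out):
    """Map a 4-element model output to [shoot, thrust, x, y]."""
    out = np.asarray(out, dtype=float)
    return np.array([
        out[0] > 0.5,              # first output -> shoot flag
        out[1] > 0.5,              # second output -> thrust flag
        int(out[2] * ARENA_SIZE),  # de-normalize pointer x
        int(out[3] * ARENA_SIZE),  # de-normalize pointer y
    ])

print(outputs_to_act_vector([0.9, 0.2, 0.5, 0.5]))  # prints [1 0 200 200]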
Example #2
 def crazy_runner(self, obs):
     """Ship is a fast boi. Ship can dance to."""
     shoot = False
     thrust = random() < 0.9
     if random() < 0.1:
         pointing = Point(randint(0, obs.dim.x), randint(0, obs.dim.y))
     else:
         pointing = obs.pointing
     return Action(shoot=shoot, thrust=thrust, pointing=pointing)
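The keyword-style bots (Examples #2 to #5 and #7 to #9) all rely on the same few names; presumably the enclosing module imports them along these lines. The origin of Point and Action is not shown in the snippets, so that part is only a placeholder.

from random import random, randint, choice  # used by the random/crazy bots
# Point and Action come from the game engine; the exact import path is not visible in these snippets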
Example #3
 def crazy_turret(self, obs):
     """Ship don't want to move. Ship only want to kill."""
     shoot = random() < 0.8
     thrust = False
     if random() < 0.3:
         pointing = Point(randint(0, obs.dim.x), randint(0, obs.dim.y))
     else:
         pointing = obs.pointing
     return Action(shoot=shoot, thrust=thrust, pointing=pointing)
Example #4
 def read_keys(self):
     shoot = "shoot" in self.player.actions_set
     thrust = "thrust" in self.player.actions_set
     if "pointing" in self.player.actions_set:
         pointing = Point(self.player.cursor.x, self.player.cursor.y)
     else:
         pointing = self.pointing
     self.player.clear_keys()
     # print(shoot, thrust, pointing)
     return Action(shoot=shoot, thrust=thrust, pointing=pointing)
Example #5
 def random_play(self, obs):
     """Ship is confused. Ship don't know how to play."""
     possibles = ["shoot", "thrust", "pointing"]
     action = choice(possibles)
     shoot = action == "shoot"
     thrust = action == "thrust"
     if action == "pointing":
         pointing = Point(randint(0, obs.dim.x), randint(0, obs.dim.y))
     else:
         pointing = obs.pointing
     return Action(shoot=shoot, thrust=thrust, pointing=pointing)
Example #6
    def play(self, obs):
        # play() is still called after the ship's death so the agent can keep observing if it wants,
        # but here we only remember the last (losing) frame, ignore everything after it,
        # and return None (it would not be played anyway)
        if self.done:
            return None

        if obs.done:
            if self.is_learning:
                history = self.trainer.replay(self.batch_size)
                self.losses.append(history.history['loss'][0])
                if self.episode % 1 == 0:  # "% 1": log every episode (placeholder interval)
                    print(
                        "episode: {}, moves: {}, score: {}, epsilon: {}, loss: {}"
                        .format(self.episode, self.steps, self.score,
                                self.trainer.epsilon, self.losses[-1]))
            self.done = True

        if (self.previous_obs is not None and self.previous_action is not None
                and self.previous_pointer is not None):
            # TODO: maybe we could store only obs? would that be better?
            self.trainer.remember(self.previous_obs, self.previous_action,
                                  self.previous_pointer, obs.reward, obs,
                                  obs.done)

        # we start with a warm-up phase to collect experience (still learning)
        if self.total_steps < self.collecting_steps:  # or (random.random() < self.exploration):
            # action = self.collecting_agent.bot_play(obs)
            [iaction, ipointer] = random_play()
        else:
            [iaction, ipointer] = self.trainer.get_best_action(obs)
            # All bots share the same trainer so we only save it once
            if self.is_learning and self.id == 1:
                self.trainer.decay_epsilon()
        self.previous_obs = obs
        self.previous_action = iaction
        self.previous_pointer = ipointer
        # self.previous_action = action.vector

        # All bots share the same trainer so we only save it once
        # and replay it once as remember is also shared
        # print("total steps / 50", self.total_steps / 50)
        if self.is_learning and self.id == 1:
            if self.total_steps % 50 == 0:
                history = self.trainer.replay(self.batch_size)
                self.losses.append(history.history['loss'][0])
                if self.episode % 1 == 0:  # "% 1": log every episode (placeholder interval)
                    print(
                        "episode: {}, moves: {}, score: {}, epsilon: {}, loss: {}"
                        .format(self.episode, self.steps, self.score,
                                self.trainer.epsilon, self.losses[-1]))
            if self.episode > 0 and self.episode % self.snapshot == 0 and self.steps < 2:
                self.trainer.save(id='iteration-%s' % self.episode)

        act_vector = np.zeros((Action.size, 1))
        act_vector[iaction] = 1  # one-hot over the first two cells (the discrete action)
        act_vector[2] = ipointer[0]  # pointer x coordinate
        act_vector[3] = ipointer[1]  # pointer y coordinate
        # action = ActionOneHot(vector=act_vector)
        # print("act_vector", act_vector)
        action = Action(vector=act_vector)
        # print(action)
        return action
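For reference, the act_vector layout used at the end of Example #6 can be factored into two small helpers. This is only a sketch: it assumes Action.size == 4 and, as in Example #1, that cell 0 is the shoot flag and cell 1 the thrust flag; ACTION_SIZE, build_act_vector and decode_act_vector are illustrative names, not part of the original code.

import numpy as np

ACTION_SIZE = 4  # assumption: the value of Action.size implied by the indexing above

def build_act_vector(iaction, ipointer):
    """One-hot the discrete action into cells 0-1 and store the pointer in cells 2-3."""
    act_vector = np.zeros((ACTION_SIZE, 1))
    act_vector[iaction] = 1      # chosen discrete action (0 or 1)
    act_vector[2] = ipointer[0]  # pointer x coordinate
    act_vector[3] = ipointer[1]  # pointer y coordinate
    return act_vector

def decode_act_vector(act_vector):
    """Reverse mapping, handy when inspecting what the trainer picked."""
    iaction = int(np.argmax(act_vector[:2]))
    ipointer = (float(act_vector[2, 0]), float(act_vector[3, 0]))
    return iaction, ipointer

vec = build_act_vector(1, (120, 340))
print(decode_act_vector(vec))  # prints (1, (120.0, 340.0))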
Example #7
 def idlebot(self, obs):
     """Ship don't like life anymore. Ship don't like its taste."""
     shoot = False
     thrust = False
     pointing = obs.pointing
     return Action(shoot=shoot, thrust=thrust, pointing=pointing)
Example #8
 def mass_shooter(self, obs):
     """Who give him that ?"""
     shoot = True
     thrust = False
     pointing = obs.pointing
     return Action(shoot=shoot, thrust=thrust, pointing=pointing)
Example #9
 def never_back_down(self, obs):
     """Thrust ship, thrust !"""
     shoot = False
     thrust = True
     pointing = obs.pointing
     return Action(shoot=shoot, thrust=thrust, pointing=pointing)