Example No. 1
import os
import random
import time

import numpy as np
import plotly.graph_objs
import plotly.offline

# Project-specific helpers (Action, ActionType, DecisionMaker, Experience,
# Memory, ScoreReader, ScreenGrabber) are assumed to be defined elsewhere in
# this project and available in this module.


class Agent:
    def __init__(self):
        self.score_reader = ScoreReader()
        self.screen_grabber = ScreenGrabber()
        self.continue_playing = False
        self.decision_maker = DecisionMaker(
            screen_width=self.screen_grabber.screen_position['width'],
            screen_height=self.screen_grabber.screen_position['height'])
        self.maximum_length_of_experience = 50000
        self.memory = Memory()
        self.gamma = 0.925

        self.processed_experience = list()
        self.experience_loss = list()
        self.loss_experience_file = os.path.join(".\\Memory_collection",
                                                 "experience_loss.txt")
        self.forgotten_directory = ".\\forgotten_experiences"
        self.load_experience_loss()

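    # Replay a previously recorded run instead of capturing the live game:
    # load speeds and screens from disk, switch the memory path to
    # ./Simulations, and feed the recorded frames through playing() in
    # simulation mode.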
    def simulation(self):
        experience_path = "./Simulation_feeding"
        speeds, _, screens = self.memory.remember_experiences(
            experience_path=experience_path)
        self.memory.path = "./Simulations"
        self.playing(
            True, np.reshape(speeds, [-1, 1, 1]),
            np.reshape(screens, [
                -1, 1, self.decision_maker.screen_height,
                self.decision_maker.screen_width, 1
            ]),
            simulation_mode=True)

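    # Main interaction loop: read speed and screen frames, let the decision
    # maker pick an action for each frame, apply it (unless simulating), and
    # record the collected experiences once the game ends.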
    def playing(self,
                record_experience,
                score_reader=None,
                screen_grabber=None,
                simulation_mode=False):
        print(self.processed_experience)
        if len(self.processed_experience) > 100:
            self.forget_experience(self.processed_experience[0][0])

        print("Start new game")
        if score_reader is None:
            score_reader = self.score_reader.read_score()
        if screen_grabber is None:
            screen_grabber = self.screen_grabber.grab_screen_generator()

        if record_experience:
            experiences = [
                Experience() for _ in range(self.maximum_length_of_experience)
            ]
        else:
            experiences = [Experience()]
        counter = 0
        self.continue_playing = True
        for speed, screen in zip(score_reader, screen_grabber):
            experiences[counter].screen = screen
            experiences[counter].speed = speed
            if speed[0, 0] == 0:
                # Replace a zero speed reading with a nonzero default.
                print("Zero speed found")
                speed[0, 0] = 100
            (experiences[counter].action,
             experiences[counter].predicted_rewards) = self.decision_maker.making_decision(
                 experiences[counter].screen, experiences[counter].speed)
            if not simulation_mode:
                experiences[counter].action.update_current_action_type()
                experiences[counter].action.apply()
            if record_experience:
                counter += 1
            if not simulation_mode and self.is_game_over():
                break
        print("Game is over")
        if record_experience and counter > 50:
            self.memory.record_experiences(experiences, counter)

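    # Heuristic game-over check: the run is treated as over when the maximum
    # value in the monitored slice of the captured score area drops below 50.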
    def is_game_over(self):
        scores = self.screen_grabber.grab_scores()
        if np.max(scores[2, :, :]) < 50:
            return True
        return False

    def wait_to_finish_ads(self):
        # Use the ad break to run a training pass.
        for _ in range(1):
            self.thinking()

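    # Dismiss the end-of-run screens with scripted key presses and wait until
    # a new run can start.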
    def continue_to_play(self):
        my_action = Action()
        my_action.press_key("n")
        time.sleep(2)
        my_action.press_key("b")
        time.sleep(2)
        my_action.press_key("c")
        time.sleep(2)
        while self.is_game_over():
            my_action.press_key("s")
        time.sleep(10)

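    # Load the per-experience loss history that weights experience sampling in
    # selecting_an_experience().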
    def load_experience_loss(self):
        self.processed_experience = list()
        self.experience_loss = list()
        if not os.path.isfile(self.loss_experience_file):
            # No loss history recorded yet.
            return
        with open(self.loss_experience_file, "r") as loss_file:
            for i, line in enumerate(loss_file):
                elements = line.strip().split(sep=",")
                self.processed_experience.append([elements[0], i])
                # Treat NaN or unparsable losses as a huge loss so the
                # experience keeps a high sampling weight for retraining.
                if elements[1] == "nan":
                    elements[1] = "100000000000.0"
                try:
                    self.experience_loss.append(float(elements[1]))
                except ValueError:
                    self.experience_loss.append(100000000000.0)

    def save_experience_loss(self):
        with open(self.loss_experience_file, "w") as experience_loss_file:
            for i in range(len(self.processed_experience)):
                experience_loss_file.write("{},{}\n".format(
                    self.processed_experience[i][0], self.experience_loss[i]))

    def find_experience_index(self, directory):
        for index, processed_directory in enumerate(self.processed_experience):
            if directory == processed_directory[0]:
                return index
        return None

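    # Remove an experience from the tracking lists and move its folder into
    # the forgotten_experiences directory.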
    def forget_experience(self, experience_directory):
        index = self.find_experience_index(experience_directory)
        if index is None:
            return
        del self.processed_experience[index]
        del self.experience_loss[index]
        os.makedirs(self.forgotten_directory, exist_ok=True)
        os.rename(experience_directory,
                  os.path.join(self.forgotten_directory,
                               os.path.basename(experience_directory)))
        self.save_experience_loss()

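    # Register any newly recorded experience folders (with a very large loss
    # so they are sampled soon) and pick one at random, weighted by its last
    # training loss.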
    def selecting_an_experience(self):
        directories = self.memory.find_experiences()
        for directory in directories:
            if self.find_experience_index(directory) is None:
                new_index = len(self.processed_experience)
                self.processed_experience.append([directory, new_index])
                self.experience_loss.append(100000000000.0)
        return random.choices(self.processed_experience,
                              weights=self.experience_loss)[0]

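    # Training loop: repeatedly sample a recorded experience, rebuild its
    # screens/speeds/actions as arrays, compute reward targets, train the
    # decision maker, and update that experience's stored loss.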
    def thinking(self):
        for _ in range(10):
            experience = self.selecting_an_experience()
            experience_directory = experience[0]
            experience_index = experience[1]
            print("processing {}, with last loss: {}".format(
                experience_directory, self.experience_loss[experience_index]))
            speeds, actions, screens = self.memory.remember_experiences(
                experience_path=experience_directory)
            # raw_rewards = self.create_rewards(speeds)
            samples_count = len(speeds) - 5
            np_screens = np.zeros(shape=[
                samples_count, screens[0].shape[0], screens[0].shape[1], 1
            ],
                                  dtype=np.float32)
            np_speeds = np.zeros(shape=[samples_count, 1], dtype=np.float32)
            np_rewards = np.zeros(shape=[samples_count,
                                         len(ActionType)],
                                  dtype=np.float32)
            np_actions = np.zeros(shape=[samples_count], dtype=np.int32)
            for i in range(samples_count):
                np_screens[i, :, :, 0] = screens[i]
                np_speeds[i, 0] = speeds[i]
                np_actions[i] = actions[i]
            raw_rewards = self.create_rewards(
                np_screens, np_speeds, experience_directory + "_analyzing")
            for i in range(len(raw_rewards)):
                np_rewards[i, actions[i]] = raw_rewards[i]
            print("selects {} samples from {} samples.".format(
                len(raw_rewards), len(raw_rewards)))
            self.experience_loss[experience_index] =\
                self.decision_maker.training(np_screens[:len(raw_rewards), :, :, :],
                                             np_speeds[:len(raw_rewards), :],
                                             np_actions[:len(raw_rewards)],
                                             np_rewards[:len(raw_rewards), :])
            self.experience_loss[experience_index] = (
                self.experience_loss[experience_index] / 50)**2
            print("Loss score of experience: {}".format(
                self.experience_loss[experience_index]))
            self.save_experience_loss()

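    # Piecewise-linear reward in speed: heavily penalizes near-zero speeds and
    # increases with speed.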
    @staticmethod
    def speed_reward(speed):
        if speed <= 50:
            return ((speed - 50) * 20 - 11) * 0.1
        if speed <= 150:
            return ((speed - 100) * 1 + 39) * 0.1
        return ((speed - 150) * 5 + 89) * 0.1

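    # Build Q-learning style targets, reward_t = speed_reward(speed_t)
    # + gamma * max_a Q(state_{t+1}, a), and dump state values, speeds and a
    # reward plot into the analysis directory for inspection.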
    def create_rewards(self, screens, speeds, directory):
        os.makedirs(directory, exist_ok=True)
        state_value = self.decision_maker.find_state_value(screens, speeds, 20)
        # dumping state values
        with open(os.path.join(directory, "predicted_state_values.csv"),
                  mode="w") as file:
            for elements in state_value:
                file.write(','.join([str(i) for i in elements]) + '\n')
        # dumping screens
        # counter = 0
        # for screen in screens:
        #     new_shape_screen = np.reshape(screen, newshape=[screen.shape[0], screen.shape[1]])
        #     new_shape_screen = new_shape_screen.astype(np.uint8)
        #     screen_file = os.path.join(directory, "{}.png".format(counter))
        #     cv2.imwrite(screen_file, new_shape_screen)
        #     counter += 1
        # dumping speeds
        with open(os.path.join(directory, "speeds.txt"), mode="w") as file:
            for speed in speeds:
                file.write("{}\n".format(speed))

        max_state_value = np.amax(state_value, axis=1)
        rewards = []
        for i in range(speeds.shape[0] - 1):
            rewards.append(
                self.speed_reward(speeds[i][0]) +
                self.gamma * max_state_value[i + 1])

        # plotting rewards
        trace = plotly.graph_objs.Scatter(y=rewards)
        data = [trace]
        reward_file = os.path.join(directory, "rewards.html")
        plotly.offline.plot(data, filename=reward_file, auto_open=False)
        return rewards
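

A minimal driver sketch follows. It is not part of the original example: it assumes the game window is already running and focused, and the episode count is arbitrary.

# Hypothetical driver loop (illustration only): alternate between playing
# (collecting experience from the live game) and thinking (training on the
# recorded experience).
if __name__ == "__main__":
    agent = Agent()
    for _ in range(10):
        # Play one run while recording experience.
        agent.playing(record_experience=True)
        # Use the post-game ad break to run a training pass.
        agent.wait_to_finish_ads()
        # Dismiss the menus and start the next run.
        agent.continue_to_play()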