Example #1
    def __init__(self, grid: Grid):

        Env.__init__(self)
        GameEnv.__init__(self, grid)
        self.FPS = -1
        self.clock = None

        # Define action and observation space
        # They must be gym.spaces objects
        # Using 3 discrete actions:
        self.action_space = spaces.Discrete(3)
        # Using image as input:
        self.observation_space = spaces.Box(low=0, high=255,
                                            shape=(grid.x * grid.scale, grid.y * grid.scale, 3), dtype=np.uint8)
        self.reset()
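
A minimal usage sketch of this Gym-style wrapper. The Grid(x, y, scale) constructor arguments and the wrapper's class name (GridGameEnv below) are assumptions, since neither is shown above:

grid = Grid(x=3, y=3, scale=32)   # hypothetical constructor arguments
env = GridGameEnv(grid)           # the class whose __init__ is shown above
obs = env.reset()
done = False
while not done:
    action = env.action_space.sample()           # one of the 3 discrete actions
    obs, reward, done, info = env.step(action)   # standard gym.Env step contract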
Example #2
    def action(self, board):
        env = GameEnv().update(board)
        key = self.counter_key(env)

        #No_White = True
        #White = ['W', 'X', 'Y']
        #for i in range(3):
        #    for j in range(3):
        #        if env.board[i][j] in White:
        #            No_White = False
        #if No_White: action = 999
        #else:
        for tl in range(self.play_config.thinking_loop):
            self.search_moves(board)

            policy = self.calc_policy(board)
            action = int(np.random.choice(range(self.labels_n), p=policy))

            action_by_value = int(
                np.argmax(self.var_q[key] + (self.var_n[key] > 0) * 100))
            if action == action_by_value or env.turn < self.play_config.change_tau_turn:
                break

        # This is used by play_gui; it is not needed during training.
        self.thinking_history[env.observation] = HistoryItem(
            action, policy, list(self.var_q[key]), list(self.var_n[key]))

        self.moves.append([env.observation, list(policy)])
        return action
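
counter_key(env) is used throughout these examples as a hashable index into var_n, var_q and var_w, but its definition does not appear here. A minimal sketch of one plausible implementation; the CounterKey name and the use of env.observation as a hashable board encoding are assumptions, not confirmed by the code above:

from collections import namedtuple

# Hypothetical key type; the project's real definition is not shown above.
CounterKey = namedtuple("CounterKey", ["board", "turn"])

def counter_key(env):
    # Any immutable summary of the position works as a dictionary key;
    # env.observation is assumed to be a hashable board encoding.
    return CounterKey(env.observation, env.turn)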
Example #3
    async def search_my_move(self, env: GameEnv, is_root_node=False):
        """
        Q, V is value for this Player(always white).
        P is value for the player of next_player (black or white)
        :param env:
        :param is_root_node:
        :return:
        """
        if env.done:
            if env.winner == Winner.white:
                return 1  # value for white (white moves second in this game)
            elif env.winner == Winner.black:
                return -1  # value for white (black moves first)
            else:
                return 0

        key = self.counter_key(env)

        while key in self.now_expanding:
            await asyncio.sleep(self.config.play.wait_for_expanding_sleep_sec)

        # is leaf?
        if key not in self.expanded:  # reach leaf node
            leaf_v = await self.expand_and_evaluate(env)
            if env.player_turn() == Player.white:
                return leaf_v  # value for white
            else:
                return -leaf_v  # value for white == -value for black

        action_t = self.select_action_q_and_u(env, is_root_node)
        _, _ = env.step(action_t)

        virtual_loss = self.config.play.virtual_loss
        self.var_n[key][action_t] += virtual_loss
        self.var_w[key][action_t] -= virtual_loss

        leaf_v = await self.search_my_move(env)  # next move

        # on returning search path update: N, W, Q, U
        n = self.var_n[key][action_t] = self.var_n[key][action_t] - virtual_loss + 1
        w = self.var_w[key][action_t] = self.var_w[key][action_t] + virtual_loss + leaf_v
        self.var_q[key][action_t] = w / n

        return leaf_v
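
Examples #2 and #3 call select_action_q_and_u, which is not shown in this section. Below is a hedged sketch of the standard AlphaZero-style PUCT selection such a method usually implements; var_p (the prior table) and the play-config fields c_puct, noise_eps and dirichlet_alpha are assumed names, not confirmed by the code above:

    def select_action_q_and_u(self, env, is_root_node):
        # Sketch only: choose argmax_a [ Q(s,a) + U(s,a) ] with
        # U(s,a) = c_puct * P(s,a) * sqrt(sum_b N(s,b)) / (1 + N(s,a)).
        key = self.counter_key(env)
        p = self.var_p[key]  # assumed prior-probability table from the policy net
        if is_root_node:
            # Dirichlet noise at the root encourages exploration during self-play.
            noise = np.random.dirichlet([self.play_config.dirichlet_alpha] * self.labels_n)
            p = (1 - self.play_config.noise_eps) * p + self.play_config.noise_eps * noise
        sqrt_total_n = max(np.sqrt(np.sum(self.var_n[key])), 1)
        u = self.play_config.c_puct * p * sqrt_total_n / (1 + self.var_n[key])
        # A real implementation would also mask illegal moves before the argmax.
        return int(np.argmax(self.var_q[key] + u))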
Example #4
    def play_game(self, best_model, ng_model):
        env = GameEnv().reset()

        best_player = GamePlayer(self.config,
                                 best_model,
                                 play_config=self.config.eval.play_config)
        ng_player = GamePlayer(self.config,
                               ng_model,
                               play_config=self.config.eval.play_config)
        best_is_white = random() < 0.5
        if not best_is_white:
            black, white = best_player, ng_player
        else:
            black, white = ng_player, best_player

        env.reset()
        while not env.done:
            if env.player_turn() == Player.black:
                action = black.action(env.board)
            else:
                action = white.action(env.board)
            env.step(action)

        ng_win = None
        if env.winner == Winner.white:
            if best_is_white:
                ng_win = 0
            else:
                ng_win = 1
        elif env.winner == Winner.black:
            if best_is_white:
                ng_win = 1
            else:
                ng_win = 0
        return ng_win, best_is_white
Example #5
def start(config: Config):
    PlayWithHumanConfig().update_play_config(config.play)
    game_model = PlayWithHuman(config)

    while True:
        env = GameEnv().reset()
        human_is_black = random() < 0.5
        game_model.start_game(human_is_black)

        while not env.done:
            # The human plays black when human_is_black is True.
            human_turn = (env.player_turn() == Player.black) == human_is_black
            if human_turn:
                action = game_model.move_by_human(env)
                print("You move to: " + str(action))
            else:
                action = game_model.move_by_ai(env)
                print("AI moves to: " + str(action))
            env.step(action)
            env.render()

        print("\nEnd of the game.")
        print("Game result:")
        if env.winner == Winner.white:
            print("X wins")
        elif env.winner == Winner.black:
            print("O wins")
        else:
            print("Game was a draw")
Example #6
    async def start_search_my_move(self, board):
        self.running_simulation_num += 1
        async with self.sem:  # the semaphore limits the number of parallel searches
            env = GameEnv().update(board)
            leaf_v = await self.search_my_move(env, is_root_node=True)
            self.running_simulation_num -= 1
            return leaf_v
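
The coroutine above expects a driver that creates the semaphore and launches many simulations concurrently; search_moves (called in Example #2) plays that role but is not shown. A minimal sketch under assumed config names (search_threads, simulation_num_per_move) and an assumed prediction_worker coroutine that batches neural-network evaluations:

    def search_moves(self, board):
        # Run many MCTS simulations concurrently on one event loop; the
        # semaphore bounds how many coroutines search at the same time.
        self.loop = asyncio.get_event_loop()
        self.sem = asyncio.Semaphore(self.play_config.search_threads)
        coroutines = [self.start_search_my_move(board)
                      for _ in range(self.play_config.simulation_num_per_move)]
        coroutines.append(self.prediction_worker())  # assumed batching worker
        self.loop.run_until_complete(asyncio.gather(*coroutines))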
Example #7
    def __init__(self, config: Config):
        self.config = config
        self.model = None  # type: GameModel
        self.loaded_filenames = set()
        self.loaded_data = {}
        self.dataset = None
        self.optimizer = None
        self.dbx = None
        self.version = 0  # change to a dynamic lookup from the Dropbox files
        self.env = GameEnv()
        self.raw_timestamp = None
        self.best_is_white = True
        self.play_files_per_generation = 15  # each file holds 25 games, so each generation adds 375 games
        self.max_play_files = 300

        # eventually there are always 7,500 games (300 files x 25 games) to learn from, covering the previous 20 generations
        self.min_play_files_to_learn = 0
        self.play_files_on_dropbox = 0
Example #8
    @staticmethod
    def convert_to_training_data(data):
        state_list = []
        policy_list = []
        z_list = []
        for state, policy, z in data:
            board = list(state)
            board = np.reshape(board, (3, 3))
            env = GameEnv().update(board)

            black_ary, white_ary = env.black_and_white_plane()
            state = [
                black_ary, white_ary
            ] if env.player_turn() == Player.black else [white_ary, black_ary]

            state_list.append(state)
            policy_list.append(policy)
            z_list.append(z)

        return np.array(state_list), np.array(policy_list), np.array(z_list)
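
A quick usage sketch with dummy data, assuming a tic-tac-toe-style board flattened to 9 cells and a 9-way policy vector (both illustrative assumptions; the method is called here as a plain function for brevity):

# One hypothetical (state, policy, z) record.
dummy_record = ([0] * 9, [1.0 / 9] * 9, 1)
states, policies, zs = convert_to_training_data([dummy_record])
print(states.shape, policies.shape, zs.shape)  # e.g. (1, 2, 3, 3), (1, 9), (1,)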
Example #9
    def calc_policy(self, board):
        # Calculate pi(a|s0): the move distribution derived from visit counts.
        pc = self.play_config
        env = GameEnv().update(board)
        key = self.counter_key(env)
        if env.turn < pc.change_tau_turn:
            return self.var_n[key] / np.sum(self.var_n[key])  # tau = 1
        else:
            action = np.argmax(self.var_n[key])  # tau -> 0
            ret = np.zeros(self.labels_n)
            ret[action] = 1
            return ret
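
The two branches above are the tau = 1 and tau -> 0 special cases of the AlphaZero temperature rule pi(a) proportional to N(a)^(1/tau). A standalone sketch of the general form, for reference:

import numpy as np

def visit_counts_to_policy(counts, tau):
    # pi(a) proportional to N(a)**(1/tau): tau = 1 reproduces the
    # proportional branch above; tau -> 0 approaches the one-hot argmax.
    counts = np.asarray(counts, dtype=np.float64)
    if tau <= 1e-8:
        pi = np.zeros_like(counts)
        pi[np.argmax(counts)] = 1.0
        return pi
    scaled = counts ** (1.0 / tau)
    return scaled / np.sum(scaled)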
Example #10
    def play_game(self, best_model, ng_model):
        env = GameEnv().reset()

        if (self.raw_timestamp != self.dbx.files_get_metadata(
                '/model/model_best_weight.h5').client_modified):
            print('A newer model version is available - giving up this match')
            ng_win = 0
            self.best_is_white = True
            return ng_win, self.best_is_white

        best_player = GamePlayer(self.config,
                                 best_model,
                                 play_config=self.config.eval.play_config)
        ng_player = GamePlayer(self.config,
                               ng_model,
                               play_config=self.config.eval.play_config)
        self.best_is_white = not self.best_is_white
        if not self.best_is_white:
            black, white = best_player, ng_player
        else:
            black, white = ng_player, best_player

        env.reset()
        while not env.done:
            if env.player_turn() == Player.black:
                action = black.action(env.board)
            else:
                action = white.action(env.board)
            env.step(action)

        ng_win = None
        if env.winner == Winner.white:
            if self.best_is_white:
                ng_win = 0
            else:
                ng_win = 1
        elif env.winner == Winner.black:
            if self.best_is_white:
                ng_win = 1
            else:
                ng_win = 0
        return ng_win, self.best_is_white
Example #11
def start(config: Config):
    tf_util.set_session_config(per_process_gpu_memory_fraction=0.2)
    return SelfPlayWorker(config, env=GameEnv()).start()
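
tf_util.set_session_config is a project helper that is not shown. A hedged sketch of what such a helper typically does with the TensorFlow 1.x API (the body below is an assumption, not the project's actual code):

import tensorflow as tf
from keras import backend as K

def set_session_config(per_process_gpu_memory_fraction=None, allow_growth=None):
    # Cap the fraction of GPU memory TensorFlow may allocate so that
    # several self-play workers can share a single GPU (TF 1.x style).
    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=per_process_gpu_memory_fraction,
        allow_growth=allow_growth)
    K.set_session(tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)))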
Example #12
class EvolverWorker:
    def __init__(self, config: Config):
        self.config = config
        self.model = None  # type: GameModel
        self.loaded_filenames = set()
        self.loaded_data = {}
        self.dataset = None
        self.optimizer = None
        self.dbx = None
        self.version = 0  # change to a dynamic lookup from the Dropbox files
        self.env = GameEnv()
        self.raw_timestamp = None
        self.best_is_white = True
        self.play_files_per_generation = 15  # each file holds 25 games, so each generation adds 375 games
        self.max_play_files = 300

        # eventually there are always 7,500 games (300 files x 25 games) to learn from, covering the previous 20 generations
        self.min_play_files_to_learn = 0
        self.play_files_on_dropbox = 0

    def start(self):
        # The access token should come from the environment; never hard-code secrets.
        auth_token = os.environ['DROPBOX_AUTH_TOKEN']
        self.dbx = dropbox.Dropbox(auth_token)

        self.version = len(
            self.dbx.files_list_folder('/model/HistoryVersion').entries)
        print('\nThe Strongest Version found is: ', self.version, '\n')

        while True:
            self.model = self.load_model()
            self.compile_model()

            self.play_files_on_dropbox = len(
                self.dbx.files_list_folder('/play_data').entries)
            self.min_play_files_to_learn = min(
                (self.version + 1) * self.play_files_per_generation,
                self.max_play_files)
            while self.play_files_on_dropbox < self.min_play_files_to_learn:
                print('\nPlay Files Found:', self.play_files_on_dropbox,
                      'of required', self.min_play_files_to_learn,
                      'files. Started Self-Playing...\n')
                self.self_play()
                self.play_files_on_dropbox = len(
                    self.dbx.files_list_folder('/play_data').entries)
            print('\nPlay Files Found:', self.play_files_on_dropbox,
                  'of required', self.min_play_files_to_learn,
                  'files. Training files sufficient for Learning!\n')
            self.load_play_data()
            self.raw_timestamp = self.dbx.files_get_metadata(
                '/model/model_best_weight.h5').client_modified

            RetrainSuccessful = False
            while not RetrainSuccessful:
                # Training
                self.training()
                # Evaluating
                self.best_model = self.load_best_model()
                RetrainSuccessful = self.evaluate()
                if (self.raw_timestamp != self.dbx.files_get_metadata(
                        '/model/model_best_weight.h5').client_modified):
                    # Another distributed Evolver already finished a successful evaluation; abandon this one.
                    time.sleep(20)
                    #self.remove_all_play_data()
                    self.version = len(
                        self.dbx.files_list_folder(
                            '/model/HistoryVersion').entries)
                    print('\nThe Strongest Version found is: ', self.version,
                          '\n')

                    # Also remove local play_data files that no longer exist on Dropbox
                    local_names = [f[-32:] for f in get_game_data_filenames(self.config.resource)]
                    db_names = [entry.name for entry in self.dbx.files_list_folder('/play_data').entries]
                    localfiles_to_remove = set(local_names) - set(db_names)
                    print('Removing', len(localfiles_to_remove),
                          'files from local drive')
                    for file in localfiles_to_remove:
                        print('Removing local play_data file', file)
                        path = os.path.join(self.config.resource.play_data_dir,
                                            file)
                        os.remove(path)
                    break
            self.dataset = None

    def self_play(self):
        self.buffer = []
        idx = 1

        for _ in range(self.config.play_data.nb_game_in_file):
            self.play_files_on_dropbox = len(
                self.dbx.files_list_folder('/play_data').entries)
            self.min_play_files_to_learn = min(
                (self.version + 1) * self.play_files_per_generation,
                self.max_play_files)
            if (self.play_files_on_dropbox >= self.min_play_files_to_learn):
                print(
                    'Training files sufficient for Learning, ending Self-Play...'
                )
                break
            start_time = time.time()
            env = self.self_play_game(idx)
            end_time = time.time()
            print("Game", idx, " Time=", (end_time - start_time),
                  " sec, Turn=", env.turn, env.observation, env.winner)
            idx += 1

    def load_model(self):
        for entry in self.dbx.files_list_folder('/model').entries:
            if (entry.name != 'HistoryVersion'
                    and entry.name != 'next_generation'):
                md, res = self.dbx.files_download('/model/' + entry.name)
                with open(
                        'SantoriniAZ-Dist/SmallSanto - Distributed/data/model/'
                        + entry.name, 'wb') as f:
                    #with open('./data/model/'+entry.name, 'wb') as f:
                    f.write(res.content)

        from agent.model import GameModel
        model = GameModel(self.config)
        rc = self.config.resource

        dirs = get_next_generation_model_dirs(rc)
        if not dirs:
            print("loading best model")
            if not load_best_model_weight(model):
                print("Best model can not loaded!")
        else:
            latest_dir = dirs[-1]
            print("loading latest model")
            config_path = os.path.join(
                latest_dir, rc.next_generation_model_config_filename)
            weight_path = os.path.join(
                latest_dir, rc.next_generation_model_weight_filename)
            model.load(config_path, weight_path)
        return model

    def compile_model(self):
        self.optimizer = SGD(lr=1e-2, momentum=0.9)
        losses = [objective_function_for_policy, objective_function_for_value]
        self.model.model.compile(optimizer=self.optimizer, loss=losses)

    def training(self):
        total_steps = self.config.trainer.start_total_steps
        steps = self.train_epoch(self.config.trainer.epoch_to_checkpoint)
        total_steps += steps
        self.save_current_model()

    def load_play_data(self):
        for entry in self.dbx.files_list_folder('/play_data').entries:
            md, res = self.dbx.files_download('/play_data/' + entry.name)
            with open(
                    'SantoriniAZ-Dist/SmallSanto - Distributed/data/play_data/'
                    + entry.name, 'wb') as f:
                #with open('./data/play_data/'+entry.name, 'wb') as f:
                f.write(res.content)
        filenames = get_game_data_filenames(self.config.resource)

        updated = False
        for filename in filenames:
            if filename not in self.loaded_filenames:
                self.load_data_from_file(filename)
                updated = True

        for filename in (self.loaded_filenames - set(filenames)):
            self.unload_data_of_file(filename)

        if updated:
            print("Updated Play Data.\n")
            self.dataset = self.collect_all_loaded_data()

    def load_data_from_file(self, filename):
        try:
            print("loading data from ", filename)
            data = read_game_data_from_file(filename)
            self.loaded_data[filename] = self.convert_to_training_data(data)
            self.loaded_filenames.add(filename)
        except Exception as e:
            print(str(e))

    def unload_data_of_file(self, filename):
        print("removing data about ", filename, " from training set")
        self.loaded_filenames.remove(filename)
        if filename in self.loaded_data:
            del self.loaded_data[filename]

    def collect_all_loaded_data(self):
        state_ary_list, policy_ary_list, z_ary_list = [], [], []
        for s_ary, p_ary, z_ary_ in self.loaded_data.values():
            state_ary_list.append(s_ary)
            policy_ary_list.append(p_ary)
            z_ary_list.append(z_ary_)

        state_ary = np.concatenate(state_ary_list)
        policy_ary = np.concatenate(policy_ary_list)
        z_ary = np.concatenate(z_ary_list)
        return state_ary, policy_ary, z_ary

    @staticmethod
    def convert_to_training_data(data):
        state_list = []
        policy_list = []
        z_list = []
        for state, policy, z in data:
            board = list(state)
            board = np.reshape(board, (3, 3))
            env = GameEnv().update(board)

            white_ary, black_ary, block_ary, turn_ary = env.black_and_white_plane()
            #state = [black_ary, white_ary, block_ary, turn_ary] if env.player_turn() == Player.black else [white_ary, black_ary, block_ary, turn_ary]
            state = [white_ary, black_ary, block_ary, turn_ary]

            state_list.append(state)
            policy_list.append(policy)
            z_list.append(z)

        return np.array(state_list), np.array(policy_list), np.array(z_list)

    @property
    def dataset_size(self):
        if self.dataset is None:
            return 0
        return len(self.dataset[0])

    def train_epoch(self, epochs):
        tc = self.config.trainer
        state_ary, policy_ary, z_ary = self.dataset
        self.model.model.fit(state_ary, [policy_ary, z_ary],
                             batch_size=tc.batch_size,
                             epochs=epochs,
                             verbose=1)
        steps = (state_ary.shape[0] // tc.batch_size) * epochs
        return steps

    def save_current_model(self):
        rc = self.config.resource
        model_id = datetime.now().strftime("%Y%m%d-%H%M%S.%f")
        model_dir = os.path.join(
            rc.next_generation_model_dir,
            rc.next_generation_model_dirname_tmpl % model_id)
        os.makedirs(model_dir, exist_ok=True)
        config_path = os.path.join(model_dir,
                                   rc.next_generation_model_config_filename)
        weight_path = os.path.join(model_dir,
                                   rc.next_generation_model_weight_filename)
        self.model.save(config_path, weight_path)

    def load_best_model(self):
        model = GameModel(self.config)
        load_best_model_weight(model)
        return model

    def evaluate(self):
        ng_model, model_dir = self.load_next_generation_model()
        print("start evaluate model", model_dir)
        ng_is_great = self.evaluate_model(ng_model)
        if ng_is_great:
            print("New Model become best model:", model_dir)
            save_as_best_model(ng_model)
            self.best_model = ng_model

            # Save to Dropbox in the HistoryVersion folder and as the best model in /model
            self.version = self.version + 1
            with open(
                    'SantoriniAZ-Dist/SmallSanto - Distributed/data/model/model_best_weight.h5',
                    'rb') as f:
                #with open('./data/model/model_best_weight.h5', 'rb') as f:
                data = f.read()
            res = self.dbx.files_upload(data,
                                        '/model/HistoryVersion/Version' +
                                        "{0:0>4}".format(self.version) + '.h5',
                                        dropbox.files.WriteMode.add,
                                        mute=True)
            res = self.dbx.files_upload(data,
                                        '/model/model_best_weight.h5',
                                        dropbox.files.WriteMode.overwrite,
                                        mute=True)

            # Remove the oldest 15 files when the play_data folder is full (300 files)
            entries = self.dbx.files_list_folder('/play_data').entries
            if len(entries) == self.max_play_files:
                # Remove the oldest 15 files from both Dropbox and the local disk
                for i in range(14, -1, -1):
                    print('Removing Dropbox play_data file', i, entries[i].name)
                    self.dbx.files_delete('/play_data/' + entries[i].name)

                    print('Removing local play_data file', entries[i].name)
                    path = os.path.join(self.config.resource.play_data_dir,
                                        entries[i].name)
                    os.remove(path)
        self.remove_model(model_dir)
        return ng_is_great

    def load_next_generation_model(self):
        rc = self.config.resource
        while True:
            dirs = get_next_generation_model_dirs(self.config.resource)
            if dirs:
                break
            print("There is no next generation model to evaluate")
            time.sleep(600)
        model_dir = dirs[-1] if self.config.eval.evaluate_latest_first else dirs[0]
        config_path = os.path.join(model_dir,
                                   rc.next_generation_model_config_filename)
        weight_path = os.path.join(model_dir,
                                   rc.next_generation_model_weight_filename)
        model = GameModel(self.config)
        model.load(config_path, weight_path)
        return model, model_dir

    def evaluate_model(self, ng_model):
        results = []
        winning_rate = 0
        for game_idx in range(1, self.config.eval.game_num + 1):
            ng_win, white_is_best = self.play_game(self.best_model, ng_model)
            if ng_win is not None:
                results.append(ng_win)
                winning_rate = sum(results) / len(results)
            if (ng_win == 1 and white_is_best):
                print('Game', game_idx, ': Wins with Black.  Winning rate ',
                      winning_rate)
            elif (ng_win == 1 and not white_is_best):
                print('Game', game_idx, ': Wins with White.  Winning rate ',
                      winning_rate)
            elif (ng_win == 0 and white_is_best):
                print('Game', game_idx, ': Loses with Black.  Winning rate ',
                      winning_rate)
            elif (ng_win == 0 and not white_is_best):
                print('Game', game_idx, ': Loses with White.  Winning rate ',
                      winning_rate)
            if results.count(0) >= self.config.eval.game_num * (1 - self.config.eval.replace_rate):
                print("Lose count reached", results.count(0), "so giving up the challenge\n")
                break
            if results.count(1) >= self.config.eval.game_num * self.config.eval.replace_rate:
                print("Win count reached", results.count(1), "so changing the best model\n")
                break

        if not results:
            return False  # no decisive games were played
        winning_rate = sum(results) / len(results)
        return winning_rate >= self.config.eval.replace_rate

    def play_game(self, best_model, ng_model):
        env = GameEnv().reset()

        if (self.raw_timestamp != self.dbx.files_get_metadata(
                '/model/model_best_weight.h5').client_modified):
            print('A newer model version is available - giving up this match')
            ng_win = 0
            self.best_is_white = True
            return ng_win, self.best_is_white

        best_player = GamePlayer(self.config,
                                 best_model,
                                 play_config=self.config.eval.play_config)
        ng_player = GamePlayer(self.config,
                               ng_model,
                               play_config=self.config.eval.play_config)
        self.best_is_white = not self.best_is_white
        if not self.best_is_white:
            black, white = best_player, ng_player
        else:
            black, white = ng_player, best_player

        env.reset()
        while not env.done:
            if env.player_turn() == Player.black:
                action = black.action(env.board)
            else:
                action = white.action(env.board)
            env.step(action)

        ng_win = None
        if env.winner == Winner.white:
            if self.best_is_white:
                ng_win = 0
            else:
                ng_win = 1
        elif env.winner == Winner.black:
            if self.best_is_white:
                ng_win = 1
            else:
                ng_win = 0
        return ng_win, self.best_is_white

    def remove_model(self, model_dir):
        rc = self.config.resource
        config_path = os.path.join(model_dir,
                                   rc.next_generation_model_config_filename)
        weight_path = os.path.join(model_dir,
                                   rc.next_generation_model_weight_filename)
        os.remove(config_path)
        os.remove(weight_path)
        os.rmdir(model_dir)

    def self_play_game(self, idx):
        self.env.reset()
        self.black = GamePlayer(self.config, self.model)
        self.white = GamePlayer(self.config, self.model)
        while not self.env.done:
            if self.env.player_turn() == Player.black:
                action = self.black.action(self.env.board)
            else:
                action = self.white.action(self.env.board)
            self.env.step(action)

        self.finish_game()
        self.save_play_data(write=idx %
                            self.config.play_data.nb_game_in_file == 0)
        self.remove_play_data()
        return self.env

    def finish_game(self):
        if self.env.winner == Winner.black:
            black_win = 1
        elif self.env.winner == Winner.white:
            black_win = -1
        else:
            black_win = 0
        self.black.finish_game(black_win)
        self.white.finish_game(-black_win)

    def save_play_data(self, write=True):
        data = self.black.moves + self.white.moves
        self.buffer += data

        if not write:
            return

        rc = self.config.resource
        game_id = datetime.now().strftime("%Y%m%d-%H%M%S.%f")
        filename = rc.play_data_filename_tmpl % game_id
        path = os.path.join(rc.play_data_dir, filename)
        print("save play data to ", path)
        write_game_data_to_file(path, self.buffer)

        # Saving File to Drop Box
        self.play_files_on_dropbox = len(
            self.dbx.files_list_folder('/play_data').entries)
        self.min_play_files_to_learn = min(
            (self.version + 1) * self.play_files_per_generation,
            self.max_play_files)
        if self.play_files_on_dropbox < self.min_play_files_to_learn:
            with open(path, 'rb') as f:
                data = f.read()
            res = self.dbx.files_upload(data,
                                        '/play_data/' + filename,
                                        dropbox.files.WriteMode.add,
                                        mute=True)
        #print('uploaded as', res.name.encode('utf8'))

        self.buffer = []

    def remove_play_data(self):
        files = get_game_data_filenames(self.config.resource)
        if len(files) < self.config.play_data.max_file_num:
            return
        for i in range(len(files) - self.config.play_data.max_file_num):
            os.remove(files[i])

    def remove_all_play_data(self):
        files = get_game_data_filenames(self.config.resource)
        for file in files:
            os.remove(file)
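
compile_model above references objective_function_for_policy and objective_function_for_value without defining them. A sketch of the usual AlphaZero-style pair, assuming the old Keras backend API used elsewhere in this code: cross-entropy against the MCTS visit distribution for the policy head, and mean squared error against the game outcome z for the value head.

from keras import backend as K
from keras.losses import mean_squared_error

def objective_function_for_policy(y_true, y_pred):
    # Cross-entropy between the search policy (y_true) and the
    # network's predicted move probabilities (y_pred).
    return K.sum(-y_true * K.log(y_pred + K.epsilon()), axis=-1)

def objective_function_for_value(y_true, y_pred):
    # Squared error between the game outcome z and the predicted value.
    return mean_squared_error(y_true, y_pred)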