def main():
    """Train LSTM model."""
    # Configure visible GPUs and the multi-GPU distribution strategy
    os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"
    mirrored_strategy = tf.distribute.MirroredStrategy(["/gpu:0", "/gpu:1"])
    with mirrored_strategy.scope():
        # Import data
        X, Y = import_dataset("gender")
        X_train, X_test, y_train, y_test = train_test_split(X,
                                                            Y,
                                                            test_size=0.3,
                                                            shuffle=True,
                                                            random_state=RAND_STATE)

        X_train, y_train = utils.augment_dataset(X_train, y_train)

        data = {
            'X_train': X_train,
            'X_test': X_test,
            'y_train': y_train,
            'y_test': y_test,
        }

        # Define model parameters
        model_type = "ResNet50"
        input_shape = (None, X_train.shape[2], X_train.shape[3], X_train.shape[4])

        filename = f"{model_type}_complete_gender_vFinal2"
        batch = 128
        hyperparameters = {
            "epochs": 1000,
            "batch_size": batch,
            "loss": "categorical_crossentropy",
            "optimizer": keras.optimizers.Adam(),
            "metrics": ["accuracy"],
            "save_file": f"models/{filename}_b{batch}.h5"
        }

        # Infer the number of output classes from the label shape;
        # fall back to a single output for 1-D label arrays.
        try:
            classes = y_train.shape[1]
        except IndexError:
            classes = 1

        # Choose and train model
        model = choose_model(input_shape, classes, model_type)
        train_model(model, data, hyperparameters)

    return None
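
# A minimal sketch of the train_model helper called above. Its real
# implementation is not shown on this page, so the body below is an
# assumption: it compiles and fits the Keras model with the supplied
# hyperparameters, then saves the result to the configured file.
def train_model(model, data, hyperparameters):
    model.compile(loss=hyperparameters["loss"],
                  optimizer=hyperparameters["optimizer"],
                  metrics=hyperparameters["metrics"])
    model.fit(data["X_train"], data["y_train"],
              epochs=hyperparameters["epochs"],
              batch_size=hyperparameters["batch_size"],
              validation_data=(data["X_test"], data["y_test"]))
    model.save(hyperparameters["save_file"])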
Example #2
    def load_dataset(self):
        train_df = import_file("train", path=self.dataset_path)
        dev_df = import_file("dev", path=self.dataset_path)
        test_df = import_file("test", path=self.dataset_path)
        # Build an augmented copy of the training split using the
        # score-threshold predicates below
        train_df_aug = augment_dataset(
            train_df,
            lambda score: score < -1,
            lambda score: score < -0.3,
            lambda score: score > 0.55,
            lambda score: score > 1,
            lambda score: score > 1.3,
        )

        self.dataLoader_train = get_data_loader(train_df, batch_size=32)
        self.dataLoader_train_aug = get_data_loader(train_df_aug, batch_size=32)
        self.dataLoader_dev = get_data_loader(dev_df, batch_size=32)
        self.dataLoader_test = get_data_loader(test_df, batch_size=32, test=True)

        # Masked versions of the dataset (load both into memory to avoid recalculation)
        self.dataLoader_train_masked = get_data_loader_masked(train_df, batch_size=32)
        self.dataLoader_train_aug_masked = get_data_loader_masked(
            train_df_aug, batch_size=32
        )
        self.dataLoader_dev_masked = get_data_loader_masked(dev_df, batch_size=32)
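
# A minimal sketch of the get_data_loader helper assumed above. The real
# implementation is not shown here; the column names ("features", "score")
# and the absence of any tokenization step are assumptions.
import torch
from torch.utils.data import DataLoader, TensorDataset

def get_data_loader(df, batch_size=32, test=False):
    features = torch.tensor(df["features"].tolist(), dtype=torch.float)
    targets = torch.tensor(df["score"].values, dtype=torch.float)
    dataset = TensorDataset(features, targets)
    # Keep the test split in file order so predictions stay aligned with it.
    return DataLoader(dataset, batch_size=batch_size, shuffle=not test)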
Example #3
def self_play(n_selfplay):
    global cur_memory, rep_memory
    global Agent

    state_black = deque()
    state_white = deque()
    pi_black = deque()
    pi_white = deque()

    if RESIGN_MODE:
        resign_val_black = []
        resign_val_white = []
        resign_val = []
        resign_v = -1.0
        n_resign_thres = N_SELFPLAY // 4

    for episode in range(n_selfplay):
        if (episode + 1) % 10 == 0:
            logging.warning('Playing Episode {:3}'.format(episode + 1))

        env = game.GameState('text')
        board = np.zeros((BOARD_SIZE, BOARD_SIZE), 'float')
        turn = 0
        root_id = (0, )
        win_index = 0
        time_steps = 0
        action_index = None

        if RESIGN_MODE:
            resign_index = 0

        while win_index == 0:
            if PRINT_SELFPLAY:
                utils.render_str(board, BOARD_SIZE, action_index)

            # ====================== start MCTS ============================ #

            if time_steps < TAU_THRES:
                tau = 1
            else:
                tau = 0

            pi = Agent.get_pi(root_id, tau)

            # ===================== collect samples ======================== #

            state = utils.get_state_pt(root_id, BOARD_SIZE, IN_PLANES)

            if turn == 0:
                state_black.appendleft(state)
                pi_black.appendleft(pi)
            else:
                state_white.appendleft(state)
                pi_white.appendleft(pi)

            # ======================== get action ========================== #

            action, action_index = utils.get_action(pi)
            root_id += (action_index, )

            # ====================== print evaluation ====================== #

            if PRINT_SELFPLAY:
                Agent.model.eval()
                with torch.no_grad():
                    state_input = torch.tensor([state]).to(device).float()
                    p, v = Agent.model(state_input)
                    p = p.cpu().numpy()[0]
                    v = v.item()

                    print('\nPi:\n{}'.format(
                        pi.reshape(BOARD_SIZE, BOARD_SIZE).round(decimals=2)))
                    print('\nPolicy:\n{}'.format(
                        p.reshape(BOARD_SIZE, BOARD_SIZE).round(decimals=2)))

                if turn == 0:
                    print("\nBlack's win%: {:.2f}%".format((v + 1) / 2 * 100))
                    if RESIGN_MODE:
                        if episode < n_resign_thres:
                            resign_val_black.append(v)
                        elif v < resign_v:
                            resign_index = 2
                            if PRINT_SELFPLAY:
                                print('"Black Resign!"')
                else:
                    print("\nWhite's win%: {:.2f}%".format((v + 1) / 2 * 100))
                    if RESIGN_MODE:
                        if episode < n_resign_thres:
                            resign_val_white.append(v)
                        elif v < resign_v:
                            resign_index = 1
                            if PRINT_SELFPLAY:
                                print('"White Resign!"')

            # =========================== step ============================= #

            board, _, win_index, turn, _ = env.step(action)
            time_steps += 1

            # ========================== result ============================ #

            if RESIGN_MODE:
                if resign_index != 0:
                    win_index = resign_index
                    result['Resign'] += 1

            if win_index != 0:
                if win_index == 1:
                    reward_black = 1.
                    reward_white = -1.
                    result['Black'] += 1

                    if RESIGN_MODE:
                        if episode < n_resign_thres:
                            for val in resign_val_black:
                                resign_val.append(val)
                            resign_val_black.clear()
                            resign_val_white.clear()

                elif win_index == 2:
                    reward_black = -1.
                    reward_white = 1.
                    result['White'] += 1

                    if RESIGN_MODE:
                        if episode < n_resign_thres:
                            for val in resign_val_white:
                                resign_val.append(val)
                            resign_val_white.clear()
                            resign_val_black.clear()
                else:
                    reward_black = 0.
                    reward_white = 0.
                    result['Draw'] += 1

                    if RESIGN_MODE:
                        if episode < n_resign_thres:
                            for val in resign_val_black:
                                resign_val.append(val)
                            for val in resign_val_white:
                                resign_val.append(val)
                            resign_val_black.clear()
                            resign_val_white.clear()

                if RESIGN_MODE:
                    if episode + 1 == n_resign_thres:
                        resign_v = min(resign_val)
                        resign_val.clear()

                    if PRINT_SELFPLAY:
                        print('Resign win%: {:.2f}%'.format(
                            (resign_v + 1) / 2 * 100))

            # ====================== store in memory ======================= #

                while state_black or state_white:
                    if state_black:
                        cur_memory.append(
                            (state_black.pop(), pi_black.pop(), reward_black))
                    if state_white:
                        cur_memory.append(
                            (state_white.pop(), pi_white.pop(), reward_white))

            # =========================  result  =========================== #

                if PRINT_SELFPLAY:
                    utils.render_str(board, BOARD_SIZE, action_index)

                    bw, ww, dr, rs = result['Black'], result['White'], \
                        result['Draw'], result['Resign']
                    print('')
                    print('=' * 20, " {:3} Game End   ".format(episode + 1),
                          '=' * 20)
                    print('Black Win: {:3}   '
                          'White Win: {:3}   '
                          'Draw: {:2}   '
                          'Win%: {:.2f}%'
                          '\nResign: {:2}'.format(bw, ww, dr, (bw + 0.5 * dr) /
                                                  (bw + ww + dr) * 100, rs))
                    print('current memory size:', len(cur_memory))

                Agent.reset()

    rep_memory.extend(utils.augment_dataset(cur_memory, BOARD_SIZE))
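
# A minimal sketch of utils.augment_dataset as called above; the real helper
# is not shown on this page. A common choice for square-board self-play data
# is to expand each (state, pi, reward) sample with the eight board
# symmetries. The (planes, height, width) state layout is an assumption.
import numpy as np

def augment_dataset(memory, board_size):
    augmented = []
    for state, pi, reward in memory:
        pi_board = np.asarray(pi).reshape(board_size, board_size)
        for k in range(4):
            # Rotate the feature planes and the policy together.
            rot_state = np.rot90(state, k, axes=(1, 2))
            rot_pi = np.rot90(pi_board, k)
            augmented.append((rot_state, rot_pi.flatten(), reward))
            # Add the mirrored version of each rotation as well.
            augmented.append((np.flip(rot_state, axis=2),
                              np.fliplr(rot_pi).flatten(), reward))
    return augmented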
Example #4
def self_play(agent, cur_memory, rank=0):
    agent.model.eval()
    state_black = deque()
    state_white = deque()
    pi_black = deque()
    pi_white = deque()
    episode = 0
    while True:
        if (episode + 1) % 10 == 0:
            logging.info('Playing Episode {:3}'.format(episode + 1))

        env = game.GameState('text')
        board = np.zeros((BOARD_SIZE, BOARD_SIZE), 'float')
        turn = 0
        root_id = (0, )
        win_index = 0
        time_steps = 0
        action_index = None

        while win_index == 0:
            if PRINT_SELFPLAY and rank == 0:
                utils.render_str(board, BOARD_SIZE, action_index)

            # ====================== start MCTS ============================ #

            if time_steps < TAU_THRES:
                tau = 1
            else:
                tau = 0

            pi = agent.get_pi(root_id, tau, rank)

            # ===================== collect samples ======================== #

            state = utils.get_state_pt(root_id, BOARD_SIZE, IN_PLANES)

            if turn == 0:
                state_black.appendleft(state)
                pi_black.appendleft(pi)
            else:
                state_white.appendleft(state)
                pi_white.appendleft(pi)

            # ======================== get action ========================== #

            action, action_index = utils.get_action(pi)
            root_id += (action_index, )

            # ====================== print evaluation ====================== #

            if PRINT_SELFPLAY and rank == 0:
                with torch.no_grad():
                    state_input = torch.tensor([state]).to(device).float()
                    p, v = agent.model(state_input)
                    p = p.cpu().numpy()[0]
                    v = v.item()

                    print('\nPi:\n{}'.format(
                        pi.reshape(BOARD_SIZE, BOARD_SIZE).round(decimals=2)))
                    print('\nPolicy:\n{}'.format(
                        p.reshape(BOARD_SIZE, BOARD_SIZE).round(decimals=2)))

                if turn == 0:
                    print("\nBlack's win%: {:.2f}%".format((v + 1) / 2 * 100))
                else:
                    print("\nWhite's win%: {:.2f}%".format((v + 1) / 2 * 100))

            # =========================== step ============================= #

            board, _, win_index, turn, _ = env.step(action)
            time_steps += 1

            # ========================== result ============================ #

            if win_index != 0:
                if win_index == 1:
                    reward_black = 1.
                    reward_white = -1.
                    result['Black'] += 1

                elif win_index == 2:
                    reward_black = -1.
                    reward_white = 1.
                    result['White'] += 1

                else:
                    reward_black = 0.
                    reward_white = 0.
                    result['Draw'] += 1

            # ====================== store in memory ======================= #

                while state_black or state_white:
                    if state_black:
                        cur_memory.append(
                            (state_black.pop(), pi_black.pop(), reward_black))
                    if state_white:
                        cur_memory.append(
                            (state_white.pop(), pi_white.pop(), reward_white))

            # =========================  result  =========================== #

                if PRINT_SELFPLAY and rank == 0:
                    utils.render_str(board, BOARD_SIZE, action_index)

                    bw, ww, dr = result['Black'], result['White'], \
                        result['Draw']
                    print('')
                    print('=' * 20, " {:3} Game End   ".format(episode + 1),
                          '=' * 20)
                    print('Black Win: {:3}   '
                          'White Win: {:3}   '
                          'Draw: {:2}   '
                          'Win%: {:.2f}%'.format(bw, ww, dr, (bw + 0.5 * dr) /
                                                 (bw + ww + dr) * 100))
                    print('current memory size:', len(cur_memory))
                episode += 1
                agent.reset()
                if len(cur_memory) >= MEMORY_SIZE:
                    return utils.augment_dataset(cur_memory, BOARD_SIZE)
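
# A hypothetical driver for the worker above; the Agent constructor and the
# replay-buffer size are assumptions, since the page does not show the
# caller. self_play returns the symmetry-augmented samples once cur_memory
# reaches MEMORY_SIZE.
if __name__ == '__main__':
    agent = Agent()          # hypothetical constructor for the MCTS agent
    cur_memory = deque()
    rep_memory = deque(maxlen=MEMORY_SIZE * 8)
    rep_memory.extend(self_play(agent, cur_memory, rank=0))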