Example #1
def get_model():
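    """Build a JanggiNetwork(N_RESIDUAL_DEFAULT), load the latest saved weights
    through ModelSaver, and return the model on DEVICE in eval mode."""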
    model = JanggiNetwork(N_RESIDUAL_DEFAULT)

    def load_latest_model():
        model_saver_temp = ModelSaver()
        model_saver_temp.load_latest_model(model)

    load_latest_model()
    model.to(DEVICE)
    model.eval()
    return model
Example #2
def get_model():
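    """Build a JanggiNetwork with its default configuration, load the latest
    saved weights through ModelSaver, and return the model on DEVICE in eval mode."""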
    model = JanggiNetwork()

    def load_latest_model():
        model_saver = ModelSaver()
        model_saver.load_latest_model(model)

    load_latest_model()
    model.to(DEVICE)
    model.eval()
    return model
Example #3
class Trainer:
    def __init__(self,
                 predictor,
                 n_simulations=800,
                 iter_max=200,
                 n_simulation_opponent=800,
                 dir_base="model"):
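        """Set up the predictor on DEVICE, its SGD optimizer, and the ModelSaver
        used for checkpoints and episodes. Unless TRAIN_NEW_MODEL is set, the
        latest saved weights are loaded, and an old_model is created to serve as
        the reference opponent during evaluation."""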
        print("Setting trainer")
        self.predictor = predictor.to(DEVICE)
        self.n_simulations = n_simulations
        self.iter_max = iter_max
        self.n_simulations_opponent = n_simulation_opponent
        self.model_saver = ModelSaver(dir_base)
        self.optimizer = torch.optim.SGD(self.predictor.parameters(),
                                         lr=LEARNING_RATE,
                                         momentum=0.9,
                                         weight_decay=0.0001)
        if not TRAIN_NEW_MODEL:
            self.model_saver.load_latest_model(self.predictor, self.optimizer)
        self.old_model = JanggiNetwork(20)
        self.old_model.to(DEVICE)

    def run_episode(self):
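        """Play one self-play game between two NNPlayers that share the predictor,
        starting from a random board. Each move is recorded as
        [board features, MCTS policy, player color], together with a data-augmented
        copy; set_winner() fills in the value targets once the game ends, and the
        list of examples is returned."""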
        examples = []
        board = get_random_board()
        initial_node = MCTSNode(is_initial=True)
        player_blue = NNPlayer(Color.BLUE,
                               n_simulations=self.n_simulations,
                               current_node=initial_node,
                               janggi_net=self.predictor,
                               temperature_start=1,
                               temperature_threshold=30,
                               temperature_end=0.01)
        player_red = NNPlayer(Color.RED,
                              n_simulations=self.n_simulations,
                              current_node=initial_node,
                              janggi_net=self.predictor,
                              temperature_start=1,
                              temperature_threshold=30,
                              temperature_end=0.01)
        game = Game(player_blue, player_red, board)
        while not game.is_finished(self.iter_max):
            new_action = game.get_next_action()
            game.actions.append(new_action)
            if game.current_player == Color.BLUE:
                examples.append([
                    board.get_features(game.current_player, game.round),
                    player_blue.current_node.get_policy(game.current_player),
                    Color.BLUE
                ])
                examples.append([
                    board.get_features(game.current_player,
                                       game.round,
                                       data_augmentation=True),
                    player_blue.current_node.get_policy(
                        game.current_player, data_augmentation=True),
                    Color.BLUE
                ])
            else:
                examples.append([
                    board.get_features(game.current_player,
                                       game.round,
                                       data_augmentation=True),
                    player_red.current_node.get_policy(game.current_player,
                                                       data_augmentation=True),
                    Color.RED
                ])
                examples.append([
                    board.get_features(game.current_player, game.round),
                    player_red.current_node.get_policy(game.current_player),
                    Color.RED
                ])
            game.board.apply_action(new_action)
            game.switch_player()
            game.board.invalidate_action_cache(new_action)  # Try to reduce memory usage
            game.round += 1
        winner = game.get_winner()
        set_winner(examples, winner)
        return examples

    def learn_policy(self, n_iterations, n_episodes):
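        """For each iteration, reuse the last saved batch of episodes if one exists,
        otherwise generate n_episodes self-play episodes and save them, then train
        on the examples and evaluate the result with train_and_fight()."""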
        for _ in range(n_iterations):
            if self.model_saver.has_last_episode():
                examples = self.model_saver.load_last_episode()
            else:
                examples = []
                for ep in range(n_episodes):
                    begin_time = time.time()
                    examples += self.run_episode()
                    print("Time Episode", ep, ": ", time.time() - begin_time)
                self.model_saver.save_episodes(examples)
            self.train_and_fight(examples)

    def learn_supervised(self, training_file):
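        """Convert a raw training file into examples and run train_and_fight() on them."""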
        print("Generate training data...")
        with open(training_file) as f:
            examples_all = list(_raw_to_examples(f))
        print("Start training")
        self.train_and_fight(examples_all)

    def continuous_learning(self):
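        """Endless training loop: whenever new raw episodes are available, run one
        round of continuous_learning_once(); otherwise wait and poll again."""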
        self.model_saver.load_latest_model(self.old_model, None)
        self.old_model.to(DEVICE)
        while True:
            if self.model_saver.has_last_episode_raw():
                print("Start new learning")
                self.continuous_learning_once()
            else:
                print("Waiting for more episodes")
                time.sleep(WAINTING_TIME_IF_NO_EPISODE)

    def continuous_learning_once(self):
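        """Train on the saved raw episodes, then play N_FIGHTS evaluation games
        against the previous best model (half as BLUE, half as RED). The new
        weights are saved only if the victory rate exceeds VICTORY_THRESHOLD."""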
        # First, train
        for _ in range(EPOCH_NUMBER_CONTINUOUS):
            training_set = []
            for example in _raw_to_examples(
                    self.model_saver.all_episodes_raw_iterators(),
                    PROP_POPULATION_FOR_LEARNING):
                training_set.append(example)
                if len(training_set) > N_LAST_GAME_TO_CONSIDER:
                    if not TRAIN_ON_ALL:
                        break
                    self.train(training_set)
                    training_set = []
            self.train(training_set)
        # Then, fight!
        # old_model = copy.deepcopy(self.predictor)
        self.model_saver.load_latest_model(self.old_model, None)
        self.old_model.to(DEVICE)
        victories = 0
        print("Start the fights!")
        for i in range(N_FIGHTS):
            if i < N_FIGHTS / 2:
                print("I am BLUE")
                new_player = NNPlayer(Color.BLUE,
                                      n_simulations=self.n_simulations,
                                      janggi_net=self.predictor,
                                      temperature_start=0.01,
                                      temperature_threshold=30,
                                      temperature_end=0.01)
                old_player = NNPlayer(Color.RED,
                                      n_simulations=self.n_simulations,
                                      janggi_net=self.old_model,
                                      temperature_start=0.01,
                                      temperature_threshold=30,
                                      temperature_end=0.01)
                winner = fight(new_player, old_player, self.iter_max)
                if winner == Color.BLUE:
                    victories += 1
            else:
                print("I am RED")
                new_player = NNPlayer(Color.RED,
                                      n_simulations=self.n_simulations,
                                      janggi_net=self.predictor,
                                      temperature_start=0.01,
                                      temperature_threshold=30,
                                      temperature_end=0.01)
                old_player = NNPlayer(Color.BLUE,
                                      n_simulations=self.n_simulations,
                                      janggi_net=self.old_model,
                                      temperature_start=0.01,
                                      temperature_threshold=30,
                                      temperature_end=0.01)
                winner = fight(old_player, new_player, self.iter_max)
                if winner == Color.RED:
                    victories += 1
            # There is no more hope: even winning every remaining game could not
            # push the victory rate above VICTORY_THRESHOLD, so stop fighting early.
            if (victories + N_FIGHTS - i - 1) / N_FIGHTS * 100 < VICTORY_THRESHOLD:
                break
        victory_percentage = victories / N_FIGHTS * 100
        if victory_percentage > VICTORY_THRESHOLD:
            # Replace model
            print("The model was good enough", victory_percentage)
            self.model_saver.save_weights(self.predictor,
                                          optimizer=self.optimizer)
        else:
            # We do not save the model
            print("The model was not good enough", victory_percentage)
            # self.model_saver.load_latest_model(self.predictor, optimizer=self.optimizer)

    def train_and_fight(self, examples):
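        """Train on the examples, play the benchmark games, save the updated
        weights and rename the last episode file."""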
        self.train(examples)
        self.organize_fight()

        self.model_saver.save_weights(self.predictor, optimizer=self.optimizer)
        self.model_saver.rename_last_episode()

    def organize_fight(self):
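        """Benchmark the current predictor: one game against a RandomPlayer and one
        against a RandomMCTSPlayer, with the NNPlayer playing BLUE in both."""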
        player_red = RandomPlayer(Color.RED)
        player_blue = NNPlayer(Color.BLUE,
                               n_simulations=self.n_simulations,
                               janggi_net=self.predictor,
                               temperature_start=0.01,
                               temperature_threshold=30,
                               temperature_end=0.01)
        fight(player_blue, player_red, self.iter_max)
        player_red = RandomMCTSPlayer(
            Color.RED,
            n_simulations=self.n_simulations_opponent,
            temperature_start=0.01,
            temperature_threshold=30,
            temperature_end=0.01)
        player_blue = NNPlayer(Color.BLUE,
                               n_simulations=self.n_simulations,
                               janggi_net=self.predictor,
                               temperature_start=0.01,
                               temperature_threshold=30,
                               temperature_end=0.01)
        fight(player_blue, player_red, self.iter_max)

    def train(self, examples):
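        """Run EPOCH_NUMBER epochs of supervised training on the examples with the
        JanggiLoss criterion, logging the running loss every LOG_PRINT_FREQ batches,
        then switch the predictor back to eval mode."""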
        self.predictor.train()
        criterion = JanggiLoss()
        dataset = ExampleDataset(examples)
        if examples:
            dataloader = DataLoader(dataset,
                                    batch_size=BATCH_SIZE,
                                    shuffle=True,
                                    num_workers=0)
        else:
            # No examples were given: iterate over the empty list so the
            # training loop below simply does nothing.
            dataloader = examples

        for epoch in range(EPOCH_NUMBER):
            running_loss = 0.0
            for i, example in enumerate(dataloader):
                board, actions, value = example
                self.optimizer.zero_grad()
                board = board.to(DEVICE)
                policy, value_predicted = self.predictor(board)
                value_predicted = value_predicted.view(-1, 1)
                policy = policy.to(DEVICE)
                value_predicted = value_predicted.to(DEVICE)
                actions = actions.to(DEVICE)
                value = value.view(-1, 1).to(DEVICE)
                loss = criterion((policy, value_predicted), (actions, value))
                loss.backward()
                self.optimizer.step()
                running_loss += loss.item()
                if i % LOG_PRINT_FREQ == LOG_PRINT_FREQ - 1:
                    print('[%d, %5d] loss: %.3f' %
                          (epoch + 1, i + 1, running_loss / LOG_PRINT_FREQ))
                    running_loss = 0.0
        self.predictor.eval()
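
A minimal usage sketch (not part of the original listing; it assumes the project's
JanggiNetwork, N_RESIDUAL_DEFAULT and related constants are importable, and the
argument values are illustrative only):

# Drive the Trainer either by batched self-play or by the continuous-learning loop.
trainer = Trainer(JanggiNetwork(N_RESIDUAL_DEFAULT))
trainer.learn_policy(n_iterations=10, n_episodes=25)
# Alternatively, keep training as long as new raw episodes appear on disk:
# trainer.continuous_learning()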