def get_model():
    model = JanggiNetwork(N_RESIDUAL_DEFAULT)

    def load_latest_model():
        model_saver_temp = ModelSaver()
        model_saver_temp.load_latest_model(model)

    load_latest_model()
    model.to(DEVICE)
    model.eval()
    return model
def get_model():
    model = JanggiNetwork()

    def load_latest_model():
        model_saver = ModelSaver()
        model_saver.load_latest_model(model)

    load_latest_model()
    model.to(DEVICE)
    model.eval()
    return model
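# Hedged usage sketch (not part of the original sources): one way the loaded
# network might be plugged into an NNPlayer for evaluation-time play. All names
# (get_model, NNPlayer, Color) come from this code base; the temperature values
# simply mirror the ones used by the Trainer below.
if __name__ == "__main__":
    model = get_model()  # latest checkpoint, moved to DEVICE, in eval mode
    blue_player = NNPlayer(Color.BLUE,
                           n_simulations=800,
                           janggi_net=model,
                           temperature_start=0.01,
                           temperature_threshold=30,
                           temperature_end=0.01)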
class Trainer:
    """Runs self-play episodes, trains the network and evaluates it against a previous version."""

    def __init__(self, predictor, n_simulations=800, iter_max=200,
                 n_simulation_opponent=800, dir_base="model"):
        print("Setting trainer")
        self.predictor = predictor.to(DEVICE)
        self.n_simulations = n_simulations
        self.iter_max = iter_max
        self.n_simulations_opponent = n_simulation_opponent
        self.model_saver = ModelSaver(dir_base)
        self.optimizer = torch.optim.SGD(self.predictor.parameters(),
                                         lr=LEARNING_RATE,
                                         momentum=0.9,
                                         weight_decay=0.0001)
        if not TRAIN_NEW_MODEL:
            self.model_saver.load_latest_model(self.predictor, self.optimizer)
        # Previous best model, used as the opponent in evaluation fights
        self.old_model = JanggiNetwork(20)
        self.old_model.to(DEVICE)

    def run_episode(self):
        """Play one self-play game and return (features, policy, player) training examples."""
        examples = []
        board = get_random_board()
        initial_node = MCTSNode(is_initial=True)
        player_blue = NNPlayer(Color.BLUE,
                               n_simulations=self.n_simulations,
                               current_node=initial_node,
                               janggi_net=self.predictor,
                               temperature_start=1,
                               temperature_threshold=30,
                               temperature_end=0.01)
        player_red = NNPlayer(Color.RED,
                              n_simulations=self.n_simulations,
                              current_node=initial_node,
                              janggi_net=self.predictor,
                              temperature_start=1,
                              temperature_threshold=30,
                              temperature_end=0.01)
        game = Game(player_blue, player_red, board)
        while not game.is_finished(self.iter_max):
            new_action = game.get_next_action()
            game.actions.append(new_action)
            # Record each position twice: once as-is and once with data augmentation
            if game.current_player == Color.BLUE:
                examples.append([
                    board.get_features(game.current_player, game.round),
                    player_blue.current_node.get_policy(game.current_player),
                    Color.BLUE
                ])
                examples.append([
                    board.get_features(game.current_player, game.round,
                                       data_augmentation=True),
                    player_blue.current_node.get_policy(
                        game.current_player, data_augmentation=True),
                    Color.BLUE
                ])
            else:
                examples.append([
                    board.get_features(game.current_player, game.round,
                                       data_augmentation=True),
                    player_red.current_node.get_policy(game.current_player,
                                                       data_augmentation=True),
                    Color.RED
                ])
                examples.append([
                    board.get_features(game.current_player, game.round),
                    player_red.current_node.get_policy(game.current_player),
                    Color.RED
                ])
            game.board.apply_action(new_action)
            game.switch_player()
            game.board.invalidate_action_cache(new_action)  # Try to reduce memory usage
            game.round += 1
        winner = game.get_winner()
        set_winner(examples, winner)
        return examples

    def learn_policy(self, n_iterations, n_episodes):
        """Reload saved episodes if any, otherwise generate them, then train and evaluate."""
        for _ in range(n_iterations):
            if self.model_saver.has_last_episode():
                examples = self.model_saver.load_last_episode()
            else:
                examples = []
                for ep in range(n_episodes):
                    begin_time = time.time()
                    examples += self.run_episode()
                    print("Time Episode", ep, ": ", time.time() - begin_time)
                self.model_saver.save_episodes(examples)
            self.train_and_fight(examples)

    def learn_supervised(self, training_file):
        """Train from a file of recorded games instead of self-play."""
        print("Generate training data...")
        with open(training_file) as f:
            examples_all = list(_raw_to_examples(f))
        print("Start training")
        self.train_and_fight(examples_all)

    def continuous_learning(self):
        """Endless loop: train whenever new raw episodes become available."""
        self.model_saver.load_latest_model(self.old_model, None)
        self.old_model.to(DEVICE)
        while True:
            if self.model_saver.has_last_episode_raw():
                print("Start new learning")
                self.continuous_learning_once()
            else:
                print("Waiting for more episodes")
                time.sleep(WAINTING_TIME_IF_NO_EPISODE)

    def continuous_learning_once(self):
        """Train on the stored raw episodes, then evaluate the new model against the previous best."""
        # First, train
        for _ in range(EPOCH_NUMBER_CONTINUOUS):
            training_set = []
            for example in _raw_to_examples(
                    self.model_saver.all_episodes_raw_iterators(),
                    PROP_POPULATION_FOR_LEARNING):
                training_set.append(example)
                if len(training_set) > N_LAST_GAME_TO_CONSIDER:
                    if not TRAIN_ON_ALL:
                        break
                    self.train(training_set)
                    training_set = []
            self.train(training_set)

        # Then, fight!
        # old_model = copy.deepcopy(self.predictor)
        self.model_saver.load_latest_model(self.old_model, None)
        self.old_model.to(DEVICE)
        victories = 0
        print("Start the fights!")
        # Play half of the evaluation games as BLUE and half as RED
        for i in range(N_FIGHTS):
            if i < N_FIGHTS / 2:
                print("I am BLUE")
                new_player = NNPlayer(Color.BLUE,
                                      n_simulations=self.n_simulations,
                                      janggi_net=self.predictor,
                                      temperature_start=0.01,
                                      temperature_threshold=30,
                                      temperature_end=0.01)
                old_player = NNPlayer(Color.RED,
                                      n_simulations=self.n_simulations,
                                      janggi_net=self.old_model,
                                      temperature_start=0.01,
                                      temperature_threshold=30,
                                      temperature_end=0.01)
                winner = fight(new_player, old_player, self.iter_max)
                if winner == Color.BLUE:
                    victories += 1
            else:
                print("I am RED")
                new_player = NNPlayer(Color.RED,
                                      n_simulations=self.n_simulations,
                                      janggi_net=self.predictor,
                                      temperature_start=0.01,
                                      temperature_threshold=30,
                                      temperature_end=0.01)
                old_player = NNPlayer(Color.BLUE,
                                      n_simulations=self.n_simulations,
                                      janggi_net=self.old_model,
                                      temperature_start=0.01,
                                      temperature_threshold=30,
                                      temperature_end=0.01)
                winner = fight(old_player, new_player, self.iter_max)
                if winner == Color.RED:
                    victories += 1
            if (victories + N_FIGHTS - i - 1) / N_FIGHTS * 100 < VICTORY_THRESHOLD:
                # There is no more hope: even winning every remaining game
                # cannot reach the threshold
                break

        victory_percentage = victories / N_FIGHTS * 100
        if victory_percentage > VICTORY_THRESHOLD:
            # Replace model
            print("The model was good enough", victory_percentage)
            self.model_saver.save_weights(self.predictor, optimizer=self.optimizer)
        else:
            # We do not save the model
            print("The model was not good enough", victory_percentage)
            # self.model_saver.load_latest_model(self.predictor, optimizer=self.optimizer)

    def train_and_fight(self, examples):
        """Train on the given examples, play evaluation games and save the weights."""
        self.train(examples)
        self.organize_fight()
        self.model_saver.save_weights(self.predictor, optimizer=self.optimizer)
        self.model_saver.rename_last_episode()

    def organize_fight(self):
        """Play evaluation games against a random player and a plain MCTS player."""
        player_red = RandomPlayer(Color.RED)
        player_blue = NNPlayer(Color.BLUE,
                               n_simulations=self.n_simulations,
                               janggi_net=self.predictor,
                               temperature_start=0.01,
                               temperature_threshold=30,
                               temperature_end=0.01)
        fight(player_blue, player_red, self.iter_max)

        player_red = RandomMCTSPlayer(Color.RED,
                                      n_simulations=self.n_simulations_opponent,
                                      temperature_start=0.01,
                                      temperature_threshold=30,
                                      temperature_end=0.01)
        player_blue = NNPlayer(Color.BLUE,
                               n_simulations=self.n_simulations,
                               janggi_net=self.predictor,
                               temperature_start=0.01,
                               temperature_threshold=30,
                               temperature_end=0.01)
        fight(player_blue, player_red, self.iter_max)

    def train(self, examples):
        """Run supervised updates of the policy/value network on the given examples."""
        self.predictor.train()
        criterion = JanggiLoss()
        dataset = ExampleDataset(examples)
        if examples:
            dataloader = DataLoader(dataset, batch_size=BATCH_SIZE,
                                    shuffle=True, num_workers=0)
        else:
            # An empty example list simply yields an empty iterator
            dataloader = examples

        for epoch in range(EPOCH_NUMBER):
            running_loss = 0.0
            for i, example in enumerate(dataloader):
                board, actions, value = example
                self.optimizer.zero_grad()
                board = board.to(DEVICE)
                policy, value_predicted = self.predictor(board)
                value_predicted = value_predicted.view(-1, 1)
                policy = policy.to(DEVICE)
                value_predicted = value_predicted.to(DEVICE)
                actions = actions.to(DEVICE)
                value = value.view(-1, 1).to(DEVICE)
                loss = criterion((policy, value_predicted), (actions, value))
                loss.backward()
                self.optimizer.step()

                running_loss += loss.item()
                if i % LOG_PRINT_FREQ == LOG_PRINT_FREQ - 1:
                    print('[%d, %5d] loss: %.3f' %
                          (epoch + 1, i + 1, running_loss / LOG_PRINT_FREQ))
                    running_loss = 0.0
        self.predictor.eval()
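# Hedged usage sketch (not part of the original sources): how the Trainer might
# be driven. The constructor arguments repeat its defaults; n_iterations and
# n_episodes are illustrative values, and continuous_learning() assumes separate
# self-play workers keep writing raw episodes for the ModelSaver to pick up.
if __name__ == "__main__":
    predictor = JanggiNetwork(N_RESIDUAL_DEFAULT)
    trainer = Trainer(predictor,
                      n_simulations=800,
                      iter_max=200,
                      n_simulation_opponent=800,
                      dir_base="model")
    # Single-process loop: generate episodes, then train and evaluate.
    trainer.learn_policy(n_iterations=1, n_episodes=10)
    # Alternatively, consume episodes produced elsewhere:
    # trainer.continuous_learning()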