def __init__(self, args, init_train_info={}, sub_dir=None):
    self.args = args
    misc.ensure_dir(args.logdir)
    # resume into an existing run directory, or create a timestamped one
    sub_dir = args.continue_from or sub_dir or misc.datetimestr()
    self.logdir = os.path.join(args.logdir, sub_dir)
    misc.ensure_dir(self.logdir)
    self._setup_log_file()
    self._create_train_info(args, init_train_info)
def exploration(board, models, gpu_id, tau_func=default_tau_func,
                policy_noise_ratio=0, resign=None, logger=None):
    import misc
    from ccboard import action_index

    history = []
    cur_node = Node(board)
    winner = None
    game_name = misc.datetimestr()
    for step in range(args().max_game_steps):
        policy, actions = _mcts_policy(cur_node, models[step % 2], gpu_id,
                                       tau_func(step))
        pos, action = _next_action(cur_node.s, policy, actions,
                                   policy_noise_ratio)
        # the node value v is stored now for the resignation check and is
        # overwritten with the final game score once the game ends
        history.append(Experience(cur_node.s, action_index(pos, action),
                                  cur_node.v))
        if resign is not None and cur_node.v < resign:
            winner = (step + 1) % 2  # the current player loses by resignation
            break
        if logger is not None:
            logger.log_game_action(game_name, pos, board.action_list()[action])
        cur_node = cur_node.next_edges[(pos, action)].next_node
        cur_node.prev_edge = None
        if cur_node.s.is_terminated():
            # the current step loses if the next step wins
            next_step_wins = cur_node.s.is_winner()
            winner = (step + 1 if next_step_wins else step) % 2
            break
    if logger is not None:
        logger.end_log_game_action(game_name)

    # fill the final scores back into the history; min_winner_score tracks the
    # lowest value estimate seen on the winner's moves (used to calibrate the
    # resignation threshold)
    min_winner_score = 1
    for step in range(len(history)):
        if winner is None:
            history[step].v = 0
        elif step % 2 == winner:
            if min_winner_score > history[step].v:
                min_winner_score = history[step].v
            history[step].v = 1
        else:
            history[step].v = -1
    return history, winner, min_winner_score
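# NOTE: a minimal sketch of what the temperature schedule referenced above
# could look like; the project's actual default_tau_func is defined elsewhere.
# AlphaZero-style self-play typically samples moves in proportion to visit
# counts (tau = 1) for the opening plies and then plays near-greedily
# (tau -> 0). The 30-ply cutoff below is an assumption, not the repo's value.
def _example_tau_func(step, greedy_after=30):
    return 1.0 if step < greedy_after else 1e-3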
def try_reload_model(model_path, best_model, gpu_id, proc_id, iters):
    from log import Logger
    # reload the best model whenever the trainer has published a newer checkpoint
    cur_model_path = Logger.lastest_model_path(args().logdir)
    if best_model is None or model_path != cur_model_path:
        model_path = cur_model_path
        best_model = Logger._load_model(model_path, gpu_id)
        print('Exploration GPU%d-%d it:%d %s model updated' %
              (gpu_id, proc_id, iters, misc.datetimestr()))
    return model_path, best_model
def compare_models(iters, eval_model, best_model, gpu_id, logger):
    from mcts import exploration
    wins = 0
    iters2 = 0
    for i in range(args().evaluation_games):
        # player 0 is the current best model, player 1 is the candidate,
        # so winner == 1 means the new model won this game
        _, winner, _ = exploration(ccboard.ChessBoard(),
                                   [best_model, eval_model], gpu_id,
                                   policy_noise_ratio=args().policy_noise_ratio)
        if winner is not None:
            wins += winner
        iters2 += 1
        logger.info("Evaluation %d:%d %s New model %s" %
                    (iters, iters2, misc.datetimestr(),
                     'wins' if winner == 1 else 'loses'))
    return wins / float(args().evaluation_games)
def save_model(self, model, model_path=None):
    # unwrap DataParallel so the underlying module is saved
    if isinstance(model, torch.nn.DataParallel):
        model = model.module
    if model_path is None:
        # remove the previous checkpoint before writing a new one
        if 'model_path' in self.train_info and os.path.exists(
                os.path.join(self.logdir, self.train_info['model_path'])):
            print("Removing old model {}".format(
                self.train_info['model_path']))
            os.remove(os.path.join(self.logdir, self.train_info['model_path']))
        model_path = misc.datetimestr() + '.model.pth'
        self.train_info['model_path'] = model_path
    print("Saving model to {}".format(model_path))
    model.save(os.path.join(self.logdir, model_path))
def exploration_process_func(gpu_id, proc_id, queue):
    import numpy
    numpy.random.seed(gpu_id * 101 + proc_id)  # distinct seed per worker
    from mcts import exploration

    model_path = None
    best_model = None
    iters = 0
    while True:
        iters += 1
        # pick up the latest best model published by the trainer
        model_path, best_model = try_reload_model(model_path, best_model,
                                                  gpu_id, proc_id, iters)
        # self-play: the best model plays both sides
        history, _, _ = exploration(ccboard.ChessBoard(),
                                    [best_model, best_model], gpu_id,
                                    policy_noise_ratio=args().policy_noise_ratio)
        print('Exploration GPU%d-%d it:%d %s history_size:%d' %
              (gpu_id, proc_id, iters, misc.datetimestr(), len(history)))
        queue.put(history)
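# NOTE: a hedged sketch of how the self-play workers above might be launched;
# the project's actual entry point is not shown in this section. It assumes
# one process per (gpu_id, proc_id) pair sharing a single multiprocessing
# queue that train() drains; names and parameters here are illustrative.
def _example_start_exploration_workers(num_gpus, procs_per_gpu):
    import torch.multiprocessing as mp
    ctx = mp.get_context('spawn')  # 'spawn' is safer than 'fork' with CUDA
    queue = ctx.Queue()
    workers = []
    for gpu_id in range(num_gpus):
        for proc_id in range(procs_per_gpu):
            p = ctx.Process(target=exploration_process_func,
                            args=(gpu_id, proc_id, queue), daemon=True)
            p.start()
            workers.append(p)
    return queue, workers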
def evaluation(iters, best_model, model, model_lock, logger):
    global epoch
    gpu_id = args().train_gpu
    model_lock.acquire()
    eval_model = logger.clone_model(model, gpu_id)
    model_lock.release()
    wins = compare_models(iters, eval_model, best_model, gpu_id, logger)
    logger.info(">>> Evaluation %d %s New model wins %.0f%%" %
                (iters, misc.datetimestr(), wins * 100))
    # the candidate replaces the best model only if it wins at least 55%
    if wins < 0.55:
        return best_model
    best_model = eval_model
    logger.train_info['epoch'] = epoch
    logger.save_train_info()
    logger.save_model(best_model)
    return best_model
def train(replay_buffer, model, queue):
    import pickle
    import misc
    from tqdm import tqdm
    import matplotlib.pyplot as plt

    parameters = model.parameters()
    optimizer = torch.optim.SGD(parameters, lr=1e-4, momentum=0.9,
                                nesterov=True)
    max_norm = 400
    loss_history = []
    eval_iters = 1
    best_model = model
    torch.save(best_model, 'best_model.pth')
    for epoch in tqdm(range(500), desc="Training epoch"):
        # anneal the learning rate every 100 epochs
        if (epoch + 1) % 100 == 0:
            optim_state = optimizer.state_dict()
            optim_state['param_groups'][0]['lr'] = \
                optim_state['param_groups'][0]['lr'] / 1.01
            optimizer.load_state_dict(optim_state)
        # self-play with the current best model to refill the buffer
        _history, _, _ = explore(ChessBoard(), [best_model, best_model],
                                 max_steps, 0.25)
        replay_buffer.extend(_history)
        # while not queue.empty():
        #     replay_buffer.extend(queue.get())
        # cap the replay buffer size
        while len(replay_buffer) > 100000:
            replay_buffer.pop()
        loss = train_epoch(replay_buffer, optimizer, model, max_norm)
        print('\nTrain epoch: %d, Buffer:%d, Time: %s, Loss: %.5f' %
              (epoch, len(replay_buffer), misc.datetimestr(), loss))
        loss_history.append(loss)
        plt.clf()
        plt.plot(loss_history)
        plt.savefig("loss.png")
        if (epoch + 1) % evaluation_interval == 0:
            eval_iters += 1
            best_model = evaluation(eval_iters, best_model, model)
    with open('loss.his', 'wb') as f:
        pickle.dump(loss_history, f)
def save_model(self, model, model_path=None):
    to_remove = None
    if model_path is None:
        # remember the old checkpoint; it is removed only after the new
        # one has been written successfully
        if 'model_path' in self.train_info:
            model_path = os.path.join(self.logdir,
                                      self.train_info['model_path'])
            if os.path.exists(model_path):
                to_remove = model_path
                print("Removing old model {}".format(
                    self.train_info['model_path']))
        # new checkpoint name
        model_path = misc.datetimestr() + '.model.pth'
        self.train_info['model_path'] = model_path
    print("Saving model to {}".format(model_path))
    package = {
        'config': self._model_config(),
        'state_dict': model.state_dict(),
    }
    torch.save(package, os.path.join(self.logdir, model_path))
    if to_remove is not None:
        os.remove(to_remove)
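# NOTE: a minimal sketch of the loading counterpart assumed by
# try_reload_model() above; the real Logger._load_model lives elsewhere in the
# repo. It only illustrates reading back the {'config', 'state_dict'} package
# written by save_model(); the Model constructor used here is hypothetical.
def _example_load_model(model_path, gpu_id):
    package = torch.load(model_path, map_location='cpu')
    model = Model(**package['config'])  # hypothetical constructor
    model.load_state_dict(package['state_dict'])
    if torch.cuda.is_available():
        model = model.cuda(gpu_id)
    return model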
def train(replay_buffer, queue, model, model_lock, logger):
    global epoch
    parameters = model.parameters()
    optimizer = torch.optim.SGD(parameters, lr=args().lr,
                                momentum=args().momentum, nesterov=True)
    has_cuda = torch.cuda.is_available()
    gpu_id = args().train_gpu
    max_norm = args().max_norm
    for ep in range(args().epochs):
        epoch = ep
        if (epoch + 1) % args().anneal_interval == 0:
            anneal_lr(optimizer, logger)
        # skip epochs already completed before a restart
        if epoch < logger.train_info['epoch']:
            continue
        # drain self-play games produced by the exploration workers
        while not queue.empty():
            replay_buffer.extend(queue.get())
        # cap the replay buffer size
        while len(replay_buffer) > args().replay_buffer_size:
            replay_buffer.pop()
        loss = train_epoch(replay_buffer, has_cuda, gpu_id, optimizer, model,
                           model_lock, max_norm)
        logger.info('Train epoch: %d, Buffer:%d, Time: %s, Loss: %.5f' %
                    (epoch, len(replay_buffer), misc.datetimestr(), loss))
        logger.train_info['loss'].append(loss)
        if args().plot:
            logger.plot_progress()
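# NOTE: a hedged sketch of the anneal_lr() helper used above, modelled on the
# inline annealing in the earlier train() prototype (lr divided by 1.01 at
# every anneal_interval); the project's real implementation may differ.
def _example_anneal_lr(optimizer, logger, factor=1.01):
    for group in optimizer.param_groups:
        group['lr'] /= factor
    logger.info('Annealed learning rate to %g' %
                optimizer.param_groups[0]['lr'])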