def playGames(self, num, verbose=False):
    """
    Plays num games in which player1 starts num/2 games and player2 starts
    num/2 games.

    The players are swapped after the first half and are not swapped back,
    so self.player1 / self.player2 are left exchanged when this returns.

    Args:
        num: total number of games requested; int(num/2) are played per half.
        verbose: forwarded unchanged to self.playGame.

    Returns:
        oneWon: games won by player1
        twoWon: games won by player2
        draws:  games won by nobody
    """
    eps_time = AverageMeter()
    bar = Bar('Arena.playGames', max=num)
    end = time.time()
    eps = 0
    maxeps = int(num)
    half = int(num / 2)

    oneWon = 0
    twoWon = 0
    draws = 0

    # `sign` is the playGame result that counts as a win for the original
    # player1: +1 while it moves first, -1 once the seats are swapped.
    for sign in (1, -1):
        for _ in range(half):
            gameResult = self.playGame(verbose=verbose)
            if gameResult == sign:
                oneWon += 1
            elif gameResult == -sign:
                twoWon += 1
            else:
                draws += 1

            # bookkeeping + plot progress
            eps += 1
            eps_time.update(time.time() - end)
            end = time.time()
            if self.displaybar:
                # eps is already 1-based here and the denominator is the
                # total game count for both halves.
                bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                    eps=eps, maxeps=maxeps, et=eps_time.avg,
                    total=bar.elapsed_td, eta=bar.eta_td)
                bar.next()

        if sign == 1:
            # Second half: the other player moves first.
            self.player1, self.player2 = self.player2, self.player1

    bar.finish()

    return oneWon, twoWon, draws
def playGames(self, num, verbose=False):
    """
    Plays num games in which player1 starts num/2 games and player2 starts
    num/2 games.

    The players are swapped after the first half and are not swapped back,
    so self.player1 / self.player2 are left exchanged when this returns.

    Args:
        num: total number of games requested; int(num/2) are played per half.
        verbose: forwarded unchanged to self.playGame.

    Returns:
        oneWon: games won by player1
        twoWon: games won by player2
        draws:  games won by nobody
    """
    eps_time = AverageMeter()
    bar = Bar('Arena.playGames', max=num)
    end = time.time()
    eps = 0
    maxeps = int(num)
    games_per_half = int(num / 2)

    oneWon = 0
    twoWon = 0
    draws = 0

    # p1_win is the raw playGame result meaning "original player1 won":
    # +1 while player1 starts, -1 after the swap.
    for p1_win in (1, -1):
        for _ in range(games_per_half):
            gameResult = self.playGame(verbose=verbose)
            if gameResult == p1_win:
                oneWon += 1
            elif gameResult == -p1_win:
                twoWon += 1
            else:
                draws += 1

            # bookkeeping + plot progress
            eps += 1
            eps_time.update(time.time() - end)
            end = time.time()
            # eps was just incremented, so it is already the 1-based game
            # number; maxeps is the total over both halves.
            bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                eps=eps, maxeps=maxeps, et=eps_time.avg,
                total=bar.elapsed_td, eta=bar.eta_td)
            bar.next()

        if p1_win == 1:
            self.player1, self.player2 = self.player2, self.player1

    bar.finish()

    return oneWon, twoWon, draws
def parallel_self_play(self):
    """
    Runs AsyncSelfPlay in self.args.numSelfPlayProcess worker processes and
    returns all produced training data concatenated into one flat list.

    Each worker receives (self.game, self.args, worker index, bar).
    """
    pool = multiprocessing.Pool(processes=self.args.numSelfPlayProcess)
    bar = Bar('Self Play(each process)', max=self.args.numPerProcessSelfPlay)

    pending = [
        pool.apply_async(AsyncSelfPlay, args=(self.game, self.args, worker_id, bar))
        for worker_id in range(self.args.numSelfPlayProcess)
    ]

    # No more tasks will be submitted; wait for every worker to finish.
    pool.close()
    pool.join()

    worker_outputs = [handle.get() for handle in pending]

    # Each worker output is a sequence of sequences of example lists.
    temp = []
    for output in worker_outputs:
        for group in output:
            for trainData in group:
                temp += trainData
    return temp
def train(self, examples):
    """
    Trains the network for args.epochs epochs.

    examples: list of examples, each example is of form (board, pi, v)

    Every epoch runs int(len(examples) / args.batch_size) steps; each step
    draws args.batch_size examples uniformly at random with replacement.
    """
    for epoch in range(args.epochs):
        print('EPOCH ::: ' + str(epoch + 1))
        data_time = AverageMeter()
        batch_time = AverageMeter()
        pi_losses = AverageMeter()
        v_losses = AverageMeter()
        end = time.time()

        total_batches = int(len(examples) / args.batch_size)
        bar = Bar('Training Net', max=total_batches)

        # self.sess.run(tf.local_variables_initializer())
        for batch_idx in range(1, total_batches + 1):
            sample_ids = np.random.randint(len(examples), size=args.batch_size)
            boards, pis, vs = list(zip(*[examples[i] for i in sample_ids]))

            # predict and compute gradient and do SGD step
            input_dict = {
                self.nnet.input_boards: boards,
                self.nnet.target_pis: pis,
                self.nnet.target_vs: vs,
                self.nnet.dropout: args.dropout,
                self.nnet.isTraining: True,
            }

            # measure data loading time
            data_time.update(time.time() - end)

            # optimisation step, then a second run to read the losses
            self.sess.run(self.nnet.train_step, feed_dict=input_dict)
            pi_loss, v_loss = self.sess.run(
                [self.nnet.loss_pi, self.nnet.loss_v], feed_dict=input_dict)
            pi_losses.update(pi_loss, len(boards))
            v_losses.update(v_loss, len(boards))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            # plot progress
            bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss_pi: {lpi:.4f} | Loss_v: {lv:.3f}'.format(
                batch=batch_idx,
                size=total_batches,
                data=data_time.avg,
                bt=batch_time.avg,
                total=bar.elapsed_td,
                eta=bar.eta_td,
                lpi=pi_losses.avg,
                lv=v_losses.avg,
            )
            bar.next()
        bar.finish()
def gen_samples(self, iteration, proc_num):
    """
    Plays numEps // genFilesPerIteration self-play episodes, each with a
    fresh MCTS, and saves the collected examples via
    self.saveTrainExamples(iteration - 1, proc_num, examples).
    """
    print('------ITER ' + str(iteration) + '------')

    episodes = self.args["numEps"] // self.args["genFilesPerIteration"]
    iterationTrainExamples = deque([], maxlen=self.args["maxlenOfQueue"])

    eps_time = AverageMeter()
    bar = Bar('Self Play', max=episodes)
    end = time.time()

    for eps in range(episodes):
        # reset search tree
        self.mcts = MCTS(self.game, self.nnet, self.args)
        iterationTrainExamples += self.executeEpisode()

        # bookkeeping + plot progress
        now = time.time()
        eps_time.update(now - end)
        end = time.time()
        bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
            eps=eps + 1,
            maxeps=episodes,
            et=eps_time.avg,
            total=bar.elapsed_td,
            eta=bar.eta_td)
        bar.next()
    bar.finish()

    self.saveTrainExamples(iteration - 1, proc_num, iterationTrainExamples)
def executeEpisodes(self, game, nnet, args, iteration):
    """
    Executes a number of episodes specified in args

    Stores game, nnet and args on self, then plays args.numEps episodes,
    each with a fresh MCTS whose debug folder is
    Debug/<iteration>-<episode>. Returns the collected examples as a
    deque bounded by args.maxlenOfQueue.
    """
    self.game, self.nnet, self.args = game, nnet, args
    self.folder = self.args.folder

    total = self.args.numEps
    iterationTrainExamples = deque([], maxlen=self.args.maxlenOfQueue)
    eps_time = AverageMeter()
    bar = Bar('Self Play', max=total)
    end = time.time()

    for eps in range(total):
        # reset search tree
        self.mcts = MCTS(self.game, self.nnet, self.args)
        debug_name = str(iteration) + "-" + str(eps)
        self.mcts.debug.folder = os.path.join("Debug", debug_name)

        iterationTrainExamples += self.executeEpisode(eps)

        # print MCTS stats once the episode's search tree is complete
        self.mcts.print_stats()

        # bookkeeping + plot progress
        eps_time.update(time.time() - end)
        end = time.time()
        bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
            eps=eps + 1, maxeps=total, et=eps_time.avg,
            total=bar.elapsed_td, eta=bar.eta_td)
        bar.next()
    bar.finish()

    return iterationTrainExamples
def playGames(self, num, verbose=False):
    """
    Plays num games with self.playGame and collects the raw results.

    Returns:
        scores: list with the return value of every game, in play order
    """
    eps_time = AverageMeter()
    bar = Bar('Arena.playGames', max=num)
    end = time.time()
    maxeps = int(num)

    scores = []
    for eps in range(1, num + 1):
        scores.append(self.playGame(verbose=verbose))

        # bookkeeping + plot progress
        eps_time.update(time.time() - end)
        end = time.time()
        bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
            eps=eps, maxeps=maxeps, et=eps_time.avg,
            total=bar.elapsed_td, eta=bar.eta_td)
        bar.next()

    bar.finish()

    return scores
def playGames(self, num):
    """
    plays a number of games

    For each lonely player (1, 2) and each lonely turn (0..2), int(num / 6)
    games are played. A game is only played while both cumulative scores
    are below self.args.updateThreshold * (num * 4); once either score
    reaches that limit the remaining games are skipped.

    :param num: number of games, has to be divisible by 6 for fair games
    :return: the summed scores of each agent
    """
    eps_time = AverageMeter()
    bar = Bar('Arena.playGames', max=num)
    end = time.time()
    eps = 0
    maxeps = int(num)
    max_scores = num * 4
    games_per_seat = int(num / 6)

    scores = [0, 0]
    for lonely_player in [1, 2]:
        for lonely_turn in range(3):
            for _ in range(games_per_seat):
                limit = self.args.updateThreshold * max_scores
                if not (scores[0] < limit and scores[1] < limit):
                    # Outcome already decided; skip the remaining games.
                    # NOTE(review): assumes the progress bookkeeping below
                    # belonged inside the original `if` - confirm.
                    continue

                self.game.reset_logic()
                print("New Game")
                print("Lonely Player: " + str(lonely_player))
                print("Lonely Turn: " + str(lonely_turn + 1))
                gameResult = self.playGame(lonely_player, lonely_turn)
                print("RESULTS")
                print(gameResult)

                # The lonely seat scores for lonely_player; the other two
                # seats score for the opposing agent.
                for t in range(3):
                    if t == lonely_turn:
                        scores[lonely_player - 1] += gameResult[t]
                    else:
                        scores[2 - lonely_player] += gameResult[t]
                print("CUMMULATED RESULTS:")
                print(scores)

                # bookkeeping + plot progress
                eps += 1
                eps_time.update(time.time() - end)
                end = time.time()
                # eps is already 1-based after the increment above.
                bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                    eps=eps,
                    maxeps=maxeps,
                    et=eps_time.avg,
                    total=bar.elapsed_td,
                    eta=bar.eta_td)
                bar.next()

    bar.finish()

    return scores
def train(trainloader, model, criterion, optimizer, epoch, use_cuda):
    """
    Runs one training pass over trainloader.

    Args:
        trainloader: iterable yielding (inputs, targets) batches.
        model: network to train; put into train mode here.
        criterion: loss callable taking (outputs, targets).
        optimizer: optimizer stepped once per batch.
        epoch: accepted for interface compatibility; not used here.
        use_cuda: when true, batches are moved to the GPU first.

    Returns:
        (average loss, average top-1 precision) over the pass.
    """
    # switch to train mode
    model.train()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()

    bar = Bar('Processing', max=len(trainloader))
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        # measure data loading time
        data_time.update(time.time() - end)

        if use_cuda:
            # `async` is a reserved word since Python 3.7; the keyword was
            # renamed to non_blocking in PyTorch 0.4.
            inputs, targets = inputs.cuda(), targets.cuda(non_blocking=True)

        # compute output
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # measure accuracy and record loss
        prec1, prec5 = accuracy(outputs.data, targets.data, topk=(1, 5))
        # .item() replaces .data[0], which fails on 0-dim loss tensors.
        losses.update(loss.item(), inputs.size(0))
        # NOTE(review): prec1/prec5 come from the project's accuracy()
        # helper; indexing kept as in the original - confirm its shape.
        top1.update(prec1[0], inputs.size(0))
        top5.update(prec5[0], inputs.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # plot progress
        bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss: {loss:.4f} | top1: {top1: .4f} | top5: {top5: .4f}'.format(
            batch=batch_idx + 1,
            size=len(trainloader),
            data=data_time.avg,
            bt=batch_time.avg,
            total=bar.elapsed_td,
            eta=bar.eta_td,
            loss=losses.avg,
            top1=top1.avg,
            top5=top5.avg,
        )
        bar.next()
    bar.finish()
    return (losses.avg, top1.avg)
def learn(self):
    """
    Runs self.args.numIters iterations. Each iteration plays
    self.args.numEps self-play episodes, appending their examples to a
    single deque bounded by self.args.maxlenOfQueue, then trains the
    network on that deque and pits it against a copy of the pre-training
    network. The new network is rejected only if some games were decisive
    and its share of decisive wins is below self.args.updateThreshold;
    otherwise it is checkpointed.
    """
    trainExamples = deque([], maxlen=self.args.maxlenOfQueue)
    checkpoint_dir_kwargs = {'folder': self.args.checkpoint}

    for i in range(self.args.numIters):
        # bookkeeping
        print('------ITER ' + str(i+1) + '------')
        eps_time = AverageMeter()
        bar = Bar('Self Play', max=self.args.numEps)
        end = time.time()

        for eps in range(self.args.numEps):
            # reset search tree
            self.mcts = MCTS(self.game, self.nnet, self.args)
            trainExamples += self.executeEpisode()

            # bookkeeping + plot progress
            eps_time.update(time.time() - end)
            end = time.time()
            bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                eps=eps+1, maxeps=self.args.numEps, et=eps_time.avg,
                total=bar.elapsed_td, eta=bar.eta_td)
            bar.next()
        bar.finish()

        # training new network, keeping a copy of the old one
        self.nnet.save_checkpoint(filename='temp.pth.tar', **checkpoint_dir_kwargs)
        pnet = self.nnet.__class__(self.game)
        pnet.load_checkpoint(filename='temp.pth.tar', **checkpoint_dir_kwargs)
        pmcts = MCTS(self.game, pnet, self.args)
        self.nnet.train(trainExamples)
        nmcts = MCTS(self.game, self.nnet, self.args)

        def previous_player(x):
            return np.argmax(pmcts.getActionProb(x, temp=0))

        def new_player(x):
            return np.argmax(nmcts.getActionProb(x, temp=0))

        print('PITTING AGAINST PREVIOUS VERSION')
        arena = Arena(previous_player, new_player, self.game)
        pwins, nwins, draws = arena.playGames(self.args.arenaCompare)

        print('NEW/PREV WINS : ' + str(nwins) + '/' + str(pwins) + ' ; DRAWS : ' + str(draws))
        decisive = pwins+nwins
        if decisive > 0 and float(nwins)/decisive < self.args.updateThreshold:
            print('REJECTING NEW MODEL')
            self.nnet = pnet
        else:
            print('ACCEPTING NEW MODEL')
            self.nnet.save_checkpoint(
                filename='checkpoint_' + str(i) + '.pth.tar', **checkpoint_dir_kwargs)
            self.nnet.save_checkpoint(filename='best.pth.tar', **checkpoint_dir_kwargs)
def self_play(self):
    """
    Runs self.args.numIters self-play iterations (numbered from 1).

    Unless the first iteration is skipped via self.skipFirstSelfPlay, each
    iteration plays self.args.numEps episodes with a fresh MCTS per
    episode and appends the examples to self.trainExamplesHistory. The
    history is trimmed to self.args.numItersForTrainExamplesHistory
    entries, saved, and synced.
    """
    # TODO: parallelize this iterations
    last_iter = self.args.numIters
    for i in range(1, last_iter + 1):
        # bookkeeping
        print('------ITER ' + str(i) + '------')

        # examples of the iteration
        run_episodes = i > 1 or not self.skipFirstSelfPlay
        if run_episodes:
            iterationTrainExamples = deque([], maxlen=self.args.maxlenOfQueue)

            eps_time = AverageMeter()
            bar = Bar('Self Play', max=self.args.numEps)
            end = time.time()

            for eps in range(self.args.numEps):
                # reset search tree
                self.mcts = MCTS(self.game, self.nnet, self.args)
                iterationTrainExamples += self.executeEpisode()

                # bookkeeping + plot progress
                eps_time.update(time.time() - end)
                end = time.time()
                bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                    eps=eps + 1,
                    maxeps=self.args.numEps,
                    et=eps_time.avg,
                    total=bar.elapsed_td,
                    eta=bar.eta_td)
                bar.next()
            bar.finish()

            # save the iteration examples to the history
            self.trainExamplesHistory.append(iterationTrainExamples)

        history_size = len(self.trainExamplesHistory)
        if history_size > self.args.numItersForTrainExamplesHistory:
            print("len(trainExamplesHistory) =",
                  len(self.trainExamplesHistory),
                  " => remove the oldest trainExamples")
            self.trainExamplesHistory.pop(0)

        # backup history to a file
        # NB! the examples were collected using the model from the previous iteration, so (i-1)
        self.saveTrainExamples(i - 1)
        self.aws_s3_sync()
def parallel_self_test_play(self, iter_num):
    """
    Runs the evaluation games and hands the aggregate result to
    CheckResultAndSaveNetwork.

    With self.args.multiCPU set, AsyncAgainst is run in
    self.args.numAgainstPlayProcess worker processes and
    CheckResultAndSaveNetwork runs in its own single-process pool;
    otherwise both run in the current process.

    Args:
        iter_num: forwarded to CheckResultAndSaveNetwork.

    Raises:
        Any exception raised inside a worker is re-raised here.
    """
    print("Start test play")
    bar = Bar('Test Play', max=self.args.numAgainstPlayProcess)
    result = []

    if self.args.multiCPU:
        pool = multiprocessing.Pool(
            processes=self.args.numAgainstPlayProcess)
        res = [
            pool.apply_async(AsyncAgainst,
                             args=(self.game, self.args, i, bar))
            for i in range(self.args.numAgainstPlayProcess)
        ]
        pool.close()
        pool.join()
        for handle in res:
            result.append(handle.get())
    else:
        result.append(AsyncAgainst(self.game, self.args, 0, bar))

    pwins = 0
    nwins = 0
    draws = 0.0
    for outcome in result:
        pwins += outcome[0]
        nwins += outcome[1]
        draws += outcome[2]

    # NOTE(review): draws are averaged over workers while wins are summed;
    # kept as in the original - confirm this asymmetry is intended.
    draws /= len(result)

    print("pwin: " + str(pwins))
    print("nwin: " + str(nwins))
    print("draw: " + str(draws))

    if self.args.multiCPU:
        pool = multiprocessing.Pool(processes=1)
        pending = pool.apply_async(CheckResultAndSaveNetwork,
                                   args=(
                                       pwins,
                                       nwins,
                                       draws,
                                       self.game,
                                       self.args,
                                       iter_num,
                                   ))
        pool.close()
        pool.join()
        # Fetch the result so an exception raised in the worker surfaces
        # here instead of being silently dropped.
        pending.get()
    else:
        CheckResultAndSaveNetwork(pwins, nwins, draws, self.game,
                                  self.args, iter_num)
def learn(self):
    """
    Runs self.args.numIters iterations (numbered from 1). Unless the first
    iteration is skipped via self.skipFirstSelfPlay, each iteration plays
    self.args.numEps episodes, trains the network on every episode's
    examples right away, and writes two scatter plots per iteration:
    final example value per episode and step count per episode.
    """
    for i in range(1, self.args.numIters+1):
        # bookkeeping
        print('------ITER ' + str(i) + '------')

        # examples of the iteration
        if self.skipFirstSelfPlay and not i > 1:
            continue

        eps_time = AverageMeter()
        bar = Bar('Self Play', max=self.args.numEps)
        end = time.time()

        reward_list, count_list, step_list = [], [], []
        for eps in range(self.args.numEps):
            examples, step_count = self.executeEpisode()
            self.nnet.train(examples)

            step_list.append(step_count)
            # third field of the episode's last example
            reward_list.append(examples[-1][2])
            count_list.append(eps)

            # bookkeeping + plot progress
            eps_time.update(time.time() - end)
            end = time.time()
            bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                eps=eps+1, maxeps=self.args.numEps, et=eps_time.avg,
                total=bar.elapsed_td, eta=bar.eta_td)
            bar.next()
        bar.finish()

        plt.scatter(count_list, reward_list, label = 'rewards_training')
        plt.savefig("fig/rewards_"+str(i)+".png")
        plt.close()

        plt.scatter(count_list, step_list, label = 'steps_training')
        plt.savefig("fig/steps_"+str(i)+".png")
        plt.close()
def train(self, examples):
    """
    Trains the network for args.epochs epochs and returns the per-batch
    losses.

    examples: list of examples, each example is of form (board, pi, v)

    Every epoch runs int(len(examples) / args.batch_size) steps; each step
    draws args.batch_size examples uniformly at random with replacement.

    Returns:
        losses: [pi_losses, v_losses], two lists with one entry per
        training step across all epochs.
    """
    losses = [[], []]
    for epoch in range(args.epochs):
        print('EPOCH ::: ' + str(epoch + 1))
        data_time = AverageMeter()
        batch_time = AverageMeter()
        pi_losses = AverageMeter()
        v_losses = AverageMeter()
        end = time.time()

        num_batches = int(len(examples) / args.batch_size)
        bar = Bar('Training Net', max=num_batches)
        batch_idx = 0

        # self.sess.run(tf.local_variables_initializer())
        while batch_idx < num_batches:
            sample_ids = np.random.randint(len(examples),
                                           size=args.batch_size)
            boards, pis, vs = list(zip(*[examples[i] for i in sample_ids]))

            # predict and compute gradient and do SGD step
            input_dict = {
                self.nnet.input_boards: boards,
                self.nnet.target_pis: pis,
                self.nnet.target_vs: vs,
                self.nnet.dropout: args.dropout,
                self.nnet.isTraining: True
            }

            # measure data loading time
            data_time.update(time.time() - end)

            # record loss
            self.sess.run(self.nnet.train_step, feed_dict=input_dict)
            pi_loss, v_loss = self.sess.run(
                [self.nnet.loss_pi, self.nnet.loss_v], feed_dict=input_dict)
            pi_losses.update(pi_loss, len(boards))
            v_losses.update(v_loss, len(boards))
            losses[0].append(pi_loss)
            losses[1].append(v_loss)
            batch_idx += 1

            # measure elapsed time
            batch_time.update(time.time() - end)
            # Restart the clock so the next step is timed on its own
            # rather than from the start of the epoch.
            end = time.time()

            bar.next()
        bar.finish()

    return losses
def train(self, examples):
    """
    Fits the network on randomly sampled mini-batches for args.epochs
    epochs.

    examples: list of examples, each example is of form (board, pi, v)
    """
    suffix_template = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss_pi: {lpi:.4f} | Loss_v: {lv:.3f}'

    for epoch in range(args.epochs):
        print('EPOCH ::: ' + str(epoch+1))
        data_time, batch_time = AverageMeter(), AverageMeter()
        pi_losses, v_losses = AverageMeter(), AverageMeter()
        end = time.time()

        bar = Bar('Training Net', max=int(len(examples)/args.batch_size))
        batch_idx = 0

        # self.sess.run(tf.local_variables_initializer())
        while True:
            steps = int(len(examples)/args.batch_size)
            if batch_idx >= steps:
                break

            sample_ids = np.random.randint(len(examples), size=args.batch_size)
            picked = [examples[i] for i in sample_ids]
            boards, pis, vs = list(zip(*picked))

            # predict and compute gradient and do SGD step
            input_dict = {}
            input_dict[self.nnet.input_boards] = boards
            input_dict[self.nnet.target_pis] = pis
            input_dict[self.nnet.target_vs] = vs
            input_dict[self.nnet.dropout] = args.dropout
            input_dict[self.nnet.isTraining] = True

            # measure data loading time
            data_time.update(time.time() - end)

            # record loss
            self.sess.run(self.nnet.train_step, feed_dict=input_dict)
            loss_ops = [self.nnet.loss_pi, self.nnet.loss_v]
            pi_loss, v_loss = self.sess.run(loss_ops, feed_dict=input_dict)
            batch_len = len(boards)
            pi_losses.update(pi_loss, batch_len)
            v_losses.update(v_loss, batch_len)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()
            batch_idx += 1

            # plot progress
            bar.suffix = suffix_template.format(
                batch=batch_idx,
                size=int(len(examples)/args.batch_size),
                data=data_time.avg,
                bt=batch_time.avg,
                total=bar.elapsed_td,
                eta=bar.eta_td,
                lpi=pi_losses.avg,
                lv=v_losses.avg,
            )
            bar.next()
        bar.finish()
def ParallelPlay(g):
    """
    Runs args.numPlayGames calls of Async_Play in a pool of
    args.numPlayPool processes and prints the summed
    (model 1 wins, model 2 wins, draws) totals.
    """
    bar = Bar('Play', max=args.numPlayGames)
    pool = multiprocessing.Pool(processes=args.numPlayPool)

    res = [
        pool.apply_async(Async_Play, args=(g, args, game_idx, bar))
        for game_idx in range(args.numPlayGames)
    ]
    pool.close()
    pool.join()

    # Collect every worker result before tallying.
    result = [handle.get() for handle in res]

    oneWon = 0
    twoWon = 0
    draws = 0
    for first, second, tied in ((r[0], r[1], r[2]) for r in result):
        oneWon += first
        twoWon += second
        draws += tied

    print("Model 1 Win:",oneWon," Model 2 Win:",twoWon," Draw:",draws)
def learn_self_play_iter(self):
    """
    Plays self.selfplaynum self-play episodes, adds their examples to
    self.trainExampleSelfPlay, shuffles that list in place and trains the
    network on it with transform=True.
    """
    total = self.selfplaynum
    iterationTrainExamples = deque([], maxlen=self.args.maxlenOfQueue)

    eps_time = AverageMeter()
    bar = Bar('Self Play', max=total)
    end = time.time()

    for eps in range(total):
        iterationTrainExamples += self.executeEpisode()

        # bookkeeping + plot progress
        eps_time.update(time.time() - end)
        end = time.time()
        bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
            eps=eps+1, maxeps=self.selfplaynum, et=eps_time.avg,
            total=bar.elapsed_td, eta=bar.eta_td)
        bar.next()
    bar.finish()

    self.trainExampleSelfPlay.extend(iterationTrainExamples)
    shuffle(self.trainExampleSelfPlay)

    replay_count = len(self.trainExampleSelfPlay)
    print('Got %d replays through self-play.'%(replay_count))
    self.nnet.train(self.trainExampleSelfPlay, transform=True)
def train(self, examples, transform=False, models=(0,)):
    """
    Trains the selected models for args.epochs epochs.

    examples: list of examples, each example is of form (board, pi, v)
    transform: forwarded to every training function.
    models: iterable of keys into self.train_functions. Model 0 is
        expected to return a (pi_loss, v_loss) pair; any other model
        returns a single loss. The default is an immutable tuple so it
        cannot be shared and mutated between calls.

    Every epoch runs int(len(examples) / args.batch_size) steps; each step
    draws args.batch_size examples uniformly at random with replacement.
    """
    for epoch in range(args.epochs):
        print('EPOCH ::: ' + str(epoch + 1))
        batch_time = AverageMeter()
        losses = AverageMeter()
        pi_losses = AverageMeter()
        v_losses = AverageMeter()
        end = time.time()

        num_batches = int(len(examples) / args.batch_size)
        bar = Bar('Training Net', max=num_batches)
        batch_idx = 0

        # self.sess.run(tf.local_variables_initializer())
        while batch_idx < num_batches:
            sample_ids = np.random.randint(len(examples),
                                           size=args.batch_size)
            sample_examples = [examples[i] for i in sample_ids]

            for m in models:
                loss = self.train_functions[m](sample_examples, transform)
                if m == 0:
                    pi_losses.update(loss[0], len(sample_examples))
                    v_losses.update(loss[1], len(sample_examples))
                else:
                    losses.update(loss, len(sample_examples))

            batch_time.update(time.time() - end)
            end = time.time()
            batch_idx += 1

            bar.suffix = '({batch}/{size}) Total: {total:} | Loss: {loss:.3f} | Loss_pi: {lpi:.4f} | Loss_v: {lv:.3f}'.format(
                batch=batch_idx,
                size=num_batches,
                total=bar.elapsed_td,
                loss=losses.avg,
                lpi=pi_losses.avg,
                lv=v_losses.avg,
            )
            bar.next()
        bar.finish()
def playGames(self, num, verbose=False):
    """
    Plays num games.

    Earlier this halved num (a leftover from a two-player arena that
    splits games between starting players), so only num/2 puzzles were
    attempted and the progress bar never completed. All num games are
    played now, matching this docstring and the bar's maximum.

    Args:
        num: number of games to play.
        verbose: forwarded unchanged to self.playGame.

    Returns:
        solved: number of solved puzzles (playGame returned 1)
        timed_out: number of timed_out puzzles (any other result)
    """
    eps_time = AverageMeter()
    maxeps = int(num)
    bar = Bar('Arena.playGames', max=maxeps)
    end = time.time()

    solved = 0
    timed_out = 0
    for eps in range(1, maxeps + 1):
        gameResult = self.playGame(verbose=verbose)
        if gameResult == 1:
            solved += 1
        else:
            timed_out += 1

        # bookkeeping + plot progress
        eps_time.update(time.time() - end)
        end = time.time()
        # eps is 1-based, so it is shown as-is.
        bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
            eps=eps,
            maxeps=maxeps,
            et=eps_time.avg,
            total=bar.elapsed_td,
            eta=bar.eta_td)
        bar.next()

    bar.finish()

    return solved, timed_out
def playGames(self, num, verbose=False):
    """
    Plays num games in which player1 starts num/2 games and player2 starts
    num/2 games. After every game the Elo ratings are updated via
    self.elo and the game is logged via self.saveToPGN, both from the
    original player1's point of view. The players are swapped after the
    first half and stay swapped.

    Returns:
        oneWon: games won by player1
        twoWon: games won by player2
        draws:  games won by nobody
    """
    eps_time = AverageMeter()
    bar = Bar('Arena.playGames', max=num)
    end = time.time()
    eps = 0
    maxeps = int(num)
    half = int(num/2)

    oneWon = 0
    twoWon = 0
    draws = 0

    for swapped in (False, True):
        for _ in range(half):
            raw = self.playGame(verbose=verbose)
            # Result as seen by the original player1: flipped once the
            # seats have been exchanged.
            gameResult = raw * -1 if swapped else raw

            if gameResult == 1:
                oneWon += 1
            elif gameResult == -1:
                twoWon += 1
            else:
                draws += 1

            # update elo ratings
            self.elo(gameResult)

            # bookkeeping + plot progress
            eps += 1
            # pgn log
            self.saveToPGN(gameResult, eps)
            eps_time.update(time.time() - end)
            end = time.time()
            bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}| Win: {one}:{two}'.format(
                eps=eps, maxeps=maxeps, et=eps_time.avg,
                total=bar.elapsed_td, eta=bar.eta_td,
                one=oneWon, two=twoWon)
            bar.next()

        if not swapped:
            # Half-time report, then let the other player start.
            print('half')
            print(oneWon, twoWon, draws)
            print('elo: player1: ',self.R1,'player2',self.R2)
            self.player1, self.player2 = self.player2, self.player1

    bar.finish()
    print('elo: player1: ',self.R1,'player2',self.R2)

    file_name = self.player1_name + '_vs_' + self.player2_name + '.pgn'
    self.saveToScript(file_name)
    return oneWon, twoWon, draws
def train(self, examples):
    """
    Trains self.nnet with Adam for self.args.epochs epochs.

    examples: list of examples, each example is of form (board, pi, v)

    Every epoch runs int(len(examples) / self.args.batch_size) steps; each
    step draws self.args.batch_size examples uniformly at random with
    replacement and minimises loss_pi + loss_v.
    """
    optimizer = optim.Adam(self.nnet.parameters())

    for epoch in range(self.args.epochs):
        print('EPOCH ::: ' + str(epoch + 1))
        self.nnet.train()
        data_time = AverageMeter()
        batch_time = AverageMeter()
        pi_losses = AverageMeter()
        v_losses = AverageMeter()
        end = time.time()

        num_batches = int(len(examples) / self.args.batch_size)
        bar = Bar('Training Net', max=num_batches)
        batch_idx = 0

        while batch_idx < num_batches:
            sample_ids = np.random.randint(len(examples),
                                           size=self.args.batch_size)
            boards, pis, vs = list(zip(*[examples[i] for i in sample_ids]))
            boards = torch.FloatTensor(np.array(boards).astype(np.float64))
            target_pis = torch.FloatTensor(np.array(pis))
            target_vs = torch.FloatTensor(np.array(vs).astype(np.float64))

            # predict
            if self.args.cuda:
                boards = boards.contiguous().cuda()
                target_pis = target_pis.contiguous().cuda()
                target_vs = target_vs.contiguous().cuda()
            # Tensors are used directly; the Variable wrapper has been a
            # no-op since PyTorch 0.4, which .item() below also requires.

            # measure data loading time
            data_time.update(time.time() - end)

            # compute output
            out_pi, out_v = self.nnet(boards)
            l_pi = self.loss_pi(target_pis, out_pi)
            l_v = self.loss_v(target_vs, out_v)
            total_loss = l_pi + l_v

            # record loss; .item() replaces .data[0], which fails on
            # 0-dim loss tensors in current PyTorch
            pi_losses.update(l_pi.item(), boards.size(0))
            v_losses.update(l_v.item(), boards.size(0))

            # compute gradient and do SGD step
            optimizer.zero_grad()
            total_loss.backward()
            optimizer.step()

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()
            batch_idx += 1

            # plot progress
            bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss_pi: {lpi:.4f} | Loss_v: {lv:.3f}'.format(
                batch=batch_idx,
                size=num_batches,
                data=data_time.avg,
                bt=batch_time.avg,
                total=bar.elapsed_td,
                eta=bar.eta_td,
                lpi=pi_losses.avg,
                lv=v_losses.avg,
            )
            bar.next()
        bar.finish()
def playGames(self, num, verbose=False):
    """
    Plays num games in which player1 starts num/2 games and player2 starts
    num/2 games, and prints the per-half (win, loss, draw) breakdown.

    The players are swapped after the first half and are not swapped back,
    so self.player1 / self.player2 are left exchanged when this returns.

    Args:
        num: total number of games requested; int(num / 2) per half.
        verbose: forwarded unchanged to self.playGame.

    Returns:
        oneWon: games won by player1
        twoWon: games won by player2
        draws:  games won by nobody
    """
    eps_time = AverageMeter()
    bar = Bar('Arena.playGames', max=num)
    end = time.time()
    eps = 0
    maxeps = int(num)
    half = int(num / 2)

    oneWon = 0
    twoWon = 0
    draws = 0
    black_start = (0, 0, 0)

    # `sign` is the playGame result that counts as a win for the original
    # player1: +1 while it moves first, -1 once the seats are swapped.
    for sign in (1, -1):
        for _ in range(half):
            gameResult = self.playGame(verbose=verbose)
            if gameResult == sign:
                oneWon += 1
            elif gameResult == -sign:
                twoWon += 1
            else:
                draws += 1

            # bookkeeping + plot progress
            eps += 1
            eps_time.update(time.time() - end)
            end = time.time()
            # eps is already 1-based; maxeps is the total for both halves.
            bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                eps=eps,
                maxeps=maxeps,
                et=eps_time.avg,
                total=bar.elapsed_td,
                eta=bar.eta_td)
            bar.next()

        if sign == 1:
            self.player1, self.player2 = self.player2, self.player1
            # Snapshot of the first-half tallies.
            black_start = (oneWon, twoWon, draws)

    # Second-half tallies are the totals minus the first-half snapshot.
    white_start = (oneWon - black_start[0], twoWon - black_start[1],
                   draws - black_start[2])
    print('')
    print(
        'Neural network as Black - Wins of (NN Won,Opponent Won,Draw) :' +
        str(black_start))
    print(
        'Neural network as White - Wins of (NN Won,Opponent Won,Draw) :' +
        str(white_start))

    bar.finish()

    return oneWon, twoWon, draws
def learn(self):
    """
    Performs numIters iterations with numEps episodes of self-play in each
    iteration. After every iteration, it retrains neural network with
    examples in trainExamples (which has a maximium length of maxlenofQueue).
    It then pits the new neural network against the old one and accepts it
    only if it wins >= updateThreshold fraction of games.

    Self-play is skipped in iteration 1 when self.skipFirstSelfPlay is set.
    After self-play, a three-slot tally of the examples' result field is
    printed.
    """
    for i in range(1, self.args.numIters + 1):
        # bookkeeping
        print('------ITER ' + str(i) + '------')

        # examples of the iteration
        if not self.skipFirstSelfPlay or i > 1:
            iterationTrainExamples = deque([], maxlen=self.args.maxlenOfQueue)

            eps_time = AverageMeter()
            bar = Bar('Self Play', max=self.args.numEps)
            end = time.time()

            for eps in range(self.args.numEps):
                # reset search tree
                self.mcts = MCTS(self.game, self.nnet, self.args)
                iterationTrainExamples += self.executeEpisode()

                # bookkeeping + plot progress
                eps_time.update(time.time() - end)
                end = time.time()
                bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                    eps=eps + 1,
                    maxeps=self.args.numEps,
                    et=eps_time.avg,
                    total=bar.elapsed_td,
                    eta=bar.eta_td)
                bar.next()
            bar.finish()

            # save the iteration examples to the history
            self.trainExamplesHistory.append(iterationTrainExamples)

            # Tally examples by their result field, used as a list index.
            # presumably res is an int in {-1, 0, 1}, with -1 landing in
            # the last slot - verify against executeEpisode.
            trainStats = [0, 0, 0]
            for _, _, res in iterationTrainExamples:
                trainStats[res] += 1
            # print() call instead of the Python 2 print statement, which
            # is a SyntaxError on Python 3.
            print(trainStats)

        if len(self.trainExamplesHistory
               ) > self.args.numItersForTrainExamplesHistory:
            print("len(trainExamplesHistory) =",
                  len(self.trainExamplesHistory),
                  " => remove the oldest trainExamples")
            self.trainExamplesHistory.pop(0)

        # backup history to a file
        # NB! the examples were collected using the model from the previous iteration, so (i-1)
        self.saveTrainExamples(i - 1)

        # shuffle examples before training
        trainExamples = []
        for e in self.trainExamplesHistory:
            trainExamples.extend(e)
        shuffle(trainExamples)

        # training new network, keeping a copy of the old one
        self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                  filename='temp.pth.tar')
        self.pnet.load_checkpoint(folder=self.args.checkpoint,
                                  filename='temp.pth.tar')
        pmcts = MCTS(self.game, self.pnet, self.args)

        self.nnet.train(trainExamples)
        nmcts = MCTS(self.game, self.nnet, self.args)

        print('PITTING AGAINST PREVIOUS VERSION')
        arena = Arena(lambda x: np.argmax(pmcts.getActionProb(x, temp=0)),
                      lambda x: np.argmax(nmcts.getActionProb(x, temp=0)),
                      self.game)
        pwins, nwins, draws = arena.playGames(self.args.arenaCompare)

        print('NEW/PREV WINS : %d / %d ; DRAWS : %d' % (nwins, pwins, draws))
        if pwins + nwins > 0 and float(nwins) / (
                pwins + nwins) < self.args.updateThreshold:
            print('REJECTING NEW MODEL')
            # Roll back to the weights saved before training.
            self.nnet.load_checkpoint(folder=self.args.checkpoint,
                                      filename='temp.pth.tar')
        else:
            print('ACCEPTING NEW MODEL')
            self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                      filename=self.getCheckpointFile(i))
            self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                      filename='best.pth.tar')
def learn(self):
    """
    Run the (resumable) self-play / training loop up to numIters.

    When self.args.load_model equals True, the saved training examples are
    loaded and the starting iteration is read from the first line of the
    file ``<trainExampleCheckpoint>loopinformation``; otherwise the loop
    starts at iteration 1.

    Every iteration
      1. records its own number in the ``loopinformation`` file,
      2. plays numEps self-play episodes and appends the examples to
         self.trainExamplesHistory (dropping the oldest entry once more
         than numItersForTrainExamplesHistory entries are held),
      3. saves the history, trains self.nnet on the shuffled examples and
         saves the network under an iteration-specific file name,
      4. clears and rebuilds self.mcts.

    This variant does not pit the new network against the previous one.
    """
    # NOTE(review): the progress-file path is built by plain concatenation,
    # so trainExampleCheckpoint is presumably expected to end with a path
    # separator - confirm.
    loop_info_path = self.args.trainExampleCheckpoint + "loopinformation" \
        if self.args.load_model == True else None

    begining = 1
    if self.args.load_model == True:
        self.loadTrainExamples()
        with open(loop_info_path, "r") as loop_info:
            begining = loop_info.readlines()[0]

    for i in range(int(begining), self.args.numIters + 1):
        # Persist the current iteration so an interrupted run can resume here.
        with open(self.args.trainExampleCheckpoint + "loopinformation", "w") as loop_info:
            loop_info.write(str(i))

        # bookkeeping
        print('------ITER ' + str(i) + '------')
        # examples of the iteration
        iterationTrainExamples = deque([], maxlen=self.args.maxlenOfQueue)

        eps_time = AverageMeter()
        bar = Bar('Self Play', max=self.args.numEps)
        end = time.time()

        for eps in range(self.args.numEps):
            iterationTrainExamples += self.executeEpisode()

            # bookkeeping + plot progress
            eps_time.update(time.time() - end)
            end = time.time()
            bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                eps=eps + 1, maxeps=self.args.numEps, et=eps_time.avg,
                total=bar.elapsed_td, eta=bar.eta_td)
            bar.next()
        bar.finish()

        # save the iteration examples to the history
        self.trainExamplesHistory.append(iterationTrainExamples)

        if len(self.trainExamplesHistory) > self.args.numItersForTrainExamplesHistory:
            print("len(trainExamplesHistory) =", len(self.trainExamplesHistory),
                  " => remove the oldest trainExamples")
            self.trainExamplesHistory.pop(0)
        # backup history to a file
        # NB! the examples were collected using the model from the previous iteration, so (i-1)
        self.saveTrainExamples(i - 1)

        # shuffle examples before training
        trainExamples = []
        for e in self.trainExamplesHistory:
            trainExamples.extend(e)
        shuffle(trainExamples)

        filename = "AlphaZerocurent" + str(i) + "temp:iter" + str(self.args.numIters) + ":eps" + str(self.args.numEps) + \
                   ":dim" + str(self.game.n) + ".pth.tar"
        self.nnet.train(trainExamples)
        self.nnet.save_checkpoint(folder=self.args.checkpoint, filename=filename)

        # NOTE(review): nesting reconstructed from a whitespace-mangled
        # source; the tree reset is assumed to happen once per iteration.
        self.mcts.clear()
        del self.mcts
        self.mcts = MCTS(self.game, self.nnet, self.args, mcts=True)  # reset search tree
def train(self, examples):
    """
    Train self.nnet on the given examples with a fresh Adam optimizer.

    examples: list of examples, each example is of form (board, pi, v)

    For each of args.epochs epochs, int(len(examples) / args.batch_size)
    mini-batches are drawn by sampling indices uniformly with replacement
    (np.random.randint), so an epoch is not an exact pass over the data.
    The optimised loss is the sum of self.loss_pi and self.loss_v.
    """
    optimizer = optim.Adam(self.nnet.parameters())

    for epoch in range(args.epochs):
        print('EPOCH ::: ' + str(epoch+1))
        self.nnet.train()
        data_time = AverageMeter()
        batch_time = AverageMeter()
        pi_losses = AverageMeter()
        v_losses = AverageMeter()
        end = time.time()

        # Number of mini-batches per epoch, computed once instead of on
        # every loop test and progress update.
        num_batches = int(len(examples)/args.batch_size)
        bar = Bar('Training Net', max=num_batches)
        batch_idx = 0

        while batch_idx < num_batches:
            sample_ids = np.random.randint(len(examples), size=args.batch_size)
            boards, pis, vs = list(zip(*[examples[i] for i in sample_ids]))
            boards = torch.FloatTensor(np.array(boards).astype(np.float64))
            target_pis = torch.FloatTensor(np.array(pis))
            target_vs = torch.FloatTensor(np.array(vs).astype(np.float64))

            # predict
            if args.cuda:
                boards, target_pis, target_vs = boards.contiguous().cuda(), target_pis.contiguous().cuda(), target_vs.contiguous().cuda()
            boards, target_pis, target_vs = Variable(boards), Variable(target_pis), Variable(target_vs)

            # measure data loading time
            data_time.update(time.time() - end)

            # compute output
            out_pi, out_v = self.nnet(boards)
            l_pi = self.loss_pi(target_pis, out_pi)
            l_v = self.loss_v(target_vs, out_v)
            total_loss = l_pi + l_v

            # record loss; .item() works for 0-dim and one-element tensors,
            # whereas indexing a 0-dim tensor with [0] is an error in
            # current PyTorch.
            pi_losses.update(l_pi.item(), boards.size(0))
            v_losses.update(l_v.item(), boards.size(0))

            # compute gradient and do SGD step
            optimizer.zero_grad()
            total_loss.backward()
            optimizer.step()

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()
            batch_idx += 1

            # plot progress
            bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss_pi: {lpi:.4f} | Loss_v: {lv:.3f}'.format(
                batch=batch_idx,
                size=num_batches,
                data=data_time.avg,
                bt=batch_time.avg,
                total=bar.elapsed_td,
                eta=bar.eta_td,
                lpi=pi_losses.avg,
                lv=v_losses.avg,
            )
            bar.next()
        bar.finish()
def main():
    """
    Train the coupled image/text VAE, optionally resuming from saved epochs.

    All settings come from the module-level argument ``parser``. The
    function builds the image transform, vocabulary, embedding layer, data
    loaders, model and Adam optimizer. For every epoch it either loads a
    saved state dict (resume mode, entered when ``--load_model`` is not
    "NONE") or trains for one pass over the training loader, periodically
    dumping reconstructed images and captions under the result directory.
    After a trained epoch the average losses are appended to ``losses.csv``
    and the model state dict is saved as ``coupled_vae-<epoch>.pkl``.

    NOTE(review): the block nesting below was reconstructed from a
    whitespace-mangled source; in particular, logging and saving are assumed
    to happen only for epochs that were actually trained - confirm.
    """
    # global args
    args = parser.parse_args()

    # ---- Initialization -------------------------------------------------
    if args.comment == "test":
        print("WARNING: name is test!!!\n\n")

    assert args.text_criterion in ("MSE", "Cosine", "Hinge", "NLLLoss"), 'Invalid Loss Function'
    assert args.cm_criterion in ("MSE", "Cosine", "Hinge"), 'Invalid Loss Function'
    assert args.common_emb_ratio <= 1.0 and args.common_emb_ratio >= 0

    # The flag arrives as a string; only the exact value 'true' enables CUDA.
    cuda = args.cuda == 'true'

    if args.load_model == "NONE":
        keep_loading = False
        model_path = args.model_path + args.comment + "/"
    else:
        keep_loading = True
        model_path = args.model_path + args.load_model + "/"

    result_path = args.result_path
    if result_path == "NONE":
        result_path = model_path + "results/"

    os.makedirs(result_path, exist_ok=True)
    os.makedirs(model_path, exist_ok=True)

    # ---- Image preprocessing --------------------------------------------
    # For normalization, see https://github.com/pytorch/vision#models
    transform = transforms.Compose([
        transforms.RandomCrop(args.crop_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406),
                             (0.229, 0.224, 0.225))
    ])

    # ---- Embeddings -----------------------------------------------------
    # Load vocabulary wrapper.
    # NOTE(review): pickle.load executes arbitrary code from the file; only
    # point --vocab_path at trusted files.
    print("Loading Vocabulary...")
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    # Load Embeddings
    emb_size = args.word_embedding_size
    emb_path = args.embedding_path
    if args.embedding_path[-1] == '/':
        emb_path += 'glove.6B.' + str(emb_size) + 'd.txt'

    print("Loading Embeddings...")
    emb = load_glove_embeddings(emb_path, vocab.word2idx, emb_size)

    # NOTE(review): only the shape of the loaded GloVe matrix is used here;
    # the pre-trained values are never copied into the layer - confirm
    # whether `glove_emb.weight = nn.Parameter(emb)` was intended.
    glove_emb = nn.Embedding(emb.size(0), emb.size(1))

    # Freeze weights
    if args.fixed_embeddings == "true":
        glove_emb.weight.requires_grad = False

    # ---- Data loaders ---------------------------------------------------
    print("Building Data Loader For Test Set...")
    data_loader = get_loader(args.image_dir, args.caption_path,
                             vocab,
                             transform, args.batch_size,
                             shuffle=True, num_workers=args.num_workers)

    print("Building Data Loader For Validation Set...")
    val_loader = get_loader(args.valid_dir, args.valid_caption_path,
                            vocab,
                            transform, args.batch_size,
                            shuffle=True, num_workers=args.num_workers)

    # ---- Network --------------------------------------------------------
    print("Setting up the Networks...")
    coupled_vae = CoupledVAE(glove_emb, len(vocab), hidden_size=args.hidden_size,
                             latent_size=args.latent_size, batch_size=args.batch_size)

    if cuda:
        coupled_vae = coupled_vae.cuda()

    # ---- Optimizer ------------------------------------------------------
    print("Setting up the Optimizers...")
    vae_optim = optim.Adam(coupled_vae.parameters(), lr=args.learning_rate,
                           betas=(0.5, 0.999), weight_decay=0.00001)

    train_swapped = False  # Reverse 2
    step = 0

    with open(os.path.join(result_path, "losses.csv"), "w") as text_file:
        text_file.write("Epoch, Img, Txt, CM\n")

    for epoch in range(args.num_epochs):
        # ---- Epoch initialization ---------------------------------------
        # TRAINING TIME
        print('EPOCH ::: TRAINING ::: ' + str(epoch + 1))
        batch_time = AverageMeter()
        txt_losses = AverageMeter()
        img_losses = AverageMeter()
        cm_losses = AverageMeter()  # never updated in this function; logged as-is
        end = time.time()

        bar = Bar('Training Net', max=len(data_loader))

        if keep_loading:
            # NOTE(review): resume looks in args.model_path for
            # 'coupled_vae-<epoch>-<load_model>.pkl', which differs from the
            # directory and name used when saving below - confirm.
            suffix = "-" + str(epoch) + "-" + args.load_model + ".pkl"
            try:
                coupled_vae.load_state_dict(
                    torch.load(os.path.join(args.model_path, 'coupled_vae' + suffix)))
            except FileNotFoundError:
                print("Didn't find any models switching to training")
                keep_loading = False

        if not keep_loading:
            # Set training mode
            coupled_vae.train()

            train_swapped = not train_swapped
            for i, (images, captions, lengths) in enumerate(data_loader):
                # The last batch of every epoch is skipped.
                if i == len(data_loader) - 1:
                    break

                images = to_var(images)
                captions = to_var(captions)
                lengths = to_var(torch.LongTensor(lengths))

                # Forward, Backward and Optimize
                vae_optim.zero_grad()

                img_out, img_mu, img_logv, img_z, txt_out, txt_mu, txt_logv, txt_z = \
                    coupled_vae(images, captions, lengths, train_swapped)

                img_rc_loss = img_vae_loss(img_out, images, img_mu, img_logv) / \
                    (args.batch_size * args.crop_size**2)

                NLL_loss, KL_loss, KL_weight = seq_vae_loss(txt_out, captions,
                                                            lengths, txt_mu, txt_logv, "logistic", step, 0.0025,
                                                            2500)
                txt_rc_loss = (NLL_loss + KL_weight * KL_loss) / torch.sum(lengths).float()

                txt_losses.update(txt_rc_loss.data[0], args.batch_size)
                img_losses.update(img_rc_loss.data[0], args.batch_size)

                loss = img_rc_loss + txt_rc_loss

                loss.backward()
                vae_optim.step()
                step += 1

                if i % args.image_save_interval == 0:
                    subdir_path = os.path.join(result_path, str(i / args.image_save_interval))
                    os.makedirs(subdir_path, exist_ok=True)

                    # Dump the first three samples of the batch: original and
                    # reconstructed image plus original and generated caption.
                    for im_idx in range(3):
                        im_or = (images[im_idx].cpu().data.numpy().transpose(1, 2, 0) / 2 + .5) * 255
                        im = (img_out[im_idx].cpu().data.numpy().transpose(1, 2, 0) / 2 + .5) * 255

                        filename_prefix = os.path.join(subdir_path, str(im_idx))
                        scipy.misc.imsave(filename_prefix + '_original.A.jpg', im_or)
                        scipy.misc.imsave(filename_prefix + '.A.jpg', im)

                        txt_or = " ".join([vocab.idx2word[c] for c in captions[im_idx].cpu().data.numpy()])
                        _, generated = torch.topk(txt_out[im_idx], 1)
                        txt = " ".join([vocab.idx2word[c] for c in generated[:, 0].cpu().data.numpy()])

                        with open(filename_prefix + "_captions.txt", "w") as text_file:
                            text_file.write("Epoch %d\n" % epoch)
                            text_file.write("Original: %s\n" % txt_or)
                            text_file.write("Generated: %s" % txt)

                # measure elapsed time
                batch_time.update(time.time() - end)
                end = time.time()

                # plot progress
                bar.suffix = '({batch}/{size}) Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss_Img: {img_l:.3f}| Loss_Txt: {txt_l:.3f} | Loss_CM: {cm_l:.4f}'.format(
                    batch=i,
                    size=len(data_loader),
                    bt=batch_time.avg,
                    total=bar.elapsed_td,
                    eta=bar.eta_td,
                    img_l=img_losses.avg,
                    txt_l=txt_losses.avg,
                    cm_l=cm_losses.avg,
                )
                bar.next()

            bar.finish()

            with open(os.path.join(result_path, "losses.csv"), "a") as text_file:
                text_file.write("{}, {}, {}, {}\n".format(
                    epoch, img_losses.avg, txt_losses.avg, cm_losses.avg))

            # Save the models. The epoch number is part of the file name; the
            # previous format string had no placeholder and raised TypeError.
            print('\n')
            print('Saving the models in {}...'.format(model_path))
            torch.save(coupled_vae.state_dict(),
                       os.path.join(model_path,
                                    'coupled_vae-%d' % (epoch + 1)) + ".pkl")
def learn(self):
    """
    Iterate self-play, network training and optional arena evaluation.

    For each of the numIters iterations:
      * the iteration number and the game's innerN x innerM size are printed;
      * unless this is the first iteration and self.skipFirstSelfPlay is
        set, numEps self-play episodes are played (with a fresh MCTS per
        episode) and their examples appended to self.trainExamplesHistory;
      * the history is trimmed to numItersForTrainExamplesHistory entries
        and saved;
      * the current weights are stored in 'temp.pth.tar' and self.nnet is
        trained on the shuffled examples;
      * when self.arenaEnabled is set, the trained network plays against the
        previous one and is rolled back if it wins less than updateThreshold
        of the decided games, otherwise it is saved as the iteration
        checkpoint and as 'best.pth.tar'.
    """
    progress_template = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'

    for i in range(1, self.args.numIters+1):
        # bookkeeping
        print('------ITER {}------'.format(i))
        print('{}x{}'.format(self.game.innerN, self.game.innerM))

        # Self-play is skipped only for the first iteration of a resumed run.
        if not (self.skipFirstSelfPlay and i <= 1):
            episode_examples = deque([], maxlen=self.args.maxlenOfQueue)

            timer = AverageMeter()
            bar = Bar('Self Play', max=self.args.numEps)
            last_tick = time.time()

            for episode in range(self.args.numEps):
                # a fresh search tree for every episode
                self.mcts = MCTS(self.nnet, self.args)
                episode_examples += self.executeEpisode()

                # bookkeeping + progress display
                timer.update(time.time() - last_tick)
                last_tick = time.time()
                bar.suffix = progress_template.format(
                    eps=episode+1,
                    maxeps=self.args.numEps,
                    et=timer.avg,
                    total=bar.elapsed_td,
                    eta=bar.eta_td,
                )
                bar.next()
            bar.finish()

            # keep this iteration's examples in the rolling history
            self.trainExamplesHistory.append(episode_examples)

        history_len = len(self.trainExamplesHistory)
        if history_len > self.args.numItersForTrainExamplesHistory:
            print("len(trainExamplesHistory) =", history_len, " => remove the oldest trainExamples")
            self.trainExamplesHistory.pop(0)

        # Back up the history. The examples were produced by the model of
        # the previous iteration, hence the (i-1) label.
        self.saveTrainExamples(i-1)

        # flatten the history and shuffle before training
        trainExamples = [example
                         for batch in self.trainExamplesHistory
                         for example in batch]
        shuffle(trainExamples)

        tempfile = 'temp.pth.tar'
        bestfile = 'best.pth.tar'

        # snapshot the old weights, then train the new network
        self.nnet.save_checkpoint(folder=self.args.checkpoint, filename=tempfile)
        self.nnet.train(trainExamples)

        if not self.arenaEnabled:
            continue

        self.pnet.load_checkpoint(folder=self.args.checkpoint, filename=tempfile)
        pmcts = MCTS(self.pnet, self.args)
        nmcts = MCTS(self.nnet, self.args)

        print('PITTING AGAINST PREVIOUS VERSION')
        arena = Arena(lambda x, y: pmcts.getActionProb(x, y, temp=0),
                      lambda x, y: nmcts.getActionProb(x, y, temp=0),
                      self.game)
        pwins, nwins, draws = arena.playGames(self.args.arenaCompare)

        print('NEW/PREV WINS : %d / %d ; DRAWS : %d' % (nwins, pwins, draws))
        decided = pwins+nwins
        rejected = decided > 0 and float(nwins)/decided < self.args.updateThreshold
        if rejected:
            print('REJECTING NEW MODEL')
            self.nnet.load_checkpoint(folder=self.args.checkpoint, filename=tempfile)
        else:
            print('ACCEPTING NEW MODEL')
            self.nnet.save_checkpoint(folder=self.args.checkpoint, filename=self.getCheckpointFile(i))
            self.nnet.save_checkpoint(folder=self.args.checkpoint, filename=bestfile)
def playGames(self, num, profile, verbose=False):
    """
    Play int(num/2) games, swap the two players, then play int(num/2) more.

    In replay mode (self.replay is truthy) a single game is played via
    self.playGame() and None is returned.

    A game whose result is None is replayed until it yields a result. A
    result of 1 is counted as a "black" win and -1 as a "white" win; any
    other result is a draw. Before the swap, 1 is credited to player one
    and -1 to player two; after the swap the credit is reversed. The
    players are left swapped when the method returns.

    Args:
        num: total number of games requested (used as the progress maximum).
        profile: when truthy, the games run under cProfile and the
            statistics are printed (sort=2) after the last game.
        verbose: forwarded to self.playGame.

    Returns:
        (oneWon, twoWon, draws, oneWhiteWon, oneBlackWon, twoWhiteWon,
        twoBlackWon), or None in replay mode.
    """
    if self.replay:
        self.playGame()
        return None

    eps_time = AverageMeter()
    bar = Bar('Arena.playGames', max=num)
    end = time.time()
    maxeps = int(num)
    games_per_seating = int(num/2)
    played = 0

    oneWon = twoWon = draws = 0
    oneWhiteWon = oneBlackWon = twoWhiteWon = twoBlackWon = 0

    if profile:
        prof = cProfile.Profile()
        prof.enable()

    for swapped in (False, True):
        if swapped:
            self.player1, self.player2 = self.player2, self.player1

        for _ in range(games_per_seating):
            # replay until the game produces a result
            outcome = None
            while outcome is None:
                outcome = self.playGame(verbose=verbose)

            if outcome == 1:
                if swapped:
                    twoWon += 1
                    twoBlackWon += 1
                else:
                    oneWon += 1
                    oneBlackWon += 1
            elif outcome == -1:
                if swapped:
                    oneWon += 1
                    oneWhiteWon += 1
                else:
                    twoWon += 1
                    twoWhiteWon += 1
            else:
                draws += 1

            # bookkeeping + progress display
            played += 1
            eps_time.update(time.time() - end)
            end = time.time()
            bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                eps=played, maxeps=maxeps, et=eps_time.avg,
                total=bar.elapsed_td, eta=bar.eta_td)
            bar.next()

    bar.finish()

    if profile:
        prof.disable()
        prof.print_stats(sort=2)

    return oneWon, twoWon, draws, oneWhiteWon, oneBlackWon, twoWhiteWon, twoBlackWon
# Build the model, overwrite the encoder embedding weights with the ones
# returned by load_embeddings, restore the checkpoint and switch to
# evaluation mode.
model = Seq2SeqPytorch(args=args, vocab=dm.vocab)
model.encoder.embedding.weight.data = load_embeddings.load_embeddings(
    dm.vocab, constants.EMBED_DATA_PATH, args.embedding_size)
model_pipeline_pytorch.load_checkpoint(model, checkpoint=checkpoint)
model.eval()
if args.cuda:
    model = model.cuda()

# Running meters and progress bar for the test pass.
batch_time = AverageMeter()
data_time = AverageMeter()
losses = AverageMeter()
acc = AverageMeter()
end = time.time()

bar = Bar('Processing', max=args.test_batches_per_epoch)
batch_idx = 0

for _ in range(dm.num_batch_test):
    # sample batch; the tuple returned by get_next_test_batch depends on
    # the encoder type
    if args.encoder_type == 'transformer':
        sent1, sent1_posembinput, sent2, sent2_posembinput, targets = \
            dm.get_next_test_batch(use_cuda=args.cuda)
        # the transformer branch uses no unsort indices or initial hidden state
        unsort1, unsort2 = None, None
        encoder_init_hidden = None
    elif args.encoder_type == 'rnn':
        sent1, sent2, unsort1, unsort2, targets = dm.get_next_test_batch(
            encoder_embed=model.embed,
            decoder_embed=model.embed,
            use_cuda=args.cuda,
        )
    # NOTE(review): any other encoder_type leaves the batch variables
    # unset; the rest of this loop body is not visible here.
def evaluate_model(model, args, di, labels_avail=True, type='test', mode='report'):
    """
    Run the model over an evaluation split and report or save predictions.

    Args:
        model: network called as ``model(seq_batch, gene_batch)``; column 1
            of its output is collected as the prediction.
        args: options object; ``batch_size``, ``cuda``, ``checkpoint`` and
            ``report_filename`` are read.
        di: data interface providing the example counts, chromosomes, cell
            types, ``eval_generator`` and the reporting helpers used below.
        labels_avail: forwarded as ``ret_labels`` when populating the
            prediction dict; when true the label matrix is saved as well
            (``save_preds`` mode only).
        type: split to evaluate, 'test' or 'validation'.
        mode: 'report' hands the predictions to ``di.evaluate_model``;
            'save_preds' dumps them with joblib into ``args.checkpoint``.

    Raises:
        Exception: for an unknown ``mode`` (checked before any work is
            done) or an unknown ``type``.
    """
    # Fail fast: an unknown mode is rejected up front instead of after the
    # whole evaluation pass has already been computed.
    if mode not in ('report', 'save_preds'):
        raise Exception("mode is one of [report, save_preds]")

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()

    # switch to evaluate mode
    model.eval()
    if type == 'test':
        num_examples, chrms, cell_types = di.num_test_examples, di.test_chrms, di.test_cell_types
    elif type == 'validation':
        num_examples, chrms, cell_types = di.num_validation_examples, di.validation_chrms, di.validation_cell_types
    else:
        raise Exception("type is one of [train, validation, test]")

    end = time.time()
    # + |test_chrms|*|test_cell_types| is to account for subsampling starting
    # from each chromosome in the worst case
    max_batches = int(math.ceil(num_examples / (di.eval_subsample * args.batch_size))) \
        + (len(chrms) * len(cell_types))
    bar = Bar('Processing', max=max_batches)
    batch_idx = 0
    all_preds = []

    # Column selector for the prediction; it does not change between
    # batches, so it is built once.
    index = Variable(torch.LongTensor([1]))
    if args.cuda:
        index = index.cuda()

    for seq_batch, gene_batch in di.eval_generator(args.batch_size, type):
        data_time.update(time.time() - end)
        seq_batch = torch.from_numpy(seq_batch)
        gene_batch = torch.FloatTensor(gene_batch)
        if args.cuda:
            seq_batch, gene_batch = seq_batch.contiguous().cuda(), gene_batch.contiguous().cuda()
        seq_batch, gene_batch = Variable(seq_batch, volatile=True), Variable(gene_batch, volatile=True)

        # compute output
        outputs = model(seq_batch, gene_batch)
        all_preds.append(
            torch.index_select(outputs, 1, index=index).view(-1).cpu().data.numpy())

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        batch_idx += 1

        # plot progress
        bar.suffix = '({batch}/{size}) | Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:}'.format(
            batch=batch_idx,
            size=max_batches,
            data=data_time.avg,
            bt=batch_time.avg,
            total=bar.elapsed_td,
            eta=bar.eta_td,
        )
        bar.next()
    bar.finish()

    all_preds = np.concatenate(all_preds)
    if mode == 'save_preds':
        ctype_chr_pred_dict, *_ = di.populate_ctype_chr_pred_dict(
            cell_types, chrms, all_preds, ret_labels=labels_avail)
        # assumes outputs are log-softmaxed, taking exponents
        for ctype in ctype_chr_pred_dict:
            for chrm in ctype_chr_pred_dict[ctype]:
                ctype_chr_pred_dict[ctype][chrm]['preds'] = np.exp(
                    ctype_chr_pred_dict[ctype][chrm]['preds'])
        print('ALL PREDICTIONS READY, SAVING THEM')
        matrix_preds = flatten_dict_of_dicts(ctype_chr_pred_dict)
        joblib.dump(ctype_chr_pred_dict,
                    os.path.join(args.checkpoint, type + '_preds.joblib'))
        joblib.dump(matrix_preds,
                    os.path.join(args.checkpoint, type + '_matrix_preds.joblib'))
        if labels_avail:
            matrix_labels = flatten_dict_of_dicts(ctype_chr_pred_dict, 'labels')
            joblib.dump(matrix_labels,
                        os.path.join(args.checkpoint, type + '_matrix_labels.joblib'))
    else:
        # mode == 'report' (validated at the top)
        print('ALL PREDICTIONS READY, PREPARING PLOTS')
        di.evaluate_model(all_preds, type, args.checkpoint, args.report_filename)
def test(model, optimizer, epoch, di, args, criterion=nn.NLLLoss()):
    """
    Evaluate the model on args.batches_per_test_epoch validation batches.

    Each batch is drawn with di.sample_validation_batch. Column 1 of the
    model output and the targets are accumulated over all batches; AUPRC
    and ROC-AUC are recomputed on the accumulated arrays after every batch
    for the progress display. The accumulated predictions and labels are
    dumped to 'validation_results.joblib' inside args.checkpoint.

    Returns:
        (average loss, AUPRC over all sampled batches)
    """
    global best_acc

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    precis = AverageMeter()
    recall = AverageMeter()
    f1 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    end = time.time()
    bar = Bar('Processing', max=args.batches_per_test_epoch)
    progress_template = '({batch}/{size}) | Loss: {loss:.4f} | precis: {precis:.3f} | recall: {recall:.3f} | f1: {f1:.3f} | auprc: {auprc:.3f} | auc: {auc:.3f}'

    all_preds = np.array([])
    all_targets = np.array([])
    batch_idx = 0
    while batch_idx < args.batches_per_test_epoch:
        # measure data loading time
        data_time.update(time.time() - end)

        seq_batch, gene_batch, target_batch = di.sample_validation_batch(args.batch_size)
        seq_batch = torch.from_numpy(seq_batch)
        gene_batch = torch.FloatTensor(gene_batch)
        targets = torch.from_numpy(target_batch)

        if args.cuda:
            seq_batch = seq_batch.contiguous().cuda()
            gene_batch = gene_batch.contiguous().cuda()
            targets = targets.cuda()
        seq_batch = Variable(seq_batch, volatile=True)
        gene_batch = Variable(gene_batch, volatile=True)
        targets = Variable(targets)

        # forward pass and loss
        outputs = model(seq_batch, gene_batch)
        loss = criterion(outputs, targets)

        # append column 1 of the output and the targets to the running arrays
        index = Variable(torch.LongTensor([1]))
        if args.cuda:
            index = index.cuda()
        batch_preds = torch.index_select(outputs, 1, index=index).view(-1).cpu().data.numpy()
        all_preds = np.concatenate((all_preds, batch_preds))
        all_targets = np.concatenate((all_targets, targets.cpu().data.numpy()))

        # batch metrics, plus ranking metrics over everything seen so far
        p, r, f = eval(outputs.data, targets.data, args)
        auprc = sklearn.metrics.average_precision_score(all_targets, all_preds)
        auc = sklearn.metrics.roc_auc_score(all_targets, all_preds)
        n_samples = seq_batch.size(0)
        precis.update(p, n_samples)
        recall.update(r, seq_batch.size(0))
        f1.update(f, seq_batch.size(0))
        losses.update(loss.item(), seq_batch.size(0))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        batch_idx += 1

        # plot progress
        bar.suffix = progress_template.format(
            batch=batch_idx,
            size=args.batches_per_test_epoch,
            bt=batch_time.avg,
            total=bar.elapsed_td,
            loss=losses.avg,
            precis=precis.avg,
            recall=recall.avg,
            f1=f1.avg,
            auprc=auprc,
            auc=auc,
        )
        bar.next()
    bar.finish()

    val_results = {'preds': all_preds, 'labels': all_targets}
    results_path = os.path.join(args.checkpoint, 'validation_results.joblib')
    joblib.dump(val_results, results_path)
    return (losses.avg, auprc)
def train(model, optimizer, epoch, di, args, criterion=nn.NLLLoss()):
    """
    Train the model for args.batches_per_epoch batches.

    Each batch is drawn with di.sample_train_batch, pushed through the
    model, scored with ``criterion`` and used for one optimizer step.
    Column 1 of the model output and the targets are accumulated over all
    batches so that AUPRC and ROC-AUC can be printed at the end of the
    epoch for comparison with the test metrics.

    Returns:
        (average loss, average f1) over the batches of this epoch.
    """
    # switch to train mode
    model.train()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    precis = AverageMeter()
    recall = AverageMeter()
    f1 = AverageMeter()
    end = time.time()

    bar = Bar('Processing', max=args.batches_per_epoch)
    batch_idx = 0
    all_preds = np.array([])
    all_targets = np.array([])

    while batch_idx < args.batches_per_epoch:
        seq_batch, gene_batch, target_batch = di.sample_train_batch(args.batch_size)
        seq_batch = torch.from_numpy(seq_batch)
        gene_batch = torch.FloatTensor(gene_batch)
        targets = torch.from_numpy(target_batch)

        # measure data loading time
        data_time.update(time.time() - end)

        # predict
        if args.cuda:
            # `async` is a reserved word since Python 3.7; PyTorch's name for
            # the same option is `non_blocking`.
            seq_batch, gene_batch, targets = seq_batch.contiguous().cuda(), \
                gene_batch.contiguous().cuda(), targets.cuda(non_blocking=True)
        seq_batch, gene_batch, targets = Variable(seq_batch), Variable(gene_batch), Variable(targets)

        # compute output
        outputs = model(seq_batch, gene_batch)
        loss = criterion(outputs, targets)

        # concat to all_preds, all_targets
        index = Variable(torch.LongTensor([1]))
        if args.cuda:
            index = index.cuda()
        all_preds = np.concatenate(
            (all_preds, torch.index_select(outputs, 1, index=index).view(-1).cpu().data.numpy()))
        all_targets = np.concatenate((all_targets, targets.cpu().data.numpy()))

        # measure accuracy and record loss
        p, r, f = eval(outputs.data, targets.data, args)
        precis.update(p, seq_batch.size(0))
        recall.update(r, seq_batch.size(0))
        f1.update(f, seq_batch.size(0))
        losses.update(loss.item(), seq_batch.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        batch_idx += 1

        # plot progress
        bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss: {loss:.4f} | prec: {precis:.3f} | rec: {recall:.3f} | f1: {f1:.3f}'.format(
            batch=batch_idx,
            size=args.batches_per_epoch,
            data=data_time.avg,
            bt=batch_time.avg,
            total=bar.elapsed_td,
            eta=bar.eta_td,
            loss=losses.avg,
            precis=precis.avg,
            recall=recall.avg,
            f1=f1.avg,
        )
        bar.next()
    bar.finish()

    # compute train auprc/auc for direct comparison to test
    train_auprc = sklearn.metrics.average_precision_score(all_targets, all_preds)
    train_auc = sklearn.metrics.roc_auc_score(all_targets, all_preds)
    print('train auprc: {auprc: .3f} | train auc: {auc: .3f}'.format(
        auprc=train_auprc,
        auc=train_auc,
    ))
    return (losses.avg, f1.avg)
def main():
    """
    Train (and optionally validate) the model selected by ``--method``.

    All settings come from the module-level argument ``parser``. The
    function picks the trainer class for the method, builds the image
    transform, vocabulary, word-embedding layer and data loaders (COCO
    loaders or the CUB HDF5 dataset), then for every epoch feeds the
    training batches to ``model_trainer.forward`` and, when ``--validate``
    is "true", the validation batches to ``model_trainer.evaluate``. After
    the last epoch the models are saved with ``save_models(-1)``.

    NOTE(review): the block nesting below was reconstructed from a
    whitespace-mangled source; the progress bar is assumed to advance once
    per batch - confirm.
    """
    # global args
    args = parser.parse_args()

    # ---- Initialization -------------------------------------------------
    if args.comment == "NONE":
        args.comment = args.method

    validate = args.validate == "true"

    if args.method == "coupled_vae_gan":
        trainer = coupled_vae_gan_trainer.coupled_vae_gan_trainer
    elif args.method == "coupled_vae":
        trainer = coupled_vae_trainer.coupled_vae_trainer
    elif args.method == "wgan":
        trainer = wgan_trainer.wgan_trainer
    elif args.method == "seq_wgan":
        trainer = seq_wgan_trainer.wgan_trainer
    elif args.method == "skip_thoughts":
        trainer = skipthoughts_vae_gan_trainer.coupled_vae_gan_trainer
    else:
        assert False, "Invalid method"

    assert args.text_criterion in ("MSE", "Cosine", "Hinge", "NLLLoss"), 'Invalid Loss Function'
    assert args.cm_criterion in ("MSE", "Cosine", "Hinge"), 'Invalid Loss Function'
    assert args.common_emb_ratio <= 1.0 and args.common_emb_ratio >= 0

    # ---- Image preprocessing --------------------------------------------
    # For normalization, see https://github.com/pytorch/vision#models
    transform = transforms.Compose([
        transforms.RandomCrop(args.crop_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((.5, .5, .5), (.5, .5, .5))
    ])

    # ---- Embeddings -----------------------------------------------------
    if args.dataset != "coco":
        args.vocab_path = "./data/cub_vocab.pkl"

    # Load vocabulary wrapper.
    # NOTE(review): pickle.load executes arbitrary code from the file; only
    # point --vocab_path at trusted files.
    print("Loading Vocabulary...")
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    # Load Embeddings
    emb_size = args.word_embedding_size
    emb_path = args.embedding_path
    if args.embedding_path[-1] == '/':
        emb_path += 'glove.6B.' + str(emb_size) + 'd.txt'

    print("Loading Embeddings...")

    use_glove = args.use_glove == "true"
    if use_glove:
        emb = load_glove_embeddings(emb_path, vocab.word2idx, emb_size)
        word_emb = nn.Embedding(emb.size(0), emb.size(1))
        word_emb.weight = nn.Parameter(emb)
    else:
        word_emb = nn.Embedding(len(vocab), emb_size)

    # Freeze weights: fixed embeddings must not receive gradients.
    if args.fixed_embeddings == "true":
        word_emb.weight.requires_grad = False

    # ---- Data loaders ---------------------------------------------------
    print("Building Data Loader For Test Set...")
    if args.dataset == 'coco':
        data_loader = get_loader(args.image_dir, args.caption_path,
                                 vocab,
                                 transform, args.batch_size,
                                 shuffle=True, num_workers=args.num_workers)

        print("Building Data Loader For Validation Set...")
        val_loader = get_loader(args.valid_dir, args.valid_caption_path,
                                vocab,
                                transform, args.batch_size,
                                shuffle=True, num_workers=args.num_workers)
    else:
        data_path = "data/cub.h5"
        dataset = Text2ImageDataset(data_path, split=0, vocab=vocab, transform=transform)
        data_loader = DataLoader(dataset, batch_size=args.batch_size, shuffle=True,
                                 num_workers=args.num_workers, collate_fn=collate_fn)

        dataset_val = Text2ImageDataset(data_path, split=1, vocab=vocab, transform=transform)
        val_loader = DataLoader(dataset_val, batch_size=args.batch_size, shuffle=True,
                                num_workers=args.num_workers, collate_fn=collate_fn)

    # ---- Trainer --------------------------------------------------------
    print("Setting up the trainer...")
    model_trainer = trainer(args, word_emb, vocab)

    bar_name = args.method if args.comment == "NONE" else args.method + "/" + args.comment

    for epoch in range(args.num_epochs):
        # ---- Training ---------------------------------------------------
        print('EPOCH ::: TRAINING ::: ' + str(epoch + 1))
        batch_time = AverageMeter()
        end = time.time()

        bar = Bar(bar_name, max=len(data_loader))

        model_trainer.set_train_models()
        model_trainer.create_losses_meter(model_trainer.losses)

        for i, (images, captions, lengths) in enumerate(data_loader):
            # A truthy return from load_models ends the epoch immediately.
            if model_trainer.load_models(epoch):
                break

            # The last batch of every epoch is skipped.
            if i == len(data_loader) - 1:
                break

            images = to_var(images)
            captions = to_var(captions)
            lengths = to_var(torch.LongTensor(lengths))

            # The last argument asks the trainer to dump images every
            # image_save_interval batches.
            model_trainer.forward(epoch, images, captions, lengths,
                                  not i % args.image_save_interval)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if not model_trainer.iteration % args.log_step:
                # plot progress
                bar.suffix = bcolors.HEADER
                bar.suffix += '({batch}/{size}) Iter: {bt:} | Time: {total:}-{eta:}\n'.format(
                    batch=i,
                    size=len(data_loader),
                    bt=model_trainer.iteration,
                    total=bar.elapsed_td,
                    eta=bar.eta_td,
                )
                bar.suffix += bcolors.ENDC

                # Losses sorted by name, five per display line.
                cnt = 0
                for l_name, l_value in sorted(model_trainer.losses.items(), key=lambda x: x[0]):
                    cnt += 1
                    bar.suffix += ' | {name}: {val:.3f}'.format(
                        name=l_name,
                        val=l_value.avg,
                    )
                    if not cnt % 5:
                        bar.suffix += "\n"

            bar.next()

        bar.finish()

        # ---- Validation -------------------------------------------------
        if validate:
            print('EPOCH ::: VALIDATION ::: ' + str(epoch + 1))
            batch_time = AverageMeter()
            end = time.time()
            barName = args.method if args.comment == "NONE" else args.method + "/" + args.comment
            barName = "VAL:" + barName
            bar = Bar(barName, max=len(val_loader))

            model_trainer.set_eval_models()
            model_trainer.create_metrics_meter(model_trainer.metrics)

            for i, (images, captions, lengths) in enumerate(val_loader):
                if i == len(val_loader) - 1:
                    break

                images = to_var(images)
                # the first token of every caption is dropped for evaluation
                captions = to_var(captions[:, 1:])

                model_trainer.evaluate(epoch, images, captions, lengths, i == 0)

                # measure elapsed time
                batch_time.update(time.time() - end)
                end = time.time()

                # plot progress
                bar.suffix = bcolors.HEADER
                bar.suffix += '({batch}/{size}) Iter: {bt:} | Time: {total:}-{eta:}\n'.format(
                    batch=i,
                    size=len(val_loader),
                    bt=model_trainer.iteration,
                    total=bar.elapsed_td,
                    eta=bar.eta_td,
                )
                bar.suffix += bcolors.ENDC

                cnt = 0
                for l_name, l_value in sorted(model_trainer.metrics.items(), key=lambda x: x[0]):
                    cnt += 1
                    bar.suffix += ' | {name}: {val:.3f}'.format(
                        name=l_name,
                        val=l_value.avg,
                    )
                    if not cnt % 5:
                        bar.suffix += "\n"

                bar.next()

            bar.finish()

    model_trainer.save_models(-1)
def learn(self):
    """
    Run self.args['numIters'] rounds of self-play followed by network training.

    Before the first round, when self.args['fixed_matrix'] is True, a single
    sensing matrix is installed on both self.game_args and
    self.arena_game_args: it is either loaded from
    '<fixed_matrix_filepath>/sensing_matrix.npy' or freshly generated and
    saved to that folder.

    Each round then:
      1. plays self.args['numEps'] self-play episodes (skipped on round 1 when
         self.skipFirstSelfPlay is set) and appends their examples to
         self.trainExamplesHistory;
      2. drops the oldest history entry once the history is longer than
         self.args['numItersForTrainExamplesHistory'];
      3. saves the history via self.saveTrainExamples(round - 1);
      4. flattens and shuffles the history and trains self.nnet on it;
      5. if self.args['Arena'] is True, pits the retrained network against the
         pre-training copy (self.pnet) and keeps it unless its share of the
         decisive games is below self.args['updateThreshold']; otherwise the
         retrained network is always checkpointed.

    Returns nothing; all results are written to self and to checkpoint files.
    """
    # ---- one-off setup of a shared, fixed sensing matrix ----------------
    if self.args['fixed_matrix'] == True:
        reuse_saved_matrix = self.args['load_existing_matrix'] == True
        if not reuse_saved_matrix:
            # Build a matrix of the configured type, share it with the arena
            # settings, then persist it for later runs.
            self.game_args.generateSensingMatrix(
                self.args['m'], self.args['n'], self.args['matrix_type'])
            self.arena_game_args.sensing_matrix = self.game_args.sensing_matrix
            self.game_args.save_Matrix(self.args['fixed_matrix_filepath'])
        else:
            # Self-play and arena each receive their own array read from disk.
            matrix_file = self.args['fixed_matrix_filepath'] + '/sensing_matrix.npy'
            self.game_args.sensing_matrix = np.load(matrix_file)
            self.arena_game_args.sensing_matrix = np.load(matrix_file)

    for iteration in range(1, self.args['numIters'] + 1):
        print('------ITER ' + str(iteration) + '------')
        # Files written in this round are tagged with the previous round's
        # index, because the examples come from that round's network.
        tag = str(iteration - 1)

        # ---- self-play ---------------------------------------------------
        # Skipped only on the first round when examples were loaded from file.
        if iteration > 1 or not self.skipFirstSelfPlay:
            episode_examples = deque([], maxlen=self.args['maxlenOfQueue'])
            episode_timer = AverageMeter()
            progress = Bar('Self Play', max=self.args['numEps'])
            last_tick = time.time()

            for game_idx in range(self.args['numEps']):
                # Without a fixed matrix every game gets a new one.
                if self.args['fixed_matrix'] == False:
                    self.game_args.generateSensingMatrix(
                        self.args['m'], self.args['n'], self.args['matrix_type'])
                # Every game gets a new observed vector and a fresh search tree.
                self.game_args.generateNewObsVec(self.args['x_type'], self.args['sparsity'])
                self.mcts = MCTS(self.game, self.nnet, self.args, self.game_args, self.skip_nnet)

                episode_examples.extend(self.executeEpisode())

                # bookkeeping + progress display
                episode_timer.update(time.time() - last_tick)
                last_tick = time.time()
                progress.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                    eps=game_idx + 1,
                    maxeps=self.args['numEps'],
                    et=episode_timer.avg,
                    total=progress.elapsed_td,
                    eta=progress.eta_td)
                progress.next()
            progress.finish()

            # The history holds one collection of examples per round.
            self.trainExamplesHistory.append(episode_examples)

        # ---- history maintenance ----------------------------------------
        if len(self.trainExamplesHistory) > self.args['numItersForTrainExamplesHistory']:
            print("len(trainExamplesHistory) =", len(self.trainExamplesHistory), " => remove the oldest trainExamples")
            self.trainExamplesHistory.pop(0)
        self.saveTrainExamples(iteration - 1)

        # Flatten the per-round collections into one shuffled list.
        trainExamples = [sample for batch in self.trainExamplesHistory for sample in batch]
        shuffle(trainExamples)

        # ---- training without an arena ----------------------------------
        if not (self.args['Arena'] == True):
            print('TRAINING NEW NEURAL NETWORK...')
            prepared = self.nnet.constructTraining(trainExamples)
            self.nnet.train(prepared[0], prepared[1],
                            folder=self.args['network_checkpoint'],
                            filename='trainHistDict' + tag)
            self.nnet.save_checkpoint(folder=self.args['network_checkpoint'],
                                      filename='nnet_checkpoint' + tag)
            self.nnet.save_checkpoint(folder=self.args['network_checkpoint'], filename='best')
            continue

        # ---- training followed by an arena match ------------------------
        # Snapshot the current weights into pnet before nnet is retrained.
        self.nnet.save_checkpoint(folder=self.args['network_checkpoint'], filename='temp')
        self.pnet.load_checkpoint(folder=self.args['network_checkpoint'], filename='temp')

        prepared = self.nnet.constructTraining(trainExamples)
        self.nnet.train(prepared[0], prepared[1])

        print('PITTING AGAINST PREVIOUS VERSION')
        arena = Arena(self.pnet, self.nnet, self.game, self.args, self.arena_game_args)
        pwins, nwins, draws = arena.playGames()
        print('NEW/PREV WINS : %d / %d ; DRAWS : %d' % (nwins, pwins, draws))

        # Draws are ignored; with no decisive game the new network is accepted.
        decisive_games = pwins + nwins
        rejected = (decisive_games > 0
                    and float(nwins) / decisive_games < self.args['updateThreshold'])
        if not rejected:
            print('ACCEPTING NEW MODEL')
            self.nnet.save_checkpoint(folder=self.args['network_checkpoint'],
                                      filename='nnet_checkpoint' + tag)
            self.nnet.save_checkpoint(folder=self.args['network_checkpoint'], filename='best')
        else:
            # Roll nnet back to the snapshot taken before training.
            print('REJECTING NEW MODEL')
            self.nnet.load_checkpoint(folder=self.args['network_checkpoint'], filename='temp')