Example #1
0
 def Initialize(self, eps):
     """Build the list of (MCTS, states) pairs for one batch of self-play.

     Used in Coach.playAllGames. One entry is created per episode, so the
     length of the returned list should equal eps_per_batch.

     Args:
         eps: number of episodes (i.e. MCTS trees) to initialize.

     Returns:
         list of [MCTS, [initial_state]] pairs, one per episode.
     """
     MCTS_States_list = []

     for ep in range(eps):
         #Each episode gets its own Game_args for its MCTS
         temp_game_args = Game_args()
         if not self.args['fixed_matrix']:
             temp_game_args.generateSensingMatrix(self.args['m'], self.args['n'], self.args['matrix_type'])
         else:
             #reference (not copy) the Coach's fixed matrix so only one copy of A is stored
             temp_game_args.sensing_matrix = self.game_args.sensing_matrix

         #fresh observed vector for every episode
         temp_game_args.generateNewObsVec(self.args['x_type'], self.args['sparsity'])

         #Initialize the MCTS tree and the first (root) state for this episode
         temp_MCTS = MCTS(self.game, self.nnet, self.args, temp_game_args)
         temp_init_state = self.game.getInitBoard(self.args, temp_game_args)

         #Append to MCTS_States_list
         MCTS_States_list.append([temp_MCTS, [temp_init_state]])

     return MCTS_States_list
Example #2
0
 def learn(self):
     """Main AlphaZero-style training loop.

     For each of args['numIters'] iterations:
       1) unless self-play is skipped (skipFirstSelfPlay and i == 1), run
          args['num_batches'] batches of args['eps_per_batch'] synchronous
          self-play episodes and collect training examples;
       2) cap and save the training-example history;
       3) retrain the network, optionally pitting it against the previous
          version in the Arena and keeping the winner.
     """
     #Generate or load a matrix if fixed_matrix is set. A single Game_args is kept on
     #the Coach so the many MCTS objects created below can all reference one copy of A
     #instead of each holding its own.
     if self.args['fixed_matrix']:
         if self.args['load_existing_matrix']:
             self.game_args.sensing_matrix = np.load(self.args['fixed_matrix_filepath'] + '/sensing_matrix.npy')
         else:
             self.game_args.generateSensingMatrix(self.args['m'], self.args['n'], self.args['matrix_type'])
             self.game_args.save_Matrix(self.args['fixed_matrix_filepath'])

     #keep track of total learning time
     learning_start = time.time()

     #start training iterations
     for i in range(1, self.args['numIters']+1):
         print('------ITER ' + str(i) + '------')
         #Self-play runs unless we loaded a set of training data and this is the first iteration
         if not self.skipFirstSelfPlay or i>1:
             #1)Bounded deque collecting every training example produced in this iteration
             iterationTrainExamples = deque([], maxlen=self.args['maxlenOfQueue'])

             #2)Start search. A single search advances ALL eps in the current batch
             #synchronously, so the number of MCTS trees alive at once equals eps_per_batch.
             for j in range(self.args['num_batches']):
                 #INITIALIZATION STEP---------------------------------------
                 #Each element of MCTS_States_list has the form [MCTS object, [states the root traversed]]
                 MCTS_States_list = []
                 batchTrainExamples = []

                 #Initialize bookkeeping
                 print('Generating Self-Play Batch ' + str(j) + ':')

                 bar = Bar('Self Play', max = self.args['eps_per_batch'])

                 #Initialize MCTS_States_list; its length should equal eps_per_batch
                 for ep in range(self.args['eps_per_batch']):
                     #Initialize a Game_args() for this episode's MCTS
                     temp_game_args = Game_args()
                     if not self.args['fixed_matrix']:
                         temp_game_args.generateSensingMatrix(self.args['m'], self.args['n'], self.args['matrix_type'])
                     else:
                         #reference (not copy) the single shared matrix stored on the Coach
                         temp_game_args.sensing_matrix = self.game_args.sensing_matrix
                     temp_game_args.generateNewObsVec(self.args['x_type'], self.args['sparsity'])
                     #NOTE(review): digit concatenation can collide across batches,
                     #e.g. (j=1, ep=11) and (j=11, ep=1) both yield 111 — confirm
                     #these identifiers are not required to be globally unique.
                     episode_id = int(str(j) + str(ep))
                     #Initialize the MCTS tree and the first state for this episode
                     temp_MCTS = MCTS(self.game, self.nnet, self.args, temp_game_args, identifier = episode_id)
                     temp_init_state = self.game.getInitBoard(self.args, temp_game_args, identifier = episode_id)
                     #Append to MCTS_States_list
                     MCTS_States_list.append([temp_MCTS, [temp_init_state]])

                 #variables for the progress bar shown in the terminal
                 current_MCTSStateslist_size = len(MCTS_States_list)
                 completed_episodes = 0
                 total_completed_eps = 0

                 #Threading class needed to call threaded_mcts below.
                 threaded_mcts = Threading_MCTS(self.args, self.nnet)
                 #----------------------------------------------------------

                 #While MCTS_States_list is nonempty, advance every episode by one move.
                 #Completed games are removed from MCTS_States_list, so the loop ends
                 #exactly when all games in the batch have been played to completion.
                 #----------------------------------------------------------
                 self_play_batchstart = time.time()

                 while MCTS_States_list:
                     #advanceEpisodes returns MCTS_States_list with every element advanced one
                     #move and completed games removed, plus the trainExamples harvested from
                     #any games that finished during this call.
                     MCTS_States_list, trainExamples = self.advanceEpisodes(MCTS_States_list, threaded_mcts)
                     #states whose last node is terminal become new training samples
                     batchTrainExamples += trainExamples

                     #progress-bar bookkeeping: advance once per episode completed this step
                     if len(MCTS_States_list) < current_MCTSStateslist_size:
                         completed_episodes = current_MCTSStateslist_size - len(MCTS_States_list)
                         current_MCTSStateslist_size = len(MCTS_States_list)
                         total_completed_eps += completed_episodes
                         #bar.next() advances and redraws the bar; bar.suffix is the text after "|"
                         bar.suffix  = '({eps_completed}/{maxeps})'.format(eps_completed = total_completed_eps, maxeps=self.args['eps_per_batch'])

                         #advance the progress bar completed_episodes times
                         for k in range(completed_episodes):
                             bar.next()
                 #----------------------------------------------------------
                 #end the tracking of the bookkeeping bar
                 bar.finish()
                 self_play_batchend = time.time()
                 print('All Self-Play Games in batch have been played to completion.')
                 print('Total time taken for batch: ', self_play_batchend - self_play_batchstart)

                 iterationTrainExamples += batchTrainExamples

             #Add this iteration's samples to the history. This is the last statement
             #inside the "if not self.skipFirstSelfPlay or i>1:" block.
             self.trainExamplesHistory.append(iterationTrainExamples)

         #Self-play (if any) is done; use the accumulated examples to retrain the network.
         #Drop the oldest iteration's examples once the history grows past the cap.
         if len(self.trainExamplesHistory) > self.args['numItersForTrainExamplesHistory']:
             print("len(trainExamplesHistory) =", len(self.trainExamplesHistory), " => remove the oldest trainExamples")
             self.trainExamplesHistory.pop(0)

         #save trainExamplesHistory list of Coach
         self.saveTrainExamples(i-1)

         #flatten trainExamplesHistory into a single list and shuffle it for training
         trainExamples = []
         for e in self.trainExamplesHistory:
             trainExamples.extend(e)
         shuffle(trainExamples)

         #The Arena--------------------------------------------------------
         if self.args['Arena']:
             self.nnet.save_checkpoint(folder=self.args['network_checkpoint'], filename='temp') #snapshot current weights...
             self.pnet.load_checkpoint(folder=self.args['network_checkpoint'], filename='temp') #...so pnet holds the pre-training network

             #convert trainExamples into a format recognizable by the Neural Network and train
             trainExamples = self.nnet.constructTraining(trainExamples)
             self.nnet.train(trainExamples[0], trainExamples[1]) #self.nnet's weights are now updated

             #Pit the two neural networks self.pnet and self.nnet in the arena
             print('PITTING AGAINST PREVIOUS VERSION')

             #A fresh Arena is built every iteration; the previous object is garbage-collected
             #once unreferenced. Note Game_args A and y change constantly during arena play.
             arena = Arena(self.pnet, self.nnet, self.game, self.args, self.arena_game_args)
             pwins, nwins, draws = arena.playGames()

             print('NEW/PREV WINS : %d / %d ; DRAWS : %d' % (nwins, pwins, draws))
             if pwins+nwins > 0 and float(nwins)/(pwins+nwins) < self.args['updateThreshold']:
                 print('REJECTING NEW MODEL')
                 self.nnet.load_checkpoint(folder=self.args['network_checkpoint'], filename='temp')
             else:
                 #saves the weights(.h5) and model(.json) twice: creates the
                 #nnet_checkpoint(i-1) files and rewrites best_model.json/best_weights.h5
                 print('ACCEPTING NEW MODEL')
                 self.nnet.save_checkpoint(folder=self.args['network_checkpoint'], filename='nnet_checkpoint' + str(i-1))
                 self.nnet.save_checkpoint(folder=self.args['network_checkpoint'], filename='best')
         #-----------------------------------------------------------------

         else:
             #Without the Arena we just train the network, rewrite 'best', and write a
             #new 'nnet_checkpoint' + str(i-1) file.
             print('TRAINING NEW NEURAL NETWORK...')
             trainExamples = self.nnet.constructTraining(trainExamples)

             self.nnet.train(trainExamples[0], trainExamples[1], folder = self.args['network_checkpoint'], filename = 'trainHistDict' + str(i-1))

             self.nnet.save_checkpoint(folder = self.args['network_checkpoint'], filename='nnet_checkpoint' + str(i-1))
             self.nnet.save_checkpoint(folder = self.args['network_checkpoint'], filename = 'best')

     #Compute total time to run alphazero
     learning_end = time.time()
     print('----------TRAINING COMPLETE----------')
     print('Total training time: ', learning_end - learning_start)