    'beta': 1, #Recall the augmented probability aug_prob = beta * probs + (1-beta) * (1/len(x)) * x_I, where x_I is the
               #indicator vector of ones on the support of the true sparse solution x. Hence, lower beta values shift the
               #probabilities toward the correct column choices.
               #SET beta = 1 DURING TESTING SINCE x SHOULD BE UNKNOWN DURING TESTING.
    'tempThreshold': 10, #dictates when the MCTS starts returning deterministic policies (vectors of 0's and 1's). See Coach.py for more details.
    'alpha': 1e-5, #the reward for a terminal state is -alpha*||x||_0 - gamma*||A_S*x - y||_2^2. The larger alpha is, the more
                   #weight the algorithm gives to selecting a sparse solution.
    'gamma': 1, #the reward for a terminal state is -alpha*||x||_0 - gamma*||A_S*x - y||_2^2. The smaller gamma is, the more likely
                #the algorithm is to choose the stopping action early (when ||x||_0 is small); gamma controls how strongly we enforce
                #that A_S*x is close to y.
                #The choice of gamma depends heavily on the distribution of the signal and of the entries of A. gamma should be
                #approximately larger than m/||A_S*x^* - y||_2^2, where y = Ax and x^* is the solution to the l2 regression problem.
    'epsilon': 1e-5, #If x is the optimal l2 solution and the l2 regression residual ||A_S*x - y||_2^2 is less than epsilon, then
                     #the state corresponding to the index set S is a terminal state in MCTS.
}

#START ALPHAZERO TRAINING:
#Initialize Game_args, nnet, Game, and Alphazero
Game_rules = Game_args()
Game = CSGame()
nnet = NNetWrapper(args)

if args['load_nn_model']:
    filename = 'best'
    nnet.load_checkpoint(args['network_checkpoint'], filename)

Alphazero_train = Coach(Game, nnet, args, Game_rules)

if args['load_training']:
    print('Load trainExamples from file')
    Alphazero_train.loadTrainExamples()

#Start Training Alphazero
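#To make the two formulas in the comments above concrete, here is a minimal,
#hypothetical sketch (not part of the original pipeline) of the augmented
#probability and the terminal reward, using numpy only; probs, x, S, A, and y
#are assumed inputs.
import numpy as np

def augmented_probs(probs, x, beta):
    #aug_prob = beta * probs + (1-beta) * (1/len(x)) * x_I, where x_I indicates the
    #support of the true sparse solution x; beta = 1 recovers the raw network output.
    x_I = (x != 0).astype(float)
    return beta * probs + (1 - beta) * x_I / len(x)

def terminal_reward(A, S, y, alpha, gamma, epsilon):
    #Solve the l2 regression restricted to the chosen columns S, then score the state
    #with -alpha*||x||_0 - gamma*||A_S*x - y||_2^2. len(S) stands in for ||x||_0,
    #assuming every selected coefficient is nonzero. The state is terminal once the
    #residual drops below epsilon.
    A_S = A[:, S]
    x_S, *_ = np.linalg.lstsq(A_S, y, rcond=None)
    residual = np.linalg.norm(A_S @ x_S - y) ** 2
    return -alpha * len(S) - gamma * residual, residual < epsilon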
    'alpha': 1e-5, #the reward for a terminal state is -alpha*||x||_0 - gamma*||A_S*x - y||_2^2. The larger alpha is, the more
                   #weight the algorithm gives to selecting a sparse solution.
    'epsilon': 1e-5, #If x is the optimal l2 solution and the l2 regression residual ||A_S*x - y||_2^2 is less than epsilon, then
                     #the state corresponding to the index set S is a terminal state in MCTS.
    #----------------------------------------------------------------
    'loadtestData': True,
    'test_samples_persparsity': 1000,
}

#Initialize Algorithms object to compare algorithms
Algorithms = CSAlgorithms()

#INITIALIZE ALPHAZERO FOR PREDICTION
#--------------------------------------------------------------
#initialize Game_args and load the sensing matrix into game_args
game_args = Game_args()
matrix_filename = 'sensing_matrix.npy'
A = np.load(matrix_filename)
game_args.sensing_matrix = A

#initialize the neural network wrapper object, then load the model weights
#we wish to predict with using nnet.load_checkpoint
nnet = NNetWrapper(args)
model_filename = 'best'
nnet.load_checkpoint(os.getcwd(), model_filename)

#initialize a new game object
new_game = CSGame()

#initialize skip_nnet if the option is turned on
if args['skip_rule'] == 'bootstrap':
    skip_nnet = NNetWrapper(args)
    skip_nnet.load_checkpoint(args['skip_nnet_folder'], args['skip_nnet_filename'])
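#When 'loadtestData' is False, test data presumably has to be generated. The helper
#below is a hypothetical sketch (generate_test_pairs is not in the original code) of
#the standard construction y = A*x for random signals with a prescribed sparsity,
#matching the 'test_samples_persparsity' option above.
import numpy as np

def generate_test_pairs(A, sparsity, num_samples):
    n = A.shape[1]
    pairs = []
    for _ in range(num_samples):
        x = np.zeros(n)
        support = np.random.choice(n, size=sparsity, replace=False)
        x[support] = np.random.randn(sparsity)  #random nonzeros on a random support
        pairs.append((x, A @ x))                #observation y = A*x
    return pairs

#e.g. 1000 pairs at an arbitrary example sparsity of 5
test_pairs = generate_test_pairs(A, 5, args['test_samples_persparsity'])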
    'maxTreeDepth': 30, #sets the max tree depth of the MCTS search. Once max tree depth is reached, and if sparsity > maxTreeDepth,
                        #the bootstrapped network (skip_rule) is used to pick the remaining columns. Note that maxTreeDepth does not
                        #count the root or terminal nodes as levels in the tree, so the real tree depth is maxTreeDepth + 2.
    'skip_rule': 'OMP', #Current options: None (defaults to the current policy/value network), 'OMP' (uses the OMP rule to pick the
                        #next column), 'bootstrap' (uses the bootstrapped network in the bootstrap folder)
    'skip_nnet_folder': os.getcwd() + '/skip_network',
    'skip_nnet_filename': 'skip_nnet',
    'beta': 1, #Recall the augmented probability aug_prob = beta * probs + (1-beta) * (1/len(x)) * x_I, where x_I is the
               #indicator vector of ones on the support of the true sparse solution x. Hence, lower beta values shift the
               #probabilities toward the correct column choices.
               #SET beta = 1 DURING TESTING SINCE x SHOULD BE UNKNOWN DURING TESTING.
    'tempThreshold': 25, #dictates when the MCTS starts returning deterministic policies (vectors of 0's and 1's). See Coach.py for more details.
    'gamma': 1, #the reward for a terminal state is -alpha*||x||_0 - gamma*||A_S*x - y||_2^2. The smaller gamma is, the more likely
                #the algorithm is to choose the stopping action early (when ||x||_0 is small); gamma controls how strongly we enforce
                #that A_S*x is close to y.
                #The choice of gamma depends heavily on the distribution of the signal and of the entries of A. gamma should be
                #approximately larger than m/||A_S*x^* - y||_2^2, where y = Ax and x^* is the solution to the l2 regression problem.
    'alpha': 1e-5, #the reward for a terminal state is -alpha*||x||_0 - gamma*||A_S*x - y||_2^2. The larger alpha is, the more
                   #weight the algorithm gives to selecting a sparse solution.
    'epsilon': 1e-5, #If x is the optimal l2 solution and the l2 regression residual ||A_S*x - y||_2^2 is less than epsilon, then
                     #the state corresponding to the index set S is a terminal state in MCTS.
}

#START ALPHAZERO TRAINING:
#Initialize Game_args, nnet, Game, and Alphazero
Game_rules = Game_args()
Game = CSGame()
nnet = NNetWrapper(args)

if args['load_nn_model']:
    filename = 'best'
    nnet.load_checkpoint(args['network_checkpoint'], filename)

if args['skip_rule'] == 'bootstrap':
    skip_nnet = NNetWrapper(args)
    skip_nnet.load_checkpoint(args['skip_nnet_folder'], args['skip_nnet_filename'])
elif args['skip_rule'] is None:
    skip_nnet = nnet
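#For reference, a minimal sketch of the greedy column-selection rule the 'OMP'
#skip_rule refers to: pick the column of A most correlated with the current l2
#residual. This is the textbook Orthogonal Matching Pursuit step, not a
#transcription of the project's CSAlgorithms implementation.
import numpy as np

def omp_next_column(A, y, S):
    #residual of the least-squares fit on the columns chosen so far
    if len(S) > 0:
        x_S, *_ = np.linalg.lstsq(A[:, S], y, rcond=None)
        r = y - A[:, S] @ x_S
    else:
        r = y
    scores = np.abs(A.T @ r)   #|<a_j, r>| for every column a_j
    scores[S] = -np.inf        #never re-pick an already chosen column
    return int(np.argmax(scores))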
               #SET beta = 1 DURING TESTING SINCE x SHOULD BE UNKNOWN DURING TESTING.
    'tempThreshold': 25, #dictates when the MCTS starts returning deterministic policies (vectors of 0's and 1's). See Coach.py for more details.
    'gamma': 1, #the reward for a terminal state is -alpha*||x||_0 - gamma*||A_S*x - y||_2^2. The smaller gamma is, the more likely
                #the algorithm is to choose the stopping action early (when ||x||_0 is small); gamma controls how strongly we enforce
                #that A_S*x is close to y.
                #The choice of gamma depends heavily on the distribution of the signal and of the entries of A. gamma should be
                #approximately larger than m/||A_S*x^* - y||_2^2, where y = Ax and x^* is the solution to the l2 regression problem.
    'alpha': 1e-5, #the reward for a terminal state is -alpha*||x||_0 - gamma*||A_S*x - y||_2^2. The larger alpha is, the more
                   #weight the algorithm gives to selecting a sparse solution.
    'epsilon': 1e-5, #If x is the optimal l2 solution and the l2 regression residual ||A_S*x - y||_2^2 is less than epsilon, then
                     #the state corresponding to the index set S is a terminal state in MCTS.
}

#Test the search capabilities of multiple MCTS objects using Threading_MCTS
#global Game_args object, global CSGame (for game rules and such), global policy/value net
game_args = Game_args()
game_args.generateSensingMatrix(args['m'], args['n'], args['matrix_type'])
Game = CSGame()
nnet = NNetWrapper(args)
#---------------------------------------------------
#Initialize MCTS_States_list
for i in range(args['num_batches']):
    MCTS_States_list = []
    batchTrainExamples = []
    #In the loop below, we create a pair of the form (MCTS_object, [list of States])
    for ep in range(args['eps_per_batch']):
        #Initialize Game_args() for MCTS
        temp_game_args = Game_args()
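#A minimal sketch of what the tempThreshold option above describes, in the usual
#AlphaZero style (the actual logic lives in Coach.py): before tempThreshold moves
#have been played, actions are sampled in proportion to the MCTS visit counts;
#afterwards the policy collapses to a deterministic one-hot vector. counts and
#episode_step are assumed inputs.
import numpy as np

def policy_from_counts(counts, episode_step, tempThreshold):
    counts = np.asarray(counts, dtype=float)
    if episode_step < tempThreshold:
        return counts / counts.sum()   #stochastic policy early in the episode
    pi = np.zeros_like(counts)
    pi[np.argmax(counts)] = 1.0        #deterministic 0/1 policy afterwards
    return pi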
    def __init__(self, game, args, game_args):
        #store the game rules, hyperparameters, and Game_args object; keep a
        #separate Game_args instance for arena play
        self.args = args
        self.game_args = game_args
        self.arena_game_args = Game_args()
        self.game = game