def acts(self, states):
    """Sample one legal move for each state from the network's policy output.

    Each state is encoded with ``Translator.encode_board`` and the encodings
    are stacked into one batch, which is passed to ``self.net.forward``. The
    first element of the network's output is treated as the policy: row ``i``
    is indexed by the encoded legal-move indices of ``states[i]``, the
    selected entries are renormalised to sum to one, and a single move index
    is drawn from the resulting categorical distribution.

    Args:
        states: Sequence of game states. Each must be accepted by
            ``Translator.encode_board`` and expose ``micro_legal_moves``.

    Returns:
        A list with one entry per state, in input order, each produced by
        ``Translator.decode_move(sampled_index, state)``. An empty input
        yields an empty list.
    """
    # Stacking an empty batch is an error in torch, so short-circuit here.
    if len(states) == 0:
        return []

    # One encoded board per state, stacked along a new leading batch axis.
    obs = torch.stack([Translator.encode_board(s) for s in states], 0)

    # Policy-row indices of the moves that are legal in each state.
    legal_idxs = [
        torch.tensor(
            [Translator.encode_move_idx(m) for m in s.micro_legal_moves],
            dtype=torch.long,
        )
        for s in states
    ]

    # Only decoded moves leave this function, so no autograd graph is needed.
    with torch.no_grad():
        policy = self.net.forward(obs)[0]

    moves = []
    for row, (state, legal) in enumerate(zip(states, legal_idxs)):
        # Restrict the policy to legal moves and renormalise over them.
        # NOTE(review): assumes the policy entries are non-negative
        # probabilities (not logits) -- confirm against the network head.
        prob = policy[row, legal]
        prob = prob / prob.sum()
        # The sample is a position within `legal`; map it back to a move index.
        choice = legal[dist.Categorical(prob).sample()]
        moves.append(Translator.decode_move(choice.item(), state))
    return moves