def makeDecision(self, s: State, response: DecisionResponse):
    """Pick a card for the current phase using the greedy classifier.

    Treasures are played in the order offered; buys are scored by the
    model's predicted win probability for this player. In training mode
    the buy is sampled from a tau-tempered softmax for exploration.

    Args:
        s: current game state; the pending decision is read from ``s.decision``.
        response: filled in-place with the chosen card (or ``None`` = pass).
    """
    d: DecisionState = s.decision
    p: int = s.player
    if s.phase == Phase.ActionPhase:
        # Fix: was `assert False, …`, which is silently stripped under
        # `python -O`; raise so the unsupported path always fails loudly.
        raise NotImplementedError('GreedyPlayer does not support action cards yet')
    elif s.phase == Phase.TreasurePhase:
        response.single_card = d.card_choices[0]
    else:
        # `None` represents the "buy nothing" option.
        choices = d.card_choices + [None]
        X = s.lookahead_batch_featurize(choices).cpu()
        # Column of predict_proba corresponding to this player's win.
        # NOTE(review): assumes player 1's win label is the smallest class
        # and the opponent's the largest — confirm against training labels.
        label_idx = np.argmin(
            self.model.classes_) if p == 1 else np.argmax(
            self.model.classes_)
        y = self.model.predict_proba(X)
        if self.train:
            # Exploration: sample proportionally to tempered win scores.
            card = np.random.choice(choices, p=softmax(y[:, label_idx], t=self.tau))
        else:
            # Exploitation: take the highest-scoring choice.
            card = choices[np.argmax(y[:, label_idx])]
        response.single_card = card
def makePhaseDecision(self, s: State, response: DecisionResponse):
    """Delegate the current phase's decision to the heuristic agenda.

    Actions go to the greedy action heuristic, treasures are played in
    the order presented, and buys are chosen by the agenda (with Curse
    removed from consideration outside of training).
    """
    decision: DecisionState = s.decision
    if s.phase == Phase.ActionPhase:
        self.heuristic.makeGreedyActionDecision(s, response)
        return
    if s.phase == Phase.TreasurePhase:
        # Treasures carry no choice: just play the first one offered.
        response.single_card = decision.card_choices[0]
        return
    # Buy phase: never consider buying a Curse outside of training.
    if not self.train:
        remove_first_card(Curse(), decision.card_choices)
    buyer = decision.controlling_player
    response.single_card = self.heuristic.agenda.buy(
        s, buyer, decision.card_choices)
def makeDecision(self, s: State, response: DecisionResponse):
    """Choose a card via the rollout policy's selection rule.

    Args:
        s: current game state; the pending decision is read from ``s.decision``.
        response: filled in-place with the chosen card (or ``None`` = pass).
    """
    d: DecisionState = s.decision
    if s.phase == Phase.ActionPhase:
        # Fix: was `assert False, …`, which is silently stripped under
        # `python -O`; raise so the unsupported path always fails loudly.
        raise NotImplementedError('MCTS does not support action cards yet')
    elif s.phase == Phase.TreasurePhase:
        response.single_card = d.card_choices[0]
    else:
        # `None` represents the "buy nothing" option.
        choices = d.card_choices + [None]
        # the next node in the tree is the one that maximizes the UCB1 score
        card = self.rollout.select(choices, state=s)
        response.single_card = card
def makeDecision(self, s: State, response: DecisionResponse):
    """Score each buy candidate with the value model and select one.

    Args:
        s: current game state; the pending decision is read from ``s.decision``.
        response: filled in-place with the chosen card (or ``None`` = pass).
    """
    d: DecisionState = s.decision
    p: int = s.player
    if s.phase == Phase.ActionPhase:
        # Fix: was `assert False, …`, which is silently stripped under
        # `python -O`; raise so the unsupported path always fails loudly.
        raise NotImplementedError('MCTS does not support action cards yet')
    elif s.phase == Phase.TreasurePhase:
        response.single_card = d.card_choices[0]
    else:
        # `None` represents the "buy nothing" option.
        # (Dropped a dead `vals = []` that was immediately overwritten.)
        choices = d.card_choices + [None]
        X = s.lookahead_batch_featurize(choices)
        vals = self.model(X).detach().cpu().numpy()
        choice = self.select(p, choices, vals)
        response.single_card = choice
def makeDecision(self, s: State, response: DecisionResponse):
    """Pick the buy whose lookahead features score best under the MLP.

    Args:
        s: current game state; the pending decision is read from ``s.decision``.
        response: filled in-place with the chosen card (or ``None`` = pass).
    """
    d: DecisionState = s.decision
    p: int = s.player
    if s.phase == Phase.ActionPhase:
        # Fix: was `assert False, …`, which is silently stripped under
        # `python -O`; raise so the unsupported path always fails loudly.
        raise NotImplementedError('GreedyMLPPlayer does not support action cards yet')
    elif s.phase == Phase.TreasurePhase:
        response.single_card = d.card_choices[0]
    else:
        # `None` represents the "buy nothing" option.
        choices = d.card_choices + [None]
        X = s.lookahead_batch_featurize(choices)
        # Output column for this player's win.
        # NOTE(review): assumes column 0 is a player-1 win and column 2 the
        # opponent's — confirm against the model's output layout.
        label_idx = 0 if p == 1 else 2
        y_pred = self.model.forward(X)
        # 0-dim integer tensor; valid as a list index via __index__.
        card_idx = torch.argmax(y_pred[:, label_idx])
        response.single_card = choices[card_idx]
def makeDecision(self, s: State, response: DecisionResponse):
    """Route the current decision to a heuristic, the MCTS tree, or rollout.

    Action-phase decisions go to hand-written heuristics (dispatching on
    the most recent pending event when a card effect is resolving); the
    treasure phase plays the first choice; buy decisions come from the
    tree while still in-tree, otherwise from the rollout policy.
    """
    d: DecisionState = s.decision
    if s.phase == Phase.ActionPhase:
        if not d.active_card:
            # No card currently resolving: pick which action to play.
            self.heuristic.makeGreedyActionDecision(s, response)
        elif s.events:
            # A card effect is pending; dispatch on the latest event.
            event = s.events[-1]
            if isinstance(event, DiscardDownToN):
                self.heuristic.makeDiscardDownDecision(s, response)
            elif isinstance(event, MoatReveal):
                self.heuristic.makeBaseDecision(s, response)
            else:
                raise ValueError(f'Event {type(event)} not supported')
        else:
            self.heuristic.makeBaseDecision(s, response)
    elif s.phase == Phase.TreasurePhase:
        # Treasures are played in the order offered.
        response.single_card = d.card_choices[0]
    else:
        # Remove Curse
        # `None` represents the "buy nothing" option.
        choices = list(
            filter(lambda x: not isinstance(x, Curse), d.card_choices + [None]))
        # Rollout (out-of-tree) case; tree actually isn't that good
        if not self.tree.in_tree or not self.use_tree:
            logging.log(level=BUY, msg='Rollout')
            response.single_card = self.rollout.select(choices, state=s)
            return
        # the next node in the tree is the one that maximizes the UCB1 score
        try:
            # Remove Copper and Victory cards -- tree never gets that deep anyways
            tree_choices = list(
                filter(
                    lambda x: not isinstance(x, Copper) and not issubclass(
                        type(x), VictoryCard), choices))
            card = self.tree.select(tree_choices)
            logging.log(level=BUY, msg=f'Selection: {self.tree.node.n}')
        except ValueError:
            # NOTE(review): ValueError presumably raised by tree.select when
            # no usable child exists — confirm; fall back to the rollout.
            card = self.rollout.select(choices, state=s)
        response.single_card = card
def makeDecision(self, s: State, response: DecisionResponse):
    """Prompt a human player on stdin for the current decision.

    Fixes: a non-numeric entry previously crashed with ValueError from
    ``int(text)`` — we now re-prompt instead; a negative index such as
    ``-2`` previously slipped past validation and picked from the end
    of the list — all negative indices are now rejected.

    Args:
        s: current game state; the pending decision is read from ``s.decision``.
        response: filled in-place with the chosen cards / discrete choice.
    """
    d: DecisionState = s.decision
    if s.phase == Phase.TreasurePhase:
        response.single_card = d.card_choices[0]
        return
    if d.type == DecisionType.DecisionSelectCards:
        cardsToPick = -1
        d.print_card_choices()
        # Re-prompt until a count within [min_cards, max_cards] is typed.
        while (cardsToPick < d.min_cards or cardsToPick > d.max_cards):
            text = ''
            while not text:
                text = input(
                    f'Pick between {d.min_cards} and {d.max_cards} of the above cards:\n'
                )
            try:
                cardsToPick = int(text)
            except ValueError:
                # Non-numeric entry: keep prompting instead of crashing.
                cardsToPick = -1
        responseIdxs = []
        for i in range(cardsToPick):
            cardIdx = -1
            # Reject duplicates, out-of-range, and negative indices.
            while (cardIdx < 0 or cardIdx in responseIdxs
                   or cardIdx >= len(d.card_choices)):
                d.print_card_choices()
                text = ''
                while not text:
                    text = input('Choose another card:\n')
                try:
                    cardIdx = int(text)
                except ValueError:
                    cardIdx = -1
            responseIdxs.append(cardIdx)
            response.cards.append(d.card_choices[cardIdx])
    elif d.type == DecisionType.DecisionDiscreteChoice:
        choice = -1
        # NOTE(review): upper bound of min_cards mirrors the original
        # validation — confirm min_cards is the discrete-choice maximum.
        while choice == -1 or choice > d.min_cards:
            text = ''
            while not text:
                text = input(
                    'Please make a discrete choice from the above cards:\n'
                )
            try:
                choice = int(text)
            except ValueError:
                choice = -1
            d.print_card_choices()
        response.choice = choice
    else:
        logging.error(f'Player {s.player} given invalid decision type.')
def makeDecision(self, s: State, response: DecisionResponse):
    """Resolve the current decision with uniformly random legal picks."""
    decision: DecisionState = s.decision
    # Do not allow RandomPlayer to purchase curses
    if s.phase == Phase.BuyPhase and not self.train:
        remove_first_card(Curse(), decision.card_choices)
    # Ensure random player plays all treasures
    if s.phase == Phase.TreasurePhase:
        response.single_card = decision.card_choices[0]
        return
    if decision.type == DecisionType.DecisionSelectCards:
        # Only draw from the RNG when there is an actual range to pick from,
        # keeping the random stream identical to the original behavior.
        if decision.max_cards > decision.min_cards:
            n_picked = random.randint(decision.min_cards, decision.max_cards)
        else:
            n_picked = decision.min_cards
        sample_size = min(n_picked, len(decision.card_choices))
        response.cards = random.sample(decision.card_choices, k=sample_size)
    elif decision.type == DecisionType.DecisionDiscreteChoice:
        response.choice = random.randint(0, decision.min_cards)
    else:
        logging.error('Invalid decision type')