def end(self):
    """Finish the current game: settle coins, distribute RL rewards, and train.

    Side effects (in order):
      * If ``self.skip`` is set, optionally (50% of the time) records a zero
        reward for every player, flushes/trains the game network, and returns
        early — no payout happens for a skipped game.
      * Otherwise determines winners and payout via ``Rules``, moves coins
        from each loser to each winner, pushes the coin deltas as rewards
        into the agent's game and card memories, flushes both memories,
        advances the starting position, and trains both networks.
      * Finally prints the accumulated log messages unless this was a
        random (exploration) game.
    """
    if self.skip:
        # 50 % of the time add 0 reward for no_game
        if np.random.rand() < 0.5:
            for player in self.match.players:
                self.match.rl_agent.update_game_memory_with_reward(
                    player.position, 0)
            self.match.rl_agent.flush_game_memory()
            self.match.rl_agent.train_game_network()
        return

    self.winners = Rules.calc_game_winner(self)
    # Log only decisive, non-exploratory wins.
    if len(self.winners) == 1 and not self.match.rl_agent.explore:
        self.log_msgs.append("WINNER WINNER")
    self.payout = Rules.calc_game_payout(self)

    # Snapshot coin counts before settlement so rewards can be derived
    # as the per-player coin delta afterwards.
    old_coins = [player.coins for player in self.match.players]
    for player in self.match.players:
        if player in self.winners:
            continue
        for winning_player in self.winners:
            # TODO: this will not work with callgame!!
            player.coins -= self.payout
            winning_player.coins += self.payout
    new_coins = [player.coins for player in self.match.players]
    rewards = [new - old for new, old in zip(new_coins, old_coins)]

    self.log_msgs.append(
        "Player(s) {0} won this game. \nRewards: {1}".format(
            [player.position for player in self.winners], rewards))

    for i, reward in enumerate(rewards):
        # Update game memory and card memory with reward - 8.0 is number of cards
        per_card_reward = reward / 8.0
        position = self.match.players[i].position
        self.match.rl_agent.update_game_memory_with_reward(position, reward)
        # Mark the terminal state (next_state=None, done=True) before
        # crediting the per-card reward.
        self.match.rl_agent.update_card_memory_with_next_state(
            position, None, True)
        self.match.rl_agent.update_card_memory_with_reward(
            position, per_card_reward)
        self.match.players[i].old_state = None

    self.match.rl_agent.flush_card_memory(self.game_type["game"])
    self.match.rl_agent.flush_game_memory()
    # Rotate the dealer/starting seat for the next game.
    self.match.current_starting_position = \
        (self.match.current_starting_position + 1) % self.match.num_players
    self.match.rl_agent.train_game_network()
    self.match.rl_agent.train_action_network(self.game_type['game'])

    # Suppress logs for random (exploration) games — NOTE(review): the
    # original comment said "only print logs if caller won the game",
    # which does not match this condition; verify intent.
    if not self.random_game:
        print("\n".join(self.log_msgs))