def test_p1_win_over(self):
    """Check game_over() before any move, after p1 moves 53, and after p2 moves 53.

    The game must report "not over" for a fresh model and after only p1
    has been moved by 53, and "over" once p2 has been moved by 53 as well.
    """
    game = PatchworkModel()
    self.assertEqual(game.game_over(), False)

    # (player to move, opponent, expected game_over() result after the move)
    steps = (
        (game.p1, game.p2, False),
        (game.p2, game.p1, True),
    )
    for mover, opponent, expected_over in steps:
        mover.move(53, game.time_track, opponent)
        self.assertEqual(game.game_over(), expected_over)
class PatchworkControllerAIvAI():
    """Plays batches of AI-versus-AI games and prints summary statistics.

    Two drivers are provided:

    * ``mainloop`` - p1 uses ``choose_turn_hand_craft``, p2 uses
      ``choose_turn_random``.
    * ``mainloop_learning`` - p1 uses ``choose_turn_learning`` (which is
      given the shared feature weights), p2 uses ``choose_turn_random``;
      the feature weights are also updated from p2's moves and from the
      final result of every game.
    """

    # Number of consecutive games averaged into one reporting bucket.
    INTERVAL_SIZE = 5

    def __init__(self):
        """Create the model, the AI, the feature weights and both feature states."""
        self.model = PatchworkModel()
        self.ai = PatchworkAI()
        self.feature_weights = FeatureWeights()
        self.p1_feature_state = FeatureStateModel(self.model.p1, self.model.p2)
        self.p2_feature_state = FeatureStateModel(self.model.p2, self.model.p1)
        self.running = True

    # ------------------------------------------------------------------
    # Shared helpers
    # ------------------------------------------------------------------

    def _place_bonus_patch(self, receiver):
        """Let the AI place a ``Patch(34, [[1]], 0, 0, 0)`` on ``receiver``'s quilt."""
        # Two separate Patch instances are used on purpose: one is handed to
        # the placement search, a fresh one is what actually gets placed.
        row, col, _ = self.ai.choose_placement(
            Patch(34, [[1]], 0, 0, 0), receiver.quilt)
        self.model.place_patch(receiver, Patch(34, [[1]], 0, 0, 0), row, col)

    def _print_final_state(self):
        """Print both players' scores, buttons and 9x9 quilt boards."""
        p1 = self.model.p1
        p2 = self.model.p2
        print("P1 score: {}, P2 score: {}".format(
            p1.get_score(), p2.get_score()))
        print("P1 buttons: {}, P2 buttons: {}".format(p1.buttons, p2.buttons))
        print("-------------------------------")
        for title, owner in (("P1 board", p1), ("P2 board", p2)):
            print(title)
            board = owner.quilt.board_array
            for row in range(9):
                print("".join(str(board[row][col]) for col in range(9)))
            print("-------------------------------")

    @staticmethod
    def _print_progress(game_idx):
        """Write the in-place "Simulating..." progress indicator for one game."""
        if game_idx == 0:
            print("Simulating", end="")
        elif (game_idx % 4) == 0:
            # Erase previously written characters so the line does not grow.
            sys.stdout.write("\b\b\b \b\b\b")
        else:
            sys.stdout.write(".")
        sys.stdout.flush()

    @staticmethod
    def _print_win_summary(num_samples, p1_win):
        """Print games played plus win counts and win percentages.

        A leading blank ``print()`` terminates the progress-indicator line.
        With ``num_samples == 0`` both percentages are reported as 0.0
        instead of raising ``ZeroDivisionError``.
        """
        p2_win = num_samples - p1_win
        if num_samples:
            p1_pct = (p1_win / num_samples) * 100
            p2_pct = (p2_win / num_samples) * 100
        else:
            p1_pct = p2_pct = 0.0
        print()
        print("Games run: {}".format(num_samples))
        print("P1 wins: {}, P2 wins: {}".format(p1_win, p2_win))
        print("P1 win %: {}, P2 win %: {}".format(p1_pct, p2_pct))
        print()

    def _interval_label(self, bucket_idx):
        """Return the ``"start - end"`` game-range label of a reporting bucket."""
        start = bucket_idx * self.INTERVAL_SIZE
        return "{} - {}".format(start, start + self.INTERVAL_SIZE)

    def _feature_snapshot(self, player):
        """Return ``(button_gen, buttons, quilt_utility)`` for ``player``."""
        return (
            player.quilt.button_gen,
            player.buttons,
            self.ai.get_quilt_utility(player.quilt),
        )

    def _print_feature_averages(self, heading, feature_avgs):
        """Print ``heading`` followed by one line per bucket of feature averages."""
        print(heading)
        for idx, (button_gen, buttons, board_util) in enumerate(feature_avgs):
            print("{} | Button Gen: {}, Buttons: {}, Board Util: {}".format(
                self._interval_label(idx), button_gen, buttons, board_util))

    # ------------------------------------------------------------------
    # Drivers
    # ------------------------------------------------------------------

    def mainloop(self, num_samples):
        """Run ``num_samples`` games (hand-crafted p1 vs random p2) and print results.

        Prints the win counts/percentages, the AI's per-patch early/mid/late
        weights, and the average final score of each player per bucket of
        ``INTERVAL_SIZE`` games. A trailing bucket with fewer than
        ``INTERVAL_SIZE`` games is not reported. When ``num_samples`` is 1
        or 2 the final scores and boards of each game are printed as well.

        Args:
            num_samples: number of games to simulate.
        """
        p1_win = 0
        p1_running_sum = 0
        p2_running_sum = 0
        p1_game_avgs = []
        p2_game_avgs = []

        for i in range(num_samples):
            # reset the model for next game
            self.model = PatchworkModel()
            self.running = True

            while self.running:
                # p1 is smart, p2 is dumb
                if self.model.p1_turn():
                    player = self.model.p1
                    other_player = self.model.p2
                    turn = self.ai.choose_turn_hand_craft(self.model)
                else:
                    player = self.model.p2
                    other_player = self.model.p1
                    turn = self.ai.choose_turn_random(self.model)

                if isinstance(turn, BuyTurn):
                    patch = self.model.patch_list[turn.patch_idx]
                    if self.ai.can_place(patch, player.quilt):
                        row, col, patch_orientation = self.ai.choose_placement(
                            patch, player.quilt)
                        self.model.place_patch(
                            player, patch_orientation, row, col)
                    else:
                        # TEMPORARY HANDLING FOR IF THE PIECE CANT BE PLACED,
                        # NEED NEW SOLUTION FOR THIS
                        # NOTE(review): mainloop_learning builds JumpTurn with
                        # three arguments and passes a state model to run();
                        # confirm this argument-less form is still supported.
                        turn = JumpTurn()

                # run the turn (buy piece for buy, jump for jump), and check
                # if patch is passed on time track
                passed_patch, _ = turn.run(self.model)
                if passed_patch:
                    # NOTE(review): the 1x1 patch is placed on other_player's
                    # quilt, not the moving player's - confirm this is intended.
                    self._place_bonus_patch(other_player)

                # if game is over, exit running loop and update p1_win counter
                if self.model.game_over():
                    if self.model.p1_win() == 1:
                        p1_win += 1

                    # Add this game's scores BEFORE checking the bucket
                    # boundary so each bucket averages exactly INTERVAL_SIZE
                    # games (games 0-4, 5-9, ...).
                    p1_running_sum += self.model.p1.get_score()
                    p2_running_sum += self.model.p2.get_score()
                    if ((i + 1) % self.INTERVAL_SIZE) == 0:
                        p1_game_avgs.append(
                            p1_running_sum / self.INTERVAL_SIZE)
                        p2_game_avgs.append(
                            p2_running_sum / self.INTERVAL_SIZE)
                        p1_running_sum = 0
                        p2_running_sum = 0

                    self.running = False

            # With only one or two samples, print the basic per-game output.
            if num_samples == 2 or num_samples == 1:
                self._print_final_state()

            self._print_progress(i)

        self._print_win_summary(num_samples, p1_win)

        weights = self.ai.feature_weights
        for idx, early in enumerate(weights["patch_weights_early"]):
            print(
                "Patch ID: {}, Early Weight: {}, Mid Weight: {}, "
                "Late Weight: {}".format(
                    idx + 1,
                    early,
                    weights["patch_weights_mid"][idx],
                    weights["patch_weights_late"][idx]))
        print()

        print("GAME AVGS: ")
        for idx, p1_avg in enumerate(p1_game_avgs):
            p2_avg = p2_game_avgs[idx]
            print("{} | Player 1: {}, Player 2: {}, Difference: {}".format(
                self._interval_label(idx), p1_avg, p2_avg, p1_avg - p2_avg))

    def mainloop_learning(self, num_samples):
        """Run ``num_samples`` games (learning p1 vs random p2) and print results.

        During every game the shared feature weights are updated after each
        p2 turn and once more for each player when the game ends. Afterwards
        the win counts/percentages, the four learned feature weights, and
        per-bucket (``INTERVAL_SIZE`` games) averages of score, wins, button
        generation, buttons and quilt utility are printed. A trailing bucket
        with fewer than ``INTERVAL_SIZE`` games is not reported. When
        ``num_samples`` is 1 or 2 the final scores and boards of each game
        are printed as well.

        Args:
            num_samples: number of games to simulate.
        """
        p1_win = 0

        # Per-bucket accumulators, reset at every bucket boundary.
        p1_win_count = 0
        p1_running_sum = 0
        p2_running_sum = 0
        p1_feature_sums = [0, 0, 0]  # button_gen, buttons, quilt utility
        p2_feature_sums = [0, 0, 0]

        # One entry per completed bucket.
        p1_game_avgs = []
        p2_game_avgs = []
        p1_win_counts = []
        p1_feature_avgs = []
        p2_feature_avgs = []

        for i in range(num_samples):
            # reset the model for next game
            self.model = PatchworkModel()
            self.running = True
            self.p1_feature_state = FeatureStateModel(
                self.model.p1, self.model.p2)
            self.p2_feature_state = FeatureStateModel(
                self.model.p2, self.model.p1)

            while self.running:
                if self.model.p1_turn():
                    player = self.model.p1
                    state_model = self.p1_feature_state
                    other_player = self.model.p2
                    # picks turn based on learned weights and updates those
                    # weights
                    turn = self.ai.choose_turn_learning(
                        self.model, self.p1_feature_state,
                        self.feature_weights)
                else:
                    player = self.model.p2
                    state_model = self.p2_feature_state
                    other_player = self.model.p1
                    turn = self.ai.choose_turn_random(self.model)
                    # update weights based on p2 moves as well
                    future_state = self.ai.find_future_state(
                        turn, self.p2_feature_state)
                    reward = self.p2_feature_state.calculate_reward(
                        turn.passes_player, turn.passes_econ, 0)
                    self.feature_weights.update_feature_weights(
                        reward,
                        future_state.get_state_utility(self.feature_weights),
                        self.p2_feature_state)

                if isinstance(turn, BuyTurn):
                    patch = self.model.patch_list[turn.patch_idx]
                    if self.ai.can_place(patch, player.quilt):
                        row, col, patch_orientation = self.ai.choose_placement(
                            patch, player.quilt)
                        self.model.place_patch(
                            player, patch_orientation, row, col)
                    else:
                        # The chosen patch does not fit on the quilt: fall
                        # back to a jump to one space past the other player.
                        passes_patch, passes_econ, passes_player = (
                            player.will_pass_tile(
                                other_player.position - player.position + 1,
                                self.model.time_track,
                                other_player))
                        turn = JumpTurn(
                            passes_patch, passes_econ, passes_player)

                # run the turn (buy piece for buy, jump for jump)
                passed_patch, _ = turn.run(self.model, state_model)

                # placing 1x1 patch if it was passed
                if passed_patch:
                    # NOTE(review): the 1x1 patch is placed on other_player's
                    # quilt, not the moving player's - confirm this is intended.
                    self._place_bonus_patch(other_player)

                # if game is over, exit running loop and update p1_win counter
                if self.model.game_over():
                    # NOTE(review): both players' final rewards are computed
                    # from p1_win(); confirm calculate_reward interprets the
                    # value from each player's own perspective.
                    p1_rewards = self.p1_feature_state.calculate_reward(
                        False, True, self.model.p1_win())
                    p2_rewards = self.p2_feature_state.calculate_reward(
                        False, True, self.model.p1_win())
                    self.feature_weights.update_feature_weights(
                        p1_rewards,
                        self.p1_feature_state.get_state_utility(
                            self.feature_weights),
                        self.p1_feature_state)
                    self.feature_weights.update_feature_weights(
                        p2_rewards,
                        self.p2_feature_state.get_state_utility(
                            self.feature_weights),
                        self.p2_feature_state)

                    # CALCULATING OUTPUTS FOR PROGRESS TRACKING
                    # Add this game's results BEFORE checking the bucket
                    # boundary so each bucket covers exactly INTERVAL_SIZE
                    # games (games 0-4, 5-9, ...).
                    if self.model.p1_win() == 1:
                        p1_win_count += 1
                        p1_win += 1
                    p1_running_sum += self.model.p1.get_score()
                    p2_running_sum += self.model.p2.get_score()
                    p1_feature_sums = [
                        total + value for total, value in zip(
                            p1_feature_sums,
                            self._feature_snapshot(self.model.p1))]
                    p2_feature_sums = [
                        total + value for total, value in zip(
                            p2_feature_sums,
                            self._feature_snapshot(self.model.p2))]

                    if ((i + 1) % self.INTERVAL_SIZE) == 0:
                        p1_game_avgs.append(
                            p1_running_sum / self.INTERVAL_SIZE)
                        p2_game_avgs.append(
                            p2_running_sum / self.INTERVAL_SIZE)
                        p1_win_counts.append(p1_win_count)
                        p1_feature_avgs.append(tuple(
                            total / self.INTERVAL_SIZE
                            for total in p1_feature_sums))
                        p2_feature_avgs.append(tuple(
                            total / self.INTERVAL_SIZE
                            for total in p2_feature_sums))
                        p1_running_sum = 0
                        p2_running_sum = 0
                        p1_win_count = 0
                        p1_feature_sums = [0, 0, 0]
                        p2_feature_sums = [0, 0, 0]

                    # exit while loop, game is over
                    self.running = False

            # With only one or two samples, print the basic per-game output.
            if num_samples == 2 or num_samples == 1:
                self._print_final_state()

            self._print_progress(i)

        self._print_win_summary(num_samples, p1_win)

        print("WEIGHTS: ")
        print("Button Gen Weight: {}".format(
            self.feature_weights.button_gen_weight))
        print("Board Coverage Weight: {}".format(
            self.feature_weights.board_coverage_weight))
        print("Button Weight: {}".format(
            self.feature_weights.buttons_weight))
        print("Player Distance Weight: {}".format(
            self.feature_weights.player_distance_weight))
        print()

        print("GAME AVGS: ")
        for idx, p1_avg in enumerate(p1_game_avgs):
            p2_avg = p2_game_avgs[idx]
            wins = p1_win_counts[idx]
            print(
                "{} | Player 1: {}, Player 2: {}, Difference: {}, "
                "P1 Wins: {}, P2 Wins: {}, P1 Win %: {}".format(
                    self._interval_label(idx),
                    p1_avg,
                    p2_avg,
                    p1_avg - p2_avg,
                    wins,
                    self.INTERVAL_SIZE - wins,
                    (wins / self.INTERVAL_SIZE) * 100))

        print("AI TENDENCIES: ")
        self._print_feature_averages(
            "P1 (LEARNED AI) FEATURE AVERAGES", p1_feature_avgs)
        print()
        self._print_feature_averages(
            "P2 (RANDOM AI) FEATURE AVERAGES", p2_feature_avgs)