        if next_move_val < min_val:
            min_val = next_move_val
            best_move = m
        # once a negative score is found, the minimizer cannot do
        # better, so stop searching the remaining moves early
        if next_move_val < 0:
            break
    if i == 0:
        # at the root, return the chosen move rather than its score
        return best_move
    # otherwise propagate the score for the player to move at this depth
    return max_val if i % 2 == 0 else min_val


if x_first:
    print("valid moves are ", game.valid_moves)
    move = int(input("player one (X), make first move: "))
    game.make_move(move)

while not game.is_over():
    if game.player == X:
        print("valid moves are ", game.valid_moves)
        move = int(input("please make move: "))
    else:
        move = minimax(game, 0)
        print("computer is now making move ", move)
    game.make_move(move)
    print(game)

if game.winner_exists():
    print("You win!" if game.get_winner() == X else "You lose!")
else:
    print("Players one and two tied!")
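# ------------------------------------------------------------------
# A minimal sketch of the game interface the loop above relies on.
# Method and attribute names are taken from the calls in this script;
# the bodies below are illustrative assumptions, not the original class.
# ------------------------------------------------------------------
import numpy as np

X, O = 1, -1

class TicTacToeSketch:
    WIN_LINES = [(0, 1, 2), (3, 4, 5), (6, 7, 8),   # rows
                 (0, 3, 6), (1, 4, 7), (2, 5, 8),   # columns
                 (0, 4, 8), (2, 4, 6)]              # diagonals

    def __init__(self):
        self.board = np.zeros(9, dtype=int)
        self.player = X                              # X moves first

    @property
    def valid_moves(self):
        return [i for i, v in enumerate(self.board) if v == 0]

    def make_move(self, move):
        self.board[move] = self.player
        self.player = O if self.player == X else X

    def get_winner(self):
        for a, b, c in self.WIN_LINES:
            if self.board[a] != 0 and self.board[a] == self.board[b] == self.board[c]:
                return self.board[a]
        return None

    def winner_exists(self):
        return self.get_winner() is not None

    def is_over(self):
        return self.winner_exists() or not self.valid_moves

    def __str__(self):
        return str(self.board.reshape(3, 3))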
                game.board.reshape(-1))]
            # set the target for the move actually taken to the
            # observed reward; other entries of y are left unchanged
            y[0][move] = reward
            loss = criterion(q, y)
            losses += loss.item()
            counter += 1
            if counter % 1000 == 0:
                # record the average loss over the last 1000 updates
                avg_losses.append(str(losses / 1000))
                losses = 0
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            y_turn = False
        else:
            # the opponent plays a random valid move
            game.make_move(get_random_valid_move(game))
            y_turn = True
        state = t.Tensor(game.board.reshape(1, -1))
        if game.is_over():
            break
    # decay epsilon toward its 0.05 floor over the course of training
    if epsilon > 0.05:
        epsilon -= 2 / epochs

t.save(model.state_dict(), "q_learning_meta/model.pth")
f = open("q_learning_meta/losses_over_time", "w")
f.write("\n".join(avg_losses))
f.close()
end = time.time()
print(end - start)  # total training time in seconds
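# ------------------------------------------------------------------
# A minimal sketch of the training setup the loop above assumes.
# The layer sizes, learning rate, epoch count, and initial epsilon
# are illustrative guesses, not the values from the original script.
# ------------------------------------------------------------------
import time
import torch as t
import torch.nn as nn

model = nn.Sequential(
    nn.Linear(9, 36),    # 9 board cells in
    nn.ReLU(),
    nn.Linear(36, 9),    # one Q-value out per board position
)
criterion = nn.MSELoss()                                # loss between q and target y
optimizer = t.optim.Adam(model.parameters(), lr=1e-3)

epochs = 100_000                                        # self-play games to train on
epsilon = 1.0                                           # epsilon-greedy exploration rate
counter, losses, avg_losses = 0, 0.0, []
start = time.time()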