Пример #1
0
def resume_self_play():
    env = TicTacToeEnv()
    saves = [f for f in listdir(save_dir) if isfile(join(save_dir, f))]
    recent_file = max(saves)
    policy = EpsilonGreedy(QConvTicTacToe(env), 0)
    opposing_policy = EpsilonGreedy(QConvTicTacToe(env), 1)
    self_play = SelfPlay(policy, opposing_policy)
    policy.q.policy_net.load_state_dict(torch.load(join(save_dir, recent_file)))
    self_play.evaluate_policy(100)
Пример #2
0
def resume_self_play():
    env = Connect4Env()
    saves = [f for f in listdir(save_dir) if isfile(join(save_dir, f))]
    recent_file = max(saves)
    policy = EpsilonGreedy(QLinear(env), 0)
    opposing_policy = EpsilonGreedy(QLinear(env), 0)  # Acts greedily
    self_play = SelfPlay(policy, opposing_policy)
    policy.q.policy_net.load_state_dict(torch.load(join(save_dir, recent_file)))
    self_play.evaluate_policy(100)