def example_randomwalk():
    """Run the n-step TreeBackup example on a random walk MDP.

    Two TreeBackup agents are trained on the same RandomWalk environment:
    the first keeps the policy it was constructed with (described in this
    example as the equiprobable random policy), the second is switched to
    an epsilon-greedy policy before training. The reference action values
    reported by the environment and the action values learned by both
    agents are then printed for comparison.
    """
    # Hyperparameters shared by both training runs.
    discount = 0.9
    step_size = 0.1
    epsilon = 0.1
    num_episode = 1000

    # create an MDP
    # NOTE(review): the meaning of (19, -1) is defined by RandomWalk, which is
    # not visible here -- presumably the number of states and a reward value;
    # confirm against the class.
    env = RandomWalk(19, -1)

    # create n-step TreeBackup agents
    # NOTE(review): the trailing 3 is presumably the number of steps n; confirm
    # against TreeBackup's constructor.
    agent = TreeBackup(env, env.init(), 3)
    agent2 = TreeBackup(env, env.init(), 3)

    # act using equiprobable random policy with discount = 0.9 and
    # step size = 0.1; the agent is re-initialised after every episode
    for _ in range(num_episode):
        agent.episode(discount, step_size)
        agent.init()

    # same training schedule, but under an epsilon-greedy policy
    agent2.set_policy_eps_greedy(epsilon)
    for _ in range(num_episode):
        agent2.episode(discount, step_size)
        agent2.init()

    # reference values from the environment, then the learned estimates
    print('Q_DP[s][a] ', env.Q_equiprobable(discount))
    print('Q_eps_greedy[s][a] ', env.Q_eps_greedy(epsilon, discount))
    print('Equiprobable Q_TreeBackup[s][a]', agent.Q)
    print('Eps greedy Q_TreeBackup[s][a]', agent2.Q)
def example_randomwalk():
    """Run the n-step Q(sigma) example on a random walk MDP.

    Two QSigma agents are trained on the same RandomWalk environment:
    the first keeps the policy it was constructed with (described in this
    example as the equiprobable random policy), the second is switched to
    an epsilon-greedy policy before training. The reference action values
    reported by the environment and the action values learned by both
    agents are then printed for comparison.
    """
    # Hyperparameters shared by both training runs.
    discount = 0.9
    step_size = 0.1
    epsilon = 0.1
    num_episode = 1000

    # create an MDP
    # NOTE(review): the meaning of (19, -1) is defined by RandomWalk, which is
    # not visible here -- presumably the number of states and a reward value;
    # confirm against the class.
    env = RandomWalk(19, -1)

    # create n-step QSigma agents
    # positional arguments: Psigma=0.5, init_state=env.init(), steps=3
    agent = QSigma(env, 0.5, env.init(), 3)
    agent2 = QSigma(env, 0.5, env.init(), 3)

    # act using equiprobable random policy with discount = 0.9 and
    # step size = 0.1; the agent is re-initialised after every episode
    for _ in range(num_episode):
        agent.episode(discount, step_size)
        agent.init()

    # same training schedule, but under an epsilon-greedy policy
    agent2.set_policy_eps_greedy(epsilon)
    for _ in range(num_episode):
        agent2.episode(discount, step_size)
        agent2.init()

    # reference values from the environment, then the learned estimates
    print('Q_DP[s][a] ', env.Q_equiprobable(discount))
    print('Q_eps_greedy[s][a] ', env.Q_eps_greedy(epsilon, discount))
    print('Equiprobable Q_Q(sigma)[s][a]', agent.Q)
    print('Eps greedy Q_Q(sigma)[s][a]', agent2.Q)