import gym
from mdps import solvers

# FrozenLake's discrete action encoding.
LEFT = 0
DOWN = 1
RIGHT = 2
UP = 3

# Mutate the registered spec so the environment is deterministic
# (no slipping) before it is instantiated below.
env_spec = gym.spec('FrozenLake-v0')
env_spec._kwargs['is_slippery'] = False

env = gym.make('FrozenLake-v0')
env.reset()

# Single-argument parenthesized print behaves identically under
# Python 2 and Python 3 (the bare `print x` form is Py2-only).
print(solvers.value_iteration(env.env))
# Strip the axis chrome from every policy panel, then title the figure.
for panel in (four_slip_policy, four_slip_value, eight_value, eight_policy):
    panel.axis('off')
f.suptitle("Final Ice Policies")

# For each slippery lake size, solve with both policy improvement and
# value iteration, drawing each resulting policy on its dedicated panel.
panel_layout = [
    ('4x4', four_slip_policy, four_slip_value),
    ('8x8', eight_policy, eight_value),
]
for map_size, improvement_ax, iteration_ax in panel_layout:
    _, env = get_env(map_size, slippery=True)

    policy, _, iters = policy_improvement(env, discount_factor=DISCOUNT_FACTOR)
    visualize_ice_policy(env, policy, ax=improvement_ax)
    improvement_ax.set_title("Policy Improvement {} Iters".format(iters))

    policy, _, iters = value_iteration(env, discount_factor=DISCOUNT_FACTOR)
    visualize_ice_policy(env, policy, ax=iteration_ax)
    iteration_ax.set_title("Value Iteration {} Iters".format(iters))
import gym
from mdps import solvers
from experiments import get_env
from mdps.visualize_policy import visualize_solution

# Solve the taxi MDP with value iteration and render the resulting policy.
env_name, env = get_env('taxi')
policy = solvers.value_iteration(env, discount_factor=.95)[0]
visualize_solution(env, policy)
import gym
from mdps import solvers
from mdps.util import make_random_policy
from mdps.visualize_policy import visualize_ice

# FrozenLake's discrete action encoding.
LEFT = 0
DOWN = 1
RIGHT = 2
UP = 3

# NOTE(review): these two lines reference Taxi-v2, but the environment
# actually constructed below is FrozenLake-v0 (which visualize_ice expects).
# They appear to be leftovers from a previous experiment -- confirm intent
# before deleting.
environment_name = 'Taxi-v2'
env_spec = gym.spec('Taxi-v2')
#env_spec._kwargs['is_slippery'] = False

env = gym.make('FrozenLake-v0')
env.reset()
visualize_ice(env.env)

# Print only the policy (element 0 of each solver's result tuple).
# Single-argument parenthesized print works identically on Python 2 and 3.
print(solvers.value_iteration(env.env, discount_factor=.9)[0])
print(solvers.policy_improvement(env.env, discount_factor=.9)[0])
from experiments.get_experiment import get_env
from mdps.solvers import policy_eval, policy_improvement, value_iteration
from mdps.visualize_policy import visualize_ice_policy, visualize_solution
import numpy as np
import matplotlib.pyplot as plt
from mdps import evaluate_solutions

# Solve the taxi MDP once with value iteration (Python 2 `print` syntax).
name, env = get_env('taxi')
pol, rewards, scores = value_iteration(env, discount_factor=.92)
print env

def experiment(current_env, eval_func):
    # Sweep the discount factor for one solver (`eval_func` is
    # policy_improvement or value_iteration) on `current_env`, collecting
    # the solver's expected start-state score, iteration counts, and the
    # empirically evaluated score of each learned policy.
    x, scores_expected, num_iters, scores_actual = [], [], [], []
    for i in np.linspace(0.3, .98, 25):
        print i
        policy, score, iters = eval_func(current_env, discount_factor=i)
        # we just grab the score from the expected starting state
        scores_expected.append(score[0])
        # Collapse "did not converge" iteration counts to 0 so they
        # don't dominate the plot scale.
        if iters > 9999:
            iters = 0
        num_iters.append(iters)
        score_actual = evaluate_solutions(current_env, policy)
        print score_actual
        scores_actual.append(score_actual)
    # NOTE(review): no return or plotting is visible here, and `x` is never
    # used in this chunk -- the function body presumably continues past the
    # end of this excerpt (likely plotting via plt). Confirm against the
    # full file before modifying.