# Sweep (lambda, alpha) for the replacing-traces TD(lambda) agent on the
# random-walk task and record RMSE for every parameter pair.
# NOTE(review): `settings`, `np`, and the project names (Tdlambda_replacing,
# FA_task1, Randomwalk, performance, plot_curves) are assumed to be defined
# earlier in this file -- confirm against the import header.

# Agent: seed lambda at 0; the sweep below overwrites it on every iteration.
settings["lambda_"] = 0
fa = FA_task1()
tdAgent = Tdlambda_replacing(settings, fa)

# Environment: 11-state random walk, 0.9 transition probability.
n_states = 11
transition_prob = 0.9
walker = Randomwalk(n_states, transition_prob)

# Experiment configuration.
confg = {}
confg["episodes"] = 10
confg["independent_runs"] = 25

alpha_vect = np.arange(0, 1.6, 0.1)
lambda_vect = np.arange(0, 1.1, 0.1)
rmse = np.zeros((len(lambda_vect), len(alpha_vect)))

# Loop over the lambda and alpha parameters.
for i, lambda_ in enumerate(lambda_vect):
    for j, alpha in enumerate(alpha_vect):
        # Direct attribute assignment replaces setattr() with a constant
        # attribute-name string -- same effect, clearer intent.
        tdAgent.lambda_ = lambda_
        tdAgent.alpha = alpha
        # Single tuple index is the idiomatic (and cheaper) NumPy form
        # compared with chained rmse[i][j] indexing.
        rmse[i, j] = performance(tdAgent, walker, confg)

# print(x) on a single expression is valid under both Python 2 and 3,
# unlike the original Python-2-only `print rmse` statement.
print(rmse)
plot_curves(alpha_vect, rmse, save=1, name="TDlambda_replacing_task1")
"""Sweep the step-size (alpha) for the plain TD agent on the random-walk
task and plot the resulting RMSE curve."""
# BUG FIX: np.arange is used below but numpy was never imported in the
# original script, which would raise NameError at runtime.
import numpy as np

from agent import Td
from functionApproximator import FA_task1
from environment import Randomwalk
from exputils import param_sweep, plot_curves

# Agent: gamma-discounted TD with initial step size 1.
settings = {}
settings["gamma"] = 0.99
settings["alpha"] = 1
fa = FA_task1()
tdAgent = Td(settings, fa)

# Environment: 11-state random walk, 0.9 transition probability.
n_states = 11
transition_prob = 0.9
walker = Randomwalk(n_states, transition_prob)

# Experiment configuration.
confg = {}
confg["episodes"] = 10
confg["independent_runs"] = 20

# Sweep the "alpha" attribute of the agent over [0, 1.5] in 0.1 steps;
# param_sweep returns one RMSE value per alpha.
param = "alpha"
alpha_vect = np.arange(0, 1.6, 0.1)
rmse = param_sweep(tdAgent, walker, confg, param, alpha_vect)

# print(x) on a single expression is valid under both Python 2 and 3,
# unlike the original Python-2-only `print rmse` statement.
print(rmse)
plot_curves(alpha_vect, rmse)
# Sweep (lambda, alpha) for the true-online TD(lambda) agent on the
# random-walk task and record RMSE for every parameter pair.
# NOTE(review): `settings`, `np`, and the project names (True_online_Tdlambda,
# FA_task1, Randomwalk, performance, plot_curves) are assumed to be defined
# earlier in this file -- confirm against the import header.

# Agent: seed lambda at 0; the sweep below overwrites it on every iteration.
settings["lambda_"] = 0
fa = FA_task1()
tdAgent = True_online_Tdlambda(settings, fa)

# Environment: 11-state random walk, 0.9 transition probability.
n_states = 11
transition_prob = 0.9
walker = Randomwalk(n_states, transition_prob)

# Experiment configuration.
confg = {}
confg["episodes"] = 10
confg["independent_runs"] = 25

alpha_vect = np.arange(0, 1.6, 0.1)
lambda_vect = np.arange(0, 1.1, 0.1)
rmse = np.zeros((len(lambda_vect), len(alpha_vect)))

# Loop over the lambda and alpha parameters.
for i, lambda_ in enumerate(lambda_vect):
    for j, alpha in enumerate(alpha_vect):
        # Direct attribute assignment replaces setattr() with a constant
        # attribute-name string -- same effect, clearer intent.
        tdAgent.lambda_ = lambda_
        tdAgent.alpha = alpha
        # Single tuple index is the idiomatic (and cheaper) NumPy form
        # compared with chained rmse[i][j] indexing.
        rmse[i, j] = performance(tdAgent, walker, confg)

# print(x) on a single expression is valid under both Python 2 and 3,
# unlike the original Python-2-only `print rmse` statement.
print(rmse)
plot_curves(alpha_vect, rmse, save=1, name="TrueOnlineTD_task1")