"""Continuous-gridworld agent training (variant: custom env bounds).

Builds a GridworldContEnv with custom change probabilities / position /
goal bounds, hides the last 10 of 50 state features from the agent, and
trains a Gaussian policy with a GPOMDP learner via `clearn`.
"""
import numpy as np

from gym.envs.toy_text import gridworld_cont
from util.util_cgridworld import *
from util.policy_gaussian import *
from util.learner import *

np.set_printoptions(precision=6)
np.set_printoptions(suppress=True)

mdp = gridworld_cont.GridworldContEnv(
    changeProbs=[0.5, 1],
    posBounds1=[-1, -1, 2, 2],
    posBounds2=[-1, -1, -1, -1],
    goalBounds1=[-2.5, 2.5, 0, 2.5],
)
mdp.horizon = 50

# State-features mask: features 40..49 are hidden from the agent,
# so the policy only sees the first 40 features.
sfMask = np.ones(shape=50, dtype=bool)  # state features mask
sfMask[40:50] = False

# AGENT SPACE
agent_policy = GaussianPolicy(np.count_nonzero(sfMask), 2)
agent_learner = GpomdpLearner(mdp, agent_policy, gamma=0.98)

# BUG FIX: the original call was left unclosed (trailing comma, no `)`),
# which is a SyntaxError. Closed here with the arguments as written.
clearn(
    agent_learner,
    steps=0,
    nEpisodes=500,
    sfmask=sfMask,
    adamOptimizer=True,
    learningRate=0.03,
    loadFile=None,
    saveFile=None,
    autosave=True,
)
"""Continuous-gridworld agent training (default environment).

Uses a default GridworldContEnv, masks state features 30..49, and runs
25 GPOMDP optimization steps on a Gaussian policy via `clearn`.
"""
import numpy as np

from gym.envs.toy_text import gridworld_cont
from util.util_cgridworld import *
from util.policy_gaussian import *
from util.learner import *

np.set_printoptions(precision=6)
np.set_printoptions(suppress=True)

mdp = gridworld_cont.GridworldContEnv()
mdp.horizon = 50

# State features mask: only the first 30 of 50 features are visible.
sfMask = np.ones(shape=50, dtype=bool)  # state features mask
sfMask[30:50] = False

# AGENT SPACE
agent_policy = GaussianPolicy(
    nStateFeatures=np.count_nonzero(sfMask),
    actionDim=2,
)
agent_learner = GpomdpLearner(mdp, agent_policy, gamma=0.98)

clearn(
    agent_learner,
    steps=25,
    nEpisodes=100,
    sfmask=sfMask,
    adamOptimizer=True,
    learningRate=0.1,
    loadFile=None,  # "cparams30.npy"
    saveFile=None,
    autosave=True,
    plotGradient=False,
)
"""Continuous-gridworld agent training (normal-distributed model env).

Builds a GridworldContNormalEnv with a Gaussian model over positions
(plus a uniform env for comparison), masks state features 45..49, and
trains a Gaussian policy with GPOMDP via `clearn`.
"""
import scipy as sp
import numpy as np

from gym.envs.toy_text import gridworld_cont_normal
from gym.envs.toy_text import gridworld_cont
from util.util_cgridworld import *
from util.policy_gaussian import *
from util.learner import *

np.set_printoptions(precision=6)
np.set_printoptions(suppress=True)

mean_model1 = [-2, -2, 2, 2]
var_model1 = [0.1, 0.1, 0.1, 0.1]
mdp = gridworld_cont_normal.GridworldContNormalEnv(mean=mean_model1, var=var_model1)
mdp.horizon = 50

mdp_uniform = gridworld_cont.GridworldContEnv()
mdp_uniform.horizon = 50

# BUG FIX: `np.bool` was deprecated in NumPy 1.20 and removed in 1.24;
# the builtin `bool` is the documented replacement.
sfMask = np.ones(shape=50, dtype=bool)  # state features mask
sfMask[45:50] = False

# AGENT SPACE
agent_policy = GaussianPolicy(np.count_nonzero(sfMask), 2)
agent_learner = GpomdpLearner(mdp, agent_policy, gamma=0.98)

# BUG FIX: the original call was left unclosed (trailing comma, no `)`),
# which is a SyntaxError. Closed here with the arguments as written.
clearn(
    agent_learner,
    steps=100,
    nEpisodes=250,
    sfmask=sfMask,
    adamOptimizer=True,
)