def test_fdcheck_dlogpi():
    """Finite-differences check for the dlogpi of the Gibbs policy."""
    logger = Logger()
    domain = GridWorld()
    representation = Tabular(logger=logger, domain=domain, discretization=20)
    policy = GibbsPolicy(representation=representation, logger=logger)

    def f(theta, s, a):
        policy.representation.theta = theta
        return np.log(policy.prob(s, a))

    def df(theta, s, a):
        policy.representation.theta = theta
        return policy.dlogpi(s, a)

    def df_approx(theta, s, a):
        return approx_fprime(theta, f, 1e-10, s, a)

    thetas = np.random.rand(10, len(representation.theta))
    for i in range(10):
        s = np.array([np.random.randint(4), np.random.randint(5)])
        a = np.random.choice(domain.possibleActions(s))
        for theta in thetas:
            # print "s", s
            # print "a", a
            # print "f", f(theta, s, a)
            # print "df", df(theta, s, a)
            # print "df_approx", df_approx(theta, s, a)
            error = check_grad(f, df, theta, s, a)
            print(error)
            assert np.abs(error) < 1e-6

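# Hedged standalone sketch (not from the original test file): it illustrates the
# scipy machinery the finite-difference test above relies on -- check_grad compares
# an analytic gradient against approx_fprime's numerical estimate and returns the
# 2-norm of their difference. All names below are illustrative only.
import numpy as np
from scipy.optimize import approx_fprime, check_grad


def _demo_gradient_check():
    def f(x):
        # toy objective: f(x) = sum(x**2)
        return np.sum(x ** 2)

    def df(x):
        # analytic gradient of the toy objective: 2*x
        return 2 * x

    x0 = np.random.rand(5)
    error = check_grad(f, df, x0)  # small when analytic and numeric gradients agree
    assert error < 1e-5
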
def makeComponents(self):
    map_type = str(self.lstMap.currentItem().text())
    noise = self.spNoise.value()
    maze = os.path.join(GridWorld.default_map_dir, map_type + '.txt')
    domain = GridWorld(maze, noise=noise)
    domain.GOAL_REWARD = self.spGoalReward.value()
    domain.PIT_REWARD = self.spPitReward.value()
    domain.STEP_REWARD = self.spStepReward.value()
    representation = RepresentationFactory.get(
        config=self.representationConfig,
        name=str(self.lstRepresentation.currentItem().text()),
        domain=domain)
    policy = PolicyFactory.get(
        config=self.policyConfig,
        name=str(self.lstPolicy.currentItem().text()),
        representation=representation)
    agent = AgentFactory.get(
        config=self.agentConfig,
        name=str(self.lstAgent.currentItem().text()),
        representation=representation,
        policy=policy)
    return domain, agent

def test_batch_discovery():
    """
    Test feature discovery from features available in the bag, and that the
    appropriate feats are activated in later calls to phi_nonTerminal().
    """
    mapDir = os.path.join(__rlpy_location__, "Domains", "GridWorldMaps")
    mapname = os.path.join(mapDir, "4x5.txt")  # expect 4*5 = 20 states
    domain = GridWorld(mapname=mapname)
    s0_unused = domain.s0()  # just to initialize domain.state, etc.
    initial_representation = IndependentDiscretization(domain)
    maxBatchDiscovery = np.inf
    batchThreshold = 1e-10
    discretization = 20
    bagSize = 100000  # We add all possible features
    rep = OMPTD(domain, initial_representation, discretization,
                maxBatchDiscovery, batchThreshold, bagSize, sparsify=False)

    states = np.array([[0, 0], [0, 2]])
    activePhi_s1 = rep.phi_nonTerminal(states[0, :])
    activePhi_s2 = rep.phi_nonTerminal(states[1, :])
    phiMatr = np.zeros((2, len(activePhi_s1)))
    phiMatr[0, :] = activePhi_s1
    phiMatr[1, :] = activePhi_s2
    td_errors = np.array([2, 5])
    flagAddedFeat = rep.batchDiscover(td_errors, phiMatr, states)
    assert flagAddedFeat  # should have added at least one
    assert rep.selectedFeatures[-1] == 9   # feat conj that yields state [0,2]
    assert rep.selectedFeatures[-2] == 11  # feat conj that yields state [0,0]

    # Ensure that discovered features are now active
    true_phi_s1 = np.zeros(rep.features_num)
    true_phi_s1[0] = True
    true_phi_s1[4] = True   # TODO - could be [4] depending on axes, check.
    true_phi_s1[10] = True  # The conjunction of [0,0]
    assert np.all(true_phi_s1 == rep.phi_nonTerminal(states[0, :]))

    true_phi_s2 = np.zeros(rep.features_num)
    true_phi_s2[0] = True
    true_phi_s2[6] = True  # TODO - could be [4] depending on axes, check.
    true_phi_s2[9] = True  # The conjunction of [0,2] (note actual id is 11, but in index 10)
    assert np.all(true_phi_s2 == rep.phi_nonTerminal(states[1, :]))

def make_experiment(exp_id=1,
                    path="./Results/Tutorial/gridworld-IncrTabularTut"):
    """
    Each file specifying an experimental setup should contain a
    make_experiment function which returns an instance of the Experiment
    class with everything set up.

    @param exp_id: number used to seed the random number generators
    @param path: output directory where logs and results are stored
    """
    opt = {}
    opt["exp_id"] = exp_id

    ## Domain:
    maze = os.path.join(GridWorld.default_map_dir, '4x5.txt')
    domain = GridWorld(maze, noise=0.3)
    opt["domain"] = domain

    ## Representation
    # discretization only needed for continuous state spaces, discarded otherwise
    representation = IncrTabularTut(domain)

    ## Policy
    policy = eGreedy(representation, epsilon=0.2)

    ## Agent
    opt["agent"] = SARSA(representation=representation, policy=policy,
                         discount_factor=domain.discount_factor,
                         learn_rate=0.1)
    opt["checks_per_policy"] = 100
    opt["max_steps"] = 2000
    opt["num_policy_checks"] = 10

    experiment = Experiment(**opt)
    return experiment

def test_cell_expansion():
    """Ensure we start with 0 cells and add one for each state uniquely."""
    mapDir = os.path.join(__rlpy_location__, "Domains", "GridWorldMaps")
    mapname = os.path.join(mapDir, "4x5.txt")  # expect 4*5 = 20 states
    domain = GridWorld(mapname=mapname)

    rep = IncrementalTabular(domain, discretization=100)
    assert rep.features_num == 0  # start with 0 cells

    sOrigin = np.array([0, 0])
    s2 = np.array([1, 2])
    terminal = False  # nonterminal state
    a = 1             # arbitrary action

    # Expect to add feats for these newly seen states
    numAdded = rep.pre_discover(sOrigin, terminal, a, s2, terminal)
    assert numAdded == 2
    assert rep.features_num == 2
    phiVecOrigin = rep.phi(sOrigin, terminal)
    phiVec2 = rep.phi(s2, terminal)
    assert sum(phiVecOrigin) == 1
    assert sum(phiVec2) == 1
    phiVecOrigin2 = rep.phi(np.array([0, 0]), terminal=False)
    assert rep.features_num == 2  # didn't duplicate the feature
    assert sum(phiVecOrigin2) == 1

    # Make sure we don't duplicate feats anywhere
    numAdded = rep.pre_discover(np.array([0, 0]), terminal, a, s2, terminal)
    assert numAdded == 0
    assert rep.features_num == 2

def make_experiment(exp_id=1, path="./Results/Tutorial/gridworld-qlearning"):
    """
    Each file specifying an experimental setup should contain a
    make_experiment function which returns an instance of the Experiment
    class with everything set up.

    @param exp_id: number used to seed the random number generators
    @param path: output directory where logs and results are stored
    """
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path

    # Domain:
    maze = os.path.join(GridWorld.default_map_dir, '4x5.txt')
    domain = GridWorld(maze, noise=0.3)
    opt["domain"] = domain

    # Representation
    representation = Tabular(domain, discretization=20)

    # Policy
    policy = eGreedy(representation, epsilon=0.2)

    # Agent
    opt["agent"] = Q_Learning(representation=representation, policy=policy,
                              discount_factor=domain.discount_factor,
                              initial_learn_rate=0.1,
                              learn_rate_decay_mode="boyan", boyan_N0=100,
                              lambda_=0.)
    opt["checks_per_policy"] = 100
    opt["max_steps"] = 2000
    opt["num_policy_checks"] = 10

    experiment = Experiment(**opt)
    return experiment

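# Hedged usage sketch (not part of the original file): experiments created by a
# make_experiment() function are conventionally driven from a __main__ guard via
# Experiment.run(), plot() and save(); the keyword arguments below follow the
# rlpy tutorial convention and are assumptions, not taken from this snippet.
if __name__ == '__main__':
    experiment = make_experiment(exp_id=1)
    experiment.run(visualize_steps=False,      # no step-by-step domain rendering
                   visualize_learning=False,   # no value-function plots during learning
                   visualize_performance=0)    # no rendering of policy-evaluation runs
    experiment.plot()
    experiment.save()
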
def test_number_of_cells():
    """Ensure the appropriate # of cells is created (despite ``discretization``)."""
    mapDir = os.path.join(__rlpy_location__, "Domains", "GridWorldMaps")
    mapname = os.path.join(mapDir, "4x5.txt")  # expect 4*5 = 20 states
    domain = GridWorld(mapname=mapname)

    memory = 30  # determines the number of feats; it is the size of the cache
    num_tilings = [2]      # has 2 tilings
    resolutions = [4]      # resolution of staterange / 4
    dimensions = [[0, 1]]  # tiling over dimensions 0 and 1
    rep = TileCoding(domain, memory, num_tilings, resolutions,
                     resolution_matrix=None, dimensions=dimensions,
                     safety="super")  # super safety prevents any collisions
    assert rep.features_num == memory

def make_experiment(
        exp_id=1, path="./Results/Temp/{domain}/{agent}/{representation}/"):
    """
    Each file specifying an experimental setup should contain a
    make_experiment function which returns an instance of the Experiment
    class with everything set up.

    @param exp_id: number used to seed the random number generators
    @param path: output directory where logs and results are stored
    """
    # Experiment variables
    opt = {}
    opt["exp_id"] = exp_id
    opt["max_steps"] = 10000
    opt["num_policy_checks"] = 10

    # Domain:
    # MAZE = '/Domains/GridWorldMaps/1x3.txt'
    maze = os.path.join(GridWorld.default_map_dir, '4x5.txt')
    domain = GridWorld(maze, noise=0.3)
    opt["domain"] = domain

    # Representation
    representation = Tabular(domain, discretization=20)

    # Policy
    policy = GibbsPolicy(representation)

    # Agent: the positional arguments correspond to forgetting_rate=0.3,
    # min_steps_between_updates=100, max_steps_between_updates=1000,
    # lambda_=0.7 and learn_rate=0.1 (cf. the keyword form used in the
    # select_agent snippet later in this collection).
    opt["agent"] = NaturalActorCritic(policy, representation,
                                      domain.discount_factor,
                                      0.3, 100, 1000, .7, 0.1)

    experiment = Experiment(**opt)
    return experiment

def make_experiment(exp_id=1, path="./Results/Temp", show=False):
    """
    Each file specifying an experimental setup should contain a
    make_experiment function which returns an instance of the Experiment
    class with everything set up.

    @param exp_id: number used to seed the random number generators
    @param path: output directory where logs and results are stored
    """
    # Domain:
    # MAZE = '/Domains/GridWorldMaps/1x3.txt'
    maze = os.path.join(GridWorld.default_map_dir, '4x5.txt')
    domain = GridWorld(maze, noise=0.3)

    # Representation
    representation = Tabular(domain, discretization=20)

    # MDP Solver
    agent = PolicyIteration(exp_id, representation, domain,
                            project_path=path, show=show)

    return MDPSolverExperiment(agent, domain)

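# Hedged usage sketch (assumption, not taken from the original file): the
# MDPSolverExperiment wrapper returned above is presumed to expose a run()
# method that invokes the planner, analogous to Experiment.run() in the
# learning-based experiments in this collection.
if __name__ == '__main__':
    experiment = make_experiment(exp_id=1, path="./Results/Temp", show=False)
    experiment.run()
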
def test_number_of_cells():
    """Ensure the appropriate # of cells is created (despite ``discretization``)."""
    mapDir = os.path.join(__rlpy_location__, "Domains", "GridWorldMaps")
    mapname = os.path.join(mapDir, "4x5.txt")  # expect 4*5 = 20 states
    domain = GridWorld(mapname=mapname)

    rep = Tabular(domain, discretization=100)
    assert rep.features_num == 20
    rep = Tabular(domain, discretization=5)
    assert rep.features_num == 20

def make_experiment(exp_id=1, path="./Results/Temp"):
    """
    Each file specifying an experimental setup should contain a
    make_experiment function which returns an instance of the Experiment
    class with everything set up.

    @param exp_id: number used to seed the random number generators
    @param path: output directory where logs and results are stored
    """
    # Experiment variables
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 10000000
    opt["num_policy_checks"] = 50

    # Logging

    # Domain:
    # MAZE = '/Domains/GridWorldMaps/1x3.txt'
    maze = os.path.join(GridWorld.default_map_dir, 'large_state.txt')
    domain = GridWorld(maze, noise=0.3)
    opt["domain"] = domain

    # Representation
    discover_threshold = 1.
    lambda_ = 0.3
    initial_learn_rate = 0.11
    boyan_N0 = 100
    initial_rep = IndependentDiscretization(domain)
    representation = iFDDK(domain, discover_threshold, initial_rep,
                           sparsify=True, useCache=True, lazy=True,
                           lambda_=lambda_)

    # Policy
    policy = eGreedyDecay(representation, epsilonInit=0.9)

    # Agent
    opt["agent"] = Q_Learning(policy, representation,
                              discount_factor=domain.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)

    experiment = Experiment(**opt)
    return experiment

def makeComponents(self):
    map_type = str(self.lstMap.currentItem().text())
    noise = float(self.spNoise.value())
    maze = os.path.join(GridWorld.default_map_dir, map_type + '.txt')
    domain = GridWorld(maze, noise=noise)
    domain.GOAL_REWARD = float(self.spGoalReward.value())
    domain.PIT_REWARD = float(self.spPitReward.value())
    domain.STEP_REWARD = float(self.spStepReward.value())
    representation = RepresentationFactory.get(
        config=self.representationConfig,
        name=str(self.lstRepresentation.currentItem().text()),
        domain=domain)
    policy = PolicyFactory.get(
        config=self.policyConfig,
        name=str(self.lstPolicy.currentItem().text()),
        representation=representation)
    agent = AgentFactory.get(
        config=self.agentConfig,
        name=str(self.lstAgent.currentItem().text()),
        representation=representation,
        policy=policy)
    return domain, agent

def test_phi_cells():
    """Ensure correct features are activated for the corresponding state."""
    mapDir = os.path.join(__rlpy_location__, "Domains", "GridWorldMaps")
    mapname = os.path.join(mapDir, "4x5.txt")  # expect 4*5 = 20 states
    domain = GridWorld(mapname=mapname)

    rep = IndependentDiscretization(domain)
    for r in np.arange(4):
        for c in np.arange(5):
            phiVec = rep.phi(np.array([r, c]), terminal=False)
            assert sum(phiVec) == 2    # 1 for each dimension
            assert phiVec[r] == 1      # correct row activated
            assert phiVec[4 + c] == 1  # correct col activated

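# Hedged illustrative sketch (not part of the test file): the assertions above
# imply a fixed IndependentDiscretization index layout for the 4x5 GridWorld --
# features 0..3 encode the row and features 4..8 encode the column -- so the
# active indices of a state (r, c) can be written down directly. The helper
# name below is hypothetical.
def _expected_independent_indices(r, c, n_rows=4):
    # one active feature per state dimension: the row feature, then the
    # column feature offset by the number of row features
    return [r, n_rows + c]
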
def test_phi_cells():
    """Ensure the correct feature is activated for the corresponding state."""
    mapDir = os.path.join(__rlpy_location__, "Domains", "GridWorldMaps")
    mapname = os.path.join(mapDir, "4x5.txt")  # expect 4*5 = 20 states
    domain = GridWorld(mapname=mapname)

    # Allow the internal representation to change -- just make sure each state
    # has a unique id that is consistently activated.
    rep = Tabular(domain)
    seenStates = -1 * np.ones(rep.features_num)
    for r in np.arange(4):
        for c in np.arange(5):
            phiVec = rep.phi(np.array([r, c]), terminal=False)
            assert sum(phiVec) == 1  # only 1 active feature
            activeInd = np.where(phiVec > 0)
            assert seenStates[activeInd] != True  # haven't seen it before
            seenStates[activeInd] = True
    assert np.all(seenStates == True)  # we've covered all states

def make_experiment(exp_id=2, path="./Results/MetaRLSarsa",
                    boyan_N0=680.715, discount_factor=0.9,
                    initial_learn_rate=1, lambda_=0.106):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 50000
    opt["num_policy_checks"] = 50
    opt["checks_per_policy"] = 100
    # start_at = np.array([4, 5])

    # Logging

    # Domain:
    # MAZE = '/Domains/GridWorldMaps/1x3.txt'
    map_dir = os.path.expanduser("~/work/clipper/models/rl/GridworldMaps/")
    maze = os.path.join(map_dir, "12x12-Bridge.txt")
    print(maze)
    domain = GridWorld(maze,
                       # random_start=True,
                       noise=0.1,
                       # start_at=np.array([4,6])
                       )

    representation = Tabular(domain)
    policy = eGreedy(representation, epsilon=0.3)
    opt['agent'] = Q_Learning(representation=representation, policy=policy,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              discount_factor=discount_factor)
    opt['domain'] = domain
    experiment = Experiment(**opt)
    # print opt
    return experiment

def make_experiment(batch_id, exp_id, grid, max_steps=10000, weight_vec=None):
    """
    Each file specifying an experimental setup should contain a
    make_experiment function which returns an instance of the Experiment
    class with everything set up.

    @param exp_id: number used to seed the random number generators
    @param path: output directory where logs and results are stored
    """
    path = './Results/Experiment%d' % batch_id
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path

    # Domain:
    domain = GridWorld(grid, noise=0.3)
    opt["domain"] = domain

    # Representation
    representation = Tabular(domain, discretization=20)

    # Policy
    policy = eGreedy(representation, epsilon=0.2)

    # Agent
    opt["agent"] = Q_Learning(representation=representation, policy=policy,
                              discount_factor=domain.discount_factor,
                              initial_learn_rate=0.1,
                              learn_rate_decay_mode="boyan", boyan_N0=100,
                              lambda_=0)
    opt["checks_per_policy"] = 10
    opt["max_steps"] = max_steps
    opt["num_policy_checks"] = 1

    experiment = Experiment(**opt)

    # Insert weight vector as start point if provided.
    if weight_vec is not None:
        representation.weight_vec = weight_vec

    return experiment

def make_experiment(exp_id=1, path="./Results/Temp"):
    """
    Each file specifying an experimental setup should contain a
    make_experiment function which returns an instance of the Experiment
    class with everything set up.

    @param exp_id: number used to seed the random number generators
    @param path: output directory where logs and results are stored
    """
    # Experiment variables
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 10000
    opt["num_policy_checks"] = 10
    opt["checks_per_policy"] = 50

    # Logging

    # Domain:
    # MAZE = '/Domains/GridWorldMaps/1x3.txt'
    maze = os.path.join('.', '4x5.txt')
    domain = GridWorld(maze, noise=0.3)
    opt["domain"] = domain

    # Representation
    representation = Tabular(domain)

    # Policy
    policy = eGreedy(representation, epsilon=0.1)

    # Agent
    opt["agent"] = LSPI(policy, representation, domain.discount_factor,
                        opt["max_steps"], 1000)

    experiment = Experiment(**opt)
    return experiment

from rlpy.Agents import Agent, LSPI, NaturalActorCritic, Q_Learning
from rlpy.Domains import GridWorld
from rlpy.Representations import iFDDK, IndependentDiscretization, Tabular
from rlpy.Policies import eGreedy, GibbsPolicy
import os
from typing import Optional

from common import run_cli

MAZE = os.path.join(GridWorld.default_map_dir, '11x11-Rooms.txt')
DOMAIN = GridWorld(MAZE, noise=0.3)
MAX_STEPS = 10000


def select_agent(name: Optional[str], _seed: int) -> Agent:
    tabular = Tabular(DOMAIN, discretization=20)
    if name is None or name == 'lspi':
        policy = eGreedy(tabular, epsilon=0.1)
        return LSPI(policy, tabular, DOMAIN.discount_factor, MAX_STEPS, 1000)
    elif name == 'nac':
        return NaturalActorCritic(GibbsPolicy(tabular), tabular,
                                  DOMAIN.discount_factor,
                                  forgetting_rate=0.3,
                                  min_steps_between_updates=100,
                                  max_steps_between_updates=1000,
                                  lambda_=0.7,
                                  learn_rate=0.1)
    elif name == 'tabular-q':
        return Q_Learning(
            eGreedy(tabular, epsilon=0.1),
            tabular,
            # NOTE: the original snippet is truncated at this point; the
            # remaining arguments are an assumption, mirroring the Q_Learning
            # setup used by the other GridWorld snippets in this collection.
            discount_factor=DOMAIN.discount_factor,
            lambda_=0.3,
            initial_learn_rate=0.11,
            learn_rate_decay_mode="boyan",
            boyan_N0=100)

def test_bag_creation():
    """
    Ensure the appropriate # of conjunctions is created, that they have been
    instantiated properly, and that there are no duplicates.
    """
    mapDir = os.path.join(__rlpy_location__, "Domains", "GridWorldMaps")
    mapname = os.path.join(mapDir, "4x5.txt")  # expect 4*5 = 20 states
    domain = GridWorld(mapname=mapname)
    initial_representation = IndependentDiscretization(domain)
    maxBatchDiscovery = np.inf
    batchThreshold = 1e-10
    discretization = 20
    bagSize = 100000  # We add all possible features
    rep = OMPTD(domain, initial_representation, discretization,
                maxBatchDiscovery, batchThreshold, bagSize, sparsify=False)
    assert rep.totalFeatureSize == 9 + 20
    assert rep.features_num == 9

    # Compute the full (including non-discovered) feature vec for a few states
    states = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
    s0_unused = domain.s0()  # just to initialize domain.state, etc.
    rep.calculateFullPhiNormalized(states)
    phi_states = rep.fullphi
    phi_states[phi_states > 0] = True

    true_phi_s1 = np.zeros(len(phi_states[0, :]))
    true_phi_s1[0] = True
    true_phi_s1[4] = True  # TODO - could be [4] depending on axes, check.
    true_phi_s1[9] = True  # The conjunction of [0,0]
    assert np.all(true_phi_s1 == phi_states[0, :])  # expected feature vec returned
    assert sum(phi_states[0, :]) == 3  # 2 original basic feats and 1 conjunction

    true_phi_s2 = np.zeros(len(phi_states[0, :]))
    true_phi_s2[0] = True
    true_phi_s2[5] = True   # TODO - could be [4] depending on axes, check.
    true_phi_s2[10] = True  # The conjunction of [0,1]
    assert np.all(true_phi_s2 == phi_states[1, :])  # expected feature vec returned
    assert sum(phi_states[1, :]) == 3  # 2 original basic feats and 1 conjunction

    true_phi_s3 = np.zeros(len(phi_states[0, :]))
    true_phi_s3[1] = True
    true_phi_s3[4] = True   # TODO - could be [4] depending on axes, check.
    true_phi_s3[14] = True  # The conjunction of [1,0]
    assert np.all(true_phi_s3 == phi_states[2, :])  # expected feature vec returned
    assert sum(phi_states[2, :]) == 3  # 2 original basic feats and 1 conjunction

    true_phi_s4 = np.zeros(len(phi_states[0, :]))
    true_phi_s4[1] = True
    true_phi_s4[5] = True   # TODO - could be [4] depending on axes, check.
    true_phi_s4[15] = True  # The conjunction of [1,1]
    assert np.all(true_phi_s4 == phi_states[3, :])  # expected feature vec returned
    assert sum(phi_states[3, :]) == 3  # 2 original basic feats and 1 conjunction