def test_terminal_context(self):
    """Check the context produced by ``Agent.terminal_context``.

    The method is exercised twice: once with an empty base context and once
    with a base context holding unrelated keys.  In both cases the result
    must be a ``dict`` whose ``'done'`` entry equals ``True``, whose ``'r'``
    entry equals ``0`` and whose ``'xp'`` entries are all ``0``.  In the
    second case every key/value pair of the base context must also be
    present, unchanged, in the result.
    """
    # Set up the agent
    param_funcs = {
        'alpha': 0.05,
        'gm': vcf.Constant(0.9999, 0),
        'gm_p': vcf.Constant(0.9999, 0),
        'lm': 0.1,
    }
    phi = vcf.BinaryVector(10)
    algo = vcf.TD(len(phi))
    agent = vcf.Agent(algo, phi, param_funcs)

    # No base context
    base_ctx = {}
    term_ctx = agent.terminal_context(base_ctx)
    assert isinstance(term_ctx, dict)
    # Equality (not identity) is kept on purpose: the concrete type stored
    # under 'done' is not visible from this test.
    assert term_ctx['done'] == True  # noqa: E712
    assert term_ctx['r'] == 0
    assert all(term_ctx['xp'] == 0)

    # Nonsense base context (should still be present)
    base_ctx = {'__' + str(i): i**2 for i in range(10)}
    term_ctx = agent.terminal_context(base_ctx)
    assert isinstance(term_ctx, dict)
    assert term_ctx['done'] == True  # noqa: E712
    assert term_ctx['r'] == 0
    assert all(term_ctx['xp'] == 0)
    assert all(key in term_ctx for key in base_ctx)
    # The comparison must be wrapped in all(): asserting a bare generator
    # expression always passes, because a generator object is truthy no
    # matter what it would yield.
    assert all(term_ctx[key] == val for key, val in base_ctx.items())
def test_setup(self):
    """Smoke-test construction of a ``vcf.Agent``.

    Builds the agent from a ``vcf.TD`` learner, ``vcf.BinaryVector(10)``
    features and a small parameter dictionary.  Nothing is asserted; the
    test passes as long as construction completes without raising.
    """
    # Both discount entries use the same constant value.
    discount = 0.9999
    settings = {
        'alpha': 0.05,
        'gm': vcf.Constant(discount, 0),
        'gm_p': vcf.Constant(discount, 0),
        'lm': 0.1,
    }
    features = vcf.BinaryVector(10)
    learner = vcf.TD(len(features))
    # Only the act of constructing the agent is under test.
    vcf.Agent(learner, features, settings)
env = gym.make('MountainCar-v0') na = env.action_space.n # Tile coding for discretization to binary vectors tiling_1 = vcf.features.BinaryTiling(env.observation_space, 11) tiling_2 = vcf.features.BinaryTiling(env.observation_space, 19) tiling_3 = vcf.features.BinaryTiling(env.observation_space, 31) # Concatenate binary vectors phi = vcf.Union(tiling_1, tiling_2, tiling_3) # Define the control (discrete actions Q-learning) dq = vcf.DiscreteQ(len(phi), na, epsilon=0.002) dq_params = { 'alpha' : vcf.parameters.EpisodicPowerLaw(0.2, 0.25), 'gm' : 0.9999, 'gm_p' : vcf.Constant(0.9999, 0), 'lm' : vcf.Constant(0.5, 0), } control = vcf.Agent(dq, phi, dq_params) # List of agents to update learners = [control] # Set up the experiment experiment = vcf.LiveExperiment(env, control, learners=learners) # Set up callbacks hist_cbk = vcf.callbacks.History() cbk_lst = [ vcf.callbacks.Progress(), hist_cbk,
# Parameters to search over base = { 'alpha': 0.001, 'gamma': 1.0, } vary = { 'lmbda': [0.0, 0.9, 1.0], 'lmbda_bar': [0, 0.9, 1.0], 'kappa': [0, 0.9, 1.0] } for params in parameter_search(base, vary): gamma = params['gamma'] lmbda = params['lmbda'] kappa = vcf.Constant(params['kappa']) kappa_p = vcf.Constant(params['kappa'], 0) lmbda_bar = params['lmbda_bar'] # Specify the parameters for the agents value_params = { 'alpha': params['alpha'], 'gm': vcf.Constant(gamma), 'gm_p': vcf.Constant(gamma, 0), 'lm': lmbda, 'lm_p': lmbda, } direct_params = { 'alpha': params['alpha'], 'gm': lambda x: (value_params['gm'](x) * kappa(x))**2,
self.episode['updates'].append(info['update_results']) def __str__(self): return json_tricks.dumps(self.hist, indent=2) # An example using the simple MDP if __name__ == "__main__" and True: import gym env = gym.make('SimpleMDP-v0') ns = env.observation_space.n na = env.action_space.n q_params = { 'alpha': vcf.Constant(0.01), 'gm': vcf.Constant(0.999, 0), 'gm_p': vcf.Constant(0.999, 0), 'lm': vcf.Constant(0.01, 0), } q_phi = vcf.BinaryVector(ns) q_algo = vcf.DiscreteQ(len(q_phi), na, epsilon=0.05) control = vcf.Agent(q_algo, q_phi, q_params) # Define some other agents that simply learn the value function phi1 = vcf.BinaryVector(ns) td_params = { 'alpha': vcf.Constant(0.01), 'gm': vcf.Constant(0.999, 0), 'gm_p': vcf.Constant(0.999, 0), 'lm': vcf.Constant(0.01, 0),
# The parameter search logger.info("Running parameter search...") for params in parameter_search(base, vary): print("Running experiments with λ={lmbda}, κ={kappa}, λ_bar={lmbda_bar}".format(**params)) _kappa = params['kappa'] _gamma = params['gamma'] _lmbar = params['lmbda_bar'] # Basename for output files basename = NAME_FMT.format(**params) value_params = { 'alpha': params['alpha'], 'gm': params['gamma'], 'gm_p': vcf.Constant(params['gamma'], 0), 'lm': params['lmbda'], 'lm_p': params['lmbda'], } direct_params = { 'alpha': params['alpha'], 'gm' : (params['gamma']*params['kappa'])**2, 'gm_p' : vcf.Constant((params['gamma']*params['kappa'])**2, 0), 'lm' : params['lmbda_bar'], 'lm_p' : params['lmbda_bar'], } second_params = { 'alpha': params['alpha'], 'gm' : (params['gamma']*params['kappa'])**2,