예제 #1
0
    def test_terminal_context(self):
        """Check the context returned by `Agent.terminal_context`.

        For both an empty and a populated base context, the result must be a
        dict whose `done` entry equals True, whose `r` entry equals 0 and
        whose `xp` entries are all 0.  For the populated base context, every
        key/value pair of the base context must also be present unchanged.
        """
        # Set up the agent
        param_funcs = {
            'alpha': 0.05,
            'gm': vcf.Constant(0.9999, 0),
            'gm_p': vcf.Constant(0.9999, 0),
            'lm': 0.1
        }
        phi = vcf.BinaryVector(10)
        algo = vcf.TD(len(phi))
        agent = vcf.Agent(algo, phi, param_funcs)

        # No base context
        base_ctx = {}
        term_ctx = agent.terminal_context(base_ctx)
        assert (isinstance(term_ctx, dict))
        assert (term_ctx['done'] == True)
        assert (term_ctx['r'] == 0)
        assert (all(term_ctx['xp'] == 0))

        # Nonsense base context (should still be present)
        base_ctx = {'__' + str(i): i**2 for i in range(10)}
        term_ctx = agent.terminal_context(base_ctx)
        assert (isinstance(term_ctx, dict))
        assert (term_ctx['done'] == True)
        assert (term_ctx['r'] == 0)
        assert (all(term_ctx['xp'] == 0))
        assert (all(key in term_ctx for key in base_ctx))
        # The comparison must be wrapped in all(): asserting a bare generator
        # expression always passes because a generator object is truthy.
        assert (all(term_ctx[key] == val for key, val in base_ctx.items()))
예제 #2
0
 def test_setup(self):
     """Smoke test: constructing an agent from its parts must not raise."""
     # Parameter values handed to the agent (plain numbers and vcf.Constant)
     agent_params = dict(
         alpha=0.05,
         gm=vcf.Constant(0.9999, 0),
         gm_p=vcf.Constant(0.9999, 0),
         lm=0.1,
     )
     # Feature function, and a TD learner sized by the feature function's length
     features = vcf.BinaryVector(10)
     learner = vcf.TD(len(features))
     td_agent = vcf.Agent(learner, features, agent_params)
예제 #3
0
    env = gym.make('MountainCar-v0')
    na = env.action_space.n

    # Tile coding for discretization to binary vectors
    tiling_1    = vcf.features.BinaryTiling(env.observation_space, 11)
    tiling_2    = vcf.features.BinaryTiling(env.observation_space, 19)
    tiling_3    = vcf.features.BinaryTiling(env.observation_space, 31)
    # Concatenate binary vectors
    phi         = vcf.Union(tiling_1, tiling_2, tiling_3)

    # Define the control (discrete actions Q-learning)
    dq = vcf.DiscreteQ(len(phi), na, epsilon=0.002)
    dq_params = {
        'alpha' : vcf.parameters.EpisodicPowerLaw(0.2, 0.25),
        'gm'    : 0.9999,
        'gm_p'  : vcf.Constant(0.9999, 0),
        'lm'    : vcf.Constant(0.5, 0),
    }
    control = vcf.Agent(dq, phi, dq_params)

    # List of agents to update
    learners = [control]

    # Set up the experiment
    experiment = vcf.LiveExperiment(env, control, learners=learners)

    # Set up callbacks
    hist_cbk = vcf.callbacks.History()
    cbk_lst = [
        vcf.callbacks.Progress(),
        hist_cbk,
    # Parameters to search over
    base = {
        'alpha': 0.001,
        'gamma': 1.0,
    }

    vary = {
        'lmbda': [0.0, 0.9, 1.0],
        'lmbda_bar': [0, 0.9, 1.0],
        'kappa': [0, 0.9, 1.0]
    }

    for params in parameter_search(base, vary):
        gamma = params['gamma']
        lmbda = params['lmbda']
        kappa = vcf.Constant(params['kappa'])
        kappa_p = vcf.Constant(params['kappa'], 0)
        lmbda_bar = params['lmbda_bar']

        # Specify the parameters for the agents
        value_params = {
            'alpha': params['alpha'],
            'gm': vcf.Constant(gamma),
            'gm_p': vcf.Constant(gamma, 0),
            'lm': lmbda,
            'lm_p': lmbda,
        }

        direct_params = {
            'alpha': params['alpha'],
            'gm': lambda x: (value_params['gm'](x) * kappa(x))**2,
예제 #5
0
        self.episode['updates'].append(info['update_results'])

    def __str__(self):
        """Return `self.hist` serialized by `json_tricks.dumps` with indent=2."""
        serialized = json_tricks.dumps(self.hist, indent=2)
        return serialized


# An example using the simple MDP
if __name__ == "__main__" and True:
    import gym

    env = gym.make('SimpleMDP-v0')
    ns = env.observation_space.n
    na = env.action_space.n

    q_params = {
        'alpha': vcf.Constant(0.01),
        'gm': vcf.Constant(0.999, 0),
        'gm_p': vcf.Constant(0.999, 0),
        'lm': vcf.Constant(0.01, 0),
    }
    q_phi = vcf.BinaryVector(ns)
    q_algo = vcf.DiscreteQ(len(q_phi), na, epsilon=0.05)
    control = vcf.Agent(q_algo, q_phi, q_params)

    # Define some other agents that simply learn the value function
    phi1 = vcf.BinaryVector(ns)
    td_params = {
        'alpha': vcf.Constant(0.01),
        'gm': vcf.Constant(0.999, 0),
        'gm_p': vcf.Constant(0.999, 0),
        'lm': vcf.Constant(0.01, 0),
# The parameter search
logger.info("Running parameter search...")
for params in parameter_search(base, vary):
    print("Running experiments with λ={lmbda}, κ={kappa}, λ_bar={lmbda_bar}".format(**params))
    _kappa = params['kappa']
    _gamma = params['gamma']
    _lmbar = params['lmbda_bar']

    # Basename for output files
    basename = NAME_FMT.format(**params)

    value_params = {
        'alpha': params['alpha'],
        'gm': params['gamma'],
        'gm_p': vcf.Constant(params['gamma'], 0),
        'lm': params['lmbda'],
        'lm_p': params['lmbda'],
    }

    direct_params = {
        'alpha': params['alpha'],
        'gm'   : (params['gamma']*params['kappa'])**2,
        'gm_p' : vcf.Constant((params['gamma']*params['kappa'])**2, 0),
        'lm'   : params['lmbda_bar'],
        'lm_p' : params['lmbda_bar'],
    }

    second_params = {
        'alpha': params['alpha'],
        'gm'   : (params['gamma']*params['kappa'])**2,