Example #1
    def test_run(self):
        # Set up environment and policy
        env = gym.make('MountainCar-v0')
        pol = vcf.DiscreteRandomControl(env.action_space.n)

        # Define an agent
        phi_1 = vcf.BiasUnit()
        td_1 = vcf.TD(len(phi_1))
        params_1 = {
            'alpha': 0.01,
            'gm': 0.999,
            'gm_p': 0.999,
            'lm': 0.1,
        }
        agent_1 = vcf.Agent(td_1, phi_1, params_1)
        agents = [agent_1]

        # Set up the experiment
        experiment = vcf.LiveExperiment(env, pol, agents)

        # Try running the experiment
        num_eps = 10
        max_steps = 10

        experiment.run(num_eps, max_steps, callbacks=[])
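        # Illustrative variation (a sketch, not part of the original test): the same
        # run can also record progress and per-step history using the callbacks
        # that appear in Example #5 below.
        hist_cbk = vcf.callbacks.History()
        experiment.run(num_eps, max_steps,
                       callbacks=[vcf.callbacks.Progress(), hist_cbk])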
Example #2
    def test_run_with_callbacks(self):
        # Set up environment and policy
        env = gym.make('MountainCar-v0')
        pol = vcf.DiscreteRandomControl(env.action_space.n)

        # Define an agent
        phi_1 = vcf.BiasUnit()
        td_1 = vcf.TD(len(phi_1))
        params_1 = {
            'alpha': 0.01,
            'gm': 0.999,
            'gm_p': 0.999,
            'lm': 0.1,
        }
        agent_1 = vcf.Agent(td_1, phi_1, params_1)
        agents = [agent_1]

        # Set up the experiment
        experiment = vcf.LiveExperiment(env, pol, agents)

        # Set up testing callbacks
        cbk = _CheckCallback()

        # Try running the experiment
        num_eps = 10
        max_steps = 10
        experiment.run(num_eps, max_steps, callbacks=[cbk])

        # Check that the callbacks ran properly
        assert (cbk.experiment_begin > 0)
        assert (cbk.experiment_end > 0)
        assert (cbk.episode_begin > 0)
        assert (cbk.episode_end > 0)
        assert (cbk.step_begin > 0)
        assert (cbk.step_end > 0)
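The _CheckCallback helper used above is not shown in this listing. As a minimal sketch of what it presumably does, the class below simply counts how many times each experiment hook fires; the hook method names and signatures are assumptions for illustration, not the library's documented callback interface.

class _CheckCallback:
    """Counts hook invocations so the test can verify that each one fired."""
    def __init__(self):
        self.experiment_begin = self.experiment_end = 0
        self.episode_begin = self.episode_end = 0
        self.step_begin = self.step_end = 0

    # Hook names and signatures below are assumed for illustration only.
    def on_experiment_begin(self, info=None): self.experiment_begin += 1
    def on_experiment_end(self, info=None):   self.experiment_end += 1
    def on_episode_begin(self, info=None):    self.episode_begin += 1
    def on_episode_end(self, info=None):      self.episode_end += 1
    def on_step_begin(self, info=None):       self.step_begin += 1
    def on_step_end(self, info=None):         self.step_end += 1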
Example #3
    def test_terminal_context(self):
        # Set up the agent
        param_funcs = {
            'alpha': 0.05,
            'gm': vcf.Constant(0.9999, 0),
            'gm_p': vcf.Constant(0.9999, 0),
            'lm': 0.1
        }
        phi = vcf.BinaryVector(10)
        algo = vcf.TD(len(phi))
        agent = vcf.Agent(algo, phi, param_funcs)

        # No base context
        base_ctx = {}
        term_ctx = agent.terminal_context(base_ctx)
        assert (isinstance(term_ctx, dict))
        assert (term_ctx['done'] == True)
        assert (term_ctx['r'] == 0)
        assert (all(term_ctx['xp'] == 0))

        # Nonsense base context (should still be present)
        base_ctx = {'__' + str(i): i**2 for i in range(10)}
        term_ctx = agent.terminal_context(base_ctx)
        assert (isinstance(term_ctx, dict))
        assert (term_ctx['done'] == True)
        assert (term_ctx['r'] == 0)
        assert (all(term_ctx['xp'] == 0))
        assert (all(key in term_ctx for key in base_ctx.keys()))
        assert (all(term_ctx[key] == val for key, val in base_ctx.items()))
Example #4
    def test_setup(self):
        # Set up the agent
        param_funcs = {
            'alpha': 0.05,
            'gm': vcf.Constant(0.9999, 0),
            'gm_p': vcf.Constant(0.9999, 0),
            'lm': 0.1
        }
        phi = vcf.BinaryVector(10)
        algo = vcf.TD(len(phi))
        agent = vcf.Agent(algo, phi, param_funcs)
Example #5
    # Tile coding for discretization to binary vectors
    tiling_1    = vcf.features.BinaryTiling(env.observation_space, 11)
    tiling_2    = vcf.features.BinaryTiling(env.observation_space, 19)
    tiling_3    = vcf.features.BinaryTiling(env.observation_space, 31)
    # Concatenate binary vectors
    phi         = vcf.Union(tiling_1, tiling_2, tiling_3)

    # Define the control (discrete actions Q-learning)
    dq = vcf.DiscreteQ(len(phi), na, epsilon=0.002)
    dq_params = {
        'alpha' : vcf.parameters.EpisodicPowerLaw(0.2, 0.25),
        'gm'    : 0.9999,
        'gm_p'  : vcf.Constant(0.9999, 0),
        'lm'    : vcf.Constant(0.5, 0),
    }
    control = vcf.Agent(dq, phi, dq_params)

    # List of agents to update
    learners = [control]

    # Set up the experiment
    experiment = vcf.LiveExperiment(env, control, learners=learners)

    # Set up callbacks
    hist_cbk = vcf.callbacks.History()
    cbk_lst = [
        vcf.callbacks.Progress(),
        hist_cbk,
    ]
    # Run the experiment
    experiment.run(150, 2000, callbacks=cbk_lst)
Example #6
            v_lm = kappa(ctx)
            v_lm_p = kappa_p(ctx)
            v_nxt = value_agent.get_value(ctx['obs_p'])

            # Compute next "reward"
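            # In this fragment's notation, the two lines below compute
            #   G_bar = r + gm' * (1 - lm') * v(s')
            #   R_bar = G_bar**2 + 2 * gm' * lm' * G_bar * v(s')
            # i.e. the meta-reward whose return corresponds to the second moment
            # of the lambda-return, bootstrapped from the value agent's estimate v(s').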
            g_bar = ctx['r'] + v_gm_p * (1 - v_lm_p) * v_nxt
            r_bar = g_bar**2 + 2 * v_gm_p * v_lm_p * g_bar * v_nxt
            return r_bar

        # Set up storage for run data
        frames = []

        # Perform multiple runs of the given number of episodes
        for run in range(num_runs):
            # Set up (or reset) the agents
            value_agent = vcf.Agent(vcf.algos.TD(num_states), phi,
                                    value_params)
            direct_agent = vcf.Agent(vcf.algos.TD(num_states),
                                     phi,
                                     direct_params,
                                     reward_func=direct_reward)
            second_agent = vcf.Agent(vcf.algos.TD(num_states),
                                     phi,
                                     second_params,
                                     reward_func=second_moment_reward)

            # Set up the experiment
            learners = [direct_agent, second_agent, value_agent]
            experiment = vcf.LiveExperiment(env, control, learners=learners)

            # Set up callbacks to record runs
            exclusions = ['x', 'xp']
Example #7
    tiling_2 = vcf.features.BinaryTiling(env.observation_space, 19)
    bias = vcf.features.BiasUnit()
    # Concatenate binary vectors
    phi = vcf.Union(bias, tiling_1, tiling_2)

    # Parameters for the agent
    td_params = {
        'alpha': vcf.parameters.EpisodicPowerLaw(0.15, 0.5),
        'gm': vcf.Constant(0.999, 0),
        'gm_p': vcf.Constant(0.999, 0),
        'lm': vcf.Constant(0.1, 0),
    }
    # Specify the algorithm
    algo = vcf.algos.TD(len(phi))
    # Combine into agent
    agent = vcf.Agent(algo, phi, td_params)

    # List of agents to update
    learners = [agent]

    # Set up the experiment
    experiment = vcf.LiveExperiment(env, control, learners=learners)

    # Set up callbacks
    hist_cbk = vcf.callbacks.History()
    cbk_lst = [
        vcf.callbacks.Progress(),
        hist_cbk,
    ]

    # Initialize via grid-search
Example #8
if __name__ == "__main__" and True:
    import gym

    env = gym.make('SimpleMDP-v0')
    ns = env.observation_space.n
    na = env.action_space.n

    q_params = {
        'alpha': vcf.Constant(0.01),
        'gm': vcf.Constant(0.999, 0),
        'gm_p': vcf.Constant(0.999, 0),
        'lm': vcf.Constant(0.01, 0),
    }
    q_phi = vcf.BinaryVector(ns)
    q_algo = vcf.DiscreteQ(len(q_phi), na, epsilon=0.05)
    control = vcf.Agent(q_algo, q_phi, q_params)

    # Define some other agents that simply learn the value function
    phi1 = vcf.BinaryVector(ns)
    td_params = {
        'alpha': vcf.Constant(0.01),
        'gm': vcf.Constant(0.999, 0),
        'gm_p': vcf.Constant(0.999, 0),
        'lm': vcf.Constant(0.01, 0),
    }
    td_agent1 = vcf.Agent(vcf.TD(len(phi1)), phi1, td_params)

    phi2 = vcf.BiasUnit()
    td_params2 = {
        'alpha': 0.01,
        'gm': 0.9,