Example #1
    def test_run(self):
        # Set up environment and policy
        env = gym.make('MountainCar-v0')
        pol = vcf.DiscreteRandomControl(env.action_space.n)

        # Define an agent
        phi_1 = vcf.BiasUnit()
        td_1 = vcf.TD(len(phi_1))
        params_1 = {
            'alpha': 0.01,   # step size
            'gm': 0.999,     # discount factor (gamma)
            'gm_p': 0.999,   # next-state discount (gamma')
            'lm': 0.1,       # trace-decay rate (lambda)
        }
        agent_1 = vcf.Agent(td_1, phi_1, params_1)
        agents = [agent_1]

        # Set up the experiment
        experiment = vcf.LiveExperiment(env, pol, agents)

        # Try running the experiment
        num_eps = 10
        max_steps = 10

        experiment.run(num_eps, max_steps, callbacks=[])
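
None of these excerpts shows its imports. A minimal preamble sketch for all four examples, assuming the tests import Gym and the library under a vcf alias (the package name varcompfa is an assumption inferred from the API, not shown in the original):

    import gym
    import varcompfa as vcf  # assumed package name; only the vcf alias appears in the excerpts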
Example #2
    def test_run_with_callbacks(self):
        # Set up environment and policy
        env = gym.make('MountainCar-v0')
        pol = vcf.DiscreteRandomControl(env.action_space.n)

        # Define an agent
        phi_1 = vcf.BiasUnit()
        td_1 = vcf.TD(len(phi_1))
        params_1 = {
            'alpha': 0.01,   # step size
            'gm': 0.999,     # discount factor (gamma)
            'gm_p': 0.999,   # next-state discount (gamma')
            'lm': 0.1,       # trace-decay rate (lambda)
        }
        agent_1 = vcf.Agent(td_1, phi_1, params_1)
        agents = [agent_1]

        # Set up the experiment
        experiment = vcf.LiveExperiment(env, pol, agents)

        # Set up testing callbacks
        cbk = _CheckCallback()

        # Try running the experiment
        num_eps = 10
        max_steps = 10
        experiment.run(num_eps, max_steps, callbacks=[cbk])

        # Check that the callbacks ran properly
        assert cbk.experiment_begin > 0
        assert cbk.experiment_end > 0
        assert cbk.episode_begin > 0
        assert cbk.episode_end > 0
        assert cbk.step_begin > 0
        assert cbk.step_end > 0
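
The _CheckCallback helper used above is not part of this excerpt. A minimal sketch consistent with the assertions, assuming Keras-style hook names (the on_* method names are an assumption, not taken from the original):

    class _CheckCallback:
        """Hypothetical test helper that counts how often each hook fires."""
        def __init__(self):
            self.experiment_begin = 0
            self.experiment_end = 0
            self.episode_begin = 0
            self.episode_end = 0
            self.step_begin = 0
            self.step_end = 0

        # Assumed hook names; the experiment presumably invokes these at the
        # matching points of its run loop.
        def on_experiment_begin(self, *args, **kwargs):
            self.experiment_begin += 1

        def on_experiment_end(self, *args, **kwargs):
            self.experiment_end += 1

        def on_episode_begin(self, *args, **kwargs):
            self.episode_begin += 1

        def on_episode_end(self, *args, **kwargs):
            self.episode_end += 1

        def on_step_begin(self, *args, **kwargs):
            self.step_begin += 1

        def on_step_end(self, *args, **kwargs):
            self.step_end += 1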
Example #3
    tiling_2    = vcf.features.BinaryTiling(env.observation_space, 19)
    tiling_3    = vcf.features.BinaryTiling(env.observation_space, 31)
    # Concatenate binary vectors
    phi         = vcf.Union(tiling_1, tiling_2, tiling_3)

    # Define the control (discrete actions Q-learning)
    dq = vcf.DiscreteQ(len(phi), na, epsilon=0.002)
    dq_params = {
        'alpha' : vcf.parameters.EpisodicPowerLaw(0.2, 0.25),  # step size, decaying per episode
        'gm'    : 0.9999,                                      # discount factor (gamma)
        'gm_p'  : vcf.Constant(0.9999, 0),                     # next-state discount (gamma')
        'lm'    : vcf.Constant(0.5, 0),                        # trace-decay rate (lambda)
    }
    control = vcf.Agent(dq, phi, dq_params)

    # List of agents to update
    learners = [control]

    # Set up the experiment
    experiment = vcf.LiveExperiment(env, control, learners=learners)

    # Set up callbacks
    hist_cbk = vcf.callbacks.History()
    cbk_lst = [
        vcf.callbacks.Progress(),
        hist_cbk,
    ]
    # Run the experiment
    experiment.run(150, 2000, callbacks=cbk_lst)
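
The excerpt above begins mid-script: env, na, and tiling_1 are already defined when it starts. A minimal setup sketch under the same import assumption as above (the environment name and the tile count of 11 are illustrative, not recovered from the original):

    import gym
    import varcompfa as vcf  # assumed package name

    env = gym.make('MountainCar-v0')  # illustrative choice of environment
    na = env.action_space.n           # number of discrete actions
    tiling_1 = vcf.features.BinaryTiling(env.observation_space, 11)  # illustrative tile count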

Example #4
    def test_init(self):
        env = gym.make('MountainCar-v0')
        pol = vcf.DiscreteRandomControl(env.action_space.n)
        agents = []
        experiment = vcf.LiveExperiment(env, pol, agents)