Example #1
def test_PGPE():
    distribution = learn(PGPE, learning_rate=AdaptiveParameter(1.5))
    w = distribution.get_parameters()
    w_test = np.array([0.02489092, 0.31062211, 0.2051433, 0.05959651,
                       -0.78302236, 0.77381954, 0.23676176, -0.29855654])

    assert np.allclose(w, w_test)
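
The tests on this page call a learn() helper that is not included in any snippet (it returns a distribution in Example #1 but an agent in Examples #3-#6). Purely as an illustration of what a black-box-optimization fixture for Example #1 could look like, here is a hypothetical sketch; the environment, policy, distribution and episode counts are all assumptions, not the project's actual code.

# Hypothetical sketch of the undefined learn() helper used by Example #1;
# every setting below (LQR size, deterministic linear policy, episode
# counts) is an assumption.
import numpy as np

from mushroom_rl.approximators import Regressor
from mushroom_rl.approximators.parametric import LinearApproximator
from mushroom_rl.core import Core
from mushroom_rl.distributions import GaussianDiagonalDistribution
from mushroom_rl.environments import LQR
from mushroom_rl.policy import DeterministicPolicy


def learn(alg, **alg_params):
    np.random.seed(1)

    # 2-dimensional LQR task with a deterministic linear policy
    mdp = LQR.generate(dimensions=2)
    approximator = Regressor(LinearApproximator,
                             input_shape=mdp.info.observation_space.shape,
                             output_shape=mdp.info.action_space.shape)
    policy = DeterministicPolicy(approximator)

    # diagonal Gaussian search distribution over the policy weights
    mu = np.zeros(policy.weights_size)
    sigma = 1e-3 * np.ones(policy.weights_size)
    distribution = GaussianDiagonalDistribution(mu, sigma)

    agent = alg(mdp.info, distribution, policy, **alg_params)

    core = Core(agent, mdp)
    core.learn(n_episodes=100, n_episodes_per_fit=50)

    return agent.distribution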
Example #2
def experiment(n_epochs, n_iterations, ep_per_run, save_states_to_disk):
    np.random.seed()

    # MDP
    mdp = LQR.generate(dimensions=2, max_pos=10., max_action=5., episodic=True)

    approximator = Regressor(LinearApproximator,
                             input_shape=mdp.info.observation_space.shape,
                             output_shape=mdp.info.action_space.shape)

    sigma = Regressor(LinearApproximator,
                      input_shape=mdp.info.observation_space.shape,
                      output_shape=mdp.info.action_space.shape)

    sigma_weights = 2 * np.ones(sigma.weights_size)
    sigma.set_weights(sigma_weights)

    policy = StateStdGaussianPolicy(approximator, sigma)

    # Agent
    learning_rate = AdaptiveParameter(value=.01)
    algorithm_params = dict(learning_rate=learning_rate)
    agent = REINFORCE(mdp.info, policy, **algorithm_params)

    # observation normalization preprocessor
    prepro = MinMaxPreprocessor(mdp_info=mdp.info)

    # plotting callback
    plotter = PlotDataset(mdp.info, obs_normalized=True)

    # Core with the plotting callback and normalization preprocessor
    core = Core(agent, mdp, callback_step=plotter, preprocessors=[prepro])

    # training loop
    for n in range(n_epochs):
        core.learn(n_episodes=n_iterations * ep_per_run,
                   n_episodes_per_fit=ep_per_run)
        dataset = core.evaluate(n_episodes=ep_per_run, render=False)
        print('Epoch: ', n, '  J: ', np.mean(compute_J(dataset,
                                                       mdp.info.gamma)))

    if save_states_to_disk:
        # save the normalization and plotting state to disk
        os.makedirs("./temp/", exist_ok=True)
        prepro.save_state("./temp/normalization_state")
        plotter.save_state("./temp/plotting_state")

        # load the saved states back from disk
        prepro.load_state("./temp/normalization_state")
        plotter.load_state("./temp/plotting_state")
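
Example #2 only defines experiment; a minimal entry point with placeholder (assumed) epoch and episode counts might look like:

# Assumed entry point for the experiment above; the counts are illustrative.
if __name__ == '__main__':
    experiment(n_epochs=10, n_iterations=10, ep_per_run=100,
               save_states_to_disk=False)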
Example #3
def test_PGPE_save(tmpdir):
    agent_path = tmpdir / 'agent_{}'.format(
        datetime.now().strftime("%H%M%S%f"))

    agent_save = learn(PGPE, learning_rate=AdaptiveParameter(1.5))

    agent_save.save(agent_path)
    agent_load = Agent.load(agent_path)

    for att, method in vars(agent_save).items():
        save_attr = getattr(agent_save, att)
        load_attr = getattr(agent_load, att)

        tu.assert_eq(save_attr, load_attr)
Example #4
def test_PGPE_save():

    agent_path = './agentdir{}/'.format(datetime.now().strftime("%H%M%S%f"))

    agent_save = learn(PGPE, learning_rate=AdaptiveParameter(1.5))

    agent_save.save(agent_path)
    agent_load = Agent.load(agent_path)

    shutil.rmtree(agent_path)

    for att, method in agent_save.__dict__.items():
        save_attr = getattr(agent_save, att)
        load_attr = getattr(agent_load, att)

        tu.assert_eq(save_attr, load_attr)
Example #5
def test_GPOMDP_save(tmpdir):
    agent_path = tmpdir / 'agent_{}'.format(
        datetime.now().strftime("%H%M%S%f"))

    params = dict(learning_rate=AdaptiveParameter(value=.01))

    agent_save = learn(GPOMDP, params)

    agent_save.save(agent_path)
    agent_load = Agent.load(agent_path)

    for att, method in vars(agent_save).items():
        save_attr = getattr(agent_save, att)
        load_attr = getattr(agent_load, att)

        tu.assert_eq(save_attr, load_attr)
Example #6
def test_GPOMDP_save():
    params = dict(learning_rate=AdaptiveParameter(value=.01))

    agent_path = './agentdir{}/'.format(datetime.now().strftime("%H%M%S%f"))

    agent_save = learn(GPOMDP, params)

    agent_save.save(agent_path)
    agent_load = Agent.load(agent_path)

    shutil.rmtree(agent_path)

    for att, method in agent_save.__dict__.items():
        save_attr = getattr(agent_save, att)
        load_attr = getattr(agent_load, att)
        #print('{}: {}'.format(att, type(save_attr)))

        tu.assert_eq(save_attr, load_attr)
Example #7
def experiment(alg, n_epochs, n_iterations, ep_per_run):
    np.random.seed()

    # MDP
    mdp = LQR.generate(dimensions=1)

    approximator = Regressor(LinearApproximator,
                             input_shape=mdp.info.observation_space.shape,
                             output_shape=mdp.info.action_space.shape)

    sigma = Regressor(LinearApproximator,
                      input_shape=mdp.info.observation_space.shape,
                      output_shape=mdp.info.action_space.shape)

    sigma_weights = 2 * np.ones(sigma.weights_size)
    sigma.set_weights(sigma_weights)

    policy = StateStdGaussianPolicy(approximator, sigma)

    # Agent
    learning_rate = AdaptiveParameter(value=.01)
    algorithm_params = dict(learning_rate=learning_rate)
    agent = alg(mdp.info, policy, **algorithm_params)

    # Train
    core = Core(agent, mdp)
    dataset_eval = core.evaluate(n_episodes=ep_per_run)
    print('policy parameters: ', policy.get_weights())
    J = compute_J(dataset_eval, gamma=mdp.info.gamma)
    print('J at start : ' + str(np.mean(J)))

    for i in range(n_epochs):
        core.learn(n_episodes=n_iterations * ep_per_run,
                   n_episodes_per_fit=ep_per_run)
        dataset_eval = core.evaluate(n_episodes=ep_per_run)
        print('policy parameters: ', policy.get_weights())
        J = compute_J(dataset_eval, gamma=mdp.info.gamma)
        print('J at iteration ' + str(i) + ': ' + str(np.mean(J)))
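
Example #7 is likewise definition-only. An assumed driver loop, reusing the policy-gradient classes seen in the other examples and placeholder counts, could be:

# Assumed driver; the algorithm list and the epoch/episode counts are
# placeholders, not taken from the snippet above.
if __name__ == '__main__':
    for alg in [REINFORCE, GPOMDP, eNAC]:
        print(alg.__name__)
        experiment(alg, n_epochs=4, n_iterations=10, ep_per_run=100)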
Example #8
                   n_episodes_per_fit=n_ep_per_fit,
                   render=False)
        J = compute_J(dataset_callback.get(), gamma=mdp.info.gamma)
        dataset_callback.clean()

        p = dist.get_parameters()

        print('mu:    ', p[:n_weights])
        print('sigma: ', p[n_weights:])
        print('Reward at iteration ' + str(i) + ': ' + str(np.mean(J)))

    print('Press a button to visualize the segway...')
    input()
    core.evaluate(n_episodes=3, render=True)


if __name__ == '__main__':
    algs_params = [
        (REPS, {
            'eps': 0.05
        }),
        (RWR, {
            'beta': 0.01
        }),
        (PGPE, {
            'learning_rate': AdaptiveParameter(value=0.3)
        }),
    ]
    for alg, params in algs_params:
        experiment(alg, params, n_epochs=20, n_episodes=100, n_ep_per_fit=25)
Example #9
def test_eNAC():
    params = dict(learning_rate=AdaptiveParameter(value=.01))
    policy = learn(eNAC, params)
    w = np.array([-0.03668018, 2.05112355])

    assert np.allclose(w, policy.get_weights())
Example #10
def test_GPOMDP():
    params = dict(learning_rate=AdaptiveParameter(value=.01))
    policy = learn(GPOMDP, params)
    w = np.array([-0.07623939, 2.05232858])

    assert np.allclose(w, policy.get_weights())
Example #11
def test_REINFORCE():
    params = dict(learning_rate=AdaptiveParameter(value=.01))
    policy = learn(REINFORCE, params)
    w = np.array([-0.0084793, 2.00536528])

    assert np.allclose(w, policy.get_weights())
Example #12
    distribution = GaussianCholeskyDistribution(mu, sigma)

    # Agent
    agent = alg(mdp.info, distribution, policy, **params)

    # Train
    core = Core(agent, mdp)
    dataset_eval = core.evaluate(n_episodes=ep_per_run)
    print('distribution parameters: ', distribution.get_parameters())
    J = compute_J(dataset_eval, gamma=mdp.info.gamma)
    print('J at start : ' + str(np.mean(J)))

    for i in range(n_epochs):
        core.learn(n_episodes=fit_per_run * ep_per_run,
                   n_episodes_per_fit=ep_per_run)
        dataset_eval = core.evaluate(n_episodes=ep_per_run)
        print('distribution parameters: ', distribution.get_parameters())
        J = compute_J(dataset_eval, gamma=mdp.info.gamma)
        print('J at iteration ' + str(i) + ': ' + str(np.mean(J)))


if __name__ == '__main__':
    learning_rate = AdaptiveParameter(value=0.05)

    algs = [REPS, RWR, PGPE]
    params_list = [{'eps': 0.5}, {'beta': 0.7},
                   {'learning_rate': learning_rate}]

    for alg, params in zip(algs, params_list):
        print(alg.__name__)
        experiment(alg, params, n_epochs=4, fit_per_run=10, ep_per_run=100)