def test_PGPE():
    distribution = learn(PGPE, learning_rate=AdaptiveParameter(1.5))
    w = distribution.get_parameters()
    w_test = np.array([0.02489092, 0.31062211, 0.2051433, 0.05959651,
                       -0.78302236, 0.77381954, 0.23676176, -0.29855654])

    assert np.allclose(w, w_test)
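# The `learn` helper used by these tests is defined elsewhere in the test
# module and is not shown in this section. Below is a minimal sketch of such a
# helper, assuming a standard MushroomRL black-box setup on a 2-dimensional
# LQR with a deterministic linear policy and a diagonal Gaussian search
# distribution (the seed, distribution initialization, episode counts and
# import layout are illustrative assumptions, not the actual test fixture).
# Note that the save tests below assume a variant that returns the agent
# itself rather than its distribution.
import numpy as np

from mushroom_rl.approximators import Regressor
from mushroom_rl.approximators.parametric import LinearApproximator
from mushroom_rl.core import Core
from mushroom_rl.distributions import GaussianDiagonalDistribution
from mushroom_rl.environments import LQR
from mushroom_rl.policy import DeterministicPolicy


def learn(alg, **alg_params):
    np.random.seed(1)

    # MDP
    mdp = LQR.generate(dimensions=2)

    # Deterministic linear policy whose weights are sampled from the
    # search distribution
    approximator = Regressor(LinearApproximator,
                             input_shape=mdp.info.observation_space.shape,
                             output_shape=mdp.info.action_space.shape)
    policy = DeterministicPolicy(mu=approximator)

    # Diagonal Gaussian over the 4 policy weights: 8 distribution parameters,
    # matching the length of w_test in test_PGPE above
    mu = np.zeros(policy.weights_size)
    sigma = 1e-3 * np.ones(policy.weights_size)
    distribution = GaussianDiagonalDistribution(mu, sigma)

    # Agent and short training run
    agent = alg(mdp.info, distribution, policy, **alg_params)
    core = Core(agent, mdp)
    core.learn(n_episodes=10, n_episodes_per_fit=5)

    return agent.distribution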
def experiment(n_epochs, n_iterations, ep_per_run, save_states_to_disk):
    np.random.seed()

    # MDP
    mdp = LQR.generate(dimensions=2, max_pos=10., max_action=5., episodic=True)

    approximator = Regressor(LinearApproximator,
                             input_shape=mdp.info.observation_space.shape,
                             output_shape=mdp.info.action_space.shape)

    sigma = Regressor(LinearApproximator,
                      input_shape=mdp.info.observation_space.shape,
                      output_shape=mdp.info.action_space.shape)

    sigma_weights = 2 * np.ones(sigma.weights_size)
    sigma.set_weights(sigma_weights)

    policy = StateStdGaussianPolicy(approximator, sigma)

    # Agent
    learning_rate = AdaptiveParameter(value=.01)
    algorithm_params = dict(learning_rate=learning_rate)
    agent = REINFORCE(mdp.info, policy, **algorithm_params)

    # Normalization callback
    prepro = MinMaxPreprocessor(mdp_info=mdp.info)

    # Plotting callback
    plotter = PlotDataset(mdp.info, obs_normalized=True)

    # Train
    core = Core(agent, mdp, callback_step=plotter, preprocessors=[prepro])

    # Training loop
    for n in range(n_epochs):
        core.learn(n_episodes=n_iterations * ep_per_run,
                   n_episodes_per_fit=ep_per_run)
        dataset = core.evaluate(n_episodes=ep_per_run, render=False)
        print('Epoch: ', n, ' J: ', np.mean(compute_J(dataset, mdp.info.gamma)))

    if save_states_to_disk:
        # Save normalization / plotting state to disk
        os.makedirs("./temp/", exist_ok=True)
        prepro.save_state("./temp/normalization_state")
        plotter.save_state("./temp/plotting_state")

        # Load states back from disk
        prepro.load_state("./temp/normalization_state")
        plotter.load_state("./temp/plotting_state")
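# A possible entry point for running the experiment above; the epoch and
# episode counts and the save_states_to_disk flag are illustrative values,
# not prescribed by the example itself.
if __name__ == '__main__':
    experiment(n_epochs=10, n_iterations=10, ep_per_run=25,
               save_states_to_disk=False)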
def test_PGPE_save(tmpdir):
    agent_path = tmpdir / 'agent_{}'.format(datetime.now().strftime("%H%M%S%f"))

    agent_save = learn(PGPE, learning_rate=AdaptiveParameter(1.5))

    agent_save.save(agent_path)
    agent_load = Agent.load(agent_path)

    for att, method in vars(agent_save).items():
        save_attr = getattr(agent_save, att)
        load_attr = getattr(agent_load, att)

        tu.assert_eq(save_attr, load_attr)
def test_PGPE_save():
    agent_path = './agentdir{}/'.format(datetime.now().strftime("%H%M%S%f"))

    agent_save = learn(PGPE, learning_rate=AdaptiveParameter(1.5))

    agent_save.save(agent_path)
    agent_load = Agent.load(agent_path)

    shutil.rmtree(agent_path)

    for att, method in agent_save.__dict__.items():
        save_attr = getattr(agent_save, att)
        load_attr = getattr(agent_load, att)

        tu.assert_eq(save_attr, load_attr)
def test_GPOMDP_save(tmpdir):
    agent_path = tmpdir / 'agent_{}'.format(datetime.now().strftime("%H%M%S%f"))

    params = dict(learning_rate=AdaptiveParameter(value=.01))

    agent_save = learn(GPOMDP, params)

    agent_save.save(agent_path)
    agent_load = Agent.load(agent_path)

    for att, method in vars(agent_save).items():
        save_attr = getattr(agent_save, att)
        load_attr = getattr(agent_load, att)

        tu.assert_eq(save_attr, load_attr)
def test_GPOMDP_save():
    params = dict(learning_rate=AdaptiveParameter(value=.01))
    agent_path = './agentdir{}/'.format(datetime.now().strftime("%H%M%S%f"))

    agent_save = learn(GPOMDP, params)

    agent_save.save(agent_path)
    agent_load = Agent.load(agent_path)

    shutil.rmtree(agent_path)

    for att, method in agent_save.__dict__.items():
        save_attr = getattr(agent_save, att)
        load_attr = getattr(agent_load, att)

        tu.assert_eq(save_attr, load_attr)
def experiment(alg, n_epochs, n_iterations, ep_per_run):
    np.random.seed()

    # MDP
    mdp = LQR.generate(dimensions=1)

    approximator = Regressor(LinearApproximator,
                             input_shape=mdp.info.observation_space.shape,
                             output_shape=mdp.info.action_space.shape)

    sigma = Regressor(LinearApproximator,
                      input_shape=mdp.info.observation_space.shape,
                      output_shape=mdp.info.action_space.shape)

    sigma_weights = 2 * np.ones(sigma.weights_size)
    sigma.set_weights(sigma_weights)

    policy = StateStdGaussianPolicy(approximator, sigma)

    # Agent
    learning_rate = AdaptiveParameter(value=.01)
    algorithm_params = dict(learning_rate=learning_rate)
    agent = alg(mdp.info, policy, **algorithm_params)

    # Train
    core = Core(agent, mdp)
    dataset_eval = core.evaluate(n_episodes=ep_per_run)
    print('policy parameters: ', policy.get_weights())
    J = compute_J(dataset_eval, gamma=mdp.info.gamma)
    print('J at start : ' + str(np.mean(J)))

    for i in range(n_epochs):
        core.learn(n_episodes=n_iterations * ep_per_run,
                   n_episodes_per_fit=ep_per_run)
        dataset_eval = core.evaluate(n_episodes=ep_per_run)
        print('policy parameters: ', policy.get_weights())
        J = compute_J(dataset_eval, gamma=mdp.info.gamma)
        print('J at iteration ' + str(i) + ': ' + str(np.mean(J)))
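# A possible entry point comparing the three policy-gradient algorithms with
# the experiment above; the epoch and episode counts are illustrative values.
if __name__ == '__main__':
    algs = [REINFORCE, GPOMDP, eNAC]

    for alg in algs:
        print(alg.__name__)
        experiment(alg, n_epochs=10, n_iterations=4, ep_per_run=25)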
                   n_episodes_per_fit=n_ep_per_fit, render=False)
        J = compute_J(dataset_callback.get(), gamma=mdp.info.gamma)
        dataset_callback.clean()

        p = dist.get_parameters()

        print('mu:    ', p[:n_weights])
        print('sigma: ', p[n_weights:])
        print('Reward at iteration ' + str(i) + ': ' + str(np.mean(J)))

    print('Press a button to visualize the segway...')
    input()
    core.evaluate(n_episodes=3, render=True)


if __name__ == '__main__':
    algs_params = [
        (REPS, {'eps': 0.05}),
        (RWR, {'beta': 0.01}),
        (PGPE, {'learning_rate': AdaptiveParameter(value=0.3)}),
        ]

    for alg, params in algs_params:
        experiment(alg, params, n_epochs=20, n_episodes=100, n_ep_per_fit=25)
def test_eNAC():
    params = dict(learning_rate=AdaptiveParameter(value=.01))
    policy = learn(eNAC, params)
    w = np.array([-0.03668018, 2.05112355])

    assert np.allclose(w, policy.get_weights())
def test_GPOMDP():
    params = dict(learning_rate=AdaptiveParameter(value=.01))
    policy = learn(GPOMDP, params)
    w = np.array([-0.07623939, 2.05232858])

    assert np.allclose(w, policy.get_weights())
def test_REINFORCE():
    params = dict(learning_rate=AdaptiveParameter(value=.01))
    policy = learn(REINFORCE, params)
    w = np.array([-0.0084793, 2.00536528])

    assert np.allclose(w, policy.get_weights())
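# The eNAC/GPOMDP/REINFORCE tests above rely on a `learn` helper analogous to
# the black-box one sketched earlier, but returning the learned policy. A
# minimal sketch, assuming a 1-dimensional LQR with a state-dependent-std
# Gaussian policy, so the two weights checked above correspond to the mean and
# standard-deviation parameters; the seed and episode counts are illustrative
# assumptions, and the save tests earlier in the section assume a variant that
# returns the agent itself.
def learn(alg, params):
    np.random.seed(1)

    # MDP
    mdp = LQR.generate(dimensions=1)

    # Linear mean and linear state-dependent standard deviation
    approximator = Regressor(LinearApproximator,
                             input_shape=mdp.info.observation_space.shape,
                             output_shape=mdp.info.action_space.shape)
    sigma = Regressor(LinearApproximator,
                      input_shape=mdp.info.observation_space.shape,
                      output_shape=mdp.info.action_space.shape)
    sigma.set_weights(2 * np.ones(sigma.weights_size))
    policy = StateStdGaussianPolicy(approximator, sigma)

    # Agent and short training run
    agent = alg(mdp.info, policy, **params)
    core = Core(agent, mdp)
    core.learn(n_episodes=10, n_episodes_per_fit=5)

    return agent.policy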
    distribution = GaussianCholeskyDistribution(mu, sigma)

    # Agent
    agent = alg(mdp.info, distribution, policy, **params)

    # Train
    core = Core(agent, mdp)
    dataset_eval = core.evaluate(n_episodes=ep_per_run)
    print('distribution parameters: ', distribution.get_parameters())
    J = compute_J(dataset_eval, gamma=mdp.info.gamma)
    print('J at start : ' + str(np.mean(J)))

    for i in range(n_epochs):
        core.learn(n_episodes=fit_per_run * ep_per_run,
                   n_episodes_per_fit=ep_per_run)
        dataset_eval = core.evaluate(n_episodes=ep_per_run)
        print('distribution parameters: ', distribution.get_parameters())
        J = compute_J(dataset_eval, gamma=mdp.info.gamma)
        print('J at iteration ' + str(i) + ': ' + str(np.mean(J)))


if __name__ == '__main__':
    learning_rate = AdaptiveParameter(value=0.05)

    algs = [REPS, RWR, PGPE]
    params = [{'eps': 0.5}, {'beta': 0.7}, {'learning_rate': learning_rate}]

    for alg, alg_params in zip(algs, params):
        print(alg.__name__)
        experiment(alg, alg_params, n_epochs=4, fit_per_run=10, ep_per_run=100)