def main(cfg):
    # Silence the tacto renderer output
    logger = logging.getLogger('tacto.renderer')
    logger.propagate = False

    env = custom_sawyer_peg_env(cfg.env)
    for model_name in cfg.model_names:
        print(model_name)
        model = GMM(add_cwd(model_name))
        accuracy, mean_return, mean_length = model.evaluate(env=env, **cfg.test)
        logger = logging.getLogger(__name__)
        logger.info("Accuracy: %.2f, Mean return: %.2f, Mean length: %.2f"
                    % (accuracy, mean_return, mean_length))
def main(cfg):
    # Silence simulator and renderer logging
    for name in ('tacto.renderer', 'env.sawyer_peg_env', 'pybulletX._wrapper'):
        logging.getLogger(name).propagate = False

    # Hyperparameters
    model_type = "pose"  # "pose" or "force"
    demonstration_dir = add_cwd("demonstrations_txt")
    K = 3
    budget = 20

    # Start MATLAB
    log_likelihood = []
    best_ret = 0
    if not drlfads.USE_MATLAB:
        raise NotImplementedError('This function requires MATLAB')
    eng = matlab.engine.start_matlab()
    eng.addpath(add_cwd(str(Path(__file__).parents[0])))

    env = custom_sawyer_peg_env(cfg.env)
    for _ in range(budget):
        name = "gmm_peg_%s_%d" % (model_type, K)
        bll = eng.train_model(demonstration_dir, name, model_type, K, 1)
        print("Model trained, final log likelihood:", bll)

        # Only test configurations we have not seen before
        if bll not in log_likelihood:
            # Evaluate the model in the actual environment
            log_likelihood.append(bll)
            model = GMM(name + ".mat")
            accuracy, mean_return, mean_length = model.evaluate(env=env, **cfg.test)
            print("Accuracy:", accuracy, "Mean return:", mean_return, "Mean length:", mean_length)
            if mean_return > best_ret:
                print("Best model so far!")
                best_ret = mean_return
                model.save_model(name + ".npy")
    eng.quit()
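# The search above requires MATLAB (drlfads.USE_MATLAB) to fit each GMM. As a rough,
# purely illustrative alternative -- not the repo's method -- a K-component GMM can also
# be fitted in pure Python with scikit-learn. The demonstration array below is a
# hypothetical (N, D) matrix of recorded end-effector states, since the on-disk format
# of demonstrations_txt is not shown in this excerpt.
import numpy as np
from sklearn.mixture import GaussianMixture

demos = np.random.randn(500, 3)  # placeholder demonstration data, shape (N, D)
gmm = GaussianMixture(n_components=3, covariance_type='full', random_state=0)
gmm.fit(demos)

print("Average log likelihood per sample:", gmm.score(demos))
print("Priors:", gmm.weights_)
print("Means shape:", gmm.means_.shape)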
def objective(self, x):
    # Evaluate a candidate parameter vector by rolling out the perturbed GMM
    model = GMM()
    model.copy_model(self.initial_model)
    model.update_gaussians(np.asarray(x))
    accuracy, mean_return, mean_length = model.evaluate(self.env, max_steps=600, num_episodes=1)
    print("Accuracy:", accuracy, "Mean return:", mean_return)
    # Negate the return: the optimizer minimizes, we want to maximize reward
    return -mean_return
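# Because objective() returns the negated mean return, any generic minimizer effectively
# maximizes task reward. The standalone sketch below illustrates that sign convention with
# scipy.optimize.minimize on a stand-in objective; GMMOptimizer.optimize itself is not shown
# in this excerpt, so the choice of backend and of Nelder-Mead here is an assumption made
# purely for illustration.
import numpy as np
from scipy.optimize import minimize

def negated_return(x):
    # Stand-in for GMMOptimizer.objective: a toy "reward" peaked at x = 0.3, negated.
    return -float(np.exp(-np.sum(np.square(x - 0.3))))

x0 = np.zeros(4)  # hypothetical flat GMM-parameter perturbation (size is illustrative)
res = minimize(negated_return, x0, method="Nelder-Mead")

print("Best parameters:", res.x)
print("Best average reward:", -res.fun)  # same sign flip as in the optimizer's main()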
def main(cfg):
    # Do not show tacto renderer output
    logger = logging.getLogger('tacto.renderer')
    logger.propagate = False

    env = custom_sawyer_peg_env(cfg.env)
    gmm_model = GMM(add_cwd(cfg.gmm_model))
    agent = SAC_GMM_Agent(env=env, model=gmm_model, **cfg.agent)
    agent.load(add_cwd(cfg.test.model_name))

    stats = agent.evaluate(**cfg.test.run)
    logger = logging.getLogger(__name__)
    logger.info(stats)
    agent.env.close()
def main(cfg):
    # Do not show tacto renderer output
    logger = logging.getLogger('tacto.renderer')
    logger.propagate = False

    for i in range(cfg.train.num_random_seeds):
        # Training
        env = custom_sawyer_peg_env(cfg.env)
        gmm_model = GMM(str(Path(cfg.gmm_model).absolute()))
        agent = SAC_GMM_Agent(env=env, model=gmm_model, **cfg.agent)
        save_filename = get_save_filename("sac_gmm", cfg, i)
        agent.train(**cfg.train.run, save_filename=save_filename)
        agent.env.close()

        # Testing
        agent.env = custom_sawyer_peg_env(cfg.env)
        agent.evaluate(**cfg.test.run)
        agent.env.close()
def main():
    # Environment hyperparameters
    env_params = {
        "show_gui": False,
        "with_force": False,
        "with_joint": False,
        "relative": True,
        "with_noise": False,
        "dt": 0.05,
    }
    env = custom_sawyer_peg_env(**env_params)

    # Evaluation parameters
    model_name = "models/GMM_models/gmm_peg_v2_pose_9.npy"
    model = GMM(model_name)

    optimizer = GMMOptimizer(env, model)
    res = optimizer.optimize()
    print(res.x)

    model.update_gaussians(np.asarray(res.x))
    new_model_name = "models/optimizer/test.npy"
    model.save_model(new_model_name)

    print("Best model - Average reward:", -res.fun)
    print("Model saved as:", new_model_name)
class SAC_GMM_Agent(SAC_Agent):
    def __init__(self, model, window_size=32, *args, **kwargs):
        self.initial_model = model  # Initial model provided
        self.model = GMM()
        self.model.copy_model(self.initial_model)  # Model used for training
        self.window_size = window_size
        self.burn_in_steps = 1000
        super(SAC_GMM_Agent, self).__init__(*args, **kwargs)

    def get_action_space(self):
        if not hasattr(self, 'action_space'):
            priors_high = np.ones(self.model.priors.size)
            mu_high = np.ones(self.model.mu.size)
            action_high = np.concatenate((priors_high, mu_high), axis=-1)
            action_low = -action_high
            self.action_space = gym.spaces.Box(action_low, action_high)
        return self.action_space

    def update_gaussians(self, gmm_change):
        # Change of priors, range: [-0.1, 0.1]
        priors = gmm_change[:self.model.priors.size]
        priors = priors.reshape(self.model.priors.shape) * 0.1

        # Change of mus, range: [-0.01, 0.01]
        mu = gmm_change[self.model.priors.size:]
        mu = mu.reshape(self.model.mu.shape) * 0.01

        change_dict = {"mu": mu, "prior": priors}
        self.model.update_gaussians(change_dict)

    def evaluate(self, num_episodes=5, render=False):
        successful_episodes, episodes_returns, episodes_lengths = 0, [], []
        for episode in range(1, num_episodes + 1):
            observation = self.env.reset()
            episode_return, episode_length, left_steps = 0, 0, self.env.max_episode_steps
            while left_steps > 0:
                # Reset to the initial model and apply the policy's (deterministic) change
                self.model.copy_model(self.initial_model)
                gmm_change = self.get_action_from_observation(observation, deterministic=True)
                self.update_gaussians(gmm_change)

                # Roll out the adapted GMM for one window
                model_reward = 0
                for step in range(self.window_size):
                    vel = self.model.predict_velocity_from_observation(observation)
                    observation, reward, done, info = self.env.step(vel)
                    model_reward += reward
                    episode_length += 1
                    left_steps -= 1
                    if render:
                        self.env.render()
                    if done or left_steps <= 0:
                        break

                episode_return += model_reward
                if done:
                    break
                if render:
                    self.env.render()

            if ("success" in info) and info['success']:
                successful_episodes += 1
            episodes_returns.append(episode_return)
            episodes_lengths.append(episode_length)

        accuracy = successful_episodes / num_episodes
        return accuracy, np.mean(episodes_returns), np.mean(episodes_lengths)

    def train_episode(self, episode, exploration_episodes, log, render):
        sac_steps = 0
        episode_return, episode_length, left_steps = 0, 0, self.env.max_episode_steps
        ep_critic_loss, ep_actor_loss, ep_alpha_loss = 0, 0, 0

        observation = self.env.reset()
        while left_steps > 0:
            # Reset to the initial model; during burn-in apply no change
            self.model.copy_model(self.initial_model)
            if self.training_step < self.burn_in_steps:
                gmm_change = np.zeros(self.action_space.shape)
            else:
                gmm_change = self.get_action_from_observation(observation, deterministic=False)
            self.update_gaussians(gmm_change)

            # Roll out the adapted GMM for one window
            model_reward = 0
            curr_observation = observation
            for step in range(self.window_size):
                vel = self.model.predict_velocity_from_observation(curr_observation)
                curr_observation, reward, done, info = self.env.step(vel)
                model_reward += reward
                episode_length += 1
                left_steps -= 1
                if render:
                    self.env.render()
                if done or left_steps <= 0:
                    break

            critic_loss, actor_loss, alpha_loss = self.update(
                observation, gmm_change, curr_observation, model_reward, done, log)
            observation = curr_observation
            episode_return += model_reward
            ep_critic_loss += critic_loss
            ep_actor_loss += actor_loss
            ep_alpha_loss += alpha_loss
            self.training_step += 1  # SAC steps in total
            sac_steps += 1           # SAC steps in this episode

            if render:
                self.env.render()
            if done:
                break

        if log:
            self.log_scalar('Train/Episode/critic_loss', ep_critic_loss / sac_steps, episode)
            self.log_scalar('Train/Episode/actor_loss', ep_actor_loss / sac_steps, episode)
            self.log_scalar('Train/Episode/alpha_loss', ep_alpha_loss / sac_steps, episode)
        self.log_episode_information(episode_return, episode_length, episode, "Train")
        return episode_return, episode_length
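# The mapping from a SAC action to a GMM perturbation can be checked in isolation.
# This is a minimal standalone sketch, not part of the agent: the component count (3)
# and state dimension (3) below are assumed purely for illustration, and `priors`/`mu`
# stand in for the attributes of the project's GMM class.
import numpy as np

n_components, dim = 3, 3  # hypothetical GMM size
priors = np.full(n_components, 1.0 / n_components)
mu = np.zeros((n_components, dim))

# A SAC action lives in [-1, 1]^(priors.size + mu.size), as defined by get_action_space()
action = np.random.uniform(-1.0, 1.0, size=priors.size + mu.size)

# Same split and scaling as SAC_GMM_Agent.update_gaussians()
prior_change = action[:priors.size].reshape(priors.shape) * 0.1   # range [-0.1, 0.1]
mu_change = action[priors.size:].reshape(mu.shape) * 0.01         # range [-0.01, 0.01]

print("Prior deltas:", prior_change)
print("Mu deltas:\n", mu_change)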