def main(cfg):
    # Evaluate a trained SAC agent on the peg insertion task.
    env = custom_sawyer_peg_env(cfg.env)
    agent = SAC_Agent(env, **cfg.agent)
    agent.load(add_cwd(cfg.test.model_name))
    stats = agent.evaluate(**cfg.test.run)
    logger = logging.getLogger(__name__)
    logger.info(stats)

def main(cfg):
    # Do not show tacto renderer output
    logger = logging.getLogger('tacto.renderer')
    logger.propagate = False

    env = custom_sawyer_peg_env(cfg.env)
    pd = PegPD(env)
    successful_episodes = 0
    while successful_episodes < cfg.number_demonstrations:
        episode_return = 0
        observation = env.reset()
        observations = [dict_arrays_to_list(observation)]
        pd.reset()
        for t in range(env.max_episode_steps):
            action = pd.get_action()
            observation, reward, done, info = env.step(action)
            observations.append(dict_arrays_to_list(observation))
            episode_return += reward
            if done:
                if info["success"]:
                    filename = "demonstration_%d.json" % (successful_episodes + 1)
                    save_file(observations, cfg.output_dir, filename)
                    successful_episodes += 1
                break
        print("Episode_return", episode_return, "Episode_length", t)
        print("Total successful episodes: %d/%d" % (successful_episodes, cfg.number_demonstrations))
    env.close()

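# `dict_arrays_to_list` and `save_file` are project helpers not shown above.
# A minimal sketch of plausible implementations, assuming observations are flat
# dicts of numpy arrays and demonstrations are stored as JSON (hypothetical,
# for illustration only):

import json
import os

import numpy as np

def dict_arrays_to_list(observation):
    """Convert numpy arrays in an observation dict to plain lists (JSON-serializable)."""
    return {k: v.tolist() if isinstance(v, np.ndarray) else v
            for k, v in observation.items()}

def save_file(data, output_dir, filename):
    """Dump the recorded episode to <output_dir>/<filename> as JSON."""
    os.makedirs(output_dir, exist_ok=True)
    with open(os.path.join(output_dir, filename), "w") as f:
        json.dump(data, f)
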
def main(cfg):
    # Do not show tacto renderer output
    logger = logging.getLogger('tacto.renderer')
    logger.propagate = False

    for i in range(cfg.train.num_random_seeds):
        # Training
        env = custom_sawyer_peg_env(cfg.env)
        gmm_model = GMM(str(Path(cfg.gmm_model).absolute()))
        agent = SAC_GMM_Agent(env=env, model=gmm_model, **cfg.agent)
        save_filename = get_save_filename("sac_gmm", cfg, i)
        agent.train(**cfg.train.run, save_filename=save_filename)
        agent.env.close()

        # Testing
        agent.env = custom_sawyer_peg_env(cfg.env)
        agent.evaluate(**cfg.test.run)
        agent.env.close()

def train(config, params=None):
    # TODO: Save models
    logger = logging.getLogger(__name__)
    budget = 300
    env = custom_sawyer_peg_env(params.env)
    agent = SAC_Agent(env, **config)
    agent.train(**params.train, num_episodes=int(budget))
    accuracy, val_return, val_length = agent.evaluate(**params.validation)
    agent.env.close()
    logger.info("Final return reported to the optimizer: %.2f" % val_return)
    tune.report(val_return=val_return, val_accuracy=accuracy)

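# `train` reports metrics via Ray Tune's `tune.report`, so it can be launched as
# a Tune trainable. A sketch of a possible driver, assuming the (legacy)
# `tune.run` API; the search space below is hypothetical, not the project's
# actual one:

from ray import tune

def launch_tuning(params):
    search_space = {
        "actor_lr": tune.loguniform(1e-5, 1e-3),   # hypothetical hyperparameters
        "critic_lr": tune.loguniform(1e-5, 1e-3),
        "batch_size": tune.choice([128, 256]),
    }
    analysis = tune.run(
        tune.with_parameters(train, params=params),  # forwards the fixed `params`
        config=search_space,
        num_samples=20,
        metric="val_return",
        mode="max",
    )
    print("Best config:", analysis.best_config)
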
def main(cfg):
    # Do not show tacto renderer output
    logger = logging.getLogger('tacto.renderer')
    logger.propagate = False

    env = custom_sawyer_peg_env(cfg.env)
    logger = logging.getLogger(__name__)
    for model_name in cfg.model_names:
        print(model_name)
        model = GMM(add_cwd(model_name))
        accuracy, mean_return, mean_length = model.evaluate(env=env, **cfg.test)
        logger.info("Accuracy: %.2f, Mean return: %.2f, Mean length: %.2f"
                    % (accuracy, mean_return, mean_length))
    env.close()

def main(cfg):
    # Do not show tacto renderer output
    logger = logging.getLogger('tacto.renderer')
    logger.propagate = False

    for i in range(cfg.train.num_random_seeds):
        # Training
        env = custom_sawyer_peg_env(cfg.env)
        agent = SAC_Agent(env, **cfg.agent)
        save_filename = get_save_filename("sac_peg", cfg, i)
        agent.train(**cfg.train.run, save_filename=save_filename)
        agent.env.close()

def main(cfg):
    # Do not show tacto renderer output
    logger = logging.getLogger('tacto.renderer')
    logger.propagate = False

    env = custom_sawyer_peg_env(cfg.env)
    gmm_model = GMM(add_cwd(cfg.gmm_model))
    agent = SAC_GMM_Agent(env=env, model=gmm_model, **cfg.agent)
    agent.load(add_cwd(cfg.test.model_name))
    stats = agent.evaluate(**cfg.test.run)
    logger = logging.getLogger(__name__)
    logger.info(stats)
    agent.env.close()

def main(cfg):
    env = custom_sawyer_peg_env(cfg.env)
    agent = SAC_Agent(env, **cfg.agent)
    agent.load(add_cwd(cfg.test.model_name))

    # Collect force readings until we have min_seq successful and min_seq failed episodes.
    min_seq = 5
    force_sequences = {
        "successes": {"left": [], "right": []},
        "failures": {"left": [], "right": []}
    }
    successes, failures = 0, 0
    while successes < min_seq or failures < min_seq:
        force_readings = {"left": [], "right": []}
        state = env.reset()
        for step in range(env.max_episode_steps):
            # The last two state entries are the left/right finger force readings.
            force_readings["left"].append(state[-2])
            force_readings["right"].append(state[-1])
            action = agent.getAction(state, deterministic=True)
            next_state, reward, done, info = env.step(action)
            state = next_state
            if done:
                break
        if "success" in info and info["success"]:
            if successes < min_seq:
                force_sequences["successes"]["left"].append(force_readings["left"])
                force_sequences["successes"]["right"].append(force_readings["right"])
                successes += 1
        else:
            if failures < min_seq:
                force_sequences["failures"]["left"].append(force_readings["left"])
                force_sequences["failures"]["right"].append(force_readings["right"])
                failures += 1
    plot_force_sequences(force_sequences)
    env.close()

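# `plot_force_sequences` is a project helper not shown above. A hypothetical
# matplotlib sketch, assuming it overlays per-episode force traces with
# successes and failures distinguished by color:

import matplotlib.pyplot as plt

def plot_force_sequences(force_sequences):
    """Plot left/right finger force traces for successful vs. failed episodes."""
    fig, axes = plt.subplots(1, 2, sharey=True, figsize=(10, 4))
    for ax, finger in zip(axes, ("left", "right")):
        for seq in force_sequences["successes"][finger]:
            ax.plot(seq, color="tab:green", alpha=0.6)
        for seq in force_sequences["failures"][finger]:
            ax.plot(seq, color="tab:red", alpha=0.6)
        ax.set_title("%s finger (green = success, red = failure)" % finger)
        ax.set_xlabel("step")
    axes[0].set_ylabel("force reading")
    plt.tight_layout()
    plt.show()
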
def main(cfg):
    # Do not show tacto renderer, env, or pybulletX output
    logger = logging.getLogger('tacto.renderer')
    logger.propagate = False
    logger = logging.getLogger('env.sawyer_peg_env')
    logger.propagate = False
    logger = logging.getLogger('pybulletX._wrapper')
    logger.propagate = False

    # Hyperparameters
    model_type = "pose"  # "pose" or "force"
    demonstration_dir = add_cwd("demonstrations_txt")
    K = 3        # number of Gaussian components
    budget = 20  # number of training runs

    # Start MATLAB
    log_likelihood = []
    best_ret = 0
    if not drlfads.USE_MATLAB:
        raise NotImplementedError('This function requires MATLAB')
    eng = matlab.engine.start_matlab()
    eng.addpath(add_cwd(str(Path(__file__).parents[0])))

    env = custom_sawyer_peg_env(cfg.env)
    for _ in range(budget):
        name = "gmm_peg_%s_%d" % (model_type, K)
        bll = eng.train_model(demonstration_dir, name, model_type, K, 1)
        print("Model trained, final log likelihood:", bll)
        # Test new configurations
        if bll not in log_likelihood:
            # Evaluate model in actual environment
            log_likelihood.append(bll)
            model = GMM(name + ".mat")
            accuracy, mean_return, mean_length = model.evaluate(env=env, **cfg.test)
            print("Accuracy:", accuracy, "Mean return:", mean_return, "Mean length:", mean_length)
            if mean_return > best_ret:
                print("Best model so far!")
                best_ret = mean_return
                model.save_model(name + ".npy")
    env.close()
    eng.quit()

def compute(self, config, budget, working_directory, *args, **kwargs):
    env = custom_sawyer_peg_env(self.cfg.env)
    agent = SAC_Agent(env, **config)
    self.logger.info("Starting agent with budget %d" % budget)
    self.logger.info("Configuration: %s" % json.dumps(config))

    save_dir = "models/iteration_%d" % self.iteration
    self.logger.info("Save directory: %s" % save_dir)
    save_filename = self.get_save_filename(self.cfg.env.observation)
    agent.train(**self.cfg.train, num_episodes=int(budget),
                save_filename=save_filename, save_dir=save_dir)
    accuracy, val_return, val_length = agent.evaluate(**self.cfg.validation)
    self.logger.info("Final return reported to the optimizer: %.2f" % val_return)
    self.iteration += 1
    agent.env.close()

    # Release memory
    del agent.replay_buffer
    gc.collect()

    return {'loss': -val_return,  # remember: HpBandSter always minimizes!
            'info': {'val_episode_length': val_length,
                     'accuracy': accuracy}}

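# `compute` matches the signature of an HpBandSter `Worker`. A sketch of how such
# a worker could be driven by BOHB; the worker class name (`SACWorker`) and the
# `get_configspace()` helper are assumptions, not shown in the code above:

import hpbandster.core.nameserver as hpns
from hpbandster.optimizers import BOHB

def run_bohb(cfg, run_id="sac_peg"):
    ns = hpns.NameServer(run_id=run_id, host="127.0.0.1", port=None)
    ns.start()
    worker = SACWorker(cfg=cfg, nameserver="127.0.0.1", run_id=run_id)  # hypothetical class
    worker.run(background=True)
    bohb = BOHB(configspace=SACWorker.get_configspace(),  # hypothetical static method
                run_id=run_id, nameserver="127.0.0.1",
                min_budget=30, max_budget=300)
    result = bohb.run(n_iterations=10)
    bohb.shutdown(shutdown_workers=True)
    ns.shutdown()
    return result
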
def main():
    # Environment hyperparameters
    env_params = {
        "show_gui": False,
        "with_force": False,
        "with_joint": False,
        "relative": True,
        "with_noise": False,
        "dt": 0.05
    }
    env = custom_sawyer_peg_env(**env_params)

    # Evaluation parameters
    model_name = "models/GMM_models/gmm_peg_v2_pose_9.npy"
    model = GMM(model_name)
    optimizer = GMMOptimizer(env, model)
    res = optimizer.optimize()
    print(res.x)

    model.update_gaussians(np.asarray(res.x))
    new_model_name = "models/optimizer/test.npy"
    model.save_model(new_model_name)
    print("Best model - Average reward:", -res.fun)
    print("Model saved as:", new_model_name)

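# `GMMOptimizer` is project code not shown above. Judging from the scipy-style
# result object (`res.x`, `res.fun`), one plausible sketch wraps
# `scipy.optimize.minimize` over the flattened Gaussian parameters;
# `get_params` and the `num_episodes` kwarg are hypothetical:

import numpy as np
from scipy.optimize import minimize

class GMMOptimizer:
    def __init__(self, env, model, num_episodes=5):
        self.env = env
        self.model = model
        self.num_episodes = num_episodes

    def _neg_mean_return(self, x):
        # Update the GMM with the candidate parameters and roll out a few episodes.
        self.model.update_gaussians(np.asarray(x))
        _, mean_return, _ = self.model.evaluate(env=self.env,
                                                num_episodes=self.num_episodes)
        return -mean_return  # minimizing negative return maximizes return

    def optimize(self):
        x0 = self.model.get_params()  # hypothetical: flattened means/priors
        return minimize(self._neg_mean_return, x0, method="Nelder-Mead")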