# (continues inside the loop over the requested agents)
warning_str = (
    f"WARN: Inference could not be run for agent `{name}` as it was not "
    f"found for the `{args.env_name}` environment."
)
print(colorize(warning_str, "yellow"))
continue

# Get environment action and observation space dimensions
a_lowerbound = env.action_space.low
a_upperbound = env.action_space.high
s_dim = env.observation_space.shape[0]
a_dim = env.action_space.shape[0]

# Initiate the LAC policy
policy = LAC(
    a_dim,
    s_dim,
    act_limits={"low": a_lowerbound, "high": a_upperbound},
)

# Retrieve all trained policies for a given agent
print("Looking for policies...")
policy_list = os.listdir(model_path)
policy_list = [
    policy_name
    for policy_name in policy_list
    if osp.exists(
        osp.abspath(osp.join(model_path, policy_name, "policy/checkpoint"))
    )
]
policy_list = [int(item) for item in policy_list if item.isnumeric()]
policy_list.sort()

# Check if a given policy exists for the current agent
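# A minimal sketch of consuming the sorted `policy_list`: restore each
# numbered policy folder and run inference with it. `policy.restore()` is
# an assumed checkpoint-loading method and the loop body is illustrative
# only; neither is confirmed by this excerpt.
for policy_id in policy_list:
    retrieval_path = osp.abspath(osp.join(model_path, str(policy_id), "policy"))
    if not policy.restore(retrieval_path):  # hypothetical restore API
        print(colorize(f"WARN: Could not load policy `{policy_id}`.", "yellow"))
        continue
    # ... run the inference rollouts with the restored policy ...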
# Check if model exists
if not os.path.exists(MODEL_PATH):
    print(
        f"Shutting down robustness eval since model `{args.model_name}` was "
        f"not found for the `{args.env_name}` environment."
    )
    sys.exit(0)

# Get environment action and observation space dimensions
a_lowerbound = env.action_space.low
a_upperbound = env.action_space.high
s_dim = env.observation_space.shape[0]
a_dim = env.action_space.shape[0]

# Create policy
policy = LAC(a_dim, s_dim)

# Retrieve agents
print("Looking for policies (rollouts)...")
rollout_list = os.listdir(MODEL_PATH)
rollout_list = [
    rollout_name
    for rollout_name in rollout_list
    if os.path.exists(
        os.path.abspath(
            os.path.join(MODEL_PATH, rollout_name, "policy/checkpoint")
        )
    )
]
rollout_list = [int(item) for item in rollout_list if item.isnumeric()]
rollout_list.sort()

# Check if any trained policies (rollouts) were found
if not rollout_list:
    print(
        f"Shutting down robustness eval since no trained policies (rollouts) "
        f"were found for model `{args.model_name}` in the `{args.env_name}` "
        f"environment."
    )
    sys.exit(0)
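# Minimal follow-up sketch: restore the newest rollout before evaluating it.
# `policy.restore()` is an assumed checkpoint-loading method and the
# "latest rollout" selection is illustrative; neither is confirmed by this
# excerpt.
latest_rollout = rollout_list[-1]  # highest-numbered (most recent) rollout
policy.restore(
    os.path.abspath(os.path.join(MODEL_PATH, str(latest_rollout), "policy"))
)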
def dynamic(policy_path, env_name, env_params, alg_params, eval_params):
    """Performs a dynamic robustness evaluation.

    Args:
        policy_path (str): Path to the folder that contains the trained policy.
        env_name (str): The gym environment you want to use.
        env_params (dict): Dictionary containing the environment parameters.
        alg_params (dict): Dictionary containing the algorithm parameters.
        eval_params (dict): Dictionary containing the evaluation parameters.
    """
    # Retrieve environment
    env = get_env_from_name(env_name, ENV_SEED)

    # Get trained policy
    s_dim = env.observation_space.shape[0]
    a_dim = env.action_space.shape[0]
    policy = LAC(a_dim, s_dim)

    # Configure logger
    log_path = os.path.join(
        policy_path, "eval/dynamic", eval_params["additional_description"]
    )
    eval_params.update({"magnitude": 0})
    logger.configure(dir=log_path, format_strs=["csv"])

    # Evaluate policy results
    _, paths = evaluation(
        policy_path, env_name, env, env_params, eval_params, policy
    )

    # Retrieve the length of the longest path
    max_len = max(len(path) for path in paths["s"])

    # Log the average path, standard deviation and reference per time step
    average_path = np.average(np.array(paths["s"]), axis=0)
    std_path = np.std(np.array(paths["s"]), axis=0)
    for i in range(max_len):
        logger.logkv("average_path", average_path[i])
        logger.logkv("std_path", std_path[i])
        logger.logkv("reference", paths["reference"][0][i])
        logger.dumpkvs()

    # Plot results if requested
    if eval_params["directly_show"]:
        if eval_params["plot_average"]:
            # Plot the average path with a one-standard-deviation band
            fig = plt.figure(figsize=(9, 6))
            ax = fig.add_subplot(111)
            t = range(max_len)
            ax.plot(t, average_path, color="red")
            ax.fill_between(
                t,
                average_path - std_path,
                average_path + std_path,
                color="red",
                alpha=0.1,
            )
            plt.show()
        else:
            # Plot each individual path separately (e.g. for the Ex3_EKF env)
            for path in paths["s"]:
                path = np.array(path)
                t = range(len(path))
                fig = plt.figure()
                ax = fig.add_subplot(111)
                ax.plot(t, path, color="red")
                plt.show()
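# Minimal usage sketch for `dynamic()`. All concrete values below are
# illustrative placeholders; only the `eval_params` keys referenced in the
# function body above ("additional_description", "directly_show",
# "plot_average") are known to be required.
if __name__ == "__main__":
    dynamic(
        policy_path="./log/Ex3_EKF/LAC/0",  # hypothetical log folder
        env_name="Ex3_EKF",  # hypothetical gym environment name
        env_params={},  # placeholder environment parameters
        alg_params={},  # placeholder algorithm parameters
        eval_params={
            "additional_description": "impulse",  # subfolder for the csv logs
            "directly_show": True,  # show the plots directly
            "plot_average": True,  # plot the mean path +/- one std dev
        },
    )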