def run_experiment(agents, environment, args, seed=None):
    """Run every agent on the environment and gather the logged statistics.

    Each agent is evaluated once before training (``evaluate_policy`` and
    ``store_value_function`` with ``episode=0``) and then rolled out with both
    functions registered as callbacks at ``callback_frequency=1``.

    Parameters
    ----------
    agents: dict
        Mapping from agent name to agent. Each agent must expose
        ``logger.statistics`` and ``logger.all["dual_loss"]``.
    environment:
        Environment handed to the evaluation helpers and to ``rollout_agent``.
    args:
        Namespace providing ``seed``, ``env_name``, ``num_episodes``,
        ``max_steps`` and ``num_iter``.
    seed: int, optional
        Random seed applied before each agent is run. Falls back to
        ``args.seed`` when omitted.

    Returns
    -------
    pandas.DataFrame
        One row per entry of ``agent.logger.statistics`` for every agent, with
        the extra columns ``name``, ``seed``, ``time`` and ``duals``. The
        per-agent indices are kept as they are (they are not renumbered), and
        an empty frame is returned when ``agents`` is empty.
    """
    seed = args.seed if seed is None else seed

    frames = []
    for name, agent in agents.items():
        # Re-seed before every agent so that all of them start from the same
        # random state.
        set_random_seed(seed)
        print(f"Running agent {name} on {args.env_name}")

        # Record the performance of the initial policy before any training.
        evaluate_policy(agent, environment)
        store_value_function(agent, environment, episode=0)
        rollout_agent(
            agent=agent,
            environment=environment,
            num_episodes=args.num_episodes,
            max_steps=args.max_steps,
            callback_frequency=1,
            callbacks=[evaluate_policy, store_value_function],
        )

        df_ = pd.DataFrame(agent.logger.statistics)
        df_["name"] = name
        # Store the seed that was actually used, which differs from
        # ``args.seed`` whenever the ``seed`` argument overrides it.
        df_["seed"] = seed
        df_["time"] = np.arange(len(df_))

        # Row ``i`` receives the i-th window of ``num_iter`` dual-loss entries
        # (presumably the dual losses logged during that row's episode --
        # TODO confirm against the agent's logging).
        df_["duals"] = np.empty((len(df_)), dtype=object)
        duals = agent.logger.all["dual_loss"]
        for i in range(len(df_)):
            df_.at[i, "duals"] = duals[i * args.num_iter : (i + 1) * args.num_iter]

        frames.append(df_)

    if not frames:
        # ``pd.concat`` raises on an empty sequence; keep the original
        # behaviour of returning an empty frame when there are no agents.
        return pd.DataFrame()

    # Concatenate once instead of re-copying the accumulated frame per agent.
    return pd.concat(frames, sort=False)
def init_experiment(args, **kwargs):
    """Build the agent and the environment described by ``args``.

    ``kwargs`` and the contents of the agent configuration file are merged
    into ``args`` in place (later sources override earlier ones), and
    ``args.max_steps`` is set from the environment configuration file,
    defaulting to 1000.

    Parameters
    ----------
    args:
        Namespace with at least ``agent_config`` and ``env_config``; ``seed``
        and ``agent`` must be available once the overrides are merged.
    **kwargs:
        Overrides written into ``args`` before the agent config is parsed.

    Returns
    -------
    tuple
        ``(agent, environment)``.
    """
    # ``vars`` returns the live namespace dict, so both updates below are
    # visible as attributes of ``args`` as well.
    namespace = vars(args)
    namespace.update(kwargs)
    namespace.update(parse_config_file(args.agent_config))

    # Hyper-parameters forwarded to the agent: every entry that is not None.
    # This is a snapshot; later changes to ``args`` are not reflected in it.
    hparams = {key: value for key, value in namespace.items() if value is not None}

    env_config = parse_config_file(args.env_config)
    args.max_steps = env_config.get("max_steps", 1000)

    # %% Set Random seeds.
    set_random_seed(args.seed)

    # %% Initialize environment.
    env_id = env_config["name"]
    if env_id not in gym_envs:
        # Any name outside ``gym_envs`` is expected to look like "name/task".
        suite_name, suite_task = env_id.split("/")
        environment = DMSuiteEnvironment(suite_name, suite_task, seed=args.seed)
    else:
        environment = GymEnvironment(env_id, seed=args.seed)

    # %% Initialize module.
    rllib_agents = importlib.import_module("rllib.agent")
    build_default = getattr(rllib_agents, f"{args.agent}Agent").default
    transformer = RewardTransformer(scale=hparams.get("reward_scale", 1.0))
    agent = build_default(environment, reward_transformer=transformer, **hparams)
    agent.logger.save_hparams(hparams)

    return agent, environment
"""Python Script Template.""" from rllib.environment import GymEnvironment from rllib.util.utilities import set_random_seed from qreps.environment.random_action_wrapper import RandomActionWrapper import os from exps.utilities import parse_arguments, run_experiment from exps.environments.utilities import get_saddle_agents, get_benchmark_agents args = parse_arguments() args.env_name = "RiverSwim-v0" args.lr = 0.01 args.eta = 5.0 set_random_seed(args.seed) env = GymEnvironment(args.env_name, seed=args.seed) env.add_wrapper(RandomActionWrapper, p=args.random_action_p) agents = get_saddle_agents(env, **vars(args)) agents.update(get_benchmark_agents(env, **vars(args))) df = run_experiment(agents, env, args) df.to_pickle(f"river_swim_results_{args.seed}.pkl") os.system("python river_swim_plot.py")