# NOTE(review): this chunk is whitespace-mangled onto a single line AND is
# incomplete at both edges: the leading `}` closes a dict literal that starts
# before this view, and the trailing `VecNormalize(env, training=False, ...`
# call is cut off mid-argument-list (no closing paren visible). Kept
# byte-identical rather than reflowed, because reflowing would require
# guessing the missing tail — TODO: recover the original formatting from the
# source file.
#
# What the visible code does (evaluation/run script, not training):
#  - asserts a trained model zip exists under PATHS['model'] and that the
#    scenario JSON at PATHS['scenerios_json_path'] exists (note: "scenerios"
#    spelling is a project-wide key — do not "fix" it here in isolation);
#  - loads hyperparameters from json and prints them;
#  - builds a 'ScenerioTask' task manager and a single FlatlandEnv wrapped in
#    DummyVecEnv (goal_radius=0.50, max_steps_per_episode=350);
#  - if params['normalize'], wraps the env in VecNormalize with
#    training=False (presumably frozen normalization stats for evaluation —
#    confirm against the untruncated source).
} assert os.path.isfile(os.path.join( PATHS['model'], 'best_model.zip')), "No model file found in %s" % PATHS['model'] assert os.path.isfile( PATHS['scenerios_json_path'] ), "No scenario file named %s" % PATHS['scenerios_json_path'] # initialize hyperparams params = load_hyperparameters_json(agent_hyperparams, PATHS) print("START RUNNING AGENT: %s" % params['agent_name']) print_hyperparameters(params) # initialize task manager task_manager = get_predefined_task(mode='ScenerioTask', PATHS=PATHS) # initialize gym env env = DummyVecEnv([ lambda: FlatlandEnv(task_manager, PATHS.get('robot_setting'), PATHS.get('robot_as'), params['reward_fnc'], params['discrete_action_space'], goal_radius=0.50, max_steps_per_episode=350) ]) if params['normalize']: env = VecNormalize(env, training=False, norm_obs=True, norm_reward=False,
# initialize hyperparameters (save to/ load from json)
# Bundle the module-level hyperparameter values into the project's
# agent_hyperparams object, then persist/restore them via json so a resumed
# run (args.load) reuses the exact settings it was trained with.
hyperparams_obj = agent_hyperparams(
    AGENT_NAME, robot, gamma, n_steps, ent_coef, learning_rate, vf_coef,
    max_grad_norm, gae_lambda, batch_size, n_epochs, clip_range, reward_fnc,
    discrete_action_space, normalize, task_mode, start_stage)
params = initialize_hyperparameters(
    agent_name=AGENT_NAME,
    PATHS=PATHS,
    hyperparams_obj=hyperparams_obj,
    load_target=args.load)

# instantiate gym environment
n_envs = 1
task_manager = get_predefined_task(params['task_mode'], params['curr_stage'], PATHS)
# NOTE: all n_envs factory lambdas close over the SAME task_manager instance;
# with n_envs == 1 that is harmless — revisit before raising n_envs.
env = DummyVecEnv([
    lambda: FlatlandEnv(
        task_manager,
        PATHS.get('robot_setting'),
        PATHS.get('robot_as'),
        params['reward_fnc'],
        params['discrete_action_space'],
        goal_radius=1.00,
        max_steps_per_episode=200)
] * n_envs)
if params['normalize']:
    # training=True: running obs statistics are updated during training;
    # rewards are left unnormalized but clipped to +/-15.
    env = VecNormalize(env, training=True, norm_obs=True, norm_reward=False,
                       clip_reward=15)
# generate agent name and model specific paths
AGENT_NAME = get_agent_name(args)
print("________ STARTING TRAINING WITH: %s ________\n" % AGENT_NAME)
PATHS = get_paths(AGENT_NAME, args)

# total timesteps: CLI override via args.n, otherwise a short default run
n_timesteps = 6000 if args.n is None else args.n

# instantiate gym environment
n_envs = 1
task = get_predefined_task("random")
env = DummyVecEnv([
    lambda: FlatlandEnv(task, PATHS.get('robot_setting'), PATHS.get('robot_as'),
                        discrete_action_space)
] * n_envs)

# instantiate eval environment
# NOTE: eval_env is deliberately rebound — first the Monitor-wrapped env,
# then the EvalCallback that owns it (evaluates every 250 steps for 10
# episodes and saves the best model under PATHS['model']).
eval_env = Monitor(
    FlatlandEnv(task, PATHS.get('robot_setting'), PATHS.get('robot_as'),
                discrete_action_space),
    PATHS.get('eval'))
eval_env = EvalCallback(
    eval_env,
    n_eval_episodes=10,
    eval_freq=250,
    log_path=PATHS.get('eval'),
    best_model_save_path=PATHS.get('model'),
    deterministic=True)

# determine mode
if args.custom_mlp:
    # custom mlp flag: build PPO with a user-specified net architecture and
    # activation function; all other hyperparameters come from module scope.
    model = PPO(
        "MlpPolicy", env,
        policy_kwargs=dict(net_arch=args.net_arch,
                           activation_fn=get_act_fn(args.act_fn)),
        gamma=gamma,
        n_steps=n_steps,
        ent_coef=ent_coef,
        learning_rate=learning_rate,
        vf_coef=vf_coef,
        max_grad_norm=max_grad_norm,
        gae_lambda=gae_lambda,
        batch_size=batch_size,
        n_epochs=n_epochs,
        clip_range=clip_range,
        tensorboard_log=PATHS.get('tb'),
        verbose=1)