def experiment_setup(args):
    if args.vae_dist_help:
        # Some extensions of the envs use the distance estimator, so this
        # load is paired with the interval wrapper. TODO: use a different approach?
        load_vaes(args)
    load_field_parameters(args)
    if args.dist_estimator_type is not None:
        temp_env = make_temp_env(args)
        load_dist_estimator(args, temp_env)
        del temp_env
    env = make_env(args)
    env_test = make_env(args)
    if args.goal_based:
        args.obs_dims = list(goal_based_process(env.reset()).shape)
        args.acts_dims = [env.action_space.shape[0]]
        args.compute_reward = env.compute_reward
        args.compute_distance = env.compute_distance
    if args.imaginary_obstacle_transitions:
        # Relatively small buffer size, so it always holds the most recent collisions.
        args.imaginary_buffer = ReplayBuffer_Imaginary(args, buffer_size=args.im_buffer_size)
    args.buffer = buffer = ReplayBuffer_Episodic(args)
    args.learner = learner = create_learner(args)
    args.agent = agent = create_agent(args)
    args.logger.info('*** network initialization complete ***')
    args.tester = tester = Tester(args)
    args.logger.info('*** tester initialization complete ***')
    args.timesteps = env.env.env.spec.max_episode_steps
    return env, env_test, agent, buffer, learner, tester
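# --- Hedged configuration sketch for the variant above. The flag names are
# --- taken from the function body; the use of argparse, the defaults, and the
# --- help strings are assumptions about the surrounding repo, not confirmed.
import argparse

def get_setup_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--vae_dist_help', action='store_true',
                        help='load VAEs used by the distance estimator (assumed semantics)')
    parser.add_argument('--dist_estimator_type', type=str, default=None)
    parser.add_argument('--goal_based', action='store_true')
    parser.add_argument('--imaginary_obstacle_transitions', action='store_true')
    parser.add_argument('--im_buffer_size', type=int, default=10000,
                        help='kept small so the buffer holds the most recent collisions (default is assumed)')
    return parser.parse_args()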
def __init__(self, args):
    self.args = args
    self.env = make_env(args)
    self.env_test = make_env(args)
    # One environment instance per episode, so a full cycle of rollouts
    # can be collected side by side.
    self.env_List = []
    for i in range(args.episodes):
        self.env_List.append(make_env(args))
    self.agent = create_agent(args)
    self.achieved_trajectory_pool = TrajectoryPool(args, args.hgg_pool_size)
    # Threshold at which hindsight goal generation is switched off (see self.stop).
    self.stop_hgg_threshold = self.args.stop_hgg_threshold
    self.stop = False
    self.learn_calls = 0
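# --- Usage sketch (hedged): a plausible way the stop flag and threshold from
# --- the constructor above get used. The class and method names and the
# --- success-rate computation are assumptions; only self.stop and
# --- self.stop_hgg_threshold come from the snippet.
class HGGLearnerSketch:  # hypothetical name; mirrors the __init__ above
    def __init__(self, args):
        self.stop = False
        self.stop_hgg_threshold = args.stop_hgg_threshold

    def check_stop_condition(self, successes):
        # successes: one 0/1 outcome per rollout of the last cycle
        if not self.stop and sum(successes) / len(successes) > self.stop_hgg_threshold:
            self.stop = True  # switch off hindsight goal generation from here on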
def experiment_setup(args):
    env = make_env(args)
    args.acts_dims = env.acts_dims
    args.obs_dims = env.obs_dims
    args.buffer = buffer = create_buffer(args)
    args.agent = agent = create_agent(args)
    args.agent_graph = agent.graph
    args.learner = learner = create_learner(args)
    args.logger.info('*** network initialization complete ***')
    args.tester = tester = Tester(args)
    args.logger.info('*** tester initialization complete ***')
    return env, agent, buffer, learner, tester
def experiment_setup(args):
    env = make_env(args)
    env_test = make_env(args)
    if args.goal_based:
        args.obs_dims = list(goal_based_process(env.reset()).shape)
        args.acts_dims = [env.action_space.shape[0]]
        args.compute_reward = env.compute_reward
        args.compute_distance = env.compute_distance
    args.buffer = buffer = ReplayBuffer_Episodic(args)
    args.learner = learner = create_learner(args)
    args.agent = agent = create_agent(args)
    args.logger.info('*** network initialization complete ***')
    args.tester = tester = Tester(args)
    args.logger.info('*** tester initialization complete ***')
    return env, env_test, agent, buffer, learner, tester
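# --- Minimal driver sketch (hedged) showing how the tuple returned by the
# --- experiment_setup variants above is typically consumed. get_args, the
# --- epoch/cycle loop shape, and the learner.learn(...) / tester method
# --- calls are assumptions modeled on HGG-style training scripts, not
# --- confirmed by the snippets above.
if __name__ == '__main__':
    args = get_args()  # hypothetical CLI/arg helper from the repo
    env, env_test, agent, buffer, learner, tester = experiment_setup(args)
    for epoch in range(args.epochs):        # args.epochs is assumed
        for cycle in range(args.cycles):    # args.cycles is assumed
            learner.learn(args, env, env_test, agent, buffer)  # assumed signature
        tester.epoch_summary()              # assumed Tester API
    tester.final_summary()                  # assumed Tester API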