Example #1
def experiment_setup(args):
    if args.vae_dist_help:
        load_vaes(args)

    # Some env extensions use the dist estimator, so this load is paired
    # with the interval wrapper.  TODO: use a different approach?
    load_field_parameters(args)
    if args.dist_estimator_type is not None:
        temp_env = make_temp_env(args)
        load_dist_estimator(args, temp_env)
        del temp_env

    env = make_env(args)
    env_test = make_env(args)

    if args.goal_based:
        args.obs_dims = list(goal_based_process(env.reset()).shape)
        args.acts_dims = [env.action_space.shape[0]]
        args.compute_reward = env.compute_reward
        args.compute_distance = env.compute_distance

    if args.imaginary_obstacle_transitions:
        # Relatively small buffer size so it always holds the most recent collisions.
        args.imaginary_buffer = ReplayBuffer_Imaginary(
            args, buffer_size=args.im_buffer_size)
    args.buffer = buffer = ReplayBuffer_Episodic(args)
    args.learner = learner = create_learner(args)
    args.agent = agent = create_agent(args)
    args.logger.info('*** network initialization complete ***')
    args.tester = tester = Tester(args)
    args.logger.info('*** tester initialization complete ***')
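    # Reach through the nested wrappers to read the registered per-episode step limit.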
    args.timesteps = env.env.env.spec.max_episode_steps

    return env, env_test, agent, buffer, learner, tester
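
A minimal sketch of how the returned tuple is typically consumed by a training script. get_args, args.epochs/args.cycles, and the learner.learn / tester.*_summary calls are assumptions inferred from the setup code, not confirmed by these examples:

def main():
    args = get_args()  # assumed helper that builds the argument namespace used above
    env, env_test, agent, buffer, learner, tester = experiment_setup(args)
    for epoch in range(args.epochs):
        for cycle in range(args.cycles):
            # Assumed learner API: roll out episodes into the buffer and train the agent.
            learner.learn(args, env, env_test, agent, buffer)
            tester.cycle_summary()
        tester.epoch_summary()
    tester.final_summary()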
Example #2
def __init__(self, args):
    self.args = args
    self.env = make_env(args)
    self.env_test = make_env(args)
    # One persistent environment per episode index in a rollout batch.
    self.env_List = []
    for _ in range(args.episodes):
        self.env_List.append(make_env(args))
    self.agent = create_agent(args)
    self.achieved_trajectory_pool = TrajectoryPool(args, args.hgg_pool_size)
    # Success-rate threshold used to decide when to stop HGG goal sampling.
    self.stop_hgg_threshold = self.args.stop_hgg_threshold
    self.stop = False
    self.learn_calls = 0
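
A sketch of how env_List is typically used inside the learner: one persistent environment per episode index, so each concurrent rollout keeps its own state. The obs['achieved_goal'] key follows the gym GoalEnv convention, and agent.step(obs, explore=True) is an assumed API, not confirmed by this snippet:

def collect_achieved_trajectories(self):
    # Hypothetical rollout loop; one env per episode index.
    trajectories = []
    for i in range(self.args.episodes):
        obs = self.env_List[i].reset()
        trajectory = [obs['achieved_goal'].copy()]
        for _ in range(self.args.timesteps):
            action = self.agent.step(obs, explore=True)  # assumed agent API
            obs, _, _, _ = self.env_List[i].step(action)
            trajectory.append(obs['achieved_goal'].copy())
        trajectories.append(trajectory)
    return trajectories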
Example #3
def experiment_setup(args):
    env = make_env(args)
    args.acts_dims = env.acts_dims
    args.obs_dims = env.obs_dims

    args.buffer = buffer = create_buffer(args)
    args.agent = agent = create_agent(args)
    args.agent_graph = agent.graph
    args.learner = learner = create_learner(args)
    args.logger.info('*** network initialization complete ***')
    args.tester = tester = Tester(args)
    args.logger.info('*** tester initialization complete ***')

    return env, agent, buffer, learner, tester
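
Unlike the other variants, this version reads obs_dims and acts_dims straight off the environment, which implies make_env returns a wrapper that precomputes them. A minimal sketch of such a wrapper, assuming flat gym spaces; the class name is hypothetical:

import gym

class DimsWrapper(gym.Wrapper):
    # Assumed convention: expose flat observation/action dimensions as plain lists.
    def __init__(self, env):
        super().__init__(env)
        self.obs_dims = list(env.observation_space.shape)
        self.acts_dims = list(env.action_space.shape)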
Example #4
def experiment_setup(args):
    env = make_env(args)
    env_test = make_env(args)
    if args.goal_based:
        args.obs_dims = list(goal_based_process(env.reset()).shape)
        args.acts_dims = [env.action_space.shape[0]]
        args.compute_reward = env.compute_reward
        args.compute_distance = env.compute_distance

    args.buffer = buffer = ReplayBuffer_Episodic(args)
    args.learner = learner = create_learner(args)
    args.agent = agent = create_agent(args)
    args.logger.info('*** network initialization complete ***')
    args.tester = tester = Tester(args)
    args.logger.info('*** tester initialization complete ***')

    return env, env_test, agent, buffer, learner, tester
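
goal_based_process is used above to flatten a goal-based dict observation into a single vector before measuring its shape. A minimal sketch of what such a helper usually looks like, assuming the gym GoalEnv observation layout; this is a reconstruction, not the project's confirmed implementation:

import numpy as np

def goal_based_process(obs):
    # Concatenate the raw observation with the desired goal into one flat vector.
    return np.concatenate([obs['observation'], obs['desired_goal']])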