Example #1
 def __init__(self, network, batch_size=128, max_epochs=20, optimizer=optim.Adam):
     self.network = network
     self.batch_size = batch_size
     self.max_epochs = max_epochs
     # optimizer is passed as a class; instantiate it over the network's parameters
     self.optimizer = optimizer(self.network.parameters())
     # Move the model to the GPU when one is available
     if gpu_enabled():
         self.network.cuda()
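
A minimal self-contained sketch of the same pattern, for context: the optimizer is passed as a class and instantiated over the model's parameters, and the model is moved to the GPU when one is available. The Trainer name and the gpu_enabled helper below are illustrative stand-ins, not taken from the source.

    import torch
    import torch.nn as nn
    import torch.optim as optim

    def gpu_enabled():
        # Stand-in for the helper used in the snippets on this page
        return torch.cuda.is_available()

    class Trainer:
        def __init__(self, network, batch_size=128, max_epochs=20, optimizer=optim.Adam):
            self.network = network
            self.batch_size = batch_size
            self.max_epochs = max_epochs
            # Instantiate the optimizer class over the network's parameters
            self.optimizer = optimizer(self.network.parameters())
            if gpu_enabled():
                self.network.cuda()

    # Usage: any nn.Module and any optimizer class with the standard constructor works
    net = nn.Sequential(nn.Linear(4, 32), nn.ReLU(), nn.Linear(32, 2))
    trainer = Trainer(net, batch_size=64)
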
Example #2
 def __init__(self, encoder, decoder, latent_dim, step_dim, obs_dim, act_dim, policy, env, optimizer=None, loss_type='mse',
              init_kl_weight=.001, max_kl_weight=.1, kl_mul=1.07, vae_loss_weight=1, lr=1e-3, bc_weight=100, ego=False, egoidx=None):
     self.encoder = encoder
     self.obs_dim = obs_dim
     self.act_dim = act_dim
     self.decoder = decoder
     self.ego = ego
     self.egoidx = egoidx
     self.bc_weight = bc_weight
     # env is a callable/constructor; instantiate the environment here
     self.env = env()
     self.policy = policy
     # Standard-normal prior over the latent code (custom Normal taking a mean and log-variance)
     self.unit_n = Normal(Variable(torch.zeros(1, latent_dim)),
                          log_var=Variable(torch.zeros(1, latent_dim)))
     self.latent_dim = latent_dim
     self.step_dim = step_dim
     self.init_kl_weight = init_kl_weight
     self.max_kl_weight = max_kl_weight
     self.kl_mul = kl_mul
     # Default to Adam over the encoder/decoder parameters when no optimizer is supplied
     if optimizer is None:
         optimizer = Adam(self.get_params(), lr=lr, eps=1e-5)
     self.loss_type = loss_type
     self.vae_loss_weight = vae_loss_weight
     self.optimizer = optimizer
     if gpu_enabled():
         self.encoder.cuda()
         self.decoder.cuda()
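
The init_kl_weight, max_kl_weight, and kl_mul arguments suggest a geometric KL-annealing schedule for the VAE loss. A hedged sketch of how such a schedule is typically applied (the update rule below is an assumption; the source's actual schedule lives outside this __init__):

    # Assumed schedule: the KL term's weight grows by a factor of kl_mul per
    # training step until it saturates at max_kl_weight.
    init_kl_weight, max_kl_weight, kl_mul = 1e-3, 1e-1, 1.07

    kl_weight = init_kl_weight
    for step in range(200):
        # total_loss = recon_loss + kl_weight * kl_divergence   (per-step VAE loss)
        kl_weight = min(kl_weight * kl_mul, max_kl_weight)

    print(kl_weight)  # saturates at max_kl_weight (reached after roughly 69 steps here)
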
Example #3
    def __init__(self, shape, demean=True, destd=True, clip=10.0):
        super(ObsNorm, self).__init__()
        self.demean = demean
        self.destd = destd
        self.clip = clip
        self.shape = shape
        # Running statistics kept as buffers so they are saved with the module's state;
        # the 1e-2 offsets avoid division by zero before any observations are seen
        self.register_buffer('count', torch.zeros(1).double() + 1e-2)
        self.register_buffer('sum', torch.zeros(shape).double())
        self.register_buffer('sum_sqr', torch.zeros(shape).double() + 1e-2)

        # Derived statistics used to demean, rescale, and clip observations
        self.register_buffer('mean', torch.zeros(shape))
        self.register_buffer('std', torch.ones(shape))

        if gpu_enabled():
            self.cuda()
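
The buffers above hold running observation statistics; the mean and std they imply are what demean, destd, and clip act on. The update and normalization steps are not part of this __init__, so the following is an assumed sketch of the standard computation these buffers support, not the source's exact code:

    import torch

    shape, clip = (3,), 10.0
    count = torch.zeros(1).double() + 1e-2       # 1e-2 avoids division by zero early on
    obs_sum = torch.zeros(shape).double()
    sum_sqr = torch.zeros(shape).double() + 1e-2

    def update(batch):
        # Accumulate per-dimension first and second moments over a batch of observations
        count.add_(batch.shape[0])
        obs_sum.add_(batch.double().sum(dim=0))
        sum_sqr.add_((batch.double() ** 2).sum(dim=0))

    def normalize(x):
        mean = (obs_sum / count).float()
        std = (sum_sqr / count - (obs_sum / count) ** 2).clamp(min=1e-8).sqrt().float()
        return torch.clamp((x - mean) / std, -clip, clip)

    update(torch.randn(32, 3) * 2 + 5)
    print(normalize(torch.randn(4, 3)))
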
Example #4
    def __init__(
            self,
            env,
            env_name,
            policy,
            baseline,
            obs_dim,
            action_dim,
            save_step=20,
            plot=False,
            plot_itr_threshold=0,
            plot_every=10,
            n_itr=500,
            start_itr=0,
            batch_size=1,
            max_path_length=500,
            discount=0.99,
            sampler=None,
            n_vectorized_envs=None,
            center_adv=True,
            fit_baseline=True,
            use_gae=False,
            gae_tau=0.95,
            entropy_bonus=0,
            alter_sd_fn=None,
            alter_sd_args=None,
            env_obj=None,
    ):
        self.env = env
        if env_obj is None:
            self.env_obj = env()
        else:
            self.env_obj = env_obj
        self.env_name = env_name
        self.policy = policy
        self.baseline = baseline
        self.obs_dim = obs_dim
        self.action_dim = action_dim
        self.save_step = save_step
        self.plot = plot
        self.plot_itr_threshold = plot_itr_threshold
        self.n_itr = n_itr
        self.start_itr = start_itr
        self.batch_size = batch_size
        self.max_path_length = max_path_length
        self.discount = discount
        self.center_adv = center_adv
        self.fit_baseline = fit_baseline
        self.alter_sd_fn = alter_sd_fn
        self.alter_sd_args = alter_sd_args
        self.use_gae = use_gae
        self.gae_tau = gae_tau
        self.entropy_bonus = entropy_bonus
        self.plot_every = plot_every

        if sampler is None:
            # Default to enough vectorized environments to collect roughly batch_size
            # timesteps per iteration when each path can run up to max_path_length steps;
            # e.g. batch_size=5000 with max_path_length=500 gives max(1, ceil(10)) = 10 envs.
            if n_vectorized_envs is None:
                n_vectorized_envs = max(1, int(np.ceil(batch_size / max_path_length)))

            sampler = VectorizedSampler(env_name=env_name, env=env, policy=policy, n_envs=n_vectorized_envs)

        self.sampler = sampler

        if gpu_enabled():
            self.policy.cuda()
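
The use_gae, gae_tau, and discount arguments configure advantage estimation. As a point of reference, here is a self-contained sketch of the standard GAE(lambda) recursion those parameters usually drive; this is the textbook recursion, not necessarily this class's exact implementation:

    import numpy as np

    def gae_advantages(rewards, values, last_value, discount=0.99, gae_tau=0.95):
        # values holds V(s_t) per step; last_value bootstraps the value of the final state
        values = np.append(values, last_value)
        advantages = np.zeros(len(rewards))
        gae = 0.0
        for t in reversed(range(len(rewards))):
            delta = rewards[t] + discount * values[t + 1] - values[t]
            gae = delta + discount * gae_tau * gae
            advantages[t] = gae
        return advantages

    adv = gae_advantages(np.ones(5), np.zeros(5), last_value=0.0)
    # With center_adv=True, advantages would additionally be standardized:
    adv = (adv - adv.mean()) / (adv.std() + 1e-8)
    print(adv)
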
Example #5
    def __init__(
        self,
        env,
        env_name,
        policy,
        policy_ex,
        encoder,
        decoder,
        max_path_length,
        obs_dim,
        action_dim,
        step_dim,
        policy_algo,
        policy_ex_algo,
        dataset,
        latent_dim,
        vae,
        plan_horizon,
        max_horizon,
        mpc_batch,
        rand_per_mpc_step,
        mpc_explore,
        mpc_explore_batch,
        reset_ent,
        vae_train_steps,
        mpc_explore_len,
        true_reward_scale,
        discount_factor,
        reward_fn,
        block_config,
        consis_finetuning=False,
        add_frac=1,
        batch_size=1000,
        random_sample_size=10,
        plot_size=5,
    ):
        self.reward_fn = reward_fn
        self.plan_horizon = plan_horizon
        self.max_horizon = max_horizon
        self.mpc_batch = mpc_batch
        self.rand_per_mpc_step = rand_per_mpc_step
        self.mpc_explore = mpc_explore
        self.mpc_explore_batch = mpc_explore_batch
        self.add_frac = add_frac
        self.reset_ent = reset_ent
        self.vae_train_steps = vae_train_steps
        self.mpc_explore_len = mpc_explore_len
        self.consis_finetuning = consis_finetuning
        self.true_reward_scale = true_reward_scale
        self.discount_factor = discount_factor

        self.block_config = block_config
        self.env = env
        self.env_obj = self.env()
        self.env_name = env_name
        self.policy = policy
        self.policy_ex = policy_ex
        self.obs_dim = obs_dim
        self.action_dim = action_dim
        self.step_dim = step_dim
        self.encoder = encoder
        self.decoder = decoder
        self.max_path_length = max_path_length
        self.policy_algo = policy_algo
        self.policy_ex_algo = policy_ex_algo
        self.dataset = dataset
        self.latent_dim = latent_dim
        self.vae = vae
        self.batch_size = batch_size
        self.plot_sampler = None
        self.add_sampler = None
        self.random_sample_size = random_sample_size
        self.plot_size = plot_size
        # Vectorized samplers: random/regular rollouts of the main policy, an MPC-batch-sized
        # sampler, an exploration-policy sampler, and a small sampler for plotting
        self.random_sampler = VectorizedSampler(env=self.env,
                                                env_name=env_name,
                                                policy=self.policy,
                                                n_envs=random_sample_size)
        self.sampler = VectorizedSampler(
            env=self.env,
            env_name=env_name,
            policy=self.policy,
            n_envs=random_sample_size
        )  #ParVectorizedSampler(env=self.env, env_name=env_name, policy=self.policy, n_envs=12)
        self.sampler_mpc = VectorizedSampler(env=self.env,
                                             env_name=env_name,
                                             policy=self.policy,
                                             n_envs=mpc_batch,
                                             ego=vae.ego,
                                             egoidx=vae.egoidx)
        self.sampler_ex = VectorizedSampler(
            env=self.env,
            env_name=env_name,
            policy=self.policy_ex,
            n_envs=random_sample_size,
            ego=vae.ego,
            egoidx=vae.egoidx
        )  #ParVectorizedSampler(env=self.env, env_name=env_name, policy=self.policy_ex, n_envs=12)

        self.plot_sampler = VectorizedSampler(env=self.env,
                                              env_name=env_name,
                                              policy=self.policy,
                                              n_envs=plot_size)
        self.action_space = self.env().action_space

        if gpu_enabled():
            self.policy.cuda()
            self.policy_ex.cuda()
            self.encoder.cuda()
            self.decoder.cuda()
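
plan_horizon, mpc_batch, discount_factor, and reward_fn are the usual ingredients of a random-shooting MPC planner. The actual planning code lives elsewhere in the class, so the following is only an illustrative, self-contained sketch of that style of loop, with toy dynamics and reward standing in for the real environment model:

    import numpy as np

    def random_shooting_plan(state, dynamics_fn, reward_fn, plan_horizon=10,
                             mpc_batch=128, act_dim=2, discount_factor=0.99):
        # Sample mpc_batch candidate action sequences of length plan_horizon
        actions = np.random.uniform(-1, 1, size=(mpc_batch, plan_horizon, act_dim))
        states = np.repeat(state[None, :], mpc_batch, axis=0)
        returns = np.zeros(mpc_batch)
        for t in range(plan_horizon):
            returns += (discount_factor ** t) * reward_fn(states, actions[:, t])
            states = dynamics_fn(states, actions[:, t])
        # Execute only the first action of the highest-return candidate
        return actions[np.argmax(returns), 0]

    # Toy dynamics and reward just to make the sketch runnable
    dyn = lambda s, a: s + 0.1 * a
    rew = lambda s, a: -np.sum(s ** 2, axis=-1)
    print(random_shooting_plan(np.array([1.0, -0.5]), dyn, rew))
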