Example No. 1
 def get_action(self, obs):
     obs, goals, taus = split_flat_obs(obs[None],
                                       self.env.observation_space.low.size,
                                       self.env.goal_dim)
     sampled_actions = self.sample_actions()
     first_sampled_actions = sampled_actions.copy()
     actions = ptu.np_to_var(sampled_actions)
     next_obs = self.expand_np_to_var(obs[0])
     goals = self.expand_np_to_var(goals[0])
     taus = self.expand_np_to_var(taus[0])
     costs = 0
     for i in range(self.mpc_horizon):
         curr_obs = next_obs
         if i > 0:
             sampled_actions = self.sample_actions()
             actions = ptu.np_to_var(sampled_actions)
         flat_obs = merge_into_flat_obs(
             curr_obs,
             goals,
             taus,
         )
         obs_delta = self.debug_qf(flat_obs,
                                   actions,
                                   return_internal_prediction=True)
         next_obs = curr_obs + obs_delta
         next_features = self.env.convert_obs_to_goals(next_obs)
         costs += (next_features[:, :7] - goals[:, :7])**2
     costs_np = ptu.get_numpy(costs).sum(1)
     min_i = np.argmin(costs_np)
     return first_sampled_actions[min_i], {}
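
All of these snippets rely on railrl's `ptu` (pytorch_util) helpers, chiefly `ptu.np_to_var` and `ptu.get_numpy`, written against the pre-0.4 PyTorch `Variable` API. The exact railrl implementations are not shown here; a minimal compatible sketch, assuming that era's API, might look like this:

import numpy as np
import torch
from torch.autograd import Variable


def np_to_var(np_array, requires_grad=False, double=False):
    # Wrap a numpy array in an autograd Variable (pre-0.4 PyTorch style).
    dtype = torch.DoubleTensor if double else torch.FloatTensor
    tensor = torch.from_numpy(np.asarray(np_array)).type(dtype)
    return Variable(tensor, requires_grad=requires_grad)


def get_numpy(tensor_or_var):
    # Unwrap a Variable (or plain tensor) back into a numpy array.
    if isinstance(tensor_or_var, Variable):
        return tensor_or_var.data.cpu().numpy()
    return tensor_or_var.cpu().numpy()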
Example No. 2
 def get_batch(self, train=True):
     dataset = self.train_dataset if train else self.test_dataset
     ind = np.random.randint(0, len(dataset['z']), self.batch_size)
     return {
         'z': ptu.np_to_var(dataset['z'][ind, :]),
         'z_proj': ptu.np_to_var(dataset['z_proj'][ind, :]),
     }
Example No. 3
    def next_state(self, state, action):
        if self.cheat:
            next_states = self.qf.eval_np(
                observations=state[None],
                actions=action[None],
                goals=state[None],
                num_steps_left=np.array([[self.num_steps_left]]),
                return_predictions=True,
            )
            return next_states[0]
        num_steps_left = ptu.np_to_var(self.num_steps_left * np.ones(
            (self.sample_size, 1)))
        obs_dim = state.shape[0]
        states = self.expand_np_to_var(state)
        actions = self.expand_np_to_var(action)
        next_states_np = np.zeros((self.sample_size, obs_dim))
        next_states = ptu.np_to_var(next_states_np, requires_grad=True)
        optimizer = optim.Adam([next_states], self.learning_rate)

        for _ in range(self.num_optimization_steps):
            losses = -self.qf(
                observations=states,
                actions=actions,
                goals=next_states,
                num_steps_left=num_steps_left,
            )
            loss = losses.mean()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        losses_np = ptu.get_numpy(losses)
        best_action_i = np.argmin(losses_np)
        return ptu.get_numpy(next_states[best_action_i, :])
Example No. 4
    def get_action(self, current_ob, goal, num_steps_left):
        if (self.replan_every_time_step
                or self.t_in_plan == self.planning_horizon
                or self.last_solution is None):
            if self.dynamic_lm and self.best_obs_seq is not None:
                error = np.linalg.norm(current_ob -
                                       self.best_obs_seq[self.t_in_plan + 1])
                self.update_lagrange_multiplier(error)

            full_solution = self.replan(current_ob, goal)

            x_torch = ptu.np_to_var(full_solution, requires_grad=True)
            current_ob_torch = ptu.np_to_var(current_ob)

            obs, next_obs = self.batchify(x_torch, current_ob_torch)
            actions = self.tdm_policy(
                observations=obs,
                goals=next_obs,
                num_steps_left=self.num_steps_left_pytorch,
            )
            self.best_action_seq = ptu.get_numpy(actions)
            self.best_obs_seq = np.array([current_ob] +
                                         [ptu.get_numpy(o) for o in next_obs])

            self.last_solution = full_solution
            self.t_in_plan = 0

        agent_info = dict(
            best_action_seq=self.best_action_seq[self.t_in_plan:],
            best_obs_seq=self.best_obs_seq[self.t_in_plan:],
        )
        action = self.best_action_seq[self.t_in_plan]
        self.t_in_plan += 1

        return action, agent_info
Example No. 5
 def denormalize(self, v):
     mean = ptu.np_to_var(self.mean, requires_grad=False)
     std = ptu.np_to_var(self.std, requires_grad=False)
     if v.dim() == 2:
         mean = mean.unsqueeze(0)
         std = std.unsqueeze(0)
     return mean + v * std
Example No. 6
def pretrain_encoder(encoder, opt):
    losses = []
    for _ in range(1000):
        x_np, y_np = swirl_data(BS)
        x = ptu.np_to_var(x_np)
        y = ptu.np_to_var(y_np)
        y_hat = encoder.encode(x)
        loss = ((y_hat - y) ** 2).mean()
        opt.zero_grad()
        loss.backward()
        opt.step()

        losses.append(loss.data.numpy())

    if VERBOSE:
        x_np, y_np = swirl_data(N_VIS)
        x = ptu.np_to_var(x_np)
        y_hat = encoder.encode(x)
        y_hat_np = y_hat.data.numpy()
        x_hat_np = t_to_xy(y_hat_np[:, 0])

        plt.subplot(2, 1, 1)
        plt.plot(np.array(losses))
        plt.title("Training Loss")

        plt.subplot(2, 1, 2)
        plt.plot(x_np[:, 0], x_np[:, 1], '.')
        plt.plot(x_hat_np[:, 0], x_hat_np[:, 1], '.')
        plt.title("Samples")
        plt.legend(["Samples", "Estimates"])
        plt.show()
Example No. 7
    def get_action(self, ob):
        if self.last_solution is None or not self.warm_start:
            init_solution = []
            for _ in range(self.planning_horizon):
                init_solution.append(
                    np.repeat(ob[None], self.num_particles, axis=0))

            self.last_solution = np.hstack(init_solution)

        ob = self._expand_np_to_var(ob)
        actions_np = np.hstack(
            [self.sample_actions() for _ in range(self.planning_horizon)])
        actions = ptu.np_to_var(actions_np)
        next_states = ptu.np_to_var(self.last_solution, requires_grad=True)

        optimizer = optim.Adam([next_states], lr=self.learning_rate)
        for i in range(self.num_grad_steps):
            constraint_loss = self.constraint_fctn(ob, actions, next_states)
            optimizer.zero_grad()
            constraint_loss.sum().backward()
            optimizer.step()

        final_loss = (self.cost_function(ob, actions, next_states) +
                      self.lagrange_multiplier *
                      self.constraint_fctn(ob, actions, next_states))
        self.last_solution = ptu.get_numpy(next_states)
        final_loss_np = ptu.get_numpy(final_loss).sum(axis=1)
        min_i = np.argmin(final_loss_np)
        action = actions_np[min_i, :self.action_dim]
        return action, {}
Example No. 8
def train_encoder(encoder, decoder, encoder_opt):
    batch, true_latents = swirl_data(BS)
    batch = ptu.np_to_var(batch)

    latents, means, log_stds, stds = encoder.get_encoding_and_suff_stats(
        batch
    )
    kl = kl_to_prior(means, log_stds, stds)

    latents = encoder.encode(batch)
    decoder_output = decoder(latents)
    decoder_means = decoder_output[:, 0:2]
    decoder_log_stds = decoder_output[:, 2:4]
    distribution = Normal(decoder_means, decoder_log_stds.exp())
    reconstruction_log_prob = distribution.log_prob(batch).sum(dim=1)

    # elbo = - kl + reconstruction_log_prob
    # loss = - elbo.mean()
    loss = - reconstruction_log_prob.mean()
    # This is the second place where we cheat:
    latent_loss = ((ptu.np_to_var(true_latents) - latents) ** 2).mean()
    loss = loss  # + latent_loss
    encoder_opt.zero_grad()
    loss.backward()
    encoder_opt.step()
    return loss
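
`kl_to_prior` is referenced above but not shown. For a diagonal Gaussian posterior N(mu, std^2) against a standard normal prior, a helper with that signature would normally compute the closed-form KL divergence per batch element; a plausible sketch (the actual railrl helper may differ):

def kl_to_prior(means, log_stds, stds):
    # KL( N(mu, std^2) || N(0, 1) ), summed over latent dimensions,
    # giving one value per batch element.
    return 0.5 * (stds ** 2 + means ** 2 - 1 - 2 * log_stds).sum(dim=1)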
Example No. 9
 def _encode(self, imgs, noisy, clip_std=None, batch_size=None):
     if batch_size is None:
         mu, logvar = self.vae.encode(ptu.np_to_var(imgs))
     else:
         imgs = imgs.reshape(-1, self.vae.imlength)
         n = imgs.shape[0]
         mu, logvar = None, None
         for i in range(0, n, batch_size):
             batch_mu, batch_logvar = self.vae.encode(
                 ptu.np_to_var(imgs[i:i + batch_size]))
             if mu is None:
                 mu = batch_mu
                 logvar = batch_logvar
             else:
                 mu = torch.cat((mu, batch_mu), dim=0)
                 logvar = torch.cat((logvar, batch_logvar), dim=0)
     std = logvar.mul(0.5).exp_()
     if clip_std is None:
         clip_std = self.clip_encoding_std
     if clip_std:
         vae_std = np.copy(self.vae.dist_std)
         vae_std = ptu.np_to_var(vae_std)
         std = torch.min(std, vae_std)
     if noisy:
         eps = ptu.Variable(std.data.new(std.size()).normal_())
         sample = eps.mul(std).add_(mu)
     else:
         sample = mu
     return ptu.get_numpy(sample), ptu.get_numpy(mu), ptu.get_numpy(std)
Example No. 10
 def get_np_action(self, state_np, goal_state_np):
     return ptu.get_numpy(
         self.policy(
             ptu.np_to_var(np.expand_dims(state_np, 0)),
             ptu.np_to_var(np.expand_dims(goal_state_np, 0)),
             self._tau_expanded_torch,
         ).squeeze(0))
Example No. 11
 def next_state(self, state, action, goal_state, discount):
     state = ptu.np_to_var(np.expand_dims(state, 0))
     action = ptu.np_to_var(np.expand_dims(action, 0))
     goal_state = ptu.np_to_var(np.expand_dims(goal_state, 0))
     discount = ptu.np_to_var(np.array([[discount]]))
     return ptu.get_numpy(
         self.qf(state, action, goal_state, discount) + state)[0]
Example No. 12
 def evaluate(x, y):
     action = np.array([x, y])
     action = ptu.np_to_var(action).unsqueeze(0)
     state = ptu.np_to_var(start_state).unsqueeze(0)
     goal_states = ptu.np_to_var(goal_state).unsqueeze(0)
     discount = ptu.np_to_var(np.array([[0]]))
     out = qf(state, action, goal_states, discount)
     return out.data.numpy()
Example No. 13
    def get_action(self, obs):
        sampled_actions = self.sample_actions()
        first_sampled_actions = sampled_actions.copy()
        all_actions_np = [first_sampled_actions]
        actions = ptu.np_to_var(sampled_actions)
        next_obs = self.expand_np_to_var(obs)
        all_obs_torch = [next_obs]
        costs = 0
        all_costs = []
        for i in range(self.mpc_horizon):
            curr_obs = next_obs
            if i > 0:
                sampled_actions = self.sample_actions()
                all_actions_np.append(sampled_actions)
                actions = ptu.np_to_var(sampled_actions)
            next_obs = curr_obs + self.dynamics_model(curr_obs, actions)
            all_obs_torch.append(next_obs)
            new_costs = self.cost_fn(
                ptu.get_numpy(curr_obs),
                ptu.get_numpy(actions),
                ptu.get_numpy(next_obs),
            )
            costs = costs + new_costs
            all_costs.append(new_costs)

        # Reward sum of costs or just last time step?
        # min_i = np.argmin(costs)
        min_costs = np.array(all_costs).min(0)
        min_i = np.argmin(min_costs)

        # For Point2d u-shaped wall
        # best_action_seq = [action_t[min_i, :] for action_t in all_actions_np]
        # best_obs_seq = [
        #     ptu.get_numpy(ob_t[min_i, :]) for ob_t in all_obs_torch
        # ]
        #
        # real_obs_seq = self.env.wrapped_env.wrapped_env.true_states(obs, best_action_seq)
        # self.ax1.clear()
        # self.env.wrapped_env.wrapped_env.plot_trajectory(
        #     self.ax1,
        #     np.array(best_obs_seq),
        #     np.array(best_action_seq),
        #     goal=self.env.wrapped_env.wrapped_env._target_position,
        # )
        # self.ax1.set_title("imagined")
        # self.ax2.clear()
        # self.env.wrapped_env.wrapped_env.plot_trajectory(
        #     self.ax2,
        #     np.array(real_obs_seq),
        #     np.array(best_action_seq),
        #     goal=self.env.wrapped_env.wrapped_env._target_position,
        # )
        # self.ax2.set_title("real")
        # plt.draw()
        # plt.pause(0.001)

        return first_sampled_actions[min_i], {}
Example No. 14
 def normalize(self, v, clip_range=None):
     if clip_range is None:
         clip_range = self.default_clip_range
     mean = ptu.np_to_var(self.mean, requires_grad=False)
     std = ptu.np_to_var(self.std, requires_grad=False)
     if v.dim() == 2:
         # Unsqueeze along the batch dimension and rely on broadcasting
         mean = mean.unsqueeze(0)
         std = std.unsqueeze(0)
     return torch.clamp((v - mean) / std, -clip_range, clip_range)
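
Together with `denormalize` from Example No. 5, this forms a standard z-score transform pair. A small usage sketch, assuming a normalizer object `normalizer` that exposes both methods along with numpy `mean`/`std` attributes:

v = ptu.np_to_var(np.random.randn(32, normalizer.mean.size))
z = normalizer.normalize(v)         # roughly zero-mean, unit-std, clipped
v_back = normalizer.denormalize(z)  # equals v wherever the clip range was not hit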
Example No. 15
    def _realistic_subgoal_reward(self, subgoals, use_double=True):
        if type(subgoals) is np.ndarray:
            subgoals = ptu.np_to_var(subgoals, double=use_double)

        if hasattr(self, "true_prior_distr"):
            log_prob = self.true_prior_distr.log_prob(subgoals)
            log_prob = torch.sum(log_prob, dim=-1)
            return log_prob
        else:
            return ptu.np_to_var(np.zeros(subgoals.shape[:-1]))
Example No. 16
 def get_batch(self, train=True):
     dataset = self.train_dataset if train else self.test_dataset
     ind = np.random.randint(0, len(dataset['obs']), self.batch_size)
     samples_obs = dataset['obs'][ind, :]
     samples_actions = dataset['actions'][ind, :]
     samples_next_obs = dataset['next_obs'][ind, :]
     return {
         'obs': ptu.np_to_var(samples_obs),
         'actions': ptu.np_to_var(samples_actions),
         'next_obs': ptu.np_to_var(samples_next_obs),
     }
Example No. 17
    def _np_to_pytorch(self, ob_np, goal_np, taus_np, batch_size=1):
        ob_np = np.tile(ob_np, (batch_size, 1, 1))
        goal_np = np.tile(goal_np, (batch_size, 1, 1))
        taus_np = np.tile(taus_np.reshape((1, self.num_subprobs, 1)),
                          (batch_size, 1, 1))

        ob = ptu.np_to_var(ob_np, double=self.use_double)
        goal = ptu.np_to_var(goal_np, double=self.use_double)
        taus = ptu.np_to_var(taus_np, double=self.use_double)

        return ob, goal, taus
Example No. 18
 def _action_cost(self, x, current_ob, goal):
     x = ptu.np_to_var(x, requires_grad=True)
     actions = x.unsqueeze(0)
     current_obs = ptu.np_to_var(current_ob[None])
     goals = ptu.np_to_var(goal[None])
     num_steps_left = ptu.np_to_var(np.zeros((1, 1)))
     prob_reach = self.beta_q(current_obs, actions, goals, num_steps_left)
     loss = -prob_reach
     # Report the value of the same objective whose gradient is taken below.
     loss_np = ptu.get_numpy(loss)[0].astype(np.float64)
     loss.backward()
     gradient_np = ptu.get_numpy(x.grad).astype(np.float64)
     return loss_np, gradient_np
Example No. 19
    def __init__(self,
                 representation_size,
                 input_size,
                 hidden_sizes=list([64, 128, 64]),
                 init_w=1e-3,
                 hidden_init=ptu.fanin_init,
                 output_activation=identity,
                 output_scale=1,
                 layer_norm=False,
                 normalize=True,
                 train_data_mean=None,
                 train_data_std=None,
                 **kwargs):
        self.save_init_params(locals())
        super().__init__()
        self.representation_size = representation_size
        self.hidden_init = hidden_init
        self.output_activation = output_activation
        self.dist_mu = np.zeros(self.representation_size)
        self.dist_std = np.ones(self.representation_size)
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()
        self.init_w = init_w
        hidden_sizes = list(hidden_sizes)
        self.input_size = input_size
        self.encoder = TwoHeadMlp(hidden_sizes,
                                  representation_size,
                                  representation_size,
                                  input_size,
                                  layer_norm=layer_norm,
                                  hidden_init=hidden_init,
                                  output_activation=output_activation,
                                  init_w=init_w)
        hidden_sizes.reverse()
        self.decoder = Mlp(hidden_sizes,
                           input_size,
                           representation_size,
                           layer_norm=layer_norm,
                           hidden_init=hidden_init,
                           output_activation=output_activation,
                           init_w=init_w)
        self.output_scale = output_scale

        self.normalize = normalize
        if train_data_mean is None:
            self.train_data_mean = ptu.np_to_var(np.zeros(input_size))
        else:
            self.train_data_mean = train_data_mean
        if train_data_std is None:
            self.train_data_std = ptu.np_to_var(np.ones(input_size))
        else:
            self.train_data_std = train_data_std
Example No. 20
    def __init__(
            self,
            train_dataset,
            test_dataset,
            model,
            batch_size=128,
            beta=0.5,
            beta_schedule=None,
            lr=1e-3,
            extra_recon_logging=dict(),
            recon_weights=None,
            recon_loss_type='mse',
            **kwargs
    ):
        assert recon_loss_type in ['mse', 'wse']
        self.batch_size = batch_size
        self.beta = beta
        self.beta_schedule = beta_schedule
        if self.beta_schedule is None:
            self.beta_schedule = ConstantSchedule(self.beta)

        if ptu.gpu_enabled():
            model.cuda()

        self.model = model
        self.representation_size = model.representation_size

        self.optimizer = optim.Adam(self.model.parameters(), lr=lr)
        self.train_dataset, self.test_dataset = train_dataset, test_dataset
        assert self.train_dataset['next_obs'].dtype == np.float32
        assert self.test_dataset['next_obs'].dtype == np.float32
        assert self.train_dataset['obs'].dtype == np.float32
        assert self.test_dataset['obs'].dtype == np.float32
        self.normalize = model.normalize
        self.mse = nn.MSELoss()

        if self.normalize:
            self.train_data_mean = ptu.np_to_var(np.mean(self.train_dataset['next_obs'], axis=0))
            np_std = np.std(self.train_dataset['next_obs'], axis=0)
            for i in range(len(np_std)):
                if np_std[i] < 1e-3:
                    np_std[i] = 1.0
            self.train_data_std = ptu.np_to_var(np_std)

            self.model.train_data_mean = self.train_data_mean
            self.model.train_data_std = self.train_data_std

        self.extra_recon_logging = extra_recon_logging
        self.recon_weights = recon_weights
        self.recon_loss_type = recon_loss_type
Example No. 21
 def cost_function(self, x, current_ob):
     self.forward -= time.time()
     x = ptu.np_to_var(x, requires_grad=True)
     current_ob = ptu.np_to_var(current_ob)
     loss = (self.lagrange_multipler *
             self._feasibility_cost_function(x, current_ob) +
             self._env_cost_function(x, current_ob))
     loss_np = ptu.get_numpy(loss)[0].astype(np.float64)
     self.forward += time.time()
     self.backward -= time.time()
     loss.squeeze(0).backward()
     gradient_np = ptu.get_numpy(x.grad).astype(np.float64)
     self.backward += time.time()
     return loss_np, gradient_np
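
The `(loss_np, gradient_np)` pair of float64 values returned here matches the contract that `scipy.optimize.fmin_l_bfgs_b` expects when no separate `fprime` is given, which is how the `replan` methods in Examples No. 27 and 29 consume it. A toy illustration of that contract with a plain quadratic instead of the learned costs above:

import numpy as np
from scipy import optimize


def quadratic_cost(x, target):
    # Return (f(x), grad f(x)) as float64, mirroring cost_function above.
    diff = x - target
    return float((diff ** 2).sum()), (2 * diff).astype(np.float64)


x_opt, f_opt, info = optimize.fmin_l_bfgs_b(
    quadratic_cost,
    x0=np.zeros(4),
    args=(np.arange(4.0),),
)
# x_opt ends up close to [0., 1., 2., 3.] and info['warnflag'] is 0.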
Example No. 22
def dump_reconstructions(vae_env, epoch, n_recon=16):
    from railrl.core import logger
    import os.path as osp
    from torchvision.utils import save_image

    if vae_env.use_vae_dataset and vae_env.vae_dataset_path is not None:
        from multiworld.core.image_env import normalize_image
        from railrl.misc.asset_loader import local_path_from_s3_or_local_path
        filename = local_path_from_s3_or_local_path(vae_env.vae_dataset_path)
        dataset = np.load(filename).item()
        sampled_idx = np.random.choice(dataset['next_obs'].shape[0], n_recon)
        if vae_env.vae_input_key_prefix == 'state':
            states = dataset['next_obs'][sampled_idx]
            imgs = ptu.np_to_var(
                vae_env.wrapped_env.states_to_images(states)
            )
            recon_samples, _, _ = vae_env.vae(ptu.np_to_var(states))
            recon_imgs = ptu.np_to_var(
                vae_env.wrapped_env.states_to_images(ptu.get_numpy(recon_samples))
            )
        else:
            imgs = ptu.np_to_var(
                normalize_image(dataset['next_obs'][sampled_idx])
            )
            recon_imgs, _, _, _ = vae_env.vae(imgs)
        del dataset
    else:
        return

    comparison = torch.cat([
        imgs.narrow(start=0, length=vae_env.wrapped_env.image_length, dimension=1).contiguous().view(
            -1,
            vae_env.wrapped_env.channels,
            vae_env.wrapped_env.imsize,
            vae_env.wrapped_env.imsize
        ),
        recon_imgs.contiguous().view(
            n_recon,
            vae_env.wrapped_env.channels,
            vae_env.wrapped_env.imsize,
            vae_env.wrapped_env.imsize
        )[:n_recon]
    ])

    if epoch is not None:
        save_dir = osp.join(logger.get_snapshot_dir(), 'r_%d.png' % epoch)
    else:
        save_dir = osp.join(logger.get_snapshot_dir(), 'r.png')
    save_image(comparison.data.cpu(), save_dir, nrow=n_recon)
Example No. 23
 def _cost_function(self, x, order):
     x = ptu.np_to_var(x, requires_grad=True)
     loss = 0
     for action, next_state in self.split(x):
         next_features_predicted = next_state[self.goal_slice]
         desired_features = ptu.np_to_var(
             self.env.multitask_goal[self.multitask_goal_slice] *
             np.ones(next_features_predicted.shape))
         diff = next_features_predicted - desired_features
         loss += (diff**2).sum()
     if order == 0:
         return ptu.get_numpy(loss)[0]
     elif order == 1:
         loss.squeeze(0).backward()
         return ptu.get_numpy(x.grad)
Example No. 24
 def get_loss(training=False):
     buffer = replay_buffer.get_replay_buffer(training)
     batch = buffer.random_batch(batch_size)
     obs = ptu.np_to_var(batch['observations'], requires_grad=False)
     goals = ptu.np_to_var(batch['goal_states'], requires_grad=False)
     goal = goal_chooser(obs, goals)
     actions = argmax_q(obs, goal, discount)
     final_state_predicted = goal_conditioned_model(
         obs,
         actions,
         goal,
         discount,
     ) + obs
     rewards = goal_chooser.reward_function(final_state_predicted, goals)
     return -rewards.mean()
Example No. 25
 def choose_action_to_reach_adam(self, current_ob, goal):
     n_parts = 100
     x0 = np.vstack([
         self.env.action_space.sample()
         for _ in range(n_parts)
     ])
     current_obs = ptu.np_to_var(current_ob).unsqueeze(0).repeat(n_parts, 1)
     goals = ptu.np_to_var(goal).unsqueeze(0).repeat(n_parts, 1)
     num_steps_left = ptu.np_to_var(np.zeros((n_parts, 1)))
     best_action, _ = fmin_adam_torch(
         self._action_cost_batch,
         x0,
         f_args=(current_obs, goals, num_steps_left),
     )
     return best_action
Example No. 26
 def get_action(self, obs):
     action_inits = self.sample_actions()
     actions = ptu.np_to_var(action_inits, requires_grad=True)
     obs = self.expand_np_to_var(obs)
     optimizer = optim.Adam([actions], self.learning_rate)
     losses = -self.qf(
         obs,
         actions,
         self._goal_batch,
         self._tau_batch,
     )
     for _ in range(self.num_gradient_steps):
         loss = losses.mean()
         optimizer.zero_grad()
         loss.backward()
         optimizer.step()
         losses = -self.qf(
             obs,
             actions,
             self._goal_batch,
             self._tau_batch,
         )
     losses_np = ptu.get_numpy(losses)
     best_action_i = np.argmin(losses_np)
     return ptu.get_numpy(actions[best_action_i, :]), {}
Example No. 27
 def replan(self, current_ob, goal):
     if self.last_solution is None or not self.warm_start:
         solution = []
         for i in range(self.planning_horizon):
             solution.append(current_ob)
         self.last_solution = np.hstack(solution)
     self.desired_features_torch = ptu.np_to_var(goal[None].repeat(
         self.planning_horizon, 0))
     self.forward = self.backward = 0
     start = time.time()
     x, f, d = optimize.fmin_l_bfgs_b(self.cost_function,
                                      self.last_solution,
                                      args=(current_ob, ),
                                      bounds=self.bounds,
                                      **self.solver_kwargs)
     total = time.time() - start
     self.totals.append(total)
     # print("total forward: {}".format(self.forward))
     # print("total backward: {}".format(self.backward))
     # print("total: {}".format(total))
     # print("extra: {}".format(total - self.forward - self.backward))
     # print("total mean: {}".format(np.mean(self.totals)))
     warnflag = d['warnflag']
     if warnflag != 0:
         if warnflag == 1:
             print("too many function evaluations or too many iterations")
         else:
             print(d['task'])
     return x
Example No. 28
def simulate_policy(args):
    ptu.set_gpu_mode(True)
    model = pickle.load(open(args.file, "rb"))  # joblib.load(args.file)
    model.to(ptu.device)
    imgs = np.load(args.imgfile)
    import ipdb
    ipdb.set_trace()
    z = model.encode(ptu.np_to_var(imgs))
    samples = model.decode(z).cpu()

    recon_imgs = samples.data.view(64, model.input_channels, model.imsize,
                                   model.imsize)
    recon_imgs = recon_imgs.cpu()
    grid = make_grid(recon_imgs, nrow=8)
    ndarr = grid.mul(255).clamp(0, 255).byte().permute(1, 2, 0).numpy()
    im = Image.fromarray(ndarr)
    im.show()
    # cv2.imshow('img', im)
    # cv2.waitKey(1)
    # for sample in samples:
    #     tensor = tensor.cpu()
    #     img = ptu.get_numpy(tensor)
    # `imgs` is still a numpy array at this point; assuming it holds the same
    # 64 flattened images that were encoded above, reshape it like recon_imgs
    # so the two can be stacked into one comparison grid.
    imgs_torch = torch.from_numpy(imgs).float().view_as(recon_imgs)
    comparison = torch.cat([
        recon_imgs,
        imgs_torch,
    ])
    save_dir = osp.join(logger.get_snapshot_dir(), 'r.png')
    save_image(comparison.cpu(), save_dir, nrow=8)
Example No. 29
 def replan(self, current_ob, goal):
     if self.last_solution is None or not self.warm_start:
         solution = []
         for i in range(self.planning_horizon):
             solution.append(np.zeros(self.action_dim))
             solution.append(current_ob)
         self.last_solution = np.hstack(solution)
     self.desired_features_torch = ptu.np_to_var(
         goal[None].repeat(self.planning_horizon, 0)
     )
     self.forward = self.backward = 0
     start = time.time()
     x, f, d = optimize.fmin_l_bfgs_b(
         self.cost_function,
         self.last_solution,
         args=(current_ob,),
         bounds=self.bounds,
         **self.solver_kwargs
     )
     self.t1 = np.array([
         1, 0, 1, 0,
         1, 0, 2, 0,
         0, 1, 2, 1,
         0, 1, 2, 2,
     ])
     self.t2 = np.array([
         1, 0, 2, 0,
         0, 1, 2, 1,
         0, 1, 2, 2,
         -1, 1, 1, 3,
     ])
     total = time.time() - start
     self.totals.append(total)
     process_lbfgs_debug_dict(d)
     return x
Example No. 30
def fmin_adam_torch(
        batch_torch_f,
        x0_np,
        f_args=None,
        f_kwargs=None,
        lr=1e-3,
        num_steps=100,
):
    if f_args is None:
        f_args = tuple()
    if f_kwargs is None:
        f_kwargs = {}

    x = ptu.np_to_var(x0_np, requires_grad=True)
    optimizer = Adam([x], lr=lr)
    for _ in range(num_steps):
        loss = batch_torch_f(x, *f_args, **f_kwargs).sum()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    final_values_np = ptu.get_numpy(batch_torch_f(x, *f_args, **f_kwargs))
    final_x_np = ptu.get_numpy(x)
    min_i = np.argmin(final_values_np)
    return final_x_np[min_i], final_values_np[min_i]
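
A usage sketch for `fmin_adam_torch`: it runs Adam on a whole batch of candidate solutions in parallel and returns the single best one, so `batch_torch_f` must map an (N, d) input to N per-candidate values. For instance, minimizing ||x - c||^2 from 128 random starts (assuming the `ptu` helpers sketched after Example No. 1):

def batch_quadratic(x, center):
    # x: (N, d) Variable; return one scalar cost per row.
    return ((x - center) ** 2).sum(dim=1)


center = ptu.np_to_var(np.array([1.0, -2.0]))
best_x, best_value = fmin_adam_torch(
    batch_quadratic,
    np.random.randn(128, 2),
    f_args=(center,),
    lr=1e-2,
    num_steps=500,
)
# best_x should end up close to [1., -2.].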