Example #1
def test_actions_to_onehot():
    all_actions = torch.tensor([[1, 0, 2], [1, 0, 2]])
    action_size = [2, 1, 3]
    oh_actions = ModelUtils.actions_to_onehot(all_actions, action_size)
    expected_result = [
        torch.tensor([[0, 1], [0, 1]], dtype=torch.float),
        torch.tensor([[1], [1]], dtype=torch.float),
        torch.tensor([[0, 0, 1], [0, 0, 1]], dtype=torch.float),
    ]
    for res, exp in zip(oh_actions, expected_result):
        assert torch.equal(res, exp)
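The expected output above pins down the contract: one tensor per branch, each row a one-hot of that branch's action index. A minimal sketch of such a conversion, assuming the helper simply wraps torch.nn.functional.one_hot per branch (the function name below is illustrative):

import torch
from typing import List

def actions_to_onehot_sketch(
    discrete_actions: torch.Tensor, action_size: List[int]
) -> List[torch.Tensor]:
    # One column per branch; each column becomes a (batch, branch_size) one-hot tensor.
    return [
        torch.nn.functional.one_hot(discrete_actions[:, i].long(), size).float()
        for i, size in enumerate(action_size)
    ]

# Reproduces expected_result from the test above.
print(actions_to_onehot_sketch(torch.tensor([[1, 0, 2], [1, 0, 2]]), [2, 1, 3]))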
Example #2
    def _condense_q_streams(
            self, q_output: Dict[str, torch.Tensor],
            discrete_actions: torch.Tensor) -> Dict[str, torch.Tensor]:
        condensed_q_output = {}
        onehot_actions = ModelUtils.actions_to_onehot(discrete_actions,
                                                      self.act_size)
        for key, item in q_output.items():
            branched_q = ModelUtils.break_into_branches(item, self.act_size)
            only_action_qs = torch.stack([
                torch.sum(_act * _q, dim=1, keepdim=True)
                for _act, _q in zip(onehot_actions, branched_q)
            ])

            condensed_q_output[key] = torch.mean(only_action_qs, dim=0)
        return condensed_q_output
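The multiply-and-sum inside the list comprehension is a one-hot gather: for each branch it keeps only the Q-value of the action that was actually taken. A toy, self-contained illustration with made-up numbers (one branch of size 3, batch of 2):

import torch

branch_q = torch.tensor([[0.1, 0.5, 0.9], [0.3, 0.2, 0.4]])            # Q-values per action
onehot = torch.nn.functional.one_hot(torch.tensor([2, 0]), 3).float()  # taken actions
# Zero out every column except the taken action, then sum each row.
selected = torch.sum(onehot * branch_q, dim=1, keepdim=True)
print(selected)  # tensor([[0.9000], [0.3000]])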
Example #3
 def to_flat(self, discrete_branches: List[int]) -> torch.Tensor:
     """
     Flatten this AgentAction into a single torch Tensor of dimension (batch, num_continuous + num_one_hot_discrete).
     Discrete actions are converted into one-hot and concatenated with continuous actions.
     :param discrete_branches: List of sizes for discrete actions.
     :return: Tensor of flattened actions.
     """
     # if there are any discrete actions, create one-hot
     if self.discrete_list is not None and self.discrete_list:
         discrete_oh = ModelUtils.actions_to_onehot(self.discrete_tensor,
                                                    discrete_branches)
         discrete_oh = torch.cat(discrete_oh, dim=1)
     else:
         discrete_oh = torch.empty(0)
     return torch.cat([self.continuous_tensor, discrete_oh], dim=-1)
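The same flattening can be reproduced without an AgentAction instance; a small sketch with made-up shapes (two continuous actions and discrete_branches = [2, 3] are purely illustrative):

import torch

continuous = torch.tensor([[0.5, -0.5], [1.0, 0.0]])    # (batch, num_continuous)
discrete = torch.tensor([[1, 2], [0, 1]])               # one column per branch
one_hots = [
    torch.nn.functional.one_hot(discrete[:, i], n).float()
    for i, n in enumerate([2, 3])
]
flat = torch.cat([continuous] + one_hots, dim=-1)
print(flat.shape)  # torch.Size([2, 7]): 2 continuous + 2 + 3 one-hot columns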
Example #4
def test_predict_minimum_training():
    # given 5 numbers, predict the index of the minimum
    np.random.seed(1336)
    torch.manual_seed(1336)
    n_k = 5
    size = n_k + 1
    embedding_size = 64
    entity_embeddings = EntityEmbeddings(size, [size],
                                         embedding_size, [n_k],
                                         concat_self=False)
    transformer = ResidualSelfAttention(embedding_size)
    l_layer = LinearEncoder(embedding_size, 2, n_k)
    loss = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(
        list(entity_embeddings.parameters()) + list(transformer.parameters()) +
        list(l_layer.parameters()),
        lr=0.001,
        weight_decay=1e-6,
    )

    batch_size = 200
    onehots = ModelUtils.actions_to_onehot(
        torch.arange(0, n_k).unsqueeze(1), [n_k])[0]
    onehots = onehots.expand((batch_size, -1, -1))
    losses = []
    for _ in range(400):
        num = np.random.randint(0, n_k)
        inp = torch.rand((batch_size, num + 1, 1))
        with torch.no_grad():
            # create the target: the index of the minimum
            argmin = torch.argmin(inp, dim=1)
            argmin = argmin.squeeze()
            argmin = argmin.detach()
        sliced_oh = onehots[:, :num + 1]
        inp = torch.cat([inp, sliced_oh], dim=2)

        embeddings = entity_embeddings(inp, [inp])
        masks = EntityEmbeddings.get_masks([inp])
        prediction = transformer(embeddings, masks)
        prediction = l_layer(prediction)
        ce = loss(prediction, argmin)
        losses.append(ce.item())
        print(ce.item())
        optimizer.zero_grad()
        ce.backward()
        optimizer.step()
    assert np.array(losses[-20:]).mean() < 0.1
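Note how the one-hot tensor is used here: it is built once for indices 0..n_k-1 and then sliced, so every entity carries an identity channel next to its random value. A quick shape check under the same assumptions (n_k = 5, batch_size = 200, num = 2):

import torch

n_k, batch_size, num = 5, 200, 2
onehots = torch.nn.functional.one_hot(torch.arange(n_k), n_k).float()  # (5, 5)
onehots = onehots.expand((batch_size, -1, -1))                         # (200, 5, 5)
inp = torch.rand((batch_size, num + 1, 1))                             # (200, 3, 1)
inp = torch.cat([inp, onehots[:, : num + 1]], dim=2)                   # (200, 3, 6)
print(inp.shape)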
Example #5
 def forward(self, action: AgentAction) -> torch.Tensor:
     """
     Returns a tensor corresponding to the flattened action.
     :param action: An AgentAction object
     """
     action_list: List[torch.Tensor] = []
     if self._specs.continuous_size > 0:
         action_list.append(action.continuous_tensor)
     if self._specs.discrete_size > 0:
         flat_discrete = torch.cat(
             ModelUtils.actions_to_onehot(
                 torch.as_tensor(action.discrete_tensor, dtype=torch.long),
                 self._specs.discrete_branches,
             ),
             dim=1,
         )
         action_list.append(flat_discrete)
     return torch.cat(action_list, dim=1)
Example #6
    def predict_next_state(self, mini_batch: AgentBuffer) -> torch.Tensor:
        """
        Uses the current state embedding and the action of the mini_batch to predict
        the next state embedding.
        """
        if self._policy_specs.is_action_continuous():
            action = ModelUtils.list_to_tensor(mini_batch["actions"], dtype=torch.float)
        else:
            action = torch.cat(
                ModelUtils.actions_to_onehot(
                    ModelUtils.list_to_tensor(mini_batch["actions"], dtype=torch.long),
                    self._policy_specs.discrete_action_branches,
                ),
                dim=1,
            )
        forward_model_input = torch.cat(
            (self.get_current_state(mini_batch), action), dim=1
        )

        return self.forward_model_next_state_prediction(forward_model_input)
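The forward-model input is just the current state embedding concatenated with the action (one-hot in the discrete case). A toy sketch with made-up sizes, standing in for get_current_state and a single discrete branch of size 3:

import torch

state_embedding = torch.rand((8, 64))                       # (batch, embedding_size)
action = torch.nn.functional.one_hot(
    torch.randint(0, 3, (8,)), 3
).float()                                                   # (batch, branch_size)
forward_model_input = torch.cat((state_embedding, action), dim=1)
print(forward_model_input.shape)  # torch.Size([8, 67])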
Example #7
 def compute_inverse_loss(self, mini_batch: AgentBuffer) -> torch.Tensor:
     """
     Computes the inverse loss for a mini_batch. Corresponds to the error on the
     action prediction (given the current and next state).
     """
     predicted_action = self.predict_action(mini_batch)
     actions = AgentAction.from_dict(mini_batch)
     _inverse_loss = 0
     if self._action_spec.continuous_size > 0:
         sq_difference = (
             actions.continuous_tensor - predicted_action.continuous
         ) ** 2
         sq_difference = torch.sum(sq_difference, dim=1)
         _inverse_loss += torch.mean(
             ModelUtils.dynamic_partition(
                 sq_difference,
                 ModelUtils.list_to_tensor(mini_batch["masks"], dtype=torch.float),
                 2,
             )[1]
         )
     if self._action_spec.discrete_size > 0:
         true_action = torch.cat(
             ModelUtils.actions_to_onehot(
                 actions.discrete_tensor, self._action_spec.discrete_branches
             ),
             dim=1,
         )
         cross_entropy = torch.sum(
             -torch.log(predicted_action.discrete + self.EPSILON) * true_action,
             dim=1,
         )
         _inverse_loss += torch.mean(
             ModelUtils.dynamic_partition(
                 cross_entropy,
                 ModelUtils.list_to_tensor(
                     mini_batch["masks"], dtype=torch.float
                 ),  # use masks not action_masks
                 2,
             )[1]
         )
     return _inverse_loss
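The dynamic_partition call is used here as a masked mean: assuming it mirrors TensorFlow's dynamic_partition, index [1] selects the entries whose mask is 1, so padded steps do not contribute to the loss. A minimal equivalent with made-up per-step losses:

import torch

cross_entropy = torch.tensor([0.2, 1.5, 0.7, 0.1])   # per-step losses
masks = torch.tensor([1.0, 0.0, 1.0, 1.0])           # 1 = real step, 0 = padding
# Keep only masked-in entries, then average; the padded 1.5 is ignored.
masked_mean = cross_entropy[masks.bool()].mean()
print(masked_mean)  # tensor(0.3333)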
Example #8
 def compute_inverse_loss(self, mini_batch: AgentBuffer) -> torch.Tensor:
     """
     Computes the inverse loss for a mini_batch. Corresponds to the error on the
     action prediction (given the current and next state).
     """
     predicted_action = self.predict_action(mini_batch)
     if self._policy_specs.is_action_continuous():
         sq_difference = (
             ModelUtils.list_to_tensor(mini_batch["actions"], dtype=torch.float)
             - predicted_action
         ) ** 2
         sq_difference = torch.sum(sq_difference, dim=1)
         return torch.mean(
             ModelUtils.dynamic_partition(
                 sq_difference,
                 ModelUtils.list_to_tensor(mini_batch["masks"], dtype=torch.float),
                 2,
             )[1]
         )
     else:
         true_action = torch.cat(
             ModelUtils.actions_to_onehot(
                 ModelUtils.list_to_tensor(mini_batch["actions"], dtype=torch.long),
                 self._policy_specs.discrete_action_branches,
             ),
             dim=1,
         )
         cross_entropy = torch.sum(
             -torch.log(predicted_action + self.EPSILON) * true_action, dim=1
         )
         return torch.mean(
             ModelUtils.dynamic_partition(
                 cross_entropy,
                 ModelUtils.list_to_tensor(
                     mini_batch["masks"], dtype=torch.float
                 ),  # use masks not action_masks
                 2,
             )[1]
         )
Example #9
    def _update_batch(self, mini_batch_demo: Dict[str, np.ndarray],
                      n_sequences: int) -> Dict[str, float]:
        """
        Helper function for update_batch.
        """
        vec_obs = [ModelUtils.list_to_tensor(mini_batch_demo["vector_obs"])]
        act_masks = None
        if self.policy.use_continuous_act:
            expert_actions = ModelUtils.list_to_tensor(
                mini_batch_demo["actions"])
        else:
            raw_expert_actions = ModelUtils.list_to_tensor(
                mini_batch_demo["actions"], dtype=torch.long)
            expert_actions = ModelUtils.actions_to_onehot(
                raw_expert_actions, self.policy.act_size)
            act_masks = ModelUtils.list_to_tensor(
                np.ones(
                    (
                        self.n_sequences * self.policy.sequence_length,
                        sum(self.policy.behavior_spec.action_spec.
                            discrete_branches),
                    ),
                    dtype=np.float32,
                ))

        memories = []
        if self.policy.use_recurrent:
            memories = torch.zeros(1, self.n_sequences, self.policy.m_size)

        if self.policy.use_vis_obs:
            vis_obs = []
            for idx, _ in enumerate(
                    self.policy.actor_critic.network_body.visual_processors):
                vis_ob = ModelUtils.list_to_tensor(
                    mini_batch_demo["visual_obs%d" % idx])
                vis_obs.append(vis_ob)
        else:
            vis_obs = []

        (
            selected_actions,
            clipped_actions,
            all_log_probs,
            _,
            _,
        ) = self.policy.sample_actions(
            vec_obs,
            vis_obs,
            masks=act_masks,
            memories=memories,
            seq_len=self.policy.sequence_length,
            all_log_probs=True,
        )
        bc_loss = self._behavioral_cloning_loss(clipped_actions, all_log_probs,
                                                expert_actions)
        self.optimizer.zero_grad()
        bc_loss.backward()

        self.optimizer.step()
        run_out = {"loss": bc_loss.item()}
        return run_out
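For the discrete branch, the expert actions become a list of one-hot tensors (one per branch) and act_masks is an all-ones tensor over the concatenated branch sizes. A small shape check with made-up branch sizes [3, 2] and four demo steps:

import numpy as np
import torch

branches = [3, 2]                                      # illustrative branch sizes
raw = torch.tensor([[0, 1], [2, 0], [1, 1], [0, 0]])   # four demo steps
expert_actions = [
    torch.nn.functional.one_hot(raw[:, i], n).float()
    for i, n in enumerate(branches)
]
act_masks = torch.as_tensor(np.ones((4, sum(branches)), dtype=np.float32))
print([t.shape for t in expert_actions], act_masks.shape)
# [torch.Size([4, 3]), torch.Size([4, 2])] torch.Size([4, 5])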