Example #1
 def forward(self, embeddings: FloatTensorType) -> FloatTensorType:
     match_shape(embeddings, ..., self.dim)
     # We add a dimension so that matmul performs a matrix-vector product.
     return (torch.matmul(self.linear_transformation,
                          embeddings.unsqueeze(-1)).squeeze(-1)
             + self.translation)
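A minimal sketch (with made-up shapes; A and x are hypothetical stand-ins for self.linear_transformation and embeddings) of why the unsqueeze/squeeze pair above turns matmul into a batched matrix-vector product:

import torch

dim = 4
A = torch.randn(dim, dim)   # stands in for self.linear_transformation
x = torch.randn(3, dim)     # a batch of embedding vectors

# (dim, dim) @ (3, dim, 1) broadcasts A over the batch and yields (3, dim, 1).
y = torch.matmul(A, x.unsqueeze(-1)).squeeze(-1)
assert torch.allclose(y, x @ A.t())   # same as right-multiplying by A transposed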
Example #2
    def prepare_negatives(
        self,
        pos_input: EntityList,
        pos_embs: FloatTensorType,
        module: AbstractEmbedding,
        type_: Negatives,
        num_uniform_neg: int,
        rel: Union[int, LongTensorType],
        entity_type: str,
        operator: Union[None, AbstractOperator, AbstractDynamicOperator],
    ) -> Tuple[FloatTensorType, Mask]:
        """Given some chunked positives, set up chunks of negatives.

        This function operates on one side (left-hand or right-hand) at a time.
        It takes all the information about the positives on that side (the
        original input value, the corresponding embeddings, and the module used
        to convert one to the other). It then produces negatives for that side
        according to the specified mode. The positive embeddings come in
        chunked form and the negatives are produced within each of these chunks.
        The negatives can be either none, or the positives from the same chunk,
        or all the possible entities. In the second mode, uniformly-sampled
        entities can also be appended to the per-chunk negatives (each chunk
        having a different sample). This function returns both the chunked
        embeddings of the negatives and a mask of the same size as the chunked
        positives-vs-negatives scores, whose non-zero elements correspond to the
        scores that must be ignored.

        """
        num_pos = len(pos_input)
        num_chunks, chunk_size, dim = match_shape(pos_embs, -1, -1, -1)
        last_chunk_size = num_pos - (num_chunks - 1) * chunk_size

        ignore_mask: Mask = []
        if type_ is Negatives.NONE:
            neg_embs = pos_embs.new_empty((num_chunks, 0, dim))
        elif type_ is Negatives.UNIFORM:
            uniform_neg_embs = module.sample_entities(num_chunks,
                                                      num_uniform_neg)
            neg_embs = self.adjust_embs(uniform_neg_embs, rel, entity_type,
                                        operator)
        elif type_ is Negatives.BATCH_UNIFORM:
            neg_embs = pos_embs
            if num_uniform_neg > 0:
                try:
                    uniform_neg_embs = module.sample_entities(
                        num_chunks, num_uniform_neg)
                except NotImplementedError:
                    pass  # only use pos_embs i.e. batch negatives
                else:
                    neg_embs = torch.cat(
                        [
                            pos_embs,
                            self.adjust_embs(uniform_neg_embs, rel,
                                             entity_type, operator),
                        ],
                        dim=1,
                    )

            chunk_indices = torch.arange(chunk_size,
                                         dtype=torch.long,
                                         device=pos_embs.device)
            last_chunk_indices = chunk_indices[:last_chunk_size]
            # Ignore scores between positive pairs.
            ignore_mask.append(
                (slice(num_chunks - 1), chunk_indices, chunk_indices))
            ignore_mask.append((-1, last_chunk_indices, last_chunk_indices))
            # In the last chunk, ignore the scores between the positives that
            # are not padding (i.e., the first last_chunk_size ones) and the
            # negatives that are padding (i.e., all of them except the first
            # last_chunk_size ones). Stop the last slice at chunk_size so that
            # it doesn't also affect the uniformly-sampled negatives.
            ignore_mask.append(
                (-1, slice(last_chunk_size), slice(last_chunk_size,
                                                   chunk_size)))

        elif type_ is Negatives.ALL:
            pos_input_ten = pos_input.to_tensor()
            neg_embs = self.adjust_embs(
                module.get_all_entities().expand(num_chunks, -1, dim),
                rel,
                entity_type,
                operator,
            )

            if num_uniform_neg > 0:
                logger.warning("Adding uniform negatives makes no sense "
                               "when already using all negatives")

            chunk_indices = torch.arange(chunk_size,
                                         dtype=torch.long,
                                         device=pos_embs.device)
            last_chunk_indices = chunk_indices[:last_chunk_size]
            # Ignore scores between positive pairs: since the i-th such pair has
            # the pos_input[i] entity on this side, ignore_mask[i, pos_input[i]]
            # must be set to 1 for every i. This becomes slightly more tricky as
            # the rows may be wrapped into multiple chunks (the last of which
            # may be smaller).
            ignore_mask.append((
                torch.arange(num_chunks - 1,
                             dtype=torch.long,
                             device=pos_embs.device).unsqueeze(1),
                chunk_indices.unsqueeze(0),
                pos_input_ten[:-last_chunk_size].view(num_chunks - 1,
                                                      chunk_size),
            ))
            ignore_mask.append(
                (-1, last_chunk_indices, pos_input_ten[-last_chunk_size:]))

        else:
            raise NotImplementedError("Unknown negative type %s" % type_)

        return neg_embs, ignore_mask
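Each entry appended to ignore_mask above is a (chunk, row, column) index tuple into the chunked negative-scores tensor. An illustrative sketch, with made-up sizes, of how such a tuple is consumed (mirroring what forward_direction_agnostic does with the masks further below):

import torch

num_chunks, chunk_size, num_neg = 3, 4, 4
neg_scores = torch.randn(num_chunks, chunk_size, num_neg)
# Hide each positive's score against itself in all but the last chunk, as the
# first BATCH_UNIFORM mask entry above does.
chunk_indices = torch.arange(chunk_size)
ignore_mask = [(slice(num_chunks - 1), chunk_indices, chunk_indices)]
for mask in ignore_mask:
    neg_scores[mask] = -1e9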
Example #3
    def forward_direction_agnostic(
        self,
        src: EntityList,
        dst: EntityList,
        rel: Union[int, LongTensorType],
        src_entity_type: str,
        dst_entity_type: str,
        src_operator: Union[None, AbstractOperator, AbstractDynamicOperator],
        dst_operator: Union[None, AbstractOperator, AbstractDynamicOperator],
        src_module: AbstractEmbedding,
        dst_module: AbstractEmbedding,
        src_pos: FloatTensorType,
        dst_pos: FloatTensorType,
        chunk_size: int,
        src_negative_sampling_method: Negatives,
        dst_negative_sampling_method: Negatives,
    ):
        num_pos = len(src)
        assert len(dst) == num_pos

        src_pos = self.adjust_embs(src_pos, rel, src_entity_type, src_operator)
        dst_pos = self.adjust_embs(dst_pos, rel, dst_entity_type, dst_operator)

        num_chunks = ceil_of_ratio(num_pos, chunk_size)
        src_dim = src_pos.size(-1)
        dst_dim = dst_pos.size(-1)
        if num_pos < num_chunks * chunk_size:
            src_padding = src_pos.new_zeros(()).expand(
                (num_chunks * chunk_size - num_pos, src_dim))
            src_pos = torch.cat((src_pos, src_padding), dim=0)
            dst_padding = dst_pos.new_zeros(()).expand(
                (num_chunks * chunk_size - num_pos, dst_dim))
            dst_pos = torch.cat((dst_pos, dst_padding), dim=0)
        src_pos = src_pos.view((num_chunks, chunk_size, src_dim))
        dst_pos = dst_pos.view((num_chunks, chunk_size, dst_dim))

        src_neg, src_ignore_mask = self.prepare_negatives(
            src,
            src_pos,
            src_module,
            src_negative_sampling_method,
            self.num_uniform_negs,
            rel,
            src_entity_type,
            src_operator,
        )
        dst_neg, dst_ignore_mask = self.prepare_negatives(
            dst,
            dst_pos,
            dst_module,
            dst_negative_sampling_method,
            self.num_uniform_negs,
            rel,
            dst_entity_type,
            dst_operator,
        )

        pos_scores, src_neg_scores, dst_neg_scores = self.comparator(
            src_pos, dst_pos, src_neg, dst_neg)

        pos_scores = pos_scores.float()
        src_neg_scores = src_neg_scores.float()
        dst_neg_scores = dst_neg_scores.float()

        # The masks tell us which negative scores (i.e., scores for non-existing
        # edges) must be ignored because they come from pairs we don't actually
        # intend to compare (say, positive pairs or interactions with padding).
        # We do it by replacing them with a "very negative" value so that they
        # are considered spot-on predictions with minimal impact on the loss.
        for ignore_mask in src_ignore_mask:
            src_neg_scores[ignore_mask] = -1e9
        for ignore_mask in dst_ignore_mask:
            dst_neg_scores[ignore_mask] = -1e9

        # De-chunk the scores and ignore the ones whose positives were padding.
        pos_scores = pos_scores.flatten(0, 1)[:num_pos]
        src_neg_scores = src_neg_scores.flatten(0, 1)[:num_pos]
        dst_neg_scores = dst_neg_scores.flatten(0, 1)[:num_pos]
        reg = None
        if self.regularizer is not None:
            assert (src_operator is None) != (dst_operator is None), \
                "Exactly one of src or dst operator should be None"
            operator = src_operator if src_operator is not None else dst_operator
            if self.num_dynamic_rels > 0:
                reg = self.regularizer.forward_dynamic(src_pos, dst_pos,
                                                       operator, rel)
            else:
                reg = self.regularizer.forward(src_pos, dst_pos, operator)

        return pos_scores, src_neg_scores, dst_neg_scores, reg
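An illustrative sketch, with made-up sizes, of the padding-and-chunking arithmetic used above; ceil_of_ratio is assumed to compute the rounded-up ratio of its arguments:

import torch

num_pos, chunk_size, dim = 10, 4, 8
num_chunks = (num_pos + chunk_size - 1) // chunk_size   # ceil_of_ratio(10, 4) == 3
pos = torch.randn(num_pos, dim)
# Append zero rows so the tensor can be viewed as (num_chunks, chunk_size, dim).
padding = pos.new_zeros(()).expand((num_chunks * chunk_size - num_pos, dim))
pos = torch.cat((pos, padding), dim=0).view((num_chunks, chunk_size, dim))
assert pos.shape == (num_chunks, chunk_size, dim)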
Example #4
 def prepare(self, embs: FloatTensorType) -> FloatTensorType:
     # Dividing by the norm costs N * dim divisions, multiplying by the
     # reciprocal of the norm costs N divisions and N * dim multiplications.
     # The latter one is faster.
     norm = embs.norm(2, dim=-1)
     return embs * norm.reciprocal().unsqueeze(-1)
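A quick, illustrative check that multiplying by the reciprocal of the norm performs the same L2 normalization as dividing by the norm (the shapes are arbitrary):

import torch

embs = torch.randn(5, 8)
norm = embs.norm(2, dim=-1)
normalized = embs * norm.reciprocal().unsqueeze(-1)
assert torch.allclose(normalized, embs / norm.unsqueeze(-1))
assert torch.allclose(normalized.norm(2, dim=-1), torch.ones(5))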
Example #5
def save_embeddings(hf: h5py.File, embeddings: FloatTensorType) -> None:
    hf.create_dataset(EMBEDDING_DATASET, data=embeddings.numpy())
Example #6
 def prepare_embs_for_reg(self, embs: FloatTensorType) -> FloatTensorType:
     return embs.abs()
Example #7
 def forward(
     self, embeddings: FloatTensorType, operator_idxs: LongTensorType
 ) -> FloatTensorType:
     match_shape(embeddings, ..., self.dim)
     match_shape(operator_idxs, *embeddings.size()[:-1])
     return self.diagonals.to(device=embeddings.device)[operator_idxs] * embeddings
Example #8
 def forward(
     self, embeddings: FloatTensorType, operator_idxs: LongTensorType
 ) -> FloatTensorType:
     match_shape(embeddings, ..., self.dim)
     match_shape(operator_idxs, *embeddings.size()[:-1])
     return embeddings