def unique_sequences(
    src: _k2.RaggedInt,
    need_num_repeats: bool = True,
    need_new2old_indexes: bool = False,
) -> Tuple[_k2.RaggedInt, Optional[_k2.RaggedInt], Optional[torch.Tensor]]:  # noqa
    '''Drop duplicated sequences (sub-lists) from a ragged tensor.

    With a 2-axis `src`, the result holds each distinct sub-list once; the
    order of the sub-lists may differ from the input.  With a 3-axis `src`,
    the same de-duplication is carried out independently for every index on
    axis 0.  If `src` has more than 3 axes, the earliest axes are ignored.

    Caution:
      Uniqueness is decided by a hash and collisions are ignored, so it is
      not completely guaranteed that every distinct sequence survives.  When
      several sequences share a hash, only one of them is kept, even if
      their actual contents differ.

    Caution:
      The output may differ from `src` even when `src` contains no repeated
      sequences.  In other words, `new2old_indexes` may NOT be an identity
      map even if nothing was removed.

    Args:
      src:
        The input ragged tensor.  Must satisfy `src.num_axes() == 2` or
        `src.num_axes() == 3`.
      need_num_repeats:
        If True, the number of repeats of each sequence is returned as well.
      need_new2old_indexes:
        If True, an extra 1-D tensor `new2old_indexes` is returned.
        For a 2-axis `src` this tensor contains `src_idx0`; for a 3-axis
        `src` it contains `src_idx01`.

        Caution:
          Only one sublist is kept out of each group of repeated sublists.
          The choice of which one to keep is **deterministic** and is an
          implementation detail.

    Returns:
      A tuple `(ans, num_repeats, new2old_indexes)`:

        - ans: A ragged tensor with the same number of axes as `src` and
          possibly fewer elements, because repeated sequences on the last
          axis were removed (the last-but-one indexes may also appear in a
          different order).

        - num_repeats: The number of repeats of each returned sequence if
          `need_num_repeats` is True; None otherwise.  When it is not None,
          `num_repeats.num_axes()` is always 2.
          If `ans.num_axes()` is 2, then `num_repeats.dim0() == 1` and
          `num_repeats.num_elements() == ans.dim0()`.
          If `ans.num_axes()` is 3, then `num_repeats.dim0() == ans.dim0()`
          and `num_repeats.num_elements() == ans.tot_size(1)`.

        - new2old_indexes: A 1-D tensor whose i-th element specifies the
          input sublist that the i-th output sublist corresponds to.
    '''
    # All the work happens in the extension module; this wrapper only
    # forwards the arguments by keyword.
    deduplicated = _k2.unique_sequences(
        src,
        need_num_repeats=need_num_repeats,
        need_new2old_indexes=need_new2old_indexes,
    )
    return deduplicated
def unique_sequences(
    src: _k2.RaggedInt,
    need_num_repeats: bool = True,
) -> Tuple[_k2.RaggedInt, Optional[_k2.RaggedInt]]:  # noqa
    '''Drop duplicated sequences (sub-lists) from a ragged tensor.

    With a 2-axis `src`, the result holds each distinct sub-list once; the
    order of the sub-lists may differ from the input.  With a 3-axis `src`,
    the same de-duplication is carried out independently for every index on
    axis 0.  If `src` has more than 3 axes, the earliest axes are ignored.

    Caution:
      Uniqueness is decided by a hash and collisions are ignored, so it is
      not completely guaranteed that every distinct sequence survives.  When
      several sequences share a hash, only one of them is kept, even if
      their actual contents differ.

    Args:
      src:
        The input ragged tensor.  Must satisfy `src.num_axes() == 2` or
        `src.num_axes() == 3`.
      need_num_repeats:
        If True, the number of repeats of each sequence is returned as well.

    Returns:
      A tuple `(ans, num_repeats)`:

        - ans: A ragged tensor with the same number of axes as `src` and
          possibly fewer elements, because repeated sequences on the last
          axis were removed (the last-but-one indexes may also appear in a
          different order).

        - num_repeats: The number of repeats of each returned sequence if
          `need_num_repeats` is True; None otherwise.  When it is not None,
          `num_repeats.num_axes()` is always 2.
          If `ans.num_axes()` is 2, then `num_repeats.dim0() == 1` and
          `num_repeats.num_elements() == ans.dim0()`.
          If `ans.num_axes()` is 3, then `num_repeats.dim0() == ans.dim0()`
          and `num_repeats.num_elements() == ans.tot_size(1)`.
    '''
    # NOTE(review): a function with this same name (taking an additional
    # `need_new2old_indexes` argument) is defined earlier in this file.  If
    # both definitions are meant to live in one module, this later one
    # rebinds the name -- confirm which variant is intended.
    deduplicated = _k2.unique_sequences(
        src,
        need_num_repeats=need_num_repeats,
    )
    return deduplicated