Пример #1
0
class DaliRnntIterator(object):
    """
    Returns batches of data for RNN-T training:
    preprocessed_signal, preprocessed_signal_length, transcript, transcript_length

    All transcripts are tokenized and zero-padded once, at construction time;
    each batch then only looks the padded rows up by the label ids that the
    DALI pipeline emits.

    This iterator is not meant to be the entry point to Dali processing pipeline.
    Use DataLoader instead.
    """
    def __init__(self,
                 dali_pipelines,
                 transcripts,
                 tokenizer,
                 batch_size,
                 shard_size,
                 pipeline_type,
                 normalize_transcripts=False):
        """
        Args:
            dali_pipelines: pipeline(s) handed to ``DALIGenericIterator``.
            transcripts: container indexable by ``0 .. len(transcripts) - 1``
                holding the transcript text for every sample id.
            tokenizer: object exposing ``tokenize(text)`` and, when
                ``normalize_transcripts`` is set, a ``charset`` attribute.
            batch_size: stored on the instance; not used by this class itself.
            shard_size: forwarded as ``size`` for every pipeline type but 'val'.
            pipeline_type: 'val' selects the reader-driven iterator setup.
            normalize_transcripts: normalize the text before tokenizing.
        """
        self.normalize_transcripts = normalize_transcripts
        self.tokenizer = tokenizer
        self.batch_size = batch_size
        # Imported lazily so the module can be loaded without DALI installed.
        from nvidia.dali.plugin.pytorch import DALIGenericIterator
        from nvidia.dali.plugin.base_iterator import LastBatchPolicy

        output_map = ["audio", "label", "audio_shape"]
        # in train pipeline shard_size is set to divisible by batch_size, so PARTIAL policy is safe
        if pipeline_type == 'val':
            self.dali_it = DALIGenericIterator(
                dali_pipelines, output_map,
                reader_name="Reader",
                dynamic_shape=True,
                auto_reset=True,
                last_batch_policy=LastBatchPolicy.PARTIAL)
        else:
            self.dali_it = DALIGenericIterator(
                dali_pipelines, output_map,
                size=shard_size,
                dynamic_shape=True,
                auto_reset=True,
                last_batch_padded=True,
                last_batch_policy=LastBatchPolicy.PARTIAL)

        self.tokenize(transcripts)

    def tokenize(self, transcripts):
        """
        Tokenize every transcript and cache the results on the instance.

        Sets:
            self.tr: 1-D object array holding one token tensor per transcript.
            self.t_sizes: int32 tensor with the token count of each transcript.
            self.padded_tr: 2-D array, one row per transcript, right-padded
                with zeros to the longest token sequence.

        Raises:
            ValueError: if ``transcripts`` is empty.
        """
        # Index (rather than iterate) so that mappings keyed by 0..n-1 are
        # accepted as well as plain sequences.
        transcripts = [transcripts[i] for i in range(len(transcripts))]
        if not transcripts:
            raise ValueError("transcripts must contain at least one entry")

        if self.normalize_transcripts:
            transcripts = [
                normalize_string(t, self.tokenizer.charset,
                                 punctuation_map(self.tokenizer.charset))
                for t in transcripts
            ]
        tokens = [torch.tensor(self.tokenizer.tokenize(t)) for t in transcripts]

        # Fill the object array element by element: handing a list of tensors
        # straight to np.array lets NumPy coerce them, and equally long
        # tensors would then collapse into a 2-D object array.
        self.tr = np.empty(len(tokens), dtype=object)
        for idx, tensor in enumerate(tokens):
            self.tr[idx] = tensor

        lengths = [len(t) for t in tokens]
        self.t_sizes = torch.tensor(lengths, dtype=torch.int32)
        self._min_token_sequence_len = min(lengths)
        self._max_token_sequence_len = max(lengths)

        # Pad NumPy views with plain int widths so the stacked result keeps
        # the tokens' numeric dtype.
        self.padded_tr = np.array([
            np.pad(t.numpy(), (0, self._max_token_sequence_len - n))
            for t, n in zip(tokens, lengths)
        ])

    def _gen_transcripts(self, labels, normalize_transcripts: bool = True):
        """
        Generate transcripts in format expected by NN

        Args:
            labels: tensor of sample ids; it is flattened before the lookup.
            normalize_transcripts: unused; kept for interface compatibility.

        Returns:
            Tuple of (padded transcript rows, their token counts).
        """
        ids = labels.flatten().numpy()
        # No per-batch padding needed: rows were padded up front in tokenize().
        transcripts = self.padded_tr[ids]

        return transcripts, self.t_sizes[ids]

    def __next__(self):
        """Return (audio, audio_lengths, transcripts, transcript_lengths)."""
        data = self.dali_it.__next__()
        audio, audio_shape = data[0]["audio"], data[0]["audio_shape"][:, 1]
        if audio.shape[0] == 0:
            # empty tensor means, other GPUs got last samples from dataset
            # and this GPU has nothing to do; calling `__next__` raises StopIteration
            return self.dali_it.__next__()
        # Trim the last axis to the longest sample actually in this batch.
        audio = audio[:, :, :audio_shape.max()]
        transcripts, transcripts_lengths = self._gen_transcripts(
            data[0]["label"])
        return audio, audio_shape, transcripts, transcripts_lengths

    def next(self):
        """Alias of ``__next__``."""
        return self.__next__()

    def __iter__(self):
        return self

    @property
    def min_token_sequence_len(self):
        """Token count of the shortest tokenized transcript."""
        return self._min_token_sequence_len

    @property
    def max_token_sequence_len(self):
        """Token count of the longest tokenized transcript."""
        return self._max_token_sequence_len
Пример #2
0
class DaliJasperIterator(object):
    """
    Yields Jasper training batches as 4-tuples:
    preprocessed_signal, preprocessed_signal_length, transcript, transcript_length

    Wraps a ``DALIGenericIterator`` and is not intended to be the entry point
    to the Dali processing pipeline. Use DataLoader instead.
    """
    def __init__(self, dali_pipelines, transcripts, symbols, batch_size,
                 reader_name, train_iterator: bool):
        self.transcripts = transcripts
        self.symbols = symbols
        self.batch_size = batch_size
        from nvidia.dali.plugin.pytorch import DALIGenericIterator
        from nvidia.dali.plugin.base_iterator import LastBatchPolicy

        # in train pipeline shard_size is set to divisible by batch_size, so PARTIAL policy is safe
        iterator_options = dict(
            reader_name=reader_name,
            dynamic_shape=True,
            auto_reset=True,
            last_batch_policy=LastBatchPolicy.PARTIAL,
        )
        self.dali_it = DALIGenericIterator(
            dali_pipelines, ["audio", "label", "audio_shape"],
            **iterator_options)

    @staticmethod
    def _str2list(s: str):
        """
        Encode a string as a list of floats, one per character.

        The apostrophe maps to 27.; every other character maps to
        ``ord(c) - 96`` clamped at 0., so 'a' becomes 1. and anything
        below 'a' (the separator included) becomes 0.
        The string is assumed to be lower case.
        """
        return [27. if ch == "'" else max(0., ord(ch) - 96.) for ch in s]

    @staticmethod
    def _pad_lists(lists: list, pad_val=0):
        """
        Pad the given lists in place to the length of the longest one.

        Returns the original (pre-padding) length of each list, in order.
        """
        sizes = []
        longest = 0
        for item in lists:
            current = len(item)
            sizes.append(current)
            if current >= longest:
                longest = current
        for item in lists:
            # Length is re-read here so an already padded list gets nothing.
            item += [pad_val] * (longest - len(item))
        return sizes

    def _gen_transcripts(self, labels, normalize_transcripts: bool = True):
        """
        Build the padded transcript tensor and its length tensor for a batch.

        Each label is used as a key into ``self.transcripts``; the text is
        optionally normalized, encoded with ``_str2list`` and padded.
        Both returned tensors are moved to the GPU with ``.cuda()``.
        """
        encoded = []
        for lab in labels:
            text = self.transcripts[lab.item()]
            if normalize_transcripts:
                text = normalize_string(text, self.symbols,
                                        punctuation_map(self.symbols))
            encoded.append(self._str2list(text))
        lengths = self._pad_lists(encoded)
        transcript_tensor = torch.tensor(encoded).cuda()
        length_tensor = torch.tensor(lengths, dtype=torch.int32).cuda()
        return transcript_tensor, length_tensor

    def __next__(self):
        """Fetch the next DALI batch and attach its encoded transcripts."""
        batch = self.dali_it.__next__()[0]
        transcripts, transcripts_lengths = self._gen_transcripts(
            batch["label"])
        audio = batch["audio"]
        audio_lengths = batch["audio_shape"][:, 1]
        return audio, audio_lengths, transcripts, transcripts_lengths

    def next(self):
        """Alias of ``__next__``."""
        return self.__next__()

    def __iter__(self):
        return self
Пример #3
0
class DALIDataloader():
    """DataLoader for Nvidia DALI augmentation pipeline,
    to handle non-DALI augmentations, this loader utilizes Ray to parallelize augmentation
    for every sample in a batch
    """
    def __init__(self,
                 dataset: Type[BasicDatasetWrapper],
                 batch_size: int = 1,
                 num_thread: int = 1,
                 device_id: int = 0,
                 collate_fn: Type[Callable] = None,
                 shuffle: bool = True):
        """Initialization

        Args:
            dataset (Type[BasicDatasetWrapper]): dataset object to be adapted into DALI format
            batch_size (int): How many samples per batch to load
            num_thread (int, optional): Number of CPU threads used by the pipeline. Defaults to 1.
            device_id (int, optional): GPU id to be used for pipeline. Defaults to 0.
            collate_fn (Type[Callable], optional): merges a list of samples to form a mini-batch of Tensor(s). Defaults to None.
            shuffle (bool, optional): set to True to have the data reshuffled at every epoch. Defaults to True.

        Raises:
            RuntimeError: if 'nvidia_dali' is listed among the augmentations but is not the first one.
            TypeError: if an augmentation's ``args`` is not a dictionary.
        """

        iterator = DALIIteratorWrapper(dataset,
                                       batch_size=batch_size,
                                       shuffle=shuffle,
                                       device_id=device_id)

        self.dataset = iterator.dataset
        self.image_auto_pad = self.dataset.image_auto_pad
        self.data_format = dataset.data_format
        self.preprocess_args = iterator.dataset.preprocess_args

        # Initialize DALI only augmentations
        self.augmentations_list = self.dataset.augmentations_list

        dali_augments = None
        external_augments = None
        normalize = True
        # Must exist for every stage: __next__ reads it unconditionally, so
        # leaving it unset outside the training branch raised AttributeError
        # on the first batch of a non-train / augmentation-free loader.
        self.external_executors = None
        if self.dataset.stage == 'train' and self.augmentations_list is not None:
            external_augments = []
            # Handler if using Nvidia DALI, if DALI augmentations is used in experiment file, it must be in the first order
            aug_module_sequence = [
                augment.module for augment in self.augmentations_list
            ]
            if 'nvidia_dali' in aug_module_sequence and aug_module_sequence[
                    0] != 'nvidia_dali':
                raise RuntimeError(
                    'Nvidia DALI augmentation module must be in the first order of the "augmentations" list!, found {}'
                    .format(aug_module_sequence[0]))

            for augment in self.augmentations_list:
                module_name = augment.module
                module_args = augment.args
                if not isinstance(module_args, dict):
                    raise TypeError(
                        "expect augmentation module's args value to be dictionary, got %s"
                        % type(module_args))
                # NOTE: this writes 'data_format' into the augmentation's own
                # args dict (no copy is made), as the original code did.
                tf_kwargs = module_args
                tf_kwargs['data_format'] = self.data_format
                augments = create_transform(module_name, **tf_kwargs)
                if module_name == 'nvidia_dali':
                    dali_augments = augments
                else:
                    external_augments.append(augments)

            # If there are any external augments
            if len(external_augments) != 0:
                # do not apply normalization and channel format swap in DALI pipeline
                normalize = False

                # Instantiate external augments executor
                ray.init(ignore_reinit_error=True)
                transforms_list_ref = ray.put(external_augments)
                data_format_ref = ray.put(self.data_format)
                preprocess_args_ref = ray.put(self.preprocess_args)

                # One executor per batch slot; __next__ dispatches sample
                # `index` to executor `index`.
                self.external_executors = [
                    ExternalAugmentsExecutor.remote(transforms_list_ref,
                                                    data_format_ref,
                                                    preprocess_args_ref,
                                                    self.image_auto_pad)
                    for _ in range(batch_size)
                ]

        pipeline = DALIExternalSourcePipeline(dataset_iterator=iterator,
                                              batch_size=batch_size,
                                              num_threads=num_thread,
                                              device_id=device_id,
                                              dali_augments=dali_augments,
                                              normalize=normalize)
        self.labels_pad_value = pipeline.labels_pad_value
        # Label layouts only: copy the pipeline's layout and drop 'images'.
        self.original_data_layout = copy.copy(pipeline.original_data_layout)
        self.original_data_layout.remove('images')

        # Additional field to retrieve image shape
        self.output_map = pipeline.pipeline_output_data_layout
        self.dali_pytorch_loader = DALIGenericIterator(
            pipelines=[pipeline],
            output_map=self.output_map,
            size=iterator.size,
            dynamic_shape=True,
            fill_last_batch=False,
            last_batch_padded=True,
            auto_reset=True)
        self.collate_fn = collate_fn
        self.size = self.dali_pytorch_loader.size
        self.batch_size = batch_size

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next collated batch of (image, targets) samples.

        For every sample of the DALI output this strips the DALI label
        padding, writes the augmented labels back into the
        'original_labels' array, optionally runs the external (Ray)
        augmentations, and finally applies ``collate_fn``.
        """
        output = self.dali_pytorch_loader.__next__()[
            0]  # Vortex doesn't support multiple pipelines yet

        # Prepare Pytorch style data loader output
        batch = []
        for i in range(len(output['images'])):
            image = output['images'][i].type(torch.float32)

            # DALI still have flaws about padding image to square, this is the workaround by bringing the image shape before padding
            pre_padded_image_size = output['pre_padded_image_shape'][i].cpu(
            )[:2].type(torch.float32)

            if self.image_auto_pad:
                input_size = self.preprocess_args.input_size
                padded_image_size = torch.tensor([input_size, input_size
                                                  ]).type(torch.float32)
                # Per-axis ratio between the pre-padding and padded sizes.
                diff_ratio = pre_padded_image_size / padded_image_size
            else:
                # Crop the image back to its pre-padding extent.
                rows = pre_padded_image_size[0].type(torch.int).item()
                cols = pre_padded_image_size[1].type(torch.int).item()
                image = image[:, :rows, :cols]

            # Prepare labels array
            aug_labels = dict()
            for layout in self.original_data_layout:
                label_output = output[layout][i].numpy()

                # Remove padded value from DALI, this assume that labels dimension 1 shape is same
                rows_with_padded_value = np.unique(
                    np.where(label_output == self.labels_pad_value)[0])
                label_output = np.delete(label_output,
                                         rows_with_padded_value,
                                         axis=0)

                # Placeholder to combine all labels
                if layout == 'original_labels':
                    ret_targets = label_output
                else:
                    if self.image_auto_pad:
                        # DALI still have flaws about padding image to square,
                        # this is the workaround by bringing the image shape before padding
                        label_output = self._fix_coordinates(
                            label_output, layout, diff_ratio)
                    aug_labels[layout] = label_output

            # Modify labels placeholder with augmented labels
            # NOTE(review): `ret_targets` is only bound when the layout
            # contains 'original_labels' — presumably always the case; confirm.
            for label_key in self.data_format:
                if label_key in self.original_data_layout:
                    label_data_format = self.data_format[label_key]
                    augmented_label = aug_labels[label_key]

                    # Refactor reshaped landmarks and apply asymmetric coordinates fixing if needed
                    if label_key == 'landmarks':
                        landmarks_format = self.data_format['landmarks']
                        nrof_obj_landmarks = int(
                            augmented_label.size /
                            len(landmarks_format['indices']))

                        # Reshape to shape [nrof_objects,nrof_points]
                        augmented_label = augmented_label.reshape(
                            nrof_obj_landmarks,
                            len(landmarks_format['indices']))

                        # Coordinates sequence fixing for asymmetric landmarks
                        if 'asymm_pairs' in landmarks_format:
                            # Extract flip flag from pipeline output
                            flip_flags = np.array([
                                output[key][i].numpy()
                                for key in output.keys()
                                if key.startswith('flip_flag_')
                            ])
                            flip_count = np.sum(flip_flags)

                            # An even number of flips cancels out; only an odd
                            # count requires swapping the asymmetric pairs.
                            if flip_count % 2 == 1:
                                n_keypoints = int(len(augmented_label[0]) / 2)
                                augmented_label = augmented_label.reshape(
                                    (-1, n_keypoints, 2))

                                # For each index keypoints pair, swap the position
                                # (item access, consistent with the rest of the
                                # class; works for plain dicts as well).
                                for keypoint_pair in landmarks_format[
                                        'asymm_pairs']:
                                    keypoint_pair = np.array(keypoint_pair)
                                    augmented_label[:, keypoint_pair, :] = \
                                        augmented_label[:, keypoint_pair[::-1], :]
                                # Convert back to original format
                                augmented_label = augmented_label.reshape(
                                    (-1, n_keypoints * 2))

                    # Put back augmented labels in the placeholder array for returned labels
                    np.put_along_axis(
                        ret_targets,
                        values=augmented_label,
                        axis=label_data_format['axis'],
                        indices=np.array(
                            label_data_format['indices'])[np.newaxis, :])

            if list(self.data_format.keys()) == ['class_label']:
                ret_targets = ret_targets.flatten().astype('int')
            batch.append((image, torch.tensor(ret_targets)))

        # Apply external (non-DALI) augments, utilizing ray
        if self.external_executors:
            batch = [(image.cpu(), target) for image, target in batch]
            batch_ref = ray.put(batch)
            batch_futures = [
                self.external_executors[index].run.remote(batch_ref, index)
                for index in range(len(batch))
            ]
            batch = ray.get(batch_futures)

        # Fall back to PyTorch's default collate when none was supplied.
        if self.collate_fn is None:
            self.collate_fn = torch.utils.data._utils.collate.default_collate
        return self.collate_fn(batch)

    def __len__(self):
        """Return the number of batches: ``size / batch_size`` rounded up."""
        full_batches, remainder = divmod(self.size, self.batch_size)
        return full_batches + 1 if remainder else full_batches

    def _fix_coordinates(self, labels, label_key, diff_ratio):
        """Fix coordinates label after image padding which break original image wh ratio

        Scales ``labels`` in place: even-indexed columns are multiplied by
        ``diff_ratio[1]`` and odd-indexed columns by ``diff_ratio[0]``.

        Args:
            labels: 2-D numpy array of label values; modified in place.
            label_key: name of the label layout; currently unused.
            diff_ratio: two-element tensor, the pre-padding image size
                divided by the padded image size.

        Returns:
            The same ``labels`` array, after scaling.
        """

        diff_ratio = diff_ratio.numpy()
        labels[:, ::2] = labels[:, ::2] * diff_ratio[1]
        labels[:, 1::2] = labels[:, 1::2] * diff_ratio[0]
        return labels