Example #1
def create_validation_data_loader(
    self,
    data: Dataset,
    **kwargs,
) -> DataLoader:
    # Keep only the fields that the network's hybrid_forward consumes.
    input_names = get_hybrid_forward_input_names(DeepStateTrainingNetwork)
    # Limit how many entries the instance splitter may process without
    # producing output: the dataset length, or 0 if it is unknown.
    with env._let(max_idle_transforms=maybe_len(data) or 0):
        instance_splitter = self._create_instance_splitter("validation")
    return ValidationDataLoader(
        dataset=data,
        transform=instance_splitter + SelectFields(input_names),
        batch_size=self.batch_size,
        stack_fn=partial(batchify, ctx=self.trainer.ctx, dtype=self.dtype),
    )
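
These factory methods are not usually called by hand; the estimator's training loop builds and consumes the loaders. A minimal consumption sketch, assuming a hypothetical GluonTS MXNet estimator instance `estimator` and validation dataset `val_data` (both names are placeholders, not from the original):

# Hypothetical usage sketch: build the loader and inspect one batch.
loader = estimator.create_validation_data_loader(val_data)
for batch in loader:
    # Each batch is a dict mapping the selected input field names to
    # arrays stacked on the trainer's context.
    print({name: array.shape for name, array in batch.items()})
    break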
Example #2
def create_training_data_loader(
    self,
    data: Dataset,
    **kwargs,
) -> DataLoader:
    # Limit how many entries a transformation may process without
    # producing output: the dataset length, or 0 if it is unknown.
    with env._let(max_idle_transforms=maybe_len(data) or 0):
        train_transform = (
            self._create_instance_splitter("training")
            + self._create_post_split_transform()
            + SelectFields(["past_target", "valid_length"])
        )
    return TrainDataLoader(
        # Cyclic repeats the dataset indefinitely, so training can draw
        # an unbounded stream of batches.
        train_transform.apply(Cyclic(data)),
        batch_size=self.batch_size,
        stack_fn=self._stack_fn(),
        decode_fn=partial(as_in_context, ctx=self.trainer.ctx),
    )
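
Cyclic is what makes this a training loader: it repeats the dataset indefinitely, so the trainer can draw as many batches per epoch as it needs. A minimal, self-contained sketch of the behavior, assuming the gluonts.itertools import path:

from itertools import islice

from gluonts.itertools import Cyclic

# Cycling a three-element sequence yields an endless repetition.
print(list(islice(Cyclic([1, 2, 3]), 7)))  # [1, 2, 3, 1, 2, 3, 1]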
Example #3
def create_validation_data_loader(
    self,
    data: Dataset,
    **kwargs,
) -> DataLoader:
    with env._let(max_idle_transforms=maybe_len(data) or 0):
        validation_transform = (
            self._create_instance_splitter("validation")
            + self._create_post_split_transform()
            + SelectFields(["past_target", "valid_length"])
        )
    # Unlike the training loader, the dataset is traversed once per
    # epoch, so no Cyclic wrapper is needed.
    return ValidationDataLoader(
        validation_transform.apply(data),
        batch_size=self.batch_size,
        stack_fn=self._stack_fn(),
    )
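
The + between transformations is Transformation.__add__, which composes them into a Chain; .apply then wraps the dataset in a lazily transformed view rather than materializing it. A small sketch, assuming gluonts.transform exposes Chain and SelectFields as in the examples above:

from gluonts.transform import Chain, SelectFields

# `a + b` on transformations is equivalent to Chain([a, b]).
chained = SelectFields(["past_target"]) + SelectFields(["past_target"])
assert isinstance(chained, Chain)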
Example #4
def create_training_data_loader(
    self,
    data: Dataset,
    **kwargs,
) -> DataLoader:
    # Keep only the fields that the network's hybrid_forward consumes.
    input_names = get_hybrid_forward_input_names(CanonicalTrainingNetwork)
    with env._let(max_idle_transforms=maybe_len(data) or 0):
        instance_splitter = self._create_instance_splitter("training")
    return TrainDataLoader(
        dataset=data,
        transform=instance_splitter + SelectFields(input_names),
        batch_size=self.batch_size,
        stack_fn=partial(batchify, ctx=self.trainer.ctx, dtype=self.dtype),
        decode_fn=partial(as_in_context, ctx=self.trainer.ctx),
        **kwargs,
    )
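
get_hybrid_forward_input_names works by inspecting the signature of the network's hybrid_forward, which is what lets SelectFields drop every field the network will not consume. A rough, self-contained sketch of the idea (not the exact GluonTS implementation):

import inspect

def input_names_sketch(network_cls):
    # hybrid_forward(self, F, ...): keep only the data arguments,
    # dropping `self` and MXNet's symbolic/imperative handle `F`.
    params = inspect.signature(network_cls.hybrid_forward).parameters
    return [name for name in params if name not in ("self", "F")]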
Example #5
def train_model(
    self,
    training_data: Dataset,
    validation_data: Optional[Dataset] = None,
    num_workers: int = 0,
    prefetch_factor: int = 2,
    shuffle_buffer_length: Optional[int] = None,
    cache_data: bool = False,
    **kwargs,
) -> TrainOutput:
    transformation = self.create_transformation()

    trained_net = self.create_training_network(self.trainer.device)

    # Keep only the fields that the network's forward() consumes.
    input_names = get_module_forward_input_names(trained_net)

    with env._let(max_idle_transforms=maybe_len(training_data) or 0):
        training_instance_splitter = self.create_instance_splitter(
            "training"
        )
    training_iter_dataset = TransformedIterableDataset(
        dataset=training_data,
        transform=transformation
        + training_instance_splitter
        + SelectFields(input_names),
        is_train=True,
        shuffle_buffer_length=shuffle_buffer_length,
        cache_data=cache_data,
    )

    training_data_loader = DataLoader(
        training_iter_dataset,
        batch_size=self.trainer.batch_size,
        num_workers=num_workers,
        prefetch_factor=prefetch_factor,
        pin_memory=True,
        worker_init_fn=self._worker_init_fn,
        **kwargs,
    )

    validation_data_loader = None
    if validation_data is not None:
        with env._let(
            max_idle_transforms=maybe_len(validation_data) or 0
        ):
            validation_instance_splitter = self.create_instance_splitter(
                "validation"
            )
        # is_train=True here as well: validation instances are split
        # like training instances, since the loss is computed on them.
        validation_iter_dataset = TransformedIterableDataset(
            dataset=validation_data,
            transform=transformation
            + validation_instance_splitter
            + SelectFields(input_names),
            is_train=True,
            cache_data=cache_data,
        )
        validation_data_loader = DataLoader(
            validation_iter_dataset,
            batch_size=self.trainer.batch_size,
            num_workers=num_workers,
            prefetch_factor=prefetch_factor,
            pin_memory=True,
            worker_init_fn=self._worker_init_fn,
            **kwargs,
        )

    self.trainer(
        net=trained_net,
        train_iter=training_data_loader,
        validation_iter=validation_data_loader,
    )

    return TrainOutput(
        transformation=transformation,
        trained_net=trained_net,
        predictor=self.create_predictor(
            transformation, trained_net, self.trainer.device
        ),
    )
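
A minimal consumption sketch for this PyTorch path, assuming a concrete estimator instance `estimator` and a training Dataset `train_ds` (both names hypothetical):

# Hypothetical usage sketch: train and unpack the result.
train_output = estimator.train_model(
    training_data=train_ds,
    num_workers=2,      # parallel data-loading workers
    cache_data=True,    # keep transformed entries in memory across epochs
)
# TrainOutput bundles the fitted transformation, the trained network,
# and a predictor ready for inference.
predictor = train_output.predictor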