Example #1
    def train_generator_step(self, configuration: Configuration, metadata: Metadata,
                             architecture: Architecture) -> float:
        # clean previous gradients
        architecture.generator_optimizer.zero_grad()

        # conditional
        if "conditional" in architecture.arguments:
            # for now a uniform distribution is used, but this could be controlled differently
            # this works for both binary and categorical dependent variables
            number_of_conditions = metadata.get_dependent_variable().get_size()
            condition = to_gpu_if_available(FloatTensor(configuration.batch_size).uniform_(0, number_of_conditions))
        # non-conditional
        else:
            condition = None

        # generate a full batch of fake features
        fake_features = self.sample_fake(architecture, configuration.batch_size, condition=condition)

        # calculate loss
        loss = architecture.generator_loss(architecture, fake_features, condition=condition)

        # calculate gradients
        loss.backward()

        # update the generator weights
        architecture.generator_optimizer.step()

        # return the loss
        return to_cpu_if_was_in_gpu(loss).item()
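
The uniform condition above is worth a note: drawing a float in [0, number_of_conditions) and truncating it yields a uniform categorical label, which is presumably what sample_fake does downstream (an assumption; the snippet does not show it). A minimal illustration:

    # illustration only: a uniform float in [0, n) becomes a uniform
    # categorical label once truncated (assumed to happen inside sample_fake)
    import torch

    n = 3
    condition = torch.empty(8).uniform_(0, n)
    labels = condition.long()  # integers in {0, 1, 2}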
Example #2
    def run(self, configuration: Configuration) -> None:
        seed_all(configuration.get("seed"))

        metadata = load_metadata(configuration.metadata)

        architecture_configuration = load_configuration(configuration.architecture)
        self.validate_architecture_configuration(architecture_configuration)
        architecture = create_architecture(metadata, architecture_configuration)
        architecture.to_gpu_if_available()

        checkpoints = Checkpoints()
        checkpoint = checkpoints.load(configuration.checkpoint)
        if "best_architecture" in checkpoint:
            checkpoints.load_states(checkpoint["best_architecture"], architecture)
        else:
            checkpoints.load_states(checkpoint["architecture"], architecture)

        # pre-processing
        imputation = create_component(architecture, metadata, configuration.imputation)

        pre_processing = PreProcessing(imputation)

        # post-processing
        if "scale_transform" in configuration:
            scale_transform = load_scale_transform(configuration.scale_transform)
        else:
            scale_transform = None

        post_processing = PostProcessing(metadata, scale_transform)

        # load the features
        features = to_gpu_if_available(torch.from_numpy(np.load(configuration.features)).float())
        missing_mask = to_gpu_if_available(torch.from_numpy(np.load(configuration.missing_mask)).float())

        # initial imputation
        batch = pre_processing.transform({"features": features, "missing_mask": missing_mask})

        # generate the model outputs
        output = self.impute(configuration, metadata, architecture, batch)

        # imputation: keep the observed values and fill only the missing
        # ones with the model output
        output = compose_with_mask(mask=missing_mask, differentiable=False, where_one=output, where_zero=features)

        # post-process
        output = post_processing.transform(output)

        # save the imputation
        output = to_cpu_if_was_in_gpu(output)
        output = output.numpy()
        np.save(configuration.output, output)
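
compose_with_mask is repo-specific, but its semantics can be inferred from this call site: positions where the mask is one come from where_one, the rest from where_zero. A hedged one-line equivalent in plain torch, with names mirroring the keyword arguments above:

    # assumed semantics of compose_with_mask (not the repo's source)
    import torch

    composed = torch.where(mask.bool(), where_one, where_zero)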
Example #3
    def run(self, configuration: Configuration) -> None:
        seed_all(configuration.get("seed"))

        metadata = load_metadata(configuration.metadata)

        architecture_configuration = load_configuration(
            configuration.architecture)
        self.validate_architecture_configuration(architecture_configuration)
        architecture = create_architecture(metadata,
                                           architecture_configuration)
        architecture.to_gpu_if_available()

        checkpoints = Checkpoints()
        checkpoint = checkpoints.load(configuration.checkpoint)
        if "best_architecture" in checkpoint:
            checkpoints.load_states(checkpoint["best_architecture"],
                                    architecture)
        else:
            checkpoints.load_states(checkpoint["architecture"], architecture)

        # load the features
        features = to_gpu_if_available(
            torch.from_numpy(np.load(configuration.features)).float())

        # conditional
        if "labels" in configuration:
            condition = to_gpu_if_available(
                torch.from_numpy(np.load(configuration.labels)).float())
        else:
            condition = None

        # encode
        with torch.no_grad():
            code = architecture.autoencoder.encode(features,
                                                   condition=condition)["code"]

        # save the code
        code = to_cpu_if_was_in_gpu(code)
        code = code.numpy()
        np.save(configuration.output, code)
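
For orientation, the configuration keys this task reads can be collected from the attribute accesses above. The sketch below is inferred from the snippet alone; the values are hypothetical placeholders:

    # inferred configuration keys; values are placeholders
    configuration = {
        "seed": 42,
        "metadata": "metadata.json",
        "architecture": "architecture.json",
        "checkpoint": "checkpoint.pt",
        "features": "features.npy",
        "labels": "labels.npy",  # optional: only for the conditional case
        "output": "code.npy",
    }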
Example #4
    def train_discriminator_step(self, configuration: Configuration, metadata: Metadata, architecture: Architecture,
                                 batch: Batch) -> float:
        # clean previous gradients
        architecture.discriminator_optimizer.zero_grad()

        # generate a batch of fake features with the same size as the real feature batch
        fake_features = self.sample_fake(architecture, len(batch["features"]), condition=batch.get("labels"))
        fake_features = fake_features.detach()  # do not propagate to the generator

        # calculate loss
        loss = architecture.discriminator_loss(architecture,
                                               batch["features"],
                                               fake_features,
                                               condition=batch.get("labels"))

        # calculate gradients
        loss.backward()

        # update the discriminator weights
        architecture.discriminator_optimizer.step()

        # return the loss
        return to_cpu_if_was_in_gpu(loss).item()
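
The detach() call is the important detail here: it cuts the fake features out of the autograd graph, so the discriminator loss cannot update the generator. A self-contained illustration:

    # detach() returns a tensor that shares storage but is cut from the graph
    import torch

    x = torch.ones(3, requires_grad=True)
    y = (x * 2).detach()
    assert not y.requires_grad  # gradients stop here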
Example #5
    def train_generator_step(configuration: Configuration, metadata: Metadata,
                             architecture: Architecture,
                             batch: Batch) -> float:
        # clean previous gradients
        architecture.generator_optimizer.zero_grad()

        # generate a batch of fake features with the same size as the real feature batch
        generated = architecture.generator(batch["features"],
                                           missing_mask=batch["missing_mask"])
        # replace the missing features with the generated ones
        imputed = compose_with_mask(
            mask=batch["missing_mask"],
            differentiable=True,  # there are no NaNs now, so keep it differentiable
            where_one=generated,
            where_zero=batch["raw_features"])
        # generate hint
        hint = generate_hint(batch["missing_mask"],
                             configuration.hint_probability, metadata)

        # calculate loss
        loss = architecture.generator_loss(architecture=architecture,
                                           features=batch["raw_features"],
                                           generated=generated,
                                           imputed=imputed,
                                           hint=hint,
                                           non_missing_mask=inverse_mask(
                                               batch["missing_mask"]))

        # calculate gradients
        loss.backward()

        # update the generator weights
        architecture.generator_optimizer.step()

        # return the loss
        return to_cpu_if_was_in_gpu(loss).item()
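
generate_hint is repo-specific, but the ingredients (missing mask, hint probability, an imputation GAN) match GAIN (Yoon et al., 2018), where the hint reveals part of the mask to the discriminator and encodes the hidden entries as 0.5. A sketch of that common formulation, which may differ from the repo's implementation:

    # GAIN-style hint: reveal each mask entry with probability
    # hint_probability, encode hidden entries as 0.5 (an assumption
    # about generate_hint, not its actual source)
    import torch

    def gain_hint(mask, hint_probability):
        revealed = torch.bernoulli(torch.full_like(mask, hint_probability))
        return mask * revealed + 0.5 * (1.0 - revealed)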
Example #6
    def run(self, configuration: Configuration) -> None:
        seed_all(configuration.get("seed"))

        metadata = load_metadata(configuration.metadata)

        if "scale_transform" in configuration:
            scale_transform = load_scale_transform(
                configuration.scale_transform)
        else:
            scale_transform = None

        post_processing = PostProcessing(metadata, scale_transform)

        architecture_configuration = load_configuration(
            configuration.architecture)
        self.validate_architecture_configuration(architecture_configuration)
        architecture = create_architecture(metadata,
                                           architecture_configuration)
        architecture.to_gpu_if_available()

        checkpoints = Checkpoints()
        checkpoint = checkpoints.load(configuration.checkpoint)
        if "best_architecture" in checkpoint:
            checkpoints.load_states(checkpoint["best_architecture"],
                                    architecture)
        else:
            checkpoints.load_states(checkpoint["architecture"], architecture)

        samples = []

        # create the strategy if defined
        if "strategy" in configuration:
            # validate that the factory name is present
            if "factory" not in configuration.strategy:
                raise Exception(
                    "Missing factory name while creating sample strategy.")

            # validate strategy name
            strategy_name = configuration.strategy.factory
            if strategy_name not in strategy_class_by_name:
                raise Exception(
                    "Invalid factory name '{}' while creating sample strategy."
                    .format(strategy_name))

            # create the strategy
            strategy_class = strategy_class_by_name[strategy_name]
            strategy = strategy_class(**configuration.strategy.get(
                "arguments", default={}, transform_default=False))

        # use the default strategy
        else:
            strategy = DefaultSampleStrategy()

        # this exists only to pass fewer parameters back and forth
        sampler = Sampler(self, configuration, metadata, architecture,
                          post_processing)

        # while more samples are needed
        start = 0
        while start < configuration.sample_size:
            # do not calculate gradients
            with torch.no_grad():
                # sample:
                # the task delegates to the strategy and passes the sampler object to avoid passing even more parameters
                #   the strategy may prepare additional sampling arguments (e.g. condition)
                #   the strategy delegates to the sampler object
                #     the sampler object delegates back to the task adding parameters that it was keeping
                #       the task child class does the actual sampling depending on the model
                #     the sampler object applies post-processing
                #   the strategy may apply filtering to the samples (e.g. rejection)
                # the task finally gets the sample
                batch_samples = strategy.generate_sample(
                    sampler, configuration, metadata)

            # transform back the samples
            batch_samples = to_cpu_if_was_in_gpu(batch_samples)
            batch_samples = batch_samples.numpy()

            # if the batch is not empty
            if len(batch_samples) > 0:
                # do not go further than the desired number of samples
                end = min(start + len(batch_samples),
                          configuration.sample_size)
                # keep only as many samples from the batch as are still needed
                batch_samples = batch_samples[:end - start, :]
                # if it is the first batch
                if len(samples) == 0:
                    samples = batch_samples
                # if it's not the first batch, concatenate
                else:
                    samples = np.concatenate((samples, batch_samples), axis=0)
                # move to next batch
                start = end

        # save the samples
        np.save(configuration.output, samples)
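
The factory lookup means a new strategy only has to implement generate_sample and be registered in strategy_class_by_name. A hypothetical minimal strategy; the sampler.sample call is an assumed method name, since the Sampler interface is not shown here:

    # hypothetical strategy; sampler.sample is an assumption
    class PassThroughSampleStrategy:

        def generate_sample(self, sampler, configuration, metadata):
            # a real strategy could build a condition here, or reject
            # some of the returned rows before handing them back
            return sampler.sample(configuration.batch_size)

    strategy_class_by_name["pass_through"] = PassThroughSampleStrategy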
Example #7
    def impute(self, configuration: Configuration, metadata: Metadata,
               architecture: Architecture, batch: Dict[str, Tensor]) -> Tensor:
        # loss function
        loss_function = create_component(architecture, metadata,
                                         configuration.reconstruction_loss)
        masked_loss_function = MaskedReconstructionLoss(loss_function)
        # total number of values in the batch, used to normalize the loss
        batch_size = batch["features"].shape[0] * batch["features"].shape[1]
        # we need the non missing mask for the loss
        non_missing_mask = inverse_mask(batch["missing_mask"])

        # initial noise
        noise = to_gpu_if_available(
            FloatTensor(len(batch["features"]),
                        architecture.arguments.noise_size).normal_())
        noise.requires_grad_()

        # we are not updating the generator, only the noise
        optimizer = Adam([noise],
                         weight_decay=0,
                         lr=configuration.noise_learning_rate)
        architecture.generator.eval()

        # logger
        log_path = create_parent_directories_if_needed(configuration.logs)
        logger = TrainLogger(self.logger, log_path, False)

        # initial generation
        logger.start_timer()
        generated = architecture.generator(noise,
                                           condition=batch.get("labels"))

        # iterate until the maximum number of iterations is reached
        # or until the non-missing loss falls below the tolerance
        max_iterations = configuration.max_iterations
        for iteration in range(1, max_iterations + 1):
            # compute the loss on the non-missing values
            non_missing_loss = masked_loss_function(generated,
                                                    batch["features"],
                                                    non_missing_mask)
            logger.log(iteration, max_iterations, "non_missing_loss",
                       to_cpu_if_was_in_gpu(non_missing_loss).item())

            # this loss only makes sense if the ground truth is present
            # only used for debugging
            if configuration.get("log_missing_loss", False):
                # this part should not affect the gradient calculation
                with torch.no_grad():
                    missing_loss = masked_loss_function(
                        generated, batch["raw_features"],
                        batch["missing_mask"])
                    logger.log(iteration, max_iterations, "missing_loss",
                               to_cpu_if_was_in_gpu(missing_loss).item())

                    loss = loss_function(generated,
                                         batch["raw_features"]) / batch_size
                    logger.log(iteration, max_iterations, "loss",
                               to_cpu_if_was_in_gpu(loss).item())

            # if the generation is good enough we stop
            tolerance = configuration.get("tolerance", 1e-5)
            if to_cpu_if_was_in_gpu(non_missing_loss).item() < tolerance:
                break

            # clear previous gradients
            optimizer.zero_grad()
            # compute the gradients
            non_missing_loss.backward()
            # update the noise
            optimizer.step()

            # generate next
            logger.start_timer()
            generated = architecture.generator(noise,
                                               condition=batch.get("labels"))

        return generated
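
The core trick here, optimizing the latent noise while the generator stays frozen, works for any leaf tensor. A self-contained toy version of the same loop:

    # toy version: fit an input tensor with Adam while everything else is fixed
    import torch
    from torch.optim import Adam

    noise = torch.zeros(4, requires_grad=True)
    target = torch.ones(4)
    optimizer = Adam([noise], lr=0.1)
    for _ in range(100):
        loss = ((noise - target) ** 2).sum()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # noise is now close to target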
Example #8
    def val_batch(architecture: Architecture, batch: Batch,
                  post_processing: PostProcessing) -> float:
        generated = architecture.generator(batch["features"],
                                           missing_mask=batch["missing_mask"])
        loss = architecture.val_loss(post_processing, generated, batch)
        return to_cpu_if_was_in_gpu(loss).item()
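
A typical way to use this helper is to average it over a validation loader; val_loader below is hypothetical:

    # hypothetical validation loop around val_batch
    losses = [val_batch(architecture, batch, post_processing)
              for batch in val_loader]
    mean_val_loss = sum(losses) / len(losses)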
Example #9
    def to_cpu_if_was_in_gpu(self) -> None:
        for name, component in self.items():
            if isinstance(component, Module):  # skip optimizers
                self[name] = to_cpu_if_was_in_gpu(component)
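
This method shadows the module-level helper of the same name used throughout these examples. A plausible tensor-level version, stated as an assumption rather than the repo's source (torch.nn.Module has no is_cuda flag, so modules would need a parameter check instead):

    # presumed behaviour of the tensor-level helper (assumption)
    def to_cpu_if_was_in_gpu(tensor):
        return tensor.cpu() if tensor.is_cuda else tensor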