示例#1
0
    def forward(self, architecture: Architecture, real_features: Tensor, fake_features: Tensor,
                **additional_inputs: Tensor) -> Tensor:
        """
        Compute the parent critic loss plus a gradient penalty term.

        The penalty is evaluated on random interpolations between the real and the fake features:
        the squared distance between one and the L2 norm (over dim 1) of the critic gradient with
        respect to each interpolated sample is averaged and scaled by self.weight.

        :param architecture: provides the discriminator that is evaluated on the interpolations
        :param real_features: batch of real samples
        :param fake_features: batch of generated samples, combined element-wise with the real ones
        :param additional_inputs: extra keyword inputs forwarded as they are to the parent loss and to
            the discriminator
        :return: the parent loss plus the weighted gradient penalty
        """
        base_loss = super(WGANCriticLossWithGradientPenalty, self).forward(
            architecture, real_features, fake_features, **additional_inputs)

        # one random mixing coefficient per sample, repeated to match the shape of the features
        mixing = to_gpu_if_available(rand(len(real_features), 1).expand(real_features.size()))

        # the interpolations must track gradients because the penalty is defined
        # on the gradient of the discriminator output with respect to them
        mixed_features = mixing * real_features + ((1 - mixing) * fake_features)
        mixed_features.requires_grad_()

        # the additional inputs (e.g. conditions) are not interpolated
        # because they are the same for fake and real features
        mixed_predictions = architecture.discriminator(mixed_features, **additional_inputs)

        # create_graph builds the graph of the derivative so the penalty can be back-propagated
        all_gradients = grad(outputs=mixed_predictions,
                             inputs=mixed_features,
                             grad_outputs=to_gpu_if_available(ones_like(mixed_predictions)),
                             create_graph=True,
                             retain_graph=True,
                             only_inputs=True)
        gradient_norms = all_gradients[0].norm(2, dim=1)
        penalty = ((gradient_norms - 1) ** 2).mean() * self.weight

        return base_loss + penalty
示例#2
0
    def run(self, configuration: Configuration) -> None:
        """
        Impute the missing values of a feature matrix with a trained architecture and save the result.

        Loads the architecture states from configuration.checkpoint, reads the features and the missing
        mask from numpy files, calls self.impute on the pre-processed batch, keeps the original features
        where the mask is zero, post-processes the result and writes it to configuration.output.

        :param configuration: task configuration; reads seed, metadata, architecture, checkpoint,
            imputation, features, missing_mask, output and optionally scale_transform
        """
        seed_all(configuration.get("seed"))

        metadata = load_metadata(configuration.metadata)

        architecture_configuration = load_configuration(configuration.architecture)
        self.validate_architecture_configuration(architecture_configuration)
        architecture = create_architecture(metadata, architecture_configuration)
        architecture.to_gpu_if_available()

        # use the best stored states when the checkpoint has them, otherwise the latest ones
        checkpoints = Checkpoints()
        checkpoint = checkpoints.load(configuration.checkpoint)
        states_key = "best_architecture" if "best_architecture" in checkpoint else "architecture"
        checkpoints.load_states(checkpoint[states_key], architecture)

        # pre-processing only consists of the configured imputation component
        pre_processing = PreProcessing(create_component(architecture, metadata, configuration.imputation))

        # post-processing has an optional scale transform
        scale_transform = (load_scale_transform(configuration.scale_transform)
                           if "scale_transform" in configuration else None)
        post_processing = PostProcessing(metadata, scale_transform)

        # both inputs are stored as numpy arrays and used as float tensors
        features = to_gpu_if_available(torch.from_numpy(np.load(configuration.features)).float())
        missing_mask = to_gpu_if_available(torch.from_numpy(np.load(configuration.missing_mask)).float())

        # initial imputation followed by the model outputs
        batch = pre_processing.transform({"features": features, "missing_mask": missing_mask})
        model_output = self.impute(configuration, metadata, architecture, batch)

        # take the model output where the mask is one and the original features where it is zero
        imputed = compose_with_mask(mask=missing_mask, differentiable=False, where_one=model_output,
                                    where_zero=features)
        imputed = post_processing.transform(imputed)

        # save the imputation as a numpy array
        np.save(configuration.output, to_cpu_if_was_in_gpu(imputed).numpy())
示例#3
0
    def train_generator_step(self, configuration: Configuration, metadata: Metadata,
                             architecture: Architecture) -> float:
        """
        Run a single optimization step of the generator.

        :param configuration: provides the batch size
        :param metadata: provides the size of the dependent variable for conditional architectures
        :param architecture: provides the generator optimizer, the generator loss and the arguments
        :return: the value of the generator loss for this step
        """
        optimizer = architecture.generator_optimizer

        # start from clean gradients
        optimizer.zero_grad()

        # no condition unless the architecture is conditional
        condition = None
        if "conditional" in architecture.arguments:
            # for now the conditions are drawn uniformly from [0, number of conditions)
            # but this could be controlled in a different way
            # (the same code is used for binary and categorical dependent variables)
            condition_count = metadata.get_dependent_variable().get_size()
            condition = to_gpu_if_available(FloatTensor(configuration.batch_size).uniform_(0, condition_count))

        # a full batch of fake features and its loss
        generated = self.sample_fake(architecture, configuration.batch_size, condition=condition)
        generator_loss = architecture.generator_loss(architecture, generated, condition=condition)

        # back-propagate and update the generator weights
        generator_loss.backward()
        optimizer.step()

        return to_cpu_if_was_in_gpu(generator_loss).item()
示例#4
0
 def generate_sample(self, configuration: Configuration, metadata: Metadata,
                     architecture: Architecture,
                     **additional_inputs: Tensor) -> Tensor:
     """
     Decode a batch of codes drawn from a standard normal distribution.

     :param configuration: provides the batch size
     :param metadata: not used by this implementation
     :param architecture: provides the autoencoder and the code size
     :param additional_inputs: extra keyword inputs forwarded to the decoder
     :return: the output of the decoder for the random codes
     """
     code_shape = (configuration.batch_size, architecture.arguments.code_size)
     random_code = to_gpu_if_available(FloatTensor(*code_shape).normal_())

     # the autoencoder is put in evaluation mode before decoding
     autoencoder = architecture.autoencoder
     autoencoder.eval()
     return autoencoder.decode(random_code, **additional_inputs)
示例#5
0
def generate_hint(missing_mask: Tensor, hint_probability: float,
                  metadata: Metadata) -> Tensor:
    """
    Build the hint for a missing mask by hiding part of it.

    The GAIN paper describes a more complex hint mechanism, but the online code example uses
    this simpler technique (see: https://github.com/jsyoon0823/GAIN/issues/2).

    :param missing_mask: the mask to derive the hint from
    :param hint_probability: passed to generate_mask_for as the probability of the hint mask
    :param metadata: passed to generate_mask_for
    :return: the missing mask multiplied element-wise by the random hint mask
    """
    # random mask with "hint probability" of having ones
    revealed = generate_mask_for(missing_mask, hint_probability, metadata)
    revealed = to_gpu_if_available(revealed)

    # the missing mask is kept where there is a hint (one)
    # and replaced by zeros where there is no hint (zero)
    return missing_mask * revealed
示例#6
0
    def run(self, configuration: Configuration) -> None:
        """
        Encode a feature matrix with a trained autoencoder and save the resulting code.

        Loads the architecture states from configuration.checkpoint, reads the features (and the
        labels when configured) from numpy files, encodes them without tracking gradients and writes
        the "code" entry of the encoder output to configuration.output.

        :param configuration: task configuration; reads seed, metadata, architecture, checkpoint,
            features, output and optionally labels
        """
        seed_all(configuration.get("seed"))

        metadata = load_metadata(configuration.metadata)

        architecture_configuration = load_configuration(configuration.architecture)
        self.validate_architecture_configuration(architecture_configuration)
        architecture = create_architecture(metadata, architecture_configuration)
        architecture.to_gpu_if_available()

        # use the best stored states when the checkpoint has them, otherwise the latest ones
        checkpoints = Checkpoints()
        checkpoint = checkpoints.load(configuration.checkpoint)
        states_key = "best_architecture" if "best_architecture" in checkpoint else "architecture"
        checkpoints.load_states(checkpoint[states_key], architecture)

        # the features are stored as a numpy array and used as a float tensor
        features = to_gpu_if_available(torch.from_numpy(np.load(configuration.features)).float())

        # the condition is only present when labels are configured
        condition = None
        if "labels" in configuration:
            condition = to_gpu_if_available(torch.from_numpy(np.load(configuration.labels)).float())

        # no gradients are needed to encode
        with torch.no_grad():
            encoded = architecture.autoencoder.encode(features, condition=condition)["code"]

        # save the code as a numpy array
        np.save(configuration.output, to_cpu_if_was_in_gpu(encoded).numpy())
示例#7
0
    def forward(self, inputs: Tensor) -> Tensor:
        """
        Replace part of the inputs with zeros during training and pass them through otherwise.

        :param inputs: the values that can be dropped
        :return: the inputs themselves in evaluation mode; in training mode, the composition of
            zeros (where the random drop mask is one) and the inputs (where it is zero)
        """
        # the inputs are not touched during evaluation
        if not self.training:
            return inputs

        # random mask created with the drop probability
        drop_mask = generate_mask_for(inputs, self.drop_probability, self.metadata)
        drop_mask = to_gpu_if_available(drop_mask)

        dropped_values = torch.zeros_like(inputs)
        return compose_with_mask(mask=drop_mask,
                                 where_one=dropped_values,
                                 where_zero=inputs,
                                 differentiable=True)
示例#8
0
    def forward(self, architecture: Architecture, real_features: Tensor,
                fake_features: Tensor, **additional_inputs: Tensor) -> Tensor:
        """
        Compute the discriminator loss as the sum of the losses on real and fake features.

        :param architecture: provides the discriminator
        :param real_features: batch of real samples, compared against positive labels
            (smoothed when self.smooth_positive_labels is set)
        :param fake_features: batch of generated samples, compared against zeros
        :param additional_inputs: extra keyword inputs forwarded to the discriminator
        :return: self.bce_loss on the real predictions plus self.bce_loss on the fake predictions
        """
        discriminator = architecture.discriminator

        # real samples against positive labels
        predictions_on_real = discriminator(real_features, **additional_inputs)
        real_targets = generate_positive_labels(len(predictions_on_real), self.smooth_positive_labels)
        loss_on_real = self.bce_loss(predictions_on_real, real_targets)

        # fake samples against negative labels
        predictions_on_fake = discriminator(fake_features, **additional_inputs)
        fake_targets = to_gpu_if_available(zeros(len(predictions_on_fake)))
        loss_on_fake = self.bce_loss(predictions_on_fake, fake_targets)

        return loss_on_real + loss_on_fake
示例#9
0
 def generate_sample(self, sampler: Sampler, configuration: Configuration,
                     metadata: Metadata) -> Tensor:
     """
     Generate a sample in which every element of the batch uses self.condition.

     :param sampler: the sampler that receives the condition
     :param configuration: provides the batch size
     :param metadata: not used by this implementation
     :return: the result of sampler.generate_sample for the repeated condition
     """
     # a float vector of ones scaled by the condition repeats it once per batch element
     unit_vector = torch.ones(configuration.batch_size, dtype=torch.float)
     repeated_condition = to_gpu_if_available(unit_vector * self.condition)
     return sampler.generate_sample(condition=repeated_condition)
示例#10
0
 def sample_fake(self, architecture: Architecture, size: int, **additional_inputs: Tensor) -> Tensor:
     """
     Generate fake features from random noise.

     :param architecture: provides the generator and the noise size
     :param size: number of noise vectors to draw
     :param additional_inputs: extra keyword inputs forwarded to the generator
     :return: the output of the generator for the random noise
     """
     # for now the noise is drawn from a normal distribution but it could be another one
     noise_shape = (size, architecture.arguments.noise_size)
     random_noise = to_gpu_if_available(FloatTensor(*noise_shape).normal_())
     return architecture.generator(random_noise, **additional_inputs)
示例#11
0
    def impute(self, configuration: Configuration, metadata: Metadata,
               architecture: Architecture, batch: Dict[str, Tensor]) -> Tensor:
        """
        Impute by optimizing the generator input noise until the output matches the observed values.

        The generator is used in evaluation mode and is not updated: the only optimized tensor is the
        noise, using Adam on the reconstruction loss restricted to the non-missing positions.
        The iterations stop at configuration.max_iterations or as soon as that loss is lower than
        the "tolerance" (1e-5 by default).

        The train logger is always closed before leaving, even when an iteration fails.

        :param configuration: reads reconstruction_loss, noise_learning_rate, logs, max_iterations
            and optionally log_missing_loss and tolerance
        :param metadata: used to create the reconstruction loss component
        :param architecture: provides the generator and the noise size
        :param batch: must contain "features" and "missing_mask"; "labels" is optional and used as
            the generator condition; "raw_features" is required when log_missing_loss is enabled
        :return: the output of the generator for the last noise
        """
        # loss function
        loss_function = create_component(architecture, metadata, configuration.reconstruction_loss)
        masked_loss_function = MaskedReconstructionLoss(loss_function)
        # total number of values in the features (rows * columns), only used to scale the debugging loss
        number_of_values = batch["features"].shape[0] * batch["features"].shape[1]
        # we need the non missing mask for the loss
        non_missing_mask = inverse_mask(batch["missing_mask"])

        # initial noise
        noise = to_gpu_if_available(FloatTensor(len(batch["features"]), architecture.arguments.noise_size).normal_())
        noise.requires_grad_()

        # it is not the generator what we are updating
        # it is the noise
        optimizer = Adam([noise], weight_decay=0, lr=configuration.noise_learning_rate)
        architecture.generator.eval()

        # logger
        log_path = create_parent_directories_if_needed(configuration.logs)
        logger = TrainLogger(self.logger, log_path, False)

        try:
            # initial generation
            logger.start_timer()
            generated = architecture.generator(noise, condition=batch.get("labels"))

            # iterate until we reach the maximum number of iterations or until the non missing loss is too small
            max_iterations = configuration.max_iterations
            for iteration in range(1, max_iterations + 1):
                # compute the loss on the non-missing values
                non_missing_loss = masked_loss_function(generated, batch["features"], non_missing_mask)
                # the value is read only once and shared by the log and the stop criterion
                non_missing_loss_value = to_cpu_if_was_in_gpu(non_missing_loss).item()
                logger.log(iteration, max_iterations, "non_missing_loss", non_missing_loss_value)

                # this loss only makes sense if the ground truth is present
                # only used for debugging
                if configuration.get("log_missing_loss", False):
                    # this part should not affect the gradient calculation
                    with torch.no_grad():
                        missing_loss = masked_loss_function(generated, batch["raw_features"], batch["missing_mask"])
                        logger.log(iteration, max_iterations, "missing_loss",
                                   to_cpu_if_was_in_gpu(missing_loss).item())

                        loss = loss_function(generated, batch["raw_features"]) / number_of_values
                        logger.log(iteration, max_iterations, "loss", to_cpu_if_was_in_gpu(loss).item())

                # if the generation is good enough we stop
                if non_missing_loss_value < configuration.get("tolerance", 1e-5):
                    break

                # clear previous gradients
                optimizer.zero_grad()
                # compute the gradients
                non_missing_loss.backward()
                # update the noise
                optimizer.step()

                # generate next
                logger.start_timer()
                generated = architecture.generator(noise, condition=batch.get("labels"))
        finally:
            # the logger is local to this call, so nobody else can release it
            logger.close()

        return generated
示例#12
0
 def create(self, architecture: Architecture, metadata: Metadata,
            arguments: Configuration) -> Any:
     """
     Create a MeanAndModesImputationLayer from values stored in a numpy file.

     :param architecture: not used by this implementation
     :param metadata: not used by this implementation
     :param arguments: provides the path of the numpy file and optionally "differentiable"
     :return: the imputation layer built from the loaded values
     """
     # the stored array is used as a float tensor
     stored_values = torch.from_numpy(np.load(arguments.path)).float()
     # "differentiable" is only forwarded when it is defined
     optional_arguments = arguments.get_all_defined(["differentiable"])
     return MeanAndModesImputationLayer(to_gpu_if_available(stored_values), **optional_arguments)
示例#13
0
    def run(self, configuration: Configuration) -> None:
        """
        Train an architecture for the configured number of epochs, logging metrics and saving checkpoints.

        Every dataset in configuration.data is loaded as a float tensor. Training starts from the output
        checkpoint (when "continue_from_output" is set and it exists), otherwise from the input
        checkpoint (when configured), otherwise from a freshly initialized architecture.
        For each remaining epoch self.train_epoch is called, its metrics are logged and the checkpoint
        is updated, optionally keeping the states of the epoch with the lowest value of the metric named
        by "keep_checkpoint_by_metric". The last checkpoint is always saved when the epochs finish.

        The train logger is closed in a finally clause, so it is also released when training fails.

        :param configuration: task configuration; reads seed, data, metadata, architecture, checkpoints
            (output, max_delay and optionally input, continue_from_output, ignore_input_epochs and
            use_best_input), logs, epochs and optionally imputation, scale_transform and
            keep_checkpoint_by_metric
        """
        seed_all(configuration.get("seed"))

        datasets = Datasets()
        for dataset_name, dataset_path in configuration.data.items():
            datasets[dataset_name] = to_gpu_if_available(torch.from_numpy(np.load(dataset_path)).float())

        metadata = load_metadata(configuration.metadata)

        architecture_configuration = load_configuration(configuration.architecture)
        self.validate_architecture_configuration(architecture_configuration)
        architecture = create_architecture(metadata, architecture_configuration)
        architecture.to_gpu_if_available()

        create_parent_directories_if_needed(configuration.checkpoints.output)
        checkpoints = Checkpoints()

        # no input checkpoint by default
        checkpoint = None

        # continue from an output checkpoint (has priority over input checkpoint)
        if configuration.checkpoints.get("continue_from_output", default=False) \
                and checkpoints.exists(configuration.checkpoints.output):
            checkpoint = checkpoints.load(configuration.checkpoints.output)
        # continue from an input checkpoint
        elif "input" in configuration.checkpoints:
            checkpoint = checkpoints.load(configuration.checkpoints.input)
            if configuration.checkpoints.get("ignore_input_epochs", default=False):
                checkpoint["epoch"] = 0
            if configuration.checkpoints.get("use_best_input", default=False):
                # the best states become the starting point and the "best" bookkeeping is discarded
                # (this raises a KeyError if the input checkpoint has no best states)
                checkpoint["architecture"] = checkpoint.pop("best_architecture")
                checkpoint.pop("best_epoch")
                checkpoint.pop("best_metric")

        # if there is no starting checkpoint then initialize
        if checkpoint is None:
            architecture.initialize()

            checkpoint = {
                "architecture": checkpoints.extract_states(architecture),
                "epoch": 0
            }
        # if there is a starting checkpoint then load it
        else:
            checkpoints.load_states(checkpoint["architecture"], architecture)

        # the last argument is true when training continues from a previous epoch
        log_path = create_parent_directories_if_needed(configuration.logs)
        logger = TrainLogger(self.logger, log_path, checkpoint["epoch"] > 0)

        try:
            # pre-processing
            if "imputation" in configuration:
                imputation = create_component(architecture, metadata, configuration.imputation)
            else:
                imputation = None

            pre_processing = PreProcessing(imputation)

            # post-processing
            if "scale_transform" in configuration:
                scale_transform = load_scale_transform(configuration.scale_transform)
            else:
                scale_transform = None

            post_processing = PostProcessing(metadata, scale_transform)

            for epoch in range(checkpoint["epoch"] + 1, configuration.epochs + 1):
                # train discriminator and generator
                logger.start_timer()

                metrics = self.train_epoch(configuration, metadata, architecture, datasets, pre_processing,
                                           post_processing)

                for metric_name, metric_value in metrics.items():
                    logger.log(epoch, configuration.epochs, metric_name, metric_value)

                # update the checkpoint
                checkpoint["architecture"] = checkpoints.extract_states(architecture)
                checkpoint["epoch"] = epoch

                # if the best architecture parameters should be kept
                if "keep_checkpoint_by_metric" in configuration:
                    # get the metric used to compare checkpoints
                    checkpoint_metric = metrics[configuration.keep_checkpoint_by_metric]

                    # check if this is the best checkpoint (or the first); lower values are better
                    if "best_metric" not in checkpoint or checkpoint_metric < checkpoint["best_metric"]:
                        # NOTE(review): this stores the same object as checkpoint["architecture"];
                        # assumes extract_states returns states detached from the live parameters - confirm
                        checkpoint["best_architecture"] = checkpoint["architecture"]
                        checkpoint["best_epoch"] = epoch
                        checkpoint["best_metric"] = checkpoint_metric

                # save checkpoint
                checkpoints.delayed_save(checkpoint, configuration.checkpoints.output,
                                         configuration.checkpoints.max_delay)

            # force save of last checkpoint
            checkpoints.save(checkpoint, configuration.checkpoints.output)
        finally:
            # finish: release the logger on success and on failure
            logger.close()
示例#14
0
def generate_positive_labels(size: int, smooth: bool):
    """
    Create a vector of positive labels.

    :param size: number of labels
    :param smooth: if true the labels are drawn uniformly between 0.9 and 1 instead of being ones
    :return: the labels after to_gpu_if_available
    """
    labels = FloatTensor(size).uniform_(0.9, 1) if smooth else ones(size)
    return to_gpu_if_available(labels)
示例#15
0
 def to_gpu_if_available(self) -> None:
     """
     Replace every Module stored in this container with the result of to_gpu_if_available.

     Entries that are not a Module (e.g. optimizers) are left as they are.
     """
     for key, value in self.items():
         # skip optimizers and anything else that is not a Module
         if not isinstance(value, Module):
             continue
         self[key] = to_gpu_if_available(value)