Example 1
def main():
    options_parser = argparse.ArgumentParser(
        description="Run augmentation tasks.")
    options_parser.add_argument("configuration",
                                type=str,
                                help="Configuration json file.")
    options = options_parser.parse_args()

    MultiProcessTaskRunner(AugmentationWorker).timed_run(
        load_configuration(options.configuration))
Example 2
    def run(self, configuration: Configuration) -> None:
        seed_all(configuration.get("seed"))

        metadata = load_metadata(configuration.metadata)

        architecture_configuration = load_configuration(configuration.architecture)
        self.validate_architecture_configuration(architecture_configuration)
        architecture = create_architecture(metadata, architecture_configuration)
        architecture.to_gpu_if_available()

        checkpoints = Checkpoints()
        checkpoint = checkpoints.load(configuration.checkpoint)
        if "best_architecture" in checkpoint:
            checkpoints.load_states(checkpoint["best_architecture"], architecture)
        else:
            checkpoints.load_states(checkpoint["architecture"], architecture)

        # pre-processing
        imputation = create_component(architecture, metadata, configuration.imputation)

        pre_processing = PreProcessing(imputation)

        # post-processing
        if "scale_transform" in configuration:
            scale_transform = load_scale_transform(configuration.scale_transform)
        else:
            scale_transform = None

        post_processing = PostProcessing(metadata, scale_transform)

        # load the features
        features = to_gpu_if_available(torch.from_numpy(np.load(configuration.features)).float())
        missing_mask = to_gpu_if_available(torch.from_numpy(np.load(configuration.missing_mask)).float())

        # initial imputation
        batch = pre_processing.transform({"features": features, "missing_mask": missing_mask})

        # generate the model outputs
        output = self.impute(configuration, metadata, architecture, batch)

        # imputation
        output = compose_with_mask(mask=missing_mask, differentiable=False, where_one=output, where_zero=features)

        # post-process
        output = post_processing.transform(output)

        # save the imputation
        output = to_cpu_if_was_in_gpu(output)
        output = output.numpy()
        np.save(configuration.output, output)
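
The imputation step above relies on compose_with_mask selecting the model output where the missing mask is one and keeping the original feature where it is zero. A minimal sketch of that semantics, assuming binary float masks (the repository's actual helper may handle more cases):

import torch

def compose_with_mask_sketch(mask: torch.Tensor, where_one: torch.Tensor,
                             where_zero: torch.Tensor) -> torch.Tensor:
    # keep where_one entries at mask == 1 and where_zero entries at mask == 0
    return mask * where_one + (1 - mask) * where_zero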
Example 3
    def process(self, inputs: Any) -> None:
        # prepare the outputs
        outputs = dict(inputs)

        # load the scale transform
        # remove the path from the outputs
        scale_transform = load_scale_transform(outputs.pop("scale_transform"))

        # load the imputation task configuration
        impute_task = load_configuration(outputs.pop("impute_task"))

        # if the imputation task output exists
        if os.path.exists(impute_task.arguments.output):
            # losses
            mse_loss_function = MSELoss()
            rmse_loss_function = RMSE()
            mr_loss_function = MultiReconstructionLoss(load_metadata(impute_task.arguments.metadata))

            # load the scaled data
            scaled_inputs = torch.from_numpy(np.load(impute_task.arguments.features))
            scaled_imputed = torch.from_numpy(np.load(impute_task.arguments.output))
            # compute the scaled metrics
            outputs["scaled_mse"] = mse_loss_function(scaled_imputed, scaled_inputs).item()
            outputs["scaled_rmse"] = rmse_loss_function(scaled_imputed, scaled_inputs).item()
            outputs["scaled_mr"] = mr_loss_function(scaled_imputed, scaled_inputs).item()

            # apply the inverse scale transform to recover the original unscaled data
            inputs = torch.from_numpy(scale_transform.inverse_transform(scaled_inputs.numpy()))
            imputed = torch.from_numpy(scale_transform.inverse_transform(scaled_imputed.numpy()))
            # compute the unscaled metrics
            outputs["mse"] = mse_loss_function(imputed, inputs).item()
            outputs["rmse"] = rmse_loss_function(imputed, inputs).item()
            outputs["mr"] = mr_loss_function(imputed, inputs).item()

        # if the task was not run
        else:
            self.logger.info("{} does not exist.".format(impute_task.arguments.output))

        # send the outputs
        self.send_output(outputs)
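
The RMSE loss used above presumably computes the square root of the mean squared error; a minimal sketch under that assumption (the repository's RMSE class is not shown here):

import torch
from torch.nn import MSELoss

def rmse(predicted: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
    # square root of the mean squared error
    return torch.sqrt(MSELoss()(predicted, target))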
Example 4
    def run(self, configuration: Configuration) -> None:
        seed_all(configuration.get("seed"))

        metadata = load_metadata(configuration.metadata)

        architecture_configuration = load_configuration(
            configuration.architecture)
        self.validate_architecture_configuration(architecture_configuration)
        architecture = create_architecture(metadata,
                                           architecture_configuration)
        architecture.to_gpu_if_available()

        checkpoints = Checkpoints()
        checkpoint = checkpoints.load(configuration.checkpoint)
        if "best_architecture" in checkpoint:
            checkpoints.load_states(checkpoint["best_architecture"],
                                    architecture)
        else:
            checkpoints.load_states(checkpoint["architecture"], architecture)

        # load the features
        features = to_gpu_if_available(
            torch.from_numpy(np.load(configuration.features)).float())

        # conditional
        if "labels" in configuration:
            condition = to_gpu_if_available(
                torch.from_numpy(np.load(configuration.labels)).float())
        else:
            condition = None

        # encode
        with torch.no_grad():
            code = architecture.autoencoder.encode(features,
                                                   condition=condition)["code"]

        # save the code
        code = to_cpu_if_was_in_gpu(code)
        code = code.numpy()
        np.save(configuration.output, code)
Example 5
    def process(self, inputs: Any) -> None:
        from deep_generative_models.tasks.runner import TaskRunner  # import here to avoid circular dependency
        task_runner = TaskRunner(logger=self.logger)

        start_time = time.time()
        output = {"has_error": False, "worker": self.worker_number}

        try:
            assert isinstance(inputs, str), "Inputs must be configuration paths."
            output["path"] = inputs
            configuration = load_configuration(inputs)

            if "gpu_device" in self.worker_configuration:
                output["gpu_device"] = self.worker_configuration.gpu_device
                with torch.cuda.device(self.worker_configuration.gpu_device):
                    task_runner.run(configuration)
            else:
                task_runner.run(configuration)
        except Exception as e:
            output["has_error"] = True
            output["error"] = repr(e)

        output["time"] = time.time() - start_time
        self.send_output(output)
Example 6
import argparse

from typing import List

from torch import Tensor, FloatTensor

from deep_generative_models.architecture import Architecture
from deep_generative_models.configuration import Configuration, load_configuration
from deep_generative_models.gpu import to_gpu_if_available
from deep_generative_models.metadata import Metadata
from deep_generative_models.tasks.sample import Sample


class SampleGAN(Sample):

    def mandatory_architecture_components(self) -> List[str]:
        return ["generator"]

    def generate_sample(self, configuration: Configuration, metadata: Metadata, architecture: Architecture,
                        **additional_inputs: Tensor) -> Tensor:
        noise = to_gpu_if_available(FloatTensor(configuration.batch_size, architecture.arguments.noise_size).normal_())
        architecture.generator.eval()
        return architecture.generator(noise, **additional_inputs)


if __name__ == '__main__':
    options_parser = argparse.ArgumentParser(description="Sample from GAN.")
    options_parser.add_argument("configuration", type=str, help="Configuration json file.")
    options = options_parser.parse_args()

    SampleGAN().timed_run(load_configuration(options.configuration))
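
The noise line in generate_sample fills a fresh tensor with standard normal samples; for reference, torch.randn draws from the same distribution (the sizes below are placeholders):

import torch

batch_size, noise_size = 32, 128  # placeholder sizes
noise = torch.randn(batch_size, noise_size)  # standard normal noise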
Example 7
import argparse

from deep_generative_models.configuration import load_configuration
from deep_generative_models.tasks.encode import Encode

if __name__ == '__main__':
    options_parser = argparse.ArgumentParser(
        description="Encode with DeNoisingAutoEncoder.")
    options_parser.add_argument("configuration",
                                type=str,
                                help="Configuration json file.")
    options = options_parser.parse_args()

    Encode().timed_run(load_configuration(options.configuration))
Example 8
import argparse

from torch import Tensor

from deep_generative_models.architecture import Architecture
from deep_generative_models.configuration import load_configuration
from deep_generative_models.tasks.gan_with_autoencoder.train import TrainGANWithAutoencoder


class TrainMedGAN(TrainGANWithAutoencoder):

    def sample_fake(self, architecture: Architecture, size: int, **additional_inputs: Tensor) -> Tensor:
        fake_code = super(TrainMedGAN, self).sample_fake(architecture, size, **additional_inputs)
        return architecture.autoencoder.decode(fake_code, **additional_inputs)


if __name__ == '__main__':
    options_parser = argparse.ArgumentParser(description="Train MedGAN.")
    options_parser.add_argument("configuration", type=str, help="Configuration json file.")
    options = options_parser.parse_args()

    TrainMedGAN().timed_run(load_configuration(options.configuration))
Example 9
        inputs = torch.from_numpy(np.load(configuration.inputs))
        missing_mask = torch.from_numpy(np.load(configuration.missing_mask))

        assert inputs.shape == missing_mask.shape

        # the model needs np.nan in the missing values to work
        inputs = compose_with_mask(missing_mask,
                                   where_one=torch.empty_like(inputs).fill_(np.nan),
                                   where_zero=inputs,
                                   differentiable=False)  # cannot be differentiable with nans!

        # create the model
        model = IterativeImputer(random_state=configuration.get("seed", 0),
                                 estimator=ExtraTreeRegressor(),
                                 missing_values=np.nan)

        # fit the model (scikit-learn works on numpy arrays)
        model.fit(inputs.numpy())

        # save the model
        with open(create_parent_directories_if_needed(configuration.outputs), "wb") as model_file:
            pickle.dump(model, model_file)


if __name__ == '__main__':
    options_parser = argparse.ArgumentParser(description="Train Miss Forest.")
    options_parser.add_argument("configuration", type=str, help="Configuration json file.")
    options = options_parser.parse_args()

    TrainMissForest().timed_run(load_configuration(options.configuration))
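
Besides the repository's own helpers (compose_with_mask, create_parent_directories_if_needed), the truncated snippet above needs these third-party imports; note that scikit-learn's IterativeImputer is experimental and must be enabled explicitly before it can be imported:

import pickle

import numpy as np
import torch
from sklearn.experimental import enable_iterative_imputer  # noqa: F401
from sklearn.impute import IterativeImputer
from sklearn.tree import ExtraTreeRegressor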
Example 10
    def run(self, configuration: Configuration) -> None:
        from deep_generative_models.tasks.runner import TaskRunner  # import here to avoid circular dependency
        task_runner = TaskRunner()
        for child_configuration_path in configuration.tasks:
            task_runner.run(load_configuration(child_configuration_path))
Example 11
    def run(self, configuration: Configuration) -> None:
        seed_all(configuration.get("seed"))

        metadata = load_metadata(configuration.metadata)

        if "scale_transform" in configuration:
            scale_transform = load_scale_transform(
                configuration.scale_transform)
        else:
            scale_transform = None

        post_processing = PostProcessing(metadata, scale_transform)

        architecture_configuration = load_configuration(
            configuration.architecture)
        self.validate_architecture_configuration(architecture_configuration)
        architecture = create_architecture(metadata,
                                           architecture_configuration)
        architecture.to_gpu_if_available()

        checkpoints = Checkpoints()
        checkpoint = checkpoints.load(configuration.checkpoint)
        if "best_architecture" in checkpoint:
            checkpoints.load_states(checkpoint["best_architecture"],
                                    architecture)
        else:
            checkpoints.load_states(checkpoint["architecture"], architecture)

        samples = []

        # create the strategy if defined
        if "strategy" in configuration:
            # validate strategy name is present
            if "factory" not in configuration.strategy:
                raise Exception(
                    "Missing factory name while creating sample strategy.")

            # validate strategy name
            strategy_name = configuration.strategy.factory
            if strategy_name not in strategy_class_by_name:
                raise Exception(
                    "Invalid factory name '{}' while creating sample strategy."
                    .format(strategy_name))

            # create the strategy
            strategy_class = strategy_class_by_name[strategy_name]
            strategy = strategy_class(**configuration.strategy.get(
                "arguments", default={}, transform_default=False))

        # use the default strategy
        else:
            strategy = DefaultSampleStrategy()

        # this is only to pass fewer parameters back and forth
        sampler = Sampler(self, configuration, metadata, architecture,
                          post_processing)

        # while more samples are needed
        start = 0
        while start < configuration.sample_size:
            # do not calculate gradients
            with torch.no_grad():
                # sample:
                # the task delegates to the strategy and passes the sampler object to avoid passing even more parameters
                #   the strategy may prepare additional sampling arguments (e.g. condition)
                #   the strategy delegates to the sampler object
                #     the sampler object delegates back to the task adding parameters that it was keeping
                #       the task child class does the actual sampling depending on the model
                #     the sampler object applies post-processing
                #   the strategy may apply filtering to the samples (e.g. rejection)
                # the task finally gets the sample
                batch_samples = strategy.generate_sample(
                    sampler, configuration, metadata)

            # transform back the samples
            batch_samples = to_cpu_if_was_in_gpu(batch_samples)
            batch_samples = batch_samples.numpy()

            # if the batch is not empty
            if len(batch_samples) > 0:
                # do not go further than the desired number of samples
                end = min(start + len(batch_samples),
                          configuration.sample_size)
                # limit the samples taken from the batch based on what is missing
                batch_samples = batch_samples[:min(len(batch_samples), end - start), :]
                # if it is the first batch
                if len(samples) == 0:
                    samples = batch_samples
                # if it's not the first batch we have to concatenate
                else:
                    samples = np.concatenate((samples, batch_samples), axis=0)
                # move to next batch
                start = end

        # save the samples
        np.save(configuration.output, samples)
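
DefaultSampleStrategy is not shown here; given the delegation chain described in the comments, a minimal sketch of a compatible strategy (an assumption, not the repository's actual class) would delegate to the sampler without extra arguments or filtering:

class DefaultSampleStrategySketch:
    def generate_sample(self, sampler, configuration, metadata):
        # no additional sampling arguments and no rejection filtering
        return sampler.generate_sample()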
Example 12
    def mandatory_arguments(self) -> List[str]:
        return super(MissForestImputation,
                     self).mandatory_arguments() + ["model"]

    def impute(self, configuration: Configuration, metadata: Metadata,
               scaled_inputs: Tensor, missing_mask: Tensor) -> Tensor:
        with open(configuration.model, "rb") as model_file:
            model = pickle.load(model_file)

        # the model needs np.nan in the missing values to work
        scaled_inputs = compose_with_mask(
            missing_mask,
            where_one=torch.empty_like(scaled_inputs).fill_(np.nan),
            where_zero=scaled_inputs,
            differentiable=False)  # cannot be differentiable with nans!

        # impute with the scikit-learn model
        imputed = model.transform(scaled_inputs)

        # go back to torch (annoying)
        return torch.from_numpy(imputed).float()


if __name__ == '__main__':
    options_parser = argparse.ArgumentParser(
        description="Impute with Miss Forest.")
    options_parser.add_argument("configuration",
                                type=str,
                                help="Configuration json file.")
    options = options_parser.parse_args()

    MissForestImputation().timed_run(load_configuration(options.configuration))
Example 13
                # count the non-missing values the variable has and subtract the ones to get the zeros
                zero_count = non_missing_mask[:, index].sum() - one_count
                # fill with a one if there are more ones than zeros
                # if not fill with a zero
                filling_value = (1 if one_count >= zero_count else 0)
            # categorical
            elif variable_metadata.is_categorical():
                # how many ones per column (per categorical variable value)
                column_count = torch.zeros(size)
                for offset in range(size):
                    column_count[offset] = inputs[non_missing_mask[:, index + offset] == 1, index + offset].sum()
                # get the most common
                filling_value = one_hot(column_count.argmax(), num_classes=size)
            # numerical
            else:
                # take the mean of the values where the non missing mask is one
                filling_value = inputs[non_missing_mask[:, index] == 1, index].mean()
            # fill the variable
            filling_values[index:index + size] = filling_value

        # save the filling values
        np.save(configuration.outputs, filling_values.numpy())


if __name__ == '__main__':
    options_parser = argparse.ArgumentParser(description="Calculate the mean or mode per variable.")
    options_parser.add_argument("configuration", type=str, help="Configuration json file.")
    options = options_parser.parse_args()

    ComputeMeansAndModes().timed_run(load_configuration(options.configuration))
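
A quick illustration of the categorical branch above: one_hot of the argmax of the per-column counts yields the most frequent category as a one-hot filling value.

import torch
from torch.nn.functional import one_hot

column_count = torch.tensor([3.0, 7.0, 1.0])  # ones observed per category
print(one_hot(column_count.argmax(), num_classes=3))  # tensor([0, 1, 0])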
Example 14
    def run(self, configuration: Configuration) -> None:
        seed_all(configuration.get("seed"))

        datasets = Datasets()
        for dataset_name, dataset_path in configuration.data.items():
            datasets[dataset_name] = to_gpu_if_available(torch.from_numpy(np.load(dataset_path)).float())

        metadata = load_metadata(configuration.metadata)

        architecture_configuration = load_configuration(configuration.architecture)
        self.validate_architecture_configuration(architecture_configuration)
        architecture = create_architecture(metadata, architecture_configuration)
        architecture.to_gpu_if_available()

        create_parent_directories_if_needed(configuration.checkpoints.output)
        checkpoints = Checkpoints()

        # no input checkpoint by default
        checkpoint = None

        # continue from an output checkpoint (has priority over input checkpoint)
        if configuration.checkpoints.get("continue_from_output", default=False) \
                and checkpoints.exists(configuration.checkpoints.output):
            checkpoint = checkpoints.load(configuration.checkpoints.output)
        # continue from an input checkpoint
        elif "input" in configuration.checkpoints:
            checkpoint = checkpoints.load(configuration.checkpoints.input)
            if configuration.checkpoints.get("ignore_input_epochs", default=False):
                checkpoint["epoch"] = 0
            if configuration.checkpoints.get("use_best_input", default=False):
                checkpoint["architecture"] = checkpoint.pop("best_architecture")
                checkpoint.pop("best_epoch")
                checkpoint.pop("best_metric")

        # if there is no starting checkpoint then initialize
        if checkpoint is None:
            architecture.initialize()

            checkpoint = {
                "architecture": checkpoints.extract_states(architecture),
                "epoch": 0
            }
        # if there is a starting checkpoint then load it
        else:
            checkpoints.load_states(checkpoint["architecture"], architecture)

        log_path = create_parent_directories_if_needed(configuration.logs)
        logger = TrainLogger(self.logger, log_path, checkpoint["epoch"] > 0)

        # pre-processing
        if "imputation" in configuration:
            imputation = create_component(architecture, metadata, configuration.imputation)
        else:
            imputation = None

        pre_processing = PreProcessing(imputation)

        # post-processing
        if "scale_transform" in configuration:
            scale_transform = load_scale_transform(configuration.scale_transform)
        else:
            scale_transform = None

        post_processing = PostProcessing(metadata, scale_transform)

        for epoch in range(checkpoint["epoch"] + 1, configuration.epochs + 1):
            # train discriminator and generator
            logger.start_timer()

            metrics = self.train_epoch(configuration, metadata, architecture, datasets, pre_processing, post_processing)

            for metric_name, metric_value in metrics.items():
                logger.log(epoch, configuration.epochs, metric_name, metric_value)

            # update the checkpoint
            checkpoint["architecture"] = checkpoints.extract_states(architecture)
            checkpoint["epoch"] = epoch

            # if the best architecture parameters should be kept
            if "keep_checkpoint_by_metric" in configuration:
                # get the metric used to compare checkpoints
                checkpoint_metric = metrics[configuration.keep_checkpoint_by_metric]

                # check if this is the best checkpoint (or the first)
                if "best_metric" not in checkpoint or checkpoint_metric < checkpoint["best_metric"]:
                    checkpoint["best_architecture"] = checkpoint["architecture"]
                    checkpoint["best_epoch"] = epoch
                    checkpoint["best_metric"] = checkpoint_metric

            # save checkpoint
            checkpoints.delayed_save(checkpoint, configuration.checkpoints.output, configuration.checkpoints.max_delay)

        # force save of last checkpoint
        checkpoints.save(checkpoint, configuration.checkpoints.output)

        # finish
        logger.close()
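
Putting the loop above together, the checkpoint dictionary always carries the current states and epoch, plus the "best" entries when keep_checkpoint_by_metric is configured; roughly (the values here are illustrative):

checkpoint = {
    "architecture": {...},       # current parameter states
    "epoch": 42,
    # only present when keep_checkpoint_by_metric is configured:
    "best_architecture": {...},  # states of the best epoch so far
    "best_epoch": 17,
    "best_metric": 0.031,        # lower is better in this loop
}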
Example 15
import argparse

from deep_generative_models.configuration import load_configuration
from deep_generative_models.tasks.gan_with_autoencoder.sample import SampleGANWithAutoEncoder


if __name__ == '__main__':
    options_parser = argparse.ArgumentParser(description="Sample from MedGAN.")
    options_parser.add_argument("configuration", type=str, help="Configuration json file.")
    options = options_parser.parse_args()

    SampleGANWithAutoEncoder().timed_run(load_configuration(options.configuration))
Example 16
import argparse

from deep_generative_models.architecture import Architecture
from deep_generative_models.configuration import Configuration, load_configuration
from deep_generative_models.metadata import Metadata
from deep_generative_models.tasks.gan_with_autoencoder.train import TrainGANWithAutoencoder
from deep_generative_models.tasks.train import Batch


class TrainARAE(TrainGANWithAutoencoder):

    def train_discriminator_step(self, configuration: Configuration, metadata: Metadata, architecture: Architecture,
                                 batch: Batch) -> float:
        encoded_batch = dict()
        if "conditional" in architecture.arguments:
            encode_result = architecture.autoencoder.encode(batch["features"], condition=batch["labels"])
            encoded_batch["features"] = encode_result["code"]
            encoded_batch["labels"] = batch["labels"]
        else:
            encode_result = architecture.autoencoder.encode(batch["features"])
            encoded_batch["features"] = encode_result["code"]
        return super(TrainARAE, self).train_discriminator_step(configuration, metadata, architecture, encoded_batch)


if __name__ == '__main__':
    options_parser = argparse.ArgumentParser(description="Train ARAE.")
    options_parser.add_argument("configuration", type=str, help="Configuration json file.")
    options = options_parser.parse_args()

    TrainARAE().timed_run(load_configuration(options.configuration))
Example 17
                if parameter.requires_grad:
                    size += parameter.numel()
    return size


class ComputeParameterSize(Task):
    def mandatory_arguments(self) -> List[str]:
        return [
            "name",
            "metadata",
            "architecture",
        ]

    def run(self, configuration: Configuration) -> None:
        metadata = load_metadata(configuration.metadata)
        architecture = create_architecture(
            metadata, load_configuration(configuration.architecture))
        size = compute_parameter_size(architecture)
        self.logger.info("{}: {:d}".format(configuration.name, size))


if __name__ == '__main__':
    options_parser = argparse.ArgumentParser(
        description="Compute and print the amount of architecture parameters.")
    options_parser.add_argument("configuration",
                                type=str,
                                help="Configuration json file.")
    options = options_parser.parse_args()

    ComputeParameterSize().timed_run(load_configuration(options.configuration))
Example 18
import argparse

from typing import List

from torch import Tensor

from deep_generative_models.configuration import Configuration, load_configuration
from deep_generative_models.imputation.basic_imputation_task import BasicImputation
from deep_generative_models.layers.normal_noise_imputation_layer import NormalNoiseImputationLayer
from deep_generative_models.metadata import Metadata


class NormalNoiseImputation(BasicImputation):
    def optional_arguments(self) -> List[str]:
        return super(NormalNoiseImputation,
                     self).optional_arguments() + ["noise_mean", "noise_std"]

    def impute(self, configuration: Configuration, metadata: Metadata,
               scaled_inputs: Tensor, missing_mask: Tensor) -> Tensor:
        optional = configuration.get_all_defined(["noise_mean", "noise_std"])
        optional["differentiable"] = False
        return NormalNoiseImputationLayer(**optional)(scaled_inputs,
                                                      missing_mask)


if __name__ == '__main__':
    options_parser = argparse.ArgumentParser(
        description="Impute with normal noise.")
    options_parser.add_argument("configuration",
                                type=str,
                                help="Configuration json file.")
    options = options_parser.parse_args()

    NormalNoiseImputation().timed_run(load_configuration(
        options.configuration))
Example 19
import argparse

from torch import Tensor

from deep_generative_models.configuration import Configuration, load_configuration
from deep_generative_models.imputation.basic_imputation_task import BasicImputation
from deep_generative_models.layers.zero_imputation_layer import ZeroImputationLayer
from deep_generative_models.metadata import Metadata


class ZeroImputation(BasicImputation):

    def impute(self, configuration: Configuration, metadata: Metadata, scaled_inputs: Tensor, missing_mask: Tensor
               ) -> Tensor:
        return ZeroImputationLayer(differentiable=False)(scaled_inputs, missing_mask)


if __name__ == '__main__':
    options_parser = argparse.ArgumentParser(description="Impute with zeros.")
    options_parser.add_argument("configuration", type=str, help="Configuration json file.")
    options = options_parser.parse_args()

    ZeroImputation().timed_run(load_configuration(options.configuration))
Example 20
                                           hint=hint,
                                           non_missing_mask=inverse_mask(
                                               batch["missing_mask"]))

        # calculate gradients
        loss.backward()

        # update the generator weights
        architecture.generator_optimizer.step()

        # return the loss
        return to_cpu_if_was_in_gpu(loss).item()

    @staticmethod
    def val_batch(architecture: Architecture, batch: Batch,
                  post_processing: PostProcessing) -> float:
        generated = architecture.generator(batch["features"],
                                           missing_mask=batch["missing_mask"])
        loss = architecture.val_loss(post_processing, generated, batch)
        return to_cpu_if_was_in_gpu(loss).item()


if __name__ == '__main__':
    options_parser = argparse.ArgumentParser(description="Train GAIN.")
    options_parser.add_argument("configuration",
                                type=str,
                                help="Configuration json file.")
    options = options_parser.parse_args()

    TrainGAIN().timed_run(load_configuration(options.configuration))
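
inverse_mask is assumed to flip a binary mask (ones become zeros and vice versa); a minimal sketch:

from torch import Tensor

def inverse_mask_sketch(mask: Tensor) -> Tensor:
    # flip a binary 0/1 mask
    return 1 - mask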
Example 21
    def run(self, configuration: Configuration) -> None:
        metadata = load_metadata(configuration.metadata)
        architecture = create_architecture(
            metadata, load_configuration(configuration.architecture))
        size = compute_parameter_size(architecture)
        self.logger.info("{}: {:d}".format(configuration.name, size))
Example 22
    def mandatory_arguments(self) -> List[str]:
        return [
            "metadata",
            "missing_probability",
            "inputs",
            "outputs",
        ]

    def optional_arguments(self) -> List[str]:
        return super(GenerateMissingMask, self).optional_arguments() + ["seed"]

    def run(self, configuration: Configuration) -> None:
        seed_all(configuration.get("seed"))
        metadata = load_metadata(configuration.metadata)
        inputs = torch.from_numpy(np.load(configuration.inputs))
        missing_mask = generate_mask_for(inputs,
                                         configuration.missing_probability,
                                         metadata)
        np.save(configuration.outputs, missing_mask.numpy())


if __name__ == '__main__':
    options_parser = argparse.ArgumentParser(
        description="Generate a mask that indicates missing values.")
    options_parser.add_argument("configuration",
                                type=str,
                                help="Configuration json file.")
    options = options_parser.parse_args()

    GenerateMissingMask().timed_run(load_configuration(options.configuration))
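
generate_mask_for is metadata-aware (categorical variables presumably get one mask value per one-hot block), so the following is only a rough sketch for purely numerical data, assuming independent Bernoulli missingness:

import torch

def bernoulli_mask(inputs: torch.Tensor, missing_probability: float) -> torch.Tensor:
    # 1 marks a missing entry, 0 an observed one
    return (torch.rand(inputs.shape) < missing_probability).float()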
Example 23
        # wait until there is a new event
        event = outputs_queue.get(block=True)

        # write event
        if event["event_type"] == EVENT_TYPE_WRITE:
            writer.writerow(event["row"])
            f.flush()
        # exit event
        elif event["event_type"] == EVENT_TYPE_EXIT:
            break
        # something went wrong
        else:
            raise Exception("Invalid event type '{}'".format(
                event["event_type"]))

    f.close()

    multiprocessing_logger.info("Output finished.")


if __name__ == '__main__':
    options_parser = argparse.ArgumentParser(
        description="Run tasks in multi-processing mode.")
    options_parser.add_argument("configuration",
                                type=str,
                                help="Configuration json file.")
    options = options_parser.parse_args()

    MultiProcessTaskRunner(TaskRunnerWorker).timed_run(
        load_configuration(options.configuration))
Example 24
import argparse

from deep_generative_models.configuration import load_configuration
from deep_generative_models.tasks.autoencoder.impute import ImputeWithAutoEncoder

if __name__ == '__main__':
    options_parser = argparse.ArgumentParser(description="Impute with VAE.")
    options_parser.add_argument("configuration",
                                type=str,
                                help="Configuration json file.")
    options = options_parser.parse_args()

    ImputeWithAutoEncoder().timed_run(load_configuration(
        options.configuration))
Example 25
import argparse

from typing import List, Dict

from torch import Tensor

from deep_generative_models.architecture import Architecture
from deep_generative_models.configuration import load_configuration, Configuration
from deep_generative_models.metadata import Metadata
from deep_generative_models.tasks.impute import Impute


class ImputeWithGAIN(Impute):
    def mandatory_architecture_components(self) -> List[str]:
        return ["generator"]

    def impute(self, configuration: Configuration, metadata: Metadata,
               architecture: Architecture, batch: Dict[str, Tensor]) -> Tensor:
        return architecture.generator(batch["features"],
                                      missing_mask=batch["missing_mask"])


if __name__ == '__main__':
    options_parser = argparse.ArgumentParser(description="Impute with GAIN.")
    options_parser.add_argument("configuration",
                                type=str,
                                help="Configuration json file.")
    options = options_parser.parse_args()

    ImputeWithGAIN().timed_run(load_configuration(options.configuration))
Example 26
import argparse

from deep_generative_models.configuration import load_configuration
from deep_generative_models.tasks.autoencoder.train import TrainAutoEncoder

if __name__ == '__main__':
    options_parser = argparse.ArgumentParser(description="Train VAE.")
    options_parser.add_argument("configuration",
                                type=str,
                                help="Configuration json file.")
    options = options_parser.parse_args()

    TrainAutoEncoder().timed_run(load_configuration(options.configuration))
Example 27
    "MultiProcessTaskRunner": MultiProcessTaskRunner(TaskRunnerWorker)
}


class TaskRunner(Task):
    def run(self, configuration: Configuration) -> None:
        task = task_by_name[configuration.task]

        try:
            task.validate_arguments(configuration.arguments)
        except MissingArgument as e:
            raise Exception(
                "Missing argument '{}' while running task '{}'".format(
                    e.name, configuration.task))
        except InvalidArgument as e:
            raise Exception(
                "Invalid argument '{}' while running task '{}'".format(
                    e.name, configuration.task))

        task.run(configuration.arguments)


if __name__ == '__main__':
    options_parser = argparse.ArgumentParser(description="Run a task.")
    options_parser.add_argument("configuration",
                                type=str,
                                help="Configuration json file.")
    options = options_parser.parse_args()

    TaskRunner().timed_run(load_configuration(options.configuration))
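
The runner reads configuration.task and configuration.arguments, so a task configuration plausibly has this shape (a sketch; the task name and argument keys here are hypothetical and depend on the registered task):

example_configuration = {
    "task": "SomeRegisteredTask",  # hypothetical task name
    "arguments": {
        # task-specific arguments, checked by validate_arguments
        "metadata": "path/to/metadata.json",
    },
}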