def __init__(self, context: PyTorchTrialContext) -> None:
        # Read configuration
        self.context = context
        self.data_config = self.context.get_data_config()

        # Create Tensorboard logger
        self.logger = TorchWriter()

        # Create the tokenizer based on the predefined vocabulary
        self.tokenizer = BertTokenizer(self.data_config["voc_path"], do_lower_case=False)

        # Determine the number of output classes
        if self.context.get_hparam("reduce_to_binary_problem"):
            class_num = 2
        else:
            class_num = 6

        # Initialize the model and wrap it with the Determined API
        model = ProtTransClassification(self.data_config["pretrained_path"],
                                        class_num=class_num,
                                        classification_feature=self.context.get_hparam("classification_feature"),
                                        dropout=self.context.get_hparam("classification_dropout"),
                                        freeze_bert=self.context.get_hparam("bert_freeze"))

        optimizer = Lamb([{"params": model.wordencoding.parameters(), "lr": self.context.get_hparam("bert_lr")},
                          {"params": model.classification.parameters()}], lr=self.context.get_hparam("classification_lr"))

        self.model = self.context.wrap_model(model)
        self.optimizer = self.context.wrap_optimizer(optimizer)
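
For reference, a rough sketch of how the TorchWriter created above might be used inside train_batch to log the loss through its underlying SummaryWriter. The batch layout, the forward/loss computation, and the metric tag are assumptions for illustration, not part of the original trial.

    def train_batch(self, batch, epoch_idx: int, batch_idx: int):
        # Sketch only: assumes the data loader yields (tokens, labels) pairs.
        tokens, labels = batch
        logits = self.model(tokens)
        loss = torch.nn.functional.cross_entropy(logits, labels)

        self.context.backward(loss)
        self.context.step_optimizer(self.optimizer)

        # Log the raw loss to TensorBoard via the wrapped SummaryWriter.
        self.logger.writer.add_scalar("train/loss", loss.item(), batch_idx)
        return {"loss": loss}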
Example #2
    def __init__(self, context: PyTorchTrialContext) -> None:
        self.context = context
        self.logger = TorchWriter()

        # Create a unique download directory for each rank so they don't overwrite each other.
        self.download_directory = f"/tmp/data-rank{self.context.distributed.get_rank()}"
        self.data_downloaded = False

        # Initialize the models.
        mnist_shape = (1, 28, 28)
        self.generator = self.context.wrap_model(Generator(latent_dim=self.context.get_hparam("latent_dim"),
                                                           img_shape=mnist_shape))
        self.discriminator = self.context.wrap_model(Discriminator(img_shape=mnist_shape))

        # Initialize the optimizers and learning rate scheduler.
        lr = self.context.get_hparam("lr")
        b1 = self.context.get_hparam("b1")
        b2 = self.context.get_hparam("b2")
        self.opt_g = self.context.wrap_optimizer(torch.optim.Adam(self.generator.parameters(),
                                                                  lr=lr, betas=(b1, b2)))
        self.opt_d = self.context.wrap_optimizer(torch.optim.Adam(self.discriminator.parameters(),
                                                                  lr=lr, betas=(b1, b2)))
        self.lr_g = self.context.wrap_lr_scheduler(
            lr_scheduler=LambdaLR(self.opt_g, lr_lambda=lambda epoch: 0.95 ** epoch),
            step_mode=LRScheduler.StepMode.STEP_EVERY_EPOCH,
        )
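
A hedged sketch of one way the TorchWriter above could be used to inspect generator output during training; the helper name, the sample count, and the torchvision dependency are assumptions, not part of the original trial.

    def _log_generated_images(self, batch_idx: int) -> None:
        # Sketch only: sample noise, run the generator, and write an image grid to TensorBoard.
        z = self.context.to_device(torch.randn(16, self.context.get_hparam("latent_dim")))
        with torch.no_grad():
            samples = self.generator(z)
        grid = torchvision.utils.make_grid(samples, normalize=True)
        self.logger.writer.add_image("generated_images", grid, batch_idx)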
Example #3
    def create_metric_writer(
        cls: Type["PyTorchTrialController"],
    ) -> tensorboard.BatchMetricWriter:
        from determined.tensorboard.metric_writers.pytorch import TorchWriter

        writer = TorchWriter()
        return tensorboard.BatchMetricWriter(writer)
Example #4
    def __init__(self, context: DeepSpeedTrialContext) -> None:
        self.context = context
        self.exp_config = self.context.get_experiment_config()
        self.args = AttrMap(self.context.get_hparams())

        # Initialize and get arguments, timers, and the TensorBoard writer.
        try:
            self.neox_args = get_neox_args(self.context)
        except Exception:
            traceback.print_exc()
            raise InvalidHP("Could not parse neox_args.")
        self.wrapped_writer = TorchWriter()
        self.neox_args.tensorboard_writer = self.wrapped_writer.writer
        self.neox_args.configure_distributed_args()
        # The tokenizer needs to be built before model initialization in order to set the
        # required padded_vocab_size argument.
        self.neox_args.build_tokenizer()
        megatron_train.initialize_megatron(neox_args=self.neox_args)
        self.timers = megatron_utils.Timers(
            use_wandb=False,
            tensorboard_writer=self.neox_args.tensorboard_writer)

        # Model, optimizer, and learning rate.
        self.timers("model and optimizer").start()
        (
            model,
            self.optimizer,
            self.lr_scheduler,
        ) = megatron_train.setup_model_and_optimizer(neox_args=self.neox_args)
        self.model = self.context.wrap_model_engine(model)
        self.timers("model and optimizer").stop()

        # Print setup timing.
        megatron_utils.print_rank_0("done with setups ...")
        self.timers.log(["model and optimizer"])
        megatron_utils.print_rank_0("training ...")

        # For tracking.
        if not self.args.search_world_size:
            self.reducer = self.context.wrap_reducer(
                LMReducers(self.neox_args),
                for_training=False,
                for_validation=True,
            )
        self.report_memory_flag = True
        self.total_train_loss_dict = {}
        self.total_val_loss_dict = {}
        self.tflops = 0
        self.reported_flops = False
        self.overflow_monitor = megatron_utils.OverflowMonitor(self.optimizer)
        self.noise_scale_logger = megatron_utils.get_noise_scale_logger(
            self.neox_args)
        self.timers("interval time").start()
Example #5
    def __init__(self, context: DeepSpeedTrialContext) -> None:
        self.context = context
        self.hparams = AttrDict(self.context.get_hparams())
        self.data_config = AttrDict(self.context.get_data_config())
        self.logger = TorchWriter()
        num_channels = data.CHANNELS_BY_DATASET[self.data_config.dataset]
        gen_net = Generator(
            self.hparams.generator_width_base, num_channels, self.hparams.noise_length
        )
        gen_net.apply(weights_init)
        disc_net = Discriminator(self.hparams.discriminator_width_base, num_channels)
        disc_net.apply(weights_init)
        gen_parameters = filter(lambda p: p.requires_grad, gen_net.parameters())
        disc_parameters = filter(lambda p: p.requires_grad, disc_net.parameters())
        ds_config = overwrite_deepspeed_config(
            self.hparams.deepspeed_config, self.hparams.get("overwrite_deepspeed_args", {})
        )
        generator, _, _, _ = deepspeed.initialize(
            model=gen_net, model_parameters=gen_parameters, config=ds_config
        )
        discriminator, _, _, _ = deepspeed.initialize(
            model=disc_net, model_parameters=disc_parameters, config=ds_config
        )

        self.generator = self.context.wrap_model_engine(generator)
        self.discriminator = self.context.wrap_model_engine(discriminator)
        self.fixed_noise = self.context.to_device(
            torch.randn(
                self.context.train_micro_batch_size_per_gpu, self.hparams.noise_length, 1, 1
            )
        )
        self.criterion = nn.BCELoss()
        # TODO: Test fp16
        self.fp16 = generator.fp16_enabled()
        self.gradient_accumulation_steps = generator.gradient_accumulation_steps()
        # Manually perform gradient accumulation.
        if self.gradient_accumulation_steps > 1:
            logging.info("Disabling automatic gradient accumulation.")
            self.context.disable_auto_grad_accumulation()
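
For completeness, a hypothetical helper combining the fixed noise buffer with the TorchWriter to track generator quality over time; it assumes torchvision is importable and is not part of the original trial.

    def _log_fixed_noise_samples(self, step: int) -> None:
        # Sketch only: render samples from the fixed noise and log them to TensorBoard.
        with torch.no_grad():
            samples = self.generator(self.fixed_noise)
        grid = torchvision.utils.make_grid(samples.float(), normalize=True)
        self.logger.writer.add_image("fixed_noise_samples", grid, step)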
Example #6
    def create_metric_writer(
        cls: Type["DeepSpeedTrialController"],
    ) -> tensorboard.BatchMetricWriter:
        writer = TorchWriter()
        return tensorboard.BatchMetricWriter(writer)