示例#1
0
    def test_eval_binary_one_output_accuracy(self):
        batch_size = 32
        num_outputs = 1

        true_output = self.rso.rand(
            batch_size, num_outputs) * 5 - 10  # test output between -5 and 5
        true_output_binary = np.expand_dims(np.asarray(
            [0 if x < 0 else 1 for x in true_output], dtype=np.int),
                                            axis=1)

        # now, modify a subset of the netowrk output and make that the "real" output
        network_output = true_output.copy()
        target_accuracy = 0.8
        num_indices_to_modify = int(batch_size * (1 - target_accuracy))
        num_indices_unmodified = batch_size - num_indices_to_modify
        indices_to_modify = self.rso.choice(range(batch_size),
                                            num_indices_to_modify,
                                            replace=False)

        for ii in indices_to_modify:
            # flip pos to neg, neg to pos
            if network_output[ii][0] >= 0:
                network_output[ii][0] = network_output[ii][0] - 10
            else:
                network_output[ii][0] = network_output[ii][0] + 10

        # convert datatypes to what is expected during operation
        network_output_pt = torch.tensor(network_output, dtype=torch.float)
        true_output_pt = torch.tensor(true_output_binary, dtype=torch.long)

        actual_acc, n_total, n_correct = \
            _eval_acc(network_output_pt, true_output_pt, n_total=0, n_correct=0)
        expected_acc = float(batch_size -
                             num_indices_to_modify) / batch_size * 100
        expected_n_total = 32
        expected_n_correct = num_indices_unmodified
        self.assertAlmostEqual(actual_acc, expected_acc)
        self.assertEqual(n_total, expected_n_total)
        self.assertEqual(n_correct, expected_n_correct)

        # now compute the accuracy
        n_total_prev = 64
        n_correct_prev = 50
        expected_accuracy = (num_indices_unmodified + n_correct_prev) / (
            batch_size + n_total_prev) * 100
        expected_n_total = n_total_prev + batch_size
        expected_n_correct = n_correct_prev + num_indices_unmodified

        actual_acc, n_total, n_correct = \
            _eval_acc(network_output_pt, true_output_pt, n_total=n_total_prev, n_correct=n_correct_prev)
        self.assertAlmostEqual(actual_acc, expected_accuracy)
        self.assertEqual(expected_n_total, n_total)
        self.assertEqual(expected_n_correct, n_correct)
示例#2
0
    def test_running_accuracy(self):
        batch_size = 32
        num_outputs = 5

        random_mat = self.rso.rand(batch_size, num_outputs)
        row_sum = random_mat.sum(axis=1)

        # normalize the random_mat such that every row adds up to 1
        # broadcast so we can divide every element in matrix by the row's sum
        fake_network_output = random_mat / row_sum[:, None]
        network_output = np.argmax(fake_network_output, axis=1)

        # now, modify a subset of the netowrk output and make that the "real" output
        true_output = network_output.copy()
        target_accuracy = 0.8
        num_indices_to_modify = int(batch_size * (1 - target_accuracy))
        num_indices_unmodified = batch_size - num_indices_to_modify
        indices_to_modify = self.rso.choice(range(batch_size),
                                            num_indices_to_modify,
                                            replace=False)

        for ii in indices_to_modify:
            true_output[ii] = true_output[ii] + 1

        # convert datatypes to what is expected during operation
        network_output_pt = torch.tensor(fake_network_output,
                                         dtype=torch.float)
        true_output_pt = torch.tensor(true_output, dtype=torch.long)

        # now compute the accuracy
        n_total_prev = 64
        n_correct_prev = 50
        expected_accuracy = (num_indices_unmodified + n_correct_prev) / (
            batch_size + n_total_prev) * 100
        expected_n_total = n_total_prev + batch_size
        expected_n_correct = n_correct_prev + num_indices_unmodified

        actual_acc, n_total, n_correct = \
            _eval_acc(network_output_pt, true_output_pt, n_total=n_total_prev, n_correct=n_correct_prev)
        self.assertAlmostEqual(actual_acc, expected_accuracy)
        self.assertEqual(expected_n_total, n_total)
        self.assertEqual(expected_n_correct, n_correct)
示例#3
0
    def test_accuracy(self):
        cfg = DefaultOptimizerConfig()
        optimizer_obj = DefaultOptimizer(cfg)
        batch_size = cfg.training_cfg.batch_size
        num_outputs = 5

        random_mat = self.rso.rand(batch_size, num_outputs)
        row_sum = random_mat.sum(axis=1)

        # normalize the random_mat such that every row adds up to 1
        # broadcast so we can divide every element in matrix by the row's sum
        fake_network_output = random_mat / row_sum[:, None]
        network_output = np.argmax(fake_network_output, axis=1)

        # now, modify a subset of the netowrk output and make that the "real" output
        true_output = network_output.copy()
        target_accuracy = 0.8
        num_indices_to_modify = int(batch_size * (1 - target_accuracy))
        num_indices_unmodified = batch_size - num_indices_to_modify
        indices_to_modify = self.rso.choice(range(batch_size),
                                            num_indices_to_modify,
                                            replace=False)
        expected_accuracy = float(num_indices_unmodified) / float(
            batch_size) * 100

        for ii in indices_to_modify:
            true_output[ii] = true_output[ii] + 1

        # convert datatypes to what is expected during operation
        network_output_pt = torch.tensor(fake_network_output,
                                         dtype=torch.float)
        true_output_pt = torch.tensor(true_output, dtype=torch.long)

        # now compute the accuracy
        actual_acc, n_total, n_correct = \
            _eval_acc(network_output_pt, true_output_pt, n_total=0, n_correct=0)
        self.assertAlmostEqual(actual_acc, expected_accuracy)
    def train_epoch(self,   model: nn.Module, train_loader: DataLoader,
                    val_clean_loader: DataLoader, val_triggered_loader: DataLoader,
                    epoch_num: int, use_amp: bool = False):
        """
        Runs one epoch of training on the specified model

        :param model: the model to train for one epoch
        :param train_loader: a DataLoader object pointing to the training dataset
        :param val_clean_loader: a DataLoader object pointing to the validation dataset that is clean
        :param val_triggered_loader: a DataLoader object pointing to the validation dataset that is triggered
        :param epoch_num: the epoch number that is being trained
        :param use_amp: if True, uses automated mixed precision for FP16 training.
        :return: a list of statistics for batches where statistics were computed
        """

        # Probability of Adversarial attack to occur in each iteration
        attack_prob = self.optimizer_cfg.training_cfg.adv_training_ratio
        pid = os.getpid()
        train_dataset_len = len(train_loader.dataset)
        loop = tqdm(train_loader, disable=self.optimizer_cfg.reporting_cfg.disable_progress_bar)

        scaler = None
        if use_amp:
            scaler = torch.cuda.amp.GradScaler()

        train_n_correct, train_n_total = None, None

        # Define parameters of the adversarial attack
        attack_eps = float(self.optimizer_cfg.training_cfg.adv_training_eps)
        attack_iterations = int(self.optimizer_cfg.training_cfg.adv_training_iterations)
        eps_iter = (2.0 * attack_eps) / float(attack_iterations)
        attack = LinfPGDAttack(
            predict=model,
            loss_fn=nn.CrossEntropyLoss(reduction="sum"),
            eps=attack_eps,
            nb_iter=attack_iterations,
            eps_iter=eps_iter)

        sum_batchmean_train_loss = 0
        running_train_acc = 0
        num_batches = len(train_loader)
        model.train()
        for batch_idx, (x, y_truth) in enumerate(loop):
            x = x.to(self.device)
            y_truth = y_truth.to(self.device)

            # put network into training mode & zero out previous gradient computations
            self.optimizer.zero_grad()

            # get predictions based on input & weights learned so far
            if use_amp:
                with torch.cuda.amp.autocast():
                    # add adversarial noise via l_inf PGD attack
                    # only apply attack to attack_prob of the batches
                    if attack_prob and np.random.rand() <= attack_prob:
                        with ctx_noparamgrad_and_eval(model):
                            x = attack.perturb(x, y_truth)
                    y_hat = model(x)
                    # compute metrics
                    batch_train_loss = self._eval_loss_function(y_hat, y_truth)

            else:
                # add adversarial noise vis lin PGD attack
                if attack_prob and np.random.rand() <= attack_prob:
                    with ctx_noparamgrad_and_eval(model):
                        x = attack.perturb(x, y_truth)
                y_hat = model(x)
                batch_train_loss = self._eval_loss_function(y_hat, y_truth)

            sum_batchmean_train_loss += batch_train_loss.item()

            running_train_acc, train_n_total, train_n_correct = default_optimizer._running_eval_acc(y_hat, y_truth,
                                                                                  n_total=train_n_total,
                                                                                  n_correct=train_n_correct,
                                                                                  soft_to_hard_fn=self.soft_to_hard_fn,
                                                                                  soft_to_hard_fn_kwargs=self.soft_to_hard_fn_kwargs)

            # compute gradient
            if use_amp:
                # Scales loss.  Calls backward() on scaled loss to create scaled gradients.
                # Backward passes under autocast are not recommended.
                # Backward ops run in the same dtype autocast chose for corresponding forward ops.
                scaler.scale(batch_train_loss).backward()
            else:
                if np.isnan(sum_batchmean_train_loss) or np.isnan(running_train_acc):
                    default_optimizer._save_nandata(x, y_hat, y_truth, batch_train_loss, sum_batchmean_train_loss, running_train_acc,
                                  train_n_total, train_n_correct, model)

                batch_train_loss.backward()

            # perform gradient clipping if configured
            if self.optimizer_cfg.training_cfg.clip_grad:
                if use_amp:
                    # Unscales the gradients of optimizer's assigned params in-place
                    scaler.unscale_(self.optimizer)

                if self.optimizer_cfg.training_cfg.clip_type == 'norm':
                    # clip_grad_norm_ modifies gradients in place
                    #  see: https://pytorch.org/docs/stable/_modules/torch/nn/utils/clip_grad.html
                    torch_clip_grad.clip_grad_norm_(model.parameters(), self.optimizer_cfg.training_cfg.clip_val,
                                                    **self.optimizer_cfg.training_cfg.clip_kwargs)
                elif self.optimizer_cfg.training_cfg.clip_type == 'val':
                    # clip_grad_val_ modifies gradients in place
                    #  see: https://pytorch.org/docs/stable/_modules/torch/nn/utils/clip_grad.html
                    torch_clip_grad.clip_grad_value_(
                        model.parameters(), self.optimizer_cfg.training_cfg.clip_val)
                else:
                    msg = "Unknown clipping type for gradient clipping!"
                    logger.error(msg)
                    raise ValueError(msg)

            if use_amp:
                # scaler.step() first unscales the gradients of the optimizer's assigned params.
                # If these gradients do not contain infs or NaNs, optimizer.step() is then called,
                # otherwise, optimizer.step() is skipped.
                scaler.step(self.optimizer)
                # Updates the scale for next iteration.
                scaler.update()
            else:
                self.optimizer.step()

            # report batch statistics to tensorflow
            if self.tb_writer:
                try:
                    batch_num = int(epoch_num * num_batches + batch_idx)
                    self.tb_writer.add_scalar(self.optimizer_cfg.reporting_cfg.experiment_name + '-train_loss',
                                              batch_train_loss.item(), global_step=batch_num)
                    self.tb_writer.add_scalar(self.optimizer_cfg.reporting_cfg.experiment_name + '-running_train_acc',
                                              running_train_acc, global_step=batch_num)
                except:
                    # TODO: catch specific expcetions
                    pass

            loop.set_description('Epoch {}/{}'.format(epoch_num + 1, self.num_epochs))
            loop.set_postfix(avg_train_loss=batch_train_loss.item())

            if batch_idx % self.num_batches_per_logmsg == 0:
                logger.info('{}\tTrain Epoch: {} [{}/{} ({:.0f}%)]\tTrainLoss: {:.6f}\tTrainAcc: {:.6f}'.format(
                    pid, epoch_num, batch_idx * len(x), train_dataset_len,
                    100. * batch_idx / num_batches, batch_train_loss.item(), running_train_acc))

        train_stats = EpochTrainStatistics(running_train_acc, sum_batchmean_train_loss / float(num_batches))

        # if we have validation data, we compute on the validation dataset
        num_val_batches_clean = len(val_clean_loader)
        if num_val_batches_clean > 0:
            logger.info('Running Validation on Clean Data')
            running_val_clean_acc, _, _, val_clean_loss = \
                default_optimizer._eval_acc(val_clean_loader, model, self.device,
                          self.soft_to_hard_fn, self.soft_to_hard_fn_kwargs, self._eval_loss_function)
        else:
            logger.info("No dataset computed for validation on clean dataset!")
            running_val_clean_acc = None
            val_clean_loss = None

        num_val_batches_triggered = len(val_triggered_loader)
        if num_val_batches_triggered > 0:
            logger.info('Running Validation on Triggered Data')
            running_val_triggered_acc, _, _, val_triggered_loss = \
                default_optimizer._eval_acc(val_triggered_loader, model, self.device,
                          self.soft_to_hard_fn, self.soft_to_hard_fn_kwargs, self._eval_loss_function)
        else:
            logger.info(
                "No dataset computed for validation on triggered dataset!")
            running_val_triggered_acc = None
            val_triggered_loss = None

        validation_stats = EpochValidationStatistics(running_val_clean_acc, val_clean_loss,
                                                     running_val_triggered_acc, val_triggered_loss)
        if num_val_batches_clean > 0:
            logger.info('{}\tTrain Epoch: {} \tCleanValLoss: {:.6f}\tCleanValAcc: {:.6f}'.format(
                pid, epoch_num, val_clean_loss, running_val_clean_acc))
        if num_val_batches_triggered > 0:
            logger.info('{}\tTrain Epoch: {} \tTriggeredValLoss: {:.6f}\tTriggeredValAcc: {:.6f}'.format(
                pid, epoch_num, val_triggered_loss, running_val_triggered_acc))

        if self.tb_writer:
            try:
                batch_num = int((epoch_num + 1) * num_batches)
                if num_val_batches_clean > 0:
                    self.tb_writer.add_scalar(self.optimizer_cfg.reporting_cfg.experiment_name +
                                              '-clean-val-loss', val_clean_loss, global_step=batch_num)
                    self.tb_writer.add_scalar(self.optimizer_cfg.reporting_cfg.experiment_name +
                                              '-clean-val_acc', running_val_clean_acc, global_step=batch_num)
                if num_val_batches_triggered > 0:
                    self.tb_writer.add_scalar(self.optimizer_cfg.reporting_cfg.experiment_name +
                                              '-triggered-val-loss', val_triggered_loss, global_step=batch_num)
                    self.tb_writer.add_scalar(self.optimizer_cfg.reporting_cfg.experiment_name +
                                              '-triggered-val_acc', running_val_triggered_acc, global_step=batch_num)
            except:
                pass

        # update the lr-scheduler if necessary
        if self.lr_scheduler is not None:
            if self.optimizer_cfg.training_cfg.lr_scheduler_call_arg is None:
                self.lr_scheduler.step()
            elif self.optimizer_cfg.training_cfg.lr_scheduler_call_arg.lower() == 'val_acc':
                val_acc = validation_stats.get_val_acc()
                if val_acc is not None:
                    self.lr_scheduler.step(val_acc)
                else:
                    msg = "val_clean_acc not defined b/c validation dataset is not defined! Ignoring LR step!"
                    logger.warning(msg)
            elif self.optimizer_cfg.training_cfg.lr_scheduler_call_arg.lower() == 'val_loss':
                val_loss = validation_stats.get_val_loss()
                if val_loss is not None:
                    self.lr_scheduler.step(val_loss)
                else:
                    msg = "val_clean_loss not defined b/c validation dataset is not defined! Ignoring LR step!"
                    logger.warning(msg)
            else:
                msg = "Unknown mode for calling lr_scheduler!"
                logger.error(msg)
                raise ValueError(msg)

        return train_stats, validation_stats