Example 1
    def train(self, epoch):
        self.model.train()
        metrics = Metrics(self.metrics, self.prod_train_len, self.num_classes)
        loss, len_steps, len_data = 0, 0, 0

        # Training loop
        loop = tqdm(self.train_loader)
        for batch_id, (data, target) in enumerate(loop):
            data, target = data.to(self.device), target.to(self.device)
            self.optimizer.zero_grad()
            output = self.model(data)
            if self.task == "classification":
                current_loss = self.criterion(output, target)
            elif self.task == "segmentation":
                current_loss = self.criterion(output, target.squeeze(dim=1))
            current_loss.backward()
            loss += current_loss.item()  # accumulate as a plain float so the graph is not retained
            self.optimizer.step()
            if self.task == "classification":
                len_steps += len(data)
                len_data = len_steps
            elif self.task == "segmentation":
                len_steps += len(data) * np.prod(data.shape[-2:])
                len_data += len(data)

            # Update metrics
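            # The confidence here is the Maximum Class Probability: the largest
            # softmax score, with pred holding the corresponding class index.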
            confidence, pred = F.softmax(output, dim=1).max(dim=1,
                                                            keepdim=True)
            metrics.update(pred, target, confidence)

            # Update the average loss
            loop.set_description(f"Epoch {epoch}/{self.nb_epochs}")
            loop.set_postfix(
                OrderedDict({
                    "loss_nll": f"{(loss / len_data):05.4e}",
                    "acc": f"{(metrics.accuracy / len_steps):05.2%}",
                }))
            loop.update()

        # Eval on epoch end
        scores = metrics.get_scores(split="train")
        logs_dict = OrderedDict({
            "epoch": {
                "value": epoch,
                "string": f"{epoch:03}"
            },
            "lr": {
                "value": self.optimizer.param_groups[0]["lr"],
                "string": f"{self.optimizer.param_groups[0]['lr']:05.1e}",
            },
            "train/loss_nll": {
                "value": loss / len_data,
                "string": f"{(loss / len_data):05.4e}",
            },
        })
        for s in scores:
            logs_dict[s] = scores[s]

        # Val scores
        if self.val_loader is not None:
            val_losses, scores_val = self.evaluate(self.val_loader,
                                                   self.prod_val_len,
                                                   split="val")
            logs_dict["val/loss_nll"] = {
                "value":
                val_losses["loss_nll"].item() / self.nsamples_val,
                "string":
                f"{(val_losses['loss_nll'].item() / self.nsamples_val):05.4e}",
            }
            for sv in scores_val:
                logs_dict[sv] = scores_val[sv]

        # Test scores
        test_losses, scores_test = self.evaluate(self.test_loader,
                                                 self.prod_test_len,
                                                 split="test")
        logs_dict["test/loss_nll"] = {
            "value":
            test_losses["loss_nll"].item() / self.nsamples_test,
            "string":
            f"{(test_losses['loss_nll'].item() / self.nsamples_test):05.4e}",
        }
        for st in scores_test:
            logs_dict[st] = scores_test[st]

        # Print metrics
        misc.print_dict(logs_dict)

        # Save the model checkpoint
        self.save_checkpoint(epoch)

        # CSV logging
        misc.csv_writter(path=self.output_folder / "logs.csv",
                         dic=OrderedDict(logs_dict))

        # Tensorboard logging
        self.save_tb(logs_dict)

        # Scheduler step
        if self.scheduler:
            self.scheduler.step()
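
A note on the two counters in the segmentation branch of Example 1: len_data counts images and is used to average the loss, while len_steps counts labelled pixels and normalises the pixel-wise accuracy shown in the progress bar. A minimal sketch of that bookkeeping, with an assumed batch of four 3x32x32 inputs (the shapes are illustrative, not taken from the example):

    import numpy as np
    import torch

    data = torch.zeros(4, 3, 32, 32)                   # assumed segmentation batch
    len_data = len(data)                               # 4 images -> used to average the loss
    len_steps = len(data) * np.prod(data.shape[-2:])   # 4 * 32 * 32 = 4096 pixels -> accuracy
    print(len_data, len_steps)                         # 4 4096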
Example 2
    def train(self, epoch):
        self.model.train()
        self.disable_bn()
        if self.config_args["model"].get("uncertainty", None):
            self.disable_dropout()
        metrics = Metrics(self.metrics, self.prod_train_len, self.num_classes)
        loss, confid_loss = 0, 0
        len_steps, len_data = 0, 0

        # Training loop
        loop = tqdm(self.train_loader)

        for step, batch in enumerate(loop):
            batch = tuple(t.to(self.device) for t in batch)
            idx_ids, input_ids, input_mask, segment_ids, label_ids = batch

            output = self.model(input_ids,
                                segment_ids,
                                input_mask,
                                labels=None)

            print('output', output[0], output[1], torch.sigmoid(output[1]))

            current_loss = self.criterion(output, label_ids)

            # Scale the loss for multi-GPU averaging and gradient accumulation
            n_gpu = torch.cuda.device_count()
            if n_gpu > 1:
                current_loss = current_loss.mean()
            grad_accum_steps = self.config_args["training"]["gradient_accumulation_steps"]
            if grad_accum_steps > 1:
                current_loss = current_loss / grad_accum_steps

            current_loss.backward()
            loss += current_loss.item()

            len_steps += len(input_ids)
            len_data = len_steps

            # Update metrics
            pred = output[0].argmax(dim=1, keepdim=True)

            confidence = torch.sigmoid(output[1])
            metrics.update(idx_ids, pred, label_ids, confidence)

            pred_detach = pred.detach()
            label_detach = label_ids.detach()
            confidence_detach = confidence.detach()
            idx_detach = idx_ids.detach()

            print('pred', pred_detach.cpu())
            print('label', label_detach.cpu())
            print('idx', idx_detach.cpu())
            print('confidence', confidence_detach.cpu())

            if (step + 1) % grad_accum_steps == 0:
                print('optimizer step', step + 1)
                self.optimizer.step()
                self.optimizer.zero_grad()

            # Update the average loss
            loop.set_description(f"Epoch {epoch}/{self.nb_epochs}")
            loop.set_postfix(
                OrderedDict({
                    "loss_confid": f"{(loss / len_data):05.3e}",
                    "acc": f"{(metrics.accuracy / len_steps):05.2%}",
                }))
            loop.update()

        # Eval on epoch end
        scores = metrics.get_scores(split="train")
        logs_dict = OrderedDict({
            "epoch": {
                "value": epoch,
                "string": f"{epoch:03}"
            },
            "lr": {
                "value": self.optimizer.param_groups[0]["lr"],
                "string": f"{self.optimizer.param_groups[0]['lr']:05.1e}",
            },
            "train/loss_confid": {
                "value": loss / len_data,
                "string": f"{(loss / len_data):05.4e}",
            },
        })
        for s in scores:
            logs_dict[s] = scores[s]

        # Val scores
        val_losses, scores_val = self.evaluate(self.val_loader,
                                               self.prod_val_len,
                                               split="val")
        logs_dict["val/loss_confid"] = {
            "value":
            val_losses["loss_confid"].item() / self.nsamples_val,
            "string":
            f"{(val_losses['loss_confid'].item() / self.nsamples_val):05.4e}",
        }
        for sv in scores_val:
            logs_dict[sv] = scores_val[sv]

        # Test scores
        test_losses, scores_test = self.evaluate(self.test_loader,
                                                 self.prod_test_len,
                                                 split="test")
        logs_dict["test/loss_confid"] = {
            "value":
            test_losses["loss_confid"].item() / self.nsamples_test,
            "string":
            f"{(test_losses['loss_confid'].item() / self.nsamples_test):05.4e}",
        }
        for st in scores_test:
            logs_dict[st] = scores_test[st]

        # Print metrics
        misc.print_dict(logs_dict)

        # Save the model checkpoint
        self.save_checkpoint(epoch)

        # CSV logging
        misc.csv_writter(path=self.output_folder / "logs.csv",
                         dic=OrderedDict(logs_dict))

        # Tensorboard logging
        self.save_tb(logs_dict)

        # Scheduler step
        if self.scheduler:
            self.scheduler.step()
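
Example 2 steps the optimizer only every gradient_accumulation_steps batches and rescales the loss accordingly. A standalone sketch of that accumulation pattern, with a toy model, loss and loader standing in for the example's model, criterion and train_loader (all of the names and sizes below are illustrative):

    import torch
    import torch.nn as nn

    model = nn.Linear(10, 2)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
    loader = [(torch.randn(8, 10), torch.randint(0, 2, (8,))) for _ in range(8)]
    grad_accum_steps = 4

    optimizer.zero_grad()
    for step, (inputs, labels) in enumerate(loader):
        loss = criterion(model(inputs), labels)
        (loss / grad_accum_steps).backward()      # scale so the update averages over the window
        if (step + 1) % grad_accum_steps == 0:
            optimizer.step()                      # one parameter update per accumulation window
            optimizer.zero_grad()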
Example 3
    def train(self, epoch):
        self.model.train()

        # self.disable_bn()
        # if self.config_args["model"].get("uncertainty", None):
        #     self.disable_dropout()

        metrics = Metrics(self.metrics, self.prod_train_len, self.num_classes)
        loss, confid_loss = 0, 0
        len_steps, len_data = 0, 0

        # Training loop
        loop = tqdm(self.train_loader)

        for batch_id, (data, target) in enumerate(loop):
            data, target = data.to(self.device), target.to(self.device)
            self.optimizer.zero_grad()
            output = self.model(data)

            # import pdb
            # pdb.set_trace()
            # print(output[0])
            # exit()

            if self.task == "classification":
                current_loss = self.criterion(output, target)
            elif self.task == "segmentation":
                current_loss = self.criterion(output, target.squeeze(dim=1))
            current_loss.backward()
            loss += current_loss.item()
            self.optimizer.step()
            if self.task == "classification":
                len_steps += len(data)
                len_data = len_steps
            elif self.task == "segmentation":
                len_steps += len(data) * np.prod(data.shape[-2:])
                len_data += len(data)

            # Update metrics
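            # The model returns a pair: output[0] holds the class logits and
            # output[1] a confidence logit, squashed into (0, 1) by the sigmoid.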
            pred = output[0].argmax(dim=1, keepdim=True)
            confidence = torch.sigmoid(output[1])
            metrics.update(pred, target, confidence)

            # Update the average loss
            loop.set_description(f"Epoch {epoch}/{self.nb_epochs}")
            loop.set_postfix(
                OrderedDict({
                    "loss_confid": f"{(loss / len_data):05.3e}",
                    "acc": f"{(metrics.accuracy / len_steps):05.2%}",
                }))
            loop.update()

        # Eval on epoch end
        scores = metrics.get_scores(split="train")
        logs_dict = OrderedDict({
            "epoch": {
                "value": epoch,
                "string": f"{epoch:03}"
            },
            "train/loss_confid": {
                "value": loss / len_data,
                "string": f"{(loss / len_data):05.4e}",
            },
        })
        for s in scores:
            logs_dict[s] = scores[s]

        # Val scores
        val_losses, scores_val, _ = self.evaluate(self.val_loader,
                                                  self.prod_val_len,
                                                  split="val")
        logs_dict["val/loss_confid"] = {
            "value":
            val_losses["loss_confid"].item() / self.nsamples_val,
            "string":
            f"{(val_losses['loss_confid'].item() / self.nsamples_val):05.4e}",
        }
        for sv in scores_val:
            logs_dict[sv] = scores_val[sv]

        # Test scores
        test_losses, scores_test, _ = self.evaluate(self.test_loader,
                                                    self.prod_test_len,
                                                    split="test")
        logs_dict["test/loss_confid"] = {
            "value":
            test_losses["loss_confid"].item() / self.nsamples_test,
            "string":
            f"{(test_losses['loss_confid'].item() / self.nsamples_test):05.4e}",
        }
        for st in scores_test:
            logs_dict[st] = scores_test[st]

        # Print metrics
        misc.print_dict(logs_dict)

        # Save the model checkpoint
        self.save_checkpoint(epoch)

        # CSV logging
        misc.csv_writter(path=self.output_folder / "logs.csv",
                         dic=OrderedDict(logs_dict))

        # Tensorboard logging
        self.save_tb(logs_dict)
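
Examples 2 and 3 assume a model whose forward pass returns a pair: output[0] with the class logits and output[1] with a confidence logit. A minimal sketch of such a two-head network, with layer sizes and names chosen purely for illustration:

    import torch
    import torch.nn as nn

    class TwoHeadNet(nn.Module):
        def __init__(self, in_features=128, num_classes=10):
            super().__init__()
            self.encoder = nn.Sequential(nn.Linear(in_features, 64), nn.ReLU())
            self.classifier = nn.Linear(64, num_classes)   # -> output[0]: class logits
            self.confidence = nn.Linear(64, 1)             # -> output[1]: confidence logit

        def forward(self, x):
            h = self.encoder(x)
            return self.classifier(h), self.confidence(h)

    output = TwoHeadNet()(torch.randn(4, 128))
    pred = output[0].argmax(dim=1, keepdim=True)    # as in the examples' metric update
    confidence = torch.sigmoid(output[1])           # confidence in (0, 1)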